aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2015-12-30 11:46:15 +0000
committerDimitry Andric <dim@FreeBSD.org>2015-12-30 11:46:15 +0000
commitdd58ef019b700900793a1eb48b52123db01b654e (patch)
treefcfbb4df56a744f4ddc6122c50521dd3f1c5e196 /test
parent2fe5752e3a7c345cdb59e869278d36af33c13fa4 (diff)
Vendor import of llvm trunk r256633:
Notes
Notes: svn path=/vendor/llvm/dist/; revision=292915
Diffstat (limited to 'test')
-rw-r--r--test/Analysis/BasicAA/2007-11-05-SizeCrash.ll2
-rw-r--r--test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll2
-rw-r--r--test/Analysis/BasicAA/bug.23540.ll17
-rw-r--r--test/Analysis/BasicAA/bug.23626.ll31
-rw-r--r--test/Analysis/BasicAA/cs-cs.ll41
-rw-r--r--test/Analysis/BasicAA/full-store-partial-alias.ll4
-rw-r--r--test/Analysis/BasicAA/intrinsics.ll27
-rw-r--r--test/Analysis/BasicAA/modref.ll37
-rw-r--r--test/Analysis/BasicAA/noalias-bugs.ll2
-rw-r--r--test/Analysis/BasicAA/phi-aa.ll1
-rw-r--r--test/Analysis/BasicAA/phi-loop.ll75
-rw-r--r--test/Analysis/BasicAA/q.bad.ll180
-rw-r--r--test/Analysis/BasicAA/sequential-gep.ll54
-rw-r--r--test/Analysis/BasicAA/zext.ll231
-rw-r--r--test/Analysis/BlockFrequencyInfo/bad_input.ll4
-rw-r--r--test/Analysis/BlockFrequencyInfo/basic.ll6
-rw-r--r--test/Analysis/BlockFrequencyInfo/irreducible_loop_crash.ll155
-rw-r--r--test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll6
-rw-r--r--test/Analysis/BranchProbabilityInfo/basic.ll68
-rw-r--r--test/Analysis/BranchProbabilityInfo/loop.ll152
-rw-r--r--test/Analysis/BranchProbabilityInfo/noreturn.ll67
-rw-r--r--test/Analysis/BranchProbabilityInfo/pr18705.ll4
-rw-r--r--test/Analysis/BranchProbabilityInfo/pr22718.ll4
-rw-r--r--test/Analysis/CFLAliasAnalysis/arguments-globals.ll2
-rw-r--r--test/Analysis/CFLAliasAnalysis/basic-interproc.ll2
-rw-r--r--test/Analysis/CFLAliasAnalysis/branch-alias.ll2
-rw-r--r--test/Analysis/CFLAliasAnalysis/const-expr-gep.ll2
-rw-r--r--test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll4
-rw-r--r--test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll2
-rw-r--r--test/Analysis/CFLAliasAnalysis/multilevel-combine.ll2
-rw-r--r--test/Analysis/CFLAliasAnalysis/must-and-partial.ll2
-rw-r--r--test/Analysis/CFLAliasAnalysis/opaque-call-alias.ll20
-rw-r--r--test/Analysis/CFLAliasAnalysis/va.ll2
-rw-r--r--test/Analysis/CallGraph/non-leaf-intrinsics.ll4
-rw-r--r--test/Analysis/CostModel/AArch64/select.ll12
-rw-r--r--test/Analysis/CostModel/AMDGPU/br.ll45
-rw-r--r--test/Analysis/CostModel/AMDGPU/extractelement.ll110
-rw-r--r--test/Analysis/CostModel/AMDGPU/lit.local.cfg2
-rw-r--r--test/Analysis/CostModel/ARM/cast.ll507
-rw-r--r--test/Analysis/CostModel/ARM/gep.ll88
-rw-r--r--test/Analysis/CostModel/ARM/select.ll6
-rw-r--r--test/Analysis/CostModel/PowerPC/load_store.ll4
-rw-r--r--test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll404
-rw-r--r--test/Analysis/CostModel/X86/arith.ll4
-rw-r--r--test/Analysis/CostModel/X86/cast.ll38
-rw-r--r--test/Analysis/CostModel/X86/masked-intrinsic-cost.ll215
-rw-r--r--test/Analysis/CostModel/X86/reduction.ll2
-rw-r--r--test/Analysis/CostModel/X86/sitofp.ll346
-rw-r--r--test/Analysis/CostModel/X86/sse-itoi.ll353
-rw-r--r--test/Analysis/CostModel/X86/testshiftashr.ll32
-rw-r--r--test/Analysis/CostModel/X86/testshiftlshr.ll16
-rw-r--r--test/Analysis/CostModel/X86/testshiftshl.ll16
-rw-r--r--test/Analysis/CostModel/X86/uitofp.ll418
-rw-r--r--test/Analysis/CostModel/X86/vector_gep.ll17
-rw-r--r--test/Analysis/CostModel/X86/vshift-ashr-cost.ll392
-rw-r--r--test/Analysis/CostModel/X86/vshift-cost.ll167
-rw-r--r--test/Analysis/CostModel/X86/vshift-lshr-cost.ll400
-rw-r--r--test/Analysis/CostModel/X86/vshift-shl-cost.ll580
-rw-r--r--test/Analysis/CostModel/no_info.ll22
-rw-r--r--test/Analysis/Delinearization/a.ll2
-rw-r--r--test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll2
-rw-r--r--test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll2
-rw-r--r--test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll2
-rw-r--r--test/Analysis/Delinearization/parameter_addrec_product.ll56
-rw-r--r--test/Analysis/DemandedBits/basic.ll34
-rw-r--r--test/Analysis/DependenceAnalysis/GCD.ll14
-rw-r--r--test/Analysis/DependenceAnalysis/NonAffineExpr.ll36
-rw-r--r--test/Analysis/DependenceAnalysis/PR21585.ll2
-rw-r--r--test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll16
-rw-r--r--test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg2
-rw-r--r--test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll23
-rw-r--r--test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll2
-rw-r--r--test/Analysis/GlobalsModRef/aliastest.ll5
-rw-r--r--test/Analysis/GlobalsModRef/argmemonly-escape.ll47
-rw-r--r--test/Analysis/GlobalsModRef/atomic-instrs.ll37
-rw-r--r--test/Analysis/GlobalsModRef/chaining-analysis.ll2
-rw-r--r--test/Analysis/GlobalsModRef/indirect-global.ll5
-rw-r--r--test/Analysis/GlobalsModRef/memset-escape.ll65
-rw-r--r--test/Analysis/GlobalsModRef/modreftest.ll22
-rw-r--r--test/Analysis/GlobalsModRef/nocapture.ll57
-rw-r--r--test/Analysis/GlobalsModRef/nonescaping-noalias.ll116
-rw-r--r--test/Analysis/GlobalsModRef/pr12351.ll2
-rw-r--r--test/Analysis/GlobalsModRef/pr25309.ll27
-rw-r--r--test/Analysis/GlobalsModRef/purecse.ll2
-rw-r--r--test/Analysis/GlobalsModRef/weak-interposition.ll24
-rw-r--r--test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll4
-rw-r--r--test/Analysis/Lint/cppeh-catch-intrinsics.ll278
-rw-r--r--test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll44
-rw-r--r--test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll64
-rw-r--r--test/Analysis/LoopAccessAnalysis/nullptr.ll38
-rw-r--r--test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll51
-rw-r--r--test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll4
-rw-r--r--test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll4
-rw-r--r--test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll89
-rw-r--r--test/Analysis/LoopAccessAnalysis/safe-no-checks.ll8
-rw-r--r--test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll28
-rw-r--r--test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll2
-rw-r--r--test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll2
-rw-r--r--test/Analysis/MemoryDependenceAnalysis/memdep-block-scan-limit.ll15
-rw-r--r--test/Analysis/ScalarEvolution/avoid-assume-hang.ll139
-rw-r--r--test/Analysis/ScalarEvolution/constant_condition.ll51
-rw-r--r--test/Analysis/ScalarEvolution/flags-from-poison.ll592
-rw-r--r--test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll10
-rw-r--r--test/Analysis/ScalarEvolution/min-max-exprs.ll2
-rw-r--r--test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll122
-rw-r--r--test/Analysis/ScalarEvolution/non-IV-phi.ll59
-rw-r--r--test/Analysis/ScalarEvolution/pr24757.ll35
-rw-r--r--test/Analysis/ScalarEvolution/pr25369.ll78
-rw-r--r--test/Analysis/ScalarEvolution/scev-aa.ll2
-rw-r--r--test/Analysis/ScalarEvolution/shift-op.ll164
-rw-r--r--test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll128
-rw-r--r--test/Analysis/ScalarEvolution/trip-count.ll5
-rw-r--r--test/Analysis/ScalarEvolution/zext-wrap.ll2
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll8
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll17
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/licm.ll2
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/precedence.ll8
-rw-r--r--test/Analysis/ValueTracking/known-bits-from-range-md.ll34
-rw-r--r--test/Analysis/ValueTracking/known-non-equal.ll21
-rw-r--r--test/Analysis/ValueTracking/knownnonzero-shift.ll13
-rw-r--r--test/Analysis/ValueTracking/knownzero-shift.ll14
-rw-r--r--test/Analysis/ValueTracking/memory-dereferenceable.ll117
-rw-r--r--test/Analysis/ValueTracking/monotonic-phi.ll49
-rw-r--r--test/Analysis/ValueTracking/pr24866.ll44
-rw-r--r--test/Assembler/2007-09-10-AliasFwdRef.ll2
-rw-r--r--test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll8
-rw-r--r--test/Assembler/ConstantExprFoldCast.ll4
-rw-r--r--test/Assembler/ConstantExprNoFold.ll8
-rw-r--r--test/Assembler/addrspacecast-alias.ll4
-rw-r--r--test/Assembler/alias-redefinition.ll6
-rw-r--r--test/Assembler/alias-use-list-order.ll6
-rw-r--r--test/Assembler/anon-functions.ll4
-rw-r--r--test/Assembler/debug-info.ll19
-rw-r--r--test/Assembler/dicompileunit.ll31
-rw-r--r--test/Assembler/diimportedentity.ll4
-rw-r--r--test/Assembler/dilexicalblock.ll2
-rw-r--r--test/Assembler/dilocalvariable-arg-large.ll4
-rw-r--r--test/Assembler/dilocalvariable.ll16
-rw-r--r--test/Assembler/dilocation.ll4
-rw-r--r--test/Assembler/disubprogram.ll31
-rw-r--r--test/Assembler/drop-debug-info.ll6
-rw-r--r--test/Assembler/global-addrspace-forwardref.ll9
-rw-r--r--test/Assembler/incorrect-tdep-attrs-parsing.ll6
-rw-r--r--test/Assembler/internal-hidden-alias.ll2
-rw-r--r--test/Assembler/internal-protected-alias.ll2
-rw-r--r--test/Assembler/invalid-alias-mismatched-explicit-type.ll4
-rw-r--r--test/Assembler/invalid-dicompileunit-language-bad.ll6
-rw-r--r--test/Assembler/invalid-dicompileunit-language-overflow.ll10
-rw-r--r--test/Assembler/invalid-dicompileunit-missing-language.ll4
-rw-r--r--test/Assembler/invalid-dicompileunit-null-file.ll4
-rw-r--r--test/Assembler/invalid-dicompileunit-uniqued.ll4
-rw-r--r--test/Assembler/invalid-dilocalvariable-arg-large.ll6
-rw-r--r--test/Assembler/invalid-dilocalvariable-arg-negative.ll7
-rw-r--r--test/Assembler/invalid-dilocalvariable-missing-scope.ll4
-rw-r--r--test/Assembler/invalid-dilocalvariable-missing-tag.ll4
-rw-r--r--test/Assembler/invalid-disubprogram-uniqued-definition.ll4
-rw-r--r--test/Assembler/invalid-fp80hex.ll6
-rw-r--r--test/Assembler/invalid-fwdref2.ll2
-rw-r--r--test/Assembler/invalid-inline-constraint.ll7
-rw-r--r--test/Assembler/invalid-untyped-metadata.ll6
-rw-r--r--test/Assembler/invalid-uselistorder-indexes-duplicated.ll6
-rw-r--r--test/Assembler/invalid-uselistorder-indexes-one.ll2
-rw-r--r--test/Assembler/invalid-uselistorder-indexes-ordered.ll6
-rw-r--r--test/Assembler/invalid-uselistorder-indexes-range.ll6
-rw-r--r--test/Assembler/invalid-uselistorder-indexes-toofew.ll6
-rw-r--r--test/Assembler/invalid-uselistorder-indexes-toomany.ll4
-rw-r--r--test/Assembler/metadata.ll2
-rw-r--r--test/Assembler/private-hidden-alias.ll2
-rw-r--r--test/Assembler/private-protected-alias.ll2
-rw-r--r--test/Assembler/token.ll11
-rw-r--r--test/Assembler/unnamed-alias.ll8
-rw-r--r--test/Assembler/uselistorder.ll2
-rw-r--r--test/Bindings/Go/go.test2
-rw-r--r--test/Bindings/Go/lit.local.cfg3
-rw-r--r--test/Bindings/OCaml/analysis.ml4
-rw-r--r--test/Bindings/OCaml/bitreader.ml4
-rw-r--r--test/Bindings/OCaml/bitwriter.ml4
-rw-r--r--test/Bindings/OCaml/core.ml4
-rw-r--r--test/Bindings/OCaml/executionengine.ml4
-rw-r--r--test/Bindings/OCaml/ext_exc.ml4
-rw-r--r--test/Bindings/OCaml/ipo.ml4
-rw-r--r--test/Bindings/OCaml/irreader.ml4
-rw-r--r--test/Bindings/OCaml/linker.ml14
-rw-r--r--test/Bindings/OCaml/passmgr_builder.ml4
-rw-r--r--test/Bindings/OCaml/scalar_opts.ml4
-rw-r--r--test/Bindings/OCaml/target.ml4
-rw-r--r--test/Bindings/OCaml/transform_utils.ml4
-rw-r--r--test/Bindings/OCaml/vectorize.ml4
-rw-r--r--test/Bindings/llvm-c/Inputs/invalid.ll.bcbin332 -> 688 bytes
-rw-r--r--test/Bindings/llvm-c/functions.ll11
-rw-r--r--test/Bindings/llvm-c/invalid-bitcode.test9
-rw-r--r--test/Bitcode/DICompileUnit-no-DWOId.ll2
-rw-r--r--test/Bitcode/DILocalVariable-explicit-tags.ll16
-rw-r--r--test/Bitcode/DILocalVariable-explicit-tags.ll.bcbin0 -> 500 bytes
-rw-r--r--test/Bitcode/DISubprogram-distinct-definitions.ll11
-rw-r--r--test/Bitcode/DISubprogram-distinct-definitions.ll.bcbin0 -> 512 bytes
-rw-r--r--test/Bitcode/Inputs/invalid-abbrev.bcbin129 -> 132 bytes
-rw-r--r--test/Bitcode/Inputs/invalid-cast.bcbin0 -> 1236 bytes
-rw-r--r--test/Bitcode/Inputs/invalid-name-with-0-byte.bcbin0 -> 1265 bytes
-rw-r--r--test/Bitcode/Inputs/invalid-no-function-block.bcbin0 -> 548 bytes
-rw-r--r--test/Bitcode/anon-functions.ll18
-rw-r--r--test/Bitcode/attributes.ll32
-rw-r--r--test/Bitcode/compatibility-3.6.ll1207
-rw-r--r--test/Bitcode/compatibility-3.6.ll.bcbin0 -> 10192 bytes
-rw-r--r--test/Bitcode/compatibility-3.7.ll1280
-rw-r--r--test/Bitcode/compatibility-3.7.ll.bcbin0 -> 11584 bytes
-rw-r--r--test/Bitcode/compatibility.ll1560
-rw-r--r--test/Bitcode/debug-loc-again.ll6
-rw-r--r--test/Bitcode/highLevelStructure.3.2.ll20
-rw-r--r--test/Bitcode/identification.ll6
-rw-r--r--test/Bitcode/invalid.ll2
-rw-r--r--test/Bitcode/invalid.ll.bcbin332 -> 688 bytes
-rw-r--r--test/Bitcode/invalid.test15
-rw-r--r--test/Bitcode/local-linkage-default-visibility.3.4.ll24
-rw-r--r--test/Bitcode/old-aliases.ll16
-rw-r--r--test/Bitcode/operand-bundles.ll152
-rw-r--r--test/Bitcode/select.ll8
-rw-r--r--test/Bitcode/tailcall.ll16
-rw-r--r--test/Bitcode/thinlto-function-summary.ll45
-rw-r--r--test/Bitcode/upgrade-subprogram.ll17
-rw-r--r--test/Bitcode/upgrade-subprogram.ll.bcbin0 -> 784 bytes
-rw-r--r--test/Bitcode/use-list-order.ll14
-rw-r--r--test/Bitcode/use-list-order2.ll57
-rw-r--r--test/Bitcode/vst-forward-declaration.ll29
-rw-r--r--test/BugPoint/metadata.ll6
-rw-r--r--test/BugPoint/named-md.ll39
-rw-r--r--test/BugPoint/remove_arguments_test.ll6
-rw-r--r--test/BugPoint/replace-funcs-with-null.ll2
-rw-r--r--test/CMakeLists.txt33
-rw-r--r--test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll46
-rw-r--r--test/CodeGen/AArch64/aarch64-addv.ll98
-rw-r--r--test/CodeGen/AArch64/aarch64-deferred-spilling.ll514
-rw-r--r--test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll24
-rw-r--r--test/CodeGen/AArch64/aarch64-interleaved-accesses.ll147
-rw-r--r--test/CodeGen/AArch64/aarch64-loop-gep-opt.ll50
-rw-r--r--test/CodeGen/AArch64/aarch64-minmaxv.ll511
-rw-r--r--test/CodeGen/AArch64/aarch64-smax-constantfold.ll12
-rw-r--r--test/CodeGen/AArch64/addsub_ext.ll146
-rw-r--r--test/CodeGen/AArch64/alloca.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll14
-rw-r--r--test/CodeGen/AArch64/arm64-aapcs-be.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-aapcs.ll21
-rw-r--r--test/CodeGen/AArch64/arm64-abi_align.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-addr-type-promotion.ll9
-rw-r--r--test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-arith.ll3
-rw-r--r--test/CodeGen/AArch64/arm64-atomic-128.ll7
-rw-r--r--test/CodeGen/AArch64/arm64-atomic.ll70
-rw-r--r--test/CodeGen/AArch64/arm64-builtins-linux.ll11
-rw-r--r--test/CodeGen/AArch64/arm64-ccmp-heuristics.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-ccmp.ll166
-rw-r--r--test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll17
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh.ll604
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-br.ll15
-rw-r--r--test/CodeGen/AArch64/arm64-fmax-safe.ll53
-rw-r--r--test/CodeGen/AArch64/arm64-fmax.ll46
-rw-r--r--test/CodeGen/AArch64/arm64-fp128.ll31
-rw-r--r--test/CodeGen/AArch64/arm64-hello.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-indexed-memory.ll33
-rw-r--r--test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll26
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-join-reserved.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-large-frame.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-ld-from-st.ll666
-rw-r--r--test/CodeGen/AArch64/arm64-ldp.ll188
-rw-r--r--test/CodeGen/AArch64/arm64-long-shift.ll80
-rw-r--r--test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll406
-rw-r--r--test/CodeGen/AArch64/arm64-neon-2velem.ll55
-rw-r--r--test/CodeGen/AArch64/arm64-neon-copy.ll17
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-platform-reg.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-popcnt.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-rounding.ll62
-rw-r--r--test/CodeGen/AArch64/arm64-shrink-wrapping.ll95
-rw-r--r--test/CodeGen/AArch64/arm64-spill-lr.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-stackmap.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-stp.ll34
-rw-r--r--test/CodeGen/AArch64/arm64-strict-align.ll5
-rw-r--r--test/CodeGen/AArch64/arm64-tls-dynamic-together.ll43
-rw-r--r--test/CodeGen/AArch64/arm64-trunc-store.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-vabs.ll66
-rw-r--r--test/CodeGen/AArch64/arm64-variadic-aapcs.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-vector-ext.ll54
-rw-r--r--test/CodeGen/AArch64/arm64-vminmaxnm.ll17
-rw-r--r--test/CodeGen/AArch64/arm64-xaluo.ll4
-rw-r--r--test/CodeGen/AArch64/atomic-ops.ll20
-rw-r--r--test/CodeGen/AArch64/bitcast-v2i8.ll2
-rw-r--r--test/CodeGen/AArch64/bitfield-insert.ll41
-rw-r--r--test/CodeGen/AArch64/bitfield.ll46
-rw-r--r--test/CodeGen/AArch64/bitreverse.ll87
-rw-r--r--test/CodeGen/AArch64/combine-comparisons-by-cse.ll26
-rw-r--r--test/CodeGen/AArch64/cpus.ll1
-rw-r--r--test/CodeGen/AArch64/cxx-tlscc.ll76
-rw-r--r--test/CodeGen/AArch64/dag-combine-select.ll47
-rw-r--r--test/CodeGen/AArch64/divrem.ll22
-rw-r--r--test/CodeGen/AArch64/emutls.ll116
-rw-r--r--test/CodeGen/AArch64/emutls_generic.ll59
-rw-r--r--test/CodeGen/AArch64/eon.ll29
-rw-r--r--test/CodeGen/AArch64/f16-instructions.ll111
-rw-r--r--test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll19
-rw-r--r--test/CodeGen/AArch64/fast-isel-branch-cond-split.ll52
-rw-r--r--test/CodeGen/AArch64/fast-isel-cmp-vec.ll100
-rw-r--r--test/CodeGen/AArch64/fast-isel-folded-shift.ll125
-rw-r--r--test/CodeGen/AArch64/fast-isel-logic-op.ll2
-rw-r--r--test/CodeGen/AArch64/fastcc-reserved.ll4
-rw-r--r--test/CodeGen/AArch64/fastcc.ll8
-rw-r--r--test/CodeGen/AArch64/fcvt_combine.ll154
-rw-r--r--test/CodeGen/AArch64/fdiv_combine.ll115
-rw-r--r--test/CodeGen/AArch64/fold-constants.ll19
-rw-r--r--test/CodeGen/AArch64/fp16-v4-instructions.ll51
-rw-r--r--test/CodeGen/AArch64/fp16-v8-instructions.ll63
-rw-r--r--test/CodeGen/AArch64/free-zext.ll59
-rw-r--r--test/CodeGen/AArch64/func-argpassing.ll4
-rw-r--r--test/CodeGen/AArch64/func-calls.ll4
-rw-r--r--test/CodeGen/AArch64/global-alignment.ll2
-rw-r--r--test/CodeGen/AArch64/global-merge-1.ll16
-rw-r--r--test/CodeGen/AArch64/global-merge-2.ll34
-rw-r--r--test/CodeGen/AArch64/global-merge-3.ll46
-rw-r--r--test/CodeGen/AArch64/global-merge-4.ll6
-rw-r--r--test/CodeGen/AArch64/global-merge-group-by-use.ll6
-rw-r--r--test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll2
-rw-r--r--test/CodeGen/AArch64/global-merge-ignore-single-use.ll2
-rw-r--r--test/CodeGen/AArch64/ldst-opt.ll477
-rw-r--r--test/CodeGen/AArch64/merge-store.ll30
-rw-r--r--test/CodeGen/AArch64/misched-fusion.ll34
-rw-r--r--test/CodeGen/AArch64/mul-lohi.ll29
-rw-r--r--test/CodeGen/AArch64/nest-register.ll2
-rw-r--r--test/CodeGen/AArch64/nontemporal.ll339
-rw-r--r--test/CodeGen/AArch64/pic-eh-stubs.ll2
-rw-r--r--test/CodeGen/AArch64/readcyclecounter.ll15
-rw-r--r--test/CodeGen/AArch64/regress-tblgen-chains.ll4
-rw-r--r--test/CodeGen/AArch64/remat.ll1
-rw-r--r--test/CodeGen/AArch64/rotate.ll14
-rw-r--r--test/CodeGen/AArch64/round-conv.ll330
-rwxr-xr-xtest/CodeGen/AArch64/shrink-wrap.ll184
-rw-r--r--test/CodeGen/AArch64/stackmap-frame-setup.ll20
-rw-r--r--test/CodeGen/AArch64/tail-call.ll6
-rw-r--r--test/CodeGen/AArch64/tailcall-explicit-sret.ll2
-rw-r--r--test/CodeGen/AArch64/tbi.ll102
-rw-r--r--test/CodeGen/AArch64/vector-fcopysign.ll178
-rw-r--r--test/CodeGen/AArch64/xbfiz.ll30
-rw-r--r--test/CodeGen/AMDGPU/add.ll14
-rw-r--r--test/CodeGen/AMDGPU/address-space.ll6
-rw-r--r--test/CodeGen/AMDGPU/addrspacecast.ll66
-rw-r--r--test/CodeGen/AMDGPU/and.ll101
-rw-r--r--test/CodeGen/AMDGPU/annotate-kernel-features.ll193
-rw-r--r--test/CodeGen/AMDGPU/array-ptr-calc-i32.ll8
-rw-r--r--test/CodeGen/AMDGPU/bitreverse.ll115
-rw-r--r--test/CodeGen/AMDGPU/calling-conventions.ll20
-rw-r--r--test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll98
-rw-r--r--test/CodeGen/AMDGPU/cgp-addressing-modes.ll254
-rw-r--r--test/CodeGen/AMDGPU/ci-use-flat-for-global.ll15
-rw-r--r--test/CodeGen/AMDGPU/ctpop64.ll22
-rw-r--r--test/CodeGen/AMDGPU/cvt_f32_ubyte.ll12
-rw-r--r--test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll52
-rw-r--r--test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll10
-rw-r--r--test/CodeGen/AMDGPU/ds-sub-offset.ll125
-rw-r--r--test/CodeGen/AMDGPU/ds_read2.ll10
-rw-r--r--test/CodeGen/AMDGPU/ds_read2_superreg.ll89
-rw-r--r--test/CodeGen/AMDGPU/ds_read2st64.ll8
-rw-r--r--test/CodeGen/AMDGPU/ds_write2.ll9
-rw-r--r--test/CodeGen/AMDGPU/ds_write2st64.ll4
-rw-r--r--test/CodeGen/AMDGPU/dynamic_stackalloc.ll11
-rw-r--r--test/CodeGen/AMDGPU/extract-vector-elt-i64.ll43
-rw-r--r--test/CodeGen/AMDGPU/fadd64.ll50
-rw-r--r--test/CodeGen/AMDGPU/fceil64.ll12
-rw-r--r--test/CodeGen/AMDGPU/fcmp.ll2
-rw-r--r--test/CodeGen/AMDGPU/flat-address-space.ll77
-rw-r--r--test/CodeGen/AMDGPU/flat-scratch-reg.ll36
-rw-r--r--test/CodeGen/AMDGPU/fma-combine.ll200
-rw-r--r--test/CodeGen/AMDGPU/fmax_legacy.ll40
-rw-r--r--test/CodeGen/AMDGPU/fmin_legacy.ll63
-rw-r--r--test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll102
-rw-r--r--test/CodeGen/AMDGPU/fneg-fabs.ll27
-rw-r--r--test/CodeGen/AMDGPU/ftrunc.f64.ll12
-rw-r--r--test/CodeGen/AMDGPU/gep-address-space.ll34
-rw-r--r--test/CodeGen/AMDGPU/global-constant.ll27
-rw-r--r--test/CodeGen/AMDGPU/global-extload-i32.ll327
-rw-r--r--test/CodeGen/AMDGPU/global_atomics.ll20
-rw-r--r--test/CodeGen/AMDGPU/half.ll256
-rw-r--r--test/CodeGen/AMDGPU/hsa-globals.ll132
-rw-r--r--test/CodeGen/AMDGPU/hsa-group-segment.ll14
-rw-r--r--test/CodeGen/AMDGPU/hsa.ll36
-rw-r--r--test/CodeGen/AMDGPU/image-attributes.ll206
-rw-r--r--test/CodeGen/AMDGPU/image-resource-id.ll409
-rw-r--r--test/CodeGen/AMDGPU/imm.ll24
-rw-r--r--test/CodeGen/AMDGPU/indirect-addressing-si.ll67
-rw-r--r--test/CodeGen/AMDGPU/indirect-private-64.ll34
-rw-r--r--test/CodeGen/AMDGPU/inline-constraints.ll23
-rw-r--r--test/CodeGen/AMDGPU/insert_vector_elt.ll103
-rw-r--r--test/CodeGen/AMDGPU/kernel-args.ll26
-rw-r--r--test/CodeGen/AMDGPU/large-alloca-compute.ll57
-rw-r--r--test/CodeGen/AMDGPU/large-alloca-graphics.ll47
-rw-r--r--test/CodeGen/AMDGPU/large-alloca.ll15
-rw-r--r--test/CodeGen/AMDGPU/literals.ll8
-rw-r--r--test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll4
-rw-r--r--test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll2
-rw-r--r--test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll28
-rw-r--r--test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll6
-rw-r--r--test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll1
-rw-r--r--test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll37
-rw-r--r--test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll2
-rw-r--r--test/CodeGen/AMDGPU/llvm.SI.packf16.ll29
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll16
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll14
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll16
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll16
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll30
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll24
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll29
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll29
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll27
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll27
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll2
-rw-r--r--test/CodeGen/AMDGPU/llvm.dbg.value.ll12
-rw-r--r--test/CodeGen/AMDGPU/llvm.memcpy.ll66
-rw-r--r--test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll184
-rw-r--r--test/CodeGen/AMDGPU/llvm.round.f64.ll5
-rw-r--r--test/CodeGen/AMDGPU/load.ll34
-rw-r--r--test/CodeGen/AMDGPU/local-memory-two-objects.ll4
-rw-r--r--test/CodeGen/AMDGPU/local-memory.ll4
-rw-r--r--test/CodeGen/AMDGPU/max.ll116
-rw-r--r--test/CodeGen/AMDGPU/merge-stores.ll196
-rw-r--r--test/CodeGen/AMDGPU/min.ll171
-rw-r--r--test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll36
-rw-r--r--test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll52
-rw-r--r--test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll18
-rw-r--r--test/CodeGen/AMDGPU/no-shrink-extloads.ll12
-rw-r--r--test/CodeGen/AMDGPU/opencl-image-metadata.ll24
-rw-r--r--test/CodeGen/AMDGPU/operand-folding.ll2
-rw-r--r--test/CodeGen/AMDGPU/or.ll2
-rw-r--r--test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll28
-rw-r--r--test/CodeGen/AMDGPU/private-memory.ll12
-rw-r--r--test/CodeGen/AMDGPU/register-count-comments.ll3
-rw-r--r--test/CodeGen/AMDGPU/reorder-stores.ll58
-rw-r--r--test/CodeGen/AMDGPU/s_movk_i32.ll18
-rw-r--r--test/CodeGen/AMDGPU/salu-to-valu.ll418
-rw-r--r--test/CodeGen/AMDGPU/sampler-resource-id.ll65
-rw-r--r--test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll2
-rw-r--r--test/CodeGen/AMDGPU/scratch-buffer.ll2
-rw-r--r--test/CodeGen/AMDGPU/select64.ll8
-rw-r--r--test/CodeGen/AMDGPU/set-dx10.ll48
-rw-r--r--test/CodeGen/AMDGPU/setcc-opt.ll22
-rw-r--r--test/CodeGen/AMDGPU/sext-in-reg.ll54
-rw-r--r--test/CodeGen/AMDGPU/shl.ll15
-rw-r--r--test/CodeGen/AMDGPU/shl_add_constant.ll6
-rw-r--r--test/CodeGen/AMDGPU/shl_add_ptr.ll2
-rw-r--r--test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll16
-rw-r--r--test/CodeGen/AMDGPU/si-literal-folding.ll17
-rw-r--r--test/CodeGen/AMDGPU/si-sgpr-spill.ll10
-rw-r--r--test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll7
-rw-r--r--test/CodeGen/AMDGPU/sint_to_fp.f64.ll6
-rw-r--r--test/CodeGen/AMDGPU/sminmax.ll130
-rw-r--r--test/CodeGen/AMDGPU/smrd.ll73
-rw-r--r--test/CodeGen/AMDGPU/split-scalar-i64-add.ll42
-rw-r--r--test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll104
-rw-r--r--test/CodeGen/AMDGPU/sra.ll8
-rw-r--r--test/CodeGen/AMDGPU/srl.ll13
-rw-r--r--test/CodeGen/AMDGPU/store-barrier.ll4
-rw-r--r--test/CodeGen/AMDGPU/store.ll25
-rw-r--r--test/CodeGen/AMDGPU/store_typed.ll24
-rw-r--r--test/CodeGen/AMDGPU/sub.ll14
-rw-r--r--test/CodeGen/AMDGPU/trunc.ll8
-rw-r--r--test/CodeGen/AMDGPU/udivrem.ll130
-rw-r--r--test/CodeGen/AMDGPU/uint_to_fp.f64.ll6
-rw-r--r--test/CodeGen/AMDGPU/unsupported-cc.ll32
-rw-r--r--test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll167
-rw-r--r--test/CodeGen/AMDGPU/valu-i1.ll16
-rw-r--r--test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll585
-rw-r--r--test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll494
-rw-r--r--test/CodeGen/AMDGPU/vop-shrink.ll4
-rw-r--r--test/CodeGen/AMDGPU/wait.ll61
-rw-r--r--test/CodeGen/AMDGPU/work-item-intrinsics.ll263
-rw-r--r--test/CodeGen/AMDGPU/xor.ll2
-rw-r--r--test/CodeGen/AMDGPU/zero_extend.ll3
-rw-r--r--test/CodeGen/ARM/2007-03-13-InstrSched.ll2
-rw-r--r--test/CodeGen/ARM/2009-10-16-Scope.ll6
-rw-r--r--test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll6
-rw-r--r--test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll24
-rw-r--r--test/CodeGen/ARM/2010-05-21-BuildVector.ll4
-rw-r--r--test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll4
-rwxr-xr-xtest/CodeGen/ARM/2010-06-21-nondarwin-tc.ll2
-rw-r--r--test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll12
-rw-r--r--test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll4
-rw-r--r--test/CodeGen/ARM/2010-08-04-StackVariable.ll24
-rw-r--r--test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll42
-rw-r--r--test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll2
-rw-r--r--test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll42
-rw-r--r--test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll4
-rw-r--r--test/CodeGen/ARM/2011-10-26-memset-inline.ll2
-rw-r--r--test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll10
-rw-r--r--test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll4
-rw-r--r--test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll14
-rw-r--r--test/CodeGen/ARM/2012-11-14-subs_carry.ll10
-rw-r--r--test/CodeGen/ARM/2013-10-11-select-stalls.ll13
-rw-r--r--test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll4
-rw-r--r--test/CodeGen/ARM/MachO-subtypes.ll68
-rw-r--r--test/CodeGen/ARM/Windows/division.ll38
-rw-r--r--test/CodeGen/ARM/Windows/integer-floating-point-conversion.ll74
-rw-r--r--test/CodeGen/ARM/Windows/libcalls.ll75
-rw-r--r--test/CodeGen/ARM/Windows/no-eabi.ll10
-rw-r--r--test/CodeGen/ARM/Windows/no-frame-register.ll22
-rw-r--r--test/CodeGen/ARM/Windows/overflow.ll77
-rw-r--r--test/CodeGen/ARM/adv-copy-opt.ll14
-rw-r--r--test/CodeGen/ARM/aliases.ll30
-rw-r--r--test/CodeGen/ARM/align-sp-adjustment.ll47
-rw-r--r--test/CodeGen/ARM/apcs-vfp.ll153
-rw-r--r--test/CodeGen/ARM/arm-eabi.ll63
-rw-r--r--test/CodeGen/ARM/arm-interleaved-accesses.ll190
-rw-r--r--test/CodeGen/ARM/arm-shrink-wrapping-linux.ll142
-rw-r--r--test/CodeGen/ARM/arm-shrink-wrapping.ll683
-rw-r--r--test/CodeGen/ARM/atomic-64bit.ll6
-rw-r--r--test/CodeGen/ARM/atomic-cmp.ll4
-rw-r--r--test/CodeGen/ARM/atomic-cmpxchg.ll98
-rw-r--r--test/CodeGen/ARM/atomic-op.ll46
-rw-r--r--test/CodeGen/ARM/atomic-ops-v8.ll38
-rw-r--r--test/CodeGen/ARM/avoid-cpsr-rmw.ll16
-rw-r--r--test/CodeGen/ARM/bfi.ll95
-rw-r--r--test/CodeGen/ARM/build-attributes-optimization-minsize.ll18
-rw-r--r--test/CodeGen/ARM/build-attributes-optimization-mixed.ll23
-rw-r--r--test/CodeGen/ARM/build-attributes-optimization-optnone.ll18
-rw-r--r--test/CodeGen/ARM/build-attributes-optimization-optsize.ll18
-rw-r--r--test/CodeGen/ARM/build-attributes-optimization.ll23
-rw-r--r--test/CodeGen/ARM/build-attributes.ll142
-rw-r--r--test/CodeGen/ARM/call-tc.ll8
-rw-r--r--test/CodeGen/ARM/cfi-alignment.ll48
-rw-r--r--test/CodeGen/ARM/cmpxchg-idioms.ll6
-rw-r--r--test/CodeGen/ARM/cmpxchg-weak.ll56
-rw-r--r--test/CodeGen/ARM/coalesce-dbgvalue.ll10
-rw-r--r--test/CodeGen/ARM/coalesce-subregs.ll38
-rw-r--r--test/CodeGen/ARM/combine-vmovdrr.ll72
-rw-r--r--test/CodeGen/ARM/constants.ll6
-rw-r--r--test/CodeGen/ARM/dagcombine-concatvector.ll4
-rw-r--r--test/CodeGen/ARM/debug-frame-vararg.ll14
-rw-r--r--test/CodeGen/ARM/debug-frame.ll28
-rw-r--r--test/CodeGen/ARM/debug-info-arg.ll20
-rw-r--r--test/CodeGen/ARM/debug-info-blocks.ll40
-rw-r--r--test/CodeGen/ARM/debug-info-branch-folding.ll32
-rw-r--r--test/CodeGen/ARM/debug-info-d16-reg.ll38
-rw-r--r--test/CodeGen/ARM/debug-info-no-frame.ll8
-rw-r--r--test/CodeGen/ARM/debug-info-qreg.ll28
-rw-r--r--test/CodeGen/ARM/debug-info-s16-reg.ll38
-rw-r--r--test/CodeGen/ARM/debug-info-sreg2.ll10
-rw-r--r--test/CodeGen/ARM/debug-segmented-stacks.ll14
-rw-r--r--test/CodeGen/ARM/debugtrap.ll17
-rw-r--r--test/CodeGen/ARM/div.ll71
-rw-r--r--test/CodeGen/ARM/divmod-eabi.ll4
-rw-r--r--test/CodeGen/ARM/eh-resume-darwin.ll8
-rw-r--r--test/CodeGen/ARM/emutls.ll258
-rw-r--r--test/CodeGen/ARM/emutls1.ll31
-rw-r--r--test/CodeGen/ARM/emutls_generic.ll61
-rw-r--r--test/CodeGen/ARM/fast-isel-align.ll24
-rw-r--r--test/CodeGen/ARM/fast-isel-ext.ll35
-rw-r--r--test/CodeGen/ARM/fast-isel-mvn.ll10
-rw-r--r--test/CodeGen/ARM/fast-isel-pic.ll23
-rw-r--r--test/CodeGen/ARM/fold-stack-adjust.ll18
-rw-r--r--test/CodeGen/ARM/fp16-args.ll40
-rw-r--r--test/CodeGen/ARM/fp16-promote.ll471
-rw-r--r--test/CodeGen/ARM/fp16.ll62
-rw-r--r--test/CodeGen/ARM/fparith.ll4
-rw-r--r--test/CodeGen/ARM/gep-optimization.ll77
-rw-r--r--test/CodeGen/ARM/global-merge-1.ll6
-rw-r--r--test/CodeGen/ARM/global-merge-external.ll46
-rw-r--r--test/CodeGen/ARM/globals.ll9
-rw-r--r--test/CodeGen/ARM/ifcvt-branch-weight-bug.ll14
-rw-r--r--test/CodeGen/ARM/ifcvt-branch-weight.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt-iter-indbr.ll6
-rw-r--r--test/CodeGen/ARM/ifcvt4.ll6
-rw-r--r--test/CodeGen/ARM/ifcvt5.ll4
-rw-r--r--test/CodeGen/ARM/ifcvt6.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt8.ll4
-rw-r--r--test/CodeGen/ARM/inlineasm-switch-mode.ll4
-rw-r--r--test/CodeGen/ARM/ldm-stm-base-materialization.ll93
-rw-r--r--test/CodeGen/ARM/ldrd.ll58
-rw-r--r--test/CodeGen/ARM/legalize-unaligned-load.ll35
-rw-r--r--test/CodeGen/ARM/load-global.ll12
-rw-r--r--test/CodeGen/ARM/load-store-flags.ll4
-rw-r--r--test/CodeGen/ARM/load.ll571
-rw-r--r--test/CodeGen/ARM/machine-cse-cmp.ll2
-rw-r--r--test/CodeGen/ARM/memcpy-inline.ll2
-rw-r--r--test/CodeGen/ARM/memcpy-ldm-stm.ll94
-rw-r--r--test/CodeGen/ARM/memfunc.ll255
-rw-r--r--test/CodeGen/ARM/minmax.ll193
-rw-r--r--test/CodeGen/ARM/neon_minmax.ll1
-rw-r--r--test/CodeGen/ARM/neon_spill.ll6
-rw-r--r--test/CodeGen/ARM/neon_vabs.ll38
-rw-r--r--test/CodeGen/ARM/neon_vshl_minint.ll13
-rw-r--r--test/CodeGen/ARM/out-of-registers.ll8
-rw-r--r--test/CodeGen/ARM/pr25317.ll11
-rw-r--r--test/CodeGen/ARM/pr25838.ll34
-rw-r--r--test/CodeGen/ARM/rbit.ll11
-rw-r--r--test/CodeGen/ARM/reg_sequence.ll64
-rw-r--r--test/CodeGen/ARM/rotate.ll14
-rw-r--r--test/CodeGen/ARM/sat-arith.ll63
-rw-r--r--test/CodeGen/ARM/sched-it-debug-nodes.ll88
-rw-r--r--test/CodeGen/ARM/setjmp_longjmp.ll113
-rw-r--r--test/CodeGen/ARM/shifter_operand.ll228
-rw-r--r--test/CodeGen/ARM/sjlj-prepare-critical-edge.ll2
-rw-r--r--test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll1
-rw-r--r--test/CodeGen/ARM/softfp-fabs-fneg.ll41
-rw-r--r--test/CodeGen/ARM/special-reg-mcore.ll2
-rw-r--r--test/CodeGen/ARM/spill-q.ll28
-rw-r--r--test/CodeGen/ARM/ssat-lower.ll11
-rw-r--r--test/CodeGen/ARM/ssat-upper.ll11
-rw-r--r--test/CodeGen/ARM/subtarget-no-movt.ll45
-rw-r--r--test/CodeGen/ARM/tail-merge-branch-weight.ll2
-rw-r--r--test/CodeGen/ARM/taildup-branch-weight.ll4
-rw-r--r--test/CodeGen/ARM/test-sharedidx.ll15
-rw-r--r--test/CodeGen/ARM/thumb-alignment.ll2
-rw-r--r--test/CodeGen/ARM/thumb1-ldst-opt.ll27
-rw-r--r--test/CodeGen/ARM/thumb1_return_sequence.ll70
-rw-r--r--test/CodeGen/ARM/thumb2-it-block.ll24
-rw-r--r--test/CodeGen/ARM/thumb_indirect_calls.ll5
-rw-r--r--test/CodeGen/ARM/tls-models.ll74
-rw-r--r--test/CodeGen/ARM/tls3.ll29
-rw-r--r--test/CodeGen/ARM/unaligned_load_store.ll4
-rw-r--r--test/CodeGen/ARM/unaligned_load_store_vfp.ll98
-rw-r--r--test/CodeGen/ARM/usat-lower.ll11
-rw-r--r--test/CodeGen/ARM/usat-upper.ll11
-rw-r--r--test/CodeGen/ARM/v7k-abi-align.ll152
-rw-r--r--test/CodeGen/ARM/v7k-libcalls.ll154
-rw-r--r--test/CodeGen/ARM/v7k-sincos.ll16
-rw-r--r--test/CodeGen/ARM/vcge.ll4
-rw-r--r--test/CodeGen/ARM/vcombine.ll64
-rw-r--r--test/CodeGen/ARM/vcvt_combine.ll103
-rw-r--r--test/CodeGen/ARM/vdiv_combine.ll17
-rw-r--r--test/CodeGen/ARM/vdup.ll16
-rw-r--r--test/CodeGen/ARM/vector-DAGCombine.ll4
-rw-r--r--test/CodeGen/ARM/vector-load.ll4
-rw-r--r--test/CodeGen/ARM/vector-store.ll6
-rw-r--r--test/CodeGen/ARM/vext.ll34
-rw-r--r--test/CodeGen/ARM/vfp-reg-stride.ll42
-rw-r--r--test/CodeGen/ARM/vfp-regs-dwarf.ll6
-rw-r--r--test/CodeGen/ARM/vld-vst-upgrade.ll139
-rw-r--r--test/CodeGen/ARM/vld1.ll52
-rw-r--r--test/CodeGen/ARM/vld2.ll40
-rw-r--r--test/CodeGen/ARM/vld3.ll42
-rw-r--r--test/CodeGen/ARM/vld4.ll42
-rw-r--r--test/CodeGen/ARM/vlddup.ll30
-rw-r--r--test/CodeGen/ARM/vldlane.ll92
-rw-r--r--test/CodeGen/ARM/vminmaxnm-safe.ll396
-rw-r--r--test/CodeGen/ARM/vminmaxnm.ll358
-rw-r--r--test/CodeGen/ARM/vmov.ll4
-rw-r--r--test/CodeGen/ARM/vmul.ll14
-rw-r--r--test/CodeGen/ARM/vpadd.ll2
-rw-r--r--test/CodeGen/ARM/vselect_imax.ll26
-rw-r--r--test/CodeGen/ARM/vst1.ll48
-rw-r--r--test/CodeGen/ARM/vst2.ll44
-rw-r--r--test/CodeGen/ARM/vst3.ll42
-rw-r--r--test/CodeGen/ARM/vst4.ll42
-rw-r--r--test/CodeGen/ARM/vstlane.ll90
-rw-r--r--test/CodeGen/ARM/vtrn.ll124
-rw-r--r--test/CodeGen/ARM/vuzp.ll136
-rw-r--r--test/CodeGen/ARM/vzip.ll82
-rw-r--r--test/CodeGen/BPF/sockex2.ll2
-rw-r--r--test/CodeGen/CPP/gep.ll10
-rw-r--r--test/CodeGen/Generic/2009-03-17-LSR-APInt.ll28
-rw-r--r--test/CodeGen/Generic/ForceStackAlign.ll27
-rw-r--r--test/CodeGen/Generic/MachineBranchProb.ll8
-rw-r--r--test/CodeGen/Generic/dbg_value.ll5
-rw-r--r--test/CodeGen/Generic/lit.local.cfg3
-rw-r--r--test/CodeGen/Generic/overloaded-intrinsic-name.ll32
-rw-r--r--test/CodeGen/Generic/vector.ll6
-rw-r--r--test/CodeGen/Hexagon/NVJumpCmp.ll89
-rw-r--r--test/CodeGen/Hexagon/absaddr-store.ll1
-rw-r--r--test/CodeGen/Hexagon/adde.ll6
-rw-r--r--test/CodeGen/Hexagon/alu64.ll134
-rw-r--r--test/CodeGen/Hexagon/bit-eval.ll53
-rw-r--r--test/CodeGen/Hexagon/bit-loop.ll80
-rw-r--r--test/CodeGen/Hexagon/cfi-late.ll65
-rw-r--r--test/CodeGen/Hexagon/clr_set_toggle.ll2
-rw-r--r--test/CodeGen/Hexagon/combine.ll2
-rw-r--r--test/CodeGen/Hexagon/combine_ir.ll16
-rw-r--r--test/CodeGen/Hexagon/early-if-conversion-bug1.ll412
-rw-r--r--test/CodeGen/Hexagon/early-if-phi-i1.ll17
-rw-r--r--test/CodeGen/Hexagon/early-if-spare.ll57
-rw-r--r--test/CodeGen/Hexagon/early-if.ll75
-rw-r--r--test/CodeGen/Hexagon/extload-combine.ll2
-rw-r--r--test/CodeGen/Hexagon/hwloop-dbg.ll12
-rw-r--r--test/CodeGen/Hexagon/i16_VarArg.ll2
-rw-r--r--test/CodeGen/Hexagon/i1_VarArg.ll2
-rw-r--r--test/CodeGen/Hexagon/i8_VarArg.ll2
-rw-r--r--test/CodeGen/Hexagon/ifcvt-edge-weight.ll64
-rw-r--r--test/CodeGen/Hexagon/memcpy-likely-aligned.ll32
-rw-r--r--test/CodeGen/Hexagon/mux-basic.ll28
-rw-r--r--test/CodeGen/Hexagon/opt-fabs.ll2
-rw-r--r--test/CodeGen/Hexagon/pic-jumptables.ll48
-rw-r--r--test/CodeGen/Hexagon/pic-simple.ll22
-rw-r--r--test/CodeGen/Hexagon/pic-static.ll21
-rw-r--r--test/CodeGen/Hexagon/relax.ll9
-rw-r--r--test/CodeGen/Hexagon/sdr-basic.ll15
-rw-r--r--test/CodeGen/Hexagon/sdr-shr32.ll22
-rw-r--r--test/CodeGen/Hexagon/simple_addend.ll2
-rw-r--r--test/CodeGen/Hexagon/store-widen-aliased-load.ll21
-rw-r--r--test/CodeGen/Hexagon/store-widen-negv.ll11
-rw-r--r--test/CodeGen/Hexagon/store-widen-negv2.ll19
-rw-r--r--test/CodeGen/Hexagon/store-widen.ll18
-rw-r--r--test/CodeGen/Hexagon/struct_args.ll2
-rw-r--r--test/CodeGen/Hexagon/sube.ll8
-rw-r--r--test/CodeGen/Hexagon/tail-dup-subreg-abort.ll28
-rw-r--r--test/CodeGen/Hexagon/tfr-to-combine.ll2
-rw-r--r--test/CodeGen/Hexagon/union-1.ll2
-rw-r--r--test/CodeGen/Hexagon/v60Intrins.ll2559
-rw-r--r--test/CodeGen/Hexagon/v60Vasr.ll247
-rw-r--r--test/CodeGen/Hexagon/v60small.ll51
-rw-r--r--test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll2
-rw-r--r--test/CodeGen/Hexagon/vect/vect-loadv4i16.ll2
-rw-r--r--test/CodeGen/Hexagon/vect/vect-shuffle.ll2
-rw-r--r--test/CodeGen/Hexagon/vect/vect-splat.ll2
-rw-r--r--test/CodeGen/Hexagon/vect/vect-xor.ll2
-rw-r--r--test/CodeGen/Inputs/DbgValueOtherTargets.ll8
-rw-r--r--test/CodeGen/MIR/AArch64/cfi-def-cfa.mir31
-rw-r--r--test/CodeGen/MIR/AArch64/expected-target-flag-name.mir23
-rw-r--r--test/CodeGen/MIR/AArch64/invalid-target-flag-name.mir23
-rw-r--r--test/CodeGen/MIR/AArch64/lit.local.cfg8
-rw-r--r--test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir28
-rw-r--r--test/CodeGen/MIR/AArch64/stack-object-local-offset.mir41
-rw-r--r--test/CodeGen/MIR/AArch64/target-flags.mir39
-rw-r--r--test/CodeGen/MIR/AMDGPU/expected-target-index-name.mir64
-rw-r--r--test/CodeGen/MIR/AMDGPU/invalid-target-index-operand.mir64
-rw-r--r--test/CodeGen/MIR/AMDGPU/lit.local.cfg2
-rw-r--r--test/CodeGen/MIR/AMDGPU/target-index-operands.mir104
-rw-r--r--test/CodeGen/MIR/ARM/ARMLoadStoreDBG.mir165
-rw-r--r--test/CodeGen/MIR/ARM/bundled-instructions.mir75
-rw-r--r--test/CodeGen/MIR/ARM/cfi-same-value.mir80
-rw-r--r--test/CodeGen/MIR/ARM/expected-closing-brace.mir50
-rw-r--r--test/CodeGen/MIR/ARM/extraneous-closing-brace-error.mir20
-rw-r--r--test/CodeGen/MIR/ARM/lit.local.cfg2
-rw-r--r--test/CodeGen/MIR/ARM/nested-instruction-bundle-error.mir30
-rw-r--r--test/CodeGen/MIR/ARM/sched-it-debug-nodes.mir160
-rw-r--r--test/CodeGen/MIR/Generic/basic-blocks.mir49
-rw-r--r--test/CodeGen/MIR/Generic/expected-colon-after-basic-block.mir16
-rw-r--r--test/CodeGen/MIR/Generic/expected-mbb-reference-for-successor-mbb.mir28
-rw-r--r--test/CodeGen/MIR/Generic/frame-info.mir89
-rw-r--r--test/CodeGen/MIR/Generic/function-missing-machine-function.mir (renamed from test/CodeGen/MIR/function-missing-machine-function.mir)0
-rw-r--r--test/CodeGen/MIR/Generic/invalid-jump-table-kind.mir53
-rw-r--r--test/CodeGen/MIR/Generic/lit.local.cfg3
-rw-r--r--test/CodeGen/MIR/Generic/llvm-ir-error-reported.mir (renamed from test/CodeGen/MIR/llvm-ir-error-reported.mir)0
-rw-r--r--test/CodeGen/MIR/Generic/llvmIR.mir37
-rw-r--r--test/CodeGen/MIR/Generic/llvmIRMissing.mir9
-rw-r--r--test/CodeGen/MIR/Generic/machine-basic-block-ir-block-reference.mir17
-rw-r--r--test/CodeGen/MIR/Generic/machine-basic-block-redefinition-error.mir18
-rw-r--r--test/CodeGen/MIR/Generic/machine-basic-block-undefined-ir-block.mir15
-rw-r--r--test/CodeGen/MIR/Generic/machine-basic-block-unknown-name.mir18
-rw-r--r--test/CodeGen/MIR/Generic/machine-function-missing-body-error.mir (renamed from test/CodeGen/MIR/machine-function-missing-body-error.mir)0
-rw-r--r--test/CodeGen/MIR/Generic/machine-function-missing-function.mir23
-rw-r--r--test/CodeGen/MIR/Generic/machine-function-missing-name.mir26
-rw-r--r--test/CodeGen/MIR/Generic/machine-function-redefinition-error.mir (renamed from test/CodeGen/MIR/machine-function-redefinition-error.mir)0
-rw-r--r--test/CodeGen/MIR/Generic/machine-function.mir66
-rw-r--r--test/CodeGen/MIR/Generic/register-info.mir40
-rw-r--r--test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir41
-rw-r--r--test/CodeGen/MIR/Mips/lit.local.cfg2
-rw-r--r--test/CodeGen/MIR/Mips/memory-operands.mir102
-rw-r--r--test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir24
-rw-r--r--test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir81
-rw-r--r--test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir24
-rw-r--r--test/CodeGen/MIR/NVPTX/lit.local.cfg2
-rw-r--r--test/CodeGen/MIR/PowerPC/lit.local.cfg2
-rw-r--r--test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir45
-rw-r--r--test/CodeGen/MIR/X86/basic-block-liveins.mir57
-rw-r--r--test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir41
-rw-r--r--test/CodeGen/MIR/X86/block-address-operands.mir121
-rw-r--r--test/CodeGen/MIR/X86/callee-saved-info.mir95
-rw-r--r--test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir29
-rw-r--r--test/CodeGen/MIR/X86/cfi-def-cfa-register.mir32
-rw-r--r--test/CodeGen/MIR/X86/cfi-offset.mir47
-rw-r--r--test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir25
-rw-r--r--test/CodeGen/MIR/X86/constant-pool.mir139
-rw-r--r--test/CodeGen/MIR/X86/constant-value-error.mir25
-rw-r--r--test/CodeGen/MIR/X86/dead-register-flag.mir14
-rw-r--r--test/CodeGen/MIR/X86/def-register-already-tied-error.mir25
-rw-r--r--test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir27
-rw-r--r--test/CodeGen/MIR/X86/duplicate-register-flag-error.mir35
-rw-r--r--test/CodeGen/MIR/X86/early-clobber-register-flag.mir45
-rw-r--r--test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir30
-rw-r--r--test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir30
-rw-r--r--test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir40
-rw-r--r--test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir30
-rw-r--r--test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir42
-rw-r--r--test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir25
-rw-r--r--test/CodeGen/MIR/X86/expected-different-implicit-operand.mir28
-rw-r--r--test/CodeGen/MIR/X86/expected-different-implicit-register-flag.mir28
-rw-r--r--test/CodeGen/MIR/X86/expected-from-in-memory-operand.mir24
-rw-r--r--test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir30
-rw-r--r--test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir30
-rw-r--r--test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir24
-rw-r--r--test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir25
-rw-r--r--test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir38
-rw-r--r--test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir23
-rw-r--r--test/CodeGen/MIR/X86/expected-machine-operand.mir12
-rw-r--r--test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir59
-rw-r--r--test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir59
-rw-r--r--test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir25
-rw-r--r--test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir29
-rw-r--r--test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir88
-rw-r--r--test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir27
-rw-r--r--test/CodeGen/MIR/X86/expected-named-register-livein.mir15
-rw-r--r--test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir41
-rw-r--r--test/CodeGen/MIR/X86/expected-number-after-bb.mir28
-rw-r--r--test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir27
-rw-r--r--test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir24
-rw-r--r--test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir30
-rw-r--r--test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir42
-rw-r--r--test/CodeGen/MIR/X86/expected-register-after-flags.mir12
-rw-r--r--test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir24
-rw-r--r--test/CodeGen/MIR/X86/expected-stack-object.mir67
-rw-r--r--test/CodeGen/MIR/X86/expected-subregister-after-colon.mir18
-rw-r--r--test/CodeGen/MIR/X86/expected-target-flag-name.mir24
-rw-r--r--test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir25
-rw-r--r--test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir24
-rw-r--r--test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir27
-rw-r--r--test/CodeGen/MIR/X86/external-symbol-operands.mir64
-rw-r--r--test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir39
-rw-r--r--test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir28
-rw-r--r--test/CodeGen/MIR/X86/fixed-stack-objects.mir12
-rw-r--r--test/CodeGen/MIR/X86/frame-info-save-restore-points.mir73
-rw-r--r--test/CodeGen/MIR/X86/frame-info-stack-references.mir79
-rw-r--r--test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir35
-rw-r--r--test/CodeGen/MIR/X86/function-liveins.mir37
-rw-r--r--test/CodeGen/MIR/X86/global-value-operands.mir127
-rw-r--r--test/CodeGen/MIR/X86/immediate-operands.mir28
-rw-r--r--test/CodeGen/MIR/X86/implicit-register-flag.mir65
-rw-r--r--test/CodeGen/MIR/X86/inline-asm-registers.mir54
-rw-r--r--test/CodeGen/MIR/X86/instructions-debug-location.mir98
-rw-r--r--test/CodeGen/MIR/X86/invalid-constant-pool-item.mir25
-rw-r--r--test/CodeGen/MIR/X86/invalid-metadata-node-type.mir53
-rw-r--r--test/CodeGen/MIR/X86/invalid-target-flag-name.mir24
-rw-r--r--test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir25
-rw-r--r--test/CodeGen/MIR/X86/jump-table-info.mir150
-rw-r--r--test/CodeGen/MIR/X86/jump-table-redefinition-error.mir76
-rw-r--r--test/CodeGen/MIR/X86/killed-register-flag.mir38
-rw-r--r--test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir27
-rw-r--r--test/CodeGen/MIR/X86/large-immediate-operand-error.mir18
-rw-r--r--test/CodeGen/MIR/X86/large-index-number-error.mir26
-rw-r--r--test/CodeGen/MIR/X86/large-offset-number-error.mir24
-rw-r--r--test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir24
-rw-r--r--test/CodeGen/MIR/X86/liveout-register-mask.mir42
-rw-r--r--test/CodeGen/MIR/X86/machine-basic-block-operands.mir68
-rw-r--r--test/CodeGen/MIR/X86/machine-instructions.mir14
-rw-r--r--test/CodeGen/MIR/X86/machine-verifier.mir22
-rw-r--r--test/CodeGen/MIR/X86/memory-operands.mir508
-rw-r--r--test/CodeGen/MIR/X86/metadata-operands.mir63
-rw-r--r--test/CodeGen/MIR/X86/missing-closing-quote.mir22
-rw-r--r--test/CodeGen/MIR/X86/missing-comma.mir12
-rw-r--r--test/CodeGen/MIR/X86/missing-implicit-operand.mir30
-rw-r--r--test/CodeGen/MIR/X86/missing-instruction.mir19
-rw-r--r--test/CodeGen/MIR/X86/named-registers.mir14
-rw-r--r--test/CodeGen/MIR/X86/newline-handling.mir109
-rw-r--r--test/CodeGen/MIR/X86/null-register-operands.mir14
-rw-r--r--test/CodeGen/MIR/X86/register-mask-operands.mir28
-rw-r--r--test/CodeGen/MIR/X86/register-operands-target-flag-error.mir24
-rw-r--r--test/CodeGen/MIR/X86/simple-register-allocation-hints.mir34
-rw-r--r--test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-aliased.mir12
-rw-r--r--test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-immutable.mir12
-rw-r--r--test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir12
-rw-r--r--test/CodeGen/MIR/X86/stack-object-debug-info.mir65
-rw-r--r--test/CodeGen/MIR/X86/stack-object-invalid-name.mir28
-rw-r--r--test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir33
-rw-r--r--test/CodeGen/MIR/X86/stack-object-operands.mir45
-rw-r--r--test/CodeGen/MIR/X86/stack-object-redefinition-error.mir37
-rw-r--r--test/CodeGen/MIR/X86/stack-objects.mir22
-rw-r--r--test/CodeGen/MIR/X86/standalone-register-error.mir24
-rw-r--r--test/CodeGen/MIR/X86/subregister-operands.mir21
-rw-r--r--test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir42
-rw-r--r--test/CodeGen/MIR/X86/successor-basic-blocks.mir83
-rw-r--r--test/CodeGen/MIR/X86/tied-def-operand-invalid.mir25
-rw-r--r--test/CodeGen/MIR/X86/undef-register-flag.mir26
-rw-r--r--test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir38
-rw-r--r--test/CodeGen/MIR/X86/undefined-global-value.mir16
-rw-r--r--test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir30
-rw-r--r--test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir29
-rw-r--r--test/CodeGen/MIR/X86/undefined-jump-table-id.mir73
-rw-r--r--test/CodeGen/MIR/X86/undefined-named-global-value.mir16
-rw-r--r--test/CodeGen/MIR/X86/undefined-register-class.mir8
-rw-r--r--test/CodeGen/MIR/X86/undefined-stack-object.mir30
-rw-r--r--test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir24
-rw-r--r--test/CodeGen/MIR/X86/undefined-virtual-register.mir14
-rw-r--r--test/CodeGen/MIR/X86/unknown-instruction.mir10
-rw-r--r--test/CodeGen/MIR/X86/unknown-machine-basic-block.mir26
-rw-r--r--test/CodeGen/MIR/X86/unknown-metadata-keyword.mir25
-rw-r--r--test/CodeGen/MIR/X86/unknown-metadata-node.mir59
-rw-r--r--test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir28
-rw-r--r--test/CodeGen/MIR/X86/unknown-register.mir12
-rw-r--r--test/CodeGen/MIR/X86/unknown-subregister-index.mir18
-rw-r--r--test/CodeGen/MIR/X86/unrecognized-character.mir10
-rw-r--r--test/CodeGen/MIR/X86/used-physical-register-info.mir109
-rw-r--r--test/CodeGen/MIR/X86/variable-sized-stack-object-size-error.mir14
-rw-r--r--test/CodeGen/MIR/X86/variable-sized-stack-objects.mir18
-rw-r--r--test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir27
-rw-r--r--test/CodeGen/MIR/X86/virtual-registers.mir90
-rw-r--r--test/CodeGen/MIR/basic-blocks.mir49
-rw-r--r--test/CodeGen/MIR/expected-eof-after-successor-mbb.mir29
-rw-r--r--test/CodeGen/MIR/expected-mbb-reference-for-successor-mbb.mir29
-rw-r--r--test/CodeGen/MIR/frame-info.mir91
-rw-r--r--test/CodeGen/MIR/llvmIR.mir37
-rw-r--r--test/CodeGen/MIR/llvmIRMissing.mir9
-rw-r--r--test/CodeGen/MIR/machine-basic-block-redefinition-error.mir17
-rw-r--r--test/CodeGen/MIR/machine-basic-block-unknown-name.mir19
-rw-r--r--test/CodeGen/MIR/machine-function-missing-function.mir23
-rw-r--r--test/CodeGen/MIR/machine-function-missing-name.mir26
-rw-r--r--test/CodeGen/MIR/machine-function.mir66
-rw-r--r--test/CodeGen/MIR/register-info.mir40
-rw-r--r--test/CodeGen/MIR/successor-basic-blocks.mir58
-rw-r--r--test/CodeGen/Mips/Fast-ISel/check-disabled-mcpus.ll27
-rw-r--r--test/CodeGen/Mips/addi.ll2
-rw-r--r--test/CodeGen/Mips/adjust-callstack-sp.ll2
-rw-r--r--test/CodeGen/Mips/align16.ll2
-rw-r--r--test/CodeGen/Mips/alloca16.ll2
-rw-r--r--test/CodeGen/Mips/and1.ll2
-rw-r--r--test/CodeGen/Mips/asm-large-immediate.ll3
-rw-r--r--test/CodeGen/Mips/atomicops.ll2
-rw-r--r--test/CodeGen/Mips/beqzc.ll2
-rw-r--r--test/CodeGen/Mips/beqzc1.ll2
-rw-r--r--test/CodeGen/Mips/br-jmp.ll4
-rw-r--r--test/CodeGen/Mips/brconeq.ll2
-rw-r--r--test/CodeGen/Mips/brconeqk.ll2
-rw-r--r--test/CodeGen/Mips/brconeqz.ll2
-rw-r--r--test/CodeGen/Mips/brconge.ll2
-rw-r--r--test/CodeGen/Mips/brcongt.ll2
-rw-r--r--test/CodeGen/Mips/brconle.ll2
-rw-r--r--test/CodeGen/Mips/brconlt.ll2
-rw-r--r--test/CodeGen/Mips/brconne.ll2
-rw-r--r--test/CodeGen/Mips/brconnek.ll2
-rw-r--r--test/CodeGen/Mips/brconnez.ll2
-rw-r--r--test/CodeGen/Mips/brind.ll2
-rw-r--r--test/CodeGen/Mips/brsize3.ll4
-rw-r--r--test/CodeGen/Mips/brsize3a.ll2
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs.ll72
-rw-r--r--test/CodeGen/Mips/ci2.ll2
-rw-r--r--test/CodeGen/Mips/cmplarge.ll2
-rw-r--r--test/CodeGen/Mips/const1.ll2
-rw-r--r--test/CodeGen/Mips/const4a.ll2
-rw-r--r--test/CodeGen/Mips/const6.ll4
-rw-r--r--test/CodeGen/Mips/const6a.ll4
-rw-r--r--test/CodeGen/Mips/div.ll2
-rw-r--r--test/CodeGen/Mips/div_rem.ll2
-rw-r--r--test/CodeGen/Mips/divu.ll2
-rw-r--r--test/CodeGen/Mips/divu_remu.ll2
-rw-r--r--test/CodeGen/Mips/eh.ll2
-rw-r--r--test/CodeGen/Mips/emergency-spill-slot-near-fp.ll4
-rw-r--r--test/CodeGen/Mips/emutls_generic.ll70
-rw-r--r--test/CodeGen/Mips/ex2.ll2
-rw-r--r--test/CodeGen/Mips/extins.ll2
-rw-r--r--test/CodeGen/Mips/f16abs.ll2
-rw-r--r--test/CodeGen/Mips/fixdfsf.ll4
-rw-r--r--test/CodeGen/Mips/fp16instrinsmc.ll4
-rw-r--r--test/CodeGen/Mips/fp16mix.ll6
-rw-r--r--test/CodeGen/Mips/fp16static.ll2
-rw-r--r--test/CodeGen/Mips/helloworld.ll12
-rw-r--r--test/CodeGen/Mips/hf16_1.ll4
-rw-r--r--test/CodeGen/Mips/hf16call32.ll408
-rw-r--r--test/CodeGen/Mips/hf16call32_body.ll206
-rw-r--r--test/CodeGen/Mips/hf1_body.ll18
-rw-r--r--test/CodeGen/Mips/hfptrcall.ll2
-rw-r--r--test/CodeGen/Mips/i32k.ll2
-rw-r--r--test/CodeGen/Mips/inlineasm-assembler-directives.ll4
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll36
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll4
-rw-r--r--test/CodeGen/Mips/inlineasm-operand-code.ll185
-rw-r--r--test/CodeGen/Mips/inlineasm_constraint.ll94
-rw-r--r--test/CodeGen/Mips/inlineasmmemop.ll8
-rw-r--r--test/CodeGen/Mips/insn-zero-size-bb.ll4
-rw-r--r--test/CodeGen/Mips/interrupt-attr-64-error.ll9
-rw-r--r--test/CodeGen/Mips/interrupt-attr-args-error.ll9
-rw-r--r--test/CodeGen/Mips/interrupt-attr-error.ll9
-rw-r--r--test/CodeGen/Mips/interrupt-attr.ll244
-rw-r--r--test/CodeGen/Mips/jtstat.ll2
-rw-r--r--test/CodeGen/Mips/l3mc.ll20
-rw-r--r--test/CodeGen/Mips/lb1.ll2
-rw-r--r--test/CodeGen/Mips/lbu1.ll2
-rw-r--r--test/CodeGen/Mips/lcb2.ll4
-rw-r--r--test/CodeGen/Mips/lcb3c.ll2
-rw-r--r--test/CodeGen/Mips/lcb4a.ll2
-rw-r--r--test/CodeGen/Mips/lcb5.ll2
-rw-r--r--test/CodeGen/Mips/lh1.ll2
-rw-r--r--test/CodeGen/Mips/lhu1.ll2
-rw-r--r--test/CodeGen/Mips/llcarry.ll2
-rw-r--r--test/CodeGen/Mips/llvm-ir/atomicrmx.ll26
-rw-r--r--test/CodeGen/Mips/llvm-ir/call.ll14
-rw-r--r--test/CodeGen/Mips/llvm-ir/load-atomic.ll42
-rw-r--r--test/CodeGen/Mips/llvm-ir/sqrt.ll13
-rw-r--r--test/CodeGen/Mips/llvm-ir/store-atomic.ll42
-rw-r--r--test/CodeGen/Mips/madd-msub.ll2
-rw-r--r--test/CodeGen/Mips/mbrsize4a.ll2
-rw-r--r--test/CodeGen/Mips/mips16-hf-attr-2.ll2
-rw-r--r--test/CodeGen/Mips/mips16-hf-attr.ll2
-rw-r--r--test/CodeGen/Mips/mips16_32_1.ll2
-rw-r--r--test/CodeGen/Mips/mips16_32_10.ll2
-rw-r--r--test/CodeGen/Mips/mips16_32_3.ll2
-rw-r--r--test/CodeGen/Mips/mips16_32_4.ll2
-rw-r--r--test/CodeGen/Mips/mips16_32_5.ll2
-rw-r--r--test/CodeGen/Mips/mips16_32_6.ll2
-rw-r--r--test/CodeGen/Mips/mips16_32_7.ll2
-rw-r--r--test/CodeGen/Mips/mips16_fpret.ll8
-rw-r--r--test/CodeGen/Mips/mips16ex.ll2
-rw-r--r--test/CodeGen/Mips/mips16fpe.ll6
-rw-r--r--test/CodeGen/Mips/misha.ll2
-rw-r--r--test/CodeGen/Mips/msa/elm_copy.ll5
-rw-r--r--test/CodeGen/Mips/mul.ll2
-rw-r--r--test/CodeGen/Mips/mulll.ll2
-rw-r--r--test/CodeGen/Mips/mulull.ll2
-rw-r--r--test/CodeGen/Mips/nacl-align.ll7
-rw-r--r--test/CodeGen/Mips/neg1.ll2
-rw-r--r--test/CodeGen/Mips/no-odd-spreg-msa.ll24
-rw-r--r--test/CodeGen/Mips/nomips16.ll2
-rw-r--r--test/CodeGen/Mips/not1.ll2
-rw-r--r--test/CodeGen/Mips/null.ll2
-rw-r--r--test/CodeGen/Mips/or1.ll2
-rw-r--r--test/CodeGen/Mips/powif64_16.ll2
-rw-r--r--test/CodeGen/Mips/rem.ll2
-rw-r--r--test/CodeGen/Mips/remu.ll2
-rw-r--r--test/CodeGen/Mips/s2rem.ll4
-rw-r--r--test/CodeGen/Mips/sb1.ll2
-rw-r--r--test/CodeGen/Mips/sel1c.ll2
-rw-r--r--test/CodeGen/Mips/sel2c.ll2
-rw-r--r--test/CodeGen/Mips/selTBteqzCmpi.ll2
-rw-r--r--test/CodeGen/Mips/selTBtnezCmpi.ll2
-rw-r--r--test/CodeGen/Mips/selTBtnezSlti.ll2
-rw-r--r--test/CodeGen/Mips/seleq.ll2
-rw-r--r--test/CodeGen/Mips/seleqk.ll2
-rw-r--r--test/CodeGen/Mips/selgek.ll2
-rw-r--r--test/CodeGen/Mips/selgt.ll2
-rw-r--r--test/CodeGen/Mips/selle.ll2
-rw-r--r--test/CodeGen/Mips/selltk.ll2
-rw-r--r--test/CodeGen/Mips/selne.ll2
-rw-r--r--test/CodeGen/Mips/selnek.ll2
-rw-r--r--test/CodeGen/Mips/selpat.ll2
-rw-r--r--test/CodeGen/Mips/seteq.ll2
-rw-r--r--test/CodeGen/Mips/seteqz.ll2
-rw-r--r--test/CodeGen/Mips/setge.ll2
-rw-r--r--test/CodeGen/Mips/setgek.ll2
-rw-r--r--test/CodeGen/Mips/setle.ll2
-rw-r--r--test/CodeGen/Mips/setlt.ll2
-rw-r--r--test/CodeGen/Mips/setltk.ll2
-rw-r--r--test/CodeGen/Mips/setne.ll2
-rw-r--r--test/CodeGen/Mips/setuge.ll2
-rw-r--r--test/CodeGen/Mips/setugt.ll2
-rw-r--r--test/CodeGen/Mips/setule.ll2
-rw-r--r--test/CodeGen/Mips/setult.ll2
-rw-r--r--test/CodeGen/Mips/setultk.ll2
-rw-r--r--test/CodeGen/Mips/sh1.ll2
-rw-r--r--test/CodeGen/Mips/simplebr.ll2
-rw-r--r--test/CodeGen/Mips/sitofp-selectcc-opt.ll3
-rw-r--r--test/CodeGen/Mips/sll1.ll2
-rw-r--r--test/CodeGen/Mips/sll2.ll2
-rw-r--r--test/CodeGen/Mips/sr1.ll4
-rw-r--r--test/CodeGen/Mips/sra1.ll2
-rw-r--r--test/CodeGen/Mips/sra2.ll2
-rw-r--r--test/CodeGen/Mips/srl1.ll2
-rw-r--r--test/CodeGen/Mips/srl2.ll2
-rw-r--r--test/CodeGen/Mips/stchar.ll4
-rw-r--r--test/CodeGen/Mips/stldst.ll2
-rw-r--r--test/CodeGen/Mips/sub1.ll2
-rw-r--r--test/CodeGen/Mips/sub2.ll2
-rw-r--r--test/CodeGen/Mips/tail16.ll2
-rw-r--r--test/CodeGen/Mips/tailcall.ll2
-rw-r--r--test/CodeGen/Mips/tls-alias.ll2
-rw-r--r--test/CodeGen/Mips/tls16.ll2
-rw-r--r--test/CodeGen/Mips/tls16_2.ll2
-rw-r--r--test/CodeGen/Mips/trap1.ll2
-rw-r--r--test/CodeGen/Mips/ul1.ll2
-rw-r--r--test/CodeGen/Mips/xor1.ll2
-rw-r--r--test/CodeGen/NVPTX/branch-fold.ll40
-rw-r--r--test/CodeGen/NVPTX/bypass-div.ll80
-rw-r--r--test/CodeGen/NVPTX/combine-min-max.ll307
-rw-r--r--test/CodeGen/NVPTX/fma-assoc.ll13
-rw-r--r--test/CodeGen/NVPTX/global-addrspace.ll12
-rw-r--r--test/CodeGen/NVPTX/load-with-non-coherent-cache.ll264
-rw-r--r--test/CodeGen/NVPTX/lower-aggr-copies.ll118
-rw-r--r--test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll20
-rw-r--r--test/CodeGen/NVPTX/reg-copy.ll224
-rw-r--r--test/CodeGen/NVPTX/symbol-naming.ll4
-rw-r--r--test/CodeGen/NVPTX/vector-call.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll1
-rw-r--r--test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll1
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll1
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll1
-rw-r--r--test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll1
-rw-r--r--test/CodeGen/PowerPC/BoolRetToIntTest.ll203
-rw-r--r--test/CodeGen/PowerPC/BreakableToken-reduced.ll335
-rw-r--r--test/CodeGen/PowerPC/aantidep-def-ec.mir117
-rw-r--r--test/CodeGen/PowerPC/aantidep-inline-asm-use.ll305
-rw-r--r--test/CodeGen/PowerPC/addisdtprelha-nonr3.mir80
-rw-r--r--test/CodeGen/PowerPC/alias.ll4
-rw-r--r--test/CodeGen/PowerPC/bitcasts-direct-move.ll83
-rw-r--r--test/CodeGen/PowerPC/bitreverse.ll23
-rw-r--r--test/CodeGen/PowerPC/branch-hint.ll135
-rw-r--r--test/CodeGen/PowerPC/coal-sections.ll24
-rw-r--r--test/CodeGen/PowerPC/crbit-asm-disabled.ll16
-rw-r--r--test/CodeGen/PowerPC/crbit-asm.ll3
-rw-r--r--test/CodeGen/PowerPC/cttz.ll2
-rw-r--r--test/CodeGen/PowerPC/dbg.ll10
-rw-r--r--test/CodeGen/PowerPC/dyn-alloca-offset.ll21
-rw-r--r--test/CodeGen/PowerPC/e500-1.ll30
-rw-r--r--test/CodeGen/PowerPC/emutls_generic.ll41
-rw-r--r--test/CodeGen/PowerPC/fast-isel-binary.ll26
-rw-r--r--test/CodeGen/PowerPC/fast-isel-br-const.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-call.ll14
-rw-r--r--test/CodeGen/PowerPC/fast-isel-cmp-imm.ll34
-rw-r--r--test/CodeGen/PowerPC/fast-isel-const.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-conversion-p5.ll20
-rw-r--r--test/CodeGen/PowerPC/fast-isel-conversion.ll48
-rw-r--r--test/CodeGen/PowerPC/fast-isel-crash.ll4
-rw-r--r--test/CodeGen/PowerPC/fast-isel-ext.ll20
-rw-r--r--test/CodeGen/PowerPC/fast-isel-fold.ll26
-rw-r--r--test/CodeGen/PowerPC/fast-isel-indirectbr.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-load-store.ll34
-rw-r--r--test/CodeGen/PowerPC/fast-isel-redefinition.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-ret.ll52
-rw-r--r--test/CodeGen/PowerPC/fast-isel-shifter.ll12
-rw-r--r--test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll2
-rw-r--r--test/CodeGen/PowerPC/fma-mutate-register-constraint.ll89
-rw-r--r--test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll24
-rw-r--r--test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll137
-rw-r--r--test/CodeGen/PowerPC/load-shift-combine.ll1
-rw-r--r--test/CodeGen/PowerPC/long-compare.ll2
-rw-r--r--test/CodeGen/PowerPC/machine-combiner.ll188
-rw-r--r--test/CodeGen/PowerPC/mc-instrlat.ll25
-rw-r--r--test/CodeGen/PowerPC/mcm-13.ll27
-rw-r--r--test/CodeGen/PowerPC/memcpy-vec.ll7
-rw-r--r--test/CodeGen/PowerPC/merge-st-chain-op.ll41
-rw-r--r--test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll1476
-rw-r--r--test/CodeGen/PowerPC/peephole-align.ll335
-rw-r--r--test/CodeGen/PowerPC/ppc-shrink-wrapping.ll784
-rw-r--r--test/CodeGen/PowerPC/ppc32-i1-vaarg.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll8
-rw-r--r--test/CodeGen/PowerPC/ppcsoftops.ll50
-rw-r--r--test/CodeGen/PowerPC/pr17168.ll366
-rw-r--r--test/CodeGen/PowerPC/pr24546.ll22
-rw-r--r--test/CodeGen/PowerPC/pr24636.ll41
-rw-r--r--test/CodeGen/PowerPC/pr25157-peephole.ll61
-rw-r--r--test/CodeGen/PowerPC/preincprep-nontrans-crash.ll94
-rw-r--r--test/CodeGen/PowerPC/qpx-unal-cons-lds.ll217
-rw-r--r--test/CodeGen/PowerPC/retaddr2.ll6
-rw-r--r--test/CodeGen/PowerPC/rm-zext.ll6
-rw-r--r--test/CodeGen/PowerPC/rotl-rotr-crash.ll12
-rw-r--r--test/CodeGen/PowerPC/sdiv-pow2.ll8
-rw-r--r--test/CodeGen/PowerPC/selectiondag-extload-computeknownbits.ll12
-rw-r--r--test/CodeGen/PowerPC/seteq-0.ll2
-rw-r--r--test/CodeGen/PowerPC/sjlj.ll20
-rw-r--r--test/CodeGen/PowerPC/stack-realign.ll26
-rw-r--r--test/CodeGen/PowerPC/stackmap-frame-setup.ll20
-rw-r--r--test/CodeGen/PowerPC/swaps-le-5.ll4
-rw-r--r--test/CodeGen/PowerPC/swaps-le-6.ll42
-rw-r--r--test/CodeGen/PowerPC/unal-vec-ldst.ll580
-rw-r--r--test/CodeGen/PowerPC/unal-vec-negarith.ll17
-rw-r--r--test/CodeGen/PowerPC/unwind-dw2-g.ll6
-rw-r--r--test/CodeGen/PowerPC/variable_elem_vec_extracts.ll114
-rw-r--r--test/CodeGen/PowerPC/vec-asm-disabled.ll14
-rw-r--r--test/CodeGen/PowerPC/vec_add_sub_quadword.ll6
-rw-r--r--test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll28
-rw-r--r--test/CodeGen/PowerPC/vsx.ll5
-rw-r--r--test/CodeGen/PowerPC/vsx_insert_extract_le.ll6
-rw-r--r--test/CodeGen/PowerPC/vsx_scalar_ld_st.ll6
-rw-r--r--test/CodeGen/PowerPC/vsx_shuffle_le.ll20
-rw-r--r--test/CodeGen/SPARC/2011-01-22-SRet.ll2
-rw-r--r--test/CodeGen/SPARC/32abi.ll191
-rw-r--r--test/CodeGen/SPARC/64abi.ll84
-rw-r--r--test/CodeGen/SPARC/basictest.ll21
-rw-r--r--test/CodeGen/SPARC/float-constants.ll41
-rw-r--r--test/CodeGen/SPARC/float.ll10
-rw-r--r--test/CodeGen/SPARC/fp128.ll4
-rw-r--r--test/CodeGen/SPARC/inlineasm.ll53
-rw-r--r--test/CodeGen/SPARC/missing-sret.ll9
-rw-r--r--test/CodeGen/SPARC/reserved-regs.ll135
-rw-r--r--test/CodeGen/SPARC/select-mask.ll17
-rw-r--r--test/CodeGen/SPARC/spill.ll64
-rw-r--r--test/CodeGen/SPARC/stack-align.ll22
-rw-r--r--test/CodeGen/SPARC/tls.ll2
-rw-r--r--test/CodeGen/SPARC/varargs.ll2
-rw-r--r--test/CodeGen/SystemZ/alloca-03.ll84
-rw-r--r--test/CodeGen/SystemZ/alloca-04.ll14
-rw-r--r--test/CodeGen/SystemZ/args-01.ll4
-rw-r--r--test/CodeGen/SystemZ/args-02.ll4
-rw-r--r--test/CodeGen/SystemZ/args-03.ll4
-rw-r--r--test/CodeGen/SystemZ/args-04.ll2
-rw-r--r--test/CodeGen/SystemZ/args-07.ll2
-rw-r--r--test/CodeGen/SystemZ/asm-17.ll3
-rw-r--r--test/CodeGen/SystemZ/asm-18.ll3
-rw-r--r--test/CodeGen/SystemZ/dag-combine-01.ll97
-rw-r--r--test/CodeGen/SystemZ/fp-abs-01.ll4
-rw-r--r--test/CodeGen/SystemZ/fp-abs-02.ll4
-rw-r--r--test/CodeGen/SystemZ/fp-add-02.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-cmp-02.ll5
-rw-r--r--test/CodeGen/SystemZ/fp-cmp-05.ll80
-rw-r--r--test/CodeGen/SystemZ/fp-const-02.ll4
-rw-r--r--test/CodeGen/SystemZ/fp-libcall.ll273
-rw-r--r--test/CodeGen/SystemZ/fp-move-05.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-neg-01.ll4
-rw-r--r--test/CodeGen/SystemZ/fp-sincos-01.ll56
-rw-r--r--test/CodeGen/SystemZ/insert-05.ll4
-rw-r--r--test/CodeGen/SystemZ/int-cmp-44.ll3
-rw-r--r--test/CodeGen/SystemZ/int-cmp-51.ll34
-rw-r--r--test/CodeGen/SystemZ/int-cmp-52.ll24
-rw-r--r--test/CodeGen/SystemZ/memchr-01.ll2
-rw-r--r--test/CodeGen/SystemZ/spill-01.ll2
-rw-r--r--test/CodeGen/SystemZ/vec-args-04.ll26
-rw-r--r--test/CodeGen/SystemZ/vec-args-05.ll10
-rw-r--r--test/CodeGen/SystemZ/vec-perm-12.ll43
-rw-r--r--test/CodeGen/SystemZ/vec-perm-13.ll38
-rw-r--r--test/CodeGen/SystemZ/xor-01.ll2
-rw-r--r--test/CodeGen/Thumb/2010-07-15-debugOrdering.ll14
-rw-r--r--test/CodeGen/Thumb/cortex-m0-unaligned-access.ll2
-rw-r--r--test/CodeGen/Thumb/large-stack.ll20
-rw-r--r--test/CodeGen/Thumb/ldm-stm-base-materialization-thumb2.ll93
-rw-r--r--test/CodeGen/Thumb/ldm-stm-base-materialization.ll77
-rw-r--r--test/CodeGen/Thumb/pop.ll4
-rw-r--r--test/CodeGen/Thumb/segmented-stacks.ll24
-rw-r--r--test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll36
-rw-r--r--test/CodeGen/Thumb/thumb-shrink-wrapping.ll691
-rw-r--r--test/CodeGen/Thumb/vargs.ll6
-rw-r--r--test/CodeGen/Thumb2/crash.ll14
-rw-r--r--test/CodeGen/Thumb2/emit-unwinding.ll11
-rw-r--r--test/CodeGen/Thumb2/float-cmp.ll44
-rw-r--r--test/CodeGen/Thumb2/float-intrinsics-double.ll11
-rw-r--r--test/CodeGen/Thumb2/float-intrinsics-float.ll4
-rw-r--r--test/CodeGen/Thumb2/ifcvt-compare.ll6
-rw-r--r--test/CodeGen/Thumb2/machine-licm.ll8
-rw-r--r--test/CodeGen/Thumb2/pic-load.ll12
-rw-r--r--test/CodeGen/Thumb2/setjmp_longjmp.ll89
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt1.ll14
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt2.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-mulhi.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-smla.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-smul.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-spill-q.ll28
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxt_rot.ll8
-rw-r--r--test/CodeGen/Thumb2/v8_IT_1.ll4
-rw-r--r--test/CodeGen/Thumb2/v8_IT_3.ll5
-rw-r--r--test/CodeGen/Thumb2/v8_IT_5.ll4
-rw-r--r--test/CodeGen/WebAssembly/call.ll127
-rw-r--r--test/CodeGen/WebAssembly/cfg-stackify.ll1102
-rw-r--r--test/CodeGen/WebAssembly/comparisons_f32.ll181
-rw-r--r--test/CodeGen/WebAssembly/comparisons_f64.ll181
-rw-r--r--test/CodeGen/WebAssembly/comparisons_i32.ll98
-rw-r--r--test/CodeGen/WebAssembly/comparisons_i64.ll98
-rw-r--r--test/CodeGen/WebAssembly/conv.ll255
-rw-r--r--test/CodeGen/WebAssembly/copysign-casts.ll28
-rw-r--r--test/CodeGen/WebAssembly/cpus.ll17
-rw-r--r--test/CodeGen/WebAssembly/dead-vreg.ll51
-rw-r--r--test/CodeGen/WebAssembly/f32.ll154
-rw-r--r--test/CodeGen/WebAssembly/f64.ll154
-rw-r--r--test/CodeGen/WebAssembly/fast-isel.ll20
-rw-r--r--test/CodeGen/WebAssembly/frem.ll26
-rw-r--r--test/CodeGen/WebAssembly/func.ll62
-rw-r--r--test/CodeGen/WebAssembly/global.ll177
-rw-r--r--test/CodeGen/WebAssembly/globl.ll10
-rw-r--r--test/CodeGen/WebAssembly/i32.ll190
-rw-r--r--test/CodeGen/WebAssembly/i64.ll190
-rw-r--r--test/CodeGen/WebAssembly/ident.ll12
-rw-r--r--test/CodeGen/WebAssembly/immediates.ll198
-rw-r--r--test/CodeGen/WebAssembly/inline-asm.ll94
-rw-r--r--test/CodeGen/WebAssembly/legalize.ll62
-rw-r--r--test/CodeGen/WebAssembly/load-ext.ll96
-rw-r--r--test/CodeGen/WebAssembly/load-store-i1.ll68
-rw-r--r--test/CodeGen/WebAssembly/load.ll46
-rw-r--r--test/CodeGen/WebAssembly/loop-idiom.ll53
-rw-r--r--test/CodeGen/WebAssembly/memory-addr32.ll27
-rw-r--r--test/CodeGen/WebAssembly/memory-addr64.ll27
-rw-r--r--test/CodeGen/WebAssembly/offset-folding.ll48
-rw-r--r--test/CodeGen/WebAssembly/offset.ll185
-rw-r--r--test/CodeGen/WebAssembly/phi.ll47
-rw-r--r--test/CodeGen/WebAssembly/reg-stackify.ll126
-rw-r--r--test/CodeGen/WebAssembly/return-int32.ll10
-rw-r--r--test/CodeGen/WebAssembly/return-void.ll10
-rw-r--r--test/CodeGen/WebAssembly/returned.ll49
-rw-r--r--test/CodeGen/WebAssembly/select.ll135
-rw-r--r--test/CodeGen/WebAssembly/signext-zeroext.ll60
-rw-r--r--test/CodeGen/WebAssembly/store-results.ll61
-rw-r--r--test/CodeGen/WebAssembly/store-trunc.ll46
-rw-r--r--test/CodeGen/WebAssembly/store.ll42
-rw-r--r--test/CodeGen/WebAssembly/switch.ll174
-rw-r--r--test/CodeGen/WebAssembly/unreachable.ll34
-rw-r--r--test/CodeGen/WebAssembly/unused-argument.ll31
-rw-r--r--test/CodeGen/WebAssembly/userstack.ll81
-rw-r--r--test/CodeGen/WebAssembly/varargs.ll123
-rw-r--r--test/CodeGen/WebAssembly/vtable.ll171
-rw-r--r--test/CodeGen/WinEH/cppeh-alloca-sink.ll180
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-all-win32.ll86
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-all.ll97
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-and-throw.ll143
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-scalar.ll126
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-unwind.ll240
-rw-r--r--test/CodeGen/WinEH/cppeh-cleanup-invoke.ll91
-rw-r--r--test/CodeGen/WinEH/cppeh-demote-liveout.ll72
-rw-r--r--test/CodeGen/WinEH/cppeh-frame-vars.ll272
-rw-r--r--test/CodeGen/WinEH/cppeh-inalloca.ll194
-rw-r--r--test/CodeGen/WinEH/cppeh-min-unwind.ll99
-rw-r--r--test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll106
-rw-r--r--test/CodeGen/WinEH/cppeh-multi-catch.ll226
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-1.ll194
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-2.ll324
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-3.ll260
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-rethrow.ll212
-rw-r--r--test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll278
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-catch-all.ll47
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll165
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-catch.ll232
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-cleanups.ll245
-rw-r--r--test/CodeGen/WinEH/cppeh-shared-empty-catch.ll110
-rw-r--r--test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll394
-rw-r--r--test/CodeGen/WinEH/cppeh-state-calc-1.ll289
-rw-r--r--test/CodeGen/WinEH/seh-catch-all.ll59
-rw-r--r--test/CodeGen/WinEH/seh-exception-code.ll66
-rw-r--r--test/CodeGen/WinEH/seh-exception-code2.ll91
-rw-r--r--test/CodeGen/WinEH/seh-inlined-finally.ll83
-rw-r--r--test/CodeGen/WinEH/seh-outlined-finally-win32.ll172
-rw-r--r--test/CodeGen/WinEH/seh-outlined-finally.ll155
-rw-r--r--test/CodeGen/WinEH/seh-prepared-basic.ll83
-rw-r--r--test/CodeGen/WinEH/seh-resume-phi.ll66
-rw-r--r--test/CodeGen/WinEH/seh-simple.ll233
-rw-r--r--test/CodeGen/WinEH/wineh-cloning.ll391
-rw-r--r--test/CodeGen/WinEH/wineh-demotion.ll356
-rw-r--r--test/CodeGen/WinEH/wineh-intrinsics-invalid.ll26
-rw-r--r--test/CodeGen/WinEH/wineh-intrinsics.ll44
-rw-r--r--test/CodeGen/WinEH/wineh-no-demotion.ll130
-rw-r--r--test/CodeGen/WinEH/wineh-statenumbering-cleanups.ll62
-rw-r--r--test/CodeGen/WinEH/wineh-statenumbering.ll148
-rw-r--r--test/CodeGen/X86/2006-10-02-BoolRetCrash.ll1
-rw-r--r--test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll4
-rw-r--r--test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll2
-rw-r--r--test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll2
-rw-r--r--test/CodeGen/X86/2008-03-14-SpillerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll8
-rw-r--r--test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll15
-rw-r--r--test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll8
-rw-r--r--test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll14
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll8
-rw-r--r--test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll1
-rw-r--r--test/CodeGen/X86/2009-06-06-ConcatVectors.ll1
-rw-r--r--test/CodeGen/X86/2009-10-16-Scope.ll6
-rw-r--r--test/CodeGen/X86/2010-01-18-DbgValue.ll8
-rw-r--r--test/CodeGen/X86/2010-02-01-DbgValueCrash.ll8
-rw-r--r--test/CodeGen/X86/2010-05-25-DotDebugLoc.ll22
-rw-r--r--test/CodeGen/X86/2010-05-26-DotDebugLoc.ll20
-rw-r--r--test/CodeGen/X86/2010-05-28-Crash.ll18
-rw-r--r--test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll28
-rw-r--r--test/CodeGen/X86/2010-07-06-DbgCrash.ll7
-rw-r--r--test/CodeGen/X86/2010-08-04-StackVariable.ll24
-rw-r--r--test/CodeGen/X86/2010-09-16-EmptyFilename.ll10
-rw-r--r--test/CodeGen/X86/2010-11-02-DbgParameter.ll8
-rw-r--r--test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll20
-rw-r--r--test/CodeGen/X86/2011-10-21-widen-cmp.ll42
-rw-r--r--test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll13
-rw-r--r--test/CodeGen/X86/2011-20-21-zext-ui2fp.ll14
-rw-r--r--test/CodeGen/X86/2012-01-12-extract-sv.ll28
-rw-r--r--test/CodeGen/X86/2012-08-17-legalizer-crash.ll3
-rw-r--r--test/CodeGen/X86/2012-1-10-buildvector.ll1
-rw-r--r--test/CodeGen/X86/2012-11-30-handlemove-dbg.ll8
-rw-r--r--test/CodeGen/X86/2012-11-30-misched-dbg.ll16
-rw-r--r--test/CodeGen/X86/2012-11-30-regpres-dbg.ll8
-rw-r--r--test/CodeGen/X86/3dnow-intrinsics.ll4
-rw-r--r--test/CodeGen/X86/GC/alloc_loop.ll1
-rw-r--r--test/CodeGen/X86/GC/cg-O0.ll1
-rw-r--r--test/CodeGen/X86/GC/dynamic-frame-size.ll10
-rw-r--r--test/CodeGen/X86/GC/lower_gcroot.ll1
-rw-r--r--test/CodeGen/X86/MachineBranchProb.ll4
-rw-r--r--test/CodeGen/X86/MachineSink-DbgValue.ll12
-rw-r--r--test/CodeGen/X86/MergeConsecutiveStores.ll37
-rw-r--r--test/CodeGen/X86/StackColoring-dbg.ll6
-rw-r--r--test/CodeGen/X86/add-nsw-sext.ll168
-rw-r--r--test/CodeGen/X86/aliases.ll26
-rw-r--r--test/CodeGen/X86/and-encoding.ll41
-rw-r--r--test/CodeGen/X86/atomic-flags.ll61
-rw-r--r--test/CodeGen/X86/atomic-minmax-i6432.ll8
-rw-r--r--test/CodeGen/X86/atomic-non-integer.ll108
-rw-r--r--test/CodeGen/X86/atomic128.ll52
-rw-r--r--test/CodeGen/X86/atomic_mi.ll662
-rw-r--r--test/CodeGen/X86/avg.ll724
-rw-r--r--test/CodeGen/X86/avx-cvt-2.ll1
-rw-r--r--test/CodeGen/X86/avx-cvt.ll6
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll66
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll685
-rw-r--r--test/CodeGen/X86/avx-isa-check.ll570
-rw-r--r--test/CodeGen/X86/avx-load-store.ll4
-rw-r--r--test/CodeGen/X86/avx-logic.ll2
-rw-r--r--test/CodeGen/X86/avx-shift.ll1
-rwxr-xr-xtest/CodeGen/X86/avx-shuffle-x86_32.ll26
-rw-r--r--test/CodeGen/X86/avx-splat.ll114
-rw-r--r--test/CodeGen/X86/avx-vbroadcast.ll261
-rw-r--r--test/CodeGen/X86/avx-vperm2x128.ll44
-rw-r--r--test/CodeGen/X86/avx-win64.ll2
-rw-r--r--test/CodeGen/X86/avx.ll6
-rwxr-xr-xtest/CodeGen/X86/avx2-conversions.ll131
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll120
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll94
-rw-r--r--test/CodeGen/X86/avx2-nontemporal.ll17
-rw-r--r--test/CodeGen/X86/avx2-vbroadcast.ll441
-rw-r--r--test/CodeGen/X86/avx512-arith.ll358
-rw-r--r--test/CodeGen/X86/avx512-bugfix-25270.ll35
-rw-r--r--test/CodeGen/X86/avx512-build-vector.ll1
-rw-r--r--test/CodeGen/X86/avx512-calling-conv.ll481
-rw-r--r--test/CodeGen/X86/avx512-cvt.ll119
-rw-r--r--test/CodeGen/X86/avx512-ext.ll1835
-rw-r--r--test/CodeGen/X86/avx512-extract-subvector.ll56
-rw-r--r--test/CodeGen/X86/avx512-fma.ll155
-rw-r--r--test/CodeGen/X86/avx512-gather-scatter-intrin.ll185
-rw-r--r--test/CodeGen/X86/avx512-insert-extract.ll519
-rw-r--r--test/CodeGen/X86/avx512-intrinsics.ll4961
-rw-r--r--test/CodeGen/X86/avx512-logic.ll164
-rw-r--r--test/CodeGen/X86/avx512-mask-op.ll1472
-rw-r--r--test/CodeGen/X86/avx512-skx-insert-subvec.ll135
-rw-r--r--test/CodeGen/X86/avx512-trunc-ext.ll961
-rw-r--r--test/CodeGen/X86/avx512-trunc.ll488
-rw-r--r--test/CodeGen/X86/avx512-vbroadcast.ll262
-rw-r--r--test/CodeGen/X86/avx512-vec-cmp.ll27
-rw-r--r--test/CodeGen/X86/avx512bw-intrinsics.ll2674
-rw-r--r--test/CodeGen/X86/avx512bwvl-intrinsics.ll748
-rw-r--r--test/CodeGen/X86/avx512cd-intrinsics.ll18
-rw-r--r--test/CodeGen/X86/avx512cdvl-intrinsics.ll179
-rw-r--r--test/CodeGen/X86/avx512dq-intrinsics.ll667
-rw-r--r--test/CodeGen/X86/avx512dqvl-intrinsics.ll818
-rw-r--r--test/CodeGen/X86/avx512vl-intrinsics.ll2977
-rw-r--r--test/CodeGen/X86/bit-piece-comment.ll64
-rw-r--r--test/CodeGen/X86/bitreverse.ll22
-rw-r--r--test/CodeGen/X86/branchfolding-catchpads.ll95
-rw-r--r--test/CodeGen/X86/buildvec-insertvec.ll1
-rw-r--r--test/CodeGen/X86/catchpad-realign-savexmm.ll53
-rw-r--r--test/CodeGen/X86/catchpad-regmask.ll144
-rw-r--r--test/CodeGen/X86/catchpad-weight.ll82
-rw-r--r--test/CodeGen/X86/catchret-empty-fallthrough.ll53
-rw-r--r--test/CodeGen/X86/catchret-fallthrough.ll42
-rw-r--r--test/CodeGen/X86/cleanuppad-inalloca.ll68
-rw-r--r--test/CodeGen/X86/cleanuppad-large-codemodel.ll27
-rw-r--r--test/CodeGen/X86/cleanuppad-realign.ll78
-rw-r--r--test/CodeGen/X86/clz.ll148
-rw-r--r--test/CodeGen/X86/cmp.ll44
-rw-r--r--test/CodeGen/X86/cmpxchg-clobber-flags.ll150
-rw-r--r--test/CodeGen/X86/coal-sections.ll23
-rw-r--r--test/CodeGen/X86/coalescer-win64.ll16
-rw-r--r--test/CodeGen/X86/code_placement_cold_loop_blocks.ll122
-rw-r--r--test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll123
-rw-r--r--test/CodeGen/X86/code_placement_loop_rotation.ll80
-rw-r--r--test/CodeGen/X86/code_placement_loop_rotation2.ll122
-rw-r--r--test/CodeGen/X86/codegen-prepare-cast.ll2
-rw-r--r--test/CodeGen/X86/coff-comdat.ll2
-rw-r--r--test/CodeGen/X86/combine-and.ll1
-rw-r--r--test/CodeGen/X86/combine-avx-intrinsics.ll59
-rw-r--r--test/CodeGen/X86/combine-avx2-intrinsics.ll74
-rw-r--r--test/CodeGen/X86/combine-multiplies.ll163
-rw-r--r--test/CodeGen/X86/combine-or.ll1
-rw-r--r--test/CodeGen/X86/combine-sse2-intrinsics.ll53
-rw-r--r--test/CodeGen/X86/combine-sse41-intrinsics.ll91
-rw-r--r--test/CodeGen/X86/commute-two-addr.ll2
-rw-r--r--test/CodeGen/X86/constant-hoisting-and.ll19
-rw-r--r--test/CodeGen/X86/constant-hoisting-cmp.ll25
-rw-r--r--test/CodeGen/X86/copysign-constant-magnitude.ll24
-rw-r--r--test/CodeGen/X86/cppeh-nounwind.ll35
-rw-r--r--test/CodeGen/X86/cxx_tlscc64.ll71
-rw-r--r--test/CodeGen/X86/dag-fmf-cse.ll22
-rw-r--r--test/CodeGen/X86/dag-merge-fast-accesses.ll90
-rw-r--r--test/CodeGen/X86/darwin-tls.ll28
-rw-r--r--test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll48
-rw-r--r--test/CodeGen/X86/dbg-changes-codegen.ll9
-rw-r--r--test/CodeGen/X86/dbg-combine.ll12
-rw-r--r--test/CodeGen/X86/debugloc-argsize.ll58
-rw-r--r--test/CodeGen/X86/divide-by-constant.ll32
-rw-r--r--test/CodeGen/X86/dllexport-x86_64.ll10
-rw-r--r--test/CodeGen/X86/dllexport.ll8
-rw-r--r--test/CodeGen/X86/dwarf-comp-dir.ll2
-rw-r--r--test/CodeGen/X86/dynamic-allocas-VLAs.ll2
-rw-r--r--test/CodeGen/X86/eh-null-personality.ll25
-rw-r--r--test/CodeGen/X86/eh_frame.ll4
-rw-r--r--test/CodeGen/X86/emutls-pic.ll168
-rw-r--r--test/CodeGen/X86/emutls-pie.ll131
-rw-r--r--test/CodeGen/X86/emutls.ll347
-rw-r--r--test/CodeGen/X86/emutls_generic.ll107
-rw-r--r--test/CodeGen/X86/exedeps-movq.ll19
-rw-r--r--test/CodeGen/X86/expand-vr64-gr64-copy.mir36
-rw-r--r--test/CodeGen/X86/extractelement-legalization-cycle.ll21
-rw-r--r--test/CodeGen/X86/extractelement-shuffle.ll1
-rw-r--r--test/CodeGen/X86/fadd-combines.ll224
-rw-r--r--test/CodeGen/X86/fast-isel-bitcasts-avx.ll244
-rw-r--r--test/CodeGen/X86/fast-isel-bitcasts.ll245
-rw-r--r--test/CodeGen/X86/fast-isel-cmp-branch.ll17
-rw-r--r--test/CodeGen/X86/fast-isel-deadcode.ll147
-rw-r--r--test/CodeGen/X86/fast-isel-emutls.ll48
-rw-r--r--test/CodeGen/X86/fast-isel-nontemporal.ll111
-rw-r--r--test/CodeGen/X86/fast-isel-stackcheck.ll44
-rw-r--r--test/CodeGen/X86/fast-isel-tls.ll2
-rw-r--r--test/CodeGen/X86/fdiv-combine.ll69
-rw-r--r--test/CodeGen/X86/fdiv.ll52
-rw-r--r--test/CodeGen/X86/fixup-lea.ll34
-rw-r--r--test/CodeGen/X86/float-asmprint.ll15
-rw-r--r--test/CodeGen/X86/floor-soft-float.ll2
-rw-r--r--test/CodeGen/X86/fma-commute-x86.ll761
-rw-r--r--test/CodeGen/X86/fma-do-not-commute.ll2
-rw-r--r--test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll499
-rw-r--r--test/CodeGen/X86/fma-intrinsics-x86.ll688
-rw-r--r--test/CodeGen/X86/fma-scalar-memfold.ll383
-rw-r--r--test/CodeGen/X86/fma_patterns.ll1301
-rw-r--r--test/CodeGen/X86/fma_patterns_wide.ll851
-rw-r--r--test/CodeGen/X86/fmaxnum.ll203
-rw-r--r--test/CodeGen/X86/fminnum.ll181
-rw-r--r--test/CodeGen/X86/fmul-combines.ll44
-rw-r--r--test/CodeGen/X86/fold-load-binops.ll1
-rw-r--r--test/CodeGen/X86/fold-load-unops.ll1
-rw-r--r--test/CodeGen/X86/fold-push.ll40
-rw-r--r--test/CodeGen/X86/force-align-stack-alloca.ll2
-rw-r--r--test/CodeGen/X86/force-align-stack.ll2
-rw-r--r--test/CodeGen/X86/fp-fast.ll1
-rw-r--r--test/CodeGen/X86/fp-logic.ll264
-rw-r--r--test/CodeGen/X86/fp128-calling-conv.ll47
-rw-r--r--test/CodeGen/X86/fp128-cast.ll279
-rw-r--r--test/CodeGen/X86/fp128-compare.ll96
-rw-r--r--test/CodeGen/X86/fp128-i128.ll320
-rw-r--r--test/CodeGen/X86/fp128-libcalls.ll107
-rw-r--r--test/CodeGen/X86/fp128-load.ll35
-rw-r--r--test/CodeGen/X86/fp128-store.ll14
-rw-r--r--test/CodeGen/X86/fpcmp-soft-fp.ll127
-rw-r--r--test/CodeGen/X86/fpstack-debuginstr-kill.ll16
-rw-r--r--test/CodeGen/X86/frameescape.ll128
-rw-r--r--test/CodeGen/X86/frem-msvc32.ll12
-rw-r--r--test/CodeGen/X86/funclet-layout.ll158
-rw-r--r--test/CodeGen/X86/function-alias.ll12
-rw-r--r--test/CodeGen/X86/gcc_except_table.ll2
-rw-r--r--test/CodeGen/X86/global-sections.ll7
-rw-r--r--test/CodeGen/X86/h-register-store.ll25
-rw-r--r--test/CodeGen/X86/h-registers-0.ll1
-rw-r--r--test/CodeGen/X86/h-registers-1.ll1
-rw-r--r--test/CodeGen/X86/h-registers-3.ll1
-rw-r--r--test/CodeGen/X86/half.ll4
-rw-r--r--test/CodeGen/X86/hhvm-cc.ll241
-rw-r--r--test/CodeGen/X86/i386-shrink-wrapping.ll113
-rw-r--r--test/CodeGen/X86/immediate_merging.ll82
-rw-r--r--test/CodeGen/X86/implicit-null-check.ll51
-rw-r--r--test/CodeGen/X86/imul.ll63
-rw-r--r--test/CodeGen/X86/inalloca-stdcall.ll5
-rw-r--r--test/CodeGen/X86/inalloca.ll15
-rw-r--r--test/CodeGen/X86/inconsistent_landingpad.ll30
-rw-r--r--test/CodeGen/X86/inline-asm-2addr.ll11
-rw-r--r--test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll2
-rw-r--r--test/CodeGen/X86/inline-sse.ll34
-rw-r--r--test/CodeGen/X86/insertps-from-constantpool.ll20
-rw-r--r--test/CodeGen/X86/insertps-unfold-load-bug.ll33
-rw-r--r--test/CodeGen/X86/int-intrinsic.ll2
-rw-r--r--test/CodeGen/X86/late-address-taken.ll68
-rw-r--r--test/CodeGen/X86/lea-opt.ll131
-rw-r--r--test/CodeGen/X86/lit.local.cfg2
-rw-r--r--test/CodeGen/X86/localescape.ll143
-rw-r--r--test/CodeGen/X86/lower-vec-shift-2.ll1
-rw-r--r--test/CodeGen/X86/lsr-static-addr.ll2
-rw-r--r--test/CodeGen/X86/machine-combiner-int-vec.ll112
-rw-r--r--test/CodeGen/X86/machine-combiner-int.ll194
-rw-r--r--test/CodeGen/X86/machine-combiner.ll467
-rw-r--r--test/CodeGen/X86/machine-cp.ll38
-rw-r--r--test/CodeGen/X86/machine-trace-metrics-crash.ll4
-rw-r--r--test/CodeGen/X86/masked_gather_scatter.ll2012
-rw-r--r--test/CodeGen/X86/masked_memop.ll524
-rw-r--r--test/CodeGen/X86/materialize.ll184
-rw-r--r--test/CodeGen/X86/mcu-abi.ll112
-rw-r--r--test/CodeGen/X86/memcpy-2.ll26
-rw-r--r--test/CodeGen/X86/memcpy.ll33
-rw-r--r--test/CodeGen/X86/merge-store-partially-alias-loads.ll52
-rw-r--r--test/CodeGen/X86/misched-code-difference-with-debug.ll12
-rw-r--r--test/CodeGen/X86/mmx-arg-passing-x86-64.ll1
-rw-r--r--test/CodeGen/X86/mmx-arg-passing.ll1
-rw-r--r--test/CodeGen/X86/mmx-coalescing.ll84
-rw-r--r--test/CodeGen/X86/mmx-intrinsics.ll291
-rw-r--r--test/CodeGen/X86/mmx-only.ll21
-rw-r--r--test/CodeGen/X86/movntdq-no-avx.ll2
-rw-r--r--test/CodeGen/X86/movpc32-check.ll42
-rw-r--r--test/CodeGen/X86/movtopush.ll25
-rw-r--r--test/CodeGen/X86/mult-alt-x86.ll2
-rw-r--r--test/CodeGen/X86/musttail-varargs.ll43
-rw-r--r--test/CodeGen/X86/nontemporal-2.ll21
-rw-r--r--test/CodeGen/X86/nontemporal.ll11
-rw-r--r--test/CodeGen/X86/null-streamer.ll4
-rw-r--r--test/CodeGen/X86/opt-ext-uses.ll8
-rw-r--r--test/CodeGen/X86/or-branch.ll30
-rw-r--r--test/CodeGen/X86/or-lea.ll120
-rw-r--r--test/CodeGen/X86/palignr.ll1
-rw-r--r--test/CodeGen/X86/patchpoint-verifiable.mir42
-rw-r--r--test/CodeGen/X86/peephole-na-phys-copy-folding.ll190
-rw-r--r--test/CodeGen/X86/pmul.ll297
-rw-r--r--test/CodeGen/X86/pop-stack-cleanup.ll76
-rw-r--r--test/CodeGen/X86/powi.ll38
-rw-r--r--test/CodeGen/X86/pr11415.ll8
-rw-r--r--test/CodeGen/X86/pr11468.ll2
-rw-r--r--test/CodeGen/X86/pr11985.ll30
-rw-r--r--test/CodeGen/X86/pr13577.ll5
-rw-r--r--test/CodeGen/X86/pr15267.ll240
-rw-r--r--test/CodeGen/X86/pr17631.ll2
-rw-r--r--test/CodeGen/X86/pr21529.ll15
-rw-r--r--test/CodeGen/X86/pr22019.ll2
-rw-r--r--test/CodeGen/X86/pr23900.ll29
-rw-r--r--test/CodeGen/X86/pr24139.ll148
-rw-r--r--test/CodeGen/X86/pr24602.ll17
-rw-r--r--test/CodeGen/X86/pr25828.ll30
-rw-r--r--test/CodeGen/X86/prolog-push-seq.ll19
-rw-r--r--test/CodeGen/X86/pseudo_cmov_lower.ll267
-rw-r--r--test/CodeGen/X86/pseudo_cmov_lower1.ll39
-rw-r--r--test/CodeGen/X86/pseudo_cmov_lower2.ll100
-rw-r--r--test/CodeGen/X86/psubus.ll580
-rw-r--r--test/CodeGen/X86/push-cfi-debug.ll53
-rw-r--r--test/CodeGen/X86/push-cfi-obj.ll44
-rw-r--r--test/CodeGen/X86/push-cfi.ll304
-rw-r--r--test/CodeGen/X86/ragreedy-hoist-spill.ll2
-rw-r--r--test/CodeGen/X86/rem_crash.ll257
-rw-r--r--test/CodeGen/X86/remat-invalid-liveness.ll85
-rw-r--r--test/CodeGen/X86/rodata-relocs.ll8
-rw-r--r--test/CodeGen/X86/rounding-ops.ll24
-rw-r--r--test/CodeGen/X86/safestack.ll32
-rw-r--r--test/CodeGen/X86/sar_fold.ll37
-rw-r--r--test/CodeGen/X86/sar_fold64.ll43
-rw-r--r--test/CodeGen/X86/scalar-fp-to-i64.ll151
-rw-r--r--test/CodeGen/X86/scalar-int-to-fp.ll132
-rw-r--r--test/CodeGen/X86/sdiv-pow2.ll33
-rw-r--r--test/CodeGen/X86/seh-catch-all-win32.ll33
-rw-r--r--test/CodeGen/X86/seh-catch-all.ll29
-rw-r--r--test/CodeGen/X86/seh-catchpad.ll198
-rw-r--r--test/CodeGen/X86/seh-except-finally.ll71
-rw-r--r--test/CodeGen/X86/seh-exception-code.ll38
-rw-r--r--test/CodeGen/X86/seh-filter.ll21
-rw-r--r--test/CodeGen/X86/seh-finally.ll50
-rw-r--r--test/CodeGen/X86/seh-safe-div-win32.ll42
-rw-r--r--test/CodeGen/X86/seh-safe-div.ll54
-rw-r--r--test/CodeGen/X86/seh-stack-realign-win32.ll99
-rw-r--r--test/CodeGen/X86/seh-stack-realign.ll34
-rw-r--r--test/CodeGen/X86/setcc-lowering.ll1
-rw-r--r--test/CodeGen/X86/setcc.ll20
-rw-r--r--test/CodeGen/X86/shift-bmi2.ll20
-rw-r--r--test/CodeGen/X86/shrink-wrap-chkstk.ll37
-rw-r--r--test/CodeGen/X86/slow-div.ll15
-rw-r--r--test/CodeGen/X86/slow-unaligned-mem.ll95
-rw-r--r--test/CodeGen/X86/soft-fp.ll34
-rw-r--r--test/CodeGen/X86/soft-sitofp.ll169
-rw-r--r--test/CodeGen/X86/splat-for-size.ll197
-rw-r--r--test/CodeGen/X86/sqrt-fastmath.ll9
-rw-r--r--test/CodeGen/X86/sse-align-12.ll1
-rw-r--r--test/CodeGen/X86/sse-minmax.ll2
-rw-r--r--test/CodeGen/X86/sse-only.ll20
-rw-r--r--test/CodeGen/X86/sse-scalar-fp-arith-unary.ll1
-rw-r--r--test/CodeGen/X86/sse2-vector-shifts.ll282
-rw-r--r--test/CodeGen/X86/sse2.ll1
-rw-r--r--test/CodeGen/X86/sse3-avx-addsub-2.ll312
-rw-r--r--test/CodeGen/X86/sse3-avx-addsub.ll197
-rw-r--r--test/CodeGen/X86/sse3-intrinsics-fast-isel.ll171
-rw-r--r--test/CodeGen/X86/sse3.ll7
-rw-r--r--test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll47
-rw-r--r--test/CodeGen/X86/sse41-intrinsics-x86.ll48
-rw-r--r--test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll185
-rw-r--r--test/CodeGen/X86/sse41.ll65
-rw-r--r--test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll98
-rw-r--r--test/CodeGen/X86/sse_partial_update.ll33
-rw-r--r--test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll290
-rw-r--r--test/CodeGen/X86/stack-align-memcpy.ll2
-rw-r--r--test/CodeGen/X86/stack-folding-adx-x86_64.ll45
-rw-r--r--test/CodeGen/X86/stack-folding-fp-avx1.ll18
-rw-r--r--test/CodeGen/X86/stack-folding-fp-sse42.ll28
-rw-r--r--test/CodeGen/X86/stack-folding-int-avx1.ll40
-rw-r--r--test/CodeGen/X86/stack-folding-int-avx2.ll55
-rw-r--r--test/CodeGen/X86/stack-folding-int-sse42.ll38
-rw-r--r--test/CodeGen/X86/stack-folding-mmx.ll148
-rw-r--r--test/CodeGen/X86/stack-folding-x86_64.ll2
-rw-r--r--test/CodeGen/X86/stack-folding-xop.ll2
-rw-r--r--test/CodeGen/X86/stack-probe-size.ll3
-rw-r--r--test/CodeGen/X86/stack-protector-dbginfo.ll36
-rw-r--r--test/CodeGen/X86/stack-protector-weight.ll4
-rw-r--r--test/CodeGen/X86/stackmap-frame-setup.ll20
-rw-r--r--test/CodeGen/X86/statepoint-allocas.ll10
-rw-r--r--test/CodeGen/X86/statepoint-call-lowering.ll103
-rw-r--r--test/CodeGen/X86/statepoint-far-call.ll4
-rw-r--r--test/CodeGen/X86/statepoint-forward.ll16
-rw-r--r--test/CodeGen/X86/statepoint-gctransition-call-lowering.ll66
-rw-r--r--test/CodeGen/X86/statepoint-invoke.ll78
-rw-r--r--test/CodeGen/X86/statepoint-stack-usage.ll54
-rw-r--r--test/CodeGen/X86/statepoint-stackmap-format.ll96
-rw-r--r--test/CodeGen/X86/stdarg.ll10
-rw-r--r--test/CodeGen/X86/stores-merging.ll46
-rw-r--r--test/CodeGen/X86/switch-bt.ll8
-rw-r--r--test/CodeGen/X86/switch-edge-weight.ll281
-rw-r--r--test/CodeGen/X86/switch-jump-table.ll54
-rw-r--r--test/CodeGen/X86/switch-order-weight.ll2
-rw-r--r--test/CodeGen/X86/switch.ll85
-rw-r--r--test/CodeGen/X86/swizzle-2.ll1
-rw-r--r--test/CodeGen/X86/system-intrinsics-64-xsave.ll41
-rw-r--r--test/CodeGen/X86/system-intrinsics-64-xsavec.ll21
-rw-r--r--test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll21
-rw-r--r--test/CodeGen/X86/system-intrinsics-64-xsaves.ll41
-rw-r--r--test/CodeGen/X86/system-intrinsics-64.ll2
-rw-r--r--test/CodeGen/X86/system-intrinsics-xsave.ll23
-rw-r--r--test/CodeGen/X86/system-intrinsics-xsavec.ll12
-rw-r--r--test/CodeGen/X86/system-intrinsics-xsaveopt.ll12
-rw-r--r--test/CodeGen/X86/system-intrinsics-xsaves.ll23
-rw-r--r--test/CodeGen/X86/system-intrinsics.ll2
-rw-r--r--test/CodeGen/X86/tail-dup-catchret.ll31
-rw-r--r--test/CodeGen/X86/tail-merge-wineh.ll107
-rw-r--r--test/CodeGen/X86/tail-opts.ll40
-rw-r--r--test/CodeGen/X86/tailcall-mem-intrinsics.ll4
-rw-r--r--test/CodeGen/X86/tailcall-msvc-conventions.ll189
-rw-r--r--test/CodeGen/X86/tailcall-readnone.ll15
-rw-r--r--test/CodeGen/X86/tls-android-negative.ll65
-rw-r--r--test/CodeGen/X86/tls-android.ll89
-rw-r--r--test/CodeGen/X86/tls-models.ll2
-rw-r--r--test/CodeGen/X86/tls-pie.ll8
-rw-r--r--test/CodeGen/X86/token_landingpad.ll21
-rw-r--r--test/CodeGen/X86/trunc-store.ll49
-rw-r--r--test/CodeGen/X86/unaligned-32-byte-memops.ll7
-rw-r--r--test/CodeGen/X86/unaligned-spill-folding.ll2
-rw-r--r--test/CodeGen/X86/unknown-location.ll8
-rw-r--r--test/CodeGen/X86/v2f32.ll1
-rw-r--r--test/CodeGen/X86/vec_cast2.ll31
-rw-r--r--test/CodeGen/X86/vec_cmp_sint-128.ll722
-rw-r--r--test/CodeGen/X86/vec_cmp_uint-128.ll860
-rw-r--r--test/CodeGen/X86/vec_ctbits.ll129
-rw-r--r--test/CodeGen/X86/vec_extract-avx.ll114
-rw-r--r--test/CodeGen/X86/vec_fabs.ll2
-rw-r--r--test/CodeGen/X86/vec_fp_to_int.ll1269
-rw-r--r--test/CodeGen/X86/vec_insert-5.ll1
-rw-r--r--test/CodeGen/X86/vec_int_to_fp.ll1920
-rw-r--r--test/CodeGen/X86/vec_minmax_sint.ll2090
-rw-r--r--test/CodeGen/X86/vec_minmax_uint.ll2229
-rw-r--r--test/CodeGen/X86/vec_sdiv_to_shift.ll13
-rw-r--r--test/CodeGen/X86/vec_trunc_sext.ll31
-rw-r--r--test/CodeGen/X86/vec_uint_to_fp-fastmath.ll130
-rw-r--r--test/CodeGen/X86/vec_uint_to_fp.ll8
-rw-r--r--test/CodeGen/X86/vector-blend.ll72
-rw-r--r--test/CodeGen/X86/vector-idiv.ll1
-rw-r--r--test/CodeGen/X86/vector-lzcnt-128.ll472
-rw-r--r--test/CodeGen/X86/vector-lzcnt-256.ll257
-rw-r--r--test/CodeGen/X86/vector-lzcnt-512.ll219
-rw-r--r--test/CodeGen/X86/vector-merge-store-fp-constants.ll35
-rw-r--r--test/CodeGen/X86/vector-popcnt-128.ll37
-rw-r--r--test/CodeGen/X86/vector-popcnt-256.ll73
-rw-r--r--test/CodeGen/X86/vector-popcnt-512.ll161
-rw-r--r--test/CodeGen/X86/vector-rotate-128.ll1595
-rw-r--r--test/CodeGen/X86/vector-rotate-256.ll1089
-rw-r--r--test/CodeGen/X86/vector-sext.ll3804
-rw-r--r--test/CodeGen/X86/vector-shift-ashr-128.ll917
-rw-r--r--test/CodeGen/X86/vector-shift-ashr-256.ll691
-rw-r--r--test/CodeGen/X86/vector-shift-ashr-512.ll378
-rw-r--r--test/CodeGen/X86/vector-shift-lshr-128.ll619
-rw-r--r--test/CodeGen/X86/vector-shift-lshr-256.ll444
-rw-r--r--test/CodeGen/X86/vector-shift-lshr-512.ll317
-rw-r--r--test/CodeGen/X86/vector-shift-shl-128.ll501
-rw-r--r--test/CodeGen/X86/vector-shift-shl-256.ll403
-rw-r--r--test/CodeGen/X86/vector-shift-shl-512.ll293
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v16.ll276
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v2.ll318
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v4.ll92
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v8.ll252
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v16.ll249
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v32.ll210
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v4.ll703
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v8.ll221
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v16.ll134
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v32.ll44
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v8.ll2487
-rw-r--r--test/CodeGen/X86/vector-shuffle-combining.ll1
-rw-r--r--test/CodeGen/X86/vector-shuffle-mmx.ll1
-rw-r--r--test/CodeGen/X86/vector-shuffle-sse1.ll1
-rw-r--r--test/CodeGen/X86/vector-shuffle-sse4a.ll140
-rw-r--r--test/CodeGen/X86/vector-shuffle-v1.ll439
-rw-r--r--test/CodeGen/X86/vector-trunc.ll681
-rw-r--r--test/CodeGen/X86/vector-tzcnt-128.ll2035
-rw-r--r--test/CodeGen/X86/vector-tzcnt-256.ll1455
-rw-r--r--test/CodeGen/X86/vector-tzcnt-512.ll271
-rw-r--r--test/CodeGen/X86/vector-zext.ll1229
-rw-r--r--test/CodeGen/X86/vector-zmov.ll1
-rw-r--r--test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll19
-rw-r--r--test/CodeGen/X86/vmovq.ll28
-rw-r--r--test/CodeGen/X86/vselect-2.ll1
-rw-r--r--test/CodeGen/X86/vselect-avx.ll12
-rw-r--r--test/CodeGen/X86/vselect-minmax.ll16332
-rw-r--r--test/CodeGen/X86/vselect.ll1
-rw-r--r--test/CodeGen/X86/vshift_scalar.ll1
-rw-r--r--test/CodeGen/X86/wide-integer-cmp.ll130
-rw-r--r--test/CodeGen/X86/widen_load-2.ll4
-rw-r--r--test/CodeGen/X86/widen_shuffle-1.ll1
-rw-r--r--test/CodeGen/X86/win-catchpad-csrs.ll268
-rw-r--r--test/CodeGen/X86/win-catchpad-nested-cxx.ll105
-rw-r--r--test/CodeGen/X86/win-catchpad-nested.ll42
-rw-r--r--test/CodeGen/X86/win-catchpad-varargs.ll101
-rw-r--r--test/CodeGen/X86/win-catchpad.ll353
-rw-r--r--test/CodeGen/X86/win-cleanuppad.ll199
-rw-r--r--test/CodeGen/X86/win-funclet-cfi.ll95
-rw-r--r--test/CodeGen/X86/win-mixed-ehpersonality.ll81
-rw-r--r--test/CodeGen/X86/win32-eh-states.ll213
-rw-r--r--test/CodeGen/X86/win32-eh.ll49
-rw-r--r--test/CodeGen/X86/win32-pic-jumptable.ll8
-rw-r--r--test/CodeGen/X86/win32-seh-catchpad-realign.ll77
-rw-r--r--test/CodeGen/X86/win32-seh-catchpad.ll231
-rw-r--r--test/CodeGen/X86/win32-seh-nested-finally.ll80
-rw-r--r--test/CodeGen/X86/win32-spill-xmm.ll40
-rw-r--r--test/CodeGen/X86/win64_frame.ll70
-rw-r--r--test/CodeGen/X86/win64_sibcall.ll38
-rw-r--r--test/CodeGen/X86/win_coreclr_chkstk.ll143
-rw-r--r--test/CodeGen/X86/win_eh_prepare.ll82
-rw-r--r--test/CodeGen/X86/win_ftol2.ll166
-rw-r--r--test/CodeGen/X86/wineh-coreclr.ll267
-rw-r--r--test/CodeGen/X86/wineh-exceptionpointer.ll26
-rw-r--r--test/CodeGen/X86/wineh-no-ehpads.ll20
-rw-r--r--test/CodeGen/X86/x32-function_pointer-3.ll2
-rw-r--r--test/CodeGen/X86/x32-indirectbr.ll26
-rw-r--r--test/CodeGen/X86/x32-landingpad.ll27
-rw-r--r--test/CodeGen/X86/x32-va_start.ll99
-rw-r--r--test/CodeGen/X86/x86-32-intrcc.ll79
-rw-r--r--test/CodeGen/X86/x86-64-baseptr.ll4
-rw-r--r--test/CodeGen/X86/x86-64-double-precision-shift-left.ll17
-rw-r--r--test/CodeGen/X86/x86-64-double-precision-shift-right.ll9
-rw-r--r--test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll4
-rw-r--r--test/CodeGen/X86/x86-64-intrcc.ll86
-rw-r--r--test/CodeGen/X86/x86-64-ms_abi-vararg.ll108
-rw-r--r--test/CodeGen/X86/x86-64-pic-10.ll2
-rw-r--r--test/CodeGen/X86/x86-fold-pshufb.ll20
-rw-r--r--test/CodeGen/X86/x86-sanitizer-shrink-wrapping.ll40
-rw-r--r--test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll16
-rw-r--r--test/CodeGen/X86/x86-shrink-wrap-unwind.ll153
-rw-r--r--test/CodeGen/X86/x86-shrink-wrapping.ll254
-rw-r--r--test/CodeGen/X86/x86-win64-shrink-wrapping.ll126
-rw-r--r--test/CodeGen/X86/xop-intrinsics-x86_64.ll33
-rw-r--r--test/CodeGen/X86/xop-pcmov.ll163
-rw-r--r--test/CodeGen/XCore/aliases.ll6
-rw-r--r--test/CodeGen/XCore/dwarf_debug.ll8
-rw-r--r--test/DebugInfo/2009-11-03-InsertExtractValue.ll23
-rw-r--r--test/DebugInfo/2009-11-05-DeadGlobalVariable.ll26
-rw-r--r--test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll14
-rw-r--r--test/DebugInfo/2009-11-10-CurrentFn.ll31
-rw-r--r--test/DebugInfo/2010-01-05-DbgScope.ll25
-rw-r--r--test/DebugInfo/2010-03-12-llc-crash.ll22
-rw-r--r--test/DebugInfo/2010-03-19-DbgDeclare.ll20
-rw-r--r--test/DebugInfo/2010-03-24-MemberFn.ll70
-rw-r--r--test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll112
-rw-r--r--test/DebugInfo/2010-04-19-FramePtr.ll37
-rw-r--r--test/DebugInfo/2010-05-03-DisableFramePtr.ll40
-rw-r--r--test/DebugInfo/2010-05-03-OriginDIE.ll94
-rw-r--r--test/DebugInfo/2010-05-10-MultipleCU.ll44
-rw-r--r--test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll61
-rw-r--r--test/DebugInfo/2010-07-19-Crash.ll30
-rw-r--r--test/DebugInfo/2010-10-01-crash.ll24
-rw-r--r--test/DebugInfo/AArch64/big-endian.ll2
-rw-r--r--test/DebugInfo/AArch64/bitfields.ll2
-rw-r--r--test/DebugInfo/AArch64/cfi-eof-prologue.ll16
-rw-r--r--test/DebugInfo/AArch64/coalescing.ll10
-rw-r--r--test/DebugInfo/AArch64/constant-dbgloc.ll6
-rw-r--r--test/DebugInfo/AArch64/dwarfdump.ll6
-rw-r--r--test/DebugInfo/AArch64/frameindices.ll24
-rw-r--r--test/DebugInfo/AArch64/prologue_end.ll43
-rw-r--r--test/DebugInfo/AArch64/struct_by_value.ll8
-rw-r--r--test/DebugInfo/ARM/PR16736.ll18
-rw-r--r--test/DebugInfo/ARM/bitfield.ll2
-rw-r--r--test/DebugInfo/ARM/cfi-eof-prologue.ll16
-rw-r--r--test/DebugInfo/ARM/constant-dbgloc.ll6
-rw-r--r--test/DebugInfo/ARM/float-args.ll45
-rw-r--r--test/DebugInfo/ARM/header.ll6
-rw-r--r--test/DebugInfo/ARM/lowerbdgdeclare_vla.ll14
-rw-r--r--test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll6
-rw-r--r--test/DebugInfo/ARM/prologue_end.ll46
-rw-r--r--test/DebugInfo/ARM/s-super-register.ll10
-rw-r--r--test/DebugInfo/ARM/selectiondag-deadcode.ll6
-rw-r--r--test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll8
-rw-r--r--test/DebugInfo/ARM/tls.ll12
-rw-r--r--test/DebugInfo/COFF/asan-module-ctor.ll8
-rw-r--r--test/DebugInfo/COFF/asan-module-without-functions.ll4
-rw-r--r--test/DebugInfo/COFF/asm.ll34
-rw-r--r--test/DebugInfo/COFF/cpp-mangling.ll6
-rw-r--r--test/DebugInfo/COFF/multifile.ll12
-rw-r--r--test/DebugInfo/COFF/multifunction.ll24
-rw-r--r--test/DebugInfo/COFF/simple.ll12
-rw-r--r--test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll8
-rw-r--r--test/DebugInfo/Generic/2009-10-16-Phi.ll (renamed from test/DebugInfo/2009-10-16-Phi.ll)0
-rw-r--r--test/DebugInfo/Generic/2009-11-03-InsertExtractValue.ll23
-rw-r--r--test/DebugInfo/Generic/2009-11-05-DeadGlobalVariable.ll26
-rw-r--r--test/DebugInfo/Generic/2009-11-06-NamelessGlobalVariable.ll14
-rw-r--r--test/DebugInfo/Generic/2009-11-10-CurrentFn.ll31
-rw-r--r--test/DebugInfo/Generic/2010-01-05-DbgScope.ll25
-rw-r--r--test/DebugInfo/Generic/2010-03-12-llc-crash.ll22
-rw-r--r--test/DebugInfo/Generic/2010-03-19-DbgDeclare.ll20
-rw-r--r--test/DebugInfo/Generic/2010-03-24-MemberFn.ll70
-rw-r--r--test/DebugInfo/Generic/2010-04-06-NestedFnDbgInfo.ll112
-rw-r--r--test/DebugInfo/Generic/2010-04-19-FramePtr.ll37
-rw-r--r--test/DebugInfo/Generic/2010-05-03-DisableFramePtr.ll40
-rw-r--r--test/DebugInfo/Generic/2010-05-03-OriginDIE.ll94
-rw-r--r--test/DebugInfo/Generic/2010-05-10-MultipleCU.ll44
-rw-r--r--test/DebugInfo/Generic/2010-06-29-InlinedFnLocalVar.ll61
-rw-r--r--test/DebugInfo/Generic/2010-07-19-Crash.ll30
-rw-r--r--test/DebugInfo/Generic/2010-10-01-crash.ll24
-rw-r--r--test/DebugInfo/Generic/Inputs/gmlt.ll153
-rw-r--r--test/DebugInfo/Generic/PR20038.ll172
-rw-r--r--test/DebugInfo/Generic/accel-table-hash-collisions.ll92
-rw-r--r--test/DebugInfo/Generic/array.ll40
-rw-r--r--test/DebugInfo/Generic/block-asan.ll87
-rw-r--r--test/DebugInfo/Generic/bug_null_debuginfo.ll8
-rw-r--r--test/DebugInfo/Generic/constant-pointers.ll51
-rw-r--r--test/DebugInfo/Generic/constant-sdnodes-have-dbg-location.ll26
-rw-r--r--test/DebugInfo/Generic/constantfp-sdnodes-have-dbg-location.ll24
-rw-r--r--test/DebugInfo/Generic/cross-cu-inlining.ll143
-rw-r--r--test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll95
-rw-r--r--test/DebugInfo/Generic/cross-cu-linkonce.ll73
-rw-r--r--test/DebugInfo/Generic/cu-range-hole.ll74
-rw-r--r--test/DebugInfo/Generic/cu-ranges.ll71
-rw-r--r--test/DebugInfo/Generic/dbg-at-specficiation.ll21
-rw-r--r--test/DebugInfo/Generic/dead-argument-order.ll81
-rw-r--r--test/DebugInfo/Generic/debug-info-always-inline.ll (renamed from test/DebugInfo/debug-info-always-inline.ll)0
-rw-r--r--test/DebugInfo/Generic/debug-info-qualifiers.ll98
-rw-r--r--test/DebugInfo/Generic/debuginfofinder-forward-declaration.ll42
-rw-r--r--test/DebugInfo/Generic/debuginfofinder-multiple-cu.ll41
-rw-r--r--test/DebugInfo/Generic/def-line.ll93
-rw-r--r--test/DebugInfo/Generic/discriminator.ll52
-rw-r--r--test/DebugInfo/Generic/dwarf-public-names.ll131
-rw-r--r--test/DebugInfo/Generic/empty.ll31
-rw-r--r--test/DebugInfo/Generic/enum-types.ll78
-rw-r--r--test/DebugInfo/Generic/enum.ll80
-rw-r--r--test/DebugInfo/Generic/global.ll42
-rw-r--r--test/DebugInfo/Generic/gmlt.test (renamed from test/DebugInfo/gmlt.test)0
-rw-r--r--test/DebugInfo/Generic/gvn.ll114
-rw-r--r--test/DebugInfo/Generic/incorrect-variable-debugloc.ll391
-rw-r--r--test/DebugInfo/Generic/incorrect-variable-debugloc1.ll77
-rw-r--r--test/DebugInfo/Generic/inheritance.ll154
-rw-r--r--test/DebugInfo/Generic/inline-debug-info-multiret.ll156
-rw-r--r--test/DebugInfo/Generic/inline-debug-info.ll174
-rw-r--r--test/DebugInfo/Generic/inline-no-debug-info.ll70
-rw-r--r--test/DebugInfo/Generic/inline-scopes.ll130
-rw-r--r--test/DebugInfo/Generic/inlined-arguments.ll79
-rw-r--r--test/DebugInfo/Generic/inlined-vars.ll56
-rw-r--r--test/DebugInfo/Generic/lit.local.cfg3
-rw-r--r--test/DebugInfo/Generic/location-verifier.ll33
-rw-r--r--test/DebugInfo/Generic/lto-comp-dir.ll84
-rw-r--r--test/DebugInfo/Generic/member-order.ll66
-rw-r--r--test/DebugInfo/Generic/member-pointers.ll40
-rw-r--r--test/DebugInfo/Generic/missing-abstract-variable.ll182
-rw-r--r--test/DebugInfo/Generic/multiline.ll82
-rw-r--r--test/DebugInfo/Generic/namespace.ll365
-rw-r--r--test/DebugInfo/Generic/namespace_function_definition.ll44
-rw-r--r--test/DebugInfo/Generic/namespace_inline_function_definition.ll95
-rw-r--r--test/DebugInfo/Generic/nodebug.ll51
-rw-r--r--test/DebugInfo/Generic/piece-verifier.ll56
-rwxr-xr-xtest/DebugInfo/Generic/ptrsize.ll47
-rw-r--r--test/DebugInfo/Generic/recursive_inlining.ll275
-rw-r--r--test/DebugInfo/Generic/restrict.ll53
-rw-r--r--test/DebugInfo/Generic/skeletoncu.ll16
-rw-r--r--test/DebugInfo/Generic/sugared-constants.ll82
-rw-r--r--test/DebugInfo/Generic/template-recursive-void.ll61
-rw-r--r--test/DebugInfo/Generic/tu-composite.ll184
-rw-r--r--test/DebugInfo/Generic/tu-member-pointer.ll30
-rw-r--r--test/DebugInfo/Generic/two-cus-from-same-file.ll73
-rw-r--r--test/DebugInfo/Generic/typedef.ll32
-rw-r--r--test/DebugInfo/Generic/unconditional-branch.ll65
-rw-r--r--test/DebugInfo/Generic/varargs.ll101
-rw-r--r--test/DebugInfo/Generic/version.ll32
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-dwp.x86_64.obin0 -> 2000 bytes
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-macho-relocs.macho.x86_64.obin0 -> 2364 bytes
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-macro-cmd.h1
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-macro.cc11
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-macro.h5
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-macro.obin0 -> 5616 bytes
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-test.cc2
-rw-r--r--test/DebugInfo/Inputs/dwarfdump-test.macho-i386.obin0 -> 3620 bytes
-rw-r--r--test/DebugInfo/Inputs/fat-test.obin0 -> 5000 bytes
-rw-r--r--test/DebugInfo/Inputs/gmlt.ll18
-rw-r--r--test/DebugInfo/Inputs/line.ll6
-rw-r--r--test/DebugInfo/MIR/X86/lit.local.cfg2
-rw-r--r--test/DebugInfo/MIR/X86/live-debug-values-3preds.mir299
-rw-r--r--test/DebugInfo/MIR/X86/live-debug-values.mir260
-rw-r--r--test/DebugInfo/MIR/lit.local.cfg2
-rw-r--r--test/DebugInfo/Mips/InlinedFnLocalVar.ll16
-rw-r--r--test/DebugInfo/Mips/delay-slot.ll12
-rw-r--r--test/DebugInfo/Mips/dsr-fixed-objects.ll156
-rw-r--r--test/DebugInfo/Mips/dsr-non-fixed-objects.ll125
-rw-r--r--test/DebugInfo/Mips/fn-call-line.ll6
-rw-r--r--test/DebugInfo/Mips/prologue_end.ll70
-rw-r--r--test/DebugInfo/PDB/DIA/lit.local.cfg (renamed from test/DebugInfo/PDB/lit.local.cfg)0
-rw-r--r--test/DebugInfo/PDB/DIA/pdbdump-flags.test40
-rw-r--r--test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test53
-rw-r--r--test/DebugInfo/PDB/pdbdump-flags.test40
-rw-r--r--test/DebugInfo/PDB/pdbdump-headers.test12
-rw-r--r--test/DebugInfo/PDB/pdbdump-symbol-format.test53
-rw-r--r--test/DebugInfo/PR20038.ll169
-rw-r--r--test/DebugInfo/PowerPC/tls-fission.ll2
-rw-r--r--test/DebugInfo/PowerPC/tls.ll2
-rw-r--r--test/DebugInfo/Sparc/gnu-window-save.ll6
-rw-r--r--test/DebugInfo/Sparc/prologue_end.ll41
-rw-r--r--test/DebugInfo/SystemZ/prologue_end.ll42
-rw-r--r--test/DebugInfo/SystemZ/variable-loc.ll14
-rw-r--r--test/DebugInfo/X86/2010-04-13-PubType.ll10
-rw-r--r--test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll8
-rw-r--r--test/DebugInfo/X86/2011-12-16-BadStructRef.ll44
-rw-r--r--test/DebugInfo/X86/DIModuleContext.ll30
-rw-r--r--test/DebugInfo/X86/DW_AT_byte_size.ll8
-rw-r--r--test/DebugInfo/X86/DW_AT_linkage_name.ll20
-rw-r--r--test/DebugInfo/X86/DW_AT_location-reference.ll8
-rw-r--r--test/DebugInfo/X86/DW_AT_object_pointer.ll22
-rw-r--r--test/DebugInfo/X86/DW_AT_specification.ll6
-rw-r--r--test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll6
-rw-r--r--test/DebugInfo/X86/DW_TAG_friend.ll2
-rw-r--r--test/DebugInfo/X86/InlinedFnLocalVar.ll16
-rw-r--r--test/DebugInfo/X86/aligned_stack_var.ll8
-rw-r--r--test/DebugInfo/X86/arange-and-stub.ll14
-rw-r--r--test/DebugInfo/X86/arange.ll2
-rw-r--r--test/DebugInfo/X86/arguments.ll10
-rw-r--r--test/DebugInfo/X86/array.ll30
-rw-r--r--test/DebugInfo/X86/array2.ll20
-rw-r--r--test/DebugInfo/X86/bbjoin.ll101
-rw-r--r--test/DebugInfo/X86/bitfields.ll2
-rw-r--r--test/DebugInfo/X86/block-capture.ll10
-rw-r--r--test/DebugInfo/X86/byvalstruct.ll16
-rw-r--r--test/DebugInfo/X86/c-type-units.ll2
-rw-r--r--test/DebugInfo/X86/coff_debug_info_type.ll10
-rw-r--r--test/DebugInfo/X86/coff_relative_names.ll6
-rw-r--r--test/DebugInfo/X86/concrete_out_of_line.ll26
-rw-r--r--test/DebugInfo/X86/constant-aggregate.ll20
-rw-r--r--test/DebugInfo/X86/cu-ranges-odr.ll18
-rw-r--r--test/DebugInfo/X86/cu-ranges.ll14
-rw-r--r--test/DebugInfo/X86/data_member_location.ll2
-rw-r--r--test/DebugInfo/X86/dbg-at-specficiation.ll21
-rw-r--r--test/DebugInfo/X86/dbg-byval-parameter.ll8
-rw-r--r--test/DebugInfo/X86/dbg-const-int.ll8
-rw-r--r--test/DebugInfo/X86/dbg-const.ll8
-rw-r--r--test/DebugInfo/X86/dbg-declare-arg.ll24
-rw-r--r--test/DebugInfo/X86/dbg-declare.ll10
-rw-r--r--test/DebugInfo/X86/dbg-file-name.ll6
-rw-r--r--test/DebugInfo/X86/dbg-i128-const.ll8
-rw-r--r--test/DebugInfo/X86/dbg-merge-loc-entry.ll12
-rw-r--r--test/DebugInfo/X86/dbg-prolog-end.ll14
-rw-r--r--test/DebugInfo/X86/dbg-subrange.ll6
-rw-r--r--test/DebugInfo/X86/dbg-value-const-byref.ll10
-rw-r--r--test/DebugInfo/X86/dbg-value-dag-combine.ll14
-rw-r--r--test/DebugInfo/X86/dbg-value-inlined-parameter.ll14
-rw-r--r--test/DebugInfo/X86/dbg-value-isel.ll14
-rw-r--r--test/DebugInfo/X86/dbg-value-location.ll16
-rw-r--r--test/DebugInfo/X86/dbg-value-range.ll10
-rw-r--r--test/DebugInfo/X86/dbg-value-terminator.ll12
-rw-r--r--test/DebugInfo/X86/dbg_value_direct.ll10
-rw-r--r--test/DebugInfo/X86/debug-dead-local-var.ll10
-rw-r--r--test/DebugInfo/X86/debug-info-access.ll6
-rw-r--r--test/DebugInfo/X86/debug-info-block-captured-self.ll14
-rw-r--r--test/DebugInfo/X86/debug-info-blocks.ll46
-rw-r--r--test/DebugInfo/X86/debug-info-packed-struct.ll2
-rw-r--r--test/DebugInfo/X86/debug-info-static-member.ll8
-rw-r--r--test/DebugInfo/X86/debug-loc-asan.ll19
-rw-r--r--test/DebugInfo/X86/debug-loc-empty-entries.ll8
-rw-r--r--test/DebugInfo/X86/debug-loc-offset.ll18
-rw-r--r--test/DebugInfo/X86/debug-ranges-offset.ll14
-rw-r--r--test/DebugInfo/X86/debug_frame.ll6
-rw-r--r--test/DebugInfo/X86/debugger-tune.ll44
-rw-r--r--test/DebugInfo/X86/decl-derived-member.ll26
-rw-r--r--test/DebugInfo/X86/deleted-bit-piece.ll8
-rw-r--r--test/DebugInfo/X86/discriminator.ll6
-rw-r--r--test/DebugInfo/X86/dw_op_minus.ll84
-rw-r--r--test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll18
-rw-r--r--test/DebugInfo/X86/dwarf-aranges.ll6
-rw-r--r--test/DebugInfo/X86/dwarf-linkage-names.ll71
-rw-r--r--test/DebugInfo/X86/dwarf-public-names.ll22
-rw-r--r--test/DebugInfo/X86/dwarf-pubnames-split.ll6
-rw-r--r--test/DebugInfo/X86/earlydup-crash.ll8
-rw-r--r--test/DebugInfo/X86/elf-names.ll20
-rw-r--r--test/DebugInfo/X86/empty-and-one-elem-array.ll10
-rw-r--r--test/DebugInfo/X86/empty-array.ll2
-rw-r--r--test/DebugInfo/X86/empty.ll2
-rw-r--r--test/DebugInfo/X86/ending-run.ll10
-rw-r--r--test/DebugInfo/X86/enum-class.ll2
-rw-r--r--test/DebugInfo/X86/enum-fwd-decl.ll2
-rw-r--r--test/DebugInfo/X86/externaltyperef.ll51
-rw-r--r--test/DebugInfo/X86/fission-cu.ll2
-rw-r--r--test/DebugInfo/X86/fission-hash.ll2
-rw-r--r--test/DebugInfo/X86/fission-inline.ll8
-rw-r--r--test/DebugInfo/X86/fission-ranges.ll30
-rw-r--r--test/DebugInfo/X86/float_const.ll8
-rw-r--r--test/DebugInfo/X86/formal_parameter.ll8
-rw-r--r--test/DebugInfo/X86/frame-register.ll12
-rw-r--r--test/DebugInfo/X86/generate-odr-hash.ll22
-rw-r--r--test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll18
-rw-r--r--test/DebugInfo/X86/gnu-public-names-empty.ll2
-rw-r--r--test/DebugInfo/X86/gnu-public-names.ll28
-rw-r--r--test/DebugInfo/X86/header.ll6
-rw-r--r--test/DebugInfo/X86/inline-member-function.ll12
-rw-r--r--test/DebugInfo/X86/inline-seldag-test.ll12
-rw-r--r--test/DebugInfo/X86/inlined-formal-parameter.ll10
-rw-r--r--test/DebugInfo/X86/inlined-indirect-value.ll8
-rw-r--r--test/DebugInfo/X86/instcombine-instrinsics.ll8
-rw-r--r--test/DebugInfo/X86/lexical_block.ll8
-rw-r--r--test/DebugInfo/X86/line-info.ll12
-rw-r--r--test/DebugInfo/X86/linkage-name.ll10
-rw-r--r--test/DebugInfo/X86/live-debug-values.ll152
-rw-r--r--test/DebugInfo/X86/low-pc-cu.ll6
-rw-r--r--test/DebugInfo/X86/memberfnptr.ll2
-rw-r--r--test/DebugInfo/X86/mi-print.ll14
-rw-r--r--test/DebugInfo/X86/misched-dbg-value.ll18
-rw-r--r--test/DebugInfo/X86/missing-file-line.ll8
-rw-r--r--test/DebugInfo/X86/multiple-aranges.ll4
-rw-r--r--test/DebugInfo/X86/multiple-at-const-val.ll6
-rw-r--r--test/DebugInfo/X86/nodebug_with_debug_loc.ll12
-rw-r--r--test/DebugInfo/X86/nondefault-subrange-array.ll2
-rw-r--r--test/DebugInfo/X86/nophysreg.ll24
-rw-r--r--test/DebugInfo/X86/objc-fwd-decl.ll2
-rw-r--r--test/DebugInfo/X86/objc-property-void.ll10
-rw-r--r--test/DebugInfo/X86/op_deref.ll18
-rw-r--r--test/DebugInfo/X86/parameters.ll16
-rw-r--r--test/DebugInfo/X86/pieces-1.ll12
-rw-r--r--test/DebugInfo/X86/pieces-2.ll10
-rw-r--r--test/DebugInfo/X86/pieces-3.ll18
-rw-r--r--test/DebugInfo/X86/pointer-type-size.ll2
-rw-r--r--test/DebugInfo/X86/pr11300.ll14
-rw-r--r--test/DebugInfo/X86/pr12831.ll28
-rw-r--r--test/DebugInfo/X86/pr13303.ll6
-rw-r--r--test/DebugInfo/X86/pr19307.ll12
-rw-r--r--test/DebugInfo/X86/processes-relocations.ll2
-rw-r--r--test/DebugInfo/X86/prologue-stack.ll6
-rw-r--r--test/DebugInfo/X86/recursive_inlining.ll275
-rw-r--r--test/DebugInfo/X86/ref_addr_relocation.ll4
-rw-r--r--test/DebugInfo/X86/reference-argument.ll47
-rw-r--r--test/DebugInfo/X86/rvalue-ref.ll8
-rw-r--r--test/DebugInfo/X86/safestack-byval.ll91
-rw-r--r--test/DebugInfo/X86/sret.ll76
-rw-r--r--test/DebugInfo/X86/sroasplit-1.ll12
-rw-r--r--test/DebugInfo/X86/sroasplit-2.ll14
-rw-r--r--test/DebugInfo/X86/sroasplit-3.ll10
-rw-r--r--test/DebugInfo/X86/sroasplit-4.ll10
-rw-r--r--test/DebugInfo/X86/sroasplit-5.ll10
-rw-r--r--test/DebugInfo/X86/stmt-list-multiple-compile-units.ll16
-rw-r--r--test/DebugInfo/X86/stmt-list.ll6
-rw-r--r--test/DebugInfo/X86/stringpool.ll2
-rw-r--r--test/DebugInfo/X86/struct-loc.ll2
-rw-r--r--test/DebugInfo/X86/subrange-type.ll8
-rw-r--r--test/DebugInfo/X86/subreg.ll8
-rw-r--r--test/DebugInfo/X86/subregisters.ll16
-rw-r--r--test/DebugInfo/X86/template.ll12
-rw-r--r--test/DebugInfo/X86/tls.ll28
-rw-r--r--test/DebugInfo/X86/type_units_with_addresses.ll2
-rw-r--r--test/DebugInfo/X86/union-const.ll8
-rw-r--r--test/DebugInfo/X86/union-template.ll10
-rw-r--r--test/DebugInfo/X86/vector.ll2
-rw-r--r--test/DebugInfo/X86/vla.ll20
-rw-r--r--test/DebugInfo/accel-table-hash-collisions.ll92
-rw-r--r--test/DebugInfo/array.ll40
-rw-r--r--test/DebugInfo/block-asan.ll87
-rw-r--r--test/DebugInfo/bug_null_debuginfo.ll8
-rw-r--r--test/DebugInfo/constant-pointers.ll51
-rw-r--r--test/DebugInfo/constant-sdnodes-have-dbg-location.ll26
-rw-r--r--test/DebugInfo/constantfp-sdnodes-have-dbg-location.ll24
-rw-r--r--test/DebugInfo/cross-cu-inlining.ll143
-rw-r--r--test/DebugInfo/cross-cu-linkonce-distinct.ll95
-rw-r--r--test/DebugInfo/cross-cu-linkonce.ll73
-rw-r--r--test/DebugInfo/cu-range-hole.ll74
-rw-r--r--test/DebugInfo/cu-ranges.ll71
-rw-r--r--test/DebugInfo/dead-argument-order.ll81
-rw-r--r--test/DebugInfo/debug-info-qualifiers.ll98
-rw-r--r--test/DebugInfo/debuginfofinder-forward-declaration.ll42
-rw-r--r--test/DebugInfo/debuginfofinder-multiple-cu.ll41
-rw-r--r--test/DebugInfo/debugmacinfo.test27
-rw-r--r--test/DebugInfo/dwarf-public-names.ll131
-rw-r--r--test/DebugInfo/dwarfdump-accel.test2
-rw-r--r--test/DebugInfo/dwarfdump-dump-flags.test3
-rw-r--r--test/DebugInfo/dwarfdump-dwp.test53
-rw-r--r--test/DebugInfo/dwarfdump-macho-relocs.test27
-rw-r--r--test/DebugInfo/dwarfdump-macho-universal.test17
-rw-r--r--test/DebugInfo/dwo.ll15
-rw-r--r--test/DebugInfo/empty.ll31
-rw-r--r--test/DebugInfo/enum-types.ll78
-rw-r--r--test/DebugInfo/enum.ll80
-rw-r--r--test/DebugInfo/global.ll42
-rw-r--r--test/DebugInfo/gvn.ll135
-rw-r--r--test/DebugInfo/incorrect-variable-debugloc.ll391
-rw-r--r--test/DebugInfo/incorrect-variable-debugloc1.ll77
-rw-r--r--test/DebugInfo/inheritance.ll154
-rw-r--r--test/DebugInfo/inline-debug-info-multiret.ll156
-rw-r--r--test/DebugInfo/inline-debug-info.ll174
-rw-r--r--test/DebugInfo/inline-no-debug-info.ll70
-rw-r--r--test/DebugInfo/inline-scopes.ll130
-rw-r--r--test/DebugInfo/inlined-arguments.ll79
-rw-r--r--test/DebugInfo/inlined-vars.ll56
-rw-r--r--test/DebugInfo/llvm-symbolizer.test6
-rw-r--r--test/DebugInfo/location-verifier.ll33
-rw-r--r--test/DebugInfo/lto-comp-dir.ll84
-rw-r--r--test/DebugInfo/member-order.ll66
-rw-r--r--test/DebugInfo/member-pointers.ll40
-rw-r--r--test/DebugInfo/missing-abstract-variable.ll182
-rw-r--r--test/DebugInfo/multiline.ll82
-rw-r--r--test/DebugInfo/namespace.ll366
-rw-r--r--test/DebugInfo/namespace_function_definition.ll44
-rw-r--r--test/DebugInfo/namespace_inline_function_definition.ll95
-rw-r--r--test/DebugInfo/nodebug.ll51
-rw-r--r--test/DebugInfo/piece-verifier.ll56
-rw-r--r--test/DebugInfo/restrict.ll53
-rw-r--r--test/DebugInfo/skeletoncu.ll17
-rw-r--r--test/DebugInfo/sugared-constants.ll82
-rw-r--r--test/DebugInfo/template-recursive-void.ll61
-rw-r--r--test/DebugInfo/tu-composite.ll184
-rw-r--r--test/DebugInfo/tu-member-pointer.ll30
-rw-r--r--test/DebugInfo/two-cus-from-same-file.ll73
-rw-r--r--test/DebugInfo/typedef.ll32
-rw-r--r--test/DebugInfo/unconditional-branch.ll65
-rw-r--r--test/DebugInfo/varargs.ll101
-rw-r--r--test/DebugInfo/version.ll32
-rw-r--r--test/Examples/Kaleidoscope/Chapter3.test17
-rw-r--r--test/Examples/Kaleidoscope/Chapter4.test17
-rw-r--r--test/Examples/Kaleidoscope/Chapter5.test19
-rw-r--r--test/Examples/Kaleidoscope/Chapter6.test15
-rw-r--r--test/Examples/Kaleidoscope/Chapter7.test15
-rw-r--r--test/Examples/lit.local.cfg1
-rw-r--r--test/ExecutionEngine/MCJIT/eh-lg-pic.ll2
-rw-r--r--test/ExecutionEngine/MCJIT/eh-sm-pic.ll2
-rw-r--r--test/ExecutionEngine/MCJIT/eh.ll2
-rw-r--r--test/ExecutionEngine/MCJIT/multi-module-eh-a.ll2
-rw-r--r--test/ExecutionEngine/OrcLazy/global_aliases.ll21
-rw-r--r--test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll2
-rw-r--r--test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll2
-rw-r--r--test/ExecutionEngine/OrcMCJIT/eh.ll2
-rw-r--r--test/ExecutionEngine/OrcMCJIT/multi-module-eh-a.ll2
-rw-r--r--test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s12
-rw-r--r--test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s7
-rw-r--r--test/ExecutionEngine/RuntimeDyld/Mips/ELF_Mips64r2N64_PIC_relocations.s10
-rw-r--r--test/ExecutionEngine/RuntimeDyld/Mips/ELF_N64R6_relocations.s54
-rw-r--r--test/ExecutionEngine/RuntimeDyld/Mips/ELF_O32_PIC_relocations.s13
-rw-r--r--test/ExecutionEngine/RuntimeDyld/PowerPC/lit.local.cfg3
-rw-r--r--test/ExecutionEngine/RuntimeDyld/PowerPC/ppc32_elf_rel_addr16.s47
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s66
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/COFF_x86_6431
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/COFF_x86_64.s40
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_FILE.s14
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PC8_relocations.s26
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/ELF_x86_64_StubBuf.s26
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_FILE.s3
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_GLOBAL.s2
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_x86_64_StubBuf.ll12
-rw-r--r--test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s18
-rw-r--r--test/ExecutionEngine/lit.local.cfg5
-rw-r--r--test/Feature/OperandBundles/adce.ll49
-rw-r--r--test/Feature/OperandBundles/basic-aa-argmemonly.ll51
-rw-r--r--test/Feature/OperandBundles/dse.ll62
-rw-r--r--test/Feature/OperandBundles/early-cse.ll89
-rw-r--r--test/Feature/OperandBundles/function-attrs.ll33
-rw-r--r--test/Feature/OperandBundles/inliner-conservative.ll17
-rw-r--r--test/Feature/OperandBundles/merge-func.ll64
-rw-r--r--test/Feature/OperandBundles/special-state.ll21
-rw-r--r--test/Feature/alias2.ll24
-rw-r--r--test/Feature/aliases.ll18
-rw-r--r--test/Feature/callingconventions.ll7
-rw-r--r--test/Feature/comdat.ll4
-rw-r--r--test/Feature/exception.ll109
-rw-r--r--test/Feature/optnone-llc.ll10
-rw-r--r--test/Instrumentation/AddressSanitizer/basic.ll2
-rw-r--r--test/Instrumentation/AddressSanitizer/debug_info.ll14
-rw-r--r--test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll40
-rw-r--r--test/Instrumentation/AddressSanitizer/do-not-instrument-cstring.ll8
-rw-r--r--test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll19
-rw-r--r--test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll35
-rw-r--r--test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll15
-rw-r--r--test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll15
-rw-r--r--test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll4
-rw-r--r--test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll16
-rw-r--r--test/Instrumentation/AddressSanitizer/keep_going.ll14
-rw-r--r--test/Instrumentation/AddressSanitizer/localescape.ll86
-rw-r--r--test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll31
-rw-r--r--test/Instrumentation/AddressSanitizer/twice.ll8
-rw-r--r--test/Instrumentation/DataFlowSanitizer/abilist.ll4
-rw-r--r--test/Instrumentation/DataFlowSanitizer/debug.ll9
-rw-r--r--test/Instrumentation/DataFlowSanitizer/external_mask.ll14
-rw-r--r--test/Instrumentation/DataFlowSanitizer/prefix-rename.ll4
-rw-r--r--test/Instrumentation/InstrProfiling/PR23499.ll21
-rw-r--r--test/Instrumentation/InstrProfiling/linkage.ll49
-rw-r--r--test/Instrumentation/InstrProfiling/no-counters.ll4
-rw-r--r--test/Instrumentation/InstrProfiling/noruntime.ll4
-rw-r--r--test/Instrumentation/InstrProfiling/platform.ll31
-rw-r--r--test/Instrumentation/InstrProfiling/profiling.ll36
-rw-r--r--test/Instrumentation/MemorySanitizer/AArch64/vararg.ll75
-rw-r--r--test/Instrumentation/MemorySanitizer/atomics.ll24
-rw-r--r--test/Instrumentation/MemorySanitizer/check_access_address.ll4
-rw-r--r--test/Instrumentation/MemorySanitizer/msan_basic.ll201
-rw-r--r--test/Instrumentation/MemorySanitizer/mul_by_constant.ll23
-rw-r--r--test/Instrumentation/MemorySanitizer/origin-alignment.ll9
-rw-r--r--test/Instrumentation/MemorySanitizer/return_from_main.ll2
-rw-r--r--test/Instrumentation/MemorySanitizer/store-origin.ll12
-rw-r--r--test/Instrumentation/MemorySanitizer/unreachable.ll4
-rw-r--r--test/Instrumentation/MemorySanitizer/vector_cvt.ll6
-rw-r--r--test/Instrumentation/MemorySanitizer/vector_shift.ll10
-rw-r--r--test/Instrumentation/SanitizerCoverage/coverage-dbg.ll8
-rw-r--r--test/Instrumentation/SanitizerCoverage/coverage.ll4
-rw-r--r--test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll8
-rw-r--r--test/Instrumentation/SanitizerCoverage/seh.ll86
-rw-r--r--test/Instrumentation/SanitizerCoverage/switch-tracing.ll56
-rw-r--r--test/Instrumentation/ThreadSanitizer/atomic.ll2
-rw-r--r--test/JitListener/multiple.ll20
-rw-r--r--test/JitListener/simple.ll8
-rw-r--r--test/LTO/X86/Inputs/invalid.ll.bcbin332 -> 688 bytes
-rw-r--r--test/LTO/X86/bcsection.ll2
-rw-r--r--test/LTO/X86/current-section.ll1
-rw-r--r--test/LTO/X86/diagnostic-handler-noexit.ll4
-rw-r--r--test/LTO/X86/diagnostic-handler-remarks.ll9
-rw-r--r--test/LTO/X86/disable-verify.ll18
-rw-r--r--test/LTO/X86/invalid.ll2
-rw-r--r--test/LTO/X86/list-symbols.ll1
-rw-r--r--test/LTO/X86/llvm-lto-output.ll21
-rw-r--r--test/LTO/X86/parallel.ll25
-rw-r--r--test/LibDriver/thin.test9
-rw-r--r--test/Linker/2003-01-30-LinkerRename.ll4
-rw-r--r--test/Linker/2003-04-23-LinkOnceLost.ll13
-rw-r--r--test/Linker/2003-05-31-LinkerRename.ll12
-rw-r--r--test/Linker/2008-03-05-AliasReference.ll2
-rw-r--r--test/Linker/2008-07-06-AliasFnDecl.ll2
-rw-r--r--test/Linker/2008-07-06-AliasWeakDest.ll4
-rw-r--r--test/Linker/2009-09-03-mdnode.ll4
-rw-r--r--test/Linker/2009-09-03-mdnode2.ll4
-rw-r--r--test/Linker/2011-08-04-DebugLoc.ll6
-rw-r--r--test/Linker/2011-08-04-DebugLoc2.ll6
-rw-r--r--test/Linker/2011-08-04-Metadata.ll6
-rw-r--r--test/Linker/2011-08-04-Metadata2.ll6
-rw-r--r--test/Linker/2011-08-18-unique-class-type.ll8
-rw-r--r--test/Linker/2011-08-18-unique-class-type2.ll8
-rw-r--r--test/Linker/2011-08-18-unique-debug-type.ll6
-rw-r--r--test/Linker/2011-08-18-unique-debug-type2.ll6
-rw-r--r--test/Linker/ConstantGlobals.ll4
-rw-r--r--test/Linker/DbgDeclare.ll10
-rw-r--r--test/Linker/DbgDeclare2.ll12
-rw-r--r--test/Linker/Inputs/PR8300.b.ll2
-rw-r--r--test/Linker/Inputs/alias.ll4
-rw-r--r--test/Linker/Inputs/available_externally_over_decl.ll10
-rw-r--r--test/Linker/Inputs/comdat11.ll9
-rw-r--r--test/Linker/Inputs/comdat13.ll9
-rw-r--r--test/Linker/Inputs/comdat14.ll12
-rw-r--r--test/Linker/Inputs/comdat15.ll6
-rw-r--r--test/Linker/Inputs/comdat5.ll2
-rw-r--r--test/Linker/Inputs/comdat8.ll2
-rw-r--r--test/Linker/Inputs/ctors2.ll6
-rw-r--r--test/Linker/Inputs/ctors3.ll7
-rw-r--r--test/Linker/Inputs/funcimport.ll28
-rw-r--r--test/Linker/Inputs/funcimport_appending_global.ll6
-rw-r--r--test/Linker/Inputs/internalize-lazy.ll8
-rw-r--r--test/Linker/Inputs/linkage.c.ll4
-rw-r--r--test/Linker/Inputs/mdlocation.ll12
-rw-r--r--test/Linker/Inputs/only-needed-debug-metadata.ll27
-rw-r--r--test/Linker/Inputs/only-needed-named-metadata.ll9
-rw-r--r--test/Linker/Inputs/opaque.ll8
-rw-r--r--test/Linker/Inputs/replaced-function-matches-first-subprogram.ll6
-rw-r--r--test/Linker/Inputs/subprogram-linkonce-weak-odr.ll15
-rw-r--r--test/Linker/Inputs/subprogram-linkonce-weak.ll6
-rw-r--r--test/Linker/Inputs/testlink.ll4
-rw-r--r--test/Linker/Inputs/thinlto_funcimport_debug.ll38
-rw-r--r--test/Linker/Inputs/type-unique-alias.ll2
-rw-r--r--test/Linker/Inputs/type-unique-dst-types2.ll4
-rw-r--r--test/Linker/Inputs/type-unique-dst-types3.ll4
-rw-r--r--test/Linker/Inputs/type-unique-inheritance-a.ll10
-rw-r--r--test/Linker/Inputs/type-unique-inheritance-b.ll16
-rw-r--r--test/Linker/Inputs/type-unique-simple2-a.ll10
-rw-r--r--test/Linker/Inputs/type-unique-simple2-b.ll14
-rw-r--r--test/Linker/Inputs/visibility.ll6
-rw-r--r--test/Linker/alias.ll39
-rw-r--r--test/Linker/available_externally_over_decl.ll15
-rw-r--r--test/Linker/comdat11.ll13
-rw-r--r--test/Linker/comdat12.ll8
-rw-r--r--test/Linker/comdat13.ll30
-rw-r--r--test/Linker/comdat14.ll9
-rw-r--r--test/Linker/comdat15.ll9
-rw-r--r--test/Linker/comdat6.ll2
-rw-r--r--test/Linker/comdat8.ll2
-rw-r--r--test/Linker/comdat9.ll7
-rw-r--r--test/Linker/comdat_group.ll18
-rw-r--r--test/Linker/constructor-comdat.ll4
-rw-r--r--test/Linker/ctors.ll3
-rw-r--r--test/Linker/ctors2.ll7
-rw-r--r--test/Linker/ctors3.ll8
-rw-r--r--test/Linker/ctors4.ll14
-rw-r--r--test/Linker/ctors5.ll8
-rw-r--r--test/Linker/debug-info-version-a.ll2
-rw-r--r--test/Linker/debug-info-version-b.ll2
-rw-r--r--test/Linker/distinct.ll2
-rw-r--r--test/Linker/drop-debug.ll2
-rw-r--r--test/Linker/funcimport.ll195
-rw-r--r--test/Linker/funcimport_appending_global.ll20
-rw-r--r--test/Linker/global_ctors.ll5
-rw-r--r--test/Linker/internalize-lazy.ll4
-rw-r--r--test/Linker/link-flags.ll19
-rw-r--r--test/Linker/mdlocation.ll36
-rw-r--r--test/Linker/only-needed-debug-metadata.ll49
-rw-r--r--test/Linker/only-needed-named-metadata.ll65
-rw-r--r--test/Linker/opaque.ll4
-rw-r--r--test/Linker/override-with-internal-linkage.ll4
-rw-r--r--test/Linker/pr21494.ll4
-rw-r--r--test/Linker/prologuedata.ll10
-rw-r--r--test/Linker/replaced-function-matches-first-subprogram.ll27
-rw-r--r--test/Linker/subprogram-linkonce-weak-odr.ll177
-rw-r--r--test/Linker/subprogram-linkonce-weak.ll53
-rw-r--r--test/Linker/testlink.ll11
-rw-r--r--test/Linker/thinlto_funcimport_debug.ll80
-rw-r--r--test/Linker/type-unique-alias.ll4
-rw-r--r--test/Linker/type-unique-dst-types.ll4
-rw-r--r--test/Linker/type-unique-odr-a.ll16
-rw-r--r--test/Linker/type-unique-odr-b.ll16
-rw-r--r--test/Linker/type-unique-simple-a.ll12
-rw-r--r--test/Linker/type-unique-simple-b.ll14
-rw-r--r--test/Linker/type-unique-simple2-a.ll23
-rw-r--r--test/Linker/type-unique-simple2-b.ll14
-rw-r--r--test/Linker/type-unique-simple2.ll1
-rw-r--r--test/Linker/type-unique-src-type.ll4
-rw-r--r--test/Linker/type-unique-type-array-a.ll24
-rw-r--r--test/Linker/type-unique-type-array-b.ll18
-rw-r--r--test/Linker/uniqued-distinct-cycles.ll14
-rw-r--r--test/Linker/unnamed-addr1-a.ll18
-rw-r--r--test/Linker/unnamed-addr1-b.ll8
-rw-r--r--test/Linker/visibility.ll12
-rw-r--r--test/Linker/weakextern.ll14
-rw-r--r--test/MC/AArch64/arm64-advsimd.s153
-rw-r--r--test/MC/AArch64/arm64-diags.s63
-rw-r--r--test/MC/AArch64/arm64-fp-encoding.s448
-rw-r--r--test/MC/AArch64/arm64-leaf-compact-unwind.s1
-rw-r--r--test/MC/AArch64/arm64-small-data-fixups.s27
-rw-r--r--test/MC/AArch64/armv8.1a-pan.s10
-rw-r--r--test/MC/AArch64/armv8.1a-rdma.s18
-rw-r--r--test/MC/AArch64/armv8.2a-at.s9
-rw-r--r--test/MC/AArch64/armv8.2a-mmfr2.s6
-rw-r--r--test/MC/AArch64/armv8.2a-persistent-memory.s6
-rw-r--r--test/MC/AArch64/armv8.2a-statistical-profiling.s87
-rw-r--r--test/MC/AArch64/armv8.2a-uao.s17
-rw-r--r--test/MC/AArch64/basic-a64-diagnostics.s4
-rw-r--r--test/MC/AArch64/elf_osabi_flags.s9
-rw-r--r--test/MC/AArch64/error-location-ldr-pseudo.s5
-rw-r--r--test/MC/AArch64/error-location.s49
-rw-r--r--test/MC/AArch64/fullfp16-diagnostics.s82
-rw-r--r--test/MC/AArch64/fullfp16-neon-neg.s382
-rw-r--r--test/MC/AArch64/ldr-pseudo-diagnostics.s18
-rw-r--r--test/MC/AArch64/neon-2velem.s18
-rw-r--r--test/MC/AArch64/neon-aba-abd.s4
-rw-r--r--test/MC/AArch64/neon-across.s18
-rw-r--r--test/MC/AArch64/neon-add-pairwise.s6
-rw-r--r--test/MC/AArch64/neon-add-sub-instructions.s10
-rw-r--r--test/MC/AArch64/neon-compare-instructions.s62
-rw-r--r--test/MC/AArch64/neon-diagnostics.s908
-rw-r--r--test/MC/AArch64/neon-facge-facgt.s18
-rw-r--r--test/MC/AArch64/neon-frsqrt-frecp.s10
-rw-r--r--test/MC/AArch64/neon-max-min-pairwise.s18
-rw-r--r--test/MC/AArch64/neon-max-min.s18
-rw-r--r--test/MC/AArch64/neon-mla-mls-instructions.s10
-rw-r--r--test/MC/AArch64/neon-scalar-abs.s4
-rw-r--r--test/MC/AArch64/neon-scalar-by-elem-mla.s6
-rw-r--r--test/MC/AArch64/neon-scalar-by-elem-mul.s6
-rw-r--r--test/MC/AArch64/neon-scalar-cvt.s34
-rw-r--r--test/MC/AArch64/neon-scalar-fp-compare.s32
-rw-r--r--test/MC/AArch64/neon-scalar-mul.s4
-rw-r--r--test/MC/AArch64/neon-scalar-recip.s12
-rw-r--r--test/MC/AArch64/neon-scalar-reduce-pairwise.s7
-rw-r--r--test/MC/AArch64/neon-simd-misc.s98
-rw-r--r--test/MC/AArch64/neon-simd-shift.s18
-rw-r--r--test/MC/AArch64/noneon-diagnostics.s15
-rw-r--r--test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s7
-rw-r--r--test/MC/AMDGPU/flat-scratch.s33
-rw-r--r--test/MC/AMDGPU/flat.s516
-rw-r--r--test/MC/AMDGPU/hsa-text.s34
-rw-r--r--test/MC/AMDGPU/hsa.s31
-rw-r--r--test/MC/AMDGPU/mubuf.s224
-rw-r--r--test/MC/AMDGPU/out-of-range-registers.s62
-rw-r--r--test/MC/AMDGPU/smem.s11
-rw-r--r--test/MC/AMDGPU/smrd-err.s15
-rw-r--r--test/MC/AMDGPU/smrd.s61
-rw-r--r--test/MC/AMDGPU/sop1-err.s50
-rw-r--r--test/MC/AMDGPU/sop1.s17
-rw-r--r--test/MC/AMDGPU/sop2.s3
-rw-r--r--test/MC/AMDGPU/vop1.s19
-rw-r--r--test/MC/AMDGPU/vop2-err.s27
-rw-r--r--test/MC/AMDGPU/vop2.s123
-rw-r--r--test/MC/AMDGPU/vop3-vop1-nosrc.s14
-rw-r--r--test/MC/AMDGPU/vop3.s44
-rw-r--r--test/MC/AMDGPU/vopc-errs.s8
-rw-r--r--test/MC/AMDGPU/vopc.s21
-rw-r--r--test/MC/ARM/Windows/invalid-relocation.s2
-rw-r--r--test/MC/ARM/arm-elf-relocation-diagnostics.s29
-rw-r--r--test/MC/ARM/arm-thumb-trustzone.s1
-rw-r--r--test/MC/ARM/arm-trustzone.s3
-rw-r--r--test/MC/ARM/arm11-hint-instr.s23
-rw-r--r--test/MC/ARM/basic-arm-instructions-v8.1a.s2
-rw-r--r--test/MC/ARM/basic-arm-instructions.s11
-rw-r--r--test/MC/ARM/basic-thumb2-instructions-v8.s23
-rw-r--r--test/MC/ARM/big-endian-thumb2-fixup.s4
-rw-r--r--test/MC/ARM/coff-debugging-secrel.ll9
-rw-r--r--test/MC/ARM/data-in-code.ll16
-rw-r--r--test/MC/ARM/diagnostics.s64
-rw-r--r--test/MC/ARM/directive-arch-armv6j.s34
-rw-r--r--test/MC/ARM/directive-arch-armv6z.s4
-rw-r--r--test/MC/ARM/directive-arch-armv6zk.s38
-rw-r--r--test/MC/ARM/directive-arch-armv8.2-a.s46
-rw-r--r--test/MC/ARM/directive-arch-semantic-action.s4
-rw-r--r--test/MC/ARM/directive-arch_extension-sec.s13
-rw-r--r--test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s2
-rw-r--r--test/MC/ARM/dwarf-asm-multiple-sections.s23
-rw-r--r--test/MC/ARM/dwarf-asm-nonstandard-section.s2
-rw-r--r--test/MC/ARM/dwarf-asm-single-section.s4
-rw-r--r--test/MC/ARM/eh-compact-pr0.s4
-rw-r--r--test/MC/ARM/eh-compact-pr1.s2
-rw-r--r--test/MC/ARM/eh-directive-handlerdata.s4
-rw-r--r--test/MC/ARM/eh-directive-personalityindex.s12
-rw-r--r--test/MC/ARM/eh-directive-section-comdat.s16
-rw-r--r--test/MC/ARM/eh-directive-section-multiple-func.s4
-rw-r--r--test/MC/ARM/eh-directive-section.s8
-rw-r--r--test/MC/ARM/eh-directive-text-section.s2
-rw-r--r--test/MC/ARM/eh-link.s12
-rw-r--r--test/MC/ARM/error-location-ldr-pseudo.s5
-rw-r--r--test/MC/ARM/error-location.s49
-rw-r--r--test/MC/ARM/fullfp16-neon-neg.s289
-rw-r--r--test/MC/ARM/fullfp16-neon.s404
-rw-r--r--test/MC/ARM/neon-vcvt-fp16.s18
-rw-r--r--test/MC/ARM/thumb-branches.s25
-rw-r--r--test/MC/ARM/thumb-shift-encoding.s16
-rw-r--r--test/MC/ARM/thumb1-relax.s35
-rw-r--r--test/MC/ARM/thumb2-diagnostics.s19
-rw-r--r--test/MC/ARM/v7k-dsp.s4
-rw-r--r--test/MC/AsmParser/dot-symbol-non-absolute.s2
-rw-r--r--test/MC/AsmParser/expr-shr.s5
-rw-r--r--test/MC/AsmParser/exprs-invalid.s3
-rw-r--r--test/MC/AsmParser/exprs.s2
-rw-r--r--test/MC/AsmParser/macros-darwin-vararg.s2
-rw-r--r--test/MC/AsmParser/reassign.s12
-rw-r--r--test/MC/AsmParser/undefined-local-symbol.s8
-rw-r--r--test/MC/AsmParser/vararg.s2
-rw-r--r--test/MC/COFF/ARM/directive-type-diagnostics.s10
-rw-r--r--test/MC/COFF/alias.s4
-rw-r--r--test/MC/COFF/bad-expr.s2
-rw-r--r--test/MC/COFF/basic-coff-64.s4
-rw-r--r--test/MC/COFF/basic-coff.s4
-rw-r--r--test/MC/COFF/invalid-def.s5
-rw-r--r--test/MC/COFF/invalid-endef.s5
-rw-r--r--test/MC/COFF/invalid-scl-range.s3
-rw-r--r--test/MC/COFF/invalid-scl.s5
-rw-r--r--test/MC/COFF/invalid-type.s5
-rw-r--r--test/MC/COFF/label-undefined.s6
-rw-r--r--test/MC/COFF/secidx-diagnostic.s2
-rw-r--r--test/MC/COFF/simple-fixups.s4
-rw-r--r--test/MC/COFF/stdin.s3
-rw-r--r--test/MC/COFF/symbol-fragment-offset-64.s4
-rw-r--r--test/MC/COFF/symbol-fragment-offset.s4
-rw-r--r--test/MC/COFF/temporary-alias.s21
-rw-r--r--test/MC/COFF/timestamp.s6
-rw-r--r--test/MC/Disassembler/AArch64/arm64-scalar-fp.txt69
-rw-r--r--test/MC/Disassembler/AArch64/armv8.1a-pan.txt2
-rw-r--r--test/MC/Disassembler/AArch64/armv8.2a-at.txt9
-rw-r--r--test/MC/Disassembler/AArch64/armv8.2a-mmfr2.txt4
-rw-r--r--test/MC/Disassembler/AArch64/armv8.2a-persistent-memory.txt6
-rw-r--r--test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt87
-rw-r--r--test/MC/Disassembler/AArch64/armv8.2a-uao.txt19
-rw-r--r--test/MC/Disassembler/AArch64/basic-a64-instructions.txt116
-rw-r--r--test/MC/Disassembler/AArch64/fullfp16-neg.txt145
-rw-r--r--test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt382
-rw-r--r--test/MC/Disassembler/ARM/fullfp16-neon-arm-neg.txt274
-rw-r--r--test/MC/Disassembler/ARM/fullfp16-neon-arm.txt309
-rw-r--r--test/MC/Disassembler/ARM/fullfp16-neon-thumb-neg.txt274
-rw-r--r--test/MC/Disassembler/ARM/fullfp16-neon-thumb.txt309
-rw-r--r--test/MC/Disassembler/ARM/invalid-thumbv7.txt23
-rw-r--r--test/MC/Disassembler/ARM/thumb-v8.txt10
-rw-r--r--test/MC/Disassembler/Hexagon/invalid_packet.txt4
-rw-r--r--test/MC/Disassembler/Hexagon/j.txt148
-rw-r--r--test/MC/Disassembler/Hexagon/ld.txt90
-rw-r--r--test/MC/Disassembler/Hexagon/lit.local.cfg6
-rw-r--r--test/MC/Disassembler/Hexagon/nv_j.txt88
-rw-r--r--test/MC/Disassembler/Hexagon/nv_st.txt127
-rw-r--r--test/MC/Disassembler/Hexagon/st.txt84
-rw-r--r--test/MC/Disassembler/Hexagon/too_many_instructions.txt4
-rw-r--r--test/MC/Disassembler/Hexagon/too_many_loop_ends.txt4
-rw-r--r--test/MC/Disassembler/Hexagon/unextendable.txt9
-rw-r--r--test/MC/Disassembler/Mips/dsp/valid-el.txt12
-rw-r--r--test/MC/Disassembler/Mips/dsp/valid.txt125
-rw-r--r--test/MC/Disassembler/Mips/dspr2/valid.txt173
-rw-r--r--test/MC/Disassembler/Mips/eva/valid_R6-eva.txt38
-rw-r--r--test/MC/Disassembler/Mips/eva/valid_preR6-eva.txt54
-rw-r--r--test/MC/Disassembler/Mips/micromips-dsp/valid.txt103
-rw-r--r--test/MC/Disassembler/Mips/micromips-dspr2/valid.txt125
-rw-r--r--test/MC/Disassembler/Mips/micromips.txt342
-rw-r--r--test/MC/Disassembler/Mips/micromips32r3/invalid.txt4
-rw-r--r--test/MC/Disassembler/Mips/micromips32r3/valid-el.txt191
-rw-r--r--test/MC/Disassembler/Mips/micromips32r3/valid.txt191
-rw-r--r--test/MC/Disassembler/Mips/micromips32r6.txt114
-rw-r--r--test/MC/Disassembler/Mips/micromips32r6/valid.txt258
-rw-r--r--test/MC/Disassembler/Mips/micromips64r6/valid.txt171
-rw-r--r--test/MC/Disassembler/Mips/micromips_le.txt342
-rw-r--r--test/MC/Disassembler/Mips/mips-dsp.txt22
-rw-r--r--test/MC/Disassembler/Mips/mips1/invalid-xfail.txt11
-rw-r--r--test/MC/Disassembler/Mips/mips1/invalid.txt45
-rw-r--r--test/MC/Disassembler/Mips/mips1/valid-mips1-el.txt2
-rw-r--r--test/MC/Disassembler/Mips/mips1/valid-mips1.txt5
-rw-r--r--test/MC/Disassembler/Mips/mips1/valid-xfail.txt8
-rw-r--r--test/MC/Disassembler/Mips/mips2/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips2/valid-mips2-el.txt2
-rw-r--r--test/MC/Disassembler/Mips/mips2/valid-mips2.txt20
-rw-r--r--test/MC/Disassembler/Mips/mips2/valid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips3/invalid-xfail.txt14
-rw-r--r--test/MC/Disassembler/Mips/mips3/valid-mips3-el.txt2
-rw-r--r--test/MC/Disassembler/Mips/mips3/valid-mips3.txt25
-rw-r--r--test/MC/Disassembler/Mips/mips3/valid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt11
-rw-r--r--test/MC/Disassembler/Mips/mips32/valid-mips32.txt180
-rw-r--r--test/MC/Disassembler/Mips/mips32/valid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32_le.txt450
-rw-r--r--test/MC/Disassembler/Mips/mips32r2/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-el.txt2
-rw-r--r--test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt195
-rw-r--r--test/MC/Disassembler/Mips/mips32r2/valid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32r2_le.txt459
-rw-r--r--test/MC/Disassembler/Mips/mips32r3/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32r3/valid-mips32r3.txt197
-rw-r--r--test/MC/Disassembler/Mips/mips32r3/valid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32r5/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32r5/valid-mips32r5.txt198
-rw-r--r--test/MC/Disassembler/Mips/mips32r5/valid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt2
-rw-r--r--test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt3
-rw-r--r--test/MC/Disassembler/Mips/mips4/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips4/valid-mips4-el.txt2
-rw-r--r--test/MC/Disassembler/Mips/mips4/valid-mips4.txt29
-rw-r--r--test/MC/Disassembler/Mips/mips4/valid-xfail.txt16
-rw-r--r--test/MC/Disassembler/Mips/mips64/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt30
-rw-r--r--test/MC/Disassembler/Mips/mips64/valid-mips64.txt193
-rw-r--r--test/MC/Disassembler/Mips/mips64/valid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips64_le.txt84
-rw-r--r--test/MC/Disassembler/Mips/mips64r2/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt32
-rw-r--r--test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt212
-rw-r--r--test/MC/Disassembler/Mips/mips64r2/valid-xfail.txt14
-rw-r--r--test/MC/Disassembler/Mips/mips64r2_le.txt90
-rw-r--r--test/MC/Disassembler/Mips/mips64r3/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips64r3/valid-mips64r3-el.txt2
-rw-r--r--test/MC/Disassembler/Mips/mips64r3/valid-mips64r3.txt243
-rw-r--r--test/MC/Disassembler/Mips/mips64r3/valid-xfail.txt14
-rw-r--r--test/MC/Disassembler/Mips/mips64r5/invalid-xfail.txt13
-rw-r--r--test/MC/Disassembler/Mips/mips64r5/valid-mips64r5-el.txt2
-rw-r--r--test/MC/Disassembler/Mips/mips64r5/valid-mips64r5.txt244
-rw-r--r--test/MC/Disassembler/Mips/mips64r5/valid-xfail.txt14
-rw-r--r--test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt6
-rw-r--r--test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt7
-rw-r--r--test/MC/Disassembler/Mips/msa/test_elm.txt1
-rw-r--r--test/MC/Disassembler/Mips/msa/test_elm_msa64.txt5
-rw-r--r--test/MC/Disassembler/PowerPC/ppc64-encoding.txt4
-rw-r--r--test/MC/Disassembler/PowerPC/ppc64le-encoding.txt4
-rw-r--r--test/MC/Disassembler/PowerPC/vsx.txt12
-rw-r--r--test/MC/Disassembler/Sparc/sparc-mem.txt24
-rw-r--r--test/MC/Disassembler/Sparc/sparc-v9.txt4
-rw-r--r--test/MC/Disassembler/SystemZ/insns.txt74
-rw-r--r--test/MC/Disassembler/X86/x86-64.txt111
-rw-r--r--test/MC/ELF/ARM/directive-type-diagnostics.s10
-rw-r--r--test/MC/ELF/align-zero.s4
-rw-r--r--test/MC/ELF/align.s14
-rw-r--r--test/MC/ELF/cfi-adjust-cfa-offset.s2
-rw-r--r--test/MC/ELF/cfi-advance-loc2.s2
-rw-r--r--test/MC/ELF/cfi-def-cfa-offset.s2
-rw-r--r--test/MC/ELF/cfi-def-cfa-register.s2
-rw-r--r--test/MC/ELF/cfi-def-cfa.s2
-rw-r--r--test/MC/ELF/cfi-escape.s2
-rw-r--r--test/MC/ELF/cfi-large-model.s2
-rw-r--r--test/MC/ELF/cfi-offset.s2
-rw-r--r--test/MC/ELF/cfi-register.s2
-rw-r--r--test/MC/ELF/cfi-rel-offset.s2
-rw-r--r--test/MC/ELF/cfi-rel-offset2.s2
-rw-r--r--test/MC/ELF/cfi-remember.s2
-rw-r--r--test/MC/ELF/cfi-restore.s2
-rw-r--r--test/MC/ELF/cfi-same-value.s2
-rw-r--r--test/MC/ELF/cfi-signal-frame.s2
-rw-r--r--test/MC/ELF/cfi-undefined.s2
-rw-r--r--test/MC/ELF/cfi-version.ll8
-rw-r--r--test/MC/ELF/cfi-window-save.s2
-rw-r--r--test/MC/ELF/cfi-zero-addr-delta.s2
-rw-r--r--test/MC/ELF/cfi.s2
-rw-r--r--test/MC/ELF/comdat-dup-group-name.s12
-rw-r--r--test/MC/ELF/comdat-reloc.s6
-rw-r--r--test/MC/ELF/comdat.s14
-rw-r--r--test/MC/ELF/common-error1.s2
-rw-r--r--test/MC/ELF/common-error2.s2
-rw-r--r--test/MC/ELF/common2.s7
-rw-r--r--test/MC/ELF/debug-loc.s2
-rw-r--r--test/MC/ELF/div-by-zero.s6
-rw-r--r--test/MC/ELF/dot-symbol-assignment.s5
-rw-r--r--test/MC/ELF/empty-twice.ll6
-rw-r--r--test/MC/ELF/empty.s36
-rw-r--r--test/MC/ELF/many-sections-2.s3
-rw-r--r--test/MC/ELF/many-sections-3.s2
-rw-r--r--test/MC/ELF/many-sections.s3
-rw-r--r--test/MC/ELF/popsection.s4
-rw-r--r--test/MC/ELF/relax-arith.s32
-rw-r--r--test/MC/ELF/relocation-386.s5
-rw-r--r--test/MC/ELF/relocation-pc.s8
-rw-r--r--test/MC/ELF/relocation.s8
-rw-r--r--test/MC/ELF/section-sym.s18
-rw-r--r--test/MC/ELF/section-unique.s4
-rw-r--r--test/MC/ELF/section.s2
-rw-r--r--test/MC/ELF/sleb.s16
-rw-r--r--test/MC/ELF/strtab-suffix-opt.s6
-rw-r--r--test/MC/ELF/uleb.s16
-rw-r--r--test/MC/Hexagon/asmMap.s608
-rw-r--r--test/MC/Hexagon/capitalizedEndloop.s29
-rw-r--r--test/MC/Hexagon/dcfetch.s15
-rw-r--r--test/MC/Hexagon/empty_asm.s15
-rw-r--r--test/MC/Hexagon/endloop.s19
-rw-r--r--test/MC/Hexagon/got.s11
-rw-r--r--test/MC/Hexagon/inst_and64.ll2
-rw-r--r--test/MC/Hexagon/inst_or64.ll2
-rw-r--r--test/MC/Hexagon/inst_xor64.ll2
-rw-r--r--test/MC/Hexagon/instructions/alu32_alu.s84
-rw-r--r--test/MC/Hexagon/instructions/alu32_perm.s40
-rw-r--r--test/MC/Hexagon/instructions/alu32_pred.s222
-rw-r--r--test/MC/Hexagon/instructions/cr.s78
-rw-r--r--test/MC/Hexagon/instructions/j.s206
-rw-r--r--test/MC/Hexagon/instructions/jr.s38
-rw-r--r--test/MC/Hexagon/instructions/ld.s493
-rw-r--r--test/MC/Hexagon/instructions/memop.s56
-rw-r--r--test/MC/Hexagon/instructions/nv_j.s180
-rw-r--r--test/MC/Hexagon/instructions/nv_st.s290
-rw-r--r--test/MC/Hexagon/instructions/st.s434
-rw-r--r--test/MC/Hexagon/instructions/system_user.s26
-rw-r--r--test/MC/Hexagon/instructions/xtype_alu.s395
-rw-r--r--test/MC/Hexagon/instructions/xtype_bit.s118
-rw-r--r--test/MC/Hexagon/instructions/xtype_complex.s128
-rw-r--r--test/MC/Hexagon/instructions/xtype_fp.s146
-rw-r--r--test/MC/Hexagon/instructions/xtype_mpy.s400
-rw-r--r--test/MC/Hexagon/instructions/xtype_perm.s104
-rw-r--r--test/MC/Hexagon/instructions/xtype_pred.s136
-rw-r--r--test/MC/Hexagon/instructions/xtype_shift.s260
-rw-r--r--test/MC/Hexagon/jumpdoublepound.s13
-rw-r--r--test/MC/Hexagon/labels.s26
-rw-r--r--test/MC/Hexagon/new-value-check.s72
-rw-r--r--test/MC/Hexagon/out_of_range.s10
-rw-r--r--test/MC/Hexagon/pcrel.s11
-rw-r--r--test/MC/Hexagon/relaxed_newvalue.s10
-rw-r--r--test/MC/Hexagon/test.s4
-rw-r--r--test/MC/Hexagon/two_ext.s12
-rw-r--r--test/MC/Hexagon/v60-alu.s312
-rw-r--r--test/MC/Hexagon/v60-permute.s51
-rw-r--r--test/MC/Hexagon/v60-shift.s39
-rw-r--r--test/MC/Hexagon/v60-vcmp.s84
-rw-r--r--test/MC/Hexagon/v60-vmem.s424
-rw-r--r--test/MC/Hexagon/v60-vmpy-acc.s123
-rw-r--r--test/MC/Hexagon/v60-vmpy1.s138
-rw-r--r--test/MC/Hexagon/v60lookup.s14
-rw-r--r--test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s11
-rw-r--r--test/MC/MachO/AArch64/reloc-errors.s10
-rw-r--r--test/MC/MachO/ARM/bad-darwin-ARM-reloc.s6
-rw-r--r--test/MC/MachO/ARM/compact-unwind-armv7k.s124
-rw-r--r--test/MC/MachO/ARM/darwin-ARM-reloc.s315
-rw-r--r--test/MC/MachO/ARM/darwin-Thumb-reloc.s241
-rw-r--r--test/MC/MachO/ARM/data-in-code.s53
-rw-r--r--test/MC/MachO/ARM/empty-function-nop.ll24
-rw-r--r--test/MC/MachO/ARM/ios-version-min-load-command.s18
-rw-r--r--test/MC/MachO/ARM/long-call-branch-island-relocation.s22
-rw-r--r--test/MC/MachO/ARM/no-subsections-reloc.s8
-rw-r--r--test/MC/MachO/ARM/nop-armv4-padding.s7
-rw-r--r--test/MC/MachO/ARM/nop-armv6t2-padding.s7
-rw-r--r--test/MC/MachO/ARM/nop-thumb-padding.s7
-rw-r--r--test/MC/MachO/ARM/nop-thumb2-padding.s7
-rw-r--r--test/MC/MachO/ARM/relax-thumb-ldr-literal.s15
-rw-r--r--test/MC/MachO/ARM/relax-thumb2-branches.s39
-rw-r--r--test/MC/MachO/ARM/thumb-bl-jbits.s12
-rw-r--r--test/MC/MachO/ARM/thumb2-function-relative-load.s6
-rw-r--r--test/MC/MachO/ARM/thumb2-movt-fixup.s32
-rw-r--r--test/MC/MachO/ARM/thumb2-movw-fixup.s90
-rw-r--r--test/MC/MachO/ARM/tvos-version-min-load-command.s13
-rw-r--r--test/MC/MachO/ARM/version-min-diagnostics.s40
-rw-r--r--test/MC/MachO/ARM/version-min-diagnostics2.s34
-rw-r--r--test/MC/MachO/ARM/version-min.s16
-rw-r--r--test/MC/MachO/ARM/watchos-version-min-load-command.s13
-rw-r--r--test/MC/MachO/PowerPC/coal-sections-powerpc.s46
-rw-r--r--test/MC/MachO/PowerPC/lit.local.cfg2
-rw-r--r--test/MC/MachO/absolute.s297
-rw-r--r--test/MC/MachO/absolutize.s290
-rw-r--r--test/MC/MachO/bad-darwin-x86_64-diff-relocs.s10
-rw-r--r--test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s6
-rw-r--r--test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s6
-rw-r--r--test/MC/MachO/coal-sections-x86_64.s48
-rw-r--r--test/MC/MachO/comm-1.s223
-rw-r--r--test/MC/MachO/cstexpr-gotpcrel-64.ll12
-rw-r--r--test/MC/MachO/darwin-complex-difference.s228
-rw-r--r--test/MC/MachO/darwin-version-min-load-command.s28
-rw-r--r--test/MC/MachO/darwin-x86_64-diff-reloc-assign.s16
-rw-r--r--test/MC/MachO/darwin-x86_64-diff-relocs.s400
-rw-r--r--test/MC/MachO/darwin-x86_64-nobase-relocs.s98
-rw-r--r--test/MC/MachO/darwin-x86_64-reloc-offsets.s404
-rw-r--r--test/MC/MachO/data.s115
-rw-r--r--test/MC/MachO/debug_frame.s52
-rw-r--r--test/MC/MachO/diff-with-two-sections.s135
-rw-r--r--test/MC/MachO/direction_labels.s169
-rw-r--r--test/MC/MachO/empty-twice.ll12
-rw-r--r--test/MC/MachO/file.s2
-rw-r--r--test/MC/MachO/gen-dwarf.s4
-rw-r--r--test/MC/MachO/i386-large-relocations.s24
-rw-r--r--test/MC/MachO/indirect-symbols.s365
-rw-r--r--test/MC/MachO/jcc.s98
-rw-r--r--test/MC/MachO/lcomm-attributes.s253
-rw-r--r--test/MC/MachO/linker-option-2.s37
-rw-r--r--test/MC/MachO/linker-options.ll47
-rw-r--r--test/MC/MachO/loc.s50
-rw-r--r--test/MC/MachO/osx-version-min-load-command.s18
-rw-r--r--test/MC/MachO/pcrel-to-other-section.s216
-rw-r--r--test/MC/MachO/relax-jumps.s15
-rw-r--r--test/MC/MachO/relax-recompute-align.s35
-rw-r--r--test/MC/MachO/reloc-diff.s41
-rw-r--r--test/MC/MachO/reloc-pcrel-offset.s33
-rw-r--r--test/MC/MachO/reloc-pcrel.s53
-rw-r--r--test/MC/MachO/section-align-1.s164
-rw-r--r--test/MC/MachO/section-align-2.s257
-rw-r--r--test/MC/MachO/section-attributes.s9
-rw-r--r--test/MC/MachO/section-flags.s53
-rw-r--r--test/MC/MachO/string-table.s201
-rw-r--r--test/MC/MachO/symbol-diff.s241
-rw-r--r--test/MC/MachO/symbol-flags.s628
-rw-r--r--test/MC/MachO/symbol-indirect.s444
-rw-r--r--test/MC/MachO/symbols-1.s620
-rw-r--r--test/MC/MachO/tbss.s228
-rw-r--r--test/MC/MachO/tdata.s211
-rw-r--r--test/MC/MachO/temp-labels.s50
-rw-r--r--test/MC/MachO/thread_init_func.s132
-rw-r--r--test/MC/MachO/tls.s468
-rw-r--r--test/MC/MachO/tlv-bss.ll15
-rw-r--r--test/MC/MachO/tlv-reloc.s321
-rw-r--r--test/MC/MachO/tlv.s213
-rw-r--r--test/MC/MachO/values.s247
-rw-r--r--test/MC/MachO/variable-exprs.s814
-rw-r--r--test/MC/MachO/weakdef.s266
-rw-r--r--test/MC/MachO/x86-data-in-code.ll5
-rw-r--r--test/MC/MachO/x86_32-optimal_nop.s229
-rw-r--r--test/MC/MachO/x86_32-scattered-reloc-fallback.s8
-rw-r--r--test/MC/MachO/x86_32-sections.s1190
-rw-r--r--test/MC/MachO/x86_32-symbols.s2004
-rw-r--r--test/MC/MachO/x86_64-reloc-arithmetic.s42
-rw-r--r--test/MC/MachO/x86_64-sections.s1160
-rw-r--r--test/MC/MachO/zerofill-1.s234
-rw-r--r--test/MC/MachO/zerofill-2.s201
-rw-r--r--test/MC/MachO/zerofill-3.s253
-rw-r--r--test/MC/MachO/zerofill-4.s104
-rw-r--r--test/MC/MachO/zerofill-5.s209
-rw-r--r--test/MC/MachO/zerofill-sect-align.s32
-rw-r--r--test/MC/Mips/branch-pseudos-bad.s17
-rw-r--r--test/MC/Mips/branch-pseudos.s180
-rw-r--r--test/MC/Mips/cnmips/invalid.s15
-rw-r--r--test/MC/Mips/cprestore-bad.s23
-rw-r--r--test/MC/Mips/cprestore-noreorder.s97
-rw-r--r--test/MC/Mips/cprestore-reorder.s98
-rw-r--r--test/MC/Mips/cprestore-warning-unused.s10
-rw-r--r--test/MC/Mips/cpsetup.s158
-rw-r--r--test/MC/Mips/directive-ent.s50
-rw-r--r--test/MC/Mips/dsp/invalid.s25
-rw-r--r--test/MC/Mips/dsp/valid.s131
-rw-r--r--test/MC/Mips/dspr2/invalid.s20
-rw-r--r--test/MC/Mips/dspr2/valid.s179
-rw-r--r--test/MC/Mips/elf_basic.s2
-rw-r--r--test/MC/Mips/eva/invalid-noeva-wrong-error.s69
-rw-r--r--test/MC/Mips/eva/invalid-noeva.s22
-rw-r--r--test/MC/Mips/eva/invalid.s11
-rw-r--r--test/MC/Mips/eva/invalid_R6.s20
-rw-r--r--test/MC/Mips/eva/valid_R6.s47
-rw-r--r--test/MC/Mips/eva/valid_preR6.s62
-rw-r--r--test/MC/Mips/expansion-jal-sym-pic.s183
-rw-r--r--test/MC/Mips/instalias-imm-expanding.s273
-rw-r--r--test/MC/Mips/macro-bcc-imm-bad.s12
-rw-r--r--test/MC/Mips/macro-bcc-imm.s69
-rw-r--r--test/MC/Mips/macro-ddiv-bad.s18
-rw-r--r--test/MC/Mips/macro-ddiv.s85
-rw-r--r--test/MC/Mips/macro-ddivu-bad.s18
-rw-r--r--test/MC/Mips/macro-ddivu.s59
-rw-r--r--test/MC/Mips/macro-div-bad.s18
-rw-r--r--test/MC/Mips/macro-div.s64
-rw-r--r--test/MC/Mips/macro-divu-bad.s18
-rw-r--r--test/MC/Mips/macro-divu.s49
-rw-r--r--test/MC/Mips/macro-dla.s707
-rw-r--r--test/MC/Mips/macro-dli.s534
-rw-r--r--test/MC/Mips/macro-la-bad.s24
-rw-r--r--test/MC/Mips/macro-la.s40
-rw-r--r--test/MC/Mips/micromips-control-instructions.s43
-rw-r--r--test/MC/Mips/micromips-diagnostic-fixup.s7
-rw-r--r--test/MC/Mips/micromips-dsp/invalid-wrong-error.s7
-rw-r--r--test/MC/Mips/micromips-dsp/invalid.s23
-rw-r--r--test/MC/Mips/micromips-dsp/valid.s105
-rw-r--r--test/MC/Mips/micromips-dspr2/invalid.s9
-rw-r--r--test/MC/Mips/micromips-dspr2/valid.s127
-rw-r--r--test/MC/Mips/micromips-invalid.s27
-rw-r--r--test/MC/Mips/micromips-loadstore-instructions.s24
-rw-r--r--test/MC/Mips/micromips-pc16-fixup.s2
-rw-r--r--test/MC/Mips/micromips/invalid.s35
-rw-r--r--test/MC/Mips/micromips32r6/invalid.s119
-rw-r--r--test/MC/Mips/micromips32r6/valid.s199
-rw-r--r--test/MC/Mips/micromips64r6/invalid.s145
-rw-r--r--test/MC/Mips/micromips64r6/valid.s154
-rw-r--r--test/MC/Mips/mips-alu-instructions.s2
-rw-r--r--test/MC/Mips/mips-diagnostic-fixup.s7
-rw-r--r--test/MC/Mips/mips-dsp-instructions.s97
-rw-r--r--test/MC/Mips/mips-expansions-bad.s9
-rw-r--r--test/MC/Mips/mips-expansions.s215
-rw-r--r--test/MC/Mips/mips-pc16-fixup.s2
-rw-r--r--test/MC/Mips/mips-pdr.s2
-rw-r--r--test/MC/Mips/mips1/valid.s4
-rw-r--r--test/MC/Mips/mips2/valid.s4
-rw-r--r--test/MC/Mips/mips3/valid.s8
-rw-r--r--test/MC/Mips/mips32/valid.s6
-rw-r--r--test/MC/Mips/mips32r2/invalid-dsp.s97
-rw-r--r--test/MC/Mips/mips32r2/invalid-dspr2.s134
-rw-r--r--test/MC/Mips/mips32r2/invalid-msa.s62
-rw-r--r--test/MC/Mips/mips32r2/invalid.s26
-rw-r--r--test/MC/Mips/mips32r2/valid-xfail.s178
-rw-r--r--test/MC/Mips/mips32r2/valid.s6
-rw-r--r--test/MC/Mips/mips32r3/invalid.s10
-rw-r--r--test/MC/Mips/mips32r3/valid-xfail.s178
-rw-r--r--test/MC/Mips/mips32r3/valid.s6
-rw-r--r--test/MC/Mips/mips32r5/invalid-mips32.s8
-rw-r--r--test/MC/Mips/mips32r5/invalid-mips32r2.s8
-rw-r--r--test/MC/Mips/mips32r5/invalid-mips32r3.s8
-rw-r--r--test/MC/Mips/mips32r5/invalid.s10
-rw-r--r--test/MC/Mips/mips32r5/valid-xfail.s178
-rw-r--r--test/MC/Mips/mips32r5/valid.s7
-rw-r--r--test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s9
-rw-r--r--test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s1
-rw-r--r--test/MC/Mips/mips32r6/invalid-mips4.s1
-rw-r--r--test/MC/Mips/mips32r6/invalid.s43
-rw-r--r--test/MC/Mips/mips32r6/valid.s10
-rw-r--r--test/MC/Mips/mips4/valid.s8
-rw-r--r--test/MC/Mips/mips5/valid.s8
-rw-r--r--test/MC/Mips/mips64-alu-instructions.s6
-rw-r--r--test/MC/Mips/mips64-expansions.s221
-rw-r--r--test/MC/Mips/mips64/valid.s10
-rw-r--r--test/MC/Mips/mips64r2/invalid.s64
-rw-r--r--test/MC/Mips/mips64r2/valid-xfail.s197
-rw-r--r--test/MC/Mips/mips64r2/valid.s10
-rw-r--r--test/MC/Mips/mips64r3/invalid.s12
-rw-r--r--test/MC/Mips/mips64r3/valid-xfail.s194
-rw-r--r--test/MC/Mips/mips64r3/valid.s10
-rw-r--r--test/MC/Mips/mips64r5/invalid-mips64.s8
-rw-r--r--test/MC/Mips/mips64r5/invalid-mips64r2.s8
-rw-r--r--test/MC/Mips/mips64r5/invalid-mips64r3.s8
-rw-r--r--test/MC/Mips/mips64r5/invalid.s12
-rw-r--r--test/MC/Mips/mips64r5/valid-xfail.s194
-rw-r--r--test/MC/Mips/mips64r5/valid.s11
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s8
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s8
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s1
-rw-r--r--test/MC/Mips/mips64r6/invalid-mips4.s1
-rw-r--r--test/MC/Mips/mips64r6/invalid.s45
-rw-r--r--test/MC/Mips/mips64r6/valid.s11
-rw-r--r--test/MC/Mips/msa/invalid-64.s66
-rw-r--r--test/MC/Mips/msa/invalid.s67
-rw-r--r--test/MC/Mips/msa/test_elm.s45
-rw-r--r--test/MC/Mips/msa/test_elm_msa64.s6
-rw-r--r--test/MC/Mips/reloc-directive-bad.s6
-rw-r--r--test/MC/Mips/reloc-directive.s58
-rw-r--r--test/MC/Mips/rotations32-bad.s31
-rw-r--r--test/MC/Mips/rotations32.s87
-rw-r--r--test/MC/Mips/rotations64.s238
-rw-r--r--test/MC/Mips/set-nomacro.s15
-rw-r--r--test/MC/PowerPC/ppc-llong.s2
-rw-r--r--test/MC/PowerPC/ppc-word.s2
-rw-r--r--test/MC/PowerPC/ppc64-encoding.s22
-rw-r--r--test/MC/PowerPC/ppc64-fixup-apply.s2
-rw-r--r--test/MC/PowerPC/pr24686.s7
-rw-r--r--test/MC/PowerPC/st-other-crash.s2
-rw-r--r--test/MC/PowerPC/vsx.s12
-rw-r--r--test/MC/Sparc/sparc-alu-instructions.s6
-rw-r--r--test/MC/Sparc/sparc-asm-errors.s8
-rw-r--r--test/MC/Sparc/sparc-assembly-exprs.s9
-rw-r--r--test/MC/Sparc/sparc-atomic-instructions.s9
-rw-r--r--test/MC/Sparc/sparc-ctrl-instructions.s4
-rw-r--r--test/MC/Sparc/sparc-fp-instructions.s9
-rw-r--r--test/MC/Sparc/sparc-mem-instructions.s18
-rw-r--r--test/MC/Sparc/sparc-pic.s44
-rw-r--r--test/MC/Sparc/sparc-relocations.s10
-rw-r--r--test/MC/Sparc/sparc-special-registers.s18
-rw-r--r--test/MC/Sparc/sparc-synthetic-instructions.s72
-rw-r--r--test/MC/Sparc/sparcv9-instructions.s272
-rw-r--r--test/MC/SystemZ/fixups.s8
-rw-r--r--test/MC/SystemZ/insn-good-z13.s66
-rw-r--r--test/MC/SystemZ/insn-good.s56
-rw-r--r--test/MC/SystemZ/lit.local.cfg4
-rw-r--r--test/MC/X86/X86_64-pku.s8
-rw-r--r--test/MC/X86/avx512-encodings.s5181
-rw-r--r--test/MC/X86/avx512vl-encoding.s209
-rw-r--r--test/MC/X86/cfi_def_cfa-crash.s24
-rw-r--r--test/MC/X86/encoder-fail.s3
-rw-r--r--test/MC/X86/expand-var.s7
-rw-r--r--test/MC/X86/i386-darwin-frame-register.ll2
-rw-r--r--test/MC/X86/intel-syntax-2.s14
-rw-r--r--test/MC/X86/intel-syntax-ambiguous.s12
-rw-r--r--test/MC/X86/intel-syntax-avx512.s96
-rw-r--r--test/MC/X86/intel-syntax-print.ll10
-rw-r--r--test/MC/X86/intel-syntax.s72
-rw-r--r--test/MC/X86/large-bss.s14
-rw-r--r--test/MC/X86/macho-reloc-errors-x86.s15
-rw-r--r--test/MC/X86/macho-reloc-errors-x86_64.s19
-rw-r--r--test/MC/X86/validate-inst-att.s17
-rw-r--r--test/MC/X86/validate-inst-intel.s8
-rw-r--r--test/MC/X86/x86-32-coverage.s20
-rw-r--r--test/MC/X86/x86-64-avx512bw.s1059
-rw-r--r--test/MC/X86/x86-64-avx512bw_vl.s3264
-rw-r--r--test/MC/X86/x86-64-avx512cd.s450
-rw-r--r--test/MC/X86/x86-64-avx512cd_vl.s913
-rw-r--r--test/MC/X86/x86-64-avx512dq.s2229
-rw-r--r--test/MC/X86/x86-64-avx512dq_vl.s1720
-rw-r--r--test/MC/X86/x86-64-avx512f_vl.s5560
-rw-r--r--test/MC/X86/x86-64.s18
-rw-r--r--test/MC/X86/x86-evenDirective.s47
-rw-r--r--test/MC/X86/x86_nop.s8
-rw-r--r--test/Makefile3
-rw-r--r--test/Object/AMDGPU/elf-definitios.yaml27
-rw-r--r--test/Object/Inputs/coff-short-import-codebin0 -> 31 bytes
-rw-r--r--test/Object/Inputs/coff-short-import-databin0 -> 31 bytes
-rwxr-xr-xtest/Object/Inputs/corrupt-invalid-dynamic-table-offset.elf.x86-64bin0 -> 1688 bytes
-rwxr-xr-xtest/Object/Inputs/corrupt-invalid-dynamic-table-size.elf.x86-64bin0 -> 1736 bytes
-rwxr-xr-xtest/Object/Inputs/corrupt-invalid-dynamic-table-too-large.elf.x86-64bin0 -> 1688 bytes
-rwxr-xr-xtest/Object/Inputs/corrupt-invalid-phentsize.elf.x86-64bin0 -> 1720 bytes
-rwxr-xr-xtest/Object/Inputs/corrupt-invalid-relocation-size.elf.x86-64bin0 -> 2160 bytes
-rwxr-xr-xtest/Object/Inputs/corrupt-invalid-strtab.elf.x86-64bin0 -> 1712 bytes
-rwxr-xr-xtest/Object/Inputs/corrupt-invalid-virtual-addr.elf.x86-64bin0 -> 1720 bytes
-rwxr-xr-xtest/Object/Inputs/invalid-symbol-table-size.elfbin0 -> 536 bytes
-rw-r--r--test/Object/Inputs/invalid-xindex-size.elfbin0 -> 624 bytes
-rwxr-xr-xtest/Object/Inputs/main-ret-zero-pe-i386.dllbin0 -> 5120 bytes
-rwxr-xr-xtest/Object/Inputs/main-ret-zero-pe-i386.exebin0 -> 5120 bytes
-rw-r--r--test/Object/Inputs/no-section-header-string-table.elf-x86-64bin0 -> 1024 bytes
-rw-r--r--test/Object/Inputs/pr25877.libbin0 -> 774 bytes
-rwxr-xr-xtest/Object/Inputs/rel-no-sec-table.elf-x86-64bin0 -> 2152 bytes
-rw-r--r--test/Object/Inputs/shndx.elfbin0 -> 824 bytes
-rwxr-xr-xtest/Object/Inputs/trivial-object-test.elf-avrbin0 -> 840 bytes
-rw-r--r--test/Object/X86/nm-ir.ll4
-rw-r--r--test/Object/archive-format.test41
-rw-r--r--test/Object/archive-symtab.test23
-rw-r--r--test/Object/archive-update.test17
-rw-r--r--test/Object/corrupt.test58
-rw-r--r--test/Object/invalid.test8
-rw-r--r--test/Object/nm-archive.test9
-rw-r--r--test/Object/nm-pe-image.test31
-rw-r--r--test/Object/no-section-header-string-table.test10
-rw-r--r--test/Object/obj2yaml.test65
-rw-r--r--test/Object/objdump-shndx.test8
-rw-r--r--test/Object/pr25877.test9
-rw-r--r--test/Object/readobj-absent.test2
-rw-r--r--test/Object/readobj-shared-object.test38
-rw-r--r--test/Object/relocation-executable.test12
-rw-r--r--test/Other/2010-05-06-Printer.ll1
-rw-r--r--test/Other/extract-alias.ll22
-rw-r--r--test/Other/llvm-nm-without-aliases.ll4
-rw-r--r--test/Other/opt-twice.ll14
-rw-r--r--test/SymbolRewriter/rewrite.ll2
-rw-r--r--test/TableGen/cast-list-initializer.td10
-rw-r--r--test/TableGen/intrinsic-varargs.td4
-rw-r--r--test/TableGen/trydecode-emission.td43
-rw-r--r--test/TableGen/trydecode-emission2.td44
-rw-r--r--test/TableGen/trydecode-emission3.td44
-rw-r--r--test/Transforms/ADCE/basictest.ll3
-rw-r--r--test/Transforms/AddDiscriminators/basic.ll14
-rw-r--r--test/Transforms/AddDiscriminators/call.ll52
-rw-r--r--test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll30
-rw-r--r--test/Transforms/AddDiscriminators/diamond.ll72
-rw-r--r--test/Transforms/AddDiscriminators/first-only.ll14
-rw-r--r--test/Transforms/AddDiscriminators/multiple.ll12
-rw-r--r--test/Transforms/AddDiscriminators/no-discriminators.ll14
-rw-r--r--test/Transforms/AddDiscriminators/oneline.ll102
-rw-r--r--test/Transforms/ArgumentPromotion/dbg.ll12
-rw-r--r--test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll24
-rw-r--r--test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll24
-rw-r--r--test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll20
-rw-r--r--test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll82
-rw-r--r--test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll11
-rw-r--r--test/Transforms/AtomicExpand/X86/lit.local.cfg2
-rw-r--r--test/Transforms/BBVectorize/X86/wr-aliases.ll2
-rw-r--r--test/Transforms/BBVectorize/simple3.ll16
-rw-r--r--test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll9
-rw-r--r--test/Transforms/CodeGenPrepare/AArch64/free-zext.ll82
-rw-r--r--test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll95
-rw-r--r--test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll118
-rw-r--r--test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll56
-rw-r--r--test/Transforms/CodeGenPrepare/X86/select.ll141
-rw-r--r--test/Transforms/CodeGenPrepare/X86/widen_switch.ll95
-rw-r--r--test/Transforms/CodeGenPrepare/invariant.group.ll23
-rw-r--r--test/Transforms/CodeGenPrepare/statepoint-relocate.ll87
-rw-r--r--test/Transforms/ConstProp/calls.ll236
-rw-r--r--test/Transforms/ConstProp/insertvalue.ll10
-rw-r--r--test/Transforms/ConstProp/loads.ll7
-rw-r--r--test/Transforms/ConstantMerge/merge-both.ll2
-rw-r--r--test/Transforms/CorrelatedValuePropagation/non-null.ll60
-rw-r--r--test/Transforms/CorrelatedValuePropagation/range.ll24
-rw-r--r--test/Transforms/CorrelatedValuePropagation/select.ll2
-rw-r--r--test/Transforms/CrossDSOCFI/basic.ll88
-rw-r--r--test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll24
-rw-r--r--test/Transforms/DeadArgElim/aggregates.ll26
-rw-r--r--test/Transforms/DeadArgElim/dbginfo.ll15
-rw-r--r--test/Transforms/DeadArgElim/naked_functions.ll31
-rw-r--r--test/Transforms/DeadArgElim/operandbundle.ll12
-rw-r--r--test/Transforms/DeadStoreElimination/calloc-store.ll65
-rw-r--r--test/Transforms/DeadStoreElimination/inst-limits.ll8
-rw-r--r--test/Transforms/DeadStoreElimination/simple.ll147
-rw-r--r--test/Transforms/EarlyCSE/AArch64/ldstN.ll18
-rw-r--r--test/Transforms/EarlyCSE/atomics.ll259
-rw-r--r--test/Transforms/EarlyCSE/basic.ll74
-rw-r--r--test/Transforms/EarlyCSE/fence.ll86
-rw-r--r--test/Transforms/Float2Int/basic.ll10
-rw-r--r--test/Transforms/ForcedFunctionAttrs/forced.ll12
-rw-r--r--test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll5
-rw-r--r--test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll21
-rw-r--r--test/Transforms/FunctionAttrs/2010-10-30-volatile.ll4
-rw-r--r--test/Transforms/FunctionAttrs/annotate-1.ll23
-rw-r--r--test/Transforms/FunctionAttrs/atomic.ll4
-rw-r--r--test/Transforms/FunctionAttrs/nonnull.ll74
-rw-r--r--test/Transforms/FunctionAttrs/norecurse.ll57
-rw-r--r--test/Transforms/FunctionAttrs/optnone.ll6
-rw-r--r--test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll30
-rw-r--r--test/Transforms/FunctionAttrs/readattrs.ll38
-rw-r--r--test/Transforms/FunctionImport/Inputs/funcimport.ll87
-rw-r--r--test/Transforms/FunctionImport/Inputs/funcimport_debug.ll27
-rw-r--r--test/Transforms/FunctionImport/funcimport.ll75
-rw-r--r--test/Transforms/FunctionImport/funcimport_debug.ll45
-rw-r--r--test/Transforms/GCOVProfiling/function-numbering.ll14
-rw-r--r--test/Transforms/GCOVProfiling/global-ctor.ll8
-rw-r--r--test/Transforms/GCOVProfiling/linezero.ll18
-rw-r--r--test/Transforms/GCOVProfiling/linkagename.ll6
-rw-r--r--test/Transforms/GCOVProfiling/return-block.ll6
-rw-r--r--test/Transforms/GCOVProfiling/version.ll6
-rw-r--r--test/Transforms/GVN/2009-03-10-PREOnVoid.ll28
-rw-r--r--test/Transforms/GVN/assume-equal.ll235
-rw-r--r--test/Transforms/GVN/crash-no-aa.ll2
-rw-r--r--test/Transforms/GVN/funclet.ll44
-rw-r--r--test/Transforms/GVN/invariant-load.ll17
-rw-r--r--test/Transforms/GVN/invariant.group.ll337
-rw-r--r--test/Transforms/GVN/load-pre-nonlocal.ll4
-rw-r--r--test/Transforms/GVN/no_speculative_loads_with_asan.ll57
-rw-r--r--test/Transforms/GVN/phi-translate.ll4
-rw-r--r--test/Transforms/GVN/pr14166.ll2
-rw-r--r--test/Transforms/GVN/pr24426.ll18
-rw-r--r--test/Transforms/GVN/pr25440.ll108
-rw-r--r--test/Transforms/GVN/pre-gep-load.ll31
-rw-r--r--test/Transforms/GVN/pre-load.ll41
-rw-r--r--test/Transforms/GVN/range.ll24
-rw-r--r--test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll14
-rw-r--r--test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll2
-rw-r--r--test/Transforms/GlobalDCE/pr20981.ll4
-rw-r--r--test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll2
-rw-r--r--test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll4
-rw-r--r--test/Transforms/GlobalOpt/2009-03-05-dbg.ll8
-rw-r--r--test/Transforms/GlobalOpt/alias-resolve.ll22
-rw-r--r--test/Transforms/GlobalOpt/alias-used-address-space.ll6
-rw-r--r--test/Transforms/GlobalOpt/alias-used-section.ll2
-rw-r--r--test/Transforms/GlobalOpt/alias-used.ll16
-rw-r--r--test/Transforms/GlobalOpt/assume.ll21
-rw-r--r--test/Transforms/GlobalOpt/available_externally_global_ctors.ll22
-rw-r--r--test/Transforms/GlobalOpt/deadglobal.ll3
-rw-r--r--test/Transforms/GlobalOpt/externally-initialized-aggregate.ll50
-rw-r--r--test/Transforms/GlobalOpt/externally-initialized.ll37
-rw-r--r--test/Transforms/GlobalOpt/global-demotion.ll80
-rw-r--r--test/Transforms/GlobalOpt/invariant.group.barrier.ll79
-rw-r--r--test/Transforms/GlobalOpt/localize-constexpr.ll28
-rw-r--r--test/Transforms/GlobalOpt/metadata.ll2
-rw-r--r--test/Transforms/GlobalOpt/tls.ll1
-rw-r--r--test/Transforms/GlobalOpt/unnamed-addr.ll6
-rw-r--r--test/Transforms/IndVarSimplify/bec-cmp.ll47
-rw-r--r--test/Transforms/IndVarSimplify/const_phi.ll33
-rw-r--r--test/Transforms/IndVarSimplify/eliminate-comparison.ll348
-rw-r--r--test/Transforms/IndVarSimplify/iv-widen.ll30
-rw-r--r--test/Transforms/IndVarSimplify/loop-invariant-conditions.ll279
-rw-r--r--test/Transforms/IndVarSimplify/pr24356.ll63
-rw-r--r--test/Transforms/IndVarSimplify/pr24783.ll30
-rw-r--r--test/Transforms/IndVarSimplify/pr24804.ll25
-rw-r--r--test/Transforms/IndVarSimplify/pr24952.ll27
-rw-r--r--test/Transforms/IndVarSimplify/pr24956.ll37
-rw-r--r--test/Transforms/IndVarSimplify/pr25047.ll49
-rw-r--r--test/Transforms/IndVarSimplify/pr25051.ll44
-rw-r--r--test/Transforms/IndVarSimplify/pr25060.ll37
-rw-r--r--test/Transforms/IndVarSimplify/pr25360.ll33
-rw-r--r--test/Transforms/IndVarSimplify/pr25421.ll30
-rw-r--r--test/Transforms/IndVarSimplify/pr25578.ll45
-rw-r--r--test/Transforms/IndVarSimplify/tripcount_infinite.ll15
-rw-r--r--test/Transforms/IndVarSimplify/widen-loop-comp.ll160
-rw-r--r--test/Transforms/IndVarSimplify/zext-nuw.ll49
-rw-r--r--test/Transforms/InferFunctionAttrs/annotate.ll24
-rw-r--r--test/Transforms/Inline/alloca-dbgdeclare-merge.ll102
-rw-r--r--test/Transforms/Inline/alloca-dbgdeclare.ll16
-rw-r--r--test/Transforms/Inline/debug-info-duplicate-calls.ll26
-rw-r--r--test/Transforms/Inline/debug-invoke.ll4
-rw-r--r--test/Transforms/Inline/deopt-bundles.ll203
-rw-r--r--test/Transforms/Inline/ignore-debug-info.ll16
-rw-r--r--test/Transforms/Inline/inline-assume.ll31
-rw-r--r--test/Transforms/Inline/inline-cold-callee.ll39
-rw-r--r--test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll30
-rw-r--r--test/Transforms/Inline/inline-hot-callee.ll39
-rw-r--r--test/Transforms/Inline/inline-optsize.ll2
-rw-r--r--test/Transforms/Inline/inline_dbg_declare.ll26
-rw-r--r--test/Transforms/Inline/inline_invoke.ll3
-rw-r--r--test/Transforms/Inline/noalias-calls.ll19
-rw-r--r--test/Transforms/Inline/noalias-cs.ll12
-rw-r--r--test/Transforms/Inline/noalias2.ll4
-rw-r--r--test/Transforms/Inline/zero-cost.ll17
-rw-r--r--test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll2
-rw-r--r--test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll2
-rw-r--r--test/Transforms/InstCombine/LandingPadClauses.ll11
-rw-r--r--test/Transforms/InstCombine/add2.ll10
-rw-r--r--test/Transforms/InstCombine/alias-recursion.ll2
-rw-r--r--test/Transforms/InstCombine/all-bits-shift.ll46
-rw-r--r--test/Transforms/InstCombine/alloca.ll11
-rw-r--r--test/Transforms/InstCombine/and-compare.ll23
-rw-r--r--test/Transforms/InstCombine/and2.ll68
-rw-r--r--test/Transforms/InstCombine/apint-or.ll79
-rw-r--r--test/Transforms/InstCombine/apint-or1.ll36
-rw-r--r--test/Transforms/InstCombine/apint-or2.ll35
-rw-r--r--test/Transforms/InstCombine/assume-redundant.ll26
-rw-r--r--test/Transforms/InstCombine/bitcast-alias-function.ll24
-rw-r--r--test/Transforms/InstCombine/bitcast-bitcast.ll84
-rw-r--r--test/Transforms/InstCombine/bitcast-vec-canon.ll25
-rw-r--r--test/Transforms/InstCombine/bitcast.ll55
-rw-r--r--test/Transforms/InstCombine/bitreverse-fold.ll11
-rw-r--r--test/Transforms/InstCombine/bitreverse-recognize.ll114
-rw-r--r--test/Transforms/InstCombine/blend_x86.ll102
-rw-r--r--test/Transforms/InstCombine/bswap-fold.ll6
-rw-r--r--test/Transforms/InstCombine/bswap-known-bits.ll47
-rw-r--r--test/Transforms/InstCombine/bswap.ll14
-rw-r--r--test/Transforms/InstCombine/call_nonnull_arg.ll20
-rw-r--r--test/Transforms/InstCombine/cast-callee-deopt-bundles.ll11
-rw-r--r--test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll108
-rw-r--r--test/Transforms/InstCombine/cast-set.ll4
-rw-r--r--test/Transforms/InstCombine/cast.ll54
-rw-r--r--test/Transforms/InstCombine/compare-alloca.ll97
-rw-r--r--test/Transforms/InstCombine/compare-signs.ll40
-rw-r--r--test/Transforms/InstCombine/constant-fold-alias.ll4
-rw-r--r--test/Transforms/InstCombine/ctpop.ll45
-rw-r--r--test/Transforms/InstCombine/debug-line.ll6
-rw-r--r--test/Transforms/InstCombine/debuginfo.ll12
-rw-r--r--test/Transforms/InstCombine/demorgan-zext.ll34
-rw-r--r--test/Transforms/InstCombine/div.ll24
-rw-r--r--test/Transforms/InstCombine/exp2-1.ll19
-rw-r--r--test/Transforms/InstCombine/extractvalue.ll22
-rw-r--r--test/Transforms/InstCombine/fabs.ll25
-rw-r--r--test/Transforms/InstCombine/fast-math.ll141
-rw-r--r--test/Transforms/InstCombine/ffs-1.ll69
-rw-r--r--test/Transforms/InstCombine/fold-phi-load-metadata.ll69
-rw-r--r--test/Transforms/InstCombine/gc.relocate.ll39
-rw-r--r--test/Transforms/InstCombine/gepphigep.ll50
-rw-r--r--test/Transforms/InstCombine/icmp-range.ll89
-rw-r--r--test/Transforms/InstCombine/icmp-shr.ll9
-rw-r--r--test/Transforms/InstCombine/icmp.ll73
-rw-r--r--test/Transforms/InstCombine/inline-intrinsic-assert.ll2
-rw-r--r--test/Transforms/InstCombine/insert-extract-shuffle.ll47
-rw-r--r--test/Transforms/InstCombine/intrinsics.ll65
-rw-r--r--test/Transforms/InstCombine/lifetime.ll93
-rw-r--r--test/Transforms/InstCombine/load-cmp.ll7
-rw-r--r--test/Transforms/InstCombine/load-combine-metadata-2.ll20
-rw-r--r--test/Transforms/InstCombine/load-combine-metadata-3.ll20
-rw-r--r--test/Transforms/InstCombine/load-combine-metadata-4.ll20
-rw-r--r--test/Transforms/InstCombine/load-combine-metadata.ll6
-rw-r--r--test/Transforms/InstCombine/loadstore-metadata.ll38
-rw-r--r--test/Transforms/InstCombine/log-pow-nofastmath.ll30
-rw-r--r--test/Transforms/InstCombine/log-pow.ll41
-rw-r--r--test/Transforms/InstCombine/malloc-free-delete.ll11
-rw-r--r--test/Transforms/InstCombine/memcmp-1.ll53
-rw-r--r--test/Transforms/InstCombine/memset_chk-1.ll26
-rw-r--r--test/Transforms/InstCombine/minmax-fp.ll156
-rw-r--r--test/Transforms/InstCombine/neon-intrinsics.ll12
-rw-r--r--test/Transforms/InstCombine/no_cgscc_assert.ll2
-rw-r--r--test/Transforms/InstCombine/nonnull-attribute.ll19
-rw-r--r--test/Transforms/InstCombine/not.ll47
-rw-r--r--test/Transforms/InstCombine/objsize-address-space.ll2
-rw-r--r--test/Transforms/InstCombine/objsize.ll4
-rw-r--r--test/Transforms/InstCombine/or.ll2
-rw-r--r--test/Transforms/InstCombine/phi-load-metadata-2.ll30
-rw-r--r--test/Transforms/InstCombine/phi-load-metadata-3.ll30
-rw-r--r--test/Transforms/InstCombine/phi-load-metadata.ll30
-rw-r--r--test/Transforms/InstCombine/phi.ll130
-rw-r--r--test/Transforms/InstCombine/pow-1.ll2
-rw-r--r--test/Transforms/InstCombine/pow-4.ll120
-rw-r--r--test/Transforms/InstCombine/pow-exp-nofastmath.ll17
-rw-r--r--test/Transforms/InstCombine/pow-exp.ll28
-rw-r--r--test/Transforms/InstCombine/pow-exp2.ll19
-rw-r--r--test/Transforms/InstCombine/pow-sqrt.ll15
-rw-r--r--test/Transforms/InstCombine/pr20059.ll16
-rw-r--r--test/Transforms/InstCombine/pr24605.ll15
-rw-r--r--test/Transforms/InstCombine/pr25745.ll20
-rw-r--r--test/Transforms/InstCombine/shift.ll4
-rw-r--r--test/Transforms/InstCombine/sincospi.ll9
-rw-r--r--test/Transforms/InstCombine/sqrt-nofast.ll25
-rw-r--r--test/Transforms/InstCombine/statepoint.ll20
-rw-r--r--test/Transforms/InstCombine/store.ll113
-rw-r--r--test/Transforms/InstCombine/strto-1.ll2
-rw-r--r--test/Transforms/InstCombine/tan-nofastmath.ll17
-rw-r--r--test/Transforms/InstCombine/tan.ll24
-rw-r--r--test/Transforms/InstCombine/token.ll89
-rw-r--r--test/Transforms/InstCombine/trunc.ll42
-rw-r--r--test/Transforms/InstCombine/unpack-fca.ll168
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts.ll359
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll27
-rw-r--r--test/Transforms/InstCombine/vector_gep2.ll23
-rw-r--r--test/Transforms/InstCombine/x86-f16c.ll61
-rw-r--r--test/Transforms/InstCombine/x86-pmovsx.ll136
-rw-r--r--test/Transforms/InstCombine/x86-pmovzx.ll136
-rw-r--r--test/Transforms/InstCombine/x86-pshufb.ll267
-rw-r--r--test/Transforms/InstCombine/x86-sse4a.ll336
-rw-r--r--test/Transforms/InstCombine/x86-vector-shifts.ll1318
-rw-r--r--test/Transforms/InstCombine/x86-xop.ll209
-rw-r--r--test/Transforms/InstCombine/xor.ll8
-rw-r--r--test/Transforms/InstSimplify/add-mask.ll65
-rw-r--r--test/Transforms/InstSimplify/apint-or.ll36
-rw-r--r--test/Transforms/InstSimplify/bswap.ll41
-rw-r--r--test/Transforms/InstSimplify/compare.ll8
-rw-r--r--test/Transforms/InstSimplify/implies.ll217
-rw-r--r--test/Transforms/InstSimplify/shift-128-kb.ll22
-rw-r--r--test/Transforms/InstSimplify/shr-nop.ll12
-rw-r--r--test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll8
-rw-r--r--test/Transforms/Internalize/comdat.ll52
-rw-r--r--test/Transforms/Internalize/local-visibility.ll8
-rw-r--r--test/Transforms/JumpThreading/basic.ll34
-rw-r--r--test/Transforms/JumpThreading/implied-cond.ll98
-rw-r--r--test/Transforms/JumpThreading/phi-known.ll66
-rw-r--r--test/Transforms/JumpThreading/select.ll30
-rw-r--r--test/Transforms/JumpThreading/update-edge-weight.ll43
-rw-r--r--test/Transforms/LCSSA/mixed-catch.ll95
-rw-r--r--test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll2
-rw-r--r--test/Transforms/LICM/argmemonly-call.ll69
-rw-r--r--test/Transforms/LICM/debug-value.ll12
-rw-r--r--test/Transforms/LICM/hoist-deref-load.ll44
-rw-r--r--test/Transforms/LICM/hoist-invariant-load.ll2
-rw-r--r--test/Transforms/LICM/pr23608.ll2
-rw-r--r--test/Transforms/LoopDistribute/basic-with-memchecks.ll2
-rw-r--r--test/Transforms/LoopDistribute/bounds-expansion-bug.ll106
-rw-r--r--test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll57
-rw-r--r--test/Transforms/LoopIdiom/basic.ll107
-rw-r--r--test/Transforms/LoopIdiom/debug-line.ll10
-rw-r--r--test/Transforms/LoopLoadElim/backward.ll32
-rw-r--r--test/Transforms/LoopLoadElim/def-store-before-load.ll35
-rw-r--r--test/Transforms/LoopLoadElim/forward.ll47
-rw-r--r--test/Transforms/LoopLoadElim/memcheck.ll52
-rw-r--r--test/Transforms/LoopLoadElim/multiple-stores-same-block.ll48
-rw-r--r--test/Transforms/LoopLoadElim/unknown-dep.ll54
-rw-r--r--test/Transforms/LoopReroll/negative.ll48
-rw-r--r--test/Transforms/LoopReroll/reroll_with_dbg.ll139
-rw-r--r--test/Transforms/LoopRotate/dbgvalue.ll14
-rw-r--r--test/Transforms/LoopSimplify/dbg-loc.ll4
-rw-r--r--test/Transforms/LoopSimplify/single-backedge.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll156
-rw-r--r--test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg3
-rw-r--r--test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll113
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll50
-rw-r--r--test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg2
-rw-r--r--test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll45
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/funclet.ll216
-rw-r--r--test/Transforms/LoopStrengthReduce/pr12018.ll5
-rw-r--r--test/Transforms/LoopStrengthReduce/pr25541.ll48
-rw-r--r--test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/sext-ind-var.ll140
-rw-r--r--test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg3
-rw-r--r--test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll33
-rw-r--r--test/Transforms/LoopUnroll/X86/partial.ll9
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-bad-geps.ll34
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-crashers.ll102
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll57
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll97
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll207
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll23
-rw-r--r--test/Transforms/LoopUnroll/pr18861.ll91
-rw-r--r--test/Transforms/LoopUnroll/rebuild_lcssa.ll119
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop1.ll4
-rw-r--r--test/Transforms/LoopUnroll/unroll-pragmas.ll66
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll28
-rw-r--r--test/Transforms/LoopUnswitch/LIV-loop-condtion.ll28
-rw-r--r--test/Transforms/LoopUnswitch/basictest.ll39
-rw-r--r--test/Transforms/LoopUnswitch/cleanuppad.ll44
-rw-r--r--test/Transforms/LoopUnswitch/cold-loop.ll52
-rw-r--r--test/Transforms/LoopUnswitch/copy-metadata.ll23
-rw-r--r--test/Transforms/LoopUnswitch/infinite-loop.ll10
-rw-r--r--test/Transforms/LoopUnswitch/trivial-unswitch.ll47
-rw-r--r--test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll4
-rw-r--r--test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll54
-rw-r--r--test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll39
-rw-r--r--test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll243
-rw-r--r--test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll191
-rw-r--r--test/Transforms/LoopVectorize/ARM/interleaved_cost.ll39
-rw-r--r--test/Transforms/LoopVectorize/ARM/vector_cast.ll37
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll40
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll30
-rw-r--r--test/Transforms/LoopVectorize/X86/masked_load_store.ll142
-rw-r--r--test/Transforms/LoopVectorize/X86/metadata-enable.ll6
-rw-r--r--test/Transforms/LoopVectorize/X86/no_fpmath.ll104
-rw-r--r--test/Transforms/LoopVectorize/X86/powof2div.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/reduction-crash.ll2
-rw-r--r--test/Transforms/LoopVectorize/X86/reg-usage.ll71
-rw-r--r--test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll46
-rw-r--r--test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll16
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll113
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks.ll8
-rw-r--r--test/Transforms/LoopVectorize/conditional-assignment.ll6
-rw-r--r--test/Transforms/LoopVectorize/control-flow.ll8
-rw-r--r--test/Transforms/LoopVectorize/dbg.value.ll8
-rw-r--r--test/Transforms/LoopVectorize/debugloc.ll18
-rw-r--r--test/Transforms/LoopVectorize/gep_with_bitcast.ll40
-rw-r--r--test/Transforms/LoopVectorize/if-pred-stores.ll43
-rw-r--r--test/Transforms/LoopVectorize/induction.ll13
-rw-r--r--test/Transforms/LoopVectorize/miniters.ll45
-rw-r--r--test/Transforms/LoopVectorize/minmax_reduction.ll104
-rw-r--r--test/Transforms/LoopVectorize/no_array_bounds.ll6
-rw-r--r--test/Transforms/LoopVectorize/no_outside_user.ll2
-rw-r--r--test/Transforms/LoopVectorize/no_switch.ll18
-rw-r--r--test/Transforms/LoopVectorize/nontemporal.ll47
-rw-r--r--test/Transforms/LoopVectorize/optsize.ll43
-rw-r--r--test/Transforms/LoopVectorize/ptr-induction.ll34
-rw-r--r--test/Transforms/LoopVectorize/reduction.ll2
-rw-r--r--test/Transforms/LoopVectorize/reverse_induction.ll9
-rw-r--r--test/Transforms/LoopVectorize/runtime-check.ll6
-rw-r--r--test/Transforms/LoopVectorize/runtime-limit.ll21
-rw-r--r--test/Transforms/LowerBitSets/function-ext.ll22
-rw-r--r--test/Transforms/LowerBitSets/function.ll35
-rw-r--r--test/Transforms/LowerBitSets/nonstring.ll34
-rw-r--r--test/Transforms/LowerBitSets/pr25902.ll21
-rw-r--r--test/Transforms/LowerBitSets/simple.ll34
-rw-r--r--test/Transforms/LowerExpectIntrinsic/basic.ll2
-rw-r--r--test/Transforms/LowerSwitch/delete-default-block-crash.ll27
-rw-r--r--test/Transforms/LowerSwitch/feature.ll60
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo.ll14
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo2.ll28
-rw-r--r--test/Transforms/Mem2Reg/optnone.ll21
-rw-r--r--test/Transforms/Mem2Reg/pr24179.ll44
-rw-r--r--test/Transforms/MemCpyOpt/memcpy.ll5
-rw-r--r--test/Transforms/MemCpyOpt/nontemporal.ll49
-rw-r--r--test/Transforms/MergeFunc/apply_function_attributes.ll47
-rw-r--r--test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll18
-rw-r--r--test/Transforms/MergeFunc/constant-entire-value.ll42
-rw-r--r--test/Transforms/MergeFunc/crash2.ll54
-rw-r--r--test/Transforms/MergeFunc/gep-base-type.ll46
-rw-r--r--test/Transforms/MergeFunc/inttoptr-address-space.ll2
-rw-r--r--test/Transforms/MergeFunc/inttoptr.ll2
-rw-r--r--test/Transforms/MergeFunc/merge-block-address-other-function.ll49
-rw-r--r--test/Transforms/MergeFunc/merge-block-address.ll91
-rw-r--r--test/Transforms/MergeFunc/merge-const-ptr-and-int.ll20
-rw-r--r--test/Transforms/MergeFunc/merge-different-vector-types.ll18
-rw-r--r--test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll96
-rw-r--r--test/Transforms/MergeFunc/no-merge-block-address-other-function.ll61
-rw-r--r--test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll24
-rw-r--r--test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll23
-rw-r--r--test/Transforms/MergeFunc/ranges-multiple.ll44
-rw-r--r--test/Transforms/MergeFunc/ranges.ll8
-rw-r--r--test/Transforms/MergeFunc/self-referential-global.ll40
-rw-r--r--test/Transforms/MergeFunc/undef-different-types.ll21
-rw-r--r--test/Transforms/MetaRenamer/metarenamer.ll2
-rw-r--r--test/Transforms/NaryReassociate/NVPTX/nary-gep.ll17
-rw-r--r--test/Transforms/NaryReassociate/nary-add.ll6
-rw-r--r--test/Transforms/NaryReassociate/nary-mul.ll19
-rw-r--r--test/Transforms/NaryReassociate/pr24301.ll14
-rw-r--r--test/Transforms/ObjCARC/basic.ll6
-rw-r--r--test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll18
-rw-r--r--test/Transforms/ObjCARC/nested.ll4
-rw-r--r--test/Transforms/ObjCARC/provenance.ll2
-rw-r--r--test/Transforms/PGOProfile/Inputs/branch1.proftext6
-rw-r--r--test/Transforms/PGOProfile/Inputs/branch2.proftext6
-rw-r--r--test/Transforms/PGOProfile/Inputs/criticaledge.proftext17
-rw-r--r--test/Transforms/PGOProfile/Inputs/diag.proftext5
-rw-r--r--test/Transforms/PGOProfile/Inputs/landingpad.proftext14
-rw-r--r--test/Transforms/PGOProfile/Inputs/loop1.proftext6
-rw-r--r--test/Transforms/PGOProfile/Inputs/loop2.proftext7
-rw-r--r--test/Transforms/PGOProfile/Inputs/switch.proftext8
-rw-r--r--test/Transforms/PGOProfile/branch1.ll30
-rw-r--r--test/Transforms/PGOProfile/branch2.ll37
-rw-r--r--test/Transforms/PGOProfile/criticaledge.ll108
-rw-r--r--test/Transforms/PGOProfile/diag_mismatch.ll12
-rw-r--r--test/Transforms/PGOProfile/diag_no_funcprofdata.ll12
-rw-r--r--test/Transforms/PGOProfile/diag_no_profile.ll9
-rw-r--r--test/Transforms/PGOProfile/landingpad.ll124
-rw-r--r--test/Transforms/PGOProfile/loop1.ll42
-rw-r--r--test/Transforms/PGOProfile/loop2.ll70
-rw-r--r--test/Transforms/PGOProfile/single_bb.ll12
-rw-r--r--test/Transforms/PGOProfile/switch.ll47
-rw-r--r--test/Transforms/PlaceSafepoints/basic.ll2
-rw-r--r--test/Transforms/PlaceSafepoints/call_gc_result.ll4
-rw-r--r--test/Transforms/PlaceSafepoints/finite-loops.ll65
-rw-r--r--test/Transforms/PlaceSafepoints/patchable-statepoints.ll4
-rw-r--r--test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll4
-rw-r--r--test/Transforms/PlaceSafepoints/statepoint-format.ll4
-rw-r--r--test/Transforms/PruneEH/operand-bundles.ll26
-rw-r--r--test/Transforms/Reassociate/fast-ReassociateVector.ll10
-rw-r--r--test/Transforms/Reassociate/fast-basictest.ll2
-rw-r--r--test/Transforms/Reassociate/fast-fp-commute.ll4
-rw-r--r--test/Transforms/Reassociate/fast-multistep.ll6
-rw-r--r--test/Transforms/Reassociate/fp-expr.ll33
-rw-r--r--test/Transforms/Reassociate/multistep.ll6
-rw-r--r--test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll31
-rw-r--r--test/Transforms/Reassociate/secondary.ll2
-rw-r--r--test/Transforms/Reassociate/vaarg_movable.ll28
-rw-r--r--test/Transforms/Reassociate/xor_reassoc.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll11
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll20
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll19
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll21
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll10
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers.ll73
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-vector.ll167
-rw-r--r--test/Transforms/RewriteStatepointsForGC/basics.ll16
-rw-r--r--test/Transforms/RewriteStatepointsForGC/codegen-cond.ll74
-rw-r--r--test/Transforms/RewriteStatepointsForGC/constants.ll43
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll25
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll35
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll24
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll19
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll19
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll44
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll28
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll37
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll45
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll37
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll20
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll151
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll167
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll65
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll88
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll81
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll51
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll104
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll22
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll149
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll165
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll44
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll62
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll32
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll279
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll150
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll32
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deref-pointers.ll53
-rw-r--r--test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll6
-rw-r--r--test/Transforms/RewriteStatepointsForGC/live-vector.ll25
-rw-r--r--test/Transforms/RewriteStatepointsForGC/liveness-basics.ll22
-rw-r--r--test/Transforms/RewriteStatepointsForGC/preprocess.ll8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/relocation.ll40
-rw-r--r--test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll66
-rw-r--r--test/Transforms/SCCP/global-alias-constprop.ll11
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/commute.ll2
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/horizontal.ll270
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/nontemporal.ll76
-rw-r--r--test/Transforms/SLPVectorizer/X86/bad_types.ll26
-rw-r--r--test/Transforms/SLPVectorizer/X86/commutativity.ll78
-rw-r--r--test/Transforms/SLPVectorizer/X86/debug_info.ll16
-rw-r--r--test/Transforms/SLPVectorizer/X86/horizontal.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/pr23510.ll38
-rw-r--r--test/Transforms/SLPVectorizer/X86/schedule_budget.ll93
-rw-r--r--test/Transforms/SROA/basictest.ll25
-rw-r--r--test/Transforms/SROA/big-endian.ll1
-rw-r--r--test/Transforms/SROA/fca.ll1
-rw-r--r--test/Transforms/SafeStack/AArch64/abi.ll20
-rw-r--r--test/Transforms/SafeStack/AArch64/lit.local.cfg3
-rw-r--r--test/Transforms/SafeStack/ARM/abi.ll18
-rw-r--r--test/Transforms/SafeStack/ARM/lit.local.cfg3
-rw-r--r--test/Transforms/SafeStack/ARM/setjmp.ll34
-rw-r--r--test/Transforms/SafeStack/X86/abi.ll30
-rw-r--r--test/Transforms/SafeStack/X86/lit.local.cfg3
-rw-r--r--test/Transforms/SafeStack/array.ll53
-rw-r--r--test/Transforms/SafeStack/byval.ll51
-rw-r--r--test/Transforms/SafeStack/call.ll160
-rw-r--r--test/Transforms/SafeStack/cast.ll28
-rw-r--r--test/Transforms/SafeStack/debug-loc.ll83
-rw-r--r--test/Transforms/SafeStack/ret.ll17
-rw-r--r--test/Transforms/SafeStack/setjmp2.ll2
-rw-r--r--test/Transforms/SafeStack/store.ll63
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof2
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_fn_header.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_mangle.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_sample_line.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_samples.prof2
-rw-r--r--test/Transforms/SampleProfile/Inputs/branch.prof16
-rw-r--r--test/Transforms/SampleProfile/Inputs/calls.prof16
-rw-r--r--test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof10
-rw-r--r--test/Transforms/SampleProfile/Inputs/coverage-warning.prof5
-rw-r--r--test/Transforms/SampleProfile/Inputs/discriminator.prof14
-rw-r--r--test/Transforms/SampleProfile/Inputs/entry_counts.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/fnptr.binprofbin112 -> 105 bytes
-rw-r--r--test/Transforms/SampleProfile/Inputs/fnptr.prof18
-rw-r--r--test/Transforms/SampleProfile/Inputs/gcc-simple.afdobin0 -> 1972 bytes
-rw-r--r--test/Transforms/SampleProfile/Inputs/inline-coverage.prof7
-rw-r--r--test/Transforms/SampleProfile/Inputs/inline-hint.prof3
-rw-r--r--test/Transforms/SampleProfile/Inputs/inline.prof7
-rw-r--r--test/Transforms/SampleProfile/Inputs/nolocinfo.prof3
-rw-r--r--test/Transforms/SampleProfile/Inputs/offset.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/propagate.prof32
-rw-r--r--test/Transforms/SampleProfile/Inputs/remarks.prof7
-rw-r--r--test/Transforms/SampleProfile/Inputs/syntax.prof4
-rw-r--r--test/Transforms/SampleProfile/branch.ll298
-rw-r--r--test/Transforms/SampleProfile/calls.ll18
-rw-r--r--test/Transforms/SampleProfile/cov-zero-samples.ll142
-rw-r--r--test/Transforms/SampleProfile/coverage-warning.ll46
-rw-r--r--test/Transforms/SampleProfile/discriminator.ll14
-rw-r--r--test/Transforms/SampleProfile/entry_counts.ll8
-rw-r--r--test/Transforms/SampleProfile/fnptr.ll24
-rw-r--r--test/Transforms/SampleProfile/gcc-simple.ll218
-rw-r--r--test/Transforms/SampleProfile/inline-coverage.ll135
-rw-r--r--test/Transforms/SampleProfile/inline-hint.ll38
-rw-r--r--test/Transforms/SampleProfile/inline.ll108
-rw-r--r--test/Transforms/SampleProfile/nolocinfo.ll38
-rw-r--r--test/Transforms/SampleProfile/offset.ll82
-rw-r--r--test/Transforms/SampleProfile/propagate.ll26
-rw-r--r--test/Transforms/SampleProfile/remarks.ll185
-rw-r--r--test/Transforms/SampleProfile/syntax.ll2
-rw-r--r--test/Transforms/ScalarRepl/debuginfo-preserved.ll12
-rw-r--r--test/Transforms/Scalarizer/dbginfo.ll12
-rw-r--r--test/Transforms/Scalarizer/store-bug.ll25
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll14
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll62
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll16
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll2
-rw-r--r--test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll43
-rw-r--r--test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll43
-rw-r--r--test/Transforms/SimplifyCFG/ARM/lit.local.cfg5
-rw-r--r--test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll43
-rw-r--r--test/Transforms/SimplifyCFG/Mips/lit.local.cfg5
-rw-r--r--test/Transforms/SimplifyCFG/PR25267.ll24
-rw-r--r--test/Transforms/SimplifyCFG/SpeculativeExec.ll26
-rw-r--r--test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll24
-rw-r--r--test/Transforms/SimplifyCFG/basictest.ll14
-rw-r--r--test/Transforms/SimplifyCFG/branch-fold-dbg.ll8
-rw-r--r--test/Transforms/SimplifyCFG/empty-cleanuppad.ll415
-rw-r--r--test/Transforms/SimplifyCFG/hoist-dbgvalue.ll10
-rw-r--r--test/Transforms/SimplifyCFG/implied-cond.ll81
-rw-r--r--test/Transforms/SimplifyCFG/invoke_unwind.ll13
-rw-r--r--test/Transforms/SimplifyCFG/merge-cond-stores-2.ll215
-rw-r--r--test/Transforms/SimplifyCFG/merge-cond-stores.ll241
-rw-r--r--test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll40
-rw-r--r--test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll32
-rw-r--r--test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll32
-rw-r--r--test/Transforms/SimplifyCFG/preserve-load-metadata.ll32
-rw-r--r--test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll30
-rw-r--r--test/Transforms/SimplifyCFG/speculate-math.ll45
-rw-r--r--test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll6
-rw-r--r--test/Transforms/SimplifyCFG/switch-dead-default.ll179
-rw-r--r--test/Transforms/SimplifyCFG/trap-debugloc.ll6
-rw-r--r--test/Transforms/SimplifyCFG/wineh-unreachable.ll83
-rw-r--r--test/Transforms/Sink/catchswitch.ll37
-rw-r--r--test/Transforms/Sink/landingpad.ll33
-rw-r--r--test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll4
-rw-r--r--test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll71
-rw-r--r--test/Transforms/StripDeadPrototypes/basic.ll12
-rw-r--r--test/Transforms/StripSymbols/2010-06-30-StripDebug.ll8
-rw-r--r--test/Transforms/StripSymbols/2010-08-25-crash.ll6
-rw-r--r--test/Transforms/StripSymbols/strip-dead-debug-info.ll16
-rw-r--r--test/Transforms/StructurizeCFG/nested-loop-order.ll2
-rw-r--r--test/Transforms/TailCallElim/basic.ll12
-rw-r--r--test/Transforms/TailCallElim/notail.ll24
-rw-r--r--test/Transforms/Util/lowerswitch.ll6
-rw-r--r--test/Transforms/Util/simplify-dbg-declare-load.ll52
-rw-r--r--test/Verifier/alias.ll18
-rw-r--r--test/Verifier/align-md.ll59
-rw-r--r--test/Verifier/atomics.ll14
-rw-r--r--test/Verifier/bitcast-alias-address-space.ll2
-rw-r--r--test/Verifier/dbg-null-retained-type.ll10
-rw-r--r--test/Verifier/dbg-typerefs.ll2
-rw-r--r--test/Verifier/dbg.ll3
-rw-r--r--test/Verifier/dereferenceable-md.ll86
-rw-r--r--test/Verifier/func-dbg.ll25
-rw-r--r--test/Verifier/gc_relocate_addrspace.ll10
-rw-r--r--test/Verifier/gc_relocate_operand.ll8
-rw-r--r--test/Verifier/gc_relocate_return.ll8
-rw-r--r--test/Verifier/invalid-eh.ll38
-rw-r--r--test/Verifier/invalid-patchable-statepoint.ll14
-rw-r--r--test/Verifier/invalid-statepoint.ll8
-rw-r--r--test/Verifier/invalid-statepoint2.ll10
-rw-r--r--test/Verifier/invoke.ll4
-rw-r--r--test/Verifier/llvm.dbg.declare-address.ll4
-rw-r--r--test/Verifier/llvm.dbg.declare-expression.ll4
-rw-r--r--test/Verifier/llvm.dbg.declare-variable.ll2
-rw-r--r--test/Verifier/llvm.dbg.intrinsic-dbg-attachment.ll20
-rw-r--r--test/Verifier/llvm.dbg.value-expression.ll4
-rw-r--r--test/Verifier/llvm.dbg.value-value.ll4
-rw-r--r--test/Verifier/llvm.dbg.value-variable.ll2
-rw-r--r--test/Verifier/metadata-function-dbg.ll23
-rw-r--r--test/Verifier/operand-bundles.ll49
-rw-r--r--test/Verifier/statepoint.ll31
-rw-r--r--test/Verifier/token1.ll11
-rw-r--r--test/Verifier/token2.ll11
-rw-r--r--test/Verifier/token3.ll8
-rw-r--r--test/Verifier/token4.ll4
-rw-r--r--test/Verifier/token5.ll7
-rw-r--r--test/Verifier/token6.ll7
-rw-r--r--test/Verifier/token7.ll8
-rw-r--r--test/lit.cfg92
-rw-r--r--test/lit.site.cfg.in4
-rw-r--r--test/tools/dsymutil/ARM/dummy-debug-map-amr64.map15
-rw-r--r--test/tools/dsymutil/ARM/empty-map.test8
-rw-r--r--test/tools/dsymutil/ARM/fat-arch-name.test21
-rw-r--r--test/tools/dsymutil/ARM/fat-arch-not-found.test13
-rw-r--r--test/tools/dsymutil/ARM/inlined-low_pc.c15
-rw-r--r--test/tools/dsymutil/ARM/lit.local.cfg7
-rwxr-xr-xtest/tools/dsymutil/Inputs/absolute_sym.macho.i386bin0 -> 8592 bytes
-rw-r--r--test/tools/dsymutil/Inputs/absolute_sym.macho.i386.obin0 -> 2472 bytes
-rwxr-xr-xtest/tools/dsymutil/Inputs/basic.macho.i386bin0 -> 9080 bytes
-rw-r--r--test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.obin0 -> 3144 bytes
-rw-r--r--test/tools/dsymutil/Inputs/basic2.c6
-rw-r--r--test/tools/dsymutil/Inputs/dead-stripped/1.obin0 -> 3200 bytes
-rw-r--r--test/tools/dsymutil/Inputs/empty_range/1.obin0 -> 636 bytes
-rwxr-xr-xtest/tools/dsymutil/Inputs/fat-test.arm.dylibbin0 -> 25180 bytes
-rw-r--r--test/tools/dsymutil/Inputs/fat-test.arm.obin0 -> 50736 bytes
-rw-r--r--test/tools/dsymutil/Inputs/fat-test.c28
-rwxr-xr-xtest/tools/dsymutil/Inputs/fat-test.dylibbin0 -> 13012 bytes
-rw-r--r--test/tools/dsymutil/Inputs/fat-test.obin0 -> 5000 bytes
-rw-r--r--test/tools/dsymutil/Inputs/frame-dw2.ll16
-rw-r--r--test/tools/dsymutil/Inputs/frame-dw4.ll16
-rw-r--r--test/tools/dsymutil/Inputs/inlined-low_pc/1.obin0 -> 1960 bytes
-rw-r--r--test/tools/dsymutil/Inputs/libfat-test.abin0 -> 5136 bytes
-rw-r--r--test/tools/dsymutil/Inputs/mismatch/1.obin0 -> 1972 bytes
-rw-r--r--test/tools/dsymutil/Inputs/mismatch/mismatch.pcmbin0 -> 24940 bytes
-rw-r--r--test/tools/dsymutil/Inputs/modules/1.obin0 -> 2444 bytes
-rw-r--r--test/tools/dsymutil/Inputs/modules/Bar.pcmbin0 -> 25636 bytes
-rw-r--r--test/tools/dsymutil/Inputs/modules/Foo.pcmbin0 -> 26060 bytes
-rw-r--r--test/tools/dsymutil/Inputs/odr-anon-namespace/1.obin0 -> 2084 bytes
-rw-r--r--test/tools/dsymutil/Inputs/odr-anon-namespace/2.obin0 -> 2084 bytes
-rw-r--r--test/tools/dsymutil/Inputs/odr-member-functions/1.obin0 -> 2236 bytes
-rw-r--r--test/tools/dsymutil/Inputs/odr-member-functions/2.obin0 -> 2660 bytes
-rw-r--r--test/tools/dsymutil/Inputs/odr-member-functions/3.obin0 -> 2832 bytes
-rw-r--r--test/tools/dsymutil/Inputs/odr-uniquing/1.obin0 -> 2544 bytes
-rw-r--r--test/tools/dsymutil/Inputs/odr-uniquing/2.obin0 -> 2544 bytes
-rw-r--r--test/tools/dsymutil/Inputs/submodules/1.obin0 -> 2232 bytes
-rw-r--r--test/tools/dsymutil/Inputs/submodules/Parent.pcmbin0 -> 25260 bytes
-rw-r--r--test/tools/dsymutil/X86/basic-linking-bundle.test38
-rw-r--r--test/tools/dsymutil/X86/basic-linking-x86.test13
-rw-r--r--test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test3
-rw-r--r--test/tools/dsymutil/X86/basic-lto-linking-x86.test7
-rw-r--r--test/tools/dsymutil/X86/custom-line-table.test40
-rw-r--r--test/tools/dsymutil/X86/dead-stripped.cpp48
-rw-r--r--test/tools/dsymutil/X86/dsym-companion.test339
-rw-r--r--test/tools/dsymutil/X86/dummy-debug-map.map22
-rw-r--r--test/tools/dsymutil/X86/empty_range.s61
-rw-r--r--test/tools/dsymutil/X86/fat-archive-input-i386.test16
-rw-r--r--test/tools/dsymutil/X86/fat-object-input-x86_64.test16
-rw-r--r--test/tools/dsymutil/X86/fat-object-input-x86_64h.test16
-rw-r--r--test/tools/dsymutil/X86/frame-1.test4
-rw-r--r--test/tools/dsymutil/X86/frame-2.test4
-rw-r--r--test/tools/dsymutil/X86/lit.local.cfg2
-rw-r--r--test/tools/dsymutil/X86/mismatch.m23
-rw-r--r--test/tools/dsymutil/X86/modules.m117
-rw-r--r--test/tools/dsymutil/X86/multiple-inputs.test31
-rw-r--r--test/tools/dsymutil/X86/odr-anon-namespace.cpp65
-rw-r--r--test/tools/dsymutil/X86/odr-member-functions.cpp109
-rw-r--r--test/tools/dsymutil/X86/odr-uniquing.cpp187
-rw-r--r--test/tools/dsymutil/X86/submodules.m52
-rw-r--r--test/tools/dsymutil/absolute_symbol.test16
-rw-r--r--test/tools/dsymutil/arch-option.test39
-rw-r--r--test/tools/dsymutil/archive-timestamp.test24
-rw-r--r--test/tools/dsymutil/basic-linking.test7
-rw-r--r--test/tools/dsymutil/debug-map-parsing.test22
-rw-r--r--test/tools/dsymutil/dump-symtab.test44
-rw-r--r--test/tools/dsymutil/fat-binary-output.test32
-rw-r--r--test/tools/dsymutil/yaml-object-address-rewrite.test10
-rw-r--r--test/tools/gold/Inputs/comdat.ll25
-rw-r--r--test/tools/gold/Inputs/linkonce-weak.ll3
-rw-r--r--test/tools/gold/PowerPC/lit.local.cfg3
-rw-r--r--test/tools/gold/PowerPC/mtriple.ll (renamed from test/tools/gold/mtriple.ll)0
-rw-r--r--test/tools/gold/X86/Inputs/alias-1.ll (renamed from test/tools/gold/Inputs/alias-1.ll)0
-rw-r--r--test/tools/gold/X86/Inputs/available-externally.ll3
-rw-r--r--test/tools/gold/X86/Inputs/bcsection.s (renamed from test/tools/gold/Inputs/bcsection.s)0
-rw-r--r--test/tools/gold/X86/Inputs/comdat.ll25
-rw-r--r--test/tools/gold/X86/Inputs/comdat2.ll9
-rw-r--r--test/tools/gold/X86/Inputs/common.ll (renamed from test/tools/gold/Inputs/common.ll)0
-rw-r--r--test/tools/gold/X86/Inputs/ctors2.ll5
-rw-r--r--test/tools/gold/X86/Inputs/drop-debug.bc (renamed from test/tools/gold/Inputs/drop-debug.bc)bin1152 -> 1152 bytes
-rw-r--r--test/tools/gold/X86/Inputs/drop-linkage.ll9
-rw-r--r--test/tools/gold/X86/Inputs/invalid.bc (renamed from test/tools/gold/Inputs/invalid.bc)bin272 -> 272 bytes
-rw-r--r--test/tools/gold/X86/Inputs/linker-script.export (renamed from test/tools/gold/Inputs/linker-script.export)0
-rw-r--r--test/tools/gold/X86/Inputs/linkonce-weak.ll19
-rw-r--r--test/tools/gold/X86/Inputs/pr19901-1.ll (renamed from test/tools/gold/Inputs/pr19901-1.ll)0
-rw-r--r--test/tools/gold/X86/Inputs/resolve-to-alias.ll4
-rw-r--r--test/tools/gold/X86/Inputs/thinlto.ll4
-rw-r--r--test/tools/gold/X86/Inputs/type-merge.ll5
-rw-r--r--test/tools/gold/X86/Inputs/type-merge2.ll5
-rw-r--r--test/tools/gold/X86/Inputs/weak.ll (renamed from test/tools/gold/Inputs/weak.ll)0
-rw-r--r--test/tools/gold/X86/alias.ll13
-rw-r--r--test/tools/gold/X86/alias2.ll23
-rw-r--r--test/tools/gold/X86/available-externally.ll27
-rw-r--r--test/tools/gold/X86/bad-alias.ll13
-rw-r--r--test/tools/gold/X86/bcsection.ll13
-rw-r--r--test/tools/gold/X86/coff.ll (renamed from test/tools/gold/coff.ll)0
-rw-r--r--test/tools/gold/X86/comdat.ll65
-rw-r--r--test/tools/gold/X86/comdat2.ll19
-rw-r--r--test/tools/gold/X86/common.ll (renamed from test/tools/gold/common.ll)0
-rw-r--r--test/tools/gold/X86/ctors.ll13
-rw-r--r--test/tools/gold/X86/ctors2.ll14
-rw-r--r--test/tools/gold/X86/disable-verify.ll25
-rw-r--r--test/tools/gold/X86/drop-debug.ll (renamed from test/tools/gold/drop-debug.ll)0
-rw-r--r--test/tools/gold/X86/drop-linkage.ll14
-rw-r--r--test/tools/gold/X86/emit-llvm.ll93
-rw-r--r--test/tools/gold/X86/invalid.ll (renamed from test/tools/gold/invalid.ll)0
-rw-r--r--test/tools/gold/X86/linker-script.ll (renamed from test/tools/gold/linker-script.ll)0
-rw-r--r--test/tools/gold/X86/linkonce-weak.ll39
-rw-r--r--test/tools/gold/X86/lit.local.cfg3
-rw-r--r--test/tools/gold/X86/no-map-whole-file.ll (renamed from test/tools/gold/no-map-whole-file.ll)0
-rw-r--r--test/tools/gold/X86/opt-level.ll (renamed from test/tools/gold/opt-level.ll)0
-rw-r--r--test/tools/gold/X86/parallel.ll22
-rw-r--r--test/tools/gold/X86/pr19901.ll (renamed from test/tools/gold/pr19901.ll)0
-rw-r--r--test/tools/gold/X86/pr25907.ll28
-rw-r--r--test/tools/gold/X86/pr25915.ll17
-rw-r--r--test/tools/gold/X86/remarks.ll25
-rw-r--r--test/tools/gold/X86/resolve-to-alias.ll33
-rw-r--r--test/tools/gold/X86/slp-vectorize.ll (renamed from test/tools/gold/slp-vectorize.ll)0
-rw-r--r--test/tools/gold/X86/stats.ll (renamed from test/tools/gold/stats.ll)0
-rw-r--r--test/tools/gold/X86/thinlto.ll34
-rw-r--r--test/tools/gold/X86/type-merge.ll24
-rw-r--r--test/tools/gold/X86/type-merge2.ll26
-rw-r--r--test/tools/gold/X86/unnamed-addr.ll14
-rw-r--r--test/tools/gold/X86/vectorize.ll (renamed from test/tools/gold/vectorize.ll)0
-rw-r--r--test/tools/gold/X86/weak.ll (renamed from test/tools/gold/weak.ll)0
-rw-r--r--test/tools/gold/alias.ll13
-rw-r--r--test/tools/gold/bad-alias.ll13
-rw-r--r--test/tools/gold/bcsection.ll11
-rw-r--r--test/tools/gold/comdat.ll65
-rw-r--r--test/tools/gold/emit-llvm.ll92
-rw-r--r--test/tools/gold/linkonce-weak.ll19
-rw-r--r--test/tools/gold/lit.local.cfg4
-rw-r--r--test/tools/gold/remarks.ll22
-rw-r--r--test/tools/llvm-cxxdump/trivial.test3
-rw-r--r--test/tools/llvm-dwp/Inputs/simple/notypes/a.dwobin0 -> 1193 bytes
-rw-r--r--test/tools/llvm-dwp/Inputs/simple/notypes/b.dwobin0 -> 1241 bytes
-rw-r--r--test/tools/llvm-dwp/Inputs/simple/types/a.dwobin0 -> 1369 bytes
-rw-r--r--test/tools/llvm-dwp/Inputs/simple/types/b.dwobin0 -> 1409 bytes
-rw-r--r--test/tools/llvm-dwp/Inputs/type_dedup/a.dwobin0 -> 1449 bytes
-rw-r--r--test/tools/llvm-dwp/Inputs/type_dedup/b.dwobin0 -> 1449 bytes
-rw-r--r--test/tools/llvm-dwp/X86/lit.local.cfg4
-rw-r--r--test/tools/llvm-dwp/X86/simple.test98
-rw-r--r--test/tools/llvm-dwp/X86/type_dedup.test35
-rw-r--r--test/tools/llvm-lto/Inputs/thinlto.ll4
-rw-r--r--test/tools/llvm-lto/thinlto.ll24
-rw-r--r--test/tools/llvm-mc/basic.test3
-rw-r--r--test/tools/llvm-mc/fatal_warnings.test4
-rw-r--r--test/tools/llvm-mc/line_end_with_space.test1
-rw-r--r--test/tools/llvm-mc/lit.local.cfg4
-rw-r--r--test/tools/llvm-mc/no_warnings.test4
-rw-r--r--test/tools/llvm-nm/X86/IRobj.test11
-rw-r--r--test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64bin0 -> 844 bytes
-rw-r--r--test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64bin0 -> 1168 bytes
-rw-r--r--test/tools/llvm-nm/X86/externalonly.test4
-rw-r--r--test/tools/llvm-nm/X86/groupingflags.test5
-rw-r--r--test/tools/llvm-nm/X86/posixMachO.test7
-rw-r--r--test/tools/llvm-nm/lit.local.cfg2
-rw-r--r--test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test30
-rw-r--r--test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64bin0 -> 888 bytes
-rw-r--r--test/tools/llvm-objdump/Inputs/libbogus1.a13
-rw-r--r--test/tools/llvm-objdump/Inputs/libbogus2.a13
-rw-r--r--test/tools/llvm-objdump/Inputs/libbogus3.a16
-rw-r--r--test/tools/llvm-objdump/Inputs/section-filter.objbin0 -> 441 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/disassemble-data.objbin0 -> 254 bytes
-rwxr-xr-xtest/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386bin0 -> 1536 bytes
-rwxr-xr-xtest/tools/llvm-objdump/X86/Inputs/internal.exe.coff-x86_64bin0 -> 6144 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.abin0 -> 2768 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.machobin0 -> 9248 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0006.machobin0 -> 9248 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.machobin0 -> 9248 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0040.machobin0 -> 9248 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0080.machobin0 -> 9166 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0261.machobin0 -> 8752 bytes
-rw-r--r--test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.machobin0 -> 9248 bytes
-rw-r--r--test/tools/llvm-objdump/X86/coff-dis-internal.test3
-rw-r--r--test/tools/llvm-objdump/X86/coff-disassemble-export.test8
-rw-r--r--test/tools/llvm-objdump/X86/disassemble-data.test4
-rw-r--r--test/tools/llvm-objdump/X86/macho-symbol-table.test6
-rw-r--r--test/tools/llvm-objdump/X86/malformed-machos.test41
-rw-r--r--test/tools/llvm-objdump/eh_frame-arm64.test23
-rw-r--r--test/tools/llvm-objdump/malformed-archives.test20
-rw-r--r--test/tools/llvm-objdump/section-filter.test7
-rw-r--r--test/tools/llvm-pdbdump/regex-filter.test20
-rw-r--r--test/tools/llvm-profdata/Inputs/basic.proftext19
-rw-r--r--test/tools/llvm-profdata/Inputs/c-general.profrawbin1384 -> 1776 bytes
-rw-r--r--test/tools/llvm-profdata/Inputs/compat.profdata.v2bin0 -> 712 bytes
-rw-r--r--test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcovbin0 -> 1960 bytes
-rw-r--r--test/tools/llvm-profdata/Inputs/inline-samples.afdo20
-rw-r--r--test/tools/llvm-profdata/Inputs/overflow-instr.proftext6
-rw-r--r--test/tools/llvm-profdata/Inputs/overflow-sample.proftext7
-rw-r--r--test/tools/llvm-profdata/Inputs/sample-profile.proftext18
-rw-r--r--test/tools/llvm-profdata/Inputs/text-format-errors.text.bin1
-rw-r--r--test/tools/llvm-profdata/Inputs/vp-malform.proftext42
-rw-r--r--test/tools/llvm-profdata/Inputs/vp-malform2.proftext32
-rw-r--r--test/tools/llvm-profdata/Inputs/vp-truncate.proftext36
-rw-r--r--test/tools/llvm-profdata/Inputs/weight-instr-bar.profdatabin0 -> 1320 bytes
-rw-r--r--test/tools/llvm-profdata/Inputs/weight-instr-foo.profdatabin0 -> 1320 bytes
-rw-r--r--test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext8
-rw-r--r--test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext8
-rw-r--r--test/tools/llvm-profdata/c-general.test6
-rw-r--r--test/tools/llvm-profdata/compat.proftext20
-rw-r--r--test/tools/llvm-profdata/count-mismatch.proftext3
-rw-r--r--test/tools/llvm-profdata/gcc-gcov-sample-profile.test29
-rw-r--r--test/tools/llvm-profdata/inline-samples.test30
-rw-r--r--test/tools/llvm-profdata/overflow-instr.test17
-rw-r--r--test/tools/llvm-profdata/overflow-sample.test43
-rw-r--r--test/tools/llvm-profdata/overflow.proftext12
-rw-r--r--test/tools/llvm-profdata/raw-32-bits-be.test13
-rw-r--r--test/tools/llvm-profdata/raw-32-bits-le.test13
-rw-r--r--test/tools/llvm-profdata/raw-64-bits-be.test13
-rw-r--r--test/tools/llvm-profdata/raw-64-bits-le.test13
-rw-r--r--test/tools/llvm-profdata/raw-magic-but-no-header.test2
-rw-r--r--test/tools/llvm-profdata/raw-two-profiles.test31
-rw-r--r--test/tools/llvm-profdata/sample-profile-basic.test10
-rw-r--r--test/tools/llvm-profdata/text-dump.test21
-rw-r--r--test/tools/llvm-profdata/text-format-errors.test27
-rw-r--r--test/tools/llvm-profdata/value-prof.proftext57
-rw-r--r--test/tools/llvm-profdata/weight-instr.test69
-rw-r--r--test/tools/llvm-profdata/weight-sample.test56
-rw-r--r--test/tools/llvm-readobj/ARM/attribute-4.s7
-rw-r--r--test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386bin0 -> 1080 bytes
-rw-r--r--test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppcbin0 -> 1204 bytes
-rw-r--r--test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64bin0 -> 1784 bytes
-rw-r--r--test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64bin0 -> 1616 bytes
-rwxr-xr-xtest/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipselbin0 -> 2484 bytes
-rwxr-xr-xtest/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveribin0 -> 13208 bytes
-rwxr-xr-xtest/tools/llvm-readobj/Inputs/verdef.elf-x86-64bin0 -> 2256 bytes
-rw-r--r--test/tools/llvm-readobj/amdgpu-elf-defs.test28
-rw-r--r--test/tools/llvm-readobj/basic.test2
-rw-r--r--test/tools/llvm-readobj/codeview-linetables.test36
-rw-r--r--test/tools/llvm-readobj/elf-gnuhash.test63
-rw-r--r--test/tools/llvm-readobj/elf-versioninfo.test81
-rw-r--r--test/tools/llvm-readobj/file-headers.test18
-rw-r--r--test/tools/llvm-readobj/mips-rld-map-rel.test24
-rw-r--r--test/tools/llvm-readobj/sections-ext.test7
-rw-r--r--test/tools/llvm-readobj/sections.test7
-rw-r--r--test/tools/llvm-size/basic.test2
-rw-r--r--test/tools/llvm-split/alias.ll19
-rw-r--r--test/tools/llvm-split/comdat.ll19
-rw-r--r--test/tools/llvm-split/function.ll17
-rw-r--r--test/tools/llvm-split/global.ll11
-rw-r--r--test/tools/llvm-split/internal.ll17
-rw-r--r--test/tools/llvm-split/unnamed.ll31
-rwxr-xr-xtest/tools/llvm-symbolizer/Inputs/addr.exebin0 -> 10109 bytes
-rw-r--r--test/tools/llvm-symbolizer/Inputs/addr.inp1
-rw-r--r--test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp19
-rw-r--r--test/tools/llvm-symbolizer/Inputs/coff-dwarf.exebin0 -> 18944 bytes
-rw-r--r--test/tools/llvm-symbolizer/Inputs/coff-exports.cpp20
-rw-r--r--test/tools/llvm-symbolizer/Inputs/coff-exports.exebin0 -> 8192 bytes
-rw-r--r--test/tools/llvm-symbolizer/coff-dwarf.test16
-rw-r--r--test/tools/llvm-symbolizer/coff-exports.test20
-rw-r--r--test/tools/llvm-symbolizer/pdb/Inputs/test.cpp7
-rw-r--r--test/tools/llvm-symbolizer/pdb/Inputs/test.exebin165888 -> 126464 bytes
-rw-r--r--test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input4
-rw-r--r--test/tools/llvm-symbolizer/pdb/Inputs/test.pdbbin1552384 -> 1626112 bytes
-rw-r--r--test/tools/llvm-symbolizer/pdb/pdb.test63
-rw-r--r--test/tools/llvm-symbolizer/sym.test30
-rw-r--r--test/tools/lto/opt-level.ll5
-rw-r--r--test/tools/sancov/Inputs/blacklist.txt1
-rw-r--r--test/tools/sancov/Inputs/foo.cpp5
-rwxr-xr-xtest/tools/sancov/Inputs/test-linux_x86_64bin0 -> 2355767 bytes
-rw-r--r--test/tools/sancov/Inputs/test-linux_x86_64-1.sancovbin0 -> 80 bytes
-rw-r--r--test/tools/sancov/Inputs/test-linux_x86_64.sancovbin0 -> 64 bytes
-rw-r--r--test/tools/sancov/Inputs/test.cpp19
-rw-r--r--test/tools/sancov/blacklist.test5
-rw-r--r--test/tools/sancov/covered_functions.test13
-rw-r--r--test/tools/sancov/not_covered_functions.test8
-rw-r--r--test/tools/sancov/print.test11
3932 files changed, 242530 insertions, 55540 deletions
diff --git a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
index 069bd0bcfd8f..b59ee42dec3a 100644
--- a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
+++ b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.usb_hcd = type { %struct.usb_bus, i64, [0 x i64] }
@uhci_pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer
-@__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @uhci_pci_ids
+@__mod_pci_device_table = alias [1 x %struct.pci_device_id], [1 x %struct.pci_device_id]* @uhci_pci_ids
; <[1 x %struct.pci_device_id]*> [#uses=0]
define i32 @uhci_suspend(%struct.usb_hcd* %hcd) {
diff --git a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
index 20be13d153bb..ba9740028180 100644
--- a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
+++ b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.usb_hcd = type { %struct.usb_bus, [0 x i64] }
@pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer
-@__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0]
+@__mod_pci_device_table = alias [1 x %struct.pci_device_id], [1 x %struct.pci_device_id]* @pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0]
define i32 @ehci_pci_setup(%struct.usb_hcd* %hcd) {
entry:
diff --git a/test/Analysis/BasicAA/bug.23540.ll b/test/Analysis/BasicAA/bug.23540.ll
new file mode 100644
index 000000000000..f693bcf73cd6
--- /dev/null
+++ b/test/Analysis/BasicAA/bug.23540.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@c = external global i32
+
+; CHECK-LABEL: f
+; CHECK: PartialAlias: i32* %arrayidx, i32* %arrayidx6
+define void @f() {
+ %idxprom = zext i32 undef to i64
+ %add4 = add i32 0, 1
+ %idxprom5 = zext i32 %add4 to i64
+ %arrayidx6 = getelementptr inbounds i32, i32* @c, i64 %idxprom5
+ %arrayidx = getelementptr inbounds i32, i32* @c, i64 %idxprom
+ ret void
+}
+
diff --git a/test/Analysis/BasicAA/bug.23626.ll b/test/Analysis/BasicAA/bug.23626.ll
new file mode 100644
index 000000000000..6a1478c65cef
--- /dev/null
+++ b/test/Analysis/BasicAA/bug.23626.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin13.4.0"
+
+; CHECK-LABEL: compute1
+; CHECK: PartialAlias: i32* %arrayidx8, i32* %out
+; CHECK: PartialAlias: i32* %arrayidx11, i32* %out
+; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx8
+; CHECK: PartialAlias: i32* %arrayidx14, i32* %out
+; CHECK: PartialAlias: i32* %arrayidx14, i32* %arrayidx8
+; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx14
+define void @compute1(i32 %num.0.lcssa, i32* %out) {
+ %idxprom = zext i32 %num.0.lcssa to i64
+ %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %idxprom
+ %add9 = or i32 %num.0.lcssa, 1
+ %idxprom10 = zext i32 %add9 to i64
+ %arrayidx11 = getelementptr inbounds i32, i32* %out, i64 %idxprom10
+ %add12 = or i32 %num.0.lcssa, 2
+ %idxprom13 = zext i32 %add12 to i64
+ %arrayidx14 = getelementptr inbounds i32, i32* %out, i64 %idxprom13
+ ret void
+}
+
+; CHECK-LABEL: compute2
+; CHECK: PartialAlias: i32* %arrayidx11, i32* %out.addr
+define void @compute2(i32 %num, i32* %out.addr) {
+ %add9 = add i32 %num, 1
+ %idxprom10 = zext i32 %add9 to i64
+ %arrayidx11 = getelementptr inbounds i32, i32* %out.addr, i64 %idxprom10
+ ret void
+}
diff --git a/test/Analysis/BasicAA/cs-cs.ll b/test/Analysis/BasicAA/cs-cs.ll
index 78670b61ca1c..dc298f1668be 100644
--- a/test/Analysis/BasicAA/cs-cs.ll
+++ b/test/Analysis/BasicAA/cs-cs.ll
@@ -2,8 +2,8 @@
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "arm-apple-ios"
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
@@ -13,27 +13,27 @@ declare void @a_readonly_func(i8 *) noinline nounwind readonly
define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
entry:
%q = getelementptr i8, i8* %p, i64 16
- %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
- call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
- %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+ %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind
+ call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
+ %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind
%c = add <8 x i16> %a, %b
ret <8 x i16> %c
; CHECK-LABEL: Function: test1:
; CHECK: NoAlias: i8* %p, i8* %q
-; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
-; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
-; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
-; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
-; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
-; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
-; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
-; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
-; CHECK: NoModRef: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
-; CHECK: NoModRef: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
-; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1
-; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4
+; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4
+; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4
+; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4
+; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4
+; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4
+; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4
+; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4
+; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
}
define void @test2(i8* %P, i8* %Q) nounwind ssp {
@@ -233,4 +233,9 @@ define void @test6(i8* %P) nounwind ssp {
; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false)
}
-attributes #0 = { nounwind }
+attributes #0 = { nounwind readonly argmemonly }
+attributes #1 = { nounwind argmemonly }
+attributes #2 = { noinline nounwind readonly }
+attributes #3 = { nounwind ssp }
+attributes #4 = { nounwind }
+
diff --git a/test/Analysis/BasicAA/full-store-partial-alias.ll b/test/Analysis/BasicAA/full-store-partial-alias.ll
index 341f6ba23b3a..20f6f7ec4ad0 100644
--- a/test/Analysis/BasicAA/full-store-partial-alias.ll
+++ b/test/Analysis/BasicAA/full-store-partial-alias.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -tbaa -basicaa -gvn < %s | FileCheck -check-prefix=BASICAA %s
-; RUN: opt -S -tbaa -gvn < %s | FileCheck %s
+; RUN: opt -S -tbaa -gvn < %s | FileCheck -check-prefix=BASICAA %s
+; RUN: opt -S -tbaa -disable-basicaa -gvn < %s | FileCheck %s
; rdar://8875631, rdar://8875069
; BasicAA should notice that the store stores to the entire %u object,
diff --git a/test/Analysis/BasicAA/intrinsics.ll b/test/Analysis/BasicAA/intrinsics.ll
index 8c05587ce233..526a039ef7ac 100644
--- a/test/Analysis/BasicAA/intrinsics.ll
+++ b/test/Analysis/BasicAA/intrinsics.ll
@@ -7,14 +7,14 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
; CHECK: define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR:#[0-9]+]]
-; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[ATTR:#[0-9]+]]
+; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
; CHECK-NEXT: %c = add <8 x i16> %a, %a
define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) {
entry:
- %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
- call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
- %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+ %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind
+ call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
+ %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind
%c = add <8 x i16> %a, %b
ret <8 x i16> %c
}
@@ -22,21 +22,22 @@ entry:
; CHECK: define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: %q = getelementptr i8, i8* %p, i64 16
-; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR]]
-; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[ATTR]]
+; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
; CHECK-NEXT: %c = add <8 x i16> %a, %a
define <8 x i16> @test1(i8* %p, <8 x i16> %y) {
entry:
%q = getelementptr i8, i8* %p, i64 16
- %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
- call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
- %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind
+ %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind
+ call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
+ %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind
%c = add <8 x i16> %a, %b
ret <8 x i16> %c
}
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
-; CHECK: attributes #0 = { nounwind readonly }
+; CHECK: attributes #0 = { argmemonly nounwind readonly }
+; CHECK: attributes #1 = { argmemonly nounwind }
; CHECK: attributes [[ATTR]] = { nounwind }
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index 3084f809c370..e42793936c3d 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -190,6 +190,43 @@ define i32 @test10(i32* %P, i32* %P2) {
; CHECK: ret i32 %Diff
}
+; CHECK-LABEL: @test11(
+define i32 @test11(i32* %P, i32* %P2) {
+ %V1 = load i32, i32* %P
+ call i32 @func_argmemonly(i32* readonly %P2)
+ %V2 = load i32, i32* %P
+ %Diff = sub i32 %V1, %V2
+ ret i32 %Diff
+ ; CHECK-NOT: load
+ ; CHECK: ret i32 0
+}
+
+declare i32 @func_argmemonly_two_args(i32* %P, i32* %P2) argmemonly
+
+; CHECK-LABEL: @test12(
+define i32 @test12(i32* %P, i32* %P2, i32* %P3) {
+ %V1 = load i32, i32* %P
+ call i32 @func_argmemonly_two_args(i32* readonly %P2, i32* %P3)
+ %V2 = load i32, i32* %P
+ %Diff = sub i32 %V1, %V2
+ ret i32 %Diff
+ ; CHECK: load
+ ; CHECK: load
+ ; CHECK: sub
+ ; CHECK: ret i32 %Diff
+}
+
+; CHECK-LABEL: @test13(
+define i32 @test13(i32* %P, i32* %P2) {
+ %V1 = load i32, i32* %P
+ call i32 @func_argmemonly(i32* readnone %P2)
+ %V2 = load i32, i32* %P
+ %Diff = sub i32 %V1, %V2
+ ret i32 %Diff
+ ; CHECK-NOT: load
+ ; CHECK: ret i32 0
+}
+
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
diff --git a/test/Analysis/BasicAA/noalias-bugs.ll b/test/Analysis/BasicAA/noalias-bugs.ll
index acb230c45de4..71b3c443f542 100644
--- a/test/Analysis/BasicAA/noalias-bugs.ll
+++ b/test/Analysis/BasicAA/noalias-bugs.ll
@@ -24,7 +24,7 @@ define i64 @testcase(%nested * noalias %p1, %nested * noalias %p2,
; CHECK: store i64 2
; CHECK: load
-; CHECK; store i64 1
+; CHECK: store i64 1
store i64 2, i64* %ptr.64, align 8
%r = load i64, i64* %either_ptr.64, align 8
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
index a72778277bb2..3944e9e43566 100644
--- a/test/Analysis/BasicAA/phi-aa.ll
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -39,6 +39,7 @@ return:
; CHECK-LABEL: pr18068
; CHECK: MayAlias: i32* %0, i32* %arrayidx5
+; CHECK: NoAlias: i32* %arrayidx13, i32* %arrayidx5
define i32 @pr18068(i32* %jj7, i32* %j) {
entry:
diff --git a/test/Analysis/BasicAA/phi-loop.ll b/test/Analysis/BasicAA/phi-loop.ll
new file mode 100644
index 000000000000..6337bfbc1cfb
--- /dev/null
+++ b/test/Analysis/BasicAA/phi-loop.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -basicaa -basicaa-recphi=1 -gvn -S | FileCheck %s
+;
+; Check that section->word_ofs doesn't get reloaded in every iteration of the
+; for loop.
+;
+; Code:
+;
+; typedef struct {
+; unsigned num_words;
+; unsigned word_ofs;
+; const unsigned *data;
+; } section_t;
+;
+;
+; void test2(const section_t * restrict section, unsigned * restrict dst) {;
+; while (section->data != NULL) {
+; const unsigned *src = section->data;
+; for (unsigned i=0; i < section->num_words; ++i) {
+; dst[section->word_ofs + i] = src[i];
+; }
+;
+; ++section;
+; }
+; }
+;
+
+; CHECK-LABEL: for.body:
+; CHECK-NOT: load i32, i32* %word_ofs
+
+%struct.section_t = type { i32, i32, i32* }
+
+define void @test2(%struct.section_t* noalias nocapture readonly %section, i32* noalias nocapture %dst) {
+entry:
+ %data13 = getelementptr inbounds %struct.section_t, %struct.section_t* %section, i32 0, i32 2
+ %0 = load i32*, i32** %data13, align 4
+ %cmp14 = icmp eq i32* %0, null
+ br i1 %cmp14, label %while.end, label %for.cond.preheader
+
+for.cond.preheader: ; preds = %entry, %for.end
+ %1 = phi i32* [ %6, %for.end ], [ %0, %entry ]
+ %section.addr.015 = phi %struct.section_t* [ %incdec.ptr, %for.end ], [ %section, %entry ]
+ %num_words = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 0, i32 0
+ %2 = load i32, i32* %num_words, align 4
+ %cmp211 = icmp eq i32 %2, 0
+ br i1 %cmp211, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %for.cond.preheader
+ %word_ofs = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 0, i32 1
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %arrayidx.phi = phi i32* [ %1, %for.body.lr.ph ], [ %arrayidx.inc, %for.body ]
+ %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %3 = load i32, i32* %arrayidx.phi, align 4
+ %4 = load i32, i32* %word_ofs, align 4
+ %add = add i32 %4, %i.012
+ %arrayidx3 = getelementptr inbounds i32, i32* %dst, i32 %add
+ store i32 %3, i32* %arrayidx3, align 4
+ %inc = add i32 %i.012, 1
+ %5 = load i32, i32* %num_words, align 4
+ %cmp2 = icmp ult i32 %inc, %5
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+ br i1 %cmp2, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %for.cond.preheader
+ %incdec.ptr = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 1
+ %data = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 1, i32 2
+ %6 = load i32*, i32** %data, align 4
+ %cmp = icmp eq i32* %6, null
+ br i1 %cmp, label %while.end, label %for.cond.preheader
+
+while.end: ; preds = %for.end, %entry
+ ret void
+}
+
diff --git a/test/Analysis/BasicAA/q.bad.ll b/test/Analysis/BasicAA/q.bad.ll
new file mode 100644
index 000000000000..f2de6a76c5e0
--- /dev/null
+++ b/test/Analysis/BasicAA/q.bad.ll
@@ -0,0 +1,180 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7--linux-gnueabi"
+
+; CHECK-LABEL: test_zext_sext_amounts255
+; CHECK: NoAlias: i8* %a, i8* %b
+define void @test_zext_sext_amounts255(i8* %mem) {
+ %sext.1 = sext i8 255 to i16
+ %sext.zext.1 = zext i16 %sext.1 to i64
+ %sext.2 = sext i8 255 to i32
+ %sext.zext.2 = zext i32 %sext.2 to i64
+ %a = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.1
+ %b = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.2
+ ret void
+}
+
+; CHECK-LABEL: test_zext_sext_amounts
+; CHECK: PartialAlias: i8* %a, i8* %b
+; %a and %b only PartialAlias as, although they're both zext(sext(%num)) they'll extend the sign by a different
+; number of bits before zext-ing the remainder.
+define void @test_zext_sext_amounts(i8* %mem, i8 %num) {
+ %sext.1 = sext i8 %num to i16
+ %sext.zext.1 = zext i16 %sext.1 to i64
+ %sext.2 = sext i8 %num to i32
+ %sext.zext.2 = zext i32 %sext.2 to i64
+ %a = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.1
+ %b = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.2
+ ret void
+}
+
+; CHECK-LABEL: based_on_pr18068
+; CHECK: NoAlias: i8* %a, i8* %b
+; CHECK: NoAlias: i8* %a, i8* %c
+define void @based_on_pr18068(i32 %loaded, i8* %mem) {
+ %loaded.64 = zext i32 %loaded to i64
+ %add1 = add i32 %loaded, -1 ; unsigned wraps unless %loaded == 0
+ %add1.64 = zext i32 %add1 to i64 ; is zext(%loaded) always != zext(%loaded - 1)? Yes -> NoAlias
+ %sub1 = sub i32 %loaded, 1 ; unsigned wraps iff %loaded == 0
+ %sub1.64 = zext i32 %sub1 to i64 ; is zext(%loaded) always != zext(%loaded - 1)? Yes -> NoAlias
+ %a = getelementptr inbounds i8, i8* %mem, i64 %loaded.64
+ %b = getelementptr inbounds i8, i8* %mem, i64 %add1.64
+ %c = getelementptr inbounds i8, i8* %mem, i64 %sub1.64
+ ret void
+}
+
+; CHECK-LABEL: test_path_dependence
+; CHECK: PartialAlias: i8* %a, i8* %b
+; CHECK: MustAlias: i8* %a, i8* %c
+; CHECK: PartialAlias: i8* %a, i8* %d
+define void @test_path_dependence(i32 %p, i8* %mem) {
+ %p.minus1 = add i32 %p, -1 ; this will always unsigned-wrap, unless %p == 0
+ %p.minus1.64 = zext i32 %p.minus1 to i64
+ %p.64.again = add i64 %p.minus1.64, 1 ; either %p (if we wrapped) or 4294967296 (if we didn't)
+
+ %p.nsw.nuw.minus1 = sub nsw nuw i32 %p, 1 ; as nuw we know %p >= 1, and as nsw %p <= 2147483647
+ %p.nsw.nuw.minus1.64 = zext i32 %p.nsw.nuw.minus1 to i64
+ %p.nsw.nuw.64.again = add nsw nuw i64 %p.nsw.nuw.minus1.64, 1 ; ...so always exactly %p
+
+ %p.nsw.minus1 = sub nsw i32 %p, 1 ; only nsw, so can only guarantee %p != 0x10000000
+ %p.nsw.minus1.64 = zext i32 %p.nsw.minus1 to i64 ; when %p > 0x10000000 (ie <= 0 as a signed number) then the zext will make this a huge positive number
+ %p.nsw.64.again = add nsw i64 %p.nsw.minus1.64, 1 ; ...and so this is very much != %p
+
+ %p.64 = zext i32 %p to i64
+ %a = getelementptr inbounds i8, i8* %mem, i64 %p.64
+ %b = getelementptr inbounds i8, i8* %mem, i64 %p.64.again
+ %c = getelementptr inbounds i8, i8* %mem, i64 %p.nsw.nuw.64.again
+ %d = getelementptr inbounds i8, i8* %mem, i64 %p.nsw.64.again
+ ret void
+}
+
+; CHECK-LABEL: test_zext_sext_255
+; CHECK: NoAlias: i8* %a, i8* %b
+define void @test_zext_sext_255(i8* %mem) {
+ %zext.255 = zext i8 255 to i16 ; 0x00FF
+ %sext.255 = sext i8 255 to i16 ; 0xFFFF
+ %zext.sext.255 = zext i16 %sext.255 to i32 ; 0x0000FFFF
+ %sext.zext.255 = sext i16 %zext.255 to i32 ; 0x000000FF
+ %zext.zext.sext.255 = zext i32 %zext.sext.255 to i64
+ %zext.sext.zext.255 = zext i32 %sext.zext.255 to i64
+ %a = getelementptr inbounds i8, i8* %mem, i64 %zext.zext.sext.255
+ %b = getelementptr inbounds i8, i8* %mem, i64 %zext.sext.zext.255
+ ret void
+}
+
+; CHECK-LABEL: test_zext_sext_num
+; CHECK: PartialAlias: i8* %a, i8* %b
+; %a and %b NoAlias if %num == 255 (see @test_zext_sext_255), but %a and %b NoAlias for other values of %num (e.g. 0)
+define void @test_zext_sext_num(i8* %mem, i8 %num) {
+ %zext.num = zext i8 %num to i16
+ %sext.num = sext i8 %num to i16
+ %zext.sext.num = zext i16 %sext.num to i32
+ %sext.zext.num = sext i16 %zext.num to i32
+ %zext.zext.sext.num = zext i32 %zext.sext.num to i64
+ %zext.sext.zext.num = zext i32 %sext.zext.num to i64
+ %a = getelementptr inbounds i8, i8* %mem, i64 %zext.zext.sext.num
+ %b = getelementptr inbounds i8, i8* %mem, i64 %zext.sext.zext.num
+ ret void
+}
+
+; CHECK-LABEL: uncompressStream
+; CHECK: MustAlias: i8* %a, i8* %b
+; CHECK: NoAlias: i8* %a, i8* %c
+define void @uncompressStream(i8* %mem) {
+ %zext.255 = zext i8 255 to i32
+ %sext.255 = sext i8 255 to i32
+ %a = getelementptr inbounds i8, i8* %mem, i32 255
+ %b = getelementptr inbounds i8, i8* %mem, i32 %zext.255
+ %c = getelementptr inbounds i8, i8* %mem, i32 %sext.255
+ ret void
+}
+
+; CHECK-LABEL: constantOffsetHeuristic_i3_i32
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: NoAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+define void @constantOffsetHeuristic_i3_i32(i32* %mem, i3 %val) {
+ %zext.plus.7 = add nsw i3 %val, 7
+ %zext.plus.4 = add nsw i3 %val, 4
+ %zext.val = zext i3 %val to i32
+ %zext.4 = zext i3 %zext.plus.4 to i32
+ %zext.7 = zext i3 %zext.plus.7 to i32
+ %a = getelementptr inbounds i32, i32* %mem, i32 %zext.4
+ %b = getelementptr inbounds i32, i32* %mem, i32 %zext.7
+ %c = getelementptr inbounds i32, i32* %mem, i32 %zext.val
+ ret void
+}
+
+; CHECK-LABEL: constantOffsetHeuristic_i8_i32
+; CHECK: NoAlias: i32* %a, i32* %b
+; CHECK: NoAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+define void @constantOffsetHeuristic_i8_i32(i32* %mem, i8 %val) {
+ %zext.plus.7 = add nsw i8 %val, 7
+ %zext.plus.4 = add nsw i8 %val, 4
+ %zext.val = zext i8 %val to i32
+ %zext.4 = zext i8 %zext.plus.4 to i32
+ %zext.7 = zext i8 %zext.plus.7 to i32
+ %a = getelementptr inbounds i32, i32* %mem, i32 %zext.4
+ %b = getelementptr inbounds i32, i32* %mem, i32 %zext.7
+ %c = getelementptr inbounds i32, i32* %mem, i32 %zext.val
+ ret void
+}
+
+; CHECK-LABEL: constantOffsetHeuristic_i3_i8
+; CHECK: PartialAlias: i32* %a, i32* %b
+; CHECK: NoAlias: i32* %a, i32* %c
+; CHECK: PartialAlias: i32* %b, i32* %c
+define void @constantOffsetHeuristic_i3_i8(i8* %mem, i3 %val) {
+ %zext.plus.7 = add nsw i3 %val, 7
+ %zext.plus.4 = add nsw i3 %val, 4
+ %zext.val = zext i3 %val to i32
+ %zext.4 = zext i3 %zext.plus.4 to i32
+ %zext.7 = zext i3 %zext.plus.7 to i32
+ %a.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.4
+ %b.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.7
+ %c.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.val
+ %a = bitcast i8* %a.8 to i32*
+ %b = bitcast i8* %b.8 to i32*
+ %c = bitcast i8* %c.8 to i32*
+ ret void
+}
+
+; CHECK-LABEL: constantOffsetHeuristic_i8_i8
+; CHECK: PartialAlias: i32* %a, i32* %b
+; CHECK: NoAlias: i32* %a, i32* %c
+; CHECK: NoAlias: i32* %b, i32* %c
+define void @constantOffsetHeuristic_i8_i8(i8* %mem, i8 %val) {
+ %zext.plus.7 = add nsw i8 %val, 7
+ %zext.plus.4 = add nsw i8 %val, 4
+ %zext.val = zext i8 %val to i32
+ %zext.4 = zext i8 %zext.plus.4 to i32
+ %zext.7 = zext i8 %zext.plus.7 to i32
+ %a.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.4
+ %b.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.7
+ %c.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.val
+ %a = bitcast i8* %a.8 to i32*
+ %b = bitcast i8* %b.8 to i32*
+ %c = bitcast i8* %c.8 to i32*
+ ret void
+}
diff --git a/test/Analysis/BasicAA/sequential-gep.ll b/test/Analysis/BasicAA/sequential-gep.ll
new file mode 100644
index 000000000000..c17a782aa04b
--- /dev/null
+++ b/test/Analysis/BasicAA/sequential-gep.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+; CHECK: Function: t1
+; CHECK: NoAlias: i32* %gep1, i32* %gep2
+define void @t1([8 x i32]* %p, i32 %addend, i32* %q) {
+ %knownnonzero = load i32, i32* %q, !range !0
+ %add = add nsw nuw i32 %addend, %knownnonzero
+ %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %addend
+ %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %add
+ ret void
+}
+
+; CHECK: Function: t2
+; CHECK: PartialAlias: i32* %gep1, i32* %gep2
+define void @t2([8 x i32]* %p, i32 %addend, i32* %q) {
+ %knownnonzero = load i32, i32* %q, !range !0
+ %add = add nsw nuw i32 %addend, %knownnonzero
+ %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 1, i32 %addend
+ %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 0, i32 %add
+ ret void
+}
+
+; CHECK: Function: t3
+; CHECK: MustAlias: i32* %gep1, i32* %gep2
+define void @t3([8 x i32]* %p, i32 %addend, i32* %q) {
+ %knownnonzero = load i32, i32* %q, !range !0
+ %add = add nsw nuw i32 %addend, %knownnonzero
+ %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 0, i32 %add
+ %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 0, i32 %add
+ ret void
+}
+
+; CHECK: Function: t4
+; CHECK: PartialAlias: i32* %gep1, i32* %gep2
+define void @t4([8 x i32]* %p, i32 %addend, i32* %q) {
+ %knownnonzero = load i32, i32* %q, !range !0
+ %add = add nsw nuw i32 %addend, %knownnonzero
+ %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 1, i32 %addend
+ %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 %add, i32 %add
+ ret void
+}
+
+; CHECK: Function: t5
+; CHECK: PartialAlias: i32* %gep2, i64* %bc
+define void @t5([8 x i32]* %p, i32 %addend, i32* %q) {
+ %knownnonzero = load i32, i32* %q, !range !0
+ %add = add nsw nuw i32 %addend, %knownnonzero
+ %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %addend
+ %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %add
+ %bc = bitcast i32* %gep1 to i64*
+ ret void
+}
+
+!0 = !{ i32 1, i32 5 }
diff --git a/test/Analysis/BasicAA/zext.ll b/test/Analysis/BasicAA/zext.ll
new file mode 100644
index 000000000000..685d45be6151
--- /dev/null
+++ b/test/Analysis/BasicAA/zext.ll
@@ -0,0 +1,231 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: test_with_zext
+; CHECK: NoAlias: i8* %a, i8* %b
+
+define void @test_with_zext() {
+ %1 = tail call i8* @malloc(i64 120)
+ %a = getelementptr inbounds i8, i8* %1, i64 8
+ %2 = getelementptr inbounds i8, i8* %1, i64 16
+ %3 = zext i32 3 to i64
+ %b = getelementptr inbounds i8, i8* %2, i64 %3
+ ret void
+}
+
+; CHECK-LABEL: test_with_lshr
+; CHECK: NoAlias: i8* %a, i8* %b
+
+define void @test_with_lshr(i64 %i) {
+ %1 = tail call i8* @malloc(i64 120)
+ %a = getelementptr inbounds i8, i8* %1, i64 8
+ %2 = getelementptr inbounds i8, i8* %1, i64 16
+ %3 = lshr i64 %i, 2
+ %b = getelementptr inbounds i8, i8* %2, i64 %3
+ ret void
+}
+
+; CHECK-LABEL: test_with_a_loop
+; CHECK: NoAlias: i8* %a, i8* %b
+
+define void @test_with_a_loop(i8* %mem) {
+ br label %for.loop
+
+for.loop:
+ %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+ %a = getelementptr inbounds i8, i8* %mem, i64 8
+ %a.plus1 = getelementptr inbounds i8, i8* %mem, i64 16
+ %i.64 = zext i32 %i to i64
+ %b = getelementptr inbounds i8, i8* %a.plus1, i64 %i.64
+ %i.plus1 = add nuw nsw i32 %i, 1
+ %cmp = icmp eq i32 %i.plus1, 10
+ br i1 %cmp, label %for.loop.exit, label %for.loop
+
+for.loop.exit:
+ ret void
+}
+
+; CHECK-LABEL: test_with_varying_base_pointer_in_loop
+; CHECK: NoAlias: i8* %a, i8* %b
+
+define void @test_with_varying_base_pointer_in_loop(i8* %mem.orig) {
+ br label %for.loop
+
+for.loop:
+ %mem = phi i8* [ %mem.orig, %0 ], [ %mem.plus1, %for.loop ]
+ %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+ %a = getelementptr inbounds i8, i8* %mem, i64 8
+ %a.plus1 = getelementptr inbounds i8, i8* %mem, i64 16
+ %i.64 = zext i32 %i to i64
+ %b = getelementptr inbounds i8, i8* %a.plus1, i64 %i.64
+ %i.plus1 = add nuw nsw i32 %i, 1
+ %mem.plus1 = getelementptr inbounds i8, i8* %mem, i64 8
+ %cmp = icmp eq i32 %i.plus1, 10
+ br i1 %cmp, label %for.loop.exit, label %for.loop
+
+for.loop.exit:
+ ret void
+}
+
+; CHECK-LABEL: test_sign_extension
+; CHECK: PartialAlias: i64* %b.i64, i8* %a
+
+define void @test_sign_extension(i32 %p) {
+ %1 = tail call i8* @malloc(i64 120)
+ %p.64 = zext i32 %p to i64
+ %a = getelementptr inbounds i8, i8* %1, i64 %p.64
+ %p.minus1 = add i32 %p, -1
+ %p.minus1.64 = zext i32 %p.minus1 to i64
+ %b.i8 = getelementptr inbounds i8, i8* %1, i64 %p.minus1.64
+ %b.i64 = bitcast i8* %b.i8 to i64*
+ ret void
+}
+
+; CHECK-LABEL: test_fe_tools
+; CHECK: PartialAlias: i32* %a, i32* %b
+
+define void @test_fe_tools([8 x i32]* %values) {
+ br label %reorder
+
+for.loop:
+ %i = phi i32 [ 0, %reorder ], [ %i.next, %for.loop ]
+ %idxprom = zext i32 %i to i64
+ %b = getelementptr inbounds [8 x i32], [8 x i32]* %values, i64 0, i64 %idxprom
+ %i.next = add nuw nsw i32 %i, 1
+ %1 = icmp eq i32 %i.next, 10
+ br i1 %1, label %for.loop.exit, label %for.loop
+
+reorder:
+ %a = getelementptr inbounds [8 x i32], [8 x i32]* %values, i64 0, i64 1
+ br label %for.loop
+
+for.loop.exit:
+ ret void
+}
+
+@b = global i32 0, align 4
+@d = global i32 0, align 4
+
+; CHECK-LABEL: test_spec2006
+; CHECK: PartialAlias: i32** %x, i32** %y
+
+define void @test_spec2006() {
+ %h = alloca [1 x [2 x i32*]], align 16
+ %d.val = load i32, i32* @d, align 4
+ %d.promoted = sext i32 %d.val to i64
+ %1 = icmp slt i32 %d.val, 2
+ br i1 %1, label %.lr.ph, label %3
+
+.lr.ph: ; preds = %0
+ br label %2
+
+; <label>:2 ; preds = %.lr.ph, %2
+ %i = phi i32 [ %d.val, %.lr.ph ], [ %i.plus1, %2 ]
+ %i.promoted = sext i32 %i to i64
+ %x = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 %d.promoted, i64 %i.promoted
+ %i.plus1 = add nsw i32 %i, 1
+ %cmp = icmp slt i32 %i.plus1, 2
+ br i1 %cmp, label %2, label %3
+
+; <label>:3 ; preds = %._crit_edge, %0
+ %y = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
+ ret void
+}
+
+; CHECK-LABEL: test_modulo_analysis_easy_case
+; CHECK: NoAlias: i32** %x, i32** %y
+
+define void @test_modulo_analysis_easy_case(i64 %i) {
+ %h = alloca [1 x [2 x i32*]], align 16
+ %x = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 %i, i64 0
+ %y = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
+ ret void
+}
+
+; CHECK-LABEL: test_modulo_analysis_in_loop
+; CHECK: NoAlias: i32** %x, i32** %y
+
+define void @test_modulo_analysis_in_loop() {
+ %h = alloca [1 x [2 x i32*]], align 16
+ br label %for.loop
+
+for.loop:
+ %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+ %i.promoted = sext i32 %i to i64
+ %x = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 %i.promoted, i64 0
+ %y = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
+ %i.plus1 = add nsw i32 %i, 1
+ %cmp = icmp slt i32 %i.plus1, 2
+ br i1 %cmp, label %for.loop, label %for.loop.exit
+
+for.loop.exit:
+ ret void
+}
+
+; CHECK-LABEL: test_modulo_analysis_with_global
+; CHECK: PartialAlias: i32** %x, i32** %y
+
+define void @test_modulo_analysis_with_global() {
+ %h = alloca [1 x [2 x i32*]], align 16
+ %b = load i32, i32* @b, align 4
+ %b.promoted = sext i32 %b to i64
+ br label %for.loop
+
+for.loop:
+ %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+ %i.promoted = sext i32 %i to i64
+ %x = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 %i.promoted, i64 %b.promoted
+ %y = getelementptr inbounds [1 x [2 x i32*]], [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
+ %i.plus1 = add nsw i32 %i, 1
+ %cmp = icmp slt i32 %i.plus1, 2
+ br i1 %cmp, label %for.loop, label %for.loop.exit
+
+for.loop.exit:
+ ret void
+}
+
+; CHECK-LABEL: test_const_eval
+; CHECK: NoAlias: i8* %a, i8* %b
+define void @test_const_eval(i8* %ptr, i64 %offset) {
+ %a = getelementptr inbounds i8, i8* %ptr, i64 %offset
+ %a.dup = getelementptr inbounds i8, i8* %ptr, i64 %offset
+ %three = zext i32 3 to i64
+ %b = getelementptr inbounds i8, i8* %a.dup, i64 %three
+ ret void
+}
+
+; CHECK-LABEL: test_const_eval_scaled
+; CHECK: MustAlias: i8* %a, i8* %b
+define void @test_const_eval_scaled(i8* %ptr) {
+ %three = zext i32 3 to i64
+ %six = mul i64 %three, 2
+ %a = getelementptr inbounds i8, i8* %ptr, i64 %six
+ %b = getelementptr inbounds i8, i8* %ptr, i64 6
+ ret void
+}
+
+; CHECK-LABEL: Function: foo
+; CHECK: MustAlias: float* %arrayidx, float* %arrayidx4.84
+define float @foo(i32 *%A, float %rend, float** %wayar) {
+entry:
+ %x0 = load i32, i32* %A, align 4
+ %conv = sext i32 %x0 to i64
+ %mul = shl nsw i64 %conv, 3
+ %call = tail call i8* @malloc(i64 %mul)
+ %x1 = bitcast i8* %call to float*
+
+ %sub = add nsw i32 %x0, -1
+ %idxprom = sext i32 %sub to i64
+ %arrayidx = getelementptr inbounds float, float* %x1, i64 %idxprom
+ store float %rend, float* %arrayidx, align 8
+
+ %indvars.iv76.83 = add nsw i64 %conv, -1
+ %arrayidx4.84 = getelementptr inbounds float, float* %x1, i64 %indvars.iv76.83
+ %x4 = load float, float* %arrayidx4.84, align 8
+
+ ret float %x4
+}
+
+; Function Attrs: nounwind
+declare noalias i8* @malloc(i64)
diff --git a/test/Analysis/BlockFrequencyInfo/bad_input.ll b/test/Analysis/BlockFrequencyInfo/bad_input.ll
index e5b1f500e1e6..20b87e6dfcb4 100644
--- a/test/Analysis/BlockFrequencyInfo/bad_input.ll
+++ b/test/Analysis/BlockFrequencyInfo/bad_input.ll
@@ -9,8 +9,8 @@ define void @branch_weight_0(i32 %a) {
entry:
br label %for.body
-; Check that we get 1,4 instead of 0,3.
-; CHECK-NEXT: for.body: float = 4.0,
+; Check that we get 1 and a huge frequency instead of 0,3.
+; CHECK-NEXT: for.body: float = 2147483647.8,
for.body:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
call void @g(i32 %i)
diff --git a/test/Analysis/BlockFrequencyInfo/basic.ll b/test/Analysis/BlockFrequencyInfo/basic.ll
index 728adf007f42..8e81cc2ea31c 100644
--- a/test/Analysis/BlockFrequencyInfo/basic.ll
+++ b/test/Analysis/BlockFrequencyInfo/basic.ll
@@ -104,13 +104,13 @@ for.cond1.preheader:
%x.024 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
br label %for.cond4.preheader
-; CHECK-NEXT: for.cond4.preheader: float = 16008001.0,
+; CHECK-NEXT: for.cond4.preheader: float = 16007984.8,
for.cond4.preheader:
%y.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc9, %for.inc8 ]
%add = add i32 %y.023, %x.024
br label %for.body6
-; CHECK-NEXT: for.body6: float = 64048012001.0,
+; CHECK-NEXT: for.body6: float = 64047914563.9,
for.body6:
%z.022 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
%add7 = add i32 %add, %z.022
@@ -119,7 +119,7 @@ for.body6:
%cmp5 = icmp ugt i32 %inc, %a
br i1 %cmp5, label %for.inc8, label %for.body6, !prof !2
-; CHECK-NEXT: for.inc8: float = 16008001.0,
+; CHECK-NEXT: for.inc8: float = 16007984.8,
for.inc8:
%inc9 = add i32 %y.023, 1
%cmp2 = icmp ugt i32 %inc9, %a
diff --git a/test/Analysis/BlockFrequencyInfo/irreducible_loop_crash.ll b/test/Analysis/BlockFrequencyInfo/irreducible_loop_crash.ll
new file mode 100644
index 000000000000..2bcd088dd16e
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/irreducible_loop_crash.ll
@@ -0,0 +1,155 @@
+; RUN: opt < %s -analyze -block-freq
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fn1(i32* %f) {
+entry:
+ %tobool7 = icmp eq i32 undef, 0
+ br i1 undef, label %if.end.12, label %for.body.5
+
+for.inc:
+ store i32 undef, i32* %f, align 4
+ br label %for.body.5
+
+for.body.5: ; preds = %for.cond.4.preheader
+ br i1 %tobool7, label %for.inc.9, label %for.inc
+
+for.inc.9: ; preds = %for.body.5
+ br i1 %tobool7, label %for.inc.9.1, label %for.inc
+
+if.end.12: ; preds = %if.end.12, %for.body
+ br i1 undef, label %for.end.17, label %for.inc
+
+for.end.17: ; preds = %entry
+ ret void
+
+for.inc.9.1: ; preds = %for.inc.9
+ br i1 %tobool7, label %for.inc.9.2, label %for.inc
+
+for.inc.9.2: ; preds = %for.inc.9.1
+ br i1 %tobool7, label %for.inc.9.3, label %for.inc
+
+for.inc.9.3: ; preds = %for.inc.9.2
+ br i1 %tobool7, label %for.inc.9.4, label %for.inc
+
+for.inc.9.4: ; preds = %for.inc.9.3
+ br i1 %tobool7, label %for.inc.9.5, label %for.inc
+
+for.inc.9.5: ; preds = %for.inc.9.4
+ br i1 %tobool7, label %for.inc.9.6, label %for.inc
+
+for.inc.9.6: ; preds = %for.inc.9.5
+ br i1 %tobool7, label %for.inc.9.7, label %for.inc
+
+for.inc.9.7: ; preds = %for.inc.9.6
+ br i1 %tobool7, label %for.inc.9.8, label %for.inc
+
+for.inc.9.8: ; preds = %for.inc.9.7
+ br i1 %tobool7, label %for.inc.9.9, label %for.inc
+
+for.inc.9.9: ; preds = %for.inc.9.8
+ br i1 %tobool7, label %for.inc.9.10, label %for.inc
+
+for.inc.9.10: ; preds = %for.inc.9.9
+ br i1 %tobool7, label %for.inc.9.11, label %for.inc
+
+for.inc.9.11: ; preds = %for.inc.9.10
+ br i1 %tobool7, label %for.inc.9.12, label %for.inc
+
+for.inc.9.12: ; preds = %for.inc.9.11
+ br i1 %tobool7, label %for.inc.9.13, label %for.inc
+
+for.inc.9.13: ; preds = %for.inc.9.12
+ br i1 %tobool7, label %for.inc.9.14, label %for.inc
+
+for.inc.9.14: ; preds = %for.inc.9.13
+ br i1 %tobool7, label %for.inc.9.15, label %for.inc
+
+for.inc.9.15: ; preds = %for.inc.9.14
+ br i1 %tobool7, label %for.inc.9.16, label %for.inc
+
+for.inc.9.16: ; preds = %for.inc.9.15
+ br i1 %tobool7, label %for.inc.9.17, label %for.inc
+
+for.inc.9.17: ; preds = %for.inc.9.16
+ br i1 %tobool7, label %for.inc.9.18, label %for.inc
+
+for.inc.9.18: ; preds = %for.inc.9.17
+ br i1 %tobool7, label %for.inc.9.19, label %for.inc
+
+for.inc.9.19: ; preds = %for.inc.9.18
+ br i1 %tobool7, label %for.inc.9.20, label %for.inc
+
+for.inc.9.20: ; preds = %for.inc.9.19
+ br i1 %tobool7, label %for.inc.9.21, label %for.inc
+
+for.inc.9.21: ; preds = %for.inc.9.20
+ br i1 %tobool7, label %for.inc.9.22, label %for.inc
+
+for.inc.9.22: ; preds = %for.inc.9.21
+ br i1 %tobool7, label %for.inc.9.23, label %for.inc
+
+for.inc.9.23: ; preds = %for.inc.9.22
+ br i1 %tobool7, label %for.inc.9.24, label %for.inc
+
+for.inc.9.24: ; preds = %for.inc.9.23
+ br i1 %tobool7, label %for.inc.9.25, label %for.inc
+
+for.inc.9.25: ; preds = %for.inc.9.24
+ br i1 %tobool7, label %for.inc.9.26, label %for.inc
+
+for.inc.9.26: ; preds = %for.inc.9.25
+ br i1 %tobool7, label %for.inc.9.27, label %for.inc
+
+for.inc.9.27: ; preds = %for.inc.9.26
+ br i1 %tobool7, label %for.inc.9.28, label %for.inc
+
+for.inc.9.28: ; preds = %for.inc.9.27
+ br i1 %tobool7, label %for.inc.9.29, label %for.inc
+
+for.inc.9.29: ; preds = %for.inc.9.28
+ br i1 %tobool7, label %for.inc.9.30, label %for.inc
+
+for.inc.9.30: ; preds = %for.inc.9.29
+ br i1 %tobool7, label %for.inc.9.31, label %for.inc
+
+for.inc.9.31: ; preds = %for.inc.9.30
+ br i1 %tobool7, label %for.inc.9.32, label %for.inc
+
+for.inc.9.32: ; preds = %for.inc.9.31
+ br i1 %tobool7, label %for.inc.9.33, label %for.inc
+
+for.inc.9.33: ; preds = %for.inc.9.32
+ br i1 %tobool7, label %for.inc.9.34, label %for.inc
+
+for.inc.9.34: ; preds = %for.inc.9.33
+ br i1 %tobool7, label %for.inc.9.35, label %for.inc
+
+for.inc.9.35: ; preds = %for.inc.9.34
+ br i1 %tobool7, label %for.inc.9.36, label %for.inc
+
+for.inc.9.36: ; preds = %for.inc.9.35
+ br i1 %tobool7, label %for.inc.9.37, label %for.inc
+
+for.inc.9.37: ; preds = %for.inc.9.36
+ br i1 %tobool7, label %for.inc.9.38, label %for.inc
+
+for.inc.9.38: ; preds = %for.inc.9.37
+ br i1 %tobool7, label %for.inc.9.39, label %for.inc
+
+for.inc.9.39: ; preds = %for.inc.9.38
+ br i1 %tobool7, label %for.inc.9.40, label %for.inc
+
+for.inc.9.40: ; preds = %for.inc.9.39
+ br i1 %tobool7, label %for.inc.9.41, label %for.inc
+
+for.inc.9.41: ; preds = %for.inc.9.40
+ br i1 %tobool7, label %for.inc.9.42, label %for.inc
+
+for.inc.9.42: ; preds = %for.inc.9.41
+ br i1 %tobool7, label %for.inc.9.43, label %for.inc
+
+for.inc.9.43: ; preds = %for.inc.9.42
+ br i1 %tobool7, label %if.end.12, label %for.inc
+}
diff --git a/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll b/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
index 534c4ad0e94f..29a9f3b29fb0 100644
--- a/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
+++ b/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
@@ -93,7 +93,7 @@ for.cond4: ; preds = %for.inc, %for.body3
%cmp5 = icmp slt i32 %2, 100
br i1 %cmp5, label %for.body6, label %for.end, !prof !3
-; CHECK: - for.body6: float = 500000.5, int = 4000003
+; CHECK: - for.body6: float = 500000.5, int = 4000004
for.body6: ; preds = %for.cond4
call void @bar()
br label %for.inc
@@ -143,7 +143,7 @@ for.cond16: ; preds = %for.inc19, %for.bod
%cmp17 = icmp slt i32 %8, 10000
br i1 %cmp17, label %for.body18, label %for.end21, !prof !4
-; CHECK: - for.body18: float = 500000.5, int = 4000003
+; CHECK: - for.body18: float = 499999.9, int = 3999998
for.body18: ; preds = %for.cond16
call void @bar()
br label %for.inc19
@@ -175,7 +175,7 @@ for.cond26: ; preds = %for.inc29, %for.end
%cmp27 = icmp slt i32 %12, 1000000
br i1 %cmp27, label %for.body28, label %for.end31, !prof !5
-; CHECK: - for.body28: float = 500000.5, int = 4000003
+; CHECK: - for.body28: float = 499995.2, int = 3999961
for.body28: ; preds = %for.cond26
call void @bar()
br label %for.inc29
diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll
index 2c9c15618682..d833b8339aac 100644
--- a/test/Analysis/BranchProbabilityInfo/basic.ll
+++ b/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -4,7 +4,7 @@ define i32 @test1(i32 %i, i32* %a) {
; CHECK: Printing analysis {{.*}} for function 'test1'
entry:
br label %body
-; CHECK: edge entry -> body probability is 16 / 16 = 100%
+; CHECK: edge entry -> body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
body:
%iv = phi i32 [ 0, %entry ], [ %next, %body ]
@@ -15,8 +15,8 @@ body:
%next = add i32 %iv, 1
%exitcond = icmp eq i32 %next, %i
br i1 %exitcond, label %exit, label %body
-; CHECK: edge body -> exit probability is 4 / 128
-; CHECK: edge body -> body probability is 124 / 128
+; CHECK: edge body -> exit probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge body -> body probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
exit:
ret i32 %sum
@@ -27,16 +27,16 @@ define i32 @test2(i32 %i, i32 %a, i32 %b) {
entry:
%cond = icmp ult i32 %i, 42
br i1 %cond, label %then, label %else, !prof !0
-; CHECK: edge entry -> then probability is 64 / 68
-; CHECK: edge entry -> else probability is 4 / 68
+; CHECK: edge entry -> then probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge]
+; CHECK: edge entry -> else probability is 0x07878788 / 0x80000000 = 5.88%
then:
br label %exit
-; CHECK: edge then -> exit probability is 16 / 16 = 100%
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
else:
br label %exit
-; CHECK: edge else -> exit probability is 16 / 16 = 100%
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
exit:
%result = phi i32 [ %a, %then ], [ %b, %else ]
@@ -52,31 +52,31 @@ entry:
i32 2, label %case_c
i32 3, label %case_d
i32 4, label %case_e ], !prof !1
-; CHECK: edge entry -> case_a probability is 4 / 80
-; CHECK: edge entry -> case_b probability is 4 / 80
-; CHECK: edge entry -> case_c probability is 64 / 80
-; CHECK: edge entry -> case_d probability is 4 / 80
-; CHECK: edge entry -> case_e probability is 4 / 80
+; CHECK: edge entry -> case_a probability is 0x06666666 / 0x80000000 = 5.00%
+; CHECK: edge entry -> case_b probability is 0x06666666 / 0x80000000 = 5.00%
+; CHECK: edge entry -> case_c probability is 0x66666666 / 0x80000000 = 80.00%
+; CHECK: edge entry -> case_d probability is 0x06666666 / 0x80000000 = 5.00%
+; CHECK: edge entry -> case_e probability is 0x06666666 / 0x80000000 = 5.00%
case_a:
br label %exit
-; CHECK: edge case_a -> exit probability is 16 / 16 = 100%
+; CHECK: edge case_a -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
case_b:
br label %exit
-; CHECK: edge case_b -> exit probability is 16 / 16 = 100%
+; CHECK: edge case_b -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
case_c:
br label %exit
-; CHECK: edge case_c -> exit probability is 16 / 16 = 100%
+; CHECK: edge case_c -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
case_d:
br label %exit
-; CHECK: edge case_d -> exit probability is 16 / 16 = 100%
+; CHECK: edge case_d -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
case_e:
br label %exit
-; CHECK: edge case_e -> exit probability is 16 / 16 = 100%
+; CHECK: edge case_e -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
exit:
%result = phi i32 [ %a, %case_a ],
@@ -99,9 +99,9 @@ entry:
i64 2, label %sw.bb
i64 5, label %sw.bb1
], !prof !2
-; CHECK: edge entry -> return probability is 7 / 85
-; CHECK: edge entry -> sw.bb probability is 14 / 85
-; CHECK: edge entry -> sw.bb1 probability is 64 / 85
+; CHECK: edge entry -> return probability is 0x0a8a8a8b / 0x80000000 = 8.24%
+; CHECK: edge entry -> sw.bb probability is 0x15151515 / 0x80000000 = 16.47%
+; CHECK: edge entry -> sw.bb1 probability is 0x60606060 / 0x80000000 = 75.29%
sw.bb:
br label %return
@@ -122,17 +122,17 @@ define i32 @test5(i32 %a, i32 %b, i1 %flag) {
; CHECK: Printing analysis {{.*}} for function 'test5'
entry:
br i1 %flag, label %then, label %else
-; CHECK: edge entry -> then probability is 4 / 68
-; CHECK: edge entry -> else probability is 64 / 68
+; CHECK: edge entry -> then probability is 0x07878788 / 0x80000000 = 5.88%
+; CHECK: edge entry -> else probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge]
then:
call void @coldfunc()
br label %exit
-; CHECK: edge then -> exit probability is 16 / 16 = 100%
+; CHECK: edge then -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
else:
br label %exit
-; CHECK: edge else -> exit probability is 16 / 16 = 100%
+; CHECK: edge else -> exit probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
exit:
%result = phi i32 [ %a, %then ], [ %b, %else ]
@@ -149,8 +149,8 @@ define i32 @test_cold_call_sites(i32* %a) {
; after that is fixed.
; CHECK: Printing analysis {{.*}} for function 'test_cold_call_sites'
-; CHECK: edge entry -> then probability is 4 / 68 = 5.88235%
-; CHECK: edge entry -> else probability is 64 / 68 = 94.1176% [HOT edge]
+; CHECK: edge entry -> then probability is 0x07878788 / 0x80000000 = 5.88%
+; CHECK: edge entry -> else probability is 0x78787878 / 0x80000000 = 94.12% [HOT edge]
entry:
%gep1 = getelementptr i32, i32* %a, i32 1
@@ -179,8 +179,8 @@ define i32 @zero1(i32 %i, i32 %a, i32 %b) {
entry:
%cond = icmp eq i32 %i, 0
br i1 %cond, label %then, label %else
-; CHECK: edge entry -> then probability is 12 / 32
-; CHECK: edge entry -> else probability is 20 / 32
+; CHECK: edge entry -> then probability is 0x30000000 / 0x80000000 = 37.50%
+; CHECK: edge entry -> else probability is 0x50000000 / 0x80000000 = 62.50%
then:
br label %exit
@@ -198,8 +198,8 @@ define i32 @zero2(i32 %i, i32 %a, i32 %b) {
entry:
%cond = icmp ne i32 %i, -1
br i1 %cond, label %then, label %else
-; CHECK: edge entry -> then probability is 20 / 32
-; CHECK: edge entry -> else probability is 12 / 32
+; CHECK: edge entry -> then probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> else probability is 0x30000000 / 0x80000000 = 37.50%
then:
br label %exit
@@ -220,8 +220,8 @@ entry:
%and = and i32 %i, 2
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %then, label %else
-; CHECK: edge entry -> then probability is 16 / 32
-; CHECK: edge entry -> else probability is 16 / 32
+; CHECK: edge entry -> then probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge entry -> else probability is 0x40000000 / 0x80000000 = 50.00%
then:
; AND'ing with other bitmask might be something else, so we still assume the
@@ -229,8 +229,8 @@ then:
%and2 = and i32 %i, 5
%tobool2 = icmp eq i32 %and2, 0
br i1 %tobool2, label %else, label %exit
-; CHECK: edge then -> else probability is 12 / 32
-; CHECK: edge then -> exit probability is 20 / 32
+; CHECK: edge then -> else probability is 0x30000000 / 0x80000000 = 37.50%
+; CHECK: edge then -> exit probability is 0x50000000 / 0x80000000 = 62.50%
else:
br label %exit
diff --git a/test/Analysis/BranchProbabilityInfo/loop.ll b/test/Analysis/BranchProbabilityInfo/loop.ll
index e792790f84f8..5be7adf3909d 100644
--- a/test/Analysis/BranchProbabilityInfo/loop.ll
+++ b/test/Analysis/BranchProbabilityInfo/loop.ll
@@ -9,13 +9,13 @@ declare void @g4()
define void @test1(i32 %a, i32 %b) {
entry:
br label %do.body
-; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+; CHECK: edge entry -> do.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
do.body:
%i.0 = phi i32 [ 0, %entry ], [ %inc3, %do.end ]
call void @g1()
br label %do.body1
-; CHECK: edge do.body -> do.body1 probability is 16 / 16 = 100%
+; CHECK: edge do.body -> do.body1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
do.body1:
%j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.body1 ]
@@ -23,16 +23,16 @@ do.body1:
%inc = add nsw i32 %j.0, 1
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %do.body1, label %do.end
-; CHECK: edge do.body1 -> do.body1 probability is 124 / 128
-; CHECK: edge do.body1 -> do.end probability is 4 / 128
+; CHECK: edge do.body1 -> do.body1 probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge do.body1 -> do.end probability is 0x04000000 / 0x80000000 = 3.12%
do.end:
call void @g3()
%inc3 = add nsw i32 %i.0, 1
%cmp4 = icmp slt i32 %inc3, %a
br i1 %cmp4, label %do.body, label %do.end5
-; CHECK: edge do.end -> do.body probability is 124 / 128
-; CHECK: edge do.end -> do.end5 probability is 4 / 128
+; CHECK: edge do.end -> do.body probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge do.end -> do.end5 probability is 0x04000000 / 0x80000000 = 3.12%
do.end5:
call void @g4()
@@ -43,20 +43,20 @@ define void @test2(i32 %a, i32 %b) {
entry:
%cmp9 = icmp sgt i32 %a, 0
br i1 %cmp9, label %for.body.lr.ph, label %for.end6
-; CHECK: edge entry -> for.body.lr.ph probability is 20 / 32
-; CHECK: edge entry -> for.end6 probability is 12 / 32
+; CHECK: edge entry -> for.body.lr.ph probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> for.end6 probability is 0x30000000 / 0x80000000 = 37.50%
for.body.lr.ph:
%cmp27 = icmp sgt i32 %b, 0
br label %for.body
-; CHECK: edge for.body.lr.ph -> for.body probability is 16 / 16 = 100%
+; CHECK: edge for.body.lr.ph -> for.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
for.body:
%i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc5, %for.end ]
call void @g1()
br i1 %cmp27, label %for.body3, label %for.end
-; CHECK: edge for.body -> for.body3 probability is 20 / 32 = 62.5%
-; CHECK: edge for.body -> for.end probability is 12 / 32 = 37.5%
+; CHECK: edge for.body -> for.body3 probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge for.body -> for.end probability is 0x30000000 / 0x80000000 = 37.50%
for.body3:
%j.08 = phi i32 [ %inc, %for.body3 ], [ 0, %for.body ]
@@ -64,16 +64,16 @@ for.body3:
%inc = add nsw i32 %j.08, 1
%exitcond = icmp eq i32 %inc, %b
br i1 %exitcond, label %for.end, label %for.body3
-; CHECK: edge for.body3 -> for.end probability is 4 / 128
-; CHECK: edge for.body3 -> for.body3 probability is 124 / 128
+; CHECK: edge for.body3 -> for.end probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge for.body3 -> for.body3 probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
for.end:
call void @g3()
%inc5 = add nsw i32 %i.010, 1
%exitcond11 = icmp eq i32 %inc5, %a
br i1 %exitcond11, label %for.end6, label %for.body
-; CHECK: edge for.end -> for.end6 probability is 4 / 128
-; CHECK: edge for.end -> for.body probability is 124 / 128
+; CHECK: edge for.end -> for.end6 probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge for.end -> for.body probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
for.end6:
call void @g4()
@@ -83,7 +83,7 @@ for.end6:
define void @test3(i32 %a, i32 %b, i32* %c) {
entry:
br label %do.body
-; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+; CHECK: edge entry -> do.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
do.body:
%i.0 = phi i32 [ 0, %entry ], [ %inc4, %if.end ]
@@ -91,8 +91,8 @@ do.body:
%0 = load i32, i32* %c, align 4
%cmp = icmp slt i32 %0, 42
br i1 %cmp, label %do.body1, label %if.end
-; CHECK: edge do.body -> do.body1 probability is 16 / 32 = 50%
-; CHECK: edge do.body -> if.end probability is 16 / 32 = 50%
+; CHECK: edge do.body -> do.body1 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge do.body -> if.end probability is 0x40000000 / 0x80000000 = 50.00%
do.body1:
%j.0 = phi i32 [ %inc, %do.body1 ], [ 0, %do.body ]
@@ -100,16 +100,16 @@ do.body1:
%inc = add nsw i32 %j.0, 1
%cmp2 = icmp slt i32 %inc, %b
br i1 %cmp2, label %do.body1, label %if.end
-; CHECK: edge do.body1 -> do.body1 probability is 124 / 128
-; CHECK: edge do.body1 -> if.end probability is 4 / 128
+; CHECK: edge do.body1 -> do.body1 probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge do.body1 -> if.end probability is 0x04000000 / 0x80000000 = 3.12%
if.end:
call void @g3()
%inc4 = add nsw i32 %i.0, 1
%cmp5 = icmp slt i32 %inc4, %a
br i1 %cmp5, label %do.body, label %do.end6
-; CHECK: edge if.end -> do.body probability is 124 / 128
-; CHECK: edge if.end -> do.end6 probability is 4 / 128
+; CHECK: edge if.end -> do.body probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge if.end -> do.end6 probability is 0x04000000 / 0x80000000 = 3.12%
do.end6:
call void @g4()
@@ -119,7 +119,7 @@ do.end6:
define void @test4(i32 %a, i32 %b, i32* %c) {
entry:
br label %do.body
-; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+; CHECK: edge entry -> do.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
do.body:
%i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
@@ -127,8 +127,8 @@ do.body:
%0 = load i32, i32* %c, align 4
%cmp = icmp slt i32 %0, 42
br i1 %cmp, label %return, label %do.body1
-; CHECK: edge do.body -> return probability is 4 / 128
-; CHECK: edge do.body -> do.body1 probability is 124 / 128
+; CHECK: edge do.body -> return probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge do.body -> do.body1 probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
do.body1:
%j.0 = phi i32 [ %inc, %do.body1 ], [ 0, %do.body ]
@@ -136,21 +136,21 @@ do.body1:
%inc = add nsw i32 %j.0, 1
%cmp2 = icmp slt i32 %inc, %b
br i1 %cmp2, label %do.body1, label %do.end
-; CHECK: edge do.body1 -> do.body1 probability is 124 / 128
-; CHECK: edge do.body1 -> do.end probability is 4 / 128
+; CHECK: edge do.body1 -> do.body1 probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge do.body1 -> do.end probability is 0x04000000 / 0x80000000 = 3.12%
do.end:
call void @g3()
%inc4 = add nsw i32 %i.0, 1
%cmp5 = icmp slt i32 %inc4, %a
br i1 %cmp5, label %do.body, label %do.end6
-; CHECK: edge do.end -> do.body probability is 124 / 128
-; CHECK: edge do.end -> do.end6 probability is 4 / 128
+; CHECK: edge do.end -> do.body probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge do.end -> do.end6 probability is 0x04000000 / 0x80000000 = 3.12%
do.end6:
call void @g4()
br label %return
-; CHECK: edge do.end6 -> return probability is 16 / 16 = 100%
+; CHECK: edge do.end6 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
return:
ret void
@@ -159,42 +159,42 @@ return:
define void @test5(i32 %a, i32 %b, i32* %c) {
entry:
br label %do.body
-; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+; CHECK: edge entry -> do.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
do.body:
%i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
call void @g1()
br label %do.body1
-; CHECK: edge do.body -> do.body1 probability is 16 / 16 = 100%
+; CHECK: edge do.body -> do.body1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
do.body1:
%j.0 = phi i32 [ 0, %do.body ], [ %inc, %if.end ]
%0 = load i32, i32* %c, align 4
%cmp = icmp slt i32 %0, 42
br i1 %cmp, label %return, label %if.end
-; CHECK: edge do.body1 -> return probability is 4 / 128
-; CHECK: edge do.body1 -> if.end probability is 124 / 128
+; CHECK: edge do.body1 -> return probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge do.body1 -> if.end probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
if.end:
call void @g2()
%inc = add nsw i32 %j.0, 1
%cmp2 = icmp slt i32 %inc, %b
br i1 %cmp2, label %do.body1, label %do.end
-; CHECK: edge if.end -> do.body1 probability is 124 / 128
-; CHECK: edge if.end -> do.end probability is 4 / 128
+; CHECK: edge if.end -> do.body1 probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge if.end -> do.end probability is 0x04000000 / 0x80000000 = 3.12%
do.end:
call void @g3()
%inc4 = add nsw i32 %i.0, 1
%cmp5 = icmp slt i32 %inc4, %a
br i1 %cmp5, label %do.body, label %do.end6
-; CHECK: edge do.end -> do.body probability is 124 / 128
-; CHECK: edge do.end -> do.end6 probability is 4 / 128
+; CHECK: edge do.end -> do.body probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge do.end -> do.end6 probability is 0x04000000 / 0x80000000 = 3.12%
do.end6:
call void @g4()
br label %return
-; CHECK: edge do.end6 -> return probability is 16 / 16 = 100%
+; CHECK: edge do.end6 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
return:
ret void
@@ -203,13 +203,13 @@ return:
define void @test6(i32 %a, i32 %b, i32* %c) {
entry:
br label %do.body
-; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+; CHECK: edge entry -> do.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
do.body:
%i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
call void @g1()
br label %do.body1
-; CHECK: edge do.body -> do.body1 probability is 16 / 16 = 100%
+; CHECK: edge do.body -> do.body1 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
do.body1:
%j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.cond ]
@@ -217,28 +217,28 @@ do.body1:
%0 = load i32, i32* %c, align 4
%cmp = icmp slt i32 %0, 42
br i1 %cmp, label %return, label %do.cond
-; CHECK: edge do.body1 -> return probability is 4 / 128
-; CHECK: edge do.body1 -> do.cond probability is 124 / 128
+; CHECK: edge do.body1 -> return probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge do.body1 -> do.cond probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
do.cond:
%inc = add nsw i32 %j.0, 1
%cmp2 = icmp slt i32 %inc, %b
br i1 %cmp2, label %do.body1, label %do.end
-; CHECK: edge do.cond -> do.body1 probability is 124 / 128
-; CHECK: edge do.cond -> do.end probability is 4 / 128
+; CHECK: edge do.cond -> do.body1 probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge do.cond -> do.end probability is 0x04000000 / 0x80000000 = 3.12%
do.end:
call void @g3()
%inc4 = add nsw i32 %i.0, 1
%cmp5 = icmp slt i32 %inc4, %a
br i1 %cmp5, label %do.body, label %do.end6
-; CHECK: edge do.end -> do.body probability is 124 / 128
-; CHECK: edge do.end -> do.end6 probability is 4 / 128
+; CHECK: edge do.end -> do.body probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
+; CHECK: edge do.end -> do.end6 probability is 0x04000000 / 0x80000000 = 3.12%
do.end6:
call void @g4()
br label %return
-; CHECK: edge do.end6 -> return probability is 16 / 16 = 100%
+; CHECK: edge do.end6 -> return probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
return:
ret void
@@ -248,27 +248,27 @@ define void @test7(i32 %a, i32 %b, i32* %c) {
entry:
%cmp10 = icmp sgt i32 %a, 0
br i1 %cmp10, label %for.body.lr.ph, label %for.end7
-; CHECK: edge entry -> for.body.lr.ph probability is 20 / 32
-; CHECK: edge entry -> for.end7 probability is 12 / 32
+; CHECK: edge entry -> for.body.lr.ph probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> for.end7 probability is 0x30000000 / 0x80000000 = 37.50%
for.body.lr.ph:
%cmp38 = icmp sgt i32 %b, 0
br label %for.body
-; CHECK: edge for.body.lr.ph -> for.body probability is 16 / 16 = 100%
+; CHECK: edge for.body.lr.ph -> for.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
for.body:
%i.011 = phi i32 [ 0, %for.body.lr.ph ], [ %inc6, %for.inc5 ]
%0 = load i32, i32* %c, align 4
%cmp1 = icmp eq i32 %0, %i.011
br i1 %cmp1, label %for.inc5, label %if.end
-; CHECK: edge for.body -> for.inc5 probability is 16 / 32 = 50%
-; CHECK: edge for.body -> if.end probability is 16 / 32 = 50%
+; CHECK: edge for.body -> for.inc5 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge for.body -> if.end probability is 0x40000000 / 0x80000000 = 50.00%
if.end:
call void @g1()
br i1 %cmp38, label %for.body4, label %for.end
-; CHECK: edge if.end -> for.body4 probability is 20 / 32 = 62.5%
-; CHECK: edge if.end -> for.end probability is 12 / 32 = 37.5%
+; CHECK: edge if.end -> for.body4 probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge if.end -> for.end probability is 0x30000000 / 0x80000000 = 37.50%
for.body4:
%j.09 = phi i32 [ %inc, %for.body4 ], [ 0, %if.end ]
@@ -276,20 +276,20 @@ for.body4:
%inc = add nsw i32 %j.09, 1
%exitcond = icmp eq i32 %inc, %b
br i1 %exitcond, label %for.end, label %for.body4
-; CHECK: edge for.body4 -> for.end probability is 4 / 128
-; CHECK: edge for.body4 -> for.body4 probability is 124 / 128
+; CHECK: edge for.body4 -> for.end probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge for.body4 -> for.body4 probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
for.end:
call void @g3()
br label %for.inc5
-; CHECK: edge for.end -> for.inc5 probability is 16 / 16 = 100%
+; CHECK: edge for.end -> for.inc5 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
for.inc5:
%inc6 = add nsw i32 %i.011, 1
%exitcond12 = icmp eq i32 %inc6, %a
br i1 %exitcond12, label %for.end7, label %for.body
-; CHECK: edge for.inc5 -> for.end7 probability is 4 / 128
-; CHECK: edge for.inc5 -> for.body probability is 124 / 128
+; CHECK: edge for.inc5 -> for.end7 probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge for.inc5 -> for.body probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
for.end7:
call void @g4()
@@ -300,64 +300,64 @@ define void @test8(i32 %a, i32 %b, i32* %c) {
entry:
%cmp18 = icmp sgt i32 %a, 0
br i1 %cmp18, label %for.body.lr.ph, label %for.end15
-; CHECK: edge entry -> for.body.lr.ph probability is 20 / 32
-; CHECK: edge entry -> for.end15 probability is 12 / 32
+; CHECK: edge entry -> for.body.lr.ph probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge entry -> for.end15 probability is 0x30000000 / 0x80000000 = 37.50%
for.body.lr.ph:
%cmp216 = icmp sgt i32 %b, 0
%arrayidx5 = getelementptr inbounds i32, i32* %c, i64 1
%arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
br label %for.body
-; CHECK: edge for.body.lr.ph -> for.body probability is 16 / 16 = 100%
+; CHECK: edge for.body.lr.ph -> for.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
for.body:
%i.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc14, %for.end ]
call void @g1()
br i1 %cmp216, label %for.body3, label %for.end
-; CHECK: edge for.body -> for.body3 probability is 20 / 32 = 62.5%
-; CHECK: edge for.body -> for.end probability is 12 / 32 = 37.5%
+; CHECK: edge for.body -> for.body3 probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge for.body -> for.end probability is 0x30000000 / 0x80000000 = 37.50%
for.body3:
%j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%0 = load i32, i32* %c, align 4
%cmp4 = icmp eq i32 %0, %j.017
br i1 %cmp4, label %for.inc, label %if.end
-; CHECK: edge for.body3 -> for.inc probability is 16 / 32 = 50%
-; CHECK: edge for.body3 -> if.end probability is 16 / 32 = 50%
+; CHECK: edge for.body3 -> for.inc probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge for.body3 -> if.end probability is 0x40000000 / 0x80000000 = 50.00%
if.end:
%1 = load i32, i32* %arrayidx5, align 4
%cmp6 = icmp eq i32 %1, %j.017
br i1 %cmp6, label %for.inc, label %if.end8
-; CHECK: edge if.end -> for.inc probability is 16 / 32 = 50%
-; CHECK: edge if.end -> if.end8 probability is 16 / 32 = 50%
+; CHECK: edge if.end -> for.inc probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge if.end -> if.end8 probability is 0x40000000 / 0x80000000 = 50.00%
if.end8:
%2 = load i32, i32* %arrayidx9, align 4
%cmp10 = icmp eq i32 %2, %j.017
br i1 %cmp10, label %for.inc, label %if.end12
-; CHECK: edge if.end8 -> for.inc probability is 16 / 32 = 50%
-; CHECK: edge if.end8 -> if.end12 probability is 16 / 32 = 50%
+; CHECK: edge if.end8 -> for.inc probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge if.end8 -> if.end12 probability is 0x40000000 / 0x80000000 = 50.00%
if.end12:
call void @g2()
br label %for.inc
-; CHECK: edge if.end12 -> for.inc probability is 16 / 16 = 100%
+; CHECK: edge if.end12 -> for.inc probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
for.inc:
%inc = add nsw i32 %j.017, 1
%exitcond = icmp eq i32 %inc, %b
br i1 %exitcond, label %for.end, label %for.body3
-; CHECK: edge for.inc -> for.end probability is 4 / 128
-; CHECK: edge for.inc -> for.body3 probability is 124 / 128
+; CHECK: edge for.inc -> for.end probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge for.inc -> for.body3 probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
for.end:
call void @g3()
%inc14 = add nsw i32 %i.019, 1
%exitcond20 = icmp eq i32 %inc14, %a
br i1 %exitcond20, label %for.end15, label %for.body
-; CHECK: edge for.end -> for.end15 probability is 4 / 128
-; CHECK: edge for.end -> for.body probability is 124 / 128
+; CHECK: edge for.end -> for.end15 probability is 0x04000000 / 0x80000000 = 3.12%
+; CHECK: edge for.end -> for.body probability is 0x7c000000 / 0x80000000 = 96.88% [HOT edge]
for.end15:
call void @g4()
diff --git a/test/Analysis/BranchProbabilityInfo/noreturn.ll b/test/Analysis/BranchProbabilityInfo/noreturn.ll
index 8b9ae11f7d35..7098c2f7b8cc 100644
--- a/test/Analysis/BranchProbabilityInfo/noreturn.ll
+++ b/test/Analysis/BranchProbabilityInfo/noreturn.ll
@@ -8,8 +8,8 @@ define i32 @test1(i32 %a, i32 %b) {
entry:
%cond = icmp eq i32 %a, 42
br i1 %cond, label %exit, label %abort
-; CHECK: edge entry -> exit probability is 1048575 / 1048576
-; CHECK: edge entry -> abort probability is 1 / 1048576
+; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge entry -> abort probability is 0x00000800 / 0x80000000 = 0.00%
abort:
call void @abort() noreturn
@@ -26,11 +26,11 @@ entry:
i32 2, label %case_b
i32 3, label %case_c
i32 4, label %case_d]
-; CHECK: edge entry -> exit probability is 1048575 / 1048579
-; CHECK: edge entry -> case_a probability is 1 / 1048579
-; CHECK: edge entry -> case_b probability is 1 / 1048579
-; CHECK: edge entry -> case_c probability is 1 / 1048579
-; CHECK: edge entry -> case_d probability is 1 / 1048579
+; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge entry -> case_a probability is 0x00000200 / 0x80000000 = 0.00%
+; CHECK: edge entry -> case_b probability is 0x00000200 / 0x80000000 = 0.00%
+; CHECK: edge entry -> case_c probability is 0x00000200 / 0x80000000 = 0.00%
+; CHECK: edge entry -> case_d probability is 0x00000200 / 0x80000000 = 0.00%
case_a:
br label %case_b
@@ -55,14 +55,14 @@ define i32 @test3(i32 %a, i32 %b) {
entry:
%cond1 = icmp eq i32 %a, 42
br i1 %cond1, label %exit, label %dom
-; CHECK: edge entry -> exit probability is 1048575 / 1048576
-; CHECK: edge entry -> dom probability is 1 / 1048576
+; CHECK: edge entry -> exit probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge entry -> dom probability is 0x00000800 / 0x80000000 = 0.00%
dom:
%cond2 = icmp ult i32 %a, 42
br i1 %cond2, label %idom1, label %idom2
-; CHECK: edge dom -> idom1 probability is 1 / 2
-; CHECK: edge dom -> idom2 probability is 1 / 2
+; CHECK: edge dom -> idom1 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge dom -> idom2 probability is 0x40000000 / 0x80000000 = 50.00%
idom1:
br label %abort
@@ -77,3 +77,48 @@ abort:
exit:
ret i32 %b
}
+
+@_ZTIi = external global i8*
+
+; CHECK-LABEL: throwSmallException
+; CHECK-NOT: invoke i32 @smallFunction
+define i32 @throwSmallException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %cmp = icmp sge i32 %idx, %limit
+ br i1 %cmp, label %if.then, label %if.end
+; CHECK: edge entry -> if.then probability is 0x00000800 / 0x80000000 = 0.00%
+; CHECK: edge entry -> if.end probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+
+if.then: ; preds = %entry
+ %exception = call i8* @__cxa_allocate_exception(i64 1) #0
+ invoke i32 @smallFunction(i32 %idx)
+ to label %invoke.cont unwind label %lpad
+; CHECK: edge if.then -> invoke.cont probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge if.then -> lpad probability is 0x00000800 / 0x80000000 = 0.00%
+
+invoke.cont: ; preds = %if.then
+ call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1
+ unreachable
+
+lpad: ; preds = %if.then
+ %ll = landingpad { i8*, i32 }
+ cleanup
+ ret i32 %idx
+
+if.end: ; preds = %entry
+ ret i32 %idx
+}
+
+@a = global i32 4
+define i32 @smallFunction(i32 %a) {
+entry:
+ %r = load volatile i32, i32* @a
+ ret i32 %r
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { noreturn }
+
+declare i8* @__cxa_allocate_exception(i64)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_throw(i8*, i8*, i8*)
diff --git a/test/Analysis/BranchProbabilityInfo/pr18705.ll b/test/Analysis/BranchProbabilityInfo/pr18705.ll
index aff08a63d506..f5f9612fcdb6 100644
--- a/test/Analysis/BranchProbabilityInfo/pr18705.ll
+++ b/test/Analysis/BranchProbabilityInfo/pr18705.ll
@@ -4,8 +4,8 @@
; calcLoopBranchHeuristics should return early without setting the weights.
; calcFloatingPointHeuristics, which is run later, sets the weights.
;
-; CHECK: edge while.body -> if.then probability is 20 / 32 = 62.5%
-; CHECK: edge while.body -> if.else probability is 12 / 32 = 37.5%
+; CHECK: edge while.body -> if.then probability is 0x50000000 / 0x80000000 = 62.50%
+; CHECK: edge while.body -> if.else probability is 0x30000000 / 0x80000000 = 37.50%
define void @foo1(i32 %n, i32* nocapture %b, i32* nocapture %c, i32* nocapture %d, float* nocapture readonly %f0, float* nocapture readonly %f1) {
entry:
diff --git a/test/Analysis/BranchProbabilityInfo/pr22718.ll b/test/Analysis/BranchProbabilityInfo/pr22718.ll
index 0de4d643c9c9..51bbd13e83c1 100644
--- a/test/Analysis/BranchProbabilityInfo/pr22718.ll
+++ b/test/Analysis/BranchProbabilityInfo/pr22718.ll
@@ -4,8 +4,8 @@
; reflected in the probability computation because the weight is larger than
; the branch weight cap (about 2 billion).
;
-; CHECK: edge for.body -> if.then probability is 216661881 / 2166666667 = 9.9
-; CHECK: edge for.body -> if.else probability is 1950004786 / 2166666667 = 90.0
+; CHECK: edge for.body -> if.then probability is 0x0cccba45 / 0x80000000 = 10.00%
+; CHECK: edge for.body -> if.else probability is 0x733345bb / 0x80000000 = 90.00% [HOT edge]
@y = common global i64 0, align 8
@x = common global i64 0, align 8
diff --git a/test/Analysis/CFLAliasAnalysis/arguments-globals.ll b/test/Analysis/CFLAliasAnalysis/arguments-globals.ll
index 18bbe8b6b41f..4844577d6fef 100644
--- a/test/Analysis/CFLAliasAnalysis/arguments-globals.ll
+++ b/test/Analysis/CFLAliasAnalysis/arguments-globals.ll
@@ -3,7 +3,7 @@
; (Everything should alias everything, because args can alias globals, so the
; aliasing sets should of args+alloca+global should be combined)
-; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-basicaa -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
; CHECK: Function: test
diff --git a/test/Analysis/CFLAliasAnalysis/basic-interproc.ll b/test/Analysis/CFLAliasAnalysis/basic-interproc.ll
index c0a5404eab6e..d6515eb9ebe5 100644
--- a/test/Analysis/CFLAliasAnalysis/basic-interproc.ll
+++ b/test/Analysis/CFLAliasAnalysis/basic-interproc.ll
@@ -1,7 +1,7 @@
; This testcase ensures that CFL AA gives conservative answers on variables
; that involve arguments.
-; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-basicaa -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
; CHECK: Function: test
; CHECK: 2 Total Alias Queries Performed
diff --git a/test/Analysis/CFLAliasAnalysis/branch-alias.ll b/test/Analysis/CFLAliasAnalysis/branch-alias.ll
index 8307462928e7..dbbf035f3440 100644
--- a/test/Analysis/CFLAliasAnalysis/branch-alias.ll
+++ b/test/Analysis/CFLAliasAnalysis/branch-alias.ll
@@ -13,7 +13,7 @@
; int* ShouldAliasA = *AliasA1;
; }
-; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-basicaa -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
; CHECK: Function: ptr_test
define void @ptr_test() #0 {
diff --git a/test/Analysis/CFLAliasAnalysis/const-expr-gep.ll b/test/Analysis/CFLAliasAnalysis/const-expr-gep.ll
index c7ff407db2f6..11b60dd33b55 100644
--- a/test/Analysis/CFLAliasAnalysis/const-expr-gep.ll
+++ b/test/Analysis/CFLAliasAnalysis/const-expr-gep.ll
@@ -2,7 +2,7 @@
; resolvable by cfl-aa, but require analysis of getelementptr constant exprs.
; Derived from BasicAA/2003-12-11-ConstExprGEP.ll
-; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-basicaa -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
%T = type { i32, [10 x i8] }
diff --git a/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll b/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll
index adacf048d678..3503e16898c0 100644
--- a/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll
+++ b/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -tbaa -cfl-aa -gvn < %s | FileCheck -check-prefix=CFLAA %s
-; RUN: opt -S -tbaa -gvn < %s | FileCheck %s
+; RUN: opt -S -disable-basicaa -tbaa -cfl-aa -gvn < %s | FileCheck -check-prefix=CFLAA %s
+; RUN: opt -S -disable-basicaa -tbaa -gvn < %s | FileCheck %s
; Adapted from the BasicAA full-store-partial-alias.ll test.
; CFL AA could notice that the store stores to the entire %u object,
diff --git a/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll b/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll
index c2fcf32ce06b..1edbb9fff56b 100644
--- a/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll
+++ b/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-basicaa -cfl-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
; Derived from BasicAA/2010-09-15-GEP-SignedArithmetic.ll
target datalayout = "e-p:32:32:32"
diff --git a/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll b/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll
index e997374e92d9..be671bfc4ca2 100644
--- a/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll
+++ b/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll
@@ -8,7 +8,7 @@
; }
;
-; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-basicaa -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
%T = type { i32, [10 x i8] }
diff --git a/test/Analysis/CFLAliasAnalysis/must-and-partial.ll b/test/Analysis/CFLAliasAnalysis/must-and-partial.ll
index 9deacf860ed8..5bcc3f991022 100644
--- a/test/Analysis/CFLAliasAnalysis/must-and-partial.ll
+++ b/test/Analysis/CFLAliasAnalysis/must-and-partial.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-basicaa -cfl-aa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
; When merging MustAlias and PartialAlias, merge to PartialAlias
; instead of MayAlias.
diff --git a/test/Analysis/CFLAliasAnalysis/opaque-call-alias.ll b/test/Analysis/CFLAliasAnalysis/opaque-call-alias.ll
new file mode 100644
index 000000000000..6121dca1345d
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/opaque-call-alias.ll
@@ -0,0 +1,20 @@
+; We previously had a case where we would put results from a no-args call in
+; its own stratified set. This would make cases like the one in @test say that
+; nothing (except %Escapes and %Arg) can alias
+
+; RUN: opt < %s -disable-basicaa -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+; CHECK: Function: test
+; CHECK: MayAlias: i8* %Arg, i8* %Escapes
+; CHECK: MayAlias: i8* %Arg, i8* %Retrieved
+; CHECK: MayAlias: i8* %Escapes, i8* %Retrieved
+define void @test(i8* %Arg) {
+ %Noalias = alloca i8
+ %Escapes = alloca i8
+ call void @set_thepointer(i8* %Escapes)
+ %Retrieved = call i8* @get_thepointer()
+ ret void
+}
+
+declare void @set_thepointer(i8* %P)
+declare i8* @get_thepointer()
diff --git a/test/Analysis/CFLAliasAnalysis/va.ll b/test/Analysis/CFLAliasAnalysis/va.ll
index 3094cb0967f6..a432cea8a1df 100644
--- a/test/Analysis/CFLAliasAnalysis/va.ll
+++ b/test/Analysis/CFLAliasAnalysis/va.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -disable-basicaa -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
; CHECK-LABEL: Function: test1
; CHECK: 0 no alias responses
diff --git a/test/Analysis/CallGraph/non-leaf-intrinsics.ll b/test/Analysis/CallGraph/non-leaf-intrinsics.ll
index 11bed6abce60..5caecf7e2244 100644
--- a/test/Analysis/CallGraph/non-leaf-intrinsics.ll
+++ b/test/Analysis/CallGraph/non-leaf-intrinsics.ll
@@ -1,7 +1,7 @@
; RUN: opt -S -print-callgraph -disable-output < %s 2>&1 | FileCheck %s
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
define private void @f() {
ret void
@@ -10,7 +10,7 @@ define private void @f() {
define void @calls_statepoint(i8 addrspace(1)* %arg) gc "statepoint-example" {
entry:
%cast = bitcast i8 addrspace(1)* %arg to i64 addrspace(1)*
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
ret void
}
diff --git a/test/Analysis/CostModel/AArch64/select.ll b/test/Analysis/CostModel/AArch64/select.ll
index 216dc5ddc488..1a1248e661c5 100644
--- a/test/Analysis/CostModel/AArch64/select.ll
+++ b/test/Analysis/CostModel/AArch64/select.ll
@@ -17,16 +17,16 @@ define void @select() {
; CHECK: cost of 1 {{.*}} select
%v6 = select i1 undef, double undef, double undef
- ; Vector values - check for vectors that have a high cost because they end up
- ; scalarized.
- ; CHECK: cost of 320 {{.*}} select
+ ; CHECK: cost of 16 {{.*}} select
%v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
- ; CHECK: cost of 160 {{.*}} select
+ ; CHECK: cost of 8 {{.*}} select
%v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
- ; CHECK: cost of 320 {{.*}} select
+ ; CHECK: cost of 16 {{.*}} select
%v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
+ ; Vector values - check for vectors of i64s that have a high cost because
+ ; they end up scalarized.
; CHECK: cost of 80 {{.*}} select
%v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef
; CHECK: cost of 160 {{.*}} select
@@ -34,5 +34,5 @@ define void @select() {
; CHECK: cost of 320 {{.*}} select
%v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef
- ret void
+ ret void
}
diff --git a/test/Analysis/CostModel/AMDGPU/br.ll b/test/Analysis/CostModel/AMDGPU/br.ll
new file mode 100644
index 000000000000..0b9649397563
--- /dev/null
+++ b/test/Analysis/CostModel/AMDGPU/br.ll
@@ -0,0 +1,45 @@
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
+
+; CHECK: 'test_br_cost'
+; CHECK: estimated cost of 10 for instruction: br i1
+; CHECK: estimated cost of 10 for instruction: br label
+; CHECK: estimated cost of 10 for instruction: ret void
+define void @test_br_cost(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
+bb0:
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+ %vec = load i32, i32 addrspace(1)* %vaddr
+ %add = add i32 %vec, %b
+ store i32 %add, i32 addrspace(1)* %out
+ br label %bb2
+
+bb2:
+ ret void
+
+}
+
+; CHECK: 'test_switch_cost'
+; CHECK: Unknown cost for instruction: switch
+define void @test_switch_cost(i32 %a) #0 {
+entry:
+ switch i32 %a, label %default [
+ i32 0, label %case0
+ i32 1, label %case1
+ ]
+
+case0:
+ store volatile i32 undef, i32 addrspace(1)* undef
+ ret void
+
+case1:
+ store volatile i32 undef, i32 addrspace(1)* undef
+ ret void
+
+default:
+ store volatile i32 undef, i32 addrspace(1)* undef
+ ret void
+
+end:
+ ret void
+}
diff --git a/test/Analysis/CostModel/AMDGPU/extractelement.ll b/test/Analysis/CostModel/AMDGPU/extractelement.ll
new file mode 100644
index 000000000000..c328d7686466
--- /dev/null
+++ b/test/Analysis/CostModel/AMDGPU/extractelement.ll
@@ -0,0 +1,110 @@
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
+
+; CHECK: 'extractelement_v2i32'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x i32>
+define void @extractelement_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr) {
+ %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
+ %elt = extractelement <2 x i32> %vec, i32 1
+ store i32 %elt, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v2f32'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x float>
+define void @extractelement_v2f32(float addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) {
+ %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
+ %elt = extractelement <2 x float> %vec, i32 1
+ store float %elt, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v3i32'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <3 x i32>
+define void @extractelement_v3i32(i32 addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr) {
+ %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
+ %elt = extractelement <3 x i32> %vec, i32 1
+ store i32 %elt, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v4i32'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <4 x i32>
+define void @extractelement_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr) {
+ %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
+ %elt = extractelement <4 x i32> %vec, i32 1
+ store i32 %elt, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v8i32'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <8 x i32>
+define void @extractelement_v8i32(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr) {
+ %vec = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr
+ %elt = extractelement <8 x i32> %vec, i32 1
+ store i32 %elt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: Should be non-0
+; CHECK: 'extractelement_v8i32_dynindex'
+; CHECK: estimated cost of 2 for {{.*}} extractelement <8 x i32>
+define void @extractelement_v8i32_dynindex(i32 addrspace(1)* %out, <8 x i32> addrspace(1)* %vaddr, i32 %idx) {
+ %vec = load <8 x i32>, <8 x i32> addrspace(1)* %vaddr
+ %elt = extractelement <8 x i32> %vec, i32 %idx
+ store i32 %elt, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v2i64'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x i64>
+define void @extractelement_v2i64(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr) {
+ %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
+ %elt = extractelement <2 x i64> %vec, i64 1
+ store i64 %elt, i64 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v3i64'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <3 x i64>
+define void @extractelement_v3i64(i64 addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr) {
+ %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
+ %elt = extractelement <3 x i64> %vec, i64 1
+ store i64 %elt, i64 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v4i64'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <4 x i64>
+define void @extractelement_v4i64(i64 addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr) {
+ %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
+ %elt = extractelement <4 x i64> %vec, i64 1
+ store i64 %elt, i64 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v8i64'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <8 x i64>
+define void @extractelement_v8i64(i64 addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr) {
+ %vec = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr
+ %elt = extractelement <8 x i64> %vec, i64 1
+ store i64 %elt, i64 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v4i8'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <4 x i8>
+define void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> addrspace(1)* %vaddr) {
+ %vec = load <4 x i8>, <4 x i8> addrspace(1)* %vaddr
+ %elt = extractelement <4 x i8> %vec, i8 1
+ store i8 %elt, i8 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: 'extractelement_v2i16'
+; CHECK: estimated cost of 0 for {{.*}} extractelement <2 x i16>
+define void @extractelement_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) {
+ %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
+ %elt = extractelement <2 x i16> %vec, i16 1
+ store i16 %elt, i16 addrspace(1)* %out
+ ret void
+}
diff --git a/test/Analysis/CostModel/AMDGPU/lit.local.cfg b/test/Analysis/CostModel/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..2a665f06be72
--- /dev/null
+++ b/test/Analysis/CostModel/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
index 18d6e841433f..21e292822099 100644
--- a/test/Analysis/CostModel/ARM/cast.ll
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -5,543 +5,544 @@ target triple = "thumbv7-apple-ios6.0.0"
define i32 @casts() {
; -- scalars --
- ; CHECK: cost of 1 {{.*}} sext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8
%r0 = sext i1 undef to i8
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r1 = zext i1 undef to i8
%r1 = zext i1 undef to i8
- ; CHECK: cost of 1 {{.*}} sext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r2 = sext i1 undef to i16
%r2 = sext i1 undef to i16
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r3 = zext i1 undef to i16
%r3 = zext i1 undef to i16
- ; CHECK: cost of 1 {{.*}} sext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r4 = sext i1 undef to i32
%r4 = sext i1 undef to i32
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r5 = zext i1 undef to i32
%r5 = zext i1 undef to i32
- ; CHECK: cost of 1 {{.*}} sext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r6 = sext i1 undef to i64
%r6 = sext i1 undef to i64
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r7 = zext i1 undef to i64
%r7 = zext i1 undef to i64
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r8 = trunc i8 undef to i1
%r8 = trunc i8 undef to i1
- ; CHECK: cost of 1 {{.*}} sext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r9 = sext i8 undef to i16
%r9 = sext i8 undef to i16
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r10 = zext i8 undef to i16
%r10 = zext i8 undef to i16
- ; CHECK: cost of 1 {{.*}} sext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r11 = sext i8 undef to i32
%r11 = sext i8 undef to i32
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r12 = zext i8 undef to i32
%r12 = zext i8 undef to i32
- ; CHECK: cost of 1 {{.*}} sext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r13 = sext i8 undef to i64
%r13 = sext i8 undef to i64
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r14 = zext i8 undef to i64
%r14 = zext i8 undef to i64
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r15 = trunc i16 undef to i1
%r15 = trunc i16 undef to i1
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r16 = trunc i16 undef to i8
%r16 = trunc i16 undef to i8
- ; CHECK: cost of 1 {{.*}} sext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r17 = sext i16 undef to i32
%r17 = sext i16 undef to i32
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r18 = zext i16 undef to i32
%r18 = zext i16 undef to i32
- ; CHECK: cost of 2 {{.*}} sext
+ ; CHECK: Found an estimated cost of 2 for instruction: %r19 = sext i16 undef to i64
%r19 = sext i16 undef to i64
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r20 = zext i16 undef to i64
%r20 = zext i16 undef to i64
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r21 = trunc i32 undef to i1
%r21 = trunc i32 undef to i1
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r22 = trunc i32 undef to i8
%r22 = trunc i32 undef to i8
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r23 = trunc i32 undef to i16
%r23 = trunc i32 undef to i16
- ; CHECK: cost of 1 {{.*}} sext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r24 = sext i32 undef to i64
%r24 = sext i32 undef to i64
- ; CHECK: cost of 1 {{.*}} zext
+ ; CHECK: Found an estimated cost of 1 for instruction: %r25 = zext i32 undef to i64
%r25 = zext i32 undef to i64
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r26 = trunc i64 undef to i1
%r26 = trunc i64 undef to i1
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r27 = trunc i64 undef to i8
%r27 = trunc i64 undef to i8
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r28 = trunc i64 undef to i16
%r28 = trunc i64 undef to i16
- ; CHECK: cost of 0 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 0 for instruction: %r29 = trunc i64 undef to i32
%r29 = trunc i64 undef to i32
; -- floating point conversions --
; Moves between scalar and NEON registers.
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r30 = fptoui float undef to i1
%r30 = fptoui float undef to i1
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r31 = fptosi float undef to i1
%r31 = fptosi float undef to i1
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r32 = fptoui float undef to i8
%r32 = fptoui float undef to i8
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r33 = fptosi float undef to i8
%r33 = fptosi float undef to i8
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r34 = fptoui float undef to i16
%r34 = fptoui float undef to i16
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r35 = fptosi float undef to i16
%r35 = fptosi float undef to i16
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r36 = fptoui float undef to i32
%r36 = fptoui float undef to i32
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r37 = fptosi float undef to i32
%r37 = fptosi float undef to i32
- ; CHECK: cost of 10 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 10 for instruction: %r38 = fptoui float undef to i64
%r38 = fptoui float undef to i64
- ; CHECK: cost of 10 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 10 for instruction: %r39 = fptosi float undef to i64
%r39 = fptosi float undef to i64
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r40 = fptoui double undef to i1
%r40 = fptoui double undef to i1
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r41 = fptosi double undef to i1
%r41 = fptosi double undef to i1
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r42 = fptoui double undef to i8
%r42 = fptoui double undef to i8
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r43 = fptosi double undef to i8
%r43 = fptosi double undef to i8
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r44 = fptoui double undef to i16
%r44 = fptoui double undef to i16
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r45 = fptosi double undef to i16
%r45 = fptosi double undef to i16
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r46 = fptoui double undef to i32
%r46 = fptoui double undef to i32
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r47 = fptosi double undef to i32
%r47 = fptosi double undef to i32
; Function call
- ; CHECK: cost of 10 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 10 for instruction: %r48 = fptoui double undef to i64
%r48 = fptoui double undef to i64
- ; CHECK: cost of 10 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 10 for instruction: %r49 = fptosi double undef to i64
%r49 = fptosi double undef to i64
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r50 = sitofp i1 undef to float
%r50 = sitofp i1 undef to float
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r51 = uitofp i1 undef to float
%r51 = uitofp i1 undef to float
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r52 = sitofp i1 undef to double
%r52 = sitofp i1 undef to double
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r53 = uitofp i1 undef to double
%r53 = uitofp i1 undef to double
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r54 = sitofp i8 undef to float
%r54 = sitofp i8 undef to float
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r55 = uitofp i8 undef to float
%r55 = uitofp i8 undef to float
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r56 = sitofp i8 undef to double
%r56 = sitofp i8 undef to double
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r57 = uitofp i8 undef to double
%r57 = uitofp i8 undef to double
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r58 = sitofp i16 undef to float
%r58 = sitofp i16 undef to float
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r59 = uitofp i16 undef to float
%r59 = uitofp i16 undef to float
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r60 = sitofp i16 undef to double
%r60 = sitofp i16 undef to double
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r61 = uitofp i16 undef to double
%r61 = uitofp i16 undef to double
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r62 = sitofp i32 undef to float
%r62 = sitofp i32 undef to float
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r63 = uitofp i32 undef to float
%r63 = uitofp i32 undef to float
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r64 = sitofp i32 undef to double
%r64 = sitofp i32 undef to double
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r65 = uitofp i32 undef to double
%r65 = uitofp i32 undef to double
; Function call
- ; CHECK: cost of 10 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 10 for instruction: %r66 = sitofp i64 undef to float
%r66 = sitofp i64 undef to float
- ; CHECK: cost of 10 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 10 for instruction: %r67 = uitofp i64 undef to float
%r67 = uitofp i64 undef to float
- ; CHECK: cost of 10 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 10 for instruction: %r68 = sitofp i64 undef to double
%r68 = sitofp i64 undef to double
- ; CHECK: cost of 10 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 10 for instruction: %r69 = uitofp i64 undef to double
%r69 = uitofp i64 undef to double
- ; CHECK: cost of 3 {{.*}} sext
+ ; CHECK: Found an estimated cost of 3 for instruction: %r70 = sext <8 x i8> undef to <8 x i32>
%r70 = sext <8 x i8> undef to <8 x i32>
- ; CHECK: cost of 6 {{.*}} sext
+ ; CHECK: Found an estimated cost of 6 for instruction: %r71 = sext <16 x i8> undef to <16 x i32>
%r71 = sext <16 x i8> undef to <16 x i32>
- ; CHECK: cost of 3 {{.*}} zext
+ ; CHECK: Found an estimated cost of 3 for instruction: %r72 = zext <8 x i8> undef to <8 x i32>
%r72 = zext <8 x i8> undef to <8 x i32>
- ; CHECK: cost of 6 {{.*}} zext
+ ; CHECK: Found an estimated cost of 6 for instruction: %r73 = zext <16 x i8> undef to <16 x i32>
%r73 = zext <16 x i8> undef to <16 x i32>
- ; CHECK: cost of 7 {{.*}} sext
+ ; CHECK: Found an estimated cost of 7 for instruction: %rext_0 = sext <8 x i8> undef to <8 x i64>
%rext_0 = sext <8 x i8> undef to <8 x i64>
- ; CHECK: cost of 7 {{.*}} zext
+ ; CHECK: Found an estimated cost of 7 for instruction: %rext_1 = zext <8 x i8> undef to <8 x i64>
%rext_1 = zext <8 x i8> undef to <8 x i64>
- ; CHECK: cost of 6 {{.*}} sext
+ ; CHECK: Found an estimated cost of 6 for instruction: %rext_2 = sext <8 x i16> undef to <8 x i64>
%rext_2 = sext <8 x i16> undef to <8 x i64>
- ; CHECK: cost of 6 {{.*}} zext
+ ; CHECK: Found an estimated cost of 6 for instruction: %rext_3 = zext <8 x i16> undef to <8 x i64>
%rext_3 = zext <8 x i16> undef to <8 x i64>
- ; CHECK: cost of 3 {{.*}} sext
+ ; CHECK: Found an estimated cost of 3 for instruction: %rext_4 = sext <4 x i16> undef to <4 x i64>
%rext_4 = sext <4 x i16> undef to <4 x i64>
- ; CHECK: cost of 3 {{.*}} zext
+ ; CHECK: Found an estimated cost of 3 for instruction: %rext_5 = zext <4 x i16> undef to <4 x i64>
%rext_5 = zext <4 x i16> undef to <4 x i64>
; Vector cast cost of instructions lowering the cast to the stack.
- ; CHECK: cost of 3 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 3 for instruction: %r74 = trunc <8 x i32> undef to <8 x i8>
%r74 = trunc <8 x i32> undef to <8 x i8>
- ; CHECK: cost of 6 {{.*}} trunc
+ ; CHECK: Found an estimated cost of 6 for instruction: %r75 = trunc <16 x i32> undef to <16 x i8>
%r75 = trunc <16 x i32> undef to <16 x i8>
; Floating point truncation costs.
- ; CHECK: cost of 1 {{.*}} fptrunc double
+ ; CHECK: Found an estimated cost of 1 for instruction: %r80 = fptrunc double undef to float
%r80 = fptrunc double undef to float
- ; CHECK: cost of 2 {{.*}} fptrunc <2 x double
+ ; CHECK: Found an estimated cost of 2 for instruction: %r81 = fptrunc <2 x double> undef to <2 x float>
%r81 = fptrunc <2 x double> undef to <2 x float>
- ; CHECK: cost of 4 {{.*}} fptrunc <4 x double
+ ; CHECK: Found an estimated cost of 4 for instruction: %r82 = fptrunc <4 x double> undef to <4 x float>
%r82 = fptrunc <4 x double> undef to <4 x float>
- ; CHECK: cost of 8 {{.*}} fptrunc <8 x double
+ ; CHECK: Found an estimated cost of 8 for instruction: %r83 = fptrunc <8 x double> undef to <8 x float>
%r83 = fptrunc <8 x double> undef to <8 x float>
- ; CHECK: cost of 16 {{.*}} fptrunc <16 x double
+ ; CHECK: Found an estimated cost of 16 for instruction: %r84 = fptrunc <16 x double> undef to <16 x float>
%r84 = fptrunc <16 x double> undef to <16 x float>
; Floating point extension costs.
- ; CHECK: cost of 1 {{.*}} fpext float
+ ; CHECK: Found an estimated cost of 1 for instruction: %r85 = fpext float undef to double
%r85 = fpext float undef to double
- ; CHECK: cost of 2 {{.*}} fpext <2 x float
+ ; CHECK: Found an estimated cost of 2 for instruction: %r86 = fpext <2 x float> undef to <2 x double>
%r86 = fpext <2 x float> undef to <2 x double>
- ; CHECK: cost of 4 {{.*}} fpext <4 x float
+ ; CHECK: Found an estimated cost of 4 for instruction: %r87 = fpext <4 x float> undef to <4 x double>
%r87 = fpext <4 x float> undef to <4 x double>
- ; CHECK: cost of 8 {{.*}} fpext <8 x float
+ ; CHECK: Found an estimated cost of 8 for instruction: %r88 = fpext <8 x float> undef to <8 x double>
%r88 = fpext <8 x float> undef to <8 x double>
- ; CHECK: cost of 16 {{.*}} fpext <16 x float
+ ; CHECK: Found an estimated cost of 16 for instruction: %r89 = fpext <16 x float> undef to <16 x double>
%r89 = fpext <16 x float> undef to <16 x double>
;; Floating point to integer vector casts.
- ; CHECK: cost of 1 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 1 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1>
%r90 = fptoui <2 x float> undef to <2 x i1>
- ; CHECK: cost of 1 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 1 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1>
%r91 = fptosi <2 x float> undef to <2 x i1>
- ; CHECK: cost of 1 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 1 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8>
%r92 = fptoui <2 x float> undef to <2 x i8>
- ; CHECK: cost of 1 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 1 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8>
%r93 = fptosi <2 x float> undef to <2 x i8>
- ; CHECK: cost of 1 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 1 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16>
%r94 = fptoui <2 x float> undef to <2 x i16>
- ; CHECK: cost of 1 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 1 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16>
%r95 = fptosi <2 x float> undef to <2 x i16>
- ; CHECK: cost of 1 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 1 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32>
%r96 = fptoui <2 x float> undef to <2 x i32>
- ; CHECK: cost of 1 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 1 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32>
%r97 = fptosi <2 x float> undef to <2 x i32>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 32 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64>
%r98 = fptoui <2 x float> undef to <2 x i64>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 32 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64>
%r99 = fptosi <2 x float> undef to <2 x i64>
- ; CHECK: cost of 16 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 16 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1>
%r100 = fptoui <2 x double> undef to <2 x i1>
- ; CHECK: cost of 16 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 16 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1>
%r101 = fptosi <2 x double> undef to <2 x i1>
- ; CHECK: cost of 16 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 16 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8>
%r102 = fptoui <2 x double> undef to <2 x i8>
- ; CHECK: cost of 16 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 16 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8>
%r103 = fptosi <2 x double> undef to <2 x i8>
- ; CHECK: cost of 16 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 16 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16>
%r104 = fptoui <2 x double> undef to <2 x i16>
- ; CHECK: cost of 16 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 16 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16>
%r105 = fptosi <2 x double> undef to <2 x i16>
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32>
%r106 = fptoui <2 x double> undef to <2 x i32>
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32>
%r107 = fptosi <2 x double> undef to <2 x i32>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 32 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64>
%r108 = fptoui <2 x double> undef to <2 x i64>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 32 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64>
%r109 = fptosi <2 x double> undef to <2 x i64>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 32 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1>
%r110 = fptoui <4 x float> undef to <4 x i1>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 32 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1>
%r111 = fptosi <4 x float> undef to <4 x i1>
- ; CHECK: cost of 3 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 3 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8>
%r112 = fptoui <4 x float> undef to <4 x i8>
- ; CHECK: cost of 3 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 3 for instruction: %r113 = fptosi <4 x float> undef to <4 x i8>
%r113 = fptosi <4 x float> undef to <4 x i8>
- ; CHECK: cost of 2 {{.*}} fptoui
+
+ ; CHECK: Found an estimated cost of 2 for instruction: %r114 = fptoui <4 x float> undef to <4 x i16>
%r114 = fptoui <4 x float> undef to <4 x i16>
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16>
%r115 = fptosi <4 x float> undef to <4 x i16>
- ; CHECK: cost of 1 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 1 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32>
%r116 = fptoui <4 x float> undef to <4 x i32>
- ; CHECK: cost of 1 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 1 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32>
%r117 = fptosi <4 x float> undef to <4 x i32>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 64 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64>
%r118 = fptoui <4 x float> undef to <4 x i64>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 64 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64>
%r119 = fptosi <4 x float> undef to <4 x i64>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 32 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1>
%r120 = fptoui <4 x double> undef to <4 x i1>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 32 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1>
%r121 = fptosi <4 x double> undef to <4 x i1>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 32 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8>
%r122 = fptoui <4 x double> undef to <4 x i8>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 32 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8>
%r123 = fptosi <4 x double> undef to <4 x i8>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 32 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16>
%r124 = fptoui <4 x double> undef to <4 x i16>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 32 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16>
%r125 = fptosi <4 x double> undef to <4 x i16>
- ; CHECK: cost of 32 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 32 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32>
%r126 = fptoui <4 x double> undef to <4 x i32>
- ; CHECK: cost of 32 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 32 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32>
%r127 = fptosi <4 x double> undef to <4 x i32>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 64 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64>
%r128 = fptoui <4 x double> undef to <4 x i64>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 64 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64>
%r129 = fptosi <4 x double> undef to <4 x i64>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 64 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1>
%r130 = fptoui <8 x float> undef to <8 x i1>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 64 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1>
%r131 = fptosi <8 x float> undef to <8 x i1>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 64 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8>
%r132 = fptoui <8 x float> undef to <8 x i8>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 64 for instruction: %r133 = fptosi <8 x float> undef to <8 x i8>
%r133 = fptosi <8 x float> undef to <8 x i8>
- ; CHECK: cost of 4 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 4 for instruction: %r134 = fptoui <8 x float> undef to <8 x i16>
%r134 = fptoui <8 x float> undef to <8 x i16>
- ; CHECK: cost of 4 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 4 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16>
%r135 = fptosi <8 x float> undef to <8 x i16>
- ; CHECK: cost of 2 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 2 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32>
%r136 = fptoui <8 x float> undef to <8 x i32>
- ; CHECK: cost of 2 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 2 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32>
%r137 = fptosi <8 x float> undef to <8 x i32>
- ; CHECK: cost of 128 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 128 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64>
%r138 = fptoui <8 x float> undef to <8 x i64>
- ; CHECK: cost of 128 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 128 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64>
%r139 = fptosi <8 x float> undef to <8 x i64>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 64 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1>
%r140 = fptoui <8 x double> undef to <8 x i1>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 64 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1>
%r141 = fptosi <8 x double> undef to <8 x i1>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 64 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8>
%r142 = fptoui <8 x double> undef to <8 x i8>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 64 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8>
%r143 = fptosi <8 x double> undef to <8 x i8>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 64 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16>
%r144 = fptoui <8 x double> undef to <8 x i16>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 64 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16>
%r145 = fptosi <8 x double> undef to <8 x i16>
- ; CHECK: cost of 64 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 64 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32>
%r146 = fptoui <8 x double> undef to <8 x i32>
- ; CHECK: cost of 64 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 64 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32>
%r147 = fptosi <8 x double> undef to <8 x i32>
- ; CHECK: cost of 128 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 128 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64>
%r148 = fptoui <8 x double> undef to <8 x i64>
- ; CHECK: cost of 128 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 128 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64>
%r149 = fptosi <8 x double> undef to <8 x i64>
- ; CHECK: cost of 128 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 128 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1>
%r150 = fptoui <16 x float> undef to <16 x i1>
- ; CHECK: cost of 128 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 128 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1>
%r151 = fptosi <16 x float> undef to <16 x i1>
- ; CHECK: cost of 128 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 128 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8>
%r152 = fptoui <16 x float> undef to <16 x i8>
- ; CHECK: cost of 128 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 128 for instruction: %r153 = fptosi <16 x float> undef to <16 x i8>
%r153 = fptosi <16 x float> undef to <16 x i8>
- ; CHECK: cost of 8 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 8 for instruction: %r154 = fptoui <16 x float> undef to <16 x i16>
%r154 = fptoui <16 x float> undef to <16 x i16>
- ; CHECK: cost of 8 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 8 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16>
%r155 = fptosi <16 x float> undef to <16 x i16>
- ; CHECK: cost of 4 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 4 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32>
%r156 = fptoui <16 x float> undef to <16 x i32>
- ; CHECK: cost of 4 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 4 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32>
%r157 = fptosi <16 x float> undef to <16 x i32>
- ; CHECK: cost of 256 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 256 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64>
%r158 = fptoui <16 x float> undef to <16 x i64>
- ; CHECK: cost of 256 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 256 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64>
%r159 = fptosi <16 x float> undef to <16 x i64>
- ; CHECK: cost of 128 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 128 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1>
%r160 = fptoui <16 x double> undef to <16 x i1>
- ; CHECK: cost of 128 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 128 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1>
%r161 = fptosi <16 x double> undef to <16 x i1>
- ; CHECK: cost of 128 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 128 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8>
%r162 = fptoui <16 x double> undef to <16 x i8>
- ; CHECK: cost of 128 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 128 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8>
%r163 = fptosi <16 x double> undef to <16 x i8>
- ; CHECK: cost of 128 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 128 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16>
%r164 = fptoui <16 x double> undef to <16 x i16>
- ; CHECK: cost of 128 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 128 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16>
%r165 = fptosi <16 x double> undef to <16 x i16>
- ; CHECK: cost of 128 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 128 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32>
%r166 = fptoui <16 x double> undef to <16 x i32>
- ; CHECK: cost of 128 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 128 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32>
%r167 = fptosi <16 x double> undef to <16 x i32>
- ; CHECK: cost of 256 {{.*}} fptoui
+ ; CHECK: Found an estimated cost of 256 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64>
%r168 = fptoui <16 x double> undef to <16 x i64>
- ; CHECK: cost of 256 {{.*}} fptosi
+ ; CHECK: Found an estimated cost of 256 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64>
%r169 = fptosi <16 x double> undef to <16 x i64>
- ; CHECK: cost of 8 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 12 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float>
%r170 = uitofp <2 x i1> undef to <2 x float>
- ; CHECK: cost of 8 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 12 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float>
%r171 = sitofp <2 x i1> undef to <2 x float>
- ; CHECK: cost of 3 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 3 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float>
%r172 = uitofp <2 x i8> undef to <2 x float>
- ; CHECK: cost of 3 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 3 for instruction: %r173 = sitofp <2 x i8> undef to <2 x float>
%r173 = sitofp <2 x i8> undef to <2 x float>
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r174 = uitofp <2 x i16> undef to <2 x float>
%r174 = uitofp <2 x i16> undef to <2 x float>
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float>
%r175 = sitofp <2 x i16> undef to <2 x float>
- ; CHECK: cost of 1 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float>
%r176 = uitofp <2 x i32> undef to <2 x float>
- ; CHECK: cost of 1 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float>
%r177 = sitofp <2 x i32> undef to <2 x float>
- ; CHECK: cost of 24 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 28 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float>
%r178 = uitofp <2 x i64> undef to <2 x float>
- ; CHECK: cost of 24 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 28 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float>
%r179 = sitofp <2 x i64> undef to <2 x float>
- ; CHECK: cost of 8 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 8 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double>
%r180 = uitofp <2 x i1> undef to <2 x double>
- ; CHECK: cost of 8 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 8 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double>
%r181 = sitofp <2 x i1> undef to <2 x double>
- ; CHECK: cost of 4 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double>
%r182 = uitofp <2 x i8> undef to <2 x double>
- ; CHECK: cost of 4 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 4 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double>
%r183 = sitofp <2 x i8> undef to <2 x double>
- ; CHECK: cost of 3 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 3 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double>
%r184 = uitofp <2 x i16> undef to <2 x double>
- ; CHECK: cost of 3 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 3 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double>
%r185 = sitofp <2 x i16> undef to <2 x double>
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double>
%r186 = uitofp <2 x i32> undef to <2 x double>
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double>
%r187 = sitofp <2 x i32> undef to <2 x double>
- ; CHECK: cost of 24 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double>
%r188 = uitofp <2 x i64> undef to <2 x double>
- ; CHECK: cost of 24 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double>
%r189 = sitofp <2 x i64> undef to <2 x double>
- ; CHECK: cost of 3 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 3 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float>
%r190 = uitofp <4 x i1> undef to <4 x float>
- ; CHECK: cost of 3 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 3 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float>
%r191 = sitofp <4 x i1> undef to <4 x float>
- ; CHECK: cost of 3 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 3 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float>
%r192 = uitofp <4 x i8> undef to <4 x float>
- ; CHECK: cost of 3 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 3 for instruction: %r193 = sitofp <4 x i8> undef to <4 x float>
%r193 = sitofp <4 x i8> undef to <4 x float>
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r194 = uitofp <4 x i16> undef to <4 x float>
%r194 = uitofp <4 x i16> undef to <4 x float>
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float>
%r195 = sitofp <4 x i16> undef to <4 x float>
- ; CHECK: cost of 1 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float>
%r196 = uitofp <4 x i32> undef to <4 x float>
- ; CHECK: cost of 1 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float>
%r197 = sitofp <4 x i32> undef to <4 x float>
- ; CHECK: cost of 48 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 56 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float>
%r198 = uitofp <4 x i64> undef to <4 x float>
- ; CHECK: cost of 48 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 56 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float>
%r199 = sitofp <4 x i64> undef to <4 x float>
- ; CHECK: cost of 16 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 16 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double>
%r200 = uitofp <4 x i1> undef to <4 x double>
- ; CHECK: cost of 16 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 16 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double>
%r201 = sitofp <4 x i1> undef to <4 x double>
- ; CHECK: cost of 16 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 16 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double>
%r202 = uitofp <4 x i8> undef to <4 x double>
- ; CHECK: cost of 16 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 16 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double>
%r203 = sitofp <4 x i8> undef to <4 x double>
- ; CHECK: cost of 16 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 16 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double>
%r204 = uitofp <4 x i16> undef to <4 x double>
- ; CHECK: cost of 16 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 16 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double>
%r205 = sitofp <4 x i16> undef to <4 x double>
- ; CHECK: cost of 16 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 16 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double>
%r206 = uitofp <4 x i32> undef to <4 x double>
- ; CHECK: cost of 16 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 16 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double>
%r207 = sitofp <4 x i32> undef to <4 x double>
- ; CHECK: cost of 48 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 48 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double>
%r208 = uitofp <4 x i64> undef to <4 x double>
- ; CHECK: cost of 48 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 48 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double>
%r209 = sitofp <4 x i64> undef to <4 x double>
- ; CHECK: cost of 32 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 48 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float>
%r210 = uitofp <8 x i1> undef to <8 x float>
- ; CHECK: cost of 32 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 48 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float>
%r211 = sitofp <8 x i1> undef to <8 x float>
- ; CHECK: cost of 32 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 48 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float>
%r212 = uitofp <8 x i8> undef to <8 x float>
- ; CHECK: cost of 32 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 48 for instruction: %r213 = sitofp <8 x i8> undef to <8 x float>
%r213 = sitofp <8 x i8> undef to <8 x float>
- ; CHECK: cost of 4 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 4 for instruction: %r214 = uitofp <8 x i16> undef to <8 x float>
%r214 = uitofp <8 x i16> undef to <8 x float>
- ; CHECK: cost of 4 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float>
%r215 = sitofp <8 x i16> undef to <8 x float>
- ; CHECK: cost of 2 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float>
%r216 = uitofp <8 x i32> undef to <8 x float>
- ; CHECK: cost of 2 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float>
%r217 = sitofp <8 x i32> undef to <8 x float>
- ; CHECK: cost of 96 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 112 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float>
%r218 = uitofp <8 x i64> undef to <8 x float>
- ; CHECK: cost of 96 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 112 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float>
%r219 = sitofp <8 x i64> undef to <8 x float>
- ; CHECK: cost of 32 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 32 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double>
%r220 = uitofp <8 x i1> undef to <8 x double>
- ; CHECK: cost of 32 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 32 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double>
%r221 = sitofp <8 x i1> undef to <8 x double>
- ; CHECK: cost of 32 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 32 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double>
%r222 = uitofp <8 x i8> undef to <8 x double>
- ; CHECK: cost of 32 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 32 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double>
%r223 = sitofp <8 x i8> undef to <8 x double>
- ; CHECK: cost of 32 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 32 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double>
%r224 = uitofp <8 x i16> undef to <8 x double>
- ; CHECK: cost of 32 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 32 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double>
%r225 = sitofp <8 x i16> undef to <8 x double>
- ; CHECK: cost of 32 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 32 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double>
%r226 = uitofp <8 x i16> undef to <8 x double>
- ; CHECK: cost of 32 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 32 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double>
%r227 = sitofp <8 x i16> undef to <8 x double>
- ; CHECK: cost of 96 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 96 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double>
%r228 = uitofp <8 x i64> undef to <8 x double>
- ; CHECK: cost of 96 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 96 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double>
%r229 = sitofp <8 x i64> undef to <8 x double>
- ; CHECK: cost of 64 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 96 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float>
%r230 = uitofp <16 x i1> undef to <16 x float>
- ; CHECK: cost of 64 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 96 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float>
%r231 = sitofp <16 x i1> undef to <16 x float>
- ; CHECK: cost of 64 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 96 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float>
%r232 = uitofp <16 x i8> undef to <16 x float>
- ; CHECK: cost of 64 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 96 for instruction: %r233 = sitofp <16 x i8> undef to <16 x float>
%r233 = sitofp <16 x i8> undef to <16 x float>
- ; CHECK: cost of 8 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 8 for instruction: %r234 = uitofp <16 x i16> undef to <16 x float>
%r234 = uitofp <16 x i16> undef to <16 x float>
- ; CHECK: cost of 8 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float>
%r235 = sitofp <16 x i16> undef to <16 x float>
- ; CHECK: cost of 4 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float>
%r236 = uitofp <16 x i32> undef to <16 x float>
- ; CHECK: cost of 4 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float>
%r237 = sitofp <16 x i32> undef to <16 x float>
- ; CHECK: cost of 192 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 224 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float>
%r238 = uitofp <16 x i64> undef to <16 x float>
- ; CHECK: cost of 192 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 224 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float>
%r239 = sitofp <16 x i64> undef to <16 x float>
- ; CHECK: cost of 64 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 64 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double>
%r240 = uitofp <16 x i1> undef to <16 x double>
- ; CHECK: cost of 64 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 64 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double>
%r241 = sitofp <16 x i1> undef to <16 x double>
- ; CHECK: cost of 64 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 64 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double>
%r242 = uitofp <16 x i8> undef to <16 x double>
- ; CHECK: cost of 64 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 64 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double>
%r243 = sitofp <16 x i8> undef to <16 x double>
- ; CHECK: cost of 64 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 64 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double>
%r244 = uitofp <16 x i16> undef to <16 x double>
- ; CHECK: cost of 64 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 64 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double>
%r245 = sitofp <16 x i16> undef to <16 x double>
- ; CHECK: cost of 64 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 64 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double>
%r246 = uitofp <16 x i16> undef to <16 x double>
- ; CHECK: cost of 64 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 64 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double>
%r247 = sitofp <16 x i16> undef to <16 x double>
- ; CHECK: cost of 192 {{.*}} uitofp
+ ; CHECK: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double>
%r248 = uitofp <16 x i64> undef to <16 x double>
- ; CHECK: cost of 192 {{.*}} sitofp
+ ; CHECK: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double>
%r249 = sitofp <16 x i64> undef to <16 x double>
- ;CHECK: cost of 0 {{.*}} ret
+ ; CHECK: Found an estimated cost of 0 for instruction: ret i32 undef
ret i32 undef
}
diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll
index 624ca113a302..a70d6d42b61b 100644
--- a/test/Analysis/CostModel/ARM/gep.ll
+++ b/test/Analysis/CostModel/ARM/gep.ll
@@ -3,41 +3,85 @@
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios6.0.0"
-define void @test_geps() {
- ; Cost of scalar integer geps should be one. We can't always expect it to be
- ; folded into the instruction addressing mode.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8, i8*
+define void @test_geps(i32 %i) {
+ ; GEPs with index 0 are essentially NOOPs.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
%a0 = getelementptr inbounds i8, i8* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16, i16*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
%a1 = getelementptr inbounds i16, i16* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
%a2 = getelementptr inbounds i32, i32* undef, i32 0
-
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
%a3 = getelementptr inbounds i64, i64* undef, i32 0
-
- ; Cost of scalar floating point geps should be one. We cannot fold the address
- ; computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
%a4 = getelementptr inbounds float, float* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
%a5 = getelementptr inbounds double, double* undef, i32 0
-
-
- ; Cost of vector geps should be one. We cannot fold the address computation.
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
%a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
%a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
%a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
%a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
%a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0
-;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
%a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0
+ ; Cost of GEPs is one if we cannot fold the address computation.
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %b0 = getelementptr inbounds i8, i8* undef, i32 1024
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %b1 = getelementptr inbounds i16, i16* undef, i32 1024
+ ; Thumb-2 cannot fold offset >= 2^12 into address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %b2 = getelementptr inbounds i32, i32* undef, i32 1024
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %b3 = getelementptr inbounds i64, i64* undef, i32 1024
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float, float*
+ %b4 = getelementptr inbounds float, float* undef, i32 1024
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double, double*
+ %b5 = getelementptr inbounds double, double* undef, i32 1024
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+ %b7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+ %b8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+ %b9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+ %b10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+ %b11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 1
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+ %b12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 1
+
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8*
+ %c0 = getelementptr inbounds i8, i8* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i16, i16*
+ %c1 = getelementptr inbounds i16, i16* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i32, i32*
+ %c2 = getelementptr inbounds i32, i32* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i64, i64*
+ %c3 = getelementptr inbounds i64, i64* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds float, float*
+ %c4 = getelementptr inbounds float, float* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds double, double*
+ %c5 = getelementptr inbounds double, double* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i8>, <4 x i8>*
+ %c7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 %i
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds <4 x i16>, <4 x i16>*
+ %c8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 %i
+ ; Thumb-2 cannot fold scales larger than 8 to address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>, <4 x i32>*
+ %c9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>, <4 x i64>*
+ %c10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>, <4 x float>*
+ %c11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 %i
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>*
+ %c12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i
ret void
}
diff --git a/test/Analysis/CostModel/ARM/select.ll b/test/Analysis/CostModel/ARM/select.ll
index 21eef83c4bbe..57e1418a3f28 100644
--- a/test/Analysis/CostModel/ARM/select.ll
+++ b/test/Analysis/CostModel/ARM/select.ll
@@ -34,16 +34,16 @@ define void @casts() {
%v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef
; CHECK: cost of 1 {{.*}} select
%v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef
- ; CHECK: cost of 40 {{.*}} select
+ ; CHECK: cost of 2 {{.*}} select
%v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef
; CHECK: cost of 1 {{.*}} select
%v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
; CHECK: cost of 1 {{.*}} select
%v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef
- ; CHECK: cost of 41 {{.*}} select
+ ; CHECK: cost of 2 {{.*}} select
%v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef
- ; CHECK: cost of 82 {{.*}} select
+ ; CHECK: cost of 4 {{.*}} select
%v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef
; CHECK: cost of 1 {{.*}} select
diff --git a/test/Analysis/CostModel/PowerPC/load_store.ll b/test/Analysis/CostModel/PowerPC/load_store.ll
index 1e50f1651e0a..0a568b88e726 100644
--- a/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -34,7 +34,7 @@ define i32 @loads(i32 %arg) {
; CHECK: cost of 48 {{.*}} load
load <4 x i16>, <4 x i16>* undef, align 2
- ; CHECK: cost of 1 {{.*}} load
+ ; CHECK: cost of 2 {{.*}} load
load <4 x i32>, <4 x i32>* undef, align 4
; CHECK: cost of 46 {{.*}} load
diff --git a/test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll b/test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll
new file mode 100644
index 000000000000..3b1bc3b3fdbc
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll
@@ -0,0 +1,404 @@
+; RUN: opt < %s -cost-model -analyze | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
+entry:
+ %r = load <16 x i8>, <16 x i8>* %p, align 1
+ ret <16 x i8> %r
+
+; CHECK-LABEL: test_l_v16i8
+; CHECK: cost of 2 for instruction: %r = load <16 x i8>, <16 x i8>* %p, align 1
+}
+
+define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 {
+entry:
+ %r = load <32 x i8>, <32 x i8>* %p, align 1
+ ret <32 x i8> %r
+
+; CHECK-LABEL: test_l_v32i8
+; CHECK: cost of 4 for instruction: %r = load <32 x i8>, <32 x i8>* %p, align 1
+}
+
+define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 {
+entry:
+ %r = load <8 x i16>, <8 x i16>* %p, align 2
+ ret <8 x i16> %r
+
+; CHECK-LABEL: test_l_v8i16
+; CHECK: cost of 2 for instruction: %r = load <8 x i16>, <8 x i16>* %p, align 2
+}
+
+define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 {
+entry:
+ %r = load <16 x i16>, <16 x i16>* %p, align 2
+ ret <16 x i16> %r
+
+; CHECK-LABEL: test_l_v16i16
+; CHECK: cost of 4 for instruction: %r = load <16 x i16>, <16 x i16>* %p, align 2
+}
+
+define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 {
+entry:
+ %r = load <4 x i32>, <4 x i32>* %p, align 4
+ ret <4 x i32> %r
+
+; CHECK-LABEL: test_l_v4i32
+; CHECK: cost of 2 for instruction: %r = load <4 x i32>, <4 x i32>* %p, align 4
+}
+
+define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 {
+entry:
+ %r = load <8 x i32>, <8 x i32>* %p, align 4
+ ret <8 x i32> %r
+
+; CHECK-LABEL: test_l_v8i32
+; CHECK: cost of 4 for instruction: %r = load <8 x i32>, <8 x i32>* %p, align 4
+}
+
+define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 {
+entry:
+ %r = load <2 x i64>, <2 x i64>* %p, align 8
+ ret <2 x i64> %r
+
+; CHECK-LABEL: test_l_v2i64
+; CHECK: cost of 1 for instruction: %r = load <2 x i64>, <2 x i64>* %p, align 8
+}
+
+define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 {
+entry:
+ %r = load <4 x i64>, <4 x i64>* %p, align 8
+ ret <4 x i64> %r
+
+; CHECK-LABEL: test_l_v4i64
+; CHECK: cost of 2 for instruction: %r = load <4 x i64>, <4 x i64>* %p, align 8
+}
+
+define <4 x float> @test_l_v4float(<4 x float>* %p) #0 {
+entry:
+ %r = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %r
+
+; CHECK-LABEL: test_l_v4float
+; CHECK: cost of 2 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
+}
+
+define <8 x float> @test_l_v8float(<8 x float>* %p) #0 {
+entry:
+ %r = load <8 x float>, <8 x float>* %p, align 4
+ ret <8 x float> %r
+
+; CHECK-LABEL: test_l_v8float
+; CHECK: cost of 4 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
+}
+
+define <2 x double> @test_l_v2double(<2 x double>* %p) #0 {
+entry:
+ %r = load <2 x double>, <2 x double>* %p, align 8
+ ret <2 x double> %r
+
+; CHECK-LABEL: test_l_v2double
+; CHECK: cost of 1 for instruction: %r = load <2 x double>, <2 x double>* %p, align 8
+}
+
+define <4 x double> @test_l_v4double(<4 x double>* %p) #0 {
+entry:
+ %r = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %r
+
+; CHECK-LABEL: test_l_v4double
+; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
+}
+
+define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 {
+entry:
+ %r = load <16 x i8>, <16 x i8>* %p, align 1
+ ret <16 x i8> %r
+
+; CHECK-LABEL: test_l_p8v16i8
+; CHECK: cost of 1 for instruction: %r = load <16 x i8>, <16 x i8>* %p, align 1
+}
+
+define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 {
+entry:
+ %r = load <32 x i8>, <32 x i8>* %p, align 1
+ ret <32 x i8> %r
+
+; CHECK-LABEL: test_l_p8v32i8
+; CHECK: cost of 2 for instruction: %r = load <32 x i8>, <32 x i8>* %p, align 1
+}
+
+define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 {
+entry:
+ %r = load <8 x i16>, <8 x i16>* %p, align 2
+ ret <8 x i16> %r
+
+; CHECK-LABEL: test_l_p8v8i16
+; CHECK: cost of 1 for instruction: %r = load <8 x i16>, <8 x i16>* %p, align 2
+}
+
+define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 {
+entry:
+ %r = load <16 x i16>, <16 x i16>* %p, align 2
+ ret <16 x i16> %r
+
+; CHECK-LABEL: test_l_p8v16i16
+; CHECK: cost of 2 for instruction: %r = load <16 x i16>, <16 x i16>* %p, align 2
+}
+
+define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 {
+entry:
+ %r = load <4 x i32>, <4 x i32>* %p, align 4
+ ret <4 x i32> %r
+
+; CHECK-LABEL: test_l_p8v4i32
+; CHECK: cost of 1 for instruction: %r = load <4 x i32>, <4 x i32>* %p, align 4
+}
+
+define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 {
+entry:
+ %r = load <8 x i32>, <8 x i32>* %p, align 4
+ ret <8 x i32> %r
+
+; CHECK-LABEL: test_l_p8v8i32
+; CHECK: cost of 2 for instruction: %r = load <8 x i32>, <8 x i32>* %p, align 4
+}
+
+define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 {
+entry:
+ %r = load <2 x i64>, <2 x i64>* %p, align 8
+ ret <2 x i64> %r
+
+; CHECK-LABEL: test_l_p8v2i64
+; CHECK: cost of 1 for instruction: %r = load <2 x i64>, <2 x i64>* %p, align 8
+}
+
+define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 {
+entry:
+ %r = load <4 x i64>, <4 x i64>* %p, align 8
+ ret <4 x i64> %r
+
+; CHECK-LABEL: test_l_p8v4i64
+; CHECK: cost of 2 for instruction: %r = load <4 x i64>, <4 x i64>* %p, align 8
+}
+
+define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 {
+entry:
+ %r = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %r
+
+; CHECK-LABEL: test_l_p8v4float
+; CHECK: cost of 1 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
+}
+
+define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 {
+entry:
+ %r = load <8 x float>, <8 x float>* %p, align 4
+ ret <8 x float> %r
+
+; CHECK-LABEL: test_l_p8v8float
+; CHECK: cost of 2 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
+}
+
+define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 {
+entry:
+ %r = load <2 x double>, <2 x double>* %p, align 8
+ ret <2 x double> %r
+
+; CHECK-LABEL: test_l_p8v2double
+; CHECK: cost of 1 for instruction: %r = load <2 x double>, <2 x double>* %p, align 8
+}
+
+define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 {
+entry:
+ %r = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %r
+
+; CHECK-LABEL: test_l_p8v4double
+; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
+}
+
+define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
+entry:
+ %r = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %r
+
+; CHECK-LABEL: test_l_qv4float
+; CHECK: cost of 2 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
+}
+
+define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
+entry:
+ %r = load <8 x float>, <8 x float>* %p, align 4
+ ret <8 x float> %r
+
+; CHECK-LABEL: test_l_qv8float
+; CHECK: cost of 4 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
+}
+
+define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
+entry:
+ %r = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %r
+
+; CHECK-LABEL: test_l_qv4double
+; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
+}
+
+define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
+entry:
+ %r = load <8 x double>, <8 x double>* %p, align 8
+ ret <8 x double> %r
+
+; CHECK-LABEL: test_l_qv8double
+; CHECK: cost of 4 for instruction: %r = load <8 x double>, <8 x double>* %p, align 8
+}
+
+define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
+entry:
+ store <16 x i8> %v, <16 x i8>* %p, align 1
+ ret void
+
+; CHECK-LABEL: test_s_v16i8
+; CHECK: cost of 1 for instruction: store <16 x i8> %v, <16 x i8>* %p, align 1
+}
+
+define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 {
+entry:
+ store <32 x i8> %v, <32 x i8>* %p, align 1
+ ret void
+
+; CHECK-LABEL: test_s_v32i8
+; CHECK: cost of 2 for instruction: store <32 x i8> %v, <32 x i8>* %p, align 1
+}
+
+define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
+entry:
+ store <8 x i16> %v, <8 x i16>* %p, align 2
+ ret void
+
+; CHECK-LABEL: test_s_v8i16
+; CHECK: cost of 1 for instruction: store <8 x i16> %v, <8 x i16>* %p, align 2
+}
+
+define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 {
+entry:
+ store <16 x i16> %v, <16 x i16>* %p, align 2
+ ret void
+
+; CHECK-LABEL: test_s_v16i16
+; CHECK: cost of 2 for instruction: store <16 x i16> %v, <16 x i16>* %p, align 2
+}
+
+define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
+entry:
+ store <4 x i32> %v, <4 x i32>* %p, align 4
+ ret void
+
+; CHECK-LABEL: test_s_v4i32
+; CHECK: cost of 1 for instruction: store <4 x i32> %v, <4 x i32>* %p, align 4
+}
+
+define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 {
+entry:
+ store <8 x i32> %v, <8 x i32>* %p, align 4
+ ret void
+
+; CHECK-LABEL: test_s_v8i32
+; CHECK: cost of 2 for instruction: store <8 x i32> %v, <8 x i32>* %p, align 4
+}
+
+define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 {
+entry:
+ store <2 x i64> %v, <2 x i64>* %p, align 8
+ ret void
+
+; CHECK-LABEL: test_s_v2i64
+; CHECK: cost of 1 for instruction: store <2 x i64> %v, <2 x i64>* %p, align 8
+}
+
+define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
+entry:
+ store <4 x i64> %v, <4 x i64>* %p, align 8
+ ret void
+
+; CHECK-LABEL: test_s_v4i64
+; CHECK: cost of 2 for instruction: store <4 x i64> %v, <4 x i64>* %p, align 8
+}
+
+define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: test_s_v4float
+; CHECK: cost of 1 for instruction: store <4 x float> %v, <4 x float>* %p, align 4
+}
+
+define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 {
+entry:
+ store <8 x float> %v, <8 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: test_s_v8float
+; CHECK: cost of 2 for instruction: store <8 x float> %v, <8 x float>* %p, align 4
+}
+
+define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 {
+entry:
+ store <2 x double> %v, <2 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: test_s_v2double
+; CHECK: cost of 1 for instruction: store <2 x double> %v, <2 x double>* %p, align 8
+}
+
+define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: test_s_v4double
+; CHECK: cost of 2 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
+}
+
+define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: test_s_qv4float
+; CHECK: cost of 7 for instruction: store <4 x float> %v, <4 x float>* %p, align 4
+}
+
+define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
+entry:
+ store <8 x float> %v, <8 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: test_s_qv8float
+; CHECK: cost of 15 for instruction: store <8 x float> %v, <8 x float>* %p, align 4
+}
+
+define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: test_s_qv4double
+; CHECK: cost of 7 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
+}
+
+define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
+entry:
+ store <8 x double> %v, <8 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: test_s_qv8double
+; CHECK: cost of 15 for instruction: store <8 x double> %v, <8 x double>* %p, align 8
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr7" }
+attributes #1 = { nounwind "target-cpu"="a2q" }
+attributes #2 = { nounwind "target-cpu"="pwr8" }
+
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index 92f5a1ec3a00..0b61d3cd4214 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -94,7 +94,7 @@ define void @shift() {
; AVX2: cost of 1 {{.*}} ashr
%C0 = ashr <4 x i32> undef, undef
; AVX: cost of 6 {{.*}} ashr
- ; AVX2: cost of 20 {{.*}} ashr
+ ; AVX2: cost of 4 {{.*}} ashr
%C1 = ashr <2 x i64> undef, undef
ret void
@@ -121,7 +121,7 @@ define void @avx2shift() {
; AVX2: cost of 1 {{.*}} ashr
%C0 = ashr <8 x i32> undef, undef
; AVX: cost of 12 {{.*}} ashr
- ; AVX2: cost of 40 {{.*}} ashr
+ ; AVX2: cost of 4 {{.*}} ashr
%C1 = ashr <4 x i64> undef, undef
ret void
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index fb16af635f07..c518587c0e1a 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -84,11 +84,11 @@ define i32 @zext_sext(<8 x i1> %in) {
;CHECK-AVX: cost of 4 {{.*}} zext
%D = zext <4 x i32> undef to <4 x i64>
- ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
- %D1 = zext <16 x i32> undef to <16 x i64>
+ ;CHECK-AVX512: cost of 1 {{.*}} %D1 = zext
+ %D1 = zext <8 x i32> undef to <8 x i64>
- ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
- %D2 = sext <16 x i32> undef to <16 x i64>
+ ;CHECK-AVX512: cost of 1 {{.*}} %D2 = sext
+ %D2 = sext <8 x i32> undef to <8 x i64>
;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
%D3 = zext <16 x i16> undef to <16 x i32>
@@ -118,9 +118,11 @@ define i32 @zext_sext(<8 x i1> %in) {
;CHECK_AVX512: cost of 1 {{.*}} G = trunc
%G = trunc <8 x i64> undef to <8 x i32>
- ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
- %G1 = trunc <16 x i64> undef to <16 x i32>
+ ;CHECK-AVX512: cost of 1 {{.*}} %G1 = trunc
+ %G1 = trunc <16 x i32> undef to <16 x i16>
+ ;CHECK-AVX512: cost of 1 {{.*}} %G2 = trunc
+ %G2 = trunc <16 x i32> undef to <16 x i8>
ret i32 undef
}
@@ -207,38 +209,40 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; CHECK: cost of 2 {{.*}} uitofp
%C2 = uitofp <4 x i16> %c to <4 x double>
- ; CHECK: cost of 6 {{.*}} uitofp
+ ; CHECK-AVX2: cost of 6 {{.*}} uitofp
%D1 = uitofp <4 x i32> %d to <4 x float>
- ; CHECK: cost of 6 {{.*}} uitofp
+ ; CHECK-AVX2: cost of 6 {{.*}} uitofp
%D2 = uitofp <4 x i32> %d to <4 x double>
ret void
}
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; CHECK-LABEL: for function 'uitofp8'
- ; CHECK: cost of 6 {{.*}} uitofp
+ ; CHECK-AVX2: cost of 6 {{.*}} uitofp
%A1 = uitofp <8 x i1> %a to <8 x float>
- ; CHECK: cost of 5 {{.*}} uitofp
+ ; CHECK-AVX2: cost of 5 {{.*}} uitofp
+ ; CHECK-AVX512: cost of 2 {{.*}} uitofp
%B1 = uitofp <8 x i8> %b to <8 x float>
- ; CHECK: cost of 5 {{.*}} uitofp
+ ; CHECK-AVX2: cost of 5 {{.*}} uitofp
+ ; CHECK-AVX512: cost of 2 {{.*}} uitofp
%C1 = uitofp <8 x i16> %c to <8 x float>
; CHECK-AVX2: cost of 8 {{.*}} uitofp
- ; CHECK-AVX512: cost of 8 {{.*}} uitofp
+ ; CHECK-AVX512: cost of 1 {{.*}} uitofp
; CHECK-AVX: cost of 9 {{.*}} uitofp
%D1 = uitofp <8 x i32> %d to <8 x float>
ret void
}
-define void @fp_conv(<8 x float> %a, <16 x float>%b) {
+define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) {
;CHECK-LABEL: for function 'fp_conv'
; CHECK-AVX512: cost of 1 {{.*}} fpext
%A1 = fpext <8 x float> %a to <8 x double>
- ; CHECK-AVX512: cost of 3 {{.*}} fpext
- %A2 = fpext <16 x float> %b to <16 x double>
+ ; CHECK-AVX512: cost of 1 {{.*}} fpext
+ %A2 = fpext <4 x float> %c to <4 x double>
; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext
; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
@@ -248,7 +252,7 @@ define void @fp_conv(<8 x float> %a, <16 x float>%b) {
; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
%A4 = fptrunc <8 x double> undef to <8 x float>
- ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
- %A5 = fptrunc <16 x double> undef to <16 x float>
+ ; CHECK-AVX512: cost of 1 {{.*}} %A5 = fptrunc
+ %A5 = fptrunc <4 x double> undef to <4 x float>
ret void
}
diff --git a/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index 4683c432c559..61d3e0116e8b 100644
--- a/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -1,4 +1,6 @@
-; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s | FileCheck %s -check-prefix=AVX2
+; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s | FileCheck %s --check-prefix=AVX2
+; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck %s --check-prefix=KNL
+; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze < %s | FileCheck %s --check-prefix=SKX
; AVX2-LABEL: test1
@@ -65,6 +67,217 @@ define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
ret <2 x i32> %res
}
+define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) {
+
+; AVX2-LABEL: test_gather_2f64
+; AVX2: Found an estimated cost of 7 {{.*}}.gather
+
+; KNL-LABEL: test_gather_2f64
+; KNL: Found an estimated cost of 7 {{.*}}.gather
+
+; SKX-LABEL: test_gather_2f64
+; SKX: Found an estimated cost of 7 {{.*}}.gather
+
+%res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
+
+define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {
+
+; AVX2-LABEL: test_gather_4i32
+; AVX2: Found an estimated cost of 16 {{.*}}.gather
+
+; KNL-LABEL: test_gather_4i32
+; KNL: Found an estimated cost of 16 {{.*}}.gather
+
+; SKX-LABEL: test_gather_4i32
+; SKX: Found an estimated cost of 6 {{.*}}.gather
+
+%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) {
+
+; AVX2-LABEL: test_gather_4i32_const_mask
+; AVX2: Found an estimated cost of 8 {{.*}}.gather
+
+; KNL-LABEL: test_gather_4i32_const_mask
+; KNL: Found an estimated cost of 8 {{.*}}.gather
+
+; SKX-LABEL: test_gather_4i32_const_mask
+; SKX: Found an estimated cost of 6 {{.*}}.gather
+
+%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
+
+define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
+
+; AVX2-LABEL: test_gather_16f32_const_mask
+; AVX2: Found an estimated cost of 30 {{.*}}.gather
+
+; KNL-LABEL: test_gather_16f32_const_mask
+; KNL: Found an estimated cost of 18 {{.*}}.gather
+
+; SKX-LABEL: test_gather_16f32_const_mask
+; SKX: Found an estimated cost of 18 {{.*}}.gather
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {
+
+; AVX2-LABEL: test_gather_16f32_var_mask
+; AVX2: Found an estimated cost of 62 {{.*}}.gather
+
+; KNL-LABEL: test_gather_16f32_var_mask
+; KNL: Found an estimated cost of 18 {{.*}}.gather
+
+; SKX-LABEL: test_gather_16f32_var_mask
+; SKX: Found an estimated cost of 18 {{.*}}.gather
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {
+
+; AVX2-LABEL: test_gather_16f32_ra_var_mask
+; AVX2: Found an estimated cost of 62 {{.*}}.gather
+
+; KNL-LABEL: test_gather_16f32_ra_var_mask
+; KNL: Found an estimated cost of 20 {{.*}}.gather
+
+; SKX-LABEL: test_gather_16f32_ra_var_mask
+; SKX: Found an estimated cost of 20 {{.*}}.gather
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {
+
+; AVX2-LABEL: test_gather_16f32_const_mask2
+; AVX2: Found an estimated cost of 30 {{.*}}.gather
+
+; KNL-LABEL: test_gather_16f32_const_mask2
+; KNL: Found an estimated cost of 18 {{.*}}.gather
+
+; SKX-LABEL: test_gather_16f32_const_mask2
+; SKX: Found an estimated cost of 18 {{.*}}.gather
+
+ %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
+ %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
+; AVX2-LABEL: test_scatter_16i32
+; AVX2: Found an estimated cost of 64 {{.*}}.scatter
+
+; KNL-LABEL: test_scatter_16i32
+; KNL: Found an estimated cost of 18 {{.*}}.scatter
+
+; SKX-LABEL: test_scatter_16i32
+; SKX: Found an estimated cost of 18 {{.*}}.scatter
+
+ %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
+ %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
+
+ %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+ %imask = bitcast i16 %mask to <16 x i1>
+ call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+ ret void
+}
+
+define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
+; AVX2-LABEL: test_scatter_8i32
+; AVX2: Found an estimated cost of 32 {{.*}}.scatter
+
+; KNL-LABEL: test_scatter_8i32
+; KNL: Found an estimated cost of 10 {{.*}}.scatter
+
+; SKX-LABEL: test_scatter_8i32
+; SKX: Found an estimated cost of 10 {{.*}}.scatter
+
+ call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+ ret void
+}
+
+declare void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
+
+define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
+; AVX2-LABEL: test_scatter_4i32
+; AVX2: Found an estimated cost of 16 {{.*}}.scatter
+
+; KNL-LABEL: test_scatter_4i32
+; KNL: Found an estimated cost of 16 {{.*}}.scatter
+
+; SKX-LABEL: test_scatter_4i32
+; SKX: Found an estimated cost of 6 {{.*}}.scatter
+
+ call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+ ret void
+}
+
+define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {
+
+; AVX2-LABEL: test_gather_4f32
+; AVX2: Found an estimated cost of 15 {{.*}}.gather
+
+; KNL-LABEL: test_gather_4f32
+; KNL: Found an estimated cost of 15 {{.*}}.gather
+
+; SKX-LABEL: test_gather_4f32
+; SKX: Found an estimated cost of 6 {{.*}}.gather
+
+ %sext_ind = sext <4 x i32> %ind to <4 x i64>
+ %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
+
+ %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+ ret <4 x float>%res
+}
+
+define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {
+
+; AVX2-LABEL: test_gather_4f32_const_mask
+; AVX2: Found an estimated cost of 7 {{.*}}.gather
+
+; KNL-LABEL: test_gather_4f32_const_mask
+; KNL: Found an estimated cost of 7 {{.*}}.gather
+
+; SKX-LABEL: test_gather_4f32_const_mask
+; SKX: Found an estimated cost of 6 {{.*}}.gather
+
+ %sext_ind = sext <4 x i32> %ind to <4 x i64>
+ %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
+
+ %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+ ret <4 x float>%res
+}
+
+declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
+declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
+declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
+declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
diff --git a/test/Analysis/CostModel/X86/reduction.ll b/test/Analysis/CostModel/X86/reduction.ll
index 78e65aee1460..aaafe07c1eb8 100644
--- a/test/Analysis/CostModel/X86/reduction.ll
+++ b/test/Analysis/CostModel/X86/reduction.ll
@@ -33,7 +33,7 @@ define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) {
%bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3
; CHECK-LABEL: reduction_cost_int
-; CHECK: cost of 23 {{.*}} extractelement
+; CHECK: cost of 17 {{.*}} extractelement
%r = extractelement <8 x i32> %bin.rdx.3, i32 0
ret i32 %r
diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll
index dcd0088d0df7..9f0c4065c178 100644
--- a/test/Analysis/CostModel/X86/sitofp.ll
+++ b/test/Analysis/CostModel/X86/sitofp.ll
@@ -4,656 +4,656 @@
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s
define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
- ; SSE2: sitofpv2i8v2double
+ ; SSE2-LABEL: sitofpv2i8v2double
; SSE2: cost of 20 {{.*}} sitofp
;
- ; AVX1: sitofpv2i8v2double
+ ; AVX1-LABEL: sitofpv2i8v2double
; AVX1: cost of 4 {{.*}} sitofp
;
- ; AVX2: sitofpv2i8v2double
+ ; AVX2-LABEL: sitofpv2i8v2double
; AVX2: cost of 4 {{.*}} sitofp
;
- ; AVX512F: sitofpv2i8v2double
+ ; AVX512F-LABEL: sitofpv2i8v2double
; AVX512F: cost of 4 {{.*}} sitofp
%1 = sitofp <2 x i8> %a to <2 x double>
ret <2 x double> %1
}
define <4 x double> @sitofpv4i8v4double(<4 x i8> %a) {
- ; SSE2: sitofpv4i8v4double
+ ; SSE2-LABEL: sitofpv4i8v4double
; SSE2: cost of 40 {{.*}} sitofp
;
- ; AVX1: sitofpv4i8v4double
+ ; AVX1-LABEL: sitofpv4i8v4double
; AVX1: cost of 3 {{.*}} sitofp
;
- ; AVX2: sitofpv4i8v4double
+ ; AVX2-LABEL: sitofpv4i8v4double
; AVX2: cost of 3 {{.*}} sitofp
;
- ; AVX512F: sitofpv4i8v4double
+ ; AVX512F-LABEL: sitofpv4i8v4double
; AVX512F: cost of 3 {{.*}} sitofp
%1 = sitofp <4 x i8> %a to <4 x double>
ret <4 x double> %1
}
define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) {
- ; SSE2: sitofpv8i8v8double
+ ; SSE2-LABEL: sitofpv8i8v8double
; SSE2: cost of 80 {{.*}} sitofp
;
- ; AVX1: sitofpv8i8v8double
+ ; AVX1-LABEL: sitofpv8i8v8double
; AVX1: cost of 20 {{.*}} sitofp
;
- ; AVX2: sitofpv8i8v8double
+ ; AVX2-LABEL: sitofpv8i8v8double
; AVX2: cost of 20 {{.*}} sitofp
;
- ; AVX512F: sitofpv8i8v8double
+ ; AVX512F-LABEL: sitofpv8i8v8double
; AVX512F: cost of 2 {{.*}} sitofp
%1 = sitofp <8 x i8> %a to <8 x double>
ret <8 x double> %1
}
define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) {
- ; SSE2: sitofpv16i8v16double
+ ; SSE2-LABEL: sitofpv16i8v16double
; SSE2: cost of 160 {{.*}} sitofp
;
- ; AVX1: sitofpv16i8v16double
+ ; AVX1-LABEL: sitofpv16i8v16double
; AVX1: cost of 40 {{.*}} sitofp
;
- ; AVX2: sitofpv16i8v16double
+ ; AVX2-LABEL: sitofpv16i8v16double
; AVX2: cost of 40 {{.*}} sitofp
;
- ; AVX512F: sitofpv16i8v16double
+ ; AVX512F-LABEL: sitofpv16i8v16double
; AVX512F: cost of 44 {{.*}} sitofp
%1 = sitofp <16 x i8> %a to <16 x double>
ret <16 x double> %1
}
define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) {
- ; SSE2: sitofpv32i8v32double
+ ; SSE2-LABEL: sitofpv32i8v32double
; SSE2: cost of 320 {{.*}} sitofp
;
- ; AVX1: sitofpv32i8v32double
+ ; AVX1-LABEL: sitofpv32i8v32double
; AVX1: cost of 80 {{.*}} sitofp
;
- ; AVX2: sitofpv32i8v32double
+ ; AVX2-LABEL: sitofpv32i8v32double
; AVX2: cost of 80 {{.*}} sitofp
;
- ; AVX512F: sitofpv32i8v32double
+ ; AVX512F-LABEL: sitofpv32i8v32double
; AVX512F: cost of 88 {{.*}} sitofp
%1 = sitofp <32 x i8> %a to <32 x double>
ret <32 x double> %1
}
define <2 x double> @sitofpv2i16v2double(<2 x i16> %a) {
- ; SSE2: sitofpv2i16v2double
+ ; SSE2-LABEL: sitofpv2i16v2double
; SSE2: cost of 20 {{.*}} sitofp
;
- ; AVX1: sitofpv2i16v2double
+ ; AVX1-LABEL: sitofpv2i16v2double
; AVX1: cost of 4 {{.*}} sitofp
;
- ; AVX2: sitofpv2i16v2double
+ ; AVX2-LABEL: sitofpv2i16v2double
; AVX2: cost of 4 {{.*}} sitofp
;
- ; AVX512F: sitofpv2i16v2double
+ ; AVX512F-LABEL: sitofpv2i16v2double
; AVX512F: cost of 4 {{.*}} sitofp
%1 = sitofp <2 x i16> %a to <2 x double>
ret <2 x double> %1
}
define <4 x double> @sitofpv4i16v4double(<4 x i16> %a) {
- ; SSE2: sitofpv4i16v4double
+ ; SSE2-LABEL: sitofpv4i16v4double
; SSE2: cost of 40 {{.*}} sitofp
;
- ; AVX1: sitofpv4i16v4double
+ ; AVX1-LABEL: sitofpv4i16v4double
; AVX1: cost of 3 {{.*}} sitofp
;
- ; AVX2: sitofpv4i16v4double
+ ; AVX2-LABEL: sitofpv4i16v4double
; AVX2: cost of 3 {{.*}} sitofp
;
- ; AVX512F: sitofpv4i16v4double
+ ; AVX512F-LABEL: sitofpv4i16v4double
; AVX512F: cost of 3 {{.*}} sitofp
%1 = sitofp <4 x i16> %a to <4 x double>
ret <4 x double> %1
}
define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) {
- ; SSE2: sitofpv8i16v8double
+ ; SSE2-LABEL: sitofpv8i16v8double
; SSE2: cost of 80 {{.*}} sitofp
;
- ; AVX1: sitofpv8i16v8double
+ ; AVX1-LABEL: sitofpv8i16v8double
; AVX1: cost of 20 {{.*}} sitofp
;
- ; AVX2: sitofpv8i16v8double
+ ; AVX2-LABEL: sitofpv8i16v8double
; AVX2: cost of 20 {{.*}} sitofp
;
- ; AVX512F: sitofpv8i16v8double
+ ; AVX512F-LABEL: sitofpv8i16v8double
; AVX512F: cost of 2 {{.*}} sitofp
%1 = sitofp <8 x i16> %a to <8 x double>
ret <8 x double> %1
}
define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) {
- ; SSE2: sitofpv16i16v16double
+ ; SSE2-LABEL: sitofpv16i16v16double
; SSE2: cost of 160 {{.*}} sitofp
;
- ; AVX1: sitofpv16i16v16double
+ ; AVX1-LABEL: sitofpv16i16v16double
; AVX1: cost of 40 {{.*}} sitofp
;
- ; AVX2: sitofpv16i16v16double
+ ; AVX2-LABEL: sitofpv16i16v16double
; AVX2: cost of 40 {{.*}} sitofp
;
- ; AVX512F: sitofpv16i16v16double
+ ; AVX512F-LABEL: sitofpv16i16v16double
; AVX512F: cost of 44 {{.*}} sitofp
%1 = sitofp <16 x i16> %a to <16 x double>
ret <16 x double> %1
}
define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) {
- ; SSE2: sitofpv32i16v32double
+ ; SSE2-LABEL: sitofpv32i16v32double
; SSE2: cost of 320 {{.*}} sitofp
;
- ; AVX1: sitofpv32i16v32double
+ ; AVX1-LABEL: sitofpv32i16v32double
; AVX1: cost of 80 {{.*}} sitofp
;
- ; AVX2: sitofpv32i16v32double
+ ; AVX2-LABEL: sitofpv32i16v32double
; AVX2: cost of 80 {{.*}} sitofp
;
- ; AVX512F: sitofpv32i16v32double
+ ; AVX512F-LABEL: sitofpv32i16v32double
; AVX512F: cost of 88 {{.*}} sitofp
%1 = sitofp <32 x i16> %a to <32 x double>
ret <32 x double> %1
}
define <2 x double> @sitofpv2i32v2double(<2 x i32> %a) {
- ; SSE2: sitofpv2i32v2double
+ ; SSE2-LABEL: sitofpv2i32v2double
; SSE2: cost of 20 {{.*}} sitofp
;
- ; AVX1: sitofpv2i32v2double
+ ; AVX1-LABEL: sitofpv2i32v2double
; AVX1: cost of 4 {{.*}} sitofp
;
- ; AVX2: sitofpv2i32v2double
+ ; AVX2-LABEL: sitofpv2i32v2double
; AVX2: cost of 4 {{.*}} sitofp
;
- ; AVX512F: sitofpv2i32v2double
+ ; AVX512F-LABEL: sitofpv2i32v2double
; AVX512F: cost of 4 {{.*}} sitofp
%1 = sitofp <2 x i32> %a to <2 x double>
ret <2 x double> %1
}
define <4 x double> @sitofpv4i32v4double(<4 x i32> %a) {
- ; SSE2: sitofpv4i32v4double
+ ; SSE2-LABEL: sitofpv4i32v4double
; SSE2: cost of 40 {{.*}} sitofp
;
- ; AVX1: sitofpv4i32v4double
+ ; AVX1-LABEL: sitofpv4i32v4double
; AVX1: cost of 1 {{.*}} sitofp
;
- ; AVX2: sitofpv4i32v4double
+ ; AVX2-LABEL: sitofpv4i32v4double
; AVX2: cost of 1 {{.*}} sitofp
;
- ; AVX512F: sitofpv4i32v4double
+ ; AVX512F-LABEL: sitofpv4i32v4double
; AVX512F: cost of 1 {{.*}} sitofp
%1 = sitofp <4 x i32> %a to <4 x double>
ret <4 x double> %1
}
define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) {
- ; SSE2: sitofpv8i32v8double
+ ; SSE2-LABEL: sitofpv8i32v8double
; SSE2: cost of 80 {{.*}} sitofp
;
- ; AVX1: sitofpv8i32v8double
+ ; AVX1-LABEL: sitofpv8i32v8double
; AVX1: cost of 20 {{.*}} sitofp
;
- ; AVX2: sitofpv8i32v8double
+ ; AVX2-LABEL: sitofpv8i32v8double
; AVX2: cost of 20 {{.*}} sitofp
;
- ; AVX512F: sitofpv8i32v8double
+ ; AVX512F-LABEL: sitofpv8i32v8double
; AVX512F: cost of 1 {{.*}} sitofp
%1 = sitofp <8 x i32> %a to <8 x double>
ret <8 x double> %1
}
define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) {
- ; SSE2: sitofpv16i32v16double
+ ; SSE2-LABEL: sitofpv16i32v16double
; SSE2: cost of 160 {{.*}} sitofp
;
- ; AVX1: sitofpv16i32v16double
+ ; AVX1-LABEL: sitofpv16i32v16double
; AVX1: cost of 40 {{.*}} sitofp
;
- ; AVX2: sitofpv16i32v16double
+ ; AVX2-LABEL: sitofpv16i32v16double
; AVX2: cost of 40 {{.*}} sitofp
;
- ; AVX512F: sitofpv16i32v16double
+ ; AVX512F-LABEL: sitofpv16i32v16double
; AVX512F: cost of 44 {{.*}} sitofp
%1 = sitofp <16 x i32> %a to <16 x double>
ret <16 x double> %1
}
define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) {
- ; SSE2: sitofpv32i32v32double
+ ; SSE2-LABEL: sitofpv32i32v32double
; SSE2: cost of 320 {{.*}} sitofp
;
- ; AVX1: sitofpv32i32v32double
+ ; AVX1-LABEL: sitofpv32i32v32double
; AVX1: cost of 80 {{.*}} sitofp
;
- ; AVX2: sitofpv32i32v32double
+ ; AVX2-LABEL: sitofpv32i32v32double
; AVX2: cost of 80 {{.*}} sitofp
;
- ; AVX512F: sitofpv32i32v32double
+ ; AVX512F-LABEL: sitofpv32i32v32double
; AVX512F: cost of 88 {{.*}} sitofp
%1 = sitofp <32 x i32> %a to <32 x double>
ret <32 x double> %1
}
define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) {
- ; SSE2: sitofpv2i64v2double
+ ; SSE2-LABEL: sitofpv2i64v2double
; SSE2: cost of 20 {{.*}} sitofp
;
- ; AVX1: sitofpv2i64v2double
- ; AVX1: cost of 4 {{.*}} sitofp
+ ; AVX1-LABEL: sitofpv2i64v2double
+ ; AVX1: cost of 20 {{.*}} sitofp
;
- ; AVX2: sitofpv2i64v2double
- ; AVX2: cost of 4 {{.*}} sitofp
+ ; AVX2-LABEL: sitofpv2i64v2double
+ ; AVX2: cost of 20 {{.*}} sitofp
;
- ; AVX512F: sitofpv2i64v2double
- ; AVX512F: cost of 4 {{.*}} sitofp
+ ; AVX512F-LABEL: sitofpv2i64v2double
+ ; AVX512F: cost of 20 {{.*}} sitofp
%1 = sitofp <2 x i64> %a to <2 x double>
ret <2 x double> %1
}
define <4 x double> @sitofpv4i64v4double(<4 x i64> %a) {
- ; SSE2: sitofpv4i64v4double
+ ; SSE2-LABEL: sitofpv4i64v4double
; SSE2: cost of 40 {{.*}} sitofp
;
- ; AVX1: sitofpv4i64v4double
+ ; AVX1-LABEL: sitofpv4i64v4double
; AVX1: cost of 10 {{.*}} sitofp
;
- ; AVX2: sitofpv4i64v4double
+ ; AVX2-LABEL: sitofpv4i64v4double
; AVX2: cost of 10 {{.*}} sitofp
;
- ; AVX512F: sitofpv4i64v4double
+ ; AVX512F-LABEL: sitofpv4i64v4double
; AVX512F: cost of 10 {{.*}} sitofp
%1 = sitofp <4 x i64> %a to <4 x double>
ret <4 x double> %1
}
define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) {
- ; SSE2: sitofpv8i64v8double
+ ; SSE2-LABEL: sitofpv8i64v8double
; SSE2: cost of 80 {{.*}} sitofp
;
- ; AVX1: sitofpv8i64v8double
+ ; AVX1-LABEL: sitofpv8i64v8double
; AVX1: cost of 20 {{.*}} sitofp
;
- ; AVX2: sitofpv8i64v8double
+ ; AVX2-LABEL: sitofpv8i64v8double
; AVX2: cost of 20 {{.*}} sitofp
;
- ; AVX512F: sitofpv8i64v8double
+ ; AVX512F-LABEL: sitofpv8i64v8double
; AVX512F: cost of 22 {{.*}} sitofp
%1 = sitofp <8 x i64> %a to <8 x double>
ret <8 x double> %1
}
define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) {
- ; SSE2: sitofpv16i64v16double
+ ; SSE2-LABEL: sitofpv16i64v16double
; SSE2: cost of 160 {{.*}} sitofp
;
- ; AVX1: sitofpv16i64v16double
+ ; AVX1-LABEL: sitofpv16i64v16double
; AVX1: cost of 40 {{.*}} sitofp
;
- ; AVX2: sitofpv16i64v16double
+ ; AVX2-LABEL: sitofpv16i64v16double
; AVX2: cost of 40 {{.*}} sitofp
;
- ; AVX512F: sitofpv16i64v16double
+ ; AVX512F-LABEL: sitofpv16i64v16double
; AVX512F: cost of 44 {{.*}} sitofp
%1 = sitofp <16 x i64> %a to <16 x double>
ret <16 x double> %1
}
define <32 x double> @sitofpv32i64v32double(<32 x i64> %a) {
- ; SSE2: sitofpv32i64v32double
+ ; SSE2-LABEL: sitofpv32i64v32double
; SSE2: cost of 320 {{.*}} sitofp
;
- ; AVX1: sitofpv32i64v32double
+ ; AVX1-LABEL: sitofpv32i64v32double
; AVX1: cost of 80 {{.*}} sitofp
;
- ; AVX2: sitofpv32i64v32double
+ ; AVX2-LABEL: sitofpv32i64v32double
; AVX2: cost of 80 {{.*}} sitofp
;
- ; AVX512F: sitofpv32i64v32double
+ ; AVX512F-LABEL: sitofpv32i64v32double
; AVX512F: cost of 88 {{.*}} sitofp
%1 = sitofp <32 x i64> %a to <32 x double>
ret <32 x double> %1
}
define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
- ; SSE2: sitofpv2i8v2float
+ ; SSE2-LABEL: sitofpv2i8v2float
; SSE2: cost of 15 {{.*}} sitofp
;
- ; AVX1: sitofpv2i8v2float
+ ; AVX1-LABEL: sitofpv2i8v2float
; AVX1: cost of 4 {{.*}} sitofp
;
- ; AVX2: sitofpv2i8v2float
+ ; AVX2-LABEL: sitofpv2i8v2float
; AVX2: cost of 4 {{.*}} sitofp
;
- ; AVX512F: sitofpv2i8v2float
+ ; AVX512F-LABEL: sitofpv2i8v2float
; AVX512F: cost of 4 {{.*}} sitofp
%1 = sitofp <2 x i8> %a to <2 x float>
ret <2 x float> %1
}
define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
- ; SSE2: sitofpv4i8v4float
+ ; SSE2-LABEL: sitofpv4i8v4float
; SSE2: cost of 15 {{.*}} sitofp
;
- ; AVX1: sitofpv4i8v4float
+ ; AVX1-LABEL: sitofpv4i8v4float
; AVX1: cost of 3 {{.*}} sitofp
;
- ; AVX2: sitofpv4i8v4float
+ ; AVX2-LABEL: sitofpv4i8v4float
; AVX2: cost of 3 {{.*}} sitofp
;
- ; AVX512F: sitofpv4i8v4float
+ ; AVX512F-LABEL: sitofpv4i8v4float
; AVX512F: cost of 3 {{.*}} sitofp
%1 = sitofp <4 x i8> %a to <4 x float>
ret <4 x float> %1
}
define <8 x float> @sitofpv8i8v8float(<8 x i8> %a) {
- ; SSE2: sitofpv8i8v8float
+ ; SSE2-LABEL: sitofpv8i8v8float
; SSE2: cost of 15 {{.*}} sitofp
;
- ; AVX1: sitofpv8i8v8float
+ ; AVX1-LABEL: sitofpv8i8v8float
; AVX1: cost of 8 {{.*}} sitofp
;
- ; AVX2: sitofpv8i8v8float
+ ; AVX2-LABEL: sitofpv8i8v8float
; AVX2: cost of 8 {{.*}} sitofp
;
- ; AVX512F: sitofpv8i8v8float
+ ; AVX512F-LABEL: sitofpv8i8v8float
; AVX512F: cost of 8 {{.*}} sitofp
%1 = sitofp <8 x i8> %a to <8 x float>
ret <8 x float> %1
}
define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) {
- ; SSE2: sitofpv16i8v16float
+ ; SSE2-LABEL: sitofpv16i8v16float
; SSE2: cost of 8 {{.*}} sitofp
;
- ; AVX1: sitofpv16i8v16float
+ ; AVX1-LABEL: sitofpv16i8v16float
; AVX1: cost of 44 {{.*}} sitofp
;
- ; AVX2: sitofpv16i8v16float
+ ; AVX2-LABEL: sitofpv16i8v16float
; AVX2: cost of 44 {{.*}} sitofp
;
- ; AVX512F: sitofpv16i8v16float
+ ; AVX512F-LABEL: sitofpv16i8v16float
; AVX512F: cost of 2 {{.*}} sitofp
%1 = sitofp <16 x i8> %a to <16 x float>
ret <16 x float> %1
}
define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) {
- ; SSE2: sitofpv32i8v32float
+ ; SSE2-LABEL: sitofpv32i8v32float
; SSE2: cost of 16 {{.*}} sitofp
;
- ; AVX1: sitofpv32i8v32float
+ ; AVX1-LABEL: sitofpv32i8v32float
; AVX1: cost of 88 {{.*}} sitofp
;
- ; AVX2: sitofpv32i8v32float
+ ; AVX2-LABEL: sitofpv32i8v32float
; AVX2: cost of 88 {{.*}} sitofp
;
- ; AVX512F: sitofpv32i8v32float
+ ; AVX512F-LABEL: sitofpv32i8v32float
; AVX512F: cost of 92 {{.*}} sitofp
%1 = sitofp <32 x i8> %a to <32 x float>
ret <32 x float> %1
}
define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
- ; SSE2: sitofpv2i16v2float
+ ; SSE2-LABEL: sitofpv2i16v2float
; SSE2: cost of 15 {{.*}} sitofp
;
- ; AVX1: sitofpv2i16v2float
+ ; AVX1-LABEL: sitofpv2i16v2float
; AVX1: cost of 4 {{.*}} sitofp
;
- ; AVX2: sitofpv2i16v2float
+ ; AVX2-LABEL: sitofpv2i16v2float
; AVX2: cost of 4 {{.*}} sitofp
;
- ; AVX512F: sitofpv2i16v2float
+ ; AVX512F-LABEL: sitofpv2i16v2float
; AVX512F: cost of 4 {{.*}} sitofp
%1 = sitofp <2 x i16> %a to <2 x float>
ret <2 x float> %1
}
define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
- ; SSE2: sitofpv4i16v4float
+ ; SSE2-LABEL: sitofpv4i16v4float
; SSE2: cost of 15 {{.*}} sitofp
;
- ; AVX1: sitofpv4i16v4float
+ ; AVX1-LABEL: sitofpv4i16v4float
; AVX1: cost of 3 {{.*}} sitofp
;
- ; AVX2: sitofpv4i16v4float
+ ; AVX2-LABEL: sitofpv4i16v4float
; AVX2: cost of 3 {{.*}} sitofp
;
- ; AVX512F: sitofpv4i16v4float
+ ; AVX512F-LABEL: sitofpv4i16v4float
; AVX512F: cost of 3 {{.*}} sitofp
%1 = sitofp <4 x i16> %a to <4 x float>
ret <4 x float> %1
}
define <8 x float> @sitofpv8i16v8float(<8 x i16> %a) {
- ; SSE2: sitofpv8i16v8float
+ ; SSE2-LABEL: sitofpv8i16v8float
; SSE2: cost of 15 {{.*}} sitofp
;
- ; AVX1: sitofpv8i16v8float
+ ; AVX1-LABEL: sitofpv8i16v8float
; AVX1: cost of 5 {{.*}} sitofp
;
- ; AVX2: sitofpv8i16v8float
+ ; AVX2-LABEL: sitofpv8i16v8float
; AVX2: cost of 5 {{.*}} sitofp
;
- ; AVX512F: sitofpv8i16v8float
+ ; AVX512F-LABEL: sitofpv8i16v8float
; AVX512F: cost of 5 {{.*}} sitofp
%1 = sitofp <8 x i16> %a to <8 x float>
ret <8 x float> %1
}
define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) {
- ; SSE2: sitofpv16i16v16float
+ ; SSE2-LABEL: sitofpv16i16v16float
; SSE2: cost of 30 {{.*}} sitofp
;
- ; AVX1: sitofpv16i16v16float
+ ; AVX1-LABEL: sitofpv16i16v16float
; AVX1: cost of 44 {{.*}} sitofp
;
- ; AVX2: sitofpv16i16v16float
+ ; AVX2-LABEL: sitofpv16i16v16float
; AVX2: cost of 44 {{.*}} sitofp
;
- ; AVX512F: sitofpv16i16v16float
+ ; AVX512F-LABEL: sitofpv16i16v16float
; AVX512F: cost of 2 {{.*}} sitofp
%1 = sitofp <16 x i16> %a to <16 x float>
ret <16 x float> %1
}
define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) {
- ; SSE2: sitofpv32i16v32float
+ ; SSE2-LABEL: sitofpv32i16v32float
; SSE2: cost of 60 {{.*}} sitofp
;
- ; AVX1: sitofpv32i16v32float
+ ; AVX1-LABEL: sitofpv32i16v32float
; AVX1: cost of 88 {{.*}} sitofp
;
- ; AVX2: sitofpv32i16v32float
+ ; AVX2-LABEL: sitofpv32i16v32float
; AVX2: cost of 88 {{.*}} sitofp
;
- ; AVX512F: sitofpv32i16v32float
- ; AVX512F: cost of 2 {{.*}} sitofp
+ ; AVX512F-LABEL: sitofpv32i16v32float
+ ; AVX512F: cost of 92 {{.*}} sitofp
%1 = sitofp <32 x i16> %a to <32 x float>
ret <32 x float> %1
}
define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
- ; SSE2: sitofpv2i32v2float
+ ; SSE2-LABEL: sitofpv2i32v2float
; SSE2: cost of 15 {{.*}} sitofp
;
- ; AVX1: sitofpv2i32v2float
+ ; AVX1-LABEL: sitofpv2i32v2float
; AVX1: cost of 4 {{.*}} sitofp
;
- ; AVX2: sitofpv2i32v2float
+ ; AVX2-LABEL: sitofpv2i32v2float
; AVX2: cost of 4 {{.*}} sitofp
;
- ; AVX512F: sitofpv2i32v2float
+ ; AVX512F-LABEL: sitofpv2i32v2float
; AVX512F: cost of 4 {{.*}} sitofp
%1 = sitofp <2 x i32> %a to <2 x float>
ret <2 x float> %1
}
define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
- ; SSE2: sitofpv4i32v4float
+ ; SSE2-LABEL: sitofpv4i32v4float
; SSE2: cost of 15 {{.*}} sitofp
;
- ; AVX1: sitofpv4i32v4float
+ ; AVX1-LABEL: sitofpv4i32v4float
; AVX1: cost of 1 {{.*}} sitofp
;
- ; AVX2: sitofpv4i32v4float
+ ; AVX2-LABEL: sitofpv4i32v4float
; AVX2: cost of 1 {{.*}} sitofp
;
- ; AVX512F: sitofpv4i32v4float
+ ; AVX512F-LABEL: sitofpv4i32v4float
; AVX512F: cost of 1 {{.*}} sitofp
%1 = sitofp <4 x i32> %a to <4 x float>
ret <4 x float> %1
}
define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
- ; SSE2: sitofpv8i32v8float
+ ; SSE2-LABEL: sitofpv8i32v8float
; SSE2: cost of 30 {{.*}} sitofp
;
- ; AVX1: sitofpv8i32v8float
+ ; AVX1-LABEL: sitofpv8i32v8float
; AVX1: cost of 1 {{.*}} sitofp
;
- ; AVX2: sitofpv8i32v8float
+ ; AVX2-LABEL: sitofpv8i32v8float
; AVX2: cost of 1 {{.*}} sitofp
;
- ; AVX512F: sitofpv8i32v8float
+ ; AVX512F-LABEL: sitofpv8i32v8float
; AVX512F: cost of 1 {{.*}} sitofp
%1 = sitofp <8 x i32> %a to <8 x float>
ret <8 x float> %1
}
define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
- ; SSE2: sitofpv16i32v16float
+ ; SSE2-LABEL: sitofpv16i32v16float
; SSE2: cost of 60 {{.*}} sitofp
;
- ; AVX1: sitofpv16i32v16float
+ ; AVX1-LABEL: sitofpv16i32v16float
; AVX1: cost of 44 {{.*}} sitofp
;
- ; AVX2: sitofpv16i32v16float
+ ; AVX2-LABEL: sitofpv16i32v16float
; AVX2: cost of 44 {{.*}} sitofp
;
- ; AVX512F: sitofpv16i32v16float
+ ; AVX512F-LABEL: sitofpv16i32v16float
; AVX512F: cost of 1 {{.*}} sitofp
%1 = sitofp <16 x i32> %a to <16 x float>
ret <16 x float> %1
}
define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
- ; SSE2: sitofpv32i32v32float
+ ; SSE2-LABEL: sitofpv32i32v32float
; SSE2: cost of 120 {{.*}} sitofp
;
- ; AVX1: sitofpv32i32v32float
+ ; AVX1-LABEL: sitofpv32i32v32float
; AVX1: cost of 88 {{.*}} sitofp
;
- ; AVX2: sitofpv32i32v32float
+ ; AVX2-LABEL: sitofpv32i32v32float
; AVX2: cost of 88 {{.*}} sitofp
;
- ; AVX512F: sitofpv32i32v32float
- ; AVX512F: cost of 1 {{.*}} sitofp
+ ; AVX512F-LABEL: sitofpv32i32v32float
+ ; AVX512F: cost of 92 {{.*}} sitofp
%1 = sitofp <32 x i32> %a to <32 x float>
ret <32 x float> %1
}
define <2 x float> @sitofpv2i64v2float(<2 x i64> %a) {
- ; SSE2: sitofpv2i64v2float
+ ; SSE2-LABEL: sitofpv2i64v2float
; SSE2: cost of 15 {{.*}} sitofp
;
- ; AVX1: sitofpv2i64v2float
+ ; AVX1-LABEL: sitofpv2i64v2float
; AVX1: cost of 4 {{.*}} sitofp
;
- ; AVX2: sitofpv2i64v2float
+ ; AVX2-LABEL: sitofpv2i64v2float
; AVX2: cost of 4 {{.*}} sitofp
;
- ; AVX512F: sitofpv2i64v2float
+ ; AVX512F-LABEL: sitofpv2i64v2float
; AVX512F: cost of 4 {{.*}} sitofp
%1 = sitofp <2 x i64> %a to <2 x float>
ret <2 x float> %1
}
define <4 x float> @sitofpv4i64v4float(<4 x i64> %a) {
- ; SSE2: sitofpv4i64v4float
+ ; SSE2-LABEL: sitofpv4i64v4float
; SSE2: cost of 30 {{.*}} sitofp
;
- ; AVX1: sitofpv4i64v4float
+ ; AVX1-LABEL: sitofpv4i64v4float
; AVX1: cost of 10 {{.*}} sitofp
;
- ; AVX2: sitofpv4i64v4float
+ ; AVX2-LABEL: sitofpv4i64v4float
; AVX2: cost of 10 {{.*}} sitofp
;
- ; AVX512F: sitofpv4i64v4float
+ ; AVX512F-LABEL: sitofpv4i64v4float
; AVX512F: cost of 10 {{.*}} sitofp
%1 = sitofp <4 x i64> %a to <4 x float>
ret <4 x float> %1
}
define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) {
- ; SSE2: sitofpv8i64v8float
+ ; SSE2-LABEL: sitofpv8i64v8float
; SSE2: cost of 60 {{.*}} sitofp
;
- ; AVX1: sitofpv8i64v8float
+ ; AVX1-LABEL: sitofpv8i64v8float
; AVX1: cost of 22 {{.*}} sitofp
;
- ; AVX2: sitofpv8i64v8float
+ ; AVX2-LABEL: sitofpv8i64v8float
; AVX2: cost of 22 {{.*}} sitofp
;
- ; AVX512F: sitofpv8i64v8float
+ ; AVX512F-LABEL: sitofpv8i64v8float
; AVX512F: cost of 22 {{.*}} sitofp
%1 = sitofp <8 x i64> %a to <8 x float>
ret <8 x float> %1
}
define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) {
- ; SSE2: sitofpv16i64v16float
+ ; SSE2-LABEL: sitofpv16i64v16float
; SSE2: cost of 120 {{.*}} sitofp
;
- ; AVX1: sitofpv16i64v16float
+ ; AVX1-LABEL: sitofpv16i64v16float
; AVX1: cost of 44 {{.*}} sitofp
;
- ; AVX2: sitofpv16i64v16float
+ ; AVX2-LABEL: sitofpv16i64v16float
; AVX2: cost of 44 {{.*}} sitofp
;
- ; AVX512F: sitofpv16i64v16float
+ ; AVX512F-LABEL: sitofpv16i64v16float
; AVX512F: cost of 46 {{.*}} sitofp
%1 = sitofp <16 x i64> %a to <16 x float>
ret <16 x float> %1
}
define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
- ; SSE2: sitofpv32i64v32float
+ ; SSE2-LABEL: sitofpv32i64v32float
; SSE2: cost of 240 {{.*}} sitofp
;
- ; AVX1: sitofpv32i64v32float
+ ; AVX1-LABEL: sitofpv32i64v32float
; AVX1: cost of 88 {{.*}} sitofp
;
- ; AVX2: sitofpv32i64v32float
+ ; AVX2-LABEL: sitofpv32i64v32float
; AVX2: cost of 88 {{.*}} sitofp
;
- ; AVX512F: sitofpv32i64v32float
+ ; AVX512F-LABEL: sitofpv32i64v32float
; AVX512F: cost of 92 {{.*}} sitofp
%1 = sitofp <32 x i64> %a to <32 x float>
ret <32 x float> %1
}
define <8 x double> @sitofpv8i1v8double(<8 x double> %a) {
- ; SSE2: sitofpv8i1v8double
+ ; SSE2-LABEL: sitofpv8i1v8double
; SSE2: cost of 80 {{.*}} sitofp
;
- ; AVX1: sitofpv8i1v8double
+ ; AVX1-LABEL: sitofpv8i1v8double
; AVX1: cost of 20 {{.*}} sitofp
;
- ; AVX2: sitofpv8i1v8double
+ ; AVX2-LABEL: sitofpv8i1v8double
; AVX2: cost of 20 {{.*}} sitofp
;
- ; AVX512F: sitofpv8i1v8double
+ ; AVX512F-LABEL: sitofpv8i1v8double
; AVX512F: cost of 4 {{.*}} sitofp
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer
%1 = sitofp <8 x i1> %cmpres to <8 x double>
@@ -661,16 +661,16 @@ define <8 x double> @sitofpv8i1v8double(<8 x double> %a) {
}
define <16 x float> @sitofpv16i1v16float(<16 x float> %a) {
- ; SSE2: sitofpv16i1v16float
+ ; SSE2-LABEL: sitofpv16i1v16float
; SSE2: cost of 8 {{.*}} sitofp
;
- ; AVX1: sitofpv16i1v16float
+ ; AVX1-LABEL: sitofpv16i1v16float
; AVX1: cost of 44 {{.*}} sitofp
;
- ; AVX2: sitofpv16i1v16float
+ ; AVX2-LABEL: sitofpv16i1v16float
; AVX2: cost of 44 {{.*}} sitofp
;
- ; AVX512F: sitofpv16i1v16float
+ ; AVX512F-LABEL: sitofpv16i1v16float
; AVX512F: cost of 3 {{.*}} sitofp
%cmpres = fcmp ogt <16 x float> %a, zeroinitializer
%1 = sitofp <16 x i1> %cmpres to <16 x float>
diff --git a/test/Analysis/CostModel/X86/sse-itoi.ll b/test/Analysis/CostModel/X86/sse-itoi.ll
new file mode 100644
index 000000000000..9e7f26e54d29
--- /dev/null
+++ b/test/Analysis/CostModel/X86/sse-itoi.ll
@@ -0,0 +1,353 @@
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -cost-model -analyze < %s | FileCheck --check-prefix=SSE41 %s
+
+define void @zext_v16i16_to_v16i32(<16 x i16>* %a) {
+; SSE2: zext_v16i16_to_v16i32
+; SSE2: cost of 6 {{.*}} zext
+;
+; SSE41: zext_v16i16_to_v16i32
+; SSE41: cost of 4 {{.*}} zext
+;
+ %1 = load <16 x i16>, <16 x i16>* %a
+ %2 = zext <16 x i16> %1 to <16 x i32>
+ store <16 x i32> %2, <16 x i32>* undef, align 4
+ ret void
+}
+
+define void @sext_v16i16_to_v16i32(<16 x i16>* %a) {
+; SSE2: sext_v16i16_to_v16i32
+; SSE2: cost of 8 {{.*}} sext
+;
+; SSE41: sext_v16i16_to_v16i32
+; SSE41: cost of 4 {{.*}} sext
+;
+ %1 = load <16 x i16>, <16 x i16>* %a
+ %2 = sext <16 x i16> %1 to <16 x i32>
+ store <16 x i32> %2, <16 x i32>* undef, align 4
+ ret void
+}
+
+define void @zext_v8i16_to_v8i32(<8 x i16>* %a) {
+; SSE2: zext_v8i16_to_v8i32
+; SSE2: cost of 3 {{.*}} zext
+;
+; SSE41: zext_v8i16_to_v8i32
+; SSE41: cost of 2 {{.*}} zext
+;
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = zext <8 x i16> %1 to <8 x i32>
+ store <8 x i32> %2, <8 x i32>* undef, align 4
+ ret void
+}
+
+define void @sext_v8i16_to_v8i32(<8 x i16>* %a) {
+; SSE2: sext_v8i16_to_v8i32
+; SSE2: cost of 4 {{.*}} sext
+;
+; SSE41: sext_v8i16_to_v8i32
+; SSE41: cost of 2 {{.*}} sext
+;
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = sext <8 x i16> %1 to <8 x i32>
+ store <8 x i32> %2, <8 x i32>* undef, align 4
+ ret void
+}
+
+define void @zext_v4i16_to_v4i32(<4 x i16>* %a) {
+; SSE2: zext_v4i16_to_v4i32
+; SSE2: cost of 1 {{.*}} zext
+;
+; SSE41: zext_v4i16_to_v4i32
+; SSE41: cost of 1 {{.*}} zext
+;
+ %1 = load <4 x i16>, <4 x i16>* %a
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ store <4 x i32> %2, <4 x i32>* undef, align 4
+ ret void
+}
+
+define void @sext_v4i16_to_v4i32(<4 x i16>* %a) {
+; SSE2: sext_v4i16_to_v4i32
+; SSE2: cost of 2 {{.*}} sext
+;
+; SSE41: sext_v4i16_to_v4i32
+; SSE41: cost of 1 {{.*}} sext
+;
+ %1 = load <4 x i16>, <4 x i16>* %a
+ %2 = sext <4 x i16> %1 to <4 x i32>
+ store <4 x i32> %2, <4 x i32>* undef, align 4
+ ret void
+}
+
+define void @zext_v16i8_to_v16i32(<16 x i8>* %a) {
+; SSE2: zext_v16i8_to_v16i32
+; SSE2: cost of 9 {{.*}} zext
+;
+; SSE41: zext_v16i8_to_v16i32
+; SSE41: cost of 4 {{.*}} zext
+;
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = zext <16 x i8> %1 to <16 x i32>
+ store <16 x i32> %2, <16 x i32>* undef, align 4
+ ret void
+}
+
+define void @sext_v16i8_to_v16i32(<16 x i8>* %a) {
+; SSE2: sext_v16i8_to_v16i32
+; SSE2: cost of 12 {{.*}} sext
+;
+; SSE41: sext_v16i8_to_v16i32
+; SSE41: cost of 4 {{.*}} sext
+;
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = sext <16 x i8> %1 to <16 x i32>
+ store <16 x i32> %2, <16 x i32>* undef, align 4
+ ret void
+}
+
+define void @zext_v8i8_to_v8i32(<8 x i8>* %a) {
+; SSE2: zext_v8i8_to_v8i32
+; SSE2: cost of 6 {{.*}} zext
+;
+; SSE41: zext_v8i8_to_v8i32
+; SSE41: cost of 2 {{.*}} zext
+;
+ %1 = load <8 x i8>, <8 x i8>* %a
+ %2 = zext <8 x i8> %1 to <8 x i32>
+ store <8 x i32> %2, <8 x i32>* undef, align 4
+ ret void
+}
+
+define void @sext_v8i8_to_v8i32(<8 x i8>* %a) {
+; SSE2: sext_v8i8_to_v8i32
+; SSE2: cost of 6 {{.*}} sext
+;
+; SSE41: sext_v8i8_to_v8i32
+; SSE41: cost of 2 {{.*}} sext
+;
+ %1 = load <8 x i8>, <8 x i8>* %a
+ %2 = sext <8 x i8> %1 to <8 x i32>
+ store <8 x i32> %2, <8 x i32>* undef, align 4
+ ret void
+}
+
+define void @zext_v4i8_to_v4i32(<4 x i8>* %a) {
+; SSE2: zext_v4i8_to_v4i32
+; SSE2: cost of 2 {{.*}} zext
+;
+; SSE41: zext_v4i8_to_v4i32
+; SSE41: cost of 1 {{.*}} zext
+;
+ %1 = load <4 x i8>, <4 x i8>* %a
+ %2 = zext <4 x i8> %1 to <4 x i32>
+ store <4 x i32> %2, <4 x i32>* undef, align 4
+ ret void
+}
+
+define void @sext_v4i8_to_v4i32(<4 x i8>* %a) {
+; SSE2: sext_v4i8_to_v4i32
+; SSE2: cost of 3 {{.*}} sext
+;
+; SSE41: sext_v4i8_to_v4i32
+; SSE41: cost of 1 {{.*}} sext
+;
+ %1 = load <4 x i8>, <4 x i8>* %a
+ %2 = sext <4 x i8> %1 to <4 x i32>
+ store <4 x i32> %2, <4 x i32>* undef, align 4
+ ret void
+}
+
+define void @zext_v16i8_to_v16i16(<16 x i8>* %a) {
+; SSE2: zext_v16i8_to_v16i16
+; SSE2: cost of 3 {{.*}} zext
+;
+; SSE41: zext_v16i8_to_v16i16
+; SSE41: cost of 2 {{.*}} zext
+;
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = zext <16 x i8> %1 to <16 x i16>
+ store <16 x i16> %2, <16 x i16>* undef, align 4
+ ret void
+}
+
+define void @sext_v16i8_to_v16i16(<16 x i8>* %a) {
+; SSE2: sext_v16i8_to_v16i16
+; SSE2: cost of 4 {{.*}} sext
+;
+; SSE41: sext_v16i8_to_v16i16
+; SSE41: cost of 2 {{.*}} sext
+;
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = sext <16 x i8> %1 to <16 x i16>
+ store <16 x i16> %2, <16 x i16>* undef, align 4
+ ret void
+}
+
+define void @zext_v8i8_to_v8i16(<8 x i8>* %a) {
+; SSE2: zext_v8i8_to_v8i16
+; SSE2: cost of 1 {{.*}} zext
+;
+; SSE41: zext_v8i8_to_v8i16
+; SSE41: cost of 1 {{.*}} zext
+;
+ %1 = load <8 x i8>, <8 x i8>* %a
+ %2 = zext <8 x i8> %1 to <8 x i16>
+ store <8 x i16> %2, <8 x i16>* undef, align 4
+ ret void
+}
+
+define void @sext_v8i8_to_v8i16(<8 x i8>* %a) {
+; SSE2: sext_v8i8_to_v8i16
+; SSE2: cost of 2 {{.*}} sext
+;
+; SSE41: sext_v8i8_to_v8i16
+; SSE41: cost of 1 {{.*}} sext
+;
+ %1 = load <8 x i8>, <8 x i8>* %a
+ %2 = sext <8 x i8> %1 to <8 x i16>
+ store <8 x i16> %2, <8 x i16>* undef, align 4
+ ret void
+}
+
+define void @zext_v4i8_to_v4i16(<4 x i8>* %a) {
+; SSE2: zext_v4i8_to_v4i16
+; SSE2: cost of 1 {{.*}} zext
+;
+; SSE41: zext_v4i8_to_v4i16
+; SSE41: cost of 1 {{.*}} zext
+;
+ %1 = load <4 x i8>, <4 x i8>* %a
+ %2 = zext <4 x i8> %1 to <4 x i16>
+ store <4 x i16> %2, <4 x i16>* undef, align 4
+ ret void
+}
+
+define void @sext_v4i8_to_v4i16(<4 x i8>* %a) {
+; SSE2: sext_v4i8_to_v4i16
+; SSE2: cost of 6 {{.*}} sext
+;
+; SSE41: sext_v4i8_to_v4i16
+; SSE41: cost of 2 {{.*}} sext
+;
+ %1 = load <4 x i8>, <4 x i8>* %a
+ %2 = sext <4 x i8> %1 to <4 x i16>
+ store <4 x i16> %2, <4 x i16>* undef, align 4
+ ret void
+}
+
+define void @truncate_v16i32_to_v16i16(<16 x i32>* %a) {
+; SSE2: truncate_v16i32_to_v16i16
+; SSE2: cost of 10 {{.*}} trunc
+;
+; SSE41: truncate_v16i32_to_v16i16
+; SSE41: cost of 6 {{.*}} trunc
+;
+ %1 = load <16 x i32>, <16 x i32>* %a
+ %2 = trunc <16 x i32> %1 to <16 x i16>
+ store <16 x i16> %2, <16 x i16>* undef, align 4
+ ret void
+}
+
+define void @truncate_v8i32_to_v8i16(<8 x i32>* %a) {
+; SSE2: truncate_v8i32_to_v8i16
+; SSE2: cost of 5 {{.*}} trunc
+;
+; SSE41: truncate_v8i32_to_v8i16
+; SSE41: cost of 3 {{.*}} trunc
+;
+ %1 = load <8 x i32>, <8 x i32>* %a
+ %2 = trunc <8 x i32> %1 to <8 x i16>
+ store <8 x i16> %2, <8 x i16>* undef, align 4
+ ret void
+}
+
+define void @truncate_v4i32_to_v4i16(<4 x i32>* %a) {
+; SSE2: truncate_v4i32_to_v4i16
+; SSE2: cost of 3 {{.*}} trunc
+;
+; SSE41: truncate_v4i32_to_v4i16
+; SSE41: cost of 1 {{.*}} trunc
+;
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = trunc <4 x i32> %1 to <4 x i16>
+ store <4 x i16> %2, <4 x i16>* undef, align 4
+ ret void
+}
+
+define void @truncate_v16i32_to_v16i8(<16 x i32>* %a) {
+; SSE2: truncate_v16i32_to_v16i8
+; SSE2: cost of 7 {{.*}} trunc
+;
+; SSE41: truncate_v16i32_to_v16i8
+; SSE41: cost of 30 {{.*}} trunc
+;
+ %1 = load <16 x i32>, <16 x i32>* %a
+ %2 = trunc <16 x i32> %1 to <16 x i8>
+ store <16 x i8> %2, <16 x i8>* undef, align 4
+ ret void
+}
+
+define void @truncate_v8i32_to_v8i8(<8 x i32>* %a) {
+; SSE2: truncate_v8i32_to_v8i8
+; SSE2: cost of 4 {{.*}} trunc
+;
+; SSE41: truncate_v8i32_to_v8i8
+; SSE41: cost of 3 {{.*}} trunc
+;
+ %1 = load <8 x i32>, <8 x i32>* %a
+ %2 = trunc <8 x i32> %1 to <8 x i8>
+ store <8 x i8> %2, <8 x i8>* undef, align 4
+ ret void
+}
+
+define void @truncate_v4i32_to_v4i8(<4 x i32>* %a) {
+; SSE2: truncate_v4i32_to_v4i8
+; SSE2: cost of 3 {{.*}} trunc
+;
+; SSE41: truncate_v4i32_to_v4i8
+; SSE41: cost of 1 {{.*}} trunc
+;
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = trunc <4 x i32> %1 to <4 x i8>
+ store <4 x i8> %2, <4 x i8>* undef, align 4
+ ret void
+}
+
+define void @truncate_v16i16_to_v16i8(<16 x i16>* %a) {
+; SSE2: truncate_v16i16_to_v16i8
+; SSE2: cost of 3 {{.*}} trunc
+;
+; SSE41: truncate_v16i16_to_v16i8
+; SSE41: cost of 3 {{.*}} trunc
+;
+ %1 = load <16 x i16>, <16 x i16>* %a
+ %2 = trunc <16 x i16> %1 to <16 x i8>
+ store <16 x i8> %2, <16 x i8>* undef, align 4
+ ret void
+}
+
+define void @truncate_v8i16_to_v8i8(<8 x i16>* %a) {
+; SSE2: truncate_v8i16_to_v8i8
+; SSE2: cost of 2 {{.*}} trunc
+;
+; SSE41: truncate_v8i16_to_v8i8
+; SSE41: cost of 1 {{.*}} trunc
+;
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = trunc <8 x i16> %1 to <8 x i8>
+ store <8 x i8> %2, <8 x i8>* undef, align 4
+ ret void
+}
+
+define void @truncate_v4i16_to_v4i8(<4 x i16>* %a) {
+; SSE2: truncate_v4i16_to_v4i8
+; SSE2: cost of 4 {{.*}} trunc
+;
+; SSE41: truncate_v4i16_to_v4i8
+; SSE41: cost of 2 {{.*}} trunc
+;
+ %1 = load <4 x i16>, <4 x i16>* %a
+ %2 = trunc <4 x i16> %1 to <4 x i8>
+ store <4 x i8> %2, <4 x i8>* undef, align 4
+ ret void
+}
diff --git a/test/Analysis/CostModel/X86/testshiftashr.ll b/test/Analysis/CostModel/X86/testshiftashr.ll
index da4e7d466e2b..13f2bd2019d3 100644
--- a/test/Analysis/CostModel/X86/testshiftashr.ll
+++ b/test/Analysis/CostModel/X86/testshiftashr.ll
@@ -5,9 +5,9 @@
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
entry:
; SSE2: shift2i16
- ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2: cost of 12 {{.*}} ashr
; SSE2-CODEGEN: shift2i16
- ; SSE2-CODEGEN: sarq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = ashr %shifttype %a , %b
ret %shifttype %0
@@ -65,9 +65,9 @@ entry:
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
entry:
; SSE2: shift2i32
- ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2: cost of 12 {{.*}} ashr
; SSE2-CODEGEN: shift2i32
- ; SSE2-CODEGEN: sarq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = ashr %shifttype2i32 %a , %b
ret %shifttype2i32 %0
@@ -125,9 +125,9 @@ entry:
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
entry:
; SSE2: shift2i64
- ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2: cost of 12 {{.*}} ashr
; SSE2-CODEGEN: shift2i64
- ; SSE2-CODEGEN: sarq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = ashr %shifttype2i64 %a , %b
ret %shifttype2i64 %0
@@ -137,9 +137,9 @@ entry:
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
entry:
; SSE2: shift4i64
- ; SSE2: cost of 40 {{.*}} ashr
+ ; SSE2: cost of 24 {{.*}} ashr
; SSE2-CODEGEN: shift4i64
- ; SSE2-CODEGEN: sarq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = ashr %shifttype4i64 %a , %b
ret %shifttype4i64 %0
@@ -149,9 +149,9 @@ entry:
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
entry:
; SSE2: shift8i64
- ; SSE2: cost of 80 {{.*}} ashr
+ ; SSE2: cost of 48 {{.*}} ashr
; SSE2-CODEGEN: shift8i64
- ; SSE2-CODEGEN: sarq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = ashr %shifttype8i64 %a , %b
ret %shifttype8i64 %0
@@ -161,9 +161,9 @@ entry:
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
entry:
; SSE2: shift16i64
- ; SSE2: cost of 160 {{.*}} ashr
+ ; SSE2: cost of 96 {{.*}} ashr
; SSE2-CODEGEN: shift16i64
- ; SSE2-CODEGEN: sarq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = ashr %shifttype16i64 %a , %b
ret %shifttype16i64 %0
@@ -173,9 +173,9 @@ entry:
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
; SSE2: shift32i64
- ; SSE2: cost of 320 {{.*}} ashr
+ ; SSE2: cost of 192 {{.*}} ashr
; SSE2-CODEGEN: shift32i64
- ; SSE2-CODEGEN: sarq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = ashr %shifttype32i64 %a , %b
ret %shifttype32i64 %0
@@ -185,9 +185,9 @@ entry:
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
entry:
; SSE2: shift2i8
- ; SSE2: cost of 20 {{.*}} ashr
+ ; SSE2: cost of 12 {{.*}} ashr
; SSE2-CODEGEN: shift2i8
- ; SSE2-CODEGEN: sarq %cl
+ ; SSE2-CODEGEN: psrlq
%0 = ashr %shifttype2i8 %a , %b
ret %shifttype2i8 %0
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll
index 5775a42d08ad..52f176fe4d63 100644
--- a/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -5,7 +5,7 @@
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
entry:
; SSE2: shift2i16
- ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i16
; SSE2-CODEGEN: psrlq
@@ -65,7 +65,7 @@ entry:
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
entry:
; SSE2: shift2i32
- ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i32
; SSE2-CODEGEN: psrlq
@@ -125,7 +125,7 @@ entry:
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
entry:
; SSE2: shift2i64
- ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i64
; SSE2-CODEGEN: psrlq
@@ -137,7 +137,7 @@ entry:
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
entry:
; SSE2: shift4i64
- ; SSE2: cost of 40 {{.*}} lshr
+ ; SSE2: cost of 8 {{.*}} lshr
; SSE2-CODEGEN: shift4i64
; SSE2-CODEGEN: psrlq
@@ -149,7 +149,7 @@ entry:
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
entry:
; SSE2: shift8i64
- ; SSE2: cost of 80 {{.*}} lshr
+ ; SSE2: cost of 16 {{.*}} lshr
; SSE2-CODEGEN: shift8i64
; SSE2-CODEGEN: psrlq
@@ -161,7 +161,7 @@ entry:
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
entry:
; SSE2: shift16i64
- ; SSE2: cost of 160 {{.*}} lshr
+ ; SSE2: cost of 32 {{.*}} lshr
; SSE2-CODEGEN: shift16i64
; SSE2-CODEGEN: psrlq
@@ -173,7 +173,7 @@ entry:
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
; SSE2: shift32i64
- ; SSE2: cost of 320 {{.*}} lshr
+ ; SSE2: cost of 64 {{.*}} lshr
; SSE2-CODEGEN: shift32i64
; SSE2-CODEGEN: psrlq
@@ -185,7 +185,7 @@ entry:
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
entry:
; SSE2: shift2i8
- ; SSE2: cost of 20 {{.*}} lshr
+ ; SSE2: cost of 4 {{.*}} lshr
; SSE2-CODEGEN: shift2i8
; SSE2-CODEGEN: psrlq
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll
index d4e33818932b..e385c5bfeeac 100644
--- a/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -5,7 +5,7 @@
define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
entry:
; SSE2: shift2i16
- ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i16
; SSE2-CODEGEN: psllq
@@ -65,7 +65,7 @@ entry:
define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
entry:
; SSE2: shift2i32
- ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i32
; SSE2-CODEGEN: psllq
@@ -125,7 +125,7 @@ entry:
define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
entry:
; SSE2: shift2i64
- ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i64
; SSE2-CODEGEN: psllq
@@ -137,7 +137,7 @@ entry:
define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
entry:
; SSE2: shift4i64
- ; SSE2: cost of 40 {{.*}} shl
+ ; SSE2: cost of 8 {{.*}} shl
; SSE2-CODEGEN: shift4i64
; SSE2-CODEGEN: psllq
@@ -149,7 +149,7 @@ entry:
define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
entry:
; SSE2: shift8i64
- ; SSE2: cost of 80 {{.*}} shl
+ ; SSE2: cost of 16 {{.*}} shl
; SSE2-CODEGEN: shift8i64
; SSE2-CODEGEN: psllq
@@ -161,7 +161,7 @@ entry:
define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
entry:
; SSE2: shift16i64
- ; SSE2: cost of 160 {{.*}} shl
+ ; SSE2: cost of 32 {{.*}} shl
; SSE2-CODEGEN: shift16i64
; SSE2-CODEGEN: psllq
@@ -173,7 +173,7 @@ entry:
define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
entry:
; SSE2: shift32i64
- ; SSE2: cost of 320 {{.*}} shl
+ ; SSE2: cost of 64 {{.*}} shl
; SSE2-CODEGEN: shift32i64
; SSE2-CODEGEN: psllq
@@ -185,7 +185,7 @@ entry:
define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
entry:
; SSE2: shift2i8
- ; SSE2: cost of 20 {{.*}} shl
+ ; SSE2: cost of 4 {{.*}} shl
; SSE2-CODEGEN: shift2i8
; SSE2-CODEGEN: psllq
diff --git a/test/Analysis/CostModel/X86/uitofp.ll b/test/Analysis/CostModel/X86/uitofp.ll
index 9ffc483e3f5a..08e36650bec4 100644
--- a/test/Analysis/CostModel/X86/uitofp.ll
+++ b/test/Analysis/CostModel/X86/uitofp.ll
@@ -2,644 +2,708 @@
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX1 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx2 -cost-model -analyze < %s | FileCheck --check-prefix=AVX --check-prefix=AVX2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512f -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+avx512dq -cost-model -analyze < %s | FileCheck --check-prefix=AVX512DQ %s
define <2 x double> @uitofpv2i8v2double(<2 x i8> %a) {
- ; SSE2: uitofpv2i8v2double
+ ; SSE2-LABEL: uitofpv2i8v2double
; SSE2: cost of 20 {{.*}} uitofp
;
- ; AVX1: uitofpv2i8v2double
+ ; AVX1-LABEL: uitofpv2i8v2double
; AVX1: cost of 4 {{.*}} uitofp
;
- ; AVX2: uitofpv2i8v2double
+ ; AVX2-LABEL: uitofpv2i8v2double
; AVX2: cost of 4 {{.*}} uitofp
;
- ; AVX512F: uitofpv2i8v2double
- ; AVX512F: cost of 4 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv2i8v2double
+ ; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <2 x i8> %a to <2 x double>
ret <2 x double> %1
}
define <4 x double> @uitofpv4i8v4double(<4 x i8> %a) {
- ; SSE2: uitofpv4i8v4double
+ ; SSE2-LABEL: uitofpv4i8v4double
; SSE2: cost of 40 {{.*}} uitofp
;
- ; AVX1: uitofpv4i8v4double
+ ; AVX1-LABEL: uitofpv4i8v4double
; AVX1: cost of 2 {{.*}} uitofp
;
- ; AVX2: uitofpv4i8v4double
+ ; AVX2-LABEL: uitofpv4i8v4double
; AVX2: cost of 2 {{.*}} uitofp
;
- ; AVX512F: uitofpv4i8v4double
+ ; AVX512F-LABEL: uitofpv4i8v4double
; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <4 x i8> %a to <4 x double>
ret <4 x double> %1
}
define <8 x double> @uitofpv8i8v8double(<8 x i8> %a) {
- ; SSE2: uitofpv8i8v8double
+ ; SSE2-LABEL: uitofpv8i8v8double
; SSE2: cost of 80 {{.*}} uitofp
;
- ; AVX1: uitofpv8i8v8double
+ ; AVX1-LABEL: uitofpv8i8v8double
; AVX1: cost of 20 {{.*}} uitofp
;
- ; AVX2: uitofpv8i8v8double
+ ; AVX2-LABEL: uitofpv8i8v8double
; AVX2: cost of 20 {{.*}} uitofp
;
- ; AVX512F: uitofpv8i8v8double
- ; AVX512F: cost of 22 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv8i8v8double
+ ; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <8 x i8> %a to <8 x double>
ret <8 x double> %1
}
define <16 x double> @uitofpv16i8v16double(<16 x i8> %a) {
- ; SSE2: uitofpv16i8v16double
+ ; SSE2-LABEL: uitofpv16i8v16double
; SSE2: cost of 160 {{.*}} uitofp
;
- ; AVX1: uitofpv16i8v16double
+ ; AVX1-LABEL: uitofpv16i8v16double
; AVX1: cost of 40 {{.*}} uitofp
;
- ; AVX2: uitofpv16i8v16double
+ ; AVX2-LABEL: uitofpv16i8v16double
; AVX2: cost of 40 {{.*}} uitofp
;
- ; AVX512F: uitofpv16i8v16double
+ ; AVX512F-LABEL: uitofpv16i8v16double
; AVX512F: cost of 44 {{.*}} uitofp
%1 = uitofp <16 x i8> %a to <16 x double>
ret <16 x double> %1
}
define <32 x double> @uitofpv32i8v32double(<32 x i8> %a) {
- ; SSE2: uitofpv32i8v32double
+ ; SSE2-LABEL: uitofpv32i8v32double
; SSE2: cost of 320 {{.*}} uitofp
;
- ; AVX1: uitofpv32i8v32double
+ ; AVX1-LABEL: uitofpv32i8v32double
; AVX1: cost of 80 {{.*}} uitofp
;
- ; AVX2: uitofpv32i8v32double
+ ; AVX2-LABEL: uitofpv32i8v32double
; AVX2: cost of 80 {{.*}} uitofp
;
- ; AVX512F: uitofpv32i8v32double
+ ; AVX512F-LABEL: uitofpv32i8v32double
; AVX512F: cost of 88 {{.*}} uitofp
%1 = uitofp <32 x i8> %a to <32 x double>
ret <32 x double> %1
}
define <2 x double> @uitofpv2i16v2double(<2 x i16> %a) {
- ; SSE2: uitofpv2i16v2double
+ ; SSE2-LABEL: uitofpv2i16v2double
; SSE2: cost of 20 {{.*}} uitofp
;
- ; AVX1: uitofpv2i16v2double
+ ; AVX1-LABEL: uitofpv2i16v2double
; AVX1: cost of 4 {{.*}} uitofp
;
- ; AVX2: uitofpv2i16v2double
+ ; AVX2-LABEL: uitofpv2i16v2double
; AVX2: cost of 4 {{.*}} uitofp
;
- ; AVX512F: uitofpv2i16v2double
- ; AVX512F: cost of 4 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv2i16v2double
+ ; AVX512F: cost of 5 {{.*}} uitofp
%1 = uitofp <2 x i16> %a to <2 x double>
ret <2 x double> %1
}
define <4 x double> @uitofpv4i16v4double(<4 x i16> %a) {
- ; SSE2: uitofpv4i16v4double
+ ; SSE2-LABEL: uitofpv4i16v4double
; SSE2: cost of 40 {{.*}} uitofp
;
- ; AVX1: uitofpv4i16v4double
+ ; AVX1-LABEL: uitofpv4i16v4double
; AVX1: cost of 2 {{.*}} uitofp
;
- ; AVX2: uitofpv4i16v4double
+ ; AVX2-LABEL: uitofpv4i16v4double
; AVX2: cost of 2 {{.*}} uitofp
;
- ; AVX512F: uitofpv4i16v4double
+ ; AVX512F-LABEL: uitofpv4i16v4double
; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <4 x i16> %a to <4 x double>
ret <4 x double> %1
}
define <8 x double> @uitofpv8i16v8double(<8 x i16> %a) {
- ; SSE2: uitofpv8i16v8double
+ ; SSE2-LABEL: uitofpv8i16v8double
; SSE2: cost of 80 {{.*}} uitofp
;
- ; AVX1: uitofpv8i16v8double
+ ; AVX1-LABEL: uitofpv8i16v8double
; AVX1: cost of 20 {{.*}} uitofp
;
- ; AVX2: uitofpv8i16v8double
+ ; AVX2-LABEL: uitofpv8i16v8double
; AVX2: cost of 20 {{.*}} uitofp
;
- ; AVX512F: uitofpv8i16v8double
- ; AVX512F: cost of 22 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv8i16v8double
+ ; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <8 x i16> %a to <8 x double>
ret <8 x double> %1
}
define <16 x double> @uitofpv16i16v16double(<16 x i16> %a) {
- ; SSE2: uitofpv16i16v16double
+ ; SSE2-LABEL: uitofpv16i16v16double
; SSE2: cost of 160 {{.*}} uitofp
;
- ; AVX1: uitofpv16i16v16double
+ ; AVX1-LABEL: uitofpv16i16v16double
; AVX1: cost of 40 {{.*}} uitofp
;
- ; AVX2: uitofpv16i16v16double
+ ; AVX2-LABEL: uitofpv16i16v16double
; AVX2: cost of 40 {{.*}} uitofp
;
- ; AVX512F: uitofpv16i16v16double
+ ; AVX512F-LABEL: uitofpv16i16v16double
; AVX512F: cost of 44 {{.*}} uitofp
%1 = uitofp <16 x i16> %a to <16 x double>
ret <16 x double> %1
}
define <32 x double> @uitofpv32i16v32double(<32 x i16> %a) {
- ; SSE2: uitofpv32i16v32double
+ ; SSE2-LABEL: uitofpv32i16v32double
; SSE2: cost of 320 {{.*}} uitofp
;
- ; AVX1: uitofpv32i16v32double
+ ; AVX1-LABEL: uitofpv32i16v32double
; AVX1: cost of 80 {{.*}} uitofp
;
- ; AVX2: uitofpv32i16v32double
+ ; AVX2-LABEL: uitofpv32i16v32double
; AVX2: cost of 80 {{.*}} uitofp
;
- ; AVX512F: uitofpv32i16v32double
+ ; AVX512F-LABEL: uitofpv32i16v32double
; AVX512F: cost of 88 {{.*}} uitofp
%1 = uitofp <32 x i16> %a to <32 x double>
ret <32 x double> %1
}
define <2 x double> @uitofpv2i32v2double(<2 x i32> %a) {
- ; SSE2: uitofpv2i32v2double
+ ; SSE2-LABEL: uitofpv2i32v2double
; SSE2: cost of 20 {{.*}} uitofp
;
- ; AVX1: uitofpv2i32v2double
+ ; AVX1-LABEL: uitofpv2i32v2double
; AVX1: cost of 4 {{.*}} uitofp
;
- ; AVX2: uitofpv2i32v2double
+ ; AVX2-LABEL: uitofpv2i32v2double
; AVX2: cost of 4 {{.*}} uitofp
;
- ; AVX512F: uitofpv2i32v2double
+ ; AVX512F-LABEL: uitofpv2i32v2double
; AVX512F: cost of 4 {{.*}} uitofp
%1 = uitofp <2 x i32> %a to <2 x double>
ret <2 x double> %1
}
define <4 x double> @uitofpv4i32v4double(<4 x i32> %a) {
- ; SSE2: uitofpv4i32v4double
+ ; SSE2-LABEL: uitofpv4i32v4double
; SSE2: cost of 40 {{.*}} uitofp
;
- ; AVX1: uitofpv4i32v4double
+ ; AVX1-LABEL: uitofpv4i32v4double
; AVX1: cost of 6 {{.*}} uitofp
;
- ; AVX2: uitofpv4i32v4double
+ ; AVX2-LABEL: uitofpv4i32v4double
; AVX2: cost of 6 {{.*}} uitofp
;
- ; AVX512F: uitofpv4i32v4double
- ; AVX512F: cost of 6 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv4i32v4double
+ ; AVX512F: cost of 1 {{.*}} uitofp
%1 = uitofp <4 x i32> %a to <4 x double>
ret <4 x double> %1
}
define <8 x double> @uitofpv8i32v8double(<8 x i32> %a) {
- ; SSE2: uitofpv8i32v8double
+ ; SSE2-LABEL: uitofpv8i32v8double
; SSE2: cost of 80 {{.*}} uitofp
;
- ; AVX1: uitofpv8i32v8double
+ ; AVX1-LABEL: uitofpv8i32v8double
; AVX1: cost of 20 {{.*}} uitofp
;
- ; AVX2: uitofpv8i32v8double
+ ; AVX2-LABEL: uitofpv8i32v8double
; AVX2: cost of 20 {{.*}} uitofp
;
- ; AVX512F: uitofpv8i32v8double
- ; AVX512F: cost of 22 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv8i32v8double
+ ; AVX512F: cost of 1 {{.*}} uitofp
%1 = uitofp <8 x i32> %a to <8 x double>
ret <8 x double> %1
}
define <16 x double> @uitofpv16i32v16double(<16 x i32> %a) {
- ; SSE2: uitofpv16i32v16double
+ ; SSE2-LABEL: uitofpv16i32v16double
; SSE2: cost of 160 {{.*}} uitofp
;
- ; AVX1: uitofpv16i32v16double
+ ; AVX1-LABEL: uitofpv16i32v16double
; AVX1: cost of 40 {{.*}} uitofp
;
- ; AVX2: uitofpv16i32v16double
+ ; AVX2-LABEL: uitofpv16i32v16double
; AVX2: cost of 40 {{.*}} uitofp
;
- ; AVX512F: uitofpv16i32v16double
+ ; AVX512F-LABEL: uitofpv16i32v16double
; AVX512F: cost of 44 {{.*}} uitofp
%1 = uitofp <16 x i32> %a to <16 x double>
ret <16 x double> %1
}
define <32 x double> @uitofpv32i32v32double(<32 x i32> %a) {
- ; SSE2: uitofpv32i32v32double
+ ; SSE2-LABEL: uitofpv32i32v32double
; SSE2: cost of 320 {{.*}} uitofp
;
- ; AVX1: uitofpv32i32v32double
+ ; AVX1-LABEL: uitofpv32i32v32double
; AVX1: cost of 80 {{.*}} uitofp
;
- ; AVX2: uitofpv32i32v32double
+ ; AVX2-LABEL: uitofpv32i32v32double
; AVX2: cost of 80 {{.*}} uitofp
;
- ; AVX512F: uitofpv32i32v32double
+ ; AVX512F-LABEL: uitofpv32i32v32double
; AVX512F: cost of 88 {{.*}} uitofp
%1 = uitofp <32 x i32> %a to <32 x double>
ret <32 x double> %1
}
define <2 x double> @uitofpv2i64v2double(<2 x i64> %a) {
- ; SSE2: uitofpv2i64v2double
+ ; SSE2-LABEL: uitofpv2i64v2double
; SSE2: cost of 20 {{.*}} uitofp
;
- ; AVX1: uitofpv2i64v2double
+ ; AVX1-LABEL: uitofpv2i64v2double
; AVX1: cost of 20 {{.*}} uitofp
;
- ; AVX2: uitofpv2i64v2double
+ ; AVX2-LABEL: uitofpv2i64v2double
; AVX2: cost of 20 {{.*}} uitofp
;
- ; AVX512F: uitofpv2i64v2double
- ; AVX512F: cost of 20 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv2i64v2double
+ ; AVX512F: cost of 5 {{.*}} uitofp
+ ;
+ ; AVX512DQ: uitofpv2i64v2double
+ ; AVX512DQ: cost of 1 {{.*}} uitofp
%1 = uitofp <2 x i64> %a to <2 x double>
ret <2 x double> %1
}
define <4 x double> @uitofpv4i64v4double(<4 x i64> %a) {
- ; SSE2: uitofpv4i64v4double
+ ; SSE2-LABEL: uitofpv4i64v4double
; SSE2: cost of 40 {{.*}} uitofp
;
- ; AVX1: uitofpv4i64v4double
+ ; AVX1-LABEL: uitofpv4i64v4double
; AVX1: cost of 40 {{.*}} uitofp
;
- ; AVX2: uitofpv4i64v4double
+ ; AVX2-LABEL: uitofpv4i64v4double
; AVX2: cost of 40 {{.*}} uitofp
;
- ; AVX512F: uitofpv4i64v4double
- ; AVX512F: cost of 40 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv4i64v4double
+ ; AVX512F: cost of 12 {{.*}} uitofp
+ ;
+ ; AVX512DQ: uitofpv4i64v4double
+ ; AVX512DQ: cost of 1 {{.*}} uitofp
%1 = uitofp <4 x i64> %a to <4 x double>
ret <4 x double> %1
}
define <8 x double> @uitofpv8i64v8double(<8 x i64> %a) {
- ; SSE2: uitofpv8i64v8double
+ ; SSE2-LABEL: uitofpv8i64v8double
; SSE2: cost of 80 {{.*}} uitofp
;
- ; AVX1: uitofpv8i64v8double
+ ; AVX1-LABEL: uitofpv8i64v8double
; AVX1: cost of 20 {{.*}} uitofp
;
- ; AVX2: uitofpv8i64v8double
+ ; AVX2-LABEL: uitofpv8i64v8double
; AVX2: cost of 20 {{.*}} uitofp
;
- ; AVX512F: uitofpv8i64v8double
- ; AVX512F: cost of 22 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv8i64v8double
+ ; AVX512F: cost of 26 {{.*}} uitofp
+ ;
+ ; AVX512DQ: uitofpv8i64v8double
+ ; AVX512DQ: cost of 1 {{.*}} uitofp
%1 = uitofp <8 x i64> %a to <8 x double>
ret <8 x double> %1
}
define <16 x double> @uitofpv16i64v16double(<16 x i64> %a) {
- ; SSE2: uitofpv16i64v16double
+ ; SSE2-LABEL: uitofpv16i64v16double
; SSE2: cost of 160 {{.*}} uitofp
;
- ; AVX1: uitofpv16i64v16double
+ ; AVX1-LABEL: uitofpv16i64v16double
; AVX1: cost of 40 {{.*}} uitofp
;
- ; AVX2: uitofpv16i64v16double
+ ; AVX2-LABEL: uitofpv16i64v16double
; AVX2: cost of 40 {{.*}} uitofp
;
- ; AVX512F: uitofpv16i64v16double
+ ; AVX512F-LABEL: uitofpv16i64v16double
; AVX512F: cost of 44 {{.*}} uitofp
+ ;
+ ; AVX512DQ: uitofpv16i64v16double
+ ; AVX512DQ: cost of 44 {{.*}} uitofp
%1 = uitofp <16 x i64> %a to <16 x double>
ret <16 x double> %1
}
define <32 x double> @uitofpv32i64v32double(<32 x i64> %a) {
- ; SSE2: uitofpv32i64v32double
+ ; SSE2-LABEL: uitofpv32i64v32double
; SSE2: cost of 320 {{.*}} uitofp
;
- ; AVX1: uitofpv32i64v32double
+ ; AVX1-LABEL: uitofpv32i64v32double
; AVX1: cost of 80 {{.*}} uitofp
;
- ; AVX2: uitofpv32i64v32double
+ ; AVX2-LABEL: uitofpv32i64v32double
; AVX2: cost of 80 {{.*}} uitofp
;
- ; AVX512F: uitofpv32i64v32double
+ ; AVX512F-LABEL: uitofpv32i64v32double
; AVX512F: cost of 88 {{.*}} uitofp
+ ;
+ ; AVX512DQ: uitofpv32i64v32double
+ ; AVX512DQ: cost of 88 {{.*}} uitofp
%1 = uitofp <32 x i64> %a to <32 x double>
ret <32 x double> %1
}
define <2 x float> @uitofpv2i8v2float(<2 x i8> %a) {
- ; SSE2: uitofpv2i8v2float
+ ; SSE2-LABEL: uitofpv2i8v2float
; SSE2: cost of 15 {{.*}} uitofp
;
- ; AVX1: uitofpv2i8v2float
+ ; AVX1-LABEL: uitofpv2i8v2float
; AVX1: cost of 4 {{.*}} uitofp
;
- ; AVX2: uitofpv2i8v2float
+ ; AVX2-LABEL: uitofpv2i8v2float
; AVX2: cost of 4 {{.*}} uitofp
;
- ; AVX512F: uitofpv2i8v2float
+ ; AVX512F-LABEL: uitofpv2i8v2float
; AVX512F: cost of 4 {{.*}} uitofp
%1 = uitofp <2 x i8> %a to <2 x float>
ret <2 x float> %1
}
define <4 x float> @uitofpv4i8v4float(<4 x i8> %a) {
- ; SSE2: uitofpv4i8v4float
+ ; SSE2-LABEL: uitofpv4i8v4float
; SSE2: cost of 8 {{.*}} uitofp
;
- ; AVX1: uitofpv4i8v4float
+ ; AVX1-LABEL: uitofpv4i8v4float
; AVX1: cost of 2 {{.*}} uitofp
;
- ; AVX2: uitofpv4i8v4float
+ ; AVX2-LABEL: uitofpv4i8v4float
; AVX2: cost of 2 {{.*}} uitofp
;
- ; AVX512F: uitofpv4i8v4float
+ ; AVX512F-LABEL: uitofpv4i8v4float
; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <4 x i8> %a to <4 x float>
ret <4 x float> %1
}
define <8 x float> @uitofpv8i8v8float(<8 x i8> %a) {
- ; SSE2: uitofpv8i8v8float
+ ; SSE2-LABEL: uitofpv8i8v8float
; SSE2: cost of 15 {{.*}} uitofp
;
- ; AVX1: uitofpv8i8v8float
+ ; AVX1-LABEL: uitofpv8i8v8float
; AVX1: cost of 5 {{.*}} uitofp
;
- ; AVX2: uitofpv8i8v8float
+ ; AVX2-LABEL: uitofpv8i8v8float
; AVX2: cost of 5 {{.*}} uitofp
;
- ; AVX512F: uitofpv8i8v8float
- ; AVX512F: cost of 5 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv8i8v8float
+ ; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <8 x i8> %a to <8 x float>
ret <8 x float> %1
}
define <16 x float> @uitofpv16i8v16float(<16 x i8> %a) {
- ; SSE2: uitofpv16i8v16float
+ ; SSE2-LABEL: uitofpv16i8v16float
; SSE2: cost of 8 {{.*}} uitofp
;
- ; AVX1: uitofpv16i8v16float
+ ; AVX1-LABEL: uitofpv16i8v16float
; AVX1: cost of 44 {{.*}} uitofp
;
- ; AVX2: uitofpv16i8v16float
+ ; AVX2-LABEL: uitofpv16i8v16float
; AVX2: cost of 44 {{.*}} uitofp
;
- ; AVX512F: uitofpv16i8v16float
- ; AVX512F: cost of 46 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv16i8v16float
+ ; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <16 x i8> %a to <16 x float>
ret <16 x float> %1
}
define <32 x float> @uitofpv32i8v32float(<32 x i8> %a) {
- ; SSE2: uitofpv32i8v32float
+ ; SSE2-LABEL: uitofpv32i8v32float
; SSE2: cost of 16 {{.*}} uitofp
;
- ; AVX1: uitofpv32i8v32float
+ ; AVX1-LABEL: uitofpv32i8v32float
; AVX1: cost of 88 {{.*}} uitofp
;
- ; AVX2: uitofpv32i8v32float
+ ; AVX2-LABEL: uitofpv32i8v32float
; AVX2: cost of 88 {{.*}} uitofp
;
- ; AVX512F: uitofpv32i8v32float
+ ; AVX512F-LABEL: uitofpv32i8v32float
; AVX512F: cost of 92 {{.*}} uitofp
%1 = uitofp <32 x i8> %a to <32 x float>
ret <32 x float> %1
}
define <2 x float> @uitofpv2i16v2float(<2 x i16> %a) {
- ; SSE2: uitofpv2i16v2float
+ ; SSE2-LABEL: uitofpv2i16v2float
; SSE2: cost of 15 {{.*}} uitofp
;
- ; AVX1: uitofpv2i16v2float
+ ; AVX1-LABEL: uitofpv2i16v2float
; AVX1: cost of 4 {{.*}} uitofp
;
- ; AVX2: uitofpv2i16v2float
+ ; AVX2-LABEL: uitofpv2i16v2float
; AVX2: cost of 4 {{.*}} uitofp
;
- ; AVX512F: uitofpv2i16v2float
+ ; AVX512F-LABEL: uitofpv2i16v2float
; AVX512F: cost of 4 {{.*}} uitofp
%1 = uitofp <2 x i16> %a to <2 x float>
ret <2 x float> %1
}
define <4 x float> @uitofpv4i16v4float(<4 x i16> %a) {
- ; SSE2: uitofpv4i16v4float
+ ; SSE2-LABEL: uitofpv4i16v4float
; SSE2: cost of 8 {{.*}} uitofp
;
- ; AVX1: uitofpv4i16v4float
+ ; AVX1-LABEL: uitofpv4i16v4float
; AVX1: cost of 2 {{.*}} uitofp
;
- ; AVX2: uitofpv4i16v4float
+ ; AVX2-LABEL: uitofpv4i16v4float
; AVX2: cost of 2 {{.*}} uitofp
;
- ; AVX512F: uitofpv4i16v4float
+ ; AVX512F-LABEL: uitofpv4i16v4float
; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <4 x i16> %a to <4 x float>
ret <4 x float> %1
}
define <8 x float> @uitofpv8i16v8float(<8 x i16> %a) {
- ; SSE2: uitofpv8i16v8float
+ ; SSE2-LABEL: uitofpv8i16v8float
; SSE2: cost of 15 {{.*}} uitofp
;
- ; AVX1: uitofpv8i16v8float
+ ; AVX1-LABEL: uitofpv8i16v8float
; AVX1: cost of 5 {{.*}} uitofp
;
- ; AVX2: uitofpv8i16v8float
+ ; AVX2-LABEL: uitofpv8i16v8float
; AVX2: cost of 5 {{.*}} uitofp
;
- ; AVX512F: uitofpv8i16v8float
- ; AVX512F: cost of 5 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv8i16v8float
+ ; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <8 x i16> %a to <8 x float>
ret <8 x float> %1
}
define <16 x float> @uitofpv16i16v16float(<16 x i16> %a) {
- ; SSE2: uitofpv16i16v16float
+ ; SSE2-LABEL: uitofpv16i16v16float
; SSE2: cost of 30 {{.*}} uitofp
;
- ; AVX1: uitofpv16i16v16float
+ ; AVX1-LABEL: uitofpv16i16v16float
; AVX1: cost of 44 {{.*}} uitofp
;
- ; AVX2: uitofpv16i16v16float
+ ; AVX2-LABEL: uitofpv16i16v16float
; AVX2: cost of 44 {{.*}} uitofp
;
- ; AVX512F: uitofpv16i16v16float
- ; AVX512F: cost of 46 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv16i16v16float
+ ; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <16 x i16> %a to <16 x float>
ret <16 x float> %1
}
define <32 x float> @uitofpv32i16v32float(<32 x i16> %a) {
- ; SSE2: uitofpv32i16v32float
+ ; SSE2-LABEL: uitofpv32i16v32float
; SSE2: cost of 60 {{.*}} uitofp
;
- ; AVX1: uitofpv32i16v32float
+ ; AVX1-LABEL: uitofpv32i16v32float
; AVX1: cost of 88 {{.*}} uitofp
;
- ; AVX2: uitofpv32i16v32float
+ ; AVX2-LABEL: uitofpv32i16v32float
; AVX2: cost of 88 {{.*}} uitofp
;
- ; AVX512F: uitofpv32i16v32float
+ ; AVX512F-LABEL: uitofpv32i16v32float
; AVX512F: cost of 92 {{.*}} uitofp
%1 = uitofp <32 x i16> %a to <32 x float>
ret <32 x float> %1
}
define <2 x float> @uitofpv2i32v2float(<2 x i32> %a) {
- ; SSE2: uitofpv2i32v2float
+ ; SSE2-LABEL: uitofpv2i32v2float
; SSE2: cost of 15 {{.*}} uitofp
;
- ; AVX1: uitofpv2i32v2float
+ ; AVX1-LABEL: uitofpv2i32v2float
; AVX1: cost of 4 {{.*}} uitofp
;
- ; AVX2: uitofpv2i32v2float
+ ; AVX2-LABEL: uitofpv2i32v2float
; AVX2: cost of 4 {{.*}} uitofp
;
- ; AVX512F: uitofpv2i32v2float
- ; AVX512F: cost of 4 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv2i32v2float
+ ; AVX512F: cost of 2 {{.*}} uitofp
%1 = uitofp <2 x i32> %a to <2 x float>
ret <2 x float> %1
}
define <4 x float> @uitofpv4i32v4float(<4 x i32> %a) {
- ; SSE2: uitofpv4i32v4float
+ ; SSE2-LABEL: uitofpv4i32v4float
; SSE2: cost of 8 {{.*}} uitofp
;
- ; AVX1: uitofpv4i32v4float
+ ; AVX1-LABEL: uitofpv4i32v4float
; AVX1: cost of 6 {{.*}} uitofp
;
- ; AVX2: uitofpv4i32v4float
+ ; AVX2-LABEL: uitofpv4i32v4float
; AVX2: cost of 6 {{.*}} uitofp
;
- ; AVX512F: uitofpv4i32v4float
- ; AVX512F: cost of 6 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv4i32v4float
+ ; AVX512F: cost of 1 {{.*}} uitofp
%1 = uitofp <4 x i32> %a to <4 x float>
ret <4 x float> %1
}
define <8 x float> @uitofpv8i32v8float(<8 x i32> %a) {
- ; SSE2: uitofpv8i32v8float
+ ; SSE2-LABEL: uitofpv8i32v8float
; SSE2: cost of 16 {{.*}} uitofp
;
- ; AVX1: uitofpv8i32v8float
+ ; AVX1-LABEL: uitofpv8i32v8float
; AVX1: cost of 9 {{.*}} uitofp
;
- ; AVX2: uitofpv8i32v8float
+ ; AVX2-LABEL: uitofpv8i32v8float
; AVX2: cost of 8 {{.*}} uitofp
;
- ; AVX512F: uitofpv8i32v8float
- ; AVX512F: cost of 8 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv8i32v8float
+ ; AVX512F: cost of 1 {{.*}} uitofp
%1 = uitofp <8 x i32> %a to <8 x float>
ret <8 x float> %1
}
define <16 x float> @uitofpv16i32v16float(<16 x i32> %a) {
- ; SSE2: uitofpv16i32v16float
+ ; SSE2-LABEL: uitofpv16i32v16float
; SSE2: cost of 32 {{.*}} uitofp
;
- ; AVX1: uitofpv16i32v16float
+ ; AVX1-LABEL: uitofpv16i32v16float
; AVX1: cost of 44 {{.*}} uitofp
;
- ; AVX2: uitofpv16i32v16float
+ ; AVX2-LABEL: uitofpv16i32v16float
; AVX2: cost of 44 {{.*}} uitofp
;
- ; AVX512F: uitofpv16i32v16float
- ; AVX512F: cost of 46 {{.*}} uitofp
+ ; AVX512F-LABEL: uitofpv16i32v16float
+ ; AVX512F: cost of 1 {{.*}} uitofp
%1 = uitofp <16 x i32> %a to <16 x float>
ret <16 x float> %1
}
define <32 x float> @uitofpv32i32v32float(<32 x i32> %a) {
- ; SSE2: uitofpv32i32v32float
+ ; SSE2-LABEL: uitofpv32i32v32float
; SSE2: cost of 64 {{.*}} uitofp
;
- ; AVX1: uitofpv32i32v32float
+ ; AVX1-LABEL: uitofpv32i32v32float
; AVX1: cost of 88 {{.*}} uitofp
;
- ; AVX2: uitofpv32i32v32float
+ ; AVX2-LABEL: uitofpv32i32v32float
; AVX2: cost of 88 {{.*}} uitofp
;
- ; AVX512F: uitofpv32i32v32float
+ ; AVX512F-LABEL: uitofpv32i32v32float
; AVX512F: cost of 92 {{.*}} uitofp
%1 = uitofp <32 x i32> %a to <32 x float>
ret <32 x float> %1
}
define <2 x float> @uitofpv2i64v2float(<2 x i64> %a) {
- ; SSE2: uitofpv2i64v2float
+ ; SSE2-LABEL: uitofpv2i64v2float
; SSE2: cost of 15 {{.*}} uitofp
;
- ; AVX1: uitofpv2i64v2float
+ ; AVX1-LABEL: uitofpv2i64v2float
; AVX1: cost of 4 {{.*}} uitofp
;
- ; AVX2: uitofpv2i64v2float
+ ; AVX2-LABEL: uitofpv2i64v2float
; AVX2: cost of 4 {{.*}} uitofp
;
- ; AVX512F: uitofpv2i64v2float
+ ; AVX512F-LABEL: uitofpv2i64v2float
; AVX512F: cost of 4 {{.*}} uitofp
%1 = uitofp <2 x i64> %a to <2 x float>
ret <2 x float> %1
}
define <4 x float> @uitofpv4i64v4float(<4 x i64> %a) {
- ; SSE2: uitofpv4i64v4float
+ ; SSE2-LABEL: uitofpv4i64v4float
; SSE2: cost of 30 {{.*}} uitofp
;
- ; AVX1: uitofpv4i64v4float
+ ; AVX1-LABEL: uitofpv4i64v4float
; AVX1: cost of 10 {{.*}} uitofp
;
- ; AVX2: uitofpv4i64v4float
+ ; AVX2-LABEL: uitofpv4i64v4float
; AVX2: cost of 10 {{.*}} uitofp
;
- ; AVX512F: uitofpv4i64v4float
+ ; AVX512F-LABEL: uitofpv4i64v4float
; AVX512F: cost of 10 {{.*}} uitofp
%1 = uitofp <4 x i64> %a to <4 x float>
ret <4 x float> %1
}
define <8 x float> @uitofpv8i64v8float(<8 x i64> %a) {
- ; SSE2: uitofpv8i64v8float
+ ; SSE2-LABEL: uitofpv8i64v8float
; SSE2: cost of 60 {{.*}} uitofp
;
- ; AVX1: uitofpv8i64v8float
+ ; AVX1-LABEL: uitofpv8i64v8float
; AVX1: cost of 22 {{.*}} uitofp
;
- ; AVX2: uitofpv8i64v8float
+ ; AVX2-LABEL: uitofpv8i64v8float
; AVX2: cost of 22 {{.*}} uitofp
;
- ; AVX512F: uitofpv8i64v8float
+ ; AVX512F-LABEL: uitofpv8i64v8float
; AVX512F: cost of 22 {{.*}} uitofp
%1 = uitofp <8 x i64> %a to <8 x float>
ret <8 x float> %1
}
define <16 x float> @uitofpv16i64v16float(<16 x i64> %a) {
- ; SSE2: uitofpv16i64v16float
+ ; SSE2-LABEL: uitofpv16i64v16float
; SSE2: cost of 120 {{.*}} uitofp
;
- ; AVX1: uitofpv16i64v16float
+ ; AVX1-LABEL: uitofpv16i64v16float
; AVX1: cost of 44 {{.*}} uitofp
;
- ; AVX2: uitofpv16i64v16float
+ ; AVX2-LABEL: uitofpv16i64v16float
; AVX2: cost of 44 {{.*}} uitofp
;
- ; AVX512F: uitofpv16i64v16float
+ ; AVX512F-LABEL: uitofpv16i64v16float
; AVX512F: cost of 46 {{.*}} uitofp
%1 = uitofp <16 x i64> %a to <16 x float>
ret <16 x float> %1
}
define <32 x float> @uitofpv32i64v32float(<32 x i64> %a) {
- ; SSE2: uitofpv32i64v32float
+ ; SSE2-LABEL: uitofpv32i64v32float
; SSE2: cost of 240 {{.*}} uitofp
;
- ; AVX1: uitofpv32i64v32float
+ ; AVX1-LABEL: uitofpv32i64v32float
; AVX1: cost of 88 {{.*}} uitofp
;
- ; AVX2: uitofpv32i64v32float
+ ; AVX2-LABEL: uitofpv32i64v32float
; AVX2: cost of 88 {{.*}} uitofp
;
- ; AVX512F: uitofpv32i64v32float
+ ; AVX512F-LABEL: uitofpv32i64v32float
; AVX512F: cost of 92 {{.*}} uitofp
%1 = uitofp <32 x i64> %a to <32 x float>
ret <32 x float> %1
}
+define <8 x i32> @fptouiv8f32v8i32(<8 x float> %a) {
+ ; AVX512F-LABEL: fptouiv8f32v8i32
+ ; AVX512F: cost of 1 {{.*}} fptoui
+ %1 = fptoui <8 x float> %a to <8 x i32>
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @fptouiv4f32v4i32(<4 x float> %a) {
+ ; AVX512F-LABEL: fptouiv4f32v4i32
+ ; AVX512F: cost of 1 {{.*}} fptoui
+ %1 = fptoui <4 x float> %a to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <2 x i32> @fptouiv2f32v2i32(<2 x float> %a) {
+ ; AVX512F-LABEL: fptouiv2f32v2i32
+ ; AVX512F: cost of 1 {{.*}} fptoui
+ %1 = fptoui <2 x float> %a to <2 x i32>
+ ret <2 x i32> %1
+}
+
+define <16 x i32> @fptouiv16f32v16i32(<16 x float> %a) {
+ ; AVX512F-LABEL: fptouiv16f32v16i32
+ ; AVX512F: cost of 1 {{.*}} fptoui
+ %1 = fptoui <16 x float> %a to <16 x i32>
+ ret <16 x i32> %1
+}
+
+define <8 x i64> @fptouiv8f32v8i64(<8 x float> %a) {
+ ; AVX512DQ-LABEL: fptouiv8f32v8i64
+ ; AVX512DQ: cost of 1 {{.*}} fptoui
+ %1 = fptoui <8 x float> %a to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <4 x i64> @fptouiv4f32v4i64(<4 x float> %a) {
+ ; AVX512DQ-LABEL: fptouiv4f32v4i64
+ ; AVX512DQ: cost of 1 {{.*}} fptoui
+ %1 = fptoui <4 x float> %a to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @fptouiv2f32v2i64(<2 x float> %a) {
+ ; AVX512DQ-LABEL: fptouiv2f32v2i64
+ ; AVX512DQ: cost of 1 {{.*}} fptoui
+ %1 = fptoui <2 x float> %a to <2 x i64>
+ ret <2 x i64> %1
+}
diff --git a/test/Analysis/CostModel/X86/vector_gep.ll b/test/Analysis/CostModel/X86/vector_gep.ll
new file mode 100644
index 000000000000..e49f25871d66
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vector_gep.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-linux-unknown-unknown -mattr=+avx512f | FileCheck %s
+
+%struct.S = type { [1000 x i32] }
+
+
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
+
+define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){
+ %temp = insertelement <4 x i64> undef, i64 %base, i32 0
+ %vector = shufflevector <4 x i64> %temp, <4 x i64> undef, <4 x i32> zeroinitializer
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds %struct.S
+ %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32]
+ %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
+ %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+ ret <4 x i32> %res
+}
diff --git a/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
new file mode 100644
index 000000000000..a0d07d7b6ec0
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -0,0 +1,392 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+
+; Verify the cost of vector arithmetic shift right instructions.
+
+;
+; Variable Shifts
+;
+
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
+; SSE2: Found an estimated cost of 12 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <2 x i64> %a, %b
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
+; SSE2: Found an estimated cost of 24 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = ashr <4 x i64> %a, %b
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = ashr <4 x i32> %a, %b
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = ashr <8 x i32> %a, %b
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <8 x i16> %a, %b
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 64 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = ashr <16 x i16> %a, %b
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
+; SSE2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 54 for instruction: %shift
+; AVX: Found an estimated cost of 54 for instruction: %shift
+; AVX2: Found an estimated cost of 54 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <16 x i8> %a, %b
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
+; SSE2: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 108 for instruction: %shift
+; AVX: Found an estimated cost of 108 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = ashr <32 x i8> %a, %b
+ ret <32 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
+; SSE2: Found an estimated cost of 12 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
+ %shift = ashr <2 x i64> %a, %splat
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
+; SSE2: Found an estimated cost of 24 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
+ %shift = ashr <4 x i64> %a, %splat
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shift = ashr <4 x i32> %a, %splat
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+ %shift = ashr <8 x i32> %a, %splat
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %shift = ashr <8 x i16> %a, %splat
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 64 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+ %shift = ashr <16 x i16> %a, %splat
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
+; SSE2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 54 for instruction: %shift
+; AVX: Found an estimated cost of 54 for instruction: %shift
+; AVX2: Found an estimated cost of 54 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %shift = ashr <16 x i8> %a, %splat
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
+; SSE2: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 108 for instruction: %shift
+; AVX: Found an estimated cost of 108 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+ %shift = ashr <32 x i8> %a, %splat
+ ret <32 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
+; SSE2: Found an estimated cost of 12 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <2 x i64> %a, <i64 1, i64 7>
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
+; SSE2: Found an estimated cost of 24 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = ashr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 64 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
+; SSE2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 54 for instruction: %shift
+; AVX: Found an estimated cost of 54 for instruction: %shift
+; AVX2: Found an estimated cost of 54 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
+; SSE2: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 108 for instruction: %shift
+; AVX: Found an estimated cost of 108 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <32 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 4 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <32 x i8> %shift
+}
diff --git a/test/Analysis/CostModel/X86/vshift-cost.ll b/test/Analysis/CostModel/X86/vshift-cost.ll
deleted file mode 100644
index 84d72463ac0d..000000000000
--- a/test/Analysis/CostModel/X86/vshift-cost.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
-
-
-; Verify the cost of vector shift left instructions.
-
-; We always emit a single pmullw in the case of v8i16 vector shifts by
-; non-uniform constant.
-
-define <8 x i16> @test1(<8 x i16> %a) {
- %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
- ret <8 x i16> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test1':
-; CHECK: Found an estimated cost of 1 for instruction: %shl
-
-
-define <8 x i16> @test2(<8 x i16> %a) {
- %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
- ret <8 x i16> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test2':
-; CHECK: Found an estimated cost of 1 for instruction: %shl
-
-
-; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction.
-; Make sure that the estimated cost is always 1 except for the case where
-; we only have SSE2 support. With SSE2, we are forced to special lower the
-; v4i32 mul as a 2x shuffle, 2x pmuludq, 2x shuffle.
-
-define <4 x i32> @test3(<4 x i32> %a) {
- %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
- ret <4 x i32> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test3':
-; SSE2: Found an estimated cost of 6 for instruction: %shl
-; SSE41: Found an estimated cost of 1 for instruction: %shl
-; AVX: Found an estimated cost of 1 for instruction: %shl
-; AVX2: Found an estimated cost of 1 for instruction: %shl
-
-
-define <4 x i32> @test4(<4 x i32> %a) {
- %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
- ret <4 x i32> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test4':
-; SSE2: Found an estimated cost of 6 for instruction: %shl
-; SSE41: Found an estimated cost of 1 for instruction: %shl
-; AVX: Found an estimated cost of 1 for instruction: %shl
-; AVX2: Found an estimated cost of 1 for instruction: %shl
-
-
-; On AVX2 we are able to lower the following shift into a single
-; vpsllvq. Therefore, the expected cost is only 1.
-; In all other cases, this shift is scalarized as the target does not support
-; vpsllv instructions.
-
-define <2 x i64> @test5(<2 x i64> %a) {
- %shl = shl <2 x i64> %a, <i64 2, i64 3>
- ret <2 x i64> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test5':
-; SSE2: Found an estimated cost of 20 for instruction: %shl
-; SSE41: Found an estimated cost of 20 for instruction: %shl
-; AVX: Found an estimated cost of 20 for instruction: %shl
-; AVX2: Found an estimated cost of 1 for instruction: %shl
-
-
-; v16i16 and v8i32 shift left by non-uniform constant are lowered into
-; vector multiply instructions. With AVX (but not AVX2), the vector multiply
-; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert.
-;
-; With AVX2, instruction vpmullw works with 256bit quantities and
-; therefore there is no need to split the resulting vector multiply into
-; a sequence of two multiply.
-;
-; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice
-; the cost computed in the case of 'test1'. That is because the backend
-; simply emits 2 pmullw with no extract/insert.
-
-
-define <16 x i16> @test6(<16 x i16> %a) {
- %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
- ret <16 x i16> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test6':
-; SSE2: Found an estimated cost of 2 for instruction: %shl
-; SSE41: Found an estimated cost of 2 for instruction: %shl
-; AVX: Found an estimated cost of 4 for instruction: %shl
-; AVX2: Found an estimated cost of 1 for instruction: %shl
-
-
-; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice
-; the cost computed in the case of 'test3'. That is because the multiply
-; is type-legalized into two 4i32 vector multiply.
-
-define <8 x i32> @test7(<8 x i32> %a) {
- %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
- ret <8 x i32> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test7':
-; SSE2: Found an estimated cost of 12 for instruction: %shl
-; SSE41: Found an estimated cost of 2 for instruction: %shl
-; AVX: Found an estimated cost of 4 for instruction: %shl
-; AVX2: Found an estimated cost of 1 for instruction: %shl
-
-
-; On AVX2 we are able to lower the following shift into a single
-; vpsllvq. Therefore, the expected cost is only 1.
-; In all other cases, this shift is scalarized as the target does not support
-; vpsllv instructions.
-
-define <4 x i64> @test8(<4 x i64> %a) {
- %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
- ret <4 x i64> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test8':
-; SSE2: Found an estimated cost of 40 for instruction: %shl
-; SSE41: Found an estimated cost of 40 for instruction: %shl
-; AVX: Found an estimated cost of 40 for instruction: %shl
-; AVX2: Found an estimated cost of 1 for instruction: %shl
-
-
-; Same as 'test6', with the difference that the cost is double.
-
-define <32 x i16> @test9(<32 x i16> %a) {
- %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
- ret <32 x i16> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test9':
-; SSE2: Found an estimated cost of 4 for instruction: %shl
-; SSE41: Found an estimated cost of 4 for instruction: %shl
-; AVX: Found an estimated cost of 8 for instruction: %shl
-; AVX2: Found an estimated cost of 2 for instruction: %shl
-
-
-; Same as 'test7', except that now the cost is double.
-
-define <16 x i32> @test10(<16 x i32> %a) {
- %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
- ret <16 x i32> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test10':
-; SSE2: Found an estimated cost of 24 for instruction: %shl
-; SSE41: Found an estimated cost of 4 for instruction: %shl
-; AVX: Found an estimated cost of 8 for instruction: %shl
-; AVX2: Found an estimated cost of 2 for instruction: %shl
-
-
-; On AVX2 we are able to lower the following shift into a sequence of
-; two vpsllvq instructions. Therefore, the expected cost is only 2.
-; In all other cases, this shift is scalarized as we don't have vpsllv
-; instructions.
-
-define <8 x i64> @test11(<8 x i64> %a) {
- %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
- ret <8 x i64> %shl
-}
-; CHECK: 'Cost Model Analysis' for function 'test11':
-; SSE2: Found an estimated cost of 80 for instruction: %shl
-; SSE41: Found an estimated cost of 80 for instruction: %shl
-; AVX: Found an estimated cost of 80 for instruction: %shl
-; AVX2: Found an estimated cost of 2 for instruction: %shl
-
-
diff --git a/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
new file mode 100644
index 000000000000..a686b4368f21
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -0,0 +1,400 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+
+; Verify the cost of vector logical shift right instructions.
+
+;
+; Variable Shifts
+;
+
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <2 x i64> %a, %b
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <4 x i64> %a, %b
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <4 x i32> %a, %b
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <8 x i32> %a, %b
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <8 x i16> %a, %b
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 64 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = lshr <16 x i16> %a, %b
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
+; SSE2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 26 for instruction: %shift
+; AVX: Found an estimated cost of 26 for instruction: %shift
+; AVX2: Found an estimated cost of 26 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <16 x i8> %a, %b
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
+; SSE2: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 52 for instruction: %shift
+; AVX: Found an estimated cost of 52 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = lshr <32 x i8> %a, %b
+ ret <32 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
+ %shift = lshr <2 x i64> %a, %splat
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
+ %shift = lshr <4 x i64> %a, %splat
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shift = lshr <4 x i32> %a, %splat
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+ %shift = lshr <8 x i32> %a, %splat
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %shift = lshr <8 x i16> %a, %splat
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 64 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+ %shift = lshr <16 x i16> %a, %splat
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
+; SSE2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 26 for instruction: %shift
+; AVX: Found an estimated cost of 26 for instruction: %shift
+; AVX2: Found an estimated cost of 26 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %shift = lshr <16 x i8> %a, %splat
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
+; SSE2: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 52 for instruction: %shift
+; AVX: Found an estimated cost of 52 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+ %shift = lshr <32 x i8> %a, %splat
+ ret <32 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <2 x i64> %a, <i64 1, i64 7>
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
+; SSE2: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 16 for instruction: %shift
+; AVX: Found an estimated cost of 16 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 64 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
+; SSE2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 26 for instruction: %shift
+; AVX: Found an estimated cost of 26 for instruction: %shift
+; AVX2: Found an estimated cost of 26 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
+; SSE2: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 52 for instruction: %shift
+; AVX: Found an estimated cost of 52 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <32 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 4 for instruction: %shift
+ %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <32 x i8> %shift
+}
diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
new file mode 100644
index 000000000000..85ca5a5a7f32
--- /dev/null
+++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -0,0 +1,580 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
+
+; Verify the cost of vector shift left instructions.
+
+;
+;
+; Variable Shifts
+;
+
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <2 x i64> %a, %b
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <4 x i64> %a, %b
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
+; SSE2: Found an estimated cost of 10 for instruction: %shift
+; SSE41: Found an estimated cost of 10 for instruction: %shift
+; AVX: Found an estimated cost of 10 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <4 x i32> %a, %b
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
+; SSE2: Found an estimated cost of 20 for instruction: %shift
+; SSE41: Found an estimated cost of 20 for instruction: %shift
+; AVX: Found an estimated cost of 20 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <8 x i32> %a, %b
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <8 x i16> %a, %b
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 64 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <16 x i16> %a, %b
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
+; SSE2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 26 for instruction: %shift
+; AVX: Found an estimated cost of 26 for instruction: %shift
+; AVX2: Found an estimated cost of 26 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <16 x i8> %a, %b
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
+; SSE2: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 52 for instruction: %shift
+; AVX: Found an estimated cost of 52 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <32 x i8> %a, %b
+ ret <32 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
+ %shift = shl <2 x i64> %a, %splat
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
+ %shift = shl <4 x i64> %a, %splat
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
+; SSE2: Found an estimated cost of 10 for instruction: %shift
+; SSE41: Found an estimated cost of 10 for instruction: %shift
+; AVX: Found an estimated cost of 10 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %shift = shl <4 x i32> %a, %splat
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
+; SSE2: Found an estimated cost of 20 for instruction: %shift
+; SSE41: Found an estimated cost of 20 for instruction: %shift
+; AVX: Found an estimated cost of 20 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+ %shift = shl <8 x i32> %a, %splat
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
+; SSE2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 32 for instruction: %shift
+; AVX: Found an estimated cost of 32 for instruction: %shift
+; AVX2: Found an estimated cost of 32 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %shift = shl <8 x i16> %a, %splat
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
+; SSE2: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 64 for instruction: %shift
+; AVX: Found an estimated cost of 64 for instruction: %shift
+; AVX2: Found an estimated cost of 10 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+ %shift = shl <16 x i16> %a, %splat
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
+; SSE2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 26 for instruction: %shift
+; AVX: Found an estimated cost of 26 for instruction: %shift
+; AVX2: Found an estimated cost of 26 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
+ %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %shift = shl <16 x i8> %a, %splat
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
+; SSE2: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 52 for instruction: %shift
+; AVX: Found an estimated cost of 52 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+ %shift = shl <32 x i8> %a, %splat
+ ret <32 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
+; SSE2: Found an estimated cost of 4 for instruction: %shift
+; SSE41: Found an estimated cost of 4 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <2 x i64> %a, <i64 1, i64 7>
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
+; SSE2: Found an estimated cost of 8 for instruction: %shift
+; SSE41: Found an estimated cost of 8 for instruction: %shift
+; AVX: Found an estimated cost of 8 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
+; SSE2: Found an estimated cost of 6 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
+; SSE2: Found an estimated cost of 12 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
+; SSE2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 26 for instruction: %shift
+; AVX: Found an estimated cost of 26 for instruction: %shift
+; AVX2: Found an estimated cost of 26 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
+; SSE2: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 52 for instruction: %shift
+; AVX: Found an estimated cost of 52 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <32 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <2 x i64> %a, <i64 7, i64 7>
+ ret <2 x i64> %shift
+}
+
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+ ret <4 x i64> %shift
+}
+
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+ ret <4 x i32> %shift
+}
+
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x i32> %shift
+}
+
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <8 x i16> %shift
+}
+
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 2 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <16 x i16> %shift
+}
+
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
+; SSE2: Found an estimated cost of 1 for instruction: %shift
+; SSE41: Found an estimated cost of 1 for instruction: %shift
+; AVX: Found an estimated cost of 1 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; XOP: Found an estimated cost of 1 for instruction: %shift
+ %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %shift
+}
+
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
+; SSE2: Found an estimated cost of 2 for instruction: %shift
+; SSE41: Found an estimated cost of 2 for instruction: %shift
+; AVX: Found an estimated cost of 2 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
+; XOP: Found an estimated cost of 2 for instruction: %shift
+ %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <32 x i8> %shift
+}
+
+;
+; Special Cases
+;
+
+; We always emit a single pmullw in the case of v8i16 vector shifts by
+; non-uniform constant.
+
+define <8 x i16> @test1(<8 x i16> %a) {
+ %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
+ ret <8 x i16> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test1':
+; CHECK: Found an estimated cost of 1 for instruction: %shl
+
+
+define <8 x i16> @test2(<8 x i16> %a) {
+ %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
+ ret <8 x i16> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test2':
+; CHECK: Found an estimated cost of 1 for instruction: %shl
+
+
+; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction.
+; Make sure that the estimated cost is always 1 except for the case where
+; we only have SSE2 support. With SSE2, we are forced to special lower the
+; v4i32 mul as a 2x shuffle, 2x pmuludq, 2x shuffle.
+
+define <4 x i32> @test3(<4 x i32> %a) {
+ %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
+ ret <4 x i32> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test3':
+; SSE2: Found an estimated cost of 6 for instruction: %shl
+; SSE41: Found an estimated cost of 1 for instruction: %shl
+; AVX: Found an estimated cost of 1 for instruction: %shl
+; AVX2: Found an estimated cost of 1 for instruction: %shl
+; XOP: Found an estimated cost of 1 for instruction: %shl
+
+
+define <4 x i32> @test4(<4 x i32> %a) {
+ %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
+ ret <4 x i32> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test4':
+; SSE2: Found an estimated cost of 6 for instruction: %shl
+; SSE41: Found an estimated cost of 1 for instruction: %shl
+; AVX: Found an estimated cost of 1 for instruction: %shl
+; AVX2: Found an estimated cost of 1 for instruction: %shl
+; XOP: Found an estimated cost of 1 for instruction: %shl
+
+
+; On AVX2 we are able to lower the following shift into a single
+; vpsllvq. Therefore, the expected cost is only 1.
+; In all other cases, this shift is scalarized as the target does not support
+; vpsllv instructions.
+
+define <2 x i64> @test5(<2 x i64> %a) {
+ %shl = shl <2 x i64> %a, <i64 2, i64 3>
+ ret <2 x i64> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test5':
+; SSE2: Found an estimated cost of 4 for instruction: %shl
+; SSE41: Found an estimated cost of 4 for instruction: %shl
+; AVX: Found an estimated cost of 4 for instruction: %shl
+; AVX2: Found an estimated cost of 1 for instruction: %shl
+; XOP: Found an estimated cost of 1 for instruction: %shl
+
+
+; v16i16 and v8i32 shift left by non-uniform constant are lowered into
+; vector multiply instructions. With AVX (but not AVX2), the vector multiply
+; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert.
+;
+; With AVX2, instruction vpmullw works with 256bit quantities and
+; therefore there is no need to split the resulting vector multiply into
+; a sequence of two multiply.
+;
+; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice
+; the cost computed in the case of 'test1'. That is because the backend
+; simply emits 2 pmullw with no extract/insert.
+
+
+define <16 x i16> @test6(<16 x i16> %a) {
+ %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
+ ret <16 x i16> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test6':
+; SSE2: Found an estimated cost of 2 for instruction: %shl
+; SSE41: Found an estimated cost of 2 for instruction: %shl
+; AVX: Found an estimated cost of 4 for instruction: %shl
+; AVX2: Found an estimated cost of 1 for instruction: %shl
+; XOPAVX: Found an estimated cost of 2 for instruction: %shl
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
+
+
+; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice
+; the cost computed in the case of 'test3'. That is because the multiply
+; is type-legalized into two 4i32 vector multiply.
+
+define <8 x i32> @test7(<8 x i32> %a) {
+ %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+ ret <8 x i32> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test7':
+; SSE2: Found an estimated cost of 12 for instruction: %shl
+; SSE41: Found an estimated cost of 2 for instruction: %shl
+; AVX: Found an estimated cost of 4 for instruction: %shl
+; AVX2: Found an estimated cost of 1 for instruction: %shl
+; XOPAVX: Found an estimated cost of 2 for instruction: %shl
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
+
+
+; On AVX2 we are able to lower the following shift into a single
+; vpsllvq. Therefore, the expected cost is only 1.
+; In all other cases, this shift is scalarized as the target does not support
+; vpsllv instructions.
+
+define <4 x i64> @test8(<4 x i64> %a) {
+ %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
+ ret <4 x i64> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test8':
+; SSE2: Found an estimated cost of 8 for instruction: %shl
+; SSE41: Found an estimated cost of 8 for instruction: %shl
+; AVX: Found an estimated cost of 8 for instruction: %shl
+; AVX2: Found an estimated cost of 1 for instruction: %shl
+; XOPAVX: Found an estimated cost of 2 for instruction: %shl
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shl
+
+
+; Same as 'test6', with the difference that the cost is double.
+
+define <32 x i16> @test9(<32 x i16> %a) {
+ %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
+ ret <32 x i16> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test9':
+; SSE2: Found an estimated cost of 4 for instruction: %shl
+; SSE41: Found an estimated cost of 4 for instruction: %shl
+; AVX: Found an estimated cost of 8 for instruction: %shl
+; AVX2: Found an estimated cost of 2 for instruction: %shl
+; XOPAVX: Found an estimated cost of 4 for instruction: %shl
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shl
+
+
+; Same as 'test7', except that now the cost is double.
+
+define <16 x i32> @test10(<16 x i32> %a) {
+ %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
+ ret <16 x i32> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test10':
+; SSE2: Found an estimated cost of 24 for instruction: %shl
+; SSE41: Found an estimated cost of 4 for instruction: %shl
+; AVX: Found an estimated cost of 8 for instruction: %shl
+; AVX2: Found an estimated cost of 2 for instruction: %shl
+; XOPAVX: Found an estimated cost of 4 for instruction: %shl
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shl
+
+
+; On AVX2 we are able to lower the following shift into a sequence of
+; two vpsllvq instructions. Therefore, the expected cost is only 2.
+; In all other cases, this shift is scalarized as we don't have vpsllv
+; instructions.
+
+define <8 x i64> @test11(<8 x i64> %a) {
+ %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
+ ret <8 x i64> %shl
+}
+; CHECK: 'Cost Model Analysis' for function 'test11':
+; SSE2: Found an estimated cost of 16 for instruction: %shl
+; SSE41: Found an estimated cost of 16 for instruction: %shl
+; AVX: Found an estimated cost of 16 for instruction: %shl
+; AVX2: Found an estimated cost of 2 for instruction: %shl
+; XOPAVX: Found an estimated cost of 4 for instruction: %shl
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shl
diff --git a/test/Analysis/CostModel/no_info.ll b/test/Analysis/CostModel/no_info.ll
index 5f3b56ad9cf8..931669b60176 100644
--- a/test/Analysis/CostModel/no_info.ll
+++ b/test/Analysis/CostModel/no_info.ll
@@ -5,9 +5,27 @@
; -- No triple in this module --
-;CHECK: cost of 1 {{.*}} add
-;CHECK: cost of 1 {{.*}} ret
+; CHECK-LABEL: function 'no_info'
+; CHECK: cost of 1 {{.*}} add
+; CHECK: cost of 1 {{.*}} ret
define i32 @no_info(i32 %arg) {
%e = add i32 %arg, %arg
ret i32 %e
}
+
+define i8 @addressing_mode_reg_reg(i8* %a, i32 %b) {
+; CHECK-LABEL: function 'addressing_mode_reg_reg'
+ %p = getelementptr i8, i8* %a, i32 %b ; NoTTI accepts reg+reg addressing.
+; CHECK: cost of 0 {{.*}} getelementptr
+ %v = load i8, i8* %p
+ ret i8 %v
+}
+
+; CHECK-LABEL: function 'addressing_mode_scaled_reg'
+define i32 @addressing_mode_scaled_reg(i32* %a, i32 %b) {
+ ; NoTTI rejects reg+scale*reg addressing.
+ %p = getelementptr i32, i32* %a, i32 %b
+; CHECK: cost of 1 {{.*}} getelementptr
+ %v = load i32, i32* %p
+ ret i32 %v
+}
diff --git a/test/Analysis/Delinearization/a.ll b/test/Analysis/Delinearization/a.ll
index 78bbfcf7de6e..917fc355726c 100644
--- a/test/Analysis/Delinearization/a.ll
+++ b/test/Analysis/Delinearization/a.ll
@@ -10,7 +10,7 @@
; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
; CHECK: Base offset: %A
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes.
-; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>]
+; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<nw><%for.k>]
define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 {
entry:
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
index 317e62c8ef90..79d0c789704b 100644
--- a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
+++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
@@ -11,7 +11,7 @@
; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
; CHECK: Base offset: %A
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{3,+,1}<nw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nw><%for.k>]
+; CHECK: ArrayRef[{3,+,1}<nuw><%for.i>][{-4,+,1}<nw><%for.j>][{7,+,1}<nuw><nsw><%for.k>]
define void @foo(i64 %n, i64 %m, i64 %o, double* %A) {
entry:
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
index ada7758b21ba..f886d2ccd288 100644
--- a/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
+++ b/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
@@ -11,7 +11,7 @@
; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us>
; CHECK: Base offset: %A
; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of 8 bytes.
-; CHECK: ArrayRef[{3,+,1}<nw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>]
+; CHECK: ArrayRef[{3,+,1}<nuw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>]
define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable {
entry:
diff --git a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
index 9e37b76e59b9..893c542c06a9 100644
--- a/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
+++ b/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
@@ -11,7 +11,7 @@
; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
; CHECK: Base offset: %A
; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
-; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nw><%for.k>]
+; CHECK: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nw><%for.j>][{%r,+,1}<nsw><%for.k>]
define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) {
entry:
diff --git a/test/Analysis/Delinearization/parameter_addrec_product.ll b/test/Analysis/Delinearization/parameter_addrec_product.ll
new file mode 100644
index 000000000000..561158eae739
--- /dev/null
+++ b/test/Analysis/Delinearization/parameter_addrec_product.ll
@@ -0,0 +1,56 @@
+; RUN: opt -delinearize -analyze < %s | FileCheck %s
+;
+; void foo(float *A, long *p) {
+; for (long i = 0; i < 100; i++)
+; for (long j = 0; j < 100; j++)
+; A[i * (*p) + j] += i + j;
+; }
+;
+; CHECK: ArrayDecl[UnknownSize][%pval] with elements of 4 bytes.
+; CHECK: ArrayRef[{0,+,1}<nuw><nsw><%bb2>][{0,+,1}<nuw><nsw><%bb4>]
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(float* %A, i64* %p) {
+bb:
+ br label %bb2
+
+bb2: ; preds = %bb16, %bb
+ %i.0 = phi i64 [ 0, %bb ], [ %tmp17, %bb16 ]
+ %exitcond1 = icmp ne i64 %i.0, 100
+ br i1 %exitcond1, label %bb3, label %bb18
+
+bb3: ; preds = %bb2
+ br label %bb4
+
+bb4: ; preds = %bb13, %bb3
+ %j.0 = phi i64 [ 0, %bb3 ], [ %tmp14, %bb13 ]
+ %exitcond = icmp ne i64 %j.0, 100
+ br i1 %exitcond, label %bb5, label %bb15
+
+bb5: ; preds = %bb4
+ %tmp = add nuw nsw i64 %i.0, %j.0
+ %tmp6 = sitofp i64 %tmp to float
+ %pval = load i64, i64* %p, align 8
+ %tmp8 = mul nsw i64 %i.0, %pval
+ %tmp9 = add nsw i64 %tmp8, %j.0
+ %tmp10 = getelementptr inbounds float, float* %A, i64 %tmp9
+ %tmp11 = load float, float* %tmp10, align 4
+ %tmp12 = fadd float %tmp11, %tmp6
+ store float %tmp12, float* %tmp10, align 4
+ br label %bb13
+
+bb13: ; preds = %bb5
+ %tmp14 = add nuw nsw i64 %j.0, 1
+ br label %bb4
+
+bb15: ; preds = %bb4
+ br label %bb16
+
+bb16: ; preds = %bb15
+ %tmp17 = add nuw nsw i64 %i.0, 1
+ br label %bb2
+
+bb18: ; preds = %bb2
+ ret void
+}
diff --git a/test/Analysis/DemandedBits/basic.ll b/test/Analysis/DemandedBits/basic.ll
new file mode 100644
index 000000000000..487e522e9dbc
--- /dev/null
+++ b/test/Analysis/DemandedBits/basic.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s
+
+; CHECK-LABEL: 'test_mul'
+; CHECK-DAG: DemandedBits: 0xFF for %1 = add nsw i32 %a, 5
+; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8
+; CHECK-DAG: DemandedBits: 0xFF for %2 = mul nsw i32 %1, %b
+define i8 @test_mul(i32 %a, i32 %b) {
+ %1 = add nsw i32 %a, 5
+ %2 = mul nsw i32 %1, %b
+ %3 = trunc i32 %2 to i8
+ ret i8 %3
+}
+
+; CHECK-LABEL: 'test_icmp1'
+; CHECK-DAG: DemandedBits: 0x1 for %3 = icmp eq i32 %1, %2
+; CHECK-DAG: DemandedBits: 0xFFF for %1 = and i32 %a, 255
+; CHECK-DAG: DemandedBits: 0xFFF for %2 = shl i32 %1, 4
+define i1 @test_icmp1(i32 %a, i32 %b) {
+ %1 = and i32 %a, 255
+ %2 = shl i32 %1, 4
+ %3 = icmp eq i32 %1, %2
+ ret i1 %3
+}
+
+; CHECK-LABEL: 'test_icmp2'
+; CHECK-DAG: DemandedBits: 0x1 for %3 = icmp eq i32 %1, %2
+; CHECK-DAG: DemandedBits: 0xFF for %1 = and i32 %a, 255
+; CHECK-DAG: DemandedBits: 0xF for %2 = ashr i32 %1, 4
+define i1 @test_icmp2(i32 %a, i32 %b) {
+ %1 = and i32 %a, 255
+ %2 = ashr i32 %1, 4
+ %3 = icmp eq i32 %1, %2
+ ret i1 %3
+}
diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll
index 81d05a10cf1c..f9749d51bb33 100644
--- a/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/test/Analysis/DependenceAnalysis/GCD.ll
@@ -269,10 +269,10 @@ entry:
; CHECK: da analyze - none!
; DELIN: 'Dependence Analysis' for function 'gcd4'
-; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - none!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
-; DELIN: da analyze - input [* *]!
+; DELIN: da analyze - none!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
@@ -339,10 +339,10 @@ entry:
; CHECK: da analyze - none!
; DELIN: 'Dependence Analysis' for function 'gcd5'
-; DELIN: da analyze - output [* *]!
-; DELIN: da analyze - flow [<> *]!
+; DELIN: da analyze - none!
+; DELIN: da analyze - flow [> *]!
; DELIN: da analyze - confused!
-; DELIN: da analyze - input [* *]!
+; DELIN: da analyze - none!
; DELIN: da analyze - confused!
; DELIN: da analyze - none!
@@ -410,10 +410,10 @@ entry:
; CHECK: da analyze - output [* *]!
; DELIN: 'Dependence Analysis' for function 'gcd6'
-; DELIN: da analyze - output [* *]!
+; DELIN: da analyze - none!
; DELIN: da analyze - none!
; DELIN: da analyze - confused!
-; DELIN: da analyze - input [* *]!
+; DELIN: da analyze - none!
; DELIN: da analyze - confused!
; DELIN: da analyze - output [* *]!
diff --git a/test/Analysis/DependenceAnalysis/NonAffineExpr.ll b/test/Analysis/DependenceAnalysis/NonAffineExpr.ll
new file mode 100644
index 000000000000..265533797cec
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/NonAffineExpr.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -analyze -basicaa -da-delinearize -da
+;
+; CHECK: da analyze - consistent input [S S]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - input [* *]!
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
+target triple = "thumbv7--linux-gnueabi"
+
+define void @f(i32** %a, i32 %n) align 2 {
+for.preheader:
+ %t.0 = ashr exact i32 %n, 3
+ br label %for.body.1
+
+for.body.1:
+ %i.1 = phi i32 [ %t.5, %for.inc ], [ 0, %for.preheader ]
+ %i.2 = phi i32 [ %i.5, %for.inc ], [ %t.0, %for.preheader ]
+ br i1 undef, label %for.inc, label %for.body.2
+
+for.body.2:
+ %i.3 = phi i32 [ %t.1, %for.body.2 ], [ %i.1, %for.body.1 ]
+ %t.1 = add i32 %i.3, 1
+ %t.2 = load i32*, i32** %a, align 4
+ %t.3 = getelementptr inbounds i32, i32* %t.2, i32 %i.3
+ %t.4 = load i32, i32* %t.3, align 4
+ br i1 undef, label %for.inc, label %for.body.2
+
+for.inc:
+ %i.4 = phi i32 [ %i.2, %for.body.1 ], [ %i.2, %for.body.2 ]
+ %t.5 = add i32 %i.1, %i.4
+ %i.5 = add i32 %i.2, -1
+ br i1 undef, label %for.exit, label %for.body.1
+
+for.exit:
+ ret void
+}
diff --git a/test/Analysis/DependenceAnalysis/PR21585.ll b/test/Analysis/DependenceAnalysis/PR21585.ll
index bb62c68918f7..23af449ad51e 100644
--- a/test/Analysis/DependenceAnalysis/PR21585.ll
+++ b/test/Analysis/DependenceAnalysis/PR21585.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -analyze -basicaa -globalsmodref-aa -da | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -globals-aa -da | FileCheck %s
define void @i32_subscript(i32* %a) {
entry:
br label %for.body
diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
new file mode 100644
index 000000000000..63e6c5c50ded
--- /dev/null
+++ b/test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll
@@ -0,0 +1,16 @@
+; RUN: opt %s -mtriple amdgcn-- -analyze -divergence | FileCheck %s
+
+; CHECK: DIVERGENT:
+; CHECK-NOT: %arg0
+; CHECK-NOT: %arg1
+; CHECK-NOT; %arg2
+; CHECK: <2 x i32> %arg3
+; CHECK: DIVERGENT: <3 x i32> %arg4
+; CHECK: DIVERGENT: float %arg5
+; CHECK: DIVERGENT: i32 %arg6
+
+define void @main([4 x <16 x i8>] addrspace(2)* byval %arg0, float inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <3 x i32> %arg4, float %arg5, i32 %arg6) #0 {
+ ret void
+}
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg b/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..2a665f06be72
--- /dev/null
+++ b/test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
index 9dd3d557f8cd..fc63038e77cc 100644
--- a/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
+++ b/test/Analysis/DivergenceAnalysis/NVPTX/diverge.ll
@@ -185,14 +185,35 @@ else:
ret i32 1
}
+; Verifies sync-dependence is computed correctly in the absense of loops.
+define i32 @sync_no_loop(i32 %arg) {
+entry:
+ %0 = add i32 %arg, 1
+ %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %1 = icmp sge i32 %tid, 10
+ br i1 %1, label %bb1, label %bb2
+
+bb1:
+ br label %bb3
+
+bb2:
+ br label %bb3
+
+bb3:
+ %2 = add i32 %0, 2
+ ; CHECK-NOT: DIVERGENT: %2
+ ret i32 %2
+}
+
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
declare i32 @llvm.ptx.read.laneid()
-!nvvm.annotations = !{!0, !1, !2, !3, !4}
+!nvvm.annotations = !{!0, !1, !2, !3, !4, !5}
!0 = !{i32 (i32, i32, i32)* @no_diverge, !"kernel", i32 1}
!1 = !{i32 (i32, i32)* @sync, !"kernel", i32 1}
!2 = !{i32 (i32, i32, i32)* @mixed, !"kernel", i32 1}
!3 = !{i32 ()* @loop, !"kernel", i32 1}
!4 = !{i32 (i1)* @unstructured_loop, !"kernel", i32 1}
+!5 = !{i32 (i32)* @sync_no_loop, !"kernel", i32 1}
diff --git a/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll b/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
index 513ec86ef031..37796a9ceb32 100644
--- a/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
+++ b/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalsmodref-aa -gvn -S | FileCheck %s
+; RUN: opt < %s -globals-aa -gvn -S | FileCheck %s
@g = internal global i32 0 ; <i32*> [#uses=2]
diff --git a/test/Analysis/GlobalsModRef/aliastest.ll b/test/Analysis/GlobalsModRef/aliastest.ll
index 3474e130de8c..ecc6bcc55d1d 100644
--- a/test/Analysis/GlobalsModRef/aliastest.ll
+++ b/test/Analysis/GlobalsModRef/aliastest.ll
@@ -1,4 +1,7 @@
-; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -globals-aa -gvn -S -enable-unsafe-globalsmodref-alias-results | FileCheck %s
+;
+; Note that this test relies on an unsafe feature of GlobalsModRef. While this
+; test is correct and safe, GMR's technique for handling this isn't generally.
@X = internal global i32 4 ; <i32*> [#uses=1]
diff --git a/test/Analysis/GlobalsModRef/argmemonly-escape.ll b/test/Analysis/GlobalsModRef/argmemonly-escape.ll
new file mode 100644
index 000000000000..64c625810af9
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/argmemonly-escape.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -O1 -S -enable-non-lto-gmr=true | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = internal global [3 x i32] zeroinitializer, align 4
+
+; The important thing we're checking for here is the reload of (some element of)
+; @a after the memset.
+
+; CHECK-LABEL: @main
+; CHECK: load i32, i32* getelementptr {{.*}} @a
+; CHECK-NEXT: call void @memsetp0i8i64{{.*}} @a
+; CHECK-NEXT: load i32, i32* getelementptr {{.*}} @a
+; CHECK-NEXT: call void @memsetp0i8i64A{{.*}} @a
+; CHECK-NEXT: load i32, i32* getelementptr {{.*}} @a
+; CHECK: icmp eq
+; CHECK: br i1
+
+define i32 @main() {
+entry:
+ %0 = bitcast [3 x i32]* @a to i8*
+ %1 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+ call void @memsetp0i8i64(i8* %0, i8 0, i64 4, i32 4, i1 false)
+ %2 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+ call void @memsetp0i8i64A(i8* %0, i8 0, i64 4, i32 4, i1 false)
+ %3 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+ %4 = add i32 %2, %3
+ %cmp1 = icmp eq i32 %1, %4
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %entr
+ call void @abort() #3
+ unreachable
+
+if.end: ; preds = %entry
+ ret i32 0
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @memsetp0i8i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly
+
+; Function Attrs: nounwind inaccessiblemem_or_argmemonly
+declare void @memsetp0i8i64A(i8* nocapture, i8, i64, i32, i1) nounwind inaccessiblemem_or_argmemonly
+
+; Function Attrs: noreturn nounwind
+declare void @abort() noreturn nounwind
diff --git a/test/Analysis/GlobalsModRef/atomic-instrs.ll b/test/Analysis/GlobalsModRef/atomic-instrs.ll
new file mode 100644
index 000000000000..d2ae830ff87a
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/atomic-instrs.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -globals-aa -gvn -S | FileCheck %s
+
+@X = internal global i32 4
+
+define i32 @test_cmpxchg(i32* %P) {
+; CHECK-LABEL: @test_cmpxchg
+; CHECK-NEXT: store i32 12, i32* @X
+; CHECK-NEXT: call void @modrefX_cmpxchg()
+; CHECK-NEXT: %V = load i32, i32* @X
+; CHECK-NEXT: ret i32 %V
+ store i32 12, i32* @X
+ call void @modrefX_cmpxchg()
+ %V = load i32, i32* @X
+ ret i32 %V
+}
+
+define void @modrefX_cmpxchg() {
+ %1 = cmpxchg i32* @X, i32 0, i32 -1 monotonic monotonic
+ ret void
+}
+
+define i32 @test_atomicrmw(i32* %P) {
+; CHECK-LABEL: @test_atomicrmw
+; CHECK-NEXT: store i32 12, i32* @X
+; CHECK-NEXT: call void @modrefXatomicrmw()
+; CHECK-NEXT: %V = load i32, i32* @X
+; CHECK-NEXT: ret i32 %V
+ store i32 12, i32* @X
+ call void @modrefXatomicrmw()
+ %V = load i32, i32* @X
+ ret i32 %V
+}
+
+define void @modrefXatomicrmw() {
+ %1 = atomicrmw add i32* @X, i32 1 acquire
+ ret void
+}
diff --git a/test/Analysis/GlobalsModRef/chaining-analysis.ll b/test/Analysis/GlobalsModRef/chaining-analysis.ll
index 26671daf3480..a12d7371524a 100644
--- a/test/Analysis/GlobalsModRef/chaining-analysis.ll
+++ b/test/Analysis/GlobalsModRef/chaining-analysis.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -globals-aa -gvn -S | FileCheck %s
; This test requires the use of previous analyses to determine that
; doesnotmodX does not modify X (because 'sin' doesn't).
diff --git a/test/Analysis/GlobalsModRef/indirect-global.ll b/test/Analysis/GlobalsModRef/indirect-global.ll
index 028132324674..a51f54b07503 100644
--- a/test/Analysis/GlobalsModRef/indirect-global.ll
+++ b/test/Analysis/GlobalsModRef/indirect-global.ll
@@ -1,4 +1,7 @@
-; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -instcombine -S | FileCheck %s
+; RUN: opt < %s -basicaa -globals-aa -gvn -instcombine -S -enable-unsafe-globalsmodref-alias-results | FileCheck %s
+;
+; Note that this test relies on an unsafe feature of GlobalsModRef. While this
+; test is correct and safe, GMR's technique for handling this isn't generally.
@G = internal global i32* null ; <i32**> [#uses=3]
diff --git a/test/Analysis/GlobalsModRef/memset-escape.ll b/test/Analysis/GlobalsModRef/memset-escape.ll
new file mode 100644
index 000000000000..8da375ad8775
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/memset-escape.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -O1 -S -enable-non-lto-gmr=true | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = internal global [3 x i32] zeroinitializer, align 4
+@b = common global i32 0, align 4
+
+; The important thing we're checking for here is the reload of (some element of)
+; @a after the memset.
+
+; CHECK-LABEL: @main
+; CHECK: call void @llvm.memset.p0i8.i64{{.*}} @a
+; CHECK: store i32 3
+; CHECK: load i32, i32* getelementptr {{.*}} @a
+; CHECK: icmp eq i32
+; CHECK: br i1
+
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4
+ %c = alloca [1 x i32], align 4
+ store i32 0, i32* %retval, align 4
+ %0 = bitcast [1 x i32]* %c to i8*
+ call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 4, i32 4, i1 false)
+ store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+ store i32 0, i32* @b, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %1 = load i32, i32* @b, align 4
+ %cmp = icmp slt i32 %1, 3
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* @b, align 4
+ %idxprom = sext i32 %2 to i64
+ %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* @a, i64 0, i64 %idxprom
+ store i32 0, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %3 = load i32, i32* @b, align 4
+ %inc = add nsw i32 %3, 1
+ store i32 %inc, i32* @b, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %4 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+ %cmp1 = icmp ne i32 %4, 0
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.end
+ call void @abort() #3
+ unreachable
+
+if.end: ; preds = %for.end
+ ret i32 0
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly
+
+; Function Attrs: noreturn nounwind
+declare void @abort() noreturn nounwind
diff --git a/test/Analysis/GlobalsModRef/modreftest.ll b/test/Analysis/GlobalsModRef/modreftest.ll
index 74101e23bed9..2018b149fc06 100644
--- a/test/Analysis/GlobalsModRef/modreftest.ll
+++ b/test/Analysis/GlobalsModRef/modreftest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -globals-aa -gvn -S | FileCheck %s
@X = internal global i32 4 ; <i32*> [#uses=2]
@@ -16,3 +16,23 @@ define i32 @test(i32* %P) {
define void @doesnotmodX() {
ret void
}
+
+declare void @InaccessibleMemOnlyFunc( ) #0
+declare void @InaccessibleMemOrArgMemOnlyFunc( ) #1
+
+define i32 @test2(i32* %P) {
+; CHECK: @test2
+; CHECK-NEXT: store i32 12, i32* @X
+; CHECK-NEXT: call void @InaccessibleMemOnlyFunc()
+; CHECK-NEXT: call void @InaccessibleMemOrArgMemOnlyFunc()
+; CHECK-NOT: load i32
+; CHECK-NEXT: ret i32 12
+ store i32 12, i32* @X
+ call void @InaccessibleMemOnlyFunc( )
+ call void @InaccessibleMemOrArgMemOnlyFunc( )
+ %V = load i32, i32* @X ; <i32> [#uses=1]
+ ret i32 %V
+}
+
+attributes #0 = { inaccessiblememonly }
+attributes #1 = { inaccessiblemem_or_argmemonly }
diff --git a/test/Analysis/GlobalsModRef/nocapture.ll b/test/Analysis/GlobalsModRef/nocapture.ll
new file mode 100644
index 000000000000..0cb80a10f8da
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/nocapture.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -globals-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = internal global i32 0, align 4
+@b = internal global i32 0, align 4
+
+define void @g(i32* %p, void (i32*)* nocapture %ptr) {
+entry:
+ tail call void %ptr(i32* %p) #1
+ ret void
+}
+
+; CHECK-LABEL: Function: f
+; CHECK: MayAlias: i32* %p, i32* @a
+; CHECK: MayAlias: i32* %q, i32* @a
+define i32 @f(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
+entry:
+ tail call void @g(i32* nonnull @a, void (i32*)* %ptr)
+ %arrayidx = getelementptr inbounds i32, i32* %p, i64 0
+ %z1 = load i32, i32* %arrayidx, align 4
+ %z2 = load i32, i32* %q, align 4
+ %add = add nsw i32 %z2, %z1
+ store i32 %add, i32* %q, align 4
+ ret i32 4
+}
+
+define void @g2(i32* nocapture %p, void (i32*)* nocapture %ptr) {
+entry:
+ tail call void %ptr(i32* %p) #1
+ ret void
+}
+
+; CHECK-LABEL: Function: f2
+; CHECK: NoAlias: i32* %p, i32* @b
+; CHECK: NoAlias: i32* %q, i32* @b
+define i32 @f2(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
+entry:
+ tail call void @g2(i32* nonnull @b, void (i32*)* %ptr)
+ %arrayidx = getelementptr inbounds i32, i32* %p, i64 0
+ %z1 = load i32, i32* %arrayidx, align 4
+ %z2 = load i32, i32* %q, align 4
+ %add = add nsw i32 %z2, %z1
+ store i32 %add, i32* %q, align 4
+ ret i32 4
+}
+
+declare void @g3()
+
+; CHECK-LABEL: Function: f3
+; CHECK: NoAlias: i32* %p, i32* @b
+define void @f3(i32* nocapture readonly %p) {
+entry:
+ tail call void @g3() [ "deopt"(i32* @b, i32 *%p) ]
+ unreachable
+}
diff --git a/test/Analysis/GlobalsModRef/nonescaping-noalias.ll b/test/Analysis/GlobalsModRef/nonescaping-noalias.ll
new file mode 100644
index 000000000000..8ce9551edec7
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/nonescaping-noalias.ll
@@ -0,0 +1,116 @@
+; RUN: opt < %s -globals-aa -gvn -S | FileCheck %s
+;
+; This tests the safe no-alias conclusions of GMR -- when there is
+; a non-escaping global as one indentified underlying object and some pointer
+; that would inherently have escaped any other function as the other underlying
+; pointer of an alias query.
+
+@g1 = internal global i32 0
+
+define i32 @test1(i32* %param) {
+; Ensure that we can fold a store to a load of a global across a store to
+; a parameter when the global is non-escaping.
+;
+; CHECK-LABEL: @test1(
+; CHECK: store i32 42, i32* @g1
+; CHECK-NOT: load i32
+; CHECK: ret i32 42
+entry:
+ store i32 42, i32* @g1
+ store i32 7, i32* %param
+ %v = load i32, i32* @g1
+ ret i32 %v
+}
+
+declare i32* @f()
+
+define i32 @test2() {
+; Ensure that we can fold a store to a load of a global across a store to
+; the pointer returned by a function call. Since the global could not escape,
+; this function cannot be returning its address.
+;
+; CHECK-LABEL: @test2(
+; CHECK: store i32 42, i32* @g1
+; CHECK-NOT: load i32
+; CHECK: ret i32 42
+entry:
+ %ptr = call i32* @f() readnone
+ store i32 42, i32* @g1
+ store i32 7, i32* %ptr
+ %v = load i32, i32* @g1
+ ret i32 %v
+}
+
+@g2 = external global i32*
+
+define i32 @test3() {
+; Ensure that we can fold a store to a load of a global across a store to
+; the pointer loaded from that global. Because the global does not escape, it
+; cannot alias a pointer loaded out of a global.
+;
+; CHECK-LABEL: @test3(
+; CHECK: store i32 42, i32* @g1
+; CHECK: store i32 7, i32*
+; CHECK-NOT: load i32
+; CHECK: ret i32 42
+entry:
+ store i32 42, i32* @g1
+ %ptr1 = load i32*, i32** @g2
+ store i32 7, i32* %ptr1
+ %v = load i32, i32* @g1
+ ret i32 %v
+}
+
+@g3 = internal global i32 1
+@g4 = internal global [10 x i32*] zeroinitializer
+
+define i32 @test4(i32* %param, i32 %n, i1 %c1, i1 %c2, i1 %c3) {
+; Ensure that we can fold a store to a load of a global across a store to
+; the pointer loaded from that global even when the load is behind PHIs and
+; selects, and there is a mixture of a load and another global or argument.
+; Note that we can't eliminate the load here because it is used in a PHI and
+; GVN doesn't try to do real DCE. The store is still forwarded by GVN though.
+;
+; CHECK-LABEL: @test4(
+; CHECK: store i32 42, i32* @g1
+; CHECK: store i32 7, i32*
+; CHECK: ret i32 42
+entry:
+ %call = call i32* @f()
+ store i32 42, i32* @g1
+ %ptr1 = load i32*, i32** @g2
+ %ptr2 = select i1 %c1, i32* %ptr1, i32* %param
+ %ptr3 = select i1 %c3, i32* %ptr2, i32* @g3
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %ptr = phi i32* [ %ptr3, %entry ], [ %ptr5, %loop ]
+ store i32 7, i32* %ptr
+ %ptr4 = load i32*, i32** getelementptr ([10 x i32*], [10 x i32*]* @g4, i32 0, i32 1)
+ %ptr5 = select i1 %c2, i32* %ptr4, i32* %call
+ %inc = add i32 %iv, 1
+ %test = icmp slt i32 %inc, %n
+ br i1 %test, label %loop, label %exit
+
+exit:
+ %v = load i32, i32* @g1
+ ret i32 %v
+}
+
+define i32 @test5(i32** %param) {
+; Ensure that we can fold a store to a load of a global across a store to
+; a parameter that has been dereferenced when the global is non-escaping.
+;
+; CHECK-LABEL: @test5(
+; CHECK: %p = load i32*
+; CHECK: store i32 42, i32* @g1
+; CHECK-NOT: load i32
+; CHECK: ret i32 42
+entry:
+ %p = load i32*, i32** %param
+ store i32 42, i32* @g1
+ store i32 7, i32* %p
+ %v = load i32, i32* @g1
+ ret i32 %v
+}
diff --git a/test/Analysis/GlobalsModRef/pr12351.ll b/test/Analysis/GlobalsModRef/pr12351.ll
index 8f922770b306..5d299cd2e917 100644
--- a/test/Analysis/GlobalsModRef/pr12351.ll
+++ b/test/Analysis/GlobalsModRef/pr12351.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -globals-aa -gvn -S | FileCheck %s
declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
define void @foo(i8* %x, i8* %y) {
diff --git a/test/Analysis/GlobalsModRef/pr25309.ll b/test/Analysis/GlobalsModRef/pr25309.ll
new file mode 100644
index 000000000000..bca5e9307656
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/pr25309.ll
@@ -0,0 +1,27 @@
+; RUN: opt -globals-aa -gvn < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; @o and @m are initialized to @i, so they should not be classed as
+; indirect globals referring only to allocation functions.
+@o = internal global i32* @i, align 8
+@m = internal global i32* @i, align 8
+@i = internal global i32 0, align 4
+
+; CHECK-LABEL: @f
+define i1 @f() {
+entry:
+ %0 = load i32*, i32** @o, align 8
+ store i32 0, i32* %0, align 4
+ %1 = load volatile i32*, i32** @m, align 8
+ store i32 1, i32* %1, align 4
+ ; CHECK: %[[a:.*]] = load i32*
+ %2 = load i32*, i32** @o, align 8
+ ; CHECK: %[[b:.*]] = load i32, i32* %[[a]]
+ %3 = load i32, i32* %2, align 4
+ ; CHECK: %[[c:.*]] = icmp ne i32 %[[b]], 0
+ %tobool.i = icmp ne i32 %3, 0
+ ; CHECK: ret i1 %[[c]]
+ ret i1 %tobool.i
+}
diff --git a/test/Analysis/GlobalsModRef/purecse.ll b/test/Analysis/GlobalsModRef/purecse.ll
index e030417f9552..8e2bfebc8af4 100644
--- a/test/Analysis/GlobalsModRef/purecse.ll
+++ b/test/Analysis/GlobalsModRef/purecse.ll
@@ -1,5 +1,5 @@
; Test that pure functions are cse'd away
-; RUN: opt < %s -globalsmodref-aa -gvn -instcombine -S | FileCheck %s
+; RUN: opt < %s -disable-basicaa -globals-aa -gvn -instcombine -S | FileCheck %s
define i32 @pure(i32 %X) {
%Y = add i32 %X, 1 ; <i32> [#uses=1]
diff --git a/test/Analysis/GlobalsModRef/weak-interposition.ll b/test/Analysis/GlobalsModRef/weak-interposition.ll
new file mode 100644
index 000000000000..091aa74d5217
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/weak-interposition.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -O1 -enable-non-lto-gmr=true < %s | FileCheck %s
+
+@a = common global i32 0, align 4
+
+; @hook_me is weak, so GMR must not eliminate the reload of @a in @f,
+; even though @hook_me doesn't mod or ref @a.
+
+; Function Attrs: nounwind ssp uwtable
+define weak i32 @hook_me() {
+ ret i32 0
+}
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @f() {
+ %1 = alloca i32, align 4
+ store i32 4, i32* @a, align 4
+ %2 = call i32 @hook_me()
+ ; CHECK: load i32, i32* @a, align 4
+ %3 = load i32, i32* @a, align 4
+ %4 = add nsw i32 %3, %2
+ store i32 %4, i32* @a, align 4
+ %5 = load i32, i32* %1
+ ret i32 %5
+}
diff --git a/test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll b/test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll
index 01782e0f2c47..89a21e542f74 100644
--- a/test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll
+++ b/test/Analysis/LazyCallGraph/non-leaf-intrinsics.ll
@@ -1,7 +1,7 @@
; RUN: opt -S -disable-output -passes=print-cg < %s 2>&1 | FileCheck %s
declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
define private void @f() {
ret void
@@ -12,7 +12,7 @@ define void @calls_statepoint(i8 addrspace(1)* %arg) gc "statepoint-example" {
; CHECK-NEXT: -> f
entry:
%cast = bitcast i8 addrspace(1)* %arg to i64 addrspace(1)*
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
ret void
}
diff --git a/test/Analysis/Lint/cppeh-catch-intrinsics.ll b/test/Analysis/Lint/cppeh-catch-intrinsics.ll
deleted file mode 100644
index 19480a2f60fe..000000000000
--- a/test/Analysis/Lint/cppeh-catch-intrinsics.ll
+++ /dev/null
@@ -1,278 +0,0 @@
-; RUN: opt -lint -disable-output < %s 2>&1 | FileCheck %s
-
-; This test is meant to prove that the Verifier is able to identify a variety
-; of errors with the llvm.eh.begincatch and llvm.eh.endcatch intrinsics.
-; See cppeh-catch-intrinsics-clean for correct uses.
-
-target triple = "x86_64-pc-windows-msvc"
-
-declare void @llvm.eh.begincatch(i8*, i8*)
-
-declare void @llvm.eh.endcatch()
-
-@_ZTIi = external constant i8*
-
-; Function Attrs: uwtable
-define void @test_missing_endcatch() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-; CHECK: Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch
-; CHECK-NEXT: call void @llvm.eh.begincatch(i8* %exn, i8* null)
-entry:
- invoke void @_Z9may_throwv()
- to label %try.cont unwind label %lpad
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %exn = extractvalue { i8*, i32 } %0, 0
- %sel = extractvalue { i8*, i32 } %0, 1
- %1 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
- %matches = icmp eq i32 %sel, %1
- br i1 %matches, label %catch, label %eh.resume
-
-catch: ; preds = %lpad
- call void @llvm.eh.begincatch(i8* %exn, i8* null)
- call void @_Z10handle_intv()
- br label %invoke.cont2
-
-invoke.cont2: ; preds = %catch
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %entry
- ret void
-
-eh.resume: ; preds = %catch.dispatch
- resume { i8*, i32 } %0
-}
-
-; Function Attrs: uwtable
-define void @test_missing_begincatch() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-; CHECK: llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch
-; CHECK-NEXT: call void @llvm.eh.endcatch()
-entry:
- invoke void @_Z9may_throwv()
- to label %try.cont unwind label %lpad
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %exn = extractvalue { i8*, i32 } %0, 0
- %sel = extractvalue { i8*, i32 } %0, 1
- %1 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
- %matches = icmp eq i32 %sel, %1
- br i1 %matches, label %catch, label %eh.resume
-
-catch: ; preds = %lpad
- call void @_Z10handle_intv()
- br label %invoke.cont2
-
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.endcatch()
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %entry
- ret void
-
-eh.resume: ; preds = %catch.dispatch
- resume { i8*, i32 } %0
-}
-
-; Function Attrs: uwtable
-define void @test_multiple_begin() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-; CHECK: llvm.eh.begincatch may be called a second time before llvm.eh.endcatch
-; CHECK-NEXT: call void @llvm.eh.begincatch(i8* %exn, i8* null)
-; CHECK-NEXT: call void @llvm.eh.begincatch(i8* %exn, i8* null)
-entry:
- invoke void @_Z9may_throwv()
- to label %try.cont unwind label %lpad
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %exn = extractvalue { i8*, i32 } %0, 0
- %sel = extractvalue { i8*, i32 } %0, 1
- %1 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
- %matches = icmp eq i32 %sel, %1
- br i1 %matches, label %catch, label %eh.resume
-
-catch: ; preds = %lpad
- call void @llvm.eh.begincatch(i8* %exn, i8* null)
- call void @_Z10handle_intv()
- br label %invoke.cont2
-
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.begincatch(i8* %exn, i8* null)
- call void @llvm.eh.endcatch()
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %entry
- ret void
-
-eh.resume: ; preds = %catch.dispatch
- resume { i8*, i32 } %0
-}
-
-; Function Attrs: uwtable
-define void @test_multiple_end() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-; CHECK: llvm.eh.endcatch may be called a second time after llvm.eh.begincatch
-; CHECK-NEXT: call void @llvm.eh.endcatch()
-; CHECK-NEXT: call void @llvm.eh.endcatch()
-entry:
- invoke void @_Z9may_throwv()
- to label %try.cont unwind label %lpad
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %exn = extractvalue { i8*, i32 } %0, 0
- %sel = extractvalue { i8*, i32 } %0, 1
- %1 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
- %matches = icmp eq i32 %sel, %1
- br i1 %matches, label %catch, label %eh.resume
-
-catch: ; preds = %lpad
- call void @llvm.eh.begincatch(i8* %exn, i8* null)
- call void @_Z10handle_intv()
- call void @llvm.eh.endcatch()
- br label %invoke.cont2
-
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.endcatch()
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %entry
- ret void
-
-eh.resume: ; preds = %catch.dispatch
- resume { i8*, i32 } %0
-}
-
-
-; Function Attrs: uwtable
-define void @test_begincatch_without_lpad() {
-; CHECK: llvm.eh.begincatch may be reachable without passing a landingpad
-; CHECK-NEXT: call void @llvm.eh.begincatch(i8* %exn, i8* null)
-entry:
- %exn = alloca i8
- call void @llvm.eh.begincatch(i8* %exn, i8* null)
- call void @_Z10handle_intv()
- br label %invoke.cont2
-
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.endcatch()
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %entry
- ret void
-}
-
-; Function Attrs: uwtable
-define void @test_branch_to_begincatch_with_no_lpad(i32 %fake.sel) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-; CHECK: llvm.eh.begincatch may be reachable without passing a landingpad
-; CHECK-NEXT: call void @llvm.eh.begincatch(i8* %exn2, i8* null)
-entry:
- %fake.exn = alloca i8
- invoke void @_Z9may_throwv()
- to label %catch unwind label %lpad
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %exn = extractvalue { i8*, i32 } %0, 0
- %sel = extractvalue { i8*, i32 } %0, 1
- %1 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
- %matches = icmp eq i32 %sel, %1
- br i1 %matches, label %catch, label %eh.resume
-
- invoke void @_Z9may_throwv()
- to label %try.cont unwind label %lpad
-
-catch: ; preds = %lpad, %entry
- %exn2 = phi i8* [%exn, %lpad], [%fake.exn, %entry]
- %sel2 = phi i32 [%sel, %lpad], [%fake.sel, %entry]
- call void @llvm.eh.begincatch(i8* %exn2, i8* null)
- call void @_Z10handle_intv()
- %matches1 = icmp eq i32 %sel2, 0
- br i1 %matches1, label %invoke.cont2, label %invoke.cont3
-
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.endcatch()
- br label %try.cont
-
-invoke.cont3: ; preds = %catch
- call void @llvm.eh.endcatch()
- br label %eh.resume
-
-try.cont: ; preds = %invoke.cont2
- ret void
-
-eh.resume: ; preds = %catch.dispatch
- %lpad.val = insertvalue { i8*, i32 } undef, i32 0, 1
- resume { i8*, i32 } %lpad.val
-}
-
-; Function Attrs: uwtable
-define void @test_branch_missing_endcatch() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-; CHECK: Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch
-; CHECK-NEXT: call void @llvm.eh.begincatch(i8* %exn2, i8* null)
-entry:
- invoke void @_Z9may_throwv()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont:
- invoke void @_Z9may_throwv()
- to label %invoke.cont unwind label %lpad1
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %exn = extractvalue { i8*, i32 } %0, 0
- %sel = extractvalue { i8*, i32 } %0, 1
- %1 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
- %matches = icmp eq i32 %sel, %1
- br i1 %matches, label %catch, label %eh.resume
-
- invoke void @_Z9may_throwv()
- to label %try.cont unwind label %lpad
-
-lpad1: ; preds = %entry
- %l1.0 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %exn1 = extractvalue { i8*, i32 } %l1.0, 0
- %sel1 = extractvalue { i8*, i32 } %l1.0, 1
- %l1.1 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
- %matchesl1 = icmp eq i32 %sel1, %l1.1
- br i1 %matchesl1, label %catch, label %eh.resume
-
-catch: ; preds = %lpad, %lpad1
- %exn2 = phi i8* [%exn, %lpad], [%exn1, %lpad1]
- %sel2 = phi i32 [%sel, %lpad], [%sel1, %lpad1]
- call void @llvm.eh.begincatch(i8* %exn2, i8* null)
- call void @_Z10handle_intv()
- %matches1 = icmp eq i32 %sel2, 0
- br i1 %matches1, label %invoke.cont2, label %invoke.cont3
-
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.endcatch()
- br label %try.cont
-
-invoke.cont3: ; preds = %catch
- br label %eh.resume
-
-try.cont: ; preds = %invoke.cont2, %entry
- ret void
-
-eh.resume: ; preds = %catch.dispatch
- %lpad.val = insertvalue { i8*, i32 } undef, i32 0, 1
- resume { i8*, i32 } %lpad.val
-}
-
-declare void @_Z9may_throwv()
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*)
-
-declare void @_Z10handle_intv()
-
diff --git a/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll b/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
new file mode 100644
index 000000000000..e18ec2357fdb
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
@@ -0,0 +1,44 @@
+; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
+
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+8] = B[i] + 2;
+; C[i] = A[i] * 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32* %B, i32* %C, i64 %N) {
+
+; CHECK: Dependences:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: store i32 %a_p1, i32* %Aidx_ahead, align 4 ->
+; CHECK-NEXT: %a = load i32, i32* %Aidx, align 4
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %idx = add nuw nsw i64 %indvars.iv, 8
+
+ %Aidx_ahead = getelementptr inbounds i32, i32* %A, i64 %idx
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_ahead, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll b/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll
new file mode 100644
index 000000000000..e1ba674ce80a
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll
@@ -0,0 +1,64 @@
+; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
+
+; Check that loop-indepedent forward dependences are discovered properly.
+;
+; FIXME: This does not actually always work which is pretty confusing. Right
+; now there is hack in LAA that tries to figure out loop-indepedent forward
+; dependeces *outside* of the MemoryDepChecker logic (i.e. proper dependence
+; analysis).
+;
+; Therefore if there is only loop-independent dependences for an array
+; (i.e. the same index is used), we don't discover the forward dependence.
+; So, at ***, we add another non-I-based access of A to trigger
+; MemoryDepChecker analysis for accesses of A.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i + 1] = B[i] + 1; // ***
+; A[i] = B[i] + 2;
+; C[i] = A[i] * 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) {
+
+; CHECK: Dependences:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: store i32 %b_p1, i32* %Aidx, align 4 ->
+; CHECK-NEXT: %a = load i32, i32* %Aidx, align 4
+; CHECK: ForwardButPreventsForwarding:
+; CHECK-NEXT: store i32 %b_p2, i32* %Aidx_next, align 4 ->
+; CHECK-NEXT: %a = load i32, i32* %Aidx, align 4
+; CHECK: Forward:
+; CHECK-NEXT: store i32 %b_p2, i32* %Aidx_next, align 4 ->
+; CHECK-NEXT: store i32 %b_p1, i32* %Aidx, align 4
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %b_p2 = add i32 %b, 1
+ store i32 %b_p2, i32* %Aidx_next, align 4
+
+ %b_p1 = add i32 %b, 2
+ store i32 %b_p1, i32* %Aidx, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/LoopAccessAnalysis/nullptr.ll b/test/Analysis/LoopAccessAnalysis/nullptr.ll
new file mode 100644
index 000000000000..a72b48cc352d
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/nullptr.ll
@@ -0,0 +1,38 @@
+; RUN: opt -loop-accesses -analyze %s | FileCheck %s
+
+; Test that the loop accesses are proven safe in this case.
+; The analyzer uses to be confused by the "diamond" because GetUnderlyingObjects
+; is saying that the two pointers can both points to null. The loop analyzer
+; needs to ignore null in the results returned by GetUnderlyingObjects.
+
+; CHECK: Memory dependences are safe with run-time checks
+
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; Function Attrs: ssp uwtable
+define void @foo(i1 %cond, i32* %ptr1, i32* %ptr2) {
+ br i1 %cond, label %.preheader, label %diamond
+
+diamond: ; preds = %.noexc.i.i
+ br label %.preheader
+
+.preheader: ; preds = %diamond, %0
+ %ptr1_or_null = phi i32* [ null, %0 ], [ %ptr1, %diamond ]
+ %ptr2_or_null = phi i32* [ null, %0 ], [ %ptr2, %diamond ]
+ br label %.lr.ph
+
+.lr.ph: ; preds = %.lr.ph, %.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 10, %.preheader ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %tmp4 = getelementptr inbounds i32, i32* %ptr2_or_null, i64 %indvars.iv.next
+ %tmp5 = load i32, i32* %tmp4, align 4
+ %tmp6 = getelementptr inbounds i32, i32* %ptr1_or_null, i64 %indvars.iv.next
+ store i32 undef, i32* %tmp6, align 4
+ br i1 false, label %.lr.ph, label %.end
+
+.end:
+ ret void
+}
diff --git a/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll b/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
index 50b37a031a60..01b3eda3e123 100644
--- a/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
+++ b/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
@@ -81,28 +81,28 @@ for.end: ; preds = %for.body
; CHECK: Run-time memory checks:
; CHECK-NEXT: Check 0:
-; CHECK-NEXT: Comparing group 0:
+; CHECK-NEXT: Comparing group ([[ZERO:.+]]):
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
-; CHECK-NEXT: Against group 1:
+; CHECK-NEXT: Against group ([[ONE:.+]]):
; CHECK-NEXT: %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
; CHECK-NEXT: %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
; CHECK-NEXT: Check 1:
-; CHECK-NEXT: Comparing group 0:
+; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]):
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
-; CHECK-NEXT: Against group 2:
+; CHECK-NEXT: Against group ([[TWO:.+]]):
; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
; CHECK-NEXT: Grouped accesses:
-; CHECK-NEXT: Group 0:
+; CHECK-NEXT: Group {{.*}}[[ZERO]]:
; CHECK-NEXT: (Low: %c High: (78 + %c))
; CHECK-NEXT: Member: {(2 + %c),+,4}
; CHECK-NEXT: Member: {%c,+,4}
-; CHECK-NEXT: Group 1:
+; CHECK-NEXT: Group {{.*}}[[ONE]]:
; CHECK-NEXT: (Low: %a High: (40 + %a))
; CHECK-NEXT: Member: {(2 + %a),+,2}
; CHECK-NEXT: Member: {%a,+,2}
-; CHECK-NEXT: Group 2:
+; CHECK-NEXT: Group {{.*}}[[TWO]]:
; CHECK-NEXT: (Low: %b High: (38 + %b))
; CHECK-NEXT: Member: {%b,+,2}
@@ -153,28 +153,28 @@ for.end: ; preds = %for.body
; CHECK: function 'testh':
; CHECK: Run-time memory checks:
; CHECK-NEXT: Check 0:
-; CHECK-NEXT: Comparing group 0:
+; CHECK-NEXT: Comparing group ([[ZERO:.+]]):
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
-; CHECK-NEXT: Against group 1:
+; CHECK-NEXT: Against group ([[ONE:.+]]):
; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
; CHECK-NEXT: %arrayidxA = getelementptr i16, i16* %a, i64 %ind
; CHECK-NEXT: Check 1:
-; CHECK-NEXT: Comparing group 0:
+; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]):
; CHECK-NEXT: %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
; CHECK-NEXT: %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
-; CHECK-NEXT: Against group 2:
+; CHECK-NEXT: Against group ([[TWO:.+]]):
; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind
; CHECK-NEXT: Grouped accesses:
-; CHECK-NEXT: Group 0:
+; CHECK-NEXT: Group {{.*}}[[ZERO]]:
; CHECK-NEXT: (Low: %c High: (78 + %c))
; CHECK-NEXT: Member: {(2 + %c),+,4}
; CHECK-NEXT: Member: {%c,+,4}
-; CHECK-NEXT: Group 1:
+; CHECK-NEXT: Group {{.*}}[[ONE]]:
; CHECK-NEXT: (Low: %a High: (40 + %a))
; CHECK-NEXT: Member: {(2 + %a),+,2}
; CHECK-NEXT: Member: {%a,+,2}
-; CHECK-NEXT: Group 2:
+; CHECK-NEXT: Group {{.*}}[[TWO]]:
; CHECK-NEXT: (Low: %b High: (38 + %b))
; CHECK-NEXT: Member: {%b,+,2}
@@ -217,8 +217,9 @@ for.end: ; preds = %for.body
ret void
}
-; Don't merge pointers if there is some other check which could be falsely
-; invalidated. For example, in the following loop:
+; Don't merge pointers if we need to perform a check against a pointer
+; to the same underlying object (doing so would emit a check that could be
+; falsely invalidated) For example, in the following loop:
;
; for (i = 0; i < 5000; ++i)
; a[i + offset] = a[i] + a[i + 10000]
@@ -226,27 +227,31 @@ for.end: ; preds = %for.body
; we should not merge the intervals associated with the reads (0,5000) and
; (10000, 15000) into (0, 15000) as this will pottentially fail the check
; against the interval associated with the write.
+;
+; We cannot have this check unless ShouldRetryWithRuntimeCheck is set,
+; and therefore the grouping algorithm would create a separate group for
+; each pointer.
; CHECK: function 'testi':
; CHECK: Run-time memory checks:
; CHECK-NEXT: Check 0:
-; CHECK-NEXT: Comparing group 0:
+; CHECK-NEXT: Comparing group ([[ZERO:.+]]):
; CHECK-NEXT: %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
-; CHECK-NEXT: Against group 1:
+; CHECK-NEXT: Against group ([[ONE:.+]]):
; CHECK-NEXT: %arrayidxA1 = getelementptr i16, i16* %a, i64 %ind
; CHECK-NEXT: Check 1:
-; CHECK-NEXT: Comparing group 0:
+; CHECK-NEXT: Comparing group ({{.*}}[[ZERO]]):
; CHECK-NEXT: %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
-; CHECK-NEXT: Against group 2:
+; CHECK-NEXT: Against group ([[TWO:.+]]):
; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
; CHECK-NEXT: Grouped accesses:
-; CHECK-NEXT: Group 0:
+; CHECK-NEXT: Group {{.*}}[[ZERO]]:
; CHECK-NEXT: (Low: ((2 * %offset) + %a) High: (9998 + (2 * %offset) + %a))
; CHECK-NEXT: Member: {((2 * %offset) + %a),+,2}<nsw><%for.body>
-; CHECK-NEXT: Group 1:
+; CHECK-NEXT: Group {{.*}}[[ONE]]:
; CHECK-NEXT: (Low: %a High: (9998 + %a))
; CHECK-NEXT: Member: {%a,+,2}<%for.body>
-; CHECK-NEXT: Group 2:
+; CHECK-NEXT: Group {{.*}}[[TWO]]:
; CHECK-NEXT: (Low: (20000 + %a) High: (29998 + %a))
; CHECK-NEXT: Member: {(20000 + %a),+,2}<%for.body>
diff --git a/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll b/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll
index d05849e2be2d..20b7fb2c6335 100644
--- a/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll
+++ b/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; We shouldn't quit the analysis if we encounter a pointer without known
; bounds *unless* we actually need to emit a memcheck for it. (We only
-; compute bounds for SCEVAddRecs so A[i*I] is deemed not having known bounds.)
+; compute bounds for SCEVAddRecs so A[i*i] is deemed not having known bounds.)
;
; for (i = 0; i < 20; ++i)
; A[i*i] *= 2;
@@ -12,7 +12,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: for.body:
; CHECK: Report: unsafe dependent memory operations in loop
; CHECK-NOT: Report: cannot identify array bounds
-; CHECK: Interesting Dependences:
+; CHECK: Dependences:
; CHECK: Unknown:
; CHECK: %loadA = load i16, i16* %arrayidxA, align 2 ->
; CHECK: store i16 %mul, i16* %arrayidxA, align 2
diff --git a/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll b/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll
index e7305173dd95..9412028fc702 100644
--- a/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll
+++ b/test/Analysis/LoopAccessAnalysis/resort-to-memchecks-only.ll
@@ -2,7 +2,7 @@
; We give up analyzing the dependences in this loop due to non-constant
; distance between A[i+offset] and A[i] and add memchecks to prove
-; independence. Make sure that no interesting dependences are reported in
+; independence. Make sure that no dependences are reported in
; this case.
;
; for (i = 0; i < n; i++)
@@ -12,7 +12,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; CHECK: Memory dependences are safe with run-time checks
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: 0:
; CHECK-NEXT: Comparing group
diff --git a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
new file mode 100644
index 000000000000..2053e49826b9
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
@@ -0,0 +1,89 @@
+; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
+
+; The runtime memory check code and the access grouping
+; algorithm both assume that the start and end values
+; for an access range are ordered (start <= stop).
+; When generating checks for accesses with negative stride
+; we need to take this into account and swap the interval
+; ends.
+;
+; for (i = 0; i < 10000; i++) {
+; B[i] = A[15000 - i] * 3;
+; }
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnueabi"
+
+; CHECK: function 'f':
+; CHECK: (Low: (20000 + %a) High: (60000 + %a))
+
+@B = common global i32* null, align 8
+@A = common global i32* null, align 8
+
+define void @f() {
+entry:
+ %a = load i32*, i32** @A, align 8
+ %b = load i32*, i32** @B, align 8
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %idx = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ %negidx = sub i64 15000, %idx
+
+ %arrayidxA0 = getelementptr inbounds i32, i32* %a, i64 %negidx
+ %loadA0 = load i32, i32* %arrayidxA0, align 2
+
+ %res = mul i32 %loadA0, 3
+
+ %add = add nuw nsw i64 %idx, 1
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %idx
+ store i32 %res, i32* %arrayidxB, align 2
+
+ %exitcond = icmp eq i64 %idx, 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; CHECK: function 'g':
+; When the stride is not constant, we are forced to do umin/umax to get
+; the interval limits.
+
+; for (i = 0; i < 10000; i++) {
+; B[i] = A[15000 - step * i] * 3;
+; }
+
+; Here it is not obvious what the limits are, since 'step' could be negative.
+
+; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a)))))
+; CHECK: High: ((60000 + %a) umax (60000 + (-40000 * %step) + %a))
+
+define void @g(i64 %step) {
+entry:
+ %a = load i32*, i32** @A, align 8
+ %b = load i32*, i32** @B, align 8
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %idx = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ %idx_mul = mul i64 %idx, %step
+ %negidx = sub i64 15000, %idx_mul
+
+ %arrayidxA0 = getelementptr inbounds i32, i32* %a, i64 %negidx
+ %loadA0 = load i32, i32* %arrayidxA0, align 2
+
+ %res = mul i32 %loadA0, 3
+
+ %add = add nuw nsw i64 %idx, 1
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %idx
+ store i32 %res, i32* %arrayidxB, align 2
+
+ %exitcond = icmp eq i64 %idx, 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll b/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll
index fa70c024a9c5..d85258f75aae 100644
--- a/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll
+++ b/test/Analysis/LoopAccessAnalysis/safe-no-checks.ll
@@ -7,7 +7,15 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
+; Check the loop-carried forward anti-dep between the load of A[i+1] and the
+; store of A[i];
+
; CHECK: Memory dependences are safe{{$}}
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Forward:
+; CHECK-NEXT: %loadA_plus_2 = load i16, i16* %arrayidxA_plus_2, align 2 ->
+; CHECK-NEXT: store i16 %mul1, i16* %arrayidxA, align 2
+
define void @f(i16* noalias %a,
i16* noalias %b,
diff --git a/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll b/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll
index 735735662952..5fc353e70cf8 100644
--- a/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll
+++ b/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll
@@ -13,7 +13,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; CHECK: function 'nodep_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
define void @nodep_Read_Write(i32* nocapture %A) {
@@ -49,7 +49,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'nodep_Write_Read':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
define i32 @nodep_Write_Read(i32* nocapture %A) {
@@ -84,7 +84,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'nodep_Write_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
define void @nodep_Write_Write(i32* nocapture %A) {
@@ -118,7 +118,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'unsafe_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx3, align 4
@@ -157,7 +157,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'unsafe_Write_Read':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT: %1 = load i32, i32* %arrayidx2, align 4
@@ -193,7 +193,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'unsafe_Write_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %2, i32* %arrayidx3, align 4
@@ -230,7 +230,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'vectorizable_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
@@ -269,7 +269,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'vectorizable_Write_Read':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT: %1 = load i32, i32* %arrayidx2, align 4
@@ -307,7 +307,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'vectorizable_Write_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %2, i32* %arrayidx2, align 4
@@ -346,7 +346,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'vectorizable_unscaled_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
@@ -387,7 +387,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: for function 'vectorizable_unscaled_Write_Read':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Memory dependences are safe
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: BackwardVectorizable:
; CHECK-NEXT: store i32 %2, i32* %arrayidx, align 4 ->
; CHECK-NEXT: %3 = load i32, i32* %arrayidx2, align 4
@@ -425,7 +425,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'unsafe_unscaled_Read_Write':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
@@ -455,7 +455,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'unsafe_unscaled_Read_Write2':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 ->
; CHECK-NEXT: store i32 %add, i32* %arrayidx2, align 4
@@ -505,7 +505,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: function 'interleaved_stores':
; CHECK-NEXT: for.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: store i32 %4, i32* %arrayidx5, align 4 ->
; CHECK-NEXT: store i32 %4, i32* %arrayidx9, align 4
diff --git a/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll b/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll
index d0bed68188db..d388151365f1 100644
--- a/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll
+++ b/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll
@@ -38,7 +38,7 @@ target triple = "x86_64-apple-macosx10.10.0"
; CHECK: for_j.body:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %loadB = load i8, i8* %gepB, align 1 ->
; CHECK-NEXT: store i8 2, i8* %gepB_plus_one, align 1
diff --git a/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll b/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
index 237cbc8b9873..7157b954c5b6 100644
--- a/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
+++ b/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
@@ -8,7 +8,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; CHECK: Report: unsafe dependent memory operations in loop
-; CHECK-NEXT: Interesting Dependences:
+; CHECK-NEXT: Dependences:
; CHECK-NEXT: Backward:
; CHECK-NEXT: %loadA = load i16, i16* %arrayidxA, align 2 ->
; CHECK-NEXT: store i16 %mul1, i16* %arrayidxA_plus_2, align 2
diff --git a/test/Analysis/MemoryDependenceAnalysis/memdep-block-scan-limit.ll b/test/Analysis/MemoryDependenceAnalysis/memdep-block-scan-limit.ll
new file mode 100644
index 000000000000..20ca31456fd3
--- /dev/null
+++ b/test/Analysis/MemoryDependenceAnalysis/memdep-block-scan-limit.ll
@@ -0,0 +1,15 @@
+; RUN: opt -S -memdep -gvn -basicaa < %s | FileCheck %s
+; RUN: opt -S -memdep -memdep-block-scan-limit=1 -gvn -basicaa < %s | FileCheck %s --check-prefix=WITH-LIMIT
+; CHECK-LABEL: @test(
+; CHECK: load
+; CHECK-NOT: load
+; WITH-LIMIT-LABEL: @test(
+; WITH-LIMIT-CHECK: load
+; WITH-LIMIT-CHECK: load
+define i32 @test(i32* %p) {
+ %1 = load i32, i32* %p
+ %2 = add i32 %1, 3
+ %3 = load i32, i32* %p
+ %4 = add i32 %2, %3
+ ret i32 %4
+}
diff --git a/test/Analysis/ScalarEvolution/avoid-assume-hang.ll b/test/Analysis/ScalarEvolution/avoid-assume-hang.ll
new file mode 100644
index 000000000000..e2428ed1f73f
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/avoid-assume-hang.ll
@@ -0,0 +1,139 @@
+; RUN: opt %s -always-inline | opt -analyze -scalar-evolution
+; There was optimization bug in ScalarEvolution, that causes too long
+; compute time and stack overflow crash.
+
+declare void @body(i32)
+declare void @llvm.assume(i1)
+
+define available_externally void @assume1(i64 %i.ext, i64 %a) alwaysinline {
+ %cmp0 = icmp ne i64 %i.ext, %a
+ call void @llvm.assume(i1 %cmp0)
+
+ %a1 = add i64 %a, 1
+ %cmp1 = icmp ne i64 %i.ext, %a1
+ call void @llvm.assume(i1 %cmp1)
+
+ %a2 = add i64 %a1, 1
+ %cmp2 = icmp ne i64 %i.ext, %a2
+ call void @llvm.assume(i1 %cmp2)
+
+ %a3 = add i64 %a2, 1
+ %cmp3 = icmp ne i64 %i.ext, %a3
+ call void @llvm.assume(i1 %cmp3)
+
+ %a4 = add i64 %a3, 1
+ %cmp4 = icmp ne i64 %i.ext, %a4
+ call void @llvm.assume(i1 %cmp4)
+
+ ret void
+}
+
+define available_externally void @assume2(i64 %i.ext, i64 %a) alwaysinline {
+ call void @assume1(i64 %i.ext, i64 %a)
+
+ %a1 = add i64 %a, 5
+ %cmp1 = icmp ne i64 %i.ext, %a1
+ call void @assume1(i64 %i.ext, i64 %a1)
+
+ %a2 = add i64 %a1, 5
+ %cmp2 = icmp ne i64 %i.ext, %a2
+ call void @assume1(i64 %i.ext, i64 %a2)
+
+ %a3 = add i64 %a2, 5
+ %cmp3 = icmp ne i64 %i.ext, %a3
+ call void @assume1(i64 %i.ext, i64 %a3)
+
+ %a4 = add i64 %a3, 5
+ %cmp4 = icmp ne i64 %i.ext, %a4
+ call void @assume1(i64 %i.ext, i64 %a4)
+
+ ret void
+}
+
+define available_externally void @assume3(i64 %i.ext, i64 %a) alwaysinline {
+ call void @assume2(i64 %i.ext, i64 %a)
+
+ %a1 = add i64 %a, 25
+ %cmp1 = icmp ne i64 %i.ext, %a1
+ call void @assume2(i64 %i.ext, i64 %a1)
+
+ %a2 = add i64 %a1, 25
+ %cmp2 = icmp ne i64 %i.ext, %a2
+ call void @assume2(i64 %i.ext, i64 %a2)
+
+ %a3 = add i64 %a2, 25
+ %cmp3 = icmp ne i64 %i.ext, %a3
+ call void @assume2(i64 %i.ext, i64 %a3)
+
+ %a4 = add i64 %a3, 25
+ %cmp4 = icmp ne i64 %i.ext, %a4
+ call void @assume2(i64 %i.ext, i64 %a4)
+
+ ret void
+}
+
+define available_externally void @assume4(i64 %i.ext, i64 %a) alwaysinline {
+ call void @assume3(i64 %i.ext, i64 %a)
+
+ %a1 = add i64 %a, 125
+ %cmp1 = icmp ne i64 %i.ext, %a1
+ call void @assume3(i64 %i.ext, i64 %a1)
+
+ %a2 = add i64 %a1, 125
+ %cmp2 = icmp ne i64 %i.ext, %a2
+ call void @assume3(i64 %i.ext, i64 %a2)
+
+ %a3 = add i64 %a2, 125
+ %cmp3 = icmp ne i64 %i.ext, %a3
+ call void @assume3(i64 %i.ext, i64 %a3)
+
+ %a4 = add i64 %a3, 125
+ %cmp4 = icmp ne i64 %i.ext, %a4
+ call void @assume3(i64 %i.ext, i64 %a4)
+
+ ret void
+}
+
+define available_externally void @assume5(i64 %i.ext, i64 %a) alwaysinline {
+ call void @assume4(i64 %i.ext, i64 %a)
+
+ %a1 = add i64 %a, 625
+ %cmp1 = icmp ne i64 %i.ext, %a1
+ call void @assume4(i64 %i.ext, i64 %a1)
+
+ %a2 = add i64 %a1, 625
+ %cmp2 = icmp ne i64 %i.ext, %a2
+ call void @assume4(i64 %i.ext, i64 %a2)
+
+ %a3 = add i64 %a2, 625
+ %cmp3 = icmp ne i64 %i.ext, %a3
+ call void @assume4(i64 %i.ext, i64 %a3)
+
+ %a4 = add i64 %a3, 625
+ %cmp4 = icmp ne i64 %i.ext, %a4
+ call void @assume4(i64 %i.ext, i64 %a4)
+
+ ret void
+}
+
+define void @fn(i32 %init) {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [%init, %entry], [%next, %loop]
+ call void @body(i32 %i)
+
+ %i.ext = zext i32 %i to i64
+
+ call void @assume5(i64 %i.ext, i64 500000000)
+
+ %i.next = add i64 %i.ext, 1
+ %next = trunc i64 %i.next to i32
+ %done = icmp eq i32 %i, 500000000
+
+ br i1 %done, label %exit, label %loop
+
+exit:
+ ret void
+} \ No newline at end of file
diff --git a/test/Analysis/ScalarEvolution/constant_condition.ll b/test/Analysis/ScalarEvolution/constant_condition.ll
new file mode 100644
index 000000000000..32ab91b2c857
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/constant_condition.ll
@@ -0,0 +1,51 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+define i32 @branch_true(i32 %x, i32 %y) {
+; CHECK-LABEL: Classifying expressions for: @branch_true
+ entry:
+ br i1 true, label %add, label %merge
+
+ add:
+ %sum = add i32 %x, %y
+ br label %merge
+
+ merge:
+ %v = phi i32 [ %sum, %add ], [ %x, %entry ]
+; CHECK: %v = phi i32 [ %sum, %add ], [ %x, %entry ]
+; CHECK-NEXT: --> (%x + %y) U: full-set S: full-set
+ ret i32 %v
+}
+
+define i32 @branch_false(i32 %x, i32 %y) {
+; CHECK-LABEL: Classifying expressions for: @branch_false
+ entry:
+ br i1 false, label %add, label %merge
+
+ add:
+ %sum = add i32 %x, %y
+ br label %merge
+
+ merge:
+ %v = phi i32 [ %sum, %add ], [ %x, %entry ]
+; CHECK: %v = phi i32 [ %sum, %add ], [ %x, %entry ]
+; CHECK-NEXT: --> %x U: full-set S: full-set
+ ret i32 %v
+}
+
+define i32 @select_true(i32 %x, i32 %y) {
+; CHECK-LABEL: Classifying expressions for: @select_true
+ entry:
+ %v = select i1 true, i32 %x, i32 %y
+; CHECK: %v = select i1 true, i32 %x, i32 %y
+; CHECK-NEXT: --> %x U: full-set S: full-set
+ ret i32 %v
+}
+
+define i32 @select_false(i32 %x, i32 %y) {
+; CHECK-LABEL: Classifying expressions for: @select_false
+ entry:
+ %v = select i1 false, i32 %x, i32 %y
+; CHECK: %v = select i1 false, i32 %x, i32 %y
+; CHECK-NEXT: --> %y U: full-set S: full-set
+ ret i32 %v
+}
diff --git a/test/Analysis/ScalarEvolution/flags-from-poison.ll b/test/Analysis/ScalarEvolution/flags-from-poison.ll
new file mode 100644
index 000000000000..b1fe7f1138b6
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/flags-from-poison.ll
@@ -0,0 +1,592 @@
+; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s
+
+; Positive and negative tests for inferring flags like nsw from
+; reasoning about how a poison value from overflow would trigger
+; undefined behavior.
+
+define void @foo() {
+ ret void
+}
+
+; Example where an add should get the nsw flag, so that a sext can be
+; distributed over the add.
+define void @test-add-nsw(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nsw>
+ %index32 = add nsw i32 %i, %offset
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 %offset to i64),+,1}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ call void @foo()
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where an add should get the nuw flag.
+define void @test-add-nuw(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-nuw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nuw>
+ %index32 = add nuw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; With no load to trigger UB from poison, we cannot infer nsw.
+define void @test-add-no-load(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-no-load
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; The current code is only supposed to look at the loop header, so
+; it should not infer nsw in this case, as that would require looking
+; outside the loop header.
+define void @test-add-not-header(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-not-header
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+ br label %loop2
+loop2:
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Same thing as test-add-not-header, but in this case only the load
+; instruction is outside the loop header.
+define void @test-add-not-header2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-not-header2
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ br label %loop2
+loop2:
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; The call instruction makes it not guaranteed that the add will be
+; executed, since it could run forever or throw an exception, so we
+; cannot assume that the UB is realized.
+define void @test-add-call(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-call
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ call void @foo()
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Same issue as test-add-call, but this time the call is between the
+; producer of poison and the load that consumes it.
+define void @test-add-call2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-call2
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ call void @foo()
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Without inbounds, GEP does not propagate poison in the very
+; conservative approach used here.
+define void @test-add-no-inbounds(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-no-inbounds
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Multiplication by a non-zero constant propagates poison if there is
+; a nuw or nsw flag on the multiplication.
+define void @test-add-mul-propagates(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-mul-propagates
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nsw>
+ %index32 = add nsw i32 %i, %offset
+
+ %indexmul = mul nuw i32 %index32, 2
+ %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Multiplication by a non-constant should not propagate poison in the
+; very conservative approach used here.
+define void @test-add-mul-no-propagation(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-mul-no-propagation
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %indexmul = mul nsw i32 %index32, %offset
+ %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Multiplication by a non-zero constant does not propagate poison
+; without a no-wrap flag.
+define void @test-add-mul-no-propagation2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-mul-no-propagation2
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nw>
+ %index32 = add nsw i32 %i, %offset
+
+ %indexmul = mul i32 %index32, 2
+ %ptr = getelementptr inbounds float, float* %input, i32 %indexmul
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Division by poison triggers UB.
+define void @test-add-div(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-div
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %j =
+; CHECK: --> {%offset,+,1}<nsw>
+ %j = add nsw i32 %i, %offset
+
+ %q = sdiv i32 %numIterations, %j
+ %nexti = add nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Remainder of poison by non-poison divisor does not trigger UB.
+define void @test-add-div2(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-div2
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %j =
+; CHECK: --> {%offset,+,1}<nw>
+ %j = add nsw i32 %i, %offset
+
+ %q = sdiv i32 %j, %numIterations
+ %nexti = add nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Store to poison address triggers UB.
+define void @test-add-store(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-store
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {%offset,+,1}<nsw>
+ %index32 = add nsw i32 %i, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ store float 1.0, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Three sequential adds where the middle add should have nsw. There is
+; a special case for sequential adds and this test covers that. We have to
+; put the final add first in the program since otherwise the special case
+; is not triggered, hence the strange basic block ordering.
+define void @test-add-twice(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-add-twice
+entry:
+ br label %loop
+loop2:
+; CHECK: %seq =
+; CHECK: --> {(2 + %offset),+,1}<nw>
+ %seq = add nsw nuw i32 %index32, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+ %j = add nsw i32 %i, 1
+; CHECK: %index32 =
+; CHECK: --> {(1 + %offset),+,1}<nsw>
+ %index32 = add nsw i32 %j, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ store float 1.0, float* %ptr, align 4
+ br label %loop2
+exit:
+ ret void
+}
+
+; Example where a mul should get the nsw flag, so that a sext can be
+; distributed over the mul.
+define void @test-mul-nsw(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @test-mul-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,%stride}<nsw>
+ %index32 = mul nsw i32 %i, %stride
+
+; CHECK: %index64 =
+; CHECK: --> {0,+,(sext i32 %stride to i64)}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a mul should get the nuw flag.
+define void @test-mul-nuw(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @test-mul-nuw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,%stride}<nuw>
+ %index32 = mul nuw i32 %i, %stride
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Example where a shl should get the nsw flag, so that a sext can be
+; distributed over the shl.
+define void @test-shl-nsw(float* %input, i32 %start, i32 %numIterations) {
+; CHECK-LABEL: @test-shl-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {(256 * %start),+,256}<nsw>
+ %index32 = shl nsw i32 %i, 8
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 (256 * %start) to i64),+,256}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a shl should get the nuw flag.
+define void @test-shl-nuw(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @test-shl-nuw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {0,+,512}<nuw>
+ %index32 = shl nuw i32 %i, 9
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nuw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Example where a sub should *not* get the nsw flag, because of how
+; scalar evolution represents A - B as A + (-B) and -B can wrap even
+; in cases where A - B does not.
+define void @test-sub-no-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-no-nsw
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {((-1 * %sub) + %start),+,1}<nw>
+ %index32 = sub nsw i32 %i, %sub
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a sub should get the nsw flag as the RHS cannot be the
+; minimal signed value.
+define void @test-sub-nsw(float* %input, i32 %start, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-nsw
+entry:
+ %halfsub = ashr i32 %sub, 1
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {((-1 * %halfsub)<nsw> + %start),+,1}<nsw>
+ %index32 = sub nsw i32 %i, %halfsub
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Example where a sub should get the nsw flag, since the LHS is non-negative,
+; which implies that the RHS cannot be the minimal signed value.
+define void @test-sub-nsw-lhs-non-negative(float* %input, i32 %sub, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-nsw-lhs-non-negative
+entry:
+ br label %loop
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+
+; CHECK: %index32 =
+; CHECK: --> {(-1 * %sub),+,1}<nsw>
+ %index32 = sub nsw i32 %i, %sub
+
+; CHECK: %index64 =
+; CHECK: --> {(sext i32 (-1 * %sub) to i64),+,1}<nsw>
+ %index64 = sext i32 %index32 to i64
+
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %nexti = add nsw i32 %i, 1
+ %f = load float, float* %ptr, align 4
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Two adds with a sub in the middle and the sub should have nsw. There is
+; a special case for sequential adds/subs and this test covers that. We have to
+; put the final add first in the program since otherwise the special case
+; is not triggered, hence the strange basic block ordering.
+define void @test-sub-with-add(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @test-sub-with-add
+entry:
+ br label %loop
+loop2:
+; CHECK: %seq =
+; CHECK: --> {(2 + (-1 * %offset)),+,1}<nw>
+ %seq = add nsw nuw i32 %index32, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop2 ], [ 0, %entry ]
+
+ %j = add nsw i32 %i, 1
+; CHECK: %index32 =
+; CHECK: --> {(1 + (-1 * %offset)),+,1}<nsw>
+ %index32 = sub nsw i32 %j, %offset
+
+ %ptr = getelementptr inbounds float, float* %input, i32 %index32
+ %nexti = add nsw i32 %i, 1
+ store float 1.0, float* %ptr, align 4
+ br label %loop2
+exit:
+ ret void
+}
+
+
+; Subtraction of two recurrences. The addition in the SCEV that this
+; maps to is NSW, but the negation of the RHS does not since that
+; recurrence could be the most negative representable value.
+define void @subrecurrences(i32 %outer_l, i32 %inner_l, i32 %val) {
+; CHECK-LABEL: @subrecurrences
+ entry:
+ br label %outer
+
+outer:
+ %o_idx = phi i32 [ 0, %entry ], [ %o_idx.inc, %outer.be ]
+ %o_idx.inc = add nsw i32 %o_idx, 1
+ %cond = icmp eq i32 %o_idx, %val
+ br i1 %cond, label %inner, label %outer.be
+
+inner:
+ %i_idx = phi i32 [ 0, %outer ], [ %i_idx.inc, %inner ]
+ %i_idx.inc = add nsw i32 %i_idx, 1
+; CHECK: %v =
+; CHECK-NEXT: --> {{[{][{]}}-1,+,-1}<nw><%outer>,+,1}<nsw><%inner>
+ %v = sub nsw i32 %i_idx, %o_idx.inc
+ %forub = udiv i32 1, %v
+ %cond2 = icmp eq i32 %i_idx, %inner_l
+ br i1 %cond2, label %outer.be, label %inner
+
+outer.be:
+ %cond3 = icmp eq i32 %o_idx, %outer_l
+ br i1 %cond3, label %exit, label %outer
+
+exit:
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll b/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll
index 078ca03ff14e..5c372b5d7b8a 100644
--- a/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll
+++ b/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll
@@ -11,7 +11,7 @@ define void @infer.sext.0(i1* %c, i32 %start) {
%idx.inc = add nsw i32 %idx, 1
%idx.inc.sext = sext i32 %idx.inc to i64
; CHECK: %idx.inc.sext = sext i32 %idx.inc to i64
-; CHECK-NEXT: --> {(1 + (sext i32 %start to i64)),+,1}<nsw><%loop>
+; CHECK-NEXT: --> {(1 + (sext i32 %start to i64))<nsw>,+,1}<nsw><%loop>
%condition = icmp eq i32 %counter, 1
%counter.inc = add i32 %counter, 1
br i1 %condition, label %exit, label %loop
@@ -31,7 +31,7 @@ define void @infer.zext.0(i1* %c, i32 %start) {
%idx.inc = add nuw i32 %idx, 1
%idx.inc.sext = zext i32 %idx.inc to i64
; CHECK: %idx.inc.sext = zext i32 %idx.inc to i64
-; CHECK-NEXT: --> {(1 + (zext i32 %start to i64)),+,1}<nuw><%loop>
+; CHECK-NEXT: --> {(1 + (zext i32 %start to i64))<nuw><nsw>,+,1}<nuw><%loop>
%condition = icmp eq i32 %counter, 1
%counter.inc = add i32 %counter, 1
br i1 %condition, label %exit, label %loop
@@ -51,7 +51,7 @@ define void @infer.sext.1(i32 %start, i1* %c) {
%idx = phi i32 [ %start.real, %entry ], [ %idx.inc, %loop ]
%idx.sext = sext i32 %idx to i64
; CHECK: %idx.sext = sext i32 %idx to i64
-; CHECK-NEXT: --> {(2 + (sext i32 (4 * %start) to i64)),+,2}<nsw><%loop>
+; CHECK-NEXT: --> {(2 + (sext i32 (4 * %start) to i64))<nsw>,+,2}<nsw><%loop>
%idx.inc = add nsw i32 %idx, 2
%condition = load i1, i1* %c
br i1 %condition, label %exit, label %loop
@@ -71,7 +71,7 @@ define void @infer.sext.2(i1* %c, i8 %start) {
%idx = phi i8 [ %start.inc, %entry ], [ %idx.inc, %loop ]
%idx.sext = sext i8 %idx to i16
; CHECK: %idx.sext = sext i8 %idx to i16
-; CHECK-NEXT: --> {(1 + (sext i8 %start to i16)),+,1}<nsw><%loop>
+; CHECK-NEXT: --> {(1 + (sext i8 %start to i16))<nsw>,+,1}<nsw><%loop>
%idx.inc = add nsw i8 %idx, 1
%condition = load volatile i1, i1* %c
br i1 %condition, label %exit, label %loop
@@ -91,7 +91,7 @@ define void @infer.zext.1(i1* %c, i8 %start) {
%idx = phi i8 [ %start.inc, %entry ], [ %idx.inc, %loop ]
%idx.zext = zext i8 %idx to i16
; CHECK: %idx.zext = zext i8 %idx to i16
-; CHECK-NEXT: --> {(1 + (zext i8 %start to i16)),+,1}<nuw><%loop>
+; CHECK-NEXT: --> {(1 + (zext i8 %start to i16))<nuw><nsw>,+,1}<nuw><%loop>
%idx.inc = add nuw i8 %idx, 1
%condition = load volatile i1, i1* %c
br i1 %condition, label %exit, label %loop
diff --git a/test/Analysis/ScalarEvolution/min-max-exprs.ll b/test/Analysis/ScalarEvolution/min-max-exprs.ll
index 892fc23fe6b2..e8c1e33e095f 100644
--- a/test/Analysis/ScalarEvolution/min-max-exprs.ll
+++ b/test/Analysis/ScalarEvolution/min-max-exprs.ll
@@ -33,7 +33,7 @@ bb2: ; preds = %bb1
%tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6
; min(N, i+3)
; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6
-; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nw><%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64))))))
+; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nuw><%bb1> to i64))<nsw>)<nsw> smax (-1 + (-1 * (sext i32 %N to i64))<nsw>)<nsw>))<nsw>)<nsw>
%tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9
%tmp12 = load i32, i32* %tmp11, align 4
%tmp13 = shl nsw i32 %tmp12, 1
diff --git a/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll
new file mode 100644
index 000000000000..c24d7173e4ee
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll
@@ -0,0 +1,122 @@
+; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s
+
+!0 = !{i8 0, i8 127}
+
+define void @f0(i8* %len_addr) {
+; CHECK-LABEL: Classifying expressions for: @f0
+ entry:
+ %len = load i8, i8* %len_addr, !range !0
+ %len_norange = load i8, i8* %len_addr
+; CHECK: %len = load i8, i8* %len_addr, !range !0
+; CHECK-NEXT: --> %len U: [0,127) S: [0,127)
+; CHECK: %len_norange = load i8, i8* %len_addr
+; CHECK-NEXT: --> %len_norange U: full-set S: full-set
+
+ %t0 = add i8 %len, 1
+ %t1 = add i8 %len, 2
+; CHECK: %t0 = add i8 %len, 1
+; CHECK-NEXT: --> (1 + %len)<nuw><nsw> U: [1,-128) S: [1,-128)
+; CHECK: %t1 = add i8 %len, 2
+; CHECK-NEXT: --> (2 + %len)<nuw> U: [2,-127) S: [2,-127)
+
+ %t2 = sub i8 %len, 1
+ %t3 = sub i8 %len, 2
+; CHECK: %t2 = sub i8 %len, 1
+; CHECK-NEXT: --> (-1 + %len)<nsw> U: [-1,126) S: [-1,126)
+; CHECK: %t3 = sub i8 %len, 2
+; CHECK-NEXT: --> (-2 + %len)<nsw> U: [-2,125) S: [-2,125)
+
+ %q0 = add i8 %len_norange, 1
+ %q1 = add i8 %len_norange, 2
+; CHECK: %q0 = add i8 %len_norange, 1
+; CHECK-NEXT: --> (1 + %len_norange) U: full-set S: full-set
+; CHECK: %q1 = add i8 %len_norange, 2
+; CHECK-NEXT: --> (2 + %len_norange) U: full-set S: full-set
+
+ %q2 = sub i8 %len_norange, 1
+ %q3 = sub i8 %len_norange, 2
+; CHECK: %q2 = sub i8 %len_norange, 1
+; CHECK-NEXT: --> (-1 + %len_norange) U: full-set S: full-set
+; CHECK: %q3 = sub i8 %len_norange, 2
+; CHECK-NEXT: --> (-2 + %len_norange) U: full-set S: full-set
+
+ ret void
+}
+
+define void @f1(i8* %len_addr) {
+; CHECK-LABEL: Classifying expressions for: @f1
+ entry:
+ %len = load i8, i8* %len_addr, !range !0
+ %len_norange = load i8, i8* %len_addr
+; CHECK: %len = load i8, i8* %len_addr, !range !0
+; CHECK-NEXT: --> %len U: [0,127) S: [0,127)
+; CHECK: %len_norange = load i8, i8* %len_addr
+; CHECK-NEXT: --> %len_norange U: full-set S: full-set
+
+ %t0 = add i8 %len, -1
+ %t1 = add i8 %len, -2
+; CHECK: %t0 = add i8 %len, -1
+; CHECK-NEXT: --> (-1 + %len)<nsw> U: [-1,126) S: [-1,126)
+; CHECK: %t1 = add i8 %len, -2
+; CHECK-NEXT: --> (-2 + %len)<nsw> U: [-2,125) S: [-2,125)
+
+ %t0.sext = sext i8 %t0 to i16
+ %t1.sext = sext i8 %t1 to i16
+; CHECK: %t0.sext = sext i8 %t0 to i16
+; CHECK-NEXT: --> (-1 + (zext i8 %len to i16))<nsw> U: [-1,126) S: [-1,126)
+; CHECK: %t1.sext = sext i8 %t1 to i16
+; CHECK-NEXT: --> (-2 + (zext i8 %len to i16))<nsw> U: [-2,125) S: [-2,125)
+
+ %q0 = add i8 %len_norange, 1
+ %q1 = add i8 %len_norange, 2
+; CHECK: %q0 = add i8 %len_norange, 1
+; CHECK-NEXT: --> (1 + %len_norange) U: full-set S: full-set
+; CHECK: %q1 = add i8 %len_norange, 2
+; CHECK-NEXT: --> (2 + %len_norange) U: full-set S: full-set
+
+ %q0.sext = sext i8 %q0 to i16
+ %q1.sext = sext i8 %q1 to i16
+; CHECK: %q0.sext = sext i8 %q0 to i16
+; CHECK-NEXT: --> (sext i8 (1 + %len_norange) to i16) U: [-128,128) S: [-128,128)
+; CHECK: %q1.sext = sext i8 %q1 to i16
+; CHECK-NEXT: --> (sext i8 (2 + %len_norange) to i16) U: [-128,128) S: [-128,128)
+
+ ret void
+}
+
+define void @f2(i8* %len_addr) {
+; CHECK-LABEL: Classifying expressions for: @f2
+ entry:
+ %len = load i8, i8* %len_addr, !range !0
+ %len_norange = load i8, i8* %len_addr
+; CHECK: %len = load i8, i8* %len_addr, !range !0
+; CHECK-NEXT: --> %len U: [0,127) S: [0,127)
+; CHECK: %len_norange = load i8, i8* %len_addr
+; CHECK-NEXT: --> %len_norange U: full-set S: full-set
+
+ %t0 = add i8 %len, 1
+ %t1 = add i8 %len, 2
+; CHECK: %t0 = add i8 %len, 1
+; CHECK-NEXT: --> (1 + %len)<nuw><nsw>
+; CHECK: %t1 = add i8 %len, 2
+; CHECK-NEXT: --> (2 + %len)<nuw>
+
+ %t0.zext = zext i8 %t0 to i16
+ %t1.zext = zext i8 %t1 to i16
+; CHECK: %t0.zext = zext i8 %t0 to i16
+; CHECK-NEXT: --> (1 + (zext i8 %len to i16))<nuw><nsw> U: [1,128) S: [1,128)
+; CHECK: %t1.zext = zext i8 %t1 to i16
+; CHECK-NEXT: --> (2 + (zext i8 %len to i16))<nuw><nsw> U: [2,129) S: [2,129)
+
+ %q0 = add i8 %len_norange, 1
+ %q1 = add i8 %len_norange, 2
+ %q0.zext = zext i8 %q0 to i16
+ %q1.zext = zext i8 %q1 to i16
+
+; CHECK: %q0.zext = zext i8 %q0 to i16
+; CHECK-NEXT: --> (zext i8 (1 + %len_norange) to i16) U: [0,256) S: [0,256)
+; CHECK: %q1.zext = zext i8 %q1 to i16
+; CHECK-NEXT: --> (zext i8 (2 + %len_norange) to i16) U: [0,256) S: [0,256)
+
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/non-IV-phi.ll b/test/Analysis/ScalarEvolution/non-IV-phi.ll
new file mode 100644
index 000000000000..f0d6c2f5d9d3
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/non-IV-phi.ll
@@ -0,0 +1,59 @@
+; RUN: opt -scalar-evolution -analyze < %s | FileCheck %s
+
+define void @test1(i8 %t, i32 %len) {
+; CHECK-LABEL: test1
+; CHECK: %sphi = phi i32 [ %ext, %entry ], [ %idx.inc.ext, %loop ]
+; CHECK-NEXT: --> (zext i8 {%t,+,1}<%loop> to i32)
+
+ entry:
+ %st = zext i8 %t to i16
+ %ext = zext i8 %t to i32
+ %ecmp = icmp ult i16 %st, 42
+ br i1 %ecmp, label %loop, label %exit
+
+ loop:
+
+ %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
+ %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop]
+
+ %idx.inc = add i8 %idx, 1
+ %idx.inc.ext = zext i8 %idx.inc to i32
+ %idx.ext = zext i8 %idx to i32
+
+ %c = icmp ult i32 %idx.inc.ext, %len
+ br i1 %c, label %loop, label %exit
+
+ exit:
+ ret void
+}
+
+define void @test2(i8 %t, i32 %len) {
+; CHECK-LABEL: test2
+; CHECK: %sphi = phi i32 [ %ext.mul, %entry ], [ %mul, %loop ]
+; CHECK-NEXT: --> (4 * (zext i8 {%t,+,1}<%loop> to i32))
+
+ entry:
+ %st = zext i8 %t to i16
+ %ext = zext i8 %t to i32
+ %ext.mul = mul i32 %ext, 4
+
+ %ecmp = icmp ult i16 %st, 42
+ br i1 %ecmp, label %loop, label %exit
+
+ loop:
+
+ %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
+ %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop]
+
+ %idx.inc = add i8 %idx, 1
+ %idx.inc.ext = zext i8 %idx.inc to i32
+ %mul = mul i32 %idx.inc.ext, 4
+
+ %idx.ext = zext i8 %idx to i32
+
+ %c = icmp ult i32 %idx.inc.ext, %len
+ br i1 %c, label %loop, label %exit
+
+ exit:
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/pr24757.ll b/test/Analysis/ScalarEvolution/pr24757.ll
new file mode 100644
index 000000000000..815adcde0e9d
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/pr24757.ll
@@ -0,0 +1,35 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+; CHECK: Loop %bb1: backedge-taken count is (zext i7 (trunc i8 %a.promoted to i7) to i8)
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = global i8 -127, align 1
+@b = common global i32 0, align 4
+
+declare void @use(i32)
+
+define i32 @main() {
+bb:
+ %a.promoted = load i8, i8* @a
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %tmp = phi i8 [ %tmp2, %bb1 ], [ %a.promoted, %bb ]
+ %tmp2 = add i8 %tmp, -1
+ %tmp3 = sext i8 %tmp to i32
+ %tmp4 = xor i32 %tmp3, -1
+ %tmp5 = sext i8 %tmp2 to i32
+ %tmpf = sub nsw i32 %tmp4, %tmp5
+ %tmp6 = trunc i32 %tmpf to i8
+ %tmp7 = icmp eq i8 %tmp6, 0
+ br i1 %tmp7, label %bb8, label %bb1
+
+bb8: ; preds = %bb1
+ store i8 %tmp2, i8* @a
+ store i32 %tmp4, i32* @b
+ %tmp9 = sext i8 %tmp2 to i32
+ call void @use(i32 %tmp9)
+ ret i32 0
+}
diff --git a/test/Analysis/ScalarEvolution/pr25369.ll b/test/Analysis/ScalarEvolution/pr25369.ll
new file mode 100644
index 000000000000..10754867a368
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/pr25369.ll
@@ -0,0 +1,78 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hoge1() {
+; CHECK-LABEL: Classifying expressions for: @hoge1
+bb:
+ br i1 undef, label %bb4, label %bb2
+
+bb2: ; preds = %bb2, %bb
+ br i1 false, label %bb4, label %bb2
+
+bb3: ; preds = %bb4
+ %tmp = add i32 %tmp10, -1
+ br label %bb13
+
+bb4: ; preds = %bb4, %bb2, %bb
+ %tmp5 = phi i64 [ %tmp11, %bb4 ], [ 1, %bb2 ], [ 1, %bb ]
+ %tmp6 = phi i32 [ %tmp10, %bb4 ], [ 0, %bb2 ], [ 0, %bb ]
+ %tmp7 = load i32, i32* undef, align 4
+ %tmp8 = add i32 %tmp7, %tmp6
+ %tmp9 = add i32 undef, %tmp8
+ %tmp10 = add i32 undef, %tmp9
+ %tmp11 = add nsw i64 %tmp5, 3
+ %tmp12 = icmp eq i64 %tmp11, 64
+ br i1 %tmp12, label %bb3, label %bb4
+
+; CHECK: Loop %bb4: backedge-taken count is 20
+; CHECK: Loop %bb4: max backedge-taken count is 20
+
+bb13: ; preds = %bb13, %bb3
+ %tmp14 = phi i64 [ 0, %bb3 ], [ %tmp15, %bb13 ]
+ %tmp15 = add nuw nsw i64 %tmp14, 1
+ %tmp16 = trunc i64 %tmp15 to i32
+ %tmp17 = icmp eq i32 %tmp16, %tmp
+ br i1 %tmp17, label %bb18, label %bb13
+
+bb18: ; preds = %bb13
+ ret void
+}
+
+define void @hoge2() {
+; CHECK-LABEL: Classifying expressions for: @hoge2
+bb:
+ br i1 undef, label %bb4, label %bb2
+
+bb2: ; preds = %bb2, %bb
+ br i1 false, label %bb4, label %bb2
+
+bb3: ; preds = %bb4
+ %tmp = add i32 %tmp10, -1
+ br label %bb13
+
+bb4: ; preds = %bb4, %bb2, %bb
+ %tmp5 = phi i64 [ %tmp11, %bb4 ], [ 1, %bb2 ], [ 3, %bb ]
+ %tmp6 = phi i32 [ %tmp10, %bb4 ], [ 0, %bb2 ], [ 0, %bb ]
+ %tmp7 = load i32, i32* undef, align 4
+ %tmp8 = add i32 %tmp7, %tmp6
+ %tmp9 = add i32 undef, %tmp8
+ %tmp10 = add i32 undef, %tmp9
+ %tmp11 = add nsw i64 %tmp5, 3
+ %tmp12 = icmp eq i64 %tmp11, 64
+ br i1 %tmp12, label %bb3, label %bb4
+
+; CHECK: Loop %bb4: Unpredictable backedge-taken count.
+; CHECK: Loop %bb4: Unpredictable max backedge-taken count.
+
+bb13: ; preds = %bb13, %bb3
+ %tmp14 = phi i64 [ 0, %bb3 ], [ %tmp15, %bb13 ]
+ %tmp15 = add nuw nsw i64 %tmp14, 1
+ %tmp16 = trunc i64 %tmp15 to i32
+ %tmp17 = icmp eq i32 %tmp16, %tmp
+ br i1 %tmp17, label %bb18, label %bb13
+
+bb18: ; preds = %bb13
+ ret void
+}
diff --git a/test/Analysis/ScalarEvolution/scev-aa.ll b/test/Analysis/ScalarEvolution/scev-aa.ll
index e2123f44f167..47a84d57c3b3 100644
--- a/test/Analysis/ScalarEvolution/scev-aa.ll
+++ b/test/Analysis/ScalarEvolution/scev-aa.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \
+; RUN: opt -disable-output < %s -disable-basicaa -scev-aa -aa-eval -print-all-alias-modref-info \
; RUN: 2>&1 | FileCheck %s
; At the time of this writing, -basicaa misses the example of the form
diff --git a/test/Analysis/ScalarEvolution/shift-op.ll b/test/Analysis/ScalarEvolution/shift-op.ll
new file mode 100644
index 000000000000..fe832d567687
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/shift-op.ll
@@ -0,0 +1,164 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+define void @test0(i32 %init) {
+; CHECK-LABEL: Classifying expressions for: @test0
+; CHECK: Loop %loop: max backedge-taken count is 32
+ entry:
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.shift, %loop ]
+ %iv.shift = lshr i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv, 0
+ br i1 %exit.cond, label %leave, label %loop
+
+ leave:
+ ret void
+}
+
+define void @test1(i32 %init) {
+; CHECK-LABEL: Classifying expressions for: @test1
+; CHECK: Loop %loop: max backedge-taken count is 32
+ entry:
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.shift, %loop ]
+ %iv.shift = shl i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv, 0
+ br i1 %exit.cond, label %leave, label %loop
+
+ leave:
+ ret void
+}
+
+define void @test2(i32 %init) {
+; CHECK-LABEL: Determining loop execution counts for: @test2
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
+
+; Unpredictable because %iv could "stabilize" to either -1 or 0,
+; depending on %init.
+ entry:
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.shift, %loop ]
+ %iv.shift = ashr i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv, 0
+ br i1 %exit.cond, label %leave, label %loop
+
+ leave:
+ ret void
+}
+
+define void @test3(i32* %init.ptr) {
+; CHECK-LABEL: Determining loop execution counts for: @test3
+; CHECK: Loop %loop: max backedge-taken count is 32
+ entry:
+ %init = load i32, i32* %init.ptr, !range !0
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.shift, %loop ]
+ %iv.shift = ashr i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv, 0
+ br i1 %exit.cond, label %leave, label %loop
+
+ leave:
+ ret void
+}
+
+define void @test4(i32* %init.ptr) {
+; CHECK-LABEL: Classifying expressions for: @test4
+; CHECK-LABEL: Loop %loop: max backedge-taken count is 32
+ entry:
+ %init = load i32, i32* %init.ptr, !range !1
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.shift, %loop ]
+ %iv.shift = ashr i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv, -1
+ br i1 %exit.cond, label %leave, label %loop
+
+ leave:
+ ret void
+}
+
+define void @test5(i32* %init.ptr) {
+; CHECK-LABEL: Determining loop execution counts for: @test5
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
+
+; %iv will "stabilize" to -1, so this is an infinite loop
+ entry:
+ %init = load i32, i32* %init.ptr, !range !1
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.shift, %loop ]
+ %iv.shift = ashr i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv, 0
+ br i1 %exit.cond, label %leave, label %loop
+
+ leave:
+ ret void
+}
+
+define void @test6(i32 %init, i32 %shift.amt) {
+; CHECK-LABEL: Determining loop execution counts for: @test6
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
+
+; Potentially infinite loop, since %shift.amt could be 0
+ entry:
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.shift, %loop ]
+ %iv.shift = lshr i32 %iv, %shift.amt
+ %exit.cond = icmp eq i32 %iv, 0
+ br i1 %exit.cond, label %leave, label %loop
+
+ leave:
+ ret void
+}
+
+define void @test7(i32 %init) {
+; CHECK-LABEL: Classifying expressions for: @test7
+; CHECK: Loop %loop: max backedge-taken count is 32
+
+ entry:
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.shift, %loop ]
+ %iv.shift = lshr i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv.shift, 0
+ br i1 %exit.cond, label %leave, label %loop
+
+ leave:
+ ret void
+}
+
+define void @test8(i32 %init) {
+; CHECK-LABEL: Classifying expressions for: @test8
+; CHECK: Loop %loop: Unpredictable max backedge-taken count.
+
+; In this test case, %iv.test stabilizes to 127, not -1, so the loop
+; is infinite.
+
+ entry:
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.shift, %loop ]
+ %iv.shift = ashr i32 %iv, 1
+ %iv.test = lshr i32 %iv, 1
+ %exit.cond = icmp eq i32 %iv.test, -1
+ br i1 %exit.cond, label %leave, label %loop
+
+ leave:
+ ret void
+}
+
+!0 = !{i32 0, i32 50000}
+!1 = !{i32 -5000, i32 -1}
diff --git a/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll b/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll
new file mode 100644
index 000000000000..500f3e16c8f5
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll
@@ -0,0 +1,128 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+define i32 @f0(i32 %x, i32 %y) {
+; CHECK-LABEL: Classifying expressions for: @f0
+ entry:
+ %c = icmp sgt i32 %y, 0
+ br i1 %c, label %add, label %merge
+
+ add:
+ %sum = add i32 %x, %y
+ br label %merge
+
+ merge:
+ %v = phi i32 [ %sum, %add ], [ %x, %entry ]
+; CHECK: %v = phi i32 [ %sum, %add ], [ %x, %entry ]
+; CHECK-NEXT: --> ((0 smax %y) + %x) U: full-set S: full-set
+ ret i32 %v
+}
+
+define i32 @f1(i32 %x, i32 %y) {
+; CHECK-LABEL: Classifying expressions for: @f1
+ entry:
+ %c = icmp sge i32 %y, 0
+ br i1 %c, label %add, label %merge
+
+ add:
+ %sum = add i32 %x, %y
+ br label %merge
+
+ merge:
+ %v = phi i32 [ %sum, %add ], [ %x, %entry ]
+; CHECK: %v = phi i32 [ %sum, %add ], [ %x, %entry ]
+; CHECK-NEXT: --> ((0 smax %y) + %x) U: full-set S: full-set
+ ret i32 %v
+}
+
+define i32 @f2(i32 %x, i32 %y, i32* %ptr) {
+; CHECK-LABEL: Classifying expressions for: @f2
+ entry:
+ %c = icmp sge i32 %y, 0
+ br i1 %c, label %add, label %merge
+
+ add:
+ %lv = load i32, i32* %ptr
+ br label %merge
+
+ merge:
+ %v = phi i32 [ %lv, %add ], [ %x, %entry ]
+; CHECK: %v = phi i32 [ %lv, %add ], [ %x, %entry ]
+; CHECK-NEXT: --> %v U: full-set S: full-set
+ ret i32 %v
+}
+
+define i32 @f3(i32 %x, i32 %init, i32 %lim) {
+; CHECK-LABEL: Classifying expressions for: @f3
+ entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.inc, %merge ]
+ %iv.inc = add i32 %iv, 1
+ %c = icmp sge i32 %iv, 0
+ br i1 %c, label %add, label %merge
+
+ add:
+ %sum = add i32 %x, %iv
+ br label %merge
+
+ merge:
+ %v = phi i32 [ %sum, %add ], [ %x, %loop ]
+; CHECK: %v = phi i32 [ %sum, %add ], [ %x, %loop ]
+; CHECK-NEXT: --> ((0 smax {%init,+,1}<%loop>) + %x) U: full-set S: full-set
+ %be.cond = icmp eq i32 %iv.inc, %lim
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret i32 0
+}
+
+define i32 @f4(i32 %x, i32 %init, i32 %lim) {
+; CHECK-LABEL: Classifying expressions for: @f4
+ entry:
+ %c = icmp sge i32 %init, 0
+ br i1 %c, label %add, label %merge
+
+ add:
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ %init, %add ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+ %be.cond = icmp eq i32 %iv.inc, %lim
+ br i1 %be.cond, label %loop, label %add.cont
+
+ add.cont:
+ %sum = add i32 %x, %iv
+ br label %merge
+
+ merge:
+ %v = phi i32 [ %sum, %add.cont ], [ %x, %entry ]
+; CHECK: %v = phi i32 [ %sum, %add.cont ], [ %x, %entry ]
+; CHECK-NEXT: --> %v U: full-set S: full-set
+ ret i32 %v
+}
+
+define i32 @f5(i32* %val) {
+; CHECK-LABEL: Classifying expressions for: @f5
+entry:
+ br label %for.end
+
+for.condt:
+ br i1 true, label %for.cond.0, label %for.end
+
+for.end:
+ %inc = load i32, i32* %val
+ br i1 false, label %for.condt, label %for.cond.0
+
+for.cond.0:
+ %init = phi i32 [ 0, %for.condt ], [ %inc, %for.end ]
+
+; CHECK: %init = phi i32 [ 0, %for.condt ], [ %inc, %for.end ]
+; CHECK-NEXT: --> %init U: full-set S: full-set
+
+; Matching "through" %init will break LCSSA at the SCEV expression
+; level.
+
+ ret i32 %init
+}
diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll
index 1b75c88c753c..89750810d1b7 100644
--- a/test/Analysis/ScalarEvolution/trip-count.ll
+++ b/test/Analysis/ScalarEvolution/trip-count.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -analyze -scalar-evolution -scalar-evolution-max-iterations=0 | FileCheck %s
+; RUN: opt < %s -passes='print<scalar-evolution>' -disable-output 2>&1 | FileCheck %s
; PR1101
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -6,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
@A = weak global [1000 x i32] zeroinitializer, align 32
-; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test1':
+; CHECK-LABEL: Determining loop execution counts for: @test1
; CHECK: backedge-taken count is 10000
define void @test1(i32 %N) {
@@ -32,7 +33,7 @@ return: ; preds = %bb5
}
; PR22795
-; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'test2':
+; CHECK-LABEL: Classifying expressions for: @test2
; CHECK: %iv = phi i32 [ -1, %entry ], [ %next.1, %for.inc.1 ]
; CHECK-NEXT: --> {-1,+,2}<%preheader> U: full-set S: full-set Exits: 13
diff --git a/test/Analysis/ScalarEvolution/zext-wrap.ll b/test/Analysis/ScalarEvolution/zext-wrap.ll
index f56e4556c697..5bc149e2309a 100644
--- a/test/Analysis/ScalarEvolution/zext-wrap.ll
+++ b/test/Analysis/ScalarEvolution/zext-wrap.ll
@@ -10,7 +10,7 @@ bb.i: ; preds = %bb1.i, %bb.nph
; This cast shouldn't be folded into the addrec.
; CHECK: %tmp = zext i8 %l_95.0.i1 to i16
-; CHECK: --> (zext i8 {0,+,-1}<%bb.i> to i16){{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: 2
+; CHECK: --> (zext i8 {0,+,-1}<nw><%bb.i> to i16){{ U: [^ ]+ S: [^ ]+}}{{ *}}Exits: 2
%tmp = zext i8 %l_95.0.i1 to i16
diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index 6c9439afeeab..fe2fdd74b411 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -30,7 +30,7 @@ define void @test1_yes(i32* %p) nounwind {
ret void
}
-; CHECK: define void @test1_no(i32* %p) #1 {
+; CHECK: define void @test1_no(i32* %p) #3 {
define void @test1_no(i32* %p) nounwind {
call void @callee(i32* %p), !tbaa !2
ret void
@@ -72,9 +72,11 @@ define i32 @test3_no(i8* %p) nounwind {
declare void @callee(i32* %p) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind
-; CHECK: attributes #0 = { nounwind readnone }
-; CHECK: attributes #1 = { nounwind }
+; CHECK: attributes #0 = { norecurse nounwind readnone }
+; CHECK: attributes #1 = { norecurse nounwind }
; CHECK: attributes #2 = { nounwind readonly }
+; CHECK: attributes #3 = { nounwind }
+; CHECK: attributes #4 = { argmemonly nounwind }
; Root note.
!0 = !{ }
diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
index 0c12cac27d3a..197ef7e5196f 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
@@ -7,22 +7,23 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
; CHECK: define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[NUW:#[0-9]+]]
-; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16)
+; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[NUW:#[0-9]+]]
+; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16)
; CHECK-NEXT: %c = add <8 x i16> %a, %a
define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) {
entry:
- %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind, !tbaa !2
- call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16), !tbaa !1
- %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind, !tbaa !2
+ %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind, !tbaa !2
+ call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16), !tbaa !1
+ %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind, !tbaa !2
%c = add <8 x i16> %a, %b
ret <8 x i16> %c
}
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
-; CHECK: attributes #0 = { nounwind readonly }
+; CHECK: attributes #0 = { argmemonly nounwind readonly }
+; CHECK: attributes #1 = { argmemonly nounwind }
; CHECK: attributes [[NUW]] = { nounwind }
!0 = !{!"tbaa root", null}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/licm.ll b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
index fe07730577e6..d2aee58204df 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/licm.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
@@ -5,7 +5,7 @@
; CHECK: @foo
; CHECK: entry:
-; CHECK-NEXT: %tmp3 = load double*, double** @P, !tbaa !0
+; CHECK-NEXT: %tmp3 = load double*, double** @P
; CHECK-NEXT: br label %for.body
@P = common global double* null
diff --git a/test/Analysis/TypeBasedAliasAnalysis/precedence.ll b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
index b2931cac75c8..c7cd2e274dff 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
@@ -1,9 +1,9 @@
-; RUN: opt -basicaa -tbaa -gvn -instcombine -S < %s | FileCheck %s --check-prefix=TBAA
-; RUN: opt -tbaa -basicaa -gvn -instcombine -S < %s | FileCheck %s --check-prefix=BASICAA
+; RUN: opt -tbaa -disable-basicaa -gvn -instcombine -S < %s | FileCheck %s --check-prefix=TBAA
+; RUN: opt -tbaa -gvn -instcombine -S < %s | FileCheck %s --check-prefix=BASICAA
; According to the TBAA metadata the load and store don't alias. However,
-; according to the actual code, they do. The order of the alias analysis
-; passes should determine which of these takes precedence.
+; according to the actual code, they do. Disabling basicaa shows the raw TBAA
+; results.
target datalayout = "e-p:64:64:64"
diff --git a/test/Analysis/ValueTracking/known-bits-from-range-md.ll b/test/Analysis/ValueTracking/known-bits-from-range-md.ll
new file mode 100644
index 000000000000..e1de089b3501
--- /dev/null
+++ b/test/Analysis/ValueTracking/known-bits-from-range-md.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+define i1 @test0(i8* %ptr) {
+; CHECK-LABEL: @test0(
+ entry:
+ %val = load i8, i8* %ptr, !range !{i8 -50, i8 0}
+ %and = and i8 %val, 128
+ %is.eq = icmp eq i8 %and, 128
+ ret i1 %is.eq
+; CHECK: ret i1 true
+}
+
+define i1 @test1(i8* %ptr) {
+; CHECK-LABEL: @test1(
+ entry:
+ %val = load i8, i8* %ptr, !range !{i8 64, i8 128}
+ %and = and i8 %val, 64
+ %is.eq = icmp eq i8 %and, 64
+ ret i1 %is.eq
+; CHECK: ret i1 true
+}
+
+define i1 @test2(i8* %ptr) {
+; CHECK-LABEL: @test2(
+ entry:
+; CHECK: load
+; CHECK: and
+; CHECK: icmp eq
+; CHECK: ret
+ %val = load i8, i8* %ptr, !range !{i8 64, i8 129}
+ %and = and i8 %val, 64
+ %is.eq = icmp eq i8 %and, 64
+ ret i1 %is.eq
+}
diff --git a/test/Analysis/ValueTracking/known-non-equal.ll b/test/Analysis/ValueTracking/known-non-equal.ll
new file mode 100644
index 000000000000..d28b3f4f63a3
--- /dev/null
+++ b/test/Analysis/ValueTracking/known-non-equal.ll
@@ -0,0 +1,21 @@
+; RUN: opt -instsimplify < %s -S | FileCheck %s
+
+; CHECK: define i1 @test
+define i1 @test(i8* %pq, i8 %B) {
+ %q = load i8, i8* %pq, !range !0 ; %q is known nonzero; no known bits
+ %A = add nsw i8 %B, %q
+ %cmp = icmp eq i8 %A, %B
+ ; CHECK: ret i1 false
+ ret i1 %cmp
+}
+
+; CHECK: define i1 @test2
+define i1 @test2(i8 %a, i8 %b) {
+ %A = or i8 %a, 2 ; %A[1] = 1
+ %B = and i8 %b, -3 ; %B[1] = 0
+ %cmp = icmp eq i8 %A, %B ; %A[1] and %B[1] are contradictory.
+ ; CHECK: ret i1 false
+ ret i1 %cmp
+}
+
+!0 = !{ i8 1, i8 5 }
diff --git a/test/Analysis/ValueTracking/knownnonzero-shift.ll b/test/Analysis/ValueTracking/knownnonzero-shift.ll
new file mode 100644
index 000000000000..e59d19cc2e26
--- /dev/null
+++ b/test/Analysis/ValueTracking/knownnonzero-shift.ll
@@ -0,0 +1,13 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test
+define i1 @test(i8 %p, i8* %pq) {
+ %q = load i8, i8* %pq, !range !0 ; %q is known nonzero; no known bits
+ %1 = shl i8 %p, %q ; because %q is nonzero, %1[0] is known to be zero.
+ %2 = and i8 %1, 1
+ %x = icmp eq i8 %2, 0
+ ; CHECK: ret i1 true
+ ret i1 %x
+}
+
+!0 = !{ i8 1, i8 5 }
diff --git a/test/Analysis/ValueTracking/knownzero-shift.ll b/test/Analysis/ValueTracking/knownzero-shift.ll
new file mode 100644
index 000000000000..835d87a9d9c1
--- /dev/null
+++ b/test/Analysis/ValueTracking/knownzero-shift.ll
@@ -0,0 +1,14 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test
+define i1 @test(i8 %p, i8* %pq) {
+ %q = load i8, i8* %pq, !range !0 ; %q is known nonzero; no known bits
+ %1 = or i8 %p, 2 ; %1[1] = 1
+ %2 = and i8 %1, 254 ; %2[0] = 0, %2[1] = 1
+ %A = lshr i8 %2, 1 ; We should know that %A is nonzero.
+ %x = icmp eq i8 %A, 0
+ ; CHECK: ret i1 false
+ ret i1 %x
+}
+
+!0 = !{ i8 1, i8 5 }
diff --git a/test/Analysis/ValueTracking/memory-dereferenceable.ll b/test/Analysis/ValueTracking/memory-dereferenceable.ll
index f49f4f77f404..5b45172f695e 100644
--- a/test/Analysis/ValueTracking/memory-dereferenceable.ll
+++ b/test/Analysis/ValueTracking/memory-dereferenceable.ll
@@ -5,73 +5,156 @@
target datalayout = "e"
+%TypeOpaque = type opaque
+
declare zeroext i1 @return_i1()
+declare i32* @foo()
@globalstr = global [6 x i8] c"hello\00"
@globali32ptr = external global i32*
%struct.A = type { [8 x i8], [5 x i8] }
@globalstruct = external global %struct.A
-define void @test(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" {
+@globalptr.align1 = external global i8, align 1
+@globalptr.align16 = external global i8, align 16
+
+; CHECK-LABEL: 'test'
+define void @test(i32 addrspace(1)* dereferenceable(8) %dparam,
+ i8 addrspace(1)* dereferenceable(32) align 1 %dparam.align1,
+ i8 addrspace(1)* dereferenceable(32) align 16 %dparam.align16)
+ gc "statepoint-example" {
; CHECK: The following are dereferenceable:
-; CHECK: %globalptr
-; CHECK: %alloca
-; CHECK: %dparam
-; CHECK: %relocate
-; CHECK-NOT: %nparam
-; CHECK-NOT: %nd_load
-; CHECK: %d4_load
-; CHECK-NOT: %d2_load
-; CHECK-NOT: %d_or_null_load
-; CHECK: %d_or_null_non_null_load
-; CHECK: %within_allocation
-; CHECK-NOT: %outside_allocation
entry:
+; CHECK: %globalptr{{.*}}(aligned)
%globalptr = getelementptr inbounds [6 x i8], [6 x i8]* @globalstr, i32 0, i32 0
%load1 = load i8, i8* %globalptr
+
+; CHECK: %alloca{{.*}}(aligned)
%alloca = alloca i1
%load2 = load i1, i1* %alloca
+
+; CHECK: %dparam{{.*}}(aligned)
%load3 = load i32, i32 addrspace(1)* %dparam
- %tok = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
- %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %tok, i32 7, i32 7)
+
+; CHECK: %relocate{{.*}}(aligned)
+ %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
%load4 = load i32, i32 addrspace(1)* %relocate
+
+; CHECK-NOT: %nparam
%nparam = getelementptr i32, i32 addrspace(1)* %dparam, i32 5
%load5 = load i32, i32 addrspace(1)* %nparam
; Load from a non-dereferenceable load
+; CHECK-NOT: %nd_load
%nd_load = load i32*, i32** @globali32ptr
%load6 = load i32, i32* %nd_load
; Load from a dereferenceable load
+; CHECK: %d4_load{{.*}}(aligned)
%d4_load = load i32*, i32** @globali32ptr, !dereferenceable !0
%load7 = load i32, i32* %d4_load
; Load from an offset not covered by the dereferenceable portion
+; CHECK-NOT: %d2_load
%d2_load = load i32*, i32** @globali32ptr, !dereferenceable !1
%load8 = load i32, i32* %d2_load
; Load from a potentially null pointer with dereferenceable_or_null
+; CHECK-NOT: %d_or_null_load
%d_or_null_load = load i32*, i32** @globali32ptr, !dereferenceable_or_null !0
%load9 = load i32, i32* %d_or_null_load
; Load from a non-null pointer with dereferenceable_or_null
+; CHECK: %d_or_null_non_null_load{{.*}}(aligned)
%d_or_null_non_null_load = load i32*, i32** @globali32ptr, !nonnull !2, !dereferenceable_or_null !0
%load10 = load i32, i32* %d_or_null_non_null_load
; It's OK to overrun static array size as long as we stay within underlying object size
+; CHECK: %within_allocation{{.*}}(aligned)
%within_allocation = getelementptr inbounds %struct.A, %struct.A* @globalstruct, i64 0, i32 0, i64 10
%load11 = load i8, i8* %within_allocation
; GEP is outside the underlying object size
+; CHECK-NOT: %outside_allocation
%outside_allocation = getelementptr inbounds %struct.A, %struct.A* @globalstruct, i64 0, i32 1, i64 10
%load12 = load i8, i8* %outside_allocation
+ ; Loads from aligned globals
+; CHECK: @globalptr.align1{{.*}}(unaligned)
+; CHECK: @globalptr.align16{{.*}}(aligned)
+ %load13 = load i8, i8* @globalptr.align1, align 16
+ %load14 = load i8, i8* @globalptr.align16, align 16
+
+ ; Loads from aligned arguments
+; CHECK: %dparam.align1{{.*}}(unaligned)
+; CHECK: %dparam.align16{{.*}}(aligned)
+ %load15 = load i8, i8 addrspace(1)* %dparam.align1, align 16
+ %load16 = load i8, i8 addrspace(1)* %dparam.align16, align 16
+
+ ; Loads from aligned allocas
+; CHECK: %alloca.align1{{.*}}(unaligned)
+; CHECK: %alloca.align16{{.*}}(aligned)
+ %alloca.align1 = alloca i1, align 1
+ %alloca.align16 = alloca i1, align 16
+ %load17 = load i1, i1* %alloca.align1, align 16
+ %load18 = load i1, i1* %alloca.align16, align 16
+
+ ; Loads from GEPs
+; CHECK: %gep.align1.offset1{{.*}}(unaligned)
+; CHECK: %gep.align16.offset1{{.*}}(unaligned)
+; CHECK: %gep.align1.offset16{{.*}}(unaligned)
+; CHECK: %gep.align16.offset16{{.*}}(aligned)
+ %gep.align1.offset1 = getelementptr inbounds i8, i8 addrspace(1)* %dparam.align1, i32 1
+ %gep.align16.offset1 = getelementptr inbounds i8, i8 addrspace(1)* %dparam.align16, i32 1
+ %gep.align1.offset16 = getelementptr inbounds i8, i8 addrspace(1)* %dparam.align1, i32 16
+ %gep.align16.offset16 = getelementptr inbounds i8, i8 addrspace(1)* %dparam.align16, i32 16
+ %load19 = load i8, i8 addrspace(1)* %gep.align1.offset1, align 16
+ %load20 = load i8, i8 addrspace(1)* %gep.align16.offset1, align 16
+ %load21 = load i8, i8 addrspace(1)* %gep.align1.offset16, align 16
+ %load22 = load i8, i8 addrspace(1)* %gep.align16.offset16, align 16
+
+; CHECK-NOT: %no_deref_return
+; CHECK: %deref_return{{.*}}(unaligned)
+; CHECK: %deref_and_aligned_return{{.*}}(aligned)
+ %no_deref_return = call i32* @foo()
+ %deref_return = call dereferenceable(32) i32* @foo()
+ %deref_and_aligned_return = call dereferenceable(32) align 16 i32* @foo()
+ %load23 = load i32, i32* %no_deref_return
+ %load24 = load i32, i32* %deref_return, align 16
+ %load25 = load i32, i32* %deref_and_aligned_return, align 16
+
+ ; Load from a dereferenceable and aligned load
+; CHECK: %d4_unaligned_load{{.*}}(unaligned)
+; CHECK: %d4_aligned_load{{.*}}(aligned)
+ %d4_unaligned_load = load i32*, i32** @globali32ptr, !dereferenceable !0
+ %d4_aligned_load = load i32*, i32** @globali32ptr, !dereferenceable !0, !align !{i64 16}
+ %load26 = load i32, i32* %d4_unaligned_load, align 16
+ %load27 = load i32, i32* %d4_aligned_load, align 16
+
ret void
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
+; Just check that we don't crash.
+; CHECK-LABEL: 'opaque_type_crasher'
+define void @opaque_type_crasher(%TypeOpaque* dereferenceable(16) %a) {
+entry:
+ %bc = bitcast %TypeOpaque* %a to i8*
+ %ptr8 = getelementptr inbounds i8, i8* %bc, i32 8
+ %ptr32 = bitcast i8* %ptr8 to i32*
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %res = load i32, i32* %ptr32, align 4
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
!0 = !{i64 4}
!1 = !{i64 2}
diff --git a/test/Analysis/ValueTracking/monotonic-phi.ll b/test/Analysis/ValueTracking/monotonic-phi.ll
new file mode 100644
index 000000000000..3204bda49f0b
--- /dev/null
+++ b/test/Analysis/ValueTracking/monotonic-phi.ll
@@ -0,0 +1,49 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test1
+define i1 @test1(i8 %p, i8* %pq, i8 %n, i8 %r) {
+entry:
+ br label %loop
+loop:
+ %A = phi i8 [ 1, %entry ], [ %next, %loop ]
+ %next = add nsw i8 %A, 1
+ %cmp1 = icmp eq i8 %A, %n
+ br i1 %cmp1, label %exit, label %loop
+exit:
+ %add = or i8 %A, %r
+ %cmp = icmp eq i8 %add, 0
+ ; CHECK: ret i1 false
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @test2
+define i1 @test2(i8 %p, i8* %pq, i8 %n, i8 %r) {
+entry:
+ br label %loop
+loop:
+ %A = phi i8 [ 1, %entry ], [ %next, %loop ]
+ %next = add i8 %A, 1
+ %cmp1 = icmp eq i8 %A, %n
+ br i1 %cmp1, label %exit, label %loop
+exit:
+ %add = or i8 %A, %r
+ %cmp = icmp eq i8 %add, 0
+ ; CHECK-NOT: ret i1 false
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @test3
+define i1 @test3(i8 %p, i8* %pq, i8 %n, i8 %r) {
+entry:
+ br label %loop
+loop:
+ %A = phi i8 [ 1, %entry ], [ %next, %loop ]
+ %next = add nuw i8 %A, 1
+ %cmp1 = icmp eq i8 %A, %n
+ br i1 %cmp1, label %exit, label %loop
+exit:
+ %add = or i8 %A, %r
+ %cmp = icmp eq i8 %add, 0
+ ; CHECK: ret i1 false
+ ret i1 %cmp
+}
diff --git a/test/Analysis/ValueTracking/pr24866.ll b/test/Analysis/ValueTracking/pr24866.ll
new file mode 100644
index 000000000000..b146b4ac0564
--- /dev/null
+++ b/test/Analysis/ValueTracking/pr24866.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S %s -value-tracking-dom-conditions -licm -load-combine | FileCheck %s
+; In pr24866.ll, we saw a crash when accessing a nullptr returned when
+; asking for a dominator tree Node. This reproducer is really fragile,
+; but it's currently the best we have.
+
+%struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183 = type { [256 x i32], [256 x i8] }
+
+
+; Function Attrs: nounwind uwtable
+define void @encode_one_blockX2(%struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* nocapture readonly %actbl) #0 {
+; CHECK-LABEL: @encode_one_blockX2
+entry:
+ br i1 false, label %L_KLOOP_01, label %L_KLOOP.preheader
+
+L_KLOOP_01: ; preds = %while.end, %entry
+ br label %L_KLOOP.preheader
+
+L_KLOOP_08: ; preds = %while.end
+ br label %L_KLOOP.preheader
+
+L_KLOOP.preheader: ; preds = %L_KLOOP_08, %L_KLOOP_01, %entry
+ %r.2.ph = phi i32 [ undef, %L_KLOOP_08 ], [ 0, %entry ], [ undef, %L_KLOOP_01 ]
+ br label %L_KLOOP
+
+L_KLOOP: ; preds = %while.end, %L_KLOOP.preheader
+ %r.2 = phi i32 [ 0, %while.end ], [ %r.2.ph, %L_KLOOP.preheader ]
+ br i1 true, label %while.body, label %while.end
+
+while.body: ; preds = %while.body, %L_KLOOP
+ br label %while.body
+
+while.end: ; preds = %L_KLOOP
+ %shl105 = shl i32 %r.2, 4
+ %add106 = add nsw i32 %shl105, undef
+ %idxprom107 = sext i32 %add106 to i64
+ %arrayidx108 = getelementptr inbounds %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183, %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* %actbl, i64 0, i32 0, i64 %idxprom107
+ %0 = load i32, i32* %arrayidx108, align 4
+ %arrayidx110 = getelementptr inbounds %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183, %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* %actbl, i64 0, i32 1, i64 %idxprom107
+ %1 = load i8, i8* %arrayidx110, align 1
+ indirectbr i8* undef, [label %L_KLOOP_DONE, label %L_KLOOP_01, label %L_KLOOP_08, label %L_KLOOP]
+
+L_KLOOP_DONE: ; preds = %while.end
+ ret void
+}
diff --git a/test/Assembler/2007-09-10-AliasFwdRef.ll b/test/Assembler/2007-09-10-AliasFwdRef.ll
index 8e0a5718058c..882569b2f24e 100644
--- a/test/Assembler/2007-09-10-AliasFwdRef.ll
+++ b/test/Assembler/2007-09-10-AliasFwdRef.ll
@@ -3,7 +3,7 @@
; PR1645
@__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*)
-@__gthrw_pthread_cancel = weak alias i32 (i32)* @pthread_cancel
+@__gthrw_pthread_cancel = weak alias i32 (i32), i32 (i32)* @pthread_cancel
diff --git a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
index 8a6735fbca48..0a2462610acb 100644
--- a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
+++ b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin10.2"
@TestArrayPtr = global %struct.test* getelementptr inbounds ([10 x %struct.test], [10 x %struct.test]* @TestArray, i64 0, i64 3) ; <%struct.test**> [#uses=1]
@TestArray = common global [10 x %struct.test] zeroinitializer, align 32 ; <[10 x %struct.test]*> [#uses=2]
-define i32 @main() nounwind readonly {
+define i32 @main() nounwind readonly !dbg !1 {
%diff1 = alloca i64 ; <i64*> [#uses=2]
; CHECK: call void @llvm.dbg.value(metadata i64 72,
call void @llvm.dbg.declare(metadata i64* %diff1, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
@@ -25,9 +25,9 @@ define i32 @main() nounwind readonly {
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!7 = !{!1}
-!6 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131941)", isOptimized: true, emissionKind: 0, file: !8, enums: !9, retainedTypes: !9, subprograms: !7)
-!0 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 2, scope: !1, file: !2, type: !5)
-!1 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !8, scope: !2, type: !3, function: i32 ()* @main)
+!6 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131941)", isOptimized: true, emissionKind: 0, file: !8, enums: !9, retainedTypes: !9, subprograms: !7)
+!0 = !DILocalVariable(name: "c", line: 2, scope: !1, file: !2, type: !5)
+!1 = distinct !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !8, scope: !2, type: !3)
!2 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
diff --git a/test/Assembler/ConstantExprFoldCast.ll b/test/Assembler/ConstantExprFoldCast.ll
index 094f87be92cf..dfe840cc37dd 100644
--- a/test/Assembler/ConstantExprFoldCast.ll
+++ b/test/Assembler/ConstantExprFoldCast.ll
@@ -15,3 +15,7 @@
; Address space cast AS0 null-> AS1 null
@H = global i32 addrspace(1)* addrspacecast(i32* null to i32 addrspace(1)*)
+
+; Bitcast -> GEP
+@I = external global { i32 }
+@J = global i32* bitcast ({ i32 }* @I to i32*)
diff --git a/test/Assembler/ConstantExprNoFold.ll b/test/Assembler/ConstantExprNoFold.ll
index 38f8cbbc9cd8..6b06a8fbb75f 100644
--- a/test/Assembler/ConstantExprNoFold.ll
+++ b/test/Assembler/ConstantExprNoFold.ll
@@ -43,8 +43,8 @@ target datalayout = "p:32:32"
@empty.cmp = global i1 icmp eq ([0 x i8]* @empty.1, [0 x i8]* @empty.2)
; Don't add an inbounds on @glob.a3, since it's not inbounds.
-; CHECK: @glob.a3 = alias getelementptr (i32, i32* @glob.a2, i32 1)
+; CHECK: @glob.a3 = alias i32, getelementptr (i32, i32* @glob.a2, i32 1)
@glob = global i32 0
-@glob.a3 = alias getelementptr (i32, i32* @glob.a2, i32 1)
-@glob.a2 = alias getelementptr (i32, i32* @glob.a1, i32 1)
-@glob.a1 = alias i32* @glob
+@glob.a3 = alias i32, getelementptr (i32, i32* @glob.a2, i32 1)
+@glob.a2 = alias i32, getelementptr (i32, i32* @glob.a1, i32 1)
+@glob.a1 = alias i32, i32* @glob
diff --git a/test/Assembler/addrspacecast-alias.ll b/test/Assembler/addrspacecast-alias.ll
index 745e525c15f4..32eb5b9d4868 100644
--- a/test/Assembler/addrspacecast-alias.ll
+++ b/test/Assembler/addrspacecast-alias.ll
@@ -4,5 +4,5 @@
; Test that global aliases are allowed to be constant addrspacecast
@i = internal addrspace(1) global i8 42
-@ia = internal alias addrspacecast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(3)*)
-; CHECK: @ia = internal alias addrspacecast (i8 addrspace(2)* addrspace(1)* bitcast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(1)*) to i8 addrspace(2)* addrspace(3)*)
+@ia = internal alias i8 addrspace(2)*, addrspacecast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(3)*)
+; CHECK: @ia = internal alias i8 addrspace(2)*, addrspacecast (i8 addrspace(2)* addrspace(1)* bitcast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(1)*) to i8 addrspace(2)* addrspace(3)*)
diff --git a/test/Assembler/alias-redefinition.ll b/test/Assembler/alias-redefinition.ll
index 19ad85bf5f5b..3c36c81d8138 100644
--- a/test/Assembler/alias-redefinition.ll
+++ b/test/Assembler/alias-redefinition.ll
@@ -1,7 +1,7 @@
; RUN: not llvm-as %s 2>&1 | FileCheck %s
-; CHECK: error: redefinition of global named '@bar'
+; CHECK: error: redefinition of global '@bar'
@foo = global i32 0
-@bar = alias i32* @foo
-@bar = alias i32* @foo
+@bar = alias i32, i32* @foo
+@bar = alias i32, i32* @foo
diff --git a/test/Assembler/alias-use-list-order.ll b/test/Assembler/alias-use-list-order.ll
index c72bad28e500..7e72e450a750 100644
--- a/test/Assembler/alias-use-list-order.ll
+++ b/test/Assembler/alias-use-list-order.ll
@@ -6,6 +6,6 @@
@alias.ref2 = global i32* getelementptr inbounds (i32, i32* @alias, i64 1)
; Aliases.
-@alias = alias i32* @global
-@alias.ref3 = alias i32* getelementptr inbounds (i32, i32* @alias, i64 1)
-@alias.ref4 = alias i32* getelementptr inbounds (i32, i32* @alias, i64 1)
+@alias = alias i32, i32* @global
+@alias.ref3 = alias i32, getelementptr inbounds (i32, i32* @alias, i64 1)
+@alias.ref4 = alias i32, getelementptr inbounds (i32, i32* @alias, i64 1)
diff --git a/test/Assembler/anon-functions.ll b/test/Assembler/anon-functions.ll
index 42eea837227a..2352a00f57d2 100644
--- a/test/Assembler/anon-functions.ll
+++ b/test/Assembler/anon-functions.ll
@@ -5,8 +5,8 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
-@f = alias void ()* @0 ; <void ()*> [#uses=0]
-@g = alias void ()* @1 ; <void ()*> [#uses=0]
+@f = alias void (), void ()* @0 ; <void ()*> [#uses=0]
+@g = alias void (), void ()* @1 ; <void ()*> [#uses=0]
@h = external global void ()* ; <void ()*> [#uses=0]
define internal void @0() nounwind {
diff --git a/test/Assembler/debug-info.ll b/test/Assembler/debug-info.ll
index 91dfe561a2fe..86630840dc20 100644
--- a/test/Assembler/debug-info.ll
+++ b/test/Assembler/debug-info.ll
@@ -1,8 +1,8 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
; RUN: verify-uselistorder %s
-; CHECK: !named = !{!0, !0, !1, !2, !3, !4, !5, !6, !7, !8, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !27}
-!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30}
+; CHECK: !named = !{!0, !0, !1, !2, !3, !4, !5, !6, !7, !8, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !27, !28, !29, !30, !31}
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34}
; CHECK: !0 = !DISubrange(count: 3)
; CHECK-NEXT: !1 = !DISubrange(count: 3, lowerBound: 4)
@@ -63,10 +63,19 @@
!25 = !DICompositeType(tag: DW_TAG_structure_type)
!26 = !DICompositeType(tag: DW_TAG_structure_type, runtimeLang: 6)
-; !25 = !{!7, !7}
-; !26 = !DISubroutineType(flags: DIFlagPublic | DIFlagStaticMember, types: !25)
-; !27 = !DISubroutineType(types: !25)
+; CHECK-NEXT: !25 = !{!6, !6}
+; CHECK-NEXT: !26 = !DISubroutineType(flags: DIFlagPublic | DIFlagStaticMember, types: !25)
+; CHECK-NEXT: !27 = !DISubroutineType(types: !25)
!27 = !{!7, !7}
!28 = !DISubroutineType(flags: DIFlagPublic | DIFlagStaticMember, types: !27)
!29 = !DISubroutineType(flags: 0, types: !27)
!30 = !DISubroutineType(types: !27)
+
+; CHECK-NEXT: !28 = !DIMacro(type: DW_MACINFO_define, line: 9, name: "Name", value: "Value")
+; CHECK-NEXT: !29 = distinct !{!28}
+; CHECK-NEXT: !30 = !DIMacroFile(line: 9, file: !12, nodes: !29)
+; CHECK-NEXT: !31 = !DIMacroFile(line: 11, file: !12)
+!31 = !DIMacro(type: DW_MACINFO_define, line: 9, name: "Name", value: "Value")
+!32 = distinct !{!31}
+!33 = !DIMacroFile(line: 9, file: !14, nodes: !32)
+!34 = !DIMacroFile(type: DW_MACINFO_start_file, line: 11, file: !14)
diff --git a/test/Assembler/dicompileunit.ll b/test/Assembler/dicompileunit.ll
index dc136f0b83e9..92fa61fe6b90 100644
--- a/test/Assembler/dicompileunit.ll
+++ b/test/Assembler/dicompileunit.ll
@@ -1,8 +1,8 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
; RUN: verify-uselistorder %s
-; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !7, !8, !8}
-!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10}
+; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
!0 = distinct !{}
!1 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
@@ -11,21 +11,16 @@
!4 = distinct !{}
!5 = distinct !{}
!6 = distinct !{}
+!7 = distinct !{}
-; CHECK: !7 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, flags: "-O2", runtimeVersion: 2, splitDebugFilename: "abc.debug", emissionKind: 3, enums: !2, retainedTypes: !3, subprograms: !4, globals: !5, imports: !6, dwoId: 42)
-!7 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang",
- isOptimized: true, flags: "-O2", runtimeVersion: 2,
- splitDebugFilename: "abc.debug", emissionKind: 3,
- enums: !2, retainedTypes: !3, subprograms: !4,
- globals: !5, imports: !6, dwoId: 42)
-!8 = !DICompileUnit(language: 12, file: !1, producer: "clang",
- isOptimized: true, flags: "-O2", runtimeVersion: 2,
- splitDebugFilename: "abc.debug", emissionKind: 3,
- enums: !2, retainedTypes: !3, subprograms: !4,
- globals: !5, imports: !6, dwoId: 42)
+; CHECK: !8 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, flags: "-O2", runtimeVersion: 2, splitDebugFilename: "abc.debug", emissionKind: 3, enums: !2, retainedTypes: !3, subprograms: !4, globals: !5, imports: !6, macros: !7, dwoId: 42)
+!8 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang",
+ isOptimized: true, flags: "-O2", runtimeVersion: 2,
+ splitDebugFilename: "abc.debug", emissionKind: 3,
+ enums: !2, retainedTypes: !3, subprograms: !4,
+ globals: !5, imports: !6, macros: !7, dwoId: 42)
-; CHECK: !8 = !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: 0)
-!9 = !DICompileUnit(language: 12, file: !1, producer: "",
- isOptimized: false, flags: "", runtimeVersion: 0,
- splitDebugFilename: "", emissionKind: 0)
-!10 = !DICompileUnit(language: 12, file: !1)
+; CHECK: !9 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: 0)
+!9 = distinct !DICompileUnit(language: 12, file: !1, producer: "",
+ isOptimized: false, flags: "", runtimeVersion: 0,
+ splitDebugFilename: "", emissionKind: 0)
diff --git a/test/Assembler/diimportedentity.ll b/test/Assembler/diimportedentity.ll
index 929267e3b25e..334eb20daede 100644
--- a/test/Assembler/diimportedentity.ll
+++ b/test/Assembler/diimportedentity.ll
@@ -4,9 +4,9 @@
; CHECK: !named = !{!0, !1, !2, !3, !3}
!named = !{!0, !1, !2, !3, !4}
-; CHECK: !0 = !DISubprogram({{.*}})
+; CHECK: !0 = distinct !DISubprogram({{.*}})
; CHECK-NEXT: !1 = !DICompositeType({{.*}})
-!0 = !DISubprogram(name: "foo")
+!0 = distinct !DISubprogram(name: "foo")
!1 = !DICompositeType(tag: DW_TAG_structure_type, name: "Class", size: 32, align: 32)
; CHECK-NEXT: !2 = !DIImportedEntity(tag: DW_TAG_imported_module, name: "foo", scope: !0, entity: !1, line: 7)
diff --git a/test/Assembler/dilexicalblock.ll b/test/Assembler/dilexicalblock.ll
index 2cab372384ca..9e1ab64fd51e 100644
--- a/test/Assembler/dilexicalblock.ll
+++ b/test/Assembler/dilexicalblock.ll
@@ -5,7 +5,7 @@
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
!0 = distinct !{}
-!1 = !DISubprogram(name: "foo", scope: !2)
+!1 = distinct !DISubprogram(name: "foo", scope: !2)
!2 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
; CHECK: !3 = !DILexicalBlock(scope: !1, file: !2, line: 7, column: 35)
diff --git a/test/Assembler/dilocalvariable-arg-large.ll b/test/Assembler/dilocalvariable-arg-large.ll
index 7788186a54ab..a4c3f8c97630 100644
--- a/test/Assembler/dilocalvariable-arg-large.ll
+++ b/test/Assembler/dilocalvariable-arg-large.ll
@@ -6,5 +6,5 @@
!0 = distinct !DISubprogram()
-; CHECK: !1 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "foo", arg: 65535, scope: !0)
-!1 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "foo", arg: 65535, scope: !0)
+; CHECK: !1 = !DILocalVariable(name: "foo", arg: 65535, scope: !0)
+!1 = !DILocalVariable(name: "foo", arg: 65535, scope: !0)
diff --git a/test/Assembler/dilocalvariable.ll b/test/Assembler/dilocalvariable.ll
index 312373ca8623..d286dd92a402 100644
--- a/test/Assembler/dilocalvariable.ll
+++ b/test/Assembler/dilocalvariable.ll
@@ -12,15 +12,15 @@
!3 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!4 = !DILocation(scope: !0)
-; CHECK: !5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "foo", arg: 3, scope: !0, file: !2, line: 7, type: !3, flags: DIFlagArtificial)
-; CHECK: !6 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "foo", scope: !0, file: !2, line: 7, type: !3, flags: DIFlagArtificial)
-!5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "foo", arg: 3,
+; CHECK: !5 = !DILocalVariable(name: "foo", arg: 3, scope: !0, file: !2, line: 7, type: !3, flags: DIFlagArtificial)
+; CHECK: !6 = !DILocalVariable(name: "foo", scope: !0, file: !2, line: 7, type: !3, flags: DIFlagArtificial)
+!5 = !DILocalVariable(name: "foo", arg: 3,
scope: !0, file: !2, line: 7, type: !3,
flags: DIFlagArtificial)
-!6 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "foo", scope: !0,
+!6 = !DILocalVariable(name: "foo", scope: !0,
file: !2, line: 7, type: !3, flags: DIFlagArtificial)
-; CHECK: !7 = !DILocalVariable(tag: DW_TAG_arg_variable, arg: 0, scope: !0)
-; CHECK: !8 = !DILocalVariable(tag: DW_TAG_auto_variable, scope: !0)
-!7 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !0)
-!8 = !DILocalVariable(tag: DW_TAG_auto_variable, scope: !0)
+; CHECK: !7 = !DILocalVariable(arg: 1, scope: !0)
+; CHECK: !8 = !DILocalVariable(scope: !0)
+!7 = !DILocalVariable(scope: !0, arg: 1)
+!8 = !DILocalVariable(scope: !0)
diff --git a/test/Assembler/dilocation.ll b/test/Assembler/dilocation.ll
index a468f8abe9c6..b177be590246 100644
--- a/test/Assembler/dilocation.ll
+++ b/test/Assembler/dilocation.ll
@@ -4,8 +4,8 @@
; CHECK: !named = !{!0, !1, !1, !2, !2, !3, !3, !4}
!named = !{!0, !1, !2, !3, !4, !5, !6, !7}
-; CHECK: !0 = !DISubprogram(
-!0 = !DISubprogram()
+; CHECK: !0 = distinct !DISubprogram(
+!0 = distinct !DISubprogram()
; CHECK-NEXT: !1 = !DILocation(line: 3, column: 7, scope: !0)
!1 = !DILocation(line: 3, column: 7, scope: !0)
diff --git a/test/Assembler/disubprogram.ll b/test/Assembler/disubprogram.ll
index 3fa1081889ff..af60f52eb210 100644
--- a/test/Assembler/disubprogram.ll
+++ b/test/Assembler/disubprogram.ll
@@ -1,7 +1,10 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
; RUN: verify-uselistorder %s
-declare void @_Z3foov()
+; CHECK: define void @_Z3foov() !dbg !9
+define void @_Z3foov() !dbg !9 {
+ ret void
+}
; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
@@ -12,17 +15,21 @@ declare void @_Z3foov()
!3 = !DISubroutineType(types: !0)
!4 = distinct !DICompositeType(tag: DW_TAG_structure_type)
!5 = distinct !{}
-!6 = distinct !DISubprogram(isDefinition: false)
-!7 = distinct !{}
+!6 = distinct !{}
-; CHECK: !8 = !DISubprogram(name: "foo", linkageName: "_Zfoov", scope: !1, file: !2, line: 7, type: !3, isLocal: true, isDefinition: false, scopeLine: 8, containingType: !4, virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10, flags: DIFlagPrototyped, isOptimized: true, function: void ()* @_Z3foov, templateParams: !5, declaration: !6, variables: !7)
-!8 = !DISubprogram(name: "foo", linkageName: "_Zfoov", scope: !1,
- file: !2, line: 7, type: !3, isLocal: true,
- isDefinition: false, scopeLine: 8, containingType: !4,
- virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10,
- flags: DIFlagPrototyped, isOptimized: true, function: void ()* @_Z3foov,
- templateParams: !5, declaration: !6, variables: !7)
+; CHECK: !7 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false)
+!7 = distinct !DISubprogram()
-; CHECK: !9 = !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false)
-!9 = !DISubprogram()
+; CHECK: !8 = !DISubprogram(scope: null, isLocal: false, isDefinition: false, isOptimized: false)
+!8 = !DISubprogram(isDefinition: false)
+; CHECK: !9 = distinct !DISubprogram(name: "foo", linkageName: "_Zfoov", scope: !1, file: !2, line: 7, type: !3, isLocal: true, isDefinition: true, scopeLine: 8, containingType: !4, virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10, flags: DIFlagPrototyped, isOptimized: true, templateParams: !5, declaration: !8, variables: !6)
+!9 = distinct !DISubprogram(name: "foo", linkageName: "_Zfoov", scope: !1,
+ file: !2, line: 7, type: !3, isLocal: true,
+ isDefinition: true, scopeLine: 8, containingType: !4,
+ virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10,
+ flags: DIFlagPrototyped, isOptimized: true,
+ templateParams: !5, declaration: !8, variables: !6)
+
+!10 = !{i32 1, !"Debug Info Version", i32 3}
+!llvm.module.flags = !{!10}
diff --git a/test/Assembler/drop-debug-info.ll b/test/Assembler/drop-debug-info.ll
index 44a7b78b004d..13e0c32718e7 100644
--- a/test/Assembler/drop-debug-info.ll
+++ b/test/Assembler/drop-debug-info.ll
@@ -2,7 +2,7 @@
; RUN: llvm-dis < %t.bc | FileCheck %s
; RUN: verify-uselistorder < %t.bc
-define i32 @main() {
+define i32 @main() !dbg !4 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -12,11 +12,11 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 195495) (llvm/trunk 195495:195504M)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 195495) (llvm/trunk 195495:195504M)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "../llvm/tools/clang/test/CodeGen/debug-info-version.c", directory: "/Users/manmanren/llvm_gmail/release")
!2 = !{i32 0}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "../llvm/tools/clang/test/CodeGen/debug-info-version.c", directory: "/Users/manmanren/llvm_gmail/release")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/Assembler/global-addrspace-forwardref.ll b/test/Assembler/global-addrspace-forwardref.ll
index 4a036e08be6a..c66b5b73066f 100644
--- a/test/Assembler/global-addrspace-forwardref.ll
+++ b/test/Assembler/global-addrspace-forwardref.ll
@@ -7,3 +7,12 @@
; CHECK: @a = addrspace(1) global i8 0
@a2 = global i8 addrspace(1)* @a
@a = addrspace(1) global i8 0
+
+; Now test with global IDs instead of global names.
+
+; CHECK: @a3 = global i8 addrspace(1)* @0
+; CHECK: @0 = addrspace(1) global i8 0
+
+@a3 = global i8 addrspace(1)* @0
+@0 = addrspace(1) global i8 0
+
diff --git a/test/Assembler/incorrect-tdep-attrs-parsing.ll b/test/Assembler/incorrect-tdep-attrs-parsing.ll
new file mode 100644
index 000000000000..bf8152b69dc0
--- /dev/null
+++ b/test/Assembler/incorrect-tdep-attrs-parsing.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; CHECK: define i32 @f(i64 "foo bar", i64, i64, i64 "xyz") {
+define i32 @f(i64 "foo bar", i64, i64, i64 "xyz") {
+ ret i32 41
+}
diff --git a/test/Assembler/internal-hidden-alias.ll b/test/Assembler/internal-hidden-alias.ll
index df547c0838cb..f5bf4fb869e4 100644
--- a/test/Assembler/internal-hidden-alias.ll
+++ b/test/Assembler/internal-hidden-alias.ll
@@ -2,5 +2,5 @@
@global = global i32 0
-@alias = internal hidden alias i32* @global
+@alias = internal hidden alias i32, i32* @global
; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/internal-protected-alias.ll b/test/Assembler/internal-protected-alias.ll
index 46a05ec732f5..37e90a0de1c3 100644
--- a/test/Assembler/internal-protected-alias.ll
+++ b/test/Assembler/internal-protected-alias.ll
@@ -2,5 +2,5 @@
@global = global i32 0
-@alias = internal protected alias i32* @global
+@alias = internal protected alias i32, i32* @global
; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/invalid-alias-mismatched-explicit-type.ll b/test/Assembler/invalid-alias-mismatched-explicit-type.ll
new file mode 100644
index 000000000000..d28223793082
--- /dev/null
+++ b/test/Assembler/invalid-alias-mismatched-explicit-type.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: <stdin>:4:12: error: explicit pointee type doesn't match operand's pointee type
+@y = global i2 0
+@x = alias i1, i2* @y
diff --git a/test/Assembler/invalid-dicompileunit-language-bad.ll b/test/Assembler/invalid-dicompileunit-language-bad.ll
index e6f49f3fba47..98c04833e78a 100644
--- a/test/Assembler/invalid-dicompileunit-language-bad.ll
+++ b/test/Assembler/invalid-dicompileunit-language-bad.ll
@@ -1,5 +1,5 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
-; CHECK: <stdin>:[[@LINE+1]]:31: error: invalid DWARF language 'DW_LANG_NoSuchLanguage'
-!0 = !DICompileUnit(language: DW_LANG_NoSuchLanguage,
- file: !DIFile(filename: "a", directory: "b"))
+; CHECK: <stdin>:[[@LINE+1]]:40: error: invalid DWARF language 'DW_LANG_NoSuchLanguage'
+!0 = distinct !DICompileUnit(language: DW_LANG_NoSuchLanguage,
+ file: !DIFile(filename: "a", directory: "b"))
diff --git a/test/Assembler/invalid-dicompileunit-language-overflow.ll b/test/Assembler/invalid-dicompileunit-language-overflow.ll
index c558f7aaa258..26b98e10d350 100644
--- a/test/Assembler/invalid-dicompileunit-language-overflow.ll
+++ b/test/Assembler/invalid-dicompileunit-language-overflow.ll
@@ -1,9 +1,9 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
; CHECK-NOT: error:
-!0 = !DICompileUnit(language: 65535,
- file: !DIFile(filename: "a", directory: "b"))
+!0 = distinct !DICompileUnit(language: 65535,
+ file: !DIFile(filename: "a", directory: "b"))
-; CHECK: <stdin>:[[@LINE+1]]:31: error: value for 'language' too large, limit is 65535
-!1 = !DICompileUnit(language: 65536,
- file: !DIFile(filename: "a", directory: "b"))
+; CHECK: <stdin>:[[@LINE+1]]:40: error: value for 'language' too large, limit is 65535
+!1 = distinct !DICompileUnit(language: 65536,
+ file: !DIFile(filename: "a", directory: "b"))
diff --git a/test/Assembler/invalid-dicompileunit-missing-language.ll b/test/Assembler/invalid-dicompileunit-missing-language.ll
index 15631b7f640b..8e4cb0261dbb 100644
--- a/test/Assembler/invalid-dicompileunit-missing-language.ll
+++ b/test/Assembler/invalid-dicompileunit-missing-language.ll
@@ -1,4 +1,4 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
-; CHECK: <stdin>:[[@LINE+1]]:65: error: missing required field 'language'
-!0 = !DICompileUnit(file: !DIFile(filename: "a", directory: "b"))
+; CHECK: <stdin>:[[@LINE+1]]:74: error: missing required field 'language'
+!0 = distinct !DICompileUnit(file: !DIFile(filename: "a", directory: "b"))
diff --git a/test/Assembler/invalid-dicompileunit-null-file.ll b/test/Assembler/invalid-dicompileunit-null-file.ll
index cc1892e91465..450584f3d715 100644
--- a/test/Assembler/invalid-dicompileunit-null-file.ll
+++ b/test/Assembler/invalid-dicompileunit-null-file.ll
@@ -1,4 +1,4 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
-; CHECK: <stdin>:[[@LINE+1]]:27: error: 'file' cannot be null
-!0 = !DICompileUnit(file: null)
+; CHECK: <stdin>:[[@LINE+1]]:36: error: 'file' cannot be null
+!0 = distinct !DICompileUnit(file: null)
diff --git a/test/Assembler/invalid-dicompileunit-uniqued.ll b/test/Assembler/invalid-dicompileunit-uniqued.ll
new file mode 100644
index 000000000000..18ae5c5ff01f
--- /dev/null
+++ b/test/Assembler/invalid-dicompileunit-uniqued.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:6: error: missing 'distinct', required for !DICompileUnit
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !DIFile(filename: "file", directory: "/dir"))
diff --git a/test/Assembler/invalid-dilocalvariable-arg-large.ll b/test/Assembler/invalid-dilocalvariable-arg-large.ll
index d62da601e13c..e6e4a76b1c1e 100644
--- a/test/Assembler/invalid-dilocalvariable-arg-large.ll
+++ b/test/Assembler/invalid-dilocalvariable-arg-large.ll
@@ -1,6 +1,6 @@
; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !{}, arg: 65535)
+!0 = !DILocalVariable(scope: !{}, arg: 65535)
-; CHECK: <stdin>:[[@LINE+1]]:66: error: value for 'arg' too large, limit is 65535
-!1 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !{}, arg: 65536)
+; CHECK: <stdin>:[[@LINE+1]]:40: error: value for 'arg' too large, limit is 65535
+!1 = !DILocalVariable(scope: !{}, arg: 65536)
diff --git a/test/Assembler/invalid-dilocalvariable-arg-negative.ll b/test/Assembler/invalid-dilocalvariable-arg-negative.ll
index 08e370a36660..9cf8e973db21 100644
--- a/test/Assembler/invalid-dilocalvariable-arg-negative.ll
+++ b/test/Assembler/invalid-dilocalvariable-arg-negative.ll
@@ -1,6 +1,7 @@
; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !{}, arg: 0)
+!0 = !DILocalVariable(scope: !{}, arg: 1)
+!1 = !DILocalVariable(scope: !{})
-; CHECK: <stdin>:[[@LINE+1]]:66: error: expected unsigned integer
-!1 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !{}, arg: -1)
+; CHECK: <stdin>:[[@LINE+1]]:40: error: expected unsigned integer
+!2 = !DILocalVariable(scope: !{}, arg: -1)
diff --git a/test/Assembler/invalid-dilocalvariable-missing-scope.ll b/test/Assembler/invalid-dilocalvariable-missing-scope.ll
index 45dcad713609..86bf52ddf2d3 100644
--- a/test/Assembler/invalid-dilocalvariable-missing-scope.ll
+++ b/test/Assembler/invalid-dilocalvariable-missing-scope.ll
@@ -1,4 +1,4 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
-; CHECK: <stdin>:[[@LINE+1]]:48: error: missing required field 'scope'
-!0 = !DILocalVariable(tag: DW_TAG_auto_variable)
+; CHECK: <stdin>:[[@LINE+1]]:23: error: missing required field 'scope'
+!0 = !DILocalVariable()
diff --git a/test/Assembler/invalid-dilocalvariable-missing-tag.ll b/test/Assembler/invalid-dilocalvariable-missing-tag.ll
deleted file mode 100644
index 18062edf75bd..000000000000
--- a/test/Assembler/invalid-dilocalvariable-missing-tag.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
-
-; CHECK: <stdin>:[[@LINE+1]]:45: error: missing required field 'tag'
-!0 = !DILocalVariable(scope: !DISubprogram())
diff --git a/test/Assembler/invalid-disubprogram-uniqued-definition.ll b/test/Assembler/invalid-disubprogram-uniqued-definition.ll
new file mode 100644
index 000000000000..c146883d6649
--- /dev/null
+++ b/test/Assembler/invalid-disubprogram-uniqued-definition.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:6: error: missing 'distinct', required for !DISubprogram when 'isDefinition'
+!0 = !DISubprogram(isDefinition: true)
diff --git a/test/Assembler/invalid-fp80hex.ll b/test/Assembler/invalid-fp80hex.ll
new file mode 100644
index 000000000000..70c518dd648e
--- /dev/null
+++ b/test/Assembler/invalid-fp80hex.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+; Tests bug: 24640
+; CHECK: expected '=' in global variable
+
+@- 0xKate potb8ed
diff --git a/test/Assembler/invalid-fwdref2.ll b/test/Assembler/invalid-fwdref2.ll
index d823481f8e1b..0828cd9b08ac 100644
--- a/test/Assembler/invalid-fwdref2.ll
+++ b/test/Assembler/invalid-fwdref2.ll
@@ -1,4 +1,4 @@
; RUN: not llvm-as %s -disable-output 2>&1 | grep "forward reference and definition of global have different types"
-@a2 = alias void ()* @g2
+@a2 = alias void (), void ()* @g2
@g2 = internal global i8 42
diff --git a/test/Assembler/invalid-inline-constraint.ll b/test/Assembler/invalid-inline-constraint.ll
new file mode 100644
index 000000000000..000fb86a8f79
--- /dev/null
+++ b/test/Assembler/invalid-inline-constraint.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+; Tests bug: https://llvm.org/bugs/show_bug.cgi?id=24646
+; CHECK: error: invalid type for inline asm constraint string
+
+define void @foo() nounwind {
+call void asm sideeffect "mov x0, #42","=~{x0},~{x19},mov |0,{x19},mov x0, #4~x{21}"()ounwi #4~x{21}"()ounwindret
diff --git a/test/Assembler/invalid-untyped-metadata.ll b/test/Assembler/invalid-untyped-metadata.ll
new file mode 100644
index 000000000000..5a97ae659648
--- /dev/null
+++ b/test/Assembler/invalid-untyped-metadata.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+; Tests bug: https://llvm.org/bugs/show_bug.cgi?id=24645
+; CHECK: error: invalid type for inline asm constraint string
+
+ !3=! {%..d04 *asm" !6!={!H)4" ,""
diff --git a/test/Assembler/invalid-uselistorder-indexes-duplicated.ll b/test/Assembler/invalid-uselistorder-indexes-duplicated.ll
index e4affc53c1f6..4ab4fbdfcce9 100644
--- a/test/Assembler/invalid-uselistorder-indexes-duplicated.ll
+++ b/test/Assembler/invalid-uselistorder-indexes-duplicated.ll
@@ -1,7 +1,7 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
; CHECK: error: expected distinct uselistorder indexes in range [0, size)
@global = global i32 0
-@alias1 = alias i32* @global
-@alias2 = alias i32* @global
-@alias3 = alias i32* @global
+@alias1 = alias i32, i32* @global
+@alias2 = alias i32, i32* @global
+@alias3 = alias i32, i32* @global
uselistorder i32* @global, { 0, 0, 2 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-one.ll b/test/Assembler/invalid-uselistorder-indexes-one.ll
index f5eac80a3ca2..f61809f7fea2 100644
--- a/test/Assembler/invalid-uselistorder-indexes-one.ll
+++ b/test/Assembler/invalid-uselistorder-indexes-one.ll
@@ -1,5 +1,5 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
; CHECK: error: value only has one use
@global = global i32 0
-@alias = alias i32* @global
+@alias = alias i32, i32* @global
uselistorder i32* @global, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-ordered.ll b/test/Assembler/invalid-uselistorder-indexes-ordered.ll
index 7bdc40037afe..e7a17b0e3abe 100644
--- a/test/Assembler/invalid-uselistorder-indexes-ordered.ll
+++ b/test/Assembler/invalid-uselistorder-indexes-ordered.ll
@@ -1,7 +1,7 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
; CHECK: error: expected uselistorder indexes to change the order
@global = global i32 0
-@alias1 = alias i32* @global
-@alias2 = alias i32* @global
-@alias3 = alias i32* @global
+@alias1 = alias i32, i32* @global
+@alias2 = alias i32, i32* @global
+@alias3 = alias i32, i32* @global
uselistorder i32* @global, { 0, 1, 2 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-range.ll b/test/Assembler/invalid-uselistorder-indexes-range.ll
index fc97acac5e63..7c3ab116bd81 100644
--- a/test/Assembler/invalid-uselistorder-indexes-range.ll
+++ b/test/Assembler/invalid-uselistorder-indexes-range.ll
@@ -1,7 +1,7 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
; CHECK: error: expected distinct uselistorder indexes in range [0, size)
@global = global i32 0
-@alias1 = alias i32* @global
-@alias2 = alias i32* @global
-@alias3 = alias i32* @global
+@alias1 = alias i32, i32* @global
+@alias2 = alias i32, i32* @global
+@alias3 = alias i32, i32* @global
uselistorder i32* @global, { 0, 3, 1 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-toofew.ll b/test/Assembler/invalid-uselistorder-indexes-toofew.ll
index 88a76fc568a9..fd9ff8029f9a 100644
--- a/test/Assembler/invalid-uselistorder-indexes-toofew.ll
+++ b/test/Assembler/invalid-uselistorder-indexes-toofew.ll
@@ -1,7 +1,7 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
; CHECK: error: wrong number of indexes, expected 3
@global = global i32 0
-@alias1 = alias i32* @global
-@alias2 = alias i32* @global
-@alias3 = alias i32* @global
+@alias1 = alias i32, i32* @global
+@alias2 = alias i32, i32* @global
+@alias3 = alias i32, i32* @global
uselistorder i32* @global, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-toomany.ll b/test/Assembler/invalid-uselistorder-indexes-toomany.ll
index a2cf3da0bd38..ba8481d61aa6 100644
--- a/test/Assembler/invalid-uselistorder-indexes-toomany.ll
+++ b/test/Assembler/invalid-uselistorder-indexes-toomany.ll
@@ -1,6 +1,6 @@
; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
; CHECK: error: wrong number of indexes, expected 2
@global = global i32 0
-@alias1 = alias i32* @global
-@alias2 = alias i32* @global
+@alias1 = alias i32, i32* @global
+@alias2 = alias i32, i32* @global
uselistorder i32* @global, { 1, 0, 2 }
diff --git a/test/Assembler/metadata.ll b/test/Assembler/metadata.ll
index 052ac1b5097e..8c7781e887dd 100644
--- a/test/Assembler/metadata.ll
+++ b/test/Assembler/metadata.ll
@@ -30,7 +30,7 @@ define void @test_attachment_name() {
}
!0 = !DILocation(line: 662302, column: 26, scope: !1)
-!1 = !DISubprogram(name: "foo")
+!1 = distinct !DISubprogram(name: "foo")
!2 = distinct !{}
!3 = distinct !{}
!4 = distinct !{}
diff --git a/test/Assembler/private-hidden-alias.ll b/test/Assembler/private-hidden-alias.ll
index 2e770e58784e..eac27f488e6b 100644
--- a/test/Assembler/private-hidden-alias.ll
+++ b/test/Assembler/private-hidden-alias.ll
@@ -2,5 +2,5 @@
@global = global i32 0
-@alias = private hidden alias i32* @global
+@alias = private hidden alias i32, i32* @global
; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/private-protected-alias.ll b/test/Assembler/private-protected-alias.ll
index f1824a2f3c7c..37bdabaf8a5e 100644
--- a/test/Assembler/private-protected-alias.ll
+++ b/test/Assembler/private-protected-alias.ll
@@ -2,5 +2,5 @@
@global = global i32 0
-@alias = private protected alias i32* @global
+@alias = private protected alias i32, i32* @global
; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/token.ll b/test/Assembler/token.ll
new file mode 100644
index 000000000000..c4d68b4b3ffb
--- /dev/null
+++ b/test/Assembler/token.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
+; Basic smoke test for token type.
+
+; CHECK: declare void @llvm.token.foobar(token)
+declare void @llvm.token.foobar(token)
+
+define void @f() {
+ call void @llvm.token.foobar(token none)
+ ret void
+}
diff --git a/test/Assembler/unnamed-alias.ll b/test/Assembler/unnamed-alias.ll
index 8ae1c45d312d..121e54b7d079 100644
--- a/test/Assembler/unnamed-alias.ll
+++ b/test/Assembler/unnamed-alias.ll
@@ -5,7 +5,7 @@
@1 = private constant i32 1
; CHECK: @1 = private constant i32 1
-@2 = private alias i32* @0
-; CHECK: @2 = private alias i32* @0
-@3 = private alias i32* @1
-; CHECK: @3 = private alias i32* @1
+@2 = private alias i32, i32* @3
+; CHECK: @2 = private alias i32, i32* @3
+@3 = private alias i32, i32* @1
+; CHECK: @3 = private alias i32, i32* @1
diff --git a/test/Assembler/uselistorder.ll b/test/Assembler/uselistorder.ll
index 2475fc48caf4..016bd877311b 100644
--- a/test/Assembler/uselistorder.ll
+++ b/test/Assembler/uselistorder.ll
@@ -4,7 +4,7 @@
; RUN: verify-uselistorder < %s
@a = global [4 x i1] [i1 0, i1 1, i1 0, i1 1]
-@b = alias i1* getelementptr ([4 x i1], [4 x i1]* @a, i64 0, i64 2)
+@b = alias i1, getelementptr ([4 x i1], [4 x i1]* @a, i64 0, i64 2)
; Check use-list order of constants used by globals.
@glob1 = global i5 7
diff --git a/test/Bindings/Go/go.test b/test/Bindings/Go/go.test
index 14eb3281cc4e..407623e9c6dc 100644
--- a/test/Bindings/Go/go.test
+++ b/test/Bindings/Go/go.test
@@ -1,3 +1,3 @@
; RUN: llvm-go test llvm.org/llvm/bindings/go/llvm
-; REQUIRES: shell, not_ubsan
+; REQUIRES: shell, not_ubsan, not_msan
diff --git a/test/Bindings/Go/lit.local.cfg b/test/Bindings/Go/lit.local.cfg
index e86595b8cb56..d68d867fb308 100644
--- a/test/Bindings/Go/lit.local.cfg
+++ b/test/Bindings/Go/lit.local.cfg
@@ -6,6 +6,9 @@ import sys
if not 'go' in config.root.llvm_bindings:
config.unsupported = True
+if config.root.include_go_tests != 'ON':
+ config.unsupported = True
+
def find_executable(executable, path=None):
if path is None:
path = os.environ['PATH']
diff --git a/test/Bindings/OCaml/analysis.ml b/test/Bindings/OCaml/analysis.ml
index e935ee838058..22ef153b372a 100644
--- a/test/Bindings/OCaml/analysis.ml
+++ b/test/Bindings/OCaml/analysis.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/analysis.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.analysis -linkpkg %T/analysis.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.analysis -linkpkg %T/analysis.ml -o %t
* RUN: %t
- * RUN: %ocamlopt -g -warn-error A -package llvm.analysis -linkpkg %T/analysis.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.analysis -linkpkg %T/analysis.ml -o %t
* RUN: %t
* XFAIL: vg_leak
*)
diff --git a/test/Bindings/OCaml/bitreader.ml b/test/Bindings/OCaml/bitreader.ml
index 57cfd043b2de..3fda34ab22ed 100644
--- a/test/Bindings/OCaml/bitreader.ml
+++ b/test/Bindings/OCaml/bitreader.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/bitreader.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitreader.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitreader.ml -o %t
* RUN: %t %t.bc
- * RUN: %ocamlopt -g -warn-error A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitreader.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitreader.ml -o %t
* RUN: %t %t.bc
* RUN: llvm-dis < %t.bc
* XFAIL: vg_leak
diff --git a/test/Bindings/OCaml/bitwriter.ml b/test/Bindings/OCaml/bitwriter.ml
index 7c803f6d5510..b051b8c61a29 100644
--- a/test/Bindings/OCaml/bitwriter.ml
+++ b/test/Bindings/OCaml/bitwriter.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/bitwriter.ml
- * RUN: %ocamlc -g -w -3 -warn-error A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitwriter.ml -o %t
+ * RUN: %ocamlc -g -w -3 -w +A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitwriter.ml -o %t
* RUN: %t %t.bc
- * RUN: %ocamlopt -g -w -3 -warn-error A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitwriter.ml -o %t
+ * RUN: %ocamlopt -g -w -3 -w +A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitwriter.ml -o %t
* RUN: %t %t.bc
* RUN: llvm-dis < %t.bc
* XFAIL: vg_leak
diff --git a/test/Bindings/OCaml/core.ml b/test/Bindings/OCaml/core.ml
index 3c759af791d0..73e9956bc563 100644
--- a/test/Bindings/OCaml/core.ml
+++ b/test/Bindings/OCaml/core.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/core.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.analysis -package llvm.bitwriter -linkpkg %T/core.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.analysis -package llvm.bitwriter -linkpkg %T/core.ml -o %t
* RUN: %t %t.bc
- * RUN: %ocamlopt -g -warn-error A -package llvm.analysis -package llvm.bitwriter -linkpkg %T/core.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.analysis -package llvm.bitwriter -linkpkg %T/core.ml -o %t
* RUN: %t %t.bc
* RUN: llvm-dis < %t.bc > %t.ll
* RUN: FileCheck %s < %t.ll
diff --git a/test/Bindings/OCaml/executionengine.ml b/test/Bindings/OCaml/executionengine.ml
index 1de2cfb7fefd..6d9abe728805 100644
--- a/test/Bindings/OCaml/executionengine.ml
+++ b/test/Bindings/OCaml/executionengine.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/executionengine.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.executionengine -linkpkg %T/executionengine.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.executionengine -linkpkg %T/executionengine.ml -o %t
* RUN: %t
- * RUN: %ocamlopt -g -warn-error A -package llvm.executionengine -linkpkg %T/executionengine.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.executionengine -linkpkg %T/executionengine.ml -o %t
* RUN: %t
* REQUIRES: native, object-emission
* XFAIL: vg_leak
diff --git a/test/Bindings/OCaml/ext_exc.ml b/test/Bindings/OCaml/ext_exc.ml
index 2b44803f5170..a24a28b1f528 100644
--- a/test/Bindings/OCaml/ext_exc.ml
+++ b/test/Bindings/OCaml/ext_exc.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/ext_exc.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.bitreader -linkpkg %T/ext_exc.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.bitreader -linkpkg %T/ext_exc.ml -o %t
* RUN: %t
- * RUN: %ocamlopt -g -warn-error A -package llvm.bitreader -linkpkg %T/ext_exc.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.bitreader -linkpkg %T/ext_exc.ml -o %t
* RUN: %t
* XFAIL: vg_leak
*)
diff --git a/test/Bindings/OCaml/ipo.ml b/test/Bindings/OCaml/ipo.ml
index fc728b92ff3d..4a7c73e891b5 100644
--- a/test/Bindings/OCaml/ipo.ml
+++ b/test/Bindings/OCaml/ipo.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/ipo_opts.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.ipo -linkpkg %T/ipo_opts.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.ipo -linkpkg %T/ipo_opts.ml -o %t
* RUN: %t %t.bc
- * RUN: %ocamlopt -g -warn-error A -package llvm.ipo -linkpkg %T/ipo_opts.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.ipo -linkpkg %T/ipo_opts.ml -o %t
* RUN: %t %t.bc
* XFAIL: vg_leak
*)
diff --git a/test/Bindings/OCaml/irreader.ml b/test/Bindings/OCaml/irreader.ml
index e1771e75dd49..b5142334d321 100644
--- a/test/Bindings/OCaml/irreader.ml
+++ b/test/Bindings/OCaml/irreader.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/irreader.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.irreader -linkpkg %T/irreader.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.irreader -linkpkg %T/irreader.ml -o %t
* RUN: %t
- * RUN: %ocamlopt -g -warn-error A -package llvm.irreader -linkpkg %T/irreader.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.irreader -linkpkg %T/irreader.ml -o %t
* RUN: %t
* XFAIL: vg_leak
*)
diff --git a/test/Bindings/OCaml/linker.ml b/test/Bindings/OCaml/linker.ml
index 1ea0be9d3dc3..423905e489ce 100644
--- a/test/Bindings/OCaml/linker.ml
+++ b/test/Bindings/OCaml/linker.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/linker.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.linker -linkpkg %T/linker.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.linker -linkpkg %T/linker.ml -o %t
* RUN: %t
- * RUN: %ocamlopt -g -warn-error A -package llvm.linker -linkpkg %T/linker.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.linker -linkpkg %T/linker.ml -o %t
* RUN: %t
* XFAIL: vg_leak
*)
@@ -39,23 +39,21 @@ let test_linker () =
let m1 = make_module "one"
and m2 = make_module "two" in
- link_modules m1 m2;
+ link_modules' m1 m2;
dispose_module m1;
- dispose_module m2;
let m1 = make_module "one"
and m2 = make_module "two" in
- link_modules m1 m2;
+ link_modules' m1 m2;
dispose_module m1;
let m1 = make_module "one"
and m2 = make_module "one" in
try
- link_modules m1 m2;
+ link_modules' m1 m2;
failwith "must raise"
with Error _ ->
- dispose_module m1;
- dispose_module m2
+ dispose_module m1
(*===-- Driver ------------------------------------------------------------===*)
diff --git a/test/Bindings/OCaml/passmgr_builder.ml b/test/Bindings/OCaml/passmgr_builder.ml
index 5dd9d4e00e89..3104736d66aa 100644
--- a/test/Bindings/OCaml/passmgr_builder.ml
+++ b/test/Bindings/OCaml/passmgr_builder.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/passmgr_builder.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.passmgr_builder -linkpkg %T/passmgr_builder.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.passmgr_builder -linkpkg %T/passmgr_builder.ml -o %t
* RUN: %t %t.bc
- * RUN: %ocamlopt -g -warn-error A -package llvm.passmgr_builder -linkpkg %T/passmgr_builder.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.passmgr_builder -linkpkg %T/passmgr_builder.ml -o %t
* RUN: %t %t.bc
* XFAIL: vg_leak
*)
diff --git a/test/Bindings/OCaml/scalar_opts.ml b/test/Bindings/OCaml/scalar_opts.ml
index 3017fb131129..9ffcf6915c07 100644
--- a/test/Bindings/OCaml/scalar_opts.ml
+++ b/test/Bindings/OCaml/scalar_opts.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/scalar_opts.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.scalar_opts -linkpkg %T/scalar_opts.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.scalar_opts -linkpkg %T/scalar_opts.ml -o %t
* RUN: %t %t.bc
- * RUN: %ocamlopt -g -warn-error A -package llvm.scalar_opts -linkpkg %T/scalar_opts.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.scalar_opts -linkpkg %T/scalar_opts.ml -o %t
* RUN: %t %t.bc
* XFAIL: vg_leak
*)
diff --git a/test/Bindings/OCaml/target.ml b/test/Bindings/OCaml/target.ml
index 41faefa8ea68..68ba75d04f28 100644
--- a/test/Bindings/OCaml/target.ml
+++ b/test/Bindings/OCaml/target.ml
@@ -1,6 +1,6 @@
(* RUN: cp %s %T/target.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.target -package llvm.all_backends -linkpkg %T/target.ml -o %t
- * RUN: %ocamlopt -g -warn-error A -package llvm.target -package llvm.all_backends -linkpkg %T/target.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.target -package llvm.all_backends -linkpkg %T/target.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.target -package llvm.all_backends -linkpkg %T/target.ml -o %t
* RUN: %t %t.bc
* XFAIL: vg_leak
*)
diff --git a/test/Bindings/OCaml/transform_utils.ml b/test/Bindings/OCaml/transform_utils.ml
index 6b46df173b08..dc14effc57ef 100644
--- a/test/Bindings/OCaml/transform_utils.ml
+++ b/test/Bindings/OCaml/transform_utils.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/transform_utils.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.transform_utils -linkpkg %T/transform_utils.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.transform_utils -linkpkg %T/transform_utils.ml -o %t
* RUN: %t
- * RUN: %ocamlopt -g -warn-error A -package llvm.transform_utils -linkpkg %T/transform_utils.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.transform_utils -linkpkg %T/transform_utils.ml -o %t
* RUN: %t
* XFAIL: vg_leak
*)
diff --git a/test/Bindings/OCaml/vectorize.ml b/test/Bindings/OCaml/vectorize.ml
index c5b03b525375..a668654de74a 100644
--- a/test/Bindings/OCaml/vectorize.ml
+++ b/test/Bindings/OCaml/vectorize.ml
@@ -1,7 +1,7 @@
(* RUN: cp %s %T/vectorize_opts.ml
- * RUN: %ocamlc -g -warn-error A -package llvm.vectorize -linkpkg %T/vectorize_opts.ml -o %t
+ * RUN: %ocamlc -g -w +A -package llvm.vectorize -linkpkg %T/vectorize_opts.ml -o %t
* RUN: %t %t.bc
- * RUN: %ocamlopt -g -warn-error A -package llvm.vectorize -linkpkg %T/vectorize_opts.ml -o %t
+ * RUN: %ocamlopt -g -w +A -package llvm.vectorize -linkpkg %T/vectorize_opts.ml -o %t
* RUN: %t %t.bc
* XFAIL: vg_leak
*)
diff --git a/test/Bindings/llvm-c/Inputs/invalid.ll.bc b/test/Bindings/llvm-c/Inputs/invalid.ll.bc
index a85c3644b3ab..60c7afffbc23 100644
--- a/test/Bindings/llvm-c/Inputs/invalid.ll.bc
+++ b/test/Bindings/llvm-c/Inputs/invalid.ll.bc
Binary files differ
diff --git a/test/Bindings/llvm-c/functions.ll b/test/Bindings/llvm-c/functions.ll
index 4503fb17315f..27c05464502e 100644
--- a/test/Bindings/llvm-c/functions.ll
+++ b/test/Bindings/llvm-c/functions.ll
@@ -1,4 +1,13 @@
-; RUN: llvm-as < %s | llvm-c-test --module-list-functions | FileCheck %s
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-c-test --module-list-functions < %t.bc| FileCheck %s
+; RUN: llvm-c-test --module-dump < %t.bc| FileCheck --check-prefix=MOD %s
+; RUN: llvm-c-test --lazy-module-dump < %t.bc| FileCheck --check-prefix=LMOD %s
+
+; MOD: define i32 @X() {
+; MOD-NEXT: entry:
+
+; LMOD: ; Materializable
+; LMOD-NEXT: define i32 @X() {}
define i32 @X() {
entry:
diff --git a/test/Bindings/llvm-c/invalid-bitcode.test b/test/Bindings/llvm-c/invalid-bitcode.test
index 6318a9bf13d9..afae0ea1092c 100644
--- a/test/Bindings/llvm-c/invalid-bitcode.test
+++ b/test/Bindings/llvm-c/invalid-bitcode.test
@@ -1,3 +1,10 @@
; RUN: not llvm-c-test --module-dump < %S/Inputs/invalid.ll.bc 2>&1 | FileCheck %s
+; RUN: not llvm-c-test --lazy-module-dump < %S/Inputs/invalid.ll.bc 2>&1 | FileCheck %s
-CHECK: Error parsing bitcode: Unknown attribute kind (48)
+CHECK: Error parsing bitcode: Unknown attribute kind (52)
+
+
+; RUN: not llvm-c-test --new-module-dump < %S/Inputs/invalid.ll.bc 2>&1 | FileCheck --check-prefix=NEW %s
+; RUN: not llvm-c-test --lazy-new-module-dump < %S/Inputs/invalid.ll.bc 2>&1 | FileCheck --check-prefix=NEW %s
+
+NEW: Error with new bitcode parser: Unknown attribute kind (52)
diff --git a/test/Bitcode/DICompileUnit-no-DWOId.ll b/test/Bitcode/DICompileUnit-no-DWOId.ll
index 4771c37264f2..dd3273e44ff0 100644
--- a/test/Bitcode/DICompileUnit-no-DWOId.ll
+++ b/test/Bitcode/DICompileUnit-no-DWOId.ll
@@ -2,7 +2,7 @@
; default to 0, which is not displayed at all in the textual representation.
;
; RUN: llvm-dis %s.bc -o - | FileCheck %s
-; CHECK: !DICompileUnit
+; CHECK: distinct !DICompileUnit
; CHECK-NOT: dwoId:
!named = !{!0}
!0 = !DICompileUnit(language: 12, file: !1)
diff --git a/test/Bitcode/DILocalVariable-explicit-tags.ll b/test/Bitcode/DILocalVariable-explicit-tags.ll
new file mode 100644
index 000000000000..3f7be3c061a3
--- /dev/null
+++ b/test/Bitcode/DILocalVariable-explicit-tags.ll
@@ -0,0 +1,16 @@
+; Bitcode compiled with 3.7_rc2. 3.7 had redundant (but mandatory) tag fields
+; on DILocalVariable.
+;
+; RUN: llvm-dis < %s.bc -o - | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: ![[SP:[0-9]+]] = distinct !DISubprogram(name: "foo",{{.*}} variables: ![[VARS:[0-9]+]]
+; CHECK: ![[VARS]] = !{![[PARAM:[0-9]+]], ![[AUTO:[0-9]+]]}
+; CHECK: ![[PARAM]] = !DILocalVariable(name: "param", arg: 1, scope: ![[SP]])
+; CHECK: ![[AUTO]] = !DILocalVariable(name: "auto", scope: ![[SP]])
+
+!named = !{!0}
+
+!0 = distinct !DISubprogram(name: "foo", variables: !1)
+!1 = !{!2, !3}
+!2 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "param", arg: 1, scope: !0)
+!3 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "auto", scope: !0)
diff --git a/test/Bitcode/DILocalVariable-explicit-tags.ll.bc b/test/Bitcode/DILocalVariable-explicit-tags.ll.bc
new file mode 100644
index 000000000000..4efcbe878f84
--- /dev/null
+++ b/test/Bitcode/DILocalVariable-explicit-tags.ll.bc
Binary files differ
diff --git a/test/Bitcode/DISubprogram-distinct-definitions.ll b/test/Bitcode/DISubprogram-distinct-definitions.ll
new file mode 100644
index 000000000000..5c6fb908be90
--- /dev/null
+++ b/test/Bitcode/DISubprogram-distinct-definitions.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; Check that subprogram definitions are correctly upgraded to 'distinct'.
+; Bitcode compiled from r245235 of the 3.7 release branch.
+
+!named = !{!0}
+!0 = distinct !DICompileUnit(language: 12, file: !1, subprograms: !2)
+!1 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
+!2 = !{!3}
+
+; CHECK: = distinct !DISubprogram({{.*}}, isDefinition: true
+!3 = !DISubprogram(name: "foo", isDefinition: true)
diff --git a/test/Bitcode/DISubprogram-distinct-definitions.ll.bc b/test/Bitcode/DISubprogram-distinct-definitions.ll.bc
new file mode 100644
index 000000000000..600f68facc3a
--- /dev/null
+++ b/test/Bitcode/DISubprogram-distinct-definitions.ll.bc
Binary files differ
diff --git a/test/Bitcode/Inputs/invalid-abbrev.bc b/test/Bitcode/Inputs/invalid-abbrev.bc
index 4e8f3944b84b..06c2fd38cb27 100644
--- a/test/Bitcode/Inputs/invalid-abbrev.bc
+++ b/test/Bitcode/Inputs/invalid-abbrev.bc
Binary files differ
diff --git a/test/Bitcode/Inputs/invalid-cast.bc b/test/Bitcode/Inputs/invalid-cast.bc
new file mode 100644
index 000000000000..a8b82f3e2861
--- /dev/null
+++ b/test/Bitcode/Inputs/invalid-cast.bc
Binary files differ
diff --git a/test/Bitcode/Inputs/invalid-name-with-0-byte.bc b/test/Bitcode/Inputs/invalid-name-with-0-byte.bc
new file mode 100644
index 000000000000..9c6a9158eee7
--- /dev/null
+++ b/test/Bitcode/Inputs/invalid-name-with-0-byte.bc
Binary files differ
diff --git a/test/Bitcode/Inputs/invalid-no-function-block.bc b/test/Bitcode/Inputs/invalid-no-function-block.bc
new file mode 100644
index 000000000000..52b65588e4a7
--- /dev/null
+++ b/test/Bitcode/Inputs/invalid-no-function-block.bc
Binary files differ
diff --git a/test/Bitcode/anon-functions.ll b/test/Bitcode/anon-functions.ll
new file mode 100644
index 000000000000..b68cc8eff050
--- /dev/null
+++ b/test/Bitcode/anon-functions.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; PR25101
+
+; CHECK: define void @0()
+define void @0() {
+ ret void
+}
+
+; CHECK: define void @f()
+define void @f() {
+ ret void
+}
+
+; CHECK: define void @1()
+define void @1() {
+ ret void
+}
+
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
index a0bc66642f7d..1b3526f1fa0d 100644
--- a/test/Bitcode/attributes.ll
+++ b/test/Bitcode/attributes.ll
@@ -204,7 +204,7 @@ define void @f34()
; CHECK: define void @f34()
{
call void @nobuiltin() nobuiltin
-; CHECK: call void @nobuiltin() #27
+; CHECK: call void @nobuiltin() #30
ret void;
}
@@ -262,6 +262,31 @@ define void @f44() argmemonly
ret void;
}
+; CHECK: define "string_attribute" void @f45(i32 "string_attribute")
+define "string_attribute" void @f45(i32 "string_attribute") {
+ ret void
+}
+
+; CHECK: define "string_attribute_with_value"="value" void @f46(i32 "string_attribute_with_value"="value")
+define "string_attribute_with_value"="value" void @f46(i32 "string_attribute_with_value"="value") {
+ ret void
+}
+
+; CHECK: define void @f47() #27
+define void @f47() norecurse {
+ ret void
+}
+
+; CHECK: define void @f48() #28
+define void @f48() inaccessiblememonly {
+ ret void
+}
+
+; CHECK: define void @f49() #29
+define void @f49() inaccessiblemem_or_argmemonly {
+ ret void
+}
+
; CHECK: attributes #0 = { noreturn }
; CHECK: attributes #1 = { nounwind }
; CHECK: attributes #2 = { readnone }
@@ -289,4 +314,7 @@ define void @f44() argmemonly
; CHECK: attributes #24 = { jumptable }
; CHECK: attributes #25 = { convergent }
; CHECK: attributes #26 = { argmemonly }
-; CHECK: attributes #27 = { nobuiltin }
+; CHECK: attributes #27 = { norecurse }
+; CHECK: attributes #28 = { inaccessiblememonly }
+; CHECK: attributes #29 = { inaccessiblemem_or_argmemonly }
+; CHECK: attributes #30 = { nobuiltin }
diff --git a/test/Bitcode/compatibility-3.6.ll b/test/Bitcode/compatibility-3.6.ll
new file mode 100644
index 000000000000..87958fc34183
--- /dev/null
+++ b/test/Bitcode/compatibility-3.6.ll
@@ -0,0 +1,1207 @@
+; Bitcode compatibility test for llvm 3.6.2
+;
+; N.b: This is 3.6.2-compatible IR. The CHECK lines occasionally differ from
+; the IR used to generate the bitcode, and may need to be updated. These
+; locations are tagged with an 'XXX'.
+
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+target datalayout = "E"
+; CHECK: target datalayout = "E"
+
+target triple = "x86_64-apple-macosx10.10.0"
+; CHECK: target triple = "x86_64-apple-macosx10.10.0"
+
+;; Module-level assembly
+module asm "beep boop"
+; CHECK: module asm "beep boop"
+
+;; Comdats
+$comdat.any = comdat any
+; CHECK: $comdat.any = comdat any
+$comdat.exactmatch = comdat exactmatch
+; CHECK: $comdat.exactmatch = comdat exactmatch
+$comdat.largest = comdat largest
+; CHECK: $comdat.largest = comdat largest
+$comdat.noduplicates = comdat noduplicates
+; CHECK: $comdat.noduplicates = comdat noduplicates
+$comdat.samesize = comdat samesize
+; CHECK: $comdat.samesize = comdat samesize
+
+; Global Variables -- comdat
+@comdat.any = global i32 0, comdat
+; CHECK: @comdat.any = global i32 0, comdat
+@comdat.exactmatch = global i32 0, comdat
+; CHECK: @comdat.exactmatch = global i32 0, comdat
+@comdat.largest = global i32 0, comdat
+; CHECK: @comdat.largest = global i32 0, comdat
+@comdat.noduplicates = global i32 0, comdat
+; CHECK: @comdat.noduplicates = global i32 0, comdat
+@comdat.samesize = global i32 0, comdat
+; CHECK: @comdat.samesize = global i32 0, comdat
+
+; Force two globals from different comdats into sections with the same name.
+$comdat1 = comdat any
+$comdat2 = comdat any
+@g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1)
+; CHECK: @g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1)
+@g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2)
+; CHECK: @g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2)
+
+;; Constants
+@const.true = constant i1 true
+; CHECK: @const.true = constant i1 true
+@const.false = constant i1 false
+; CHECK: @const.false = constant i1 false
+@const.int = constant i32 zeroinitializer
+; CHECK: @const.int = constant i32 0
+@const.float = constant double 0.0
+; CHECK: @const.float = constant double 0.0
+@const.null = constant i8* null
+; CHECK: @const.null = constant i8* null
+%const.struct.type = type { i32, i8 }
+%const.struct.type.packed = type <{ i32, i8 }>
+@const.struct = constant %const.struct.type { i32 -1, i8 undef }
+; CHECK: @const.struct = constant %const.struct.type { i32 -1, i8 undef }
+@const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+; CHECK: @const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+@const.array = constant [2 x i32] [i32 -3, i32 -4]
+; CHECK: @const.array = constant [2 x i32] [i32 -3, i32 -4]
+@const.vector = constant <2 x i32> <i32 -5, i32 -6>
+; CHECK: @const.vector = constant <2 x i32> <i32 -5, i32 -6>
+
+;; Global Variables
+; Format: [@<GlobalVarName> =] [Linkage] [Visibility] [DLLStorageClass]
+; [ThreadLocal] [unnamed_addr] [AddrSpace] [ExternallyInitialized]
+; <global | constant> <Type> [<InitializerConstant>]
+; [, section "name"] [, align <Alignment>]
+
+; Global Variables -- Simple
+@g1 = global i32 0
+; CHECK: @g1 = global i32 0
+@g2 = constant i32 0
+; CHECK: @g2 = constant i32 0
+
+; Global Variables -- Linkage
+@g.private = private global i32 0
+; CHECK: @g.private = private global i32 0
+@g.internal = internal global i32 0
+; CHECK: @g.internal = internal global i32 0
+@g.available_externally = available_externally global i32 0
+; CHECK: @g.available_externally = available_externally global i32 0
+@g.linkonce = linkonce global i32 0
+; CHECK: @g.linkonce = linkonce global i32 0
+@g.weak = weak global i32 0
+; CHECK: @g.weak = weak global i32 0
+@g.common = common global i32 0
+; CHECK: @g.common = common global i32 0
+@g.appending = appending global [4 x i8] c"test"
+; CHECK: @g.appending = appending global [4 x i8] c"test"
+@g.extern_weak = extern_weak global i32
+; CHECK: @g.extern_weak = extern_weak global i32
+@g.linkonce_odr = linkonce_odr global i32 0
+; CHECK: @g.linkonce_odr = linkonce_odr global i32 0
+@g.weak_odr = weak_odr global i32 0
+; CHECK: @g.weak_odr = weak_odr global i32 0
+@g.external = external global i32
+; CHECK: @g.external = external global i32
+
+; Global Variables -- Visibility
+@g.default = default global i32 0
+; CHECK: @g.default = global i32 0
+@g.hidden = hidden global i32 0
+; CHECK: @g.hidden = hidden global i32 0
+@g.protected = protected global i32 0
+; CHECK: @g.protected = protected global i32 0
+
+; Global Variables -- DLLStorageClass
+@g.dlldefault = default global i32 0
+; CHECK: @g.dlldefault = global i32 0
+@g.dllimport = external dllimport global i32
+; CHECK: @g.dllimport = external dllimport global i32
+@g.dllexport = dllexport global i32 0
+; CHECK: @g.dllexport = dllexport global i32 0
+
+; Global Variables -- ThreadLocal
+@g.notthreadlocal = global i32 0
+; CHECK: @g.notthreadlocal = global i32 0
+@g.generaldynamic = thread_local global i32 0
+; CHECK: @g.generaldynamic = thread_local global i32 0
+@g.localdynamic = thread_local(localdynamic) global i32 0
+; CHECK: @g.localdynamic = thread_local(localdynamic) global i32 0
+@g.initialexec = thread_local(initialexec) global i32 0
+; CHECK: @g.initialexec = thread_local(initialexec) global i32 0
+@g.localexec = thread_local(localexec) global i32 0
+; CHECK: @g.localexec = thread_local(localexec) global i32 0
+
+; Global Variables -- unnamed_addr
+@g.unnamed_addr = unnamed_addr global i32 0
+; CHECK: @g.unnamed_addr = unnamed_addr global i32 0
+
+; Global Variables -- AddrSpace
+@g.addrspace = addrspace(1) global i32 0
+; CHECK: @g.addrspace = addrspace(1) global i32 0
+
+; Global Variables -- ExternallyInitialized
+@g.externally_initialized = external externally_initialized global i32
+; CHECK: @g.externally_initialized = external externally_initialized global i32
+
+; Global Variables -- section
+@g.section = global i32 0, section "_DATA"
+; CHECK: @g.section = global i32 0, section "_DATA"
+
+; Global Variables -- align
+@g.align = global i32 0, align 4
+; CHECK: @g.align = global i32 0, align 4
+
+; Global Variables -- Intrinsics
+%pri.func.data = type { i32, void ()*, i8* }
+@g.used1 = global i32 0
+@g.used2 = global i32 0
+@g.used3 = global i8 0
+declare void @g.f1()
+@llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata"
+; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata"
+@llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata"
+@llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+; CHECK: @llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+@llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+; CHECK: @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+
+;; Aliases
+; Format: @<Name> = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal]
+; [unnamed_addr] alias <AliaseeTy> @<Aliasee>
+
+; Aliases -- Linkage
+@a.private = private alias i32* @g.private
+; CHECK: @a.private = private alias i32, i32* @g.private
+@a.internal = internal alias i32* @g.internal
+; CHECK: @a.internal = internal alias i32, i32* @g.internal
+@a.linkonce = linkonce alias i32* @g.linkonce
+; CHECK: @a.linkonce = linkonce alias i32, i32* @g.linkonce
+@a.weak = weak alias i32* @g.weak
+; CHECK: @a.weak = weak alias i32, i32* @g.weak
+@a.linkonce_odr = linkonce_odr alias i32* @g.linkonce_odr
+; CHECK: @a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr
+@a.weak_odr = weak_odr alias i32* @g.weak_odr
+; CHECK: @a.weak_odr = weak_odr alias i32, i32* @g.weak_odr
+@a.external = external alias i32* @g1
+; CHECK: @a.external = alias i32, i32* @g1
+
+; Aliases -- Visibility
+@a.default = default alias i32* @g.default
+; CHECK: @a.default = alias i32, i32* @g.default
+@a.hidden = hidden alias i32* @g.hidden
+; CHECK: @a.hidden = hidden alias i32, i32* @g.hidden
+@a.protected = protected alias i32* @g.protected
+; CHECK: @a.protected = protected alias i32, i32* @g.protected
+
+; Aliases -- DLLStorageClass
+@a.dlldefault = default alias i32* @g.dlldefault
+; CHECK: @a.dlldefault = alias i32, i32* @g.dlldefault
+@a.dllimport = dllimport alias i32* @g1
+; CHECK: @a.dllimport = dllimport alias i32, i32* @g1
+@a.dllexport = dllexport alias i32* @g.dllexport
+; CHECK: @a.dllexport = dllexport alias i32, i32* @g.dllexport
+
+; Aliases -- ThreadLocal
+@a.notthreadlocal = alias i32* @g.notthreadlocal
+; CHECK: @a.notthreadlocal = alias i32, i32* @g.notthreadlocal
+@a.generaldynamic = thread_local alias i32* @g.generaldynamic
+; CHECK: @a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic
+@a.localdynamic = thread_local(localdynamic) alias i32* @g.localdynamic
+; CHECK: @a.localdynamic = thread_local(localdynamic) alias i32, i32* @g.localdynamic
+@a.initialexec = thread_local(initialexec) alias i32* @g.initialexec
+; CHECK: @a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec
+@a.localexec = thread_local(localexec) alias i32* @g.localexec
+; CHECK: @a.localexec = thread_local(localexec) alias i32, i32* @g.localexec
+
+; Aliases -- unnamed_addr
+@a.unnamed_addr = unnamed_addr alias i32* @g.unnamed_addr
+; CHECK: @a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr
+
+;; Functions
+; Format: define [linkage] [visibility] [DLLStorageClass]
+; [cconv] [ret attrs]
+; <ResultType> @<FunctionName> ([argument list])
+; [unnamed_addr] [fn Attrs] [section "name"] [comdat [($name)]]
+; [align N] [gc] [prefix Constant]
+; { ... }
+
+; Functions -- Simple
+declare void @f1 ()
+; CHECK: declare void @f1()
+
+define void @f2 () {
+; CHECK: define void @f2()
+entry:
+ ret void
+}
+
+; Functions -- linkage
+define private void @f.private() {
+; CHECK: define private void @f.private()
+entry:
+ ret void
+}
+define internal void @f.internal() {
+; CHECK: define internal void @f.internal()
+entry:
+ ret void
+}
+define available_externally void @f.available_externally() {
+; CHECK: define available_externally void @f.available_externally()
+entry:
+ ret void
+}
+define linkonce void @f.linkonce() {
+; CHECK: define linkonce void @f.linkonce()
+entry:
+ ret void
+}
+define weak void @f.weak() {
+; CHECK: define weak void @f.weak()
+entry:
+ ret void
+}
+define linkonce_odr void @f.linkonce_odr() {
+; CHECK: define linkonce_odr void @f.linkonce_odr()
+entry:
+ ret void
+}
+define weak_odr void @f.weak_odr() {
+; CHECK: define weak_odr void @f.weak_odr()
+entry:
+ ret void
+}
+declare external void @f.external()
+; CHECK: declare void @f.external()
+declare extern_weak void @f.extern_weak()
+; CHECK: declare extern_weak void @f.extern_weak()
+
+; Functions -- visibility
+declare default void @f.default()
+; CHECK: declare void @f.default()
+declare hidden void @f.hidden()
+; CHECK: declare hidden void @f.hidden()
+declare protected void @f.protected()
+; CHECK: declare protected void @f.protected()
+
+; Functions -- DLLStorageClass
+declare dllimport void @f.dllimport()
+; CHECK: declare dllimport void @f.dllimport()
+declare dllexport void @f.dllexport()
+; CHECK: declare dllexport void @f.dllexport()
+
+; Functions -- cconv (Calling conventions)
+declare ccc void @f.ccc()
+; CHECK: declare void @f.ccc()
+declare fastcc void @f.fastcc()
+; CHECK: declare fastcc void @f.fastcc()
+declare coldcc void @f.coldcc()
+; CHECK: declare coldcc void @f.coldcc()
+declare cc10 void @f.cc10()
+; CHECK: declare ghccc void @f.cc10()
+declare ghccc void @f.ghccc()
+; CHECK: declare ghccc void @f.ghccc()
+declare cc11 void @f.cc11()
+; CHECK: declare cc11 void @f.cc11()
+declare webkit_jscc void @f.webkit_jscc()
+; CHECK: declare webkit_jscc void @f.webkit_jscc()
+declare anyregcc void @f.anyregcc()
+; CHECK: declare anyregcc void @f.anyregcc()
+declare preserve_mostcc void @f.preserve_mostcc()
+; CHECK: declare preserve_mostcc void @f.preserve_mostcc()
+declare preserve_allcc void @f.preserve_allcc()
+; CHECK: declare preserve_allcc void @f.preserve_allcc()
+declare cc64 void @f.cc64()
+; CHECK: declare x86_stdcallcc void @f.cc64()
+declare x86_stdcallcc void @f.x86_stdcallcc()
+; CHECK: declare x86_stdcallcc void @f.x86_stdcallcc()
+declare cc65 void @f.cc65()
+; CHECK: declare x86_fastcallcc void @f.cc65()
+declare x86_fastcallcc void @f.x86_fastcallcc()
+; CHECK: declare x86_fastcallcc void @f.x86_fastcallcc()
+declare cc66 void @f.cc66()
+; CHECK: declare arm_apcscc void @f.cc66()
+declare arm_apcscc void @f.arm_apcscc()
+; CHECK: declare arm_apcscc void @f.arm_apcscc()
+declare cc67 void @f.cc67()
+; CHECK: declare arm_aapcscc void @f.cc67()
+declare arm_aapcscc void @f.arm_aapcscc()
+; CHECK: declare arm_aapcscc void @f.arm_aapcscc()
+declare cc68 void @f.cc68()
+; CHECK: declare arm_aapcs_vfpcc void @f.cc68()
+declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc()
+; CHECK: declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc()
+declare cc69 void @f.cc69()
+; CHECK: declare msp430_intrcc void @f.cc69()
+declare msp430_intrcc void @f.msp430_intrcc()
+; CHECK: declare msp430_intrcc void @f.msp430_intrcc()
+declare cc70 void @f.cc70()
+; CHECK: declare x86_thiscallcc void @f.cc70()
+declare x86_thiscallcc void @f.x86_thiscallcc()
+; CHECK: declare x86_thiscallcc void @f.x86_thiscallcc()
+declare cc71 void @f.cc71()
+; CHECK: declare ptx_kernel void @f.cc71()
+declare ptx_kernel void @f.ptx_kernel()
+; CHECK: declare ptx_kernel void @f.ptx_kernel()
+declare cc72 void @f.cc72()
+; CHECK: declare ptx_device void @f.cc72()
+declare ptx_device void @f.ptx_device()
+; CHECK: declare ptx_device void @f.ptx_device()
+declare cc75 void @f.cc75()
+; CHECK: declare spir_func void @f.cc75()
+declare spir_func void @f.spir_func()
+; CHECK: declare spir_func void @f.spir_func()
+declare cc76 void @f.cc76()
+; CHECK: declare spir_kernel void @f.cc76()
+declare spir_kernel void @f.spir_kernel()
+; CHECK: declare spir_kernel void @f.spir_kernel()
+declare cc77 void @f.cc77()
+; CHECK: declare intel_ocl_bicc void @f.cc77()
+declare intel_ocl_bicc void @f.intel_ocl_bicc()
+; CHECK: declare intel_ocl_bicc void @f.intel_ocl_bicc()
+declare cc78 void @f.cc78()
+; CHECK: declare x86_64_sysvcc void @f.cc78()
+declare x86_64_sysvcc void @f.x86_64_sysvcc()
+; CHECK: declare x86_64_sysvcc void @f.x86_64_sysvcc()
+declare cc79 void @f.cc79()
+; CHECK: declare x86_64_win64cc void @f.cc79()
+declare x86_64_win64cc void @f.x86_64_win64cc()
+; CHECK: declare x86_64_win64cc void @f.x86_64_win64cc()
+declare cc80 void @f.cc80()
+; CHECK: declare x86_vectorcallcc void @f.cc80()
+declare x86_vectorcallcc void @f.x86_vectorcallcc()
+; CHECK: declare x86_vectorcallcc void @f.x86_vectorcallcc()
+declare cc1023 void @f.cc1023()
+; CHECK: declare cc1023 void @f.cc1023()
+
+; Functions -- ret attrs (Return attributes)
+declare zeroext i64 @f.zeroext()
+; CHECK: declare zeroext i64 @f.zeroext()
+declare signext i64 @f.signext()
+; CHECK: declare signext i64 @f.signext()
+declare inreg i32* @f.inreg()
+; CHECK: declare inreg i32* @f.inreg()
+declare noalias i32* @f.noalias()
+; CHECK: declare noalias i32* @f.noalias()
+declare nonnull i32* @f.nonnull()
+; CHECK: declare nonnull i32* @f.nonnull()
+declare dereferenceable(4) i32* @f.dereferenceable4()
+; CHECK: declare dereferenceable(4) i32* @f.dereferenceable4()
+declare dereferenceable(8) i32* @f.dereferenceable8()
+; CHECK: declare dereferenceable(8) i32* @f.dereferenceable8()
+declare dereferenceable(16) i32* @f.dereferenceable16()
+; CHECK: declare dereferenceable(16) i32* @f.dereferenceable16()
+
+; Functions -- Parameter attributes
+declare void @f.param.zeroext(i8 zeroext)
+; CHECK: declare void @f.param.zeroext(i8 zeroext)
+declare void @f.param.signext(i8 signext)
+; CHECK: declare void @f.param.signext(i8 signext)
+declare void @f.param.inreg(i8 inreg)
+; CHECK: declare void @f.param.inreg(i8 inreg)
+declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+declare void @f.param.inalloca(i8* inalloca)
+; CHECK: declare void @f.param.inalloca(i8* inalloca)
+declare void @f.param.sret(i8* sret)
+; CHECK: declare void @f.param.sret(i8* sret)
+declare void @f.param.noalias(i8* noalias)
+; CHECK: declare void @f.param.noalias(i8* noalias)
+declare void @f.param.nocapture(i8* nocapture)
+; CHECK: declare void @f.param.nocapture(i8* nocapture)
+declare void @f.param.nest(i8* nest)
+; CHECK: declare void @f.param.nest(i8* nest)
+declare i8* @f.param.returned(i8* returned)
+; CHECK: declare i8* @f.param.returned(i8* returned)
+declare void @f.param.nonnull(i8* nonnull)
+; CHECK: declare void @f.param.nonnull(i8* nonnull)
+declare void @f.param.dereferenceable(i8* dereferenceable(4))
+; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4))
+
+; Functions -- unnamed_addr
+declare void @f.unnamed_addr() unnamed_addr
+; CHECK: declare void @f.unnamed_addr() unnamed_addr
+
+; Functions -- fn Attrs (Function attributes)
+declare void @f.alignstack4() alignstack(4)
+; CHECK: declare void @f.alignstack4() #0
+declare void @f.alignstack8() alignstack(8)
+; CHECK: declare void @f.alignstack8() #1
+declare void @f.alwaysinline() alwaysinline
+; CHECK: declare void @f.alwaysinline() #2
+declare void @f.cold() cold
+; CHECK: declare void @f.cold() #3
+declare void @f.inlinehint() inlinehint
+; CHECK: declare void @f.inlinehint() #4
+declare void @f.jumptable() unnamed_addr jumptable
+; CHECK: declare void @f.jumptable() unnamed_addr #5
+declare void @f.minsize() minsize
+; CHECK: declare void @f.minsize() #6
+declare void @f.naked() naked
+; CHECK: declare void @f.naked() #7
+declare void @f.nobuiltin() nobuiltin
+; CHECK: declare void @f.nobuiltin() #8
+declare void @f.noduplicate() noduplicate
+; CHECK: declare void @f.noduplicate() #9
+declare void @f.noimplicitfloat() noimplicitfloat
+; CHECK: declare void @f.noimplicitfloat() #10
+declare void @f.noinline() noinline
+; CHECK: declare void @f.noinline() #11
+declare void @f.nonlazybind() nonlazybind
+; CHECK: declare void @f.nonlazybind() #12
+declare void @f.noredzone() noredzone
+; CHECK: declare void @f.noredzone() #13
+declare void @f.noreturn() noreturn
+; CHECK: declare void @f.noreturn() #14
+declare void @f.nounwind() nounwind
+; CHECK: declare void @f.nounwind() #15
+declare void @f.optnone() noinline optnone
+; CHECK: declare void @f.optnone() #16
+declare void @f.optsize() optsize
+; CHECK: declare void @f.optsize() #17
+declare void @f.readnone() readnone
+; CHECK: declare void @f.readnone() #18
+declare void @f.readonly() readonly
+; CHECK: declare void @f.readonly() #19
+declare void @f.returns_twice() returns_twice
+; CHECK: declare void @f.returns_twice() #20
+declare void @f.sanitize_address() sanitize_address
+; CHECK: declare void @f.sanitize_address() #21
+declare void @f.sanitize_memory() sanitize_memory
+; CHECK: declare void @f.sanitize_memory() #22
+declare void @f.sanitize_thread() sanitize_thread
+; CHECK: declare void @f.sanitize_thread() #23
+declare void @f.ssp() ssp
+; CHECK: declare void @f.ssp() #24
+declare void @f.sspreq() sspreq
+; CHECK: declare void @f.sspreq() #25
+declare void @f.sspstrong() sspstrong
+; CHECK: declare void @f.sspstrong() #26
+declare void @f.uwtable() uwtable
+; CHECK: declare void @f.uwtable() #27
+declare void @f.kvpair() "cpu"="cortex-a8"
+; CHECK:declare void @f.kvpair() #28
+
+; Functions -- section
+declare void @f.section() section "80"
+; CHECK: declare void @f.section() section "80"
+
+; Functions -- comdat
+define void @f.comdat_any() comdat($comdat.any) {
+; CHECK: define void @f.comdat_any() comdat($comdat.any)
+entry:
+ ret void
+}
+define void @f.comdat_exactmatch() comdat($comdat.exactmatch) {
+; CHECK: define void @f.comdat_exactmatch() comdat($comdat.exactmatch)
+entry:
+ ret void
+}
+define void @f.comdat_largest() comdat($comdat.largest) {
+; CHECK: define void @f.comdat_largest() comdat($comdat.largest)
+entry:
+ ret void
+}
+define void @f.comdat_noduplicates() comdat($comdat.noduplicates) {
+; CHECK: define void @f.comdat_noduplicates() comdat($comdat.noduplicates)
+entry:
+ ret void
+}
+define void @f.comdat_samesize() comdat($comdat.samesize) {
+; CHECK: define void @f.comdat_samesize() comdat($comdat.samesize)
+entry:
+ ret void
+}
+
+; Functions -- align
+declare void @f.align2() align 2
+; CHECK: declare void @f.align2() align 2
+declare void @f.align4() align 4
+; CHECK: declare void @f.align4() align 4
+declare void @f.align8() align 8
+; CHECK: declare void @f.align8() align 8
+
+; Functions -- GC
+declare void @f.gcshadow() gc "shadow-stack"
+; CHECK: declare void @f.gcshadow() gc "shadow-stack"
+
+; Functions -- Prefix data
+declare void @f.prefixi32() prefix i32 1684365668
+; CHECK: declare void @f.prefixi32() prefix i32 1684365668
+declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+; CHECK: declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+;; Atomic Memory Ordering Constraints
+define void @atomics(i32* %word) {
+ %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic
+ ; CHECK: %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic
+ %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic
+ ; CHECK: %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic
+ %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic
+ ; CHECK: %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic
+ %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic
+ ; CHECK: %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic
+ %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic
+ ; CHECK: %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic
+ %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic
+ ; CHECK: %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic
+ %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic
+ ; CHECK: %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic
+ %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic
+ ; CHECK: %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic
+ %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
+ ; CHECK: %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
+ %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic
+ ; CHECK: %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic
+ %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic
+ ; CHECK: %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic
+ %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic
+ ; CHECK: %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic
+ %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic
+ ; CHECK: %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic
+ %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic
+ ; CHECK: %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic
+ %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic
+ ; CHECK: %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic
+ %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic
+ ; CHECK: %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic
+ %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic
+ ; CHECK: %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic
+ %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic
+ ; CHECK: %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic
+ %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic
+ ; CHECK: %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic
+ fence acquire
+ ; CHECK: fence acquire
+ fence release
+ ; CHECK: fence release
+ fence acq_rel
+ ; CHECK: fence acq_rel
+ fence singlethread seq_cst
+ ; CHECK: fence singlethread seq_cst
+
+ ; XXX: The parser spits out the load type here.
+ %ld.1 = load atomic i32* %word monotonic, align 4
+ ; CHECK: %ld.1 = load atomic i32, i32* %word monotonic, align 4
+ %ld.2 = load atomic volatile i32* %word acquire, align 8
+ ; CHECK: %ld.2 = load atomic volatile i32, i32* %word acquire, align 8
+ %ld.3 = load atomic volatile i32* %word singlethread seq_cst, align 16
+ ; CHECK: %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16
+
+ store atomic i32 23, i32* %word monotonic, align 4
+ ; CHECK: store atomic i32 23, i32* %word monotonic, align 4
+ store atomic volatile i32 24, i32* %word monotonic, align 4
+ ; CHECK: store atomic volatile i32 24, i32* %word monotonic, align 4
+ store atomic volatile i32 25, i32* %word singlethread monotonic, align 4
+ ; CHECK: store atomic volatile i32 25, i32* %word singlethread monotonic, align 4
+ ret void
+}
+
+;; Fast Math Flags
+define void @fastmathflags(float %op1, float %op2) {
+ %f.nnan = fadd nnan float %op1, %op2
+ ; CHECK: %f.nnan = fadd nnan float %op1, %op2
+ %f.ninf = fadd ninf float %op1, %op2
+ ; CHECK: %f.ninf = fadd ninf float %op1, %op2
+ %f.nsz = fadd nsz float %op1, %op2
+ ; CHECK: %f.nsz = fadd nsz float %op1, %op2
+ %f.arcp = fadd arcp float %op1, %op2
+ ; CHECK: %f.arcp = fadd arcp float %op1, %op2
+ %f.fast = fadd fast float %op1, %op2
+ ; CHECK: %f.fast = fadd fast float %op1, %op2
+ ret void
+}
+
+;; Type System
+%opaquety = type opaque
+define void @typesystem() {
+ %p0 = bitcast i8* null to i32 (i32)*
+ ; CHECK: %p0 = bitcast i8* null to i32 (i32)*
+ %p1 = bitcast i8* null to void (i8*)*
+ ; CHECK: %p1 = bitcast i8* null to void (i8*)*
+ %p2 = bitcast i8* null to i32 (i8*, ...)*
+ ; CHECK: %p2 = bitcast i8* null to i32 (i8*, ...)*
+ %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)*
+ ; CHECK: %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)*
+ %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)*
+ ; CHECK: %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)*
+ %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)*
+ ; CHECK: %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)*
+
+ %t0 = alloca i1942652
+ ; CHECK: %t0 = alloca i1942652
+ %t1 = alloca half
+ ; CHECK: %t1 = alloca half
+ %t2 = alloca float
+ ; CHECK: %t2 = alloca float
+ %t3 = alloca double
+ ; CHECK: %t3 = alloca double
+ %t4 = alloca fp128
+ ; CHECK: %t4 = alloca fp128
+ %t5 = alloca x86_fp80
+ ; CHECK: %t5 = alloca x86_fp80
+ %t6 = alloca ppc_fp128
+ ; CHECK: %t6 = alloca ppc_fp128
+ %t7 = alloca x86_mmx
+ ; CHECK: %t7 = alloca x86_mmx
+ %t8 = alloca %opaquety*
+ ; CHECK: %t8 = alloca %opaquety*
+
+ ret void
+}
+
+;; Inline Assembler Expressions
+define void @inlineasm(i32 %arg) {
+ call i32 asm "bswap $0", "=r,r"(i32 %arg)
+ ; CHECK: call i32 asm "bswap $0", "=r,r"(i32 %arg)
+ call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg)
+ ; CHECK: call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg)
+ ret void
+}
+
+;; Instructions
+
+; Instructions -- Terminators
+define void @instructions.terminators(i8 %val) { ; XXX: landingpad changed.
+; CHECK: define void @instructions.terminators(i8 %val) personality i32 ()* @personality_handler
+
+ br i1 false, label %iftrue, label %iffalse
+ ; CHECK: br i1 false, label %iftrue, label %iffalse
+ br label %iftrue
+ ; CHECK: br label %iftrue
+iftrue:
+ ret void
+ ; CHECK: ret void
+iffalse:
+
+ switch i8 %val, label %defaultdest [
+ ; CHECK: switch i8 %val, label %defaultdest [
+ i8 0, label %defaultdest.0
+ ; CHECK: i8 0, label %defaultdest.0
+ i8 1, label %defaultdest.1
+ ; CHECK: i8 1, label %defaultdest.1
+ i8 2, label %defaultdest.2
+ ; CHECK: i8 2, label %defaultdest.2
+ ]
+ ; CHECK: ]
+defaultdest:
+ ret void
+defaultdest.0:
+ ret void
+defaultdest.1:
+ ret void
+defaultdest.2:
+
+ indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2]
+ ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2]
+ indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2]
+ ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2]
+
+ invoke fastcc void @f.fastcc()
+ ; CHECK: invoke fastcc void @f.fastcc()
+ to label %defaultdest unwind label %exc
+ ; CHECK: to label %defaultdest unwind label %exc
+exc:
+ %cleanup = landingpad i32 personality i32()* @personality_handler cleanup
+
+ resume i32 undef
+ ; CHECK: resume i32 undef
+ unreachable
+ ; CHECK: unreachable
+
+ ret void
+}
+
+; Instructions -- Binary Operations
+define void @instructions.binops(i8 %op1, i8 %op2) {
+ ; nuw x nsw
+ add i8 %op1, %op2
+ ; CHECK: add i8 %op1, %op2
+ add nuw i8 %op1, %op2
+ ; CHECK: add nuw i8 %op1, %op2
+ add nsw i8 %op1, %op2
+ ; CHECK: add nsw i8 %op1, %op2
+ add nuw nsw i8 %op1, %op2
+ ; CHECK: add nuw nsw i8 %op1, %op2
+ sub i8 %op1, %op2
+ ; CHECK: sub i8 %op1, %op2
+ sub nuw i8 %op1, %op2
+ ; CHECK: sub nuw i8 %op1, %op2
+ sub nsw i8 %op1, %op2
+ ; CHECK: sub nsw i8 %op1, %op2
+ sub nuw nsw i8 %op1, %op2
+ ; CHECK: sub nuw nsw i8 %op1, %op2
+ mul i8 %op1, %op2
+ ; CHECK: mul i8 %op1, %op2
+ mul nuw i8 %op1, %op2
+ ; CHECK: mul nuw i8 %op1, %op2
+ mul nsw i8 %op1, %op2
+ ; CHECK: mul nsw i8 %op1, %op2
+ mul nuw nsw i8 %op1, %op2
+ ; CHECK: mul nuw nsw i8 %op1, %op2
+
+ ; exact
+ udiv i8 %op1, %op2
+ ; CHECK: udiv i8 %op1, %op2
+ udiv exact i8 %op1, %op2
+ ; CHECK: udiv exact i8 %op1, %op2
+ sdiv i8 %op1, %op2
+ ; CHECK: sdiv i8 %op1, %op2
+ sdiv exact i8 %op1, %op2
+ ; CHECK: sdiv exact i8 %op1, %op2
+
+ ; none
+ urem i8 %op1, %op2
+ ; CHECK: urem i8 %op1, %op2
+ srem i8 %op1, %op2
+ ; CHECK: srem i8 %op1, %op2
+
+ ret void
+}
+
+; Instructions -- Bitwise Binary Operations
+define void @instructions.bitwise_binops(i8 %op1, i8 %op2) {
+ ; nuw x nsw
+ shl i8 %op1, %op2
+ ; CHECK: shl i8 %op1, %op2
+ shl nuw i8 %op1, %op2
+ ; CHECK: shl nuw i8 %op1, %op2
+ shl nsw i8 %op1, %op2
+ ; CHECK: shl nsw i8 %op1, %op2
+ shl nuw nsw i8 %op1, %op2
+ ; CHECK: shl nuw nsw i8 %op1, %op2
+
+ ; exact
+ lshr i8 %op1, %op2
+ ; CHECK: lshr i8 %op1, %op2
+ lshr exact i8 %op1, %op2
+ ; CHECK: lshr exact i8 %op1, %op2
+ ashr i8 %op1, %op2
+ ; CHECK: ashr i8 %op1, %op2
+ ashr exact i8 %op1, %op2
+ ; CHECK: ashr exact i8 %op1, %op2
+
+ ; none
+ and i8 %op1, %op2
+ ; CHECK: and i8 %op1, %op2
+ or i8 %op1, %op2
+ ; CHECK: or i8 %op1, %op2
+ xor i8 %op1, %op2
+ ; CHECK: xor i8 %op1, %op2
+
+ ret void
+}
+
+; Instructions -- Vector Operations
+define void @instructions.vectorops(<4 x float> %vec, <4 x float> %vec2) {
+ extractelement <4 x float> %vec, i8 0
+ ; CHECK: extractelement <4 x float> %vec, i8 0
+ insertelement <4 x float> %vec, float 3.500000e+00, i8 0
+ ; CHECK: insertelement <4 x float> %vec, float 3.500000e+00, i8 0
+ shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer
+ ; CHECK: shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer
+
+ ret void
+}
+
+; Instructions -- Aggregate Operations
+define void @instructions.aggregateops({ i8, i32 } %up, <{ i8, i32 }> %p,
+ [3 x i8] %arr, { i8, { i32 }} %n,
+ <2 x i8*> %pvec, <2 x i64> %offsets) {
+ extractvalue { i8, i32 } %up, 0
+ ; CHECK: extractvalue { i8, i32 } %up, 0
+ extractvalue <{ i8, i32 }> %p, 1
+ ; CHECK: extractvalue <{ i8, i32 }> %p, 1
+ extractvalue [3 x i8] %arr, 2
+ ; CHECK: extractvalue [3 x i8] %arr, 2
+ extractvalue { i8, { i32 } } %n, 1, 0
+ ; CHECK: extractvalue { i8, { i32 } } %n, 1, 0
+
+ insertvalue { i8, i32 } %up, i8 1, 0
+ ; CHECK: insertvalue { i8, i32 } %up, i8 1, 0
+ insertvalue <{ i8, i32 }> %p, i32 2, 1
+ ; CHECK: insertvalue <{ i8, i32 }> %p, i32 2, 1
+ insertvalue [3 x i8] %arr, i8 0, 0
+ ; CHECK: insertvalue [3 x i8] %arr, i8 0, 0
+ insertvalue { i8, { i32 } } %n, i32 0, 1, 0
+ ; CHECK: insertvalue { i8, { i32 } } %n, i32 0, 1, 0
+
+ %up.ptr = alloca { i8, i32 }
+ %p.ptr = alloca <{ i8, i32 }>
+ %arr.ptr = alloca [3 x i8]
+ %n.ptr = alloca { i8, { i32 } }
+
+ ; XXX: The parser spits out the load type here.
+ getelementptr { i8, i32 }* %up.ptr, i8 0
+ ; CHECK: getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0
+ getelementptr <{ i8, i32 }>* %p.ptr, i8 1
+ ; CHECK: getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1
+ getelementptr [3 x i8]* %arr.ptr, i8 2
+ ; CHECK: getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2
+ getelementptr { i8, { i32 } }* %n.ptr, i32 0, i32 1
+ ; CHECK: getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1
+ getelementptr inbounds { i8, { i32 } }* %n.ptr, i32 1, i32 0
+ ; CHECK: getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0
+ getelementptr <2 x i8*> %pvec, <2 x i64> %offsets
+ ; CHECK: getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets
+
+ ret void
+}
+
+; Instructions -- Memory Access and Addressing Operations
+!7 = !{i32 1}
+!8 = !{}
+!9 = !{i64 4}
+define void @instructions.memops(i32** %base) {
+ alloca i32, i8 4, align 4
+ ; CHECK: alloca i32, i8 4, align 4
+ alloca inalloca i32, i8 4, align 4
+ ; CHECK: alloca inalloca i32, i8 4, align 4
+
+ load i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9
+ ; CHECK: load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9
+ load volatile i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9
+ ; CHECK: load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9
+
+ store i32* null, i32** %base, align 4, !nontemporal !8
+ ; CHECK: store i32* null, i32** %base, align 4, !nontemporal !8
+ store volatile i32* null, i32** %base, align 4, !nontemporal !8
+ ; CHECK: store volatile i32* null, i32** %base, align 4, !nontemporal !8
+
+ ret void
+}
+
+; Instructions -- Conversion Operations
+define void @instructions.conversions() {
+ trunc i32 -1 to i1
+ ; CHECK: trunc i32 -1 to i1
+ zext i32 -1 to i64
+ ; CHECK: zext i32 -1 to i64
+ sext i32 -1 to i64
+ ; CHECK: sext i32 -1 to i64
+ fptrunc float undef to half
+ ; CHECK: fptrunc float undef to half
+ fpext half undef to float
+ ; CHECK: fpext half undef to float
+ fptoui float undef to i32
+ ; CHECK: fptoui float undef to i32
+ fptosi float undef to i32
+ ; CHECK: fptosi float undef to i32
+ uitofp i32 1 to float
+ ; CHECK: uitofp i32 1 to float
+ sitofp i32 -1 to float
+ ; CHECK: sitofp i32 -1 to float
+ ptrtoint i8* null to i64
+ ; CHECK: ptrtoint i8* null to i64
+ inttoptr i64 0 to i8*
+ ; CHECK: inttoptr i64 0 to i8*
+ bitcast i32 0 to i32
+ ; CHECK: bitcast i32 0 to i32
+ addrspacecast i32* null to i32 addrspace(1)*
+ ; CHECK: addrspacecast i32* null to i32 addrspace(1)*
+
+ ret void
+}
+
+; Instructions -- Other Operations
+define void @instructions.other(i32 %op1, i32 %op2, half %fop1, half %fop2) {
+entry:
+ icmp eq i32 %op1, %op2
+ ; CHECK: icmp eq i32 %op1, %op2
+ icmp ne i32 %op1, %op2
+ ; CHECK: icmp ne i32 %op1, %op2
+ icmp ugt i32 %op1, %op2
+ ; CHECK: icmp ugt i32 %op1, %op2
+ icmp uge i32 %op1, %op2
+ ; CHECK: icmp uge i32 %op1, %op2
+ icmp ult i32 %op1, %op2
+ ; CHECK: icmp ult i32 %op1, %op2
+ icmp ule i32 %op1, %op2
+ ; CHECK: icmp ule i32 %op1, %op2
+ icmp sgt i32 %op1, %op2
+ ; CHECK: icmp sgt i32 %op1, %op2
+ icmp sge i32 %op1, %op2
+ ; CHECK: icmp sge i32 %op1, %op2
+ icmp slt i32 %op1, %op2
+ ; CHECK: icmp slt i32 %op1, %op2
+ icmp sle i32 %op1, %op2
+ ; CHECK: icmp sle i32 %op1, %op2
+
+ fcmp false half %fop1, %fop2
+ ; CHECK: fcmp false half %fop1, %fop2
+ fcmp oeq half %fop1, %fop2
+ ; CHECK: fcmp oeq half %fop1, %fop2
+ fcmp ogt half %fop1, %fop2
+ ; CHECK: fcmp ogt half %fop1, %fop2
+ fcmp oge half %fop1, %fop2
+ ; CHECK: fcmp oge half %fop1, %fop2
+ fcmp olt half %fop1, %fop2
+ ; CHECK: fcmp olt half %fop1, %fop2
+ fcmp ole half %fop1, %fop2
+ ; CHECK: fcmp ole half %fop1, %fop2
+ fcmp one half %fop1, %fop2
+ ; CHECK: fcmp one half %fop1, %fop2
+ fcmp ord half %fop1, %fop2
+ ; CHECK: fcmp ord half %fop1, %fop2
+ fcmp ueq half %fop1, %fop2
+ ; CHECK: fcmp ueq half %fop1, %fop2
+ fcmp ugt half %fop1, %fop2
+ ; CHECK: fcmp ugt half %fop1, %fop2
+ fcmp uge half %fop1, %fop2
+ ; CHECK: fcmp uge half %fop1, %fop2
+ fcmp ult half %fop1, %fop2
+ ; CHECK: fcmp ult half %fop1, %fop2
+ fcmp ule half %fop1, %fop2
+ ; CHECK: fcmp ule half %fop1, %fop2
+ fcmp une half %fop1, %fop2
+ ; CHECK: fcmp une half %fop1, %fop2
+ fcmp uno half %fop1, %fop2
+ ; CHECK: fcmp uno half %fop1, %fop2
+ fcmp true half %fop1, %fop2
+ ; CHECK: fcmp true half %fop1, %fop2
+
+ br label %exit
+L1:
+ %v1 = add i32 %op1, %op2
+ br label %exit
+L2:
+ %v2 = add i32 %op1, %op2
+ br label %exit
+exit:
+ phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+ ; CHECK: phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+
+ select i1 true, i32 0, i32 1
+ ; CHECK: select i1 true, i32 0, i32 1
+ select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+ ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+
+ call void @f.nobuiltin() builtin
+ ; CHECK: call void @f.nobuiltin() #33
+
+ call fastcc noalias i32* @f.noalias() noinline
+ ; CHECK: call fastcc noalias i32* @f.noalias() #11
+ tail call ghccc nonnull i32* @f.nonnull() minsize
+ ; CHECK: tail call ghccc nonnull i32* @f.nonnull() #6
+
+ ret void
+}
+
+define void @instructions.call_musttail(i8* inalloca %val) {
+ musttail call void @f.param.inalloca(i8* inalloca %val)
+ ; CHECK: musttail call void @f.param.inalloca(i8* inalloca %val)
+
+ ret void
+}
+
+declare void @llvm.donothing() nounwind readnone
+
+declare i32 @personality_handler()
+
+define void @instructions.landingpad() {
+; CHECK: define void @instructions.landingpad() personality i32 ()* @personality_handler
+
+ invoke void @llvm.donothing() to label %proceed unwind label %catch1
+ invoke void @llvm.donothing() to label %proceed unwind label %catch2
+ invoke void @llvm.donothing() to label %proceed unwind label %catch3
+ invoke void @llvm.donothing() to label %proceed unwind label %catch4
+
+catch1:
+ landingpad i32
+ ; CHECK: landingpad i32
+ personality i32()* @personality_handler
+ cleanup
+ ; CHECK: cleanup
+ br label %proceed
+
+catch2:
+ landingpad i32
+ ; CHECK: landingpad i32
+ personality i32()* @personality_handler
+ cleanup
+ ; CHECK: cleanup
+ catch i32* null
+ ; CHECK: catch i32* null
+ br label %proceed
+
+catch3:
+ landingpad i32
+ ; CHECK: landingpad i32
+ personality i32()* @personality_handler
+ cleanup
+ ; CHECK: cleanup
+ catch i32* null
+ ; CHECK: catch i32* null
+ catch i32* null
+ ; CHECK: catch i32* null
+ br label %proceed
+
+catch4:
+ landingpad i32
+ ; CHECK: landingpad i32
+ personality i32()* @personality_handler
+ filter [2 x i32] zeroinitializer
+ ; CHECK: filter [2 x i32] zeroinitializer
+ br label %proceed
+
+proceed:
+ ret void
+}
+
+;; Intrinsic Functions
+
+; Intrinsic Functions -- Variable Argument Handling
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+define void @instructions.va_arg(i8* %v, ...) {
+ %ap = alloca i8*
+ %ap2 = bitcast i8** %ap to i8*
+
+ call void @llvm.va_start(i8* %ap2)
+ ; CHECK: call void @llvm.va_start(i8* %ap2)
+
+ va_arg i8* %ap2, i32
+ ; CHECK: va_arg i8* %ap2, i32
+
+ call void @llvm.va_copy(i8* %v, i8* %ap2)
+ ; CHECK: call void @llvm.va_copy(i8* %v, i8* %ap2)
+
+ call void @llvm.va_end(i8* %ap2)
+ ; CHECK: call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+; Intrinsic Functions -- Accurate Garbage Collection
+declare void @llvm.gcroot(i8**, i8*)
+declare i8* @llvm.gcread(i8*, i8**)
+declare void @llvm.gcwrite(i8*, i8*, i8**)
+define void @intrinsics.gc() gc "shadow-stack" {
+ %ptrloc = alloca i8*
+ call void @llvm.gcroot(i8** %ptrloc, i8* null)
+ ; CHECK: call void @llvm.gcroot(i8** %ptrloc, i8* null)
+
+ call i8* @llvm.gcread(i8* null, i8** %ptrloc)
+ ; CHECK: call i8* @llvm.gcread(i8* null, i8** %ptrloc)
+
+ %ref = alloca i8
+ call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc)
+ ; CHECK: call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc)
+
+ ret void
+}
+
+; Intrinsic Functions -- Code Generation
+declare i8* @llvm.returnaddress(i32)
+declare i8* @llvm.frameaddress(i32)
+declare i32 @llvm.read_register.i32(metadata)
+declare i64 @llvm.read_register.i64(metadata)
+declare void @llvm.write_register.i32(metadata, i32)
+declare void @llvm.write_register.i64(metadata, i64)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+declare void @llvm.pcmarker(i32)
+declare i64 @llvm.readcyclecounter()
+declare void @llvm.clear_cache(i8*, i8*)
+declare void @llvm.instrprof_increment(i8*, i64, i32, i32)
+
+!10 = !{!"rax"}
+define void @intrinsics.codegen() {
+ call i8* @llvm.returnaddress(i32 1)
+ ; CHECK: call i8* @llvm.returnaddress(i32 1)
+ call i8* @llvm.frameaddress(i32 1)
+ ; CHECK: call i8* @llvm.frameaddress(i32 1)
+
+ call i32 @llvm.read_register.i32(metadata !10)
+ ; CHECK: call i32 @llvm.read_register.i32(metadata !10)
+ call i64 @llvm.read_register.i64(metadata !10)
+ ; CHECK: call i64 @llvm.read_register.i64(metadata !10)
+ call void @llvm.write_register.i32(metadata !10, i32 0)
+ ; CHECK: call void @llvm.write_register.i32(metadata !10, i32 0)
+ call void @llvm.write_register.i64(metadata !10, i64 0)
+ ; CHECK: call void @llvm.write_register.i64(metadata !10, i64 0)
+
+ %stack = call i8* @llvm.stacksave()
+ ; CHECK: %stack = call i8* @llvm.stacksave()
+ call void @llvm.stackrestore(i8* %stack)
+ ; CHECK: call void @llvm.stackrestore(i8* %stack)
+
+ call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
+ ; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
+
+ call void @llvm.pcmarker(i32 1)
+ ; CHECK: call void @llvm.pcmarker(i32 1)
+
+ call i64 @llvm.readcyclecounter()
+ ; CHECK: call i64 @llvm.readcyclecounter()
+
+ call void @llvm.clear_cache(i8* null, i8* null)
+ ; CHECK: call void @llvm.clear_cache(i8* null, i8* null)
+
+ call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0)
+ ; CHECK: call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0)
+
+ ret void
+}
+
+; CHECK: attributes #0 = { alignstack=4 }
+; CHECK: attributes #1 = { alignstack=8 }
+; CHECK: attributes #2 = { alwaysinline }
+; CHECK: attributes #3 = { cold }
+; CHECK: attributes #4 = { inlinehint }
+; CHECK: attributes #5 = { jumptable }
+; CHECK: attributes #6 = { minsize }
+; CHECK: attributes #7 = { naked }
+; CHECK: attributes #8 = { nobuiltin }
+; CHECK: attributes #9 = { noduplicate }
+; CHECK: attributes #10 = { noimplicitfloat }
+; CHECK: attributes #11 = { noinline }
+; CHECK: attributes #12 = { nonlazybind }
+; CHECK: attributes #13 = { noredzone }
+; CHECK: attributes #14 = { noreturn }
+; CHECK: attributes #15 = { nounwind }
+; CHECK: attributes #16 = { noinline optnone }
+; CHECK: attributes #17 = { optsize }
+; CHECK: attributes #18 = { readnone }
+; CHECK: attributes #19 = { readonly }
+; CHECK: attributes #20 = { returns_twice }
+; CHECK: attributes #21 = { sanitize_address }
+; CHECK: attributes #22 = { sanitize_memory }
+; CHECK: attributes #23 = { sanitize_thread }
+; CHECK: attributes #24 = { ssp }
+; CHECK: attributes #25 = { sspreq }
+; CHECK: attributes #26 = { sspstrong }
+; CHECK: attributes #27 = { uwtable }
+; CHECK: attributes #28 = { "cpu"="cortex-a8" }
+; CHECK: attributes #29 = { nounwind readnone }
+; CHECK: attributes #30 = { argmemonly nounwind readonly }
+; CHECK: attributes #31 = { argmemonly nounwind }
+; CHECK: attributes #32 = { nounwind readonly }
+; CHECK: attributes #33 = { builtin }
+
+;; Metadata
+
+; Metadata -- Module flags
+!llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+
+!0 = !{i32 1, !"mod1", i32 0}
+; CHECK: !0 = !{i32 1, !"mod1", i32 0}
+!1 = !{i32 2, !"mod2", i32 0}
+; CHECK: !1 = !{i32 2, !"mod2", i32 0}
+!2 = !{i32 3, !"mod3", !3}
+; CHECK: !2 = !{i32 3, !"mod3", !3}
+!3 = !{!"mod6", !0}
+; CHECK: !3 = !{!"mod6", !0}
+!4 = !{i32 4, !"mod4", i32 0}
+; CHECK: !4 = !{i32 4, !"mod4", i32 0}
+!5 = !{i32 5, !"mod5", !0}
+; CHECK: !5 = !{i32 5, !"mod5", !0}
+!6 = !{i32 6, !"mod6", !0}
+; CHECK: !6 = !{i32 6, !"mod6", !0}
diff --git a/test/Bitcode/compatibility-3.6.ll.bc b/test/Bitcode/compatibility-3.6.ll.bc
new file mode 100644
index 000000000000..86b662000316
--- /dev/null
+++ b/test/Bitcode/compatibility-3.6.ll.bc
Binary files differ
diff --git a/test/Bitcode/compatibility-3.7.ll b/test/Bitcode/compatibility-3.7.ll
new file mode 100644
index 000000000000..4ae0aed20181
--- /dev/null
+++ b/test/Bitcode/compatibility-3.7.ll
@@ -0,0 +1,1280 @@
+; Bitcode compatibility test for llvm 3.7.0
+;
+; N.b: This is 3.7-compatible IR. The CHECK lines occasionally differ from
+; the IR used to generate the bitcode, and may need to be updated. These
+; locations are tagged with an 'XXX'.
+
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+target datalayout = "E"
+; CHECK: target datalayout = "E"
+
+target triple = "x86_64-apple-macosx10.10.0"
+; CHECK: target triple = "x86_64-apple-macosx10.10.0"
+
+;; Module-level assembly
+module asm "beep boop"
+; CHECK: module asm "beep boop"
+
+;; Comdats
+$comdat.any = comdat any
+; CHECK: $comdat.any = comdat any
+$comdat.exactmatch = comdat exactmatch
+; CHECK: $comdat.exactmatch = comdat exactmatch
+$comdat.largest = comdat largest
+; CHECK: $comdat.largest = comdat largest
+$comdat.noduplicates = comdat noduplicates
+; CHECK: $comdat.noduplicates = comdat noduplicates
+$comdat.samesize = comdat samesize
+; CHECK: $comdat.samesize = comdat samesize
+
+;; Constants
+@const.true = constant i1 true
+; CHECK: @const.true = constant i1 true
+@const.false = constant i1 false
+; CHECK: @const.false = constant i1 false
+@const.int = constant i32 zeroinitializer
+; CHECK: @const.int = constant i32 0
+@const.float = constant double 0.0
+; CHECK: @const.float = constant double 0.0
+@const.null = constant i8* null
+; CHECK: @const.null = constant i8* null
+%const.struct.type = type { i32, i8 }
+%const.struct.type.packed = type <{ i32, i8 }>
+@const.struct = constant %const.struct.type { i32 -1, i8 undef }
+; CHECK: @const.struct = constant %const.struct.type { i32 -1, i8 undef }
+@const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+; CHECK: @const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+@const.array = constant [2 x i32] [i32 -3, i32 -4]
+; CHECK: @const.array = constant [2 x i32] [i32 -3, i32 -4]
+@const.vector = constant <2 x i32> <i32 -5, i32 -6>
+; CHECK: @const.vector = constant <2 x i32> <i32 -5, i32 -6>
+
+;; Global Variables
+; Format: [@<GlobalVarName> =] [Linkage] [Visibility] [DLLStorageClass]
+; [ThreadLocal] [unnamed_addr] [AddrSpace] [ExternallyInitialized]
+; <global | constant> <Type> [<InitializerConstant>]
+; [, section "name"] [, comdat [($name)]] [, align <Alignment>]
+
+; Global Variables -- Simple
+@g1 = global i32 0
+; CHECK: @g1 = global i32 0
+@g2 = constant i32 0
+; CHECK: @g2 = constant i32 0
+
+; Global Variables -- Linkage
+@g.private = private global i32 0
+; CHECK: @g.private = private global i32 0
+@g.internal = internal global i32 0
+; CHECK: @g.internal = internal global i32 0
+@g.available_externally = available_externally global i32 0
+; CHECK: @g.available_externally = available_externally global i32 0
+@g.linkonce = linkonce global i32 0
+; CHECK: @g.linkonce = linkonce global i32 0
+@g.weak = weak global i32 0
+; CHECK: @g.weak = weak global i32 0
+@g.common = common global i32 0
+; CHECK: @g.common = common global i32 0
+@g.appending = appending global [4 x i8] c"test"
+; CHECK: @g.appending = appending global [4 x i8] c"test"
+@g.extern_weak = extern_weak global i32
+; CHECK: @g.extern_weak = extern_weak global i32
+@g.linkonce_odr = linkonce_odr global i32 0
+; CHECK: @g.linkonce_odr = linkonce_odr global i32 0
+@g.weak_odr = weak_odr global i32 0
+; CHECK: @g.weak_odr = weak_odr global i32 0
+@g.external = external global i32
+; CHECK: @g.external = external global i32
+
+; Global Variables -- Visibility
+@g.default = default global i32 0
+; CHECK: @g.default = global i32 0
+@g.hidden = hidden global i32 0
+; CHECK: @g.hidden = hidden global i32 0
+@g.protected = protected global i32 0
+; CHECK: @g.protected = protected global i32 0
+
+; Global Variables -- DLLStorageClass
+@g.dlldefault = default global i32 0
+; CHECK: @g.dlldefault = global i32 0
+@g.dllimport = external dllimport global i32
+; CHECK: @g.dllimport = external dllimport global i32
+@g.dllexport = dllexport global i32 0
+; CHECK: @g.dllexport = dllexport global i32 0
+
+; Global Variables -- ThreadLocal
+@g.notthreadlocal = global i32 0
+; CHECK: @g.notthreadlocal = global i32 0
+@g.generaldynamic = thread_local global i32 0
+; CHECK: @g.generaldynamic = thread_local global i32 0
+@g.localdynamic = thread_local(localdynamic) global i32 0
+; CHECK: @g.localdynamic = thread_local(localdynamic) global i32 0
+@g.initialexec = thread_local(initialexec) global i32 0
+; CHECK: @g.initialexec = thread_local(initialexec) global i32 0
+@g.localexec = thread_local(localexec) global i32 0
+; CHECK: @g.localexec = thread_local(localexec) global i32 0
+
+; Global Variables -- unnamed_addr
+@g.unnamed_addr = unnamed_addr global i32 0
+; CHECK: @g.unnamed_addr = unnamed_addr global i32 0
+
+; Global Variables -- AddrSpace
+@g.addrspace = addrspace(1) global i32 0
+; CHECK: @g.addrspace = addrspace(1) global i32 0
+
+; Global Variables -- ExternallyInitialized
+@g.externally_initialized = external externally_initialized global i32
+; CHECK: @g.externally_initialized = external externally_initialized global i32
+
+; Global Variables -- section
+@g.section = global i32 0, section "_DATA"
+; CHECK: @g.section = global i32 0, section "_DATA"
+
+; Global Variables -- comdat
+@comdat.any = global i32 0, comdat
+; CHECK: @comdat.any = global i32 0, comdat
+@comdat.exactmatch = global i32 0, comdat
+; CHECK: @comdat.exactmatch = global i32 0, comdat
+@comdat.largest = global i32 0, comdat
+; CHECK: @comdat.largest = global i32 0, comdat
+@comdat.noduplicates = global i32 0, comdat
+; CHECK: @comdat.noduplicates = global i32 0, comdat
+@comdat.samesize = global i32 0, comdat
+; CHECK: @comdat.samesize = global i32 0, comdat
+
+; Force two globals from different comdats into sections with the same name.
+$comdat1 = comdat any
+$comdat2 = comdat any
+@g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1)
+; CHECK: @g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1)
+@g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2)
+; CHECK: @g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2)
+
+; Global Variables -- align
+@g.align = global i32 0, align 4
+; CHECK: @g.align = global i32 0, align 4
+
+; Global Variables -- Intrinsics
+%pri.func.data = type { i32, void ()*, i8* }
+@g.used1 = global i32 0
+@g.used2 = global i32 0
+@g.used3 = global i8 0
+declare void @g.f1()
+@llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata"
+; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata"
+@llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata"
+@llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+; CHECK: @llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+@llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+; CHECK: @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+
+;; Aliases
+; Format: @<Name> = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal]
+; [unnamed_addr] alias <AliaseeTy> @<Aliasee>
+
+; Aliases -- Linkage
+@a.private = private alias i32* @g.private
+; CHECK: @a.private = private alias i32, i32* @g.private
+@a.internal = internal alias i32* @g.internal
+; CHECK: @a.internal = internal alias i32, i32* @g.internal
+@a.linkonce = linkonce alias i32* @g.linkonce
+; CHECK: @a.linkonce = linkonce alias i32, i32* @g.linkonce
+@a.weak = weak alias i32* @g.weak
+; CHECK: @a.weak = weak alias i32, i32* @g.weak
+@a.linkonce_odr = linkonce_odr alias i32* @g.linkonce_odr
+; CHECK: @a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr
+@a.weak_odr = weak_odr alias i32* @g.weak_odr
+; CHECK: @a.weak_odr = weak_odr alias i32, i32* @g.weak_odr
+@a.external = external alias i32* @g1
+; CHECK: @a.external = alias i32, i32* @g1
+
+; Aliases -- Visibility
+@a.default = default alias i32* @g.default
+; CHECK: @a.default = alias i32, i32* @g.default
+@a.hidden = hidden alias i32* @g.hidden
+; CHECK: @a.hidden = hidden alias i32, i32* @g.hidden
+@a.protected = protected alias i32* @g.protected
+; CHECK: @a.protected = protected alias i32, i32* @g.protected
+
+; Aliases -- DLLStorageClass
+@a.dlldefault = default alias i32* @g.dlldefault
+; CHECK: @a.dlldefault = alias i32, i32* @g.dlldefault
+@a.dllimport = dllimport alias i32* @g1
+; CHECK: @a.dllimport = dllimport alias i32, i32* @g1
+@a.dllexport = dllexport alias i32* @g.dllexport
+; CHECK: @a.dllexport = dllexport alias i32, i32* @g.dllexport
+
+; Aliases -- ThreadLocal
+@a.notthreadlocal = alias i32* @g.notthreadlocal
+; CHECK: @a.notthreadlocal = alias i32, i32* @g.notthreadlocal
+@a.generaldynamic = thread_local alias i32* @g.generaldynamic
+; CHECK: @a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic
+@a.localdynamic = thread_local(localdynamic) alias i32* @g.localdynamic
+; CHECK: @a.localdynamic = thread_local(localdynamic) alias i32, i32* @g.localdynamic
+@a.initialexec = thread_local(initialexec) alias i32* @g.initialexec
+; CHECK: @a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec
+@a.localexec = thread_local(localexec) alias i32* @g.localexec
+; CHECK: @a.localexec = thread_local(localexec) alias i32, i32* @g.localexec
+
+; Aliases -- unnamed_addr
+@a.unnamed_addr = unnamed_addr alias i32* @g.unnamed_addr
+; CHECK: @a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr
+
+;; Functions
+; Format: define [linkage] [visibility] [DLLStorageClass]
+; [cconv] [ret attrs]
+; <ResultType> @<FunctionName> ([argument list])
+; [unnamed_addr] [fn Attrs] [section "name"] [comdat [($name)]]
+; [align N] [gc] [prefix Constant] [prologue Constant]
+; [personality Constant] { ... }
+
+; Functions -- Simple
+declare void @f1 ()
+; CHECK: declare void @f1()
+
+define void @f2 () {
+; CHECK: define void @f2()
+entry:
+ ret void
+}
+
+; Functions -- linkage
+define private void @f.private() {
+; CHECK: define private void @f.private()
+entry:
+ ret void
+}
+define internal void @f.internal() {
+; CHECK: define internal void @f.internal()
+entry:
+ ret void
+}
+define available_externally void @f.available_externally() {
+; CHECK: define available_externally void @f.available_externally()
+entry:
+ ret void
+}
+define linkonce void @f.linkonce() {
+; CHECK: define linkonce void @f.linkonce()
+entry:
+ ret void
+}
+define weak void @f.weak() {
+; CHECK: define weak void @f.weak()
+entry:
+ ret void
+}
+define linkonce_odr void @f.linkonce_odr() {
+; CHECK: define linkonce_odr void @f.linkonce_odr()
+entry:
+ ret void
+}
+define weak_odr void @f.weak_odr() {
+; CHECK: define weak_odr void @f.weak_odr()
+entry:
+ ret void
+}
+declare external void @f.external()
+; CHECK: declare void @f.external()
+declare extern_weak void @f.extern_weak()
+; CHECK: declare extern_weak void @f.extern_weak()
+
+; Functions -- visibility
+declare default void @f.default()
+; CHECK: declare void @f.default()
+declare hidden void @f.hidden()
+; CHECK: declare hidden void @f.hidden()
+declare protected void @f.protected()
+; CHECK: declare protected void @f.protected()
+
+; Functions -- DLLStorageClass
+declare dllimport void @f.dllimport()
+; CHECK: declare dllimport void @f.dllimport()
+declare dllexport void @f.dllexport()
+; CHECK: declare dllexport void @f.dllexport()
+
+; Functions -- cconv (Calling conventions)
+declare ccc void @f.ccc()
+; CHECK: declare void @f.ccc()
+declare fastcc void @f.fastcc()
+; CHECK: declare fastcc void @f.fastcc()
+declare coldcc void @f.coldcc()
+; CHECK: declare coldcc void @f.coldcc()
+declare cc10 void @f.cc10()
+; CHECK: declare ghccc void @f.cc10()
+declare ghccc void @f.ghccc()
+; CHECK: declare ghccc void @f.ghccc()
+declare cc11 void @f.cc11()
+; CHECK: declare cc11 void @f.cc11()
+declare webkit_jscc void @f.webkit_jscc()
+; CHECK: declare webkit_jscc void @f.webkit_jscc()
+declare anyregcc void @f.anyregcc()
+; CHECK: declare anyregcc void @f.anyregcc()
+declare preserve_mostcc void @f.preserve_mostcc()
+; CHECK: declare preserve_mostcc void @f.preserve_mostcc()
+declare preserve_allcc void @f.preserve_allcc()
+; CHECK: declare preserve_allcc void @f.preserve_allcc()
+declare cc64 void @f.cc64()
+; CHECK: declare x86_stdcallcc void @f.cc64()
+declare x86_stdcallcc void @f.x86_stdcallcc()
+; CHECK: declare x86_stdcallcc void @f.x86_stdcallcc()
+declare cc65 void @f.cc65()
+; CHECK: declare x86_fastcallcc void @f.cc65()
+declare x86_fastcallcc void @f.x86_fastcallcc()
+; CHECK: declare x86_fastcallcc void @f.x86_fastcallcc()
+declare cc66 void @f.cc66()
+; CHECK: declare arm_apcscc void @f.cc66()
+declare arm_apcscc void @f.arm_apcscc()
+; CHECK: declare arm_apcscc void @f.arm_apcscc()
+declare cc67 void @f.cc67()
+; CHECK: declare arm_aapcscc void @f.cc67()
+declare arm_aapcscc void @f.arm_aapcscc()
+; CHECK: declare arm_aapcscc void @f.arm_aapcscc()
+declare cc68 void @f.cc68()
+; CHECK: declare arm_aapcs_vfpcc void @f.cc68()
+declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc()
+; CHECK: declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc()
+declare cc69 void @f.cc69()
+; CHECK: declare msp430_intrcc void @f.cc69()
+declare msp430_intrcc void @f.msp430_intrcc()
+; CHECK: declare msp430_intrcc void @f.msp430_intrcc()
+declare cc70 void @f.cc70()
+; CHECK: declare x86_thiscallcc void @f.cc70()
+declare x86_thiscallcc void @f.x86_thiscallcc()
+; CHECK: declare x86_thiscallcc void @f.x86_thiscallcc()
+declare cc71 void @f.cc71()
+; CHECK: declare ptx_kernel void @f.cc71()
+declare ptx_kernel void @f.ptx_kernel()
+; CHECK: declare ptx_kernel void @f.ptx_kernel()
+declare cc72 void @f.cc72()
+; CHECK: declare ptx_device void @f.cc72()
+declare ptx_device void @f.ptx_device()
+; CHECK: declare ptx_device void @f.ptx_device()
+declare cc75 void @f.cc75()
+; CHECK: declare spir_func void @f.cc75()
+declare spir_func void @f.spir_func()
+; CHECK: declare spir_func void @f.spir_func()
+declare cc76 void @f.cc76()
+; CHECK: declare spir_kernel void @f.cc76()
+declare spir_kernel void @f.spir_kernel()
+; CHECK: declare spir_kernel void @f.spir_kernel()
+declare cc77 void @f.cc77()
+; CHECK: declare intel_ocl_bicc void @f.cc77()
+declare intel_ocl_bicc void @f.intel_ocl_bicc()
+; CHECK: declare intel_ocl_bicc void @f.intel_ocl_bicc()
+declare cc78 void @f.cc78()
+; CHECK: declare x86_64_sysvcc void @f.cc78()
+declare x86_64_sysvcc void @f.x86_64_sysvcc()
+; CHECK: declare x86_64_sysvcc void @f.x86_64_sysvcc()
+declare cc79 void @f.cc79()
+; CHECK: declare x86_64_win64cc void @f.cc79()
+declare x86_64_win64cc void @f.x86_64_win64cc()
+; CHECK: declare x86_64_win64cc void @f.x86_64_win64cc()
+declare cc80 void @f.cc80()
+; CHECK: declare x86_vectorcallcc void @f.cc80()
+declare x86_vectorcallcc void @f.x86_vectorcallcc()
+; CHECK: declare x86_vectorcallcc void @f.x86_vectorcallcc()
+declare cc1023 void @f.cc1023()
+; CHECK: declare cc1023 void @f.cc1023()
+
+; Functions -- ret attrs (Return attributes)
+declare zeroext i64 @f.zeroext()
+; CHECK: declare zeroext i64 @f.zeroext()
+declare signext i64 @f.signext()
+; CHECK: declare signext i64 @f.signext()
+declare inreg i32* @f.inreg()
+; CHECK: declare inreg i32* @f.inreg()
+declare noalias i32* @f.noalias()
+; CHECK: declare noalias i32* @f.noalias()
+declare nonnull i32* @f.nonnull()
+; CHECK: declare nonnull i32* @f.nonnull()
+declare dereferenceable(4) i32* @f.dereferenceable4()
+; CHECK: declare dereferenceable(4) i32* @f.dereferenceable4()
+declare dereferenceable(8) i32* @f.dereferenceable8()
+; CHECK: declare dereferenceable(8) i32* @f.dereferenceable8()
+declare dereferenceable(16) i32* @f.dereferenceable16()
+; CHECK: declare dereferenceable(16) i32* @f.dereferenceable16()
+declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null()
+; CHECK: declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null()
+declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null()
+; CHECK: declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null()
+declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null()
+; CHECK: declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null()
+
+; Functions -- Parameter attributes
+declare void @f.param.zeroext(i8 zeroext)
+; CHECK: declare void @f.param.zeroext(i8 zeroext)
+declare void @f.param.signext(i8 signext)
+; CHECK: declare void @f.param.signext(i8 signext)
+declare void @f.param.inreg(i8 inreg)
+; CHECK: declare void @f.param.inreg(i8 inreg)
+declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+declare void @f.param.inalloca(i8* inalloca)
+; CHECK: declare void @f.param.inalloca(i8* inalloca)
+declare void @f.param.sret(i8* sret)
+; CHECK: declare void @f.param.sret(i8* sret)
+declare void @f.param.noalias(i8* noalias)
+; CHECK: declare void @f.param.noalias(i8* noalias)
+declare void @f.param.nocapture(i8* nocapture)
+; CHECK: declare void @f.param.nocapture(i8* nocapture)
+declare void @f.param.nest(i8* nest)
+; CHECK: declare void @f.param.nest(i8* nest)
+declare i8* @f.param.returned(i8* returned)
+; CHECK: declare i8* @f.param.returned(i8* returned)
+declare void @f.param.nonnull(i8* nonnull)
+; CHECK: declare void @f.param.nonnull(i8* nonnull)
+declare void @f.param.dereferenceable(i8* dereferenceable(4))
+; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4))
+declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4))
+; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4))
+
+; Functions -- unnamed_addr
+declare void @f.unnamed_addr() unnamed_addr
+; CHECK: declare void @f.unnamed_addr() unnamed_addr
+
+; Functions -- fn Attrs (Function attributes)
+declare void @f.alignstack4() alignstack(4)
+; CHECK: declare void @f.alignstack4() #0
+declare void @f.alignstack8() alignstack(8)
+; CHECK: declare void @f.alignstack8() #1
+declare void @f.alwaysinline() alwaysinline
+; CHECK: declare void @f.alwaysinline() #2
+declare void @f.cold() cold
+; CHECK: declare void @f.cold() #3
+declare void @f.convergent() convergent
+; CHECK: declare void @f.convergent() #4
+declare void @f.inlinehint() inlinehint
+; CHECK: declare void @f.inlinehint() #5
+declare void @f.jumptable() unnamed_addr jumptable
+; CHECK: declare void @f.jumptable() unnamed_addr #6
+declare void @f.minsize() minsize
+; CHECK: declare void @f.minsize() #7
+declare void @f.naked() naked
+; CHECK: declare void @f.naked() #8
+declare void @f.nobuiltin() nobuiltin
+; CHECK: declare void @f.nobuiltin() #9
+declare void @f.noduplicate() noduplicate
+; CHECK: declare void @f.noduplicate() #10
+declare void @f.noimplicitfloat() noimplicitfloat
+; CHECK: declare void @f.noimplicitfloat() #11
+declare void @f.noinline() noinline
+; CHECK: declare void @f.noinline() #12
+declare void @f.nonlazybind() nonlazybind
+; CHECK: declare void @f.nonlazybind() #13
+declare void @f.noredzone() noredzone
+; CHECK: declare void @f.noredzone() #14
+declare void @f.noreturn() noreturn
+; CHECK: declare void @f.noreturn() #15
+declare void @f.nounwind() nounwind
+; CHECK: declare void @f.nounwind() #16
+declare void @f.optnone() noinline optnone
+; CHECK: declare void @f.optnone() #17
+declare void @f.optsize() optsize
+; CHECK: declare void @f.optsize() #18
+declare void @f.readnone() readnone
+; CHECK: declare void @f.readnone() #19
+declare void @f.readonly() readonly
+; CHECK: declare void @f.readonly() #20
+declare void @f.returns_twice() returns_twice
+; CHECK: declare void @f.returns_twice() #21
+declare void @f.safestack() safestack
+; CHECK: declare void @f.safestack() #22
+declare void @f.sanitize_address() sanitize_address
+; CHECK: declare void @f.sanitize_address() #23
+declare void @f.sanitize_memory() sanitize_memory
+; CHECK: declare void @f.sanitize_memory() #24
+declare void @f.sanitize_thread() sanitize_thread
+; CHECK: declare void @f.sanitize_thread() #25
+declare void @f.ssp() ssp
+; CHECK: declare void @f.ssp() #26
+declare void @f.sspreq() sspreq
+; CHECK: declare void @f.sspreq() #27
+declare void @f.sspstrong() sspstrong
+; CHECK: declare void @f.sspstrong() #28
+declare void @f.thunk() "thunk"
+; CHECK: declare void @f.thunk() #29
+declare void @f.uwtable() uwtable
+; CHECK: declare void @f.uwtable() #30
+declare void @f.kvpair() "cpu"="cortex-a8"
+; CHECK:declare void @f.kvpair() #31
+
+; Functions -- section
+declare void @f.section() section "80"
+; CHECK: declare void @f.section() section "80"
+
+; Functions -- comdat
+define void @f.comdat_any() comdat($comdat.any) {
+; CHECK: define void @f.comdat_any() comdat($comdat.any)
+entry:
+ ret void
+}
+define void @f.comdat_exactmatch() comdat($comdat.exactmatch) {
+; CHECK: define void @f.comdat_exactmatch() comdat($comdat.exactmatch)
+entry:
+ ret void
+}
+define void @f.comdat_largest() comdat($comdat.largest) {
+; CHECK: define void @f.comdat_largest() comdat($comdat.largest)
+entry:
+ ret void
+}
+define void @f.comdat_noduplicates() comdat($comdat.noduplicates) {
+; CHECK: define void @f.comdat_noduplicates() comdat($comdat.noduplicates)
+entry:
+ ret void
+}
+define void @f.comdat_samesize() comdat($comdat.samesize) {
+; CHECK: define void @f.comdat_samesize() comdat($comdat.samesize)
+entry:
+ ret void
+}
+
+; Functions -- align
+declare void @f.align2() align 2
+; CHECK: declare void @f.align2() align 2
+declare void @f.align4() align 4
+; CHECK: declare void @f.align4() align 4
+declare void @f.align8() align 8
+; CHECK: declare void @f.align8() align 8
+
+; Functions -- GC
+declare void @f.gcshadow() gc "shadow-stack"
+; CHECK: declare void @f.gcshadow() gc "shadow-stack"
+
+; Functions -- Prefix data
+declare void @f.prefixi32() prefix i32 1684365668
+; CHECK: declare void @f.prefixi32() prefix i32 1684365668
+declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+; CHECK: declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+; Functions -- Prologue data
+declare void @f.prologuei32() prologue i32 1684365669
+; CHECK: declare void @f.prologuei32() prologue i32 1684365669
+declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+; CHECK: declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+; Functions -- Personality constant
+declare void @llvm.donothing() nounwind readnone
+; CHECK: declare void @llvm.donothing() #32
+define void @f.no_personality() personality i8 3 {
+; CHECK: define void @f.no_personality() personality i8 3
+ invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+declare i32 @f.personality_handler()
+; CHECK: declare i32 @f.personality_handler()
+define void @f.personality() personality i32 ()* @f.personality_handler {
+; CHECK: define void @f.personality() personality i32 ()* @f.personality_handler
+ invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+ %cleanup = landingpad i32 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+;; Atomic Memory Ordering Constraints
+define void @atomics(i32* %word) {
+ %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic
+ ; CHECK: %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic
+ %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic
+ ; CHECK: %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic
+ %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic
+ ; CHECK: %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic
+ %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic
+ ; CHECK: %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic
+ %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic
+ ; CHECK: %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic
+ %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic
+ ; CHECK: %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic
+ %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic
+ ; CHECK: %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic
+ %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic
+ ; CHECK: %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic
+ %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
+ ; CHECK: %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
+ %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic
+ ; CHECK: %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic
+ %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic
+ ; CHECK: %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic
+ %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic
+ ; CHECK: %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic
+ %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic
+ ; CHECK: %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic
+ %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic
+ ; CHECK: %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic
+ %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic
+ ; CHECK: %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic
+ %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic
+ ; CHECK: %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic
+ %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic
+ ; CHECK: %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic
+ %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic
+ ; CHECK: %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic
+ %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic
+ ; CHECK: %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic
+ fence acquire
+ ; CHECK: fence acquire
+ fence release
+ ; CHECK: fence release
+ fence acq_rel
+ ; CHECK: fence acq_rel
+ fence singlethread seq_cst
+ ; CHECK: fence singlethread seq_cst
+
+ %ld.1 = load atomic i32, i32* %word monotonic, align 4
+ ; CHECK: %ld.1 = load atomic i32, i32* %word monotonic, align 4
+ %ld.2 = load atomic volatile i32, i32* %word acquire, align 8
+ ; CHECK: %ld.2 = load atomic volatile i32, i32* %word acquire, align 8
+ %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16
+ ; CHECK: %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16
+
+ store atomic i32 23, i32* %word monotonic, align 4
+ ; CHECK: store atomic i32 23, i32* %word monotonic, align 4
+ store atomic volatile i32 24, i32* %word monotonic, align 4
+ ; CHECK: store atomic volatile i32 24, i32* %word monotonic, align 4
+ store atomic volatile i32 25, i32* %word singlethread monotonic, align 4
+ ; CHECK: store atomic volatile i32 25, i32* %word singlethread monotonic, align 4
+ ret void
+}
+
+;; Fast Math Flags
+define void @fastmathflags(float %op1, float %op2) {
+ %f.nnan = fadd nnan float %op1, %op2
+ ; CHECK: %f.nnan = fadd nnan float %op1, %op2
+ %f.ninf = fadd ninf float %op1, %op2
+ ; CHECK: %f.ninf = fadd ninf float %op1, %op2
+ %f.nsz = fadd nsz float %op1, %op2
+ ; CHECK: %f.nsz = fadd nsz float %op1, %op2
+ %f.arcp = fadd arcp float %op1, %op2
+ ; CHECK: %f.arcp = fadd arcp float %op1, %op2
+ %f.fast = fadd fast float %op1, %op2
+ ; CHECK: %f.fast = fadd fast float %op1, %op2
+ ret void
+}
+
+;; Type System
+%opaquety = type opaque
+define void @typesystem() {
+ %p0 = bitcast i8* null to i32 (i32)*
+ ; CHECK: %p0 = bitcast i8* null to i32 (i32)*
+ %p1 = bitcast i8* null to void (i8*)*
+ ; CHECK: %p1 = bitcast i8* null to void (i8*)*
+ %p2 = bitcast i8* null to i32 (i8*, ...)*
+ ; CHECK: %p2 = bitcast i8* null to i32 (i8*, ...)*
+ %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)*
+ ; CHECK: %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)*
+ %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)*
+ ; CHECK: %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)*
+ %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)*
+ ; CHECK: %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)*
+
+ %t0 = alloca i1942652
+ ; CHECK: %t0 = alloca i1942652
+ %t1 = alloca half
+ ; CHECK: %t1 = alloca half
+ %t2 = alloca float
+ ; CHECK: %t2 = alloca float
+ %t3 = alloca double
+ ; CHECK: %t3 = alloca double
+ %t4 = alloca fp128
+ ; CHECK: %t4 = alloca fp128
+ %t5 = alloca x86_fp80
+ ; CHECK: %t5 = alloca x86_fp80
+ %t6 = alloca ppc_fp128
+ ; CHECK: %t6 = alloca ppc_fp128
+ %t7 = alloca x86_mmx
+ ; CHECK: %t7 = alloca x86_mmx
+ %t8 = alloca %opaquety*
+ ; CHECK: %t8 = alloca %opaquety*
+
+ ret void
+}
+
+;; Inline Assembler Expressions
+define void @inlineasm(i32 %arg) {
+ call i32 asm "bswap $0", "=r,r"(i32 %arg)
+ ; CHECK: call i32 asm "bswap $0", "=r,r"(i32 %arg)
+ call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg)
+ ; CHECK: call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg)
+ ret void
+}
+
+;; Instructions
+
+; Instructions -- Terminators
+define void @instructions.terminators(i8 %val) personality i32 -10 {
+ br i1 false, label %iftrue, label %iffalse
+ ; CHECK: br i1 false, label %iftrue, label %iffalse
+ br label %iftrue
+ ; CHECK: br label %iftrue
+iftrue:
+ ret void
+ ; CHECK: ret void
+iffalse:
+
+ switch i8 %val, label %defaultdest [
+ ; CHECK: switch i8 %val, label %defaultdest [
+ i8 0, label %defaultdest.0
+ ; CHECK: i8 0, label %defaultdest.0
+ i8 1, label %defaultdest.1
+ ; CHECK: i8 1, label %defaultdest.1
+ i8 2, label %defaultdest.2
+ ; CHECK: i8 2, label %defaultdest.2
+ ]
+ ; CHECK: ]
+defaultdest:
+ ret void
+defaultdest.0:
+ ret void
+defaultdest.1:
+ ret void
+defaultdest.2:
+
+ indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2]
+ ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2]
+ indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2]
+ ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2]
+
+ invoke fastcc void @f.fastcc()
+ ; CHECK: invoke fastcc void @f.fastcc()
+ to label %defaultdest unwind label %exc
+ ; CHECK: to label %defaultdest unwind label %exc
+exc:
+ %cleanup = landingpad i32 cleanup
+
+ resume i32 undef
+ ; CHECK: resume i32 undef
+ unreachable
+ ; CHECK: unreachable
+
+ ret void
+}
+
+; Instructions -- Binary Operations
+define void @instructions.binops(i8 %op1, i8 %op2) {
+ ; nuw x nsw
+ add i8 %op1, %op2
+ ; CHECK: add i8 %op1, %op2
+ add nuw i8 %op1, %op2
+ ; CHECK: add nuw i8 %op1, %op2
+ add nsw i8 %op1, %op2
+ ; CHECK: add nsw i8 %op1, %op2
+ add nuw nsw i8 %op1, %op2
+ ; CHECK: add nuw nsw i8 %op1, %op2
+ sub i8 %op1, %op2
+ ; CHECK: sub i8 %op1, %op2
+ sub nuw i8 %op1, %op2
+ ; CHECK: sub nuw i8 %op1, %op2
+ sub nsw i8 %op1, %op2
+ ; CHECK: sub nsw i8 %op1, %op2
+ sub nuw nsw i8 %op1, %op2
+ ; CHECK: sub nuw nsw i8 %op1, %op2
+ mul i8 %op1, %op2
+ ; CHECK: mul i8 %op1, %op2
+ mul nuw i8 %op1, %op2
+ ; CHECK: mul nuw i8 %op1, %op2
+ mul nsw i8 %op1, %op2
+ ; CHECK: mul nsw i8 %op1, %op2
+ mul nuw nsw i8 %op1, %op2
+ ; CHECK: mul nuw nsw i8 %op1, %op2
+
+ ; exact
+ udiv i8 %op1, %op2
+ ; CHECK: udiv i8 %op1, %op2
+ udiv exact i8 %op1, %op2
+ ; CHECK: udiv exact i8 %op1, %op2
+ sdiv i8 %op1, %op2
+ ; CHECK: sdiv i8 %op1, %op2
+ sdiv exact i8 %op1, %op2
+ ; CHECK: sdiv exact i8 %op1, %op2
+
+ ; none
+ urem i8 %op1, %op2
+ ; CHECK: urem i8 %op1, %op2
+ srem i8 %op1, %op2
+ ; CHECK: srem i8 %op1, %op2
+
+ ret void
+}
+
+; Instructions -- Bitwise Binary Operations
+define void @instructions.bitwise_binops(i8 %op1, i8 %op2) {
+ ; nuw x nsw
+ shl i8 %op1, %op2
+ ; CHECK: shl i8 %op1, %op2
+ shl nuw i8 %op1, %op2
+ ; CHECK: shl nuw i8 %op1, %op2
+ shl nsw i8 %op1, %op2
+ ; CHECK: shl nsw i8 %op1, %op2
+ shl nuw nsw i8 %op1, %op2
+ ; CHECK: shl nuw nsw i8 %op1, %op2
+
+ ; exact
+ lshr i8 %op1, %op2
+ ; CHECK: lshr i8 %op1, %op2
+ lshr exact i8 %op1, %op2
+ ; CHECK: lshr exact i8 %op1, %op2
+ ashr i8 %op1, %op2
+ ; CHECK: ashr i8 %op1, %op2
+ ashr exact i8 %op1, %op2
+ ; CHECK: ashr exact i8 %op1, %op2
+
+ ; none
+ and i8 %op1, %op2
+ ; CHECK: and i8 %op1, %op2
+ or i8 %op1, %op2
+ ; CHECK: or i8 %op1, %op2
+ xor i8 %op1, %op2
+ ; CHECK: xor i8 %op1, %op2
+
+ ret void
+}
+
+; Instructions -- Vector Operations
+define void @instructions.vectorops(<4 x float> %vec, <4 x float> %vec2) {
+ extractelement <4 x float> %vec, i8 0
+ ; CHECK: extractelement <4 x float> %vec, i8 0
+ insertelement <4 x float> %vec, float 3.500000e+00, i8 0
+ ; CHECK: insertelement <4 x float> %vec, float 3.500000e+00, i8 0
+ shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer
+ ; CHECK: shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer
+
+ ret void
+}
+
+; Instructions -- Aggregate Operations
+define void @instructions.aggregateops({ i8, i32 } %up, <{ i8, i32 }> %p,
+ [3 x i8] %arr, { i8, { i32 }} %n,
+ <2 x i8*> %pvec, <2 x i64> %offsets) {
+ extractvalue { i8, i32 } %up, 0
+ ; CHECK: extractvalue { i8, i32 } %up, 0
+ extractvalue <{ i8, i32 }> %p, 1
+ ; CHECK: extractvalue <{ i8, i32 }> %p, 1
+ extractvalue [3 x i8] %arr, 2
+ ; CHECK: extractvalue [3 x i8] %arr, 2
+ extractvalue { i8, { i32 } } %n, 1, 0
+ ; CHECK: extractvalue { i8, { i32 } } %n, 1, 0
+
+ insertvalue { i8, i32 } %up, i8 1, 0
+ ; CHECK: insertvalue { i8, i32 } %up, i8 1, 0
+ insertvalue <{ i8, i32 }> %p, i32 2, 1
+ ; CHECK: insertvalue <{ i8, i32 }> %p, i32 2, 1
+ insertvalue [3 x i8] %arr, i8 0, 0
+ ; CHECK: insertvalue [3 x i8] %arr, i8 0, 0
+ insertvalue { i8, { i32 } } %n, i32 0, 1, 0
+ ; CHECK: insertvalue { i8, { i32 } } %n, i32 0, 1, 0
+
+ %up.ptr = alloca { i8, i32 }
+ %p.ptr = alloca <{ i8, i32 }>
+ %arr.ptr = alloca [3 x i8]
+ %n.ptr = alloca { i8, { i32 } }
+
+ getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0
+ ; CHECK: getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0
+ getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1
+ ; CHECK: getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1
+ getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2
+ ; CHECK: getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2
+ getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1
+ ; CHECK: getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1
+ getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0
+ ; CHECK: getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0
+ getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets
+ ; CHECK: getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets
+
+ ret void
+}
+
+; Instructions -- Memory Access and Addressing Operations
+!7 = !{i32 1}
+!8 = !{}
+!9 = !{i64 4}
+define void @instructions.memops(i32** %base) {
+ alloca i32, i8 4, align 4
+ ; CHECK: alloca i32, i8 4, align 4
+ alloca inalloca i32, i8 4, align 4
+ ; CHECK: alloca inalloca i32, i8 4, align 4
+
+ load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9
+ ; CHECK: load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9
+ load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9
+ ; CHECK: load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9
+
+ store i32* null, i32** %base, align 4, !nontemporal !8
+ ; CHECK: store i32* null, i32** %base, align 4, !nontemporal !8
+ store volatile i32* null, i32** %base, align 4, !nontemporal !8
+ ; CHECK: store volatile i32* null, i32** %base, align 4, !nontemporal !8
+
+ ret void
+}
+
+; Instructions -- Conversion Operations
+define void @instructions.conversions() {
+ trunc i32 -1 to i1
+ ; CHECK: trunc i32 -1 to i1
+ zext i32 -1 to i64
+ ; CHECK: zext i32 -1 to i64
+ sext i32 -1 to i64
+ ; CHECK: sext i32 -1 to i64
+ fptrunc float undef to half
+ ; CHECK: fptrunc float undef to half
+ fpext half undef to float
+ ; CHECK: fpext half undef to float
+ fptoui float undef to i32
+ ; CHECK: fptoui float undef to i32
+ fptosi float undef to i32
+ ; CHECK: fptosi float undef to i32
+ uitofp i32 1 to float
+ ; CHECK: uitofp i32 1 to float
+ sitofp i32 -1 to float
+ ; CHECK: sitofp i32 -1 to float
+ ptrtoint i8* null to i64
+ ; CHECK: ptrtoint i8* null to i64
+ inttoptr i64 0 to i8*
+ ; CHECK: inttoptr i64 0 to i8*
+ bitcast i32 0 to i32
+ ; CHECK: bitcast i32 0 to i32
+ addrspacecast i32* null to i32 addrspace(1)*
+ ; CHECK: addrspacecast i32* null to i32 addrspace(1)*
+
+ ret void
+}
+
+; Instructions -- Other Operations
+define void @instructions.other(i32 %op1, i32 %op2, half %fop1, half %fop2) {
+entry:
+ icmp eq i32 %op1, %op2
+ ; CHECK: icmp eq i32 %op1, %op2
+ icmp ne i32 %op1, %op2
+ ; CHECK: icmp ne i32 %op1, %op2
+ icmp ugt i32 %op1, %op2
+ ; CHECK: icmp ugt i32 %op1, %op2
+ icmp uge i32 %op1, %op2
+ ; CHECK: icmp uge i32 %op1, %op2
+ icmp ult i32 %op1, %op2
+ ; CHECK: icmp ult i32 %op1, %op2
+ icmp ule i32 %op1, %op2
+ ; CHECK: icmp ule i32 %op1, %op2
+ icmp sgt i32 %op1, %op2
+ ; CHECK: icmp sgt i32 %op1, %op2
+ icmp sge i32 %op1, %op2
+ ; CHECK: icmp sge i32 %op1, %op2
+ icmp slt i32 %op1, %op2
+ ; CHECK: icmp slt i32 %op1, %op2
+ icmp sle i32 %op1, %op2
+ ; CHECK: icmp sle i32 %op1, %op2
+
+ fcmp false half %fop1, %fop2
+ ; CHECK: fcmp false half %fop1, %fop2
+ fcmp oeq half %fop1, %fop2
+ ; CHECK: fcmp oeq half %fop1, %fop2
+ fcmp ogt half %fop1, %fop2
+ ; CHECK: fcmp ogt half %fop1, %fop2
+ fcmp oge half %fop1, %fop2
+ ; CHECK: fcmp oge half %fop1, %fop2
+ fcmp olt half %fop1, %fop2
+ ; CHECK: fcmp olt half %fop1, %fop2
+ fcmp ole half %fop1, %fop2
+ ; CHECK: fcmp ole half %fop1, %fop2
+ fcmp one half %fop1, %fop2
+ ; CHECK: fcmp one half %fop1, %fop2
+ fcmp ord half %fop1, %fop2
+ ; CHECK: fcmp ord half %fop1, %fop2
+ fcmp ueq half %fop1, %fop2
+ ; CHECK: fcmp ueq half %fop1, %fop2
+ fcmp ugt half %fop1, %fop2
+ ; CHECK: fcmp ugt half %fop1, %fop2
+ fcmp uge half %fop1, %fop2
+ ; CHECK: fcmp uge half %fop1, %fop2
+ fcmp ult half %fop1, %fop2
+ ; CHECK: fcmp ult half %fop1, %fop2
+ fcmp ule half %fop1, %fop2
+ ; CHECK: fcmp ule half %fop1, %fop2
+ fcmp une half %fop1, %fop2
+ ; CHECK: fcmp une half %fop1, %fop2
+ fcmp uno half %fop1, %fop2
+ ; CHECK: fcmp uno half %fop1, %fop2
+ fcmp true half %fop1, %fop2
+ ; CHECK: fcmp true half %fop1, %fop2
+
+ br label %exit
+L1:
+ %v1 = add i32 %op1, %op2
+ br label %exit
+L2:
+ %v2 = add i32 %op1, %op2
+ br label %exit
+exit:
+ phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+ ; CHECK: phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+
+ select i1 true, i32 0, i32 1
+ ; CHECK: select i1 true, i32 0, i32 1
+ select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+ ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+
+ call void @f.nobuiltin() builtin
+ ; CHECK: call void @f.nobuiltin() #36
+
+ call fastcc noalias i32* @f.noalias() noinline
+ ; CHECK: call fastcc noalias i32* @f.noalias() #12
+ tail call ghccc nonnull i32* @f.nonnull() minsize
+ ; CHECK: tail call ghccc nonnull i32* @f.nonnull() #7
+
+ ret void
+}
+
+define void @instructions.call_musttail(i8* inalloca %val) {
+ musttail call void @f.param.inalloca(i8* inalloca %val)
+ ; CHECK: musttail call void @f.param.inalloca(i8* inalloca %val)
+
+ ret void
+}
+
+define void @instructions.landingpad() personality i32 -2 {
+ invoke void @llvm.donothing() to label %proceed unwind label %catch1
+ invoke void @llvm.donothing() to label %proceed unwind label %catch2
+ invoke void @llvm.donothing() to label %proceed unwind label %catch3
+ invoke void @llvm.donothing() to label %proceed unwind label %catch4
+
+catch1:
+ landingpad i32
+ ; CHECK: landingpad i32
+ cleanup
+ ; CHECK: cleanup
+ br label %proceed
+
+catch2:
+ landingpad i32
+ ; CHECK: landingpad i32
+ cleanup
+ ; CHECK: cleanup
+ catch i32* null
+ ; CHECK: catch i32* null
+ br label %proceed
+
+catch3:
+ landingpad i32
+ ; CHECK: landingpad i32
+ cleanup
+ ; CHECK: cleanup
+ catch i32* null
+ ; CHECK: catch i32* null
+ catch i32* null
+ ; CHECK: catch i32* null
+ br label %proceed
+
+catch4:
+ landingpad i32
+ ; CHECK: landingpad i32
+ filter [2 x i32] zeroinitializer
+ ; CHECK: filter [2 x i32] zeroinitializer
+ br label %proceed
+
+proceed:
+ ret void
+}
+
+;; Intrinsic Functions
+
+; Intrinsic Functions -- Variable Argument Handling
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+define void @instructions.va_arg(i8* %v, ...) {
+ %ap = alloca i8*
+ %ap2 = bitcast i8** %ap to i8*
+
+ call void @llvm.va_start(i8* %ap2)
+ ; CHECK: call void @llvm.va_start(i8* %ap2)
+
+ va_arg i8* %ap2, i32
+ ; CHECK: va_arg i8* %ap2, i32
+
+ call void @llvm.va_copy(i8* %v, i8* %ap2)
+ ; CHECK: call void @llvm.va_copy(i8* %v, i8* %ap2)
+
+ call void @llvm.va_end(i8* %ap2)
+ ; CHECK: call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+; Intrinsic Functions -- Accurate Garbage Collection
+declare void @llvm.gcroot(i8**, i8*)
+declare i8* @llvm.gcread(i8*, i8**)
+declare void @llvm.gcwrite(i8*, i8*, i8**)
+define void @intrinsics.gc() gc "shadow-stack" {
+ %ptrloc = alloca i8*
+ call void @llvm.gcroot(i8** %ptrloc, i8* null)
+ ; CHECK: call void @llvm.gcroot(i8** %ptrloc, i8* null)
+
+ call i8* @llvm.gcread(i8* null, i8** %ptrloc)
+ ; CHECK: call i8* @llvm.gcread(i8* null, i8** %ptrloc)
+
+ %ref = alloca i8
+ call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc)
+ ; CHECK: call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc)
+
+ ret void
+}
+
+; Intrinsic Functions -- Code Generation
+declare i8* @llvm.returnaddress(i32)
+declare i8* @llvm.frameaddress(i32)
+declare i32 @llvm.read_register.i32(metadata)
+declare i64 @llvm.read_register.i64(metadata)
+declare void @llvm.write_register.i32(metadata, i32)
+declare void @llvm.write_register.i64(metadata, i64)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+declare void @llvm.pcmarker(i32)
+declare i64 @llvm.readcyclecounter()
+declare void @llvm.clear_cache(i8*, i8*)
+declare void @llvm.instrprof_increment(i8*, i64, i32, i32)
+
+!10 = !{!"rax"}
+define void @intrinsics.codegen() {
+ call i8* @llvm.returnaddress(i32 1)
+ ; CHECK: call i8* @llvm.returnaddress(i32 1)
+ call i8* @llvm.frameaddress(i32 1)
+ ; CHECK: call i8* @llvm.frameaddress(i32 1)
+
+ call i32 @llvm.read_register.i32(metadata !10)
+ ; CHECK: call i32 @llvm.read_register.i32(metadata !10)
+ call i64 @llvm.read_register.i64(metadata !10)
+ ; CHECK: call i64 @llvm.read_register.i64(metadata !10)
+ call void @llvm.write_register.i32(metadata !10, i32 0)
+ ; CHECK: call void @llvm.write_register.i32(metadata !10, i32 0)
+ call void @llvm.write_register.i64(metadata !10, i64 0)
+ ; CHECK: call void @llvm.write_register.i64(metadata !10, i64 0)
+
+ %stack = call i8* @llvm.stacksave()
+ ; CHECK: %stack = call i8* @llvm.stacksave()
+ call void @llvm.stackrestore(i8* %stack)
+ ; CHECK: call void @llvm.stackrestore(i8* %stack)
+
+ call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
+ ; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
+
+ call void @llvm.pcmarker(i32 1)
+ ; CHECK: call void @llvm.pcmarker(i32 1)
+
+ call i64 @llvm.readcyclecounter()
+ ; CHECK: call i64 @llvm.readcyclecounter()
+
+ call void @llvm.clear_cache(i8* null, i8* null)
+ ; CHECK: call void @llvm.clear_cache(i8* null, i8* null)
+
+ call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0)
+ ; CHECK: call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0)
+
+ ret void
+}
+
+declare void @llvm.localescape(...)
+declare i8* @llvm.localrecover(i8* %func, i8* %fp, i32 %idx)
+define void @intrinsics.localescape() {
+ %static.alloca = alloca i32
+ call void (...) @llvm.localescape(i32* %static.alloca)
+ ; CHECK: call void (...) @llvm.localescape(i32* %static.alloca)
+
+ call void @intrinsics.localrecover()
+
+ ret void
+}
+define void @intrinsics.localrecover() {
+ %func = bitcast void ()* @intrinsics.localescape to i8*
+ %fp = call i8* @llvm.frameaddress(i32 1)
+ call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0)
+ ; CHECK: call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0)
+
+ ret void
+}
+
+; We need this function to provide `uses' for some metadata tests.
+define void @misc.metadata() {
+ call void @f1(), !srcloc !11
+ call void @f1(), !srcloc !12
+ call void @f1(), !srcloc !13
+ call void @f1(), !srcloc !14
+ ret void
+}
+
+; CHECK: attributes #0 = { alignstack=4 }
+; CHECK: attributes #1 = { alignstack=8 }
+; CHECK: attributes #2 = { alwaysinline }
+; CHECK: attributes #3 = { cold }
+; CHECK: attributes #4 = { convergent }
+; CHECK: attributes #5 = { inlinehint }
+; CHECK: attributes #6 = { jumptable }
+; CHECK: attributes #7 = { minsize }
+; CHECK: attributes #8 = { naked }
+; CHECK: attributes #9 = { nobuiltin }
+; CHECK: attributes #10 = { noduplicate }
+; CHECK: attributes #11 = { noimplicitfloat }
+; CHECK: attributes #12 = { noinline }
+; CHECK: attributes #13 = { nonlazybind }
+; CHECK: attributes #14 = { noredzone }
+; CHECK: attributes #15 = { noreturn }
+; CHECK: attributes #16 = { nounwind }
+; CHECK: attributes #17 = { noinline optnone }
+; CHECK: attributes #18 = { optsize }
+; CHECK: attributes #19 = { readnone }
+; CHECK: attributes #20 = { readonly }
+; CHECK: attributes #21 = { returns_twice }
+; CHECK: attributes #22 = { safestack }
+; CHECK: attributes #23 = { sanitize_address }
+; CHECK: attributes #24 = { sanitize_memory }
+; CHECK: attributes #25 = { sanitize_thread }
+; CHECK: attributes #26 = { ssp }
+; CHECK: attributes #27 = { sspreq }
+; CHECK: attributes #28 = { sspstrong }
+; CHECK: attributes #29 = { "thunk" }
+; CHECK: attributes #30 = { uwtable }
+; CHECK: attributes #31 = { "cpu"="cortex-a8" }
+; CHECK: attributes #32 = { nounwind readnone }
+; CHECK: attributes #33 = { argmemonly nounwind readonly }
+; CHECK: attributes #34 = { argmemonly nounwind }
+; CHECK: attributes #35 = { nounwind readonly }
+; CHECK: attributes #36 = { builtin }
+
+;; Metadata
+
+; Metadata -- Module flags
+!llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+
+!0 = !{i32 1, !"mod1", i32 0}
+; CHECK: !0 = !{i32 1, !"mod1", i32 0}
+!1 = !{i32 2, !"mod2", i32 0}
+; CHECK: !1 = !{i32 2, !"mod2", i32 0}
+!2 = !{i32 3, !"mod3", !3}
+; CHECK: !2 = !{i32 3, !"mod3", !3}
+!3 = !{!"mod6", !0}
+; CHECK: !3 = !{!"mod6", !0}
+!4 = !{i32 4, !"mod4", i32 0}
+; CHECK: !4 = !{i32 4, !"mod4", i32 0}
+!5 = !{i32 5, !"mod5", !0}
+; CHECK: !5 = !{i32 5, !"mod5", !0}
+!6 = !{i32 6, !"mod6", !0}
+; CHECK: !6 = !{i32 6, !"mod6", !0}
+
+; Metadata -- Check `distinct'
+!11 = distinct !{}
+; CHECK: !11 = distinct !{}
+!12 = distinct !{}
+; CHECK: !12 = distinct !{}
+!13 = !{!11}
+; CHECK: !13 = !{!11}
+!14 = !{!12}
+; CHECK: !14 = !{!12}
diff --git a/test/Bitcode/compatibility-3.7.ll.bc b/test/Bitcode/compatibility-3.7.ll.bc
new file mode 100644
index 000000000000..14c0f1a6d6f1
--- /dev/null
+++ b/test/Bitcode/compatibility-3.7.ll.bc
Binary files differ
diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll
new file mode 100644
index 000000000000..31e501de0a11
--- /dev/null
+++ b/test/Bitcode/compatibility.ll
@@ -0,0 +1,1560 @@
+; Bitcode compatibility test for llvm
+;
+; Please update this file when making any IR changes. Information on the
+; release process for this file is available here:
+;
+; http://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility
+
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN-PR24755: verify-uselistorder < %s
+
+target datalayout = "E"
+; CHECK: target datalayout = "E"
+
+target triple = "x86_64-apple-macosx10.10.0"
+; CHECK: target triple = "x86_64-apple-macosx10.10.0"
+
+;; Module-level assembly
+module asm "beep boop"
+; CHECK: module asm "beep boop"
+
+;; Comdats
+$comdat.any = comdat any
+; CHECK: $comdat.any = comdat any
+$comdat.exactmatch = comdat exactmatch
+; CHECK: $comdat.exactmatch = comdat exactmatch
+$comdat.largest = comdat largest
+; CHECK: $comdat.largest = comdat largest
+$comdat.noduplicates = comdat noduplicates
+; CHECK: $comdat.noduplicates = comdat noduplicates
+$comdat.samesize = comdat samesize
+; CHECK: $comdat.samesize = comdat samesize
+
+;; Constants
+@const.true = constant i1 true
+; CHECK: @const.true = constant i1 true
+@const.false = constant i1 false
+; CHECK: @const.false = constant i1 false
+@const.int = constant i32 zeroinitializer
+; CHECK: @const.int = constant i32 0
+@const.float = constant double 0.0
+; CHECK: @const.float = constant double 0.0
+@const.null = constant i8* null
+; CHECK: @const.null = constant i8* null
+%const.struct.type = type { i32, i8 }
+%const.struct.type.packed = type <{ i32, i8 }>
+@const.struct = constant %const.struct.type { i32 -1, i8 undef }
+; CHECK: @const.struct = constant %const.struct.type { i32 -1, i8 undef }
+@const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+; CHECK: @const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+@const.array = constant [2 x i32] [i32 -3, i32 -4]
+; CHECK: @const.array = constant [2 x i32] [i32 -3, i32 -4]
+@const.vector = constant <2 x i32> <i32 -5, i32 -6>
+; CHECK: @const.vector = constant <2 x i32> <i32 -5, i32 -6>
+
+;; Global Variables
+; Format: [@<GlobalVarName> =] [Linkage] [Visibility] [DLLStorageClass]
+; [ThreadLocal] [unnamed_addr] [AddrSpace] [ExternallyInitialized]
+; <global | constant> <Type> [<InitializerConstant>]
+; [, section "name"] [, comdat [($name)]] [, align <Alignment>]
+
+; Global Variables -- Simple
+@g1 = global i32 0
+; CHECK: @g1 = global i32 0
+@g2 = constant i32 0
+; CHECK: @g2 = constant i32 0
+
+; Global Variables -- Linkage
+@g.private = private global i32 0
+; CHECK: @g.private = private global i32 0
+@g.internal = internal global i32 0
+; CHECK: @g.internal = internal global i32 0
+@g.available_externally = available_externally global i32 0
+; CHECK: @g.available_externally = available_externally global i32 0
+@g.linkonce = linkonce global i32 0
+; CHECK: @g.linkonce = linkonce global i32 0
+@g.weak = weak global i32 0
+; CHECK: @g.weak = weak global i32 0
+@g.common = common global i32 0
+; CHECK: @g.common = common global i32 0
+@g.appending = appending global [4 x i8] c"test"
+; CHECK: @g.appending = appending global [4 x i8] c"test"
+@g.extern_weak = extern_weak global i32
+; CHECK: @g.extern_weak = extern_weak global i32
+@g.linkonce_odr = linkonce_odr global i32 0
+; CHECK: @g.linkonce_odr = linkonce_odr global i32 0
+@g.weak_odr = weak_odr global i32 0
+; CHECK: @g.weak_odr = weak_odr global i32 0
+@g.external = external global i32
+; CHECK: @g.external = external global i32
+
+; Global Variables -- Visibility
+@g.default = default global i32 0
+; CHECK: @g.default = global i32 0
+@g.hidden = hidden global i32 0
+; CHECK: @g.hidden = hidden global i32 0
+@g.protected = protected global i32 0
+; CHECK: @g.protected = protected global i32 0
+
+; Global Variables -- DLLStorageClass
+@g.dlldefault = default global i32 0
+; CHECK: @g.dlldefault = global i32 0
+@g.dllimport = external dllimport global i32
+; CHECK: @g.dllimport = external dllimport global i32
+@g.dllexport = dllexport global i32 0
+; CHECK: @g.dllexport = dllexport global i32 0
+
+; Global Variables -- ThreadLocal
+@g.notthreadlocal = global i32 0
+; CHECK: @g.notthreadlocal = global i32 0
+@g.generaldynamic = thread_local global i32 0
+; CHECK: @g.generaldynamic = thread_local global i32 0
+@g.localdynamic = thread_local(localdynamic) global i32 0
+; CHECK: @g.localdynamic = thread_local(localdynamic) global i32 0
+@g.initialexec = thread_local(initialexec) global i32 0
+; CHECK: @g.initialexec = thread_local(initialexec) global i32 0
+@g.localexec = thread_local(localexec) global i32 0
+; CHECK: @g.localexec = thread_local(localexec) global i32 0
+
+; Global Variables -- unnamed_addr
+@g.unnamed_addr = unnamed_addr global i32 0
+; CHECK: @g.unnamed_addr = unnamed_addr global i32 0
+
+; Global Variables -- AddrSpace
+@g.addrspace = addrspace(1) global i32 0
+; CHECK: @g.addrspace = addrspace(1) global i32 0
+
+; Global Variables -- ExternallyInitialized
+@g.externally_initialized = external externally_initialized global i32
+; CHECK: @g.externally_initialized = external externally_initialized global i32
+
+; Global Variables -- section
+@g.section = global i32 0, section "_DATA"
+; CHECK: @g.section = global i32 0, section "_DATA"
+
+; Global Variables -- comdat
+@comdat.any = global i32 0, comdat
+; CHECK: @comdat.any = global i32 0, comdat
+@comdat.exactmatch = global i32 0, comdat
+; CHECK: @comdat.exactmatch = global i32 0, comdat
+@comdat.largest = global i32 0, comdat
+; CHECK: @comdat.largest = global i32 0, comdat
+@comdat.noduplicates = global i32 0, comdat
+; CHECK: @comdat.noduplicates = global i32 0, comdat
+@comdat.samesize = global i32 0, comdat
+; CHECK: @comdat.samesize = global i32 0, comdat
+
+; Force two globals from different comdats into sections with the same name.
+$comdat1 = comdat any
+$comdat2 = comdat any
+@g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1)
+; CHECK: @g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1)
+@g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2)
+; CHECK: @g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2)
+
+; Global Variables -- align
+@g.align = global i32 0, align 4
+; CHECK: @g.align = global i32 0, align 4
+
+; Global Variables -- Intrinsics
+%pri.func.data = type { i32, void ()*, i8* }
+@g.used1 = global i32 0
+@g.used2 = global i32 0
+@g.used3 = global i8 0
+declare void @g.f1()
+@llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata"
+; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata"
+@llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata"
+@llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+; CHECK: @llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+@llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+; CHECK: @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
+
+;; Aliases
+; Format: @<Name> = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal]
+; [unnamed_addr] alias <AliaseeTy> @<Aliasee>
+
+; Aliases -- Linkage
+@a.private = private alias i32, i32* @g.private
+; CHECK: @a.private = private alias i32, i32* @g.private
+@a.internal = internal alias i32, i32* @g.internal
+; CHECK: @a.internal = internal alias i32, i32* @g.internal
+@a.linkonce = linkonce alias i32, i32* @g.linkonce
+; CHECK: @a.linkonce = linkonce alias i32, i32* @g.linkonce
+@a.weak = weak alias i32, i32* @g.weak
+; CHECK: @a.weak = weak alias i32, i32* @g.weak
+@a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr
+; CHECK: @a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr
+@a.weak_odr = weak_odr alias i32, i32* @g.weak_odr
+; CHECK: @a.weak_odr = weak_odr alias i32, i32* @g.weak_odr
+@a.external = external alias i32, i32* @g1
+; CHECK: @a.external = alias i32, i32* @g1
+
+; Aliases -- Visibility
+@a.default = default alias i32, i32* @g.default
+; CHECK: @a.default = alias i32, i32* @g.default
+@a.hidden = hidden alias i32, i32* @g.hidden
+; CHECK: @a.hidden = hidden alias i32, i32* @g.hidden
+@a.protected = protected alias i32, i32* @g.protected
+; CHECK: @a.protected = protected alias i32, i32* @g.protected
+
+; Aliases -- DLLStorageClass
+@a.dlldefault = default alias i32, i32* @g.dlldefault
+; CHECK: @a.dlldefault = alias i32, i32* @g.dlldefault
+@a.dllimport = dllimport alias i32, i32* @g1
+; CHECK: @a.dllimport = dllimport alias i32, i32* @g1
+@a.dllexport = dllexport alias i32, i32* @g.dllexport
+; CHECK: @a.dllexport = dllexport alias i32, i32* @g.dllexport
+
+; Aliases -- ThreadLocal
+@a.notthreadlocal = alias i32, i32* @g.notthreadlocal
+; CHECK: @a.notthreadlocal = alias i32, i32* @g.notthreadlocal
+@a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic
+; CHECK: @a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic
+@a.localdynamic = thread_local(localdynamic) alias i32, i32* @g.localdynamic
+; CHECK: @a.localdynamic = thread_local(localdynamic) alias i32, i32* @g.localdynamic
+@a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec
+; CHECK: @a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec
+@a.localexec = thread_local(localexec) alias i32, i32* @g.localexec
+; CHECK: @a.localexec = thread_local(localexec) alias i32, i32* @g.localexec
+
+; Aliases -- unnamed_addr
+@a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr
+; CHECK: @a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr
+
+;; Functions
+; Format: define [linkage] [visibility] [DLLStorageClass]
+; [cconv] [ret attrs]
+; <ResultType> @<FunctionName> ([argument list])
+; [unnamed_addr] [fn Attrs] [section "name"] [comdat [($name)]]
+; [align N] [gc] [prefix Constant] [prologue Constant]
+; [personality Constant] { ... }
+
+; Functions -- Simple
+declare void @f1 ()
+; CHECK: declare void @f1()
+
+define void @f2 () {
+; CHECK: define void @f2()
+entry:
+ ret void
+}
+
+; Functions -- linkage
+define private void @f.private() {
+; CHECK: define private void @f.private()
+entry:
+ ret void
+}
+define internal void @f.internal() {
+; CHECK: define internal void @f.internal()
+entry:
+ ret void
+}
+define available_externally void @f.available_externally() {
+; CHECK: define available_externally void @f.available_externally()
+entry:
+ ret void
+}
+define linkonce void @f.linkonce() {
+; CHECK: define linkonce void @f.linkonce()
+entry:
+ ret void
+}
+define weak void @f.weak() {
+; CHECK: define weak void @f.weak()
+entry:
+ ret void
+}
+define linkonce_odr void @f.linkonce_odr() {
+; CHECK: define linkonce_odr void @f.linkonce_odr()
+entry:
+ ret void
+}
+define weak_odr void @f.weak_odr() {
+; CHECK: define weak_odr void @f.weak_odr()
+entry:
+ ret void
+}
+declare external void @f.external()
+; CHECK: declare void @f.external()
+declare extern_weak void @f.extern_weak()
+; CHECK: declare extern_weak void @f.extern_weak()
+
+; Functions -- visibility
+declare default void @f.default()
+; CHECK: declare void @f.default()
+declare hidden void @f.hidden()
+; CHECK: declare hidden void @f.hidden()
+declare protected void @f.protected()
+; CHECK: declare protected void @f.protected()
+
+; Functions -- DLLStorageClass
+declare dllimport void @f.dllimport()
+; CHECK: declare dllimport void @f.dllimport()
+declare dllexport void @f.dllexport()
+; CHECK: declare dllexport void @f.dllexport()
+
+; Functions -- cconv (Calling conventions)
+declare ccc void @f.ccc()
+; CHECK: declare void @f.ccc()
+declare fastcc void @f.fastcc()
+; CHECK: declare fastcc void @f.fastcc()
+declare coldcc void @f.coldcc()
+; CHECK: declare coldcc void @f.coldcc()
+declare cc10 void @f.cc10()
+; CHECK: declare ghccc void @f.cc10()
+declare ghccc void @f.ghccc()
+; CHECK: declare ghccc void @f.ghccc()
+declare cc11 void @f.cc11()
+; CHECK: declare cc11 void @f.cc11()
+declare webkit_jscc void @f.webkit_jscc()
+; CHECK: declare webkit_jscc void @f.webkit_jscc()
+declare anyregcc void @f.anyregcc()
+; CHECK: declare anyregcc void @f.anyregcc()
+declare preserve_mostcc void @f.preserve_mostcc()
+; CHECK: declare preserve_mostcc void @f.preserve_mostcc()
+declare preserve_allcc void @f.preserve_allcc()
+; CHECK: declare preserve_allcc void @f.preserve_allcc()
+declare cc64 void @f.cc64()
+; CHECK: declare x86_stdcallcc void @f.cc64()
+declare x86_stdcallcc void @f.x86_stdcallcc()
+; CHECK: declare x86_stdcallcc void @f.x86_stdcallcc()
+declare cc65 void @f.cc65()
+; CHECK: declare x86_fastcallcc void @f.cc65()
+declare x86_fastcallcc void @f.x86_fastcallcc()
+; CHECK: declare x86_fastcallcc void @f.x86_fastcallcc()
+declare cc66 void @f.cc66()
+; CHECK: declare arm_apcscc void @f.cc66()
+declare arm_apcscc void @f.arm_apcscc()
+; CHECK: declare arm_apcscc void @f.arm_apcscc()
+declare cc67 void @f.cc67()
+; CHECK: declare arm_aapcscc void @f.cc67()
+declare arm_aapcscc void @f.arm_aapcscc()
+; CHECK: declare arm_aapcscc void @f.arm_aapcscc()
+declare cc68 void @f.cc68()
+; CHECK: declare arm_aapcs_vfpcc void @f.cc68()
+declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc()
+; CHECK: declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc()
+declare cc69 void @f.cc69()
+; CHECK: declare msp430_intrcc void @f.cc69()
+declare msp430_intrcc void @f.msp430_intrcc()
+; CHECK: declare msp430_intrcc void @f.msp430_intrcc()
+declare cc70 void @f.cc70()
+; CHECK: declare x86_thiscallcc void @f.cc70()
+declare x86_thiscallcc void @f.x86_thiscallcc()
+; CHECK: declare x86_thiscallcc void @f.x86_thiscallcc()
+declare cc71 void @f.cc71()
+; CHECK: declare ptx_kernel void @f.cc71()
+declare ptx_kernel void @f.ptx_kernel()
+; CHECK: declare ptx_kernel void @f.ptx_kernel()
+declare cc72 void @f.cc72()
+; CHECK: declare ptx_device void @f.cc72()
+declare ptx_device void @f.ptx_device()
+; CHECK: declare ptx_device void @f.ptx_device()
+declare cc75 void @f.cc75()
+; CHECK: declare spir_func void @f.cc75()
+declare spir_func void @f.spir_func()
+; CHECK: declare spir_func void @f.spir_func()
+declare cc76 void @f.cc76()
+; CHECK: declare spir_kernel void @f.cc76()
+declare spir_kernel void @f.spir_kernel()
+; CHECK: declare spir_kernel void @f.spir_kernel()
+declare cc77 void @f.cc77()
+; CHECK: declare intel_ocl_bicc void @f.cc77()
+declare intel_ocl_bicc void @f.intel_ocl_bicc()
+; CHECK: declare intel_ocl_bicc void @f.intel_ocl_bicc()
+declare cc78 void @f.cc78()
+; CHECK: declare x86_64_sysvcc void @f.cc78()
+declare x86_64_sysvcc void @f.x86_64_sysvcc()
+; CHECK: declare x86_64_sysvcc void @f.x86_64_sysvcc()
+declare cc79 void @f.cc79()
+; CHECK: declare x86_64_win64cc void @f.cc79()
+declare x86_64_win64cc void @f.x86_64_win64cc()
+; CHECK: declare x86_64_win64cc void @f.x86_64_win64cc()
+declare cc80 void @f.cc80()
+; CHECK: declare x86_vectorcallcc void @f.cc80()
+declare x86_vectorcallcc void @f.x86_vectorcallcc()
+; CHECK: declare x86_vectorcallcc void @f.x86_vectorcallcc()
+declare cc1023 void @f.cc1023()
+; CHECK: declare cc1023 void @f.cc1023()
+
+; Functions -- ret attrs (Return attributes)
+declare zeroext i64 @f.zeroext()
+; CHECK: declare zeroext i64 @f.zeroext()
+declare signext i64 @f.signext()
+; CHECK: declare signext i64 @f.signext()
+declare inreg i32* @f.inreg()
+; CHECK: declare inreg i32* @f.inreg()
+declare noalias i32* @f.noalias()
+; CHECK: declare noalias i32* @f.noalias()
+declare nonnull i32* @f.nonnull()
+; CHECK: declare nonnull i32* @f.nonnull()
+declare dereferenceable(4) i32* @f.dereferenceable4()
+; CHECK: declare dereferenceable(4) i32* @f.dereferenceable4()
+declare dereferenceable(8) i32* @f.dereferenceable8()
+; CHECK: declare dereferenceable(8) i32* @f.dereferenceable8()
+declare dereferenceable(16) i32* @f.dereferenceable16()
+; CHECK: declare dereferenceable(16) i32* @f.dereferenceable16()
+declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null()
+; CHECK: declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null()
+declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null()
+; CHECK: declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null()
+declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null()
+; CHECK: declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null()
+
+; Functions -- Parameter attributes
+declare void @f.param.zeroext(i8 zeroext)
+; CHECK: declare void @f.param.zeroext(i8 zeroext)
+declare void @f.param.signext(i8 signext)
+; CHECK: declare void @f.param.signext(i8 signext)
+declare void @f.param.inreg(i8 inreg)
+; CHECK: declare void @f.param.inreg(i8 inreg)
+declare void @f.param.byval({ i8, i8 }* byval)
+; CHECK: declare void @f.param.byval({ i8, i8 }* byval)
+declare void @f.param.inalloca(i8* inalloca)
+; CHECK: declare void @f.param.inalloca(i8* inalloca)
+declare void @f.param.sret(i8* sret)
+; CHECK: declare void @f.param.sret(i8* sret)
+declare void @f.param.noalias(i8* noalias)
+; CHECK: declare void @f.param.noalias(i8* noalias)
+declare void @f.param.nocapture(i8* nocapture)
+; CHECK: declare void @f.param.nocapture(i8* nocapture)
+declare void @f.param.nest(i8* nest)
+; CHECK: declare void @f.param.nest(i8* nest)
+declare i8* @f.param.returned(i8* returned)
+; CHECK: declare i8* @f.param.returned(i8* returned)
+declare void @f.param.nonnull(i8* nonnull)
+; CHECK: declare void @f.param.nonnull(i8* nonnull)
+declare void @f.param.dereferenceable(i8* dereferenceable(4))
+; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4))
+declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4))
+; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4))
+
+; Functions -- unnamed_addr
+declare void @f.unnamed_addr() unnamed_addr
+; CHECK: declare void @f.unnamed_addr() unnamed_addr
+
+; Functions -- fn Attrs (Function attributes)
+declare void @f.alignstack4() alignstack(4)
+; CHECK: declare void @f.alignstack4() #0
+declare void @f.alignstack8() alignstack(8)
+; CHECK: declare void @f.alignstack8() #1
+declare void @f.alwaysinline() alwaysinline
+; CHECK: declare void @f.alwaysinline() #2
+declare void @f.cold() cold
+; CHECK: declare void @f.cold() #3
+declare void @f.convergent() convergent
+; CHECK: declare void @f.convergent() #4
+declare void @f.inlinehint() inlinehint
+; CHECK: declare void @f.inlinehint() #5
+declare void @f.jumptable() unnamed_addr jumptable
+; CHECK: declare void @f.jumptable() unnamed_addr #6
+declare void @f.minsize() minsize
+; CHECK: declare void @f.minsize() #7
+declare void @f.naked() naked
+; CHECK: declare void @f.naked() #8
+declare void @f.nobuiltin() nobuiltin
+; CHECK: declare void @f.nobuiltin() #9
+declare void @f.noduplicate() noduplicate
+; CHECK: declare void @f.noduplicate() #10
+declare void @f.noimplicitfloat() noimplicitfloat
+; CHECK: declare void @f.noimplicitfloat() #11
+declare void @f.noinline() noinline
+; CHECK: declare void @f.noinline() #12
+declare void @f.nonlazybind() nonlazybind
+; CHECK: declare void @f.nonlazybind() #13
+declare void @f.noredzone() noredzone
+; CHECK: declare void @f.noredzone() #14
+declare void @f.noreturn() noreturn
+; CHECK: declare void @f.noreturn() #15
+declare void @f.nounwind() nounwind
+; CHECK: declare void @f.nounwind() #16
+declare void @f.optnone() noinline optnone
+; CHECK: declare void @f.optnone() #17
+declare void @f.optsize() optsize
+; CHECK: declare void @f.optsize() #18
+declare void @f.readnone() readnone
+; CHECK: declare void @f.readnone() #19
+declare void @f.readonly() readonly
+; CHECK: declare void @f.readonly() #20
+declare void @f.returns_twice() returns_twice
+; CHECK: declare void @f.returns_twice() #21
+declare void @f.safestack() safestack
+; CHECK: declare void @f.safestack() #22
+declare void @f.sanitize_address() sanitize_address
+; CHECK: declare void @f.sanitize_address() #23
+declare void @f.sanitize_memory() sanitize_memory
+; CHECK: declare void @f.sanitize_memory() #24
+declare void @f.sanitize_thread() sanitize_thread
+; CHECK: declare void @f.sanitize_thread() #25
+declare void @f.ssp() ssp
+; CHECK: declare void @f.ssp() #26
+declare void @f.sspreq() sspreq
+; CHECK: declare void @f.sspreq() #27
+declare void @f.sspstrong() sspstrong
+; CHECK: declare void @f.sspstrong() #28
+declare void @f.thunk() "thunk"
+; CHECK: declare void @f.thunk() #29
+declare void @f.uwtable() uwtable
+; CHECK: declare void @f.uwtable() #30
+declare void @f.kvpair() "cpu"="cortex-a8"
+; CHECK:declare void @f.kvpair() #31
+declare void @f.norecurse() norecurse
+; CHECK: declare void @f.norecurse() #32
+declare void @f.inaccessiblememonly() inaccessiblememonly
+; CHECK: declare void @f.inaccessiblememonly() #33
+declare void @f.inaccessiblemem_or_argmemonly() inaccessiblemem_or_argmemonly
+; CHECK: declare void @f.inaccessiblemem_or_argmemonly() #34
+
+; Functions -- section
+declare void @f.section() section "80"
+; CHECK: declare void @f.section() section "80"
+
+; Functions -- comdat
+define void @f.comdat_any() comdat($comdat.any) {
+; CHECK: define void @f.comdat_any() comdat($comdat.any)
+entry:
+ ret void
+}
+define void @f.comdat_exactmatch() comdat($comdat.exactmatch) {
+; CHECK: define void @f.comdat_exactmatch() comdat($comdat.exactmatch)
+entry:
+ ret void
+}
+define void @f.comdat_largest() comdat($comdat.largest) {
+; CHECK: define void @f.comdat_largest() comdat($comdat.largest)
+entry:
+ ret void
+}
+define void @f.comdat_noduplicates() comdat($comdat.noduplicates) {
+; CHECK: define void @f.comdat_noduplicates() comdat($comdat.noduplicates)
+entry:
+ ret void
+}
+define void @f.comdat_samesize() comdat($comdat.samesize) {
+; CHECK: define void @f.comdat_samesize() comdat($comdat.samesize)
+entry:
+ ret void
+}
+
+; Functions -- align
+declare void @f.align2() align 2
+; CHECK: declare void @f.align2() align 2
+declare void @f.align4() align 4
+; CHECK: declare void @f.align4() align 4
+declare void @f.align8() align 8
+; CHECK: declare void @f.align8() align 8
+
+; Functions -- GC
+declare void @f.gcshadow() gc "shadow-stack"
+; CHECK: declare void @f.gcshadow() gc "shadow-stack"
+
+; Functions -- Prefix data
+declare void @f.prefixi32() prefix i32 1684365668
+; CHECK: declare void @f.prefixi32() prefix i32 1684365668
+declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+; CHECK: declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+; Functions -- Prologue data
+declare void @f.prologuei32() prologue i32 1684365669
+; CHECK: declare void @f.prologuei32() prologue i32 1684365669
+declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+; CHECK: declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+; Functions -- Personality constant
+declare void @llvm.donothing() nounwind readnone
+; CHECK: declare void @llvm.donothing() #35
+define void @f.no_personality() personality i8 3 {
+; CHECK: define void @f.no_personality() personality i8 3
+ invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+declare i32 @f.personality_handler()
+; CHECK: declare i32 @f.personality_handler()
+define void @f.personality() personality i32 ()* @f.personality_handler {
+; CHECK: define void @f.personality() personality i32 ()* @f.personality_handler
+ invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+ %cleanup = landingpad i32 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+;; Atomic Memory Ordering Constraints
+define void @atomics(i32* %word) {
+ %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic
+ ; CHECK: %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic
+ %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic
+ ; CHECK: %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic
+ %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic
+ ; CHECK: %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic
+ %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic
+ ; CHECK: %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic
+ %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic
+ ; CHECK: %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic
+ %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic
+ ; CHECK: %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic
+ %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic
+ ; CHECK: %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic
+ %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic
+ ; CHECK: %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic
+ %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
+ ; CHECK: %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic
+ %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic
+ ; CHECK: %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic
+ %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic
+ ; CHECK: %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic
+ %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic
+ ; CHECK: %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic
+ %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic
+ ; CHECK: %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic
+ %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic
+ ; CHECK: %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic
+ %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic
+ ; CHECK: %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic
+ %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic
+ ; CHECK: %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic
+ %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic
+ ; CHECK: %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic
+ %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic
+ ; CHECK: %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic
+ %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic
+ ; CHECK: %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic
+ fence acquire
+ ; CHECK: fence acquire
+ fence release
+ ; CHECK: fence release
+ fence acq_rel
+ ; CHECK: fence acq_rel
+ fence singlethread seq_cst
+ ; CHECK: fence singlethread seq_cst
+
+ %ld.1 = load atomic i32, i32* %word monotonic, align 4
+ ; CHECK: %ld.1 = load atomic i32, i32* %word monotonic, align 4
+ %ld.2 = load atomic volatile i32, i32* %word acquire, align 8
+ ; CHECK: %ld.2 = load atomic volatile i32, i32* %word acquire, align 8
+ %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16
+ ; CHECK: %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16
+
+ store atomic i32 23, i32* %word monotonic, align 4
+ ; CHECK: store atomic i32 23, i32* %word monotonic, align 4
+ store atomic volatile i32 24, i32* %word monotonic, align 4
+ ; CHECK: store atomic volatile i32 24, i32* %word monotonic, align 4
+ store atomic volatile i32 25, i32* %word singlethread monotonic, align 4
+ ; CHECK: store atomic volatile i32 25, i32* %word singlethread monotonic, align 4
+ ret void
+}
+
+;; Fast Math Flags
+define void @fastmathflags(float %op1, float %op2) {
+ %f.nnan = fadd nnan float %op1, %op2
+ ; CHECK: %f.nnan = fadd nnan float %op1, %op2
+ %f.ninf = fadd ninf float %op1, %op2
+ ; CHECK: %f.ninf = fadd ninf float %op1, %op2
+ %f.nsz = fadd nsz float %op1, %op2
+ ; CHECK: %f.nsz = fadd nsz float %op1, %op2
+ %f.arcp = fadd arcp float %op1, %op2
+ ; CHECK: %f.arcp = fadd arcp float %op1, %op2
+ %f.fast = fadd fast float %op1, %op2
+ ; CHECK: %f.fast = fadd fast float %op1, %op2
+ ret void
+}
+
+; Check various fast math flags and floating-point types on calls.
+
+declare float @fmf1()
+declare double @fmf2()
+declare <4 x double> @fmf3()
+
+; CHECK-LABEL: fastMathFlagsForCalls(
+define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) {
+ %call.fast = call fast float @fmf1()
+ ; CHECK: %call.fast = call fast float @fmf1()
+
+ ; Throw in some other attributes to make sure those stay in the right places.
+
+ %call.nsz.arcp = notail call nsz arcp double @fmf2()
+ ; CHECK: %call.nsz.arcp = notail call nsz arcp double @fmf2()
+
+ %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf3()
+ ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf3()
+
+ ret void
+}
+
+;; Type System
+%opaquety = type opaque
+define void @typesystem() {
+ %p0 = bitcast i8* null to i32 (i32)*
+ ; CHECK: %p0 = bitcast i8* null to i32 (i32)*
+ %p1 = bitcast i8* null to void (i8*)*
+ ; CHECK: %p1 = bitcast i8* null to void (i8*)*
+ %p2 = bitcast i8* null to i32 (i8*, ...)*
+ ; CHECK: %p2 = bitcast i8* null to i32 (i8*, ...)*
+ %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)*
+ ; CHECK: %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)*
+ %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)*
+ ; CHECK: %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)*
+ %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)*
+ ; CHECK: %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)*
+
+ %t0 = alloca i1942652
+ ; CHECK: %t0 = alloca i1942652
+ %t1 = alloca half
+ ; CHECK: %t1 = alloca half
+ %t2 = alloca float
+ ; CHECK: %t2 = alloca float
+ %t3 = alloca double
+ ; CHECK: %t3 = alloca double
+ %t4 = alloca fp128
+ ; CHECK: %t4 = alloca fp128
+ %t5 = alloca x86_fp80
+ ; CHECK: %t5 = alloca x86_fp80
+ %t6 = alloca ppc_fp128
+ ; CHECK: %t6 = alloca ppc_fp128
+ %t7 = alloca x86_mmx
+ ; CHECK: %t7 = alloca x86_mmx
+ %t8 = alloca %opaquety*
+ ; CHECK: %t8 = alloca %opaquety*
+
+ ret void
+}
+
+declare void @llvm.token(token)
+; CHECK: declare void @llvm.token(token)
+
+;; Inline Assembler Expressions
+define void @inlineasm(i32 %arg) {
+ call i32 asm "bswap $0", "=r,r"(i32 %arg)
+ ; CHECK: call i32 asm "bswap $0", "=r,r"(i32 %arg)
+ call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg)
+ ; CHECK: call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg)
+ ret void
+}
+
+;; Instructions
+
+; Instructions -- Terminators
+define void @instructions.terminators(i8 %val) personality i32 -10 {
+ br i1 false, label %iftrue, label %iffalse
+ ; CHECK: br i1 false, label %iftrue, label %iffalse
+ br label %iftrue
+ ; CHECK: br label %iftrue
+iftrue:
+ ret void
+ ; CHECK: ret void
+iffalse:
+
+ switch i8 %val, label %defaultdest [
+ ; CHECK: switch i8 %val, label %defaultdest [
+ i8 0, label %defaultdest.0
+ ; CHECK: i8 0, label %defaultdest.0
+ i8 1, label %defaultdest.1
+ ; CHECK: i8 1, label %defaultdest.1
+ i8 2, label %defaultdest.2
+ ; CHECK: i8 2, label %defaultdest.2
+ ]
+ ; CHECK: ]
+defaultdest:
+ ret void
+defaultdest.0:
+ ret void
+defaultdest.1:
+ ret void
+defaultdest.2:
+
+ indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2]
+ ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2]
+ indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2]
+ ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2]
+
+ invoke fastcc void @f.fastcc()
+ ; CHECK: invoke fastcc void @f.fastcc()
+ to label %defaultdest unwind label %exc
+ ; CHECK: to label %defaultdest unwind label %exc
+exc:
+ %cleanup = landingpad i32 cleanup
+
+ resume i32 undef
+ ; CHECK: resume i32 undef
+ unreachable
+ ; CHECK: unreachable
+
+ ret void
+}
+
+define i32 @instructions.win_eh.1() personality i32 -3 {
+entry:
+ %arg1 = alloca i32
+ %arg2 = alloca i32
+ invoke void @f.ccc() to label %normal unwind label %catchswitch1
+ invoke void @f.ccc() to label %normal unwind label %catchswitch2
+ invoke void @f.ccc() to label %normal unwind label %catchswitch3
+
+catchswitch1:
+ %cs1 = catchswitch within none [label %catchpad1] unwind to caller
+
+catchpad1:
+ catchpad within %cs1 []
+ br label %normal
+ ; CHECK: catchpad within %cs1 []
+ ; CHECK-NEXT: br label %normal
+
+catchswitch2:
+ %cs2 = catchswitch within none [label %catchpad2] unwind to caller
+
+catchpad2:
+ catchpad within %cs2 [i32* %arg1]
+ br label %normal
+ ; CHECK: catchpad within %cs2 [i32* %arg1]
+ ; CHECK-NEXT: br label %normal
+
+catchswitch3:
+ %cs3 = catchswitch within none [label %catchpad3] unwind label %cleanuppad1
+
+catchpad3:
+ catchpad within %cs3 [i32* %arg1, i32* %arg2]
+ br label %normal
+ ; CHECK: catchpad within %cs3 [i32* %arg1, i32* %arg2]
+ ; CHECK-NEXT: br label %normal
+
+cleanuppad1:
+ %clean.1 = cleanuppad within none []
+ unreachable
+ ; CHECK: %clean.1 = cleanuppad within none []
+ ; CHECK-NEXT: unreachable
+
+normal:
+ ret i32 0
+}
+;
+define i32 @instructions.win_eh.2() personality i32 -4 {
+entry:
+ invoke void @f.ccc() to label %invoke.cont unwind label %catchswitch
+
+invoke.cont:
+ invoke void @f.ccc() to label %continue unwind label %cleanup
+
+cleanup:
+ %clean = cleanuppad within none []
+ ; CHECK: %clean = cleanuppad within none []
+ cleanupret from %clean unwind to caller
+ ; CHECK: cleanupret from %clean unwind to caller
+
+catchswitch:
+ %cs = catchswitch within none [label %catchpad] unwind label %terminate
+
+catchpad:
+ %catch = catchpad within %cs []
+ br label %body
+ ; CHECK: %catch = catchpad within %cs []
+ ; CHECK-NEXT: br label %body
+
+body:
+ invoke void @f.ccc() to label %continue unwind label %terminate
+ catchret from %catch to label %return
+ ; CHECK: catchret from %catch to label %return
+
+return:
+ ret i32 0
+
+terminate:
+ cleanuppad within %cs []
+ unreachable
+ ; CHECK: cleanuppad within %cs []
+ ; CHECK-NEXT: unreachable
+
+continue:
+ ret i32 0
+}
+
+; Instructions -- Binary Operations
+define void @instructions.binops(i8 %op1, i8 %op2) {
+ ; nuw x nsw
+ add i8 %op1, %op2
+ ; CHECK: add i8 %op1, %op2
+ add nuw i8 %op1, %op2
+ ; CHECK: add nuw i8 %op1, %op2
+ add nsw i8 %op1, %op2
+ ; CHECK: add nsw i8 %op1, %op2
+ add nuw nsw i8 %op1, %op2
+ ; CHECK: add nuw nsw i8 %op1, %op2
+ sub i8 %op1, %op2
+ ; CHECK: sub i8 %op1, %op2
+ sub nuw i8 %op1, %op2
+ ; CHECK: sub nuw i8 %op1, %op2
+ sub nsw i8 %op1, %op2
+ ; CHECK: sub nsw i8 %op1, %op2
+ sub nuw nsw i8 %op1, %op2
+ ; CHECK: sub nuw nsw i8 %op1, %op2
+ mul i8 %op1, %op2
+ ; CHECK: mul i8 %op1, %op2
+ mul nuw i8 %op1, %op2
+ ; CHECK: mul nuw i8 %op1, %op2
+ mul nsw i8 %op1, %op2
+ ; CHECK: mul nsw i8 %op1, %op2
+ mul nuw nsw i8 %op1, %op2
+ ; CHECK: mul nuw nsw i8 %op1, %op2
+
+ ; exact
+ udiv i8 %op1, %op2
+ ; CHECK: udiv i8 %op1, %op2
+ udiv exact i8 %op1, %op2
+ ; CHECK: udiv exact i8 %op1, %op2
+ sdiv i8 %op1, %op2
+ ; CHECK: sdiv i8 %op1, %op2
+ sdiv exact i8 %op1, %op2
+ ; CHECK: sdiv exact i8 %op1, %op2
+
+ ; none
+ urem i8 %op1, %op2
+ ; CHECK: urem i8 %op1, %op2
+ srem i8 %op1, %op2
+ ; CHECK: srem i8 %op1, %op2
+
+ ret void
+}
+
+; Instructions -- Bitwise Binary Operations
+define void @instructions.bitwise_binops(i8 %op1, i8 %op2) {
+ ; nuw x nsw
+ shl i8 %op1, %op2
+ ; CHECK: shl i8 %op1, %op2
+ shl nuw i8 %op1, %op2
+ ; CHECK: shl nuw i8 %op1, %op2
+ shl nsw i8 %op1, %op2
+ ; CHECK: shl nsw i8 %op1, %op2
+ shl nuw nsw i8 %op1, %op2
+ ; CHECK: shl nuw nsw i8 %op1, %op2
+
+ ; exact
+ lshr i8 %op1, %op2
+ ; CHECK: lshr i8 %op1, %op2
+ lshr exact i8 %op1, %op2
+ ; CHECK: lshr exact i8 %op1, %op2
+ ashr i8 %op1, %op2
+ ; CHECK: ashr i8 %op1, %op2
+ ashr exact i8 %op1, %op2
+ ; CHECK: ashr exact i8 %op1, %op2
+
+ ; none
+ and i8 %op1, %op2
+ ; CHECK: and i8 %op1, %op2
+ or i8 %op1, %op2
+ ; CHECK: or i8 %op1, %op2
+ xor i8 %op1, %op2
+ ; CHECK: xor i8 %op1, %op2
+
+ ret void
+}
+
+; Instructions -- Vector Operations
+define void @instructions.vectorops(<4 x float> %vec, <4 x float> %vec2) {
+ extractelement <4 x float> %vec, i8 0
+ ; CHECK: extractelement <4 x float> %vec, i8 0
+ insertelement <4 x float> %vec, float 3.500000e+00, i8 0
+ ; CHECK: insertelement <4 x float> %vec, float 3.500000e+00, i8 0
+ shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer
+ ; CHECK: shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer
+
+ ret void
+}
+
+; Instructions -- Aggregate Operations
+define void @instructions.aggregateops({ i8, i32 } %up, <{ i8, i32 }> %p,
+ [3 x i8] %arr, { i8, { i32 }} %n,
+ <2 x i8*> %pvec, <2 x i64> %offsets) {
+ extractvalue { i8, i32 } %up, 0
+ ; CHECK: extractvalue { i8, i32 } %up, 0
+ extractvalue <{ i8, i32 }> %p, 1
+ ; CHECK: extractvalue <{ i8, i32 }> %p, 1
+ extractvalue [3 x i8] %arr, 2
+ ; CHECK: extractvalue [3 x i8] %arr, 2
+ extractvalue { i8, { i32 } } %n, 1, 0
+ ; CHECK: extractvalue { i8, { i32 } } %n, 1, 0
+
+ insertvalue { i8, i32 } %up, i8 1, 0
+ ; CHECK: insertvalue { i8, i32 } %up, i8 1, 0
+ insertvalue <{ i8, i32 }> %p, i32 2, 1
+ ; CHECK: insertvalue <{ i8, i32 }> %p, i32 2, 1
+ insertvalue [3 x i8] %arr, i8 0, 0
+ ; CHECK: insertvalue [3 x i8] %arr, i8 0, 0
+ insertvalue { i8, { i32 } } %n, i32 0, 1, 0
+ ; CHECK: insertvalue { i8, { i32 } } %n, i32 0, 1, 0
+
+ %up.ptr = alloca { i8, i32 }
+ %p.ptr = alloca <{ i8, i32 }>
+ %arr.ptr = alloca [3 x i8]
+ %n.ptr = alloca { i8, { i32 } }
+
+ getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0
+ ; CHECK: getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0
+ getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1
+ ; CHECK: getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1
+ getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2
+ ; CHECK: getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2
+ getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1
+ ; CHECK: getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1
+ getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0
+ ; CHECK: getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0
+ getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets
+ ; CHECK: getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets
+
+ ret void
+}
+
+; Instructions -- Memory Access and Addressing Operations
+!7 = !{i32 1}
+!8 = !{}
+!9 = !{i64 4}
+define void @instructions.memops(i32** %base) {
+ alloca i32, i8 4, align 4
+ ; CHECK: alloca i32, i8 4, align 4
+ alloca inalloca i32, i8 4, align 4
+ ; CHECK: alloca inalloca i32, i8 4, align 4
+
+ load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9
+ ; CHECK: load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9
+ load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9
+ ; CHECK: load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9
+
+ store i32* null, i32** %base, align 4, !nontemporal !8
+ ; CHECK: store i32* null, i32** %base, align 4, !nontemporal !8
+ store volatile i32* null, i32** %base, align 4, !nontemporal !8
+ ; CHECK: store volatile i32* null, i32** %base, align 4, !nontemporal !8
+
+ ret void
+}
+
+; Instructions -- Conversion Operations
+define void @instructions.conversions() {
+ trunc i32 -1 to i1
+ ; CHECK: trunc i32 -1 to i1
+ zext i32 -1 to i64
+ ; CHECK: zext i32 -1 to i64
+ sext i32 -1 to i64
+ ; CHECK: sext i32 -1 to i64
+ fptrunc float undef to half
+ ; CHECK: fptrunc float undef to half
+ fpext half undef to float
+ ; CHECK: fpext half undef to float
+ fptoui float undef to i32
+ ; CHECK: fptoui float undef to i32
+ fptosi float undef to i32
+ ; CHECK: fptosi float undef to i32
+ uitofp i32 1 to float
+ ; CHECK: uitofp i32 1 to float
+ sitofp i32 -1 to float
+ ; CHECK: sitofp i32 -1 to float
+ ptrtoint i8* null to i64
+ ; CHECK: ptrtoint i8* null to i64
+ inttoptr i64 0 to i8*
+ ; CHECK: inttoptr i64 0 to i8*
+ bitcast i32 0 to i32
+ ; CHECK: bitcast i32 0 to i32
+ addrspacecast i32* null to i32 addrspace(1)*
+ ; CHECK: addrspacecast i32* null to i32 addrspace(1)*
+
+ ret void
+}
+
+; Instructions -- Other Operations
+define void @instructions.other(i32 %op1, i32 %op2, half %fop1, half %fop2) {
+entry:
+ icmp eq i32 %op1, %op2
+ ; CHECK: icmp eq i32 %op1, %op2
+ icmp ne i32 %op1, %op2
+ ; CHECK: icmp ne i32 %op1, %op2
+ icmp ugt i32 %op1, %op2
+ ; CHECK: icmp ugt i32 %op1, %op2
+ icmp uge i32 %op1, %op2
+ ; CHECK: icmp uge i32 %op1, %op2
+ icmp ult i32 %op1, %op2
+ ; CHECK: icmp ult i32 %op1, %op2
+ icmp ule i32 %op1, %op2
+ ; CHECK: icmp ule i32 %op1, %op2
+ icmp sgt i32 %op1, %op2
+ ; CHECK: icmp sgt i32 %op1, %op2
+ icmp sge i32 %op1, %op2
+ ; CHECK: icmp sge i32 %op1, %op2
+ icmp slt i32 %op1, %op2
+ ; CHECK: icmp slt i32 %op1, %op2
+ icmp sle i32 %op1, %op2
+ ; CHECK: icmp sle i32 %op1, %op2
+
+ fcmp false half %fop1, %fop2
+ ; CHECK: fcmp false half %fop1, %fop2
+ fcmp oeq half %fop1, %fop2
+ ; CHECK: fcmp oeq half %fop1, %fop2
+ fcmp ogt half %fop1, %fop2
+ ; CHECK: fcmp ogt half %fop1, %fop2
+ fcmp oge half %fop1, %fop2
+ ; CHECK: fcmp oge half %fop1, %fop2
+ fcmp olt half %fop1, %fop2
+ ; CHECK: fcmp olt half %fop1, %fop2
+ fcmp ole half %fop1, %fop2
+ ; CHECK: fcmp ole half %fop1, %fop2
+ fcmp one half %fop1, %fop2
+ ; CHECK: fcmp one half %fop1, %fop2
+ fcmp ord half %fop1, %fop2
+ ; CHECK: fcmp ord half %fop1, %fop2
+ fcmp ueq half %fop1, %fop2
+ ; CHECK: fcmp ueq half %fop1, %fop2
+ fcmp ugt half %fop1, %fop2
+ ; CHECK: fcmp ugt half %fop1, %fop2
+ fcmp uge half %fop1, %fop2
+ ; CHECK: fcmp uge half %fop1, %fop2
+ fcmp ult half %fop1, %fop2
+ ; CHECK: fcmp ult half %fop1, %fop2
+ fcmp ule half %fop1, %fop2
+ ; CHECK: fcmp ule half %fop1, %fop2
+ fcmp une half %fop1, %fop2
+ ; CHECK: fcmp une half %fop1, %fop2
+ fcmp uno half %fop1, %fop2
+ ; CHECK: fcmp uno half %fop1, %fop2
+ fcmp true half %fop1, %fop2
+ ; CHECK: fcmp true half %fop1, %fop2
+
+ br label %exit
+L1:
+ %v1 = add i32 %op1, %op2
+ br label %exit
+L2:
+ %v2 = add i32 %op1, %op2
+ br label %exit
+exit:
+ phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+ ; CHECK: phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+
+ select i1 true, i32 0, i32 1
+ ; CHECK: select i1 true, i32 0, i32 1
+ select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+ ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+
+ call void @f.nobuiltin() builtin
+ ; CHECK: call void @f.nobuiltin() #39
+
+ call fastcc noalias i32* @f.noalias() noinline
+ ; CHECK: call fastcc noalias i32* @f.noalias() #12
+ tail call ghccc nonnull i32* @f.nonnull() minsize
+ ; CHECK: tail call ghccc nonnull i32* @f.nonnull() #7
+
+ ret void
+}
+
+define void @instructions.call_musttail(i8* inalloca %val) {
+ musttail call void @f.param.inalloca(i8* inalloca %val)
+ ; CHECK: musttail call void @f.param.inalloca(i8* inalloca %val)
+
+ ret void
+}
+
+define void @instructions.call_notail() {
+ notail call void @f1()
+ ; CHECK: notail call void @f1()
+
+ ret void
+}
+
+define void @instructions.landingpad() personality i32 -2 {
+ invoke void @llvm.donothing() to label %proceed unwind label %catch1
+ invoke void @llvm.donothing() to label %proceed unwind label %catch2
+ invoke void @llvm.donothing() to label %proceed unwind label %catch3
+ invoke void @llvm.donothing() to label %proceed unwind label %catch4
+
+catch1:
+ landingpad i32
+ ; CHECK: landingpad i32
+ cleanup
+ ; CHECK: cleanup
+ br label %proceed
+
+catch2:
+ landingpad i32
+ ; CHECK: landingpad i32
+ cleanup
+ ; CHECK: cleanup
+ catch i32* null
+ ; CHECK: catch i32* null
+ br label %proceed
+
+catch3:
+ landingpad i32
+ ; CHECK: landingpad i32
+ cleanup
+ ; CHECK: cleanup
+ catch i32* null
+ ; CHECK: catch i32* null
+ catch i32* null
+ ; CHECK: catch i32* null
+ br label %proceed
+
+catch4:
+ landingpad i32
+ ; CHECK: landingpad i32
+ filter [2 x i32] zeroinitializer
+ ; CHECK: filter [2 x i32] zeroinitializer
+ br label %proceed
+
+proceed:
+ ret void
+}
+
+;; Intrinsic Functions
+
+; Intrinsic Functions -- Variable Argument Handling
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+define void @instructions.va_arg(i8* %v, ...) {
+ %ap = alloca i8*
+ %ap2 = bitcast i8** %ap to i8*
+
+ call void @llvm.va_start(i8* %ap2)
+ ; CHECK: call void @llvm.va_start(i8* %ap2)
+
+ va_arg i8* %ap2, i32
+ ; CHECK: va_arg i8* %ap2, i32
+
+ call void @llvm.va_copy(i8* %v, i8* %ap2)
+ ; CHECK: call void @llvm.va_copy(i8* %v, i8* %ap2)
+
+ call void @llvm.va_end(i8* %ap2)
+ ; CHECK: call void @llvm.va_end(i8* %ap2)
+
+ ret void
+}
+
+; Intrinsic Functions -- Accurate Garbage Collection
+declare void @llvm.gcroot(i8**, i8*)
+declare i8* @llvm.gcread(i8*, i8**)
+declare void @llvm.gcwrite(i8*, i8*, i8**)
+define void @intrinsics.gc() gc "shadow-stack" {
+ %ptrloc = alloca i8*
+ call void @llvm.gcroot(i8** %ptrloc, i8* null)
+ ; CHECK: call void @llvm.gcroot(i8** %ptrloc, i8* null)
+
+ call i8* @llvm.gcread(i8* null, i8** %ptrloc)
+ ; CHECK: call i8* @llvm.gcread(i8* null, i8** %ptrloc)
+
+ %ref = alloca i8
+ call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc)
+ ; CHECK: call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc)
+
+ ret void
+}
+
+; Intrinsic Functions -- Code Generation
+declare i8* @llvm.returnaddress(i32)
+declare i8* @llvm.frameaddress(i32)
+declare i32 @llvm.read_register.i32(metadata)
+declare i64 @llvm.read_register.i64(metadata)
+declare void @llvm.write_register.i32(metadata, i32)
+declare void @llvm.write_register.i64(metadata, i64)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+declare void @llvm.pcmarker(i32)
+declare i64 @llvm.readcyclecounter()
+declare void @llvm.clear_cache(i8*, i8*)
+declare void @llvm.instrprof_increment(i8*, i64, i32, i32)
+
+!10 = !{!"rax"}
+define void @intrinsics.codegen() {
+ call i8* @llvm.returnaddress(i32 1)
+ ; CHECK: call i8* @llvm.returnaddress(i32 1)
+ call i8* @llvm.frameaddress(i32 1)
+ ; CHECK: call i8* @llvm.frameaddress(i32 1)
+
+ call i32 @llvm.read_register.i32(metadata !10)
+ ; CHECK: call i32 @llvm.read_register.i32(metadata !10)
+ call i64 @llvm.read_register.i64(metadata !10)
+ ; CHECK: call i64 @llvm.read_register.i64(metadata !10)
+ call void @llvm.write_register.i32(metadata !10, i32 0)
+ ; CHECK: call void @llvm.write_register.i32(metadata !10, i32 0)
+ call void @llvm.write_register.i64(metadata !10, i64 0)
+ ; CHECK: call void @llvm.write_register.i64(metadata !10, i64 0)
+
+ %stack = call i8* @llvm.stacksave()
+ ; CHECK: %stack = call i8* @llvm.stacksave()
+ call void @llvm.stackrestore(i8* %stack)
+ ; CHECK: call void @llvm.stackrestore(i8* %stack)
+
+ call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
+ ; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0)
+
+ call void @llvm.pcmarker(i32 1)
+ ; CHECK: call void @llvm.pcmarker(i32 1)
+
+ call i64 @llvm.readcyclecounter()
+ ; CHECK: call i64 @llvm.readcyclecounter()
+
+ call void @llvm.clear_cache(i8* null, i8* null)
+ ; CHECK: call void @llvm.clear_cache(i8* null, i8* null)
+
+ call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0)
+ ; CHECK: call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0)
+
+ ret void
+}
+
+declare void @llvm.localescape(...)
+declare i8* @llvm.localrecover(i8* %func, i8* %fp, i32 %idx)
+define void @intrinsics.localescape() {
+ %static.alloca = alloca i32
+ call void (...) @llvm.localescape(i32* %static.alloca)
+ ; CHECK: call void (...) @llvm.localescape(i32* %static.alloca)
+
+ call void @intrinsics.localrecover()
+
+ ret void
+}
+define void @intrinsics.localrecover() {
+ %func = bitcast void ()* @intrinsics.localescape to i8*
+ %fp = call i8* @llvm.frameaddress(i32 1)
+ call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0)
+ ; CHECK: call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0)
+
+ ret void
+}
+
+; We need this function to provide `uses' for some metadata tests.
+define void @misc.metadata() {
+ call void @f1(), !srcloc !11
+ call void @f1(), !srcloc !12
+ call void @f1(), !srcloc !13
+ call void @f1(), !srcloc !14
+ ret void
+}
+
+declare void @op_bundle_callee_0()
+declare void @op_bundle_callee_1(i32,i32)
+
+define void @call_with_operand_bundle0(i32* %ptr) {
+; CHECK-LABEL: call_with_operand_bundle0(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+; CHECK: call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+ ret void
+}
+
+define void @call_with_operand_bundle1(i32* %ptr) {
+; CHECK-LABEL: call_with_operand_bundle1(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+
+ call void @op_bundle_callee_0()
+ call void @op_bundle_callee_0() [ "foo"() ]
+ call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+; CHECK: @op_bundle_callee_0(){{$}}
+; CHECK-NEXT: call void @op_bundle_callee_0() [ "foo"() ]
+; CHECK-NEXT: call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+ ret void
+}
+
+define void @call_with_operand_bundle2(i32* %ptr) {
+; CHECK-LABEL: call_with_operand_bundle2(
+ entry:
+ call void @op_bundle_callee_0() [ "foo"() ]
+; CHECK: call void @op_bundle_callee_0() [ "foo"() ]
+ ret void
+}
+
+define void @call_with_operand_bundle3(i32* %ptr) {
+; CHECK-LABEL: call_with_operand_bundle3(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+; CHECK: call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+ ret void
+}
+
+define void @call_with_operand_bundle4(i32* %ptr) {
+; CHECK-LABEL: call_with_operand_bundle4(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ call void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+; CHECK: call void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+ ret void
+}
+
+; Invoke versions of the above tests:
+
+
+define void @invoke_with_operand_bundle0(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @invoke_with_operand_bundle0(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] to label %normal unwind label %exception
+; CHECK: invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+define void @invoke_with_operand_bundle1(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @invoke_with_operand_bundle1(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+
+ invoke void @op_bundle_callee_0() to label %normal unwind label %exception
+; CHECK: invoke void @op_bundle_callee_0(){{$}}
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+
+normal:
+ invoke void @op_bundle_callee_0() [ "foo"() ] to label %normal1 unwind label %exception1
+; CHECK: invoke void @op_bundle_callee_0() [ "foo"() ]
+
+exception1:
+ %cleanup1 = landingpad i8 cleanup
+ br label %normal1
+
+normal1:
+ invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal2 unwind label %exception2
+; CHECK: invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception2:
+ %cleanup2 = landingpad i8 cleanup
+ br label %normal2
+
+normal2:
+ ret void
+}
+
+define void @invoke_with_operand_bundle2(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @invoke_with_operand_bundle2(
+ entry:
+ invoke void @op_bundle_callee_0() [ "foo"() ] to label %normal unwind label %exception
+; CHECK: invoke void @op_bundle_callee_0() [ "foo"() ]
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+define void @invoke_with_operand_bundle3(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @invoke_with_operand_bundle3(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal unwind label %exception
+; CHECK: invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+define void @invoke_with_operand_bundle4(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @invoke_with_operand_bundle4(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ invoke void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+ to label %normal unwind label %exception
+; CHECK: invoke void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+
+; CHECK: attributes #0 = { alignstack=4 }
+; CHECK: attributes #1 = { alignstack=8 }
+; CHECK: attributes #2 = { alwaysinline }
+; CHECK: attributes #3 = { cold }
+; CHECK: attributes #4 = { convergent }
+; CHECK: attributes #5 = { inlinehint }
+; CHECK: attributes #6 = { jumptable }
+; CHECK: attributes #7 = { minsize }
+; CHECK: attributes #8 = { naked }
+; CHECK: attributes #9 = { nobuiltin }
+; CHECK: attributes #10 = { noduplicate }
+; CHECK: attributes #11 = { noimplicitfloat }
+; CHECK: attributes #12 = { noinline }
+; CHECK: attributes #13 = { nonlazybind }
+; CHECK: attributes #14 = { noredzone }
+; CHECK: attributes #15 = { noreturn }
+; CHECK: attributes #16 = { nounwind }
+; CHECK: attributes #17 = { noinline optnone }
+; CHECK: attributes #18 = { optsize }
+; CHECK: attributes #19 = { readnone }
+; CHECK: attributes #20 = { readonly }
+; CHECK: attributes #21 = { returns_twice }
+; CHECK: attributes #22 = { safestack }
+; CHECK: attributes #23 = { sanitize_address }
+; CHECK: attributes #24 = { sanitize_memory }
+; CHECK: attributes #25 = { sanitize_thread }
+; CHECK: attributes #26 = { ssp }
+; CHECK: attributes #27 = { sspreq }
+; CHECK: attributes #28 = { sspstrong }
+; CHECK: attributes #29 = { "thunk" }
+; CHECK: attributes #30 = { uwtable }
+; CHECK: attributes #31 = { "cpu"="cortex-a8" }
+; CHECK: attributes #32 = { norecurse }
+; CHECK: attributes #33 = { inaccessiblememonly }
+; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly }
+; CHECK: attributes #35 = { nounwind readnone }
+; CHECK: attributes #36 = { argmemonly nounwind readonly }
+; CHECK: attributes #37 = { argmemonly nounwind }
+; CHECK: attributes #38 = { nounwind readonly }
+; CHECK: attributes #39 = { builtin }
+
+;; Metadata
+
+; Metadata -- Module flags
+!llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+
+!0 = !{i32 1, !"mod1", i32 0}
+; CHECK: !0 = !{i32 1, !"mod1", i32 0}
+!1 = !{i32 2, !"mod2", i32 0}
+; CHECK: !1 = !{i32 2, !"mod2", i32 0}
+!2 = !{i32 3, !"mod3", !3}
+; CHECK: !2 = !{i32 3, !"mod3", !3}
+!3 = !{!"mod6", !0}
+; CHECK: !3 = !{!"mod6", !0}
+!4 = !{i32 4, !"mod4", i32 0}
+; CHECK: !4 = !{i32 4, !"mod4", i32 0}
+!5 = !{i32 5, !"mod5", !0}
+; CHECK: !5 = !{i32 5, !"mod5", !0}
+!6 = !{i32 6, !"mod6", !0}
+; CHECK: !6 = !{i32 6, !"mod6", !0}
+
+; Metadata -- Check `distinct'
+!11 = distinct !{}
+; CHECK: !11 = distinct !{}
+!12 = distinct !{}
+; CHECK: !12 = distinct !{}
+!13 = !{!11}
+; CHECK: !13 = !{!11}
+!14 = !{!12}
+; CHECK: !14 = !{!12}
diff --git a/test/Bitcode/debug-loc-again.ll b/test/Bitcode/debug-loc-again.ll
index 6dbea16121aa..adf74a07ebed 100644
--- a/test/Bitcode/debug-loc-again.ll
+++ b/test/Bitcode/debug-loc-again.ll
@@ -29,8 +29,8 @@ entry:
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !DICompileUnit(language: DW_LANG_C99, file: !DIFile(filename: "f", directory: "/d"),
- subprograms: !{!2})
-!2 = !DISubprogram(name: "foo")
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !DIFile(filename: "f", directory: "/d"),
+ subprograms: !{!2})
+!2 = distinct !DISubprogram(name: "foo")
!3 = !DILocation(line: 1, scope: !2)
!4 = !DILocation(line: 2, scope: !2)
diff --git a/test/Bitcode/highLevelStructure.3.2.ll b/test/Bitcode/highLevelStructure.3.2.ll
index 54356b9fb139..749b157cffc3 100644
--- a/test/Bitcode/highLevelStructure.3.2.ll
+++ b/test/Bitcode/highLevelStructure.3.2.ll
@@ -19,16 +19,16 @@ module asm "some assembly"
; Aliases Test
; CHECK: @glob1 = global i32 1
@glob1 = global i32 1
-; CHECK: @aliased1 = alias i32* @glob1
-@aliased1 = alias i32* @glob1
-; CHECK-NEXT: @aliased2 = internal alias i32* @glob1
-@aliased2 = internal alias i32* @glob1
-; CHECK-NEXT: @aliased3 = alias i32* @glob1
-@aliased3 = external alias i32* @glob1
-; CHECK-NEXT: @aliased4 = weak alias i32* @glob1
-@aliased4 = weak alias i32* @glob1
-; CHECK-NEXT: @aliased5 = weak_odr alias i32* @glob1
-@aliased5 = weak_odr alias i32* @glob1
+; CHECK: @aliased1 = alias i32, i32* @glob1
+@aliased1 = alias i32, i32* @glob1
+; CHECK-NEXT: @aliased2 = internal alias i32, i32* @glob1
+@aliased2 = internal alias i32, i32* @glob1
+; CHECK-NEXT: @aliased3 = alias i32, i32* @glob1
+@aliased3 = external alias i32, i32* @glob1
+; CHECK-NEXT: @aliased4 = weak alias i32, i32* @glob1
+@aliased4 = weak alias i32, i32* @glob1
+; CHECK-NEXT: @aliased5 = weak_odr alias i32, i32* @glob1
+@aliased5 = weak_odr alias i32, i32* @glob1
;Parameter Attribute Test
; CHECK: declare void @ParamAttr1(i8 zeroext)
diff --git a/test/Bitcode/identification.ll b/test/Bitcode/identification.ll
new file mode 100644
index 000000000000..2f09d5a43281
--- /dev/null
+++ b/test/Bitcode/identification.ll
@@ -0,0 +1,6 @@
+; Check that a block "IDENTIFICATION_BLOCK_ID" is emitted.
+;RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
+;CHECK: <IDENTIFICATION_BLOCK_ID
+;CHECK-NEXT: <STRING
+;CHECK-NEXT: <EPOCH
+;CHECK-NEXT: </IDENTIFICATION_BLOCK_ID
diff --git a/test/Bitcode/invalid.ll b/test/Bitcode/invalid.ll
index df9fec8772d1..c56f0ffefa9e 100644
--- a/test/Bitcode/invalid.ll
+++ b/test/Bitcode/invalid.ll
@@ -1,6 +1,6 @@
; RUN: not llvm-dis < %s.bc 2>&1 | FileCheck %s
-; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Unknown attribute kind (48)
+; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Unknown attribute kind (52)
; invalid.ll.bc has an invalid attribute number.
; The test checks that LLVM reports the error and doesn't access freed memory
diff --git a/test/Bitcode/invalid.ll.bc b/test/Bitcode/invalid.ll.bc
index a85c3644b3ab..60c7afffbc23 100644
--- a/test/Bitcode/invalid.ll.bc
+++ b/test/Bitcode/invalid.ll.bc
Binary files differ
diff --git a/test/Bitcode/invalid.test b/test/Bitcode/invalid.test
index 0aab553bb615..3425adc84100 100644
--- a/test/Bitcode/invalid.test
+++ b/test/Bitcode/invalid.test
@@ -113,6 +113,11 @@ RUN: FileCheck --check-prefix=ELEMENT-TYPE %s
ELEMENT-TYPE: Invalid type
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-cast.bc 2>&1 | \
+RUN: FileCheck --check-prefix=INVALID-CAST %s
+
+INVALID-CAST: Invalid cast
+
RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-op-not-2nd-to-last.bc 2>&1 | \
RUN: FileCheck --check-prefix=ARRAY-NOT-2LAST %s
@@ -202,3 +207,13 @@ RUN: not llvm-dis -disable-output %p/Inputs/invalid-alias-type-mismatch.bc 2>&1
RUN: FileCheck --check-prefix=ALIAS-TYPE-MISMATCH %s
ALIAS-TYPE-MISMATCH: Alias and aliasee types don't match
+
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-no-function-block.bc 2>&1 | \
+RUN: FileCheck --check-prefix=NO-FUNCTION-BLOCK %s
+
+NO-FUNCTION-BLOCK: Trying to materialize functions before seeing function blocks
+
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-name-with-0-byte.bc 2>&1 | \
+RUN: FileCheck --check-prefix=NAME-WITH-0 %s
+
+NAME-WITH-0: Invalid value name
diff --git a/test/Bitcode/local-linkage-default-visibility.3.4.ll b/test/Bitcode/local-linkage-default-visibility.3.4.ll
index df0cf7653e55..15ff5e3a6af8 100644
--- a/test/Bitcode/local-linkage-default-visibility.3.4.ll
+++ b/test/Bitcode/local-linkage-default-visibility.3.4.ll
@@ -25,23 +25,23 @@
@global = global i32 0
-@default.internal.alias = alias internal i32* @global
-; CHECK: @default.internal.alias = internal alias i32* @global
+@default.internal.alias = alias internal i32, internal i32* @global
+; CHECK: @default.internal.alias = internal alias i32, i32* @global
-@hidden.internal.alias = hidden alias internal i32* @global
-; CHECK: @hidden.internal.alias = internal alias i32* @global
+@hidden.internal.alias = hidden alias internal i32, internal i32* @global
+; CHECK: @hidden.internal.alias = internal alias i32, i32* @global
-@protected.internal.alias = protected alias internal i32* @global
-; CHECK: @protected.internal.alias = internal alias i32* @global
+@protected.internal.alias = protected alias internal i32, internal i32* @global
+; CHECK: @protected.internal.alias = internal alias i32, i32* @global
-@default.private.alias = alias private i32* @global
-; CHECK: @default.private.alias = private alias i32* @global
+@default.private.alias = alias private i32, private i32* @global
+; CHECK: @default.private.alias = private alias i32, i32* @global
-@hidden.private.alias = hidden alias private i32* @global
-; CHECK: @hidden.private.alias = private alias i32* @global
+@hidden.private.alias = hidden alias private i32, private i32* @global
+; CHECK: @hidden.private.alias = private alias i32, i32* @global
-@protected.private.alias = protected alias private i32* @global
-; CHECK: @protected.private.alias = private alias i32* @global
+@protected.private.alias = protected alias private i32, private i32* @global
+; CHECK: @protected.private.alias = private alias i32, i32* @global
define internal void @default.internal() {
; CHECK: define internal void @default.internal
diff --git a/test/Bitcode/old-aliases.ll b/test/Bitcode/old-aliases.ll
index 8527f074d04b..1bcc4306477c 100644
--- a/test/Bitcode/old-aliases.ll
+++ b/test/Bitcode/old-aliases.ll
@@ -10,14 +10,14 @@
@v2 = global [1 x i32] zeroinitializer
; CHECK: @v2 = global [1 x i32] zeroinitializer
-@v3 = alias bitcast (i32* @v1 to i16*)
-; CHECK: @v3 = alias bitcast (i32* @v1 to i16*)
+@v3 = alias i16, bitcast (i32* @v1 to i16*)
+; CHECK: @v3 = alias i16, bitcast (i32* @v1 to i16*)
-@v4 = alias getelementptr ([1 x i32], [1 x i32]* @v2, i32 0, i32 0)
-; CHECK: @v4 = alias getelementptr inbounds ([1 x i32], [1 x i32]* @v2, i32 0, i32 0)
+@v4 = alias i32, getelementptr ([1 x i32], [1 x i32]* @v2, i32 0, i32 0)
+; CHECK: @v4 = alias i32, getelementptr inbounds ([1 x i32], [1 x i32]* @v2, i32 0, i32 0)
-@v5 = alias i32 addrspace(2)* addrspacecast (i32 addrspace(0)* @v1 to i32 addrspace(2)*)
-; CHECK: @v5 = alias addrspacecast (i32* @v1 to i32 addrspace(2)*)
+@v5 = alias i32, i32 addrspace(2)* addrspacecast (i32 addrspace(0)* @v1 to i32 addrspace(2)*)
+; CHECK: @v5 = alias i32, addrspacecast (i32* @v1 to i32 addrspace(2)*)
-@v6 = alias i16* @v3
-; CHECK: @v6 = alias i16* @v3
+@v6 = alias i16, i16* @v3
+; CHECK: @v6 = alias i16, i16* @v3
diff --git a/test/Bitcode/operand-bundles.ll b/test/Bitcode/operand-bundles.ll
new file mode 100644
index 000000000000..ab28cffd84aa
--- /dev/null
+++ b/test/Bitcode/operand-bundles.ll
@@ -0,0 +1,152 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+declare void @callee0()
+declare void @callee1(i32,i32)
+
+define void @f0(i32* %ptr) {
+; CHECK-LABEL: @f0(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+; CHECK: call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+ ret void
+}
+
+define void @f1(i32* %ptr) {
+; CHECK-LABEL: @f1(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+
+ call void @callee0()
+ call void @callee0() [ "foo"() ]
+ call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+; CHECK: @callee0(){{$}}
+; CHECK-NEXT: call void @callee0() [ "foo"() ]
+; CHECK-NEXT: call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+ ret void
+}
+
+define void @f2(i32* %ptr) {
+; CHECK-LABEL: @f2(
+ entry:
+ call void @callee0() [ "foo"() ]
+; CHECK: call void @callee0() [ "foo"() ]
+ ret void
+}
+
+define void @f3(i32* %ptr) {
+; CHECK-LABEL: @f3(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+; CHECK: call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+ ret void
+}
+
+define void @f4(i32* %ptr) {
+; CHECK-LABEL: @f4(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ call void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+; CHECK: call void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+ ret void
+}
+
+; Invoke versions of the above tests:
+
+
+define void @g0(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g0(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] to label %normal unwind label %exception
+; CHECK: invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+define void @g1(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g1(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+
+ invoke void @callee0() to label %normal unwind label %exception
+; CHECK: invoke void @callee0(){{$}}
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+
+normal:
+ invoke void @callee0() [ "foo"() ] to label %normal1 unwind label %exception1
+; CHECK: invoke void @callee0() [ "foo"() ]
+
+exception1:
+ %cleanup1 = landingpad i8 cleanup
+ br label %normal1
+
+normal1:
+ invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal2 unwind label %exception2
+; CHECK: invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception2:
+ %cleanup2 = landingpad i8 cleanup
+ br label %normal2
+
+normal2:
+ ret void
+}
+
+define void @g2(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g2(
+ entry:
+ invoke void @callee0() [ "foo"() ] to label %normal unwind label %exception
+; CHECK: invoke void @callee0() [ "foo"() ]
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+define void @g3(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g3(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal unwind label %exception
+; CHECK: invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+define void @g4(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g4(
+ entry:
+ %l = load i32, i32* %ptr
+ %x = add i32 42, 1
+ invoke void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+ to label %normal unwind label %exception
+; CHECK: invoke void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
diff --git a/test/Bitcode/select.ll b/test/Bitcode/select.ll
index 3ad06796dccf..666d2960fb5f 100644
--- a/test/Bitcode/select.ll
+++ b/test/Bitcode/select.ll
@@ -8,3 +8,11 @@ define <2 x i32> @main() {
; CHECK: define <2 x i32> @main() {
; CHECK: ret <2 x i32> <i32 0, i32 undef>
; CHECK: }
+
+define <2 x float> @f() {
+ ret <2 x float> select (i1 ptrtoint (<2 x float> ()* @f to i1), <2 x float> <float 1.000000e+00, float 0.000000e+00>, <2 x float> zeroinitializer)
+}
+
+; CHECK: define <2 x float> @f() {
+; CHECK: ret <2 x float> select (i1 ptrtoint (<2 x float> ()* @f to i1), <2 x float> <float 1.000000e+00, float 0.000000e+00>, <2 x float> zeroinitializer)
+; CHECK: }
diff --git a/test/Bitcode/tailcall.ll b/test/Bitcode/tailcall.ll
index 01190d74c348..6a4b8885847a 100644
--- a/test/Bitcode/tailcall.ll
+++ b/test/Bitcode/tailcall.ll
@@ -3,16 +3,16 @@
; Check that musttail and tail roundtrip.
-declare cc8191 void @t1_callee()
-define cc8191 void @t1() {
-; CHECK: tail call cc8191 void @t1_callee()
- tail call cc8191 void @t1_callee()
+declare cc1023 void @t1_callee()
+define cc1023 void @t1() {
+; CHECK: tail call cc1023 void @t1_callee()
+ tail call cc1023 void @t1_callee()
ret void
}
-declare cc8191 void @t2_callee()
-define cc8191 void @t2() {
-; CHECK: musttail call cc8191 void @t2_callee()
- musttail call cc8191 void @t2_callee()
+declare cc1023 void @t2_callee()
+define cc1023 void @t2() {
+; CHECK: musttail call cc1023 void @t2_callee()
+ musttail call cc1023 void @t2_callee()
ret void
}
diff --git a/test/Bitcode/thinlto-function-summary.ll b/test/Bitcode/thinlto-function-summary.ll
new file mode 100644
index 000000000000..9c2f2acd6c7a
--- /dev/null
+++ b/test/Bitcode/thinlto-function-summary.ll
@@ -0,0 +1,45 @@
+; RUN: llvm-as -function-summary < %s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=BC
+; Check for function summary block/records.
+
+; BC: <FUNCTION_SUMMARY_BLOCK
+; BC-NEXT: <PERMODULE_ENTRY
+; BC-NEXT: <PERMODULE_ENTRY
+; BC-NEXT: <PERMODULE_ENTRY
+; BC-NEXT: </FUNCTION_SUMMARY_BLOCK
+
+; RUN: llvm-as -function-summary < %s | llvm-dis | FileCheck %s
+; Check that this round-trips correctly.
+
+; ModuleID = '<stdin>'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: define i32 @foo()
+
+; Function Attrs: nounwind uwtable
+define i32 @foo() #0 {
+entry:
+ ret i32 1
+}
+
+; CHECK: define i32 @bar(i32 %x)
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %x) #0 {
+entry:
+ ret i32 %x
+}
+
+; Check an anonymous function as well, since in that case only the alias
+; ends up in the value symbol table and having a summary.
+@f = alias void (), void ()* @0 ; <void ()*> [#uses=0]
+@h = external global void ()* ; <void ()*> [#uses=0]
+
+define internal void @0() nounwind {
+entry:
+ store void()* @0, void()** @h
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
diff --git a/test/Bitcode/upgrade-subprogram.ll b/test/Bitcode/upgrade-subprogram.ll
new file mode 100644
index 000000000000..8d5c000a1480
--- /dev/null
+++ b/test/Bitcode/upgrade-subprogram.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; CHECK: define void @foo() !dbg [[SP:![0-9]+]]
+define void @foo() {
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+
+!llvm.dbg.cu = !{!1}
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !{!3}, emissionKind: 1)
+!2 = !DIFile(filename: "foo.c", directory: "/path/to/dir")
+; CHECK: [[SP]] = distinct !DISubprogram
+!3 = distinct !DISubprogram(file: !2, scope: !2, line: 51, name: "foo", function: void ()* @foo, type: !4)
+!4 = !DISubroutineType(types: !{})
diff --git a/test/Bitcode/upgrade-subprogram.ll.bc b/test/Bitcode/upgrade-subprogram.ll.bc
new file mode 100644
index 000000000000..cfab5a2c76a9
--- /dev/null
+++ b/test/Bitcode/upgrade-subprogram.ll.bc
Binary files differ
diff --git a/test/Bitcode/use-list-order.ll b/test/Bitcode/use-list-order.ll
index 09ec44897e73..ec5f7eceb972 100644
--- a/test/Bitcode/use-list-order.ll
+++ b/test/Bitcode/use-list-order.ll
@@ -1,7 +1,7 @@
; RUN: verify-uselistorder < %s
@a = global [4 x i1] [i1 0, i1 1, i1 0, i1 1]
-@b = alias i1* getelementptr ([4 x i1], [4 x i1]* @a, i64 0, i64 2)
+@b = alias i1, getelementptr ([4 x i1], [4 x i1]* @a, i64 0, i64 2)
; Check use-list order of constants used by globals.
@glob1 = global i5 7
@@ -10,9 +10,9 @@
; Check use-list order between variables and aliases.
@target = global i3 zeroinitializer
-@alias1 = alias i3* @target
-@alias2 = alias i3* @target
-@alias3 = alias i3* @target
+@alias1 = alias i3, i3* @target
+@alias2 = alias i3, i3* @target
+@alias3 = alias i3, i3* @target
@var1 = global i3* @target
@var2 = global i3* @target
@var3 = global i3* @target
@@ -31,9 +31,9 @@
; Same as above, but for aliases.
@const.target = global i62 1
-@const.alias = alias i62* @const.target
-@const.alias.ptr = alias i62* @const.alias
-@const.alias.2 = alias i62* @const.target
+@const.alias = alias i62, i62* @const.target
+@const.alias.ptr = alias i62, i62* @const.alias
+@const.alias.2 = alias i62, i62* @const.target
define i64 @f(i64 %f) {
entry:
diff --git a/test/Bitcode/use-list-order2.ll b/test/Bitcode/use-list-order2.ll
new file mode 100644
index 000000000000..7de79a520210
--- /dev/null
+++ b/test/Bitcode/use-list-order2.ll
@@ -0,0 +1,57 @@
+; RUN: verify-uselistorder %s
+; XFAIL: *
+
+; Test 1
+@g1 = global i8 0
+
+declare void @llvm.donothing() nounwind readnone
+
+define void @f.no_personality1() personality i8 0 {
+ invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+; Test 2
+@g2 = global i8 -1
+@g3 = global i8 -1
+
+define void @f.no_personality2() personality i8 -1 {
+ invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+normal:
+ ret void
+}
+
+; Test 3
+declare void @f1() prefix i32 1
+
+define void @test1() {
+ %t1 = alloca half ; Implicit i32 1 used here.
+ %t2 = alloca float
+ ret void
+}
+
+; Test 4
+declare void @f2() prefix i32 2
+
+define void @test2(i32* %word) {
+ %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 2 monotonic monotonic
+ %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 2 acq_rel monotonic
+ ret void
+}
+
+; Test 5
+@g4 = global i32 3
+@g5 = global i32 3
+declare void @test3() prefix i32 3
+
+; Test 6
+@g6 = global i32 4
+@g7 = global i32 4
+declare void @c() prologue i32 4
diff --git a/test/Bitcode/vst-forward-declaration.ll b/test/Bitcode/vst-forward-declaration.ll
new file mode 100644
index 000000000000..599caa0150ed
--- /dev/null
+++ b/test/Bitcode/vst-forward-declaration.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=BC
+; Check for VST forward declaration record and VST function index records.
+
+; BC: <VSTOFFSET
+; BC: <FNENTRY
+; BC: <FNENTRY
+
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; Check that this round-trips correctly.
+
+; ModuleID = '<stdin>'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: define i32 @foo()
+
+; Function Attrs: nounwind uwtable
+define i32 @foo() #0 {
+entry:
+ ret i32 1
+}
+
+; CHECK: define i32 @bar(i32 %x)
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %x) #0 {
+entry:
+ ret i32 %x
+}
diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll
index d8aca524ee28..44d109aa0077 100644
--- a/test/BugPoint/metadata.ll
+++ b/test/BugPoint/metadata.ll
@@ -6,8 +6,8 @@
; CHECK: call void @foo(), !dbg ![[LOC:[0-9]+]], !attach ![[CALL:[0-9]+]]
; CHECK: ![[LOC]] = !DILocation(line: 104, column: 105, scope: ![[SCOPE:[0-9]+]])
-; CHECK: ![[SCOPE]] = !DISubprogram(name: "test"
-; CHECK-SAME: file: ![[FILE:[0-9]+]]
+; CHECK: ![[SCOPE]] = distinct !DISubprogram(name: "test"
+; CHECK-SAME: file: ![[FILE:[0-9]+]]
; CHECK: ![[FILE]] = !DIFile(filename: "source.c", directory: "/dir")
; CHECK: ![[CALL]] = !{!"the call to foo"}
@@ -31,7 +31,7 @@ declare void @foo()
!3 = !{!"noise"}
!4 = !{!"filler"}
-!9 = !DISubprogram(name: "test", file: !15)
+!9 = distinct !DISubprogram(name: "test", file: !15)
!10 = !DILocation(line: 100, column: 101, scope: !9)
!11 = !DILocation(line: 102, column: 103, scope: !9)
!12 = !DILocation(line: 104, column: 105, scope: !9)
diff --git a/test/BugPoint/named-md.ll b/test/BugPoint/named-md.ll
new file mode 100644
index 000000000000..1fffa2cb978f
--- /dev/null
+++ b/test/BugPoint/named-md.ll
@@ -0,0 +1,39 @@
+; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crash-too-many-cus -silence-passes > /dev/null
+; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
+; RUN-DISABLE: bugpoint -disable-namedmd-remove -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crash-too-many-cus -silence-passes > /dev/null
+; RUN-DISABLE: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
+; REQUIRES: loadable_module
+
+; CHECK: !llvm.dbg.cu = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]]}
+; CHECK-DISABLE: !llvm.dbg.cu = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]],
+; CHECK-DISABLE-SAME: ![[THIRD:[0-9]+]], ![[FOURTH:[0-9]+]], ![[FIFTH:[0-9]+]]}
+!llvm.dbg.cu = !{!0, !1, !2, !3, !4, !5}
+; CHECK-NOT: !named
+; CHECK-DISABLE: !named
+!named = !{!0, !1, !2, !3, !4, !5}
+; CHECK: !llvm.module.flags = !{![[DIVERSION:[0-9]+]]}
+!llvm.module.flags = !{!6, !7}
+
+; CHECK-DAG: ![[FIRST]] = distinct !DICompileUnit(language: DW_LANG_Julia,
+; CHECK-DAG: ![[SECOND]] = distinct !DICompileUnit(language: DW_LANG_Julia,
+; CHECK-DAG: ![[DIVERSION]] = !{i32 2, !"Debug Info Version", i32 3}
+; CHECK-DAG: !DIFile(filename: "a", directory: "b")
+
+; 4 nodes survive. Due to renumbering !4 should not exist
+; CHECK-NOT: !4
+
+!0 = distinct !DICompileUnit(language: DW_LANG_Julia,
+ file: !8)
+!1 = distinct !DICompileUnit(language: DW_LANG_Julia,
+ file: !8)
+!2 = distinct !DICompileUnit(language: DW_LANG_Julia,
+ file: !8)
+!3 = distinct !DICompileUnit(language: DW_LANG_Julia,
+ file: !8)
+!4 = distinct !DICompileUnit(language: DW_LANG_Julia,
+ file: !8)
+!5 = distinct !DICompileUnit(language: DW_LANG_Julia,
+ file: !8)
+!6 = !{i32 2, !"Dwarf Version", i32 2}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !DIFile(filename: "a", directory: "b")
diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll
index 29a03b831077..72be4fe55936 100644
--- a/test/BugPoint/remove_arguments_test.ll
+++ b/test/BugPoint/remove_arguments_test.ll
@@ -2,10 +2,10 @@
; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
; REQUIRES: loadable_module
-; Test to make sure that arguments are removed from the function if they are
-; unnecessary. And clean up any types that that frees up too.
+; Test to make sure that arguments are removed from the function if they are
+; unnecessary. And clean up any types that frees up too.
-; CHECK: target triple
+; CHECK: ModuleID
; CHECK-NOT: struct.anon
%struct.anon = type { i32 }
diff --git a/test/BugPoint/replace-funcs-with-null.ll b/test/BugPoint/replace-funcs-with-null.ll
index 3433c456e90f..622f9eb67a29 100644
--- a/test/BugPoint/replace-funcs-with-null.ll
+++ b/test/BugPoint/replace-funcs-with-null.ll
@@ -3,7 +3,7 @@
; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -replace-funcs-with-null -bugpoint-crash-decl-funcs -silence-passes -safe-run-llc
; REQUIRES: loadable_module
-@foo2 = alias i32 ()* @foo
+@foo2 = alias i32 (), i32 ()* @foo
define i32 @foo() { ret i32 1 }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index f49df542f4e5..138450ba8e02 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(LLVM_BUILD_EXAMPLES)
+ set(ENABLE_EXAMPLES 1)
+endif()
+
configure_lit_site_cfg(
${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
@@ -15,11 +19,12 @@ endif()
# Set the depends list as a variable so that it can grow conditionally.
# NOTE: Sync the substitutions in test/lit.cfg when adding to this list.
set(LLVM_TEST_DEPENDS
- llvm-config
- UnitTests
BugpointPasses
+ FileCheck
LLVMHello
+ UnitTests
bugpoint
+ count
llc
lli
lli-child-target
@@ -27,12 +32,14 @@ set(LLVM_TEST_DEPENDS
llvm-as
llvm-bcanalyzer
llvm-c-test
+ llvm-config
llvm-cov
llvm-cxxdump
llvm-diff
llvm-dis
llvm-dsymutil
llvm-dwarfdump
+ llvm-dwp
llvm-extract
llvm-lib
llvm-link
@@ -41,22 +48,22 @@ set(LLVM_TEST_DEPENDS
llvm-mcmarkup
llvm-nm
llvm-objdump
+ llvm-pdbdump
llvm-profdata
llvm-ranlib
llvm-readobj
llvm-rtdyld
llvm-size
+ llvm-split
llvm-symbolizer
llvm-tblgen
- macho-dump
- opt
- FileCheck
- count
not
- yaml-bench
- yaml2obj
obj2yaml
+ opt
+ sancov
verify-uselistorder
+ yaml-bench
+ yaml2obj
)
# If Intel JIT events are supported, depend on a tool that tests the listener.
@@ -95,6 +102,16 @@ if(TARGET ocaml_llvm)
)
endif()
+if(LLVM_BUILD_EXAMPLES)
+ list(APPEND LLVM_TEST_DEPENDS
+ Kaleidoscope-Ch3
+ Kaleidoscope-Ch4
+ Kaleidoscope-Ch5
+ Kaleidoscope-Ch6
+ Kaleidoscope-Ch7
+ )
+endif()
+
add_lit_testsuite(check-llvm "Running the LLVM regression tests"
${CMAKE_CURRENT_BINARY_DIR}
PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
diff --git a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
index b075573cc674..5eb455f3a22c 100644
--- a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
+++ b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
@@ -3,7 +3,7 @@
; Bug 20598
-define void @test() #0 {
+define void @test() #0 !dbg !4 {
entry:
br label %for.body, !dbg !39
@@ -44,39 +44,39 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!36, !37}
!llvm.ident = !{!38}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 141, file: !1, scope: !1, type: !6, function: void ()* @test, variables: !12)
+!4 = distinct !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 141, file: !1, scope: !1, type: !6, variables: !12)
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
!9 = !DIDerivedType(tag: DW_TAG_typedef, line: 30, file: !1, baseType: !11)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!12 = !{!13, !14, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 140, arg: 1, scope: !4, file: !1, type: !8)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
+!13 = !DILocalVariable(name: "", line: 140, arg: 1, scope: !4, file: !1, type: !8)
+!14 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15)
!15 = !DIDerivedType(tag: DW_TAG_typedef, line: 183, file: !1, baseType: !17)
!17 = !DIBasicType(tag: DW_TAG_base_type, size: 64, align: 64, encoding: DW_ATE_signed)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
-!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
-!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
-!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
-!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15)
-!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15)
-!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
-!30 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
-!31 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
-!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
-!33 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
-!34 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 145, scope: !4, file: !1, type: !8)
-!35 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 146, scope: !4, file: !1, type: !11)
+!18 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15)
+!19 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15)
+!20 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15)
+!21 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15)
+!22 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15)
+!23 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15)
+!24 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15)
+!25 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15)
+!26 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15)
+!27 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15)
+!28 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15)
+!29 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15)
+!30 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15)
+!31 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15)
+!32 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15)
+!33 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15)
+!34 = !DILocalVariable(name: "", line: 145, scope: !4, file: !1, type: !8)
+!35 = !DILocalVariable(name: "", line: 146, scope: !4, file: !1, type: !11)
!36 = !{i32 2, !"Dwarf Version", i32 4}
!37 = !{i32 2, !"Debug Info Version", i32 3}
!38 = !{!"clang version 3.6.0 "}
diff --git a/test/CodeGen/AArch64/aarch64-addv.ll b/test/CodeGen/AArch64/aarch64-addv.ll
new file mode 100644
index 000000000000..ca374eea28e7
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-addv.ll
@@ -0,0 +1,98 @@
+; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s | FileCheck %s
+
+define i8 @add_B(<16 x i8>* %arr) {
+; CHECK-LABEL: add_B
+; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
+ %bin.rdx = load <16 x i8>, <16 x i8>* %arr
+ %rdx.shuf0 = shufflevector <16 x i8> %bin.rdx, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx0 = add <16 x i8> %bin.rdx, %rdx.shuf0
+ %rdx.shuf = shufflevector <16 x i8> %bin.rdx0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef >
+ %bin.rdx11 = add <16 x i8> %bin.rdx0, %rdx.shuf
+ %rdx.shuf12 = shufflevector <16 x i8> %bin.rdx11, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
+ %bin.rdx13 = add <16 x i8> %bin.rdx11, %rdx.shuf12
+ %rdx.shuf13 = shufflevector <16 x i8> %bin.rdx13, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
+ %bin.rdx14 = add <16 x i8> %bin.rdx13, %rdx.shuf13
+ %r = extractelement <16 x i8> %bin.rdx14, i32 0
+ ret i8 %r
+}
+
+define i16 @add_H(<8 x i16>* %arr) {
+; CHECK-LABEL: add_H
+; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
+ %bin.rdx = load <8 x i16>, <8 x i16>* %arr
+ %rdx.shuf = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef>
+ %bin.rdx11 = add <8 x i16> %bin.rdx, %rdx.shuf
+ %rdx.shuf12 = shufflevector <8 x i16> %bin.rdx11, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx13 = add <8 x i16> %bin.rdx11, %rdx.shuf12
+ %rdx.shuf13 = shufflevector <8 x i16> %bin.rdx13, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx14 = add <8 x i16> %bin.rdx13, %rdx.shuf13
+ %r = extractelement <8 x i16> %bin.rdx14, i32 0
+ ret i16 %r
+}
+
+define i32 @add_S( <4 x i32>* %arr) {
+; CHECK-LABEL: add_S
+; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
+ %bin.rdx = load <4 x i32>, <4 x i32>* %arr
+ %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ %bin.rdx11 = add <4 x i32> %bin.rdx, %rdx.shuf
+ %rdx.shuf12 = shufflevector <4 x i32> %bin.rdx11, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %bin.rdx13 = add <4 x i32> %bin.rdx11, %rdx.shuf12
+ %r = extractelement <4 x i32> %bin.rdx13, i32 0
+ ret i32 %r
+}
+
+define i64 @add_D(<2 x i64>* %arr) {
+; CHECK-LABEL: add_D
+; CHECK-NOT: addv
+ %bin.rdx = load <2 x i64>, <2 x i64>* %arr
+ %rdx.shuf0 = shufflevector <2 x i64> %bin.rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+ %bin.rdx0 = add <2 x i64> %bin.rdx, %rdx.shuf0
+ %r = extractelement <2 x i64> %bin.rdx0, i32 0
+ ret i64 %r
+}
+
+define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) {
+; CHECK-LABEL: oversized_ADDV_256
+; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
+entry:
+ %0 = bitcast i8* %arg1 to <8 x i8>*
+ %1 = load <8 x i8>, <8 x i8>* %0, align 1
+ %2 = zext <8 x i8> %1 to <8 x i32>
+ %3 = bitcast i8* %arg2 to <8 x i8>*
+ %4 = load <8 x i8>, <8 x i8>* %3, align 1
+ %5 = zext <8 x i8> %4 to <8 x i32>
+ %6 = sub nsw <8 x i32> %2, %5
+ %7 = icmp slt <8 x i32> %6, zeroinitializer
+ %8 = sub nsw <8 x i32> zeroinitializer, %6
+ %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
+ %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx = add <8 x i32> %9, %rdx.shuf
+ %rdx.shuf1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx2 = add <8 x i32> %bin.rdx, %rdx.shuf1
+ %rdx.shuf3 = shufflevector <8 x i32> %bin.rdx2, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx4 = add <8 x i32> %bin.rdx2, %rdx.shuf3
+ %10 = extractelement <8 x i32> %bin.rdx4, i32 0
+ ret i32 %10
+}
+
+define i32 @oversized_ADDV_512(<16 x i32>* %arr) {
+; CHECK-LABEL: oversized_ADDV_512
+; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
+ %bin.rdx = load <16 x i32>, <16 x i32>* %arr
+
+ %rdx.shuf0 = shufflevector <16 x i32> %bin.rdx, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx0 = add <16 x i32> %bin.rdx, %rdx.shuf0
+
+ %rdx.shuf = shufflevector <16 x i32> %bin.rdx0, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef >
+ %bin.rdx11 = add <16 x i32> %bin.rdx0, %rdx.shuf
+
+ %rdx.shuf12 = shufflevector <16 x i32> %bin.rdx11, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
+ %bin.rdx13 = add <16 x i32> %bin.rdx11, %rdx.shuf12
+
+ %rdx.shuf13 = shufflevector <16 x i32> %bin.rdx13, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
+ %bin.rdx14 = add <16 x i32> %bin.rdx13, %rdx.shuf13
+
+ %r = extractelement <16 x i32> %bin.rdx14, i32 0
+ ret i32 %r
+}
diff --git a/test/CodeGen/AArch64/aarch64-deferred-spilling.ll b/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
new file mode 100644
index 000000000000..7accdced7d44
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-deferred-spilling.ll
@@ -0,0 +1,514 @@
+;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED
+;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR
+
+; Check that we do not end up with useless spill code.
+;
+; Move to the basic block we are interested in.
+;
+; CHECK: // %if.then.120
+;
+; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill
+; Check that w21 wouldn't need to be spilled since it is never reused.
+; REGULAR-NOT: {{[wx]}}21{{,?}}
+;
+; Check that w22 is used to carry a value through the call.
+; DEFERRED-NOT: str {{[wx]}}22,
+; DEFERRED: mov {{[wx]}}22,
+; DEFERRED-NOT: str {{[wx]}}22,
+;
+; CHECK: bl fprintf
+;
+; DEFERRED-NOT: ldr {{[wx]}}22,
+; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22
+; DEFERRED-NOT: ldr {{[wx]}}22,
+;
+; REGULAR-NOT: {{[wx]}}21{{,?}}
+; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload
+;
+; End of the basic block we are interested in.
+; CHECK: b
+; CHECK: {{[^:]+}}: // %sw.bb.123
+
+%struct.__sFILE = type { i8*, i32, i32, i32, i32, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sbuf = type { i8*, i64 }
+%struct.DState = type { %struct.bz_stream*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
+%struct.bz_stream = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* }
+
+@__sF = external global [0 x %struct.__sFILE], align 8
+@.str = private unnamed_addr constant [20 x i8] c"\0A [%d: stuff+mf \00", align 1
+
+declare i32 @fprintf(%struct.__sFILE* nocapture, i8* nocapture readonly, ...)
+
+declare void @bar(i32)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+define i32 @foo(%struct.DState* %s) {
+entry:
+ %state = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 1
+ %tmp = load i32, i32* %state, align 4
+ %cmp = icmp eq i32 %tmp, 10
+ %save_i = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 40
+ br i1 %cmp, label %if.end.thread, label %if.end
+
+if.end.thread: ; preds = %entry
+ %save_j = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
+ %save_t = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
+ %save_alphaSize = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
+ %save_nGroups = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
+ %save_nSelectors = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
+ %save_EOB = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
+ %save_groupNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
+ %save_groupPos = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
+ %save_nextSym = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
+ %save_nblockMAX = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
+ %save_nblock = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
+ %save_es = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
+ %save_N = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
+ %save_curr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
+ %save_zt = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
+ %save_zn = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
+ %save_zvec = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
+ %save_zj = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
+ %tmp1 = bitcast i32* %save_i to i8*
+ call void @llvm.memset.p0i8.i64(i8* %tmp1, i8 0, i64 108, i32 4, i1 false)
+ br label %sw.default
+
+if.end: ; preds = %entry
+ %.pre = load i32, i32* %save_i, align 4
+ %save_j3.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41
+ %.pre406 = load i32, i32* %save_j3.phi.trans.insert, align 4
+ %save_t4.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42
+ %.pre407 = load i32, i32* %save_t4.phi.trans.insert, align 4
+ %save_alphaSize5.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43
+ %.pre408 = load i32, i32* %save_alphaSize5.phi.trans.insert, align 4
+ %save_nGroups6.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44
+ %.pre409 = load i32, i32* %save_nGroups6.phi.trans.insert, align 4
+ %save_nSelectors7.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45
+ %.pre410 = load i32, i32* %save_nSelectors7.phi.trans.insert, align 4
+ %save_EOB8.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46
+ %.pre411 = load i32, i32* %save_EOB8.phi.trans.insert, align 4
+ %save_groupNo9.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47
+ %.pre412 = load i32, i32* %save_groupNo9.phi.trans.insert, align 4
+ %save_groupPos10.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48
+ %.pre413 = load i32, i32* %save_groupPos10.phi.trans.insert, align 4
+ %save_nextSym11.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49
+ %.pre414 = load i32, i32* %save_nextSym11.phi.trans.insert, align 4
+ %save_nblockMAX12.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50
+ %.pre415 = load i32, i32* %save_nblockMAX12.phi.trans.insert, align 4
+ %save_nblock13.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51
+ %.pre416 = load i32, i32* %save_nblock13.phi.trans.insert, align 4
+ %save_es14.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52
+ %.pre417 = load i32, i32* %save_es14.phi.trans.insert, align 4
+ %save_N15.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53
+ %.pre418 = load i32, i32* %save_N15.phi.trans.insert, align 4
+ %save_curr16.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54
+ %.pre419 = load i32, i32* %save_curr16.phi.trans.insert, align 4
+ %save_zt17.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55
+ %.pre420 = load i32, i32* %save_zt17.phi.trans.insert, align 4
+ %save_zn18.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56
+ %.pre421 = load i32, i32* %save_zn18.phi.trans.insert, align 4
+ %save_zvec19.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57
+ %.pre422 = load i32, i32* %save_zvec19.phi.trans.insert, align 4
+ %save_zj20.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58
+ %.pre423 = load i32, i32* %save_zj20.phi.trans.insert, align 4
+ switch i32 %tmp, label %sw.default [
+ i32 13, label %sw.bb
+ i32 14, label %if.end.sw.bb.65_crit_edge
+ i32 25, label %if.end.sw.bb.123_crit_edge
+ ]
+
+if.end.sw.bb.123_crit_edge: ; preds = %if.end
+ %.pre433 = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
+ br label %sw.bb.123
+
+if.end.sw.bb.65_crit_edge: ; preds = %if.end
+ %bsLive69.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
+ %.pre426 = load i32, i32* %bsLive69.phi.trans.insert, align 4
+ br label %sw.bb.65
+
+sw.bb: ; preds = %if.end
+ %sunkaddr = ptrtoint %struct.DState* %s to i64
+ %sunkaddr485 = add i64 %sunkaddr, 8
+ %sunkaddr486 = inttoptr i64 %sunkaddr485 to i32*
+ store i32 13, i32* %sunkaddr486, align 4
+ %bsLive = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8
+ %tmp2 = load i32, i32* %bsLive, align 4
+ %cmp28.400 = icmp sgt i32 %tmp2, 7
+ br i1 %cmp28.400, label %sw.bb.if.then.29_crit_edge, label %if.end.33.lr.ph
+
+sw.bb.if.then.29_crit_edge: ; preds = %sw.bb
+ %sunkaddr487 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr488 = add i64 %sunkaddr487, 32
+ %sunkaddr489 = inttoptr i64 %sunkaddr488 to i32*
+ %.pre425 = load i32, i32* %sunkaddr489, align 4
+ br label %if.then.29
+
+if.end.33.lr.ph: ; preds = %sw.bb
+ %tmp3 = bitcast %struct.DState* %s to %struct.bz_stream**
+ %.pre424 = load %struct.bz_stream*, %struct.bz_stream** %tmp3, align 8
+ %avail_in.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre424, i64 0, i32 1
+ %.pre430 = load i32, i32* %avail_in.phi.trans.insert, align 4
+ %tmp4 = add i32 %.pre430, -1
+ br label %if.end.33
+
+if.then.29: ; preds = %while.body.backedge, %sw.bb.if.then.29_crit_edge
+ %tmp5 = phi i32 [ %.pre425, %sw.bb.if.then.29_crit_edge ], [ %or, %while.body.backedge ]
+ %.lcssa393 = phi i32 [ %tmp2, %sw.bb.if.then.29_crit_edge ], [ %add, %while.body.backedge ]
+ %sub = add nsw i32 %.lcssa393, -8
+ %shr = lshr i32 %tmp5, %sub
+ %and = and i32 %shr, 255
+ %sunkaddr491 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr492 = add i64 %sunkaddr491, 36
+ %sunkaddr493 = inttoptr i64 %sunkaddr492 to i32*
+ store i32 %sub, i32* %sunkaddr493, align 4
+ %blockSize100k = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 9
+ store i32 %and, i32* %blockSize100k, align 4
+ %and.off = add nsw i32 %and, -49
+ %tmp6 = icmp ugt i32 %and.off, 8
+ br i1 %tmp6, label %save_state_and_return, label %if.end.62
+
+if.end.33: ; preds = %while.body.backedge, %if.end.33.lr.ph
+ %lsr.iv482 = phi i32 [ %tmp4, %if.end.33.lr.ph ], [ %lsr.iv.next483, %while.body.backedge ]
+ %tmp7 = phi i32 [ %tmp2, %if.end.33.lr.ph ], [ %add, %while.body.backedge ]
+ %cmp35 = icmp eq i32 %lsr.iv482, -1
+ br i1 %cmp35, label %save_state_and_return, label %if.end.37
+
+if.end.37: ; preds = %if.end.33
+ %tmp8 = bitcast %struct.bz_stream* %.pre424 to i8**
+ %sunkaddr494 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr495 = add i64 %sunkaddr494, 32
+ %sunkaddr496 = inttoptr i64 %sunkaddr495 to i32*
+ %tmp9 = load i32, i32* %sunkaddr496, align 4
+ %shl = shl i32 %tmp9, 8
+ %tmp10 = load i8*, i8** %tmp8, align 8
+ %tmp11 = load i8, i8* %tmp10, align 1
+ %conv = zext i8 %tmp11 to i32
+ %or = or i32 %conv, %shl
+ store i32 %or, i32* %sunkaddr496, align 4
+ %add = add nsw i32 %tmp7, 8
+ %sunkaddr497 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr498 = add i64 %sunkaddr497, 36
+ %sunkaddr499 = inttoptr i64 %sunkaddr498 to i32*
+ store i32 %add, i32* %sunkaddr499, align 4
+ %incdec.ptr = getelementptr inbounds i8, i8* %tmp10, i64 1
+ store i8* %incdec.ptr, i8** %tmp8, align 8
+ %sunkaddr500 = ptrtoint %struct.bz_stream* %.pre424 to i64
+ %sunkaddr501 = add i64 %sunkaddr500, 8
+ %sunkaddr502 = inttoptr i64 %sunkaddr501 to i32*
+ store i32 %lsr.iv482, i32* %sunkaddr502, align 4
+ %sunkaddr503 = ptrtoint %struct.bz_stream* %.pre424 to i64
+ %sunkaddr504 = add i64 %sunkaddr503, 12
+ %sunkaddr505 = inttoptr i64 %sunkaddr504 to i32*
+ %tmp12 = load i32, i32* %sunkaddr505, align 4
+ %inc = add i32 %tmp12, 1
+ store i32 %inc, i32* %sunkaddr505, align 4
+ %cmp49 = icmp eq i32 %inc, 0
+ br i1 %cmp49, label %if.then.51, label %while.body.backedge
+
+if.then.51: ; preds = %if.end.37
+ %sunkaddr506 = ptrtoint %struct.bz_stream* %.pre424 to i64
+ %sunkaddr507 = add i64 %sunkaddr506, 16
+ %sunkaddr508 = inttoptr i64 %sunkaddr507 to i32*
+ %tmp13 = load i32, i32* %sunkaddr508, align 4
+ %inc53 = add i32 %tmp13, 1
+ store i32 %inc53, i32* %sunkaddr508, align 4
+ br label %while.body.backedge
+
+while.body.backedge: ; preds = %if.then.51, %if.end.37
+ %lsr.iv.next483 = add i32 %lsr.iv482, -1
+ %cmp28 = icmp sgt i32 %add, 7
+ br i1 %cmp28, label %if.then.29, label %if.end.33
+
+if.end.62: ; preds = %if.then.29
+ %sub64 = add nsw i32 %and, -48
+ %sunkaddr509 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr510 = add i64 %sunkaddr509, 40
+ %sunkaddr511 = inttoptr i64 %sunkaddr510 to i32*
+ store i32 %sub64, i32* %sunkaddr511, align 4
+ br label %sw.bb.65
+
+sw.bb.65: ; preds = %if.end.62, %if.end.sw.bb.65_crit_edge
+ %bsLive69.pre-phi = phi i32* [ %bsLive69.phi.trans.insert, %if.end.sw.bb.65_crit_edge ], [ %bsLive, %if.end.62 ]
+ %tmp14 = phi i32 [ %.pre426, %if.end.sw.bb.65_crit_edge ], [ %sub, %if.end.62 ]
+ %sunkaddr512 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr513 = add i64 %sunkaddr512, 8
+ %sunkaddr514 = inttoptr i64 %sunkaddr513 to i32*
+ store i32 14, i32* %sunkaddr514, align 4
+ %cmp70.397 = icmp sgt i32 %tmp14, 7
+ br i1 %cmp70.397, label %if.then.72, label %if.end.82.lr.ph
+
+if.end.82.lr.ph: ; preds = %sw.bb.65
+ %tmp15 = bitcast %struct.DState* %s to %struct.bz_stream**
+ %.pre427 = load %struct.bz_stream*, %struct.bz_stream** %tmp15, align 8
+ %avail_in84.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre427, i64 0, i32 1
+ %.pre431 = load i32, i32* %avail_in84.phi.trans.insert, align 4
+ %tmp16 = add i32 %.pre431, -1
+ br label %if.end.82
+
+if.then.72: ; preds = %while.body.68.backedge, %sw.bb.65
+ %.lcssa390 = phi i32 [ %tmp14, %sw.bb.65 ], [ %add97, %while.body.68.backedge ]
+ %sub76 = add nsw i32 %.lcssa390, -8
+ %sunkaddr516 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr517 = add i64 %sunkaddr516, 36
+ %sunkaddr518 = inttoptr i64 %sunkaddr517 to i32*
+ store i32 %sub76, i32* %sunkaddr518, align 4
+ %currBlockNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 11
+ %tmp17 = load i32, i32* %currBlockNo, align 4
+ %inc117 = add nsw i32 %tmp17, 1
+ store i32 %inc117, i32* %currBlockNo, align 4
+ %verbosity = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 12
+ %tmp18 = load i32, i32* %verbosity, align 4
+ %cmp118 = icmp sgt i32 %tmp18, 1
+ br i1 %cmp118, label %if.then.120, label %sw.bb.123, !prof !0
+
+if.end.82: ; preds = %while.body.68.backedge, %if.end.82.lr.ph
+ %lsr.iv480 = phi i32 [ %tmp16, %if.end.82.lr.ph ], [ %lsr.iv.next481, %while.body.68.backedge ]
+ %tmp19 = phi i32 [ %tmp14, %if.end.82.lr.ph ], [ %add97, %while.body.68.backedge ]
+ %cmp85 = icmp eq i32 %lsr.iv480, -1
+ br i1 %cmp85, label %save_state_and_return, label %if.end.88
+
+if.end.88: ; preds = %if.end.82
+ %tmp20 = bitcast %struct.bz_stream* %.pre427 to i8**
+ %sunkaddr519 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr520 = add i64 %sunkaddr519, 32
+ %sunkaddr521 = inttoptr i64 %sunkaddr520 to i32*
+ %tmp21 = load i32, i32* %sunkaddr521, align 4
+ %shl90 = shl i32 %tmp21, 8
+ %tmp22 = load i8*, i8** %tmp20, align 8
+ %tmp23 = load i8, i8* %tmp22, align 1
+ %conv93 = zext i8 %tmp23 to i32
+ %or94 = or i32 %conv93, %shl90
+ store i32 %or94, i32* %sunkaddr521, align 4
+ %add97 = add nsw i32 %tmp19, 8
+ %sunkaddr522 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr523 = add i64 %sunkaddr522, 36
+ %sunkaddr524 = inttoptr i64 %sunkaddr523 to i32*
+ store i32 %add97, i32* %sunkaddr524, align 4
+ %incdec.ptr100 = getelementptr inbounds i8, i8* %tmp22, i64 1
+ store i8* %incdec.ptr100, i8** %tmp20, align 8
+ %sunkaddr525 = ptrtoint %struct.bz_stream* %.pre427 to i64
+ %sunkaddr526 = add i64 %sunkaddr525, 8
+ %sunkaddr527 = inttoptr i64 %sunkaddr526 to i32*
+ store i32 %lsr.iv480, i32* %sunkaddr527, align 4
+ %sunkaddr528 = ptrtoint %struct.bz_stream* %.pre427 to i64
+ %sunkaddr529 = add i64 %sunkaddr528, 12
+ %sunkaddr530 = inttoptr i64 %sunkaddr529 to i32*
+ %tmp24 = load i32, i32* %sunkaddr530, align 4
+ %inc106 = add i32 %tmp24, 1
+ store i32 %inc106, i32* %sunkaddr530, align 4
+ %cmp109 = icmp eq i32 %inc106, 0
+ br i1 %cmp109, label %if.then.111, label %while.body.68.backedge
+
+if.then.111: ; preds = %if.end.88
+ %sunkaddr531 = ptrtoint %struct.bz_stream* %.pre427 to i64
+ %sunkaddr532 = add i64 %sunkaddr531, 16
+ %sunkaddr533 = inttoptr i64 %sunkaddr532 to i32*
+ %tmp25 = load i32, i32* %sunkaddr533, align 4
+ %inc114 = add i32 %tmp25, 1
+ store i32 %inc114, i32* %sunkaddr533, align 4
+ br label %while.body.68.backedge
+
+while.body.68.backedge: ; preds = %if.then.111, %if.end.88
+ %lsr.iv.next481 = add i32 %lsr.iv480, -1
+ %cmp70 = icmp sgt i32 %add97, 7
+ br i1 %cmp70, label %if.then.72, label %if.end.82
+
+if.then.120: ; preds = %if.then.72
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* getelementptr inbounds ([0 x %struct.__sFILE], [0 x %struct.__sFILE]* @__sF, i64 0, i64 2), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %inc117)
+ br label %sw.bb.123
+
+sw.bb.123: ; preds = %if.then.120, %if.then.72, %if.end.sw.bb.123_crit_edge
+ %bsLive127.pre-phi = phi i32* [ %.pre433, %if.end.sw.bb.123_crit_edge ], [ %bsLive69.pre-phi, %if.then.72 ], [ %bsLive69.pre-phi, %if.then.120 ]
+ %sunkaddr534 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr535 = add i64 %sunkaddr534, 8
+ %sunkaddr536 = inttoptr i64 %sunkaddr535 to i32*
+ store i32 25, i32* %sunkaddr536, align 4
+ %tmp26 = load i32, i32* %bsLive127.pre-phi, align 4
+ %cmp128.395 = icmp sgt i32 %tmp26, 7
+ br i1 %cmp128.395, label %sw.bb.123.if.then.130_crit_edge, label %if.end.140.lr.ph
+
+sw.bb.123.if.then.130_crit_edge: ; preds = %sw.bb.123
+ %sunkaddr537 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr538 = add i64 %sunkaddr537, 32
+ %sunkaddr539 = inttoptr i64 %sunkaddr538 to i32*
+ %.pre429 = load i32, i32* %sunkaddr539, align 4
+ br label %if.then.130
+
+if.end.140.lr.ph: ; preds = %sw.bb.123
+ %tmp27 = bitcast %struct.DState* %s to %struct.bz_stream**
+ %.pre428 = load %struct.bz_stream*, %struct.bz_stream** %tmp27, align 8
+ %avail_in142.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre428, i64 0, i32 1
+ %.pre432 = load i32, i32* %avail_in142.phi.trans.insert, align 4
+ %tmp28 = add i32 %.pre432, -1
+ br label %if.end.140
+
+if.then.130: ; preds = %while.body.126.backedge, %sw.bb.123.if.then.130_crit_edge
+ %tmp29 = phi i32 [ %.pre429, %sw.bb.123.if.then.130_crit_edge ], [ %or152, %while.body.126.backedge ]
+ %.lcssa = phi i32 [ %tmp26, %sw.bb.123.if.then.130_crit_edge ], [ %add155, %while.body.126.backedge ]
+ %sub134 = add nsw i32 %.lcssa, -8
+ %shr135 = lshr i32 %tmp29, %sub134
+ store i32 %sub134, i32* %bsLive127.pre-phi, align 4
+ %origPtr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 13
+ %tmp30 = load i32, i32* %origPtr, align 4
+ %shl175 = shl i32 %tmp30, 8
+ %conv176 = and i32 %shr135, 255
+ %or177 = or i32 %shl175, %conv176
+ store i32 %or177, i32* %origPtr, align 4
+ %nInUse = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 27
+ %tmp31 = load i32, i32* %nInUse, align 4
+ %add179 = add nsw i32 %tmp31, 2
+ br label %save_state_and_return
+
+if.end.140: ; preds = %while.body.126.backedge, %if.end.140.lr.ph
+ %lsr.iv = phi i32 [ %tmp28, %if.end.140.lr.ph ], [ %lsr.iv.next, %while.body.126.backedge ]
+ %tmp32 = phi i32 [ %tmp26, %if.end.140.lr.ph ], [ %add155, %while.body.126.backedge ]
+ %cmp143 = icmp eq i32 %lsr.iv, -1
+ br i1 %cmp143, label %save_state_and_return, label %if.end.146
+
+if.end.146: ; preds = %if.end.140
+ %tmp33 = bitcast %struct.bz_stream* %.pre428 to i8**
+ %sunkaddr541 = ptrtoint %struct.DState* %s to i64
+ %sunkaddr542 = add i64 %sunkaddr541, 32
+ %sunkaddr543 = inttoptr i64 %sunkaddr542 to i32*
+ %tmp34 = load i32, i32* %sunkaddr543, align 4
+ %shl148 = shl i32 %tmp34, 8
+ %tmp35 = load i8*, i8** %tmp33, align 8
+ %tmp36 = load i8, i8* %tmp35, align 1
+ %conv151 = zext i8 %tmp36 to i32
+ %or152 = or i32 %conv151, %shl148
+ store i32 %or152, i32* %sunkaddr543, align 4
+ %add155 = add nsw i32 %tmp32, 8
+ store i32 %add155, i32* %bsLive127.pre-phi, align 4
+ %incdec.ptr158 = getelementptr inbounds i8, i8* %tmp35, i64 1
+ store i8* %incdec.ptr158, i8** %tmp33, align 8
+ %sunkaddr544 = ptrtoint %struct.bz_stream* %.pre428 to i64
+ %sunkaddr545 = add i64 %sunkaddr544, 8
+ %sunkaddr546 = inttoptr i64 %sunkaddr545 to i32*
+ store i32 %lsr.iv, i32* %sunkaddr546, align 4
+ %sunkaddr547 = ptrtoint %struct.bz_stream* %.pre428 to i64
+ %sunkaddr548 = add i64 %sunkaddr547, 12
+ %sunkaddr549 = inttoptr i64 %sunkaddr548 to i32*
+ %tmp37 = load i32, i32* %sunkaddr549, align 4
+ %inc164 = add i32 %tmp37, 1
+ store i32 %inc164, i32* %sunkaddr549, align 4
+ %cmp167 = icmp eq i32 %inc164, 0
+ br i1 %cmp167, label %if.then.169, label %while.body.126.backedge
+
+if.then.169: ; preds = %if.end.146
+ %sunkaddr550 = ptrtoint %struct.bz_stream* %.pre428 to i64
+ %sunkaddr551 = add i64 %sunkaddr550, 16
+ %sunkaddr552 = inttoptr i64 %sunkaddr551 to i32*
+ %tmp38 = load i32, i32* %sunkaddr552, align 4
+ %inc172 = add i32 %tmp38, 1
+ store i32 %inc172, i32* %sunkaddr552, align 4
+ br label %while.body.126.backedge
+
+while.body.126.backedge: ; preds = %if.then.169, %if.end.146
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %cmp128 = icmp sgt i32 %add155, 7
+ br i1 %cmp128, label %if.then.130, label %if.end.140
+
+sw.default: ; preds = %if.end, %if.end.thread
+ %tmp39 = phi i32 [ 0, %if.end.thread ], [ %.pre, %if.end ]
+ %tmp40 = phi i32 [ 0, %if.end.thread ], [ %.pre406, %if.end ]
+ %tmp41 = phi i32 [ 0, %if.end.thread ], [ %.pre407, %if.end ]
+ %tmp42 = phi i32 [ 0, %if.end.thread ], [ %.pre408, %if.end ]
+ %tmp43 = phi i32 [ 0, %if.end.thread ], [ %.pre409, %if.end ]
+ %tmp44 = phi i32 [ 0, %if.end.thread ], [ %.pre410, %if.end ]
+ %tmp45 = phi i32 [ 0, %if.end.thread ], [ %.pre411, %if.end ]
+ %tmp46 = phi i32 [ 0, %if.end.thread ], [ %.pre412, %if.end ]
+ %tmp47 = phi i32 [ 0, %if.end.thread ], [ %.pre413, %if.end ]
+ %tmp48 = phi i32 [ 0, %if.end.thread ], [ %.pre414, %if.end ]
+ %tmp49 = phi i32 [ 0, %if.end.thread ], [ %.pre415, %if.end ]
+ %tmp50 = phi i32 [ 0, %if.end.thread ], [ %.pre416, %if.end ]
+ %tmp51 = phi i32 [ 0, %if.end.thread ], [ %.pre417, %if.end ]
+ %tmp52 = phi i32 [ 0, %if.end.thread ], [ %.pre418, %if.end ]
+ %tmp53 = phi i32 [ 0, %if.end.thread ], [ %.pre419, %if.end ]
+ %tmp54 = phi i32 [ 0, %if.end.thread ], [ %.pre420, %if.end ]
+ %tmp55 = phi i32 [ 0, %if.end.thread ], [ %.pre421, %if.end ]
+ %tmp56 = phi i32 [ 0, %if.end.thread ], [ %.pre422, %if.end ]
+ %tmp57 = phi i32 [ 0, %if.end.thread ], [ %.pre423, %if.end ]
+ %save_j3.pre-phi469 = phi i32* [ %save_j, %if.end.thread ], [ %save_j3.phi.trans.insert, %if.end ]
+ %save_t4.pre-phi467 = phi i32* [ %save_t, %if.end.thread ], [ %save_t4.phi.trans.insert, %if.end ]
+ %save_alphaSize5.pre-phi465 = phi i32* [ %save_alphaSize, %if.end.thread ], [ %save_alphaSize5.phi.trans.insert, %if.end ]
+ %save_nGroups6.pre-phi463 = phi i32* [ %save_nGroups, %if.end.thread ], [ %save_nGroups6.phi.trans.insert, %if.end ]
+ %save_nSelectors7.pre-phi461 = phi i32* [ %save_nSelectors, %if.end.thread ], [ %save_nSelectors7.phi.trans.insert, %if.end ]
+ %save_EOB8.pre-phi459 = phi i32* [ %save_EOB, %if.end.thread ], [ %save_EOB8.phi.trans.insert, %if.end ]
+ %save_groupNo9.pre-phi457 = phi i32* [ %save_groupNo, %if.end.thread ], [ %save_groupNo9.phi.trans.insert, %if.end ]
+ %save_groupPos10.pre-phi455 = phi i32* [ %save_groupPos, %if.end.thread ], [ %save_groupPos10.phi.trans.insert, %if.end ]
+ %save_nextSym11.pre-phi453 = phi i32* [ %save_nextSym, %if.end.thread ], [ %save_nextSym11.phi.trans.insert, %if.end ]
+ %save_nblockMAX12.pre-phi451 = phi i32* [ %save_nblockMAX, %if.end.thread ], [ %save_nblockMAX12.phi.trans.insert, %if.end ]
+ %save_nblock13.pre-phi449 = phi i32* [ %save_nblock, %if.end.thread ], [ %save_nblock13.phi.trans.insert, %if.end ]
+ %save_es14.pre-phi447 = phi i32* [ %save_es, %if.end.thread ], [ %save_es14.phi.trans.insert, %if.end ]
+ %save_N15.pre-phi445 = phi i32* [ %save_N, %if.end.thread ], [ %save_N15.phi.trans.insert, %if.end ]
+ %save_curr16.pre-phi443 = phi i32* [ %save_curr, %if.end.thread ], [ %save_curr16.phi.trans.insert, %if.end ]
+ %save_zt17.pre-phi441 = phi i32* [ %save_zt, %if.end.thread ], [ %save_zt17.phi.trans.insert, %if.end ]
+ %save_zn18.pre-phi439 = phi i32* [ %save_zn, %if.end.thread ], [ %save_zn18.phi.trans.insert, %if.end ]
+ %save_zvec19.pre-phi437 = phi i32* [ %save_zvec, %if.end.thread ], [ %save_zvec19.phi.trans.insert, %if.end ]
+ %save_zj20.pre-phi435 = phi i32* [ %save_zj, %if.end.thread ], [ %save_zj20.phi.trans.insert, %if.end ]
+ tail call void @bar(i32 4001)
+ br label %save_state_and_return
+
+save_state_and_return: ; preds = %sw.default, %if.end.140, %if.then.130, %if.end.82, %if.end.33, %if.then.29
+ %tmp58 = phi i32 [ %tmp39, %sw.default ], [ %.pre, %if.then.29 ], [ %.pre, %if.then.130 ], [ %.pre, %if.end.140 ], [ %.pre, %if.end.82 ], [ %.pre, %if.end.33 ]
+ %tmp59 = phi i32 [ %tmp40, %sw.default ], [ %.pre406, %if.then.29 ], [ %.pre406, %if.then.130 ], [ %.pre406, %if.end.140 ], [ %.pre406, %if.end.82 ], [ %.pre406, %if.end.33 ]
+ %tmp60 = phi i32 [ %tmp41, %sw.default ], [ %.pre407, %if.then.29 ], [ %.pre407, %if.then.130 ], [ %.pre407, %if.end.140 ], [ %.pre407, %if.end.82 ], [ %.pre407, %if.end.33 ]
+ %tmp61 = phi i32 [ %tmp43, %sw.default ], [ %.pre409, %if.then.29 ], [ %.pre409, %if.then.130 ], [ %.pre409, %if.end.140 ], [ %.pre409, %if.end.82 ], [ %.pre409, %if.end.33 ]
+ %tmp62 = phi i32 [ %tmp44, %sw.default ], [ %.pre410, %if.then.29 ], [ %.pre410, %if.then.130 ], [ %.pre410, %if.end.140 ], [ %.pre410, %if.end.82 ], [ %.pre410, %if.end.33 ]
+ %tmp63 = phi i32 [ %tmp45, %sw.default ], [ %.pre411, %if.then.29 ], [ %.pre411, %if.then.130 ], [ %.pre411, %if.end.140 ], [ %.pre411, %if.end.82 ], [ %.pre411, %if.end.33 ]
+ %tmp64 = phi i32 [ %tmp46, %sw.default ], [ %.pre412, %if.then.29 ], [ %.pre412, %if.then.130 ], [ %.pre412, %if.end.140 ], [ %.pre412, %if.end.82 ], [ %.pre412, %if.end.33 ]
+ %tmp65 = phi i32 [ %tmp47, %sw.default ], [ %.pre413, %if.then.29 ], [ %.pre413, %if.then.130 ], [ %.pre413, %if.end.140 ], [ %.pre413, %if.end.82 ], [ %.pre413, %if.end.33 ]
+ %tmp66 = phi i32 [ %tmp48, %sw.default ], [ %.pre414, %if.then.29 ], [ %.pre414, %if.then.130 ], [ %.pre414, %if.end.140 ], [ %.pre414, %if.end.82 ], [ %.pre414, %if.end.33 ]
+ %tmp67 = phi i32 [ %tmp49, %sw.default ], [ %.pre415, %if.then.29 ], [ %.pre415, %if.then.130 ], [ %.pre415, %if.end.140 ], [ %.pre415, %if.end.82 ], [ %.pre415, %if.end.33 ]
+ %tmp68 = phi i32 [ %tmp51, %sw.default ], [ %.pre417, %if.then.29 ], [ %.pre417, %if.then.130 ], [ %.pre417, %if.end.140 ], [ %.pre417, %if.end.82 ], [ %.pre417, %if.end.33 ]
+ %tmp69 = phi i32 [ %tmp52, %sw.default ], [ %.pre418, %if.then.29 ], [ %.pre418, %if.then.130 ], [ %.pre418, %if.end.140 ], [ %.pre418, %if.end.82 ], [ %.pre418, %if.end.33 ]
+ %tmp70 = phi i32 [ %tmp53, %sw.default ], [ %.pre419, %if.then.29 ], [ %.pre419, %if.then.130 ], [ %.pre419, %if.end.140 ], [ %.pre419, %if.end.82 ], [ %.pre419, %if.end.33 ]
+ %tmp71 = phi i32 [ %tmp54, %sw.default ], [ %.pre420, %if.then.29 ], [ %.pre420, %if.then.130 ], [ %.pre420, %if.end.140 ], [ %.pre420, %if.end.82 ], [ %.pre420, %if.end.33 ]
+ %tmp72 = phi i32 [ %tmp55, %sw.default ], [ %.pre421, %if.then.29 ], [ %.pre421, %if.then.130 ], [ %.pre421, %if.end.140 ], [ %.pre421, %if.end.82 ], [ %.pre421, %if.end.33 ]
+ %tmp73 = phi i32 [ %tmp56, %sw.default ], [ %.pre422, %if.then.29 ], [ %.pre422, %if.then.130 ], [ %.pre422, %if.end.140 ], [ %.pre422, %if.end.82 ], [ %.pre422, %if.end.33 ]
+ %tmp74 = phi i32 [ %tmp57, %sw.default ], [ %.pre423, %if.then.29 ], [ %.pre423, %if.then.130 ], [ %.pre423, %if.end.140 ], [ %.pre423, %if.end.82 ], [ %.pre423, %if.end.33 ]
+ %save_j3.pre-phi468 = phi i32* [ %save_j3.pre-phi469, %sw.default ], [ %save_j3.phi.trans.insert, %if.then.29 ], [ %save_j3.phi.trans.insert, %if.then.130 ], [ %save_j3.phi.trans.insert, %if.end.140 ], [ %save_j3.phi.trans.insert, %if.end.82 ], [ %save_j3.phi.trans.insert, %if.end.33 ]
+ %save_t4.pre-phi466 = phi i32* [ %save_t4.pre-phi467, %sw.default ], [ %save_t4.phi.trans.insert, %if.then.29 ], [ %save_t4.phi.trans.insert, %if.then.130 ], [ %save_t4.phi.trans.insert, %if.end.140 ], [ %save_t4.phi.trans.insert, %if.end.82 ], [ %save_t4.phi.trans.insert, %if.end.33 ]
+ %save_alphaSize5.pre-phi464 = phi i32* [ %save_alphaSize5.pre-phi465, %sw.default ], [ %save_alphaSize5.phi.trans.insert, %if.then.29 ], [ %save_alphaSize5.phi.trans.insert, %if.then.130 ], [ %save_alphaSize5.phi.trans.insert, %if.end.140 ], [ %save_alphaSize5.phi.trans.insert, %if.end.82 ], [ %save_alphaSize5.phi.trans.insert, %if.end.33 ]
+ %save_nGroups6.pre-phi462 = phi i32* [ %save_nGroups6.pre-phi463, %sw.default ], [ %save_nGroups6.phi.trans.insert, %if.then.29 ], [ %save_nGroups6.phi.trans.insert, %if.then.130 ], [ %save_nGroups6.phi.trans.insert, %if.end.140 ], [ %save_nGroups6.phi.trans.insert, %if.end.82 ], [ %save_nGroups6.phi.trans.insert, %if.end.33 ]
+ %save_nSelectors7.pre-phi460 = phi i32* [ %save_nSelectors7.pre-phi461, %sw.default ], [ %save_nSelectors7.phi.trans.insert, %if.then.29 ], [ %save_nSelectors7.phi.trans.insert, %if.then.130 ], [ %save_nSelectors7.phi.trans.insert, %if.end.140 ], [ %save_nSelectors7.phi.trans.insert, %if.end.82 ], [ %save_nSelectors7.phi.trans.insert, %if.end.33 ]
+ %save_EOB8.pre-phi458 = phi i32* [ %save_EOB8.pre-phi459, %sw.default ], [ %save_EOB8.phi.trans.insert, %if.then.29 ], [ %save_EOB8.phi.trans.insert, %if.then.130 ], [ %save_EOB8.phi.trans.insert, %if.end.140 ], [ %save_EOB8.phi.trans.insert, %if.end.82 ], [ %save_EOB8.phi.trans.insert, %if.end.33 ]
+ %save_groupNo9.pre-phi456 = phi i32* [ %save_groupNo9.pre-phi457, %sw.default ], [ %save_groupNo9.phi.trans.insert, %if.then.29 ], [ %save_groupNo9.phi.trans.insert, %if.then.130 ], [ %save_groupNo9.phi.trans.insert, %if.end.140 ], [ %save_groupNo9.phi.trans.insert, %if.end.82 ], [ %save_groupNo9.phi.trans.insert, %if.end.33 ]
+ %save_groupPos10.pre-phi454 = phi i32* [ %save_groupPos10.pre-phi455, %sw.default ], [ %save_groupPos10.phi.trans.insert, %if.then.29 ], [ %save_groupPos10.phi.trans.insert, %if.then.130 ], [ %save_groupPos10.phi.trans.insert, %if.end.140 ], [ %save_groupPos10.phi.trans.insert, %if.end.82 ], [ %save_groupPos10.phi.trans.insert, %if.end.33 ]
+ %save_nextSym11.pre-phi452 = phi i32* [ %save_nextSym11.pre-phi453, %sw.default ], [ %save_nextSym11.phi.trans.insert, %if.then.29 ], [ %save_nextSym11.phi.trans.insert, %if.then.130 ], [ %save_nextSym11.phi.trans.insert, %if.end.140 ], [ %save_nextSym11.phi.trans.insert, %if.end.82 ], [ %save_nextSym11.phi.trans.insert, %if.end.33 ]
+ %save_nblockMAX12.pre-phi450 = phi i32* [ %save_nblockMAX12.pre-phi451, %sw.default ], [ %save_nblockMAX12.phi.trans.insert, %if.then.29 ], [ %save_nblockMAX12.phi.trans.insert, %if.then.130 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.140 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.82 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.33 ]
+ %save_nblock13.pre-phi448 = phi i32* [ %save_nblock13.pre-phi449, %sw.default ], [ %save_nblock13.phi.trans.insert, %if.then.29 ], [ %save_nblock13.phi.trans.insert, %if.then.130 ], [ %save_nblock13.phi.trans.insert, %if.end.140 ], [ %save_nblock13.phi.trans.insert, %if.end.82 ], [ %save_nblock13.phi.trans.insert, %if.end.33 ]
+ %save_es14.pre-phi446 = phi i32* [ %save_es14.pre-phi447, %sw.default ], [ %save_es14.phi.trans.insert, %if.then.29 ], [ %save_es14.phi.trans.insert, %if.then.130 ], [ %save_es14.phi.trans.insert, %if.end.140 ], [ %save_es14.phi.trans.insert, %if.end.82 ], [ %save_es14.phi.trans.insert, %if.end.33 ]
+ %save_N15.pre-phi444 = phi i32* [ %save_N15.pre-phi445, %sw.default ], [ %save_N15.phi.trans.insert, %if.then.29 ], [ %save_N15.phi.trans.insert, %if.then.130 ], [ %save_N15.phi.trans.insert, %if.end.140 ], [ %save_N15.phi.trans.insert, %if.end.82 ], [ %save_N15.phi.trans.insert, %if.end.33 ]
+ %save_curr16.pre-phi442 = phi i32* [ %save_curr16.pre-phi443, %sw.default ], [ %save_curr16.phi.trans.insert, %if.then.29 ], [ %save_curr16.phi.trans.insert, %if.then.130 ], [ %save_curr16.phi.trans.insert, %if.end.140 ], [ %save_curr16.phi.trans.insert, %if.end.82 ], [ %save_curr16.phi.trans.insert, %if.end.33 ]
+ %save_zt17.pre-phi440 = phi i32* [ %save_zt17.pre-phi441, %sw.default ], [ %save_zt17.phi.trans.insert, %if.then.29 ], [ %save_zt17.phi.trans.insert, %if.then.130 ], [ %save_zt17.phi.trans.insert, %if.end.140 ], [ %save_zt17.phi.trans.insert, %if.end.82 ], [ %save_zt17.phi.trans.insert, %if.end.33 ]
+ %save_zn18.pre-phi438 = phi i32* [ %save_zn18.pre-phi439, %sw.default ], [ %save_zn18.phi.trans.insert, %if.then.29 ], [ %save_zn18.phi.trans.insert, %if.then.130 ], [ %save_zn18.phi.trans.insert, %if.end.140 ], [ %save_zn18.phi.trans.insert, %if.end.82 ], [ %save_zn18.phi.trans.insert, %if.end.33 ]
+ %save_zvec19.pre-phi436 = phi i32* [ %save_zvec19.pre-phi437, %sw.default ], [ %save_zvec19.phi.trans.insert, %if.then.29 ], [ %save_zvec19.phi.trans.insert, %if.then.130 ], [ %save_zvec19.phi.trans.insert, %if.end.140 ], [ %save_zvec19.phi.trans.insert, %if.end.82 ], [ %save_zvec19.phi.trans.insert, %if.end.33 ]
+ %save_zj20.pre-phi434 = phi i32* [ %save_zj20.pre-phi435, %sw.default ], [ %save_zj20.phi.trans.insert, %if.then.29 ], [ %save_zj20.phi.trans.insert, %if.then.130 ], [ %save_zj20.phi.trans.insert, %if.end.140 ], [ %save_zj20.phi.trans.insert, %if.end.82 ], [ %save_zj20.phi.trans.insert, %if.end.33 ]
+ %nblock.1 = phi i32 [ %tmp50, %sw.default ], [ %.pre416, %if.then.29 ], [ 0, %if.then.130 ], [ %.pre416, %if.end.140 ], [ %.pre416, %if.end.82 ], [ %.pre416, %if.end.33 ]
+ %alphaSize.1 = phi i32 [ %tmp42, %sw.default ], [ %.pre408, %if.then.29 ], [ %add179, %if.then.130 ], [ %.pre408, %if.end.140 ], [ %.pre408, %if.end.82 ], [ %.pre408, %if.end.33 ]
+ %retVal.0 = phi i32 [ 0, %sw.default ], [ -5, %if.then.29 ], [ -4, %if.then.130 ], [ 0, %if.end.140 ], [ 0, %if.end.82 ], [ 0, %if.end.33 ]
+ store i32 %tmp58, i32* %save_i, align 4
+ store i32 %tmp59, i32* %save_j3.pre-phi468, align 4
+ store i32 %tmp60, i32* %save_t4.pre-phi466, align 4
+ store i32 %alphaSize.1, i32* %save_alphaSize5.pre-phi464, align 4
+ store i32 %tmp61, i32* %save_nGroups6.pre-phi462, align 4
+ store i32 %tmp62, i32* %save_nSelectors7.pre-phi460, align 4
+ store i32 %tmp63, i32* %save_EOB8.pre-phi458, align 4
+ store i32 %tmp64, i32* %save_groupNo9.pre-phi456, align 4
+ store i32 %tmp65, i32* %save_groupPos10.pre-phi454, align 4
+ store i32 %tmp66, i32* %save_nextSym11.pre-phi452, align 4
+ store i32 %tmp67, i32* %save_nblockMAX12.pre-phi450, align 4
+ store i32 %nblock.1, i32* %save_nblock13.pre-phi448, align 4
+ store i32 %tmp68, i32* %save_es14.pre-phi446, align 4
+ store i32 %tmp69, i32* %save_N15.pre-phi444, align 4
+ store i32 %tmp70, i32* %save_curr16.pre-phi442, align 4
+ store i32 %tmp71, i32* %save_zt17.pre-phi440, align 4
+ store i32 %tmp72, i32* %save_zn18.pre-phi438, align 4
+ store i32 %tmp73, i32* %save_zvec19.pre-phi436, align 4
+ store i32 %tmp74, i32* %save_zj20.pre-phi434, align 4
+ ret i32 %retVal.0
+}
+
+!0 = !{!"branch_weights", i32 10, i32 1}
diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
index 739570236da9..1820b8163a90 100644
--- a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
+++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s
; This test aims to check basic correctness of frame layout &
; frame access code. There are 8 functions in this test file,
@@ -252,11 +252,11 @@ entry:
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through frame pointer
@@ -299,11 +299,11 @@ entry:
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through frame pointer
@@ -361,11 +361,11 @@ entry:
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
@@ -414,11 +414,11 @@ entry:
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
@@ -465,11 +465,11 @@ entry:
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
-; CHECK: ubfx x9, x0, #0, #32
+; CHECK: mov w9, w0
+; CHECK: mov x10, sp
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0x7fffffff0
-; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
@@ -522,10 +522,10 @@ bb1:
; CHECK-LABEL: realign_conditional2
; Extra realignment in the prologue (performance issue).
+; CHECK: tbz {{.*}} .[[LABEL:.*]]
; CHECK: sub x9, sp, #32 // =32
; CHECK: and sp, x9, #0xffffffffffffffe0
; CHECK: mov x19, sp
-; CHECK: tbz {{.*}} .[[LABEL:.*]]
; Stack is realigned in a non-entry BB.
; CHECK: sub [[REG:x[01-9]+]], sp, #64
; CHECK: and sp, [[REG]], #0xffffffffffffffe0
diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
index ea3b8fa55732..1bc2a3ccb1ca 100644
--- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
+++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll
@@ -1,7 +1,10 @@
-; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
+; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON
-; CHECK-LABEL: load_factor2:
-; CHECK: ld2 { v0.8b, v1.8b }, [x0]
+; NEON-LABEL: load_factor2:
+; NEON: ld2 { v0.8b, v1.8b }, [x0]
+; NONEON-LABEL: load_factor2:
+; NONEON-NOT: ld2
define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
%wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
%strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -10,8 +13,10 @@ define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
ret <8 x i8> %add
}
-; CHECK-LABEL: load_factor3:
-; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
+; NEON-LABEL: load_factor3:
+; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
+; NONEON-LABEL: load_factor3:
+; NONEON-NOT: ld3
define <4 x i32> @load_factor3(i32* %ptr) {
%base = bitcast i32* %ptr to <12 x i32>*
%wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
@@ -21,8 +26,10 @@ define <4 x i32> @load_factor3(i32* %ptr) {
ret <4 x i32> %add
}
-; CHECK-LABEL: load_factor4:
-; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+; NEON-LABEL: load_factor4:
+; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+; NONEON-LABEL: load_factor4:
+; NONEON-NOT: ld4
define <4 x i32> @load_factor4(i32* %ptr) {
%base = bitcast i32* %ptr to <16 x i32>*
%wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
@@ -32,16 +39,20 @@ define <4 x i32> @load_factor4(i32* %ptr) {
ret <4 x i32> %add
}
-; CHECK-LABEL: store_factor2:
-; CHECK: st2 { v0.8b, v1.8b }, [x0]
+; NEON-LABEL: store_factor2:
+; NEON: st2 { v0.8b, v1.8b }, [x0]
+; NONEON-LABEL: store_factor2:
+; NONEON-NOT: st2
define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
%interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
ret void
}
-; CHECK-LABEL: store_factor3:
-; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0]
+; NEON-LABEL: store_factor3:
+; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
+; NONEON-LABEL: store_factor3:
+; NONEON-NOT: st3
define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
%base = bitcast i32* %ptr to <12 x i32>*
%v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -51,8 +62,10 @@ define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v
ret void
}
-; CHECK-LABEL: store_factor4:
-; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+; NEON-LABEL: store_factor4:
+; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+; NONEON-LABEL: store_factor4:
+; NONEON-NOT: st4
define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
%base = bitcast i32* %ptr to <16 x i32>*
%v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -65,8 +78,10 @@ define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v
; The following cases test that interleaved access of pointer vectors can be
; matched to ldN/stN instruction.
-; CHECK-LABEL: load_ptrvec_factor2:
-; CHECK: ld2 { v0.2d, v1.2d }, [x0]
+; NEON-LABEL: load_ptrvec_factor2:
+; NEON: ld2 { v0.2d, v1.2d }, [x0]
+; NONEON-LABEL: load_ptrvec_factor2:
+; NONEON-NOT: ld2
define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
%base = bitcast i32** %ptr to <4 x i32*>*
%wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
@@ -74,8 +89,10 @@ define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
ret <2 x i32*> %strided.v0
}
-; CHECK-LABEL: load_ptrvec_factor3:
-; CHECK: ld3 { v0.2d, v1.2d, v2.2d }, [x0]
+; NEON-LABEL: load_ptrvec_factor3:
+; NEON: ld3 { v0.2d, v1.2d, v2.2d }, [x0]
+; NONEON-LABEL: load_ptrvec_factor3:
+; NONEON-NOT: ld3
define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
%base = bitcast i32** %ptr to <6 x i32*>*
%wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
@@ -86,8 +103,10 @@ define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr
ret void
}
-; CHECK-LABEL: load_ptrvec_factor4:
-; CHECK: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+; NEON-LABEL: load_ptrvec_factor4:
+; NEON: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+; NONEON-LABEL: load_ptrvec_factor4:
+; NONEON-NOT: ld4
define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
%base = bitcast i32** %ptr to <8 x i32*>*
%wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
@@ -98,8 +117,10 @@ define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr
ret void
}
-; CHECK-LABEL: store_ptrvec_factor2:
-; CHECK: st2 { v0.2d, v1.2d }, [x0]
+; NEON-LABEL: store_ptrvec_factor2:
+; NEON: st2 { v0.2d, v1.2d }, [x0]
+; NONEON-LABEL: store_ptrvec_factor2:
+; NONEON-NOT: st2
define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
%base = bitcast i32** %ptr to <4 x i32*>*
%interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -107,8 +128,10 @@ define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
ret void
}
-; CHECK-LABEL: store_ptrvec_factor3:
-; CHECK: st3 { v0.2d, v1.2d, v2.2d }, [x0]
+; NEON-LABEL: store_ptrvec_factor3:
+; NEON: st3 { v0.2d, v1.2d, v2.2d }, [x0]
+; NONEON-LABEL: store_ptrvec_factor3:
+; NONEON-NOT: st3
define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
%base = bitcast i32** %ptr to <6 x i32*>*
%v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -118,8 +141,10 @@ define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2
ret void
}
-; CHECK-LABEL: store_ptrvec_factor4:
-; CHECK: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+; NEON-LABEL: store_ptrvec_factor4:
+; NEON: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
+; NONEON-LABEL: store_ptrvec_factor4:
+; NONEON-NOT: st4
define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
%base = bitcast i32* %ptr to <8 x i32*>*
%v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -132,8 +157,10 @@ define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2
; Following cases check that shuffle maskes with undef indices can be matched
; into ldN/stN instruction.
-; CHECK-LABEL: load_undef_mask_factor2:
-; CHECK: ld2 { v0.4s, v1.4s }, [x0]
+; NEON-LABEL: load_undef_mask_factor2:
+; NEON: ld2 { v0.4s, v1.4s }, [x0]
+; NONEON-LABEL: load_undef_mask_factor2:
+; NONEON-NOT: ld2
define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
%base = bitcast i32* %ptr to <8 x i32>*
%wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
@@ -143,8 +170,10 @@ define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
ret <4 x i32> %add
}
-; CHECK-LABEL: load_undef_mask_factor3:
-; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
+; NEON-LABEL: load_undef_mask_factor3:
+; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
+; NONEON-LABEL: load_undef_mask_factor3:
+; NONEON-NOT: ld3
define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
%base = bitcast i32* %ptr to <12 x i32>*
%wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
@@ -154,8 +183,10 @@ define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
ret <4 x i32> %add
}
-; CHECK-LABEL: load_undef_mask_factor4:
-; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+; NEON-LABEL: load_undef_mask_factor4:
+; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+; NONEON-LABEL: load_undef_mask_factor4:
+; NONEON-NOT: ld4
define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
%base = bitcast i32* %ptr to <16 x i32>*
%wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
@@ -165,8 +196,10 @@ define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
ret <4 x i32> %add
}
-; CHECK-LABEL: store_undef_mask_factor2:
-; CHECK: st2 { v0.4s, v1.4s }, [x0]
+; NEON-LABEL: store_undef_mask_factor2:
+; NEON: st2 { v0.4s, v1.4s }, [x0]
+; NONEON-LABEL: store_undef_mask_factor2:
+; NONEON-NOT: st2
define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
%base = bitcast i32* %ptr to <8 x i32>*
%interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
@@ -174,8 +207,10 @@ define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
ret void
}
-; CHECK-LABEL: store_undef_mask_factor3:
-; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0]
+; NEON-LABEL: store_undef_mask_factor3:
+; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
+; NONEON-LABEL: store_undef_mask_factor3:
+; NONEON-NOT: st3
define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
%base = bitcast i32* %ptr to <12 x i32>*
%v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -185,8 +220,10 @@ define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <
ret void
}
-; CHECK-LABEL: store_undef_mask_factor4:
-; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+; NEON-LABEL: store_undef_mask_factor4:
+; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
+; NONEON-LABEL: store_undef_mask_factor4:
+; NONEON-NOT: st4
define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
%base = bitcast i32* %ptr to <16 x i32>*
%v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -195,3 +232,39 @@ define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <
store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
ret void
}
+
+; Check that we do something sane with illegal types.
+
+; NEON-LABEL: load_illegal_factor2:
+; NEON: BB#0:
+; NEON-NEXT: ldr q[[V:[0-9]+]], [x0]
+; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s
+; NEON-NEXT: ret
+; NONEON-LABEL: load_illegal_factor2:
+; NONEON: BB#0:
+; NONEON-NEXT: ldr s0, [x0]
+; NONEON-NEXT: ldr s1, [x0, #8]
+; NONEON-NEXT: ret
+define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
+ %tmp1 = load <3 x float>, <3 x float>* %p, align 16
+ %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+ ret <3 x float> %tmp2
+}
+
+; NEON-LABEL: store_illegal_factor2:
+; NEON: BB#0:
+; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s
+; NEON-NEXT: st1 { v0.d }[0], [x0]
+; NEON-NEXT: ret
+; NONEON-LABEL: store_illegal_factor2:
+; NONEON: BB#0:
+; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2
+; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0
+; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32
+; NONEON-NEXT: str x[[RES]], [x0]
+; NONEON-NEXT: ret
+define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
+ %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+ store <3 x float> %tmp1, <3 x float>* %p, align 16
+ ret void
+}
diff --git a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
new file mode 100644
index 000000000000..84277995ce5b
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll
@@ -0,0 +1,50 @@
+; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck <%t %s
+; REQUIRES: asserts
+target triple = "aarch64--linux-android"
+
+%typeD = type { i32, i32, [256 x i32], [257 x i32] }
+
+; Function Attrs: noreturn nounwind uwtable
+define i32 @test1(%typeD* nocapture %s) {
+entry:
+; CHECK-LABEL: entry:
+; CHECK: %uglygep = getelementptr i8, i8* %0, i64 1032
+; CHECK: br label %do.body.i
+
+
+ %tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0
+ %k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1
+ %.pre = load i32, i32* %tPos, align 4
+ br label %do.body.i
+
+do.body.i:
+; CHECK-LABEL: do.body.i:
+; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3
+; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32*
+; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032
+
+
+ %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ]
+ %1 = phi i32 [ 0, %entry ], [ %.be6, %do.body.i.backedge ]
+ %add.i = add nsw i32 %1, %0
+ %shr.i = ashr i32 %add.i, 1
+ %idxprom.i = sext i32 %shr.i to i64
+ %arrayidx.i = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 3, i64 %idxprom.i
+ %2 = load i32, i32* %arrayidx.i, align 4
+ %cmp.i = icmp sle i32 %2, %.pre
+ %na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i
+ %nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1
+ %sub.i = sub nsw i32 %na.1.i, %nb.1.i
+ %cmp1.i = icmp eq i32 %sub.i, 1
+ br i1 %cmp1.i, label %fooo.exit, label %do.body.i.backedge
+
+do.body.i.backedge:
+ %.be = phi i32 [ %na.1.i, %do.body.i ], [ 256, %fooo.exit ]
+ %.be6 = phi i32 [ %nb.1.i, %do.body.i ], [ 0, %fooo.exit ]
+ br label %do.body.i
+
+fooo.exit: ; preds = %do.body.i
+ store i32 %nb.1.i, i32* %k0, align 4
+ br label %do.body.i.backedge
+}
+
diff --git a/test/CodeGen/AArch64/aarch64-minmaxv.ll b/test/CodeGen/AArch64/aarch64-minmaxv.ll
new file mode 100644
index 000000000000..fb13b706cfaf
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -0,0 +1,511 @@
+; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linu--gnu"
+
+; CHECK-LABEL: smax_B
+; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
+define i8 @smax_B(<16 x i8>* nocapture readonly %arr) {
+ %arr.load = load <16 x i8>, <16 x i8>* %arr
+ %rdx.shuf = shufflevector <16 x i8> %arr.load, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp sgt <16 x i8> %arr.load, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %arr.load, <16 x i8> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp sgt <16 x i8> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp sgt <16 x i8> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp sgt <16 x i8> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
+ ret i8 %r
+}
+
+; CHECK-LABEL: smax_H
+; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
+define i16 @smax_H(<8 x i16>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
+ %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp23 = icmp sgt <8 x i16> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
+ %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp26 = icmp sgt <8 x i16> %rdx.minmax.select24, %rdx.shuf25
+ %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
+ %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp29 = icmp sgt <8 x i16> %rdx.minmax.select27, %rdx.shuf28
+ %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
+ %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
+ %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
+ %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
+ ret i16 %r
+}
+
+; CHECK-LABEL: smax_S
+; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
+define i32 @smax_S(<4 x i32> * nocapture readonly %arr) {
+ %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
+ %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ %rdx.minmax.cmp18 = icmp sgt <4 x i32> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
+ %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp21 = icmp sgt <4 x i32> %rdx.minmax.select19, %rdx.shuf20
+ %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
+ %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
+ %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
+ %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
+ ret i32 %r
+}
+
+; CHECK-LABEL: smax_D
+; CHECK-NOT: smaxv
+define i64 @smax_D(<2 x i64>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
+ %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+ %rdx.minmax.cmp18 = icmp sgt <2 x i64> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
+ %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
+ %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
+ %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
+ ret i64 %r
+}
+
+
+; CHECK-LABEL: umax_B
+; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b
+define i8 @umax_B(<16 x i8>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr
+ %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp ugt <16 x i8> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp ugt <16 x i8> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp ugt <16 x i8> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp ugt <16 x i8> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
+ ret i8 %r
+}
+
+; CHECK-LABEL: umax_H
+; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h
+define i16 @umax_H(<8 x i16>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
+ %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp23 = icmp ugt <8 x i16> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
+ %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp26 = icmp ugt <8 x i16> %rdx.minmax.select24, %rdx.shuf25
+ %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
+ %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp29 = icmp ugt <8 x i16> %rdx.minmax.select27, %rdx.shuf28
+ %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
+ %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
+ %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
+ %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
+ ret i16 %r
+}
+
+; CHECK-LABEL: umax_S
+; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s
+define i32 @umax_S(<4 x i32>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
+ %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ %rdx.minmax.cmp18 = icmp ugt <4 x i32> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
+ %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp21 = icmp ugt <4 x i32> %rdx.minmax.select19, %rdx.shuf20
+ %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
+ %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
+ %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
+ %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
+ ret i32 %r
+}
+
+; CHECK-LABEL: umax_D
+; CHECK-NOT: umaxv
+define i64 @umax_D(<2 x i64>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
+ %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+ %rdx.minmax.cmp18 = icmp ugt <2 x i64> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
+ %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
+ %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
+ %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
+ ret i64 %r
+}
+
+
+; CHECK-LABEL: smin_B
+; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b
+define i8 @smin_B(<16 x i8>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr
+ %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp slt <16 x i8> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp slt <16 x i8> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp slt <16 x i8> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp slt <16 x i8> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
+ ret i8 %r
+}
+
+; CHECK-LABEL: smin_H
+; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h
+define i16 @smin_H(<8 x i16>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
+ %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp23 = icmp slt <8 x i16> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
+ %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp26 = icmp slt <8 x i16> %rdx.minmax.select24, %rdx.shuf25
+ %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
+ %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp29 = icmp slt <8 x i16> %rdx.minmax.select27, %rdx.shuf28
+ %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
+ %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
+ %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
+ %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
+ ret i16 %r
+}
+
+; CHECK-LABEL: smin_S
+; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s
+define i32 @smin_S(<4 x i32>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
+ %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ %rdx.minmax.cmp18 = icmp slt <4 x i32> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
+ %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp21 = icmp slt <4 x i32> %rdx.minmax.select19, %rdx.shuf20
+ %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
+ %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
+ %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
+ %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
+ ret i32 %r
+}
+
+; CHECK-LABEL: smin_D
+; CHECK-NOT: sminv
+define i64 @smin_D(<2 x i64>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
+ %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+ %rdx.minmax.cmp18 = icmp slt <2 x i64> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
+ %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
+ %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
+ %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
+ ret i64 %r
+}
+
+
+; CHECK-LABEL: umin_B
+; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b
+define i8 @umin_B(<16 x i8>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr
+ %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp ult <16 x i8> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp ult <16 x i8> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp ult <16 x i8> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp ult <16 x i8> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt
+ ret i8 %r
+}
+
+; CHECK-LABEL: umin_H
+; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h
+define i16 @umin_H(<8 x i16>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr
+ %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp23 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf
+ %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp26 = icmp ult <8 x i16> %rdx.minmax.select24, %rdx.shuf25
+ %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25
+ %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp29 = icmp ult <8 x i16> %rdx.minmax.select27, %rdx.shuf28
+ %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0
+ %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0
+ %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1
+ %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt
+ ret i16 %r
+}
+
+; CHECK-LABEL: umin_S
+; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s
+define i32 @umin_S(<4 x i32>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr
+ %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ %rdx.minmax.cmp18 = icmp ult <4 x i32> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf
+ %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp21 = icmp ult <4 x i32> %rdx.minmax.select19, %rdx.shuf20
+ %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0
+ %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0
+ %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1
+ %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt
+ ret i32 %r
+}
+
+; CHECK-LABEL: umin_D
+; CHECK-NOT: uminv
+define i64 @umin_D(<2 x i64>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr
+ %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
+ %rdx.minmax.cmp18 = icmp ult <2 x i64> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0
+ %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0
+ %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1
+ %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt
+ ret i64 %r
+}
+
+; CHECK-LABEL: fmaxnm_S
+; CHECK: fmaxnmv
+define float @fmaxnm_S(<4 x float>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <4 x float>, <4 x float>* %arr
+ %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ %rdx.minmax.cmp = fcmp fast oge <4 x float> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
+ %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp1 = fcmp fast oge <4 x float> %rdx.minmax.select1, %rdx.shuf1
+ %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
+ %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
+ %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
+ %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
+ ret float %r
+}
+
+; CHECK-LABEL: fminnm_S
+; CHECK: fminnmv
+define float @fminnm_S(<4 x float>* nocapture readonly %arr) {
+ %rdx.minmax.select = load <4 x float>, <4 x float>* %arr
+ %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ %rdx.minmax.cmp = fcmp fast ole <4 x float> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf
+ %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp1 = fcmp fast ole <4 x float> %rdx.minmax.select1, %rdx.shuf1
+ %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0
+ %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0
+ %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1
+ %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt
+ ret float %r
+}
+
+define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) {
+; CHECK-LABEL: oversized_umax_256
+; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: umaxv {{h[0-9]+}}, [[V0]]
+ %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr
+ %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp ugt <16 x i16> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp ugt <16 x i16> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp ugt <16 x i16> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp ugt <16 x i16> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt
+ ret i16 %r
+}
+
+define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) {
+; CHECK-LABEL: oversized_umax_512
+; CHECK: umax v
+; CHECK-NEXT: umax v
+; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]]
+ %arr.load = load <16 x i32>, <16 x i32>* %arr
+ %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp ugt <16 x i32> %arr.load, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp ugt <16 x i32> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp ugt <16 x i32> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp ugt <16 x i32> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt
+ ret i32 %r
+}
+
+define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) {
+; CHECK-LABEL: oversized_umin_256
+; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: uminv {{h[0-9]+}}, [[V0]]
+ %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr
+ %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp ult <16 x i16> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp ult <16 x i16> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp ult <16 x i16> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp ult <16 x i16> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt
+ ret i16 %r
+}
+
+define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) {
+; CHECK-LABEL: oversized_umin_512
+; CHECK: umin v
+; CHECK-NEXT: umin v
+; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]]
+ %arr.load = load <16 x i32>, <16 x i32>* %arr
+ %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp ult <16 x i32> %arr.load, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp ult <16 x i32> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp ult <16 x i32> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp ult <16 x i32> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt
+ ret i32 %r
+}
+
+define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) {
+; CHECK-LABEL: oversized_smax_256
+; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: smaxv {{h[0-9]+}}, [[V0]]
+ %arr.load = load <16 x i16>, <16 x i16>* %arr
+ %rdx.shuf = shufflevector <16 x i16> %arr.load, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp sgt <16 x i16> %arr.load, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %arr.load, <16 x i16> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp sgt <16 x i16> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp sgt <16 x i16> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp sgt <16 x i16> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt
+ ret i16 %r
+}
+
+define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) {
+; CHECK-LABEL: oversized_smax_512
+; CHECK: smax v
+; CHECK-NEXT: smax v
+; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]]
+ %arr.load = load <16 x i32>, <16 x i32>* %arr
+ %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp sgt <16 x i32> %arr.load, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp sgt <16 x i32> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp sgt <16 x i32> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp sgt <16 x i32> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt
+ ret i32 %r
+}
+
+define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) {
+; CHECK-LABEL: oversized_smin_256
+; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+; CHECK: sminv {{h[0-9]+}}, [[V0]]
+ %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr
+ %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp slt <16 x i16> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp slt <16 x i16> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp slt <16 x i16> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt
+ ret i16 %r
+}
+
+define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) {
+; CHECK-LABEL: oversized_smin_512
+; CHECK: smin v
+; CHECK-NEXT: smin v
+; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]]
+ %arr.load = load <16 x i32>, <16 x i32>* %arr
+ %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp22 = icmp slt <16 x i32> %arr.load, %rdx.shuf
+ %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf
+ %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp25 = icmp slt <16 x i32> %rdx.minmax.select23, %rdx.shuf24
+ %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24
+ %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp28 = icmp slt <16 x i32> %rdx.minmax.select26, %rdx.shuf27
+ %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27
+ %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %rdx.minmax.cmp31 = icmp slt <16 x i32> %rdx.minmax.select29, %rdx.shuf30
+ %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0
+ %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0
+ %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1
+ %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt
+ ret i32 %r
+}
diff --git a/test/CodeGen/AArch64/aarch64-smax-constantfold.ll b/test/CodeGen/AArch64/aarch64-smax-constantfold.ll
new file mode 100644
index 000000000000..0e5b59f95126
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-smax-constantfold.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s
+
+; Function Attrs: nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>)
+
+; CHECK-LABEL: test
+define <4 x i16> @test() {
+entry:
+; CHECK: movi d{{[0-9]+}}, #0000000000000000
+ %0 = tail call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> zeroinitializer)
+ ret <4 x i16> %0
+}
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
index f0c7572ebf13..f30ab89f238b 100644
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -80,6 +80,64 @@ end:
ret void
}
+define void @sub_i8rhs() minsize {
+; CHECK-LABEL: sub_i8rhs:
+ %val8_tmp = load i8, i8* @var8
+ %lhs32 = load i32, i32* @var32
+ %lhs64 = load i64, i64* @var64
+
+ ; Need this to prevent extension upon load and give a vanilla i8 operand.
+ %val8 = add i8 %val8_tmp, 123
+
+
+; Zero-extending to 32-bits
+ %rhs32_zext = zext i8 %val8 to i32
+ %res32_zext = sub i32 %lhs32, %rhs32_zext
+ store volatile i32 %res32_zext, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb
+
+ %rhs32_zext_shift = shl i32 %rhs32_zext, 3
+ %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
+ store volatile i32 %res32_zext_shift, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3
+
+
+; Zero-extending to 64-bits
+ %rhs64_zext = zext i8 %val8 to i64
+ %res64_zext = sub i64 %lhs64, %rhs64_zext
+ store volatile i64 %res64_zext, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb
+
+ %rhs64_zext_shift = shl i64 %rhs64_zext, 1
+ %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
+ store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1
+
+; Sign-extending to 32-bits
+ %rhs32_sext = sext i8 %val8 to i32
+ %res32_sext = sub i32 %lhs32, %rhs32_sext
+ store volatile i32 %res32_sext, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb
+
+ %rhs32_sext_shift = shl i32 %rhs32_sext, 1
+ %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
+ store volatile i32 %res32_sext_shift, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1
+
+; Sign-extending to 64-bits
+ %rhs64_sext = sext i8 %val8 to i64
+ %res64_sext = sub i64 %lhs64, %rhs64_sext
+ store volatile i64 %res64_sext, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb
+
+ %rhs64_sext_shift = shl i64 %rhs64_sext, 4
+ %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
+ store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4
+
+ ret void
+}
+
define void @addsub_i16rhs() minsize {
; CHECK-LABEL: addsub_i16rhs:
%val16_tmp = load i16, i16* @var16
@@ -155,6 +213,64 @@ end:
ret void
}
+define void @sub_i16rhs() minsize {
+; CHECK-LABEL: sub_i16rhs:
+ %val16_tmp = load i16, i16* @var16
+ %lhs32 = load i32, i32* @var32
+ %lhs64 = load i64, i64* @var64
+
+ ; Need this to prevent extension upon load and give a vanilla i16 operand.
+ %val16 = add i16 %val16_tmp, 123
+
+
+; Zero-extending to 32-bits
+ %rhs32_zext = zext i16 %val16 to i32
+ %res32_zext = sub i32 %lhs32, %rhs32_zext
+ store volatile i32 %res32_zext, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth
+
+ %rhs32_zext_shift = shl i32 %rhs32_zext, 3
+ %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift
+ store volatile i32 %res32_zext_shift, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3
+
+
+; Zero-extending to 64-bits
+ %rhs64_zext = zext i16 %val16 to i64
+ %res64_zext = sub i64 %lhs64, %rhs64_zext
+ store volatile i64 %res64_zext, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth
+
+ %rhs64_zext_shift = shl i64 %rhs64_zext, 1
+ %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
+ store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1
+
+; Sign-extending to 32-bits
+ %rhs32_sext = sext i16 %val16 to i32
+ %res32_sext = sub i32 %lhs32, %rhs32_sext
+ store volatile i32 %res32_sext, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth
+
+ %rhs32_sext_shift = shl i32 %rhs32_sext, 1
+ %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift
+ store volatile i32 %res32_sext_shift, i32* @var32
+; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1
+
+; Sign-extending to 64-bits
+ %rhs64_sext = sext i16 %val16 to i64
+ %res64_sext = sub i64 %lhs64, %rhs64_sext
+ store volatile i64 %res64_sext, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth
+
+ %rhs64_sext_shift = shl i64 %rhs64_sext, 4
+ %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
+ store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4
+
+ ret void
+}
+
; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
; example), but the remaining instructions are probably not idiomatic
; in the face of "add/sub (shifted register)" so I don't intend to.
@@ -187,3 +303,33 @@ define void @addsub_i32rhs() minsize {
ret void
}
+
+define void @sub_i32rhs() minsize {
+; CHECK-LABEL: sub_i32rhs:
+ %val32_tmp = load i32, i32* @var32
+ %lhs64 = load i64, i64* @var64
+
+ %val32 = add i32 %val32_tmp, 123
+
+ %rhs64_zext = zext i32 %val32 to i64
+ %res64_zext = sub i64 %lhs64, %rhs64_zext
+ store volatile i64 %res64_zext, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
+
+ %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+ %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
+ store volatile i64 %res64_zext_shift, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
+
+ %rhs64_sext = sext i32 %val32 to i64
+ %res64_sext = sub i64 %lhs64, %rhs64_sext
+ store volatile i64 %res64_sext, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw
+
+ %rhs64_sext_shift = shl i64 %rhs64_sext, 2
+ %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift
+ store volatile i64 %res64_sext_shift, i64* @var64
+; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2
+
+ ret void
+}
diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll
index 5b2278ce8a35..45754377b2d9 100644
--- a/test/CodeGen/AArch64/alloca.ll
+++ b/test/CodeGen/AArch64/alloca.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s
+; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s
declare void @use_addr(i8*)
diff --git a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
index 173a440326ac..a66ea0df2e98 100644
--- a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
@@ -22,22 +22,22 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.sp = !{!1, !7, !10, !11, !12}
!0 = !DIGlobalVariable(name: "vsplive", line: 617, isLocal: true, isDefinition: true, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
+!1 = distinct !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
!2 = !DIFile(filename: "print.i", directory: "/Volumes/Ebi/echeng/radars/r9146594")
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
!4 = !DISubroutineType(types: !5)
!5 = !{!6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!7 = !DISubprogram(name: "putc_mem", line: 30, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8)
+!7 = distinct !DISubprogram(name: "putc_mem", line: 30, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8)
!8 = !DISubroutineType(types: !9)
!9 = !{null}
-!10 = !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
-!11 = !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
-!12 = !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8)
+!10 = distinct !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
+!11 = distinct !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
+!12 = distinct !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8)
!13 = !DILocation(line: 653, column: 5, scope: !14)
!14 = distinct !DILexicalBlock(line: 652, column: 35, file: !20, scope: !15)
!15 = distinct !DILexicalBlock(line: 616, column: 1, file: !20, scope: !1)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "do_tab_convert", line: 853, scope: !17, file: !2, type: !6)
+!16 = !DILocalVariable(name: "do_tab_convert", line: 853, scope: !17, file: !2, type: !6)
!17 = distinct !DILexicalBlock(line: 850, column: 12, file: !20, scope: !14)
!18 = !DILocation(line: 853, column: 11, scope: !17)
!19 = !DILocation(line: 853, column: 29, scope: !17)
diff --git a/test/CodeGen/AArch64/arm64-aapcs-be.ll b/test/CodeGen/AArch64/arm64-aapcs-be.ll
index f27570acc820..e77952e4b8a1 100644
--- a/test/CodeGen/AArch64/arm64-aapcs-be.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs-be.ll
@@ -32,7 +32,7 @@ define float @test_block_addr([8 x float], [1 x float] %in) {
define void @test_block_addr_callee() {
; CHECK-LABEL: test_block_addr_callee:
-; CHECK: str {{[a-z0-9]+}}, [sp]
+; CHECK: str {{[a-z0-9]+}}, [sp, #-16]!
; CHECK: bl test_block_addr
%val = insertvalue [1 x float] undef, float 0.0, 0
call float @test_block_addr([8 x float] undef, [1 x float] %val)
diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll
index d0880cd4f3eb..441f45bf90b3 100644
--- a/test/CodeGen/AArch64/arm64-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false -disable-post-ra < %s | FileCheck %s
@var = global i32 0, align 4
@@ -27,12 +27,13 @@ define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) {
; Check stack slots are 64-bit at all times.
define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
i32 %int, i64 %long) {
- ; Part of last store. Blasted scheduler.
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
-
%ext_bool = zext i1 %bool to i64
store volatile i64 %ext_bool, i64* @var64, align 8
; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
+
+ ; Part of last store. Blasted scheduler.
+; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32]
+
; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
@@ -63,8 +64,8 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
%ext_bool = zext i1 %bool to i64
store volatile i64 %ext_bool, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x0, #0x1
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: and w[[EXT:[0-9]+]], w0, #0x1
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
%ext_char = sext i8 %char to i64
store volatile i64 %ext_char, i64* @var64
@@ -73,13 +74,13 @@ define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) {
%ext_short = zext i16 %short to i64
store volatile i64 %ext_short, i64* @var64
-; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: and w[[EXT:[0-9]+]], w2, #0xffff
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
%ext_int = zext i32 %int to i64
store volatile i64 %ext_int, i64* @var64
-; CHECK: ubfx [[EXT:x[0-9]+]], x3, #0, #32
-; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64]
+; CHECK: mov w[[EXT:[0-9]+]], w3
+; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll
index 1c1b58b8b140..dc9884f12f57 100644
--- a/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -508,7 +508,7 @@ entry:
; "i64 %0" should be in register x7.
; "i32 8" should be on stack at [sp].
; CHECK: ldr x7, [{{x[0-9]+}}]
-; CHECK: str {{w[0-9]+}}, [sp]
+; CHECK: str {{w[0-9]+}}, [sp, #-16]!
; FAST-LABEL: i64_split
; FAST: ldr x7, [{{x[0-9]+}}]
; FAST: mov x[[R0:[0-9]+]], sp
diff --git a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
index 4703d25a6016..d46800d34cac 100644
--- a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@@ -1,6 +1,7 @@
-; RUN: llc -march arm64 < %s | FileCheck %s
+; RUN: llc -march arm64 < %s -aarch64-collect-loh=false | FileCheck %s
; rdar://13452552
-; ModuleID = 'reduced_test.ll'
+; Disable the collecting of LOH so that the labels do not get in the
+; way of the NEXT patterns.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
target triple = "arm64-apple-ios3.0.0"
@@ -13,8 +14,8 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], w0, sxtw]
; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw]
-; CHECK-NEXT cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
-; CHECK-NEXT b.ne
+; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
+; CHECK-NEXT: b.ne
; Next BB
; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
diff --git a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
index eb0cd3547bda..36424506bee8 100644
--- a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
+++ b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
@@ -1,9 +1,9 @@
; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s
; CHECK: foo
-; CHECK: ldr w[[REG:[0-9]+]], [x19, #264]
-; CHECK: str w[[REG]], [x19, #132]
-; CHECK: ldr w{{[0-9]+}}, [x19, #264]
+; CHECK: str w[[REG0:[0-9]+]], [x19, #264]
+; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]]
+; CHECK: str w[[REG1]], [x19, #132]
define i32 @foo(i32 %a) nounwind {
%retval = alloca i32, align 4
diff --git a/test/CodeGen/AArch64/arm64-arith.ll b/test/CodeGen/AArch64/arm64-arith.ll
index f36e706b15dd..d5d9a1b98174 100644
--- a/test/CodeGen/AArch64/arm64-arith.ll
+++ b/test/CodeGen/AArch64/arm64-arith.ll
@@ -123,7 +123,8 @@ entry:
define i64 @t14(i16 %a, i64 %x) nounwind ssp {
entry:
; CHECK-LABEL: t14:
-; CHECK: add x0, x1, w0, uxth #3
+; CHECK: and w8, w0, #0xffff
+; CHECK: add x0, x1, w8, uxtw #3
; CHECK: ret
%c = zext i16 %a to i64
%d = shl i64 %c, 3
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
index a76cf74a6d0c..44c24c51f0df 100644
--- a/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -173,10 +173,13 @@ define i128 @atomic_load_seq_cst(i128* %p) {
ret i128 %r
}
-define i128 @atomic_load_relaxed(i128* %p) {
+define i128 @atomic_load_relaxed(i64, i64, i128* %p) {
; CHECK-LABEL: atomic_load_relaxed:
; CHECK-NOT: dmb
-; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x2]
+; CHECK-NEXT: stxp [[SUCCESS:w[0-9]+]], [[LO]], [[HI]], [x2]
+; CHECK: cbnz [[SUCCESS]], [[LABEL]]
; CHECK-NOT: dmb
%r = load atomic i128, i128* %p monotonic, align 16
ret i128 %r
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll
index 0824bd881a95..5d8d60de5fc5 100644
--- a/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/test/CodeGen/AArch64/arm64-atomic.ll
@@ -2,13 +2,17 @@
define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
; CHECK-LABEL: val_compare_and_swap:
-; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
+; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
+; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
+; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]]
; CHECK-NEXT: cmp [[RESULT]], w1
-; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x0]
-; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK-NEXT: [[LABEL2]]:
+; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
+; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[EXITBB]]:
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -17,13 +21,16 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
; CHECK-LABEL: val_compare_and_swap_from_load:
; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]
-; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[RESULT]], w1
-; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0]
-; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK-NEXT: [[LABEL2]]:
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
+; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[EXITBB]]:
%new = load i32, i32* %pnew
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
%val = extractvalue { i32, i1 } %pair, 0
@@ -32,13 +39,17 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
; CHECK-LABEL: val_compare_and_swap_rel:
-; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
+; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
+; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
+; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]
; CHECK-NEXT: cmp [[RESULT]], w1
-; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x0]
-; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK-NEXT: [[LABEL2]]:
+; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
+; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[EXITBB]]:
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -47,13 +58,16 @@ define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
; CHECK-LABEL: val_compare_and_swap_64:
; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
-; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK-NEXT: ldxr [[RESULT:x[0-9]+]], [x[[ADDR]]]
; CHECK-NEXT: cmp [[RESULT]], x1
-; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]]
-; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK-NEXT: [[LABEL2]]:
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
+; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[EXITBB]]:
%pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
%val = extractvalue { i64, i1 } %pair, 0
ret i64 %val
@@ -61,13 +75,13 @@ define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
define i32 @fetch_and_nand(i32* %p) #0 {
; CHECK-LABEL: fetch_and_nand:
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0]
; CHECK: mvn [[TMP_REG:w[0-9]+]], w[[DEST_REG]]
; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], [[TMP_REG]], #0xfffffff8
; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
; CHECK: mov x0, x[[DEST_REG]]
%val = atomicrmw nand i32* %p, i32 7 release
ret i32 %val
@@ -76,12 +90,12 @@ define i32 @fetch_and_nand(i32* %p) #0 {
define i64 @fetch_and_nand_64(i64* %p) #0 {
; CHECK-LABEL: fetch_and_nand_64:
; CHECK: mov x[[ADDR:[0-9]+]], x0
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x[[ADDR]]]
; CHECK: mvn w[[TMP_REG:[0-9]+]], w[[DEST_REG]]
; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
%val = atomicrmw nand i64* %p, i64 7 acq_rel
ret i64 %val
@@ -90,12 +104,12 @@ define i64 @fetch_and_nand_64(i64* %p) #0 {
define i32 @fetch_and_or(i32* %p) #0 {
; CHECK-LABEL: fetch_and_or:
; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #0x5
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0]
; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]]
; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
; CHECK: mov x0, x[[DEST_REG]]
%val = atomicrmw or i32* %p, i32 5 seq_cst
ret i32 %val
@@ -104,11 +118,11 @@ define i32 @fetch_and_or(i32* %p) #0 {
define i64 @fetch_and_or_64(i64* %p) #0 {
; CHECK: fetch_and_or_64:
; CHECK: mov x[[ADDR:[0-9]+]], x0
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7
; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
%val = atomicrmw or i64* %p, i64 7 monotonic
ret i64 %val
}
diff --git a/test/CodeGen/AArch64/arm64-builtins-linux.ll b/test/CodeGen/AArch64/arm64-builtins-linux.ll
new file mode 100644
index 000000000000..34fa1b471561
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-builtins-linux.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.aarch64.thread.pointer() #1
+
+define i8* @thread_pointer() {
+; CHECK: thread_pointer:
+; CHECK: mrs {{x[0-9]+}}, TPIDR_EL0
+ %1 = tail call i8* @llvm.aarch64.thread.pointer()
+ ret i8* %1
+}
diff --git a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
index 4e47ab6c03f3..25d874e54cb7 100644
--- a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
@@ -15,10 +15,10 @@ target triple = "arm64-apple-ios7.0.0"
; CHECK: Maze1
; CHECK: %if.then
; CHECK: cmp x{{[0-9]+}}, #2
-; CHECK-NEXT b.cc
+; CHECK-NEXT: b.lo
; CHECK: %if.then
; CHECK: cmp x{{[0-9]+}}, #2
-; CHECK-NEXT b.cc
+; CHECK-NEXT: b.lo
define i32 @Maze1() nounwind ssp {
entry:
%0 = load i64, i64* @channelColumns, align 8, !tbaa !0
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll
index ff18f7364337..72d3b8331162 100644
--- a/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -104,11 +104,14 @@ if.end: ; preds = %if.then, %lor.lhs.f
; Speculatively execute division by zero.
; The sdiv/udiv instructions do not trap when the divisor is zero, so they are
; safe to speculate.
-; CHECK: speculate_division
-; CHECK-NOT: cmp
-; CHECK: sdiv
-; CHECK: cmp
-; CHECK-NEXT: ccmp
+; CHECK-LABEL: speculate_division:
+; CHECK: cmp w0, #1
+; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0
+; CHECK: ccmp [[DIVRES]], #16, #0, ge
+; CHECK: b.gt [[BLOCK:LBB[0-9_]+]]
+; CHECK: bl _foo
+; CHECK: [[BLOCK]]:
+; CHECK: orr w0, wzr, #0x7
define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
entry:
%cmp = icmp sgt i32 %a, 0
@@ -287,3 +290,156 @@ sw.bb.i.i:
%code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16
br label %sw.bb.i.i
}
+
+; CHECK-LABEL: select_and
+define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
+; CHECK: cmp w1, #5
+; CHECK-NEXT: ccmp w0, w1, #0, ne
+; CHECK-NEXT: csel x0, x2, x3, lt
+; CHECK-NEXT: ret
+ %1 = icmp slt i32 %w0, %w1
+ %2 = icmp ne i32 5, %w1
+ %3 = and i1 %1, %2
+ %sel = select i1 %3, i64 %x2, i64 %x3
+ ret i64 %sel
+}
+
+; CHECK-LABEL: select_or
+define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
+; CHECK: cmp w1, #5
+; CHECK-NEXT: ccmp w0, w1, #8, eq
+; CHECK-NEXT: csel x0, x2, x3, lt
+; CHECK-NEXT: ret
+ %1 = icmp slt i32 %w0, %w1
+ %2 = icmp ne i32 5, %w1
+ %3 = or i1 %1, %2
+ %sel = select i1 %3, i64 %x2, i64 %x3
+ ret i64 %sel
+}
+
+; CHECK-LABEL: select_complicated
+define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) {
+; CHECK: ldr [[REG:d[0-9]+]],
+; CHECK: fcmp d0, d2
+; CHECK-NEXT: fmov d2, #13.00000000
+; CHECK-NEXT: fccmp d1, d2, #4, ne
+; CHECK-NEXT: fccmp d0, d1, #1, ne
+; CHECK-NEXT: fccmp d0, d1, #4, vc
+; CEHCK-NEXT: csel w0, w0, w1, eq
+ %1 = fcmp one double %v1, %v2
+ %2 = fcmp oeq double %v2, 13.0
+ %3 = fcmp oeq double %v1, 42.0
+ %or0 = or i1 %2, %3
+ %or1 = or i1 %1, %or0
+ %sel = select i1 %or1, i16 %a, i16 %b
+ ret i16 %sel
+}
+
+; CHECK-LABEL: gccbug
+define i64 @gccbug(i64 %x0, i64 %x1) {
+; CHECK: cmp x0, #2
+; CHECK-NEXT: ccmp x0, #4, #4, ne
+; CHECK-NEXT: ccmp x1, #0, #0, eq
+; CHECK-NEXT: orr w[[REGNUM:[0-9]+]], wzr, #0x1
+; CHECK-NEXT: cinc x0, x[[REGNUM]], eq
+; CHECK-NEXT: ret
+ %cmp0 = icmp eq i64 %x1, 0
+ %cmp1 = icmp eq i64 %x0, 2
+ %cmp2 = icmp eq i64 %x0, 4
+
+ %or = or i1 %cmp2, %cmp1
+ %and = and i1 %or, %cmp0
+
+ %sel = select i1 %and, i64 2, i64 1
+ ret i64 %sel
+}
+
+; CHECK-LABEL: select_ororand
+define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) {
+; CHECK: cmp w3, #4
+; CHECK-NEXT: ccmp w2, #2, #0, gt
+; CHECK-NEXT: ccmp w1, #13, #2, ge
+; CHECK-NEXT: ccmp w0, #0, #4, ls
+; CHECK-NEXT: csel w0, w3, wzr, eq
+; CHECK-NEXT: ret
+ %c0 = icmp eq i32 %w0, 0
+ %c1 = icmp ugt i32 %w1, 13
+ %c2 = icmp slt i32 %w2, 2
+ %c4 = icmp sgt i32 %w3, 4
+ %or = or i1 %c0, %c1
+ %and = and i1 %c2, %c4
+ %or1 = or i1 %or, %and
+ %sel = select i1 %or1, i32 %w3, i32 0
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_andor
+define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) {
+; CHECK: cmp w1, w2
+; CHECK-NEXT: ccmp w0, #0, #4, lt
+; CHECK-NEXT: ccmp w0, w1, #0, eq
+; CHECK-NEXT: csel w0, w0, w1, eq
+; CHECK-NEXT: ret
+ %c0 = icmp eq i32 %v1, %v2
+ %c1 = icmp sge i32 %v2, %v3
+ %c2 = icmp eq i32 %v1, 0
+ %or = or i1 %c2, %c1
+ %and = and i1 %or, %c0
+ %sel = select i1 %and, i32 %v1, i32 %v2
+ ret i32 %sel
+}
+
+; CHECK-LABEL: select_noccmp1
+define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
+; CHECK: cmp x0, #0
+; CHECK-NEXT: cset [[REG0:w[0-9]+]], lt
+; CHECK-NEXT: cmp x0, #13
+; CHECK-NOT: ccmp
+; CHECK-NEXT: cset [[REG1:w[0-9]+]], gt
+; CHECK-NEXT: cmp x2, #2
+; CHECK-NEXT: cset [[REG2:w[0-9]+]], lt
+; CHECK-NEXT: cmp x2, #4
+; CHECK-NEXT: cset [[REG3:w[0-9]+]], gt
+; CHECK-NEXT: and [[REG4:w[0-9]+]], [[REG0]], [[REG1]]
+; CHECK-NEXT: and [[REG5:w[0-9]+]], [[REG2]], [[REG3]]
+; CHECK-NEXT: orr [[REG6:w[0-9]+]], [[REG4]], [[REG5]]
+; CHECK-NEXT: cmp [[REG6]], #0
+; CHECK-NEXT: csel x0, xzr, x3, ne
+; CHECK-NEXT: ret
+ %c0 = icmp slt i64 %v1, 0
+ %c1 = icmp sgt i64 %v1, 13
+ %c2 = icmp slt i64 %v3, 2
+ %c4 = icmp sgt i64 %v3, 4
+ %and0 = and i1 %c0, %c1
+ %and1 = and i1 %c2, %c4
+ %or = or i1 %and0, %and1
+ %sel = select i1 %or, i64 0, i64 %r
+ ret i64 %sel
+}
+
+@g = global i32 0
+
+; Should not use ccmp if we have to compute the or expression in an integer
+; register anyway because of other users.
+; CHECK-LABEL: select_noccmp2
+define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
+; CHECK: cmp x0, #0
+; CHECK-NEXT: cset [[REG0:w[0-9]+]], lt
+; CHECK-NOT: ccmp
+; CHECK-NEXT: cmp x0, #13
+; CHECK-NEXT: cset [[REG1:w[0-9]+]], gt
+; CHECK-NEXT: orr [[REG2:w[0-9]+]], [[REG0]], [[REG1]]
+; CHECK-NEXT: cmp [[REG2]], #0
+; CHECK-NEXT: csel x0, xzr, x3, ne
+; CHECK-NEXT: sbfx [[REG3:w[0-9]+]], [[REG2]], #0, #1
+; CHECK-NEXT: adrp x[[REGN4:[0-9]+]], _g@PAGE
+; CHECK-NEXT: str [[REG3]], [x[[REGN4]], _g@PAGEOFF]
+; CHECK-NEXT: ret
+ %c0 = icmp slt i64 %v1, 0
+ %c1 = icmp sgt i64 %v1, 13
+ %or = or i1 %c0, %c1
+ %sel = select i1 %or, i64 0, i64 %r
+ %ext = sext i1 %or to i32
+ store volatile i32 %ext, i32* @g
+ ret i64 %sel
+}
diff --git a/test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll b/test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll
new file mode 100644
index 000000000000..528d2538bb4a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s | FileCheck %s
+
+; CHECK: orr w0, wzr, #0x1
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: orr w0, wzr, #0x1
+; CHECK-NEXT: bl foo
+
+target triple = "aarch64--linux-android"
+declare i32 @foo(i32)
+
+; Function Attrs: nounwind uwtable
+define i32 @main() {
+entry:
+ %call = tail call i32 @foo(i32 1)
+ %call1 = tail call i32 @foo(i32 1)
+ ret i32 0
+}
diff --git a/test/CodeGen/AArch64/arm64-collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll
index c0aa63cc4331..59147d401a30 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -51,3 +51,607 @@ if.end4: ; preds = %if.then2, %if.then,
%add6 = add nsw i32 %tmp3, %t.addr.0
ret i32 %add6
}
+
+@C = common global i32 0, align 4
+
+; Check that we catch AdrpLdrGotLdr case when we have a simple chain:
+; adrp -> ldrgot -> ldr.
+; CHECK-LABEL: _getC
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr w0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define i32 @getC() {
+ %res = load i32, i32* @C, align 4
+ ret i32 %res
+}
+
+; LDRSW supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getSExtC
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldrsw x0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define i64 @getSExtC() {
+ %res = load i32, i32* @C, align 4
+ %sextres = sext i32 %res to i64
+ ret i64 %sextres
+}
+
+; It may not be safe to fold the literal in the load if the address is
+; used several times.
+; Make sure we emit AdrpLdrGot for those.
+; CHECK-LABEL: _getSeveralC
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF]
+; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0
+; CHECK-NEXT: str [[ADD]], {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]]
+define void @getSeveralC(i32 %t) {
+entry:
+ %tmp = load i32, i32* @C, align 4
+ %add = add nsw i32 %tmp, %t
+ store i32 %add, i32* @C, align 4
+ ret void
+}
+
+; Make sure we catch that:
+; adrp -> ldrgot -> str.
+; CHECK-LABEL: _setC
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: str w0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define void @setC(i32 %t) {
+entry:
+ store i32 %t, i32* @C, align 4
+ ret void
+}
+
+; Perform the same tests for internal global and a displacement
+; in the addressing mode.
+; Indeed we will get an ADD for those instead of LOADGot.
+@InternalC = internal global i32 0, align 4
+
+; Check that we catch AdrpAddLdr case when we have a simple chain:
+; adrp -> add -> ldr.
+; CHECK-LABEL: _getInternalCPlus4
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
+; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr w0, {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
+define i32 @getInternalCPlus4() {
+ %addr = getelementptr i32, i32* @InternalC, i32 4
+ %res = load i32, i32* %addr, align 4
+ ret i32 %res
+}
+
+; LDRSW supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getSExtInternalCPlus4
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
+; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldrsw x0, {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
+define i64 @getSExtInternalCPlus4() {
+ %addr = getelementptr i32, i32* @InternalC, i32 4
+ %res = load i32, i32* %addr, align 4
+ %sextres = sext i32 %res to i64
+ ret i64 %sextres
+}
+
+; It may not be safe to fold the literal in the load if the address is
+; used several times.
+; Make sure we emit AdrpAdd for those.
+; CHECK-LABEL: _getSeveralInternalCPlus4
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
+; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
+; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0
+; CHECK-NEXT: str [[ADD]], {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpAdd [[ADRP_LABEL]], [[ADDGOT_LABEL]]
+define void @getSeveralInternalCPlus4(i32 %t) {
+entry:
+ %addr = getelementptr i32, i32* @InternalC, i32 4
+ %tmp = load i32, i32* %addr, align 4
+ %add = add nsw i32 %tmp, %t
+ store i32 %add, i32* %addr, align 4
+ ret void
+}
+
+; Make sure we catch that:
+; adrp -> add -> str.
+; CHECK-LABEL: _setInternalCPlus4
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
+; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: str w0, {{\[}}[[ADDGOT_REG]], #16]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]]
+define void @setInternalCPlus4(i32 %t) {
+entry:
+ %addr = getelementptr i32, i32* @InternalC, i32 4
+ store i32 %t, i32* %addr, align 4
+ ret void
+}
+
+; Check that we catch AdrpAddLdr case when we have a simple chain:
+; adrp -> ldr.
+; CHECK-LABEL: _getInternalC
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr w0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdr [[ADRP_LABEL]], [[LDR_LABEL]]
+define i32 @getInternalC() {
+ %res = load i32, i32* @InternalC, align 4
+ ret i32 %res
+}
+
+; LDRSW supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getSExtInternalC
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldrsw x0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdr [[ADRP_LABEL]], [[LDR_LABEL]]
+define i64 @getSExtInternalC() {
+ %res = load i32, i32* @InternalC, align 4
+ %sextres = sext i32 %res to i64
+ ret i64 %sextres
+}
+
+; It may not be safe to fold the literal in the load if the address is
+; used several times.
+; Make sure we do not catch anything here. We have a adrp alone,
+; there is not much we can do about it.
+; CHECK-LABEL: _getSeveralInternalC
+; CHECK: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
+; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0
+; CHECK-NEXT: str [[ADD]], {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: ret
+define void @getSeveralInternalC(i32 %t) {
+entry:
+ %tmp = load i32, i32* @InternalC, align 4
+ %add = add nsw i32 %tmp, %t
+ store i32 %add, i32* @InternalC, align 4
+ ret void
+}
+
+; Make sure we do not catch anything when:
+; adrp -> str.
+; We cannot fold anything in the str at this point.
+; Indeed, strs do not support litterals.
+; CHECK-LABEL: _setInternalC
+; CHECK: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE
+; CHECK-NEXT: str w0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF]
+; CHECK-NEXT: ret
+define void @setInternalC(i32 %t) {
+entry:
+ store i32 %t, i32* @InternalC, align 4
+ ret void
+}
+
+; Now check other variant of loads/stores.
+
+@D = common global i8 0, align 4
+
+; LDRB does not support loading from a literal.
+; Make sure we emit AdrpLdrGot and not AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getD
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF]
+; CHECK-NEXT: ldrb w0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]]
+define i8 @getD() {
+ %res = load i8, i8* @D, align 4
+ ret i8 %res
+}
+
+; CHECK-LABEL: _setD
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF]
+; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: strb w0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]]
+define void @setD(i8 %t) {
+ store i8 %t, i8* @D, align 4
+ ret void
+}
+
+; LDRSB supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getSExtD
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldrsb w0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define i32 @getSExtD() {
+ %res = load i8, i8* @D, align 4
+ %sextres = sext i8 %res to i32
+ ret i32 %sextres
+}
+
+; LDRSB supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getSExt64D
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldrsb x0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define i64 @getSExt64D() {
+ %res = load i8, i8* @D, align 4
+ %sextres = sext i8 %res to i64
+ ret i64 %sextres
+}
+
+@E = common global i16 0, align 4
+
+; LDRH does not support loading from a literal.
+; Make sure we emit AdrpLdrGot and not AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getE
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF]
+; CHECK-NEXT: ldrh w0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]]
+define i16 @getE() {
+ %res = load i16, i16* @E, align 4
+ ret i16 %res
+}
+
+; LDRSH supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getSExtE
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldrsh w0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define i32 @getSExtE() {
+ %res = load i16, i16* @E, align 4
+ %sextres = sext i16 %res to i32
+ ret i32 %sextres
+}
+
+; CHECK-LABEL: _setE
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF]
+; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: strh w0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]]
+define void @setE(i16 %t) {
+ store i16 %t, i16* @E, align 4
+ ret void
+}
+
+; LDRSH supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getSExt64E
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldrsh x0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define i64 @getSExt64E() {
+ %res = load i16, i16* @E, align 4
+ %sextres = sext i16 %res to i64
+ ret i64 %sextres
+}
+
+@F = common global i64 0, align 4
+
+; LDR supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getF
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr x0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define i64 @getF() {
+ %res = load i64, i64* @F, align 4
+ ret i64 %res
+}
+
+; CHECK-LABEL: _setF
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF]
+; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: str x0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]]
+define void @setF(i64 %t) {
+ store i64 %t, i64* @F, align 4
+ ret void
+}
+
+@G = common global float 0.0, align 4
+
+; LDR float supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getG
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr s0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define float @getG() {
+ %res = load float, float* @G, align 4
+ ret float %res
+}
+
+; CHECK-LABEL: _setG
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF]
+; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: str s0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]]
+define void @setG(float %t) {
+ store float %t, float* @G, align 4
+ ret void
+}
+
+@H = common global half 0.0, align 4
+
+; LDR half supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getH
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr h0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define half @getH() {
+ %res = load half, half* @H, align 4
+ ret half %res
+}
+
+; CHECK-LABEL: _setH
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF]
+; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: str h0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]]
+define void @setH(half %t) {
+ store half %t, half* @H, align 4
+ ret void
+}
+
+@I = common global double 0.0, align 4
+
+; LDR double supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getI
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define double @getI() {
+ %res = load double, double* @I, align 4
+ ret double %res
+}
+
+; CHECK-LABEL: _setI
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF]
+; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]]
+define void @setI(double %t) {
+ store double %t, double* @I, align 4
+ ret void
+}
+
+@J = common global <2 x i32> <i32 0, i32 0>, align 4
+
+; LDR 64-bit vector supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getJ
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define <2 x i32> @getJ() {
+ %res = load <2 x i32>, <2 x i32>* @J, align 4
+ ret <2 x i32> %res
+}
+
+; CHECK-LABEL: _setJ
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF]
+; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]]
+define void @setJ(<2 x i32> %t) {
+ store <2 x i32> %t, <2 x i32>* @J, align 4
+ ret void
+}
+
+@K = common global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 4
+
+; LDR 128-bit vector supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getK
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr q0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define <4 x i32> @getK() {
+ %res = load <4 x i32>, <4 x i32>* @K, align 4
+ ret <4 x i32> %res
+}
+
+; CHECK-LABEL: _setK
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF]
+; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: str q0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]]
+define void @setK(<4 x i32> %t) {
+ store <4 x i32> %t, <4 x i32>* @K, align 4
+ ret void
+}
+
+@L = common global <1 x i8> <i8 0>, align 4
+
+; LDR 8-bit vector supports loading from a literal.
+; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getL
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF]
+; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr b0, {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]]
+define <1 x i8> @getL() {
+ %res = load <1 x i8>, <1 x i8>* @L, align 4
+ ret <1 x i8> %res
+}
+
+; CHECK-LABEL: _setL
+; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE
+; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
+; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF]
+; Ultimately we should generate str b0, but right now, we match the vector
+; variant which does not allow to fold the immediate into the store.
+; CHECK-NEXT: st1.b { v0 }[0], {{\[}}[[LDRGOT_REG]]]
+; CHECK-NEXT: ret
+; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]]
+define void @setL(<1 x i8> %t) {
+ store <1 x i8> %t, <1 x i8>* @L, align 4
+ ret void
+}
+
+; Make sure we do not assert when we do not track
+; all the aliases of a tuple register.
+; Indeed the tuple register can be tracked because of
+; one of its element, but the other elements of the tuple
+; do not need to be tracked and we used to assert on that.
+; Note: The test case is fragile in the sense that we need
+; a tuple register to appear in the lowering. Thus, the target
+; cpu is required to have the problem reproduced.
+; CHECK-LABEL: _uninterestingSub
+; CHECK: adrp [[ADRP_REG:x[0-9]+]], [[CONSTPOOL:lCPI[0-9]+_[0-9]+]]@PAGE
+; CHECK-NEXT: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF]
+; The tuple comes from the next instruction.
+; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]]
+; CHECK: ret
+define void @uninterestingSub(i8* nocapture %row) #0 {
+ %tmp = bitcast i8* %row to <16 x i8>*
+ %tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16
+ %vext43 = shufflevector <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %tmp1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
+ %add.i.414 = add <16 x i8> zeroinitializer, %vext43
+ store <16 x i8> %add.i.414, <16 x i8>* %tmp, align 16
+ %add.ptr51 = getelementptr inbounds i8, i8* %row, i64 16
+ %tmp2 = bitcast i8* %add.ptr51 to <16 x i8>*
+ %tmp3 = load <16 x i8>, <16 x i8>* %tmp2, align 16
+ %tmp4 = bitcast i8* undef to <16 x i8>*
+ %tmp5 = load <16 x i8>, <16 x i8>* %tmp4, align 16
+ %vext157 = shufflevector <16 x i8> %tmp3, <16 x i8> %tmp5, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
+ %add.i.402 = add <16 x i8> zeroinitializer, %vext157
+ store <16 x i8> %add.i.402, <16 x i8>* %tmp4, align 16
+ ret void
+}
+
+attributes #0 = { "target-cpu"="cyclone" }
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
index 0ef7b143df80..55c9c6036ed5 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
@@ -94,9 +94,7 @@ entry:
store i32 %c, i32* %c.addr, align 4
store i64 %d, i64* %d.addr, align 8
%0 = load i16, i16* %b.addr, align 2
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: b.eq LBB4_2
+; CHECK: tbz w0, #0, LBB4_2
%conv = trunc i16 %0 to i1
br i1 %conv, label %if.then, label %if.end
@@ -106,9 +104,7 @@ if.then: ; preds = %entry
if.end: ; preds = %if.then, %entry
%1 = load i32, i32* %c.addr, align 4
-; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1
-; CHECK: cmp w[[REG]], #0
-; CHECK: b.eq LBB4_4
+; CHECK: tbz w{{[0-9]+}}, #0, LBB4_4
%conv1 = trunc i32 %1 to i1
br i1 %conv1, label %if.then3, label %if.end4
@@ -118,8 +114,7 @@ if.then3: ; preds = %if.end
if.end4: ; preds = %if.then3, %if.end
%2 = load i64, i64* %d.addr, align 8
-; CHECK: cmp w{{[0-9]+}}, #0
-; CHECK: b.eq LBB4_6
+; CHECK: tbz w{{[0-9]+}}, #0, LBB4_6
%conv5 = trunc i64 %2 to i1
br i1 %conv5, label %if.then7, label %if.end8
@@ -139,9 +134,7 @@ define i32 @trunc64(i64 %foo) nounwind {
; CHECK: trunc64
; CHECK: and [[REG1:x[0-9]+]], x0, #0x1
; CHECK: mov x[[REG2:[0-9]+]], [[REG1]]
-; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1
-; CHECK: cmp [[REG3]], #0
-; CHECK: b.eq LBB5_2
+; CHECK: tbz w[[REG2]], #0, LBB5_2
%a = and i64 %foo, 1
%b = trunc i64 %a to i1
br i1 %b, label %if.then, label %if.else
diff --git a/test/CodeGen/AArch64/arm64-fmax-safe.ll b/test/CodeGen/AArch64/arm64-fmax-safe.ll
new file mode 100644
index 000000000000..8b7d66986e78
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -0,0 +1,53 @@
+; RUN: llc -march=arm64 < %s | FileCheck %s
+
+define double @test_direct(float %in) {
+; CHECK-LABEL: test_direct:
+ %cmp = fcmp olt float %in, 0.000000e+00
+ %val = select i1 %cmp, float 0.000000e+00, float %in
+ %longer = fpext float %val to double
+ ret double %longer
+
+; CHECK: fmax s
+}
+
+define double @test_cross(float %in) {
+; CHECK-LABEL: test_cross:
+ %cmp = fcmp ult float %in, 0.000000e+00
+ %val = select i1 %cmp, float %in, float 0.000000e+00
+ %longer = fpext float %val to double
+ ret double %longer
+
+; CHECK: fmin s
+}
+
+; Same as previous, but with ordered comparison;
+; must become fminnm, not fmin.
+define double @test_cross_fail_nan(float %in) {
+; CHECK-LABEL: test_cross_fail_nan:
+ %cmp = fcmp olt float %in, 0.000000e+00
+ %val = select i1 %cmp, float %in, float 0.000000e+00
+ %longer = fpext float %val to double
+ ret double %longer
+
+; CHECK: fminnm s
+}
+
+; This isn't a min or a max, but passes the first condition for swapping the
+; results. Make sure they're put back before we resort to the normal fcsel.
+define float @test_cross_fail(float %lhs, float %rhs) {
+; CHECK-LABEL: test_cross_fail:
+ %tst = fcmp une float %lhs, %rhs
+ %res = select i1 %tst, float %rhs, float %lhs
+ ret float %res
+
+ ; The register allocator would have to decide to be deliberately obtuse before
+ ; other register were used.
+; CHECK: fcsel s0, s1, s0, ne
+}
+
+; Make sure the transformation isn't triggered for integers
+define i64 @test_integer(i64 %in) {
+ %cmp = icmp slt i64 %in, 0
+ %val = select i1 %cmp, i64 0, i64 %in
+ ret i64 %val
+}
diff --git a/test/CodeGen/AArch64/arm64-fmax.ll b/test/CodeGen/AArch64/arm64-fmax.ll
index ea281528b84c..40cc36ea52fa 100644
--- a/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/test/CodeGen/AArch64/arm64-fmax.ll
@@ -1,57 +1,48 @@
; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
-; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE
define double @test_direct(float %in) {
; CHECK-LABEL: test_direct:
-; CHECK-SAFE-LABEL: test_direct:
- %cmp = fcmp olt float %in, 0.000000e+00
- %longer = fpext float %in to double
- %val = select i1 %cmp, double 0.000000e+00, double %longer
- ret double %val
+ %cmp = fcmp nnan olt float %in, 0.000000e+00
+ %val = select i1 %cmp, float 0.000000e+00, float %in
+ %longer = fpext float %val to double
+ ret double %longer
; CHECK: fmax
-; CHECK-SAFE: fmax
}
define double @test_cross(float %in) {
; CHECK-LABEL: test_cross:
-; CHECK-SAFE-LABEL: test_cross:
- %cmp = fcmp ult float %in, 0.000000e+00
- %longer = fpext float %in to double
- %val = select i1 %cmp, double %longer, double 0.000000e+00
- ret double %val
+ %cmp = fcmp nnan ult float %in, 0.000000e+00
+ %val = select i1 %cmp, float %in, float 0.000000e+00
+ %longer = fpext float %val to double
+ ret double %longer
; CHECK: fmin
-; CHECK-SAFE: fmin
}
; Same as previous, but with ordered comparison;
; can't be converted in safe-math mode.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
-; CHECK-SAFE-LABEL: test_cross_fail_nan:
- %cmp = fcmp olt float %in, 0.000000e+00
- %longer = fpext float %in to double
- %val = select i1 %cmp, double %longer, double 0.000000e+00
- ret double %val
+ %cmp = fcmp nnan olt float %in, 0.000000e+00
+ %val = select i1 %cmp, float %in, float 0.000000e+00
+ %longer = fpext float %val to double
+ ret double %longer
; CHECK: fmin
-; CHECK-SAFE: fcsel d0, d1, d0, mi
}
; This isn't a min or a max, but passes the first condition for swapping the
; results. Make sure they're put back before we resort to the normal fcsel.
define float @test_cross_fail(float %lhs, float %rhs) {
; CHECK-LABEL: test_cross_fail:
-; CHECK-SAFE-LABEL: test_cross_fail:
- %tst = fcmp une float %lhs, %rhs
+ %tst = fcmp nnan une float %lhs, %rhs
%res = select i1 %tst, float %rhs, float %lhs
ret float %res
; The register allocator would have to decide to be deliberately obtuse before
; other register were used.
; CHECK: fcsel s0, s1, s0, ne
-; CHECK-SAFE: fcsel s0, s1, s0, ne
}
; Make sure the transformation isn't triggered for integers
@@ -60,3 +51,14 @@ define i64 @test_integer(i64 %in) {
%val = select i1 %cmp, i64 0, i64 %in
ret i64 %val
}
+
+define float @test_f16(half %in) {
+; CHECK-LABEL: test_f16:
+ %cmp = fcmp nnan ult half %in, 0.000000e+00
+ %val = select i1 %cmp, half %in, half 0.000000e+00
+ %longer = fpext half %val to float
+ ret float %longer
+; FIXME: It'd be nice for this to create an fmin instruction!
+; CHECK: fcvt
+; CHECK: fcsel
+}
diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll
index aaef39fcf512..097fe2ca6ed9 100644
--- a/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/test/CodeGen/AArch64/arm64-fp128.ll
@@ -148,14 +148,9 @@ define i1 @test_setcc2() {
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
%val = fcmp ugt fp128 %lhs, %rhs
-; CHECK: bl __gttf2
+; CHECK: bl __letf2
; CHECK: cmp w0, #0
-; CHECK: cset [[GT:w[0-9]+]], gt
-
-; CHECK: bl __unordtf2
-; CHECK: cmp w0, #0
-; CHECK: cset [[UNORDERED:w[0-9]+]], ne
-; CHECK: orr w0, [[UNORDERED]], [[GT]]
+; CHECK: cset w0, gt
ret i1 %val
; CHECK: ret
@@ -169,31 +164,21 @@ define i32 @test_br_cc() {
; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
- ; olt == !uge, which LLVM unfortunately "optimizes" this to.
+ ; olt == !uge, which LLVM optimizes this to.
%cond = fcmp olt fp128 %lhs, %rhs
-; CHECK: bl __getf2
-; CHECK: cmp w0, #0
-; CHECK: cset [[OGE:w[0-9]+]], ge
-
-; CHECK: bl __unordtf2
-; CHECK: cmp w0, #0
-; CHECK: cset [[UNORDERED:w[0-9]+]], ne
-
-; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]]
-; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]]
+; CHECK: bl __lttf2
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: b.ge {{.LBB[0-9]+_[0-9]+}}
br i1 %cond, label %iftrue, label %iffalse
iftrue:
ret i32 42
; CHECK-NEXT: BB#
; CHECK-NEXT: movz w0, #0x2a
-; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]]
-
+; CHECK: ret
iffalse:
ret i32 29
-; CHECK: [[RET29]]:
-; CHECK-NEXT: movz w0, #0x1d
-; CHECK-NEXT: [[REALRET]]:
+; CHECK: movz w0, #0x1d
; CHECK: ret
}
diff --git a/test/CodeGen/AArch64/arm64-hello.ll b/test/CodeGen/AArch64/arm64-hello.ll
index f1c4e9bbaed9..895bfe4b3915 100644
--- a/test/CodeGen/AArch64/arm64-hello.ll
+++ b/test/CodeGen/AArch64/arm64-hello.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=CHECK-LINUX
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-linux-gnu -disable-post-ra | FileCheck %s --check-prefix=CHECK-LINUX
; CHECK-LABEL: main:
; CHECK: stp x29, x30, [sp, #-16]!
diff --git a/test/CodeGen/AArch64/arm64-indexed-memory.ll b/test/CodeGen/AArch64/arm64-indexed-memory.ll
index b52cddf600ac..b6ab9934dbc3 100644
--- a/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -81,6 +81,17 @@ define void @truncst64to8(i8** nocapture %out, i8 %index, i64 %spacing) nounwind
}
+define void @storef16(half** %out, half %index, half %spacing) nounwind {
+; CHECK-LABEL: storef16:
+; CHECK: str h{{[0-9+]}}, [x{{[0-9+]}}], #2
+; CHECK: ret
+ %tmp = load half*, half** %out, align 2
+ %incdec.ptr = getelementptr inbounds half, half* %tmp, i64 1
+ store half %spacing, half* %tmp, align 2
+ store half* %incdec.ptr, half** %out, align 2
+ ret void
+}
+
define void @storef32(float** nocapture %out, float %index, float %spacing) nounwind noinline ssp {
; CHECK-LABEL: storef32:
; CHECK: str s{{[0-9+]}}, [x{{[0-9+]}}], #4
@@ -125,6 +136,17 @@ define float * @pref32(float** nocapture %out, float %spacing) nounwind noinline
ret float *%ptr
}
+define half* @pref16(half** %out, half %spacing) nounwind {
+; CHECK-LABEL: pref16:
+; CHECK: ldr x0, [x0]
+; CHECK-NEXT: str h0, [x0, #6]!
+; CHECK-NEXT: ret
+ %tmp = load half*, half** %out, align 2
+ %ptr = getelementptr inbounds half, half* %tmp, i64 3
+ store half %spacing, half* %ptr, align 2
+ ret half *%ptr
+}
+
define i64 * @pre64(i64** nocapture %out, i64 %spacing) nounwind noinline ssp {
; CHECK-LABEL: pre64:
; CHECK: ldr x0, [x0]
@@ -230,6 +252,17 @@ define float* @preidxf32(float* %src, float* %out) {
ret float* %ptr
}
+define half* @preidxf16(half* %src, half* %out) {
+; CHECK-LABEL: preidxf16:
+; CHECK: ldr h0, [x0, #2]!
+; CHECK: str h0, [x1]
+; CHECK: ret
+ %ptr = getelementptr inbounds half, half* %src, i64 1
+ %tmp = load half, half* %ptr, align 2
+ store half %tmp, half* %out, align 2
+ ret half* %ptr
+}
+
define i64* @preidx64(i64* %src, i64* %out) {
; CHECK-LABEL: preidx64:
; CHECK: ldr x[[REG:[0-9]+]], [x0, #8]!
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index ba31513172d5..98d4e3646f56 100644
--- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s
@ptr = global i8* null
@@ -6215,3 +6215,27 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %pt
}
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
+
+; CHECK-LABEL: test_ld1lane_build:
+; CHECK-DAG: ld1.s { [[REG0:v[0-9]+]] }[0], [x0]
+; CHECK-DAG: ld1.s { [[REG0:v[0-9]+]] }[1], [x1]
+; CHECK-DAG: ld1.s { [[REG1:v[0-9]+]] }[0], [x2]
+; CHECK-DAG: ld1.s { [[REG1:v[0-9]+]] }[1], [x3]
+; CHECK: sub.2s v[[REGNUM2:[0-9]+]], [[REG0]], [[REG1]]
+; CHECK-NEXT: str d[[REGNUM2]], [x4]
+; CHECK-NEXT: ret
+define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) {
+ %load0 = load i32, i32* %ptr0, align 4
+ %load1 = load i32, i32* %ptr1, align 4
+ %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0
+ %vec0_1 = insertelement <2 x i32> %vec0_0, i32 %load1, i32 1
+
+ %load2 = load i32, i32* %ptr2, align 4
+ %load3 = load i32, i32* %ptr3, align 4
+ %vec1_0 = insertelement <2 x i32> undef, i32 %load2, i32 0
+ %vec1_1 = insertelement <2 x i32> %vec1_0, i32 %load3, i32 1
+
+ %sub = sub nsw <2 x i32> %vec0_1, %vec1_1
+ store <2 x i32> %sub, <2 x i32>* %out, align 16
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll
index 802d95826ce4..ac6e8a7731c6 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
; rdar://9167275
diff --git a/test/CodeGen/AArch64/arm64-join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll
index dee034483541..c65cf95be2e5 100644
--- a/test/CodeGen/AArch64/arm64-join-reserved.ll
+++ b/test/CodeGen/AArch64/arm64-join-reserved.ll
@@ -5,7 +5,7 @@ target triple = "arm64-apple-macosx10"
; A move isn't necessary.
; <rdar://problem/11492712>
; CHECK-LABEL: g:
-; CHECK: str xzr, [sp]
+; CHECK: str xzr, [sp, #-16]!
; CHECK: bl
; CHECK: ret
define void @g() nounwind ssp {
diff --git a/test/CodeGen/AArch64/arm64-large-frame.ll b/test/CodeGen/AArch64/arm64-large-frame.ll
index c4cce36bcb74..d1244e73b0f3 100644
--- a/test/CodeGen/AArch64/arm64-large-frame.ll
+++ b/test/CodeGen/AArch64/arm64-large-frame.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -disable-fp-elim -disable-post-ra < %s | FileCheck %s
declare void @use_addr(i8*)
@addr = global i8* null
diff --git a/test/CodeGen/AArch64/arm64-ld-from-st.ll b/test/CodeGen/AArch64/arm64-ld-from-st.ll
new file mode 100644
index 000000000000..dd8add70cdb7
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ld-from-st.ll
@@ -0,0 +1,666 @@
+; RUN: llc < %s -mtriple aarch64--none-eabi -verify-machineinstrs | FileCheck %s
+
+; CHECK-LABEL: Str64Ldr64
+; CHECK: mov x0, x1
+define i64 @Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i64*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 1
+ %1 = load i64, i64* %arrayidx1
+ ret i64 %1
+}
+
+; CHECK-LABEL: Str64Ldr32_0
+; CHECK: and x0, x1, #0xffffffff
+define i32 @Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i32*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 2
+ %1 = load i32, i32* %arrayidx1
+ ret i32 %1
+}
+
+; CHECK-LABEL: Str64Ldr32_1
+; CHECK: lsr x0, x1, #32
+define i32 @Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i32*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 3
+ %1 = load i32, i32* %arrayidx1
+ ret i32 %1
+}
+
+; CHECK-LABEL: Str64Ldr16_0
+; CHECK: and x0, x1, #0xffff
+define i16 @Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i16*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 4
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Str64Ldr16_1
+; CHECK: ubfx x0, x1, #16, #16
+define i16 @Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i16*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 5
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Str64Ldr16_2
+; CHECK: ubfx x0, x1, #32, #16
+define i16 @Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i16*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 6
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Str64Ldr16_3
+; CHECK: lsr x0, x1, #48
+define i16 @Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i16*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 7
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Str64Ldr8_0
+; CHECK: and x0, x1, #0xff
+define i8 @Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 8
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str64Ldr8_1
+; CHECK: ubfx x0, x1, #8, #8
+define i8 @Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 9
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str64Ldr8_2
+; CHECK: ubfx x0, x1, #16, #8
+define i8 @Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 10
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str64Ldr8_3
+; CHECK: ubfx x0, x1, #24, #8
+define i8 @Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 11
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str64Ldr8_4
+; CHECK: ubfx x0, x1, #32, #8
+define i8 @Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 12
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str64Ldr8_5
+; CHECK: ubfx x0, x1, #40, #8
+define i8 @Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 13
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str64Ldr8_6
+; CHECK: ubfx x0, x1, #48, #8
+define i8 @Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 14
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str64Ldr8_7
+; CHECK: lsr x0, x1, #56
+define i8 @Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 15
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str32Ldr32
+; CHECK: mov w0, w1
+define i32 @Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i32*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 1
+ %1 = load i32, i32* %arrayidx1
+ ret i32 %1
+}
+
+; CHECK-LABEL: Str32Ldr16_0
+; CHECK: and w0, w1, #0xffff
+define i16 @Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i16*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Str32Ldr16_1
+; CHECK: lsr w0, w1, #16
+define i16 @Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i16*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 3
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Str32Ldr8_0
+; CHECK: and w0, w1, #0xff
+define i8 @Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i8*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 4
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str32Ldr8_1
+; CHECK: ubfx w0, w1, #8, #8
+define i8 @Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i8*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 5
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str32Ldr8_2
+; CHECK: ubfx w0, w1, #16, #8
+define i8 @Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i8*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 6
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str32Ldr8_3
+; CHECK: lsr w0, w1, #24
+define i8 @Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i8*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 7
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str16Ldr16
+; CHECK: and w0, w1, #0xffff
+define i16 @Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
+entry:
+ %0 = bitcast i16* %P to i16*
+ %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
+ store i16 %v, i16* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Str16Ldr8_0
+; CHECK: and w0, w1, #0xff
+define i8 @Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
+entry:
+ %0 = bitcast i16* %P to i8*
+ %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
+ store i16 %v, i16* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 2
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Str16Ldr8_1
+; CHECK: ubfx w0, w1, #8, #8
+define i8 @Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) {
+entry:
+ %0 = bitcast i16* %P to i8*
+ %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1
+ store i16 %v, i16* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 3
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+
+; CHECK-LABEL: Unscaled_Str64Ldr64
+; CHECK: mov x0, x1
+define i64 @Unscaled_Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i64*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 -1
+ %1 = load i64, i64* %arrayidx1
+ ret i64 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr32_0
+; CHECK: and x0, x1, #0xffffffff
+define i32 @Unscaled_Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i32*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -2
+ %1 = load i32, i32* %arrayidx1
+ ret i32 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr32_1
+; CHECK: lsr x0, x1, #32
+define i32 @Unscaled_Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i32*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1
+ %1 = load i32, i32* %arrayidx1
+ ret i32 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr16_0
+; CHECK: and x0, x1, #0xffff
+define i16 @Unscaled_Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i16*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -4
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr16_1
+; CHECK: ubfx x0, x1, #16, #16
+define i16 @Unscaled_Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i16*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr16_2
+; CHECK: ubfx x0, x1, #32, #16
+define i16 @Unscaled_Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i16*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr16_3
+; CHECK: lsr x0, x1, #48
+define i16 @Unscaled_Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i16*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr8_0
+; CHECK: and x0, x1, #0xff
+define i8 @Unscaled_Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -8
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr8_1
+; CHECK: ubfx x0, x1, #8, #8
+define i8 @Unscaled_Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -7
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr8_2
+; CHECK: ubfx x0, x1, #16, #8
+define i8 @Unscaled_Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -6
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr8_3
+; CHECK: ubfx x0, x1, #24, #8
+define i8 @Unscaled_Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -5
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr8_4
+; CHECK: ubfx x0, x1, #32, #8
+define i8 @Unscaled_Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr8_5
+; CHECK: ubfx x0, x1, #40, #8
+define i8 @Unscaled_Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr8_6
+; CHECK: ubfx x0, x1, #48, #8
+define i8 @Unscaled_Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str64Ldr8_7
+; CHECK: lsr x0, x1, #56
+define i8 @Unscaled_Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) {
+entry:
+ %0 = bitcast i64* %P to i8*
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1
+ store i64 %v, i64* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str32Ldr32
+; CHECK: mov w0, w1
+define i32 @Unscaled_Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i32*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1
+ %1 = load i32, i32* %arrayidx1
+ ret i32 %1
+}
+
+; CHECK-LABEL: Unscaled_Str32Ldr16_0
+; CHECK: and w0, w1, #0xffff
+define i16 @Unscaled_Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i16*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Unscaled_Str32Ldr16_1
+; CHECK: lsr w0, w1, #16
+define i16 @Unscaled_Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i16*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Unscaled_Str32Ldr8_0
+; CHECK: and w0, w1, #0xff
+define i8 @Unscaled_Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i8*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str32Ldr8_1
+; CHECK: ubfx w0, w1, #8, #8
+define i8 @Unscaled_Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i8*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str32Ldr8_2
+; CHECK: ubfx w0, w1, #16, #8
+define i8 @Unscaled_Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i8*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str32Ldr8_3
+; CHECK: lsr w0, w1, #24
+define i8 @Unscaled_Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i8*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str16Ldr16
+; CHECK: and w0, w1, #0xffff
+define i16 @Unscaled_Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) {
+entry:
+ %0 = bitcast i16* %P to i16*
+ %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
+ store i16 %v, i16* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Unscaled_Str16Ldr8_0
+; CHECK: and w0, w1, #0xff
+define i8 @Unscaled_Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) {
+entry:
+ %0 = bitcast i16* %P to i8*
+ %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
+ store i16 %v, i16* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: Unscaled_Str16Ldr8_1
+; CHECK: ubfx w0, w1, #8, #8
+define i8 @Unscaled_Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) {
+entry:
+ %0 = bitcast i16* %P to i8*
+ %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1
+ store i16 %v, i16* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1
+ %1 = load i8, i8* %arrayidx1
+ ret i8 %1
+}
+
+; CHECK-LABEL: StrVolatileLdr
+; CHECK: ldrh
+define i16 @StrVolatileLdr(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i16*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
+ %1 = load volatile i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: StrNotInRangeLdr
+; CHECK: ldrh
+define i16 @StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i16*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: Unscaled_StrNotInRangeLdr
+; CHECK: ldurh
+define i16 @Unscaled_StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i16*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1
+ store i32 %v, i32* %arrayidx0
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+; CHECK-LABEL: StrCallLdr
+; CHECK: ldrh
+define i16 @StrCallLdr(i32* nocapture %P, i32 %v, i64 %n) {
+entry:
+ %0 = bitcast i32* %P to i16*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ %c = call i1 @test_dummy()
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
+
+declare i1 @test_dummy()
+
+; CHECK-LABEL: StrStrLdr
+; CHECK: ldrh
+define i16 @StrStrLdr(i32 %v, i32* %P, i32* %P2, i32 %n) {
+entry:
+ %0 = bitcast i32* %P to i16*
+ %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1
+ store i32 %v, i32* %arrayidx0
+ store i32 %n, i32* %P2
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2
+ %1 = load i16, i16* %arrayidx1
+ ret i16 %1
+}
diff --git a/test/CodeGen/AArch64/arm64-ldp.ll b/test/CodeGen/AArch64/arm64-ldp.ll
index a192eab112fa..6071d092f8b3 100644
--- a/test/CodeGen/AArch64/arm64-ldp.ll
+++ b/test/CodeGen/AArch64/arm64-ldp.ll
@@ -1,8 +1,6 @@
; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
-; RUN: -verify-machineinstrs | FileCheck -check-prefix=LDUR_CHK %s
-; CHECK: ldp_int
+; CHECK-LABEL: ldp_int
; CHECK: ldp
define i32 @ldp_int(i32* %p) nounwind {
%tmp = load i32, i32* %p, align 4
@@ -12,7 +10,7 @@ define i32 @ldp_int(i32* %p) nounwind {
ret i32 %add
}
-; CHECK: ldp_sext_int
+; CHECK-LABEL: ldp_sext_int
; CHECK: ldpsw
define i64 @ldp_sext_int(i32* %p) nounwind {
%tmp = load i32, i32* %p, align 4
@@ -51,7 +49,7 @@ define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
}
-; CHECK: ldp_long
+; CHECK-LABEL: ldp_long
; CHECK: ldp
define i64 @ldp_long(i64* %p) nounwind {
%tmp = load i64, i64* %p, align 8
@@ -61,7 +59,7 @@ define i64 @ldp_long(i64* %p) nounwind {
ret i64 %add
}
-; CHECK: ldp_float
+; CHECK-LABEL: ldp_float
; CHECK: ldp
define float @ldp_float(float* %p) nounwind {
%tmp = load float, float* %p, align 4
@@ -71,7 +69,7 @@ define float @ldp_float(float* %p) nounwind {
ret float %add
}
-; CHECK: ldp_double
+; CHECK-LABEL: ldp_double
; CHECK: ldp
define double @ldp_double(double* %p) nounwind {
%tmp = load double, double* %p, align 8
@@ -83,10 +81,10 @@ define double @ldp_double(double* %p) nounwind {
; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
define i32 @ldur_int(i32* %a) nounwind {
-; LDUR_CHK: ldur_int
-; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
-; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: ldur_int
+; CHECK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
+; CHECK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %a, i32 -1
%tmp1 = load i32, i32* %p1, align 2
%p2 = getelementptr inbounds i32, i32* %a, i32 -2
@@ -96,10 +94,10 @@ define i32 @ldur_int(i32* %a) nounwind {
}
define i64 @ldur_sext_int(i32* %a) nounwind {
-; LDUR_CHK: ldur_sext_int
-; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: ldur_sext_int
+; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
+; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %a, i32 -1
%tmp1 = load i32, i32* %p1, align 2
%p2 = getelementptr inbounds i32, i32* %a, i32 -2
@@ -111,11 +109,11 @@ define i64 @ldur_sext_int(i32* %a) nounwind {
}
define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
-; LDUR_CHK: ldur_half_sext_int_res0
-; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
-; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: ldur_half_sext_int_res0
+; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
+; CHECK: sxtw x[[DST1]], w[[DST1]]
+; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %a, i32 -1
%tmp1 = load i32, i32* %p1, align 2
%p2 = getelementptr inbounds i32, i32* %a, i32 -2
@@ -127,11 +125,11 @@ define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
}
define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
-; LDUR_CHK: ldur_half_sext_int_res1
-; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
-; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: ldur_half_sext_int_res1
+; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
+; CHECK: sxtw x[[DST2]], w[[DST2]]
+; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %a, i32 -1
%tmp1 = load i32, i32* %p1, align 2
%p2 = getelementptr inbounds i32, i32* %a, i32 -2
@@ -144,10 +142,10 @@ define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
define i64 @ldur_long(i64* %a) nounwind ssp {
-; LDUR_CHK: ldur_long
-; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: ldur_long
+; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
+; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i64, i64* %a, i64 -1
%tmp1 = load i64, i64* %p1, align 2
%p2 = getelementptr inbounds i64, i64* %a, i64 -2
@@ -157,10 +155,10 @@ define i64 @ldur_long(i64* %a) nounwind ssp {
}
define float @ldur_float(float* %a) {
-; LDUR_CHK: ldur_float
-; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
-; LDUR_CHK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: ldur_float
+; CHECK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
+; CHECK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds float, float* %a, i64 -1
%tmp1 = load float, float* %p1, align 2
%p2 = getelementptr inbounds float, float* %a, i64 -2
@@ -170,10 +168,10 @@ define float @ldur_float(float* %a) {
}
define double @ldur_double(double* %a) {
-; LDUR_CHK: ldur_double
-; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
-; LDUR_CHK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: ldur_double
+; CHECK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
+; CHECK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds double, double* %a, i64 -1
%tmp1 = load double, double* %p1, align 2
%p2 = getelementptr inbounds double, double* %a, i64 -2
@@ -184,11 +182,11 @@ define double @ldur_double(double* %a) {
; Now check some boundary conditions
define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
-; LDUR_CHK: pairUpBarelyIn
-; LDUR_CHK-NOT: ldur
-; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyIn
+; CHECK-NOT: ldur
+; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
+; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i64, i64* %a, i64 -31
%tmp1 = load i64, i64* %p1, align 2
%p2 = getelementptr inbounds i64, i64* %a, i64 -32
@@ -198,11 +196,11 @@ define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
}
define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
-; LDUR_CHK: pairUpBarelyInSext
-; LDUR_CHK-NOT: ldur
-; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyInSext
+; CHECK-NOT: ldur
+; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
+; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %a, i64 -63
%tmp1 = load i32, i32* %p1, align 2
%p2 = getelementptr inbounds i32, i32* %a, i64 -64
@@ -214,12 +212,12 @@ define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
}
define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
-; LDUR_CHK: pairUpBarelyInHalfSextRes0
-; LDUR_CHK-NOT: ldur
-; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
-; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyInHalfSextRes0
+; CHECK-NOT: ldur
+; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
+; CHECK: sxtw x[[DST1]], w[[DST1]]
+; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %a, i64 -63
%tmp1 = load i32, i32* %p1, align 2
%p2 = getelementptr inbounds i32, i32* %a, i64 -64
@@ -231,12 +229,12 @@ define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
}
define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
-; LDUR_CHK: pairUpBarelyInHalfSextRes1
-; LDUR_CHK-NOT: ldur
-; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
-; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
-; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: pairUpBarelyInHalfSextRes1
+; CHECK-NOT: ldur
+; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
+; CHECK: sxtw x[[DST2]], w[[DST2]]
+; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %a, i64 -63
%tmp1 = load i32, i32* %p1, align 2
%p2 = getelementptr inbounds i32, i32* %a, i64 -64
@@ -248,12 +246,12 @@ define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
}
define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
-; LDUR_CHK: pairUpBarelyOut
-; LDUR_CHK-NOT: ldp
+; CHECK-LABEL: pairUpBarelyOut
+; CHECK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
-; LDUR_CHK: add
-; LDUR_CHK-NEXT: ret
+; CHECK: add
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i64, i64* %a, i64 -32
%tmp1 = load i64, i64* %p1, align 2
%p2 = getelementptr inbounds i64, i64* %a, i64 -33
@@ -263,12 +261,12 @@ define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
}
define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
-; LDUR_CHK: pairUpBarelyOutSext
-; LDUR_CHK-NOT: ldp
+; CHECK-LABEL: pairUpBarelyOutSext
+; CHECK-NOT: ldp
; Don't be fragile about which loads or manipulations of the base register
; are used---just check that there isn't an ldp before the add
-; LDUR_CHK: add
-; LDUR_CHK-NEXT: ret
+; CHECK: add
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %a, i64 -64
%tmp1 = load i32, i32* %p1, align 2
%p2 = getelementptr inbounds i32, i32* %a, i64 -65
@@ -280,12 +278,12 @@ define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
}
define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
-; LDUR_CHK: pairUpNotAligned
-; LDUR_CHK-NOT: ldp
-; LDUR_CHK: ldur
-; LDUR_CHK-NEXT: ldur
-; LDUR_CHK-NEXT: add
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: pairUpNotAligned
+; CHECK-NOT: ldp
+; CHECK: ldur
+; CHECK-NEXT: ldur
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i64, i64* %a, i64 -18
%bp1 = bitcast i64* %p1 to i8*
%bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
@@ -303,12 +301,12 @@ define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
}
define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
-; LDUR_CHK: pairUpNotAlignedSext
-; LDUR_CHK-NOT: ldp
-; LDUR_CHK: ldursw
-; LDUR_CHK-NEXT: ldursw
-; LDUR_CHK-NEXT: add
-; LDUR_CHK-NEXT: ret
+; CHECK-LABEL: pairUpNotAlignedSext
+; CHECK-NOT: ldp
+; CHECK: ldursw
+; CHECK-NEXT: ldursw
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %a, i64 -18
%bp1 = bitcast i32* %p1 to i8*
%bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
@@ -326,3 +324,35 @@ define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
%tmp3 = add i64 %sexttmp1, %sexttmp2
ret i64 %tmp3
}
+
+declare void @use-ptr(i32*)
+
+; CHECK-LABEL: ldp_sext_int_pre
+; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8]
+define i64 @ldp_sext_int_pre(i32* %p) nounwind {
+ %ptr = getelementptr inbounds i32, i32* %p, i64 2
+ call void @use-ptr(i32* %ptr)
+ %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0
+ %tmp = load i32, i32* %add.ptr, align 4
+ %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1
+ %tmp1 = load i32, i32* %add.ptr1, align 4
+ %sexttmp = sext i32 %tmp to i64
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ ret i64 %add
+}
+
+; CHECK-LABEL: ldp_sext_int_post
+; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8
+define i64 @ldp_sext_int_post(i32* %p) nounwind {
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = sext i32 %tmp to i64
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1
+ call void @use-ptr(i32* %ptr)
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ ret i64 %add
+}
+
diff --git a/test/CodeGen/AArch64/arm64-long-shift.ll b/test/CodeGen/AArch64/arm64-long-shift.ll
index d5baf16bdd5c..ad89d3ff711b 100644
--- a/test/CodeGen/AArch64/arm64-long-shift.ll
+++ b/test/CodeGen/AArch64/arm64-long-shift.ll
@@ -2,18 +2,20 @@
define i128 @shl(i128 %r, i128 %s) nounwind readnone {
; CHECK-LABEL: shl:
-; CHECK: lsl [[XREG_0:x[0-9]+]], x1, x2
-; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
-; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
-; CHECK-NEXT: lsr [[XREG_3:x[0-9]+]], x0, [[XREG_2]]
-; CHECK-NEXT: orr [[XREG_6:x[0-9]+]], [[XREG_3]], [[XREG_0]]
-; CHECK-NEXT: sub [[XREG_4:x[0-9]+]], x2, #64
-; CHECK-NEXT: lsl [[XREG_5:x[0-9]+]], x0, [[XREG_4]]
-; CHECK-NEXT: cmp [[XREG_4]], #0
-; CHECK-NEXT: csel x1, [[XREG_5]], [[XREG_6]], ge
-; CHECK-NEXT: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
-; CHECK-NEXT: csel x0, xzr, [[SMALLSHIFT_LO]], ge
-; CHECK-NEXT: ret
+; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
+; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]]
+; CHECK: cmp x2, #0
+; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq
+; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2
+; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]]
+; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
+; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]]
+; CHECK: cmp [[EXTRA_SHIFT]], #0
+; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge
+; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
+; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge
+; CHECK: ret
%shl = shl i128 %r, %s
ret i128 %shl
@@ -21,19 +23,21 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone {
define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
; CHECK-LABEL: ashr:
-; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2
-; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
-; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
-; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
-; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
-; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
-; CHECK-NEXT: asr [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
-; CHECK-NEXT: cmp [[XREG_5]], #0
-; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
-; CHECK-NEXT: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
-; CHECK-NEXT: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
-; CHECK-NEXT: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
-; CHECK-NEXT: ret
+; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
+; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
+; CHECK: cmp x2, #0
+; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
+; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
+; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
+; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
+; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
+; CHECK: cmp [[EXTRA_SHIFT]], #0
+; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
+; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
+; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
+; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
+; CHECK: ret
%shr = ashr i128 %r, %s
ret i128 %shr
@@ -41,18 +45,20 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
; CHECK-LABEL: lshr:
-; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2
-; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
-; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
-; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
-; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
-; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
-; CHECK-NEXT: lsr [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
-; CHECK-NEXT: cmp [[XREG_5]], #0
-; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
-; CHECK-NEXT: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
-; CHECK-NEXT: csel x1, xzr, [[SMALLSHIFT_HI]], ge
-; CHECK-NEXT: ret
+; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
+; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
+; CHECK: cmp x2, #0
+; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
+; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
+; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
+; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
+; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
+; CHECK: cmp [[EXTRA_SHIFT]], #0
+; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
+; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
+; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge
+; CHECK: ret
%shr = lshr i128 %r, %s
ret i128 %shr
diff --git a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
index 5bc4d71501ba..85572f2cf0f8 100644
--- a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
+++ b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-apple-ios -aarch64-strict-align < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios -mattr=+strict-align < %s | FileCheck %s
; Small (16-bytes here) unaligned memcpys should stay memcpy calls if
; strict-alignment is turned on.
diff --git a/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll b/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
new file mode 100644
index 000000000000..5276ac334a71
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll
@@ -0,0 +1,406 @@
+; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE
+; RUN: llc < %s -mtriple aarch64_be--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=BE
+
+; CHECK-LABEL: Ldrh_merge
+; CHECK-NOT: ldrh
+; CHECK: ldr [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i16 @Ldrh_merge(i16* nocapture readonly %p) {
+ %1 = load i16, i16* %p, align 2
+ %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1
+ %2 = load i16, i16* %arrayidx2, align 2
+ %add = sub nuw nsw i16 %1, %2
+ ret i16 %add
+}
+
+; CHECK-LABEL: Ldurh_merge
+; CHECK-NOT: ldurh
+; CHECK: ldur [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i16 @Ldurh_merge(i16* nocapture readonly %p) {
+entry:
+ %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2
+ %0 = load i16, i16* %arrayidx
+ %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1
+ %1 = load i16, i16* %arrayidx3
+ %add = sub nuw nsw i16 %0, %1
+ ret i16 %add
+}
+
+; CHECK-LABEL: Ldrh_4_merge
+; CHECK-NOT: ldrh
+; CHECK: ldp [[WORD1:w[0-9]+]], [[WORD2:w[0-9]+]], [x0]
+; CHECK-DAG: and [[WORD1LO:w[0-9]+]], [[WORD1]], #0xffff
+; CHECK-DAG: lsr [[WORD1HI:w[0-9]+]], [[WORD1]], #16
+; CHECK-DAG: and [[WORD2LO:w[0-9]+]], [[WORD2]], #0xffff
+; CHECK-DAG: lsr [[WORD2HI:w[0-9]+]], [[WORD2]], #16
+; LE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1HI]], [[WORD1LO]]
+; BE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1LO]], [[WORD1HI]]
+; LE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2LO]]
+; BE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2HI]]
+; LE: sub w0, [[TEMP2]], [[WORD2HI]]
+; BE: sub w0, [[TEMP2]], [[WORD2LO]]
+define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
+ %arrayidx = getelementptr inbounds i16, i16* %P, i64 0
+ %l0 = load i16, i16* %arrayidx
+ %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 1
+ %l1 = load i16, i16* %arrayidx2
+ %arrayidx7 = getelementptr inbounds i16, i16* %P, i64 2
+ %l2 = load i16, i16* %arrayidx7
+ %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3
+ %l3 = load i16, i16* %arrayidx12
+ %add4 = sub nuw nsw i16 %l1, %l0
+ %add9 = udiv i16 %add4, %l2
+ %add14 = sub nuw nsw i16 %add9, %l3
+ ret i16 %add14
+}
+
+; CHECK-LABEL: Ldrsh_merge
+; CHECK: ldr [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+
+define i32 @Ldrsh_merge(i16* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
+ %tmp = load i16, i16* %add.ptr0
+ %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
+ %tmp1 = load i16, i16* %add.ptr
+ %sexttmp = sext i16 %tmp to i32
+ %sexttmp1 = sext i16 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp1, %sexttmp
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldrsh_zsext_merge
+; CHECK: ldr [[NEW_DEST:w[0-9]+]]
+; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; LE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; BE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; BE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsh_zsext_merge(i16* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
+ %tmp = load i16, i16* %add.ptr0
+ %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
+ %tmp1 = load i16, i16* %add.ptr
+ %sexttmp = zext i16 %tmp to i32
+ %sexttmp1 = sext i16 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldrsh_szext_merge
+; CHECK: ldr [[NEW_DEST:w[0-9]+]]
+; LE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; LE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; BE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsh_szext_merge(i16* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
+ %tmp = load i16, i16* %add.ptr0
+ %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
+ %tmp1 = load i16, i16* %add.ptr
+ %sexttmp = sext i16 %tmp to i32
+ %sexttmp1 = zext i16 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldrb_merge
+; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; CHECK-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrb_merge(i8* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
+ %tmp = load i8, i8* %add.ptr0
+ %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
+ %tmp1 = load i8, i8* %add.ptr
+ %sexttmp = zext i8 %tmp to i32
+ %sexttmp1 = zext i8 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldrsb_merge
+; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; CHECK-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsb_merge(i8* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
+ %tmp = load i8, i8* %add.ptr0
+ %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
+ %tmp1 = load i8, i8* %add.ptr
+ %sexttmp = sext i8 %tmp to i32
+ %sexttmp1 = sext i8 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldrsb_zsext_merge
+; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
+; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; LE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; BE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; BE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsb_zsext_merge(i8* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
+ %tmp = load i8, i8* %add.ptr0
+ %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
+ %tmp1 = load i8, i8* %add.ptr
+ %sexttmp = zext i8 %tmp to i32
+ %sexttmp1 = sext i8 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldrsb_szext_merge
+; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
+; LE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
+; LE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; BE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldrsb_szext_merge(i8* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
+ %tmp = load i8, i8* %add.ptr0
+ %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
+ %tmp1 = load i8, i8* %add.ptr
+ %sexttmp = sext i8 %tmp to i32
+ %sexttmp1 = zext i8 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldursh_merge
+; CHECK: ldur [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursh_merge(i16* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
+ %tmp = load i16, i16* %add.ptr0
+ %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
+ %tmp1 = load i16, i16* %add.ptr
+ %sexttmp = sext i16 %tmp to i32
+ %sexttmp1 = sext i16 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldursh_zsext_merge
+; CHECK: ldur [[NEW_DEST:w[0-9]+]]
+; LE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; BE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursh_zsext_merge(i16* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
+ %tmp = load i16, i16* %add.ptr0
+ %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
+ %tmp1 = load i16, i16* %add.ptr
+ %sexttmp = zext i16 %tmp to i32
+ %sexttmp1 = sext i16 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldursh_szext_merge
+; CHECK: ldur [[NEW_DEST:w[0-9]+]]
+; LE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
+; BE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
+; BE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursh_szext_merge(i16* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
+ %tmp = load i16, i16* %add.ptr0
+ %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
+ %tmp1 = load i16, i16* %add.ptr
+ %sexttmp = sext i16 %tmp to i32
+ %sexttmp1 = zext i16 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldurb_merge
+; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; CHECK-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldurb_merge(i8* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
+ %tmp = load i8, i8* %add.ptr0
+ %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
+ %tmp1 = load i8, i8* %add.ptr
+ %sexttmp = zext i8 %tmp to i32
+ %sexttmp1 = zext i8 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldursb_merge
+; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
+; CHECK-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; CHECK-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursb_merge(i8* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
+ %tmp = load i8, i8* %add.ptr0
+ %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
+ %tmp1 = load i8, i8* %add.ptr
+ %sexttmp = sext i8 %tmp to i32
+ %sexttmp1 = sext i8 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldursb_zsext_merge
+; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
+; LE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; BE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursb_zsext_merge(i8* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
+ %tmp = load i8, i8* %add.ptr0
+ %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
+ %tmp1 = load i8, i8* %add.ptr
+ %sexttmp = zext i8 %tmp to i32
+ %sexttmp1 = sext i8 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Ldursb_szext_merge
+; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
+; LE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
+; BE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
+; BE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
+; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
+; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
+define i32 @Ldursb_szext_merge(i8* %p) nounwind {
+ %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
+ %tmp = load i8, i8* %add.ptr0
+ %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
+ %tmp1 = load i8, i8* %add.ptr
+ %sexttmp = sext i8 %tmp to i32
+ %sexttmp1 = zext i8 %tmp1 to i32
+ %add = sub nsw i32 %sexttmp, %sexttmp1
+ ret i32 %add
+}
+
+; CHECK-LABEL: Strh_zero
+; CHECK: str wzr
+define void @Strh_zero(i16* nocapture %P, i32 %n) {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
+ store i16 0, i16* %arrayidx
+ %add = add nsw i32 %n, 1
+ %idxprom1 = sext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1
+ store i16 0, i16* %arrayidx2
+ ret void
+}
+
+; CHECK-LABEL: Strh_zero_4
+; CHECK: stp wzr, wzr
+define void @Strh_zero_4(i16* nocapture %P, i32 %n) {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
+ store i16 0, i16* %arrayidx
+ %add = add nsw i32 %n, 1
+ %idxprom1 = sext i32 %add to i64
+ %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1
+ store i16 0, i16* %arrayidx2
+ %add3 = add nsw i32 %n, 2
+ %idxprom4 = sext i32 %add3 to i64
+ %arrayidx5 = getelementptr inbounds i16, i16* %P, i64 %idxprom4
+ store i16 0, i16* %arrayidx5
+ %add6 = add nsw i32 %n, 3
+ %idxprom7 = sext i32 %add6 to i64
+ %arrayidx8 = getelementptr inbounds i16, i16* %P, i64 %idxprom7
+ store i16 0, i16* %arrayidx8
+ ret void
+}
+
+; CHECK-LABEL: Sturb_zero
+; CHECK: sturh wzr
+define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 {
+entry:
+ %sub = add nsw i32 %n, -2
+ %idxprom = sext i32 %sub to i64
+ %arrayidx = getelementptr inbounds i8, i8* %P, i64 %idxprom
+ store i8 0, i8* %arrayidx
+ %sub2= add nsw i32 %n, -1
+ %idxprom1 = sext i32 %sub2 to i64
+ %arrayidx2 = getelementptr inbounds i8, i8* %P, i64 %idxprom1
+ store i8 0, i8* %arrayidx2
+ ret void
+}
+
+; CHECK-LABEL: Sturh_zero
+; CHECK: stur wzr
+define void @Sturh_zero(i16* nocapture %P, i32 %n) {
+entry:
+ %sub = add nsw i32 %n, -2
+ %idxprom = sext i32 %sub to i64
+ %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
+ store i16 0, i16* %arrayidx
+ %sub1 = add nsw i32 %n, -3
+ %idxprom2 = sext i32 %sub1 to i64
+ %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2
+ store i16 0, i16* %arrayidx3
+ ret void
+}
+
+; CHECK-LABEL: Sturh_zero_4
+; CHECK: stp wzr, wzr
+define void @Sturh_zero_4(i16* nocapture %P, i32 %n) {
+entry:
+ %sub = add nsw i32 %n, -3
+ %idxprom = sext i32 %sub to i64
+ %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom
+ store i16 0, i16* %arrayidx
+ %sub1 = add nsw i32 %n, -4
+ %idxprom2 = sext i32 %sub1 to i64
+ %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2
+ store i16 0, i16* %arrayidx3
+ %sub4 = add nsw i32 %n, -2
+ %idxprom5 = sext i32 %sub4 to i64
+ %arrayidx6 = getelementptr inbounds i16, i16* %P, i64 %idxprom5
+ store i16 0, i16* %arrayidx6
+ %sub7 = add nsw i32 %n, -1
+ %idxprom8 = sext i32 %sub7 to i64
+ %arrayidx9 = getelementptr inbounds i16, i16* %P, i64 %idxprom8
+ store i16 0, i16* %arrayidx9
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll
index 869966caa3ae..985b5bf483ac 100644
--- a/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -535,6 +535,17 @@ entry:
declare double @llvm.fma.f64(double, double, double)
+define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmss_lane_f32
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %extract.rhs = extractelement <2 x float> %v, i32 1
+ %extract = fsub float -0.000000e+00, %extract.rhs
+ %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
+ ret float %0
+}
+
define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
; CHECK-LABEL: test_vfmss_laneq_f32
; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
@@ -557,6 +568,50 @@ entry:
ret double %0
}
+define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) {
+; CHCK-LABEL: test_vfmsd_lane_f64_0
+; CHCK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHCK-NEXT: ret
+entry:
+ %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v
+ %tmp1 = extractelement <1 x double> %tmp0, i32 0
+ %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+ ret double %0
+}
+
+define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmss_lane_f32_0
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+ %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
+ %tmp1 = extractelement <2 x float> %tmp0, i32 1
+ %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+ ret float %0
+}
+
+define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmss_laneq_f32_0
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+ %tmp0 = fsub <4 x float><float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
+ %tmp1 = extractelement <4 x float> %tmp0, i32 3
+ %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+ ret float %0
+}
+
+define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmsd_laneq_f64_0
+; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+ %tmp0 = fsub <2 x double><double -0.000000e+00, double -0.000000e+00>, %v
+ %tmp1 = extractelement <2 x double> %tmp0, i32 1
+ %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+ ret double %0
+}
+
define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
; CHECK-LABEL: test_vmlal_lane_s16:
; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index b74a40626cee..83b1cac70f5c 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -320,21 +320,20 @@ define i32 @smovw8h(<8 x i16> %tmp1) {
ret i32 %tmp5
}
-define i32 @smovx16b(<16 x i8> %tmp1) {
+define i64 @smovx16b(<16 x i8> %tmp1) {
; CHECK-LABEL: smovx16b:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
%tmp3 = extractelement <16 x i8> %tmp1, i32 8
- %tmp4 = sext i8 %tmp3 to i32
- %tmp5 = add i32 %tmp4, %tmp4
- ret i32 %tmp5
+ %tmp4 = sext i8 %tmp3 to i64
+ ret i64 %tmp4
}
-define i32 @smovx8h(<8 x i16> %tmp1) {
+define i64 @smovx8h(<8 x i16> %tmp1) {
; CHECK-LABEL: smovx8h:
-; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
+; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
%tmp3 = extractelement <8 x i16> %tmp1, i32 2
- %tmp4 = sext i16 %tmp3 to i32
- ret i32 %tmp4
+ %tmp4 = sext i16 %tmp3 to i64
+ ret i64 %tmp4
}
define i64 @smovx4s(<4 x i32> %tmp1) {
diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
index b8236c5b2479..c2006ccdd064 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
@@ -7,7 +7,7 @@ define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: jscall_patchpoint_codegen:
; CHECK: Ltmp
-; CHECK: str x{{.+}}, [sp]
+; CHECK: str x{{.+}}, [sp, #-16]!
; CHECK-NEXT: mov x0, x{{.+}}
; CHECK: Ltmp
; CHECK-NEXT: movz x16, #0xffff, lsl #32
@@ -16,7 +16,7 @@ entry:
; CHECK-NEXT: blr x16
; FAST-LABEL: jscall_patchpoint_codegen:
; FAST: Ltmp
-; FAST: str x{{.+}}, [sp]
+; FAST: str x{{.+}}, [sp, #-16]!
; FAST: Ltmp
; FAST-NEXT: movz x16, #0xffff, lsl #32
; FAST-NEXT: movk x16, #0xdead, lsl #16
@@ -50,7 +50,7 @@ entry:
; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2
; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4
; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6
-; FAST-NEXT: str [[REG1]], [sp]
+; FAST-NEXT: str [[REG1]], [sp, #-32]!
; FAST-NEXT: str [[REG2]], [sp, #16]
; FAST-NEXT: str [[REG3]], [sp, #24]
; FAST: Ltmp
@@ -90,7 +90,7 @@ entry:
; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6
; FAST-NEXT: orr [[REG4:w[0-9]+]], wzr, #0x8
; FAST-NEXT: movz [[REG5:x[0-9]+]], #0xa
-; FAST-NEXT: str [[REG1]], [sp]
+; FAST-NEXT: str [[REG1]], [sp, #-64]!
; FAST-NEXT: str [[REG2]], [sp, #16]
; FAST-NEXT: str [[REG3]], [sp, #24]
; FAST-NEXT: str [[REG4]], [sp, #36]
diff --git a/test/CodeGen/AArch64/arm64-platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll
index 60672aa38486..f3af01a73559 100644
--- a/test/CodeGen/AArch64/arm64-platform-reg.ll
+++ b/test/CodeGen/AArch64/arm64-platform-reg.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
-; RUN: llc -mtriple=arm64-freebsd-gnu -aarch64-reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
+; RUN: llc -mtriple=arm64-apple-ios -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
+; RUN: llc -mtriple=arm64-freebsd-gnu -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18
; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
; x18 is reserved as a platform register on Darwin but not on other
diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll
index b0b529a13f41..9ee53a0f92e6 100644
--- a/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -4,8 +4,8 @@
define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
-; CHECK: ubfx x{{[0-9]+}}
-; CHECK: fmov d0, x{{[0-9]+}}
+; CHECK: mov w[[IN64:[0-9]+]], w0
+; CHECK: fmov d0, x[[IN64]]
; CHECK: cnt.8b v0, v0
; CHECK: uaddlv.8b h0, v0
; CHECK: fmov w0, s0
@@ -59,7 +59,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
; CHECK-LABEL: cnt32:
-; CHECK-NOT 16b
+; CHECK-NOT: 16b
; CHECK: ret
}
@@ -67,7 +67,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %cnt
; CHECK-LABEL: cnt64:
-; CHECK-NOT 16b
+; CHECK-NOT: 16b
; CHECK: ret
}
diff --git a/test/CodeGen/AArch64/arm64-rounding.ll b/test/CodeGen/AArch64/arm64-rounding.ll
index 931114447adf..d487aabccc4f 100644
--- a/test/CodeGen/AArch64/arm64-rounding.ll
+++ b/test/CodeGen/AArch64/arm64-rounding.ll
@@ -1,10 +1,8 @@
-; RUN: llc -O3 < %s -mcpu=cyclone | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64"
-target triple = "arm64-apple-ios6.0.0"
+; RUN: llc -O3 < %s -mtriple=arm64 | FileCheck %s
-; CHECK: test1
-; CHECK: frintx
+; CHECK-LABEL: test1:
; CHECK: frintm
+; CHECK-NOT: frintx
define float @test1(float %a) #0 {
entry:
%call = tail call float @floorf(float %a) nounwind readnone
@@ -13,9 +11,9 @@ entry:
declare float @floorf(float) nounwind readnone
-; CHECK: test2
-; CHECK: frintx
+; CHECK-LABEL: test2:
; CHECK: frintm
+; CHECK-NOT: frintx
define double @test2(double %a) #0 {
entry:
%call = tail call double @floor(double %a) nounwind readnone
@@ -24,7 +22,7 @@ entry:
declare double @floor(double) nounwind readnone
-; CHECK: test3
+; CHECK-LABEL: test3:
; CHECK: frinti
define float @test3(float %a) #0 {
entry:
@@ -34,7 +32,7 @@ entry:
declare float @nearbyintf(float) nounwind readnone
-; CHECK: test4
+; CHECK-LABEL: test4:
; CHECK: frinti
define double @test4(double %a) #0 {
entry:
@@ -44,9 +42,9 @@ entry:
declare double @nearbyint(double) nounwind readnone
-; CHECK: test5
-; CHECK: frintx
+; CHECK-LABEL: test5:
; CHECK: frintp
+; CHECK-NOT: frintx
define float @test5(float %a) #0 {
entry:
%call = tail call float @ceilf(float %a) nounwind readnone
@@ -55,9 +53,9 @@ entry:
declare float @ceilf(float) nounwind readnone
-; CHECK: test6
-; CHECK: frintx
+; CHECK-LABEL: test6:
; CHECK: frintp
+; CHECK-NOT: frintx
define double @test6(double %a) #0 {
entry:
%call = tail call double @ceil(double %a) nounwind readnone
@@ -66,7 +64,7 @@ entry:
declare double @ceil(double) nounwind readnone
-; CHECK: test7
+; CHECK-LABEL: test7:
; CHECK: frintx
define float @test7(float %a) #0 {
entry:
@@ -76,7 +74,7 @@ entry:
declare float @rintf(float) nounwind readnone
-; CHECK: test8
+; CHECK-LABEL: test8:
; CHECK: frintx
define double @test8(double %a) #0 {
entry:
@@ -86,9 +84,9 @@ entry:
declare double @rint(double) nounwind readnone
-; CHECK: test9
-; CHECK: frintx
+; CHECK-LABEL: test9:
; CHECK: frintz
+; CHECK-NOT: frintx
define float @test9(float %a) #0 {
entry:
%call = tail call float @truncf(float %a) nounwind readnone
@@ -97,9 +95,9 @@ entry:
declare float @truncf(float) nounwind readnone
-; CHECK: test10
-; CHECK: frintx
+; CHECK-LABEL: test10:
; CHECK: frintz
+; CHECK-NOT: frintx
define double @test10(double %a) #0 {
entry:
%call = tail call double @trunc(double %a) nounwind readnone
@@ -108,9 +106,9 @@ entry:
declare double @trunc(double) nounwind readnone
-; CHECK: test11
-; CHECK: frintx
+; CHECK-LABEL: test11:
; CHECK: frinta
+; CHECK-NOT: frintx
define float @test11(float %a) #0 {
entry:
%call = tail call float @roundf(float %a) nounwind readnone
@@ -119,9 +117,9 @@ entry:
declare float @roundf(float %a) nounwind readnone
-; CHECK: test12
-; CHECK: frintx
+; CHECK-LABEL: test12:
; CHECK: frinta
+; CHECK-NOT: frintx
define double @test12(double %a) #0 {
entry:
%call = tail call double @round(double %a) nounwind readnone
@@ -130,7 +128,7 @@ entry:
declare double @round(double %a) nounwind readnone
-; CHECK: test13
+; CHECK-LABEL: test13:
; CHECK-NOT: frintx
; CHECK: frintm
define float @test13(float %a) #1 {
@@ -139,7 +137,7 @@ entry:
ret float %call
}
-; CHECK: test14
+; CHECK-LABEL: test14:
; CHECK-NOT: frintx
; CHECK: frintm
define double @test14(double %a) #1 {
@@ -148,7 +146,7 @@ entry:
ret double %call
}
-; CHECK: test15
+; CHECK-LABEL: test15:
; CHECK-NOT: frintx
; CHECK: frintp
define float @test15(float %a) #1 {
@@ -157,7 +155,7 @@ entry:
ret float %call
}
-; CHECK: test16
+; CHECK-LABEL: test16:
; CHECK-NOT: frintx
; CHECK: frintp
define double @test16(double %a) #1 {
@@ -166,7 +164,7 @@ entry:
ret double %call
}
-; CHECK: test17
+; CHECK-LABEL: test17:
; CHECK-NOT: frintx
; CHECK: frintz
define float @test17(float %a) #1 {
@@ -175,7 +173,7 @@ entry:
ret float %call
}
-; CHECK: test18
+; CHECK-LABEL: test18:
; CHECK-NOT: frintx
; CHECK: frintz
define double @test18(double %a) #1 {
@@ -184,7 +182,7 @@ entry:
ret double %call
}
-; CHECK: test19
+; CHECK-LABEL: test19:
; CHECK-NOT: frintx
; CHECK: frinta
define float @test19(float %a) #1 {
@@ -193,7 +191,7 @@ entry:
ret float %call
}
-; CHECK: test20
+; CHECK-LABEL: test20:
; CHECK-NOT: frintx
; CHECK: frinta
define double @test20(double %a) #1 {
@@ -202,7 +200,5 @@ entry:
ret double %call
}
-
-
attributes #0 = { nounwind }
attributes #1 = { nounwind "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 599712be401c..2ecd66ddf5d4 100644
--- a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -1,5 +1,5 @@
-; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
-; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios"
@@ -539,3 +539,94 @@ if.end:
declare void @abort() #0
attributes #0 = { noreturn nounwind }
+
+; Make sure that we handle infinite loops properly When checking that the Save
+; and Restore blocks are control flow equivalent, the loop searches for the
+; immediate (post) dominator for the (restore) save blocks. When either the Save
+; or Restore block is located in an infinite loop the only immediate (post)
+; dominator is itself. In this case, we cannot perform shrink wrapping, but we
+; should return gracefully and continue compilation.
+; The only condition for this test is the compilation finishes correctly.
+;
+; CHECK-LABEL: infiniteloop
+; CHECK: ret
+define void @infiniteloop() {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %ptr = alloca i32, i32 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.03
+ store i32 %add, i32* %ptr
+ br label %for.body
+
+if.end:
+ ret void
+}
+
+; Another infinite loop test this time with a body bigger than just one block.
+; CHECK-LABEL: infiniteloop2
+; CHECK: ret
+define void @infiniteloop2() {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %ptr = alloca i32, i32 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
+ %call = tail call i32 asm "mov $0, #0", "=r,~{x19}"()
+ %add = add nsw i32 %call, %sum.03
+ store i32 %add, i32* %ptr
+ br i1 undef, label %body1, label %body2
+
+body1:
+ tail call void asm sideeffect "nop", "~{x19}"()
+ br label %for.body
+
+body2:
+ tail call void asm sideeffect "nop", "~{x19}"()
+ br label %for.body
+
+if.end:
+ ret void
+}
+
+; Another infinite loop test this time with two nested infinite loop.
+; CHECK-LABEL: infiniteloop3
+; CHECK: ret
+define void @infiniteloop3() {
+entry:
+ br i1 undef, label %loop2a, label %body
+
+body: ; preds = %entry
+ br i1 undef, label %loop2a, label %end
+
+loop1: ; preds = %loop2a, %loop2b
+ %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
+ %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
+ %0 = icmp eq i32* %var, null
+ %next.load = load i32*, i32** undef
+ br i1 %0, label %loop2a, label %loop2b
+
+loop2a: ; preds = %loop1, %body, %entry
+ %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
+ %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
+ br label %loop1
+
+loop2b: ; preds = %loop1
+ %gep1 = bitcast i32* %var.phi to i32*
+ %next.ptr = bitcast i32* %gep1 to i32**
+ store i32* %next.phi, i32** %next.ptr
+ br label %loop1
+
+end:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-spill-lr.ll b/test/CodeGen/AArch64/arm64-spill-lr.ll
index 88109088a2ff..2ea5d7810a14 100644
--- a/test/CodeGen/AArch64/arm64-spill-lr.ll
+++ b/test/CodeGen/AArch64/arm64-spill-lr.ll
@@ -1,9 +1,9 @@
; RUN: llc -mtriple=arm64-apple-ios < %s
@bar = common global i32 0, align 4
-; Leaf function which uses all callee-saved registers and allocates >= 256 bytes on the stack
-; this will cause processFunctionBeforeCalleeSavedScan() to spill LR as an additional scratch
-; register.
+; Leaf function which uses all callee-saved registers and allocates >= 256 bytes
+; on the stack this will cause determineCalleeSaves() to spill LR as an
+; additional scratch register.
;
; This is a crash-only regression test for rdar://15124582.
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind {
diff --git a/test/CodeGen/AArch64/arm64-stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll
index 1a4df7a6f2d6..3eb1d2753001 100644
--- a/test/CodeGen/AArch64/arm64-stackmap.ll
+++ b/test/CodeGen/AArch64/arm64-stackmap.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=arm64-apple-darwin < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort=1 < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -mattr=+reserve-x18 < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -mattr=+reserve-x18 -fast-isel -fast-isel-abort=1 < %s | FileCheck %s
;
; Note: Print verbose stackmaps using -debug-only=stackmaps.
diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll
index 72561aac6e87..98242d0bb57e 100644
--- a/test/CodeGen/AArch64/arm64-stp.ll
+++ b/test/CodeGen/AArch64/arm64-stp.ll
@@ -1,8 +1,6 @@
; RUN: llc < %s -march=arm64 -aarch64-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
-; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\
-; RUN: -verify-machineinstrs -mcpu=cyclone | FileCheck -check-prefix=STUR_CHK %s
-; CHECK: stp_int
+; CHECK-LABEL: stp_int
; CHECK: stp w0, w1, [x2]
define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
store i32 %a, i32* %p, align 4
@@ -11,7 +9,7 @@ define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
ret void
}
-; CHECK: stp_long
+; CHECK-LABEL: stp_long
; CHECK: stp x0, x1, [x2]
define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
store i64 %a, i64* %p, align 8
@@ -20,7 +18,7 @@ define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
ret void
}
-; CHECK: stp_float
+; CHECK-LABEL: stp_float
; CHECK: stp s0, s1, [x0]
define void @stp_float(float %a, float %b, float* nocapture %p) nounwind {
store float %a, float* %p, align 4
@@ -29,7 +27,7 @@ define void @stp_float(float %a, float %b, float* nocapture %p) nounwind {
ret void
}
-; CHECK: stp_double
+; CHECK-LABEL: stp_double
; CHECK: stp d0, d1, [x0]
define void @stp_double(double %a, double %b, double* nocapture %p) nounwind {
store double %a, double* %p, align 8
@@ -40,9 +38,9 @@ define void @stp_double(double %a, double %b, double* nocapture %p) nounwind {
; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
-; STUR_CHK: stur_int
-; STUR_CHK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8]
-; STUR_CHK-NEXT: ret
+; CHECK-LABEL: stur_int
+; CHECK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i32, i32* %p, i32 -1
store i32 %a, i32* %p1, align 2
%p2 = getelementptr inbounds i32, i32* %p, i32 -2
@@ -51,9 +49,9 @@ define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
}
define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
-; STUR_CHK: stur_long
-; STUR_CHK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16]
-; STUR_CHK-NEXT: ret
+; CHECK-LABEL: stur_long
+; CHECK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds i64, i64* %p, i32 -1
store i64 %a, i64* %p1, align 2
%p2 = getelementptr inbounds i64, i64* %p, i32 -2
@@ -62,9 +60,9 @@ define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
}
define void @stur_float(float %a, float %b, float* nocapture %p) nounwind {
-; STUR_CHK: stur_float
-; STUR_CHK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8]
-; STUR_CHK-NEXT: ret
+; CHECK-LABEL: stur_float
+; CHECK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds float, float* %p, i32 -1
store float %a, float* %p1, align 2
%p2 = getelementptr inbounds float, float* %p, i32 -2
@@ -73,9 +71,9 @@ define void @stur_float(float %a, float %b, float* nocapture %p) nounwind {
}
define void @stur_double(double %a, double %b, double* nocapture %p) nounwind {
-; STUR_CHK: stur_double
-; STUR_CHK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16]
-; STUR_CHK-NEXT: ret
+; CHECK-LABEL: stur_double
+; CHECK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16]
+; CHECK-NEXT: ret
%p1 = getelementptr inbounds double, double* %p, i32 -1
store double %a, double* %p1, align 2
%p2 = getelementptr inbounds double, double* %p, i32 -2
diff --git a/test/CodeGen/AArch64/arm64-strict-align.ll b/test/CodeGen/AArch64/arm64-strict-align.ll
index 109f4115d801..28c158f7a2eb 100644
--- a/test/CodeGen/AArch64/arm64-strict-align.ll
+++ b/test/CodeGen/AArch64/arm64-strict-align.ll
@@ -1,7 +1,6 @@
; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT
-; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT
+; RUN: llc < %s -mtriple=arm64-apple-darwin -mattr=+strict-align | FileCheck %s --check-prefix=CHECK-STRICT
+; RUN: llc < %s -mtriple=arm64-apple-darwin -mattr=+strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT
define i32 @f0(i32* nocapture %p) nounwind {
; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2]
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
index f94f88a1183f..c95eca062ff6 100644
--- a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
@@ -1,4 +1,7 @@
-; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \
+; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=NOEMU %s
+; RUN: llc -emulated-tls -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \
+; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=EMU %s
; If the .tlsdesccall and blr parts are emitted completely separately (even with
; glue) then LLVM will separate them quite happily (with a spill at O0, hence
@@ -13,6 +16,40 @@ define i32 @test_generaldynamic() {
%val = load i32, i32* @general_dynamic_var
ret i32 %val
-; CHECK: .tlsdesccall general_dynamic_var
-; CHECK-NEXT: blr {{x[0-9]+}}
+; NOEMU: .tlsdesccall general_dynamic_var
+; NOEMU-NEXT: blr {{x[0-9]+}}
+; NOEMU-NOT: __emutls_v.general_dynamic_var:
+
+; EMU: adrp{{.+}}__emutls_v.general_dynamic_var
+; EMU: bl __emutls_get_address
+
+; EMU-NOT: __emutls_v.general_dynamic_var
+; EMU-NOT: __emutls_t.general_dynamic_var
+}
+
+@emulated_init_var = thread_local global i32 37, align 8
+
+define i32 @test_emulated_init() {
+; COMMON-LABEL: test_emulated_init:
+
+ %val = load i32, i32* @emulated_init_var
+ ret i32 %val
+
+; EMU: adrp{{.+}}__emutls_v.emulated_init_var
+; EMU: bl __emutls_get_address
+
+; EMU-NOT: __emutls_v.general_dynamic_var:
+
+; EMU: .align 3
+; EMU-LABEL: __emutls_v.emulated_init_var:
+; EMU-NEXT: .xword 4
+; EMU-NEXT: .xword 8
+; EMU-NEXT: .xword 0
+; EMU-NEXT: .xword __emutls_t.emulated_init_var
+
+; EMU-LABEL: __emutls_t.emulated_init_var:
+; EMU-NEXT: .word 37
}
+
+; CHECK-NOT: __emutls_v.general_dynamic_var:
+; EMU-NOT: __emutls_t.general_dynamic_var
diff --git a/test/CodeGen/AArch64/arm64-trunc-store.ll b/test/CodeGen/AArch64/arm64-trunc-store.ll
index 7cde629b33ae..be0388284fb8 100644
--- a/test/CodeGen/AArch64/arm64-trunc-store.ll
+++ b/test/CodeGen/AArch64/arm64-trunc-store.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s
define void @bar(<8 x i16> %arg, <8 x i8>* %p) nounwind {
; CHECK-LABEL: bar:
diff --git a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll
index a52c4ebf13e7..c1800085884c 100644
--- a/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/test/CodeGen/AArch64/arm64-vabs.ll
@@ -134,6 +134,72 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
ret <2 x i64> %tmp4
}
+define i16 @uabdl8h_log2_shuffle(<16 x i8>* %a, <16 x i8>* %b) {
+; CHECK-LABEL: uabdl8h_log2_shuffle
+; CHECK: uabdl2.8h
+; CHECK: uabdl.8h
+ %aload = load <16 x i8>, <16 x i8>* %a, align 1
+ %bload = load <16 x i8>, <16 x i8>* %b, align 1
+ %aext = zext <16 x i8> %aload to <16 x i16>
+ %bext = zext <16 x i8> %bload to <16 x i16>
+ %abdiff = sub nsw <16 x i16> %aext, %bext
+ %abcmp = icmp slt <16 x i16> %abdiff, zeroinitializer
+ %ababs = sub nsw <16 x i16> zeroinitializer, %abdiff
+ %absel = select <16 x i1> %abcmp, <16 x i16> %ababs, <16 x i16> %abdiff
+ %rdx.shuf = shufflevector <16 x i16> %absel, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin1.rdx = add <16 x i16> %absel, %rdx.shuf
+ %rdx.shufx = shufflevector <16 x i16> %bin1.rdx, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx = add <16 x i16> %bin1.rdx, %rdx.shufx
+ %rdx.shuf136 = shufflevector <16 x i16> %bin.rdx, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx137 = add <16 x i16> %bin.rdx, %rdx.shuf136
+ %rdx.shuf138 = shufflevector <16 x i16> %bin.rdx137, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx139 = add <16 x i16> %bin.rdx137, %rdx.shuf138
+ %reduced_v = extractelement <16 x i16> %bin.rdx139, i16 0
+ ret i16 %reduced_v
+}
+
+define i32 @uabdl4s_log2_shuffle(<8 x i16>* %a, <8 x i16>* %b) {
+; CHECK-LABEL: uabdl4s_log2_shuffle
+; CHECK: uabdl2.4s
+; CHECK: uabdl.4s
+ %aload = load <8 x i16>, <8 x i16>* %a, align 1
+ %bload = load <8 x i16>, <8 x i16>* %b, align 1
+ %aext = zext <8 x i16> %aload to <8 x i32>
+ %bext = zext <8 x i16> %bload to <8 x i32>
+ %abdiff = sub nsw <8 x i32> %aext, %bext
+ %abcmp = icmp slt <8 x i32> %abdiff, zeroinitializer
+ %ababs = sub nsw <8 x i32> zeroinitializer, %abdiff
+ %absel = select <8 x i1> %abcmp, <8 x i32> %ababs, <8 x i32> %abdiff
+ %rdx.shuf = shufflevector <8 x i32> %absel, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx = add <8 x i32> %absel, %rdx.shuf
+ %rdx.shuf136 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx137 = add <8 x i32> %bin.rdx, %rdx.shuf136
+ %rdx.shuf138 = shufflevector <8 x i32> %bin.rdx137, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %bin.rdx139 = add <8 x i32> %bin.rdx137, %rdx.shuf138
+ %reduced_v = extractelement <8 x i32> %bin.rdx139, i32 0
+ ret i32 %reduced_v
+}
+
+define i64 @uabdl2d_log2_shuffle(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
+; CHECK: uabdl2d_log2_shuffle
+; CHECK: uabdl2.2d
+; CHECK: uabdl.2d
+ %aload = load <4 x i32>, <4 x i32>* %a, align 1
+ %bload = load <4 x i32>, <4 x i32>* %b, align 1
+ %aext = zext <4 x i32> %aload to <4 x i64>
+ %bext = zext <4 x i32> %bload to <4 x i64>
+ %abdiff = sub nsw <4 x i64> %aext, %bext
+ %abcmp = icmp slt <4 x i64> %abdiff, zeroinitializer
+ %ababs = sub nsw <4 x i64> zeroinitializer, %abdiff
+ %absel = select <4 x i1> %abcmp, <4 x i64> %ababs, <4 x i64> %abdiff
+ %rdx.shuf136 = shufflevector <4 x i64> %absel, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ %bin.rdx137 = add <4 x i64> %absel, %rdx.shuf136
+ %rdx.shuf138 = shufflevector <4 x i64> %bin.rdx137, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %bin.rdx139 = add <4 x i64> %bin.rdx137, %rdx.shuf138
+ %reduced_v = extractelement <4 x i64> %bin.rdx139, i16 0
+ ret i64 %reduced_v
+}
+
define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fabd_2s:
;CHECK: fabd.2s
diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index 44f2af1c5e79..8702b41023d0 100644
--- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -mtriple=arm64-linux-gnu -pre-RA-sched=linearize -enable-misched=false < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-linux-gnu -pre-RA-sched=linearize -enable-misched=false -disable-post-ra < %s | FileCheck %s
%va_list = type {i8*, i8*, i8*, i32, i32}
diff --git a/test/CodeGen/AArch64/arm64-vector-ext.ll b/test/CodeGen/AArch64/arm64-vector-ext.ll
index 5bee1611e6c6..994a9956cf7f 100644
--- a/test/CodeGen/AArch64/arm64-vector-ext.ll
+++ b/test/CodeGen/AArch64/arm64-vector-ext.ll
@@ -1,27 +1,27 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
-
-;CHECK: @func30
-;CHECK: ushll.4s v0, v0, #0
-;CHECK: movi.4s v1, #0x1
-;CHECK: and.16b v0, v0, v1
-;CHECK: str q0, [x0]
-;CHECK: ret
-
-%T0_30 = type <4 x i1>
-%T1_30 = type <4 x i32>
-define void @func30(%T0_30 %v0, %T1_30* %p1) {
- %r = zext %T0_30 %v0 to %T1_30
- store %T1_30 %r, %T1_30* %p1
- ret void
-}
-
-; Extend from v1i1 was crashing things (PR20791). Make sure we do something
-; sensible instead.
-define <1 x i32> @autogen_SD7918() {
-; CHECK-LABEL: autogen_SD7918
-; CHECK: movi d0, #0000000000000000
-; CHECK-NEXT: ret
- %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
- %ZE = zext <1 x i1> %I29 to <1 x i32>
- ret <1 x i32> %ZE
-}
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+;CHECK: @func30
+;CHECK: movi.4h v1, #0x1
+;CHECK: and.8b v0, v0, v1
+;CHECK: ushll.4s v0, v0, #0
+;CHECK: str q0, [x0]
+;CHECK: ret
+
+%T0_30 = type <4 x i1>
+%T1_30 = type <4 x i32>
+define void @func30(%T0_30 %v0, %T1_30* %p1) {
+ %r = zext %T0_30 %v0 to %T1_30
+ store %T1_30 %r, %T1_30* %p1
+ ret void
+}
+
+; Extend from v1i1 was crashing things (PR20791). Make sure we do something
+; sensible instead.
+define <1 x i32> @autogen_SD7918() {
+; CHECK-LABEL: autogen_SD7918
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+ %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
+ %ZE = zext <1 x i1> %I29 to <1 x i32>
+ ret <1 x i32> %ZE
+}
diff --git a/test/CodeGen/AArch64/arm64-vminmaxnm.ll b/test/CodeGen/AArch64/arm64-vminmaxnm.ll
index b5aca45cd479..302ba9d681c6 100644
--- a/test/CodeGen/AArch64/arm64-vminmaxnm.ll
+++ b/test/CodeGen/AArch64/arm64-vminmaxnm.ll
@@ -42,13 +42,28 @@ define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp
ret <2 x double> %vminnm2.i
}
+define float @f7(float %a, float %b) nounwind readnone ssp {
+; CHECK: fmaxnm s0, s0, s1
+; CHECK: ret
+ %vmaxnm2.i = tail call float @llvm.aarch64.neon.fmaxnm.f32(float %a, float %b) nounwind
+ ret float %vmaxnm2.i
+}
+
+define double @f8(double %a, double %b) nounwind readnone ssp {
+; CHECK: fminnm d0, d0, d1
+; CHECK: ret
+ %vmaxnm2.i = tail call double @llvm.aarch64.neon.fminnm.f64(double %a, double %b) nounwind
+ ret double %vmaxnm2.i
+}
+
declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
-
+declare float @llvm.aarch64.neon.fmaxnm.f32(float, float) nounwind readnone
+declare double @llvm.aarch64.neon.fminnm.f64(double, double) nounwind readnone
define double @test_fmaxnmv(<2 x double> %in) {
; CHECK-LABEL: test_fmaxnmv:
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll
index ce9c0a64b587..ec49110d4052 100644
--- a/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -disable-post-ra -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -disable-post-ra -verify-machineinstrs < %s | FileCheck %s
;
; Get the actual value of the overflow bit.
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index cb90caeadc1f..900d2072925f 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG
; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created
@@ -893,6 +893,8 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: [[GET_OUT]]:
+; CHECK: clrex
; CHECK-NOT: dmb
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
@@ -916,6 +918,8 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: [[GET_OUT]]:
+; CHECK: clrex
; CHECK-NOT: dmb
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
@@ -927,21 +931,21 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
%pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
%old = extractvalue { i32, i1 } %pair, 0
+; CHECK: mov {{[xw]}}[[WANTED:[0-9]+]], {{[xw]}}0
+
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
- ; w0 below is a reasonable guess but could change: it certainly comes into the
- ; function there.
-; CHECK-NEXT: cmp w[[OLD]], w0
+; CHECK-NEXT: cmp w[[OLD]], w[[WANTED]]
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: [[GET_OUT]]:
+; CHECK: clrex
; CHECK-NOT: dmb
-
-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
ret i32 %old
}
@@ -963,6 +967,8 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; As above, w1 is a reasonable guess.
; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
+; CHECK: [[GET_OUT]]:
+; CHECK: clrex
; CHECK-NOT: dmb
; CHECK: str x[[OLD]],
diff --git a/test/CodeGen/AArch64/bitcast-v2i8.ll b/test/CodeGen/AArch64/bitcast-v2i8.ll
index 4bdac641c5bc..aff3ffc70a71 100644
--- a/test/CodeGen/AArch64/bitcast-v2i8.ll
+++ b/test/CodeGen/AArch64/bitcast-v2i8.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck %s
; Part of PR21549: going through the stack isn't ideal but is correct.
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll
index 9b731fa72a47..509b547a5c82 100644
--- a/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/test/CodeGen/AArch64/bitfield-insert.ll
@@ -196,3 +196,44 @@ define void @test_32bit_with_shr(i32* %existing, i32* %new) {
ret void
}
+
+; Bitfield insert where the second or operand is a better match to be folded into the BFM
+define void @test_32bit_opnd1_better(i32* %existing, i32* %new) {
+; CHECK-LABEL: test_32bit_opnd1_better:
+
+ %oldval = load volatile i32, i32* %existing
+ %oldval_keep = and i32 %oldval, 65535 ; 0x0000ffff
+
+ %newval = load i32, i32* %new
+ %newval_shifted = shl i32 %newval, 16
+ %newval_masked = and i32 %newval_shifted, 16711680 ; 0x00ff0000
+
+ %combined = or i32 %oldval_keep, %newval_masked
+ store volatile i32 %combined, i32* %existing
+; CHECK: and [[BIT:w[0-9]+]], {{w[0-9]+}}, #0xffff
+; CHECK: bfi [[BIT]], {{w[0-9]+}}, #16, #8
+
+ ret void
+}
+
+; Tests when all the bits from one operand are not useful
+define i32 @test_nouseful_bits(i8 %a, i32 %b) {
+; CHECK-LABEL: test_nouseful_bits:
+; CHECK: bfi
+; CHECK: bfi
+; CHECK: bfi
+; CHECK-NOT: bfi
+; CHECK-NOT: or
+; CHECK: lsl
+ %conv = zext i8 %a to i32 ; 0 0 0 A
+ %shl = shl i32 %b, 8 ; B2 B1 B0 0
+ %or = or i32 %conv, %shl ; B2 B1 B0 A
+ %shl.1 = shl i32 %or, 8 ; B1 B0 A 0
+ %or.1 = or i32 %conv, %shl.1 ; B1 B0 A A
+ %shl.2 = shl i32 %or.1, 8 ; B0 A A 0
+ %or.2 = or i32 %conv, %shl.2 ; B0 A A A
+ %shl.3 = shl i32 %or.2, 8 ; A A A 0
+ %or.3 = or i32 %conv, %shl.3 ; A A A A
+ %shl.4 = shl i32 %or.3, 8 ; A A A 0
+ ret i32 %shl.4
+}
diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll
index 78399c80b5de..5f19b6943b8e 100644
--- a/test/CodeGen/AArch64/bitfield.ll
+++ b/test/CodeGen/AArch64/bitfield.ll
@@ -3,51 +3,67 @@
@var32 = global i32 0
@var64 = global i64 0
-define void @test_extendb(i8 %var) {
-; CHECK-LABEL: test_extendb:
+define void @test_extendb32(i8 %var) {
+; CHECK-LABEL: test_extendb32:
%sxt32 = sext i8 %var to i32
store volatile i32 %sxt32, i32* @var32
; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}}
- %sxt64 = sext i8 %var to i64
- store volatile i64 %sxt64, i64* @var64
-; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
-
; N.b. this doesn't actually produce a bitfield instruction at the
; moment, but it's still a good test to have and the semantics are
; correct.
%uxt32 = zext i8 %var to i32
store volatile i32 %uxt32, i32* @var32
; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff
+ ret void
+}
+
+define void @test_extendb64(i8 %var) {
+; CHECK-LABEL: test_extendb64:
+
+ %sxt64 = sext i8 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}}
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
%uxt64 = zext i8 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff
ret void
}
-define void @test_extendh(i16 %var) {
-; CHECK-LABEL: test_extendh:
+define void @test_extendh32(i16 %var) {
+; CHECK-LABEL: test_extendh32:
%sxt32 = sext i16 %var to i32
store volatile i32 %sxt32, i32* @var32
; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}}
- %sxt64 = sext i16 %var to i64
- store volatile i64 %sxt64, i64* @var64
-; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
-
; N.b. this doesn't actually produce a bitfield instruction at the
; moment, but it's still a good test to have and the semantics are
; correct.
%uxt32 = zext i16 %var to i32
store volatile i32 %uxt32, i32* @var32
; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
+ ret void
+}
+
+define void @test_extendh64(i16 %var) {
+; CHECK-LABEL: test_extendh64:
+
+ %sxt64 = sext i16 %var to i64
+ store volatile i64 %sxt64, i64* @var64
+; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}}
+; N.b. this doesn't actually produce a bitfield instruction at the
+; moment, but it's still a good test to have and the semantics are
+; correct.
%uxt64 = zext i16 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff
+; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
ret void
}
@@ -60,7 +76,7 @@ define void @test_extendw(i32 %var) {
%uxt64 = zext i32 %var to i64
store volatile i64 %uxt64, i64* @var64
-; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32
+; CHECK: mov {{w[0-9]+}}, w0
ret void
}
diff --git a/test/CodeGen/AArch64/bitreverse.ll b/test/CodeGen/AArch64/bitreverse.ll
new file mode 100644
index 000000000000..936e3554b397
--- /dev/null
+++ b/test/CodeGen/AArch64/bitreverse.ll
@@ -0,0 +1,87 @@
+; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s
+
+; These tests just check that the plumbing is in place for @llvm.bitreverse. The
+; actual output is massive at the moment as llvm.bitreverse is not yet legal.
+
+declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone
+
+define <2 x i16> @f(<2 x i16> %a) {
+; CHECK-LABEL: f:
+; CHECK: ushr
+ %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
+ ret <2 x i16> %b
+}
+
+declare i8 @llvm.bitreverse.i8(i8) readnone
+
+; Unfortunately some of the shift-and-inserts become BFIs, and some do not :(
+define i8 @g(i8 %a) {
+; CHECK-LABEL: g:
+; CHECK-DAG: lsr [[S5:w.*]], w0, #5
+; CHECK-DAG: lsr [[S4:w.*]], w0, #4
+; CHECK-DAG: lsr [[S3:w.*]], w0, #3
+; CHECK-DAG: lsr [[S2:w.*]], w0, #2
+; CHECK-DAG: lsl [[L1:w.*]], w0, #29
+; CHECK-DAG: lsl [[L2:w.*]], w0, #19
+; CHECK-DAG: lsl [[L3:w.*]], w0, #17
+
+; CHECK-DAG: and [[T1:w.*]], [[L1]], #0x40000000
+; CHECK-DAG: bfi [[T1]], w0, #31, #1
+; CHECK-DAG: bfi [[T1]], [[S2]], #29, #1
+; CHECK-DAG: bfi [[T1]], [[S3]], #28, #1
+; CHECK-DAG: bfi [[T1]], [[S4]], #27, #1
+; CHECK-DAG: bfi [[T1]], [[S5]], #26, #1
+; CHECK-DAG: and [[T2:w.*]], [[L2]], #0x2000000
+; CHECK-DAG: and [[T3:w.*]], [[L3]], #0x1000000
+; CHECK-DAG: orr [[T4:w.*]], [[T1]], [[T2]]
+; CHECK-DAG: orr [[T5:w.*]], [[T4]], [[T3]]
+; CHECK: lsr w0, [[T5]], #24
+
+ %b = call i8 @llvm.bitreverse.i8(i8 %a)
+ ret i8 %b
+}
+
+declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) readnone
+
+define <8 x i8> @g_vec(<8 x i8> %a) {
+; Try and match as much of the sequence as precisely as possible.
+
+; CHECK-LABEL: g_vec:
+; CHECK-DAG: movi [[M1:v.*]], #0x80
+; CHECK-DAG: movi [[M2:v.*]], #0x40
+; CHECK-DAG: movi [[M3:v.*]], #0x20
+; CHECK-DAG: movi [[M4:v.*]], #0x10
+; CHECK-DAG: movi [[M5:v.*]], #0x8
+; CHECK-DAG: movi [[M6:v.*]], #0x4{{$}}
+; CHECK-DAG: movi [[M7:v.*]], #0x2{{$}}
+; CHECK-DAG: movi [[M8:v.*]], #0x1{{$}}
+; CHECK-DAG: shl [[S1:v.*]], v0.8b, #7
+; CHECK-DAG: shl [[S2:v.*]], v0.8b, #5
+; CHECK-DAG: shl [[S3:v.*]], v0.8b, #3
+; CHECK-DAG: shl [[S4:v.*]], v0.8b, #1
+; CHECK-DAG: ushr [[S5:v.*]], v0.8b, #1
+; CHECK-DAG: ushr [[S6:v.*]], v0.8b, #3
+; CHECK-DAG: ushr [[S7:v.*]], v0.8b, #5
+; CHECK-DAG: ushr [[S8:v.*]], v0.8b, #7
+; CHECK-DAG: and [[A1:v.*]], [[S1]], [[M1]]
+; CHECK-DAG: and [[A2:v.*]], [[S2]], [[M2]]
+; CHECK-DAG: and [[A3:v.*]], [[S3]], [[M3]]
+; CHECK-DAG: and [[A4:v.*]], [[S4]], [[M4]]
+; CHECK-DAG: and [[A5:v.*]], [[S5]], [[M5]]
+; CHECK-DAG: and [[A6:v.*]], [[S6]], [[M6]]
+; CHECK-DAG: and [[A7:v.*]], [[S7]], [[M7]]
+; CHECK-DAG: and [[A8:v.*]], [[S8]], [[M8]]
+
+; The rest can be ORRed together in any order; it's not worth the test
+; maintenance to match them precisely.
+; CHECK-DAG: orr
+; CHECK-DAG: orr
+; CHECK-DAG: orr
+; CHECK-DAG: orr
+; CHECK-DAG: orr
+; CHECK-DAG: orr
+; CHECK-DAG: orr
+; CHECK: ret
+ %b = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a)
+ ret <8 x i8> %b
+}
diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index c78fabac6187..004267f4e4e0 100644
--- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -403,6 +403,32 @@ return: ; preds = %land.lhs.true, %con
ret i32 %retval.0
}
+define void @cmp_shifted(i32 %in, i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: cmp_shifted:
+; CHECK: cmp w0, #1
+; [...]
+; CHECK: cmp w0, #2, lsl #12
+
+ %tst_low = icmp sgt i32 %in, 0
+ br i1 %tst_low, label %true, label %false
+
+true:
+ call i32 @zoo(i32 128)
+ ret void
+
+false:
+ %tst = icmp sgt i32 %in, 8191
+ br i1 %tst, label %truer, label %falser
+
+truer:
+ call i32 @zoo(i32 42)
+ ret void
+
+falser:
+ call i32 @zoo(i32 1)
+ ret void
+}
+
declare i32 @zoo(i32)
declare double @yoo(i32)
diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll
index 1266842fcc6d..a8399f92ebe4 100644
--- a/test/CodeGen/AArch64/cpus.ll
+++ b/test/CodeGen/AArch64/cpus.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a35 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a72 2>&1 | FileCheck %s
diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll
new file mode 100644
index 000000000000..a9ae00c8d270
--- /dev/null
+++ b/test/CodeGen/AArch64/cxx-tlscc.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s
+; Shrink wrapping currently does not kick in because we have a TLS CALL
+; in the entry block and it will clobber the link register.
+
+%struct.S = type { i8 }
+
+@sg = internal thread_local global %struct.S zeroinitializer, align 1
+@__dso_handle = external global i8
+@__tls_guard = internal thread_local unnamed_addr global i1 false
+
+declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
+declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
+declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
+
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
+ %.b.i = load i1, i1* @__tls_guard, align 1
+ br i1 %.b.i, label %__tls_init.exit, label %init.i
+
+init.i:
+ store i1 true, i1* @__tls_guard, align 1
+ %call.i.i = tail call %struct.S* @_ZN1SC1Ev(%struct.S* nonnull @sg)
+ %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (%struct.S* (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle)
+ br label %__tls_init.exit
+
+__tls_init.exit:
+ ret %struct.S* @sg
+}
+
+; CHECK-LABEL: _ZTW2sg
+; CHECK-NOT: stp d31, d30
+; CHECK-NOT: stp d29, d28
+; CHECK-NOT: stp d27, d26
+; CHECK-NOT: stp d25, d24
+; CHECK-NOT: stp d23, d22
+; CHECK-NOT: stp d21, d20
+; CHECK-NOT: stp d19, d18
+; CHECK-NOT: stp d17, d16
+; CHECK-NOT: stp d7, d6
+; CHECK-NOT: stp d5, d4
+; CHECK-NOT: stp d3, d2
+; CHECK-NOT: stp d1, d0
+; CHECK-NOT: stp x20, x19
+; CHECK-NOT: stp x14, x13
+; CHECK-NOT: stp x12, x11
+; CHECK-NOT: stp x10, x9
+; CHECK-NOT: stp x8, x7
+; CHECK-NOT: stp x6, x5
+; CHECK-NOT: stp x4, x3
+; CHECK-NOT: stp x2, x1
+; CHECK: blr
+; CHECK: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
+; CHECK: blr
+; CHECK: tlv_atexit
+; CHECK: [[BB_end]]:
+; CHECK: blr
+; CHECK-NOT: ldp x2, x1
+; CHECK-NOT: ldp x4, x3
+; CHECK-NOT: ldp x6, x5
+; CHECK-NOT: ldp x8, x7
+; CHECK-NOT: ldp x10, x9
+; CHECK-NOT: ldp x12, x11
+; CHECK-NOT: ldp x14, x13
+; CHECK-NOT: ldp x20, x19
+; CHECK-NOT: ldp d1, d0
+; CHECK-NOT: ldp d3, d2
+; CHECK-NOT: ldp d5, d4
+; CHECK-NOT: ldp d7, d6
+; CHECK-NOT: ldp d17, d16
+; CHECK-NOT: ldp d19, d18
+; CHECK-NOT: ldp d21, d20
+; CHECK-NOT: ldp d23, d22
+; CHECK-NOT: ldp d25, d24
+; CHECK-NOT: ldp d27, d26
+; CHECK-NOT: ldp d29, d28
+; CHECK-NOT: ldp d31, d30
diff --git a/test/CodeGen/AArch64/dag-combine-select.ll b/test/CodeGen/AArch64/dag-combine-select.ll
new file mode 100644
index 000000000000..45b998d9136d
--- /dev/null
+++ b/test/CodeGen/AArch64/dag-combine-select.ll
@@ -0,0 +1,47 @@
+; RUN: llc -disable-post-ra -o - %s | FileCheck %s
+target triple = "arm64--"
+
+@out = internal global i32 0, align 4
+
+; Ensure that we transform select(C0, x, select(C1, x, y)) towards
+; select(C0 | C1, x, y) so we can use CMP;CCMP for the implementation.
+; CHECK-LABEL: test0:
+; CHECK: cmp w0, #7
+; CHECK: ccmp w1, #0, #0, ne
+; CHECK: csel w0, w1, w2, gt
+; CHECK: ret
+define i32 @test0(i32 %v0, i32 %v1, i32 %v2) {
+ %cmp1 = icmp eq i32 %v0, 7
+ %cmp2 = icmp sgt i32 %v1, 0
+ %sel0 = select i1 %cmp1, i32 %v1, i32 %v2
+ %sel1 = select i1 %cmp2, i32 %v1, i32 %sel0
+ ret i32 %sel1
+}
+
+; Usually we keep select(C0 | C1, x, y) as is on aarch64 to create CMP;CCMP
+; sequences. This case should be transformed to select(C0, select(C1, x, y), y)
+; anyway to get CSE effects.
+; CHECK-LABEL: test1:
+; CHECK-NOT: ccmp
+; CHECK: cmp w0, #7
+; CHECK: adrp x[[OUTNUM:[0-9]+]], out
+; CHECK: csel w[[SEL0NUM:[0-9]+]], w1, w2, eq
+; CHECK: cmp w[[SEL0NUM]], #13
+; CHECK: csel w[[SEL1NUM:[0-9]+]], w1, w2, lo
+; CHECK: cmp w0, #42
+; CHECK: csel w[[SEL2NUM:[0-9]+]], w1, w[[SEL1NUM]], eq
+; CHECK: str w[[SEL1NUM]], [x[[OUTNUM]], :lo12:out]
+; CHECK: str w[[SEL2NUM]], [x[[OUTNUM]], :lo12:out]
+; CHECK: ret
+define void @test1(i32 %bitset, i32 %val0, i32 %val1) {
+ %cmp1 = icmp eq i32 %bitset, 7
+ %cond = select i1 %cmp1, i32 %val0, i32 %val1
+ %cmp5 = icmp ult i32 %cond, 13
+ %cond11 = select i1 %cmp5, i32 %val0, i32 %val1
+ %cmp3 = icmp eq i32 %bitset, 42
+ %or.cond = or i1 %cmp3, %cmp5
+ %cond17 = select i1 %or.cond, i32 %val0, i32 %val1
+ store volatile i32 %cond11, i32* @out, align 4
+ store volatile i32 %cond17, i32* @out, align 4
+ ret void
+}
diff --git a/test/CodeGen/AArch64/divrem.ll b/test/CodeGen/AArch64/divrem.ll
new file mode 100644
index 000000000000..9f648eb63eac
--- /dev/null
+++ b/test/CodeGen/AArch64/divrem.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s
+
+; SDIVREM/UDIVREM DAG nodes are generated but expanded when lowering and
+; should not generate select error.
+define <2 x i32> @test_udivrem(<2 x i32> %x, < 2 x i32> %y, < 2 x i32>* %z) {
+; CHECK-LABEL: test_udivrem
+; CHECK-DAG: udivrem
+; CHECK-NOT: LLVM ERROR: Cannot select
+ %div = udiv <2 x i32> %x, %y
+ store <2 x i32> %div, <2 x i32>* %z
+ %1 = urem <2 x i32> %x, %y
+ ret <2 x i32> %1
+}
+
+define <4 x i32> @test_sdivrem(<4 x i32> %x, <4 x i32>* %y) {
+; CHECK-LABEL: test_sdivrem
+; CHECK-DAG: sdivrem
+ %div = sdiv <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 >
+ store <4 x i32> %div, <4 x i32>* %y
+ %1 = srem <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 >
+ ret <4 x i32> %1
+}
diff --git a/test/CodeGen/AArch64/emutls.ll b/test/CodeGen/AArch64/emutls.ll
new file mode 100644
index 000000000000..ac5762edba98
--- /dev/null
+++ b/test/CodeGen/AArch64/emutls.ll
@@ -0,0 +1,116 @@
+; RUN: llc -emulated-tls -mtriple=aarch64-linux-android \
+; RUN: -relocation-model=pic < %s | FileCheck -check-prefix=ARM64 %s
+
+; Copied from X86/emutls.ll
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+; ARM64-LABEL: my_get_xyz:
+; ARM64: adrp x0, :got:my_emutls_v_xyz
+; ARM64-NEXT: ldr x0, [x0, :got_lo12:my_emutls_v_xyz]
+; ARM64-NEXT: bl my_emutls_get_address
+; ARM64-NEXT: ldr w0, [x0]
+; ARM64-NEXT: ldp x29, x30, [sp]
+
+entry:
+ %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+ %0 = bitcast i8* %call to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+@i1 = thread_local global i32 15
+@i2 = external thread_local global i32
+@i3 = internal thread_local global i32 15
+@i4 = hidden thread_local global i32 15
+@i5 = external hidden thread_local global i32
+@s1 = thread_local global i16 15
+@b1 = thread_local global i8 0
+
+define i32 @f1() {
+; ARM64-LABEL: f1:
+; ARM64: adrp x0, :got:__emutls_v.i1
+; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1]
+; ARM64-NEXT: bl __emutls_get_address
+; ARM64-NEXT: ldr w0, [x0]
+; ARM64-NEXT: ldp x29, x30, [sp]
+
+entry:
+ %tmp1 = load i32, i32* @i1
+ ret i32 %tmp1
+}
+
+define i32* @f2() {
+; ARM64-LABEL: f2:
+; ARM64: adrp x0, :got:__emutls_v.i1
+; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1]
+; ARM64-NEXT: bl __emutls_get_address
+; ARM64-NEXT: ldp x29, x30, [sp]
+
+entry:
+ ret i32* @i1
+}
+
+;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t.
+
+; ARM64 .section .data.rel.local,
+; ARM64-LABEL: __emutls_v.i1:
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 0
+; ARM64-NEXT: .xword __emutls_t.i1
+
+; ARM64 .section .rodata,
+; ARM64-LABEL: __emutls_t.i1:
+; ARM64-NEXT: .word 15
+
+; ARM64-NOT: __emutls_v.i2
+
+; ARM64 .section .data.rel.local,
+; ARM64-LABEL: __emutls_v.i3:
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 0
+; ARM64-NEXT: .xword __emutls_t.i3
+
+; ARM64 .section .rodata,
+; ARM64-LABEL: __emutls_t.i3:
+; ARM64-NEXT: .word 15
+
+; ARM64 .section .data.rel.local,
+; ARM64-LABEL: __emutls_v.i4:
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 0
+; ARM64-NEXT: .xword __emutls_t.i4
+
+; ARM64 .section .rodata,
+; ARM64-LABEL: __emutls_t.i4:
+; ARM64-NEXT: .word 15
+
+; ARM64-NOT: __emutls_v.i5:
+; ARM64 .hidden __emutls_v.i5
+; ARM64-NOT: __emutls_v.i5:
+
+; ARM64 .section .data.rel.local,
+; ARM64-LABEL: __emutls_v.s1:
+; ARM64-NEXT: .xword 2
+; ARM64-NEXT: .xword 2
+; ARM64-NEXT: .xword 0
+; ARM64-NEXT: .xword __emutls_t.s1
+
+; ARM64 .section .rodata,
+; ARM64-LABEL: __emutls_t.s1:
+; ARM64-NEXT: .hword 15
+
+; ARM64 .section .data.rel.local,
+; ARM64-LABEL: __emutls_v.b1:
+; ARM64-NEXT: .xword 1
+; ARM64-NEXT: .xword 1
+; ARM64-NEXT: .xword 0
+; ARM64-NEXT: .xword 0
+
+; ARM64-NOT: __emutls_t.b1
diff --git a/test/CodeGen/AArch64/emutls_generic.ll b/test/CodeGen/AArch64/emutls_generic.ll
new file mode 100644
index 000000000000..7664db3df8d2
--- /dev/null
+++ b/test/CodeGen/AArch64/emutls_generic.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -relocation-model=pic \
+; RUN: | FileCheck -check-prefix=ARM_64 %s
+; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -relocation-model=pic -O3 \
+; RUN: | FileCheck -check-prefix=ARM_64 %s
+; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -O3 \
+; RUN: | FileCheck -check-prefix=ARM_64 %s
+
+; Make sure that TLS symbols are emitted in expected order.
+
+@external_x = external thread_local global i32, align 8
+@external_y = thread_local global i8 7, align 2
+@internal_y = internal thread_local global i64 9, align 16
+
+define i32* @get_external_x() {
+entry:
+ ret i32* @external_x
+}
+
+define i8* @get_external_y() {
+entry:
+ ret i8* @external_y
+}
+
+define i64* @get_internal_y() {
+entry:
+ ret i64* @internal_y
+}
+
+; ARM_64-LABEL: get_external_x:
+; ARM_64: __emutls_v.external_x
+; ARM_64: __emutls_get_address
+; ARM_64-LABEL: get_external_y:
+; ARM_64: __emutls_v.external_y
+; ARM_64: __emutls_get_address
+; ARM_64-LABEL: get_internal_y:
+; ARM_64: __emutls_v.internal_y
+; ARM_64: __emutls_get_address
+; ARM_64-NOT: __emutls_t.external_x
+; ARM_64-NOT: __emutls_v.external_x:
+; ARM_64: .align 3
+; ARM_64-LABEL: __emutls_v.external_y:
+; ARM_64-NEXT: .xword 1
+; ARM_64-NEXT: .xword 2
+; ARM_64-NEXT: .xword 0
+; ARM_64-NEXT: .xword __emutls_t.external_y
+; ARM_64-NOT: __emutls_v.external_x:
+; ARM_64: .section .rodata,
+; ARM_64-LABEL: __emutls_t.external_y:
+; ARM_64-NEXT: .byte 7
+; ARM_64: .data
+; ARM_64: .align 3
+; ARM_64-LABEL: __emutls_v.internal_y:
+; ARM_64-NEXT: .xword 8
+; ARM_64-NEXT: .xword 16
+; ARM_64-NEXT: .xword 0
+; ARM_64-NEXT: .xword __emutls_t.internal_y
+; ARM_64: .section .rodata,
+; ARM_64-LABEL: __emutls_t.internal_y:
+; ARM_64-NEXT: .xword 9
diff --git a/test/CodeGen/AArch64/eon.ll b/test/CodeGen/AArch64/eon.ll
new file mode 100644
index 000000000000..ea61ce34c050
--- /dev/null
+++ b/test/CodeGen/AArch64/eon.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; Check that the eon instruction is generated instead of eor,movn
+define i64 @test1(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test1:
+; CHECK: eon
+; CHECK: ret
+entry:
+ %shl = shl i64 %b, 4
+ %neg = xor i64 %a, -1
+ %xor = xor i64 %shl, %neg
+ ret i64 %xor
+}
+
+; Same check with mutliple uses of %neg
+define i64 @test2(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test2:
+; CHECK: eon
+; CHECK: eon
+; CHECK: lsl
+; CHECK: ret
+entry:
+ %shl = shl i64 %b, 4
+ %neg = xor i64 %shl, -1
+ %xor = xor i64 %neg, %a
+ %xor1 = xor i64 %c, %neg
+ %shl2 = shl i64 %xor, %xor1
+ ret i64 %shl2
+}
diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll
index be5e2e51385d..e8ecb13b3564 100644
--- a/test/CodeGen/AArch64/f16-instructions.ll
+++ b/test/CodeGen/AArch64/f16-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -143,6 +143,33 @@ define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
ret half %r
}
+; CHECK-LABEL: test_select_cc_f32_f16:
+; CHECK-DAG: fcvt s2, h2
+; CHECK-DAG: fcvt s3, h3
+; CHECK-NEXT: fcmp s2, s3
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: ret
+define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
+ %cc = fcmp une half %c, %d
+ %r = select i1 %cc, float %a, float %b
+ ret float %r
+}
+
+; CHECK-LABEL: test_select_cc_f16_f32:
+; CHECK-DAG: fcvt s0, h0
+; CHECK-DAG: fcvt s1, h1
+; CHECK-DAG: fcmp s2, s3
+; CHECK-DAG: cset w8, ne
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
+ %cc = fcmp une float %c, %d
+ %r = select i1 %cc, half %a, half %b
+ ret half %r
+}
+
; CHECK-LABEL: test_fcmp_une:
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fcvt s0, h0
@@ -644,13 +671,10 @@ define half @test_fabs(half %a) #0 {
}
; CHECK-LABEL: test_minnum:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: bl {{_?}}fminf
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fminnm s0, s0, s1
; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldp x29, x30, [sp], #16
; CHECK-NEXT: ret
define half @test_minnum(half %a, half %b) #0 {
%r = call half @llvm.minnum.f16(half %a, half %b)
@@ -658,13 +682,10 @@ define half @test_minnum(half %a, half %b) #0 {
}
; CHECK-LABEL: test_maxnum:
-; CHECK-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fcvt s1, h1
-; CHECK-NEXT: bl {{_?}}fmaxf
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: ldp x29, x30, [sp], #16
; CHECK-NEXT: ret
define half @test_maxnum(half %a, half %b) #0 {
%r = call half @llvm.maxnum.f16(half %a, half %b)
@@ -683,11 +704,50 @@ define half @test_copysign(half %a, half %b) #0 {
ret half %r
}
-; CHECK-LABEL: test_floor:
-; CHECK-NEXT: fcvt s1, h0
-; CHECK-NEXT: frintm s0, s1
+; CHECK-LABEL: test_copysign_f32:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: ret
+define half @test_copysign_f32(half %a, float %b) #0 {
+ %tb = fptrunc float %b to half
+ %r = call half @llvm.copysign.f16(half %a, half %tb)
+ ret half %r
+}
+
+; CHECK-LABEL: test_copysign_f64:
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_copysign_f64(half %a, double %b) #0 {
+ %tb = fptrunc double %b to half
+ %r = call half @llvm.copysign.f16(half %a, half %tb)
+ ret half %r
+}
+
+; Check that the FP promotion will use a truncating FP_ROUND, so we can fold
+; away the (fpext (fp_round <result>)) here.
+
+; CHECK-LABEL: test_copysign_extended:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+define float @test_copysign_extended(half %a, half %b) #0 {
+ %r = call half @llvm.copysign.f16(half %a, half %b)
+ %xr = fpext half %r to float
+ ret float %xr
+}
+
+; CHECK-LABEL: test_floor:
+; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
+; CHECK-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]]
+; CHECK-NEXT: fcvt h0, [[INT32]]
; CHECK-NEXT: ret
define half @test_floor(half %a) #0 {
%r = call half @llvm.floor.f16(half %a)
@@ -695,10 +755,9 @@ define half @test_floor(half %a) #0 {
}
; CHECK-LABEL: test_ceil:
-; CHECK-NEXT: fcvt s1, h0
-; CHECK-NEXT: frintp s0, s1
-; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
+; CHECK-NEXT: frintp [[INT32:s[0-9]+]], [[FLOAT32]]
+; CHECK-NEXT: fcvt h0, [[INT32]]
; CHECK-NEXT: ret
define half @test_ceil(half %a) #0 {
%r = call half @llvm.ceil.f16(half %a)
@@ -706,10 +765,9 @@ define half @test_ceil(half %a) #0 {
}
; CHECK-LABEL: test_trunc:
-; CHECK-NEXT: fcvt s1, h0
-; CHECK-NEXT: frintz s0, s1
-; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
+; CHECK-NEXT: frintz [[INT32:s[0-9]+]], [[FLOAT32]]
+; CHECK-NEXT: fcvt h0, [[INT32]]
; CHECK-NEXT: ret
define half @test_trunc(half %a) #0 {
%r = call half @llvm.trunc.f16(half %a)
@@ -737,10 +795,9 @@ define half @test_nearbyint(half %a) #0 {
}
; CHECK-LABEL: test_round:
-; CHECK-NEXT: fcvt s1, h0
-; CHECK-NEXT: frinta s0, s1
-; CHECK-NEXT: fcvt h0, s0
-; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
+; CHECK-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]]
+; CHECK-NEXT: fcvt h0, [[INT32]]
; CHECK-NEXT: ret
define half @test_round(half %a) #0 {
%r = call half @llvm.round.f16(half %a)
diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll
new file mode 100644
index 000000000000..55fbf63319ee
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel -fast-isel-abort=0 -verify-machineinstrs < %s | FileCheck %s
+
+define void @test(i64 %a, i64 %b, i2* %c) {
+; CHECK-LABEL: test
+; CHECK: and [[REG1:w[0-9]+]], w8, #0x3
+; CHECK-NEXT: strb [[REG1]], {{\[}}x2{{\]}}
+; CHECK-NEXT: tbz w9, #0,
+ %1 = trunc i64 %a to i2
+ %2 = trunc i64 %b to i1
+; Force fast-isel to fall back to SDAG.
+ store i2 %1, i2* %c, align 8
+ br i1 %2, label %bb1, label %bb2
+
+bb1:
+ ret void
+
+bb2:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
index da6ddbf5101e..e04a62b85c8e 100644
--- a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
+++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
-; CHECK-label: test_or
+; CHECK-LABEL: test_or
; CHECK: cbnz w0, {{LBB[0-9]+_2}}
; CHECK: cbz w1, {{LBB[0-9]+_1}}
define i64 @test_or(i32 %a, i32 %b) {
@@ -18,7 +18,7 @@ bb4:
ret i64 %2
}
-; CHECK-label: test_ans
+; CHECK-LABEL: test_and
; CHECK: cbz w0, {{LBB[0-9]+_2}}
; CHECK: cbnz w1, {{LBB[0-9]+_3}}
define i64 @test_and(i32 %a, i32 %b) {
@@ -36,7 +36,55 @@ bb4:
ret i64 %2
}
+; If the branch is unpredictable, don't add another branch.
+
+; CHECK-LABEL: test_or_unpredictable
+; CHECK: cmp w0, #0
+; CHECK-NEXT: cset w8, eq
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: cset w9, eq
+; CHECK-NEXT: orr w8, w8, w9
+; CHECK-NEXT: tbnz w8, #0,
+define i64 @test_or_unpredictable(i32 %a, i32 %b) {
+bb1:
+ %0 = icmp eq i32 %a, 0
+ %1 = icmp eq i32 %b, 0
+ %or.cond = or i1 %0, %1
+ br i1 %or.cond, label %bb3, label %bb4, !unpredictable !2
+
+bb3:
+ ret i64 0
+
+bb4:
+ %2 = call i64 @bar()
+ ret i64 %2
+}
+
+; CHECK-LABEL: test_and_unpredictable
+; CHECK: cmp w0, #0
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: cset w9, ne
+; CHECK-NEXT: and w8, w8, w9
+; CHECK-NEXT: tbz w8, #0,
+define i64 @test_and_unpredictable(i32 %a, i32 %b) {
+bb1:
+ %0 = icmp ne i32 %a, 0
+ %1 = icmp ne i32 %b, 0
+ %or.cond = and i1 %0, %1
+ br i1 %or.cond, label %bb4, label %bb3, !unpredictable !2
+
+bb3:
+ ret i64 0
+
+bb4:
+ %2 = call i64 @bar()
+ ret i64 %2
+}
+
declare i64 @bar()
!0 = !{!"branch_weights", i32 5128, i32 32}
!1 = !{!"branch_weights", i32 1024, i32 4136}
+!2 = !{}
+
diff --git a/test/CodeGen/AArch64/fast-isel-cmp-vec.ll b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
new file mode 100644
index 000000000000..2855419a1ca0
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll
@@ -0,0 +1,100 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs \
+; RUN: -aarch64-atomic-cfg-tidy=0 -disable-cgp -disable-branch-fold \
+; RUN: < %s | FileCheck %s
+
+;
+; Verify that we don't mess up vector comparisons in fast-isel.
+;
+
+define <2 x i32> @icmp_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: icmp_v2i32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: cmeq.2s [[CMP:v[0-9]+]], v0, #0
+; CHECK-NEXT: ; BB#1:
+; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: and.8b v0, [[CMP]], [[MASK]]
+; CHECK-NEXT: ret
+ %c = icmp eq <2 x i32> %a, zeroinitializer
+ br label %bb2
+bb2:
+ %z = zext <2 x i1> %c to <2 x i32>
+ ret <2 x i32> %z
+}
+
+define <2 x i32> @icmp_constfold_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: icmp_constfold_v2i32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff
+; CHECK-NEXT: ; BB#1:
+; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: and.8b v0, v[[CMP]], [[MASK]]
+; CHECK-NEXT: ret
+ %1 = icmp eq <2 x i32> %a, %a
+ br label %bb2
+bb2:
+ %2 = zext <2 x i1> %1 to <2 x i32>
+ ret <2 x i32> %2
+}
+
+define <4 x i32> @icmp_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: icmp_v4i32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: cmeq.4s [[CMP:v[0-9]+]], v0, #0
+; CHECK-NEXT: xtn.4h [[CMPV4I16:v[0-9]+]], [[CMP]]
+; CHECK-NEXT: ; BB#1:
+; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], [[CMPV4I16]], [[MASK]]
+; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0
+; CHECK-NEXT: ret
+ %c = icmp eq <4 x i32> %a, zeroinitializer
+ br label %bb2
+bb2:
+ %z = zext <4 x i1> %c to <4 x i32>
+ ret <4 x i32> %z
+}
+
+define <4 x i32> @icmp_constfold_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: icmp_constfold_v4i32:
+; CHECK: ; BB#0:
+; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff
+; CHECK-NEXT: ; BB#1:
+; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], v[[CMP]], [[MASK]]
+; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0
+; CHECK-NEXT: ret
+ %1 = icmp eq <4 x i32> %a, %a
+ br label %bb2
+bb2:
+ %2 = zext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <16 x i8> @icmp_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: icmp_v16i8:
+; CHECK: ; BB#0:
+; CHECK-NEXT: cmeq.16b [[CMP:v[0-9]+]], v0, #0
+; CHECK-NEXT: ; BB#1:
+; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: and.16b v0, [[CMP]], [[MASK]]
+; CHECK-NEXT: ret
+ %c = icmp eq <16 x i8> %a, zeroinitializer
+ br label %bb2
+bb2:
+ %z = zext <16 x i1> %c to <16 x i8>
+ ret <16 x i8> %z
+}
+
+define <16 x i8> @icmp_constfold_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: icmp_constfold_v16i8:
+; CHECK: ; BB#0:
+; CHECK-NEXT: movi.2d [[CMP:v[0-9]+]], #0xffffffffffffffff
+; CHECK-NEXT: ; BB#1:
+; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #0x1
+; CHECK-NEXT: and.16b v0, [[CMP]], [[MASK]]
+; CHECK-NEXT: ret
+ %1 = icmp eq <16 x i8> %a, %a
+ br label %bb2
+bb2:
+ %2 = zext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
diff --git a/test/CodeGen/AArch64/fast-isel-folded-shift.ll b/test/CodeGen/AArch64/fast-isel-folded-shift.ll
new file mode 100644
index 000000000000..b881ef5c6d52
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-folded-shift.ll
@@ -0,0 +1,125 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -verify-machineinstrs < %s | FileCheck %s
+
+; Test invalid shift values. This will fall-back to SDAG.
+; AND
+define zeroext i8 @and_rs_i8(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: and_rs_i8
+; CHECK: and [[REG:w[0-9]+]], w0, w8
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xff
+ %1 = shl i8 %b, 8
+ %2 = and i8 %a, %1
+ ret i8 %2
+}
+
+define zeroext i16 @and_rs_i16(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: and_rs_i16
+; CHECK: and [[REG:w[0-9]+]], w0, w8
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xffff
+ %1 = shl i16 %b, 16
+ %2 = and i16 %a, %1
+ ret i16 %2
+}
+
+define i32 @and_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: and_rs_i32
+; CHECK: and w0, w0, w8
+ %1 = shl i32 %b, 32
+ %2 = and i32 %a, %1
+ ret i32 %2
+}
+
+define i64 @and_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: and_rs_i64
+; CHECK: and x0, x0, x8
+ %1 = shl i64 %b, 64
+ %2 = and i64 %a, %1
+ ret i64 %2
+}
+
+; OR
+define zeroext i8 @or_rs_i8(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: or_rs_i8
+; CHECK: orr [[REG:w[0-9]+]], w0, w8
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xff
+ %1 = shl i8 %b, 8
+ %2 = or i8 %a, %1
+ ret i8 %2
+}
+
+define zeroext i16 @or_rs_i16(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: or_rs_i16
+; CHECK: orr [[REG:w[0-9]+]], w0, w8
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xffff
+ %1 = shl i16 %b, 16
+ %2 = or i16 %a, %1
+ ret i16 %2
+}
+
+define i32 @or_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: or_rs_i32
+; CHECK: orr w0, w0, w8
+ %1 = shl i32 %b, 32
+ %2 = or i32 %a, %1
+ ret i32 %2
+}
+
+define i64 @or_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: or_rs_i64
+; CHECK: orr x0, x0, x8
+ %1 = shl i64 %b, 64
+ %2 = or i64 %a, %1
+ ret i64 %2
+}
+
+; XOR
+define zeroext i8 @xor_rs_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: xor_rs_i8
+; CHECK: eor [[REG:w[0-9]+]], w0, w8
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xff
+ %1 = shl i8 %b, 8
+ %2 = xor i8 %a, %1
+ ret i8 %2
+}
+
+define zeroext i16 @xor_rs_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: xor_rs_i16
+; CHECK: eor [[REG:w[0-9]+]], w0, w8
+; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xffff
+ %1 = shl i16 %b, 16
+ %2 = xor i16 %a, %1
+ ret i16 %2
+}
+
+define i32 @xor_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: xor_rs_i32
+; CHECK: eor w0, w0, w8
+ %1 = shl i32 %b, 32
+ %2 = xor i32 %a, %1
+ ret i32 %2
+}
+
+define i64 @xor_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: xor_rs_i64
+; CHECK: eor x0, x0, x8
+ %1 = shl i64 %b, 64
+ %2 = xor i64 %a, %1
+ ret i64 %2
+}
+
+;ADD
+define i32 @add_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: add_rs_i32
+; CHECK: add w0, w0, w8
+ %1 = shl i32 %b, 32
+ %2 = add i32 %a, %1
+ ret i32 %2
+}
+
+define i64 @add_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: add_rs_i64
+; CHECK: add x0, x0, x8
+ %1 = shl i64 %b, 64
+ %2 = add i64 %a, %1
+ ret i64 %2
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-logic-op.ll b/test/CodeGen/AArch64/fast-isel-logic-op.ll
index 89c5f2c48024..16d0429fe98d 100644
--- a/test/CodeGen/AArch64/fast-isel-logic-op.ll
+++ b/test/CodeGen/AArch64/fast-isel-logic-op.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
; AND
diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll
index a392619a768d..b5e03f08280f 100644
--- a/test/CodeGen/AArch64/fastcc-reserved.ll
+++ b/test/CodeGen/AArch64/fastcc-reserved.ll
@@ -16,7 +16,7 @@ define fastcc void @foo(i32 %in) {
; CHECK: mov x29, sp
; Reserve space for call-frame:
-; CHECK: sub sp, sp, #16
+; CHECK: str w{{[0-9]+}}, [sp, #-16]!
call fastcc void @will_pop([8 x i32] undef, i32 42)
; CHECK: bl will_pop
@@ -42,7 +42,7 @@ define void @foo1(i32 %in) {
; CHECK: mov x29, sp
; Reserve space for call-frame
-; CHECK: sub sp, sp, #16
+; CHECK: str w{{[0-9]+}}, [sp, #-16]!
call void @wont_pop([8 x i32] undef, i32 42)
; CHECK: bl wont_pop
diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll
index 9917fcd044fd..f021eb232618 100644
--- a/test/CodeGen/AArch64/fastcc.ll
+++ b/test/CodeGen/AArch64/fastcc.ll
@@ -7,12 +7,12 @@
define fastcc void @func_stack0() {
; CHECK-LABEL: func_stack0:
; CHECK: mov x29, sp
-; CHECK-NEXT: sub sp, sp, #32
+; CHECK: str w{{[0-9]+}}, [sp, #-32]!
; CHECK-TAIL-LABEL: func_stack0:
; CHECK-TAIL: stp x29, x30, [sp, #-16]!
; CHECK-TAIL-NEXT: mov x29, sp
-; CHECK-TAIL-NEXT: sub sp, sp, #32
+; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]!
call fastcc void @func_stack8([8 x i32] undef, i32 42)
@@ -55,13 +55,13 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {
; CHECK-LABEL: func_stack8:
; CHECK: stp x29, x30, [sp, #-16]!
; CHECK: mov x29, sp
-; CHECK: sub sp, sp, #32
+; CHECK: str w{{[0-9]+}}, [sp, #-32]!
; CHECK-TAIL-LABEL: func_stack8:
; CHECK-TAIL: stp x29, x30, [sp, #-16]!
; CHECK-TAIL: mov x29, sp
-; CHECK-TAIL: sub sp, sp, #32
+; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]!
call fastcc void @func_stack8([8 x i32] undef, i32 42)
diff --git a/test/CodeGen/AArch64/fcvt_combine.ll b/test/CodeGen/AArch64/fcvt_combine.ll
new file mode 100644
index 000000000000..093ce4a4cd85
--- /dev/null
+++ b/test/CodeGen/AArch64/fcvt_combine.ll
@@ -0,0 +1,154 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK-NOT: fmul.2s
+; CHECK: fcvtzs.2s v0, v0, #4
+; CHECK: ret
+define <2 x i32> @test1(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+; CHECK-LABEL: test2
+; CHECK-NOT: fmul.4s
+; CHECK: fcvtzs.4s v0, v0, #3
+; CHECK: ret
+define <4 x i32> @test2(<4 x float> %f) {
+ %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+ %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32>
+ ret <4 x i32> %vcvt.i
+}
+
+; CHECK-LABEL: test3
+; CHECK-NOT: fmul.2d
+; CHECK: fcvtzs.2d v0, v0, #5
+; CHECK: ret
+define <2 x i64> @test3(<2 x double> %d) {
+ %mul.i = fmul <2 x double> %d, <double 32.000000e+00, double 32.000000e+00>
+ %vcvt.i = fptosi <2 x double> %mul.i to <2 x i64>
+ ret <2 x i64> %vcvt.i
+}
+
+; Truncate double to i32
+; CHECK-LABEL: test4
+; CHECK-NOT: fmul.2d v0, v0, #4
+; CHECK: fcvtzs.2d v0, v0
+; CHECK: xtn.2s
+; CHECK: ret
+define <2 x i32> @test4(<2 x double> %d) {
+ %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
+ %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+; Truncate float to i16
+; CHECK-LABEL: test5
+; CHECK-NOT: fmul.2s
+; CHECK: fcvtzs.2s v0, v0, #4
+; CHECK: ret
+define <2 x i16> @test5(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i16>
+ ret <2 x i16> %vcvt.i
+}
+
+; Don't convert float to i64
+; CHECK-LABEL: test6
+; CHECK: fmov.2s v1, #16.00000000
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtl v0.2d, v0.2s
+; CHECK: fcvtzs.2d v0, v0
+; CHECK: ret
+define <2 x i64> @test6(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i64>
+ ret <2 x i64> %vcvt.i
+}
+
+; Check unsigned conversion.
+; CHECK-LABEL: test7
+; CHECK-NOT: fmul.2s
+; CHECK: fcvtzu.2s v0, v0, #4
+; CHECK: ret
+define <2 x i32> @test7(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 16.000000e+00>
+ %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+; Test which should not fold due to non-power of 2.
+; CHECK-LABEL: test8
+; CHECK: fmov.2s v1, #17.00000000
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzu.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test8(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float 17.000000e+00, float 17.000000e+00>
+ %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+; Test which should not fold due to non-matching power of 2.
+; CHECK-LABEL: test9
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzu.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test9(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00>
+ %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+; Don't combine all undefs.
+; CHECK-LABEL: test10
+; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: ret
+define <2 x i32> @test10(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float undef, float undef>
+ %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+; Combine if mix of undef and pow2.
+; CHECK-LABEL: test11
+; CHECK: fcvtzu.2s v0, v0, #3
+; CHECK: ret
+define <2 x i32> @test11(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
+ %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+; Don't combine when multiplied by 0.0.
+; CHECK-LABEL: test12
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzs.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test12(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+; Test which should not fold due to power of 2 out of range (i.e., 2^33).
+; CHECK-LABEL: test13
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzs.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test13(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK-LABEL: test14
+; CHECK: fcvtzs.2s v0, v0, #32
+; CHECK: ret
+define <2 x i32> @test14(<2 x float> %f) {
+ %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+ ret <2 x i32> %vcvt.i
+}
diff --git a/test/CodeGen/AArch64/fdiv_combine.ll b/test/CodeGen/AArch64/fdiv_combine.ll
new file mode 100644
index 000000000000..6f38a267ec3f
--- /dev/null
+++ b/test/CodeGen/AArch64/fdiv_combine.ll
@@ -0,0 +1,115 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
+
+; Test signed conversion.
+; CHECK-LABEL: @test1
+; CHECK: scvtf.2s v0, v0, #4
+; CHECK: ret
+define <2 x float> @test1(<2 x i32> %in) {
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 16.0, float 16.0>
+ ret <2 x float> %div.i
+}
+
+; Test unsigned conversion.
+; CHECK-LABEL: @test2
+; CHECK: ucvtf.2s v0, v0, #3
+; CHECK: ret
+define <2 x float> @test2(<2 x i32> %in) {
+entry:
+ %vcvt.i = uitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 8.0, float 8.0>
+ ret <2 x float> %div.i
+}
+
+; Test which should not fold due to non-power of 2.
+; CHECK-LABEL: @test3
+; CHECK: scvtf.2s v0, v0
+; CHECK: fmov.2s v1, #9.00000000
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test3(<2 x i32> %in) {
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 9.0, float 9.0>
+ ret <2 x float> %div.i
+}
+
+; Test which should not fold due to power of 2 out of range.
+; CHECK-LABEL: @test4
+; CHECK: scvtf.2s v0, v0
+; CHECK: movi.2s v1, #0x50, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test4(<2 x i32> %in) {
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
+ ret <2 x float> %div.i
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK-LABEL: @test5
+; CHECK: scvtf.2s v0, v0, #32
+; CHECK: ret
+define <2 x float> @test5(<2 x i32> %in) {
+entry:
+ %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
+ ret <2 x float> %div.i
+}
+
+; Test quadword.
+; CHECK-LABEL: @test6
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test6(<4 x i32> %in) {
+entry:
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 4.0, float 4.0, float 4.0, float 4.0>
+ ret <4 x float> %div.i
+}
+
+; Test unsigned i16 to float
+; CHECK-LABEL: @test7
+; CHECK: ushll.4s v0, v0, #0
+; CHECK: ucvtf.4s v0, v0, #1
+; CHECK: ret
+define <4 x float> @test7(<4 x i16> %in) {
+ %conv = uitofp <4 x i16> %in to <4 x float>
+ %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
+ ret <4 x float> %shift
+}
+
+; Test signed i16 to float
+; CHECK-LABEL: @test8
+; CHECK: sshll.4s v0, v0, #0
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test8(<4 x i16> %in) {
+ %conv = sitofp <4 x i16> %in to <4 x float>
+ %shift = fdiv <4 x float> %conv, <float 4.0, float 4.0, float 4.0, float 4.0>
+ ret <4 x float> %shift
+}
+
+; Can't convert i64 to float.
+; CHECK-LABEL: @test9
+; CHECK: ucvtf.2d v0, v0
+; CHECK: fcvtn v0.2s, v0.2d
+; CHECK: movi.2s v1, #0x40, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test9(<2 x i64> %in) {
+ %conv = uitofp <2 x i64> %in to <2 x float>
+ %shift = fdiv <2 x float> %conv, <float 2.0, float 2.0>
+ ret <2 x float> %shift
+}
+
+; CHECK-LABEL: @test10
+; CHECK: ucvtf.2d v0, v0, #1
+; CHECK: ret
+define <2 x double> @test10(<2 x i64> %in) {
+ %conv = uitofp <2 x i64> %in to <2 x double>
+ %shift = fdiv <2 x double> %conv, <double 2.0, double 2.0>
+ ret <2 x double> %shift
+}
diff --git a/test/CodeGen/AArch64/fold-constants.ll b/test/CodeGen/AArch64/fold-constants.ll
index 2dd0d1245930..c0fec4d171cd 100644
--- a/test/CodeGen/AArch64/fold-constants.ll
+++ b/test/CodeGen/AArch64/fold-constants.ll
@@ -3,9 +3,6 @@
define i64 @dotests_616() {
; CHECK-LABEL: dotests_616
; CHECK: movi d0, #0000000000000000
-; CHECK-NEXT: umov w8, v0.b[2]
-; CHECK-NEXT: sbfx w8, w8, #0, #1
-; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
entry:
@@ -19,3 +16,19 @@ entry:
%vget_lane = extractelement <1 x i64> %4, i32 0
ret i64 %vget_lane
}
+
+; PR25763 - folding constant vector comparisons with sign-extended result
+define <8 x i16> @dotests_458() {
+; CHECK-LABEL: dotests_458
+; CHECK: movi d0, #0x00000000ff0000
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+entry:
+ %vclz_v.i = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> <i8 127, i8 38, i8 -1, i8 -128, i8 127, i8 0, i8 0, i8 0>, i1 false) #6
+ %vsra_n = lshr <8 x i8> %vclz_v.i, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+ %name_6 = or <8 x i8> %vsra_n, <i8 127, i8 -128, i8 -1, i8 67, i8 84, i8 127, i8 -1, i8 0>
+ %cmp.i603 = icmp slt <8 x i8> %name_6, <i8 -57, i8 -128, i8 127, i8 -128, i8 -1, i8 0, i8 -1, i8 -1>
+ %vmovl.i4.i = sext <8 x i1> %cmp.i603 to <8 x i16>
+ ret <8 x i16> %vmovl.i4.i
+}
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1)
diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 0dbda152fca9..f6e4bdf73459 100644
--- a/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -130,7 +130,6 @@ define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) {
ret <4 x i16> %2
}
-
define <4 x half> @sitofp_i8(<4 x i8> %a) #0 {
; CHECK-LABEL: sitofp_i8:
; CHECK-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8
@@ -218,4 +217,54 @@ define <4 x half> @uitofp_i64(<4 x i64> %a) #0 {
ret <4 x half> %1
}
+define void @test_insert_at_zero(half %a, <4 x half>* %b) #0 {
+; CHECK-LABEL: test_insert_at_zero:
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ %1 = insertelement <4 x half> undef, half %a, i64 0
+ store <4 x half> %1, <4 x half>* %b, align 4
+ ret void
+}
+
+define <4 x i8> @fptosi_i8(<4 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i8:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+ %1 = fptosi<4 x half> %a to <4 x i8>
+ ret <4 x i8> %1
+}
+
+define <4 x i16> @fptosi_i16(<4 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i16:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+ %1 = fptosi<4 x half> %a to <4 x i16>
+ ret <4 x i16> %1
+}
+
+define <4 x i8> @fptoui_i8(<4 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i8:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; NOTE: fcvtzs selected here because the xtn shaves the sign bit
+; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+ %1 = fptoui<4 x half> %a to <4 x i8>
+ ret <4 x i8> %1
+}
+
+define <4 x i16> @fptoui_i16(<4 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i16:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+ %1 = fptoui<4 x half> %a to <4 x i16>
+ ret <4 x i16> %1
+}
+
attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 10a8c22d6f7e..137d1f358a30 100644
--- a/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -358,4 +358,67 @@ define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
ret <8 x half> %1
}
+define void @test_insert_at_zero(half %a, <8 x half>* %b) #0 {
+; CHECK-LABEL: test_insert_at_zero:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+ %1 = insertelement <8 x half> undef, half %a, i64 0
+ store <8 x half> %1, <8 x half>* %b, align 4
+ ret void
+}
+
+define <8 x i8> @fptosi_i8(<8 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i8:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: xtn v0.8b, [[I16]].8h
+; CHECK-NEXT: ret
+ %1 = fptosi<8 x half> %a to <8 x i8>
+ ret <8 x i8> %1
+}
+
+define <8 x i16> @fptosi_i16(<8 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i16:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: ret
+ %1 = fptosi<8 x half> %a to <8 x i16>
+ ret <8 x i16> %1
+}
+
+define <8 x i8> @fptoui_i8(<8 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i8:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: xtn v0.8b, [[I16]].8h
+; CHECK-NEXT: ret
+ %1 = fptoui<8 x half> %a to <8 x i8>
+ ret <8 x i8> %1
+}
+
+define <8 x i16> @fptoui_i16(<8 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i16:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: ret
+ %1 = fptoui<8 x half> %a to <8 x i16>
+ ret <8 x i16> %1
+}
+
attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/free-zext.ll b/test/CodeGen/AArch64/free-zext.ll
index cff11f85bda4..ea4f1f4e10f3 100644
--- a/test/CodeGen/AArch64/free-zext.ll
+++ b/test/CodeGen/AArch64/free-zext.ll
@@ -1,7 +1,7 @@
; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s
define i64 @test_free_zext(i8* %a, i16* %b) {
-; CHECK-LABEL: test_free_zext
+; CHECK-LABEL: test_free_zext:
; CHECK-DAG: ldrb w[[A:[0-9]+]], [x0]
; CHECK: ldrh w[[B:[0-9]+]], [x1]
; CHECK: add x0, x[[B]], x[[A]]
@@ -12,3 +12,60 @@ define i64 @test_free_zext(i8* %a, i16* %b) {
%add = add nsw i64 %conv1, %conv
ret i64 %add
}
+
+define void @test_free_zext2(i32* %ptr, i32* %dst1, i64* %dst2) {
+; CHECK-LABEL: test_free_zext2:
+; CHECK: ldrh w[[A:[0-9]+]], [x0]
+; CHECK-NOT: and x
+; CHECK: str w[[A]], [x1]
+; CHECK: str x[[A]], [x2]
+ %load = load i32, i32* %ptr, align 8
+ %load16 = and i32 %load, 65535
+ %load64 = zext i32 %load16 to i64
+ store i32 %load16, i32* %dst1, align 4
+ store i64 %load64, i64* %dst2, align 8
+ ret void
+}
+
+; Test for CodeGenPrepare::optimizeLoadExt(): simple case: two loads
+; feeding a phi that zext's each loaded value.
+define i32 @test_free_zext3(i32* %ptr, i32* %ptr2, i32* %dst, i32 %c) {
+; CHECK-LABEL: test_free_zext3:
+bb1:
+; CHECK: ldrh [[REG:w[0-9]+]]
+; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff
+ %tmp1 = load i32, i32* %ptr, align 4
+ %cmp = icmp ne i32 %c, 0
+ br i1 %cmp, label %bb2, label %bb3
+bb2:
+; CHECK: ldrh [[REG2:w[0-9]+]]
+; CHECK-NOT: and {{w[0-9]+}}, [[REG2]], #0xffff
+ %tmp2 = load i32, i32* %ptr2, align 4
+ br label %bb3
+bb3:
+ %tmp3 = phi i32 [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
+; CHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
+ %tmpand = and i32 %tmp3, 65535
+ ret i32 %tmpand
+}
+
+; Test for CodeGenPrepare::optimizeLoadExt(): check case of zext-able
+; load feeding a phi in the same block.
+define void @test_free_zext4(i32* %ptr, i32* %ptr2, i32* %dst) {
+; CHECK-LABEL: test_free_zext4:
+; CHECK: ldrh [[REG:w[0-9]+]]
+; TODO: fix isel to remove final and XCHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff
+; CHECK: ldrh [[REG:w[0-9]+]]
+bb1:
+ %load1 = load i32, i32* %ptr, align 4
+ br label %loop
+loop:
+ %phi = phi i32 [ %load1, %bb1 ], [ %load2, %loop ]
+ %and = and i32 %phi, 65535
+ store i32 %and, i32* %dst, align 4
+ %load2 = load i32, i32* %ptr2, align 4
+ %cmp = icmp ne i32 %and, 0
+ br i1 %cmp, label %loop, label %end
+end:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
index 9100ae39282b..2ea13e388867 100644
--- a/test/CodeGen/AArch64/func-argpassing.ll
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-post-ra | FileCheck --check-prefix=CHECK %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -disable-post-ra | FileCheck --check-prefix=CHECK-NOFP %s
%myStruct = type { i64 , i8, i32 }
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
index 22a33157fd55..2f45666ba13a 100644
--- a/test/CodeGen/AArch64/func-calls.ll
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -89,11 +89,11 @@ define void @check_stack_args() {
; that varstruct is passed on the stack. Rather dependent on how a
; memcpy gets created, but the following works for now.
-; CHECK-DAG: str {{q[0-9]+}}, [sp]
+; CHECK-DAG: str {{q[0-9]+}}, [sp, #-16]
; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b
-; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp]
+; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp, #-16]!
; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0
; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]]
diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll
index 657778e34187..5e820b8bb303 100644
--- a/test/CodeGen/AArch64/global-alignment.ll
+++ b/test/CodeGen/AArch64/global-alignment.ll
@@ -3,7 +3,7 @@
@var32 = global [3 x i32] zeroinitializer
@var64 = global [3 x i64] zeroinitializer
@var32_align64 = global [3 x i32] zeroinitializer, align 8
-@alias = alias [3 x i32]* @var32_align64
+@alias = alias [3 x i32], [3 x i32]* @var32_align64
define i64 @test_align32() {
; CHECK-LABEL: test_align32:
diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll
index 14b04303ffb3..b93f41c07df9 100644
--- a/test/CodeGen/AArch64/global-merge-1.ll
+++ b/test/CodeGen/AArch64/global-merge-1.ll
@@ -12,16 +12,20 @@
define void @f1(i32 %a1, i32 %a2) {
;CHECK-APPLE-IOS-NOT: adrp
-;CHECK-APPLE-IOS: adrp x8, __MergedGlobals@PAGE
+;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE
;CHECK-APPLE-IOS-NOT: adrp
-;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals@PAGEOFF
+;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF
store i32 %a1, i32* @m, align 4
store i32 %a2, i32* @n, align 4
ret void
}
-;CHECK: .type _MergedGlobals,@object // @_MergedGlobals
-;CHECK: .local _MergedGlobals
-;CHECK: .comm _MergedGlobals,8,8
+;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals
+;CHECK: .local .L_MergedGlobals
+;CHECK: .comm .L_MergedGlobals,8,8
+;CHECK: m = .L_MergedGlobals
+;CHECK: n = .L_MergedGlobals+4
-;CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 ; @_MergedGlobals
+;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals,8,3 ; @_MergedGlobals
+;CHECK-APPLE-IOS-NOT: _m = l__MergedGlobals
+;CHECK-APPLE-IOS-NOT: _n = l__MergedGlobals+4
diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll
index af684039bf10..53bed1d9bc09 100644
--- a/test/CodeGen/AArch64/global-merge-2.ll
+++ b/test/CodeGen/AArch64/global-merge-2.ll
@@ -9,8 +9,8 @@
define void @f1(i32 %a1, i32 %a2) {
;CHECK-APPLE-IOS-LABEL: _f1:
;CHECK-APPLE-IOS-NOT: adrp
-;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
-;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
+;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE
+;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF
;CHECK-APPLE-IOS-NOT: adrp
store i32 %a1, i32* @x, align 4
store i32 %a2, i32* @y, align 4
@@ -19,34 +19,34 @@ define void @f1(i32 %a1, i32 %a2) {
define void @g1(i32 %a1, i32 %a2) {
;CHECK-APPLE-IOS-LABEL: _g1:
-;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
-;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
+;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE
+;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF
;CHECK-APPLE-IOS-NOT: adrp
store i32 %a1, i32* @y, align 4
store i32 %a2, i32* @z, align 4
ret void
}
-;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x
-;CHECK: .globl _MergedGlobals_x
-;CHECK: .align 3
-;CHECK: _MergedGlobals_x:
-;CHECK: .size _MergedGlobals_x, 12
+;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals
+;CHECK: .local .L_MergedGlobals
+;CHECK: .comm .L_MergedGlobals,12,8
;CHECK: .globl x
-;CHECK: x = _MergedGlobals_x
+;CHECK: x = .L_MergedGlobals
+;CHECK: .size x, 4
;CHECK: .globl y
-;CHECK: y = _MergedGlobals_x+4
+;CHECK: y = .L_MergedGlobals+4
+;CHECK: .size y, 4
;CHECK: .globl z
-;CHECK: z = _MergedGlobals_x+8
+;CHECK: z = .L_MergedGlobals+8
+;CHECK: .size z, 4
-;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x
-;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_x,12,3
+;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals,12,3
;CHECK-APPLE-IOS: .globl _x
-;CHECK-APPLE-IOS: _x = __MergedGlobals_x
+;CHECK-APPLE-IOS: = l__MergedGlobals
;CHECK-APPLE-IOS: .globl _y
-;CHECK-APPLE-IOS: _y = __MergedGlobals_x+4
+;CHECK-APPLE-IOS: _y = l__MergedGlobals+4
;CHECK-APPLE-IOS: .globl _z
-;CHECK-APPLE-IOS: _z = __MergedGlobals_x+8
+;CHECK-APPLE-IOS: _z = l__MergedGlobals+8
;CHECK-APPLE-IOS: .subsections_via_symbols
diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll
index 925108308e56..6895380ca63e 100644
--- a/test/CodeGen/AArch64/global-merge-3.ll
+++ b/test/CodeGen/AArch64/global-merge-3.ll
@@ -1,17 +1,17 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
@x = global [1000 x i32] zeroinitializer, align 1
@y = global [1000 x i32] zeroinitializer, align 1
@z = internal global i32 1, align 4
define void @f1(i32 %a1, i32 %a2, i32 %a3) {
-;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE
+;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE
;CHECK-APPLE-IOS-NOT: adrp
-;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
-;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE
-;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF
+;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF
+;CHECK-APPLE-IOS: adrp x9, l__MergedGlobals.1@PAGE
+;CHECK-APPLE-IOS: add x9, x9, l__MergedGlobals.1@PAGEOFF
%x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
%y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
store i32 %a1, i32* %x3, align 4
@@ -20,32 +20,32 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
ret void
}
-;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x
-;CHECK: .globl _MergedGlobals_x
+;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals
;CHECK: .align 4
-;CHECK: _MergedGlobals_x:
-;CHECK: .size _MergedGlobals_x, 4004
+;CHECK: .L_MergedGlobals:
+;CHECK: .size .L_MergedGlobals, 4004
-;CHECK: .type _MergedGlobals_y,@object // @_MergedGlobals_y
-;CHECK: .globl _MergedGlobals_y
-;CHECK: _MergedGlobals_y:
-;CHECK: .size _MergedGlobals_y, 4000
+;CHECK: .type .L_MergedGlobals.1,@object // @_MergedGlobals.1
+;CHECK: .local .L_MergedGlobals.1
+;CHECK: .comm .L_MergedGlobals.1,4000,16
-;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x
;CHECK-APPLE-IOS: .align 4
-;CHECK-APPLE-IOS: __MergedGlobals_x:
+;CHECK-APPLE-IOS: l__MergedGlobals:
;CHECK-APPLE-IOS: .long 1
;CHECK-APPLE-IOS: .space 4000
-;CHECK-APPLE-IOS: .globl __MergedGlobals_y ; @_MergedGlobals_y
-;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_y,4000,4
+;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals.1,4000,4
+;CHECK: z = .L_MergedGlobals
;CHECK: .globl x
-;CHECK: x = _MergedGlobals_x+4
+;CHECK: x = .L_MergedGlobals+4
+;CHECK: .size x, 4000
;CHECK: .globl y
-;CHECK: y = _MergedGlobals_y
+;CHECK: y = .L_MergedGlobals.1
+;CHECK: .size y, 4000
+;CHECK-APPLE-IOS-NOT: _z = l__MergedGlobals
;CHECK-APPLE-IOS:.globl _x
-;CHECK-APPLE-IOS: _x = __MergedGlobals_x+4
+;CHECK-APPLE-IOS: _x = l__MergedGlobals+4
;CHECK-APPLE-IOS:.globl _y
-;CHECK-APPLE-IOS: _y = __MergedGlobals_y
+;CHECK-APPLE-IOS: _y = l__MergedGlobals.1
diff --git a/test/CodeGen/AArch64/global-merge-4.ll b/test/CodeGen/AArch64/global-merge-4.ll
index bc6b68a9c046..a5109f6e8ea5 100644
--- a/test/CodeGen/AArch64/global-merge-4.ll
+++ b/test/CodeGen/AArch64/global-merge-4.ll
@@ -64,9 +64,9 @@ define internal i32* @returnFoo() #1 {
ret i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i64 0, i64 0)
}
-;CHECK: .type _MergedGlobals,@object // @_MergedGlobals
-;CHECK: .local _MergedGlobals
-;CHECK: .comm _MergedGlobals,60,16
+;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals
+;CHECK: .local .L_MergedGlobals
+;CHECK: .comm .L_MergedGlobals,60,16
attributes #0 = { nounwind ssp }
attributes #1 = { nounwind readnone ssp }
diff --git a/test/CodeGen/AArch64/global-merge-group-by-use.ll b/test/CodeGen/AArch64/global-merge-group-by-use.ll
index ddc044ed9e08..8b3fc97c9e2e 100644
--- a/test/CodeGen/AArch64/global-merge-group-by-use.ll
+++ b/test/CodeGen/AArch64/global-merge-group-by-use.ll
@@ -12,7 +12,7 @@
; CHECK-LABEL: f1:
define void @f1(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET1:__MergedGlobals.[0-9]*]]@PAGE
+; CHECK-NEXT: adrp x8, [[SET1:l__MergedGlobals.[0-9]*]]@PAGE
; CHECK-NEXT: add x8, x8, [[SET1]]@PAGEOFF
; CHECK-NEXT: stp w0, w1, [x8]
; CHECK-NEXT: ret
@@ -27,7 +27,7 @@ define void @f1(i32 %a1, i32 %a2) #0 {
; CHECK-LABEL: f2:
define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
-; CHECK-NEXT: adrp x8, [[SET2:__MergedGlobals.[0-9]*]]@PAGE
+; CHECK-NEXT: adrp x8, [[SET2:l__MergedGlobals.[0-9]*]]@PAGE
; CHECK-NEXT: add x8, x8, [[SET2]]@PAGEOFF
; CHECK-NEXT: stp w0, w1, [x8]
; CHECK-NEXT: str w2, [x8, #8]
@@ -48,7 +48,7 @@ define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
; CHECK-LABEL: f3:
define void @f3(i32 %a1, i32 %a2) #0 {
; CHECK-NEXT: adrp x8, _m3@PAGE
-; CHECK-NEXT: adrp x9, [[SET3:__MergedGlobals[0-9]*]]@PAGE
+; CHECK-NEXT: adrp x9, [[SET3:l__MergedGlobals[0-9]*]]@PAGE
; CHECK-NEXT: str w0, [x8, _m3@PAGEOFF]
; CHECK-NEXT: str w1, [x9, [[SET3]]@PAGEOFF]
; CHECK-NEXT: ret
diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
index e83cbab140a7..399438925771 100644
--- a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
+++ b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll
@@ -11,7 +11,7 @@
; CHECK-LABEL: f1:
define void @f1(i32 %a1, i32 %a2) minsize nounwind {
-; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE
+; CHECK-NEXT: adrp x8, [[SET:l__MergedGlobals]]@PAGE
; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
; CHECK-NEXT: stp w0, w1, [x8]
; CHECK-NEXT: ret
diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
index e6de4699132a..c3756a85feff 100644
--- a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
+++ b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
@@ -10,7 +10,7 @@
; CHECK-LABEL: f1:
define void @f1(i32 %a1, i32 %a2) #0 {
-; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE
+; CHECK-NEXT: adrp x8, [[SET:l__MergedGlobals]]@PAGE
; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
; CHECK-NEXT: stp w0, w1, [x8]
; CHECK-NEXT: ret
diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll
index b2c11c7517c0..d2133213f186 100644
--- a/test/CodeGen/AArch64/ldst-opt.ll
+++ b/test/CodeGen/AArch64/ldst-opt.ll
@@ -3,11 +3,15 @@
; This file contains tests for the AArch64 load/store optimizer.
%padding = type { i8*, i8*, i8*, i8* }
+%s.byte = type { i8, i8 }
+%s.halfword = type { i16, i16 }
%s.word = type { i32, i32 }
%s.doubleword = type { i64, i32 }
%s.quadword = type { fp128, i32 }
%s.float = type { float, i32 }
%s.double = type { double, i32 }
+%struct.byte = type { %padding, %s.byte }
+%struct.halfword = type { %padding, %s.halfword }
%struct.word = type { %padding, %s.word }
%struct.doubleword = type { %padding, %s.doubleword }
%struct.quadword = type { %padding, %s.quadword }
@@ -24,6 +28,62 @@
;
; with X being either w1, x1, s0, d0 or q0.
+declare void @bar_byte(%s.byte*, i8)
+
+define void @load-pre-indexed-byte(%struct.byte* %ptr) nounwind {
+; CHECK-LABEL: load-pre-indexed-byte
+; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0
+ %add = load i8, i8* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1
+ tail call void @bar_byte(%s.byte* %c, i8 %add)
+ ret void
+}
+
+define void @store-pre-indexed-byte(%struct.byte* %ptr, i8 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-byte
+; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0
+ store i8 %val, i8* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1
+ tail call void @bar_byte(%s.byte* %c, i8 %val)
+ ret void
+}
+
+declare void @bar_halfword(%s.halfword*, i16)
+
+define void @load-pre-indexed-halfword(%struct.halfword* %ptr) nounwind {
+; CHECK-LABEL: load-pre-indexed-halfword
+; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0
+ %add = load i16, i16* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1
+ tail call void @bar_halfword(%s.halfword* %c, i16 %add)
+ ret void
+}
+
+define void @store-pre-indexed-halfword(%struct.halfword* %ptr, i16 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-halfword
+; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+entry:
+ %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0
+ store i16 %val, i16* %a, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1
+ tail call void @bar_halfword(%s.halfword* %c, i16 %val)
+ ret void
+}
+
declare void @bar_word(%s.word*, i32)
define void @load-pre-indexed-word(%struct.word* %ptr) nounwind {
@@ -166,6 +226,48 @@ bar:
; Check the following transform:
;
+; (ldp|stp) w1, w2 [x0, #32]
+; ...
+; add x0, x0, #32
+; ->
+; (ldp|stp) w1, w2, [x0, #32]!
+;
+
+define void @load-pair-pre-indexed-word(%struct.word* %ptr) nounwind {
+; CHECK-LABEL: load-pair-pre-indexed-word
+; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]!
+; CHECK-NOT: add x0, x0, #32
+entry:
+ %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
+ %a1 = load i32, i32* %a, align 4
+ %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1
+ %b1 = load i32, i32* %b, align 4
+ %add = add i32 %a1, %b1
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
+ tail call void @bar_word(%s.word* %c, i32 %add)
+ ret void
+}
+
+define void @store-pair-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind {
+; CHECK-LABEL: store-pair-pre-indexed-word
+; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]!
+; CHECK-NOT: add x0, x0, #32
+entry:
+ %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
+ store i32 %val, i32* %a, align 4
+ %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1
+ store i32 %val, i32* %b, align 4
+ br label %bar
+bar:
+ %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
+ tail call void @bar_word(%s.word* %c, i32 %val)
+ ret void
+}
+
+; Check the following transform:
+;
; add x8, x8, #16
; ...
; ldr X, [x8]
@@ -174,11 +276,11 @@ bar:
;
; with X being either w0, x0, s0, d0 or q0.
-%pre.struct.i32 = type { i32, i32, i32}
-%pre.struct.i64 = type { i32, i64, i64}
-%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>}
-%pre.struct.float = type { i32, float, float}
-%pre.struct.double = type { i32, double, double}
+%pre.struct.i32 = type { i32, i32, i32, i32, i32}
+%pre.struct.i64 = type { i32, i64, i64, i64, i64}
+%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>, <2 x i64>}
+%pre.struct.float = type { i32, float, float, float}
+%pre.struct.double = type { i32, double, double, double}
define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
%pre.struct.i32* %load2) nounwind {
@@ -270,6 +372,96 @@ return:
ret double %ret
}
+define i32 @load-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond,
+ %pre.struct.i32* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-word3
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
+ %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4
+ br label %return
+return:
+ %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load i32, i32* %retptr
+ ret i32 %ret
+}
+
+define i64 @load-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond,
+ %pre.struct.i64* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-doubleword3
+; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
+ %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 2
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 3
+ br label %return
+return:
+ %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load i64, i64* %retptr
+ ret i64 %ret
+}
+
+define <2 x i64> @load-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond,
+ %pre.struct.i128* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-quadword3
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
+ %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3
+ br label %return
+return:
+ %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load <2 x i64>, <2 x i64>* %retptr
+ ret <2 x i64> %ret
+}
+
+define float @load-pre-indexed-float3(%pre.struct.float** %this, i1 %cond,
+ %pre.struct.float* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-float3
+; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.float*, %pre.struct.float** %this
+ %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3
+ br label %return
+return:
+ %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load float, float* %retptr
+ ret float %ret
+}
+
+define double @load-pre-indexed-double3(%pre.struct.double** %this, i1 %cond,
+ %pre.struct.double* %load2) nounwind {
+; CHECK-LABEL: load-pre-indexed-double3
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.double*, %pre.struct.double** %this
+ %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3
+ br label %return
+return:
+ %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ %ret = load double, double* %retptr
+ ret double %ret
+}
+
; Check the following transform:
;
; add x8, x8, #16
@@ -375,6 +567,101 @@ return:
ret void
}
+define void @store-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond,
+ %pre.struct.i32* %load2,
+ i32 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-word3
+; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
+ %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4
+ br label %return
+return:
+ %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store i32 %val, i32* %retptr
+ ret void
+}
+
+define void @store-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond,
+ %pre.struct.i64* %load2,
+ i64 %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-doubleword3
+; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
+ %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 3
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 4
+ br label %return
+return:
+ %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store i64 %val, i64* %retptr
+ ret void
+}
+
+define void @store-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond,
+ %pre.struct.i128* %load2,
+ <2 x i64> %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-quadword3
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
+ %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3
+ br label %return
+return:
+ %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store <2 x i64> %val, <2 x i64>* %retptr
+ ret void
+}
+
+define void @store-pre-indexed-float3(%pre.struct.float** %this, i1 %cond,
+ %pre.struct.float* %load2,
+ float %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-float3
+; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.float*, %pre.struct.float** %this
+ %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3
+ br label %return
+return:
+ %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store float %val, float* %retptr
+ ret void
+}
+
+define void @store-pre-indexed-double3(%pre.struct.double** %this, i1 %cond,
+ %pre.struct.double* %load2,
+ double %val) nounwind {
+; CHECK-LABEL: store-pre-indexed-double3
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16]!
+ br i1 %cond, label %if.then, label %if.end
+if.then:
+ %load1 = load %pre.struct.double*, %pre.struct.double** %this
+ %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2
+ br label %return
+if.end:
+ %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3
+ br label %return
+return:
+ %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
+ store double %val, double* %retptr
+ ret void
+}
+
; Check the following transform:
;
; ldr X, [x20]
@@ -385,6 +672,54 @@ return:
;
; with X being either w0, x0, s0, d0 or q0.
+define void @load-post-indexed-byte(i8* %array, i64 %count) nounwind {
+; CHECK-LABEL: load-post-indexed-byte
+; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}], #4
+entry:
+ %gep1 = getelementptr i8, i8* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr i8, i8* %iv2, i64 -1
+ %load = load i8, i8* %gep2
+ call void @use-byte(i8 %load)
+ %load2 = load i8, i8* %iv2
+ call void @use-byte(i8 %load2)
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr i8, i8* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @load-post-indexed-halfword(i16* %array, i64 %count) nounwind {
+; CHECK-LABEL: load-post-indexed-halfword
+; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}], #8
+entry:
+ %gep1 = getelementptr i16, i16* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr i16, i16* %iv2, i64 -1
+ %load = load i16, i16* %gep2
+ call void @use-halfword(i16 %load)
+ %load2 = load i16, i16* %iv2
+ call void @use-halfword(i16 %load2)
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr i16, i16* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
define void @load-post-indexed-word(i32* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-word
; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16
@@ -515,6 +850,52 @@ exit:
;
; with X being either w0, x0, s0, d0 or q0.
+define void @store-post-indexed-byte(i8* %array, i64 %count, i8 %val) nounwind {
+; CHECK-LABEL: store-post-indexed-byte
+; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #4
+entry:
+ %gep1 = getelementptr i8, i8* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr i8, i8* %iv2, i64 -1
+ %load = load i8, i8* %gep2
+ call void @use-byte(i8 %load)
+ store i8 %val, i8* %iv2
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr i8, i8* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
+define void @store-post-indexed-halfword(i16* %array, i64 %count, i16 %val) nounwind {
+; CHECK-LABEL: store-post-indexed-halfword
+; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #8
+entry:
+ %gep1 = getelementptr i16, i16* %array, i64 2
+ br label %body
+
+body:
+ %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ]
+ %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
+ %gep2 = getelementptr i16, i16* %iv2, i64 -1
+ %load = load i16, i16* %gep2
+ call void @use-halfword(i16 %load)
+ store i16 %val, i16* %iv2
+ %iv.next = add i64 %iv, -4
+ %gep3 = getelementptr i16, i16* %iv2, i64 4
+ %cond = icmp eq i64 %iv.next, 0
+ br i1 %cond, label %exit, label %body
+
+exit:
+ ret void
+}
+
define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind {
; CHECK-LABEL: store-post-indexed-word
; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16
@@ -630,6 +1011,8 @@ exit:
ret void
}
+declare void @use-byte(i8)
+declare void @use-halfword(i16)
declare void @use-word(i32)
declare void @use-doubleword(i64)
declare void @use-quadword(<2 x i64>)
@@ -638,6 +1021,90 @@ declare void @use-double(double)
; Check the following transform:
;
+; stp w0, [x20]
+; ...
+; add x20, x20, #32
+; ->
+; stp w0, [x20], #32
+
+define void @store-pair-post-indexed-word() nounwind {
+; CHECK-LABEL: store-pair-post-indexed-word
+; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [sp], #16
+; CHECK: ret
+ %src = alloca { i32, i32 }, align 8
+ %dst = alloca { i32, i32 }, align 8
+
+ %src.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 0
+ %src.real = load i32, i32* %src.realp
+ %src.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 1
+ %src.imag = load i32, i32* %src.imagp
+
+ %dst.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 0
+ %dst.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 1
+ store i32 %src.real, i32* %dst.realp
+ store i32 %src.imag, i32* %dst.imagp
+ ret void
+}
+
+define void @store-pair-post-indexed-doubleword() nounwind {
+; CHECK-LABEL: store-pair-post-indexed-doubleword
+; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [sp], #32
+; CHECK: ret
+ %src = alloca { i64, i64 }, align 8
+ %dst = alloca { i64, i64 }, align 8
+
+ %src.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 0
+ %src.real = load i64, i64* %src.realp
+ %src.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 1
+ %src.imag = load i64, i64* %src.imagp
+
+ %dst.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 0
+ %dst.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 1
+ store i64 %src.real, i64* %dst.realp
+ store i64 %src.imag, i64* %dst.imagp
+ ret void
+}
+
+define void @store-pair-post-indexed-float() nounwind {
+; CHECK-LABEL: store-pair-post-indexed-float
+; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [sp], #16
+; CHECK: ret
+ %src = alloca { float, float }, align 8
+ %dst = alloca { float, float }, align 8
+
+ %src.realp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 0
+ %src.real = load float, float* %src.realp
+ %src.imagp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 1
+ %src.imag = load float, float* %src.imagp
+
+ %dst.realp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 0
+ %dst.imagp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 1
+ store float %src.real, float* %dst.realp
+ store float %src.imag, float* %dst.imagp
+ ret void
+}
+
+define void @store-pair-post-indexed-double() nounwind {
+; CHECK-LABEL: store-pair-post-indexed-double
+; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [sp], #32
+; CHECK: ret
+ %src = alloca { double, double }, align 8
+ %dst = alloca { double, double }, align 8
+
+ %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0
+ %src.real = load double, double* %src.realp
+ %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1
+ %src.imag = load double, double* %src.imagp
+
+ %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0
+ %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1
+ store double %src.real, double* %dst.realp
+ store double %src.imag, double* %dst.imagp
+ ret void
+}
+
+; Check the following transform:
+;
; (ldr|str) X, [x20]
; ...
; sub x20, x20, #16
diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll
index 18dbad4ce25b..86f5edd5da1d 100644
--- a/test/CodeGen/AArch64/merge-store.ll
+++ b/test/CodeGen/AArch64/merge-store.ll
@@ -1,4 +1,5 @@
; RUN: llc -march aarch64 %s -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE
@g0 = external global <3 x float>, align 16
@g1 = external global <3 x float>, align 4
@@ -18,3 +19,32 @@ define void @blam() {
store float %tmp9, float* %tmp7
ret void;
}
+
+
+; PR21711 - Merge vector stores into wider vector stores.
+
+; On Cyclone, the stores should not get merged into a 16-byte store because
+; unaligned 16-byte stores are slow. This test would infinite loop when
+; the fastness of unaligned accesses was not specified correctly.
+
+define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
+ %idx0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 3
+ %idx1 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 4
+
+ %shuffle0 = shufflevector <4 x float> %v1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %shuffle1 = shufflevector <4 x float> %v1, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+
+ store <2 x float> %shuffle0, <2 x float>* %idx0, align 8
+ store <2 x float> %shuffle1, <2 x float>* %idx1, align 8
+ ret void
+
+; CHECK-LABEL: merge_vec_extract_stores
+; CHECK: stur q0, [x0, #24]
+; CHECK-NEXT: ret
+
+; CYCLONE-LABEL: merge_vec_extract_stores
+; CYCLONE: ext v1.16b, v0.16b, v0.16b, #8
+; CYCLONE-NEXT: str d0, [x0, #24]
+; CYCLONE-NEXT: str d1, [x0, #32]
+; CYCLONE-NEXT: ret
+}
diff --git a/test/CodeGen/AArch64/misched-fusion.ll b/test/CodeGen/AArch64/misched-fusion.ll
new file mode 100644
index 000000000000..d38869329034
--- /dev/null
+++ b/test/CodeGen/AArch64/misched-fusion.ll
@@ -0,0 +1,34 @@
+; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s
+target triple = "arm64-apple-ios"
+
+declare void @foobar(i32 %v0, i32 %v1)
+
+; Make sure sub is scheduled in front of cbnz
+; CHECK-LABEL: test_sub_cbz:
+; CHECK: add w[[ADDRES:[0-9]+]], w1, #7
+; CHECK: sub w[[SUBRES:[0-9]+]], w0, #13
+; CHECK-NEXT: cbnz w[[SUBRES]], [[SKIPBLOCK:LBB[0-9_]+]]
+; CHECK: mov x0, x[[ADDRES]]
+; CHECK: mov x1, x[[SUBRES]]
+; CHECK: bl _foobar
+; CHECK: [[SKIPBLOCK]]:
+; CHECK: mov x0, x[[SUBRES]]
+; CHECK: mov x1, x[[ADDRES]]
+; CHECK: bl _foobar
+define void @test_sub_cbz(i32 %a0, i32 %a1) {
+entry:
+ ; except for the fusion opportunity the sub/add should be equal so the
+ ; scheduler would leave them in source order if it weren't for the scheduling
+ %v0 = sub i32 %a0, 13
+ %cond = icmp eq i32 %v0, 0
+ %v1 = add i32 %a1, 7
+ br i1 %cond, label %if, label %exit
+
+if:
+ call void @foobar(i32 %v1, i32 %v0)
+ br label %exit
+
+exit:
+ call void @foobar(i32 %v0, i32 %v1)
+ ret void
+}
diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll
index 4515697b9991..e93521858a31 100644
--- a/test/CodeGen/AArch64/mul-lohi.ll
+++ b/test/CodeGen/AArch64/mul-lohi.ll
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=arm64-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64_be-linux-gnu -mcpu=cyclone %s -o - | FileCheck --check-prefix=CHECK-BE %s
+
define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: test_128bitmul:
; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
@@ -16,3 +17,31 @@ define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
%prod = mul i128 %lhs, %rhs
ret i128 %prod
}
+
+; The machine combiner should create madd instructions when
+; optimizing for size because that's smaller than mul + add.
+
+define i128 @test_128bitmul_optsize(i128 %lhs, i128 %rhs) optsize {
+; CHECK-LABEL: test_128bitmul_optsize:
+; CHECK: umulh [[HI:x[0-9]+]], x0, x2
+; CHECK-NEXT: madd [[TEMP1:x[0-9]+]], x0, x3, [[HI]]
+; CHECK-NEXT: madd x1, x1, x2, [[TEMP1]]
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: ret
+
+ %prod = mul i128 %lhs, %rhs
+ ret i128 %prod
+}
+
+define i128 @test_128bitmul_minsize(i128 %lhs, i128 %rhs) minsize {
+; CHECK-LABEL: test_128bitmul_minsize:
+; CHECK: umulh [[HI:x[0-9]+]], x0, x2
+; CHECK-NEXT: madd [[TEMP1:x[0-9]+]], x0, x3, [[HI]]
+; CHECK-NEXT: madd x1, x1, x2, [[TEMP1]]
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: ret
+
+ %prod = mul i128 %lhs, %rhs
+ ret i128 %prod
+}
+
diff --git a/test/CodeGen/AArch64/nest-register.ll b/test/CodeGen/AArch64/nest-register.ll
index 9c659fb74ec4..cc42913e10a6 100644
--- a/test/CodeGen/AArch64/nest-register.ll
+++ b/test/CodeGen/AArch64/nest-register.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -disable-post-ra -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
; Tests that the 'nest' parameter attribute causes the relevant parameter to be
; passed in the right register.
diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll
new file mode 100644
index 000000000000..db9779e03190
--- /dev/null
+++ b/test/CodeGen/AArch64/nontemporal.ll
@@ -0,0 +1,339 @@
+; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s
+
+define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
+; CHECK-LABEL: test_stnp_v4i64:
+; CHECK-NEXT: mov d[[HI1:[0-9]+]], v1[1]
+; CHECK-NEXT: mov d[[HI0:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d1, d[[HI1]], [x0, #16]
+; CHECK-NEXT: stnp d0, d[[HI0]], [x0]
+; CHECK-NEXT: ret
+ store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
+; CHECK-LABEL: test_stnp_v4i32:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <4 x i32> %v, <4 x i32>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
+; CHECK-LABEL: test_stnp_v8i16:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <8 x i16> %v, <8 x i16>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
+; CHECK-LABEL: test_stnp_v16i8:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <16 x i8> %v, <16 x i8>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2i32(<2 x i32>* %p, <2 x i32> %v) #0 {
+; CHECK-LABEL: test_stnp_v2i32:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <2 x i32> %v, <2 x i32>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4i16(<4 x i16>* %p, <4 x i16> %v) #0 {
+; CHECK-LABEL: test_stnp_v4i16:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <4 x i16> %v, <4 x i16>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v8i8(<8 x i8>* %p, <8 x i8> %v) #0 {
+; CHECK-LABEL: test_stnp_v8i8:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <8 x i8> %v, <8 x i8>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f64(<2 x double>* %p, <2 x double> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f64:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <2 x double> %v, <2 x double>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4f32(<4 x float>* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <4 x float> %v, <4 x float>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f32(<2 x float>* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <2 x float> %v, <2 x float>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v1f64(<1 x double>* %p, <1 x double> %v) #0 {
+; CHECK-LABEL: test_stnp_v1f64:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <1 x double> %v, <1 x double>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 {
+; CHECK-LABEL: test_stnp_v1i64:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0]
+; CHECK-NEXT: ret
+ store <1 x i64> %v, <1 x i64>* %p, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_i64(i64* %p, i64 %v) #0 {
+; CHECK-LABEL: test_stnp_i64:
+; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT: stnp w1, w[[HI]], [x0]
+; CHECK-NEXT: ret
+ store i64 %v, i64* %p, align 1, !nontemporal !0
+ ret void
+}
+
+
+define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f64_offset:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0, #16]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 1
+ store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f64_offset_neg:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-16]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 -1
+ store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_offset:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0, #8]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 1
+ store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_offset_neg:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-8]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 -1
+ store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
+; CHECK-LABEL: test_stnp_i64_offset:
+; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i64, i64* %p, i32 1
+ store i64 %v, i64* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
+; CHECK-LABEL: test_stnp_i64_offset_neg:
+; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i64, i64* %p, i32 -1
+ store i64 %v, i64* %tmp0, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4:
+; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #4
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 4
+ %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+ store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
+; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #4
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 -4
+ %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+ store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512:
+; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #512
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 512
+ %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+ store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_offset_504:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0, #504]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 504
+ %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+ store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508:
+; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #508
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 508
+ %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+ store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
+; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #520
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 -520
+ %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+ store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_offset_neg_512:
+; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-512]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 -512
+ %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+ store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+
+define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256:
+; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #256
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 256
+ %tmp1 = bitcast i8* %tmp0 to <2 x float>*
+ store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_offset_252:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0, #252]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 252
+ %tmp1 = bitcast i8* %tmp0 to <2 x float>*
+ store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
+; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #260
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 -260
+ %tmp1 = bitcast i8* %tmp0 to <2 x float>*
+ store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+define void @test_stnp_v2f32_offset_neg_256(i8* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_offset_neg_256:
+; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-256]
+; CHECK-NEXT: ret
+ %tmp0 = getelementptr i8, i8* %p, i32 -256
+ %tmp1 = bitcast i8* %tmp0 to <2 x float>*
+ store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
+ ret void
+}
+
+declare void @dummy(<4 x float>*)
+
+define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_offset_alloca:
+; CHECK: stnp d0, d{{.*}}, [sp]
+; CHECK-NEXT: mov x0, sp
+; CHECK-NEXT: bl _dummy
+ %tmp0 = alloca <4 x float>
+ store <4 x float> %v, <4 x float>* %tmp0, align 1, !nontemporal !0
+ call void @dummy(<4 x float>* %tmp0)
+ ret void
+}
+
+define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2:
+; CHECK: stnp d0, d{{.*}}, [sp, #16]
+; CHECK-NEXT: mov x0, sp
+; CHECK-NEXT: bl _dummy
+ %tmp0 = alloca <4 x float>, i32 2
+ %tmp1 = getelementptr <4 x float>, <4 x float>* %tmp0, i32 1
+ store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+ call void @dummy(<4 x float>* %tmp0)
+ ret void
+}
+
+!0 = !{ i32 1 }
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll
index 143558f7b2c7..c59a5b6743d6 100644
--- a/test/CodeGen/AArch64/pic-eh-stubs.ll
+++ b/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -15,7 +15,7 @@
; CHECK-NEXT: .xword .L_ZTIi.DW.stub-[[TYPEINFO_LBL]]
; .. and which is properly defined (in a writable section for the dynamic loader) later.
-; CHECK: .section .data.rel,"aw"
+; CHECK: .data
; CHECK: .L_ZTIi.DW.stub:
; CHECK-NEXT: .xword _ZTIi
diff --git a/test/CodeGen/AArch64/readcyclecounter.ll b/test/CodeGen/AArch64/readcyclecounter.ll
new file mode 100644
index 000000000000..037f11809386
--- /dev/null
+++ b/test/CodeGen/AArch64/readcyclecounter.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=aarch64-unknown-unknown -asm-verbose=false < %s |\
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=PERFMON
+; RUN: llc -mtriple=aarch64-unknown-unknown -mattr=-perfmon -asm-verbose=false < %s |\
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=NOPERFMON
+
+define i64 @test_readcyclecounter() nounwind {
+ ; CHECK-LABEL: test_readcyclecounter:
+ ; PERFMON-NEXT: mrs x0, PMCCNTR_EL0
+ ; NOPERFMON-NEXT: mov x0, xzr
+ ; CHECK-NEXT: ret
+ %tmp0 = call i64 @llvm.readcyclecounter()
+ ret i64 %tmp0
+}
+
+declare i64 @llvm.readcyclecounter()
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
index 0d301bbd502a..ba34873eaa5b 100644
--- a/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -27,8 +27,8 @@ define i64 @test_chains() {
; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
-; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
-; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
+; CHECK: sturb w[[STRVAL:[0-9]+]], [x29, [[LOCADDR]]]
+; CHECK; and w0, w[[STRVAL]], #0xff
%ret.1 = load i8, i8* %locvar
%ret.2 = zext i8 %ret.1 to i64
diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
index 8b3e6dd5ad92..a397c339a2d7 100644
--- a/test/CodeGen/AArch64/remat.ll
+++ b/test/CodeGen/AArch64/remat.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a35 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a57 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a72 -o - %s | FileCheck %s
diff --git a/test/CodeGen/AArch64/rotate.ll b/test/CodeGen/AArch64/rotate.ll
new file mode 100644
index 000000000000..5ac86d5f59c9
--- /dev/null
+++ b/test/CodeGen/AArch64/rotate.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=aarch64--linux-gnueabihf | FileCheck %s
+
+;; This used to cause a backend crash about not being able to
+;; select ROTL. Make sure if generates the basic ushr/shl.
+define <2 x i64> @testcase(<2 x i64>* %in) {
+; CHECK-LABEL: testcase
+; CHECK: ushr {{v[0-9]+}}.2d
+; CHECK: shl {{v[0-9]+}}.2d
+ %1 = load <2 x i64>, <2 x i64>* %in
+ %2 = lshr <2 x i64> %1, <i64 8, i64 8>
+ %3 = shl <2 x i64> %1, <i64 56, i64 56>
+ %4 = or <2 x i64> %2, %3
+ ret <2 x i64> %4
+}
diff --git a/test/CodeGen/AArch64/round-conv.ll b/test/CodeGen/AArch64/round-conv.ll
new file mode 100644
index 000000000000..5ed7d9409e3d
--- /dev/null
+++ b/test/CodeGen/AArch64/round-conv.ll
@@ -0,0 +1,330 @@
+; RUN: llc < %s -mtriple=arm64 | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK: fcvtms w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testmsws(float %a) {
+entry:
+ %call = call float @floorf(float %a) nounwind readnone
+ %conv = fptosi float %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK: fcvtms x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testmsxs(float %a) {
+entry:
+ %call = call float @floorf(float %a) nounwind readnone
+ %conv = fptosi float %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK: fcvtms w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testmswd(double %a) {
+entry:
+ %call = call double @floor(double %a) nounwind readnone
+ %conv = fptosi double %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK: fcvtms x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testmsxd(double %a) {
+entry:
+ %call = call double @floor(double %a) nounwind readnone
+ %conv = fptosi double %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testmuws:
+; CHECK: fcvtmu w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testmuws(float %a) {
+entry:
+ %call = call float @floorf(float %a) nounwind readnone
+ %conv = fptoui float %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testmuxs:
+; CHECK: fcvtmu x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testmuxs(float %a) {
+entry:
+ %call = call float @floorf(float %a) nounwind readnone
+ %conv = fptoui float %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testmuwd:
+; CHECK: fcvtmu w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testmuwd(double %a) {
+entry:
+ %call = call double @floor(double %a) nounwind readnone
+ %conv = fptoui double %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testmuxd:
+; CHECK: fcvtmu x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testmuxd(double %a) {
+entry:
+ %call = call double @floor(double %a) nounwind readnone
+ %conv = fptoui double %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testpsws:
+; CHECK: fcvtps w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testpsws(float %a) {
+entry:
+ %call = call float @ceilf(float %a) nounwind readnone
+ %conv = fptosi float %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testpsxs:
+; CHECK: fcvtps x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testpsxs(float %a) {
+entry:
+ %call = call float @ceilf(float %a) nounwind readnone
+ %conv = fptosi float %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testpswd:
+; CHECK: fcvtps w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testpswd(double %a) {
+entry:
+ %call = call double @ceil(double %a) nounwind readnone
+ %conv = fptosi double %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testpsxd:
+; CHECK: fcvtps x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testpsxd(double %a) {
+entry:
+ %call = call double @ceil(double %a) nounwind readnone
+ %conv = fptosi double %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testpuws:
+; CHECK: fcvtpu w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testpuws(float %a) {
+entry:
+ %call = call float @ceilf(float %a) nounwind readnone
+ %conv = fptoui float %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testpuxs:
+; CHECK: fcvtpu x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testpuxs(float %a) {
+entry:
+ %call = call float @ceilf(float %a) nounwind readnone
+ %conv = fptoui float %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testpuwd:
+; CHECK: fcvtpu w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testpuwd(double %a) {
+entry:
+ %call = call double @ceil(double %a) nounwind readnone
+ %conv = fptoui double %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testpuxd:
+; CHECK: fcvtpu x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testpuxd(double %a) {
+entry:
+ %call = call double @ceil(double %a) nounwind readnone
+ %conv = fptoui double %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testzsws:
+; CHECK: fcvtzs w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testzsws(float %a) {
+entry:
+ %call = call float @truncf(float %a) nounwind readnone
+ %conv = fptosi float %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testzsxs:
+; CHECK: fcvtzs x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testzsxs(float %a) {
+entry:
+ %call = call float @truncf(float %a) nounwind readnone
+ %conv = fptosi float %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testzswd:
+; CHECK: fcvtzs w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testzswd(double %a) {
+entry:
+ %call = call double @trunc(double %a) nounwind readnone
+ %conv = fptosi double %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testzsxd:
+; CHECK: fcvtzs x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testzsxd(double %a) {
+entry:
+ %call = call double @trunc(double %a) nounwind readnone
+ %conv = fptosi double %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testzuws:
+; CHECK: fcvtzu w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testzuws(float %a) {
+entry:
+ %call = call float @truncf(float %a) nounwind readnone
+ %conv = fptoui float %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testzuxs:
+; CHECK: fcvtzu x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testzuxs(float %a) {
+entry:
+ %call = call float @truncf(float %a) nounwind readnone
+ %conv = fptoui float %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testzuwd:
+; CHECK: fcvtzu w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testzuwd(double %a) {
+entry:
+ %call = call double @trunc(double %a) nounwind readnone
+ %conv = fptoui double %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testzuxd:
+; CHECK: fcvtzu x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testzuxd(double %a) {
+entry:
+ %call = call double @trunc(double %a) nounwind readnone
+ %conv = fptoui double %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testasws:
+; CHECK: fcvtas w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testasws(float %a) {
+entry:
+ %call = call float @roundf(float %a) nounwind readnone
+ %conv = fptosi float %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testasxs:
+; CHECK: fcvtas x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testasxs(float %a) {
+entry:
+ %call = call float @roundf(float %a) nounwind readnone
+ %conv = fptosi float %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testaswd:
+; CHECK: fcvtas w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testaswd(double %a) {
+entry:
+ %call = call double @round(double %a) nounwind readnone
+ %conv = fptosi double %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testasxd:
+; CHECK: fcvtas x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testasxd(double %a) {
+entry:
+ %call = call double @round(double %a) nounwind readnone
+ %conv = fptosi double %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testauws:
+; CHECK: fcvtau w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testauws(float %a) {
+entry:
+ %call = call float @roundf(float %a) nounwind readnone
+ %conv = fptoui float %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testauxs:
+; CHECK: fcvtau x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testauxs(float %a) {
+entry:
+ %call = call float @roundf(float %a) nounwind readnone
+ %conv = fptoui float %call to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: testauwd:
+; CHECK: fcvtau w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testauwd(double %a) {
+entry:
+ %call = call double @round(double %a) nounwind readnone
+ %conv = fptoui double %call to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: testauxd:
+; CHECK: fcvtau x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testauxd(double %a) {
+entry:
+ %call = call double @round(double %a) nounwind readnone
+ %conv = fptoui double %call to i64
+ ret i64 %conv
+}
+
+declare float @floorf(float) nounwind readnone
+declare double @floor(double) nounwind readnone
+declare float @ceilf(float) nounwind readnone
+declare double @ceil(double) nounwind readnone
+declare float @truncf(float) nounwind readnone
+declare double @trunc(double) nounwind readnone
+declare float @roundf(float) nounwind readnone
+declare double @round(double) nounwind readnone
diff --git a/test/CodeGen/AArch64/shrink-wrap.ll b/test/CodeGen/AArch64/shrink-wrap.ll
new file mode 100755
index 000000000000..ea101a8da15d
--- /dev/null
+++ b/test/CodeGen/AArch64/shrink-wrap.ll
@@ -0,0 +1,184 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s
+
+; Regression test for a crash in the ShrinkWrap pass not handling targets
+; requiring a register scavenger.
+
+%type1 = type { i32, i32, i32 }
+
+@g1 = external unnamed_addr global i32, align 4
+@g2 = external unnamed_addr global i1
+@g3 = external unnamed_addr global [144 x i32], align 4
+@g4 = external unnamed_addr constant [144 x i32], align 4
+@g5 = external unnamed_addr constant [144 x i32], align 4
+@g6 = external unnamed_addr constant [144 x i32], align 4
+@g7 = external unnamed_addr constant [144 x i32], align 4
+@g8 = external unnamed_addr constant [144 x i32], align 4
+@g9 = external unnamed_addr constant [144 x i32], align 4
+@g10 = external unnamed_addr constant [144 x i32], align 4
+@g11 = external unnamed_addr global i32, align 4
+@g12 = external unnamed_addr global [144 x [144 x i8]], align 1
+@g13 = external unnamed_addr global %type1*, align 8
+@g14 = external unnamed_addr global [144 x [144 x i8]], align 1
+@g15 = external unnamed_addr global [144 x [144 x i8]], align 1
+@g16 = external unnamed_addr global [144 x [144 x i8]], align 1
+@g17 = external unnamed_addr global [62 x i32], align 4
+@g18 = external unnamed_addr global i32, align 4
+@g19 = external unnamed_addr constant [144 x i32], align 4
+@g20 = external unnamed_addr global [144 x [144 x i8]], align 1
+@g21 = external unnamed_addr global i32, align 4
+
+declare fastcc i32 @foo()
+
+declare fastcc i32 @bar()
+
+define internal fastcc i32 @func(i32 %alpha, i32 %beta) {
+entry:
+ %v1 = alloca [2 x [11 x i32]], align 4
+ %v2 = alloca [11 x i32], align 16
+ %v3 = alloca [11 x i32], align 16
+ switch i32 undef, label %if.end.9 [
+ i32 4, label %if.then.6
+ i32 3, label %if.then.2
+ ]
+
+if.then.2:
+ %call3 = tail call fastcc i32 @bar()
+ br label %cleanup
+
+if.then.6:
+ %call7 = tail call fastcc i32 @foo()
+ unreachable
+
+if.end.9:
+ %tmp = load i32, i32* @g1, align 4
+ %rem.i = urem i32 %tmp, 1000000
+ %idxprom.1.i = zext i32 %rem.i to i64
+ %tmp1 = load %type1*, %type1** @g13, align 8
+ %v4 = getelementptr inbounds %type1, %type1* %tmp1, i64 %idxprom.1.i, i32 0
+ %.b = load i1, i1* @g2, align 1
+ %v5 = select i1 %.b, i32 2, i32 0
+ %tmp2 = load i32, i32* @g18, align 4
+ %tmp3 = load i32, i32* @g11, align 4
+ %idxprom58 = sext i32 %tmp3 to i64
+ %tmp4 = load i32, i32* @g21, align 4
+ %idxprom69 = sext i32 %tmp4 to i64
+ br label %for.body
+
+for.body:
+ %v6 = phi i32 [ 0, %if.end.9 ], [ %v7, %for.inc ]
+ %a.0983 = phi i32 [ 1, %if.end.9 ], [ %a.1, %for.inc ]
+ %arrayidx = getelementptr inbounds [62 x i32], [62 x i32]* @g17, i64 0, i64 undef
+ %tmp5 = load i32, i32* %arrayidx, align 4
+ br i1 undef, label %for.inc, label %if.else.51
+
+if.else.51:
+ %idxprom53 = sext i32 %tmp5 to i64
+ %arrayidx54 = getelementptr inbounds [144 x i32], [144 x i32]* @g3, i64 0, i64 %idxprom53
+ %tmp6 = load i32, i32* %arrayidx54, align 4
+ switch i32 %tmp6, label %for.inc [
+ i32 1, label %block.bb
+ i32 10, label %block.bb.159
+ i32 7, label %block.bb.75
+ i32 8, label %block.bb.87
+ i32 9, label %block.bb.147
+ i32 12, label %block.bb.111
+ i32 3, label %block.bb.123
+ i32 4, label %block.bb.135
+ ]
+
+block.bb:
+ %arrayidx56 = getelementptr inbounds [144 x i32], [144 x i32]* @g6, i64 0, i64 %idxprom53
+ %tmp7 = load i32, i32* %arrayidx56, align 4
+ %shr = ashr i32 %tmp7, %v5
+ %add57 = add nsw i32 %shr, 0
+ %arrayidx61 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g14, i64 0, i64 %idxprom53, i64 %idxprom58
+ %tmp8 = load i8, i8* %arrayidx61, align 1
+ %conv = zext i8 %tmp8 to i32
+ %add62 = add nsw i32 %conv, %add57
+ br label %for.inc
+
+block.bb.75:
+ %arrayidx78 = getelementptr inbounds [144 x i32], [144 x i32]* @g10, i64 0, i64 %idxprom53
+ %tmp9 = load i32, i32* %arrayidx78, align 4
+ %shr79 = ashr i32 %tmp9, %v5
+ %add80 = add nsw i32 %shr79, 0
+ %add86 = add nsw i32 0, %add80
+ br label %for.inc
+
+block.bb.87:
+ %arrayidx90 = getelementptr inbounds [144 x i32], [144 x i32]* @g9, i64 0, i64 %idxprom53
+ %tmp10 = load i32, i32* %arrayidx90, align 4
+ %shr91 = ashr i32 %tmp10, 0
+ %sub92 = sub nsw i32 0, %shr91
+ %arrayidx96 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g15, i64 0, i64 %idxprom53, i64 %idxprom69
+ %tmp11 = load i8, i8* %arrayidx96, align 1
+ %conv97 = zext i8 %tmp11 to i32
+ %sub98 = sub nsw i32 %sub92, %conv97
+ br label %for.inc
+
+block.bb.111:
+ %arrayidx114 = getelementptr inbounds [144 x i32], [144 x i32]* @g19, i64 0, i64 %idxprom53
+ %tmp12 = load i32, i32* %arrayidx114, align 4
+ %shr115 = ashr i32 %tmp12, 0
+ %sub116 = sub nsw i32 0, %shr115
+ %arrayidx120 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g12, i64 0, i64 %idxprom53, i64 %idxprom69
+ %tmp13 = load i8, i8* %arrayidx120, align 1
+ %conv121 = zext i8 %tmp13 to i32
+ %sub122 = sub nsw i32 %sub116, %conv121
+ br label %for.inc
+
+block.bb.123:
+ %arrayidx126 = getelementptr inbounds [144 x i32], [144 x i32]* @g5, i64 0, i64 %idxprom53
+ %tmp14 = load i32, i32* %arrayidx126, align 4
+ %shr127 = ashr i32 %tmp14, %v5
+ %add128 = add nsw i32 %shr127, 0
+ %add134 = add nsw i32 0, %add128
+ br label %for.inc
+
+block.bb.135:
+ %arrayidx138 = getelementptr inbounds [144 x i32], [144 x i32]* @g4, i64 0, i64 %idxprom53
+ %tmp15 = load i32, i32* %arrayidx138, align 4
+ %shr139 = ashr i32 %tmp15, 0
+ %sub140 = sub nsw i32 0, %shr139
+ %arrayidx144 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g20, i64 0, i64 %idxprom53, i64 %idxprom69
+ %tmp16 = load i8, i8* %arrayidx144, align 1
+ %conv145 = zext i8 %tmp16 to i32
+ %sub146 = sub nsw i32 %sub140, %conv145
+ br label %for.inc
+
+block.bb.147:
+ %arrayidx150 = getelementptr inbounds [144 x i32], [144 x i32]* @g8, i64 0, i64 %idxprom53
+ %tmp17 = load i32, i32* %arrayidx150, align 4
+ %shr151 = ashr i32 %tmp17, %v5
+ %add152 = add nsw i32 %shr151, 0
+ %arrayidx156 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g16, i64 0, i64 %idxprom53, i64 %idxprom58
+ %tmp18 = load i8, i8* %arrayidx156, align 1
+ %conv157 = zext i8 %tmp18 to i32
+ %add158 = add nsw i32 %conv157, %add152
+ br label %for.inc
+
+block.bb.159:
+ %sub160 = add nsw i32 %v6, -450
+ %arrayidx162 = getelementptr inbounds [144 x i32], [144 x i32]* @g7, i64 0, i64 %idxprom53
+ %tmp19 = load i32, i32* %arrayidx162, align 4
+ %shr163 = ashr i32 %tmp19, 0
+ %sub164 = sub nsw i32 %sub160, %shr163
+ %sub170 = sub nsw i32 %sub164, 0
+ br label %for.inc
+
+for.inc:
+ %v7 = phi i32 [ %v6, %for.body ], [ %v6, %if.else.51 ], [ %sub170, %block.bb.159 ], [ %add158, %block.bb.147 ], [ %sub146, %block.bb.135 ], [ %add134, %block.bb.123 ], [ %sub122, %block.bb.111 ], [ %sub98, %block.bb.87 ], [ %add86, %block.bb.75 ], [ %add62, %block.bb ]
+ %a.1 = phi i32 [ %a.0983, %for.body ], [ undef, %if.else.51 ], [ undef, %block.bb.159 ], [ undef, %block.bb.147 ], [ undef, %block.bb.135 ], [ undef, %block.bb.123 ], [ undef, %block.bb.111 ], [ undef, %block.bb.87 ], [ undef, %block.bb.75 ], [ undef, %block.bb ]
+ %cmp48 = icmp sgt i32 %a.1, %tmp2
+ br i1 %cmp48, label %for.end, label %for.body
+
+for.end:
+ store i32 %tmp, i32* %v4, align 4
+ %hold_hash.i.7 = getelementptr inbounds %type1, %type1* %tmp1, i64 %idxprom.1.i, i32 1
+ store i32 0, i32* %hold_hash.i.7, align 4
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i32 [ %call3, %if.then.2 ], [ undef, %for.end ]
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/AArch64/stackmap-frame-setup.ll b/test/CodeGen/AArch64/stackmap-frame-setup.ll
new file mode 100644
index 000000000000..4712012b0d25
--- /dev/null
+++ b/test/CodeGen/AArch64/stackmap-frame-setup.ll
@@ -0,0 +1,20 @@
+; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=aarch64-apple-darwin -stop-after machine-sink %s | FileCheck %s --check-prefix=ISEL
+; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -stop-after machine-sink %s | FileCheck %s --check-prefix=FAST-ISEL
+
+define void @caller_meta_leaf() {
+entry:
+ %metadata = alloca i64, i32 3, align 8
+ store i64 11, i64* %metadata
+ store i64 12, i64* %metadata
+ store i64 13, i64* %metadata
+; ISEL: ADJCALLSTACKDOWN 0, implicit-def
+; ISEL-NEXT: STACKMAP
+; ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+; FAST-ISEL: ADJCALLSTACKDOWN 0, implicit-def
+; FAST-ISEL-NEXT: STACKMAP
+; FAST-ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def
+ ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll
index e5766154bb46..fa5d8b943b6b 100644
--- a/test/CodeGen/AArch64/tail-call.ll
+++ b/test/CodeGen/AArch64/tail-call.ll
@@ -59,8 +59,7 @@ define fastcc void @caller_to16_from8([8 x i32], i64 %a) {
; callee will not deallocate the space, even in fastcc.
tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2)
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]!
; CHECK-NEXT: b callee_stack16
ret void
}
@@ -89,8 +88,7 @@ define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
ret void
; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]!
; CHECK-NEXT: b callee_stack16
}
diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
index 4d80f2ac5c12..bcc8af8d0690 100644
--- a/test/CodeGen/AArch64/tailcall-explicit-sret.ll
+++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false -disable-post-ra | FileCheck %s
; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/test/CodeGen/AArch64/tbi.ll b/test/CodeGen/AArch64/tbi.ll
new file mode 100644
index 000000000000..ab2d31b7cacc
--- /dev/null
+++ b/test/CodeGen/AArch64/tbi.ll
@@ -0,0 +1,102 @@
+; RUN: llc -aarch64-use-tbi -mtriple=arm64-apple-ios8.0.0 < %s \
+; RUN: | FileCheck --check-prefix=TBI --check-prefix=BOTH %s
+; RUN: llc -aarch64-use-tbi -mtriple=arm64-apple-ios7.1.0 < %s \
+; RUN: | FileCheck --check-prefix=NO_TBI --check-prefix=BOTH %s
+
+; BOTH-LABEL:ld_and32:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_and32(i64 %p) {
+ %and = and i64 %p, 72057594037927935
+ %cast = inttoptr i64 %and to i32*
+ %load = load i32, i32* %cast
+ ret i32 %load
+}
+
+; load (r & MASK) + 4
+; BOTH-LABEL:ld_and_plus_offset:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_and_plus_offset(i64 %p) {
+ %and = and i64 %p, 72057594037927935
+ %cast = inttoptr i64 %and to i32*
+ %gep = getelementptr i32, i32* %cast, i64 4
+ %load = load i32, i32* %gep
+ ret i32 %load
+}
+
+; load (r & WIDER_MASK)
+; BOTH-LABEL:ld_and32_wider:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_and32_wider(i64 %p) {
+ %and = and i64 %p, 1152921504606846975
+ %cast = inttoptr i64 %and to i32*
+ %load = load i32, i32* %cast
+ ret i32 %load
+}
+
+; BOTH-LABEL:ld_and64:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i64 @ld_and64(i64 %p) {
+ %and = and i64 %p, 72057594037927935
+ %cast = inttoptr i64 %and to i64*
+ %load = load i64, i64* %cast
+ ret i64 %load
+}
+
+; BOTH-LABEL:st_and32:
+; TBI-NOT: and x
+; NO_TBI: and x
+define void @st_and32(i64 %p, i32 %v) {
+ %and = and i64 %p, 72057594037927935
+ %cast = inttoptr i64 %and to i32*
+ store i32 %v, i32* %cast
+ ret void
+}
+
+; load (x1 + x2) & MASK
+; BOTH-LABEL:ld_ro:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_ro(i64 %a, i64 %b) {
+ %p = add i64 %a, %b
+ %and = and i64 %p, 72057594037927935
+ %cast = inttoptr i64 %and to i32*
+ %load = load i32, i32* %cast
+ ret i32 %load
+}
+
+; load (r1 & MASK) + r2
+; BOTH-LABEL:ld_ro2:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_ro2(i64 %a, i64 %b) {
+ %and = and i64 %a, 72057594037927935
+ %p = add i64 %and, %b
+ %cast = inttoptr i64 %p to i32*
+ %load = load i32, i32* %cast
+ ret i32 %load
+}
+
+; load (r1 & MASK) | r2
+; BOTH-LABEL:ld_indirect_and:
+; TBI-NOT: and x
+; NO_TBI: and x
+define i32 @ld_indirect_and(i64 %r1, i64 %r2) {
+ %and = and i64 %r1, 72057594037927935
+ %p = or i64 %and, %r2
+ %cast = inttoptr i64 %p to i32*
+ %load = load i32, i32* %cast
+ ret i32 %load
+}
+
+; BOTH-LABEL:ld_and32_narrower:
+; BOTH: and x
+define i32 @ld_and32_narrower(i64 %p) {
+ %and = and i64 %p, 36028797018963967
+ %cast = inttoptr i64 %and to i32*
+ %load = load i32, i32* %cast
+ ret i32 %load
+}
diff --git a/test/CodeGen/AArch64/vector-fcopysign.ll b/test/CodeGen/AArch64/vector-fcopysign.ll
new file mode 100644
index 000000000000..865a0a5b8580
--- /dev/null
+++ b/test/CodeGen/AArch64/vector-fcopysign.ll
@@ -0,0 +1,178 @@
+; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+;============ v1f32
+
+; WidenVecRes same
+define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f32_v1f32:
+; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.8b v0, v1, v2
+; CHECK-NEXT: ret
+ %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
+ ret <1 x float> %r
+}
+
+; WidenVecRes mismatched
+define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f32_v1f64:
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = fptrunc <1 x double> %b to <1 x float>
+ %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0)
+ ret <1 x float> %r
+}
+
+declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0
+
+;============ v1f64
+
+; WidenVecOp #1
+define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f64_v1f32:
+; CHECK-NEXT: fcvt d1, s1
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = fpext <1 x float> %b to <1 x double>
+ %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
+ ret <1 x double> %r
+}
+
+define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v1f64_v1f64:
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+ %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
+ ret <1 x double> %r
+}
+
+declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0
+
+;============ v2f32
+
+define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f32_v2f32:
+; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.8b v0, v1, v2
+; CHECK-NEXT: ret
+ %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
+ ret <2 x float> %r
+}
+
+define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f32_v2f64:
+; CHECK-NEXT: fcvtn v1.2s, v1.2d
+; CHECK-NEXT: movi.2s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.8b v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = fptrunc <2 x double> %b to <2 x float>
+ %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
+ ret <2 x float> %r
+}
+
+declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
+
+;============ v4f32
+
+define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f32_v4f32:
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+ %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %r
+}
+
+; SplitVecOp #1
+define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f32_v4f64:
+; CHECK-NEXT: mov s3, v0[1]
+; CHECK-NEXT: mov d4, v1[1]
+; CHECK-NEXT: movi.4s v5, #0x80, lsl #24
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: mov s6, v0[2]
+; CHECK-NEXT: mov s7, v0[3]
+; CHECK-NEXT: fcvt s16, d2
+; CHECK-NEXT: bit.16b v0, v1, v5
+; CHECK-NEXT: bit.16b v6, v16, v5
+; CHECK-NEXT: fcvt s1, d4
+; CHECK-NEXT: bit.16b v3, v1, v5
+; CHECK-NEXT: mov d1, v2[1]
+; CHECK-NEXT: fcvt s1, d1
+; CHECK-NEXT: ins.s v0[1], v3[0]
+; CHECK-NEXT: ins.s v0[2], v6[0]
+; CHECK-NEXT: bit.16b v7, v1, v5
+; CHECK-NEXT: ins.s v0[3], v7[0]
+; CHECK-NEXT: ret
+ %tmp0 = fptrunc <4 x double> %b to <4 x float>
+ %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
+ ret <4 x float> %r
+}
+
+declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
+
+;============ v2f64
+
+define <2 x double> @test_copysign_v2f64_v232(<2 x double> %a, <2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v232:
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+ %tmp0 = fpext <2 x float> %b to <2 x double>
+ %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
+ ret <2 x double> %r
+}
+
+define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f64:
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+ %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %r
+}
+
+declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
+
+;============ v4f64
+
+; SplitVecRes mismatched
+define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f32:
+; CHECK-NEXT: movi.2d v3, #0000000000000000
+; CHECK-NEXT: fcvtl2 v4.2d, v2.4s
+; CHECK-NEXT: fcvtl v2.2d, v2.2s
+; CHECK-NEXT: fneg.2d v3, v3
+; CHECK-NEXT: bit.16b v1, v4, v3
+; CHECK-NEXT: bit.16b v0, v2, v3
+; CHECK-NEXT: ret
+ %tmp0 = fpext <4 x float> %b to <4 x double>
+ %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
+ ret <4 x double> %r
+}
+
+; SplitVecRes same
+define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f64:
+; CHECK-NEXT: movi.2d v4, #0000000000000000
+; CHECK-NEXT: fneg.2d v4, v4
+; CHECK-NEXT: bit.16b v0, v2, v4
+; CHECK-NEXT: bit.16b v1, v3, v4
+; CHECK-NEXT: ret
+ %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
+ ret <4 x double> %r
+}
+
+declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/xbfiz.ll b/test/CodeGen/AArch64/xbfiz.ll
index f763400d7f6a..3211cc3f2ced 100644
--- a/test/CodeGen/AArch64/xbfiz.ll
+++ b/test/CodeGen/AArch64/xbfiz.ll
@@ -31,3 +31,33 @@ define i32 @ubfiz32(i32 %v) {
%shr = lshr i32 %shl, 2
ret i32 %shr
}
+
+define i64 @ubfiz64and(i64 %v) {
+; CHECK-LABEL: ubfiz64and:
+; CHECK: ubfiz x0, x0, #36, #11
+ %shl = shl i64 %v, 36
+ %and = and i64 %shl, 140668768878592
+ ret i64 %and
+}
+
+define i32 @ubfiz32and(i32 %v) {
+; CHECK-LABEL: ubfiz32and:
+; CHECK: ubfiz w0, w0, #6, #24
+ %shl = shl i32 %v, 6
+ %and = and i32 %shl, 1073741760
+ ret i32 %and
+}
+
+; Check that we don't generate a ubfiz if the lsl has more than one
+; use, since we'd just be replacing an and with a ubfiz.
+define i32 @noubfiz32(i32 %v) {
+; CHECK-LABEL: noubfiz32:
+; CHECK: lsl w[[REG1:[0-9]+]], w0, #6
+; CHECK: and w[[REG2:[0-9]+]], w[[REG1]], #0x3fffffc0
+; CHECK: add w0, w[[REG1]], w[[REG2]]
+; CHECK: ret
+ %shl = shl i32 %v, 6
+ %and = and i32 %shl, 1073741760
+ %add = add i32 %shl, %and
+ ret i32 %add
+}
diff --git a/test/CodeGen/AMDGPU/add.ll b/test/CodeGen/AMDGPU/add.ll
index 655e75dbc1a4..2ddfa9649ac9 100644
--- a/test/CodeGen/AMDGPU/add.ll
+++ b/test/CodeGen/AMDGPU/add.ll
@@ -5,7 +5,7 @@
;FUNC-LABEL: {{^}}test1:
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
;SI-NOT: [[REG]]
;SI: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -21,8 +21,8 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -39,10 +39,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/AMDGPU/address-space.ll b/test/CodeGen/AMDGPU/address-space.ll
index 4be8c5847529..3aa2f653bf9c 100644
--- a/test/CodeGen/AMDGPU/address-space.ll
+++ b/test/CodeGen/AMDGPU/address-space.ll
@@ -5,15 +5,11 @@
%struct.foo = type { [3 x float], [3 x float] }
-; FIXME: Extra V_MOV from SGPR to VGPR for second read. The address is
-; already in a VGPR after the first read.
-
; CHECK-LABEL: {{^}}do_as_ptr_calcs:
; CHECK: s_load_dword [[SREG1:s[0-9]+]],
-; CHECK: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG1]]
; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12
-; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG2]] offset:20
+; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:20
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
%x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll
new file mode 100644
index 000000000000..61bcd4b3c093
--- /dev/null
+++ b/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -0,0 +1,66 @@
+; RUN: not llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s
+
+; ERROR: unsupported addrspacecast not implemented
+
+; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
+; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
+; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
+; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
+
+; Disable optimizations in case there are optimizations added that
+; specialize away generic pointer accesses.
+
+; CHECK-LABEL: {{^}}branch_use_flat_i32:
+; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: s_endpgm
+define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
+entry:
+ %cmp = icmp ne i32 %c, 0
+ br i1 %cmp, label %local, label %global
+
+local:
+ %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
+ br label %end
+
+global:
+ %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+ br label %end
+
+end:
+ %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
+ store i32 %x, i32 addrspace(4)* %fptr, align 4
+; %val = load i32, i32 addrspace(4)* %fptr, align 4
+; store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; TODO: This should not be zero when registers are used for small
+; scratch allocations again.
+
+; Check for prologue initializing special SGPRs pointing to scratch.
+; CHECK-LABEL: {{^}}store_flat_scratch:
+; CHECK: s_movk_i32 flat_scratch_lo, 0
+; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}}
+; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}}
+; CHECK: flat_store_dword
+; CHECK: s_barrier
+; CHECK: flat_load_dword
+define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
+ %alloca = alloca i32, i32 9, align 4
+ %x = call i32 @llvm.r600.read.tidig.x() #3
+ %pptr = getelementptr i32, i32* %alloca, i32 %x
+ %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
+ store i32 %x, i32 addrspace(4)* %fptr
+ ; Dummy call
+ call void @llvm.AMDGPU.barrier.local() #1
+ %reload = load i32, i32 addrspace(4)* %fptr, align 4
+ store i32 %reload, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+declare void @llvm.AMDGPU.barrier.local() #1
+declare i32 @llvm.r600.read.tidig.x() #3
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind convergent }
+attributes #3 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/and.ll b/test/CodeGen/AMDGPU/and.ll
index 5672d470bd7e..f83fb16101fb 100644
--- a/test/CodeGen/AMDGPU/and.ll
+++ b/test/CodeGen/AMDGPU/and.ll
@@ -2,6 +2,8 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+declare i32 @llvm.r600.read.tidig.x() #0
+
; FUNC-LABEL: {{^}}test2:
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -54,13 +56,80 @@ define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
ret void
}
-; FUNC-LABEL: {{^}}v_and_i32:
-; SI: v_and_b32
-define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
- %a = load i32, i32 addrspace(1)* %aptr, align 4
- %b = load i32, i32 addrspace(1)* %bptr, align 4
+; FIXME: We should really duplicate the constant so that the SALU use
+; can fold into the s_and_b32 and the VALU one is materialized
+; directly without copying from the SGPR.
+
+; Second use is a VGPR use of the constant.
+; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_0:
+; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687
+; SI-DAG: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
+; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
+; SI: buffer_store_dword [[VK]]
+define void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+ %and = and i32 %a, 1234567
+
+ ; Just to stop future replacement of copy to vgpr + store with VALU op.
+ %foo = add i32 %and, %b
+ store volatile i32 %foo, i32 addrspace(1)* %out
+ store volatile i32 1234567, i32 addrspace(1)* %out
+ ret void
+}
+
+; Second use is another SGPR use of the constant.
+; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_1:
+; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687
+; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
+; SI: s_add_i32
+; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, [[K]]
+; SI: buffer_store_dword [[VK]]
+define void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+ %and = and i32 %a, 1234567
+ %foo = add i32 %and, 1234567
+ %bar = add i32 %foo, %b
+ store volatile i32 %bar, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
+; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep.a
+ %b = load i32, i32 addrspace(1)* %gep.b
%and = and i32 %a, %b
- store i32 %and, i32 addrspace(1)* %out, align 4
+ store i32 %and, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_and_i32_sgpr_vgpr:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]]
+; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
+; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
+define void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %b = load i32, i32 addrspace(1)* %gep.b
+ %and = and i32 %a, %b
+ store i32 %and, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_and_i32_vgpr_sgpr:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]]
+; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
+; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
+define void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep.a
+ %and = and i32 %a, %b
+ store i32 %and, i32 addrspace(1)* %gep.out
ret void
}
@@ -148,10 +217,24 @@ endif:
}
; FUNC-LABEL: {{^}}v_and_constant_i64:
-; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
-; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI-DAG: s_mov_b32 [[KLO:s[0-9]+]], 0xab19b207
+; SI-DAG: s_movk_i32 [[KHI:s[0-9]+]], 0x11e{{$}}
+; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], {{v[0-9]+}}
+; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], {{v[0-9]+}}
+; SI: buffer_store_dwordx2
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64, i64 addrspace(1)* %aptr, align 8
+ %and = and i64 %a, 1231231234567
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FIXME: Should replace and 0
+; FUNC-LABEL: {{^}}v_and_i64_32_bit_constant:
+; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+define void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 1234567
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@@ -294,3 +377,5 @@ define void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 a
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/test/CodeGen/AMDGPU/annotate-kernel-features.ll
new file mode 100644
index 000000000000..b116c72322bb
--- /dev/null
+++ b/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -0,0 +1,193 @@
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA -check-prefix=ALL %s
+; RUN: opt -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s
+
+declare i32 @llvm.r600.read.tgid.x() #0
+declare i32 @llvm.r600.read.tgid.y() #0
+declare i32 @llvm.r600.read.tgid.z() #0
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.r600.read.tidig.y() #0
+declare i32 @llvm.r600.read.tidig.z() #0
+
+declare i32 @llvm.r600.read.local.size.x() #0
+declare i32 @llvm.r600.read.local.size.y() #0
+declare i32 @llvm.r600.read.local.size.z() #0
+
+declare i32 @llvm.r600.read.global.size.x() #0
+declare i32 @llvm.r600.read.global.size.y() #0
+declare i32 @llvm.r600.read.global.size.z() #0
+
+
+; ALL: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
+define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tgid.x()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_y(i32 addrspace(1)* %ptr) #2 {
+define void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tgid.y()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 {
+define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.y()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ %val1 = call i32 @llvm.r600.read.tgid.y()
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 {
+define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.x()
+ %val1 = call i32 @llvm.r600.read.tgid.y()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_z(i32 addrspace(1)* %ptr) #3 {
+define void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tgid.z()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 {
+define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.x()
+ %val1 = call i32 @llvm.r600.read.tgid.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 {
+define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.y()
+ %val1 = call i32 @llvm.r600.read.tgid.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 {
+define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tgid.x()
+ %val1 = call i32 @llvm.r600.read.tgid.y()
+ %val2 = call i32 @llvm.r600.read.tgid.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ store volatile i32 %val2, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
+define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tidig.x()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_y(i32 addrspace(1)* %ptr) #5 {
+define void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tidig.y()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_z(i32 addrspace(1)* %ptr) #6 {
+define void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.tidig.z()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
+define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tidig.x()
+ %val1 = call i32 @llvm.r600.read.tgid.x()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 {
+define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tidig.y()
+ %val1 = call i32 @llvm.r600.read.tgid.y()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 {
+define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tidig.x()
+ %val1 = call i32 @llvm.r600.read.tidig.y()
+ %val2 = call i32 @llvm.r600.read.tidig.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ store volatile i32 %val2, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; ALL: define void @use_all_workitems(i32 addrspace(1)* %ptr) #9 {
+define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
+ %val0 = call i32 @llvm.r600.read.tidig.x()
+ %val1 = call i32 @llvm.r600.read.tidig.y()
+ %val2 = call i32 @llvm.r600.read.tidig.z()
+ %val3 = call i32 @llvm.r600.read.tgid.x()
+ %val4 = call i32 @llvm.r600.read.tgid.y()
+ %val5 = call i32 @llvm.r600.read.tgid.z()
+ store volatile i32 %val0, i32 addrspace(1)* %ptr
+ store volatile i32 %val1, i32 addrspace(1)* %ptr
+ store volatile i32 %val2, i32 addrspace(1)* %ptr
+ store volatile i32 %val3, i32 addrspace(1)* %ptr
+ store volatile i32 %val4, i32 addrspace(1)* %ptr
+ store volatile i32 %val5, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; HSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #10 {
+; NOHSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
+define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.local.size.x()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; HSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #10 {
+; NOHSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
+define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.local.size.y()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+; HSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #10 {
+; NOHSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
+define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
+ %val = call i32 @llvm.r600.read.local.size.z()
+ store i32 %val, i32 addrspace(1)* %ptr
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
+; HSA: attributes #0 = { nounwind readnone }
+; HSA: attributes #1 = { nounwind }
+; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
+; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
+; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
+; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
+; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
+; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
+; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
diff --git a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
index 8c2a0795860d..f8a74222d566 100644
--- a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -2,7 +2,7 @@
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
-declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+declare void @llvm.AMDGPU.barrier.local() nounwind convergent
; The required pointer calculations for the alloca'd actually requires
; an add and won't be folded into the addressing, which fails with a
@@ -14,7 +14,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; FIXME: We end up with zero argument for ADD, because
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
; with the appropriate offset. We should fold this into the store.
-; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
+; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}}
; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
@@ -22,7 +22,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; to interpret:
; getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
-; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
+; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
@@ -35,7 +35,7 @@ define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 add
%alloca_ptr = getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
store i32 %result, i32* %alloca_ptr, align 4
; Dummy call
- call void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+ call void @llvm.AMDGPU.barrier.local() nounwind convergent
%reload = load i32, i32* %alloca_ptr, align 4
%out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
diff --git a/test/CodeGen/AMDGPU/bitreverse.ll b/test/CodeGen/AMDGPU/bitreverse.ll
new file mode 100644
index 000000000000..0ef7d5184c1f
--- /dev/null
+++ b/test/CodeGen/AMDGPU/bitreverse.ll
@@ -0,0 +1,115 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i16 @llvm.bitreverse.i16(i16) #1
+declare i32 @llvm.bitreverse.i32(i32) #1
+declare i64 @llvm.bitreverse.i64(i64) #1
+
+declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1
+declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1
+
+declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1
+declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1
+
+declare i32 @llvm.AMDGPU.brev(i32) #1
+
+; FUNC-LABEL: {{^}}s_brev_i16:
+; SI: s_brev_b32
+define void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 {
+ %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
+ store i16 %brev, i16 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_brev_i16:
+; SI: v_bfrev_b32_e32
+define void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 {
+ %val = load i16, i16 addrspace(1)* %valptr
+ %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1
+ store i16 %brev, i16 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_brev_i32:
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
+define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 {
+ %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
+ store i32 %brev, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_brev_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1
+ store i32 %brev, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_brev_v2i32:
+; SI: s_brev_b32
+; SI: s_brev_b32
+define void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 {
+ %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
+ store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_brev_v2i32:
+; SI: v_bfrev_b32_e32
+; SI: v_bfrev_b32_e32
+define void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 {
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr
+ %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1
+ store <2 x i32> %brev, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_brev_i64:
+define void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 {
+ %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
+ store i64 %brev, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_brev_i64:
+define void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 {
+ %val = load i64, i64 addrspace(1)* %valptr
+ %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1
+ store i64 %brev, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_brev_v2i64:
+define void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 {
+ %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
+ store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_brev_v2i64:
+define void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 {
+ %val = load <2 x i64>, <2 x i64> addrspace(1)* %valptr
+ %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1
+ store <2 x i64> %brev, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}legacy_s_brev_i32:
+; SI: s_brev_b32
+define void @legacy_s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+ %brev = call i32 @llvm.AMDGPU.brev(i32 %val) #1
+ store i32 %brev, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/calling-conventions.ll b/test/CodeGen/AMDGPU/calling-conventions.ll
new file mode 100644
index 000000000000..57adc8be6a99
--- /dev/null
+++ b/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
+
+; Make sure we don't crash or assert on spir_kernel calling convention.
+
+; SI-LABEL: {{^}}kernel:
+; SI: s_endpgm
+define spir_kernel void @kernel(i32 addrspace(1)* %out) {
+entry:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: This is treated like a kernel
+; SI-LABEL: {{^}}func:
+; SI: s_endpgm
+define spir_func void @func(i32 addrspace(1)* %out) {
+entry:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
new file mode 100644
index 000000000000..1c5bed3b905f
--- /dev/null
+++ b/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -0,0 +1,98 @@
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
+; OPT: getelementptr i32, i32 addrspace(4)* %in
+; OPT: br i1
+; OPT-NOT: ptrtoint
+
+; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
+; GCN: flat_load_dword
+; GCN: {{^}}BB0_2:
+define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(4)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(4)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
+; OPT: getelementptr i32, i32 addrspace(4)* %out,
+; OPT-CI-NOT: getelementptr
+; OPT: br i1
+
+; OPT-CI: ptrtoint
+; OPT-CI: add
+; OPT-CI: inttoptr
+; OPT: br label
+
+; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32:
+; CI: buffer_load_dword {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
+define void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
+ %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(1)*
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(1)* %cast
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(4)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
+; OPT: getelementptr i32, i32 addrspace(4)* %out,
+; OPT-CI-NOT: getelementptr
+; OPT: br i1
+
+; OPT-CI: ptrtoint
+; OPT-CI: add
+; OPT-CI: inttoptr
+; OPT: br label
+
+; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32:
+; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+define void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
+ %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(2)*
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %cast
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(4)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index a68d110fdc96..698494265a7d 100644
--- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -1,5 +1,7 @@
+; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
@@ -115,35 +117,6 @@ done:
ret void
}
-; OPT-LABEL: @test_no_sink_flat_small_offset_i32(
-; OPT: getelementptr i32, i32 addrspace(4)* %in
-; OPT: br i1
-; OPT-NOT: ptrtoint
-
-; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
-; GCN: flat_load_dword
-; GCN: {{^}}BB4_2:
-
-define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
-entry:
- %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
- %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7
- %tmp0 = icmp eq i32 %cond, 0
- br i1 %tmp0, label %endif, label %if
-
-if:
- %tmp1 = load i32, i32 addrspace(4)* %in.gep
- br label %endif
-
-endif:
- %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
- store i32 %x, i32 addrspace(4)* %out.gep
- br label %done
-
-done:
- ret void
-}
-
; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
@@ -153,7 +126,7 @@ done:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}}
-; GCN: {{^}}BB5_2:
+; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
entry:
%alloca = alloca [512 x i32], align 4
@@ -189,7 +162,7 @@ done:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
-; GCN: {{^}}BB6_2:
+; GCN: {{^}}BB5_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) {
entry:
%alloca = alloca [512 x i32], align 4
@@ -222,7 +195,7 @@ done:
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-; GCN: {{^}}BB7_2:
+; GCN: {{^}}BB6_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) {
entry:
%offset.ext = zext i32 %offset to i64
@@ -246,3 +219,220 @@ done:
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
+
+
+
+; OPT-LABEL: @test_sink_constant_small_offset_i32
+; OPT-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32
+; OPT-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32
+; OPT-SI: getelementptr i32, i32 addrspace(2)*
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400
+
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32
+; OPT-SI: getelementptr i32, i32 addrspace(2)*
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
+; GCN: s_and_saveexec_b64
+; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
+; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32
+; OPT: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
+; GCN: s_and_saveexec_b64
+; GCN: s_add_u32
+; GCN: s_addc_u32
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
+
+; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
+; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}
+
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
+
+; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32
+; OPT-SI: getelementptr i32, i32 addrspace(2)*
+; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
+; OPT-VI: getelementptr i32, i32 addrspace(2)*
+; OPT: br i1
+
+; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
+; GCN: s_and_saveexec_b64
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
+; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
+
+; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}
+
+; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
+; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
+
+; GCN: s_or_b64 exec, exec
+define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) {
+entry:
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
+ %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
+ %tmp0 = icmp eq i32 %cond, 0
+ br i1 %tmp0, label %endif, label %if
+
+if:
+ %tmp1 = load i32, i32 addrspace(2)* %in.gep
+ br label %endif
+
+endif:
+ %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+ store i32 %x, i32 addrspace(1)* %out.gep
+ br label %done
+
+done:
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll b/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
new file mode 100644
index 000000000000..1a37e3c75fa3
--- /dev/null
+++ b/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck -check-prefix=HSA-DEFAULT %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA-NODEFAULT %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri | FileCheck -check-prefix=NOHSA-DEFAULT %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT %s
+
+
+; HSA-DEFAULT: flat_store_dword
+; HSA-NODEFAULT: buffer_store_dword
+; NOHSA-DEFAULT: buffer_store_dword
+; NOHSA-NODEFAULT: flat_store_dword
+define void @test(i32 addrspace(1)* %out) {
+entry:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/ctpop64.ll b/test/CodeGen/AMDGPU/ctpop64.ll
index e1a0ee3ea217..ec2971e98032 100644
--- a/test/CodeGen/AMDGPU/ctpop64.ll
+++ b/test/CodeGen/AMDGPU/ctpop64.ll
@@ -36,6 +36,24 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
ret void
}
+; FIXME: or 0 should be replaxed with copy
+; FUNC-LABEL: {{^}}v_ctpop_i64_user:
+; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
+; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]]
+; GCN-DAG: v_or_b32_e64 v[[RESULT_HI:[0-9]+]], 0, s{{[0-9]+}}
+; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+; GCN: s_endpgm
+define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind {
+ %val = load i64, i64 addrspace(1)* %in, align 8
+ %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
+ %or = or i64 %ctpop, %s.val
+ store i64 %or, i64 addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}s_ctpop_v2i64:
; GCN: s_bcnt1_i32_b64
; GCN: s_bcnt1_i32_b64
@@ -99,8 +117,8 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
-; GCN: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
-; GCN: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
+; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
+; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
; GCN: s_endpgm
define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
diff --git a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index 3399d9da29e3..834922c62cbd 100644
--- a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -137,14 +137,8 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
; SI-NOT: bfe
; SI-NOT: lshr
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8
%cvt = uitofp <8 x i8> %load to <8 x float>
@@ -154,7 +148,7 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
-; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
+; SI: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
diff --git a/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll b/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
new file mode 100644
index 000000000000..171883e4c74b
--- /dev/null
+++ b/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+
+; The memory operand was dropped from the buffer_load_dword_offset
+; when replaced with the addr64 during operand legalization, resulting
+; in the global loads not being scheduled together.
+
+; GCN-LABEL: {{^}}reschedule_global_load_lds_store:
+; GCN: buffer_load_dword
+; GCN: buffer_load_dword
+; GCN: ds_write_b32
+; GCN: ds_write_b32
+; GCN: s_endpgm
+define void @reschedule_global_load_lds_store(i32 addrspace(1)* noalias %gptr0, i32 addrspace(1)* noalias %gptr1, i32 addrspace(3)* noalias %lptr, i32 %c) #0 {
+entry:
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #1
+ %idx = shl i32 %tid, 2
+ %gep0 = getelementptr i32, i32 addrspace(1)* %gptr0, i32 %idx
+ %gep1 = getelementptr i32, i32 addrspace(1)* %gptr1, i32 %idx
+ %gep2 = getelementptr i32, i32 addrspace(3)* %lptr, i32 %tid
+ %cmp0 = icmp eq i32 %c, 0
+ br i1 %cmp0, label %for.body, label %exit
+
+for.body: ; preds = %for.body, %entry
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %for.body ]
+ %gptr0.phi = phi i32 addrspace(1)* [ %gep0, %entry ], [ %gep0.inc, %for.body ]
+ %gptr1.phi = phi i32 addrspace(1)* [ %gep1, %entry ], [ %gep1.inc, %for.body ]
+ %lptr0.phi = phi i32 addrspace(3)* [ %gep2, %entry ], [ %gep2.inc, %for.body ]
+ %lptr1 = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 1
+ %val0 = load i32, i32 addrspace(1)* %gep0
+ store i32 %val0, i32 addrspace(3)* %lptr0.phi
+ %val1 = load i32, i32 addrspace(1)* %gep1
+ store i32 %val1, i32 addrspace(3)* %lptr1
+ %gep0.inc = getelementptr i32, i32 addrspace(1)* %gptr0.phi, i32 4
+ %gep1.inc = getelementptr i32, i32 addrspace(1)* %gptr1.phi, i32 4
+ %gep2.inc = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 4
+ %i.inc = add nsw i32 %i, 1
+ %cmp1 = icmp ne i32 %i, 256
+ br i1 %cmp1, label %for.body, label %exit
+
+exit: ; preds = %for.body, %entry
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
index 5e4654abd91b..e657991557e3 100644
--- a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
+++ b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
@@ -10,13 +10,13 @@ declare void @llvm.AMDGPU.barrier.local() #1
; CHECK: BB0_1:
; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
-; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], vcc, 4, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], vcc, 0x84, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
@@ -66,5 +66,5 @@ for.end: ; preds = %for.body
}
attributes #0 = { nounwind readnone }
-attributes #1 = { noduplicate nounwind }
+attributes #1 = { convergent nounwind }
attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/AMDGPU/ds-sub-offset.ll b/test/CodeGen/AMDGPU/ds-sub-offset.ll
new file mode 100644
index 000000000000..7d6eddb01993
--- /dev/null
+++ b/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -0,0 +1,125 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+
+declare void @llvm.AMDGPU.barrier.local() #2
+declare i32 @llvm.r600.read.tidig.x() #0
+
+@lds.obj = addrspace(3) global [256 x i32] undef, align 4
+
+; GCN-LABEL: {{^}}write_ds_sub0_offset0_global:
+; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v0
+; GCN: v_sub_i32_e32 [[BASEPTR:v[0-9]+]], vcc, 0, [[SHL]]
+; GCN: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b
+; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12
+define void @write_ds_sub0_offset0_global() #0 {
+entry:
+ %x.i = call i32 @llvm.r600.read.tidig.x() #1
+ %sub1 = sub i32 0, %x.i
+ %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
+ store i32 123, i32 addrspace(3)* %arrayidx
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
+; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
+; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535
+define void @add_x_shl_neg_to_sub_max_offset() #1 {
+ %x.i = call i32 @llvm.r600.read.tidig.x() #0
+ %neg = sub i32 0, %x.i
+ %shl = shl i32 %neg, 2
+ %add = add i32 65535, %shl
+ %ptr = inttoptr i32 %add to i8 addrspace(3)*
+ store i8 13, i8 addrspace(3)* %ptr
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset_p1:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
+; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x10000, [[SCALED]]
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
+; GCN: ds_write_b8 [[NEG]], [[K]]{{$}}
+define void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
+ %x.i = call i32 @llvm.r600.read.tidig.x() #0
+ %neg = sub i32 0, %x.i
+ %shl = shl i32 %neg, 2
+ %add = add i32 65536, %shl
+ %ptr = inttoptr i32 %add to i8 addrspace(3)*
+ store i8 13, i8 addrspace(3)* %ptr
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
+; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
+; GCN-NOT: v_sub
+; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
+; GCN-NOT: v_sub
+; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}}
+; GCN: s_endpgm
+define void @add_x_shl_neg_to_sub_multi_use() #1 {
+ %x.i = call i32 @llvm.r600.read.tidig.x() #0
+ %neg = sub i32 0, %x.i
+ %shl = shl i32 %neg, 2
+ %add0 = add i32 123, %shl
+ %add1 = add i32 456, %shl
+ %ptr0 = inttoptr i32 %add0 to i32 addrspace(3)*
+ store volatile i32 13, i32 addrspace(3)* %ptr0
+ %ptr1 = inttoptr i32 %add1 to i32 addrspace(3)*
+ store volatile i32 13, i32 addrspace(3)* %ptr1
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use_same_offset:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
+; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
+; GCN-NOT: v_sub
+; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
+; GCN-NOT: v_sub
+; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
+; GCN: s_endpgm
+define void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
+ %x.i = call i32 @llvm.r600.read.tidig.x() #0
+ %neg = sub i32 0, %x.i
+ %shl = shl i32 %neg, 2
+ %add = add i32 123, %shl
+ %ptr = inttoptr i32 %add to i32 addrspace(3)*
+ store volatile i32 13, i32 addrspace(3)* %ptr
+ store volatile i32 13, i32 addrspace(3)* %ptr
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
+; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
+; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255
+define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
+ %x.i = call i32 @llvm.r600.read.tidig.x() #0
+ %neg = sub i32 0, %x.i
+ %shl = shl i32 %neg, 2
+ %add = add i32 1019, %shl
+ %ptr = inttoptr i32 %add to i64 addrspace(3)*
+ store i64 123, i64 addrspace(3)* %ptr, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
+; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
+; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]]
+; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
+define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
+ %x.i = call i32 @llvm.r600.read.tidig.x() #0
+ %neg = sub i32 0, %x.i
+ %shl = shl i32 %neg, 2
+ %add = add i32 1020, %shl
+ %ptr = inttoptr i32 %add to i64 addrspace(3)*
+ store i64 123, i64 addrspace(3)* %ptr, align 4
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind convergent }
diff --git a/test/CodeGen/AMDGPU/ds_read2.ll b/test/CodeGen/AMDGPU/ds_read2.ll
index ec04f8b1acd6..5170d9c82712 100644
--- a/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/test/CodeGen/AMDGPU/ds_read2.ll
@@ -216,10 +216,8 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f
ret void
}
-; We should be able to merge in this case, but probably not worth the effort.
-; SI-NOT: ds_read2_b32
-; SI: ds_read_b32
-; SI: ds_read_b32
+; SI-LABEL: {{^}}read2_ptr_is_subreg_f32:
+; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}}
; SI: s_endpgm
define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -507,9 +505,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.y() #1
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
declare void @llvm.AMDGPU.barrier.local() #2
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/ds_read2_superreg.ll b/test/CodeGen/AMDGPU/ds_read2_superreg.ll
index 842c2d8bc339..0061aaf2cdbd 100644
--- a/test/CodeGen/AMDGPU/ds_read2_superreg.ll
+++ b/test/CodeGen/AMDGPU/ds_read2_superreg.ll
@@ -35,14 +35,11 @@ define void @simple_read2_v2f32_superreg(<2 x float> addrspace(1)* %out) #0 {
ret void
}
-; FIXME: Shuffling to new superregister
; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align4:
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_Y:[0-9]+]]:[[REG_X:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
-; CI-DAG: v_mov_b32_e32 v[[COPY_REG_Y:[0-9]+]], v[[REG_Y]]
-; CI-DAG: v_mov_b32_e32 v[[COPY_REG_Z:[0-9]+]], v[[REG_Z]]
-; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[COPY_REG_Z]], v[[REG_X]]
-; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_W]], v[[COPY_REG_Y]]
+; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
+; CI-DAG: ds_read2_b32 v{{\[}}[[REG_Z:[0-9]+]]:[[REG_W:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
+; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_Z]], v[[REG_X]]
+; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_W]], v[[REG_Y]]
; CI: v_add_f32_e32 v[[ADD2:[0-9]+]], v[[ADD1]], v[[ADD0]]
; CI: buffer_store_dword v[[ADD2]]
; CI: s_endpgm
@@ -88,8 +85,13 @@ define void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 {
}
; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align8:
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
+; CI-DAG: ds_read2_b64 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
+
+; FIXME: These moves shouldn't be necessary, it should be able to
+; store the same register if offset1 was the non-zero offset.
+
+; CI: v_mov_b32
+; CI: v_mov_b32
; CI: buffer_store_dwordx4
; CI: s_endpgm
define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out) #0 {
@@ -102,8 +104,9 @@ define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out)
}
; CI-LABEL: {{^}}simple_read2_v4f32_superreg:
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
+; CI: ds_read2_b64 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
+; CI: v_mov_b32
+; CI: v_mov_b32
; CI: buffer_store_dwordx4
; CI: s_endpgm
define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 {
@@ -115,19 +118,16 @@ define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 {
ret void
}
+; FIXME: Extra moves shuffling superregister
; CI-LABEL: {{^}}simple_read2_v8f32_superreg:
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:7 offset1:6{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT5:[0-9]+]]:[[REG_ELT4:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:4{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT2:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT1:[0-9]+]]:[[REG_ELT0:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
+; CI: ds_read2_b64 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT7:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:3{{$}}
+; CI: v_mov_b32
+; CI: v_mov_b32
+; CI: ds_read2_b64 v{{\[}}[[REG_ELT6:[0-9]+]]:[[REG_ELT5:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2{{$}}
+; CI: v_mov_b32
+; CI: v_mov_b32
+; CI: buffer_store_dwordx4
+; CI: buffer_store_dwordx4
; CI: s_endpgm
define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -138,33 +138,24 @@ define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 {
ret void
}
+; FIXME: Extra moves shuffling superregister
; CI-LABEL: {{^}}simple_read2_v16f32_superreg:
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:15 offset1:14{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:13 offset1:12{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:11 offset1:10{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:9 offset1:8{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:7 offset1:6{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT5:[0-9]+]]:[[REG_ELT4:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:4{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT2:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}}
-; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT1:[0-9]+]]:[[REG_ELT0:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}}
+; CI: ds_read2_b64 v{{\[}}[[REG_ELT11:[0-9]+]]:[[REG_ELT15:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:3{{$}}
+; CI: v_mov_b32
+; CI: v_mov_b32
+; CI: ds_read2_b64 v{{\[}}[[REG_ELT14:[0-9]+]]:[[REG_ELT13:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:7{{$}}
+; CI: ds_read2_b64 v{{\[}}[[REG_ELT14:[0-9]+]]:[[REG_ELT13:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:6 offset1:4{{$}}
+; CI: v_mov_b32
+; CI: v_mov_b32
+; CI: ds_read2_b64 v{{\[}}[[REG_ELT12:[0-9]+]]:[[REG_ELT10:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2{{$}}
+; CI: v_mov_b32
+; CI: v_mov_b32
; CI: s_waitcnt lgkmcnt(0)
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
-; CI: buffer_store_dword
+; CI: buffer_store_dwordx4
+; CI: buffer_store_dwordx4
+; CI: buffer_store_dwordx4
+; CI: buffer_store_dwordx4
; CI: s_endpgm
define void @simple_read2_v16f32_superreg(<16 x float> addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
@@ -238,9 +229,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.y() #1
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
declare void @llvm.AMDGPU.barrier.local() #2
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/ds_read2st64.ll b/test/CodeGen/AMDGPU/ds_read2st64.ll
index e2e441214b4a..4a0571ea16f2 100644
--- a/test/CodeGen/AMDGPU/ds_read2st64.ll
+++ b/test/CodeGen/AMDGPU/ds_read2st64.ll
@@ -65,7 +65,7 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
-; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
@@ -197,7 +197,7 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
-; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
@@ -264,9 +264,5 @@ declare i32 @llvm.r600.read.tidig.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.y() #1
-; Function Attrs: noduplicate nounwind
-declare void @llvm.AMDGPU.barrier.local() #2
-
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/AMDGPU/ds_write2.ll b/test/CodeGen/AMDGPU/ds_write2.ll
index d4973e377b59..9d3a293f3b89 100644
--- a/test/CodeGen/AMDGPU/ds_write2.ll
+++ b/test/CodeGen/AMDGPU/ds_write2.ll
@@ -345,8 +345,9 @@ define void @store_constant_disjoint_offsets() {
; SI-LABEL: @store_misaligned64_constant_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
-; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
+; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+; SI: s_endpgm
define void @store_misaligned64_constant_offsets() {
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
@@ -430,9 +431,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.y() #1
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
declare void @llvm.AMDGPU.barrier.local() #2
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/ds_write2st64.ll b/test/CodeGen/AMDGPU/ds_write2st64.ll
index 358aa6a9e363..5a1024ccf6d7 100644
--- a/test/CodeGen/AMDGPU/ds_write2st64.ll
+++ b/test/CodeGen/AMDGPU/ds_write2st64.ll
@@ -109,9 +109,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.y() #1
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
declare void @llvm.AMDGPU.barrier.local() #2
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
new file mode 100644
index 000000000000..f4409a0984a9
--- /dev/null
+++ b/test/CodeGen/AMDGPU/dynamic_stackalloc.ll
@@ -0,0 +1,11 @@
+; RUN: not llc -march=amdgcn -mcpu=tahiti -mattr=+promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported dynamic alloca in test_dynamic_stackalloc
+
+define void @test_dynamic_stackalloc(i32 addrspace(1)* %out, i32 %n) {
+ %alloca = alloca i32, i32 %n
+ store volatile i32 0, i32* %alloca
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/extract-vector-elt-i64.ll b/test/CodeGen/AMDGPU/extract-vector-elt-i64.ll
new file mode 100644
index 000000000000..e32559139623
--- /dev/null
+++ b/test/CodeGen/AMDGPU/extract-vector-elt-i64.ll
@@ -0,0 +1,43 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; How the replacement of i64 stores with v2i32 stores resulted in
+; breaking other users of the bitcast if they already existed
+
+; GCN-LABEL: {{^}}extract_vector_elt_select_error:
+; GCN: buffer_store_dword
+; GCN: buffer_store_dword
+; GCN: buffer_store_dwordx2
+define void @extract_vector_elt_select_error(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %val) nounwind {
+ %vec = bitcast i64 %val to <2 x i32>
+ %elt0 = extractelement <2 x i32> %vec, i32 0
+ %elt1 = extractelement <2 x i32> %vec, i32 1
+
+ store volatile i32 %elt0, i32 addrspace(1)* %out
+ store volatile i32 %elt1, i32 addrspace(1)* %out
+ store volatile i64 %val, i64 addrspace(1)* %in
+ ret void
+}
+
+
+define void @extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo) nounwind {
+ %p0 = extractelement <2 x i64> %foo, i32 0
+ %p1 = extractelement <2 x i64> %foo, i32 1
+ %out1 = getelementptr i64, i64 addrspace(1)* %out, i32 1
+ store volatile i64 %p1, i64 addrspace(1)* %out
+ store volatile i64 %p0, i64 addrspace(1)* %out1
+ ret void
+}
+
+define void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) nounwind {
+ %dynelt = extractelement <2 x i64> %foo, i32 %elt
+ store volatile i64 %dynelt, i64 addrspace(1)* %out
+ ret void
+}
+
+define void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %foo, i32 %elt, <2 x i64> %arst) nounwind {
+ %load = load volatile <2 x i64>, <2 x i64> addrspace(1)* %foo
+ %or = or <2 x i64> %load, %arst
+ %dynelt = extractelement <2 x i64> %or, i32 %elt
+ store volatile i64 %dynelt, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/fadd64.ll b/test/CodeGen/AMDGPU/fadd64.ll
index 485c55870c47..19c17289da3d 100644
--- a/test/CodeGen/AMDGPU/fadd64.ll
+++ b/test/CodeGen/AMDGPU/fadd64.ll
@@ -1,14 +1,44 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
-; CHECK: {{^}}fadd_f64:
+; CHECK-LABEL: {{^}}v_fadd_f64:
; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
+define void @v_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+ double addrspace(1)* %in2) {
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
+ %r2 = fadd double %r0, %r1
+ store double %r2, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}s_fadd_f64:
+; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @s_fadd_f64(double addrspace(1)* %out, double %r0, double %r1) {
+ %r2 = fadd double %r0, %r1
+ store double %r2, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}v_fadd_v2f64:
+; CHECK: v_add_f64
+; CHECK: v_add_f64
+; CHECK: buffer_store_dwordx4
+define void @v_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
+ <2 x double> addrspace(1)* %in2) {
+ %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
+ %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
+ %r2 = fadd <2 x double> %r0, %r1
+ store <2 x double> %r2, <2 x double> addrspace(1)* %out
+ ret void
+}
-define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
- double addrspace(1)* %in2) {
- %r0 = load double, double addrspace(1)* %in1
- %r1 = load double, double addrspace(1)* %in2
- %r2 = fadd double %r0, %r1
- store double %r2, double addrspace(1)* %out
- ret void
+; CHECK-LABEL: {{^}}s_fadd_v2f64:
+; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: buffer_store_dwordx4
+define void @s_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %r0, <2 x double> %r1) {
+ %r2 = fadd <2 x double> %r0, %r1
+ store <2 x double> %r2, <2 x double> addrspace(1)* %out
+ ret void
}
diff --git a/test/CodeGen/AMDGPU/fceil64.ll b/test/CodeGen/AMDGPU/fceil64.ll
index e8c34f0141e4..c8ef5b101c4d 100644
--- a/test/CodeGen/AMDGPU/fceil64.ll
+++ b/test/CodeGen/AMDGPU/fceil64.ll
@@ -17,12 +17,12 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
-; SI: cmp_gt_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
-; SI: cmp_lt_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
+; SI-DAG: cmp_gt_i32
+; SI-DAG: cndmask_b32
+; SI-DAG: cndmask_b32
+; SI-DAG: cmp_lt_i32
+; SI-DAG: cndmask_b32
+; SI-DAG: cndmask_b32
; SI-DAG: v_cmp_lt_f64
; SI-DAG: v_cmp_lg_f64
; SI: s_and_b64
diff --git a/test/CodeGen/AMDGPU/fcmp.ll b/test/CodeGen/AMDGPU/fcmp.ll
index 5207ab57bade..97d954fcc3c2 100644
--- a/test/CodeGen/AMDGPU/fcmp.ll
+++ b/test/CodeGen/AMDGPU/fcmp.ll
@@ -20,7 +20,7 @@ entry:
; CHECK: {{^}}fcmp_br:
; CHECK: SET{{[N]*}}E_DX10 * T{{[0-9]+\.[XYZW],}}
-; CHECK-NEXT {{[0-9]+(5.0}}
+; CHECK-NEXT: {{[0-9]+\(5.0}}
define void @fcmp_br(i32 addrspace(1)* %out, float %in) {
entry:
diff --git a/test/CodeGen/AMDGPU/flat-address-space.ll b/test/CodeGen/AMDGPU/flat-address-space.ll
index 8ceca078f2d6..86e0c07323bb 100644
--- a/test/CodeGen/AMDGPU/flat-address-space.ll
+++ b/test/CodeGen/AMDGPU/flat-address-space.ll
@@ -7,39 +7,16 @@
; specialize away generic pointer accesses.
-; CHECK-LABEL: {{^}}branch_use_flat_i32:
-; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
-; CHECK: s_endpgm
-define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
-entry:
- %cmp = icmp ne i32 %c, 0
- br i1 %cmp, label %local, label %global
-
-local:
- %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
- br label %end
-
-global:
- %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
- br label %end
-
-end:
- %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
- store i32 %x, i32 addrspace(4)* %fptr, align 4
-; %val = load i32, i32 addrspace(4)* %fptr, align 4
-; store i32 %val, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-
-
; These testcases might become useless when there are optimizations to
; remove generic pointers.
; CHECK-LABEL: {{^}}store_flat_i32:
-; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
-; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
-; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
+; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
+; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]],
+; CHECK: s_waitcnt lgkmcnt(0)
+; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
@@ -83,7 +60,7 @@ define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
-; CHECK-LABEL @load_flat_i32:
+; CHECK-LABEL: load_flat_i32:
; CHECK: flat_load_dword
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
@@ -92,7 +69,7 @@ define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noa
ret void
}
-; CHECK-LABEL @load_flat_i64:
+; CHECK-LABEL: load_flat_i64:
; CHECK: flat_load_dwordx2
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
@@ -101,7 +78,7 @@ define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
ret void
}
-; CHECK-LABEL @load_flat_v4i32:
+; CHECK-LABEL: load_flat_v4i32:
; CHECK: flat_load_dwordx4
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
@@ -110,7 +87,7 @@ define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> add
ret void
}
-; CHECK-LABEL @sextload_flat_i8:
+; CHECK-LABEL: sextload_flat_i8:
; CHECK: flat_load_sbyte
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
@@ -120,7 +97,7 @@ define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
ret void
}
-; CHECK-LABEL @zextload_flat_i8:
+; CHECK-LABEL: zextload_flat_i8:
; CHECK: flat_load_ubyte
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
@@ -130,7 +107,7 @@ define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
ret void
}
-; CHECK-LABEL @sextload_flat_i16:
+; CHECK-LABEL: sextload_flat_i16:
; CHECK: flat_load_sshort
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
@@ -140,7 +117,7 @@ define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
ret void
}
-; CHECK-LABEL @zextload_flat_i16:
+; CHECK-LABEL: zextload_flat_i16:
; CHECK: flat_load_ushort
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
@@ -150,35 +127,9 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
ret void
}
-
-
-; TODO: This should not be zero when registers are used for small
-; scratch allocations again.
-
-; Check for prologue initializing special SGPRs pointing to scratch.
-; CHECK-LABEL: {{^}}store_flat_scratch:
-; CHECK: s_movk_i32 flat_scratch_lo, 0
-; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}}
-; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}}
-; CHECK: flat_store_dword
-; CHECK: s_barrier
-; CHECK: flat_load_dword
-define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
- %alloca = alloca i32, i32 9, align 4
- %x = call i32 @llvm.r600.read.tidig.x() #3
- %pptr = getelementptr i32, i32* %alloca, i32 %x
- %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
- store i32 %x, i32 addrspace(4)* %fptr
- ; Dummy call
- call void @llvm.AMDGPU.barrier.local() #1
- %reload = load i32, i32 addrspace(4)* %fptr, align 4
- store i32 %reload, i32 addrspace(1)* %out, align 4
- ret void
-}
-
declare void @llvm.AMDGPU.barrier.local() #1
declare i32 @llvm.r600.read.tidig.x() #3
attributes #0 = { nounwind }
-attributes #1 = { nounwind noduplicate }
+attributes #1 = { nounwind convergent }
attributes #3 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
new file mode 100644
index 000000000000..e2ae3353ae1d
--- /dev/null
+++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI
+; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI
+
+; GCN-LABEL: {{^}}no_vcc_no_flat:
+; GCN: ; NumSgprs: 8
+define void @no_vcc_no_flat() {
+entry:
+ call void asm sideeffect "", "~{SGPR7}"()
+ ret void
+}
+
+; GCN-LABEL: {{^}}vcc_no_flat:
+; GCN: ; NumSgprs: 10
+define void @vcc_no_flat() {
+entry:
+ call void asm sideeffect "", "~{SGPR7},~{VCC}"()
+ ret void
+}
+
+; GCN-LABEL: {{^}}no_vcc_flat:
+; CI: ; NumSgprs: 12
+; VI: ; NumSgprs: 14
+define void @no_vcc_flat() {
+entry:
+ call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"()
+ ret void
+}
+
+; GCN-LABEL: {{^}}vcc_flat:
+; CI: ; NumSgprs: 12
+; VI: ; NumSgprs: 14
+define void @vcc_flat() {
+entry:
+ call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/fma-combine.ll b/test/CodeGen/AMDGPU/fma-combine.ll
index bd574b877117..6f3437048ed8 100644
--- a/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/test/CodeGen/AMDGPU/fma-combine.ll
@@ -364,5 +364,205 @@ define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %
ret void
}
+;
+; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
+;
+
+; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y:
+; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]]
+define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %a = fadd float %x, 1.0
+ %m = fmul float %a, %y
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one:
+; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]]
+define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %a = fadd float %x, 1.0
+ %m = fmul float %y, %a
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y:
+; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
+define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %a = fadd float %x, -1.0
+ %m = fmul float %a, %y
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone:
+; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
+define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %a = fadd float %x, -1.0
+ %m = fmul float %y, %a
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y:
+; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
+define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %s = fsub float 1.0, %x
+ %m = fmul float %s, %y
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x:
+; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
+define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %s = fsub float 1.0, %x
+ %m = fmul float %y, %s
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y:
+; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
+define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %s = fsub float -1.0, %x
+ %m = fmul float %s, %y
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x:
+; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
+define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %s = fsub float -1.0, %x
+ %m = fmul float %y, %s
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y:
+; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
+define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %s = fsub float %x, 1.0
+ %m = fmul float %s, %y
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one:
+; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
+define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %s = fsub float %x, 1.0
+ %m = fmul float %y, %s
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y:
+; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
+define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %s = fsub float %x, -1.0
+ %m = fmul float %s, %y
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone:
+; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
+define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %s = fsub float %x, -1.0
+ %m = fmul float %y, %s
+ store float %m, float addrspace(1)* %out
+ ret void
+}
+
+;
+; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
+;
+
+; FUNC-LABEL: {{^}}test_f32_interp:
+; SI: v_mad_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
+; SI: v_mac_f32_e32 [[VR]], [[VT]], [[VX:v[0-9]]]
+define void @test_f32_interp(float addrspace(1)* %out,
+ float addrspace(1)* %in1,
+ float addrspace(1)* %in2,
+ float addrspace(1)* %in3) {
+ %x = load float, float addrspace(1)* %in1
+ %y = load float, float addrspace(1)* %in2
+ %t = load float, float addrspace(1)* %in3
+ %t1 = fsub float 1.0, %t
+ %tx = fmul float %x, %t
+ %ty = fmul float %y, %t1
+ %r = fadd float %tx, %ty
+ store float %r, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_f64_interp:
+; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]]
+; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]]
+define void @test_f64_interp(double addrspace(1)* %out,
+ double addrspace(1)* %in1,
+ double addrspace(1)* %in2,
+ double addrspace(1)* %in3) {
+ %x = load double, double addrspace(1)* %in1
+ %y = load double, double addrspace(1)* %in2
+ %t = load double, double addrspace(1)* %in3
+ %t1 = fsub double 1.0, %t
+ %tx = fmul double %x, %t
+ %ty = fmul double %y, %t1
+ %r = fadd double %tx, %ty
+ store double %r, double addrspace(1)* %out
+ ret void
+}
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/fmax_legacy.ll b/test/CodeGen/AMDGPU/fmax_legacy.ll
index 413957d2982a..d374fb67350c 100644
--- a/test/CodeGen/AMDGPU/fmax_legacy.ll
+++ b/test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -87,6 +87,46 @@ define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(
ret void
}
+; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32:
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; EG: MAX
+define void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
+
+ %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
+ %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1
+
+ %cmp = fcmp ogt <1 x float> %a, %b
+ %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
+ store <1 x float> %val, <1 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32:
+; SI-SAFE: v_max_legacy_f32_e32
+; SI-SAFE: v_max_legacy_f32_e32
+; SI-SAFE: v_max_legacy_f32_e32
+; SI-NONAN: v_max_f32_e32
+; SI-NONAN: v_max_f32_e32
+; SI-NONAN: v_max_f32_e32
+define void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
+
+ %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
+ %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1
+
+ %cmp = fcmp ogt <3 x float> %a, %b
+ %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
+ store <3 x float> %val, <3 x float> addrspace(1)* %out
+ ret void
+}
; FUNC-LABEL: @test_fmax_legacy_ogt_f32_multi_use
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
diff --git a/test/CodeGen/AMDGPU/fmin_legacy.ll b/test/CodeGen/AMDGPU/fmin_legacy.ll
index 6a625c239d76..52fc3d0d251a 100644
--- a/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -96,6 +96,69 @@ define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(
ret void
}
+; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32:
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+define void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
+
+ %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
+ %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1
+
+ %cmp = fcmp ult <1 x float> %a, %b
+ %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
+ store <1 x float> %val, <1 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32:
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI-SAFE: v_min_legacy_f32_e32
+; SI-SAFE: v_min_legacy_f32_e32
+
+; SI-NONAN: v_min_f32_e32
+; SI-NONAN: v_min_f32_e32
+define void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1
+
+ %a = load <2 x float>, <2 x float> addrspace(1)* %gep.0
+ %b = load <2 x float>, <2 x float> addrspace(1)* %gep.1
+
+ %cmp = fcmp ult <2 x float> %a, %b
+ %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
+ store <2 x float> %val, <2 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32:
+; SI-SAFE: v_min_legacy_f32_e32
+; SI-SAFE: v_min_legacy_f32_e32
+; SI-SAFE: v_min_legacy_f32_e32
+
+; SI-NONAN: v_min_f32_e32
+; SI-NONAN: v_min_f32_e32
+; SI-NONAN: v_min_f32_e32
+define void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1
+
+ %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0
+ %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1
+
+ %cmp = fcmp ult <3 x float> %a, %b
+ %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b
+ store <3 x float> %val, <3 x float> addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: @test_fmin_legacy_ole_f32_multi_use
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
diff --git a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
new file mode 100644
index 000000000000..1ee92b2f7c08
--- /dev/null
+++ b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -0,0 +1,102 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
+; make add an instruction if the fadd has more than one use.
+
+declare float @llvm.fabs.f32(float) #1
+
+; GCN-LABEL: {{^}}multiple_fadd_use_test:
+; GCN: v_max_legacy_f32_e64 [[A16:v[0-9]+]],
+; GCN: v_add_f32_e32 [[A17:v[0-9]+]], [[A16]], [[A16]]
+; GCN: v_mul_f32_e32 [[A18:v[0-9]+]], [[A17]], [[A17]]
+; GCN: v_mad_f32 [[A20:v[0-9]+]], -[[A18]], [[A17]], 1.0
+; GCN: buffer_store_dword [[A20]]
+define void @multiple_fadd_use_test(float addrspace(1)* %out, float %x, float %y, float %z) #0 {
+ %a11 = fadd fast float %y, -1.0
+ %a12 = call float @llvm.fabs.f32(float %a11)
+ %a13 = fadd fast float %x, -1.0
+ %a14 = call float @llvm.fabs.f32(float %a13)
+ %a15 = fcmp ogt float %a12, %a14
+ %a16 = select i1 %a15, float %a12, float %a14
+ %a17 = fmul fast float %a16, 2.0
+ %a18 = fmul fast float %a17, %a17
+ %a19 = fmul fast float %a18, %a17
+ %a20 = fsub fast float 1.0, %a19
+ store float %a20, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}multiple_use_fadd_fmac
+; GCN-DAG: v_add_f32_e64 [[MUL2:v[0-9]+]], [[X:s[0-9]+]], s{{[0-9]+}}
+; GCN-DAG: v_mac_f32_e64 [[MAD:v[0-9]+]], 2.0, [[X]]
+; GCN-DAG: buffer_store_dword [[MUL2]]
+; GCN-DAG: buffer_store_dword [[MAD]]
+; GCN: s_endpgm
+define void @multiple_use_fadd_fmac(float addrspace(1)* %out, float %x, float %y) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %mul2 = fmul fast float %x, 2.0
+ %mad = fadd fast float %mul2, %y
+ store float %mul2, float addrspace(1)* %out
+ store float %mad, float addrspace(1)* %out.gep.1
+ ret void
+}
+
+; GCN-LABEL: {{^}}multiple_use_fadd_fmad:
+; GCN-DAG: v_add_f32_e64 [[MUL2:v[0-9]+]], |[[X:s[0-9]+]]|, |s{{[0-9]+}}|
+; GCN-DAG: v_mad_f32 [[MAD:v[0-9]+]], 2.0, |[[X]]|, v{{[0-9]+}}
+; GCN-DAG: buffer_store_dword [[MUL2]]
+; GCN-DAG: buffer_store_dword [[MAD]]
+; GCN: s_endpgm
+define void @multiple_use_fadd_fmad(float addrspace(1)* %out, float %x, float %y) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %x.abs = call float @llvm.fabs.f32(float %x)
+ %mul2 = fmul fast float %x.abs, 2.0
+ %mad = fadd fast float %mul2, %y
+ store float %mul2, float addrspace(1)* %out
+ store float %mad, float addrspace(1)* %out.gep.1
+ ret void
+}
+
+; GCN-LABEL: {{^}}multiple_use_fadd_multi_fmad:
+; GCN: v_mad_f32 {{v[0-9]+}}, 2.0, |[[X:s[0-9]+]]|, v{{[0-9]+}}
+; GCN: v_mad_f32 {{v[0-9]+}}, 2.0, |[[X]]|, v{{[0-9]+}}
+define void @multiple_use_fadd_multi_fmad(float addrspace(1)* %out, float %x, float %y, float %z) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %x.abs = call float @llvm.fabs.f32(float %x)
+ %mul2 = fmul fast float %x.abs, 2.0
+ %mad0 = fadd fast float %mul2, %y
+ %mad1 = fadd fast float %mul2, %z
+ store float %mad0, float addrspace(1)* %out
+ store float %mad1, float addrspace(1)* %out.gep.1
+ ret void
+}
+
+; GCN-LABEL: {{^}}fmul_x2_xn2:
+; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], -4.0, [[X:s[0-9]+]]
+; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fmul_x2_xn2(float addrspace(1)* %out, float %x, float %y) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %mul2 = fmul fast float %x, 2.0
+ %muln2 = fmul fast float %x, -2.0
+ %mul = fmul fast float %mul2, %muln2
+ store float %mul, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}fmul_x2_xn3:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xc0c00000
+; GCN: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], [[K]]
+; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @fmul_x2_xn3(float addrspace(1)* %out, float %x, float %y) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %mul2 = fmul fast float %x, 2.0
+ %muln2 = fmul fast float %x, -3.0
+ %mul = fmul fast float %mul2, %muln2
+ store float %mul, float addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/fneg-fabs.ll b/test/CodeGen/AMDGPU/fneg-fabs.ll
index 3b4930d9897d..b99d2712ed75 100644
--- a/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -34,8 +34,7 @@ define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
; R600: |PV.{{[XYZW]}}|
; R600: -PV
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @llvm.fabs.f32(float %bc)
@@ -49,8 +48,7 @@ define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
; R600: |PV.{{[XYZW]}}|
; R600: -PV
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fabs = call float @fabs(float %bc)
@@ -60,8 +58,7 @@ define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_f32:
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
%fabs = call float @llvm.fabs.f32(float %in)
%fsub = fsub float -0.000000e+00, %fabs
@@ -85,11 +82,8 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in)
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: -PV
-; FIXME: SGPR should be used directly for first src operand.
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI-NOT: 0x80000000
-; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
-; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
%fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
@@ -97,14 +91,11 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
ret void
}
-; FIXME: SGPR should be used directly for first src operand.
; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
-; SI-NOT: 0x80000000
-; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
-; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
-; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
-; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
%fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
diff --git a/test/CodeGen/AMDGPU/ftrunc.f64.ll b/test/CodeGen/AMDGPU/ftrunc.f64.ll
index 6618d8b5e57e..83a8ad8901d2 100644
--- a/test/CodeGen/AMDGPU/ftrunc.f64.ll
+++ b/test/CodeGen/AMDGPU/ftrunc.f64.ll
@@ -29,12 +29,12 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
-; SI: cmp_gt_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
-; SI: cmp_lt_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
+; SI-DAG: cmp_gt_i32
+; SI-DAG: cndmask_b32
+; SI-DAG: cndmask_b32
+; SI-DAG: cmp_lt_i32
+; SI-DAG: cndmask_b32
+; SI-DAG: cndmask_b32
; SI: s_endpgm
define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
diff --git a/test/CodeGen/AMDGPU/gep-address-space.ll b/test/CodeGen/AMDGPU/gep-address-space.ll
index 471b0f6b13e7..f5ab390ce686 100644
--- a/test/CodeGen/AMDGPU/gep-address-space.ll
+++ b/test/CodeGen/AMDGPU/gep-address-space.ll
@@ -11,24 +11,35 @@ define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
ret void
}
-define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: {{^}}use_gep_address_space_large_offset:
; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
; SI, which is why it is being OR'd with the base pointer.
; SI: s_or_b32
; CI: s_add_i32
; CHECK: ds_write_b32
+define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind {
%p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
store i32 99, i32 addrspace(3)* %p
ret void
}
-define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: {{^}}gep_as_vector_v4:
-; CHECK: s_add_i32
-; CHECK: s_add_i32
-; CHECK: s_add_i32
-; CHECK: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+
+; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+
+; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
+; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
+; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
+; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
+; CHECK: s_endpgm
+define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
%p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
%p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1
@@ -41,10 +52,15 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind
ret void
}
-define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind {
; CHECK-LABEL: {{^}}gep_as_vector_v2:
-; CHECK: s_add_i32
-; CHECK: s_add_i32
+; SI: s_add_i32
+; SI: s_add_i32
+; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
+; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64
+; CHECK: s_endpgm
+define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind {
%p = getelementptr [1024 x i32], <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
%p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1
diff --git a/test/CodeGen/AMDGPU/global-constant.ll b/test/CodeGen/AMDGPU/global-constant.ll
new file mode 100644
index 000000000000..bc5f031cd4a2
--- /dev/null
+++ b/test/CodeGen/AMDGPU/global-constant.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s
+
+@readonly = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0]
+@readonly2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0]
+
+; GCN-LABEL: {{^}}main:
+; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], readonly
+; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0
+; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], readonly
+; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0
+; NOHSA: .text
+; HSA: .hsatext
+; GCN: readonly:
+; GCN: readonly2:
+define void @main(i32 %index, float addrspace(1)* %out) {
+ %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly, i32 0, i32 %index
+ %val = load float, float addrspace(2)* %ptr
+ store float %val, float addrspace(1)* %out
+ %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly2, i32 0, i32 %index
+ %val2 = load float, float addrspace(2)* %ptr2
+ store float %val2, float addrspace(1)* %out
+ ret void
+}
+
diff --git a/test/CodeGen/AMDGPU/global-extload-i32.ll b/test/CodeGen/AMDGPU/global-extload-i32.ll
index 79b83452939e..e5e6be2199c3 100644
--- a/test/CodeGen/AMDGPU/global-extload-i32.ll
+++ b/test/CodeGen/AMDGPU/global-extload-i32.ll
@@ -49,8 +49,7 @@ define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
; SI: buffer_load_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i32>, <2 x i32> addrspace(1)* %in
@@ -63,8 +62,7 @@ define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; SI: buffer_load_dwordx2
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <2 x i32>, <2 x i32> addrspace(1)* %in
@@ -75,10 +73,8 @@ define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
; SI: buffer_load_dwordx4
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i32>, <4 x i32> addrspace(1)* %in
@@ -93,10 +89,8 @@ define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <4 x i32>, <4 x i32> addrspace(1)* %in
@@ -106,22 +100,12 @@ define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
}
; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i32>, <8 x i32> addrspace(1)* %in
@@ -131,14 +115,8 @@ define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
}
; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
@@ -148,15 +126,10 @@ define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <8 x i32>, <8 x i32> addrspace(1)* %in
@@ -166,50 +139,34 @@ define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
}
; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i32>, <16 x i32> addrspace(1)* %in
@@ -219,40 +176,19 @@ define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
}
; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
%load = load <16 x i32>, <16 x i32> addrspace(1)* %in
@@ -262,41 +198,15 @@ define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
}
; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
@@ -331,41 +241,25 @@ define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; SI-DAG: v_ashrrev_i32
; SI-DAG: v_ashrrev_i32
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
@@ -376,77 +270,34 @@ define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32
}
; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
-; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
+; SI-DAG: buffer_store_dwordx4
; SI: s_endpgm
define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
diff --git a/test/CodeGen/AMDGPU/global_atomics.ll b/test/CodeGen/AMDGPU/global_atomics.ll
index 146f0a5fbf26..6786e4a2f375 100644
--- a/test/CodeGen/AMDGPU/global_atomics.ll
+++ b/test/CodeGen/AMDGPU/global_atomics.ll
@@ -12,7 +12,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
-; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
@@ -105,7 +105,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
-; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
@@ -197,7 +197,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
-; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
@@ -289,7 +289,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
-; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
@@ -381,7 +381,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
-; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
@@ -473,7 +473,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
-; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
@@ -565,7 +565,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
-; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
@@ -657,7 +657,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
-; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
@@ -749,7 +749,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
-; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
@@ -838,7 +838,7 @@ entry:
}
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
-; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
diff --git a/test/CodeGen/AMDGPU/half.ll b/test/CodeGen/AMDGPU/half.ll
index bf8f11860b50..a02cbf43c400 100644
--- a/test/CodeGen/AMDGPU/half.ll
+++ b/test/CodeGen/AMDGPU/half.ll
@@ -105,6 +105,26 @@ define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x hal
}
; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
%ext = fpext <8 x half> %arg to <8 x float>
store <8 x float> %ext, <8 x float> addrspace(1)* %out
@@ -112,12 +132,24 @@ define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x hal
}
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
+; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
+; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
+; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
+; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
%ext = fpext half %arg to double
store double %ext, double addrspace(1)* %out
ret void
}
+
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
%ext = fpext <2 x half> %arg to <2 x double>
store <2 x double> %ext, <2 x double> addrspace(1)* %out
@@ -125,6 +157,16 @@ define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x ha
}
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
%ext = fpext <3 x half> %arg to <3 x double>
store <3 x double> %ext, <3 x double> addrspace(1)* %out
@@ -132,6 +174,19 @@ define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x ha
}
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
%ext = fpext <4 x half> %arg to <4 x double>
store <4 x double> %ext, <4 x double> addrspace(1)* %out
@@ -139,6 +194,37 @@ define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x ha
}
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+; GCN-DAG: buffer_load_ushort v
+
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+; GCN-DAG: v_cvt_f32_f16_e32
+
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+; GCN-DAG: v_cvt_f64_f32_e32
+
+; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
%ext = fpext <8 x half> %arg to <8 x double>
store <8 x double> %ext, <8 x double> addrspace(1)* %out
@@ -194,6 +280,12 @@ define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(
}
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
+; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
+; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
%cvt = fpext <2 x half> %val to <2 x float>
@@ -226,6 +318,46 @@ define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x
}
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+
+; GCN: s_endpgm
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
%val = load <16 x half>, <16 x half> addrspace(1)* %in
%cvt = fpext <16 x half> %val to <16 x float>
@@ -246,6 +378,14 @@ define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace
}
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
+; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
+; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
+; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
+; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
+; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
%cvt = fpext <2 x half> %val to <2 x double>
@@ -254,6 +394,25 @@ define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x
}
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
+
+; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
+; SI: v_lshr_b64 v{{\[[0-9]+:[0-9]+\]}}, [[LOAD]], 32
+; VI: v_lshrrev_b64 v{{\[[0-9]+:[0-9]+\]}}, 32, [[LOAD]]
+; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
+
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN: v_cvt_f32_f16_e32
+; GCN-NOT: v_cvt_f32_f16_e32
+
+; GCN: v_cvt_f64_f32_e32
+; GCN: v_cvt_f64_f32_e32
+; GCN: v_cvt_f64_f32_e32
+; GCN-NOT: v_cvt_f64_f32_e32
+
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
+; GCN: s_endpgm
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
%val = load <3 x half>, <3 x half> addrspace(1)* %in
%cvt = fpext <3 x half> %val to <3 x double>
@@ -310,13 +469,12 @@ define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2
ret void
}
-; FIXME: Shouldn't do 4th conversion
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
+; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
@@ -346,14 +504,8 @@ define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4
}
; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
@@ -379,54 +531,42 @@ define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8
}
; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: buffer_load_dword
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: v_cvt_f16_f32_e32
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
-; GCN: buffer_store_short
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: v_cvt_f16_f32_e32
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
+; GCN-DAG: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
%val = load <16 x float>, <16 x float> addrspace(1)* %in
diff --git a/test/CodeGen/AMDGPU/hsa-globals.ll b/test/CodeGen/AMDGPU/hsa-globals.ll
new file mode 100644
index 000000000000..1d76c40c042e
--- /dev/null
+++ b/test/CodeGen/AMDGPU/hsa-globals.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=ASM %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -symbols -s | FileCheck %s --check-prefix=ELF
+
+@internal_global_program = internal addrspace(1) global i32 0
+@common_global_program = common addrspace(1) global i32 0
+@external_global_program = addrspace(1) global i32 0
+
+@internal_global_agent = internal addrspace(1) global i32 0, section ".hsadata_global_agent"
+@common_global_agent = common addrspace(1) global i32 0, section ".hsadata_global_agent"
+@external_global_agent = addrspace(1) global i32 0, section ".hsadata_global_agent"
+
+@internal_readonly = internal unnamed_addr addrspace(2) constant i32 0
+@external_readonly = unnamed_addr addrspace(2) constant i32 0
+
+define void @test() {
+ ret void
+}
+
+; ASM: .amdgpu_hsa_module_global internal_global
+; ASM: .hsadata_global_program
+; ASM: internal_global_program:
+; ASM: .long 0
+
+; ASM: .amdgpu_hsa_module_global common_global
+; ASM: .hsadata_global_program
+; ASM: common_global_program:
+; ASM: .long 0
+
+; ASM: .amdgpu_hsa_program_global external_global
+; ASM: .hsadata_global_program
+; ASM: external_global_program:
+; ASM: .long 0
+
+; ASM: .amdgpu_hsa_module_global internal_global
+; ASM: .hsadata_global_agent
+; ASM: internal_global_agent:
+; ASM: .long 0
+
+; ASM: .amdgpu_hsa_module_global common_global
+; ASM: .hsadata_global_agent
+; ASM: common_global_agent:
+; ASM: .long 0
+
+; ASM: .amdgpu_hsa_program_global external_global
+; ASM: .hsadata_global_agent
+; ASM: external_global_agent:
+; ASM: .long 0
+
+; ASM: .amdgpu_hsa_module_global internal_readonly
+; ASM: .hsatext
+; ASM: internal_readonly:
+; ASM: .long 0
+
+; ASM: .amdgpu_hsa_program_global external_readonly
+; ASM: .hsatext
+; ASM: external_readonly:
+; ASM: .long 0
+
+; ELF: Section {
+; ELF: Name: .hsadata_global_program
+; ELF: Type: SHT_PROGBITS (0x1)
+; ELF: Flags [ (0x100003)
+; ELF: SHF_ALLOC (0x2)
+; ELF: SHF_AMDGPU_HSA_GLOBAL (0x100000)
+; ELF: SHF_WRITE (0x1)
+; ELF: ]
+; ELF: }
+
+; ELF: Section {
+; ELF: Name: .hsadata_global_agent
+; ELF: Type: SHT_PROGBITS (0x1)
+; ELF: Flags [ (0x900003)
+; ELF: SHF_ALLOC (0x2)
+; ELF: SHF_AMDGPU_HSA_AGENT (0x800000)
+; ELF: SHF_AMDGPU_HSA_GLOBAL (0x100000)
+; ELF: SHF_WRITE (0x1)
+; ELF: ]
+; ELF: }
+
+; ELF: Symbol {
+; ELF: Name: common_global_agent
+; ELF: Binding: Local
+; ELF: Section: .hsadata_global_agent
+; ELF: }
+
+; ELF: Symbol {
+; ELF: Name: common_global_program
+; ELF: Binding: Local
+; ELF: Section: .hsadata_global_program
+; ELF: }
+
+; ELF: Symbol {
+; ELF: Name: internal_global_agent
+; ELF: Binding: Local
+; ELF: Type: Object
+; ELF: Section: .hsadata_global_agent
+; ELF: }
+
+; ELF: Symbol {
+; ELF: Name: internal_global_program
+; ELF: Binding: Local
+; ELF: Type: Object
+; ELF: Section: .hsadata_global_program
+; ELF: }
+
+; ELF: Symbol {
+; ELF: Name: internal_readonly
+; ELF: Binding: Local
+; ELF: Type: Object
+; ELF: Section: .hsatext
+; ELF: }
+
+; ELF: Symbol {
+; ELF: Name: external_global_agent
+; ELF: Binding: Global
+; ELF: Type: Object
+; ELF: Section: .hsadata_global_agent
+; ELF: }
+
+; ELF: Symbol {
+; ELF: Name: external_global_program
+; ELF: Binding: Global
+; ELF: Type: Object
+; ELF: Section: .hsadata_global_program
+; ELF: }
+
+; ELF: Symbol {
+; ELF: Name: external_readonly
+; ELF: Binding: Global
+; ELF: Type: Object
+; ELF: Section: .hsatext
+; ELF: }
diff --git a/test/CodeGen/AMDGPU/hsa-group-segment.ll b/test/CodeGen/AMDGPU/hsa-group-segment.ll
new file mode 100644
index 000000000000..1999dc38a6b0
--- /dev/null
+++ b/test/CodeGen/AMDGPU/hsa-group-segment.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
+
+@internal_group = internal addrspace(3) global i32 undef
+@external_group = addrspace(3) global i32 undef
+
+define void @test() {
+entry:
+ store i32 0, i32 addrspace(3)* @internal_group
+ store i32 0, i32 addrspace(3)* @external_group
+ ret void
+}
+
+; HSA-NOT: internal_group:
+; HSA-NOT: external_group:
diff --git a/test/CodeGen/AMDGPU/hsa.ll b/test/CodeGen/AMDGPU/hsa.ll
index 653a6bb1b609..abc89b7fd837 100644
--- a/test/CodeGen/AMDGPU/hsa.ll
+++ b/test/CodeGen/AMDGPU/hsa.ll
@@ -1,11 +1,24 @@
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI --check-prefix=HSA %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI --check-prefix=HSA %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -s -sd | FileCheck --check-prefix=ELF %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -s -sd | FileCheck %s --check-prefix=ELF
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -symbols -s -sd | FileCheck --check-prefix=ELF %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF
; The SHT_NOTE section contains the output from the .hsa_code_object_*
; directives.
+; ELF: Section {
+; ELF: Name: .hsatext
+; ELF: Type: SHT_PROGBITS (0x1)
+; ELF: Flags [ (0xC00007)
+; ELF: SHF_ALLOC (0x2)
+; ELF: SHF_AMDGPU_HSA_AGENT (0x800000)
+; ELF: SHF_AMDGPU_HSA_CODE (0x400000)
+; ELF: SHF_EXECINSTR (0x4)
+; ELF: SHF_WRITE (0x1)
+; ELF: }
+
; ELF: SHT_NOTE
; ELF: 0000: 04000000 08000000 01000000 414D4400
; ELF: 0010: 01000000 00000000 04000000 1B000000
@@ -13,20 +26,31 @@
; ELF: 0030: 00000000 00000000 414D4400 414D4447
; ELF: 0040: 50550000
+; ELF: Symbol {
+; ELF: Name: simple
+; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
+; ELF: }
+
; HSA: .hsa_code_object_version 1,0
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
+; HSA: .hsatext
+
+; HSA: .amdgpu_hsa_kernel simple
; HSA: {{^}}simple:
; HSA: .amd_kernel_code_t
+; HSA: enable_sgpr_private_segment_buffer = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: .end_amd_kernel_code_t
-; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0
+; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
; Make sure we are setting the ATC bit:
; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
; On VI+ we also need to set MTYPE = 2
; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
-; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
+; Make sure we generate flat store for HSA
+; HSA: flat_store_dword v{{[0-9]+}}
define void @simple(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll
new file mode 100644
index 000000000000..5906b2f15709
--- /dev/null
+++ b/test/CodeGen/AMDGPU/image-attributes.ll
@@ -0,0 +1,206 @@
+; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; === WIDTH ==================================================================
+; 9 implicit args = 9 dwords to first image argument.
+; First width at dword index 9+1 -> KC0[2].Z
+
+; FUNC-LABEL: {{^}}width_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].Z
+define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ %1 = extractvalue [3 x i32] %0, 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}width_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].Z
+define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = extractvalue [3 x i32] %0, 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === HEIGHT =================================================================
+; First height at dword index 9+2 -> KC0[2].W
+
+; FUNC-LABEL: {{^}}height_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].W
+define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ %1 = extractvalue [3 x i32] %0, 1
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}height_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].W
+define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = extractvalue [3 x i32] %0, 1
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === DEPTH ==================================================================
+; First depth at dword index 9+3 -> KC0[3].X
+
+; FUNC-LABEL: {{^}}depth_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[3].X
+define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = extractvalue [3 x i32] %0, 2
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === CHANNEL DATA TYPE ======================================================
+; First channel data type at dword index 9+4 -> KC0[3].Y
+
+; FUNC-LABEL: {{^}}data_type_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[3].Y
+define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [2 x i32] @llvm.OpenCL.image.get.format.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ %1 = extractvalue [2 x i32] %0, 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}data_type_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[3].Y
+define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [2 x i32] @llvm.OpenCL.image.get.format.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = extractvalue [2 x i32] %0, 0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === CHANNEL ORDER ==========================================================
+; First channel order at dword index 9+5 -> KC0[3].Z
+
+; FUNC-LABEL: {{^}}channel_order_2d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[3].Z
+define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [2 x i32] @llvm.OpenCL.image.get.format.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ %1 = extractvalue [2 x i32] %0, 1
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}channel_order_3d:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[3].Z
+define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [2 x i32] @llvm.OpenCL.image.get.format.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ %1 = extractvalue [2 x i32] %0, 1
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; === 2ND IMAGE ==============================================================
+; 9 implicit args + 2 explicit args + 5 implicit args for 1st image argument
+; = 16 dwords to 2nd image argument.
+; Height of the second image is at 16+2 -> KC0[4].Z
+;
+; FUNC-LABEL: {{^}}image_arg_2nd:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[4].Z
+define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
+ i32 %x,
+ %opencl.image2d_t addrspace(1)* %in2,
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d(
+ %opencl.image2d_t addrspace(1)* %in2) #0
+ %1 = extractvalue [3 x i32] %0, 1
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare [3 x i32] @llvm.OpenCL.image.get.size.2d(%opencl.image2d_t addrspace(1)*) #0
+declare [3 x i32] @llvm.OpenCL.image.get.size.3d(%opencl.image3d_t addrspace(1)*) #0
+declare [2 x i32] @llvm.OpenCL.image.get.format.2d(%opencl.image2d_t addrspace(1)*) #0
+declare [2 x i32] @llvm.OpenCL.image.get.format.3d(%opencl.image3d_t addrspace(1)*) #0
+
+attributes #0 = { readnone }
+
+!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
+!0 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @width_2d,
+ !10, !20, !30, !40, !50}
+!1 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @width_3d,
+ !10, !21, !31, !41, !50}
+!2 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @height_2d,
+ !10, !20, !30, !40, !50}
+!3 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @height_3d,
+ !10, !21, !31, !41, !50}
+!4 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @depth_3d,
+ !10, !21, !31, !41, !50}
+!5 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @data_type_2d,
+ !10, !20, !30, !40, !50}
+!6 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @data_type_3d,
+ !10, !21, !31, !41, !50}
+!7 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @channel_order_2d,
+ !10, !20, !30, !40, !50}
+!8 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @channel_order_3d,
+ !10, !21, !31, !41, !50}
+!9 = !{void (%opencl.image3d_t addrspace(1)*, i32, %opencl.image2d_t addrspace(1)*,
+ i32 addrspace(1)*)* @image_arg_2nd, !12, !22, !32, !42, !52}
+
+!10 = !{!"kernel_arg_addr_space", i32 1, i32 1}
+!20 = !{!"kernel_arg_access_qual", !"read_only", !"none"}
+!21 = !{!"kernel_arg_access_qual", !"read_only", !"none"}
+!30 = !{!"kernel_arg_type", !"image2d_t", !"int*"}
+!31 = !{!"kernel_arg_type", !"image3d_t", !"int*"}
+!40 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"}
+!41 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"}
+!50 = !{!"kernel_arg_type_qual", !"", !""}
+
+!12 = !{!"kernel_arg_addr_space", i32 1, i32 0, i32 1, i32 1}
+!22 = !{!"kernel_arg_access_qual", !"read_only", !"none", !"write_only", !"none"}
+!32 = !{!"kernel_arg_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"}
+!42 = !{!"kernel_arg_base_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"}
+!52 = !{!"kernel_arg_type_qual", !"", !"", !"", !""}
diff --git a/test/CodeGen/AMDGPU/image-resource-id.ll b/test/CodeGen/AMDGPU/image-resource-id.ll
new file mode 100644
index 000000000000..d4cf34944240
--- /dev/null
+++ b/test/CodeGen/AMDGPU/image-resource-id.ll
@@ -0,0 +1,409 @@
+; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; === 1 image arg, read_only ===================================================
+
+; FUNC-LABEL: {{^}}test_2d_rd_1_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_2d_rd_1_0(%opencl.image2d_t addrspace(1)* %in, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_rd_1_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_3d_rd_1_0(%opencl.image3d_t addrspace(1)* %in, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; === 1 image arg, write_only ==================================================
+
+; FUNC-LABEL: {{^}}test_2d_wr_1_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_2d_wr_1_0(%opencl.image2d_t addrspace(1)* %in, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_wr_1_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_3d_wr_1_0(%opencl.image3d_t addrspace(1)* %in, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; === 2 image args, read_only ==================================================
+
+; FUNC-LABEL: {{^}}test_2d_rd_2_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_2d_rd_2_0(%opencl.image2d_t addrspace(1)* %in1, ; read_only
+ %opencl.image2d_t addrspace(1)* %in2, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in1) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_2d_rd_2_1:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_2d_rd_2_1(%opencl.image2d_t addrspace(1)* %in1, ; read_only
+ %opencl.image2d_t addrspace(1)* %in2, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in2) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_rd_2_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_3d_rd_2_0(%opencl.image3d_t addrspace(1)* %in1, ; read_only
+ %opencl.image3d_t addrspace(1)* %in2, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in1) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_rd_2_1:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_3d_rd_2_1(%opencl.image3d_t addrspace(1)* %in1, ; read_only
+ %opencl.image3d_t addrspace(1)* %in2, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in2) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; === 2 image args, write_only =================================================
+
+; FUNC-LABEL: {{^}}test_2d_wr_2_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_2d_wr_2_0(%opencl.image2d_t addrspace(1)* %in1, ; write_only
+ %opencl.image2d_t addrspace(1)* %in2, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in1) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_2d_wr_2_1:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_2d_wr_2_1(%opencl.image2d_t addrspace(1)* %in1, ; write_only
+ %opencl.image2d_t addrspace(1)* %in2, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in2) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_wr_2_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_3d_wr_2_0(%opencl.image3d_t addrspace(1)* %in1, ; write_only
+ %opencl.image3d_t addrspace(1)* %in2, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in1) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_wr_2_1:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_3d_wr_2_1(%opencl.image3d_t addrspace(1)* %in1, ; write_only
+ %opencl.image3d_t addrspace(1)* %in2, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in2) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; === 3 image args, read_only ==================================================
+
+; FUNC-LABEL: {{^}}test_2d_rd_3_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 2(
+define void @test_2d_rd_3_0(%opencl.image2d_t addrspace(1)* %in1, ; read_only
+ %opencl.image3d_t addrspace(1)* %in2, ; read_only
+ %opencl.image2d_t addrspace(1)* %in3, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in3) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; FUNC-LABEL: {{^}}test_3d_rd_3_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 2(
+define void @test_3d_rd_3_0(%opencl.image3d_t addrspace(1)* %in1, ; read_only
+ %opencl.image2d_t addrspace(1)* %in2, ; read_only
+ %opencl.image3d_t addrspace(1)* %in3, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in3) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; === 3 image args, write_only =================================================
+
+; FUNC-LABEL: {{^}}test_2d_wr_3_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 2(
+define void @test_2d_wr_3_0(%opencl.image2d_t addrspace(1)* %in1, ; write_only
+ %opencl.image3d_t addrspace(1)* %in2, ; write_only
+ %opencl.image2d_t addrspace(1)* %in3, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in3) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; FUNC-LABEL: {{^}}test_3d_wr_3_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 2(
+define void @test_3d_wr_3_0(%opencl.image3d_t addrspace(1)* %in1, ; write_only
+ %opencl.image2d_t addrspace(1)* %in2, ; write_only
+ %opencl.image3d_t addrspace(1)* %in3, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in3) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; === 3 image args, mixed ======================================================
+
+; FUNC-LABEL: {{^}}test_2d_mix_3_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_2d_mix_3_0(%opencl.image2d_t addrspace(1)* %in1, ; write_only
+ %opencl.image3d_t addrspace(1)* %in2, ; read_only
+ %opencl.image2d_t addrspace(1)* %in3, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in3) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_mix_3_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_3d_mix_3_0(%opencl.image3d_t addrspace(1)* %in1, ; write_only
+ %opencl.image2d_t addrspace(1)* %in2, ; read_only
+ %opencl.image3d_t addrspace(1)* %in3, ; read_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in3) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_2d_mix_3_1:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_2d_mix_3_1(%opencl.image2d_t addrspace(1)* %in1, ; write_only
+ %opencl.image3d_t addrspace(1)* %in2, ; read_only
+ %opencl.image2d_t addrspace(1)* %in3, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d(
+ %opencl.image2d_t addrspace(1)* %in3) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_3d_mix_3_1:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_3d_mix_3_1(%opencl.image3d_t addrspace(1)* %in1, ; write_only
+ %opencl.image2d_t addrspace(1)* %in2, ; read_only
+ %opencl.image3d_t addrspace(1)* %in3, ; write_only
+ i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d(
+ %opencl.image3d_t addrspace(1)* %in3) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+
+declare i32 @llvm.OpenCL.image.get.resource.id.2d(%opencl.image2d_t addrspace(1)*) #0
+declare i32 @llvm.OpenCL.image.get.resource.id.3d(%opencl.image3d_t addrspace(1)*) #0
+
+attributes #0 = { readnone }
+
+!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13,
+ !14, !15, !16, !17, !18, !19}
+!0 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_rd_1_0,
+ !110, !120, !130, !140, !150}
+!1 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_rd_1_0,
+ !110, !120, !131, !141, !150}
+!2 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_wr_1_0,
+ !110, !121, !130, !140, !150}
+!3 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_wr_1_0,
+ !110, !121, !131, !141, !150}
+!110 = !{!"kernel_arg_addr_space", i32 1, i32 1}
+!120 = !{!"kernel_arg_access_qual", !"read_only", !"none"}
+!121 = !{!"kernel_arg_access_qual", !"write_only", !"none"}
+!130 = !{!"kernel_arg_type", !"image2d_t", !"int*"}
+!131 = !{!"kernel_arg_type", !"image3d_t", !"int*"}
+!140 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"}
+!141 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"}
+!150 = !{!"kernel_arg_type_qual", !"", !""}
+
+!4 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*,
+ i32 addrspace(1)*)* @test_2d_rd_2_0, !112, !122, !132, !142, !152}
+!5 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*,
+ i32 addrspace(1)*)* @test_2d_rd_2_1, !112, !122, !132, !142, !152}
+!6 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*,
+ i32 addrspace(1)*)* @test_3d_rd_2_0, !112, !122, !133, !143, !152}
+!7 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*,
+ i32 addrspace(1)*)* @test_3d_rd_2_1, !112, !122, !133, !143, !152}
+!8 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*,
+ i32 addrspace(1)*)* @test_2d_wr_2_0, !112, !123, !132, !142, !152}
+!9 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*,
+ i32 addrspace(1)*)* @test_2d_wr_2_1, !112, !123, !132, !142, !152}
+!10 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*,
+ i32 addrspace(1)*)* @test_3d_wr_2_0, !112, !123, !133, !143, !152}
+!11 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*,
+ i32 addrspace(1)*)* @test_3d_wr_2_1, !112, !123, !133, !143, !152}
+!112 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1}
+!122 = !{!"kernel_arg_access_qual", !"read_only", !"read_only", !"none"}
+!123 = !{!"kernel_arg_access_qual", !"write_only", !"write_only", !"none"}
+!132 = !{!"kernel_arg_type", !"image2d_t", !"image2d_t", !"int*"}
+!133 = !{!"kernel_arg_type", !"image3d_t", !"image3d_t", !"int*"}
+!142 = !{!"kernel_arg_base_type", !"image2d_t", !"image2d_t", !"int*"}
+!143 = !{!"kernel_arg_base_type", !"image3d_t", !"image3d_t", !"int*"}
+!152 = !{!"kernel_arg_type_qual", !"", !"", !""}
+
+!12 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*,
+ %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_rd_3_0,
+ !114, !124, !134, !144, !154}
+!13 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*,
+ %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_rd_3_0,
+ !114, !124, !135, !145, !154}
+!14 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*,
+ %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_wr_3_0,
+ !114, !125, !134, !144, !154}
+!15 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*,
+ %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_wr_3_0,
+ !114, !125, !135, !145, !154}
+!16 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*,
+ %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_mix_3_0,
+ !114, !126, !134, !144, !154}
+!17 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*,
+ %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_mix_3_0,
+ !114, !126, !135, !145, !154}
+!18 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*,
+ %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_mix_3_1,
+ !114, !127, !134, !144, !154}
+!19 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*,
+ %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_mix_3_1,
+ !114, !127, !135, !145, !154}
+!114 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1}
+!124 = !{!"kernel_arg_access_qual", !"read_only", !"read_only", !"read_only", !"none"}
+!125 = !{!"kernel_arg_access_qual", !"write_only", !"write_only", !"write_only", !"none"}
+!126 = !{!"kernel_arg_access_qual", !"write_only", !"read_only", !"read_only", !"none"}
+!127 = !{!"kernel_arg_access_qual", !"write_only", !"read_only", !"write_only", !"none"}
+!134 = !{!"kernel_arg_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"}
+!135 = !{!"kernel_arg_type", !"image3d_t", !"image2d_t", !"image3d_t", !"int*"}
+!144 = !{!"kernel_arg_base_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"}
+!145 = !{!"kernel_arg_base_type", !"image3d_t", !"image2d_t", !"image3d_t", !"int*"}
+!154 = !{!"kernel_arg_type_qual", !"", !"", !"", !""}
diff --git a/test/CodeGen/AMDGPU/imm.ll b/test/CodeGen/AMDGPU/imm.ll
index 12eed550eb1f..8db9ea4ccf31 100644
--- a/test/CodeGen/AMDGPU/imm.ll
+++ b/test/CodeGen/AMDGPU/imm.ll
@@ -3,8 +3,7 @@
; Use a 64-bit value with lo bits that can be represented as an inline constant
; CHECK-LABEL: {{^}}i64_imm_inline_lo:
-; CHECK: s_mov_b32 [[LO:s[0-9]+]], 5
-; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
+; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], 5
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]:
define void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
entry:
@@ -14,8 +13,7 @@ entry:
; Use a 64-bit value with hi bits that can be represented as an inline constant
; CHECK-LABEL: {{^}}i64_imm_inline_hi:
-; CHECK: s_mov_b32 [[HI:s[0-9]+]], 5
-; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
+; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], 5
; CHECK: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]]
define void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
entry:
@@ -24,10 +22,8 @@ entry:
}
; CHECK-LABEL: {{^}}store_imm_neg_0.0_i64:
-; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000
-; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}}
-; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
-; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x80000000
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
store i64 -9223372036854775808, i64 addrspace(1) *%out
@@ -523,10 +519,8 @@ define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
; CHECK-LABEL: {{^}}store_literal_imm_neg_0.0_f64:
-; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000
-; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}}
-; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
-; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x80000000
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) {
store double -0.0, double addrspace(1)* %out
@@ -606,10 +600,8 @@ define void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) {
}
; CHECK-LABEL: {{^}}store_literal_imm_f64:
-; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x40b00000
-; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}}
-; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
-; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40b00000
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_literal_imm_f64(double addrspace(1)* %out) {
store double 4096.0, double addrspace(1)* %out
diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index f551606d63a7..e40cac22725c 100644
--- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -5,23 +5,52 @@
; indexing of vectors.
; CHECK-LABEL: {{^}}extract_w_offset:
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
entry:
- %0 = add i32 %in, 1
- %1 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %0
- store float %1, float addrspace(1)* %out
+ %idx = add i32 %in, 1
+ %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %idx
+ store float %elt, float addrspace(1)* %out
+ ret void
+}
+
+; XXX: Could do v_or_b32 directly
+; CHECK-LABEL: {{^}}extract_w_offset_salu_use_vector:
+; CHECK-DAG: s_or_b32
+; CHECK-DAG: s_or_b32
+; CHECK-DAG: s_or_b32
+; CHECK-DAG: s_or_b32
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movrels_b32_e32
+define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) {
+entry:
+ %idx = add i32 %in, 1
+ %vec = or <4 x i32> %or.val, <i32 1, i32 2, i32 3, i32 4>
+ %elt = extractelement <4 x i32> %vec, i32 %idx
+ store i32 %elt, i32 addrspace(1)* %out
ret void
}
; CHECK-LABEL: {{^}}extract_wo_offset:
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0
+; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0
; CHECK: s_mov_b32 m0
; CHECK-NEXT: v_movrels_b32_e32
define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
entry:
- %0 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
- store float %0, float addrspace(1)* %out
+ %elt = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
+ store float %elt, float addrspace(1)* %out
ret void
}
@@ -37,6 +66,19 @@ entry:
ret void
}
+; CHECK-LABEL: {{^}}extract_neg_offset_sgpr_loaded:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0
+define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) {
+entry:
+ %index = add i32 %offset, -512
+ %or = or <4 x i32> %vec0, %vec1
+ %value = extractelement <4 x i32> %or, i32 %index
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
; CHECK-LABEL: {{^}}extract_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: v_readfirstlane_b32
@@ -87,6 +129,21 @@ entry:
ret void
}
+; The vector indexed into is originally loaded into an SGPR rather
+; than built with a reg_sequence
+
+; CHECK-LABEL: {{^}}insert_neg_offset_sgpr_loadreg:
+; The offset depends on the register that holds the first element of the vector.
+; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}}
+; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}}
+define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) {
+entry:
+ %index = add i32 %offset, -512
+ %value = insertelement <4 x i32> %vec, i32 5, i32 %index
+ store <4 x i32> %value, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
; CHECK-LABEL: {{^}}insert_neg_offset_vgpr:
; The offset depends on the register that holds the first element of the vector.
; CHECK: v_readfirstlane_b32
diff --git a/test/CodeGen/AMDGPU/indirect-private-64.ll b/test/CodeGen/AMDGPU/indirect-private-64.ll
index d63e1b6c5212..2a3b29f54fa9 100644
--- a/test/CodeGen/AMDGPU/indirect-private-64.ll
+++ b/test/CodeGen/AMDGPU/indirect-private-64.ll
@@ -4,7 +4,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
-declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() convergent nounwind
; SI-LABEL: {{^}}private_access_f64_alloca:
@@ -18,7 +18,7 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
%array = alloca double, i32 16, align 8
%ptr = getelementptr double, double* %array, i32 %b
store double %val, double* %ptr, align 8
- call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+ call void @llvm.AMDGPU.barrier.local() convergent nounwind
%result = load double, double* %ptr, align 8
store double %result, double addrspace(1)* %out, align 8
ret void
@@ -29,20 +29,16 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
; SI-ALLOCA: buffer_store_dwordx4
; SI-ALLOCA: buffer_load_dwordx4
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_read_b32
-; SI-PROMOTE: ds_read_b32
-; SI-PROMOTE: ds_read_b32
-; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_read_b64
+; SI-PROMOTE: ds_read_b64
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
%array = alloca <2 x double>, i32 16, align 16
%ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b
store <2 x double> %val, <2 x double>* %ptr, align 16
- call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+ call void @llvm.AMDGPU.barrier.local() convergent nounwind
%result = load <2 x double>, <2 x double>* %ptr, align 16
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
ret void
@@ -60,7 +56,7 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
%array = alloca i64, i32 16, align 8
%ptr = getelementptr i64, i64* %array, i32 %b
store i64 %val, i64* %ptr, align 8
- call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+ call void @llvm.AMDGPU.barrier.local() convergent nounwind
%result = load i64, i64* %ptr, align 8
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -71,20 +67,16 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
; SI-ALLOCA: buffer_store_dwordx4
; SI-ALLOCA: buffer_load_dwordx4
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_write_b32
-; SI-PROMOTE: ds_read_b32
-; SI-PROMOTE: ds_read_b32
-; SI-PROMOTE: ds_read_b32
-; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_read_b64
+; SI-PROMOTE: ds_read_b64
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%array = alloca <2 x i64>, i32 16, align 16
%ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b
store <2 x i64> %val, <2 x i64>* %ptr, align 16
- call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+ call void @llvm.AMDGPU.barrier.local() convergent nounwind
%result = load <2 x i64>, <2 x i64>* %ptr, align 16
store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/AMDGPU/inline-constraints.ll b/test/CodeGen/AMDGPU/inline-constraints.ll
new file mode 100644
index 000000000000..78868710c6a2
--- /dev/null
+++ b/test/CodeGen/AMDGPU/inline-constraints.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}inline_reg_constraints:
+; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
+; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
+; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+
+define void @inline_reg_constraints(i32 addrspace(1)* %ptr) {
+entry:
+ %v32 = tail call i32 asm sideeffect "flat_load_dword $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %v64 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %v128 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s64 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s128 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s256 = tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.ll b/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 6de3d408c486..7f9579e59782 100644
--- a/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -70,8 +70,9 @@ define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x fl
}
; SI-LABEL: {{^}}dynamic_insertelement_v8f32:
-; FIXMESI: buffer_store_dwordx4
-; FIXMESI: buffer_store_dwordx4
+; SI: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
@@ -79,10 +80,11 @@ define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x fl
}
; SI-LABEL: {{^}}dynamic_insertelement_v16f32:
-; FIXMESI: buffer_store_dwordx4
-; FIXMESI: buffer_store_dwordx4
-; FIXMESI: buffer_store_dwordx4
-; FIXMESI: buffer_store_dwordx4
+; SI: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
%vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
@@ -202,10 +204,28 @@ endif:
}
; SI-LABEL: {{^}}dynamic_insertelement_v2f64:
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
+; SI: s_load_dword [[IDX:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x11|0x44}}{{$}}
+; SI-DAG: s_lshl_b32 [[SCALEDIDX:s[0-9]+]], [[IDX]], 1{{$}}
+; SI-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0{{$}}
+
+; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+
+; SI: s_mov_b32 m0, [[SCALEDIDX]]
+; SI: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]]
+
+; Increment to next element.
+; FIXME: Should be able to manipulate m0 directly instead of add and
+; copy.
+
+; SI: s_or_b32 [[IDX1:s[0-9]+]], [[SCALEDIDX]], 1
+; SI-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0x40200000
+; SI-DAG: s_mov_b32 m0, [[IDX1]]
+; SI: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]]
+
+; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
%vecins = insertelement <2 x double> %a, double 8.0, i32 %b
@@ -213,9 +233,16 @@ define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x d
ret void
}
+; FIXME: Inline immediate should be folded into v_movreld_b32.
; SI-LABEL: {{^}}dynamic_insertelement_v2i64:
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
+
+; SI-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 5{{$}}
+; SI-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0{{$}}
+
+; SI-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]]
+; SI-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]]
+
+; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i64> %a, i64 5, i32 %b
@@ -223,12 +250,29 @@ define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64>
ret void
}
+; FIXME: Should be able to do without stack access. The used stack
+; space is also 2x what should be required.
+
; SI-LABEL: {{^}}dynamic_insertelement_v4f64:
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
+; SI: SCRATCH_RSRC_DWORD
+
+; Stack store
+; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
+
+; Write element
+; SI: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+
+; Stack reload
+; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
+; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+
+; Store result
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
; SI: s_endpgm
+; SI: ScratchSize: 64
+
define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
%vecins = insertelement <4 x double> %a, double 8.0, i32 %b
store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
@@ -236,15 +280,26 @@ define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x d
}
; SI-LABEL: {{^}}dynamic_insertelement_v8f64:
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
+; SI: SCRATCH_RSRC_DWORD
+
+; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
+; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:32{{$}}
+; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:48{{$}}
+
+; SI: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+
+; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
+; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
+; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
+
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
; SI: s_endpgm
+; SI: ScratchSize: 128
define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
%vecins = insertelement <8 x double> %a, double 8.0, i32 %b
store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
diff --git a/test/CodeGen/AMDGPU/kernel-args.ll b/test/CodeGen/AMDGPU/kernel-args.ll
index 1dd7c2cb7995..e9d98ac89e72 100644
--- a/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/test/CodeGen/AMDGPU/kernel-args.ll
@@ -4,8 +4,10 @@
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; FUNC-LABEL: {{^}}i8_arg:
-; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; GCN: buffer_load_ubyte
+; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
@@ -39,8 +41,10 @@ entry:
}
; FUNC-LABEL: {{^}}i16_arg:
-; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; GCN: buffer_load_ushort
+; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
@@ -290,8 +294,8 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
-; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
+; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
+; VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
@@ -307,7 +311,7 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
@@ -409,8 +413,8 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
-; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
+; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
+; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
@@ -434,8 +438,8 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
-; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
+; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
+; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
diff --git a/test/CodeGen/AMDGPU/large-alloca-compute.ll b/test/CodeGen/AMDGPU/large-alloca-compute.ll
new file mode 100644
index 000000000000..8347b8c96ec4
--- /dev/null
+++ b/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
+
+; FIXME: align on alloca seems to be ignored for private_segment_alignment
+
+; ALL-LABEL: {{^}}large_alloca_compute_shader:
+
+; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GCN: s_mov_b32 s10, -1
+; CI: s_mov_b32 s11, 0x80f000
+; VI: s_mov_b32 s11, 0x800000
+
+
+; GCNHSA: .amd_kernel_code_t
+
+; GCNHSA: compute_pgm_rsrc2_scratch_en = 1
+; GCNHSA: compute_pgm_rsrc2_user_sgpr = 6
+; GCNHSA: compute_pgm_rsrc2_tgid_x_en = 1
+; GCNHSA: compute_pgm_rsrc2_tgid_y_en = 0
+; GCNHSA: compute_pgm_rsrc2_tgid_z_en = 0
+; GCNHSA: compute_pgm_rsrc2_tg_size_en = 0
+; GCNHSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
+
+; GCNHSA: enable_sgpr_private_segment_buffer = 1
+; GCNHSA: enable_sgpr_dispatch_ptr = 0
+; GCNHSA: enable_sgpr_queue_ptr = 0
+; GCNHSA: enable_sgpr_kernarg_segment_ptr = 1
+; GCNHSA: enable_sgpr_dispatch_id = 0
+; GCNHSA: enable_sgpr_flat_scratch_init = 0
+; GCNHSA: enable_sgpr_private_segment_size = 0
+; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0
+; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0
+; GCNHSA: enable_sgpr_grid_workgroup_count_z = 0
+; GCNHSA: workitem_private_segment_byte_size = 32772
+; GCNHSA: private_segment_alignment = 4
+; GCNHSA: .end_amd_kernel_code_t
+
+
+; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen
+; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen
+
+; Scratch size = alloca size + emergency stack slot
+; ALL: ; ScratchSize: 32772
+define void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {
+ %large = alloca [8192 x i32], align 4
+ %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
+ store volatile i32 %x, i32* %gep
+ %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
+ %val = load volatile i32, i32* %gep1
+ store volatile i32 %val, i32 addrspace(1)* undef
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/test/CodeGen/AMDGPU/large-alloca-graphics.ll
new file mode 100644
index 000000000000..141ee2560152
--- /dev/null
+++ b/test/CodeGen/AMDGPU/large-alloca-graphics.ll
@@ -0,0 +1,47 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
+
+; ALL-LABEL: {{^}}large_alloca_pixel_shader:
+; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GCN: s_mov_b32 s10, -1
+; CI: s_mov_b32 s11, 0x80f000
+; VI: s_mov_b32 s11, 0x800000
+
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
+
+; ALL: ; ScratchSize: 32772
+define void @large_alloca_pixel_shader(i32 %x, i32 %y) #1 {
+ %large = alloca [8192 x i32], align 4
+ %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
+ store volatile i32 %x, i32* %gep
+ %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
+ %val = load volatile i32, i32* %gep1
+ store volatile i32 %val, i32 addrspace(1)* undef
+ ret void
+}
+
+; ALL-LABEL: {{^}}large_alloca_pixel_shader_inreg:
+; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GCN: s_mov_b32 s10, -1
+; CI: s_mov_b32 s11, 0x80f000
+; VI: s_mov_b32 s11, 0x800000
+
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
+
+; ALL: ; ScratchSize: 32772
+define void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #1 {
+ %large = alloca [8192 x i32], align 4
+ %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
+ store volatile i32 %x, i32* %gep
+ %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
+ %val = load volatile i32, i32* %gep1
+ store volatile i32 %val, i32 addrspace(1)* undef
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "ShaderType"="0" }
diff --git a/test/CodeGen/AMDGPU/large-alloca.ll b/test/CodeGen/AMDGPU/large-alloca.ll
deleted file mode 100644
index 671833d1a33a..000000000000
--- a/test/CodeGen/AMDGPU/large-alloca.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; XFAIL: *
-; REQUIRES: asserts
-; RUN: llc -march=amdgcn -mcpu=SI < %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s
-
-define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind {
- %large = alloca [8192 x i32], align 4
- %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
- store i32 %x, i32* %gep
- %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
- %0 = load i32, i32* %gep1
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
diff --git a/test/CodeGen/AMDGPU/literals.ll b/test/CodeGen/AMDGPU/literals.ll
index cff1c24f89d6..9d2320cb2d19 100644
--- a/test/CodeGen/AMDGPU/literals.ll
+++ b/test/CodeGen/AMDGPU/literals.ll
@@ -7,8 +7,8 @@
; ADD_INT literal.x KC0[2].Z, 5
; CHECK: {{^}}i32_literal:
-; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: ADD_INT * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y
; CHECK-NEXT: 5
define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
entry:
@@ -24,8 +24,8 @@ entry:
; ADD literal.x KC0[2].Z, 5.0
; CHECK: {{^}}float_literal:
-; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: ADD * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y
; CHECK-NEXT: 1084227584(5.0
define void @float_literal(float addrspace(1)* %out, float %in) {
entry:
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
index 8bf094b8bc7b..ca8ddbae9fbc 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll
@@ -8,9 +8,7 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
; FUNC-LABEL: {{^}}s_abs_i32:
-; SI: s_sub_i32
-; SI: s_max_i32
-; SI: s_endpgm
+; SI: s_abs_i32
; EG: SUB_INT
; EG: MAX_INT
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
index 1168713ca66e..d56b48457285 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
@@ -425,7 +425,7 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
-; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
+; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll
deleted file mode 100644
index 301de4b1c82d..000000000000
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-
-declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone
-
-; FUNC-LABEL: {{^}}s_brev_i32:
-; SI: s_load_dword [[VAL:s[0-9]+]],
-; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: buffer_store_dword [[VRESULT]],
-; SI: s_endpgm
-define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
- %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_brev_i32:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
-define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
- %val = load i32, i32 addrspace(1)* %valptr, align 4
- %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
- store i32 %ctlz, i32 addrspace(1)* %out, align 4
- ret void
-}
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
index 805a88b59c72..80eb3b93f8e5 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
@@ -271,7 +271,8 @@ define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NOT: vcc
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
@@ -285,7 +286,8 @@ define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NOT: vcc
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
index f948c987b038..7dc094ed1b4b 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll
@@ -4,7 +4,6 @@
; FIXME: Enable for VI.
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll
new file mode 100644
index 000000000000..2e299e30b8c7
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}read_workdim:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].Z
+
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+define void @read_workdim(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.AMDGPU.read.workdim() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}read_workdim_known_bits:
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN-NOT: 0xff
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
+define void @read_workdim_known_bits(i32 addrspace(1)* %out) {
+entry:
+ %dim = call i32 @llvm.AMDGPU.read.workdim() #0
+ %shl = shl i32 %dim, 24
+ %shr = lshr i32 %shl, 24
+ store i32 %shr, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.AMDGPU.read.workdim() #0
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll
index 74792e50017f..a30a8e083eb6 100644
--- a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll
+++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
; R600: {{^}}amdgpu_trunc:
-; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; R600: TRUNC {{\*? *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: {{^}}amdgpu_trunc:
; SI: v_trunc_f32
diff --git a/test/CodeGen/AMDGPU/llvm.SI.packf16.ll b/test/CodeGen/AMDGPU/llvm.SI.packf16.ll
new file mode 100644
index 000000000000..0155757632d4
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.SI.packf16.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}main:
+; GCN: v_cvt_pkrtz_f16_f32
+; GCN: v_cvt_pkrtz_f16_f32
+; GCN-NOT: v_cvt_pkrtz_f16_f32
+
+define void @main(float %src) #0 {
+main_body:
+ %p1 = call i32 @llvm.SI.packf16(float undef, float %src)
+ %p2 = call i32 @llvm.SI.packf16(float %src, float undef)
+ %p3 = call i32 @llvm.SI.packf16(float undef, float undef)
+ %f1 = bitcast i32 %p1 to float
+ %f2 = bitcast i32 %p2 to float
+ %f3 = bitcast i32 %p3 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 1, float undef, float %f1, float undef, float %f1)
+ call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 1, float undef, float %f2, float undef, float %f2)
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %f3, float undef, float %f2)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll
new file mode 100644
index 000000000000..6d9db65e7d93
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+declare void @llvm.amdgcn.buffer.wbinvl1() #0
+
+; GCN-LABEL: {{^}}test_buffer_wbinvl1:
+; GCN-NEXT: ; BB#0:
+; SI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xc4,0xe1,0x00,0x00,0x00,0x00]
+; VI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xf8,0xe0,0x00,0x00,0x00,0x00]
+; GCN-NEXT: s_endpgm
+define void @test_buffer_wbinvl1() #0 {
+ call void @llvm.amdgcn.buffer.wbinvl1()
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll
new file mode 100644
index 000000000000..746298465e58
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=SI %s
+
+declare void @llvm.amdgcn.buffer.wbinvl1.sc() #0
+
+; SI-LABEL: {{^}}test_buffer_wbinvl1_sc:
+; SI-NEXT: ; BB#0:
+; SI-NEXT: buffer_wbinvl1_sc ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00]
+; SI-NEXT: s_endpgm
+define void @test_buffer_wbinvl1_sc() #0 {
+ call void @llvm.amdgcn.buffer.wbinvl1.sc()
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
new file mode 100644
index 000000000000..cecfcb1bfe7c
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0
+
+; GCN-LABEL: {{^}}test_buffer_wbinvl1_vol:
+; GCN-NEXT: ; BB#0:
+; CI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00]
+; VI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
+; GCN-NEXT: s_endpgm
+define void @test_buffer_wbinvl1_vol() #0 {
+ call void @llvm.amdgcn.buffer.wbinvl1.vol()
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
new file mode 100644
index 000000000000..dc95cd1ee012
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}test:
+; GCN: enable_sgpr_dispatch_ptr = 1
+; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
+define void @test(i32 addrspace(1)* %out) {
+ %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+ %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
+ %value = load i32, i32 addrspace(2)* %header_ptr
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
new file mode 100644
index 000000000000..a28e1b1eb241
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -0,0 +1,30 @@
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+
+;GCN-LABEL: {{^}}v_interp:
+;GCN-NOT: s_wqm
+;GCN: s_mov_b32 m0, s{{[0-9]+}}
+;GCN: v_interp_p1_f32
+;GCN: v_interp_p2_f32
+define void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
+main_body:
+ %i = extractelement <2 x i32> %4, i32 0
+ %j = extractelement <2 x i32> %4, i32 1
+ %p0_0 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 0, i32 0, i32 %3)
+ %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, i32 %j, i32 0, i32 0, i32 %3)
+ %p0_1 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 1, i32 0, i32 %3)
+ %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, i32 %j, i32 1, i32 0, i32 %3)
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %p1_1)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
new file mode 100644
index 000000000000..02ee2039542a
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
@@ -0,0 +1,24 @@
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
+
+;GCN-LABEL: {{^}}mbcnt_intrinsics:
+;GCN: v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0
+;SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]]
+;VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]]
+
+define void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+main_body:
+ %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1
+ %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1
+ %4 = bitcast i32 %hi to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %4, float %4, float %4, float %4)
+ ret void
+}
+
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
new file mode 100644
index 000000000000..f8af67c17ec2
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+declare void @llvm.amdgcn.s.dcache.inv() #0
+
+; GCN-LABEL: {{^}}test_s_dcache_inv:
+; GCN-NEXT: ; BB#0:
+; SI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7]
+; VI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0x80,0xc0,0x00,0x00,0x00,0x00]
+; GCN-NEXT: s_endpgm
+define void @test_s_dcache_inv() #0 {
+ call void @llvm.amdgcn.s.dcache.inv()
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_s_dcache_inv_insert_wait:
+; GCN-NEXT: ; BB#0:
+; GCN-NEXT: s_dcache_inv
+; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding
+define void @test_s_dcache_inv_insert_wait() #0 {
+ call void @llvm.amdgcn.s.dcache.inv()
+ br label %end
+
+end:
+ store volatile i32 3, i32 addrspace(1)* undef
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
new file mode 100644
index 000000000000..a8502a7c5033
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+declare void @llvm.amdgcn.s.dcache.inv.vol() #0
+
+; GCN-LABEL: {{^}}test_s_dcache_inv_vol:
+; GCN-NEXT: ; BB#0:
+; CI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7]
+; VI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x88,0xc0,0x00,0x00,0x00,0x00]
+; GCN-NEXT: s_endpgm
+define void @test_s_dcache_inv_vol() #0 {
+ call void @llvm.amdgcn.s.dcache.inv.vol()
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_s_dcache_inv_vol_insert_wait:
+; GCN-NEXT: ; BB#0:
+; GCN-NEXT: s_dcache_inv_vol
+; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding
+define void @test_s_dcache_inv_vol_insert_wait() #0 {
+ call void @llvm.amdgcn.s.dcache.inv.vol()
+ br label %end
+
+end:
+ store volatile i32 3, i32 addrspace(1)* undef
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
new file mode 100644
index 000000000000..f9ae09b391aa
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
+
+declare void @llvm.amdgcn.s.dcache.wb() #0
+
+; VI-LABEL: {{^}}test_s_dcache_wb:
+; VI-NEXT: ; BB#0:
+; VI-NEXT: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT: s_endpgm
+define void @test_s_dcache_wb() #0 {
+ call void @llvm.amdgcn.s.dcache.wb()
+ ret void
+}
+
+; VI-LABEL: {{^}}test_s_dcache_wb_insert_wait:
+; VI-NEXT: ; BB#0:
+; VI-NEXT: s_dcache_wb
+; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding
+define void @test_s_dcache_wb_insert_wait() #0 {
+ call void @llvm.amdgcn.s.dcache.wb()
+ br label %end
+
+end:
+ store volatile i32 3, i32 addrspace(1)* undef
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
new file mode 100644
index 000000000000..d9145458a1f6
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
+
+declare void @llvm.amdgcn.s.dcache.wb.vol() #0
+
+; VI-LABEL: {{^}}test_s_dcache_wb_vol:
+; VI-NEXT: ; BB#0:
+; VI-NEXT: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT: s_endpgm
+define void @test_s_dcache_wb_vol() #0 {
+ call void @llvm.amdgcn.s.dcache.wb.vol()
+ ret void
+}
+
+; VI-LABEL: {{^}}test_s_dcache_wb_vol_insert_wait:
+; VI-NEXT: ; BB#0:
+; VI-NEXT: s_dcache_wb_vol
+; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding
+define void @test_s_dcache_wb_vol_insert_wait() #0 {
+ call void @llvm.amdgcn.s.dcache.wb.vol()
+ br label %end
+
+end:
+ store volatile i32 3, i32 addrspace(1)* undef
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll b/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
index a64dd0ebd2dd..0c3e4ecaa1a0 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
@@ -4,7 +4,7 @@
declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone
; FUNC-LABEL: {{^}}test_lrp:
-; SI: v_sub_f32
+; SI: v_mad_f32
; SI: v_mac_f32_e32
define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
%mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
diff --git a/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/test/CodeGen/AMDGPU/llvm.dbg.value.ll
index d001bcb4db17..b01f8ab2bdf9 100644
--- a/test/CodeGen/AMDGPU/llvm.dbg.value.ll
+++ b/test/CodeGen/AMDGPU/llvm.dbg.value.ll
@@ -1,11 +1,11 @@
-; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -mattr=-flat-for-global < %s | FileCheck %s
; CHECK-LABEL: {{^}}test_debug_value:
-; CHECK: s_load_dwordx2
-; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- SGPR0_SGPR1
+; CHECK: s_load_dwordx2 s[4:5]
+; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- %SGPR4_SGPR5
; CHECK: buffer_store_dword
; CHECK: s_endpgm
-define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 {
+define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, i64 0, metadata !10, metadata !13), !dbg !14
store i32 123, i32 addrspace(1)* %globalptr_arg, align 4
@@ -24,13 +24,13 @@ attributes #1 = { nounwind readnone }
!1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, function: void (i32 addrspace(1)*)* @test_debug_value, variables: !9)
+!4 = distinct !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !9)
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7}
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 32)
!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10}
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!10 = !DILocalVariable(name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7)
!11 = !{i32 2, !"Dwarf Version", i32 4}
!12 = !{i32 2, !"Debug Info Version", i32 3}
!13 = !DIExpression()
diff --git a/test/CodeGen/AMDGPU/llvm.memcpy.ll b/test/CodeGen/AMDGPU/llvm.memcpy.ll
index e491732cf9c5..d83ab562b718 100644
--- a/test/CodeGen/AMDGPU/llvm.memcpy.ll
+++ b/test/CodeGen/AMDGPU/llvm.memcpy.ll
@@ -132,32 +132,15 @@ define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %
}
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
+; SI: ds_read2_b32
+; SI: ds_read2_b32
+; SI: ds_read2_b32
+; SI: ds_read2_b32
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
+; SI: ds_write2_b32
+; SI: ds_write2_b32
+; SI: ds_write2_b32
+; SI: ds_write2_b32
; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
@@ -170,32 +153,15 @@ define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %
; FIXME: Use 64-bit ops
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
-
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
+; SI: ds_read_b64
+; SI: ds_read_b64
+; SI: ds_read_b64
+; SI: ds_read_b64
-; SI-DAG: ds_read_b32
-; SI-DAG: ds_write_b32
+; SI: ds_write_b64
+; SI: ds_write_b64
+; SI: ds_write_b64
+; SI: ds_write_b64
; SI-DAG: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
diff --git a/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
new file mode 100644
index 000000000000..13ebee41e844
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
@@ -0,0 +1,184 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}local_size_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[1].Z
+
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
+; CI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x1
+; VI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x4
+
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
+define void @local_size_x(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.local.size.x() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[1].W
+
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
+define void @local_size_y(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.local.size.y() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV * [[VAL]], KC0[2].X
+
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
+define void @local_size_z(i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.r600.read.local.size.z() #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_xy:
+; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
+; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
+; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
+; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
+; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
+; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VY]]
+; GCN: buffer_store_dword [[VAL]]
+define void @local_size_xy(i32 addrspace(1)* %out) {
+entry:
+ %x = call i32 @llvm.r600.read.local.size.x() #0
+ %y = call i32 @llvm.r600.read.local.size.y() #0
+ %val = mul i32 %x, %y
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_xz:
+
+; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
+; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
+; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
+; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
+; HSA-DAG: s_and_b32 [[X:s[0-9]+]], [[XY]], 0xffff
+; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
+; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VZ]]
+; GCN: buffer_store_dword [[VAL]]
+define void @local_size_xz(i32 addrspace(1)* %out) {
+entry:
+ %x = call i32 @llvm.r600.read.local.size.x() #0
+ %z = call i32 @llvm.r600.read.local.size.z() #0
+ %val = mul i32 %x, %z
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_yz:
+; HSA: enable_sgpr_private_segment_buffer = 1
+; HSA: enable_sgpr_dispatch_ptr = 1
+
+; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
+; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
+; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
+; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
+; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
+; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[Y]], [[VZ]]
+; GCN: buffer_store_dword [[VAL]]
+define void @local_size_yz(i32 addrspace(1)* %out) {
+entry:
+ %y = call i32 @llvm.r600.read.local.size.y() #0
+ %z = call i32 @llvm.r600.read.local.size.z() #0
+ %val = mul i32 %y, %z
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_xyz:
+; HSA: enable_sgpr_private_segment_buffer = 1
+; HSA: enable_sgpr_dispatch_ptr = 1
+
+; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
+; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
+; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
+; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
+; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
+; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
+; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
+; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
+; GCN: v_mad_u32_u24 [[VAL:v[0-9]+]], [[X]], [[VY]], [[VZ]]
+; GCN: buffer_store_dword [[VAL]]
+define void @local_size_xyz(i32 addrspace(1)* %out) {
+entry:
+ %x = call i32 @llvm.r600.read.local.size.x() #0
+ %y = call i32 @llvm.r600.read.local.size.y() #0
+ %z = call i32 @llvm.r600.read.local.size.z() #0
+ %xy = mul i32 %x, %y
+ %xyz = add i32 %xy, %z
+ store i32 %xyz, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_x_known_bits:
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
+; GCN-NOT: 0xffff
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NEXT: buffer_store_dword [[VVAL]]
+define void @local_size_x_known_bits(i32 addrspace(1)* %out) {
+entry:
+ %size = call i32 @llvm.r600.read.local.size.x() #0
+ %shl = shl i32 %size, 16
+ %shr = lshr i32 %shl, 16
+ store i32 %shr, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_y_known_bits:
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
+; GCN-NOT: 0xffff
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NEXT: buffer_store_dword [[VVAL]]
+define void @local_size_y_known_bits(i32 addrspace(1)* %out) {
+entry:
+ %size = call i32 @llvm.r600.read.local.size.y() #0
+ %shl = shl i32 %size, 16
+ %shr = lshr i32 %shl, 16
+ store i32 %shr, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}local_size_z_known_bits:
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
+; GCN-NOT: 0xffff
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NEXT: buffer_store_dword [[VVAL]]
+define void @local_size_z_known_bits(i32 addrspace(1)* %out) {
+entry:
+ %size = call i32 @llvm.r600.read.local.size.z() #0
+ %shl = shl i32 %size, 16
+ %shr = lshr i32 %shl, 16
+ store i32 %shr, i32 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.local.size.x() #0
+declare i32 @llvm.r600.read.local.size.y() #0
+declare i32 @llvm.r600.read.local.size.z() #0
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/llvm.round.f64.ll b/test/CodeGen/AMDGPU/llvm.round.f64.ll
index 3d0f57e33280..6b365dc09e2a 100644
--- a/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -21,12 +21,9 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 {
; SI-DAG: v_cmp_eq_i32
; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
-; SI-DAG: v_cmp_gt_i32_e64
+; SI-DAG: v_cmp_gt_i32_e32
; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]]
-; SI-DAG: v_cmp_gt_i32_e64
-
-
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
diff --git a/test/CodeGen/AMDGPU/load.ll b/test/CodeGen/AMDGPU/load.ll
index 93b1b51a0d07..6a04261fe47b 100644
--- a/test/CodeGen/AMDGPU/load.ll
+++ b/test/CodeGen/AMDGPU/load.ll
@@ -277,15 +277,9 @@ entry:
; FUNC-LABEL: {{^}}load_v8i32:
; R600: VTX_READ_128
; R600: VTX_READ_128
-; XXX: We should be using DWORDX4 instructions on SI.
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
+
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
%0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
@@ -298,23 +292,11 @@ entry:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
-; XXX: We should be using DWORDX4 instructions on SI.
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
+
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
%0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
diff --git a/test/CodeGen/AMDGPU/local-memory-two-objects.ll b/test/CodeGen/AMDGPU/local-memory-two-objects.ll
index f501a7ac6274..6b52b80ba082 100644
--- a/test/CodeGen/AMDGPU/local-memory-two-objects.ll
+++ b/test/CodeGen/AMDGPU/local-memory-two-objects.ll
@@ -10,7 +10,7 @@
; EG: .long 166120
; EG-NEXT: .long 8
; GCN: .long 47180
-; GCN-NEXT: .long 38792
+; GCN-NEXT: .long 32900
; EG: {{^}}local_memory_two_objects:
@@ -30,7 +30,7 @@
; constant offsets.
; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
+; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], vcc, 16, v{{[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
diff --git a/test/CodeGen/AMDGPU/local-memory.ll b/test/CodeGen/AMDGPU/local-memory.ll
index 9494ed75bd0c..9ffb59e70920 100644
--- a/test/CodeGen/AMDGPU/local-memory.ll
+++ b/test/CodeGen/AMDGPU/local-memory.ll
@@ -9,9 +9,9 @@
; EG: .long 166120
; EG-NEXT: .long 128
; SI: .long 47180
-; SI-NEXT: .long 71560
+; SI-NEXT: .long 65668
; CI: .long 47180
-; CI-NEXT: .long 38792
+; CI-NEXT: .long 32900
; FUNC-LABEL: {{^}}local_memory:
diff --git a/test/CodeGen/AMDGPU/max.ll b/test/CodeGen/AMDGPU/max.ll
index fef3e2f0a21c..eeb915c10a96 100644
--- a/test/CodeGen/AMDGPU/max.ll
+++ b/test/CodeGen/AMDGPU/max.ll
@@ -2,7 +2,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-; FUNC-LABEL: @v_test_imax_sge_i32
+; FUNC-LABEL: {{^}}v_test_imax_sge_i32:
; SI: v_max_i32_e32
define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -17,6 +17,24 @@ define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
ret void
}
+; FUNC-LABEL: {{^}}v_test_imax_sge_v4i32:
+; SI: v_max_i32_e32
+; SI: v_max_i32_e32
+; SI: v_max_i32_e32
+; SI: v_max_i32_e32
+define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %out, i32 %tid
+ %a = load <4 x i32>, <4 x i32> addrspace(1)* %gep0, align 4
+ %b = load <4 x i32>, <4 x i32> addrspace(1)* %gep1, align 4
+ %cmp = icmp sge <4 x i32> %a, %b
+ %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ store <4 x i32> %val, <4 x i32> addrspace(1)* %outgep, align 4
+ ret void
+}
+
; FUNC-LABEL: @s_test_imax_sge_i32
; SI: s_max_i32
define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
@@ -35,6 +53,23 @@ define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
ret void
}
+; FUNC-LABEL: {{^}}v_test_imax_sge_i8:
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: v_max_i32_e32
+define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %a = load i8, i8 addrspace(1)* %gep0, align 1
+ %b = load i8, i8 addrspace(1)* %gep1, align 1
+ %cmp = icmp sge i8 %a, %b
+ %val = select i1 %cmp, i8 %a, i8 %b
+ store i8 %val, i8 addrspace(1)* %outgep, align 1
+ ret void
+}
+
; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
@@ -44,6 +79,15 @@ define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
ret void
}
+; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i32:
+; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
+; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
+define void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
+ %cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9>
+ %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9>
+ store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
+ ret void
+}
; FUNC-LABEL: @v_test_imax_sgt_i32
; SI: v_max_i32_e32
define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
@@ -92,6 +136,36 @@ define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void
}
+; FUNC-LABEL: {{^}}s_test_umax_uge_v3i32:
+; SI: s_max_u32
+; SI: s_max_u32
+; SI: s_max_u32
+; SI-NOT: s_max_u32
+; SI: s_endpgm
+define void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind {
+ %cmp = icmp uge <3 x i32> %a, %b
+ %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
+ store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_test_umax_uge_i8:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: v_max_u32_e32
+define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %a = load i8, i8 addrspace(1)* %gep0, align 1
+ %b = load i8, i8 addrspace(1)* %gep1, align 1
+ %cmp = icmp uge i8 %a, %b
+ %val = select i1 %cmp, i8 %a, i8 %b
+ store i8 %val, i8 addrspace(1)* %outgep, align 1
+ ret void
+}
+
; FUNC-LABEL: @v_test_umax_ugt_i32
; SI: v_max_u32_e32
define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
@@ -107,7 +181,7 @@ define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
ret void
}
-; FUNC-LABEL: @s_test_umax_ugt_i32
+; FUNC-LABEL: {{^}}s_test_umax_ugt_i32:
; SI: s_max_u32
define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp ugt i32 %a, %b
@@ -116,13 +190,23 @@ define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void
}
+; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32:
+; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
+; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23
+define void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
+ %cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23>
+ %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23>
+ store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
; Make sure redundant and removed
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_max_u32 [[MIN:s[0-9]+]], [[A]], [[B]]
-; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
-; SI-NEXT: buffer_store_dword [[VMIN]]
+; SI: s_max_u32 [[MAX:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
+; SI-NEXT: buffer_store_dword [[VMAX]]
define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
%a.ext = zext i16 %a to i32
%b.ext = zext i16 %b to i32
@@ -135,13 +219,13 @@ define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i1
; Make sure redundant sign_extend_inreg removed.
-; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16:
+; FUNC-LABEL: {{^}}simplify_demanded_bits_test_max_slt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: s_max_i32 [[MIN:s[0-9]+]], [[A]], [[B]]
-; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]]
-; SI-NEXT: buffer_store_dword [[VMIN]]
-define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
+; SI: s_max_i32 [[MAX:s[0-9]+]], [[A]], [[B]]
+; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
+; SI-NEXT: buffer_store_dword [[VMAX]]
+define void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
%a.ext = sext i16 %a to i32
%b.ext = sext i16 %b to i32
%cmp = icmp sgt i32 %a.ext, %b.ext
@@ -152,15 +236,13 @@ define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16
ret void
}
-; FIXME: Should get match min/max through extends inserted by
-; legalization.
-
-; FUNC-LABEL: {{^}}s_test_imin_sge_i16:
+; FUNC-LABEL: {{^}}s_test_imax_sge_i16:
+; SI: s_load_dword
+; SI: s_load_dword
; SI: s_sext_i32_i16
; SI: s_sext_i32_i16
-; SI: v_cmp_ge_i32_e32
-; SI: v_cndmask_b32
-define void @s_test_imin_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
+; SI: s_max_i32
+define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
%cmp = icmp sge i16 %a, %b
%val = select i1 %cmp, i16 %a, i16 %b
store i16 %val, i16 addrspace(1)* %out
diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll
index 34a2fc7ffa74..65b454b5d8cb 100644
--- a/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/test/CodeGen/AMDGPU/merge-stores.ll
@@ -1,5 +1,8 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
+
+; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
; Run with devices with different unaligned load restrictions.
@@ -65,10 +68,8 @@ define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)*
}
; GCN-LABEL: {{^}}merge_global_store_2_constants_i32:
-; SI-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
-; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
-; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
+; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
+; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
@@ -89,10 +90,8 @@ define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
}
; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32:
-; SI-DAG: s_mov_b32 [[SLO:s[0-9]+]], 4.0
-; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b{{$}}
-; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[SLO]]
-; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[SHI]]
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
@@ -121,10 +120,7 @@ define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
}
; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dwordx2 v
+; GCN: buffer_store_dwordx4
define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
@@ -137,17 +133,9 @@ define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out)
ret void
}
-; First store is out of order. Because of order of combines, the
-; consecutive store fails because only some of the stores have been
-; replaced with integer constant stores, and then won't merge because
-; the types are different.
-
+; First store is out of order.
; GCN-LABEL: {{^}}merge_global_store_4_constants_f32:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
+; GCN: buffer_store_dwordx4
define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
@@ -160,6 +148,33 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
ret void
}
+; FIXME: Should be able to merge this
+; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+
+; GCN-AA: buffer_store_dwordx2
+; GCN-AA: buffer_store_dword v
+; GCN-AA: buffer_store_dword v
+
+; GCN: s_endpgm
+define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+
+ %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
+ %out.gep.3.bc = bitcast float addrspace(1)* %out.gep.3 to i32 addrspace(1)*
+
+ store i32 11, i32 addrspace(1)* %out.gep.1.bc
+ store float 2.0, float addrspace(1)* %out.gep.2
+ store i32 17, i32 addrspace(1)* %out.gep.3.bc
+ store float 8.0, float addrspace(1)* %out
+ ret void
+}
+
; GCN-LABEL: {{^}}merge_global_store_3_constants_i32:
; SI-DAG: buffer_store_dwordx2
; SI-DAG: buffer_store_dword
@@ -176,9 +191,7 @@ define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 {
}
; GCN-LABEL: {{^}}merge_global_store_2_constants_i64:
-; XGCN: buffer_store_dwordx4
-; GCN: buffer_store_dwordx2
-; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx4
define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
@@ -188,13 +201,8 @@ define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
}
; GCN-LABEL: {{^}}merge_global_store_4_constants_i64:
-; XGCN: buffer_store_dwordx4
-; XGCN: buffer_store_dwordx4
-
-; GCN: buffer_store_dwordx2
-; GCN: buffer_store_dwordx2
-; GCN: buffer_store_dwordx2
-; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 {
%out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
%out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2
@@ -472,11 +480,15 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1
; This works once AA is enabled on the subtarget
; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
-; XGCN: buffer_store_dwordx4 [[LOAD]]
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
+
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+; GCN-NOAA: buffer_store_dword v
+
+; GCN-AA: buffer_store_dwordx4 [[LOAD]]
+
+; GCN: s_endpgm
define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
@@ -508,10 +520,8 @@ define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
}
; GCN-LABEL: {{^}}merge_local_store_2_constants_i32:
-; GCN-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
-; GCN-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
-; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
-; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b
; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
@@ -522,10 +532,15 @@ define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
}
; GCN-LABEL: {{^}}merge_local_store_4_constants_i32:
-; GCN: ds_write_b32
-; GCN: ds_write_b32
-; GCN: ds_write_b32
-; GCN: ds_write_b32
+; GCN-DAG: v_mov_b32_e32 [[K2:v[0-9]+]], 0x1c8
+; GCN-DAG: v_mov_b32_e32 [[K3:v[0-9]+]], 0x14d
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K2]], [[K3]] offset0:2 offset1:3
+
+; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0x4d2
+; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x7b
+; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K0]], [[K1]] offset1:1
+
+; GCN: s_endpgm
define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
%out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
%out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
@@ -597,17 +612,9 @@ define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) {
}
; GCN-LABEL: {{^}}merge_global_store_8_constants_i32:
-; XGCN: buffer_store_dwordx4
-; XGCN: buffer_store_dwordx4
-
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
-; GCN: buffer_store_dword v
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: s_endpgm
define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
store i32 34, i32 addrspace(1)* %out, align 4
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
@@ -627,7 +634,78 @@ define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
ret void
}
+; This requires handling of scalar_to_vector for v2i64 to avoid
+; scratch usage.
+; FIXME: Should do single load and store
+
+; GCN-LABEL: {{^}}copy_v3i32_align4:
+; GCN-NOT: SCRATCH_RSRC_DWORD
+; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-NOT: offen
+; GCN: s_waitcnt vmcnt
+; GCN-NOT: offen
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+
+; GCN: ScratchSize: 0{{$}}
+define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 {
+ %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4
+ store <3 x i32> %vec, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}copy_v3i64_align4:
+; GCN-NOT: SCRATCH_RSRC_DWORD
+; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-NOT: offen
+; GCN: s_waitcnt vmcnt
+; GCN-NOT: offen
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN: ScratchSize: 0{{$}}
+define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
+ %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
+ store <3 x i64> %vec, <3 x i64> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}copy_v3f32_align4:
+; GCN-NOT: SCRATCH_RSRC_DWORD
+; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-NOT: offen
+; GCN: s_waitcnt vmcnt
+; GCN-NOT: offen
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN: ScratchSize: 0{{$}}
+define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 {
+ %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
+ %fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0>
+ store <3 x float> %fadd, <3 x float> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}copy_v3f64_align4:
+; GCN-NOT: SCRATCH_RSRC_DWORD
+; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-NOT: offen
+; GCN: s_waitcnt vmcnt
+; GCN-NOT: offen
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN: ScratchSize: 0{{$}}
+define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 {
+ %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
+ %fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0>
+ store <3 x double> %fadd, <3 x double> addrspace(1)* %out
+ ret void
+}
+
declare void @llvm.AMDGPU.barrier.local() #1
attributes #0 = { nounwind }
-attributes #1 = { noduplicate nounwind }
+attributes #1 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/min.ll b/test/CodeGen/AMDGPU/min.ll
index 0332d1a8e407..215dbeb4b2fd 100644
--- a/test/CodeGen/AMDGPU/min.ll
+++ b/test/CodeGen/AMDGPU/min.ll
@@ -2,7 +2,7 @@
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-; FUNC-LABEL: @v_test_imin_sle_i32
+; FUNC-LABEL: {{^}}v_test_imin_sle_i32:
; SI: v_min_i32_e32
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -17,7 +17,7 @@ define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
ret void
}
-; FUNC-LABEL: @s_test_imin_sle_i32
+; FUNC-LABEL: {{^}}s_test_imin_sle_i32:
; SI: s_min_i32
define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp sle i32 %a, %b
@@ -26,6 +26,78 @@ define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void
}
+; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32:
+; SI: s_min_i32
+define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
+ %cmp = icmp sle <1 x i32> %a, %b
+ %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
+ store <1 x i32> %val, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32:
+; SI: s_min_i32
+; SI: s_min_i32
+; SI: s_min_i32
+; SI: s_min_i32
+define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
+ %cmp = icmp sle <4 x i32> %a, %b
+ %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ store <4 x i32> %val, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_test_imin_sle_i8:
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: s_sext_i32_i8
+; SI: s_sext_i32_i8
+; SI: s_min_i32
+define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind {
+ %cmp = icmp sle i8 %a, %b
+ %val = select i1 %cmp, i8 %a, i8 %b
+ store i8 %val, i8 addrspace(1)* %out
+ ret void
+}
+
+; XXX - should be able to use s_min if we stop unnecessarily doing
+; extloads with mubuf instructions.
+
+; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8:
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+; SI: buffer_load_sbyte
+
+; SI: v_min_i32
+; SI: v_min_i32
+; SI: v_min_i32
+; SI: v_min_i32
+
+; SI: s_endpgm
+define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) nounwind {
+ %cmp = icmp sle <4 x i8> %a, %b
+ %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b
+ store <4 x i8> %val, <4 x i8> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16:
+; SI: v_min_i32
+; SI: v_min_i32
+; SI: v_min_i32
+; SI: v_min_i32
+define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
+ %cmp = icmp sle <4 x i16> %a, %b
+ %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b
+ store <4 x i16> %val, <4 x i16> addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: @v_test_imin_slt_i32
; SI: v_min_i32_e32
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
@@ -50,6 +122,16 @@ define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void
}
+; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32:
+; SI: s_min_i32
+; SI: s_min_i32
+define void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
+ %cmp = icmp slt <2 x i32> %a, %b
+ %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
+ store <2 x i32> %val, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
@@ -83,6 +165,24 @@ define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
ret void
}
+; FUNC-LABEL: @v_test_umin_ule_v3i32
+; SI: v_min_u32_e32
+; SI: v_min_u32_e32
+; SI: v_min_u32_e32
+; SI-NOT: v_min_u32_e32
+; SI: s_endpgm
+define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %aptr, <3 x i32> addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
+ %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep0
+ %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep1
+ %cmp = icmp ule <3 x i32> %a, %b
+ %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
+ store <3 x i32> %val, <3 x i32> addrspace(1)* %outgep
+ ret void
+}
; FUNC-LABEL: @s_test_umin_ule_i32
; SI: s_min_u32
define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
@@ -107,6 +207,23 @@ define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr
ret void
}
+; FUNC-LABEL: {{^}}v_test_umin_ult_i8:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: v_min_u32_e32
+define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %a = load i8, i8 addrspace(1)* %gep0, align 1
+ %b = load i8, i8 addrspace(1)* %gep1, align 1
+ %cmp = icmp ult i8 %a, %b
+ %val = select i1 %cmp, i8 %a, i8 %b
+ store i8 %val, i8 addrspace(1)* %outgep, align 1
+ ret void
+}
+
; FUNC-LABEL: @s_test_umin_ult_i32
; SI: s_min_u32
define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
@@ -137,6 +254,48 @@ define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace
ret void
}
+
+; FUNC-LABEL: @s_test_umin_ult_v1i32
+; SI: s_min_u32
+define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
+ %cmp = icmp ult <1 x i32> %a, %b
+ %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b
+ store <1 x i32> %val, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32:
+; SI: s_min_u32
+; SI: s_min_u32
+; SI: s_min_u32
+; SI: s_min_u32
+; SI: s_min_u32
+; SI: s_min_u32
+; SI: s_min_u32
+; SI: s_min_u32
+define void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
+ %cmp = icmp ult <8 x i32> %a, %b
+ %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
+ store <8 x i32> %val, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16:
+; SI: v_min_u32
+; SI: v_min_u32
+; SI: v_min_u32
+; SI: v_min_u32
+; SI: v_min_u32
+; SI: v_min_u32
+; SI: v_min_u32
+; SI: v_min_u32
+define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
+ %cmp = icmp ult <8 x i16> %a, %b
+ %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
+ store <8 x i16> %val, <8 x i16> addrspace(1)* %out
+ ret void
+}
+
; Make sure redundant and removed
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
@@ -173,14 +332,8 @@ define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16
ret void
}
-; FIXME: Should get match min/max through extends inserted by
-; legalization.
-
; FUNC-LABEL: {{^}}s_test_imin_sle_i16:
-; SI: s_sext_i32_i16
-; SI: s_sext_i32_i16
-; SI: v_cmp_le_i32_e32
-; SI: v_cndmask_b32
+; SI: s_min_i32
define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
%cmp = icmp sle i16 %a, %b
%val = select i1 %cmp, i16 %a, i16 %b
diff --git a/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll b/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll
new file mode 100644
index 000000000000..e9f641b736d5
--- /dev/null
+++ b/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN %s
+
+; Check that when mubuf addr64 instruction is handled in moveToVALU
+; from the pointer, dead register writes are not emitted.
+
+; FIXME: We should be able to use the SGPR directly as src0 to v_add_i32
+
+; GCN-LABEL: {{^}}clobber_vgpr_pair_pointer_add:
+; GCN: s_load_dwordx2 s{{\[}}[[ARG1LO:[0-9]+]]:[[ARG1HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
+; GCN: buffer_load_dwordx2 v{{\[}}[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]{{\]}}
+
+; GCN-NOT: v_mov_b32
+; GCN: v_mov_b32_e32 v[[VARG1LO:[0-9]+]], s[[ARG1LO]]
+; GCN-NEXT: v_mov_b32_e32 v[[VARG1HI:[0-9]+]], s[[ARG1HI]]
+; GCN-NOT: v_mov_b32
+
+; GCN: v_add_i32_e32 v[[PTRLO:[0-9]+]], vcc, v[[LDPTRLO]], v[[VARG1LO]]
+; GCN: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]]
+; GCN: buffer_load_ubyte v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}},
+
+define void @clobber_vgpr_pair_pointer_add(i64 %arg1, i8 addrspace(1)* addrspace(1)* %ptrarg, i32 %arg3) #0 {
+bb:
+ %tmp = icmp sgt i32 %arg3, 0
+ br i1 %tmp, label %bb4, label %bb17
+
+bb4:
+ %tmp14 = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %ptrarg
+ %tmp15 = getelementptr inbounds i8, i8 addrspace(1)* %tmp14, i64 %arg1
+ %tmp16 = load volatile i8, i8 addrspace(1)* %tmp15
+ br label %bb17
+
+bb17:
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll b/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll
new file mode 100644
index 000000000000..8bca0575ecd2
--- /dev/null
+++ b/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; XUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; FIXME: broken on VI because flat instructions need to be emitted
+; instead of addr64 equivalent of the _OFFSET variants.
+
+; Check that moving the pointer out of the resource descriptor to
+; vaddr works for atomics.
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; GCN-LABEL: {{^}}atomic_max_i32:
+; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}}
+define void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid
+ %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep
+ %xor = xor i32 %tid, 1
+ %cmp = icmp ne i32 %xor, 0
+ br i1 %cmp, label %atomic, label %exit
+
+atomic:
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100
+ %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst
+ store i32 %ret, i32 addrspace(1)* %out
+ br label %exit
+
+exit:
+ ret void
+}
+
+; GCN-LABEL: {{^}}atomic_max_i32_noret:
+; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}}
+define void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid
+ %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep
+ %xor = xor i32 %tid, 1
+ %cmp = icmp ne i32 %xor, 0
+ br i1 %cmp, label %atomic, label %exit
+
+atomic:
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100
+ %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst
+ br label %exit
+
+exit:
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll b/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
new file mode 100644
index 000000000000..73a146710a9f
--- /dev/null
+++ b/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
@@ -0,0 +1,18 @@
+; RUN: not llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported non-compute shaders with HSA in pixel_shader
+define void @pixel_shader() #0 {
+ ret void
+}
+
+define void @vertex_shader() #1 {
+ ret void
+}
+
+define void @geometry_shader() #2 {
+ ret void
+}
+
+attributes #0 = { nounwind "ShaderType"="0" }
+attributes #1 = { nounwind "ShaderType"="1" }
+attributes #2 = { nounwind "ShaderType"="2" }
diff --git a/test/CodeGen/AMDGPU/no-shrink-extloads.ll b/test/CodeGen/AMDGPU/no-shrink-extloads.ll
index e4328ecbaca8..f81911aafe22 100644
--- a/test/CodeGen/AMDGPU/no-shrink-extloads.ll
+++ b/test/CodeGen/AMDGPU/no-shrink-extloads.ll
@@ -189,3 +189,15 @@ define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace
store i8 %trunc, i8 addrspace(1)* %gep.out
ret void
}
+
+; FUNC-LABEL: {{^}}smrd_mask_i32_to_i16
+; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0
+; SI: s_waitcnt lgkmcnt(0)
+; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff
+define void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+entry:
+ %val = load i32, i32 addrspace(2)* %in
+ %mask = and i32 %val, 65535
+ store i32 %mask, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/opencl-image-metadata.ll b/test/CodeGen/AMDGPU/opencl-image-metadata.ll
new file mode 100644
index 000000000000..bc467e47dc31
--- /dev/null
+++ b/test/CodeGen/AMDGPU/opencl-image-metadata.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; Make sure the OpenCL Image lowering pass doesn't crash when argument metadata
+; is not in expected order.
+
+; EG: CF_END
+; SI: s_endpgm
+define void @kernel(i32 addrspace(1)* %out) {
+entry:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #3 = { nounwind }
+
+!opencl.kernels = !{!0}
+
+!0 = !{void (i32 addrspace(1)*)* @kernel, !1, !2, !3, !4, !5}
+!1 = !{!"kernel_arg_addr_space", i32 0}
+!2 = !{!"kernel_arg_access_qual", !"none"}
+!3 = !{!"kernel_arg_type", !"int*"}
+!4 = !{!"kernel_arg_type_qual", !""}
+!5 = !{!"kernel_arg_name", !""}
diff --git a/test/CodeGen/AMDGPU/operand-folding.ll b/test/CodeGen/AMDGPU/operand-folding.ll
index 816755efb07c..9e514ef9970a 100644
--- a/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/test/CodeGen/AMDGPU/operand-folding.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}fold_sgpr:
-; CHECK: v_add_i32_e32 v{{[0-9]+}}, s
+; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
entry:
%tmp0 = icmp ne i32 %fold, 0
diff --git a/test/CodeGen/AMDGPU/or.ll b/test/CodeGen/AMDGPU/or.ll
index 1c04090b407f..e40f18f040b7 100644
--- a/test/CodeGen/AMDGPU/or.ll
+++ b/test/CodeGen/AMDGPU/or.ll
@@ -153,7 +153,7 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
}
; FUNC-LABEL: {{^}}or_i1:
-; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
+; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}}
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
diff --git a/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll b/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll
new file mode 100644
index 000000000000..51985af42a29
--- /dev/null
+++ b/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs -verify-coalescing < %s
+
+; The original and requires materializing a 64-bit immediate for
+; s_and_b64. This is split into 2 x v_and_i32, part of the immediate
+; is folded through the reg_sequence into the v_and_i32 operand, and
+; only half of the result is ever used.
+;
+; During live interval construction, the first sub register def is
+; incorrectly marked as dead.
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+define void @dead_def_subregister(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %val = load i64, i64 addrspace(1)* %in.gep
+
+ %lshr = shl i64 %val, 24
+ %and1 = and i64 %lshr, 2190433320969 ; (255 << 33) | 9
+ %vec = bitcast i64 %and1 to <2 x i32>
+ %elt1 = extractelement <2 x i32> %vec, i32 1
+
+ store i32 %elt1, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/private-memory.ll b/test/CodeGen/AMDGPU/private-memory.ll
index 645dc04f4420..79778eebd802 100644
--- a/test/CodeGen/AMDGPU/private-memory.ll
+++ b/test/CodeGen/AMDGPU/private-memory.ll
@@ -1,6 +1,8 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
@@ -13,11 +15,21 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; R600: LDS_READ
; R600: LDS_READ
+; HSA-PROMOTE: .amd_kernel_code_t
+; HSA-PROMOTE: workgroup_group_segment_byte_size = 5120
+; HSA-PROMOTE: .end_amd_kernel_code_t
+
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_write_b32
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
+; HSA-ALLOCA: .amd_kernel_code_t
+; FIXME: Creating the emergency stack slots causes us to over-estimate scratch
+; by 4 bytes.
+; HSA-ALLOCA: workitem_private_segment_byte_size = 24
+; HSA-ALLOCA: .end_amd_kernel_code_t
+
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
diff --git a/test/CodeGen/AMDGPU/register-count-comments.ll b/test/CodeGen/AMDGPU/register-count-comments.ll
index de6bfb310883..4bb315049be4 100644
--- a/test/CodeGen/AMDGPU/register-count-comments.ll
+++ b/test/CodeGen/AMDGPU/register-count-comments.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -asm-verbose -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
declare i32 @llvm.SI.tid() nounwind readnone
diff --git a/test/CodeGen/AMDGPU/reorder-stores.ll b/test/CodeGen/AMDGPU/reorder-stores.ll
index 187650ff9a53..d5e10d0be883 100644
--- a/test/CodeGen/AMDGPU/reorder-stores.ll
+++ b/test/CodeGen/AMDGPU/reorder-stores.ll
@@ -2,14 +2,10 @@
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
-; SI: buffer_load_dwordx2
-; SI: buffer_load_dwordx2
-; SI: buffer_load_dwordx2
-; SI: buffer_load_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
@@ -34,46 +30,16 @@ define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace
}
; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-; SI: buffer_load_dword
-
-
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
diff --git a/test/CodeGen/AMDGPU/s_movk_i32.ll b/test/CodeGen/AMDGPU/s_movk_i32.ll
index 6b1a36c979c2..47c7fbb6dd6a 100644
--- a/test/CodeGen/AMDGPU/s_movk_i32.ll
+++ b/test/CodeGen/AMDGPU/s_movk_i32.ll
@@ -3,10 +3,9 @@
; SI-LABEL: {{^}}s_movk_i32_k0:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
@@ -17,10 +16,9 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-LABEL: {{^}}s_movk_i32_k1:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
@@ -31,10 +29,9 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-LABEL: {{^}}s_movk_i32_k2:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 64{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
@@ -45,10 +42,9 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-LABEL: {{^}}s_movk_i32_k3:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
@@ -59,10 +55,9 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-LABEL: {{^}}s_movk_i32_k4:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
@@ -87,10 +82,9 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-LABEL: {{^}}s_movk_i32_k6:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
-; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 63{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 4
diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll
index 0b9649576545..a30c25e700ab 100644
--- a/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.r600.read.tidig.y() #0
; In this test both the pointer and the offset operands to the
; BUFFER_LOAD instructions end up being stored in vgprs. This
@@ -7,94 +11,267 @@
; sgpr register pair and use that for the pointer operand
; (low 64-bits of srsrc).
-; CHECK-LABEL: {{^}}mubuf:
+; GCN-LABEL: {{^}}mubuf:
; Make sure we aren't using VGPRs for the source operand of s_mov_b64
-; CHECK-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v
+; GCN-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v
; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_*
; instructions
-; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
-; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
-define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
+; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
+
+define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
entry:
- %0 = call i32 @llvm.r600.read.tidig.x() #1
- %1 = call i32 @llvm.r600.read.tidig.y() #1
- %2 = sext i32 %0 to i64
- %3 = sext i32 %1 to i64
+ %tmp = call i32 @llvm.r600.read.tidig.x()
+ %tmp1 = call i32 @llvm.r600.read.tidig.y()
+ %tmp2 = sext i32 %tmp to i64
+ %tmp3 = sext i32 %tmp1 to i64
br label %loop
-loop:
- %4 = phi i64 [0, %entry], [%5, %loop]
- %5 = add i64 %2, %4
- %6 = getelementptr i8, i8 addrspace(1)* %in, i64 %5
- %7 = load i8, i8 addrspace(1)* %6, align 1
- %8 = or i64 %5, 1
- %9 = getelementptr i8, i8 addrspace(1)* %in, i64 %8
- %10 = load i8, i8 addrspace(1)* %9, align 1
- %11 = add i8 %7, %10
- %12 = sext i8 %11 to i32
- store i32 %12, i32 addrspace(1)* %out
- %13 = icmp slt i64 %5, 10
- br i1 %13, label %loop, label %done
-
-done:
+loop: ; preds = %loop, %entry
+ %tmp4 = phi i64 [ 0, %entry ], [ %tmp5, %loop ]
+ %tmp5 = add i64 %tmp2, %tmp4
+ %tmp6 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp5
+ %tmp7 = load i8, i8 addrspace(1)* %tmp6, align 1
+ %tmp8 = or i64 %tmp5, 1
+ %tmp9 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp8
+ %tmp10 = load i8, i8 addrspace(1)* %tmp9, align 1
+ %tmp11 = add i8 %tmp7, %tmp10
+ %tmp12 = sext i8 %tmp11 to i32
+ store i32 %tmp12, i32 addrspace(1)* %out
+ %tmp13 = icmp slt i64 %tmp5, 10
+ br i1 %tmp13, label %loop, label %done
+
+done: ; preds = %loop
ret void
}
-declare i32 @llvm.r600.read.tidig.x() #1
-declare i32 @llvm.r600.read.tidig.y() #1
-
-attributes #1 = { nounwind readnone }
-
; Test moving an SMRD instruction to the VALU
-; CHECK-LABEL: {{^}}smrd_valu:
-; CHECK: buffer_load_dword [[OUT:v[0-9]+]]
-; CHECK: buffer_store_dword [[OUT]]
-
-define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) {
+; GCN-LABEL: {{^}}smrd_valu:
+; GCN: buffer_load_dword [[OUT:v[0-9]+]]
+; GCN: buffer_store_dword [[OUT]]
+define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
entry:
- %0 = icmp ne i32 %a, 0
- br i1 %0, label %if, label %else
+ %tmp = icmp ne i32 %a, 0
+ br i1 %tmp, label %if, label %else
-if:
- %1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
+if: ; preds = %entry
+ %tmp1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
br label %endif
-else:
- %2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
- %3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %2
+else: ; preds = %entry
+ %tmp2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
+ %tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %tmp2
br label %endif
-endif:
- %4 = phi i32 addrspace(2)* [%1, %if], [%3, %else]
- %5 = getelementptr i32, i32 addrspace(2)* %4, i32 3000
- %6 = load i32, i32 addrspace(2)* %5
- store i32 %6, i32 addrspace(1)* %out
+endif: ; preds = %else, %if
+ %tmp4 = phi i32 addrspace(2)* [ %tmp1, %if ], [ %tmp3, %else ]
+ %tmp5 = getelementptr i32, i32 addrspace(2)* %tmp4, i32 3000
+ %tmp6 = load i32, i32 addrspace(2)* %tmp5
+ store i32 %tmp6, i32 addrspace(1)* %out
ret void
}
-; Test moving ann SMRD with an immediate offset to the VALU
+; Test moving an SMRD with an immediate offset to the VALU
-; CHECK-LABEL: {{^}}smrd_valu2:
-; CHECK: buffer_load_dword
-define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) {
+; GCN-LABEL: {{^}}smrd_valu2:
+; GCN-NOT: v_add
+; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
entry:
- %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %1 = add i32 %0, 4
- %2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %0, i32 4
- %3 = load i32, i32 addrspace(2)* %2
- store i32 %3, i32 addrspace(1)* %out
+ %tmp = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp1 = add i32 %tmp, 4
+ %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
+ %tmp3 = load i32, i32 addrspace(2)* %tmp2
+ store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}s_load_imm_v8i32:
-; CHECK: buffer_load_dwordx4
-; CHECK: buffer_load_dwordx4
-define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
+; Use a big offset that will use the SMRD literal offset on CI
+; GCN-LABEL: {{^}}smrd_valu_ci_offset:
+; GCN-NOT: v_add
+; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}}
+; GCN-NOT: v_add
+; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
+; GCN: v_add_i32_e32
+; GCN: buffer_store_dword
+define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
entry:
- %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
+ %tmp = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp
+ %tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000
+ %tmp4 = load i32, i32 addrspace(2)* %tmp3
+ %tmp5 = add i32 %tmp4, %c
+ store i32 %tmp5, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}smrd_valu_ci_offset_x2:
+; GCN-NOT: v_add
+; GCN: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}}
+; GCN-NOT: v_add
+; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: buffer_store_dwordx2
+define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
+entry:
+ %tmp = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp
+ %tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000
+ %tmp4 = load i64, i64 addrspace(2)* %tmp3
+ %tmp5 = or i64 %tmp4, %c
+ store i64 %tmp5, i64 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}smrd_valu_ci_offset_x4:
+; GCN-NOT: v_add
+; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}}
+; GCN-NOT: v_add
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: buffer_store_dwordx4
+define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
+entry:
+ %tmp = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp
+ %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234
+ %tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3
+ %tmp5 = or <4 x i32> %tmp4, %c
+ store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; Original scalar load uses SGPR offset on SI and 32-bit literal on
+; CI.
+
+; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8:
+; GCN-NOT: v_add
+; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
+; GCN-NOT: v_add
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
+; GCN-NOT: v_add
+; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
+; GCN-NOT: v_add
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
+
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
+entry:
+ %tmp = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp
+ %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234
+ %tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3
+ %tmp5 = or <8 x i32> %tmp4, %c
+ store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
+
+; GCN-NOT: v_add
+; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
+; GCN-NOT: v_add
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
+; GCN-NOT: v_add
+; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
+; GCN-NOT: v_add
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
+; GCN-NOT: v_add
+; GCN: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
+; GCN-NOT: v_add
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
+; GCN-NOT: v_add
+; GCN: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
+; GCN-NOT: v_add
+; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
+
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+
+; GCN: s_endpgm
+define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
+entry:
+ %tmp = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
+ %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234
+ %tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3
+ %tmp5 = or <16 x i32> %tmp4, %c
+ store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}smrd_valu2_salu_user:
+; GCN: buffer_load_dword [[MOVED:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
+; GCN: buffer_store_dword [[ADD]]
+define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
+entry:
+ %tmp = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp1 = add i32 %tmp, 4
+ %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4
+ %tmp3 = load i32, i32 addrspace(2)* %tmp2
+ %tmp4 = add i32 %tmp3, %a
+ store i32 %tmp4, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
+; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
+define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
+entry:
+ %tmp = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp1 = add i32 %tmp, 4
+ %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255
+ %tmp3 = load i32, i32 addrspace(2)* %tmp2
+ store i32 %tmp3, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}smrd_valu2_mubuf_offset:
+; GCN-NOT: v_add
+; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
+define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
+entry:
+ %tmp = call i32 @llvm.r600.read.tidig.x() #0
+ %tmp1 = add i32 %tmp, 4
+ %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256
+ %tmp3 = load i32, i32 addrspace(2)* %tmp2
+ store i32 %tmp3, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}s_load_imm_v8i32:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+entry:
+ %tmp0 = tail call i32 @llvm.r600.read.tidig.x()
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
%tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
@@ -102,12 +279,51 @@ entry:
ret void
}
-; CHECK-LABEL: {{^}}s_load_imm_v16i32:
-; CHECK: buffer_load_dwordx4
-; CHECK: buffer_load_dwordx4
-; CHECK: buffer_load_dwordx4
-; CHECK: buffer_load_dwordx4
-define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
+; GCN-LABEL: {{^}}s_load_imm_v8i32_salu_user:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: buffer_store_dword
+define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+entry:
+ %tmp0 = tail call i32 @llvm.r600.read.tidig.x()
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
+ %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
+ %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
+
+ %elt0 = extractelement <8 x i32> %tmp3, i32 0
+ %elt1 = extractelement <8 x i32> %tmp3, i32 1
+ %elt2 = extractelement <8 x i32> %tmp3, i32 2
+ %elt3 = extractelement <8 x i32> %tmp3, i32 3
+ %elt4 = extractelement <8 x i32> %tmp3, i32 4
+ %elt5 = extractelement <8 x i32> %tmp3, i32 5
+ %elt6 = extractelement <8 x i32> %tmp3, i32 6
+ %elt7 = extractelement <8 x i32> %tmp3, i32 7
+
+ %add0 = add i32 %elt0, %elt1
+ %add1 = add i32 %add0, %elt2
+ %add2 = add i32 %add1, %elt3
+ %add3 = add i32 %add2, %elt4
+ %add4 = add i32 %add3, %elt5
+ %add5 = add i32 %add4, %elt6
+ %add6 = add i32 %add5, %elt7
+
+ store i32 %add6, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}s_load_imm_v16i32:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
entry:
%tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
%tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
@@ -116,3 +332,71 @@ entry:
store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
ret void
}
+
+; GCN-LABEL: {{^}}s_load_imm_v16i32_salu_user:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: v_add_i32_e32
+; GCN: buffer_store_dword
+define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+entry:
+ %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
+ %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
+ %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
+
+ %elt0 = extractelement <16 x i32> %tmp3, i32 0
+ %elt1 = extractelement <16 x i32> %tmp3, i32 1
+ %elt2 = extractelement <16 x i32> %tmp3, i32 2
+ %elt3 = extractelement <16 x i32> %tmp3, i32 3
+ %elt4 = extractelement <16 x i32> %tmp3, i32 4
+ %elt5 = extractelement <16 x i32> %tmp3, i32 5
+ %elt6 = extractelement <16 x i32> %tmp3, i32 6
+ %elt7 = extractelement <16 x i32> %tmp3, i32 7
+ %elt8 = extractelement <16 x i32> %tmp3, i32 8
+ %elt9 = extractelement <16 x i32> %tmp3, i32 9
+ %elt10 = extractelement <16 x i32> %tmp3, i32 10
+ %elt11 = extractelement <16 x i32> %tmp3, i32 11
+ %elt12 = extractelement <16 x i32> %tmp3, i32 12
+ %elt13 = extractelement <16 x i32> %tmp3, i32 13
+ %elt14 = extractelement <16 x i32> %tmp3, i32 14
+ %elt15 = extractelement <16 x i32> %tmp3, i32 15
+
+ %add0 = add i32 %elt0, %elt1
+ %add1 = add i32 %add0, %elt2
+ %add2 = add i32 %add1, %elt3
+ %add3 = add i32 %add2, %elt4
+ %add4 = add i32 %add3, %elt5
+ %add5 = add i32 %add4, %elt6
+ %add6 = add i32 %add5, %elt7
+ %add7 = add i32 %add6, %elt8
+ %add8 = add i32 %add7, %elt9
+ %add9 = add i32 %add8, %elt10
+ %add10 = add i32 %add9, %elt11
+ %add11 = add i32 %add10, %elt12
+ %add12 = add i32 %add11, %elt13
+ %add13 = add i32 %add12, %elt14
+ %add14 = add i32 %add13, %elt15
+
+ store i32 %add14, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/sampler-resource-id.ll b/test/CodeGen/AMDGPU/sampler-resource-id.ll
new file mode 100644
index 000000000000..c41d345369bf
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sampler-resource-id.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}test_0:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 0(
+define void @test_0(i32 %in0, i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in0) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_1:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 1(
+define void @test_1(i32 %in0, i32 %in1, i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}test_2:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], literal.x
+; EG-NEXT: LSHR
+; EG-NEXT: 2(
+define void @test_2(i32 %in0, i32 %in1, i32 %in2, i32 addrspace(1)* %out) {
+entry:
+ %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) #0
+ store i32 %0, i32 addrspace(1)* %out
+ ret void
+}
+
+
+declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) #0
+
+attributes #0 = { readnone }
+
+!opencl.kernels = !{!0, !1, !2}
+
+!0 = !{void (i32, i32 addrspace(1)*)* @test_0, !10, !20, !30, !40, !50}
+!10 = !{!"kernel_arg_addr_space", i32 0, i32 1}
+!20 = !{!"kernel_arg_access_qual", !"none", !"none"}
+!30 = !{!"kernel_arg_type", !"sampler_t", !"int*"}
+!40 = !{!"kernel_arg_base_type", !"sampler_t", !"int*"}
+!50 = !{!"kernel_arg_type_qual", !"", !""}
+
+!1 = !{void (i32, i32, i32 addrspace(1)*)* @test_1, !11, !21, !31, !41, !51}
+!11 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 1}
+!21 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"}
+!31 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"int*"}
+!41 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"int*"}
+!51 = !{!"kernel_arg_type_qual", !"", !"", !""}
+
+!2 = !{void (i32, i32, i32, i32 addrspace(1)*)* @test_2, !12, !22, !32, !42, !52}
+!12 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 1}
+!22 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"}
+!32 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"}
+!42 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"}
+!52 = !{!"kernel_arg_type_qual", !"", !"", !"", !""}
diff --git a/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
index 3863afda5dd3..e4b16c0a165f 100644
--- a/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
+++ b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
@@ -3,7 +3,7 @@
; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
-declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
+declare void @llvm.AMDGPU.barrier.local() nounwind convergent
; SI-LABEL: {{^}}main(
diff --git a/test/CodeGen/AMDGPU/scratch-buffer.ll b/test/CodeGen/AMDGPU/scratch-buffer.ll
index 268869daaa32..d43de4766057 100644
--- a/test/CodeGen/AMDGPU/scratch-buffer.ll
+++ b/test/CodeGen/AMDGPU/scratch-buffer.ll
@@ -51,7 +51,7 @@ done:
; GCN-LABEL: {{^}}legal_offset_fi_offset
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8000
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
diff --git a/test/CodeGen/AMDGPU/select64.ll b/test/CodeGen/AMDGPU/select64.ll
index 13fb575b2b15..a68fdecb00af 100644
--- a/test/CodeGen/AMDGPU/select64.ll
+++ b/test/CodeGen/AMDGPU/select64.ll
@@ -51,12 +51,8 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa
}
; CHECK-LABEL: {{^}}v_select_i64_split_imm:
-; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63
-; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0
-; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]]
-; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]]
-; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}}
-; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}}
+; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}}
; CHECK: s_endpgm
define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%cmp = icmp ugt i32 %cond, 5
diff --git a/test/CodeGen/AMDGPU/set-dx10.ll b/test/CodeGen/AMDGPU/set-dx10.ll
index 53694dcffa66..57365a6e1fc3 100644
--- a/test/CodeGen/AMDGPU/set-dx10.ll
+++ b/test/CodeGen/AMDGPU/set-dx10.ll
@@ -5,8 +5,8 @@
; SET*DX10 instructions.
; CHECK: {{^}}fcmp_une_select_fptosi:
-; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@@ -19,8 +19,8 @@ entry:
}
; CHECK: {{^}}fcmp_une_select_i32:
-; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@@ -31,8 +31,8 @@ entry:
}
; CHECK: {{^}}fcmp_oeq_select_fptosi:
-; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@@ -45,8 +45,8 @@ entry:
}
; CHECK: {{^}}fcmp_oeq_select_i32:
-; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@@ -57,8 +57,8 @@ entry:
}
; CHECK: {{^}}fcmp_ogt_select_fptosi:
-; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@@ -71,8 +71,8 @@ entry:
}
; CHECK: {{^}}fcmp_ogt_select_i32:
-; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@@ -83,8 +83,8 @@ entry:
}
; CHECK: {{^}}fcmp_oge_select_fptosi:
-; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@@ -97,8 +97,8 @@ entry:
}
; CHECK: {{^}}fcmp_oge_select_i32:
-; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@@ -109,8 +109,8 @@ entry:
}
; CHECK: {{^}}fcmp_ole_select_fptosi:
-; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@@ -123,8 +123,8 @@ entry:
}
; CHECK: {{^}}fcmp_ole_select_i32:
-; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@@ -135,8 +135,8 @@ entry:
}
; CHECK: {{^}}fcmp_olt_select_fptosi:
-; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@@ -149,8 +149,8 @@ entry:
}
; CHECK: {{^}}fcmp_olt_select_i32:
-; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
diff --git a/test/CodeGen/AMDGPU/setcc-opt.ll b/test/CodeGen/AMDGPU/setcc-opt.ll
index 4e6a10d6b78d..63d74820f961 100644
--- a/test/CodeGen/AMDGPU/setcc-opt.ll
+++ b/test/CodeGen/AMDGPU/setcc-opt.ll
@@ -142,11 +142,14 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
-; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
-; GCN: v_cmp_ne_i32_e32 vcc, [[K255]], [[B]]
+; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
+; GCN: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
+; GCN: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
+; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK255]]
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = zext i8 %b to i32
@@ -187,11 +190,14 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n
; Should do a buffer_load_sbyte and compare with -1
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
-; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]]
-; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
-; GCN: v_cmp_ne_i32_e32 vcc, [[K]], [[B]]{{$}}
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
+; GCN: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
+; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
+; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = sext i8 %b to i32
diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll
index 5aedda2ce1a9..23ae3b967971 100644
--- a/test/CodeGen/AMDGPU/sext-in-reg.ll
+++ b/test/CodeGen/AMDGPU/sext-in-reg.ll
@@ -12,8 +12,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: buffer_store_dword [[EXTRACT]],
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
-; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
-; EG-NEXT: LSHR * [[ADDR]]
+; EG: LSHR * [[ADDR]]
+; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
%shl = shl i32 %in, 31
%sext = ashr i32 %shl, 31
@@ -609,3 +609,53 @@ define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
}
+
+; Make sure we propagate the VALUness to users of a moved scalar BFE.
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64_move_use:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
+; SI-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
+; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+define void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
+
+ %c = shl i64 %a, %b
+ %shl = shl i64 %c, 63
+ %ashr = ashr i64 %shl, 63
+
+ %and = and i64 %ashr, %s.val
+ store i64 %and, i64 addrspace(1)* %out.gep, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64_move_use:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
+; SI-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
+; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]]
+; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]]
+; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
+define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
+
+ %c = shl i64 %a, %b
+ %shl = shl i64 %c, 32
+ %ashr = ashr i64 %shl, 32
+ %and = and i64 %ashr, %s.val
+ store i64 %and, i64 addrspace(1)* %out.gep, align 8
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/shl.ll b/test/CodeGen/AMDGPU/shl.ll
index 6f81a39ed96a..55db80731c90 100644
--- a/test/CodeGen/AMDGPU/shl.ll
+++ b/test/CodeGen/AMDGPU/shl.ll
@@ -53,14 +53,14 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
ret void
}
-;EG: {{^}}shl_i64:
+;EG-LABEL: {{^}}shl_i64:
;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
-;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
-;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}}
+;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
+;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}}
;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
@@ -80,7 +80,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
ret void
}
-;EG: {{^}}shl_v2i64:
+;EG-LABEL: {{^}}shl_v2i64:
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
;EG-DAG: LSHR {{\*? *}}[[COMPSHA]]
@@ -185,8 +185,7 @@ define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in
; Make sure load width gets reduced to i32 load.
; GCN-LABEL: {{^}}s_shl_32_i64:
; GCN-DAG: s_load_dword [[LO_A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
-; GCN-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0{{$}}
-; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[LO_A]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @s_shl_32_i64(i64 addrspace(1)* %out, i64 %a) {
diff --git a/test/CodeGen/AMDGPU/shl_add_constant.ll b/test/CodeGen/AMDGPU/shl_add_constant.ll
index b1485bfaaebb..dfb2bf3383fc 100644
--- a/test/CodeGen/AMDGPU/shl_add_constant.ll
+++ b/test/CodeGen/AMDGPU/shl_add_constant.ll
@@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
; FUNC-LABEL: {{^}}shl_2_add_9_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
@@ -20,7 +20,7 @@ define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
}
; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
-; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
+; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], vcc, 9, {{v[0-9]+}}
; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
; SI-DAG: buffer_store_dword [[ADDREG]]
; SI-DAG: buffer_store_dword [[SHLREG]]
@@ -40,7 +40,7 @@ define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1
; FUNC-LABEL: {{^}}shl_2_add_999_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
diff --git a/test/CodeGen/AMDGPU/shl_add_ptr.ll b/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 6671e909cd1d..ac94824bd61f 100644
--- a/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -35,7 +35,7 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad
; SI-LABEL: {{^}}load_shl_base_lds_1:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
-; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
+; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
; SI-DAG: buffer_store_dword [[RESULT]]
; SI-DAG: buffer_store_dword [[ADDUSE]]
; SI: s_endpgm
diff --git a/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll b/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
new file mode 100644
index 000000000000..27a8e70aae13
--- /dev/null
+++ b/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
@@ -0,0 +1,16 @@
+; RUN: llc -o /dev/null %s -march=amdgcn -mcpu=verde -verify-machineinstrs -stop-after expand-isel-pseudos 2>&1 | FileCheck %s
+; This test verifies that the instruction selection will add the implicit
+; register operands in the correct order when modifying the opcode of an
+; instruction to V_ADD_I32_e32.
+
+; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
+
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
+ %result = add i32 %a, %b
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/si-literal-folding.ll b/test/CodeGen/AMDGPU/si-literal-folding.ll
new file mode 100644
index 000000000000..901b3c3453fc
--- /dev/null
+++ b/test/CodeGen/AMDGPU/si-literal-folding.ll
@@ -0,0 +1,17 @@
+; XFAIL: *
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}main:
+; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0xbf4353f8
+
+define void @main(float) #0 {
+main_body:
+ %1 = fmul float %0, 0x3FE86A7F00000000
+ %2 = fmul float %0, 0xBFE86A7F00000000
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %1, float %1, float %2, float %2)
+ ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="1" }
diff --git a/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
index 84652701f773..d7b35fc631eb 100644
--- a/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -6,6 +6,16 @@
; CHECK-LABEL: {{^}}main:
; CHECK: s_wqm
+
+; Make sure not emitting unused scratch resource descriptor setup
+; CHECK-NOT: s_mov_b32
+; CHECK-NOT: s_mov_b32
+; CHECK-NOT: s_mov_b32
+; CHECK-NOT: s_mov_b32
+
+; CHECK: s_mov_b32 m0
+
+
; Writing to M0 from an SMRD instruction will hang the GPU.
; CHECK-NOT: s_buffer_load_dword m0
; CHECK: s_endpgm
diff --git a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 5a6129aaa3fa..bc766dbcac67 100644
--- a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -155,9 +155,9 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out,
}
; FUNC-LABEL: @reorder_local_offsets
-; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
; CI: buffer_store_dword
@@ -181,9 +181,10 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
}
; FUNC-LABEL: @reorder_global_offsets
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
; CI: buffer_store_dword
@@ -233,4 +234,4 @@ define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrsp
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { "ShaderType"="1" nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #2 = { nounwind noduplicate }
+attributes #2 = { nounwind convergent }
diff --git a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index 0db7cdc171b5..a94ccc32e61c 100644
--- a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -46,9 +46,9 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
; SI-LABEL: @v_sint_to_fp_i64_to_f64
; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
diff --git a/test/CodeGen/AMDGPU/sminmax.ll b/test/CodeGen/AMDGPU/sminmax.ll
new file mode 100644
index 000000000000..e646605f7da1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sminmax.ll
@@ -0,0 +1,130 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}s_abs_i32:
+; GCN: s_abs_i32
+; GCN: s_add_i32
+define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
+ %neg = sub i32 0, %val
+ %cond = icmp sgt i32 %val, %neg
+ %res = select i1 %cond, i32 %val, i32 %neg
+ %res2 = add i32 %res, 2
+ store i32 %res2, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_i32:
+; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
+; GCN: v_add_i32
+define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
+ %val = load i32, i32 addrspace(1)* %src, align 4
+ %neg = sub i32 0, %val
+ %cond = icmp sgt i32 %val, %neg
+ %res = select i1 %cond, i32 %val, i32 %neg
+ %res2 = add i32 %res, 2
+ store i32 %res2, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_abs_v2i32:
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_add_i32
+; GCN: s_add_i32
+define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
+ %z0 = insertelement <2 x i32> undef, i32 0, i32 0
+ %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
+ %t0 = insertelement <2 x i32> undef, i32 2, i32 0
+ %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
+ %neg = sub <2 x i32> %z1, %val
+ %cond = icmp sgt <2 x i32> %val, %neg
+ %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
+ %res2 = add <2 x i32> %res, %t1
+ store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_v2i32:
+; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
+
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+
+; GCN: v_add_i32
+; GCN: v_add_i32
+define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
+ %z0 = insertelement <2 x i32> undef, i32 0, i32 0
+ %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
+ %t0 = insertelement <2 x i32> undef, i32 2, i32 0
+ %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %src, align 4
+ %neg = sub <2 x i32> %z1, %val
+ %cond = icmp sgt <2 x i32> %val, %neg
+ %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
+ %res2 = add <2 x i32> %res, %t1
+ store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_abs_v4i32:
+; TODO: this should use s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+
+; GCN: s_add_i32
+; GCN: s_add_i32
+; GCN: s_add_i32
+; GCN: s_add_i32
+define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
+ %z0 = insertelement <4 x i32> undef, i32 0, i32 0
+ %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
+ %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
+ %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
+ %t0 = insertelement <4 x i32> undef, i32 2, i32 0
+ %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
+ %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
+ %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
+ %neg = sub <4 x i32> %z3, %val
+ %cond = icmp sgt <4 x i32> %val, %neg
+ %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
+ %res2 = add <4 x i32> %res, %t3
+ store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_v4i32:
+; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
+
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
+
+; GCN: v_add_i32
+; GCN: v_add_i32
+; GCN: v_add_i32
+; GCN: v_add_i32
+define void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
+ %z0 = insertelement <4 x i32> undef, i32 0, i32 0
+ %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
+ %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
+ %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
+ %t0 = insertelement <4 x i32> undef, i32 2, i32 0
+ %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
+ %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
+ %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %src, align 4
+ %neg = sub <4 x i32> %z3, %val
+ %cond = icmp sgt <4 x i32> %val, %neg
+ %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
+ %res2 = add <4 x i32> %res, %t3
+ store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index 0598208e1317..1d6bb9ece8c6 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
+; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
; SMRD load with an immediate offset.
; GCN-LABEL: {{^}}smrd0:
-; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
@@ -15,7 +16,7 @@ entry:
; SMRD load with the largest possible immediate offset.
; GCN-LABEL: {{^}}smrd1:
-; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
@@ -29,6 +30,7 @@ entry:
; GCN-LABEL: {{^}}smrd2:
; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
; GCN: s_endpgm
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
@@ -54,9 +56,37 @@ entry:
ret void
}
+; SMRD load with the largest possible immediate offset on VI
+; GCN-LABEL: {{^}}smrd4:
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
+define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
+ %1 = load i32, i32 addrspace(2)* %0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
+; SMRD load with an offset greater than the largest possible immediate on VI
+; GCN-LABEL: {{^}}smrd5:
+; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
+; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
+; GCN: s_endpgm
+define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
+ %1 = load i32, i32 addrspace(2)* %0
+ store i32 %1, i32 addrspace(1)* %out
+ ret void
+}
+
; SMRD load using the load.const intrinsic with an immediate offset
; GCN-LABEL: {{^}}smrd_load_const0:
-; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
@@ -70,7 +100,7 @@ main_body:
; SMRD load using the load.const intrinsic with the largest possible immediate
; offset.
; GCN-LABEL: {{^}}smrd_load_const1:
-; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
@@ -86,6 +116,7 @@ main_body:
; GCN-LABEL: {{^}}smrd_load_const2:
; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
@@ -96,6 +127,36 @@ main_body:
ret void
}
+; SMRD load with the largest possible immediate offset on VI
+; GCN-LABEL: {{^}}smrd_load_const3:
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
+; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
+; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
+define void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
+ %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048572)
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+ ret void
+}
+
+; SMRD load with an offset greater than the largest possible immediate on VI
+; GCN-LABEL: {{^}}smrd_load_const4:
+; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
+; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
+; GCN: s_endpgm
+define void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
+ %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048576)
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+ ret void
+}
+
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
diff --git a/test/CodeGen/AMDGPU/split-scalar-i64-add.ll b/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
index 46409cdfae1c..9e181bc14d9d 100644
--- a/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
+++ b/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() readnone
@@ -8,9 +8,22 @@ declare i32 @llvm.r600.read.tidig.x() readnone
; scc instead.
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
-; SI: v_add_i32
-; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
+; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x18f, v{{[0-9]+}}
+; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
+define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {
+ %v.val = load volatile i32, i32 addrspace(1)* %in
+ %vec.0 = insertelement <2 x i32> undef, i32 %s.val, i32 0
+ %vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
+ %bc = bitcast <2 x i32> %vec.1 to i64
+ %add = add i64 %bc, 399
+ store i64 %add, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_0:
+; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x18f
+; SI: s_addc_u32 {{s[0-9]+}}, 0xf423f, 0
+define void @s_imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
%vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0
%vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1
%bc = bitcast <2 x i32> %vec.1 to i64
@@ -22,7 +35,20 @@ define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:
; SI: v_add_i32
; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
+define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
+ %v.val = load volatile i32, i32 addrspace(1)* %in
+ %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
+ %vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
+ %bc = bitcast <2 x i32> %vec.1 to i64
+ %add = add i64 %bc, %val1
+ store i64 %add, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_1:
+; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_addc_u32 {{s[0-9]+}}, 0x1869f, {{s[0-9]+}}
+define void @s_imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
%vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1
%bc = bitcast <2 x i32> %vec.1 to i64
@@ -32,9 +58,9 @@ define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64
}
; Doesn't use constants
-; FUNC-LABEL @imp_def_vcc_split_i64_add_2
-; SI: v_add_i32
-; SI: v_addc_u32
+; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_2:
+; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc
define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
diff --git a/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll b/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
new file mode 100644
index 000000000000..4c82ed6affc2
--- /dev/null
+++ b/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
@@ -0,0 +1,104 @@
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
+
+@sPrivateStorage = external addrspace(3) global [256 x [8 x <4 x i64>]]
+
+; GCN-LABEL: {{^}}ds_reorder_vector_split:
+
+; Write zeroinitializer
+; GCN-DAG: ds_write_b64 [[PTR:v[0-9]+]], [[VAL:v\[[0-9]+:[0-9]+\]]] offset:24
+; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:16
+; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:8
+; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]]{{$}}
+
+; GCN: s_waitcnt vmcnt
+
+; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24
+; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16
+; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
+
+; GCN: s_waitcnt lgkmcnt
+
+; GCN-DAG ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:8
+; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:16
+; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:24
+
+; Appears to be dead store of vector component.
+; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
+
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: s_endpgm
+define void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 {
+entry:
+ %tmp = tail call i32 @llvm.r600.read.local.size.y()
+ %tmp1 = tail call i32 @llvm.r600.read.local.size.z()
+ %tmp2 = tail call i32 @llvm.r600.read.tidig.x()
+ %tmp3 = tail call i32 @llvm.r600.read.tidig.y()
+ %tmp4 = tail call i32 @llvm.r600.read.tidig.z()
+ %tmp6 = mul i32 %tmp2, %tmp
+ %tmp10 = add i32 %tmp3, %tmp6
+ %tmp11 = mul i32 %tmp10, %tmp1
+ %tmp9 = add i32 %tmp11, %tmp4
+ %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
+ %x.i.12.i = tail call i32 @llvm.r600.read.local.size.x() #1
+ %mul.26.i = mul i32 %x.i.12.i, %x.i.i
+ %add.i = add i32 %tmp2, %mul.26.i
+ %arrayidx = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i
+ store <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* %arrayidx
+ %tmp12 = sext i32 %add.i to i64
+ %arrayidx1 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %srcValues, i64 %tmp12
+ %tmp13 = load <4 x i64>, <4 x i64> addrspace(1)* %arrayidx1
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %offsets, i64 %tmp12
+ %tmp14 = load i32, i32 addrspace(1)* %arrayidx2
+ %add.ptr = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 0, i32 %alignmentOffset
+ %mul.i = shl i32 %tmp14, 2
+ %arrayidx.i = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr, i32 %mul.i
+ %tmp15 = bitcast i64 addrspace(3)* %arrayidx.i to <4 x i64> addrspace(3)*
+ store <4 x i64> %tmp13, <4 x i64> addrspace(3)* %tmp15
+ %add.ptr6 = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %tmp14, i32 %alignmentOffset
+ %tmp16 = sext i32 %tmp14 to i64
+ %tmp17 = sext i32 %alignmentOffset to i64
+ %add.ptr9 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %destBuffer, i64 %tmp16, i64 %tmp17
+ %tmp18 = bitcast <4 x i64> %tmp13 to i256
+ %trunc = trunc i256 %tmp18 to i64
+ store i64 %trunc, i64 addrspace(1)* %add.ptr9
+ %arrayidx10.1 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 1
+ %tmp19 = load i64, i64 addrspace(3)* %arrayidx10.1
+ %arrayidx11.1 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 1
+ store i64 %tmp19, i64 addrspace(1)* %arrayidx11.1
+ %arrayidx10.2 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 2
+ %tmp20 = load i64, i64 addrspace(3)* %arrayidx10.2
+ %arrayidx11.2 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 2
+ store i64 %tmp20, i64 addrspace(1)* %arrayidx11.2
+ %arrayidx10.3 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 3
+ %tmp21 = load i64, i64 addrspace(3)* %arrayidx10.3
+ %arrayidx11.3 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 3
+ store i64 %tmp21, i64 addrspace(1)* %arrayidx11.3
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.z() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.z() #1
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/sra.ll b/test/CodeGen/AMDGPU/sra.ll
index bcbc32f4c053..3b59bbfb18c0 100644
--- a/test/CodeGen/AMDGPU/sra.ll
+++ b/test/CodeGen/AMDGPU/sra.ll
@@ -70,11 +70,11 @@ entry:
;EG-LABEL: {{^}}ashr_i64_2:
;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
-;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
+;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|PV.[XYZW]|[[SHIFT]]}}
;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
diff --git a/test/CodeGen/AMDGPU/srl.ll b/test/CodeGen/AMDGPU/srl.ll
index 0dad91e709d9..bbd954356322 100644
--- a/test/CodeGen/AMDGPU/srl.ll
+++ b/test/CodeGen/AMDGPU/srl.ll
@@ -65,14 +65,14 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+; EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
+; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]|PV\.[XYZW]}}
; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
+; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}}
+; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]]
; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
%b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
@@ -190,8 +190,7 @@ define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %i
; Make sure load width gets reduced to i32 load.
; GCN-LABEL: {{^}}s_lshr_32_i64:
; GCN-DAG: s_load_dword [[HI_A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc{{$}}
-; GCN-DAG: s_mov_b32 s[[SHI:[0-9]+]], 0{{$}}
-; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[HI_A]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @s_lshr_32_i64(i64 addrspace(1)* %out, i64 %a) {
diff --git a/test/CodeGen/AMDGPU/store-barrier.ll b/test/CodeGen/AMDGPU/store-barrier.ll
index 4a72b4d090ad..ba4049f28a6e 100644
--- a/test/CodeGen/AMDGPU/store-barrier.ll
+++ b/test/CodeGen/AMDGPU/store-barrier.ll
@@ -36,7 +36,7 @@ bb:
ret void
}
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
declare void @llvm.AMDGPU.barrier.local() #2
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/store.ll b/test/CodeGen/AMDGPU/store.ll
index 0f89405e073b..d22f43fa05ef 100644
--- a/test/CodeGen/AMDGPU/store.ll
+++ b/test/CodeGen/AMDGPU/store.ll
@@ -287,16 +287,33 @@ entry:
; CM: LDS_WRITE
; CM: LDS_WRITE
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
+; SI: ds_write_b64
+; SI: ds_write_b64
define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(3)* %out
ret void
}
+; FUNC-LABEL: {{^}}store_local_v4i32_align4:
+; EG: LDS_WRITE
+; EG: LDS_WRITE
+; EG: LDS_WRITE
+; EG: LDS_WRITE
+
+; CM: LDS_WRITE
+; CM: LDS_WRITE
+; CM: LDS_WRITE
+; CM: LDS_WRITE
+
+; SI: ds_write2_b32
+; SI: ds_write2_b32
+define void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
+entry:
+ store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4
+ ret void
+}
+
; FUNC-LABEL: {{^}}store_local_i64_i8:
; EG: LDS_BYTE_WRITE
; SI: ds_write_b8
diff --git a/test/CodeGen/AMDGPU/store_typed.ll b/test/CodeGen/AMDGPU/store_typed.ll
new file mode 100644
index 000000000000..515fcf04f406
--- /dev/null
+++ b/test/CodeGen/AMDGPU/store_typed.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck --check-prefix=CM --check-prefix=FUNC %s
+
+; store to rat 0
+; FUNC-LABEL: {{^}}store_typed_rat0:
+; EG: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}, 1
+; CM: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}
+
+define void @store_typed_rat0(<4 x i32> %data, <4 x i32> %index) {
+ call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 0)
+ ret void
+}
+
+; store to rat 11
+; FUNC-LABEL: {{^}}store_typed_rat11:
+; EG: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}, 1
+; CM: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}
+
+define void @store_typed_rat11(<4 x i32> %data, <4 x i32> %index) {
+ call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 11)
+ ret void
+}
+
+declare void @llvm.r600.rat.store.typed(<4 x i32>, <4 x i32>, i32)
diff --git a/test/CodeGen/AMDGPU/sub.ll b/test/CodeGen/AMDGPU/sub.ll
index b7fba0efa5b2..9f9446a4e608 100644
--- a/test/CodeGen/AMDGPU/sub.ll
+++ b/test/CodeGen/AMDGPU/sub.ll
@@ -7,7 +7,7 @@ declare i32 @llvm.r600.read.tidig.x() readnone
; FUNC-LABEL: {{^}}test_sub_i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
@@ -22,8 +22,8 @@ define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -40,10 +40,10 @@ define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/AMDGPU/trunc.ll b/test/CodeGen/AMDGPU/trunc.ll
index bf690ca4cb28..ad52d0f2e238 100644
--- a/test/CodeGen/AMDGPU/trunc.ll
+++ b/test/CodeGen/AMDGPU/trunc.ll
@@ -61,7 +61,7 @@ define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
}
; SI-LABEL: {{^}}sgpr_trunc_i32_to_i1:
-; SI: v_and_b32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: s_and_b32 s{{[0-9]+}}, 1, s{{[0-9]+}}
; SI: v_cmp_eq_i32
define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
%trunc = trunc i32 %a to i1
@@ -72,9 +72,9 @@ define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
; SI-LABEL: {{^}}s_trunc_i64_to_i1:
; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: v_and_b32_e64 [[MASKED:v[0-9]+]], 1, s[[SLO]]
-; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
+; SI: s_and_b32 [[MASKED:s[0-9]+]], 1, s[[SLO]]
+; SI: v_cmp_eq_i32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], 1, [[MASKED]]
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]]
define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
%trunc = trunc i64 %x to i1
%sel = select i1 %trunc, i32 63, i32 -12
diff --git a/test/CodeGen/AMDGPU/udivrem.ll b/test/CodeGen/AMDGPU/udivrem.ll
index b3837f28209a..f692b7dfdc27 100644
--- a/test/CodeGen/AMDGPU/udivrem.ll
+++ b/test/CodeGen/AMDGPU/udivrem.ll
@@ -30,19 +30,19 @@
; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
+; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
@@ -110,15 +110,15 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
@@ -133,15 +133,15 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
@@ -257,83 +257,83 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
-; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
; SI-DAG: v_cndmask_b32_e64
; SI: s_endpgm
define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
diff --git a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index 6f608df5e9f5..65fe580792a5 100644
--- a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -4,9 +4,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
diff --git a/test/CodeGen/AMDGPU/unsupported-cc.ll b/test/CodeGen/AMDGPU/unsupported-cc.ll
index 8ab4faf2f145..d120111a71fb 100644
--- a/test/CodeGen/AMDGPU/unsupported-cc.ll
+++ b/test/CodeGen/AMDGPU/unsupported-cc.ll
@@ -3,8 +3,8 @@
; These tests are for condition codes that are not supported by the hardware
; CHECK-LABEL: {{^}}slt:
-; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 5(7.006492e-45)
define void @slt(i32 addrspace(1)* %out, i32 %in) {
entry:
@@ -15,8 +15,8 @@ entry:
}
; CHECK-LABEL: {{^}}ult_i32:
-; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 5(7.006492e-45)
define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
@@ -40,8 +40,8 @@ entry:
}
; CHECK-LABEL: {{^}}ult_float_native:
-; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @ult_float_native(float addrspace(1)* %out, float %in) {
entry:
@@ -52,8 +52,8 @@ entry:
}
; CHECK-LABEL: {{^}}olt:
-; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @olt(float addrspace(1)* %out, float %in) {
entry:
@@ -64,8 +64,8 @@ entry:
}
; CHECK-LABEL: {{^}}sle:
-; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 6(8.407791e-45)
define void @sle(i32 addrspace(1)* %out, i32 %in) {
entry:
@@ -76,8 +76,8 @@ entry:
}
; CHECK-LABEL: {{^}}ule_i32:
-; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT: 6(8.407791e-45)
define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
@@ -101,8 +101,8 @@ entry:
}
; CHECK-LABEL: {{^}}ule_float_native:
-; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @ule_float_native(float addrspace(1)* %out, float %in) {
entry:
@@ -113,8 +113,8 @@ entry:
}
; CHECK-LABEL: {{^}}ole:
-; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
; CHECK-NEXT:1084227584(5.000000e+00)
define void @ole(float addrspace(1)* %out, float %in) {
entry:
diff --git a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index f26f30022b4f..87b925a24a04 100644
--- a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -2,6 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
declare float @llvm.fma.f32(float, float, float) #1
+declare double @llvm.fma.f64(double, double, double) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
@@ -40,6 +41,32 @@ define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, floa
ret void
}
+; GCN-LABEL: {{^}}test_use_s_v_s:
+; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+
+; GCN: buffer_load_dword [[VA0:v[0-9]+]]
+; GCN-NOT: v_mov_b32
+; GCN: buffer_load_dword [[VA1:v[0-9]+]]
+
+; GCN-NOT: v_mov_b32
+; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; GCN-NOT: v_mov_b32
+
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VA0]], [[SA]], [[VB]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VA1]], [[SA]], [[VB]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+define void @test_use_s_v_s(float addrspace(1)* %out, float %a, float %b, float addrspace(1)* %in) #0 {
+ %va0 = load volatile float, float addrspace(1)* %in
+ %va1 = load volatile float, float addrspace(1)* %in
+ %fma0 = call float @llvm.fma.f32(float %a, float %va0, float %b) #1
+ %fma1 = call float @llvm.fma.f32(float %a, float %va1, float %b) #1
+ store volatile float %fma0, float addrspace(1)* %out
+ store volatile float %fma1, float addrspace(1)* %out
+ ret void
+}
+
; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
@@ -99,5 +126,145 @@ define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32
ret void
}
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_kimm:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_a_kimm(float addrspace(1)* %out, float %a) #0 {
+ %fma = call float @llvm.fma.f32(float %a, float %a, float 1024.0) #1
+ store float %fma, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT0]]
+define void @test_literal_use_twice_ternary_op_k_k_s(float addrspace(1)* %out, float %a) #0 {
+ %fma = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
+ store float %fma, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s_x2:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR0]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VK]], [[VK]], [[SGPR1]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+; GCN: s_endpgm
+define void @test_literal_use_twice_ternary_op_k_k_s_x2(float addrspace(1)* %out, float %a, float %b) #0 {
+ %fma0 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
+ %fma1 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %b) #1
+ store volatile float %fma0, float addrspace(1)* %out
+ store volatile float %fma1, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_literal_use_twice_ternary_op_k_s_k(float addrspace(1)* %out, float %a) #0 {
+ %fma = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
+ store float %fma, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k_x2:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+; GCN: s_endpgm
+define void @test_literal_use_twice_ternary_op_k_s_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
+ %fma0 = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
+ %fma1 = call float @llvm.fma.f32(float 1024.0, float %b, float 1024.0) #1
+ store volatile float %fma0, float addrspace(1)* %out
+ store volatile float %fma1, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_literal_use_twice_ternary_op_s_k_k(float addrspace(1)* %out, float %a) #0 {
+ %fma = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
+ store float %fma, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k_x2:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+; GCN: s_endpgm
+define void @test_literal_use_twice_ternary_op_s_k_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
+ %fma0 = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
+ %fma1 = call float @llvm.fma.f32(float %b, float 1024.0, float 1024.0) #1
+ store volatile float %fma0, float addrspace(1)* %out
+ store volatile float %fma1, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_s0_s1_k_f32:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
+; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], [[SGPR1]]
+
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK0]]
+; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK1]]
+
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+define void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+ %fma0 = call float @llvm.fma.f32(float %a, float %b, float 1024.0) #1
+ %fma1 = call float @llvm.fma.f32(float %a, float %b, float 4096.0) #1
+ store volatile float %fma0, float addrspace(1)* %out
+ store volatile float %fma1, float addrspace(1)* %out
+ ret void
+}
+
+; FIXME: Immediate in SGPRs just copied to VGPRs
+; GCN-LABEL: {{^}}test_s0_s1_k_f64:
+; GCN-DAG: s_load_dwordx2 [[SGPR0:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR1_SUB0:[0-9]+]]:[[SGPR1_SUB1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
+; GCN-DAG: v_mov_b32_e32 v[[VK0_SUB1:[0-9]+]], 0x40900000
+; GCN-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
+
+; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
+; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]]
+; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}}
+
+; Same zero component is re-used for half of each immediate.
+; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
+; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
+
+; GCN: buffer_store_dwordx2 [[RESULT0]]
+; GCN: buffer_store_dwordx2 [[RESULT1]]
+define void @test_s0_s1_k_f64(double addrspace(1)* %out, double %a, double %b) #0 {
+ %fma0 = call double @llvm.fma.f64(double %a, double %b, double 1024.0) #1
+ %fma1 = call double @llvm.fma.f64(double %a, double %b, double 4096.0) #1
+ store volatile double %fma0, double addrspace(1)* %out
+ store volatile double %fma1, double addrspace(1)* %out
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll
index 7d0ebd139f51..1cbefba60c95 100644
--- a/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/test/CodeGen/AMDGPU/valu-i1.ll
@@ -78,8 +78,8 @@ exit:
; SI: BB2_3:
; SI: buffer_load_dword
-; SI: buffer_store_dword
-; SI: v_cmp_eq_i32_e32 vcc,
+; SI-DAG: buffer_store_dword
+; SI-DAG: v_cmp_eq_i32_e32 vcc,
; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
; SI: s_cbranch_execnz BB2_3
@@ -128,18 +128,18 @@ exit:
; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
-; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
-; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
+; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
+; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
; SI: s_cbranch_execz BB3_5
; SI: BB#4:
; SI: buffer_store_dword
-; SI: v_cmp_ge_i64_e32 vcc
-; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]]
+; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]]
+; SI: s_or_b64 [[COND_STATE]], [[CMP]], [[COND_STATE]]
; SI: BB3_5:
-; SI: s_or_b64 exec, exec, [[ORNEG1]]
-; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]]
+; SI: s_or_b64 exec, exec, [[ORNEG2]]
+; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[COND_STATE]]
; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
; SI: s_cbranch_execnz BB3_3
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
new file mode 100644
index 000000000000..cd7c78f408dd
--- /dev/null
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -0,0 +1,585 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; XUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA %s
+; XUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA %s
+
+; This ends up using all 256 registers and requires register
+; scavenging which will fail to find an unsued register.
+
+; Check the ScratchSize to avoid regressions from spilling
+; intermediate register class copies.
+
+; FIXME: The same register is initialized to 0 for every spill.
+
+declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.r600.read.tgid.y() #1
+declare i32 @llvm.r600.read.tgid.z() #1
+
+; GCN-LABEL: {{^}}spill_vgpr_compute:
+
+; GCN: s_mov_b32 s16, s3
+; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s14, -1
+; SI-NEXT: s_mov_b32 s15, 0x80f000
+; VI-NEXT: s_mov_b32 s15, 0x800000
+
+
+; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
+
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+
+; GCN: NumVgprs: 256
+; GCN: ScratchSize: 1024
+
+; s[0:3] input user SGPRs. s4,s5,s6 = workgroup IDs. s8 scratch offset.
+define void @spill_vgpr_compute(<4 x float> %arg6, float addrspace(1)* %arg, i32 %arg1, i32 %arg2, float %arg3, float %arg4, float %arg5) #0 {
+bb:
+ %tmp = add i32 %arg1, %arg2
+ %tmp7 = extractelement <4 x float> %arg6, i32 0
+ %tmp8 = extractelement <4 x float> %arg6, i32 1
+ %tmp9 = extractelement <4 x float> %arg6, i32 2
+ %tmp10 = extractelement <4 x float> %arg6, i32 3
+ %tmp11 = bitcast float %arg5 to i32
+ br label %bb12
+
+bb12: ; preds = %bb145, %bb
+ %tmp13 = phi float [ 0.000000e+00, %bb ], [ %tmp338, %bb145 ]
+ %tmp14 = phi float [ 0.000000e+00, %bb ], [ %tmp337, %bb145 ]
+ %tmp15 = phi float [ 0.000000e+00, %bb ], [ %tmp336, %bb145 ]
+ %tmp16 = phi float [ 0.000000e+00, %bb ], [ %tmp339, %bb145 ]
+ %tmp17 = phi float [ 0.000000e+00, %bb ], [ %tmp335, %bb145 ]
+ %tmp18 = phi float [ 0.000000e+00, %bb ], [ %tmp334, %bb145 ]
+ %tmp19 = phi float [ 0.000000e+00, %bb ], [ %tmp333, %bb145 ]
+ %tmp20 = phi float [ 0.000000e+00, %bb ], [ %tmp340, %bb145 ]
+ %tmp21 = phi float [ 0.000000e+00, %bb ], [ %tmp332, %bb145 ]
+ %tmp22 = phi float [ 0.000000e+00, %bb ], [ %tmp331, %bb145 ]
+ %tmp23 = phi float [ 0.000000e+00, %bb ], [ %tmp330, %bb145 ]
+ %tmp24 = phi float [ 0.000000e+00, %bb ], [ %tmp341, %bb145 ]
+ %tmp25 = phi float [ 0.000000e+00, %bb ], [ %tmp329, %bb145 ]
+ %tmp26 = phi float [ 0.000000e+00, %bb ], [ %tmp328, %bb145 ]
+ %tmp27 = phi float [ 0.000000e+00, %bb ], [ %tmp327, %bb145 ]
+ %tmp28 = phi float [ 0.000000e+00, %bb ], [ %tmp342, %bb145 ]
+ %tmp29 = phi float [ 0.000000e+00, %bb ], [ %tmp326, %bb145 ]
+ %tmp30 = phi float [ 0.000000e+00, %bb ], [ %tmp325, %bb145 ]
+ %tmp31 = phi float [ 0.000000e+00, %bb ], [ %tmp324, %bb145 ]
+ %tmp32 = phi float [ 0.000000e+00, %bb ], [ %tmp343, %bb145 ]
+ %tmp33 = phi float [ 0.000000e+00, %bb ], [ %tmp323, %bb145 ]
+ %tmp34 = phi float [ 0.000000e+00, %bb ], [ %tmp322, %bb145 ]
+ %tmp35 = phi float [ 0.000000e+00, %bb ], [ %tmp321, %bb145 ]
+ %tmp36 = phi float [ 0.000000e+00, %bb ], [ %tmp344, %bb145 ]
+ %tmp37 = phi float [ 0.000000e+00, %bb ], [ %tmp320, %bb145 ]
+ %tmp38 = phi float [ 0.000000e+00, %bb ], [ %tmp319, %bb145 ]
+ %tmp39 = phi float [ 0.000000e+00, %bb ], [ %tmp318, %bb145 ]
+ %tmp40 = phi float [ 0.000000e+00, %bb ], [ %tmp345, %bb145 ]
+ %tmp41 = phi float [ 0.000000e+00, %bb ], [ %tmp317, %bb145 ]
+ %tmp42 = phi float [ 0.000000e+00, %bb ], [ %tmp316, %bb145 ]
+ %tmp43 = phi float [ 0.000000e+00, %bb ], [ %tmp315, %bb145 ]
+ %tmp44 = phi float [ 0.000000e+00, %bb ], [ %tmp346, %bb145 ]
+ %tmp45 = phi float [ 0.000000e+00, %bb ], [ %tmp314, %bb145 ]
+ %tmp46 = phi float [ 0.000000e+00, %bb ], [ %tmp313, %bb145 ]
+ %tmp47 = phi float [ 0.000000e+00, %bb ], [ %tmp312, %bb145 ]
+ %tmp48 = phi float [ 0.000000e+00, %bb ], [ %tmp347, %bb145 ]
+ %tmp49 = phi float [ 0.000000e+00, %bb ], [ %tmp311, %bb145 ]
+ %tmp50 = phi float [ 0.000000e+00, %bb ], [ %tmp310, %bb145 ]
+ %tmp51 = phi float [ 0.000000e+00, %bb ], [ %tmp309, %bb145 ]
+ %tmp52 = phi float [ 0.000000e+00, %bb ], [ %tmp348, %bb145 ]
+ %tmp53 = phi float [ 0.000000e+00, %bb ], [ %tmp308, %bb145 ]
+ %tmp54 = phi float [ 0.000000e+00, %bb ], [ %tmp307, %bb145 ]
+ %tmp55 = phi float [ 0.000000e+00, %bb ], [ %tmp306, %bb145 ]
+ %tmp56 = phi float [ 0.000000e+00, %bb ], [ %tmp349, %bb145 ]
+ %tmp57 = phi float [ 0.000000e+00, %bb ], [ %tmp305, %bb145 ]
+ %tmp58 = phi float [ 0.000000e+00, %bb ], [ %tmp304, %bb145 ]
+ %tmp59 = phi float [ 0.000000e+00, %bb ], [ %tmp303, %bb145 ]
+ %tmp60 = phi float [ 0.000000e+00, %bb ], [ %tmp350, %bb145 ]
+ %tmp61 = phi float [ 0.000000e+00, %bb ], [ %tmp302, %bb145 ]
+ %tmp62 = phi float [ 0.000000e+00, %bb ], [ %tmp301, %bb145 ]
+ %tmp63 = phi float [ 0.000000e+00, %bb ], [ %tmp300, %bb145 ]
+ %tmp64 = phi float [ 0.000000e+00, %bb ], [ %tmp351, %bb145 ]
+ %tmp65 = phi float [ 0.000000e+00, %bb ], [ %tmp299, %bb145 ]
+ %tmp66 = phi float [ 0.000000e+00, %bb ], [ %tmp298, %bb145 ]
+ %tmp67 = phi float [ 0.000000e+00, %bb ], [ %tmp297, %bb145 ]
+ %tmp68 = phi float [ 0.000000e+00, %bb ], [ %tmp352, %bb145 ]
+ %tmp69 = phi float [ 0.000000e+00, %bb ], [ %tmp296, %bb145 ]
+ %tmp70 = phi float [ 0.000000e+00, %bb ], [ %tmp295, %bb145 ]
+ %tmp71 = phi float [ 0.000000e+00, %bb ], [ %tmp294, %bb145 ]
+ %tmp72 = phi float [ 0.000000e+00, %bb ], [ %tmp353, %bb145 ]
+ %tmp73 = phi float [ 0.000000e+00, %bb ], [ %tmp293, %bb145 ]
+ %tmp74 = phi float [ 0.000000e+00, %bb ], [ %tmp292, %bb145 ]
+ %tmp75 = phi float [ 0.000000e+00, %bb ], [ %tmp291, %bb145 ]
+ %tmp76 = phi float [ 0.000000e+00, %bb ], [ %tmp354, %bb145 ]
+ %tmp77 = phi float [ 0.000000e+00, %bb ], [ %tmp290, %bb145 ]
+ %tmp78 = phi float [ 0.000000e+00, %bb ], [ %tmp289, %bb145 ]
+ %tmp79 = phi float [ 0.000000e+00, %bb ], [ %tmp288, %bb145 ]
+ %tmp80 = phi float [ 0.000000e+00, %bb ], [ %tmp355, %bb145 ]
+ %tmp81 = phi float [ 0.000000e+00, %bb ], [ %tmp287, %bb145 ]
+ %tmp82 = phi float [ 0.000000e+00, %bb ], [ %tmp286, %bb145 ]
+ %tmp83 = phi float [ 0.000000e+00, %bb ], [ %tmp285, %bb145 ]
+ %tmp84 = phi float [ 0.000000e+00, %bb ], [ %tmp356, %bb145 ]
+ %tmp85 = phi float [ 0.000000e+00, %bb ], [ %tmp284, %bb145 ]
+ %tmp86 = phi float [ 0.000000e+00, %bb ], [ %tmp283, %bb145 ]
+ %tmp87 = phi float [ 0.000000e+00, %bb ], [ %tmp282, %bb145 ]
+ %tmp88 = phi float [ 0.000000e+00, %bb ], [ %tmp357, %bb145 ]
+ %tmp89 = phi float [ 0.000000e+00, %bb ], [ %tmp281, %bb145 ]
+ %tmp90 = phi float [ 0.000000e+00, %bb ], [ %tmp280, %bb145 ]
+ %tmp91 = phi float [ 0.000000e+00, %bb ], [ %tmp279, %bb145 ]
+ %tmp92 = phi float [ 0.000000e+00, %bb ], [ %tmp358, %bb145 ]
+ %tmp93 = phi float [ 0.000000e+00, %bb ], [ %tmp359, %bb145 ]
+ %tmp94 = phi float [ 0.000000e+00, %bb ], [ %tmp360, %bb145 ]
+ %tmp95 = phi float [ 0.000000e+00, %bb ], [ %tmp409, %bb145 ]
+ %tmp96 = phi float [ 0.000000e+00, %bb ], [ %tmp361, %bb145 ]
+ %tmp97 = phi float [ 0.000000e+00, %bb ], [ %tmp362, %bb145 ]
+ %tmp98 = phi float [ 0.000000e+00, %bb ], [ %tmp363, %bb145 ]
+ %tmp99 = phi float [ 0.000000e+00, %bb ], [ %tmp364, %bb145 ]
+ %tmp100 = phi float [ 0.000000e+00, %bb ], [ %tmp365, %bb145 ]
+ %tmp101 = phi float [ 0.000000e+00, %bb ], [ %tmp366, %bb145 ]
+ %tmp102 = phi float [ 0.000000e+00, %bb ], [ %tmp367, %bb145 ]
+ %tmp103 = phi float [ 0.000000e+00, %bb ], [ %tmp368, %bb145 ]
+ %tmp104 = phi float [ 0.000000e+00, %bb ], [ %tmp369, %bb145 ]
+ %tmp105 = phi float [ 0.000000e+00, %bb ], [ %tmp370, %bb145 ]
+ %tmp106 = phi float [ 0.000000e+00, %bb ], [ %tmp371, %bb145 ]
+ %tmp107 = phi float [ 0.000000e+00, %bb ], [ %tmp372, %bb145 ]
+ %tmp108 = phi float [ 0.000000e+00, %bb ], [ %tmp373, %bb145 ]
+ %tmp109 = phi float [ 0.000000e+00, %bb ], [ %tmp374, %bb145 ]
+ %tmp110 = phi float [ 0.000000e+00, %bb ], [ %tmp375, %bb145 ]
+ %tmp111 = phi float [ 0.000000e+00, %bb ], [ %tmp376, %bb145 ]
+ %tmp112 = phi float [ 0.000000e+00, %bb ], [ %tmp377, %bb145 ]
+ %tmp113 = phi float [ 0.000000e+00, %bb ], [ %tmp378, %bb145 ]
+ %tmp114 = phi float [ 0.000000e+00, %bb ], [ %tmp379, %bb145 ]
+ %tmp115 = phi float [ 0.000000e+00, %bb ], [ %tmp380, %bb145 ]
+ %tmp116 = phi float [ 0.000000e+00, %bb ], [ %tmp381, %bb145 ]
+ %tmp117 = phi float [ 0.000000e+00, %bb ], [ %tmp382, %bb145 ]
+ %tmp118 = phi float [ 0.000000e+00, %bb ], [ %tmp383, %bb145 ]
+ %tmp119 = phi float [ 0.000000e+00, %bb ], [ %tmp384, %bb145 ]
+ %tmp120 = phi float [ 0.000000e+00, %bb ], [ %tmp385, %bb145 ]
+ %tmp121 = phi float [ 0.000000e+00, %bb ], [ %tmp386, %bb145 ]
+ %tmp122 = phi float [ 0.000000e+00, %bb ], [ %tmp387, %bb145 ]
+ %tmp123 = phi float [ 0.000000e+00, %bb ], [ %tmp388, %bb145 ]
+ %tmp124 = phi float [ 0.000000e+00, %bb ], [ %tmp389, %bb145 ]
+ %tmp125 = phi float [ 0.000000e+00, %bb ], [ %tmp390, %bb145 ]
+ %tmp126 = phi float [ 0.000000e+00, %bb ], [ %tmp391, %bb145 ]
+ %tmp127 = phi float [ 0.000000e+00, %bb ], [ %tmp392, %bb145 ]
+ %tmp128 = phi float [ 0.000000e+00, %bb ], [ %tmp393, %bb145 ]
+ %tmp129 = phi float [ 0.000000e+00, %bb ], [ %tmp394, %bb145 ]
+ %tmp130 = phi float [ 0.000000e+00, %bb ], [ %tmp395, %bb145 ]
+ %tmp131 = phi float [ 0.000000e+00, %bb ], [ %tmp396, %bb145 ]
+ %tmp132 = phi float [ 0.000000e+00, %bb ], [ %tmp397, %bb145 ]
+ %tmp133 = phi float [ 0.000000e+00, %bb ], [ %tmp398, %bb145 ]
+ %tmp134 = phi float [ 0.000000e+00, %bb ], [ %tmp399, %bb145 ]
+ %tmp135 = phi float [ 0.000000e+00, %bb ], [ %tmp400, %bb145 ]
+ %tmp136 = phi float [ 0.000000e+00, %bb ], [ %tmp401, %bb145 ]
+ %tmp137 = phi float [ 0.000000e+00, %bb ], [ %tmp402, %bb145 ]
+ %tmp138 = phi float [ 0.000000e+00, %bb ], [ %tmp403, %bb145 ]
+ %tmp139 = phi float [ 0.000000e+00, %bb ], [ %tmp404, %bb145 ]
+ %tmp140 = phi float [ 0.000000e+00, %bb ], [ %tmp405, %bb145 ]
+ %tmp141 = phi float [ 0.000000e+00, %bb ], [ %tmp406, %bb145 ]
+ %tmp142 = bitcast float %tmp95 to i32
+ %tmp143 = icmp sgt i32 %tmp142, 125
+ br i1 %tmp143, label %bb144, label %bb145
+
+bb144: ; preds = %bb12
+ store volatile float %arg3, float addrspace(1)* %arg
+ store volatile float %tmp91, float addrspace(1)* %arg
+ store volatile float %tmp90, float addrspace(1)* %arg
+ store volatile float %tmp89, float addrspace(1)* %arg
+ store volatile float %tmp87, float addrspace(1)* %arg
+ store volatile float %tmp86, float addrspace(1)* %arg
+ store volatile float %tmp85, float addrspace(1)* %arg
+ store volatile float %tmp83, float addrspace(1)* %arg
+ store volatile float %tmp82, float addrspace(1)* %arg
+ store volatile float %tmp81, float addrspace(1)* %arg
+ store volatile float %tmp79, float addrspace(1)* %arg
+ store volatile float %tmp78, float addrspace(1)* %arg
+ store volatile float %tmp77, float addrspace(1)* %arg
+ store volatile float %tmp75, float addrspace(1)* %arg
+ store volatile float %tmp74, float addrspace(1)* %arg
+ store volatile float %tmp73, float addrspace(1)* %arg
+ store volatile float %tmp71, float addrspace(1)* %arg
+ store volatile float %tmp70, float addrspace(1)* %arg
+ store volatile float %tmp69, float addrspace(1)* %arg
+ store volatile float %tmp67, float addrspace(1)* %arg
+ store volatile float %tmp66, float addrspace(1)* %arg
+ store volatile float %tmp65, float addrspace(1)* %arg
+ store volatile float %tmp63, float addrspace(1)* %arg
+ store volatile float %tmp62, float addrspace(1)* %arg
+ store volatile float %tmp61, float addrspace(1)* %arg
+ store volatile float %tmp59, float addrspace(1)* %arg
+ store volatile float %tmp58, float addrspace(1)* %arg
+ store volatile float %tmp57, float addrspace(1)* %arg
+ store volatile float %tmp55, float addrspace(1)* %arg
+ store volatile float %tmp54, float addrspace(1)* %arg
+ store volatile float %tmp53, float addrspace(1)* %arg
+ store volatile float %tmp51, float addrspace(1)* %arg
+ store volatile float %tmp50, float addrspace(1)* %arg
+ store volatile float %tmp49, float addrspace(1)* %arg
+ store volatile float %tmp47, float addrspace(1)* %arg
+ store volatile float %tmp46, float addrspace(1)* %arg
+ store volatile float %tmp45, float addrspace(1)* %arg
+ store volatile float %tmp43, float addrspace(1)* %arg
+ store volatile float %tmp42, float addrspace(1)* %arg
+ store volatile float %tmp41, float addrspace(1)* %arg
+ store volatile float %tmp39, float addrspace(1)* %arg
+ store volatile float %tmp38, float addrspace(1)* %arg
+ store volatile float %tmp37, float addrspace(1)* %arg
+ store volatile float %tmp35, float addrspace(1)* %arg
+ store volatile float %tmp34, float addrspace(1)* %arg
+ store volatile float %tmp33, float addrspace(1)* %arg
+ store volatile float %tmp31, float addrspace(1)* %arg
+ store volatile float %tmp30, float addrspace(1)* %arg
+ store volatile float %tmp29, float addrspace(1)* %arg
+ store volatile float %tmp27, float addrspace(1)* %arg
+ store volatile float %tmp26, float addrspace(1)* %arg
+ store volatile float %tmp25, float addrspace(1)* %arg
+ store volatile float %tmp23, float addrspace(1)* %arg
+ store volatile float %tmp22, float addrspace(1)* %arg
+ store volatile float %tmp21, float addrspace(1)* %arg
+ store volatile float %tmp19, float addrspace(1)* %arg
+ store volatile float %tmp18, float addrspace(1)* %arg
+ store volatile float %tmp17, float addrspace(1)* %arg
+ store volatile float %tmp15, float addrspace(1)* %arg
+ store volatile float %tmp14, float addrspace(1)* %arg
+ store volatile float %tmp13, float addrspace(1)* %arg
+ store volatile float %tmp16, float addrspace(1)* %arg
+ store volatile float %tmp20, float addrspace(1)* %arg
+ store volatile float %tmp24, float addrspace(1)* %arg
+ store volatile float %tmp28, float addrspace(1)* %arg
+ store volatile float %tmp32, float addrspace(1)* %arg
+ store volatile float %tmp36, float addrspace(1)* %arg
+ store volatile float %tmp40, float addrspace(1)* %arg
+ store volatile float %tmp44, float addrspace(1)* %arg
+ store volatile float %tmp48, float addrspace(1)* %arg
+ store volatile float %tmp52, float addrspace(1)* %arg
+ store volatile float %tmp56, float addrspace(1)* %arg
+ store volatile float %tmp60, float addrspace(1)* %arg
+ store volatile float %tmp64, float addrspace(1)* %arg
+ store volatile float %tmp68, float addrspace(1)* %arg
+ store volatile float %tmp72, float addrspace(1)* %arg
+ store volatile float %tmp76, float addrspace(1)* %arg
+ store volatile float %tmp80, float addrspace(1)* %arg
+ store volatile float %tmp84, float addrspace(1)* %arg
+ store volatile float %tmp88, float addrspace(1)* %arg
+ store volatile float %tmp92, float addrspace(1)* %arg
+ store volatile float %tmp93, float addrspace(1)* %arg
+ store volatile float %tmp94, float addrspace(1)* %arg
+ store volatile float %tmp96, float addrspace(1)* %arg
+ store volatile float %tmp97, float addrspace(1)* %arg
+ store volatile float %tmp98, float addrspace(1)* %arg
+ store volatile float %tmp99, float addrspace(1)* %arg
+ store volatile float %tmp100, float addrspace(1)* %arg
+ store volatile float %tmp101, float addrspace(1)* %arg
+ store volatile float %tmp102, float addrspace(1)* %arg
+ store volatile float %tmp103, float addrspace(1)* %arg
+ store volatile float %tmp104, float addrspace(1)* %arg
+ store volatile float %tmp105, float addrspace(1)* %arg
+ store volatile float %tmp106, float addrspace(1)* %arg
+ store volatile float %tmp107, float addrspace(1)* %arg
+ store volatile float %tmp108, float addrspace(1)* %arg
+ store volatile float %tmp109, float addrspace(1)* %arg
+ store volatile float %tmp110, float addrspace(1)* %arg
+ store volatile float %tmp111, float addrspace(1)* %arg
+ store volatile float %tmp112, float addrspace(1)* %arg
+ store volatile float %tmp113, float addrspace(1)* %arg
+ store volatile float %tmp114, float addrspace(1)* %arg
+ store volatile float %tmp115, float addrspace(1)* %arg
+ store volatile float %tmp116, float addrspace(1)* %arg
+ store volatile float %tmp117, float addrspace(1)* %arg
+ store volatile float %tmp118, float addrspace(1)* %arg
+ store volatile float %tmp119, float addrspace(1)* %arg
+ store volatile float %tmp120, float addrspace(1)* %arg
+ store volatile float %tmp121, float addrspace(1)* %arg
+ store volatile float %tmp122, float addrspace(1)* %arg
+ store volatile float %tmp123, float addrspace(1)* %arg
+ store volatile float %tmp124, float addrspace(1)* %arg
+ store volatile float %tmp125, float addrspace(1)* %arg
+ store volatile float %tmp126, float addrspace(1)* %arg
+ store volatile float %tmp127, float addrspace(1)* %arg
+ store volatile float %tmp128, float addrspace(1)* %arg
+ store volatile float %tmp129, float addrspace(1)* %arg
+ store volatile float %tmp130, float addrspace(1)* %arg
+ store volatile float %tmp131, float addrspace(1)* %arg
+ store volatile float %tmp132, float addrspace(1)* %arg
+ store volatile float %tmp133, float addrspace(1)* %arg
+ store volatile float %tmp134, float addrspace(1)* %arg
+ store volatile float %tmp135, float addrspace(1)* %arg
+ store volatile float %tmp136, float addrspace(1)* %arg
+ store volatile float %tmp137, float addrspace(1)* %arg
+ store volatile float %tmp138, float addrspace(1)* %arg
+ store volatile float %tmp139, float addrspace(1)* %arg
+ store volatile float %arg4, float addrspace(1)* %arg
+ store volatile float %tmp7, float addrspace(1)* %arg
+ store volatile float %tmp8, float addrspace(1)* %arg
+ store volatile float %tmp9, float addrspace(1)* %arg
+ store volatile float %tmp10, float addrspace(1)* %arg
+ ret void
+
+bb145: ; preds = %bb12
+ %tmp146 = bitcast float %tmp95 to i32
+ %tmp147 = bitcast float %tmp95 to i32
+ %tmp148 = add i32 %tmp11, %tmp147
+ %tmp149 = bitcast i32 %tmp148 to float
+ %tmp150 = insertelement <128 x float> undef, float %tmp91, i32 0
+ %tmp151 = insertelement <128 x float> %tmp150, float %tmp90, i32 1
+ %tmp152 = insertelement <128 x float> %tmp151, float %tmp89, i32 2
+ %tmp153 = insertelement <128 x float> %tmp152, float %tmp87, i32 3
+ %tmp154 = insertelement <128 x float> %tmp153, float %tmp86, i32 4
+ %tmp155 = insertelement <128 x float> %tmp154, float %tmp85, i32 5
+ %tmp156 = insertelement <128 x float> %tmp155, float %tmp83, i32 6
+ %tmp157 = insertelement <128 x float> %tmp156, float %tmp82, i32 7
+ %tmp158 = insertelement <128 x float> %tmp157, float %tmp81, i32 8
+ %tmp159 = insertelement <128 x float> %tmp158, float %tmp79, i32 9
+ %tmp160 = insertelement <128 x float> %tmp159, float %tmp78, i32 10
+ %tmp161 = insertelement <128 x float> %tmp160, float %tmp77, i32 11
+ %tmp162 = insertelement <128 x float> %tmp161, float %tmp75, i32 12
+ %tmp163 = insertelement <128 x float> %tmp162, float %tmp74, i32 13
+ %tmp164 = insertelement <128 x float> %tmp163, float %tmp73, i32 14
+ %tmp165 = insertelement <128 x float> %tmp164, float %tmp71, i32 15
+ %tmp166 = insertelement <128 x float> %tmp165, float %tmp70, i32 16
+ %tmp167 = insertelement <128 x float> %tmp166, float %tmp69, i32 17
+ %tmp168 = insertelement <128 x float> %tmp167, float %tmp67, i32 18
+ %tmp169 = insertelement <128 x float> %tmp168, float %tmp66, i32 19
+ %tmp170 = insertelement <128 x float> %tmp169, float %tmp65, i32 20
+ %tmp171 = insertelement <128 x float> %tmp170, float %tmp63, i32 21
+ %tmp172 = insertelement <128 x float> %tmp171, float %tmp62, i32 22
+ %tmp173 = insertelement <128 x float> %tmp172, float %tmp61, i32 23
+ %tmp174 = insertelement <128 x float> %tmp173, float %tmp59, i32 24
+ %tmp175 = insertelement <128 x float> %tmp174, float %tmp58, i32 25
+ %tmp176 = insertelement <128 x float> %tmp175, float %tmp57, i32 26
+ %tmp177 = insertelement <128 x float> %tmp176, float %tmp55, i32 27
+ %tmp178 = insertelement <128 x float> %tmp177, float %tmp54, i32 28
+ %tmp179 = insertelement <128 x float> %tmp178, float %tmp53, i32 29
+ %tmp180 = insertelement <128 x float> %tmp179, float %tmp51, i32 30
+ %tmp181 = insertelement <128 x float> %tmp180, float %tmp50, i32 31
+ %tmp182 = insertelement <128 x float> %tmp181, float %tmp49, i32 32
+ %tmp183 = insertelement <128 x float> %tmp182, float %tmp47, i32 33
+ %tmp184 = insertelement <128 x float> %tmp183, float %tmp46, i32 34
+ %tmp185 = insertelement <128 x float> %tmp184, float %tmp45, i32 35
+ %tmp186 = insertelement <128 x float> %tmp185, float %tmp43, i32 36
+ %tmp187 = insertelement <128 x float> %tmp186, float %tmp42, i32 37
+ %tmp188 = insertelement <128 x float> %tmp187, float %tmp41, i32 38
+ %tmp189 = insertelement <128 x float> %tmp188, float %tmp39, i32 39
+ %tmp190 = insertelement <128 x float> %tmp189, float %tmp38, i32 40
+ %tmp191 = insertelement <128 x float> %tmp190, float %tmp37, i32 41
+ %tmp192 = insertelement <128 x float> %tmp191, float %tmp35, i32 42
+ %tmp193 = insertelement <128 x float> %tmp192, float %tmp34, i32 43
+ %tmp194 = insertelement <128 x float> %tmp193, float %tmp33, i32 44
+ %tmp195 = insertelement <128 x float> %tmp194, float %tmp31, i32 45
+ %tmp196 = insertelement <128 x float> %tmp195, float %tmp30, i32 46
+ %tmp197 = insertelement <128 x float> %tmp196, float %tmp29, i32 47
+ %tmp198 = insertelement <128 x float> %tmp197, float %tmp27, i32 48
+ %tmp199 = insertelement <128 x float> %tmp198, float %tmp26, i32 49
+ %tmp200 = insertelement <128 x float> %tmp199, float %tmp25, i32 50
+ %tmp201 = insertelement <128 x float> %tmp200, float %tmp23, i32 51
+ %tmp202 = insertelement <128 x float> %tmp201, float %tmp22, i32 52
+ %tmp203 = insertelement <128 x float> %tmp202, float %tmp21, i32 53
+ %tmp204 = insertelement <128 x float> %tmp203, float %tmp19, i32 54
+ %tmp205 = insertelement <128 x float> %tmp204, float %tmp18, i32 55
+ %tmp206 = insertelement <128 x float> %tmp205, float %tmp17, i32 56
+ %tmp207 = insertelement <128 x float> %tmp206, float %tmp15, i32 57
+ %tmp208 = insertelement <128 x float> %tmp207, float %tmp14, i32 58
+ %tmp209 = insertelement <128 x float> %tmp208, float %tmp13, i32 59
+ %tmp210 = insertelement <128 x float> %tmp209, float %tmp16, i32 60
+ %tmp211 = insertelement <128 x float> %tmp210, float %tmp20, i32 61
+ %tmp212 = insertelement <128 x float> %tmp211, float %tmp24, i32 62
+ %tmp213 = insertelement <128 x float> %tmp212, float %tmp28, i32 63
+ %tmp214 = insertelement <128 x float> %tmp213, float %tmp32, i32 64
+ %tmp215 = insertelement <128 x float> %tmp214, float %tmp36, i32 65
+ %tmp216 = insertelement <128 x float> %tmp215, float %tmp40, i32 66
+ %tmp217 = insertelement <128 x float> %tmp216, float %tmp44, i32 67
+ %tmp218 = insertelement <128 x float> %tmp217, float %tmp48, i32 68
+ %tmp219 = insertelement <128 x float> %tmp218, float %tmp52, i32 69
+ %tmp220 = insertelement <128 x float> %tmp219, float %tmp56, i32 70
+ %tmp221 = insertelement <128 x float> %tmp220, float %tmp60, i32 71
+ %tmp222 = insertelement <128 x float> %tmp221, float %tmp64, i32 72
+ %tmp223 = insertelement <128 x float> %tmp222, float %tmp68, i32 73
+ %tmp224 = insertelement <128 x float> %tmp223, float %tmp72, i32 74
+ %tmp225 = insertelement <128 x float> %tmp224, float %tmp76, i32 75
+ %tmp226 = insertelement <128 x float> %tmp225, float %tmp80, i32 76
+ %tmp227 = insertelement <128 x float> %tmp226, float %tmp84, i32 77
+ %tmp228 = insertelement <128 x float> %tmp227, float %tmp88, i32 78
+ %tmp229 = insertelement <128 x float> %tmp228, float %tmp92, i32 79
+ %tmp230 = insertelement <128 x float> %tmp229, float %tmp93, i32 80
+ %tmp231 = insertelement <128 x float> %tmp230, float %tmp94, i32 81
+ %tmp232 = insertelement <128 x float> %tmp231, float %tmp96, i32 82
+ %tmp233 = insertelement <128 x float> %tmp232, float %tmp97, i32 83
+ %tmp234 = insertelement <128 x float> %tmp233, float %tmp98, i32 84
+ %tmp235 = insertelement <128 x float> %tmp234, float %tmp99, i32 85
+ %tmp236 = insertelement <128 x float> %tmp235, float %tmp100, i32 86
+ %tmp237 = insertelement <128 x float> %tmp236, float %tmp101, i32 87
+ %tmp238 = insertelement <128 x float> %tmp237, float %tmp102, i32 88
+ %tmp239 = insertelement <128 x float> %tmp238, float %tmp103, i32 89
+ %tmp240 = insertelement <128 x float> %tmp239, float %tmp104, i32 90
+ %tmp241 = insertelement <128 x float> %tmp240, float %tmp105, i32 91
+ %tmp242 = insertelement <128 x float> %tmp241, float %tmp106, i32 92
+ %tmp243 = insertelement <128 x float> %tmp242, float %tmp107, i32 93
+ %tmp244 = insertelement <128 x float> %tmp243, float %tmp108, i32 94
+ %tmp245 = insertelement <128 x float> %tmp244, float %tmp109, i32 95
+ %tmp246 = insertelement <128 x float> %tmp245, float %tmp110, i32 96
+ %tmp247 = insertelement <128 x float> %tmp246, float %tmp111, i32 97
+ %tmp248 = insertelement <128 x float> %tmp247, float %tmp112, i32 98
+ %tmp249 = insertelement <128 x float> %tmp248, float %tmp113, i32 99
+ %tmp250 = insertelement <128 x float> %tmp249, float %tmp114, i32 100
+ %tmp251 = insertelement <128 x float> %tmp250, float %tmp115, i32 101
+ %tmp252 = insertelement <128 x float> %tmp251, float %tmp116, i32 102
+ %tmp253 = insertelement <128 x float> %tmp252, float %tmp117, i32 103
+ %tmp254 = insertelement <128 x float> %tmp253, float %tmp118, i32 104
+ %tmp255 = insertelement <128 x float> %tmp254, float %tmp119, i32 105
+ %tmp256 = insertelement <128 x float> %tmp255, float %tmp120, i32 106
+ %tmp257 = insertelement <128 x float> %tmp256, float %tmp121, i32 107
+ %tmp258 = insertelement <128 x float> %tmp257, float %tmp122, i32 108
+ %tmp259 = insertelement <128 x float> %tmp258, float %tmp123, i32 109
+ %tmp260 = insertelement <128 x float> %tmp259, float %tmp124, i32 110
+ %tmp261 = insertelement <128 x float> %tmp260, float %tmp125, i32 111
+ %tmp262 = insertelement <128 x float> %tmp261, float %tmp126, i32 112
+ %tmp263 = insertelement <128 x float> %tmp262, float %tmp127, i32 113
+ %tmp264 = insertelement <128 x float> %tmp263, float %tmp128, i32 114
+ %tmp265 = insertelement <128 x float> %tmp264, float %tmp129, i32 115
+ %tmp266 = insertelement <128 x float> %tmp265, float %tmp130, i32 116
+ %tmp267 = insertelement <128 x float> %tmp266, float %tmp131, i32 117
+ %tmp268 = insertelement <128 x float> %tmp267, float %tmp132, i32 118
+ %tmp269 = insertelement <128 x float> %tmp268, float %tmp133, i32 119
+ %tmp270 = insertelement <128 x float> %tmp269, float %tmp134, i32 120
+ %tmp271 = insertelement <128 x float> %tmp270, float %tmp135, i32 121
+ %tmp272 = insertelement <128 x float> %tmp271, float %tmp136, i32 122
+ %tmp273 = insertelement <128 x float> %tmp272, float %tmp137, i32 123
+ %tmp274 = insertelement <128 x float> %tmp273, float %tmp138, i32 124
+ %tmp275 = insertelement <128 x float> %tmp274, float %tmp139, i32 125
+ %tmp276 = insertelement <128 x float> %tmp275, float %tmp140, i32 126
+ %tmp277 = insertelement <128 x float> %tmp276, float %tmp141, i32 127
+ %tmp278 = insertelement <128 x float> %tmp277, float %tmp149, i32 %tmp146
+ %tmp279 = extractelement <128 x float> %tmp278, i32 0
+ %tmp280 = extractelement <128 x float> %tmp278, i32 1
+ %tmp281 = extractelement <128 x float> %tmp278, i32 2
+ %tmp282 = extractelement <128 x float> %tmp278, i32 3
+ %tmp283 = extractelement <128 x float> %tmp278, i32 4
+ %tmp284 = extractelement <128 x float> %tmp278, i32 5
+ %tmp285 = extractelement <128 x float> %tmp278, i32 6
+ %tmp286 = extractelement <128 x float> %tmp278, i32 7
+ %tmp287 = extractelement <128 x float> %tmp278, i32 8
+ %tmp288 = extractelement <128 x float> %tmp278, i32 9
+ %tmp289 = extractelement <128 x float> %tmp278, i32 10
+ %tmp290 = extractelement <128 x float> %tmp278, i32 11
+ %tmp291 = extractelement <128 x float> %tmp278, i32 12
+ %tmp292 = extractelement <128 x float> %tmp278, i32 13
+ %tmp293 = extractelement <128 x float> %tmp278, i32 14
+ %tmp294 = extractelement <128 x float> %tmp278, i32 15
+ %tmp295 = extractelement <128 x float> %tmp278, i32 16
+ %tmp296 = extractelement <128 x float> %tmp278, i32 17
+ %tmp297 = extractelement <128 x float> %tmp278, i32 18
+ %tmp298 = extractelement <128 x float> %tmp278, i32 19
+ %tmp299 = extractelement <128 x float> %tmp278, i32 20
+ %tmp300 = extractelement <128 x float> %tmp278, i32 21
+ %tmp301 = extractelement <128 x float> %tmp278, i32 22
+ %tmp302 = extractelement <128 x float> %tmp278, i32 23
+ %tmp303 = extractelement <128 x float> %tmp278, i32 24
+ %tmp304 = extractelement <128 x float> %tmp278, i32 25
+ %tmp305 = extractelement <128 x float> %tmp278, i32 26
+ %tmp306 = extractelement <128 x float> %tmp278, i32 27
+ %tmp307 = extractelement <128 x float> %tmp278, i32 28
+ %tmp308 = extractelement <128 x float> %tmp278, i32 29
+ %tmp309 = extractelement <128 x float> %tmp278, i32 30
+ %tmp310 = extractelement <128 x float> %tmp278, i32 31
+ %tmp311 = extractelement <128 x float> %tmp278, i32 32
+ %tmp312 = extractelement <128 x float> %tmp278, i32 33
+ %tmp313 = extractelement <128 x float> %tmp278, i32 34
+ %tmp314 = extractelement <128 x float> %tmp278, i32 35
+ %tmp315 = extractelement <128 x float> %tmp278, i32 36
+ %tmp316 = extractelement <128 x float> %tmp278, i32 37
+ %tmp317 = extractelement <128 x float> %tmp278, i32 38
+ %tmp318 = extractelement <128 x float> %tmp278, i32 39
+ %tmp319 = extractelement <128 x float> %tmp278, i32 40
+ %tmp320 = extractelement <128 x float> %tmp278, i32 41
+ %tmp321 = extractelement <128 x float> %tmp278, i32 42
+ %tmp322 = extractelement <128 x float> %tmp278, i32 43
+ %tmp323 = extractelement <128 x float> %tmp278, i32 44
+ %tmp324 = extractelement <128 x float> %tmp278, i32 45
+ %tmp325 = extractelement <128 x float> %tmp278, i32 46
+ %tmp326 = extractelement <128 x float> %tmp278, i32 47
+ %tmp327 = extractelement <128 x float> %tmp278, i32 48
+ %tmp328 = extractelement <128 x float> %tmp278, i32 49
+ %tmp329 = extractelement <128 x float> %tmp278, i32 50
+ %tmp330 = extractelement <128 x float> %tmp278, i32 51
+ %tmp331 = extractelement <128 x float> %tmp278, i32 52
+ %tmp332 = extractelement <128 x float> %tmp278, i32 53
+ %tmp333 = extractelement <128 x float> %tmp278, i32 54
+ %tmp334 = extractelement <128 x float> %tmp278, i32 55
+ %tmp335 = extractelement <128 x float> %tmp278, i32 56
+ %tmp336 = extractelement <128 x float> %tmp278, i32 57
+ %tmp337 = extractelement <128 x float> %tmp278, i32 58
+ %tmp338 = extractelement <128 x float> %tmp278, i32 59
+ %tmp339 = extractelement <128 x float> %tmp278, i32 60
+ %tmp340 = extractelement <128 x float> %tmp278, i32 61
+ %tmp341 = extractelement <128 x float> %tmp278, i32 62
+ %tmp342 = extractelement <128 x float> %tmp278, i32 63
+ %tmp343 = extractelement <128 x float> %tmp278, i32 64
+ %tmp344 = extractelement <128 x float> %tmp278, i32 65
+ %tmp345 = extractelement <128 x float> %tmp278, i32 66
+ %tmp346 = extractelement <128 x float> %tmp278, i32 67
+ %tmp347 = extractelement <128 x float> %tmp278, i32 68
+ %tmp348 = extractelement <128 x float> %tmp278, i32 69
+ %tmp349 = extractelement <128 x float> %tmp278, i32 70
+ %tmp350 = extractelement <128 x float> %tmp278, i32 71
+ %tmp351 = extractelement <128 x float> %tmp278, i32 72
+ %tmp352 = extractelement <128 x float> %tmp278, i32 73
+ %tmp353 = extractelement <128 x float> %tmp278, i32 74
+ %tmp354 = extractelement <128 x float> %tmp278, i32 75
+ %tmp355 = extractelement <128 x float> %tmp278, i32 76
+ %tmp356 = extractelement <128 x float> %tmp278, i32 77
+ %tmp357 = extractelement <128 x float> %tmp278, i32 78
+ %tmp358 = extractelement <128 x float> %tmp278, i32 79
+ %tmp359 = extractelement <128 x float> %tmp278, i32 80
+ %tmp360 = extractelement <128 x float> %tmp278, i32 81
+ %tmp361 = extractelement <128 x float> %tmp278, i32 82
+ %tmp362 = extractelement <128 x float> %tmp278, i32 83
+ %tmp363 = extractelement <128 x float> %tmp278, i32 84
+ %tmp364 = extractelement <128 x float> %tmp278, i32 85
+ %tmp365 = extractelement <128 x float> %tmp278, i32 86
+ %tmp366 = extractelement <128 x float> %tmp278, i32 87
+ %tmp367 = extractelement <128 x float> %tmp278, i32 88
+ %tmp368 = extractelement <128 x float> %tmp278, i32 89
+ %tmp369 = extractelement <128 x float> %tmp278, i32 90
+ %tmp370 = extractelement <128 x float> %tmp278, i32 91
+ %tmp371 = extractelement <128 x float> %tmp278, i32 92
+ %tmp372 = extractelement <128 x float> %tmp278, i32 93
+ %tmp373 = extractelement <128 x float> %tmp278, i32 94
+ %tmp374 = extractelement <128 x float> %tmp278, i32 95
+ %tmp375 = extractelement <128 x float> %tmp278, i32 96
+ %tmp376 = extractelement <128 x float> %tmp278, i32 97
+ %tmp377 = extractelement <128 x float> %tmp278, i32 98
+ %tmp378 = extractelement <128 x float> %tmp278, i32 99
+ %tmp379 = extractelement <128 x float> %tmp278, i32 100
+ %tmp380 = extractelement <128 x float> %tmp278, i32 101
+ %tmp381 = extractelement <128 x float> %tmp278, i32 102
+ %tmp382 = extractelement <128 x float> %tmp278, i32 103
+ %tmp383 = extractelement <128 x float> %tmp278, i32 104
+ %tmp384 = extractelement <128 x float> %tmp278, i32 105
+ %tmp385 = extractelement <128 x float> %tmp278, i32 106
+ %tmp386 = extractelement <128 x float> %tmp278, i32 107
+ %tmp387 = extractelement <128 x float> %tmp278, i32 108
+ %tmp388 = extractelement <128 x float> %tmp278, i32 109
+ %tmp389 = extractelement <128 x float> %tmp278, i32 110
+ %tmp390 = extractelement <128 x float> %tmp278, i32 111
+ %tmp391 = extractelement <128 x float> %tmp278, i32 112
+ %tmp392 = extractelement <128 x float> %tmp278, i32 113
+ %tmp393 = extractelement <128 x float> %tmp278, i32 114
+ %tmp394 = extractelement <128 x float> %tmp278, i32 115
+ %tmp395 = extractelement <128 x float> %tmp278, i32 116
+ %tmp396 = extractelement <128 x float> %tmp278, i32 117
+ %tmp397 = extractelement <128 x float> %tmp278, i32 118
+ %tmp398 = extractelement <128 x float> %tmp278, i32 119
+ %tmp399 = extractelement <128 x float> %tmp278, i32 120
+ %tmp400 = extractelement <128 x float> %tmp278, i32 121
+ %tmp401 = extractelement <128 x float> %tmp278, i32 122
+ %tmp402 = extractelement <128 x float> %tmp278, i32 123
+ %tmp403 = extractelement <128 x float> %tmp278, i32 124
+ %tmp404 = extractelement <128 x float> %tmp278, i32 125
+ %tmp405 = extractelement <128 x float> %tmp278, i32 126
+ %tmp406 = extractelement <128 x float> %tmp278, i32 127
+ %tmp407 = bitcast float %tmp95 to i32
+ %tmp408 = add i32 %tmp407, 1
+ %tmp409 = bitcast i32 %tmp408 to float
+ br label %bb12
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
new file mode 100644
index 000000000000..16abb89bb0b8
--- /dev/null
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -0,0 +1,494 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; This ends up using all 255 registers and requires register
+; scavenging which will fail to find an unsued register.
+
+; Check the ScratchSize to avoid regressions from spilling
+; intermediate register class copies.
+
+; FIXME: The same register is initialized to 0 for every spill.
+
+; GCN-LABEL: {{^}}main:
+
+; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s14, -1
+; SI-NEXT: s_mov_b32 s15, 0x80f000
+; VI-NEXT: s_mov_b32 s15, 0x800000
+
+; s12 is offset user SGPR
+; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Spill
+
+; GCN: NumVgprs: 256
+; GCN: ScratchSize: 1024
+
+define void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 {
+bb:
+ %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i64 0, i64 0
+ %tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0
+ %tmp12 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 0)
+ %tmp13 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 16)
+ %tmp14 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 32)
+ %tmp15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0
+ %tmp16 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp15, align 16, !tbaa !0
+ %tmp17 = add i32 %arg5, %arg7
+ %tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp16, i32 0, i32 %tmp17)
+ %tmp19 = extractelement <4 x float> %tmp18, i32 0
+ %tmp20 = extractelement <4 x float> %tmp18, i32 1
+ %tmp21 = extractelement <4 x float> %tmp18, i32 2
+ %tmp22 = extractelement <4 x float> %tmp18, i32 3
+ %tmp23 = bitcast float %tmp14 to i32
+ br label %bb24
+
+bb24: ; preds = %bb157, %bb
+ %tmp25 = phi float [ 0.000000e+00, %bb ], [ %tmp350, %bb157 ]
+ %tmp26 = phi float [ 0.000000e+00, %bb ], [ %tmp349, %bb157 ]
+ %tmp27 = phi float [ 0.000000e+00, %bb ], [ %tmp348, %bb157 ]
+ %tmp28 = phi float [ 0.000000e+00, %bb ], [ %tmp351, %bb157 ]
+ %tmp29 = phi float [ 0.000000e+00, %bb ], [ %tmp347, %bb157 ]
+ %tmp30 = phi float [ 0.000000e+00, %bb ], [ %tmp346, %bb157 ]
+ %tmp31 = phi float [ 0.000000e+00, %bb ], [ %tmp345, %bb157 ]
+ %tmp32 = phi float [ 0.000000e+00, %bb ], [ %tmp352, %bb157 ]
+ %tmp33 = phi float [ 0.000000e+00, %bb ], [ %tmp344, %bb157 ]
+ %tmp34 = phi float [ 0.000000e+00, %bb ], [ %tmp343, %bb157 ]
+ %tmp35 = phi float [ 0.000000e+00, %bb ], [ %tmp342, %bb157 ]
+ %tmp36 = phi float [ 0.000000e+00, %bb ], [ %tmp353, %bb157 ]
+ %tmp37 = phi float [ 0.000000e+00, %bb ], [ %tmp341, %bb157 ]
+ %tmp38 = phi float [ 0.000000e+00, %bb ], [ %tmp340, %bb157 ]
+ %tmp39 = phi float [ 0.000000e+00, %bb ], [ %tmp339, %bb157 ]
+ %tmp40 = phi float [ 0.000000e+00, %bb ], [ %tmp354, %bb157 ]
+ %tmp41 = phi float [ 0.000000e+00, %bb ], [ %tmp338, %bb157 ]
+ %tmp42 = phi float [ 0.000000e+00, %bb ], [ %tmp337, %bb157 ]
+ %tmp43 = phi float [ 0.000000e+00, %bb ], [ %tmp336, %bb157 ]
+ %tmp44 = phi float [ 0.000000e+00, %bb ], [ %tmp355, %bb157 ]
+ %tmp45 = phi float [ 0.000000e+00, %bb ], [ %tmp335, %bb157 ]
+ %tmp46 = phi float [ 0.000000e+00, %bb ], [ %tmp334, %bb157 ]
+ %tmp47 = phi float [ 0.000000e+00, %bb ], [ %tmp333, %bb157 ]
+ %tmp48 = phi float [ 0.000000e+00, %bb ], [ %tmp356, %bb157 ]
+ %tmp49 = phi float [ 0.000000e+00, %bb ], [ %tmp332, %bb157 ]
+ %tmp50 = phi float [ 0.000000e+00, %bb ], [ %tmp331, %bb157 ]
+ %tmp51 = phi float [ 0.000000e+00, %bb ], [ %tmp330, %bb157 ]
+ %tmp52 = phi float [ 0.000000e+00, %bb ], [ %tmp357, %bb157 ]
+ %tmp53 = phi float [ 0.000000e+00, %bb ], [ %tmp329, %bb157 ]
+ %tmp54 = phi float [ 0.000000e+00, %bb ], [ %tmp328, %bb157 ]
+ %tmp55 = phi float [ 0.000000e+00, %bb ], [ %tmp327, %bb157 ]
+ %tmp56 = phi float [ 0.000000e+00, %bb ], [ %tmp358, %bb157 ]
+ %tmp57 = phi float [ 0.000000e+00, %bb ], [ %tmp326, %bb157 ]
+ %tmp58 = phi float [ 0.000000e+00, %bb ], [ %tmp325, %bb157 ]
+ %tmp59 = phi float [ 0.000000e+00, %bb ], [ %tmp324, %bb157 ]
+ %tmp60 = phi float [ 0.000000e+00, %bb ], [ %tmp359, %bb157 ]
+ %tmp61 = phi float [ 0.000000e+00, %bb ], [ %tmp323, %bb157 ]
+ %tmp62 = phi float [ 0.000000e+00, %bb ], [ %tmp322, %bb157 ]
+ %tmp63 = phi float [ 0.000000e+00, %bb ], [ %tmp321, %bb157 ]
+ %tmp64 = phi float [ 0.000000e+00, %bb ], [ %tmp360, %bb157 ]
+ %tmp65 = phi float [ 0.000000e+00, %bb ], [ %tmp320, %bb157 ]
+ %tmp66 = phi float [ 0.000000e+00, %bb ], [ %tmp319, %bb157 ]
+ %tmp67 = phi float [ 0.000000e+00, %bb ], [ %tmp318, %bb157 ]
+ %tmp68 = phi float [ 0.000000e+00, %bb ], [ %tmp361, %bb157 ]
+ %tmp69 = phi float [ 0.000000e+00, %bb ], [ %tmp317, %bb157 ]
+ %tmp70 = phi float [ 0.000000e+00, %bb ], [ %tmp316, %bb157 ]
+ %tmp71 = phi float [ 0.000000e+00, %bb ], [ %tmp315, %bb157 ]
+ %tmp72 = phi float [ 0.000000e+00, %bb ], [ %tmp362, %bb157 ]
+ %tmp73 = phi float [ 0.000000e+00, %bb ], [ %tmp314, %bb157 ]
+ %tmp74 = phi float [ 0.000000e+00, %bb ], [ %tmp313, %bb157 ]
+ %tmp75 = phi float [ 0.000000e+00, %bb ], [ %tmp312, %bb157 ]
+ %tmp76 = phi float [ 0.000000e+00, %bb ], [ %tmp363, %bb157 ]
+ %tmp77 = phi float [ 0.000000e+00, %bb ], [ %tmp311, %bb157 ]
+ %tmp78 = phi float [ 0.000000e+00, %bb ], [ %tmp310, %bb157 ]
+ %tmp79 = phi float [ 0.000000e+00, %bb ], [ %tmp309, %bb157 ]
+ %tmp80 = phi float [ 0.000000e+00, %bb ], [ %tmp364, %bb157 ]
+ %tmp81 = phi float [ 0.000000e+00, %bb ], [ %tmp308, %bb157 ]
+ %tmp82 = phi float [ 0.000000e+00, %bb ], [ %tmp307, %bb157 ]
+ %tmp83 = phi float [ 0.000000e+00, %bb ], [ %tmp306, %bb157 ]
+ %tmp84 = phi float [ 0.000000e+00, %bb ], [ %tmp365, %bb157 ]
+ %tmp85 = phi float [ 0.000000e+00, %bb ], [ %tmp305, %bb157 ]
+ %tmp86 = phi float [ 0.000000e+00, %bb ], [ %tmp304, %bb157 ]
+ %tmp87 = phi float [ 0.000000e+00, %bb ], [ %tmp303, %bb157 ]
+ %tmp88 = phi float [ 0.000000e+00, %bb ], [ %tmp366, %bb157 ]
+ %tmp89 = phi float [ 0.000000e+00, %bb ], [ %tmp302, %bb157 ]
+ %tmp90 = phi float [ 0.000000e+00, %bb ], [ %tmp301, %bb157 ]
+ %tmp91 = phi float [ 0.000000e+00, %bb ], [ %tmp300, %bb157 ]
+ %tmp92 = phi float [ 0.000000e+00, %bb ], [ %tmp367, %bb157 ]
+ %tmp93 = phi float [ 0.000000e+00, %bb ], [ %tmp299, %bb157 ]
+ %tmp94 = phi float [ 0.000000e+00, %bb ], [ %tmp298, %bb157 ]
+ %tmp95 = phi float [ 0.000000e+00, %bb ], [ %tmp297, %bb157 ]
+ %tmp96 = phi float [ 0.000000e+00, %bb ], [ %tmp368, %bb157 ]
+ %tmp97 = phi float [ 0.000000e+00, %bb ], [ %tmp296, %bb157 ]
+ %tmp98 = phi float [ 0.000000e+00, %bb ], [ %tmp295, %bb157 ]
+ %tmp99 = phi float [ 0.000000e+00, %bb ], [ %tmp294, %bb157 ]
+ %tmp100 = phi float [ 0.000000e+00, %bb ], [ %tmp369, %bb157 ]
+ %tmp101 = phi float [ 0.000000e+00, %bb ], [ %tmp293, %bb157 ]
+ %tmp102 = phi float [ 0.000000e+00, %bb ], [ %tmp292, %bb157 ]
+ %tmp103 = phi float [ 0.000000e+00, %bb ], [ %tmp291, %bb157 ]
+ %tmp104 = phi float [ 0.000000e+00, %bb ], [ %tmp370, %bb157 ]
+ %tmp105 = phi float [ 0.000000e+00, %bb ], [ %tmp371, %bb157 ]
+ %tmp106 = phi float [ 0.000000e+00, %bb ], [ %tmp372, %bb157 ]
+ %tmp107 = phi float [ 0.000000e+00, %bb ], [ %tmp421, %bb157 ]
+ %tmp108 = phi float [ 0.000000e+00, %bb ], [ %tmp373, %bb157 ]
+ %tmp109 = phi float [ 0.000000e+00, %bb ], [ %tmp374, %bb157 ]
+ %tmp110 = phi float [ 0.000000e+00, %bb ], [ %tmp375, %bb157 ]
+ %tmp111 = phi float [ 0.000000e+00, %bb ], [ %tmp376, %bb157 ]
+ %tmp112 = phi float [ 0.000000e+00, %bb ], [ %tmp377, %bb157 ]
+ %tmp113 = phi float [ 0.000000e+00, %bb ], [ %tmp378, %bb157 ]
+ %tmp114 = phi float [ 0.000000e+00, %bb ], [ %tmp379, %bb157 ]
+ %tmp115 = phi float [ 0.000000e+00, %bb ], [ %tmp380, %bb157 ]
+ %tmp116 = phi float [ 0.000000e+00, %bb ], [ %tmp381, %bb157 ]
+ %tmp117 = phi float [ 0.000000e+00, %bb ], [ %tmp382, %bb157 ]
+ %tmp118 = phi float [ 0.000000e+00, %bb ], [ %tmp383, %bb157 ]
+ %tmp119 = phi float [ 0.000000e+00, %bb ], [ %tmp384, %bb157 ]
+ %tmp120 = phi float [ 0.000000e+00, %bb ], [ %tmp385, %bb157 ]
+ %tmp121 = phi float [ 0.000000e+00, %bb ], [ %tmp386, %bb157 ]
+ %tmp122 = phi float [ 0.000000e+00, %bb ], [ %tmp387, %bb157 ]
+ %tmp123 = phi float [ 0.000000e+00, %bb ], [ %tmp388, %bb157 ]
+ %tmp124 = phi float [ 0.000000e+00, %bb ], [ %tmp389, %bb157 ]
+ %tmp125 = phi float [ 0.000000e+00, %bb ], [ %tmp390, %bb157 ]
+ %tmp126 = phi float [ 0.000000e+00, %bb ], [ %tmp391, %bb157 ]
+ %tmp127 = phi float [ 0.000000e+00, %bb ], [ %tmp392, %bb157 ]
+ %tmp128 = phi float [ 0.000000e+00, %bb ], [ %tmp393, %bb157 ]
+ %tmp129 = phi float [ 0.000000e+00, %bb ], [ %tmp394, %bb157 ]
+ %tmp130 = phi float [ 0.000000e+00, %bb ], [ %tmp395, %bb157 ]
+ %tmp131 = phi float [ 0.000000e+00, %bb ], [ %tmp396, %bb157 ]
+ %tmp132 = phi float [ 0.000000e+00, %bb ], [ %tmp397, %bb157 ]
+ %tmp133 = phi float [ 0.000000e+00, %bb ], [ %tmp398, %bb157 ]
+ %tmp134 = phi float [ 0.000000e+00, %bb ], [ %tmp399, %bb157 ]
+ %tmp135 = phi float [ 0.000000e+00, %bb ], [ %tmp400, %bb157 ]
+ %tmp136 = phi float [ 0.000000e+00, %bb ], [ %tmp401, %bb157 ]
+ %tmp137 = phi float [ 0.000000e+00, %bb ], [ %tmp402, %bb157 ]
+ %tmp138 = phi float [ 0.000000e+00, %bb ], [ %tmp403, %bb157 ]
+ %tmp139 = phi float [ 0.000000e+00, %bb ], [ %tmp404, %bb157 ]
+ %tmp140 = phi float [ 0.000000e+00, %bb ], [ %tmp405, %bb157 ]
+ %tmp141 = phi float [ 0.000000e+00, %bb ], [ %tmp406, %bb157 ]
+ %tmp142 = phi float [ 0.000000e+00, %bb ], [ %tmp407, %bb157 ]
+ %tmp143 = phi float [ 0.000000e+00, %bb ], [ %tmp408, %bb157 ]
+ %tmp144 = phi float [ 0.000000e+00, %bb ], [ %tmp409, %bb157 ]
+ %tmp145 = phi float [ 0.000000e+00, %bb ], [ %tmp410, %bb157 ]
+ %tmp146 = phi float [ 0.000000e+00, %bb ], [ %tmp411, %bb157 ]
+ %tmp147 = phi float [ 0.000000e+00, %bb ], [ %tmp412, %bb157 ]
+ %tmp148 = phi float [ 0.000000e+00, %bb ], [ %tmp413, %bb157 ]
+ %tmp149 = phi float [ 0.000000e+00, %bb ], [ %tmp414, %bb157 ]
+ %tmp150 = phi float [ 0.000000e+00, %bb ], [ %tmp415, %bb157 ]
+ %tmp151 = phi float [ 0.000000e+00, %bb ], [ %tmp416, %bb157 ]
+ %tmp152 = phi float [ 0.000000e+00, %bb ], [ %tmp417, %bb157 ]
+ %tmp153 = phi float [ 0.000000e+00, %bb ], [ %tmp418, %bb157 ]
+ %tmp154 = bitcast float %tmp107 to i32
+ %tmp155 = icmp sgt i32 %tmp154, 125
+ br i1 %tmp155, label %bb156, label %bb157
+
+bb156: ; preds = %bb24
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp12, float %tmp103, float %tmp102, float %tmp101)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %tmp99, float %tmp98, float %tmp97, float %tmp95)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %tmp94, float %tmp93, float %tmp91, float %tmp90)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %tmp89, float %tmp87, float %tmp86, float %tmp85)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %tmp83, float %tmp82, float %tmp81, float %tmp79)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %tmp78, float %tmp77, float %tmp75, float %tmp74)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %tmp73, float %tmp71, float %tmp70, float %tmp69)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %tmp67, float %tmp66, float %tmp65, float %tmp63)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %tmp62, float %tmp61, float %tmp59, float %tmp58)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %tmp57, float %tmp55, float %tmp54, float %tmp53)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %tmp51, float %tmp50, float %tmp49, float %tmp47)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 43, i32 0, float %tmp46, float %tmp45, float %tmp43, float %tmp42)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 44, i32 0, float %tmp41, float %tmp39, float %tmp38, float %tmp37)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 45, i32 0, float %tmp35, float %tmp34, float %tmp33, float %tmp31)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 46, i32 0, float %tmp30, float %tmp29, float %tmp27, float %tmp26)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 47, i32 0, float %tmp25, float %tmp28, float %tmp32, float %tmp36)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 48, i32 0, float %tmp40, float %tmp44, float %tmp48, float %tmp52)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 49, i32 0, float %tmp56, float %tmp60, float %tmp64, float %tmp68)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 50, i32 0, float %tmp72, float %tmp76, float %tmp80, float %tmp84)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 51, i32 0, float %tmp88, float %tmp92, float %tmp96, float %tmp100)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 52, i32 0, float %tmp104, float %tmp105, float %tmp106, float %tmp108)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 53, i32 0, float %tmp109, float %tmp110, float %tmp111, float %tmp112)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 54, i32 0, float %tmp113, float %tmp114, float %tmp115, float %tmp116)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 55, i32 0, float %tmp117, float %tmp118, float %tmp119, float %tmp120)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 56, i32 0, float %tmp121, float %tmp122, float %tmp123, float %tmp124)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 57, i32 0, float %tmp125, float %tmp126, float %tmp127, float %tmp128)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 58, i32 0, float %tmp129, float %tmp130, float %tmp131, float %tmp132)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 59, i32 0, float %tmp133, float %tmp134, float %tmp135, float %tmp136)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 60, i32 0, float %tmp137, float %tmp138, float %tmp139, float %tmp140)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 61, i32 0, float %tmp141, float %tmp142, float %tmp143, float %tmp144)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 62, i32 0, float %tmp145, float %tmp146, float %tmp147, float %tmp148)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float %tmp149, float %tmp150, float %tmp151, float %tmp13)
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
+ ret void
+
+bb157: ; preds = %bb24
+ %tmp158 = bitcast float %tmp107 to i32
+ %tmp159 = bitcast float %tmp107 to i32
+ %tmp160 = add i32 %tmp23, %tmp159
+ %tmp161 = bitcast i32 %tmp160 to float
+ %tmp162 = insertelement <128 x float> undef, float %tmp103, i32 0
+ %tmp163 = insertelement <128 x float> %tmp162, float %tmp102, i32 1
+ %tmp164 = insertelement <128 x float> %tmp163, float %tmp101, i32 2
+ %tmp165 = insertelement <128 x float> %tmp164, float %tmp99, i32 3
+ %tmp166 = insertelement <128 x float> %tmp165, float %tmp98, i32 4
+ %tmp167 = insertelement <128 x float> %tmp166, float %tmp97, i32 5
+ %tmp168 = insertelement <128 x float> %tmp167, float %tmp95, i32 6
+ %tmp169 = insertelement <128 x float> %tmp168, float %tmp94, i32 7
+ %tmp170 = insertelement <128 x float> %tmp169, float %tmp93, i32 8
+ %tmp171 = insertelement <128 x float> %tmp170, float %tmp91, i32 9
+ %tmp172 = insertelement <128 x float> %tmp171, float %tmp90, i32 10
+ %tmp173 = insertelement <128 x float> %tmp172, float %tmp89, i32 11
+ %tmp174 = insertelement <128 x float> %tmp173, float %tmp87, i32 12
+ %tmp175 = insertelement <128 x float> %tmp174, float %tmp86, i32 13
+ %tmp176 = insertelement <128 x float> %tmp175, float %tmp85, i32 14
+ %tmp177 = insertelement <128 x float> %tmp176, float %tmp83, i32 15
+ %tmp178 = insertelement <128 x float> %tmp177, float %tmp82, i32 16
+ %tmp179 = insertelement <128 x float> %tmp178, float %tmp81, i32 17
+ %tmp180 = insertelement <128 x float> %tmp179, float %tmp79, i32 18
+ %tmp181 = insertelement <128 x float> %tmp180, float %tmp78, i32 19
+ %tmp182 = insertelement <128 x float> %tmp181, float %tmp77, i32 20
+ %tmp183 = insertelement <128 x float> %tmp182, float %tmp75, i32 21
+ %tmp184 = insertelement <128 x float> %tmp183, float %tmp74, i32 22
+ %tmp185 = insertelement <128 x float> %tmp184, float %tmp73, i32 23
+ %tmp186 = insertelement <128 x float> %tmp185, float %tmp71, i32 24
+ %tmp187 = insertelement <128 x float> %tmp186, float %tmp70, i32 25
+ %tmp188 = insertelement <128 x float> %tmp187, float %tmp69, i32 26
+ %tmp189 = insertelement <128 x float> %tmp188, float %tmp67, i32 27
+ %tmp190 = insertelement <128 x float> %tmp189, float %tmp66, i32 28
+ %tmp191 = insertelement <128 x float> %tmp190, float %tmp65, i32 29
+ %tmp192 = insertelement <128 x float> %tmp191, float %tmp63, i32 30
+ %tmp193 = insertelement <128 x float> %tmp192, float %tmp62, i32 31
+ %tmp194 = insertelement <128 x float> %tmp193, float %tmp61, i32 32
+ %tmp195 = insertelement <128 x float> %tmp194, float %tmp59, i32 33
+ %tmp196 = insertelement <128 x float> %tmp195, float %tmp58, i32 34
+ %tmp197 = insertelement <128 x float> %tmp196, float %tmp57, i32 35
+ %tmp198 = insertelement <128 x float> %tmp197, float %tmp55, i32 36
+ %tmp199 = insertelement <128 x float> %tmp198, float %tmp54, i32 37
+ %tmp200 = insertelement <128 x float> %tmp199, float %tmp53, i32 38
+ %tmp201 = insertelement <128 x float> %tmp200, float %tmp51, i32 39
+ %tmp202 = insertelement <128 x float> %tmp201, float %tmp50, i32 40
+ %tmp203 = insertelement <128 x float> %tmp202, float %tmp49, i32 41
+ %tmp204 = insertelement <128 x float> %tmp203, float %tmp47, i32 42
+ %tmp205 = insertelement <128 x float> %tmp204, float %tmp46, i32 43
+ %tmp206 = insertelement <128 x float> %tmp205, float %tmp45, i32 44
+ %tmp207 = insertelement <128 x float> %tmp206, float %tmp43, i32 45
+ %tmp208 = insertelement <128 x float> %tmp207, float %tmp42, i32 46
+ %tmp209 = insertelement <128 x float> %tmp208, float %tmp41, i32 47
+ %tmp210 = insertelement <128 x float> %tmp209, float %tmp39, i32 48
+ %tmp211 = insertelement <128 x float> %tmp210, float %tmp38, i32 49
+ %tmp212 = insertelement <128 x float> %tmp211, float %tmp37, i32 50
+ %tmp213 = insertelement <128 x float> %tmp212, float %tmp35, i32 51
+ %tmp214 = insertelement <128 x float> %tmp213, float %tmp34, i32 52
+ %tmp215 = insertelement <128 x float> %tmp214, float %tmp33, i32 53
+ %tmp216 = insertelement <128 x float> %tmp215, float %tmp31, i32 54
+ %tmp217 = insertelement <128 x float> %tmp216, float %tmp30, i32 55
+ %tmp218 = insertelement <128 x float> %tmp217, float %tmp29, i32 56
+ %tmp219 = insertelement <128 x float> %tmp218, float %tmp27, i32 57
+ %tmp220 = insertelement <128 x float> %tmp219, float %tmp26, i32 58
+ %tmp221 = insertelement <128 x float> %tmp220, float %tmp25, i32 59
+ %tmp222 = insertelement <128 x float> %tmp221, float %tmp28, i32 60
+ %tmp223 = insertelement <128 x float> %tmp222, float %tmp32, i32 61
+ %tmp224 = insertelement <128 x float> %tmp223, float %tmp36, i32 62
+ %tmp225 = insertelement <128 x float> %tmp224, float %tmp40, i32 63
+ %tmp226 = insertelement <128 x float> %tmp225, float %tmp44, i32 64
+ %tmp227 = insertelement <128 x float> %tmp226, float %tmp48, i32 65
+ %tmp228 = insertelement <128 x float> %tmp227, float %tmp52, i32 66
+ %tmp229 = insertelement <128 x float> %tmp228, float %tmp56, i32 67
+ %tmp230 = insertelement <128 x float> %tmp229, float %tmp60, i32 68
+ %tmp231 = insertelement <128 x float> %tmp230, float %tmp64, i32 69
+ %tmp232 = insertelement <128 x float> %tmp231, float %tmp68, i32 70
+ %tmp233 = insertelement <128 x float> %tmp232, float %tmp72, i32 71
+ %tmp234 = insertelement <128 x float> %tmp233, float %tmp76, i32 72
+ %tmp235 = insertelement <128 x float> %tmp234, float %tmp80, i32 73
+ %tmp236 = insertelement <128 x float> %tmp235, float %tmp84, i32 74
+ %tmp237 = insertelement <128 x float> %tmp236, float %tmp88, i32 75
+ %tmp238 = insertelement <128 x float> %tmp237, float %tmp92, i32 76
+ %tmp239 = insertelement <128 x float> %tmp238, float %tmp96, i32 77
+ %tmp240 = insertelement <128 x float> %tmp239, float %tmp100, i32 78
+ %tmp241 = insertelement <128 x float> %tmp240, float %tmp104, i32 79
+ %tmp242 = insertelement <128 x float> %tmp241, float %tmp105, i32 80
+ %tmp243 = insertelement <128 x float> %tmp242, float %tmp106, i32 81
+ %tmp244 = insertelement <128 x float> %tmp243, float %tmp108, i32 82
+ %tmp245 = insertelement <128 x float> %tmp244, float %tmp109, i32 83
+ %tmp246 = insertelement <128 x float> %tmp245, float %tmp110, i32 84
+ %tmp247 = insertelement <128 x float> %tmp246, float %tmp111, i32 85
+ %tmp248 = insertelement <128 x float> %tmp247, float %tmp112, i32 86
+ %tmp249 = insertelement <128 x float> %tmp248, float %tmp113, i32 87
+ %tmp250 = insertelement <128 x float> %tmp249, float %tmp114, i32 88
+ %tmp251 = insertelement <128 x float> %tmp250, float %tmp115, i32 89
+ %tmp252 = insertelement <128 x float> %tmp251, float %tmp116, i32 90
+ %tmp253 = insertelement <128 x float> %tmp252, float %tmp117, i32 91
+ %tmp254 = insertelement <128 x float> %tmp253, float %tmp118, i32 92
+ %tmp255 = insertelement <128 x float> %tmp254, float %tmp119, i32 93
+ %tmp256 = insertelement <128 x float> %tmp255, float %tmp120, i32 94
+ %tmp257 = insertelement <128 x float> %tmp256, float %tmp121, i32 95
+ %tmp258 = insertelement <128 x float> %tmp257, float %tmp122, i32 96
+ %tmp259 = insertelement <128 x float> %tmp258, float %tmp123, i32 97
+ %tmp260 = insertelement <128 x float> %tmp259, float %tmp124, i32 98
+ %tmp261 = insertelement <128 x float> %tmp260, float %tmp125, i32 99
+ %tmp262 = insertelement <128 x float> %tmp261, float %tmp126, i32 100
+ %tmp263 = insertelement <128 x float> %tmp262, float %tmp127, i32 101
+ %tmp264 = insertelement <128 x float> %tmp263, float %tmp128, i32 102
+ %tmp265 = insertelement <128 x float> %tmp264, float %tmp129, i32 103
+ %tmp266 = insertelement <128 x float> %tmp265, float %tmp130, i32 104
+ %tmp267 = insertelement <128 x float> %tmp266, float %tmp131, i32 105
+ %tmp268 = insertelement <128 x float> %tmp267, float %tmp132, i32 106
+ %tmp269 = insertelement <128 x float> %tmp268, float %tmp133, i32 107
+ %tmp270 = insertelement <128 x float> %tmp269, float %tmp134, i32 108
+ %tmp271 = insertelement <128 x float> %tmp270, float %tmp135, i32 109
+ %tmp272 = insertelement <128 x float> %tmp271, float %tmp136, i32 110
+ %tmp273 = insertelement <128 x float> %tmp272, float %tmp137, i32 111
+ %tmp274 = insertelement <128 x float> %tmp273, float %tmp138, i32 112
+ %tmp275 = insertelement <128 x float> %tmp274, float %tmp139, i32 113
+ %tmp276 = insertelement <128 x float> %tmp275, float %tmp140, i32 114
+ %tmp277 = insertelement <128 x float> %tmp276, float %tmp141, i32 115
+ %tmp278 = insertelement <128 x float> %tmp277, float %tmp142, i32 116
+ %tmp279 = insertelement <128 x float> %tmp278, float %tmp143, i32 117
+ %tmp280 = insertelement <128 x float> %tmp279, float %tmp144, i32 118
+ %tmp281 = insertelement <128 x float> %tmp280, float %tmp145, i32 119
+ %tmp282 = insertelement <128 x float> %tmp281, float %tmp146, i32 120
+ %tmp283 = insertelement <128 x float> %tmp282, float %tmp147, i32 121
+ %tmp284 = insertelement <128 x float> %tmp283, float %tmp148, i32 122
+ %tmp285 = insertelement <128 x float> %tmp284, float %tmp149, i32 123
+ %tmp286 = insertelement <128 x float> %tmp285, float %tmp150, i32 124
+ %tmp287 = insertelement <128 x float> %tmp286, float %tmp151, i32 125
+ %tmp288 = insertelement <128 x float> %tmp287, float %tmp152, i32 126
+ %tmp289 = insertelement <128 x float> %tmp288, float %tmp153, i32 127
+ %tmp290 = insertelement <128 x float> %tmp289, float %tmp161, i32 %tmp158
+ %tmp291 = extractelement <128 x float> %tmp290, i32 0
+ %tmp292 = extractelement <128 x float> %tmp290, i32 1
+ %tmp293 = extractelement <128 x float> %tmp290, i32 2
+ %tmp294 = extractelement <128 x float> %tmp290, i32 3
+ %tmp295 = extractelement <128 x float> %tmp290, i32 4
+ %tmp296 = extractelement <128 x float> %tmp290, i32 5
+ %tmp297 = extractelement <128 x float> %tmp290, i32 6
+ %tmp298 = extractelement <128 x float> %tmp290, i32 7
+ %tmp299 = extractelement <128 x float> %tmp290, i32 8
+ %tmp300 = extractelement <128 x float> %tmp290, i32 9
+ %tmp301 = extractelement <128 x float> %tmp290, i32 10
+ %tmp302 = extractelement <128 x float> %tmp290, i32 11
+ %tmp303 = extractelement <128 x float> %tmp290, i32 12
+ %tmp304 = extractelement <128 x float> %tmp290, i32 13
+ %tmp305 = extractelement <128 x float> %tmp290, i32 14
+ %tmp306 = extractelement <128 x float> %tmp290, i32 15
+ %tmp307 = extractelement <128 x float> %tmp290, i32 16
+ %tmp308 = extractelement <128 x float> %tmp290, i32 17
+ %tmp309 = extractelement <128 x float> %tmp290, i32 18
+ %tmp310 = extractelement <128 x float> %tmp290, i32 19
+ %tmp311 = extractelement <128 x float> %tmp290, i32 20
+ %tmp312 = extractelement <128 x float> %tmp290, i32 21
+ %tmp313 = extractelement <128 x float> %tmp290, i32 22
+ %tmp314 = extractelement <128 x float> %tmp290, i32 23
+ %tmp315 = extractelement <128 x float> %tmp290, i32 24
+ %tmp316 = extractelement <128 x float> %tmp290, i32 25
+ %tmp317 = extractelement <128 x float> %tmp290, i32 26
+ %tmp318 = extractelement <128 x float> %tmp290, i32 27
+ %tmp319 = extractelement <128 x float> %tmp290, i32 28
+ %tmp320 = extractelement <128 x float> %tmp290, i32 29
+ %tmp321 = extractelement <128 x float> %tmp290, i32 30
+ %tmp322 = extractelement <128 x float> %tmp290, i32 31
+ %tmp323 = extractelement <128 x float> %tmp290, i32 32
+ %tmp324 = extractelement <128 x float> %tmp290, i32 33
+ %tmp325 = extractelement <128 x float> %tmp290, i32 34
+ %tmp326 = extractelement <128 x float> %tmp290, i32 35
+ %tmp327 = extractelement <128 x float> %tmp290, i32 36
+ %tmp328 = extractelement <128 x float> %tmp290, i32 37
+ %tmp329 = extractelement <128 x float> %tmp290, i32 38
+ %tmp330 = extractelement <128 x float> %tmp290, i32 39
+ %tmp331 = extractelement <128 x float> %tmp290, i32 40
+ %tmp332 = extractelement <128 x float> %tmp290, i32 41
+ %tmp333 = extractelement <128 x float> %tmp290, i32 42
+ %tmp334 = extractelement <128 x float> %tmp290, i32 43
+ %tmp335 = extractelement <128 x float> %tmp290, i32 44
+ %tmp336 = extractelement <128 x float> %tmp290, i32 45
+ %tmp337 = extractelement <128 x float> %tmp290, i32 46
+ %tmp338 = extractelement <128 x float> %tmp290, i32 47
+ %tmp339 = extractelement <128 x float> %tmp290, i32 48
+ %tmp340 = extractelement <128 x float> %tmp290, i32 49
+ %tmp341 = extractelement <128 x float> %tmp290, i32 50
+ %tmp342 = extractelement <128 x float> %tmp290, i32 51
+ %tmp343 = extractelement <128 x float> %tmp290, i32 52
+ %tmp344 = extractelement <128 x float> %tmp290, i32 53
+ %tmp345 = extractelement <128 x float> %tmp290, i32 54
+ %tmp346 = extractelement <128 x float> %tmp290, i32 55
+ %tmp347 = extractelement <128 x float> %tmp290, i32 56
+ %tmp348 = extractelement <128 x float> %tmp290, i32 57
+ %tmp349 = extractelement <128 x float> %tmp290, i32 58
+ %tmp350 = extractelement <128 x float> %tmp290, i32 59
+ %tmp351 = extractelement <128 x float> %tmp290, i32 60
+ %tmp352 = extractelement <128 x float> %tmp290, i32 61
+ %tmp353 = extractelement <128 x float> %tmp290, i32 62
+ %tmp354 = extractelement <128 x float> %tmp290, i32 63
+ %tmp355 = extractelement <128 x float> %tmp290, i32 64
+ %tmp356 = extractelement <128 x float> %tmp290, i32 65
+ %tmp357 = extractelement <128 x float> %tmp290, i32 66
+ %tmp358 = extractelement <128 x float> %tmp290, i32 67
+ %tmp359 = extractelement <128 x float> %tmp290, i32 68
+ %tmp360 = extractelement <128 x float> %tmp290, i32 69
+ %tmp361 = extractelement <128 x float> %tmp290, i32 70
+ %tmp362 = extractelement <128 x float> %tmp290, i32 71
+ %tmp363 = extractelement <128 x float> %tmp290, i32 72
+ %tmp364 = extractelement <128 x float> %tmp290, i32 73
+ %tmp365 = extractelement <128 x float> %tmp290, i32 74
+ %tmp366 = extractelement <128 x float> %tmp290, i32 75
+ %tmp367 = extractelement <128 x float> %tmp290, i32 76
+ %tmp368 = extractelement <128 x float> %tmp290, i32 77
+ %tmp369 = extractelement <128 x float> %tmp290, i32 78
+ %tmp370 = extractelement <128 x float> %tmp290, i32 79
+ %tmp371 = extractelement <128 x float> %tmp290, i32 80
+ %tmp372 = extractelement <128 x float> %tmp290, i32 81
+ %tmp373 = extractelement <128 x float> %tmp290, i32 82
+ %tmp374 = extractelement <128 x float> %tmp290, i32 83
+ %tmp375 = extractelement <128 x float> %tmp290, i32 84
+ %tmp376 = extractelement <128 x float> %tmp290, i32 85
+ %tmp377 = extractelement <128 x float> %tmp290, i32 86
+ %tmp378 = extractelement <128 x float> %tmp290, i32 87
+ %tmp379 = extractelement <128 x float> %tmp290, i32 88
+ %tmp380 = extractelement <128 x float> %tmp290, i32 89
+ %tmp381 = extractelement <128 x float> %tmp290, i32 90
+ %tmp382 = extractelement <128 x float> %tmp290, i32 91
+ %tmp383 = extractelement <128 x float> %tmp290, i32 92
+ %tmp384 = extractelement <128 x float> %tmp290, i32 93
+ %tmp385 = extractelement <128 x float> %tmp290, i32 94
+ %tmp386 = extractelement <128 x float> %tmp290, i32 95
+ %tmp387 = extractelement <128 x float> %tmp290, i32 96
+ %tmp388 = extractelement <128 x float> %tmp290, i32 97
+ %tmp389 = extractelement <128 x float> %tmp290, i32 98
+ %tmp390 = extractelement <128 x float> %tmp290, i32 99
+ %tmp391 = extractelement <128 x float> %tmp290, i32 100
+ %tmp392 = extractelement <128 x float> %tmp290, i32 101
+ %tmp393 = extractelement <128 x float> %tmp290, i32 102
+ %tmp394 = extractelement <128 x float> %tmp290, i32 103
+ %tmp395 = extractelement <128 x float> %tmp290, i32 104
+ %tmp396 = extractelement <128 x float> %tmp290, i32 105
+ %tmp397 = extractelement <128 x float> %tmp290, i32 106
+ %tmp398 = extractelement <128 x float> %tmp290, i32 107
+ %tmp399 = extractelement <128 x float> %tmp290, i32 108
+ %tmp400 = extractelement <128 x float> %tmp290, i32 109
+ %tmp401 = extractelement <128 x float> %tmp290, i32 110
+ %tmp402 = extractelement <128 x float> %tmp290, i32 111
+ %tmp403 = extractelement <128 x float> %tmp290, i32 112
+ %tmp404 = extractelement <128 x float> %tmp290, i32 113
+ %tmp405 = extractelement <128 x float> %tmp290, i32 114
+ %tmp406 = extractelement <128 x float> %tmp290, i32 115
+ %tmp407 = extractelement <128 x float> %tmp290, i32 116
+ %tmp408 = extractelement <128 x float> %tmp290, i32 117
+ %tmp409 = extractelement <128 x float> %tmp290, i32 118
+ %tmp410 = extractelement <128 x float> %tmp290, i32 119
+ %tmp411 = extractelement <128 x float> %tmp290, i32 120
+ %tmp412 = extractelement <128 x float> %tmp290, i32 121
+ %tmp413 = extractelement <128 x float> %tmp290, i32 122
+ %tmp414 = extractelement <128 x float> %tmp290, i32 123
+ %tmp415 = extractelement <128 x float> %tmp290, i32 124
+ %tmp416 = extractelement <128 x float> %tmp290, i32 125
+ %tmp417 = extractelement <128 x float> %tmp290, i32 126
+ %tmp418 = extractelement <128 x float> %tmp290, i32 127
+ %tmp419 = bitcast float %tmp107 to i32
+ %tmp420 = add i32 %tmp419, 1
+ %tmp421 = bitcast i32 %tmp420 to float
+ br label %bb24
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
+attributes #1 = { nounwind readnone }
+
+!0 = !{!1, !1, i64 0, i32 1}
+!1 = !{!"const", null}
diff --git a/test/CodeGen/AMDGPU/vop-shrink.ll b/test/CodeGen/AMDGPU/vop-shrink.ll
index 9b2f229c05af..2bfe1b2bd6ec 100644
--- a/test/CodeGen/AMDGPU/vop-shrink.ll
+++ b/test/CodeGen/AMDGPU/vop-shrink.ll
@@ -3,8 +3,8 @@
; Test that we correctly commute a sub instruction
; FUNC-LABEL: {{^}}sub_rev:
-; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s
-; SI: v_subrev_i32_e32 v{{[0-9]+}}, s
+; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s
+; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s
; ModuleID = 'vop-shrink.ll'
diff --git a/test/CodeGen/AMDGPU/wait.ll b/test/CodeGen/AMDGPU/wait.ll
index 5cc7577cad33..107e84b33be9 100644
--- a/test/CodeGen/AMDGPU/wait.ll
+++ b/test/CodeGen/AMDGPU/wait.ll
@@ -1,11 +1,16 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: s_load_dwordx4
-; CHECK: s_load_dwordx4
-; CHECK: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; CHECK: s_endpgm
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT
+; RUN: llc -march=amdgcn --misched=ilpmax -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX
+; RUN: llc -march=amdgcn --misched=ilpmax -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX
+; The ilpmax scheduler is used for the second test to get the ordering we want for the test.
+
+; DEFAULT-LABEL: {{^}}main:
+; DEFAULT: s_load_dwordx4
+; DEFAULT: s_load_dwordx4
+; DEFAULT: s_waitcnt vmcnt(0)
+; DEFAULT: exp
+; DEFAULT: s_waitcnt lgkmcnt(0)
+; DEFAULT: s_endpgm
define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
main_body:
%tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
@@ -29,7 +34,43 @@ main_body:
ret void
}
-; Function Attrs: noduplicate nounwind
+; ILPMAX-LABEL: {{^}}main2:
+; ILPMAX: s_load_dwordx4
+; ILPMAX: s_waitcnt lgkmcnt(0)
+; ILPMAX: buffer_load
+; ILPMAX: s_load_dwordx4
+; ILPMAX: s_waitcnt lgkmcnt(0)
+; ILPMAX: buffer_load
+; ILPMAX: s_waitcnt vmcnt(1)
+; ILPMAX: s_waitcnt vmcnt(0)
+; ILPMAX: s_endpgm
+
+define void @main2([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)*
+byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
+main_body:
+ %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
+ %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
+ %13 = add i32 %5, %7
+ %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
+ %15 = extractelement <4 x float> %14, i32 0
+ %16 = extractelement <4 x float> %14, i32 1
+ %17 = extractelement <4 x float> %14, i32 2
+ %18 = extractelement <4 x float> %14, i32 3
+ %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
+ %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
+ %21 = add i32 %5, %7
+ %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
+ %23 = extractelement <4 x float> %22, i32 0
+ %24 = extractelement <4 x float> %22, i32 1
+ %25 = extractelement <4 x float> %22, i32 2
+ %26 = extractelement <4 x float> %22, i32 3
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18)
+ call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26)
+ ret void
+}
+
+
+; Function Attrs: convergent nounwind
declare void @llvm.AMDGPU.barrier.global() #1
; Function Attrs: nounwind readnone
@@ -38,7 +79,7 @@ declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" }
-attributes #1 = { noduplicate nounwind }
+attributes #1 = { convergent nounwind }
attributes #2 = { nounwind readnone }
!0 = !{!1, !1, i64 0, i32 1}
diff --git a/test/CodeGen/AMDGPU/work-item-intrinsics.ll b/test/CodeGen/AMDGPU/work-item-intrinsics.ll
index 4328e964c1bf..e7fcd1ff3650 100644
--- a/test/CodeGen/AMDGPU/work-item-intrinsics.ll
+++ b/test/CodeGen/AMDGPU/work-item-intrinsics.ll
@@ -1,15 +1,34 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=HSA -check-prefix=CI-HSA -check-prefix=FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=HSA -check-prefix=VI-HSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[0].X
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
+
+; HSA: .amd_kernel_code_t
+
+; HSA: enable_sgpr_private_segment_buffer = 1
+; HSA: enable_sgpr_dispatch_ptr = 0
+; HSA: enable_sgpr_queue_ptr = 0
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_dispatch_id = 0
+; HSA: enable_sgpr_flat_scratch_init = 0
+; HSA: enable_sgpr_private_segment_size = 0
+; HSA: enable_sgpr_grid_workgroup_count_x = 0
+; HSA: enable_sgpr_grid_workgroup_count_y = 0
+; HSA: enable_sgpr_grid_workgroup_count_z = 0
+
+; HSA: .end_amd_kernel_code_t
+
+
+; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
-; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.x() #0
@@ -19,12 +38,12 @@ entry:
; FUNC-LABEL: {{^}}ngroups_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[0].Y
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.y() #0
@@ -34,12 +53,12 @@ entry:
; FUNC-LABEL: {{^}}ngroups_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[0].Z
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.z() #0
@@ -49,12 +68,12 @@ entry:
; FUNC-LABEL: {{^}}global_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[0].W
+; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.x() #0
@@ -64,12 +83,12 @@ entry:
; FUNC-LABEL: {{^}}global_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[1].X
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.y() #0
@@ -79,12 +98,12 @@ entry:
; FUNC-LABEL: {{^}}global_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[1].Y
+; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
+; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
+; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.z() #0
@@ -92,74 +111,34 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}local_size_x:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[1].Z
-
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
-define void @local_size_x (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.local.size.x() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}local_size_y:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[1].W
-
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
-define void @local_size_y (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.local.size.y() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}local_size_z:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[2].X
-
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
-define void @local_size_z (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.r600.read.local.size.z() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; FUNC-LABEL: {{^}}get_work_dim:
-; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; EG: MOV [[VAL]], KC0[2].Z
-
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
-; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; GCN: buffer_store_dword [[VVAL]]
-define void @get_work_dim (i32 addrspace(1)* %out) {
-entry:
- %0 = call i32 @llvm.AMDGPU.read.workdim() #0
- store i32 %0, i32 addrspace(1)* %out
- ret void
-}
-
-; The tgid values are stored in sgprs offset by the number of user sgprs.
-; Currently we always use exactly 2 user sgprs for the pointer to the
-; kernel arguments, but this may change in the future.
+; The tgid values are stored in sgprs offset by the number of user
+; sgprs.
; FUNC-LABEL: {{^}}tgid_x:
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
-; GCN: buffer_store_dword [[VVAL]]
-define void @tgid_x (i32 addrspace(1)* %out) {
+; HSA: .amd_kernel_code_t
+; HSA: compute_pgm_rsrc2_user_sgpr = 6
+; HSA: compute_pgm_rsrc2_tgid_x_en = 1
+; HSA: compute_pgm_rsrc2_tgid_y_en = 0
+; HSA: compute_pgm_rsrc2_tgid_z_en = 0
+; HSA: compute_pgm_rsrc2_tg_size_en = 0
+; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
+; HSA: enable_sgpr_grid_workgroup_count_x = 0
+; HSA: enable_sgpr_grid_workgroup_count_y = 0
+; HSA: enable_sgpr_grid_workgroup_count_z = 0
+; HSA: .end_amd_kernel_code_t
+
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
+; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}}
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+; HSA: flat_store_dword [[VVAL]]
+
+; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
+; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
+; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
+define void @tgid_x(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
store i32 %0, i32 addrspace(1)* %out
@@ -167,9 +146,26 @@ entry:
}
; FUNC-LABEL: {{^}}tgid_y:
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
-; GCN: buffer_store_dword [[VVAL]]
-define void @tgid_y (i32 addrspace(1)* %out) {
+; HSA: compute_pgm_rsrc2_user_sgpr = 6
+; HSA: compute_pgm_rsrc2_tgid_x_en = 1
+; HSA: compute_pgm_rsrc2_tgid_y_en = 1
+; HSA: compute_pgm_rsrc2_tgid_z_en = 0
+; HSA: compute_pgm_rsrc2_tg_size_en = 0
+; HSA: enable_sgpr_grid_workgroup_count_x = 0
+; HSA: enable_sgpr_grid_workgroup_count_y = 0
+; HSA: enable_sgpr_grid_workgroup_count_z = 0
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3
+; GCN-HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+; HSA: flat_store_dword [[VVAL]]
+
+; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
+; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
+; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
+define void @tgid_y(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
store i32 %0, i32 addrspace(1)* %out
@@ -177,36 +173,81 @@ entry:
}
; FUNC-LABEL: {{^}}tgid_z:
-; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
-; GCN: buffer_store_dword [[VVAL]]
-define void @tgid_z (i32 addrspace(1)* %out) {
+; HSA: compute_pgm_rsrc2_user_sgpr = 6
+; HSA: compute_pgm_rsrc2_tgid_x_en = 1
+; HSA: compute_pgm_rsrc2_tgid_y_en = 0
+; HSA: compute_pgm_rsrc2_tgid_z_en = 1
+; HSA: compute_pgm_rsrc2_tg_size_en = 0
+; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
+; HSA: enable_sgpr_private_segment_buffer = 1
+; HSA: enable_sgpr_dispatch_ptr = 0
+; HSA: enable_sgpr_queue_ptr = 0
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_dispatch_id = 0
+; HSA: enable_sgpr_flat_scratch_init = 0
+; HSA: enable_sgpr_private_segment_size = 0
+; HSA: enable_sgpr_grid_workgroup_count_x = 0
+; HSA: enable_sgpr_grid_workgroup_count_y = 0
+; HSA: enable_sgpr_grid_workgroup_count_z = 0
+
+; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
+; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
+; GCN-NOHSA: buffer_store_dword [[VVAL]]
+; HSA: flat_store_dword [[VVAL]]
+
+; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
+; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
+; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
+; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
+; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
+define void @tgid_z(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
+; GCN-NOHSA: .section .AMDGPU.config
+; GCN-NOHSA: .long 47180
+; GCN-NOHSA-NEXT: .long 132{{$}}
+
; FUNC-LABEL: {{^}}tidig_x:
-; GCN: buffer_store_dword v0
-define void @tidig_x (i32 addrspace(1)* %out) {
+; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
+; GCN-NOHSA: buffer_store_dword v0
+; HSA: flat_store_dword v0
+define void @tidig_x(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
+; GCN-NOHSA: .section .AMDGPU.config
+; GCN-NOHSA: .long 47180
+; GCN-NOHSA-NEXT: .long 2180{{$}}
+
; FUNC-LABEL: {{^}}tidig_y:
-; GCN: buffer_store_dword v1
-define void @tidig_y (i32 addrspace(1)* %out) {
+
+; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1
+; GCN-NOHSA: buffer_store_dword v1
+; HSA: flat_store_dword v1
+define void @tidig_y(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
store i32 %0, i32 addrspace(1)* %out
ret void
}
+; GCN-NOHSA: .section .AMDGPU.config
+; GCN-NOHSA: .long 47180
+; GCN-NOHSA-NEXT: .long 4228{{$}}
+
; FUNC-LABEL: {{^}}tidig_z:
-; GCN: buffer_store_dword v2
-define void @tidig_z (i32 addrspace(1)* %out) {
+; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2
+; GCN-NOHSA: buffer_store_dword v2
+; HSA: flat_store_dword v2
+define void @tidig_z(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
store i32 %0, i32 addrspace(1)* %out
@@ -221,10 +262,6 @@ declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0
-declare i32 @llvm.r600.read.local.size.x() #0
-declare i32 @llvm.r600.read.local.size.y() #0
-declare i32 @llvm.r600.read.local.size.z() #0
-
declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0
diff --git a/test/CodeGen/AMDGPU/xor.ll b/test/CodeGen/AMDGPU/xor.ll
index ddb920af29d8..655655d92f08 100644
--- a/test/CodeGen/AMDGPU/xor.ll
+++ b/test/CodeGen/AMDGPU/xor.ll
@@ -38,7 +38,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
}
; FUNC-LABEL: {{^}}xor_i1:
-; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
+; EG: XOR_INT {{\** *}}{{T[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}}
; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}}
; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}}
diff --git a/test/CodeGen/AMDGPU/zero_extend.ll b/test/CodeGen/AMDGPU/zero_extend.ll
index 033055db185a..35ddf2b0a465 100644
--- a/test/CodeGen/AMDGPU/zero_extend.ll
+++ b/test/CodeGen/AMDGPU/zero_extend.ll
@@ -7,8 +7,7 @@
; R600: MEM_RAT_CACHELESS STORE_RAW
; SI: {{^}}test:
-; SI: s_mov_b32 [[ZERO:s[0-9]]], 0{{$}}
-; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
+; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], 0{{$}}
; SI: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}}
define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
entry:
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index 9c0143be06c3..81a6bb64971d 100644
--- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6 | grep r9
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
-; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer
+; RUN: -mattr=+v6,+reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer
; | grep 35
define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) {
diff --git a/test/CodeGen/ARM/2009-10-16-Scope.ll b/test/CodeGen/ARM/2009-10-16-Scope.ll
index 3f47488372b8..613694f091d1 100644
--- a/test/CodeGen/ARM/2009-10-16-Scope.ll
+++ b/test/CodeGen/ARM/2009-10-16-Scope.ll
@@ -24,9 +24,9 @@ declare i32 @foo(i32) ssp
!0 = !DILocation(line: 5, column: 2, scope: !1)
!1 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !2)
-!2 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
-!4 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "count_", line: 5, scope: !5, file: !3, type: !6)
+!2 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
+!4 = !DILocalVariable(name: "count_", line: 5, scope: !5, file: !3, type: !6)
!5 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !1)
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!7 = !DILocation(line: 6, column: 1, scope: !2)
diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
index 638b26c73146..1341830b4a4b 100644
--- a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
+++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -14,11 +14,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!15}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 93, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "__addvsi3", linkageName: "__addvsi3", line: 94, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: null, type: !4)
+!0 = !DILocalVariable(name: "b", line: 93, arg: 2, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "__addvsi3", linkageName: "__addvsi3", line: 94, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: null, type: !4)
!2 = !DIFile(filename: "libgcc2.c", directory: "/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc")
!12 = !DIFile(filename: "libgcc2.c", directory: "/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13, subprograms: !14)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13, subprograms: !14)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !6, !6}
!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "SItype", line: 152, file: !12, baseType: !8)
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index cfaffd8234ba..171b6d2bcc5c 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,36 +1,36 @@
; RUN: llc -mtriple=arm-eabi -mattr=+neon -O0 -optimize-regalloc -regalloc=basic %s -o /dev/null
; This test would crash the rewriter when trying to handle a spill after one of
-; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
+; the @llvm.arm.neon.vld3.v8i8.p0i8 defined three parts of a register.
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
-declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind {
- %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 0 ; <<8 x i8>> [#uses=1]
%tmp4b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 1 ; <<8 x i8>> [#uses=1]
- %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1]
%tmp4d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 1 ; <<8 x i8>> [#uses=1]
- %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+ %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
%tmp2e = extractvalue %struct.__neon_int8x8x3_t %tmp1e, 0 ; <<8 x i8>> [#uses=1]
- %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+ %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
%tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1]
- %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 0 ; <<8 x i8>> [#uses=1]
%tmp4g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 1 ; <<8 x i8>> [#uses=1]
- %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 0 ; <<8 x i8>> [#uses=1]
%tmp3h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 2 ; <<8 x i8>> [#uses=1]
%tmp2bd = add <8 x i8> %tmp2b, %tmp2d ; <<8 x i8>> [#uses=1]
%tmp4bd = add <8 x i8> %tmp4b, %tmp4d ; <<8 x i8>> [#uses=1]
%tmp2abcd = mul <8 x i8> undef, %tmp2bd ; <<8 x i8>> [#uses=1]
%tmp4abcd = mul <8 x i8> undef, %tmp4bd ; <<8 x i8>> [#uses=2]
- call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1)
%tmp2ef = sub <8 x i8> %tmp2e, %tmp2f ; <<8 x i8>> [#uses=1]
%tmp2gh = sub <8 x i8> %tmp2g, %tmp2h ; <<8 x i8>> [#uses=1]
%tmp3gh = sub <8 x i8> zeroinitializer, %tmp3h ; <<8 x i8>> [#uses=1]
@@ -38,8 +38,8 @@ define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A
%tmp2efgh = mul <8 x i8> %tmp2ef, %tmp2gh ; <<8 x i8>> [#uses=1]
%tmp3efgh = mul <8 x i8> undef, %tmp3gh ; <<8 x i8>> [#uses=1]
%tmp4efgh = mul <8 x i8> %tmp4ef, undef ; <<8 x i8>> [#uses=2]
- call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1)
%tmp4 = sub <8 x i8> %tmp4efgh, %tmp4abcd ; <<8 x i8>> [#uses=1]
- tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1)
+ tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1)
ret <8 x i8> %tmp4
}
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index 6a6ccf3d0a01..c6c0e2caee42 100644
--- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -36,8 +36,8 @@ entry:
%tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
%19 = fmul <4 x float> %tmp5, %2
%20 = bitcast float* %fltp to i8*
- tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %20, <4 x float> %19, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
index f86c3ba9ef6e..1deb98631a4f 100644
--- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
+++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
@@ -12,8 +12,8 @@ entry:
%tmp9 = trunc i128 %tmp8 to i64 ; <i64> [#uses=1]
%tmp16.i = bitcast i64 %tmp6 to <8 x i8> ; <<8 x i8>> [#uses=1]
%tmp20.i = bitcast i64 %tmp9 to <8 x i8> ; <<8 x i8>> [#uses=1]
- tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind
+ tail call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind
ret void
}
-declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
diff --git a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
index bead8d9781e8..47a5ef0bc544 100755
--- a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
+++ b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
@@ -20,7 +20,7 @@
@.str51 = external constant [45 x i8] ; <[45 x i8]*> [#uses=1]
@__PRETTY_FUNCTION__._ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs = external constant [116 x i8] ; <[116 x i8]*> [#uses=1]
-@_ZN4llvm9RecordValC1ERKSsPNS_5RecTyEj = alias void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)* @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj ; <void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)*> [#uses=0]
+@_ZN4llvm9RecordValC1ERKSsPNS_5RecTyEj = alias void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32), void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)* @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj ; <void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)*> [#uses=0]
declare i8* @__dynamic_cast(i8*, i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
index 95bb2769759e..38b352c473b1 100644
--- a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
+++ b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
@@ -47,19 +47,19 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.lv.fn = !{!0, !8, !10, !12}
!llvm.dbg.gv = !{!14}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "buf", line: 4, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "x0", linkageName: "x0", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !26, scope: null, type: !4)
+!0 = !DILocalVariable(name: "buf", line: 4, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "x0", linkageName: "x0", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !26, scope: null, type: !4)
!2 = !DIFile(filename: "t.c", directory: "/private/tmp")
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 2.0", isOptimized: true, file: !26)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang 2.0", isOptimized: true, file: !26)
!4 = !DISubroutineType(types: !5)
!5 = !{null}
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !26, scope: !2, baseType: !7)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
-!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "nbytes", line: 4, arg: 0, scope: !1, file: !2, type: !9)
+!8 = !DILocalVariable(name: "nbytes", line: 4, arg: 2, scope: !1, file: !2, type: !9)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned long", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "nread", line: 6, scope: !11, file: !2, type: !9)
+!10 = !DILocalVariable(name: "nread", line: 6, scope: !11, file: !2, type: !9)
!11 = distinct !DILexicalBlock(line: 5, column: 1, file: !26, scope: !1)
-!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 7, scope: !11, file: !2, type: !13)
+!12 = !DILocalVariable(name: "c", line: 7, scope: !11, file: !2, type: !13)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !DIGlobalVariable(name: "length", linkageName: "length", line: 1, isLocal: false, isDefinition: true, scope: !2, file: !2, type: !13, variable: i32* @length)
!15 = !DILocation(line: 4, column: 24, scope: !1)
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index 1aee5088eee4..130221d38c23 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -16,10 +16,10 @@ target triple = "thumbv7-apple-darwin10"
define i32 @test(i8* %arg) nounwind {
entry:
- %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg, i32 1)
+ %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %arg, i32 1)
%1 = shufflevector <2 x i64> undef, <2 x i64> %0, <2 x i32> <i32 1, i32 2>
store <2 x i64> %1, <2 x i64>* undef, align 16
ret i32 undef
}
-declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8*, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 953e2bbf291c..14ddb59b5387 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -3,7 +3,7 @@
%struct.SVal = type { i8*, i32 }
-define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp {
+define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp !dbg !17 {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !DIExpression()), !dbg !24
@@ -31,7 +31,7 @@ return: ; preds = %bb2
ret i32 %.0, !dbg !29
}
-define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 {
+define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 !dbg !16 {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !DIExpression()), !dbg !34
@@ -47,7 +47,7 @@ return: ; preds = %entry
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define i32 @main() nounwind ssp {
+define i32 @main() nounwind ssp !dbg !20 {
entry:
%0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3]
%v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4]
@@ -80,7 +80,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!0 = !DISubprogram(name: "SVal", line: 11, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14)
!1 = !DICompositeType(tag: DW_TAG_structure_type, name: "SVal", line: 1, size: 128, align: 64, file: !48, elements: !4)
!2 = !DIFile(filename: "small.cc", directory: "/Users/manav/R8248330")
-!3 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !48, enums: !47, retainedTypes: !47, subprograms: !46, globals: !47, imports: !47)
+!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !48, enums: !47, retainedTypes: !47, subprograms: !46, globals: !47, imports: !47)
!4 = !{!5, !7, !0, !9}
!5 = !DIDerivedType(tag: DW_TAG_member, name: "Data", line: 7, size: 64, align: 64, file: !48, scope: !1, baseType: !6)
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !48, baseType: null)
@@ -93,35 +93,35 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !DISubroutineType(types: !15)
!15 = !{null, !12}
-!16 = !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14, function: void (%struct.SVal*)* @_ZN4SValC1Ev)
-!17 = !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !18, function: i32 (i32, %struct.SVal*)* @_Z3fooi4SVal)
+!16 = distinct !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14)
+!17 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !18)
!18 = !DISubroutineType(types: !19)
!19 = !{!13, !13, !1}
-!20 = !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !21, function: i32 ()* @main)
+!20 = distinct !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !21)
!21 = !DISubroutineType(types: !22)
!22 = !{!13}
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 16, arg: 0, scope: !17, file: !2, type: !13)
+!23 = !DILocalVariable(name: "i", line: 16, arg: 1, scope: !17, file: !2, type: !13)
!24 = !DILocation(line: 16, scope: !17)
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "location", line: 16, arg: 0, scope: !17, file: !2, type: !26)
+!25 = !DILocalVariable(name: "location", line: 16, arg: 2, scope: !17, file: !2, type: !26)
!26 = !DIDerivedType(tag: DW_TAG_reference_type, name: "SVal", size: 64, align: 64, file: !48, scope: !2, baseType: !1)
!27 = !DILocation(line: 17, scope: !28)
!28 = distinct !DILexicalBlock(line: 16, column: 0, file: !2, scope: !17)
!29 = !DILocation(line: 18, scope: !28)
!30 = !DILocation(line: 20, scope: !28)
-!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 0, scope: !16, file: !2, type: !32)
+!31 = !DILocalVariable(name: "this", line: 11, arg: 1, scope: !16, file: !2, type: !32)
!32 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !48, scope: !2, baseType: !33)
!33 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !48, scope: !2, baseType: !1)
!34 = !DILocation(line: 11, scope: !16)
!35 = !DILocation(line: 11, scope: !36)
!36 = distinct !DILexicalBlock(line: 11, column: 0, file: !48, scope: !37)
!37 = distinct !DILexicalBlock(line: 11, column: 0, file: !48, scope: !16)
-!38 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "v", line: 24, scope: !39, file: !2, type: !1)
+!38 = !DILocalVariable(name: "v", line: 24, scope: !39, file: !2, type: !1)
!39 = distinct !DILexicalBlock(line: 23, column: 0, file: !48, scope: !40)
!40 = distinct !DILexicalBlock(line: 23, column: 0, file: !48, scope: !20)
!41 = !DILocation(line: 24, scope: !39)
!42 = !DILocation(line: 25, scope: !39)
!43 = !DILocation(line: 26, scope: !39)
-!44 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 26, scope: !39, file: !2, type: !13)
+!44 = !DILocalVariable(name: "k", line: 26, scope: !39, file: !2, type: !13)
!45 = !DILocation(line: 27, scope: !39)
!46 = !{!16, !17, !20}
!47 = !{}
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 9a5baf21b8fb..d5eed8b6a2c4 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -28,7 +28,7 @@ target triple = "thumbv7-apple-darwin10"
; CHECK-NOT: {{DW_TAG|NULL}}
; CHECK: DW_AT_location [DW_FORM_exprloc] (<0x8> 03 [[ADDR]] 10 01 22 )
-define zeroext i8 @get1(i8 zeroext %a) nounwind optsize {
+define zeroext i8 @get1(i8 zeroext %a) nounwind optsize !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !30
%0 = load i8, i8* @x1, align 4, !dbg !30
@@ -39,7 +39,7 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-define zeroext i8 @get2(i8 zeroext %a) nounwind optsize {
+define zeroext i8 @get2(i8 zeroext %a) nounwind optsize !dbg !6 {
entry:
tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !18, metadata !DIExpression()), !dbg !32
%0 = load i8, i8* @x2, align 4, !dbg !32
@@ -48,7 +48,7 @@ entry:
ret i8 %0, !dbg !33
}
-define zeroext i8 @get3(i8 zeroext %a) nounwind optsize {
+define zeroext i8 @get3(i8 zeroext %a) nounwind optsize !dbg !7 {
entry:
tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !21, metadata !DIExpression()), !dbg !34
%0 = load i8, i8* @x3, align 4, !dbg !34
@@ -57,7 +57,7 @@ entry:
ret i8 %0, !dbg !35
}
-define zeroext i8 @get4(i8 zeroext %a) nounwind optsize {
+define zeroext i8 @get4(i8 zeroext %a) nounwind optsize !dbg !8 {
entry:
tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !24, metadata !DIExpression()), !dbg !36
%0 = load i8, i8* @x4, align 4, !dbg !36
@@ -66,7 +66,7 @@ entry:
ret i8 %0, !dbg !37
}
-define zeroext i8 @get5(i8 zeroext %a) nounwind optsize {
+define zeroext i8 @get5(i8 zeroext %a) nounwind optsize !dbg !9 {
entry:
tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !27, metadata !DIExpression()), !dbg !38
%0 = load i8, i8* @x5, align 4, !dbg !38
@@ -78,35 +78,35 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!49}
-!0 = !DISubprogram(name: "get1", linkageName: "get1", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !47, scope: !1, type: !3, function: i8 (i8)* @get1, variables: !42)
+!0 = distinct !DISubprogram(name: "get1", linkageName: "get1", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !47, scope: !1, type: !3, variables: !42)
!1 = !DIFile(filename: "foo.c", directory: "/tmp/")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", isOptimized: true, emissionKind: 0, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", isOptimized: true, emissionKind: 0, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48)
!3 = !DISubroutineType(types: !4)
!4 = !{!5, !5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "_Bool", size: 8, align: 8, encoding: DW_ATE_boolean)
-!6 = !DISubprogram(name: "get2", linkageName: "get2", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !47, scope: !1, type: !3, function: i8 (i8)* @get2, variables: !43)
-!7 = !DISubprogram(name: "get3", linkageName: "get3", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !47, scope: !1, type: !3, function: i8 (i8)* @get3, variables: !44)
-!8 = !DISubprogram(name: "get4", linkageName: "get4", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !47, scope: !1, type: !3, function: i8 (i8)* @get4, variables: !45)
-!9 = !DISubprogram(name: "get5", linkageName: "get5", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !47, scope: !1, type: !3, function: i8 (i8)* @get5, variables: !46)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 4, arg: 0, scope: !0, file: !1, type: !5)
-!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 4, scope: !12, file: !1, type: !5)
+!6 = distinct !DISubprogram(name: "get2", linkageName: "get2", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !47, scope: !1, type: !3, variables: !43)
+!7 = distinct !DISubprogram(name: "get3", linkageName: "get3", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !47, scope: !1, type: !3, variables: !44)
+!8 = distinct !DISubprogram(name: "get4", linkageName: "get4", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !47, scope: !1, type: !3, variables: !45)
+!9 = distinct !DISubprogram(name: "get5", linkageName: "get5", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !47, scope: !1, type: !3, variables: !46)
+!10 = !DILocalVariable(name: "a", line: 4, arg: 1, scope: !0, file: !1, type: !5)
+!11 = !DILocalVariable(name: "b", line: 4, scope: !12, file: !1, type: !5)
!12 = distinct !DILexicalBlock(line: 4, column: 0, file: !47, scope: !0)
!13 = !DIGlobalVariable(name: "x1", line: 3, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x1)
!14 = !DIGlobalVariable(name: "x2", line: 6, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x2)
!15 = !DIGlobalVariable(name: "x3", line: 9, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x3)
!16 = !DIGlobalVariable(name: "x4", line: 12, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x4)
!17 = !DIGlobalVariable(name: "x5", line: 15, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x5)
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 7, arg: 0, scope: !6, file: !1, type: !5)
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 7, scope: !20, file: !1, type: !5)
+!18 = !DILocalVariable(name: "a", line: 7, arg: 1, scope: !6, file: !1, type: !5)
+!19 = !DILocalVariable(name: "b", line: 7, scope: !20, file: !1, type: !5)
!20 = distinct !DILexicalBlock(line: 7, column: 0, file: !47, scope: !6)
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 10, arg: 0, scope: !7, file: !1, type: !5)
-!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 10, scope: !23, file: !1, type: !5)
+!21 = !DILocalVariable(name: "a", line: 10, arg: 1, scope: !7, file: !1, type: !5)
+!22 = !DILocalVariable(name: "b", line: 10, scope: !23, file: !1, type: !5)
!23 = distinct !DILexicalBlock(line: 10, column: 0, file: !47, scope: !7)
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 13, arg: 0, scope: !8, file: !1, type: !5)
-!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 13, scope: !26, file: !1, type: !5)
+!24 = !DILocalVariable(name: "a", line: 13, arg: 1, scope: !8, file: !1, type: !5)
+!25 = !DILocalVariable(name: "b", line: 13, scope: !26, file: !1, type: !5)
!26 = distinct !DILexicalBlock(line: 13, column: 0, file: !47, scope: !8)
-!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 16, arg: 0, scope: !9, file: !1, type: !5)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 16, scope: !29, file: !1, type: !5)
+!27 = !DILocalVariable(name: "a", line: 16, arg: 1, scope: !9, file: !1, type: !5)
+!28 = !DILocalVariable(name: "b", line: 16, scope: !29, file: !1, type: !5)
!29 = distinct !DILexicalBlock(line: 16, column: 0, file: !47, scope: !9)
!30 = !DILocation(line: 4, scope: !0)
!31 = !DILocation(line: 4, scope: !12)
diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
index aac8f7b3a026..1097050df54b 100644
--- a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
+++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -arm-global-merge -global-merge-group-by-use=false | FileCheck %s
-; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2
+; CHECK: .zerofill __DATA,__bss,l__MergedGlobals,16,2
@prev = external global [0 x i16]
@max_lazy_match = internal unnamed_addr global i32 0, align 4
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index 067c719f491c..3d82e706862c 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -28,7 +28,7 @@ target triple = "thumbv7-apple-macosx10.7.0"
@x4 = internal unnamed_addr global i32 4, align 4
@x5 = global i32 0, align 4
-define i32 @get1(i32 %a) nounwind optsize ssp {
+define i32 @get1(i32 %a) nounwind optsize ssp !dbg !1 {
tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !30
%1 = load i32, i32* @x1, align 4, !dbg !31
tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !11, metadata !DIExpression()), !dbg !31
@@ -36,7 +36,7 @@ define i32 @get1(i32 %a) nounwind optsize ssp {
ret i32 %1, !dbg !31
}
-define i32 @get2(i32 %a) nounwind optsize ssp {
+define i32 @get2(i32 %a) nounwind optsize ssp !dbg !6 {
tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !32
%1 = load i32, i32* @x2, align 4, !dbg !33
tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !14, metadata !DIExpression()), !dbg !33
@@ -44,7 +44,7 @@ define i32 @get2(i32 %a) nounwind optsize ssp {
ret i32 %1, !dbg !33
}
-define i32 @get3(i32 %a) nounwind optsize ssp {
+define i32 @get3(i32 %a) nounwind optsize ssp !dbg !7 {
tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !DIExpression()), !dbg !34
%1 = load i32, i32* @x3, align 4, !dbg !35
tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !17, metadata !DIExpression()), !dbg !35
@@ -52,7 +52,7 @@ define i32 @get3(i32 %a) nounwind optsize ssp {
ret i32 %1, !dbg !35
}
-define i32 @get4(i32 %a) nounwind optsize ssp {
+define i32 @get4(i32 %a) nounwind optsize ssp !dbg !8 {
tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !19, metadata !DIExpression()), !dbg !36
%1 = load i32, i32* @x4, align 4, !dbg !37
tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !20, metadata !DIExpression()), !dbg !37
@@ -60,7 +60,7 @@ define i32 @get4(i32 %a) nounwind optsize ssp {
ret i32 %1, !dbg !37
}
-define i32 @get5(i32 %a) nounwind optsize ssp {
+define i32 @get5(i32 %a) nounwind optsize ssp !dbg !9 {
tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !27, metadata !DIExpression()), !dbg !38
%1 = load i32, i32* @x5, align 4, !dbg !39
tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !28, metadata !DIExpression()), !dbg !39
@@ -73,32 +73,32 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!49}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48)
-!1 = !DISubprogram(name: "get1", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !47, scope: !2, type: !3, function: i32 (i32)* @get1, variables: !42)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48)
+!1 = distinct !DISubprogram(name: "get1", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !47, scope: !2, type: !3, variables: !42)
!2 = !DIFile(filename: "ss3.c", directory: "/private/tmp")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "get2", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !47, scope: !2, type: !3, function: i32 (i32)* @get2, variables: !43)
-!7 = !DISubprogram(name: "get3", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !47, scope: !2, type: !3, function: i32 (i32)* @get3, variables: !44)
-!8 = !DISubprogram(name: "get4", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !47, scope: !2, type: !3, function: i32 (i32)* @get4, variables: !45)
-!9 = !DISubprogram(name: "get5", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !47, scope: !2, type: !3, function: i32 (i32)* @get5, variables: !46)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 5, arg: 1, scope: !1, file: !2, type: !5)
-!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 5, scope: !12, file: !2, type: !5)
+!6 = distinct !DISubprogram(name: "get2", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !47, scope: !2, type: !3, variables: !43)
+!7 = distinct !DISubprogram(name: "get3", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !47, scope: !2, type: !3, variables: !44)
+!8 = distinct !DISubprogram(name: "get4", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !47, scope: !2, type: !3, variables: !45)
+!9 = distinct !DISubprogram(name: "get5", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !47, scope: !2, type: !3, variables: !46)
+!10 = !DILocalVariable(name: "a", line: 5, arg: 1, scope: !1, file: !2, type: !5)
+!11 = !DILocalVariable(name: "b", line: 5, scope: !12, file: !2, type: !5)
!12 = distinct !DILexicalBlock(line: 5, column: 19, file: !47, scope: !1)
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 8, arg: 1, scope: !6, file: !2, type: !5)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 8, scope: !15, file: !2, type: !5)
+!13 = !DILocalVariable(name: "a", line: 8, arg: 1, scope: !6, file: !2, type: !5)
+!14 = !DILocalVariable(name: "b", line: 8, scope: !15, file: !2, type: !5)
!15 = distinct !DILexicalBlock(line: 8, column: 17, file: !47, scope: !6)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 11, arg: 1, scope: !7, file: !2, type: !5)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 11, scope: !18, file: !2, type: !5)
+!16 = !DILocalVariable(name: "a", line: 11, arg: 1, scope: !7, file: !2, type: !5)
+!17 = !DILocalVariable(name: "b", line: 11, scope: !18, file: !2, type: !5)
!18 = distinct !DILexicalBlock(line: 11, column: 19, file: !47, scope: !7)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 14, arg: 1, scope: !8, file: !2, type: !5)
-!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 14, scope: !21, file: !2, type: !5)
+!19 = !DILocalVariable(name: "a", line: 14, arg: 1, scope: !8, file: !2, type: !5)
+!20 = !DILocalVariable(name: "b", line: 14, scope: !21, file: !2, type: !5)
!21 = distinct !DILexicalBlock(line: 14, column: 19, file: !47, scope: !8)
!25 = !DIGlobalVariable(name: "x1", line: 4, isLocal: true, isDefinition: true, scope: !0, file: !2, type: !5, variable: i32* @x1)
!26 = !DIGlobalVariable(name: "x2", line: 7, isLocal: true, isDefinition: true, scope: !0, file: !2, type: !5, variable: i32* @x2)
-!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 17, arg: 1, scope: !9, file: !2, type: !5)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 17, scope: !29, file: !2, type: !5)
+!27 = !DILocalVariable(name: "a", line: 17, arg: 1, scope: !9, file: !2, type: !5)
+!28 = !DILocalVariable(name: "b", line: 17, scope: !29, file: !2, type: !5)
!29 = distinct !DILexicalBlock(line: 17, column: 19, file: !47, scope: !9)
!30 = !DILocation(line: 5, column: 16, scope: !1)
!31 = !DILocation(line: 5, column: 32, scope: !12)
diff --git a/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll b/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll
index 3cbc4cdcd707..d702af7c0c70 100644
--- a/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll
+++ b/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll
@@ -4,9 +4,9 @@
define void @test_vmovqqqq_pseudo() nounwind ssp {
entry:
- %vld3_lane = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> zeroinitializer, i32 7, i32 2)
+ %vld3_lane = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> zeroinitializer, i32 7, i32 2)
store { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, { <8 x i16>, <8 x i16>, <8 x i16> }* undef
ret void
}
-declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
index 17bd291a6b55..5df439389cdb 100644
--- a/test/CodeGen/ARM/2011-10-26-memset-inline.ll
+++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
@@ -1,5 +1,5 @@
; Make sure short memsets on ARM lower to stores, even when optimizing for size.
-; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=CHECK-GENERIC
+; RUN: llc -march=arm -mattr=+strict-align < %s | FileCheck %s -check-prefix=CHECK-GENERIC
; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s -check-prefix=CHECK-UNALIGNED
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
index b70b7f6f3b2e..f622ceb584e6 100644
--- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -52,8 +52,8 @@ cond.end295: ; preds = %entry
%shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
%shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1>
%2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float>
- tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
- tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
+ tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
+ tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
unreachable
for.end: ; preds = %entry
@@ -63,10 +63,10 @@ for.end: ; preds = %entry
; Check that pseudo-expansion preserves <undef> flags.
define void @foo3(i8* %p) nounwind ssp {
entry:
- tail call void @llvm.arm.neon.vst2.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4)
+ tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4)
ret void
}
declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
index 7f30ae10e436..606af47a3d8e 100644
--- a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
+++ b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll
@@ -7,8 +7,8 @@ entry:
%vecinit.i = insertelement <2 x i32> undef, i32 %x, i32 0
%vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %x, i32 1
%0 = bitcast i32* %p to i8*
- tail call void @llvm.arm.neon.vst1.v2i32(i8* %0, <2 x i32> %vecinit1.i, i32 4)
+ tail call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %0, <2 x i32> %vecinit1.i, i32 4)
ret void
}
-declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v2i32(i8*, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll
index 545bfc73c590..6cff67614c64 100644
--- a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll
+++ b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll
@@ -5,9 +5,9 @@
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios5.1.0"
-declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly
+declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v16i8(i8*, <16 x i8>, i32) nounwind
define void @findEdges(i8*) nounwind ssp {
%2 = icmp sgt i32 undef, 0
@@ -19,16 +19,16 @@ define void @findEdges(i8*) nounwind ssp {
; <label>:5 ; preds = %5, %1
%6 = phi i8* [ %19, %5 ], [ %0, %1 ]
- %7 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* null, i32 1)
+ %7 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* null, i32 1)
%8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, 0
%9 = getelementptr inbounds i8, i8* null, i32 3
- %10 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %9, i32 1)
+ %10 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %9, i32 1)
%11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %10, 2
- %12 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %6, i32 1)
+ %12 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %6, i32 1)
%13 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %12, 0
%14 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %12, 1
%15 = getelementptr inbounds i8, i8* %6, i32 3
- %16 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %15, i32 1)
+ %16 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %15, i32 1)
%17 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %16, 1
%18 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %16, 2
%19 = getelementptr inbounds i8, i8* %6, i32 48
@@ -111,7 +111,7 @@ define void @findEdges(i8*) nounwind ssp {
%96 = bitcast <8 x i8> %94 to <1 x i64>
%97 = shufflevector <1 x i64> %95, <1 x i64> %96, <2 x i32> <i32 0, i32 1>
%98 = bitcast <2 x i64> %97 to <16 x i8>
- tail call void @llvm.arm.neon.vst1.v16i8(i8* null, <16 x i8> %98, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* null, <16 x i8> %98, i32 1)
%99 = icmp slt i32 undef, undef
br i1 %99, label %5, label %3
}
diff --git a/test/CodeGen/ARM/2012-11-14-subs_carry.ll b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
index 33083303a3d4..f7abac06919d 100644
--- a/test/CodeGen/ARM/2012-11-14-subs_carry.ll
+++ b/test/CodeGen/ARM/2012-11-14-subs_carry.ll
@@ -1,10 +1,14 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s
;CHECK-LABEL: foo:
-;CHECK: adds
-;CHECK-NEXT: adc
-;CHECK-NEXT: bx
+;CHECK: movs r0, #0
+;CHECK-NEXT: bx lr
+; Note: This test case originally checked, per r167963, for:
+; adds
+; adc
+; bx
+; But SDAG now, like InstCombine, can fold everything away.
;rdar://12028498
define i32 @foo() nounwind ssp {
diff --git a/test/CodeGen/ARM/2013-10-11-select-stalls.ll b/test/CodeGen/ARM/2013-10-11-select-stalls.ll
index d6045c7b8c8c..2c15c1a943ba 100644
--- a/test/CodeGen/ARM/2013-10-11-select-stalls.ll
+++ b/test/CodeGen/ARM/2013-10-11-select-stalls.ll
@@ -1,16 +1,21 @@
; REQUIRES: asserts
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -stats 2>&1 | not grep "Number of pipeline stalls"
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -disable-ifcvt-diamond -stats 2>&1 | FileCheck %s
; Evaluate the two vld1.8 instructions in separate MBB's,
; instead of stalling on one and conditionally overwriting its result.
+;
+; Update: After if-conversion the two vld1.8 instructions are in the same MBB
+; again. So we disable this if-conversion to eliminate its influence to this
+; test.
+; CHECK-NOT: Number of pipeline stalls
define <16 x i8> @multiselect(i32 %avail, i8* %foo, i8* %bar) {
entry:
- %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %foo, i32 1)
- %vld2 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1)
+ %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %foo, i32 1)
+ %vld2 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %bar, i32 1)
%and = and i32 %avail, 3
%tobool = icmp eq i32 %and, 0
%retv = select i1 %tobool, <16 x i8> %vld1, <16 x i8> %vld2
ret <16 x i8> %retv
}
-declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 )
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* , i32 )
diff --git a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
index ef575f4c41ec..be87a2fb1c89 100644
--- a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
+++ b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
@@ -27,7 +27,7 @@ entry:
%n0 = insertelement <2 x i64> undef, i64 %tmp0, i32 0
%n1 = insertelement <2 x i64> %n0, i64 %tmp1, i32 1
- call void @llvm.arm.neon.vst4.v1i64(i8* %m, <1 x i64> %s0, <1 x i64> %s1, <1 x i64> %s2, <1 x i64> %s3, i32 8)
+ call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* %m, <1 x i64> %s0, <1 x i64> %s1, <1 x i64> %s2, <1 x i64> %s3, i32 8)
call void @bar(<2 x i64> %n1)
@@ -50,7 +50,7 @@ define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C
ret <8 x i8> %tmp8
}
-declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
+declare void @llvm.arm.neon.vst4.p0i8.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare void @bar2(%struct.__neon_int8x8x4_t, <8 x i8>)
declare void @bar(<2 x i64> %arg)
diff --git a/test/CodeGen/ARM/MachO-subtypes.ll b/test/CodeGen/ARM/MachO-subtypes.ll
new file mode 100644
index 000000000000..8176d6640847
--- /dev/null
+++ b/test/CodeGen/ARM/MachO-subtypes.ll
@@ -0,0 +1,68 @@
+; Check that MachO ARM CPU Subtypes are respected
+
+; RUN: llc -mtriple=armv4t-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V4T
+
+; RUN: llc -mtriple=armv5-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V5
+; RUN: llc -mtriple=armv5e-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V5
+; RUN: llc -mtriple=armv5t-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V5
+; RUN: llc -mtriple=armv5te-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V5
+; RUN: llc -mtriple=armv5tej-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V5
+
+; RUN: llc -mtriple=armv6-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V6
+; RUN: llc -mtriple=armv6k-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V6
+; RUN: llc -mtriple=thumbv6-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V6
+; RUN: llc -mtriple=thumbv6k-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V6
+
+; RUN: llc -mtriple=armv6m-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V6M
+; RUN: llc -mtriple=thumbv6m-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V6M
+
+; RUN: llc -mtriple=armv7-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7
+; RUN: llc -mtriple=thumbv7-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7
+
+; RUN: llc -mtriple=thumbv7em-apple-darwin -mcpu=cortex-m4 -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7EM
+; RUN: llc -mtriple=thumbv7em-apple-darwin -mcpu=cortex-m7 -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7EM
+
+; RUN: llc -mtriple=armv7k-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7K
+; RUN: llc -mtriple=thumbv7k-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7K
+
+; RUN: llc -mtriple=thumbv7m-apple-darwin -mcpu=sc300 -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7M
+; RUN: llc -mtriple=thumbv7m-apple-darwin -mcpu=cortex-m3 -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7M
+
+; RUN: llc -mtriple=armv7s-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7S
+; RUN: llc -mtriple=thumbv7s-apple-darwin -filetype=obj -o - < %s \
+; RUN: | llvm-readobj -file-headers | FileCheck %s --check-prefix=CHECK-V7S
+
+define void @_test() {
+ ret void
+}
+
+; CHECK-V4T: CpuSubType: CPU_SUBTYPE_ARM_V4T (0x5)
+; CHECK-V5: CpuSubType: CPU_SUBTYPE_ARM_V5 (0x7)
+; CHECK-V6: CpuSubType: CPU_SUBTYPE_ARM_V6 (0x6)
+; CHECK-V6M: CpuSubType: CPU_SUBTYPE_ARM_V6M (0xE)
+; CHECK-V7: CpuSubType: CPU_SUBTYPE_ARM_V7 (0x9)
+; CHECK-V7EM: CpuSubType: CPU_SUBTYPE_ARM_V7EM (0x10)
+; CHECK-V7K: CpuSubType: CPU_SUBTYPE_ARM_V7K (0xC)
+; CHECK-V7M: CpuSubType: CPU_SUBTYPE_ARM_V7M (0xF)
+; CHECK-V7S: CpuSubType: CPU_SUBTYPE_ARM_V7S (0xB)
diff --git a/test/CodeGen/ARM/Windows/division.ll b/test/CodeGen/ARM/Windows/division.ll
new file mode 100644
index 000000000000..b3ef9c6d278b
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/division.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple thumbv7-windows-itanium -filetype asm -o - %s | FileCheck %s
+; RUN: llc -mtriple thumbv7-windows-msvc -filetype asm -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc i32 @sdiv32(i32 %divisor, i32 %divident) {
+entry:
+ %div = sdiv i32 %divident, %divisor
+ ret i32 %div
+}
+
+; CHECK-LABEL: sdiv32
+; CHECK: b __rt_sdiv
+
+define arm_aapcs_vfpcc i32 @udiv32(i32 %divisor, i32 %divident) {
+entry:
+ %div = udiv i32 %divident, %divisor
+ ret i32 %div
+}
+
+; CHECK-LABEL: udiv32:
+; CHECK: b __rt_udiv
+
+define arm_aapcs_vfpcc i64 @sdiv64(i64 %divisor, i64 %divident) {
+entry:
+ %div = sdiv i64 %divident, %divisor
+ ret i64 %div
+}
+
+; CHECK-LABEL: sdiv64
+; CHECK: bl __rt_sdiv64
+
+define arm_aapcs_vfpcc i64 @udiv64(i64 %divisor, i64 %divident) {
+entry:
+ %div = udiv i64 %divident, %divisor
+ ret i64 %div
+}
+
+; CHECK-LABEL: udiv64:
+; CHECK: bl __rt_udiv64
diff --git a/test/CodeGen/ARM/Windows/integer-floating-point-conversion.ll b/test/CodeGen/ARM/Windows/integer-floating-point-conversion.ll
deleted file mode 100644
index acf21a1caad3..000000000000
--- a/test/CodeGen/ARM/Windows/integer-floating-point-conversion.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: llc -mtriple thumbv7-windows -filetype asm -o - %s | FileCheck %s
-
-define arm_aapcs_vfpcc i64 @stoi64(float %f) {
-entry:
- %conv = fptosi float %f to i64
- ret i64 %conv
-}
-
-; CHECK-LABEL: stoi64
-; CHECK: bl __stoi64
-
-define arm_aapcs_vfpcc i64 @stou64(float %f) {
-entry:
- %conv = fptoui float %f to i64
- ret i64 %conv
-}
-
-; CHECK-LABEL: stou64
-; CHECK: bl __stou64
-
-define arm_aapcs_vfpcc float @i64tos(i64 %i64) {
-entry:
- %conv = sitofp i64 %i64 to float
- ret float %conv
-}
-
-; CHECK-LABEL: i64tos
-; CHECK: bl __i64tos
-
-define arm_aapcs_vfpcc float @u64tos(i64 %u64) {
-entry:
- %conv = uitofp i64 %u64 to float
- ret float %conv
-}
-
-; CHECK-LABEL: u64tos
-; CHECK: bl __u64tos
-
-define arm_aapcs_vfpcc i64 @dtoi64(double %d) {
-entry:
- %conv = fptosi double %d to i64
- ret i64 %conv
-}
-
-; CHECK-LABEL: dtoi64
-; CHECK: bl __dtoi64
-
-define arm_aapcs_vfpcc i64 @dtou64(double %d) {
-entry:
- %conv = fptoui double %d to i64
- ret i64 %conv
-}
-
-; CHECK-LABEL: dtou64
-; CHECK: bl __dtou64
-
-define arm_aapcs_vfpcc double @i64tod(i64 %i64) {
-entry:
- %conv = sitofp i64 %i64 to double
- ret double %conv
-}
-
-; CHECK-LABEL: i64tod
-; CHECK: bl __i64tod
-
-define arm_aapcs_vfpcc double @u64tod(i64 %i64) {
-entry:
- %conv = uitofp i64 %i64 to double
- ret double %conv
-}
-
-; CHECK-LABEL: u64tod
-; CHECK: bl __u64tod
-
diff --git a/test/CodeGen/ARM/Windows/libcalls.ll b/test/CodeGen/ARM/Windows/libcalls.ll
new file mode 100644
index 000000000000..d8b498a40d97
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/libcalls.ll
@@ -0,0 +1,75 @@
+; RUN: llc -mtriple thumbv7-windows-itanium -filetype asm -o - %s | FileCheck %s
+; RUN: llc -mtriple thumbv7-windows-msvc -filetype asm -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc i64 @stoi64(float %f) {
+entry:
+ %conv = fptosi float %f to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: stoi64
+; CHECK: bl __stoi64
+
+define arm_aapcs_vfpcc i64 @stou64(float %f) {
+entry:
+ %conv = fptoui float %f to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: stou64
+; CHECK: bl __stou64
+
+define arm_aapcs_vfpcc float @i64tos(i64 %i64) {
+entry:
+ %conv = sitofp i64 %i64 to float
+ ret float %conv
+}
+
+; CHECK-LABEL: i64tos
+; CHECK: bl __i64tos
+
+define arm_aapcs_vfpcc float @u64tos(i64 %u64) {
+entry:
+ %conv = uitofp i64 %u64 to float
+ ret float %conv
+}
+
+; CHECK-LABEL: u64tos
+; CHECK: bl __u64tos
+
+define arm_aapcs_vfpcc i64 @dtoi64(double %d) {
+entry:
+ %conv = fptosi double %d to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: dtoi64
+; CHECK: bl __dtoi64
+
+define arm_aapcs_vfpcc i64 @dtou64(double %d) {
+entry:
+ %conv = fptoui double %d to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: dtou64
+; CHECK: bl __dtou64
+
+define arm_aapcs_vfpcc double @i64tod(i64 %i64) {
+entry:
+ %conv = sitofp i64 %i64 to double
+ ret double %conv
+}
+
+; CHECK-LABEL: i64tod
+; CHECK: bl __i64tod
+
+define arm_aapcs_vfpcc double @u64tod(i64 %i64) {
+entry:
+ %conv = uitofp i64 %i64 to double
+ ret double %conv
+}
+
+; CHECK-LABEL: u64tod
+; CHECK: bl __u64tod
+
diff --git a/test/CodeGen/ARM/Windows/no-eabi.ll b/test/CodeGen/ARM/Windows/no-eabi.ll
new file mode 100644
index 000000000000..033ca0267ee0
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/no-eabi.ll
@@ -0,0 +1,10 @@
+; RUN: llc -O3 -mtriple thumbv7-windows %s -filetype asm -o - | FileCheck -check-prefix CHECK-NONEABI %s
+; RUN: llc -O3 -mtriple armv7--linux-gnueabi %s -filetype asm -o - | FileCheck -check-prefix CHECK-EABI %s
+
+define arm_aapcs_vfpcc void @function() {
+ ret void
+}
+
+; CHECK-EABI: .eabi_attribute
+; CHECK-NONEABI-NOT: .eabi_attribute
+
diff --git a/test/CodeGen/ARM/Windows/no-frame-register.ll b/test/CodeGen/ARM/Windows/no-frame-register.ll
new file mode 100644
index 000000000000..80187af7ef22
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/no-frame-register.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple thumbv7-windows -filetype asm -o - %s | FileCheck %s
+
+declare void @callee(i32)
+
+define i32 @calleer(i32 %i) {
+entry:
+ %i.addr = alloca i32, align 4
+ %j = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %j, align 4
+ %1 = load i32, i32* %j, align 4
+ call void @callee(i32 %1)
+ %2 = load i32, i32* %j, align 4
+ %add1 = add nsw i32 %2, 1
+ ret i32 %add1
+}
+
+; CHECK-NOT: push.w {r7, lr}
+; CHECK: push.w {r11, lr}
+
diff --git a/test/CodeGen/ARM/Windows/overflow.ll b/test/CodeGen/ARM/Windows/overflow.ll
new file mode 100644
index 000000000000..5f74f25ac224
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/overflow.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mtriple thumbv7-windows-gnu -filetype asm -o - %s
+
+define i32 @divsoverflow32(i32 %a, i32 %b) {
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ %3 = load i32, i32* %1, align 4
+ %4 = load i32, i32* %2, align 4
+ %5 = sub nsw i32 0, %4
+ %6 = sdiv i32 -2147483647, %3
+ %7 = icmp sgt i32 %5, %6
+ br i1 %7, label %8, label %9
+ call void (...) @abort_simpl32()
+ unreachable
+ %10 = load i32, i32* %1, align 4
+ %11 = load i32, i32* %2, align 4
+ %12 = mul nsw i32 %10, %11
+ ret i32 %12
+}
+
+declare void @abort_simpl32(...)
+
+define i64 @divsoverflow64(i64 %a, i64 %b) {
+ %1 = alloca i64, align 8
+ %2 = alloca i64, align 8
+ %3 = load i64, i64* %1, align 8
+ %4 = load i64, i64* %2, align 8
+ %5 = sub nsw i64 0, %4
+ %6 = sdiv i64 -9223372036854775808, %3
+ %7 = icmp sgt i64 %5, %6
+ br i1 %7, label %8, label %9
+ call void (...) @abort_simpl64()
+ unreachable
+ %10 = load i64, i64* %1, align 8
+ %11 = load i64, i64* %2, align 8
+ %12 = mul nsw i64 %10, %11
+ ret i64 %12
+}
+
+declare void @abort_simpl64(...)
+
+define i32 @divuoverflow32(i32 %a, i32 %b) {
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ %3 = load i32, i32* %1, align 4
+ %4 = load i32, i32* %2, align 4
+ %5 = sub nsw i32 0, %4
+ %6 = udiv i32 4294967296, %3
+ %7 = icmp sgt i32 %5, %6
+ br i1 %7, label %8, label %9
+ call void (...) @abort_uimpl32()
+ unreachable
+ %10 = load i32, i32* %1, align 4
+ %11 = load i32, i32* %2, align 4
+ %12 = mul nsw i32 %10, %11
+ ret i32 %12
+}
+
+declare void @abort_uimpl32(...)
+
+define i64 @divuoverflow64(i64 %a, i64 %b) {
+ %1 = alloca i64, align 8
+ %2 = alloca i64, align 8
+ %3 = load i64, i64* %1, align 8
+ %4 = load i64, i64* %2, align 8
+ %5 = sub nsw i64 0, %4
+ %6 = udiv i64 18446744073709551616, %3
+ %7 = icmp sgt i64 %5, %6
+ br i1 %7, label %8, label %9
+ call void (...) @abort_uimpl64()
+ unreachable
+ %10 = load i64, i64* %1, align 8
+ %11 = load i64, i64* %2, align 8
+ %12 = mul nsw i64 %10, %11
+ ret i64 %12
+}
+
+declare void @abort_uimpl64(...)
diff --git a/test/CodeGen/ARM/adv-copy-opt.ll b/test/CodeGen/ARM/adv-copy-opt.ll
index f71bf78b62c4..395be3457203 100644
--- a/test/CodeGen/ARM/adv-copy-opt.ll
+++ b/test/CodeGen/ARM/adv-copy-opt.ll
@@ -11,25 +11,25 @@
; r0 = r0 / r2
; r1 = r1 / r3
;
-; NOOPT: vmov [[B:d[0-9]+]], r2, r3
-; NOOPT-NEXT: vmov [[A:d[0-9]+]], r0, r1
+; NOOPT: vmov [[A:d[0-9]+]], r0, r1
+; NOOPT-NEXT: vmov [[B:d[0-9]+]], r2, r3
; Move the low part of B into a register.
; Unfortunately, we cannot express that the 's' register is the low
; part of B, i.e., sIdx == BIdx x 2. E.g., B = d1, B_low = s2.
; NOOPT-NEXT: vmov [[B_LOW:r[0-9]+]], s{{[0-9]+}}
-; NOOPT-NEXT: vmov [[A_LOW:r[0-9]+]], s{{[0-9]+}}
-; NOOPT-NEXT: udiv [[RES_LOW:r[0-9]+]], [[A_LOW]], [[B_LOW]]
; NOOPT-NEXT: vmov [[B_HIGH:r[0-9]+]], s{{[0-9]+}}
+; NOOPT-NEXT: vmov [[A_LOW:r[0-9]+]], s{{[0-9]+}}
; NOOPT-NEXT: vmov [[A_HIGH:r[0-9]+]], s{{[0-9]+}}
-; NOOPT-NEXT: udiv [[RES_HIGH:r[0-9]+]], [[A_HIGH]], [[B_HIGH]]
+; NOOPT-NEXT: udiv [[RES_LOW:r[0-9]+]], [[A_LOW]], [[B_LOW]]
; NOOPT-NEXT: vmov.32 [[RES:d[0-9]+]][0], [[RES_LOW]]
+; NOOPT-NEXT: udiv [[RES_HIGH:r[0-9]+]], [[A_HIGH]], [[B_HIGH]]
; NOOPT-NEXT: vmov.32 [[RES]][1], [[RES_HIGH]]
; NOOPT-NEXT: vmov r0, r1, [[RES]]
; NOOPT-NEXT: bx lr
;
; OPT-NOT: vmov
-; OPT: udiv r0, r0, r2
-; OPT-NEXT: udiv r1, r1, r3
+; OPT: udiv r1, r1, r3
+; OPT-NEXT: udiv r0, r0, r2
; OPT-NEXT: bx lr
define <2 x i32> @simpleVectorDiv(<2 x i32> %A, <2 x i32> %B) nounwind {
entry:
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index 04ca3e875487..665ffe902c81 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -2,34 +2,54 @@
; CHECK: .globl test
+; CHECK: .Lstructvar:
+; CHECK: .size .Lstructvar, 8
+
; CHECK: .globl foo1
; CHECK: foo1 = bar
+; CHECK-NOT: .size foo1
; CHECK: .globl foo2
; CHECK: foo2 = bar
+; CHECK-NOT: .size foo2
; CHECK: .weak bar_f
; CHECK: bar_f = foo_f
+; CHECK-NOT: .size bar_f
; CHECK: bar_i = bar
+; CHECK-NOT: .size bar_i
; CHECK: .globl A
; CHECK: A = bar
+; CHECK-NOT: .size A
+
+; CHECK: .globl elem0
+; CHECK: elem0 = .Lstructvar
+; CHECK: .size elem0, 4
+
+; CHECK: .globl elem1
+; CHECK: elem1 = .Lstructvar+4
+; CHECK: .size elem1, 4
@bar = global i32 42
-@foo1 = alias i32* @bar
-@foo2 = alias i32* @bar
+@foo1 = alias i32, i32* @bar
+@foo2 = alias i32, i32* @bar
%FunTy = type i32()
define i32 @foo_f() {
ret i32 0
}
-@bar_f = weak alias %FunTy* @foo_f
+@bar_f = weak alias %FunTy, %FunTy* @foo_f
+
+@bar_i = internal alias i32, i32* @bar
-@bar_i = internal alias i32* @bar
+@A = alias i64, bitcast (i32* @bar to i64*)
-@A = alias bitcast (i32* @bar to i64*)
+@structvar = private global {i32, i32} {i32 1, i32 2}
+@elem0 = alias i32, getelementptr({i32, i32}, {i32, i32}* @structvar, i32 0, i32 0)
+@elem1 = alias i32, getelementptr({i32, i32}, {i32, i32}* @structvar, i32 0, i32 1)
define i32 @test() {
entry:
diff --git a/test/CodeGen/ARM/align-sp-adjustment.ll b/test/CodeGen/ARM/align-sp-adjustment.ll
new file mode 100644
index 000000000000..cce7b03e2362
--- /dev/null
+++ b/test/CodeGen/ARM/align-sp-adjustment.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=thumbv7 -o - %s | FileCheck %s
+
+; CHECK: [sp, #2120]
+
+%struct.struct_2 = type { [172 x %struct.struct_1] }
+%struct.struct_1 = type { i32, i32, i32 }
+
+@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"b\00", align 1
+@.str.2 = private unnamed_addr constant [2 x i8] c"c\00", align 1
+@.str.3 = private unnamed_addr constant [2 x i8] c"d\00", align 1
+
+declare i32* @_Z4bar3iiPKcS0_i(i32, i32, i8*, i8*, i32)
+declare void @_Z4bar1i8struct_2(i32, %struct.struct_2* byval align 4)
+declare i32 @_Z4bar2PiPKc(i32*, i8*)
+
+define void @_Z3fooiiiii(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) #0 {
+entry:
+ %params = alloca %struct.struct_2, align 4
+ %0 = bitcast %struct.struct_2* %params to i8*
+ br label %for.body
+
+for.body:
+ %i.015 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %call = tail call i32* @_Z4bar3iiPKcS0_i(i32 %p1, i32 %p5, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0), i32 %i.015) #4
+ %cmp1 = icmp eq i32* %call, null
+ br i1 %cmp1, label %cleanup.8, label %for.inc
+
+for.inc:
+ %call2 = tail call i32 @_Z4bar2PiPKc(i32* %call, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i32 0, i32 0)) #4
+ %f1 = getelementptr inbounds %struct.struct_2, %struct.struct_2* %params, i32 0, i32 0, i32 %i.015, i32 0
+ store i32 %call2, i32* %f1, align 4
+ %call3 = tail call i32 @_Z4bar2PiPKc(i32* %call, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i32 0, i32 0)) #4
+ %f2 = getelementptr inbounds %struct.struct_2, %struct.struct_2* %params, i32 0, i32 0, i32 %i.015, i32 1
+ store i32 %call3, i32* %f2, align 4
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp slt i32 %inc, 4
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ call void @_Z4bar1i8struct_2(i32 %p4, %struct.struct_2* byval nonnull align 4 %params) #4
+ br label %cleanup.8
+
+cleanup.8:
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/apcs-vfp.ll b/test/CodeGen/ARM/apcs-vfp.ll
new file mode 100644
index 000000000000..9157521bfbc4
--- /dev/null
+++ b/test/CodeGen/ARM/apcs-vfp.ll
@@ -0,0 +1,153 @@
+; RUN: llc -mtriple=armv7k-apple-watchos2.0 < %s | FileCheck %s
+
+define arm_aapcs_vfpcc float @t1(float %a, float %b) {
+entry:
+; CHECK: t1
+; CHECK-NOT: vmov
+; CHECK: vadd.f32
+ %a.addr = alloca float, align 4
+ %b.addr = alloca float, align 4
+ store float %a, float* %a.addr, align 4
+ store float %b, float* %b.addr, align 4
+ %0 = load float, float* %a.addr, align 4
+ %1 = load float, float* %b.addr, align 4
+ %add = fadd float %0, %1
+ ret float %add
+}
+
+define arm_aapcs_vfpcc double @t2(double %a, double %b) {
+entry:
+; CHECK: t2
+; CHECK-NOT: vmov
+; CHECK: vadd.f64
+ %a.addr = alloca double, align 8
+ %b.addr = alloca double, align 8
+ store double %a, double* %a.addr, align 8
+ store double %b, double* %b.addr, align 8
+ %0 = load double, double* %a.addr, align 8
+ %1 = load double, double* %b.addr, align 8
+ %add = fadd double %0, %1
+ ret double %add
+}
+
+define arm_aapcs_vfpcc i64 @t3(double %ti) {
+entry:
+; CHECK-LABEL: t3:
+; CHECK-NOT: vmov
+; CHECK: bl ___fixunsdfdi
+ %conv = fptoui double %ti to i64
+ ret i64 %conv
+}
+
+define arm_aapcs_vfpcc i64 @t4(double %ti) {
+entry:
+; CHECK-LABEL: t4:
+; CHECK-NOT: vmov
+; CHECK: bl ___fixdfdi
+ %conv = fptosi double %ti to i64
+ ret i64 %conv
+}
+
+define arm_aapcs_vfpcc double @t5(i64 %ti) {
+entry:
+; CHECK-LABEL: t5:
+; CHECK: bl ___floatundidf
+; CHECK-NOT: vmov
+; CHECK: pop
+ %conv = uitofp i64 %ti to double
+ ret double %conv
+}
+
+define arm_aapcs_vfpcc double @t6(i64 %ti) {
+entry:
+; CHECK-LABEL: t6:
+; CHECK: bl ___floatdidf
+; CHECK-NOT: vmov
+; CHECK: pop
+ %conv = sitofp i64 %ti to double
+ ret double %conv
+}
+
+define arm_aapcs_vfpcc float @t7(i64 %ti) {
+entry:
+; CHECK-LABEL: t7:
+; CHECK: bl ___floatundisf
+; CHECK-NOT: vmov
+; CHECK: pop
+ %conv = uitofp i64 %ti to float
+ ret float %conv
+}
+
+define arm_aapcs_vfpcc float @t8(i64 %ti) {
+entry:
+; CHECK-LABEL: t8:
+; CHECK: bl ___floatdisf
+; CHECK-NOT: vmov
+; CHECK: pop
+ %conv = sitofp i64 %ti to float
+ ret float %conv
+}
+
+define arm_aapcs_vfpcc double @t9(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %a, float %b) {
+entry:
+; CHECK-LABEL: t9:
+; CHECK-NOT: vmov
+; CHECK: vldr
+ %add = fadd float %a, %b
+ %conv = fpext float %add to double
+ ret double %conv
+}
+
+define arm_aapcs_vfpcc double @t10(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %a, float %b, double %c) {
+entry:
+; CHECK-LABEL: t10:
+; CHECK-NOT: vmov
+; CHECK: vldr
+ %add = fadd double %a, %c
+ ret double %add
+}
+
+define arm_aapcs_vfpcc float @t11(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, float %a, double %b, float %c) {
+entry:
+; CHECK-LABEL: t11:
+; CHECK: vldr
+ %add = fadd float %a, %c
+ ret float %add
+}
+
+define arm_aapcs_vfpcc double @t12(double %a, double %b) {
+entry:
+; CHECK-LABEL: t12:
+; CHECK: vstr
+ %add = fadd double %a, %b
+ %sub = fsub double %a, %b
+ %call = tail call arm_aapcs_vfpcc double @x(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double %add, float 0.000000e+00, double %sub)
+ ret double %call
+}
+
+define arm_aapcs_vfpcc double @t13(double %x) {
+entry:
+; CHECK-LABEL: t13:
+; CHECK-NOT: vmov
+; CHECK: bl ___sincos_stret
+ %call = tail call arm_aapcs_vfpcc double @cos(double %x)
+ %call1 = tail call arm_aapcs_vfpcc double @sin(double %x)
+ %mul = fmul double %call, %call1
+ ret double %mul
+}
+
+define arm_aapcs_vfpcc double @t14(double %x) {
+; CHECK-LABEL: t14:
+; CHECK-NOT: vmov
+; CHECK: b ___exp10
+ %__exp10 = tail call double @__exp10(double %x) #1
+ ret double %__exp10
+}
+
+declare arm_aapcs_vfpcc double @x(double, double, double, double, double, double, double, float, double)
+declare arm_aapcs_vfpcc double @cos(double) #0
+declare arm_aapcs_vfpcc double @sin(double) #0
+declare double @__exp10(double)
+
+attributes #0 = { readnone }
+attributes #1 = { readonly }
diff --git a/test/CodeGen/ARM/arm-eabi.ll b/test/CodeGen/ARM/arm-eabi.ll
new file mode 100644
index 000000000000..d1e7a947553f
--- /dev/null
+++ b/test/CodeGen/ARM/arm-eabi.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
+; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
+; RUN: llc < %s -mtriple=arm-none-eabi -meabi=gnu -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
+; RUN: llc < %s -mtriple=arm-none-eabihf -meabi=gnu -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
+; RUN: llc < %s -mtriple=arm-none-androideabi -meabi=gnu -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
+; RUN: llc < %s -mtriple=arm-none-gnueabi -meabi=gnu -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
+; RUN: llc < %s -mtriple=arm-none-gnueabihf -meabi=gnu -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
+; RUN: llc < %s -mtriple=arm-none-eabi -meabi=4 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-eabihf -meabi=4 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-androideabi -meabi=4 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-gnueabi -meabi=4 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-gnueabihf -meabi=4 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-eabi -meabi=5 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-eabihf -meabi=5 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-androideabi -meabi=5 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-gnueabi -meabi=5 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-gnueabihf -meabi=5 -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+
+%struct.my_s = type { [18 x i32] }
+
+define void @foo(i32* %t) {
+ ; CHECK-LABEL: foo
+
+ %1 = alloca i32*, align 4
+ store i32* %t, i32** %1, align 4
+ %2 = load i32*, i32** %1, align 4
+ %3 = bitcast i32* %2 to %struct.my_s*
+ %4 = bitcast %struct.my_s* %3 to i8*
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %4, i8* inttoptr (i32 1 to i8*), i32 72, i32 4, i1 false)
+ ret void
+}
+
+define void @f1(i8* %dest, i8* %src) {
+entry:
+ ; CHECK-LABEL: f1
+
+ ; memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: bl memmove
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
+
+ ; memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
+
+ ; memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false)
+ ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/arm-interleaved-accesses.ll b/test/CodeGen/ARM/arm-interleaved-accesses.ll
index 9a9885ccdd0c..002e71f6d9b8 100644
--- a/test/CodeGen/ARM/arm-interleaved-accesses.ll
+++ b/test/CodeGen/ARM/arm-interleaved-accesses.ll
@@ -1,7 +1,10 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
+; RUN: llc -mtriple=arm-eabi -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON
-; CHECK-LABEL: load_factor2:
-; CHECK: vld2.8 {d16, d17}, [r0]
+; NEON-LABEL: load_factor2:
+; NEON: vld2.8 {d16, d17}, [r0]
+; NONEON-LABEL: load_factor2:
+; NONEON-NOT: vld2
define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
%wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
%strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -10,8 +13,10 @@ define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
ret <8 x i8> %add
}
-; CHECK-LABEL: load_factor3:
-; CHECK: vld3.32 {d16, d17, d18}, [r0]
+; NEON-LABEL: load_factor3:
+; NEON: vld3.32 {d16, d17, d18}, [r0]
+; NONEON-LABEL: load_factor3:
+; NONEON-NOT: vld3
define <2 x i32> @load_factor3(i32* %ptr) {
%base = bitcast i32* %ptr to <6 x i32>*
%wide.vec = load <6 x i32>, <6 x i32>* %base, align 4
@@ -21,9 +26,11 @@ define <2 x i32> @load_factor3(i32* %ptr) {
ret <2 x i32> %add
}
-; CHECK-LABEL: load_factor4:
-; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]!
-; CHECK: vld4.32 {d17, d19, d21, d23}, [r0]
+; NEON-LABEL: load_factor4:
+; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
+; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
+; NONEON-LABEL: load_factor4:
+; NONEON-NOT: vld4
define <4 x i32> @load_factor4(i32* %ptr) {
%base = bitcast i32* %ptr to <16 x i32>*
%wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
@@ -33,17 +40,21 @@ define <4 x i32> @load_factor4(i32* %ptr) {
ret <4 x i32> %add
}
-; CHECK-LABEL: store_factor2:
-; CHECK: vst2.8 {d16, d17}, [r0]
+; NEON-LABEL: store_factor2:
+; NEON: vst2.8 {d16, d17}, [r0]
+; NONEON-LABEL: store_factor2:
+; NONEON-NOT: vst2
define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
%interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
ret void
}
-; CHECK-LABEL: store_factor3:
-; CHECK: vst3.32 {d16, d18, d20}, [r0]!
-; CHECK: vst3.32 {d17, d19, d21}, [r0]
+; NEON-LABEL: store_factor3:
+; NEON: vst3.32 {d16, d18, d20}, [r0]!
+; NEON: vst3.32 {d17, d19, d21}, [r0]
+; NONEON-LABEL: store_factor3:
+; NONEON-NOT: vst3.32
define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
%base = bitcast i32* %ptr to <12 x i32>*
%v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -53,9 +64,11 @@ define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v
ret void
}
-; CHECK-LABEL: store_factor4:
-; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]!
-; CHECK: vst4.32 {d17, d19, d21, d23}, [r0]
+; NEON-LABEL: store_factor4:
+; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
+; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
+; NONEON-LABEL: store_factor4:
+; NONEON-NOT: vst4
define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
%base = bitcast i32* %ptr to <16 x i32>*
%v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -68,8 +81,10 @@ define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v
; The following cases test that interleaved access of pointer vectors can be
; matched to ldN/stN instruction.
-; CHECK-LABEL: load_ptrvec_factor2:
-; CHECK: vld2.32 {d16, d17}, [r0]
+; NEON-LABEL: load_ptrvec_factor2:
+; NEON: vld2.32 {d16, d17}, [r0]
+; NONEON-LABEL: load_ptrvec_factor2:
+; NONEON-NOT: vld2
define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
%base = bitcast i32** %ptr to <4 x i32*>*
%wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
@@ -77,8 +92,10 @@ define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
ret <2 x i32*> %strided.v0
}
-; CHECK-LABEL: load_ptrvec_factor3:
-; CHECK: vld3.32 {d16, d17, d18}, [r0]
+; NEON-LABEL: load_ptrvec_factor3:
+; NEON: vld3.32 {d16, d17, d18}, [r0]
+; NONEON-LABEL: load_ptrvec_factor3:
+; NONEON-NOT: vld3
define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
%base = bitcast i32** %ptr to <6 x i32*>*
%wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
@@ -89,8 +106,10 @@ define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr
ret void
}
-; CHECK-LABEL: load_ptrvec_factor4:
-; CHECK: vld4.32 {d16, d17, d18, d19}, [r0]
+; NEON-LABEL: load_ptrvec_factor4:
+; NEON: vld4.32 {d16, d17, d18, d19}, [r0]
+; NONEON-LABEL: load_ptrvec_factor4:
+; NONEON-NOT: vld4
define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
%base = bitcast i32** %ptr to <8 x i32*>*
%wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
@@ -101,8 +120,10 @@ define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr
ret void
}
-; CHECK-LABEL: store_ptrvec_factor2:
-; CHECK: vst2.32 {d16, d17}, [r0]
+; NEON-LABEL: store_ptrvec_factor2:
+; NEON: vst2.32 {d16, d17}, [r0]
+; NONEON-LABEL: store_ptrvec_factor2:
+; NONEON-NOT: vst2
define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
%base = bitcast i32** %ptr to <4 x i32*>*
%interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -110,8 +131,10 @@ define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
ret void
}
-; CHECK-LABEL: store_ptrvec_factor3:
-; CHECK: vst3.32 {d16, d17, d18}, [r0]
+; NEON-LABEL: store_ptrvec_factor3:
+; NEON: vst3.32 {d16, d17, d18}, [r0]
+; NONEON-LABEL: store_ptrvec_factor3:
+; NONEON-NOT: vst3
define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
%base = bitcast i32** %ptr to <6 x i32*>*
%v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -121,8 +144,10 @@ define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2
ret void
}
-; CHECK-LABEL: store_ptrvec_factor4:
-; CHECK: vst4.32 {d16, d17, d18, d19}, [r0]
+; NEON-LABEL: store_ptrvec_factor4:
+; NEON: vst4.32 {d16, d17, d18, d19}, [r0]
+; NONEON-LABEL: store_ptrvec_factor4:
+; NONEON-NOT: vst4
define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
%base = bitcast i32* %ptr to <8 x i32*>*
%v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -135,8 +160,10 @@ define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2
; Following cases check that shuffle maskes with undef indices can be matched
; into ldN/stN instruction.
-; CHECK-LABEL: load_undef_mask_factor2:
-; CHECK: vld2.32 {d16, d17, d18, d19}, [r0]
+; NEON-LABEL: load_undef_mask_factor2:
+; NEON: vld2.32 {d16, d17, d18, d19}, [r0]
+; NONEON-LABEL: load_undef_mask_factor2:
+; NONEON-NOT: vld2
define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
%base = bitcast i32* %ptr to <8 x i32>*
%wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
@@ -146,9 +173,11 @@ define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
ret <4 x i32> %add
}
-; CHECK-LABEL: load_undef_mask_factor3:
-; CHECK: vld3.32 {d16, d18, d20}, [r0]!
-; CHECK: vld3.32 {d17, d19, d21}, [r0]
+; NEON-LABEL: load_undef_mask_factor3:
+; NEON: vld3.32 {d16, d18, d20}, [r0]!
+; NEON: vld3.32 {d17, d19, d21}, [r0]
+; NONEON-LABEL: load_undef_mask_factor3:
+; NONEON-NOT: vld3
define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
%base = bitcast i32* %ptr to <12 x i32>*
%wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
@@ -158,9 +187,11 @@ define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
ret <4 x i32> %add
}
-; CHECK-LABEL: load_undef_mask_factor4:
-; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]!
-; CHECK: vld4.32 {d17, d19, d21, d23}, [r0]
+; NEON-LABEL: load_undef_mask_factor4:
+; NEON: vld4.32 {d16, d18, d20, d22}, [r0]!
+; NEON: vld4.32 {d17, d19, d21, d23}, [r0]
+; NONEON-LABEL: load_undef_mask_factor4:
+; NONEON-NOT: vld4
define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
%base = bitcast i32* %ptr to <16 x i32>*
%wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
@@ -170,8 +201,10 @@ define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
ret <4 x i32> %add
}
-; CHECK-LABEL: store_undef_mask_factor2:
-; CHECK: vst2.32 {d16, d17, d18, d19}, [r0]
+; NEON-LABEL: store_undef_mask_factor2:
+; NEON: vst2.32 {d16, d17, d18, d19}, [r0]
+; NONEON-LABEL: store_undef_mask_factor2:
+; NONEON-NOT: vst2
define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
%base = bitcast i32* %ptr to <8 x i32>*
%interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
@@ -179,9 +212,11 @@ define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
ret void
}
-; CHECK-LABEL: store_undef_mask_factor3:
-; CHECK: vst3.32 {d16, d18, d20}, [r0]!
-; CHECK: vst3.32 {d17, d19, d21}, [r0]
+; NEON-LABEL: store_undef_mask_factor3:
+; NEON: vst3.32 {d16, d18, d20}, [r0]!
+; NEON: vst3.32 {d17, d19, d21}, [r0]
+; NONEON-LABEL: store_undef_mask_factor3:
+; NONEON-NOT: vst3
define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
%base = bitcast i32* %ptr to <12 x i32>*
%v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -191,9 +226,11 @@ define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <
ret void
}
-; CHECK-LABEL: store_undef_mask_factor4:
-; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]!
-; CHECK: vst4.32 {d17, d19, d21, d23}, [r0]
+; NEON-LABEL: store_undef_mask_factor4:
+; NEON: vst4.32 {d16, d18, d20, d22}, [r0]!
+; NEON: vst4.32 {d17, d19, d21, d23}, [r0]
+; NONEON-LABEL: store_undef_mask_factor4:
+; NONEON-NOT: vst4
define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
%base = bitcast i32* %ptr to <16 x i32>*
%v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -202,3 +239,68 @@ define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <
store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
ret void
}
+
+; The following test cases check that address spaces are properly handled
+
+; NEON-LABEL: load_address_space
+; NEON: vld3.32
+; NONEON-LABEL: load_address_space
+; NONEON-NOT: vld3
+define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) {
+ %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %A
+ %interleaved = shufflevector <4 x i32> %tmp, <4 x i32> undef, <2 x i32> <i32 0, i32 3>
+ store <2 x i32> %interleaved, <2 x i32>* %B
+ ret void
+}
+
+; NEON-LABEL: store_address_space
+; NEON: vst2.32
+; NONEON-LABEL: store_address_space
+; NONEON-NOT: vst2
+define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspace(1)* %C) {
+ %tmp0 = load <2 x i32>, <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
+ %interleaved = shufflevector <2 x i32> %tmp0, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+ store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C
+ ret void
+}
+
+; Check that we do something sane with illegal types.
+
+; NEON-LABEL: load_illegal_factor2:
+; NEON: BB#0:
+; NEON-NEXT: vld1.64 {d16, d17}, [r0:128]
+; NEON-NEXT: vuzp.32 q8, {{.*}}
+; NEON-NEXT: vmov r0, r1, d16
+; NEON-NEXT: vmov r2, r3, {{.*}}
+; NEON-NEXT: mov pc, lr
+; NONEON-LABEL: load_illegal_factor2:
+; NONEON: BB#0:
+; NONEON-NEXT: ldr [[ELT0:r[0-9]+]], [r0]
+; NONEON-NEXT: ldr r1, [r0, #8]
+; NONEON-NEXT: mov r0, [[ELT0]]
+; NONEON-NEXT: mov pc, lr
+define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
+ %tmp1 = load <3 x float>, <3 x float>* %p, align 16
+ %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+ ret <3 x float> %tmp2
+}
+
+; This lowering isn't great, but it's at least correct.
+
+; NEON-LABEL: store_illegal_factor2:
+; NEON: BB#0:
+; NEON-NEXT: vldr d17, [sp]
+; NEON-NEXT: vmov d16, r2, r3
+; NEON-NEXT: vuzp.32 q8, {{.*}}
+; NEON-NEXT: vstr d16, [r0]
+; NEON-NEXT: mov pc, lr
+; NONEON-LABEL: store_illegal_factor2:
+; NONEON: BB#0:
+; NONEON-NEXT: stm r0, {r1, r3}
+; NONEON-NEXT: mov pc, lr
+define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
+ %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
+ store <3 x float> %tmp1, <3 x float>* %p, align 16
+ ret void
+}
diff --git a/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll b/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
new file mode 100644
index 000000000000..1434f40137b5
--- /dev/null
+++ b/test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
@@ -0,0 +1,142 @@
+; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+; We cannot merge this test with the main test for shrink-wrapping, because
+; the code path we want to exerce is not taken with ios lowering.
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
+target triple = "armv7--linux-gnueabi"
+
+@skip = internal unnamed_addr constant [2 x i8] c"\01\01", align 1
+
+; Check that we do not restore the before having used the saved CSRs.
+; This happened because of a bad use of the post-dominance property.
+; The exit block of the loop happens to also lead to defs/uses of CSRs.
+; It also post-dominates the loop body and we use to generate invalid
+; restore sequence. I.e., we restored too early.
+;
+; CHECK-LABEL: wrongUseOfPostDominate:
+;
+; The prologue is the first thing happening in the function
+; without shrink-wrapping.
+; DISABLE: push
+;
+; CHECK: cmp r1, #0
+;
+; With shrink-wrapping, we branch to a pre-header, where the prologue
+; is located.
+; ENABLE-NEXT: blt [[LOOP_PREHEADER:[.a-zA-Z0-9_]+]]
+; Without shrink-wrapping, we go straight into the loop.
+; DISABLE-NEXT: blt [[LOOP_HEADER:[.a-zA-Z0-9_]+]]
+;
+; CHECK: @ %if.end29
+; DISABLE-NEXT: pop
+; ENABLE-NEXT: bx lr
+;
+; ENABLE: [[LOOP_PREHEADER]]
+; ENABLE: push
+; We must not find a pop here, otherwise that means we are in the loop
+; and are restoring before using the saved CSRs.
+; ENABLE-NOT: pop
+; ENALBE-NEXT: [[LOOP_HEADER:[.a-zA-Z0-9_]+]]: @ %while.cond2.outer
+;
+; DISABLE: [[LOOP_HEADER]]: @ %while.cond2.outer
+;
+; ENABLE-NOT: pop
+;
+; CHECK: @ %while.cond2
+; CHECK: add
+; CHECK-NEXT: cmp r{{[0-1]+}}, #1
+; Set the return value.
+; CHECK-NEXT: moveq r0,
+; CHECK-NEXT: popeq
+;
+; Use the back edge to check we get the label of the loop right.
+; This is to make sure we check the right loop pattern.
+; CHECK: @ %while.body24.land.rhs14_crit_edge
+; CHECK: cmp r{{[0-9]+}}, #192
+; CHECK-NEXT bhs [[LOOP_HEADER]]
+define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnone %lim) {
+entry:
+ %cmp = icmp sgt i32 %off, -1
+ br i1 %cmp, label %while.cond.preheader, label %while.cond2.outer
+
+while.cond.preheader: ; preds = %entry
+ %tobool4 = icmp ne i32 %off, 0
+ %cmp15 = icmp ult i8* %s, %lim
+ %sel66 = and i1 %tobool4, %cmp15
+ br i1 %sel66, label %while.body, label %if.end29
+
+while.body: ; preds = %while.body, %while.cond.preheader
+ %s.addr.08 = phi i8* [ %add.ptr, %while.body ], [ %s, %while.cond.preheader ]
+ %off.addr.07 = phi i32 [ %dec, %while.body ], [ %off, %while.cond.preheader ]
+ %dec = add nsw i32 %off.addr.07, -1
+ %tmp = load i8, i8* %s.addr.08, align 1, !tbaa !2
+ %idxprom = zext i8 %tmp to i32
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* @skip, i32 0, i32 %idxprom
+ %tmp1 = load i8, i8* %arrayidx, align 1, !tbaa !2
+ %conv = zext i8 %tmp1 to i32
+ %add.ptr = getelementptr inbounds i8, i8* %s.addr.08, i32 %conv
+ %tobool = icmp ne i32 %off.addr.07, 1
+ %cmp1 = icmp ult i8* %add.ptr, %lim
+ %sel6 = and i1 %tobool, %cmp1
+ br i1 %sel6, label %while.body, label %if.end29
+
+while.cond2.outer: ; preds = %while.body24.land.rhs14_crit_edge, %while.body24, %land.rhs14.preheader, %if.then7, %entry
+ %off.addr.1.ph = phi i32 [ %off, %entry ], [ %inc, %land.rhs14.preheader ], [ %inc, %if.then7 ], [ %inc, %while.body24.land.rhs14_crit_edge ], [ %inc, %while.body24 ]
+ %s.addr.1.ph = phi i8* [ %s, %entry ], [ %incdec.ptr, %land.rhs14.preheader ], [ %incdec.ptr, %if.then7 ], [ %lsr.iv, %while.body24.land.rhs14_crit_edge ], [ %lsr.iv, %while.body24 ]
+ br label %while.cond2
+
+while.cond2: ; preds = %while.body4, %while.cond2.outer
+ %off.addr.1 = phi i32 [ %inc, %while.body4 ], [ %off.addr.1.ph, %while.cond2.outer ]
+ %inc = add nsw i32 %off.addr.1, 1
+ %tobool3 = icmp eq i32 %off.addr.1, 0
+ br i1 %tobool3, label %if.end29, label %while.body4
+
+while.body4: ; preds = %while.cond2
+ %tmp2 = icmp ugt i8* %s.addr.1.ph, %lim
+ br i1 %tmp2, label %if.then7, label %while.cond2
+
+if.then7: ; preds = %while.body4
+ %incdec.ptr = getelementptr inbounds i8, i8* %s.addr.1.ph, i32 -1
+ %tmp3 = load i8, i8* %incdec.ptr, align 1, !tbaa !2
+ %conv1525 = zext i8 %tmp3 to i32
+ %tobool9 = icmp slt i8 %tmp3, 0
+ %cmp129 = icmp ugt i8* %incdec.ptr, %lim
+ %or.cond13 = and i1 %tobool9, %cmp129
+ br i1 %or.cond13, label %land.rhs14.preheader, label %while.cond2.outer
+
+land.rhs14.preheader: ; preds = %if.then7
+ %cmp1624 = icmp slt i8 %tmp3, 0
+ %cmp2026 = icmp ult i32 %conv1525, 192
+ %or.cond27 = and i1 %cmp1624, %cmp2026
+ br i1 %or.cond27, label %while.body24.preheader, label %while.cond2.outer
+
+while.body24.preheader: ; preds = %land.rhs14.preheader
+ %scevgep = getelementptr i8, i8* %s.addr.1.ph, i32 -2
+ br label %while.body24
+
+while.body24: ; preds = %while.body24.land.rhs14_crit_edge, %while.body24.preheader
+ %lsr.iv = phi i8* [ %scevgep, %while.body24.preheader ], [ %scevgep34, %while.body24.land.rhs14_crit_edge ]
+ %cmp12 = icmp ugt i8* %lsr.iv, %lim
+ br i1 %cmp12, label %while.body24.land.rhs14_crit_edge, label %while.cond2.outer
+
+while.body24.land.rhs14_crit_edge: ; preds = %while.body24
+ %.pre = load i8, i8* %lsr.iv, align 1, !tbaa !2
+ %cmp16 = icmp slt i8 %.pre, 0
+ %conv15 = zext i8 %.pre to i32
+ %cmp20 = icmp ult i32 %conv15, 192
+ %or.cond = and i1 %cmp16, %cmp20
+ %scevgep34 = getelementptr i8, i8* %lsr.iv, i32 -1
+ br i1 %or.cond, label %while.body24, label %while.cond2.outer
+
+if.end29: ; preds = %while.cond2, %while.body, %while.cond.preheader
+ %s.addr.3 = phi i8* [ %s, %while.cond.preheader ], [ %add.ptr, %while.body ], [ %s.addr.1.ph, %while.cond2 ]
+ ret i8* %s.addr.3
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/arm-shrink-wrapping.ll b/test/CodeGen/ARM/arm-shrink-wrapping.ll
new file mode 100644
index 000000000000..9375df4b15cb
--- /dev/null
+++ b/test/CodeGen/ARM/arm-shrink-wrapping.ll
@@ -0,0 +1,683 @@
+; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=ENABLE --check-prefix=ARM-ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=armv7-apple-ios \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ARM --check-prefix=DISABLE --check-prefix=ARM-DISABLE
+; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=ENABLE --check-prefix=THUMB-ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv7-apple-ios \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB --check-prefix=DISABLE --check-prefix=THUMB-DISABLE
+
+;
+; Note: Lots of tests use inline asm instead of regular calls.
+; This allows to have a better control on what the allocation will do.
+; Otherwise, we may have spill right in the entry block, defeating
+; shrink-wrapping. Moreover, some of the inline asm statements (nop)
+; are here to ensure that the related paths do not end up as critical
+; edges.
+; Also disable the late if-converter as it makes harder to reason on
+; the diffs.
+
+; Initial motivating example: Simple diamond with a call just on one side.
+; CHECK-LABEL: foo:
+;
+; Compare the arguments and jump to exit.
+; No prologue needed.
+; ENABLE: cmp r0, r1
+; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: push {r7, lr}
+; CHECK-NEXT: mov r7, sp
+;;
+; Compare the arguments and jump to exit.
+; After the prologue is set.
+; DISABLE: sub sp
+; DISABLE: cmp r0, r1
+; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Store %a in the alloca.
+; ARM-ENABLE: push {r0}
+; THUMB-ENABLE: str r0, [sp, #-4]
+; DISABLE: str r0, [sp]
+; Set the alloca address in the second argument.
+; CHECK-NEXT: mov r1, sp
+; Set the first argument to zero.
+; CHECK-NEXT: mov{{s?}} r0, #0
+; CHECK-NEXT: bl{{x?}} _doSomething
+;
+; With shrink-wrapping, epilogue is just after the call.
+; ARM-ENABLE-NEXT: mov sp, r7
+; THUMB-ENABLE-NEXT: add sp, #4
+; ENABLE-NEXT: pop{{(\.w)?}} {r7, lr}
+;
+; CHECK: [[EXIT_LABEL]]:
+;
+; Without shrink-wrapping, epilogue is in the exit block.
+; Epilogue code. (What we pop does not matter.)
+; ARM-DISABLE: mov sp, r7
+; THUMB-DISABLE: add sp,
+; DISABLE-NEXT: pop {r7, pc}
+;
+; ENABLE-NEXT: bx lr
+define i32 @foo(i32 %a, i32 %b) {
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+false:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+ ret i32 %tmp.0
+}
+
+; Function Attrs: optsize
+declare i32 @doSomething(i32, i32*)
+
+
+; Check that we do not perform the restore inside the loop whereas the save
+; is outside.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
+;
+; Shrink-wrapping allows to skip the prologue in the else case.
+; ARM-ENABLE: cmp r0, #0
+; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4, r7, lr}
+; CHECK-NEXT: add r7, sp, #4
+;
+; ARM-DISABLE: cmp r0, #0
+; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; SUM is in r0 because it is coalesced with the second
+; argument on the else path.
+; CHECK: mov{{s?}} [[SUM:r0]], #0
+; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
+; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
+; ARM: subs [[IV]], [[IV]], #1
+; THUMB: subs [[IV]], #1
+; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
+; THUMB-NEXT: add [[SUM]], [[TMP]]
+; CHECK-NEXT: bne [[LOOP]]
+;
+; Next BB.
+; SUM << 3.
+; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3
+; ENABLE-NEXT: pop {r4, r7, pc}
+;
+; Duplicated epilogue.
+; DISABLE: pop {r4, r7, pc}
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsl{{s?}} r0, r1, #1
+; DISABLE-NEXT: pop {r4, r7, pc}
+;
+; ENABLE-NEXT: bx lr
+define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare i32 @something(...)
+
+; Check that we do not perform the shrink-wrapping inside the loop even
+; though that would be legal. The cost model must prevent that.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4
+; CHECK: mov{{s?}} [[SUM:r0]], #0
+; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
+; CHECK: nop
+; Next BB.
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body
+; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
+; ARM: subs [[IV]], [[IV]], #1
+; THUMB: subs [[IV]], #1
+; ARM: add [[SUM]], [[TMP]], [[SUM]]
+; THUMB: add [[SUM]], [[TMP]]
+; CHECK-NEXT: bne [[LOOP_LABEL]]
+; Next BB.
+; CHECK: @ %for.exit
+; CHECK: nop
+; CHECK: pop {r4
+define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
+entry:
+ br label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
+ %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
+ %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.03
+ %inc = add nuw nsw i32 %i.04, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.exit, label %for.body
+
+for.exit:
+ tail call void asm "nop", ""()
+ br label %for.end
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+; Check with a more complex case that we do not have save within the loop and
+; restore outside.
+; CHECK-LABEL: loopInfoSaveOutsideLoop:
+;
+; ARM-ENABLE: cmp r0, #0
+; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4, r7, lr}
+; CHECK-NEXT: add r7, sp, #4
+;
+; ARM-DISABLE: cmp r0, #0
+; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; SUM is in r0 because it is coalesced with the second
+; argument on the else path.
+; CHECK: mov{{s?}} [[SUM:r0]], #0
+; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
+; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
+; ARM: subs [[IV]], [[IV]], #1
+; THUMB: subs [[IV]], #1
+; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
+; THUMB-NEXT: add [[SUM]], [[TMP]]
+; CHECK-NEXT: bne [[LOOP]]
+;
+; Next BB.
+; SUM << 3.
+; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3
+; ENABLE: pop {r4, r7, pc}
+;
+; Duplicated epilogue.
+; DISABLE: pop {r4, r7, pc}
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsl{{s?}} r0, r1, #1
+; DISABLE-NEXT: pop {r4, r7, pc}
+;
+; ENABLE-NEXT: bx lr
+define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ tail call void asm "nop", "~{r4}"()
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare void @somethingElse(...)
+
+; Check with a more complex case that we do not have restore within the loop and
+; save outside.
+; CHECK-LABEL: loopInfoRestoreOutsideLoop:
+;
+; ARM-ENABLE: cmp r0, #0
+; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4, r7, lr}
+; CHECK-NEXT: add r7, sp, #4
+;
+; ARM-DISABLE: cmp r0, #0
+; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; SUM is in r0 because it is coalesced with the second
+; argument on the else path.
+; CHECK: mov{{s?}} [[SUM:r0]], #0
+; CHECK-NEXT: mov{{s?}} [[IV:r[0-9]+]], #10
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
+; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
+; ARM: subs [[IV]], [[IV]], #1
+; THUMB: subs [[IV]], #1
+; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
+; THUMB-NEXT: add [[SUM]], [[TMP]]
+; CHECK-NEXT: bne [[LOOP]]
+;
+; Next BB.
+; SUM << 3.
+; CHECK: lsl{{s?}} [[SUM]], [[SUM]], #3
+; ENABLE-NEXT: pop {r4, r7, pc}
+;
+; Duplicated epilogue.
+; DISABLE: pop {r4, r7, pc}
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsl{{s?}} r0, r1, #1
+; DISABLE-NEXT: pop {r4, r7, pc}
+;
+; ENABLE-NEXT: bx lr
+define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ tail call void asm "nop", "~{r4}"()
+ br label %for.body
+
+for.body: ; preds = %for.body, %if.then
+ %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
+ %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+; Check that we handle function with no frame information correctly.
+; CHECK-LABEL: emptyFrame:
+; CHECK: @ %entry
+; CHECK-NEXT: mov{{s?}} r0, #0
+; CHECK-NEXT: bx lr
+define i32 @emptyFrame() {
+entry:
+ ret i32 0
+}
+
+; Check that we handle inline asm correctly.
+; CHECK-LABEL: inlineAsm:
+;
+; ARM-ENABLE: cmp r0, #0
+; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4, r7, lr}
+; CHECK-NEXT: add r7, sp, #4
+;
+; ARM-DISABLE: cmp r0, #0
+; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-DISABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: mov{{s?}} [[IV:r[0-9]+]], #10
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
+; ARM: subs [[IV]], [[IV]], #1
+; THUMB: subs [[IV]], #1
+; CHECK: add{{(\.w)?}} r4, r4, #1
+; CHECK: bne [[LOOP]]
+;
+; Next BB.
+; CHECK: mov{{s?}} r0, #0
+;
+; Duplicated epilogue.
+; DISABLE: pop {r4, r7, pc}
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsl{{s?}} r0, r1, #1
+; DISABLE-NEXT: pop {r4, r7, pc}
+;
+; ENABLE-NEXT: bx lr
+define i32 @inlineAsm(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ tail call void asm sideeffect "add r4, #1", "~{r4}"()
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.exit, label %for.body
+
+for.exit:
+ tail call void asm "nop", ""()
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %for.body, %if.else
+ %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
+ ret i32 %sum.0
+}
+
+; Check that we handle calls to variadic functions correctly.
+; CHECK-LABEL: callVariadicFunc:
+;
+; ARM-ENABLE: cmp r0, #0
+; ARM-ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-ENABLE: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: push {r7, lr}
+; CHECK-NEXT: mov r7, sp
+; CHECK-NEXT: sub sp, {{(sp, )?}}#12
+;
+; ARM-DISABLE: cmp r0, #0
+; ARM-DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+; THUMB-DISABLE-NEXT: cbz r0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Setup of the varags.
+; CHECK: mov r0, r1
+; CHECK-NEXT: mov r2, r1
+; CHECK-NEXT: mov r3, r1
+; ARM-NEXT: str r1, [sp]
+; ARM-NEXT: str r1, [sp, #4]
+; THUMB-NEXT: strd r1, r1, [sp]
+; CHECK-NEXT: str r1, [sp, #8]
+; CHECK-NEXT: bl{{x?}} _someVariadicFunc
+; CHECK-NEXT: lsl{{s?}} r0, r0, #3
+; ARM-NEXT: mov sp, r7
+; THUMB-NEXT: add sp, #12
+; CHECK-NEXT: pop {r7, pc}
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsl{{s?}} r0, r1, #1
+;
+; Epilogue code.
+; ENABLE-NEXT: bx lr
+;
+; ARM-DISABLE-NEXT: mov sp, r7
+; THUMB-DISABLE-NEXT: add sp, #12
+; DISABLE-NEXT: pop {r7, pc}
+define i32 @callVariadicFunc(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
+ %shl = shl i32 %call, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
+ ret i32 %sum.0
+}
+
+declare i32 @someVariadicFunc(i32, ...)
+
+; Make sure we do not insert unreachable code after noreturn function.
+; Although this is not incorrect to insert such code, it is useless
+; and it hurts the binary size.
+;
+; CHECK-LABEL: noreturn:
+; DISABLE: push
+;
+; CHECK: tst{{(\.w)?}} r0, #255
+; CHECK-NEXT: bne [[ABORT:LBB[0-9_]+]]
+;
+; CHECK: mov{{s?}} r0, #42
+;
+; ENABLE-NEXT: bx lr
+;
+; DISABLE-NEXT: pop
+;;
+; CHECK: [[ABORT]]: @ %if.abort
+;
+; ENABLE: push
+;
+; CHECK: bl{{x?}} _abort
+; ENABLE-NOT: pop
+define i32 @noreturn(i8 signext %bad_thing) {
+entry:
+ %tobool = icmp eq i8 %bad_thing, 0
+ br i1 %tobool, label %if.end, label %if.abort
+
+if.abort:
+ %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
+ tail call void @abort() #0
+ unreachable
+
+if.end:
+ ret i32 42
+}
+
+declare void @abort() #0
+
+attributes #0 = { noreturn nounwind }
+
+; Make sure that we handle infinite loops properly When checking that the Save
+; and Restore blocks are control flow equivalent, the loop searches for the
+; immediate (post) dominator for the (restore) save blocks. When either the Save
+; or Restore block is located in an infinite loop the only immediate (post)
+; dominator is itself. In this case, we cannot perform shrink wrapping, but we
+; should return gracefully and continue compilation.
+; The only condition for this test is the compilation finishes correctly.
+; CHECK-LABEL: infiniteloop
+; CHECK: pop
+define void @infiniteloop() {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %ptr = alloca i32, i32 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ %call = tail call i32 asm sideeffect "mov $0, #1", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.03
+ store i32 %add, i32* %ptr
+ br label %for.body
+
+if.end:
+ ret void
+}
+
+; Another infinite loop test this time with a body bigger than just one block.
+; CHECK-LABEL: infiniteloop2
+; CHECK: pop
+define void @infiniteloop2() {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %ptr = alloca i32, i32 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
+ %call = tail call i32 asm "mov $0, #0", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.03
+ store i32 %add, i32* %ptr
+ br i1 undef, label %body1, label %body2
+
+body1:
+ tail call void asm sideeffect "nop", "~{r4}"()
+ br label %for.body
+
+body2:
+ tail call void asm sideeffect "nop", "~{r4}"()
+ br label %for.body
+
+if.end:
+ ret void
+}
+
+; Another infinite loop test this time with two nested infinite loop.
+; CHECK-LABEL: infiniteloop3
+; CHECK: bx lr
+define void @infiniteloop3() {
+entry:
+ br i1 undef, label %loop2a, label %body
+
+body: ; preds = %entry
+ br i1 undef, label %loop2a, label %end
+
+loop1: ; preds = %loop2a, %loop2b
+ %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
+ %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
+ %0 = icmp eq i32* %var, null
+ %next.load = load i32*, i32** undef
+ br i1 %0, label %loop2a, label %loop2b
+
+loop2a: ; preds = %loop1, %body, %entry
+ %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
+ %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
+ br label %loop1
+
+loop2b: ; preds = %loop1
+ %gep1 = bitcast i32* %var.phi to i32*
+ %next.ptr = bitcast i32* %gep1 to i32**
+ store i32* %next.phi, i32** %next.ptr
+ br label %loop1
+
+end:
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @llvm.pow.f64(double, double)
+
+; This function needs to spill floating point registers to
+; exerce the path where we were deferencing the end iterator
+; to access debug info location while inserting the spill code
+; during PEI with shrink-wrapping enable.
+; CHECK-LABEL: debug_info:
+;
+; ENABLE: tst{{(\.w)?}} r2, #1
+; ENABLE-NEXT: beq [[BB13:LBB[0-9_]+]]
+;
+; CHECK: push
+;
+; DISABLE: tst{{(\.w)?}} r2, #1
+; DISABLE-NEXT: beq [[BB13:LBB[0-9_]+]]
+;
+; CHECK: bl{{x?}} _pow
+;
+;
+; ENABLE: pop
+;
+; CHECK: [[BB13]]:
+; CHECK: vldr
+;
+; DISABLE: pop
+;
+; CHECK: bl
+define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %tmp) {
+bb:
+ br i1 %or.cond, label %bb3, label %bb13
+
+bb3: ; preds = %bb
+ %tmp4 = fcmp ogt float %gamma, 1.000000e+00
+ %tmp5 = fadd double 1.000000e+00, %tmp
+ %tmp6 = select i1 %tmp4, double %tmp5, double %tmp
+ %tmp10 = tail call double @llvm.pow.f64(double %tmp, double %tmp)
+ %tmp11 = fcmp une double %tmp6, %tmp
+ %tmp12 = fadd double %tmp10, %tmp10
+ %cutoff.0 = select i1 %tmp11, double %tmp12, double %tmp
+ %phitmp = fptrunc double %cutoff.0 to float
+ br label %bb13
+
+bb13: ; preds = %bb3, %bb
+ %cutoff.1 = phi float [ 0.000000e+00, %bb ], [ %phitmp, %bb3 ]
+ ret float %cutoff.1
+}
+
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "LLVM", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "a.cpp", directory: "b")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 7510d6ccdc33..573cd45c0825 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -208,10 +208,16 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
define i64 @test8(i64* %ptr) {
; CHECK-LABEL: test8:
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK-NOT: strexd
+; CHECK: clrex
+; CHECK-NOT: strexd
; CHECK: dmb {{ish$}}
; CHECK-THUMB-LABEL: test8:
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB-NOT: strexd
+; CHECK-THUMB: clrex
+; CHECK-THUMB-NOT: strexd
; CHECK-THUMB: dmb {{ish$}}
%r = load atomic i64, i64* %ptr seq_cst, align 8
diff --git a/test/CodeGen/ARM/atomic-cmp.ll b/test/CodeGen/ARM/atomic-cmp.ll
index 629b16d86ab5..7f41b7d93d1a 100644
--- a/test/CodeGen/ARM/atomic-cmp.ll
+++ b/test/CodeGen/ARM/atomic-cmp.ll
@@ -6,10 +6,12 @@ define i8 @t(i8* %a, i8 %b, i8 %c) nounwind {
; ARM-LABEL: t:
; ARM: ldrexb
; ARM: strexb
+; ARM: clrex
; T2-LABEL: t:
-; T2: ldrexb
; T2: strexb
+; T2: ldrexb
+; T2: clrex
%tmp0 = cmpxchg i8* %a, i8 %b, i8 %c monotonic monotonic
%tmp1 = extractvalue { i8, i1 } %tmp0, 0
ret i8 %tmp1
diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll
index 84790be6d605..32cdf4174ddc 100644
--- a/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARM
-; RUN: llc < %s -mtriple=thumb-linux-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMB
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=thumb-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMB
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARMV7
-; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV7
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARMV6
+; RUN: llc < %s -mtriple=thumbv6-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV6
+
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-ARMV7
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -asm-verbose=false -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-THUMBV7
define zeroext i1 @test_cmpxchg_res_i8(i8* %addr, i8 %desired, i8 zeroext %new) {
entry:
@@ -26,28 +29,71 @@ entry:
; CHECK-THUMB: movs r0, #1
; CHECK-THUMB: movs [[R2:r[0-9]+]], #0
; CHECK-THUMB: cmp [[R1]], {{r[0-9]+}}
-; CHECK-THU<B: beq
+; CHECK-THUMB: beq
; CHECK-THUMB: push {[[R2]]}
; CHECK-THUMB: pop {r0}
-; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8
-; CHECK-ARMV7: ldrexb [[R3:r[0-9]+]], [r0]
-; CHECK-ARMV7: mov [[R1:r[0-9]+]], #0
-; CHECK-ARMV7: cmp [[R3]], {{r[0-9]+}}
-; CHECK-ARMV7: bne
-; CHECK-ARMV7: strexb [[R3]], {{r[0-9]+}}, [{{r[0-9]+}}]
-; CHECK-ARMV7: mov [[R1]], #1
-; CHECK-ARMV7: cmp [[R3]], #0
-; CHECK-ARMV7: bne
-; CHECK-ARMV7: mov r0, [[R1]]
-
-; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8
-; CHECK-THUMBV7: ldrexb [[R3:r[0-9]+]], [r0]
-; CHECK-THUMBV7: cmp [[R3]], {{r[0-9]+}}
-; CHECK-THUMBV7: movne r0, #0
-; CHECK-THUMBV7: bxne lr
-; CHECK-THUMBV7: strexb [[R3]], {{r[0-9]+}}, [{{r[0-9]+}}]
-; CHECK-THUMBV7: cmp [[R3]], #0
-; CHECK-THUMBV7: itt eq
-; CHECK-THUMBV7: moveq r0, #1
-; CHECK-THUMBV7: bxeq lr
+; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8:
+; CHECK-ARMV6-NEXT: .fnstart
+; CHECK-ARMV6-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
+; CHECK-ARMV6-NEXT: [[TRY:.LBB[0-9_]+]]:
+; CHECK-ARMV6-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-ARMV6-NEXT: mov [[RES:r[0-9]+]], #0
+; CHECK-ARMV6-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-ARMV6-NEXT: bne [[END:.LBB[0-9_]+]]
+; CHECK-ARMV6-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK-ARMV6-NEXT: mov [[RES]], #1
+; CHECK-ARMV6-NEXT: cmp [[SUCCESS]], #0
+; CHECK-ARMV6-NEXT: bne [[TRY]]
+; CHECK-ARMV6-NEXT: [[END]]:
+; CHECK-ARMV6-NEXT: mov r0, [[RES]]
+; CHECK-ARMV6-NEXT: bx lr
+
+; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8:
+; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1
+; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1
+; CHECK-THUMBV6-NEXT: mov [[RES:r[0-9]+]], r0
+; CHECK-THUMBV6-NEXT: movs r0, #1
+; CHECK-THUMBV6-NEXT: movs [[ZERO:r[0-9]+]], #0
+; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED]]
+; CHECK-THUMBV6-NEXT: beq [[END:.LBB[0-9_]+]]
+; CHECK-THUMBV6-NEXT: mov r0, [[ZERO]]
+; CHECK-THUMBV6-NEXT: [[END]]:
+; CHECK-THUMBV6-NEXT: pop {{.*}}pc}
+
+; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
+; CHECK-ARMV7-NEXT: .fnstart
+; CHECK-ARMV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
+; CHECK-ARMV7-NEXT: [[TRY:.LBB[0-9_]+]]:
+; CHECK-ARMV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-ARMV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-ARMV7-NEXT: bne [[FAIL:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK-ARMV7-NEXT: mov [[RES:r[0-9]+]], #1
+; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], #0
+; CHECK-ARMV7-NEXT: bne [[TRY]]
+; CHECK-ARMV7-NEXT: b [[END:.LBB[0-9_]+]]
+; CHECK-ARMV7-NEXT: [[FAIL]]:
+; CHECK-ARMV7-NEXT: clrex
+; CHECK-ARMV7-NEXT: mov [[RES]], #0
+; CHECK-ARMV7-NEXT: [[END]]:
+; CHECK-ARMV7-NEXT: mov r0, [[RES]]
+; CHECK-ARMV7-NEXT: bx lr
+
+; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
+; CHECK-THUMBV7-NEXT: .fnstart
+; CHECK-THUMBV7-NEXT: uxtb [[DESIRED:r[0-9]+]], r1
+; CHECK-THUMBV7-NEXT: b [[TRYLD:.LBB[0-9_]+]]
+; CHECK-THUMBV7-NEXT: [[TRYST:.LBB[0-9_]+]]:
+; CHECK-THUMBV7-NEXT: strexb [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK-THUMBV7-NEXT: cmp [[SUCCESS]], #0
+; CHECK-THUMBV7-NEXT: itt eq
+; CHECK-THUMBV7-NEXT: moveq r0, #1
+; CHECK-THUMBV7-NEXT: bxeq lr
+; CHECK-THUMBV7-NEXT: [[TRYLD]]:
+; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
+; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
+; CHECK-THUMBV7-NEXT: beq [[TRYST:.LBB[0-9_]+]]
+; CHECK-THUMBV7-NEXT: clrex
+; CHECK-THUMBV7-NEXT: movs r0, #0
+; CHECK-THUMBV7-NEXT: bx lr
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index db32bffdd5d1..791389456619 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix CHECK-ARMV7
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2
; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1
; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-M0
; RUN: llc < %s -mtriple=thumbv7--none-eabi -thread-model single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL
@@ -272,16 +272,31 @@ define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
%pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%oldval = extractvalue { i32, i1 } %pair, 0
-; CHECK: dmb ish
-; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
-; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
-; CHECK: cmp [[OLDVAL]], r1
-; CHECK: bxne lr
-; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
-; CHECK: cmp [[SUCCESS]], #0
-; CHECK: bne [[LOOP_BB]]
-; CHECK: dmb ish
-; CHECK: bx lr
+; CHECK-ARMV7: dmb ish
+; CHECK-ARMV7: [[LOOP_BB:\.?LBB[0-9]+_1]]:
+; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
+; CHECK-ARMV7: cmp [[OLDVAL]], r1
+; CHECK-ARMV7: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK-ARMV7: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
+; CHECK-ARMV7: cmp [[SUCCESS]], #0
+; CHECK-ARMV7: bne [[LOOP_BB]]
+; CHECK-ARMV7: dmb ish
+; CHECK-ARMV7: bx lr
+; CHECK-ARMV7: [[FAIL_BB]]:
+; CHECK-ARMV7: clrex
+; CHECK-ARMV7: bx lr
+
+; CHECK-T2: dmb ish
+; CHECK-T2: [[LOOP_BB:\.?LBB[0-9]+_1]]:
+; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
+; CHECK-T2: cmp [[OLDVAL]], r1
+; CHECK-T2: clrexne
+; CHECK-T2: bxne lr
+; CHECK-T2: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
+; CHECK-T2: cmp [[SUCCESS]], #0
+; CHECK-T2: dmbeq ish
+; CHECK-T2: bxeq lr
+; CHECK-T2: b [[LOOP_BB]]
ret i32 %oldval
}
@@ -295,11 +310,14 @@ define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
; CHECK: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK: cmp [[OLDVAL]], r1
-; CHECK: bne [[END_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
; CHECK: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK: cmp [[SUCCESS]], #0
; CHECK: bne [[LOOP_BB]]
-; CHECK: [[END_BB]]:
+; CHECK: b [[END_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK: [[FAIL_BB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[END_BB]]:
; CHECK: dmb ish
; CHECK: bx lr
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index 86287c1178db..efdb75b63222 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1055,24 +1055,30 @@ define i8 @test_atomic_cmpxchg_i8(i8 zeroext %wanted, i8 zeroext %new) nounwind
%old = extractvalue { i8, i1 } %pair, 0
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
-; CHECK: movt r[[ADDR]], :upper16:var8
+; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var8
+; CHECK-DAG: movt r[[ADDR]], :upper16:var8
+; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0
; CHECK: .LBB{{[0-9]+}}_1:
; CHECK: ldaexb r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-ARM-NEXT: cmp r[[OLD]], r0
+; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
-; CHECK: strexb [[STATUS:r[0-9]+]], r1, {{.*}}[[ADDR]]
+; CHECK: strexb [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NEXT: b .LBB{{[0-9]+}}_4
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD]]
+; CHECK-ARM: mov r0, r[[OLD]]
ret i8 %old
}
@@ -1082,24 +1088,30 @@ define i16 @test_atomic_cmpxchg_i16(i16 zeroext %wanted, i16 zeroext %new) nounw
%old = extractvalue { i16, i1 } %pair, 0
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
-; CHECK: movt r[[ADDR]], :upper16:var16
+; CHECK-DAG: movw r[[ADDR:[0-9]+]], :lower16:var16
+; CHECK-DAG: movt r[[ADDR]], :upper16:var16
+; CHECK-THUMB-DAG: mov r[[WANTED:[0-9]+]], r0
; CHECK: .LBB{{[0-9]+}}_1:
; CHECK: ldaexh r[[OLD:[0-9]+]], [r[[ADDR]]]
; r0 below is a reasonable guess but could change: it certainly comes into the
; function there.
-; CHECK-NEXT: cmp r[[OLD]], r0
+; CHECK-ARM-NEXT: cmp r[[OLD]], r0
+; CHECK-THUMB-NEXT: cmp r[[OLD]], r[[WANTED]]
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2:
; As above, r1 is a reasonable guess.
; CHECK: stlexh [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NEXT: b .LBB{{[0-9]+}}_4
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
; CHECK-NOT: dmb
; CHECK-NOT: mcr
-; CHECK: mov r0, r[[OLD]]
+; CHECK-ARM: mov r0, r[[OLD]]
ret i16 %old
}
@@ -1124,6 +1136,10 @@ define void @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
; CHECK: stlex [[STATUS:r[0-9]+]], r1, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NEXT: b .LBB{{[0-9]+}}_4
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
; CHECK-NOT: dmb
; CHECK-NOT: mcr
@@ -1158,6 +1174,10 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK: strexd [[STATUS:r[0-9]+]], r2, r3, [r[[ADDR]]]
; CHECK-NEXT: cmp [[STATUS]], #0
; CHECK-NEXT: bne .LBB{{[0-9]+}}_1
+; CHECK-NEXT: b .LBB{{[0-9]+}}_4
+; CHECK-NEXT: .LBB{{[0-9]+}}_3:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: .LBB{{[0-9]+}}_4:
; CHECK-NOT: dmb
; CHECK-NOT: mcr
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index c3de07e03b6b..79e8e68e2f57 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CORTEX
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=swift | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SWIFT
; Avoid some 's' 16-bit instruction which partially update CPSR (and add false
; dependency) when it isn't dependent on last CPSR defining instruction.
; rdar://8928208
@@ -7,8 +7,10 @@
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
entry:
; CHECK-LABEL: t1:
-; CHECK: muls [[REG:(r[0-9]+)]], r3, r2
-; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-CORTEX: muls [[REG:(r[0-9]+)]], r3, r2
+; CHECK-CORTEX-NEXT: mul [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-SWIFT: muls [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-SWIFT-NEXT: mul [[REG:(r[0-9]+)]], r2, r3
; CHECK-NEXT: muls r0, [[REG]], [[REG2]]
%0 = mul nsw i32 %a, %b
%1 = mul nsw i32 %c, %d
@@ -21,8 +23,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind {
entry:
; CHECK-LABEL: t2:
- %tobool7 = icmp eq i32* %ptr2, null
- br i1 %tobool7, label %while.end, label %while.body
+ br label %while.body
while.body:
; CHECK: while.body
@@ -55,8 +56,7 @@ while.end:
define void @t3(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind minsize {
entry:
; CHECK-LABEL: t3:
- %tobool7 = icmp eq i32* %ptr2, null
- br i1 %tobool7, label %while.end, label %while.body
+ br label %while.body
while.body:
; CHECK: while.body
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index 0661960d1ae0..893fef3add7e 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -74,3 +74,98 @@ entry:
%or = or i32 %shl, %and
ret i32 %or
}
+
+define i32 @f7(i32 %x, i32 %y) {
+; CHECK-LABEL: f7:
+; CHECK: bfi r1, r0, #4, #1
+ %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+ %and = and i32 %x, 4
+ %or = or i32 %y2, 16
+ %cmp = icmp ne i32 %and, 0
+ %sel = select i1 %cmp, i32 %or, i32 %y2
+ ret i32 %sel
+}
+
+define i32 @f8(i32 %x, i32 %y) {
+; CHECK-LABEL: f8:
+; CHECK: bfi r1, r0, #4, #1
+; CHECK: bfi r1, r0, #5, #1
+ %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+ %and = and i32 %x, 4
+ %or = or i32 %y2, 48
+ %cmp = icmp ne i32 %and, 0
+ %sel = select i1 %cmp, i32 %or, i32 %y2
+ ret i32 %sel
+}
+
+define i32 @f9(i32 %x, i32 %y) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: bfi
+ %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+ %and = and i32 %x, 4
+ %or = or i32 %y2, 48
+ %cmp = icmp ne i32 %and, 0
+ %sel = select i1 %cmp, i32 %y2, i32 %or
+ ret i32 %sel
+}
+
+define i32 @f10(i32 %x, i32 %y) {
+; CHECK-LABEL: f10:
+; CHECK: bfi r1, r0, #4, #2
+ %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+ %and = and i32 %x, 4
+ %or = or i32 %y2, 32
+ %cmp = icmp ne i32 %and, 0
+ %sel = select i1 %cmp, i32 %or, i32 %y2
+
+ %aand = and i32 %x, 2
+ %aor = or i32 %sel, 16
+ %acmp = icmp ne i32 %aand, 0
+ %asel = select i1 %acmp, i32 %aor, i32 %sel
+
+ ret i32 %asel
+}
+
+define i32 @f11(i32 %x, i32 %y) {
+; CHECK-LABEL: f11:
+; CHECK: bfi r1, r0, #4, #3
+ %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+ %and = and i32 %x, 4
+ %or = or i32 %y2, 32
+ %cmp = icmp ne i32 %and, 0
+ %sel = select i1 %cmp, i32 %or, i32 %y2
+
+ %aand = and i32 %x, 2
+ %aor = or i32 %sel, 16
+ %acmp = icmp ne i32 %aand, 0
+ %asel = select i1 %acmp, i32 %aor, i32 %sel
+
+ %band = and i32 %x, 8
+ %bor = or i32 %asel, 64
+ %bcmp = icmp ne i32 %band, 0
+ %bsel = select i1 %bcmp, i32 %bor, i32 %asel
+
+ ret i32 %bsel
+}
+
+define i32 @f12(i32 %x, i32 %y) {
+; CHECK-LABEL: f12:
+; CHECK: bfi r1, r0, #4, #1
+ %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+ %and = and i32 %x, 4
+ %or = or i32 %y2, 16
+ %cmp = icmp eq i32 %and, 0
+ %sel = select i1 %cmp, i32 %y2, i32 %or
+ ret i32 %sel
+}
+
+define i32 @f13(i32 %x, i32 %y) {
+; CHECK-LABEL: f13:
+; CHECK-NOT: bfi
+ %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00
+ %and = and i32 %x, 4
+ %or = or i32 %y2, 16
+ %cmp = icmp eq i32 %and, 42 ; Not comparing against zero!
+ %sel = select i1 %cmp, i32 %y2, i32 %or
+ ret i32 %sel
+}
diff --git a/test/CodeGen/ARM/build-attributes-optimization-minsize.ll b/test/CodeGen/ARM/build-attributes-optimization-minsize.ll
new file mode 100644
index 000000000000..4cfb6012f439
--- /dev/null
+++ b/test/CodeGen/ARM/build-attributes-optimization-minsize.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s
+
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+
+; CHECK: .eabi_attribute 30, 4 @ Tag_ABI_optimization_goals
+; CHECK-OBJ: TagName: ABI_optimization_goals
+; CHECK-OBJ-NEXT: Description: Aggressive Size
+
+define i32 @f(i64 %z) #0 {
+ ret i32 0
+}
+
+attributes #0 = { minsize optsize }
+
diff --git a/test/CodeGen/ARM/build-attributes-optimization-mixed.ll b/test/CodeGen/ARM/build-attributes-optimization-mixed.ll
new file mode 100644
index 000000000000..8009fc6e28f8
--- /dev/null
+++ b/test/CodeGen/ARM/build-attributes-optimization-mixed.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s
+
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s
+
+; CHECK-NOT: .eabi_attribute 30
+; CHECK-NOT: Tag_ABI_optimization_goals
+
+define i32 @f(i64 %z) #0 {
+ ret i32 0
+}
+
+define i32 @g(i64 %z) #1 {
+ ret i32 1
+}
+
+attributes #0 = { noinline optnone }
+
+attributes #1 = { minsize optsize }
+
diff --git a/test/CodeGen/ARM/build-attributes-optimization-optnone.ll b/test/CodeGen/ARM/build-attributes-optimization-optnone.ll
new file mode 100644
index 000000000000..cbdb915045c6
--- /dev/null
+++ b/test/CodeGen/ARM/build-attributes-optimization-optnone.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s
+
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+
+; CHECK: .eabi_attribute 30, 6 @ Tag_ABI_optimization_goals
+; CHECK-OBJ: TagName: ABI_optimization_goals
+; CHECK-OBJ-NEXT: Description: Best Debugging
+
+define i32 @f(i64 %z) #0 {
+ ret i32 0
+}
+
+attributes #0 = { noinline optnone }
+
diff --git a/test/CodeGen/ARM/build-attributes-optimization-optsize.ll b/test/CodeGen/ARM/build-attributes-optimization-optsize.ll
new file mode 100644
index 000000000000..bab210aa8d01
--- /dev/null
+++ b/test/CodeGen/ARM/build-attributes-optimization-optsize.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s
+
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+
+; CHECK: .eabi_attribute 30, 3 @ Tag_ABI_optimization_goals
+; CHECK-OBJ: TagName: ABI_optimization_goals
+; CHECK-OBJ-NEXT: Description: Size
+
+define i32 @f(i64 %z) #0 {
+ ret i32 0
+}
+
+attributes #0 = { optsize }
+
diff --git a/test/CodeGen/ARM/build-attributes-optimization.ll b/test/CodeGen/ARM/build-attributes-optimization.ll
new file mode 100644
index 000000000000..21b7b3c3ab0c
--- /dev/null
+++ b/test/CodeGen/ARM/build-attributes-optimization.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s --check-prefix=NONE
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s --check-prefix=SPEED
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s --check-prefix=MAXSPEED
+
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=NONE-OBJ
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=SPEED-OBJ
+; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=MAXSPEED-OBJ
+
+; NONE: .eabi_attribute 30, 5 @ Tag_ABI_optimization_goals
+; SPEED: .eabi_attribute 30, 1 @ Tag_ABI_optimization_goals
+; MAXSPEED: .eabi_attribute 30, 2 @ Tag_ABI_optimization_goals
+
+; NONE-OBJ: TagName: ABI_optimization_goals
+; NONE-OBJ-NEXT: Description: Debugging
+; SPEED-OBJ: TagName: ABI_optimization_goals
+; SPEED-OBJ-NEXT: Description: Speed
+; MAXSPEED-OBJ: TagName: ABI_optimization_goals
+; MAXSPEED-OBJ-NEXT: Description: Aggressive Speed
+
+define i32 @f(i64 %z) {
+ ret i32 0
+}
+
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index 29c702304a3f..bf502b3ae077 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -1,17 +1,17 @@
; This tests that MC/asm header conversion is smooth and that the
; build attributes are correct
-; RUN: llc < %s -mtriple=thumbv5-linux-gnueabi -mcpu=xscale | FileCheck %s --check-prefix=XSCALE
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s --check-prefix=V6
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6-FAST
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi | FileCheck %s --check-prefix=V6M
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
-; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi | FileCheck %s --check-prefix=V6M
-; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s | FileCheck %s --check-prefix=ARM1156T2F-S
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=ARM1156T2F-S-FAST
-; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv5-linux-gnueabi -mcpu=xscale -mattr=+strict-align | FileCheck %s --check-prefix=XSCALE
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6-FAST
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6M
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
+; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=V6M
+; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align | FileCheck %s --check-prefix=ARM1156T2F-S
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=ARM1156T2F-S-FAST
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=V7M
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V7M-FAST
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
@@ -59,18 +59,18 @@
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=+neon,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-NEON-FP16
; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus | FileCheck %s --check-prefix=CORTEX-M0PLUS
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0PLUS-FAST
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 | FileCheck %s --check-prefix=CORTEX-M1
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M1-FAST
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 | FileCheck %s --check-prefix=SC000
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=SC000-FAST
-; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -mattr=+strict-align | FileCheck %s --check-prefix=CORTEX-M0
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -mattr=+strict-align | FileCheck %s --check-prefix=CORTEX-M0PLUS
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0PLUS-FAST
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -mattr=+strict-align | FileCheck %s --check-prefix=CORTEX-M1
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M1-FAST
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -mattr=+strict-align | FileCheck %s --check-prefix=SC000
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -mattr=+strict-align -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=SC000-FAST
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -mattr=+strict-align -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=CORTEX-M3
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M3-FAST
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
@@ -96,6 +96,9 @@
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 | FileCheck %s --check-prefix=CORTEX-R7
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R7-FAST
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a35 | FileCheck %s --check-prefix=CORTEX-A35
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a35 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A35-FAST
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a35 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A53-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
@@ -116,58 +119,50 @@
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-FPUV4-FAST
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,,+d16,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
-; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=pic | FileCheck %s --check-prefix=RELOC-PIC
-; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=static | FileCheck %s --check-prefix=RELOC-OTHER
-; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=default | FileCheck %s --check-prefix=RELOC-OTHER
-; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=dynamic-no-pic | FileCheck %s --check-prefix=RELOC-OTHER
-; RUN: llc < %s -mtriple=arm-none-linux-gnueabi | FileCheck %s --check-prefix=RELOC-OTHER
-; RUN: llc < %s -mtriple=arm-none-linux-gnueabi | FileCheck %s --check-prefix=PCS-R9-USE
-; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -arm-reserve-r9 | FileCheck %s --check-prefix=PCS-R9-RESERVE
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=pic | FileCheck %s --check-prefix=RELOC-PIC
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=static | FileCheck %s --check-prefix=RELOC-OTHER
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=default | FileCheck %s --check-prefix=RELOC-OTHER
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align -relocation-model=dynamic-no-pic | FileCheck %s --check-prefix=RELOC-OTHER
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=RELOC-OTHER
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=PCS-R9-USE
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -mattr=+reserve-r9,+strict-align | FileCheck %s --check-prefix=PCS-R9-RESERVE
; ARMv8.1a (AArch32)
-; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; ARMv8a (AArch32)
-; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a35 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a35 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a72 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a72 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a72 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a72 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; ARMv7a
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; ARMv7r
-; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; ARMv7m
-; RUN: llc < %s -mtriple=thumbv7m-none-linux-gnueabi -mcpu=cortex-m3 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=thumbv7m-none-linux-gnueabi -mcpu=cortex-m3 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; RUN: llc < %s -mtriple=thumbv7m-none-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv7m-none-linux-gnueabi -mcpu=cortex-m3 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; ARMv6
; RUN: llc < %s -mtriple=armv6-none-netbsd-gnueabi -mcpu=arm1136j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s | FileCheck %s --check-prefix=STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; ARMv6k
; RUN: llc < %s -mtriple=armv6k-none-netbsd-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; ARMv6m
-; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -arm-no-strict-align -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
-; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -arm-strict-align -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
-; RUN: llc < %s -mtriple=thumbv6m-none-linux-gnueabi -arm-no-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
-; RUN: llc < %s -mtriple=thumbv6m-none-linux-gnueabi -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
-; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
-; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -mcpu=cortex-m0 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -mattr=+strict-align -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv6m-none-linux-gnueabi -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -mcpu=cortex-m0 -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; ARMv5
-; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mcpu=arm1022e -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mcpu=arm1022e -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
-; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mcpu=arm1022e | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mcpu=arm1022e | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mcpu=arm1022e -mattr=+strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; XSCALE: .eabi_attribute 6, 5
; XSCALE: .eabi_attribute 8, 1
@@ -748,6 +743,7 @@
; CORTEX-M0: .eabi_attribute 21, 1
; CORTEX-M0-NOT: .eabi_attribute 22
; CORTEX-M0: .eabi_attribute 23, 3
+; CORTEX-M0: .eabi_attribute 34, 0
; CORTEX-M0: .eabi_attribute 24, 1
; CORTEX-M0: .eabi_attribute 25, 1
; CORTEX-M0-NOT: .eabi_attribute 27
@@ -1109,7 +1105,7 @@
; CORTEX-R7: .eabi_attribute 25, 1
; CORTEX-R7: .eabi_attribute 27, 1
; CORTEX-R7-NOT: .eabi_attribute 28
-; CORTEX-R7-NOT: .eabi_attribute 36
+; CORTEX-R7: .eabi_attribute 36, 1
; CORTEX-R7: .eabi_attribute 38, 1
; CORTEX-R7: .eabi_attribute 42, 1
; CORTEX-R7: .eabi_attribute 44, 2
@@ -1122,6 +1118,36 @@
; CORTEX-R7-FAST-NOT: .eabi_attribute 22
; CORTEX-R7-FAST: .eabi_attribute 23, 1
+; CORTEX-A35: .cpu cortex-a35
+; CORTEX-A35: .eabi_attribute 6, 14
+; CORTEX-A35: .eabi_attribute 7, 65
+; CORTEX-A35: .eabi_attribute 8, 1
+; CORTEX-A35: .eabi_attribute 9, 2
+; CORTEX-A35: .fpu crypto-neon-fp-armv8
+; CORTEX-A35: .eabi_attribute 12, 3
+; CORTEX-A35-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-A35: .eabi_attribute 20, 1
+; CORTEX-A35: .eabi_attribute 21, 1
+; CORTEX-A35-NOT: .eabi_attribute 22
+; CORTEX-A35: .eabi_attribute 23, 3
+; CORTEX-A35: .eabi_attribute 24, 1
+; CORTEX-A35: .eabi_attribute 25, 1
+; CORTEX-A35-NOT: .eabi_attribute 27
+; CORTEX-A35-NOT: .eabi_attribute 28
+; CORTEX-A35: .eabi_attribute 36, 1
+; CORTEX-A35: .eabi_attribute 38, 1
+; CORTEX-A35: .eabi_attribute 42, 1
+; CORTEX-A35-NOT: .eabi_attribute 44
+; CORTEX-A35: .eabi_attribute 68, 3
+
+; CORTEX-A35-FAST-NOT: .eabi_attribute 19
+;; The A35 has the ARMv8 FP unit, which always flushes preserving sign.
+; CORTEX-A35-FAST: .eabi_attribute 20, 2
+; CORTEX-A35-FAST-NOT: .eabi_attribute 21
+; CORTEX-A35-FAST-NOT: .eabi_attribute 22
+; CORTEX-A35-FAST: .eabi_attribute 23, 1
+
; CORTEX-A53: .cpu cortex-a53
; CORTEX-A53: .eabi_attribute 6, 14
; CORTEX-A53: .eabi_attribute 7, 65
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index b2b6aaec8131..8821029520fe 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -83,9 +83,11 @@ declare void @foo() nounwind
define void @t7() nounwind {
entry:
; CHECKT2D-LABEL: t7:
-; CHECKT2D: blxeq _foo
-; CHECKT2D-NEXT: pop.w
-; CHECKT2D-NEXT: b.w _foo
+; CHECKT2D: it ne
+; CHECKT2D-NEXT: bne.w _foo
+; CHECKT2D-NEXT: push
+; CHECKT2D-NEXT: mov r7, sp
+; CHECKT2D-NEXT: blx _foo
br i1 undef, label %bb, label %bb1.lr.ph
bb1.lr.ph:
diff --git a/test/CodeGen/ARM/cfi-alignment.ll b/test/CodeGen/ARM/cfi-alignment.ll
new file mode 100644
index 000000000000..11add2242656
--- /dev/null
+++ b/test/CodeGen/ARM/cfi-alignment.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=thumbv7k-apple-watchos7.0 -o - %s | FileCheck %s
+
+; Since d11 doesn't get pushed with the aligned registers, its frameindex
+; shouldn't be modified to say it has been.
+
+define void @foo() {
+; CHECK-LABEL: foo:
+; CHECK: push {r7, lr}
+; CHECK: .cfi_offset r7, -8
+; CHECK: vpush {d11}
+; CHECK: vpush {d8, d9}
+; CHECK: .cfi_offset d11, -16
+; CHECK: .cfi_offset d9, -24
+; CHECK: .cfi_offset d8, -32
+ call void asm sideeffect "", "~{d8},~{d9},~{d11}"()
+ call void @bar()
+ ret void
+}
+
+define void @variadic_foo(i8, ...) {
+; CHECK-LABEL: variadic_foo:
+; CHECK: sub sp, #12
+; CHECK: push {r7, lr}
+; CHECK: .cfi_offset r7, -20
+; CHECK: sub sp, #4
+; CHECK: vpush {d11}
+; CHECK: vpush {d8, d9}
+; CHECK: .cfi_offset d11, -32
+; CHECK: .cfi_offset d9, -40
+; CHECK: .cfi_offset d8, -48
+ call void asm sideeffect "", "~{d8},~{d9},~{d11}"()
+ call void @llvm.va_start(i8* null)
+ call void @bar()
+ ret void
+}
+
+define void @test_maintain_stack_align() {
+; CHECK-LABEL: test_maintain_stack_align:
+; CHECK: push {r7, lr}
+; CHECK: vpush {d8, d9}
+; CHECK: sub sp, #8
+ call void asm sideeffect "", "~{d8},~{d9}"()
+ call void @bar()
+ ret void
+}
+
+declare void @bar()
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/ARM/cmpxchg-idioms.ll b/test/CodeGen/ARM/cmpxchg-idioms.ll
index fb88575cab3b..81e05acfef79 100644
--- a/test/CodeGen/ARM/cmpxchg-idioms.ll
+++ b/test/CodeGen/ARM/cmpxchg-idioms.ll
@@ -15,14 +15,14 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
; CHECK: bne [[LOOP]]
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs r0, #1
; CHECK: dmb ish
+; CHECK: movs r0, #1
; CHECK: bx lr
; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs r0, #0
; CHECK: dmb ish
+; CHECK: movs r0, #0
; CHECK: bx lr
%pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
@@ -34,8 +34,8 @@ define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
; CHECK-LABEL: test_return_bool:
-; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1
; CHECK: dmb ishst
+; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrexb [[LOADED:r[0-9]+]], [r0]
diff --git a/test/CodeGen/ARM/cmpxchg-weak.ll b/test/CodeGen/ARM/cmpxchg-weak.ll
index 126e33062623..1eac9c41cf92 100644
--- a/test/CodeGen/ARM/cmpxchg-weak.ll
+++ b/test/CodeGen/ARM/cmpxchg-weak.ll
@@ -5,16 +5,24 @@ define void @test_cmpxchg_weak(i32 *%addr, i32 %desired, i32 %new) {
%pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%oldval = extractvalue { i32, i1 } %pair, 0
-; CHECK: dmb ish
-; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
-; CHECK: cmp [[LOADED]], r1
-; CHECK: strexeq [[SUCCESS:r[0-9]+]], r2, [r0]
-; CHECK: cmpeq [[SUCCESS]], #0
-; CHECK: bne [[DONE:LBB[0-9]+_[0-9]+]]
-; CHECK: dmb ish
-; CHECK: [[DONE]]:
-; CHECK: str r3, [r0]
-; CHECK: bx lr
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r0]
+; CHECK-NEXT: cmp [[LOADED]], r1
+; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#1:
+; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0]
+; CHECK-NEXT: cmp [[SUCCESS]], #0
+; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#2:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: str r3, [r0]
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: [[LDFAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: str r3, [r0]
+; CHECK-NEXT: bx lr
store i32 %oldval, i32* %addr
ret void
@@ -27,17 +35,23 @@ define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) {
%pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%success = extractvalue { i32, i1 } %pair, 1
-; CHECK: dmb ish
-; CHECK: mov r0, #0
-; CHECK: ldrex [[LOADED:r[0-9]+]], [r1]
-; CHECK: cmp [[LOADED]], r2
-; CHECK: strexeq [[STATUS:r[0-9]+]], r3, [r1]
-; CHECK: cmpeq [[STATUS]], #0
-; CHECK: bne [[DONE:LBB[0-9]+_[0-9]+]]
-; CHECK: dmb ish
-; CHECK: mov r0, #1
-; CHECK: [[DONE]]:
-; CHECK: bx lr
+; CHECK-NEXT: BB#0:
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r1]
+; CHECK-NEXT: cmp [[LOADED]], r2
+; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: BB#1:
+; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1]
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: cmp [[SUCCESS]], #0
+; CHECK-NEXT: bxne lr
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: mov r0, #1
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: [[LDFAILBB]]:
+; CHECK-NEXT: clrex
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: bx lr
ret i1 %success
}
diff --git a/test/CodeGen/ARM/coalesce-dbgvalue.ll b/test/CodeGen/ARM/coalesce-dbgvalue.ll
index cd2ab257207a..4468f1ec9c42 100644
--- a/test/CodeGen/ARM/coalesce-dbgvalue.ll
+++ b/test/CodeGen/ARM/coalesce-dbgvalue.ll
@@ -15,7 +15,7 @@ target triple = "thumbv7-apple-ios3.0.0"
@d = common global i32 0, align 4
; Function Attrs: nounwind ssp
-define i32 @pr16110() #0 {
+define i32 @pr16110() #0 !dbg !4 {
for.cond1.preheader:
store i32 0, i32* @c, align 4, !dbg !21
br label %for.cond1.outer, !dbg !26
@@ -79,18 +79,18 @@ attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 182024) (llvm/trunk 182023)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !15, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 182024) (llvm/trunk 182023)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !15, imports: !2)
!1 = !DIFile(filename: "pr16110.c", directory: "/d/b")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "pr16110", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 7, file: !1, scope: !5, type: !6, function: i32 ()* @pr16110, variables: !9)
+!4 = distinct !DISubprogram(name: "pr16110", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !9)
!5 = !DIFile(filename: "pr16110.c", directory: "/d/b")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10, !11}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "e", line: 8, scope: !4, file: !5, type: !8)
-!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f", line: 13, scope: !12, file: !5, type: !14)
+!10 = !DILocalVariable(name: "e", line: 8, scope: !4, file: !5, type: !8)
+!11 = !DILocalVariable(name: "f", line: 13, scope: !12, file: !5, type: !14)
!12 = distinct !DILexicalBlock(line: 12, column: 0, file: !1, scope: !13)
!13 = distinct !DILexicalBlock(line: 12, column: 0, file: !1, scope: !4)
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !8)
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index 72fefeacfc5b..a11976e27448 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -14,11 +14,11 @@ target triple = "thumbv7-apple-ios0.0.0"
define void @f(float* %p, i32 %c) nounwind ssp {
entry:
%0 = bitcast float* %p to i8*
- %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+ %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %0, i32 4)
%vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
%add.ptr = getelementptr inbounds float, float* %p, i32 8
%1 = bitcast float* %add.ptr to i8*
- tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> undef, i32 4)
+ tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %1, <4 x float> %vld221, <4 x float> undef, i32 4)
ret void
}
@@ -27,13 +27,13 @@ entry:
define void @f1(float* %p, i32 %c) nounwind ssp {
entry:
%0 = bitcast float* %p to i8*
- %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+ %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %0, i32 4)
%vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
%add.ptr = getelementptr inbounds float, float* %p, i32 8
%1 = bitcast float* %add.ptr to i8*
- %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
+ %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %1, i32 4)
%vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
- tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> %vld2215, i32 4)
+ tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %1, <4 x float> %vld221, <4 x float> %vld2215, i32 4)
ret void
}
@@ -42,7 +42,7 @@ entry:
define void @f2(float* %p, i32 %c) nounwind ssp {
entry:
%0 = bitcast float* %p to i8*
- %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+ %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %0, i32 4)
%vld224 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
br label %do.body
@@ -52,10 +52,10 @@ do.body: ; preds = %do.body, %entry
%p.addr.0 = phi float* [ %p, %entry ], [ %add.ptr, %do.body ]
%add.ptr = getelementptr inbounds float, float* %p.addr.0, i32 8
%1 = bitcast float* %add.ptr to i8*
- %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
+ %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* %1, i32 4)
%vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
%vld2216 = extractvalue { <4 x float>, <4 x float> } %vld22, 1
- tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %qq0.0.1.0, <4 x float> %vld2215, i32 4)
+ tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %1, <4 x float> %qq0.0.1.0, <4 x float> %vld2215, i32 4)
%dec = add nsw i32 %c.addr.0, -1
%tobool = icmp eq i32 %dec, 0
br i1 %tobool, label %do.end, label %do.body
@@ -64,8 +64,8 @@ do.end: ; preds = %do.body
ret void
}
-declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst2.p0i8.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
; CHECK: f3
; This function has lane insertions that span basic blocks.
@@ -109,12 +109,12 @@ if.end: ; preds = %if.else, %if.then
%x.0 = phi <2 x float> [ %vecins3, %if.then ], [ %vecins5, %if.else ]
%add.ptr = getelementptr inbounds float, float* %p, i32 4
%4 = bitcast float* %add.ptr to i8*
- tail call void @llvm.arm.neon.vst1.v2f32(i8* %4, <2 x float> %x.0, i32 4)
+ tail call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %4, <2 x float> %x.0, i32 4)
ret void
}
-declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind
-declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst1.p0i8.v2f32(i8*, <2 x float>, i32) nounwind
+declare <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8*, i32) nounwind readonly
; CHECK: f4
; This function inserts a lane into a fully defined vector.
@@ -124,7 +124,7 @@ declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly
define void @f4(float* %p, float* %q) nounwind ssp {
entry:
%0 = bitcast float* %p to i8*
- %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %0, i32 4)
+ %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* %0, i32 4)
%tobool = icmp eq float* %q, null
br i1 %tobool, label %if.end, label %if.then
@@ -138,7 +138,7 @@ if.then: ; preds = %entry
if.end: ; preds = %entry, %if.then
%x.0 = phi <2 x float> [ %vecins, %if.then ], [ %vld1, %entry ]
- tail call void @llvm.arm.neon.vst1.v2f32(i8* %0, <2 x float> %x.0, i32 4)
+ tail call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %0, <2 x float> %x.0, i32 4)
ret void
}
@@ -154,7 +154,7 @@ if.end: ; preds = %entry, %if.then
define void @f5(float* %p, float* %q) nounwind ssp {
entry:
%0 = bitcast float* %p to i8*
- %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %0, i32 4)
+ %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %0, i32 4)
%vecext = extractelement <4 x float> %vld1, i32 0
%vecext1 = extractelement <4 x float> %vld1, i32 1
%vecext2 = extractelement <4 x float> %vld1, i32 2
@@ -182,13 +182,13 @@ if.end: ; preds = %entry, %if.then
%vecinit9 = insertelement <4 x float> %vecinit, float %b.0, i32 1
%vecinit10 = insertelement <4 x float> %vecinit9, float %c.0, i32 2
%vecinit11 = insertelement <4 x float> %vecinit10, float %add, i32 3
- tail call void @llvm.arm.neon.vst1.v4f32(i8* %0, <4 x float> %vecinit11, i32 4)
+ tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %0, <4 x float> %vecinit11, i32 4)
ret void
}
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind
; CHECK: pr13999
define void @pr13999() nounwind readonly {
diff --git a/test/CodeGen/ARM/combine-vmovdrr.ll b/test/CodeGen/ARM/combine-vmovdrr.ll
new file mode 100644
index 000000000000..358f7e3a983e
--- /dev/null
+++ b/test/CodeGen/ARM/combine-vmovdrr.ll
@@ -0,0 +1,72 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target triple = "thumbv7s-apple-ios"
+
+declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %shuffle.i.i307, <8 x i8> %shuffle.i27.i308, <8 x i8> %vtbl2.i25.i)
+
+; Check that we get the motivating example:
+; The bitcasts force the values to go through the GPRs, whereas
+; they are defined on VPRs and used on VPRs.
+;
+; CHECK-LABEL: motivatingExample:
+; CHECK: vldr [[ARG2_VAL:d[0-9]+]], [r1]
+; CHECK-NEXT: vld1.32 {[[ARG1_VALlo:d[0-9]+]], [[ARG1_VALhi:d[0-9]+]]}, [r0]
+; CHECK-NEXT: vtbl.8 [[RES:d[0-9]+]], {[[ARG1_VALlo]], [[ARG1_VALhi]]}, [[ARG2_VAL]]
+; CHECK-NEXT: vstr [[RES]], [r1]
+; CHECK-NEXT: bx lr
+define void @motivatingExample(<2 x i64>* %addr, <8 x i8>* %addr2) {
+ %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr
+ %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2
+ %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0
+ %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1
+ %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8>
+ %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8>
+ %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i)
+ store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2
+ ret void
+}
+
+; Check that we do not perform the transformation for dynamic index.
+; CHECK-LABEL: dynamicIndex:
+; CHECK-NOT: mul
+; CHECK: pop
+define void @dynamicIndex(<2 x i64>* %addr, <8 x i8>* %addr2, i32 %index) {
+ %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr
+ %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2
+ %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 %index
+ %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1
+ %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8>
+ %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8>
+ %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i)
+ store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2
+ ret void
+}
+
+; Check that we do not perform the transformation when there are several uses
+; of the result of the bitcast.
+; CHECK-LABEL: severalUses:
+; ARG1_VALlo is hard coded because we need to access the high part of d0,
+; i.e., s1, and we can't express that with filecheck.
+; CHECK: vld1.32 {[[ARG1_VALlo:d0]], [[ARG1_VALhi:d[0-9]+]]}, [r0]
+; CHECK-NEXT: vldr [[ARG2_VAL:d[0-9]+]], [r1]
+; s1 is actually 2 * ARG1_VALlo + 1, but we cannot express that with filecheck.
+; CHECK-NEXT: vmov [[REThi:r[0-9]+]], s1
+; We build the return value here. s0 is 2 * ARG1_VALlo.
+; CHECK-NEXT: vmov r0, s0
+; This copy is correct but actually useless. We should be able to clean it up.
+; CHECK-NEXT: vmov [[ARG1_VALloCPY:d[0-9]+]], r0, [[REThi]]
+; CHECK-NEXT: vtbl.8 [[RES:d[0-9]+]], {[[ARG1_VALloCPY]], [[ARG1_VALhi]]}, [[ARG2_VAL]]
+; CHECK-NEXT: vstr [[RES]], [r1]
+; CHECK-NEXT: mov r1, [[REThi]]
+; CHECK-NEXT: bx lr
+define i64 @severalUses(<2 x i64>* %addr, <8 x i8>* %addr2) {
+ %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr
+ %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2
+ %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0
+ %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1
+ %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8>
+ %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8>
+ %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i)
+ store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2
+ ret i64 %shuffle.i.extract.i310
+}
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index 3baa103e3d5d..75a90bbf0caa 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -63,7 +63,7 @@ define i32 @f8() nounwind {
float 3.000000e+00> }, align 16
; CHECK: const1
; CHECK: .zero 16
-; CHECK: float 1.0
-; CHECK: float 2.0
-; CHECK: float 3.0
+; CHECK: float 1
+; CHECK: float 2
+; CHECK: float 3
; CHECK: .zero 4
diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll
index 80ef2ab7b8bf..578d80d1cef4 100644
--- a/test/CodeGen/ARM/dagcombine-concatvector.ll
+++ b/test/CodeGen/ARM/dagcombine-concatvector.ll
@@ -19,8 +19,8 @@ bb:
%tmp5 = bitcast i64 %tmp4 to <8 x i8>
%tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> %tmp3, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- tail call void @llvm.arm.neon.vst1.v16i8(i8* %arg, <16 x i8> %tmp7, i32 2)
+ tail call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %arg, <16 x i8> %tmp7, i32 2)
ret void
}
-declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)
+declare void @llvm.arm.neon.vst1.p0i8.v16i8(i8*, <16 x i8>, i32)
diff --git a/test/CodeGen/ARM/debug-frame-vararg.ll b/test/CodeGen/ARM/debug-frame-vararg.ll
index c1eff0a5bd67..13ca20c20359 100644
--- a/test/CodeGen/ARM/debug-frame-vararg.ll
+++ b/test/CodeGen/ARM/debug-frame-vararg.ll
@@ -25,11 +25,11 @@
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "var.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "sum", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: i32 (i32, ...)* @sum, variables: !2)
+!4 = distinct !DISubprogram(name: "sum", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "var.c", directory: "/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
@@ -37,9 +37,9 @@
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 1, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5 "}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "count", line: 5, arg: 1, scope: !4, file: !5, type: !8)
+!12 = !DILocalVariable(name: "count", line: 5, arg: 1, scope: !4, file: !5, type: !8)
!13 = !DILocation(line: 5, scope: !4)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vl", line: 6, scope: !4, file: !5, type: !15)
+!14 = !DILocalVariable(name: "vl", line: 6, scope: !4, file: !5, type: !15)
!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "va_list", line: 30, file: !16, baseType: !17)
!16 = !DIFile(filename: "/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", directory: "/tmp")
!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "__builtin_va_list", line: 6, file: !1, baseType: !18)
@@ -49,9 +49,9 @@
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: null)
!22 = !DILocation(line: 6, scope: !4)
!23 = !DILocation(line: 7, scope: !4)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "sum", line: 8, scope: !4, file: !5, type: !8)
+!24 = !DILocalVariable(name: "sum", line: 8, scope: !4, file: !5, type: !8)
!25 = !DILocation(line: 8, scope: !4)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 9, scope: !27, file: !5, type: !8)
+!26 = !DILocalVariable(name: "i", line: 9, scope: !27, file: !5, type: !8)
!27 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !4)
!28 = !DILocation(line: 9, scope: !27)
!29 = !DILocation(line: 10, scope: !30)
@@ -108,7 +108,7 @@
; CHECK-THUMB-FP-ELIM: add r7, sp, #8
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 20
-define i32 @sum(i32 %count, ...) {
+define i32 @sum(i32 %count, ...) !dbg !4 {
entry:
%vl = alloca i8*, align 4
%vl1 = bitcast i8** %vl to i8*
diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll
index cc07400c2e1c..4bd401b68496 100644
--- a/test/CodeGen/ARM/debug-frame.ll
+++ b/test/CodeGen/ARM/debug-frame.ll
@@ -30,11 +30,11 @@
; RUN: -filetype=asm -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM
-; RUN: llc -mtriple thumb-unknown-linux-gnueabi \
+; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \
; RUN: -disable-fp-elim -filetype=asm -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP
-; RUN: llc -mtriple thumb-unknown-linux-gnueabi \
+; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \
; RUN: -filetype=asm -o - %s \
; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM
@@ -125,11 +125,11 @@ declare void @_ZSt9terminatev()
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "exp.cpp", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test", linkageName: "_Z4testiiiiiddddd", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: void (i32, i32, i32, i32, i32, double, double, double, double, double)* @_Z4testiiiiiddddd, variables: !2)
+!4 = distinct !DISubprogram(name: "test", linkageName: "_Z4testiiiiiddddd", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "exp.cpp", directory: "/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8, !8, !8, !8, !8, !9, !9, !9, !9, !9}
@@ -138,18 +138,18 @@ declare void @_ZSt9terminatev()
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 1, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.5 "}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 4, arg: 1, scope: !4, file: !5, type: !8)
+!13 = !DILocalVariable(name: "a", line: 4, arg: 1, scope: !4, file: !5, type: !8)
!14 = !DILocation(line: 4, scope: !4)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 4, arg: 2, scope: !4, file: !5, type: !8)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !4, file: !5, type: !8)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "d", line: 4, arg: 4, scope: !4, file: !5, type: !8)
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "e", line: 4, arg: 5, scope: !4, file: !5, type: !8)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "m", line: 5, arg: 6, scope: !4, file: !5, type: !9)
+!15 = !DILocalVariable(name: "b", line: 4, arg: 2, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(name: "c", line: 4, arg: 3, scope: !4, file: !5, type: !8)
+!17 = !DILocalVariable(name: "d", line: 4, arg: 4, scope: !4, file: !5, type: !8)
+!18 = !DILocalVariable(name: "e", line: 4, arg: 5, scope: !4, file: !5, type: !8)
+!19 = !DILocalVariable(name: "m", line: 5, arg: 6, scope: !4, file: !5, type: !9)
!20 = !DILocation(line: 5, scope: !4)
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "n", line: 5, arg: 7, scope: !4, file: !5, type: !9)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p", line: 5, arg: 8, scope: !4, file: !5, type: !9)
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "q", line: 5, arg: 9, scope: !4, file: !5, type: !9)
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "r", line: 5, arg: 10, scope: !4, file: !5, type: !9)
+!21 = !DILocalVariable(name: "n", line: 5, arg: 7, scope: !4, file: !5, type: !9)
+!22 = !DILocalVariable(name: "p", line: 5, arg: 8, scope: !4, file: !5, type: !9)
+!23 = !DILocalVariable(name: "q", line: 5, arg: 9, scope: !4, file: !5, type: !9)
+!24 = !DILocalVariable(name: "r", line: 5, arg: 10, scope: !4, file: !5, type: !9)
!25 = !DILocation(line: 7, scope: !26)
!26 = distinct !DILexicalBlock(line: 6, column: 0, file: !1, scope: !4)
!27 = !DILocation(line: 8, scope: !26)
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index 84eae77794a4..bf7e7321ae3d 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -6,12 +6,12 @@ target triple = "thumbv7-apple-ios"
%struct.tag_s = type { i32, i32, i32 }
-define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 %y, %struct.tag_s* nocapture %ptr1, %struct.tag_s* nocapture %ptr2) nounwind ssp {
+define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 %y, %struct.tag_s* nocapture %ptr1, %struct.tag_s* nocapture %ptr2) nounwind ssp !dbg !1 {
tail call void @llvm.dbg.value(metadata %struct.tag_s* %this, i64 0, metadata !5, metadata !DIExpression()), !dbg !20
tail call void @llvm.dbg.value(metadata %struct.tag_s* %c, i64 0, metadata !13, metadata !DIExpression()), !dbg !21
tail call void @llvm.dbg.value(metadata i64 %x, i64 0, metadata !14, metadata !DIExpression()), !dbg !22
tail call void @llvm.dbg.value(metadata i64 %y, i64 0, metadata !17, metadata !DIExpression()), !dbg !23
-;CHECK: @DEBUG_VALUE: foo:y <- [R7+8]
+;CHECK: @DEBUG_VALUE: foo:y <- [%R7+8]
tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr1, i64 0, metadata !18, metadata !DIExpression()), !dbg !24
tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr2, i64 0, metadata !19, metadata !DIExpression()), !dbg !25
%1 = icmp eq %struct.tag_s* %c, null, !dbg !26
@@ -32,12 +32,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 1, file: !32, enums: !{}, retainedTypes: !{}, subprograms: !30, imports: null)
-!1 = !DISubprogram(name: "foo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !2, scope: !2, type: !3, function: void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, variables: !31)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 1, file: !32, enums: !{}, retainedTypes: !{}, subprograms: !30, imports: null)
+!1 = distinct !DISubprogram(name: "foo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !2, scope: !2, type: !3, variables: !31)
!2 = !DIFile(filename: "one.c", directory: "/Volumes/Athwagate/R10048772")
!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 1, scope: !1, file: !2, type: !6)
+!5 = !DILocalVariable(name: "this", line: 11, arg: 1, scope: !1, file: !2, type: !6)
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !7)
!7 = !DICompositeType(tag: DW_TAG_structure_type, name: "tag_s", line: 5, size: 96, align: 32, file: !32, scope: !0, elements: !8)
!8 = !{!9, !11, !12}
@@ -45,13 +45,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 7, size: 32, align: 32, offset: 32, file: !32, scope: !7, baseType: !10)
!12 = !DIDerivedType(tag: DW_TAG_member, name: "z", line: 8, size: 32, align: 32, offset: 64, file: !32, scope: !7, baseType: !10)
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 11, arg: 2, scope: !1, file: !2, type: !6)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 11, arg: 3, scope: !1, file: !2, type: !15)
+!13 = !DILocalVariable(name: "c", line: 11, arg: 2, scope: !1, file: !2, type: !6)
+!14 = !DILocalVariable(name: "x", line: 11, arg: 3, scope: !1, file: !2, type: !15)
!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "UInt64", line: 1, file: !32, scope: !0, baseType: !16)
!16 = !DIBasicType(tag: DW_TAG_base_type, name: "long long unsigned int", size: 64, align: 32, encoding: DW_ATE_unsigned)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 11, arg: 4, scope: !1, file: !2, type: !15)
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr1", line: 11, arg: 5, scope: !1, file: !2, type: !6)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr2", line: 11, arg: 6, scope: !1, file: !2, type: !6)
+!17 = !DILocalVariable(name: "y", line: 11, arg: 4, scope: !1, file: !2, type: !15)
+!18 = !DILocalVariable(name: "ptr1", line: 11, arg: 5, scope: !1, file: !2, type: !6)
+!19 = !DILocalVariable(name: "ptr2", line: 11, arg: 6, scope: !1, file: !2, type: !6)
!20 = !DILocation(line: 11, column: 24, scope: !1)
!21 = !DILocation(line: 11, column: 44, scope: !1)
!22 = !DILocation(line: 11, column: 54, scope: !1)
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 366102755174..c628c5e9038d 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -1,5 +1,21 @@
-; RUN: llc -O0 < %s | FileCheck %s
-; CHECK: @DEBUG_VALUE: foobar_func_block_invoke_0:mydata <- [SP+{{[0-9]+}}]
+; RUN: llc -filetype=obj -O0 < %s | llvm-dwarfdump - | FileCheck %s
+
+; debug_info content
+; CHECK: DW_AT_name {{.*}} "foobar_func_block_invoke_0"
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] ([[MYDATA_LOC:0x[0-9a-f]*]])
+; CHECK-NEXT: DW_AT_name {{.*}} "mydata"
+
+; debug_loc content
+; CHECK: .debug_loc contents:
+; CHECK: [[MYDATA_LOC]]: Beginning address offset: {{.*}}
+; CHECK-NOT: {{0x[0-9a-f]*}}: Beginning address offset
+; CHECK: Location description: {{.*}} 23 04 06 23 18
+; CHECK-NOT: {{0x[0-9a-f]*}}: Beginning address offset
+; CHECK: Location description: {{.*}} 23 04 06 23 18
+
; Radar 9331779
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "thumbv7-apple-ios"
@@ -27,7 +43,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp {
+define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp !dbg !23 {
%1 = alloca %0*, align 4
%bounds = alloca %struct.CR, align 4
%data = alloca %struct.CR, align 4
@@ -95,7 +111,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!162}
-!0 = !DICompileUnit(language: DW_LANG_ObjC, producer: "Apple clang version 2.1", isOptimized: false, runtimeVersion: 2, emissionKind: 1, file: !153, enums: !147, retainedTypes: !{}, subprograms: !148)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "Apple clang version 2.1", isOptimized: false, runtimeVersion: 2, emissionKind: 1, file: !153, enums: !147, retainedTypes: !{}, subprograms: !148)
!1 = !DICompositeType(tag: DW_TAG_enumeration_type, line: 248, size: 32, align: 32, file: !160, scope: !0, elements: !3)
!2 = !DIFile(filename: "header.h", directory: "/Volumes/Sandbox/llvm")
!3 = !{!4}
@@ -118,11 +134,11 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
!20 = !DIFile(filename: "header4.h", directory: "/Volumes/Sandbox/llvm")
!21 = !{!22}
!22 = !DIEnumerator(name: "Eleven", value: 0) ; [ DW_TAG_enumerator ]
-!23 = !DISubprogram(name: "foobar_func_block_invoke_0", line: 609, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 609, file: !152, scope: !24, type: !25, function: void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0)
+!23 = distinct !DISubprogram(name: "foobar_func_block_invoke_0", line: 609, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 609, file: !152, scope: !24, type: !25)
!24 = !DIFile(filename: "MyLibrary.m", directory: "/Volumes/Sandbox/llvm")
!25 = !DISubroutineType(types: !26)
!26 = !{null}
-!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: ".block_descriptor", line: 609, arg: 1, flags: DIFlagArtificial, scope: !23, file: !24, type: !28)
+!27 = !DILocalVariable(name: ".block_descriptor", line: 609, arg: 1, flags: DIFlagArtificial, scope: !23, file: !24, type: !28)
!28 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, scope: !0, baseType: !29)
!29 = !DICompositeType(tag: DW_TAG_structure_type, name: "__block_literal_14", line: 609, size: 256, align: 32, file: !152, scope: !24, elements: !30)
!30 = !{!31, !33, !35, !36, !37, !48, !89, !124}
@@ -225,16 +241,16 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
!127 = !DICompositeType(tag: DW_TAG_structure_type, name: "my_struct", line: 49, flags: DIFlagFwdDecl, file: !159, scope: !0)
!128 = !DIFile(filename: "header15.h", directory: "/Volumes/Sandbox/llvm")
!129 = !DILocation(line: 609, column: 144, scope: !23)
-!130 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "loadedMydata", line: 609, arg: 2, scope: !23, file: !24, type: !59)
+!130 = !DILocalVariable(name: "loadedMydata", line: 609, arg: 2, scope: !23, file: !24, type: !59)
!131 = !DILocation(line: 609, column: 155, scope: !23)
-!132 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "bounds", line: 609, arg: 3, scope: !23, file: !24, type: !108)
+!132 = !DILocalVariable(name: "bounds", line: 609, arg: 3, scope: !23, file: !24, type: !108)
!133 = !DILocation(line: 609, column: 175, scope: !23)
-!134 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "data", line: 609, arg: 4, scope: !23, file: !24, type: !108)
+!134 = !DILocalVariable(name: "data", line: 609, arg: 4, scope: !23, file: !24, type: !108)
!135 = !DILocation(line: 609, column: 190, scope: !23)
-!136 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "mydata", line: 604, scope: !23, file: !24, type: !50)
+!136 = !DILocalVariable(name: "mydata", line: 604, scope: !23, file: !24, type: !50)
!137 = !DILocation(line: 604, column: 49, scope: !23)
-!138 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "self", line: 604, scope: !23, file: !40, type: !90)
-!139 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "semi", line: 607, scope: !23, file: !24, type: !125)
+!138 = !DILocalVariable(name: "self", line: 604, scope: !23, file: !40, type: !90)
+!139 = !DILocalVariable(name: "semi", line: 607, scope: !23, file: !24, type: !125)
!140 = !DILocation(line: 607, column: 30, scope: !23)
!141 = !DILocation(line: 610, column: 17, scope: !142)
!142 = distinct !DILexicalBlock(line: 609, column: 200, file: !152, scope: !23)
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 03b4d6b38151..b9d110e42cd4 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -6,15 +6,15 @@ target triple = "thumbv7-apple-macosx10.6.7"
;CHECK-NEXT: Ltmp1
;CHECK-NEXT: LBB0_1
-;CHECK:@DEBUG_VALUE: x <- Q4{{$}}
-;CHECK-NEXT:@DEBUG_VALUE: y <- Q4{{$}}
+;CHECK:@DEBUG_VALUE: x <- %Q4{{$}}
+;CHECK-NEXT:@DEBUG_VALUE: y <- %Q4{{$}}
@.str = external constant [13 x i8]
declare <4 x float> @test0001(float) nounwind readnone ssp
-define i32 @main(i32 %argc, i8** nocapture %argv, i1 %cond) nounwind ssp {
+define i32 @main(i32 %argc, i8** nocapture %argv, i1 %cond) nounwind ssp !dbg !10 {
entry:
br label %for.body9
@@ -42,9 +42,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!56}
!llvm.dbg.cu = !{!2}
-!0 = !DISubprogram(name: "test0001", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !54, scope: null, type: !3, function: <4 x float> (float)* @test0001, variables: !51)
+!0 = distinct !DISubprogram(name: "test0001", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !54, scope: null, type: !3, variables: !51)
!1 = !DIFile(filename: "build2.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !{}, retainedTypes: !{}, subprograms: !50, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !{}, retainedTypes: !{}, subprograms: !50, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "v4f32", line: 14, file: !54, scope: !2, baseType: !6)
@@ -52,27 +52,27 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!8 = !{!9}
!9 = !DISubrange(count: 4)
-!10 = !DISubprogram(name: "main", line: 59, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !54, scope: null, type: !11, function: i32 (i32, i8**, i1)* @main, variables: !52)
+!10 = distinct !DISubprogram(name: "main", line: 59, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !54, scope: null, type: !11, variables: !52)
!11 = !DISubroutineType(types: !12)
!12 = !{!13}
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!14 = !DISubprogram(name: "printFV", line: 41, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !55, scope: null, type: !16, variables: !53)
+!14 = distinct !DISubprogram(name: "printFV", line: 41, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !55, scope: null, type: !16, variables: !53)
!15 = !DIFile(filename: "/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", directory: "/private/tmp")
!16 = !DISubroutineType(types: !17)
!17 = !{null}
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !0, file: !1, type: !7)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 59, arg: 1, scope: !10, file: !1, type: !13)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 59, arg: 2, scope: !10, file: !1, type: !21)
+!18 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !0, file: !1, type: !7)
+!19 = !DILocalVariable(name: "argc", line: 59, arg: 1, scope: !10, file: !1, type: !13)
+!20 = !DILocalVariable(name: "argv", line: 59, arg: 2, scope: !10, file: !1, type: !21)
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !22)
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !23)
!23 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 60, scope: !25, file: !1, type: !13)
+!24 = !DILocalVariable(name: "i", line: 60, scope: !25, file: !1, type: !13)
!25 = distinct !DILexicalBlock(line: 59, column: 33, file: !1, scope: !10)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 60, scope: !25, file: !1, type: !13)
-!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 61, scope: !25, file: !1, type: !5)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 62, scope: !25, file: !1, type: !5)
-!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "z", line: 63, scope: !25, file: !1, type: !5)
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "F", line: 41, arg: 1, scope: !14, file: !15, type: !31)
+!26 = !DILocalVariable(name: "j", line: 60, scope: !25, file: !1, type: !13)
+!27 = !DILocalVariable(name: "x", line: 61, scope: !25, file: !1, type: !5)
+!28 = !DILocalVariable(name: "y", line: 62, scope: !25, file: !1, type: !5)
+!29 = !DILocalVariable(name: "z", line: 63, scope: !25, file: !1, type: !5)
+!30 = !DILocalVariable(name: "F", line: 41, arg: 1, scope: !14, file: !15, type: !31)
!31 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !32)
!32 = !DIDerivedType(tag: DW_TAG_typedef, name: "FV", line: 25, file: !55, scope: !2, baseType: !33)
!33 = !DICompositeType(tag: DW_TAG_union_type, line: 22, size: 128, align: 128, file: !55, scope: !2, elements: !34)
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 27bd3b8639c4..0d457d3a7371 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -10,7 +10,7 @@ target triple = "thumbv7-apple-darwin10"
@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00", align 4
@.str1 = private unnamed_addr constant [6 x i8] c"point\00", align 4
-define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize {
+define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize !dbg !9 {
entry:
tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !19, metadata !DIExpression()), !dbg !26
tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !20, metadata !DIExpression()), !dbg !26
@@ -20,7 +20,7 @@ entry:
ret i32 0, !dbg !29
}
-define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline {
+define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !16, metadata !DIExpression()), !dbg !30
tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !17, metadata !DIExpression()), !dbg !30
@@ -34,7 +34,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize {
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize !dbg !10 {
entry:
tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !22, metadata !DIExpression()), !dbg !34
tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !23, metadata !DIExpression()), !dbg !34
@@ -59,36 +59,36 @@ declare i32 @puts(i8* nocapture) nounwind
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!48}
-!0 = !DISubprogram(name: "printer", linkageName: "printer", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !46, scope: !1, type: !3, function: i32 (i8*, double, i8)* @printer, variables: !43)
+!0 = distinct !DISubprogram(name: "printer", linkageName: "printer", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !46, scope: !1, type: !3, variables: !43)
!1 = !DIFile(filename: "a.c", directory: "/tmp/")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "(LLVM build 00)", isOptimized: true, emissionKind: 1, file: !46, enums: !47, retainedTypes: !47, subprograms: !42, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "(LLVM build 00)", isOptimized: true, emissionKind: 1, file: !46, enums: !47, retainedTypes: !47, subprograms: !42, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5, !6, !7, !8}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !46, scope: !1, baseType: null)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 32, encoding: DW_ATE_float)
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
-!9 = !DISubprogram(name: "inlineprinter", linkageName: "inlineprinter", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !46, scope: !1, type: !3, function: i32 (i8*, double, i8)* @inlineprinter, variables: !44)
-!10 = !DISubprogram(name: "main", linkageName: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !46, scope: !1, type: !11, function: i32 (i32, i8**)* @main, variables: !45)
+!9 = distinct !DISubprogram(name: "inlineprinter", linkageName: "inlineprinter", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !46, scope: !1, type: !3, variables: !44)
+!10 = distinct !DISubprogram(name: "main", linkageName: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !46, scope: !1, type: !11, variables: !45)
!11 = !DISubroutineType(types: !12)
!12 = !{!5, !5, !13}
!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !46, scope: !1, baseType: !14)
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !46, scope: !1, baseType: !15)
!15 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 11, arg: 1, scope: !0, file: !1, type: !6)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 11, arg: 2, scope: !0, file: !1, type: !7)
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 11, arg: 3, scope: !0, file: !1, type: !8)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !9, file: !1, type: !6)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !9, file: !1, type: !7)
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !9, file: !1, type: !8)
+!16 = !DILocalVariable(name: "ptr", line: 11, arg: 1, scope: !0, file: !1, type: !6)
+!17 = !DILocalVariable(name: "val", line: 11, arg: 2, scope: !0, file: !1, type: !7)
+!18 = !DILocalVariable(name: "c", line: 11, arg: 3, scope: !0, file: !1, type: !8)
+!19 = !DILocalVariable(name: "ptr", line: 4, arg: 1, scope: !9, file: !1, type: !6)
+!20 = !DILocalVariable(name: "val", line: 4, arg: 2, scope: !9, file: !1, type: !7)
+!21 = !DILocalVariable(name: "c", line: 4, arg: 3, scope: !9, file: !1, type: !8)
-!49 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !9, file: !1, type: !6)
-!50 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !9, file: !1, type: !7)
-!51 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 2, scope: !9, file: !1, type: !8)
+!49 = !DILocalVariable(name: "ptr", line: 4, arg: 1, scope: !9, file: !1, type: !6)
+!50 = !DILocalVariable(name: "val", line: 4, arg: 2, scope: !9, file: !1, type: !7)
+!51 = !DILocalVariable(name: "c", line: 4, arg: 2, scope: !9, file: !1, type: !8)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 17, arg: 0, scope: !10, file: !1, type: !5)
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 17, arg: 0, scope: !10, file: !1, type: !13)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dval", line: 19, scope: !25, file: !1, type: !7)
+!22 = !DILocalVariable(name: "argc", line: 17, arg: 1, scope: !10, file: !1, type: !5)
+!23 = !DILocalVariable(name: "argv", line: 17, arg: 2, scope: !10, file: !1, type: !13)
+!24 = !DILocalVariable(name: "dval", line: 19, scope: !25, file: !1, type: !7)
!25 = distinct !DILexicalBlock(line: 18, column: 0, file: !46, scope: !10)
!26 = !DILocation(line: 4, scope: !9)
!27 = !DILocation(line: 6, scope: !28)
diff --git a/test/CodeGen/ARM/debug-info-no-frame.ll b/test/CodeGen/ARM/debug-info-no-frame.ll
index e00563cc47c4..d77a195b9528 100644
--- a/test/CodeGen/ARM/debug-info-no-frame.ll
+++ b/test/CodeGen/ARM/debug-info-no-frame.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=armv7-none-linux-gnueabihf < %s -o - | FileCheck %s
; Function Attrs: nounwind
-define void @need_cfi_def_cfa_offset() #0 {
+define void @need_cfi_def_cfa_offset() #0 !dbg !3 {
; CHECK-LABEL: need_cfi_def_cfa_offset:
; CHECK: sub sp, sp, #4
; CHECK: .cfi_def_cfa_offset 4
@@ -21,16 +21,16 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !{!3})
!1 = !DIFile(filename: "file.c", directory: "/dir")
!2 = !{}
-!3 = !DISubprogram(name: "need_cfi_def_cfa_offset", scope: !1, file: !1, line: 1, type: !4, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, function: void ()* @need_cfi_def_cfa_offset, variables: !2)
+!3 = distinct !DISubprogram(name: "need_cfi_def_cfa_offset", scope: !1, file: !1, line: 1, type: !4, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, variables: !2)
!4 = !DISubroutineType(types: !5)
!5 = !{null}
!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 2, !"Debug Info Version", i32 3}
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Depth", scope: !3, file: !1, line: 3, type: !6)
+!9 = !DILocalVariable(name: "Depth", scope: !3, file: !1, line: 3, type: !6)
!10 = !DIExpression()
!11 = !DILocation(line: 3, column: 9, scope: !3)
!12 = !DILocation(line: 7, column: 5, scope: !3)
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index 665818fc0b2e..1cd90d433640 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -15,7 +15,7 @@ target triple = "thumbv7-apple-macosx10.6.7"
declare <4 x float> @test0001(float) nounwind readnone ssp
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp !dbg !10 {
entry:
br label %for.body9
@@ -38,9 +38,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!56}
-!0 = !DISubprogram(name: "test0001", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !54, scope: !1, type: !3, function: <4 x float> (float)* @test0001, variables: !51)
+!0 = distinct !DISubprogram(name: "test0001", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !54, scope: !1, type: !3, variables: !51)
!1 = !DIFile(filename: "build2.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !{}, retainedTypes: !{}, subprograms: !50, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !{}, retainedTypes: !{}, subprograms: !50, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "v4f32", line: 14, file: !54, scope: !2, baseType: !6)
@@ -48,27 +48,27 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!8 = !{!9}
!9 = !DISubrange(count: 4)
-!10 = !DISubprogram(name: "main", line: 59, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 59, file: !54, scope: !1, type: !11, function: i32 (i32, i8**)* @main, variables: !52)
+!10 = distinct !DISubprogram(name: "main", line: 59, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 59, file: !54, scope: !1, type: !11, variables: !52)
!11 = !DISubroutineType(types: !12)
!12 = !{!13}
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!14 = !DISubprogram(name: "printFV", line: 41, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 41, file: !55, scope: !15, type: !16, variables: !53)
+!14 = distinct !DISubprogram(name: "printFV", line: 41, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 41, file: !55, scope: !15, type: !16, variables: !53)
!15 = !DIFile(filename: "/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", directory: "/private/tmp")
!16 = !DISubroutineType(types: !17)
!17 = !{null}
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !0, file: !1, type: !7)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 59, arg: 1, scope: !10, file: !1, type: !13)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 59, arg: 2, scope: !10, file: !1, type: !21)
+!18 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !0, file: !1, type: !7)
+!19 = !DILocalVariable(name: "argc", line: 59, arg: 1, scope: !10, file: !1, type: !13)
+!20 = !DILocalVariable(name: "argv", line: 59, arg: 2, scope: !10, file: !1, type: !21)
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !22)
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !23)
!23 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 60, scope: !25, file: !1, type: !13)
+!24 = !DILocalVariable(name: "i", line: 60, scope: !25, file: !1, type: !13)
!25 = distinct !DILexicalBlock(line: 59, column: 33, file: !54, scope: !10)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 60, scope: !25, file: !1, type: !13)
-!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 61, scope: !25, file: !1, type: !5)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 62, scope: !25, file: !1, type: !5)
-!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "z", line: 63, scope: !25, file: !1, type: !5)
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "F", line: 41, arg: 1, scope: !14, file: !15, type: !31)
+!26 = !DILocalVariable(name: "j", line: 60, scope: !25, file: !1, type: !13)
+!27 = !DILocalVariable(name: "x", line: 61, scope: !25, file: !1, type: !5)
+!28 = !DILocalVariable(name: "y", line: 62, scope: !25, file: !1, type: !5)
+!29 = !DILocalVariable(name: "z", line: 63, scope: !25, file: !1, type: !5)
+!30 = !DILocalVariable(name: "F", line: 41, arg: 1, scope: !14, file: !15, type: !31)
!31 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !32)
!32 = !DIDerivedType(tag: DW_TAG_typedef, name: "FV", line: 25, file: !55, scope: !2, baseType: !33)
!33 = !DICompositeType(tag: DW_TAG_union_type, line: 22, size: 128, align: 128, file: !55, scope: !2, elements: !34)
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index ec080f20db9c..654aa4545ca4 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-macosx10.6.7"
@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00"
@.str1 = private unnamed_addr constant [6 x i8] c"point\00"
-define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp {
+define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !8, metadata !DIExpression()), !dbg !24
tail call void @llvm.dbg.value(metadata float %val, i64 0, metadata !10, metadata !DIExpression()), !dbg !25
@@ -25,7 +25,7 @@ entry:
declare i32 @printf(i8* nocapture, ...) nounwind optsize
-define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp {
+define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp !dbg !6 {
entry:
tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !14, metadata !DIExpression()), !dbg !30
tail call void @llvm.dbg.value(metadata float %val, i64 0, metadata !15, metadata !DIExpression()), !dbg !31
@@ -36,7 +36,7 @@ entry:
ret i32 0, !dbg !35
}
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp {
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp !dbg !7 {
entry:
tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !17, metadata !DIExpression()), !dbg !36
tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !18, metadata !DIExpression()), !dbg !37
@@ -65,34 +65,34 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!53}
-!0 = !DISubprogram(name: "inlineprinter", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !51, scope: !1, type: !3, function: i32 (i8*, float, i8)* @inlineprinter, variables: !48)
+!0 = distinct !DISubprogram(name: "inlineprinter", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !51, scope: !1, type: !3, variables: !48)
!1 = !DIFile(filename: "a.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !51, enums: !52, retainedTypes: !52, subprograms: !47, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !51, enums: !52, retainedTypes: !52, subprograms: !47, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "printer", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !51, scope: !1, type: !3, function: i32 (i8*, float, i8)* @printer, variables: !49)
-!7 = !DISubprogram(name: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !51, scope: !1, type: !3, function: i32 (i32, i8**)* @main, variables: !50)
-!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !0, file: !1, type: !9)
+!6 = distinct !DISubprogram(name: "printer", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !51, scope: !1, type: !3, variables: !49)
+!7 = distinct !DISubprogram(name: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !51, scope: !1, type: !3, variables: !50)
+!8 = !DILocalVariable(name: "ptr", line: 4, arg: 1, scope: !0, file: !1, type: !9)
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: null)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !0, file: !1, type: !11)
+!10 = !DILocalVariable(name: "val", line: 4, arg: 2, scope: !0, file: !1, type: !11)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !0, file: !1, type: !13)
+!12 = !DILocalVariable(name: "c", line: 4, arg: 3, scope: !0, file: !1, type: !13)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
-!58 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !0, file: !1, type: !9)
-!60 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !0, file: !1, type: !11)
-!62 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !0, file: !1, type: !13)
+!58 = !DILocalVariable(name: "ptr", line: 4, arg: 1, scope: !0, file: !1, type: !9)
+!60 = !DILocalVariable(name: "val", line: 4, arg: 2, scope: !0, file: !1, type: !11)
+!62 = !DILocalVariable(name: "c", line: 4, arg: 3, scope: !0, file: !1, type: !13)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 11, arg: 1, scope: !6, file: !1, type: !9)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 11, arg: 2, scope: !6, file: !1, type: !11)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 11, arg: 3, scope: !6, file: !1, type: !13)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 17, arg: 1, scope: !7, file: !1, type: !5)
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 17, arg: 2, scope: !7, file: !1, type: !19)
+!14 = !DILocalVariable(name: "ptr", line: 11, arg: 1, scope: !6, file: !1, type: !9)
+!15 = !DILocalVariable(name: "val", line: 11, arg: 2, scope: !6, file: !1, type: !11)
+!16 = !DILocalVariable(name: "c", line: 11, arg: 3, scope: !6, file: !1, type: !13)
+!17 = !DILocalVariable(name: "argc", line: 17, arg: 1, scope: !7, file: !1, type: !5)
+!18 = !DILocalVariable(name: "argv", line: 17, arg: 2, scope: !7, file: !1, type: !19)
!19 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !20)
!20 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !21)
!21 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dval", line: 19, scope: !23, file: !1, type: !11)
+!22 = !DILocalVariable(name: "dval", line: 19, scope: !23, file: !1, type: !11)
!23 = distinct !DILexicalBlock(line: 18, column: 1, file: !51, scope: !7)
!24 = !DILocation(line: 4, column: 22, scope: !0)
!25 = !DILocation(line: 4, column: 33, scope: !0)
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index f22559efad4d..eadf1b48156b 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-macosx10.6.7"
; CHECK-NEXT: Ending address offset:
; CHECK-NEXT: Location description: 90 {{.. .. .. .. $}}
-define void @_Z3foov() optsize ssp {
+define void @_Z3foov() optsize ssp !dbg !1 {
entry:
%call = tail call float @_Z3barv() optsize, !dbg !11
tail call void @llvm.dbg.value(metadata float %call, i64 0, metadata !5, metadata !DIExpression()), !dbg !11
@@ -43,15 +43,15 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!20}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 130845)", isOptimized: true, emissionKind: 1, file: !18, enums: !19, retainedTypes: !19, subprograms: !16, imports: null)
-!1 = !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !18, scope: !2, type: !3, function: void ()* @_Z3foov, variables: !17)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 130845)", isOptimized: true, emissionKind: 1, file: !18, enums: !19, retainedTypes: !19, subprograms: !16, imports: null)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !18, scope: !2, type: !3, variables: !17)
!2 = !DIFile(filename: "k.cc", directory: "/private/tmp")
!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 6, scope: !6, file: !2, type: !7)
+!5 = !DILocalVariable(name: "k", line: 6, scope: !6, file: !2, type: !7)
!6 = distinct !DILexicalBlock(line: 5, column: 12, file: !18, scope: !1)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
-!8 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 8, scope: !9, file: !2, type: !7)
+!8 = !DILocalVariable(name: "y", line: 8, scope: !9, file: !2, type: !7)
!9 = distinct !DILexicalBlock(line: 7, column: 25, file: !18, scope: !10)
!10 = distinct !DILexicalBlock(line: 7, column: 3, file: !18, scope: !6)
!11 = !DILocation(line: 6, column: 18, scope: !6)
diff --git a/test/CodeGen/ARM/debug-segmented-stacks.ll b/test/CodeGen/ARM/debug-segmented-stacks.ll
index 47d366e49ded..bd0abedc4133 100644
--- a/test/CodeGen/ARM/debug-segmented-stacks.ll
+++ b/test/CodeGen/ARM/debug-segmented-stacks.ll
@@ -5,7 +5,7 @@
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-define void @test_basic() #0 {
+define void @test_basic() #0 !dbg !4 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
@@ -39,11 +39,11 @@ define void @test_basic() #0 {
; ARM-linux .cfi_same_value r5
}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "var.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test_basic", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: void ()* @test_basic, variables: !2)
+!4 = distinct !DISubprogram(name: "test_basic", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "var.c", directory: "/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
@@ -51,9 +51,9 @@ define void @test_basic() #0 {
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 1, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5 "}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "count", line: 5, arg: 1, scope: !4, file: !5, type: !8)
+!12 = !DILocalVariable(name: "count", line: 5, arg: 1, scope: !4, file: !5, type: !8)
!13 = !DILocation(line: 5, scope: !4)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vl", line: 6, scope: !4, file: !5, type: !15)
+!14 = !DILocalVariable(name: "vl", line: 6, scope: !4, file: !5, type: !15)
!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "va_list", line: 30, file: !16, baseType: !17)
!16 = !DIFile(filename: "/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", directory: "/tmp")
!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "__builtin_va_list", line: 6, file: !1, baseType: !18)
@@ -63,9 +63,9 @@ define void @test_basic() #0 {
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: null)
!22 = !DILocation(line: 6, scope: !4)
!23 = !DILocation(line: 7, scope: !4)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "test_basic", line: 8, scope: !4, file: !5, type: !8)
+!24 = !DILocalVariable(name: "test_basic", line: 8, scope: !4, file: !5, type: !8)
!25 = !DILocation(line: 8, scope: !4)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 9, scope: !27, file: !5, type: !8)
+!26 = !DILocalVariable(name: "i", line: 9, scope: !27, file: !5, type: !8)
!27 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !4)
!28 = !DILocation(line: 9, scope: !27)
!29 = !DILocation(line: 10, scope: !30)
diff --git a/test/CodeGen/ARM/debugtrap.ll b/test/CodeGen/ARM/debugtrap.ll
new file mode 100644
index 000000000000..9ce73939ce56
--- /dev/null
+++ b/test/CodeGen/ARM/debugtrap.ll
@@ -0,0 +1,17 @@
+; This test ensures the @llvm.debugtrap() call is not removed when generating
+; the 'pop' instruction to restore the callee saved registers on ARM.
+
+; RUN: llc < %s -mtriple=armv7 -O0 -filetype=asm | FileCheck %s
+
+declare void @llvm.debugtrap() nounwind
+declare void @foo() nounwind
+
+define void @test() nounwind {
+entry:
+ ; CHECK: bl foo
+ ; CHECK-NEXT: pop
+ ; CHECK-NEXT: trap
+ call void @foo()
+ call void @llvm.debugtrap()
+ ret void
+}
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index 7b298fee42a5..997f50760f3a 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,52 +1,97 @@
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-SWDIV
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | FileCheck %s -check-prefix=CHECK-HWDIV
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4 | FileCheck %s -check-prefix=CHECK-SWDIV
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4f | FileCheck %s -check-prefix=CHECK-SWDIV
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r5 | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-SWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4 | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-SWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4f | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-SWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r5 | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-EABI
define i32 @f1(i32 %a, i32 %b) {
entry:
-; CHECK-SWDIV: f1
+; CHECK-LABEL: f1
; CHECK-SWDIV: __divsi3
-; CHECK-HWDIV: f1
; CHECK-HWDIV: sdiv
+
+; CHECK-EABI: __aeabi_idiv
%tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
define i32 @f2(i32 %a, i32 %b) {
entry:
-; CHECK-SWDIV: f2
+; CHECK-LABEL: f2
; CHECK-SWDIV: __udivsi3
-; CHECK-HWDIV: f2
; CHECK-HWDIV: udiv
+
+; CHECK-EABI: __aeabi_uidiv
%tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
define i32 @f3(i32 %a, i32 %b) {
entry:
-; CHECK-SWDIV: f3
+; CHECK-LABEL: f3
; CHECK-SWDIV: __modsi3
-; CHECK-HWDIV: f3
; CHECK-HWDIV: sdiv
; CHECK-HWDIV: mls
+
+; EABI MODE = Remainder in R1, quotient in R0
+; CHECK-EABI: __aeabi_idivmod
+; CHECK-EABI-NEXT: mov r0, r1
%tmp1 = srem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
define i32 @f4(i32 %a, i32 %b) {
entry:
-; CHECK-SWDIV: f4
+; CHECK-LABEL: f4
; CHECK-SWDIV: __umodsi3
-; CHECK-HWDIV: f4
; CHECK-HWDIV: udiv
; CHECK-HWDIV: mls
+
+; EABI MODE = Remainder in R1, quotient in R0
+; CHECK-EABI: __aeabi_uidivmod
+; CHECK-EABI-NEXT: mov r0, r1
%tmp1 = urem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
+
+define i64 @f5(i64 %a, i64 %b) {
+entry:
+; CHECK-LABEL: f5
+; CHECK-SWDIV: __moddi3
+
+; CHECK-HWDIV: __moddi3
+
+; EABI MODE = Remainder in R2-R3, quotient in R0-R1
+; CHECK-EABI: __aeabi_ldivmod
+; CHECK-EABI-NEXT: mov r0, r2
+; CHECK-EABI-NEXT: mov r1, r3
+ %tmp1 = srem i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %tmp1
+}
+
+define i64 @f6(i64 %a, i64 %b) {
+entry:
+; CHECK-LABEL: f6
+; CHECK-SWDIV: __umoddi3
+
+; CHECK-HWDIV: __umoddi3
+
+; EABI MODE = Remainder in R2-R3, quotient in R0-R1
+; CHECK-EABI: __aeabi_uldivmod
+; CHECK-EABI-NEXT: mov r0, r2
+; CHECK-EABI-NEXT: mov r1, r3
+ %tmp1 = urem i64 %a, %b ; <i64> [#uses=1]
+ ret i64 %tmp1
+}
diff --git a/test/CodeGen/ARM/divmod-eabi.ll b/test/CodeGen/ARM/divmod-eabi.ll
index 7f72048d391e..4178af397e66 100644
--- a/test/CodeGen/ARM/divmod-eabi.ll
+++ b/test/CodeGen/ARM/divmod-eabi.ll
@@ -1,5 +1,7 @@
; RUN: llc -mtriple armv7-none-eabi %s -o - | FileCheck %s --check-prefix=EABI
; RUN: llc -mtriple armv7-none-eabihf %s -o - | FileCheck %s --check-prefix=EABI
+; Both "none-eabi" and "androideabi" must lower SREM/UREM to __aeabi_{u,i}divmod
+; RUN: llc -mtriple armv7-linux-androideabi %s -o - | FileCheck %s --check-prefix=EABI
; RUN: llc -mtriple armv7-linux-gnueabi %s -o - | FileCheck %s --check-prefix=GNU
; RUN: llc -mtriple armv7-apple-darwin %s -o - | FileCheck %s --check-prefix=DARWIN
; FIXME: long-term, we will use "-apple-macho" and won't need this exception:
@@ -187,7 +189,7 @@ define i32 @g4(i32 %a, i32 %b) {
; DARWIN-LABEL: g4:
entry:
%div = sdiv i32 %a, %b
-; EABI: __aeabi_idivmod
+; EABI: __aeabi_idiv{{$}}
; EABI: mov [[div:r[0-9]+]], r0
; GNU: __aeabi_idiv
; GNU: mov [[sum:r[0-9]+]], r0
diff --git a/test/CodeGen/ARM/eh-resume-darwin.ll b/test/CodeGen/ARM/eh-resume-darwin.ll
index 0cd49775cfb4..d3a8481275f3 100644
--- a/test/CodeGen/ARM/eh-resume-darwin.ll
+++ b/test/CodeGen/ARM/eh-resume-darwin.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=arm | FileCheck %s
-target triple = "armv6-apple-macosx10.6"
+; RUN: llc < %s -mtriple=armv7-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=IOS
+; RUN: llc < %s -mtriple=armv7k-apple-ios -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=IOS
+; RUN: llc < %s -mtriple=armv7k-apple-watchos -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=WATCHOS
declare void @func()
@@ -19,4 +20,5 @@ lpad:
resume { i8*, i32 } %exn
}
-; CHECK: __Unwind_SjLj_Resume
+; IOS: __Unwind_SjLj_Resume
+; WATCHOS: __Unwind_Resume
diff --git a/test/CodeGen/ARM/emutls.ll b/test/CodeGen/ARM/emutls.ll
new file mode 100644
index 000000000000..7ba50dd249bb
--- /dev/null
+++ b/test/CodeGen/ARM/emutls.ll
@@ -0,0 +1,258 @@
+; RUN: llc -emulated-tls -mtriple=arm-linux-android \
+; RUN: -relocation-model=pic < %s | FileCheck -check-prefix=ARM32 %s
+
+; Copied from X86/emutls.ll
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+; ARM32-LABEL: my_get_xyz:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl my_emutls_get_address(PLT)
+; ARM32-NEXT: ldr r0, [r0]
+
+entry:
+ %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+ %0 = bitcast i8* %call to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+@i1 = thread_local global i32 15
+@i2 = external thread_local global i32
+@i3 = internal thread_local global i32 15
+@i4 = hidden thread_local global i32 15
+@i5 = external hidden thread_local global i32
+@s1 = thread_local global i16 15
+@b1 = thread_local global i8 0
+
+define i32 @f1() {
+; ARM32-LABEL: f1:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: ldr r0, [r0]
+
+entry:
+ %tmp1 = load i32, i32* @i1
+ ret i32 %tmp1
+}
+
+define i32* @f2() {
+; ARM32-LABEL: f2:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: pop
+
+entry:
+ ret i32* @i1
+}
+
+define i32 @f3() nounwind {
+; ARM32-LABEL: f3:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: ldr r0, [r0]
+
+entry:
+ %tmp1 = load i32, i32* @i2
+ ret i32 %tmp1
+}
+
+define i32* @f4() {
+; ARM32-LABEL: f4:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: pop
+
+entry:
+ ret i32* @i2
+}
+
+define i32 @f5() nounwind {
+; ARM32-LABEL: f5:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: ldr r0, [r0]
+
+entry:
+ %tmp1 = load i32, i32* @i3
+ ret i32 %tmp1
+}
+
+define i32* @f6() {
+; ARM32-LABEL: f6:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: pop
+
+entry:
+ ret i32* @i3
+}
+
+define i32 @f7() {
+; ARM32-LABEL: f7:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: ldr r0, [r0]
+
+entry:
+ %tmp1 = load i32, i32* @i4
+ ret i32 %tmp1
+}
+
+define i32* @f8() {
+; ARM32-LABEL: f8:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: pop
+
+entry:
+ ret i32* @i4
+}
+
+define i32 @f9() {
+; ARM32-LABEL: f9:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: ldr r0, [r0]
+
+entry:
+ %tmp1 = load i32, i32* @i5
+ ret i32 %tmp1
+}
+
+define i32* @f10() {
+; ARM32-LABEL: f10:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: pop
+
+entry:
+ ret i32* @i5
+}
+
+define i16 @f11() {
+; ARM32-LABEL: f11:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: ldrh r0, [r0]
+
+entry:
+ %tmp1 = load i16, i16* @s1
+ ret i16 %tmp1
+}
+
+define i32 @f12() {
+; ARM32-LABEL: f12:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: ldrsh r0, [r0]
+
+entry:
+ %tmp1 = load i16, i16* @s1
+ %tmp2 = sext i16 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i8 @f13() {
+; ARM32-LABEL: f13:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: ldrb r0, [r0]
+; ARM32-NEXT: pop
+
+entry:
+ %tmp1 = load i8, i8* @b1
+ ret i8 %tmp1
+}
+
+define i32 @f14() {
+; ARM32-LABEL: f14:
+; ARM32: ldr r0,
+; ARM32: ldr r0, [pc, r0]
+; ARM32-NEXT: bl __emutls_get_address(PLT)
+; ARM32-NEXT: ldrsb r0, [r0]
+; ARM32-NEXT: pop
+
+entry:
+ %tmp1 = load i8, i8* @b1
+ %tmp2 = sext i8 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+;;;;;;;;;;;;;; 32-bit __emutls_v. and __emutls_t.
+
+; ARM32 .section .data.rel.local,
+; ARM32-LABEL: __emutls_v.i1:
+; ARM32-NEXT: .long 4
+; ARM32-NEXT: .long 4
+; ARM32-NEXT: .long 0
+; ARM32-NEXT: .long __emutls_t.i1
+
+; ARM32 .section .rodata,
+; ARM32-LABEL: __emutls_t.i1:
+; ARM32-NEXT: .long 15
+
+; ARM32-NOT: __emutls_v.i2
+
+; ARM32 .section .data.rel.local,
+; ARM32-LABEL: __emutls_v.i3:
+; ARM32-NEXT: .long 4
+; ARM32-NEXT: .long 4
+; ARM32-NEXT: .long 0
+; ARM32-NEXT: .long __emutls_t.i3
+
+; ARM32 .section .rodata,
+; ARM32-LABEL: __emutls_t.i3:
+; ARM32-NEXT: .long 15
+
+; ARM32 .section .data.rel.local,
+; ARM32-LABEL: __emutls_v.i4:
+; ARM32-NEXT: .long 4
+; ARM32-NEXT: .long 4
+; ARM32-NEXT: .long 0
+; ARM32-NEXT: .long __emutls_t.i4
+
+; ARM32 .section .rodata,
+; ARM32-LABEL: __emutls_t.i4:
+; ARM32-NEXT: .long 15
+
+; ARM32-NOT: __emutls_v.i5:
+; ARM32 .hidden __emutls_v.i5
+; ARM32-NOT: __emutls_v.i5:
+
+; ARM32 .section .data.rel.local,
+; ARM32-LABEL: __emutls_v.s1:
+; ARM32-NEXT: .long 2
+; ARM32-NEXT: .long 2
+; ARM32-NEXT: .long 0
+; ARM32-NEXT: .long __emutls_t.s1
+
+; ARM32 .section .rodata,
+; ARM32-LABEL: __emutls_t.s1:
+; ARM32-NEXT: .short 15
+
+; ARM32 .section .data.rel.local,
+; ARM32-LABEL: __emutls_v.b1:
+; ARM32-NEXT: .long 1
+; ARM32-NEXT: .long 1
+; ARM32-NEXT: .long 0
+; ARM32-NEXT: .long 0
+
+; ARM32-NOT: __emutls_t.b1
diff --git a/test/CodeGen/ARM/emutls1.ll b/test/CodeGen/ARM/emutls1.ll
new file mode 100644
index 000000000000..d4ba7eced66c
--- /dev/null
+++ b/test/CodeGen/ARM/emutls1.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -emulated-tls -march=arm -mtriple=arm-linux-androideabi \
+; RUN: | FileCheck %s
+; RUN: llc < %s -emulated-tls -march=arm -mtriple=arm-linux-androideabi \
+; RUN: -relocation-model=pic | FileCheck %s --check-prefix=PIC
+
+; Compared with tls1.ll, emulated mode should not use __aeabi_read_tp or __tls_get_addr.
+
+; CHECK-NOT: _aeabi_read_tp
+; CHECK-NOT: _tls_get_addr
+; CHECK: __emutls_get_addr
+; CHECK-NOT: __aeabi_read_tp
+; CHECK-NOT: _tls_get_addr
+
+; PIC-NOT: _aeabi_read_tp
+; PIC-NOT: _tls_get_addr
+; PIC: __emutls_get_addr
+; PIC-NOT: _aeabi_read_tp
+; PIC-NOT: _tls_get_addr
+
+@i = thread_local global i32 15 ; <i32*> [#uses=2]
+
+define i32 @f() {
+entry:
+ %tmp1 = load i32, i32* @i ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32* @g() {
+entry:
+ ret i32* @i
+}
diff --git a/test/CodeGen/ARM/emutls_generic.ll b/test/CodeGen/ARM/emutls_generic.ll
new file mode 100644
index 000000000000..0fada88fb5d9
--- /dev/null
+++ b/test/CodeGen/ARM/emutls_generic.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -emulated-tls -mtriple=arm-linux-android -relocation-model=pic \
+; RUN: | FileCheck -check-prefix=ARM_32 %s
+; RUN: llc < %s -emulated-tls -mtriple=arm-linux-androidabi -relocation-model=pic \
+; RUN: | FileCheck -check-prefix=ARM_32 %s
+; RUN: llc < %s -emulated-tls -mtriple=arm-linux-androidabi -relocation-model=pic -O3 \
+; RUN: | FileCheck -check-prefix=ARM_32 %s
+; RUN: llc < %s -emulated-tls -mtriple=arm-linux-androidabi -O3 \
+; RUN: | FileCheck -check-prefix=ARM_32 %s
+
+; Make sure that TLS symbols are emitted in expected order.
+
+@external_x = external thread_local global i32, align 8
+@external_y = thread_local global i8 7, align 2
+@internal_y = internal thread_local global i64 9, align 16
+
+define i32* @get_external_x() {
+entry:
+ ret i32* @external_x
+}
+
+define i8* @get_external_y() {
+entry:
+ ret i8* @external_y
+}
+
+define i64* @get_internal_y() {
+entry:
+ ret i64* @internal_y
+}
+
+; ARM_32-LABEL: get_external_x:
+; ARM_32: bl __emutls_get_address
+; ARM_32: .long __emutls_v.external_x
+; ARM_32-LABEL: get_external_y:
+; ARM_32: bl __emutls_get_address
+; ARM_32: .long __emutls_v.external_y
+; ARM_32-LABEL: get_internal_y:
+; ARM_32: bl __emutls_get_address
+; ARM_32: .long __emutls_v.internal_y
+; ARM_32-NOT: __emutls_t.external_x
+; ARM_32-NOT: __emutls_v.external_x:
+; ARM_32: .data
+; ARM_32: .align 2
+; ARM_32-LABEL: __emutls_v.external_y:
+; ARM_32-NEXT: .long 1
+; ARM_32-NEXT: .long 2
+; ARM_32-NEXT: .long 0
+; ARM_32-NEXT: .long __emutls_t.external_y
+; ARM_32: .section .rodata,
+; ARM_32-LABEL: __emutls_t.external_y:
+; ARM_32-NEXT: .byte 7
+; ARM_32: .data
+; ARM_32: .align 2
+; ARM_32-LABEL: __emutls_v.internal_y:
+; ARM_32-NEXT: .long 8
+; ARM_32-NEXT: .long 16
+; ARM_32-NEXT: .long 0
+; ARM_32-NEXT: .long __emutls_t.internal_y
+; ARM_32-LABEL: __emutls_t.internal_y:
+; ARM_32-NEXT: .long 9
+; ARM_32-NEXT: .long 0
diff --git a/test/CodeGen/ARM/fast-isel-align.ll b/test/CodeGen/ARM/fast-isel-align.ll
index 39085db95316..701884e926a8 100644
--- a/test/CodeGen/ARM/fast-isel-align.ll
+++ b/test/CodeGen/ARM/fast-isel-align.ll
@@ -1,22 +1,22 @@
; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-
-; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
-; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+; RUN: llc < %s -O0 -mattr=+strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+
+; RUN: llc < %s -O0 -mattr=+strict-align -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -mattr=+strict-align -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -mattr=+strict-align -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -mattr=+strict-align -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
; Check unaligned stores
%struct.anon = type <{ float }>
diff --git a/test/CodeGen/ARM/fast-isel-ext.ll b/test/CodeGen/ARM/fast-isel-ext.ll
index b792f7a90738..440aa426067c 100644
--- a/test/CodeGen/ARM/fast-isel-ext.ll
+++ b/test/CodeGen/ARM/fast-isel-ext.ll
@@ -1,9 +1,5 @@
; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=v7
; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=v7
-; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv4t-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6
-; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv4t-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6
-; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv5-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6
-; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv5-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6
; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=v7
; Can't test pre-ARMv6 Thumb because ARM FastISel currently only supports
@@ -19,8 +15,6 @@
define i8 @zext_1_8(i1 %a) nounwind ssp {
; v7-LABEL: zext_1_8:
; v7: and r0, r0, #1
-; prev6-LABEL: zext_1_8:
-; prev6: and r0, r0, #1
%r = zext i1 %a to i8
ret i8 %r
}
@@ -28,8 +22,6 @@ define i8 @zext_1_8(i1 %a) nounwind ssp {
define i16 @zext_1_16(i1 %a) nounwind ssp {
; v7-LABEL: zext_1_16:
; v7: and r0, r0, #1
-; prev6-LABEL: zext_1_16:
-; prev6: and r0, r0, #1
%r = zext i1 %a to i16
ret i16 %r
}
@@ -37,8 +29,6 @@ define i16 @zext_1_16(i1 %a) nounwind ssp {
define i32 @zext_1_32(i1 %a) nounwind ssp {
; v7-LABEL: zext_1_32:
; v7: and r0, r0, #1
-; prev6-LABEL: zext_1_32:
-; prev6: and r0, r0, #1
%r = zext i1 %a to i32
ret i32 %r
}
@@ -46,8 +36,6 @@ define i32 @zext_1_32(i1 %a) nounwind ssp {
define i16 @zext_8_16(i8 %a) nounwind ssp {
; v7-LABEL: zext_8_16:
; v7: and r0, r0, #255
-; prev6-LABEL: zext_8_16:
-; prev6: and r0, r0, #255
%r = zext i8 %a to i16
ret i16 %r
}
@@ -55,8 +43,6 @@ define i16 @zext_8_16(i8 %a) nounwind ssp {
define i32 @zext_8_32(i8 %a) nounwind ssp {
; v7-LABEL: zext_8_32:
; v7: and r0, r0, #255
-; prev6-LABEL: zext_8_32:
-; prev6: and r0, r0, #255
%r = zext i8 %a to i32
ret i32 %r
}
@@ -64,9 +50,6 @@ define i32 @zext_8_32(i8 %a) nounwind ssp {
define i32 @zext_16_32(i16 %a) nounwind ssp {
; v7-LABEL: zext_16_32:
; v7: uxth r0, r0
-; prev6-LABEL: zext_16_32:
-; prev6: lsl{{s?}} r0, r0, #16
-; prev6: lsr{{s?}} r0, r0, #16
%r = zext i16 %a to i32
ret i32 %r
}
@@ -77,9 +60,6 @@ define i8 @sext_1_8(i1 %a) nounwind ssp {
; v7-LABEL: sext_1_8:
; v7: lsl{{s?}} r0, r0, #31
; v7: asr{{s?}} r0, r0, #31
-; prev6-LABEL: sext_1_8:
-; prev6: lsl{{s?}} r0, r0, #31
-; prev6: asr{{s?}} r0, r0, #31
%r = sext i1 %a to i8
ret i8 %r
}
@@ -88,9 +68,6 @@ define i16 @sext_1_16(i1 %a) nounwind ssp {
; v7-LABEL: sext_1_16:
; v7: lsl{{s?}} r0, r0, #31
; v7: asr{{s?}} r0, r0, #31
-; prev6-LABEL: sext_1_16:
-; prev6: lsl{{s?}} r0, r0, #31
-; prev6: asr{{s?}} r0, r0, #31
%r = sext i1 %a to i16
ret i16 %r
}
@@ -99,9 +76,6 @@ define i32 @sext_1_32(i1 %a) nounwind ssp {
; v7-LABEL: sext_1_32:
; v7: lsl{{s?}} r0, r0, #31
; v7: asr{{s?}} r0, r0, #31
-; prev6-LABEL: sext_1_32:
-; prev6: lsl{{s?}} r0, r0, #31
-; prev6: asr{{s?}} r0, r0, #31
%r = sext i1 %a to i32
ret i32 %r
}
@@ -109,9 +83,6 @@ define i32 @sext_1_32(i1 %a) nounwind ssp {
define i16 @sext_8_16(i8 %a) nounwind ssp {
; v7-LABEL: sext_8_16:
; v7: sxtb r0, r0
-; prev6-LABEL: sext_8_16:
-; prev6: lsl{{s?}} r0, r0, #24
-; prev6: asr{{s?}} r0, r0, #24
%r = sext i8 %a to i16
ret i16 %r
}
@@ -119,9 +90,6 @@ define i16 @sext_8_16(i8 %a) nounwind ssp {
define i32 @sext_8_32(i8 %a) nounwind ssp {
; v7-LABEL: sext_8_32:
; v7: sxtb r0, r0
-; prev6-LABEL: sext_8_32:
-; prev6: lsl{{s?}} r0, r0, #24
-; prev6: asr{{s?}} r0, r0, #24
%r = sext i8 %a to i32
ret i32 %r
}
@@ -129,9 +97,6 @@ define i32 @sext_8_32(i8 %a) nounwind ssp {
define i32 @sext_16_32(i16 %a) nounwind ssp {
; v7-LABEL: sext_16_32:
; v7: sxth r0, r0
-; prev6-LABEL: sext_16_32:
-; prev6: lsl{{s?}} r0, r0, #16
-; prev6: asr{{s?}} r0, r0, #16
%r = sext i16 %a to i32
ret i32 %r
}
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
index 89b7c05158cd..34bb7225854a 100644
--- a/test/CodeGen/ARM/fast-isel-mvn.ll
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -1,8 +1,8 @@
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=armv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=armv7-linux-gnueabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=thumbv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=true -mtriple=thumbv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=true -mtriple=armv7-apple-ios < %s | FileCheck %s --check-prefix=MOVT
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mattr=+no-movt -mtriple=armv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mattr=+no-movt -mtriple=armv7-linux-gnueabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mattr=+no-movt -mtriple=thumbv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios < %s | FileCheck %s --check-prefix=MOVT
; rdar://10412592
define void @t1() nounwind {
diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll
index 70e15daaca62..4cee5a7eba90 100644
--- a/test/CodeGen/ARM/fast-isel-pic.ll
+++ b/test/CodeGen/ARM/fast-isel-pic.ll
@@ -1,8 +1,7 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -arm-force-fast-isel -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -arm-force-fast-isel -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF
@g = global i32 0, align 4
@@ -14,8 +13,8 @@ entry:
; THUMB: add [[reg0]], pc
; THUMB-ELF: LoadGV
; THUMB-ELF: ldr r[[reg0:[0-9]+]],
-; THUMB-ELF: ldr r[[reg1:[0-9]+]],
-; THUMB-ELF: ldr r[[reg0]], [r[[reg0]], r[[reg1]]]
+; THUMB-ELF: add r[[reg0]], pc
+; THUMB-ELF: ldr r[[reg0]], [r[[reg0]]]
; ARM: LoadGV
; ARM: ldr [[reg1:r[0-9]+]],
; ARM: add [[reg1]], pc, [[reg1]]
@@ -26,9 +25,8 @@ entry:
; ARMv7-ELF: LoadGV
; ARMv7-ELF: ldr r[[reg2:[0-9]+]],
; ARMv7-ELF: .LPC
-; ARMv7-ELF-NEXT: add r[[reg2]], pc
-; ARMv7-ELF: ldr r[[reg3:[0-9]+]],
-; ARMv7-ELF: ldr r[[reg2]], [r[[reg3]], r[[reg2]]]
+; ARMv7-ELF-NEXT: ldr r[[reg2]], [pc, r[[reg2]]]
+; ARMv7-ELF: ldr r[[reg2]], [r[[reg2]]]
%tmp = load i32, i32* @g
ret i32 %tmp
}
@@ -44,8 +42,8 @@ entry:
; THUMB: ldr r[[reg3]], [r[[reg3]]]
; THUMB-ELF: LoadIndirectSymbol
; THUMB-ELF: ldr r[[reg3:[0-9]+]],
-; THUMB-ELF: ldr r[[reg4:[0-9]+]],
-; THUMB-ELF: ldr r[[reg3]], [r[[reg3]], r[[reg4]]]
+; THUMB-ELF: ldr r[[reg4:[0-9]+]], [r[[reg3]]]
+; THUMB-ELF: ldr r0, [r[[reg4]]]
; ARM: LoadIndirectSymbol
; ARM: ldr [[reg4:r[0-9]+]],
; ARM: ldr [[reg4]], [pc, [[reg4]]]
@@ -57,9 +55,8 @@ entry:
; ARMv7-ELF: LoadIndirectSymbol
; ARMv7-ELF: ldr r[[reg5:[0-9]+]],
; ARMv7-ELF: .LPC
-; ARMv7-ELF-NEXT: add r[[reg5]], pc
-; ARMv7-ELF: ldr r[[reg6:[0-9]+]],
-; ARMv7-ELF: ldr r[[reg5]], [r[[reg6]], r[[reg5]]]
+; ARMv7-ELF: ldr r[[reg6:[0-9]+]], [pc, r[[reg5]]]
+; ARMv7-ELF: ldr r0, [r[[reg5]]]
%tmp = load i32, i32* @i
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll
index 1fd9bd9e47a3..8944a40f311f 100644
--- a/test/CodeGen/ARM/fold-stack-adjust.ll
+++ b/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -1,4 +1,6 @@
-; RUN: llc -mtriple=thumbv7-apple-none-macho < %s | FileCheck %s
+; Disable shrink-wrapping on the first test otherwise we wouldn't
+; exerce the path for PR18136.
+; RUN: llc -mtriple=thumbv7-apple-none-macho < %s -enable-shrink-wrap=false | FileCheck %s
; RUN: llc -mtriple=thumbv6m-apple-none-macho -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-T1
; RUN: llc -mtriple=thumbv7-apple-darwin-ios -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-IOS
; RUN: llc -mtriple=thumbv7--linux-gnueabi -disable-fp-elim < %s | FileCheck %s --check-prefix=CHECK-LINUX
@@ -60,20 +62,19 @@ define void @check_vfp_fold() minsize {
; CHECK: vpush {d6, d7, d8, d9}
; CHECK-NOT: sub sp,
; ...
-; CHECK: vldmia r[[GLOBREG]], {d8, d9}
-; ...
; CHECK-NOT: add sp,
; CHECK: vpop {d6, d7, d8, d9}
-; CHECKL pop {r[[GLOBREG]], pc}
+; CHECK: pop {r[[GLOBREG]], pc}
; iOS uses aligned NEON stores here, which is convenient since we
; want to make sure that works too.
; CHECK-IOS-LABEL: check_vfp_fold:
-; CHECK-IOS: push {r0, r1, r2, r3, r4, r7, lr}
+; CHECK-IOS: push {r4, r7, lr}
; CHECK-IOS: sub.w r4, sp, #16
; CHECK-IOS: bfc r4, #0, #4
; CHECK-IOS: mov sp, r4
; CHECK-IOS: vst1.64 {d8, d9}, [r4:128]
+; CHECK-IOS: sub sp, #16
; ...
; CHECK-IOS: add r4, sp, #16
; CHECK-IOS: vld1.64 {d8, d9}, [r4:128]
@@ -82,9 +83,8 @@ define void @check_vfp_fold() minsize {
%var = alloca i8, i32 16
- %tmp = load %bigVec, %bigVec* @var
+ call void asm "", "r,~{d8},~{d9}"(i8* %var)
call void @bar(i8* %var)
- store %bigVec %tmp, %bigVec* @var
ret void
}
@@ -170,9 +170,9 @@ define void @test_varsize(...) minsize {
; CHECK-T1: push {r5, r6, r7, lr}
; ...
; CHECK-T1: pop {r2, r3, r7}
-; CHECK-T1: pop {r3}
+; CHECK-T1: pop {[[POP_REG:r[0-3]]]}
; CHECK-T1: add sp, #16
-; CHECK-T1: bx r3
+; CHECK-T1: bx [[POP_REG]]
; CHECK-LABEL: test_varsize:
; CHECK: sub sp, #16
diff --git a/test/CodeGen/ARM/fp16-args.ll b/test/CodeGen/ARM/fp16-args.ll
new file mode 100644
index 000000000000..31a20f85483b
--- /dev/null
+++ b/test/CodeGen/ARM/fp16-args.ll
@@ -0,0 +1,40 @@
+; RUN: llc -float-abi soft -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
+; RUN: llc -float-abi hard -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7a--none-eabi"
+
+define float @foo(float %a.coerce, float %b.coerce) {
+entry:
+ %0 = bitcast float %a.coerce to i32
+ %tmp.0.extract.trunc = trunc i32 %0 to i16
+ %1 = bitcast i16 %tmp.0.extract.trunc to half
+ %2 = bitcast float %b.coerce to i32
+ %tmp1.0.extract.trunc = trunc i32 %2 to i16
+ %3 = bitcast i16 %tmp1.0.extract.trunc to half
+ %4 = fadd half %1, %3
+ %5 = bitcast half %4 to i16
+ %tmp5.0.insert.ext = zext i16 %5 to i32
+ %6 = bitcast i32 %tmp5.0.insert.ext to float
+ ret float %6
+; CHECK: foo:
+
+; SOFT: vmov {{s[0-9]+}}, r1
+; SOFT: vmov {{s[0-9]+}}, r0
+; SOFT: vcvtb.f32.f16 {{s[0-9]+}}, {{s[0-9]+}}
+; SOFT: vcvtb.f32.f16 {{s[0-9]+}}, {{s[0-9]+}}
+; SOFT: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; SOFT: vcvtb.f16.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; SOFT: vmov r0, {{s[0-9]+}}
+
+; HARD-NOT: vmov
+; HARD-NOT: uxth
+; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s1
+; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s0
+; HARD: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; HARD: vcvtb.f16.f32 s0, {{s[0-9]+}}
+; HARD-NOT: vmov
+; HARD-NOT: uxth
+
+; CHECK: bx lr
+}
diff --git a/test/CodeGen/ARM/fp16-promote.ll b/test/CodeGen/ARM/fp16-promote.ll
index e691c2bb8a97..2a2eb8d2b6ba 100644
--- a/test/CodeGen/ARM/fp16-promote.ll
+++ b/test/CodeGen/ARM/fp16-promote.ll
@@ -1,19 +1,19 @@
-; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL
-; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL --check-prefix=CHECK-VFP -check-prefix=CHECK-ALL --check-prefix=CHECK-LIBCALL-VFP
+; RUN: llc -asm-verbose=false < %s -mattr=-vfp2 | FileCheck %s --check-prefix=CHECK-LIBCALL -check-prefix=CHECK-NOVFP -check-prefix=CHECK-ALL
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
-target triple = "armv7-eabihf"
+target triple = "armv7---eabihf"
-; CHECK-FP16-LABEL: test_fadd:
+; CHECK-ALL-LABEL: test_fadd:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vadd.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vadd.f32
+; CHECK-NOVFP: bl __aeabi_fadd
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fadd:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vadd.f32
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fadd(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -22,16 +22,15 @@ define void @test_fadd(half* %p, half* %q) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_fsub:
+; CHECK-ALL-LABEL: test_fsub:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vsub.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vsub.f32
+; CHECK-NOVFP: bl __aeabi_fsub
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fsub:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vsub.f32
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fsub(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -40,16 +39,15 @@ define void @test_fsub(half* %p, half* %q) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_fmul:
+; CHECK-ALL-LABEL: test_fmul:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vmul.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vmul.f32
+; CHECK-NOVFP: bl __aeabi_fmul
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fmul
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vmul.f32
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fmul(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -58,16 +56,15 @@ define void @test_fmul(half* %p, half* %q) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_fdiv:
+; CHECK-ALL-LABEL: test_fdiv:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vdiv.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vdiv.f32
+; CHECK-NOVFP: bl __aeabi_fdiv
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_fdiv
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vdiv.f32
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fdiv(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -76,16 +73,14 @@ define void @test_fdiv(half* %p, half* %q) #0 {
ret void
}
-; CHECK-FP16-LABEL: test_frem:
+; CHECK-ALL-LABEL: test_frem:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: bl fmodf
-; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_frem
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmodf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_frem(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -96,9 +91,8 @@ define void @test_frem(half* %p, half* %q) #0 {
; CHECK-ALL-LABEL: test_load_store:
; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: ldrh r0, [r0]
-; CHECK-ALL-NEXT: strh r0, [r1]
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
+; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
define void @test_load_store(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
store half %a, half* %q
@@ -112,6 +106,7 @@ declare half @test_callee(half %a, half %b) #0
; CHECK-ALL-LABEL: test_call:
; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: .save {r11, lr}
; CHECK-ALL-NEXT: push {r11, lr}
; CHECK-ALL-NEXT: bl test_callee
; CHECK-ALL-NEXT: pop {r11, pc}
@@ -122,10 +117,14 @@ define half @test_call(half %a, half %b) #0 {
; CHECK-ALL-LABEL: test_call_flipped:
; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: .save {r11, lr}
; CHECK-ALL-NEXT: push {r11, lr}
-; CHECK-ALL-NEXT: mov r2, r0
-; CHECK-ALL-NEXT: mov r0, r1
-; CHECK-ALL-NEXT: mov r1, r2
+; CHECK-VFP-NEXT: vmov.f32 s2, s0
+; CHECK-VFP-NEXT: vmov.f32 s0, s1
+; CHECK-VFP-NEXT: vmov.f32 s1, s2
+; CHECK-NOVFP-NEXT: mov r2, r0
+; CHECK-NOVFP-NEXT: mov r0, r1
+; CHECK-NOVFP-NEXT: mov r1, r2
; CHECK-ALL-NEXT: bl test_callee
; CHECK-ALL-NEXT: pop {r11, pc}
define half @test_call_flipped(half %a, half %b) #0 {
@@ -135,9 +134,12 @@ define half @test_call_flipped(half %a, half %b) #0 {
; CHECK-ALL-LABEL: test_tailcall_flipped:
; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: mov r2, r0
-; CHECK-ALL-NEXT: mov r0, r1
-; CHECK-ALL-NEXT: mov r1, r2
+; CHECK-VFP-NEXT: vmov.f32 s2, s0
+; CHECK-VFP-NEXT: vmov.f32 s0, s1
+; CHECK-VFP-NEXT: vmov.f32 s1, s2
+; CHECK-NOVFP-NEXT: mov r2, r0
+; CHECK-NOVFP-NEXT: mov r0, r1
+; CHECK-NOVFP-NEXT: mov r1, r2
; CHECK-ALL-NEXT: b test_callee
define half @test_tailcall_flipped(half %a, half %b) #0 {
%r = tail call half @test_callee(half %b, half %a)
@@ -147,12 +149,10 @@ define half @test_tailcall_flipped(half %a, half %b) #0 {
; Optimizer picks %p or %q based on %c and only loads that value
; No conversion is needed
; CHECK-ALL-LABEL: test_select:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: cmp r2, #0
-; CHECK-ALL-NEXT: movne r1, r0
-; CHECK-ALL-NEXT: ldrh r1, [r1]
-; CHECK-ALL-NEXT: strh r1, [r0]
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: cmp {{r[0-9]+}}, #0
+; CHECK-ALL: movne {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK-ALL: ldrh {{r[0-9]+}}, [{{r[0-9]+}}]
+; CHECK-ALL: strh {{r[0-9]+}}, [{{r[0-9]+}}]
define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -163,17 +163,15 @@ define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
; Test only two variants of fcmp. These get translated to f32 vcmpe
; instructions anyway.
-; CHECK-FP16-LABEL: test_fcmp_une:
+; CHECK-ALL-LABEL: test_fcmp_une:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcmpe.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vcmpe.f32
+; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-FP16: vmrs APSR_nzcv, fpscr
-; CHECK-FP16: movwne
-; CHECK-LIBCALL-LABEL: test_fcmp_une:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vcmpe.f32
-; CHECK-LIBCALL: movwne
+; CHECK-ALL: movw{{ne|eq}}
define i1 @test_fcmp_une(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -181,18 +179,15 @@ define i1 @test_fcmp_une(half* %p, half* %q) #0 {
ret i1 %r
}
-; CHECK-FP16-LABEL: test_fcmp_ueq:
+; CHECK-ALL-LABEL: test_fcmp_ueq:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcmpe.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vcmpe.f32
+; CHECK-NOVFP: bl __aeabi_fcmpeq
; CHECK-FP16: vmrs APSR_nzcv, fpscr
-; CHECK-FP16: movweq
-; CHECK-FP16: movwvs
-; CHECK-LIBCALL-LABEL: test_fcmp_ueq:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vcmpe.f32
-; CHECK-LIBCALL: movweq
+; CHECK-LIBCALL: movw{{ne|eq}}
define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -200,19 +195,18 @@ define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
ret i1 %r
}
-; CHECK-FP16-LABEL: test_br_cc:
+; CHECK-ALL-LABEL: test_br_cc:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcmpe.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vcmpe.f32
+; CHECK-NOVFP: bl __aeabi_fcmplt
; CHECK-FP16: vmrs APSR_nzcv, fpscr
-; CHECK-FP16: strmi
-; CHECK-FP16: strpl
-; CHECK-LIBCALL-LABEL: test_br_cc:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vcmpe.f32
-; CHECK-LIBCALL: strmi
-; CHECK-LIBCALL: strpl
+; CHECK-VFP: strmi
+; CHECK-VFP: strpl
+; CHECK-NOVFP: strne
+; CHECK-NOVFP: streq
define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -227,20 +221,19 @@ else:
}
declare i1 @test_dummy(half* %p) #0
-; CHECK-FP16-LABEL: test_phi:
+; CHECK-ALL-LABEL: test_phi:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: [[LOOP:.LBB[1-9_]+]]:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: bl test_dummy
; CHECK-FP16: bne [[LOOP]]
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_phi:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL-VFP: bl __aeabi_h2f
; CHECK-LIBCALL: [[LOOP:.LBB[1-9_]+]]:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL-VFP: bl __aeabi_h2f
; CHECK-LIBCALL: bl test_dummy
; CHECK-LIBCALL: bne [[LOOP]]
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL-VFP: bl __aeabi_f2h
define void @test_phi(half* %p) #0 {
entry:
%a = load half, half* %p
@@ -255,96 +248,84 @@ return:
ret void
}
-; CHECK-FP16-LABEL: test_fptosi_i32:
+; CHECK-ALL-LABEL: test_fptosi_i32:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvt.s32.f32
-; CHECK-LIBCALL-LABEL: test_fptosi_i32:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vcvt.s32.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vcvt.s32.f32
+; CHECK-NOVFP: bl __aeabi_f2iz
define i32 @test_fptosi_i32(half* %p) #0 {
%a = load half, half* %p, align 2
%r = fptosi half %a to i32
ret i32 %r
}
-; CHECK-FP16-LABEL: test_fptosi_i64:
+; CHECK-ALL-LABEL: test_fptosi_i64:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: bl __aeabi_f2lz
-; CHECK-LIBCALL-LABEL: test_fptosi_i64:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __aeabi_f2lz
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-ALL: bl __aeabi_f2lz
define i64 @test_fptosi_i64(half* %p) #0 {
%a = load half, half* %p, align 2
%r = fptosi half %a to i64
ret i64 %r
}
-; CHECK-FP16-LABEL: test_fptoui_i32:
+; CHECK-ALL-LABEL: test_fptoui_i32:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvt.u32.f32
-; CHECK-LIBCALL-LABEL: test_fptoui_i32:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vcvt.u32.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vcvt.u32.f32
+; CHECK-NOVFP: bl __aeabi_f2uiz
define i32 @test_fptoui_i32(half* %p) #0 {
%a = load half, half* %p, align 2
%r = fptoui half %a to i32
ret i32 %r
}
-; CHECK-FP16-LABEL: test_fptoui_i64:
+; CHECK-ALL-LABEL: test_fptoui_i64:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: bl __aeabi_f2ulz
-; CHECK-LIBCALL-LABEL: test_fptoui_i64:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __aeabi_f2ulz
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-ALL: bl __aeabi_f2ulz
define i64 @test_fptoui_i64(half* %p) #0 {
%a = load half, half* %p, align 2
%r = fptoui half %a to i64
ret i64 %r
}
-; CHECK-FP16-LABEL: test_sitofp_i32:
-; CHECK-FP16: vcvt.f32.s32
+; CHECK-ALL-LABEL: test_sitofp_i32:
+; CHECK-VFP: vcvt.f32.s32
+; CHECK-NOVFP: bl __aeabi_i2f
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_sitofp_i32:
-; CHECK-LIBCALL: vcvt.f32.s32
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sitofp_i32(i32 %a, half* %p) #0 {
%r = sitofp i32 %a to half
store half %r, half* %p
ret void
}
-; CHECK-FP16-LABEL: test_uitofp_i32:
-; CHECK-FP16: vcvt.f32.u32
+; CHECK-ALL-LABEL: test_uitofp_i32:
+; CHECK-VFP: vcvt.f32.u32
+; CHECK-NOVFP: bl __aeabi_ui2f
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_uitofp_i32:
-; CHECK-LIBCALL: vcvt.f32.u32
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_uitofp_i32(i32 %a, half* %p) #0 {
%r = uitofp i32 %a to half
store half %r, half* %p
ret void
}
-; CHECK-FP16-LABEL: test_sitofp_i64:
-; CHECK-FP16: bl __aeabi_l2f
+; CHECK-ALL-LABEL: test_sitofp_i64:
+; CHECK-ALL: bl __aeabi_l2f
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_sitofp_i64:
-; CHECK-LIBCALL: bl __aeabi_l2f
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
%r = sitofp i64 %a to half
store half %r, half* %p
ret void
}
-; CHECK-FP16-LABEL: test_uitofp_i64:
-; CHECK-FP16: bl __aeabi_ul2f
+; CHECK-ALL-LABEL: test_uitofp_i64:
+; CHECK-ALL: bl __aeabi_ul2f
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_uitofp_i64:
-; CHECK-LIBCALL: bl __aeabi_ul2f
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
%r = uitofp i64 %a to half
store half %r, half* %p
@@ -354,7 +335,7 @@ define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-FP16-LABEL: test_fptrunc_float:
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fptrunc_float:
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fptrunc_float(float %f, half* %p) #0 {
%a = fptrunc float %f to half
store half %a, half* %p
@@ -374,7 +355,7 @@ define void @test_fptrunc_double(double %d, half* %p) #0 {
; CHECK-FP16-LABEL: test_fpextend_float:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-LIBCALL-LABEL: test_fpextend_float:
-; CHECK-LIBCALL: b __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
define float @test_fpextend_float(half* %p) {
%a = load half, half* %p, align 2
%r = fpext half %a to float
@@ -383,10 +364,10 @@ define float @test_fpextend_float(half* %p) {
; CHECK-FP16-LABEL: test_fpextend_double:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvt.f64.f32
; CHECK-LIBCALL-LABEL: test_fpextend_double:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vcvt.f64.f32
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP: vcvt.f64.f32
+; CHECK-NOVFP: bl __aeabi_f2d
define double @test_fpextend_double(half* %p) {
%a = load half, half* %p, align 2
%r = fpext half %a to double
@@ -436,14 +417,14 @@ declare half @llvm.nearbyint.f16(half %a) #0
declare half @llvm.round.f16(half %a) #0
declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
-; CHECK-FP16-LABEL: test_sqrt:
+; CHECK-ALL-LABEL: test_sqrt:
; CHECK-FP16: vcvtb.f32.f16
; CHECK-FP16: vsqrt.f32
; CHECK-FP16: vcvtb.f16.f32
-; CHECK-LIBCALL-LABEL: test_sqrt:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vsqrt.f32
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP-LIBCALL: vsqrt.f32
+; CHECK-NOVFP: bl sqrtf
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sqrt(half* %p) #0 {
%a = load half, half* %p, align 2
%r = call half @llvm.sqrt.f16(half %a)
@@ -456,9 +437,9 @@ define void @test_sqrt(half* %p) #0 {
; CHECK-FP16: bl __powisf2
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fpowi:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __powisf2
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fpowi(half* %p, i32 %b) #0 {
%a = load half, half* %p, align 2
%r = call half @llvm.powi.f16(half %a, i32 %b)
@@ -471,9 +452,9 @@ define void @test_fpowi(half* %p, i32 %b) #0 {
; CHECK-FP16: bl sinf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_sin:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl sinf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_sin(half* %p) #0 {
%a = load half, half* %p, align 2
%r = call half @llvm.sin.f16(half %a)
@@ -486,9 +467,9 @@ define void @test_sin(half* %p) #0 {
; CHECK-FP16: bl cosf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_cos:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl cosf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_cos(half* %p) #0 {
%a = load half, half* %p, align 2
%r = call half @llvm.cos.f16(half %a)
@@ -502,10 +483,10 @@ define void @test_cos(half* %p) #0 {
; CHECK-FP16: bl powf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_pow:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl powf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_pow(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -519,9 +500,9 @@ define void @test_pow(half* %p, half* %q) #0 {
; CHECK-FP16: bl expf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_exp:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl expf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_exp(half* %p) #0 {
%a = load half, half* %p, align 2
%r = call half @llvm.exp.f16(half %a)
@@ -534,9 +515,9 @@ define void @test_exp(half* %p) #0 {
; CHECK-FP16: bl exp2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_exp2:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl exp2f
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_exp2(half* %p) #0 {
%a = load half, half* %p, align 2
%r = call half @llvm.exp2.f16(half %a)
@@ -549,9 +530,9 @@ define void @test_exp2(half* %p) #0 {
; CHECK-FP16: bl logf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl logf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_log(half* %p) #0 {
%a = load half, half* %p, align 2
%r = call half @llvm.log.f16(half %a)
@@ -564,9 +545,9 @@ define void @test_log(half* %p) #0 {
; CHECK-FP16: bl log10f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log10:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl log10f
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_log10(half* %p) #0 {
%a = load half, half* %p, align 2
%r = call half @llvm.log10.f16(half %a)
@@ -579,9 +560,9 @@ define void @test_log10(half* %p) #0 {
; CHECK-FP16: bl log2f
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_log2:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl log2f
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_log2(half* %p) #0 {
%a = load half, half* %p, align 2
%r = call half @llvm.log2.f16(half %a)
@@ -596,11 +577,11 @@ define void @test_log2(half* %p) #0 {
; CHECK-FP16: bl fmaf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fma:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmaf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fma(half* %p, half* %q, half* %r) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -615,9 +596,9 @@ define void @test_fma(half* %p, half* %q, half* %r) #0 {
; CHECK-FP16: vabs.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fabs:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bfc
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fabs(half* %p) {
%a = load half, half* %p, align 2
%r = call half @llvm.fabs.f16(half %a)
@@ -631,10 +612,10 @@ define void @test_fabs(half* %p) {
; CHECK-FP16: bl fminf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_minnum:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fminf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_minnum(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -649,10 +630,10 @@ define void @test_minnum(half* %p, half* %q) #0 {
; CHECK-FP16: bl fmaxf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_maxnum:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl fmaxf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_maxnum(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -667,10 +648,13 @@ define void @test_maxnum(half* %p, half* %q) #0 {
; CHECK-FP16: vbsl
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_copysign:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vbsl
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP-LIBCALL: vbsl
+; CHECK-NOVFP: bfc
+; CHECK-NOVFP: and
+; CHECK-NOVFP: orr
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_copysign(half* %p, half* %q) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -684,9 +668,9 @@ define void @test_copysign(half* %p, half* %q) #0 {
; CHECK-FP16: bl floorf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_floor:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl floorf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_floor(half* %p) {
%a = load half, half* %p, align 2
%r = call half @llvm.floor.f16(half %a)
@@ -699,9 +683,9 @@ define void @test_floor(half* %p) {
; CHECK-FP16: bl ceilf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_ceil:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl ceilf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_ceil(half* %p) {
%a = load half, half* %p, align 2
%r = call half @llvm.ceil.f16(half %a)
@@ -714,9 +698,9 @@ define void @test_ceil(half* %p) {
; CHECK-FP16: bl truncf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_trunc:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl truncf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_trunc(half* %p) {
%a = load half, half* %p, align 2
%r = call half @llvm.trunc.f16(half %a)
@@ -729,9 +713,9 @@ define void @test_trunc(half* %p) {
; CHECK-FP16: bl rintf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_rint:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl rintf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_rint(half* %p) {
%a = load half, half* %p, align 2
%r = call half @llvm.rint.f16(half %a)
@@ -744,9 +728,9 @@ define void @test_rint(half* %p) {
; CHECK-FP16: bl nearbyintf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_nearbyint:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl nearbyintf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_nearbyint(half* %p) {
%a = load half, half* %p, align 2
%r = call half @llvm.nearbyint.f16(half %a)
@@ -759,9 +743,9 @@ define void @test_nearbyint(half* %p) {
; CHECK-FP16: bl roundf
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_round:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl roundf
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_round(half* %p) {
%a = load half, half* %p, align 2
%r = call half @llvm.round.f16(half %a)
@@ -776,11 +760,12 @@ define void @test_round(half* %p) {
; CHECK-FP16: vmla.f32
; CHECK-FP16: vcvtb.f16.f32
; CHECK-LIBCALL-LABEL: test_fmuladd:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
-; CHECK-LIBCALL: vmla.f32
-; CHECK-LIBCALL: bl __gnu_f2h_ieee
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-VFP-LIBCALL: vmla.f32
+; CHECK-NOVFP: bl __aeabi_fmul
+; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
%a = load half, half* %p, align 2
%b = load half, half* %q, align 2
@@ -795,30 +780,28 @@ define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
; and extractelement have these extra loads and stores.
; CHECK-ALL-LABEL: test_insertelement:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: sub sp, sp, #8
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: mov
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: add
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: add sp, sp, #8
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: sub sp, sp, #8
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: mov
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: add
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
+; CHECK-ALL: add sp, sp, #8
define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
%a = load half, half* %p, align 2
%b = load <4 x half>, <4 x half>* %q, align 8
@@ -828,22 +811,30 @@ define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
}
; CHECK-ALL-LABEL: test_extractelement:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: sub sp, sp, #8
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: orr
-; CHECK-ALL-NEXT: str
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: orr
-; CHECK-ALL-NEXT: str
-; CHECK-ALL-NEXT: mov
-; CHECK-ALL-NEXT: add
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: add sp, sp, #8
-; CHECK-ALL-NEXT: bx lr
+; CHECK-VFP: sub sp, sp, #8
+; CHECK-VFP: ldrh
+; CHECK-VFP: ldrh
+; CHECK-VFP: orr
+; CHECK-VFP: str
+; CHECK-VFP: ldrh
+; CHECK-VFP: ldrh
+; CHECK-VFP: orr
+; CHECK-VFP: str
+; CHECK-VFP: mov
+; CHECK-VFP: add
+; CHECK-VFP: ldrh
+; CHECK-VFP: strh
+; CHECK-VFP: add sp, sp, #8
+; CHECK-VFP: bx lr
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
+; CHECK-NOVFP: strh
+; CHECK-NOVFP: ldrh
define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
%a = load <4 x half>, <4 x half>* %q, align 8
%b = extractelement <4 x half> %a, i32 %i
@@ -856,12 +847,10 @@ define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
%struct.dummy = type { i32, half }
; CHECK-ALL-LABEL: test_insertvalue:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: ldr
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: str
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL-DAG: ldr
+; CHECK-ALL-DAG: ldrh
+; CHECK-ALL-DAG: strh
+; CHECK-ALL-DAG: str
define void @test_insertvalue(%struct.dummy* %p, half* %q) {
%a = load %struct.dummy, %struct.dummy* %p
%b = load half, half* %q
@@ -871,10 +860,9 @@ define void @test_insertvalue(%struct.dummy* %p, half* %q) {
}
; CHECK-ALL-LABEL: test_extractvalue:
-; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: ldrh
-; CHECK-ALL-NEXT: strh
-; CHECK-ALL-NEXT: bx lr
+; CHECK-ALL: .fnstart
+; CHECK-ALL: ldrh
+; CHECK-ALL: strh
define void @test_extractvalue(%struct.dummy* %p, half* %q) {
%a = load %struct.dummy, %struct.dummy* %p
%b = extractvalue %struct.dummy %a, 1
@@ -882,10 +870,11 @@ define void @test_extractvalue(%struct.dummy* %p, half* %q) {
ret void
}
-; CHECK-FP16-LABEL: test_struct_return:
+; CHECK-ALL-LABEL: test_struct_return:
; CHECK-FP16: vcvtb.f32.f16
-; CHECK-LIBCALL-LABEL: test_struct_return:
-; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-VFP-LIBCALL: bl __aeabi_h2f
+; CHECK-NOVFP-DAG: ldr
+; CHECK-NOVFP-DAG: ldrh
define %struct.dummy @test_struct_return(%struct.dummy* %p) {
%a = load %struct.dummy, %struct.dummy* %p
ret %struct.dummy %a
@@ -893,7 +882,7 @@ define %struct.dummy @test_struct_return(%struct.dummy* %p) {
; CHECK-ALL-LABEL: test_struct_arg:
; CHECK-ALL-NEXT: .fnstart
-; CHECK-ALL-NEXT: mov r0, r1
+; CHECK-NOVFP-NEXT: mov r0, r1
; CHECK-ALL-NEXT: bx lr
define half @test_struct_arg(%struct.dummy %p) {
%a = extractvalue %struct.dummy %p, 1
diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll
index 25fbf9070cb6..73d5c36a9c2f 100644
--- a/test/CodeGen/ARM/fp16.ll
+++ b/test/CodeGen/ARM/fp16.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s | FileCheck %s
-; RUN: llc -mattr=+vfp3,+fp16 < %s | FileCheck --check-prefix=CHECK-FP16 %s
-; RUN: llc -mtriple=armv8-eabihf < %s | FileCheck --check-prefix=CHECK-ARMV8 %s
-; RUN: llc -mtriple=thumbv7m-eabi < %s | FileCheck --check-prefix=CHECK-SOFTFLOAT %s
+; RUN: llc -mtriple=armv7a--none-eabi < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-HARDFLOAT-EABI %s
+; RUN: llc -mtriple=armv7a--none-gnueabi < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-HARDFLOAT-GNU %s
+; RUN: llc -mattr=+vfp3,+fp16 < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FP16 %s
+; RUN: llc -mtriple=armv8-eabihf < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARMV8 %s
+; RUN: llc -mtriple=thumbv7m-eabi < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SOFTFLOAT-EABI %s
+; RUN: llc -mtriple=thumbv7m-gnueabi < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SOFTFLOAT-GNU %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
target triple = "armv7---eabihf"
@@ -12,41 +14,45 @@ target triple = "armv7---eabihf"
define void @foo() nounwind {
; CHECK-LABEL: foo:
-; CHECK-FP16-LABEL: foo:
-; CHECK-ARMV8-LABEL: foo:
-; CHECK-SOFTFLOAT-LABEL: foo:
entry:
%0 = load i16, i16* @x, align 2
%1 = load i16, i16* @y, align 2
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
-; CHECK: __gnu_h2f_ieee
+; CHECK-HARDFLOAT-EABI: __aeabi_h2f
+; CHECK-HARDFLOAT-GNU: __gnu_h2f_ieee
; CHECK-FP16: vcvtb.f32.f16
; CHECK-ARMv8: vcvtb.f32.f16
-; CHECK-SOFTFLOAT: __gnu_h2f_ieee
+; CHECK-SOFTFLOAT-EABI: __aeabi_h2f
+; CHECK-SOFTFLOAT-GNU: __gnu_h2f_ieee
%3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
-; CHECK: __gnu_h2f_ieee
+; CHECK-HARDFLOAT-EABI: __aeabi_h2f
+; CHECK-HARDFLOAT-GNU: __gnu_h2f_ieee
; CHECK-FP16: vcvtb.f32.f16
; CHECK-ARMV8: vcvtb.f32.f16
-; CHECK-SOFTFLOAT: __gnu_h2f_ieee
+; CHECK-SOFTFLOAT-EABI: __aeabi_h2f
+; CHECK-SOFTFLOAT-GNU: __gnu_h2f_ieee
%4 = fadd float %2, %3
%5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
-; CHECK: __gnu_f2h_ieee
+; CHECK-HARDFLOAT-EABI: __aeabi_f2h
+; CHECK-HARDFLOAT-GNU: __gnu_f2h_ieee
; CHECK-FP16: vcvtb.f16.f32
; CHECK-ARMV8: vcvtb.f16.f32
-; CHECK-SOFTFLOAT: __gnu_f2h_ieee
+; CHECK-SOFTFLOAT-EABI: __aeabi_f2h
+; CHECK-SOFTFLOAT-GNU: __gnu_f2h_ieee
store i16 %5, i16* @x, align 2
ret void
}
define double @test_from_fp16(i16 %in) {
; CHECK-LABEL: test_from_fp16:
-; CHECK-FP16-LABEL: test_from_fp16:
-; CHECK-ARMV8-LABEL: test_from_fp16:
-; CHECK-SOFTFLOAT-LABEL: test_from_fp16:
%val = call double @llvm.convert.from.fp16.f64(i16 %in)
-; CHECK: bl __gnu_h2f_ieee
-; CHECK: vmov [[TMP:s[0-9]+]], r0
-; CHECK: vcvt.f64.f32 d0, [[TMP]]
+; CHECK-HARDFLOAT-EABI: bl __aeabi_h2f
+; CHECK-HARDFLOAT-EABI: vmov [[TMP:s[0-9]+]], r0
+; CHECK-HARDFLOAT-EABI: vcvt.f64.f32 {{d[0-9]+}}, [[TMP]]
+
+; CHECK-HARDFLOAT-GNU: bl __gnu_h2f_ieee
+; CHECK-HARDFLOAT-GNU: vmov [[TMP:s[0-9]+]], r0
+; CHECK-HARDFLOAT-GNU: vcvt.f64.f32 {{d[0-9]+}}, [[TMP]]
; CHECK-FP16: vmov [[TMP16:s[0-9]+]], r0
; CHECK-FP16: vcvtb.f32.f16 [[TMP32:s[0-9]+]], [[TMP16]]
@@ -55,25 +61,29 @@ define double @test_from_fp16(i16 %in) {
; CHECK-ARMV8: vmov [[TMP:s[0-9]+]], r0
; CHECK-ARMV8: vcvtb.f64.f16 d0, [[TMP]]
-; CHECK-SOFTFLOAT: bl __gnu_h2f_ieee
-; CHECK-SOFTFLOAT: bl __aeabi_f2d
+; CHECK-SOFTFLOAT-EABI: bl __aeabi_h2f
+; CHECK-SOFTFLOAT-EABI: bl __aeabi_f2d
+
+; CHECK-SOFTFLOAT-GNU: bl __gnu_h2f_ieee
+; CHECK-SOFTFLOAT-GNU: bl __aeabi_f2d
ret double %val
}
define i16 @test_to_fp16(double %in) {
; CHECK-LABEL: test_to_fp16:
-; CHECK-FP16-LABEL: test_to_fp16:
-; CHECK-ARMV8-LABEL: test_to_fp16:
-; CHECK-SOFTFLOAT-LABEL: test_to_fp16:
%val = call i16 @llvm.convert.to.fp16.f64(double %in)
-; CHECK: bl __aeabi_d2h
+; CHECK-HARDFLOAT-EABI: bl __aeabi_d2h
+
+; CHECK-HARDFLOAT-GNU: bl __aeabi_d2h
; CHECK-FP16: bl __aeabi_d2h
; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0
; CHECK-ARMV8: vmov r0, [[TMP]]
-; CHECK-SOFTFLOAT: bl __aeabi_d2h
+; CHECK-SOFTFLOAT-EABI: bl __aeabi_d2h
+
+; CHECK-SOFTFLOAT-GNU: bl __aeabi_d2h
ret i16 %val
}
diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll
index cc880148da85..824824429db1 100644
--- a/test/CodeGen/ARM/fparith.ll
+++ b/test/CodeGen/ARM/fparith.ll
@@ -56,7 +56,7 @@ entry:
ret float %tmp1
}
-define double @f8(double %a) {
+define arm_aapcs_vfpcc double @f8(double %a) {
;CHECK-LABEL: f8:
;CHECK: vneg.f64
entry:
@@ -90,7 +90,7 @@ entry:
declare float @fabsf(float)
-define double @f12(double %a) {
+define arm_aapcs_vfpcc double @f12(double %a) {
;CHECK-LABEL: f12:
;CHECK: vabs.f64
entry:
diff --git a/test/CodeGen/ARM/gep-optimization.ll b/test/CodeGen/ARM/gep-optimization.ll
new file mode 100644
index 000000000000..ce5af66d56ce
--- /dev/null
+++ b/test/CodeGen/ARM/gep-optimization.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=armv7a-eabi | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-AT2
+; RUN: llc < %s -mtriple=thumbv7m-eabi | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-AT2
+; RUN: llc < %s -mtriple=thumbv6m-eabi | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T1
+
+; This test checks that various kinds of getelementptr are all optimised to a
+; simple multiply plus add, with the add being done by a register offset if the
+; result is used in a load.
+
+; CHECK-LABEL: calc_1d:
+; CHECK: mov{{s?}} [[REG1:r[0-9]+]], #84
+; CHECK-AT2: mla r0, r1, [[REG1]], r0
+; CHECK-T1: muls [[REG2:r[0-9]+]], r1, [[REG1]]
+; CHECK-T1: adds r0, r0, [[REG2]]
+define i32* @calc_1d(i32* %p, i32 %n) {
+entry:
+ %mul = mul nsw i32 %n, 21
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %mul
+ ret i32* %add.ptr
+}
+
+; CHECK-LABEL: load_1d:
+; CHECK: mov{{s?}} [[REG1:r[0-9]+]], #84
+; CHECK: mul{{s?}} [[REG2:r[0-9]+]],{{( r1,)?}} [[REG1]]{{(, r1)?}}
+; CHECK: ldr r0, [r0, [[REG2]]]
+define i32 @load_1d(i32* %p, i32 %n) #1 {
+entry:
+ %mul = mul nsw i32 %n, 21
+ %arrayidx = getelementptr inbounds i32, i32* %p, i32 %mul
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; CHECK-LABEL: calc_2d_a:
+; CHECK: mov{{s?}} [[REG1:r[0-9]+]], #84
+; CHECK-AT2: mla r0, r1, [[REG1]], r0
+; CHECK-T1: muls [[REG2:r[0-9]+]], r1, [[REG1]]
+; CHECK-T1: adds r0, r0, [[REG2]]
+define i32* @calc_2d_a([100 x i32]* %p, i32 %n) {
+entry:
+ %mul = mul nsw i32 %n, 21
+ %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %p, i32 0, i32 %mul
+ ret i32* %arrayidx1
+}
+
+; CHECK-LABEL: load_2d_a:
+; CHECK: mov{{s?}} [[REG1:r[0-9]+]], #84
+; CHECK: mul{{s?}} [[REG2:r[0-9]+]],{{( r1,)?}} [[REG1]]{{(, r1)?}}
+; CHECK: ldr r0, [r0, [[REG2]]]
+define i32 @load_2d_a([100 x i32]* %p, i32 %n) #1 {
+entry:
+ %mul = mul nsw i32 %n, 21
+ %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %p, i32 0, i32 %mul
+ %0 = load i32, i32* %arrayidx1, align 4
+ ret i32 %0
+}
+
+; CHECK-LABEL: calc_2d_b:
+; CHECK: mov{{s?}} [[REG1:r[0-9]+]], #84
+; CHECK-AT2: mla r0, r1, [[REG1]], r0
+; CHECK-T1: muls [[REG2:r[0-9]+]], r1, [[REG1]]
+; CHECK-T1: adds r0, r0, [[REG2]]
+define i32* @calc_2d_b([21 x i32]* %p, i32 %n) {
+entry:
+ %arrayidx1 = getelementptr inbounds [21 x i32], [21 x i32]* %p, i32 %n, i32 0
+ ret i32* %arrayidx1
+}
+
+; CHECK-LABEL: load_2d_b:
+; CHECK: mov{{s?}} [[REG1:r[0-9]+]], #84
+; CHECK: mul{{s?}} [[REG2:r[0-9]+]],{{( r1,)?}} [[REG1]]{{(, r1)?}}
+; CHECK: ldr r0, [r0, [[REG2]]]
+define i32 @load_2d_b([21 x i32]* %p, i32 %n) {
+entry:
+ %arrayidx1 = getelementptr inbounds [21 x i32], [21 x i32]* %p, i32 %n, i32 0
+ %0 = load i32, i32* %arrayidx1, align 4
+ ret i32 %0
+}
diff --git a/test/CodeGen/ARM/global-merge-1.ll b/test/CodeGen/ARM/global-merge-1.ll
index d4d9b0f9d1f3..a3cbe8aec098 100644
--- a/test/CodeGen/ARM/global-merge-1.ll
+++ b/test/CodeGen/ARM/global-merge-1.ll
@@ -11,16 +11,16 @@
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-; MERGE: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; MERGE: .zerofill __DATA,__bss,l__MergedGlobals,60,4
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
; MERGE-NOT: .zerofill __DATA,__bss,_foo,20,2
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; NO-MERGE-NOT: .zerofill __DATA,__bss,l__MergedGlobals,60,4
; NO-MERGE: .zerofill __DATA,__bss,_bar,20,2
; NO-MERGE: .zerofill __DATA,__bss,_baz,20,2
; NO-MERGE: .zerofill __DATA,__bss,_foo,20,2
-; NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,60,4
+; NO-MERGE-NOT: .zerofill __DATA,__bss,l__MergedGlobals,60,4
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios3.0.0"
diff --git a/test/CodeGen/ARM/global-merge-external.ll b/test/CodeGen/ARM/global-merge-external.ll
new file mode 100644
index 000000000000..a9e0d199705a
--- /dev/null
+++ b/test/CodeGen/ARM/global-merge-external.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge | FileCheck %s --check-prefix=CHECK-MERGE
+; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-MERGE
+; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefix=CHECK-NO-MERGE
+; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefix=CHECK-NO-MERGE
+
+@x = global i32 0, align 4
+@y = global i32 0, align 4
+@z = global i32 0, align 4
+
+define void @f1(i32 %a1, i32 %a2) {
+;CHECK: f1:
+;CHECK: ldr {{r[0-9]+}}, [[LABEL1:\.LCPI[0-9]+_[0-9]]]
+;CHECK: [[LABEL1]]:
+;CHECK-MERGE: .long .L_MergedGlobals
+;CHECK-NO-MERGE: .long {{_?x}}
+ store i32 %a1, i32* @x, align 4
+ store i32 %a2, i32* @y, align 4
+ ret void
+}
+
+define void @g1(i32 %a1, i32 %a2) {
+;CHECK: g1:
+;CHECK: ldr {{r[0-9]+}}, [[LABEL2:\.LCPI[0-9]+_[0-9]]]
+;CHECK: [[LABEL2]]:
+;CHECK-MERGE: .long .L_MergedGlobals
+;CHECK-NO-MERGE: .long {{_?y}}
+ store i32 %a1, i32* @y, align 4
+ store i32 %a2, i32* @z, align 4
+ ret void
+}
+
+;CHECK-NO-MERGE-NOT: .globl .L_MergedGlobals
+
+;CHECK-MERGE: .type .L_MergedGlobals,%object
+;CHECK-MERGE: .local .L_MergedGlobals
+;CHECK-MERGE: .comm .L_MergedGlobals,12,4
+
+;CHECK-MERGE: .globl x
+;CHECK-MERGE: x = .L_MergedGlobals
+;CHECK-MERGE: .size x, 4
+;CHECK-MERGE: .globl y
+;CHECK-MERGE: y = .L_MergedGlobals+4
+;CHECK-MERGE: .size y, 4
+;CHECK-MERGE: .globl z
+;CHECK-MERGE: z = .L_MergedGlobals+8
+;CHECK-MERGE: .size z, 4
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index bab96dadce55..e6aa2db744d5 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -60,16 +60,13 @@ define i32 @test1() {
; LinuxPIC-LABEL: test1:
; LinuxPIC: ldr r0, .LCPI0_0
-; LinuxPIC: ldr r1, .LCPI0_1
; LinuxPIC: .LPC0_0:
-; LinuxPIC: add r0, pc, r0
-; LinuxPIC: ldr r0, [r1, r0]
+; LinuxPIC: ldr r0, [pc, r0]
; LinuxPIC: ldr r0, [r0]
; LinuxPIC: bx lr
; LinuxPIC: .align 2
; LinuxPIC: .LCPI0_0:
-; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC0_0+8)
-; LinuxPIC: .LCPI0_1:
-; LinuxPIC: .long G(GOT)
+; LinuxPIC: .Ltmp0:
+; LinuxPIC: .long G(GOT_PREL)-((.LPC0_0+8)-.Ltmp0)
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
index 4e6924fe5b6b..a44c9721d6c1 100644
--- a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
+++ b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
@@ -14,15 +14,15 @@ entry:
br i1 undef, label %for.end, label %for.body
; Before if conversion, we have
-; for.body -> lor.lhs.false.i (62)
-; -> for.cond.backedge (62)
-; lor.lhs.false.i -> for.cond.backedge (1048575)
-; -> cond.false.i (1)
+; for.body -> lor.lhs.false.i (50%)
+; -> for.cond.backedge (50%)
+; lor.lhs.false.i -> for.cond.backedge (100%)
+; -> cond.false.i (0%)
; Afer if conversion, we have
-; for.body -> for.cond.backedge (130023362)
-; -> cond.false.i (62)
+; for.body -> for.cond.backedge (100%)
+; -> cond.false.i (0%)
; CHECK: BB#1: derived from LLVM BB %for.body
-; CHECK: Successors according to CFG: BB#2(130023362) BB#4(62)
+; CHECK: Successors according to CFG: BB#2(0x7ffffc00 / 0x80000000 = 100.00%) BB#4(0x00000400 / 0x80000000 = 0.00%)
for.body:
br i1 undef, label %for.cond.backedge, label %lor.lhs.false.i, !prof !1
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight.ll b/test/CodeGen/ARM/ifcvt-branch-weight.ll
index 41d78e53acc7..0de039cde23c 100644
--- a/test/CodeGen/ARM/ifcvt-branch-weight.ll
+++ b/test/CodeGen/ARM/ifcvt-branch-weight.ll
@@ -19,7 +19,7 @@ bb:
br i1 %9, label %return, label %bb2
; CHECK: BB#2: derived from LLVM BB %bb2
-; CHECK: Successors according to CFG: BB#3(192) BB#4(192)
+; CHECK: Successors according to CFG: BB#3({{[0-9a-fx/= ]+}}50.00%) BB#4({{[0-9a-fx/= ]+}}50.00%)
bb2:
%v10 = icmp eq i32 %3, 16
diff --git a/test/CodeGen/ARM/ifcvt-iter-indbr.ll b/test/CodeGen/ARM/ifcvt-iter-indbr.ll
index 75e9d77d7920..a96b6e8a1e83 100644
--- a/test/CodeGen/ARM/ifcvt-iter-indbr.ll
+++ b/test/CodeGen/ARM/ifcvt-iter-indbr.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple thumbv7s-apple-darwin -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple thumbv7s-apple-darwin -asm-verbose=false -print-machineinstrs=if-converter 2>&1 | FileCheck --check-prefix=CHECK-PROB %s
declare i32 @foo(i32)
declare i8* @bar(i32, i8*, i8*)
@@ -27,6 +28,11 @@ declare i8* @bar(i32, i8*, i8*)
; CHECK-NEXT: movw r0, #4567
; CHECK-NEXT: [[FOOCALL]]:
; CHECK-NEXT: blx _foo
+;
+; CHECK-PROB: BB#0:
+; CHECK-PROB: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}50.00%) BB#2({{[0-9a-fx/= ]+}}25.00%) BB#4({{[0-9a-fx/= ]+}}25.00%)
+; CHECK-PROB: BB#1:
+; CHECK-PROB: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.00%) BB#4({{[0-9a-fx/= ]+}}25.00%)
define i32 @test(i32 %a, i32 %a2, i32* %p, i32* %p2) {
entry:
diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll
index 8c6825aeda97..0a6b99fb89b3 100644
--- a/test/CodeGen/ARM/ifcvt4.ll
+++ b/test/CodeGen/ARM/ifcvt4.ll
@@ -1,10 +1,8 @@
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
-; Do not if-convert when branches go to the different loops.
; CHECK-LABEL: t:
-; CHECK-NOT: subgt
-; CHECK-NOT: suble
-; Don't use
+; CHECK: subgt
+; CHECK: suble
define i32 @t(i32 %a, i32 %b) {
entry:
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 3aa2139cc03a..9fb8abde6130 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -13,10 +13,10 @@ entry:
define i32 @t1(i32 %a, i32 %b) {
; A8-LABEL: t1:
-; A8: poplt {r7, pc}
+; A8: bxlt lr
; SWIFT-LABEL: t1:
-; SWIFT: pop {r7, pc}
+; SWIFT: bxlt lr
; SWIFT: pop {r7, pc}
entry:
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 78901930e4b2..668069751cf1 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -3,7 +3,7 @@
define void @foo(i32 %X, i32 %Y) {
entry:
; CHECK: cmpne
-; CHECK: pophi
+; CHECK: bxhi lr
%tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
%tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
%tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index ca9a5c63cda6..e8b7f6926396 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -5,7 +5,9 @@
declare void @abort()
define fastcc void @t(%struct.SString* %word, i8 signext %c) {
-; CHECK: popne
+; CHECK-NOT: pop
+; CHECK: bxne
+; CHECK-NOT: pop
entry:
%tmp1 = icmp eq %struct.SString* %word, null ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %cond_false
diff --git a/test/CodeGen/ARM/inlineasm-switch-mode.ll b/test/CodeGen/ARM/inlineasm-switch-mode.ll
index 65fea114d7de..6035612788d8 100644
--- a/test/CodeGen/ARM/inlineasm-switch-mode.ll
+++ b/test/CodeGen/ARM/inlineasm-switch-mode.ll
@@ -15,8 +15,8 @@ define hidden i32 @bah(i8* %start) #0 align 2 {
; ARM: $t
; ARM-NEXT: 48 1c
-; THUMB: $a
+; THUMB: $a{{.*}}:
; THUMB-NEXT: 04 70
; THUMB-NEXT: 2d e5
-; THUMB: $t
+; THUMB: $t{{.*}}:
; THUMB-NEXT: 48 1c adds r0, r1, #1
diff --git a/test/CodeGen/ARM/ldm-stm-base-materialization.ll b/test/CodeGen/ARM/ldm-stm-base-materialization.ll
new file mode 100644
index 000000000000..a3231f95f478
--- /dev/null
+++ b/test/CodeGen/ARM/ldm-stm-base-materialization.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple armv7a-none-eabi -mattr=-neon < %s -verify-machineinstrs -o - | FileCheck %s
+
+; Thumb1 (thumbv6m) is tested in tests/Thumb
+
+@a = external global i32*
+@b = external global i32*
+
+; Function Attrs: nounwind
+define void @foo24() #0 {
+entry:
+; CHECK-LABEL: foo24:
+; We use '[rl0-9]*' to allow 'r0'..'r12', 'lr'
+; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
+; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
+; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4
+; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4
+; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]], [[R5:[rl0-9]+]], [[R6:[rl0-9]+]]}
+; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]], [[R5]], [[R6]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false)
+ ret void
+}
+
+define void @foo28() #0 {
+entry:
+; CHECK-LABEL: foo28:
+; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
+; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
+; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4
+; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4
+; CHECK-NEXT: ldm [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]]}
+; CHECK-NEXT: stm [[NSB]]!, {[[R1]], [[R2]], [[R3]]}
+; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
+; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
+ ret void
+}
+
+define void @foo32() #0 {
+entry:
+; CHECK-LABEL: foo32:
+; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
+; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
+; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4
+; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4
+; CHECK-NEXT: ldm [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
+; CHECK-NEXT: stm [[NSB]]!, {[[R1]], [[R2]], [[R3]], [[R4]]}
+; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
+; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false)
+ ret void
+}
+
+define void @foo36() #0 {
+entry:
+; CHECK-LABEL: foo36:
+; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
+; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
+; CHECK: add [[NLB:[rl0-9]+]], [[LB]], #4
+; CHECK: add [[NSB:[rl0-9]+]], [[SB]], #4
+; CHECK-NEXT: ldm [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
+; CHECK-NEXT: stm [[NSB]]!, {[[R1]], [[R2]], [[R3]], [[R4]]}
+; CHECK-NEXT: ldm [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]], [[R5:[rl0-9]+]]}
+; CHECK-NEXT: stm [[NSB]], {[[R1]], [[R2]], [[R3]], [[R4]], [[R5]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false)
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 5411618ed86d..b2596346bfa1 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8 -check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs | FileCheck %s -check-prefix=A8 -check-prefix=CHECK
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3 -check-prefix=CHECK
; rdar://6949835
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC -check-prefix=CHECK
@@ -112,10 +112,10 @@ entry:
}
; CHECK-LABEL: strd_spill_ldrd_reload:
-; A8: strd r1, r0, [sp]
-; M3: strd r1, r0, [sp]
-; BASIC: strd r1, r0, [sp]
-; GREEDY: strd r0, r1, [sp]
+; A8: strd r1, r0, [sp, #-8]!
+; M3: strd r1, r0, [sp, #-8]!
+; BASIC: strd r1, r0, [sp, #-8]!
+; GREEDY: strd r0, r1, [sp, #-8]!
; CHECK: @ InlineAsm Start
; CHECK: @ InlineAsm End
; A8: ldrd r2, r1, [sp]
@@ -131,5 +131,53 @@ define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) {
ret void
}
+declare void @extfunc2(i32*, i32, i32)
+
+; CHECK-LABEL: ldrd_postupdate_dec:
+; CHECK: ldrd r1, r2, [r0], #-8
+; CHECK-NEXT: bl{{x?}} _extfunc
+define void @ldrd_postupdate_dec(i32* %p0) {
+ %p0.1 = getelementptr i32, i32* %p0, i32 1
+ %v0 = load i32, i32* %p0
+ %v1 = load i32, i32* %p0.1
+ %p1 = getelementptr i32, i32* %p0, i32 -2
+ call void @extfunc2(i32* %p1, i32 %v0, i32 %v1)
+ ret void
+}
+
+; CHECK-LABEL: ldrd_postupdate_inc:
+; CHECK: ldrd r1, r2, [r0], #8
+; CHECK-NEXT: bl{{x?}} _extfunc
+define void @ldrd_postupdate_inc(i32* %p0) {
+ %p0.1 = getelementptr i32, i32* %p0, i32 1
+ %v0 = load i32, i32* %p0
+ %v1 = load i32, i32* %p0.1
+ %p1 = getelementptr i32, i32* %p0, i32 2
+ call void @extfunc2(i32* %p1, i32 %v0, i32 %v1)
+ ret void
+}
+
+; CHECK-LABEL: strd_postupdate_dec:
+; CHECK: strd r1, r2, [r0], #-8
+; CHECK-NEXT: bx lr
+define i32* @strd_postupdate_dec(i32* %p0, i32 %v0, i32 %v1) {
+ %p0.1 = getelementptr i32, i32* %p0, i32 1
+ store i32 %v0, i32* %p0
+ store i32 %v1, i32* %p0.1
+ %p1 = getelementptr i32, i32* %p0, i32 -2
+ ret i32* %p1
+}
+
+; CHECK-LABEL: strd_postupdate_inc:
+; CHECK: strd r1, r2, [r0], #8
+; CHECK-NEXT: bx lr
+define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) {
+ %p0.1 = getelementptr i32, i32* %p0, i32 1
+ store i32 %v0, i32* %p0
+ store i32 %v1, i32* %p0.1
+ %p1 = getelementptr i32, i32* %p0, i32 2
+ ret i32* %p1
+}
+
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/CodeGen/ARM/legalize-unaligned-load.ll b/test/CodeGen/ARM/legalize-unaligned-load.ll
new file mode 100644
index 000000000000..fa5b21aa4a23
--- /dev/null
+++ b/test/CodeGen/ARM/legalize-unaligned-load.ll
@@ -0,0 +1,35 @@
+; RUN: llc -O3 -code-model=default -relocation-model=default -mtriple=armv7l-unknown-linux-gnueabihf -mcpu=generic %s -o - | FileCheck %s
+; Check that we respect the existing chain between loads and stores when we
+; legalize unaligned loads.
+; Test case from PR24669.
+
+; Make sure the loads happen before the stores.
+; CHECK-LABEL: get_set_complex:
+; CHECK-NOT: str
+; CHECK: ldr
+; CHECK-NOT: str
+; CHECK: ldr
+; CHECK: str
+; CHECK: bx
+define i32 @get_set_complex({ float, float }* noalias nocapture %retptr,
+ { i8*, i32 }** noalias nocapture readnone %excinfo,
+ i8* noalias nocapture readnone %env,
+ [38 x i8]* nocapture %arg.rec,
+ float %arg.val.0, float %arg.val.1)
+{
+entry:
+ %inserted.real = insertvalue { float, float } undef, float %arg.val.0, 0
+ %inserted.imag = insertvalue { float, float } %inserted.real, float %arg.val.1, 1
+ %.15 = getelementptr inbounds [38 x i8], [38 x i8]* %arg.rec, i32 0, i32 10
+ %.16 = bitcast i8* %.15 to { float, float }*
+ %.17 = bitcast i8* %.15 to float*
+ %.18 = load float, float* %.17, align 1
+ %.19 = getelementptr inbounds [38 x i8], [38 x i8]* %arg.rec, i32 0, i32 14
+ %tmp = bitcast i8* %.19 to float*
+ %.20 = load float, float* %tmp, align 1
+ %inserted.real.1 = insertvalue { float, float } undef, float %.18, 0
+ %inserted.imag.1 = insertvalue { float, float } %inserted.real.1, float %.20, 1
+ store { float, float } %inserted.imag, { float, float }* %.16, align 1
+ store { float, float } %inserted.imag.1, { float, float }* %retptr, align 4
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll
index 34748bc848bd..eade2fda3705 100644
--- a/test/CodeGen/ARM/load-global.ll
+++ b/test/CodeGen/ARM/load-global.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_T
; RUN: llc < %s -mtriple=armv7-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC_V7
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=thumbv6-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LINUX_T
@G = external global i32
@@ -40,11 +41,14 @@ define i32 @test1() {
; LINUX: test1
; LINUX: ldr r0, .LCPI0_0
-; LINUX: ldr r1, .LCPI0_1
-; LINUX: add r0, pc, r0
-; LINUX: ldr r0, [r1, r0]
+; LINUX: ldr r0, [pc, r0]
; LINUX: ldr r0, [r0]
-; LINUX: .long G(GOT)
+; LINUX: .long G(GOT_PREL)-((.LPC0_0+8)-.Ltmp0)
+
+; LINUX_T: ldr r0, .LCPI0_0
+; LINUX_T: add r0, pc
+; LINUX_T: ldr r0, [r0]
+; LINUX_T: ldr r0, [r0]
%tmp = load i32, i32* @G
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/load-store-flags.ll b/test/CodeGen/ARM/load-store-flags.ll
index 5825a30109d0..95d9b484a0a7 100644
--- a/test/CodeGen/ARM/load-store-flags.ll
+++ b/test/CodeGen/ARM/load-store-flags.ll
@@ -6,7 +6,7 @@
define void @test_base_kill(i32 %v0, i32 %v1, i32* %addr) {
; CHECK-LABEL: test_base_kill:
; CHECK: adds [[NEWBASE:r[0-9]+]], r2, #4
-; CHECK: stm.w [[NEWBASE]], {r0, r1, r2}
+; CHECK: stm [[NEWBASE]]!, {r0, r1, r2}
%addr.1 = getelementptr i32, i32* %addr, i32 1
store i32 %v0, i32* %addr.1
@@ -27,7 +27,7 @@ define void @test_base_kill(i32 %v0, i32 %v1, i32* %addr) {
define void @test_base_kill_mid(i32 %v0, i32* %addr, i32 %v1) {
; CHECK-LABEL: test_base_kill_mid:
; CHECK: adds [[NEWBASE:r[0-9]+]], r1, #4
-; CHECK: stm.w [[NEWBASE]], {r0, r1, r2}
+; CHECK: stm [[NEWBASE]]!, {r0, r1, r2}
%addr.1 = getelementptr i32, i32* %addr, i32 1
store i32 %v0, i32* %addr.1
diff --git a/test/CodeGen/ARM/load.ll b/test/CodeGen/ARM/load.ll
index 3b2d637cb26e..b8f3003755a0 100644
--- a/test/CodeGen/ARM/load.ll
+++ b/test/CodeGen/ARM/load.ll
@@ -1,35 +1,564 @@
-; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T1
+; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-T2
-define i32 @f1(i8* %p) {
+
+; Register offset
+
+; CHECK-LABEL: ldrsb_rr
+; CHECK: ldrsb r0, [r0, r1]
+define i32 @ldrsb_rr(i8* %p, i32 %n) {
+entry:
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = sext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrsh_rr
+; CHECK-T1: lsls r1, r1, #1
+; CHECK-T1: ldrsh r0, [r0, r1]
+; CHECK-T2: ldrsh.w r0, [r0, r1, lsl #1]
+define i32 @ldrsh_rr(i16* %p, i32 %n) {
+entry:
+ %arrayidx = getelementptr inbounds i16, i16* %p, i32 %n
+ %0 = load i16, i16* %arrayidx, align 2
+ %conv = sext i16 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrb_rr
+; CHECK: ldrb r0, [r0, r1]
+define i32 @ldrb_rr(i8* %p, i32 %n) {
+entry:
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrh_rr
+; CHECK-T1: lsls r1, r1, #1
+; CHECK-T1: ldrh r0, [r0, r1]
+; CHECK-T2: ldrh.w r0, [r0, r1, lsl #1]
+define i32 @ldrh_rr(i16* %p, i32 %n) {
+entry:
+ %arrayidx = getelementptr inbounds i16, i16* %p, i32 %n
+ %0 = load i16, i16* %arrayidx, align 2
+ %conv = zext i16 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldr_rr
+; CHECK-T1: lsls r1, r1, #2
+; CHECK-T1: ldr r0, [r0, r1]
+; CHECK-T2: ldr.w r0, [r0, r1, lsl #2]
+define i32 @ldr_rr(i32* %p, i32 %n) {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %p, i32 %n
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; CHECK-LABEL: strb_rr
+; CHECK: strb r2, [r0, r1]
+define void @strb_rr(i8* %p, i32 %n, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i8
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %n
+ store i8 %conv, i8* %arrayidx, align 1
+ ret void
+}
+
+; CHECK-LABEL: strh_rr
+; CHECK-T1: lsls r1, r1, #1
+; CHECK-T1: strh r2, [r0, r1]
+; CHECK-T2: strh.w r2, [r0, r1, lsl #1]
+define void @strh_rr(i16* %p, i32 %n, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i16
+ %arrayidx = getelementptr inbounds i16, i16* %p, i32 %n
+ store i16 %conv, i16* %arrayidx, align 2
+ ret void
+}
+
+; CHECK-LABEL: str_rr
+; CHECK-T1: lsls r1, r1, #2
+; CHECK-T1: str r2, [r0, r1]
+; CHECK-T2: str.w r2, [r0, r1, lsl #2]
+define void @str_rr(i32* %p, i32 %n, i32 %x) {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %p, i32 %n
+ store i32 %x, i32* %arrayidx, align 4
+ ret void
+}
+
+
+; Immediate offset of zero
+
+; CHECK-LABEL: ldrsb_ri_zero
+; CHECK-T1: ldrb r0, [r0]
+; CHECK-T1: sxtb r0, r0
+; CHECK-T2: ldrsb.w r0, [r0]
+define i32 @ldrsb_ri_zero(i8* %p) {
+entry:
+ %0 = load i8, i8* %p, align 1
+ %conv = sext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrsh_ri_zero
+; CHECK-T1: ldrh r0, [r0]
+; CHECK-T1: sxth r0, r0
+; CHECK-T2: ldrsh.w r0, [r0]
+define i32 @ldrsh_ri_zero(i16* %p) {
+entry:
+ %0 = load i16, i16* %p, align 2
+ %conv = sext i16 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrb_ri_zero
+; CHECK: ldrb r0, [r0]
+define i32 @ldrb_ri_zero(i8* %p) {
+entry:
+ %0 = load i8, i8* %p, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrh_ri_zero
+; CHECK: ldrh r0, [r0]
+define i32 @ldrh_ri_zero(i16* %p) {
+entry:
+ %0 = load i16, i16* %p, align 2
+ %conv = zext i16 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldr_ri_zero
+; CHECK: ldr r0, [r0]
+define i32 @ldr_ri_zero(i32* %p) {
+entry:
+ %0 = load i32, i32* %p, align 4
+ ret i32 %0
+}
+
+; CHECK-LABEL: strb_ri_zero
+; CHECK: strb r1, [r0]
+define void @strb_ri_zero(i8* %p, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i8
+ store i8 %conv, i8* %p, align 1
+ ret void
+}
+
+; CHECK-LABEL: strh_ri_zero
+; CHECK: strh r1, [r0]
+define void @strh_ri_zero(i16* %p, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i16
+ store i16 %conv, i16* %p, align 2
+ ret void
+}
+
+; CHECK-LABEL: str_ri_zero
+; CHECK: str r1, [r0]
+define void @str_ri_zero(i32* %p, i32 %x) {
+entry:
+ store i32 %x, i32* %p, align 4
+ ret void
+}
+
+
+; Maximum Thumb-1 immediate offset
+
+; CHECK-LABEL: ldrsb_ri_t1_max
+; CHECK-T1: movs r1, #31
+; CHECK-T1: ldrsb r0, [r0, r1]
+; CHECK-T2: ldrsb.w r0, [r0, #31]
+define i32 @ldrsb_ri_t1_max(i8* %p) {
+entry:
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 31
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = sext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrsh_ri_t1_max
+; CHECK-T1: movs r1, #62
+; CHECK-T1: ldrsh r0, [r0, r1]
+; CHECK-T2: ldrsh.w r0, [r0, #62]
+define i32 @ldrsh_ri_t1_max(i16* %p) {
+entry:
+ %arrayidx = getelementptr inbounds i16, i16* %p, i32 31
+ %0 = load i16, i16* %arrayidx, align 2
+ %conv = sext i16 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrb_ri_t1_max
+; CHECK: ldrb r0, [r0, #31]
+define i32 @ldrb_ri_t1_max(i8* %p) {
+entry:
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 31
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrh_ri_t1_max
+; CHECK: ldrh r0, [r0, #62]
+define i32 @ldrh_ri_t1_max(i16* %p) {
+entry:
+ %arrayidx = getelementptr inbounds i16, i16* %p, i32 31
+ %0 = load i16, i16* %arrayidx, align 2
+ %conv = zext i16 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldr_ri_t1_max
+; CHECK: ldr r0, [r0, #124]
+define i32 @ldr_ri_t1_max(i32* %p) {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %p, i32 31
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; CHECK-LABEL: strb_ri_t1_max
+; CHECK: strb r1, [r0, #31]
+define void @strb_ri_t1_max(i8* %p, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i8
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 31
+ store i8 %conv, i8* %arrayidx, align 1
+ ret void
+}
+
+; CHECK-LABEL: strh_ri_t1_max
+; CHECK: strh r1, [r0, #62]
+define void @strh_ri_t1_max(i16* %p, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i16
+ %arrayidx = getelementptr inbounds i16, i16* %p, i32 31
+ store i16 %conv, i16* %arrayidx, align 2
+ ret void
+}
+
+; CHECK-LABEL: str_ri_t1_max
+; CHECK: str r1, [r0, #124]
+define void @str_ri_t1_max(i32* %p, i32 %x) {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %p, i32 31
+ store i32 %x, i32* %arrayidx, align 4
+ ret void
+}
+
+
+; One past maximum Thumb-1 immediate offset
+
+; CHECK-LABEL: ldrsb_ri_t1_too_big
+; CHECK-T1: movs r1, #32
+; CHECK-T1: ldrsb r0, [r0, r1]
+; CHECK-T2: ldrsb.w r0, [r0, #32]
+define i32 @ldrsb_ri_t1_too_big(i8* %p) {
+entry:
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 32
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = sext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrsh_ri_t1_too_big
+; CHECK-T1: movs r1, #64
+; CHECK-T1: ldrsh r0, [r0, r1]
+; CHECK-T2: ldrsh.w r0, [r0, #64]
+define i32 @ldrsh_ri_t1_too_big(i16* %p) {
+entry:
+ %arrayidx = getelementptr inbounds i16, i16* %p, i32 32
+ %0 = load i16, i16* %arrayidx, align 2
+ %conv = sext i16 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrb_ri_t1_too_big
+; CHECK-T1: movs r1, #32
+; CHECK-T1: ldrb r0, [r0, r1]
+; CHECK-T2: ldrb.w r0, [r0, #32]
+define i32 @ldrb_ri_t1_too_big(i8* %p) {
+entry:
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 32
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrh_ri_t1_too_big
+; CHECK-T1: movs r1, #64
+; CHECK-T1: ldrh r0, [r0, r1]
+; CHECK-T2: ldrh.w r0, [r0, #64]
+define i32 @ldrh_ri_t1_too_big(i16* %p) {
entry:
- %tmp = load i8, i8* %p ; <i8> [#uses=1]
- %tmp1 = sext i8 %tmp to i32 ; <i32> [#uses=1]
- ret i32 %tmp1
+ %arrayidx = getelementptr inbounds i16, i16* %p, i32 32
+ %0 = load i16, i16* %arrayidx, align 2
+ %conv = zext i16 %0 to i32
+ ret i32 %conv
}
-define i32 @f2(i8* %p) {
+; CHECK-LABEL: ldr_ri_t1_too_big
+; CHECK-T1: movs r1, #128
+; CHECK-T1: ldr r0, [r0, r1]
+; CHECK-T2: ldr.w r0, [r0, #128]
+define i32 @ldr_ri_t1_too_big(i32* %p) {
entry:
- %tmp = load i8, i8* %p ; <i8> [#uses=1]
- %tmp2 = zext i8 %tmp to i32 ; <i32> [#uses=1]
- ret i32 %tmp2
+ %arrayidx = getelementptr inbounds i32, i32* %p, i32 32
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
}
-define i32 @f3(i16* %p) {
+; CHECK-LABEL: strb_ri_t1_too_big
+; CHECK-T1: movs r2, #32
+; CHECK-T1: strb r1, [r0, r2]
+; CHECK-T2: strb.w r1, [r0, #32]
+define void @strb_ri_t1_too_big(i8* %p, i32 %x) {
entry:
- %tmp = load i16, i16* %p ; <i16> [#uses=1]
- %tmp3 = sext i16 %tmp to i32 ; <i32> [#uses=1]
- ret i32 %tmp3
+ %conv = trunc i32 %x to i8
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 32
+ store i8 %conv, i8* %arrayidx, align 1
+ ret void
}
-define i32 @f4(i16* %p) {
+; CHECK-LABEL: strh_ri_t1_too_big
+; CHECK-T1: movs r2, #64
+; CHECK-T1: strh r1, [r0, r2]
+; CHECK-T2: strh.w r1, [r0, #64]
+define void @strh_ri_t1_too_big(i16* %p, i32 %x) {
entry:
- %tmp = load i16, i16* %p ; <i16> [#uses=1]
- %tmp4 = zext i16 %tmp to i32 ; <i32> [#uses=1]
- ret i32 %tmp4
+ %conv = trunc i32 %x to i16
+ %arrayidx = getelementptr inbounds i16, i16* %p, i32 32
+ store i16 %conv, i16* %arrayidx, align 2
+ ret void
}
-; CHECK: ldrsb
-; CHECK: ldrb
-; CHECK: ldrsh
-; CHECK: ldrh
+; CHECK-LABEL: str_ri_t1_too_big
+; CHECK-T1: movs r2, #128
+; CHECK-T1: str r1, [r0, r2]
+; CHECK-T2: str.w r1, [r0, #128]
+define void @str_ri_t1_too_big(i32* %p, i32 %x) {
+entry:
+ %arrayidx = getelementptr inbounds i32, i32* %p, i32 32
+ store i32 %x, i32* %arrayidx, align 4
+ ret void
+}
+
+
+; Maximum Thumb-2 immediate offset
+
+; CHECK-LABEL: ldrsb_ri_t2_max
+; CHECK-T1: ldr r1, .LCP
+; CHECK-T1: ldrsb r0, [r0, r1]
+; CHECK-T2: ldrsb.w r0, [r0, #4095]
+define i32 @ldrsb_ri_t2_max(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095
+ %0 = load i8, i8* %add.ptr, align 1
+ %conv = sext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrsh_ri_t2_max
+; CHECK-T1: ldr r1, .LCP
+; CHECK-T1: ldrsh r0, [r0, r1]
+; CHECK-T2: ldrsh.w r0, [r0, #4095]
+define i32 @ldrsh_ri_t2_max(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095
+ %0 = bitcast i8* %add.ptr to i16*
+ %1 = load i16, i16* %0, align 2
+ %conv = sext i16 %1 to i32
+ ret i32 %conv
+}
+; CHECK-LABEL: ldrb_ri_t2_max
+; CHECK-T1: ldr r1, .LCP
+; CHECK-T1: ldrb r0, [r0, r1]
+; CHECK-T2: ldrb.w r0, [r0, #4095]
+define i32 @ldrb_ri_t2_max(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095
+ %0 = load i8, i8* %add.ptr, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrh_ri_t2_max
+; CHECK-T1: ldr r1, .LCP
+; CHECK-T1: ldrh r0, [r0, r1]
+; CHECK-T2: ldrh.w r0, [r0, #4095]
+define i32 @ldrh_ri_t2_max(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095
+ %0 = bitcast i8* %add.ptr to i16*
+ %1 = load i16, i16* %0, align 2
+ %conv = zext i16 %1 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldr_ri_t2_max
+; CHECK-T1: ldr r1, .LCP
+; CHECK-T1: ldr r0, [r0, r1]
+; CHECK-T2: ldr.w r0, [r0, #4095]
+define i32 @ldr_ri_t2_max(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095
+ %0 = bitcast i8* %add.ptr to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+; CHECK-LABEL: strb_ri_t2_max
+; CHECK-T1: ldr r2, .LCP
+; CHECK-T1: strb r1, [r0, r2]
+; CHECK-T2: strb.w r1, [r0, #4095]
+define void @strb_ri_t2_max(i8* %p, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i8
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095
+ store i8 %conv, i8* %add.ptr, align 1
+ ret void
+}
+
+; CHECK-LABEL: strh_ri_t2_max
+; CHECK-T1: ldr r2, .LCP
+; CHECK-T1: strh r1, [r0, r2]
+; CHECK-T2: strh.w r1, [r0, #4095]
+define void @strh_ri_t2_max(i8* %p, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i16
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095
+ %0 = bitcast i8* %add.ptr to i16*
+ store i16 %conv, i16* %0, align 2
+ ret void
+}
+
+; CHECK-LABEL: str_ri_t2_max
+; CHECK-T1: ldr r2, .LCP
+; CHECK-T1: str r1, [r0, r2]
+; CHECK-T2: str.w r1, [r0, #4095]
+define void @str_ri_t2_max(i8* %p, i32 %x) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4095
+ %0 = bitcast i8* %add.ptr to i32*
+ store i32 %x, i32* %0, align 4
+ ret void
+}
+
+
+; One past maximum Thumb-2 immediate offset
+
+; CHECK-LABEL: ldrsb_ri_t2_too_big
+; CHECK-T1: movs r1, #1
+; CHECK-T1: lsls r1, r1, #12
+; CHECK-T2: mov.w r1, #4096
+; CHECK: ldrsb r0, [r0, r1]
+define i32 @ldrsb_ri_t2_too_big(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096
+ %0 = load i8, i8* %add.ptr, align 1
+ %conv = sext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrsh_ri_t2_too_big
+; CHECK-T1: movs r1, #1
+; CHECK-T1: lsls r1, r1, #12
+; CHECK-T2: mov.w r1, #4096
+; CHECK: ldrsh r0, [r0, r1]
+define i32 @ldrsh_ri_t2_too_big(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096
+ %0 = bitcast i8* %add.ptr to i16*
+ %1 = load i16, i16* %0, align 2
+ %conv = sext i16 %1 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrb_ri_t2_too_big
+; CHECK-T1: movs r1, #1
+; CHECK-T1: lsls r1, r1, #12
+; CHECK-T2: mov.w r1, #4096
+; CHECK: ldrb r0, [r0, r1]
+define i32 @ldrb_ri_t2_too_big(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096
+ %0 = load i8, i8* %add.ptr, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldrh_ri_t2_too_big
+; CHECK-T1: movs r1, #1
+; CHECK-T1: lsls r1, r1, #12
+; CHECK-T2: mov.w r1, #4096
+; CHECK: ldrh r0, [r0, r1]
+define i32 @ldrh_ri_t2_too_big(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096
+ %0 = bitcast i8* %add.ptr to i16*
+ %1 = load i16, i16* %0, align 2
+ %conv = zext i16 %1 to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: ldr_ri_t2_too_big
+; CHECK-T1: movs r1, #1
+; CHECK-T1: lsls r1, r1, #12
+; CHECK-T2: mov.w r1, #4096
+; CHECK: ldr r0, [r0, r1]
+define i32 @ldr_ri_t2_too_big(i8* %p) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096
+ %0 = bitcast i8* %add.ptr to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+; CHECK-LABEL: strb_ri_t2_too_big
+; CHECK-T1: movs r2, #1
+; CHECK-T1: lsls r2, r2, #12
+; CHECK-T2: mov.w r2, #4096
+; CHECK: strb r1, [r0, r2]
+define void @strb_ri_t2_too_big(i8* %p, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i8
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096
+ store i8 %conv, i8* %add.ptr, align 1
+ ret void
+}
+
+; CHECK-LABEL: strh_ri_t2_too_big
+; CHECK-T1: movs r2, #1
+; CHECK-T1: lsls r2, r2, #12
+; CHECK-T2: mov.w r2, #4096
+; CHECK: strh r1, [r0, r2]
+define void @strh_ri_t2_too_big(i8* %p, i32 %x) {
+entry:
+ %conv = trunc i32 %x to i16
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096
+ %0 = bitcast i8* %add.ptr to i16*
+ store i16 %conv, i16* %0, align 2
+ ret void
+}
+
+; CHECK-LABEL: str_ri_t2_too_big
+; CHECK-T1: movs r2, #1
+; CHECK-T1: lsls r2, r2, #12
+; CHECK-T2: mov.w r2, #4096
+; CHECK: str r1, [r0, r2]
+define void @str_ri_t2_too_big(i8* %p, i32 %x) {
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 4096
+ %0 = bitcast i8* %add.ptr to i32*
+ store i32 %x, i32* %0, align 4
+ ret void
+}
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index 1f92ff4e1192..611cba6ed1fc 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -27,7 +27,7 @@ define void @f2() nounwind ssp {
entry:
; CHECK-LABEL: f2:
; CHECK: cmp
-; CHECK: poplt
+; CHECK: bxlt
; CHECK-NOT: cmp
; CHECK: movle
%0 = load i32, i32* @foo, align 4
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 4ea26e1c59a3..d874884dcb39 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6m-apple-ios -mcpu=cortex-m0 -pre-RA-sched=source -disable-post-ra | FileCheck %s -check-prefix=CHECK-T1
+; RUN: llc < %s -mtriple=thumbv6m-apple-ios -mcpu=cortex-m0 -pre-RA-sched=source -disable-post-ra -mattr=+strict-align | FileCheck %s -check-prefix=CHECK-T1
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
@src = external global %struct.x
diff --git a/test/CodeGen/ARM/memcpy-ldm-stm.ll b/test/CodeGen/ARM/memcpy-ldm-stm.ll
new file mode 100644
index 000000000000..2ebe7ed5b146
--- /dev/null
+++ b/test/CodeGen/ARM/memcpy-ldm-stm.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | \
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECKV6
+; RUN: llc -mtriple=thumbv6m-eabi -O=0 -verify-machineinstrs %s -o - | \
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECKV6
+; RUN: llc -mtriple=thumbv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | \
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECKV7
+; RUN: llc -mtriple=armv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | \
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECKV7
+
+@d = external global [64 x i32]
+@s = external global [64 x i32]
+
+; Function Attrs: nounwind
+define void @t1() #0 {
+entry:
+; CHECK-LABEL: t1:
+; CHECKV6: ldr [[LB:r[0-7]]],
+; CHECKV6-NEXT: ldr [[SB:r[0-7]]],
+; We use '[rl0-9]+' to allow 'r0'..'r12', 'lr'
+; CHECKV7: movt [[LB:[rl0-9]+]], :upper16:d
+; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s
+; CHECK-NEXT: ldm{{(\.w)?}} [[LB]]!,
+; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!,
+; Think of the monstrosity '{{\[}}[[LB]]]' as '[ [[LB]] ]' without the spaces.
+; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
+; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false)
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @t2() #0 {
+entry:
+; CHECK-LABEL: t2:
+; CHECKV6: ldr [[LB:r[0-7]]],
+; CHECKV6-NEXT: ldr [[SB:r[0-7]]],
+; CHECKV7: movt [[LB:[rl0-9]+]], :upper16:d
+; CHECKV7-NEXT: movt [[SB:[rl0-9]+]], :upper16:s
+; CHECK-NEXT: ldm{{(\.w)?}} [[LB]]!,
+; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!,
+; CHECK-NEXT: ldrh{{(\.w)?}} {{.*}}, {{\[}}[[LB]]]
+; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2]
+; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2]
+; CHECK-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]]
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
+ ret void
+}
+
+; PR23768
+%struct.T = type { i8, i64, i8 }
+
+@copy = external global %struct.T, align 8
+@etest = external global %struct.T, align 8
+
+define void @t3() {
+ call void @llvm.memcpy.p0i8.p0i8.i32(
+ i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0),
+ i8* getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0),
+ i32 24, i32 8, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(
+ i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0),
+ i8* getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0),
+ i32 24, i32 8, i1 false)
+ ret void
+}
+
+%struct.S = type { [12 x i32] }
+
+; CHECK-LABEL: test3
+define void @test3(%struct.S* %d, %struct.S* %s) #0 {
+ %1 = bitcast %struct.S* %d to i8*
+ %2 = bitcast %struct.S* %s to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 48, i32 4, i1 false)
+; 3 ldm/stm pairs in v6; 2 in v7
+; CHECK: ldm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1:{.*}]]
+; CHECK: stm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1]]
+; CHECK: ldm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST2:{.*}]]
+; CHECK: stm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST2]]
+; CHECKV6: ldm {{r[0-7]!?}}, [[REGLIST3:{.*}]]
+; CHECKV6: stm {{r[0-7]!?}}, [[REGLIST3]]
+; CHECKV7-NOT: ldm
+; CHECKV7-NOT: stm
+ %arrayidx = getelementptr inbounds %struct.S, %struct.S* %s, i32 0, i32 0, i32 1
+ tail call void @g(i32* %arrayidx) #3
+ ret void
+}
+
+declare void @g(i32*)
+
+; Set "no-frame-pointer-elim" to increase register pressure
+attributes #0 = { "no-frame-pointer-elim"="true" }
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 5223983a7f30..66743f3e9d5e 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,79 +1,95 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS --check-prefix=CHECK
-; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN --check-prefix=CHECK
-; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
-; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
+; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS
+; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
+; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
+; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
define void @f1(i8* %dest, i8* %src) {
entry:
; CHECK-LABEL: f1
- ; CHECK-IOS: memmove
- ; CHECK-DARWIN: memmove
- ; CHECK-EABI: __aeabi_memmove
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: bl memmove
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
; EABI memset swaps arguments
; CHECK-IOS: mov r1, #1
- ; CHECK-IOS: memset
+ ; CHECK-IOS: bl _memset
; CHECK-DARWIN: movs r1, #1
- ; CHECK-DARWIN: memset
+ ; CHECK-DARWIN: bl _memset
; CHECK-EABI: mov r2, #1
- ; CHECK-EABI: __aeabi_memset
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false)
; EABI uses memclr if value set to 0
; CHECK-IOS: mov r1, #0
- ; CHECK-IOS: memset
+ ; CHECK-IOS: bl _memset
; CHECK-DARWIN: movs r1, #0
- ; CHECK-DARWIN: memset
- ; CHECK-EABI: __aeabi_memclr
+ ; CHECK-DARWIN: bl _memset
+ ; CHECK-EABI: bl __aeabi_memclr
+ ; CHECK-GNUEABI: bl memset
call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false)
-
+
; EABI uses aligned function variants if possible
- ; CHECK-IOS: memmove
- ; CHECK-DARWIN: memmove
- ; CHECK-EABI: __aeabi_memmove4
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove4
+ ; CHECK-GNUEABI: bl memmove
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy4
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy4
+ ; CHECK-GNUEABI: bl memcpy
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
- ; CHECK-IOS: memset
- ; CHECK-DARWIN: memset
- ; CHECK-EABI: __aeabi_memset4
+ ; CHECK-IOS: bl _memset
+ ; CHECK-DARWIN: bl _memset
+ ; CHECK-EABI: bl __aeabi_memset4
+ ; CHECK-GNUEABI: bl memset
call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 4, i1 false)
- ; CHECK-IOS: memset
- ; CHECK-DARWIN: memset
- ; CHECK-EABI: __aeabi_memclr4
+ ; CHECK-IOS: bl _memset
+ ; CHECK-DARWIN: bl _memset
+ ; CHECK-EABI: bl __aeabi_memclr4
+ ; CHECK-GNUEABI: bl memset
call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 4, i1 false)
- ; CHECK-IOS: memmove
- ; CHECK-DARWIN: memmove
- ; CHECK-EABI: __aeabi_memmove8
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove8
+ ; CHECK-GNUEABI: bl memmove
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy8
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy8
+ ; CHECK-GNUEABI: bl memcpy
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
- ; CHECK-IOS: memset
- ; CHECK-DARWIN: memset
- ; CHECK-EABI: __aeabi_memset8
+ ; CHECK-IOS: bl _memset
+ ; CHECK-DARWIN: bl _memset
+ ; CHECK-EABI: bl __aeabi_memset8
+ ; CHECK-GNUEABI: bl memset
call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 8, i1 false)
- ; CHECK-IOS: memset
- ; CHECK-DARWIN: memset
- ; CHECK-EABI: __aeabi_memclr8
+ ; CHECK-IOS: bl _memset
+ ; CHECK-DARWIN: bl _memset
+ ; CHECK-EABI: bl __aeabi_memclr8
+ ; CHECK-GNUEABI: bl memset
call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 8, i1 false)
unreachable
@@ -86,32 +102,38 @@ entry:
; IOS (ARMv7) should 8-byte align, others should 4-byte align
; CHECK-IOS: add r1, sp, #32
- ; CHECK-IOS: memmove
+ ; CHECK-IOS: bl _memmove
; CHECK-DARWIN: add r1, sp, #28
- ; CHECK-DARWIN: memmove
+ ; CHECK-DARWIN: bl _memmove
; CHECK-EABI: add r1, sp, #28
- ; CHECK-EABI: __aeabi_memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: add r1, sp, #28
+ ; CHECK-GNUEABI: bl memmove
%arr0 = alloca [9 x i8], align 1
%0 = bitcast [9 x i8]* %arr0 to i8*
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
; CHECK: add r1, sp, #16
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
%arr1 = alloca [9 x i8], align 1
%1 = bitcast [9 x i8]* %arr1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK-IOS: mov r0, sp
; CHECK-IOS: mov r1, #1
- ; CHECK-IOS: memset
+ ; CHECK-IOS: bl _memset
; CHECK-DARWIN: add r0, sp, #4
; CHECK-DARWIN: movs r1, #1
- ; CHECK-DARWIN: memset
+ ; CHECK-DARWIN: bl _memset
; CHECK-EABI: add r0, sp, #4
; CHECK-EABI: mov r2, #1
- ; CHECK-EABI: __aeabi_memset
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: add r0, sp, #4
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
%arr2 = alloca [9 x i8], align 1
%2 = bitcast [9 x i8]* %arr2 to i8*
call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
@@ -125,28 +147,32 @@ entry:
; CHECK-LABEL: f3
; CHECK: {{add(.w)? r1, sp, #17|sub(.w)? r1, r7, #15}}
- ; CHECK-IOS: memmove
- ; CHECK-DARWIN: memmove
- ; CHECK-EABI: __aeabi_memmove
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: bl memmove
%arr0 = alloca [7 x i8], align 1
%0 = bitcast [7 x i8]* %arr0 to i8*
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r1, sp, #10}}
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
%arr1 = alloca [7 x i8], align 1
%1 = bitcast [7 x i8]* %arr1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r0, sp, #3}}
; CHECK-IOS: mov r1, #1
- ; CHECK-IOS: memset
+ ; CHECK-IOS: bl _memset
; CHECK-DARWIN: movs r1, #1
- ; CHECK-DARWIN: memset
+ ; CHECK-DARWIN: bl _memset
; CHECK-EABI: mov r2, #1
- ; CHECK-EABI: __aeabi_memset
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
%arr2 = alloca [7 x i8], align 1
%2 = bitcast [7 x i8]* %arr2 to i8*
call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
@@ -160,28 +186,32 @@ entry:
; CHECK-LABEL: f4
; CHECK: {{add(.w)? r., sp, #23|sub(.w)? r., r7, #17}}
- ; CHECK-IOS: memmove
- ; CHECK-DARWIN: memmove
- ; CHECK-EABI: __aeabi_memmove
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: bl memmove
%arr0 = alloca [9 x i8], align 1
%0 = getelementptr inbounds [9 x i8], [9 x i8]* %arr0, i32 0, i32 4
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(10|14)}}
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
%arr1 = alloca [9 x i8], align 1
%1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 4
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
; CHECK-IOS: mov r1, #1
- ; CHECK-IOS: memset
+ ; CHECK-IOS: bl _memset
; CHECK-DARWIN: movs r1, #1
- ; CHECK-DARWIN: memset
+ ; CHECK-DARWIN: bl _memset
; CHECK-EABI: mov r2, #1
- ; CHECK-EABI: __aeabi_memset
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
%arr2 = alloca [9 x i8], align 1
%2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4
call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
@@ -195,28 +225,32 @@ entry:
; CHECK-LABEL: f5
; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}}
- ; CHECK-IOS: memmove
- ; CHECK-DARWIN: memmove
- ; CHECK-EABI: __aeabi_memmove
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: bl memmove
%arr0 = alloca [13 x i8], align 1
%0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 1
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(10|14)}}
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
%arr1 = alloca [13 x i8], align 1
%1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 1
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
; CHECK-IOS: mov r1, #1
- ; CHECK-IOS: memset
+ ; CHECK-IOS: bl _memset
; CHECK-DARWIN: movs r1, #1
- ; CHECK-DARWIN: memset
+ ; CHECK-DARWIN: bl _memset
; CHECK-EABI: mov r2, #1
- ; CHECK-EABI: __aeabi_memset
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
%arr2 = alloca [13 x i8], align 1
%2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1
call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
@@ -230,28 +264,32 @@ entry:
; CHECK-LABEL: f6
; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #25}}
- ; CHECK-IOS: memmove
- ; CHECK-DARWIN: memmove
- ; CHECK-EABI: __aeabi_memmove
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: bl memmove
%arr0 = alloca [13 x i8], align 1
%0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 %i
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(10|14)}}
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
%arr1 = alloca [13 x i8], align 1
%1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 %i
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
; CHECK-IOS: mov r1, #1
- ; CHECK-IOS: memset
+ ; CHECK-IOS: bl _memset
; CHECK-DARWIN: movs r1, #1
- ; CHECK-DARWIN: memset
+ ; CHECK-DARWIN: bl _memset
; CHECK-EABI: mov r2, #1
- ; CHECK-EABI: __aeabi_memset
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
%arr2 = alloca [13 x i8], align 1
%2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i
call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
@@ -265,28 +303,32 @@ entry:
; CHECK-LABEL: f7
; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}}
- ; CHECK-IOS: memmove
- ; CHECK-DARWIN: memmove
- ; CHECK-EABI: __aeabi_memmove
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: bl memmove
%arr0 = alloca [13 x i8], align 1
%0 = getelementptr [13 x i8], [13 x i8]* %arr0, i32 0, i32 4
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(10|14)}}
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
%arr1 = alloca [13 x i8], align 1
%1 = getelementptr [13 x i8], [13 x i8]* %arr1, i32 0, i32 4
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
; CHECK-IOS: mov r1, #1
- ; CHECK-IOS: memset
+ ; CHECK-IOS: bl _memset
; CHECK-DARWIN: movs r1, #1
- ; CHECK-DARWIN: memset
+ ; CHECK-DARWIN: bl _memset
; CHECK-EABI: mov r2, #1
- ; CHECK-EABI: __aeabi_memset
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
%arr2 = alloca [13 x i8], align 1
%2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4
call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
@@ -300,28 +342,32 @@ entry:
; CHECK-LABEL: f8
; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}}
- ; CHECK-IOS: memmove
- ; CHECK-DARWIN: memmove
- ; CHECK-EABI: __aeabi_memmove
+ ; CHECK-IOS: bl _memmove
+ ; CHECK-DARWIN: bl _memmove
+ ; CHECK-EABI: bl __aeabi_memmove
+ ; CHECK-GNUEABI: bl memmove
%arr0 = alloca [13 x i8], align 1
%0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 16
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(10|14)}}
- ; CHECK-IOS: memcpy
- ; CHECK-DARWIN: memcpy
- ; CHECK-EABI: __aeabi_memcpy
+ ; CHECK-IOS: bl _memcpy
+ ; CHECK-DARWIN: bl _memcpy
+ ; CHECK-EABI: bl __aeabi_memcpy
+ ; CHECK-GNUEABI: bl memcpy
%arr1 = alloca [13 x i8], align 1
%1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 16
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
; CHECK: {{add(.w)? r., sp, #(1|5)}}
; CHECK-IOS: mov r1, #1
- ; CHECK-IOS: memset
+ ; CHECK-IOS: bl _memset
; CHECK-DARWIN: movs r1, #1
- ; CHECK-DARWIN: memset
+ ; CHECK-DARWIN: bl _memset
; CHECK-EABI: mov r2, #1
- ; CHECK-EABI: __aeabi_memset
+ ; CHECK-EABI: bl __aeabi_memset
+ ; CHECK-GNUEABI: mov r1, #1
+ ; CHECK-GNUEABI: bl memset
%arr2 = alloca [13 x i8], align 1
%2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16
call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
@@ -357,6 +403,7 @@ entry:
; CHECK-IOS: .align 3
; CHECK-DARWIN: .align 2
; CHECK-EABI: .align 2
+; CHECK-GNUEABI: .align 2
; CHECK: arr2:
; CHECK: {{\.section.+foo,bar}}
; CHECK-NOT: .align
diff --git a/test/CodeGen/ARM/minmax.ll b/test/CodeGen/ARM/minmax.ll
new file mode 100644
index 000000000000..78e8922fba0e
--- /dev/null
+++ b/test/CodeGen/ARM/minmax.ll
@@ -0,0 +1,193 @@
+; RUN: llc < %s -mtriple=armv8-linux-gnu -mattr=+neon | FileCheck %s
+
+; CHECK-LABEL: t1
+; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <4 x i32> @t1(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp sgt <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t2
+; CHECK: vmin.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <4 x i32> @t2(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp slt <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t3
+; CHECK: vmax.u32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <4 x i32> @t3(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp ugt <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t4
+; CHECK: vmin.u32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <4 x i32> @t4(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp ult <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t5
+; CHECK: vmax.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <2 x i32> @t5(<2 x i32> %a, <2 x i32> %b) {
+ %t1 = icmp sgt <2 x i32> %a, %b
+ %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %t2
+}
+
+; CHECK-LABEL: t6
+; CHECK: vmin.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <2 x i32> @t6(<2 x i32> %a, <2 x i32> %b) {
+ %t1 = icmp slt <2 x i32> %a, %b
+ %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %t2
+}
+
+; CHECK-LABEL: t7
+; CHECK: vmax.u32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <2 x i32> @t7(<2 x i32> %a, <2 x i32> %b) {
+ %t1 = icmp ugt <2 x i32> %a, %b
+ %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %t2
+}
+
+; CHECK-LABEL: t8
+; CHECK: vmin.u32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <2 x i32> @t8(<2 x i32> %a, <2 x i32> %b) {
+ %t1 = icmp ult <2 x i32> %a, %b
+ %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %t2
+}
+
+; CHECK-LABEL: t9
+; CHECK: vmax.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <8 x i16> @t9(<8 x i16> %a, <8 x i16> %b) {
+ %t1 = icmp sgt <8 x i16> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %t2
+}
+
+; CHECK-LABEL: t10
+; CHECK: vmin.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <8 x i16> @t10(<8 x i16> %a, <8 x i16> %b) {
+ %t1 = icmp slt <8 x i16> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %t2
+}
+
+; CHECK-LABEL: t11
+; CHECK: vmax.u16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <8 x i16> @t11(<8 x i16> %a, <8 x i16> %b) {
+ %t1 = icmp ugt <8 x i16> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %t2
+}
+
+; CHECK-LABEL: t12
+; CHECK: vmin.u16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <8 x i16> @t12(<8 x i16> %a, <8 x i16> %b) {
+ %t1 = icmp ult <8 x i16> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %t2
+}
+
+; CHECK-LABEL: t13
+; CHECK: vmax.s16
+define <4 x i16> @t13(<4 x i16> %a, <4 x i16> %b) {
+ %t1 = icmp sgt <4 x i16> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b
+ ret <4 x i16> %t2
+}
+
+; CHECK-LABEL: t14
+; CHECK: vmin.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <4 x i16> @t14(<4 x i16> %a, <4 x i16> %b) {
+ %t1 = icmp slt <4 x i16> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b
+ ret <4 x i16> %t2
+}
+
+; CHECK-LABEL: t15
+; CHECK: vmax.u16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <4 x i16> @t15(<4 x i16> %a, <4 x i16> %b) {
+ %t1 = icmp ugt <4 x i16> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b
+ ret <4 x i16> %t2
+}
+
+; CHECK-LABEL: t16
+; CHECK: vmin.u16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <4 x i16> @t16(<4 x i16> %a, <4 x i16> %b) {
+ %t1 = icmp ult <4 x i16> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b
+ ret <4 x i16> %t2
+}
+
+; CHECK-LABEL: t17
+; CHECK: vmax.s8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <16 x i8> @t17(<16 x i8> %a, <16 x i8> %b) {
+ %t1 = icmp sgt <16 x i8> %a, %b
+ %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %t2
+}
+
+; CHECK-LABEL: t18
+; CHECK: vmin.s8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <16 x i8> @t18(<16 x i8> %a, <16 x i8> %b) {
+ %t1 = icmp slt <16 x i8> %a, %b
+ %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %t2
+}
+
+; CHECK-LABEL: t19
+; CHECK: vmax.u8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <16 x i8> @t19(<16 x i8> %a, <16 x i8> %b) {
+ %t1 = icmp ugt <16 x i8> %a, %b
+ %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %t2
+}
+
+; CHECK-LABEL: t20
+; CHECK: vmin.u8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+define <16 x i8> @t20(<16 x i8> %a, <16 x i8> %b) {
+ %t1 = icmp ult <16 x i8> %a, %b
+ %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %t2
+}
+
+; CHECK-LABEL: t21
+; CHECK: vmax.s8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <8 x i8> @t21(<8 x i8> %a, <8 x i8> %b) {
+ %t1 = icmp sgt <8 x i8> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b
+ ret <8 x i8> %t2
+}
+
+; CHECK-LABEL: t22
+; CHECK: vmin.s8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <8 x i8> @t22(<8 x i8> %a, <8 x i8> %b) {
+ %t1 = icmp slt <8 x i8> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b
+ ret <8 x i8> %t2
+}
+
+; CHECK-LABEL: t23
+; CHECK: vmax.u8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <8 x i8> @t23(<8 x i8> %a, <8 x i8> %b) {
+ %t1 = icmp ugt <8 x i8> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b
+ ret <8 x i8> %t2
+}
+
+; CHECK-LABEL: t24
+; CHECK: vmin.u8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+define <8 x i8> @t24(<8 x i8> %a, <8 x i8> %b) {
+ %t1 = icmp ult <8 x i8> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b
+ ret <8 x i8> %t2
+}
diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll
index 84e4b303c16d..883522a829a5 100644
--- a/test/CodeGen/ARM/neon_minmax.ll
+++ b/test/CodeGen/ARM/neon_minmax.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm-eabi -mcpu=swift %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 -mattr=-neon %s -o -
define float @fmin_ole(float %x) nounwind {
;CHECK-LABEL: fmin_ole:
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
index 04f08f43840f..f9282f93f4d2 100644
--- a/test/CodeGen/ARM/neon_spill.ll
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -22,7 +22,7 @@ declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32)
declare arm_aapcs_vfpcc %2** @func4()
define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
- call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+ call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
%2 = call arm_aapcs_vfpcc %0** @func2() nounwind
%3 = load %0*, %0** %2, align 4
store float 0.000000e+00, float* undef, align 4
@@ -40,10 +40,10 @@ define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
%10 = fmul float undef, 2.000000e+05
%11 = fadd float %10, -1.000000e+05
store float %11, float* undef, align 4
- call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+ call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
ret void
}
-declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
declare arm_aapcs_vfpcc i32 @rand()
diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll
index 7a02512198be..d32e7b78879b 100644
--- a/test/CodeGen/ARM/neon_vabs.ll
+++ b/test/CodeGen/ARM/neon_vabs.ll
@@ -89,3 +89,41 @@ define <2 x i32> @test10(<2 x i32> %a) nounwind {
%abs = select <2 x i1> %b, <2 x i32> %tmp1neg, <2 x i32> %a
ret <2 x i32> %abs
}
+
+;; Check that absdiff patterns as emitted by log2 shuffles are
+;; matched by VABD.
+
+define <4 x i32> @test11(<4 x i16> %a, <4 x i16> %b) nounwind {
+; CHECK-LABEL: test11:
+; CHECK: vabdl.u16 q
+ %zext1 = zext <4 x i16> %a to <4 x i32>
+ %zext2 = zext <4 x i16> %b to <4 x i32>
+ %diff = sub <4 x i32> %zext1, %zext2
+ %shift1 = ashr <4 x i32> %diff, <i32 31, i32 31, i32 31, i32 31>
+ %add1 = add <4 x i32> %shift1, %diff
+ %res = xor <4 x i32> %shift1, %add1
+ ret <4 x i32> %res
+}
+define <8 x i16> @test12(<8 x i8> %a, <8 x i8> %b) nounwind {
+; CHECK-LABEL: test12:
+; CHECK: vabdl.u8 q
+ %zext1 = zext <8 x i8> %a to <8 x i16>
+ %zext2 = zext <8 x i8> %b to <8 x i16>
+ %diff = sub <8 x i16> %zext1, %zext2
+ %shift1 = ashr <8 x i16> %diff,<i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %add1 = add <8 x i16> %shift1, %diff
+ %res = xor <8 x i16> %shift1, %add1
+ ret <8 x i16> %res
+}
+
+define <2 x i64> @test13(<2 x i32> %a, <2 x i32> %b) nounwind {
+; CHECK-LABEL: test13:
+; CHECK: vabdl.u32 q
+ %zext1 = zext <2 x i32> %a to <2 x i64>
+ %zext2 = zext <2 x i32> %b to <2 x i64>
+ %diff = sub <2 x i64> %zext1, %zext2
+ %shift1 = ashr <2 x i64> %diff,<i64 63, i64 63>
+ %add1 = add <2 x i64> %shift1, %diff
+ %res = xor <2 x i64> %shift1, %add1
+ ret <2 x i64> %res
+}
diff --git a/test/CodeGen/ARM/neon_vshl_minint.ll b/test/CodeGen/ARM/neon_vshl_minint.ll
new file mode 100644
index 000000000000..769eff845fd6
--- /dev/null
+++ b/test/CodeGen/ARM/neon_vshl_minint.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-none-eabi -mcpu=cortex-a8 2>&1 | FileCheck %s
+
+define <1 x i64> @vshl_minint() #0 {
+ entry:
+ ; CHECK-LABEL: vshl_minint
+ ; CHECK: vldr
+ ; CHECK: vshl.u64
+ %vshl.i = tail call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> undef, <1 x i64> <i64 -9223372036854775808>)
+ ret <1 x i64> %vshl.i
+}
+
+declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
diff --git a/test/CodeGen/ARM/out-of-registers.ll b/test/CodeGen/ARM/out-of-registers.ll
index a83923db0b30..891319881f45 100644
--- a/test/CodeGen/ARM/out-of-registers.ll
+++ b/test/CodeGen/ARM/out-of-registers.ll
@@ -8,7 +8,7 @@ target triple = "thumbv7-none-linux-gnueabi"
define void @foo(float* nocapture %A) #0 {
%1= bitcast float* %A to i8*
- %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4)
+ %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32.p0i8(i8* %1, i32 4)
%3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0
%divp_vec = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %3
%4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1
@@ -17,7 +17,7 @@ define void @foo(float* nocapture %A) #0 {
%div8p_vec = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %5
%6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3
%div13p_vec = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %6
- tail call void @llvm.arm.neon.vst4.v4f32(i8* %1, <4 x float> %divp_vec, <4 x float> %div3p_vec, <4 x float> %div8p_vec, <4 x float> %div13p_vec, i32 4)
+ tail call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* %1, <4 x float> %divp_vec, <4 x float> %div3p_vec, <4 x float> %div8p_vec, <4 x float> %div13p_vec, i32 4)
ret void
}
@@ -27,8 +27,8 @@ declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) #1
; Function Attrs: nounwind readonly
; Function Attrs: nounwind
-declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) #1
-declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32) #2
+declare void @llvm.arm.neon.vst4.p0i8.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) #1
+declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32.p0i8(i8*, i32) #2
; Function Attrs: nounwind
diff --git a/test/CodeGen/ARM/pr25317.ll b/test/CodeGen/ARM/pr25317.ll
new file mode 100644
index 000000000000..6770c6f84ecd
--- /dev/null
+++ b/test/CodeGen/ARM/pr25317.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+; CHECK-LABEL: f:
+; CHECK: str lr, [r0]
+define void @f(i32* %p) {
+ call void asm sideeffect "str lr, $0", "=*o"(i32* %p)
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/ARM/pr25838.ll b/test/CodeGen/ARM/pr25838.ll
new file mode 100644
index 000000000000..0aa95fd2d720
--- /dev/null
+++ b/test/CodeGen/ARM/pr25838.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s
+; PR25838
+
+target triple = "armv7--linux-android"
+
+%0 = type { i32, i32 }
+
+define i32 @foo(%0* readonly) {
+ br i1 undef, label %12, label %2
+
+; <label>:2
+ %3 = trunc i64 undef to i32
+ %4 = icmp eq i32 undef, 0
+ br i1 %4, label %5, label %9
+
+; <label>:5
+ %6 = icmp slt i32 %3, 0
+ %7 = sub nsw i32 0, %3
+ %8 = select i1 %6, i32 %7, i32 %3
+ br label %12
+
+; <label>:9
+ br i1 undef, label %12, label %10
+
+; <label>:10
+ %11 = tail call i32 @bar(i32 undef)
+ unreachable
+
+; <label>:12
+ %13 = phi i32 [ %8, %5 ], [ 0, %1 ], [ undef, %9 ]
+ ret i32 %13
+}
+
+declare i32 @bar(i32)
diff --git a/test/CodeGen/ARM/rbit.ll b/test/CodeGen/ARM/rbit.ll
index 41f866fc8d2f..a2bfeca75526 100644
--- a/test/CodeGen/ARM/rbit.ll
+++ b/test/CodeGen/ARM/rbit.ll
@@ -18,3 +18,14 @@ entry:
}
declare i32 @llvm.arm.rbit(i32)
+
+declare i32 @llvm.bitreverse.i32(i32) readnone
+
+; CHECK-LABEL: rbit_generic
+; CHECK: rbit r0, r0
+define i32 @rbit_generic(i32 %t) {
+entry:
+ %rbit = call i32 @llvm.bitreverse.i32(i32 %t)
+ ret i32 %rbit
+}
+
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 507ee48cafc2..d02f5f883795 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -24,7 +24,7 @@ entry:
%2 = getelementptr inbounds %struct.int32x4_t, %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
%3 = load <4 x i32>, <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1]
%4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1]
- %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
+ %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
%6 = bitcast <8 x i16> %5 to <2 x double> ; <<2 x double>> [#uses=2]
%7 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1]
%8 = bitcast double %7 to <4 x i16> ; <<4 x i16>> [#uses=1]
@@ -40,7 +40,7 @@ entry:
%trunc_16 = trunc <4 x i32> %16 to <4 x i16>
%17 = shufflevector <4 x i16> %trunc_15, <4 x i16> %trunc_16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1]
%18 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1]
- tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %18, <8 x i16> %17, i32 1)
ret void
}
@@ -60,17 +60,17 @@ entry:
%2 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
%3 = load <8 x i16>, <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1]
%4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1]
- %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
+ %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
%6 = getelementptr inbounds i16, i16* %i_ptr, i32 8 ; <i16*> [#uses=1]
%7 = bitcast i16* %6 to i8* ; <i8*> [#uses=1]
- %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7, i32 1) ; <<8 x i16>> [#uses=1]
+ %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %7, i32 1) ; <<8 x i16>> [#uses=1]
%9 = mul <8 x i16> %1, %5 ; <<8 x i16>> [#uses=1]
%10 = mul <8 x i16> %3, %8 ; <<8 x i16>> [#uses=1]
%11 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1]
- tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %11, <8 x i16> %9, i32 1)
%12 = getelementptr inbounds i16, i16* %o_ptr, i32 8 ; <i16*> [#uses=1]
%13 = bitcast i16* %12 to i8* ; <i8*> [#uses=1]
- tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %13, <8 x i16> %10, i32 1)
ret void
}
@@ -81,14 +81,14 @@ define <8 x i8> @t3(i8* %A, i8* %B) nounwind {
; CHECK: vmov r
; CHECK-NOT: vmov d
; CHECK: vst3.8
- %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 ; <<8 x i8>> [#uses=1]
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 ; <<8 x i8>> [#uses=1]
%tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 1 ; <<8 x i8>> [#uses=1]
%tmp5 = sub <8 x i8> %tmp3, %tmp4
%tmp6 = add <8 x i8> %tmp2, %tmp3 ; <<8 x i8>> [#uses=1]
%tmp7 = mul <8 x i8> %tmp4, %tmp2
- tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7, i32 1)
+ tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7, i32 1)
ret <8 x i8> %tmp4
}
@@ -101,10 +101,10 @@ entry:
; CHECK-NOT: vmov
; CHECK: bne
%tmp1 = bitcast i32* %in to i8* ; <i8*> [#uses=1]
- %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+ %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32.p0i8(i8* %tmp1, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
%tmp3 = getelementptr inbounds i32, i32* %in, i32 8 ; <i32*> [#uses=1]
%tmp4 = bitcast i32* %tmp3 to i8* ; <i8*> [#uses=1]
- %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+ %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32.p0i8(i8* %tmp4, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
%tmp8 = bitcast i32* %out to i8* ; <i8*> [#uses=1]
br i1 undef, label %return1, label %return2
@@ -120,7 +120,7 @@ return1:
%tmp39 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
%tmp6 = add <4 x i32> %tmp52, %tmp ; <<4 x i32>> [#uses=1]
%tmp7 = add <4 x i32> %tmp57, %tmp39 ; <<4 x i32>> [#uses=1]
- tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7, i32 1)
+ tail call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7, i32 1)
ret void
return2:
@@ -131,7 +131,7 @@ return2:
%tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
%tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
%tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1]
- tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101, i32 1)
+ tail call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101, i32 1)
call void @llvm.trap()
unreachable
}
@@ -147,7 +147,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
; CHECK: vadd.i16
%tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1]
%tmp1 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=2]
- %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2]
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2]
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1]
%tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1]
%tmp5 = add <8 x i16> %tmp3, %tmp4 ; <<8 x i16>> [#uses=1]
@@ -160,7 +160,7 @@ define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
; CHECK: vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]]
; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]}
%tmp1 = load <8 x i8>, <8 x i8>* %B ; <<8 x i8>> [#uses=2]
- %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1]
%tmp5 = add <8 x i8> %tmp3, %tmp4 ; <<8 x i8>> [#uses=1]
@@ -178,14 +178,14 @@ entry:
; CHECK: vuzp.32 q[[Q1]], q[[Q0]]
; CHECK: vst1.32
%0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2]
- %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+ %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32.p0i8(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
%tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1]
%tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1]
%2 = bitcast i32* %optr to i8* ; <i8*> [#uses=2]
- tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60, i32 1)
- %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0, i32 1) ; <<4 x i32>> [#uses=1]
+ tail call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60, i32 1)
+ %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* %0, i32 1) ; <<4 x i32>> [#uses=1]
%4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1]
- tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %2, <4 x i32> %4, i32 1)
ret void
}
@@ -307,43 +307,43 @@ bb14: ; preds = %bb6
; This test crashes the coalescer because live variables were not updated properly.
define <8 x i8> @t11(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind {
- %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+ %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
%tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1]
- %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+ %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
%tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1]
%tmp2bd = add <8 x i8> zeroinitializer, %tmp2d ; <<8 x i8>> [#uses=1]
%tmp2abcd = mul <8 x i8> zeroinitializer, %tmp2bd ; <<8 x i8>> [#uses=1]
%tmp2ef = sub <8 x i8> zeroinitializer, %tmp2f ; <<8 x i8>> [#uses=1]
%tmp2efgh = mul <8 x i8> %tmp2ef, undef ; <<8 x i8>> [#uses=2]
- call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh, i32 1)
%tmp2 = sub <8 x i8> %tmp2efgh, %tmp2abcd ; <<8 x i8>> [#uses=1]
%tmp7 = mul <8 x i8> undef, %tmp2 ; <<8 x i8>> [#uses=1]
- tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7, i32 1)
+ tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7, i32 1)
ret <8 x i8> undef
}
-declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8*, i32) nounwind readonly
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly
declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4i32(i8*, <4 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
+declare void @llvm.arm.neon.vst3.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
nounwind
-declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32.p0i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
-declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/rotate.ll b/test/CodeGen/ARM/rotate.ll
new file mode 100644
index 000000000000..f3f7de2160fb
--- /dev/null
+++ b/test/CodeGen/ARM/rotate.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=thumbv8--linux-gnueabihf | FileCheck %s
+
+;; This used to cause a backend crash about not being able to
+;; select ROTL. Make sure if generates the basic VSHL/VSHR.
+define <2 x i64> @testcase(<2 x i64>* %in) {
+; CHECK-LABEL: testcase
+; CHECK: vshl.i64
+; CHECK: vshr.u64
+ %1 = load <2 x i64>, <2 x i64>* %in
+ %2 = lshr <2 x i64> %1, <i64 8, i64 8>
+ %3 = shl <2 x i64> %1, <i64 56, i64 56>
+ %4 = or <2 x i64> %2, %3
+ ret <2 x i64> %4
+}
diff --git a/test/CodeGen/ARM/sat-arith.ll b/test/CodeGen/ARM/sat-arith.ll
new file mode 100644
index 000000000000..4844ed1bd21e
--- /dev/null
+++ b/test/CodeGen/ARM/sat-arith.ll
@@ -0,0 +1,63 @@
+; RUN: llc -O1 -mtriple=armv6-none-none-eabi %s -o - | FileCheck %s -check-prefix=ARM -check-prefix=CHECK
+; RUN: llc -O1 -mtriple=thumbv7-none-none-eabi %s -o - | FileCheck %s -check-prefix=THUMB -check-prefix=CHECK
+
+; CHECK-LABEL: qadd
+define i32 @qadd() nounwind {
+; CHECK-DAG: mov{{s?}} [[R0:.*]], #8
+; CHECK-DAG: mov{{s?}} [[R1:.*]], #128
+; CHECK-ARM: qadd [[R0]], [[R1]], [[R0]]
+; CHECK-THRUMB: qadd [[R0]], [[R0]], [[R1]]
+ %tmp = call i32 @llvm.arm.qadd(i32 128, i32 8)
+ ret i32 %tmp
+}
+
+; CHECK-LABEL: qsub
+define i32 @qsub() nounwind {
+; CHECK-DAG: mov{{s?}} [[R0:.*]], #8
+; CHECK-DAG: mov{{s?}} [[R1:.*]], #128
+; CHECK-ARM: qsub [[R0]], [[R1]], [[R0]]
+; CHECK-THRUMB: qadd [[R0]], [[R1]], [[R0]]
+ %tmp = call i32 @llvm.arm.qsub(i32 128, i32 8)
+ ret i32 %tmp
+}
+
+; upper-bound of the immediate argument
+; CHECK-LABEL: ssat1
+define i32 @ssat1() nounwind {
+; CHECK: mov{{s?}} [[R0:.*]], #128
+; CHECK: ssat [[R1:.*]], #32, [[R0]]
+ %tmp = call i32 @llvm.arm.ssat(i32 128, i32 32)
+ ret i32 %tmp
+}
+
+; lower-bound of the immediate argument
+; CHECK-LABEL: ssat2
+define i32 @ssat2() nounwind {
+; CHECK: mov{{s?}} [[R0:.*]], #128
+; CHECK: ssat [[R1:.*]], #1, [[R0]]
+ %tmp = call i32 @llvm.arm.ssat(i32 128, i32 1)
+ ret i32 %tmp
+}
+
+; upper-bound of the immediate argument
+; CHECK-LABEL: usat1
+define i32 @usat1() nounwind {
+; CHECK: mov{{s?}} [[R0:.*]], #128
+; CHECK: usat [[R1:.*]], #31, [[R0]]
+ %tmp = call i32 @llvm.arm.usat(i32 128, i32 31)
+ ret i32 %tmp
+}
+
+; lower-bound of the immediate argument
+; CHECK-LABEL: usat2
+define i32 @usat2() nounwind {
+; CHECK: mov{{s?}} [[R0:.*]], #128
+; CHECK: usat [[R1:.*]], #0, [[R0]]
+ %tmp = call i32 @llvm.arm.usat(i32 128, i32 0)
+ ret i32 %tmp
+}
+
+declare i32 @llvm.arm.qadd(i32, i32) nounwind
+declare i32 @llvm.arm.qsub(i32, i32) nounwind
+declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone
+declare i32 @llvm.arm.usat(i32, i32) nounwind readnone
diff --git a/test/CodeGen/ARM/sched-it-debug-nodes.ll b/test/CodeGen/ARM/sched-it-debug-nodes.ll
deleted file mode 100644
index 7906e7c7581e..000000000000
--- a/test/CodeGen/ARM/sched-it-debug-nodes.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; RUN: llc -mtriple thumbv7 -print-before=post-RA-sched -print-after=post-RA-sched %s -o /dev/null 2>&1 | FileCheck %s
-
-; ModuleID = '<stdin>'
-target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
-target triple = "thumbv7"
-
-%struct.s = type opaque
-
-; Function Attrs: nounwind
-define arm_aapcscc i32 @f(%struct.s* %s, i32 %u, i8* %b, i32 %n) #0 {
-entry:
- tail call void @llvm.dbg.value(metadata %struct.s* %s, i64 0, metadata !19, metadata !28), !dbg !29
- tail call void @llvm.dbg.value(metadata i32 %u, i64 0, metadata !20, metadata !28), !dbg !29
- tail call void @llvm.dbg.value(metadata i8* %b, i64 0, metadata !21, metadata !28), !dbg !29
- tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !22, metadata !28), !dbg !29
- %cmp = icmp ult i32 %n, 4, !dbg !30
- br i1 %cmp, label %return, label %if.end, !dbg !32
-
-if.end: ; preds = %entry
- tail call arm_aapcscc void @g(%struct.s* %s, i8* %b, i32 %n) #3, !dbg !33
- br label %return, !dbg !34
-
-return: ; preds = %entry, %if.end
- %retval.0 = phi i32 [ 0, %if.end ], [ -1, %entry ]
- ret i32 %retval.0, !dbg !35
-}
-
-; NOTE: This is checking that the register in the DEBUG_VALUE node is not
-; accidentally being marked as KILL. The DBG_VALUE node gets introduced in
-; If-Conversion, and gets bundled into the IT block. The Post RA Scheduler
-; attempts to schedule the Machine Instr, and tries to tag the register in the
-; debug value as KILL'ed, resulting in a DEBUG_VALUE node changing codegen! (or
-; hopefully, triggering an assert).
-
-; CHECK: BUNDLE %ITSTATE<imp-def,dead>
-; CHECK: * DBG_VALUE %R1, %noreg, !"u"
-; CHECK-NOT: * DBG_VALUE %R1<kill>, %noreg, !"u"
-
-declare arm_aapcscc void @g(%struct.s*, i8*, i32) #1
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!23, !24, !25, !26}
-!llvm.ident = !{!27}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (llvm/trunk 237059)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "<stdin>", directory: "/Users/compnerd/Source/llvm")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "f", scope: !5, file: !5, line: 9, type: !6, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, function: i32 (%struct.s*, i32, i8*, i32)* @f, variables: !18)
-!5 = !DIFile(filename: "<stdin>", directory: "/Users/compnerd/Source/llvm")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8, !9, !12, !13, !17}
-!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 32, align: 32)
-!10 = !DIDerivedType(tag: DW_TAG_typedef, name: "s", file: !5, line: 5, baseType: !11)
-!11 = !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !5, line: 5, flags: DIFlagFwdDecl)
-!12 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 32, align: 32)
-!14 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !15)
-!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", file: !5, line: 2, baseType: !16)
-!16 = !DIBasicType(name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
-!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !5, line: 3, baseType: !12)
-!18 = !{!19, !20, !21, !22}
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", arg: 1, scope: !4, file: !5, line: 9, type: !9)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "u", arg: 2, scope: !4, file: !5, line: 9, type: !12)
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 3, scope: !4, file: !5, line: 9, type: !13)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "n", arg: 4, scope: !4, file: !5, line: 9, type: !17)
-!23 = !{i32 2, !"Dwarf Version", i32 4}
-!24 = !{i32 2, !"Debug Info Version", i32 3}
-!25 = !{i32 1, !"wchar_size", i32 4}
-!26 = !{i32 1, !"min_enum_size", i32 4}
-!27 = !{!"clang version 3.7.0 (llvm/trunk 237059)"}
-!28 = !DIExpression()
-!29 = !DILocation(line: 9, scope: !4)
-!30 = !DILocation(line: 10, scope: !31)
-!31 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10)
-!32 = !DILocation(line: 10, scope: !4)
-!33 = !DILocation(line: 13, scope: !4)
-!34 = !DILocation(line: 14, scope: !4)
-!35 = !DILocation(line: 15, scope: !4)
diff --git a/test/CodeGen/ARM/setjmp_longjmp.ll b/test/CodeGen/ARM/setjmp_longjmp.ll
new file mode 100644
index 000000000000..7100175a97a4
--- /dev/null
+++ b/test/CodeGen/ARM/setjmp_longjmp.ll
@@ -0,0 +1,113 @@
+; RUN: llc %s -o - | FileCheck %s
+target triple = "armv7-apple-ios"
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*)
+declare void @llvm.eh.sjlj.longjmp(i8*)
+@g = external global i32
+
+declare void @may_throw()
+declare i32 @__gxx_personality_sj0(...)
+declare i8* @__cxa_begin_catch(i8*)
+declare void @__cxa_end_catch()
+declare i32 @llvm.eh.typeid.for(i8*)
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.stacksave()
+@_ZTIPKc = external constant i8*
+
+; CHECK-LABEL: foobar
+;
+; setjmp sequence:
+; CHECK: add [[PCREG:r[0-9]+]], pc, #8
+; CHECK-NEXT: str [[PCREG]], {{\[}}[[BUFREG:r[0-9]+]], #4]
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: add pc, pc, #0
+; CHECK-NEXT: mov r0, #1
+;
+; longjmp sequence:
+; CHECK: ldr sp, [{{\s*}}[[BUFREG:r[0-9]+]], #8]
+; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4]
+; CHECK-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}}
+; CHECK-NEXT: bx [[DESTREG]]
+define void @foobar() {
+entry:
+ %buf = alloca [5 x i8*], align 4
+ %arraydecay = getelementptr inbounds [5 x i8*], [5 x i8*]* %buf, i32 0, i32 0
+ %bufptr = bitcast i8** %arraydecay to i8*
+ ; Note: This is simplified, in reality you have to store the framepointer +
+ ; stackpointer in the buffer as well for this to be legal!
+ %setjmpres = call i32 @llvm.eh.sjlj.setjmp(i8* %bufptr)
+ %tobool = icmp ne i32 %setjmpres, 0
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then:
+ store volatile i32 1, i32* @g, align 4
+ br label %if.end
+
+if.else:
+ store volatile i32 0, i32* @g, align 4
+ call void @llvm.eh.sjlj.longjmp(i8* %bufptr)
+ unreachable
+
+if.end:
+ ret void
+}
+
+; CHECK-LABEL: combine_sjlj_eh_and_setjmp_longjmp
+; Check that we can mix sjlj exception handling with __builtin_setjmp
+; and __builtin_longjmp.
+;
+; setjmp sequence:
+; CHECK: add [[PCREG:r[0-9]+]], pc, #8
+; CHECK-NEXT: str [[PCREG]], {{\[}}[[BUFREG:r[0-9]+]], #4]
+; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: add pc, pc, #0
+; CHECK-NEXT: mov r0, #1
+;
+; longjmp sequence:
+; CHECK: ldr sp, [{{\s*}}[[BUFREG:r[0-9]+]], #8]
+; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4]
+; CHECK-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}}
+; CHECK-NEXT: bx [[DESTREG]]
+define void @combine_sjlj_eh_and_setjmp_longjmp() personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) {
+entry:
+ %buf = alloca [5 x i8*], align 4
+ invoke void @may_throw() to label %try.cont unwind label %lpad
+
+lpad:
+ %0 = landingpad { i8*, i32 } catch i8* bitcast (i8** @_ZTIPKc to i8*)
+ %1 = extractvalue { i8*, i32 } %0, 1
+ %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIPKc to i8*)) #3
+ %matches = icmp eq i32 %1, %2
+ br i1 %matches, label %catch, label %eh.resume
+
+catch:
+ %3 = extractvalue { i8*, i32 } %0, 0
+ %4 = tail call i8* @__cxa_begin_catch(i8* %3) #3
+ store volatile i32 0, i32* @g, align 4
+ %5 = bitcast [5 x i8*]* %buf to i8*
+ %arraydecay = getelementptr inbounds [5 x i8*], [5 x i8*]* %buf, i64 0, i64 0
+ %6 = tail call i8* @llvm.frameaddress(i32 0)
+ store i8* %6, i8** %arraydecay, align 16
+ %7 = tail call i8* @llvm.stacksave()
+ %8 = getelementptr [5 x i8*], [5 x i8*]* %buf, i64 0, i64 2
+ store i8* %7, i8** %8, align 16
+ %9 = call i32 @llvm.eh.sjlj.setjmp(i8* %5)
+ %tobool = icmp eq i32 %9, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+ store volatile i32 2, i32* @g, align 4
+ call void @__cxa_end_catch() #3
+ br label %try.cont
+
+if.else:
+ store volatile i32 1, i32* @g, align 4
+ call void @llvm.eh.sjlj.longjmp(i8* %5)
+ unreachable
+
+eh.resume:
+ resume { i8*, i32 } %0
+
+try.cont:
+ ret void
+}
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 6f5c0e8279a9..5d44eb0f11d1 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -1,14 +1,14 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-ARM
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-THUMB
; rdar://8576755
define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
-; A8-LABEL: test1:
-; A8: add r0, r0, r1, lsl r2
-
-; A9-LABEL: test1:
-; A9: add r0, r0, r1, lsl r2
+; CHECK-LABEL: test1:
+; CHECK-ARM: add r0, r0, r1, lsl r2
+; CHECK-THUMB: lsls r1, r2
+; CHECK-THUMB: add r0, r1
%shift.upgrd.1 = zext i8 %sh to i32
%A = shl i32 %Y, %shift.upgrd.1
%B = add i32 %X, %A
@@ -16,11 +16,10 @@ define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
}
define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
-; A8-LABEL: test2:
-; A8: bic r0, r0, r1, asr r2
-
-; A9-LABEL: test2:
-; A9: bic r0, r0, r1, asr r2
+; CHECK-LABEL: test2:
+; CHECK-ARM: bic r0, r0, r1, asr r2
+; CHECK-THUMB: asrs r1, r2
+; CHECK-THUMB: bics r0, r1
%shift.upgrd.2 = zext i8 %sh to i32
%A = ashr i32 %Y, %shift.upgrd.2
%B = xor i32 %A, -1
@@ -30,14 +29,9 @@ define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
entry:
-; A8-LABEL: test3:
-; A8: ldr r0, [r0, r2, lsl #2]
-; A8: ldr r1, [r1, r2, lsl #2]
-
-; lsl #2 is free
-; A9-LABEL: test3:
-; A9: ldr r0, [r0, r2, lsl #2]
-; A9: ldr r1, [r1, r2, lsl #2]
+; CHECK-LABEL: test3:
+; CHECK: ldr{{(.w)?}} r0, [r0, r2, lsl #2]
+; CHECK: ldr{{(.w)?}} r1, [r1, r2, lsl #2]
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
@@ -53,17 +47,11 @@ declare i8* @malloc(...)
define fastcc void @test4(i16 %addr) nounwind {
entry:
-; A8-LABEL: test4:
-; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A8-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
-; A8: str [[REG]], [r0, r1, lsl #2]
-; A8-NOT: str [[REG]], [r0]
-
-; A9-LABEL: test4:
-; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
-; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
-; A9: str [[REG]], [r0, r1, lsl #2]
-; A9-NOT: str [[REG]], [r0]
+; CHECK-LABEL: test4:
+; CHECK: ldr{{(.w)?}} [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; CHECK-NOT: ldr{{(.w)?}} [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; CHECK: str{{(.w)?}} [[REG]], [r0, r1, lsl #2]
+; CHECK-NOT: str{{(.w)?}} [[REG]], [r0]
%0 = tail call i8* (...) @malloc(i32 undef) nounwind
%1 = bitcast i8* %0 to i32*
%2 = sext i16 %addr to i32
@@ -73,3 +61,181 @@ entry:
store i32 %5, i32* %3, align 4
ret void
}
+
+define i32 @test_orr_extract_from_mul_1(i32 %x, i32 %y) {
+entry:
+; CHECK-LABEL: test_orr_extract_from_mul_1
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-ARM: orr r0, r1, r0
+; CHECK-THUMB: muls r1, r2, r1
+; CHECk-THUMB: orrs r0, r1
+ %mul = mul i32 %y, 63767
+ %or = or i32 %mul, %x
+ ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_2(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_2
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #1
+entry:
+ %mul1 = mul i32 %y, 127534
+ %or = or i32 %mul1, %x
+ ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_3(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_3
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #2
+entry:
+ %mul1 = mul i32 %y, 255068
+ %or = or i32 %mul1, %x
+ ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_4(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_4
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #3
+entry:
+ %mul1 = mul i32 %y, 510136
+ %or = or i32 %mul1, %x
+ ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_5(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_5
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #4
+entry:
+ %mul1 = mul i32 %y, 1020272
+ %or = or i32 %mul1, %x
+ ret i32 %or
+}
+
+define i32 @test_orr_extract_from_mul_6(i32 %x, i32 %y) {
+; CHECK-LABEL: test_orr_extract_from_mul_6
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: orr{{(.w)?}} r0, r0, r1, lsl #16
+entry:
+ %mul = mul i32 %y, -115933184
+ %or = or i32 %mul, %x
+ ret i32 %or
+}
+
+define i32 @test_load_extract_from_mul_1(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_1
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: ldrb r0, [r0, r1]
+entry:
+ %mul = mul i32 %y, 63767
+ %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_2(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_2
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: ldrb{{(.w)?}} r0, [r0, r1, lsl #1]
+entry:
+ %mul1 = mul i32 %y, 127534
+ %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_3(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_3
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: ldrb{{(.w)?}} r0, [r0, r1, lsl #2]
+entry:
+ %mul1 = mul i32 %y, 255068
+ %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_4(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_4
+; CHECK: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK: ldrb{{(.w)?}} r0, [r0, r1, lsl #3]
+entry:
+ %mul1 = mul i32 %y, 510136
+ %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_5(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_5
+; CHECK-ARM: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-ARM: ldrb r0, [r0, r1, lsl #4]
+; CHECK-THUMB: movw r2, #37232
+; CHECK-THUMB: movt r2, #15
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK-THUMB: ldrb r0, [r0, r1]
+entry:
+ %mul1 = mul i32 %y, 1020272
+ %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul1
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+define i32 @test_load_extract_from_mul_6(i8* %x, i32 %y) {
+; CHECK-LABEL: test_load_extract_from_mul_6
+; CHECK-ARM: movw r2, #63767
+; CHECK-ARM: mul r1, r1, r2
+; CHECK-ARM: ldrb r0, [r0, r1, lsl #16]
+; CHECK-THUMB: movs r2, #0
+; CHECK-THUMB: movt r2, #63767
+; CHECK-THUMB: muls r1, r2, r1
+; CHECK-THUMB: ldrb r0, [r0, r1]
+entry:
+ %mul = mul i32 %y, -115933184
+ %arrayidx = getelementptr inbounds i8, i8* %x, i32 %mul
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+
+define void @test_well_formed_dag(i32 %in1, i32 %in2, i32* %addr) {
+; CHECK-LABEL: test_well_formed_dag:
+; CHECK-ARM: movw [[SMALL_CONST:r[0-9]+]], #675
+; CHECK-ARM: mul [[SMALL_PROD:r[0-9]+]], r0, [[SMALL_CONST]]
+; CHECK-ARM: add {{r[0-9]+}}, r1, [[SMALL_PROD]], lsl #7
+
+ %mul.small = mul i32 %in1, 675
+ store i32 %mul.small, i32* %addr
+ %mul.big = mul i32 %in1, 86400
+ %add = add i32 %in2, %mul.big
+ store i32 %add, i32* %addr
+ ret void
+}
diff --git a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
index c7f47b0962dc..a1abef9605ca 100644
--- a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
+++ b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
@@ -77,8 +77,8 @@ declare void @terminatev()
; CHECK: blx __Znwm
; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge
; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]]
-; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
; CHECK: {{.*}}@ %do.body.i.i.i
+; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
; CHECK: cbz [[R0]]
%"class.std::__1::basic_string" = type { %"class.std::__1::__compressed_pair" }
diff --git a/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll
index 5d015738623a..b44b447b3dff 100644
--- a/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll
+++ b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll
@@ -2,6 +2,7 @@
; RUN: llc -mtriple=armv7-apple-ios -O1 < %s | FileCheck %s
; RUN: llc -mtriple=armv7-apple-ios -O2 < %s | FileCheck %s
; RUN: llc -mtriple=armv7-apple-ios -O3 < %s | FileCheck %s
+; RUN: llc -mtriple=armv7k-apple-ios < %s | FileCheck %s
; SjLjEHPrepare shouldn't crash when lowering empty structs.
;
diff --git a/test/CodeGen/ARM/softfp-fabs-fneg.ll b/test/CodeGen/ARM/softfp-fabs-fneg.ll
new file mode 100644
index 000000000000..b608fb840218
--- /dev/null
+++ b/test/CodeGen/ARM/softfp-fabs-fneg.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=armv7 < %s | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK
+; RUN: llc -mtriple=thumbv7 < %s | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--"
+
+define double @f(double %a) {
+ ; CHECK-LABEL: f:
+ ; CHECK: bfc r1, #31, #1
+ ; CHECK-NEXT: bx lr
+ %x = call double @llvm.fabs.f64(double %a) readnone
+ ret double %x
+}
+
+define float @g(float %a) {
+ ; CHECK-LABEL: g:
+ ; CHECK-THUMB: bic r0, r0, #-2147483648
+ ; CHECK-ARM: bfc r0, #31, #1
+ ; CHECK-NEXT: bx lr
+ %x = call float @llvm.fabs.f32(float %a) readnone
+ ret float %x
+}
+
+define double @h(double %a) {
+ ; CHECK-LABEL: h:
+ ; CHECK: eor r1, r1, #-2147483648
+ ; CHECK-NEXT: bx lr
+ %x = fsub nsz double -0.0, %a
+ ret double %x
+}
+
+define float @i(float %a) {
+ ; CHECK-LABEL: i:
+ ; CHECK: eor r0, r0, #-2147483648
+ ; CHECK-NEXT: bx lr
+ %x = fsub nsz float -0.0, %a
+ ret float %x
+}
+
+declare double @llvm.fabs.f64(double) readnone
+declare float @llvm.fabs.f32(float) readnone
diff --git a/test/CodeGen/ARM/special-reg-mcore.ll b/test/CodeGen/ARM/special-reg-mcore.ll
index 686da0f6b839..45e6db9e78fe 100644
--- a/test/CodeGen/ARM/special-reg-mcore.ll
+++ b/test/CodeGen/ARM/special-reg-mcore.ll
@@ -3,7 +3,7 @@
; RUN: not llc < %s -mtriple=arm-none-eabi -mcpu=cortex-a8 2>&1 | FileCheck %s --check-prefix=ACORE
; ACORE: LLVM ERROR: Invalid register name "control".
-; M3CORE: LLVM ERROR: Invalid register name "control".
+; M3CORE: LLVM ERROR: Invalid register name "xpsr_nzcvqg".
define i32 @read_mclass_registers() nounwind {
entry:
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index 1a102e3d971f..845018ebb0e7 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -7,7 +7,7 @@
%quux = type { i32 (...)**, %baz*, i32 }
%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK-LABEL: aaa:
@@ -18,30 +18,30 @@ entry:
%aligned_vec = alloca <4 x float>, align 16
%"alloca point" = bitcast i32 0 to i32
%vecptr = bitcast <4 x float>* %aligned_vec to i8*
- %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind ; <<4 x float>> [#uses=1]
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %vecptr, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 6.300000e+01, float* undef, align 4
- %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 0.000000e+00, float* undef, align 4
- %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
- %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+ %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%val173 = load <4 x float>, <4 x float>* undef ; <<4 x float>> [#uses=1]
br label %bb4
diff --git a/test/CodeGen/ARM/ssat-lower.ll b/test/CodeGen/ARM/ssat-lower.ll
new file mode 100644
index 000000000000..9f0cd0364bcf
--- /dev/null
+++ b/test/CodeGen/ARM/ssat-lower.ll
@@ -0,0 +1,11 @@
+; RUN: not llc < %s -O1 -mtriple=armv6-none-none-eabi 2>&1 | FileCheck %s
+; RUN: not llc < %s -O1 -mtriple=thumbv7-none-none-eabi 2>&1 | FileCheck %s
+
+; immediate argument < lower-bound
+; CHECK: LLVM ERROR: Cannot select: intrinsic %llvm.arm.ssat
+define i32 @ssat1() nounwind {
+ %tmp = call i32 @llvm.arm.ssat(i32 128, i32 0)
+ ret i32 %tmp
+}
+
+declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone
diff --git a/test/CodeGen/ARM/ssat-upper.ll b/test/CodeGen/ARM/ssat-upper.ll
new file mode 100644
index 000000000000..e53f82b3efa3
--- /dev/null
+++ b/test/CodeGen/ARM/ssat-upper.ll
@@ -0,0 +1,11 @@
+; RUN: not llc < %s -O1 -mtriple=armv6-none-none-eabi 2>&1 | FileCheck %s
+; RUN: not llc < %s -O1 -mtriple=thumbv7-none-none-eabi 2>&1 | FileCheck %s
+
+; immediate argument > upper-bound
+; CHECK: LLVM ERROR: Cannot select: intrinsic %llvm.arm.ssat
+define i32 @ssat1() nounwind {
+ %tmp = call i32 @llvm.arm.ssat(i32 128, i32 33)
+ ret i32 %tmp
+}
+
+declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone
diff --git a/test/CodeGen/ARM/subtarget-no-movt.ll b/test/CodeGen/ARM/subtarget-no-movt.ll
new file mode 100644
index 000000000000..cb61bde3f9c0
--- /dev/null
+++ b/test/CodeGen/ARM/subtarget-no-movt.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march thumb -mcpu=cortex-a8 -relocation-model=static %s -o - | FileCheck -check-prefix=NO-OPTION %s
+; RUN: llc -march thumb -mcpu=cortex-a8 -relocation-model=static %s -o - -mattr=-no-movt | FileCheck -check-prefix=USE-MOVT %s
+; RUN: llc -march thumb -mcpu=cortex-a8 -relocation-model=static %s -o - -mattr=+no-movt | FileCheck -check-prefix=NO-USE-MOVT %s
+; RUN: llc -march thumb -mcpu=cortex-a8 -relocation-model=static %s -o - -O0 | FileCheck -check-prefix=NO-OPTION %s
+; RUN: llc -march thumb -mcpu=cortex-a8 -relocation-model=static %s -o - -O0 -mattr=-no-movt | FileCheck -check-prefix=USE-MOVT %s
+; RUN: llc -march thumb -mcpu=cortex-a8 -relocation-model=static %s -o - -O0 -mattr=+no-movt | FileCheck -check-prefix=NO-USE-MOVT %s
+
+; NO-OPTION-LABEL: {{_?}}foo0
+; NO-OPTION: ldr [[R0:r[0-9]+]], [[L0:.*]]
+; NO-OPTION: [[L0]]:
+; NO-OPTION: .long 2296237089
+
+; USE-MOVT-LABEL: {{_?}}foo0
+; USE-MOVT: movw [[R0:r[0-9]+]], #52257
+; USE-MOVT: movt [[R0]], #35037
+
+; NO-USE-MOVT-LABEL: {{_?}}foo0
+; NO-USE-MOVT: ldr [[R0:r[0-9]+]], [[L0:.*]]
+; NO-USE-MOVT: [[L0]]:
+; NO-USE-MOVT: .long 2296237089
+
+define i32 @foo0(i32 %a) #0 {
+ %1 = xor i32 -1998730207, %a
+ ret i32 %1
+}
+
+; NO-OPTION-LABEL: {{_?}}foo1
+; NO-OPTION: movw [[R0:r[0-9]+]], #52257
+; NO-OPTION: movt [[R0]], #35037
+
+; USE-MOVT-LABEL: {{_?}}foo1
+; USE-MOVT: movw [[R0:r[0-9]+]], #52257
+; USE-MOVT: movt [[R0]], #35037
+
+; NO-USE-MOVT-LABEL: {{_?}}foo1
+; NO-USE-MOVT: ldr [[R0:r[0-9]+]], [[L0:.*]]
+; NO-USE-MOVT: [[L0]]:
+; NO-USE-MOVT: .long 2296237089
+
+define i32 @foo1(i32 %a) {
+ %1 = xor i32 -1998730207, %a
+ ret i32 %1
+}
+
+attributes #0 = { "target-features"="+no-movt" }
diff --git a/test/CodeGen/ARM/tail-merge-branch-weight.ll b/test/CodeGen/ARM/tail-merge-branch-weight.ll
index 95b0a202e7ff..f83f28815793 100644
--- a/test/CodeGen/ARM/tail-merge-branch-weight.ll
+++ b/test/CodeGen/ARM/tail-merge-branch-weight.ll
@@ -9,7 +9,7 @@
; = 0.2 * 0.4 + 0.8 * 0.7 = 0.64
; CHECK: # Machine code for function test0:
-; CHECK: Successors according to CFG: BB#{{[0-9]+}}(13) BB#{{[0-9]+}}(24)
+; CHECK: Successors according to CFG: BB#{{[0-9]+}}({{[0-9a-fx/= ]+}}20.00%) BB#{{[0-9]+}}({{[0-9a-fx/= ]+}}80.00%)
; CHECK: BB#{{[0-9]+}}:
; CHECK: BB#{{[0-9]+}}:
; CHECK: # End machine code for function test0.
diff --git a/test/CodeGen/ARM/taildup-branch-weight.ll b/test/CodeGen/ARM/taildup-branch-weight.ll
index 64e0f4bcdefc..6f8d245e74a0 100644
--- a/test/CodeGen/ARM/taildup-branch-weight.ll
+++ b/test/CodeGen/ARM/taildup-branch-weight.ll
@@ -3,7 +3,7 @@
; RUN: | FileCheck %s
; CHECK: Machine code for function test0:
-; CHECK: Successors according to CFG: BB#1(4) BB#2(124)
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}3.12%) BB#2({{[0-9a-fx/= ]+}}96.88%)
define void @test0(i32 %a, i32 %b, i32* %c, i32* %d) {
entry:
@@ -30,7 +30,7 @@ B4:
!0 = !{!"branch_weights", i32 4, i32 124}
; CHECK: Machine code for function test1:
-; CHECK: Successors according to CFG: BB#1(8) BB#2(248)
+; CHECK: Successors according to CFG: BB#2(0x7c000000 / 0x80000000 = 96.88%) BB#1(0x04000000 / 0x80000000 = 3.12%)
@g0 = common global i32 0, align 4
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll
index 377996c4c3c8..db32f18d82c0 100644
--- a/test/CodeGen/ARM/test-sharedidx.ll
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -20,8 +20,8 @@ entry:
for.body: ; preds = %entry, %for.body.3
; CHECK: %for.body
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
%i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
%0 = load i8, i8* %arrayidx, align 1
@@ -42,8 +42,8 @@ for.end: ; preds = %for.body, %for.body
for.body.1: ; preds = %for.body
; CHECK: %for.body.1
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
%arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
%2 = load i8, i8* %arrayidx.1, align 1
%conv6.1 = zext i8 %2 to i32
@@ -59,9 +59,6 @@ for.body.1: ; preds = %for.body
br i1 %cmp.1, label %for.body.2, label %for.end
for.body.2: ; preds = %for.body.1
-; CHECK: %for.body.2
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
%arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
%4 = load i8, i8* %arrayidx.2, align 1
%conv6.2 = zext i8 %4 to i32
@@ -78,8 +75,8 @@ for.body.2: ; preds = %for.body.1
for.body.3: ; preds = %for.body.2
; CHECK: %for.body.3
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
-; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
+; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
%arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
%6 = load i8, i8* %arrayidx.3, align 1
%conv6.3 = zext i8 %6 to i32
diff --git a/test/CodeGen/ARM/thumb-alignment.ll b/test/CodeGen/ARM/thumb-alignment.ll
index c11d4b6da3c9..b9ddfbb714d1 100644
--- a/test/CodeGen/ARM/thumb-alignment.ll
+++ b/test/CodeGen/ARM/thumb-alignment.ll
@@ -23,7 +23,7 @@ define i32* @bar() {
; CHECK: .globl baz
; CHECK-NEXT: .align 2
-; CHECK: adr.w
+; CHECK: tbb
define i32 @baz() {
%1 = load i32, i32* @c, align 4
switch i32 %1, label %7 [
diff --git a/test/CodeGen/ARM/thumb1-ldst-opt.ll b/test/CodeGen/ARM/thumb1-ldst-opt.ll
new file mode 100644
index 000000000000..eb82385de0c5
--- /dev/null
+++ b/test/CodeGen/ARM/thumb1-ldst-opt.ll
@@ -0,0 +1,27 @@
+; RUN: llc -stop-after block-placement -o /dev/null %s | FileCheck %s
+
+target triple = "thumbv6m-none-none"
+
+define i32* @foo(i32* readonly %p0) {
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %p0, i32 10
+ %arrayidx = getelementptr inbounds i32, i32* %p0, i32 13
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %p0, i32 12
+ %1 = load i32, i32* %arrayidx1, align 4
+ %add = add nsw i32 %1, %0
+ %arrayidx2 = getelementptr inbounds i32, i32* %p0, i32 11
+ %2 = load i32, i32* %arrayidx2, align 4
+ %add3 = add nsw i32 %add, %2
+ %3 = load i32, i32* %add.ptr, align 4
+ %add5 = add nsw i32 %add3, %3
+ tail call void @g(i32 %add5)
+ ret i32* %p0
+}
+
+declare void @g(i32)
+
+; CHECK-LABEL: name: foo
+; CHECK: [[BASE:%r[0-7]]], {{.*}} tADDi8
+; CHECK-NOT: [[BASE]] = tLDMIA_UPD {{.*}} [[BASE]]
+; CHECK: tLDMIA killed [[BASE]], {{.*}} def [[BASE]]
diff --git a/test/CodeGen/ARM/thumb1_return_sequence.ll b/test/CodeGen/ARM/thumb1_return_sequence.ll
index 9c62faeaa684..67d1cad2cf68 100644
--- a/test/CodeGen/ARM/thumb1_return_sequence.ll
+++ b/test/CodeGen/ARM/thumb1_return_sequence.ll
@@ -23,11 +23,22 @@ entry:
; --------
; CHECK-V4T: add sp,
; CHECK-V4T-NEXT: pop {[[SAVED]]}
-; CHECK-V4T-NEXT: mov r12, r3
-; CHECK-V4T-NEXT: pop {r3}
-; CHECK-V4T-NEXT: mov lr, r3
-; CHECK-V4T-NEXT: mov r3, r12
-; CHECK-V4T: bx lr
+; The ISA for v4 does not support pop pc, so make sure we do not emit
+; one even when we do not need to update SP.
+; CHECK-V4T-NOT: pop {pc}
+; We may only use lo register to pop, but in that case, all the scratch
+; ones are used.
+; r12 is the only register we are allowed to clobber for AAPCS.
+; Use it to save a lo register.
+; CHECK-V4T-NEXT: mov [[TEMP_REG:r12]], [[POP_REG:r[0-7]]]
+; Pop the value of LR.
+; CHECK-V4T-NEXT: pop {[[POP_REG]]}
+; Copy the value of LR in the right register.
+; CHECK-V4T-NEXT: mov lr, [[POP_REG]]
+; Restore the value that was in the register we used to pop the value of LR.
+; CHECK-V4T-NEXT: mov [[POP_REG]], [[TEMP_REG]]
+; Return.
+; CHECK-V4T-NEXT: bx lr
; CHECK-V5T: pop {[[SAVED]], pc}
}
@@ -53,19 +64,19 @@ entry:
; Epilogue
; --------
; CHECK-V4T: pop {[[SAVED]]}
-; CHECK-V4T-NEXT: mov r12, r3
-; CHECK-V4T-NEXT: pop {r3}
+; CHECK-V4T-NEXT: mov r12, [[POP_REG:r[0-7]]]
+; CHECK-V4T-NEXT: pop {[[POP_REG]]}
; CHECK-V4T-NEXT: add sp,
-; CHECK-V4T-NEXT: mov lr, r3
-; CHECK-V4T-NEXT: mov r3, r12
+; CHECK-V4T-NEXT: mov lr, [[POP_REG]]
+; CHECK-V4T-NEXT: mov [[POP_REG]], r12
; CHECK-V4T: bx lr
; CHECK-V5T: add sp,
; CHECK-V5T-NEXT: pop {[[SAVED]]}
-; CHECK-V5T-NEXT: mov r12, r3
-; CHECK-V5T-NEXT: pop {r3}
+; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]]
+; CHECK-V5T-NEXT: pop {[[POP_REG]]}
; CHECK-V5T-NEXT: add sp,
-; CHECK-V5T-NEXT: mov lr, r3
-; CHECK-V5T-NEXT: mov r3, r12
+; CHECK-V5T-NEXT: mov lr, [[POP_REG]]
+; CHECK-V5T-NEXT: mov [[POP_REG]], r12
; CHECK-V5T-NEXT: bx lr
}
@@ -95,8 +106,13 @@ entry:
; Epilogue
; --------
; CHECK-V4T: pop {[[SAVED]]}
-; CHECK-V4T: pop {r3}
-; CHECK-V4T: bx r3
+; The ISA for v4 does not support pop pc, so make sure we do not emit
+; one even when we do not need to update SP.
+; CHECK-V4T-NOT: pop {pc}
+; Pop the value of LR into a scratch lo register other than r0 (it is
+; used for the return value).
+; CHECK-V4T-NEXT: pop {[[POP_REG:r[1-3]]]}
+; CHECK-V4T-NEXT: bx [[POP_REG]]
; CHECK-V5T: pop {[[SAVED]], pc}
}
@@ -148,14 +164,18 @@ entry:
; --------
; CHECK-V4T: add sp,
; CHECK-V4T-NEXT: pop {[[SAVED]]}
-; CHECK-V4T-NEXT: pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V4T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V4T-NEXT: add sp,
-; CHECK-V4T-NEXT: bx r3
+; CHECK-V4T-NEXT: bx [[POP_REG]]
; CHECK-V5T: add sp,
; CHECK-V5T-NEXT: pop {[[SAVED]]}
-; CHECK-V5T-NEXT: pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V5T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V5T-NEXT: add sp,
-; CHECK-V5T-NEXT: bx r3
+; CHECK-V5T-NEXT: bx [[POP_REG]]
}
; CHECK-V4T-LABEL: noframe
@@ -191,13 +211,17 @@ entry:
; Epilogue
; --------
; CHECK-V4T: pop {[[SAVED]]}
-; CHECK-V4T-NEXT: pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V4T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V4T-NEXT: add sp,
-; CHECK-V4T-NEXT: bx r3
+; CHECK-V4T-NEXT: bx [[POP_REG]]
; CHECK-V5T: pop {[[SAVED]]}
-; CHECK-V5T-NEXT: pop {r3}
+; Only r1 to r3 are available to pop LR.
+; r0 is used for the return value.
+; CHECK-V5T-NEXT: pop {[[POP_REG:r[1-3]]]}
; CHECK-V5T-NEXT: add sp,
-; CHECK-V5T-NEXT: bx r3
+; CHECK-V5T-NEXT: bx [[POP_REG]]
}
declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index 2675a733da97..aaefc0a14863 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck -check-prefix CHECK-V7 %s
-; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s -check-prefix CHECK-V8
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s
; PR11107
define i32 @test(i32 %a, i32 %b) {
@@ -14,17 +14,13 @@ entry:
ret i32 %add
}
-; CHECK-V7: cmp
-; CHECK-V7-NEXT: it mi
-; CHECK-V7-NEXT: rsbmi
-; CHECK-V7-NEXT: cmp
-; CHECK-V7-NEXT: it mi
-; CHECK-V7-NEXT: rsbmi
+; CHECK: cmp
+; CHECK-NEXT: it mi
+; We shouldn't need to check for the extra 's' here; tRSB should be printed as
+; "rsb" inside an IT block, not "rsbs".
+; CHECK-NEXT: rsb{{s?}}mi
+; CHECK-NEXT: cmp
+; CHECK-NEXT: it mi
+; CHECK-NEXT: rsb{{s?}}mi
-; CHECK-V8: cmp
-; CHECK-V8-NEXT: bpl
-; CHECK-V8: rsbs
-; CHECK-V8: cmp
-; CHECK-V8-NEXT: bpl
-; CHECK-V8: rsbs
diff --git a/test/CodeGen/ARM/thumb_indirect_calls.ll b/test/CodeGen/ARM/thumb_indirect_calls.ll
index 9f1950c743c0..67346c6fde9c 100644
--- a/test/CodeGen/ARM/thumb_indirect_calls.ll
+++ b/test/CodeGen/ARM/thumb_indirect_calls.ll
@@ -3,7 +3,7 @@
@f = common global void (i32)* null, align 4
-; CHECK-LABEL foo:
+; CHECK-LABEL: foo:
define void @foo(i32 %x) {
entry:
%0 = load void (i32)*, void (i32)** @f, align 4
@@ -21,7 +21,7 @@ entry:
; CHECK-V5T: blx [[CALLEE]]
}
-; CHECK-LABEL bar:
+; CHECK-LABEL: bar:
define void @bar(void (i32)* nocapture %g, i32 %x, void (i32)* nocapture %h) {
entry:
tail call void %g(i32 %x)
@@ -37,4 +37,3 @@ entry:
; CHECK-V5T: blx
; CHECK-V5T: blx
}
-
diff --git a/test/CodeGen/ARM/tls-models.ll b/test/CodeGen/ARM/tls-models.ll
index 42c1ba911028..f3c58f74ebf7 100644
--- a/test/CodeGen/ARM/tls-models.ll
+++ b/test/CodeGen/ARM/tls-models.ll
@@ -1,5 +1,11 @@
-; RUN: llc -march=arm -mtriple=arm-linux-gnueabi < %s | FileCheck -check-prefix=CHECK-NONPIC %s
-; RUN: llc -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s | FileCheck -check-prefix=CHECK-PIC %s
+; RUN: llc -march=arm -mtriple=arm-linux-gnueabi < %s \
+; RUN: | FileCheck -check-prefix=CHECK-NONPIC -check-prefix=COMMON %s
+; RUN: llc -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s \
+; RUN: | FileCheck -check-prefix=CHECK-PIC -check-prefix=COMMON %s
+; RUN: llc -emulated-tls -march=arm -mtriple=arm-linux-gnueabi < %s \
+; RUN: | FileCheck -check-prefix=EMUNONPIC -check-prefix=EMU -check-prefix=COMMON %s
+; RUN: llc -emulated-tls -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s \
+; RUN: | FileCheck -check-prefix=EMUPIC -check-prefix=EMU -check-prefix=COMMON %s
@external_gd = external thread_local global i32
@@ -20,23 +26,23 @@ define i32* @f1() {
entry:
ret i32* @external_gd
+ ; COMMON-LABEL: f1:
; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
- ; CHECK-NONPIC-LABEL: f1:
; CHECK-NONPIC: external_gd(GOTTPOFF)
- ; CHECK-PIC-LABEL: f1:
; CHECK-PIC: external_gd(TLSGD)
+ ; EMU: __emutls_get_address
}
define i32* @f2() {
entry:
ret i32* @internal_gd
+ ; COMMON-LABEL: f2:
; Non-PIC code can use local exec, PIC code can use local dynamic,
; but that is not implemented, so falls back to general dynamic.
- ; CHECK-NONPIC-LABEL: f2:
; CHECK-NONPIC: internal_gd(TPOFF)
- ; CHECK-PIC-LABEL: f2:
; CHECK-PIC: internal_gd(TLSGD)
+ ; EMU: __emutls_get_address
}
@@ -46,24 +52,24 @@ define i32* @f3() {
entry:
ret i32* @external_ld
+ ; COMMON-LABEL: f3:
; Non-PIC code can use initial exec, PIC should use local dynamic,
; but that is not implemented, so falls back to general dynamic.
- ; CHECK-NONPIC-LABEL: f3:
; CHECK-NONPIC: external_ld(GOTTPOFF)
- ; CHECK-PIC-LABEL: f3:
; CHECK-PIC: external_ld(TLSGD)
+ ; EMU: __emutls_get_address
}
define i32* @f4() {
entry:
ret i32* @internal_ld
+ ; COMMON-LABEL: f4:
; Non-PIC code can use local exec, PIC code can use local dynamic,
; but that is not implemented, so it falls back to general dynamic.
- ; CHECK-NONPIC-LABEL: f4:
; CHECK-NONPIC: internal_ld(TPOFF)
- ; CHECK-PIC-LABEL: f4:
; CHECK-PIC: internal_ld(TLSGD)
+ ; EMU: __emutls_get_address
}
@@ -73,22 +79,22 @@ define i32* @f5() {
entry:
ret i32* @external_ie
+ ; COMMON-LABEL: f5:
; Non-PIC and PIC code will use initial exec as specified.
- ; CHECK-NONPIC-LABEL: f5:
; CHECK-NONPIC: external_ie(GOTTPOFF)
- ; CHECK-PIC-LABEL: f5:
; CHECK-PIC: external_ie(GOTTPOFF)
+ ; EMU: __emutls_get_address
}
define i32* @f6() {
entry:
ret i32* @internal_ie
+ ; COMMON-LABEL: f6:
; Non-PIC code can use local exec, PIC code use initial exec as specified.
- ; CHECK-NONPIC-LABEL: f6:
; CHECK-NONPIC: internal_ie(TPOFF)
- ; CHECK-PIC-LABEL: f6:
; CHECK-PIC: internal_ie(GOTTPOFF)
+ ; EMU: __emutls_get_address
}
@@ -98,20 +104,52 @@ define i32* @f7() {
entry:
ret i32* @external_le
+ ; COMMON-LABEL: f7:
; Non-PIC and PIC code will use local exec as specified.
- ; CHECK-NONPIC-LABEL: f7:
; CHECK-NONPIC: external_le(TPOFF)
- ; CHECK-PIC-LABEL: f7:
; CHECK-PIC: external_le(TPOFF)
+ ; EMU: __emutls_get_address
}
define i32* @f8() {
entry:
ret i32* @internal_le
+ ; COMMON-LABEL: f8:
; Non-PIC and PIC code will use local exec as specified.
- ; CHECK-NONPIC-LABEL: f8:
; CHECK-NONPIC: internal_le(TPOFF)
- ; CHECK-PIC-LABEL: f8:
; CHECK-PIC: internal_le(TPOFF)
+ ; EMU: __emutls_get_address
}
+
+
+; ----- emulated specified -----
+
+; External declaration has no initializer.
+; Internal definition has initializer.
+
+; EMU-NOT: __emutls_t.external_gd
+; EMU-NOT: __emutls_v.external_gd
+; EMU: .align 2
+; EMU-LABEL: __emutls_v.internal_gd:
+; EMU-NEXT: .long 4
+; EMU-NEXT: .long 4
+; EMU-NEXT: .long 0
+; EMU-NEXT: .long __emutls_t.internal_gd
+; EMU-LABEL: __emutls_t.internal_gd:
+; EMU-NEXT: .long 42
+; EMU-NOT: __emutls_t.external_gd
+
+; __emutls_t and __emutls_v are the same for PIC and non-PIC modes.
+
+; EMU-NOT: __emutls_t.external_gd
+; EMU-NOT: __emutls_v.external_gd
+; EMU: .align 2
+; EMU-LABEL: __emutls_v.internal_le:
+; EMU-NEXT: .long 4
+; EMU-NEXT: .long 4
+; EMU-NEXT: .long 0
+; EMU-NEXT: .long __emutls_t.internal_le
+; EMU-LABEL: __emutls_t.internal_le:
+; EMU-NEXT: .long 42
+; EMU-NOT: __emutls_t.external_le
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
index 7e17b13a3c99..94cadeedd938 100644
--- a/test/CodeGen/ARM/tls3.ll
+++ b/test/CodeGen/ARM/tls3.ll
@@ -1,11 +1,34 @@
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
; RUN: grep "tbss"
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=NOEMU
+; RUN: llc < %s -emulated-tls -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: FileCheck %s -check-prefix=CHECK -check-prefix=EMU
%struct.anon = type { i32, i32 }
-@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1]
+@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1]
define i32 @main() {
entry:
- %tmp2 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @teste, i32 0, i32 0), align 8 ; <i32> [#uses=1]
- ret i32 %tmp2
+ %tmp2 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @teste, i32 0, i32 0), align 8 ; <i32> [#uses=1]
+ ret i32 %tmp2
}
+
+; CHECK-LABEL: main:
+; NOEMU-NOT: __emutls_get_address
+
+; NOEMU: .section .tbss
+; NOEMU-LABEL: teste:
+; NOEMU-NEXT: .zero 8
+
+; CHECK-NOT: __emutls_t.teste
+
+; EMU: .align 2
+; EMU-LABEL: __emutls_v.teste:
+; EMU-NEXT: .long 8
+; EMU-NEXT: .long 4
+; EMU-NEXT: .long 0
+; EMU-NEXT: .long 0
+
+; CHECK-NOT: teste:
+; CHECK-NOT: __emutls_t.teste
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index 0be3917ffa26..4e16bda6c4d9 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple=arm-eabi -pre-RA-sched=source %s -o - \
+; RUN: llc -mtriple=arm-eabi -pre-RA-sched=source -mattr=+strict-align %s -o - \
; RUN: | FileCheck %s -check-prefix=EXPANDED
-; RUN: llc -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -mattr=-neon -arm-strict-align -pre-RA-sched=source %s -o - \
+; RUN: llc -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -mattr=-neon,+strict-align -pre-RA-sched=source %s -o - \
; RUN: | FileCheck %s -check-prefix=EXPANDED
; RUN: llc -mtriple=armv6-apple-darwin -mcpu=cortex-a8 %s -o - \
diff --git a/test/CodeGen/ARM/unaligned_load_store_vfp.ll b/test/CodeGen/ARM/unaligned_load_store_vfp.ll
new file mode 100644
index 000000000000..90d17e19c286
--- /dev/null
+++ b/test/CodeGen/ARM/unaligned_load_store_vfp.ll
@@ -0,0 +1,98 @@
+; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s
+
+define float @test_load_s32_float(i32* %addr) {
+; CHECK-LABEL: test_load_s32_float:
+; CHECK: ldr [[TMP:r[0-9]+]], [r0]
+; CHECK: vmov [[RES_INT:s[0-9]+]], [[TMP]]
+; CHECK: vcvt.f32.s32 s0, [[RES_INT]]
+
+ %val = load i32, i32* %addr, align 1
+ %res = sitofp i32 %val to float
+ ret float %res
+}
+
+define double @test_load_s32_double(i32* %addr) {
+; CHECK-LABEL: test_load_s32_double:
+; CHECK: ldr [[TMP:r[0-9]+]], [r0]
+; CHECK: vmov [[RES_INT:s[0-9]+]], [[TMP]]
+; CHECK: vcvt.f64.s32 d0, [[RES_INT]]
+
+ %val = load i32, i32* %addr, align 1
+ %res = sitofp i32 %val to double
+ ret double %res
+}
+
+define float @test_load_u32_float(i32* %addr) {
+; CHECK-LABEL: test_load_u32_float:
+; CHECK: ldr [[TMP:r[0-9]+]], [r0]
+; CHECK: vmov [[RES_INT:s[0-9]+]], [[TMP]]
+; CHECK: vcvt.f32.u32 s0, [[RES_INT]]
+
+ %val = load i32, i32* %addr, align 1
+ %res = uitofp i32 %val to float
+ ret float %res
+}
+
+define double @test_load_u32_double(i32* %addr) {
+; CHECK-LABEL: test_load_u32_double:
+; CHECK: ldr [[TMP:r[0-9]+]], [r0]
+; CHECK: vmov [[RES_INT:s[0-9]+]], [[TMP]]
+; CHECK: vcvt.f64.u32 d0, [[RES_INT]]
+
+ %val = load i32, i32* %addr, align 1
+ %res = uitofp i32 %val to double
+ ret double %res
+}
+
+define void @test_store_f32(float %in, float* %addr) {
+; CHECK-LABEL: test_store_f32:
+; CHECK: vmov [[TMP:r[0-9]+]], s0
+; CHECK: str [[TMP]], [r0]
+
+ store float %in, float* %addr, align 1
+ ret void
+}
+
+define void @test_store_float_s32(float %in, i32* %addr) {
+; CHECK-LABEL: test_store_float_s32:
+; CHECK: vcvt.s32.f32 [[TMP:s[0-9]+]], s0
+; CHECK: vmov [[TMP_INT:r[0-9]+]], [[TMP]]
+; CHECK: str [[TMP_INT]], [r0]
+
+ %val = fptosi float %in to i32
+ store i32 %val, i32* %addr, align 1
+ ret void
+}
+
+define void @test_store_double_s32(double %in, i32* %addr) {
+; CHECK-LABEL: test_store_double_s32:
+; CHECK: vcvt.s32.f64 [[TMP:s[0-9]+]], d0
+; CHECK: vmov [[TMP_INT:r[0-9]+]], [[TMP]]
+; CHECK: str [[TMP_INT]], [r0]
+
+ %val = fptosi double %in to i32
+ store i32 %val, i32* %addr, align 1
+ ret void
+}
+
+define void @test_store_float_u32(float %in, i32* %addr) {
+; CHECK-LABEL: test_store_float_u32:
+; CHECK: vcvt.u32.f32 [[TMP:s[0-9]+]], s0
+; CHECK: vmov [[TMP_INT:r[0-9]+]], [[TMP]]
+; CHECK: str [[TMP_INT]], [r0]
+
+ %val = fptoui float %in to i32
+ store i32 %val, i32* %addr, align 1
+ ret void
+}
+
+define void @test_store_double_u32(double %in, i32* %addr) {
+; CHECK-LABEL: test_store_double_u32:
+; CHECK: vcvt.u32.f64 [[TMP:s[0-9]+]], d0
+; CHECK: vmov [[TMP_INT:r[0-9]+]], [[TMP]]
+; CHECK: str [[TMP_INT]], [r0]
+
+ %val = fptoui double %in to i32
+ store i32 %val, i32* %addr, align 1
+ ret void
+}
diff --git a/test/CodeGen/ARM/usat-lower.ll b/test/CodeGen/ARM/usat-lower.ll
new file mode 100644
index 000000000000..58d3bba5a1f8
--- /dev/null
+++ b/test/CodeGen/ARM/usat-lower.ll
@@ -0,0 +1,11 @@
+; RUN: not llc < %s -O1 -mtriple=armv6-none-none-eabi 2>&1 | FileCheck %s
+; RUN: not llc < %s -O1 -mtriple=thumbv7-none-none-eabi 2>&1 | FileCheck %s
+
+; immediate argument < lower-bound
+; CHECK: LLVM ERROR: Cannot select: intrinsic %llvm.arm.usat
+define i32 @usat1() nounwind {
+ %tmp = call i32 @llvm.arm.usat(i32 128, i32 -1)
+ ret i32 %tmp
+}
+
+declare i32 @llvm.arm.usat(i32, i32) nounwind readnone
diff --git a/test/CodeGen/ARM/usat-upper.ll b/test/CodeGen/ARM/usat-upper.ll
new file mode 100644
index 000000000000..84ad694725b4
--- /dev/null
+++ b/test/CodeGen/ARM/usat-upper.ll
@@ -0,0 +1,11 @@
+; RUN: not llc < %s -O1 -mtriple=armv6-none-none-eabi 2>&1 | FileCheck %s
+; RUN: not llc < %s -O1 -mtriple=thumbv7-none-none-eabi 2>&1 | FileCheck %s
+
+; immediate argument > upper-bound
+; CHECK: LLVM ERROR: Cannot select: intrinsic %llvm.arm.usat
+define i32 @usat1() nounwind {
+ %tmp = call i32 @llvm.arm.usat(i32 128, i32 32)
+ ret i32 %tmp
+}
+
+declare i32 @llvm.arm.usat(i32, i32) nounwind readnone
diff --git a/test/CodeGen/ARM/v7k-abi-align.ll b/test/CodeGen/ARM/v7k-abi-align.ll
new file mode 100644
index 000000000000..e9b67f22edf2
--- /dev/null
+++ b/test/CodeGen/ARM/v7k-abi-align.ll
@@ -0,0 +1,152 @@
+; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s
+
+%struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }
+
+define i32 @test_i64_align() {
+; CHECK-LABEL: test_i64_align:
+; CHECL: movs r0, #8
+ ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
+}
+
+define i32 @test_f64_align() {
+; CHECK-LABEL: test_f64_align:
+; CHECL: movs r0, #24
+ ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
+}
+
+define i32 @test_v2f32_align() {
+; CHECK-LABEL: test_v2f32_align:
+; CHECL: movs r0, #40
+ ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
+}
+
+define i32 @test_v4f32_align() {
+; CHECK-LABEL: test_v4f32_align:
+; CHECL: movs r0, #64
+ ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
+}
+
+; Key point here is than an extra register has to be saved so that the DPRs end
+; up in an aligned location (as prologue/epilogue inserter had calculated).
+define void @test_dpr_unwind_align() {
+; CHECK-LABEL: test_dpr_unwind_align:
+; CHECK: push {r5, r6, r7, lr}
+; CHECK-NOT: sub sp
+; CHECK: vpush {d8, d9}
+; CHECK: .cfi_offset d9, -24
+; CHECK: .cfi_offset d8, -32
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK-NOT: add sp,
+; CHECK: vpop {d8, d9}
+; CHECK-NOT: add sp,
+; CHECK: pop {r5, r6, r7, pc}
+
+ call void asm sideeffect "", "~{r6},~{d8},~{d9}"()
+
+ ; Whatever
+ call i32 @test_i64_align()
+ ret void
+}
+
+; This time, there's no viable way to tack CS-registers onto the list: a real SP
+; adjustment needs to be performed to put d8 and d9 where they should be.
+define void @test_dpr_unwind_align_manually() {
+; CHECK-LABEL: test_dpr_unwind_align_manually:
+; CHECK: push {r4, r5, r6, r7, lr}
+; CHECK-NOT: sub sp
+; CHECK: push.w {r8, r11}
+; CHECK: sub sp, #4
+; CHECK: vpush {d8, d9}
+; CHECK: .cfi_offset d9, -40
+; CHECK: .cfi_offset d8, -48
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK-NOT: add sp,
+; CHECK: vpop {d8, d9}
+; CHECK: add sp, #4
+; CHECK: pop.w {r8, r11}
+; CHECK: pop {r4, r5, r6, r7, pc}
+
+ call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()
+
+ ; Whatever
+ call i32 @test_i64_align()
+ ret void
+}
+
+; If there's only a CS1 area, the sub should be in the right place:
+define void @test_dpr_unwind_align_just_cs1() {
+; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
+; CHECK: push {r4, r5, r6, r7, lr}
+; CHECK: sub sp, #4
+; CHECK: vpush {d8, d9}
+; CHECK: .cfi_offset d9, -32
+; CHECK: .cfi_offset d8, -40
+; CHECK: sub sp, #8
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK: add sp, #8
+; CHECK: vpop {d8, d9}
+; CHECK: add sp, #4
+; CHECK: pop {r4, r5, r6, r7, pc}
+
+ call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()
+
+ ; Whatever
+ call i32 @test_i64_align()
+ ret void
+}
+
+; If there are no DPRs, we shouldn't try to align the stack in stages anyway
+define void @test_dpr_unwind_align_no_dprs() {
+; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
+; CHECK: push {r4, r5, r6, r7, lr}
+; CHECK: sub sp, #12
+; [...]
+; CHECK: bl _test_i64_align
+; CHECK: add sp, #12
+; CHECK: pop {r4, r5, r6, r7, pc}
+
+ call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()
+
+ ; Whatever
+ call i32 @test_i64_align()
+ ret void
+}
+
+; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on
+; the stack.
+define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) {
+; CHECK-LABEL: test_v128_stack_pass:
+; CHECK: add r[[ADDR:[0-9]+]], sp, #16
+; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128]
+
+ ret <4 x float> %in
+}
+
+declare void @varargs(i32, ...)
+
+; When varargs are enabled, we go down a different route. Still want 128-bit
+; alignment though.
+define void @test_v128_stack_pass_varargs(<4 x float> %in) {
+; CHECK-LABEL: test_v128_stack_pass_varargs:
+; CHECK: add r[[ADDR:[0-9]+]], sp, #16
+; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128]
+
+ call void(i32, ...) @varargs(i32 undef, [3 x i32] undef, float undef, <4 x float> %in)
+ ret void
+}
+
+; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give
+; a single pointer), 64-bit quantities must be pass
+define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
+; CHECK-LABEL: test_64bit_gpr_align:
+; CHECK: ldr [[RHS:r[0-9]+]], [sp]
+; CHECK: adds r0, [[RHS]], r2
+; CHECK: adc r1, r3, #0
+
+ %ext = zext i32 %sp to i64
+ %sum = add i64 %ext, %r2_r3
+ ret i64 %sum
+}
diff --git a/test/CodeGen/ARM/v7k-libcalls.ll b/test/CodeGen/ARM/v7k-libcalls.ll
new file mode 100644
index 000000000000..a1dfb07ca614
--- /dev/null
+++ b/test/CodeGen/ARM/v7k-libcalls.ll
@@ -0,0 +1,154 @@
+; RUN: llc -mtriple=armv7k-apple-watchos2.0 -mcpu=cortex-a7 < %s | FileCheck %s
+
+define arm_aapcs_vfpcc float @t1(float %a, float %b) {
+entry:
+; CHECK: t1
+; CHECK-NOT: vmov
+; CHECK: vadd.f32
+ %a.addr = alloca float, align 4
+ %b.addr = alloca float, align 4
+ store float %a, float* %a.addr, align 4
+ store float %b, float* %b.addr, align 4
+ %0 = load float, float* %a.addr, align 4
+ %1 = load float, float* %b.addr, align 4
+ %add = fadd float %0, %1
+ ret float %add
+}
+
+define arm_aapcs_vfpcc double @t2(double %a, double %b) {
+entry:
+; CHECK: t2
+; CHECK-NOT: vmov
+; CHECK: vadd.f64
+ %a.addr = alloca double, align 8
+ %b.addr = alloca double, align 8
+ store double %a, double* %a.addr, align 8
+ store double %b, double* %b.addr, align 8
+ %0 = load double, double* %a.addr, align 8
+ %1 = load double, double* %b.addr, align 8
+ %add = fadd double %0, %1
+ ret double %add
+}
+
+define arm_aapcs_vfpcc i64 @t3(double %ti) {
+entry:
+; CHECK-LABEL: t3:
+; CHECK-NOT: vmov
+; CHECK: bl ___fixunsdfdi
+ %conv = fptoui double %ti to i64
+ ret i64 %conv
+}
+
+define arm_aapcs_vfpcc i64 @t4(double %ti) {
+entry:
+; CHECK-LABEL: t4:
+; CHECK-NOT: vmov
+; CHECK: bl ___fixdfdi
+ %conv = fptosi double %ti to i64
+ ret i64 %conv
+}
+
+define arm_aapcs_vfpcc double @t5(i64 %ti) {
+entry:
+; CHECK-LABEL: t5:
+; CHECK: bl ___floatundidf
+; CHECK-NOT: vmov
+; CHECK: pop
+ %conv = uitofp i64 %ti to double
+ ret double %conv
+}
+
+define arm_aapcs_vfpcc double @t6(i64 %ti) {
+entry:
+; CHECK-LABEL: t6:
+; CHECK: bl ___floatdidf
+; CHECK-NOT: vmov
+; CHECK: pop
+ %conv = sitofp i64 %ti to double
+ ret double %conv
+}
+
+define arm_aapcs_vfpcc float @t7(i64 %ti) {
+entry:
+; CHECK-LABEL: t7:
+; CHECK: bl ___floatundisf
+; CHECK-NOT: vmov
+; CHECK: pop
+ %conv = uitofp i64 %ti to float
+ ret float %conv
+}
+
+define arm_aapcs_vfpcc float @t8(i64 %ti) {
+entry:
+; CHECK-LABEL: t8:
+; CHECK: bl ___floatdisf
+; CHECK-NOT: vmov
+; CHECK: pop
+ %conv = sitofp i64 %ti to float
+ ret float %conv
+}
+
+define arm_aapcs_vfpcc double @t9(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %a, float %b) {
+entry:
+; CHECK-LABEL: t9:
+; CHECK-NOT: vmov
+; CHECK: vldr
+ %add = fadd float %a, %b
+ %conv = fpext float %add to double
+ ret double %conv
+}
+
+define arm_aapcs_vfpcc double @t10(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %a, float %b, double %c) {
+entry:
+; CHECK-LABEL: t10:
+; CHECK-NOT: vmov
+; CHECK: vldr
+ %add = fadd double %a, %c
+ ret double %add
+}
+
+define arm_aapcs_vfpcc float @t11(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, float %a, double %b, float %c) {
+entry:
+; CHECK-LABEL: t11:
+; CHECK: vldr
+ %add = fadd float %a, %c
+ ret float %add
+}
+
+; rdar://16039676
+define arm_aapcs_vfpcc double @t12(double %a, double %b) {
+entry:
+; CHECK-LABEL: t12:
+; CHECK: vstr
+ %add = fadd double %a, %b
+ %sub = fsub double %a, %b
+ %call = tail call arm_aapcs_vfpcc double @x(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double %add, float 0.000000e+00, double %sub)
+ ret double %call
+}
+
+define arm_aapcs_vfpcc double @t13(double %x) {
+entry:
+; CHECK-LABEL: t13:
+; CHECK-NOT: vmov
+; CHECK: bl ___sincos_stret
+ %call = tail call arm_aapcs_vfpcc double @cos(double %x)
+ %call1 = tail call arm_aapcs_vfpcc double @sin(double %x)
+ %mul = fmul double %call, %call1
+ ret double %mul
+}
+
+define arm_aapcs_vfpcc double @t14(double %x) {
+; CHECK-LABEL: t14:
+; CHECK-NOT: vmov
+; CHECK: b ___exp10
+ %__exp10 = tail call double @__exp10(double %x) #1
+ ret double %__exp10
+}
+
+declare arm_aapcs_vfpcc double @x(double, double, double, double, double, double, double, float, double)
+declare arm_aapcs_vfpcc double @cos(double) #0
+declare arm_aapcs_vfpcc double @sin(double) #0
+declare double @__exp10(double)
+
+attributes #0 = { readnone }
+attributes #1 = { readonly }
diff --git a/test/CodeGen/ARM/v7k-sincos.ll b/test/CodeGen/ARM/v7k-sincos.ll
new file mode 100644
index 000000000000..b89d4dc8120b
--- /dev/null
+++ b/test/CodeGen/ARM/v7k-sincos.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s
+
+declare double @sin(double) nounwind readnone
+declare double @cos(double) nounwind readnone
+
+define double @test_stret(double %in) {
+; CHECK-LABEL: test_stret:
+; CHECK: blx ___sincos_stret
+; CHECK-NOT: ldr
+; CHECK: vadd.f64 d0, d0, d1
+
+ %sin = call double @sin(double %in)
+ %cos = call double @cos(double %in)
+ %sum = fadd double %sin, %cos
+ ret double %sum
+}
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 2cd33cf3a422..e34b3e5e365a 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -196,8 +196,8 @@ entry:
%3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = add <8 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%5 = trunc <8 x i16> %4 to <8 x i8>
- tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %5, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* undef, <8 x i8> %5, i32 1)
unreachable
}
-declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v8i8(i8*, <8 x i8>, i32) nounwind
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
index 9491c15aef58..fc171889f5f8 100644
--- a/test/CodeGen/ARM/vcombine.ll
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -2,11 +2,15 @@
; RUN: llc -mtriple=armeb-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
-; CHECK: vcombine8
-; CHECK-LE: vmov r0, r1, d16
-; CHECK-LE: vmov r2, r3, d17
-; CHECK-BE: vmov r1, r0, d16
-; CHECK-BE: vmov r3, r2, d17
+; CHECK-LABEL: vcombine8
+; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
+; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
+
+; CHECK-LE-DAG: vmov r0, r1, [[LD0]]
+; CHECK-LE-DAG: vmov r2, r3, [[LD1]]
+
+; CHECK-BE-DAG: vmov r1, r0, d16
+; CHECK-BE-DAG: vmov r3, r2, d17
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -14,11 +18,15 @@ define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
}
define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
-; CHECK: vcombine16
-; CHECK-LE: vmov r0, r1, d16
-; CHECK-LE: vmov r2, r3, d17
-; CHECK-BE: vmov r1, r0, d16
-; CHECK-BE: vmov r3, r2, d17
+; CHECK-LABEL: vcombine16
+; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
+; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
+
+; CHECK-LE-DAG: vmov r0, r1, [[LD0]]
+; CHECK-LE-DAG: vmov r2, r3, [[LD1]]
+
+; CHECK-BE-DAG: vmov r1, r0, d16
+; CHECK-BE-DAG: vmov r3, r2, d17
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -26,9 +34,14 @@ define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
}
define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
-; CHECK: vcombine32
-; CHECK-LE: vmov r0, r1, d16
-; CHECK-LE: vmov r2, r3, d17
+; CHECK-LABEL: vcombine32
+
+; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
+; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
+
+; CHECK-LE: vmov r0, r1, [[LD0]]
+; CHECK-LE: vmov r2, r3, [[LD1]]
+
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
%tmp1 = load <2 x i32>, <2 x i32>* %A
@@ -38,9 +51,14 @@ define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
}
define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
-; CHECK: vcombinefloat
-; CHECK-LE: vmov r0, r1, d16
-; CHECK-LE: vmov r2, r3, d17
+; CHECK-LABEL: vcombinefloat
+
+; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
+; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
+
+; CHECK-LE: vmov r0, r1, [[LD0]]
+; CHECK-LE: vmov r2, r3, [[LD1]]
+
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
%tmp1 = load <2 x float>, <2 x float>* %A
@@ -50,11 +68,15 @@ define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
}
define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
-; CHECK: vcombine64
-; CHECK-LE: vmov r0, r1, d16
-; CHECK-LE: vmov r2, r3, d17
-; CHECK-BE: vmov r1, r0, d16
-; CHECK-BE: vmov r3, r2, d17
+; CHECK-LABEL: vcombine64
+; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
+; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
+
+; CHECK-LE: vmov r0, r1, [[LD0]]
+; CHECK-LE: vmov r2, r3, [[LD1]]
+
+; CHECK-BE: vmov r1, r0, [[LD0]]
+; CHECK-BE: vmov r3, r2, [[LD1]]
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
diff --git a/test/CodeGen/ARM/vcvt_combine.ll b/test/CodeGen/ARM/vcvt_combine.ll
index 0c856e8d7617..9a8f084d2303 100644
--- a/test/CodeGen/ARM/vcvt_combine.ll
+++ b/test/CodeGen/ARM/vcvt_combine.ll
@@ -1,95 +1,64 @@
; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
-@in = global float 0x400921FA00000000, align 4
-
; Test signed conversion.
-; CHECK: t0
-; CHECK-NOT: vmul
-define void @t0() nounwind {
-entry:
- %tmp = load float, float* @in, align 4
- %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
- %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
- %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
+; CHECK-LABEL: @t0
+; CHECK: vcvt.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}, #2
+; CHECK: bx lr
+define <2 x i32> @t0(<2 x float> %in) {
+ %mul.i = fmul <2 x float> %in, <float 4.0, float 4.0>
%vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
- tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind
- ret void
+ ret <2 x i32> %vcvt.i
}
-declare void @foo_int32x2_t(<2 x i32>)
-
; Test unsigned conversion.
-; CHECK: t1
-; CHECK-NOT: vmul
-define void @t1() nounwind {
-entry:
- %tmp = load float, float* @in, align 4
- %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
- %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
- %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
+; CHECK-LABEL: @t1
+; CHECK: vcvt.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}, #3
+; CHECK: bx lr
+define <2 x i32> @t1(<2 x float> %in) {
+ %mul.i = fmul <2 x float> %in, <float 8.0, float 8.0>
%vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
- tail call void @foo_uint32x2_t(<2 x i32> %vcvt.i) nounwind
- ret void
+ ret <2 x i32> %vcvt.i
}
-declare void @foo_uint32x2_t(<2 x i32>)
-
; Test which should not fold due to non-power of 2.
-; CHECK: t2
+; CHECK-LABEL: @t2
; CHECK: vmul
-define void @t2() nounwind {
+; CHECK: vcvt.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bx lr
+define <2 x i32> @t2(<2 x float> %in) {
entry:
- %tmp = load float, float* @in, align 4
- %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
- %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
- %mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000>
+ %mul.i = fmul <2 x float> %in, <float 0x401B333340000000, float 0x401B333340000000>
%vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
- tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind
- ret void
+ ret <2 x i32> %vcvt.i
}
; Test which should not fold due to power of 2 out of range.
-; CHECK: t3
+; CHECK-LABEL: @t3
; CHECK: vmul
-define void @t3() nounwind {
-entry:
- %tmp = load float, float* @in, align 4
- %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
- %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
- %mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000>
+; CHECK: vcvt.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bx lr
+define <2 x i32> @t3(<2 x float> %in) {
+ %mul.i = fmul <2 x float> %in, <float 0x4200000000000000, float 0x4200000000000000>
%vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
- tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind
- ret void
+ ret <2 x i32> %vcvt.i
}
; Test which case where const is max power of 2 (i.e., 2^32).
-; CHECK: t4
-; CHECK-NOT: vmul
-define void @t4() nounwind {
-entry:
- %tmp = load float, float* @in, align 4
- %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
- %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
- %mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000>
+; CHECK-LABEL: @t4
+; CHECK: vcvt.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}, #32
+; CHECK: bx lr
+define <2 x i32> @t4(<2 x float> %in) {
+ %mul.i = fmul <2 x float> %in, <float 0x41F0000000000000, float 0x41F0000000000000>
%vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
- tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind
- ret void
+ ret <2 x i32> %vcvt.i
}
; Test quadword.
-; CHECK: t5
-; CHECK-NOT: vmul
-define void @t5() nounwind {
-entry:
- %tmp = load float, float* @in, align 4
- %vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0
- %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1
- %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2
- %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %tmp, i32 3
- %mul.i = fmul <4 x float> %vecinit6.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+; CHECK-LABEL: @t5
+; CHECK: vcvt.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}, #3
+; CHECK: bx lr
+define <4 x i32> @t5(<4 x float> %in) {
+ %mul.i = fmul <4 x float> %in, <float 8.0, float 8.0, float 8.0, float 8.0>
%vcvt.i = fptosi <4 x float> %mul.i to <4 x i32>
- tail call void @foo_int32x4_t(<4 x i32> %vcvt.i) nounwind
- ret void
+ ret <4 x i32> %vcvt.i
}
-
-declare void @foo_int32x4_t(<4 x i32>)
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
index 8c6e4ba35054..8511dbcb6876 100644
--- a/test/CodeGen/ARM/vdiv_combine.ll
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -136,3 +136,20 @@ define <2 x double> @fix_i64_to_double(<2 x i64> %in) {
ret <2 x double> %shift
}
+; Don't combine with 8 lanes. Just make sure things don't crash.
+; CHECK-LABEL: test7
+define <8 x float> @test7(<8 x i32> %in) nounwind {
+entry:
+ %vcvt.i = sitofp <8 x i32> %in to <8 x float>
+ %div.i = fdiv <8 x float> %vcvt.i, <float 8.0, float 8.0, float 8.0, float 8.0, float 8.0, float 8.0, float 8.0, float 8.0>
+ ret <8 x float> %div.i
+}
+
+; Can combine splat with an undef.
+; CHECK-LABEL: test8
+; CHECK: vcvt.f32.s32 q{{[0-9]+}}, q{{[0-9]+}}, #1
+define <4 x float> @test8(<4 x i32> %in) {
+ %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float undef>
+ ret <4 x float> %div.i
+}
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index 36eebbfc4650..25c4807d9862 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -364,3 +364,19 @@ define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) {
%sub = fsub <4 x float> %splat.splat, %p
ret <4 x float> %sub
}
+
+; Also make sure we don't barf on variable-index extractelts, where we almost
+; could have generated a vdup.
+
+define <8 x i8> @check_i8_varidx(<16 x i8> %v, i32 %idx) {
+; CHECK-LABEL: check_i8_varidx:
+; CHECK: mov r[[FP:[0-9]+]], sp
+; CHECK: ldr r[[IDX:[0-9]+]], [r[[FP]], #4]
+; CHECK: mov r[[SPCOPY:[0-9]+]], sp
+; CHECK: vst1.64 {d{{.*}}, d{{.*}}}, [r[[SPCOPY]]:128], r[[IDX]]
+; CHECK: vld1.8 {d{{.*}}[]}, [r[[SPCOPY]]]
+ %x = extractelement <16 x i8> %v, i32 %idx
+ %1 = insertelement <8 x i8> undef, i8 %x, i32 0
+ %2 = insertelement <8 x i8> %1, i8 %x, i32 1
+ ret <8 x i8> %2
+}
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index 29f4bb972a24..2ef2a0697ec9 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -78,11 +78,11 @@ entry:
%2 = shufflevector <4 x i16> %1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = add <8 x i16> %2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%4 = trunc <8 x i16> %3 to <8 x i8>
- tail call void @llvm.arm.neon.vst1.v8i8(i8* undef, <8 x i8> %4, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* undef, <8 x i8> %4, i32 1)
unreachable
}
-declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v8i8(i8*, <8 x i8>, i32) nounwind
; Test that loads and stores of i64 vector elements are handled as f64 values
; so they are not split up into i32 values. Radar 8755338.
diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll
index 17f134f458a2..a638c2bdb9be 100644
--- a/test/CodeGen/ARM/vector-load.ll
+++ b/test/CodeGen/ARM/vector-load.ll
@@ -238,12 +238,12 @@ define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
-;CHECK: ldr.w r[[PTRREG:[0-9]+]], [r0]
+;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16
-;CHECK: str.w r[[INCREG]], [r0]
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
+;CHECK: str r[[INCREG]], [r0]
%A = load <4 x i8>*, <4 x i8>** %ptr
%lA = load <4 x i8>, <4 x i8>* %A, align 4
%inc = getelementptr <4 x i8>, <4 x i8>* %A, i38 4
diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll
index 30baa9a20ddc..161bbf1d0fde 100644
--- a/test/CodeGen/ARM/vector-store.ll
+++ b/test/CodeGen/ARM/vector-store.ll
@@ -228,9 +228,9 @@ define void @truncstore_v4i32tov4i8(<4 x i8>** %ptr, <4 x i32> %val) {
;CHECK: ldr.w r9, [sp]
;CHECK: vmov {{d[0-9]+}}, r3, r9
;CHECK: vmov {{d[0-9]+}}, r1, r2
+;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vmovn.i32 [[VECLO:d[0-9]+]], {{q[0-9]+}}
;CHECK: vuzp.8 [[VECLO]], {{d[0-9]+}}
-;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32]
%A = load <4 x i8>*, <4 x i8>** %ptr
%trunc = trunc <4 x i32> %val to <4 x i8>
@@ -243,10 +243,10 @@ define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val)
;CHECK: ldr.w r9, [sp]
;CHECK: vmov {{d[0-9]+}}, r3, r9
;CHECK: vmov {{d[0-9]+}}, r1, r2
-;CHECK: movs [[IMM16:r[0-9]+]], #16
+;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vmovn.i32 [[VECLO:d[0-9]+]], {{q[0-9]+}}
;CHECK: vuzp.8 [[VECLO]], {{d[0-9]+}}
-;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
+;CHECK: movs [[IMM16:r[0-9]+]], #16
;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]]
;CHECK: str r[[PTRREG]], [r0]
%A = load <4 x i8>*, <4 x i8>** %ptr
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index 72ecf0ef0626..394ecfb281fc 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - -lower-interleaved-accesses=false | FileCheck %s
define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextd:
@@ -196,3 +196,35 @@ define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>
store <4 x i16> %tmp7, <4 x i16>* %dest, align 4
ret void
}
+
+define <4 x i32> @test_reverse_and_extract(<2 x i32>* %A) {
+entry:
+ ; CHECK-LABEL: test_reverse_and_extract
+ ; CHECK-NOT: vtrn
+ ; CHECK: vrev
+ ; CHECK: vext
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 1, i32 0>
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_dup_and_extract(<2 x i32>* %A) {
+entry:
+ ; CHECK-LABEL: test_dup_and_extract
+ ; CHECK-NOT: vtrn
+ ; CHECK: vdup
+ ; CHECK: vext
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_zip_and_extract(<2 x i32>* %A) {
+entry:
+ ; CHECK-LABEL: test_zip_and_extract
+ ; CHECK: vzip
+ ; CHECK: vext
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 1>
+ ret <4 x i32> %0
+}
diff --git a/test/CodeGen/ARM/vfp-reg-stride.ll b/test/CodeGen/ARM/vfp-reg-stride.ll
new file mode 100644
index 000000000000..c5339db68e30
--- /dev/null
+++ b/test/CodeGen/ARM/vfp-reg-stride.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mcpu=swift -mtriple=thumbv7s-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-STRIDE4
+; RUN: llc -mcpu=swift -mtriple=thumbv7k-apple-watchos -o - %s | FileCheck %s --check-prefix=CHECK-STRIDE4-WATCH
+; RUN: llc -mcpu=cortex-a57 -mtriple=thumbv7-linux-gnueabihf -o - %s | FileCheck %s --check-prefix=CHECK-GENERIC
+
+define void @test_reg_stride(double %a, double %b) {
+; CHECK-STRIDE4-LABEL: test_reg_stride:
+; CHECK-STRIDE4-DAG: vmov d16, r
+; CHECK-STRIDE4-DAG: vmov d18, r
+
+; CHECK-STRIDE4-WATCH-LABEL: test_reg_stride:
+; CHECK-STRIDE4-WATCH-DAG: vmov.f64 d16, d
+; CHECK-STRIDE4-WATCH-DAG: vmov.f64 d18, d
+
+; CHECK-GENERIC-LABEL: test_reg_stride:
+; CHECK-GENERIC-DAG: vmov.f64 d16, {{d[01]}}
+; CHECK-GENERIC-DAG: vmov.f64 d17, {{d[01]}}
+
+ call void asm "", "~{r0},~{r1},~{d0},~{d1}"()
+ call arm_aapcs_vfpcc void @eat_doubles(double %a, double %b)
+ ret void
+}
+
+define void @test_stride_minsize(float %a, float %b) minsize {
+; CHECK-STRIDE4-LABEL: test_stride_minsize:
+; CHECK-STRIDE4: vmov d2, {{r[01]}}
+; CHECK-STRIDE4: vmov d3, {{r[01]}}
+
+; CHECK-STRIDE4-WATCH-LABEL: test_stride_minsize:
+; CHECK-STRIDE4-WATCH-DAG: vmov.f32 s4, {{s[01]}}
+; CHECK-STRIDE4-WATCH-DAG: vmov.f32 s8, {{s[01]}}
+
+; CHECK-GENERIC-LABEL: test_stride_minsize:
+; CHECK-GENERIC-DAG: vmov.f32 s4, {{s[01]}}
+; CHECK-GENERIC-DAG: vmov.f32 s6, {{s[01]}}
+ call void asm "", "~{r0},~{r1},~{s0},~{s1},~{d0},~{d1}"()
+ call arm_aapcs_vfpcc void @eat_floats(float %a, float %b)
+ ret void
+}
+
+
+declare arm_aapcs_vfpcc void @eat_doubles(double, double)
+declare arm_aapcs_vfpcc void @eat_floats(float, float)
diff --git a/test/CodeGen/ARM/vfp-regs-dwarf.ll b/test/CodeGen/ARM/vfp-regs-dwarf.ll
index eca0c26e5562..1b2055e5aff1 100644
--- a/test/CodeGen/ARM/vfp-regs-dwarf.ll
+++ b/test/CodeGen/ARM/vfp-regs-dwarf.ll
@@ -10,7 +10,7 @@
; the layout of the VFP registers correctly. The fact that the numbers are
; monotonic in memory is also a nice property to have.
-define void @stack_offsets() {
+define void @stack_offsets() !dbg !4 {
; CHECK-LABEL: stack_offsets:
; CHECK: vpush {d13}
; CHECK: vpush {d11}
@@ -31,11 +31,11 @@ define void @stack_offsets() {
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "tmp.c", directory: "/Users/tim/llvm/build")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "bar", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @stack_offsets, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "tmp.c", directory: "/Users/tim/llvm/build")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
diff --git a/test/CodeGen/ARM/vld-vst-upgrade.ll b/test/CodeGen/ARM/vld-vst-upgrade.ll
new file mode 100644
index 000000000000..fe868f6cb078
--- /dev/null
+++ b/test/CodeGen/ARM/vld-vst-upgrade.ll
@@ -0,0 +1,139 @@
+; RUN: llc -mtriple=arm-eabi -mattr=+neon < %s | FileCheck %s
+
+%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
+%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
+%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
+
+; vld[1234] auto-upgrade tests
+
+; CHECK-LABEL: test_vld1_upgrade:
+; CHECK: vld1.32 {d16}, [r0]
+define <2 x i32> @test_vld1_upgrade(i8* %ptr) {
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %ptr, i32 1)
+ ret <2 x i32> %tmp1
+}
+
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly
+
+; CHECK-LABEL: test_vld2_upgrade:
+; CHECK: vld2.32 {d16, d17}, [r0]
+define %struct.__neon_int32x2x2_t @test_vld2_upgrade(i8* %ptr) {
+ %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %ptr, i32 1)
+ ret %struct.__neon_int32x2x2_t %tmp1
+}
+
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly
+
+; CHECK-LABEL: test_vld3_upgrade:
+; CHECK: vld3.32 {d16, d17, d18}, [r1]
+define %struct.__neon_int32x2x3_t @test_vld3_upgrade(i8* %ptr) {
+ %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %ptr, i32 1)
+ ret %struct.__neon_int32x2x3_t %tmp1
+}
+
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly
+
+; CHECK-LABEL: test_vld4_upgrade:
+; CHECK: vld4.32 {d16, d17, d18, d19}, [r1]
+define %struct.__neon_int32x2x4_t @test_vld4_upgrade(i8* %ptr) {
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %ptr, i32 1)
+ ret %struct.__neon_int32x2x4_t %tmp1
+}
+
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
+
+; vld[234]lane auto-upgrade tests
+
+; CHECK-LABEL: test_vld2lane_upgrade:
+; CHECK: vld2.32 {d16[1], d17[1]}, [r0]
+define %struct.__neon_int32x2x2_t @test_vld2lane_upgrade(i8* %ptr, <2 x i32> %A, <2 x i32> %B) {
+ %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %ptr, <2 x i32> %A, <2 x i32> %B, i32 1, i32 1)
+ ret %struct.__neon_int32x2x2_t %tmp1
+}
+
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+
+; CHECK-LABEL: test_vld3lane_upgrade:
+; CHECK: vld3.32 {d16[1], d17[1], d18[1]}, [r1]
+define %struct.__neon_int32x2x3_t @test_vld3lane_upgrade(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
+ %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32 1, i32 1)
+ ret %struct.__neon_int32x2x3_t %tmp1
+}
+
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+
+; CHECK-LABEL: test_vld4lane_upgrade:
+; CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r1]
+define %struct.__neon_int32x2x4_t @test_vld4lane_upgrade(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) {
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32 1, i32 1)
+ ret %struct.__neon_int32x2x4_t %tmp1
+}
+
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+
+; vst[1234] auto-upgrade tests
+
+; CHECK-LABEL: test_vst1_upgrade:
+; CHECK: vst1.32 {d16}, [r0]
+define void @test_vst1_upgrade(i8* %ptr, <2 x i32> %A) {
+ call void @llvm.arm.neon.vst1.v2i32(i8* %ptr, <2 x i32> %A, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind
+
+; CHECK-LABEL: test_vst2_upgrade:
+; CHECK: vst2.32 {d16, d17}, [r0]
+define void @test_vst2_upgrade(i8* %ptr, <2 x i32> %A, <2 x i32> %B) {
+ call void @llvm.arm.neon.vst2.v2i32(i8* %ptr, <2 x i32> %A, <2 x i32> %B, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
+
+; CHECK-LABEL: test_vst3_upgrade:
+; CHECK: vst3.32 {d16, d17, d18}, [r0]
+define void @test_vst3_upgrade(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
+ call void @llvm.arm.neon.vst3.v2i32(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+
+; CHECK-LABEL: test_vst4_upgrade:
+; CHECK: vst4.32 {d16, d17, d18, d19}, [r0]
+define void @test_vst4_upgrade(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) {
+ call void @llvm.arm.neon.vst4.v2i32(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+
+; vst[234]lane auto-upgrade tests
+
+; CHECK-LABEL: test_vst2lane_upgrade:
+; CHECK: vst2.32 {d16[1], d17[1]}, [r0]
+define void @test_vst2lane_upgrade(i8* %ptr, <2 x i32> %A, <2 x i32> %B) {
+ call void @llvm.arm.neon.vst2lane.v2i32(i8* %ptr, <2 x i32> %A, <2 x i32> %B, i32 1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind
+
+; CHECK-LABEL: test_vst3lane_upgrade:
+; CHECK: vst3.32 {d16[1], d17[1], d18[1]}, [r0]
+define void @test_vst3lane_upgrade(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
+ call void @llvm.arm.neon.vst3lane.v2i32(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32 1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
+
+; CHECK-LABEL: test_vst4lane_upgrade:
+; CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0]
+define void @test_vst4lane_upgrade(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) {
+ call void @llvm.arm.neon.vst4lane.v2i32(i8* %ptr, <2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32 1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index 8064ea4a320a..bdb384769741 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -7,7 +7,7 @@ define <8 x i8> @vld1i8(i8* %A) nounwind {
;CHECK-LABEL: vld1i8:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld1.8 {d16}, [r0:64]
- %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
+ %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %A, i32 16)
ret <8 x i8> %tmp1
}
@@ -15,7 +15,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind {
;CHECK-LABEL: vld1i16:
;CHECK: vld1.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* %tmp0, i32 1)
ret <4 x i16> %tmp1
}
@@ -25,7 +25,7 @@ define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]!
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* %tmp0, i32 1)
%tmp2 = getelementptr i16, i16* %A, i32 4
store i16* %tmp2, i16** %ptr
ret <4 x i16> %tmp1
@@ -35,7 +35,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind {
;CHECK-LABEL: vld1i32:
;CHECK: vld1.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* %tmp0, i32 1)
ret <2 x i32> %tmp1
}
@@ -45,7 +45,7 @@ define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}}
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* %tmp0, i32 1)
%tmp2 = getelementptr i32, i32* %A, i32 %inc
store i32* %tmp2, i32** %ptr
ret <2 x i32> %tmp1
@@ -55,7 +55,7 @@ define <2 x float> @vld1f(float* %A) nounwind {
;CHECK-LABEL: vld1f:
;CHECK: vld1.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0, i32 1)
+ %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* %tmp0, i32 1)
ret <2 x float> %tmp1
}
@@ -63,7 +63,7 @@ define <1 x i64> @vld1i64(i64* %A) nounwind {
;CHECK-LABEL: vld1i64:
;CHECK: vld1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1)
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %tmp0, i32 1)
ret <1 x i64> %tmp1
}
@@ -71,7 +71,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld1Qi8:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld1.8 {d16, d17}, [r0:64]
- %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %A, i32 8)
ret <16 x i8> %tmp1
}
@@ -80,7 +80,7 @@ define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
;CHECK-LABEL: vld1Qi8_update:
;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]!
%A = load i8*, i8** %ptr
- %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %A, i32 8)
%tmp2 = getelementptr i8, i8* %A, i32 16
store i8* %tmp2, i8** %ptr
ret <16 x i8> %tmp1
@@ -91,7 +91,7 @@ define <8 x i16> @vld1Qi16(i16* %A) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld1.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32)
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %tmp0, i32 32)
ret <8 x i16> %tmp1
}
@@ -99,7 +99,7 @@ define <4 x i32> @vld1Qi32(i32* %A) nounwind {
;CHECK-LABEL: vld1Qi32:
;CHECK: vld1.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1)
+ %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* %tmp0, i32 1)
ret <4 x i32> %tmp1
}
@@ -107,7 +107,7 @@ define <4 x float> @vld1Qf(float* %A) nounwind {
;CHECK-LABEL: vld1Qf:
;CHECK: vld1.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0, i32 1)
+ %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %tmp0, i32 1)
ret <4 x float> %tmp1
}
@@ -115,7 +115,7 @@ define <2 x i64> @vld1Qi64(i64* %A) nounwind {
;CHECK-LABEL: vld1Qi64:
;CHECK: vld1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1)
+ %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %tmp0, i32 1)
ret <2 x i64> %tmp1
}
@@ -123,28 +123,28 @@ define <2 x double> @vld1Qf64(double* %A) nounwind {
;CHECK-LABEL: vld1Qf64:
;CHECK: vld1.64
%tmp0 = bitcast double* %A to i8*
- %tmp1 = call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %tmp0, i32 1)
+ %tmp1 = call <2 x double> @llvm.arm.neon.vld1.v2f64.p0i8(i8* %tmp0, i32 1)
ret <2 x double> %tmp1
}
-declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
-declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly
-declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly
-declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly
-declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8*, i32) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8*, i32) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8*, i32) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8*, i32) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32) nounwind readonly
-declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
-declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
-declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
-declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32) nounwind readonly
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8*, i32) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8*, i32) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8*, i32) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8*, i32) nounwind readonly
+declare <2 x double> @llvm.arm.neon.vld1.v2f64.p0i8(i8*, i32) nounwind readonly
; Radar 8355607
; Do not crash if the vld1 result is not used.
define void @unused_vld1_result() {
entry:
- %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1)
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1)
call void @llvm.trap()
unreachable
}
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 391b49152cd9..1ca16587bd91 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -15,7 +15,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
;CHECK-LABEL: vld2i8:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld2.8 {d16, d17}, [r0:64]
- %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
+ %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
%tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -27,7 +27,7 @@ define <4 x i16> @vld2i16(i16* %A) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld2.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32)
+ %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16.p0i8(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -38,7 +38,7 @@ define <2 x i32> @vld2i32(i32* %A) nounwind {
;CHECK-LABEL: vld2i32:
;CHECK: vld2.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -49,7 +49,7 @@ define <2 x float> @vld2f(float* %A) nounwind {
;CHECK-LABEL: vld2f:
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
%tmp4 = fadd <2 x float> %tmp2, %tmp3
@@ -62,7 +62,7 @@ define <2 x float> @vld2f_update(float** %ptr) nounwind {
;CHECK: vld2.32 {d16, d17}, [r1]!
%A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
%tmp4 = fadd <2 x float> %tmp2, %tmp3
@@ -76,7 +76,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld1.64 {d16, d17}, [r0:128]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32)
+ %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64.p0i8(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -87,7 +87,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld2Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64]
- %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
%tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -99,7 +99,7 @@ define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld2Qi8_update:
;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1
%A = load i8*, i8** %ptr
- %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8.p0i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
%tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -113,7 +113,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16)
+ %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16.p0i8(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -125,7 +125,7 @@ define <4 x i32> @vld2Qi32(i32* %A) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64)
+ %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32.p0i8(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -136,20 +136,20 @@ define <4 x float> @vld2Qf(float* %A) nounwind {
;CHECK-LABEL: vld2Qf:
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1
%tmp4 = fadd <4 x float> %tmp2, %tmp3
ret <4 x float> %tmp4
}
-declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly
-declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly
-declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly
-declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64.p0i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly
-declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly
-declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32.p0i8(i8*, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 0d14179ba73a..c3e8ee8691fd 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -16,7 +16,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind {
;CHECK-LABEL: vld3i8:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld3.8 {d16, d17, d18}, [r0:64]
- %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32)
+ %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
%tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -27,7 +27,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
;CHECK-LABEL: vld3i16:
;CHECK: vld3.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -40,7 +40,7 @@ define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}}
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -53,7 +53,7 @@ define <2 x i32> @vld3i32(i32* %A) nounwind {
;CHECK-LABEL: vld3i32:
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -64,7 +64,7 @@ define <2 x float> @vld3f(float* %A) nounwind {
;CHECK-LABEL: vld3f:
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2
%tmp4 = fadd <2 x float> %tmp2, %tmp3
@@ -76,7 +76,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld1.64 {d16, d17, d18}, [r0:64]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -87,7 +87,7 @@ define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
;CHECK-LABEL: vld3i64_update:
;CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16)
%tmp5 = getelementptr i64, i64* %A, i32 3
store i64* %tmp5, i64** %ptr
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
@@ -101,7 +101,7 @@ define <16 x i8> @vld3Qi8(i8* %A) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld3.8 {d16, d18, d20}, [r0:64]!
;CHECK: vld3.8 {d17, d19, d21}, [r0:64]
- %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32)
+ %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8.p0i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
%tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -113,7 +113,7 @@ define <8 x i16> @vld3Qi16(i16* %A) nounwind {
;CHECK: vld3.16
;CHECK: vld3.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -125,7 +125,7 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
;CHECK: vld3.32
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -139,7 +139,7 @@ define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]!
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -153,20 +153,20 @@ define <4 x float> @vld3Qf(float* %A) nounwind {
;CHECK: vld3.32
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2
%tmp4 = fadd <4 x float> %tmp2, %tmp3
ret <4 x float> %tmp4
}
-declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly
-declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly
-declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly
-declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly
-declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly
-declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32.p0i8(i8*, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index 575e0fa717fb..10570039a9d2 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -15,7 +15,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
;CHECK-LABEL: vld4i8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64]
- %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8)
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
%tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -27,7 +27,7 @@ define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld4i8_update:
;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1
%A = load i8*, i8** %ptr
- %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
%tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -41,7 +41,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16)
+ %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16.p0i8(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -53,7 +53,7 @@ define <2 x i32> @vld4i32(i32* %A) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32)
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -64,7 +64,7 @@ define <2 x float> @vld4f(float* %A) nounwind {
;CHECK-LABEL: vld4f:
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2
%tmp4 = fadd <2 x float> %tmp2, %tmp3
@@ -76,7 +76,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld1.64 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -87,7 +87,7 @@ define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
;CHECK-LABEL: vld4i64_update:
;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64)
%tmp5 = getelementptr i64, i64* %A, i32 4
store i64* %tmp5, i64** %ptr
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
@@ -101,7 +101,7 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]!
;CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256]
- %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64)
+ %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8.p0i8(i8* %A, i32 64)
%tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
%tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -114,7 +114,7 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
;CHECK: vld4.16 {d16, d18, d20, d22}, [r0]!
;CHECK: vld4.16 {d17, d19, d21, d23}, [r0]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -128,7 +128,7 @@ define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]!
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
+ %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16.p0i8(i8* %tmp0, i32 8)
%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -142,7 +142,7 @@ define <4 x i32> @vld4Qi32(i32* %A) nounwind {
;CHECK: vld4.32
;CHECK: vld4.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -154,20 +154,20 @@ define <4 x float> @vld4Qf(float* %A) nounwind {
;CHECK: vld4.32
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0, i32 1)
+ %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2
%tmp4 = fadd <4 x float> %tmp2, %tmp3
ret <4 x float> %tmp4
}
-declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
-declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly
-declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly
-declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly
-declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32.p0i8(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32.p0i8(i8*, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index 09304d87d53b..c115a3863d0d 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -66,7 +66,7 @@ define <8 x i8> @vld2dupi8(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi8:
;Check the (default) alignment value.
;CHECK: vld2.8 {d16[], d17[]}, [r0]
- %tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
+ %tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
%tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 1
@@ -80,7 +80,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
;CHECK: vld2.16 {d16[], d17[]}, [r0]
- %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
@@ -95,7 +95,7 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
;CHECK: vld2.16 {d16[], d17[]}, [r1]!
%A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
- %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
@@ -110,7 +110,7 @@ define <2 x i32> @vld2dupi32(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld2.32 {d16[], d17[]}, [r0:64]
- %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
+ %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
@@ -119,9 +119,9 @@ define <2 x i32> @vld2dupi32(i8* %A) nounwind {
ret <2 x i32> %tmp5
}
-declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -131,7 +131,7 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld3dupi8_update:
;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
%A = load i8*, i8** %ptr
- %tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
+ %tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 1
@@ -149,7 +149,7 @@ define <4 x i16> @vld3dupi16(i8* %A) nounwind {
;CHECK-LABEL: vld3dupi16:
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
- %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
+ %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
@@ -161,8 +161,8 @@ define <4 x i16> @vld3dupi16(i8* %A) nounwind {
ret <4 x i16> %tmp8
}
-declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
@@ -173,7 +173,7 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
%A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
- %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
+ %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
%tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
@@ -195,7 +195,7 @@ define <2 x i32> @vld4dupi32(i8* %A) nounwind {
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0:64]
- %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
+ %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
@@ -210,5 +210,5 @@ define <2 x i32> @vld4dupi32(i8* %A) nounwind {
ret <2 x i32> %tmp11
}
-declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index ac2be7f87f53..2c14bc2d8f4e 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -102,7 +102,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16]
%tmp1 = load <8 x i8>, <8 x i8>* %B
- %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -115,7 +115,7 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -127,7 +127,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
%tmp5 = add <2 x i32> %tmp3, %tmp4
@@ -141,7 +141,7 @@ define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
%tmp5 = add <2 x i32> %tmp3, %tmp4
@@ -155,7 +155,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
%tmp5 = fadd <2 x float> %tmp3, %tmp4
@@ -168,7 +168,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -181,7 +181,7 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -193,21 +193,21 @@ define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
%tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
-declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
-declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32.p0i8(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
-declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32.p0i8(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32.p0i8(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -222,7 +222,7 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vld3lanei8:
;CHECK: vld3.8
%tmp1 = load <8 x i8>, <8 x i8>* %B
- %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
@@ -237,7 +237,7 @@ define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
@@ -251,7 +251,7 @@ define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
@@ -265,7 +265,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
@@ -280,7 +280,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
@@ -296,7 +296,7 @@ define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounw
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
@@ -312,7 +312,7 @@ define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
@@ -326,7 +326,7 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
@@ -335,14 +335,14 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
ret <4 x float> %tmp7
}
-declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
-declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32.p0i8(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
-declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32.p0i8(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32.p0i8(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
@@ -358,7 +358,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32]
%tmp1 = load <8 x i8>, <8 x i8>* %B
- %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
@@ -375,7 +375,7 @@ define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]!
%A = load i8*, i8** %ptr
%tmp1 = load <8 x i8>, <8 x i8>* %B
- %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
@@ -395,7 +395,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
@@ -413,7 +413,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
@@ -429,7 +429,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
@@ -446,7 +446,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
@@ -463,7 +463,7 @@ define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
@@ -479,7 +479,7 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
+ %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32.p0i8(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
@@ -490,14 +490,14 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
ret <4 x float> %tmp9
}
-declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
-declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32.p0i8(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
-declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32.p0i8(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32.p0i8(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
; Radar 8776599: If one of the operands to a QQQQ REG_SEQUENCE is a register
; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because
@@ -511,7 +511,7 @@ define <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind {
%tmp65 = shl i128 %tmp64, 64
%ins67 = or i128 %tmp65, 0
%tmp78 = bitcast i128 %ins67 to <8 x i16>
- %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
+ %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2)
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 0
%tmp4 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 1
%tmp5 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 2
diff --git a/test/CodeGen/ARM/vminmaxnm-safe.ll b/test/CodeGen/ARM/vminmaxnm-safe.ll
new file mode 100644
index 000000000000..ce1aab2dbcec
--- /dev/null
+++ b/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -0,0 +1,396 @@
+; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 | FileCheck %s
+
+; vectors
+
+define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
+; CHECK-LABEL: vmaxnmq:
+; CHECK: vmaxnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
+; CHECK-LABEL: vmaxnmd:
+; CHECK: vmaxnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
+; CHECK-LABEL: vminnmq:
+; CHECK: vminnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
+; CHECK-LABEL: vminnmd:
+; CHECK: vminnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+; scalars
+
+define float @fp-armv8_vminnm_o(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_o":
+; CHECK-NOT: vminnm.f32
+ %cmp = fcmp olt float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
+define double @fp-armv8_vminnm_ole(double %a, double %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_ole":
+; CHECK-NOT: vminnm.f64
+ %cmp = fcmp ole double %a, %b
+ %cond = select i1 %cmp, double %a, double %b
+ ret double %cond
+}
+
+define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_o_rev":
+; CHECK-NOT: vminnm.f32
+ %cmp = fcmp ogt float %a, %b
+ %cond = select i1 %cmp, float %b, float %a
+ ret float %cond
+}
+
+define double @fp-armv8_vminnm_oge_rev(double %a, double %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_oge_rev":
+; CHECK-NOT: vminnm.f64
+ %cmp = fcmp oge double %a, %b
+ %cond = select i1 %cmp, double %b, double %a
+ ret double %cond
+}
+
+define float @fp-armv8_vminnm_u(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_u":
+; CHECK-NOT: vminnm.f32
+ %cmp = fcmp ult float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
+define float @fp-armv8_vminnm_ule(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_ule":
+; CHECK-NOT: vminnm.f32
+ %cmp = fcmp ule float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
+define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_u_rev":
+; CHECK-NOT: vminnm.f32
+ %cmp = fcmp ugt float %a, %b
+ %cond = select i1 %cmp, float %b, float %a
+ ret float %cond
+}
+
+define double @fp-armv8_vminnm_uge_rev(double %a, double %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_uge_rev":
+; CHECK-NOT: vminnm.f64
+ %cmp = fcmp uge double %a, %b
+ %cond = select i1 %cmp, double %b, double %a
+ ret double %cond
+}
+
+define float @fp-armv8_vmaxnm_o(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_o":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp ogt float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
+define float @fp-armv8_vmaxnm_oge(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_oge":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp oge float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
+define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_o_rev":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp olt float %a, %b
+ %cond = select i1 %cmp, float %b, float %a
+ ret float %cond
+}
+
+define float @fp-armv8_vmaxnm_ole_rev(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_ole_rev":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp ole float %a, %b
+ %cond = select i1 %cmp, float %b, float %a
+ ret float %cond
+}
+
+define float @fp-armv8_vmaxnm_u(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_u":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp ugt float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
+define float @fp-armv8_vmaxnm_uge(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_uge":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp uge float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
+define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_u_rev":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp ult float %a, %b
+ %cond = select i1 %cmp, float %b, float %a
+ ret float %cond
+}
+
+define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_ule_rev":
+; CHECK-NOT: vmaxnm.f64
+ %cmp = fcmp ule double %a, %b
+ %cond = select i1 %cmp, double %b, double %a
+ ret double %cond
+}
+
+; known non-NaNs
+
+define float @fp-armv8_vminnm_NNNo(float %a) {
+; CHECK-LABEL: "fp-armv8_vminnm_NNNo":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp olt float %a, 12.
+ %cond1 = select i1 %cmp1, float %a, float 12.
+ %cmp2 = fcmp olt float 34., %cond1
+ %cond2 = select i1 %cmp2, float 34., float %cond1
+ ret float %cond2
+}
+
+define double @fp-armv8_vminnm_NNNole(double %a) {
+; CHECK-LABEL: "fp-armv8_vminnm_NNNole":
+; CHECK: vminnm.f64
+; CHECK-NOT: vminnm.f64
+ %cmp1 = fcmp ole double %a, 34.
+ %cond1 = select i1 %cmp1, double %a, double 34.
+ %cmp2 = fcmp ole double 56., %cond1
+ %cond2 = select i1 %cmp2, double 56., double %cond1
+ ret double %cond2
+}
+
+define float @fp-armv8_vminnm_NNNo_rev(float %a) {
+; CHECK-LABEL: "fp-armv8_vminnm_NNNo_rev":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ogt float %a, 56.
+ %cond1 = select i1 %cmp1, float 56., float %a
+ %cmp2 = fcmp ogt float 78., %cond1
+ %cond2 = select i1 %cmp2, float %cond1, float 78.
+ ret float %cond2
+}
+
+define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
+; CHECK-LABEL: "fp-armv8_vminnm_NNNoge_rev":
+; CHECK: vminnm.f64
+; CHECK-NOT: vminnm.f64
+ %cmp1 = fcmp oge double %a, 78.
+ %cond1 = select i1 %cmp1, double 78., double %a
+ %cmp2 = fcmp oge double 90., %cond1
+ %cond2 = select i1 %cmp2, double %cond1, double 90.
+ ret double %cond2
+}
+
+define float @fp-armv8_vminnm_NNNu(float %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_NNNu":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ult float 12., %b
+ %cond1 = select i1 %cmp1, float 12., float %b
+ %cmp2 = fcmp ult float %cond1, 34.
+ %cond2 = select i1 %cmp2, float %cond1, float 34.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminnm_NNNule(float %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_NNNule":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ule float 34., %b
+ %cond1 = select i1 %cmp1, float 34., float %b
+ %cmp2 = fcmp ule float %cond1, 56.
+ %cond2 = select i1 %cmp2, float %cond1, float 56.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminnm_NNNu_rev(float %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_NNNu_rev":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ugt float 56., %b
+ %cond1 = select i1 %cmp1, float %b, float 56.
+ %cmp2 = fcmp ugt float %cond1, 78.
+ %cond2 = select i1 %cmp2, float 78., float %cond1
+ ret float %cond2
+}
+
+define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
+; CHECK-LABEL: "fp-armv8_vminnm_NNNuge_rev":
+; CHECK: vminnm.f64
+; CHECK-NOT: vminnm.f64
+ %cmp1 = fcmp uge double 78., %b
+ %cond1 = select i1 %cmp1, double %b, double 78.
+ %cmp2 = fcmp uge double %cond1, 90.
+ %cond2 = select i1 %cmp2, double 90., double %cond1
+ ret double %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNo(float %a) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ogt float %a, 12.
+ %cond1 = select i1 %cmp1, float %a, float 12.
+ %cmp2 = fcmp ogt float 34., %cond1
+ %cond2 = select i1 %cmp2, float 34., float %cond1
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNoge(float %a) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNoge":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp oge float %a, 34.
+ %cond1 = select i1 %cmp1, float %a, float 34.
+ %cmp2 = fcmp oge float 56., %cond1
+ %cond2 = select i1 %cmp2, float 56., float %cond1
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo_rev":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp olt float %a, 56.
+ %cond1 = select i1 %cmp1, float 56., float %a
+ %cmp2 = fcmp olt float 78., %cond1
+ %cond2 = select i1 %cmp2, float %cond1, float 78.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNole_rev":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ole float %a, 78.
+ %cond1 = select i1 %cmp1, float 78., float %a
+ %cmp2 = fcmp ole float 90., %cond1
+ %cond2 = select i1 %cmp2, float %cond1, float 90.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNu(float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu":
+; CHECK: vmaxnm.f32
+; CHEC-NOT: vmaxnm.f32
+ %cmp1 = fcmp ugt float 12., %b
+ %cond1 = select i1 %cmp1, float 12., float %b
+ %cmp2 = fcmp ugt float %cond1, 34.
+ %cond2 = select i1 %cmp2, float %cond1, float 34.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNuge(float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNuge":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp uge float 34., %b
+ %cond1 = select i1 %cmp1, float 34., float %b
+ %cmp2 = fcmp uge float %cond1, 56.
+ %cond2 = select i1 %cmp2, float %cond1, float 56.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu_rev":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ult float 56., %b
+ %cond1 = select i1 %cmp1, float %b, float 56.
+ %cmp2 = fcmp ult float %cond1, 78.
+ %cond2 = select i1 %cmp2, float 78., float %cond1
+ ret float %cond2
+}
+
+define double @fp-armv8_vmaxnm_NNNule_rev( double %b) {
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNule_rev":
+; CHECK: vmaxnm.f64
+; CHECK-NOT: vmaxnm.f64
+ %cmp1 = fcmp ule double 78., %b
+ %cond1 = select i1 %cmp1, double %b, double 78.
+ %cmp2 = fcmp ule double %cond1, 90.
+ %cond2 = select i1 %cmp2, double 90., double %cond1
+ ret double %cond2
+}
+
+define float @fp-armv8_vminmaxnm_0(float %a) {
+; CHECK-LABEL: "fp-armv8_vminmaxnm_0":
+; CHECK-NOT: vminnm.f32
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp ult float %a, 0.
+ %cond1 = select i1 %cmp1, float %a, float 0.
+ %cmp2 = fcmp ogt float %cond1, 0.
+ %cond2 = select i1 %cmp2, float %cond1, float 0.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminmaxnm_neg0(float %a) {
+; CHECK-LABEL: "fp-armv8_vminmaxnm_neg0":
+; CHECK: vminnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp olt float %a, -0.
+ %cond1 = select i1 %cmp1, float %a, float -0.
+ %cmp2 = fcmp ugt float %cond1, -0.
+ %cond2 = select i1 %cmp2, float %cond1, float -0.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminmaxnm_e_0(float %a) {
+; CHECK-LABEL: "fp-armv8_vminmaxnm_e_0":
+; CHECK-NOT: vminnm.f32
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp nsz ole float 0., %a
+ %cond1 = select i1 %cmp1, float 0., float %a
+ %cmp2 = fcmp nsz uge float 0., %cond1
+ %cond2 = select i1 %cmp2, float 0., float %cond1
+ ret float %cond2
+}
+
+define float @fp-armv8_vminmaxnm_e_neg0(float %a) {
+; CHECK-LABEL: "fp-armv8_vminmaxnm_e_neg0":
+; CHECK: vminnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp nsz ule float -0., %a
+ %cond1 = select i1 %cmp1, float -0., float %a
+ %cmp2 = fcmp nsz oge float -0., %cond1
+ %cond2 = select i1 %cmp2, float -0., float %cond1
+ ret float %cond2
+}
+
+declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vminmaxnm.ll b/test/CodeGen/ARM/vminmaxnm.ll
index 3632ffd00213..a6803fc78d8c 100644
--- a/test/CodeGen/ARM/vminmaxnm.ll
+++ b/test/CodeGen/ARM/vminmaxnm.ll
@@ -1,219 +1,147 @@
-; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 | FileCheck %s
-; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 \
-; RUN: -enable-no-nans-fp-math -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST
-
-; vectors
-
-define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
-; CHECK-LABEL: vmaxnmq:
-; CHECK: vmaxnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>, <4 x float>* %A
- %tmp2 = load <4 x float>, <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
-; CHECK-LABEL: vmaxnmd:
-; CHECK: vmaxnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>, <2 x float>* %A
- %tmp2 = load <2 x float>, <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
-
-define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
-; CHECK-LABEL: vminnmq:
-; CHECK: vminnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>, <4 x float>* %A
- %tmp2 = load <4 x float>, <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
- ret <4 x float> %tmp3
-}
-
-define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
-; CHECK-LABEL: vminnmd:
-; CHECK: vminnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>, <2 x float>* %A
- %tmp2 = load <2 x float>, <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
- ret <2 x float> %tmp3
-}
+; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s
; scalars
define float @fp-armv8_vminnm_o(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_o":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_o":
-; CHECK-NOT: vminnm.f32
- %cmp = fcmp olt float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f32
+ %cmp = fcmp fast olt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
define double @fp-armv8_vminnm_ole(double %a, double %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_ole":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f64
; CHECK-LABEL: "fp-armv8_vminnm_ole":
-; CHECK-NOT: vminnm.f64
- %cmp = fcmp ole double %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f64
+ %cmp = fcmp fast ole double %a, %b
%cond = select i1 %cmp, double %a, double %b
ret double %cond
}
define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_o_rev":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_o_rev":
-; CHECK-NOT: vminnm.f32
- %cmp = fcmp ogt float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f32
+ %cmp = fcmp fast ogt float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
define double @fp-armv8_vminnm_oge_rev(double %a, double %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_oge_rev":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f64
; CHECK-LABEL: "fp-armv8_vminnm_oge_rev":
-; CHECK-NOT: vminnm.f64
- %cmp = fcmp oge double %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f64
+ %cmp = fcmp fast oge double %a, %b
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}
define float @fp-armv8_vminnm_u(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_u":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_u":
-; CHECK-NOT: vminnm.f32
- %cmp = fcmp ult float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f32
+ %cmp = fcmp fast ult float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
define float @fp-armv8_vminnm_ule(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_ule":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_ule":
-; CHECK-NOT: vminnm.f32
- %cmp = fcmp ule float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f32
+ %cmp = fcmp fast ule float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_u_rev":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_u_rev":
-; CHECK-NOT: vminnm.f32
- %cmp = fcmp ugt float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f32
+ %cmp = fcmp fast ugt float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
define double @fp-armv8_vminnm_uge_rev(double %a, double %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_uge_rev":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f64
; CHECK-LABEL: "fp-armv8_vminnm_uge_rev":
-; CHECK-NOT: vminnm.f64
- %cmp = fcmp uge double %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f64
+ %cmp = fcmp fast uge double %a, %b
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}
define float @fp-armv8_vmaxnm_o(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_o":
-; CHECK-NOT: vmaxnm.f32
- %cmp = fcmp ogt float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vmaxnm.f32
+ %cmp = fcmp fast ogt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
define float @fp-armv8_vmaxnm_oge(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_oge":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_oge":
-; CHECK-NOT: vmaxnm.f32
- %cmp = fcmp oge float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vmaxnm.f32
+ %cmp = fcmp fast oge float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o_rev":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_o_rev":
-; CHECK-NOT: vmaxnm.f32
- %cmp = fcmp olt float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vmaxnm.f32
+ %cmp = fcmp fast olt float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
define float @fp-armv8_vmaxnm_ole_rev(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ole_rev":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_ole_rev":
-; CHECK-NOT: vmaxnm.f32
- %cmp = fcmp ole float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vmaxnm.f32
+ %cmp = fcmp fast ole float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
define float @fp-armv8_vmaxnm_u(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_u":
-; CHECK-NOT: vmaxnm.f32
- %cmp = fcmp ugt float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vmaxnm.f32
+ %cmp = fcmp fast ugt float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
define float @fp-armv8_vmaxnm_uge(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_uge":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_uge":
-; CHECK-NOT: vmaxnm.f32
- %cmp = fcmp uge float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vmaxnm.f32
+ %cmp = fcmp fast uge float %a, %b
%cond = select i1 %cmp, float %a, float %b
ret float %cond
}
define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u_rev":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_u_rev":
-; CHECK-NOT: vmaxnm.f32
- %cmp = fcmp ult float %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vmaxnm.f32
+ %cmp = fcmp fast ult float %a, %b
%cond = select i1 %cmp, float %b, float %a
ret float %cond
}
define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ule_rev":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vmaxnm.f64
; CHECK-LABEL: "fp-armv8_vmaxnm_ule_rev":
-; CHECK-NOT: vmaxnm.f64
- %cmp = fcmp ule double %a, %b
+; CHECK-NOT: vcmp
+; CHECK: vmaxnm.f64
+ %cmp = fcmp fast ule double %a, %b
%cond = select i1 %cmp, double %b, double %a
ret double %cond
}
@@ -221,285 +149,225 @@ define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
; known non-NaNs
define float @fp-armv8_vminnm_NNNo(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNo":
-; CHECK-FAST: vminnm.f32
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_NNNo":
; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
- %cmp1 = fcmp olt float %a, 12.
+; CHECK: vminnm.f32
+ %cmp1 = fcmp fast olt float %a, 12.
%cond1 = select i1 %cmp1, float %a, float 12.
- %cmp2 = fcmp olt float 34., %cond1
+ %cmp2 = fcmp fast olt float 34., %cond1
%cond2 = select i1 %cmp2, float 34., float %cond1
ret float %cond2
}
define double @fp-armv8_vminnm_NNNole(double %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNole":
-; CHECK-FAST: vminnm.f64
-; CHECK-FAST: vminnm.f64
; CHECK-LABEL: "fp-armv8_vminnm_NNNole":
; CHECK: vminnm.f64
-; CHECK-NOT: vminnm.f64
- %cmp1 = fcmp ole double %a, 34.
+; CHECK: vminnm.f64
+ %cmp1 = fcmp fast ole double %a, 34.
%cond1 = select i1 %cmp1, double %a, double 34.
- %cmp2 = fcmp ole double 56., %cond1
+ %cmp2 = fcmp fast ole double 56., %cond1
%cond2 = select i1 %cmp2, double 56., double %cond1
ret double %cond2
}
define float @fp-armv8_vminnm_NNNo_rev(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNo_rev":
-; CHECK-FAST: vminnm.f32
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_NNNo_rev":
; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
- %cmp1 = fcmp ogt float %a, 56.
+; CHECK: vminnm.f32
+ %cmp1 = fcmp fast ogt float %a, 56.
%cond1 = select i1 %cmp1, float 56., float %a
- %cmp2 = fcmp ogt float 78., %cond1
+ %cmp2 = fcmp fast ogt float 78., %cond1
%cond2 = select i1 %cmp2, float %cond1, float 78.
ret float %cond2
}
define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNoge_rev":
-; CHECK-FAST: vminnm.f64
-; CHECK-FAST: vminnm.f64
; CHECK-LABEL: "fp-armv8_vminnm_NNNoge_rev":
; CHECK: vminnm.f64
-; CHECK-NOT: vminnm.f64
- %cmp1 = fcmp oge double %a, 78.
+; CHECK: vminnm.f64
+ %cmp1 = fcmp fast oge double %a, 78.
%cond1 = select i1 %cmp1, double 78., double %a
- %cmp2 = fcmp oge double 90., %cond1
+ %cmp2 = fcmp fast oge double 90., %cond1
%cond2 = select i1 %cmp2, double %cond1, double 90.
ret double %cond2
}
define float @fp-armv8_vminnm_NNNu(float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNu":
-; CHECK-FAST: vminnm.f32
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_NNNu":
; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
- %cmp1 = fcmp ult float 12., %b
+; CHECK: vminnm.f32
+ %cmp1 = fcmp fast ult float 12., %b
%cond1 = select i1 %cmp1, float 12., float %b
- %cmp2 = fcmp ult float %cond1, 34.
+ %cmp2 = fcmp fast ult float %cond1, 34.
%cond2 = select i1 %cmp2, float %cond1, float 34.
ret float %cond2
}
define float @fp-armv8_vminnm_NNNule(float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNule":
-; CHECK-FAST: vminnm.f32
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_NNNule":
; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
- %cmp1 = fcmp ule float 34., %b
+; CHECK: vminnm.f32
+ %cmp1 = fcmp fast ule float 34., %b
%cond1 = select i1 %cmp1, float 34., float %b
- %cmp2 = fcmp ule float %cond1, 56.
+ %cmp2 = fcmp fast ule float %cond1, 56.
%cond2 = select i1 %cmp2, float %cond1, float 56.
ret float %cond2
}
define float @fp-armv8_vminnm_NNNu_rev(float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNu_rev":
-; CHECK-FAST: vminnm.f32
-; CHECK-FAST: vminnm.f32
; CHECK-LABEL: "fp-armv8_vminnm_NNNu_rev":
; CHECK: vminnm.f32
-; CHECK-NOT: vminnm.f32
- %cmp1 = fcmp ugt float 56., %b
+; CHECK: vminnm.f32
+ %cmp1 = fcmp fast ugt float 56., %b
%cond1 = select i1 %cmp1, float %b, float 56.
- %cmp2 = fcmp ugt float %cond1, 78.
+ %cmp2 = fcmp fast ugt float %cond1, 78.
%cond2 = select i1 %cmp2, float 78., float %cond1
ret float %cond2
}
define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNuge_rev":
-; CHECK-FAST: vminnm.f64
-; CHECK-FAST: vminnm.f64
; CHECK-LABEL: "fp-armv8_vminnm_NNNuge_rev":
; CHECK: vminnm.f64
-; CHECK-NOT: vminnm.f64
- %cmp1 = fcmp uge double 78., %b
+; CHECK: vminnm.f64
+ %cmp1 = fcmp fast uge double 78., %b
%cond1 = select i1 %cmp1, double %b, double 78.
- %cmp2 = fcmp uge double %cond1, 90.
+ %cmp2 = fcmp fast uge double %cond1, 90.
%cond2 = select i1 %cmp2, double 90., double %cond1
ret double %cond2
}
define float @fp-armv8_vmaxnm_NNNo(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNo":
-; CHECK-FAST: vmaxnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo":
; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
- %cmp1 = fcmp ogt float %a, 12.
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp fast ogt float %a, 12.
%cond1 = select i1 %cmp1, float %a, float 12.
- %cmp2 = fcmp ogt float 34., %cond1
+ %cmp2 = fcmp fast ogt float 34., %cond1
%cond2 = select i1 %cmp2, float 34., float %cond1
ret float %cond2
}
define float @fp-armv8_vmaxnm_NNNoge(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNoge":
-; CHECK-FAST: vmaxnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_NNNoge":
; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
- %cmp1 = fcmp oge float %a, 34.
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp fast oge float %a, 34.
%cond1 = select i1 %cmp1, float %a, float 34.
- %cmp2 = fcmp oge float 56., %cond1
+ %cmp2 = fcmp fast oge float 56., %cond1
%cond2 = select i1 %cmp2, float 56., float %cond1
ret float %cond2
}
define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNo_rev":
-; CHECK-FAST: vmaxnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo_rev":
; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
- %cmp1 = fcmp olt float %a, 56.
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp fast olt float %a, 56.
%cond1 = select i1 %cmp1, float 56., float %a
- %cmp2 = fcmp olt float 78., %cond1
+ %cmp2 = fcmp fast olt float 78., %cond1
%cond2 = select i1 %cmp2, float %cond1, float 78.
ret float %cond2
}
define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNole_rev":
-; CHECK-FAST: vmaxnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_NNNole_rev":
; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
- %cmp1 = fcmp ole float %a, 78.
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp fast ole float %a, 78.
%cond1 = select i1 %cmp1, float 78., float %a
- %cmp2 = fcmp ole float 90., %cond1
+ %cmp2 = fcmp fast ole float 90., %cond1
%cond2 = select i1 %cmp2, float %cond1, float 90.
ret float %cond2
}
define float @fp-armv8_vmaxnm_NNNu(float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNu":
-; CHECK-FAST: vmaxnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu":
; CHECK: vmaxnm.f32
-; CHEC-NOT: vmaxnm.f32
- %cmp1 = fcmp ugt float 12., %b
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp fast ugt float 12., %b
%cond1 = select i1 %cmp1, float 12., float %b
- %cmp2 = fcmp ugt float %cond1, 34.
+ %cmp2 = fcmp fast ugt float %cond1, 34.
%cond2 = select i1 %cmp2, float %cond1, float 34.
ret float %cond2
}
define float @fp-armv8_vmaxnm_NNNuge(float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNuge":
-; CHECK-FAST: vmaxnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_NNNuge":
; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
- %cmp1 = fcmp uge float 34., %b
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp fast uge float 34., %b
%cond1 = select i1 %cmp1, float 34., float %b
- %cmp2 = fcmp uge float %cond1, 56.
+ %cmp2 = fcmp fast uge float %cond1, 56.
%cond2 = select i1 %cmp2, float %cond1, float 56.
ret float %cond2
}
define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNu_rev":
-; CHECK-FAST: vmaxnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu_rev":
; CHECK: vmaxnm.f32
-; CHECK-NOT: vmaxnm.f32
- %cmp1 = fcmp ult float 56., %b
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp fast ult float 56., %b
%cond1 = select i1 %cmp1, float %b, float 56.
- %cmp2 = fcmp ult float %cond1, 78.
+ %cmp2 = fcmp fast ult float %cond1, 78.
%cond2 = select i1 %cmp2, float 78., float %cond1
ret float %cond2
}
define double @fp-armv8_vmaxnm_NNNule_rev( double %b) {
-; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNule_rev":
-; CHECK-FAST: vmaxnm.f64
-; CHECK-FAST: vmaxnm.f64
; CHECK-LABEL: "fp-armv8_vmaxnm_NNNule_rev":
; CHECK: vmaxnm.f64
-; CHECK-NOT: vmaxnm.f64
- %cmp1 = fcmp ule double 78., %b
+; CHECK: vmaxnm.f64
+ %cmp1 = fcmp fast ule double 78., %b
%cond1 = select i1 %cmp1, double %b, double 78.
- %cmp2 = fcmp ule double %cond1, 90.
+ %cmp2 = fcmp fast ule double %cond1, 90.
%cond2 = select i1 %cmp2, double 90., double %cond1
ret double %cond2
}
define float @fp-armv8_vminmaxnm_0(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_0":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vminmaxnm_0":
-; CHECK-NOT: vminnm.f32
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f32
; CHECK: vmaxnm.f32
- %cmp1 = fcmp olt float %a, 0.
+ %cmp1 = fcmp fast olt float %a, 0.
%cond1 = select i1 %cmp1, float %a, float 0.
- %cmp2 = fcmp ogt float %cond1, 0.
+ %cmp2 = fcmp fast ogt float %cond1, 0.
%cond2 = select i1 %cmp2, float %cond1, float 0.
ret float %cond2
}
define float @fp-armv8_vminmaxnm_neg0(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_neg0":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vminmaxnm_neg0":
+; CHECK-NOT: vcmp
; CHECK: vminnm.f32
-; CHECK-NOT: vmaxnm.f32
- %cmp1 = fcmp olt float %a, -0.
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp fast olt float %a, -0.
%cond1 = select i1 %cmp1, float %a, float -0.
- %cmp2 = fcmp ogt float %cond1, -0.
+ %cmp2 = fcmp fast ugt float %cond1, -0.
%cond2 = select i1 %cmp2, float %cond1, float -0.
ret float %cond2
}
define float @fp-armv8_vminmaxnm_e_0(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_e_0":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vminmaxnm_e_0":
-; CHECK-NOT: vminnm.f32
+; CHECK-NOT: vcmp
+; CHECK: vminnm.f32
; CHECK: vmaxnm.f32
- %cmp1 = fcmp ule float 0., %a
+ %cmp1 = fcmp fast ule float 0., %a
%cond1 = select i1 %cmp1, float 0., float %a
- %cmp2 = fcmp uge float 0., %cond1
+ %cmp2 = fcmp fast uge float 0., %cond1
%cond2 = select i1 %cmp2, float 0., float %cond1
ret float %cond2
}
define float @fp-armv8_vminmaxnm_e_neg0(float %a) {
-; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_e_neg0":
-; CHECK-FAST-NOT: vcmp
-; CHECK-FAST: vminnm.f32
-; CHECK-FAST: vmaxnm.f32
; CHECK-LABEL: "fp-armv8_vminmaxnm_e_neg0":
+; CHECK-NOT: vcmp
; CHECK: vminnm.f32
-; CHECK-NOT: vmaxnm.f32
- %cmp1 = fcmp ule float -0., %a
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp fast ule float -0., %a
%cond1 = select i1 %cmp1, float -0., float %a
- %cmp2 = fcmp uge float -0., %cond1
+ %cmp2 = fcmp fast oge float -0., %cond1
%cond2 = select i1 %cmp2, float -0., float %cond1
ret float %cond2
}
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index b7a23b7bb59c..b1b4f1a940d4 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -393,8 +393,8 @@ entry:
%sub.i = sub <4 x i32> %add.i185, zeroinitializer
%add.i = add <4 x i32> %sub.i, zeroinitializer
%vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
- tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
+ tail call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
unreachable
}
-declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4i16(i8*, <4 x i16>, i32) nounwind
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index c3e41cacde4c..0455190b4c9d 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -447,7 +447,7 @@ entry:
%0 = trunc i32 %mul to i8
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
- %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1)
+ %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %src, i32 1)
%4 = bitcast <16 x i8> %3 to <2 x double>
%5 = extractelement <2 x double> %4, i32 1
%6 = bitcast double %5 to <8 x i8>
@@ -459,13 +459,13 @@ entry:
%12 = add <8 x i16> %7, %11
%13 = mul <8 x i16> %12, %8
%14 = bitcast i16* %dst to i8*
- tail call void @llvm.arm.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %14, <8 x i16> %13, i32 2)
ret void
}
-declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
; Take advantage of the Cortex-A8 multiplier accumulator forward.
@@ -480,7 +480,7 @@ entry:
%0 = trunc i32 %mul to i8
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
- %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1)
+ %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %src, i32 1)
%4 = bitcast <16 x i8> %3 to <2 x double>
%5 = extractelement <2 x double> %4, i32 1
%6 = bitcast double %5 to <8 x i8>
@@ -502,7 +502,7 @@ entry:
%0 = trunc i32 %mul to i8
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
- %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1)
+ %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %src, i32 1)
%4 = bitcast <16 x i8> %3 to <2 x double>
%5 = extractelement <2 x double> %4, i32 1
%6 = bitcast double %5 to <8 x i8>
@@ -559,7 +559,7 @@ for.body33.lr.ph: ; preds = %for.body
for.body33: ; preds = %for.body33, %for.body33.lr.ph
%add45 = add i32 undef, undef
- %vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1)
+ %vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* undef, i32 1)
%0 = load i32*, i32** undef, align 4
%shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
%1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8>
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index e362ce36f8ba..6ddf9850cfcb 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - -lower-interleaved-accesses=false | FileCheck %s
define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vpaddi8:
diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll
index 0eb051036d99..85c8c5cfcda1 100644
--- a/test/CodeGen/ARM/vselect_imax.ll
+++ b/test/CodeGen/ARM/vselect_imax.ll
@@ -3,16 +3,13 @@
; Make sure that ARM backend with NEON handles vselect.
define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
-; CHECK: vcgt.s32 [[QR:q[0-9]+]], [[Q1:q[0-9]+]], [[Q2:q[0-9]+]]
-; CHECK: vbsl [[QR]], [[Q1]], [[Q2]]
+; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
%cmpres = icmp sgt <4 x i32> %a, %b
%maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
store <4 x i32> %maxres, <4 x i32>* %m
ret void
}
-; We adjusted the cost model of the following selects. When we improve code
-; lowering we also need to adjust the cost.
%T0_10 = type <16 x i16>
%T1_10 = type <16 x i1>
; CHECK-LABEL: func_blend10:
@@ -21,10 +18,10 @@ define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
%v0 = load %T0_10, %T0_10* %loadaddr
%v1 = load %T0_10, %T0_10* %loadaddr2
%c = icmp slt %T0_10 %v0, %v1
-; CHECK: vbsl
-; CHECK: vbsl
+; CHECK: vmin.s16
+; CHECK: vmin.s16
; COST: func_blend10
-; COST: cost of 40 {{.*}} select
+; COST: cost of 2 {{.*}} select
%r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
store %T0_10 %r, %T0_10* %storeaddr
ret void
@@ -37,10 +34,10 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
%v0 = load %T0_14, %T0_14* %loadaddr
%v1 = load %T0_14, %T0_14* %loadaddr2
%c = icmp slt %T0_14 %v0, %v1
-; CHECK: vbsl
-; CHECK: vbsl
+; CHECK: vmin.s32
+; CHECK: vmin.s32
; COST: func_blend14
-; COST: cost of 41 {{.*}} select
+; COST: cost of 2 {{.*}} select
%r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
store %T0_14 %r, %T0_14* %storeaddr
ret void
@@ -50,17 +47,20 @@ define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
; CHECK-LABEL: func_blend15:
define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
%T1_15* %blend, %T0_15* %storeaddr) {
-; CHECK: vbsl
-; CHECK: vbsl
+; CHECK: vmin.s32
+; CHECK: vmin.s32
%v0 = load %T0_15, %T0_15* %loadaddr
%v1 = load %T0_15, %T0_15* %loadaddr2
%c = icmp slt %T0_15 %v0, %v1
; COST: func_blend15
-; COST: cost of 82 {{.*}} select
+; COST: cost of 4 {{.*}} select
%r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
store %T0_15 %r, %T0_15* %storeaddr
ret void
}
+
+; We adjusted the cost model of the following selects. When we improve code
+; lowering we also need to adjust the cost.
%T0_18 = type <4 x i64>
%T1_18 = type <4 x i1>
; CHECK-LABEL: func_blend18:
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index f605fa4d6003..404129a7e6ad 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -5,7 +5,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vst1.8 {d16}, [r0:64]
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
+ call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
ret void
}
@@ -14,7 +14,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst1.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -23,7 +23,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst1.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -32,7 +32,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst1.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
ret void
}
@@ -43,7 +43,7 @@ define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
%A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
%tmp2 = getelementptr float, float* %A, i32 2
store float* %tmp2, float** %ptr
ret void
@@ -54,7 +54,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>, <1 x i64>* %B
- call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1)
ret void
}
@@ -63,7 +63,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst1.8 {d16, d17}, [r0:64]
%tmp1 = load <16 x i8>, <16 x i8>* %B
- call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
+ call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
ret void
}
@@ -73,7 +73,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst1.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
+ call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
ret void
}
@@ -84,7 +84,7 @@ define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8)
+ call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8)
%tmp2 = getelementptr i16, i16* %A, i32 %inc
store i16* %tmp2, i16** %ptr
ret void
@@ -95,7 +95,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst1.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -104,7 +104,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst1.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1)
ret void
}
@@ -113,7 +113,7 @@ define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <2 x i64>, <2 x i64>* %B
- call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1)
ret void
}
@@ -122,19 +122,19 @@ define void @vst1Qf64(double* %A, <2 x double>* %B) nounwind {
;CHECK: vst1.64
%tmp0 = bitcast double* %A to i8*
%tmp1 = load <2 x double>, <2 x double>* %B
- call void @llvm.arm.neon.vst1.v2f64(i8* %tmp0, <2 x double> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v2f64(i8* %tmp0, <2 x double> %tmp1, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v8i8(i8*, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4i16(i8*, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v2i32(i8*, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v2f32(i8*, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v1i64(i8*, <1 x i64>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v16i8(i8*, <16 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4i32(i8*, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v2i64(i8*, <2 x i64>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v2f64(i8*, <2 x double>, i32) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 17c8a4bdad9b..e0846ff512ea 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -5,7 +5,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst2.8 {d16, d17}, [r0:64]
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
+ call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
ret void
}
@@ -15,7 +15,7 @@ define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK: vst2.8 {d16, d17}, [r1], r2
%A = load i8*, i8** %ptr
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4)
+ call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4)
%tmp2 = getelementptr i8, i8* %A, i32 %inc
store i8* %tmp2, i8** %ptr
ret void
@@ -27,7 +27,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
+ call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
ret void
}
@@ -36,7 +36,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -45,7 +45,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2.p0i8.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -55,7 +55,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1.64 {d16, d17}, [r0:128]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>, <1 x i64>* %B
- call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32)
+ call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32)
ret void
}
@@ -66,7 +66,7 @@ define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
%A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>, <1 x i64>* %B
- call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 8)
+ call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 8)
%tmp2 = getelementptr i64, i64* %A, i32 2
store i64* %tmp2, i64** %ptr
ret void
@@ -77,7 +77,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
%tmp1 = load <16 x i8>, <16 x i8>* %B
- call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
+ call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
ret void
}
@@ -87,7 +87,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
+ call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
ret void
}
@@ -97,7 +97,7 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
+ call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
ret void
}
@@ -106,7 +106,7 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
@@ -114,7 +114,7 @@ define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst2update:
;CHECK: vst2.16 {d16, d17}, [r0]!
%tmp1 = load <4 x i16>, <4 x i16>* %B
- tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
+ tail call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
%t5 = getelementptr inbounds i8, i8* %out, i32 16
ret i8* %t5
}
@@ -123,18 +123,18 @@ define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp ali
;CHECK-LABEL: vst2update2:
;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
%tmp1 = load <4 x float>, <4 x float>* %this
- call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
+ call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
%tmp2 = getelementptr inbounds i8, i8* %out, i32 32
ret i8* %tmp2
}
-declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v1i64(i8*, <1 x i64>, <1 x i64>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v16i8(i8*, <16 x i8>, <16 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2.p0i8.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index 691ee3bd28f3..d70d59579009 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -6,7 +6,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
+ call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
ret void
}
@@ -15,7 +15,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -24,7 +24,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -35,7 +35,7 @@ define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
%tmp2 = getelementptr i32, i32* %A, i32 6
store i32* %tmp2, i32** %ptr
ret void
@@ -46,7 +46,7 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -57,7 +57,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>, <1 x i64>* %B
- call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
+ call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
ret void
}
@@ -67,7 +67,7 @@ define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
%A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>, <1 x i64>* %B
- call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
%tmp2 = getelementptr i64, i64* %A, i32 3
store i64* %tmp2, i64** %ptr
ret void
@@ -80,7 +80,7 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]!
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
%tmp1 = load <16 x i8>, <16 x i8>* %B
- call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
+ call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
ret void
}
@@ -90,7 +90,7 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
ret void
}
@@ -102,7 +102,7 @@ define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
%tmp2 = getelementptr i16, i16* %A, i32 24
store i16* %tmp2, i16** %ptr
ret void
@@ -114,7 +114,7 @@ define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -124,17 +124,17 @@ define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3.p0i8.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3.p0i8.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index c343c6c86959..188955102290 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -5,7 +5,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
+ call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
ret void
}
@@ -15,7 +15,7 @@ define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2
%A = load i8*, i8** %ptr
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
+ call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
%tmp2 = getelementptr i8, i8* %A, i32 %inc
store i8* %tmp2, i8** %ptr
ret void
@@ -27,7 +27,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
+ call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
ret void
}
@@ -37,7 +37,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
+ call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
ret void
}
@@ -46,7 +46,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.p0i8.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -56,7 +56,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>, <1 x i64>* %B
- call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
+ call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
ret void
}
@@ -66,7 +66,7 @@ define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
%A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>, <1 x i64>* %B
- call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
%tmp2 = getelementptr i64, i64* %A, i32 4
store i64* %tmp2, i64** %ptr
ret void
@@ -78,7 +78,7 @@ define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]!
;CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]
%tmp1 = load <16 x i8>, <16 x i8>* %B
- call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
+ call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
ret void
}
@@ -89,7 +89,7 @@ define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4.16 {d17, d19, d21, d23}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
ret void
}
@@ -99,7 +99,7 @@ define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -109,7 +109,7 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
@@ -121,19 +121,19 @@ define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
%A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
%tmp2 = getelementptr float, float* %A, i32 16
store float* %tmp2, float** %ptr
ret void
}
-declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
-declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind
-declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index a4575417bce5..7e130ea01b64 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -110,7 +110,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vst2.8 {d16[1], d17[1]}, [r0:16]
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
+ call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
ret void
}
@@ -120,7 +120,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2.16 {d16[1], d17[1]}, [r0:32]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+ call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
ret void
}
@@ -131,7 +131,7 @@ define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2)
+ call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2)
%tmp2 = getelementptr i16, i16* %A, i32 %inc
store i16* %tmp2, i16** %ptr
ret void
@@ -142,7 +142,7 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
ret void
}
@@ -151,7 +151,7 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -161,7 +161,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2.16 {d17[1], d19[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
+ call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
ret void
}
@@ -171,7 +171,7 @@ define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2.32 {d17[0], d19[0]}, [r0:64]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
+ call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
ret void
}
@@ -180,24 +180,24 @@ define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1)
+ call void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind
define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst3lanei8:
;CHECK: vst3.8
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
ret void
}
@@ -207,7 +207,7 @@ define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
+ call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
ret void
}
@@ -216,7 +216,7 @@ define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
ret void
}
@@ -225,7 +225,7 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -235,7 +235,7 @@ define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
+ call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
ret void
}
@@ -244,7 +244,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
+ call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
ret void
}
@@ -255,7 +255,7 @@ define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
+ call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
%tmp2 = getelementptr i32, i32* %A, i32 3
store i32* %tmp2, i32** %ptr
ret void
@@ -266,18 +266,18 @@ define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind
define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
@@ -285,7 +285,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
ret void
}
@@ -295,7 +295,7 @@ define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
%A = load i8*, i8** %ptr
%tmp1 = load <8 x i8>, <8 x i8>* %B
- call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
+ call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
%tmp2 = getelementptr i8, i8* %A, i32 4
store i8* %tmp2, i8** %ptr
ret void
@@ -306,7 +306,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
- call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
ret void
}
@@ -316,7 +316,7 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>, <2 x i32>* %B
- call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
+ call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
ret void
}
@@ -325,7 +325,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
- call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -335,7 +335,7 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
- call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
+ call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
ret void
}
@@ -345,7 +345,7 @@ define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
- call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
+ call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
ret void
}
@@ -354,7 +354,7 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
- call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
+ call void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -365,11 +365,11 @@ define <8 x i16> @variable_insertelement(<8 x i16> %a, i16 %b, i32 %c) nounwind
ret <8 x i16> %r
}
-declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index 7b83dfdaf229..36bcde22731d 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -20,11 +20,11 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vtrn.8 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
@@ -52,11 +52,11 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vtrni16_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vtrn.16 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vtrn.16 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
@@ -84,11 +84,11 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vtrni32_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vtrn.32 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = load <2 x i32>, <2 x i32>* %B
@@ -116,11 +116,11 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vtrnf_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vtrn.32 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vtrn.32 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = load <2 x float>, <2 x float>* %B
@@ -281,11 +281,11 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vtrni8_undef_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vtrn.8 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vtrn.8 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
@@ -325,3 +325,77 @@ define <16 x i16> @vtrnQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14, i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
ret <16 x i16> %tmp3
}
+
+define <8 x i16> @vtrn_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
+entry:
+ ; CHECK-LABEL: vtrn_lower_shufflemask_undef
+ ; CHECK: vtrn
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
+ ret <8 x i16> %0
+}
+
+; Here we get a build_vector node, where all the incoming extract_element
+; values do modify the type. However, we get different input types, as some of
+; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of
+; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).
+define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
+ <4 x i32> %cmp0, <4 x i32> %cmp1,
+ <4 x i16> %cmp2, <4 x i16> %cmp3) {
+ ; CHECK-LABEL: vtrn_mismatched_builvector0
+ ; CHECK: vmovn.i32
+ ; CHECK: vtrn
+ ; CHECK: vbsl
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1
+ %c1 = icmp ult <4 x i16> %cmp2, %cmp3
+ %c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
+ ret <8 x i8> %rv
+}
+
+; Here we get a build_vector node, where half the incoming extract_element
+; values do not modify the type (the values form cmp2), but half of them do
+; (from the icmp operation).
+define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
+ <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
+ ; CHECK-LABEL: vtrn_mismatched_builvector1
+ ; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
+ ; CHECK: vmovl
+ ; CHECK: vtrn.8
+ ; CHECK: vbsl
+ %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
+ %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1
+ %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
+ ret <8 x i8> %rv
+}
+
+; Negative test that should not generate a vtrn
+define void @lower_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
+entry:
+ ; CHECK-LABEL: lower_twice_no_vtrn
+ ; CHECK: @ BB#0:
+ ; CHECK-NOT: vtrn
+ ; CHECK: mov pc, lr
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 5, i32 3, i32 7, i32 1, i32 5, i32 3, i32 7>
+ store <8 x i16> %0, <8 x i16>* %C
+ ret void
+}
+
+; Negative test that should not generate a vtrn
+define void @upper_twice_no_vtrn(<4 x i16>* %A, <4 x i16>* %B, <8 x i16>* %C) {
+entry:
+ ; CHECK-LABEL: upper_twice_no_vtrn
+ ; CHECK: @ BB#0:
+ ; CHECK-NOT: vtrn
+ ; CHECK: mov pc, lr
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 6, i32 0, i32 4, i32 2, i32 6>
+ store <8 x i16> %0, <8 x i16>* %C
+ ret void
+}
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 5510634b0668..04499e77fde6 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -20,11 +20,11 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vuzp.8 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vuzp.8 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
@@ -52,11 +52,11 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vuzpi16_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vuzp.16 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vuzp.16 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
@@ -220,11 +220,11 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vuzpi8_undef_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vuzp.8 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vuzp.8 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
@@ -264,3 +264,109 @@ define <16 x i16> @vuzpQi16_undef_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
ret <16 x i16> %tmp3
}
+
+define <8 x i16> @vuzp_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
+entry:
+ ; CHECK-LABEL: vuzp_lower_shufflemask_undef
+ ; CHECK: vuzp
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 3, i32 5, i32 7>
+ ret <8 x i16> %0
+}
+
+define <4 x i32> @vuzp_lower_shufflemask_zeroed(<2 x i32>* %A, <2 x i32>* %B) {
+entry:
+ ; CHECK-LABEL: vuzp_lower_shufflemask_zeroed
+ ; CHECK-NOT: vtrn
+ ; CHECK: vuzp
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 0, i32 1, i32 3>
+ ret <4 x i32> %0
+}
+
+define void @vuzp_rev_shufflemask_vtrn(<2 x i32>* %A, <2 x i32>* %B, <4 x i32>* %C) {
+entry:
+ ; CHECK-LABEL: vuzp_rev_shufflemask_vtrn
+ ; CHECK-NOT: vtrn
+ ; CHECK: vuzp
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
+ store <4 x i32> %0, <4 x i32>* %C
+ ret void
+}
+
+define <8 x i8> @vuzp_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8 x i32> %cmp1) {
+; In order to create the select we need to truncate the vcgt result from a vector of i32 to a vector of i8.
+; This results in a build_vector with mismatched types. We will generate two vmovn.i32 instructions to
+; truncate from i32 to i16 and one vuzp to perform the final truncation for i8.
+; CHECK-LABEL: vuzp_trunc
+; CHECK: vmovn.i32
+; CHECK: vmovn.i32
+; CHECK: vuzp
+; CHECK: vbsl
+ %c = icmp ult <8 x i32> %cmp0, %cmp1
+ %res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
+ ret <8 x i8> %res
+}
+
+; Shuffle the result from the compare with a <4 x i8>.
+; We need to extend the loaded <4 x i8> to <4 x i16>. Otherwise we wouldn't be able
+; to perform the vuzp and get the vbsl mask.
+define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
+ <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
+; CHECK-LABEL: vuzp_trunc_and_shuffle
+; CHECK: vmovl
+; CHECK: vuzp
+; CHECK: vbsl
+ %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
+ %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1
+ %c = shufflevector <4 x i1> %c0, <4 x i1> %cmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
+ ret <8 x i8> %rv
+}
+
+; Use an undef value for the <4 x i8> that is being shuffled with the compare result.
+; This produces a build_vector with some of the operands undefs.
+define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,
+ <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
+; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right
+; CHECK: vuzp
+; CHECK: vbsl
+ %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
+ %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1
+ %c = shufflevector <4 x i1> %c0, <4 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
+ ret <8 x i8> %rv
+}
+
+define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
+ <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
+; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left
+; CHECK: vuzp
+; CHECK: vbsl
+ %cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
+ %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
+ %c0 = icmp ult <4 x i32> %cmp0, %cmp1
+ %c = shufflevector <4 x i1> undef, <4 x i1> %c0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %rv = select <8 x i1> %c, <8 x i8> %tr0, <8 x i8> %tr1
+ ret <8 x i8> %rv
+}
+
+; We're using large data types here, and we have to fill with undef values until we
+; get some vector size that we can represent.
+define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
+ <5 x i32> %cmp0, <5 x i32> %cmp1, <5 x i8> *%cmp2_ptr) {
+; CHECK-LABEL: vuzp_wide_type
+; CHECK: vbsl
+ %cmp2_load = load <5 x i8>, <5 x i8> * %cmp2_ptr, align 4
+ %cmp2 = trunc <5 x i8> %cmp2_load to <5 x i1>
+ %c0 = icmp ult <5 x i32> %cmp0, %cmp1
+ %c = shufflevector <5 x i1> %c0, <5 x i1> %cmp2, <10 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+ %rv = select <10 x i1> %c, <10 x i8> %tr0, <10 x i8> %tr1
+ ret <10 x i8> %rv
+}
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index 1d9f59aeda0b..259b484f5f8e 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -20,11 +20,11 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vzip.8 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vzip.8 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
@@ -52,11 +52,11 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vzip.16 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vzip.16 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = load <4 x i16>, <4 x i16>* %B
@@ -220,11 +220,11 @@ define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef_Qres:
; CHECK: @ BB#0:
-; CHECK-NEXT: vldr d17, [r1]
-; CHECK-NEXT: vldr d16, [r0]
-; CHECK-NEXT: vzip.8 d16, d17
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: vldr [[LDR1:d[0-9]+]], [r1]
+; CHECK-NEXT: vldr [[LDR0:d[0-9]+]], [r0]
+; CHECK-NEXT: vzip.8 [[LDR0]], [[LDR1]]
+; CHECK-NEXT: vmov r0, r1, [[LDR0]]
+; CHECK-NEXT: vmov r2, r3, [[LDR1]]
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = load <8 x i8>, <8 x i8>* %B
@@ -264,3 +264,55 @@ define <32 x i8> @vzipQi8_undef_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
ret <32 x i8> %tmp3
}
+
+define <8 x i16> @vzip_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
+entry:
+ ; CHECK-LABEL: vzip_lower_shufflemask_undef
+ ; CHECK: vzip
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
+ ret <8 x i16> %0
+}
+
+define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
+entry:
+ ; CHECK-LABEL: vzip_lower_shufflemask_zeroed
+ ; CHECK-NOT: vtrn
+ ; CHECK: vzip
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @vzip_lower_shufflemask_vuzp(<2 x i32>* %A) {
+entry:
+ ; CHECK-LABEL: vzip_lower_shufflemask_vuzp
+ ; CHECK-NOT: vuzp
+ ; CHECK: vzip
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
+ ret <4 x i32> %0
+}
+
+define void @vzip_undef_rev_shufflemask_vtrn(<2 x i32>* %A, <4 x i32>* %B) {
+entry:
+ ; CHECK-LABEL: vzip_undef_rev_shufflemask_vtrn
+ ; CHECK-NOT: vtrn
+ ; CHECK: vzip
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+ store <4 x i32> %0, <4 x i32>* %B
+ ret void
+}
+
+define void @vzip_vext_factor(<8 x i16>* %A, <4 x i16>* %B) {
+entry:
+ ; CHECK-LABEL: vzip_vext_factor
+ ; CHECK: vext.16 d16, d16, d17, #3
+ ; CHECK: vzip
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %0 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 4, i32 5, i32 3>
+ store <4 x i16> %0, <4 x i16>* %B
+ ret void
+}
diff --git a/test/CodeGen/BPF/sockex2.ll b/test/CodeGen/BPF/sockex2.ll
index 5de2787d5b07..b3e83eadf537 100644
--- a/test/CodeGen/BPF/sockex2.ll
+++ b/test/CodeGen/BPF/sockex2.ll
@@ -311,7 +311,7 @@ flow_dissector.exit.thread: ; preds = %86, %12, %196, %199
; CHECK-LABEL: bpf_prog2:
; CHECK: ldabs_h r0, r6.data + 12 # encoding: [0x28,0x00,0x00,0x00,0x0c,0x00,0x00,0x00]
; CHECK: ldabs_h r0, r6.data + 16 # encoding: [0x28,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
-; CHECK: implicit-def: R
+; CHECK: implicit-def: %R1
; CHECK: ld_64 r1
; CHECK-NOT: ori
; CHECK: call 1 # encoding: [0x85,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
diff --git a/test/CodeGen/CPP/gep.ll b/test/CodeGen/CPP/gep.ll
new file mode 100644
index 000000000000..88a0bf1f216d
--- /dev/null
+++ b/test/CodeGen/CPP/gep.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=cpp -o - %s | FileCheck %s
+
+define void @f1(i32* %addr) {
+ %x = getelementptr i32, i32* %addr, i32 1
+; CHECK: ConstantInt* [[INT_1:.*]] = ConstantInt::get(mod->getContext(), APInt(32, StringRef("1"), 10));
+; CHECK: GetElementPtrInst::Create(IntegerType::get(mod->getContext(), 32), ptr_addr,
+; CHECK-NEXT: [[INT_1]]
+; CHECK-NEXT: }, "x", label_3);
+ ret void
+}
diff --git a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
index eaaeb37eebb4..ff5952db43b3 100644
--- a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
+++ b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
@@ -30,20 +30,20 @@
%"struct.qdesigner_internal::GridLayout" = type { %"struct.qdesigner_internal::Layout", %"struct.QPair<int,int>", %"struct.qdesigner_internal::Grid"* }
%"struct.qdesigner_internal::Layout" = type { %struct.QObject, %"struct.QList<QAbstractExtensionFactory*>", %struct.QWidget*, %"struct.QHash<QString,QList<QAbstractExtensionFactory*> >", %struct.QWidget*, %struct.QDesignerFormWindowInterface*, i8, %"struct.QPair<int,int>", %struct.QRect, i8 }
-@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = weak alias i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.Alignment*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %struct.Alignment*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.Alignment*)* @pthread_mutexattr_init ; <i32 (%struct.Alignment*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.Alignment*, i32)* @pthread_mutexattr_settype ; <i32 (%struct.Alignment*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.Alignment*)* @pthread_mutexattr_destroy ; <i32 (%struct.Alignment*)*> [#uses=0]
+@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*), i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
+@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32), i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
+@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*), i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = weak alias i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*), i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i64), i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.Alignment*), i32 (%struct.pthread_mutex_t*, %struct.Alignment*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %struct.Alignment*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*), i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32), i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.Alignment*), i32 (%struct.Alignment*)* @pthread_mutexattr_init ; <i32 (%struct.Alignment*)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.Alignment*, i32), i32 (%struct.Alignment*, i32)* @pthread_mutexattr_settype ; <i32 (%struct.Alignment*, i32)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.Alignment*), i32 (%struct.Alignment*)* @pthread_mutexattr_destroy ; <i32 (%struct.Alignment*)*> [#uses=0]
define void @_ZN18qdesigner_internal10GridLayout9buildGridEv(%"struct.qdesigner_internal::GridLayout"* %this) nounwind {
entry:
diff --git a/test/CodeGen/Generic/ForceStackAlign.ll b/test/CodeGen/Generic/ForceStackAlign.ll
new file mode 100644
index 000000000000..57ccb2c41d77
--- /dev/null
+++ b/test/CodeGen/Generic/ForceStackAlign.ll
@@ -0,0 +1,27 @@
+; Check that stack alignment can be forced. Individual targets should test their
+; specific implementation details.
+
+; RUN: llc < %s -stackrealign -stack-alignment=32 | FileCheck %s
+; CHECK-LABEL: @f
+; CHECK-LABEL: @g
+
+define i32 @f(i8* %p) nounwind {
+entry:
+ %0 = load i8, i8* %p
+ %conv = sext i8 %0 to i32
+ ret i32 %conv
+}
+
+define i64 @g(i32 %i) nounwind {
+entry:
+ br label %if.then
+
+if.then:
+ %0 = alloca i8, i32 %i
+ call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 %i, i32 1, i1 false)
+ %call = call i32 @f(i8* %0)
+ %conv = sext i32 %call to i64
+ ret i64 %conv
+}
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll
index 8288e45ee509..ae3c8da21471 100644
--- a/test/CodeGen/Generic/MachineBranchProb.ll
+++ b/test/CodeGen/Generic/MachineBranchProb.ll
@@ -16,11 +16,11 @@ entry:
i64 5, label %sw.bb1
], !prof !0
; CHECK: BB#0: derived from LLVM BB %entry
-; CHECK: Successors according to CFG: BB#2(64) BB#4(14)
+; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}75.29%) BB#4({{[0-9a-fx/= ]+}}24.71%)
; CHECK: BB#4: derived from LLVM BB %entry
-; CHECK: Successors according to CFG: BB#1(10) BB#5(4)
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}47.62%) BB#5({{[0-9a-fx/= ]+}}52.38%)
; CHECK: BB#5: derived from LLVM BB %entry
-; CHECK: Successors according to CFG: BB#1(4) BB#3(7)
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}36.36%) BB#3({{[0-9a-fx/= ]+}}63.64%)
sw.bb:
br label %return
@@ -62,7 +62,7 @@ return: ret void
; CHECK-LABEL: Machine code for function left_leaning_weight_balanced_tree:
; CHECK: BB#0: derived from LLVM BB %entry
; CHECK-NOT: Successors
-; CHECK: Successors according to CFG: BB#8(13) BB#9(20)
+; CHECK: Successors according to CFG: BB#8({{[0-9a-fx/= ]+}}39.71%) BB#9({{[0-9a-fx/= ]+}}60.29%)
}
!1 = !{!"branch_weights",
diff --git a/test/CodeGen/Generic/dbg_value.ll b/test/CodeGen/Generic/dbg_value.ll
index 4038086cbb4e..e96458862298 100644
--- a/test/CodeGen/Generic/dbg_value.ll
+++ b/test/CodeGen/Generic/dbg_value.ll
@@ -4,11 +4,12 @@
%0 = type { i32, i32 }
define void @t(%0*, i32, i32, i32, i32) nounwind {
- tail call void @llvm.dbg.value(metadata %0* %0, i64 0, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !DISubprogram())
+ tail call void @llvm.dbg.value(metadata %0* %0, i64 0, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
unreachable
}
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
; !0 should conform to the format of DIVariable.
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", arg: 0, scope: !DISubprogram())
+!0 = !DILocalVariable(name: "a", arg: 1, scope: !1)
+!1 = distinct !DISubprogram()
diff --git a/test/CodeGen/Generic/lit.local.cfg b/test/CodeGen/Generic/lit.local.cfg
new file mode 100644
index 000000000000..f22d4aabd730
--- /dev/null
+++ b/test/CodeGen/Generic/lit.local.cfg
@@ -0,0 +1,3 @@
+if not config.target_triple:
+ config.unsupported = True
+
diff --git a/test/CodeGen/Generic/overloaded-intrinsic-name.ll b/test/CodeGen/Generic/overloaded-intrinsic-name.ll
index 979bc772f75f..65fc9c1184cf 100644
--- a/test/CodeGen/Generic/overloaded-intrinsic-name.ll
+++ b/test/CodeGen/Generic/overloaded-intrinsic-name.ll
@@ -13,29 +13,29 @@
; function and integer
define i32* @test_iAny(i32* %v) gc "statepoint-example" {
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %v)
- %v-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %v)
+ %v-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
ret i32* %v-new
}
; float
define float* @test_fAny(float* %v) gc "statepoint-example" {
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, float* %v)
- %v-new = call float* @llvm.experimental.gc.relocate.p0f32(i32 %tok, i32 7, i32 7)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, float* %v)
+ %v-new = call float* @llvm.experimental.gc.relocate.p0f32(token %tok, i32 7, i32 7)
ret float* %v-new
}
; array of integers
define [3 x i32]* @test_aAny([3 x i32]* %v) gc "statepoint-example" {
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %v)
- %v-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 7, i32 7)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %v)
+ %v-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token %tok, i32 7, i32 7)
ret [3 x i32]* %v-new
}
; vector of integers
define <3 x i32>* @test_vAny(<3 x i32>* %v) gc "statepoint-example" {
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, <3 x i32>* %v)
- %v-new = call <3 x i32>* @llvm.experimental.gc.relocate.p0v3i32(i32 %tok, i32 7, i32 7)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, <3 x i32>* %v)
+ %v-new = call <3 x i32>* @llvm.experimental.gc.relocate.p0v3i32(token %tok, i32 7, i32 7)
ret <3 x i32>* %v-new
}
@@ -43,15 +43,15 @@ define <3 x i32>* @test_vAny(<3 x i32>* %v) gc "statepoint-example" {
; struct
define %struct.test* @test_struct(%struct.test* %v) gc "statepoint-example" {
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, %struct.test* %v)
- %v-new = call %struct.test* @llvm.experimental.gc.relocate.p0struct.test(i32 %tok, i32 7, i32 7)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, %struct.test* %v)
+ %v-new = call %struct.test* @llvm.experimental.gc.relocate.p0struct.test(token %tok, i32 7, i32 7)
ret %struct.test* %v-new
}
declare zeroext i1 @return_i1()
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)
-declare float* @llvm.experimental.gc.relocate.p0f32(i32, i32, i32)
-declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32, i32, i32)
-declare <3 x i32>* @llvm.experimental.gc.relocate.p0v3i32(i32, i32, i32)
-declare %struct.test* @llvm.experimental.gc.relocate.p0struct.test(i32, i32, i32)
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.relocate.p0i32(token, i32, i32)
+declare float* @llvm.experimental.gc.relocate.p0f32(token, i32, i32)
+declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token, i32, i32)
+declare <3 x i32>* @llvm.experimental.gc.relocate.p0v3i32(token, i32, i32)
+declare %struct.test* @llvm.experimental.gc.relocate.p0struct.test(token, i32, i32)
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index 962b1295b5de..2d4dc501a53a 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -156,3 +156,9 @@ define <2 x i32*> @vector_gep(<2 x [3 x {i32, i32}]*> %a) {
%w = getelementptr [3 x {i32, i32}], <2 x [3 x {i32, i32}]*> %a, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 1, i32 1>
ret <2 x i32*> %w
}
+
+define i32 @extractelt_constant_bitcast() {
+ %1 = bitcast i64 4 to <2 x i32>
+ %2 = extractelement <2 x i32> %1, i32 0
+ ret i32 %2
+}
diff --git a/test/CodeGen/Hexagon/NVJumpCmp.ll b/test/CodeGen/Hexagon/NVJumpCmp.ll
new file mode 100644
index 000000000000..6b160d962ebb
--- /dev/null
+++ b/test/CodeGen/Hexagon/NVJumpCmp.ll
@@ -0,0 +1,89 @@
+; RUN: llc -march=hexagon -O2 -mcpu=hexagonv60 < %s | FileCheck %s
+
+; Look for an instruction, we really just do not want to see an abort.
+; CHECK: trace_event
+; REQUIRES: asserts
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32"
+target triple = "hexagon-unknown--elf"
+
+; Function Attrs: nounwind
+define void @_ZN6Halide7Runtime8Internal13default_traceEPvPK18halide_trace_event() #0 {
+entry:
+ br i1 undef, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %if.then
+ br i1 undef, label %while.cond, label %while.end
+
+while.end: ; preds = %while.cond
+ %add = add i32 undef, 48
+ br i1 undef, label %if.end, label %if.then17
+
+if.then17: ; preds = %while.end
+ unreachable
+
+if.end: ; preds = %while.end
+ %arrayidx21 = getelementptr inbounds [4096 x i8], [4096 x i8]* undef, i32 0, i32 8
+ store i8 undef, i8* %arrayidx21, align 4, !tbaa !1
+ br i1 undef, label %for.body42.preheader6, label %min.iters.checked
+
+for.body42.preheader6: ; preds = %vector.body.preheader, %min.iters.checked, %if.end
+ unreachable
+
+min.iters.checked: ; preds = %if.end
+ br i1 undef, label %for.body42.preheader6, label %vector.body.preheader
+
+vector.body.preheader: ; preds = %min.iters.checked
+ br i1 undef, label %for.cond48.preheader, label %for.body42.preheader6
+
+for.cond48.preheader: ; preds = %vector.body.preheader
+ br i1 undef, label %while.cond.i, label %for.body61.lr.ph
+
+for.body61.lr.ph: ; preds = %for.cond48.preheader
+ br i1 undef, label %for.body61, label %min.iters.checked595
+
+min.iters.checked595: ; preds = %for.body61.lr.ph
+ br i1 undef, label %for.body61, label %vector.memcheck608
+
+vector.memcheck608: ; preds = %min.iters.checked595
+ %scevgep600 = getelementptr [4096 x i8], [4096 x i8]* undef, i32 0, i32 %add
+ %bound0604 = icmp ule i8* %scevgep600, undef
+ %memcheck.conflict607 = and i1 undef, %bound0604
+ br i1 %memcheck.conflict607, label %for.body61, label %vector.body590
+
+vector.body590: ; preds = %vector.body590, %vector.memcheck608
+ br i1 undef, label %middle.block591, label %vector.body590, !llvm.loop !4
+
+middle.block591: ; preds = %vector.body590
+ %cmp.n613 = icmp eq i32 undef, 0
+ br i1 %cmp.n613, label %while.cond.i, label %for.body61
+
+while.cond.i: ; preds = %for.body61, %while.cond.i, %middle.block591, %for.cond48.preheader
+ br i1 undef, label %_ZN6Halide7Runtime8Internal14ScopedSpinLockC2EPVi.exit, label %while.cond.i
+
+_ZN6Halide7Runtime8Internal14ScopedSpinLockC2EPVi.exit: ; preds = %while.cond.i
+ unreachable
+
+for.body61: ; preds = %for.body61, %middle.block591, %vector.memcheck608, %min.iters.checked595, %for.body61.lr.ph
+ %cmp59 = icmp ult i32 undef, undef
+ br i1 %cmp59, label %for.body61, label %while.cond.i, !llvm.loop !7
+
+if.else: ; preds = %entry
+ unreachable
+}
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"halide_mattrs", !"+hvx"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = distinct !{!4, !5, !6}
+!5 = !{!"llvm.loop.vectorize.width", i32 1}
+!6 = !{!"llvm.loop.interleave.count", i32 1}
+!7 = distinct !{!7, !5, !6}
diff --git a/test/CodeGen/Hexagon/absaddr-store.ll b/test/CodeGen/Hexagon/absaddr-store.ll
index dac8607d88db..f4e97d22e7d2 100644
--- a/test/CodeGen/Hexagon/absaddr-store.ll
+++ b/test/CodeGen/Hexagon/absaddr-store.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=hexagon -hexagon-small-data-threshold=0 < %s | FileCheck %s
; Check that we generate load instructions with absolute addressing mode.
+; XFAIL: *
@a0 = external global i32
@a1 = external global i32
diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll
index 7b29e7ad8a0f..4a88914dc6cb 100644
--- a/test/CodeGen/Hexagon/adde.ll
+++ b/test/CodeGen/Hexagon/adde.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -disable-hsdr -hexagon-expand-condsets=0 -hexagon-bit=0 < %s | FileCheck %s
-; CHECK: r{{[0-9]+:[0-9]+}} = #1
-; CHECK: r{{[0-9]+:[0-9]+}} = #0
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0, #1)
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0, #0)
; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
diff --git a/test/CodeGen/Hexagon/alu64.ll b/test/CodeGen/Hexagon/alu64.ll
index d0824a4ecadc..f986f1359374 100644
--- a/test/CodeGen/Hexagon/alu64.ll
+++ b/test/CodeGen/Hexagon/alu64.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
; CHECK-LABEL: @test00
-; CHECK: p0 = cmp.eq(r1:0, r3:2)
+; CHECK: = cmp.eq(r1:0, r3:2)
define i32 @test00(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpeqp(i64 %Rs, i64 %Rt)
@@ -9,7 +9,7 @@ entry:
}
; CHECK-LABEL: @test01
-; CHECK: p0 = cmp.gt(r1:0, r3:2)
+; CHECK: = cmp.gt(r1:0, r3:2)
define i32 @test01(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpgtp(i64 %Rs, i64 %Rt)
@@ -17,7 +17,7 @@ entry:
}
; CHECK-LABEL: @test02
-; CHECK: p0 = cmp.gtu(r1:0, r3:2)
+; CHECK: = cmp.gtu(r1:0, r3:2)
define i32 @test02(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.C2.cmpgtup(i64 %Rs, i64 %Rt)
@@ -25,7 +25,7 @@ entry:
}
; CHECK-LABEL: @test10
-; CHECK: r0 = cmp.eq(r0, r1)
+; CHECK: = cmp.eq(r0, r1)
define i32 @test10(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpeq(i32 %Rs, i32 %Rt)
@@ -33,7 +33,7 @@ entry:
}
; CHECK-LABEL: @test11
-; CHECK: r0 = !cmp.eq(r0, r1)
+; CHECK: = !cmp.eq(r0, r1)
define i32 @test11(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpneq(i32 %Rs, i32 %Rt)
@@ -41,7 +41,7 @@ entry:
}
; CHECK-LABEL: @test12
-; CHECK: r0 = cmp.eq(r0, #23)
+; CHECK: = cmp.eq(r0, #23)
define i32 @test12(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpeqi(i32 %Rs, i32 23)
@@ -49,7 +49,7 @@ entry:
}
; CHECK-LABEL: @test13
-; CHECK: r0 = !cmp.eq(r0, #47)
+; CHECK: = !cmp.eq(r0, #47)
define i32 @test13(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.rcmpneqi(i32 %Rs, i32 47)
@@ -57,7 +57,7 @@ entry:
}
; CHECK-LABEL: @test20
-; CHECK: p0 = cmpb.eq(r0, r1)
+; CHECK: = cmpb.eq(r0, r1)
define i32 @test20(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbeq(i32 %Rs, i32 %Rt)
@@ -65,7 +65,7 @@ entry:
}
; CHECK-LABEL: @test21
-; CHECK: p0 = cmpb.gt(r0, r1)
+; CHECK: = cmpb.gt(r0, r1)
define i32 @test21(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgt(i32 %Rs, i32 %Rt)
@@ -73,7 +73,7 @@ entry:
}
; CHECK-LABEL: @test22
-; CHECK: p0 = cmpb.gtu(r0, r1)
+; CHECK: = cmpb.gtu(r0, r1)
define i32 @test22(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgtu(i32 %Rs, i32 %Rt)
@@ -81,7 +81,7 @@ entry:
}
; CHECK-LABEL: @test23
-; CHECK: p0 = cmpb.eq(r0, #56)
+; CHECK: = cmpb.eq(r0, #56)
define i32 @test23(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbeqi(i32 %Rs, i32 56)
@@ -89,7 +89,7 @@ entry:
}
; CHECK-LABEL: @test24
-; CHECK: p0 = cmpb.gt(r0, #29)
+; CHECK: = cmpb.gt(r0, #29)
define i32 @test24(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgti(i32 %Rs, i32 29)
@@ -97,7 +97,7 @@ entry:
}
; CHECK-LABEL: @test25
-; CHECK: p0 = cmpb.gtu(r0, #111)
+; CHECK: = cmpb.gtu(r0, #111)
define i32 @test25(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpbgtui(i32 %Rs, i32 111)
@@ -105,7 +105,7 @@ entry:
}
; CHECK-LABEL: @test30
-; CHECK: p0 = cmph.eq(r0, r1)
+; CHECK: = cmph.eq(r0, r1)
define i32 @test30(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpheq(i32 %Rs, i32 %Rt)
@@ -113,7 +113,7 @@ entry:
}
; CHECK-LABEL: @test31
-; CHECK: p0 = cmph.gt(r0, r1)
+; CHECK: = cmph.gt(r0, r1)
define i32 @test31(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgt(i32 %Rs, i32 %Rt)
@@ -121,7 +121,7 @@ entry:
}
; CHECK-LABEL: @test32
-; CHECK: p0 = cmph.gtu(r0, r1)
+; CHECK: = cmph.gtu(r0, r1)
define i32 @test32(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgtu(i32 %Rs, i32 %Rt)
@@ -129,7 +129,7 @@ entry:
}
; CHECK-LABEL: @test33
-; CHECK: p0 = cmph.eq(r0, #-123)
+; CHECK: = cmph.eq(r0, #-123)
define i32 @test33(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmpheqi(i32 %Rs, i32 -123)
@@ -137,7 +137,7 @@ entry:
}
; CHECK-LABEL: @test34
-; CHECK: p0 = cmph.gt(r0, #-3)
+; CHECK: = cmph.gt(r0, #-3)
define i32 @test34(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgti(i32 %Rs, i32 -3)
@@ -145,7 +145,7 @@ entry:
}
; CHECK-LABEL: @test35
-; CHECK: p0 = cmph.gtu(r0, #13)
+; CHECK: = cmph.gtu(r0, #13)
define i32 @test35(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.cmphgtui(i32 %Rs, i32 13)
@@ -153,7 +153,7 @@ entry:
}
; CHECK-LABEL: @test40
-; CHECK: r1:0 = vmux(p0, r3:2, r5:4)
+; CHECK: = vmux(p0, r3:2, r5:4)
define i64 @test40(i32 %Pu, i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.C2.vmux(i32 %Pu, i64 %Rs, i64 %Rt)
@@ -161,7 +161,7 @@ entry:
}
; CHECK-LABEL: @test41
-; CHECK: p0 = any8(vcmpb.eq(r1:0, r3:2))
+; CHECK: = any8(vcmpb.eq(r1:0, r3:2))
define i32 @test41(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.vcmpbeq.any(i64 %Rs, i64 %Rt)
@@ -169,7 +169,7 @@ entry:
}
; CHECK-LABEL: @test50
-; CHECK: r1:0 = add(r1:0, r3:2)
+; CHECK: = add(r1:0, r3:2)
define i64 @test50(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addp(i64 %Rs, i64 %Rt)
@@ -177,7 +177,7 @@ entry:
}
; CHECK-LABEL: @test51
-; CHECK: r1:0 = add(r1:0, r3:2):sat
+; CHECK: = add(r1:0, r3:2):sat
define i64 @test51(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addpsat(i64 %Rs, i64 %Rt)
@@ -185,7 +185,7 @@ entry:
}
; CHECK-LABEL: @test52
-; CHECK: r1:0 = sub(r1:0, r3:2)
+; CHECK: = sub(r1:0, r3:2)
define i64 @test52(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.subp(i64 %Rs, i64 %Rt)
@@ -193,7 +193,7 @@ entry:
}
; CHECK-LABEL: @test53
-; CHECK: r1:0 = add(r0, r3:2)
+; CHECK: = add(r1:0, r3:2):raw:
define i64 @test53(i32 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.addsp(i32 %Rs, i64 %Rt)
@@ -201,7 +201,7 @@ entry:
}
; CHECK-LABEL: @test54
-; CHECK: r1:0 = and(r1:0, r3:2)
+; CHECK: = and(r1:0, r3:2)
define i64 @test54(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.andp(i64 %Rs, i64 %Rt)
@@ -209,7 +209,7 @@ entry:
}
; CHECK-LABEL: @test55
-; CHECK: r1:0 = or(r1:0, r3:2)
+; CHECK: = or(r1:0, r3:2)
define i64 @test55(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.orp(i64 %Rs, i64 %Rt)
@@ -217,7 +217,7 @@ entry:
}
; CHECK-LABEL: @test56
-; CHECK: r1:0 = xor(r1:0, r3:2)
+; CHECK: = xor(r1:0, r3:2)
define i64 @test56(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A2.xorp(i64 %Rs, i64 %Rt)
@@ -225,7 +225,7 @@ entry:
}
; CHECK-LABEL: @test57
-; CHECK: r1:0 = and(r1:0, ~r3:2)
+; CHECK: = and(r1:0, ~r3:2)
define i64 @test57(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.andnp(i64 %Rs, i64 %Rt)
@@ -233,7 +233,7 @@ entry:
}
; CHECK-LABEL: @test58
-; CHECK: r1:0 = or(r1:0, ~r3:2)
+; CHECK: = or(r1:0, ~r3:2)
define i64 @test58(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.ornp(i64 %Rs, i64 %Rt)
@@ -241,7 +241,7 @@ entry:
}
; CHECK-LABEL: @test60
-; CHECK: r0 = add(r0.l, r1.l)
+; CHECK: = add(r0.l, r1.l)
define i32 @test60(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.ll(i32 %Rs, i32 %Rt)
@@ -249,7 +249,7 @@ entry:
}
; CHECK-LABEL: @test61
-; CHECK: r0 = add(r0.l, r1.h)
+; CHECK: = add(r0.l, r1.h)
define i32 @test61(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.hl(i32 %Rs, i32 %Rt)
@@ -257,7 +257,7 @@ entry:
}
; CHECK-LABEL: @test62
-; CHECK: r0 = add(r0.l, r1.l):sat
+; CHECK: = add(r0.l, r1.l):sat
define i32 @test62(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %Rs, i32 %Rt)
@@ -265,7 +265,7 @@ entry:
}
; CHECK-LABEL: @test63
-; CHECK: r0 = add(r0.l, r1.h):sat
+; CHECK: = add(r0.l, r1.h):sat
define i32 @test63(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32 %Rs, i32 %Rt)
@@ -273,7 +273,7 @@ entry:
}
; CHECK-LABEL: @test64
-; CHECK: r0 = add(r0.l, r1.l):<<16
+; CHECK: = add(r0.l, r1.l):<<16
define i32 @test64(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.ll(i32 %Rs, i32 %Rt)
@@ -281,7 +281,7 @@ entry:
}
; CHECK-LABEL: @test65
-; CHECK: r0 = add(r0.l, r1.h):<<16
+; CHECK: = add(r0.l, r1.h):<<16
define i32 @test65(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.lh(i32 %Rs, i32 %Rt)
@@ -289,7 +289,7 @@ entry:
}
; CHECK-LABEL: @test66
-; CHECK: r0 = add(r0.h, r1.l):<<16
+; CHECK: = add(r0.h, r1.l):<<16
define i32 @test66(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.hl(i32 %Rs, i32 %Rt)
@@ -297,7 +297,7 @@ entry:
}
; CHECK-LABEL: @test67
-; CHECK: r0 = add(r0.h, r1.h):<<16
+; CHECK: = add(r0.h, r1.h):<<16
define i32 @test67(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.hh(i32 %Rs, i32 %Rt)
@@ -305,7 +305,7 @@ entry:
}
; CHECK-LABEL: @test68
-; CHECK: r0 = add(r0.l, r1.l):sat:<<16
+; CHECK: = add(r0.l, r1.l):sat:<<16
define i32 @test68(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32 %Rs, i32 %Rt)
@@ -313,7 +313,7 @@ entry:
}
; CHECK-LABEL: @test69
-; CHECK: r0 = add(r0.l, r1.h):sat:<<16
+; CHECK: = add(r0.l, r1.h):sat:<<16
define i32 @test69(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32 %Rs, i32 %Rt)
@@ -321,7 +321,7 @@ entry:
}
; CHECK-LABEL: @test6A
-; CHECK: r0 = add(r0.h, r1.l):sat:<<16
+; CHECK: = add(r0.h, r1.l):sat:<<16
define i32 @test6A(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32 %Rs, i32 %Rt)
@@ -329,7 +329,7 @@ entry:
}
; CHECK-LABEL: @test6B
-; CHECK: r0 = add(r0.h, r1.h):sat:<<16
+; CHECK: = add(r0.h, r1.h):sat:<<16
define i32 @test6B(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32 %Rs, i32 %Rt)
@@ -337,7 +337,7 @@ entry:
}
; CHECK-LABEL: @test70
-; CHECK: r0 = sub(r0.l, r1.l)
+; CHECK: = sub(r0.l, r1.l)
define i32 @test70(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.ll(i32 %Rs, i32 %Rt)
@@ -345,7 +345,7 @@ entry:
}
; CHECK-LABEL: @test71
-; CHECK: r0 = sub(r0.l, r1.h)
+; CHECK: = sub(r0.l, r1.h)
define i32 @test71(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.hl(i32 %Rs, i32 %Rt)
@@ -353,7 +353,7 @@ entry:
}
; CHECK-LABEL: @test72
-; CHECK: r0 = sub(r0.l, r1.l):sat
+; CHECK: = sub(r0.l, r1.l):sat
define i32 @test72(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32 %Rs, i32 %Rt)
@@ -361,7 +361,7 @@ entry:
}
; CHECK-LABEL: @test73
-; CHECK: r0 = sub(r0.l, r1.h):sat
+; CHECK: = sub(r0.l, r1.h):sat
define i32 @test73(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32 %Rs, i32 %Rt)
@@ -369,7 +369,7 @@ entry:
}
; CHECK-LABEL: @test74
-; CHECK: r0 = sub(r0.l, r1.l):<<16
+; CHECK: = sub(r0.l, r1.l):<<16
define i32 @test74(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.ll(i32 %Rs, i32 %Rt)
@@ -377,7 +377,7 @@ entry:
}
; CHECK-LABEL: @test75
-; CHECK: r0 = sub(r0.l, r1.h):<<16
+; CHECK: = sub(r0.l, r1.h):<<16
define i32 @test75(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.lh(i32 %Rs, i32 %Rt)
@@ -385,7 +385,7 @@ entry:
}
; CHECK-LABEL: @test76
-; CHECK: r0 = sub(r0.h, r1.l):<<16
+; CHECK: = sub(r0.h, r1.l):<<16
define i32 @test76(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.hl(i32 %Rs, i32 %Rt)
@@ -393,7 +393,7 @@ entry:
}
; CHECK-LABEL: @test77
-; CHECK: r0 = sub(r0.h, r1.h):<<16
+; CHECK: = sub(r0.h, r1.h):<<16
define i32 @test77(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.hh(i32 %Rs, i32 %Rt)
@@ -401,7 +401,7 @@ entry:
}
; CHECK-LABEL: @test78
-; CHECK: r0 = sub(r0.l, r1.l):sat:<<16
+; CHECK: = sub(r0.l, r1.l):sat:<<16
define i32 @test78(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32 %Rs, i32 %Rt)
@@ -409,7 +409,7 @@ entry:
}
; CHECK-LABEL: @test79
-; CHECK: r0 = sub(r0.l, r1.h):sat:<<16
+; CHECK: = sub(r0.l, r1.h):sat:<<16
define i32 @test79(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32 %Rs, i32 %Rt)
@@ -417,7 +417,7 @@ entry:
}
; CHECK-LABEL: @test7A
-; CHECK: r0 = sub(r0.h, r1.l):sat:<<16
+; CHECK: = sub(r0.h, r1.l):sat:<<16
define i32 @test7A(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32 %Rs, i32 %Rt)
@@ -425,7 +425,7 @@ entry:
}
; CHECK-LABEL: @test7B
-; CHECK: r0 = sub(r0.h, r1.h):sat:<<16
+; CHECK: = sub(r0.h, r1.h):sat:<<16
define i32 @test7B(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32 %Rs, i32 %Rt)
@@ -433,7 +433,7 @@ entry:
}
; CHECK-LABEL: @test90
-; CHECK: r0 = and(#1, asl(r0, #2))
+; CHECK: = and(#1, asl(r0, #2))
define i32 @test90(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.andi.asl.ri(i32 1, i32 %Rs, i32 2)
@@ -441,7 +441,7 @@ entry:
}
; CHECK-LABEL: @test91
-; CHECK: r0 = or(#1, asl(r0, #2))
+; CHECK: = or(#1, asl(r0, #2))
define i32 @test91(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.ori.asl.ri(i32 1, i32 %Rs, i32 2)
@@ -449,7 +449,7 @@ entry:
}
; CHECK-LABEL: @test92
-; CHECK: r0 = add(#1, asl(r0, #2))
+; CHECK: = add(#1, asl(r0, #2))
define i32 @test92(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.addi.asl.ri(i32 1, i32 %Rs, i32 2)
@@ -457,7 +457,7 @@ entry:
}
; CHECK-LABEL: @test93
-; CHECK: r0 = sub(#1, asl(r0, #2))
+; CHECK: = sub(#1, asl(r0, #2))
define i32 @test93(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.subi.asl.ri(i32 1, i32 %Rs, i32 2)
@@ -465,7 +465,7 @@ entry:
}
; CHECK-LABEL: @test94
-; CHECK: r0 = and(#1, lsr(r0, #2))
+; CHECK: = and(#1, lsr(r0, #2))
define i32 @test94(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.andi.lsr.ri(i32 1, i32 %Rs, i32 2)
@@ -473,7 +473,7 @@ entry:
}
; CHECK-LABEL: @test95
-; CHECK: r0 = or(#1, lsr(r0, #2))
+; CHECK: = or(#1, lsr(r0, #2))
define i32 @test95(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.ori.lsr.ri(i32 1, i32 %Rs, i32 2)
@@ -481,7 +481,7 @@ entry:
}
; CHECK-LABEL: @test96
-; CHECK: r0 = add(#1, lsr(r0, #2))
+; CHECK: = add(#1, lsr(r0, #2))
define i32 @test96(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.addi.lsr.ri(i32 1, i32 %Rs, i32 2)
@@ -489,7 +489,7 @@ entry:
}
; CHECK-LABEL: @test97
-; CHECK: r0 = sub(#1, lsr(r0, #2))
+; CHECK: = sub(#1, lsr(r0, #2))
define i32 @test97(i32 %Rs) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.subi.lsr.ri(i32 1, i32 %Rs, i32 2)
@@ -497,7 +497,7 @@ entry:
}
; CHECK-LABEL: @test100
-; CHECK: r1:0 = bitsplit(r0, r1)
+; CHECK: = bitsplit(r0, r1)
define i64 @test100(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i64 @llvm.hexagon.A4.bitsplit(i32 %Rs, i32 %Rt)
@@ -505,7 +505,7 @@ entry:
}
; CHECK-LABEL: @test101
-; CHECK: r0 = modwrap(r0, r1)
+; CHECK: = modwrap(r0, r1)
define i32 @test101(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %Rs, i32 %Rt)
@@ -513,7 +513,7 @@ entry:
}
; CHECK-LABEL: @test102
-; CHECK: r0 = parity(r1:0, r3:2)
+; CHECK: = parity(r1:0, r3:2)
define i32 @test102(i64 %Rs, i64 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S2.parityp(i64 %Rs, i64 %Rt)
@@ -521,7 +521,7 @@ entry:
}
; CHECK-LABEL: @test103
-; CHECK: r0 = parity(r0, r1)
+; CHECK: = parity(r0, r1)
define i32 @test103(i32 %Rs, i32 %Rt) #0 {
entry:
%0 = tail call i32 @llvm.hexagon.S4.parity(i32 %Rs, i32 %Rt)
diff --git a/test/CodeGen/Hexagon/bit-eval.ll b/test/CodeGen/Hexagon/bit-eval.ll
new file mode 100644
index 000000000000..1d2be5bfc19d
--- /dev/null
+++ b/test/CodeGen/Hexagon/bit-eval.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon"
+
+; CHECK-LABEL: test1:
+; CHECK: r0 = ##1073741824
+define i32 @test1() #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S2.asr.i.r.rnd(i32 2147483647, i32 0)
+ ret i32 %0
+}
+
+; CHECK-LABEL: test2:
+; CHECK: r0 = ##1073741824
+define i32 @test2() #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S2.asr.i.r.rnd.goodsyntax(i32 2147483647, i32 1)
+ ret i32 %0
+}
+
+; CHECK-LABEL: test3:
+; CHECK: r1:0 = combine(#0, #1)
+define i64 @test3() #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.S4.extractp(i64 -1, i32 63, i32 63)
+ ret i64 %0
+}
+
+; CHECK-LABEL: test4:
+; CHECK: r0 = #1
+define i32 @test4() #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.extract(i32 -1, i32 31, i32 31)
+ ret i32 %0
+}
+
+; CHECK-LABEL: test5:
+; CHECK: r0 = ##-1073741569
+define i32 @test5() #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.subi.lsr.ri(i32 255, i32 -2147483648, i32 1)
+ ret i32 %0
+}
+
+declare i32 @llvm.hexagon.S2.asr.i.r.rnd(i32, i32) #0
+declare i32 @llvm.hexagon.S2.asr.i.r.rnd.goodsyntax(i32, i32) #0
+declare i64 @llvm.hexagon.S4.extractp(i64, i32, i32) #0
+declare i32 @llvm.hexagon.S4.extract(i32, i32, i32) #0
+declare i32 @llvm.hexagon.S4.subi.lsr.ri(i32, i32, i32) #0
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/Hexagon/bit-loop.ll b/test/CodeGen/Hexagon/bit-loop.ll
new file mode 100644
index 000000000000..74a1a276115d
--- /dev/null
+++ b/test/CodeGen/Hexagon/bit-loop.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s | FileCheck %s
+; CHECK-DAG: memh(r{{[0-9]+}}+#0) = r{{[0-9]+}}
+; CHECK-DAG: memh(r{{[0-9]+}}+#2) = r{{[0-9]+}}.h
+; CHECK-DAG: memh(r{{[0-9]+}}+#4) = r{{[0-9]+}}
+; CHECK-DAG: memh(r{{[0-9]+}}+#6) = r{{[0-9]+}}.h
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon"
+
+; Function Attrs: nounwind
+define void @foo(i64* nocapture readonly %r64, i16 zeroext %n, i16 zeroext %s, i64* nocapture %p64) #0 {
+entry:
+ %conv = zext i16 %n to i32
+ %cmp = icmp eq i16 %n, 0
+ br i1 %cmp, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = load i64, i64* %r64, align 8, !tbaa !1
+ %v.sroa.0.0.extract.trunc = trunc i64 %0 to i16
+ %v.sroa.4.0.extract.shift = lshr i64 %0, 16
+ %v.sroa.4.0.extract.trunc = trunc i64 %v.sroa.4.0.extract.shift to i16
+ %v.sroa.5.0.extract.shift = lshr i64 %0, 32
+ %v.sroa.5.0.extract.trunc = trunc i64 %v.sroa.5.0.extract.shift to i16
+ %v.sroa.6.0.extract.shift = lshr i64 %0, 48
+ %v.sroa.6.0.extract.trunc = trunc i64 %v.sroa.6.0.extract.shift to i16
+ %1 = bitcast i64* %p64 to i16*
+ %conv2 = zext i16 %s to i32
+ %add.ptr = getelementptr inbounds i16, i16* %1, i32 %conv2
+ %add.ptr.sum = add nuw nsw i32 %conv2, 1
+ %add.ptr3 = getelementptr inbounds i16, i16* %1, i32 %add.ptr.sum
+ %add.ptr.sum50 = add nuw nsw i32 %conv2, 2
+ %add.ptr4 = getelementptr inbounds i16, i16* %1, i32 %add.ptr.sum50
+ %add.ptr.sum51 = add nuw nsw i32 %conv2, 3
+ %add.ptr5 = getelementptr inbounds i16, i16* %1, i32 %add.ptr.sum51
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %add.ptr11.phi = phi i16* [ %add.ptr11.inc, %for.body ], [ %add.ptr, %for.body.preheader ]
+ %add.ptr16.phi = phi i16* [ %add.ptr16.inc, %for.body ], [ %add.ptr3, %for.body.preheader ]
+ %add.ptr21.phi = phi i16* [ %add.ptr21.inc, %for.body ], [ %add.ptr4, %for.body.preheader ]
+ %add.ptr26.phi = phi i16* [ %add.ptr26.inc, %for.body ], [ %add.ptr5, %for.body.preheader ]
+ %i.058.pmt = phi i32 [ %inc.pmt, %for.body ], [ 0, %for.body.preheader ]
+ %v.sroa.0.157 = phi i16 [ %v.sroa.0.0.extract.trunc34, %for.body ], [ %v.sroa.0.0.extract.trunc, %for.body.preheader ]
+ %v.sroa.4.156 = phi i16 [ %v.sroa.4.0.extract.trunc36, %for.body ], [ %v.sroa.4.0.extract.trunc, %for.body.preheader ]
+ %v.sroa.5.155 = phi i16 [ %v.sroa.5.0.extract.trunc38, %for.body ], [ %v.sroa.5.0.extract.trunc, %for.body.preheader ]
+ %v.sroa.6.154 = phi i16 [ %v.sroa.6.0.extract.trunc40, %for.body ], [ %v.sroa.6.0.extract.trunc, %for.body.preheader ]
+ %q64.153.pn = phi i64* [ %q64.153, %for.body ], [ %r64, %for.body.preheader ]
+ %q64.153 = getelementptr inbounds i64, i64* %q64.153.pn, i32 1
+ store i16 %v.sroa.0.157, i16* %add.ptr11.phi, align 2, !tbaa !5
+ store i16 %v.sroa.4.156, i16* %add.ptr16.phi, align 2, !tbaa !5
+ store i16 %v.sroa.5.155, i16* %add.ptr21.phi, align 2, !tbaa !5
+ store i16 %v.sroa.6.154, i16* %add.ptr26.phi, align 2, !tbaa !5
+ %2 = load i64, i64* %q64.153, align 8, !tbaa !1
+ %v.sroa.0.0.extract.trunc34 = trunc i64 %2 to i16
+ %v.sroa.4.0.extract.shift35 = lshr i64 %2, 16
+ %v.sroa.4.0.extract.trunc36 = trunc i64 %v.sroa.4.0.extract.shift35 to i16
+ %v.sroa.5.0.extract.shift37 = lshr i64 %2, 32
+ %v.sroa.5.0.extract.trunc38 = trunc i64 %v.sroa.5.0.extract.shift37 to i16
+ %v.sroa.6.0.extract.shift39 = lshr i64 %2, 48
+ %v.sroa.6.0.extract.trunc40 = trunc i64 %v.sroa.6.0.extract.shift39 to i16
+ %inc.pmt = add i32 %i.058.pmt, 1
+ %cmp8 = icmp slt i32 %inc.pmt, %conv
+ %add.ptr11.inc = getelementptr i16, i16* %add.ptr11.phi, i32 4
+ %add.ptr16.inc = getelementptr i16, i16* %add.ptr16.phi, i32 4
+ %add.ptr21.inc = getelementptr i16, i16* %add.ptr21.phi, i32 4
+ %add.ptr26.inc = getelementptr i16, i16* %add.ptr26.phi, i32 4
+ br i1 %cmp8, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+attributes #0 = { nounwind }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"long long", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"short", !3, i64 0}
diff --git a/test/CodeGen/Hexagon/cfi-late.ll b/test/CodeGen/Hexagon/cfi-late.ll
new file mode 100644
index 000000000000..ce38711ae8d7
--- /dev/null
+++ b/test/CodeGen/Hexagon/cfi-late.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=hexagon -enable-misched=false < %s | FileCheck %s
+; This testcase causes the scheduler to crash for some reason. Disable
+; it for now.
+
+target datalayout = "e-m:e-p:32:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon-unknown--elf"
+
+; Check that allocframe was packetized with the two adds.
+; CHECK: foo:
+; CHECK: {
+; CHECK-DAG: allocframe
+; CHECK-DAG: add
+; CHECK-DAG: add
+; CHECK: }
+; CHECK: dealloc_return
+; CHECK: }
+
+; Function Attrs: nounwind
+define i32 @foo(i32 %x, i32 %y) #0 !dbg !4 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !9, metadata !14), !dbg !15
+ tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !10, metadata !14), !dbg !16
+ %add = add nsw i32 %x, 1, !dbg !17
+ %add1 = add nsw i32 %y, 1, !dbg !18
+ %call = tail call i32 @bar(i32 %add, i32 %add1) #3, !dbg !19
+ %add2 = add nsw i32 %call, 1, !dbg !20
+ ret i32 %add2, !dbg !21
+}
+
+declare i32 @bar(i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+!llvm.ident = !{!13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (http://llvm.org/git/clang.git 15506a21305e212c406f980ed9b6b1bac785df56)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "cfi-late.c", directory: "/test")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{!9, !10}
+!9 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!10 = !DILocalVariable(name: "y", arg: 2, scope: !4, file: !1, line: 3, type: !7)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git 15506a21305e212c406f980ed9b6b1bac785df56)"}
+!14 = !DIExpression()
+!15 = !DILocation(line: 3, column: 13, scope: !4)
+!16 = !DILocation(line: 3, column: 20, scope: !4)
+!17 = !DILocation(line: 4, column: 15, scope: !4)
+!18 = !DILocation(line: 4, column: 20, scope: !4)
+!19 = !DILocation(line: 4, column: 10, scope: !4)
+!20 = !DILocation(line: 4, column: 24, scope: !4)
+!21 = !DILocation(line: 4, column: 3, scope: !4)
diff --git a/test/CodeGen/Hexagon/clr_set_toggle.ll b/test/CodeGen/Hexagon/clr_set_toggle.ll
index 87c52956129e..4e90f3d99a1e 100644
--- a/test/CodeGen/Hexagon/clr_set_toggle.ll
+++ b/test/CodeGen/Hexagon/clr_set_toggle.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -hexagon-bit=0 < %s | FileCheck %s
; Optimized bitwise operations.
define i32 @my_clrbit(i32 %x) nounwind {
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
index 2e320d977d62..8f5cec88d692 100644
--- a/test/CodeGen/Hexagon/combine.ll
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
@j = external global i32
diff --git a/test/CodeGen/Hexagon/combine_ir.ll b/test/CodeGen/Hexagon/combine_ir.ll
index 634a5c82a916..0d781d8d5d49 100644
--- a/test/CodeGen/Hexagon/combine_ir.ll
+++ b/test/CodeGen/Hexagon/combine_ir.ll
@@ -1,18 +1,8 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: word
-; CHECK: combine(#0
-
-define void @word(i32* nocapture %a) nounwind {
-entry:
- %0 = load i32, i32* %a, align 4
- %1 = zext i32 %0 to i64
- tail call void @bar(i64 %1) nounwind
- ret void
-}
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hsdr < %s | FileCheck %s
declare void @bar(i64)
-; CHECK: halfword
+; CHECK-LABEL: halfword:
; CHECK: combine(#0
define void @halfword(i16* nocapture %a) nounwind {
@@ -28,7 +18,7 @@ entry:
ret void
}
-; CHECK: byte
+; CHECK-LABEL: byte:
; CHECK: combine(#0
define void @byte(i8* nocapture %a) nounwind {
diff --git a/test/CodeGen/Hexagon/early-if-conversion-bug1.ll b/test/CodeGen/Hexagon/early-if-conversion-bug1.ll
new file mode 100644
index 000000000000..6739b03985dd
--- /dev/null
+++ b/test/CodeGen/Hexagon/early-if-conversion-bug1.ll
@@ -0,0 +1,412 @@
+; RUN: llc -O2 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; we do not want to see a segv.
+; CHECK-NOT: segmentation
+; CHECK: call
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon"
+
+%"class.std::__1::basic_string" = type { %"class.std::__1::__compressed_pair" }
+%"class.std::__1::__compressed_pair" = type { %"class.std::__1::__libcpp_compressed_pair_imp" }
+%"class.std::__1::__libcpp_compressed_pair_imp" = type { %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep" }
+%"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep" = type { %union.anon }
+%union.anon = type { %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__long" }
+%"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__long" = type { i32, i32, i8* }
+%"class.std::__1::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, i8*, i8*, void (i8, %"class.std::__1::ios_base"*, i32)**, i32*, i32, i32, i32*, i32, i32, i8**, i32, i32 }
+%"class.std::__1::basic_streambuf" = type { i32 (...)**, %"class.std::__1::locale", i8*, i8*, i8*, i8*, i8*, i8* }
+%"class.std::__1::locale" = type { %"class.std::__1::locale::__imp"* }
+%"class.std::__1::locale::__imp" = type opaque
+%"class.std::__1::allocator" = type { i8 }
+%"class.std::__1::ostreambuf_iterator" = type { %"class.std::__1::basic_streambuf"* }
+%"class.std::__1::__basic_string_common" = type { i8 }
+%"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__short" = type { %union.anon.0, [11 x i8] }
+%union.anon.0 = type { i8 }
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind
+declare void @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED1Ev(%"class.std::__1::basic_string"*) #1
+
+define weak_odr hidden i32 @_ZNSt3__116__pad_and_outputIcNS_11char_traitsIcEEEENS_19ostreambuf_iteratorIT_T0_EES6_PKS4_S8_S8_RNS_8ios_baseES4_(i32 %__s.coerce, i8* %__ob, i8* %__op, i8* %__oe, %"class.std::__1::ios_base"* nonnull %__iob, i8 zeroext %__fl) #2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %this.addr.i66 = alloca %"class.std::__1::basic_streambuf"*, align 4
+ %__s.addr.i67 = alloca i8*, align 4
+ %__n.addr.i68 = alloca i32, align 4
+ %__p.addr.i.i = alloca i8*, align 4
+ %this.addr.i.i.i13.i.i = alloca %"class.std::__1::__libcpp_compressed_pair_imp"*, align 4
+ %this.addr.i.i14.i.i = alloca %"class.std::__1::__compressed_pair"*, align 4
+ %this.addr.i15.i.i = alloca %"class.std::__1::basic_string"*, align 4
+ %__x.addr.i.i.i.i.i = alloca i8*, align 4
+ %__r.addr.i.i.i.i = alloca i8*, align 4
+ %this.addr.i.i.i4.i.i = alloca %"class.std::__1::__libcpp_compressed_pair_imp"*, align 4
+ %this.addr.i.i5.i.i = alloca %"class.std::__1::__compressed_pair"*, align 4
+ %this.addr.i6.i.i = alloca %"class.std::__1::basic_string"*, align 4
+ %this.addr.i.i.i.i.i56 = alloca %"class.std::__1::__libcpp_compressed_pair_imp"*, align 4
+ %this.addr.i.i.i.i57 = alloca %"class.std::__1::__compressed_pair"*, align 4
+ %this.addr.i.i.i58 = alloca %"class.std::__1::basic_string"*, align 4
+ %this.addr.i.i59 = alloca %"class.std::__1::basic_string"*, align 4
+ %this.addr.i60 = alloca %"class.std::__1::basic_string"*, align 4
+ %this.addr.i.i.i.i.i = alloca %"class.std::__1::allocator"*, align 4
+ %this.addr.i.i.i.i = alloca %"class.std::__1::__libcpp_compressed_pair_imp"*, align 4
+ %this.addr.i.i.i = alloca %"class.std::__1::__compressed_pair"*, align 4
+ %this.addr.i.i = alloca %"class.std::__1::basic_string"*, align 4
+ %__n.addr.i.i = alloca i32, align 4
+ %__c.addr.i.i = alloca i8, align 1
+ %this.addr.i53 = alloca %"class.std::__1::basic_string"*, align 4
+ %__n.addr.i54 = alloca i32, align 4
+ %__c.addr.i = alloca i8, align 1
+ %this.addr.i46 = alloca %"class.std::__1::basic_streambuf"*, align 4
+ %__s.addr.i47 = alloca i8*, align 4
+ %__n.addr.i48 = alloca i32, align 4
+ %this.addr.i44 = alloca %"class.std::__1::basic_streambuf"*, align 4
+ %__s.addr.i = alloca i8*, align 4
+ %__n.addr.i = alloca i32, align 4
+ %this.addr.i41 = alloca %"class.std::__1::ios_base"*, align 4
+ %__wide.addr.i = alloca i32, align 4
+ %__r.i = alloca i32, align 4
+ %this.addr.i = alloca %"class.std::__1::ios_base"*, align 4
+ %retval = alloca %"class.std::__1::ostreambuf_iterator", align 4
+ %__s = alloca %"class.std::__1::ostreambuf_iterator", align 4
+ %__ob.addr = alloca i8*, align 4
+ %__op.addr = alloca i8*, align 4
+ %__oe.addr = alloca i8*, align 4
+ %__iob.addr = alloca %"class.std::__1::ios_base"*, align 4
+ %__fl.addr = alloca i8, align 1
+ %__sz = alloca i32, align 4
+ %__ns = alloca i32, align 4
+ %__np = alloca i32, align 4
+ %__sp = alloca %"class.std::__1::basic_string", align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %cleanup.dest.slot = alloca i32
+ %coerce.dive = getelementptr %"class.std::__1::ostreambuf_iterator", %"class.std::__1::ostreambuf_iterator"* %__s, i32 0, i32 0
+ %coerce.val.ip = inttoptr i32 %__s.coerce to %"class.std::__1::basic_streambuf"*
+ store %"class.std::__1::basic_streambuf"* %coerce.val.ip, %"class.std::__1::basic_streambuf"** %coerce.dive
+ store i8* %__ob, i8** %__ob.addr, align 4
+ store i8* %__op, i8** %__op.addr, align 4
+ store i8* %__oe, i8** %__oe.addr, align 4
+ store %"class.std::__1::ios_base"* %__iob, %"class.std::__1::ios_base"** %__iob.addr, align 4
+ store i8 %__fl, i8* %__fl.addr, align 1
+ %__sbuf_ = getelementptr inbounds %"class.std::__1::ostreambuf_iterator", %"class.std::__1::ostreambuf_iterator"* %__s, i32 0, i32 0
+ %0 = load %"class.std::__1::basic_streambuf"*, %"class.std::__1::basic_streambuf"** %__sbuf_, align 4
+ %cmp = icmp eq %"class.std::__1::basic_streambuf"* %0, null
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %1 = bitcast %"class.std::__1::ostreambuf_iterator"* %retval to i8*
+ %2 = bitcast %"class.std::__1::ostreambuf_iterator"* %__s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 4, i32 4, i1 false)
+ br label %return
+
+if.end: ; preds = %entry
+ %3 = load i8*, i8** %__oe.addr, align 4
+ %4 = load i8*, i8** %__ob.addr, align 4
+ %sub.ptr.lhs.cast = ptrtoint i8* %3 to i32
+ %sub.ptr.rhs.cast = ptrtoint i8* %4 to i32
+ %sub.ptr.sub = sub i32 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+ store i32 %sub.ptr.sub, i32* %__sz, align 4
+ %5 = load %"class.std::__1::ios_base"*, %"class.std::__1::ios_base"** %__iob.addr, align 4
+ store %"class.std::__1::ios_base"* %5, %"class.std::__1::ios_base"** %this.addr.i, align 4
+ %this1.i = load %"class.std::__1::ios_base"*, %"class.std::__1::ios_base"** %this.addr.i
+ %__width_.i = getelementptr inbounds %"class.std::__1::ios_base", %"class.std::__1::ios_base"* %this1.i, i32 0, i32 3
+ %6 = load i32, i32* %__width_.i, align 4
+ store i32 %6, i32* %__ns, align 4
+ %7 = load i32, i32* %__ns, align 4
+ %8 = load i32, i32* %__sz, align 4
+ %cmp1 = icmp sgt i32 %7, %8
+ br i1 %cmp1, label %if.then2, label %if.else
+
+if.then2: ; preds = %if.end
+ %9 = load i32, i32* %__sz, align 4
+ %10 = load i32, i32* %__ns, align 4
+ %sub = sub nsw i32 %10, %9
+ store i32 %sub, i32* %__ns, align 4
+ br label %if.end3
+
+if.else: ; preds = %if.end
+ store i32 0, i32* %__ns, align 4
+ br label %if.end3
+
+if.end3: ; preds = %if.else, %if.then2
+ %11 = load i8*, i8** %__op.addr, align 4
+ %12 = load i8*, i8** %__ob.addr, align 4
+ %sub.ptr.lhs.cast4 = ptrtoint i8* %11 to i32
+ %sub.ptr.rhs.cast5 = ptrtoint i8* %12 to i32
+ %sub.ptr.sub6 = sub i32 %sub.ptr.lhs.cast4, %sub.ptr.rhs.cast5
+ store i32 %sub.ptr.sub6, i32* %__np, align 4
+ %13 = load i32, i32* %__np, align 4
+ %cmp7 = icmp sgt i32 %13, 0
+ br i1 %cmp7, label %if.then8, label %if.end15
+
+if.then8: ; preds = %if.end3
+ %__sbuf_9 = getelementptr inbounds %"class.std::__1::ostreambuf_iterator", %"class.std::__1::ostreambuf_iterator"* %__s, i32 0, i32 0
+ %14 = load %"class.std::__1::basic_streambuf"*, %"class.std::__1::basic_streambuf"** %__sbuf_9, align 4
+ %15 = load i8*, i8** %__ob.addr, align 4
+ %16 = load i32, i32* %__np, align 4
+ store %"class.std::__1::basic_streambuf"* %14, %"class.std::__1::basic_streambuf"** %this.addr.i46, align 4
+ store i8* %15, i8** %__s.addr.i47, align 4
+ store i32 %16, i32* %__n.addr.i48, align 4
+ %this1.i49 = load %"class.std::__1::basic_streambuf"*, %"class.std::__1::basic_streambuf"** %this.addr.i46
+ %17 = bitcast %"class.std::__1::basic_streambuf"* %this1.i49 to i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)***
+ %vtable.i50 = load i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)**, i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)*** %17
+ %vfn.i51 = getelementptr inbounds i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)*, i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)** %vtable.i50, i64 12
+ %18 = load i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)*, i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)** %vfn.i51
+ %19 = load i8*, i8** %__s.addr.i47, align 4
+ %20 = load i32, i32* %__n.addr.i48, align 4
+ %call.i52 = call i32 %18(%"class.std::__1::basic_streambuf"* %this1.i49, i8* %19, i32 %20)
+ %21 = load i32, i32* %__np, align 4
+ %cmp11 = icmp ne i32 %call.i52, %21
+ br i1 %cmp11, label %if.then12, label %if.end14
+
+if.then12: ; preds = %if.then8
+ %__sbuf_13 = getelementptr inbounds %"class.std::__1::ostreambuf_iterator", %"class.std::__1::ostreambuf_iterator"* %__s, i32 0, i32 0
+ store %"class.std::__1::basic_streambuf"* null, %"class.std::__1::basic_streambuf"** %__sbuf_13, align 4
+ %22 = bitcast %"class.std::__1::ostreambuf_iterator"* %retval to i8*
+ %23 = bitcast %"class.std::__1::ostreambuf_iterator"* %__s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %22, i8* %23, i32 4, i32 4, i1 false)
+ br label %return
+
+if.end14: ; preds = %if.then8
+ br label %if.end15
+
+if.end15: ; preds = %if.end14, %if.end3
+ %24 = load i32, i32* %__ns, align 4
+ %cmp16 = icmp sgt i32 %24, 0
+ br i1 %cmp16, label %if.then17, label %if.end25
+
+if.then17: ; preds = %if.end15
+ %25 = load i32, i32* %__ns, align 4
+ %26 = load i8, i8* %__fl.addr, align 1
+ store %"class.std::__1::basic_string"* %__sp, %"class.std::__1::basic_string"** %this.addr.i53, align 4
+ store i32 %25, i32* %__n.addr.i54, align 4
+ store i8 %26, i8* %__c.addr.i, align 1
+ %this1.i55 = load %"class.std::__1::basic_string"*, %"class.std::__1::basic_string"** %this.addr.i53
+ %27 = load i32, i32* %__n.addr.i54, align 4
+ %28 = load i8, i8* %__c.addr.i, align 1
+ store %"class.std::__1::basic_string"* %this1.i55, %"class.std::__1::basic_string"** %this.addr.i.i, align 4
+ store i32 %27, i32* %__n.addr.i.i, align 4
+ store i8 %28, i8* %__c.addr.i.i, align 1
+ %this1.i.i = load %"class.std::__1::basic_string"*, %"class.std::__1::basic_string"** %this.addr.i.i
+ %29 = bitcast %"class.std::__1::basic_string"* %this1.i.i to %"class.std::__1::__basic_string_common"*
+ %__r_.i.i = getelementptr inbounds %"class.std::__1::basic_string", %"class.std::__1::basic_string"* %this1.i.i, i32 0, i32 0
+ store %"class.std::__1::__compressed_pair"* %__r_.i.i, %"class.std::__1::__compressed_pair"** %this.addr.i.i.i, align 4
+ %this1.i.i.i = load %"class.std::__1::__compressed_pair"*, %"class.std::__1::__compressed_pair"** %this.addr.i.i.i
+ %30 = bitcast %"class.std::__1::__compressed_pair"* %this1.i.i.i to %"class.std::__1::__libcpp_compressed_pair_imp"*
+ store %"class.std::__1::__libcpp_compressed_pair_imp"* %30, %"class.std::__1::__libcpp_compressed_pair_imp"** %this.addr.i.i.i.i, align 4
+ %this1.i.i.i.i = load %"class.std::__1::__libcpp_compressed_pair_imp"*, %"class.std::__1::__libcpp_compressed_pair_imp"** %this.addr.i.i.i.i
+ %31 = bitcast %"class.std::__1::__libcpp_compressed_pair_imp"* %this1.i.i.i.i to %"class.std::__1::allocator"*
+ store %"class.std::__1::allocator"* %31, %"class.std::__1::allocator"** %this.addr.i.i.i.i.i, align 4
+ %this1.i.i.i.i.i = load %"class.std::__1::allocator"*, %"class.std::__1::allocator"** %this.addr.i.i.i.i.i
+ %__first_.i.i.i.i = getelementptr inbounds %"class.std::__1::__libcpp_compressed_pair_imp", %"class.std::__1::__libcpp_compressed_pair_imp"* %this1.i.i.i.i, i32 0, i32 0
+ %32 = load i32, i32* %__n.addr.i.i, align 4
+ %33 = load i8, i8* %__c.addr.i.i, align 1
+ call void @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE6__initEjc(%"class.std::__1::basic_string"* %this1.i.i, i32 %32, i8 zeroext %33)
+ %__sbuf_18 = getelementptr inbounds %"class.std::__1::ostreambuf_iterator", %"class.std::__1::ostreambuf_iterator"* %__s, i32 0, i32 0
+ %34 = load %"class.std::__1::basic_streambuf"*, %"class.std::__1::basic_streambuf"** %__sbuf_18, align 4
+ store %"class.std::__1::basic_string"* %__sp, %"class.std::__1::basic_string"** %this.addr.i60, align 4
+ %this1.i61 = load %"class.std::__1::basic_string"*, %"class.std::__1::basic_string"** %this.addr.i60
+ store %"class.std::__1::basic_string"* %this1.i61, %"class.std::__1::basic_string"** %this.addr.i.i59, align 4
+ %this1.i.i62 = load %"class.std::__1::basic_string"*, %"class.std::__1::basic_string"** %this.addr.i.i59
+ store %"class.std::__1::basic_string"* %this1.i.i62, %"class.std::__1::basic_string"** %this.addr.i.i.i58, align 4
+ %this1.i.i.i63 = load %"class.std::__1::basic_string"*, %"class.std::__1::basic_string"** %this.addr.i.i.i58
+ %__r_.i.i.i = getelementptr inbounds %"class.std::__1::basic_string", %"class.std::__1::basic_string"* %this1.i.i.i63, i32 0, i32 0
+ store %"class.std::__1::__compressed_pair"* %__r_.i.i.i, %"class.std::__1::__compressed_pair"** %this.addr.i.i.i.i57, align 4
+ %this1.i.i.i.i64 = load %"class.std::__1::__compressed_pair"*, %"class.std::__1::__compressed_pair"** %this.addr.i.i.i.i57
+ %35 = bitcast %"class.std::__1::__compressed_pair"* %this1.i.i.i.i64 to %"class.std::__1::__libcpp_compressed_pair_imp"*
+ store %"class.std::__1::__libcpp_compressed_pair_imp"* %35, %"class.std::__1::__libcpp_compressed_pair_imp"** %this.addr.i.i.i.i.i56, align 4
+ %this1.i.i.i.i.i65 = load %"class.std::__1::__libcpp_compressed_pair_imp"*, %"class.std::__1::__libcpp_compressed_pair_imp"** %this.addr.i.i.i.i.i56
+ %__first_.i.i.i.i.i = getelementptr inbounds %"class.std::__1::__libcpp_compressed_pair_imp", %"class.std::__1::__libcpp_compressed_pair_imp"* %this1.i.i.i.i.i65, i32 0, i32 0
+ %36 = getelementptr inbounds %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep", %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep"* %__first_.i.i.i.i.i, i32 0, i32 0
+ %__s.i.i.i = bitcast %union.anon* %36 to %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__short"*
+ %37 = getelementptr inbounds %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__short", %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__short"* %__s.i.i.i, i32 0, i32 0
+ %__size_.i.i.i = bitcast %union.anon.0* %37 to i8*
+ %38 = load i8, i8* %__size_.i.i.i, align 1
+ %conv.i.i.i = zext i8 %38 to i32
+ %and.i.i.i = and i32 %conv.i.i.i, 1
+ %tobool.i.i.i = icmp ne i32 %and.i.i.i, 0
+ br i1 %tobool.i.i.i, label %cond.true.i.i, label %cond.false.i.i
+
+cond.true.i.i: ; preds = %if.then17
+ store %"class.std::__1::basic_string"* %this1.i.i62, %"class.std::__1::basic_string"** %this.addr.i15.i.i, align 4
+ %this1.i16.i.i = load %"class.std::__1::basic_string"*, %"class.std::__1::basic_string"** %this.addr.i15.i.i
+ %__r_.i17.i.i = getelementptr inbounds %"class.std::__1::basic_string", %"class.std::__1::basic_string"* %this1.i16.i.i, i32 0, i32 0
+ store %"class.std::__1::__compressed_pair"* %__r_.i17.i.i, %"class.std::__1::__compressed_pair"** %this.addr.i.i14.i.i, align 4
+ %this1.i.i18.i.i = load %"class.std::__1::__compressed_pair"*, %"class.std::__1::__compressed_pair"** %this.addr.i.i14.i.i
+ %39 = bitcast %"class.std::__1::__compressed_pair"* %this1.i.i18.i.i to %"class.std::__1::__libcpp_compressed_pair_imp"*
+ store %"class.std::__1::__libcpp_compressed_pair_imp"* %39, %"class.std::__1::__libcpp_compressed_pair_imp"** %this.addr.i.i.i13.i.i, align 4
+ %this1.i.i.i19.i.i = load %"class.std::__1::__libcpp_compressed_pair_imp"*, %"class.std::__1::__libcpp_compressed_pair_imp"** %this.addr.i.i.i13.i.i
+ %__first_.i.i.i20.i.i = getelementptr inbounds %"class.std::__1::__libcpp_compressed_pair_imp", %"class.std::__1::__libcpp_compressed_pair_imp"* %this1.i.i.i19.i.i, i32 0, i32 0
+ %40 = getelementptr inbounds %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep", %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep"* %__first_.i.i.i20.i.i, i32 0, i32 0
+ %__l.i.i.i = bitcast %union.anon* %40 to %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__long"*
+ %__data_.i21.i.i = getelementptr inbounds %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__long", %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__long"* %__l.i.i.i, i32 0, i32 2
+ %41 = load i8*, i8** %__data_.i21.i.i, align 4
+ br label %_ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE4dataEv.exit
+
+cond.false.i.i: ; preds = %if.then17
+ store %"class.std::__1::basic_string"* %this1.i.i62, %"class.std::__1::basic_string"** %this.addr.i6.i.i, align 4
+ %this1.i7.i.i = load %"class.std::__1::basic_string"*, %"class.std::__1::basic_string"** %this.addr.i6.i.i
+ %__r_.i8.i.i = getelementptr inbounds %"class.std::__1::basic_string", %"class.std::__1::basic_string"* %this1.i7.i.i, i32 0, i32 0
+ store %"class.std::__1::__compressed_pair"* %__r_.i8.i.i, %"class.std::__1::__compressed_pair"** %this.addr.i.i5.i.i, align 4
+ %this1.i.i9.i.i = load %"class.std::__1::__compressed_pair"*, %"class.std::__1::__compressed_pair"** %this.addr.i.i5.i.i
+ %42 = bitcast %"class.std::__1::__compressed_pair"* %this1.i.i9.i.i to %"class.std::__1::__libcpp_compressed_pair_imp"*
+ store %"class.std::__1::__libcpp_compressed_pair_imp"* %42, %"class.std::__1::__libcpp_compressed_pair_imp"** %this.addr.i.i.i4.i.i, align 4
+ %this1.i.i.i10.i.i = load %"class.std::__1::__libcpp_compressed_pair_imp"*, %"class.std::__1::__libcpp_compressed_pair_imp"** %this.addr.i.i.i4.i.i
+ %__first_.i.i.i11.i.i = getelementptr inbounds %"class.std::__1::__libcpp_compressed_pair_imp", %"class.std::__1::__libcpp_compressed_pair_imp"* %this1.i.i.i10.i.i, i32 0, i32 0
+ %43 = getelementptr inbounds %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep", %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep"* %__first_.i.i.i11.i.i, i32 0, i32 0
+ %__s.i12.i.i = bitcast %union.anon* %43 to %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__short"*
+ %__data_.i.i.i = getelementptr inbounds %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__short", %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__short"* %__s.i12.i.i, i32 0, i32 1
+ %arrayidx.i.i.i = getelementptr inbounds [11 x i8], [11 x i8]* %__data_.i.i.i, i32 0, i32 0
+ store i8* %arrayidx.i.i.i, i8** %__r.addr.i.i.i.i, align 4
+ %44 = load i8*, i8** %__r.addr.i.i.i.i, align 4
+ store i8* %44, i8** %__x.addr.i.i.i.i.i, align 4
+ %45 = load i8*, i8** %__x.addr.i.i.i.i.i, align 4
+ br label %_ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE4dataEv.exit
+
+_ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE4dataEv.exit: ; preds = %cond.false.i.i, %cond.true.i.i
+ %cond.i.i = phi i8* [ %41, %cond.true.i.i ], [ %45, %cond.false.i.i ]
+ store i8* %cond.i.i, i8** %__p.addr.i.i, align 4
+ %46 = load i8*, i8** %__p.addr.i.i, align 4
+ %47 = load i32, i32* %__ns, align 4
+ store %"class.std::__1::basic_streambuf"* %34, %"class.std::__1::basic_streambuf"** %this.addr.i66, align 4
+ store i8* %46, i8** %__s.addr.i67, align 4
+ store i32 %47, i32* %__n.addr.i68, align 4
+ %this1.i69 = load %"class.std::__1::basic_streambuf"*, %"class.std::__1::basic_streambuf"** %this.addr.i66
+ %48 = bitcast %"class.std::__1::basic_streambuf"* %this1.i69 to i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)***
+ %vtable.i70 = load i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)**, i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)*** %48
+ %vfn.i71 = getelementptr inbounds i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)*, i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)** %vtable.i70, i64 12
+ %49 = load i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)*, i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)** %vfn.i71
+ %50 = load i8*, i8** %__s.addr.i67, align 4
+ %51 = load i32, i32* %__n.addr.i68, align 4
+ %call.i7273 = invoke i32 %49(%"class.std::__1::basic_streambuf"* %this1.i69, i8* %50, i32 %51)
+ to label %_ZNSt3__115basic_streambufIcNS_11char_traitsIcEEE5sputnEPKci.exit unwind label %lpad
+
+_ZNSt3__115basic_streambufIcNS_11char_traitsIcEEE5sputnEPKci.exit: ; preds = %_ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE4dataEv.exit
+ br label %invoke.cont
+
+invoke.cont: ; preds = %_ZNSt3__115basic_streambufIcNS_11char_traitsIcEEE5sputnEPKci.exit
+ %52 = load i32, i32* %__ns, align 4
+ %cmp21 = icmp ne i32 %call.i7273, %52
+ br i1 %cmp21, label %if.then22, label %if.end24
+
+if.then22: ; preds = %invoke.cont
+ %__sbuf_23 = getelementptr inbounds %"class.std::__1::ostreambuf_iterator", %"class.std::__1::ostreambuf_iterator"* %__s, i32 0, i32 0
+ store %"class.std::__1::basic_streambuf"* null, %"class.std::__1::basic_streambuf"** %__sbuf_23, align 4
+ %53 = bitcast %"class.std::__1::ostreambuf_iterator"* %retval to i8*
+ %54 = bitcast %"class.std::__1::ostreambuf_iterator"* %__s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %53, i8* %54, i32 4, i32 4, i1 false)
+ store i32 1, i32* %cleanup.dest.slot
+ br label %cleanup
+
+lpad: ; preds = %_ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE4dataEv.exit
+ %55 = landingpad { i8*, i32 }
+ cleanup
+ %56 = extractvalue { i8*, i32 } %55, 0
+ store i8* %56, i8** %exn.slot
+ %57 = extractvalue { i8*, i32 } %55, 1
+ store i32 %57, i32* %ehselector.slot
+ call void @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED1Ev(%"class.std::__1::basic_string"* %__sp) #0
+ br label %eh.resume
+
+if.end24: ; preds = %invoke.cont
+ store i32 0, i32* %cleanup.dest.slot
+ br label %cleanup
+
+cleanup: ; preds = %if.end24, %if.then22
+ call void @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED1Ev(%"class.std::__1::basic_string"* %__sp) #0
+ %cleanup.dest = load i32, i32* %cleanup.dest.slot
+ switch i32 %cleanup.dest, label %unreachable [
+ i32 0, label %cleanup.cont
+ i32 1, label %return
+ ]
+
+cleanup.cont: ; preds = %cleanup
+ br label %if.end25
+
+if.end25: ; preds = %cleanup.cont, %if.end15
+ %58 = load i8*, i8** %__oe.addr, align 4
+ %59 = load i8*, i8** %__op.addr, align 4
+ %sub.ptr.lhs.cast26 = ptrtoint i8* %58 to i32
+ %sub.ptr.rhs.cast27 = ptrtoint i8* %59 to i32
+ %sub.ptr.sub28 = sub i32 %sub.ptr.lhs.cast26, %sub.ptr.rhs.cast27
+ store i32 %sub.ptr.sub28, i32* %__np, align 4
+ %60 = load i32, i32* %__np, align 4
+ %cmp29 = icmp sgt i32 %60, 0
+ br i1 %cmp29, label %if.then30, label %if.end37
+
+if.then30: ; preds = %if.end25
+ %__sbuf_31 = getelementptr inbounds %"class.std::__1::ostreambuf_iterator", %"class.std::__1::ostreambuf_iterator"* %__s, i32 0, i32 0
+ %61 = load %"class.std::__1::basic_streambuf"*, %"class.std::__1::basic_streambuf"** %__sbuf_31, align 4
+ %62 = load i8*, i8** %__op.addr, align 4
+ %63 = load i32, i32* %__np, align 4
+ store %"class.std::__1::basic_streambuf"* %61, %"class.std::__1::basic_streambuf"** %this.addr.i44, align 4
+ store i8* %62, i8** %__s.addr.i, align 4
+ store i32 %63, i32* %__n.addr.i, align 4
+ %this1.i45 = load %"class.std::__1::basic_streambuf"*, %"class.std::__1::basic_streambuf"** %this.addr.i44
+ %64 = bitcast %"class.std::__1::basic_streambuf"* %this1.i45 to i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)***
+ %vtable.i = load i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)**, i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)*** %64
+ %vfn.i = getelementptr inbounds i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)*, i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)** %vtable.i, i64 12
+ %65 = load i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)*, i32 (%"class.std::__1::basic_streambuf"*, i8*, i32)** %vfn.i
+ %66 = load i8*, i8** %__s.addr.i, align 4
+ %67 = load i32, i32* %__n.addr.i, align 4
+ %call.i = call i32 %65(%"class.std::__1::basic_streambuf"* %this1.i45, i8* %66, i32 %67)
+ %68 = load i32, i32* %__np, align 4
+ %cmp33 = icmp ne i32 %call.i, %68
+ br i1 %cmp33, label %if.then34, label %if.end36
+
+if.then34: ; preds = %if.then30
+ %__sbuf_35 = getelementptr inbounds %"class.std::__1::ostreambuf_iterator", %"class.std::__1::ostreambuf_iterator"* %__s, i32 0, i32 0
+ store %"class.std::__1::basic_streambuf"* null, %"class.std::__1::basic_streambuf"** %__sbuf_35, align 4
+ %69 = bitcast %"class.std::__1::ostreambuf_iterator"* %retval to i8*
+ %70 = bitcast %"class.std::__1::ostreambuf_iterator"* %__s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %70, i32 4, i32 4, i1 false)
+ br label %return
+
+if.end36: ; preds = %if.then30
+ br label %if.end37
+
+if.end37: ; preds = %if.end36, %if.end25
+ %71 = load %"class.std::__1::ios_base"*, %"class.std::__1::ios_base"** %__iob.addr, align 4
+ store %"class.std::__1::ios_base"* %71, %"class.std::__1::ios_base"** %this.addr.i41, align 4
+ store i32 0, i32* %__wide.addr.i, align 4
+ %this1.i42 = load %"class.std::__1::ios_base"*, %"class.std::__1::ios_base"** %this.addr.i41
+ %__width_.i43 = getelementptr inbounds %"class.std::__1::ios_base", %"class.std::__1::ios_base"* %this1.i42, i32 0, i32 3
+ %72 = load i32, i32* %__width_.i43, align 4
+ store i32 %72, i32* %__r.i, align 4
+ %73 = load i32, i32* %__wide.addr.i, align 4
+ %__width_2.i = getelementptr inbounds %"class.std::__1::ios_base", %"class.std::__1::ios_base"* %this1.i42, i32 0, i32 3
+ store i32 %73, i32* %__width_2.i, align 4
+ %74 = load i32, i32* %__r.i, align 4
+ %75 = bitcast %"class.std::__1::ostreambuf_iterator"* %retval to i8*
+ %76 = bitcast %"class.std::__1::ostreambuf_iterator"* %__s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %75, i8* %76, i32 4, i32 4, i1 false)
+ br label %return
+
+return: ; preds = %if.end37, %if.then34, %cleanup, %if.then12, %if.then
+ %coerce.dive39 = getelementptr %"class.std::__1::ostreambuf_iterator", %"class.std::__1::ostreambuf_iterator"* %retval, i32 0, i32 0
+ %77 = load %"class.std::__1::basic_streambuf"*, %"class.std::__1::basic_streambuf"** %coerce.dive39
+ %coerce.val.pi = ptrtoint %"class.std::__1::basic_streambuf"* %77 to i32
+ ret i32 %coerce.val.pi
+
+eh.resume: ; preds = %lpad
+ %exn = load i8*, i8** %exn.slot
+ %sel = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ %lpad.val40 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+ resume { i8*, i32 } %lpad.val40
+
+unreachable: ; preds = %cleanup
+ unreachable
+}
+
+declare void @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE6__initEjc(%"class.std::__1::basic_string"*, i32, i8 zeroext) #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}
diff --git a/test/CodeGen/Hexagon/early-if-phi-i1.ll b/test/CodeGen/Hexagon/early-if-phi-i1.ll
new file mode 100644
index 000000000000..1649d51269ee
--- /dev/null
+++ b/test/CodeGen/Hexagon/early-if-phi-i1.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s
+; REQUIRES: asserts
+; Check that the early if-conversion does not predicate block1 (where the
+; join block has a phi node of type i1).
+
+define i1 @foo(i32 %x, i32* %p) {
+entry:
+ %c = icmp sgt i32 %x, 0
+ %c1 = icmp sgt i32 %x, 10
+ br i1 %c, label %block2, label %block1
+block1:
+ store i32 1, i32* %p, align 4
+ br label %block2
+block2:
+ %b = phi i1 [ 0, %entry ], [ %c1, %block1 ]
+ ret i1 %b
+}
diff --git a/test/CodeGen/Hexagon/early-if-spare.ll b/test/CodeGen/Hexagon/early-if-spare.ll
new file mode 100644
index 000000000000..7497b53ba3ca
--- /dev/null
+++ b/test/CodeGen/Hexagon/early-if-spare.ll
@@ -0,0 +1,57 @@
+; RUN: llc -O2 -mcpu=hexagonv5 < %s | FileCheck %s
+; Check if the three stores in the loop were predicated.
+; CHECK: if{{.*}}memw
+; CHECK: if{{.*}}memw
+; CHECK: if{{.*}}memw
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @fred(i32 %n, i32* %bp) nounwind {
+entry:
+ %cmp16 = icmp eq i32 %n, 0
+ br i1 %cmp16, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ %cmp2 = icmp ugt i32 %n, 32
+ br label %for.body
+
+for.body: ; preds = %for.inc, %for.body.lr.ph
+ %i.017 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ %call = tail call i32 @foo(i32* %bp) nounwind
+ %call1 = tail call i32 @bar(i32* %bp) nounwind
+ br i1 %cmp2, label %if.then, label %if.else
+
+if.then: ; preds = %for.body
+ %arrayidx = getelementptr inbounds i32, i32* %bp, i32 %i.017
+ store i32 %call, i32* %arrayidx, align 4, !tbaa !0
+ %add = add i32 %i.017, 2
+ %arrayidx3 = getelementptr inbounds i32, i32* %bp, i32 %add
+ store i32 %call1, i32* %arrayidx3, align 4, !tbaa !0
+ br label %for.inc
+
+if.else: ; preds = %for.body
+ %or = or i32 %call1, %call
+ %arrayidx4 = getelementptr inbounds i32, i32* %bp, i32 %i.017
+ store i32 %or, i32* %arrayidx4, align 4, !tbaa !0
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %if.else
+ %inc = add i32 %i.017, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.inc
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+declare i32 @foo(i32*) nounwind
+
+declare i32 @bar(i32*) nounwind
+
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/early-if.ll b/test/CodeGen/Hexagon/early-if.ll
new file mode 100644
index 000000000000..dcb1dd20b515
--- /dev/null
+++ b/test/CodeGen/Hexagon/early-if.ll
@@ -0,0 +1,75 @@
+; RUN: llc -O2 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Rely on the comments generated by llc. Check that "if.then" was predicated.
+; CHECK: while.body13
+; CHECK: if{{.*}}memd
+; CHECK: while.end
+
+%struct.1 = type { i32, i32 }
+%struct.2 = type { [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [24 x i32], [3 x i32], [24 x i32], [8 x %struct.1], [5 x i32] }
+
+@A1 = global i64 zeroinitializer
+@A2 = global i64 zeroinitializer
+@B1 = global i32 zeroinitializer
+@B2 = global i32 zeroinitializer
+@C1 = global i8 zeroinitializer
+
+declare i32 @llvm.hexagon.S2.cl0(i32) nounwind readnone
+declare i32 @llvm.hexagon.S2.setbit.r(i32, i32) nounwind readnone
+declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32) nounwind readnone
+declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32) nounwind readnone
+declare i64 @llvm.hexagon.A2.vaddws(i64, i64) nounwind readnone
+declare i64 @llvm.hexagon.A2.vsubws(i64, i64) nounwind readnone
+declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) nounwind readnone
+
+define void @foo(i32 %n, i64* %ptr) nounwind {
+entry:
+ br label %while.body
+
+while.body:
+ %count = phi i32 [ 0, %entry ], [ %next, %while.end ]
+ %idx = phi i32 [ 0, %entry ], [ %15, %while.end ]
+ %0 = load i32, i32* @B1, align 4
+ %1 = load i32, i32* @B2, align 8
+ %2 = and i32 %1, %0
+ br label %while.body13
+
+while.body13: ; preds = %while.body, %if.end
+ %3 = phi i64 [ %13, %if.end ], [ 0, %while.body ]
+ %4 = phi i64 [ %14, %if.end ], [ 0, %while.body ]
+ %m = phi i32 [ %6, %if.end ], [ %2, %while.body ]
+ %5 = tail call i32 @llvm.hexagon.S2.cl0(i32 %m)
+ %6 = tail call i32 @llvm.hexagon.S2.setbit.r(i32 %m, i32 %5)
+ %cgep85 = getelementptr [10 x %struct.2], [10 x %struct.2]* inttoptr (i32 -121502345 to [10 x %struct.2]*), i32 0, i32 %idx
+ %cgep90 = getelementptr %struct.2, %struct.2* %cgep85, i32 0, i32 12, i32 %5
+ %7 = load i32, i32* %cgep90, align 4
+ %8 = tail call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %7, i32 %7)
+ %cgep91 = getelementptr %struct.2, %struct.2* %cgep85, i32 0, i32 13, i32 %5
+ %9 = load i32, i32* %cgep91, align 4
+ %10 = tail call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %8, i32 %9, i32 %9)
+ %11 = load i8, i8* @C1, align 1
+ %and24 = and i8 %11, 1
+ %cmp = icmp eq i8 %and24, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %while.body13
+ %12 = tail call i64 @llvm.hexagon.A2.vaddws(i64 %3, i64 %10)
+ store i64 %12, i64* %ptr, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %while.body13
+ %13 = phi i64 [ %12, %if.then ], [ %3, %while.body13 ]
+ %14 = tail call i64 @llvm.hexagon.A2.vsubws(i64 %4, i64 %10)
+ %tobool12 = icmp eq i32 %6, 0
+ br i1 %tobool12, label %while.end, label %while.body13
+
+while.end:
+ %add40 = add i32 %idx, 1
+ %15 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %add40, i32 10) nounwind
+ %next = add i32 %count, 1
+ %cc = icmp eq i32 %next, %n
+ br i1 %cc, label %end, label %while.body
+
+end:
+ store i64 %10, i64* @A2, align 8
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/extload-combine.ll b/test/CodeGen/Hexagon/extload-combine.ll
index 519177fc75fc..773b10b2b288 100644
--- a/test/CodeGen/Hexagon/extload-combine.ll
+++ b/test/CodeGen/Hexagon/extload-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 -disable-hsdr < %s | FileCheck %s
; Check that the combine/stxw instructions are being generated.
; In case of combine one of the operand should be 0 and another should be
; the output of absolute addressing load instruction.
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
index 66c6662f735a..341f8db9e336 100644
--- a/test/CodeGen/Hexagon/hwloop-dbg.ll
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -3,7 +3,7 @@
target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
target triple = "hexagon"
-define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind {
+define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind !dbg !5 {
entry:
tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !17
tail call void @llvm.dbg.value(metadata i32* %b, i64 0, metadata !14, metadata !DIExpression()), !dbg !18
@@ -37,19 +37,19 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!29}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", isOptimized: true, emissionKind: 1, file: !28, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", isOptimized: true, emissionKind: 1, file: !28, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!2 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !28, scope: null, type: !7, function: void (i32*, i32*)* @foo, variables: !11)
+!5 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !28, scope: null, type: !7, variables: !11)
!6 = !DIFile(filename: "hwloop-dbg.c", directory: "/usr2/kparzysz/s.hex/t")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9, !9}
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !10)
!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !{!13, !14, !15}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !5, file: !6, type: !9)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 1, arg: 2, scope: !5, file: !6, type: !9)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2, scope: !16, file: !6, type: !10)
+!13 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !5, file: !6, type: !9)
+!14 = !DILocalVariable(name: "b", line: 1, arg: 2, scope: !5, file: !6, type: !9)
+!15 = !DILocalVariable(name: "i", line: 2, scope: !16, file: !6, type: !10)
!16 = distinct !DILexicalBlock(line: 1, column: 26, file: !28, scope: !5)
!17 = !DILocation(line: 1, column: 15, scope: !5)
!18 = !DILocation(line: 1, column: 23, scope: !5)
diff --git a/test/CodeGen/Hexagon/i16_VarArg.ll b/test/CodeGen/Hexagon/i16_VarArg.ll
index ba98f6226683..74d066e4936e 100644
--- a/test/CodeGen/Hexagon/i16_VarArg.ll
+++ b/test/CodeGen/Hexagon/i16_VarArg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
@a_str = internal constant [8 x i8] c"a = %f\0A\00"
diff --git a/test/CodeGen/Hexagon/i1_VarArg.ll b/test/CodeGen/Hexagon/i1_VarArg.ll
index 1908b3c71f3f..4078c0f3f005 100644
--- a/test/CodeGen/Hexagon/i1_VarArg.ll
+++ b/test/CodeGen/Hexagon/i1_VarArg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: call __hexagon_{{[_A-Za-z0-9]+}}
@a_str = internal constant [8 x i8] c"a = %f\0A\00"
diff --git a/test/CodeGen/Hexagon/i8_VarArg.ll b/test/CodeGen/Hexagon/i8_VarArg.ll
index c40a6a957270..1353de47a976 100644
--- a/test/CodeGen/Hexagon/i8_VarArg.ll
+++ b/test/CodeGen/Hexagon/i8_VarArg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
@a_str = internal constant [8 x i8] c"a = %f\0A\00"
diff --git a/test/CodeGen/Hexagon/ifcvt-edge-weight.ll b/test/CodeGen/Hexagon/ifcvt-edge-weight.ll
new file mode 100644
index 000000000000..341567e1d02f
--- /dev/null
+++ b/test/CodeGen/Hexagon/ifcvt-edge-weight.ll
@@ -0,0 +1,64 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -hexagon-eif=0 -print-machineinstrs=if-converter %s -o /dev/null 2>&1 | FileCheck %s
+; Check that the edge weights are updated correctly after if-conversion.
+
+; CHECK: BB#3:
+; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}10.00%) BB#1({{[0-9a-fx/= ]+}}90.00%)
+@a = external global i32
+@d = external global i32
+
+; In the following CFG, A,B,C,D will be if-converted into a single block.
+; Check if the edge weights on edges to E and F are maintained correctly.
+;
+; A
+; / \
+; B C
+; \ /
+; D
+; / \
+; E F
+;
+define void @test1(i8 zeroext %la, i8 zeroext %lb) {
+entry:
+ %cmp0 = call i1 @pred()
+ br i1 %cmp0, label %if.else2, label %if.then0, !prof !1
+
+if.else2:
+ call void @bar(i32 2)
+ br label %if.end2
+
+if.end2:
+ call void @foo(i32 2)
+ br label %return
+
+if.end:
+ %storemerge = phi i32 [ %and, %if.else ], [ %shl, %if.then ]
+ store i32 %storemerge, i32* @a, align 4
+ %0 = load i32, i32* @d, align 4
+ %cmp2 = call i1 @pred()
+ br i1 %cmp2, label %if.end2, label %if.else2, !prof !2
+
+if.then0:
+ %cmp = icmp eq i8 %la, %lb
+ br i1 %cmp, label %if.then, label %if.else, !prof !1
+
+if.then:
+ %conv1 = zext i8 %la to i32
+ %shl = shl nuw nsw i32 %conv1, 16
+ br label %if.end
+
+if.else:
+ %and8 = and i8 %lb, %la
+ %and = zext i8 %and8 to i32
+ br label %if.end
+
+return:
+ call void @foo(i32 2)
+ ret void
+}
+
+declare void @foo(i32)
+declare void @bar(i32)
+declare i1 @pred()
+
+!1 = !{!"branch_weights", i32 80, i32 20}
+!2 = !{!"branch_weights", i32 10, i32 90}
diff --git a/test/CodeGen/Hexagon/memcpy-likely-aligned.ll b/test/CodeGen/Hexagon/memcpy-likely-aligned.ll
new file mode 100644
index 000000000000..f2677efc3049
--- /dev/null
+++ b/test/CodeGen/Hexagon/memcpy-likely-aligned.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+%struct.e = type { i8, i8, [2 x i8] }
+%struct.s = type { i8* }
+%struct.o = type { %struct.n }
+%struct.n = type { [2 x %struct.l] }
+%struct.l = type { %struct.e, %struct.d, %struct.e }
+%struct.d = type <{ i8, i8, i8, i8, [2 x i8], [2 x i8] }>
+
+@y = global { <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } }> } { <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } }> <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } { %struct.e { i8 3, i8 0, [2 x i8] undef }, { i8, i8, i8, [5 x i8] } { i8 -47, i8 2, i8 0, [5 x i8] undef }, %struct.e { i8 3, i8 0, [2 x i8] undef } }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } { %struct.e { i8 3, i8 0, [2 x i8] undef }, { i8, i8, i8, [5 x i8] } { i8 -47, i8 2, i8 0, [5 x i8] undef }, %struct.e { i8 3, i8 0, [2 x i8] undef } } }> }, align 4
+@t = common global %struct.s zeroinitializer, align 4
+@q = internal global %struct.o* null, align 4
+
+define void @foo() nounwind {
+entry:
+ %0 = load i8*, i8** getelementptr inbounds (%struct.s, %struct.s* @t, i32 0, i32 0), align 4
+ %1 = bitcast i8* %0 to %struct.o*
+ store %struct.o* %1, %struct.o** @q, align 4
+ %2 = load %struct.o*, %struct.o** @q, align 4
+ %p = getelementptr inbounds %struct.o, %struct.o* %2, i32 0, i32 0
+ %m = getelementptr inbounds %struct.n, %struct.n* %p, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [2 x %struct.l], [2 x %struct.l]* %m, i32 0, i32 0
+ %3 = bitcast %struct.l* %arraydecay to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* getelementptr inbounds ({ <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } }> }, { <{ { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e }, { %struct.e, { i8, i8, i8, [5 x i8] }, %struct.e } }> }* @y, i32 0, i32 0, i32 0, i32 0, i32 0), i32 32, i32 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Hexagon/mux-basic.ll b/test/CodeGen/Hexagon/mux-basic.ll
new file mode 100644
index 000000000000..ef1f7cb60e17
--- /dev/null
+++ b/test/CodeGen/Hexagon/mux-basic.ll
@@ -0,0 +1,28 @@
+; RUN: llc -O2 < %s | FileCheck %s
+; We should generate a MUX instruction for one of the selects.
+; CHECK: mux
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+%struct.struct_t = type { i32, i32, i32 }
+
+define void @foo(%struct.struct_t* nocapture %p, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+ %cmp = icmp slt i32 %x, 4660
+ %add = add nsw i32 %x, 1
+ %add.y = select i1 %cmp, i32 %add, i32 %y
+ %x.add.y = select i1 %cmp, i32 %x, i32 %y
+ %. = zext i1 %cmp to i32
+ %b.0 = add nsw i32 %x.add.y, %z
+ %a3 = getelementptr inbounds %struct.struct_t, %struct.struct_t* %p, i32 0, i32 0
+ store i32 %add.y, i32* %a3, align 4, !tbaa !0
+ %b4 = getelementptr inbounds %struct.struct_t, %struct.struct_t* %p, i32 0, i32 1
+ store i32 %b.0, i32* %b4, align 4, !tbaa !0
+ %c5 = getelementptr inbounds %struct.struct_t, %struct.struct_t* %p, i32 0, i32 2
+ store i32 %., i32* %c5, align 4, !tbaa !0
+ ret void
+}
+
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/opt-fabs.ll b/test/CodeGen/Hexagon/opt-fabs.ll
index da657e4b1b8f..2ecbce310ade 100644
--- a/test/CodeGen/Hexagon/opt-fabs.ll
+++ b/test/CodeGen/Hexagon/opt-fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=hexagon-unknown-elf -mcpu=hexagonv5 < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon-unknown-elf -mcpu=hexagonv5 -hexagon-bit=0 < %s | FileCheck %s
; Optimize fabsf to clrbit in V5.
; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #31)
diff --git a/test/CodeGen/Hexagon/pic-jumptables.ll b/test/CodeGen/Hexagon/pic-jumptables.ll
new file mode 100644
index 000000000000..271105cb4b5b
--- /dev/null
+++ b/test/CodeGen/Hexagon/pic-jumptables.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=hexagon -relocation-model=pic < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add({{pc|PC}}{{ *}},{{ *}}##
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}{{ *}}<<{{ *}}#2)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}r{{[0-9]+}})
+
+
+define i32 @test(i32 %y) nounwind {
+entry:
+ switch i32 %y, label %sw.epilog [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb2
+ i32 4, label %sw.bb3
+ i32 5, label %sw.bb4
+ ]
+
+sw.bb: ; preds = %entry
+ tail call void bitcast (void (...)* @baz1 to void ()*)() nounwind
+ br label %sw.epilog
+
+sw.bb1: ; preds = %entry
+ tail call void @baz2(i32 2, i32 78) nounwind
+ br label %sw.epilog
+
+sw.bb2: ; preds = %entry
+ tail call void @baz3(i32 59) nounwind
+ br label %sw.epilog
+
+sw.bb3: ; preds = %entry
+ tail call void @baz4(i32 4, i32 14) nounwind
+ br label %sw.epilog
+
+sw.bb4: ; preds = %entry
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb, %entry
+ %y.addr.0 = phi i32 [ %y, %entry ], [ 14, %sw.bb4 ], [ 4, %sw.bb3 ], [ 3, %sw.bb2 ], [ 2, %sw.bb1 ], [ 1, %sw.bb ]
+ ret i32 %y.addr.0
+}
+
+declare void @baz1(...)
+
+declare void @baz2(i32, i32)
+
+declare void @baz3(i32)
+
+declare void @baz4(i32, i32)
diff --git a/test/CodeGen/Hexagon/pic-simple.ll b/test/CodeGen/Hexagon/pic-simple.ll
new file mode 100644
index 000000000000..fa223d5372e1
--- /dev/null
+++ b/test/CodeGen/Hexagon/pic-simple.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -relocation-model=pic < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}} = add({{pc|PC}}, ##_GLOBAL_OFFSET_TABLE_@PCREL)
+; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}{{.*}}+{{.*}}##src@GOT)
+; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}{{.*}}+{{.*}}##dst@GOT)
+
+@dst = external global i32
+@src = external global i32
+
+define i32 @foo() nounwind {
+entry:
+ %0 = load i32, i32* @src, align 4, !tbaa !0
+ store i32 %0, i32* @dst, align 4, !tbaa !0
+ %call = tail call i32 @baz(i32 %0) nounwind
+ ret i32 0
+}
+
+declare i32 @baz(i32)
+
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/pic-static.ll b/test/CodeGen/Hexagon/pic-static.ll
new file mode 100644
index 000000000000..f4ccc6b9ee73
--- /dev/null
+++ b/test/CodeGen/Hexagon/pic-static.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -relocation-model=pic < %s | FileCheck %s
+
+; CHECK-DAG: r{{[0-9]+}} = add({{pc|PC}}, ##_GLOBAL_OFFSET_TABLE_@PCREL)
+; CHECK-DAG: r{{[0-9]+}} = add({{pc|PC}}, ##x@PCREL)
+; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}{{.*}}+{{.*}}##bar@GOT)
+
+@x = internal global i32 9, align 4
+@bar = external global i32*
+
+define i32 @foo(i32 %y) nounwind {
+entry:
+ store i32* @x, i32** @bar, align 4, !tbaa !0
+ %0 = load i32, i32* @x, align 4, !tbaa !3
+ %add = add nsw i32 %0, %y
+ ret i32 %add
+}
+
+!0 = !{!"any pointer", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"int", !1}
diff --git a/test/CodeGen/Hexagon/relax.ll b/test/CodeGen/Hexagon/relax.ll
index 9823d4d1cd9c..9af45f3353ce 100644
--- a/test/CodeGen/Hexagon/relax.ll
+++ b/test/CodeGen/Hexagon/relax.ll
@@ -7,8 +7,7 @@ call void @bar()
ret void
}
-
-; CHECK: { allocframe(#0) }
-; CHECK: { call 0 }
-; CHECK: 00000004: R_HEX_B22_PCREL
-; CHECK: { dealloc_return } \ No newline at end of file
+; CHECK: { call 0
+; CHECK: allocframe(#0)
+; CHECK: 00000000: R_HEX_B22_PCREL
+; CHECK: { dealloc_return }
diff --git a/test/CodeGen/Hexagon/sdr-basic.ll b/test/CodeGen/Hexagon/sdr-basic.ll
new file mode 100644
index 000000000000..162e5452572d
--- /dev/null
+++ b/test/CodeGen/Hexagon/sdr-basic.ll
@@ -0,0 +1,15 @@
+; RUN: llc -O2 -mcpu=hexagonv5 < %s | FileCheck %s
+; There should be no register pair used.
+; CHECK-NOT: r{{.*}}:{{[0-9]}} = and
+; CHECK-NOT: r{{.*}}:{{[0-9]}} = xor
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define i32 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone {
+entry:
+ %and = and i64 %y, -361700868401135616
+ %xor = xor i64 %and, %z
+ %shr1 = lshr i64 %xor, 32
+ %conv = trunc i64 %shr1 to i32
+ ret i32 %conv
+}
diff --git a/test/CodeGen/Hexagon/sdr-shr32.ll b/test/CodeGen/Hexagon/sdr-shr32.ll
new file mode 100644
index 000000000000..67dc1c14d720
--- /dev/null
+++ b/test/CodeGen/Hexagon/sdr-shr32.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK-NOT: lsr{{.*}}#31
+
+target datalayout = "e-m:e-p:32:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon-unknown--elf"
+
+; Function Attrs: nounwind readnone
+define i64 @foo(i64 %x) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.S2.asr.i.p(i64 %x, i32 32)
+ ret i64 %0
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.hexagon.S2.asr.i.p(i64, i32) #1
+
+attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang $LLVM_VERSION_MAJOR.$LLVM_VERSION_MINOR (based on LLVM 3.7.0)"}
diff --git a/test/CodeGen/Hexagon/simple_addend.ll b/test/CodeGen/Hexagon/simple_addend.ll
index ec3a87f1dcc0..939d44b2a241 100644
--- a/test/CodeGen/Hexagon/simple_addend.ll
+++ b/test/CodeGen/Hexagon/simple_addend.ll
@@ -7,4 +7,4 @@ define void @foo(i32 %a) {
call void @bar(i32 %b)
ret void
}
-; CHECK: 0x8 R_HEX_B22_PCREL bar 0x4
+; CHECK: 0x4 R_HEX_B22_PCREL bar 0x4
diff --git a/test/CodeGen/Hexagon/store-widen-aliased-load.ll b/test/CodeGen/Hexagon/store-widen-aliased-load.ll
new file mode 100644
index 000000000000..a8380306565e
--- /dev/null
+++ b/test/CodeGen/Hexagon/store-widen-aliased-load.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK-NOT: memh
+; Check that store widening does not merge the two stores.
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+%struct.type_t = type { i8, i8, [2 x i8] }
+
+define zeroext i8 @foo(%struct.type_t* nocapture %p) nounwind {
+entry:
+ %a = getelementptr inbounds %struct.type_t, %struct.type_t* %p, i32 0, i32 0
+ store i8 0, i8* %a, align 2, !tbaa !0
+ %b = getelementptr inbounds %struct.type_t, %struct.type_t* %p, i32 0, i32 1
+ %0 = load i8, i8* %b, align 1, !tbaa !0
+ store i8 0, i8* %b, align 1, !tbaa !0
+ ret i8 %0
+}
+
+!0 = !{!"omnipotent char", !1}
+!1 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/store-widen-negv.ll b/test/CodeGen/Hexagon/store-widen-negv.ll
new file mode 100644
index 000000000000..50a633d82be4
--- /dev/null
+++ b/test/CodeGen/Hexagon/store-widen-negv.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; We shouldn't see a 32-bit expansion of -120, just the uint8 value.
+; CHECK: #136
+define i32 @foo([4 x i8]* %ptr) {
+entry:
+ %msb = getelementptr inbounds [4 x i8], [4 x i8]* %ptr, i32 0, i32 3
+ %lsb = getelementptr inbounds [4 x i8], [4 x i8]* %ptr, i32 0, i32 2
+ store i8 0, i8* %msb
+ store i8 -120, i8* %lsb, align 2
+ ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/store-widen-negv2.ll b/test/CodeGen/Hexagon/store-widen-negv2.ll
new file mode 100644
index 000000000000..6abe01a6ed9f
--- /dev/null
+++ b/test/CodeGen/Hexagon/store-widen-negv2.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK-LABEL: foo:
+; CHECK: memh(r0+#0){{.*}}={{.*}}#-2
+; Don't use memh(r0+#0)=##65534.
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon"
+
+; Function Attrs: nounwind
+define void @foo(i16* nocapture %s) #0 {
+entry:
+ %0 = bitcast i16* %s to i8*
+ store i8 -2, i8* %0, align 2
+ %add.ptr = getelementptr inbounds i8, i8* %0, i32 1
+ store i8 -1, i8* %add.ptr, align 1
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Hexagon/store-widen.ll b/test/CodeGen/Hexagon/store-widen.ll
new file mode 100644
index 000000000000..9428093901c5
--- /dev/null
+++ b/test/CodeGen/Hexagon/store-widen.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @foo(i16* nocapture %a) nounwind {
+entry:
+; There should be a memw, not memh.
+; CHECK: memw
+ ; Cheated on the alignment, just to trigger the widening.
+ store i16 0, i16* %a, align 4, !tbaa !0
+ %arrayidx1 = getelementptr inbounds i16, i16* %a, i32 1
+ store i16 0, i16* %arrayidx1, align 2, !tbaa !0
+ ret void
+}
+
+!0 = !{!"short", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index 95b76c7999d4..2ac1f8eadbb7 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hsdr < %s | FileCheck %s
; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}})
; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll
index 9735894c419e..fab3dcaefa86 100644
--- a/test/CodeGen/Hexagon/sube.ll
+++ b/test/CodeGen/Hexagon/sube.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -disable-hsdr -hexagon-expand-condsets=0 -hexagon-bit=0 < %s | FileCheck %s
-; CHECK: r{{[0-9]+:[0-9]+}} = #1
-; CHECK: r{{[0-9]+:[0-9]+}} = #0
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0, #1)
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(#0, #0)
; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
-; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}})
define void @check_sube_subc(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
diff --git a/test/CodeGen/Hexagon/tail-dup-subreg-abort.ll b/test/CodeGen/Hexagon/tail-dup-subreg-abort.ll
new file mode 100644
index 000000000000..82dae2cc586a
--- /dev/null
+++ b/test/CodeGen/Hexagon/tail-dup-subreg-abort.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=hexagon -O2 -disable-cgp < %s
+; REQUIRES: asserts
+;
+; Tail duplication can ignore subregister information on PHI nodes, and as
+; a result, generate COPY instructions between registers of different classes.
+; This could lead to HexagonInstrInfo::copyPhysReg aborting on an unhandled
+; src/dst combination.
+;
+define i32 @foo(i32 %x, i64 %y) nounwind {
+entry:
+ %a = icmp slt i32 %x, 0
+ %lo = trunc i64 %y to i32
+ br i1 %a, label %next, label %tail
+tail:
+ br label %join
+next:
+ %c = icmp eq i32 %x, 0
+ br i1 %c, label %b1, label %tail
+b1:
+ %t1 = lshr i64 %y, 32
+ %hi = trunc i64 %t1 to i32
+ br label %join
+join:
+ %val = phi i32 [ %hi, %b1 ], [ %lo, %tail ]
+ ret i32 %val
+}
+
+
diff --git a/test/CodeGen/Hexagon/tfr-to-combine.ll b/test/CodeGen/Hexagon/tfr-to-combine.ll
index a257acfeb49b..1b82f3e4562e 100644
--- a/test/CodeGen/Hexagon/tfr-to-combine.ll
+++ b/test/CodeGen/Hexagon/tfr-to-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 -disable-hsdr < %s | FileCheck %s
; Check that we combine TFRs and TFRIs into COMBINEs.
diff --git a/test/CodeGen/Hexagon/union-1.ll b/test/CodeGen/Hexagon/union-1.ll
index 1d93797db858..8f2ff28b3814 100644
--- a/test/CodeGen/Hexagon/union-1.ll
+++ b/test/CodeGen/Hexagon/union-1.ll
@@ -2,8 +2,6 @@
; CHECK: word
; CHECK-NOT: combine(#0
; CHECK: jump bar
-; XFAIL: *
-; Disable this test temporarily.
define void @word(i32* nocapture %a) nounwind {
entry:
diff --git a/test/CodeGen/Hexagon/v60Intrins.ll b/test/CodeGen/Hexagon/v60Intrins.ll
new file mode 100644
index 000000000000..5f4f294c405b
--- /dev/null
+++ b/test/CodeGen/Hexagon/v60Intrins.ll
@@ -0,0 +1,2559 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv60 -O2 < %s | FileCheck %s
+
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vsetq(r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vcmp.eq(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vcmp.eq(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vcmp.eq(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} &= vcmp.eq(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} &= vcmp.eq(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} &= vcmp.eq(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vcmp.eq(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vcmp.eq(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vcmp.eq(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} ^= vcmp.eq(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} ^= vcmp.eq(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} ^= vcmp.eq(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vcmp.gt(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vcmp.gt(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vcmp.gt(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vcmp.gt(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vcmp.gt(v{{[0-9]*}}.uw,v{{[0-9]*}}.uw)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vcmp.gt(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} &= vcmp.gt(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} &= vcmp.gt(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} &= vcmp.gt(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} &= vcmp.gt(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} &= vcmp.gt(v{{[0-9]*}}.uw,v{{[0-9]*}}.uw)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} &= vcmp.gt(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vcmp.gt(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vcmp.gt(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vcmp.gt(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vcmp.gt(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vcmp.gt(v{{[0-9]*}}.uw,v{{[0-9]*}}.uw)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} |= vcmp.gt(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} ^= vcmp.gt(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} ^= vcmp.gt(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} ^= vcmp.gt(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} ^= vcmp.gt(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} ^= vcmp.gt(v{{[0-9]*}}.uw,v{{[0-9]*}}.uw)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} ^= vcmp.gt(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = xor{{[0-9]*}}(q{{[0-3]}},q{{[0-3]}})
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = v
+; CHECK: v{{[0-9]*}} = valign(v{{[0-9]*}},v{{[0-9]*}},#0)
+; CHECK: v{{[0-9]*}} = valign(v{{[0-9]*}},v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vand(v{{[0-9]*}},v{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}} |= vand(q{{[0-3]}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vdelta(v{{[0-9]*}},v{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vlalign(v{{[0-9]*}},v{{[0-9]*}},#0)
+; CHECK: v{{[0-9]*}} = vlalign(v{{[0-9]*}},v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vmux(q{{[0-3]}},v{{[0-9]*}},v{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vnot(v{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vor{{[0-9]*}}(v{{[0-9]*}},v{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vr{{[0-9]*}}delta(v{{[0-9]*}},v{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vr{{[0-9]*}}or{{[0-9]*}}(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}} = vxor{{[0-9]*}}(v{{[0-9]*}},v{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.b = vadd(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.b = vasr{{[0-9]*}}(v{{[0-9]*}}.h,v{{[0-9]*}}.h,r{{[0-9]*}}):{{[0-9]*}}r{{[0-9]*}}nd:{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.b = vdeal(v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.b = vdeale(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.b = vlut32(v{{[0-9]*}}.b,v{{[0-9]*}}.b,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.b |= vlut32(v{{[0-9]*}}.b,v{{[0-9]*}}.b,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.b = vnav{{[0-9]*}}g(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}.b = vpack(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.b = vpacke(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.b = vpacko(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.b = vr{{[0-9]*}}ound(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.b = vshuff(v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.b = vshuffe(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.b = vshuffo(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.b = vsub(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.h = vabs(v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vabs(v{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.h = vadd(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vadd(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.h = vasl(v{{[0-9]*}}.h,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.h = vasl(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vasr{{[0-9]*}}(v{{[0-9]*}}.h,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.h = vasr{{[0-9]*}}(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vasr{{[0-9]*}}(v{{[0-9]*}}.w,v{{[0-9]*}}.w,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.h = vasr{{[0-9]*}}(v{{[0-9]*}}.w,v{{[0-9]*}}.w,r{{[0-9]*}}):{{[0-9]*}}r{{[0-9]*}}nd:{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.h = vasr{{[0-9]*}}(v{{[0-9]*}}.w,v{{[0-9]*}}.w,r{{[0-9]*}}):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.h = vav{{[0-9]*}}g(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vav{{[0-9]*}}g(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}r{{[0-9]*}}nd
+; CHECK: v{{[0-9]*}}.h = vdeal(v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vdmpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.h += vdmpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.h = vlsr{{[0-9]*}}(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vmax(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vmin(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.h):{{[0-9]*}}<<1:{{[0-9]*}}r{{[0-9]*}}nd:{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.h = vmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.h):{{[0-9]*}}<<1:{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.h = vmpy(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}<<1:{{[0-9]*}}r{{[0-9]*}}nd:{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.h = vmpyi(v{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.h = vmpyi(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h += vmpyi(v{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.h += vmpyi(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vnav{{[0-9]*}}g(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vnor{{[0-9]*}}mamt(v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vpack(v{{[0-9]*}}.w,v{{[0-9]*}}.w):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.h = vpacke(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.h = vpacko(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.h = vpopcount(v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vr{{[0-9]*}}ound(v{{[0-9]*}}.w,v{{[0-9]*}}.w):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.h = vsat(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.h = vshuff(v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vshuffe(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vshuffo(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vsub(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.h = vsub(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.ub = vabsdiff(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}.ub = vadd(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.ub = vasr{{[0-9]*}}(v{{[0-9]*}}.h,v{{[0-9]*}}.h,r{{[0-9]*}}):{{[0-9]*}}r{{[0-9]*}}nd:{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.ub = vasr{{[0-9]*}}(v{{[0-9]*}}.h,v{{[0-9]*}}.h,r{{[0-9]*}}):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.ub = vav{{[0-9]*}}g(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}.ub = vav{{[0-9]*}}g(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub):{{[0-9]*}}r{{[0-9]*}}nd
+; CHECK: v{{[0-9]*}}.ub = vmax(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}.ub = vmin(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}.ub = vpack(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.ub = vr{{[0-9]*}}ound(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.ub = vsat(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.ub = vsub(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.uh = vabsdiff(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.uh = vabsdiff(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}.uh = vadd(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.uh = vasr{{[0-9]*}}(v{{[0-9]*}}.w,v{{[0-9]*}}.w,r{{[0-9]*}}):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.uh = vav{{[0-9]*}}g(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}.uh = vav{{[0-9]*}}g(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh):{{[0-9]*}}r{{[0-9]*}}nd
+; CHECK: v{{[0-9]*}}.uh = vcl0(v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}.uh = vlsr{{[0-9]*}}(v{{[0-9]*}}.uh,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.uh = vmax(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}.uh = vmin(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}.uh = vpack(v{{[0-9]*}}.w,v{{[0-9]*}}.w):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.uh = vr{{[0-9]*}}ound(v{{[0-9]*}}.w,v{{[0-9]*}}.w):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.uh = vsub(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.uw = vabsdiff(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.uw = vcl0(v{{[0-9]*}}.uw)
+; CHECK: v{{[0-9]*}}.uw = vlsr{{[0-9]*}}(v{{[0-9]*}}.uw,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.uw = vr{{[0-9]*}}mpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}.uw = vr{{[0-9]*}}mpy(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}.uw += vr{{[0-9]*}}mpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}.uw += vr{{[0-9]*}}mpy(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}.w = vabs(v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w = vabs(v{{[0-9]*}}.w):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w = vadd(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w = vadd(v{{[0-9]*}}.w,v{{[0-9]*}}.w):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w = vasl(v{{[0-9]*}}.w,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.w = vasl(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w += vasl(v{{[0-9]*}}.w,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.w = vasr{{[0-9]*}}(v{{[0-9]*}}.w,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.w = vasr{{[0-9]*}}(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w += vasr{{[0-9]*}}(v{{[0-9]*}}.w,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.w = vav{{[0-9]*}}g(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w = vav{{[0-9]*}}g(v{{[0-9]*}}.w,v{{[0-9]*}}.w):{{[0-9]*}}r{{[0-9]*}}nd
+; CHECK: v{{[0-9]*}}.w = vdmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w = vdmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w = vdmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.uh):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w = vdmpy(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w = vdmpy(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w = vdmpy(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.uh,#1):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w += vdmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w += vdmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w += vdmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.uh):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w += vdmpy(v{{[0-9]*}}.h,v{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w += vdmpy(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w += vdmpy(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.uh,#1):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w = vinser{{[0-9]*}}t(r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.w = vinser{{[0-9]*}}t(r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.w = vinser{{[0-9]*}}t(r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}.w = vlsr{{[0-9]*}}(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w = vmax(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w = vmin(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w = vmpye(v{{[0-9]*}}.w,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}.w = vmpyi(v{{[0-9]*}}.w,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w = vmpyi(v{{[0-9]*}}.w,r{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.w += vmpyi(v{{[0-9]*}}.w,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w += vmpyi(v{{[0-9]*}}.w,r{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.w = vmpyie(v{{[0-9]*}}.w,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}.w += vmpyie(v{{[0-9]*}}.w,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.w += vmpyie(v{{[0-9]*}}.w,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}.w = vmpyieo(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.w = vmpyio(v{{[0-9]*}}.w,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}.w = vmpyo(v{{[0-9]*}}.w,v{{[0-9]*}}.h):{{[0-9]*}}<<1:{{[0-9]*}}r{{[0-9]*}}nd:{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w = vmpyo(v{{[0-9]*}}.w,v{{[0-9]*}}.h):{{[0-9]*}}<<1:{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}.w += vmpyo(v{{[0-9]*}}.w,v{{[0-9]*}}.h):{{[0-9]*}}<<1:{{[0-9]*}}r{{[0-9]*}}nd:{{[0-9]*}}sat:{{[0-9]*}}shift
+; CHECK: v{{[0-9]*}}.w += vmpyo(v{{[0-9]*}}.w,v{{[0-9]*}}.h):{{[0-9]*}}<<1:{{[0-9]*}}sat:{{[0-9]*}}shift
+; CHECK: v{{[0-9]*}}.w = vnav{{[0-9]*}}g(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w = vnor{{[0-9]*}}mamt(v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w = vr{{[0-9]*}}mpy(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w = vr{{[0-9]*}}mpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w = vr{{[0-9]*}}mpy(v{{[0-9]*}}.ub,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w += vr{{[0-9]*}}mpy(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w += vr{{[0-9]*}}mpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w += vr{{[0-9]*}}mpy(v{{[0-9]*}}.ub,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}.w = vsub(v{{[0-9]*}}.w,v{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}.w = vsub(v{{[0-9]*}}.w,v{{[0-9]*}}.w):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}} = vcombine(v{{[0-9]*}},v{{[0-9]*}})
+; CHECK: v{{[0-9]*}}:{{[0-9]*}} = vdeal(v{{[0-9]*}},v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}:{{[0-9]*}} = vshuff(v{{[0-9]*}},v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}:{{[0-9]*}} = vshuff(v{{[0-9]*}},v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}:{{[0-9]*}} = vshuff(v{{[0-9]*}},v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: q{{[0-3]}} = vand(v{{[0-9]*}},r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}:{{[0-9]*}} = vswap(q{{[0-3]}},v{{[0-9]*}},v{{[0-9]*}})
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.b = vadd(v{{[0-9]*}}:{{[0-9]*}}.b,v{{[0-9]*}}:{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.b = vshuffoe(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.b = vsub(v{{[0-9]*}}:{{[0-9]*}}.b,v{{[0-9]*}}:{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vadd(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vadd(v{{[0-9]*}}:{{[0-9]*}}.h,v{{[0-9]*}}:{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vadd(v{{[0-9]*}}:{{[0-9]*}}.h,v{{[0-9]*}}:{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vdmpy(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h += vdmpy(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vlut16(v{{[0-9]*}}.b,v{{[0-9]*}}.h,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h |= vlut16(v{{[0-9]*}}.b,v{{[0-9]*}}.h,r{{[0-9]*}})
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vmpa(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vmpa(v{{[0-9]*}}:{{[0-9]*}}.ub,v{{[0-9]*}}:{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vmpa(v{{[0-9]*}}:{{[0-9]*}}.ub,v{{[0-9]*}}:{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h += vmpa(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vmpy(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vmpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vmpy(v{{[0-9]*}}.ub,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h += vmpy(v{{[0-9]*}}.b,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h += vmpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h += vmpy(v{{[0-9]*}}.ub,v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vshuffoe(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vsub(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vsub(v{{[0-9]*}}:{{[0-9]*}}.h,v{{[0-9]*}}:{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vsub(v{{[0-9]*}}:{{[0-9]*}}.h,v{{[0-9]*}}:{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vsxt(v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vtmpy(v{{[0-9]*}}:{{[0-9]*}}.b,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vtmpy(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h += vtmpy(v{{[0-9]*}}:{{[0-9]*}}.b,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h += vtmpy(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h = vunpack(v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.h |= vunpacko(v{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.ub = vadd(v{{[0-9]*}}:{{[0-9]*}}.ub,v{{[0-9]*}}:{{[0-9]*}}.ub):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.ub = vsub(v{{[0-9]*}}:{{[0-9]*}}.ub,v{{[0-9]*}}:{{[0-9]*}}.ub):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uh = vadd(v{{[0-9]*}}:{{[0-9]*}}.uh,v{{[0-9]*}}:{{[0-9]*}}.uh):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uh = vmpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uh = vmpy(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uh += vmpy(v{{[0-9]*}}.ub,r{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uh += vmpy(v{{[0-9]*}}.ub,v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uh = vsub(v{{[0-9]*}}:{{[0-9]*}}.uh,v{{[0-9]*}}:{{[0-9]*}}.uh):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uh = vunpack(v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uh = vzxt(v{{[0-9]*}}.ub)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw = vdsad(v{{[0-9]*}}:{{[0-9]*}}.uh,r{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw += vdsad(v{{[0-9]*}}:{{[0-9]*}}.uh,r{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw = vmpy(v{{[0-9]*}}.uh,r{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw = vmpy(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw += vmpy(v{{[0-9]*}}.uh,r{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw += vmpy(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw = vr{{[0-9]*}}mpy(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.ub,#0)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw += vr{{[0-9]*}}mpy(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.ub,#0)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw = vr{{[0-9]*}}sad(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.ub,#0)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw += vr{{[0-9]*}}sad(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.ub,#0)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw = vunpack(v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.uw = vzxt(v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vadd(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vadd(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vadd(v{{[0-9]*}}:{{[0-9]*}}.w,v{{[0-9]*}}:{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vadd(v{{[0-9]*}}:{{[0-9]*}}.w,v{{[0-9]*}}:{{[0-9]*}}.w):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vdmpy(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w += vdmpy(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vmpa(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w += vmpa(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vmpy(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vmpy(v{{[0-9]*}}.h,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w += vmpy(v{{[0-9]*}}.h,r{{[0-9]*}}.h):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w += vmpy(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w += vmpy(v{{[0-9]*}}.h,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vr{{[0-9]*}}mpy(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.b,#0)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w += vr{{[0-9]*}}mpy(v{{[0-9]*}}:{{[0-9]*}}.ub,r{{[0-9]*}}.b,#0)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vsub(v{{[0-9]*}}.h,v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vsub(v{{[0-9]*}}.uh,v{{[0-9]*}}.uh)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vsub(v{{[0-9]*}}:{{[0-9]*}}.w,v{{[0-9]*}}:{{[0-9]*}}.w)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vsub(v{{[0-9]*}}:{{[0-9]*}}.w,v{{[0-9]*}}:{{[0-9]*}}.w):{{[0-9]*}}sat
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vsxt(v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vtmpy(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w += vtmpy(v{{[0-9]*}}:{{[0-9]*}}.h,r{{[0-9]*}}.b)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w = vunpack(v{{[0-9]*}}.h)
+; CHECK: v{{[0-9]*}}:{{[0-9]*}}.w |= vunpacko(v{{[0-9]*}}.h)
+target datalayout = "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32"
+target triple = "hexagon"
+
+@K = global i64 0, align 8
+@src = global i8 -1, align 1
+@vecpreds = common global [15 x <16 x i32>] zeroinitializer, align 64
+@Q6VecPredResult = common global <16 x i32> zeroinitializer, align 64
+@vectors = common global [15 x <16 x i32>] zeroinitializer, align 64
+@VectorResult = common global <16 x i32> zeroinitializer, align 64
+@vector_pairs = common global [15 x <32 x i32>] zeroinitializer, align 128
+@VectorPairResult = common global <32 x i32> zeroinitializer, align 128
+@dst_addresses = common global [15 x i8] zeroinitializer, align 8
+@ptr_addresses = common global [15 x i8*] zeroinitializer, align 8
+@src_addresses = common global [15 x i8*] zeroinitializer, align 8
+@dst = common global i8 0, align 1
+@ptr = common global [32768 x i8] zeroinitializer, align 8
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ %0 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %1 = bitcast <16 x i32> %0 to <512 x i1>
+ %2 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
+ %3 = bitcast <16 x i32> %2 to <512 x i1>
+ %4 = call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %1, <512 x i1> %3)
+ %5 = bitcast <512 x i1> %4 to <16 x i32>
+ store volatile <16 x i32> %5, <16 x i32>* @Q6VecPredResult, align 64
+ %6 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %7 = bitcast <16 x i32> %6 to <512 x i1>
+ %8 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
+ %9 = bitcast <16 x i32> %8 to <512 x i1>
+ %10 = call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %7, <512 x i1> %9)
+ %11 = bitcast <512 x i1> %10 to <16 x i32>
+ store volatile <16 x i32> %11, <16 x i32>* @Q6VecPredResult, align 64
+ %12 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %13 = bitcast <16 x i32> %12 to <512 x i1>
+ %14 = call <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1> %13)
+ %15 = bitcast <512 x i1> %14 to <16 x i32>
+ store volatile <16 x i32> %15, <16 x i32>* @Q6VecPredResult, align 64
+ %16 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %17 = bitcast <16 x i32> %16 to <512 x i1>
+ %18 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
+ %19 = bitcast <16 x i32> %18 to <512 x i1>
+ %20 = call <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1> %17, <512 x i1> %19)
+ %21 = bitcast <512 x i1> %20 to <16 x i32>
+ store volatile <16 x i32> %21, <16 x i32>* @Q6VecPredResult, align 64
+ %22 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %23 = bitcast <16 x i32> %22 to <512 x i1>
+ %24 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
+ %25 = bitcast <16 x i32> %24 to <512 x i1>
+ %26 = call <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1> %23, <512 x i1> %25)
+ %27 = bitcast <512 x i1> %26 to <16 x i32>
+ store volatile <16 x i32> %27, <16 x i32>* @Q6VecPredResult, align 64
+ %28 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %29 = call <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32> %28, i32 -1)
+ %30 = bitcast <512 x i1> %29 to <16 x i32>
+ store volatile <16 x i32> %30, <16 x i32>* @Q6VecPredResult, align 64
+ %31 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %32 = bitcast <16 x i32> %31 to <512 x i1>
+ %33 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %34 = call <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1> %32, <16 x i32> %33, i32 -1)
+ %35 = bitcast <512 x i1> %34 to <16 x i32>
+ store volatile <16 x i32> %35, <16 x i32>* @Q6VecPredResult, align 64
+ %36 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %37 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %38 = call <512 x i1> @llvm.hexagon.V6.veqb(<16 x i32> %36, <16 x i32> %37)
+ %39 = bitcast <512 x i1> %38 to <16 x i32>
+ store volatile <16 x i32> %39, <16 x i32>* @Q6VecPredResult, align 64
+ %40 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %41 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %42 = call <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32> %40, <16 x i32> %41)
+ %43 = bitcast <512 x i1> %42 to <16 x i32>
+ store volatile <16 x i32> %43, <16 x i32>* @Q6VecPredResult, align 64
+ %44 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %45 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %46 = call <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32> %44, <16 x i32> %45)
+ %47 = bitcast <512 x i1> %46 to <16 x i32>
+ store volatile <16 x i32> %47, <16 x i32>* @Q6VecPredResult, align 64
+ %48 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %49 = bitcast <16 x i32> %48 to <512 x i1>
+ %50 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %51 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %52 = call <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1> %49, <16 x i32> %50, <16 x i32> %51)
+ %53 = bitcast <512 x i1> %52 to <16 x i32>
+ store volatile <16 x i32> %53, <16 x i32>* @Q6VecPredResult, align 64
+ %54 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %55 = bitcast <16 x i32> %54 to <512 x i1>
+ %56 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %57 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %58 = call <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1> %55, <16 x i32> %56, <16 x i32> %57)
+ %59 = bitcast <512 x i1> %58 to <16 x i32>
+ store volatile <16 x i32> %59, <16 x i32>* @Q6VecPredResult, align 64
+ %60 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %61 = bitcast <16 x i32> %60 to <512 x i1>
+ %62 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %63 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %64 = call <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1> %61, <16 x i32> %62, <16 x i32> %63)
+ %65 = bitcast <512 x i1> %64 to <16 x i32>
+ store volatile <16 x i32> %65, <16 x i32>* @Q6VecPredResult, align 64
+ %66 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %67 = bitcast <16 x i32> %66 to <512 x i1>
+ %68 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %69 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %70 = call <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1> %67, <16 x i32> %68, <16 x i32> %69)
+ %71 = bitcast <512 x i1> %70 to <16 x i32>
+ store volatile <16 x i32> %71, <16 x i32>* @Q6VecPredResult, align 64
+ %72 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %73 = bitcast <16 x i32> %72 to <512 x i1>
+ %74 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %75 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %76 = call <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1> %73, <16 x i32> %74, <16 x i32> %75)
+ %77 = bitcast <512 x i1> %76 to <16 x i32>
+ store volatile <16 x i32> %77, <16 x i32>* @Q6VecPredResult, align 64
+ %78 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %79 = bitcast <16 x i32> %78 to <512 x i1>
+ %80 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %81 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %82 = call <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1> %79, <16 x i32> %80, <16 x i32> %81)
+ %83 = bitcast <512 x i1> %82 to <16 x i32>
+ store volatile <16 x i32> %83, <16 x i32>* @Q6VecPredResult, align 64
+ %84 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %85 = bitcast <16 x i32> %84 to <512 x i1>
+ %86 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %87 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %88 = call <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1> %85, <16 x i32> %86, <16 x i32> %87)
+ %89 = bitcast <512 x i1> %88 to <16 x i32>
+ store volatile <16 x i32> %89, <16 x i32>* @Q6VecPredResult, align 64
+ %90 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %91 = bitcast <16 x i32> %90 to <512 x i1>
+ %92 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %93 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %94 = call <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1> %91, <16 x i32> %92, <16 x i32> %93)
+ %95 = bitcast <512 x i1> %94 to <16 x i32>
+ store volatile <16 x i32> %95, <16 x i32>* @Q6VecPredResult, align 64
+ %96 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %97 = bitcast <16 x i32> %96 to <512 x i1>
+ %98 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %99 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %100 = call <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1> %97, <16 x i32> %98, <16 x i32> %99)
+ %101 = bitcast <512 x i1> %100 to <16 x i32>
+ store volatile <16 x i32> %101, <16 x i32>* @Q6VecPredResult, align 64
+ %102 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %103 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %104 = call <512 x i1> @llvm.hexagon.V6.vgtb(<16 x i32> %102, <16 x i32> %103)
+ %105 = bitcast <512 x i1> %104 to <16 x i32>
+ store volatile <16 x i32> %105, <16 x i32>* @Q6VecPredResult, align 64
+ %106 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %107 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %108 = call <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32> %106, <16 x i32> %107)
+ %109 = bitcast <512 x i1> %108 to <16 x i32>
+ store volatile <16 x i32> %109, <16 x i32>* @Q6VecPredResult, align 64
+ %110 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %111 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %112 = call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %110, <16 x i32> %111)
+ %113 = bitcast <512 x i1> %112 to <16 x i32>
+ store volatile <16 x i32> %113, <16 x i32>* @Q6VecPredResult, align 64
+ %114 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %115 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %116 = call <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32> %114, <16 x i32> %115)
+ %117 = bitcast <512 x i1> %116 to <16 x i32>
+ store volatile <16 x i32> %117, <16 x i32>* @Q6VecPredResult, align 64
+ %118 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %119 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %120 = call <512 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32> %118, <16 x i32> %119)
+ %121 = bitcast <512 x i1> %120 to <16 x i32>
+ store volatile <16 x i32> %121, <16 x i32>* @Q6VecPredResult, align 64
+ %122 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %123 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %124 = call <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32> %122, <16 x i32> %123)
+ %125 = bitcast <512 x i1> %124 to <16 x i32>
+ store volatile <16 x i32> %125, <16 x i32>* @Q6VecPredResult, align 64
+ %126 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %127 = bitcast <16 x i32> %126 to <512 x i1>
+ %128 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %129 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %130 = call <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1> %127, <16 x i32> %128, <16 x i32> %129)
+ %131 = bitcast <512 x i1> %130 to <16 x i32>
+ store volatile <16 x i32> %131, <16 x i32>* @Q6VecPredResult, align 64
+ %132 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %133 = bitcast <16 x i32> %132 to <512 x i1>
+ %134 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %135 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %136 = call <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1> %133, <16 x i32> %134, <16 x i32> %135)
+ %137 = bitcast <512 x i1> %136 to <16 x i32>
+ store volatile <16 x i32> %137, <16 x i32>* @Q6VecPredResult, align 64
+ %138 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %139 = bitcast <16 x i32> %138 to <512 x i1>
+ %140 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %141 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %142 = call <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1> %139, <16 x i32> %140, <16 x i32> %141)
+ %143 = bitcast <512 x i1> %142 to <16 x i32>
+ store volatile <16 x i32> %143, <16 x i32>* @Q6VecPredResult, align 64
+ %144 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %145 = bitcast <16 x i32> %144 to <512 x i1>
+ %146 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %147 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %148 = call <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1> %145, <16 x i32> %146, <16 x i32> %147)
+ %149 = bitcast <512 x i1> %148 to <16 x i32>
+ store volatile <16 x i32> %149, <16 x i32>* @Q6VecPredResult, align 64
+ %150 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %151 = bitcast <16 x i32> %150 to <512 x i1>
+ %152 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %153 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %154 = call <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1> %151, <16 x i32> %152, <16 x i32> %153)
+ %155 = bitcast <512 x i1> %154 to <16 x i32>
+ store volatile <16 x i32> %155, <16 x i32>* @Q6VecPredResult, align 64
+ %156 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %157 = bitcast <16 x i32> %156 to <512 x i1>
+ %158 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %159 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %160 = call <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1> %157, <16 x i32> %158, <16 x i32> %159)
+ %161 = bitcast <512 x i1> %160 to <16 x i32>
+ store volatile <16 x i32> %161, <16 x i32>* @Q6VecPredResult, align 64
+ %162 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %163 = bitcast <16 x i32> %162 to <512 x i1>
+ %164 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %165 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %166 = call <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1> %163, <16 x i32> %164, <16 x i32> %165)
+ %167 = bitcast <512 x i1> %166 to <16 x i32>
+ store volatile <16 x i32> %167, <16 x i32>* @Q6VecPredResult, align 64
+ %168 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %169 = bitcast <16 x i32> %168 to <512 x i1>
+ %170 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %171 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %172 = call <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1> %169, <16 x i32> %170, <16 x i32> %171)
+ %173 = bitcast <512 x i1> %172 to <16 x i32>
+ store volatile <16 x i32> %173, <16 x i32>* @Q6VecPredResult, align 64
+ %174 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %175 = bitcast <16 x i32> %174 to <512 x i1>
+ %176 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %177 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %178 = call <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1> %175, <16 x i32> %176, <16 x i32> %177)
+ %179 = bitcast <512 x i1> %178 to <16 x i32>
+ store volatile <16 x i32> %179, <16 x i32>* @Q6VecPredResult, align 64
+ %180 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %181 = bitcast <16 x i32> %180 to <512 x i1>
+ %182 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %183 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %184 = call <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1> %181, <16 x i32> %182, <16 x i32> %183)
+ %185 = bitcast <512 x i1> %184 to <16 x i32>
+ store volatile <16 x i32> %185, <16 x i32>* @Q6VecPredResult, align 64
+ %186 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %187 = bitcast <16 x i32> %186 to <512 x i1>
+ %188 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %189 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %190 = call <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1> %187, <16 x i32> %188, <16 x i32> %189)
+ %191 = bitcast <512 x i1> %190 to <16 x i32>
+ store volatile <16 x i32> %191, <16 x i32>* @Q6VecPredResult, align 64
+ %192 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %193 = bitcast <16 x i32> %192 to <512 x i1>
+ %194 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %195 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %196 = call <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1> %193, <16 x i32> %194, <16 x i32> %195)
+ %197 = bitcast <512 x i1> %196 to <16 x i32>
+ store volatile <16 x i32> %197, <16 x i32>* @Q6VecPredResult, align 64
+ %198 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %199 = bitcast <16 x i32> %198 to <512 x i1>
+ %200 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %201 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %202 = call <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1> %199, <16 x i32> %200, <16 x i32> %201)
+ %203 = bitcast <512 x i1> %202 to <16 x i32>
+ store volatile <16 x i32> %203, <16 x i32>* @Q6VecPredResult, align 64
+ %204 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %205 = bitcast <16 x i32> %204 to <512 x i1>
+ %206 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %207 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %208 = call <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1> %205, <16 x i32> %206, <16 x i32> %207)
+ %209 = bitcast <512 x i1> %208 to <16 x i32>
+ store volatile <16 x i32> %209, <16 x i32>* @Q6VecPredResult, align 64
+ %210 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %211 = bitcast <16 x i32> %210 to <512 x i1>
+ %212 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %213 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %214 = call <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1> %211, <16 x i32> %212, <16 x i32> %213)
+ %215 = bitcast <512 x i1> %214 to <16 x i32>
+ store volatile <16 x i32> %215, <16 x i32>* @Q6VecPredResult, align 64
+ %216 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %217 = bitcast <16 x i32> %216 to <512 x i1>
+ %218 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %219 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %220 = call <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1> %217, <16 x i32> %218, <16 x i32> %219)
+ %221 = bitcast <512 x i1> %220 to <16 x i32>
+ store volatile <16 x i32> %221, <16 x i32>* @Q6VecPredResult, align 64
+ %222 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %223 = bitcast <16 x i32> %222 to <512 x i1>
+ %224 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %225 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %226 = call <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1> %223, <16 x i32> %224, <16 x i32> %225)
+ %227 = bitcast <512 x i1> %226 to <16 x i32>
+ store volatile <16 x i32> %227, <16 x i32>* @Q6VecPredResult, align 64
+ %228 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %229 = bitcast <16 x i32> %228 to <512 x i1>
+ %230 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %231 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %232 = call <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1> %229, <16 x i32> %230, <16 x i32> %231)
+ %233 = bitcast <512 x i1> %232 to <16 x i32>
+ store volatile <16 x i32> %233, <16 x i32>* @Q6VecPredResult, align 64
+ %234 = call <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32 1)
+ %235 = bitcast <512 x i1> %234 to <16 x i32>
+ store volatile <16 x i32> %235, <16 x i32>* @Q6VecPredResult, align 64
+ %236 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %237 = bitcast <16 x i32> %236 to <512 x i1>
+ %238 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
+ %239 = bitcast <16 x i32> %238 to <512 x i1>
+ %240 = call <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1> %237, <512 x i1> %239)
+ %241 = bitcast <512 x i1> %240 to <16 x i32>
+ store volatile <16 x i32> %241, <16 x i32>* @Q6VecPredResult, align 64
+ %242 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %243 = call <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32> %242)
+ store volatile <16 x i32> %243, <16 x i32>* @VectorResult, align 64
+ %244 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %245 = call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %244)
+ store volatile <16 x i32> %245, <16 x i32>* @VectorResult, align 64
+ %246 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %247 = call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %246)
+ store volatile <16 x i32> %247, <16 x i32>* @VectorResult, align 64
+ %248 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %249 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %250 = call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %248, <16 x i32> %249, i32 0)
+ store volatile <16 x i32> %250, <16 x i32>* @VectorResult, align 64
+ %251 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %252 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %253 = call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %251, <16 x i32> %252, i32 -1)
+ store volatile <16 x i32> %253, <16 x i32>* @VectorResult, align 64
+ %254 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %255 = bitcast <16 x i32> %254 to <512 x i1>
+ %256 = call <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1> %255, i32 -1)
+ store volatile <16 x i32> %256, <16 x i32>* @VectorResult, align 64
+ %257 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %258 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %259 = call <16 x i32> @llvm.hexagon.V6.vand(<16 x i32> %257, <16 x i32> %258)
+ store volatile <16 x i32> %259, <16 x i32>* @VectorResult, align 64
+ %260 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %261 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %262 = bitcast <16 x i32> %261 to <512 x i1>
+ %263 = call <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32> %260, <512 x i1> %262, i32 -1)
+ store volatile <16 x i32> %263, <16 x i32>* @VectorResult, align 64
+ %264 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %265 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %266 = call <16 x i32> @llvm.hexagon.V6.vdelta(<16 x i32> %264, <16 x i32> %265)
+ store volatile <16 x i32> %266, <16 x i32>* @VectorResult, align 64
+ %267 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %268 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %269 = call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %267, <16 x i32> %268, i32 0)
+ store volatile <16 x i32> %269, <16 x i32>* @VectorResult, align 64
+ %270 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %271 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %272 = call <16 x i32> @llvm.hexagon.V6.vlalignb(<16 x i32> %270, <16 x i32> %271, i32 -1)
+ store volatile <16 x i32> %272, <16 x i32>* @VectorResult, align 64
+ %273 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %274 = bitcast <16 x i32> %273 to <512 x i1>
+ %275 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %276 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %277 = call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %274, <16 x i32> %275, <16 x i32> %276)
+ store volatile <16 x i32> %277, <16 x i32>* @VectorResult, align 64
+ %278 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %279 = call <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32> %278)
+ store volatile <16 x i32> %279, <16 x i32>* @VectorResult, align 64
+ %280 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %281 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %282 = call <16 x i32> @llvm.hexagon.V6.vor(<16 x i32> %280, <16 x i32> %281)
+ store volatile <16 x i32> %282, <16 x i32>* @VectorResult, align 64
+ %283 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %284 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %285 = call <16 x i32> @llvm.hexagon.V6.vrdelta(<16 x i32> %283, <16 x i32> %284)
+ store volatile <16 x i32> %285, <16 x i32>* @VectorResult, align 64
+ %286 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %287 = call <16 x i32> @llvm.hexagon.V6.vror(<16 x i32> %286, i32 -1)
+ store volatile <16 x i32> %287, <16 x i32>* @VectorResult, align 64
+ %288 = call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 -1)
+ store volatile <16 x i32> %288, <16 x i32>* @VectorResult, align 64
+ %289 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %290 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %291 = call <16 x i32> @llvm.hexagon.V6.vxor(<16 x i32> %289, <16 x i32> %290)
+ store volatile <16 x i32> %291, <16 x i32>* @VectorResult, align 64
+ %292 = call <16 x i32> @llvm.hexagon.V6.vd0()
+ store volatile <16 x i32> %292, <16 x i32>* @VectorResult, align 64
+ %293 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %294 = bitcast <16 x i32> %293 to <512 x i1>
+ %295 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %296 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %297 = call <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1> %294, <16 x i32> %295, <16 x i32> %296)
+ store volatile <16 x i32> %297, <16 x i32>* @VectorResult, align 64
+ %298 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %299 = bitcast <16 x i32> %298 to <512 x i1>
+ %300 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %301 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %302 = call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %299, <16 x i32> %300, <16 x i32> %301)
+ store volatile <16 x i32> %302, <16 x i32>* @VectorResult, align 64
+ %303 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %304 = bitcast <16 x i32> %303 to <512 x i1>
+ %305 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %306 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %307 = call <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1> %304, <16 x i32> %305, <16 x i32> %306)
+ store volatile <16 x i32> %307, <16 x i32>* @VectorResult, align 64
+ %308 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %309 = bitcast <16 x i32> %308 to <512 x i1>
+ %310 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %311 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %312 = call <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1> %309, <16 x i32> %310, <16 x i32> %311)
+ store volatile <16 x i32> %312, <16 x i32>* @VectorResult, align 64
+ %313 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %314 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %315 = call <16 x i32> @llvm.hexagon.V6.vaddb(<16 x i32> %313, <16 x i32> %314)
+ store volatile <16 x i32> %315, <16 x i32>* @VectorResult, align 64
+ %316 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %317 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %318 = call <16 x i32> @llvm.hexagon.V6.vasrhbrndsat(<16 x i32> %316, <16 x i32> %317, i32 -1)
+ store volatile <16 x i32> %318, <16 x i32>* @VectorResult, align 64
+ %319 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %320 = call <16 x i32> @llvm.hexagon.V6.vdealb(<16 x i32> %319)
+ store volatile <16 x i32> %320, <16 x i32>* @VectorResult, align 64
+ %321 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %322 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %323 = call <16 x i32> @llvm.hexagon.V6.vdealb4w(<16 x i32> %321, <16 x i32> %322)
+ store volatile <16 x i32> %323, <16 x i32>* @VectorResult, align 64
+ %324 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %325 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %326 = call <16 x i32> @llvm.hexagon.V6.vlutvvb(<16 x i32> %324, <16 x i32> %325, i32 -1)
+ store volatile <16 x i32> %326, <16 x i32>* @VectorResult, align 64
+ %327 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %328 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %329 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %330 = call <16 x i32> @llvm.hexagon.V6.vlutvvb.oracc(<16 x i32> %327, <16 x i32> %328, <16 x i32> %329, i32 -1)
+ store volatile <16 x i32> %330, <16 x i32>* @VectorResult, align 64
+ %331 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %332 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %333 = call <16 x i32> @llvm.hexagon.V6.vnavgub(<16 x i32> %331, <16 x i32> %332)
+ store volatile <16 x i32> %333, <16 x i32>* @VectorResult, align 64
+ %334 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %335 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %336 = call <16 x i32> @llvm.hexagon.V6.vpackhb.sat(<16 x i32> %334, <16 x i32> %335)
+ store volatile <16 x i32> %336, <16 x i32>* @VectorResult, align 64
+ %337 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %338 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %339 = call <16 x i32> @llvm.hexagon.V6.vpackeb(<16 x i32> %337, <16 x i32> %338)
+ store volatile <16 x i32> %339, <16 x i32>* @VectorResult, align 64
+ %340 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %341 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %342 = call <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32> %340, <16 x i32> %341)
+ store volatile <16 x i32> %342, <16 x i32>* @VectorResult, align 64
+ %343 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %344 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %345 = call <16 x i32> @llvm.hexagon.V6.vroundhb(<16 x i32> %343, <16 x i32> %344)
+ store volatile <16 x i32> %345, <16 x i32>* @VectorResult, align 64
+ %346 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %347 = call <16 x i32> @llvm.hexagon.V6.vshuffb(<16 x i32> %346)
+ store volatile <16 x i32> %347, <16 x i32>* @VectorResult, align 64
+ %348 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %349 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %350 = call <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32> %348, <16 x i32> %349)
+ store volatile <16 x i32> %350, <16 x i32>* @VectorResult, align 64
+ %351 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %352 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %353 = call <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32> %351, <16 x i32> %352)
+ store volatile <16 x i32> %353, <16 x i32>* @VectorResult, align 64
+ %354 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %355 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %356 = call <16 x i32> @llvm.hexagon.V6.vsubb(<16 x i32> %354, <16 x i32> %355)
+ store volatile <16 x i32> %356, <16 x i32>* @VectorResult, align 64
+ %357 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %358 = bitcast <16 x i32> %357 to <512 x i1>
+ %359 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %360 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %361 = call <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1> %358, <16 x i32> %359, <16 x i32> %360)
+ store volatile <16 x i32> %361, <16 x i32>* @VectorResult, align 64
+ %362 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %363 = bitcast <16 x i32> %362 to <512 x i1>
+ %364 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %365 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %366 = call <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1> %363, <16 x i32> %364, <16 x i32> %365)
+ store volatile <16 x i32> %366, <16 x i32>* @VectorResult, align 64
+ %367 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %368 = bitcast <16 x i32> %367 to <512 x i1>
+ %369 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %370 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %371 = call <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1> %368, <16 x i32> %369, <16 x i32> %370)
+ store volatile <16 x i32> %371, <16 x i32>* @VectorResult, align 64
+ %372 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %373 = bitcast <16 x i32> %372 to <512 x i1>
+ %374 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %375 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %376 = call <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1> %373, <16 x i32> %374, <16 x i32> %375)
+ store volatile <16 x i32> %376, <16 x i32>* @VectorResult, align 64
+ %377 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %378 = call <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32> %377)
+ store volatile <16 x i32> %378, <16 x i32>* @VectorResult, align 64
+ %379 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %380 = call <16 x i32> @llvm.hexagon.V6.vabsh.sat(<16 x i32> %379)
+ store volatile <16 x i32> %380, <16 x i32>* @VectorResult, align 64
+ %381 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %382 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %383 = call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %381, <16 x i32> %382)
+ store volatile <16 x i32> %383, <16 x i32>* @VectorResult, align 64
+ %384 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %385 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %386 = call <16 x i32> @llvm.hexagon.V6.vaddhsat(<16 x i32> %384, <16 x i32> %385)
+ store volatile <16 x i32> %386, <16 x i32>* @VectorResult, align 64
+ %387 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %388 = call <16 x i32> @llvm.hexagon.V6.vaslh(<16 x i32> %387, i32 -1)
+ store volatile <16 x i32> %388, <16 x i32>* @VectorResult, align 64
+ %389 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %390 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %391 = call <16 x i32> @llvm.hexagon.V6.vaslhv(<16 x i32> %389, <16 x i32> %390)
+ store volatile <16 x i32> %391, <16 x i32>* @VectorResult, align 64
+ %392 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %393 = call <16 x i32> @llvm.hexagon.V6.vasrh(<16 x i32> %392, i32 -1)
+ store volatile <16 x i32> %393, <16 x i32>* @VectorResult, align 64
+ %394 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %395 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %396 = call <16 x i32> @llvm.hexagon.V6.vasrhv(<16 x i32> %394, <16 x i32> %395)
+ store volatile <16 x i32> %396, <16 x i32>* @VectorResult, align 64
+ %397 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %398 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %399 = call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %397, <16 x i32> %398, i32 -1)
+ store volatile <16 x i32> %399, <16 x i32>* @VectorResult, align 64
+ %400 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %401 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %402 = call <16 x i32> @llvm.hexagon.V6.vasrwhrndsat(<16 x i32> %400, <16 x i32> %401, i32 -1)
+ store volatile <16 x i32> %402, <16 x i32>* @VectorResult, align 64
+ %403 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %404 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %405 = call <16 x i32> @llvm.hexagon.V6.vasrwhsat(<16 x i32> %403, <16 x i32> %404, i32 -1)
+ store volatile <16 x i32> %405, <16 x i32>* @VectorResult, align 64
+ %406 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %407 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %408 = call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %406, <16 x i32> %407)
+ store volatile <16 x i32> %408, <16 x i32>* @VectorResult, align 64
+ %409 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %410 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %411 = call <16 x i32> @llvm.hexagon.V6.vavghrnd(<16 x i32> %409, <16 x i32> %410)
+ store volatile <16 x i32> %411, <16 x i32>* @VectorResult, align 64
+ %412 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %413 = call <16 x i32> @llvm.hexagon.V6.vdealh(<16 x i32> %412)
+ store volatile <16 x i32> %413, <16 x i32>* @VectorResult, align 64
+ %414 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %415 = call <16 x i32> @llvm.hexagon.V6.vdmpybus(<16 x i32> %414, i32 -1)
+ store volatile <16 x i32> %415, <16 x i32>* @VectorResult, align 64
+ %416 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %417 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %418 = call <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32> %416, <16 x i32> %417, i32 -1)
+ store volatile <16 x i32> %418, <16 x i32>* @VectorResult, align 64
+ %419 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %420 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %421 = call <16 x i32> @llvm.hexagon.V6.vlsrhv(<16 x i32> %419, <16 x i32> %420)
+ store volatile <16 x i32> %421, <16 x i32>* @VectorResult, align 64
+ %422 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %423 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %424 = call <16 x i32> @llvm.hexagon.V6.vmaxh(<16 x i32> %422, <16 x i32> %423)
+ store volatile <16 x i32> %424, <16 x i32>* @VectorResult, align 64
+ %425 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %426 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %427 = call <16 x i32> @llvm.hexagon.V6.vminh(<16 x i32> %425, <16 x i32> %426)
+ store volatile <16 x i32> %427, <16 x i32>* @VectorResult, align 64
+ %428 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %429 = call <16 x i32> @llvm.hexagon.V6.vmpyhsrs(<16 x i32> %428, i32 -1)
+ store volatile <16 x i32> %429, <16 x i32>* @VectorResult, align 64
+ %430 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %431 = call <16 x i32> @llvm.hexagon.V6.vmpyhss(<16 x i32> %430, i32 -1)
+ store volatile <16 x i32> %431, <16 x i32>* @VectorResult, align 64
+ %432 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %433 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %434 = call <16 x i32> @llvm.hexagon.V6.vmpyhvsrs(<16 x i32> %432, <16 x i32> %433)
+ store volatile <16 x i32> %434, <16 x i32>* @VectorResult, align 64
+ %435 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %436 = call <16 x i32> @llvm.hexagon.V6.vmpyihb(<16 x i32> %435, i32 -1)
+ store volatile <16 x i32> %436, <16 x i32>* @VectorResult, align 64
+ %437 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %438 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %439 = call <16 x i32> @llvm.hexagon.V6.vmpyih(<16 x i32> %437, <16 x i32> %438)
+ store volatile <16 x i32> %439, <16 x i32>* @VectorResult, align 64
+ %440 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %441 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %442 = call <16 x i32> @llvm.hexagon.V6.vmpyihb.acc(<16 x i32> %440, <16 x i32> %441, i32 -1)
+ store volatile <16 x i32> %442, <16 x i32>* @VectorResult, align 64
+ %443 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %444 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %445 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %446 = call <16 x i32> @llvm.hexagon.V6.vmpyih.acc(<16 x i32> %443, <16 x i32> %444, <16 x i32> %445)
+ store volatile <16 x i32> %446, <16 x i32>* @VectorResult, align 64
+ %447 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %448 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %449 = call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %447, <16 x i32> %448)
+ store volatile <16 x i32> %449, <16 x i32>* @VectorResult, align 64
+ %450 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %451 = call <16 x i32> @llvm.hexagon.V6.vnormamth(<16 x i32> %450)
+ store volatile <16 x i32> %451, <16 x i32>* @VectorResult, align 64
+ %452 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %453 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %454 = call <16 x i32> @llvm.hexagon.V6.vpackwh.sat(<16 x i32> %452, <16 x i32> %453)
+ store volatile <16 x i32> %454, <16 x i32>* @VectorResult, align 64
+ %455 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %456 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %457 = call <16 x i32> @llvm.hexagon.V6.vpackeh(<16 x i32> %455, <16 x i32> %456)
+ store volatile <16 x i32> %457, <16 x i32>* @VectorResult, align 64
+ %458 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %459 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %460 = call <16 x i32> @llvm.hexagon.V6.vpackoh(<16 x i32> %458, <16 x i32> %459)
+ store volatile <16 x i32> %460, <16 x i32>* @VectorResult, align 64
+ %461 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %462 = call <16 x i32> @llvm.hexagon.V6.vpopcounth(<16 x i32> %461)
+ store volatile <16 x i32> %462, <16 x i32>* @VectorResult, align 64
+ %463 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %464 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %465 = call <16 x i32> @llvm.hexagon.V6.vroundwh(<16 x i32> %463, <16 x i32> %464)
+ store volatile <16 x i32> %465, <16 x i32>* @VectorResult, align 64
+ %466 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %467 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %468 = call <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32> %466, <16 x i32> %467)
+ store volatile <16 x i32> %468, <16 x i32>* @VectorResult, align 64
+ %469 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %470 = call <16 x i32> @llvm.hexagon.V6.vshuffh(<16 x i32> %469)
+ store volatile <16 x i32> %470, <16 x i32>* @VectorResult, align 64
+ %471 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %472 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %473 = call <16 x i32> @llvm.hexagon.V6.vshufeh(<16 x i32> %471, <16 x i32> %472)
+ store volatile <16 x i32> %473, <16 x i32>* @VectorResult, align 64
+ %474 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %475 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %476 = call <16 x i32> @llvm.hexagon.V6.vshufoh(<16 x i32> %474, <16 x i32> %475)
+ store volatile <16 x i32> %476, <16 x i32>* @VectorResult, align 64
+ %477 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %478 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %479 = call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %477, <16 x i32> %478)
+ store volatile <16 x i32> %479, <16 x i32>* @VectorResult, align 64
+ %480 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %481 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %482 = call <16 x i32> @llvm.hexagon.V6.vsubhsat(<16 x i32> %480, <16 x i32> %481)
+ store volatile <16 x i32> %482, <16 x i32>* @VectorResult, align 64
+ %483 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %484 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %485 = call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %483, <16 x i32> %484)
+ store volatile <16 x i32> %485, <16 x i32>* @VectorResult, align 64
+ %486 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %487 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %488 = call <16 x i32> @llvm.hexagon.V6.vaddubsat(<16 x i32> %486, <16 x i32> %487)
+ store volatile <16 x i32> %488, <16 x i32>* @VectorResult, align 64
+ %489 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %490 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %491 = call <16 x i32> @llvm.hexagon.V6.vasrhubrndsat(<16 x i32> %489, <16 x i32> %490, i32 -1)
+ store volatile <16 x i32> %491, <16 x i32>* @VectorResult, align 64
+ %492 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %493 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %494 = call <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32> %492, <16 x i32> %493, i32 -1)
+ store volatile <16 x i32> %494, <16 x i32>* @VectorResult, align 64
+ %495 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %496 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %497 = call <16 x i32> @llvm.hexagon.V6.vavgub(<16 x i32> %495, <16 x i32> %496)
+ store volatile <16 x i32> %497, <16 x i32>* @VectorResult, align 64
+ %498 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %499 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %500 = call <16 x i32> @llvm.hexagon.V6.vavgubrnd(<16 x i32> %498, <16 x i32> %499)
+ store volatile <16 x i32> %500, <16 x i32>* @VectorResult, align 64
+ %501 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %502 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %503 = call <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32> %501, <16 x i32> %502)
+ store volatile <16 x i32> %503, <16 x i32>* @VectorResult, align 64
+ %504 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %505 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %506 = call <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32> %504, <16 x i32> %505)
+ store volatile <16 x i32> %506, <16 x i32>* @VectorResult, align 64
+ %507 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %508 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %509 = call <16 x i32> @llvm.hexagon.V6.vpackhub.sat(<16 x i32> %507, <16 x i32> %508)
+ store volatile <16 x i32> %509, <16 x i32>* @VectorResult, align 64
+ %510 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %511 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %512 = call <16 x i32> @llvm.hexagon.V6.vroundhub(<16 x i32> %510, <16 x i32> %511)
+ store volatile <16 x i32> %512, <16 x i32>* @VectorResult, align 64
+ %513 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %514 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %515 = call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %513, <16 x i32> %514)
+ store volatile <16 x i32> %515, <16 x i32>* @VectorResult, align 64
+ %516 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %517 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %518 = call <16 x i32> @llvm.hexagon.V6.vsububsat(<16 x i32> %516, <16 x i32> %517)
+ store volatile <16 x i32> %518, <16 x i32>* @VectorResult, align 64
+ %519 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %520 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %521 = call <16 x i32> @llvm.hexagon.V6.vabsdiffh(<16 x i32> %519, <16 x i32> %520)
+ store volatile <16 x i32> %521, <16 x i32>* @VectorResult, align 64
+ %522 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %523 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %524 = call <16 x i32> @llvm.hexagon.V6.vabsdiffuh(<16 x i32> %522, <16 x i32> %523)
+ store volatile <16 x i32> %524, <16 x i32>* @VectorResult, align 64
+ %525 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %526 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %527 = call <16 x i32> @llvm.hexagon.V6.vadduhsat(<16 x i32> %525, <16 x i32> %526)
+ store volatile <16 x i32> %527, <16 x i32>* @VectorResult, align 64
+ %528 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %529 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %530 = call <16 x i32> @llvm.hexagon.V6.vasrwuhsat(<16 x i32> %528, <16 x i32> %529, i32 -1)
+ store volatile <16 x i32> %530, <16 x i32>* @VectorResult, align 64
+ %531 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %532 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %533 = call <16 x i32> @llvm.hexagon.V6.vavguh(<16 x i32> %531, <16 x i32> %532)
+ store volatile <16 x i32> %533, <16 x i32>* @VectorResult, align 64
+ %534 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %535 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %536 = call <16 x i32> @llvm.hexagon.V6.vavguhrnd(<16 x i32> %534, <16 x i32> %535)
+ store volatile <16 x i32> %536, <16 x i32>* @VectorResult, align 64
+ %537 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %538 = call <16 x i32> @llvm.hexagon.V6.vcl0h(<16 x i32> %537)
+ store volatile <16 x i32> %538, <16 x i32>* @VectorResult, align 64
+ %539 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %540 = call <16 x i32> @llvm.hexagon.V6.vlsrh(<16 x i32> %539, i32 -1)
+ store volatile <16 x i32> %540, <16 x i32>* @VectorResult, align 64
+ %541 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %542 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %543 = call <16 x i32> @llvm.hexagon.V6.vmaxuh(<16 x i32> %541, <16 x i32> %542)
+ store volatile <16 x i32> %543, <16 x i32>* @VectorResult, align 64
+ %544 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %545 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %546 = call <16 x i32> @llvm.hexagon.V6.vminuh(<16 x i32> %544, <16 x i32> %545)
+ store volatile <16 x i32> %546, <16 x i32>* @VectorResult, align 64
+ %547 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %548 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %549 = call <16 x i32> @llvm.hexagon.V6.vpackwuh.sat(<16 x i32> %547, <16 x i32> %548)
+ store volatile <16 x i32> %549, <16 x i32>* @VectorResult, align 64
+ %550 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %551 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %552 = call <16 x i32> @llvm.hexagon.V6.vroundwuh(<16 x i32> %550, <16 x i32> %551)
+ store volatile <16 x i32> %552, <16 x i32>* @VectorResult, align 64
+ %553 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %554 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %555 = call <16 x i32> @llvm.hexagon.V6.vsubuhsat(<16 x i32> %553, <16 x i32> %554)
+ store volatile <16 x i32> %555, <16 x i32>* @VectorResult, align 64
+ %556 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %557 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %558 = call <16 x i32> @llvm.hexagon.V6.vabsdiffw(<16 x i32> %556, <16 x i32> %557)
+ store volatile <16 x i32> %558, <16 x i32>* @VectorResult, align 64
+ %559 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %560 = call <16 x i32> @llvm.hexagon.V6.vcl0w(<16 x i32> %559)
+ store volatile <16 x i32> %560, <16 x i32>* @VectorResult, align 64
+ %561 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %562 = call <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32> %561, i32 -1)
+ store volatile <16 x i32> %562, <16 x i32>* @VectorResult, align 64
+ %563 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %564 = call <16 x i32> @llvm.hexagon.V6.vrmpyub(<16 x i32> %563, i32 -1)
+ store volatile <16 x i32> %564, <16 x i32>* @VectorResult, align 64
+ %565 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %566 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %567 = call <16 x i32> @llvm.hexagon.V6.vrmpyubv(<16 x i32> %565, <16 x i32> %566)
+ store volatile <16 x i32> %567, <16 x i32>* @VectorResult, align 64
+ %568 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %569 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %570 = call <16 x i32> @llvm.hexagon.V6.vrmpyub.acc(<16 x i32> %568, <16 x i32> %569, i32 -1)
+ store volatile <16 x i32> %570, <16 x i32>* @VectorResult, align 64
+ %571 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %572 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %573 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %574 = call <16 x i32> @llvm.hexagon.V6.vrmpyubv.acc(<16 x i32> %571, <16 x i32> %572, <16 x i32> %573)
+ store volatile <16 x i32> %574, <16 x i32>* @VectorResult, align 64
+ %575 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %576 = bitcast <16 x i32> %575 to <512 x i1>
+ %577 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %578 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %579 = call <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1> %576, <16 x i32> %577, <16 x i32> %578)
+ store volatile <16 x i32> %579, <16 x i32>* @VectorResult, align 64
+ %580 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %581 = bitcast <16 x i32> %580 to <512 x i1>
+ %582 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %583 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %584 = call <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1> %581, <16 x i32> %582, <16 x i32> %583)
+ store volatile <16 x i32> %584, <16 x i32>* @VectorResult, align 64
+ %585 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %586 = bitcast <16 x i32> %585 to <512 x i1>
+ %587 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %588 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %589 = call <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1> %586, <16 x i32> %587, <16 x i32> %588)
+ store volatile <16 x i32> %589, <16 x i32>* @VectorResult, align 64
+ %590 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %591 = bitcast <16 x i32> %590 to <512 x i1>
+ %592 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %593 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %594 = call <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1> %591, <16 x i32> %592, <16 x i32> %593)
+ store volatile <16 x i32> %594, <16 x i32>* @VectorResult, align 64
+ %595 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %596 = call <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32> %595)
+ store volatile <16 x i32> %596, <16 x i32>* @VectorResult, align 64
+ %597 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %598 = call <16 x i32> @llvm.hexagon.V6.vabsw.sat(<16 x i32> %597)
+ store volatile <16 x i32> %598, <16 x i32>* @VectorResult, align 64
+ %599 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %600 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %601 = call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %599, <16 x i32> %600)
+ store volatile <16 x i32> %601, <16 x i32>* @VectorResult, align 64
+ %602 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %603 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %604 = call <16 x i32> @llvm.hexagon.V6.vaddwsat(<16 x i32> %602, <16 x i32> %603)
+ store volatile <16 x i32> %604, <16 x i32>* @VectorResult, align 64
+ %605 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %606 = call <16 x i32> @llvm.hexagon.V6.vaslw(<16 x i32> %605, i32 -1)
+ store volatile <16 x i32> %606, <16 x i32>* @VectorResult, align 64
+ %607 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %608 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %609 = call <16 x i32> @llvm.hexagon.V6.vaslwv(<16 x i32> %607, <16 x i32> %608)
+ store volatile <16 x i32> %609, <16 x i32>* @VectorResult, align 64
+ %610 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %611 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %612 = call <16 x i32> @llvm.hexagon.V6.vaslw.acc(<16 x i32> %610, <16 x i32> %611, i32 -1)
+ store volatile <16 x i32> %612, <16 x i32>* @VectorResult, align 64
+ %613 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %614 = call <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32> %613, i32 -1)
+ store volatile <16 x i32> %614, <16 x i32>* @VectorResult, align 64
+ %615 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %616 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %617 = call <16 x i32> @llvm.hexagon.V6.vasrwv(<16 x i32> %615, <16 x i32> %616)
+ store volatile <16 x i32> %617, <16 x i32>* @VectorResult, align 64
+ %618 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %619 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %620 = call <16 x i32> @llvm.hexagon.V6.vasrw.acc(<16 x i32> %618, <16 x i32> %619, i32 -1)
+ store volatile <16 x i32> %620, <16 x i32>* @VectorResult, align 64
+ %621 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %622 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %623 = call <16 x i32> @llvm.hexagon.V6.vavgw(<16 x i32> %621, <16 x i32> %622)
+ store volatile <16 x i32> %623, <16 x i32>* @VectorResult, align 64
+ %624 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %625 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %626 = call <16 x i32> @llvm.hexagon.V6.vavgwrnd(<16 x i32> %624, <16 x i32> %625)
+ store volatile <16 x i32> %626, <16 x i32>* @VectorResult, align 64
+ %627 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %628 = call <16 x i32> @llvm.hexagon.V6.vdmpyhb(<16 x i32> %627, i32 -1)
+ store volatile <16 x i32> %628, <16 x i32>* @VectorResult, align 64
+ %629 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %630 = call <16 x i32> @llvm.hexagon.V6.vdmpyhsat(<16 x i32> %629, i32 -1)
+ store volatile <16 x i32> %630, <16 x i32>* @VectorResult, align 64
+ %631 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %632 = call <16 x i32> @llvm.hexagon.V6.vdmpyhsusat(<16 x i32> %631, i32 -1)
+ store volatile <16 x i32> %632, <16 x i32>* @VectorResult, align 64
+ %633 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %634 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %635 = call <16 x i32> @llvm.hexagon.V6.vdmpyhvsat(<16 x i32> %633, <16 x i32> %634)
+ store volatile <16 x i32> %635, <16 x i32>* @VectorResult, align 64
+ %636 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %637 = call <16 x i32> @llvm.hexagon.V6.vdmpyhisat(<32 x i32> %636, i32 -1)
+ store volatile <16 x i32> %637, <16 x i32>* @VectorResult, align 64
+ %638 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %639 = call <16 x i32> @llvm.hexagon.V6.vdmpyhsuisat(<32 x i32> %638, i32 -1)
+ store volatile <16 x i32> %639, <16 x i32>* @VectorResult, align 64
+ %640 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %641 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %642 = call <16 x i32> @llvm.hexagon.V6.vdmpyhb.acc(<16 x i32> %640, <16 x i32> %641, i32 -1)
+ store volatile <16 x i32> %642, <16 x i32>* @VectorResult, align 64
+ %643 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %644 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %645 = call <16 x i32> @llvm.hexagon.V6.vdmpyhsat.acc(<16 x i32> %643, <16 x i32> %644, i32 -1)
+ store volatile <16 x i32> %645, <16 x i32>* @VectorResult, align 64
+ %646 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %647 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %648 = call <16 x i32> @llvm.hexagon.V6.vdmpyhsusat.acc(<16 x i32> %646, <16 x i32> %647, i32 -1)
+ store volatile <16 x i32> %648, <16 x i32>* @VectorResult, align 64
+ %649 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %650 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %651 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %652 = call <16 x i32> @llvm.hexagon.V6.vdmpyhvsat.acc(<16 x i32> %649, <16 x i32> %650, <16 x i32> %651)
+ store volatile <16 x i32> %652, <16 x i32>* @VectorResult, align 64
+ %653 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %654 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %655 = call <16 x i32> @llvm.hexagon.V6.vdmpyhisat.acc(<16 x i32> %653, <32 x i32> %654, i32 -1)
+ store volatile <16 x i32> %655, <16 x i32>* @VectorResult, align 64
+ %656 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %657 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %658 = call <16 x i32> @llvm.hexagon.V6.vdmpyhsuisat.acc(<16 x i32> %656, <32 x i32> %657, i32 -1)
+ store volatile <16 x i32> %658, <16 x i32>* @VectorResult, align 64
+ %659 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %660 = call <16 x i32> @llvm.hexagon.V6.vinsertwr(<16 x i32> %659, i32 -1)
+ store volatile <16 x i32> %660, <16 x i32>* @VectorResult, align 64
+ %661 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %662 = call <16 x i32> @llvm.hexagon.V6.vinsertwr(<16 x i32> %661, i32 0)
+ store volatile <16 x i32> %662, <16 x i32>* @VectorResult, align 64
+ %663 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %664 = call <16 x i32> @llvm.hexagon.V6.vinsertwr(<16 x i32> %663, i32 1)
+ store volatile <16 x i32> %664, <16 x i32>* @VectorResult, align 64
+ %665 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %666 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %667 = call <16 x i32> @llvm.hexagon.V6.vlsrwv(<16 x i32> %665, <16 x i32> %666)
+ store volatile <16 x i32> %667, <16 x i32>* @VectorResult, align 64
+ %668 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %669 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %670 = call <16 x i32> @llvm.hexagon.V6.vmaxw(<16 x i32> %668, <16 x i32> %669)
+ store volatile <16 x i32> %670, <16 x i32>* @VectorResult, align 64
+ %671 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %672 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %673 = call <16 x i32> @llvm.hexagon.V6.vminw(<16 x i32> %671, <16 x i32> %672)
+ store volatile <16 x i32> %673, <16 x i32>* @VectorResult, align 64
+ %674 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %675 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %676 = call <16 x i32> @llvm.hexagon.V6.vmpyewuh(<16 x i32> %674, <16 x i32> %675)
+ store volatile <16 x i32> %676, <16 x i32>* @VectorResult, align 64
+ %677 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %678 = call <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32> %677, i32 -1)
+ store volatile <16 x i32> %678, <16 x i32>* @VectorResult, align 64
+ %679 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %680 = call <16 x i32> @llvm.hexagon.V6.vmpyiwh(<16 x i32> %679, i32 -1)
+ store volatile <16 x i32> %680, <16 x i32>* @VectorResult, align 64
+ %681 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %682 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %683 = call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %681, <16 x i32> %682, i32 -1)
+ store volatile <16 x i32> %683, <16 x i32>* @VectorResult, align 64
+ %684 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %685 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %686 = call <16 x i32> @llvm.hexagon.V6.vmpyiwh.acc(<16 x i32> %684, <16 x i32> %685, i32 -1)
+ store volatile <16 x i32> %686, <16 x i32>* @VectorResult, align 64
+ %687 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %688 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %689 = call <16 x i32> @llvm.hexagon.V6.vmpyiewuh(<16 x i32> %687, <16 x i32> %688)
+ store volatile <16 x i32> %689, <16 x i32>* @VectorResult, align 64
+ %690 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %691 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %692 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %693 = call <16 x i32> @llvm.hexagon.V6.vmpyiewh.acc(<16 x i32> %690, <16 x i32> %691, <16 x i32> %692)
+ store volatile <16 x i32> %693, <16 x i32>* @VectorResult, align 64
+ %694 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %695 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %696 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %697 = call <16 x i32> @llvm.hexagon.V6.vmpyiewuh.acc(<16 x i32> %694, <16 x i32> %695, <16 x i32> %696)
+ store volatile <16 x i32> %697, <16 x i32>* @VectorResult, align 64
+ %698 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %699 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %700 = call <16 x i32> @llvm.hexagon.V6.vmpyieoh(<16 x i32> %698, <16 x i32> %699)
+ store volatile <16 x i32> %700, <16 x i32>* @VectorResult, align 64
+ %701 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %702 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %703 = call <16 x i32> @llvm.hexagon.V6.vmpyiowh(<16 x i32> %701, <16 x i32> %702)
+ store volatile <16 x i32> %703, <16 x i32>* @VectorResult, align 64
+ %704 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %705 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %706 = call <16 x i32> @llvm.hexagon.V6.vmpyowh.rnd(<16 x i32> %704, <16 x i32> %705)
+ store volatile <16 x i32> %706, <16 x i32>* @VectorResult, align 64
+ %707 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %708 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %709 = call <16 x i32> @llvm.hexagon.V6.vmpyowh(<16 x i32> %707, <16 x i32> %708)
+ store volatile <16 x i32> %709, <16 x i32>* @VectorResult, align 64
+ %710 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %711 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %712 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %713 = call <16 x i32> @llvm.hexagon.V6.vmpyowh.rnd.sacc(<16 x i32> %710, <16 x i32> %711, <16 x i32> %712)
+ store volatile <16 x i32> %713, <16 x i32>* @VectorResult, align 64
+ %714 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %715 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %716 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %717 = call <16 x i32> @llvm.hexagon.V6.vmpyowh.sacc(<16 x i32> %714, <16 x i32> %715, <16 x i32> %716)
+ store volatile <16 x i32> %717, <16 x i32>* @VectorResult, align 64
+ %718 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %719 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %720 = call <16 x i32> @llvm.hexagon.V6.vnavgw(<16 x i32> %718, <16 x i32> %719)
+ store volatile <16 x i32> %720, <16 x i32>* @VectorResult, align 64
+ %721 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %722 = call <16 x i32> @llvm.hexagon.V6.vnormamtw(<16 x i32> %721)
+ store volatile <16 x i32> %722, <16 x i32>* @VectorResult, align 64
+ %723 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %724 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %725 = call <16 x i32> @llvm.hexagon.V6.vrmpybv(<16 x i32> %723, <16 x i32> %724)
+ store volatile <16 x i32> %725, <16 x i32>* @VectorResult, align 64
+ %726 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %727 = call <16 x i32> @llvm.hexagon.V6.vrmpybus(<16 x i32> %726, i32 -1)
+ store volatile <16 x i32> %727, <16 x i32>* @VectorResult, align 64
+ %728 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %729 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %730 = call <16 x i32> @llvm.hexagon.V6.vrmpybusv(<16 x i32> %728, <16 x i32> %729)
+ store volatile <16 x i32> %730, <16 x i32>* @VectorResult, align 64
+ %731 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %732 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %733 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %734 = call <16 x i32> @llvm.hexagon.V6.vrmpybv.acc(<16 x i32> %731, <16 x i32> %732, <16 x i32> %733)
+ store volatile <16 x i32> %734, <16 x i32>* @VectorResult, align 64
+ %735 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %736 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %737 = call <16 x i32> @llvm.hexagon.V6.vrmpybus.acc(<16 x i32> %735, <16 x i32> %736, i32 -1)
+ store volatile <16 x i32> %737, <16 x i32>* @VectorResult, align 64
+ %738 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %739 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %740 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 2), align 64
+ %741 = call <16 x i32> @llvm.hexagon.V6.vrmpybusv.acc(<16 x i32> %738, <16 x i32> %739, <16 x i32> %740)
+ store volatile <16 x i32> %741, <16 x i32>* @VectorResult, align 64
+ %742 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %743 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %744 = call <16 x i32> @llvm.hexagon.V6.vsubw(<16 x i32> %742, <16 x i32> %743)
+ store volatile <16 x i32> %744, <16 x i32>* @VectorResult, align 64
+ %745 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %746 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %747 = call <16 x i32> @llvm.hexagon.V6.vsubwsat(<16 x i32> %745, <16 x i32> %746)
+ store volatile <16 x i32> %747, <16 x i32>* @VectorResult, align 64
+ %748 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %749 = call <32 x i32> @llvm.hexagon.V6.vassignp(<32 x i32> %748)
+ store volatile <32 x i32> %749, <32 x i32>* @VectorPairResult, align 128
+ %750 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %751 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %752 = call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %750, <16 x i32> %751)
+ store volatile <32 x i32> %752, <32 x i32>* @VectorPairResult, align 128
+ %753 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %754 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %755 = call <32 x i32> @llvm.hexagon.V6.vdealvdd(<16 x i32> %753, <16 x i32> %754, i32 -1)
+ store volatile <32 x i32> %755, <32 x i32>* @VectorPairResult, align 128
+ %756 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %757 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %758 = call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> %756, <16 x i32> %757, i32 -1)
+ store volatile <32 x i32> %758, <32 x i32>* @VectorPairResult, align 128
+ %759 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %760 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %761 = call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> %759, <16 x i32> %760, i32 0)
+ store volatile <32 x i32> %761, <32 x i32>* @VectorPairResult, align 128
+ %762 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %763 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %764 = call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> %762, <16 x i32> %763, i32 1)
+ store volatile <32 x i32> %764, <32 x i32>* @VectorPairResult, align 128
+ %765 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %766 = bitcast <16 x i32> %765 to <512 x i1>
+ %767 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %768 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %769 = call <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1> %766, <16 x i32> %767, <16 x i32> %768)
+ store volatile <32 x i32> %769, <32 x i32>* @VectorPairResult, align 128
+ %770 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %771 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %772 = call <32 x i32> @llvm.hexagon.V6.vaddb.dv(<32 x i32> %770, <32 x i32> %771)
+ store volatile <32 x i32> %772, <32 x i32>* @VectorPairResult, align 128
+ %773 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %774 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %775 = call <32 x i32> @llvm.hexagon.V6.vshufoeb(<16 x i32> %773, <16 x i32> %774)
+ store volatile <32 x i32> %775, <32 x i32>* @VectorPairResult, align 128
+ %776 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %777 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %778 = call <32 x i32> @llvm.hexagon.V6.vsubb.dv(<32 x i32> %776, <32 x i32> %777)
+ store volatile <32 x i32> %778, <32 x i32>* @VectorPairResult, align 128
+ %779 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %780 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %781 = call <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32> %779, <16 x i32> %780)
+ store volatile <32 x i32> %781, <32 x i32>* @VectorPairResult, align 128
+ %782 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %783 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %784 = call <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32> %782, <32 x i32> %783)
+ store volatile <32 x i32> %784, <32 x i32>* @VectorPairResult, align 128
+ %785 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %786 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %787 = call <32 x i32> @llvm.hexagon.V6.vaddhsat.dv(<32 x i32> %785, <32 x i32> %786)
+ store volatile <32 x i32> %787, <32 x i32>* @VectorPairResult, align 128
+ %788 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %789 = call <32 x i32> @llvm.hexagon.V6.vdmpybus.dv(<32 x i32> %788, i32 -1)
+ store volatile <32 x i32> %789, <32 x i32>* @VectorPairResult, align 128
+ %790 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %791 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %792 = call <32 x i32> @llvm.hexagon.V6.vdmpybus.dv.acc(<32 x i32> %790, <32 x i32> %791, i32 -1)
+ store volatile <32 x i32> %792, <32 x i32>* @VectorPairResult, align 128
+ %793 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %794 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %795 = call <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32> %793, <16 x i32> %794, i32 -1)
+ store volatile <32 x i32> %795, <32 x i32>* @VectorPairResult, align 128
+ %796 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %797 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %798 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %799 = call <32 x i32> @llvm.hexagon.V6.vlutvwh.oracc(<32 x i32> %796, <16 x i32> %797, <16 x i32> %798, i32 -1)
+ store volatile <32 x i32> %799, <32 x i32>* @VectorPairResult, align 128
+ %800 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %801 = call <32 x i32> @llvm.hexagon.V6.vmpabus(<32 x i32> %800, i32 -1)
+ store volatile <32 x i32> %801, <32 x i32>* @VectorPairResult, align 128
+ %802 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %803 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %804 = call <32 x i32> @llvm.hexagon.V6.vmpabusv(<32 x i32> %802, <32 x i32> %803)
+ store volatile <32 x i32> %804, <32 x i32>* @VectorPairResult, align 128
+ %805 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %806 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %807 = call <32 x i32> @llvm.hexagon.V6.vmpabuuv(<32 x i32> %805, <32 x i32> %806)
+ store volatile <32 x i32> %807, <32 x i32>* @VectorPairResult, align 128
+ %808 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %809 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %810 = call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %808, <32 x i32> %809, i32 -1)
+ store volatile <32 x i32> %810, <32 x i32>* @VectorPairResult, align 128
+ %811 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %812 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %813 = call <32 x i32> @llvm.hexagon.V6.vmpybv(<16 x i32> %811, <16 x i32> %812)
+ store volatile <32 x i32> %813, <32 x i32>* @VectorPairResult, align 128
+ %814 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %815 = call <32 x i32> @llvm.hexagon.V6.vmpybus(<16 x i32> %814, i32 -1)
+ store volatile <32 x i32> %815, <32 x i32>* @VectorPairResult, align 128
+ %816 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %817 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %818 = call <32 x i32> @llvm.hexagon.V6.vmpybusv(<16 x i32> %816, <16 x i32> %817)
+ store volatile <32 x i32> %818, <32 x i32>* @VectorPairResult, align 128
+ %819 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %820 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %821 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %822 = call <32 x i32> @llvm.hexagon.V6.vmpybv.acc(<32 x i32> %819, <16 x i32> %820, <16 x i32> %821)
+ store volatile <32 x i32> %822, <32 x i32>* @VectorPairResult, align 128
+ %823 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %824 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %825 = call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %823, <16 x i32> %824, i32 -1)
+ store volatile <32 x i32> %825, <32 x i32>* @VectorPairResult, align 128
+ %826 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %827 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %828 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %829 = call <32 x i32> @llvm.hexagon.V6.vmpybusv.acc(<32 x i32> %826, <16 x i32> %827, <16 x i32> %828)
+ store volatile <32 x i32> %829, <32 x i32>* @VectorPairResult, align 128
+ %830 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %831 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %832 = call <32 x i32> @llvm.hexagon.V6.vshufoeh(<16 x i32> %830, <16 x i32> %831)
+ store volatile <32 x i32> %832, <32 x i32>* @VectorPairResult, align 128
+ %833 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %834 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %835 = call <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32> %833, <16 x i32> %834)
+ store volatile <32 x i32> %835, <32 x i32>* @VectorPairResult, align 128
+ %836 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %837 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %838 = call <32 x i32> @llvm.hexagon.V6.vsubh.dv(<32 x i32> %836, <32 x i32> %837)
+ store volatile <32 x i32> %838, <32 x i32>* @VectorPairResult, align 128
+ %839 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %840 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %841 = call <32 x i32> @llvm.hexagon.V6.vsubhsat.dv(<32 x i32> %839, <32 x i32> %840)
+ store volatile <32 x i32> %841, <32 x i32>* @VectorPairResult, align 128
+ %842 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %843 = call <32 x i32> @llvm.hexagon.V6.vsb(<16 x i32> %842)
+ store volatile <32 x i32> %843, <32 x i32>* @VectorPairResult, align 128
+ %844 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %845 = call <32 x i32> @llvm.hexagon.V6.vtmpyb(<32 x i32> %844, i32 -1)
+ store volatile <32 x i32> %845, <32 x i32>* @VectorPairResult, align 128
+ %846 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %847 = call <32 x i32> @llvm.hexagon.V6.vtmpybus(<32 x i32> %846, i32 -1)
+ store volatile <32 x i32> %847, <32 x i32>* @VectorPairResult, align 128
+ %848 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %849 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %850 = call <32 x i32> @llvm.hexagon.V6.vtmpyb.acc(<32 x i32> %848, <32 x i32> %849, i32 -1)
+ store volatile <32 x i32> %850, <32 x i32>* @VectorPairResult, align 128
+ %851 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %852 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %853 = call <32 x i32> @llvm.hexagon.V6.vtmpybus.acc(<32 x i32> %851, <32 x i32> %852, i32 -1)
+ store volatile <32 x i32> %853, <32 x i32>* @VectorPairResult, align 128
+ %854 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %855 = call <32 x i32> @llvm.hexagon.V6.vunpackb(<16 x i32> %854)
+ store volatile <32 x i32> %855, <32 x i32>* @VectorPairResult, align 128
+ %856 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %857 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %858 = call <32 x i32> @llvm.hexagon.V6.vunpackob(<32 x i32> %856, <16 x i32> %857)
+ store volatile <32 x i32> %858, <32 x i32>* @VectorPairResult, align 128
+ %859 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %860 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %861 = call <32 x i32> @llvm.hexagon.V6.vaddubsat.dv(<32 x i32> %859, <32 x i32> %860)
+ store volatile <32 x i32> %861, <32 x i32>* @VectorPairResult, align 128
+ %862 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %863 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %864 = call <32 x i32> @llvm.hexagon.V6.vsububsat.dv(<32 x i32> %862, <32 x i32> %863)
+ store volatile <32 x i32> %864, <32 x i32>* @VectorPairResult, align 128
+ %865 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %866 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %867 = call <32 x i32> @llvm.hexagon.V6.vadduhsat.dv(<32 x i32> %865, <32 x i32> %866)
+ store volatile <32 x i32> %867, <32 x i32>* @VectorPairResult, align 128
+ %868 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %869 = call <32 x i32> @llvm.hexagon.V6.vmpyub(<16 x i32> %868, i32 -1)
+ store volatile <32 x i32> %869, <32 x i32>* @VectorPairResult, align 128
+ %870 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %871 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %872 = call <32 x i32> @llvm.hexagon.V6.vmpyubv(<16 x i32> %870, <16 x i32> %871)
+ store volatile <32 x i32> %872, <32 x i32>* @VectorPairResult, align 128
+ %873 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %874 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %875 = call <32 x i32> @llvm.hexagon.V6.vmpyub.acc(<32 x i32> %873, <16 x i32> %874, i32 -1)
+ store volatile <32 x i32> %875, <32 x i32>* @VectorPairResult, align 128
+ %876 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %877 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %878 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %879 = call <32 x i32> @llvm.hexagon.V6.vmpyubv.acc(<32 x i32> %876, <16 x i32> %877, <16 x i32> %878)
+ store volatile <32 x i32> %879, <32 x i32>* @VectorPairResult, align 128
+ %880 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %881 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %882 = call <32 x i32> @llvm.hexagon.V6.vsubuhsat.dv(<32 x i32> %880, <32 x i32> %881)
+ store volatile <32 x i32> %882, <32 x i32>* @VectorPairResult, align 128
+ %883 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %884 = call <32 x i32> @llvm.hexagon.V6.vunpackub(<16 x i32> %883)
+ store volatile <32 x i32> %884, <32 x i32>* @VectorPairResult, align 128
+ %885 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %886 = call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %885)
+ store volatile <32 x i32> %886, <32 x i32>* @VectorPairResult, align 128
+ %887 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %888 = call <32 x i32> @llvm.hexagon.V6.vdsaduh(<32 x i32> %887, i32 -1)
+ store volatile <32 x i32> %888, <32 x i32>* @VectorPairResult, align 128
+ %889 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %890 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %891 = call <32 x i32> @llvm.hexagon.V6.vdsaduh.acc(<32 x i32> %889, <32 x i32> %890, i32 -1)
+ store volatile <32 x i32> %891, <32 x i32>* @VectorPairResult, align 128
+ %892 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %893 = call <32 x i32> @llvm.hexagon.V6.vmpyuh(<16 x i32> %892, i32 -1)
+ store volatile <32 x i32> %893, <32 x i32>* @VectorPairResult, align 128
+ %894 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %895 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %896 = call <32 x i32> @llvm.hexagon.V6.vmpyuhv(<16 x i32> %894, <16 x i32> %895)
+ store volatile <32 x i32> %896, <32 x i32>* @VectorPairResult, align 128
+ %897 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %898 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %899 = call <32 x i32> @llvm.hexagon.V6.vmpyuh.acc(<32 x i32> %897, <16 x i32> %898, i32 -1)
+ store volatile <32 x i32> %899, <32 x i32>* @VectorPairResult, align 128
+ %900 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %901 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %902 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %903 = call <32 x i32> @llvm.hexagon.V6.vmpyuhv.acc(<32 x i32> %900, <16 x i32> %901, <16 x i32> %902)
+ store volatile <32 x i32> %903, <32 x i32>* @VectorPairResult, align 128
+ %904 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %905 = call <32 x i32> @llvm.hexagon.V6.vrmpyubi(<32 x i32> %904, i32 -1, i32 0)
+ store volatile <32 x i32> %905, <32 x i32>* @VectorPairResult, align 128
+ %906 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %907 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %908 = call <32 x i32> @llvm.hexagon.V6.vrmpyubi.acc(<32 x i32> %906, <32 x i32> %907, i32 -1, i32 0)
+ store volatile <32 x i32> %908, <32 x i32>* @VectorPairResult, align 128
+ %909 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %910 = call <32 x i32> @llvm.hexagon.V6.vrsadubi(<32 x i32> %909, i32 -1, i32 0)
+ store volatile <32 x i32> %910, <32 x i32>* @VectorPairResult, align 128
+ %911 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %912 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %913 = call <32 x i32> @llvm.hexagon.V6.vrsadubi.acc(<32 x i32> %911, <32 x i32> %912, i32 -1, i32 0)
+ store volatile <32 x i32> %913, <32 x i32>* @VectorPairResult, align 128
+ %914 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %915 = call <32 x i32> @llvm.hexagon.V6.vunpackuh(<16 x i32> %914)
+ store volatile <32 x i32> %915, <32 x i32>* @VectorPairResult, align 128
+ %916 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %917 = call <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32> %916)
+ store volatile <32 x i32> %917, <32 x i32>* @VectorPairResult, align 128
+ %918 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %919 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %920 = call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %918, <16 x i32> %919)
+ store volatile <32 x i32> %920, <32 x i32>* @VectorPairResult, align 128
+ %921 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %922 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %923 = call <32 x i32> @llvm.hexagon.V6.vadduhw(<16 x i32> %921, <16 x i32> %922)
+ store volatile <32 x i32> %923, <32 x i32>* @VectorPairResult, align 128
+ %924 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %925 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %926 = call <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32> %924, <32 x i32> %925)
+ store volatile <32 x i32> %926, <32 x i32>* @VectorPairResult, align 128
+ %927 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %928 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %929 = call <32 x i32> @llvm.hexagon.V6.vaddwsat.dv(<32 x i32> %927, <32 x i32> %928)
+ store volatile <32 x i32> %929, <32 x i32>* @VectorPairResult, align 128
+ %930 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %931 = call <32 x i32> @llvm.hexagon.V6.vdmpyhb.dv(<32 x i32> %930, i32 -1)
+ store volatile <32 x i32> %931, <32 x i32>* @VectorPairResult, align 128
+ %932 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %933 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %934 = call <32 x i32> @llvm.hexagon.V6.vdmpyhb.dv.acc(<32 x i32> %932, <32 x i32> %933, i32 -1)
+ store volatile <32 x i32> %934, <32 x i32>* @VectorPairResult, align 128
+ %935 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %936 = call <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32> %935, i32 -1)
+ store volatile <32 x i32> %936, <32 x i32>* @VectorPairResult, align 128
+ %937 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %938 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %939 = call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %937, <32 x i32> %938, i32 -1)
+ store volatile <32 x i32> %939, <32 x i32>* @VectorPairResult, align 128
+ %940 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %941 = call <32 x i32> @llvm.hexagon.V6.vmpyh(<16 x i32> %940, i32 -1)
+ store volatile <32 x i32> %941, <32 x i32>* @VectorPairResult, align 128
+ %942 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %943 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %944 = call <32 x i32> @llvm.hexagon.V6.vmpyhv(<16 x i32> %942, <16 x i32> %943)
+ store volatile <32 x i32> %944, <32 x i32>* @VectorPairResult, align 128
+ %945 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %946 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %947 = call <32 x i32> @llvm.hexagon.V6.vmpyhus(<16 x i32> %945, <16 x i32> %946)
+ store volatile <32 x i32> %947, <32 x i32>* @VectorPairResult, align 128
+ %948 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %949 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %950 = call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %948, <16 x i32> %949, i32 -1)
+ store volatile <32 x i32> %950, <32 x i32>* @VectorPairResult, align 128
+ %951 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %952 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %953 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %954 = call <32 x i32> @llvm.hexagon.V6.vmpyhv.acc(<32 x i32> %951, <16 x i32> %952, <16 x i32> %953)
+ store volatile <32 x i32> %954, <32 x i32>* @VectorPairResult, align 128
+ %955 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %956 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %957 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %958 = call <32 x i32> @llvm.hexagon.V6.vmpyhus.acc(<32 x i32> %955, <16 x i32> %956, <16 x i32> %957)
+ store volatile <32 x i32> %958, <32 x i32>* @VectorPairResult, align 128
+ %959 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %960 = call <32 x i32> @llvm.hexagon.V6.vrmpybusi(<32 x i32> %959, i32 -1, i32 0)
+ store volatile <32 x i32> %960, <32 x i32>* @VectorPairResult, align 128
+ %961 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %962 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %963 = call <32 x i32> @llvm.hexagon.V6.vrmpybusi.acc(<32 x i32> %961, <32 x i32> %962, i32 -1, i32 0)
+ store volatile <32 x i32> %963, <32 x i32>* @VectorPairResult, align 128
+ %964 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %965 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %966 = call <32 x i32> @llvm.hexagon.V6.vsubhw(<16 x i32> %964, <16 x i32> %965)
+ store volatile <32 x i32> %966, <32 x i32>* @VectorPairResult, align 128
+ %967 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %968 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 1), align 64
+ %969 = call <32 x i32> @llvm.hexagon.V6.vsubuhw(<16 x i32> %967, <16 x i32> %968)
+ store volatile <32 x i32> %969, <32 x i32>* @VectorPairResult, align 128
+ %970 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %971 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %972 = call <32 x i32> @llvm.hexagon.V6.vsubw.dv(<32 x i32> %970, <32 x i32> %971)
+ store volatile <32 x i32> %972, <32 x i32>* @VectorPairResult, align 128
+ %973 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %974 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %975 = call <32 x i32> @llvm.hexagon.V6.vsubwsat.dv(<32 x i32> %973, <32 x i32> %974)
+ store volatile <32 x i32> %975, <32 x i32>* @VectorPairResult, align 128
+ %976 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %977 = call <32 x i32> @llvm.hexagon.V6.vsh(<16 x i32> %976)
+ store volatile <32 x i32> %977, <32 x i32>* @VectorPairResult, align 128
+ %978 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %979 = call <32 x i32> @llvm.hexagon.V6.vtmpyhb(<32 x i32> %978, i32 -1)
+ store volatile <32 x i32> %979, <32 x i32>* @VectorPairResult, align 128
+ %980 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %981 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 1), align 128
+ %982 = call <32 x i32> @llvm.hexagon.V6.vtmpyhb.acc(<32 x i32> %980, <32 x i32> %981, i32 -1)
+ store volatile <32 x i32> %982, <32 x i32>* @VectorPairResult, align 128
+ %983 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %984 = call <32 x i32> @llvm.hexagon.V6.vunpackh(<16 x i32> %983)
+ store volatile <32 x i32> %984, <32 x i32>* @VectorPairResult, align 128
+ %985 = load volatile <32 x i32>, <32 x i32>* getelementptr inbounds ([15 x <32 x i32>], [15 x <32 x i32>]* @vector_pairs, i32 0, i32 0), align 128
+ %986 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vectors, i32 0, i32 0), align 64
+ %987 = call <32 x i32> @llvm.hexagon.V6.vunpackoh(<32 x i32> %985, <16 x i32> %986)
+ store volatile <32 x i32> %987, <32 x i32>* @VectorPairResult, align 128
+ ret i32 0
+}
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.pred.not(<512 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.pred.or(<512 x i1>, <512 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.pred.or.n(<512 x i1>, <512 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vandvrt(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vandvrt.acc(<512 x i1>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqb(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqb.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqh.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqb.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqh.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.veqw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtb(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgth(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtuh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtuw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtb.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgth.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtub.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtuh.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtuw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtw.and(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtb.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgth.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtub.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtuh.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtuw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtw.or(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtb.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgth.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtub.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtuh.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtuw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.vgtw.xor(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.pred.scalar2(i32) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.pred.xor(<512 x i1>, <512 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vassign(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vandqrt.acc(<16 x i32>, <512 x i1>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdelta(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vlalignb(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vor(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrdelta(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vror(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vxor(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vd0() #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubbq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddb(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrhbrndsat(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdealb(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdealb4w(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vlutvvb(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vlutvvb.oracc(<16 x i32>, <16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vnavgub(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vpackhb.sat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vpackeb(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vpackob(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vroundhb(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vshuffb(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vshuffeb(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vshuffob(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubb(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubhq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubhnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vabsh.sat(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddhsat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaslh(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaslhv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrh(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrhv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrwhrndsat(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrwhsat(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vavghrnd(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdealh(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpybus(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpybus.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vlsrhv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmaxh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vminh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyhsrs(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyhss(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyhvsrs(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyihb(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyih(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyihb.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyih.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vnormamth(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vpackwh.sat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vpackeh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vpackoh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vpopcounth(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vroundwh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsatwh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vshuffh(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vshufeh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vshufoh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubhsat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddubsat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrhubrndsat(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vavgub(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vavgubrnd(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmaxub(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vminub(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vpackhub.sat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vroundhub(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsububsat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vabsdiffh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vabsdiffuh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vadduhsat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrwuhsat(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vavguh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vavguhrnd(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vcl0h(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vlsrh(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmaxuh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vminuh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vpackwuh.sat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vroundwuh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubuhsat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vabsdiffw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vcl0w(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vlsrw(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpyub(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpyubv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpyub.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpyubv.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubwq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubwnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vabsw(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vabsw.sat(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaddwsat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaslw(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaslwv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vaslw.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrw(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrwv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrw.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vavgw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vavgwrnd(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhb(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhsat(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhsusat(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhvsat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhisat(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhsuisat(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhb.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhsat.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhsusat.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhvsat.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhisat.acc(<16 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vdmpyhsuisat.acc(<16 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vinsertwr(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vlsrwv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmaxw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vminw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyewuh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyiwb(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyiwh(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyiwh.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyiewuh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyiewh.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyiewuh.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyieoh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyiowh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyowh.rnd(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyowh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyowh.rnd.sacc(<16 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vmpyowh.sacc(<16 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vnavgw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vnormamtw(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpybv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpybus(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpybusv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpybv.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpybus.acc(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vrmpybusv.acc(<16 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vsubwsat(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vassignp(<32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vdealvdd(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vswap(<512 x i1>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vaddb.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vshufoeb(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsubb.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vaddubh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vaddhsat.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vdmpybus.dv(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vdmpybus.dv.acc(<32 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vlutvwh(<16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vlutvwh.oracc(<32 x i32>, <16 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpabus(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpabusv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpabuuv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpybv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpybus(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpybusv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpybv.acc(<32 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpybusv.acc(<32 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vshufoeh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsububh(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsubh.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsubhsat.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsb(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vtmpyb(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vtmpybus(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vtmpyb.acc(<32 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vtmpybus.acc(<32 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vunpackb(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vunpackob(<32 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vaddubsat.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsububsat.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vadduhsat.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyub(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyubv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyub.acc(<32 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyubv.acc(<32 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsubuhsat.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vunpackub(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vdsaduh(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vdsaduh.acc(<32 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyuh(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyuhv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyuh.acc(<32 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyuhv.acc(<32 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vrmpyubi(<32 x i32>, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vrmpyubi.acc(<32 x i32>, <32 x i32>, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vrsadubi(<32 x i32>, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vrsadubi.acc(<32 x i32>, <32 x i32>, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vunpackuh(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vzh(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vadduhw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vaddw.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vaddwsat.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vdmpyhb.dv(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vdmpyhb.dv.acc(<32 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpahb(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyh(<16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyhv(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyhus(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32>, <16 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyhv.acc(<32 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vmpyhus.acc(<32 x i32>, <16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vrmpybusi(<32 x i32>, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vrmpybusi.acc(<32 x i32>, <32 x i32>, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsubhw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsubuhw(<16 x i32>, <16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsubw.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsubwsat.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vsh(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vtmpyhb(<32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vtmpyhb.acc(<32 x i32>, <32 x i32>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vunpackh(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vunpackoh(<32 x i32>, <16 x i32>) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/Hexagon/v60Vasr.ll b/test/CodeGen/Hexagon/v60Vasr.ll
new file mode 100644
index 000000000000..fb177f614f72
--- /dev/null
+++ b/test/CodeGen/Hexagon/v60Vasr.ll
@@ -0,0 +1,247 @@
+; RUN: llc -march=hexagon -O2 -mcpu=hexagonv60 < %s | FileCheck %s
+
+; CHECK: vasr(v{{[0-9]+}}.h,v{{[0-9]+}}.h,r{{[0-7]+}}):sat
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32"
+target triple = "hexagon-unknown--elf"
+
+%struct.buffer_t = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [6 x i8] }
+
+; Function Attrs: norecurse nounwind
+define i32 @__test_vasr(%struct.buffer_t* noalias nocapture %f.buffer, %struct.buffer_t* noalias nocapture %g.buffer, %struct.buffer_t* noalias nocapture %res.buffer) #0 {
+entry:
+ %buf_host = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 1
+ %f.host = load i8*, i8** %buf_host, align 4
+ %buf_dev = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 0
+ %f.dev = load i64, i64* %buf_dev, align 8
+ %0 = icmp eq i8* %f.host, null
+ %1 = icmp eq i64 %f.dev, 0
+ %f.host_and_dev_are_null = and i1 %0, %1
+ %buf_min = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 0
+ %f.min.0 = load i32, i32* %buf_min, align 4
+ %buf_host10 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 1
+ %g.host = load i8*, i8** %buf_host10, align 4
+ %buf_dev11 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 0
+ %g.dev = load i64, i64* %buf_dev11, align 8
+ %2 = icmp eq i8* %g.host, null
+ %3 = icmp eq i64 %g.dev, 0
+ %g.host_and_dev_are_null = and i1 %2, %3
+ %buf_min22 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 0
+ %g.min.0 = load i32, i32* %buf_min22, align 4
+ %buf_host27 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 1
+ %res.host = load i8*, i8** %buf_host27, align 4
+ %buf_dev28 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 0
+ %res.dev = load i64, i64* %buf_dev28, align 8
+ %4 = icmp eq i8* %res.host, null
+ %5 = icmp eq i64 %res.dev, 0
+ %res.host_and_dev_are_null = and i1 %4, %5
+ %buf_extent31 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 0
+ %res.extent.0 = load i32, i32* %buf_extent31, align 4
+ %buf_min39 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 0
+ %res.min.0 = load i32, i32* %buf_min39, align 4
+ %6 = add nsw i32 %res.extent.0, -1
+ %7 = and i32 %6, -64
+ %8 = add i32 %res.min.0, 63
+ %9 = add i32 %8, %7
+ %10 = add nsw i32 %res.min.0, %res.extent.0
+ %11 = add nsw i32 %10, -1
+ %12 = icmp slt i32 %9, %11
+ %13 = select i1 %12, i32 %9, i32 %11
+ %14 = add nsw i32 %10, -64
+ %15 = icmp slt i32 %res.min.0, %14
+ %16 = select i1 %15, i32 %res.min.0, i32 %14
+ %f.extent.0.required.s = sub nsw i32 %13, %16
+ br i1 %f.host_and_dev_are_null, label %true_bb, label %after_bb
+
+true_bb: ; preds = %entry
+ %buf_elem_size44 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 5
+ store i32 1, i32* %buf_elem_size44, align 4
+ store i32 %16, i32* %buf_min, align 4
+ %17 = add nsw i32 %f.extent.0.required.s, 1
+ %buf_extent46 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 0
+ store i32 %17, i32* %buf_extent46, align 4
+ %buf_stride47 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 0
+ store i32 1, i32* %buf_stride47, align 4
+ %buf_min48 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 1
+ store i32 0, i32* %buf_min48, align 4
+ %buf_extent49 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 1
+ store i32 0, i32* %buf_extent49, align 4
+ %buf_stride50 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 1
+ store i32 0, i32* %buf_stride50, align 4
+ %buf_min51 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 2
+ store i32 0, i32* %buf_min51, align 4
+ %buf_extent52 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 2
+ store i32 0, i32* %buf_extent52, align 4
+ %buf_stride53 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 2
+ store i32 0, i32* %buf_stride53, align 4
+ %buf_min54 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 3
+ store i32 0, i32* %buf_min54, align 4
+ %buf_extent55 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 3
+ store i32 0, i32* %buf_extent55, align 4
+ %buf_stride56 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 3
+ store i32 0, i32* %buf_stride56, align 4
+ br label %after_bb
+
+after_bb: ; preds = %true_bb, %entry
+ br i1 %g.host_and_dev_are_null, label %true_bb57, label %after_bb59
+
+true_bb57: ; preds = %after_bb
+ %buf_elem_size60 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 5
+ store i32 1, i32* %buf_elem_size60, align 4
+ store i32 %16, i32* %buf_min22, align 4
+ %18 = add nsw i32 %f.extent.0.required.s, 1
+ %buf_extent62 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 0
+ store i32 %18, i32* %buf_extent62, align 4
+ %buf_stride63 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 0
+ store i32 1, i32* %buf_stride63, align 4
+ %buf_min64 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 1
+ store i32 0, i32* %buf_min64, align 4
+ %buf_extent65 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 1
+ store i32 0, i32* %buf_extent65, align 4
+ %buf_stride66 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 1
+ store i32 0, i32* %buf_stride66, align 4
+ %buf_min67 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 2
+ store i32 0, i32* %buf_min67, align 4
+ %buf_extent68 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 2
+ store i32 0, i32* %buf_extent68, align 4
+ %buf_stride69 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 2
+ store i32 0, i32* %buf_stride69, align 4
+ %buf_min70 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 3
+ store i32 0, i32* %buf_min70, align 4
+ %buf_extent71 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 3
+ store i32 0, i32* %buf_extent71, align 4
+ %buf_stride72 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 3
+ store i32 0, i32* %buf_stride72, align 4
+ br label %after_bb59
+
+after_bb59: ; preds = %true_bb57, %after_bb
+ br i1 %res.host_and_dev_are_null, label %after_bb75.thread, label %after_bb75
+
+after_bb75.thread: ; preds = %after_bb59
+ %buf_elem_size76 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 5
+ store i32 1, i32* %buf_elem_size76, align 4
+ store i32 %16, i32* %buf_min39, align 4
+ %19 = add nsw i32 %f.extent.0.required.s, 1
+ store i32 %19, i32* %buf_extent31, align 4
+ %buf_stride79 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 0
+ store i32 1, i32* %buf_stride79, align 4
+ %buf_min80 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 1
+ store i32 0, i32* %buf_min80, align 4
+ %buf_extent81 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 1
+ store i32 0, i32* %buf_extent81, align 4
+ %buf_stride82 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 1
+ store i32 0, i32* %buf_stride82, align 4
+ %buf_min83 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 2
+ store i32 0, i32* %buf_min83, align 4
+ %buf_extent84 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 2
+ store i32 0, i32* %buf_extent84, align 4
+ %buf_stride85 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 2
+ store i32 0, i32* %buf_stride85, align 4
+ %buf_min86 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 3
+ store i32 0, i32* %buf_min86, align 4
+ %buf_extent87 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 3
+ store i32 0, i32* %buf_extent87, align 4
+ %buf_stride88 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 3
+ store i32 0, i32* %buf_stride88, align 4
+ br label %destructor_block
+
+after_bb75: ; preds = %after_bb59
+ %20 = or i1 %f.host_and_dev_are_null, %g.host_and_dev_are_null
+ br i1 %20, label %destructor_block, label %"produce res"
+
+"produce res": ; preds = %after_bb75
+ %21 = ashr i32 %res.extent.0, 6
+ %22 = icmp sgt i32 %21, 0
+ br i1 %22, label %"for res.s0.x.x", label %"end for res.s0.x.x", !prof !4
+
+"for res.s0.x.x": ; preds = %"for res.s0.x.x", %"produce res"
+ %res.s0.x.x = phi i32 [ %41, %"for res.s0.x.x" ], [ 0, %"produce res" ]
+ %23 = shl nsw i32 %res.s0.x.x, 6
+ %24 = add nsw i32 %23, %res.min.0
+ %25 = sub nsw i32 %24, %f.min.0
+ %26 = getelementptr inbounds i8, i8* %f.host, i32 %25
+ %27 = bitcast i8* %26 to <16 x i32>*
+ %28 = load <16 x i32>, <16 x i32>* %27, align 1, !tbaa !5
+ %29 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %28)
+ %30 = sub nsw i32 %24, %g.min.0
+ %31 = getelementptr inbounds i8, i8* %g.host, i32 %30
+ %32 = bitcast i8* %31 to <16 x i32>*
+ %33 = load <16 x i32>, <16 x i32>* %32, align 1, !tbaa !8
+ %34 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %33)
+ %35 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32> %29, <32 x i32> %34)
+ %36 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %35)
+ %37 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %35)
+ %38 = tail call <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32> %36, <16 x i32> %37, i32 4)
+ %39 = getelementptr inbounds i8, i8* %res.host, i32 %23
+ %40 = bitcast i8* %39 to <16 x i32>*
+ store <16 x i32> %38, <16 x i32>* %40, align 1, !tbaa !10
+ %41 = add nuw nsw i32 %res.s0.x.x, 1
+ %42 = icmp eq i32 %41, %21
+ br i1 %42, label %"end for res.s0.x.x", label %"for res.s0.x.x"
+
+"end for res.s0.x.x": ; preds = %"for res.s0.x.x", %"produce res"
+ %43 = add nsw i32 %res.extent.0, 63
+ %44 = ashr i32 %43, 6
+ %45 = icmp sgt i32 %44, %21
+ br i1 %45, label %"for res.s0.x.x92.preheader", label %destructor_block, !prof !4
+
+"for res.s0.x.x92.preheader": ; preds = %"end for res.s0.x.x"
+ %46 = sub i32 -64, %f.min.0
+ %47 = add i32 %46, %10
+ %48 = getelementptr inbounds i8, i8* %f.host, i32 %47
+ %49 = bitcast i8* %48 to <16 x i32>*
+ %50 = load <16 x i32>, <16 x i32>* %49, align 1
+ %51 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %50)
+ %52 = sub i32 -64, %g.min.0
+ %53 = add i32 %52, %10
+ %54 = getelementptr inbounds i8, i8* %g.host, i32 %53
+ %55 = bitcast i8* %54 to <16 x i32>*
+ %56 = load <16 x i32>, <16 x i32>* %55, align 1
+ %57 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %56)
+ %58 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32> %51, <32 x i32> %57)
+ %59 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %58)
+ %60 = add nsw i32 %res.extent.0, -64
+ %61 = getelementptr inbounds i8, i8* %res.host, i32 %60
+ %62 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %58)
+ %63 = tail call <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32> %62, <16 x i32> %59, i32 4)
+ %64 = bitcast i8* %61 to <16 x i32>*
+ store <16 x i32> %63, <16 x i32>* %64, align 1, !tbaa !10
+ br label %destructor_block
+
+destructor_block: ; preds = %"for res.s0.x.x92.preheader", %"end for res.s0.x.x", %after_bb75, %after_bb75.thread
+ ret i32 0
+}
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32>, <32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32>, <16 x i32>, i32) #1
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0}
+!llvm.module.flags = !{!1, !2, !3}
+
+!0 = !{!"Clang $LLVM_VERSION_MAJOR.$LLVM_VERSION_MINOR (based on LLVM 3.8.0)"}
+!1 = !{i32 2, !"halide_use_soft_float_abi", i32 0}
+!2 = !{i32 2, !"halide_mcpu", !"hexagonv60"}
+!3 = !{i32 2, !"halide_mattrs", !"+hvx"}
+!4 = !{!"branch_weights", i32 1073741824, i32 0}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"f", !7}
+!7 = !{!"Halide buffer"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"g", !7}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"res", !7}
diff --git a/test/CodeGen/Hexagon/v60small.ll b/test/CodeGen/Hexagon/v60small.ll
new file mode 100644
index 000000000000..8a6a6155a399
--- /dev/null
+++ b/test/CodeGen/Hexagon/v60small.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=hexagon -O2 -mcpu=hexagonv60 < %s | FileCheck %s
+
+; CHECK: q{{[0-3]}} = v{{[0-9]*}}and(v{{[0-9]*}},r{{[0-9]*}})
+target datalayout = "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32"
+target triple = "hexagon"
+
+@K = global i64 0, align 8
+@src = global i8 -1, align 1
+@vecpreds = common global [15 x <16 x i32>] zeroinitializer, align 64
+@Q6VecPredResult = common global <16 x i32> zeroinitializer, align 64
+@vectors = common global [15 x <16 x i32>] zeroinitializer, align 64
+@VectorResult = common global <16 x i32> zeroinitializer, align 64
+@vector_pairs = common global [15 x <32 x i32>] zeroinitializer, align 128
+@VectorPairResult = common global <32 x i32> zeroinitializer, align 128
+@dst_addresses = common global [15 x i8] zeroinitializer, align 8
+@ptr_addresses = common global [15 x i8*] zeroinitializer, align 8
+@src_addresses = common global [15 x i8*] zeroinitializer, align 8
+@dst = common global i8 0, align 1
+@ptr = common global [32768 x i8] zeroinitializer, align 8
+
+; Function Attrs: nounwind
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ %0 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %1 = bitcast <16 x i32> %0 to <512 x i1>
+ %2 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
+ %3 = bitcast <16 x i32> %2 to <512 x i1>
+ %4 = call <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1> %1, <512 x i1> %3)
+ %5 = bitcast <512 x i1> %4 to <16 x i32>
+ store volatile <16 x i32> %5, <16 x i32>* @Q6VecPredResult, align 64
+ %6 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 0), align 64
+ %7 = bitcast <16 x i32> %6 to <512 x i1>
+ %8 = load volatile <16 x i32>, <16 x i32>* getelementptr inbounds ([15 x <16 x i32>], [15 x <16 x i32>]* @vecpreds, i32 0, i32 1), align 64
+ %9 = bitcast <16 x i32> %8 to <512 x i1>
+ %10 = call <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1> %7, <512 x i1> %9)
+ %11 = bitcast <512 x i1> %10 to <16 x i32>
+ store volatile <16 x i32> %11, <16 x i32>* @Q6VecPredResult, align 64
+ ret i32 0
+
+}
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1
+
+; Function Attrs: nounwind readnone
+declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll b/test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll
index f5ee5d001510..70c4aeb4bac0 100644
--- a/test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll
+++ b/test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
; This one should generate a combine with two immediates.
; CHECK: combine(#7, #7)
@B = common global [400 x i32] zeroinitializer, align 8
diff --git a/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll b/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll
index 16591ef68536..91b32652400f 100644
--- a/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll
+++ b/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
; Check that store is post-incremented.
; CHECK: memuh(r{{[0-9]+}} + {{ *}}#6{{ *}})
diff --git a/test/CodeGen/Hexagon/vect/vect-shuffle.ll b/test/CodeGen/Hexagon/vect/vect-shuffle.ll
index 9d80df2e0887..bd5b2b981695 100644
--- a/test/CodeGen/Hexagon/vect/vect-shuffle.ll
+++ b/test/CodeGen/Hexagon/vect/vect-shuffle.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
; Check that store is post-incremented.
; CHECK-NOT: extractu
diff --git a/test/CodeGen/Hexagon/vect/vect-splat.ll b/test/CodeGen/Hexagon/vect/vect-splat.ll
index 3613dbf6fdd1..8cc226a00dab 100644
--- a/test/CodeGen/Hexagon/vect/vect-splat.ll
+++ b/test/CodeGen/Hexagon/vect/vect-splat.ll
@@ -1,6 +1,6 @@
; Extracted from test/CodeGen/Generic/vector.ll: used to loop indefinitely.
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
-; CHECK: combine
+; CHECK: splat_i4
%i4 = type <4 x i32>
diff --git a/test/CodeGen/Hexagon/vect/vect-xor.ll b/test/CodeGen/Hexagon/vect/vect-xor.ll
index 961185581128..96719e683413 100644
--- a/test/CodeGen/Hexagon/vect/vect-xor.ll
+++ b/test/CodeGen/Hexagon/vect/vect-xor.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
; Check that the parsing succeeded.
; CHECK: r{{[0-9]+:[0-9]+}} = xor(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
index efa1a0849a8e..440073fea153 100644
--- a/test/CodeGen/Inputs/DbgValueOtherTargets.ll
+++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
@@ -1,6 +1,6 @@
; Check that DEBUG_VALUE comments come through on a variety of targets.
-define i32 @main() nounwind ssp {
+define i32 @main() nounwind ssp !dbg !0 {
entry:
; CHECK: DEBUG_VALUE
call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !DIExpression()), !dbg !9
@@ -14,14 +14,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!13}
-!0 = !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: !1, type: !3, function: i32 ()* @main)
+!0 = distinct !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: !1, type: !3)
!1 = !DIFile(filename: "/tmp/x.c", directory: "/Users/manav")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 120996)", isOptimized: false, emissionKind: 0, file: !12, enums: !6, retainedTypes: !6, subprograms: !11)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 120996)", isOptimized: false, emissionKind: 0, file: !12, enums: !6, retainedTypes: !6, subprograms: !11)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!6 = !{}
-!7 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3, scope: !8, file: !1, type: !5)
+!7 = !DILocalVariable(name: "i", line: 3, scope: !8, file: !1, type: !5)
!8 = distinct !DILexicalBlock(line: 2, column: 12, file: !12, scope: !0)
!9 = !DILocation(line: 3, column: 11, scope: !8)
!10 = !DILocation(line: 4, column: 2, scope: !8)
diff --git a/test/CodeGen/MIR/AArch64/cfi-def-cfa.mir b/test/CodeGen/MIR/AArch64/cfi-def-cfa.mir
new file mode 100644
index 000000000000..cf7572ecad37
--- /dev/null
+++ b/test/CodeGen/MIR/AArch64/cfi-def-cfa.mir
@@ -0,0 +1,31 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the .cfi_def_cfa operands
+# correctly.
+
+--- |
+
+ declare void @foo()
+
+ define void @trivial_fp_func() {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+...
+---
+name: trivial_fp_func
+body: |
+ bb.0.entry:
+ liveins: %lr, %fp, %lr, %fp
+
+ %sp = frame-setup STPXpre killed %fp, killed %lr, %sp, -2
+ %fp = frame-setup ADDXri %sp, 0, 0
+ ; CHECK: CFI_INSTRUCTION .cfi_def_cfa %w29, 16
+ frame-setup CFI_INSTRUCTION .cfi_def_cfa %w29, 16
+ frame-setup CFI_INSTRUCTION .cfi_offset %w30, -8
+ frame-setup CFI_INSTRUCTION .cfi_offset %w29, -16
+ BL @foo, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp
+ %sp, %fp, %lr = LDPXpost %sp, 2
+ RET_ReallyLR
+...
diff --git a/test/CodeGen/MIR/AArch64/expected-target-flag-name.mir b/test/CodeGen/MIR/AArch64/expected-target-flag-name.mir
new file mode 100644
index 000000000000..b7bac2682c70
--- /dev/null
+++ b/test/CodeGen/MIR/AArch64/expected-target-flag-name.mir
@@ -0,0 +1,23 @@
+# RUN: not llc -mtriple=aarch64-none-linux-gnu -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @var_i32 = global i32 42
+ @var_i64 = global i64 0
+
+ define i32 @sub_small() {
+ entry:
+ %val32 = load i32, i32* @var_i32
+ ret i32 %val32
+ }
+
+...
+---
+name: sub_small
+body: |
+ bb.0.entry:
+ %x8 = ADRP target-flags(aarch64-page) @var_i32
+ ; CHECK: [[@LINE+1]]:60: expected the name of the target flag
+ %w0 = LDRWui killed %x8, target-flags(aarch64-pageoff, ) @var_i32
+ RET_ReallyLR implicit %w0
+...
diff --git a/test/CodeGen/MIR/AArch64/invalid-target-flag-name.mir b/test/CodeGen/MIR/AArch64/invalid-target-flag-name.mir
new file mode 100644
index 000000000000..d4145b8961df
--- /dev/null
+++ b/test/CodeGen/MIR/AArch64/invalid-target-flag-name.mir
@@ -0,0 +1,23 @@
+# RUN: not llc -mtriple=aarch64-none-linux-gnu -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @var_i32 = global i32 42
+ @var_i64 = global i64 0
+
+ define i32 @sub_small() {
+ entry:
+ %val32 = load i32, i32* @var_i32
+ ret i32 %val32
+ }
+
+...
+---
+name: sub_small
+body: |
+ bb.0.entry:
+ %x8 = ADRP target-flags(aarch64-page) @var_i32
+ ; CHECK: [[@LINE+1]]:60: use of undefined target flag 'ncc'
+ %w0 = LDRWui killed %x8, target-flags(aarch64-pageoff, ncc) @var_i32
+ RET_ReallyLR implicit %w0
+...
diff --git a/test/CodeGen/MIR/AArch64/lit.local.cfg b/test/CodeGen/MIR/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..f4f77c5aa312
--- /dev/null
+++ b/test/CodeGen/MIR/AArch64/lit.local.cfg
@@ -0,0 +1,8 @@
+import re
+
+if not 'AArch64' in config.root.targets:
+ config.unsupported = True
+
+# For now we don't test arm64-win32.
+if re.search(r'cygwin|mingw32|win32|windows-gnu|windows-msvc', config.target_triple):
+ config.unsupported = True
diff --git a/test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir b/test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir
new file mode 100644
index 000000000000..e23a352dff21
--- /dev/null
+++ b/test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir
@@ -0,0 +1,28 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser can parse multiple register machine
+# operands before '='.
+
+--- |
+
+ declare void @foo()
+
+ define void @trivial_fp_func() {
+ entry:
+ call void @foo()
+ ret void
+ }
+
+...
+---
+name: trivial_fp_func
+body: |
+ bb.0.entry:
+ liveins: %lr, %fp, %lr, %fp
+
+ %sp = frame-setup STPXpre killed %fp, killed %lr, %sp, -2
+ %fp = frame-setup ADDXri %sp, 0, 0
+ BL @foo, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp
+ ; CHECK: %sp, %fp, %lr = LDPXpost %sp, 2
+ %sp, %fp, %lr = LDPXpost %sp, 2
+ RET_ReallyLR
+...
diff --git a/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
new file mode 100644
index 000000000000..9471516db647
--- /dev/null
+++ b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
@@ -0,0 +1,41 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -start-after machine-sink -stop-after machine-sink -o /dev/null %s | FileCheck %s
+
+--- |
+ @var = global i64 0
+ @local_addr = global i64* null
+
+ define void @stack_local() {
+ entry:
+ %local_var = alloca i64
+ %val = load i64, i64* @var
+ store i64 %val, i64* %local_var
+ store i64* %local_var, i64** @local_addr
+ ret void
+ }
+...
+---
+name: stack_local
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64common }
+ - { id: 1, class: gpr64 }
+ - { id: 2, class: gpr64common }
+ - { id: 3, class: gpr64common }
+frameInfo:
+ maxAlignment: 8
+# CHECK-LABEL: stack_local
+# CHECK: stack:
+# CHECK_NEXT: { id:0, name:local_var, offset:0, size:8, alignment:8, local-offset: -8 }
+stack:
+ - { id: 0,name: local_var,offset: 0,size: 8,alignment: 8, local-offset: -8 }
+body: |
+ bb.0.entry:
+ %0 = ADRP @var
+ %1 = LDRXui killed %0, @var :: (load 8 from @var)
+ STRXui killed %1, %stack.0.local_var, 0 :: (store 8 into %ir.local_var)
+ %2 = ADRP @local_addr
+ %3 = ADDXri %stack.0.local_var, 0, 0
+ STRXui killed %3, killed %2, @local_addr :: (store 8 into @local_addr)
+ RET_ReallyLR
+...
diff --git a/test/CodeGen/MIR/AArch64/target-flags.mir b/test/CodeGen/MIR/AArch64/target-flags.mir
new file mode 100644
index 000000000000..e96fce7c2f2b
--- /dev/null
+++ b/test/CodeGen/MIR/AArch64/target-flags.mir
@@ -0,0 +1,39 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+
+--- |
+
+ @var_i32 = global i32 42
+ @var_i64 = global i64 0
+
+ define void @sub_small() {
+ entry:
+ %val32 = load i32, i32* @var_i32
+ %newval32 = sub i32 %val32, 4095
+ store i32 %newval32, i32* @var_i32
+ %val64 = load i64, i64* @var_i64
+ %newval64 = sub i64 %val64, 52
+ store i64 %newval64, i64* @var_i64
+ ret void
+ }
+
+...
+---
+name: sub_small
+body: |
+ bb.0.entry:
+ ; CHECK: %x8 = ADRP target-flags(aarch64-page) @var_i32
+ ; CHECK-NEXT: %x9 = ADRP target-flags(aarch64-page) @var_i64
+ ; CHECK-NEXT: %w10 = LDRWui %x8, target-flags(aarch64-pageoff, aarch64-nc) @var_i32
+ ; CHECK-NEXT: %x11 = LDRXui %x9, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @var_i64
+ ; CHECK: STRWui killed %w10, killed %x8, target-flags(aarch64-nc) @var_i32
+ ; CHECK: STRXui killed %x11, killed %x9, target-flags(aarch64-pageoff, aarch64-nc) @var_i64
+ %x8 = ADRP target-flags(aarch64-page) @var_i32
+ %x9 = ADRP target-flags(aarch64-page) @var_i64
+ %w10 = LDRWui %x8, target-flags(aarch64-pageoff, aarch64-nc) @var_i32
+ %x11 = LDRXui %x9, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @var_i64
+ %w10 = SUBWri killed %w10, 4095, 0
+ %x11 = SUBXri killed %x11, 52, 0
+ STRWui killed %w10, killed %x8, target-flags(aarch64-nc) @var_i32
+ STRXui killed %x11, killed %x9, target-flags(aarch64-pageoff, aarch64-nc) @var_i64
+ RET_ReallyLR
+...
diff --git a/test/CodeGen/MIR/AMDGPU/expected-target-index-name.mir b/test/CodeGen/MIR/AMDGPU/expected-target-index-name.mir
new file mode 100644
index 000000000000..34793880a60b
--- /dev/null
+++ b/test/CodeGen/MIR/AMDGPU/expected-target-index-name.mir
@@ -0,0 +1,64 @@
+# RUN: not llc -march=amdgcn -mcpu=SI -start-after postrapseudos -stop-after postrapseudos -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ %struct.foo = type { float, [5 x i32] }
+
+ @float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 4
+
+ define void @float(float addrspace(1)* %out, i32 %index) #0 {
+ entry:
+ %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+ %1 = load float, float addrspace(2)* %0
+ store float %1, float addrspace(1)* %out
+ ret void
+ }
+
+ declare { i1, i64 } @llvm.SI.if(i1)
+
+ declare { i1, i64 } @llvm.SI.else(i64)
+
+ declare i64 @llvm.SI.break(i64)
+
+ declare i64 @llvm.SI.if.break(i1, i64)
+
+ declare i64 @llvm.SI.else.break(i64, i64)
+
+ declare i1 @llvm.SI.loop(i64)
+
+ declare void @llvm.SI.end.cf(i64)
+
+ attributes #0 = { "target-cpu"="SI" }
+
+...
+---
+name: float
+tracksSubRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+ maxAlignment: 8
+body: |
+ bb.0.entry:
+ liveins: %sgpr0_sgpr1
+
+ %sgpr2_sgpr3 = S_GETPC_B64
+ ; CHECK: [[@LINE+1]]:45: expected the name of the target index
+ %sgpr2 = S_ADD_U32 %sgpr2, target-index(0), implicit-def %scc, implicit-def %scc
+ %sgpr3 = S_ADDC_U32 %sgpr3, 0, implicit-def %scc, implicit %scc, implicit-def %scc, implicit %scc
+ %sgpr4_sgpr5 = S_LSHR_B64 %sgpr2_sgpr3, 32, implicit-def dead %scc
+ %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11
+ %sgpr7 = S_ASHR_I32 %sgpr6, 31, implicit-def dead %scc
+ %sgpr6_sgpr7 = S_LSHL_B64 %sgpr6_sgpr7, 2, implicit-def dead %scc
+ %sgpr2 = S_ADD_U32 %sgpr2, @float_gv, implicit-def %scc
+ %sgpr3 = S_ADDC_U32 %sgpr4, 0, implicit-def dead %scc, implicit %scc
+ %sgpr4 = S_ADD_U32 %sgpr2, %sgpr6, implicit-def %scc
+ %sgpr5 = S_ADDC_U32 %sgpr3, %sgpr7, implicit-def dead %scc, implicit %scc
+ %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+ %sgpr7 = S_MOV_B32 61440
+ %sgpr6 = S_MOV_B32 -1
+ %vgpr0 = V_MOV_B32_e32 killed %sgpr2, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+ S_ENDPGM
+...
diff --git a/test/CodeGen/MIR/AMDGPU/invalid-target-index-operand.mir b/test/CodeGen/MIR/AMDGPU/invalid-target-index-operand.mir
new file mode 100644
index 000000000000..e20cf376414a
--- /dev/null
+++ b/test/CodeGen/MIR/AMDGPU/invalid-target-index-operand.mir
@@ -0,0 +1,64 @@
+# RUN: not llc -march=amdgcn -mcpu=SI -start-after postrapseudos -stop-after postrapseudos -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ %struct.foo = type { float, [5 x i32] }
+
+ @float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 4
+
+ define void @float(float addrspace(1)* %out, i32 %index) #0 {
+ entry:
+ %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+ %1 = load float, float addrspace(2)* %0
+ store float %1, float addrspace(1)* %out
+ ret void
+ }
+
+ declare { i1, i64 } @llvm.SI.if(i1)
+
+ declare { i1, i64 } @llvm.SI.else(i64)
+
+ declare i64 @llvm.SI.break(i64)
+
+ declare i64 @llvm.SI.if.break(i1, i64)
+
+ declare i64 @llvm.SI.else.break(i64, i64)
+
+ declare i1 @llvm.SI.loop(i64)
+
+ declare void @llvm.SI.end.cf(i64)
+
+ attributes #0 = { "target-cpu"="SI" }
+
+...
+---
+name: float
+tracksSubRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+ maxAlignment: 8
+body: |
+ bb.0.entry:
+ liveins: %sgpr0_sgpr1
+
+ %sgpr2_sgpr3 = S_GETPC_B64
+ ; CHECK: [[@LINE+1]]:45: use of undefined target index 'constdata-start'
+ %sgpr2 = S_ADD_U32 %sgpr2, target-index(constdata-start), implicit-def %scc, implicit-def %scc
+ %sgpr3 = S_ADDC_U32 %sgpr3, 0, implicit-def %scc, implicit %scc, implicit-def %scc, implicit %scc
+ %sgpr4_sgpr5 = S_LSHR_B64 %sgpr2_sgpr3, 32, implicit-def dead %scc
+ %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11
+ %sgpr7 = S_ASHR_I32 %sgpr6, 31, implicit-def dead %scc
+ %sgpr6_sgpr7 = S_LSHL_B64 %sgpr6_sgpr7, 2, implicit-def dead %scc
+ %sgpr2 = S_ADD_U32 %sgpr2, @float_gv, implicit-def %scc
+ %sgpr3 = S_ADDC_U32 %sgpr4, 0, implicit-def dead %scc, implicit %scc
+ %sgpr4 = S_ADD_U32 %sgpr2, %sgpr6, implicit-def %scc
+ %sgpr5 = S_ADDC_U32 %sgpr3, %sgpr7, implicit-def dead %scc, implicit %scc
+ %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+ %sgpr7 = S_MOV_B32 61440
+ %sgpr6 = S_MOV_B32 -1
+ %vgpr0 = V_MOV_B32_e32 killed %sgpr2, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+ S_ENDPGM
+...
diff --git a/test/CodeGen/MIR/AMDGPU/lit.local.cfg b/test/CodeGen/MIR/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..2a665f06be72
--- /dev/null
+++ b/test/CodeGen/MIR/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
diff --git a/test/CodeGen/MIR/AMDGPU/target-index-operands.mir b/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
new file mode 100644
index 000000000000..839fd3212c61
--- /dev/null
+++ b/test/CodeGen/MIR/AMDGPU/target-index-operands.mir
@@ -0,0 +1,104 @@
+# RUN: llc -march=amdgcn -mcpu=SI -start-after postrapseudos -stop-after postrapseudos -o /dev/null %s | FileCheck %s
+# This test verifies that the MIR parser can parse target index operands.
+
+--- |
+
+ %struct.foo = type { float, [5 x i32] }
+
+ @float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00], align 4
+
+ define void @float(float addrspace(1)* %out, i32 %index) #0 {
+ entry:
+ %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+ %1 = load float, float addrspace(2)* %0
+ store float %1, float addrspace(1)* %out
+ ret void
+ }
+
+ define void @float2(float addrspace(1)* %out, i32 %index) #0 {
+ entry:
+ %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+ %1 = load float, float addrspace(2)* %0
+ store float %1, float addrspace(1)* %out
+ ret void
+ }
+
+ declare { i1, i64 } @llvm.SI.if(i1)
+
+ declare { i1, i64 } @llvm.SI.else(i64)
+
+ declare i64 @llvm.SI.break(i64)
+
+ declare i64 @llvm.SI.if.break(i1, i64)
+
+ declare i64 @llvm.SI.else.break(i64, i64)
+
+ declare i1 @llvm.SI.loop(i64)
+
+ declare void @llvm.SI.end.cf(i64)
+
+ attributes #0 = { "target-cpu"="SI" }
+
+...
+---
+name: float
+tracksSubRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+ maxAlignment: 8
+body: |
+ bb.0.entry:
+ liveins: %sgpr0_sgpr1
+
+ %sgpr2_sgpr3 = S_GETPC_B64
+ ; CHECK: %sgpr2 = S_ADD_U32 %sgpr2, target-index(amdgpu-constdata-start), implicit-def %scc, implicit-def %scc
+ %sgpr2 = S_ADD_U32 %sgpr2, target-index(amdgpu-constdata-start), implicit-def %scc, implicit-def %scc
+ %sgpr3 = S_ADDC_U32 %sgpr3, 0, implicit-def %scc, implicit %scc, implicit-def %scc, implicit %scc
+ %sgpr4_sgpr5 = S_LSHR_B64 %sgpr2_sgpr3, 32, implicit-def dead %scc
+ %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11
+ %sgpr7 = S_ASHR_I32 %sgpr6, 31, implicit-def dead %scc
+ %sgpr6_sgpr7 = S_LSHL_B64 %sgpr6_sgpr7, 2, implicit-def dead %scc
+ %sgpr2 = S_ADD_U32 %sgpr2, @float_gv, implicit-def %scc
+ %sgpr3 = S_ADDC_U32 %sgpr4, 0, implicit-def dead %scc, implicit %scc
+ %sgpr4 = S_ADD_U32 %sgpr2, %sgpr6, implicit-def %scc
+ %sgpr5 = S_ADDC_U32 %sgpr3, %sgpr7, implicit-def dead %scc, implicit %scc
+ %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+ %sgpr7 = S_MOV_B32 61440
+ %sgpr6 = S_MOV_B32 -1
+ %vgpr0 = V_MOV_B32_e32 killed %sgpr2, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+ S_ENDPGM
+...
+---
+name: float2
+tracksSubRegLiveness: true
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+frameInfo:
+ maxAlignment: 8
+body: |
+ bb.0.entry:
+ liveins: %sgpr0_sgpr1
+
+ %sgpr2_sgpr3 = S_GETPC_B64
+ ; CHECK: %sgpr2 = S_ADD_U32 %sgpr2, target-index(amdgpu-constdata-start) + 1, implicit-def %scc, implicit-def %scc
+ %sgpr2 = S_ADD_U32 %sgpr2, target-index(amdgpu-constdata-start) + 1, implicit-def %scc, implicit-def %scc
+ %sgpr3 = S_ADDC_U32 %sgpr3, 0, implicit-def %scc, implicit %scc, implicit-def %scc, implicit %scc
+ %sgpr4_sgpr5 = S_LSHR_B64 %sgpr2_sgpr3, 32, implicit-def dead %scc
+ %sgpr6 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 11
+ %sgpr7 = S_ASHR_I32 %sgpr6, 31, implicit-def dead %scc
+ %sgpr6_sgpr7 = S_LSHL_B64 %sgpr6_sgpr7, 2, implicit-def dead %scc
+ %sgpr2 = S_ADD_U32 %sgpr2, @float_gv, implicit-def %scc
+ %sgpr3 = S_ADDC_U32 %sgpr4, 0, implicit-def dead %scc, implicit %scc
+ %sgpr4 = S_ADD_U32 %sgpr2, %sgpr6, implicit-def %scc
+ %sgpr5 = S_ADDC_U32 %sgpr3, %sgpr7, implicit-def dead %scc, implicit %scc
+ %sgpr2 = S_LOAD_DWORD_IMM %sgpr4_sgpr5, 0
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
+ %sgpr7 = S_MOV_B32 61440
+ %sgpr6 = S_MOV_B32 -1
+ %vgpr0 = V_MOV_B32_e32 killed %sgpr2, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
+ S_ENDPGM
+...
diff --git a/test/CodeGen/MIR/ARM/ARMLoadStoreDBG.mir b/test/CodeGen/MIR/ARM/ARMLoadStoreDBG.mir
new file mode 100644
index 000000000000..e351713dc290
--- /dev/null
+++ b/test/CodeGen/MIR/ARM/ARMLoadStoreDBG.mir
@@ -0,0 +1,165 @@
+# RUN: llc -start-after machine-cp -stop-after=if-converter -mtriple=thumbv7 %s -o /dev/null 2>&1 | FileCheck %s
+--- |
+ ; ModuleID = '/Volumes/Data/llvm/test/CodeGen/ARM/sched-it-debug-nodes.ll'
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv7"
+
+ %struct.s = type opaque
+
+ ; Function Attrs: nounwind
+ define arm_aapcscc i32 @f(%struct.s* %s, i32 %u, i8* %b, i32 %n) #0 !dbg !4 {
+ entry:
+ tail call void @llvm.dbg.value(metadata %struct.s* %s, i64 0, metadata !18, metadata !27), !dbg !28
+ tail call void @llvm.dbg.value(metadata i32 %u, i64 0, metadata !19, metadata !27), !dbg !28
+ tail call void @llvm.dbg.value(metadata i8* %b, i64 0, metadata !20, metadata !27), !dbg !28
+ tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !21, metadata !27), !dbg !28
+ %cmp = icmp ult i32 %n, 4, !dbg !29
+ br i1 %cmp, label %return, label %if.end, !dbg !31
+
+ if.end: ; preds = %entry
+ tail call arm_aapcscc void @g(%struct.s* %s, i8* %b, i32 %n) #3, !dbg !32
+ br label %return, !dbg !33
+
+ return: ; preds = %if.end, %entry
+ %retval.0 = phi i32 [ 0, %if.end ], [ -1, %entry ]
+ ret i32 %retval.0, !dbg !34
+ }
+
+ declare arm_aapcscc void @g(%struct.s*, i8*, i32) #1
+
+ ; Function Attrs: nounwind readnone
+ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #2 = { nounwind readnone }
+ attributes #3 = { nounwind }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!22, !23, !24, !25}
+ !llvm.ident = !{!26}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (llvm/trunk 237059)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+ !1 = !DIFile(filename: "<stdin>", directory: "/Users/compnerd/Source/llvm")
+ !2 = !{}
+ !3 = !{!4}
+ !4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 9, type: !5, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, variables: !17)
+ !5 = !DISubroutineType(types: !6)
+ !6 = !{!7, !8, !11, !12, !16}
+ !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32, align: 32)
+ !9 = !DIDerivedType(tag: DW_TAG_typedef, name: "s", file: !1, line: 5, baseType: !10)
+ !10 = !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !1, line: 5, flags: DIFlagFwdDecl)
+ !11 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+ !12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 32, align: 32)
+ !13 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !14)
+ !14 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", file: !1, line: 2, baseType: !15)
+ !15 = !DIBasicType(name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
+ !16 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !1, line: 3, baseType: !11)
+ !17 = !{!18, !19, !20, !21}
+ !18 = !DILocalVariable(name: "s", arg: 1, scope: !4, file: !1, line: 9, type: !8)
+ !19 = !DILocalVariable(name: "u", arg: 2, scope: !4, file: !1, line: 9, type: !11)
+ !20 = !DILocalVariable(name: "b", arg: 3, scope: !4, file: !1, line: 9, type: !12)
+ !21 = !DILocalVariable(name: "n", arg: 4, scope: !4, file: !1, line: 9, type: !16)
+ !22 = !{i32 2, !"Dwarf Version", i32 4}
+ !23 = !{i32 2, !"Debug Info Version", i32 3}
+ !24 = !{i32 1, !"wchar_size", i32 4}
+ !25 = !{i32 1, !"min_enum_size", i32 4}
+ !26 = !{!"clang version 3.7.0 (llvm/trunk 237059)"}
+ !27 = !DIExpression()
+ !28 = !DILocation(line: 9, scope: !4)
+ !29 = !DILocation(line: 10, scope: !30)
+ !30 = distinct !DILexicalBlock(scope: !4, file: !1, line: 10)
+ !31 = !DILocation(line: 10, scope: !4)
+ !32 = !DILocation(line: 13, scope: !4)
+ !33 = !DILocation(line: 14, scope: !4)
+ !34 = !DILocation(line: 15, scope: !4)
+
+...
+---
+name: f
+alignment: 1
+exposesReturnsTwice: false
+hasInlineAsm: false
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: false
+liveins:
+ - { reg: '%r0' }
+ - { reg: '%r2' }
+ - { reg: '%r3' }
+calleeSavedRegisters: [ '%lr', '%d8', '%d9', '%d10', '%d11', '%d12', '%d13',
+ '%d14', '%d15', '%q4', '%q5', '%q6', '%q7', '%r4',
+ '%r5', '%r6', '%r7', '%r8', '%r9', '%r10', '%r11',
+ '%s16', '%s17', '%s18', '%s19', '%s20', '%s21',
+ '%s22', '%s23', '%s24', '%s25', '%s26', '%s27',
+ '%s28', '%s29', '%s30', '%s31', '%d8_d10', '%d9_d11',
+ '%d10_d12', '%d11_d13', '%d12_d14', '%d13_d15',
+ '%q4_q5', '%q5_q6', '%q6_q7', '%q4_q5_q6_q7', '%r4_r5',
+ '%r6_r7', '%r8_r9', '%r10_r11', '%d8_d9_d10', '%d9_d10_d11',
+ '%d10_d11_d12', '%d11_d12_d13', '%d12_d13_d14',
+ '%d13_d14_d15', '%d8_d10_d12', '%d9_d11_d13', '%d10_d12_d14',
+ '%d11_d13_d15', '%d8_d10_d12_d14', '%d9_d11_d13_d15',
+ '%d9_d10', '%d11_d12', '%d13_d14', '%d9_d10_d11_d12',
+ '%d11_d12_d13_d14' ]
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 8
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ savePoint: '%bb.2.if.end'
+ restorePoint: '%bb.2.if.end'
+stack:
+ - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '%lr' }
+ - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '%r7' }
+body: |
+ bb.0.entry:
+ successors: %bb.1, %bb.2.if.end
+ liveins: %r0, %r2, %r3, %lr, %r7
+
+ DBG_VALUE debug-use %r0, debug-use _, !18, !27, debug-location !28
+ DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28
+ DBG_VALUE debug-use %r2, debug-use _, !20, !27, debug-location !28
+ DBG_VALUE debug-use %r3, debug-use _, !21, !27, debug-location !28
+ t2CMPri %r3, 4, 14, _, implicit-def %cpsr, debug-location !31
+ t2Bcc %bb.2.if.end, 2, killed %cpsr
+
+ bb.1:
+ liveins: %lr, %r7
+
+ DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28
+ %r0 = t2MOVi -1, 14, _, _
+ DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28
+ tBX_RET 14, _, implicit %r0, debug-location !34
+
+ bb.2.if.end:
+ liveins: %r0, %r2, %r3, %r7, %lr
+
+ %sp = frame-setup t2STMDB_UPD %sp, 14, _, killed %r7, killed %lr
+ frame-setup CFI_INSTRUCTION .cfi_def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION .cfi_offset %lr, -4
+ frame-setup CFI_INSTRUCTION .cfi_offset %r7, -8
+ DBG_VALUE debug-use %r0, debug-use _, !18, !27, debug-location !28
+ DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28
+ DBG_VALUE debug-use %r2, debug-use _, !20, !27, debug-location !28
+ DBG_VALUE debug-use %r3, debug-use _, !21, !27, debug-location !28
+ %r1 = COPY killed %r2, debug-location !32
+ %r2 = COPY killed %r3, debug-location !32
+ tBL 14, _, @g, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit-def %sp, debug-location !32
+ %r0 = t2MOVi 0, 14, _, _
+ %sp = t2LDMIA_UPD %sp, 14, _, def %r7, def %lr
+ DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28
+ tBX_RET 14, _, implicit %r0, debug-location !34
+# Verify that the DBG_VALUE is ignored.
+# CHECK: %sp = t2LDMIA_RET %sp, 14, _, def %r7, def %pc, implicit %r0
+
+...
diff --git a/test/CodeGen/MIR/ARM/bundled-instructions.mir b/test/CodeGen/MIR/ARM/bundled-instructions.mir
new file mode 100644
index 000000000000..814c4e188ea5
--- /dev/null
+++ b/test/CodeGen/MIR/ARM/bundled-instructions.mir
@@ -0,0 +1,75 @@
+# RUN: llc -mtriple thumbv7-apple-ios -start-after block-placement -stop-after block-placement -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the bundled machine instructions
+# and 'internal' register flags correctly.
+
+--- |
+
+ define i32 @test1(i32 %a) {
+ entry:
+ %cmp = icmp sgt i32 %a, -78
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+ }
+
+ define i32 @test2(i32 %a) {
+ entry:
+ %cmp = icmp sgt i32 %a, -78
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+ }
+
+...
+---
+name: test1
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0' }
+body: |
+ bb.0.entry:
+ liveins: %r0
+ ; CHECK-LABEL: name: test1
+ ; CHECK: %r1 = t2MOVi 0, 14, _, _
+ ; CHECK-NEXT: t2CMNri killed %r0, 78, 14, _, implicit-def %cpsr
+ ; CHECK-NEXT: BUNDLE implicit-def dead %itstate, implicit-def %r1, implicit killed %cpsr {
+ ; CHECK-NEXT: t2IT 12, 8, implicit-def %itstate
+ ; CHECK-NEXT: %r1 = t2MOVi 1, 12, killed %cpsr, _, implicit internal killed %itstate
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %r0 = tMOVr killed %r1, 14, _
+ ; CHECK-NEXT: tBX_RET 14, _, implicit killed %r0
+ %r1 = t2MOVi 0, 14, _, _
+ t2CMNri killed %r0, 78, 14, _, implicit-def %cpsr
+ BUNDLE implicit-def dead %itstate, implicit-def %r1, implicit killed %cpsr {
+ t2IT 12, 8, implicit-def %itstate
+ %r1 = t2MOVi 1, 12, killed %cpsr, _, implicit internal killed %itstate
+ }
+ %r0 = tMOVr killed %r1, 14, _
+ tBX_RET 14, _, implicit killed %r0
+...
+---
+name: test2
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0' }
+body: |
+ bb.0.entry:
+ liveins: %r0
+
+ ; Verify that the next machine instruction can be on the same line as
+ ; '{' or '}'.
+
+ ; CHECK-LABEL: name: test2
+ ; CHECK: %r1 = t2MOVi 0, 14, _, _
+ ; CHECK-NEXT: t2CMNri killed %r0, 78, 14, _, implicit-def %cpsr
+ ; CHECK-NEXT: BUNDLE implicit-def dead %itstate, implicit-def %r1, implicit killed %cpsr {
+ ; CHECK-NEXT: t2IT 12, 8, implicit-def %itstate
+ ; CHECK-NEXT: %r1 = t2MOVi 1, 12, killed %cpsr, _, implicit internal killed %itstate
+ ; CHECK-NEXT: }
+ ; CHECK-NEXT: %r0 = tMOVr killed %r1, 14, _
+ ; CHECK-NEXT: tBX_RET 14, _, implicit killed %r0
+ %r1 = t2MOVi 0, 14, _, _
+ t2CMNri killed %r0, 78, 14, _, implicit-def %cpsr
+ BUNDLE implicit-def dead %itstate, implicit-def %r1, implicit killed %cpsr { t2IT 12, 8, implicit-def %itstate
+ %r1 = t2MOVi 1, 12, killed %cpsr, _, internal implicit killed %itstate
+ } %r0 = tMOVr killed %r1, 14, _
+ tBX_RET 14, _, implicit killed %r0
+...
diff --git a/test/CodeGen/MIR/ARM/cfi-same-value.mir b/test/CodeGen/MIR/ARM/cfi-same-value.mir
new file mode 100644
index 000000000000..f9850abe0463
--- /dev/null
+++ b/test/CodeGen/MIR/ARM/cfi-same-value.mir
@@ -0,0 +1,80 @@
+# RUN: llc -mtriple=arm-linux-unknown-gnueabi -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+
+--- |
+ declare void @dummy_use(i32*, i32)
+
+ define void @test_basic() #0 {
+ entry:
+ %mem = alloca i32, i32 10
+ call void @dummy_use(i32* %mem, i32 10)
+ ret void
+ }
+
+ attributes #0 = { "split-stack" }
+...
+---
+name: test_basic
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 48
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+stack:
+ - { id: 0, name: mem, offset: -48, size: 40, alignment: 4 }
+ - { id: 1, type: spill-slot, offset: -4, size: 4, alignment: 4,
+ callee-saved-register: '%lr' }
+ - { id: 2, type: spill-slot, offset: -8, size: 4, alignment: 4,
+ callee-saved-register: '%r11' }
+body: |
+ bb.0:
+ successors: %bb.2, %bb.1
+ liveins: %r11, %lr
+
+ %sp = STMDB_UPD %sp, 14, _, %r4, %r5
+ CFI_INSTRUCTION .cfi_def_cfa_offset 8
+ CFI_INSTRUCTION .cfi_offset %r5, -4
+ CFI_INSTRUCTION .cfi_offset %r4, -8
+ %r5 = MOVr %sp, 14, _, _
+ %r4 = MRC 15, 0, 13, 0, 3, 14, _
+ %r4 = LDRi12 %r4, 4, 14, _
+ CMPrr %r4, %r5, 14, _, implicit-def %cpsr
+ Bcc %bb.2, 3, %cpsr
+
+ bb.1:
+ successors: %bb.2
+ liveins: %r11, %lr
+
+ %r4 = MOVi 48, 14, _, _
+ %r5 = MOVi 0, 14, _, _
+ %sp = STMDB_UPD %sp, 14, _, %lr
+ CFI_INSTRUCTION .cfi_def_cfa_offset 12
+ CFI_INSTRUCTION .cfi_offset %lr, -12
+ BL $__morestack, implicit-def %lr, implicit %sp
+ %sp = LDMIA_UPD %sp, 14, _, %lr
+ %sp = LDMIA_UPD %sp, 14, _, %r4, %r5
+ CFI_INSTRUCTION .cfi_def_cfa_offset 0
+ BX_RET 14, _
+
+ bb.2:
+ liveins: %r11, %lr
+
+ %sp = LDMIA_UPD %sp, 14, _, %r4, %r5
+ CFI_INSTRUCTION .cfi_def_cfa_offset 0
+ ; CHECK: CFI_INSTRUCTION .cfi_same_value %r4
+ ; CHECK-NEXT: CFI_INSTRUCTION .cfi_same_value %r5
+ CFI_INSTRUCTION .cfi_same_value %r4
+ CFI_INSTRUCTION .cfi_same_value %r5
+ %sp = frame-setup STMDB_UPD %sp, 14, _, killed %r11, killed %lr
+ frame-setup CFI_INSTRUCTION .cfi_def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION .cfi_offset %lr, -4
+ frame-setup CFI_INSTRUCTION .cfi_offset %r11, -8
+ %sp = frame-setup SUBri killed %sp, 40, 14, _, _
+ frame-setup CFI_INSTRUCTION .cfi_def_cfa_offset 48
+ %r0 = MOVr %sp, 14, _, _
+ %r1 = MOVi 10, 14, _, _
+ BL @dummy_use, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit killed %r1, implicit-def %sp
+ %sp = ADDri killed %sp, 40, 14, _, _
+ %sp = LDMIA_UPD %sp, 14, _, %r4, %r5
+ MOVPCLR 14, _
+...
diff --git a/test/CodeGen/MIR/ARM/expected-closing-brace.mir b/test/CodeGen/MIR/ARM/expected-closing-brace.mir
new file mode 100644
index 000000000000..78d91aead247
--- /dev/null
+++ b/test/CodeGen/MIR/ARM/expected-closing-brace.mir
@@ -0,0 +1,50 @@
+# RUN: not llc -mtriple thumbv7-apple-ios -start-after block-placement -stop-after block-placement -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+ @G = external global i32
+
+ define i32 @test1(i32 %a) {
+ entry:
+ br label %foo
+
+ foo:
+ %cmp = icmp sgt i32 %a, -78
+ %. = zext i1 %cmp to i32
+ br i1 %cmp, label %if.then, label %if.else
+
+ if.then:
+ ret i32 %.
+
+ if.else:
+ %b = load i32, i32* @G
+ %c = add i32 %b, 1
+ br label %foo
+ }
+...
+---
+name: test1
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0' }
+body: |
+ bb.0.entry:
+ successors: %bb.1.foo
+ liveins: %r0
+ bb.1.foo:
+ successors: %bb.2.if.then, %bb.1.foo
+ liveins: %r0
+
+ t2CMNri %r0, 78, 14, _, implicit-def %cpsr
+ %r1 = t2MOVi 0, 14, _, _
+ BUNDLE implicit-def dead %itstate, implicit-def %r1, implicit killed %cpsr {
+ t2IT 12, 8, implicit-def %itstate
+ %r1 = t2MOVi 1, 12, killed %cpsr, _, implicit killed %itstate
+ t2CMNri %r0, 77, 14, _, implicit-def %cpsr
+ t2Bcc %bb.1.foo, 11, killed %cpsr
+ ; CHECK: [[@LINE+1]]:3: expected '}'
+ bb.2.if.then:
+ liveins: %r1
+
+ %r0 = tMOVr killed %r1, 14, _
+ tBX_RET 14, _, implicit killed %r0
+...
diff --git a/test/CodeGen/MIR/ARM/extraneous-closing-brace-error.mir b/test/CodeGen/MIR/ARM/extraneous-closing-brace-error.mir
new file mode 100644
index 000000000000..a069dd307936
--- /dev/null
+++ b/test/CodeGen/MIR/ARM/extraneous-closing-brace-error.mir
@@ -0,0 +1,20 @@
+# RUN: not llc -mtriple thumbv7-apple-ios -start-after block-placement -stop-after block-placement -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+ define i32 @test1(i32 %a) {
+ entry:
+ ret i32 %a
+ }
+...
+---
+name: test1
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0' }
+body: |
+ bb.0.entry:
+ liveins: %r0
+ tBX_RET 14, _, implicit killed %r0
+ ; CHECK: [[@LINE+1]]:5: extraneous closing brace ('}')
+ }
+...
diff --git a/test/CodeGen/MIR/ARM/lit.local.cfg b/test/CodeGen/MIR/ARM/lit.local.cfg
new file mode 100644
index 000000000000..236e1d344166
--- /dev/null
+++ b/test/CodeGen/MIR/ARM/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+ config.unsupported = True
diff --git a/test/CodeGen/MIR/ARM/nested-instruction-bundle-error.mir b/test/CodeGen/MIR/ARM/nested-instruction-bundle-error.mir
new file mode 100644
index 000000000000..b93697857e79
--- /dev/null
+++ b/test/CodeGen/MIR/ARM/nested-instruction-bundle-error.mir
@@ -0,0 +1,30 @@
+# RUN: not llc -mtriple thumbv7-apple-ios -start-after block-placement -stop-after block-placement -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+ define i32 @test1(i32 %a) {
+ entry:
+ %cmp = icmp sgt i32 %a, -78
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+ }
+...
+---
+name: test1
+tracksRegLiveness: true
+liveins:
+ - { reg: '%r0' }
+body: |
+ bb.0.entry:
+ liveins: %r0
+ %r1 = t2MOVi 0, 14, _, _
+ t2CMNri killed %r0, 78, 14, _, implicit-def %cpsr
+ BUNDLE implicit-def dead %itstate, implicit-def %r1, implicit killed %cpsr {
+ t2IT 12, 8, implicit-def %itstate
+ %r1 = t2MOVi 1, 12, killed %cpsr, _
+ ; CHECK: [[@LINE+1]]:14: nested instruction bundles are not allowed
+ BUNDLE {
+ }
+ }
+ %r0 = tMOVr killed %r1, 14, _
+ tBX_RET 14, _, implicit killed %r0
+...
diff --git a/test/CodeGen/MIR/ARM/sched-it-debug-nodes.mir b/test/CodeGen/MIR/ARM/sched-it-debug-nodes.mir
new file mode 100644
index 000000000000..5b5750b8d1e8
--- /dev/null
+++ b/test/CodeGen/MIR/ARM/sched-it-debug-nodes.mir
@@ -0,0 +1,160 @@
+# RUN: llc -mtriple thumbv7 -start-after if-converter -print-before=post-RA-sched -print-after=post-RA-sched %s -o /dev/null 2>&1 | FileCheck %s
+--- |
+ ; ModuleID = '/Volumes/Data/llvm/test/CodeGen/ARM/sched-it-debug-nodes.ll'
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv7"
+
+ %struct.s = type opaque
+
+ ; Function Attrs: nounwind
+ define arm_aapcscc i32 @f(%struct.s* %s, i32 %u, i8* %b, i32 %n) #0 !dbg !4 {
+ entry:
+ tail call void @llvm.dbg.value(metadata %struct.s* %s, i64 0, metadata !18, metadata !27), !dbg !28
+ tail call void @llvm.dbg.value(metadata i32 %u, i64 0, metadata !19, metadata !27), !dbg !28
+ tail call void @llvm.dbg.value(metadata i8* %b, i64 0, metadata !20, metadata !27), !dbg !28
+ tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !21, metadata !27), !dbg !28
+ %cmp = icmp ult i32 %n, 4, !dbg !29
+ br i1 %cmp, label %return, label %if.end, !dbg !31
+
+ if.end: ; preds = %entry
+ tail call arm_aapcscc void @g(%struct.s* %s, i8* %b, i32 %n) #3, !dbg !32
+ br label %return, !dbg !33
+
+ return: ; preds = %if.end, %entry
+ %retval.0 = phi i32 [ 0, %if.end ], [ -1, %entry ]
+ ret i32 %retval.0, !dbg !34
+ }
+
+ ; NOTE: This is checking that the register in the DEBUG_VALUE node is not
+ ; accidentally being marked as KILL. The DBG_VALUE node gets introduced in
+ ; If-Conversion, and gets bundled into the IT block. The Post RA Scheduler
+ ; attempts to schedule the Machine Instr, and tries to tag the register in the
+ ; debug value as KILL'ed, resulting in a DEBUG_VALUE node changing codegen! (or
+ ; hopefully, triggering an assert).
+
+ ; CHECK: BUNDLE %ITSTATE<imp-def,dead>
+ ; CHECK: * DBG_VALUE %R1, %noreg, !"u"
+ ; CHECK-NOT: * DBG_VALUE %R1<kill>, %noreg, !"u"
+
+ declare arm_aapcscc void @g(%struct.s*, i8*, i32) #1
+
+ ; Function Attrs: nounwind readnone
+ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #2 = { nounwind readnone }
+ attributes #3 = { nounwind }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!22, !23, !24, !25}
+ !llvm.ident = !{!26}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (llvm/trunk 237059)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+ !1 = !DIFile(filename: "<stdin>", directory: "/Users/compnerd/Source/llvm")
+ !2 = !{}
+ !3 = !{!4}
+ !4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 9, type: !5, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, variables: !17)
+ !5 = !DISubroutineType(types: !6)
+ !6 = !{!7, !8, !11, !12, !16}
+ !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32, align: 32)
+ !9 = !DIDerivedType(tag: DW_TAG_typedef, name: "s", file: !1, line: 5, baseType: !10)
+ !10 = !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !1, line: 5, flags: DIFlagFwdDecl)
+ !11 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+ !12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 32, align: 32)
+ !13 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !14)
+ !14 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", file: !1, line: 2, baseType: !15)
+ !15 = !DIBasicType(name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
+ !16 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !1, line: 3, baseType: !11)
+ !17 = !{!18, !19, !20, !21}
+ !18 = !DILocalVariable(name: "s", arg: 1, scope: !4, file: !1, line: 9, type: !8)
+ !19 = !DILocalVariable(name: "u", arg: 2, scope: !4, file: !1, line: 9, type: !11)
+ !20 = !DILocalVariable(name: "b", arg: 3, scope: !4, file: !1, line: 9, type: !12)
+ !21 = !DILocalVariable(name: "n", arg: 4, scope: !4, file: !1, line: 9, type: !16)
+ !22 = !{i32 2, !"Dwarf Version", i32 4}
+ !23 = !{i32 2, !"Debug Info Version", i32 3}
+ !24 = !{i32 1, !"wchar_size", i32 4}
+ !25 = !{i32 1, !"min_enum_size", i32 4}
+ !26 = !{!"clang version 3.7.0 (llvm/trunk 237059)"}
+ !27 = !DIExpression()
+ !28 = !DILocation(line: 9, scope: !4)
+ !29 = !DILocation(line: 10, scope: !30)
+ !30 = distinct !DILexicalBlock(scope: !4, file: !1, line: 10)
+ !31 = !DILocation(line: 10, scope: !4)
+ !32 = !DILocation(line: 13, scope: !4)
+ !33 = !DILocation(line: 14, scope: !4)
+ !34 = !DILocation(line: 15, scope: !4)
+
+...
+---
+name: f
+alignment: 1
+exposesReturnsTwice: false
+hasInlineAsm: false
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: false
+liveins:
+ - { reg: '%r0' }
+ - { reg: '%r2' }
+ - { reg: '%r3' }
+calleeSavedRegisters: [ '%lr', '%d8', '%d9', '%d10', '%d11', '%d12', '%d13',
+ '%d14', '%d15', '%q4', '%q5', '%q6', '%q7', '%r4',
+ '%r5', '%r6', '%r7', '%r8', '%r9', '%r10', '%r11',
+ '%s16', '%s17', '%s18', '%s19', '%s20', '%s21',
+ '%s22', '%s23', '%s24', '%s25', '%s26', '%s27',
+ '%s28', '%s29', '%s30', '%s31', '%d8_d10', '%d9_d11',
+ '%d10_d12', '%d11_d13', '%d12_d14', '%d13_d15',
+ '%q4_q5', '%q5_q6', '%q6_q7', '%q4_q5_q6_q7', '%r4_r5',
+ '%r6_r7', '%r8_r9', '%r10_r11', '%d8_d9_d10', '%d9_d10_d11',
+ '%d10_d11_d12', '%d11_d12_d13', '%d12_d13_d14',
+ '%d13_d14_d15', '%d8_d10_d12', '%d9_d11_d13', '%d10_d12_d14',
+ '%d11_d13_d15', '%d8_d10_d12_d14', '%d9_d11_d13_d15',
+ '%d9_d10', '%d11_d12', '%d13_d14', '%d9_d10_d11_d12',
+ '%d11_d12_d13_d14' ]
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 8
+ offsetAdjustment: 0
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+stack:
+ - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '%lr' }
+ - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '%r7' }
+body: |
+ bb.0.entry:
+ liveins: %r0, %r2, %r3, %lr, %r7
+
+ DBG_VALUE debug-use %r0, debug-use _, !18, !27, debug-location !28
+ DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28
+ DBG_VALUE debug-use %r2, debug-use _, !20, !27, debug-location !28
+ DBG_VALUE debug-use %r3, debug-use _, !21, !27, debug-location !28
+ t2CMPri %r3, 4, 14, _, implicit-def %cpsr, debug-location !31
+ DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28
+ %r0 = t2MOVi -1, 3, %cpsr, _, implicit undef %r0
+ DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28
+ tBX_RET 3, %cpsr, implicit %r0, debug-location !34
+ %sp = frame-setup t2STMDB_UPD %sp, 14, _, killed %r7, killed %lr
+ frame-setup CFI_INSTRUCTION .cfi_def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION .cfi_offset %lr, -4
+ frame-setup CFI_INSTRUCTION .cfi_offset %r7, -8
+ DBG_VALUE debug-use %r0, debug-use _, !18, !27, debug-location !28
+ DBG_VALUE debug-use %r1, debug-use _, !19, !27, debug-location !28
+ DBG_VALUE debug-use %r2, debug-use _, !20, !27, debug-location !28
+ DBG_VALUE debug-use %r3, debug-use _, !21, !27, debug-location !28
+ %r1 = tMOVr killed %r2, 14, _, debug-location !32
+ %r2 = tMOVr killed %r3, 14, _, debug-location !32
+ tBL 14, _, @g, csr_aapcs, implicit-def dead %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit-def %sp, debug-location !32
+ %r0 = t2MOVi 0, 14, _, _
+ %sp = t2LDMIA_RET %sp, 14, _, def %r7, def %pc, implicit %r0
+
+...
diff --git a/test/CodeGen/MIR/Generic/basic-blocks.mir b/test/CodeGen/MIR/Generic/basic-blocks.mir
new file mode 100644
index 000000000000..22f8d28290db
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/basic-blocks.mir
@@ -0,0 +1,49 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses machine functions correctly.
+
+--- |
+
+ define i32 @foo() {
+ entry:
+ ret i32 0
+ }
+
+ define i32 @bar() {
+ start:
+ ret i32 0
+ }
+
+ define i32 @test() {
+ start:
+ ret i32 0
+ }
+
+...
+---
+# CHECK-LABEL: name: foo
+# CHECK: body:
+# CHECK-NEXT: bb.0.entry:
+name: foo
+body: |
+ bb.0.entry:
+...
+---
+# CHECK-LABEL: name: bar
+# CHECK: body:
+# CHECK-NEXT: bb.0.start (align 4):
+# CHECK: bb.1 (address-taken):
+name: bar
+body: |
+ bb.0.start (align 4):
+ bb.1 (address-taken):
+...
+---
+# CHECK-LABEL: name: test
+# CHECK: body:
+# CHECK-NEXT: bb.0.start (address-taken, align 4):
+# CHECK: bb.1 (address-taken, align 4):
+name: test
+body: |
+ bb.0.start (align 4, address-taken):
+ bb.1 (address-taken, align 4):
+...
diff --git a/test/CodeGen/MIR/Generic/expected-colon-after-basic-block.mir b/test/CodeGen/MIR/Generic/expected-colon-after-basic-block.mir
new file mode 100644
index 000000000000..892258666d10
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/expected-colon-after-basic-block.mir
@@ -0,0 +1,16 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo() {
+ entry:
+ ret i32 0
+ }
+
+...
+---
+name: foo
+body: |
+ ; CHECK: [[@LINE+1]]:13: expected ':'
+ bb.0.entry
+...
diff --git a/test/CodeGen/MIR/Generic/expected-mbb-reference-for-successor-mbb.mir b/test/CodeGen/MIR/Generic/expected-mbb-reference-for-successor-mbb.mir
new file mode 100644
index 000000000000..a5e04f86c6d1
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/expected-mbb-reference-for-successor-mbb.mir
@@ -0,0 +1,28 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less:
+ ret i32 0
+
+ exit:
+ ret i32 %a
+ }
+
+...
+---
+name: foo
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:29: expected a machine basic block reference
+ successors: %bb.1.less, 2
+
+ bb.1.less:
+
+ bb.2.exit:
+...
diff --git a/test/CodeGen/MIR/Generic/frame-info.mir b/test/CodeGen/MIR/Generic/frame-info.mir
new file mode 100644
index 000000000000..6e4e3955cb17
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/frame-info.mir
@@ -0,0 +1,89 @@
+# RUN: llc -start-after machine-sink -stop-after machine-sink -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses machine frame info properties
+# correctly.
+
+--- |
+
+ define i32 @test(i32 %a) {
+ entry:
+ %b = alloca i32
+ store i32 %a, i32* %b
+ %c = load i32, i32* %b
+ ret i32 %c
+ }
+
+ define i32 @test2(i32 %a) {
+ entry:
+ %b = alloca i32
+ store i32 %a, i32* %b
+ %c = load i32, i32* %b
+ ret i32 %c
+ }
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+
+# CHECK: frameInfo:
+# CHECK-NEXT: isFrameAddressTaken: false
+# CHECK-NEXT: isReturnAddressTaken: false
+# CHECK-NEXT: hasStackMap: false
+# CHECK-NEXT: hasPatchPoint: false
+# CHECK-NEXT: stackSize: 0
+# CHECK-NEXT: offsetAdjustment: 0
+# Note: max alignment can be target specific when printed.
+# CHECK-NEXT: maxAlignment:
+# CHECK-NEXT: adjustsStack: false
+# CHECK-NEXT: hasCalls: false
+# CHECK-NEXT: maxCallFrameSize: 0
+# CHECK-NEXT: hasOpaqueSPAdjustment: false
+# CHECK-NEXT: hasVAStart: false
+# CHECK-NEXT: hasMustTailInVarArgFunc: false
+# CHECK: body
+frameInfo:
+ maxAlignment: 4
+body: |
+ bb.0.entry:
+...
+---
+name: test2
+isSSA: true
+tracksRegLiveness: true
+
+# CHECK: test2
+# CHECK: frameInfo:
+# CHECK-NEXT: isFrameAddressTaken: true
+# CHECK-NEXT: isReturnAddressTaken: true
+# CHECK-NEXT: hasStackMap: true
+# CHECK-NEXT: hasPatchPoint: true
+# CHECK-NEXT: stackSize: 4
+# CHECK-NEXT: offsetAdjustment: 4
+# Note: max alignment can be target specific when printed.
+# CHECK-NEXT: maxAlignment:
+# CHECK-NEXT: adjustsStack: true
+# CHECK-NEXT: hasCalls: true
+# CHECK-NEXT: maxCallFrameSize: 4
+# CHECK-NEXT: hasOpaqueSPAdjustment: true
+# CHECK-NEXT: hasVAStart: true
+# CHECK-NEXT: hasMustTailInVarArgFunc: true
+# CHECK: body
+frameInfo:
+ isFrameAddressTaken: true
+ isReturnAddressTaken: true
+ hasStackMap: true
+ hasPatchPoint: true
+ stackSize: 4
+ offsetAdjustment: 4
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 4
+ hasOpaqueSPAdjustment: true
+ hasVAStart: true
+ hasMustTailInVarArgFunc: true
+body: |
+ bb.0.entry:
+...
+
diff --git a/test/CodeGen/MIR/function-missing-machine-function.mir b/test/CodeGen/MIR/Generic/function-missing-machine-function.mir
index 71b5b2845340..71b5b2845340 100644
--- a/test/CodeGen/MIR/function-missing-machine-function.mir
+++ b/test/CodeGen/MIR/Generic/function-missing-machine-function.mir
diff --git a/test/CodeGen/MIR/Generic/invalid-jump-table-kind.mir b/test/CodeGen/MIR/Generic/invalid-jump-table-kind.mir
new file mode 100644
index 000000000000..576de4bd9dc7
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/invalid-jump-table-kind.mir
@@ -0,0 +1,53 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test_jumptable(i32 %in) {
+ entry:
+ switch i32 %in, label %def [
+ i32 0, label %lbl1
+ i32 1, label %lbl2
+ i32 2, label %lbl3
+ i32 3, label %lbl4
+ ]
+
+ def:
+ ret i32 0
+
+ lbl1:
+ ret i32 1
+
+ lbl2:
+ ret i32 2
+
+ lbl3:
+ ret i32 4
+
+ lbl4:
+ ret i32 8
+ }
+
+...
+---
+name: test_jumptable
+jumpTable:
+ # CHECK: [[@LINE+1]]:18: unknown enumerated scalar
+ kind: switch
+ entries:
+ - id: 0
+ blocks: [ '%bb.3.lbl1', '%bb.4.lbl2', '%bb.5.lbl3', '%bb.6.lbl4' ]
+body: |
+ bb.0.entry:
+
+ bb.1.entry:
+
+ bb.2.def:
+
+ bb.3.lbl1:
+
+ bb.4.lbl2:
+
+ bb.5.lbl3:
+
+ bb.6.lbl4:
+...
diff --git a/test/CodeGen/MIR/Generic/lit.local.cfg b/test/CodeGen/MIR/Generic/lit.local.cfg
new file mode 100644
index 000000000000..f3f03bd7047e
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/lit.local.cfg
@@ -0,0 +1,3 @@
+if 'native' not in config.available_features:
+ config.unsupported = True
+
diff --git a/test/CodeGen/MIR/llvm-ir-error-reported.mir b/test/CodeGen/MIR/Generic/llvm-ir-error-reported.mir
index 3508c341c44d..3508c341c44d 100644
--- a/test/CodeGen/MIR/llvm-ir-error-reported.mir
+++ b/test/CodeGen/MIR/Generic/llvm-ir-error-reported.mir
diff --git a/test/CodeGen/MIR/Generic/llvmIR.mir b/test/CodeGen/MIR/Generic/llvmIR.mir
new file mode 100644
index 000000000000..c7a220afa505
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/llvmIR.mir
@@ -0,0 +1,37 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the LLVM IR that's embedded with MIR is parsed
+# correctly.
+
+--- |
+ ; CHECK: define i32 @foo(i32 %x, i32 %y)
+ ; CHECK: %z = alloca i32, align 4
+ ; CHECK: store i32 %x, i32* %z, align 4
+ ; CHECK: br label %Test
+ ; CHECK: Test:
+ ; CHECK: %m = load i32, i32* %z, align 4
+ ; CHECK: %cond = icmp eq i32 %y, %m
+ ; CHECK: br i1 %cond, label %IfEqual, label %IfUnequal
+ ; CHECK: IfEqual:
+ ; CHECK: ret i32 1
+ ; CHECK: IfUnequal:
+ ; CHECK: ret i32 0
+ define i32 @foo(i32 %x, i32 %y) {
+ %z = alloca i32, align 4
+ store i32 %x, i32* %z, align 4
+ br label %Test
+ Test:
+ %m = load i32, i32* %z, align 4
+ %cond = icmp eq i32 %y, %m
+ br i1 %cond, label %IfEqual, label %IfUnequal
+ IfEqual:
+ ret i32 1
+ IfUnequal:
+ ret i32 0
+ }
+
+...
+---
+name: foo
+body: |
+ bb.0:
+...
diff --git a/test/CodeGen/MIR/Generic/llvmIRMissing.mir b/test/CodeGen/MIR/Generic/llvmIRMissing.mir
new file mode 100644
index 000000000000..afa96010f297
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/llvmIRMissing.mir
@@ -0,0 +1,9 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser accepts files without the LLVM IR.
+
+---
+# CHECK: name: foo
+name: foo
+body: |
+ bb.0:
+...
diff --git a/test/CodeGen/MIR/Generic/machine-basic-block-ir-block-reference.mir b/test/CodeGen/MIR/Generic/machine-basic-block-ir-block-reference.mir
new file mode 100644
index 000000000000..d6ecd5dc8514
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/machine-basic-block-ir-block-reference.mir
@@ -0,0 +1,17 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser preserves unnamed LLVM IR block
+# references.
+
+--- |
+
+ define i32 @foo() {
+ ret i32 0
+ }
+
+...
+---
+name: foo
+body: |
+ ; CHECK: bb.0 (%ir-block.0):
+ bb.0 (%ir-block.0):
+...
diff --git a/test/CodeGen/MIR/Generic/machine-basic-block-redefinition-error.mir b/test/CodeGen/MIR/Generic/machine-basic-block-redefinition-error.mir
new file mode 100644
index 000000000000..41747535c351
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/machine-basic-block-redefinition-error.mir
@@ -0,0 +1,18 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo() {
+ entry:
+ ret i32 0
+ }
+
+...
+---
+name: foo
+body: |
+ ; CHECK: [[@LINE+3]]:3: redefinition of machine basic block with id #0
+ bb.0:
+
+ bb.0:
+...
diff --git a/test/CodeGen/MIR/Generic/machine-basic-block-undefined-ir-block.mir b/test/CodeGen/MIR/Generic/machine-basic-block-undefined-ir-block.mir
new file mode 100644
index 000000000000..df559f852ec0
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/machine-basic-block-undefined-ir-block.mir
@@ -0,0 +1,15 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo() {
+ ret i32 0
+ }
+
+...
+---
+name: foo
+body: |
+ ; CHECK: [[@LINE+1]]:9: use of undefined IR block '%ir-block.10'
+ bb.0 (%ir-block.10):
+...
diff --git a/test/CodeGen/MIR/Generic/machine-basic-block-unknown-name.mir b/test/CodeGen/MIR/Generic/machine-basic-block-unknown-name.mir
new file mode 100644
index 000000000000..876947b868b0
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/machine-basic-block-unknown-name.mir
@@ -0,0 +1,18 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported whenever the MIR parser can't find
+# a basic block with the machine basis block's name.
+
+--- |
+
+ define i32 @foo() {
+ entry:
+ ret i32 0
+ }
+
+...
+---
+name: foo
+body: |
+ ; CHECK: [[@LINE+1]]:3: basic block 'entrie' is not defined in the function 'foo'
+ bb.0.entrie:
+...
diff --git a/test/CodeGen/MIR/machine-function-missing-body-error.mir b/test/CodeGen/MIR/Generic/machine-function-missing-body-error.mir
index 0dc7477f6275..0dc7477f6275 100644
--- a/test/CodeGen/MIR/machine-function-missing-body-error.mir
+++ b/test/CodeGen/MIR/Generic/machine-function-missing-body-error.mir
diff --git a/test/CodeGen/MIR/Generic/machine-function-missing-function.mir b/test/CodeGen/MIR/Generic/machine-function-missing-function.mir
new file mode 100644
index 000000000000..6800f8724324
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/machine-function-missing-function.mir
@@ -0,0 +1,23 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when the mir file has LLVM IR and
+# one of the machine functions has a name that doesn't match any function in
+# the LLVM IR.
+
+--- |
+
+ define i32 @foo() {
+ ret i32 0
+ }
+
+...
+---
+name: foo
+body: |
+ bb.0:
+...
+---
+# CHECK: function 'faa' isn't defined in the provided LLVM IR
+name: faa
+body: |
+ bb.0:
+...
diff --git a/test/CodeGen/MIR/Generic/machine-function-missing-name.mir b/test/CodeGen/MIR/Generic/machine-function-missing-name.mir
new file mode 100644
index 000000000000..f65b77880e97
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/machine-function-missing-name.mir
@@ -0,0 +1,26 @@
+# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when a machine function doesn't
+# have a name attribute.
+
+--- |
+
+ define i32 @foo() {
+ ret i32 0
+ }
+
+ define i32 @bar() {
+ ret i32 0
+ }
+
+...
+---
+# CHECK: [[@LINE+1]]:1: missing required key 'name'
+nme: foo
+body: |
+ bb.0:
+...
+---
+name: bar
+body: |
+ bb.0:
+...
diff --git a/test/CodeGen/MIR/machine-function-redefinition-error.mir b/test/CodeGen/MIR/Generic/machine-function-redefinition-error.mir
index be84161b5630..be84161b5630 100644
--- a/test/CodeGen/MIR/machine-function-redefinition-error.mir
+++ b/test/CodeGen/MIR/Generic/machine-function-redefinition-error.mir
diff --git a/test/CodeGen/MIR/Generic/machine-function.mir b/test/CodeGen/MIR/Generic/machine-function.mir
new file mode 100644
index 000000000000..1c4ca3d07d2a
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/machine-function.mir
@@ -0,0 +1,66 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses machine functions correctly.
+
+--- |
+
+ define i32 @foo() {
+ ret i32 0
+ }
+
+ define i32 @bar() {
+ ret i32 0
+ }
+
+ define i32 @func() {
+ ret i32 0
+ }
+
+ define i32 @func2() {
+ ret i32 0
+ }
+
+...
+---
+# CHECK: name: foo
+# CHECK-NEXT: alignment:
+# CHECK-NEXT: exposesReturnsTwice: false
+# CHECK-NEXT: hasInlineAsm: false
+# CHECK: ...
+name: foo
+body: |
+ bb.0:
+...
+---
+# CHECK: name: bar
+# CHECK-NEXT: alignment:
+# CHECK-NEXT: exposesReturnsTwice: false
+# CHECK-NEXT: hasInlineAsm: false
+# CHECK: ...
+name: bar
+body: |
+ bb.0:
+...
+---
+# CHECK: name: func
+# CHECK-NEXT: alignment: 8
+# CHECK-NEXT: exposesReturnsTwice: false
+# CHECK-NEXT: hasInlineAsm: false
+# CHECK: ...
+name: func
+alignment: 8
+body: |
+ bb.0:
+...
+---
+# CHECK: name: func2
+# CHECK-NEXT: alignment: 16
+# CHECK-NEXT: exposesReturnsTwice: true
+# CHECK-NEXT: hasInlineAsm: true
+# CHECK: ...
+name: func2
+alignment: 16
+exposesReturnsTwice: true
+hasInlineAsm: true
+body: |
+ bb.0:
+...
diff --git a/test/CodeGen/MIR/Generic/register-info.mir b/test/CodeGen/MIR/Generic/register-info.mir
new file mode 100644
index 000000000000..229cf0f9130f
--- /dev/null
+++ b/test/CodeGen/MIR/Generic/register-info.mir
@@ -0,0 +1,40 @@
+# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses machine register info properties
+# correctly.
+
+--- |
+
+ define i32 @foo() {
+ entry:
+ ret i32 0
+ }
+
+ define i32 @bar() {
+ start:
+ ret i32 0
+ }
+
+...
+---
+# CHECK: name: foo
+# CHECK: isSSA: false
+# CHECK-NEXT: tracksRegLiveness: false
+# CHECK-NEXT: tracksSubRegLiveness: false
+# CHECK: ...
+name: foo
+body: |
+ bb.0:
+...
+---
+# CHECK: name: bar
+# CHECK: isSSA: false
+# CHECK-NEXT: tracksRegLiveness: true
+# CHECK-NEXT: tracksSubRegLiveness: true
+# CHECK: ...
+name: bar
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: true
+body: |
+ bb.0:
+...
diff --git a/test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir b/test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir
new file mode 100644
index 000000000000..ea94c9906557
--- /dev/null
+++ b/test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir
@@ -0,0 +1,41 @@
+# RUN: not llc -march=mipsel -mattr=mips16 -relocation-model=pic -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ define i32 @test(i32 %a) {
+ entry:
+ %call = call i32 @foo(i32 %a)
+ ret i32 0
+ }
+
+ declare i32 @foo(i32)
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%a0' }
+frameInfo:
+ stackSize: 24
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 16
+stack:
+ - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4,
+ callee-saved-register: '%ra' }
+body: |
+ bb.0.entry:
+ liveins: %a0, %ra
+
+ Save16 %ra, 24, implicit-def %sp, implicit %sp
+ %v0, %v1 = GotPrologue16 $_gp_disp, $_gp_disp
+ %v0 = SllX16 killed %v0, 16
+ %v0 = AdduRxRyRz16 killed %v1, killed %v0
+ ; CHECK: [[@LINE+1]]:67: expected a global value or an external symbol after 'call-entry'
+ %v1 = LwRxRyOffMemX16 %v0, @foo, 0 :: (load 4 from call-entry foo)
+ %t9 = COPY %v1
+ %gp = COPY killed %v0
+ JumpLinkReg16 killed %v1, csr_o32, implicit-def %ra, implicit killed %t9, implicit %a0, implicit killed %gp, implicit-def %sp, implicit-def dead %v0
+ %v0 = LiRxImmX16 0
+ %ra = Restore16 24, implicit-def %sp, implicit %sp
+ RetRA16 implicit %v0
+...
diff --git a/test/CodeGen/MIR/Mips/lit.local.cfg b/test/CodeGen/MIR/Mips/lit.local.cfg
new file mode 100644
index 000000000000..7d12f7a9c564
--- /dev/null
+++ b/test/CodeGen/MIR/Mips/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'Mips' in config.root.targets:
+ config.unsupported = True
diff --git a/test/CodeGen/MIR/Mips/memory-operands.mir b/test/CodeGen/MIR/Mips/memory-operands.mir
new file mode 100644
index 000000000000..d4206b067f7e
--- /dev/null
+++ b/test/CodeGen/MIR/Mips/memory-operands.mir
@@ -0,0 +1,102 @@
+# RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the call entry pseudo source
+# values in memory operands correctly.
+
+--- |
+ define i32 @test(i32 %a) {
+ entry:
+ %call = call i32 @foo(i32 %a)
+ ret i32 0
+ }
+
+ declare i32 @foo(i32)
+
+ define float @test2() #0 {
+ entry:
+ %call = tail call float bitcast (float (...)* @g to float ()*)()
+ call void @__mips16_ret_sf(float %call)
+ ret float %call
+ }
+
+ declare float @g(...)
+
+ declare void @__mips16_ret_sf(float) #1
+
+ attributes #0 = { "saveS2" }
+ attributes #1 = { noinline readnone "__Mips16RetHelper" }
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%a0' }
+frameInfo:
+ stackSize: 24
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 16
+stack:
+ - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4,
+ callee-saved-register: '%ra' }
+body: |
+ bb.0.entry:
+ liveins: %a0, %ra
+
+ Save16 %ra, 24, implicit-def %sp, implicit %sp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 24
+ CFI_INSTRUCTION .cfi_offset %ra_64, -4
+ %v0, %v1 = GotPrologue16 $_gp_disp, $_gp_disp
+ %v0 = SllX16 killed %v0, 16
+ %v0 = AdduRxRyRz16 killed %v1, killed %v0
+ ; CHECK-LABEL: name: test
+ ; CHECK: %v1 = LwRxRyOffMemX16 %v0, @foo, 0 :: (load 4 from call-entry @foo)
+ %v1 = LwRxRyOffMemX16 %v0, @foo, 0 :: (load 4 from call-entry @foo)
+ %t9 = COPY %v1
+ %gp = COPY killed %v0
+ JumpLinkReg16 killed %v1, csr_o32, implicit-def %ra, implicit killed %t9, implicit %a0, implicit killed %gp, implicit-def %sp, implicit-def dead %v0
+ %v0 = LiRxImmX16 0
+ %ra = Restore16 24, implicit-def %sp, implicit %sp
+ RetRA16 implicit %v0
+...
+---
+name: test2
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 32
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 16
+stack:
+ - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4,
+ callee-saved-register: '%ra' }
+ - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4,
+ callee-saved-register: '%s2' }
+ - { id: 2, type: spill-slot, offset: -12, size: 4, alignment: 4,
+ callee-saved-register: '%s0' }
+body: |
+ bb.0.entry:
+ liveins: %ra, %s2, %s0, %ra, %s2, %s0
+
+ SaveX16 %s0, %ra, %s2, 32, implicit-def %sp, implicit %sp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 32
+ CFI_INSTRUCTION .cfi_offset %ra_64, -4
+ CFI_INSTRUCTION .cfi_offset %s2_64, -8
+ CFI_INSTRUCTION .cfi_offset %s0_64, -12
+ %v0, %v1 = GotPrologue16 $_gp_disp, $_gp_disp
+ %v0 = SllX16 killed %v0, 16
+ %s0 = AdduRxRyRz16 killed %v1, killed %v0
+ %v0 = LwRxRyOffMemX16 %s0, @g, 0 :: (load 4 from call-entry @g)
+ ; CHECK-LABEL: test2
+ ; CHECK: %v1 = LwRxRyOffMemX16 %s0, $__mips16_call_stub_sf_0, 0 :: (load 4 from call-entry $__mips16_call_stub_sf_0)
+ %v1 = LwRxRyOffMemX16 %s0, $__mips16_call_stub_sf_0, 0 :: (load 4 from call-entry $__mips16_call_stub_sf_0)
+ %gp = COPY %s0
+ JumpLinkReg16 killed %v1, csr_o32, implicit-def %ra, implicit %v0, implicit killed %gp, implicit-def %sp, implicit-def %v0
+ %v1 = LwRxRyOffMemX16 %s0, @__mips16_ret_sf, 0 :: (load 4 from call-entry @__mips16_ret_sf)
+ %t9 = COPY %v1
+ %gp = COPY killed %s0
+ JumpLinkReg16 killed %v1, csr_mips16rethelper, implicit-def %ra, implicit killed %t9, implicit %v0, implicit killed %gp, implicit-def %sp
+ %s0, %ra, %s2 = RestoreX16 32, implicit-def %sp, implicit %sp
+ RetRA16 implicit %v0
+...
diff --git a/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir b/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
new file mode 100644
index 000000000000..28fb2a2cf5c9
--- /dev/null
+++ b/test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=nvptx -mcpu=sm_20 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define float @test(float %k) {
+ entry:
+ %0 = fadd float %k, 3.250000e+00
+ ret float %0
+ }
+
+...
+---
+name: test
+registers:
+ - { id: 0, class: float32regs }
+ - { id: 1, class: float32regs }
+body: |
+ bb.0.entry:
+ %0 = LD_f32_avar 0, 4, 1, 2, 32, $test_param_0
+ ; CHECK: [[@LINE+1]]:33: expected a floating point literal
+ %1 = FADD_rnf32ri %0, float 3
+ StoreRetvalF32 %1, 0
+ Return
+...
diff --git a/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
new file mode 100644
index 000000000000..18866d58a946
--- /dev/null
+++ b/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
@@ -0,0 +1,81 @@
+# RUN: llc -march=nvptx -mcpu=sm_20 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses floating point constant operands
+# correctly.
+
+--- |
+
+ define float @test(float %k, i32 %i) {
+ entry:
+ %0 = fpext float %k to double
+ %1 = fadd double %0, 3.250000e+00
+ %2 = fptrunc double %1 to float
+ %3 = sitofp i32 %i to float
+ %4 = fadd float %3, 6.250000e+00
+ %5 = fmul float %4, %2
+ ret float %5
+ }
+
+ define float @test2(float %k, i32 %i) {
+ entry:
+ %0 = fpext float %k to double
+ %1 = fadd double %0, 0x7FF8000000000000
+ %2 = fptrunc double %1 to float
+ %3 = sitofp i32 %i to float
+ %4 = fadd float %3, 0x7FF8000000000000
+ %5 = fmul float %4, %2
+ ret float %5
+ }
+
+...
+---
+name: test
+registers:
+ - { id: 0, class: float32regs }
+ - { id: 1, class: float64regs }
+ - { id: 2, class: int32regs }
+ - { id: 3, class: float64regs }
+ - { id: 4, class: float32regs }
+ - { id: 5, class: float32regs }
+ - { id: 6, class: float32regs }
+ - { id: 7, class: float32regs }
+body: |
+ bb.0.entry:
+ %0 = LD_f32_avar 0, 4, 1, 2, 32, $test_param_0
+ %1 = CVT_f64_f32 %0, 0
+ %2 = LD_i32_avar 0, 4, 1, 0, 32, $test_param_1
+ ; CHECK: %3 = FADD_rnf64ri %1, double 3.250000e+00
+ %3 = FADD_rnf64ri %1, double 3.250000e+00
+ %4 = CVT_f32_f64 %3, 5
+ %5 = CVT_f32_s32 %2, 5
+ ; CHECK: %6 = FADD_rnf32ri %5, float 6.250000e+00
+ %6 = FADD_rnf32ri %5, float 6.250000e+00
+ %7 = FMUL_rnf32rr %6, %4
+ StoreRetvalF32 %7, 0
+ Return
+...
+---
+name: test2
+registers:
+ - { id: 0, class: float32regs }
+ - { id: 1, class: float64regs }
+ - { id: 2, class: int32regs }
+ - { id: 3, class: float64regs }
+ - { id: 4, class: float32regs }
+ - { id: 5, class: float32regs }
+ - { id: 6, class: float32regs }
+ - { id: 7, class: float32regs }
+body: |
+ bb.0.entry:
+ %0 = LD_f32_avar 0, 4, 1, 2, 32, $test2_param_0
+ %1 = CVT_f64_f32 %0, 0
+ %2 = LD_i32_avar 0, 4, 1, 0, 32, $test2_param_1
+ ; CHECK: %3 = FADD_rnf64ri %1, double 0x7FF8000000000000
+ %3 = FADD_rnf64ri %1, double 0x7FF8000000000000
+ %4 = CVT_f32_f64 %3, 5
+ %5 = CVT_f32_s32 %2, 5
+ ; CHECK: %6 = FADD_rnf32ri %5, float 0x7FF8000000000000
+ %6 = FADD_rnf32ri %5, float 0x7FF8000000000000
+ %7 = FMUL_rnf32rr %6, %4
+ StoreRetvalF32 %7, 0
+ Return
+...
diff --git a/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir b/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
new file mode 100644
index 000000000000..e4080f80ee52
--- /dev/null
+++ b/test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=nvptx -mcpu=sm_20 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define float @test(float %k) {
+ entry:
+ %0 = fadd float %k, 3.250000e+00
+ ret float %0
+ }
+
+...
+---
+name: test
+registers:
+ - { id: 0, class: float32regs }
+ - { id: 1, class: float32regs }
+body: |
+ bb.0.entry:
+ %0 = LD_f32_avar 0, 4, 1, 2, 32, $test_param_0
+ ; CHECK: [[@LINE+1]]:33: floating point constant does not have type 'float'
+ %1 = FADD_rnf32ri %0, float 0xH3C00
+ StoreRetvalF32 %1, 0
+ Return
+...
diff --git a/test/CodeGen/MIR/NVPTX/lit.local.cfg b/test/CodeGen/MIR/NVPTX/lit.local.cfg
new file mode 100644
index 000000000000..2cb98eb371b2
--- /dev/null
+++ b/test/CodeGen/MIR/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+ config.unsupported = True
diff --git a/test/CodeGen/MIR/PowerPC/lit.local.cfg b/test/CodeGen/MIR/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..091332439b18
--- /dev/null
+++ b/test/CodeGen/MIR/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'PowerPC' in config.root.targets:
+ config.unsupported = True
diff --git a/test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir b/test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir
new file mode 100644
index 000000000000..39d14e72ffee
--- /dev/null
+++ b/test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir
@@ -0,0 +1,45 @@
+# RUN: llc -mtriple=powerpc64-unknown-linux-gnu -start-after machine-combiner -stop-after machine-combiner -o /dev/null %s | FileCheck %s
+# PR24724
+
+--- |
+ define signext i32 @main(i32* %p) #0 {
+ entry:
+ %0 = load i32, i32* %p, align 4
+ %or = or i32 0, %0
+ store i32 %or, i32* %p, align 4
+ %lnot.1 = icmp eq i32 undef, 0
+ %lnot.ext.1 = zext i1 %lnot.1 to i32
+ %shr.i.1 = lshr i32 2072, %lnot.ext.1
+ %call.lobit.1 = lshr i32 %shr.i.1, 7
+ %1 = and i32 %call.lobit.1, 1
+ %or.1 = or i32 %1, %or
+ ret i32 %or.1
+ }
+
+ attributes #0 = { nounwind "target-cpu"="ppc64" }
+...
+---
+name: main
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: g8rc_and_g8rc_nox0 }
+ - { id: 1, class: gprc }
+ - { id: 2, class: gprc }
+ - { id: 3, class: gprc }
+ - { id: 4, class: g8rc }
+liveins:
+ - { reg: '%x3', virtual-reg: '%0' }
+body: |
+ bb.0.entry:
+ liveins: %x3
+
+ %0 = COPY %x3
+ %1 = LWZ 0, %0 :: (load 4 from %ir.p)
+ %2 = LI 0
+ %3 = RLWIMI %2, killed %1, 0, 0, 31
+ %4 = EXTSW_32_64 killed %3
+ %x3 = COPY %4
+ ; CHECK: BLR8 implicit %lr8, implicit %rm, implicit %x3
+ BLR8 implicit %lr8, implicit %rm, implicit %x3
+...
diff --git a/test/CodeGen/MIR/X86/basic-block-liveins.mir b/test/CodeGen/MIR/X86/basic-block-liveins.mir
index d749a0524422..00732975495d 100644
--- a/test/CodeGen/MIR/X86/basic-block-liveins.mir
+++ b/test/CodeGen/MIR/X86/basic-block-liveins.mir
@@ -9,17 +9,54 @@
ret i32 %c
}
+ define i32 @test2(i32 %a, i32 %b) {
+ body:
+ %c = add i32 %a, %b
+ ret i32 %c
+ }
+
+ define i32 @test3() {
+ body:
+ ret i32 0
+ }
+
...
---
name: test
-body:
- # CHECK: name: body
- # CHECK: liveins: [ '%edi', '%esi' ]
- # CHECK-NEXT: instructions:
- - id: 0
- name: body
- liveins: [ '%edi', '%esi' ]
- instructions:
- - '%eax = LEA64_32r killed %rdi, 1, killed %rsi, 0, _'
- - 'RETQ %eax'
+body: |
+ ; CHECK-LABEL: bb.0.body:
+ ; CHECK-NEXT: liveins: %edi, %esi
+ bb.0.body:
+ liveins: %edi, %esi
+
+ %eax = LEA64_32r killed %rdi, 1, killed %rsi, 0, _
+ RETQ %eax
+...
+---
+name: test2
+body: |
+ ; CHECK-LABEL: name: test2
+ ; Verify that we can have multiple lists of liveins that will be merged into
+ ; one.
+ ; CHECK: bb.0.body:
+ ; CHECK-NEXT: liveins: %edi, %esi
+ bb.0.body:
+ liveins: %edi
+ liveins: %esi
+
+ %eax = LEA64_32r killed %rdi, 1, killed %rsi, 0, _
+ RETQ %eax
+...
+---
+name: test3
+body: |
+ ; Verify that we can have an empty list of liveins.
+ ; CHECK-LABEL: name: test3
+ ; CHECK: bb.0.body:
+ ; CHECK-NEXT: %eax = MOV32r0 implicit-def dead %eflags
+ bb.0.body:
+ liveins:
+
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
...
diff --git a/test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir b/test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir
new file mode 100644
index 000000000000..b4b7dddea56c
--- /dev/null
+++ b/test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir
@@ -0,0 +1,41 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less: ; preds = %entry
+ ret i32 0
+
+ exit: ; preds = %entry
+ ret i32 %a
+ }
+
+...
+---
+name: foo
+tracksRegLiveness: true
+liveins:
+ - { reg: '%edi' }
+body: |
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+ liveins: %edi 44
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit killed %eflags
+
+ ; CHECK: [[@LINE+1]]:8: basic block definition should be located at the start of the line
+ less bb.1:
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
+
+ bb.2.exit:
+ liveins: %edi
+
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
diff --git a/test/CodeGen/MIR/X86/block-address-operands.mir b/test/CodeGen/MIR/X86/block-address-operands.mir
new file mode 100644
index 000000000000..3c2d2aefff20
--- /dev/null
+++ b/test/CodeGen/MIR/X86/block-address-operands.mir
@@ -0,0 +1,121 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the block address operands
+# correctly.
+
+--- |
+
+ @addr = global i8* null
+
+ define void @test() {
+ entry:
+ store volatile i8* blockaddress(@test, %block), i8** @addr
+ %val = load volatile i8*, i8** @addr
+ indirectbr i8* %val, [label %block]
+
+ block:
+ ret void
+ }
+
+ define void @test2() {
+ entry:
+ store volatile i8* blockaddress(@test2, %"quoted block"), i8** @addr
+ %val = load volatile i8*, i8** @addr
+ indirectbr i8* %val, [label %"quoted block"]
+
+ "quoted block":
+ ret void
+ }
+
+ define void @slot_in_other_function(i8** %addr) {
+ entry:
+ store volatile i8* blockaddress(@test3, %0), i8** %addr
+ ret void
+ }
+
+ define void @test3() {
+ entry:
+ store volatile i8* blockaddress(@test3, %0), i8** @addr
+ %val = load volatile i8*, i8** @addr
+ indirectbr i8* %val, [label %0]
+
+ ret void
+ }
+
+ define void @test4() {
+ entry:
+ store volatile i8* blockaddress(@test4, %block), i8** @addr
+ %val = load volatile i8*, i8** @addr
+ indirectbr i8* %val, [label %block]
+
+ block:
+ ret void
+ }
+
+...
+---
+name: test
+body: |
+ bb.0.entry:
+ successors: %bb.1.block
+ ; CHECK: %rax = LEA64r %rip, 1, _, blockaddress(@test, %ir-block.block), _
+ %rax = LEA64r %rip, 1, _, blockaddress(@test, %ir-block.block), _
+ MOV64mr %rip, 1, _, @addr, _, killed %rax
+ JMP64m %rip, 1, _, @addr, _
+
+ bb.1.block (address-taken):
+ RETQ
+...
+---
+name: test2
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1
+ ; CHECK: %rax = LEA64r %rip, 1, _, blockaddress(@test2, %ir-block."quoted block"), _
+ %rax = LEA64r %rip, 1, _, blockaddress(@test2, %ir-block."quoted block"), _
+ MOV64mr %rip, 1, _, @addr, _, killed %rax
+ JMP64m %rip, 1, _, @addr, _
+
+ bb.1 (address-taken):
+ RETQ
+...
+---
+name: slot_in_other_function
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK-LABEL: name: slot_in_other_function
+ ; CHECK: %rax = LEA64r %rip, 1, _, blockaddress(@test3, %ir-block.0), _
+ %rax = LEA64r %rip, 1, _, blockaddress(@test3, %ir-block.0), _
+ MOV64mr killed %rdi, 1, _, 0, _, killed %rax
+ RETQ
+...
+---
+name: test3
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1
+ ; CHECK-LABEL: name: test3
+ ; CHECK: %rax = LEA64r %rip, 1, _, blockaddress(@test3, %ir-block.0), _
+ %rax = LEA64r %rip, 1, _, blockaddress(@test3, %ir-block.0), _
+ MOV64mr %rip, 1, _, @addr, _, killed %rax
+ JMP64m %rip, 1, _, @addr, _
+
+ bb.1 (address-taken):
+ RETQ
+...
+---
+name: test4
+body: |
+ bb.0.entry:
+ successors: %bb.1.block
+ ; CHECK: %rax = LEA64r %rip, 1, _, blockaddress(@test, %ir-block.block) + 2, _
+ %rax = LEA64r %rip, 1, _, blockaddress(@test, %ir-block.block) + 2, _
+ MOV64mr %rip, 1, _, @addr, _, killed %rax
+ JMP64m %rip, 1, _, @addr, _
+
+ bb.1.block (address-taken):
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/callee-saved-info.mir b/test/CodeGen/MIR/X86/callee-saved-info.mir
new file mode 100644
index 000000000000..17c7739951d9
--- /dev/null
+++ b/test/CodeGen/MIR/X86/callee-saved-info.mir
@@ -0,0 +1,95 @@
+# RUN: llc -march=x86-64 -start-after prologepilog -stop-after prologepilog -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses callee saved information in the
+# stack objects correctly.
+
+--- |
+
+ define i32 @compute(i32 %a) {
+ body:
+ ret i32 %a
+ }
+
+ define i32 @func(i32 %a) {
+ entry:
+ %b = alloca i32
+ store i32 %a, i32* %b
+ br label %check
+
+ check:
+ %comp = icmp sle i32 %a, 10
+ br i1 %comp, label %loop, label %exit
+
+ loop:
+ %c = load i32, i32* %b
+ %d = call i32 @compute(i32 %c)
+ %e = sub i32 %d, 1
+ store i32 %e, i32* %b
+ br label %check
+
+ exit:
+ ret i32 0
+ }
+
+...
+---
+name: compute
+tracksRegLiveness: true
+body: |
+ bb.0.body:
+ liveins: %edi
+
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
+---
+name: func
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 24
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+# CHECK: fixedStack:
+# CHECK-NEXT: , callee-saved-register: '%rbx' }
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%rbx' }
+# CHECK: stack:
+# CHECK-NEXT: - { id: 0
+# CHECK-NEXT: , callee-saved-register: '%edi' }
+stack:
+ - { id: 0, name: b, offset: -20, size: 4, alignment: 4 }
+ - { id: 1, offset: -24, size: 4, alignment: 4, callee-saved-register: '%edi' }
+body: |
+ bb.0.entry:
+ successors: %bb.1.check
+ liveins: %edi, %rbx
+
+ frame-setup PUSH64r killed %rbx, implicit-def %rsp, implicit %rsp
+ %rsp = frame-setup SUB64ri8 %rsp, 16, implicit-def dead %eflags
+ %ebx = COPY %edi
+ MOV32mr %rsp, 1, _, 12, _, %ebx
+
+ bb.1.check:
+ successors: %bb.2.loop, %bb.3.exit
+ liveins: %ebx
+
+ CMP32ri8 %ebx, 10, implicit-def %eflags
+ JG_1 %bb.3.exit, implicit killed %eflags
+ JMP_1 %bb.2.loop
+
+ bb.2.loop:
+ successors: %bb.1.check
+ liveins: %ebx
+
+ %edi = MOV32rm %rsp, 1, _, 12, _
+ CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax
+ %eax = DEC32r killed %eax, implicit-def dead %eflags
+ MOV32mr %rsp, 1, _, 12, _, killed %eax
+ JMP_1 %bb.1.check
+
+ bb.3.exit:
+ %eax = MOV32r0 implicit-def dead %eflags
+ %rsp = ADD64ri8 %rsp, 16, implicit-def dead %eflags
+ %rbx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir b/test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir
new file mode 100644
index 000000000000..47051a53e3f4
--- /dev/null
+++ b/test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir
@@ -0,0 +1,29 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the .cfi_def_cfa_offset operands
+# correctly.
+
+--- |
+
+ define void @test() {
+ entry:
+ %tmp = alloca [4168 x i8], align 4
+ ret void
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 4040
+stack:
+ - { id: 0, name: tmp, offset: -4176, size: 4168, alignment: 4 }
+body: |
+ bb.0.entry:
+ %rsp = SUB64ri32 %rsp, 4040, implicit-def dead %eflags
+ ; CHECK: CFI_INSTRUCTION .cfi_def_cfa_offset 4048
+ CFI_INSTRUCTION .cfi_def_cfa_offset 4048
+ %rsp = ADD64ri32 %rsp, 4040, implicit-def dead %eflags
+ RETQ
+...
+
diff --git a/test/CodeGen/MIR/X86/cfi-def-cfa-register.mir b/test/CodeGen/MIR/X86/cfi-def-cfa-register.mir
new file mode 100644
index 000000000000..74a33b5c3437
--- /dev/null
+++ b/test/CodeGen/MIR/X86/cfi-def-cfa-register.mir
@@ -0,0 +1,32 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the .cfi_def_cfa_register
+# operands correctly.
+
+--- |
+
+ define void @func() #0 {
+ entry:
+ unreachable
+ }
+
+ attributes #0 = { "no-frame-pointer-elim"="true" }
+
+...
+---
+name: func
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 8
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16 }
+body: |
+ bb.0.entry:
+ liveins: %rbp
+
+ PUSH64r killed %rbp, implicit-def %rsp, implicit %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 16
+ CFI_INSTRUCTION .cfi_offset %rbp, -16
+ %rbp = MOV64rr %rsp
+ ; CHECK: CFI_INSTRUCTION .cfi_def_cfa_register %rbp
+ CFI_INSTRUCTION .cfi_def_cfa_register %rbp
+...
diff --git a/test/CodeGen/MIR/X86/cfi-offset.mir b/test/CodeGen/MIR/X86/cfi-offset.mir
new file mode 100644
index 000000000000..fd9e605a036a
--- /dev/null
+++ b/test/CodeGen/MIR/X86/cfi-offset.mir
@@ -0,0 +1,47 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the .cfi_offset operands
+# correctly.
+
+--- |
+
+ declare void @foo(i32)
+
+ define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) {
+ entry:
+ %add = add nsw i32 %b, %a
+ %add1 = add nsw i32 %add, %c
+ %add2 = add nsw i32 %add1, %d
+ tail call void @foo(i32 %add2)
+ %add6 = add nsw i32 %add2, %add2
+ ret i32 %add6
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 8
+ adjustsStack: true
+ hasCalls: true
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16 }
+body: |
+ bb.0.entry:
+ liveins: %ecx, %edi, %edx, %esi, %rbx
+
+ PUSH64r killed %rbx, implicit-def %rsp, implicit %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 16
+ ; CHECK: CFI_INSTRUCTION .cfi_offset %rbx, -16
+ CFI_INSTRUCTION .cfi_offset %rbx, -16
+ %ebx = COPY %edi, implicit-def %rbx
+ %ebx = ADD32rr %ebx, killed %esi, implicit-def dead %eflags
+ %ebx = ADD32rr %ebx, killed %edx, implicit-def dead %eflags
+ %ebx = ADD32rr %ebx, killed %ecx, implicit-def dead %eflags
+ %edi = COPY %ebx
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp
+ %eax = LEA64_32r killed %rbx, 1, %rbx, 0, _
+ %rbx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
+
diff --git a/test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir b/test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir
new file mode 100644
index 000000000000..2ddf5736b977
--- /dev/null
+++ b/test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define double @test(double %a, float %b) {
+ entry:
+ %c = fadd double %a, 3.250000e+00
+ ret double %c
+ }
+
+...
+---
+name: test
+constants:
+ - id: 0
+ value: 'double 3.250000e+00'
+# CHECK: [[@LINE+1]]:18: redefinition of constant pool item '%const.0'
+ - id: 0
+ value: 'double 3.250000e+00'
+body: |
+ bb.0.entry:
+ %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _
+ RETQ %xmm0
+...
+
diff --git a/test/CodeGen/MIR/X86/constant-pool.mir b/test/CodeGen/MIR/X86/constant-pool.mir
new file mode 100644
index 000000000000..213e4e283485
--- /dev/null
+++ b/test/CodeGen/MIR/X86/constant-pool.mir
@@ -0,0 +1,139 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses constant pool constants and
+# constant pool operands correctly.
+
+--- |
+
+ define double @test(double %a, float %b) {
+ entry:
+ %c = fadd double %a, 3.250000e+00
+ %d = fadd float %b, 6.250000e+00
+ %e = fpext float %d to double
+ %f = fmul double %c, %e
+ ret double %f
+ }
+
+ define double @test2(double %a, float %b) {
+ entry:
+ %c = fadd double %a, 3.250000e+00
+ %d = fadd float %b, 6.250000e+00
+ %e = fpext float %d to double
+ %f = fmul double %c, %e
+ ret double %f
+ }
+
+ define double @test3(double %a, float %b) {
+ entry:
+ %c = fadd double %a, 3.250000e+00
+ %d = fadd float %b, 6.250000e+00
+ %e = fpext float %d to double
+ %f = fmul double %c, %e
+ ret double %f
+ }
+
+ define double @test4(double %a, float %b) {
+ entry:
+ %c = fadd double %a, 3.250000e+00
+ %d = fadd float %b, 6.250000e+00
+ %e = fpext float %d to double
+ %f = fmul double %c, %e
+ ret double %f
+ }
+...
+---
+# CHECK: name: test
+# CHECK: constants:
+# CHECK-NEXT: - id: 0
+# CHECK-NEXT: value: 'double 3.250000e+00'
+# CHECK-NEXT: alignment: 8
+# CHECK-NEXT: - id: 1
+# CHECK-NEXT: value: 'float 6.250000e+00'
+# CHECK-NEXT: alignment: 4
+name: test
+constants:
+ - id: 0
+ value: 'double 3.250000e+00'
+ alignment: 8
+ - id: 1
+ value: 'float 6.250000e+00'
+ alignment: 4
+body: |
+ bb.0.entry:
+ ; CHECK: %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _
+ ; CHECK-NEXT: %xmm1 = ADDSSrm killed %xmm1, %rip, 1, _, %const.1, _
+ %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _
+ %xmm1 = ADDSSrm killed %xmm1, %rip, 1, _, %const.1, _
+ %xmm1 = CVTSS2SDrr killed %xmm1
+ %xmm0 = MULSDrr killed %xmm0, killed %xmm1
+ RETQ %xmm0
+...
+---
+# Verify that alignment can be inferred:
+# CHECK: name: test2
+# CHECK: constants:
+# CHECK-NEXT: - id: 0
+# CHECK-NEXT: value: 'double 3.250000e+00'
+# CHECK-NEXT: alignment: 8
+# CHECK-NEXT: - id: 1
+# CHECK-NEXT: value: 'float 6.250000e+00'
+# CHECK-NEXT: alignment: 4
+name: test2
+constants:
+ - id: 0
+ value: 'double 3.250000e+00'
+ - id: 1
+ value: 'float 6.250000e+00'
+body: |
+ bb.0.entry:
+ %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _
+ %xmm1 = ADDSSrm killed %xmm1, %rip, 1, _, %const.1, _
+ %xmm1 = CVTSS2SDrr killed %xmm1
+ %xmm0 = MULSDrr killed %xmm0, killed %xmm1
+ RETQ %xmm0
+...
+---
+# Verify that the non-standard alignments are respected:
+# CHECK: name: test3
+# CHECK: constants:
+# CHECK-NEXT: - id: 0
+# CHECK-NEXT: value: 'double 3.250000e+00'
+# CHECK-NEXT: alignment: 128
+# CHECK-NEXT: - id: 1
+# CHECK-NEXT: value: 'float 6.250000e+00'
+# CHECK-NEXT: alignment: 1
+name: test3
+constants:
+ - id: 0
+ value: 'double 3.250000e+00'
+ alignment: 128
+ - id: 1
+ value: 'float 6.250000e+00'
+ alignment: 1
+body: |
+ bb.0.entry:
+ ; CHECK: %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _
+ ; CHECK-NEXT: %xmm1 = ADDSSrm killed %xmm1, %rip, 1, _, %const.1, _
+ %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _
+ %xmm1 = ADDSSrm killed %xmm1, %rip, 1, _, %const.1, _
+ %xmm1 = CVTSS2SDrr killed %xmm1
+ %xmm0 = MULSDrr killed %xmm0, killed %xmm1
+ RETQ %xmm0
+...
+---
+# CHECK: name: test4
+name: test4
+constants:
+ - id: 0
+ value: 'double 3.250000e+00'
+ - id: 1
+ value: 'float 6.250000e+00'
+body: |
+ bb.0.entry:
+ ; CHECK: %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.1 - 12, _
+ ; CHECK-NEXT: %xmm1 = ADDSSrm killed %xmm1, %rip, 1, _, %const.0 + 8, _
+ %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.1 - 12, _
+ %xmm1 = ADDSSrm killed %xmm1, %rip, 1, _, %const.0 + 8, _
+ %xmm1 = CVTSS2SDrr killed %xmm1
+ %xmm0 = MULSDrr killed %xmm0, killed %xmm1
+ RETQ %xmm0
+...
diff --git a/test/CodeGen/MIR/X86/constant-value-error.mir b/test/CodeGen/MIR/X86/constant-value-error.mir
new file mode 100644
index 000000000000..1e14d2282c5a
--- /dev/null
+++ b/test/CodeGen/MIR/X86/constant-value-error.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that the MIR parser reports an error when parsing an invalid
+# constant value.
+
+--- |
+
+ define double @test(double %a, float %b) {
+ entry:
+ %c = fadd double %a, 3.250000e+00
+ ret double %c
+ }
+
+...
+---
+name: test
+constants:
+ - id: 0
+ # CHECK: [[@LINE+1]]:19: expected type
+ value: 'dub 3.250000e+00'
+body: |
+ bb.0.entry:
+ %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _
+ RETQ %xmm0
+...
+
diff --git a/test/CodeGen/MIR/X86/dead-register-flag.mir b/test/CodeGen/MIR/X86/dead-register-flag.mir
index 988b554659cb..309e776de46a 100644
--- a/test/CodeGen/MIR/X86/dead-register-flag.mir
+++ b/test/CodeGen/MIR/X86/dead-register-flag.mir
@@ -15,12 +15,10 @@
...
---
name: foo
-body:
- # CHECK: name: body
- - id: 0
- name: body
- instructions:
- # CHECK: - '%eax = IMUL32rri8 %edi, 11, implicit-def dead %eflags'
- - '%eax = IMUL32rri8 %edi, 11, implicit-def dead %eflags'
- - 'RETQ %eax'
+body: |
+ ; CHECK: bb.0.body:
+ bb.0.body:
+ ; CHECK: %eax = IMUL32rri8 %edi, 11, implicit-def dead %eflags
+ %eax = IMUL32rri8 %edi, 11, implicit-def dead %eflags
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/def-register-already-tied-error.mir b/test/CodeGen/MIR/X86/def-register-already-tied-error.mir
new file mode 100644
index 000000000000..69c816f59b9b
--- /dev/null
+++ b/test/CodeGen/MIR/X86/def-register-already-tied-error.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ define i64 @test(i64 %x) #0 {
+ entry:
+ %asm = tail call i64 asm sideeffect "$foo", "=r,0"(i64 %x) nounwind
+ ret i64 %asm
+ }
+
+ attributes #0 = { nounwind }
+...
+---
+name: test
+hasInlineAsm: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+
+ ; CHECK: [[@LINE+1]]:83: the tied-def operand #3 is already tied with another register operand
+ INLINEASM $"$foo", 1, 2818058, def %rdi, 2147483657, killed %rdi(tied-def 3), killed %rdi(tied-def 3)
+ %rax = COPY killed %rdi
+ RETQ killed %rax
+...
diff --git a/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir b/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir
new file mode 100644
index 000000000000..7d01810c792b
--- /dev/null
+++ b/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir
@@ -0,0 +1,27 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @volatile_inc(i32* %x) {
+ entry:
+ %0 = load volatile i32, i32* %x
+ %1 = add i32 %0, 1
+ store volatile i32 %1, i32* %x
+ ret i32 %1
+ }
+
+...
+---
+name: volatile_inc
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:50: duplicate 'volatile' memory operand flag
+ %eax = MOV32rm %rdi, 1, _, 0, _ :: (volatile volatile load 4 from %ir.x)
+ %eax = INC32r killed %eax, implicit-def dead %eflags
+ MOV32mr killed %rdi, 1, _, 0, _, %eax :: (volatile store 4 into %ir.x)
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/duplicate-register-flag-error.mir b/test/CodeGen/MIR/X86/duplicate-register-flag-error.mir
new file mode 100644
index 000000000000..d80c6ed061de
--- /dev/null
+++ b/test/CodeGen/MIR/X86/duplicate-register-flag-error.mir
@@ -0,0 +1,35 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less:
+ ret i32 0
+
+ exit:
+ ret i32 %a
+ }
+
+...
+---
+name: foo
+body: |
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ ; CHECK: [[@LINE+1]]:31: duplicate 'implicit' register flag
+ JG_1 %bb.2.exit, implicit implicit %eflags
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def %eflags
+ RETQ %eax
+
+ bb.2.exit:
+ %eax = COPY %edi
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/early-clobber-register-flag.mir b/test/CodeGen/MIR/X86/early-clobber-register-flag.mir
new file mode 100644
index 000000000000..4dc442e4fb94
--- /dev/null
+++ b/test/CodeGen/MIR/X86/early-clobber-register-flag.mir
@@ -0,0 +1,45 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the 'early-clobber' register
+# flags correctly.
+
+--- |
+
+ declare void @foo(i32)
+
+ define void @test(i32 %a, i32 %b) #0 {
+ entry:
+ %c = add i32 %a, %b
+ call void asm sideeffect "nop", "~{ax},~{di}"()
+ call void @foo(i32 %c)
+ ret void
+ }
+
+ attributes #0 = { optsize }
+
+...
+---
+name: test
+hasInlineAsm: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '%edi' }
+ - { reg: '%esi' }
+frameInfo:
+ stackSize: 8
+ adjustsStack: true
+ hasCalls: true
+body: |
+ bb.0.entry:
+ liveins: %edi, %esi
+
+ frame-setup PUSH64r undef %rax, implicit-def %rsp, implicit %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 16
+ %ecx = COPY %edi
+ %ecx = ADD32rr killed %ecx, killed %esi, implicit-def dead %eflags
+ ; CHECK: INLINEASM $nop, 1, 12, implicit-def dead early-clobber %ax, 12, implicit-def dead early-clobber %di
+ INLINEASM $nop, 1, 12, implicit-def dead early-clobber %ax, 12, implicit-def dead early-clobber %di
+ %edi = COPY killed %ecx
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp
+ %rax = POP64r implicit-def %rsp, implicit %rsp
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir b/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir
new file mode 100644
index 000000000000..f2e349454c5d
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir
@@ -0,0 +1,30 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define void @memory_alignment(<8 x float>* %vec) {
+ entry:
+ %v = load <8 x float>, <8 x float>* %vec
+ %v2 = insertelement <8 x float> %v, float 0.0, i32 4
+ store <8 x float> %v2, <8 x float>* %vec
+ ret void
+ }
+
+...
+---
+name: memory_alignment
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:65: expected 'align'
+ %xmm0 = MOVAPSrm %rdi, 1, _, 0, _ :: (load 16 from %ir.vec, 32)
+ %xmm1 = MOVAPSrm %rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16, align 32)
+ %xmm2 = FsFLD0SS
+ %xmm1 = MOVSSrr killed %xmm1, killed %xmm2
+ MOVAPSmr %rdi, 1, _, 0, _, killed %xmm0 :: (store 16 into %ir.vec, align 32)
+ MOVAPSmr killed %rdi, 1, _, 16, _, killed %xmm1 :: (store 16 into %ir.vec + 16, align 32)
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir b/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir
new file mode 100644
index 000000000000..7ce377f8c5fb
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir
@@ -0,0 +1,30 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define void @memory_alignment(<8 x float>* %vec) {
+ entry:
+ %v = load <8 x float>, <8 x float>* %vec
+ %v2 = insertelement <8 x float> %v, float 0.0, i32 4
+ store <8 x float> %v2, <8 x float>* %vec
+ ret void
+ }
+
+...
+---
+name: memory_alignment
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:70: expected an integer literal after 'align'
+ %xmm0 = MOVAPSrm %rdi, 1, _, 0, _ :: (load 16 from %ir.vec, align)
+ %xmm1 = MOVAPSrm %rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16, align 32)
+ %xmm2 = FsFLD0SS
+ %xmm1 = MOVSSrr killed %xmm1, killed %xmm2
+ MOVAPSmr %rdi, 1, _, 0, _, killed %xmm0 :: (store 16 into %ir.vec, align 32)
+ MOVAPSmr killed %rdi, 1, _, 16, _, killed %xmm1 :: (store 16 into %ir.vec + 16, align 32)
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir b/test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir
new file mode 100644
index 000000000000..861baec4bcbc
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir
@@ -0,0 +1,40 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less: ; preds = %entry
+ ret i32 0
+
+ exit: ; preds = %entry
+ ret i32 %a
+ }
+
+...
+---
+name: foo
+tracksRegLiveness: true
+liveins:
+ - { reg: '%edi' }
+body: |
+ ; CHECK: [[@LINE+1]]:3: expected a basic block definition before instructions
+ successors: %bb.1.less, %bb.2.exit
+ liveins: %edi 44
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit killed %eflags
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
+
+ bb.2.exit:
+ liveins: %edi
+
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir b/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir
new file mode 100644
index 000000000000..ef7df4c8c20f
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir
@@ -0,0 +1,30 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @addr = global i8* null
+
+ define void @test() {
+ entry:
+ store volatile i8* blockaddress(@test, %block), i8** @addr
+ %val = load volatile i8*, i8** @addr
+ indirectbr i8* %val, [label %block]
+
+ block:
+ ret void
+ }
+
+...
+---
+name: test
+body: |
+ bb.0.entry:
+ successors: %bb.1.block
+ ; CHECK: [[@LINE+1]]:51: expected an IR block reference
+ %rax = LEA64r %rip, 1, _, blockaddress(@test, _), _
+ MOV64mr %rip, 1, _, @addr, _, killed %rax
+ JMP64m %rip, 1, _, @addr, _
+
+ bb.1.block (address-taken):
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir b/test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir
new file mode 100644
index 000000000000..ba7b2ab64c3e
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir
@@ -0,0 +1,42 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ declare void @foo(i32)
+
+ define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) {
+ entry:
+ %add = add nsw i32 %b, %a
+ %add1 = add nsw i32 %add, %c
+ %add2 = add nsw i32 %add1, %d
+ tail call void @foo(i32 %add2)
+ %add6 = add nsw i32 %add2, %add2
+ ret i32 %add6
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 8
+ adjustsStack: true
+ hasCalls: true
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16 }
+body: |
+ bb.0.entry:
+ PUSH64r killed %rbx, implicit-def %rsp, implicit %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 16
+ ; CHECK: [[@LINE+1]]:38: expected ','
+ CFI_INSTRUCTION .cfi_offset %rbx -16
+ %ebx = COPY %edi, implicit-def %rbx
+ %ebx = ADD32rr %ebx, killed %esi, implicit-def dead %eflags
+ %ebx = ADD32rr %ebx, killed %edx, implicit-def dead %eflags
+ %ebx = ADD32rr %ebx, killed %ecx, implicit-def dead %eflags
+ %edi = COPY %ebx
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp
+ %eax = LEA64_32r killed %rbx, 1, %rbx, 0, _
+ %rbx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir b/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir
new file mode 100644
index 000000000000..dd5693952573
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define void @test(i32* %a) {
+ entry2:
+ %b = load i32, i32* %a
+ %c = add i32 %b, 1
+ store i32 %c, i32* %a
+ ret void
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry2:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:87: expected ',' before the next machine memory operand
+ INC32m killed %rdi, 1, _, 0, _, implicit-def dead %eflags :: (store 4 into %ir.a) (load 4 from %ir.a)
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/expected-different-implicit-operand.mir b/test/CodeGen/MIR/X86/expected-different-implicit-operand.mir
index c5f5aaca34e0..601551a7720a 100644
--- a/test/CodeGen/MIR/X86/expected-different-implicit-operand.mir
+++ b/test/CodeGen/MIR/X86/expected-different-implicit-operand.mir
@@ -19,20 +19,16 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %rdi, 1, _, 0, _'
- - 'CMP32ri8 %eax, 10, implicit-def %eflags'
-# CHECK: [[@LINE+1]]:26: expected an implicit register operand 'implicit %eflags'
- - 'JG_1 %bb.2.exit, implicit %eax'
- - id: 1
- name: less
- instructions:
- - '%eax = MOV32r0 implicit-def %eflags'
- - id: 2
- name: exit
- instructions:
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ %eax = MOV32rm %rdi, 1, _, 0, _
+ CMP32ri8 %eax, 10, implicit-def %eflags
+ ; CHECK: [[@LINE+1]]:35: missing implicit register operand 'implicit %eflags'
+ JG_1 %bb.2.exit, implicit %eax
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def %eflags
+
+ bb.2.exit:
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/expected-different-implicit-register-flag.mir b/test/CodeGen/MIR/X86/expected-different-implicit-register-flag.mir
index ecf3a122bf66..6494960d3264 100644
--- a/test/CodeGen/MIR/X86/expected-different-implicit-register-flag.mir
+++ b/test/CodeGen/MIR/X86/expected-different-implicit-register-flag.mir
@@ -19,20 +19,16 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %rdi, 1, _, 0, _'
- - 'CMP32ri8 %eax, 10, implicit-def %eflags'
-# CHECK: [[@LINE+1]]:26: expected an implicit register operand 'implicit %eflags'
- - 'JG_1 %bb.2.exit, implicit-def %eflags'
- - id: 1
- name: less
- instructions:
- - '%eax = MOV32r0 implicit-def %eflags'
- - id: 2
- name: exit
- instructions:
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ %eax = MOV32rm %rdi, 1, _, 0, _
+ CMP32ri8 %eax, 10, implicit-def %eflags
+ ; CHECK: [[@LINE+1]]:42: missing implicit register operand 'implicit %eflags'
+ JG_1 %bb.2.exit, implicit-def %eflags
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def %eflags
+
+ bb.2.exit:
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/expected-from-in-memory-operand.mir b/test/CodeGen/MIR/X86/expected-from-in-memory-operand.mir
new file mode 100644
index 000000000000..f9e9d0b22968
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-from-in-memory-operand.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32* %a) {
+ entry:
+ %b = load i32, i32* %a
+ ret i32 %b
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:55: expected 'from'
+ %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (load 4 %ir.a)
+ RETQ %eax
+...
+
diff --git a/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir b/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir
new file mode 100644
index 000000000000..de6a745fd702
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir
@@ -0,0 +1,30 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @addr = global i8* null
+
+ define void @test() {
+ entry:
+ store volatile i8* blockaddress(@test, %block), i8** @addr
+ %val = load volatile i8*, i8** @addr
+ indirectbr i8* %val, [label %block]
+
+ block:
+ ret void
+ }
+
+...
+---
+name: test
+body: |
+ bb.0.entry:
+ successors: %bb.1.block
+ ; CHECK: [[@LINE+1]]:44: expected an IR function reference
+ %rax = LEA64r %rip, 1, _, blockaddress(@addr, %ir-block.block), _
+ MOV64mr %rip, 1, _, @addr, _, killed %rax
+ JMP64m %rip, 1, _, @addr, _
+
+ bb.1.block (address-taken):
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir b/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir
new file mode 100644
index 000000000000..f737c06c3e1e
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir
@@ -0,0 +1,30 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @addr = global i8* null
+
+ define void @test() {
+ entry:
+ store volatile i8* blockaddress(@test, %block), i8** @addr
+ %val = load volatile i8*, i8** @addr
+ indirectbr i8* %val, [label %block]
+
+ block:
+ ret void
+ }
+
+...
+---
+name: test
+body: |
+ bb.0.entry:
+ successors: %bb.1.block
+ ; CHECK: [[@LINE+1]]:44: expected a global value
+ %rax = LEA64r %rip, 1, _, blockaddress(0, %ir-block.block), _
+ MOV64mr %rip, 1, _, @addr, _, killed %rax
+ JMP64m %rip, 1, _, @addr, _
+
+ bb.1.block (address-taken):
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir b/test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir
new file mode 100644
index 000000000000..e337292f17a2
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @G = external global i32
+
+ define i32 @inc() {
+ entry:
+ %a = load i32, i32* @G
+ %b = add i32 %a, 1
+ ret i32 %b
+ }
+
+...
+---
+name: inc
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:37: expected an integer literal after '+'
+ %rax = MOV64rm %rip, 1, _, @G + , _
+ %eax = MOV32rm %rax, 1, _, 0, _
+ %eax = INC32r %eax, implicit-def %eflags
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir b/test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir
new file mode 100644
index 000000000000..580d2bc0a419
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ define i64 @test(i64 %x) #0 {
+ entry:
+ %asm = tail call i64 asm sideeffect "$foo", "=r,0"(i64 %x) nounwind
+ ret i64 %asm
+ }
+
+ attributes #0 = { nounwind }
+...
+---
+name: test
+hasInlineAsm: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+
+ ; CHECK: [[@LINE+1]]:78: expected an integer literal after 'tied-def'
+ INLINEASM $"$foo", 1, 2818058, def %rdi, 2147483657, killed %rdi(tied-def)
+ %rax = COPY killed %rdi
+ RETQ killed %rax
+...
diff --git a/test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir b/test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir
new file mode 100644
index 000000000000..83874eb67476
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir
@@ -0,0 +1,38 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less:
+ ret i32 0
+
+ exit:
+ ret i32 %a
+ }
+
+...
+---
+name: foo
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:29: expected an integer literal after '('
+ successors: %bb.1.less (_), %bb.2.exit(32)
+ liveins: %edi
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit killed %eflags
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
+
+ bb.2.exit:
+ liveins: %edi
+
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir b/test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir
new file mode 100644
index 000000000000..8fcd622a18e6
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir
@@ -0,0 +1,23 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32* %a) {
+ entry:
+ %b = load i32, i32* %a
+ ret i32 %b
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:48: expected 'load' or 'store' memory operation
+ %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (4 from %ir.a)
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-machine-operand.mir b/test/CodeGen/MIR/X86/expected-machine-operand.mir
index 3725c93cd3ea..3ba5126b9982 100644
--- a/test/CodeGen/MIR/X86/expected-machine-operand.mir
+++ b/test/CodeGen/MIR/X86/expected-machine-operand.mir
@@ -10,12 +10,10 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: [[@LINE+1]]:24: expected a machine operand
- - '%eax = XOR32rr ='
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:20: expected a machine operand
+ %eax = XOR32rr =
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir b/test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir
new file mode 100644
index 000000000000..620bb5d961ee
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir
@@ -0,0 +1,59 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32 %x) #0 !dbg !4 {
+ entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14
+ %0 = load i32, i32* %x.addr, align 4, !dbg !15
+ ret i32 %0, !dbg !15
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+ attributes #0 = { nounwind "no-frame-pointer-elim"="false" }
+ attributes #1 = { nounwind readnone }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!9, !10}
+ !llvm.ident = !{!11}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+ !1 = !DIFile(filename: "test.ll", directory: "")
+ !2 = !{}
+ !3 = !{!4}
+ !4 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 4, type: !6, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+ !5 = !DIFile(filename: "test.c", directory: "")
+ !6 = !DISubroutineType(types: !7)
+ !7 = !{!8, !8}
+ !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !9 = !{i32 2, !"Dwarf Version", i32 4}
+ !10 = !{i32 2, !"Debug Info Version", i32 3}
+ !11 = !{!"clang version 3.7.0"}
+ !12 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 4, type: !8)
+ !13 = !DIExpression()
+ !14 = !DILocation(line: 4, scope: !4)
+ !15 = !DILocation(line: 8, scope: !4)
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, name: x.addr, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ %0 = COPY %edi
+ ; CHECK: [[@LINE+1]]:46: expected a metadata node after 'debug-location'
+ DBG_VALUE _, 0, !12, !13, debug-location 14
+ MOV32mr %stack.x.addr, 1, _, 0, _, %0
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir b/test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir
new file mode 100644
index 000000000000..6497f5db2026
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir
@@ -0,0 +1,59 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32 %x) #0 !dbg !4 {
+ entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14
+ %0 = load i32, i32* %x.addr, align 4, !dbg !15
+ ret i32 %0, !dbg !15
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+ attributes #0 = { nounwind "no-frame-pointer-elim"="false" }
+ attributes #1 = { nounwind readnone }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!9, !10}
+ !llvm.ident = !{!11}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+ !1 = !DIFile(filename: "test.ll", directory: "")
+ !2 = !{}
+ !3 = !{!4}
+ !4 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 4, type: !6, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+ !5 = !DIFile(filename: "test.c", directory: "")
+ !6 = !DISubroutineType(types: !7)
+ !7 = !{!8, !8}
+ !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !9 = !{i32 2, !"Dwarf Version", i32 4}
+ !10 = !{i32 2, !"Debug Info Version", i32 3}
+ !11 = !{!"clang version 3.7.0"}
+ !12 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 4, type: !8)
+ !13 = !DIExpression()
+ !14 = !DILocation(line: 4, scope: !4)
+ !15 = !DILocation(line: 8, scope: !4)
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, name: x.addr, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ %0 = COPY %edi
+ ; CHECK: [[@LINE+1]]:28: expected metadata id after '!'
+ DBG_VALUE _, 0, !12, ! _
+ MOV32mr %stack.0.x.addr, 1, _, 0, _, %0
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir b/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir
new file mode 100644
index 000000000000..9a4696779fb5
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ define i32 @test(i32 %x) {
+ entry:
+ %xa = alloca i32, align 4
+ store i32 %x, i32* %xa, align 4
+ %0 = load i32, i32* %xa, align 4
+ ret i32 %0
+ }
+...
+---
+name: test
+liveins:
+ - { reg: '%edi' }
+stack:
+# CHECK: [[@LINE+1]]:74: expected a metadata node
+ - { id: 0, name: xa, offset: -12, size: 4, alignment: 4, di-variable: '0' }
+body: |
+ bb.0.entry:
+ liveins: %edi
+
+ MOV32mr %rsp, 1, _, -4, _, %edi :: (store 4 into %ir.xa)
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir b/test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir
new file mode 100644
index 000000000000..04568f6dde57
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir
@@ -0,0 +1,29 @@
+# RUN: not llc -march=x86-64 -start-after machine-scheduler -stop-after machine-scheduler -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32 %a, i32 %b) {
+ body:
+ %c = mul i32 %a, %b
+ ret i32 %c
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+ # CHECK: [[@LINE+1]]:48: expected a named register
+ - { id: 1, class: gr32, preferred-register: '%0' }
+ - { id: 2, class: gr32, preferred-register: '%edi' }
+body: |
+ bb.0.body:
+ liveins: %edi, %esi
+
+ %1 = COPY %esi
+ %2 = COPY %edi
+ %2 = IMUL32rr %2, %1, implicit-def dead %eflags
+ %eax = COPY %2
+ RETQ killed %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir b/test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir
new file mode 100644
index 000000000000..be57734ecf33
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir
@@ -0,0 +1,88 @@
+# RUN: not llc -march=x86-64 -start-after prologepilog -stop-after prologepilog -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @compute(i32 %a) {
+ body:
+ ret i32 %a
+ }
+
+ define i32 @func(i32 %a) {
+ entry:
+ %b = alloca i32
+ store i32 %a, i32* %b
+ br label %check
+
+ check:
+ %comp = icmp sle i32 %a, 10
+ br i1 %comp, label %loop, label %exit
+
+ loop:
+ %c = load i32, i32* %b
+ %d = call i32 @compute(i32 %c)
+ %e = sub i32 %d, 1
+ store i32 %e, i32* %b
+ br label %check
+
+ exit:
+ ret i32 0
+ }
+
+...
+---
+name: compute
+tracksRegLiveness: true
+body: |
+ bb.0.body:
+ liveins: %edi
+
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
+---
+name: func
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 24
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+fixedStack:
+ # CHECK: [[@LINE+1]]:93: expected a named register
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%0' }
+stack:
+ - { id: 0, name: b, offset: -20, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ successors: %bb.1.check
+ liveins: %edi, %rbx
+
+ frame-setup PUSH64r killed %rbx, implicit-def %rsp, implicit %rsp
+ %rsp = frame-setup SUB64ri8 %rsp, 16, implicit-def dead %eflags
+ %ebx = COPY %edi
+ MOV32mr %rsp, 1, _, 12, _, %ebx
+
+ bb.1.check:
+ successors: %bb.2.loop, %bb.3.exit
+ liveins: %ebx
+
+ CMP32ri8 %ebx, 10, implicit-def %eflags
+ JG_1 %bb.3.exit, implicit killed %eflags
+ JMP_1 %bb.2.loop
+
+ bb.2.loop:
+ successors: %bb.1.check
+ liveins: %ebx
+
+ %edi = MOV32rm %rsp, 1, _, 12, _
+ CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax
+ %eax = DEC32r killed %eax, implicit-def dead %eflags
+ MOV32mr %rsp, 1, _, 12, _, killed %eax
+ JMP_1 %bb.1.check
+
+ bb.3.exit:
+ %eax = MOV32r0 implicit-def dead %eflags
+ %rsp = ADD64ri8 %rsp, 16, implicit-def dead %eflags
+ %rbx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir b/test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir
new file mode 100644
index 000000000000..ae9f776ad769
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir
@@ -0,0 +1,27 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32 %a) {
+ body:
+ ret i32 %a
+ }
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+liveins:
+ # CHECK: [[@LINE+1]]:13: expected a named register
+ - { reg: '%0' }
+body: |
+ bb.0.body:
+ liveins: %edi
+
+ %0 = COPY %edi
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-named-register-livein.mir b/test/CodeGen/MIR/X86/expected-named-register-livein.mir
index 1fbe881c8c70..41e6a4a6cc88 100644
--- a/test/CodeGen/MIR/X86/expected-named-register-livein.mir
+++ b/test/CodeGen/MIR/X86/expected-named-register-livein.mir
@@ -10,12 +10,11 @@
...
---
name: test
-body:
- - id: 0
- name: body
- # CHECK: [[@LINE+1]]:21: expected a named register
- liveins: [ '%0' ]
- instructions:
- - '%eax = COPY %edi'
- - 'RETQ %eax'
+body: |
+ bb.0.body:
+ ; CHECK: [[@LINE+1]]:14: expected a named register
+ liveins: %0
+
+ %eax = COPY %edi
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir b/test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir
new file mode 100644
index 000000000000..1f0439d126f4
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir
@@ -0,0 +1,41 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less: ; preds = %entry
+ ret i32 0
+
+ exit: ; preds = %entry
+ ret i32 %a
+ }
+
+...
+---
+name: foo
+tracksRegLiveness: true
+liveins:
+ - { reg: '%edi' }
+body: |
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+ ; CHECK: [[@LINE+1]]:19: expected line break at the end of a list
+ liveins: %edi 44
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit killed %eflags
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
+
+ bb.2.exit:
+ liveins: %edi
+
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-number-after-bb.mir b/test/CodeGen/MIR/X86/expected-number-after-bb.mir
index 5343a847fbb9..a239cf176f5f 100644
--- a/test/CodeGen/MIR/X86/expected-number-after-bb.mir
+++ b/test/CodeGen/MIR/X86/expected-number-after-bb.mir
@@ -18,20 +18,16 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %rdi, 1, _, 0, _'
- - 'CMP32ri8 %eax, 10, implicit-def %eflags'
- # CHECK: [[@LINE+1]]:18: expected a number after '%bb.'
- - 'JG_1 %bb.nah, implicit %eflags'
- - id: 1
- name: yes
- instructions:
- - '%eax = MOV32r0 implicit-def %eflags'
- - id: 2
- name: nah
- instructions:
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ %eax = MOV32rm %rdi, 1, _, 0, _
+ CMP32ri8 %eax, 10, implicit-def %eflags
+ ; CHECK: [[@LINE+1]]:14: expected a number after '%bb.'
+ JG_1 %bb.nah, implicit %eflags
+
+ bb.1.true:
+ %eax = MOV32r0 implicit-def %eflags
+
+ bb.2.nah:
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir b/test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir
new file mode 100644
index 000000000000..aefeed9ce05e
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir
@@ -0,0 +1,27 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define void @test() {
+ entry:
+ %tmp = alloca [4168 x i8], align 4
+ ret void
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 4040
+stack:
+ - { id: 0, name: tmp, offset: -4176, size: 4168, alignment: 4 }
+body: |
+ bb.0.entry:
+ %rsp = SUB64ri32 %rsp, 4040, implicit-def dead %eflags
+ ; CHECK: [[@LINE+1]]:41: expected a cfi offset
+ CFI_INSTRUCTION .cfi_def_cfa_offset _
+ %rsp = ADD64ri32 %rsp, 4040, implicit-def dead %eflags
+ RETQ
+...
+
diff --git a/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir b/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir
new file mode 100644
index 000000000000..fca078c3497c
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32* %a) {
+ entry:
+ %b = load i32, i32* %a
+ ret i32 %b
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:60: expected a pointer IR value
+ %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (load 4 from %ir.b)
+ RETQ %eax
+...
+
diff --git a/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir b/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir
new file mode 100644
index 000000000000..31b4c5be1251
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir
@@ -0,0 +1,30 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define void @memory_alignment(<8 x float>* %vec) {
+ entry:
+ %v = load <8 x float>, <8 x float>* %vec
+ %v2 = insertelement <8 x float> %v, float 0.0, i32 4
+ store <8 x float> %v2, <8 x float>* %vec
+ ret void
+ }
+
+...
+---
+name: memory_alignment
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:71: expected an integer literal after 'align'
+ %xmm0 = MOVAPSrm %rdi, 1, _, 0, _ :: (load 16 from %ir.vec, align -32)
+ %xmm1 = MOVAPSrm %rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16, align 32)
+ %xmm2 = FsFLD0SS
+ %xmm1 = MOVSSrr killed %xmm1, killed %xmm2
+ MOVAPSmr %rdi, 1, _, 0, _, killed %xmm0 :: (store 16 into %ir.vec, align 32)
+ MOVAPSmr killed %rdi, 1, _, 16, _, killed %xmm1 :: (store 16 into %ir.vec + 16, align 32)
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir b/test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir
new file mode 100644
index 000000000000..3280fca6d551
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir
@@ -0,0 +1,42 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ declare void @foo(i32)
+
+ define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) {
+ entry:
+ %add = add nsw i32 %b, %a
+ %add1 = add nsw i32 %add, %c
+ %add2 = add nsw i32 %add1, %d
+ tail call void @foo(i32 %add2)
+ %add6 = add nsw i32 %add2, %add2
+ ret i32 %add6
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 8
+ adjustsStack: true
+ hasCalls: true
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16 }
+body: |
+ bb.0.entry:
+ PUSH64r killed %rbx, implicit-def %rsp, implicit %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 16
+ ; CHECK: [[@LINE+1]]:33: expected a cfi register
+ CFI_INSTRUCTION .cfi_offset %0, -16
+ %ebx = COPY %edi, implicit-def %rbx
+ %ebx = ADD32rr %ebx, killed %esi, implicit-def dead %eflags
+ %ebx = ADD32rr %ebx, killed %edx, implicit-def dead %eflags
+ %ebx = ADD32rr %ebx, killed %ecx, implicit-def dead %eflags
+ %edi = COPY %ebx
+ CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp
+ %eax = LEA64_32r killed %rbx, 1, %rbx, 0, _
+ %rbx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-register-after-flags.mir b/test/CodeGen/MIR/X86/expected-register-after-flags.mir
index 111f5496a378..68f1060ad873 100644
--- a/test/CodeGen/MIR/X86/expected-register-after-flags.mir
+++ b/test/CodeGen/MIR/X86/expected-register-after-flags.mir
@@ -12,11 +12,9 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: [[@LINE+1]]:37: expected a register after register flags
- - '%eax = MOV32r0 implicit-def 2'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:33: expected a register after register flags
+ %eax = MOV32r0 implicit-def 2
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir b/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir
new file mode 100644
index 000000000000..71ff15bd9c52
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32* %a) {
+ entry:
+ %b = load i32, i32* %a
+ ret i32 %b
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:53: expected the size integer literal after memory operation
+ %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (load from %ir.a)
+ RETQ %eax
+...
+
diff --git a/test/CodeGen/MIR/X86/expected-stack-object.mir b/test/CodeGen/MIR/X86/expected-stack-object.mir
new file mode 100644
index 000000000000..ff0c10d59e33
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-stack-object.mir
@@ -0,0 +1,67 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+
+--- |
+ @.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+ @__stack_chk_guard = external global i8*
+
+ define i32 @test() #0 {
+ entry:
+ %StackGuardSlot = alloca i8*
+ %StackGuard = load i8*, i8** @__stack_chk_guard
+ call void @llvm.stackprotector(i8* %StackGuard, i8** %StackGuardSlot)
+ %test = alloca i8*, align 8
+ %a = alloca i8, i64 5
+ store i8* %a, i8** %test, align 8
+ %b = load i8*, i8** %test, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %b)
+ call void @llvm.stackprotectorcheck(i8** @__stack_chk_guard)
+ ret i32 %call
+ }
+
+ declare i32 @printf(i8*, ...)
+
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ declare void @llvm.stackprotectorcheck(i8**) #2
+
+ attributes #0 = { ssp "stack-protector-buffer-size"="5" }
+ attributes #1 = { nounwind }
+ attributes #2 = { nounwind argmemonly }
+...
+---
+name: test
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 40
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+# CHECK: [[@LINE+1]]:21: expected a stack object
+ stackProtector: '0'
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16,
+ callee-saved-register: '%rbx' }
+stack:
+ - { id: 0, name: StackGuardSlot, offset: -24, size: 8, alignment: 8 }
+ - { id: 1, name: test, offset: -40, size: 8, alignment: 8 }
+ - { id: 2, name: a, offset: -29, size: 5, alignment: 1 }
+body: |
+ bb.0.entry:
+ liveins: %rbx, %rbx
+
+ frame-setup PUSH64r killed %rbx, implicit-def %rsp, implicit %rsp
+ %rsp = frame-setup SUB64ri8 %rsp, 32, implicit-def dead %eflags
+ %rbx = LOAD_STACK_GUARD :: (invariant load 8 from %ir.__stack_chk_guard)
+ MOV64mr %rsp, 1, _, 24, _, %rbx
+ %rsi = LEA64r %rsp, 1, _, 19, _
+ MOV64mr %rsp, 1, _, 8, _, %rsi
+ %rdi = LEA64r %rip, 1, _, @.str, _
+ dead %eax = MOV32r0 implicit-def dead %eflags, implicit-def %al
+ CALL64pcrel32 @printf, csr_64, implicit %rsp, implicit %rdi, implicit %rsi, implicit %al, implicit-def %rsp, implicit-def %eax
+ CMP64rm killed %rbx, %rsp, 1, _, 24, _, implicit-def %eflags
+ %rsp = ADD64ri8 %rsp, 32, implicit-def dead %eflags
+ %rbx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-subregister-after-colon.mir b/test/CodeGen/MIR/X86/expected-subregister-after-colon.mir
index c891a115a180..6283427c10b3 100644
--- a/test/CodeGen/MIR/X86/expected-subregister-after-colon.mir
+++ b/test/CodeGen/MIR/X86/expected-subregister-after-colon.mir
@@ -16,14 +16,12 @@ registers:
- { id: 0, class: gr32 }
- { id: 1, class: gr8 }
- { id: 2, class: gr8 }
-body:
- - name: entry
- id: 0
- instructions:
- - '%0 = COPY %edi'
- # CHECK: [[@LINE+1]]:25: expected a subregister index after ':'
- - '%1 = COPY %0 : 42'
- - '%2 = AND8ri %1, 1, implicit-def %eflags'
- - '%al = COPY %2'
- - 'RETQ %al'
+body: |
+ bb.0.entry:
+ %0 = COPY %edi
+ ; CHECK: [[@LINE+1]]:20: expected a subregister index after ':'
+ %1 = COPY %0 : 42
+ %2 = AND8ri %1, 1, implicit-def %eflags
+ %al = COPY %2
+ RETQ %al
...
diff --git a/test/CodeGen/MIR/X86/expected-target-flag-name.mir b/test/CodeGen/MIR/X86/expected-target-flag-name.mir
new file mode 100644
index 000000000000..3d094a11e9f3
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-target-flag-name.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @G = external global i32
+
+ define i32 @inc() {
+ entry:
+ %a = load i32, i32* @G
+ %b = add i32 %a, 1
+ ret i32 %b
+ }
+
+...
+---
+name: inc
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:46: expected the name of the target flag
+ %rax = MOV64rm %rip, 1, _, target-flags( ) @G, _
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ %eax = INC32r killed %eax, implicit-def dead %eflags
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir b/test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir
new file mode 100644
index 000000000000..e8d6afd5333e
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ define i64 @test(i64 %x) #0 {
+ entry:
+ %asm = tail call i64 asm sideeffect "$foo", "=r,0"(i64 %x) nounwind
+ ret i64 %asm
+ }
+
+ attributes #0 = { nounwind }
+...
+---
+name: test
+hasInlineAsm: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+
+ ; CHECK: [[@LINE+1]]:70: expected 'tied-def' after '('
+ INLINEASM $"$foo", 1, 2818058, def %rdi, 2147483657, killed %rdi(3)
+ %rax = COPY killed %rdi
+ RETQ killed %rax
+...
diff --git a/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir b/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir
new file mode 100644
index 000000000000..f99443f1726d
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32* %a) {
+ entry:
+ %b = load i32, i32* %a
+ ret i32 %b
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:60: expected an IR value reference
+ %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (load 4 from a)
+ RETQ %eax
+...
+
diff --git a/test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir b/test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir
new file mode 100644
index 000000000000..da0d1e166a1c
--- /dev/null
+++ b/test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir
@@ -0,0 +1,27 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32 %a) {
+ body:
+ ret i32 %a
+ }
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+liveins:
+ # CHECK: [[@LINE+1]]:34: expected a virtual register
+ - { reg: '%edi', virtual-reg: '%edi' }
+body: |
+ bb.0.body:
+ liveins: %edi
+
+ %0 = COPY %edi
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/external-symbol-operands.mir b/test/CodeGen/MIR/X86/external-symbol-operands.mir
new file mode 100644
index 000000000000..7e85d946b75a
--- /dev/null
+++ b/test/CodeGen/MIR/X86/external-symbol-operands.mir
@@ -0,0 +1,64 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the external symbol machine
+# operands correctly.
+
+--- |
+ @__stack_chk_guard = external global i8*
+
+ define i32 @test(i32 %n) #0 {
+ entry:
+ %StackGuardSlot = alloca i8*
+ %StackGuard = load i8*, i8** @__stack_chk_guard
+ call void @llvm.stackprotector(i8* %StackGuard, i8** %StackGuardSlot)
+ %a = alloca [128 x i32], align 16
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds [128 x i32], [128 x i32]* %a, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ call void @llvm.stackprotectorcheck(i8** @__stack_chk_guard)
+ ret i32 %0
+ }
+
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ declare void @llvm.stackprotectorcheck(i8**) #1
+
+ attributes #0 = { ssp "stack-protector-buffer-size"="8" }
+ attributes #1 = { nounwind }
+
+...
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.1.entry, %bb.2.entry
+ liveins: %edi
+
+ %rsp = SUB64ri32 %rsp, 520, implicit-def %eflags
+ %rcx = LOAD_STACK_GUARD
+ MOV64mr %rsp, 1, _, 512, _, %rcx
+ %rax = MOVSX64rr32 %edi
+ %eax = MOV32rm %rsp, 4, %rax, 0, _
+ CMP64rm %rcx, %rsp, 1, _, 512, _, implicit-def %eflags
+ JNE_1 %bb.2.entry, implicit %eflags
+
+ bb.1.entry:
+ liveins: %eax
+
+ %rsp = ADD64ri32 %rsp, 520, implicit-def %eflags
+ RETQ %eax
+
+ bb.2.entry:
+ ; CHECK: CALL64pcrel32 $__stack_chk_fail,
+ ; CHECK-NEXT: CALL64pcrel32 $__stack_chk_fail.09-_,
+ ; CHECK-NEXT: CALL64pcrel32 $"__stack_chk_fail$",
+ ; CHECK-NEXT: CALL64pcrel32 $"$Quoted \09 External symbol \11 ",
+ ; CHECK-NEXT: CALL64pcrel32 $__stack_chk_fail + 2,
+ ; CHECK-NEXT: CALL64pcrel32 $" check stack - 20" - 20,
+ CALL64pcrel32 $__stack_chk_fail, csr_64, implicit %rsp, implicit-def %rsp
+ CALL64pcrel32 $__stack_chk_fail.09-_, csr_64, implicit %rsp, implicit-def %rsp
+ CALL64pcrel32 $__stack_chk_fail$, csr_64, implicit %rsp, implicit-def %rsp
+ CALL64pcrel32 $"$Quoted \09 External symbol \11 ", csr_64, implicit %rsp, implicit-def %rsp
+ CALL64pcrel32 $__stack_chk_fail + 2, csr_64, implicit %rsp, implicit-def %rsp
+ CALL64pcrel32 $" check stack - 20" - 20, csr_64, implicit %rsp, implicit-def %rsp
+...
diff --git a/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir b/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir
new file mode 100644
index 000000000000..75d0f8a39c1c
--- /dev/null
+++ b/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir
@@ -0,0 +1,39 @@
+# RUN: llc -march=x86 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses fixed stack memory operands
+# correctly.
+
+--- |
+
+ define i32 @test(i32 %a) #0 {
+ entry:
+ %b = alloca i32
+ store i32 %a, i32* %b
+ %c = load i32, i32* %b
+ ret i32 %c
+ }
+
+ attributes #0 = { "no-frame-pointer-elim"="false" }
+
+...
+---
+name: test
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 4
+ maxAlignment: 4
+fixedStack:
+ - { id: 0, offset: 0, size: 4, alignment: 16, isImmutable: true }
+stack:
+ - { id: 0, name: b, offset: -8, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ frame-setup PUSH32r undef %eax, implicit-def %esp, implicit %esp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 8
+ ; CHECK: name: test
+ ; CHECK: %eax = MOV32rm %esp, 1, _, 8, _ :: (load 4 from %fixed-stack.0, align 16)
+ %eax = MOV32rm %esp, 1, _, 8, _ :: (load 4 from %fixed-stack.0, align 16)
+ MOV32mr %esp, 1, _, 0, _, %eax :: (store 4 into %ir.b)
+ %edx = POP32r implicit-def %esp, implicit %esp
+ RETL %eax
+...
diff --git a/test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir b/test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir
new file mode 100644
index 000000000000..c4c57a1d2443
--- /dev/null
+++ b/test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir
@@ -0,0 +1,28 @@
+# RUN: not llc -march=x86 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32 %a, i32 %b) #0 {
+ entry:
+ %c = add i32 %a, %b
+ ret i32 %c
+ }
+
+ attributes #0 = { "no-frame-pointer-elim"="false" }
+
+...
+---
+name: test
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+fixedStack:
+ - { id: 0, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
+# CHECK: [[@LINE+1]]:11: redefinition of fixed stack object '%fixed-stack.0'
+ - { id: 0, offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+body: |
+ bb.0.entry:
+ %eax = MOV32rm %esp, 1, _, 4, _
+ %eax = ADD32rm killed %eax, %esp, 1, _, 8, _, implicit-def dead %eflags
+ RETL %eax
+...
diff --git a/test/CodeGen/MIR/X86/fixed-stack-objects.mir b/test/CodeGen/MIR/X86/fixed-stack-objects.mir
index dcbe6f73a6d0..70e5a7428359 100644
--- a/test/CodeGen/MIR/X86/fixed-stack-objects.mir
+++ b/test/CodeGen/MIR/X86/fixed-stack-objects.mir
@@ -25,11 +25,9 @@ fixedStack:
- { id: 0, offset: 0, size: 4, alignment: 4, isImmutable: true, isAliased: false }
stack:
- { id: 0, offset: -8, size: 4, alignment: 4 }
-body:
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %esp, 1, _, 8, _'
- - 'MOV32mr %esp, 1, _, 0, _, %eax'
- - 'RETL %eax'
+body: |
+ bb.0.entry:
+ %eax = MOV32rm %esp, 1, _, 8, _
+ MOV32mr %esp, 1, _, 0, _, %eax
+ RETL %eax
...
diff --git a/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir b/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir
new file mode 100644
index 000000000000..54fa8ad0b616
--- /dev/null
+++ b/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir
@@ -0,0 +1,73 @@
+# RUN: llc -march=x86-64 -enable-shrink-wrap=true -start-after shrink-wrap -stop-after shrink-wrap -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the save and restore points in
+# the machine frame info correctly.
+
+--- |
+
+ define i32 @foo(i32 %a, i32 %b) {
+ entry:
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+ true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+ false:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %entry ]
+ ret i32 %tmp.0
+ }
+
+ declare i32 @doSomething(i32, i32*)
+
+...
+---
+name: foo
+tracksRegLiveness: true
+liveins:
+ - { reg: '%edi' }
+ - { reg: '%esi' }
+# CHECK: frameInfo:
+# CHECK: savePoint: '%bb.2.true'
+# CHECK-NEXT: restorePoint: '%bb.2.true'
+# CHECK: stack
+frameInfo:
+ maxAlignment: 4
+ hasCalls: true
+ savePoint: '%bb.2.true'
+ restorePoint: '%bb.2.true'
+stack:
+ - { id: 0, name: tmp, offset: 0, size: 4, alignment: 4 }
+body: |
+ bb.0:
+ successors: %bb.2.true, %bb.1
+ liveins: %edi, %esi
+
+ %eax = COPY %edi
+ CMP32rr %eax, killed %esi, implicit-def %eflags
+ JL_1 %bb.2.true, implicit killed %eflags
+
+ bb.1:
+ successors: %bb.3.false
+ liveins: %eax
+
+ JMP_1 %bb.3.false
+
+ bb.2.true:
+ successors: %bb.3.false
+ liveins: %eax
+
+ MOV32mr %stack.0.tmp, 1, _, 0, _, killed %eax
+ ADJCALLSTACKDOWN64 0, 0, implicit-def %rsp, implicit-def dead %eflags, implicit %rsp
+ %rsi = LEA64r %stack.0.tmp, 1, _, 0, _
+ %edi = MOV32r0 implicit-def dead %eflags
+ CALL64pcrel32 @doSomething, csr_64, implicit %rsp, implicit %edi, implicit %rsi, implicit-def %rsp, implicit-def %eax
+ ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def dead %eflags, implicit %rsp
+
+ bb.3.false:
+ liveins: %eax
+
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/frame-info-stack-references.mir b/test/CodeGen/MIR/X86/frame-info-stack-references.mir
new file mode 100644
index 000000000000..c8fa3bbe226f
--- /dev/null
+++ b/test/CodeGen/MIR/X86/frame-info-stack-references.mir
@@ -0,0 +1,79 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the stack protector stack
+# object reference in the machine frame info correctly.
+
+--- |
+ @.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+ @__stack_chk_guard = external global i8*
+
+ define i32 @test() #0 {
+ entry:
+ %StackGuardSlot = alloca i8*
+ %StackGuard = load i8*, i8** @__stack_chk_guard
+ call void @llvm.stackprotector(i8* %StackGuard, i8** %StackGuardSlot)
+ %test = alloca i8*, align 8
+ %a = alloca i8, i64 5
+ store i8* %a, i8** %test, align 8
+ %b = load i8*, i8** %test, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %b)
+ call void @llvm.stackprotectorcheck(i8** @__stack_chk_guard)
+ ret i32 %call
+ }
+
+ declare i32 @printf(i8*, ...)
+
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ declare void @llvm.stackprotectorcheck(i8**) #2
+
+ attributes #0 = { ssp "stack-protector-buffer-size"="5" }
+ attributes #1 = { nounwind }
+ attributes #2 = { nounwind argmemonly }
+...
+---
+name: test
+alignment: 4
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 40
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+# CHECK-LABEL: name: test
+# CHECK: frameInfo
+# CHECK: stackProtector: '%stack.0.StackGuardSlot'
+ stackProtector: '%stack.0.StackGuardSlot'
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16,
+ callee-saved-register: '%rbx' }
+stack:
+ - { id: 0, name: StackGuardSlot, offset: -24, size: 8, alignment: 8 }
+ - { id: 1, name: test, offset: -40, size: 8, alignment: 8 }
+ - { id: 2, name: a, offset: -29, size: 5, alignment: 1 }
+body: |
+ bb.0.entry:
+ successors: %bb.1.entry, %bb.2.entry
+ liveins: %rbx, %rbx
+
+ frame-setup PUSH64r killed %rbx, implicit-def %rsp, implicit %rsp
+ %rsp = frame-setup SUB64ri8 %rsp, 32, implicit-def dead %eflags
+ %rbx = LOAD_STACK_GUARD :: (invariant load 8 from @__stack_chk_guard)
+ MOV64mr %rsp, 1, _, 24, _, %rbx
+ %rsi = LEA64r %rsp, 1, _, 19, _
+ MOV64mr %rsp, 1, _, 8, _, %rsi
+ %rdi = LEA64r %rip, 1, _, @.str, _
+ dead %eax = MOV32r0 implicit-def dead %eflags, implicit-def %al
+ CALL64pcrel32 @printf, csr_64, implicit %rsp, implicit %rdi, implicit %rsi, implicit %al, implicit-def %rsp, implicit-def %eax
+ CMP64rm killed %rbx, %rsp, 1, _, 24, _, implicit-def %eflags
+ JNE_1 %bb.2.entry, implicit %eflags
+
+ bb.1.entry:
+ liveins: %eax
+
+ %rsp = ADD64ri8 %rsp, 32, implicit-def dead %eflags
+ %rbx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+
+ bb.2.entry:
+ CALL64pcrel32 $__stack_chk_fail, csr_64, implicit %rsp, implicit-def %rsp
+...
diff --git a/test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir b/test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir
new file mode 100644
index 000000000000..87c1fc68046e
--- /dev/null
+++ b/test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir
@@ -0,0 +1,35 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the frame setup instruction flag.
+
+--- |
+
+ define i32 @compute(i32 %a) {
+ body:
+ %c = mul i32 %a, 11
+ ret i32 %c
+ }
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %b = call i32 @compute(i32 %a)
+ ret i32 %b
+ }
+
+...
+---
+name: compute
+body: |
+ bb.0.body:
+ %eax = IMUL32rri8 %edi, 11, implicit-def %eflags
+ RETQ %eax
+...
+---
+name: foo
+body: |
+ bb.0.entry:
+ ; CHECK: frame-setup PUSH64r %rax
+ frame-setup PUSH64r %rax, implicit-def %rsp, implicit %rsp
+ CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax
+ %rdx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/function-liveins.mir b/test/CodeGen/MIR/X86/function-liveins.mir
new file mode 100644
index 000000000000..95f8786b47a8
--- /dev/null
+++ b/test/CodeGen/MIR/X86/function-liveins.mir
@@ -0,0 +1,37 @@
+# RUN: llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses machine function's liveins
+# correctly.
+
+--- |
+
+ define i32 @test(i32 %a, i32 %b) {
+ body:
+ %c = add i32 %a, %b
+ ret i32 %c
+ }
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+ - { id: 1, class: gr32 }
+ - { id: 2, class: gr32 }
+# CHECK: liveins:
+# CHECK-NEXT: - { reg: '%edi', virtual-reg: '%0' }
+# CHECK-NEXT: - { reg: '%esi', virtual-reg: '%1' }
+liveins:
+ - { reg: '%edi', virtual-reg: '%0' }
+ - { reg: '%esi', virtual-reg: '%1' }
+body: |
+ bb.0.body:
+ liveins: %edi, %esi
+
+ %1 = COPY %esi
+ %0 = COPY %edi
+ %2 = ADD32rr %0, %1, implicit-def dead %eflags
+ %eax = COPY %2
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/global-value-operands.mir b/test/CodeGen/MIR/X86/global-value-operands.mir
index 3ea729b00554..394aa397aef4 100644
--- a/test/CodeGen/MIR/X86/global-value-operands.mir
+++ b/test/CodeGen/MIR/X86/global-value-operands.mir
@@ -20,30 +20,121 @@
ret i32 %b
}
+ @.$0 = external global i32
+ @-_- = external global i32
+ @_-_a = external global i32
+ @$.-B = external global i32
+
+ define i32 @test() {
+ entry:
+ %a = load i32, i32* @.$0
+ store i32 %a, i32* @-_-
+ %b = load i32, i32* @_-_a
+ store i32 %b, i32* @$.-B
+ ret i32 %b
+ }
+
+ @"\01Hello@$%09 \\ World," = external global i32
+
+ define i32 @test2() {
+ entry:
+ %a = load i32, i32* @"\01Hello@$%09 \\ World,"
+ ret i32 %a
+ }
+
+ define i32 @test3() {
+ entry:
+ %a = load i32, i32* @.$0
+ store i32 %a, i32* @-_-
+ %b = load i32, i32* @_-_a
+ store i32 %b, i32* @$.-B
+ ret i32 %b
+ }
+
+ define i32 @tf() {
+ entry:
+ %a = load i32, i32* @G
+ %b = add i32 %a, 1
+ ret i32 %b
+ }
+
...
---
# CHECK: name: inc
name: inc
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - '%rax = MOV64rm %rip, 1, _, @G, _'
- - '%rax = MOV64rm %rip, 1, _, @G, _'
- - '%eax = MOV32rm %rax, 1, _, 0, _'
- - '%eax = INC32r %eax, implicit-def %eflags'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: %rax = MOV64rm %rip, 1, _, @G, _
+ %rax = MOV64rm %rip, 1, _, @G, _
+ %eax = MOV32rm %rax, 1, _, 0, _
+ %eax = INC32r %eax, implicit-def %eflags
+ RETQ %eax
...
---
# CHECK: name: inc2
name: inc2
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - '%rax = MOV64rm %rip, 1, _, @0, _'
- - '%rax = MOV64rm %rip, 1, _, @0, _'
- - '%eax = MOV32rm %rax, 1, _, 0, _'
- - '%eax = INC32r %eax, implicit-def %eflags'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: %rax = MOV64rm %rip, 1, _, @0, _
+ %rax = MOV64rm %rip, 1, _, @0, _
+ %eax = MOV32rm %rax, 1, _, 0, _
+ %eax = INC32r %eax, implicit-def %eflags
+ RETQ %eax
+...
+---
+name: test
+body: |
+ bb.0.entry:
+ ; CHECK: , @".$0",
+ ; CHECK: , @-_-,
+ ; CHECK: , @_-_a,
+ ; CHECK: , @"$.-B",
+ %rax = MOV64rm %rip, 1, _, @.$0, _
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ %rcx = MOV64rm %rip, 1, _, @-_-, _
+ MOV32mr killed %rcx, 1, _, 0, _, killed %eax
+ %rax = MOV64rm %rip, 1, _, @_-_a, _
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ %rcx = MOV64rm %rip, 1, _, @$.-B, _
+ MOV32mr killed %rcx, 1, _, 0, _, %eax
+ RETQ %eax
+...
+---
+name: test2
+body: |
+ bb.0.entry:
+ ; CHECK: , @"\01Hello@$%09 \5C World,",
+ %rax = MOV64rm %rip, 1, _, @"\01Hello@$%09 \\ World,", _
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ RETQ %eax
+...
+---
+# CHECK: name: test3
+name: test3
+body: |
+ bb.0.entry:
+ ; CHECK: , @".$0",
+ ; CHECK: , @-_-,
+ ; CHECK: , @_-_a + 4,
+ ; CHECK: , @"$.-B" - 8,
+ %rax = MOV64rm %rip, 1, _, @.$0 + 0, _
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ %rcx = MOV64rm %rip, 1, _, @-_- - 0, _
+ MOV32mr killed %rcx, 1, _, 0, _, killed %eax
+ %rax = MOV64rm %rip, 1, _, @_-_a + 4, _
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ %rcx = MOV64rm %rip, 1, _, @$.-B - 8, _
+ MOV32mr killed %rcx, 1, _, 0, _, %eax
+ RETQ %eax
+...
+---
+# CHECK: name: tf
+name: tf
+body: |
+ bb.0.entry:
+ ; CHECK: %rax = MOV64rm %rip, 1, _, target-flags(x86-gotpcrel) @G, _
+ %rax = MOV64rm %rip, 1, _, target-flags(x86-gotpcrel) @G, _
+ %eax = MOV32rm %rax, 1, _, 0, _
+ %eax = INC32r %eax, implicit-def %eflags
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/immediate-operands.mir b/test/CodeGen/MIR/X86/immediate-operands.mir
index 5d4956f539dd..34bd0fa14904 100644
--- a/test/CodeGen/MIR/X86/immediate-operands.mir
+++ b/test/CodeGen/MIR/X86/immediate-operands.mir
@@ -17,24 +17,20 @@
---
# CHECK: name: foo
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - '%eax = MOV32ri 42'
- # CHECK-NEXT: - 'RETQ %eax'
- - '%eax = MOV32ri 42'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: %eax = MOV32ri 42
+ ; CHECK-NEXT: RETQ %eax
+ %eax = MOV32ri 42
+ RETQ %eax
...
---
# CHECK: name: bar
name: bar
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - '%eax = MOV32ri -11'
- # CHECK-NEXT: - 'RETQ %eax'
- - '%eax = MOV32ri -11'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: %eax = MOV32ri -11
+ ; CHECK-NEXT: RETQ %eax
+ %eax = MOV32ri -11
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/implicit-register-flag.mir b/test/CodeGen/MIR/X86/implicit-register-flag.mir
index 9c6882d27bdc..b0a15ed93a8f 100644
--- a/test/CodeGen/MIR/X86/implicit-register-flag.mir
+++ b/test/CodeGen/MIR/X86/implicit-register-flag.mir
@@ -16,26 +16,53 @@
ret i32 %a
}
+ define i1 @implicit_subregister1() {
+ entry:
+ ret i1 false
+ }
+
+ define i16 @implicit_subregister2() {
+ entry:
+ ret i16 0
+ }
+
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - 'CMP32ri8 %edi, 10, implicit-def %eflags'
- # CHECK-NEXT: - 'JG_1 %bb.2.exit, implicit %eflags'
- - 'CMP32ri8 %edi, 10, implicit-def %eflags'
- - 'JG_1 %bb.2.exit, implicit %eflags'
- - id: 1
- name: less
- instructions:
- # CHECK: - '%eax = MOV32r0 implicit-def %eflags'
- - '%eax = MOV32r0 implicit-def %eflags'
- - 'RETQ %eax'
- - id: 2
- name: exit
- instructions:
- - '%eax = COPY %edi'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+ ; CHECK: CMP32ri8 %edi, 10, implicit-def %eflags
+ ; CHECK-NEXT: JG_1 %bb.2.exit, implicit %eflags
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit %eflags
+
+ bb.1.less:
+ ; CHECK: %eax = MOV32r0 implicit-def %eflags
+ %eax = MOV32r0 implicit-def %eflags
+ RETQ %eax
+
+ bb.2.exit:
+ %eax = COPY %edi
+ RETQ %eax
+...
+---
+name: implicit_subregister1
+body: |
+ bb.0.entry:
+ ; Verify that the implicit register verifier won't report an error on implicit
+ ; subregisters.
+ ; CHECK-LABEL: name: implicit_subregister1
+ ; CHECK: dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al
+ dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al
+ RETQ killed %al
+...
+---
+name: implicit_subregister2
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: implicit_subregister2
+ ; CHECK: dead %r15 = XOR64rr undef %r15, undef %r15, implicit-def dead %eflags, implicit-def %r15w
+ dead %r15 = XOR64rr undef %r15, undef %r15, implicit-def dead %eflags, implicit-def %r15w
+ RETQ killed %r15w
...
diff --git a/test/CodeGen/MIR/X86/inline-asm-registers.mir b/test/CodeGen/MIR/X86/inline-asm-registers.mir
new file mode 100644
index 000000000000..3fd565891091
--- /dev/null
+++ b/test/CodeGen/MIR/X86/inline-asm-registers.mir
@@ -0,0 +1,54 @@
+# RUN: llc -march=x86-64 -start-after block-placement -stop-after block-placement -o /dev/null %s | FileCheck %s
+
+--- |
+ define i64 @test(i64 %x, i64 %y) #0 {
+ entry:
+ %x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) #0
+ %x1 = extractvalue { i64, i64 } %x0, 0
+ ret i64 %x1
+ }
+
+ define i64 @test2(i64 %x, i64 %y) #0 {
+ entry:
+ %x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) #0
+ %x1 = extractvalue { i64, i64 } %x0, 0
+ ret i64 %x1
+ }
+
+ attributes #0 = { nounwind }
+...
+---
+name: test
+hasInlineAsm: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+ - { reg: '%rsi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi, %rsi
+
+ ; CHECK-LABEL: name: test
+ ; CHECK: INLINEASM $foo, 0, 2818058, def %rsi, 2818058, def dead %rdi,
+ INLINEASM $foo, 0, 2818058, def %rsi, 2818058, def dead %rdi, 2147549193, killed %rdi, 2147483657, killed %rsi, 12, implicit-def dead early-clobber %eflags
+ %rax = MOV64rr killed %rsi
+ RETQ killed %rax
+...
+---
+name: test2
+hasInlineAsm: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+ - { reg: '%rsi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi, %rsi
+
+ ; Verify that the register ties are preserved.
+ ; CHECK-LABEL: name: test2
+ ; CHECK: INLINEASM $foo, 0, 2818058, def %rsi, 2818058, def dead %rdi, 2147549193, killed %rdi(tied-def 5), 2147483657, killed %rsi(tied-def 3), 12, implicit-def dead early-clobber %eflags
+ INLINEASM $foo, 0, 2818058, def %rsi, 2818058, def dead %rdi, 2147549193, killed %rdi(tied-def 5), 2147483657, killed %rsi(tied-def 3), 12, implicit-def dead early-clobber %eflags
+ %rax = MOV64rr killed %rsi
+ RETQ killed %rax
+...
diff --git a/test/CodeGen/MIR/X86/instructions-debug-location.mir b/test/CodeGen/MIR/X86/instructions-debug-location.mir
new file mode 100644
index 000000000000..ea2cdbf7cb2f
--- /dev/null
+++ b/test/CodeGen/MIR/X86/instructions-debug-location.mir
@@ -0,0 +1,98 @@
+# RUN: llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the machine instruction's
+# debug location metadata correctly.
+
+--- |
+
+ define i32 @test(i32 %x) #0 !dbg !4 {
+ entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14
+ %0 = load i32, i32* %x.addr, align 4, !dbg !15
+ ret i32 %0, !dbg !15
+ }
+
+ define i32 @test_typed_immediates(i32 %x) #0 {
+ entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14
+ %0 = load i32, i32* %x.addr, align 4, !dbg !15
+ ret i32 %0, !dbg !15
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+ attributes #0 = { nounwind "no-frame-pointer-elim"="false" }
+ attributes #1 = { nounwind readnone }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!9, !10}
+ !llvm.ident = !{!11}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+ !1 = !DIFile(filename: "test.ll", directory: "")
+ !2 = !{}
+ !3 = !{!4}
+ !4 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 4, type: !6, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+ !5 = !DIFile(filename: "test.c", directory: "")
+ !6 = !DISubroutineType(types: !7)
+ !7 = !{!8, !8}
+ !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !9 = !{i32 2, !"Dwarf Version", i32 4}
+ !10 = !{i32 2, !"Debug Info Version", i32 3}
+ !11 = !{!"clang version 3.7.0"}
+ !12 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 4, type: !8)
+ !13 = !DIExpression()
+ !14 = !DILocation(line: 4, scope: !4)
+ !15 = !DILocation(line: 8, scope: !4)
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, name: x.addr, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ liveins: %edi
+ ; CHECK: DBG_VALUE debug-use _, 0, !12, !13, debug-location !14
+ ; CHECK: %eax = COPY %0, debug-location !15
+ ; CHECK: RETQ %eax, debug-location !15
+ %0 = COPY %edi
+ DBG_VALUE debug-use _, 0, !12, !13, debug-location !14
+ MOV32mr %stack.0.x.addr, 1, _, 0, _, %0
+ %eax = COPY %0, debug-location !15
+ RETQ %eax, debug-location !15
+...
+---
+name: test_typed_immediates
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, name: x.addr, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ liveins: %edi
+
+ %0 = COPY %edi
+ ; CHECK: DBG_VALUE _, i32 0, !12, !13
+ ; CHECK-NEXT: DBG_VALUE _, i64 -22, !12, !13
+ ; CHECK-NEXT: DBG_VALUE _, i128 123492148938512984928424384934328985928, !12, !13
+ DBG_VALUE _, i32 0, !12, !13
+ DBG_VALUE _, i64 -22, !12, !13
+ DBG_VALUE _, i128 123492148938512984928424384934328985928, !12, !13
+ MOV32mr %stack.0.x.addr, 1, _, 0, _, %0
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/invalid-constant-pool-item.mir b/test/CodeGen/MIR/X86/invalid-constant-pool-item.mir
new file mode 100644
index 000000000000..afd6c78546ce
--- /dev/null
+++ b/test/CodeGen/MIR/X86/invalid-constant-pool-item.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that the MIR parser reports an error when parsing an invalid
+# constant pool item operand.
+
+--- |
+
+ define double @test(double %a, float %b) {
+ entry:
+ %c = fadd double %a, 3.250000e+00
+ ret double %c
+ }
+
+...
+---
+name: test
+constants:
+ - id: 0
+ value: 'double 3.250000e+00'
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:47: use of undefined constant '%const.10'
+ %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.10, _
+ RETQ %xmm0
+...
+
diff --git a/test/CodeGen/MIR/X86/invalid-metadata-node-type.mir b/test/CodeGen/MIR/X86/invalid-metadata-node-type.mir
new file mode 100644
index 000000000000..a6c2e509da0c
--- /dev/null
+++ b/test/CodeGen/MIR/X86/invalid-metadata-node-type.mir
@@ -0,0 +1,53 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+
+ define void @foo() #1 {
+ entry:
+ %x.i = alloca i8, align 1
+ %y.i = alloca [256 x i8], align 16
+ %0 = bitcast [256 x i8]* %y.i to i8*
+ br label %for.body
+
+ for.body:
+ %1 = bitcast [256 x i8]* %y.i to i8*
+ call void @llvm.dbg.declare(metadata i8* %0, metadata !4, metadata !7) #3, !dbg !8
+ br label %for.body
+ }
+
+ attributes #0 = { nounwind readnone }
+ attributes #1 = { nounwind ssp uwtable }
+ attributes #3 = { nounwind }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!3}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C89, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: 0, enums: !2, retainedTypes: !2)
+ !1 = !DIFile(filename: "t.c", directory: "")
+ !2 = !{}
+ !3 = !{i32 1, !"Debug Info Version", i32 3}
+ !4 = !DILocalVariable(name: "x", scope: !5, file: !1, line: 16, type: !6)
+ !5 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false)
+ !6 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+ !7 = !DIExpression()
+ !8 = !DILocation(line: 0, scope: !5)
+...
+---
+name: foo
+isSSA: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 16
+stack:
+# CHECK: [[@LINE+1]]:75: expected a reference to a 'DILocalVariable' metadata node
+ - { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!8',
+ di-expression: '!7', di-location: '!8' }
+body: |
+ bb.0.entry:
+ successors: %bb.1.for.body
+ bb.1.for.body:
+ successors: %bb.1.for.body
+
+ DBG_VALUE %stack.0.y.i, 0, !4, !7, debug-location !8
+ JMP_1 %bb.1.for.body
+...
diff --git a/test/CodeGen/MIR/X86/invalid-target-flag-name.mir b/test/CodeGen/MIR/X86/invalid-target-flag-name.mir
new file mode 100644
index 000000000000..313c5bdafed8
--- /dev/null
+++ b/test/CodeGen/MIR/X86/invalid-target-flag-name.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @G = external global i32
+
+ define i32 @inc() {
+ entry:
+ %a = load i32, i32* @G
+ %b = add i32 %a, 1
+ ret i32 %b
+ }
+
+...
+---
+name: inc
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:45: use of undefined target flag 'x86-test'
+ %rax = MOV64rm %rip, 1, _, target-flags(x86-test) @G, _
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ %eax = INC32r killed %eax, implicit-def dead %eflags
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir b/test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir
new file mode 100644
index 000000000000..00436adca484
--- /dev/null
+++ b/test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ define i64 @test(i64 %x) #0 {
+ entry:
+ %asm = tail call i64 asm sideeffect "$foo", "=r,0"(i64 %x) nounwind
+ ret i64 %asm
+ }
+
+ attributes #0 = { nounwind }
+...
+---
+name: test
+hasInlineAsm: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+
+ ; CHECK: [[@LINE+1]]:58: use of invalid tied-def operand index '300'; instruction has only 6 operands
+ INLINEASM $"$foo", 1, 2818058, def %rdi, 2147483657, killed %rdi(tied-def 300)
+ %rax = COPY killed %rdi
+ RETQ killed %rax
+...
diff --git a/test/CodeGen/MIR/X86/jump-table-info.mir b/test/CodeGen/MIR/X86/jump-table-info.mir
new file mode 100644
index 000000000000..a4e6f6a1728c
--- /dev/null
+++ b/test/CodeGen/MIR/X86/jump-table-info.mir
@@ -0,0 +1,150 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the jump table info and jump
+# table operands correctly.
+
+--- |
+
+ define i32 @test_jumptable(i32 %in) {
+ entry:
+ switch i32 %in, label %def [
+ i32 0, label %lbl1
+ i32 1, label %lbl2
+ i32 2, label %lbl3
+ i32 3, label %lbl4
+ ]
+
+ def:
+ ret i32 0
+
+ lbl1:
+ ret i32 1
+
+ lbl2:
+ ret i32 2
+
+ lbl3:
+ ret i32 4
+
+ lbl4:
+ ret i32 8
+ }
+
+ define i32 @test_jumptable2(i32 %in) {
+ entry:
+ switch i32 %in, label %def [
+ i32 0, label %lbl1
+ i32 1, label %lbl2
+ i32 2, label %lbl3
+ i32 3, label %lbl4
+ ]
+
+ def:
+ ret i32 0
+
+ lbl1:
+ ret i32 1
+
+ lbl2:
+ ret i32 2
+
+ lbl3:
+ ret i32 4
+
+ lbl4:
+ ret i32 8
+ }
+
+...
+---
+name: test_jumptable
+# CHECK: jumpTable:
+# CHECK-NEXT: kind: label-difference32
+# CHECK-NEXT: entries:
+# CHECK-NEXT: - id: 0
+# CHECK-NEXT: blocks: [ '%bb.3.lbl1', '%bb.4.lbl2', '%bb.5.lbl3', '%bb.6.lbl4' ]
+# CHECK_NEXT: body:
+jumpTable:
+ kind: label-difference32
+ entries:
+ - id: 0
+ blocks: [ '%bb.3.lbl1', '%bb.4.lbl2', '%bb.5.lbl3', '%bb.6.lbl4' ]
+body: |
+ bb.0.entry:
+ successors: %bb.2.def, %bb.1.entry
+
+ %eax = MOV32rr %edi, implicit-def %rax
+ CMP32ri8 %edi, 3, implicit-def %eflags
+ JA_1 %bb.2.def, implicit %eflags
+
+ bb.1.entry:
+ successors: %bb.3.lbl1, %bb.4.lbl2, %bb.5.lbl3, %bb.6.lbl4
+ ; CHECK: %rcx = LEA64r %rip, 1, _, %jump-table.0, _
+ %rcx = LEA64r %rip, 1, _, %jump-table.0, _
+ %rax = MOVSX64rm32 %rcx, 4, %rax, 0, _
+ %rax = ADD64rr %rax, %rcx, implicit-def %eflags
+ JMP64r %rax
+
+ bb.2.def:
+ %eax = MOV32r0 implicit-def %eflags
+ RETQ %eax
+
+ bb.3.lbl1:
+ %eax = MOV32ri 1
+ RETQ %eax
+
+ bb.4.lbl2:
+ %eax = MOV32ri 2
+ RETQ %eax
+
+ bb.5.lbl3:
+ %eax = MOV32ri 4
+ RETQ %eax
+
+ bb.6.lbl4:
+ %eax = MOV32ri 8
+ RETQ %eax
+...
+---
+name: test_jumptable2
+jumpTable:
+ kind: label-difference32
+ entries:
+ - id: 1
+ blocks: [ '%bb.3.lbl1', '%bb.4.lbl2', '%bb.5.lbl3', '%bb.6.lbl4' ]
+body: |
+ bb.0.entry:
+ successors: %bb.2.def, %bb.1.entry
+
+ %eax = MOV32rr %edi, implicit-def %rax
+ CMP32ri8 %edi, 3, implicit-def %eflags
+ JA_1 %bb.2.def, implicit %eflags
+
+ bb.1.entry:
+ successors: %bb.3.lbl1, %bb.4.lbl2, %bb.5.lbl3, %bb.6.lbl4
+ ; Verify that the printer will use an id of 0 for this jump table:
+ ; CHECK: %rcx = LEA64r %rip, 1, _, %jump-table.0, _
+ %rcx = LEA64r %rip, 1, _, %jump-table.1, _
+ %rax = MOVSX64rm32 %rcx, 4, %rax, 0, _
+ %rax = ADD64rr %rax, %rcx, implicit-def %eflags
+ JMP64r %rax
+
+ bb.2.def:
+ %eax = MOV32r0 implicit-def %eflags
+ RETQ %eax
+
+ bb.3.lbl1:
+ %eax = MOV32ri 1
+ RETQ %eax
+
+ bb.4.lbl2:
+ %eax = MOV32ri 2
+ RETQ %eax
+
+ bb.5.lbl3:
+ %eax = MOV32ri 4
+ RETQ %eax
+
+ bb.6.lbl4:
+ %eax = MOV32ri 8
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/jump-table-redefinition-error.mir b/test/CodeGen/MIR/X86/jump-table-redefinition-error.mir
new file mode 100644
index 000000000000..d4ab11f40787
--- /dev/null
+++ b/test/CodeGen/MIR/X86/jump-table-redefinition-error.mir
@@ -0,0 +1,76 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test_jumptable(i32 %in) {
+ entry:
+ switch i32 %in, label %def [
+ i32 0, label %lbl1
+ i32 1, label %lbl2
+ i32 2, label %lbl3
+ i32 3, label %lbl4
+ ]
+
+ def:
+ ret i32 0
+
+ lbl1:
+ ret i32 1
+
+ lbl2:
+ ret i32 2
+
+ lbl3:
+ ret i32 4
+
+ lbl4:
+ ret i32 8
+ }
+
+...
+---
+name: test_jumptable
+jumpTable:
+ kind: label-difference32
+ entries:
+ - id: 0
+ blocks: [ '%bb.3.lbl1', '%bb.4.lbl2', '%bb.5.lbl3', '%bb.6.lbl4' ]
+# CHECK: [[@LINE+1]]:18: redefinition of jump table entry '%jump-table.0'
+ - id: 0
+ blocks: [ '%bb.3.lbl1', '%bb.4.lbl2', '%bb.5.lbl3', '%bb.6.lbl4' ]
+body: |
+ bb.0.entry:
+ successors: %bb.2.def, %bb.1.entry
+
+ %eax = MOV32rr %edi, implicit-def %rax
+ CMP32ri8 %edi, 3, implicit-def %eflags
+ JA_1 %bb.2.def, implicit %eflags
+
+ bb.1.entry:
+ successors: %bb.3.lbl1, %bb.4.lbl2, %bb.5.lbl3, %bb.6.lbl4
+
+ %rcx = LEA64r %rip, 1, _, %jump-table.0, _
+ %rax = MOVSX64rm32 %rcx, 4, %rax, 0, _
+ %rax = ADD64rr %rax, %rcx, implicit-def %eflags
+ JMP64r %rax
+
+ bb.2.def:
+ %eax = MOV32r0 implicit-def %eflags
+ RETQ %eax
+
+ bb.3.lbl1:
+ %eax = MOV32ri 1
+ RETQ %eax
+
+ bb.4.lbl2:
+ %eax = MOV32ri 2
+ RETQ %eax
+
+ bb.5.lbl3:
+ %eax = MOV32ri 4
+ RETQ %eax
+
+ bb.6.lbl4:
+ %eax = MOV32ri 8
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/killed-register-flag.mir b/test/CodeGen/MIR/X86/killed-register-flag.mir
index d654a9d2fa56..9e8f3ba3b368 100644
--- a/test/CodeGen/MIR/X86/killed-register-flag.mir
+++ b/test/CodeGen/MIR/X86/killed-register-flag.mir
@@ -19,24 +19,22 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- - 'CMP32ri8 %edi, 10, implicit-def %eflags'
- - 'JG_1 %bb.2.exit, implicit %eflags'
- - id: 1
- name: less
- instructions:
- # CHECK: - '%eax = MOV32r0
- # CHECK-NEXT: - 'RETQ killed %eax
- - '%eax = MOV32r0 implicit-def %eflags'
- - 'RETQ killed %eax'
- - id: 2
- name: exit
- instructions:
- # CHECK: - '%eax = COPY killed %edi
- # CHECK-NEXT: - 'RETQ killed %eax
- - '%eax = COPY killed %edi'
- - 'RETQ killed %eax'
+body: |
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit %eflags
+
+ bb.1.less:
+ ; CHECK: %eax = MOV32r0
+ ; CHECK-NEXT: RETQ killed %eax
+ %eax = MOV32r0 implicit-def %eflags
+ RETQ killed %eax
+
+ bb.2.exit:
+ ; CHECK: %eax = COPY killed %edi
+ ; CHECK-NEXT: RETQ killed %eax
+ %eax = COPY killed %edi
+ RETQ killed %eax
...
diff --git a/test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir b/test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir
new file mode 100644
index 000000000000..93ce30abec7c
--- /dev/null
+++ b/test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir
@@ -0,0 +1,27 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define void @test() {
+ entry:
+ %tmp = alloca [4168 x i8], align 4
+ ret void
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 4040
+stack:
+ - { id: 0, name: tmp, offset: -4176, size: 4168, alignment: 4 }
+body: |
+ bb.0.entry:
+ %rsp = SUB64ri32 %rsp, 4040, implicit-def dead %eflags
+ ; CHECK: [[@LINE+1]]:41: expected a 32 bit integer (the cfi offset is too large)
+ CFI_INSTRUCTION .cfi_def_cfa_offset 123456789123456
+ %rsp = ADD64ri32 %rsp, 4040, implicit-def dead %eflags
+ RETQ
+...
+
diff --git a/test/CodeGen/MIR/X86/large-immediate-operand-error.mir b/test/CodeGen/MIR/X86/large-immediate-operand-error.mir
new file mode 100644
index 000000000000..f815c63e18e9
--- /dev/null
+++ b/test/CodeGen/MIR/X86/large-immediate-operand-error.mir
@@ -0,0 +1,18 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @foo() {
+ entry:
+ ret i32 42
+ }
+
+...
+---
+name: foo
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:20: integer literal is too large to be an immediate operand
+ %eax = MOV32ri 12346127502983478823754212949184914
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/large-index-number-error.mir b/test/CodeGen/MIR/X86/large-index-number-error.mir
index fdb25c907f52..272cd685b381 100644
--- a/test/CodeGen/MIR/X86/large-index-number-error.mir
+++ b/test/CodeGen/MIR/X86/large-index-number-error.mir
@@ -18,18 +18,16 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %rdi, 1, _, 0, _'
- - 'CMP32ri8 %eax, 10, implicit-def %eflags'
- # CHECK: [[@LINE+1]]:14: expected 32-bit integer (too large)
- - 'JG_1 %bb.123456789123456, implicit %eflags'
- - id: 1
- instructions:
- - '%eax = MOV32r0 implicit-def %eflags'
- - id: 2
- instructions:
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ %eax = MOV32rm %rdi, 1, _, 0, _
+ CMP32ri8 %eax, 10, implicit-def %eflags
+ ; CHECK: [[@LINE+1]]:10: expected 32-bit integer (too large)
+ JG_1 %bb.123456789123456, implicit %eflags
+
+ bb.1:
+ %eax = MOV32r0 implicit-def %eflags
+
+ bb.2:
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/large-offset-number-error.mir b/test/CodeGen/MIR/X86/large-offset-number-error.mir
new file mode 100644
index 000000000000..5463cdbce444
--- /dev/null
+++ b/test/CodeGen/MIR/X86/large-offset-number-error.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @G = external global i32
+
+ define i32 @inc() {
+ entry:
+ %a = load i32, i32* @G
+ %b = add i32 %a, 1
+ ret i32 %b
+ }
+
+...
+---
+name: inc
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:37: expected 64-bit integer (too large)
+ %rax = MOV64rm %rip, 1, _, @G + 123456789123456789123456789, _
+ %eax = MOV32rm %rax, 1, _, 0, _
+ %eax = INC32r %eax implicit-def %eflags
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir b/test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir
new file mode 100644
index 000000000000..c570f0992a3f
--- /dev/null
+++ b/test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32* %a) {
+ entry:
+ %b = load i32, i32* %a
+ ret i32 %b
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:53: expected 64-bit integer (too large)
+ %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (load 12345678912345678924218574857 from %ir.a)
+ RETQ %eax
+...
+
diff --git a/test/CodeGen/MIR/X86/liveout-register-mask.mir b/test/CodeGen/MIR/X86/liveout-register-mask.mir
new file mode 100644
index 000000000000..7ded7287060e
--- /dev/null
+++ b/test/CodeGen/MIR/X86/liveout-register-mask.mir
@@ -0,0 +1,42 @@
+# RUN: llc -march=x86-64 -start-after stackmap-liveness -stop-after stackmap-liveness -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the liveout register mask
+# machine operands correctly.
+
+--- |
+
+ define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+ entry:
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2)
+ ret void
+ }
+
+ declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
+
+...
+---
+name: small_patchpoint_codegen
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+ - { reg: '%rsi' }
+frameInfo:
+ hasPatchPoint: true
+ stackSize: 8
+ adjustsStack: true
+ hasCalls: true
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16 }
+body: |
+ bb.0.entry:
+ liveins: %rdi, %rsi, %rbp
+
+ frame-setup PUSH64r killed %rbp, implicit-def %rsp, implicit %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 16
+ CFI_INSTRUCTION .cfi_offset %rbp, -16
+ %rbp = frame-setup MOV64rr %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_register %rbp
+ ; CHECK: PATCHPOINT 5, 5, 0, 2, 0, %rdi, %rsi, csr_64, liveout(%esp, %rsp, %sp, %spl),
+ PATCHPOINT 5, 5, 0, 2, 0, %rdi, %rsi, csr_64, liveout(%esp, %rsp, %sp, %spl), implicit-def dead early-clobber %r11, implicit-def %rsp, implicit-def dead %rax
+ %rbp = POP64r implicit-def %rsp, implicit %rsp
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/machine-basic-block-operands.mir b/test/CodeGen/MIR/X86/machine-basic-block-operands.mir
index 607acb5f273e..0d7a9f8ef34c 100644
--- a/test/CodeGen/MIR/X86/machine-basic-block-operands.mir
+++ b/test/CodeGen/MIR/X86/machine-basic-block-operands.mir
@@ -33,43 +33,41 @@
---
# CHECK: name: foo
name: foo
-body:
- # CHECK: name: entry
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %rdi, 1, _, 0, _'
- # CHECK: - 'CMP32ri8 %eax, 10
- # CHECK-NEXT: - 'JG_1 %bb.2.exit
- - 'CMP32ri8 %eax, 10, implicit-def %eflags'
- - 'JG_1 %bb.2.exit, implicit %eflags'
- # CHECK: name: less
- - id: 1
- name: less
- instructions:
- - '%eax = MOV32r0 implicit-def %eflags'
- - id: 2
- name: exit
- instructions:
- - 'RETQ %eax'
+body: |
+ ; CHECK: bb.0.entry
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+
+ %eax = MOV32rm %rdi, 1, _, 0, _
+ ; CHECK: CMP32ri8 %eax, 10
+ ; CHECK-NEXT: JG_1 %bb.2.exit
+ CMP32ri8 %eax, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit %eflags
+ ; CHECK: bb.1.less:
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def %eflags
+
+ bb.2.exit:
+ RETQ %eax
...
---
# CHECK: name: bar
name: bar
-body:
- # CHECK: name: entry
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %rdi, 1, _, 0, _'
- # CHECK: - 'CMP32ri8 %eax, 10
- # CHECK-NEXT: - 'JG_1 %bb.2
- - 'CMP32ri8 %eax, 10, implicit-def %eflags'
- - 'JG_1 %bb.3, implicit %eflags'
- - id: 1
- instructions:
- - '%eax = MOV32r0 implicit-def %eflags'
- - id: 3
- instructions:
- - 'RETQ %eax'
+body: |
+ ; CHECK: bb.0.entry:
+ bb.0.entry:
+ successors: %bb.1, %bb.3
+
+ %eax = MOV32rm %rdi, 1, _, 0, _
+ ; CHECK: CMP32ri8 %eax, 10
+ ; CHECK-NEXT: JG_1 %bb.2
+ CMP32ri8 %eax, 10, implicit-def %eflags
+ JG_1 %bb.3, implicit %eflags
+
+ bb.1:
+ %eax = MOV32r0 implicit-def %eflags
+
+ bb.3:
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/machine-instructions.mir b/test/CodeGen/MIR/X86/machine-instructions.mir
index 08f3d76486b1..0e46d01e0bd1 100644
--- a/test/CodeGen/MIR/X86/machine-instructions.mir
+++ b/test/CodeGen/MIR/X86/machine-instructions.mir
@@ -14,12 +14,10 @@
---
# CHECK: name: inc
name: inc
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - MOV32rr
- # CHECK-NEXT: - RETQ
- - MOV32rr
- - ' RETQ '
+body: |
+ bb.0.entry:
+ ; CHECK: MOV32rr
+ ; CHECK-NEXT: RETQ
+ %eax = MOV32rr %eax
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/machine-verifier.mir b/test/CodeGen/MIR/X86/machine-verifier.mir
new file mode 100644
index 000000000000..a7413d4d03bc
--- /dev/null
+++ b/test/CodeGen/MIR/X86/machine-verifier.mir
@@ -0,0 +1,22 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that the MIR parser runs the machine verifier after parsing.
+
+--- |
+
+ define i32 @inc(i32 %a) {
+ entry:
+ ret i32 %a
+ }
+
+...
+---
+name: inc
+body: |
+ bb.0.entry:
+ liveins: %edi
+ ; CHECK: *** Bad machine code: Too few operands ***
+ ; CHECK: instruction: COPY
+ ; CHECK: 2 operands expected, but 0 given.
+ COPY
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/memory-operands.mir b/test/CodeGen/MIR/X86/memory-operands.mir
new file mode 100644
index 000000000000..3c9463d2f313
--- /dev/null
+++ b/test/CodeGen/MIR/X86/memory-operands.mir
@@ -0,0 +1,508 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the machine memory operands
+# correctly.
+
+--- |
+
+ define i32 @test(i32* %a) {
+ entry:
+ %b = load i32, i32* %a
+ store i32 42, i32* %a
+ ret i32 %b
+ }
+
+ define void @test2(i32* %"a value") {
+ entry2:
+ %b = load i32, i32* %"a value"
+ %c = add i32 %b, 1
+ store i32 %c, i32* %"a value"
+ ret void
+ }
+
+ define void @test3(i32*) {
+ entry3:
+ %1 = alloca i32
+ %b = load i32, i32* %0
+ %c = add i32 %b, 1
+ store i32 %c, i32* %1
+ ret void
+ }
+
+ define i32 @volatile_inc(i32* %x) {
+ entry:
+ %0 = load volatile i32, i32* %x
+ %1 = add i32 %0, 1
+ store volatile i32 %1, i32* %x
+ ret i32 %1
+ }
+
+ define void @non_temporal_store(i32* %a, i32 %b) {
+ entry:
+ store i32 %b, i32* %a, align 16, !nontemporal !0
+ ret void
+ }
+
+ !0 = !{i32 1}
+
+ define i32 @invariant_load(i32* %x) {
+ entry:
+ %v = load i32, i32* %x, !invariant.load !1
+ ret i32 %v
+ }
+
+ !1 = !{}
+
+ define void @memory_offset(<8 x float>* %vec) {
+ entry:
+ %v = load <8 x float>, <8 x float>* %vec
+ %v2 = insertelement <8 x float> %v, float 0.0, i32 4
+ store <8 x float> %v2, <8 x float>* %vec
+ ret void
+ }
+
+ define void @memory_alignment(<8 x float>* %vec) {
+ entry:
+ %v = load <8 x float>, <8 x float>* %vec
+ %v2 = insertelement <8 x float> %v, float 0.0, i32 4
+ store <8 x float> %v2, <8 x float>* %vec
+ ret void
+ }
+
+ define double @constant_pool_psv(double %a) {
+ entry:
+ %b = fadd double %a, 3.250000e+00
+ ret double %b
+ }
+
+ declare x86_fp80 @cosl(x86_fp80) #0
+
+ define x86_fp80 @stack_psv(x86_fp80 %x) {
+ entry:
+ %y = call x86_fp80 @cosl(x86_fp80 %x) #0
+ ret x86_fp80 %y
+ }
+
+ attributes #0 = { readonly }
+
+ @G = external global i32
+
+ define i32 @got_psv() {
+ entry:
+ %a = load i32, i32* @G
+ %b = add i32 %a, 1
+ ret i32 %b
+ }
+
+ @0 = external global i32
+
+ define i32 @global_value() {
+ entry:
+ %a = load i32, i32* @G
+ %b = add i32 %a, 1
+ %c = load i32, i32* @0
+ %d = add i32 %b, %c
+ ret i32 %d
+ }
+
+ define i32 @jumptable_psv(i32 %in) {
+ entry:
+ switch i32 %in, label %def [
+ i32 0, label %lbl1
+ i32 1, label %lbl2
+ i32 2, label %lbl3
+ i32 3, label %lbl4
+ ]
+ def:
+ ret i32 0
+ lbl1:
+ ret i32 1
+ lbl2:
+ ret i32 2
+ lbl3:
+ ret i32 4
+ lbl4:
+ ret i32 8
+ }
+
+ %struct.XXH_state64_t = type { i32, i32, i64, i64, i64 }
+
+ @a = common global i32 0, align 4
+
+ define i32 @tbaa_metadata() {
+ entry:
+ %0 = load i32, i32* @a, align 4, !tbaa !2
+ %1 = inttoptr i32 %0 to %struct.XXH_state64_t*
+ %total_len2 = bitcast %struct.XXH_state64_t* %1 to i32*
+ %2 = load i32, i32* %total_len2, align 4, !tbaa !6
+ ret i32 %2
+ }
+
+ !2 = !{!3, !3, i64 0}
+ !3 = !{!"int", !4, i64 0}
+ !4 = !{!"omnipotent char", !5, i64 0}
+ !5 = !{!"Simple C/C++ TBAA"}
+ !6 = !{!7, !3, i64 0}
+ !7 = !{!"XXH_state64_t", !3, i64 0, !3, i64 4, !8, i64 8, !8, i64 16, !8, i64 24}
+ !8 = !{!"long long", !4, i64 0}
+
+ define void @aa_scope(float* nocapture %a, float* nocapture readonly %c) #1 {
+ entry:
+ %0 = load float, float* %c, align 4, !alias.scope !9
+ %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
+ store float %0, float* %arrayidx.i, align 4, !noalias !9
+ %1 = load float, float* %c, align 4
+ %arrayidx = getelementptr inbounds float, float* %a, i64 7
+ store float %1, float* %arrayidx, align 4
+ ret void
+ }
+
+ attributes #1 = { nounwind uwtable }
+
+ !9 = distinct !{!9, !10, !"some scope"}
+ !10 = distinct !{!10, !"some domain"}
+
+ define zeroext i1 @range_metadata(i8* %x) {
+ entry:
+ %0 = load i8, i8* %x, align 1, !range !11
+ %tobool = trunc i8 %0 to i1
+ ret i1 %tobool
+ }
+
+ !11 = !{i8 0, i8 2}
+
+ %st = type { i32, i32 }
+
+ @values = common global [50 x %st] zeroinitializer, align 16
+
+ define void @gep_value(i64 %d) {
+ entry:
+ %conv = trunc i64 %d to i32
+ store i32 %conv, i32* getelementptr inbounds ([50 x %st], [50 x %st]* @values, i64 0, i64 0, i32 0), align 16
+ ret void
+ }
+
+ define i8* @undef_value() {
+ entry:
+ %0 = load i8*, i8** undef, align 8
+ ret i8* %0
+ }
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: %eax = MOV32rm %rdi, 1, _, 0, _ :: (load 4 from %ir.a)
+ ; CHECK-NEXT: MOV32mi killed %rdi, 1, _, 0, _, 42 :: (store 4 into %ir.a)
+ %eax = MOV32rm %rdi, 1, _, 0, _ :: (load 4 from %ir.a)
+ MOV32mi killed %rdi, 1, _, 0, _, 42 :: (store 4 into %ir.a)
+ RETQ %eax
+...
+---
+name: test2
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry2:
+ liveins: %rdi
+ ; CHECK: INC32m killed %rdi, 1, _, 0, _, implicit-def dead %eflags :: (store 4 into %ir."a value"), (load 4 from %ir."a value")
+ INC32m killed %rdi, 1, _, 0, _, implicit-def dead %eflags :: (store 4 into %ir."a value"), (load 4 from %ir."a value")
+ RETQ
+...
+---
+name: test3
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, offset: -12, size: 4, alignment: 4 }
+body: |
+ bb.0.entry3:
+ liveins: %rdi
+ ; Verify that the unnamed local values can be serialized.
+ ; CHECK-LABEL: name: test3
+ ; CHECK: %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (load 4 from %ir.0)
+ ; CHECK: MOV32mr %rsp, 1, _, -4, _, killed %eax :: (store 4 into %ir.1)
+ %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (load 4 from %ir.0)
+ %eax = INC32r killed %eax, implicit-def dead %eflags
+ MOV32mr %rsp, 1, _, -4, _, killed %eax :: (store 4 into %ir.1)
+ RETQ
+...
+---
+name: volatile_inc
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: name: volatile_inc
+ ; CHECK: %eax = MOV32rm %rdi, 1, _, 0, _ :: (volatile load 4 from %ir.x)
+ ; CHECK: MOV32mr killed %rdi, 1, _, 0, _, %eax :: (volatile store 4 into %ir.x)
+ %eax = MOV32rm %rdi, 1, _, 0, _ :: (volatile load 4 from %ir.x)
+ %eax = INC32r killed %eax, implicit-def dead %eflags
+ MOV32mr killed %rdi, 1, _, 0, _, %eax :: (volatile store 4 into %ir.x)
+ RETQ %eax
+...
+---
+name: non_temporal_store
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+ - { reg: '%esi' }
+body: |
+ bb.0.entry:
+ liveins: %esi, %rdi
+ ; CHECK: name: non_temporal_store
+ ; CHECK: MOVNTImr killed %rdi, 1, _, 0, _, killed %esi :: (non-temporal store 4 into %ir.a)
+ MOVNTImr killed %rdi, 1, _, 0, _, killed %esi :: (non-temporal store 4 into %ir.a)
+ RETQ
+...
+---
+name: invariant_load
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: name: invariant_load
+ ; CHECK: %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (invariant load 4 from %ir.x)
+ %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (invariant load 4 from %ir.x)
+ RETQ %eax
+...
+---
+name: memory_offset
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: name: memory_offset
+ ; CHECK: %xmm0 = MOVAPSrm %rdi, 1, _, 0, _ :: (load 16 from %ir.vec)
+ ; CHECK-NEXT: %xmm1 = MOVAPSrm %rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16)
+ ; CHECK: MOVAPSmr %rdi, 1, _, 0, _, killed %xmm0 :: (store 16 into %ir.vec)
+ ; CHECK-NEXT: MOVAPSmr killed %rdi, 1, _, 16, _, killed %xmm1 :: (store 16 into %ir.vec + 16)
+ %xmm0 = MOVAPSrm %rdi, 1, _, 0, _ :: (load 16 from %ir.vec)
+ %xmm1 = MOVAPSrm %rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16)
+ %xmm2 = FsFLD0SS
+ %xmm1 = MOVSSrr killed %xmm1, killed %xmm2
+ MOVAPSmr %rdi, 1, _, 0, _, killed %xmm0 :: (store 16 into %ir.vec)
+ MOVAPSmr killed %rdi, 1, _, 16, _, killed %xmm1 :: (store 16 into %ir.vec + 16)
+ RETQ
+...
+---
+name: memory_alignment
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: name: memory_alignment
+ ; CHECK: %xmm0 = MOVAPSrm %rdi, 1, _, 0, _ :: (load 16 from %ir.vec, align 32)
+ ; CHECK-NEXT: %xmm1 = MOVAPSrm %rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16, align 32)
+ ; CHECK: MOVAPSmr %rdi, 1, _, 0, _, killed %xmm0 :: (store 16 into %ir.vec, align 32)
+ ; CHECK-NEXT: MOVAPSmr killed %rdi, 1, _, 16, _, killed %xmm1 :: (store 16 into %ir.vec + 16, align 32)
+ %xmm0 = MOVAPSrm %rdi, 1, _, 0, _ :: (load 16 from %ir.vec, align 32)
+ %xmm1 = MOVAPSrm %rdi, 1, _, 16, _ :: (load 16 from %ir.vec + 16, align 32)
+ %xmm2 = FsFLD0SS
+ %xmm1 = MOVSSrr killed %xmm1, killed %xmm2
+ MOVAPSmr %rdi, 1, _, 0, _, killed %xmm0 :: (store 16 into %ir.vec, align 32)
+ MOVAPSmr killed %rdi, 1, _, 16, _, killed %xmm1 :: (store 16 into %ir.vec + 16, align 32)
+ RETQ
+...
+---
+name: constant_pool_psv
+tracksRegLiveness: true
+liveins:
+ - { reg: '%xmm0' }
+constants:
+ - id: 0
+ value: 'double 3.250000e+00'
+body: |
+ bb.0.entry:
+ liveins: %xmm0
+ ; CHECK: name: constant_pool_psv
+ ; CHECK: %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _ :: (load 8 from constant-pool)
+ ; CHECK-NEXT: %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _ :: (load 8 from constant-pool + 8)
+ %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _ :: (load 8 from constant-pool)
+ %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _ :: (load 8 from constant-pool + 8)
+ RETQ %xmm0
+...
+---
+name: stack_psv
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 24
+ maxAlignment: 16
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 16
+fixedStack:
+ - { id: 0, offset: 0, size: 10, alignment: 16, isImmutable: true, isAliased: false }
+body: |
+ bb.0.entry:
+ %rsp = frame-setup SUB64ri8 %rsp, 24, implicit-def dead %eflags
+ CFI_INSTRUCTION .cfi_def_cfa_offset 32
+ LD_F80m %rsp, 1, _, 32, _, implicit-def dead %fpsw
+ ; CHECK: name: stack_psv
+ ; CHECK: ST_FP80m %rsp, 1, _, 0, _, implicit-def dead %fpsw :: (store 10 into stack, align 16)
+ ST_FP80m %rsp, 1, _, 0, _, implicit-def dead %fpsw :: (store 10 into stack, align 16)
+ CALL64pcrel32 $cosl, csr_64, implicit %rsp, implicit-def %rsp, implicit-def %fp0
+ %rsp = ADD64ri8 %rsp, 24, implicit-def dead %eflags
+ RETQ
+...
+---
+name: got_psv
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK: name: got_psv
+ ; CHECK: %rax = MOV64rm %rip, 1, _, @G, _ :: (load 8 from got)
+ %rax = MOV64rm %rip, 1, _, @G, _ :: (load 8 from got)
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ %eax = INC32r killed %eax, implicit-def dead %eflags
+ RETQ %eax
+...
+---
+name: global_value
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ %rax = MOV64rm %rip, 1, _, @G, _
+ ; CHECK-LABEL: name: global_value
+ ; CHECK: %eax = MOV32rm killed %rax, 1, _, 0, _, implicit-def %rax :: (load 4 from @G)
+ ; CHECK: %ecx = MOV32rm killed %rcx, 1, _, 0, _, implicit-def %rcx :: (load 4 from @0)
+ %eax = MOV32rm killed %rax, 1, _, 0, _, implicit-def %rax :: (load 4 from @G)
+ %rcx = MOV64rm %rip, 1, _, @0, _
+ %ecx = MOV32rm killed %rcx, 1, _, 0, _, implicit-def %rcx :: (load 4 from @0)
+ %eax = LEA64_32r killed %rax, 1, killed %rcx, 1, _
+ RETQ %eax
+...
+---
+name: jumptable_psv
+tracksRegLiveness: true
+liveins:
+ - { reg: '%edi' }
+jumpTable:
+ kind: label-difference32
+ entries:
+ - id: 0
+ blocks: [ '%bb.3.lbl1', '%bb.4.lbl2', '%bb.5.lbl3', '%bb.6.lbl4' ]
+body: |
+ bb.0.entry:
+ successors: %bb.2.def, %bb.1.entry
+ liveins: %edi
+
+ %eax = MOV32rr %edi, implicit-def %rax
+ CMP32ri8 killed %edi, 3, implicit-def %eflags
+ JA_1 %bb.2.def, implicit killed %eflags
+
+ bb.1.entry:
+ successors: %bb.3.lbl1, %bb.4.lbl2, %bb.5.lbl3, %bb.6.lbl4
+ liveins: %rax
+
+ %rcx = LEA64r %rip, 1, _, %jump-table.0, _
+ ; CHECK: name: jumptable_psv
+ ; CHECK: %rax = MOVSX64rm32 %rcx, 4, killed %rax, 0, _ :: (load 4 from jump-table, align 8)
+ %rax = MOVSX64rm32 %rcx, 4, killed %rax, 0, _ :: (load 4 from jump-table, align 8)
+ %rax = ADD64rr killed %rax, killed %rcx, implicit-def dead %eflags
+ JMP64r killed %rax
+
+ bb.2.def:
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ %eax
+
+ bb.3.lbl1:
+ %eax = MOV32ri 1
+ RETQ %eax
+
+ bb.4.lbl2:
+ %eax = MOV32ri 2
+ RETQ %eax
+
+ bb.5.lbl3:
+ %eax = MOV32ri 4
+ RETQ %eax
+
+ bb.6.lbl4:
+ %eax = MOV32ri 8
+ RETQ %eax
+...
+---
+name: tbaa_metadata
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ %rax = MOV64rm %rip, 1, _, @a, _ :: (load 8 from got)
+ ; CHECK-LABEL: name: tbaa_metadata
+ ; CHECK: %eax = MOV32rm killed %rax, 1, _, 0, _, implicit-def %rax :: (load 4 from @a, !tbaa !2)
+ ; CHECK-NEXT: %eax = MOV32rm killed %rax, 1, _, 0, _ :: (load 4 from %ir.total_len2, !tbaa !6)
+ %eax = MOV32rm killed %rax, 1, _, 0, _, implicit-def %rax :: (load 4 from @a, !tbaa !2)
+ %eax = MOV32rm killed %rax, 1, _, 0, _ :: (load 4 from %ir.total_len2, !tbaa !6)
+ RETQ %eax
+...
+---
+name: aa_scope
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+ - { reg: '%rsi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi, %rsi
+ ; CHECK-LABEL: name: aa_scope
+ ; CHECK: %xmm0 = MOVSSrm %rsi, 1, _, 0, _ :: (load 4 from %ir.c, !alias.scope !9)
+ %xmm0 = MOVSSrm %rsi, 1, _, 0, _ :: (load 4 from %ir.c, !alias.scope !9)
+ ; CHECK-NEXT: MOVSSmr %rdi, 1, _, 20, _, killed %xmm0 :: (store 4 into %ir.arrayidx.i, !noalias !9)
+ MOVSSmr %rdi, 1, _, 20, _, killed %xmm0 :: (store 4 into %ir.arrayidx.i, !noalias !9)
+ %xmm0 = MOVSSrm killed %rsi, 1, _, 0, _ :: (load 4 from %ir.c)
+ MOVSSmr killed %rdi, 1, _, 28, _, killed %xmm0 :: (store 4 into %ir.arrayidx)
+ RETQ
+...
+---
+name: range_metadata
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK-LABEL: name: range_metadata
+ ; CHECK: %al = MOV8rm killed %rdi, 1, _, 0, _ :: (load 1 from %ir.x, !range !11)
+ %al = MOV8rm killed %rdi, 1, _, 0, _ :: (load 1 from %ir.x, !range !11)
+ RETQ %al
+...
+---
+name: gep_value
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+
+ %rax = MOV64rm %rip, 1, _, @values, _ :: (load 8 from got)
+ ; CHECK-LABEL: gep_value
+ ; CHECK: MOV32mr killed %rax, 1, _, 0, _, %edi, implicit killed %rdi :: (store 4 into `i32* getelementptr inbounds ([50 x %st], [50 x %st]* @values, i64 0, i64 0, i32 0)`, align 16)
+ MOV32mr killed %rax, 1, _, 0, _, %edi, implicit killed %rdi :: (store 4 into `i32* getelementptr inbounds ([50 x %st], [50 x %st]* @values, i64 0, i64 0, i32 0)`, align 16)
+ RETQ
+...
+---
+name: undef_value
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: undef_value
+ ; CHECK: %rax = MOV64rm undef %rax, 1, _, 0, _ :: (load 8 from `i8** undef`)
+ %rax = MOV64rm undef %rax, 1, _, 0, _ :: (load 8 from `i8** undef`)
+ RETQ %rax
+...
diff --git a/test/CodeGen/MIR/X86/metadata-operands.mir b/test/CodeGen/MIR/X86/metadata-operands.mir
new file mode 100644
index 000000000000..89a1e6fcb815
--- /dev/null
+++ b/test/CodeGen/MIR/X86/metadata-operands.mir
@@ -0,0 +1,63 @@
+# RUN: llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the metadata machine operands
+# correctly.
+
+--- |
+
+ define i32 @test(i32 %x) #0 !dbg !4 {
+ entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14
+ %0 = load i32, i32* %x.addr, align 4, !dbg !15
+ ret i32 %0, !dbg !15
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+ attributes #0 = { nounwind "no-frame-pointer-elim"="false" }
+ attributes #1 = { nounwind readnone }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!9, !10}
+ !llvm.ident = !{!11}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+ !1 = !DIFile(filename: "test.ll", directory: "")
+ !2 = !{}
+ !3 = !{!4}
+ !4 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 4, type: !6, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+ !5 = !DIFile(filename: "test.c", directory: "")
+ !6 = !DISubroutineType(types: !7)
+ !7 = !{!8, !8}
+ !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !9 = !{i32 2, !"Dwarf Version", i32 4}
+ !10 = !{i32 2, !"Debug Info Version", i32 3}
+ !11 = !{!"clang version 3.7.0"}
+ !12 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 4, type: !8)
+ !13 = !DIExpression()
+ !14 = !DILocation(line: 4, scope: !4)
+ !15 = !DILocation(line: 8, scope: !4)
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, name: x.addr, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ liveins: %edi
+ ; CHECK: %0 = COPY %edi
+ ; CHECK-NEXT: DBG_VALUE _, 0, !12, !13
+ %0 = COPY %edi
+ DBG_VALUE _, 0, !12, ! 13
+ MOV32mr %stack.0.x.addr, 1, _, 0, _, %0
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/missing-closing-quote.mir b/test/CodeGen/MIR/X86/missing-closing-quote.mir
new file mode 100644
index 000000000000..9f4b369a3df4
--- /dev/null
+++ b/test/CodeGen/MIR/X86/missing-closing-quote.mir
@@ -0,0 +1,22 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @"quoted name" = external global i32
+
+ define i32 @test() {
+ entry:
+ %a = load i32, i32* @"quoted name"
+ ret i32 %a
+ }
+
+...
+---
+name: test
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:48: end of machine instruction reached before the closing '"'
+ %rax = MOV64rm %rip, 1, _, @"quoted name, _
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/missing-comma.mir b/test/CodeGen/MIR/X86/missing-comma.mir
index 54c67ac6c911..092995e59c70 100644
--- a/test/CodeGen/MIR/X86/missing-comma.mir
+++ b/test/CodeGen/MIR/X86/missing-comma.mir
@@ -10,12 +10,10 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: [[@LINE+1]]:29: expected ',' before the next machine operand
- - '%eax = XOR32rr %eax %eflags'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:25: expected ',' before the next machine operand
+ %eax = XOR32rr %eax %eflags
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/missing-implicit-operand.mir b/test/CodeGen/MIR/X86/missing-implicit-operand.mir
index 4d2cd03f4a3d..0135c756e138 100644
--- a/test/CodeGen/MIR/X86/missing-implicit-operand.mir
+++ b/test/CodeGen/MIR/X86/missing-implicit-operand.mir
@@ -21,20 +21,18 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %rdi, 1, _, 0, _'
- - 'CMP32ri8 %eax, 10, implicit-def %eflags'
-# CHECK: [[@LINE+1]]:24: missing implicit register operand 'implicit %eflags'
- - 'JG_1 %bb.2.exit'
- - id: 1
- name: less
- instructions:
- - '%eax = MOV32r0 implicit-def %eflags'
- - id: 2
- name: exit
- instructions:
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+
+ %eax = MOV32rm %rdi, 1, _, 0, _
+ CMP32ri8 %eax, 10, implicit-def %eflags
+ ; CHECK: [[@LINE+1]]:20: missing implicit register operand 'implicit %eflags'
+ JG_1 %bb.2.exit
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def %eflags
+
+ bb.2.exit:
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/missing-instruction.mir b/test/CodeGen/MIR/X86/missing-instruction.mir
deleted file mode 100644
index 8d11ab5eaabe..000000000000
--- a/test/CodeGen/MIR/X86/missing-instruction.mir
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
-
---- |
-
- define void @foo() {
- entry:
- ret void
- }
-
-...
----
-name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: [[@LINE+1]]:9: expected a machine instruction
- - ''
-...
diff --git a/test/CodeGen/MIR/X86/named-registers.mir b/test/CodeGen/MIR/X86/named-registers.mir
index 91ed48568678..e547c326563e 100644
--- a/test/CodeGen/MIR/X86/named-registers.mir
+++ b/test/CodeGen/MIR/X86/named-registers.mir
@@ -12,12 +12,10 @@
---
# CHECK: name: foo
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - '%eax = MOV32r0
- # CHECK-NEXT: - 'RETQ %eax
- - '%eax = MOV32r0 implicit-def %eflags'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: %eax = MOV32r0
+ ; CHECK-NEXT: RETQ %eax
+ %eax = MOV32r0 implicit-def %eflags
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/newline-handling.mir b/test/CodeGen/MIR/X86/newline-handling.mir
new file mode 100644
index 000000000000..bce06d540114
--- /dev/null
+++ b/test/CodeGen/MIR/X86/newline-handling.mir
@@ -0,0 +1,109 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+
+--- |
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less:
+ ret i32 0
+
+ exit:
+ ret i32 %a
+ }
+
+ define i32 @bar(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less:
+ ret i32 0
+
+ exit:
+ ret i32 %a
+ }
+
+...
+---
+name: foo
+tracksRegLiveness: true
+liveins:
+ - { reg: '%edi' }
+# CHECK-LABEL: name: foo
+# CHECK: body: |
+# CHECK-NEXT: bb.0.entry:
+# CHECK-NEXT: successors: %bb.1.less(0x40000000 / 0x80000000 = 50.00%), %bb.2.exit(0x40000000 / 0x80000000 = 50.00%)
+# CHECK-NEXT: liveins: %edi
+# CHECK: CMP32ri8 %edi, 10, implicit-def %eflags
+# CHECK-NEXT: JG_1 %bb.2.exit, implicit killed %eflags
+
+# CHECK: bb.1.less:
+# CHECK-NEXT: %eax = MOV32r0 implicit-def dead %eflags
+# CHECK-NEXT: RETQ killed %eax
+
+# CHECK: bb.2.exit:
+# CHECK-NEXT: liveins: %edi
+# CHECK: %eax = COPY killed %edi
+# CHECK-NEXT: RETQ killed %eax
+body: |
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+
+ liveins: %edi
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+
+ JG_1 %bb.2.exit, implicit killed %eflags
+
+ bb.1.less:
+
+
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
+ bb.2.exit:
+
+
+ liveins: %edi
+ %eax = COPY killed %edi
+ RETQ killed %eax
+
+...
+---
+name: bar
+tracksRegLiveness: true
+liveins:
+ - { reg: '%edi' }
+# CHECK-LABEL: name: bar
+# CHECK: body: |
+# CHECK-NEXT: bb.0.entry:
+# CHECK-NEXT: successors: %bb.1.less(0x40000000 / 0x80000000 = 50.00%), %bb.2.exit(0x40000000 / 0x80000000 = 50.00%)
+# CHECK-NEXT: liveins: %edi
+# CHECK: CMP32ri8 %edi, 10, implicit-def %eflags
+# CHECK-NEXT: JG_1 %bb.2.exit, implicit killed %eflags
+
+# CHECK: bb.1.less:
+# CHECK-NEXT: %eax = MOV32r0 implicit-def dead %eflags
+# CHECK-NEXT: RETQ killed %eax
+
+# CHECK: bb.2.exit:
+# CHECK-NEXT: liveins: %edi
+# CHECK: %eax = COPY killed %edi
+# CHECK-NEXT: RETQ killed %eax
+body: |
+
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+ liveins: %edi
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit killed %eflags
+ bb.1.less: %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
+
+ bb.2.exit: liveins: %edi
+ %eax = COPY killed %edi
+ RETQ killed %eax
+
+...
diff --git a/test/CodeGen/MIR/X86/null-register-operands.mir b/test/CodeGen/MIR/X86/null-register-operands.mir
index 55c0ceb3a60a..5563ef8e8f75 100644
--- a/test/CodeGen/MIR/X86/null-register-operands.mir
+++ b/test/CodeGen/MIR/X86/null-register-operands.mir
@@ -13,12 +13,10 @@
---
# CHECK: name: deref
name: deref
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - '%eax = MOV32rm %rdi, 1, _, 0, _'
- # CHECK-NEXT: - 'RETQ %eax'
- - '%eax = MOV32rm %rdi, 1, _, 0, %noreg'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: %eax = MOV32rm %rdi, 1, _, 0, _
+ ; CHECK-NEXT: RETQ %eax
+ %eax = MOV32rm %rdi, 1, _, 0, %noreg
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/register-mask-operands.mir b/test/CodeGen/MIR/X86/register-mask-operands.mir
index f4136598ff5c..9fa4e6e3994e 100644
--- a/test/CodeGen/MIR/X86/register-mask-operands.mir
+++ b/test/CodeGen/MIR/X86/register-mask-operands.mir
@@ -20,24 +20,20 @@
...
---
name: compute
-body:
- - id: 0
- name: body
- instructions:
- - '%eax = IMUL32rri8 %edi, 11, implicit-def %eflags'
- - 'RETQ %eax'
+body: |
+ bb.0.body:
+ %eax = IMUL32rri8 %edi, 11, implicit-def %eflags
+ RETQ %eax
...
---
# CHECK: name: foo
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - 'PUSH64r %rax
- # CHECK-NEXT: - 'CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax'
- - 'PUSH64r %rax, implicit-def %rsp, implicit %rsp'
- - 'CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax'
- - '%rdx = POP64r implicit-def %rsp, implicit %rsp'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: PUSH64r %rax
+ ; CHECK-NEXT: CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax
+ PUSH64r %rax, implicit-def %rsp, implicit %rsp
+ CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax
+ %rdx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/register-operands-target-flag-error.mir b/test/CodeGen/MIR/X86/register-operands-target-flag-error.mir
new file mode 100644
index 000000000000..64d46d20db74
--- /dev/null
+++ b/test/CodeGen/MIR/X86/register-operands-target-flag-error.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @G = external global i32
+
+ define i32 @inc() {
+ entry:
+ %a = load i32, i32* @G
+ %b = add i32 %a, 1
+ ret i32 %b
+ }
+
+...
+---
+name: inc
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:42: register operands can't have target flags
+ %rax = MOV64rm target-flags(x86-got) %rip, 1, _, @G, _
+ %eax = MOV32rm killed %rax, 1, _, 0, _
+ %eax = INC32r killed %eax, implicit-def dead %eflags
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir b/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
new file mode 100644
index 000000000000..d7e76329be73
--- /dev/null
+++ b/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
@@ -0,0 +1,34 @@
+# RUN: llc -march=x86-64 -start-after machine-scheduler -stop-after machine-scheduler -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses simple register allocation hints
+# correctly.
+
+--- |
+
+ define i32 @test(i32 %a, i32 %b) {
+ body:
+ %c = mul i32 %a, %b
+ ret i32 %c
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+# CHECK: registers:
+# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '%esi' }
+# CHECK-NEXT: - { id: 2, class: gr32, preferred-register: '%edi' }
+registers:
+ - { id: 0, class: gr32 }
+ - { id: 1, class: gr32, preferred-register: '%esi' }
+ - { id: 2, class: gr32, preferred-register: '%edi' }
+body: |
+ bb.0.body:
+ liveins: %edi, %esi
+
+ %1 = COPY %esi
+ %2 = COPY %edi
+ %2 = IMUL32rr %2, %1, implicit-def dead %eflags
+ %eax = COPY %2
+ RETQ killed %eax
+...
diff --git a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-aliased.mir b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-aliased.mir
index 67f4bd21cd05..b62cd755fec1 100644
--- a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-aliased.mir
+++ b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-aliased.mir
@@ -22,11 +22,9 @@ fixedStack:
- { id: 0, type: spill-slot, offset: 0, size: 4, isAliased: true }
stack:
- { id: 0, offset: -12, size: 4, alignment: 4 }
-body:
- - id: 0
- name: entry
- instructions:
- - 'MOV32mr %rsp, 1, _, -4, _, %edi'
- - '%eax = COPY %edi'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ MOV32mr %rsp, 1, _, -4, _, %edi
+ %eax = COPY %edi
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-immutable.mir b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-immutable.mir
index 1e1b0fdcc8dc..c89216bea67a 100644
--- a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-immutable.mir
+++ b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-immutable.mir
@@ -22,11 +22,9 @@ fixedStack:
- { id: 0, type: spill-slot, offset: 0, size: 4, isImmutable: true }
stack:
- { id: 0, offset: -12, size: 4, alignment: 4 }
-body:
- - id: 0
- name: entry
- instructions:
- - 'MOV32mr %rsp, 1, _, -4, _, %edi'
- - '%eax = COPY %edi'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ MOV32mr %rsp, 1, _, -4, _, %edi
+ %eax = COPY %edi
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir
index f771f796ec34..7e13a26f0b68 100644
--- a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir
+++ b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir
@@ -24,11 +24,9 @@ fixedStack:
- { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4 }
stack:
- { id: 0, offset: -12, size: 4, alignment: 4 }
-body:
- - id: 0
- name: entry
- instructions:
- - 'MOV32mr %rsp, 1, _, -4, _, %edi'
- - '%eax = COPY %edi'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ MOV32mr %rsp, 1, _, -4, _, %edi
+ %eax = COPY %edi
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/stack-object-debug-info.mir b/test/CodeGen/MIR/X86/stack-object-debug-info.mir
new file mode 100644
index 000000000000..509b196416fd
--- /dev/null
+++ b/test/CodeGen/MIR/X86/stack-object-debug-info.mir
@@ -0,0 +1,65 @@
+# RUN: llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the stack object's debug info
+# correctly.
+--- |
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+
+ define void @foo() #1 {
+ entry:
+ %x.i = alloca i8, align 1
+ %y.i = alloca [256 x i8], align 16
+ %0 = bitcast [256 x i8]* %y.i to i8*
+ br label %for.body
+
+ for.body:
+ %1 = bitcast [256 x i8]* %y.i to i8*
+ call void @llvm.lifetime.end(i64 -1, i8* %1) #3
+ call void @llvm.lifetime.start(i64 -1, i8* %0) #3
+ call void @llvm.dbg.declare(metadata i8* %0, metadata !4, metadata !7) #3, !dbg !8
+ br label %for.body
+ }
+
+ declare void @llvm.lifetime.start(i64, i8* nocapture) #2
+
+ declare void @llvm.lifetime.end(i64, i8* nocapture) #2
+
+ attributes #0 = { nounwind readnone }
+ attributes #1 = { nounwind ssp uwtable }
+ attributes #2 = { nounwind argmemonly }
+ attributes #3 = { nounwind }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!3}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C89, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: 0, enums: !2, retainedTypes: !2)
+ !1 = !DIFile(filename: "t.c", directory: "")
+ !2 = !{}
+ !3 = !{i32 1, !"Debug Info Version", i32 3}
+ !4 = !DILocalVariable(name: "x", scope: !5, file: !1, line: 16, type: !6)
+ !5 = distinct !DISubprogram(scope: null, isLocal: false, isDefinition: true, isOptimized: false)
+ !6 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+ !7 = !DIExpression()
+ !8 = !DILocation(line: 0, scope: !5)
+...
+---
+name: foo
+isSSA: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 16
+# CHECK-LABEL: foo
+# CHECK: stack:
+# CHECK: - { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!4',
+# CHECK-NEXT: di-expression: '!7', di-location: '!8' }
+stack:
+ - { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!4',
+ di-expression: '!7', di-location: '!8' }
+body: |
+ bb.0.entry:
+ successors: %bb.1.for.body
+ bb.1.for.body:
+ successors: %bb.1.for.body
+
+ DBG_VALUE %stack.0.y.i, 0, !4, !7, debug-location !8
+ JMP_1 %bb.1.for.body
+...
diff --git a/test/CodeGen/MIR/X86/stack-object-invalid-name.mir b/test/CodeGen/MIR/X86/stack-object-invalid-name.mir
new file mode 100644
index 000000000000..e42e1e59f1e7
--- /dev/null
+++ b/test/CodeGen/MIR/X86/stack-object-invalid-name.mir
@@ -0,0 +1,28 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that the MIR parser reports an error when it encounters a
+# stack object with a name that can't be associated with an alloca instruction.
+
+--- |
+
+ define i32 @test(i32 %a) {
+ entry:
+ %b = alloca i32
+ store i32 %a, i32* %b
+ %c = load i32, i32* %b
+ ret i32 %c
+ }
+
+...
+---
+name: test
+frameInfo:
+ maxAlignment: 4
+stack:
+ # CHECK: [[@LINE+1]]:20: alloca instruction named 'x' isn't defined in the function 'test'
+ - { id: 0, name: x, offset: -12, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ MOV32mr %rsp, 1, _, -4, _, %edi
+ %eax = MOV32rm %rsp, 1, _, -4, _
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir b/test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir
new file mode 100644
index 000000000000..46661d95e727
--- /dev/null
+++ b/test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir
@@ -0,0 +1,33 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+# This test ensures that an error is reported when an stack object reference
+# uses a different name then the stack object definition.
+
+--- |
+
+ define i32 @test(i32 %a) {
+ entry:
+ %b = alloca i32
+ store i32 %a, i32* %b
+ %c = load i32, i32* %b
+ ret i32 %c
+ }
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, name: b, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ %0 = COPY %edi
+ ; CHECK: [[@LINE+1]]:13: the name of the stack object '%stack.0' isn't 'x'
+ MOV32mr %stack.0.x, 1, _, 0, _, %0
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/stack-object-operands.mir b/test/CodeGen/MIR/X86/stack-object-operands.mir
new file mode 100644
index 000000000000..fce5bf717d1a
--- /dev/null
+++ b/test/CodeGen/MIR/X86/stack-object-operands.mir
@@ -0,0 +1,45 @@
+# RUN: llc -march=x86 -start-after machine-sink -stop-after machine-sink -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses stack object machine operands
+# correctly.
+
+--- |
+
+ define i32 @test(i32 %a) {
+ entry:
+ %b = alloca i32
+ %0 = alloca i32
+ store i32 %a, i32* %b
+ store i32 2, i32* %0
+ %c = load i32, i32* %b
+ ret i32 %c
+ }
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+ - { id: 1, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+fixedStack:
+ - { id: 0, offset: 0, size: 4, isImmutable: true, isAliased: false }
+stack:
+ - { id: 0, name: b, size: 4, alignment: 4 }
+ - { id: 1, size: 4, alignment: 4 }
+body: |
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: %0 = MOV32rm %fixed-stack.0, 1, _, 0, _
+ ; CHECK-NEXT: MOV32mr %stack.0.b, 1, _, 0, _, %0
+ ; CHECK-NEXT: MOV32mi %stack.1, 1, _, 0, _, 2
+ ; CHECK-NEXT: %1 = MOV32rm %stack.0.b, 1, _, 0, _
+ bb.0.entry:
+ %0 = MOV32rm %fixed-stack.0, 1, _, 0, _
+ MOV32mr %stack.0.b, 1, _, 0, _, %0
+ MOV32mi %stack.1, 1, _, 0, _, 2
+ %1 = MOV32rm %stack.0, 1, _, 0, _
+ %eax = COPY %1
+ RETL %eax
+...
diff --git a/test/CodeGen/MIR/X86/stack-object-redefinition-error.mir b/test/CodeGen/MIR/X86/stack-object-redefinition-error.mir
new file mode 100644
index 000000000000..b84863ebca67
--- /dev/null
+++ b/test/CodeGen/MIR/X86/stack-object-redefinition-error.mir
@@ -0,0 +1,37 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32 %a) #0 {
+ entry:
+ %b = alloca i32
+ %x = alloca i64
+ store i32 %a, i32* %b
+ store i64 2, i64* %x
+ %c = load i32, i32* %b
+ ret i32 %c
+ }
+
+ attributes #0 = { "no-frame-pointer-elim"="false" }
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%edi' }
+frameInfo:
+ maxAlignment: 8
+stack:
+ - { id: 0, name: b, offset: -12, size: 4, alignment: 4 }
+# CHECK: [[@LINE+1]]:11: redefinition of stack object '%stack.0'
+ - { id: 0, name: x, offset: -24, size: 8, alignment: 8 }
+body: |
+ bb.0.entry:
+ liveins: %edi
+
+ MOV32mr %rsp, 1, _, -4, _, killed %edi
+ MOV64mi32 %rsp, 1, _, -16, _, 2
+ %eax = MOV32rm %rsp, 1, _, -4, _
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/stack-objects.mir b/test/CodeGen/MIR/X86/stack-objects.mir
index 14ed4b74f96f..bdd911075da0 100644
--- a/test/CodeGen/MIR/X86/stack-objects.mir
+++ b/test/CodeGen/MIR/X86/stack-objects.mir
@@ -21,19 +21,17 @@ name: test
frameInfo:
maxAlignment: 8
# CHECK: stack:
-# CHECK-NEXT: - { id: 0, offset: -12, size: 4, alignment: 4 }
-# CHECK-NEXT: - { id: 1, offset: -24, size: 8, alignment: 8 }
+# CHECK-NEXT: - { id: 0, name: b, offset: -12, size: 4, alignment: 4 }
+# CHECK-NEXT: - { id: 1, name: x, offset: -24, size: 8, alignment: 8 }
# CHECK-NEXT: - { id: 2, type: spill-slot, offset: -32, size: 4, alignment: 4 }
stack:
- - { id: 0, offset: -12, size: 4, alignment: 4 }
- - { id: 1, offset: -24, size: 8, alignment: 8 }
+ - { id: 0, name: b, offset: -12, size: 4, alignment: 4 }
+ - { id: 1, name: x, offset: -24, size: 8, alignment: 8 }
- { id: 2, type: spill-slot, offset: -32, size: 4, alignment: 4 }
-body:
- - id: 0
- name: entry
- instructions:
- - 'MOV32mr %rsp, 1, _, -4, _, %edi'
- - 'MOV64mi32 %rsp, 1, _, -16, _, 2'
- - '%eax = MOV32rm %rsp, 1, _, -4, _'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ MOV32mr %rsp, 1, _, -4, _, %edi
+ MOV64mi32 %rsp, 1, _, -16, _, 2
+ %eax = MOV32rm %rsp, 1, _, -4, _
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/standalone-register-error.mir b/test/CodeGen/MIR/X86/standalone-register-error.mir
new file mode 100644
index 000000000000..f17451bfc89c
--- /dev/null
+++ b/test/CodeGen/MIR/X86/standalone-register-error.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ define i32 @test(i32 %a) {
+ body:
+ ret i32 %a
+ }
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+liveins:
+# CHECK: [[@LINE+1]]:13: unknown register name 'register'
+ - { reg: '%register', virtual-reg: '%0' }
+body: |
+ bb.0.body:
+ liveins: %edi
+
+ %0 = COPY %edi
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/subregister-operands.mir b/test/CodeGen/MIR/X86/subregister-operands.mir
index 5e46fab4b058..8a3fcf69aca6 100644
--- a/test/CodeGen/MIR/X86/subregister-operands.mir
+++ b/test/CodeGen/MIR/X86/subregister-operands.mir
@@ -18,16 +18,15 @@ registers:
- { id: 0, class: gr32 }
- { id: 1, class: gr8 }
- { id: 2, class: gr8 }
-body:
- - name: entry
- id: 0
- instructions:
- # CHECK: %0 = COPY %edi
- # CHECK-NEXT: %1 = COPY %0:sub_8bit
- - '%0 = COPY %edi'
- - '%1 = COPY %0:sub_8bit'
- - '%2 = AND8ri %1, 1, implicit-def %eflags'
- - '%al = COPY %2'
- - 'RETQ %al'
+body: |
+ bb.0.entry:
+ liveins: %edi
+ ; CHECK: %0 = COPY %edi
+ ; CHECK-NEXT: %1 = COPY %0:sub_8bit
+ %0 = COPY %edi
+ %1 = COPY %0:sub_8bit
+ %2 = AND8ri %1, 1, implicit-def %eflags
+ %al = COPY %2
+ RETQ %al
...
diff --git a/test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir b/test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir
new file mode 100644
index 000000000000..64af6121189a
--- /dev/null
+++ b/test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir
@@ -0,0 +1,42 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses basic block successors and
+# probabilities correctly.
+
+--- |
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less:
+ ret i32 0
+
+ exit:
+ ret i32 %a
+ }
+
+...
+---
+name: foo
+body: |
+ ; CHECK-LABEL: bb.0.entry:
+ ; CHECK: successors: %bb.1.less({{[0-9a-fx/= ]+}}33.00%), %bb.2.exit({{[0-9a-fx/= ]+}}67.00%)
+ ; CHECK-LABEL: bb.1.less:
+ bb.0.entry:
+ successors: %bb.1.less (33), %bb.2.exit(67)
+ liveins: %edi
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit killed %eflags
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
+
+ bb.2.exit:
+ liveins: %edi
+
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
diff --git a/test/CodeGen/MIR/X86/successor-basic-blocks.mir b/test/CodeGen/MIR/X86/successor-basic-blocks.mir
new file mode 100644
index 000000000000..a6c14f70bc7c
--- /dev/null
+++ b/test/CodeGen/MIR/X86/successor-basic-blocks.mir
@@ -0,0 +1,83 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses basic block successors correctly.
+
+--- |
+
+ define i32 @foo(i32 %a) {
+ entry:
+ %0 = icmp sle i32 %a, 10
+ br i1 %0, label %less, label %exit
+
+ less:
+ ret i32 0
+
+ exit:
+ ret i32 %a
+ }
+
+ define i32 @bar(i32 %a) {
+ entry:
+ %b = icmp sle i32 %a, 10
+ br i1 %b, label %0, label %1
+
+ ; <label>:0
+ ret i32 0
+
+ ; <label>:1
+ ret i32 %a
+ }
+
+...
+---
+name: foo
+body: |
+ ; CHECK-LABEL: bb.0.entry:
+ ; CHECK: successors: %bb.1.less(0x40000000 / 0x80000000 = 50.00%), %bb.2.exit(0x40000000 / 0x80000000 = 50.00%)
+ ; CHECK-LABEL: bb.1.less:
+ bb.0.entry:
+ successors: %bb.1.less, %bb.2.exit
+ liveins: %edi
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit killed %eflags
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
+
+ bb.2.exit:
+ liveins: %edi
+
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
+---
+name: bar
+body: |
+ ; CHECK-LABEL: name: bar
+ ; Verify that we can have multiple lists of successors that will be merged
+ ; into one.
+ ; CHECK-LABEL: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x80000000 / 0x80000000 = 100.00%), %bb.2(0x00000000 / 0x80000000 = 0.00%)
+ bb.0.entry:
+ liveins: %edi
+ successors: %bb.1
+ successors: %bb.2
+
+ CMP32ri8 %edi, 10, implicit-def %eflags
+ JG_1 %bb.2, implicit killed %eflags
+
+ ; Verify that we can have an empty list of successors.
+ ; CHECK-LABEL: bb.1:
+ ; CHECK-NEXT: %eax = MOV32r0 implicit-def dead %eflags
+ bb.1:
+ successors:
+ %eax = MOV32r0 implicit-def dead %eflags
+ RETQ killed %eax
+
+ bb.2:
+ liveins: %edi
+
+ %eax = COPY killed %edi
+ RETQ killed %eax
+...
diff --git a/test/CodeGen/MIR/X86/tied-def-operand-invalid.mir b/test/CodeGen/MIR/X86/tied-def-operand-invalid.mir
new file mode 100644
index 000000000000..fe5263df355f
--- /dev/null
+++ b/test/CodeGen/MIR/X86/tied-def-operand-invalid.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+ define i64 @test(i64 %x) #0 {
+ entry:
+ %asm = tail call i64 asm sideeffect "$foo", "=r,0"(i64 %x) nounwind
+ ret i64 %asm
+ }
+
+ attributes #0 = { nounwind }
+...
+---
+name: test
+hasInlineAsm: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+
+ ; CHECK: [[@LINE+1]]:58: use of invalid tied-def operand index '0'; the operand #0 isn't a defined register
+ INLINEASM $"$foo", 1, 2818058, def %rdi, 2147483657, killed %rdi(tied-def 0)
+ %rax = COPY killed %rdi
+ RETQ killed %rax
+...
diff --git a/test/CodeGen/MIR/X86/undef-register-flag.mir b/test/CodeGen/MIR/X86/undef-register-flag.mir
index 83b9e10a80d1..0b26c528aee1 100644
--- a/test/CodeGen/MIR/X86/undef-register-flag.mir
+++ b/test/CodeGen/MIR/X86/undef-register-flag.mir
@@ -21,22 +21,18 @@
...
---
name: compute
-body:
- - id: 0
- name: body
- instructions:
- - '%eax = IMUL32rri8 %edi, 11, implicit-def %eflags'
- - 'RETQ %eax'
+body: |
+ bb.0.body:
+ %eax = IMUL32rri8 %edi, 11, implicit-def %eflags
+ RETQ %eax
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: - 'PUSH64r undef %rax
- - 'PUSH64r undef %rax, implicit-def %rsp, implicit %rsp'
- - 'CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax'
- - '%rdx = POP64r implicit-def %rsp, implicit %rsp'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: PUSH64r undef %rax
+ PUSH64r undef %rax, implicit-def %rsp, implicit %rsp
+ CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax
+ %rdx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir b/test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir
new file mode 100644
index 000000000000..8d8f8614f32b
--- /dev/null
+++ b/test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir
@@ -0,0 +1,38 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+
+ define i32 @test(i32 %a) {
+ entry:
+ %b = alloca i32
+ %0 = alloca i32
+ store i32 %a, i32* %b
+ store i32 2, i32* %0
+ %c = load i32, i32* %b
+ ret i32 %c
+ }
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+ - { id: 1, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+fixedStack:
+ - { id: 0, offset: 0, size: 4, isImmutable: true, isAliased: false }
+stack:
+ - { id: 0, name: b, size: 4, alignment: 4 }
+ - { id: 1, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:18: use of undefined fixed stack object '%fixed-stack.11'
+ %0 = MOV32rm %fixed-stack.11, 1, _, 0, _
+ MOV32mr %stack.0, 1, _, 0, _, %0
+ MOV32mi %stack.1, 1, _, 0, _, 2
+ %1 = MOV32rm %stack.0, 1, _, 0, _
+ %eax = COPY %1
+ RETL %eax
+...
diff --git a/test/CodeGen/MIR/X86/undefined-global-value.mir b/test/CodeGen/MIR/X86/undefined-global-value.mir
index e41dc0454d2c..f82c626397a9 100644
--- a/test/CodeGen/MIR/X86/undefined-global-value.mir
+++ b/test/CodeGen/MIR/X86/undefined-global-value.mir
@@ -16,13 +16,11 @@
...
---
name: inc
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: [[@LINE+1]]:37: use of undefined global value '@2'
- - '%rax = MOV64rm %rip, 1, _, @2, _'
- - '%eax = MOV32rm %rax, 1, _, 0, _'
- - '%eax = INC32r %eax'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:32: use of undefined global value '@2'
+ %rax = MOV64rm %rip, 1, _, @2, _
+ %eax = MOV32rm %rax, 1, _, 0, _
+ %eax = INC32r %eax
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir b/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir
new file mode 100644
index 000000000000..f6b10e3123ca
--- /dev/null
+++ b/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir
@@ -0,0 +1,30 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @addr = global i8* null
+
+ define void @test() {
+ entry:
+ store volatile i8* blockaddress(@test, %block), i8** @addr
+ %val = load volatile i8*, i8** @addr
+ indirectbr i8* %val, [label %block]
+
+ block:
+ ret void
+ }
+
+...
+---
+name: test
+body: |
+ bb.0.entry:
+ successors: %bb.1.block
+ ; CHECK: [[@LINE+1]]:51: use of undefined IR block '%ir-block."block "'
+ %rax = LEA64r %rip, 1, _, blockaddress(@test, %ir-block."block "), _
+ MOV64mr %rip, 1, _, @addr, _, killed %rax
+ JMP64m %rip, 1, _, @addr, _
+
+ bb.1.block (address-taken):
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir b/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir
new file mode 100644
index 000000000000..0b3c0093dc62
--- /dev/null
+++ b/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir
@@ -0,0 +1,29 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ @addr = global i8* null
+
+ define void @test() {
+ entry:
+ store volatile i8* blockaddress(@test, %0), i8** @addr
+ %val = load volatile i8*, i8** @addr
+ indirectbr i8* %val, [label %0]
+
+ ret void
+ }
+
+...
+---
+name: test
+body: |
+ bb.0.entry:
+ successors: %bb.1
+ ; CHECK: [[@LINE+1]]:51: use of undefined IR block '%ir-block.1'
+ %rax = LEA64r %rip, 1, _, blockaddress(@test, %ir-block.1), _
+ MOV64mr %rip, 1, _, @addr, _, killed %rax
+ JMP64m %rip, 1, _, @addr, _
+
+ bb.1 (address-taken):
+ RETQ
+...
diff --git a/test/CodeGen/MIR/X86/undefined-jump-table-id.mir b/test/CodeGen/MIR/X86/undefined-jump-table-id.mir
new file mode 100644
index 000000000000..b463dc4bd9f4
--- /dev/null
+++ b/test/CodeGen/MIR/X86/undefined-jump-table-id.mir
@@ -0,0 +1,73 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test_jumptable(i32 %in) {
+ entry:
+ switch i32 %in, label %def [
+ i32 0, label %lbl1
+ i32 1, label %lbl2
+ i32 2, label %lbl3
+ i32 3, label %lbl4
+ ]
+
+ def:
+ ret i32 0
+
+ lbl1:
+ ret i32 1
+
+ lbl2:
+ ret i32 2
+
+ lbl3:
+ ret i32 4
+
+ lbl4:
+ ret i32 8
+ }
+
+...
+---
+name: test_jumptable
+jumpTable:
+ kind: label-difference32
+ entries:
+ - id: 0
+ blocks: [ '%bb.3.lbl1', '%bb.4.lbl2', '%bb.5.lbl3', '%bb.6.lbl4' ]
+body: |
+ bb.0.entry:
+ successors: %bb.2.def, %bb.1.entry
+
+ %eax = MOV32rr %edi, implicit-def %rax
+ CMP32ri8 %edi, 3, implicit-def %eflags
+ JA_1 %bb.2.def, implicit %eflags
+
+ bb.1.entry:
+ successors: %bb.3.lbl1, %bb.4.lbl2, %bb.5.lbl3, %bb.6.lbl4
+ ; CHECK: [[@LINE+1]]:31: use of undefined jump table '%jump-table.2'
+ %rcx = LEA64r %rip, 1, _, %jump-table.2, _
+ %rax = MOVSX64rm32 %rcx, 4, %rax, 0, _
+ %rax = ADD64rr %rax, %rcx, implicit-def %eflags
+ JMP64r %rax
+
+ bb.2.def:
+ %eax = MOV32r0 implicit-def %eflags
+ RETQ %eax
+
+ bb.3.lbl1:
+ %eax = MOV32ri 1
+ RETQ %eax
+
+ bb.4.lbl2:
+ %eax = MOV32ri 2
+ RETQ %eax
+
+ bb.5.lbl3:
+ %eax = MOV32ri 4
+ RETQ %eax
+
+ bb.6.lbl4:
+ %eax = MOV32ri 8
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/undefined-named-global-value.mir b/test/CodeGen/MIR/X86/undefined-named-global-value.mir
index b40c2ce43b5f..a1ada4b42e46 100644
--- a/test/CodeGen/MIR/X86/undefined-named-global-value.mir
+++ b/test/CodeGen/MIR/X86/undefined-named-global-value.mir
@@ -16,13 +16,11 @@
...
---
name: inc
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: [[@LINE+1]]:37: use of undefined global value '@GG'
- - '%rax = MOV64rm %rip, 1, _, @GG, _'
- - '%eax = MOV32rm %rax, 1, _, 0, _'
- - '%eax = INC32r %eax'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:32: use of undefined global value '@GG'
+ %rax = MOV64rm %rip, 1, _, @GG, _
+ %eax = MOV32rm %rax, 1, _, 0, _
+ %eax = INC32r %eax
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/undefined-register-class.mir b/test/CodeGen/MIR/X86/undefined-register-class.mir
index a14d2303a7d8..348f6af5c44f 100644
--- a/test/CodeGen/MIR/X86/undefined-register-class.mir
+++ b/test/CodeGen/MIR/X86/undefined-register-class.mir
@@ -17,10 +17,8 @@ tracksRegLiveness: true
registers:
# CHECK: [[@LINE+1]]:20: use of undefined register class 'gr3200'
- {id: 0, class: 'gr3200'}
-body:
- - id: 0
- name: entry
- instructions:
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/undefined-stack-object.mir b/test/CodeGen/MIR/X86/undefined-stack-object.mir
new file mode 100644
index 000000000000..416e6789ba0f
--- /dev/null
+++ b/test/CodeGen/MIR/X86/undefined-stack-object.mir
@@ -0,0 +1,30 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+--- |
+
+ define i32 @test(i32 %a) {
+ entry:
+ %b = alloca i32
+ store i32 %a, i32* %b
+ %c = load i32, i32* %b
+ ret i32 %c
+ }
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, name: b, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ %0 = COPY %edi
+ ; CHECK: [[@LINE+1]]:13: use of undefined stack object '%stack.2'
+ MOV32mr %stack.2, 1, _, 0, _, %0
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir b/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir
new file mode 100644
index 000000000000..a3907d7a3a4a
--- /dev/null
+++ b/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir
@@ -0,0 +1,24 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32* %a) {
+ entry:
+ %b = load i32, i32* %a
+ ret i32 %b
+ }
+
+...
+---
+name: test
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:60: use of undefined IR value '%ir.c'
+ %eax = MOV32rm killed %rdi, 1, _, 0, _ :: (load 4 from %ir.c)
+ RETQ %eax
+...
+
diff --git a/test/CodeGen/MIR/X86/undefined-virtual-register.mir b/test/CodeGen/MIR/X86/undefined-virtual-register.mir
index 12370c80caf9..2f9a304ffe5c 100644
--- a/test/CodeGen/MIR/X86/undefined-virtual-register.mir
+++ b/test/CodeGen/MIR/X86/undefined-virtual-register.mir
@@ -16,13 +16,11 @@ isSSA: true
tracksRegLiveness: true
registers:
- { id: 0, class: gr32 }
-body:
- - id: 0
- name: entry
- instructions:
- - '%0 = COPY %edi'
- # CHECK: [[@LINE+1]]:22: use of undefined virtual register '%10'
- - '%eax = COPY %10'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ %0 = COPY %edi
+ ; CHECK: [[@LINE+1]]:17: use of undefined virtual register '%10'
+ %eax = COPY %10
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/unknown-instruction.mir b/test/CodeGen/MIR/X86/unknown-instruction.mir
index 4e58ca6bad40..cec354948832 100644
--- a/test/CodeGen/MIR/X86/unknown-instruction.mir
+++ b/test/CodeGen/MIR/X86/unknown-instruction.mir
@@ -12,10 +12,8 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: [[@LINE+1]]:8: unknown machine instruction name 'retJust0'
- - retJust0
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:5: unknown machine instruction name 'retJust0'
+ retJust0
...
diff --git a/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir b/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir
index a82e9a780f54..a512d9aa08e6 100644
--- a/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir
+++ b/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir
@@ -21,18 +21,16 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %rdi, 1, _, 0, _'
- - 'CMP32ri8 %eax, 10, implicit-def %eflags'
- # CHECK: [[@LINE+1]]:14: use of undefined machine basic block #4
- - 'JG_1 %bb.4, implicit %eflags'
- - id: 1
- instructions:
- - '%eax = MOV32r0 implicit-def %eflags'
- - id: 2
- instructions:
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ %eax = MOV32rm %rdi, 1, _, 0, _
+ CMP32ri8 %eax, 10, implicit-def %eflags
+ ; CHECK: [[@LINE+1]]:10: use of undefined machine basic block #4
+ JG_1 %bb.4, implicit %eflags
+
+ bb.1:
+ %eax = MOV32r0 implicit-def %eflags
+
+ bb.2:
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir b/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir
new file mode 100644
index 000000000000..c58c38ab1322
--- /dev/null
+++ b/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir
@@ -0,0 +1,25 @@
+# RUN: not llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+ define i32 @inc(i32* %x) {
+ entry:
+ %0 = load i32, i32* %x
+ %1 = add i32 %0, 1
+ store i32 %1, i32* %x
+ ret i32 %1
+ }
+...
+---
+name: inc
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+body: |
+ bb.0.entry:
+ liveins: %rdi
+ ; CHECK: [[@LINE+1]]:60: use of unknown metadata keyword '!tba'
+ %eax = MOV32rm %rdi, 1, _, 0, _ :: (load 4 from %ir.x, !tba !0)
+ %eax = INC32r killed %eax, implicit-def dead %eflags
+ MOV32mr killed %rdi, 1, _, 0, _, %eax :: (store 4 into %ir.x)
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/unknown-metadata-node.mir b/test/CodeGen/MIR/X86/unknown-metadata-node.mir
new file mode 100644
index 000000000000..958a30678be1
--- /dev/null
+++ b/test/CodeGen/MIR/X86/unknown-metadata-node.mir
@@ -0,0 +1,59 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32 %x) #0 !dbg !4 {
+ entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !13), !dbg !14
+ %0 = load i32, i32* %x.addr, align 4, !dbg !15
+ ret i32 %0, !dbg !15
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+ attributes #0 = { nounwind "no-frame-pointer-elim"="false" }
+ attributes #1 = { nounwind readnone }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!9, !10}
+ !llvm.ident = !{!11}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+ !1 = !DIFile(filename: "test.ll", directory: "")
+ !2 = !{}
+ !3 = !{!4}
+ !4 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 4, type: !6, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+ !5 = !DIFile(filename: "test.c", directory: "")
+ !6 = !DISubroutineType(types: !7)
+ !7 = !{!8, !8}
+ !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !9 = !{i32 2, !"Dwarf Version", i32 4}
+ !10 = !{i32 2, !"Debug Info Version", i32 3}
+ !11 = !{!"clang version 3.7.0"}
+ !12 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !5, line: 4, type: !8)
+ !13 = !DIExpression()
+ !14 = !DILocation(line: 4, scope: !4)
+ !15 = !DILocation(line: 8, scope: !4)
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, name: x.addr, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ %0 = COPY %edi
+ ; CHECK: [[@LINE+1]]:21: use of undefined metadata '!42'
+ DBG_VALUE _, 0, !42, !13
+ MOV32mr %stack.0.x.addr, 1, _, 0, _, %0
+ %eax = COPY %0
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir b/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir
index f304113f40b9..6627273d4470 100644
--- a/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir
+++ b/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir
@@ -20,20 +20,16 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- - '%eax = MOV32rm %rdi, 1, _, 0, _'
- - 'CMP32ri8 %eax, 10, implicit-def %eflags'
- # CHECK: [[@LINE+1]]:14: the name of machine basic block #2 isn't 'hit'
- - 'JG_1 %bb.2.hit, implicit %eflags'
- - id: 1
- name: less
- instructions:
- - '%eax = MOV32r0 implicit-def %eflags'
- - id: 2
- name: exit
- instructions:
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ %eax = MOV32rm %rdi, 1, _, 0, _
+ CMP32ri8 %eax, 10, implicit-def %eflags
+ ; CHECK: [[@LINE+1]]:10: the name of machine basic block #2 isn't 'hit'
+ JG_1 %bb.2.hit, implicit %eflags
+
+ bb.1.less:
+ %eax = MOV32r0 implicit-def %eflags
+
+ bb.2.exit:
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/unknown-register.mir b/test/CodeGen/MIR/X86/unknown-register.mir
index ce40ee809bf3..da0798ca1b52 100644
--- a/test/CodeGen/MIR/X86/unknown-register.mir
+++ b/test/CodeGen/MIR/X86/unknown-register.mir
@@ -12,11 +12,9 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: [[@LINE+1]]:9: unknown register name 'xax'
- - '%xax = MOV32r0'
- - 'RETQ %xax'
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:5: unknown register name 'xax'
+ %xax = MOV32r0
+ RETQ %xax
...
diff --git a/test/CodeGen/MIR/X86/unknown-subregister-index.mir b/test/CodeGen/MIR/X86/unknown-subregister-index.mir
index 50461232b623..5dde34561236 100644
--- a/test/CodeGen/MIR/X86/unknown-subregister-index.mir
+++ b/test/CodeGen/MIR/X86/unknown-subregister-index.mir
@@ -18,14 +18,12 @@ registers:
- { id: 0, class: gr32 }
- { id: 1, class: gr8 }
- { id: 2, class: gr8 }
-body:
- - name: entry
- id: 0
- instructions:
- - '%0 = COPY %edi'
- # CHECK: [[@LINE+1]]:23: use of unknown subregister index 'bit8'
- - '%1 = COPY %0:bit8'
- - '%2 = AND8ri %1, 1, implicit-def %eflags'
- - '%al = COPY %2'
- - 'RETQ %al'
+body: |
+ bb.0.entry:
+ %0 = COPY %edi
+ ; CHECK: [[@LINE+1]]:18: use of unknown subregister index 'bit8'
+ %1 = COPY %0:bit8
+ %2 = AND8ri %1, 1, implicit-def %eflags
+ %al = COPY %2
+ RETQ %al
...
diff --git a/test/CodeGen/MIR/X86/unrecognized-character.mir b/test/CodeGen/MIR/X86/unrecognized-character.mir
index 3b4fb1a9fc6e..cf99028677fa 100644
--- a/test/CodeGen/MIR/X86/unrecognized-character.mir
+++ b/test/CodeGen/MIR/X86/unrecognized-character.mir
@@ -10,10 +10,8 @@
...
---
name: foo
-body:
- - id: 0
- name: entry
- instructions:
- # CHECK: [[@LINE+1]]:9: unexpected character '`'
- - '` RETQ'
+body: |
+ bb.0.entry:
+ ; CHECK: [[@LINE+1]]:5: unexpected character '\'
+ \ RETQ
...
diff --git a/test/CodeGen/MIR/X86/used-physical-register-info.mir b/test/CodeGen/MIR/X86/used-physical-register-info.mir
new file mode 100644
index 000000000000..9a81578703e0
--- /dev/null
+++ b/test/CodeGen/MIR/X86/used-physical-register-info.mir
@@ -0,0 +1,109 @@
+# RUN: llc -march=x86-64 -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
+# This test ensures that the MIR parser parses the callee saved register mask
+# correctly and that the MIR parser can infer it as well.
+
+--- |
+
+ define i32 @compute(i32 %a) #0 {
+ body:
+ %c = mul i32 %a, 11
+ ret i32 %c
+ }
+
+ define i32 @foo(i32 %a) #0 {
+ entry:
+ %b = call i32 @compute(i32 %a)
+ ret i32 %b
+ }
+
+ define i32 @bar(i32 %a) #0 {
+ entry:
+ %b = call i32 @compute(i32 %a)
+ ret i32 %b
+ }
+
+ define i32 @empty(i32 %a) #0 {
+ entry:
+ %b = call i32 @compute(i32 %a)
+ ret i32 %b
+ }
+
+ attributes #0 = { "no-frame-pointer-elim"="false" }
+
+...
+---
+# CHECK: name: compute
+# CHECK: liveins:
+# CHECK-NEXT: - { reg: '%edi' }
+# CHECK-NEXT: frameInfo:
+name: compute
+liveins:
+ - { reg: '%edi' }
+frameInfo:
+ stackSize: 8
+body: |
+ bb.0.body:
+ liveins: %edi
+
+ %eax = IMUL32rri8 %edi, 11, implicit-def %eflags
+ RETQ %eax
+...
+---
+name: foo
+liveins:
+ - { reg: '%edi' }
+# CHECK: name: foo
+# CHECK: calleeSavedRegisters: [ '%bh', '%bl', '%bp', '%bpl', '%bx', '%ebp', '%ebx',
+# CHECK-NEXT: '%rbp', '%rbx', '%r12', '%r13', '%r14', '%r15',
+# CHECK-NEXT: '%r12b', '%r13b', '%r14b', '%r15b', '%r12d', '%r13d',
+# CHECK-NEXT: '%r14d', '%r15d', '%r12w', '%r13w', '%r14w', '%r15w' ]
+calleeSavedRegisters: [ '%bh', '%bl', '%bp', '%bpl', '%bx', '%ebp', '%ebx',
+ '%rbp', '%rbx', '%r12', '%r13', '%r14', '%r15',
+ '%r12b', '%r13b', '%r14b', '%r15b', '%r12d', '%r13d',
+ '%r14d', '%r15d', '%r12w', '%r13w', '%r14w', '%r15w' ]
+body: |
+ bb.0.entry:
+ liveins: %edi
+
+ PUSH64r %rax, implicit-def %rsp, implicit %rsp
+ CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax
+ %rdx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
+---
+name: bar
+liveins:
+ - { reg: '%edi' }
+# Verify that the callee saved register can be inferred from register mask
+# machine operands:
+# CHECK: name: bar
+# CHECK: calleeSavedRegisters: [ '%bh', '%bl', '%bp', '%bpl', '%bx', '%ebp', '%ebx',
+# CHECK-NEXT: '%rbp', '%rbx', '%r12', '%r13', '%r14', '%r15',
+# CHECK-NEXT: '%r12b', '%r13b', '%r14b', '%r15b', '%r12d', '%r13d',
+# CHECK-NEXT: '%r14d', '%r15d', '%r12w', '%r13w', '%r14w', '%r15w' ]
+body: |
+ bb.0.entry:
+ liveins: %edi
+
+ PUSH64r %rax, implicit-def %rsp, implicit %rsp
+ CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax
+ %rdx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
+---
+name: empty
+liveins:
+ - { reg: '%edi' }
+# Verify that the callee saved register can be empty.
+# CHECK: name: empty
+# CHECK: calleeSavedRegisters: [ ]
+calleeSavedRegisters: [ ]
+body: |
+ bb.0.entry:
+ liveins: %edi
+
+ PUSH64r %rax, implicit-def %rsp, implicit %rsp
+ CALL64pcrel32 @compute, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax
+ %rdx = POP64r implicit-def %rsp, implicit %rsp
+ RETQ %eax
+...
diff --git a/test/CodeGen/MIR/X86/variable-sized-stack-object-size-error.mir b/test/CodeGen/MIR/X86/variable-sized-stack-object-size-error.mir
index 8e50c52f5e18..e6a9ef8d4c88 100644
--- a/test/CodeGen/MIR/X86/variable-sized-stack-object-size-error.mir
+++ b/test/CodeGen/MIR/X86/variable-sized-stack-object-size-error.mir
@@ -25,12 +25,10 @@ stack:
- { id: 1, offset: -32, size: 8, alignment: 8 }
# CHECK: [[@LINE+1]]:55: unknown key 'size'
- { id: 2, type: variable-sized, offset: -32, size: 42, alignment: 1 }
-body:
- - id: 0
- name: entry
- instructions:
- - 'MOV32mr %rsp, 1, _, -4, _, %edi'
- - 'MOV64mi32 %rsp, 1, _, -16, _, 2'
- - '%eax = MOV32rm %rsp, 1, _, -4, _'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ MOV32mr %rsp, 1, _, -4, _, %edi
+ MOV64mi32 %rsp, 1, _, -16, _, 2
+ %eax = MOV32rm %rsp, 1, _, -4, _
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
index 4c45742b25a4..a58be69ae046 100644
--- a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
+++ b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir
@@ -26,17 +26,15 @@ frameInfo:
# CHECK: stack:
# CHECK-NEXT: - { id: 0, offset: -20, size: 4, alignment: 4 }
# CHECK-NEXT: - { id: 1, offset: -32, size: 8, alignment: 8 }
-# CHECK-NEXT: - { id: 2, type: variable-sized, offset: -32, alignment: 1 }
+# CHECK-NEXT: - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1 }
stack:
- { id: 0, offset: -20, size: 4, alignment: 4 }
- { id: 1, offset: -32, size: 8, alignment: 8 }
- - { id: 2, type: variable-sized, offset: -32, alignment: 1 }
-body:
- - id: 0
- name: entry
- instructions:
- - 'MOV32mr %rsp, 1, _, -4, _, %edi'
- - 'MOV64mi32 %rsp, 1, _, -16, _, 2'
- - '%eax = MOV32rm %rsp, 1, _, -4, _'
- - 'RETQ %eax'
+ - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1 }
+body: |
+ bb.0.entry:
+ MOV32mr %rsp, 1, _, -4, _, %edi
+ MOV64mi32 %rsp, 1, _, -16, _, 2
+ %eax = MOV32rm %rsp, 1, _, -4, _
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir b/test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir
new file mode 100644
index 000000000000..5dae6e666c83
--- /dev/null
+++ b/test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir
@@ -0,0 +1,27 @@
+# RUN: not llc -march=x86-64 -start-after machine-sink -stop-after machine-sink -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+
+ define i32 @test(i32 %a) {
+ body:
+ ret i32 %a
+ }
+
+...
+---
+name: test
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+# CHECK: [[@LINE+1]]:11: redefinition of virtual register '%0'
+ - { id: 0, class: gr32 }
+body: |
+ bb.0.body:
+ liveins: %edi
+
+ %0 = COPY %edi
+ %eax = COPY %0
+ RETQ %eax
+...
+
diff --git a/test/CodeGen/MIR/X86/virtual-registers.mir b/test/CodeGen/MIR/X86/virtual-registers.mir
index c6d76e6a18c5..93c2fea6fd95 100644
--- a/test/CodeGen/MIR/X86/virtual-registers.mir
+++ b/test/CodeGen/MIR/X86/virtual-registers.mir
@@ -41,29 +41,27 @@ registers:
- { id: 0, class: gr32 }
- { id: 1, class: gr32 }
- { id: 2, class: gr32 }
-body:
- - id: 0
- name: entry
- # CHECK: %0 = COPY %edi
- # CHECK-NEXT: %1 = SUB32ri8 %0, 10
- instructions:
- - '%0 = COPY %edi'
- - '%1 = SUB32ri8 %0, 10, implicit-def %eflags'
- - 'JG_1 %bb.2.exit, implicit %eflags'
- - 'JMP_1 %bb.1.less'
- - id: 1
- name: less
- # CHECK: %2 = MOV32r0
- # CHECK-NEXT: %eax = COPY %2
- instructions:
- - '%2 = MOV32r0 implicit-def %eflags'
- - '%eax = COPY %2'
- - 'RETQ %eax'
- - id: 2
- name: exit
- instructions:
- - '%eax = COPY %0'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ successors: %bb.2.exit, %bb.1.less
+ liveins: %edi
+ ; CHECK: %0 = COPY %edi
+ ; CHECK-NEXT: %1 = SUB32ri8 %0, 10
+ %0 = COPY %edi
+ %1 = SUB32ri8 %0, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit %eflags
+ JMP_1 %bb.1.less
+
+ bb.1.less:
+ ; CHECK: %2 = MOV32r0
+ ; CHECK-NEXT: %eax = COPY %2
+ %2 = MOV32r0 implicit-def %eflags
+ %eax = COPY %2
+ RETQ %eax
+
+ bb.2.exit:
+ %eax = COPY %0
+ RETQ %eax
...
---
name: foo
@@ -78,28 +76,26 @@ registers:
- { id: 2, class: gr32 }
- { id: 0, class: gr32 }
- { id: 10, class: gr32 }
-body:
- - id: 0
- name: entry
- # CHECK: %0 = COPY %edi
- # CHECK-NEXT: %1 = SUB32ri8 %0, 10
- instructions:
- - '%2 = COPY %edi'
- - '%0 = SUB32ri8 %2, 10, implicit-def %eflags'
- - 'JG_1 %bb.2.exit, implicit %eflags'
- - 'JMP_1 %bb.1.less'
- - id: 1
- name: less
- # CHECK: %2 = MOV32r0
- # CHECK-NEXT: %eax = COPY %2
- instructions:
- - '%10 = MOV32r0 implicit-def %eflags'
- - '%eax = COPY %10'
- - 'RETQ %eax'
- - id: 2
- name: exit
- # CHECK: %eax = COPY %0
- instructions:
- - '%eax = COPY %2'
- - 'RETQ %eax'
+body: |
+ bb.0.entry:
+ successors: %bb.2.exit, %bb.1.less
+ liveins: %edi
+ ; CHECK: %0 = COPY %edi
+ ; CHECK-NEXT: %1 = SUB32ri8 %0, 10
+ %2 = COPY %edi
+ %0 = SUB32ri8 %2, 10, implicit-def %eflags
+ JG_1 %bb.2.exit, implicit %eflags
+ JMP_1 %bb.1.less
+
+ bb.1.less:
+ ; CHECK: %2 = MOV32r0
+ ; CHECK-NEXT: %eax = COPY %2
+ %10 = MOV32r0 implicit-def %eflags
+ %eax = COPY %10
+ RETQ %eax
+
+ bb.2.exit:
+ ; CHECK: %eax = COPY %0
+ %eax = COPY %2
+ RETQ %eax
...
diff --git a/test/CodeGen/MIR/basic-blocks.mir b/test/CodeGen/MIR/basic-blocks.mir
deleted file mode 100644
index 17313047576b..000000000000
--- a/test/CodeGen/MIR/basic-blocks.mir
+++ /dev/null
@@ -1,49 +0,0 @@
-# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
-# This test ensures that the MIR parser parses machine functions correctly.
-
---- |
-
- define i32 @foo() {
- entry:
- ret i32 0
- }
-
- define i32 @bar() {
- start:
- ret i32 0
- }
-
-...
----
-# CHECK: name: foo
-# CHECK: body:
-# CHECK-NEXT: - id: 0
-# CHECK-NEXT: name: entry
-# CHECK-NEXT: alignment: 0
-# CHECK-NEXT: isLandingPad: false
-# CHECK-NEXT: addressTaken: false
-name: foo
-body:
- - id: 0
- name: entry
-...
----
-# CHECK: name: bar
-# CHECK: body:
-# CHECK-NEXT: - id: 0
-# CHECK-NEXT: name: start
-# CHECK-NEXT: alignment: 4
-# CHECK-NEXT: isLandingPad: false
-# CHECK-NEXT: addressTaken: false
-# CHECK-NEXT: - id: 1
-# CHECK-NEXT: alignment: 0
-# CHECK-NEXT: isLandingPad: false
-# CHECK-NEXT: addressTaken: true
-name: bar
-body:
- - id: 0
- name: start
- alignment: 4
- - id: 1
- addressTaken: true
-...
diff --git a/test/CodeGen/MIR/expected-eof-after-successor-mbb.mir b/test/CodeGen/MIR/expected-eof-after-successor-mbb.mir
deleted file mode 100644
index 25ae51192971..000000000000
--- a/test/CodeGen/MIR/expected-eof-after-successor-mbb.mir
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
-
---- |
-
- define i32 @foo(i32 %a) {
- entry:
- %0 = icmp sle i32 %a, 10
- br i1 %0, label %less, label %exit
-
- less:
- ret i32 0
-
- exit:
- ret i32 %a
- }
-
-...
----
-name: foo
-body:
- - id: 0
- name: entry
- # CHECK: [[@LINE+1]]:46: expected end of string after the machine basic block reference
- successors: [ '%bb.1.less', '%bb.2.exit 2' ]
- - id: 1
- name: less
- - id: 2
- name: exit
-...
diff --git a/test/CodeGen/MIR/expected-mbb-reference-for-successor-mbb.mir b/test/CodeGen/MIR/expected-mbb-reference-for-successor-mbb.mir
deleted file mode 100644
index ce9192901d7d..000000000000
--- a/test/CodeGen/MIR/expected-mbb-reference-for-successor-mbb.mir
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
-
---- |
-
- define i32 @foo(i32 %a) {
- entry:
- %0 = icmp sle i32 %a, 10
- br i1 %0, label %less, label %exit
-
- less:
- ret i32 0
-
- exit:
- ret i32 %a
- }
-
-...
----
-name: foo
-body:
- - id: 0
- name: entry
- # CHECK: [[@LINE+1]]:35: expected a machine basic block reference
- successors: [ '%bb.1.less', '2' ]
- - id: 1
- name: less
- - id: 2
- name: exit
-...
diff --git a/test/CodeGen/MIR/frame-info.mir b/test/CodeGen/MIR/frame-info.mir
deleted file mode 100644
index c5468f94f33a..000000000000
--- a/test/CodeGen/MIR/frame-info.mir
+++ /dev/null
@@ -1,91 +0,0 @@
-# RUN: llc -start-after machine-sink -stop-after machine-sink -o /dev/null %s | FileCheck %s
-# This test ensures that the MIR parser parses machine frame info properties
-# correctly.
-
---- |
-
- define i32 @test(i32 %a) {
- entry:
- %b = alloca i32
- store i32 %a, i32* %b
- %c = load i32, i32* %b
- ret i32 %c
- }
-
- define i32 @test2(i32 %a) {
- entry:
- %b = alloca i32
- store i32 %a, i32* %b
- %c = load i32, i32* %b
- ret i32 %c
- }
-
-...
----
-name: test
-isSSA: true
-tracksRegLiveness: true
-
-# CHECK: frameInfo:
-# CHECK-NEXT: isFrameAddressTaken: false
-# CHECK-NEXT: isReturnAddressTaken: false
-# CHECK-NEXT: hasStackMap: false
-# CHECK-NEXT: hasPatchPoint: false
-# CHECK-NEXT: stackSize: 0
-# CHECK-NEXT: offsetAdjustment: 0
-# Note: max alignment can be target specific when printed.
-# CHECK-NEXT: maxAlignment:
-# CHECK-NEXT: adjustsStack: false
-# CHECK-NEXT: hasCalls: false
-# CHECK-NEXT: maxCallFrameSize: 0
-# CHECK-NEXT: hasOpaqueSPAdjustment: false
-# CHECK-NEXT: hasVAStart: false
-# CHECK-NEXT: hasMustTailInVarArgFunc: false
-# CHECK: body
-frameInfo:
- maxAlignment: 4
-body:
- - id: 0
- name: entry
-...
----
-name: test2
-isSSA: true
-tracksRegLiveness: true
-
-# CHECK: test2
-# CHECK: frameInfo:
-# CHECK-NEXT: isFrameAddressTaken: true
-# CHECK-NEXT: isReturnAddressTaken: true
-# CHECK-NEXT: hasStackMap: true
-# CHECK-NEXT: hasPatchPoint: true
-# CHECK-NEXT: stackSize: 4
-# CHECK-NEXT: offsetAdjustment: 4
-# Note: max alignment can be target specific when printed.
-# CHECK-NEXT: maxAlignment:
-# CHECK-NEXT: adjustsStack: true
-# CHECK-NEXT: hasCalls: true
-# CHECK-NEXT: maxCallFrameSize: 4
-# CHECK-NEXT: hasOpaqueSPAdjustment: true
-# CHECK-NEXT: hasVAStart: true
-# CHECK-NEXT: hasMustTailInVarArgFunc: true
-# CHECK: body
-frameInfo:
- isFrameAddressTaken: true
- isReturnAddressTaken: true
- hasStackMap: true
- hasPatchPoint: true
- stackSize: 4
- offsetAdjustment: 4
- maxAlignment: 4
- adjustsStack: true
- hasCalls: true
- maxCallFrameSize: 4
- hasOpaqueSPAdjustment: true
- hasVAStart: true
- hasMustTailInVarArgFunc: true
-body:
- - id: 0
- name: entry
-...
-
diff --git a/test/CodeGen/MIR/llvmIR.mir b/test/CodeGen/MIR/llvmIR.mir
deleted file mode 100644
index 3c084ad7d393..000000000000
--- a/test/CodeGen/MIR/llvmIR.mir
+++ /dev/null
@@ -1,37 +0,0 @@
-# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
-# This test ensures that the LLVM IR that's embedded with MIR is parsed
-# correctly.
-
---- |
- ; CHECK: define i32 @foo(i32 %x, i32 %y)
- ; CHECK: %z = alloca i32, align 4
- ; CHECK: store i32 %x, i32* %z, align 4
- ; CHECK: br label %Test
- ; CHECK: Test:
- ; CHECK: %m = load i32, i32* %z, align 4
- ; CHECK: %cond = icmp eq i32 %y, %m
- ; CHECK: br i1 %cond, label %IfEqual, label %IfUnequal
- ; CHECK: IfEqual:
- ; CHECK: ret i32 1
- ; CHECK: IfUnequal:
- ; CHECK: ret i32 0
- define i32 @foo(i32 %x, i32 %y) {
- %z = alloca i32, align 4
- store i32 %x, i32* %z, align 4
- br label %Test
- Test:
- %m = load i32, i32* %z, align 4
- %cond = icmp eq i32 %y, %m
- br i1 %cond, label %IfEqual, label %IfUnequal
- IfEqual:
- ret i32 1
- IfUnequal:
- ret i32 0
- }
-
-...
----
-name: foo
-body:
- - id: 0
-...
diff --git a/test/CodeGen/MIR/llvmIRMissing.mir b/test/CodeGen/MIR/llvmIRMissing.mir
deleted file mode 100644
index 80cea5a6fdaa..000000000000
--- a/test/CodeGen/MIR/llvmIRMissing.mir
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
-# This test ensures that the MIR parser accepts files without the LLVM IR.
-
----
-# CHECK: name: foo
-name: foo
-body:
- - id: 0
-...
diff --git a/test/CodeGen/MIR/machine-basic-block-redefinition-error.mir b/test/CodeGen/MIR/machine-basic-block-redefinition-error.mir
deleted file mode 100644
index deac3b0b69bf..000000000000
--- a/test/CodeGen/MIR/machine-basic-block-redefinition-error.mir
+++ /dev/null
@@ -1,17 +0,0 @@
-# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
-
---- |
-
- define i32 @foo() {
- entry:
- ret i32 0
- }
-
-...
----
-name: foo
-body:
- # CHECK: redefinition of machine basic block with id #0
- - id: 0
- - id: 0
-...
diff --git a/test/CodeGen/MIR/machine-basic-block-unknown-name.mir b/test/CodeGen/MIR/machine-basic-block-unknown-name.mir
deleted file mode 100644
index df8eee9d2708..000000000000
--- a/test/CodeGen/MIR/machine-basic-block-unknown-name.mir
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
-# This test ensures that an error is reported whenever the MIR parser can't find
-# a basic block with the machine basis block's name.
-
---- |
-
- define i32 @foo() {
- entry:
- ret i32 0
- }
-
-...
----
-name: foo
-body:
- # CHECK: [[@LINE+2]]:18: basic block 'entrie' is not defined in the function 'foo'
- - id: 0
- name: entrie
-...
diff --git a/test/CodeGen/MIR/machine-function-missing-function.mir b/test/CodeGen/MIR/machine-function-missing-function.mir
deleted file mode 100644
index 424c34aae847..000000000000
--- a/test/CodeGen/MIR/machine-function-missing-function.mir
+++ /dev/null
@@ -1,23 +0,0 @@
-# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
-# This test ensures that an error is reported when the mir file has LLVM IR and
-# one of the machine functions has a name that doesn't match any function in
-# the LLVM IR.
-
---- |
-
- define i32 @foo() {
- ret i32 0
- }
-
-...
----
-name: foo
-body:
- - id: 0
-...
----
-# CHECK: function 'faa' isn't defined in the provided LLVM IR
-name: faa
-body:
- - id: 0
-...
diff --git a/test/CodeGen/MIR/machine-function-missing-name.mir b/test/CodeGen/MIR/machine-function-missing-name.mir
deleted file mode 100644
index a868a65d35f2..000000000000
--- a/test/CodeGen/MIR/machine-function-missing-name.mir
+++ /dev/null
@@ -1,26 +0,0 @@
-# RUN: not llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
-# This test ensures that an error is reported when a machine function doesn't
-# have a name attribute.
-
---- |
-
- define i32 @foo() {
- ret i32 0
- }
-
- define i32 @bar() {
- ret i32 0
- }
-
-...
----
-# CHECK: [[@LINE+1]]:1: missing required key 'name'
-nme: foo
-body:
- - id: 0
-...
----
-name: bar
-body:
- - id: 0
-...
diff --git a/test/CodeGen/MIR/machine-function.mir b/test/CodeGen/MIR/machine-function.mir
deleted file mode 100644
index afd10ab02c26..000000000000
--- a/test/CodeGen/MIR/machine-function.mir
+++ /dev/null
@@ -1,66 +0,0 @@
-# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
-# This test ensures that the MIR parser parses machine functions correctly.
-
---- |
-
- define i32 @foo() {
- ret i32 0
- }
-
- define i32 @bar() {
- ret i32 0
- }
-
- define i32 @func() {
- ret i32 0
- }
-
- define i32 @func2() {
- ret i32 0
- }
-
-...
----
-# CHECK: name: foo
-# CHECK-NEXT: alignment:
-# CHECK-NEXT: exposesReturnsTwice: false
-# CHECK-NEXT: hasInlineAsm: false
-# CHECK: ...
-name: foo
-body:
- - id: 0
-...
----
-# CHECK: name: bar
-# CHECK-NEXT: alignment:
-# CHECK-NEXT: exposesReturnsTwice: false
-# CHECK-NEXT: hasInlineAsm: false
-# CHECK: ...
-name: bar
-body:
- - id: 0
-...
----
-# CHECK: name: func
-# CHECK-NEXT: alignment: 8
-# CHECK-NEXT: exposesReturnsTwice: false
-# CHECK-NEXT: hasInlineAsm: false
-# CHECK: ...
-name: func
-alignment: 8
-body:
- - id: 0
-...
----
-# CHECK: name: func2
-# CHECK-NEXT: alignment: 16
-# CHECK-NEXT: exposesReturnsTwice: true
-# CHECK-NEXT: hasInlineAsm: true
-# CHECK: ...
-name: func2
-alignment: 16
-exposesReturnsTwice: true
-hasInlineAsm: true
-body:
- - id: 0
-...
diff --git a/test/CodeGen/MIR/register-info.mir b/test/CodeGen/MIR/register-info.mir
deleted file mode 100644
index 9585faa96223..000000000000
--- a/test/CodeGen/MIR/register-info.mir
+++ /dev/null
@@ -1,40 +0,0 @@
-# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
-# This test ensures that the MIR parser parses machine register info properties
-# correctly.
-
---- |
-
- define i32 @foo() {
- entry:
- ret i32 0
- }
-
- define i32 @bar() {
- start:
- ret i32 0
- }
-
-...
----
-# CHECK: name: foo
-# CHECK: isSSA: false
-# CHECK-NEXT: tracksRegLiveness: false
-# CHECK-NEXT: tracksSubRegLiveness: false
-# CHECK: ...
-name: foo
-body:
- - id: 0
-...
----
-# CHECK: name: bar
-# CHECK: isSSA: false
-# CHECK-NEXT: tracksRegLiveness: true
-# CHECK-NEXT: tracksSubRegLiveness: true
-# CHECK: ...
-name: bar
-isSSA: false
-tracksRegLiveness: true
-tracksSubRegLiveness: true
-body:
- - id: 0
-...
diff --git a/test/CodeGen/MIR/successor-basic-blocks.mir b/test/CodeGen/MIR/successor-basic-blocks.mir
deleted file mode 100644
index 3fe01e3ad438..000000000000
--- a/test/CodeGen/MIR/successor-basic-blocks.mir
+++ /dev/null
@@ -1,58 +0,0 @@
-# RUN: llc -start-after branch-folder -stop-after branch-folder -o /dev/null %s | FileCheck %s
-# This test ensures that the MIR parser parses basic block successors correctly.
-
---- |
-
- define i32 @foo(i32 %a) {
- entry:
- %0 = icmp sle i32 %a, 10
- br i1 %0, label %less, label %exit
-
- less:
- ret i32 0
-
- exit:
- ret i32 %a
- }
-
- define i32 @bar(i32 %a) {
- entry:
- %b = icmp sle i32 %a, 10
- br i1 %b, label %0, label %1
-
- ; <label>:0
- ret i32 0
-
- ; <label>:1
- ret i32 %a
- }
-
-...
----
-name: foo
-body:
- # CHECK: name: entry
- # CHECK: successors: [ '%bb.1.less', '%bb.2.exit' ]
- # CHECK: name: less
- - id: 0
- name: entry
- successors: [ '%bb.1.less', '%bb.2.exit' ]
- - id: 1
- name: less
- - id: 2
- name: exit
-...
----
-name: bar
-body:
- # CHECK: name: bar
- # CHECK: name: entry
- # CHECK: successors: [ '%bb.1', '%bb.2' ]
- # CHECK: id: 1
- # CHECK: id: 2
- - id: 0
- name: entry
- successors: [ '%bb.1', '%bb.2' ]
- - id: 1
- - id: 2
-...
diff --git a/test/CodeGen/Mips/Fast-ISel/check-disabled-mcpus.ll b/test/CodeGen/Mips/Fast-ISel/check-disabled-mcpus.ll
new file mode 100644
index 000000000000..203e5a7e1595
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/check-disabled-mcpus.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mips -mcpu=mips2 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose <%s 2>&1 | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips3 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose <%s 2>&1 | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips4 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose <%s 2>&1 | FileCheck %s
+
+; RUN: llc -march=mips -mcpu=mips32r6 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose <%s 2>&1 | FileCheck %s
+
+; RUN: llc -march=mips -mcpu=mips64 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose <%s 2>&1 | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips64r2 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose <%s 2>&1 | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips64r3 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose <%s 2>&1 | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips64r5 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose <%s 2>&1 | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32r6 -O0 -relocation-model=pic \
+; RUN: -fast-isel-verbose <%s 2>&1 | FileCheck %s
+
+; CHECK: FastISel missed terminator: ret i32 0
+
+define i32 @foo() {
+entry:
+ ret i32 0
+}
diff --git a/test/CodeGen/Mips/addi.ll b/test/CodeGen/Mips/addi.ll
index b6af2ee45687..f1db843caf64 100644
--- a/test/CodeGen/Mips/addi.ll
+++ b/test/CodeGen/Mips/addi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=16
@i = global i32 6, align 4
@j = global i32 12, align 4
diff --git a/test/CodeGen/Mips/adjust-callstack-sp.ll b/test/CodeGen/Mips/adjust-callstack-sp.ll
index 8c61a650a962..e4afcd835005 100644
--- a/test/CodeGen/Mips/adjust-callstack-sp.ll
+++ b/test/CodeGen/Mips/adjust-callstack-sp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=mips -mcpu=mips16 | FileCheck %s -check-prefix=M16
+; RUN: llc < %s -march=mips -mattr=mips16 | FileCheck %s -check-prefix=M16
; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s -check-prefix=GP32
diff --git a/test/CodeGen/Mips/align16.ll b/test/CodeGen/Mips/align16.ll
index f385adfaa04c..ac6685dd5524 100644
--- a/test/CodeGen/Mips/align16.ll
+++ b/test/CodeGen/Mips/align16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=16
@i = global i32 25, align 4
@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
diff --git a/test/CodeGen/Mips/alloca16.ll b/test/CodeGen/Mips/alloca16.ll
index be8cc740310b..d728d3bb0b7b 100644
--- a/test/CodeGen/Mips/alloca16.ll
+++ b/test/CodeGen/Mips/alloca16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i32 25, align 4
@jjjj = global i32 35, align 4
diff --git a/test/CodeGen/Mips/and1.ll b/test/CodeGen/Mips/and1.ll
index 57076a4d4fcf..a2bf4f080a07 100644
--- a/test/CodeGen/Mips/and1.ll
+++ b/test/CodeGen/Mips/and1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@x = global i32 65504, align 4
@y = global i32 60929, align 4
diff --git a/test/CodeGen/Mips/asm-large-immediate.ll b/test/CodeGen/Mips/asm-large-immediate.ll
index 246fff615edb..c75b9e4ad12b 100644
--- a/test/CodeGen/Mips/asm-large-immediate.ll
+++ b/test/CodeGen/Mips/asm-large-immediate.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=mipsel < %s | FileCheck %s
+; RUN: llc -march=mipsel -no-integrated-as < %s | FileCheck %s
+
define void @test() {
entry:
; CHECK: /* result: 68719476738 */
diff --git a/test/CodeGen/Mips/atomicops.ll b/test/CodeGen/Mips/atomicops.ll
index 0ff9f5c22a84..18a48ca5023b 100644
--- a/test/CodeGen/Mips/atomicops.ll
+++ b/test/CodeGen/Mips/atomicops.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@.str = private unnamed_addr constant [8 x i8] c"%d, %d\0A\00", align 1
diff --git a/test/CodeGen/Mips/beqzc.ll b/test/CodeGen/Mips/beqzc.ll
index 37bece884212..c0845f7185e1 100644
--- a/test/CodeGen/Mips/beqzc.ll
+++ b/test/CodeGen/Mips/beqzc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
@i = global i32 0, align 4
@j = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/beqzc1.ll b/test/CodeGen/Mips/beqzc1.ll
index 1f5575f099fa..144983513edf 100644
--- a/test/CodeGen/Mips/beqzc1.ll
+++ b/test/CodeGen/Mips/beqzc1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
@i = global i32 0, align 4
@j = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/br-jmp.ll b/test/CodeGen/Mips/br-jmp.ll
index 9ca8d159614f..5e94c755c969 100644
--- a/test/CodeGen/Mips/br-jmp.ll
+++ b/test/CodeGen/Mips/br-jmp.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC16
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
define void @count(i32 %x, i32 %y, i32 %z) noreturn nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/brconeq.ll b/test/CodeGen/Mips/brconeq.ll
index f555528bbb64..7c3c31e0ec3c 100644
--- a/test/CodeGen/Mips/brconeq.ll
+++ b/test/CodeGen/Mips/brconeq.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 5, align 4
@j = global i32 10, align 4
diff --git a/test/CodeGen/Mips/brconeqk.ll b/test/CodeGen/Mips/brconeqk.ll
index 59edae82e5ad..85d257e8d797 100644
--- a/test/CodeGen/Mips/brconeqk.ll
+++ b/test/CodeGen/Mips/brconeqk.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 5, align 4
@result = global i32 0, align 4
diff --git a/test/CodeGen/Mips/brconeqz.ll b/test/CodeGen/Mips/brconeqz.ll
index 22c566407528..cf1beed49bb4 100644
--- a/test/CodeGen/Mips/brconeqz.ll
+++ b/test/CodeGen/Mips/brconeqz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 5, align 4
@result = global i32 0, align 4
diff --git a/test/CodeGen/Mips/brconge.ll b/test/CodeGen/Mips/brconge.ll
index 46d19847d9bc..f3f059ff2d54 100644
--- a/test/CodeGen/Mips/brconge.ll
+++ b/test/CodeGen/Mips/brconge.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 5, align 4
@j = global i32 10, align 4
diff --git a/test/CodeGen/Mips/brcongt.ll b/test/CodeGen/Mips/brcongt.ll
index cefacb8318b0..7dffdb411211 100644
--- a/test/CodeGen/Mips/brcongt.ll
+++ b/test/CodeGen/Mips/brcongt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 5, align 4
@j = global i32 10, align 4
diff --git a/test/CodeGen/Mips/brconle.ll b/test/CodeGen/Mips/brconle.ll
index e1f15ecb6b92..99599f84db17 100644
--- a/test/CodeGen/Mips/brconle.ll
+++ b/test/CodeGen/Mips/brconle.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 -5, align 4
@j = global i32 10, align 4
diff --git a/test/CodeGen/Mips/brconlt.ll b/test/CodeGen/Mips/brconlt.ll
index 049f35c393fe..487018c22f26 100644
--- a/test/CodeGen/Mips/brconlt.ll
+++ b/test/CodeGen/Mips/brconlt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 5, align 4
@j = global i32 10, align 4
diff --git a/test/CodeGen/Mips/brconne.ll b/test/CodeGen/Mips/brconne.ll
index b260320b94e1..e0cbe378fe3c 100644
--- a/test/CodeGen/Mips/brconne.ll
+++ b/test/CodeGen/Mips/brconne.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 5, align 4
@j = global i32 5, align 4
diff --git a/test/CodeGen/Mips/brconnek.ll b/test/CodeGen/Mips/brconnek.ll
index 778a5cce72b3..0b9234fe3b9d 100644
--- a/test/CodeGen/Mips/brconnek.ll
+++ b/test/CodeGen/Mips/brconnek.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 5, align 4
@result = global i32 0, align 4
diff --git a/test/CodeGen/Mips/brconnez.ll b/test/CodeGen/Mips/brconnez.ll
index 754714b21daf..27cf9e8cacb8 100644
--- a/test/CodeGen/Mips/brconnez.ll
+++ b/test/CodeGen/Mips/brconnez.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 0, align 4
@result = global i32 0, align 4
diff --git a/test/CodeGen/Mips/brind.ll b/test/CodeGen/Mips/brind.ll
index a3e9b8011a2b..ed2c3b3dddb7 100644
--- a/test/CodeGen/Mips/brind.ll
+++ b/test/CodeGen/Mips/brind.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@main.L = internal unnamed_addr constant [5 x i8*] [i8* blockaddress(@main, %L1), i8* blockaddress(@main, %L2), i8* blockaddress(@main, %L3), i8* blockaddress(@main, %L4), i8* null], align 4
@str = private unnamed_addr constant [2 x i8] c"A\00"
diff --git a/test/CodeGen/Mips/brsize3.ll b/test/CodeGen/Mips/brsize3.ll
index dad0d841d4c6..1e76879409c6 100644
--- a/test/CodeGen/Mips/brsize3.ll
+++ b/test/CodeGen/Mips/brsize3.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-no-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-no-short
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-long
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-long
; ModuleID = 'brsize3.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/brsize3a.ll b/test/CodeGen/Mips/brsize3a.ll
index e1cd5893ceda..24516018b9b7 100644
--- a/test/CodeGen/Mips/brsize3a.ll
+++ b/test/CodeGen/Mips/brsize3a.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-short
; ModuleID = 'brsize3.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs.ll b/test/CodeGen/Mips/cconv/arguments-varargs.ll
index af217c92dab8..d1a196738aee 100644
--- a/test/CodeGen/Mips/cconv/arguments-varargs.ll
+++ b/test/CodeGen/Mips/cconv/arguments-varargs.ll
@@ -55,7 +55,7 @@ entry:
; Store [[VA]]
; O32-DAG: sw [[VA]], 0([[SP]])
-; ALL: # ANCHOR1
+; ALL: teqi $zero, 1
; Increment [[VA]]
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -89,7 +89,7 @@ entry:
; ALL-DAG: sh [[ARG1]], 2([[GV]])
-; ALL: # ANCHOR2
+; ALL: teqi $zero, 2
; Increment [[VA]] again.
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -117,12 +117,12 @@ entry:
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- call void asm sideeffect "# ANCHOR1", ""()
+ call void asm sideeffect "teqi $$zero, 1", ""()
%arg1 = va_arg i8** %ap, i16
%e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
store volatile i16 %arg1, i16* %e1, align 2
- call void asm sideeffect "# ANCHOR2", ""()
+ call void asm sideeffect "teqi $$zero, 2", ""()
%arg2 = va_arg i8** %ap, i16
%e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
store volatile i16 %arg2, i16* %e2, align 2
@@ -173,7 +173,7 @@ entry:
; Store [[VA]]
; O32-DAG: sw [[VA]], 0([[SP]])
-; ALL: # ANCHOR1
+; ALL: teqi $zero, 1
; Increment [[VA]]
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -207,7 +207,7 @@ entry:
; ALL-DAG: sw [[ARG1]], 4([[GV]])
-; ALL: # ANCHOR2
+; ALL: teqi $zero, 2
; Increment [[VA]] again.
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -235,12 +235,12 @@ entry:
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- call void asm sideeffect "# ANCHOR1", ""()
+ call void asm sideeffect "teqi $$zero, 1", ""()
%arg1 = va_arg i8** %ap, i32
%e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
store volatile i32 %arg1, i32* %e1, align 4
- call void asm sideeffect "# ANCHOR2", ""()
+ call void asm sideeffect "teqi $$zero, 2", ""()
%arg2 = va_arg i8** %ap, i32
%e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
store volatile i32 %arg2, i32* %e2, align 4
@@ -291,7 +291,7 @@ entry:
; Store [[VA]]
; O32-DAG: sw [[VA]], 0([[SP]])
-; ALL: # ANCHOR1
+; ALL: teqi $zero, 1
; Increment [[VA]] (and realign pointer for O32)
; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -328,7 +328,7 @@ entry:
; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
; NEW-DAG: sd [[ARG1]], 8([[GV]])
-; ALL: # ANCHOR2
+; ALL: teqi $zero, 2
; Increment [[VA]] again.
; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
@@ -362,12 +362,12 @@ entry:
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- call void asm sideeffect "# ANCHOR1", ""()
+ call void asm sideeffect "teqi $$zero, 1", ""()
%arg1 = va_arg i8** %ap, i64
%e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
store volatile i64 %arg1, i64* %e1, align 8
- call void asm sideeffect "# ANCHOR2", ""()
+ call void asm sideeffect "teqi $$zero, 2", ""()
%arg2 = va_arg i8** %ap, i64
%e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
store volatile i64 %arg2, i64* %e2, align 8
@@ -418,7 +418,7 @@ entry:
; Store [[VA]]
; O32-DAG: sw [[VA]], 0([[SP]])
-; ALL: # ANCHOR1
+; ALL: teqi $zero, 1
; Increment [[VA]]
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -452,7 +452,7 @@ entry:
; ALL-DAG: sh [[ARG1]], 2([[GV]])
-; ALL: # ANCHOR2
+; ALL: teqi $zero, 2
; Increment [[VA]] again.
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -480,12 +480,12 @@ entry:
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- call void asm sideeffect "# ANCHOR1", ""()
+ call void asm sideeffect "teqi $$zero, 1", ""()
%arg1 = va_arg i8** %ap, i16
%e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
store volatile i16 %arg1, i16* %e1, align 2
- call void asm sideeffect "# ANCHOR2", ""()
+ call void asm sideeffect "teqi $$zero, 2", ""()
%arg2 = va_arg i8** %ap, i16
%e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
store volatile i16 %arg2, i16* %e2, align 2
@@ -536,7 +536,7 @@ entry:
; Store [[VA]]
; O32-DAG: sw [[VA]], 0([[SP]])
-; ALL: # ANCHOR1
+; ALL: teqi $zero, 1
; Increment [[VA]]
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -570,7 +570,7 @@ entry:
; ALL-DAG: sw [[ARG1]], 4([[GV]])
-; ALL: # ANCHOR2
+; ALL: teqi $zero, 2
; Increment [[VA]] again.
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -598,12 +598,12 @@ entry:
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- call void asm sideeffect "# ANCHOR1", ""()
+ call void asm sideeffect "teqi $$zero, 1", ""()
%arg1 = va_arg i8** %ap, i32
%e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
store volatile i32 %arg1, i32* %e1, align 4
- call void asm sideeffect "# ANCHOR2", ""()
+ call void asm sideeffect "teqi $$zero, 2", ""()
%arg2 = va_arg i8** %ap, i32
%e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
store volatile i32 %arg2, i32* %e2, align 4
@@ -654,7 +654,7 @@ entry:
; Store [[VA]]
; O32-DAG: sw [[VA]], 0([[SP]])
-; ALL: # ANCHOR1
+; ALL: teqi $zero, 1
; Increment [[VA]] (and realign pointer for O32)
; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -691,7 +691,7 @@ entry:
; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
; NEW-DAG: sd [[ARG1]], 8([[GV]])
-; ALL: # ANCHOR2
+; ALL: teqi $zero, 2
; Increment [[VA]] again.
; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
@@ -725,12 +725,12 @@ entry:
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- call void asm sideeffect "# ANCHOR1", ""()
+ call void asm sideeffect "teqi $$zero, 1", ""()
%arg1 = va_arg i8** %ap, i64
%e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
store volatile i64 %arg1, i64* %e1, align 8
- call void asm sideeffect "# ANCHOR2", ""()
+ call void asm sideeffect "teqi $$zero, 2", ""()
%arg2 = va_arg i8** %ap, i64
%e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
store volatile i64 %arg2, i64* %e2, align 8
@@ -780,7 +780,7 @@ entry:
; Store [[VA]]
; O32-DAG: sw [[VA]], 0([[SP]])
-; ALL: # ANCHOR1
+; ALL: teqi $zero, 1
; Increment [[VA]]
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -814,7 +814,7 @@ entry:
; ALL-DAG: sh [[ARG1]], 2([[GV]])
-; ALL: # ANCHOR2
+; ALL: teqi $zero, 2
; Increment [[VA]] again.
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -842,12 +842,12 @@ entry:
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- call void asm sideeffect "# ANCHOR1", ""()
+ call void asm sideeffect "teqi $$zero, 1", ""()
%arg1 = va_arg i8** %ap, i16
%e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
store volatile i16 %arg1, i16* %e1, align 2
- call void asm sideeffect "# ANCHOR2", ""()
+ call void asm sideeffect "teqi $$zero, 2", ""()
%arg2 = va_arg i8** %ap, i16
%e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
store volatile i16 %arg2, i16* %e2, align 2
@@ -897,7 +897,7 @@ entry:
; Store [[VA]]
; O32-DAG: sw [[VA]], 0([[SP]])
-; ALL: # ANCHOR1
+; ALL: teqi $zero, 1
; Increment [[VA]]
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -931,7 +931,7 @@ entry:
; ALL-DAG: sw [[ARG1]], 4([[GV]])
-; ALL: # ANCHOR2
+; ALL: teqi $zero, 2
; Increment [[VA]] again.
; O32-DAG: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -959,12 +959,12 @@ entry:
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- call void asm sideeffect "# ANCHOR1", ""()
+ call void asm sideeffect "teqi $$zero, 1", ""()
%arg1 = va_arg i8** %ap, i32
%e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
store volatile i32 %arg1, i32* %e1, align 4
- call void asm sideeffect "# ANCHOR2", ""()
+ call void asm sideeffect "teqi $$zero, 2", ""()
%arg2 = va_arg i8** %ap, i32
%e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
store volatile i32 %arg2, i32* %e2, align 4
@@ -1014,7 +1014,7 @@ entry:
; Store [[VA]]
; O32-DAG: sw [[VA]], 0([[SP]])
-; ALL: # ANCHOR1
+; ALL: teqi $zero, 1
; Increment [[VA]] (and realign pointer for O32)
; O32: lw [[VA:\$[0-9]+]], 0([[SP]])
@@ -1051,7 +1051,7 @@ entry:
; NEW-DAG: ld [[ARG1:\$[0-9]+]], 0([[VA]])
; NEW-DAG: sd [[ARG1]], 8([[GV]])
-; ALL: # ANCHOR2
+; ALL: teqi $zero, 2
; Increment [[VA]] again.
; FIXME: We're still aligned from the last one but CodeGen doesn't spot that.
@@ -1085,12 +1085,12 @@ entry:
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
- call void asm sideeffect "# ANCHOR1", ""()
+ call void asm sideeffect "teqi $$zero, 1", ""()
%arg1 = va_arg i8** %ap, i64
%e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
store volatile i64 %arg1, i64* %e1, align 8
- call void asm sideeffect "# ANCHOR2", ""()
+ call void asm sideeffect "teqi $$zero, 2", ""()
%arg2 = va_arg i8** %ap, i64
%e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
store volatile i64 %arg2, i64* %e2, align 8
diff --git a/test/CodeGen/Mips/ci2.ll b/test/CodeGen/Mips/ci2.ll
index 4687748879ac..bb16fa83fc5c 100644
--- a/test/CodeGen/Mips/ci2.ll
+++ b/test/CodeGen/Mips/ci2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=constisle
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=constisle
@i = common global i32 0, align 4
@b = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/cmplarge.ll b/test/CodeGen/Mips/cmplarge.ll
index 79019065a905..1ca5b921e0bc 100644
--- a/test/CodeGen/Mips/cmplarge.ll
+++ b/test/CodeGen/Mips/cmplarge.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=cmp16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=cmp16
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
target triple = "mipsel--linux-gnu"
diff --git a/test/CodeGen/Mips/const1.ll b/test/CodeGen/Mips/const1.ll
index 1a5d58bd3f95..2bcd405179b2 100644
--- a/test/CodeGen/Mips/const1.ll
+++ b/test/CodeGen/Mips/const1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s
; ModuleID = 'const1.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/const4a.ll b/test/CodeGen/Mips/const4a.ll
index c31e54a01036..d1182d7fc6ec 100644
--- a/test/CodeGen/Mips/const4a.ll
+++ b/test/CodeGen/Mips/const4a.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax
; ModuleID = 'const4.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/const6.ll b/test/CodeGen/Mips/const6.ll
index 49e98ea78703..c576f573a43b 100644
--- a/test/CodeGen/Mips/const6.ll
+++ b/test/CodeGen/Mips/const6.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax
; ModuleID = 'const6.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/const6a.ll b/test/CodeGen/Mips/const6a.ll
index 54a3f2234dc2..653cdeb920f3 100644
--- a/test/CodeGen/Mips/const6a.ll
+++ b/test/CodeGen/Mips/const6a.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax
; ModuleID = 'const6a.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/div.ll b/test/CodeGen/Mips/div.ll
index 731841c554fa..92258bce02d2 100644
--- a/test/CodeGen/Mips/div.ll
+++ b/test/CodeGen/Mips/div.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i32 100, align 4
@jjjj = global i32 -4, align 4
diff --git a/test/CodeGen/Mips/div_rem.ll b/test/CodeGen/Mips/div_rem.ll
index e64529cee841..be1e001a24c3 100644
--- a/test/CodeGen/Mips/div_rem.ll
+++ b/test/CodeGen/Mips/div_rem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i32 103, align 4
@jjjj = global i32 -4, align 4
diff --git a/test/CodeGen/Mips/divu.ll b/test/CodeGen/Mips/divu.ll
index 5bc765a71eb9..ce1b70cacf6f 100644
--- a/test/CodeGen/Mips/divu.ll
+++ b/test/CodeGen/Mips/divu.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i32 100, align 4
@jjjj = global i32 4, align 4
diff --git a/test/CodeGen/Mips/divu_remu.ll b/test/CodeGen/Mips/divu_remu.ll
index a079440b913f..0e094cbe48ae 100644
--- a/test/CodeGen/Mips/divu_remu.ll
+++ b/test/CodeGen/Mips/divu_remu.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i32 103, align 4
@jjjj = global i32 4, align 4
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index 2f843d9da9a6..19f3d4d23d64 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -24,7 +24,7 @@ entry:
lpad: ; preds = %entry
; CHECK-EL: # %lpad
-; CHECK-EL: bne $5
+; CHECK-EL: beq $5
%exn.val = landingpad { i8*, i32 }
cleanup
diff --git a/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll b/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
index 58dd16c9f9c8..54092b4e3ebe 100644
--- a/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
+++ b/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
@@ -1,5 +1,5 @@
; Check that register scavenging spill slot is close to $fp.
-; RUN: llc -march=mipsel -O0 -fast-isel=false < %s | FileCheck %s
+; RUN: llc -march=mipsel -O0 < %s | FileCheck %s
; CHECK: sw ${{.*}}, 8($sp)
; CHECK: lw ${{.*}}, 8($sp)
@@ -31,4 +31,4 @@ entry:
ret i32 0
}
-attributes #0 = { noinline optnone "no-frame-pointer-elim"="true" }
+attributes #0 = { noinline "no-frame-pointer-elim"="true" }
diff --git a/test/CodeGen/Mips/emutls_generic.ll b/test/CodeGen/Mips/emutls_generic.ll
new file mode 100644
index 000000000000..a6cf23aa67ff
--- /dev/null
+++ b/test/CodeGen/Mips/emutls_generic.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -emulated-tls -mtriple=mipsel-linux-android -relocation-model=pic \
+; RUN: | FileCheck -check-prefix=MIPS_32 %s
+; RUN: llc < %s -emulated-tls -mtriple=mips64el-linux-android -relocation-model=pic \
+; RUN: | FileCheck -check-prefix=MIPS_64 %s
+
+; Make sure that TLS symbols are emitted in expected order.
+
+@external_x = external thread_local global i32, align 8
+@external_y = thread_local global i8 7, align 2
+@internal_y = internal thread_local global i64 9, align 16
+
+define i32* @get_external_x() {
+entry:
+ ret i32* @external_x
+}
+
+define i8* @get_external_y() {
+entry:
+ ret i8* @external_y
+}
+
+define i64* @get_internal_y() {
+entry:
+ ret i64* @internal_y
+}
+
+; MIPS_32-LABEL: get_external_y:
+; MIPS_32-LABEL: get_internal_y:
+; MIPS_32: lw {{.+}}(__emutls_v.internal_y
+; MIPS_32: lw {{.+}}call16(__emutls_get_address
+; MIPS_32-NOT: __emutls_t.external_x
+; MIPS_32-NOT: __emutls_v.external_x:
+; MIPS_32: .data
+; MIPS_32: .align 2
+; MIPS_32-LABEL: __emutls_v.external_y:
+; MIPS_32: .section .rodata,
+; MIPS_32-LABEL: __emutls_t.external_y:
+; MIPS_32-NEXT: .byte 7
+; MIPS_32: .data
+; MIPS_32: .align 2
+; MIPS_32-LABEL: __emutls_v.internal_y:
+; MIPS_32-NEXT: .4byte 8
+; MIPS_32-NEXT: .4byte 16
+; MIPS_32-NEXT: .4byte 0
+; MIPS_32-NEXT: .4byte __emutls_t.internal_y
+; MIPS_32-LABEL: __emutls_t.internal_y:
+; MIPS_32-NEXT: .8byte 9
+
+; MIPS_64-LABEL: get_external_x:
+; MIPS_64-LABEL: get_external_y:
+; MIPS_64-LABEL: get_internal_y:
+; MIPS_64: ld {{.+}}(__emutls_v.internal_y
+; MIPS_64: ld {{.+}}call16(__emutls_get_address
+; MIPS_64-NOT: __emutls_t.external_x
+; MIPS_64-NOT: __emutls_v.external_x:
+; MIPS_64-LABEL: __emutls_v.external_y:
+; MIPS_64-NOT: __emutls_v.external_x:
+; MIPS_64: .section .rodata,
+; MIPS_64-LABEL: __emutls_t.external_y:
+; MIPS_64-NEXT: .byte 7
+; MIPS_64: .data
+; MIPS_64: .align 3
+; MIPS_64-LABEL: __emutls_v.internal_y:
+; MIPS_64-NEXT: .8byte 8
+; MIPS_64-NEXT: .8byte 16
+; MIPS_64-NEXT: .8byte 0
+; MIPS_64-NEXT: .8byte __emutls_t.internal_y
+; MIPS_64: .section .rodata,
+; MIPS_64-LABEL: __emutls_t.internal_y:
+; MIPS_64-NEXT: .8byte 9
diff --git a/test/CodeGen/Mips/ex2.ll b/test/CodeGen/Mips/ex2.ll
index 7547fdf81e35..87fe77035ec2 100644
--- a/test/CodeGen/Mips/ex2.ll
+++ b/test/CodeGen/Mips/ex2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
@_ZTIPKc = external constant i8*
diff --git a/test/CodeGen/Mips/extins.ll b/test/CodeGen/Mips/extins.ll
index 6604f89b1843..0b327a91bbfd 100644
--- a/test/CodeGen/Mips/extins.ll
+++ b/test/CodeGen/Mips/extins.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
-; RUN: llc < %s -march=mips -mcpu=mips16 | FileCheck %s -check-prefix=16
+; RUN: llc < %s -march=mips -mattr=mips16 | FileCheck %s -check-prefix=16
define i32 @ext0_5_9(i32 %s, i32 %pos, i32 %sz) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/f16abs.ll b/test/CodeGen/Mips/f16abs.ll
index 838983274e9b..6c33e011719e 100644
--- a/test/CodeGen/Mips/f16abs.ll
+++ b/test/CodeGen/Mips/f16abs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=static
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=static
@y = global double -1.450000e+00, align 8
@x = common global double 0.000000e+00, align 8
diff --git a/test/CodeGen/Mips/fixdfsf.ll b/test/CodeGen/Mips/fixdfsf.ll
index 869579922d51..5eb336bf6499 100644
--- a/test/CodeGen/Mips/fixdfsf.ll
+++ b/test/CodeGen/Mips/fixdfsf.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic2
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic2
@x = common global double 0.000000e+00, align 8
@y = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/fp16instrinsmc.ll b/test/CodeGen/Mips/fp16instrinsmc.ll
index 797be2668d40..258b1bf97d7b 100644
--- a/test/CodeGen/Mips/fp16instrinsmc.ll
+++ b/test/CodeGen/Mips/fp16instrinsmc.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=1010111 -mips-os16 < %s | FileCheck %s -check-prefix=fmask
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static -mips32-function-mask=1010111 -mips-os16 < %s | FileCheck %s -check-prefix=fmask
@x = global float 1.500000e+00, align 4
@xn = global float -1.900000e+01, align 4
diff --git a/test/CodeGen/Mips/fp16mix.ll b/test/CodeGen/Mips/fp16mix.ll
index a94f838fb675..d97759422cf9 100644
--- a/test/CodeGen/Mips/fp16mix.ll
+++ b/test/CodeGen/Mips/fp16mix.ll
@@ -1,8 +1,8 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=10 -mips-os16 < %s | FileCheck %s -check-prefix=fmask1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static -mips32-function-mask=10 -mips-os16 < %s | FileCheck %s -check-prefix=fmask1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=01 -mips-os16 < %s | FileCheck %s -check-prefix=fmask2
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static -mips32-function-mask=01 -mips-os16 < %s | FileCheck %s -check-prefix=fmask2
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -mips32-function-mask=10. -mips-os16 < %s | FileCheck %s -check-prefix=fmask1nr
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static -mips32-function-mask=10. -mips-os16 < %s | FileCheck %s -check-prefix=fmask1nr
; Function Attrs: nounwind optsize readnone
define void @foo1() {
diff --git a/test/CodeGen/Mips/fp16static.ll b/test/CodeGen/Mips/fp16static.ll
index 4e5059ed39e9..341ecf02cb5c 100644
--- a/test/CodeGen/Mips/fp16static.ll
+++ b/test/CodeGen/Mips/fp16static.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
@x = common global float 0.000000e+00, align 4
diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll
index a0dbdf3afd47..f715313354ea 100644
--- a/test/CodeGen/Mips/helloworld.ll
+++ b/test/CodeGen/Mips/helloworld.ll
@@ -1,10 +1,10 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST2
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static -O3 < %s | FileCheck %s -check-prefix=ST2
;
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR
; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR32
diff --git a/test/CodeGen/Mips/hf16_1.ll b/test/CodeGen/Mips/hf16_1.ll
index 103fd2d7fd63..aea241e27195 100644
--- a/test/CodeGen/Mips/hf16_1.ll
+++ b/test/CodeGen/Mips/hf16_1.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=1
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=2
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=1
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=2
@x = common global float 0.000000e+00, align 4
diff --git a/test/CodeGen/Mips/hf16call32.ll b/test/CodeGen/Mips/hf16call32.ll
index 3b3f8f799111..9fc94cac5175 100644
--- a/test/CodeGen/Mips/hf16call32.ll
+++ b/test/CodeGen/Mips/hf16call32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=stel
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=stel
@x = common global float 0.000000e+00, align 4
@y = common global float 0.000000e+00, align 4
@@ -751,280 +751,280 @@ land.end289: ; preds = %land.rhs286, %land.
}
declare void @v_sf(float) #1
-; stel: .section .mips16.call.fp.v_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_v_sf
-; stel: mtc1 $4,$f12
-; stel: lui $25,%hi(v_sf)
-; stel: addiu $25,$25,%lo(v_sf)
-; stel: jr $25
-; stel: .end __call_stub_fp_v_sf
+; stel: .section .mips16.call.fp.v_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_v_sf
+; stel: mtc1 $4, $f12
+; stel: lui $25, %hi(v_sf)
+; stel: addiu $25, $25, %lo(v_sf)
+; stel: jr $25
+; stel: .end __call_stub_fp_v_sf
declare i32 @printf(i8*, ...) #1
declare void @v_df(double) #1
-; stel: .section .mips16.call.fp.v_df,"ax",@progbits
-; stel: .ent __call_stub_fp_v_df
+; stel: .section .mips16.call.fp.v_df,"ax",@progbits
+; stel: .ent __call_stub_fp_v_df
; stel: #APP
-; setl: .set reorder
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f13
-; stel: lui $25,%hi(v_df)
-; stel: addiu $25,$25,%lo(v_df)
-; stel: jr $25
-; stel: .end __call_stub_fp_v_df
+; stel: .set reorder
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f13
+; stel: lui $25, %hi(v_df)
+; stel: addiu $25, $25, %lo(v_df)
+; stel: jr $25
+; stel: .end __call_stub_fp_v_df
declare void @v_sf_sf(float, float) #1
-; stel: .section .mips16.call.fp.v_sf_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_v_sf_sf
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f14
-; stel: lui $25,%hi(v_sf_sf)
-; stel: addiu $25,$25,%lo(v_sf_sf)
-; stel: jr $25
-; stel: .end __call_stub_fp_v_sf_sf
+; stel: .section .mips16.call.fp.v_sf_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_v_sf_sf
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f14
+; stel: lui $25, %hi(v_sf_sf)
+; stel: addiu $25, $25, %lo(v_sf_sf)
+; stel: jr $25
+; stel: .end __call_stub_fp_v_sf_sf
declare void @v_sf_df(float, double) #1
-; stel: .section .mips16.call.fp.v_sf_df,"ax",@progbits
-; stel: .ent __call_stub_fp_v_sf_df
-; stel: mtc1 $4,$f12
-; stel: mtc1 $6,$f14
-; stel: mtc1 $7,$f15
-; stel: lui $25,%hi(v_sf_df)
-; stel: addiu $25,$25,%lo(v_sf_df)
-; stel: jr $25
-; stel: .end __call_stub_fp_v_sf_df
+; stel: .section .mips16.call.fp.v_sf_df,"ax",@progbits
+; stel: .ent __call_stub_fp_v_sf_df
+; stel: mtc1 $4, $f12
+; stel: mtc1 $6, $f14
+; stel: mtc1 $7, $f15
+; stel: lui $25, %hi(v_sf_df)
+; stel: addiu $25, $25, %lo(v_sf_df)
+; stel: jr $25
+; stel: .end __call_stub_fp_v_sf_df
declare void @v_df_sf(double, float) #1
-; stel: .section .mips16.call.fp.v_df_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_v_df_sf
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f13
-; stel: mtc1 $6,$f14
-; stel: lui $25,%hi(v_df_sf)
-; stel: addiu $25,$25,%lo(v_df_sf)
-; stel: jr $25
-; stel: .end __call_stub_fp_v_df_sf
+; stel: .section .mips16.call.fp.v_df_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_v_df_sf
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f13
+; stel: mtc1 $6, $f14
+; stel: lui $25, %hi(v_df_sf)
+; stel: addiu $25, $25, %lo(v_df_sf)
+; stel: jr $25
+; stel: .end __call_stub_fp_v_df_sf
declare void @v_df_df(double, double) #1
-; stel: .section .mips16.call.fp.v_df_df,"ax",@progbits
-; stel: .ent __call_stub_fp_v_df_df
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f13
-; stel: mtc1 $6,$f14
-; stel: mtc1 $7,$f15
-; stel: lui $25,%hi(v_df_df)
-; stel: addiu $25,$25,%lo(v_df_df)
-; stel: jr $25
-; stel: .end __call_stub_fp_v_df_df
+; stel: .section .mips16.call.fp.v_df_df,"ax",@progbits
+; stel: .ent __call_stub_fp_v_df_df
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f13
+; stel: mtc1 $6, $f14
+; stel: mtc1 $7, $f15
+; stel: lui $25, %hi(v_df_df)
+; stel: addiu $25, $25, %lo(v_df_df)
+; stel: jr $25
+; stel: .end __call_stub_fp_v_df_df
declare float @sf_v() #1
-; stel: .section .mips16.call.fp.sf_v,"ax",@progbits
-; stel: .ent __call_stub_fp_sf_v
+; stel: .section .mips16.call.fp.sf_v,"ax",@progbits
+; stel: .ent __call_stub_fp_sf_v
; stel: move $18, $31
; stel: jal sf_v
-; stel: mfc1 $2,$f0
-; stel: jr $18
-; stel: .end __call_stub_fp_sf_v
+; stel: mfc1 $2, $f0
+; stel: jr $18
+; stel: .end __call_stub_fp_sf_v
declare float @sf_sf(float) #1
-; stel: .section .mips16.call.fp.sf_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_sf_sf
-; stel: mtc1 $4,$f12
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.sf_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_sf_sf
+; stel: mtc1 $4, $f12
+; stel: move $18, ${{31|ra}}
; stel: jal sf_sf
-; stel: mfc1 $2,$f0
-; stel: jr $18
-; stel: .end __call_stub_fp_sf_sf
+; stel: mfc1 $2, $f0
+; stel: jr $18
+; stel: .end __call_stub_fp_sf_sf
declare float @sf_df(double) #1
-; stel: .section .mips16.call.fp.sf_df,"ax",@progbits
-; stel: .ent __call_stub_fp_sf_df
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f13
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.sf_df,"ax",@progbits
+; stel: .ent __call_stub_fp_sf_df
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f13
+; stel: move $18, ${{31|ra}}
; stel: jal sf_df
-; stel: mfc1 $2,$f0
-; stel: jr $18
-; stel: .end __call_stub_fp_sf_df
+; stel: mfc1 $2, $f0
+; stel: jr $18
+; stel: .end __call_stub_fp_sf_df
declare float @sf_sf_sf(float, float) #1
-; stel: .section .mips16.call.fp.sf_sf_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_sf_sf_sf
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f14
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.sf_sf_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_sf_sf_sf
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f14
+; stel: move $18, ${{31|ra}}
; stel: jal sf_sf_sf
-; stel: mfc1 $2,$f0
-; stel: jr $18
-; stel: .end __call_stub_fp_sf_sf_sf
+; stel: mfc1 $2, $f0
+; stel: jr $18
+; stel: .end __call_stub_fp_sf_sf_sf
declare float @sf_sf_df(float, double) #1
-; stel: .section .mips16.call.fp.sf_sf_df,"ax",@progbits
-; stel: .ent __call_stub_fp_sf_sf_df
-; stel: mtc1 $4,$f12
-; stel: mtc1 $6,$f14
-; stel: mtc1 $7,$f15
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.sf_sf_df,"ax",@progbits
+; stel: .ent __call_stub_fp_sf_sf_df
+; stel: mtc1 $4, $f12
+; stel: mtc1 $6, $f14
+; stel: mtc1 $7, $f15
+; stel: move $18, ${{31|ra}}
; stel: jal sf_sf_df
-; stel: mfc1 $2,$f0
-; stel: jr $18
-; stel: .end __call_stub_fp_sf_sf_df
+; stel: mfc1 $2, $f0
+; stel: jr $18
+; stel: .end __call_stub_fp_sf_sf_df
declare float @sf_df_sf(double, float) #1
-; stel: .section .mips16.call.fp.sf_df_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_sf_df_sf
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f13
-; stel: mtc1 $6,$f14
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.sf_df_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_sf_df_sf
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f13
+; stel: mtc1 $6, $f14
+; stel: move $18, ${{31|ra}}
; stel: jal sf_df_sf
-; stel: mfc1 $2,$f0
-; stel: jr $18
-; stel: .end __call_stub_fp_sf_df_sf
+; stel: mfc1 $2, $f0
+; stel: jr $18
+; stel: .end __call_stub_fp_sf_df_sf
declare float @sf_df_df(double, double) #1
-; stel: .section .mips16.call.fp.sf_df_df,"ax",@progbits
-; stel: .ent __call_stub_fp_sf_df_df
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f13
-; stel: mtc1 $6,$f14
-; stel: mtc1 $7,$f15
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.sf_df_df,"ax",@progbits
+; stel: .ent __call_stub_fp_sf_df_df
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f13
+; stel: mtc1 $6, $f14
+; stel: mtc1 $7, $f15
+; stel: move $18, ${{31|ra}}
; stel: jal sf_df_df
-; stel: mfc1 $2,$f0
-; stel: jr $18
-; stel: .end __call_stub_fp_sf_df_df
+; stel: mfc1 $2, $f0
+; stel: jr $18
+; stel: .end __call_stub_fp_sf_df_df
declare double @df_v() #1
-; stel: .section .mips16.call.fp.df_v,"ax",@progbits
-; stel: .ent __call_stub_fp_df_v
+; stel: .section .mips16.call.fp.df_v,"ax",@progbits
+; stel: .ent __call_stub_fp_df_v
; stel: move $18, $31
; stel: jal df_v
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f1
-; stel: jr $18
-; stel: .end __call_stub_fp_df_v
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f1
+; stel: jr $18
+; stel: .end __call_stub_fp_df_v
declare double @df_sf(float) #1
-; stel: .section .mips16.call.fp.df_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_df_sf
-; stel: mtc1 $4,$f12
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.df_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_df_sf
+; stel: mtc1 $4, $f12
+; stel: move $18, ${{31|ra}}
; stel: jal df_sf
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f1
-; stel: jr $18
-; stel: .end __call_stub_fp_df_sf
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f1
+; stel: jr $18
+; stel: .end __call_stub_fp_df_sf
declare double @df_df(double) #1
-; stel: .section .mips16.call.fp.df_df,"ax",@progbits
-; stel: .ent __call_stub_fp_df_df
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f13
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.df_df,"ax",@progbits
+; stel: .ent __call_stub_fp_df_df
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f13
+; stel: move $18, ${{31|ra}}
; stel: jal df_df
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f1
-; stel: jr $18
-; stel: .end __call_stub_fp_df_df
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f1
+; stel: jr $18
+; stel: .end __call_stub_fp_df_df
declare double @df_sf_sf(float, float) #1
-; stel: .section .mips16.call.fp.df_sf_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_df_sf_sf
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f14
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.df_sf_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_df_sf_sf
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f14
+; stel: move $18, ${{31|ra}}
; stel: jal df_sf_sf
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f1
-; stel: jr $18
-; stel: .end __call_stub_fp_df_sf_sf
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f1
+; stel: jr $18
+; stel: .end __call_stub_fp_df_sf_sf
declare double @df_sf_df(float, double) #1
-; stel: .section .mips16.call.fp.df_sf_df,"ax",@progbits
-; stel: .ent __call_stub_fp_df_sf_df
-; stel: mtc1 $4,$f12
-; stel: mtc1 $6,$f14
-; stel: mtc1 $7,$f15
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.df_sf_df,"ax",@progbits
+; stel: .ent __call_stub_fp_df_sf_df
+; stel: mtc1 $4, $f12
+; stel: mtc1 $6, $f14
+; stel: mtc1 $7, $f15
+; stel: move $18, ${{31|ra}}
; stel: jal df_sf_df
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f1
-; stel: jr $18
-; stel: .end __call_stub_fp_df_sf_df
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f1
+; stel: jr $18
+; stel: .end __call_stub_fp_df_sf_df
declare double @df_df_sf(double, float) #1
-; stel: .section .mips16.call.fp.df_df_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_df_df_sf
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f13
-; stel: mtc1 $6,$f14
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.df_df_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_df_df_sf
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f13
+; stel: mtc1 $6, $f14
+; stel: move $18, ${{31|ra}}
; stel: jal df_df_sf
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f1
-; stel: jr $18
-; stel: .end __call_stub_fp_df_df_sf
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f1
+; stel: jr $18
+; stel: .end __call_stub_fp_df_df_sf
declare double @df_df_df(double, double) #1
-; stel: .section .mips16.call.fp.df_df_df,"ax",@progbits
-; stel: .ent __call_stub_fp_df_df_df
-; stel: mtc1 $4,$f12
-; stel: mtc1 $5,$f13
-; stel: mtc1 $6,$f14
-; stel: mtc1 $7,$f15
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.df_df_df,"ax",@progbits
+; stel: .ent __call_stub_fp_df_df_df
+; stel: mtc1 $4, $f12
+; stel: mtc1 $5, $f13
+; stel: mtc1 $6, $f14
+; stel: mtc1 $7, $f15
+; stel: move $18, ${{31|ra}}
; stel: jal df_df_df
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f1
-; stel: jr $18
-; stel: .end __call_stub_fp_df_df_df
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f1
+; stel: jr $18
+; stel: .end __call_stub_fp_df_df_df
declare { float, float } @sc_v() #1
-; stel: .section .mips16.call.fp.sc_v,"ax",@progbits
-; stel: .ent __call_stub_fp_sc_v
+; stel: .section .mips16.call.fp.sc_v,"ax",@progbits
+; stel: .ent __call_stub_fp_sc_v
; stel: move $18, $31
; stel: jal sc_v
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f2
-; stel: jr $18
-; stel: .end __call_stub_fp_sc_v
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f2
+; stel: jr $18
+; stel: .end __call_stub_fp_sc_v
declare { float, float } @sc_sf(float) #1
-; stel: .section .mips16.call.fp.sc_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_sc_sf
-; stel: mtc1 $4,$f12
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.sc_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_sc_sf
+; stel: mtc1 $4, $f12
+; stel: move $18, ${{31|ra}}
; stel: jal sc_sf
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f2
-; stel: jr $18
-; stel: .end __call_stub_fp_sc_sf
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f2
+; stel: jr $18
+; stel: .end __call_stub_fp_sc_sf
declare { double, double } @dc_v() #1
-; stel: .section .mips16.call.fp.dc_v,"ax",@progbits
-; stel: .ent __call_stub_fp_dc_v
+; stel: .section .mips16.call.fp.dc_v,"ax",@progbits
+; stel: .ent __call_stub_fp_dc_v
; stel: move $18, $31
; stel: jal dc_v
-; stel: mfc1 $4,$f2
-; stel: mfc1 $5,$f3
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f1
-; stel: jr $18
-; stel: .end __call_stub_fp_dc_v
+; stel: mfc1 $4, $f2
+; stel: mfc1 $5, $f3
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f1
+; stel: jr $18
+; stel: .end __call_stub_fp_dc_v
declare { double, double } @dc_sf(float) #1
-; stel: .section .mips16.call.fp.dc_sf,"ax",@progbits
-; stel: .ent __call_stub_fp_dc_sf
-; stel: mtc1 $4,$f12
-; stel: move $18, $31
+; stel: .section .mips16.call.fp.dc_sf,"ax",@progbits
+; stel: .ent __call_stub_fp_dc_sf
+; stel: mtc1 $4, $f12
+; stel: move $18, ${{31|ra}}
; stel: jal dc_sf
-; stel: mfc1 $4,$f2
-; stel: mfc1 $5,$f3
-; stel: mfc1 $2,$f0
-; stel: mfc1 $3,$f1
-; stel: jr $18
-; stel: .end __call_stub_fp_dc_sf
+; stel: mfc1 $4, $f2
+; stel: mfc1 $5, $f3
+; stel: mfc1 $2, $f0
+; stel: mfc1 $3, $f1
+; stel: jr $18
+; stel: .end __call_stub_fp_dc_sf
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/hf16call32_body.ll b/test/CodeGen/Mips/hf16call32_body.ll
index d06256cc564f..1a04fd46f8bd 100644
--- a/test/CodeGen/Mips/hf16call32_body.ll
+++ b/test/CodeGen/Mips/hf16call32_body.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=stel
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=stel
@x = external global float
@xd = external global double
@@ -18,13 +18,13 @@ entry:
store float %0, float* @x, align 4
ret void
}
-; stel: .section .mips16.fn.v_sf,"ax",@progbits
-; stel: .ent __fn_stub_v_sf
-; stel: la $25,v_sf
-; stel: mfc1 $4,$f12
-; stel: jr $25
-; stel: __fn_local_v_sf = v_sf
-; stel: .end __fn_stub_v_sf
+; stel: .section .mips16.fn.v_sf,"ax",@progbits
+; stel: .ent __fn_stub_v_sf
+; stel: la $25, v_sf
+; stel: mfc1 $4, $f12
+; stel: jr $25
+; stel: __fn_local_v_sf = v_sf
+; stel: .end __fn_stub_v_sf
declare i32 @printf(i8*, ...) #1
@@ -38,14 +38,14 @@ entry:
ret void
}
-; stel: .section .mips16.fn.v_df,"ax",@progbits
-; stel: .ent __fn_stub_v_df
-; stel: la $25,v_df
-; stel: mfc1 $4,$f12
-; stel: mfc1 $5,$f13
-; stel: jr $25
-; stel: __fn_local_v_df = v_df
-; stel: .end __fn_stub_v_df
+; stel: .section .mips16.fn.v_df,"ax",@progbits
+; stel: .ent __fn_stub_v_df
+; stel: la $25, v_df
+; stel: mfc1 $4, $f12
+; stel: mfc1 $5, $f13
+; stel: jr $25
+; stel: __fn_local_v_df = v_df
+; stel: .end __fn_stub_v_df
; Function Attrs: nounwind
define void @v_sf_sf(float %p1, float %p2) #0 {
@@ -61,14 +61,14 @@ entry:
ret void
}
-; stel: .section .mips16.fn.v_sf_sf,"ax",@progbits
-; stel: .ent __fn_stub_v_sf_sf
-; stel: la $25,v_sf_sf
-; stel: mfc1 $4,$f12
-; stel: mfc1 $5,$f14
-; stel: jr $25
-; stel: __fn_local_v_sf_sf = v_sf_sf
-; stel: .end __fn_stub_v_sf_sf
+; stel: .section .mips16.fn.v_sf_sf,"ax",@progbits
+; stel: .ent __fn_stub_v_sf_sf
+; stel: la $25, v_sf_sf
+; stel: mfc1 $4, $f12
+; stel: mfc1 $5, $f14
+; stel: jr $25
+; stel: __fn_local_v_sf_sf = v_sf_sf
+; stel: .end __fn_stub_v_sf_sf
; Function Attrs: nounwind
define void @v_sf_df(float %p1, double %p2) #0 {
@@ -84,15 +84,15 @@ entry:
ret void
}
-; stel: .section .mips16.fn.v_sf_df,"ax",@progbits
-; stel: .ent __fn_stub_v_sf_df
-; stel: la $25,v_sf_df
-; stel: mfc1 $4,$f12
-; stel: mfc1 $6,$f14
-; stel: mfc1 $7,$f15
-; stel: jr $25
-; stel: __fn_local_v_sf_df = v_sf_df
-; stel: .end __fn_stub_v_sf_df
+; stel: .section .mips16.fn.v_sf_df,"ax",@progbits
+; stel: .ent __fn_stub_v_sf_df
+; stel: la $25, v_sf_df
+; stel: mfc1 $4, $f12
+; stel: mfc1 $6, $f14
+; stel: mfc1 $7, $f15
+; stel: jr $25
+; stel: __fn_local_v_sf_df = v_sf_df
+; stel: .end __fn_stub_v_sf_df
; Function Attrs: nounwind
define void @v_df_sf(double %p1, float %p2) #0 {
@@ -108,15 +108,15 @@ entry:
ret void
}
-; stel: .section .mips16.fn.v_df_sf,"ax",@progbits
-; stel: .ent __fn_stub_v_df_sf
-; stel: la $25,v_df_sf
-; stel: mfc1 $4,$f12
-; stel: mfc1 $5,$f13
-; stel: mfc1 $6,$f14
-; stel: jr $25
-; stel: __fn_local_v_df_sf = v_df_sf
-; stel: .end __fn_stub_v_df_sf
+; stel: .section .mips16.fn.v_df_sf,"ax",@progbits
+; stel: .ent __fn_stub_v_df_sf
+; stel: la $25, v_df_sf
+; stel: mfc1 $4, $f12
+; stel: mfc1 $5, $f13
+; stel: mfc1 $6, $f14
+; stel: jr $25
+; stel: __fn_local_v_df_sf = v_df_sf
+; stel: .end __fn_stub_v_df_sf
; Function Attrs: nounwind
define void @v_df_df(double %p1, double %p2) #0 {
@@ -132,16 +132,16 @@ entry:
ret void
}
-; stel: .section .mips16.fn.v_df_df,"ax",@progbits
-; stel: .ent __fn_stub_v_df_df
-; stel: la $25,v_df_df
-; stel: mfc1 $4,$f12
-; stel: mfc1 $5,$f13
-; stel: mfc1 $6,$f14
-; stel: mfc1 $7,$f15
-; stel: jr $25
-; stel: __fn_local_v_df_df = v_df_df
-; stel: .end __fn_stub_v_df_df
+; stel: .section .mips16.fn.v_df_df,"ax",@progbits
+; stel: .ent __fn_stub_v_df_df
+; stel: la $25, v_df_df
+; stel: mfc1 $4, $f12
+; stel: mfc1 $5, $f13
+; stel: mfc1 $6, $f14
+; stel: mfc1 $7, $f15
+; stel: jr $25
+; stel: __fn_local_v_df_df = v_df_df
+; stel: .end __fn_stub_v_df_df
; Function Attrs: nounwind
define float @sf_v() #0 {
@@ -162,13 +162,13 @@ entry:
}
-; stel: .section .mips16.fn.sf_sf,"ax",@progbits
-; stel: .ent __fn_stub_sf_sf
-; stel: la $25,sf_sf
-; stel: mfc1 $4,$f12
-; stel: jr $25
-; stel: __fn_local_sf_sf = sf_sf
-; stel: .end __fn_stub_sf_sf
+; stel: .section .mips16.fn.sf_sf,"ax",@progbits
+; stel: .ent __fn_stub_sf_sf
+; stel: la $25, sf_sf
+; stel: mfc1 $4, $f12
+; stel: jr $25
+; stel: __fn_local_sf_sf = sf_sf
+; stel: .end __fn_stub_sf_sf
; Function Attrs: nounwind
@@ -182,14 +182,14 @@ entry:
ret float %1
}
-; stel: .section .mips16.fn.sf_df,"ax",@progbits
-; stel: .ent __fn_stub_sf_df
-; stel: la $25,sf_df
-; stel: mfc1 $4,$f12
-; stel: mfc1 $5,$f13
-; stel: jr $25
-; stel: __fn_local_sf_df = sf_df
-; stel: .end __fn_stub_sf_df
+; stel: .section .mips16.fn.sf_df,"ax",@progbits
+; stel: .ent __fn_stub_sf_df
+; stel: la $25, sf_df
+; stel: mfc1 $4, $f12
+; stel: mfc1 $5, $f13
+; stel: jr $25
+; stel: __fn_local_sf_df = sf_df
+; stel: .end __fn_stub_sf_df
; Function Attrs: nounwind
define float @sf_sf_sf(float %p1, float %p2) #0 {
@@ -206,14 +206,14 @@ entry:
ret float %2
}
-; stel: .section .mips16.fn.sf_sf_sf,"ax",@progbits
-; stel: .ent __fn_stub_sf_sf_sf
-; stel: la $25,sf_sf_sf
-; stel: mfc1 $4,$f12
-; stel: mfc1 $5,$f14
-; stel: jr $25
-; stel: __fn_local_sf_sf_sf = sf_sf_sf
-; stel: .end __fn_stub_sf_sf_sf
+; stel: .section .mips16.fn.sf_sf_sf,"ax",@progbits
+; stel: .ent __fn_stub_sf_sf_sf
+; stel: la $25, sf_sf_sf
+; stel: mfc1 $4, $f12
+; stel: mfc1 $5, $f14
+; stel: jr $25
+; stel: __fn_local_sf_sf_sf = sf_sf_sf
+; stel: .end __fn_stub_sf_sf_sf
; Function Attrs: nounwind
define float @sf_sf_df(float %p1, double %p2) #0 {
@@ -230,15 +230,15 @@ entry:
ret float %2
}
-; stel: .section .mips16.fn.sf_sf_df,"ax",@progbits
-; stel: .ent __fn_stub_sf_sf_df
-; stel: la $25,sf_sf_df
-; stel: mfc1 $4,$f12
-; stel: mfc1 $6,$f14
-; stel: mfc1 $7,$f15
-; stel: jr $25
-; stel: __fn_local_sf_sf_df = sf_sf_df
-; stel: .end __fn_stub_sf_sf_df
+; stel: .section .mips16.fn.sf_sf_df,"ax",@progbits
+; stel: .ent __fn_stub_sf_sf_df
+; stel: la $25, sf_sf_df
+; stel: mfc1 $4, $f12
+; stel: mfc1 $6, $f14
+; stel: mfc1 $7, $f15
+; stel: jr $25
+; stel: __fn_local_sf_sf_df = sf_sf_df
+; stel: .end __fn_stub_sf_sf_df
; Function Attrs: nounwind
define float @sf_df_sf(double %p1, float %p2) #0 {
@@ -255,15 +255,15 @@ entry:
ret float %2
}
-; stel: .section .mips16.fn.sf_df_sf,"ax",@progbits
-; stel: .ent __fn_stub_sf_df_sf
-; stel: la $25,sf_df_sf
-; stel: mfc1 $4,$f12
-; stel: mfc1 $5,$f13
-; stel: mfc1 $6,$f14
-; stel: jr $25
-; stel: __fn_local_sf_df_sf = sf_df_sf
-; stel: .end __fn_stub_sf_df_sf
+; stel: .section .mips16.fn.sf_df_sf,"ax",@progbits
+; stel: .ent __fn_stub_sf_df_sf
+; stel: la $25, sf_df_sf
+; stel: mfc1 $4, $f12
+; stel: mfc1 $5, $f13
+; stel: mfc1 $6, $f14
+; stel: jr $25
+; stel: __fn_local_sf_df_sf = sf_df_sf
+; stel: .end __fn_stub_sf_df_sf
; Function Attrs: nounwind
define float @sf_df_df(double %p1, double %p2) #0 {
@@ -280,15 +280,15 @@ entry:
ret float %2
}
-; stel: .section .mips16.fn.sf_df_df,"ax",@progbits
-; stel: .ent __fn_stub_sf_df_df
-; stel: la $25,sf_df_df
-; stel: mfc1 $4,$f12
-; stel: mfc1 $5,$f13
-; stel: mfc1 $6,$f14
-; stel: mfc1 $7,$f15
-; stel: jr $25
-; stel: __fn_local_sf_df_df = sf_df_df
-; stel: .end __fn_stub_sf_df_df
+; stel: .section .mips16.fn.sf_df_df,"ax",@progbits
+; stel: .ent __fn_stub_sf_df_df
+; stel: la $25, sf_df_df
+; stel: mfc1 $4, $f12
+; stel: mfc1 $5, $f13
+; stel: mfc1 $6, $f14
+; stel: mfc1 $7, $f15
+; stel: jr $25
+; stel: __fn_local_sf_df_df = sf_df_df
+; stel: .end __fn_stub_sf_df_df
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/hf1_body.ll b/test/CodeGen/Mips/hf1_body.ll
index 71a1b960c5b3..adf45109d69a 100644
--- a/test/CodeGen/Mips/hf1_body.ll
+++ b/test/CodeGen/Mips/hf1_body.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=picfp16
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=picfp16
@x = external global float
@@ -11,11 +11,11 @@ entry:
store float %0, float* @x, align 4
ret void
}
-; picfp16: .ent __fn_stub_v_sf
-; picfp16: .cpload $25
-; picfp16: .set reorder
-; picfp16: .reloc 0,R_MIPS_NONE,v_sf
-; picfp16: la $25,$__fn_local_v_sf
-; picfp16: mfc1 $4,$f12
-; picfp16: jr $25
-; picfp16: .end __fn_stub_v_sf
+; picfp16: .ent __fn_stub_v_sf
+; picfp16: .cpload $25
+; picfp16: .set reorder
+; picfp16: .reloc 0, R_MIPS_NONE, v_sf
+; picfp16: la $25, $__fn_local_v_sf
+; picfp16: mfc1 $4, $f12
+; picfp16: jr $25
+; picfp16: .end __fn_stub_v_sf
diff --git a/test/CodeGen/Mips/hfptrcall.ll b/test/CodeGen/Mips/hfptrcall.ll
index c9f1fe973796..f6ce243c3047 100644
--- a/test/CodeGen/Mips/hfptrcall.ll
+++ b/test/CodeGen/Mips/hfptrcall.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=picel
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=picel
@ptrsv = global float ()* @sv, align 4
@ptrdv = global double ()* @dv, align 4
diff --git a/test/CodeGen/Mips/i32k.ll b/test/CodeGen/Mips/i32k.ll
index ba9cf7342308..57a2e788a301 100644
--- a/test/CodeGen/Mips/i32k.ll
+++ b/test/CodeGen/Mips/i32k.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic -mips16-constant-islands=false -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic -mips16-constant-islands=false -O3 < %s | FileCheck %s -check-prefix=16
@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
diff --git a/test/CodeGen/Mips/inlineasm-assembler-directives.ll b/test/CodeGen/Mips/inlineasm-assembler-directives.ll
index 88ceed4114c2..9f6f1ebb2858 100644
--- a/test/CodeGen/Mips/inlineasm-assembler-directives.ll
+++ b/test/CodeGen/Mips/inlineasm-assembler-directives.ll
@@ -10,14 +10,14 @@ entry:
; CHECK-NEXT: .set macro
; CHECK-NEXT: .set reorder
; CHECK: addi $9, ${{[2-9][0-9]?}}, 8
-; CHECK: subi ${{[2-9][0-9]?}}, $9, 6
+; CHECK: ori ${{[2-9][0-9]?}}, $9, 6
; CHECK: .set pop
; CHECK-NEXT: #NO_APP
%a = alloca i32, align 4
%b = alloca i32, align 4
store i32 20, i32* %a, align 4
%0 = load i32, i32* %a, align 4
- %1 = call i32 asm sideeffect "addi $$9, $1, 8\0A\09subi $0, $$9, 6", "=r,r,~{$1}"(i32 %0)
+ %1 = call i32 asm sideeffect "addi $$9, $1, 8\0A\09ori $0, $$9, 6", "=r,r,~{$1}"(i32 %0)
store i32 %1, i32* %b, align 4
ret void
}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
index 41991d07a4fe..63ee42c0c7cd 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
@@ -7,38 +7,38 @@ define i32 @main() nounwind {
entry:
; r with char
-;CHECK: #APP
-;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},23
-;CHECK: #NO_APP
- tail call i8 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i8 27, i8 23) nounwind
+;CHECK: #APP
+;CHECK: addiu ${{[0-9]+}}, ${{[0-9]+}}, 23
+;CHECK: #NO_APP
+ tail call i8 asm sideeffect "addiu $0, $1, $2", "=r,r,n"(i8 27, i8 23) nounwind
; r with short
-;CHECK: #APP
-;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},13
-;CHECK: #NO_APP
- tail call i16 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i16 17, i16 13) nounwind
+;CHECK: #APP
+;CHECK: addiu ${{[0-9]+}}, ${{[0-9]+}}, 13
+;CHECK: #NO_APP
+ tail call i16 asm sideeffect "addiu $0, $1, $2", "=r,r,n"(i16 17, i16 13) nounwind
; r with int
-;CHECK: #APP
-;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},3
-;CHECK: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,n"(i32 7, i32 3) nounwind
+;CHECK: #APP
+;CHECK: addiu ${{[0-9]+}}, ${{[0-9]+}}, 3
+;CHECK: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, $2", "=r,r,n"(i32 7, i32 3) nounwind
; Now c with 1024: make sure register $25 is picked
; CHECK: #APP
-; CHECK: addiu $25,${{[0-9]+}},1024
-; CHECK: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,$2", "=c,c,I"(i32 4194304, i32 1024) nounwind
+; CHECK: addiu $25, ${{[0-9]+}}, 1024
+; CHECK: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, $2", "=c,c,I"(i32 4194304, i32 1024) nounwind
; Now l with 1024: make sure register lo is picked. We do this by checking the instruction
; after the inline expression for a mflo to pull the value out of lo.
; CHECK: #APP
; CHECK: mtlo ${{[0-9]+}}
-; CHECK-NEXT: madd ${{[0-9]+}},${{[0-9]+}}
+; CHECK-NEXT: madd ${{[0-9]+}}, ${{[0-9]+}}
; CHECK: #NO_APP
-; CHECK-NEXT: mflo ${{[0-9]+}}
+; CHECK-NEXT: mflo ${{[0-9]+}}
%bosco = alloca i32, align 4
- call i32 asm sideeffect "\09mtlo $3 \0A\09\09madd $1,$2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind
+ call i32 asm sideeffect "\09mtlo $3 \0A\09\09madd $1, $2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind
store volatile i32 %4, i32* %bosco, align 4
ret i32 0
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
index acce63203812..3b078d6f70db 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
@@ -12,9 +12,9 @@ entry:
; r with long long
;CHECK: #APP
-;CHECK: addiu ${{[0-9]+}},${{[0-9]+}},3
+;CHECK: addiu ${{[0-9]+}}, ${{[0-9]+}}, 3
;CHECK: #NO_APP
- tail call i64 asm sideeffect "addiu $0,$1,$2", "=r,r,i"(i64 7, i64 3) nounwind
+ tail call i64 asm sideeffect "addiu $0, $1, $2", "=r,r,i"(i64 7, i64 3) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll
index b9415ee90cdb..6d41385d18de 100644
--- a/test/CodeGen/Mips/inlineasm-operand-code.ll
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -1,7 +1,9 @@
; Positive test for inline register constraints
;
-; RUN: llc -march=mipsel < %s | FileCheck -check-prefix=CHECK_LITTLE_32 %s
-; RUN: llc -march=mips < %s | FileCheck -check-prefix=CHECK_BIG_32 %s
+; RUN: llc -no-integrated-as -march=mipsel < %s | \
+; RUN: FileCheck -check-prefix=ALL -check-prefix=LE32 -check-prefix=GAS %s
+; RUN: llc -no-integrated-as -march=mips < %s | \
+; RUN: FileCheck -check-prefix=ALL -check-prefix=BE32 -check-prefix=GAS %s
%union.u_tag = type { i64 }
%struct.anon = type { i32, i32 }
@@ -10,171 +12,152 @@
; X with -3
define i32 @constraint_X() nounwind {
entry:
-;CHECK_LITTLE_32-LABEL: constraint_X:
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},0xfffffffffffffffd
-;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,${2:X}", "=r,r,I"(i32 7, i32 -3) ;
+; ALL-LABEL: constraint_X:
+; ALL: #APP
+; GAS: addiu ${{[0-9]+}}, ${{[0-9]+}}, 0xfffffffffffffffd
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, ${2:X}", "=r,r,I"(i32 7, i32 -3) ;
ret i32 0
}
; x with -3
define i32 @constraint_x() nounwind {
entry:
-;CHECK_LITTLE_32-LABEL: constraint_x:
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},0xfffd
-;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,${2:x}", "=r,r,I"(i32 7, i32 -3) ;
+; ALL-LABEL: constraint_x:
+; ALL: #APP
+; GAS: addiu ${{[0-9]+}}, ${{[0-9]+}}, 0xfffd
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, ${2:x}", "=r,r,I"(i32 7, i32 -3) ;
ret i32 0
}
; d with -3
define i32 @constraint_d() nounwind {
entry:
-;CHECK_LITTLE_32-LABEL: constraint_d:
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},-3
-;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,${2:d}", "=r,r,I"(i32 7, i32 -3) ;
+; ALL-LABEL: constraint_d:
+; ALL: #APP
+; ALL: addiu ${{[0-9]+}}, ${{[0-9]+}}, -3
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, ${2:d}", "=r,r,I"(i32 7, i32 -3) ;
ret i32 0
}
; m with -3
define i32 @constraint_m() nounwind {
entry:
-;CHECK_LITTLE_32-LABEL: constraint_m:
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},-4
-;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,${2:m}", "=r,r,I"(i32 7, i32 -3) ;
+; ALL-LABEL: constraint_m:
+; ALL: #APP
+; ALL: addiu ${{[0-9]+}}, ${{[0-9]+}}, -4
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, ${2:m}", "=r,r,I"(i32 7, i32 -3) ;
ret i32 0
}
; z with -3
define i32 @constraint_z() nounwind {
entry:
-;CHECK_LITTLE_32-LABEL: constraint_z:
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},-3
-;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 -3) ;
+; ALL-LABEL: constraint_z:
+; ALL: #APP
+; ALL: addiu ${{[0-9]+}}, ${{[0-9]+}}, -3
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, ${2:z}", "=r,r,I"(i32 7, i32 -3) ;
; z with 0
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},$0
-;CHECK_LITTLE_32: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
+; ALL: #APP
+; GAS: addiu ${{[0-9]+}}, ${{[0-9]+}}, $0
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, ${2:z}", "=r,r,I"(i32 7, i32 0) nounwind
; z with non-zero and the "r"(register) and "J"(integer zero) constraints
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
-;CHECK_LITTLE_32: #NO_APP
+; ALL: #APP
+; ALL: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+; ALL: #NO_APP
call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 7) nounwind
; z with zero and the "r"(register) and "J"(integer zero) constraints
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: mtc0 $0, ${{[0-9]+}}
-;CHECK_LITTLE_32: #NO_APP
+; ALL: #APP
+; ALL: mtc0 $0, ${{[0-9]+}}
+; ALL: #NO_APP
call void asm sideeffect "mtc0 ${0:z}, $$12", "Jr"(i32 0) nounwind
; z with non-zero and just the "r"(register) constraint
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
-;CHECK_LITTLE_32: #NO_APP
+; ALL: #APP
+; ALL: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+; ALL: #NO_APP
call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 7) nounwind
; z with zero and just the "r"(register) constraint
; FIXME: Check for $0, instead of other registers.
; We should be using $0 directly in this case, not real registers.
; When the materialization of 0 gets fixed, this test will fail.
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
-;CHECK_LITTLE_32: #NO_APP
+; ALL: #APP
+; ALL: mtc0 ${{[1-9][0-9]?}}, ${{[0-9]+}}
+; ALL: #NO_APP
call void asm sideeffect "mtc0 ${0:z}, $$12", "r"(i32 0) nounwind
ret i32 0
}
-; a long long in 32 bit mode (use to assert)
+; A long long in 32 bit mode (use to assert)
define i32 @constraint_longlong() nounwind {
entry:
-;CHECK_LITTLE_32-LABEL: constraint_longlong:
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: addiu ${{[0-9]+}},${{[0-9]+}},3
-;CHECK_LITTLE_32: #NO_APP
- tail call i64 asm sideeffect "addiu $0,$1,$2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind
+; ALL-LABEL: constraint_longlong:
+; ALL: #APP
+; ALL: addiu ${{[0-9]+}}, ${{[0-9]+}}, 3
+; ALL: #NO_APP
+ tail call i64 asm sideeffect "addiu $0, $1, $2 \0A\09", "=r,r,X"(i64 1229801703532086340, i64 3) nounwind
ret i32 0
}
-; D, in little endian the source reg will be 4 bytes into the long long
+; In little endian the source reg will be 4 bytes into the long long
+; In big endian the source reg will also be 4 bytes into the long long
define i32 @constraint_D() nounwind {
entry:
-;CHECK_LITTLE_32-LABEL: constraint_D:
-;CHECK_LITTLE_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
-;CHECK_LITTLE_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
-;CHECK_LITTLE_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
-;CHECK_LITTLE_32: #NO_APP
-
-; D, in big endian the source reg will also be 4 bytes into the long long
-;CHECK_BIG_32-LABEL: constraint_D:
-;CHECK_BIG_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
-;CHECK_BIG_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
-;CHECK_BIG_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
-;CHECK_BIG_32: #APP
-;CHECK_BIG_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
-;CHECK_BIG_32: #NO_APP
+; ALL-LABEL: constraint_D:
+; ALL: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+; ALL: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+; ALL: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+; ALL: #APP
+; LE32: or ${{[0-9]+}}, $[[SECOND]], ${{[0-9]+}}
+; BE32: or ${{[0-9]+}}, $[[SECOND]], ${{[0-9]+}}
+; ALL: #NO_APP
%bosco = load i64, i64* getelementptr inbounds (%union.u_tag, %union.u_tag* @uval, i32 0, i32 0), align 8
%trunc1 = trunc i64 %bosco to i32
- tail call i32 asm sideeffect "or $0,${1:D},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
+ tail call i32 asm sideeffect "or $0, ${1:D}, $2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
ret i32 0
}
-; L, in little endian the source reg will be 0 bytes into the long long
+; In little endian the source reg will be 0 bytes into the long long
+; In big endian the source reg will be 4 bytes into the long long
define i32 @constraint_L() nounwind {
entry:
-;CHECK_LITTLE_32-LABEL: constraint_L:
-;CHECK_LITTLE_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
-;CHECK_LITTLE_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
-;CHECK_LITTLE_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: or ${{[0-9]+}},$[[FIRST]],${{[0-9]+}}
-;CHECK_LITTLE_32: #NO_APP
-; L, in big endian the source reg will be 4 bytes into the long long
-;CHECK_BIG_32-LABEL: constraint_L:
-;CHECK_BIG_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
-;CHECK_BIG_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
-;CHECK_BIG_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
-;CHECK_BIG_32: #APP
-;CHECK_BIG_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
-;CHECK_BIG_32: #NO_APP
+; ALL-LABEL: constraint_L:
+; ALL: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+; ALL: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+; ALL: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+; ALL: #APP
+; LE32: or ${{[0-9]+}}, $[[FIRST]], ${{[0-9]+}}
+; BE32: or ${{[0-9]+}}, $[[SECOND]], ${{[0-9]+}}
+; ALL: #NO_APP
%bosco = load i64, i64* getelementptr inbounds (%union.u_tag, %union.u_tag* @uval, i32 0, i32 0), align 8
%trunc1 = trunc i64 %bosco to i32
- tail call i32 asm sideeffect "or $0,${1:L},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
+ tail call i32 asm sideeffect "or $0, ${1:L}, $2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
ret i32 0
}
-; M, in little endian the source reg will be 4 bytes into the long long
+; In little endian the source reg will be 4 bytes into the long long
+; In big endian the source reg will be 0 bytes into the long long
define i32 @constraint_M() nounwind {
entry:
-;CHECK_LITTLE_32-LABEL: constraint_M:
-;CHECK_LITTLE_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
-;CHECK_LITTLE_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
-;CHECK_LITTLE_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
-;CHECK_LITTLE_32: #APP
-;CHECK_LITTLE_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
-;CHECK_LITTLE_32: #NO_APP
-; M, in big endian the source reg will be 0 bytes into the long long
-;CHECK_BIG_32-LABEL: constraint_M:
-;CHECK_BIG_32: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
-;CHECK_BIG_32: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
-;CHECK_BIG_32: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
-;CHECK_BIG_32: #APP
-;CHECK_BIG_32: or ${{[0-9]+}},$[[FIRST]],${{[0-9]+}}
-;CHECK_BIG_32: #NO_APP
+; ALL-LABEL: constraint_M:
+; ALL: lw ${{[0-9]+}}, %got(uval)(${{[0-9,a-z]+}})
+; ALL: lw $[[SECOND:[0-9]+]], 4(${{[0-9]+}})
+; ALL: lw $[[FIRST:[0-9]+]], 0(${{[0-9]+}})
+; ALL: #APP
+; LE32: or ${{[0-9]+}}, $[[SECOND]], ${{[0-9]+}}
+; BE32: or ${{[0-9]+}}, $[[FIRST]], ${{[0-9]+}}
+; ALL: #NO_APP
%bosco = load i64, i64* getelementptr inbounds (%union.u_tag, %union.u_tag* @uval, i32 0, i32 0), align 8
%trunc1 = trunc i64 %bosco to i32
- tail call i32 asm sideeffect "or $0,${1:M},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
+ tail call i32 asm sideeffect "or $0, ${1:M}, $2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll
index 868433e0941f..a6ac07182ff5 100644
--- a/test/CodeGen/Mips/inlineasm_constraint.ll
+++ b/test/CodeGen/Mips/inlineasm_constraint.ll
@@ -1,55 +1,73 @@
-; RUN: llc -march=mipsel < %s | FileCheck %s
-
-define i32 @main() nounwind {
-entry:
+; RUN: llc -no-integrated-as -march=mipsel < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=GAS
+define void @constraint_I() nounwind {
; First I with short
-; CHECK: #APP
-; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},4096
-; CHECK: #NO_APP
- tail call i16 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i16 7, i16 4096) nounwind
+; ALL-LABEL: constraint_I:
+; ALL: #APP
+; ALL: addiu ${{[0-9]+}}, ${{[0-9]+}}, 4096
+; ALL: #NO_APP
+ tail call i16 asm sideeffect "addiu $0, $1, $2", "=r,r,I"(i16 7, i16 4096) nounwind
; Then I with int
-; CHECK: #APP
-; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3
-; CHECK: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,I"(i32 7, i32 -3) nounwind
+; ALL: #APP
+; ALL: addiu ${{[0-9]+}}, ${{[0-9]+}}, -3
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, $2", "=r,r,I"(i32 7, i32 -3) nounwind
+ ret void
+}
+define void @constraint_J() nounwind {
; Now J with 0
-; CHECK: #APP
-; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},0
-; CHECK: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,$2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind
+; ALL-LABEL: constraint_J:
+; ALL: #APP
+; ALL: addiu ${{[0-9]+}}, ${{[0-9]+}}, 0
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, $2\0A\09 ", "=r,r,J"(i32 7, i16 0) nounwind
+ ret void
+}
+define void @constraint_K() nounwind {
; Now K with 64
-; CHECK: #APP
-; CHECK: addu ${{[0-9]+}},${{[0-9]+}},64
-; CHECK: #NO_APP
- tail call i16 asm sideeffect "addu $0,$1,$2\0A\09 ", "=r,r,K"(i16 7, i16 64) nounwind
+; ALL: #APP
+; GAS: addu ${{[0-9]+}}, ${{[0-9]+}}, 64
+; ALL: #NO_APP
+ tail call i16 asm sideeffect "addu $0, $1, $2\0A\09 ", "=r,r,K"(i16 7, i16 64) nounwind
+ ret void
+}
+define void @constraint_L() nounwind {
; Now L with 0x00100000
-; CHECK: #APP
-; CHECK: add ${{[0-9]+}},${{[0-9]+}},${{[0-9]+}}
-; CHECK: #NO_APP
- tail call i32 asm sideeffect "add $0,$1,$3\0A\09", "=r,r,L,r"(i32 7, i32 1048576, i32 0) nounwind
+; ALL: #APP
+; ALL: add ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "add $0, $1, $3\0A\09", "=r,r,L,r"(i32 7, i32 1048576, i32 0) nounwind
+ ret void
+}
+define void @constraint_N() nounwind {
; Now N with -3
-; CHECK: #APP
-; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3
-; CHECK: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,N"(i32 7, i32 -3) nounwind
+; ALL: #APP
+; ALL: addiu ${{[0-9]+}}, ${{[0-9]+}}, -3
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, $2", "=r,r,N"(i32 7, i32 -3) nounwind
+ ret void
+}
+define void @constraint_O() nounwind {
; Now O with -3
-; CHECK: #APP
-; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},-3
-; CHECK: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,O"(i32 7, i16 -3) nounwind
+; ALL: #APP
+; ALL: addiu ${{[0-9]+}}, ${{[0-9]+}}, -3
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, $2", "=r,r,O"(i32 7, i16 -3) nounwind
+ ret void
+}
+define void @constraint_P() nounwind {
; Now P with 65535
-; CHECK: #APP
-; CHECK: addiu ${{[0-9]+}},${{[0-9]+}},65535
-; CHECK: #NO_APP
- tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
-
- ret i32 0
+; ALL: #APP
+; GAS: addiu ${{[0-9]+}}, ${{[0-9]+}}, 65535
+; ALL: #NO_APP
+ tail call i32 asm sideeffect "addiu $0, $1, $2", "=r,r,P"(i32 7, i32 65535) nounwind
+ ret void
}
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index 9e9b6cd089ea..bdf3ae55b802 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -26,13 +26,13 @@ entry:
; "D": Second word of a double word. This works for any memory element
; double or single.
; CHECK: #APP
-; CHECK: lw ${{[0-9]+}}, 16(${{[0-9]+}});
+; CHECK: lw ${{[0-9]+}}, 16(${{[0-9]+}})
; CHECK: #NO_APP
; No "D": First word of a double word. This works for any memory element
; double or single.
; CHECK: #APP
-; CHECK: lw ${{[0-9]+}}, 12(${{[0-9]+}});
+; CHECK: lw ${{[0-9]+}}, 12(${{[0-9]+}})
; CHECK: #NO_APP
@b = common global [20 x i32] zeroinitializer, align 4
@@ -40,8 +40,8 @@ entry:
define void @main() {
entry:
; Second word:
- tail call void asm sideeffect " lw $0, ${1:D};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3))
+ tail call void asm sideeffect " lw $0, ${1:D}", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3))
; First word. Notice, no 'D':
- tail call void asm sideeffect " lw $0, ${1};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3))
+ tail call void asm sideeffect " lw $0, ${1}", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3))
ret void
}
diff --git a/test/CodeGen/Mips/insn-zero-size-bb.ll b/test/CodeGen/Mips/insn-zero-size-bb.ll
index ea61c994ae1d..d2124c407a0d 100644
--- a/test/CodeGen/Mips/insn-zero-size-bb.ll
+++ b/test/CodeGen/Mips/insn-zero-size-bb.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s
; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips | FileCheck %s
-; RUN: llc < %s -march=mips -mcpu=mips16 | FileCheck %s
+; RUN: llc < %s -march=mips -mattr=mips16 | FileCheck %s
; Verify that we emit the .insn directive for zero-sized (empty) basic blocks.
; This only really matters for microMIPS and MIPS16.
@@ -14,7 +14,7 @@ entry:
to label %unreachable unwind label %return
unreachable:
-; CHECK: ${{.*}}: # %unreachable
+; CHECK: {{.*}}: # %unreachable
; CHECK-NEXT: .insn
unreachable
diff --git a/test/CodeGen/Mips/interrupt-attr-64-error.ll b/test/CodeGen/Mips/interrupt-attr-64-error.ll
new file mode 100644
index 000000000000..830c199d91d9
--- /dev/null
+++ b/test/CodeGen/Mips/interrupt-attr-64-error.ll
@@ -0,0 +1,9 @@
+; RUN: not llc -mcpu=mips64r6 -march=mipsel -relocation-model=static < %s 2>%t
+; RUN: FileCheck %s < %t
+
+; CHECK: LLVM ERROR: "interrupt" attribute is only supported for the O32 ABI on MIPS32R2+ at the present time.
+define i32 @isr_sw0() #0 {
+ ret i32 0
+}
+
+attributes #0 = { "interrupt"="sw0" }
diff --git a/test/CodeGen/Mips/interrupt-attr-args-error.ll b/test/CodeGen/Mips/interrupt-attr-args-error.ll
new file mode 100644
index 000000000000..993629bdbcd6
--- /dev/null
+++ b/test/CodeGen/Mips/interrupt-attr-args-error.ll
@@ -0,0 +1,9 @@
+; RUN: not llc -mcpu=mips32r2 -march=mipsel -relocation-model=static < %s 2> %t
+; RUN: FileCheck %s < %t
+
+; CHECK: LLVM ERROR: Functions with the interrupt attribute cannot have arguments!
+define i32 @isr_sw0(i8 signext %n) #0 {
+ ret i32 0
+}
+
+attributes #0 = { "interrupt"="sw0" }
diff --git a/test/CodeGen/Mips/interrupt-attr-error.ll b/test/CodeGen/Mips/interrupt-attr-error.ll
new file mode 100644
index 000000000000..f35e98ea14bf
--- /dev/null
+++ b/test/CodeGen/Mips/interrupt-attr-error.ll
@@ -0,0 +1,9 @@
+; RUN: not llc -mcpu=mips32 -march=mipsel -relocation-model=static < %s 2>%t
+; RUN: FileCheck %s < %t
+
+; CHECK: LLVM ERROR: "interrupt" attribute is not supported on pre-MIPS32R2 or MIPS16 targets.
+define i32 @isr_sw0() #0 {
+ ret i32 0
+}
+
+attributes #0 = { "interrupt"="sw0" }
diff --git a/test/CodeGen/Mips/interrupt-attr.ll b/test/CodeGen/Mips/interrupt-attr.ll
new file mode 100644
index 000000000000..6cfb0c3ebd54
--- /dev/null
+++ b/test/CodeGen/Mips/interrupt-attr.ll
@@ -0,0 +1,244 @@
+; RUN: llc -mcpu=mips32r2 -march=mipsel -relocation-model=static -o - %s | FileCheck %s
+
+define void @isr_sw0() #0 {
+; CHECK-LABEL: isr_sw0:
+; CHECK: mfc0 $27, $14, 0
+; CHECK: sw $27, [[R1:[0-9]+]]($sp)
+; CHECK: mfc0 $27, $12, 0
+; CHECK: sw $27, [[R2:[0-9]+]]($sp)
+; CHECK: ins $27, $zero, 8, 1
+; CHECK: ins $27, $zero, 1, 4
+; CHECK: ins $27, $zero, 29, 1
+; CHECK: mtc0 $27, $12, 0
+ ; Must save all registers
+; CHECK: sw $7, {{[0-9]+}}($sp)
+; CHECK: sw $6, {{[0-9]+}}($sp)
+; CHECK: sw $5, {{[0-9]+}}($sp)
+; CHECK: sw $4, {{[0-9]+}}($sp)
+; CHECK: sw $3, {{[0-9]+}}($sp)
+; CHECK: sw $2, {{[0-9]+}}($sp)
+; CHECK: sw $25, {{[0-9]+}}($sp)
+; CHECK: sw $24, {{[0-9]+}}($sp)
+; CHECK: sw $15, {{[0-9]+}}($sp)
+; CHECK: sw $14, {{[0-9]+}}($sp)
+; CHECK: sw $13, {{[0-9]+}}($sp)
+; CHECK: sw $12, {{[0-9]+}}($sp)
+; CHECK: sw $11, {{[0-9]+}}($sp)
+; CHECK: sw $10, {{[0-9]+}}($sp)
+; CHECK: sw $9, {{[0-9]+}}($sp)
+; CHECK: sw $8, {{[0-9]+}}($sp)
+; CHECK: sw $ra, [[R5:[0-9]+]]($sp)
+; CHECK: sw $gp, {{[0-9]+}}($sp)
+; CHECK: sw $1, {{[0-9]+}}($sp)
+; CHECK: mflo $26
+; CHECK: sw $26, [[R3:[0-9]+]]($sp)
+; CHECK: mfhi $26
+; CHECK: sw $26, [[R4:[0-9]+]]($sp)
+ call void bitcast (void (...)* @write to void ()*)()
+; CHECK: lw $26, [[R4:[0-9]+]]($sp)
+; CHECK: mthi $26
+; CHECK: lw $26, [[R3:[0-9]+]]($sp)
+; CHECK: mtlo $26
+; CHECK: lw $1, {{[0-9]+}}($sp)
+; CHECK: lw $gp, {{[0-9]+}}($sp)
+; CHECK: lw $ra, [[R5:[0-9]+]]($sp)
+; CHECK: lw $8, {{[0-9]+}}($sp)
+; CHECK: lw $9, {{[0-9]+}}($sp)
+; CHECK: lw $10, {{[0-9]+}}($sp)
+; CHECK: lw $11, {{[0-9]+}}($sp)
+; CHECK: lw $12, {{[0-9]+}}($sp)
+; CHECK: lw $13, {{[0-9]+}}($sp)
+; CHECK: lw $14, {{[0-9]+}}($sp)
+; CHECK: lw $15, {{[0-9]+}}($sp)
+; CHECK: lw $24, {{[0-9]+}}($sp)
+; CHECK: lw $25, {{[0-9]+}}($sp)
+; CHECK: lw $2, {{[0-9]+}}($sp)
+; CHECK: lw $3, {{[0-9]+}}($sp)
+; CHECK: lw $4, {{[0-9]+}}($sp)
+; CHECK: lw $5, {{[0-9]+}}($sp)
+; CHECK: lw $6, {{[0-9]+}}($sp)
+; CHECK: lw $7, {{[0-9]+}}($sp)
+; CHECK: di
+; CHECK: ehb
+; CHECK: lw $27, [[R2:[0-9]+]]($sp)
+; CHECK: mtc0 $27, $14, 0
+; CHECK: lw $27, [[R1:[0-9]+]]($sp)
+; CHECK: mtc0 $27, $12, 0
+; CHECK: eret
+ ret void
+}
+
+declare void @write(...)
+
+define void @isr_sw1() #2 {
+; CHECK-LABEL: isr_sw1:
+; CHECK: mfc0 $27, $14, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: mfc0 $27, $12, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: ins $27, $zero, 8, 2
+; CHECK: ins $27, $zero, 1, 4
+; CHECK: ins $27, $zero, 29, 1
+; CHECK: mtc0 $27, $12, 0
+ ret void
+; CHECK: di
+; CHECK: ehb
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $14, 0
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $12, 0
+; CHECK: eret
+ }
+
+define void @isr_hw0() #3 {
+; CHECK-LABEL: isr_hw0:
+; CHECK: mfc0 $27, $14, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: mfc0 $27, $12, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: ins $27, $zero, 8, 3
+; CHECK: ins $27, $zero, 1, 4
+; CHECK: ins $27, $zero, 29, 1
+; CHECK: mtc0 $27, $12, 0
+ ret void
+; CHECK: di
+; CHECK: ehb
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $14, 0
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $12, 0
+; CHECK: eret
+ }
+
+define void @isr_hw1() #4 {
+; CHECK-LABEL: isr_hw1:
+; CHECK: mfc0 $27, $14, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: mfc0 $27, $12, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: ins $27, $zero, 8, 4
+; CHECK: ins $27, $zero, 1, 4
+; CHECK: ins $27, $zero, 29, 1
+; CHECK: mtc0 $27, $12, 0
+ ret void
+; CHECK: di
+; CHECK: ehb
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $14, 0
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $12, 0
+; CHECK: eret
+ }
+
+
+define void @isr_hw2() #5 {
+; CHECK-LABEL: isr_hw2:
+; CHECK: mfc0 $27, $14, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: mfc0 $27, $12, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: ins $27, $zero, 8, 5
+; CHECK: ins $27, $zero, 1, 4
+; CHECK: ins $27, $zero, 29, 1
+; CHECK: mtc0 $27, $12, 0
+ ret void
+; CHECK: di
+; CHECK: ehb
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $14, 0
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $12, 0
+; CHECK: eret
+ }
+
+define void @isr_hw3() #6 {
+; CHECK-LABEL: isr_hw3:
+; CHECK: mfc0 $27, $14, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: mfc0 $27, $12, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: ins $27, $zero, 8, 6
+; CHECK: ins $27, $zero, 1, 4
+; CHECK: ins $27, $zero, 29, 1
+; CHECK: mtc0 $27, $12, 0
+ ret void
+; CHECK: di
+; CHECK: ehb
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $14, 0
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $12, 0
+; CHECK: eret
+ }
+
+define void @isr_hw4() #7 {
+; CHECK-LABEL: isr_hw4:
+; CHECK: mfc0 $27, $14, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: mfc0 $27, $12, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: ins $27, $zero, 8, 7
+; CHECK: ins $27, $zero, 1, 4
+; CHECK: ins $27, $zero, 29, 1
+; CHECK: mtc0 $27, $12, 0
+ ret void
+; CHECK: di
+; CHECK: ehb
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $14, 0
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $12, 0
+; CHECK: eret
+ }
+
+define void @isr_hw5() #8 {
+; CHECK-LABEL: isr_hw5:
+; CHECK: mfc0 $27, $14, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: mfc0 $27, $12, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: ins $27, $zero, 8, 8
+; CHECK: ins $27, $zero, 1, 4
+; CHECK: ins $27, $zero, 29, 1
+; CHECK: mtc0 $27, $12, 0
+ ret void
+; CHECK: di
+; CHECK: ehb
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $14, 0
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $12, 0
+; CHECK: eret
+ }
+
+define void @isr_eic() #9 {
+; CHECK-LABEL: isr_eic:
+; CHECK: mfc0 $26, $13, 0
+; CHECK: ext $26, $26, 10, 6
+; CHECK: mfc0 $27, $14, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: mfc0 $27, $12, 0
+; CHECK: sw $27, {{[0-9]+}}($sp)
+; CHECK: ins $27, $26, 10, 6
+; CHECK: ins $27, $zero, 1, 4
+; CHECK: ins $27, $zero, 29, 1
+; CHECK: mtc0 $27, $12, 0
+ ret void
+; CHECK: di
+; CHECK: ehb
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $14, 0
+; CHECK: lw $27, {{[0-9]+}}($sp)
+; CHECK: mtc0 $27, $12, 0
+; CHECK: eret
+ }
+
+attributes #0 = { "interrupt"="sw0" }
+attributes #2 = { "interrupt"="sw1" }
+attributes #3 = { "interrupt"="hw0" }
+attributes #4 = { "interrupt"="hw1" }
+attributes #5 = { "interrupt"="hw2" }
+attributes #6 = { "interrupt"="hw3" }
+attributes #7 = { "interrupt"="hw4" }
+attributes #8 = { "interrupt"="hw5" }
+attributes #9 = { "interrupt"="eic" }
diff --git a/test/CodeGen/Mips/jtstat.ll b/test/CodeGen/Mips/jtstat.ll
index 35f71cf2dc85..5b578d4cffe4 100644
--- a/test/CodeGen/Mips/jtstat.ll
+++ b/test/CodeGen/Mips/jtstat.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
@s = global i8 115, align 1
@c = common global i8 0, align 1
diff --git a/test/CodeGen/Mips/l3mc.ll b/test/CodeGen/Mips/l3mc.ll
index c1bff11595c9..c6855fc03fec 100644
--- a/test/CodeGen/Mips/l3mc.ll
+++ b/test/CodeGen/Mips/l3mc.ll
@@ -1,22 +1,22 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunsdfsi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunsdfsi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdidf
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdidf
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdisf
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdisf
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundidf
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundidf
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixsfdi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixsfdi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunsdfdi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunsdfdi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixdfdi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixdfdi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfsi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfsi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfdi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfdi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundisf
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundisf
@ll1 = global i64 0, align 8
@ll2 = global i64 0, align 8
diff --git a/test/CodeGen/Mips/lb1.ll b/test/CodeGen/Mips/lb1.ll
index 21648d7572a5..1e908b81a878 100644
--- a/test/CodeGen/Mips/lb1.ll
+++ b/test/CodeGen/Mips/lb1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@c = global i8 -1, align 1
@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
diff --git a/test/CodeGen/Mips/lbu1.ll b/test/CodeGen/Mips/lbu1.ll
index 28ca27132467..32515411b7d4 100644
--- a/test/CodeGen/Mips/lbu1.ll
+++ b/test/CodeGen/Mips/lbu1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@c = global i8 97, align 1
@.str = private unnamed_addr constant [5 x i8] c"%c \0A\00", align 1
diff --git a/test/CodeGen/Mips/lcb2.ll b/test/CodeGen/Mips/lcb2.ll
index a6f4968e6d23..b15a5085f5a2 100644
--- a/test/CodeGen/Mips/lcb2.ll
+++ b/test/CodeGen/Mips/lcb2.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcb
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcb
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcbn
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcbn
@i = global i32 0, align 4
@j = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/lcb3c.ll b/test/CodeGen/Mips/lcb3c.ll
index 4c6f2c036a0b..b953229a15c9 100644
--- a/test/CodeGen/Mips/lcb3c.ll
+++ b/test/CodeGen/Mips/lcb3c.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -O0 < %s | FileCheck %s -check-prefix=lcb
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -O0 < %s | FileCheck %s -check-prefix=lcb
@i = global i32 0, align 4
@j = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/lcb4a.ll b/test/CodeGen/Mips/lcb4a.ll
index 9e97b5bf1433..4a99ef26efca 100644
--- a/test/CodeGen/Mips/lcb4a.ll
+++ b/test/CodeGen/Mips/lcb4a.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci
@i = global i32 0, align 4
@j = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/lcb5.ll b/test/CodeGen/Mips/lcb5.ll
index 41878d5f8817..ec4c3da6515c 100644
--- a/test/CodeGen/Mips/lcb5.ll
+++ b/test/CodeGen/Mips/lcb5.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci
@i = global i32 0, align 4
@j = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/lh1.ll b/test/CodeGen/Mips/lh1.ll
index 31967e5a5379..dcab12a38e17 100644
--- a/test/CodeGen/Mips/lh1.ll
+++ b/test/CodeGen/Mips/lh1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@s = global i16 -1, align 2
@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
diff --git a/test/CodeGen/Mips/lhu1.ll b/test/CodeGen/Mips/lhu1.ll
index 413da46d4a31..9a52d6fb269f 100644
--- a/test/CodeGen/Mips/lhu1.ll
+++ b/test/CodeGen/Mips/lhu1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@s = global i16 255, align 2
diff --git a/test/CodeGen/Mips/llcarry.ll b/test/CodeGen/Mips/llcarry.ll
index f4120ecec175..fcf129420234 100644
--- a/test/CodeGen/Mips/llcarry.ll
+++ b/test/CodeGen/Mips/llcarry.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i64 4294967295, align 8
@j = global i64 15, align 8
diff --git a/test/CodeGen/Mips/llvm-ir/atomicrmx.ll b/test/CodeGen/Mips/llvm-ir/atomicrmx.ll
new file mode 100644
index 000000000000..9069a6f2d13f
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/atomicrmx.ll
@@ -0,0 +1,26 @@
+; RUN: llc -asm-show-inst -march=mipsel -mcpu=mips32r6 < %s | \
+; RUN: FileCheck %s -check-prefix=CHK32
+; RUN: llc -asm-show-inst -march=mips64el -mcpu=mips64r6 < %s | \
+; RUN: FileCheck %s -check-prefix=CHK64
+
+@a = common global i32 0, align 4
+@b = common global i64 0, align 8
+
+
+define i32 @ll_sc(i32 signext %x) {
+; CHK32-LABEL: ll_sc
+
+;CHK32: LL_R6
+;CHK32: SC_R6
+ %1 = atomicrmw add i32* @a, i32 %x monotonic
+ ret i32 %1
+}
+
+define i64 @lld_scd(i64 signext %x) {
+; CHK64-LABEL: lld_scd
+
+;CHK64: LLD_R6
+;CHK64: SCD_R6
+ %1 = atomicrmw add i64* @b, i64 %x monotonic
+ ret i64 %1
+}
diff --git a/test/CodeGen/Mips/llvm-ir/call.ll b/test/CodeGen/Mips/llvm-ir/call.ll
index 112ab8ee8c7f..22a44da0b069 100644
--- a/test/CodeGen/Mips/llvm-ir/call.ll
+++ b/test/CodeGen/Mips/llvm-ir/call.ll
@@ -6,6 +6,7 @@
; RUN: llc -march=mips -mcpu=mips32r3 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
; RUN: llc -march=mips -mcpu=mips32r5 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
; RUN: llc -march=mips -mcpu=mips32r6 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+; RUN: llc -march=mips -mcpu=mips32r6 -mattr=+fp64,+nooddspreg -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
; RUN: llc -march=mips64 -mcpu=mips4 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
; RUN: llc -march=mips64 -mcpu=mips64 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
; RUN: llc -march=mips64 -mcpu=mips64r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
@@ -168,3 +169,16 @@ define float @tail_indirect_call_float_void(float ()* %addr) {
%1 = tail call float %addr()
ret float %1
}
+
+; Check that passing undef as a double value doesn't cause machine code errors
+; for FP64.
+declare hidden void @undef_double(i32 %this, double %volume) unnamed_addr align 2
+
+define hidden void @thunk_undef_double(i32 %this, double %volume) unnamed_addr align 2 {
+; ALL-LABEL: thunk_undef_double:
+; O32: # implicit-def: %A2
+; O32: # implicit-def: %A3
+; ALL: jr $25
+ tail call void @undef_double(i32 undef, double undef) #8
+ ret void
+}
diff --git a/test/CodeGen/Mips/llvm-ir/load-atomic.ll b/test/CodeGen/Mips/llvm-ir/load-atomic.ll
new file mode 100644
index 000000000000..a44b00bff586
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/load-atomic.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL
+; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL
+; RUN: llc -march=mips64 -mcpu=mips64r2 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=M64
+; RUN: llc -march=mips64 -mcpu=mips64r6 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=M64
+
+define i8 @load_i8(i8* %ptr) {
+; ALL-LABEL: load_i8
+
+; ALL: lb $2, 0($4)
+; ALL: sync
+ %val = load atomic i8, i8* %ptr acquire, align 1
+ ret i8 %val
+}
+
+define i16 @load_i16(i16* %ptr) {
+; ALL-LABEL: load_i16
+
+; ALL: lh $2, 0($4)
+; ALL: sync
+ %val = load atomic i16, i16* %ptr acquire, align 2
+ ret i16 %val
+}
+
+define i32 @load_i32(i32* %ptr) {
+; ALL-LABEL: load_i32
+
+; ALL: lw $2, 0($4)
+; ALL: sync
+ %val = load atomic i32, i32* %ptr acquire, align 4
+ ret i32 %val
+}
+
+define i64 @load_i64(i64* %ptr) {
+; M64-LABEL: load_i64
+
+; M64: ld $2, 0($4)
+; M64: sync
+ %val = load atomic i64, i64* %ptr acquire, align 8
+ ret i64 %val
+}
diff --git a/test/CodeGen/Mips/llvm-ir/sqrt.ll b/test/CodeGen/Mips/llvm-ir/sqrt.ll
new file mode 100644
index 000000000000..1a8892de0ee2
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/sqrt.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -mattr=+micromips | FileCheck %s
+; RUN: llc < %s -march=mips -mcpu=mips32r2 -mattr=+micromips | FileCheck %s
+; RUN: llc < %s -march=mips -mcpu=mips32r6 -mattr=+micromips | FileCheck %s
+
+define float @sqrt_fn(float %value) #0 {
+entry:
+ %sqrtf = tail call float @sqrtf(float %value) #0
+ ret float %sqrtf
+}
+
+declare float @sqrtf(float)
+
+; CHECK: sqrt.s $f0, $f12
diff --git a/test/CodeGen/Mips/llvm-ir/store-atomic.ll b/test/CodeGen/Mips/llvm-ir/store-atomic.ll
new file mode 100644
index 000000000000..6b33f2685d17
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/store-atomic.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL
+; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL
+; RUN: llc -march=mips64 -mcpu=mips64r2 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=M64
+; RUN: llc -march=mips64 -mcpu=mips64r6 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=M64
+
+define void @store_i8(i8* %ptr, i8 signext %v) {
+; ALL-LABEL: store_i8
+
+; ALL: sync
+; ALL: sb $5, 0($4)
+ store atomic i8 %v, i8* %ptr release, align 1
+ ret void
+}
+
+define void @store_i16(i16* %ptr, i16 signext %v) {
+; ALL-LABEL: store_i16
+
+; ALL: sync
+; ALL: sh $5, 0($4)
+ store atomic i16 %v, i16* %ptr release, align 2
+ ret void
+}
+
+define void @store_i32(i32* %ptr, i32 signext %v) {
+; ALL-LABEL: store_i32
+
+; ALL: sync
+; ALL: sw $5, 0($4)
+ store atomic i32 %v, i32* %ptr release, align 4
+ ret void
+}
+
+define void @store_i64(i64* %ptr, i64 %v) {
+; M64-LABEL: store_i64
+
+; M64: sync
+; M64: sd $5, 0($4)
+ store atomic i64 %v, i64* %ptr release, align 8
+ ret void
+}
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
index b0c3ff6ff9b5..b84d94d31494 100644
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -7,7 +7,7 @@
; RUN: llc -march=mips -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64R6
; FIXME: The MIPS16 test should check its output
-; RUN: llc -march=mips -mcpu=mips16 < %s
+; RUN: llc -march=mips -mattr=mips16 < %s
; ALL-LABEL: madd1:
diff --git a/test/CodeGen/Mips/mbrsize4a.ll b/test/CodeGen/Mips/mbrsize4a.ll
index 264d2284afc9..9562e55dcc2a 100644
--- a/test/CodeGen/Mips/mbrsize4a.ll
+++ b/test/CodeGen/Mips/mbrsize4a.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=jal16
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=jal16
@j = global i32 10, align 4
@.str = private unnamed_addr constant [11 x i8] c"at bottom\0A\00", align 1
diff --git a/test/CodeGen/Mips/mips16-hf-attr-2.ll b/test/CodeGen/Mips/mips16-hf-attr-2.ll
index 60c6eaad8f76..82c0989c4254 100644
--- a/test/CodeGen/Mips/mips16-hf-attr-2.ll
+++ b/test/CodeGen/Mips/mips16-hf-attr-2.ll
@@ -1,7 +1,7 @@
; Check that stubs generation for mips16 hard-float mode does not depend
; on the function 'use-soft-float' attribute's value.
; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel \
-; RUN: -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s
+; RUN: -mattr=mips16 -relocation-model=pic < %s | FileCheck %s
define void @bar_sf() #1 {
; CHECK: bar_sf:
diff --git a/test/CodeGen/Mips/mips16-hf-attr.ll b/test/CodeGen/Mips/mips16-hf-attr.ll
index c6ad442fdea2..bcae1e92f7cc 100644
--- a/test/CodeGen/Mips/mips16-hf-attr.ll
+++ b/test/CodeGen/Mips/mips16-hf-attr.ll
@@ -1,7 +1,7 @@
; Check that stubs generation for mips16 hard-float mode does not depend
; on the function 'use-soft-float' attribute's value.
; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel \
-; RUN: -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s
+; RUN: -mattr=mips16 -relocation-model=pic < %s | FileCheck %s
define void @bar_hf() #0 {
; CHECK: bar_hf:
diff --git a/test/CodeGen/Mips/mips16_32_1.ll b/test/CodeGen/Mips/mips16_32_1.ll
index f6096b402f2d..211aa2a0f4b0 100644
--- a/test/CodeGen/Mips/mips16_32_1.ll
+++ b/test/CodeGen/Mips/mips16_32_1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s -mips-mixed-16-32 | FileCheck %s
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s -mips-mixed-16-32 | FileCheck %s
; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=pic -O3 < %s -mips-mixed-16-32 | FileCheck %s
define void @foo() #0 {
diff --git a/test/CodeGen/Mips/mips16_32_10.ll b/test/CodeGen/Mips/mips16_32_10.ll
index ff9831ed7622..b256912d7151 100644
--- a/test/CodeGen/Mips/mips16_32_10.ll
+++ b/test/CodeGen/Mips/mips16_32_10.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
define void @foo() #0 {
entry:
diff --git a/test/CodeGen/Mips/mips16_32_3.ll b/test/CodeGen/Mips/mips16_32_3.ll
index c5a29a0b8fdb..8891eba8c954 100644
--- a/test/CodeGen/Mips/mips16_32_3.ll
+++ b/test/CodeGen/Mips/mips16_32_3.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=32
define void @foo() #0 {
diff --git a/test/CodeGen/Mips/mips16_32_4.ll b/test/CodeGen/Mips/mips16_32_4.ll
index 1238363d907e..2d50881c61b4 100644
--- a/test/CodeGen/Mips/mips16_32_4.ll
+++ b/test/CodeGen/Mips/mips16_32_4.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=32
define void @foo() #0 {
diff --git a/test/CodeGen/Mips/mips16_32_5.ll b/test/CodeGen/Mips/mips16_32_5.ll
index 5d4c8a1af563..644ba4c98e34 100644
--- a/test/CodeGen/Mips/mips16_32_5.ll
+++ b/test/CodeGen/Mips/mips16_32_5.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=32
define void @foo() #0 {
diff --git a/test/CodeGen/Mips/mips16_32_6.ll b/test/CodeGen/Mips/mips16_32_6.ll
index 63323b608bc5..1afce84b0335 100644
--- a/test/CodeGen/Mips/mips16_32_6.ll
+++ b/test/CodeGen/Mips/mips16_32_6.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=32
define void @foo() #0 {
diff --git a/test/CodeGen/Mips/mips16_32_7.ll b/test/CodeGen/Mips/mips16_32_7.ll
index 480a23c8b25e..cc247c054543 100644
--- a/test/CodeGen/Mips/mips16_32_7.ll
+++ b/test/CodeGen/Mips/mips16_32_7.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=16
; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32 | FileCheck %s -check-prefix=32
define void @foo() #0 {
diff --git a/test/CodeGen/Mips/mips16_fpret.ll b/test/CodeGen/Mips/mips16_fpret.ll
index 0f09c4105a17..651feba19808 100644
--- a/test/CodeGen/Mips/mips16_fpret.ll
+++ b/test/CodeGen/Mips/mips16_fpret.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=2
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=3
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=4
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=2
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=3
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=4
@x = global float 0x41F487E980000000, align 4
diff --git a/test/CodeGen/Mips/mips16ex.ll b/test/CodeGen/Mips/mips16ex.ll
index c3a02261119e..b2521ae872a8 100644
--- a/test/CodeGen/Mips/mips16ex.ll
+++ b/test/CodeGen/Mips/mips16ex.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
;16: main:
;16-NEXT: [[TMP:.*]]:
diff --git a/test/CodeGen/Mips/mips16fpe.ll b/test/CodeGen/Mips/mips16fpe.ll
index f8b916da3a49..16695e45265a 100644
--- a/test/CodeGen/Mips/mips16fpe.ll
+++ b/test/CodeGen/Mips/mips16fpe.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16hf
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16hf
@x = global float 5.000000e+00, align 4
@y = global float 1.500000e+01, align 4
@@ -297,7 +297,7 @@ entry:
%and2 = and i1 %lnot, %cmp1
%and = zext i1 %and2 to i32
store i32 %and, i32* @ltsf2_result, align 4
-;16hf: lw ${{[0-9]+}}, %call16(__mips16_unordsf2)(${{[0-9]+}})
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltsf2)(${{[0-9]+}})
;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltsf2)(${{[0-9]+}})
ret void
}
@@ -313,7 +313,7 @@ entry:
%and2 = and i1 %lnot, %cmp1
%and = zext i1 %and2 to i32
store i32 %and, i32* @ltdf2_result, align 4
-;16hf: lw ${{[0-9]+}}, %call16(__mips16_unorddf2)(${{[0-9]+}})
+;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltdf2)(${{[0-9]+}})
;16hf: lw ${{[0-9]+}}, %call16(__mips16_ltdf2)(${{[0-9]+}})
ret void
}
diff --git a/test/CodeGen/Mips/misha.ll b/test/CodeGen/Mips/misha.ll
index 23ad7f6057af..bedea9de5f92 100644
--- a/test/CodeGen/Mips/misha.ll
+++ b/test/CodeGen/Mips/misha.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
define i32 @sumc(i8* nocapture %to, i8* nocapture %from, i32) nounwind {
entry:
diff --git a/test/CodeGen/Mips/msa/elm_copy.ll b/test/CodeGen/Mips/msa/elm_copy.ll
index 2a0d74f44524..251b535fd76c 100644
--- a/test/CodeGen/Mips/msa/elm_copy.ll
+++ b/test/CodeGen/Mips/msa/elm_copy.ll
@@ -170,7 +170,8 @@ declare i32 @llvm.mips.copy.u.w(<4 x i32>, i32) nounwind
; MIPS32-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_copy_u_w_ARG1)
; MIPS64-DAG: ld [[R1:\$[0-9]+]], %got_disp(llvm_mips_copy_u_w_ARG1)
; MIPS-ANY-DAG: ld.w [[WS:\$w[0-9]+]], 0([[R1]])
-; MIPS-ANY-DAG: copy_u.w [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS32-DAG: copy_s.w [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS64-DAG: copy_u.w [[RD:\$[0-9]+]], [[WS]][1]
; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_w_RES)
; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_w_RES)
; MIPS-ANY-DAG: sw [[RD]], 0([[RES]])
@@ -196,7 +197,7 @@ declare i64 @llvm.mips.copy.u.d(<2 x i64>, i32) nounwind
; MIPS64-DAG: ld.d [[WS:\$w[0-9]+]], 0([[R1]])
; MIPS32-DAG: copy_s.w [[RD1:\$[0-9]+]], [[WS]][2]
; MIPS32-DAG: copy_s.w [[RD2:\$[0-9]+]], [[WS]][3]
-; MIPS64-DAG: copy_u.d [[RD:\$[0-9]+]], [[WS]][1]
+; MIPS64-DAG: copy_s.d [[RD:\$[0-9]+]], [[WS]][1]
; MIPS32-DAG: lw [[RES:\$[0-9]+]], %got(llvm_mips_copy_u_d_RES)
; MIPS64-DAG: ld [[RES:\$[0-9]+]], %got_disp(llvm_mips_copy_u_d_RES)
; MIPS32-DAG: sw [[RD1]], 0([[RES]])
diff --git a/test/CodeGen/Mips/mul.ll b/test/CodeGen/Mips/mul.ll
index 3231f9cac38e..9e053fc2e7d6 100644
--- a/test/CodeGen/Mips/mul.ll
+++ b/test/CodeGen/Mips/mul.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i32 5, align 4
@jjjj = global i32 -6, align 4
diff --git a/test/CodeGen/Mips/mulll.ll b/test/CodeGen/Mips/mulll.ll
index 6e5ba647b8bf..9a2acd417381 100644
--- a/test/CodeGen/Mips/mulll.ll
+++ b/test/CodeGen/Mips/mulll.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i64 5, align 8
@jjjj = global i64 -6, align 8
diff --git a/test/CodeGen/Mips/mulull.ll b/test/CodeGen/Mips/mulull.ll
index c1334484fb66..fdcb68d036f8 100644
--- a/test/CodeGen/Mips/mulull.ll
+++ b/test/CodeGen/Mips/mulull.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i64 5, align 8
@jjjj = global i64 6, align 8
diff --git a/test/CodeGen/Mips/nacl-align.ll b/test/CodeGen/Mips/nacl-align.ll
index ec8f3f06afdf..8191c7dec6f2 100644
--- a/test/CodeGen/Mips/nacl-align.ll
+++ b/test/CodeGen/Mips/nacl-align.ll
@@ -44,18 +44,17 @@ default:
; CHECK-NEXT: ${{BB[0-9]+_[0-9]+}}:
; CHECK-NEXT: jr $ra
; CHECK-NEXT: addiu $2, $zero, 111
-; CHECK-NEXT: .align 4
; CHECK-NEXT: ${{BB[0-9]+_[0-9]+}}:
; CHECK-NEXT: jr $ra
-; CHECK-NEXT: addiu $2, $zero, 222
+; CHECK-NEXT: addiu $2, $zero, 555
; CHECK-NEXT: .align 4
; CHECK-NEXT: ${{BB[0-9]+_[0-9]+}}:
; CHECK-NEXT: jr $ra
-; CHECK-NEXT: addiu $2, $zero, 333
+; CHECK-NEXT: addiu $2, $zero, 222
; CHECK-NEXT: .align 4
; CHECK-NEXT: ${{BB[0-9]+_[0-9]+}}:
; CHECK-NEXT: jr $ra
-; CHECK-NEXT: addiu $2, $zero, 444
+; CHECK-NEXT: addiu $2, $zero, 333
}
diff --git a/test/CodeGen/Mips/neg1.ll b/test/CodeGen/Mips/neg1.ll
index 36275a2991f6..dd5d7a09eb28 100644
--- a/test/CodeGen/Mips/neg1.ll
+++ b/test/CodeGen/Mips/neg1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 10, align 4
@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
diff --git a/test/CodeGen/Mips/no-odd-spreg-msa.ll b/test/CodeGen/Mips/no-odd-spreg-msa.ll
index cf79557cc97f..7213044a2300 100644
--- a/test/CodeGen/Mips/no-odd-spreg-msa.ll
+++ b/test/CodeGen/Mips/no-odd-spreg-msa.ll
@@ -1,5 +1,9 @@
-; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,-nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=ODDSPREG
-; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,+nooddspreg < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOODDSPREG
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,-nooddspreg \
+; RUN: -no-integrated-as < %s | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=ODDSPREG
+; RUN: llc -march=mipsel -mcpu=mips32 -mattr=+fp64,+msa,+nooddspreg \
+; RUN: -no-integrated-as < %s | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=NOODDSPREG
@v4f32 = global <4 x float> zeroinitializer
@@ -19,7 +23,7 @@ entry:
; On the other hand, if odd single precision registers are not permitted, it
; must copy $f13 to an even-numbered register before inserting into the
; vector.
- call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+ call void asm sideeffect "teqi $$zero, 1", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
%1 = insertelement <4 x float> %0, float %b, i32 0
store <4 x float> %1, <4 x float>* @v4f32
ret void
@@ -32,7 +36,7 @@ entry:
; NOODDSPREG: mov.s $f[[F0:[0-9]+]], $f13
; NOODDSPREG: insve.w $w[[W0]][0], $w[[F0]][0]
; ODDSPREG: insve.w $w[[W0]][0], $w13[0]
-; ALL: # Clobber
+; ALL: teqi $zero, 1
; ALL-NOT: sdc1
; ALL-NOT: ldc1
; ALL: st.w $w[[W0]], 0($[[R0]])
@@ -53,7 +57,7 @@ entry:
; On the other hand, if odd single precision registers are not permitted, it
; must copy $f13 to an even-numbered register before inserting into the
; vector.
- call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+ call void asm sideeffect "teqi $$zero, 1", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
%1 = insertelement <4 x float> %0, float %b, i32 1
store <4 x float> %1, <4 x float>* @v4f32
ret void
@@ -66,7 +70,7 @@ entry:
; NOODDSPREG: mov.s $f[[F0:[0-9]+]], $f13
; NOODDSPREG: insve.w $w[[W0]][1], $w[[F0]][0]
; ODDSPREG: insve.w $w[[W0]][1], $w13[0]
-; ALL: # Clobber
+; ALL: teqi $zero, 1
; ALL-NOT: sdc1
; ALL-NOT: ldc1
; ALL: st.w $w[[W0]], 0($[[R0]])
@@ -83,7 +87,7 @@ entry:
;
; On the other hand, if odd single precision registers are not permitted, it
; must move it to $f12/$w12.
- call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+ call void asm sideeffect "teqi $$zero, 1", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
%2 = extractelement <4 x float> %1, i32 0
ret float %2
@@ -94,7 +98,7 @@ entry:
; ALL: ld.w $w12, 0($[[R0]])
; ALL: move.v $w[[W0:13]], $w12
; NOODDSPREG: move.v $w[[W0:12]], $w13
-; ALL: # Clobber
+; ALL: teqi $zero, 1
; ALL-NOT: st.w
; ALL-NOT: ld.w
; ALL: mov.s $f0, $f[[W0]]
@@ -111,7 +115,7 @@ entry:
;
; On the other hand, if odd single precision registers are not permitted, it
; must be spilled.
- call void asm sideeffect "# Clobber", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
+ call void asm sideeffect "teqi $$zero, 1", "~{$f0},~{$f1},~{$f2},~{$f3},~{$f4},~{$f5},~{$f6},~{$f7},~{$f8},~{$f9},~{$f10},~{$f11},~{$f12},~{$f14},~{$f15},~{$f16},~{$f17},~{$f18},~{$f19},~{$f20},~{$f21},~{$f22},~{$f23},~{$f24},~{$f25},~{$f26},~{$f27},~{$f28},~{$f29},~{$f30},~{$f31}"()
%2 = extractelement <4 x float> %1, i32 1
ret float %2
@@ -124,7 +128,7 @@ entry:
; NOODDSPREG: st.w $w[[W0]], 0($sp)
; ODDSPREG-NOT: st.w
; ODDSPREG-NOT: ld.w
-; ALL: # Clobber
+; ALL: teqi $zero, 1
; ODDSPREG-NOT: st.w
; ODDSPREG-NOT: ld.w
; NOODDSPREG: ld.w $w0, 0($sp)
diff --git a/test/CodeGen/Mips/nomips16.ll b/test/CodeGen/Mips/nomips16.ll
index 418d8ead2c39..749272181884 100644
--- a/test/CodeGen/Mips/nomips16.ll
+++ b/test/CodeGen/Mips/nomips16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s
@x = global float 0.000000e+00, align 4
@.str = private unnamed_addr constant [20 x i8] c"in main: mips16 %f\0A\00", align 1
diff --git a/test/CodeGen/Mips/not1.ll b/test/CodeGen/Mips/not1.ll
index f5ec5b60e421..512480508272 100644
--- a/test/CodeGen/Mips/not1.ll
+++ b/test/CodeGen/Mips/not1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@x = global i32 65504, align 4
@y = global i32 60929, align 4
diff --git a/test/CodeGen/Mips/null.ll b/test/CodeGen/Mips/null.ll
index bc78a27f199c..67e346d959f9 100644
--- a/test/CodeGen/Mips/null.ll
+++ b/test/CodeGen/Mips/null.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 < %s | FileCheck %s -check-prefix=16
define i32 @main() nounwind {
diff --git a/test/CodeGen/Mips/or1.ll b/test/CodeGen/Mips/or1.ll
index 51b6ebfe8e3b..aabffd111c5e 100644
--- a/test/CodeGen/Mips/or1.ll
+++ b/test/CodeGen/Mips/or1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@x = global i32 65504, align 4
@y = global i32 60929, align 4
diff --git a/test/CodeGen/Mips/powif64_16.ll b/test/CodeGen/Mips/powif64_16.ll
index 33ec8c40c610..9be1a368b199 100644
--- a/test/CodeGen/Mips/powif64_16.ll
+++ b/test/CodeGen/Mips/powif64_16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s
declare float @llvm.powi.f32(float %Val, i32 %power)
declare double @llvm.powi.f64(double %Val, i32 %power)
diff --git a/test/CodeGen/Mips/rem.ll b/test/CodeGen/Mips/rem.ll
index 70f957ce15f6..ef16483f39d3 100644
--- a/test/CodeGen/Mips/rem.ll
+++ b/test/CodeGen/Mips/rem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i32 103, align 4
@jjjj = global i32 -4, align 4
diff --git a/test/CodeGen/Mips/remu.ll b/test/CodeGen/Mips/remu.ll
index 12679727952f..dac4b05cd00a 100644
--- a/test/CodeGen/Mips/remu.ll
+++ b/test/CodeGen/Mips/remu.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@iiii = global i32 103, align 4
@jjjj = global i32 4, align 4
diff --git a/test/CodeGen/Mips/s2rem.ll b/test/CodeGen/Mips/s2rem.ll
index 65e48fe57c92..715abc072b4b 100644
--- a/test/CodeGen/Mips/s2rem.ll
+++ b/test/CodeGen/Mips/s2rem.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC
@xi = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/sb1.ll b/test/CodeGen/Mips/sb1.ll
index d2e8510024e5..4724a7f2cfd7 100644
--- a/test/CodeGen/Mips/sb1.ll
+++ b/test/CodeGen/Mips/sb1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 97, align 4
@c = common global i8 0, align 1
diff --git a/test/CodeGen/Mips/sel1c.ll b/test/CodeGen/Mips/sel1c.ll
index 6753af106e0f..b09be8d41157 100644
--- a/test/CodeGen/Mips/sel1c.ll
+++ b/test/CodeGen/Mips/sel1c.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
@i = global i32 1, align 4
@j = global i32 2, align 4
diff --git a/test/CodeGen/Mips/sel2c.ll b/test/CodeGen/Mips/sel2c.ll
index 987cccad5bf4..0650147be70c 100644
--- a/test/CodeGen/Mips/sel2c.ll
+++ b/test/CodeGen/Mips/sel2c.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
@i = global i32 1, align 4
@j = global i32 2, align 4
diff --git a/test/CodeGen/Mips/selTBteqzCmpi.ll b/test/CodeGen/Mips/selTBteqzCmpi.ll
index 5a72ea01073c..97eba29e99fb 100644
--- a/test/CodeGen/Mips/selTBteqzCmpi.ll
+++ b/test/CodeGen/Mips/selTBteqzCmpi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@i = global i32 1, align 4
@j = global i32 2, align 4
diff --git a/test/CodeGen/Mips/selTBtnezCmpi.ll b/test/CodeGen/Mips/selTBtnezCmpi.ll
index b6407e67f27a..62af3dffb7b3 100644
--- a/test/CodeGen/Mips/selTBtnezCmpi.ll
+++ b/test/CodeGen/Mips/selTBtnezCmpi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@i = global i32 1, align 4
@j = global i32 2, align 4
diff --git a/test/CodeGen/Mips/selTBtnezSlti.ll b/test/CodeGen/Mips/selTBtnezSlti.ll
index 2f1cdb866294..3851fdf093e4 100644
--- a/test/CodeGen/Mips/selTBtnezSlti.ll
+++ b/test/CodeGen/Mips/selTBtnezSlti.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@i = global i32 1, align 4
@j = global i32 2, align 4
diff --git a/test/CodeGen/Mips/seleq.ll b/test/CodeGen/Mips/seleq.ll
index bd25358fb9e6..7d1e034d68c7 100644
--- a/test/CodeGen/Mips/seleq.ll
+++ b/test/CodeGen/Mips/seleq.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@t = global i32 10, align 4
@f = global i32 199, align 4
diff --git a/test/CodeGen/Mips/seleqk.ll b/test/CodeGen/Mips/seleqk.ll
index 2eeaa9e33738..a0bfe44eadd6 100644
--- a/test/CodeGen/Mips/seleqk.ll
+++ b/test/CodeGen/Mips/seleqk.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@t = global i32 10, align 4
@f = global i32 199, align 4
diff --git a/test/CodeGen/Mips/selgek.ll b/test/CodeGen/Mips/selgek.ll
index 38ad95ee01a9..9d9df743db9b 100644
--- a/test/CodeGen/Mips/selgek.ll
+++ b/test/CodeGen/Mips/selgek.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@t = global i32 10, align 4
@f = global i32 199, align 4
diff --git a/test/CodeGen/Mips/selgt.ll b/test/CodeGen/Mips/selgt.ll
index a2e1e39e742f..94f0f9b50af1 100644
--- a/test/CodeGen/Mips/selgt.ll
+++ b/test/CodeGen/Mips/selgt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@t = global i32 10, align 4
@f = global i32 199, align 4
diff --git a/test/CodeGen/Mips/selle.ll b/test/CodeGen/Mips/selle.ll
index 1adefb7846e4..8925aac10c4d 100644
--- a/test/CodeGen/Mips/selle.ll
+++ b/test/CodeGen/Mips/selle.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@t = global i32 10, align 4
@f = global i32 199, align 4
diff --git a/test/CodeGen/Mips/selltk.ll b/test/CodeGen/Mips/selltk.ll
index db9f8c171b78..106fe9b85d60 100644
--- a/test/CodeGen/Mips/selltk.ll
+++ b/test/CodeGen/Mips/selltk.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@t = global i32 10, align 4
@f = global i32 199, align 4
diff --git a/test/CodeGen/Mips/selne.ll b/test/CodeGen/Mips/selne.ll
index 9be99d669475..270c0dadd864 100644
--- a/test/CodeGen/Mips/selne.ll
+++ b/test/CodeGen/Mips/selne.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@t = global i32 10, align 4
@f = global i32 199, align 4
diff --git a/test/CodeGen/Mips/selnek.ll b/test/CodeGen/Mips/selnek.ll
index 5b6aa2afa1af..13ab693adb8b 100644
--- a/test/CodeGen/Mips/selnek.ll
+++ b/test/CodeGen/Mips/selnek.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=16
@t = global i32 10, align 4
@f = global i32 199, align 4
diff --git a/test/CodeGen/Mips/selpat.ll b/test/CodeGen/Mips/selpat.ll
index c682d8182a46..ff4bed327f45 100644
--- a/test/CodeGen/Mips/selpat.ll
+++ b/test/CodeGen/Mips/selpat.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@t = global i32 10, align 4
@f = global i32 199, align 4
diff --git a/test/CodeGen/Mips/seteq.ll b/test/CodeGen/Mips/seteq.ll
index 8fad6122bdbe..76f9bb3ebf9d 100644
--- a/test/CodeGen/Mips/seteq.ll
+++ b/test/CodeGen/Mips/seteq.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 1, align 4
@j = global i32 10, align 4
diff --git a/test/CodeGen/Mips/seteqz.ll b/test/CodeGen/Mips/seteqz.ll
index 8e9a4beac75b..368e85ce886e 100644
--- a/test/CodeGen/Mips/seteqz.ll
+++ b/test/CodeGen/Mips/seteqz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 0, align 4
@j = global i32 99, align 4
diff --git a/test/CodeGen/Mips/setge.ll b/test/CodeGen/Mips/setge.ll
index 8fb729964cf5..af69d7b325b9 100644
--- a/test/CodeGen/Mips/setge.ll
+++ b/test/CodeGen/Mips/setge.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 -5, align 4
@k = global i32 10, align 4
diff --git a/test/CodeGen/Mips/setgek.ll b/test/CodeGen/Mips/setgek.ll
index 1148d1b67bda..d6eee1ff6e18 100644
--- a/test/CodeGen/Mips/setgek.ll
+++ b/test/CodeGen/Mips/setgek.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@k = global i32 10, align 4
@r1 = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/setle.ll b/test/CodeGen/Mips/setle.ll
index fe4a2c37eb54..f7d25054e013 100644
--- a/test/CodeGen/Mips/setle.ll
+++ b/test/CodeGen/Mips/setle.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 -5, align 4
@k = global i32 10, align 4
diff --git a/test/CodeGen/Mips/setlt.ll b/test/CodeGen/Mips/setlt.ll
index c4211e6dd696..040f8b17f219 100644
--- a/test/CodeGen/Mips/setlt.ll
+++ b/test/CodeGen/Mips/setlt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 -5, align 4
@k = global i32 10, align 4
diff --git a/test/CodeGen/Mips/setltk.ll b/test/CodeGen/Mips/setltk.ll
index 8c0041111270..79d25b1f130d 100644
--- a/test/CodeGen/Mips/setltk.ll
+++ b/test/CodeGen/Mips/setltk.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 -5, align 4
@k = global i32 10, align 4
diff --git a/test/CodeGen/Mips/setne.ll b/test/CodeGen/Mips/setne.ll
index 484674e5da32..02692bf9e632 100644
--- a/test/CodeGen/Mips/setne.ll
+++ b/test/CodeGen/Mips/setne.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 1, align 4
@j = global i32 10, align 4
diff --git a/test/CodeGen/Mips/setuge.ll b/test/CodeGen/Mips/setuge.ll
index 025b4dcefd76..6ae77b6cc1b4 100644
--- a/test/CodeGen/Mips/setuge.ll
+++ b/test/CodeGen/Mips/setuge.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 5, align 4
@k = global i32 10, align 4
diff --git a/test/CodeGen/Mips/setugt.ll b/test/CodeGen/Mips/setugt.ll
index 0ce317e0df9e..f8de59b754cf 100644
--- a/test/CodeGen/Mips/setugt.ll
+++ b/test/CodeGen/Mips/setugt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 5, align 4
@k = global i32 10, align 4
diff --git a/test/CodeGen/Mips/setule.ll b/test/CodeGen/Mips/setule.ll
index 4255fd27c5cd..8874d4d698b9 100644
--- a/test/CodeGen/Mips/setule.ll
+++ b/test/CodeGen/Mips/setule.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 5, align 4
@k = global i32 10, align 4
diff --git a/test/CodeGen/Mips/setult.ll b/test/CodeGen/Mips/setult.ll
index d30107e54dd0..29c7588a153d 100644
--- a/test/CodeGen/Mips/setult.ll
+++ b/test/CodeGen/Mips/setult.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 5, align 4
@k = global i32 10, align 4
diff --git a/test/CodeGen/Mips/setultk.ll b/test/CodeGen/Mips/setultk.ll
index 1b79f103bed7..c1ef0aa0b059 100644
--- a/test/CodeGen/Mips/setultk.ll
+++ b/test/CodeGen/Mips/setultk.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@j = global i32 5, align 4
@k = global i32 10, align 4
diff --git a/test/CodeGen/Mips/sh1.ll b/test/CodeGen/Mips/sh1.ll
index 3f70b9bc6e68..ccba32a4cca9 100644
--- a/test/CodeGen/Mips/sh1.ll
+++ b/test/CodeGen/Mips/sh1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 97, align 4
@s = common global i16 0, align 2
diff --git a/test/CodeGen/Mips/simplebr.ll b/test/CodeGen/Mips/simplebr.ll
index 2aeacc903fbe..96dfce915e90 100644
--- a/test/CodeGen/Mips/simplebr.ll
+++ b/test/CodeGen/Mips/simplebr.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -mattr=+soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mips16-hard-float -mattr=+soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
; ModuleID = 'simplebr.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/sitofp-selectcc-opt.ll b/test/CodeGen/Mips/sitofp-selectcc-opt.ll
index c60fceb1a04c..751fba46d72f 100644
--- a/test/CodeGen/Mips/sitofp-selectcc-opt.ll
+++ b/test/CodeGen/Mips/sitofp-selectcc-opt.ll
@@ -7,7 +7,7 @@ entry:
; check that this transformation doesn't happen:
; (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
;
-; CHECK-NOT: # double -1.000000e+00
+; CHECK-NOT: # double -1
%tobool1 = icmp ne i32 %a, 0
%not.tobool = icmp ne i64 %b, 0
@@ -19,4 +19,3 @@ entry:
store double %add, double* @foo12.d4, align 8
ret double %add
}
-
diff --git a/test/CodeGen/Mips/sll1.ll b/test/CodeGen/Mips/sll1.ll
index 4d35b64e0b58..93b814f944c5 100644
--- a/test/CodeGen/Mips/sll1.ll
+++ b/test/CodeGen/Mips/sll1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 10, align 4
@j = global i32 0, align 4
diff --git a/test/CodeGen/Mips/sll2.ll b/test/CodeGen/Mips/sll2.ll
index dc2236b10ccf..f30108d14df8 100644
--- a/test/CodeGen/Mips/sll2.ll
+++ b/test/CodeGen/Mips/sll2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 10, align 4
@j = global i32 4, align 4
diff --git a/test/CodeGen/Mips/sr1.ll b/test/CodeGen/Mips/sr1.ll
index 69655f7b842c..b3fdef0a17b4 100644
--- a/test/CodeGen/Mips/sr1.ll
+++ b/test/CodeGen/Mips/sr1.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=NEG
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=NEG
@f = common global float 0.000000e+00, align 4
diff --git a/test/CodeGen/Mips/sra1.ll b/test/CodeGen/Mips/sra1.ll
index 1c7d417cb13a..51282bd8033d 100644
--- a/test/CodeGen/Mips/sra1.ll
+++ b/test/CodeGen/Mips/sra1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 -354, align 4
@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
diff --git a/test/CodeGen/Mips/sra2.ll b/test/CodeGen/Mips/sra2.ll
index 771d0f4a79e3..0a2bff9e4080 100644
--- a/test/CodeGen/Mips/sra2.ll
+++ b/test/CodeGen/Mips/sra2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 -354, align 4
@j = global i32 3, align 4
diff --git a/test/CodeGen/Mips/srl1.ll b/test/CodeGen/Mips/srl1.ll
index a748eabb066f..8e97734bb313 100644
--- a/test/CodeGen/Mips/srl1.ll
+++ b/test/CodeGen/Mips/srl1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 10654, align 4
@j = global i32 0, align 4
diff --git a/test/CodeGen/Mips/srl2.ll b/test/CodeGen/Mips/srl2.ll
index 6e338b39350f..4ccdefdf1413 100644
--- a/test/CodeGen/Mips/srl2.ll
+++ b/test/CodeGen/Mips/srl2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 10654, align 4
@j = global i32 0, align 4
diff --git a/test/CodeGen/Mips/stchar.ll b/test/CodeGen/Mips/stchar.ll
index 6bc4889931a7..4b3c8fb79315 100644
--- a/test/CodeGen/Mips/stchar.ll
+++ b/test/CodeGen/Mips/stchar.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16_h
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16_b
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16_h
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16_b
@.str = private unnamed_addr constant [9 x i8] c"%hd %c \0A\00", align 1
@sp = common global i16* null, align 4
diff --git a/test/CodeGen/Mips/stldst.ll b/test/CodeGen/Mips/stldst.ll
index 4eef5ece0589..8aecca4aed67 100644
--- a/test/CodeGen/Mips/stldst.ll
+++ b/test/CodeGen/Mips/stldst.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@kkkk = global i32 67, align 4
@llll = global i32 33, align 4
diff --git a/test/CodeGen/Mips/sub1.ll b/test/CodeGen/Mips/sub1.ll
index 636ab8f2c5f3..a5e698840264 100644
--- a/test/CodeGen/Mips/sub1.ll
+++ b/test/CodeGen/Mips/sub1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 10, align 4
@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
diff --git a/test/CodeGen/Mips/sub2.ll b/test/CodeGen/Mips/sub2.ll
index a97f5e947ca9..d10cddb9e6b9 100644
--- a/test/CodeGen/Mips/sub2.ll
+++ b/test/CodeGen/Mips/sub2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@i = global i32 10, align 4
@j = global i32 20, align 4
diff --git a/test/CodeGen/Mips/tail16.ll b/test/CodeGen/Mips/tail16.ll
index 13f27fcc513b..75a2a827f258 100644
--- a/test/CodeGen/Mips/tail16.ll
+++ b/test/CodeGen/Mips/tail16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s
; Function Attrs: nounwind optsize
define float @h() {
diff --git a/test/CodeGen/Mips/tailcall.ll b/test/CodeGen/Mips/tailcall.ll
index 6a0d64b7eed8..b0868255053a 100644
--- a/test/CodeGen/Mips/tailcall.ll
+++ b/test/CodeGen/Mips/tailcall.ll
@@ -4,7 +4,7 @@
; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=STATIC32
; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+n64 -enable-mips-tail-calls \
; RUN: < %s | FileCheck %s -check-prefix=N64
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic \
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic \
; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=PIC16
@g0 = common global i32 0, align 4
diff --git a/test/CodeGen/Mips/tls-alias.ll b/test/CodeGen/Mips/tls-alias.ll
index b61f84e03761..5de23103c997 100644
--- a/test/CodeGen/Mips/tls-alias.ll
+++ b/test/CodeGen/Mips/tls-alias.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=mipsel -relocation-model=pic -disable-mips-delay-filler < %s | FileCheck %s
@foo = thread_local global i32 42
-@bar = hidden thread_local alias i32* @foo
+@bar = hidden thread_local alias i32, i32* @foo
define i32* @zed() {
; CHECK-DAG: __tls_get_addr
diff --git a/test/CodeGen/Mips/tls16.ll b/test/CodeGen/Mips/tls16.ll
index 3d324d7ed1e8..349e381af2b7 100644
--- a/test/CodeGen/Mips/tls16.ll
+++ b/test/CodeGen/Mips/tls16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PIC16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PIC16
@a = thread_local global i32 4, align 4
diff --git a/test/CodeGen/Mips/tls16_2.ll b/test/CodeGen/Mips/tls16_2.ll
index 0a6a4123e116..b232c8534c55 100644
--- a/test/CodeGen/Mips/tls16_2.ll
+++ b/test/CodeGen/Mips/tls16_2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PIC16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PIC16
@f.i = internal thread_local unnamed_addr global i32 1, align 4
diff --git a/test/CodeGen/Mips/trap1.ll b/test/CodeGen/Mips/trap1.ll
index 90755130e7c2..575574a0a3b1 100644
--- a/test/CodeGen/Mips/trap1.ll
+++ b/test/CodeGen/Mips/trap1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mattr=mips16 -relocation-model=pic < %s | FileCheck %s -check-prefix=pic
declare void @llvm.trap()
diff --git a/test/CodeGen/Mips/ul1.ll b/test/CodeGen/Mips/ul1.ll
index ad0992954631..eb5187a8533a 100644
--- a/test/CodeGen/Mips/ul1.ll
+++ b/test/CodeGen/Mips/ul1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
%struct.ua = type <{ i16, i32 }>
@foo = common global %struct.ua zeroinitializer, align 1
diff --git a/test/CodeGen/Mips/xor1.ll b/test/CodeGen/Mips/xor1.ll
index dd51f143bb6c..b203271a042b 100644
--- a/test/CodeGen/Mips/xor1.ll
+++ b/test/CodeGen/Mips/xor1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+; RUN: llc -march=mipsel -mattr=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
@x = global i32 65504, align 4
@y = global i32 60929, align 4
diff --git a/test/CodeGen/NVPTX/branch-fold.ll b/test/CodeGen/NVPTX/branch-fold.ll
new file mode 100644
index 000000000000..2b9cd0a35d92
--- /dev/null
+++ b/test/CodeGen/NVPTX/branch-fold.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp | FileCheck %s
+; Disable CGP which also folds branches, so that only BranchFolding is under
+; the spotlight.
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+define void @foo(i32 %x, float* %output) {
+; CHECK-LABEL: .visible .func foo(
+; CHECK-NOT: bra.uni
+; CHECK-NOT: LBB0_
+ %1 = icmp eq i32 %x, 1
+ br i1 %1, label %then, label %else
+
+then:
+ br label %merge
+
+else:
+ br label %merge
+
+merge:
+ store float 2.0, float* %output
+ ret void
+}
+
+; PR24299. no crash
+define ptx_kernel void @hoge() #0 {
+; CHECK-LABEL: .visible .entry hoge(
+bb:
+ br i1 undef, label %bb1, label %bb4
+
+bb1: ; preds = %bb1, %bb
+ %tmp = phi i64 [ %tmp2, %bb1 ], [ undef, %bb ]
+ %tmp2 = add nsw i64 %tmp, 1
+ %tmp3 = icmp sle i64 %tmp, 0
+ br i1 %tmp3, label %bb1, label %bb4
+
+bb4: ; preds = %bb4, %bb1, %bb
+ br label %bb4
+}
diff --git a/test/CodeGen/NVPTX/bypass-div.ll b/test/CodeGen/NVPTX/bypass-div.ll
new file mode 100644
index 000000000000..bd98c9a5b0b1
--- /dev/null
+++ b/test/CodeGen/NVPTX/bypass-div.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
+
+; 64-bit divides and rems should be split into a fast and slow path where
+; the fast path uses a 32-bit operation.
+
+define void @sdiv64(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: sdiv64(
+; CHECK: div.s64
+; CHECK: div.u32
+; CHECK: ret
+ %d = sdiv i64 %a, %b
+ store i64 %d, i64* %retptr
+ ret void
+}
+
+define void @udiv64(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: udiv64(
+; CHECK: div.u64
+; CHECK: div.u32
+; CHECK: ret
+ %d = udiv i64 %a, %b
+ store i64 %d, i64* %retptr
+ ret void
+}
+
+define void @srem64(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: srem64(
+; CHECK: rem.s64
+; CHECK: rem.u32
+; CHECK: ret
+ %d = srem i64 %a, %b
+ store i64 %d, i64* %retptr
+ ret void
+}
+
+define void @urem64(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: urem64(
+; CHECK: rem.u64
+; CHECK: rem.u32
+; CHECK: ret
+ %d = urem i64 %a, %b
+ store i64 %d, i64* %retptr
+ ret void
+}
+
+define void @sdiv32(i32 %a, i32 %b, i32* %retptr) {
+; CHECK-LABEL: sdiv32(
+; CHECK: div.s32
+; CHECK-NOT: div.
+ %d = sdiv i32 %a, %b
+ store i32 %d, i32* %retptr
+ ret void
+}
+
+define void @udiv32(i32 %a, i32 %b, i32* %retptr) {
+; CHECK-LABEL: udiv32(
+; CHECK: div.u32
+; CHECK-NOT: div.
+ %d = udiv i32 %a, %b
+ store i32 %d, i32* %retptr
+ ret void
+}
+
+define void @srem32(i32 %a, i32 %b, i32* %retptr) {
+; CHECK-LABEL: srem32(
+; CHECK: rem.s32
+; CHECK-NOT: rem.
+ %d = srem i32 %a, %b
+ store i32 %d, i32* %retptr
+ ret void
+}
+
+define void @urem32(i32 %a, i32 %b, i32* %retptr) {
+; CHECK-LABEL: urem32(
+; CHECK: rem.u32
+; CHECK-NOT: rem.
+ %d = urem i32 %a, %b
+ store i32 %d, i32* %retptr
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/combine-min-max.ll b/test/CodeGen/NVPTX/combine-min-max.ll
new file mode 100644
index 000000000000..64bb7a37ffd2
--- /dev/null
+++ b/test/CodeGen/NVPTX/combine-min-max.ll
@@ -0,0 +1,307 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O2 | FileCheck %s
+
+; *************************************
+; * Cases with no min/max
+
+define i32 @ab_eq_i32(i32 %a, i32 %b) {
+; LABEL: @ab_slt_i32
+; CHECK-NOT: min
+; CHECK-NOT: max
+ %cmp = icmp eq i32 %a, %b
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+define i64 @ba_ne_i64(i64 %a, i64 %b) {
+; LABEL: @ab_ne_i64
+; CHECK-NOT: min
+; CHECK-NOT: max
+ %cmp = icmp ne i64 %a, %b
+ %sel = select i1 %cmp, i64 %b, i64 %a
+ ret i64 %sel
+}
+
+; PTX does have e.g. max.s16, but at least as of Kepler (sm_3x) that
+; gets compiled to SASS that converts the 16 bit parameters to 32 bit
+; before using a 32 bit instruction. That is probably not a win and
+; NVCC 7.5 does not emit 16 bit min/max either, presumably for that
+; reason.
+define i16 @ab_ugt_i16(i16 %a, i16 %b) {
+; LABEL: @ab_ugt_i16
+; CHECK-NOT: min
+; CHECK-NOT: max
+ %cmp = icmp ugt i16 %a, %b
+ %sel = select i1 %cmp, i16 %a, i16 %b
+ ret i16 %sel
+}
+
+
+; *************************************
+; * All variations with i32
+
+; *** ab, unsigned, i32
+define i32 @ab_ugt_i32(i32 %a, i32 %b) {
+; LABEL: @ab_ugt_i32
+; CHECK: max.u32
+ %cmp = icmp ugt i32 %a, %b
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+define i32 @ab_uge_i32(i32 %a, i32 %b) {
+; LABEL: @ab_uge_i32
+; CHECK: max.u32
+ %cmp = icmp uge i32 %a, %b
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+define i32 @ab_ult_i32(i32 %a, i32 %b) {
+; LABEL: @ab_ult_i32
+; CHECK: min.u32
+ %cmp = icmp ult i32 %a, %b
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+define i32 @ab_ule_i32(i32 %a, i32 %b) {
+; LABEL: @ab_ule_i32
+; CHECK: min.u32
+ %cmp = icmp ule i32 %a, %b
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; *** ab, signed, i32
+define i32 @ab_sgt_i32(i32 %a, i32 %b) {
+; LABEL: @ab_ugt_i32
+; CHECK: max.s32
+ %cmp = icmp sgt i32 %a, %b
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+define i32 @ab_sge_i32(i32 %a, i32 %b) {
+; LABEL: @ab_sge_i32
+; CHECK: max.s32
+ %cmp = icmp sge i32 %a, %b
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+define i32 @ab_slt_i32(i32 %a, i32 %b) {
+; LABEL: @ab_slt_i32
+; CHECK: min.s32
+ %cmp = icmp slt i32 %a, %b
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+define i32 @ab_sle_i32(i32 %a, i32 %b) {
+; LABEL: @ab_sle_i32
+; CHECK: min.s32
+ %cmp = icmp sle i32 %a, %b
+ %sel = select i1 %cmp, i32 %a, i32 %b
+ ret i32 %sel
+}
+
+; *** ba, unsigned, i32
+define i32 @ba_ugt_i32(i32 %a, i32 %b) {
+; LABEL: @ba_ugt_i32
+; CHECK: min.u32
+ %cmp = icmp ugt i32 %a, %b
+ %sel = select i1 %cmp, i32 %b, i32 %a
+ ret i32 %sel
+}
+
+define i32 @ba_uge_i32(i32 %a, i32 %b) {
+; LABEL: @ba_uge_i32
+; CHECK: min.u32
+ %cmp = icmp uge i32 %a, %b
+ %sel = select i1 %cmp, i32 %b, i32 %a
+ ret i32 %sel
+}
+
+define i32 @ba_ult_i32(i32 %a, i32 %b) {
+; LABEL: @ba_ult_i32
+; CHECK: max.u32
+ %cmp = icmp ult i32 %a, %b
+ %sel = select i1 %cmp, i32 %b, i32 %a
+ ret i32 %sel
+}
+
+define i32 @ba_ule_i32(i32 %a, i32 %b) {
+; LABEL: @ba_ule_i32
+; CHECK: max.u32
+ %cmp = icmp ule i32 %a, %b
+ %sel = select i1 %cmp, i32 %b, i32 %a
+ ret i32 %sel
+}
+
+; *** ba, signed, i32
+define i32 @ba_sgt_i32(i32 %a, i32 %b) {
+; LBAEL: @ba_ugt_i32
+; CHECK: min.s32
+ %cmp = icmp sgt i32 %a, %b
+ %sel = select i1 %cmp, i32 %b, i32 %a
+ ret i32 %sel
+}
+
+define i32 @ba_sge_i32(i32 %a, i32 %b) {
+; LABEL: @ba_sge_i32
+; CHECK: min.s32
+ %cmp = icmp sge i32 %a, %b
+ %sel = select i1 %cmp, i32 %b, i32 %a
+ ret i32 %sel
+}
+
+define i32 @ba_slt_i32(i32 %a, i32 %b) {
+; LABEL: @ba_slt_i32
+; CHECK: max.s32
+ %cmp = icmp slt i32 %a, %b
+ %sel = select i1 %cmp, i32 %b, i32 %a
+ ret i32 %sel
+}
+
+define i32 @ba_sle_i32(i32 %a, i32 %b) {
+; LABEL: @ba_sle_i32
+; CHECK: max.s32
+ %cmp = icmp sle i32 %a, %b
+ %sel = select i1 %cmp, i32 %b, i32 %a
+ ret i32 %sel
+}
+
+; *************************************
+; * All variations with i64
+
+; *** ab, unsigned, i64
+define i64 @ab_ugt_i64(i64 %a, i64 %b) {
+; LABEL: @ab_ugt_i64
+; CHECK: max.u64
+ %cmp = icmp ugt i64 %a, %b
+ %sel = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %sel
+}
+
+define i64 @ab_uge_i64(i64 %a, i64 %b) {
+; LABEL: @ab_uge_i64
+; CHECK: max.u64
+ %cmp = icmp uge i64 %a, %b
+ %sel = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %sel
+}
+
+define i64 @ab_ult_i64(i64 %a, i64 %b) {
+; LABEL: @ab_ult_i64
+; CHECK: min.u64
+ %cmp = icmp ult i64 %a, %b
+ %sel = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %sel
+}
+
+define i64 @ab_ule_i64(i64 %a, i64 %b) {
+; LABEL: @ab_ule_i64
+; CHECK: min.u64
+ %cmp = icmp ule i64 %a, %b
+ %sel = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %sel
+}
+
+; *** ab, signed, i64
+define i64 @ab_sgt_i64(i64 %a, i64 %b) {
+; LABEL: @ab_ugt_i64
+; CHECK: max.s64
+ %cmp = icmp sgt i64 %a, %b
+ %sel = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %sel
+}
+
+define i64 @ab_sge_i64(i64 %a, i64 %b) {
+; LABEL: @ab_sge_i64
+; CHECK: max.s64
+ %cmp = icmp sge i64 %a, %b
+ %sel = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %sel
+}
+
+define i64 @ab_slt_i64(i64 %a, i64 %b) {
+; LABEL: @ab_slt_i64
+; CHECK: min.s64
+ %cmp = icmp slt i64 %a, %b
+ %sel = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %sel
+}
+
+define i64 @ab_sle_i64(i64 %a, i64 %b) {
+; LABEL: @ab_sle_i64
+; CHECK: min.s64
+ %cmp = icmp sle i64 %a, %b
+ %sel = select i1 %cmp, i64 %a, i64 %b
+ ret i64 %sel
+}
+
+; *** ba, unsigned, i64
+define i64 @ba_ugt_i64(i64 %a, i64 %b) {
+; LABEL: @ba_ugt_i64
+; CHECK: min.u64
+ %cmp = icmp ugt i64 %a, %b
+ %sel = select i1 %cmp, i64 %b, i64 %a
+ ret i64 %sel
+}
+
+define i64 @ba_uge_i64(i64 %a, i64 %b) {
+; LABEL: @ba_uge_i64
+; CHECK: min.u64
+ %cmp = icmp uge i64 %a, %b
+ %sel = select i1 %cmp, i64 %b, i64 %a
+ ret i64 %sel
+}
+
+define i64 @ba_ult_i64(i64 %a, i64 %b) {
+; LABEL: @ba_ult_i64
+; CHECK: max.u64
+ %cmp = icmp ult i64 %a, %b
+ %sel = select i1 %cmp, i64 %b, i64 %a
+ ret i64 %sel
+}
+
+define i64 @ba_ule_i64(i64 %a, i64 %b) {
+; LABEL: @ba_ule_i64
+; CHECK: max.u64
+ %cmp = icmp ule i64 %a, %b
+ %sel = select i1 %cmp, i64 %b, i64 %a
+ ret i64 %sel
+}
+
+; *** ba, signed, i64
+define i64 @ba_sgt_i64(i64 %a, i64 %b) {
+; LBAEL: @ba_ugt_i64
+; CHECK: min.s64
+ %cmp = icmp sgt i64 %a, %b
+ %sel = select i1 %cmp, i64 %b, i64 %a
+ ret i64 %sel
+}
+
+define i64 @ba_sge_i64(i64 %a, i64 %b) {
+; LABEL: @ba_sge_i64
+; CHECK: min.s64
+ %cmp = icmp sge i64 %a, %b
+ %sel = select i1 %cmp, i64 %b, i64 %a
+ ret i64 %sel
+}
+
+define i64 @ba_slt_i64(i64 %a, i64 %b) {
+; LABEL: @ba_slt_i64
+; CHECK: max.s64
+ %cmp = icmp slt i64 %a, %b
+ %sel = select i1 %cmp, i64 %b, i64 %a
+ ret i64 %sel
+}
+
+define i64 @ba_sle_i64(i64 %a, i64 %b) {
+; LABEL: @ba_sle_i64
+; CHECK: max.s64
+ %cmp = icmp sle i64 %a, %b
+ %sel = select i1 %cmp, i64 %b, i64 %a
+ ret i64 %sel
+}
diff --git a/test/CodeGen/NVPTX/fma-assoc.ll b/test/CodeGen/NVPTX/fma-assoc.ll
index fc04c61dd691..80a08a86316c 100644
--- a/test/CodeGen/NVPTX/fma-assoc.ll
+++ b/test/CodeGen/NVPTX/fma-assoc.ll
@@ -23,3 +23,16 @@ define ptx_device double @t1_f64(double %x, double %y, double %z,
%d = fadd double %c, %z
ret double %d
}
+
+define double @two_choices(double %val1, double %val2) {
+; CHECK-LABEL: two_choices(
+; CHECK: mul.f64
+; CHECK-NOT: mul.f64
+; CHECK: fma.rn.f64
+ %1 = fmul double %val1, %val2
+ %2 = fmul double %1, %1
+ %3 = fadd double %1, %2
+
+ ret double %3
+}
+
diff --git a/test/CodeGen/NVPTX/global-addrspace.ll b/test/CodeGen/NVPTX/global-addrspace.ll
new file mode 100644
index 000000000000..4da14c7ff4fe
--- /dev/null
+++ b/test/CodeGen/NVPTX/global-addrspace.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
+
+; PTX32: .visible .global .align 4 .u32 i;
+; PTX32: .visible .const .align 4 .u32 j;
+; PTX32: .visible .shared .align 4 .u32 k;
+; PTX64: .visible .global .align 4 .u32 i;
+; PTX64: .visible .const .align 4 .u32 j;
+; PTX64: .visible .shared .align 4 .u32 k;
+@i = addrspace(1) externally_initialized global i32 0, align 4
+@j = addrspace(4) externally_initialized global i32 0, align 4
+@k = addrspace(3) global i32 undef, align 4
diff --git a/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
new file mode 100644
index 000000000000..d93499b47f59
--- /dev/null
+++ b/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
@@ -0,0 +1,264 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck -check-prefix=SM20 %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck -check-prefix=SM35 %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; SM20-LABEL: .visible .entry foo1(
+; SM20: ld.global.f32
+; SM35-LABEL: .visible .entry foo1(
+; SM35: ld.global.nc.f32
+define void @foo1(float * noalias readonly %from, float * %to) {
+ %1 = load float, float * %from
+ store float %1, float * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo2(
+; SM20: ld.global.f64
+; SM35-LABEL: .visible .entry foo2(
+; SM35: ld.global.nc.f64
+define void @foo2(double * noalias readonly %from, double * %to) {
+ %1 = load double, double * %from
+ store double %1, double * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo3(
+; SM20: ld.global.u16
+; SM35-LABEL: .visible .entry foo3(
+; SM35: ld.global.nc.u16
+define void @foo3(i16 * noalias readonly %from, i16 * %to) {
+ %1 = load i16, i16 * %from
+ store i16 %1, i16 * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo4(
+; SM20: ld.global.u32
+; SM35-LABEL: .visible .entry foo4(
+; SM35: ld.global.nc.u32
+define void @foo4(i32 * noalias readonly %from, i32 * %to) {
+ %1 = load i32, i32 * %from
+ store i32 %1, i32 * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo5(
+; SM20: ld.global.u64
+; SM35-LABEL: .visible .entry foo5(
+; SM35: ld.global.nc.u64
+define void @foo5(i64 * noalias readonly %from, i64 * %to) {
+ %1 = load i64, i64 * %from
+ store i64 %1, i64 * %to
+ ret void
+}
+
+; i128 is non standard integer in nvptx64
+; SM20-LABEL: .visible .entry foo6(
+; SM20: ld.global.u64
+; SM20: ld.global.u64
+; SM35-LABEL: .visible .entry foo6(
+; SM35: ld.global.nc.u64
+; SM35: ld.global.nc.u64
+define void @foo6(i128 * noalias readonly %from, i128 * %to) {
+ %1 = load i128, i128 * %from
+ store i128 %1, i128 * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo7(
+; SM20: ld.global.v2.u8
+; SM35-LABEL: .visible .entry foo7(
+; SM35: ld.global.nc.v2.u8
+define void @foo7(<2 x i8> * noalias readonly %from, <2 x i8> * %to) {
+ %1 = load <2 x i8>, <2 x i8> * %from
+ store <2 x i8> %1, <2 x i8> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo8(
+; SM20: ld.global.v2.u16
+; SM35-LABEL: .visible .entry foo8(
+; SM35: ld.global.nc.v2.u16
+define void @foo8(<2 x i16> * noalias readonly %from, <2 x i16> * %to) {
+ %1 = load <2 x i16>, <2 x i16> * %from
+ store <2 x i16> %1, <2 x i16> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo9(
+; SM20: ld.global.v2.u32
+; SM35-LABEL: .visible .entry foo9(
+; SM35: ld.global.nc.v2.u32
+define void @foo9(<2 x i32> * noalias readonly %from, <2 x i32> * %to) {
+ %1 = load <2 x i32>, <2 x i32> * %from
+ store <2 x i32> %1, <2 x i32> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo10(
+; SM20: ld.global.v2.u64
+; SM35-LABEL: .visible .entry foo10(
+; SM35: ld.global.nc.v2.u64
+define void @foo10(<2 x i64> * noalias readonly %from, <2 x i64> * %to) {
+ %1 = load <2 x i64>, <2 x i64> * %from
+ store <2 x i64> %1, <2 x i64> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo11(
+; SM20: ld.global.v2.f32
+; SM35-LABEL: .visible .entry foo11(
+; SM35: ld.global.nc.v2.f32
+define void @foo11(<2 x float> * noalias readonly %from, <2 x float> * %to) {
+ %1 = load <2 x float>, <2 x float> * %from
+ store <2 x float> %1, <2 x float> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo12(
+; SM20: ld.global.v2.f64
+; SM35-LABEL: .visible .entry foo12(
+; SM35: ld.global.nc.v2.f64
+define void @foo12(<2 x double> * noalias readonly %from, <2 x double> * %to) {
+ %1 = load <2 x double>, <2 x double> * %from
+ store <2 x double> %1, <2 x double> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo13(
+; SM20: ld.global.v4.u8
+; SM35-LABEL: .visible .entry foo13(
+; SM35: ld.global.nc.v4.u8
+define void @foo13(<4 x i8> * noalias readonly %from, <4 x i8> * %to) {
+ %1 = load <4 x i8>, <4 x i8> * %from
+ store <4 x i8> %1, <4 x i8> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo14(
+; SM20: ld.global.v4.u16
+; SM35-LABEL: .visible .entry foo14(
+; SM35: ld.global.nc.v4.u16
+define void @foo14(<4 x i16> * noalias readonly %from, <4 x i16> * %to) {
+ %1 = load <4 x i16>, <4 x i16> * %from
+ store <4 x i16> %1, <4 x i16> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo15(
+; SM20: ld.global.v4.u32
+; SM35-LABEL: .visible .entry foo15(
+; SM35: ld.global.nc.v4.u32
+define void @foo15(<4 x i32> * noalias readonly %from, <4 x i32> * %to) {
+ %1 = load <4 x i32>, <4 x i32> * %from
+ store <4 x i32> %1, <4 x i32> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo16(
+; SM20: ld.global.v4.f32
+; SM35-LABEL: .visible .entry foo16(
+; SM35: ld.global.nc.v4.f32
+define void @foo16(<4 x float> * noalias readonly %from, <4 x float> * %to) {
+ %1 = load <4 x float>, <4 x float> * %from
+ store <4 x float> %1, <4 x float> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo17(
+; SM20: ld.global.v2.f64
+; SM20: ld.global.v2.f64
+; SM35-LABEL: .visible .entry foo17(
+; SM35: ld.global.nc.v2.f64
+; SM35: ld.global.nc.v2.f64
+define void @foo17(<4 x double> * noalias readonly %from, <4 x double> * %to) {
+ %1 = load <4 x double>, <4 x double> * %from
+ store <4 x double> %1, <4 x double> * %to
+ ret void
+}
+
+; SM20-LABEL: .visible .entry foo18(
+; SM20: ld.global.u64
+; SM35-LABEL: .visible .entry foo18(
+; SM35: ld.global.nc.u64
+define void @foo18(float ** noalias readonly %from, float ** %to) {
+ %1 = load float *, float ** %from
+ store float * %1, float ** %to
+ ret void
+}
+
+; Test that we can infer a cached load for a pointer induction variable.
+; SM20-LABEL: .visible .entry foo19(
+; SM20: ld.global.f32
+; SM35-LABEL: .visible .entry foo19(
+; SM35: ld.global.nc.f32
+define void @foo19(float * noalias readonly %from, float * %to, i32 %n) {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %nexti, %loop ]
+ %sum = phi float [ 0.0, %entry ], [ %nextsum, %loop ]
+ %ptr = getelementptr inbounds float, float * %from, i32 %i
+ %value = load float, float * %ptr, align 4
+ %nextsum = fadd float %value, %sum
+ %nexti = add nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %n
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ store float %nextsum, float * %to
+ ret void
+}
+
+; This test captures the case of a non-kernel function. In a
+; non-kernel function, without interprocedural analysis, we do not
+; know that the parameter is global. We also do not know that the
+; pointed-to memory is never written to (for the duration of the
+; kernel). For both reasons, we cannot use a cached load here.
+; SM20-LABEL: notkernel(
+; SM20: ld.f32
+; SM35-LABEL: notkernel(
+; SM35: ld.f32
+define void @notkernel(float * noalias readonly %from, float * %to) {
+ %1 = load float, float * %from
+ store float %1, float * %to
+ ret void
+}
+
+; As @notkernel, but with the parameter explicitly marked as global. We still
+; do not know that the parameter is never written to (for the duration of the
+; kernel). This case does not currently come up normally since we do not infer
+; that pointers are global interprocedurally as of 2015-08-05.
+; SM20-LABEL: notkernel2(
+; SM20: ld.global.f32
+; SM35-LABEL: notkernel2(
+; SM35: ld.global.f32
+define void @notkernel2(float addrspace(1) * noalias readonly %from, float * %to) {
+ %1 = load float, float addrspace(1) * %from
+ store float %1, float * %to
+ ret void
+}
+
+!nvvm.annotations = !{!1 ,!2 ,!3 ,!4 ,!5 ,!6, !7 ,!8 ,!9 ,!10 ,!11 ,!12, !13, !14, !15, !16, !17, !18, !19}
+!1 = !{void (float *, float *)* @foo1, !"kernel", i32 1}
+!2 = !{void (double *, double *)* @foo2, !"kernel", i32 1}
+!3 = !{void (i16 *, i16 *)* @foo3, !"kernel", i32 1}
+!4 = !{void (i32 *, i32 *)* @foo4, !"kernel", i32 1}
+!5 = !{void (i64 *, i64 *)* @foo5, !"kernel", i32 1}
+!6 = !{void (i128 *, i128 *)* @foo6, !"kernel", i32 1}
+!7 = !{void (<2 x i8> *, <2 x i8> *)* @foo7, !"kernel", i32 1}
+!8 = !{void (<2 x i16> *, <2 x i16> *)* @foo8, !"kernel", i32 1}
+!9 = !{void (<2 x i32> *, <2 x i32> *)* @foo9, !"kernel", i32 1}
+!10 = !{void (<2 x i64> *, <2 x i64> *)* @foo10, !"kernel", i32 1}
+!11 = !{void (<2 x float> *, <2 x float> *)* @foo11, !"kernel", i32 1}
+!12 = !{void (<2 x double> *, <2 x double> *)* @foo12, !"kernel", i32 1}
+!13 = !{void (<4 x i8> *, <4 x i8> *)* @foo13, !"kernel", i32 1}
+!14 = !{void (<4 x i16> *, <4 x i16> *)* @foo14, !"kernel", i32 1}
+!15 = !{void (<4 x i32> *, <4 x i32> *)* @foo15, !"kernel", i32 1}
+!16 = !{void (<4 x float> *, <4 x float> *)* @foo16, !"kernel", i32 1}
+!17 = !{void (<4 x double> *, <4 x double> *)* @foo17, !"kernel", i32 1}
+!18 = !{void (float **, float **)* @foo18, !"kernel", i32 1}
+!19 = !{void (float *, float *, i32)* @foo19, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/lower-aggr-copies.ll b/test/CodeGen/NVPTX/lower-aggr-copies.ll
index c3adfc4646cf..ef570982b808 100644
--- a/test/CodeGen/NVPTX/lower-aggr-copies.ll
+++ b/test/CodeGen/NVPTX/lower-aggr-copies.ll
@@ -1,35 +1,68 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX
+; RUN: opt < %s -S -nvptx-lower-aggr-copies | FileCheck %s --check-prefix IR
; Verify that the NVPTXLowerAggrCopies pass works as expected - calls to
; llvm.mem* intrinsics get lowered to loops.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "nvptx64-unknown-unknown"
+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
define i8* @memcpy_caller(i8* %dst, i8* %src, i64 %n) #0 {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i32 1, i1 false)
ret i8* %dst
-; CHECK-LABEL: .visible .func (.param .b32 func_retval0) memcpy_caller
-; CHECK: LBB[[LABEL:[_0-9]+]]:
-; CHECK: ld.u8 %rs[[REG:[0-9]+]]
-; CHECK: st.u8 [%r{{[0-9]+}}], %rs[[REG]]
-; CHECK: add.s64 %rd[[COUNTER:[0-9]+]], %rd[[COUNTER]], 1
-; CHECK-NEXT: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
-; CHECK-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
+
+; IR-LABEL: @memcpy_caller
+; IR: loadstoreloop:
+; IR: [[LOADPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64
+; IR-NEXT: [[VAL:%[0-9]+]] = load i8, i8* [[LOADPTR]]
+; IR-NEXT: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64
+; IR-NEXT: store i8 [[VAL]], i8* [[STOREPTR]]
+
+; PTX-LABEL: .visible .func (.param .b64 func_retval0) memcpy_caller
+; PTX: LBB[[LABEL:[_0-9]+]]:
+; PTX: ld.u8 %rs[[REG:[0-9]+]]
+; PTX: st.u8 [%rd{{[0-9]+}}], %rs[[REG]]
+; PTX: add.s64 %rd[[COUNTER:[0-9]+]], %rd[[COUNTER]], 1
+; PTX-NEXT: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
+; PTX-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
}
define i8* @memcpy_volatile_caller(i8* %dst, i8* %src, i64 %n) #0 {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i32 1, i1 true)
ret i8* %dst
-; CHECK-LABEL: .visible .func (.param .b32 func_retval0) memcpy_volatile_caller
-; CHECK: LBB[[LABEL:[_0-9]+]]:
-; CHECK: ld.volatile.u8 %rs[[REG:[0-9]+]]
-; CHECK: st.volatile.u8 [%r{{[0-9]+}}], %rs[[REG]]
-; CHECK: add.s64 %rd[[COUNTER:[0-9]+]], %rd[[COUNTER]], 1
-; CHECK-NEXT: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
-; CHECK-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
+
+; IR-LABEL: @memcpy_volatile_caller
+; IR: load volatile
+; IR: store volatile
+
+; PTX-LABEL: .visible .func (.param .b64 func_retval0) memcpy_volatile_caller
+; PTX: LBB[[LABEL:[_0-9]+]]:
+; PTX: ld.volatile.u8 %rs[[REG:[0-9]+]]
+; PTX: st.volatile.u8 [%rd{{[0-9]+}}], %rs[[REG]]
+; PTX: add.s64 %rd[[COUNTER:[0-9]+]], %rd[[COUNTER]], 1
+; PTX-NEXT: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
+; PTX-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
+}
+
+define i8* @memcpy_casting_caller(i32* %dst, i32* %src, i64 %n) #0 {
+entry:
+ %0 = bitcast i32* %dst to i8*
+ %1 = bitcast i32* %src to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 %n, i32 1, i1 false)
+ ret i8* %0
+
+; Check that casts in calls to memcpy are handled properly
+; IR-LABEL: @memcpy_casting_caller
+; IR: [[DSTCAST:%[0-9]+]] = bitcast i32* %dst to i8*
+; IR: [[SRCCAST:%[0-9]+]] = bitcast i32* %src to i8*
+; IR: getelementptr inbounds i8, i8* [[SRCCAST]]
+; IR: getelementptr inbounds i8, i8* [[DSTCAST]]
}
define i8* @memset_caller(i8* %dst, i32 %c, i64 %n) #0 {
@@ -37,11 +70,52 @@ entry:
%0 = trunc i32 %c to i8
tail call void @llvm.memset.p0i8.i64(i8* %dst, i8 %0, i64 %n, i32 1, i1 false)
ret i8* %dst
-; CHECK-LABEL: .visible .func (.param .b32 func_retval0) memset_caller(
-; CHECK: ld.param.u8 %rs[[REG:[0-9]+]]
-; CHECK: LBB[[LABEL:[_0-9]+]]:
-; CHECK: st.u8 [%r{{[0-9]+}}], %rs[[REG]]
-; CHECK: add.s64 %rd[[COUNTER:[0-9]+]], %rd[[COUNTER]], 1
-; CHECK-NEXT: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
-; CHECK-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
+
+; IR-LABEL: @memset_caller
+; IR: [[VAL:%[0-9]+]] = trunc i32 %c to i8
+; IR: loadstoreloop:
+; IR: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64
+; IR-NEXT: store i8 [[VAL]], i8* [[STOREPTR]]
+
+; PTX-LABEL: .visible .func (.param .b64 func_retval0) memset_caller(
+; PTX: ld.param.u8 %rs[[REG:[0-9]+]]
+; PTX: LBB[[LABEL:[_0-9]+]]:
+; PTX: st.u8 [%rd{{[0-9]+}}], %rs[[REG]]
+; PTX: add.s64 %rd[[COUNTER:[0-9]+]], %rd[[COUNTER]], 1
+; PTX-NEXT: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
+; PTX-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
+}
+
+define i8* @memmove_caller(i8* %dst, i8* %src, i64 %n) #0 {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i32 1, i1 false)
+ ret i8* %dst
+
+; IR-LABEL: @memmove_caller
+; IR: icmp ult i8* %src, %dst
+; IR: [[PHIVAL:%[0-9a-zA-Z_]+]] = phi i64
+; IR-NEXT: %index_ptr = sub i64 [[PHIVAL]], 1
+; IR: [[FWDPHIVAL:%[0-9a-zA-Z_]+]] = phi i64
+; IR: {{%[0-9a-zA-Z_]+}} = add i64 [[FWDPHIVAL]], 1
+
+; PTX-LABEL: .visible .func (.param .b64 func_retval0) memmove_caller(
+; PTX: ld.param.u64 %rd[[N:[0-9]+]]
+; PTX: setp.eq.s64 %p[[NEQ0:[0-9]+]], %rd[[N]], 0
+; PTX: setp.ge.u64 %p[[SRC_GT_THAN_DST:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
+; PTX-NEXT: @%p[[SRC_GT_THAN_DST]] bra LBB[[FORWARD_BB:[0-9_]+]]
+; -- this is the backwards copying BB
+; PTX: @%p[[NEQ0]] bra LBB[[EXIT:[0-9_]+]]
+; PTX: add.s64 %rd[[N]], %rd[[N]], -1
+; PTX: ld.u8 %rs[[ELEMENT:[0-9]+]]
+; PTX: st.u8 [%rd{{[0-9]+}}], %rs[[ELEMENT]]
+; -- this is the forwards copying BB
+; PTX: LBB[[FORWARD_BB]]:
+; PTX: @%p[[NEQ0]] bra LBB[[EXIT]]
+; PTX: ld.u8 %rs[[ELEMENT2:[0-9]+]]
+; PTX: st.u8 [%rd{{[0-9]+}}], %rs[[ELEMENT2]]
+; PTX: add.s64 %rd[[INDEX:[0-9]+]], %rd[[INDEX]], 1
+; -- exit block
+; PTX: LBB[[EXIT]]:
+; PTX-NEXT: st.param.b64 [func_retval0
+; PTX-NEXT: ret
}
diff --git a/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
index 0de72c4a1aed..2fffa3eeac15 100644
--- a/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
+++ b/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
-target triple = "nvptx64-unknown-unknown"
+target triple = "nvptx64-nvidia-cuda"
; Verify that both %input and %output are converted to global pointers and then
; addrspacecast'ed back to the original type.
@@ -26,6 +26,22 @@ define void @kernel2(float addrspace(1)* %input, float addrspace(1)* %output) {
ret void
}
-!nvvm.annotations = !{!0, !1}
+%struct.S = type { i32*, i32* }
+
+define void @ptr_in_byval(%struct.S* byval %input, i32* %output) {
+; CHECK-LABEL: .visible .entry ptr_in_byval(
+; CHECK: cvta.to.global.u64
+; CHECK: cvta.to.global.u64
+ %b_ptr = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
+ %b = load i32*, i32** %b_ptr, align 4
+ %v = load i32, i32* %b, align 4
+; CHECK: ld.global.u32
+ store i32 %v, i32* %output, align 4
+; CHECK: st.global.u32
+ ret void
+}
+
+!nvvm.annotations = !{!0, !1, !2}
!0 = !{void (float*, float*)* @kernel, !"kernel", i32 1}
!1 = !{void (float addrspace(1)*, float addrspace(1)*)* @kernel2, !"kernel", i32 1}
+!2 = !{void (%struct.S*, i32*)* @ptr_in_byval, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/reg-copy.ll b/test/CodeGen/NVPTX/reg-copy.ll
new file mode 100644
index 000000000000..98ee49d39023
--- /dev/null
+++ b/test/CodeGen/NVPTX/reg-copy.ll
@@ -0,0 +1,224 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+define void @PR24303(float* %f) {
+; CHECK-LABEL: .visible .entry PR24303(
+; Do not use mov.f or mov.u to convert between float and int.
+; CHECK-NOT: mov.{{f|u}}{{32|64}} %f{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK-NOT: mov.{{f|u}}{{32|64}} %r{{[0-9]+}}, %f{{[0-9]+}}
+entry:
+ %arrayidx1 = getelementptr inbounds float, float* %f, i64 1
+ %0 = load float, float* %f, align 4
+ %1 = load float, float* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, float* %f, i64 2
+ %arrayidx3 = getelementptr inbounds float, float* %f, i64 3
+ %2 = load float, float* %arrayidx2, align 4
+ %3 = load float, float* %arrayidx3, align 4
+ %mul.i = fmul float %0, %2
+ %mul4.i = fmul float %1, %3
+ %mul5.i = fmul float %0, %3
+ %mul6.i = fmul float %1, %2
+ %sub.i = fsub float %mul.i, %mul4.i
+ %4 = bitcast float %sub.i to i32
+ %add.i = fadd float %mul6.i, %mul5.i
+ %5 = bitcast float %add.i to i32
+ %6 = tail call float @llvm.nvvm.fabs.f(float %sub.i) #2
+ %7 = fcmp ugt float %6, 0x7FF0000000000000
+ br i1 %7, label %land.lhs.true.i, label %_ZN12cuda_builtinmlIfEENS_7complexIT_EERKS3_S5_.exit
+
+land.lhs.true.i: ; preds = %entry
+ %8 = tail call float @llvm.nvvm.fabs.f(float %add.i) #2
+ %9 = fcmp ugt float %8, 0x7FF0000000000000
+ br i1 %9, label %if.then.i, label %_ZN12cuda_builtinmlIfEENS_7complexIT_EERKS3_S5_.exit
+
+if.then.i: ; preds = %land.lhs.true.i
+ %10 = tail call float @llvm.nvvm.fabs.f(float %0) #2
+ %11 = fcmp oeq float %10, 0x7FF0000000000000
+ %.pre.i = tail call float @llvm.nvvm.fabs.f(float %1) #2
+ %12 = fcmp oeq float %.pre.i, 0x7FF0000000000000
+ %or.cond.i = or i1 %11, %12
+ br i1 %or.cond.i, label %if.then.14.i, label %if.end.31.i
+
+if.then.14.i: ; preds = %if.then.i
+ %13 = bitcast float %0 to i32
+ %14 = and i32 %13, -2147483648
+ %15 = select i1 %11, i32 1065353216, i32 0
+ %16 = or i32 %15, %14
+ %17 = bitcast i32 %16 to float
+ %18 = bitcast float %1 to i32
+ %19 = and i32 %18, -2147483648
+ %20 = select i1 %12, i32 1065353216, i32 0
+ %21 = or i32 %20, %19
+ %22 = bitcast i32 %21 to float
+ %23 = tail call float @llvm.nvvm.fabs.f(float %2) #2
+ %24 = fcmp ugt float %23, 0x7FF0000000000000
+ br i1 %24, label %if.then.24.i, label %if.end.i
+
+if.then.24.i: ; preds = %if.then.14.i
+ %25 = bitcast float %2 to i32
+ %26 = and i32 %25, -2147483648
+ %27 = bitcast i32 %26 to float
+ br label %if.end.i
+
+if.end.i: ; preds = %if.then.24.i, %if.then.14.i
+ %__c.0.i = phi float [ %27, %if.then.24.i ], [ %2, %if.then.14.i ]
+ %28 = tail call float @llvm.nvvm.fabs.f(float %3) #2
+ %29 = fcmp ugt float %28, 0x7FF0000000000000
+ br i1 %29, label %if.then.28.i, label %if.end.31.i
+
+if.then.28.i: ; preds = %if.end.i
+ %30 = bitcast float %3 to i32
+ %31 = and i32 %30, -2147483648
+ %32 = bitcast i32 %31 to float
+ br label %if.end.31.i
+
+if.end.31.i: ; preds = %if.then.28.i, %if.end.i, %if.then.i
+ %__d.1.i = phi float [ %32, %if.then.28.i ], [ %3, %if.end.i ], [ %3, %if.then.i ]
+ %__c.1.i = phi float [ %__c.0.i, %if.then.28.i ], [ %__c.0.i, %if.end.i ], [ %2, %if.then.i ]
+ %__b.0.i = phi float [ %22, %if.then.28.i ], [ %22, %if.end.i ], [ %1, %if.then.i ]
+ %__a.0.i = phi float [ %17, %if.then.28.i ], [ %17, %if.end.i ], [ %0, %if.then.i ]
+ %__recalc.0.off0.i = phi i1 [ true, %if.then.28.i ], [ true, %if.end.i ], [ false, %if.then.i ]
+ %33 = tail call float @llvm.nvvm.fabs.f(float %__c.1.i) #2
+ %34 = fcmp oeq float %33, 0x7FF0000000000000
+ %.pre6.i = tail call float @llvm.nvvm.fabs.f(float %__d.1.i) #2
+ %35 = fcmp oeq float %.pre6.i, 0x7FF0000000000000
+ %or.cond8.i = or i1 %34, %35
+ br i1 %or.cond8.i, label %if.then.37.i, label %if.end.56.i
+
+if.then.37.i: ; preds = %if.end.31.i
+ %36 = bitcast float %__c.1.i to i32
+ %37 = and i32 %36, -2147483648
+ %38 = select i1 %34, i32 1065353216, i32 0
+ %39 = or i32 %38, %37
+ %40 = bitcast i32 %39 to float
+ %41 = bitcast float %__d.1.i to i32
+ %42 = and i32 %41, -2147483648
+ %43 = select i1 %35, i32 1065353216, i32 0
+ %44 = or i32 %43, %42
+ %45 = bitcast i32 %44 to float
+ %46 = tail call float @llvm.nvvm.fabs.f(float %__a.0.i) #2
+ %47 = fcmp ugt float %46, 0x7FF0000000000000
+ br i1 %47, label %if.then.48.i, label %if.end.50.i
+
+if.then.48.i: ; preds = %if.then.37.i
+ %48 = bitcast float %__a.0.i to i32
+ %49 = and i32 %48, -2147483648
+ %50 = bitcast i32 %49 to float
+ br label %if.end.50.i
+
+if.end.50.i: ; preds = %if.then.48.i, %if.then.37.i
+ %__a.1.i = phi float [ %50, %if.then.48.i ], [ %__a.0.i, %if.then.37.i ]
+ %51 = tail call float @llvm.nvvm.fabs.f(float %__b.0.i) #2
+ %52 = fcmp ugt float %51, 0x7FF0000000000000
+ br i1 %52, label %if.then.53.i, label %if.then.93.i
+
+if.then.53.i: ; preds = %if.end.50.i
+ %53 = bitcast float %__b.0.i to i32
+ %54 = and i32 %53, -2147483648
+ %55 = bitcast i32 %54 to float
+ br label %if.then.93.i
+
+if.end.56.i: ; preds = %if.end.31.i
+ br i1 %__recalc.0.off0.i, label %if.then.93.i, label %land.lhs.true.58.i
+
+land.lhs.true.58.i: ; preds = %if.end.56.i
+ %56 = tail call float @llvm.nvvm.fabs.f(float %mul.i) #2
+ %57 = fcmp oeq float %56, 0x7FF0000000000000
+ br i1 %57, label %if.then.70.i, label %lor.lhs.false.61.i
+
+lor.lhs.false.61.i: ; preds = %land.lhs.true.58.i
+ %58 = tail call float @llvm.nvvm.fabs.f(float %mul4.i) #2
+ %59 = fcmp oeq float %58, 0x7FF0000000000000
+ br i1 %59, label %if.then.70.i, label %lor.lhs.false.64.i
+
+lor.lhs.false.64.i: ; preds = %lor.lhs.false.61.i
+ %60 = tail call float @llvm.nvvm.fabs.f(float %mul5.i) #2
+ %61 = fcmp oeq float %60, 0x7FF0000000000000
+ br i1 %61, label %if.then.70.i, label %lor.lhs.false.67.i
+
+lor.lhs.false.67.i: ; preds = %lor.lhs.false.64.i
+ %62 = tail call float @llvm.nvvm.fabs.f(float %mul6.i) #2
+ %63 = fcmp oeq float %62, 0x7FF0000000000000
+ br i1 %63, label %if.then.70.i, label %_ZN12cuda_builtinmlIfEENS_7complexIT_EERKS3_S5_.exit
+
+if.then.70.i: ; preds = %lor.lhs.false.67.i, %lor.lhs.false.64.i, %lor.lhs.false.61.i, %land.lhs.true.58.i
+ %64 = tail call float @llvm.nvvm.fabs.f(float %__a.0.i) #2
+ %65 = fcmp ugt float %64, 0x7FF0000000000000
+ br i1 %65, label %if.then.73.i, label %if.end.75.i
+
+if.then.73.i: ; preds = %if.then.70.i
+ %66 = bitcast float %__a.0.i to i32
+ %67 = and i32 %66, -2147483648
+ %68 = bitcast i32 %67 to float
+ br label %if.end.75.i
+
+if.end.75.i: ; preds = %if.then.73.i, %if.then.70.i
+ %__a.3.i = phi float [ %68, %if.then.73.i ], [ %__a.0.i, %if.then.70.i ]
+ %69 = tail call float @llvm.nvvm.fabs.f(float %__b.0.i) #2
+ %70 = fcmp ugt float %69, 0x7FF0000000000000
+ br i1 %70, label %if.then.78.i, label %if.end.80.i
+
+if.then.78.i: ; preds = %if.end.75.i
+ %71 = bitcast float %__b.0.i to i32
+ %72 = and i32 %71, -2147483648
+ %73 = bitcast i32 %72 to float
+ br label %if.end.80.i
+
+if.end.80.i: ; preds = %if.then.78.i, %if.end.75.i
+ %__b.3.i = phi float [ %73, %if.then.78.i ], [ %__b.0.i, %if.end.75.i ]
+ %74 = fcmp ugt float %33, 0x7FF0000000000000
+ br i1 %74, label %if.then.83.i, label %if.end.85.i
+
+if.then.83.i: ; preds = %if.end.80.i
+ %75 = bitcast float %__c.1.i to i32
+ %76 = and i32 %75, -2147483648
+ %77 = bitcast i32 %76 to float
+ br label %if.end.85.i
+
+if.end.85.i: ; preds = %if.then.83.i, %if.end.80.i
+ %__c.3.i = phi float [ %77, %if.then.83.i ], [ %__c.1.i, %if.end.80.i ]
+ %78 = fcmp ugt float %.pre6.i, 0x7FF0000000000000
+ br i1 %78, label %if.then.88.i, label %if.then.93.i
+
+if.then.88.i: ; preds = %if.end.85.i
+ %79 = bitcast float %__d.1.i to i32
+ %80 = and i32 %79, -2147483648
+ %81 = bitcast i32 %80 to float
+ br label %if.then.93.i
+
+if.then.93.i: ; preds = %if.then.88.i, %if.end.85.i, %if.end.56.i, %if.then.53.i, %if.end.50.i
+ %__d.4.ph.i = phi float [ %__d.1.i, %if.end.85.i ], [ %81, %if.then.88.i ], [ %__d.1.i, %if.end.56.i ], [ %45, %if.end.50.i ], [ %45, %if.then.53.i ]
+ %__c.4.ph.i = phi float [ %__c.3.i, %if.end.85.i ], [ %__c.3.i, %if.then.88.i ], [ %__c.1.i, %if.end.56.i ], [ %40, %if.end.50.i ], [ %40, %if.then.53.i ]
+ %__b.4.ph.i = phi float [ %__b.3.i, %if.end.85.i ], [ %__b.3.i, %if.then.88.i ], [ %__b.0.i, %if.end.56.i ], [ %__b.0.i, %if.end.50.i ], [ %55, %if.then.53.i ]
+ %__a.4.ph.i = phi float [ %__a.3.i, %if.end.85.i ], [ %__a.3.i, %if.then.88.i ], [ %__a.0.i, %if.end.56.i ], [ %__a.1.i, %if.end.50.i ], [ %__a.1.i, %if.then.53.i ]
+ %mul95.i = fmul float %__c.4.ph.i, %__a.4.ph.i
+ %mul96.i = fmul float %__d.4.ph.i, %__b.4.ph.i
+ %sub97.i = fsub float %mul95.i, %mul96.i
+ %mul98.i = fmul float %sub97.i, 0x7FF0000000000000
+ %82 = bitcast float %mul98.i to i32
+ %mul100.i = fmul float %__d.4.ph.i, %__a.4.ph.i
+ %mul101.i = fmul float %__c.4.ph.i, %__b.4.ph.i
+ %add102.i = fadd float %mul101.i, %mul100.i
+ %mul103.i = fmul float %add102.i, 0x7FF0000000000000
+ %83 = bitcast float %mul103.i to i32
+ br label %_ZN12cuda_builtinmlIfEENS_7complexIT_EERKS3_S5_.exit
+
+_ZN12cuda_builtinmlIfEENS_7complexIT_EERKS3_S5_.exit: ; preds = %if.then.93.i, %lor.lhs.false.67.i, %land.lhs.true.i, %entry
+ %84 = phi i32 [ %4, %land.lhs.true.i ], [ %4, %entry ], [ %82, %if.then.93.i ], [ %4, %lor.lhs.false.67.i ]
+ %85 = phi i32 [ %5, %land.lhs.true.i ], [ %5, %entry ], [ %83, %if.then.93.i ], [ %5, %lor.lhs.false.67.i ]
+ %arrayidx5 = getelementptr inbounds float, float* %f, i64 5
+ %86 = bitcast float* %arrayidx5 to i32*
+ store i32 %84, i32* %86, align 4
+ %arrayidx7 = getelementptr inbounds float, float* %f, i64 6
+ %87 = bitcast float* %arrayidx7 to i32*
+ store i32 %85, i32* %87, align 4
+ ret void
+}
+
+declare float @llvm.nvvm.fabs.f(float)
+
+!nvvm.annotations = !{!0}
+
+!0 = !{void (float*)* @PR24303, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/symbol-naming.ll b/test/CodeGen/NVPTX/symbol-naming.ll
index 0f176934ca39..7a3e6310ffdf 100644
--- a/test/CodeGen/NVPTX/symbol-naming.ll
+++ b/test/CodeGen/NVPTX/symbol-naming.ll
@@ -7,10 +7,10 @@
; PTX32-NOT: .str
; PTX64-NOT: .str
-; PTX32-DAG: _$_str1
+; PTX32-DAG: _$_str.1
; PTX32-DAG: _$_str
-; PTX64-DAG: _$_str1
+; PTX64-DAG: _$_str.1
; PTX64-DAG: _$_str
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
diff --git a/test/CodeGen/NVPTX/vector-call.ll b/test/CodeGen/NVPTX/vector-call.ll
index a03d7fd41914..968d1d4a5f51 100644
--- a/test/CodeGen/NVPTX/vector-call.ll
+++ b/test/CodeGen/NVPTX/vector-call.ll
@@ -4,7 +4,7 @@ target triple = "nvptx-unknown-cuda"
declare void @bar(<4 x i32>)
-; CHECK-LABEL @foo
+; CHECK-LABEL: @foo
define void @foo(<4 x i32> %a) {
; CHECK: st.param.v4.b32
tail call void @bar(<4 x i32> %a)
diff --git a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
index fde330321aa4..d20e3b05c091 100644
--- a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
+++ b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; REQUIRES: default_triple
define void @iterative_hash_host_wide_int() {
%zero = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
index c63fd9ae1700..3d5fa52d0abd 100644
--- a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
+++ b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; REQUIRES: default_triple
%struct..0anon = type { i32 }
%struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] }
diff --git a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
index 0e7709857406..c064c273173f 100644
--- a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=ppc64
; RUN: llc < %s -march=ppc32
; RUN: llc < %s
+; REQUIRES: default_triple
define void @bitap() {
entry:
diff --git a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
index 9660d450cb4c..8536dda0a9ba 100644
--- a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=ppc64
; RUN: llc < %s -march=ppc32
; RUN: llc < %s
+; REQUIRES: default_triple
@qsz.b = external global i1 ; <i1*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
index 4830ca60f9ff..aa39dfd03748 100644
--- a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
+++ b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -march=ppc32 -mcpu=g3
; RUN: llc < %s -march=ppc32 -mcpu=g5
; PR1811
+; REQUIRES: default_triple
define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>*
%CONST) {
diff --git a/test/CodeGen/PowerPC/BoolRetToIntTest.ll b/test/CodeGen/PowerPC/BoolRetToIntTest.ll
new file mode 100644
index 000000000000..a7b79789b4ca
--- /dev/null
+++ b/test/CodeGen/PowerPC/BoolRetToIntTest.ll
@@ -0,0 +1,203 @@
+; RUN: opt -bool-ret-to-int -S -o - < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; CHECK-LABEL: notBoolRet
+define signext i32 @notBoolRet() {
+entry:
+; CHECK: ret i32 1
+ ret i32 1
+}
+
+; CHECK-LABEL: find
+define zeroext i1 @find(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp) {
+entry:
+ %cmp.4 = icmp eq i8** %begin, %end
+ br i1 %cmp.4, label %cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond: ; preds = %for.body
+ %cmp = icmp eq i8** %incdec.ptr, %end
+ br i1 %cmp, label %cleanup.loopexit, label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.cond
+ %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ]
+ %0 = load i8*, i8** %curr.05, align 8
+ %call = tail call zeroext i1 %hasProp(i8* %0)
+ %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1
+ br i1 %call, label %cleanup.loopexit, label %for.cond
+
+cleanup.loopexit: ; preds = %for.body, %for.cond
+; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+ %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
+ br label %cleanup
+
+cleanup: ; preds = %cleanup.loopexit, %entry
+; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+ %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: ret i1 [[REG]]
+ ret i1 %cleanup.dest.slot.0
+}
+
+; CHECK-LABEL: retFalse
+define zeroext i1 @retFalse() {
+entry:
+; CHECK: ret i1 false
+ ret i1 false
+}
+
+; CHECK-LABEL: retCvtFalse
+define zeroext i1 @retCvtFalse() {
+entry:
+; CHECK: ret i1 false
+ ret i1 trunc(i32 0 to i1)
+}
+
+; CHECK-LABEL: find_cont
+define void @find_cont(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) {
+entry:
+ %cmp.4 = icmp eq i8** %begin, %end
+ br i1 %cmp.4, label %cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond: ; preds = %for.body
+ %cmp = icmp eq i8** %incdec.ptr, %end
+ br i1 %cmp, label %cleanup.loopexit, label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.cond
+ %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ]
+ %0 = load i8*, i8** %curr.05, align 8
+ %call = tail call zeroext i1 %hasProp(i8* %0)
+ %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1
+ br i1 %call, label %cleanup.loopexit, label %for.cond
+
+cleanup.loopexit: ; preds = %for.body, %for.cond
+; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+ %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
+ br label %cleanup
+
+cleanup: ; preds = %cleanup.loopexit, %entry
+; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+ %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: call void %cont(i1 [[REG]]
+ tail call void %cont(i1 %cleanup.dest.slot.0)
+ ret void
+}
+
+; CHECK-LABEL: find_cont_ret
+define zeroext i1 @find_cont_ret(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) {
+entry:
+ %cmp.4 = icmp eq i8** %begin, %end
+ br i1 %cmp.4, label %cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond: ; preds = %for.body
+ %cmp = icmp eq i8** %incdec.ptr, %end
+ br i1 %cmp, label %cleanup.loopexit, label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.cond
+ %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ]
+ %0 = load i8*, i8** %curr.05, align 8
+ %call = tail call zeroext i1 %hasProp(i8* %0)
+ %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1
+ br i1 %call, label %cleanup.loopexit, label %for.cond
+
+cleanup.loopexit: ; preds = %for.body, %for.cond
+; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+ %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
+ br label %cleanup
+
+cleanup: ; preds = %cleanup.loopexit, %entry
+; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+ %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: call void %cont(i1 [[REG]]
+ tail call void %cont(i1 %cleanup.dest.slot.0)
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: ret i1 [[REG]]
+ ret i1 %cleanup.dest.slot.0
+}
+
+; CHECK-LABEL: arg_operand
+define zeroext i1 @arg_operand(i1 %operand) {
+entry:
+ br i1 %operand, label %foo, label %cleanup
+
+foo:
+ br label %cleanup
+
+cleanup:
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: ret i1 [[REG]]
+ %result = phi i1 [ false, %foo ], [ %operand, %entry ]
+ ret i1 %result
+}
+
+; CHECK-LABEL: bad_use
+define zeroext i1 @bad_use(i1 %operand) {
+entry:
+ br i1 %operand, label %foo, label %cleanup
+
+foo:
+ br label %cleanup
+
+cleanup:
+; CHECK: [[REG:%.+]] = phi i1
+; CHECK: ret i1 [[REG]]
+ %result = phi i1 [ false, %foo], [ true, %entry ]
+ %0 = icmp eq i1 %result, %operand
+ ret i1 %result
+}
+
+; CHECK-LABEL: bad_use_closure
+define zeroext i1 @bad_use_closure(i1 %operand) {
+entry:
+ br i1 %operand, label %foo, label %cleanup
+
+foo:
+ %bar = phi i1 [ false, %entry ]
+ %0 = icmp eq i1 %bar, %operand
+ br label %cleanup
+
+cleanup:
+; CHECK: [[REG:%.+]] = phi i1 [ true
+; CHECK: ret i1 [[REG]]
+ %result = phi i1 [ true, %entry ], [ %bar, %foo]
+ ret i1 %result
+}
+
+; CHECK-LABEL: arg_test
+define zeroext i1 @arg_test(i1 %operand) {
+entry:
+ br i1 %operand, label %foo, label %cleanup
+
+foo:
+ %bar = phi i1 [ false, %entry ]
+ br label %cleanup
+
+; CHECK-LABEL: cleanup
+cleanup:
+; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: ret i1 [[REG]]
+ %result = phi i1 [ %bar, %foo], [ %operand, %entry ]
+ ret i1 %result
+}
+
+declare zeroext i1 @return_i1()
+
+; CHECK-LABEL: call_test
+define zeroext i1 @call_test() {
+; CHECK: [[REG:%.+]] = call i1
+ %result = call i1 @return_i1()
+; CHECK: ret i1 [[REG]]
+ ret i1 %result
+} \ No newline at end of file
diff --git a/test/CodeGen/PowerPC/BreakableToken-reduced.ll b/test/CodeGen/PowerPC/BreakableToken-reduced.ll
new file mode 100644
index 000000000000..2077dbb820f7
--- /dev/null
+++ b/test/CodeGen/PowerPC/BreakableToken-reduced.ll
@@ -0,0 +1,335 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-shrink-wrap=true %s -o - | FileCheck %s --check-prefix=CHECK
+;
+; Test the use of a non-R0 register to save/restore the LR in function
+; prologue/epilogue.
+; This problem can occur as a result of shrink wrapping, where the function
+; prologue and epilogue are moved from the beginning/ending of the function. If
+; register R0 is used before the prologue/epilogue blocks, then it cannot be
+; used to save/restore the LR.
+;
+; TODO: Convert this to an MIR test once the infrastructure can support it.
+; To convert this to an MIR pass, generate MIR after register allocation
+; but before shrink wrapping and verify that has been used in the body of
+; the function. This can be done with something like:
+; llc -stop-after stack-slot-coloring BreakableToken-reduced.ll > BreakableToken-reduced.mir
+;
+; The resulting MIR file can then be used as input to llc, and only run
+; shrink wrapping and Prologue/Epilogue insertion on it. For example:
+; llc -start-after stack-slot-coloring -stop-after prologepilog BreakableToken-reduced.mir
+;
+; Verify in the resulting code that R0 is not used in the prologue/epilogue.
+;
+; This currently cannot be done because the PrologEpilogInserter pass has
+; a dependency on the TargetPassConfig and StackProtector classes, which
+; are currently not serialized when generating the MIR.
+;
+
+; ModuleID = 'BreakableToken.cpp'
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%"class.clang::format::BreakableStringLiteral" = type { %"class.clang::format::BreakableSingleLineToken" }
+%"class.clang::format::BreakableSingleLineToken" = type { %"class.clang::format::BreakableToken", i32, %"class.llvm::StringRef", %"class.llvm::StringRef", %"class.llvm::StringRef" }
+%"class.clang::format::BreakableToken" = type { i32 (...)**, %"struct.clang::format::FormatToken"*, i32, i8, i32, %"struct.clang::format::FormatStyle"* }
+%"class.llvm::StringRef" = type { i8*, i64 }
+%"struct.clang::format::FormatToken" = type <{ %"class.clang::Token", i32, i8, [3 x i8], %"class.clang::SourceRange", i32, i32, i32, i8, i8, i8, i8, %"class.llvm::StringRef", i8, [3 x i8], i32, i32, i32, i8, i8, [2 x i8], i32, i32, i16, [2 x i8], %"class.std::unique_ptr", i32, i32, i32, i32, i32, i32, i32, i32, %"class.llvm::SmallVector", i32, i8, i8, [2 x i8], i32, i8, i8, [2 x i8], %"struct.clang::format::FormatToken"*, %"struct.clang::format::FormatToken"*, %"struct.clang::format::FormatToken"*, %"class.llvm::SmallVector.6", i32, i8, [3 x i8] }>
+%"class.clang::Token" = type <{ i32, i32, i8*, i16, i16, [4 x i8] }>
+%"class.clang::SourceRange" = type { %"class.clang::SourceLocation", %"class.clang::SourceLocation" }
+%"class.clang::SourceLocation" = type { i32 }
+%"class.std::unique_ptr" = type { %"class.std::tuple" }
+%"class.std::tuple" = type { %"struct.std::_Tuple_impl" }
+%"struct.std::_Tuple_impl" = type { %"struct.std::_Head_base.2" }
+%"struct.std::_Head_base.2" = type { %"class.clang::format::TokenRole"* }
+%"class.clang::format::TokenRole" = type { i32 (...)**, %"struct.clang::format::FormatStyle"* }
+%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl.base", %"struct.llvm::SmallVectorStorage" }
+%"class.llvm::SmallVectorImpl.base" = type { %"class.llvm::SmallVectorTemplateBase.base" }
+%"class.llvm::SmallVectorTemplateBase.base" = type { %"class.llvm::SmallVectorTemplateCommon.base" }
+%"class.llvm::SmallVectorTemplateCommon.base" = type <{ %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion" }>
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8* }
+%"struct.llvm::AlignedCharArrayUnion" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::AlignedCharArray" = type { [4 x i8] }
+%"struct.llvm::SmallVectorStorage" = type { [3 x %"struct.llvm::AlignedCharArrayUnion"] }
+%"class.llvm::SmallVector.6" = type <{ %"class.llvm::SmallVectorImpl.7", %"struct.llvm::SmallVectorStorage.12", [7 x i8] }>
+%"class.llvm::SmallVectorImpl.7" = type { %"class.llvm::SmallVectorTemplateBase.8" }
+%"class.llvm::SmallVectorTemplateBase.8" = type { %"class.llvm::SmallVectorTemplateCommon.9" }
+%"class.llvm::SmallVectorTemplateCommon.9" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.10" }
+%"struct.llvm::AlignedCharArrayUnion.10" = type { %"struct.llvm::AlignedCharArray.11" }
+%"struct.llvm::AlignedCharArray.11" = type { [8 x i8] }
+%"struct.llvm::SmallVectorStorage.12" = type { i8 }
+%"struct.clang::format::FormatStyle" = type { i32, i8, i8, i8, i8, i8, i8, i8, i8, i32, i8, i8, i32, i8, i8, i8, i8, i32, i32, i8, i8, i32, %"class.std::basic_string", i8, i32, i32, i8, i8, i8, i8, %"class.std::vector", i8, i32, i8, i8, i32, %"class.std::basic_string", %"class.std::basic_string", i32, i32, i32, i8, i8, i32, i32, i32, i32, i32, i32, i32, i8, i8, i32, i8, i32, i8, i8, i8, i8, i8, i32, i32, i32 }
+%"class.std::vector" = type { %"struct.std::_Vector_base" }
+%"struct.std::_Vector_base" = type { %"struct.std::_Vector_base<std::basic_string<char>, std::allocator<std::basic_string<char> > >::_Vector_impl" }
+%"struct.std::_Vector_base<std::basic_string<char>, std::allocator<std::basic_string<char> > >::_Vector_impl" = type { %"class.std::basic_string"*, %"class.std::basic_string"*, %"class.std::basic_string"* }
+%"class.std::basic_string" = type { %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" }
+%"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider" = type { i8* }
+%"struct.llvm::AlignedCharArray.52" = type { [16 x i8] }
+%"class.clang::format::WhitespaceManager" = type <{ %"class.llvm::SmallVector.13", %"class.clang::SourceManager"*, %"class.std::set", %"struct.clang::format::FormatStyle"*, i8, [7 x i8] }>
+%"class.llvm::SmallVector.13" = type { %"class.llvm::SmallVectorImpl.14", %"struct.llvm::SmallVectorStorage.19" }
+%"class.llvm::SmallVectorImpl.14" = type { %"class.llvm::SmallVectorTemplateBase.15" }
+%"class.llvm::SmallVectorTemplateBase.15" = type { %"class.llvm::SmallVectorTemplateCommon.16" }
+%"class.llvm::SmallVectorTemplateCommon.16" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.17" }
+%"struct.llvm::AlignedCharArrayUnion.17" = type { %"struct.llvm::AlignedCharArray.18" }
+%"struct.llvm::AlignedCharArray.18" = type { [88 x i8] }
+%"struct.llvm::SmallVectorStorage.19" = type { [15 x %"struct.llvm::AlignedCharArrayUnion.17"] }
+%"class.clang::SourceManager" = type { %"class.llvm::RefCountedBase", %"class.clang::DiagnosticsEngine"*, %"class.clang::FileManager"*, %"class.llvm::BumpPtrAllocatorImpl", %"class.llvm::DenseMap.65", i8, i8, %"class.std::unique_ptr.78", %"class.std::vector.94", %"class.llvm::SmallVector.99", %"class.llvm::SmallVector.99", i32, i32, %"class.std::vector.107", %"class.clang::ExternalSLocEntrySource"*, %"class.clang::FileID", %"class.clang::LineTableInfo"*, %"class.clang::FileID", %"class.clang::SrcMgr::ContentCache"*, i32, i32, %"class.clang::FileID", %"class.clang::FileID", i32, i32, %"class.llvm::DenseMap.111", %"class.llvm::DenseMap.115", %"class.clang::InBeforeInTUCacheEntry", %"class.std::unique_ptr.119", %"class.std::unique_ptr.127", %"class.llvm::DenseMap.135", %"class.llvm::SmallVector.139" }
+%"class.llvm::RefCountedBase" = type { i32 }
+%"class.clang::DiagnosticsEngine" = type opaque
+%"class.clang::FileManager" = type { %"class.llvm::RefCountedBase.20", %"class.llvm::IntrusiveRefCntPtr", %"class.clang::FileSystemOptions", %"class.std::map", %"class.std::map.24", %"class.llvm::SmallVector.29", %"class.llvm::SmallVector.35", %"class.llvm::StringMap", %"class.llvm::StringMap.56", %"class.llvm::DenseMap", %"class.llvm::BumpPtrAllocatorImpl", i32, i32, i32, i32, i32, %"class.std::unique_ptr.57" }
+%"class.llvm::RefCountedBase.20" = type { i32 }
+%"class.llvm::IntrusiveRefCntPtr" = type { %"class.clang::vfs::FileSystem"* }
+%"class.clang::vfs::FileSystem" = type <{ i32 (...)**, %"class.llvm::ThreadSafeRefCountedBase", [4 x i8] }>
+%"class.llvm::ThreadSafeRefCountedBase" = type { %"struct.std::atomic" }
+%"struct.std::atomic" = type { %"struct.std::__atomic_base" }
+%"struct.std::__atomic_base" = type { i32 }
+%"class.clang::FileSystemOptions" = type { %"class.std::basic_string" }
+%"class.std::map" = type { %"class.std::_Rb_tree" }
+%"class.std::_Rb_tree" = type { %"struct.std::_Rb_tree<llvm::sys::fs::UniqueID, std::pair<const llvm::sys::fs::UniqueID, clang::DirectoryEntry>, std::_Select1st<std::pair<const llvm::sys::fs::UniqueID, clang::DirectoryEntry> >, std::less<llvm::sys::fs::UniqueID>, std::allocator<std::pair<const llvm::sys::fs::UniqueID, clang::DirectoryEntry> > >::_Rb_tree_impl" }
+%"struct.std::_Rb_tree<llvm::sys::fs::UniqueID, std::pair<const llvm::sys::fs::UniqueID, clang::DirectoryEntry>, std::_Select1st<std::pair<const llvm::sys::fs::UniqueID, clang::DirectoryEntry> >, std::less<llvm::sys::fs::UniqueID>, std::allocator<std::pair<const llvm::sys::fs::UniqueID, clang::DirectoryEntry> > >::_Rb_tree_impl" = type { %"struct.std::less", %"struct.std::_Rb_tree_node_base", i64 }
+%"struct.std::less" = type { i8 }
+%"struct.std::_Rb_tree_node_base" = type { i32, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }
+%"class.std::map.24" = type { %"class.std::_Rb_tree.25" }
+%"class.std::_Rb_tree.25" = type { %"struct.std::_Rb_tree<llvm::sys::fs::UniqueID, std::pair<const llvm::sys::fs::UniqueID, clang::FileEntry>, std::_Select1st<std::pair<const llvm::sys::fs::UniqueID, clang::FileEntry> >, std::less<llvm::sys::fs::UniqueID>, std::allocator<std::pair<const llvm::sys::fs::UniqueID, clang::FileEntry> > >::_Rb_tree_impl" }
+%"struct.std::_Rb_tree<llvm::sys::fs::UniqueID, std::pair<const llvm::sys::fs::UniqueID, clang::FileEntry>, std::_Select1st<std::pair<const llvm::sys::fs::UniqueID, clang::FileEntry> >, std::less<llvm::sys::fs::UniqueID>, std::allocator<std::pair<const llvm::sys::fs::UniqueID, clang::FileEntry> > >::_Rb_tree_impl" = type { %"struct.std::less", %"struct.std::_Rb_tree_node_base", i64 }
+%"class.llvm::SmallVector.29" = type { %"class.llvm::SmallVectorImpl.30", %"struct.llvm::SmallVectorStorage.34" }
+%"class.llvm::SmallVectorImpl.30" = type { %"class.llvm::SmallVectorTemplateBase.31" }
+%"class.llvm::SmallVectorTemplateBase.31" = type { %"class.llvm::SmallVectorTemplateCommon.32" }
+%"class.llvm::SmallVectorTemplateCommon.32" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.33" }
+%"struct.llvm::AlignedCharArrayUnion.33" = type { %"struct.llvm::AlignedCharArray.11" }
+%"struct.llvm::SmallVectorStorage.34" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.33"] }
+%"class.llvm::SmallVector.35" = type { %"class.llvm::SmallVectorImpl.36", %"struct.llvm::SmallVectorStorage.40" }
+%"class.llvm::SmallVectorImpl.36" = type { %"class.llvm::SmallVectorTemplateBase.37" }
+%"class.llvm::SmallVectorTemplateBase.37" = type { %"class.llvm::SmallVectorTemplateCommon.38" }
+%"class.llvm::SmallVectorTemplateCommon.38" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.39" }
+%"struct.llvm::AlignedCharArrayUnion.39" = type { %"struct.llvm::AlignedCharArray.11" }
+%"struct.llvm::SmallVectorStorage.40" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.39"] }
+%"class.llvm::StringMap" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocatorImpl" }
+%"class.llvm::StringMapImpl" = type { %"class.llvm::StringMapEntryBase"**, i32, i32, i32, i32 }
+%"class.llvm::StringMapEntryBase" = type { i32 }
+%"class.llvm::StringMap.56" = type { %"class.llvm::StringMapImpl", %"class.llvm::BumpPtrAllocatorImpl" }
+%"class.llvm::DenseMap" = type <{ %"struct.llvm::detail::DenseMapPair"*, i32, i32, i32, [4 x i8] }>
+%"struct.llvm::detail::DenseMapPair" = type opaque
+%"class.std::unique_ptr.57" = type { %"class.std::tuple.58" }
+%"class.std::tuple.58" = type { %"struct.std::_Tuple_impl.59" }
+%"struct.std::_Tuple_impl.59" = type { %"struct.std::_Head_base.64" }
+%"struct.std::_Head_base.64" = type { %"class.clang::FileSystemStatCache"* }
+%"class.clang::FileSystemStatCache" = type opaque
+%"class.llvm::BumpPtrAllocatorImpl" = type <{ i8*, i8*, %"class.llvm::SmallVector.41", %"class.llvm::SmallVector.47", i64, %"class.llvm::MallocAllocator", [7 x i8] }>
+%"class.llvm::SmallVector.41" = type { %"class.llvm::SmallVectorImpl.42", %"struct.llvm::SmallVectorStorage.46" }
+%"class.llvm::SmallVectorImpl.42" = type { %"class.llvm::SmallVectorTemplateBase.43" }
+%"class.llvm::SmallVectorTemplateBase.43" = type { %"class.llvm::SmallVectorTemplateCommon.44" }
+%"class.llvm::SmallVectorTemplateCommon.44" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.45" }
+%"struct.llvm::AlignedCharArrayUnion.45" = type { %"struct.llvm::AlignedCharArray.11" }
+%"struct.llvm::SmallVectorStorage.46" = type { [3 x %"struct.llvm::AlignedCharArrayUnion.45"] }
+%"class.llvm::SmallVector.47" = type <{ %"class.llvm::SmallVectorImpl.48", %"struct.llvm::SmallVectorStorage.53", [7 x i8] }>
+%"class.llvm::SmallVectorImpl.48" = type { %"class.llvm::SmallVectorTemplateBase.49" }
+%"class.llvm::SmallVectorTemplateBase.49" = type { %"class.llvm::SmallVectorTemplateCommon.50" }
+%"class.llvm::SmallVectorTemplateCommon.50" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.51" }
+%"struct.llvm::AlignedCharArrayUnion.51" = type { %"struct.llvm::AlignedCharArray.52" }
+%"struct.llvm::SmallVectorStorage.53" = type { i8 }
+%"class.llvm::MallocAllocator" = type { i8 }
+%"class.llvm::DenseMap.65" = type <{ %"struct.llvm::detail::DenseMapPair.67"*, i32, i32, i32, [4 x i8] }>
+%"struct.llvm::detail::DenseMapPair.67" = type { %"struct.std::pair.68" }
+%"struct.std::pair.68" = type { %"class.clang::FileEntry"*, %"class.clang::SrcMgr::ContentCache"* }
+%"class.clang::FileEntry" = type { i8*, i64, i64, %"class.clang::DirectoryEntry"*, i32, %"class.llvm::sys::fs::UniqueID", i8, i8, i8, %"class.std::unique_ptr.69" }
+%"class.clang::DirectoryEntry" = type { i8* }
+%"class.llvm::sys::fs::UniqueID" = type { i64, i64 }
+%"class.std::unique_ptr.69" = type { %"class.std::tuple.70" }
+%"class.std::tuple.70" = type { %"struct.std::_Tuple_impl.71" }
+%"struct.std::_Tuple_impl.71" = type { %"struct.std::_Head_base.76" }
+%"struct.std::_Head_base.76" = type { %"class.clang::vfs::File"* }
+%"class.clang::vfs::File" = type { i32 (...)** }
+%"class.std::unique_ptr.78" = type { %"class.std::tuple.79" }
+%"class.std::tuple.79" = type { %"struct.std::_Tuple_impl.80" }
+%"struct.std::_Tuple_impl.80" = type { %"struct.std::_Head_base.85" }
+%"struct.std::_Head_base.85" = type { %"struct.clang::SourceManager::OverriddenFilesInfoTy"* }
+%"struct.clang::SourceManager::OverriddenFilesInfoTy" = type { %"class.llvm::DenseMap.86", %"class.llvm::DenseSet" }
+%"class.llvm::DenseMap.86" = type <{ %"struct.llvm::detail::DenseMapPair.88"*, i32, i32, i32, [4 x i8] }>
+%"struct.llvm::detail::DenseMapPair.88" = type { %"struct.std::pair.89" }
+%"struct.std::pair.89" = type { %"class.clang::FileEntry"*, %"class.clang::FileEntry"* }
+%"class.llvm::DenseSet" = type { %"class.llvm::DenseMap.91" }
+%"class.llvm::DenseMap.91" = type <{ %"class.llvm::detail::DenseSetPair"*, i32, i32, i32, [4 x i8] }>
+%"class.llvm::detail::DenseSetPair" = type { %"class.clang::FileEntry"* }
+%"class.std::vector.94" = type { %"struct.std::_Vector_base.95" }
+%"struct.std::_Vector_base.95" = type { %"struct.std::_Vector_base<clang::SrcMgr::ContentCache *, std::allocator<clang::SrcMgr::ContentCache *> >::_Vector_impl" }
+%"struct.std::_Vector_base<clang::SrcMgr::ContentCache *, std::allocator<clang::SrcMgr::ContentCache *> >::_Vector_impl" = type { %"class.clang::SrcMgr::ContentCache"**, %"class.clang::SrcMgr::ContentCache"**, %"class.clang::SrcMgr::ContentCache"** }
+%"class.llvm::SmallVector.99" = type <{ %"class.llvm::SmallVectorImpl.100", %"struct.llvm::SmallVectorStorage.105", [7 x i8] }>
+%"class.llvm::SmallVectorImpl.100" = type { %"class.llvm::SmallVectorTemplateBase.101" }
+%"class.llvm::SmallVectorTemplateBase.101" = type { %"class.llvm::SmallVectorTemplateCommon.102" }
+%"class.llvm::SmallVectorTemplateCommon.102" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.103" }
+%"struct.llvm::AlignedCharArrayUnion.103" = type { %"struct.llvm::AlignedCharArray.104" }
+%"struct.llvm::AlignedCharArray.104" = type { [24 x i8] }
+%"struct.llvm::SmallVectorStorage.105" = type { i8 }
+%"class.std::vector.107" = type { %"struct.std::_Bvector_base" }
+%"struct.std::_Bvector_base" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" }
+%"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" = type { %"struct.std::_Bit_iterator", %"struct.std::_Bit_iterator", i64* }
+%"struct.std::_Bit_iterator" = type { %"struct.std::_Bit_iterator_base.base", [4 x i8] }
+%"struct.std::_Bit_iterator_base.base" = type <{ i64*, i32 }>
+%"class.clang::ExternalSLocEntrySource" = type { i32 (...)** }
+%"class.clang::LineTableInfo" = type opaque
+%"class.clang::SrcMgr::ContentCache" = type <{ %"class.llvm::PointerIntPair", %"class.clang::FileEntry"*, %"class.clang::FileEntry"*, i32*, [5 x i8], [3 x i8] }>
+%"class.llvm::PointerIntPair" = type { i64 }
+%"class.clang::FileID" = type { i32 }
+%"class.llvm::DenseMap.111" = type <{ %"struct.llvm::detail::DenseMapPair.113"*, i32, i32, i32, [4 x i8] }>
+%"struct.llvm::detail::DenseMapPair.113" = type opaque
+%"class.llvm::DenseMap.115" = type <{ %"struct.llvm::detail::DenseMapPair.117"*, i32, i32, i32, [4 x i8] }>
+%"struct.llvm::detail::DenseMapPair.117" = type opaque
+%"class.clang::InBeforeInTUCacheEntry" = type { %"class.clang::FileID", %"class.clang::FileID", i8, %"class.clang::FileID", i32, i32 }
+%"class.std::unique_ptr.119" = type { %"class.std::tuple.120" }
+%"class.std::tuple.120" = type { %"struct.std::_Tuple_impl.121" }
+%"struct.std::_Tuple_impl.121" = type { %"struct.std::_Head_base.126" }
+%"struct.std::_Head_base.126" = type { %"class.llvm::MemoryBuffer"* }
+%"class.llvm::MemoryBuffer" = type { i32 (...)**, i8*, i8* }
+%"class.std::unique_ptr.127" = type { %"class.std::tuple.128" }
+%"class.std::tuple.128" = type { %"struct.std::_Tuple_impl.129" }
+%"struct.std::_Tuple_impl.129" = type { %"struct.std::_Head_base.134" }
+%"struct.std::_Head_base.134" = type { %"class.clang::SrcMgr::ContentCache"* }
+%"class.llvm::DenseMap.135" = type <{ %"struct.llvm::detail::DenseMapPair.137"*, i32, i32, i32, [4 x i8] }>
+%"struct.llvm::detail::DenseMapPair.137" = type opaque
+%"class.llvm::SmallVector.139" = type { %"class.llvm::SmallVectorImpl.140", %"struct.llvm::SmallVectorStorage.144" }
+%"class.llvm::SmallVectorImpl.140" = type { %"class.llvm::SmallVectorTemplateBase.141" }
+%"class.llvm::SmallVectorTemplateBase.141" = type { %"class.llvm::SmallVectorTemplateCommon.142" }
+%"class.llvm::SmallVectorTemplateCommon.142" = type { %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion.143" }
+%"struct.llvm::AlignedCharArrayUnion.143" = type { %"struct.llvm::AlignedCharArray.104" }
+%"struct.llvm::SmallVectorStorage.144" = type { [1 x %"struct.llvm::AlignedCharArrayUnion.143"] }
+%"class.std::set" = type { %"class.std::_Rb_tree.145" }
+%"class.std::_Rb_tree.145" = type { %"struct.std::_Rb_tree<clang::tooling::Replacement, clang::tooling::Replacement, std::_Identity<clang::tooling::Replacement>, std::less<clang::tooling::Replacement>, std::allocator<clang::tooling::Replacement> >::_Rb_tree_impl" }
+%"struct.std::_Rb_tree<clang::tooling::Replacement, clang::tooling::Replacement, std::_Identity<clang::tooling::Replacement>, std::less<clang::tooling::Replacement>, std::allocator<clang::tooling::Replacement> >::_Rb_tree_impl" = type { %"struct.std::less.149", %"struct.std::_Rb_tree_node_base", i64 }
+%"struct.std::less.149" = type { i8 }
+
+
+; Function Attrs: nounwind
+; CHECK-LABEL: @_ZN5clang6format22BreakableStringLiteral11insertBreakEjjSt4pairImjERNS0_17WhitespaceManagerE
+
+; Load a value into R0 before saving the LR
+; CHECK: lwz 0, {{[0-9]+([0-9]+)}}
+
+; Ensure the LR is saved using a different register
+; CHECK: mflr {{[1-9]+}}
+
+; Ensure the LR is restored using a different register
+; CHECK: mtlr {{[0-9]+}}
+; CHECK: blr
+define void @_ZN5clang6format22BreakableStringLiteral11insertBreakEjjSt4pairImjERNS0_17WhitespaceManagerE(%"class.clang::format::BreakableStringLiteral"* nocapture readonly %this, i32 zeroext %LineIndex, i32 zeroext %TailOffset, [2 x i64] %Split.coerce, %"class.clang::format::WhitespaceManager"* dereferenceable(1504) %Whitespaces) unnamed_addr #1 align 2 {
+entry:
+ %Split.coerce.fca.0.extract = extractvalue [2 x i64] %Split.coerce, 0
+ %Split.coerce.fca.1.extract = extractvalue [2 x i64] %Split.coerce, 1
+ %StartColumn = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 1
+ %0 = load i32, i32* %StartColumn, align 8, !tbaa !2
+ %Prefix = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 2
+ %Length.i.19 = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 2, i32 1
+ %1 = load i64, i64* %Length.i.19, align 8, !tbaa !10
+ %cmp.i = icmp eq i64 %1, 0
+ br i1 %cmp.i, label %entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge, label %if.end.i.i
+
+entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge: ; preds = %entry
+ %agg.tmp7.sroa.0.0..sroa_cast.phi.trans.insert = bitcast %"class.llvm::StringRef"* %Prefix to i64*
+ %agg.tmp7.sroa.0.0.copyload.pre = load i64, i64* %agg.tmp7.sroa.0.0..sroa_cast.phi.trans.insert, align 8
+ br label %_ZNK4llvm9StringRef10startswithES0_.exit
+
+if.end.i.i: ; preds = %entry
+ %Data.i.20 = getelementptr inbounds %"class.llvm::StringRef", %"class.llvm::StringRef"* %Prefix, i64 0, i32 0
+ %2 = load i8*, i8** %Data.i.20, align 8, !tbaa !12
+ %lhsc = load i8, i8* %2, align 1
+ %phitmp.i = icmp eq i8 %lhsc, 64
+ %3 = ptrtoint i8* %2 to i64
+ br label %_ZNK4llvm9StringRef10startswithES0_.exit
+
+_ZNK4llvm9StringRef10startswithES0_.exit: ; preds = %entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge, %if.end.i.i
+ %agg.tmp7.sroa.0.0.copyload = phi i64 [ %agg.tmp7.sroa.0.0.copyload.pre, %entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge ], [ %3, %if.end.i.i ]
+ %4 = phi i1 [ false, %entry._ZNK4llvm9StringRef10startswithES0_.exit_crit_edge ], [ %phitmp.i, %if.end.i.i ]
+ %dec = sext i1 %4 to i32
+ %dec. = add i32 %dec, %0
+ %Tok = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 0, i32 1
+ %ref = load %"struct.clang::format::FormatToken"*, %"struct.clang::format::FormatToken"** %Tok, align 8, !tbaa !13
+ %conv = zext i32 %TailOffset to i64
+ %add = add i64 %Split.coerce.fca.0.extract, %conv
+ %add4 = add i64 %add, %1
+ %conv5 = trunc i64 %add4 to i32
+ %Split.sroa.2.8.extract.trunc = trunc i64 %Split.coerce.fca.1.extract to i32
+ %agg.tmp6.sroa.0.0..sroa_idx13 = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 3
+ %agg.tmp6.sroa.0.0..sroa_cast = bitcast %"class.llvm::StringRef"* %agg.tmp6.sroa.0.0..sroa_idx13 to i64*
+ %agg.tmp6.sroa.0.0.copyload = load i64, i64* %agg.tmp6.sroa.0.0..sroa_cast, align 8
+ %agg.tmp6.sroa.2.0..sroa_idx14 = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 3, i32 1
+ %agg.tmp6.sroa.2.0.copyload = load i64, i64* %agg.tmp6.sroa.2.0..sroa_idx14, align 8
+ %InPPDirective = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 0, i32 3
+ %5 = load i8, i8* %InPPDirective, align 4, !tbaa !34, !range !39
+ %tobool = icmp ne i8 %5, 0
+ %IndentLevel = getelementptr inbounds %"class.clang::format::BreakableStringLiteral", %"class.clang::format::BreakableStringLiteral"* %this, i64 0, i32 0, i32 0, i32 2
+ %6 = load i32, i32* %IndentLevel, align 8, !tbaa !33
+ %.fca.0.insert11 = insertvalue [2 x i64] undef, i64 %agg.tmp6.sroa.0.0.copyload, 0
+ %.fca.1.insert12 = insertvalue [2 x i64] %.fca.0.insert11, i64 %agg.tmp6.sroa.2.0.copyload, 1
+ %.fca.0.insert = insertvalue [2 x i64] undef, i64 %agg.tmp7.sroa.0.0.copyload, 0
+ %.fca.1.insert = insertvalue [2 x i64] %.fca.0.insert, i64 %1, 1
+ tail call void @_ZN5clang6format17WhitespaceManager24replaceWhitespaceInTokenERKNS0_11FormatTokenEjjN4llvm9StringRefES6_bjji(%"class.clang::format::WhitespaceManager"* nonnull %Whitespaces, %"struct.clang::format::FormatToken"* dereferenceable(272) %ref, i32 zeroext %conv5, i32 zeroext %Split.sroa.2.8.extract.trunc, [2 x i64] %.fca.1.insert12, [2 x i64] %.fca.1.insert, i1 zeroext %tobool, i32 zeroext 1, i32 zeroext %6, i32 signext %dec.) #9
+ ret void
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.start(i64, i8* nocapture) #2
+
+declare void @_ZN5clang6format17WhitespaceManager24replaceWhitespaceInTokenERKNS0_11FormatTokenEjjN4llvm9StringRefES6_bjji(%"class.clang::format::WhitespaceManager"*, %"struct.clang::format::FormatToken"* dereferenceable(272), i32 zeroext, i32 zeroext, [2 x i64], [2 x i64], i1 zeroext, i32 zeroext, i32 zeroext, i32 signext) #3
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.end(i64, i8* nocapture) #2
+
+attributes #9 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0 (trunk 248714) (llvm/trunk 248719)"}
+!2 = !{!3, !4, i64 40}
+!3 = !{!"_ZTSN5clang6format24BreakableSingleLineTokenE", !4, i64 40, !7, i64 48, !7, i64 64, !7, i64 80}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"_ZTSN4llvm9StringRefE", !8, i64 0, !9, i64 8}
+!8 = !{!"any pointer", !5, i64 0}
+!9 = !{!"long", !5, i64 0}
+!10 = !{!7, !9, i64 8}
+!11 = !{!9, !9, i64 0}
+!12 = !{!7, !8, i64 0}
+!13 = !{!5, !5, i64 0}
+!14 = !{!15, !4, i64 200}
+!15 = !{!"_ZTSN5clang6format11FormatStyleE", !4, i64 0, !16, i64 4, !16, i64 5, !16, i64 6, !16, i64 7, !16, i64 8, !16, i64 9, !16, i64 10, !16, i64 11, !17, i64 12, !16, i64 16, !16, i64 17, !18, i64 20, !16, i64 24, !16, i64 25, !16, i64 26, !16, i64 27, !19, i64 28, !20, i64 32, !16, i64 36, !16, i64 37, !4, i64 40, !21, i64 48, !16, i64 56, !4, i64 60, !4, i64 64, !16, i64 68, !16, i64 69, !16, i64 70, !16, i64 71, !23, i64 72, !16, i64 96, !4, i64 100, !16, i64 104, !16, i64 105, !24, i64 108, !21, i64 112, !21, i64 120, !4, i64 128, !25, i64 132, !4, i64 136, !16, i64 140, !16, i64 141, !4, i64 144, !4, i64 148, !4, i64 152, !4, i64 156, !4, i64 160, !4, i64 164, !26, i64 168, !16, i64 172, !16, i64 173, !27, i64 176, !16, i64 180, !4, i64 184, !16, i64 188, !16, i64 189, !16, i64 190, !16, i64 191, !16, i64 192, !28, i64 196, !4, i64 200, !29, i64 204}
+!16 = !{!"bool", !5, i64 0}
+!17 = !{!"_ZTSN5clang6format11FormatStyle18ShortFunctionStyleE", !5, i64 0}
+!18 = !{!"_ZTSN5clang6format11FormatStyle33DefinitionReturnTypeBreakingStyleE", !5, i64 0}
+!19 = !{!"_ZTSN5clang6format11FormatStyle19BinaryOperatorStyleE", !5, i64 0}
+!20 = !{!"_ZTSN5clang6format11FormatStyle18BraceBreakingStyleE", !5, i64 0}
+!21 = !{!"_ZTSSs", !22, i64 0}
+!22 = !{!"_ZTSNSs12_Alloc_hiderE", !8, i64 0}
+!23 = !{!"_ZTSSt6vectorISsSaISsEE"}
+!24 = !{!"_ZTSN5clang6format11FormatStyle12LanguageKindE", !5, i64 0}
+!25 = !{!"_ZTSN5clang6format11FormatStyle24NamespaceIndentationKindE", !5, i64 0}
+!26 = !{!"_ZTSN5clang6format11FormatStyle21PointerAlignmentStyleE", !5, i64 0}
+!27 = !{!"_ZTSN5clang6format11FormatStyle24SpaceBeforeParensOptionsE", !5, i64 0}
+!28 = !{!"_ZTSN5clang6format11FormatStyle16LanguageStandardE", !5, i64 0}
+!29 = !{!"_ZTSN5clang6format11FormatStyle11UseTabStyleE", !5, i64 0}
+!30 = !{!31, !32, i64 24}
+!31 = !{!"_ZTSN5clang6format14BreakableTokenE", !5, i64 8, !4, i64 16, !16, i64 20, !32, i64 24, !5, i64 32}
+!32 = !{!"_ZTSN5clang6format8encoding8EncodingE", !5, i64 0}
+!33 = !{!31, !4, i64 16}
+!34 = !{!31, !16, i64 20}
+!35 = !{!36, !36, i64 0}
+!36 = !{!"vtable pointer", !6, i64 0}
+!37 = !{!38, !38, i64 0}
+!38 = !{!"short", !5, i64 0}
+!39 = !{i8 0, i8 2}
+!40 = !{i64 0, i64 8, !41, i64 8, i64 8, !11}
+!41 = !{!8, !8, i64 0}
+!42 = !{!43, !8, i64 16}
+!43 = !{!"_ZTSN4llvm15SmallVectorBaseE", !8, i64 0, !8, i64 8, !8, i64 16}
+!44 = !{!43, !8, i64 8}
+!45 = !{!43, !8, i64 0}
+!46 = !{!4, !4, i64 0}
+!47 = !{!48, !16, i64 500}
+!48 = !{!"_ZTSN5clang6format21BreakableBlockCommentE", !49, i64 40, !51, i64 320, !53, i64 408, !4, i64 496, !16, i64 500, !7, i64 504}
+!49 = !{!"_ZTSN4llvm11SmallVectorINS_9StringRefELj16EEE", !50, i64 40}
+!50 = !{!"_ZTSN4llvm18SmallVectorStorageINS_9StringRefELj16EEE", !5, i64 0}
+!51 = !{!"_ZTSN4llvm11SmallVectorIjLj16EEE", !52, i64 28}
+!52 = !{!"_ZTSN4llvm18SmallVectorStorageIjLj16EEE", !5, i64 0}
+!53 = !{!"_ZTSN4llvm11SmallVectorIiLj16EEE", !54, i64 28}
+!54 = !{!"_ZTSN4llvm18SmallVectorStorageIiLj16EEE", !5, i64 0}
+!55 = !{!48, !4, i64 496}
diff --git a/test/CodeGen/PowerPC/aantidep-def-ec.mir b/test/CodeGen/PowerPC/aantidep-def-ec.mir
new file mode 100644
index 000000000000..d1cb6782f038
--- /dev/null
+++ b/test/CodeGen/PowerPC/aantidep-def-ec.mir
@@ -0,0 +1,117 @@
+# RUN: llc -o - %s -start-after=if-converter | FileCheck %s
+
+--- |
+ target datalayout = "E-m:e-i64:64-n32:64"
+ target triple = "powerpc64-unknown-linux-gnu"
+
+ %struct.rwlock_t.0.22.58.68.242.244 = type {}
+
+ @tasklist_lock = external global %struct.rwlock_t.0.22.58.68.242.244, align 1
+
+ ; Function Attrs: nounwind
+ define void @mm_update_next_owner(i8** %p1, i32* %p2) #0 {
+ entry:
+ %0 = load i8*, i8** %p1, align 8
+ br i1 undef, label %do.body.92, label %for.body.21
+
+ for.body.21: ; preds = %entry
+ unreachable
+
+ do.body.92: ; preds = %entry
+ %usage = getelementptr inbounds i8, i8* %0, i64 -48
+ %counter.i = bitcast i8* %usage to i32*
+ %call95 = tail call signext i32 bitcast (i32 (...)* @__raw_read_unlock to i32 (%struct.rwlock_t.0.22.58.68.242.244*)*)(%struct.rwlock_t.0.22.58.68.242.244* nonnull @tasklist_lock) #1
+ store volatile i32 0, i32* %p2, align 4
+ tail call void asm sideeffect "#compiler barrier", "~{memory}"() #1
+ %1 = tail call i32 asm sideeffect "\0Alwsync \0A1:\09lwarx\09$0,0,$1\09\09# atomic_dec_return\0A\09addic\09$0,$0,-1\0A\09stwcx.\09$0,0,$1\0A\09bne-\091b\0Async \0A", "=&r,r,~{cc},~{xer},~{memory}"(i32* %counter.i) #1
+ %cmp.i = icmp eq i32 %1, 0
+ br i1 %cmp.i, label %if.then.i, label %put_task_struct.exit
+
+ if.then.i: ; preds = %do.body.92
+ unreachable
+
+ put_task_struct.exit: ; preds = %do.body.92
+ ret void
+ }
+
+ declare signext i32 @__raw_read_unlock(...)
+
+ attributes #0 = { nounwind "target-cpu"="pwr7" }
+ attributes #1 = { nounwind }
+
+...
+---
+name: mm_update_next_owner
+alignment: 4
+exposesReturnsTwice: false
+hasInlineAsm: true
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: false
+liveins:
+ - { reg: '%x3' }
+ - { reg: '%x4' }
+calleeSavedRegisters: [ '%cr2', '%cr3', '%cr4', '%f14', '%f15', '%f16',
+ '%f17', '%f18', '%f19', '%f20', '%f21', '%f22',
+ '%f23', '%f24', '%f25', '%f26', '%f27', '%f28',
+ '%f29', '%f30', '%f31', '%r14', '%r15', '%r16',
+ '%r17', '%r18', '%r19', '%r20', '%r21', '%r22',
+ '%r23', '%r24', '%r25', '%r26', '%r27', '%r28',
+ '%r29', '%r30', '%r31', '%v20', '%v21', '%v22',
+ '%v23', '%v24', '%v25', '%v26', '%v27', '%v28',
+ '%v29', '%v30', '%v31', '%vf20', '%vf21', '%vf22',
+ '%vf23', '%vf24', '%vf25', '%vf26', '%vf27', '%vf28',
+ '%vf29', '%vf30', '%vf31', '%x14', '%x15', '%x16',
+ '%x17', '%x18', '%x19', '%x20', '%x21', '%x22',
+ '%x23', '%x24', '%x25', '%x26', '%x27', '%x28',
+ '%x29', '%x30', '%x31', '%cr2eq', '%cr3eq', '%cr4eq',
+ '%cr2gt', '%cr3gt', '%cr4gt', '%cr2lt', '%cr3lt',
+ '%cr4lt', '%cr2un', '%cr3un', '%cr4un' ]
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 144
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 112
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%x30' }
+ - { id: 1, type: spill-slot, offset: -24, size: 8, alignment: 8, callee-saved-register: '%x29' }
+body: |
+ bb.0.entry:
+ liveins: %x3, %x4, %x29, %x30, %x29, %x30
+
+ %x0 = MFLR8 implicit %lr8
+ STD %x0, 16, %x1
+ %x1 = STDU %x1, -144, %x1
+ STD killed %x29, 120, %x1 :: (store 8 into %fixed-stack.1)
+ STD killed %x30, 128, %x1 :: (store 8 into %fixed-stack.0, align 16)
+ %x30 = OR8 %x4, %x4
+ %x3 = LD 0, killed %x3 :: (load 8 from %ir.p1)
+ %x29 = ADDI8 killed %x3, -48
+ %x3 = ADDIStocHA %x2, @tasklist_lock
+ %x3 = LDtocL @tasklist_lock, killed %x3, implicit %x2 :: (load 8 from got)
+ BL8_NOP @__raw_read_unlock, csr_svr464_altivec, implicit-def %lr8, implicit %rm, implicit %x3, implicit %x2, implicit-def %r1, implicit-def dead %x3
+ %r3 = LI 0
+ STW killed %r3, 0, killed %x30 :: (volatile store 4 into %ir.p2)
+ INLINEASM $"#compiler barrier", 25
+ INLINEASM $"\0Alwsync \0A1:\09lwarx\09$0,0,$1\09\09# atomic_dec_return\0A\09addic\09$0,$0,-1\0A\09stwcx.\09$0,0,$1\0A\09bne-\091b\0Async \0A", 25, 131083, def early-clobber %r3, 851977, killed %x29, 12, implicit-def dead early-clobber %cr0
+ ; CHECK-LABEL: @mm_update_next_owner
+ ; CHECK-NOT: lwarx 29, 0, 29
+ ; CHECK-NOT: stwcx. 29, 0, 29
+ %cr0 = CMPLWI killed %r3, 0
+ %x30 = LD 128, %x1 :: (load 8 from %fixed-stack.0, align 16)
+ %x29 = LD 120, %x1 :: (load 8 from %fixed-stack.1)
+ %x1 = ADDI8 %x1, 144
+ %x0 = LD 16, %x1
+ MTLR8 %x0, implicit-def %lr8
+ BLR8 implicit %lr8, implicit %rm
+
+...
diff --git a/test/CodeGen/PowerPC/aantidep-inline-asm-use.ll b/test/CodeGen/PowerPC/aantidep-inline-asm-use.ll
new file mode 100644
index 000000000000..f0c0deacf4dd
--- /dev/null
+++ b/test/CodeGen/PowerPC/aantidep-inline-asm-use.ll
@@ -0,0 +1,305 @@
+; RUN: llc -O2 < %s | FileCheck %s
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-grtev4-linux-gnu"
+
+; Function Attrs: nounwind
+define void @_ZN10SubProcess19ScrubbedForkAndExecEiPiS0_PNS_7ResultsE() #0 align 2 {
+; CHECK: lis 3, 1234
+; CHECK-NOT: li 3
+; CHECK-NOT: ori 3
+; CHECK-NOT: addi 3
+; CHECK-NOT: addis 3
+; CHECK-NOT: lis 3
+; CHECK: sc
+ br i1 undef, label %1, label %2
+
+; <label>:1 ; preds = %0
+ br label %60
+
+; <label>:2 ; preds = %0
+ br i1 undef, label %3, label %4
+
+; <label>:3 ; preds = %2
+ unreachable
+
+; <label>:4 ; preds = %2
+ br i1 undef, label %.lr.ph111, label %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit
+
+.lr.ph111: ; preds = %4
+ br label %5
+
+_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit: ; preds = %12, %4
+ br i1 undef, label %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19.preheader, label %13
+
+; <label>:5 ; preds = %12, %.lr.ph111
+ br i1 undef, label %6, label %9
+
+; <label>:6 ; preds = %5
+ br i1 undef, label %7, label %8
+
+; <label>:7 ; preds = %6
+ unreachable
+
+; <label>:8 ; preds = %6
+ br label %12
+
+; <label>:9 ; preds = %5
+ br i1 undef, label %10, label %11
+
+; <label>:10 ; preds = %9
+ br label %12
+
+; <label>:11 ; preds = %9
+ br label %12
+
+; <label>:12 ; preds = %11, %10, %8
+ br i1 undef, label %5, label %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit
+
+; <label>:13 ; preds = %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit
+ br i1 undef, label %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19.preheader, label %14
+
+; <label>:14 ; preds = %13
+ br label %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19.preheader
+
+_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19.preheader: ; preds = %14, %13, %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit
+ br i1 undef, label %_ZN10SubProcess12SafeSyscalls5closeEi.exit.preheader, label %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19._crit_edge
+
+_ZN10SubProcess12SafeSyscalls5closeEi.exit.preheader: ; preds = %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19, %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19.preheader
+ br label %_ZN10SubProcess12SafeSyscalls5closeEi.exit
+
+_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19._crit_edge: ; preds = %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19, %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19.preheader
+ br i1 undef, label %15, label %19
+
+_ZN10SubProcess12SafeSyscalls5closeEi.exit: ; preds = %_ZN10SubProcess12SafeSyscalls5closeEi.exit, %_ZN10SubProcess12SafeSyscalls5closeEi.exit.preheader
+ br i1 undef, label %_ZN10SubProcess12SafeSyscalls5closeEi.exit, label %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19
+
+_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19: ; preds = %_ZN10SubProcess12SafeSyscalls5closeEi.exit
+ br i1 undef, label %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19._crit_edge, label %_ZN10SubProcess12SafeSyscalls5closeEi.exit.preheader
+
+; <label>:15 ; preds = %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19._crit_edge
+ br label %16
+
+; <label>:16 ; preds = %17, %15
+ br i1 undef, label %17, label %.critedge.preheader
+
+; <label>:17 ; preds = %16
+ br i1 undef, label %16, label %.critedge.preheader
+
+.critedge.preheader: ; preds = %17, %16
+ br label %.critedge
+
+.critedge: ; preds = %18, %.critedge.preheader
+ br i1 undef, label %18, label %.critedge8
+
+; <label>:18 ; preds = %.critedge
+ br i1 undef, label %.critedge, label %.critedge8
+
+.critedge8: ; preds = %18, %.critedge
+ br label %59
+
+; <label>:19 ; preds = %_ZN10SubProcess12SafeSyscalls11sigprocmaskEiPKNS0_15kernel_sigset_tEPS1_.exit19._crit_edge
+ br label %_ZN10SubProcess12SafeSyscalls5closeEi.exit22
+
+_ZN10SubProcess12SafeSyscalls5closeEi.exit22: ; preds = %_ZN10SubProcess12SafeSyscalls5closeEi.exit22, %19
+ br i1 undef, label %_ZN10SubProcess12SafeSyscalls5closeEi.exit22, label %20
+
+; <label>:20 ; preds = %_ZN10SubProcess12SafeSyscalls5closeEi.exit22
+ %21 = alloca i8, i64 undef, align 1
+ br label %.thread.outer
+
+.thread.outer: ; preds = %._crit_edge, %20
+ br label %.thread
+
+.thread: ; preds = %45, %.thread.outer
+ call void @llvm.memset.p0i8.i64(i8* undef, i8 0, i64 56, i32 8, i1 false)
+ store i8* %21, i8** undef, align 8
+ store i32 1073741824, i32* undef, align 8
+ %22 = call { i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "sc\0A\09mfcr $0", "=&{r0},=&{r3},=&{r4},=&{r5},=&{r6},=&{r7},=&{r8},{r0},{r3},{r4},{r5},~{cr0},~{ctr},~{memory},~{r11},~{r12}"(i64 342, i64 80871424, i64 undef, i64 0) #2, !srcloc !1
+ br i1 undef, label %.lr.ph, label %.critedge15.preheader
+
+.critedge15.preheader: ; preds = %_ZN10SubProcess12SafeSyscalls7recvmsgEiPNS0_13kernel_msghdrEi.exit.backedge, %.thread
+ br i1 undef, label %.lr.ph93.preheader, label %.critedge15._crit_edge
+
+.lr.ph93.preheader: ; preds = %.critedge15.preheader
+ br label %.lr.ph93
+
+.lr.ph: ; preds = %_ZN10SubProcess12SafeSyscalls7recvmsgEiPNS0_13kernel_msghdrEi.exit.backedge, %.thread
+ switch i32 undef, label %.critedge9 [
+ i32 11, label %_ZN10SubProcess12SafeSyscalls7recvmsgEiPNS0_13kernel_msghdrEi.exit.backedge
+ i32 4, label %_ZN10SubProcess12SafeSyscalls7recvmsgEiPNS0_13kernel_msghdrEi.exit.backedge
+ ]
+
+_ZN10SubProcess12SafeSyscalls7recvmsgEiPNS0_13kernel_msghdrEi.exit.backedge: ; preds = %.lr.ph, %.lr.ph
+ br i1 undef, label %.lr.ph, label %.critedge15.preheader
+
+.critedge9: ; preds = %.lr.ph
+ unreachable
+
+.critedge15._crit_edge: ; preds = %.critedge15, %.critedge15.preheader
+ br i1 undef, label %35, label %34
+
+.lr.ph93: ; preds = %.critedge15, %.lr.ph93.preheader
+ switch i32 undef, label %33 [
+ i32 0, label %23
+ i32 1, label %23
+ i32 2, label %23
+ i32 3, label %23
+ i32 4, label %23
+ i32 5, label %23
+ i32 6, label %23
+ i32 7, label %23
+ i32 8, label %27
+ i32 9, label %30
+ ]
+
+; <label>:23 ; preds = %.lr.ph93, %.lr.ph93, %.lr.ph93, %.lr.ph93, %.lr.ph93, %.lr.ph93, %.lr.ph93, %.lr.ph93
+ br i1 undef, label %24, label %.critedge15
+
+; <label>:24 ; preds = %23
+ br i1 undef, label %.critedge15, label %25
+
+; <label>:25 ; preds = %24
+ br i1 undef, label %.critedge15, label %26
+
+; <label>:26 ; preds = %25
+ unreachable
+
+; <label>:27 ; preds = %.lr.ph93
+ br i1 undef, label %.critedge15, label %28
+
+; <label>:28 ; preds = %27
+ br i1 undef, label %29, label %.critedge15
+
+; <label>:29 ; preds = %28
+ br label %.critedge15
+
+; <label>:30 ; preds = %.lr.ph93
+ br i1 undef, label %.critedge15, label %31
+
+; <label>:31 ; preds = %30
+ br i1 undef, label %32, label %.critedge15
+
+; <label>:32 ; preds = %31
+ br label %.critedge15
+
+; <label>:33 ; preds = %.lr.ph93
+ unreachable
+
+.critedge15: ; preds = %32, %31, %30, %29, %28, %27, %25, %24, %23
+ br i1 undef, label %.lr.ph93, label %.critedge15._crit_edge
+
+; <label>:34 ; preds = %.critedge15._crit_edge
+ unreachable
+
+; <label>:35 ; preds = %.critedge15._crit_edge
+ br i1 undef, label %45, label %36
+
+; <label>:36 ; preds = %35
+ br i1 undef, label %37, label %38
+
+; <label>:37 ; preds = %36
+ br i1 undef, label %.preheader, label %38
+
+.preheader: ; preds = %37
+ br i1 undef, label %.lr.ph101, label %._crit_edge
+
+.lr.ph101: ; preds = %.preheader
+ br label %39
+
+; <label>:38 ; preds = %37, %36
+ unreachable
+
+; <label>:39 ; preds = %43, %.lr.ph101
+ br i1 undef, label %40, label %43
+
+; <label>:40 ; preds = %39
+ br i1 undef, label %_ZN10SubProcess12SafeSyscalls5fcntlEiil.exit17, label %41
+
+; <label>:41 ; preds = %40
+ unreachable
+
+_ZN10SubProcess12SafeSyscalls5fcntlEiil.exit17: ; preds = %40
+ br i1 undef, label %42, label %_ZN10SubProcess12SafeSyscalls5fcntlEiil.exit
+
+; <label>:42 ; preds = %_ZN10SubProcess12SafeSyscalls5fcntlEiil.exit17
+ unreachable
+
+_ZN10SubProcess12SafeSyscalls5fcntlEiil.exit: ; preds = %_ZN10SubProcess12SafeSyscalls5fcntlEiil.exit17
+ br i1 undef, label %.thread27, label %43
+
+; <label>:43 ; preds = %_ZN10SubProcess12SafeSyscalls5fcntlEiil.exit, %39
+ br i1 undef, label %39, label %._crit_edge
+
+.thread27: ; preds = %_ZN10SubProcess12SafeSyscalls5fcntlEiil.exit
+ br label %58
+
+._crit_edge: ; preds = %43, %.preheader
+ br i1 undef, label %.thread.outer, label %44
+
+; <label>:44 ; preds = %._crit_edge
+ unreachable
+
+; <label>:45 ; preds = %35
+ br i1 undef, label %46, label %.thread
+
+; <label>:46 ; preds = %45
+ br i1 undef, label %48, label %47
+
+; <label>:47 ; preds = %46
+ unreachable
+
+; <label>:48 ; preds = %46
+ br i1 undef, label %55, label %49
+
+; <label>:49 ; preds = %48
+ br i1 undef, label %50, label %51
+
+; <label>:50 ; preds = %49
+ br label %52
+
+; <label>:51 ; preds = %49
+ br label %52
+
+; <label>:52 ; preds = %51, %50
+ br label %53
+
+; <label>:53 ; preds = %54, %52
+ br i1 undef, label %54, label %.critedge13
+
+; <label>:54 ; preds = %53
+ br i1 undef, label %53, label %.critedge13
+
+.critedge13: ; preds = %54, %53
+ br label %58
+
+; <label>:55 ; preds = %48
+ br label %56
+
+; <label>:56 ; preds = %57, %55
+ br i1 undef, label %57, label %.critedge14
+
+; <label>:57 ; preds = %56
+ br i1 undef, label %56, label %.critedge14
+
+.critedge14: ; preds = %57, %56
+ br label %58
+
+; <label>:58 ; preds = %.critedge14, %.critedge13, %.thread27
+ br label %59
+
+; <label>:59 ; preds = %58, %.critedge8
+ br label %60
+
+; <label>:60 ; preds = %59, %1
+ ret void
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pwr8" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind argmemonly }
+attributes #2 = { nounwind }
+
+!1 = !{i32 -2140527538, i32 -2140527533}
diff --git a/test/CodeGen/PowerPC/addisdtprelha-nonr3.mir b/test/CodeGen/PowerPC/addisdtprelha-nonr3.mir
new file mode 100644
index 000000000000..e4aaaf30f90f
--- /dev/null
+++ b/test/CodeGen/PowerPC/addisdtprelha-nonr3.mir
@@ -0,0 +1,80 @@
+# RUN: llc -relocation-model=pic -start-after=block-placement -o - %s | FileCheck %s
+
+--- |
+ target datalayout = "E-m:e-i64:64-n32:64"
+ target triple = "powerpc64-unknown-linux-gnu"
+
+ @x = internal thread_local unnamed_addr global i1 false
+ @y = external thread_local global i32, align 4
+
+ ; Function Attrs: nounwind
+ define void @test1() #0 {
+ entry:
+ store i1 true, i1* @x, align 1
+ store i32 20, i32* @y, align 4
+ ret void
+ }
+
+ attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="pwr7" }
+
+ !llvm.module.flags = !{!0}
+
+ !0 = !{i32 1, !"PIC Level", i32 2}
+
+...
+---
+name: test1
+alignment: 4
+exposesReturnsTwice: false
+hasInlineAsm: false
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: false
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 64
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 48
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%x30' }
+ - { id: 1, offset: -8, size: 8, alignment: 8, isImmutable: true, isAliased: false }
+body: |
+ bb.0.entry:
+ liveins: %x30, %x30
+
+ %x0 = MFLR8 implicit %lr8
+ STD %x31, -8, %x1
+ STD killed %x0, 16, %x1
+ %x1 = STDU %x1, -64, %x1
+ %x3 = ADDIStlsldHA %x2, @x
+ %x31 = OR8 %x1, %x1
+ %x3 = ADDItlsldL killed %x3, @x
+ STD killed %x30, 48, %x31 :: (store 8 into %fixed-stack.0, align 16)
+ %x3 = GETtlsldADDR killed %x3, @x, implicit-def dead %x0, implicit-def dead %x4, implicit-def dead %x5, implicit-def dead %x6, implicit-def dead %x7, implicit-def dead %x8, implicit-def dead %x9, implicit-def dead %x10, implicit-def dead %x11, implicit-def dead %x12, implicit-def %lr8, implicit-def %ctr8, implicit-def dead %cr0, implicit-def dead %cr1, implicit-def dead %cr5, implicit-def dead %cr6, implicit-def dead %cr7
+ %x12 = ADDIStlsgdHA %x2, @y
+ %x30 = OR8 killed %x3, %x3
+ %x3 = ADDItlsgdL killed %x12, @y
+ %x3 = GETtlsADDR killed %x3, @y, implicit-def dead %x0, implicit-def dead %x4, implicit-def dead %x5, implicit-def dead %x6, implicit-def dead %x7, implicit-def dead %x8, implicit-def dead %x9, implicit-def dead %x10, implicit-def dead %x11, implicit-def dead %x12, implicit-def %lr8, implicit-def %ctr8, implicit-def dead %cr0, implicit-def dead %cr1, implicit-def dead %cr5, implicit-def dead %cr6, implicit-def dead %cr7
+ %x4 = ADDISdtprelHA killed %x30, @x
+ ; CHECK: addis 4, 30, x@dtprel@ha
+ %x5 = LI8 1
+ %r6 = LI 20
+ %x30 = LD 48, %x31 :: (load 8 from %fixed-stack.0, align 16)
+ STB8 killed %x5, target-flags(ppc-dtprel-lo) @x, killed %x4 :: (store 1 into @x)
+ STW killed %r6, 0, killed %x3 :: (store 4 into @y)
+ %x1 = ADDI8 %x1, 64
+ %x0 = LD 16, %x1
+ %x31 = LD -8, %x1
+ MTLR8 killed %x0, implicit-def %lr8
+ BLR8 implicit %lr8, implicit %rm
+
+...
diff --git a/test/CodeGen/PowerPC/alias.ll b/test/CodeGen/PowerPC/alias.ll
index 524abd5da3ef..3650cd9d8a06 100644
--- a/test/CodeGen/PowerPC/alias.ll
+++ b/test/CodeGen/PowerPC/alias.ll
@@ -2,10 +2,10 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK --check-prefix=LARGE %s
@foo = global i32 42
-@fooa = alias i32* @foo
+@fooa = alias i32, i32* @foo
@foo2 = global i64 42
-@foo2a = alias i64* @foo2
+@foo2a = alias i64, i64* @foo2
; CHECK-LABEL: bar:
define i32 @bar() {
diff --git a/test/CodeGen/PowerPC/bitcasts-direct-move.ll b/test/CodeGen/PowerPC/bitcasts-direct-move.ll
new file mode 100644
index 000000000000..756f57917548
--- /dev/null
+++ b/test/CodeGen/PowerPC/bitcasts-direct-move.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
+; RUN: --check-prefix=CHECK-P7
+
+define signext i32 @f32toi32(float %a) {
+entry:
+ %0 = bitcast float %a to i32
+ ret i32 %0
+; CHECK-P7: stfs 1,
+; CHECK-P7: lwa 3,
+; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1
+; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3
+; CHECK: mfvsrwz 3, [[SHIFTREG]]
+}
+
+define i64 @f64toi64(double %a) {
+entry:
+ %0 = bitcast double %a to i64
+ ret i64 %0
+; CHECK-P7: stxsdx 1,
+; CHECK-P7: ld 3,
+; CHECK: mfvsrd 3, 1
+}
+
+define float @i32tof32(i32 signext %a) {
+entry:
+ %0 = bitcast i32 %a to float
+ ret float %0
+; CHECK-P7: stw 3,
+; CHECK-P7: lfs 1,
+; CHECK: mtvsrd [[MOVEREG:[0-9]+]], 3
+; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[MOVEREG]], [[MOVEREG]], 1
+; CHECK: xscvspdpn 1, [[SHIFTREG]]
+}
+
+define double @i64tof64(i64 %a) {
+entry:
+ %0 = bitcast i64 %a to double
+ ret double %0
+; CHECK-P7: std 3,
+; CHECK-P7: lxsdx 1,
+; CHECK: mtvsrd 1, 3
+}
+
+define zeroext i32 @f32toi32u(float %a) {
+entry:
+ %0 = bitcast float %a to i32
+ ret i32 %0
+; CHECK-P7: stfs 1,
+; CHECK-P7: lwz 3,
+; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1
+; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3
+; CHECK: mfvsrwz 3, [[SHIFTREG]]
+}
+
+define i64 @f64toi64u(double %a) {
+entry:
+ %0 = bitcast double %a to i64
+ ret i64 %0
+; CHECK-P7: stxsdx 1,
+; CHECK-P7: ld 3,
+; CHECK: mfvsrd 3, 1
+}
+
+define float @i32utof32(i32 zeroext %a) {
+entry:
+ %0 = bitcast i32 %a to float
+ ret float %0
+; CHECK-P7: stw 3,
+; CHECK-P7: lfs 1,
+; CHECK: mtvsrd [[MOVEREG:[0-9]+]], 3
+; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[MOVEREG]], [[MOVEREG]], 1
+; CHECK: xscvspdpn 1, [[SHIFTREG]]
+}
+
+define double @i64utof64(i64 %a) {
+entry:
+ %0 = bitcast i64 %a to double
+ ret double %0
+; CHECK-P7: std 3,
+; CHECK-P7: lxsdx 1,
+; CHECK: mtvsrd 1, 3
+}
diff --git a/test/CodeGen/PowerPC/bitreverse.ll b/test/CodeGen/PowerPC/bitreverse.ll
new file mode 100644
index 000000000000..1c3741a9a696
--- /dev/null
+++ b/test/CodeGen/PowerPC/bitreverse.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=ppc64 %s -o - | FileCheck %s
+
+; These tests just check that the plumbing is in place for @llvm.bitreverse. The
+; actual output is massive at the moment as llvm.bitreverse is not yet legal.
+
+declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone
+
+define <2 x i16> @f(<2 x i16> %a) {
+; CHECK-LABEL: f:
+; CHECK: rlwinm
+ %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
+ ret <2 x i16> %b
+}
+
+declare i8 @llvm.bitreverse.i8(i8) readnone
+
+define i8 @g(i8 %a) {
+; CHECK-LABEL: g:
+; CHECK: rlwinm
+; CHECK: rlwimi
+ %b = call i8 @llvm.bitreverse.i8(i8 %a)
+ ret i8 %b
+}
diff --git a/test/CodeGen/PowerPC/branch-hint.ll b/test/CodeGen/PowerPC/branch-hint.ll
new file mode 100644
index 000000000000..46160507105f
--- /dev/null
+++ b/test/CodeGen/PowerPC/branch-hint.ll
@@ -0,0 +1,135 @@
+; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -ppc-use-branch-hint=false | FileCheck %s
+; RUN: llc < %s -O1 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -ppc-use-branch-hint=true | FileCheck %s -check-prefix=CHECK-HINT
+define void @branch_hint_1(i32 %src) {
+entry:
+ %cmp = icmp eq i32 %src, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo() #0
+ unreachable
+
+if.end:
+ call void @goo()
+ ret void
+
+; CHECK-LABEL: branch_hint_1:
+; CHECK: beq
+
+; CHECK-HINT-LABEL: branch_hint_1:
+; CHECK-HINT: beq-
+}
+
+define void @branch_hint_2(i32 %src) {
+entry:
+ %cmp = icmp eq i32 %src, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ call void @goo()
+ ret void
+
+if.end:
+ tail call void @foo() #0
+ unreachable
+
+; CHECK-LABEL: @branch_hint_2
+; CHECK: bne
+
+; CHECK-HINT-LABEL: @branch_hint_2
+; CHECK-HINT: bne-
+}
+
+declare void @foo()
+attributes #0 = { noreturn }
+
+define void @branch_hint_3(i32 %src) {
+entry:
+ %cmp = icmp eq i32 %src, 0
+ br i1 %cmp, label %if.then, label %if.end, !prof !0
+
+if.then:
+ call void @foo()
+ ret void
+
+if.end:
+ call void @goo()
+ ret void
+
+; CHECK-LABEL: @branch_hint_3
+; CHECK: bne
+
+; CHECK-HINT-LABEL: @branch_hint_3
+; CHECK-HINT: bne
+}
+
+!0 = !{!"branch_weights", i32 64, i32 4}
+
+define void @branch_hint_4(i32 %src) {
+entry:
+ %cmp = icmp eq i32 %src, 0
+ br i1 %cmp, label %if.then, label %if.end, !prof !1
+
+if.then:
+ call void @foo()
+ ret void
+
+if.end:
+ call void @goo()
+ ret void
+
+; CHECK-HINT-LABEL: branch_hint_4
+; CHECK-HINT: bne
+}
+
+!1 = !{!"branch_weights", i32 64, i32 8}
+
+define void @branch_hint_5(i32 %src) {
+entry:
+ %cmp = icmp eq i32 %src, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ ret void
+
+if.end:
+ call void @goo()
+ ret void
+
+; CHECK-HINT-LABEL: branch_hint_5:
+; CHECK-HINT: beq
+}
+
+declare void @goo()
+
+define void @branch_hint_6(i32 %src1, i32 %src2, i32 %src3) {
+entry:
+ %cmp = icmp eq i32 %src1, 0
+ br i1 %cmp, label %if.end.6, label %if.end, !prof !3
+
+if.end:
+ %cmp1 = icmp eq i32 %src2, 0
+ br i1 %cmp1, label %if.end.3, label %if.then.2
+
+if.then.2:
+ tail call void @foo() #0
+ unreachable
+
+if.end.3:
+ %cmp4 = icmp eq i32 %src3, 1
+ br i1 %cmp4, label %if.then.5, label %if.end.6
+
+if.then.5:
+ tail call void @foo() #0
+ unreachable
+
+if.end.6:
+ ret void
+
+; CHECK-HINT-LABEL: branch_hint_6:
+; CHECK-HINT: bne
+; CHECK-HINT: bne-
+; CHECK-HINT: bne+
+}
+
+!3 = !{!"branch_weights", i32 64, i32 4}
diff --git a/test/CodeGen/PowerPC/coal-sections.ll b/test/CodeGen/PowerPC/coal-sections.ll
new file mode 100644
index 000000000000..377891c47143
--- /dev/null
+++ b/test/CodeGen/PowerPC/coal-sections.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple powerpc-apple-darwin8 -march=ppc32 | FileCheck %s
+
+; Check that *coal* sections are emitted.
+
+; CHECK: .section __TEXT,__textcoal_nt,coalesced,pure_instructions
+; CHECK: .section __TEXT,__textcoal_nt,coalesced,pure_instructions
+; CHECK-NEXT: .globl _foo
+
+; CHECK: .section __TEXT,__const_coal,coalesced
+; CHECK-NEXT: .globl _a
+
+; CHECK: .section __DATA,__datacoal_nt,coalesced
+; CHECK-NEXT: .globl _b
+
+@a = weak_odr constant [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 16
+@b = weak global i32 5, align 4
+@g = common global i32* null, align 8
+
+; Function Attrs: nounwind ssp uwtable
+define weak i32* @foo() {
+entry:
+ store i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), i32** @g, align 8
+ ret i32* @b
+}
diff --git a/test/CodeGen/PowerPC/crbit-asm-disabled.ll b/test/CodeGen/PowerPC/crbit-asm-disabled.ll
new file mode 100644
index 000000000000..56ec8ecb85d7
--- /dev/null
+++ b/test/CodeGen/PowerPC/crbit-asm-disabled.ll
@@ -0,0 +1,16 @@
+; RUN: not llc -mcpu=pwr7 -o /dev/null %s 2>&1 | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define zeroext i1 @testi1(i1 zeroext %b1, i1 zeroext %b2) #0 {
+entry:
+ %0 = tail call i8 asm "crand $0, $1, $2", "=^wc,^wc,^wc"(i1 %b1, i1 %b2) #0
+ %1 = and i8 %0, 1
+ %tobool3 = icmp ne i8 %1, 0
+ ret i1 %tobool3
+
+; CHECK: error: couldn't allocate output register for constraint 'wc'
+}
+
+attributes #0 = { nounwind "target-features"="-crbits" }
+
diff --git a/test/CodeGen/PowerPC/crbit-asm.ll b/test/CodeGen/PowerPC/crbit-asm.ll
index 36de3435a081..41e65af29a8a 100644
--- a/test/CodeGen/PowerPC/crbit-asm.ll
+++ b/test/CodeGen/PowerPC/crbit-asm.ll
@@ -1,4 +1,5 @@
; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -O1 -mcpu=pwr7 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -55,5 +56,5 @@ entry:
; CHECK: blr
}
-attributes #0 = { nounwind }
+attributes #0 = { nounwind "target-features"="+crbits" }
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index 60de982d91a1..3757fa3e2f29 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -6,7 +6,7 @@ declare i32 @llvm.cttz.i32(i32, i1)
define i32 @bar(i32 %x) {
entry:
; CHECK: @bar
-; CHECK: cntlz
+; CHECK: cntlzw
%tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true ) ; <i32> [#uses=1]
ret i32 %tmp.1
}
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index 87914025b733..b636cff0f205 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -4,7 +4,7 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readnone {
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readnone !dbg !5 {
entry:
tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !15, metadata !DIExpression()), !dbg !17
tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !16, metadata !DIExpression()), !dbg !18
@@ -17,10 +17,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1", isOptimized: true, emissionKind: 0, file: !21, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1", isOptimized: true, emissionKind: 0, file: !21, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !21, scope: null, type: !7, function: i32 (i32, i8**)* @main, variables: !13)
+!5 = distinct !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !21, scope: null, type: !7, variables: !13)
!6 = !DIFile(filename: "dbg.c", directory: "/src")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9, !10}
@@ -29,8 +29,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !12)
!12 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
!13 = !{!15, !16}
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 1, arg: 1, scope: !5, file: !6, type: !9)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 1, arg: 2, scope: !5, file: !6, type: !10)
+!15 = !DILocalVariable(name: "argc", line: 1, arg: 1, scope: !5, file: !6, type: !9)
+!16 = !DILocalVariable(name: "argv", line: 1, arg: 2, scope: !5, file: !6, type: !10)
!17 = !DILocation(line: 1, column: 14, scope: !5)
!18 = !DILocation(line: 1, column: 26, scope: !5)
!19 = !DILocation(line: 2, column: 3, scope: !20)
diff --git a/test/CodeGen/PowerPC/dyn-alloca-offset.ll b/test/CodeGen/PowerPC/dyn-alloca-offset.ll
new file mode 100644
index 000000000000..7159b9da736d
--- /dev/null
+++ b/test/CodeGen/PowerPC/dyn-alloca-offset.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare i64 @llvm.get.dynamic.area.offset.i64()
+
+declare i64 @bar(i64)
+
+attributes #0 = { nounwind }
+
+; Function Attrs: nounwind sanitize_address uwtable
+define signext i64 @foo(i32 signext %N, i32 signext %M) #0 {
+ %1 = alloca i64, align 32
+ %dynamic_area_offset = call i64 @llvm.get.dynamic.area.offset.i64()
+ %2 = call i64 @bar(i64 %dynamic_area_offset)
+ ret i64 %2
+
+; CHECK-DAG: li [[REG1:[0-9]+]], 112
+; CHECK: blr
+
+}
diff --git a/test/CodeGen/PowerPC/e500-1.ll b/test/CodeGen/PowerPC/e500-1.ll
new file mode 100644
index 000000000000..7457c0e57e18
--- /dev/null
+++ b/test/CodeGen/PowerPC/e500-1.ll
@@ -0,0 +1,30 @@
+; RUN: llc -O0 -mcpu=e500mc < %s | FileCheck %s
+; Check if e500 generates code with mfocrf insn.
+
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+target triple = "powerpc-unknown-linux-gnu"
+
+define internal i32 @func_49(i64 %p_50, i16 zeroext %p_51, i8* %p_52, i32 %p_53) {
+; CHECK-LABEL: @func_49
+; CHECK-NOT: mfocrf
+
+ %1 = load i64, i64* undef, align 8
+ %2 = load i64, i64* undef, align 8
+ %3 = icmp sge i32 undef, undef
+ %4 = zext i1 %3 to i32
+ %5 = sext i32 %4 to i64
+ %6 = icmp slt i64 %2, %5
+ %7 = zext i1 %6 to i32
+ %8 = call i64 @safe_sub_func_int64_t_s_s(i64 -6372137293439783564, i64 undef)
+ %9 = icmp slt i32 %7, undef
+ %10 = zext i1 %9 to i32
+ %11 = sext i32 %10 to i64
+ %12 = icmp sle i64 %1, %11
+ %13 = zext i1 %12 to i32
+ %14 = call i32 @safe_add_func_int32_t_s_s(i32 undef, i32 %13)
+ ret i32 undef
+}
+
+declare i32 @safe_add_func_int32_t_s_s(i32, i32)
+
+declare i64 @safe_sub_func_int64_t_s_s(i64, i64)
diff --git a/test/CodeGen/PowerPC/emutls_generic.ll b/test/CodeGen/PowerPC/emutls_generic.ll
new file mode 100644
index 000000000000..a2e13a6723f8
--- /dev/null
+++ b/test/CodeGen/PowerPC/emutls_generic.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -emulated-tls -mtriple=powerpc64-unknown-linux-gnu -relocation-model=pic \
+; RUN: | FileCheck %s
+; RUN: llc < %s -emulated-tls -mtriple=powerpc-unknown-linux-gnu -relocation-model=pic \
+; RUN: | FileCheck %s
+
+; Make sure that TLS symbols are emitted in expected order.
+
+@external_x = external thread_local global i32, align 8
+@external_y = thread_local global i8 7, align 2
+@internal_y = internal thread_local global i64 9, align 16
+
+define i32* @get_external_x() {
+entry:
+ ret i32* @external_x
+}
+
+define i8* @get_external_y() {
+entry:
+ ret i8* @external_y
+}
+
+define i64* @get_internal_y() {
+entry:
+ ret i64* @internal_y
+}
+
+; CHECK-LABEL: get_external_x:
+; CHECK-NOT: _tls_get_address
+; CHECK: __emutls_get_address
+; CHECK-LABEL: get_external_y:
+; CHECK: __emutls_get_address
+; CHECK-NOT: _tls_get_address
+; CHECK-LABEL: get_internal_y:
+; CHECK-NOT: __emutls_t.external_x:
+; CHECK-NOT: __emutls_v.external_x:
+; CHECK-LABEL: __emutls_v.external_y:
+; CHECK-LABEL: __emutls_t.external_y:
+; CHECK: __emutls_t.external_y
+; CHECK-LABEL: __emutls_v.internal_y:
+; CHECK-LABEL: __emutls_t.internal_y:
+; CHECK: __emutls_t.internal_y
diff --git a/test/CodeGen/PowerPC/fast-isel-binary.ll b/test/CodeGen/PowerPC/fast-isel-binary.ll
index 2f1513f8aa11..1036689ff44d 100644
--- a/test/CodeGen/PowerPC/fast-isel-binary.ll
+++ b/test/CodeGen/PowerPC/fast-isel-binary.ll
@@ -2,7 +2,7 @@
; Test add with non-legal types
-define void @add_i8(i8 %a, i8 %b) nounwind ssp {
+define void @add_i8(i8 %a, i8 %b) nounwind {
entry:
; ELF64: add_i8
%a.addr = alloca i8, align 4
@@ -12,7 +12,7 @@ entry:
ret void
}
-define void @add_i8_imm(i8 %a) nounwind ssp {
+define void @add_i8_imm(i8 %a) nounwind {
entry:
; ELF64: add_i8_imm
%a.addr = alloca i8, align 4
@@ -22,7 +22,7 @@ entry:
ret void
}
-define void @add_i16(i16 %a, i16 %b) nounwind ssp {
+define void @add_i16(i16 %a, i16 %b) nounwind {
entry:
; ELF64: add_i16
%a.addr = alloca i16, align 4
@@ -32,7 +32,7 @@ entry:
ret void
}
-define void @add_i16_imm(i16 %a, i16 %b) nounwind ssp {
+define void @add_i16_imm(i16 %a, i16 %b) nounwind {
entry:
; ELF64: add_i16_imm
%a.addr = alloca i16, align 4
@@ -44,7 +44,7 @@ entry:
; Test or with non-legal types
-define void @or_i8(i8 %a, i8 %b) nounwind ssp {
+define void @or_i8(i8 %a, i8 %b) nounwind {
entry:
; ELF64: or_i8
%a.addr = alloca i8, align 4
@@ -54,7 +54,7 @@ entry:
ret void
}
-define void @or_i8_imm(i8 %a) nounwind ssp {
+define void @or_i8_imm(i8 %a) nounwind {
entry:
; ELF64: or_i8_imm
%a.addr = alloca i8, align 4
@@ -64,7 +64,7 @@ entry:
ret void
}
-define void @or_i16(i16 %a, i16 %b) nounwind ssp {
+define void @or_i16(i16 %a, i16 %b) nounwind {
entry:
; ELF64: or_i16
%a.addr = alloca i16, align 4
@@ -74,7 +74,7 @@ entry:
ret void
}
-define void @or_i16_imm(i16 %a) nounwind ssp {
+define void @or_i16_imm(i16 %a) nounwind {
entry:
; ELF64: or_i16_imm
%a.addr = alloca i16, align 4
@@ -86,7 +86,7 @@ entry:
; Test sub with non-legal types
-define void @sub_i8(i8 %a, i8 %b) nounwind ssp {
+define void @sub_i8(i8 %a, i8 %b) nounwind {
entry:
; ELF64: sub_i8
%a.addr = alloca i8, align 4
@@ -96,7 +96,7 @@ entry:
ret void
}
-define void @sub_i8_imm(i8 %a) nounwind ssp {
+define void @sub_i8_imm(i8 %a) nounwind {
entry:
; ELF64: sub_i8_imm
%a.addr = alloca i8, align 4
@@ -106,7 +106,7 @@ entry:
ret void
}
-define void @sub_i16(i16 %a, i16 %b) nounwind ssp {
+define void @sub_i16(i16 %a, i16 %b) nounwind {
entry:
; ELF64: sub_i16
%a.addr = alloca i16, align 4
@@ -116,7 +116,7 @@ entry:
ret void
}
-define void @sub_i16_imm(i16 %a) nounwind ssp {
+define void @sub_i16_imm(i16 %a) nounwind {
entry:
; ELF64: sub_i16_imm
%a.addr = alloca i16, align 4
@@ -126,7 +126,7 @@ entry:
ret void
}
-define void @sub_i16_badimm(i16 %a) nounwind ssp {
+define void @sub_i16_badimm(i16 %a) nounwind {
entry:
; ELF64: sub_i16_imm
%a.addr = alloca i16, align 4
diff --git a/test/CodeGen/PowerPC/fast-isel-br-const.ll b/test/CodeGen/PowerPC/fast-isel-br-const.ll
index 6be7fbf9e02f..f411d23fb288 100644
--- a/test/CodeGen/PowerPC/fast-isel-br-const.ll
+++ b/test/CodeGen/PowerPC/fast-isel-br-const.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
-define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
+define i32 @t1(i32 %a, i32 %b) nounwind {
entry:
; ELF64: t1
%x = add i32 %a, %b
diff --git a/test/CodeGen/PowerPC/fast-isel-call.ll b/test/CodeGen/PowerPC/fast-isel-call.ll
index 64d8f6e79195..5d541e3a01f1 100644
--- a/test/CodeGen/PowerPC/fast-isel-call.ll
+++ b/test/CodeGen/PowerPC/fast-isel-call.ll
@@ -56,7 +56,7 @@ declare zeroext i16 @t6();
declare signext i8 @t7();
declare zeroext i8 @t8();
-define i32 @t10(i32 %argc, i8** nocapture %argv) {
+define i32 @t10(i32 %argc, i8** nocapture %argv) nounwind {
entry:
; ELF64: t10
%call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
@@ -97,9 +97,9 @@ define i32 @bar0(i32 %i) nounwind {
; ret i32 %tmp1
;}
-declare void @float_foo(float %f) ssp
+declare void @float_foo(float %f)
-define void @float_const() ssp {
+define void @float_const() nounwind {
entry:
; ELF64: float_const
call void @float_foo(float 0x401C666660000000)
@@ -108,7 +108,7 @@ entry:
ret void
}
-define void @float_reg(float %dummy, float %f) ssp {
+define void @float_reg(float %dummy, float %f) nounwind {
entry:
; ELF64: float_reg
call void @float_foo(float %f)
@@ -116,9 +116,9 @@ entry:
ret void
}
-declare void @double_foo(double %d) ssp
+declare void @double_foo(double %d)
-define void @double_const() ssp {
+define void @double_const() nounwind {
entry:
; ELF64: double_const
call void @double_foo(double 0x1397723CCABD0000401C666660000000)
@@ -127,7 +127,7 @@ entry:
ret void
}
-define void @double_reg(double %dummy, double %d) ssp {
+define void @double_reg(double %dummy, double %d) nounwind {
entry:
; ELF64: double_reg
call void @double_foo(double %d)
diff --git a/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
index 5a9d15868b6b..5881dc3798ae 100644
--- a/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
@@ -3,7 +3,7 @@
; When fastisel better supports VSX fix up this test case.
;
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
-define void @t1a(float %a) uwtable ssp {
+define void @t1a(float %a) nounwind {
entry:
; ELF64: t1a
%cmp = fcmp oeq float %a, 0.000000e+00
@@ -22,7 +22,7 @@ if.end: ; preds = %if.then, %entry
declare void @foo()
-define void @t1b(float %a) uwtable ssp {
+define void @t1b(float %a) nounwind {
entry:
; ELF64: t1b
%cmp = fcmp oeq float %a, -0.000000e+00
@@ -39,7 +39,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t2a(double %a) uwtable ssp {
+define void @t2a(double %a) nounwind {
entry:
; ELF64: t2a
%cmp = fcmp oeq double %a, 0.000000e+00
@@ -56,7 +56,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t2b(double %a) uwtable ssp {
+define void @t2b(double %a) nounwind {
entry:
; ELF64: t2b
%cmp = fcmp oeq double %a, -0.000000e+00
@@ -73,7 +73,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t4(i8 signext %a) uwtable ssp {
+define void @t4(i8 signext %a) nounwind {
entry:
; ELF64: t4
%cmp = icmp eq i8 %a, -1
@@ -89,7 +89,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t5(i8 zeroext %a) uwtable ssp {
+define void @t5(i8 zeroext %a) nounwind {
entry:
; ELF64: t5
%cmp = icmp eq i8 %a, 1
@@ -105,7 +105,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t6(i16 signext %a) uwtable ssp {
+define void @t6(i16 signext %a) nounwind {
entry:
; ELF64: t6
%cmp = icmp eq i16 %a, -1
@@ -121,7 +121,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t7(i16 zeroext %a) uwtable ssp {
+define void @t7(i16 zeroext %a) nounwind {
entry:
; ELF64: t7
%cmp = icmp eq i16 %a, 1
@@ -137,7 +137,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t8(i32 %a) uwtable ssp {
+define void @t8(i32 %a) nounwind {
entry:
; ELF64: t8
%cmp = icmp eq i32 %a, -1
@@ -152,7 +152,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t9(i32 %a) uwtable ssp {
+define void @t9(i32 %a) nounwind {
entry:
; ELF64: t9
%cmp = icmp eq i32 %a, 1
@@ -167,7 +167,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t10(i32 %a) uwtable ssp {
+define void @t10(i32 %a) nounwind {
entry:
; ELF64: t10
%cmp = icmp eq i32 %a, 384
@@ -182,7 +182,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t11(i32 %a) uwtable ssp {
+define void @t11(i32 %a) nounwind {
entry:
; ELF64: t11
%cmp = icmp eq i32 %a, 4096
@@ -197,7 +197,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t12(i8 %a) uwtable ssp {
+define void @t12(i8 %a) nounwind {
entry:
; ELF64: t12
%cmp = icmp ugt i8 %a, -113
@@ -229,7 +229,7 @@ if.end: ; preds = %entry
ret void
}
-define void @t14(i64 %a) uwtable ssp {
+define void @t14(i64 %a) nounwind {
entry:
; ELF64: t14
%cmp = icmp eq i64 %a, -1
@@ -244,7 +244,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t15(i64 %a) uwtable ssp {
+define void @t15(i64 %a) nounwind {
entry:
; ELF64: t15
%cmp = icmp eq i64 %a, 1
@@ -259,7 +259,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t16(i64 %a) uwtable ssp {
+define void @t16(i64 %a) nounwind {
entry:
; ELF64: t16
%cmp = icmp eq i64 %a, 384
@@ -274,7 +274,7 @@ if.end: ; preds = %if.then, %entry
ret void
}
-define void @t17(i64 %a) uwtable ssp {
+define void @t17(i64 %a) nounwind {
entry:
; ELF64: t17
%cmp = icmp eq i64 %a, 32768
diff --git a/test/CodeGen/PowerPC/fast-isel-const.ll b/test/CodeGen/PowerPC/fast-isel-const.ll
index a751a2be6c69..3987e54a8d1b 100644
--- a/test/CodeGen/PowerPC/fast-isel-const.ll
+++ b/test/CodeGen/PowerPC/fast-isel-const.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
-define zeroext i1 @testi1(i8 %in) nounwind uwtable ssp {
+define zeroext i1 @testi1(i8 %in) nounwind {
entry:
%c = icmp eq i8 %in, 5
br i1 %c, label %true, label %false
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
index cfb934c6ab02..e4cdf8d7a9c3 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
@@ -2,7 +2,7 @@
; Test sitofp
-define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
+define void @sitofp_double_i32(i32 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i32
%b.addr = alloca double, align 8
@@ -14,7 +14,7 @@ entry:
ret void
}
-define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
+define void @sitofp_double_i64(i64 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i64
%b.addr = alloca double, align 8
@@ -26,7 +26,7 @@ entry:
ret void
}
-define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
+define void @sitofp_double_i16(i16 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i16
%b.addr = alloca double, align 8
@@ -39,7 +39,7 @@ entry:
ret void
}
-define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
+define void @sitofp_double_i8(i8 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i8
%b.addr = alloca double, align 8
@@ -54,7 +54,7 @@ entry:
; Test fptosi
-define void @fptosi_float_i32(float %a) nounwind ssp {
+define void @fptosi_float_i32(float %a) nounwind {
entry:
; ELF64: fptosi_float_i32
%b.addr = alloca i32, align 4
@@ -66,7 +66,7 @@ entry:
ret void
}
-define void @fptosi_float_i64(float %a) nounwind ssp {
+define void @fptosi_float_i64(float %a) nounwind {
entry:
; ELF64: fptosi_float_i64
%b.addr = alloca i64, align 4
@@ -78,7 +78,7 @@ entry:
ret void
}
-define void @fptosi_double_i32(double %a) nounwind ssp {
+define void @fptosi_double_i32(double %a) nounwind {
entry:
; ELF64: fptosi_double_i32
%b.addr = alloca i32, align 8
@@ -90,7 +90,7 @@ entry:
ret void
}
-define void @fptosi_double_i64(double %a) nounwind ssp {
+define void @fptosi_double_i64(double %a) nounwind {
entry:
; ELF64: fptosi_double_i64
%b.addr = alloca i64, align 8
@@ -104,7 +104,7 @@ entry:
; Test fptoui
-define void @fptoui_float_i32(float %a) nounwind ssp {
+define void @fptoui_float_i32(float %a) nounwind {
entry:
; ELF64: fptoui_float_i32
%b.addr = alloca i32, align 4
@@ -116,7 +116,7 @@ entry:
ret void
}
-define void @fptoui_double_i32(double %a) nounwind ssp {
+define void @fptoui_double_i32(double %a) nounwind {
entry:
; ELF64: fptoui_double_i32
%b.addr = alloca i32, align 8
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll
index f7557d456858..a9324592aeab 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll
@@ -11,7 +11,7 @@
; Test sitofp
-define void @sitofp_single_i64(i64 %a, float %b) nounwind ssp {
+define void @sitofp_single_i64(i64 %a, float %b) nounwind {
entry:
; ELF64: sitofp_single_i64
; ELF64LE: sitofp_single_i64
@@ -32,7 +32,7 @@ entry:
ret void
}
-define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
+define void @sitofp_single_i32(i32 %a, float %b) nounwind {
entry:
; ELF64: sitofp_single_i32
; ELF64LE: sitofp_single_i32
@@ -57,7 +57,7 @@ entry:
ret void
}
-define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
+define void @sitofp_single_i16(i16 %a, float %b) nounwind {
entry:
; ELF64: sitofp_single_i16
; ELF64LE: sitofp_single_i16
@@ -81,7 +81,7 @@ entry:
ret void
}
-define void @sitofp_single_i8(i8 %a) nounwind ssp {
+define void @sitofp_single_i8(i8 %a) nounwind {
entry:
; ELF64: sitofp_single_i8
; ELF64LE: sitofp_single_i8
@@ -105,7 +105,7 @@ entry:
ret void
}
-define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
+define void @sitofp_double_i32(i32 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i32
; ELF64LE: sitofp_double_i32
@@ -129,7 +129,7 @@ entry:
ret void
}
-define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
+define void @sitofp_double_i64(i64 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i64
; ELF64LE: sitofp_double_i64
@@ -149,7 +149,7 @@ entry:
ret void
}
-define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
+define void @sitofp_double_i16(i16 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i16
; ELF64LE: sitofp_double_i16
@@ -172,7 +172,7 @@ entry:
ret void
}
-define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
+define void @sitofp_double_i8(i8 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i8
; ELF64LE: sitofp_double_i8
@@ -197,7 +197,7 @@ entry:
; Test uitofp
-define void @uitofp_single_i64(i64 %a, float %b) nounwind ssp {
+define void @uitofp_single_i64(i64 %a, float %b) nounwind {
entry:
; ELF64: uitofp_single_i64
; ELF64LE: uitofp_single_i64
@@ -215,7 +215,7 @@ entry:
ret void
}
-define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
+define void @uitofp_single_i32(i32 %a, float %b) nounwind {
entry:
; ELF64: uitofp_single_i32
; ELF64LE: uitofp_single_i32
@@ -238,7 +238,7 @@ entry:
ret void
}
-define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
+define void @uitofp_single_i16(i16 %a, float %b) nounwind {
entry:
; ELF64: uitofp_single_i16
; ELF64LE: uitofp_single_i16
@@ -262,7 +262,7 @@ entry:
ret void
}
-define void @uitofp_single_i8(i8 %a) nounwind ssp {
+define void @uitofp_single_i8(i8 %a) nounwind {
entry:
; ELF64: uitofp_single_i8
; ELF64LE: uitofp_single_i8
@@ -286,7 +286,7 @@ entry:
ret void
}
-define void @uitofp_double_i64(i64 %a, double %b) nounwind ssp {
+define void @uitofp_double_i64(i64 %a, double %b) nounwind {
entry:
; ELF64: uitofp_double_i64
; ELF64LE: uitofp_double_i64
@@ -304,7 +304,7 @@ entry:
ret void
}
-define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
+define void @uitofp_double_i32(i32 %a, double %b) nounwind {
entry:
; ELF64: uitofp_double_i32
; ELF64LE: uitofp_double_i32
@@ -327,7 +327,7 @@ entry:
ret void
}
-define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
+define void @uitofp_double_i16(i16 %a, double %b) nounwind {
entry:
; ELF64: uitofp_double_i16
; ELF64LE: uitofp_double_i16
@@ -350,7 +350,7 @@ entry:
ret void
}
-define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
+define void @uitofp_double_i8(i8 %a, double %b) nounwind {
entry:
; ELF64: uitofp_double_i8
; ELF64LE: uitofp_double_i8
@@ -375,7 +375,7 @@ entry:
; Test fptosi
-define void @fptosi_float_i32(float %a) nounwind ssp {
+define void @fptosi_float_i32(float %a) nounwind {
entry:
; ELF64: fptosi_float_i32
; ELF64LE: fptosi_float_i32
@@ -395,7 +395,7 @@ entry:
ret void
}
-define void @fptosi_float_i64(float %a) nounwind ssp {
+define void @fptosi_float_i64(float %a) nounwind {
entry:
; ELF64: fptosi_float_i64
; ELF64LE: fptosi_float_i64
@@ -415,7 +415,7 @@ entry:
ret void
}
-define void @fptosi_double_i32(double %a) nounwind ssp {
+define void @fptosi_double_i32(double %a) nounwind {
entry:
; ELF64: fptosi_double_i32
; ELF64LE: fptosi_double_i32
@@ -435,7 +435,7 @@ entry:
ret void
}
-define void @fptosi_double_i64(double %a) nounwind ssp {
+define void @fptosi_double_i64(double %a) nounwind {
entry:
; ELF64: fptosi_double_i64
; ELF64LE: fptosi_double_i64
@@ -457,7 +457,7 @@ entry:
; Test fptoui
-define void @fptoui_float_i32(float %a) nounwind ssp {
+define void @fptoui_float_i32(float %a) nounwind {
entry:
; ELF64: fptoui_float_i32
; ELF64LE: fptoui_float_i32
@@ -477,7 +477,7 @@ entry:
ret void
}
-define void @fptoui_float_i64(float %a) nounwind ssp {
+define void @fptoui_float_i64(float %a) nounwind {
entry:
; ELF64: fptoui_float_i64
; ELF64LE: fptoui_float_i64
@@ -495,7 +495,7 @@ entry:
ret void
}
-define void @fptoui_double_i32(double %a) nounwind ssp {
+define void @fptoui_double_i32(double %a) nounwind {
entry:
; ELF64: fptoui_double_i32
; ELF64LE: fptoui_double_i32
@@ -515,7 +515,7 @@ entry:
ret void
}
-define void @fptoui_double_i64(double %a) nounwind ssp {
+define void @fptoui_double_i64(double %a) nounwind {
entry:
; ELF64: fptoui_double_i64
; ELF64LE: fptoui_double_i64
diff --git a/test/CodeGen/PowerPC/fast-isel-crash.ll b/test/CodeGen/PowerPC/fast-isel-crash.ll
index 55e87effcd82..e20ef6bcd5d3 100644
--- a/test/CodeGen/PowerPC/fast-isel-crash.ll
+++ b/test/CodeGen/PowerPC/fast-isel-crash.ll
@@ -11,12 +11,12 @@ entry:
ret void
}
-define internal i32 @_Z13get_global_idj(i32 %dim) nounwind ssp {
+define internal i32 @_Z13get_global_idj(i32 %dim) nounwind {
entry:
ret i32 undef
}
-define void @wrap(i8 addrspace(1)* addrspace(1)* %arglist, i32 addrspace(1)* %gtid) nounwind ssp {
+define void @wrap(i8 addrspace(1)* addrspace(1)* %arglist, i32 addrspace(1)* %gtid) nounwind {
entry:
call void @stretch(<4 x i8> addrspace(1)* undef, <4 x i8> addrspace(1)* undef, i32 undef, i32 undef, i32 undef, i32 undef, <2 x float> undef, <4 x float> undef)
ret void
diff --git a/test/CodeGen/PowerPC/fast-isel-ext.ll b/test/CodeGen/PowerPC/fast-isel-ext.ll
index 6fd3b4035122..ce8ac440b79c 100644
--- a/test/CodeGen/PowerPC/fast-isel-ext.ll
+++ b/test/CodeGen/PowerPC/fast-isel-ext.ll
@@ -2,35 +2,35 @@
; zext
-define i32 @zext_8_32(i8 %a) nounwind ssp {
+define i32 @zext_8_32(i8 %a) nounwind {
; ELF64: zext_8_32
%r = zext i8 %a to i32
; ELF64: clrlwi {{[0-9]+}}, {{[0-9]+}}, 24
ret i32 %r
}
-define i32 @zext_16_32(i16 %a) nounwind ssp {
+define i32 @zext_16_32(i16 %a) nounwind {
; ELF64: zext_16_32
%r = zext i16 %a to i32
; ELF64: clrlwi {{[0-9]+}}, {{[0-9]+}}, 16
ret i32 %r
}
-define i64 @zext_8_64(i8 %a) nounwind ssp {
+define i64 @zext_8_64(i8 %a) nounwind {
; ELF64: zext_8_64
%r = zext i8 %a to i64
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
ret i64 %r
}
-define i64 @zext_16_64(i16 %a) nounwind ssp {
+define i64 @zext_16_64(i16 %a) nounwind {
; ELF64: zext_16_64
%r = zext i16 %a to i64
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
ret i64 %r
}
-define i64 @zext_32_64(i32 %a) nounwind ssp {
+define i64 @zext_32_64(i32 %a) nounwind {
; ELF64: zext_32_64
%r = zext i32 %a to i64
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
@@ -39,35 +39,35 @@ define i64 @zext_32_64(i32 %a) nounwind ssp {
; sext
-define i32 @sext_8_32(i8 %a) nounwind ssp {
+define i32 @sext_8_32(i8 %a) nounwind {
; ELF64: sext_8_32
%r = sext i8 %a to i32
; ELF64: extsb
ret i32 %r
}
-define i32 @sext_16_32(i16 %a) nounwind ssp {
+define i32 @sext_16_32(i16 %a) nounwind {
; ELF64: sext_16_32
%r = sext i16 %a to i32
; ELF64: extsh
ret i32 %r
}
-define i64 @sext_8_64(i8 %a) nounwind ssp {
+define i64 @sext_8_64(i8 %a) nounwind {
; ELF64: sext_8_64
%r = sext i8 %a to i64
; ELF64: extsb
ret i64 %r
}
-define i64 @sext_16_64(i16 %a) nounwind ssp {
+define i64 @sext_16_64(i16 %a) nounwind {
; ELF64: sext_16_64
%r = sext i16 %a to i64
; ELF64: extsh
ret i64 %r
}
-define i64 @sext_32_64(i32 %a) nounwind ssp {
+define i64 @sext_32_64(i32 %a) nounwind {
; ELF64: sext_32_64
%r = sext i32 %a to i64
; ELF64: extsw
diff --git a/test/CodeGen/PowerPC/fast-isel-fold.ll b/test/CodeGen/PowerPC/fast-isel-fold.ll
index e56101a28e2b..24cdca35b0dd 100644
--- a/test/CodeGen/PowerPC/fast-isel-fold.ll
+++ b/test/CodeGen/PowerPC/fast-isel-fold.ll
@@ -4,7 +4,7 @@
@b = global i16 2, align 2
@c = global i32 4, align 4
-define void @t1() nounwind uwtable ssp {
+define void @t1() nounwind {
; ELF64: t1
%1 = load i8, i8* @a, align 1
call void @foo1(i8 zeroext %1)
@@ -14,7 +14,7 @@ define void @t1() nounwind uwtable ssp {
ret void
}
-define void @t2() nounwind uwtable ssp {
+define void @t2() nounwind {
; ELF64: t2
%1 = load i16, i16* @b, align 2
call void @foo2(i16 zeroext %1)
@@ -24,7 +24,7 @@ define void @t2() nounwind uwtable ssp {
ret void
}
-define void @t2a() nounwind uwtable ssp {
+define void @t2a() nounwind {
; ELF64: t2a
%1 = load i32, i32* @c, align 4
call void @foo3(i32 zeroext %1)
@@ -38,7 +38,7 @@ declare void @foo1(i8 zeroext)
declare void @foo2(i16 zeroext)
declare void @foo3(i32 zeroext)
-define i32 @t3() nounwind uwtable ssp {
+define i32 @t3() nounwind {
; ELF64: t3
%1 = load i8, i8* @a, align 1
%2 = zext i8 %1 to i32
@@ -47,7 +47,7 @@ define i32 @t3() nounwind uwtable ssp {
ret i32 %2
}
-define i32 @t4() nounwind uwtable ssp {
+define i32 @t4() nounwind {
; ELF64: t4
%1 = load i16, i16* @b, align 2
%2 = zext i16 %1 to i32
@@ -56,7 +56,7 @@ define i32 @t4() nounwind uwtable ssp {
ret i32 %2
}
-define i32 @t5() nounwind uwtable ssp {
+define i32 @t5() nounwind {
; ELF64: t5
%1 = load i16, i16* @b, align 2
%2 = sext i16 %1 to i32
@@ -65,7 +65,7 @@ define i32 @t5() nounwind uwtable ssp {
ret i32 %2
}
-define i32 @t6() nounwind uwtable ssp {
+define i32 @t6() nounwind {
; ELF64: t6
%1 = load i8, i8* @a, align 2
%2 = sext i8 %1 to i32
@@ -74,7 +74,7 @@ define i32 @t6() nounwind uwtable ssp {
ret i32 %2
}
-define i64 @t7() nounwind uwtable ssp {
+define i64 @t7() nounwind {
; ELF64: t7
%1 = load i8, i8* @a, align 1
%2 = zext i8 %1 to i64
@@ -83,7 +83,7 @@ define i64 @t7() nounwind uwtable ssp {
ret i64 %2
}
-define i64 @t8() nounwind uwtable ssp {
+define i64 @t8() nounwind {
; ELF64: t8
%1 = load i16, i16* @b, align 2
%2 = zext i16 %1 to i64
@@ -92,7 +92,7 @@ define i64 @t8() nounwind uwtable ssp {
ret i64 %2
}
-define i64 @t9() nounwind uwtable ssp {
+define i64 @t9() nounwind {
; ELF64: t9
%1 = load i16, i16* @b, align 2
%2 = sext i16 %1 to i64
@@ -101,7 +101,7 @@ define i64 @t9() nounwind uwtable ssp {
ret i64 %2
}
-define i64 @t10() nounwind uwtable ssp {
+define i64 @t10() nounwind {
; ELF64: t10
%1 = load i8, i8* @a, align 2
%2 = sext i8 %1 to i64
@@ -110,7 +110,7 @@ define i64 @t10() nounwind uwtable ssp {
ret i64 %2
}
-define i64 @t11() nounwind uwtable ssp {
+define i64 @t11() nounwind {
; ELF64: t11
%1 = load i32, i32* @c, align 4
%2 = zext i32 %1 to i64
@@ -119,7 +119,7 @@ define i64 @t11() nounwind uwtable ssp {
ret i64 %2
}
-define i64 @t12() nounwind uwtable ssp {
+define i64 @t12() nounwind {
; ELF64: t12
%1 = load i32, i32* @c, align 4
%2 = sext i32 %1 to i64
diff --git a/test/CodeGen/PowerPC/fast-isel-indirectbr.ll b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
index b5477134c517..d66fd1fb752d 100644
--- a/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
+++ b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
-define void @t1(i8* %x) {
+define void @t1(i8* %x) nounwind {
entry:
; ELF64: t1
br label %L0
diff --git a/test/CodeGen/PowerPC/fast-isel-load-store.ll b/test/CodeGen/PowerPC/fast-isel-load-store.ll
index f6a55f06b2cb..800e5aa66090 100644
--- a/test/CodeGen/PowerPC/fast-isel-load-store.ll
+++ b/test/CodeGen/PowerPC/fast-isel-load-store.ll
@@ -24,7 +24,7 @@
; load
-define i8 @t1() nounwind uwtable ssp {
+define i8 @t1() nounwind {
; ELF64: t1
%1 = load i8, i8* @a, align 1
; ELF64: lbz
@@ -33,7 +33,7 @@ define i8 @t1() nounwind uwtable ssp {
ret i8 %2
}
-define i16 @t2() nounwind uwtable ssp {
+define i16 @t2() nounwind {
; ELF64: t2
%1 = load i16, i16* @b, align 2
; ELF64: lhz
@@ -42,7 +42,7 @@ define i16 @t2() nounwind uwtable ssp {
ret i16 %2
}
-define i32 @t3() nounwind uwtable ssp {
+define i32 @t3() nounwind {
; ELF64: t3
%1 = load i32, i32* @c, align 4
; ELF64: lwz
@@ -51,7 +51,7 @@ define i32 @t3() nounwind uwtable ssp {
ret i32 %2
}
-define i64 @t4() nounwind uwtable ssp {
+define i64 @t4() nounwind {
; ELF64: t4
%1 = load i64, i64* @d, align 4
; ELF64: ld
@@ -60,7 +60,7 @@ define i64 @t4() nounwind uwtable ssp {
ret i64 %2
}
-define float @t5() nounwind uwtable ssp {
+define float @t5() nounwind {
; ELF64: t5
%1 = load float, float* @e, align 4
; ELF64: lfs
@@ -69,7 +69,7 @@ define float @t5() nounwind uwtable ssp {
ret float %2
}
-define double @t6() nounwind uwtable ssp {
+define double @t6() nounwind {
; ELF64: t6
%1 = load double, double* @f, align 8
; ELF64: lfd
@@ -80,7 +80,7 @@ define double @t6() nounwind uwtable ssp {
; store
-define void @t7(i8 %v) nounwind uwtable ssp {
+define void @t7(i8 %v) nounwind {
; ELF64: t7
%1 = add nsw i8 %v, 1
store i8 %1, i8* @a, align 1
@@ -91,7 +91,7 @@ define void @t7(i8 %v) nounwind uwtable ssp {
ret void
}
-define void @t8(i16 %v) nounwind uwtable ssp {
+define void @t8(i16 %v) nounwind {
; ELF64: t8
%1 = add nsw i16 %v, 1
store i16 %1, i16* @b, align 2
@@ -102,7 +102,7 @@ define void @t8(i16 %v) nounwind uwtable ssp {
ret void
}
-define void @t9(i32 %v) nounwind uwtable ssp {
+define void @t9(i32 %v) nounwind {
; ELF64: t9
%1 = add nsw i32 %v, 1
store i32 %1, i32* @c, align 4
@@ -113,7 +113,7 @@ define void @t9(i32 %v) nounwind uwtable ssp {
ret void
}
-define void @t10(i64 %v) nounwind uwtable ssp {
+define void @t10(i64 %v) nounwind {
; ELF64: t10
%1 = add nsw i64 %v, 1
store i64 %1, i64* @d, align 4
@@ -124,7 +124,7 @@ define void @t10(i64 %v) nounwind uwtable ssp {
ret void
}
-define void @t11(float %v) nounwind uwtable ssp {
+define void @t11(float %v) nounwind {
; ELF64: t11
%1 = fadd float %v, 1.0
store float %1, float* @e, align 4
@@ -133,7 +133,7 @@ define void @t11(float %v) nounwind uwtable ssp {
ret void
}
-define void @t12(double %v) nounwind uwtable ssp {
+define void @t12(double %v) nounwind {
; ELF64: t12
%1 = fadd double %v, 1.0
store double %1, double* @f, align 8
@@ -143,7 +143,7 @@ define void @t12(double %v) nounwind uwtable ssp {
}
;; lwa requires an offset divisible by 4, so we need lwax here.
-define i64 @t13() nounwind uwtable ssp {
+define i64 @t13() nounwind {
; ELF64: t13
%1 = load i32, i32* getelementptr inbounds (%struct.s, %struct.s* @g, i32 0, i32 1), align 1
%2 = sext i32 %1 to i64
@@ -155,7 +155,7 @@ define i64 @t13() nounwind uwtable ssp {
}
;; ld requires an offset divisible by 4, so we need ldx here.
-define i64 @t14() nounwind uwtable ssp {
+define i64 @t14() nounwind {
; ELF64: t14
%1 = load i64, i64* getelementptr inbounds (%struct.t, %struct.t* @h, i32 0, i32 1), align 1
; ELF64: li
@@ -166,7 +166,7 @@ define i64 @t14() nounwind uwtable ssp {
}
;; std requires an offset divisible by 4, so we need stdx here.
-define void @t15(i64 %v) nounwind uwtable ssp {
+define void @t15(i64 %v) nounwind {
; ELF64: t15
%1 = add nsw i64 %v, 1
store i64 %1, i64* getelementptr inbounds (%struct.t, %struct.t* @h, i32 0, i32 1), align 1
@@ -179,7 +179,7 @@ define void @t15(i64 %v) nounwind uwtable ssp {
}
;; ld requires an offset that fits in 16 bits, so we need ldx here.
-define i64 @t16() nounwind uwtable ssp {
+define i64 @t16() nounwind {
; ELF64: t16
%1 = load i64, i64* getelementptr inbounds ([8192 x i64], [8192 x i64]* @i, i32 0, i64 5000), align 8
; ELF64: lis
@@ -191,7 +191,7 @@ define i64 @t16() nounwind uwtable ssp {
}
;; std requires an offset that fits in 16 bits, so we need stdx here.
-define void @t17(i64 %v) nounwind uwtable ssp {
+define void @t17(i64 %v) nounwind {
; ELF64: t17
%1 = add nsw i64 %v, 1
store i64 %1, i64* getelementptr inbounds ([8192 x i64], [8192 x i64]* @i, i32 0, i64 5000), align 8
diff --git a/test/CodeGen/PowerPC/fast-isel-redefinition.ll b/test/CodeGen/PowerPC/fast-isel-redefinition.ll
index 60706a6e1438..19392c938e15 100644
--- a/test/CodeGen/PowerPC/fast-isel-redefinition.ll
+++ b/test/CodeGen/PowerPC/fast-isel-redefinition.ll
@@ -3,7 +3,7 @@
; doesn't crash. (It crashed formerly on ARM, and proved useful in
; discovering a bug on PowerPC as well.)
-define i32 @f(i32* %x) nounwind ssp {
+define i32 @f(i32* %x) nounwind {
%y = getelementptr inbounds i32, i32* %x, i32 5000
%tmp103 = load i32, i32* %y, align 4
ret i32 %tmp103
diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll
index 1e4566d94dfd..e05ef7d9ab82 100644
--- a/test/CodeGen/PowerPC/fast-isel-ret.ll
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@@ -4,7 +4,7 @@
;
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
-define zeroext i1 @rettrue() nounwind uwtable ssp {
+define zeroext i1 @rettrue() nounwind {
entry:
; ELF64-LABEL: rettrue
; ELF64: li 3, 1
@@ -12,7 +12,7 @@ entry:
ret i1 true
}
-define zeroext i1 @retfalse() nounwind uwtable ssp {
+define zeroext i1 @retfalse() nounwind {
entry:
; ELF64-LABEL: retfalse
; ELF64: li 3, 0
@@ -20,7 +20,7 @@ entry:
ret i1 false
}
-define signext i1 @retstrue() nounwind uwtable ssp {
+define signext i1 @retstrue() nounwind {
entry:
; ELF64-LABEL: retstrue
; ELF64: li 3, -1
@@ -28,7 +28,7 @@ entry:
ret i1 true
}
-define signext i1 @retsfalse() nounwind uwtable ssp {
+define signext i1 @retsfalse() nounwind {
entry:
; ELF64-LABEL: retsfalse
; ELF64: li 3, 0
@@ -36,7 +36,7 @@ entry:
ret i1 false
}
-define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
+define signext i8 @ret2(i8 signext %a) nounwind {
entry:
; ELF64-LABEL: ret2
; ELF64: extsb
@@ -44,7 +44,7 @@ entry:
ret i8 %a
}
-define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
+define zeroext i8 @ret3(i8 signext %a) nounwind {
entry:
; ELF64-LABEL: ret3
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 56
@@ -52,7 +52,7 @@ entry:
ret i8 %a
}
-define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
+define signext i16 @ret4(i16 signext %a) nounwind {
entry:
; ELF64-LABEL: ret4
; ELF64: extsh
@@ -60,7 +60,7 @@ entry:
ret i16 %a
}
-define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
+define zeroext i16 @ret5(i16 signext %a) nounwind {
entry:
; ELF64-LABEL: ret5
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
@@ -68,7 +68,7 @@ entry:
ret i16 %a
}
-define i16 @ret6(i16 %a) nounwind uwtable ssp {
+define i16 @ret6(i16 %a) nounwind {
entry:
; ELF64-LABEL: ret6
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 48
@@ -76,7 +76,7 @@ entry:
ret i16 %a
}
-define signext i32 @ret7(i32 signext %a) nounwind uwtable ssp {
+define signext i32 @ret7(i32 signext %a) nounwind {
entry:
; ELF64-LABEL: ret7
; ELF64: extsw
@@ -84,7 +84,7 @@ entry:
ret i32 %a
}
-define zeroext i32 @ret8(i32 signext %a) nounwind uwtable ssp {
+define zeroext i32 @ret8(i32 signext %a) nounwind {
entry:
; ELF64-LABEL: ret8
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
@@ -92,7 +92,7 @@ entry:
ret i32 %a
}
-define i32 @ret9(i32 %a) nounwind uwtable ssp {
+define i32 @ret9(i32 %a) nounwind {
entry:
; ELF64-LABEL: ret9
; ELF64: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
@@ -100,7 +100,7 @@ entry:
ret i32 %a
}
-define i64 @ret10(i64 %a) nounwind uwtable ssp {
+define i64 @ret10(i64 %a) nounwind {
entry:
; ELF64-LABEL: ret10
; ELF64-NOT: exts
@@ -109,21 +109,21 @@ entry:
ret i64 %a
}
-define float @ret11(float %a) nounwind uwtable ssp {
+define float @ret11(float %a) nounwind {
entry:
; ELF64-LABEL: ret11
; ELF64: blr
ret float %a
}
-define double @ret12(double %a) nounwind uwtable ssp {
+define double @ret12(double %a) nounwind {
entry:
; ELF64-LABEL: ret12
; ELF64: blr
ret double %a
}
-define i8 @ret13() nounwind uwtable ssp {
+define i8 @ret13() nounwind {
entry:
; ELF64-LABEL: ret13
; ELF64: li
@@ -131,7 +131,7 @@ entry:
ret i8 15;
}
-define i16 @ret14() nounwind uwtable ssp {
+define i16 @ret14() nounwind {
entry:
; ELF64-LABEL: ret14
; ELF64: li
@@ -139,7 +139,7 @@ entry:
ret i16 -225;
}
-define i32 @ret15() nounwind uwtable ssp {
+define i32 @ret15() nounwind {
entry:
; ELF64-LABEL: ret15
; ELF64: lis
@@ -148,7 +148,7 @@ entry:
ret i32 278135;
}
-define i64 @ret16() nounwind uwtable ssp {
+define i64 @ret16() nounwind {
entry:
; ELF64-LABEL: ret16
; ELF64: li
@@ -159,7 +159,7 @@ entry:
ret i64 27813515225;
}
-define float @ret17() nounwind uwtable ssp {
+define float @ret17() nounwind {
entry:
; ELF64-LABEL: ret17
; ELF64: addis
@@ -168,7 +168,7 @@ entry:
ret float 2.5;
}
-define double @ret18() nounwind uwtable ssp {
+define double @ret18() nounwind {
entry:
; ELF64-LABEL: ret18
; ELF64: addis
@@ -176,3 +176,13 @@ entry:
; ELF64: blr
ret double 2.5e-33;
}
+
+define zeroext i32 @ret19() nounwind {
+entry:
+; ELF64-LABEL: ret19
+; ELF64: li
+; ELF64: oris
+; ELF64: ori
+; ELF64: blr
+ ret i32 -1
+}
diff --git a/test/CodeGen/PowerPC/fast-isel-shifter.ll b/test/CodeGen/PowerPC/fast-isel-shifter.ll
index c18f659dde13..04cb41920605 100644
--- a/test/CodeGen/PowerPC/fast-isel-shifter.ll
+++ b/test/CodeGen/PowerPC/fast-isel-shifter.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
-define i32 @shl() nounwind ssp {
+define i32 @shl() nounwind {
entry:
; ELF64: shl
; ELF64: slw
@@ -8,7 +8,7 @@ entry:
ret i32 %shl
}
-define i32 @shl_reg(i32 %src1, i32 %src2) nounwind ssp {
+define i32 @shl_reg(i32 %src1, i32 %src2) nounwind {
entry:
; ELF64: shl_reg
; ELF64: slw
@@ -16,7 +16,7 @@ entry:
ret i32 %shl
}
-define i32 @lshr() nounwind ssp {
+define i32 @lshr() nounwind {
entry:
; ELF64: lshr
; ELF64: srw
@@ -24,7 +24,7 @@ entry:
ret i32 %lshr
}
-define i32 @lshr_reg(i32 %src1, i32 %src2) nounwind ssp {
+define i32 @lshr_reg(i32 %src1, i32 %src2) nounwind {
entry:
; ELF64: lshr_reg
; ELF64: srw
@@ -32,7 +32,7 @@ entry:
ret i32 %lshr
}
-define i32 @ashr() nounwind ssp {
+define i32 @ashr() nounwind {
entry:
; ELF64: ashr
; ELF64: srawi
@@ -40,7 +40,7 @@ entry:
ret i32 %ashr
}
-define i32 @ashr_reg(i32 %src1, i32 %src2) nounwind ssp {
+define i32 @ashr_reg(i32 %src1, i32 %src2) nounwind {
entry:
; ELF64: ashr_reg
; ELF64: sraw
diff --git a/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll b/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
index 96cf67c869f9..32f4c23c2de2 100644
--- a/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
+++ b/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
@@ -2,7 +2,7 @@
; sext(a) + sext(b) != sext(a + b)
; RUN: llc -mtriple=powerpc64-unknown-freebsd10.0 %s -O0 -o - | FileCheck %s
-define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp {
+define zeroext i8 @gep_promotion(i8* %ptr) nounwind {
entry:
%ptr.addr = alloca i8*, align 8
%add = add i8 64, 64 ; 0x40 + 0x40
diff --git a/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll b/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll
new file mode 100644
index 000000000000..fd2ba4ec635e
--- /dev/null
+++ b/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll
@@ -0,0 +1,89 @@
+; RUN: llc -enable-unsafe-fp-math < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; CHECK-NOT: {{vmrg[hl]w.*(3[23456789]|[456][0-9])}}
+define void @__f0() {
+entry:
+ %0 = shufflevector <8 x float> zeroinitializer, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = shufflevector <16 x float> %0, <16 x float> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %2 = shufflevector <8 x float> zeroinitializer, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %3 = shufflevector <16 x float> %2, <16 x float> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = fmul <32 x float> %1, %3
+ %5 = load <4 x float>, <4 x float>* undef, align 128
+ %6 = load <4 x float>, <4 x float>* undef, align 128
+ %7 = shufflevector <4 x float> undef, <4 x float> %5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %8 = shufflevector <4 x float> undef, <4 x float> %6, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %9 = shufflevector <8 x float> %7, <8 x float> %8, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %10 = shufflevector <16 x float> undef, <16 x float> %9, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %11 = load <4 x float>, <4 x float>* null, align 128
+ %12 = load <4 x float>, <4 x float>* undef, align 128
+ %13 = shufflevector <4 x float> undef, <4 x float> %11, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %14 = shufflevector <4 x float> undef, <4 x float> %12, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %15 = shufflevector <8 x float> %13, <8 x float> %14, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %16 = shufflevector <16 x float> undef, <16 x float> %15, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %17 = fmul <32 x float> %10, %16
+ %18 = fsub <32 x float> %4, %17
+ %19 = shufflevector <32 x float> %18, <32 x float> undef, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+ %20 = bitcast <64 x float> %19 to <32 x double>
+ %21 = shufflevector <32 x double> undef, <32 x double> %20, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+ %22 = bitcast <64 x double> %21 to <128 x float>
+ %23 = shufflevector <128 x float> undef, <128 x float> %22, <256 x i32> <i32 0, i32 128, i32 1, i32 129, i32 2, i32 130, i32 3, i32 131, i32 4, i32 132, i32 5, i32 133, i32 6, i32 134, i32 7, i32 135, i32 8, i32 136, i32 9, i32 137, i32 10, i32 138, i32 11, i32 139, i32 12, i32 140, i32 13, i32 141, i32 14, i32 142, i32 15, i32 143, i32 16, i32 144, i32 17, i32 145, i32 18, i32 146, i32 19, i32 147, i32 20, i32 148, i32 21, i32 149, i32 22, i32 150, i32 23, i32 151, i32 24, i32 152, i32 25, i32 153, i32 26, i32 154, i32 27, i32 155, i32 28, i32 156, i32 29, i32 157, i32 30, i32 158, i32 31, i32 159, i32 32, i32 160, i32 33, i32 161, i32 34, i32 162, i32 35, i32 163, i32 36, i32 164, i32 37, i32 165, i32 38, i32 166, i32 39, i32 167, i32 40, i32 168, i32 41, i32 169, i32 42, i32 170, i32 43, i32 171, i32 44, i32 172, i32 45, i32 173, i32 46, i32 174, i32 47, i32 175, i32 48, i32 176, i32 49, i32 177, i32 50, i32 178, i32 51, i32 179, i32 52, i32 180, i32 53, i32 181, i32 54, i32 182, i32 55, i32 183, i32 56, i32 184, i32 57, i32 185, i32 58, i32 186, i32 59, i32 187, i32 60, i32 188, i32 61, i32 189, i32 62, i32 190, i32 63, i32 191, i32 64, i32 192, i32 65, i32 193, i32 66, i32 194, i32 67, i32 195, i32 68, i32 196, i32 69, i32 197, i32 70, i32 198, i32 71, i32 199, i32 72, i32 200, i32 73, i32 201, i32 74, i32 202, i32 75, i32 203, i32 76, i32 204, i32 77, i32 205, i32 78, i32 206, i32 79, i32 207, i32 80, i32 208, i32 81, i32 209, i32 82, i32 210, i32 83, i32 211, i32 84, i32 212, i32 85, i32 213, i32 86, i32 214, i32 87, i32 215, i32 88, i32 216, i32 89, i32 217, i32 90, i32 218, i32 91, i32 219, i32 92, i32 220, i32 93, i32 221, i32 94, i32 222, i32 95, i32 223, i32 96, i32 224, i32 97, i32 225, i32 98, i32 226, i32 99, i32 227, i32 100, i32 228, i32 101, i32 229, i32 102, i32 230, i32 103, i32 231, i32 104, i32 232, i32 105, i32 233, i32 106, i32 234, i32 107, i32 235, i32 108, i32 236, i32 109, i32 237, i32 110, i32 238, i32 111, i32 239, i32 112, i32 240, i32 113, i32 241, i32 114, i32 242, i32 115, i32 243, i32 116, i32 244, i32 117, i32 245, i32 118, i32 246, i32 119, i32 247, i32 120, i32 248, i32 121, i32 249, i32 122, i32 250, i32 123, i32 251, i32 124, i32 252, i32 125, i32 253, i32 126, i32 254, i32 127, i32 255>
+ %24 = shufflevector <256 x float> undef, <256 x float> %23, <512 x i32> <i32 0, i32 256, i32 1, i32 257, i32 2, i32 258, i32 3, i32 259, i32 4, i32 260, i32 5, i32 261, i32 6, i32 262, i32 7, i32 263, i32 8, i32 264, i32 9, i32 265, i32 10, i32 266, i32 11, i32 267, i32 12, i32 268, i32 13, i32 269, i32 14, i32 270, i32 15, i32 271, i32 16, i32 272, i32 17, i32 273, i32 18, i32 274, i32 19, i32 275, i32 20, i32 276, i32 21, i32 277, i32 22, i32 278, i32 23, i32 279, i32 24, i32 280, i32 25, i32 281, i32 26, i32 282, i32 27, i32 283, i32 28, i32 284, i32 29, i32 285, i32 30, i32 286, i32 31, i32 287, i32 32, i32 288, i32 33, i32 289, i32 34, i32 290, i32 35, i32 291, i32 36, i32 292, i32 37, i32 293, i32 38, i32 294, i32 39, i32 295, i32 40, i32 296, i32 41, i32 297, i32 42, i32 298, i32 43, i32 299, i32 44, i32 300, i32 45, i32 301, i32 46, i32 302, i32 47, i32 303, i32 48, i32 304, i32 49, i32 305, i32 50, i32 306, i32 51, i32 307, i32 52, i32 308, i32 53, i32 309, i32 54, i32 310, i32 55, i32 311, i32 56, i32 312, i32 57, i32 313, i32 58, i32 314, i32 59, i32 315, i32 60, i32 316, i32 61, i32 317, i32 62, i32 318, i32 63, i32 319, i32 64, i32 320, i32 65, i32 321, i32 66, i32 322, i32 67, i32 323, i32 68, i32 324, i32 69, i32 325, i32 70, i32 326, i32 71, i32 327, i32 72, i32 328, i32 73, i32 329, i32 74, i32 330, i32 75, i32 331, i32 76, i32 332, i32 77, i32 333, i32 78, i32 334, i32 79, i32 335, i32 80, i32 336, i32 81, i32 337, i32 82, i32 338, i32 83, i32 339, i32 84, i32 340, i32 85, i32 341, i32 86, i32 342, i32 87, i32 343, i32 88, i32 344, i32 89, i32 345, i32 90, i32 346, i32 91, i32 347, i32 92, i32 348, i32 93, i32 349, i32 94, i32 350, i32 95, i32 351, i32 96, i32 352, i32 97, i32 353, i32 98, i32 354, i32 99, i32 355, i32 100, i32 356, i32 101, i32 357, i32 102, i32 358, i32 103, i32 359, i32 104, i32 360, i32 105, i32 361, i32 106, i32 362, i32 107, i32 363, i32 108, i32 364, i32 109, i32 365, i32 110, i32 366, i32 111, i32 367, i32 112, i32 368, i32 113, i32 369, i32 114, i32 370, i32 115, i32 371, i32 116, i32 372, i32 117, i32 373, i32 118, i32 374, i32 119, i32 375, i32 120, i32 376, i32 121, i32 377, i32 122, i32 378, i32 123, i32 379, i32 124, i32 380, i32 125, i32 381, i32 126, i32 382, i32 127, i32 383, i32 128, i32 384, i32 129, i32 385, i32 130, i32 386, i32 131, i32 387, i32 132, i32 388, i32 133, i32 389, i32 134, i32 390, i32 135, i32 391, i32 136, i32 392, i32 137, i32 393, i32 138, i32 394, i32 139, i32 395, i32 140, i32 396, i32 141, i32 397, i32 142, i32 398, i32 143, i32 399, i32 144, i32 400, i32 145, i32 401, i32 146, i32 402, i32 147, i32 403, i32 148, i32 404, i32 149, i32 405, i32 150, i32 406, i32 151, i32 407, i32 152, i32 408, i32 153, i32 409, i32 154, i32 410, i32 155, i32 411, i32 156, i32 412, i32 157, i32 413, i32 158, i32 414, i32 159, i32 415, i32 160, i32 416, i32 161, i32 417, i32 162, i32 418, i32 163, i32 419, i32 164, i32 420, i32 165, i32 421, i32 166, i32 422, i32 167, i32 423, i32 168, i32 424, i32 169, i32 425, i32 170, i32 426, i32 171, i32 427, i32 172, i32 428, i32 173, i32 429, i32 174, i32 430, i32 175, i32 431, i32 176, i32 432, i32 177, i32 433, i32 178, i32 434, i32 179, i32 435, i32 180, i32 436, i32 181, i32 437, i32 182, i32 438, i32 183, i32 439, i32 184, i32 440, i32 185, i32 441, i32 186, i32 442, i32 187, i32 443, i32 188, i32 444, i32 189, i32 445, i32 190, i32 446, i32 191, i32 447, i32 192, i32 448, i32 193, i32 449, i32 194, i32 450, i32 195, i32 451, i32 196, i32 452, i32 197, i32 453, i32 198, i32 454, i32 199, i32 455, i32 200, i32 456, i32 201, i32 457, i32 202, i32 458, i32 203, i32 459, i32 204, i32 460, i32 205, i32 461, i32 206, i32 462, i32 207, i32 463, i32 208, i32 464, i32 209, i32 465, i32 210, i32 466, i32 211, i32 467, i32 212, i32 468, i32 213, i32 469, i32 214, i32 470, i32 215, i32 471, i32 216, i32 472, i32 217, i32 473, i32 218, i32 474, i32 219, i32 475, i32 220, i32 476, i32 221, i32 477, i32 222, i32 478, i32 223, i32 479, i32 224, i32 480, i32 225, i32 481, i32 226, i32 482, i32 227, i32 483, i32 228, i32 484, i32 229, i32 485, i32 230, i32 486, i32 231, i32 487, i32 232, i32 488, i32 233, i32 489, i32 234, i32 490, i32 235, i32 491, i32 236, i32 492, i32 237, i32 493, i32 238, i32 494, i32 239, i32 495, i32 240, i32 496, i32 241, i32 497, i32 242, i32 498, i32 243, i32 499, i32 244, i32 500, i32 245, i32 501, i32 246, i32 502, i32 247, i32 503, i32 248, i32 504, i32 249, i32 505, i32 250, i32 506, i32 251, i32 507, i32 252, i32 508, i32 253, i32 509, i32 254, i32 510, i32 255, i32 511>
+ %25 = shufflevector <512 x float> %24, <512 x float> undef, <1024 x i32> <i32 0, i32 512, i32 1, i32 513, i32 2, i32 514, i32 3, i32 515, i32 4, i32 516, i32 5, i32 517, i32 6, i32 518, i32 7, i32 519, i32 8, i32 520, i32 9, i32 521, i32 10, i32 522, i32 11, i32 523, i32 12, i32 524, i32 13, i32 525, i32 14, i32 526, i32 15, i32 527, i32 16, i32 528, i32 17, i32 529, i32 18, i32 530, i32 19, i32 531, i32 20, i32 532, i32 21, i32 533, i32 22, i32 534, i32 23, i32 535, i32 24, i32 536, i32 25, i32 537, i32 26, i32 538, i32 27, i32 539, i32 28, i32 540, i32 29, i32 541, i32 30, i32 542, i32 31, i32 543, i32 32, i32 544, i32 33, i32 545, i32 34, i32 546, i32 35, i32 547, i32 36, i32 548, i32 37, i32 549, i32 38, i32 550, i32 39, i32 551, i32 40, i32 552, i32 41, i32 553, i32 42, i32 554, i32 43, i32 555, i32 44, i32 556, i32 45, i32 557, i32 46, i32 558, i32 47, i32 559, i32 48, i32 560, i32 49, i32 561, i32 50, i32 562, i32 51, i32 563, i32 52, i32 564, i32 53, i32 565, i32 54, i32 566, i32 55, i32 567, i32 56, i32 568, i32 57, i32 569, i32 58, i32 570, i32 59, i32 571, i32 60, i32 572, i32 61, i32 573, i32 62, i32 574, i32 63, i32 575, i32 64, i32 576, i32 65, i32 577, i32 66, i32 578, i32 67, i32 579, i32 68, i32 580, i32 69, i32 581, i32 70, i32 582, i32 71, i32 583, i32 72, i32 584, i32 73, i32 585, i32 74, i32 586, i32 75, i32 587, i32 76, i32 588, i32 77, i32 589, i32 78, i32 590, i32 79, i32 591, i32 80, i32 592, i32 81, i32 593, i32 82, i32 594, i32 83, i32 595, i32 84, i32 596, i32 85, i32 597, i32 86, i32 598, i32 87, i32 599, i32 88, i32 600, i32 89, i32 601, i32 90, i32 602, i32 91, i32 603, i32 92, i32 604, i32 93, i32 605, i32 94, i32 606, i32 95, i32 607, i32 96, i32 608, i32 97, i32 609, i32 98, i32 610, i32 99, i32 611, i32 100, i32 612, i32 101, i32 613, i32 102, i32 614, i32 103, i32 615, i32 104, i32 616, i32 105, i32 617, i32 106, i32 618, i32 107, i32 619, i32 108, i32 620, i32 109, i32 621, i32 110, i32 622, i32 111, i32 623, i32 112, i32 624, i32 113, i32 625, i32 114, i32 626, i32 115, i32 627, i32 116, i32 628, i32 117, i32 629, i32 118, i32 630, i32 119, i32 631, i32 120, i32 632, i32 121, i32 633, i32 122, i32 634, i32 123, i32 635, i32 124, i32 636, i32 125, i32 637, i32 126, i32 638, i32 127, i32 639, i32 128, i32 640, i32 129, i32 641, i32 130, i32 642, i32 131, i32 643, i32 132, i32 644, i32 133, i32 645, i32 134, i32 646, i32 135, i32 647, i32 136, i32 648, i32 137, i32 649, i32 138, i32 650, i32 139, i32 651, i32 140, i32 652, i32 141, i32 653, i32 142, i32 654, i32 143, i32 655, i32 144, i32 656, i32 145, i32 657, i32 146, i32 658, i32 147, i32 659, i32 148, i32 660, i32 149, i32 661, i32 150, i32 662, i32 151, i32 663, i32 152, i32 664, i32 153, i32 665, i32 154, i32 666, i32 155, i32 667, i32 156, i32 668, i32 157, i32 669, i32 158, i32 670, i32 159, i32 671, i32 160, i32 672, i32 161, i32 673, i32 162, i32 674, i32 163, i32 675, i32 164, i32 676, i32 165, i32 677, i32 166, i32 678, i32 167, i32 679, i32 168, i32 680, i32 169, i32 681, i32 170, i32 682, i32 171, i32 683, i32 172, i32 684, i32 173, i32 685, i32 174, i32 686, i32 175, i32 687, i32 176, i32 688, i32 177, i32 689, i32 178, i32 690, i32 179, i32 691, i32 180, i32 692, i32 181, i32 693, i32 182, i32 694, i32 183, i32 695, i32 184, i32 696, i32 185, i32 697, i32 186, i32 698, i32 187, i32 699, i32 188, i32 700, i32 189, i32 701, i32 190, i32 702, i32 191, i32 703, i32 192, i32 704, i32 193, i32 705, i32 194, i32 706, i32 195, i32 707, i32 196, i32 708, i32 197, i32 709, i32 198, i32 710, i32 199, i32 711, i32 200, i32 712, i32 201, i32 713, i32 202, i32 714, i32 203, i32 715, i32 204, i32 716, i32 205, i32 717, i32 206, i32 718, i32 207, i32 719, i32 208, i32 720, i32 209, i32 721, i32 210, i32 722, i32 211, i32 723, i32 212, i32 724, i32 213, i32 725, i32 214, i32 726, i32 215, i32 727, i32 216, i32 728, i32 217, i32 729, i32 218, i32 730, i32 219, i32 731, i32 220, i32 732, i32 221, i32 733, i32 222, i32 734, i32 223, i32 735, i32 224, i32 736, i32 225, i32 737, i32 226, i32 738, i32 227, i32 739, i32 228, i32 740, i32 229, i32 741, i32 230, i32 742, i32 231, i32 743, i32 232, i32 744, i32 233, i32 745, i32 234, i32 746, i32 235, i32 747, i32 236, i32 748, i32 237, i32 749, i32 238, i32 750, i32 239, i32 751, i32 240, i32 752, i32 241, i32 753, i32 242, i32 754, i32 243, i32 755, i32 244, i32 756, i32 245, i32 757, i32 246, i32 758, i32 247, i32 759, i32 248, i32 760, i32 249, i32 761, i32 250, i32 762, i32 251, i32 763, i32 252, i32 764, i32 253, i32 765, i32 254, i32 766, i32 255, i32 767, i32 256, i32 768, i32 257, i32 769, i32 258, i32 770, i32 259, i32 771, i32 260, i32 772, i32 261, i32 773, i32 262, i32 774, i32 263, i32 775, i32 264, i32 776, i32 265, i32 777, i32 266, i32 778, i32 267, i32 779, i32 268, i32 780, i32 269, i32 781, i32 270, i32 782, i32 271, i32 783, i32 272, i32 784, i32 273, i32 785, i32 274, i32 786, i32 275, i32 787, i32 276, i32 788, i32 277, i32 789, i32 278, i32 790, i32 279, i32 791, i32 280, i32 792, i32 281, i32 793, i32 282, i32 794, i32 283, i32 795, i32 284, i32 796, i32 285, i32 797, i32 286, i32 798, i32 287, i32 799, i32 288, i32 800, i32 289, i32 801, i32 290, i32 802, i32 291, i32 803, i32 292, i32 804, i32 293, i32 805, i32 294, i32 806, i32 295, i32 807, i32 296, i32 808, i32 297, i32 809, i32 298, i32 810, i32 299, i32 811, i32 300, i32 812, i32 301, i32 813, i32 302, i32 814, i32 303, i32 815, i32 304, i32 816, i32 305, i32 817, i32 306, i32 818, i32 307, i32 819, i32 308, i32 820, i32 309, i32 821, i32 310, i32 822, i32 311, i32 823, i32 312, i32 824, i32 313, i32 825, i32 314, i32 826, i32 315, i32 827, i32 316, i32 828, i32 317, i32 829, i32 318, i32 830, i32 319, i32 831, i32 320, i32 832, i32 321, i32 833, i32 322, i32 834, i32 323, i32 835, i32 324, i32 836, i32 325, i32 837, i32 326, i32 838, i32 327, i32 839, i32 328, i32 840, i32 329, i32 841, i32 330, i32 842, i32 331, i32 843, i32 332, i32 844, i32 333, i32 845, i32 334, i32 846, i32 335, i32 847, i32 336, i32 848, i32 337, i32 849, i32 338, i32 850, i32 339, i32 851, i32 340, i32 852, i32 341, i32 853, i32 342, i32 854, i32 343, i32 855, i32 344, i32 856, i32 345, i32 857, i32 346, i32 858, i32 347, i32 859, i32 348, i32 860, i32 349, i32 861, i32 350, i32 862, i32 351, i32 863, i32 352, i32 864, i32 353, i32 865, i32 354, i32 866, i32 355, i32 867, i32 356, i32 868, i32 357, i32 869, i32 358, i32 870, i32 359, i32 871, i32 360, i32 872, i32 361, i32 873, i32 362, i32 874, i32 363, i32 875, i32 364, i32 876, i32 365, i32 877, i32 366, i32 878, i32 367, i32 879, i32 368, i32 880, i32 369, i32 881, i32 370, i32 882, i32 371, i32 883, i32 372, i32 884, i32 373, i32 885, i32 374, i32 886, i32 375, i32 887, i32 376, i32 888, i32 377, i32 889, i32 378, i32 890, i32 379, i32 891, i32 380, i32 892, i32 381, i32 893, i32 382, i32 894, i32 383, i32 895, i32 384, i32 896, i32 385, i32 897, i32 386, i32 898, i32 387, i32 899, i32 388, i32 900, i32 389, i32 901, i32 390, i32 902, i32 391, i32 903, i32 392, i32 904, i32 393, i32 905, i32 394, i32 906, i32 395, i32 907, i32 396, i32 908, i32 397, i32 909, i32 398, i32 910, i32 399, i32 911, i32 400, i32 912, i32 401, i32 913, i32 402, i32 914, i32 403, i32 915, i32 404, i32 916, i32 405, i32 917, i32 406, i32 918, i32 407, i32 919, i32 408, i32 920, i32 409, i32 921, i32 410, i32 922, i32 411, i32 923, i32 412, i32 924, i32 413, i32 925, i32 414, i32 926, i32 415, i32 927, i32 416, i32 928, i32 417, i32 929, i32 418, i32 930, i32 419, i32 931, i32 420, i32 932, i32 421, i32 933, i32 422, i32 934, i32 423, i32 935, i32 424, i32 936, i32 425, i32 937, i32 426, i32 938, i32 427, i32 939, i32 428, i32 940, i32 429, i32 941, i32 430, i32 942, i32 431, i32 943, i32 432, i32 944, i32 433, i32 945, i32 434, i32 946, i32 435, i32 947, i32 436, i32 948, i32 437, i32 949, i32 438, i32 950, i32 439, i32 951, i32 440, i32 952, i32 441, i32 953, i32 442, i32 954, i32 443, i32 955, i32 444, i32 956, i32 445, i32 957, i32 446, i32 958, i32 447, i32 959, i32 448, i32 960, i32 449, i32 961, i32 450, i32 962, i32 451, i32 963, i32 452, i32 964, i32 453, i32 965, i32 454, i32 966, i32 455, i32 967, i32 456, i32 968, i32 457, i32 969, i32 458, i32 970, i32 459, i32 971, i32 460, i32 972, i32 461, i32 973, i32 462, i32 974, i32 463, i32 975, i32 464, i32 976, i32 465, i32 977, i32 466, i32 978, i32 467, i32 979, i32 468, i32 980, i32 469, i32 981, i32 470, i32 982, i32 471, i32 983, i32 472, i32 984, i32 473, i32 985, i32 474, i32 986, i32 475, i32 987, i32 476, i32 988, i32 477, i32 989, i32 478, i32 990, i32 479, i32 991, i32 480, i32 992, i32 481, i32 993, i32 482, i32 994, i32 483, i32 995, i32 484, i32 996, i32 485, i32 997, i32 486, i32 998, i32 487, i32 999, i32 488, i32 1000, i32 489, i32 1001, i32 490, i32 1002, i32 491, i32 1003, i32 492, i32 1004, i32 493, i32 1005, i32 494, i32 1006, i32 495, i32 1007, i32 496, i32 1008, i32 497, i32 1009, i32 498, i32 1010, i32 499, i32 1011, i32 500, i32 1012, i32 501, i32 1013, i32 502, i32 1014, i32 503, i32 1015, i32 504, i32 1016, i32 505, i32 1017, i32 506, i32 1018, i32 507, i32 1019, i32 508, i32 1020, i32 509, i32 1021, i32 510, i32 1022, i32 511, i32 1023>
+ %26 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <4 x float> %26, <4 x float>* undef, align 128
+ %27 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 56, i32 57, i32 58, i32 59>
+ store <4 x float> %27, <4 x float>* undef, align 128
+ %28 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 164, i32 165, i32 166, i32 167>
+ store <4 x float> %28, <4 x float>* undef, align 128
+ %29 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 168, i32 169, i32 170, i32 171>
+ store <4 x float> %29, <4 x float>* undef, align 128
+ %30 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 172, i32 173, i32 174, i32 175>
+ store <4 x float> %30, <4 x float>* undef, align 128
+ %31 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 176, i32 177, i32 178, i32 179>
+ store <4 x float> %31, <4 x float>* undef, align 128
+ %32 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 284, i32 285, i32 286, i32 287>
+ store <4 x float> %32, <4 x float>* undef, align 128
+ %33 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 328, i32 329, i32 330, i32 331>
+ store <4 x float> %33, <4 x float>* undef, align 128
+ %34 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 332, i32 333, i32 334, i32 335>
+ store <4 x float> %34, <4 x float>* undef, align 128
+ %35 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 524, i32 525, i32 526, i32 527>
+ store <4 x float> %35, <4 x float>* undef, align 128
+ %36 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 528, i32 529, i32 530, i32 531>
+ store <4 x float> %36, <4 x float>* undef, align 128
+ %37 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 648, i32 649, i32 650, i32 651>
+ store <4 x float> %37, <4 x float>* undef, align 128
+ %38 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 652, i32 653, i32 654, i32 655>
+ store <4 x float> %38, <4 x float>* undef, align 128
+ %39 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 656, i32 657, i32 658, i32 659>
+ store <4 x float> %39, <4 x float>* undef, align 128
+ %40 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 732, i32 733, i32 734, i32 735>
+ store <4 x float> %40, <4 x float>* undef, align 128
+ %41 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 736, i32 737, i32 738, i32 739>
+ store <4 x float> %41, <4 x float>* undef, align 128
+ %42 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 740, i32 741, i32 742, i32 743>
+ store <4 x float> %42, <4 x float>* undef, align 128
+ %43 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 872, i32 873, i32 874, i32 875>
+ store <4 x float> %43, <4 x float>* undef, align 128
+ %44 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 968, i32 969, i32 970, i32 971>
+ store <4 x float> %44, <4 x float>* undef, align 128
+ %45 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 1016, i32 1017, i32 1018, i32 1019>
+ store <4 x float> %45, <4 x float>* undef, align 128
+ %46 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> <i32 1020, i32 1021, i32 1022, i32 1023>
+ store <4 x float> %46, <4 x float>* undef, align 128
+ %47 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+ store <4 x float> %47, <4 x float>* undef, align 128
+ %48 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ store <4 x float> %48, <4 x float>* undef, align 128
+ %49 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+ store <4 x float> %49, <4 x float>* undef, align 128
+ %50 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> <i32 20, i32 21, i32 22, i32 23>
+ store <4 x float> %50, <4 x float>* undef, align 128
+ %51 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> <i32 148, i32 149, i32 150, i32 151>
+ store <4 x float> %51, <4 x float>* undef, align 128
+ %52 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> <i32 632, i32 633, i32 634, i32 635>
+ store <4 x float> %52, <4 x float>* undef, align 128
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll b/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
index 1d9b64823140..4868a18a95a0 100644
--- a/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
+++ b/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
@@ -24,8 +24,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z6testfcc
; CHECK: mtvsrwz [[MOVEREG01:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG01]]
+; CHECK: xscvuxdsp 1, [[MOVEREG01]]
}
; Function Attrs: nounwind
@@ -77,8 +76,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z7testfuch
; CHECK: mtvsrwz [[MOVEREG03:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG03]]
+; CHECK: xscvuxdsp 1, [[MOVEREG03]]
}
; Function Attrs: nounwind
@@ -130,8 +128,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z6testfss
; CHECK: mtvsrwa [[MOVEREG05:[0-9]+]], 3
-; FIXME: Once we have XSCVSXDSP implemented, this will change
-; CHECK: fcfids 1, [[MOVEREG05]]
+; CHECK: xscvsxdsp 1, [[MOVEREG05]]
}
; Function Attrs: nounwind
@@ -183,8 +180,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z7testfust
; CHECK: mtvsrwz [[MOVEREG07:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG07]]
+; CHECK: xscvuxdsp 1, [[MOVEREG07]]
}
; Function Attrs: nounwind
@@ -236,8 +232,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z6testfii
; CHECK: mtvsrwa [[MOVEREG09:[0-9]+]], 3
-; FIXME: Once we have XSCVSXDSP implemented, this will change
-; CHECK: fcfids 1, [[MOVEREG09]]
+; CHECK: xscvsxdsp 1, [[MOVEREG09]]
}
; Function Attrs: nounwind
@@ -289,8 +284,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z7testfuij
; CHECK: mtvsrwz [[MOVEREG11:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG11]]
+; CHECK: xscvuxdsp 1, [[MOVEREG11]]
}
; Function Attrs: nounwind
@@ -342,8 +336,7 @@ entry:
ret float %conv
; CHECK-LABEL:@_Z7testfllx
; CHECK: mtvsrd [[MOVEREG13:[0-9]+]], 3
-; FIXME: Once we have XSCVSXDSP implemented, this will change
-; CHECK: fcfids 1, [[MOVEREG13]]
+; CHECK: xscvsxdsp 1, [[MOVEREG13]]
}
; Function Attrs: nounwind
@@ -395,8 +388,7 @@ entry:
ret float %conv
; CHECK-LABEL: @_Z8testfully
; CHECK: mtvsrd [[MOVEREG15:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG15]]
+; CHECK: xscvuxdsp 1, [[MOVEREG15]]
}
; Function Attrs: nounwind
diff --git a/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
new file mode 100644
index 000000000000..f5b0a3a59bf3
--- /dev/null
+++ b/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -0,0 +1,137 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PPC64-P8
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PPC64
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PPC64-P8
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PPC64
+; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32
+
+define i128 @test_abs(ppc_fp128 %x) nounwind {
+entry:
+; PPC64-LABEL: test_abs:
+; PPC64-DAG: stxsdx 2, 0, [[ADDR_HI:[0-9]+]]
+; PPC64-DAG: stxsdx 1, 0, [[ADDR_LO:[0-9]+]]
+; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]]
+; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]]
+; PPC64-DAG: li [[MASK_REG:[0-9]+]], 1
+; PPC64: sldi [[MASK_REG]], [[MASK_REG]], 63
+; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]])
+; PPC64-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]])
+; PPC64: and [[FLIP_BIT:[0-9]+]], [[HI]], [[MASK_REG]]
+; PPC64-DAG: xor 3, [[HI]], [[FLIP_BIT]]
+; PPC64-DAG: xor 4, [[LO]], [[FLIP_BIT]]
+; PPC64: blr
+
+; PPC64-P8-LABEL: test_abs:
+; PPC64-P8-DAG: mfvsrd [[LO:[0-9]+]], 2
+; PPC64-P8-DAG: mfvsrd [[HI:[0-9]+]], 1
+; PPC64-P8-DAG: li [[MASK_REG:[0-9]+]], 1
+; PPC64-P8-DAG: sldi [[SHIFT_REG:[0-9]+]], [[MASK_REG]], 63
+; PPC64-P8: and [[FLIP_BIT:[0-9]+]], [[HI]], [[SHIFT_REG]]
+; PPC64-P8-DAG: xor 3, [[HI]], [[FLIP_BIT]]
+; PPC64-P8-DAG: xor 4, [[LO]], [[FLIP_BIT]]
+; PPC64-P8: blr
+
+; PPC32-DAG: stfd 1, 24(1)
+; PPC32-DAG: stfd 2, 16(1)
+; PPC32: nop
+; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1)
+; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1)
+; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1)
+; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1)
+; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0
+; PPC32-DAG: xor [[HI0]], [[HI0]], [[FLIP_BIT]]
+; PPC32-DAG: xor [[LO0]], [[LO0]], [[FLIP_BIT]]
+; PPC32: blr
+ %0 = tail call ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %x)
+ %1 = bitcast ppc_fp128 %0 to i128
+ ret i128 %1
+}
+
+define i128 @test_neg(ppc_fp128 %x) nounwind {
+entry:
+; PPC64-LABEL: test_neg:
+; PPC64-DAG: stxsdx 2, 0, [[ADDR_HI:[0-9]+]]
+; PPC64-DAG: stxsdx 1, 0, [[ADDR_LO:[0-9]+]]
+; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]]
+; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]]
+; PPC64-DAG: li [[FLIP_BIT:[0-9]+]], 1
+; PPC64-DAG: sldi [[FLIP_BIT]], [[FLIP_BIT]], 63
+; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]])
+; PPC64-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]])
+; PPC64-NOT: BARRIER
+; PPC64-DAG: xor 3, [[HI]], [[FLIP_BIT]]
+; PPC64-DAG: xor 4, [[LO]], [[FLIP_BIT]]
+; PPC64: blr
+
+; PPC64-P8-LABEL: test_neg:
+; PPC64-P8-DAG: mfvsrd [[LO:[0-9]+]], 2
+; PPC64-P8-DAG: mfvsrd [[HI:[0-9]+]], 1
+; PPC64-P8-DAG: li [[IMM1:[0-9]+]], 1
+; PPC64-P8-DAG: sldi [[FLIP_BIT]], [[IMM1]], 63
+; PPC64-P8-NOT: BARRIER
+; PPC64-P8-DAG: xor 3, [[HI]], [[FLIP_BIT]]
+; PPC64-P8-DAG: xor 4, [[LO]], [[FLIP_BIT]]
+; PPC64-P8: blr
+
+; PPC32-DAG: stfd 1, 24(1)
+; PPC32-DAG: stfd 2, 16(1)
+; PPC32: nop
+; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1)
+; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1)
+; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1)
+; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1)
+; PPC32-NOT: BARRIER
+; PPC32-DAG: xoris [[HI0]], [[HI0]], 32768
+; PPC32-DAG: xoris [[LO0]], [[LO0]], 32768
+; PPC32: blr
+ %0 = fsub ppc_fp128 0xM80000000000000000000000000000000, %x
+ %1 = bitcast ppc_fp128 %0 to i128
+ ret i128 %1
+}
+
+define i128 @test_copysign(ppc_fp128 %x) nounwind {
+entry:
+; PPC64-LABEL: test_copysign:
+; PPC64-DAG: stxsdx 1, 0, [[ADDR_REG:[0-9]+]]
+; PPC64-DAG: addi [[ADDR_REG]], 1, [[OFFSET:-?[0-9]+]]
+; PPC64-DAG: li [[SIGN:[0-9]+]], 1
+; PPC64-DAG: sldi [[SIGN]], [[SIGN]], 63
+; PPC64-DAG: li [[HI_TMP:[0-9]+]], 16399
+; PPC64-DAG: sldi [[CST_HI:[0-9]+]], [[HI_TMP]], 48
+; PPC64-DAG: li [[LO_TMP:[0-9]+]], 3019
+; PPC64-DAG: sldi [[CST_LO:[0-9]+]], [[LO_TMP]], 52
+; PPC64-NOT: BARRIER
+; PPC64-DAG: ld [[X_HI:[0-9]+]], [[OFFSET]](1)
+; PPC64-DAG: and [[NEW_HI_TMP:[0-9]+]], [[X_HI]], [[SIGN]]
+; PPC64-DAG: or 3, [[NEW_HI_TMP]], [[CST_HI]]
+; PPC64-DAG: xor 4, [[SIGN]], [[CST_LO]]
+; PPC64: blr
+
+; PPC64-P8-LABEL: test_copysign:
+; PPC64-P8-DAG: mfvsrd [[X_HI:[0-9]+]], 1
+; PPC64-P8-DAG: li [[SIGN:[0-9]+]], 1
+; PPC64-P8-DAG: sldi [[SIGN]], [[SIGN]], 63
+; PPC64-P8-DAG: li [[HI_TMP:[0-9]+]], 16399
+; PPC64-P8-DAG: sldi [[CST_HI:[0-9]+]], [[HI_TMP]], 48
+; PPC64-P8-DAG: li [[LO_TMP:[0-9]+]], 3019
+; PPC64-P8-DAG: sldi [[CST_LO:[0-9]+]], [[LO_TMP]], 52
+; PPC64-P8-NOT: BARRIER
+; PPC64-P8-DAG: and [[NEW_HI_TMP:[0-9]+]], [[X_HI]], [[SIGN]]
+; PPC64-P8-DAG: or 3, [[NEW_HI_TMP]], [[CST_HI]]
+; PPC64-P8-DAG: xor 4, [[NEW_HI_TMP]], [[CST_LO]]
+; PPC64-P8: blr
+
+; PPC32: stfd 1, [[STACK:[0-9]+]](1)
+; PPC32: nop
+; PPC32: lwz [[HI:[0-9]+]], [[STACK]](1)
+; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI]], 0, 0, 0
+; PPC32-NOT: BARRIER
+; PPC32-DAG: oris {{[0-9]+}}, [[FLIP_BIT]], 16399
+; PPC32-DAG: xoris {{[0-9]+}}, [[FLIP_BIT]], 48304
+; PPC32: blr
+ %0 = tail call ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 0xMBCB0000000000000400F000000000000, ppc_fp128 %x)
+ %1 = bitcast ppc_fp128 %0 to i128
+ ret i128 %1
+}
+
+declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128)
+declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128, ppc_fp128)
diff --git a/test/CodeGen/PowerPC/load-shift-combine.ll b/test/CodeGen/PowerPC/load-shift-combine.ll
index 8d1f8146db95..3b4685725216 100644
--- a/test/CodeGen/PowerPC/load-shift-combine.ll
+++ b/test/CodeGen/PowerPC/load-shift-combine.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; REQUIRES: default_triple
; This used to cause a crash. A standard load is converted to a pre-increment
; load. Later the pre-increment load is combined with a subsequent SRL to
diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll
index e53356a5ddf2..d596068cbb71 100644
--- a/test/CodeGen/PowerPC/long-compare.ll
+++ b/test/CodeGen/PowerPC/long-compare.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | grep cntlz
+; RUN: llc < %s -march=ppc32 | grep cntlzw
; RUN: llc < %s -march=ppc32 | not grep xori
; RUN: llc < %s -march=ppc32 | not grep "li "
; RUN: llc < %s -march=ppc32 | not grep "mr "
diff --git a/test/CodeGen/PowerPC/machine-combiner.ll b/test/CodeGen/PowerPC/machine-combiner.ll
new file mode 100644
index 000000000000..93fb2020d530
--- /dev/null
+++ b/test/CodeGen/PowerPC/machine-combiner.ll
@@ -0,0 +1,188 @@
+; RUN: llc -O3 -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
+; RUN: llc -O3 -mcpu=a2q -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-QPX
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Verify that the first two adds are independent regardless of how the inputs are
+; commuted. The destination registers are used as source registers for the third add.
+
+define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG1:[0-9]+]], 3, 4
+; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG1:[0-9]+]], 3, 4
+; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG1:[0-9]+]], 3, 4
+; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds4:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG1:[0-9]+]], 3, 4
+; CHECK: fadds 1, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
+; produced because that would cost more compile time.
+
+define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
+; CHECK-LABEL: reassociate_adds5:
+; CHECK: # BB#0:
+; CHECK: fadds [[REG12:[0-9]+]], 5, 6
+; CHECK: fadds [[REG0:[0-9]+]], 1, 2
+; CHECK: fadds [[REG11:[0-9]+]], 3, 4
+; CHECK: fadds [[REG13:[0-9]+]], [[REG12]], 7
+; CHECK: fadds [[REG1:[0-9]+]], [[REG0]], [[REG11]]
+; CHECK: fadds [[REG2:[0-9]+]], [[REG1]], [[REG13]]
+; CHECK: fadds 1, [[REG2]], 8
+; CHECK-NEXT: blr
+
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ %t3 = fadd float %t2, %x4
+ %t4 = fadd float %t3, %x5
+ %t5 = fadd float %t4, %x6
+ %t6 = fadd float %t5, %x7
+ ret float %t6
+}
+
+; Verify that we reassociate vector instructions too.
+
+define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds1:
+; CHECK: # BB#0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
+; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %t0, %x2
+ %t2 = fadd <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
+define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds2:
+; CHECK: # BB#0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
+; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %x2, %t0
+ %t2 = fadd <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
+define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds3:
+; CHECK: # BB#0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
+; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %t0, %x2
+ %t2 = fadd <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
+define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds4:
+; CHECK: # BB#0:
+; CHECK-QPX: qvfadds [[REG0:[0-9]+]], 1, 2
+; CHECK-QPX: qvfadds [[REG1:[0-9]+]], 3, 4
+; CHECK-QPX: qvfadds 1, [[REG0]], [[REG1]]
+; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
+; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
+; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
+; CHECK-NEXT: blr
+
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %x2, %t0
+ %t2 = fadd <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
+define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
+ %t0 = fdiv float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
+ %t0 = fdiv float %x0, %x1
+ %t1 = fmul float %x2, %t0
+ %t2 = fmul float %x3, %t1
+ ret float %t2
+}
+
+define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
+ %t0 = fdiv double %x0, %x1
+ %t1 = fadd double %x2, %t0
+ %t2 = fadd double %x3, %t1
+ ret double %t2
+}
+
+define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
+ %t0 = fdiv double %x0, %x1
+ %t1 = fmul double %x2, %t0
+ %t2 = fmul double %x3, %t1
+ ret double %t2
+}
+
+
diff --git a/test/CodeGen/PowerPC/mc-instrlat.ll b/test/CodeGen/PowerPC/mc-instrlat.ll
new file mode 100644
index 000000000000..0bbac14f6d3e
--- /dev/null
+++ b/test/CodeGen/PowerPC/mc-instrlat.ll
@@ -0,0 +1,25 @@
+; RUN: llc -O3 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double %eps) #0 {
+entry:
+ %0 = fmul fast double %eps, %eps
+ %div = fmul fast double %0, 0x3FD5555555555555
+ tail call void @bar(double %div) #2
+ unreachable
+
+; This used to crash because we'd call a function to compute instruction
+; latency not supported with itineraries.
+; CHECK-LABEL: @foo
+; CHECK: bar
+
+}
+
+declare void @bar(double) #1
+
+attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="ppc64" "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/mcm-13.ll b/test/CodeGen/PowerPC/mcm-13.ll
new file mode 100644
index 000000000000..ba371c5026c1
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-13.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
+
+; Test correct code generation for medium and large code model
+; for loading and storing a weak variable
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@wi = weak global i32 0, align 4
+
+define signext i32 @test_weak() nounwind {
+entry:
+ %0 = load i32, i32* @wi, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @wi, align 4
+ ret i32 %0
+}
+
+; CHECK-LABEL: test_weak:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/memcpy-vec.ll b/test/CodeGen/PowerPC/memcpy-vec.ll
index 70b8ea931a27..29baef55ce17 100644
--- a/test/CodeGen/PowerPC/memcpy-vec.ll
+++ b/test/CodeGen/PowerPC/memcpy-vec.ll
@@ -14,8 +14,11 @@ entry:
; PWR7-LABEL: @foo1
; PWR7-NOT: bl memcpy
-; PWR7: ld {{[0-9]+}}, {{[0-9]+}}(4)
-; PWR7: std {{[0-9]+}}, {{[0-9]+}}(3)
+; PWR7-DAG: li [[OFFSET:[0-9]+]], 16
+; PWR7-DAG: lxvd2x [[TMP0:[0-9]+]], 4, [[OFFSET]]
+; PWR7-DAG: stxvd2x [[TMP0]], 0, 3
+; PWR7-DAG: lxvd2x [[TMP1:[0-9]+]], 0, 4
+; PWR7-DAG: stxvd2x [[TMP1]], 0, 3
; PWR7: blr
; PWR8-LABEL: @foo1
diff --git a/test/CodeGen/PowerPC/merge-st-chain-op.ll b/test/CodeGen/PowerPC/merge-st-chain-op.ll
new file mode 100644
index 000000000000..bfb911c01157
--- /dev/null
+++ b/test/CodeGen/PowerPC/merge-st-chain-op.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@_ZNSs4_Rep20_S_empty_rep_storageE = external global [0 x i64], align 8
+
+; Function Attrs: nounwind
+define void @_ZN5clang7tooling15RefactoringTool10runAndSaveEPNS0_21FrontendActionFactoryE() #0 align 2 {
+entry:
+ br i1 undef, label %_ZN4llvm18IntrusiveRefCntPtrIN5clang13DiagnosticIDsEEC2EPS2_.exit, label %return
+
+; CHECK: @_ZN5clang7tooling15RefactoringTool10runAndSaveEPNS0_21FrontendActionFactoryE
+
+_ZN4llvm18IntrusiveRefCntPtrIN5clang13DiagnosticIDsEEC2EPS2_.exit: ; preds = %entry
+ %call2 = call noalias i8* @_Znwm() #3
+ %ref_cnt.i.i = bitcast i8* %call2 to i32*
+ store <2 x i8*> <i8* bitcast (i64* getelementptr inbounds ([0 x i64], [0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8* bitcast (i64* getelementptr inbounds ([0 x i64], [0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*)>, <2 x i8*>* undef, align 8
+ %IgnoreWarnings.i = getelementptr inbounds i8, i8* %call2, i64 4
+ %0 = bitcast i8* %IgnoreWarnings.i to i32*
+ call void @llvm.memset.p0i8.i64(i8* null, i8 0, i64 48, i32 8, i1 false) #4
+ store i32 251658240, i32* %0, align 4
+ store i256 37662610426935100959726589394453639584271499769928088551424, i256* null, align 8
+ store i32 1, i32* %ref_cnt.i.i, align 4
+ unreachable
+
+return: ; preds = %entry
+ ret void
+}
+
+; Function Attrs: nobuiltin
+declare noalias i8* @_Znwm() #1
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #2
+
+attributes #0 = { nounwind "target-cpu"="pwr7" }
+attributes #1 = { nobuiltin "target-cpu"="pwr7" }
+attributes #2 = { nounwind argmemonly }
+attributes #3 = { builtin nounwind }
+attributes #4 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
new file mode 100644
index 000000000000..8da8df58a85c
--- /dev/null
+++ b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -0,0 +1,1476 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE
+
+; The build[csilf] functions simply test the scalar_to_vector handling with
+; direct moves. This corresponds to the "insertelement" instruction. Subsequent
+; to this, there will be a splat corresponding to the shufflevector.
+
+@d = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define <16 x i8> @buildc(i8 zeroext %a) {
+entry:
+ %a.addr = alloca i8, align 1
+ store i8 %a, i8* %a.addr, align 1
+ %0 = load i8, i8* %a.addr, align 1
+ %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
+ %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %splat.splat
+; CHECK: sldi [[REG1:[0-9]+]], 3, 56
+; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <8 x i16> @builds(i16 zeroext %a) {
+entry:
+ %a.addr = alloca i16, align 2
+ store i16 %a, i16* %a.addr, align 2
+ %0 = load i16, i16* %a.addr, align 2
+ %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0
+ %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %splat.splat
+; CHECK: sldi [[REG1:[0-9]+]], 3, 48
+; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <4 x i32> @buildi(i32 zeroext %a) {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ %0 = load i32, i32* %a.addr, align 4
+ %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
+ %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %splat.splat
+; CHECK: sldi [[REG1:[0-9]+]], 3, 32
+; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <2 x i64> @buildl(i64 %a) {
+entry:
+ %a.addr = alloca i64, align 8
+ store i64 %a, i64* %a.addr, align 8
+ %0 = load i64, i64* %a.addr, align 8
+ %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
+ %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %splat.splat
+; CHECK: mtvsrd {{[0-9]+}}, 3
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxspltd [[REG1]], [[REG1]], 0
+}
+
+; Function Attrs: nounwind
+define <4 x float> @buildf(float %a) {
+entry:
+ %a.addr = alloca float, align 4
+ store float %a, float* %a.addr, align 4
+ %0 = load float, float* %a.addr, align 4
+ %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
+ %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %splat.splat
+; CHECK: xscvdpspn {{[0-9]+}}, 1
+; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1
+; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1
+}
+
+; The optimization to remove stack operations from PPCDAGToDAGISel::Select
+; should still trigger for v2f64, producing an lxvdsx.
+; Function Attrs: nounwind
+define <2 x double> @buildd() #0 {
+entry:
+ %0 = load double, double* @d, align 8
+ %splat.splatinsert = insertelement <2 x double> undef, double %0, i32 0
+ %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer
+ ret <2 x double> %splat.splat
+; CHECK: ld [[REG1:[0-9]+]], .LC0@toc@l
+; CHECK: lxvdsx 34, 0, [[REG1]]
+; CHECK-LE: ld [[REG1:[0-9]+]], .LC0@toc@l
+; CHECK-LE: lxvdsx 34, 0, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc0(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 0
+ ret i8 %vecext
+; CHECK-LABEL: @getsc0
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 8, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc0
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: clrldi 3, 3, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc1(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 1
+ ret i8 %vecext
+; CHECK-LABEL: @getsc1
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 16, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc1
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 56, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc2(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 2
+ ret i8 %vecext
+; CHECK-LABEL: @getsc2
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 24, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc2
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 48, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc3(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 3
+ ret i8 %vecext
+; CHECK-LABEL: @getsc3
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 32, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc3
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 40, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc4(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 4
+ ret i8 %vecext
+; CHECK-LABEL: @getsc4
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 40, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc4
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 32, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc5(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 5
+ ret i8 %vecext
+; CHECK-LABEL: @getsc5
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 48, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc5
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 24, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc6(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 6
+ ret i8 %vecext
+; CHECK-LABEL: @getsc6
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 56, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc6
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 16, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc7(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 7
+ ret i8 %vecext
+; CHECK-LABEL: @getsc7
+; CHECK: mfvsrd 3, 34
+; CHECK: clrldi 3, 3, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc7
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 8, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc8(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 8
+ ret i8 %vecext
+; CHECK-LABEL: @getsc8
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 8, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc8
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: clrldi 3, 3, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc9(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 9
+ ret i8 %vecext
+; CHECK-LABEL: @getsc9
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 16, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc9
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 56, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc10(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 10
+ ret i8 %vecext
+; CHECK-LABEL: @getsc10
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 24, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc10
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 48, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc11(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 11
+ ret i8 %vecext
+; CHECK-LABEL: @getsc11
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 32, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc11
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 40, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc12(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 12
+ ret i8 %vecext
+; CHECK-LABEL: @getsc12
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 40, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc12
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 32, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc13(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 13
+ ret i8 %vecext
+; CHECK-LABEL: @getsc13
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 48, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc13
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 24, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc14(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 14
+ ret i8 %vecext
+; CHECK-LABEL: @getsc14
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 56, 56
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc14
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 16, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i8 @getsc15(<16 x i8> %vsc) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 15
+ ret i8 %vecext
+; CHECK-LABEL: @getsc15
+; CHECK: mfvsrd 3,
+; CHECK: extsb 3, 3
+; CHECK-LE-LABEL: @getsc15
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 8, 56
+; CHECK-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc0(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 0
+ ret i8 %vecext
+; CHECK-LABEL: @getuc0
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 8, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc0
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc1(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 1
+ ret i8 %vecext
+; CHECK-LABEL: @getuc1
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 16, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc1
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 56, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc2(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 2
+ ret i8 %vecext
+; CHECK-LABEL: @getuc2
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 24, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc2
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 48, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc3(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 3
+ ret i8 %vecext
+; CHECK-LABEL: @getuc3
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 32, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc3
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 40, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc4(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 4
+ ret i8 %vecext
+; CHECK-LABEL: @getuc4
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 40, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc4
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 32, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc5(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 5
+ ret i8 %vecext
+; CHECK-LABEL: @getuc5
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 48, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc5
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 24, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc6(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 6
+ ret i8 %vecext
+; CHECK-LABEL: @getuc6
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 56, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc6
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 16, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc7(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 7
+ ret i8 %vecext
+; CHECK-LABEL: @getuc7
+; CHECK: mfvsrd 3, 34
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc7
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 8, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc8(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 8
+ ret i8 %vecext
+; CHECK-LABEL: @getuc8
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 8, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc8
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc9(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 9
+ ret i8 %vecext
+; CHECK-LABEL: @getuc9
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 16, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc9
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 56, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc10(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 10
+ ret i8 %vecext
+; CHECK-LABEL: @getuc10
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 24, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc10
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 48, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc11(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 11
+ ret i8 %vecext
+; CHECK-LABEL: @getuc11
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 32, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc11
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 40, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc12(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 12
+ ret i8 %vecext
+; CHECK-LABEL: @getuc12
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 40, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc12
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 32, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc13(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 13
+ ret i8 %vecext
+; CHECK-LABEL: @getuc13
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 48, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc13
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 24, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc14(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 14
+ ret i8 %vecext
+; CHECK-LABEL: @getuc14
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 56, 56
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc14
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 16, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getuc15(<16 x i8> %vuc) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %vecext = extractelement <16 x i8> %0, i32 15
+ ret i8 %vecext
+; CHECK-LABEL: @getuc15
+; CHECK: mfvsrd 3,
+; CHECK: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getuc15
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 8, 56
+; CHECK-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) {
+entry:
+ %vsc.addr = alloca <16 x i8>, align 16
+ %i.addr = alloca i32, align 4
+ store <16 x i8> %vsc, <16 x i8>* %vsc.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <16 x i8>, <16 x i8>* %vsc.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <16 x i8> %0, i32 %1
+ ret i8 %vecext
+; CHECK-LABEL: @getvelsc
+; CHECK-DAG: andi. [[ANDI:[0-9]+]], {{[0-9]+}}, 8
+; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDI]]
+; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG: li [[IMM7:[0-9]+]], 7
+; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM7]]
+; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 3
+; CHECK-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG: extsb 3, 3
+; CHECK-LE-LABEL: @getvelsc
+; CHECK-DAG-LE: li [[IMM8:[0-9]+]], 8
+; CHECK-DAG-LE: andc [[ANDC:[0-9]+]], [[IMM8]]
+; CHECK-DAG-LE: lvsl [[SHMSK:[0-9]+]], 0, [[ANDC]]
+; CHECK-DAG-LE: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG-LE: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG-LE: li [[IMM7:[0-9]+]], 7
+; CHECK-DAG-LE: and [[AND:[0-9]+]], [[IMM7]]
+; CHECK-DAG-LE: sldi [[SHL:[0-9]+]], [[AND]], 3
+; CHECK-DAG-LE: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG-LE: extsb 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) {
+entry:
+ %vuc.addr = alloca <16 x i8>, align 16
+ %i.addr = alloca i32, align 4
+ store <16 x i8> %vuc, <16 x i8>* %vuc.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <16 x i8>, <16 x i8>* %vuc.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <16 x i8> %0, i32 %1
+ ret i8 %vecext
+; CHECK-LABEL: @getveluc
+; CHECK-DAG: andi. [[ANDI:[0-9]+]], {{[0-9]+}}, 8
+; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[ANDI]]
+; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG: li [[IMM7:[0-9]+]], 7
+; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM7]]
+; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 3
+; CHECK-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG: clrldi 3, 3, 56
+; CHECK-LE-LABEL: @getveluc
+; CHECK-DAG-LE: li [[IMM8:[0-9]+]], 8
+; CHECK-DAG-LE: andc [[ANDC:[0-9]+]], [[IMM8]]
+; CHECK-DAG-LE: lvsl [[SHMSK:[0-9]+]], 0, [[ANDC]]
+; CHECK-DAG-LE: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG-LE: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG-LE: li [[IMM7:[0-9]+]], 7
+; CHECK-DAG-LE: and [[AND:[0-9]+]], [[IMM7]]
+; CHECK-DAG-LE: sldi [[SHL:[0-9]+]], [[AND]], 3
+; CHECK-DAG-LE: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG-LE: clrldi 3, 3, 56
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss0(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 0
+ ret i16 %vecext
+; CHECK-LABEL: @getss0
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 16, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss0
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: clrldi 3, 3, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss1(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 1
+ ret i16 %vecext
+; CHECK-LABEL: @getss1
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 32, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss1
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 48, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss2(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 2
+ ret i16 %vecext
+; CHECK-LABEL: @getss2
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 48, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss2
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 32, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss3(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 3
+ ret i16 %vecext
+; CHECK-LABEL: @getss3
+; CHECK: mfvsrd 3, 34
+; CHECK: clrldi 3, 3, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss3
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 16, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss4(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 4
+ ret i16 %vecext
+; CHECK-LABEL: @getss4
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 16, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss4
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: clrldi 3, 3, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss5(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 5
+ ret i16 %vecext
+; CHECK-LABEL: @getss5
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 32, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss5
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 48, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss6(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 6
+ ret i16 %vecext
+; CHECK-LABEL: @getss6
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 48, 48
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss6
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 32, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i16 @getss7(<8 x i16> %vss) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 7
+ ret i16 %vecext
+; CHECK-LABEL: @getss7
+; CHECK: mfvsrd 3,
+; CHECK: extsh 3, 3
+; CHECK-LE-LABEL: @getss7
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 16, 48
+; CHECK-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus0(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 0
+ ret i16 %vecext
+; CHECK-LABEL: @getus0
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 16, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus0
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus1(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 1
+ ret i16 %vecext
+; CHECK-LABEL: @getus1
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 32, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus1
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 48, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus2(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 2
+ ret i16 %vecext
+; CHECK-LABEL: @getus2
+; CHECK: mfvsrd 3, 34
+; CHECK: rldicl 3, 3, 48, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus2
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 32, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus3(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 3
+ ret i16 %vecext
+; CHECK-LABEL: @getus3
+; CHECK: mfvsrd 3, 34
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus3
+; CHECK-LE: mfvsrd 3,
+; CHECK-LE: rldicl 3, 3, 16, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus4(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 4
+ ret i16 %vecext
+; CHECK-LABEL: @getus4
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 16, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus4
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus5(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 5
+ ret i16 %vecext
+; CHECK-LABEL: @getus5
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 32, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus5
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 48, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus6(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 6
+ ret i16 %vecext
+; CHECK-LABEL: @getus6
+; CHECK: mfvsrd 3,
+; CHECK: rldicl 3, 3, 48, 48
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus6
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 32, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getus7(<8 x i16> %vus) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %vecext = extractelement <8 x i16> %0, i32 7
+ ret i16 %vecext
+; CHECK-LABEL: @getus7
+; CHECK: mfvsrd 3,
+; CHECK: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getus7
+; CHECK-LE: mfvsrd 3, 34
+; CHECK-LE: rldicl 3, 3, 16, 48
+; CHECK-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) {
+entry:
+ %vss.addr = alloca <8 x i16>, align 16
+ %i.addr = alloca i32, align 4
+ store <8 x i16> %vss, <8 x i16>* %vss.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <8 x i16>, <8 x i16>* %vss.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <8 x i16> %0, i32 %1
+ ret i16 %vecext
+; CHECK-LABEL: @getvelss
+; CHECK-DAG: andi. [[ANDI:[0-9]+]], {{[0-9]+}}, 4
+; CHECK-DAG: sldi [[MUL2:[0-9]+]], [[ANDI]], 1
+; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]]
+; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG: li [[IMM3:[0-9]+]], 3
+; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]]
+; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 4
+; CHECK-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG: extsh 3, 3
+; CHECK-LE-LABEL: @getvelss
+; CHECK-DAG-LE: li [[IMM4:[0-9]+]], 4
+; CHECK-DAG-LE: andc [[ANDC:[0-9]+]], [[IMM4]]
+; CHECK-DAG-LE: sldi [[MUL2:[0-9]+]], [[ANDC]], 1
+; CHECK-DAG-LE: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]]
+; CHECK-DAG-LE: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG-LE: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG-LE: li [[IMM3:[0-9]+]], 3
+; CHECK-DAG-LE: and [[AND:[0-9]+]], [[IMM3]]
+; CHECK-DAG-LE: sldi [[SHL:[0-9]+]], [[AND]], 4
+; CHECK-DAG-LE: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG-LE: extsh 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) {
+entry:
+ %vus.addr = alloca <8 x i16>, align 16
+ %i.addr = alloca i32, align 4
+ store <8 x i16> %vus, <8 x i16>* %vus.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <8 x i16>, <8 x i16>* %vus.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <8 x i16> %0, i32 %1
+ ret i16 %vecext
+; CHECK-LABEL: @getvelus
+; CHECK-DAG: andi. [[ANDI:[0-9]+]], {{[0-9]+}}, 4
+; CHECK-DAG: sldi [[MUL2:[0-9]+]], [[ANDI]], 1
+; CHECK-DAG: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]]
+; CHECK-DAG: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG: li [[IMM3:[0-9]+]], 3
+; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]]
+; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 4
+; CHECK-DAG: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG: clrldi 3, 3, 48
+; CHECK-LE-LABEL: @getvelus
+; CHECK-DAG-LE: li [[IMM4:[0-9]+]], 4
+; CHECK-DAG-LE: andc [[ANDC:[0-9]+]], [[IMM4]]
+; CHECK-DAG-LE: sldi [[MUL2:[0-9]+]], [[ANDC]], 1
+; CHECK-DAG-LE: lvsl [[SHMSK:[0-9]+]], 0, [[MUL2]]
+; CHECK-DAG-LE: vperm [[PERMD:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, [[SHMSK]]
+; CHECK-DAG-LE: mfvsrd [[MOV:[0-9]+]],
+; CHECK-DAG-LE: li [[IMM3:[0-9]+]], 3
+; CHECK-DAG-LE: and [[AND:[0-9]+]], [[IMM3]]
+; CHECK-DAG-LE: sldi [[SHL:[0-9]+]], [[AND]], 4
+; CHECK-DAG-LE: srd 3, [[MOV]], [[SHL]]
+; CHECK-DAG-LE: clrldi 3, 3, 48
+}
+
+; Function Attrs: nounwind
+define signext i32 @getsi0(<4 x i32> %vsi) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 0
+ ret i32 %vecext
+; CHECK-LABEL: @getsi0
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: extsw 3, 3
+; CHECK-LE-LABEL: @getsi0
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: extsw 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i32 @getsi1(<4 x i32> %vsi) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 1
+ ret i32 %vecext
+; CHECK-LABEL: @getsi1
+; CHECK: mfvsrwz 3, 34
+; CHECK: extsw 3, 3
+; CHECK-LE-LABEL: @getsi1
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: extsw 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i32 @getsi2(<4 x i32> %vsi) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 2
+ ret i32 %vecext
+; CHECK-LABEL: @getsi2
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: extsw 3, 3
+; CHECK-LE-LABEL: @getsi2
+; CHECK-LE: mfvsrwz 3, 34
+; CHECK-LE: extsw 3, 3
+}
+
+; Function Attrs: nounwind
+define signext i32 @getsi3(<4 x i32> %vsi) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 3
+ ret i32 %vecext
+; CHECK-LABEL: @getsi3
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: extsw 3, 3
+; CHECK-LE-LABEL: @getsi3
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: extsw 3, 3
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getui0(<4 x i32> %vui) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 0
+ ret i32 %vecext
+; CHECK-LABEL: @getui0
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: clrldi 3, 3, 32
+; CHECK-LE-LABEL: @getui0
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: clrldi 3, 3, 32
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getui1(<4 x i32> %vui) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 1
+ ret i32 %vecext
+; CHECK-LABEL: @getui1
+; CHECK: mfvsrwz 3, 34
+; CHECK: clrldi 3, 3, 32
+; CHECK-LE-LABEL: @getui1
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: clrldi 3, 3, 32
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getui2(<4 x i32> %vui) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 2
+ ret i32 %vecext
+; CHECK-LABEL: @getui2
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: clrldi 3, 3, 32
+; CHECK-LE-LABEL: @getui2
+; CHECK-LE: mfvsrwz 3, 34
+; CHECK-LE: clrldi 3, 3, 32
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getui3(<4 x i32> %vui) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %vecext = extractelement <4 x i32> %0, i32 3
+ ret i32 %vecext
+; CHECK-LABEL: @getui3
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK: mfvsrwz 3, [[SHL]]
+; CHECK: clrldi 3, 3, 32
+; CHECK-LE-LABEL: @getui3
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK-LE: mfvsrwz 3, [[SHL]]
+; CHECK-LE: clrldi 3, 3, 32
+}
+
+; Function Attrs: nounwind
+define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) {
+entry:
+ %vsi.addr = alloca <4 x i32>, align 16
+ %i.addr = alloca i32, align 4
+ store <4 x i32> %vsi, <4 x i32>* %vsi.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <4 x i32>, <4 x i32>* %vsi.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <4 x i32> %0, i32 %1
+ ret i32 %vecext
+; CHECK-LABEL: @getvelsi
+; CHECK-LE-LABEL: @getvelsi
+; FIXME: add check patterns when variable element extraction is implemented
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) {
+entry:
+ %vui.addr = alloca <4 x i32>, align 16
+ %i.addr = alloca i32, align 4
+ store <4 x i32> %vui, <4 x i32>* %vui.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <4 x i32>, <4 x i32>* %vui.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <4 x i32> %0, i32 %1
+ ret i32 %vecext
+; CHECK-LABEL: @getvelui
+; CHECK-LE-LABEL: @getvelui
+; FIXME: add check patterns when variable element extraction is implemented
+}
+
+; Function Attrs: nounwind
+define i64 @getsl0(<2 x i64> %vsl) {
+entry:
+ %vsl.addr = alloca <2 x i64>, align 16
+ store <2 x i64> %vsl, <2 x i64>* %vsl.addr, align 16
+ %0 = load <2 x i64>, <2 x i64>* %vsl.addr, align 16
+ %vecext = extractelement <2 x i64> %0, i32 0
+ ret i64 %vecext
+; CHECK-LABEL: @getsl0
+; CHECK: mfvsrd 3, 34
+; CHECK-LE-LABEL: @getsl0
+; CHECK-LE: xxswapd [[SWP:[0-9]+]], 34
+; CHECK-LE: mfvsrd 3, [[SWP]]
+}
+
+; Function Attrs: nounwind
+define i64 @getsl1(<2 x i64> %vsl) {
+entry:
+ %vsl.addr = alloca <2 x i64>, align 16
+ store <2 x i64> %vsl, <2 x i64>* %vsl.addr, align 16
+ %0 = load <2 x i64>, <2 x i64>* %vsl.addr, align 16
+ %vecext = extractelement <2 x i64> %0, i32 1
+ ret i64 %vecext
+; CHECK-LABEL: @getsl1
+; CHECK: xxswapd [[SWP:[0-9]+]], 34
+; CHECK: mfvsrd 3, [[SWP]]
+; CHECK-LE-LABEL: @getsl1
+; CHECK-LE: mfvsrd 3, 34
+}
+
+; Function Attrs: nounwind
+define i64 @getul0(<2 x i64> %vul) {
+entry:
+ %vul.addr = alloca <2 x i64>, align 16
+ store <2 x i64> %vul, <2 x i64>* %vul.addr, align 16
+ %0 = load <2 x i64>, <2 x i64>* %vul.addr, align 16
+ %vecext = extractelement <2 x i64> %0, i32 0
+ ret i64 %vecext
+; CHECK-LABEL: @getul0
+; CHECK: mfvsrd 3, 34
+; CHECK-LE-LABEL: @getul0
+; CHECK-LE: xxswapd [[SWP:[0-9]+]], 34
+; CHECK-LE: mfvsrd 3, [[SWP]]
+}
+
+; Function Attrs: nounwind
+define i64 @getul1(<2 x i64> %vul) {
+entry:
+ %vul.addr = alloca <2 x i64>, align 16
+ store <2 x i64> %vul, <2 x i64>* %vul.addr, align 16
+ %0 = load <2 x i64>, <2 x i64>* %vul.addr, align 16
+ %vecext = extractelement <2 x i64> %0, i32 1
+ ret i64 %vecext
+; CHECK-LABEL: @getul1
+; CHECK: xxswapd [[SWP:[0-9]+]], 34
+; CHECK: mfvsrd 3, [[SWP]]
+; CHECK-LE-LABEL: @getul1
+; CHECK-LE: mfvsrd 3, 34
+}
+
+; Function Attrs: nounwind
+define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) {
+entry:
+ %vsl.addr = alloca <2 x i64>, align 16
+ %i.addr = alloca i32, align 4
+ store <2 x i64> %vsl, <2 x i64>* %vsl.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <2 x i64>, <2 x i64>* %vsl.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <2 x i64> %0, i32 %1
+ ret i64 %vecext
+; CHECK-LABEL: @getvelsl
+; CHECK-LE-LABEL: @getvelsl
+; FIXME: add check patterns when variable element extraction is implemented
+}
+
+; Function Attrs: nounwind
+define i64 @getvelul(<2 x i64> %vul, i32 signext %i) {
+entry:
+ %vul.addr = alloca <2 x i64>, align 16
+ %i.addr = alloca i32, align 4
+ store <2 x i64> %vul, <2 x i64>* %vul.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <2 x i64>, <2 x i64>* %vul.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <2 x i64> %0, i32 %1
+ ret i64 %vecext
+; CHECK-LABEL: @getvelul
+; CHECK-LE-LABEL: @getvelul
+; FIXME: add check patterns when variable element extraction is implemented
+}
+
+; Function Attrs: nounwind
+define float @getf0(<4 x float> %vf) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %vecext = extractelement <4 x float> %0, i32 0
+ ret float %vecext
+; CHECK-LABEL: @getf0
+; CHECK: xscvspdpn 1, 34
+; CHECK-LE-LABEL: @getf0
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK-LE: xscvspdpn 1, [[SHL]]
+}
+
+; Function Attrs: nounwind
+define float @getf1(<4 x float> %vf) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %vecext = extractelement <4 x float> %0, i32 1
+ ret float %vecext
+; CHECK-LABEL: @getf1
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK: xscvspdpn 1, [[SHL]]
+; CHECK-LE-LABEL: @getf1
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK-LE: xscvspdpn 1, [[SHL]]
+}
+
+; Function Attrs: nounwind
+define float @getf2(<4 x float> %vf) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %vecext = extractelement <4 x float> %0, i32 2
+ ret float %vecext
+; CHECK-LABEL: @getf2
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 2
+; CHECK: xscvspdpn 1, [[SHL]]
+; CHECK-LE-LABEL: @getf2
+; CHECK-LE: xxsldwi [[SHL:[0-9]+]], 34, 34, 1
+; CHECK-LE: xscvspdpn 1, [[SHL]]
+}
+
+; Function Attrs: nounwind
+define float @getf3(<4 x float> %vf) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %vecext = extractelement <4 x float> %0, i32 3
+ ret float %vecext
+; CHECK-LABEL: @getf3
+; CHECK: xxsldwi [[SHL:[0-9]+]], 34, 34, 3
+; CHECK: xscvspdpn 1, [[SHL]]
+; CHECK-LE-LABEL: @getf3
+; CHECK-LE: xscvspdpn 1, 34
+}
+
+; Function Attrs: nounwind
+define float @getvelf(<4 x float> %vf, i32 signext %i) {
+entry:
+ %vf.addr = alloca <4 x float>, align 16
+ %i.addr = alloca i32, align 4
+ store <4 x float> %vf, <4 x float>* %vf.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <4 x float>, <4 x float>* %vf.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <4 x float> %0, i32 %1
+ ret float %vecext
+; CHECK-LABEL: @getvelf
+; CHECK-LE-LABEL: @getvelf
+; FIXME: add check patterns when variable element extraction is implemented
+}
+
+; Function Attrs: nounwind
+define double @getd0(<2 x double> %vd) {
+entry:
+ %vd.addr = alloca <2 x double>, align 16
+ store <2 x double> %vd, <2 x double>* %vd.addr, align 16
+ %0 = load <2 x double>, <2 x double>* %vd.addr, align 16
+ %vecext = extractelement <2 x double> %0, i32 0
+ ret double %vecext
+; CHECK-LABEL: @getd0
+; CHECK: xxlor 1, 34, 34
+; CHECK-LE-LABEL: @getd0
+; CHECK-LE: xxswapd 1, 34
+}
+
+; Function Attrs: nounwind
+define double @getd1(<2 x double> %vd) {
+entry:
+ %vd.addr = alloca <2 x double>, align 16
+ store <2 x double> %vd, <2 x double>* %vd.addr, align 16
+ %0 = load <2 x double>, <2 x double>* %vd.addr, align 16
+ %vecext = extractelement <2 x double> %0, i32 1
+ ret double %vecext
+; CHECK-LABEL: @getd1
+; CHECK: xxswapd 1, 34
+; CHECK-LE-LABEL: @getd1
+; CHECK-LE: xxlor 1, 34, 34
+}
+
+; Function Attrs: nounwind
+define double @getveld(<2 x double> %vd, i32 signext %i) {
+entry:
+ %vd.addr = alloca <2 x double>, align 16
+ %i.addr = alloca i32, align 4
+ store <2 x double> %vd, <2 x double>* %vd.addr, align 16
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load <2 x double>, <2 x double>* %vd.addr, align 16
+ %1 = load i32, i32* %i.addr, align 4
+ %vecext = extractelement <2 x double> %0, i32 %1
+ ret double %vecext
+; CHECK-LABEL: @getveld
+; CHECK-LE-LABEL: @getveld
+; FIXME: add check patterns when variable element extraction is implemented
+}
diff --git a/test/CodeGen/PowerPC/peephole-align.ll b/test/CodeGen/PowerPC/peephole-align.ll
new file mode 100644
index 000000000000..c8c2fe4d32ce
--- /dev/null
+++ b/test/CodeGen/PowerPC/peephole-align.ll
@@ -0,0 +1,335 @@
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck -check-prefix=POWER7 -check-prefix=CHECK %s
+; RUN: llc -mcpu=pwr8 -O1 -code-model=medium <%s | FileCheck -check-prefix=POWER8 -check-prefix=CHECK %s
+
+; Test peephole optimization for medium code model (32-bit TOC offsets)
+; for loading and storing small offsets within aligned values.
+; For power8, verify that the optimization doesn't fire, as it prevents fusion
+; opportunities.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.b4 = type<{ i8, i8, i8, i8 }>
+%struct.h2 = type<{ i16, i16 }>
+
+%struct.b8 = type<{ i8, i8, i8, i8, i8, i8, i8, i8 }>
+%struct.h4 = type<{ i16, i16, i16, i16 }>
+%struct.w2 = type<{ i32, i32 }>
+
+%struct.d2 = type<{ i64, i64 }>
+%struct.misalign = type<{ i8, i64 }>
+
+@b4v = global %struct.b4 <{ i8 1, i8 2, i8 3, i8 4 }>, align 4
+@h2v = global %struct.h2 <{ i16 1, i16 2 }>, align 4
+
+@b8v = global %struct.b8 <{ i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8 }>, align 8
+@h4v = global %struct.h4 <{ i16 1, i16 2, i16 3, i16 4 }>, align 8
+@w2v = global %struct.w2 <{ i32 1, i32 2 }>, align 8
+
+@d2v = global %struct.d2 <{ i64 1, i64 2 }>, align 16
+@misalign_v = global %struct.misalign <{ i8 1, i64 2 }>, align 16
+
+; CHECK-LABEL: test_b4:
+; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, b4v@toc@ha
+; POWER7-DAG: lbz [[REG0_0:[0-9]+]], b4v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG1_0:[0-9]+]], b4v@toc@l+1([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG2_0:[0-9]+]], b4v@toc@l+2([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG3_0:[0-9]+]], b4v@toc@l+3([[REGSTRUCT]])
+; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
+; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
+; POWER7-DAG: stb [[REG0_1]], b4v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG1_1]], b4v@toc@l+1([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG2_1]], b4v@toc@l+2([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG3_1]], b4v@toc@l+3([[REGSTRUCT]])
+
+; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, b4v@toc@ha
+; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], b4v@toc@l
+; POWER8-DAG: lbz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG1_0:[0-9]+]], 1([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG2_0:[0-9]+]], 2([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG3_0:[0-9]+]], 3([[REGSTRUCT]])
+; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
+; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
+; POWER8-DAG: stb [[REG0_1]], 0([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG1_1]], 1([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG2_1]], 2([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG3_1]], 3([[REGSTRUCT]])
+define void @test_b4() nounwind {
+entry:
+ %0 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 0), align 1
+ %inc0 = add nsw i8 %0, 1
+ store i8 %inc0, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 0), align 1
+ %1 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 1), align 1
+ %inc1 = add nsw i8 %1, 2
+ store i8 %inc1, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 1), align 1
+ %2 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 2), align 1
+ %inc2 = add nsw i8 %2, 3
+ store i8 %inc2, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 2), align 1
+ %3 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 3), align 1
+ %inc3 = add nsw i8 %3, 4
+ store i8 %inc3, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 3), align 1
+ ret void
+}
+
+; CHECK-LABEL: test_h2:
+; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha
+; POWER7-DAG: lhz [[REG0_0:[0-9]+]], h2v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: lhz [[REG1_0:[0-9]+]], h2v@toc@l+2([[REGSTRUCT]])
+; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER7-DAG: sth [[REG0_1]], h2v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: sth [[REG1_1]], h2v@toc@l+2([[REGSTRUCT]])
+
+; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha
+; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], h2v@toc@l
+; POWER8-DAG: lhz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
+; POWER8-DAG: lhz [[REG1_0:[0-9]+]], 2([[REGSTRUCT]])
+; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER8-DAG: sth [[REG0_1]], 0([[REGSTRUCT]])
+; POWER8-DAG: sth [[REG1_1]], 2([[REGSTRUCT]])
+define void @test_h2() nounwind {
+entry:
+ %0 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2
+ %inc0 = add nsw i16 %0, 1
+ store i16 %inc0, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2
+ %1 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2
+ %inc1 = add nsw i16 %1, 2
+ store i16 %inc1, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2
+ ret void
+}
+
+; CHECK-LABEL: test_h2_optsize:
+; CHECK: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha
+; CHECK-DAG: lhz [[REG0_0:[0-9]+]], h2v@toc@l([[REGSTRUCT]])
+; CHECK-DAG: lhz [[REG1_0:[0-9]+]], h2v@toc@l+2([[REGSTRUCT]])
+; CHECK-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; CHECK-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; CHECK-DAG: sth [[REG0_1]], h2v@toc@l([[REGSTRUCT]])
+; CHECK-DAG: sth [[REG1_1]], h2v@toc@l+2([[REGSTRUCT]])
+define void @test_h2_optsize() optsize nounwind {
+entry:
+ %0 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2
+ %inc0 = add nsw i16 %0, 1
+ store i16 %inc0, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2
+ %1 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2
+ %inc1 = add nsw i16 %1, 2
+ store i16 %inc1, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2
+ ret void
+}
+
+; CHECK-LABEL: test_b8:
+; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, b8v@toc@ha
+; POWER7-DAG: lbz [[REG0_0:[0-9]+]], b8v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG1_0:[0-9]+]], b8v@toc@l+1([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG2_0:[0-9]+]], b8v@toc@l+2([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG3_0:[0-9]+]], b8v@toc@l+3([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG4_0:[0-9]+]], b8v@toc@l+4([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG5_0:[0-9]+]], b8v@toc@l+5([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG6_0:[0-9]+]], b8v@toc@l+6([[REGSTRUCT]])
+; POWER7-DAG: lbz [[REG7_0:[0-9]+]], b8v@toc@l+7([[REGSTRUCT]])
+; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
+; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
+; POWER7-DAG: addi [[REG4_1:[0-9]+]], [[REG4_0]], 5
+; POWER7-DAG: addi [[REG5_1:[0-9]+]], [[REG5_0]], 6
+; POWER7-DAG: addi [[REG6_1:[0-9]+]], [[REG6_0]], 7
+; POWER7-DAG: addi [[REG7_1:[0-9]+]], [[REG7_0]], 8
+; POWER7-DAG: stb [[REG0_1]], b8v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG1_1]], b8v@toc@l+1([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG2_1]], b8v@toc@l+2([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG3_1]], b8v@toc@l+3([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG4_1]], b8v@toc@l+4([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG5_1]], b8v@toc@l+5([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG6_1]], b8v@toc@l+6([[REGSTRUCT]])
+; POWER7-DAG: stb [[REG7_1]], b8v@toc@l+7([[REGSTRUCT]])
+
+; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, b8v@toc@ha
+; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], b8v@toc@l
+; POWER8-DAG: lbz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG1_0:[0-9]+]], 1([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG2_0:[0-9]+]], 2([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG3_0:[0-9]+]], 3([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG4_0:[0-9]+]], 4([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG5_0:[0-9]+]], 5([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG6_0:[0-9]+]], 6([[REGSTRUCT]])
+; POWER8-DAG: lbz [[REG7_0:[0-9]+]], 7([[REGSTRUCT]])
+; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
+; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
+; POWER8-DAG: addi [[REG4_1:[0-9]+]], [[REG4_0]], 5
+; POWER8-DAG: addi [[REG5_1:[0-9]+]], [[REG5_0]], 6
+; POWER8-DAG: addi [[REG6_1:[0-9]+]], [[REG6_0]], 7
+; POWER8-DAG: addi [[REG7_1:[0-9]+]], [[REG7_0]], 8
+; POWER8-DAG: stb [[REG0_1]], 0([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG1_1]], 1([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG2_1]], 2([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG3_1]], 3([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG4_1]], 4([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG5_1]], 5([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG6_1]], 6([[REGSTRUCT]])
+; POWER8-DAG: stb [[REG7_1]], 7([[REGSTRUCT]])
+define void @test_b8() nounwind {
+entry:
+ %0 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 0), align 1
+ %inc0 = add nsw i8 %0, 1
+ store i8 %inc0, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 0), align 1
+ %1 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 1), align 1
+ %inc1 = add nsw i8 %1, 2
+ store i8 %inc1, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 1), align 1
+ %2 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 2), align 1
+ %inc2 = add nsw i8 %2, 3
+ store i8 %inc2, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 2), align 1
+ %3 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 3), align 1
+ %inc3 = add nsw i8 %3, 4
+ store i8 %inc3, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 3), align 1
+ %4 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 4), align 1
+ %inc4 = add nsw i8 %4, 5
+ store i8 %inc4, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 4), align 1
+ %5 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 5), align 1
+ %inc5 = add nsw i8 %5, 6
+ store i8 %inc5, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 5), align 1
+ %6 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 6), align 1
+ %inc6 = add nsw i8 %6, 7
+ store i8 %inc6, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 6), align 1
+ %7 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 7), align 1
+ %inc7 = add nsw i8 %7, 8
+ store i8 %inc7, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 7), align 1
+ ret void
+}
+
+; CHECK-LABEL: test_h4:
+; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, h4v@toc@ha
+; POWER7-DAG: lhz [[REG0_0:[0-9]+]], h4v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: lhz [[REG1_0:[0-9]+]], h4v@toc@l+2([[REGSTRUCT]])
+; POWER7-DAG: lhz [[REG2_0:[0-9]+]], h4v@toc@l+4([[REGSTRUCT]])
+; POWER7-DAG: lhz [[REG3_0:[0-9]+]], h4v@toc@l+6([[REGSTRUCT]])
+; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
+; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
+; POWER7-DAG: sth [[REG0_1]], h4v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: sth [[REG1_1]], h4v@toc@l+2([[REGSTRUCT]])
+; POWER7-DAG: sth [[REG2_1]], h4v@toc@l+4([[REGSTRUCT]])
+; POWER7-DAG: sth [[REG3_1]], h4v@toc@l+6([[REGSTRUCT]])
+
+; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, h4v@toc@ha
+; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], h4v@toc@l
+; POWER8-DAG: lhz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
+; POWER8-DAG: lhz [[REG1_0:[0-9]+]], 2([[REGSTRUCT]])
+; POWER8-DAG: lhz [[REG2_0:[0-9]+]], 4([[REGSTRUCT]])
+; POWER8-DAG: lhz [[REG3_0:[0-9]+]], 6([[REGSTRUCT]])
+; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3
+; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4
+; POWER8-DAG: sth [[REG0_1]], 0([[REGSTRUCT]])
+; POWER8-DAG: sth [[REG1_1]], 2([[REGSTRUCT]])
+; POWER8-DAG: sth [[REG2_1]], 4([[REGSTRUCT]])
+; POWER8-DAG: sth [[REG3_1]], 6([[REGSTRUCT]])
+define void @test_h4() nounwind {
+entry:
+ %0 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 0), align 2
+ %inc0 = add nsw i16 %0, 1
+ store i16 %inc0, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 0), align 2
+ %1 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 1), align 2
+ %inc1 = add nsw i16 %1, 2
+ store i16 %inc1, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 1), align 2
+ %2 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 2), align 2
+ %inc2 = add nsw i16 %2, 3
+ store i16 %inc2, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 2), align 2
+ %3 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 3), align 2
+ %inc3 = add nsw i16 %3, 4
+ store i16 %inc3, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 3), align 2
+ ret void
+}
+
+; CHECK-LABEL: test_w2:
+; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, w2v@toc@ha
+; POWER7-DAG: lwz [[REG0_0:[0-9]+]], w2v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: lwz [[REG1_0:[0-9]+]], w2v@toc@l+4([[REGSTRUCT]])
+; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER7-DAG: stw [[REG0_1]], w2v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: stw [[REG1_1]], w2v@toc@l+4([[REGSTRUCT]])
+
+; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, w2v@toc@ha
+; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], w2v@toc@l
+; POWER8-DAG: lwz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
+; POWER8-DAG: lwz [[REG1_0:[0-9]+]], 4([[REGSTRUCT]])
+; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER8-DAG: stw [[REG0_1]], 0([[REGSTRUCT]])
+; POWER8-DAG: stw [[REG1_1]], 4([[REGSTRUCT]])
+define void @test_w2() nounwind {
+entry:
+ %0 = load i32, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 0), align 4
+ %inc0 = add nsw i32 %0, 1
+ store i32 %inc0, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 0), align 4
+ %1 = load i32, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 1), align 4
+ %inc1 = add nsw i32 %1, 2
+ store i32 %inc1, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 1), align 4
+ ret void
+}
+
+; CHECK-LABEL: test_d2:
+; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha
+; POWER7-DAG: ld [[REG0_0:[0-9]+]], d2v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: ld [[REG1_0:[0-9]+]], d2v@toc@l+8([[REGSTRUCT]])
+; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER7-DAG: std [[REG0_1]], d2v@toc@l([[REGSTRUCT]])
+; POWER7-DAG: std [[REG1_1]], d2v@toc@l+8([[REGSTRUCT]])
+
+; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha
+; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], d2v@toc@l
+; POWER8-DAG: ld [[REG0_0:[0-9]+]], 0([[REGSTRUCT]])
+; POWER8-DAG: ld [[REG1_0:[0-9]+]], 8([[REGSTRUCT]])
+; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2
+; POWER8-DAG: std [[REG0_1]], 0([[REGSTRUCT]])
+; POWER8-DAG: std [[REG1_1]], 8([[REGSTRUCT]])
+define void @test_d2() nounwind {
+entry:
+ %0 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 0), align 8
+ %inc0 = add nsw i64 %0, 1
+ store i64 %inc0, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 0), align 8
+ %1 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8
+ %inc1 = add nsw i64 %1, 2
+ store i64 %inc1, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8
+ ret void
+}
+
+; Make sure the optimization fires on power8 if there is a single use resulting
+; in a better fusion opportunity.
+; register 3 is the return value, so it should be chosen
+; CHECK-LABEL: test_singleuse:
+; CHECK: addis 3, 2, d2v@toc@ha
+; CHECK: ld 3, d2v@toc@l+8(3)
+define i64 @test_singleuse() nounwind {
+entry:
+ %0 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8
+ ret i64 %0
+}
+
+; Make sure the optimization fails to fire if the symbol is aligned, but the offset is not.
+; CHECK-LABEL: test_misalign
+; POWER7: addis [[REGSTRUCT_0:[0-9]+]], 2, misalign_v@toc@ha
+; POWER7: addi [[REGSTRUCT:[0-9]+]], [[REGSTRUCT_0]], misalign_v@toc@l
+; POWER7: li [[OFFSET_REG:[0-9]+]], 1
+; POWER7: ldx [[REG0_0:[0-9]+]], [[REGSTRUCT]], [[OFFSET_REG]]
+; POWER7: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER7: stdx [[REG0_1]], [[REGSTRUCT]], [[OFFSET_REG]]
+define void @test_misalign() nounwind {
+entry:
+ %0 = load i64, i64* getelementptr inbounds (%struct.misalign, %struct.misalign* @misalign_v, i32 0, i32 1), align 1
+ %inc0 = add nsw i64 %0, 1
+ store i64 %inc0, i64* getelementptr inbounds (%struct.misalign, %struct.misalign* @misalign_v, i32 0, i32 1), align 1
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll b/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
new file mode 100644
index 000000000000..2f75190327ef
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
@@ -0,0 +1,784 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+;
+; Note: Lots of tests use inline asm instead of regular calls.
+; This allows to have a better control on what the allocation will do.
+; Otherwise, we may have spill right in the entry block, defeating
+; shrink-wrapping. Moreover, some of the inline asm statement (nop)
+; are here to ensure that the related paths do not end up as critical
+; edges.
+
+
+; Initial motivating example: Simple diamond with a call just on one side.
+; CHECK-LABEL: foo:
+;
+; Compare the arguments and return
+; No prologue needed.
+; ENABLE: cmpw 0, 3, 4
+; ENABLE-NEXT: bgelr 0
+;
+; Prologue code.
+; At a minimum, we save/restore the link register. Other registers may be saved
+; as well.
+; CHECK: mflr
+;
+; Compare the arguments and jump to exit.
+; After the prologue is set.
+; DISABLE: cmpw 0, 3, 4
+; DISABLE-NEXT: bge 0, .[[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Store %a on the stack
+; CHECK: stw 3, {{[0-9]+([0-9]+)}}
+; Set the alloca address in the second argument.
+; CHECK-NEXT: addi 4, 1, {{[0-9]+}}
+; Set the first argument to zero.
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: bl doSomething
+;
+; With shrink-wrapping, epilogue is just after the call.
+; Restore the link register and return.
+; Note that there could be other epilog code before the link register is
+; restored but we will not check for it here.
+; ENABLE: mtlr
+; ENABLE-NEXT: blr
+;
+; DISABLE: [[EXIT_LABEL]]:
+;
+; Without shrink-wrapping, epilogue is in the exit block.
+; Epilogue code. (What we pop does not matter.)
+; DISABLE: mtlr {{[0-9]+}}
+; DISABLE-NEXT: blr
+;
+
+define i32 @foo(i32 %a, i32 %b) {
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+false:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+ ret i32 %tmp.0
+}
+
+; Function Attrs: optsize
+declare i32 @doSomething(i32, i32*)
+
+
+
+; Check that we do not perform the restore inside the loop whereas the save
+; is outside.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
+;
+; Shrink-wrapping allows to skip the prologue in the else case.
+; ENABLE: cmplwi 0, 3, 0
+; ENABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the link register
+; CHECK: mflr {{[0-9]+}}
+;
+; DISABLE: cmplwi 0, 3, 0
+; DISABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Loop preheader
+; CHECK-DAG: li [[SUM:[0-9]+]], 0
+; CHECK-DAG: li [[IV:[0-9]+]], 10
+;
+; Loop body
+; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
+; CHECK: bl something
+; CHECK-DAG: addi [[IV]], [[IV]], -1
+; CHECK-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-NEXT: cmplwi [[IV]], 0
+; CHECK-NEXT: bne 0, .[[LOOP]]
+;
+; Next BB.
+; CHECK: slwi 3, [[SUM]], 3
+;
+; Jump to epilogue.
+; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: .[[ELSE_LABEL]]: # %if.else
+; Shift second argument by one and store into returned register.
+; DISABLE: slwi 3, 4, 1
+; DISABLE: .[[EPILOG_BB]]: # %if.end
+;
+; Epilogue code.
+; CHECK: mtlr {{[0-9]+}}
+; CHECK-NEXT: blr
+;
+; ENABLE: .[[ELSE_LABEL]]: # %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: slwi 3, 4, 1
+; ENABLE-NEXT: blr
+define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare i32 @something(...)
+
+; Check that we do not perform the shrink-wrapping inside the loop even
+; though that would be legal. The cost model must prevent that.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
+; Prologue code.
+; Make sure we save the link register before the call
+; CHECK: mflr {{[0-9]+}}
+;
+; Loop preheader
+; CHECK-DAG: li [[SUM:[0-9]+]], 0
+; CHECK-DAG: li [[IV:[0-9]+]], 10
+;
+; Loop body
+; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
+; CHECK: bl something
+; CHECK-DAG: addi [[IV]], [[IV]], -1
+; CHECK-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-NEXT: cmplwi [[IV]], 0
+; CHECK-NEXT: bne 0, .[[LOOP]]
+;
+; Next BB
+; CHECK: %for.exit
+; CHECK: mtlr {{[0-9]+}}
+; CHECK-NEXT: blr
+define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
+entry:
+ br label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
+ %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.03
+ %inc = add nuw nsw i32 %i.04, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.exit, label %for.body
+
+for.exit:
+ tail call void asm "nop", ""()
+ br label %for.end
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+
+; Check with a more complex case that we do not have save within the loop and
+; restore outside.
+; CHECK-LABEL: loopInfoSaveOutsideLoop:
+;
+; ENABLE: cmplwi 0, 3, 0
+; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the link register
+; CHECK: mflr {{[0-9]+}}
+;
+; DISABLE: cmplwi 0, 3, 0
+; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Loop preheader
+; CHECK-DAG: li [[SUM:[0-9]+]], 0
+; CHECK-DAG: li [[IV:[0-9]+]], 10
+;
+; Loop body
+; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
+; CHECK: bl something
+; CHECK-DAG: addi [[IV]], [[IV]], -1
+; CHECK-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-NEXT: cmplwi [[IV]], 0
+; CHECK-NEXT: bne 0, .[[LOOP]]
+;
+; Next BB
+; CHECK: bl somethingElse
+; CHECK: slwi 3, [[SUM]], 3
+;
+; Jump to epilogue
+; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: .[[ELSE_LABEL]]: # %if.else
+; Shift second argument by one and store into returned register.
+; DISABLE: slwi 3, 4, 1
+;
+; DISABLE: .[[EPILOG_BB]]: # %if.end
+; Epilog code
+; CHECK: mtlr {{[0-9]+}}
+; CHECK-NEXT: blr
+;
+; ENABLE: .[[ELSE_LABEL]]: # %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: slwi 3, 4, 1
+; ENABLE-NEXT: blr
+define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ tail call void bitcast (void (...)* @somethingElse to void ()*)()
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare void @somethingElse(...)
+
+; Check with a more complex case that we do not have restore within the loop and
+; save outside.
+; CHECK-LABEL: loopInfoRestoreOutsideLoop:
+;
+; ENABLE: cmplwi 0, 3, 0
+; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the link register
+; CHECK: mflr {{[0-9]+}}
+;
+; DISABLE: cmplwi 0, 3, 0
+; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: bl somethingElse
+;
+; Loop preheader
+; CHECK-DAG: li [[SUM:[0-9]+]], 0
+; CHECK-DAG: li [[IV:[0-9]+]], 10
+;
+; Loop body
+; CHECK: .[[LOOP:LBB[0-9_]+]]: # %for.body
+; CHECK: bl something
+; CHECK-DAG: addi [[IV]], [[IV]], -1
+; CHECK-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-NEXT: cmplwi [[IV]], 0
+; CHECK-NEXT: bne 0, .[[LOOP]]
+;
+; Next BB.
+; slwi 3, [[SUM]], 3
+;
+; DISABLE: b .[[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: .[[ELSE_LABEL]]: # %if.else
+; Shift second argument by one and store into returned register.
+; DISABLE: slwi 3, 4, 1
+; DISABLE: .[[EPILOG_BB]]: # %if.end
+;
+; Epilogue code.
+; CHECK: mtlr {{[0-9]+}}
+; CHECK-NEXT: blr
+;
+; ENABLE: .[[ELSE_LABEL]]: # %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: slwi 3, 4, 1
+; ENABLE-NEXT: blr
+define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ tail call void bitcast (void (...)* @somethingElse to void ()*)()
+ br label %for.body
+
+for.body: ; preds = %for.body, %if.then
+ %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
+ %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+; Check that we handle function with no frame information correctly.
+; CHECK-LABEL: emptyFrame:
+; CHECK: # %entry
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: blr
+define i32 @emptyFrame() {
+entry:
+ ret i32 0
+}
+
+
+; Check that we handle inline asm correctly.
+; CHECK-LABEL: inlineAsm:
+;
+; ENABLE: cmplwi 0, 3, 0
+; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r14
+; ENABLE-DAG: li [[IV:[0-9]+]], 10
+; ENABLE-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
+;
+; DISABLE: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
+; DISABLE: cmplwi 0, 3, 0
+; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+; DISABLE: li [[IV:[0-9]+]], 10
+;
+; CHECK: nop
+; CHECK: mtctr [[IV]]
+;
+; CHECK: .[[LOOP_LABEL:LBB[0-9_]+]]: # %for.body
+; Inline asm statement.
+; CHECK: addi 14, 14, 1
+; CHECK: bdnz .[[LOOP_LABEL]]
+;
+; Epilogue code.
+; CHECK: li 3, 0
+; CHECK-DAG: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
+; CHECK: nop
+; CHECK: blr
+;
+; CHECK: [[ELSE_LABEL]]
+; CHECK-NEXT: slwi 3, 4, 1
+; DISABLE: ld 14, -[[STACK_OFFSET]](1) # 8-byte Folded Reload
+; CHECK-NEXT blr
+;
+define i32 @inlineAsm(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ tail call void asm "addi 14, 14, 1", "~{r14}"()
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.exit, label %for.body
+
+for.exit:
+ tail call void asm "nop", ""()
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %for.body, %if.else
+ %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
+ ret i32 %sum.0
+}
+
+
+; Check that we handle calls to variadic functions correctly.
+; CHECK-LABEL: callVariadicFunc:
+;
+; ENABLE: cmplwi 0, 3, 0
+; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: mflr {{[0-9]+}}
+;
+; DISABLE: cmplwi 0, 3, 0
+; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Setup of the varags.
+; CHECK: mr 4, 3
+; CHECK-NEXT: mr 5, 3
+; CHECK-NEXT: mr 6, 3
+; CHECK-NEXT: mr 7, 3
+; CHECK-NEXT: mr 8, 3
+; CHECK-NEXT: mr 9, 3
+; CHECK-NEXT: bl someVariadicFunc
+; CHECK: slwi 3, 3, 3
+; DISABLE: b .[[EPILOGUE_BB:LBB[0-9_]+]]
+;
+; ENABLE: mtlr {{[0-9]+}}
+; ENABLE-NEXT: blr
+;
+; CHECK: .[[ELSE_LABEL]]: # %if.else
+; CHECK-NEXT: slwi 3, 4, 1
+;
+; DISABLE: .[[EPILOGUE_BB]]: # %if.end
+; DISABLE: mtlr
+; CHECK: blr
+define i32 @callVariadicFunc(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
+ %shl = shl i32 %call, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
+ ret i32 %sum.0
+}
+
+declare i32 @someVariadicFunc(i32, ...)
+
+
+
+; Make sure we do not insert unreachable code after noreturn function.
+; Although this is not incorrect to insert such code, it is useless
+; and it hurts the binary size.
+;
+; CHECK-LABEL: noreturn:
+; DISABLE: mflr {{[0-9]+}}
+;
+; CHECK: cmplwi 3, 0
+; CHECK-NEXT: bne{{[-]?}} 0, .[[ABORT:LBB[0-9_]+]]
+;
+; CHECK: li 3, 42
+;
+; DISABLE: mtlr {{[0-9]+}}
+;
+; CHECK-NEXT: blr
+;
+; CHECK: .[[ABORT]]: # %if.abort
+;
+; ENABLE: mflr {{[0-9]+}}
+;
+; CHECK: bl abort
+; ENABLE-NOT: mtlr {{[0-9]+}}
+define i32 @noreturn(i8 signext %bad_thing) {
+entry:
+ %tobool = icmp eq i8 %bad_thing, 0
+ br i1 %tobool, label %if.end, label %if.abort
+
+if.abort:
+ tail call void @abort() #0
+ unreachable
+
+if.end:
+ ret i32 42
+}
+
+declare void @abort() #0
+
+attributes #0 = { noreturn nounwind }
+
+
+; Make sure that we handle infinite loops properly When checking that the Save
+; and Restore blocks are control flow equivalent, the loop searches for the
+; immediate (post) dominator for the (restore) save blocks. When either the Save
+; or Restore block is located in an infinite loop the only immediate (post)
+; dominator is itself. In this case, we cannot perform shrink wrapping, but we
+; should return gracefully and continue compilation.
+; The only condition for this test is the compilation finishes correctly.
+;
+; CHECK-LABEL: infiniteloop
+; CHECK: blr
+define void @infiniteloop() {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %ptr = alloca i32, i32 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.03
+ store i32 %add, i32* %ptr
+ br label %for.body
+
+if.end:
+ ret void
+}
+
+; Another infinite loop test this time with a body bigger than just one block.
+; CHECK-LABEL: infiniteloop2
+; CHECK: blr
+define void @infiniteloop2() {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %ptr = alloca i32, i32 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
+ %call = tail call i32 asm "mftb $0, 268", "=r,~{r14}"()
+ %add = add nsw i32 %call, %sum.03
+ store i32 %add, i32* %ptr
+ br i1 undef, label %body1, label %body2
+
+body1:
+ tail call void asm sideeffect "nop", "~{r14}"()
+ br label %for.body
+
+body2:
+ tail call void asm sideeffect "nop", "~{r14}"()
+ br label %for.body
+
+if.end:
+ ret void
+}
+
+; Another infinite loop test this time with two nested infinite loop.
+; CHECK-LABEL: infiniteloop3
+; CHECK: # %end
+define void @infiniteloop3() {
+entry:
+ br i1 undef, label %loop2a, label %body
+
+body: ; preds = %entry
+ br i1 undef, label %loop2a, label %end
+
+loop1: ; preds = %loop2a, %loop2b
+ %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
+ %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
+ %0 = icmp eq i32* %var, null
+ %next.load = load i32*, i32** undef
+ br i1 %0, label %loop2a, label %loop2b
+
+loop2a: ; preds = %loop1, %body, %entry
+ %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
+ %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
+ br label %loop1
+
+loop2b: ; preds = %loop1
+ %gep1 = bitcast i32* %var.phi to i32*
+ %next.ptr = bitcast i32* %gep1 to i32**
+ store i32* %next.phi, i32** %next.ptr
+ br label %loop1
+
+end:
+ ret void
+}
+
+@columns = external global [0 x i32], align 4
+@lock = common global i32 0, align 4
+@htindex = common global i32 0, align 4
+@stride = common global i32 0, align 4
+@ht = common global i32* null, align 8
+@he = common global i8* null, align 8
+
+; Test for a bug that was caused when save point was equal to restore point.
+; Function Attrs: nounwind
+; CHECK-LABEL: transpose
+;
+; Store of callee-save register saved by shrink wrapping
+; CHECK: std [[CSR:[0-9]+]], -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill
+;
+; Reload of callee-save register
+; CHECK: ld [[CSR]], -[[STACK_OFFSET]](1) # 8-byte Folded Reload
+;
+; Ensure no subsequent uses of callee-save register before end of function
+; CHECK-NOT: {{[a-z]+}} [[CSR]]
+; CHECK: blr
+define signext i32 @transpose() {
+entry:
+ %0 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 1), align 4
+ %shl.i = shl i32 %0, 7
+ %1 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 2), align 4
+ %or.i = or i32 %shl.i, %1
+ %shl1.i = shl i32 %or.i, 7
+ %2 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 3), align 4
+ %or2.i = or i32 %shl1.i, %2
+ %3 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 7), align 4
+ %shl3.i = shl i32 %3, 7
+ %4 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 6), align 4
+ %or4.i = or i32 %shl3.i, %4
+ %shl5.i = shl i32 %or4.i, 7
+ %5 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 5), align 4
+ %or6.i = or i32 %shl5.i, %5
+ %cmp.i = icmp ugt i32 %or2.i, %or6.i
+ br i1 %cmp.i, label %cond.true.i, label %cond.false.i
+
+cond.true.i:
+ %shl7.i = shl i32 %or2.i, 7
+ %6 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
+ %or8.i = or i32 %6, %shl7.i
+ %conv.i = zext i32 %or8.i to i64
+ %shl9.i = shl nuw nsw i64 %conv.i, 21
+ %conv10.i = zext i32 %or6.i to i64
+ %or11.i = or i64 %shl9.i, %conv10.i
+ br label %hash.exit
+
+cond.false.i:
+ %shl12.i = shl i32 %or6.i, 7
+ %7 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @columns, i64 0, i64 4), align 4
+ %or13.i = or i32 %7, %shl12.i
+ %conv14.i = zext i32 %or13.i to i64
+ %shl15.i = shl nuw nsw i64 %conv14.i, 21
+ %conv16.i = zext i32 %or2.i to i64
+ %or17.i = or i64 %shl15.i, %conv16.i
+ br label %hash.exit
+
+hash.exit:
+ %cond.i = phi i64 [ %or11.i, %cond.true.i ], [ %or17.i, %cond.false.i ]
+ %shr.29.i = lshr i64 %cond.i, 17
+ %conv18.i = trunc i64 %shr.29.i to i32
+ store i32 %conv18.i, i32* @lock, align 4
+ %rem.i = srem i64 %cond.i, 1050011
+ %conv19.i = trunc i64 %rem.i to i32
+ store i32 %conv19.i, i32* @htindex, align 4
+ %rem20.i = urem i32 %conv18.i, 179
+ %add.i = or i32 %rem20.i, 131072
+ store i32 %add.i, i32* @stride, align 4
+ %8 = load i32*, i32** @ht, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %8, i64 %rem.i
+ %9 = load i32, i32* %arrayidx, align 4
+ %cmp1 = icmp eq i32 %9, %conv18.i
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then:
+ %idxprom.lcssa = phi i64 [ %rem.i, %hash.exit ], [ %idxprom.1, %if.end ], [ %idxprom.2, %if.end.1 ], [ %idxprom.3, %if.end.2 ], [ %idxprom.4, %if.end.3 ], [ %idxprom.5, %if.end.4 ], [ %idxprom.6, %if.end.5 ], [ %idxprom.7, %if.end.6 ]
+ %10 = load i8*, i8** @he, align 8
+ %arrayidx3 = getelementptr inbounds i8, i8* %10, i64 %idxprom.lcssa
+ %11 = load i8, i8* %arrayidx3, align 1
+ %conv = sext i8 %11 to i32
+ br label %cleanup
+
+if.end:
+ %add = add nsw i32 %add.i, %conv19.i
+ %cmp4 = icmp sgt i32 %add, 1050010
+ %sub = add nsw i32 %add, -1050011
+ %sub.add = select i1 %cmp4, i32 %sub, i32 %add
+ %idxprom.1 = sext i32 %sub.add to i64
+ %arrayidx.1 = getelementptr inbounds i32, i32* %8, i64 %idxprom.1
+ %12 = load i32, i32* %arrayidx.1, align 4
+ %cmp1.1 = icmp eq i32 %12, %conv18.i
+ br i1 %cmp1.1, label %if.then, label %if.end.1
+
+cleanup:
+ %retval.0 = phi i32 [ %conv, %if.then ], [ -128, %if.end.6 ]
+ ret i32 %retval.0
+
+if.end.1:
+ %add.1 = add nsw i32 %add.i, %sub.add
+ %cmp4.1 = icmp sgt i32 %add.1, 1050010
+ %sub.1 = add nsw i32 %add.1, -1050011
+ %sub.add.1 = select i1 %cmp4.1, i32 %sub.1, i32 %add.1
+ %idxprom.2 = sext i32 %sub.add.1 to i64
+ %arrayidx.2 = getelementptr inbounds i32, i32* %8, i64 %idxprom.2
+ %13 = load i32, i32* %arrayidx.2, align 4
+ %cmp1.2 = icmp eq i32 %13, %conv18.i
+ br i1 %cmp1.2, label %if.then, label %if.end.2
+
+if.end.2:
+ %add.2 = add nsw i32 %add.i, %sub.add.1
+ %cmp4.2 = icmp sgt i32 %add.2, 1050010
+ %sub.2 = add nsw i32 %add.2, -1050011
+ %sub.add.2 = select i1 %cmp4.2, i32 %sub.2, i32 %add.2
+ %idxprom.3 = sext i32 %sub.add.2 to i64
+ %arrayidx.3 = getelementptr inbounds i32, i32* %8, i64 %idxprom.3
+ %14 = load i32, i32* %arrayidx.3, align 4
+ %cmp1.3 = icmp eq i32 %14, %conv18.i
+ br i1 %cmp1.3, label %if.then, label %if.end.3
+
+if.end.3:
+ %add.3 = add nsw i32 %add.i, %sub.add.2
+ %cmp4.3 = icmp sgt i32 %add.3, 1050010
+ %sub.3 = add nsw i32 %add.3, -1050011
+ %sub.add.3 = select i1 %cmp4.3, i32 %sub.3, i32 %add.3
+ %idxprom.4 = sext i32 %sub.add.3 to i64
+ %arrayidx.4 = getelementptr inbounds i32, i32* %8, i64 %idxprom.4
+ %15 = load i32, i32* %arrayidx.4, align 4
+ %cmp1.4 = icmp eq i32 %15, %conv18.i
+ br i1 %cmp1.4, label %if.then, label %if.end.4
+
+if.end.4:
+ %add.4 = add nsw i32 %add.i, %sub.add.3
+ %cmp4.4 = icmp sgt i32 %add.4, 1050010
+ %sub.4 = add nsw i32 %add.4, -1050011
+ %sub.add.4 = select i1 %cmp4.4, i32 %sub.4, i32 %add.4
+ %idxprom.5 = sext i32 %sub.add.4 to i64
+ %arrayidx.5 = getelementptr inbounds i32, i32* %8, i64 %idxprom.5
+ %16 = load i32, i32* %arrayidx.5, align 4
+ %cmp1.5 = icmp eq i32 %16, %conv18.i
+ br i1 %cmp1.5, label %if.then, label %if.end.5
+
+if.end.5:
+ %add.5 = add nsw i32 %add.i, %sub.add.4
+ %cmp4.5 = icmp sgt i32 %add.5, 1050010
+ %sub.5 = add nsw i32 %add.5, -1050011
+ %sub.add.5 = select i1 %cmp4.5, i32 %sub.5, i32 %add.5
+ %idxprom.6 = sext i32 %sub.add.5 to i64
+ %arrayidx.6 = getelementptr inbounds i32, i32* %8, i64 %idxprom.6
+ %17 = load i32, i32* %arrayidx.6, align 4
+ %cmp1.6 = icmp eq i32 %17, %conv18.i
+ br i1 %cmp1.6, label %if.then, label %if.end.6
+
+if.end.6:
+ %add.6 = add nsw i32 %add.i, %sub.add.5
+ %cmp4.6 = icmp sgt i32 %add.6, 1050010
+ %sub.6 = add nsw i32 %add.6, -1050011
+ %sub.add.6 = select i1 %cmp4.6, i32 %sub.6, i32 %add.6
+ %idxprom.7 = sext i32 %sub.add.6 to i64
+ %arrayidx.7 = getelementptr inbounds i32, i32* %8, i64 %idxprom.7
+ %18 = load i32, i32* %arrayidx.7, align 4
+ %cmp1.7 = icmp eq i32 %18, %conv18.i
+ br i1 %cmp1.7, label %if.then, label %cleanup
+}
diff --git a/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll b/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll
index ad8ed38da7fa..028006320cb5 100644
--- a/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll
+++ b/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll
@@ -10,7 +10,7 @@ define void @main() {
}
; CHECK-LABEL: @main
-; CHECK-DAG li 4, 0
+; CHECK-DAG: li 4, 0
; CHECK-DAG: crxor 6, 6, 6
; CHECK: bl printf
diff --git a/test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll b/test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll
index e8617ccfc8a5..65b45ea555ae 100644
--- a/test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll
+++ b/test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll
@@ -10,10 +10,10 @@ entry:
ret void
; FIXME: Crashing is not really the correct behavior here, we really should just emit nothing
-; CHECK: Cannot select: 0x{{[0-9,a-f]+}}: ch = Prefetch
-; CHECK: 0x{{[0-9,a-f]+}}: i32 = Constant<0>
-; CHECK-NEXT: 0x{{[0-9,a-f]+}}: i32 = Constant<3>
-; CHECK-NEXT: 0x{{[0-9,a-f]+}}: i32 = Constant<0>
+; CHECK: Cannot select: {{0x[0-9,a-f]+|t[0-9]+}}: ch = Prefetch
+; CHECK: {{0x[0-9,a-f]+|t[0-9]+}}: i32 = Constant<0>
+; CHECK-NEXT: {{0x[0-9,a-f]+|t[0-9]+}}: i32 = Constant<3>
+; CHECK-NEXT: {{0x[0-9,a-f]+|t[0-9]+}}: i32 = Constant<0>
}
diff --git a/test/CodeGen/PowerPC/ppcsoftops.ll b/test/CodeGen/PowerPC/ppcsoftops.ll
new file mode 100644
index 000000000000..56c057613bdc
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppcsoftops.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mtriple=powerpc-unknown-linux-gnu -O0 < %s | FileCheck %s
+define double @foo() #0 {
+entry:
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %0 = load double, double* %a, align 8
+ %1 = load double, double* %b, align 8
+ %add = fadd double %0, %1
+ ret double %add
+
+ ; CHECK-LABEL: __adddf3
+}
+
+define double @foo1() #0 {
+entry:
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %0 = load double, double* %a, align 8
+ %1 = load double, double* %b, align 8
+ %mul = fmul double %0, %1
+ ret double %mul
+
+ ; CHECK-LABEL: __muldf3
+}
+
+define double @foo2() #0 {
+entry:
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %0 = load double, double* %a, align 8
+ %1 = load double, double* %b, align 8
+ %sub = fsub double %0, %1
+ ret double %sub
+
+ ; CHECK-LABEL: __subdf3
+}
+
+define double @foo3() #0 {
+entry:
+ %a = alloca double, align 8
+ %b = alloca double, align 8
+ %0 = load double, double* %a, align 8
+ %1 = load double, double* %b, align 8
+ %div = fdiv double %0, %1
+ ret double %div
+
+ ; CHECK-LABEL: __divdf3
+}
+
+attributes #0 = {"use-soft-float"="true" }
diff --git a/test/CodeGen/PowerPC/pr17168.ll b/test/CodeGen/PowerPC/pr17168.ll
index 096895491381..b1bac59c9ce1 100644
--- a/test/CodeGen/PowerPC/pr17168.ll
+++ b/test/CodeGen/PowerPC/pr17168.ll
@@ -9,7 +9,7 @@ target triple = "powerpc64-unknown-linux-gnu"
@grid_points = external global [3 x i32], align 4
; Function Attrs: nounwind
-define fastcc void @compute_rhs() #0 {
+define fastcc void @compute_rhs() #0 !dbg !114 {
entry:
br i1 undef, label %for.cond871.preheader.for.inc960_crit_edge, label %for.end1042, !dbg !439
@@ -54,11 +54,11 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!438, !464}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 190311)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !298, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 190311)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !298, imports: !2)
!1 = !DIFile(filename: "bt.c", directory: "/home/hfinkel/src/NPB2.3-omp-C/BT")
!2 = !{}
!3 = !{!4, !82, !102, !114, !132, !145, !154, !155, !162, !183, !200, !201, !207, !208, !215, !221, !230, !238, !246, !255, !260, !261, !268, !274, !279, !280, !287, !293}
-!4 = !DISubprogram(name: "main", line: 74, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 74, file: !1, scope: !5, type: !6, variables: !12)
+!4 = distinct !DISubprogram(name: "main", line: 74, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 74, file: !1, scope: !5, type: !6, variables: !12)
!5 = !DIFile(filename: "bt.c", directory: "/home/hfinkel/src/NPB2.3-omp-C/BT")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8, !9}
@@ -67,20 +67,20 @@ attributes #1 = { nounwind readnone }
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !11)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
!12 = !{!13, !14, !15, !16, !17, !18, !19, !21, !22, !23, !25, !26}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 74, arg: 1, scope: !4, file: !5, type: !8)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 74, arg: 2, scope: !4, file: !5, type: !9)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "niter", line: 76, scope: !4, file: !5, type: !8)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "step", line: 76, scope: !4, file: !5, type: !8)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n3", line: 76, scope: !4, file: !5, type: !8)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "nthreads", line: 77, scope: !4, file: !5, type: !8)
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "navg", line: 78, scope: !4, file: !5, type: !20)
+!13 = !DILocalVariable(name: "argc", line: 74, arg: 1, scope: !4, file: !5, type: !8)
+!14 = !DILocalVariable(name: "argv", line: 74, arg: 2, scope: !4, file: !5, type: !9)
+!15 = !DILocalVariable(name: "niter", line: 76, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(name: "step", line: 76, scope: !4, file: !5, type: !8)
+!17 = !DILocalVariable(name: "n3", line: 76, scope: !4, file: !5, type: !8)
+!18 = !DILocalVariable(name: "nthreads", line: 77, scope: !4, file: !5, type: !8)
+!19 = !DILocalVariable(name: "navg", line: 78, scope: !4, file: !5, type: !20)
!20 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "mflops", line: 78, scope: !4, file: !5, type: !20)
-!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "tmax", line: 80, scope: !4, file: !5, type: !20)
-!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "verified", line: 81, scope: !4, file: !5, type: !24)
+!21 = !DILocalVariable(name: "mflops", line: 78, scope: !4, file: !5, type: !20)
+!22 = !DILocalVariable(name: "tmax", line: 80, scope: !4, file: !5, type: !20)
+!23 = !DILocalVariable(name: "verified", line: 81, scope: !4, file: !5, type: !24)
!24 = !DIDerivedType(tag: DW_TAG_typedef, name: "boolean", line: 12, file: !1, baseType: !8)
-!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "class", line: 82, scope: !4, file: !5, type: !11)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "fp", line: 83, scope: !4, file: !5, type: !27)
+!25 = !DILocalVariable(name: "class", line: 82, scope: !4, file: !5, type: !11)
+!26 = !DILocalVariable(name: "fp", line: 83, scope: !4, file: !5, type: !27)
!27 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !28)
!28 = !DIDerivedType(tag: DW_TAG_typedef, name: "FILE", line: 49, file: !1, baseType: !29)
!29 = !DICompositeType(tag: DW_TAG_structure_type, name: "_IO_FILE", line: 271, size: 1728, align: 64, file: !30, elements: !31)
@@ -136,222 +136,222 @@ attributes #1 = { nounwind readnone }
!79 = !DICompositeType(tag: DW_TAG_array_type, size: 160, align: 8, baseType: !11, elements: !80)
!80 = !{!81}
!81 = !DISubrange(count: 20)
-!82 = !DISubprogram(name: "verify", line: 2388, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2388, file: !1, scope: !5, type: !83, variables: !86)
+!82 = distinct !DISubprogram(name: "verify", line: 2388, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2388, file: !1, scope: !5, type: !83, variables: !86)
!83 = !DISubroutineType(types: !84)
!84 = !{null, !8, !10, !85}
!85 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !24)
!86 = !{!87, !88, !89, !90, !94, !95, !96, !97, !98, !99, !100, !101}
-!87 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "no_time_steps", line: 2388, arg: 1, scope: !82, file: !5, type: !8)
-!88 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "class", line: 2388, arg: 2, scope: !82, file: !5, type: !10)
-!89 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "verified", line: 2388, arg: 3, scope: !82, file: !5, type: !85)
-!90 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xcrref", line: 2397, scope: !82, file: !5, type: !91)
+!87 = !DILocalVariable(name: "no_time_steps", line: 2388, arg: 1, scope: !82, file: !5, type: !8)
+!88 = !DILocalVariable(name: "class", line: 2388, arg: 2, scope: !82, file: !5, type: !10)
+!89 = !DILocalVariable(name: "verified", line: 2388, arg: 3, scope: !82, file: !5, type: !85)
+!90 = !DILocalVariable(name: "xcrref", line: 2397, scope: !82, file: !5, type: !91)
!91 = !DICompositeType(tag: DW_TAG_array_type, size: 320, align: 64, baseType: !20, elements: !92)
!92 = !{!93}
!93 = !DISubrange(count: 5)
-!94 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xceref", line: 2397, scope: !82, file: !5, type: !91)
-!95 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xcrdif", line: 2397, scope: !82, file: !5, type: !91)
-!96 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xcedif", line: 2397, scope: !82, file: !5, type: !91)
-!97 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "epsilon", line: 2398, scope: !82, file: !5, type: !20)
-!98 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xce", line: 2398, scope: !82, file: !5, type: !91)
-!99 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xcr", line: 2398, scope: !82, file: !5, type: !91)
-!100 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dtref", line: 2398, scope: !82, file: !5, type: !20)
-!101 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 2399, scope: !82, file: !5, type: !8)
-!102 = !DISubprogram(name: "rhs_norm", line: 266, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 266, file: !1, scope: !5, type: !103, variables: !106)
+!94 = !DILocalVariable(name: "xceref", line: 2397, scope: !82, file: !5, type: !91)
+!95 = !DILocalVariable(name: "xcrdif", line: 2397, scope: !82, file: !5, type: !91)
+!96 = !DILocalVariable(name: "xcedif", line: 2397, scope: !82, file: !5, type: !91)
+!97 = !DILocalVariable(name: "epsilon", line: 2398, scope: !82, file: !5, type: !20)
+!98 = !DILocalVariable(name: "xce", line: 2398, scope: !82, file: !5, type: !91)
+!99 = !DILocalVariable(name: "xcr", line: 2398, scope: !82, file: !5, type: !91)
+!100 = !DILocalVariable(name: "dtref", line: 2398, scope: !82, file: !5, type: !20)
+!101 = !DILocalVariable(name: "m", line: 2399, scope: !82, file: !5, type: !8)
+!102 = distinct !DISubprogram(name: "rhs_norm", line: 266, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 266, file: !1, scope: !5, type: !103, variables: !106)
!103 = !DISubroutineType(types: !104)
!104 = !{null, !105}
!105 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !20)
!106 = !{!107, !108, !109, !110, !111, !112, !113}
-!107 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "rms", line: 266, arg: 1, scope: !102, file: !5, type: !105)
-!108 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 271, scope: !102, file: !5, type: !8)
-!109 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 271, scope: !102, file: !5, type: !8)
-!110 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 271, scope: !102, file: !5, type: !8)
-!111 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", line: 271, scope: !102, file: !5, type: !8)
-!112 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 271, scope: !102, file: !5, type: !8)
-!113 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "add", line: 272, scope: !102, file: !5, type: !20)
-!114 = !DISubprogram(name: "compute_rhs", line: 1767, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1767, file: !1, scope: !5, type: !115, function: void ()* @compute_rhs, variables: !117)
+!107 = !DILocalVariable(name: "rms", line: 266, arg: 1, scope: !102, file: !5, type: !105)
+!108 = !DILocalVariable(name: "i", line: 271, scope: !102, file: !5, type: !8)
+!109 = !DILocalVariable(name: "j", line: 271, scope: !102, file: !5, type: !8)
+!110 = !DILocalVariable(name: "k", line: 271, scope: !102, file: !5, type: !8)
+!111 = !DILocalVariable(name: "d", line: 271, scope: !102, file: !5, type: !8)
+!112 = !DILocalVariable(name: "m", line: 271, scope: !102, file: !5, type: !8)
+!113 = !DILocalVariable(name: "add", line: 272, scope: !102, file: !5, type: !20)
+!114 = distinct !DISubprogram(name: "compute_rhs", line: 1767, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1767, file: !1, scope: !5, type: !115, variables: !117)
!115 = !DISubroutineType(types: !116)
!116 = !{null}
!117 = !{!118, !119, !120, !121, !122, !123, !124, !125, !126, !127, !128, !129, !130, !131}
-!118 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 1769, scope: !114, file: !5, type: !8)
-!119 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 1769, scope: !114, file: !5, type: !8)
-!120 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 1769, scope: !114, file: !5, type: !8)
-!121 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 1769, scope: !114, file: !5, type: !8)
-!122 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "rho_inv", line: 1770, scope: !114, file: !5, type: !20)
-!123 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "uijk", line: 1770, scope: !114, file: !5, type: !20)
-!124 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "up1", line: 1770, scope: !114, file: !5, type: !20)
-!125 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "um1", line: 1770, scope: !114, file: !5, type: !20)
-!126 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vijk", line: 1770, scope: !114, file: !5, type: !20)
-!127 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vp1", line: 1770, scope: !114, file: !5, type: !20)
-!128 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vm1", line: 1770, scope: !114, file: !5, type: !20)
-!129 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "wijk", line: 1770, scope: !114, file: !5, type: !20)
-!130 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "wp1", line: 1770, scope: !114, file: !5, type: !20)
-!131 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "wm1", line: 1770, scope: !114, file: !5, type: !20)
-!132 = !DISubprogram(name: "error_norm", line: 225, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 225, file: !1, scope: !5, type: !103, variables: !133)
+!118 = !DILocalVariable(name: "i", line: 1769, scope: !114, file: !5, type: !8)
+!119 = !DILocalVariable(name: "j", line: 1769, scope: !114, file: !5, type: !8)
+!120 = !DILocalVariable(name: "k", line: 1769, scope: !114, file: !5, type: !8)
+!121 = !DILocalVariable(name: "m", line: 1769, scope: !114, file: !5, type: !8)
+!122 = !DILocalVariable(name: "rho_inv", line: 1770, scope: !114, file: !5, type: !20)
+!123 = !DILocalVariable(name: "uijk", line: 1770, scope: !114, file: !5, type: !20)
+!124 = !DILocalVariable(name: "up1", line: 1770, scope: !114, file: !5, type: !20)
+!125 = !DILocalVariable(name: "um1", line: 1770, scope: !114, file: !5, type: !20)
+!126 = !DILocalVariable(name: "vijk", line: 1770, scope: !114, file: !5, type: !20)
+!127 = !DILocalVariable(name: "vp1", line: 1770, scope: !114, file: !5, type: !20)
+!128 = !DILocalVariable(name: "vm1", line: 1770, scope: !114, file: !5, type: !20)
+!129 = !DILocalVariable(name: "wijk", line: 1770, scope: !114, file: !5, type: !20)
+!130 = !DILocalVariable(name: "wp1", line: 1770, scope: !114, file: !5, type: !20)
+!131 = !DILocalVariable(name: "wm1", line: 1770, scope: !114, file: !5, type: !20)
+!132 = distinct !DISubprogram(name: "error_norm", line: 225, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 225, file: !1, scope: !5, type: !103, variables: !133)
!133 = !{!134, !135, !136, !137, !138, !139, !140, !141, !142, !143, !144}
-!134 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "rms", line: 225, arg: 1, scope: !132, file: !5, type: !105)
-!135 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 232, scope: !132, file: !5, type: !8)
-!136 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 232, scope: !132, file: !5, type: !8)
-!137 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 232, scope: !132, file: !5, type: !8)
-!138 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 232, scope: !132, file: !5, type: !8)
-!139 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", line: 232, scope: !132, file: !5, type: !8)
-!140 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xi", line: 233, scope: !132, file: !5, type: !20)
-!141 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "eta", line: 233, scope: !132, file: !5, type: !20)
-!142 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "zeta", line: 233, scope: !132, file: !5, type: !20)
-!143 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "u_exact", line: 233, scope: !132, file: !5, type: !91)
-!144 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "add", line: 233, scope: !132, file: !5, type: !20)
-!145 = !DISubprogram(name: "exact_solution", line: 643, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 644, file: !1, scope: !5, type: !146, variables: !148)
+!134 = !DILocalVariable(name: "rms", line: 225, arg: 1, scope: !132, file: !5, type: !105)
+!135 = !DILocalVariable(name: "i", line: 232, scope: !132, file: !5, type: !8)
+!136 = !DILocalVariable(name: "j", line: 232, scope: !132, file: !5, type: !8)
+!137 = !DILocalVariable(name: "k", line: 232, scope: !132, file: !5, type: !8)
+!138 = !DILocalVariable(name: "m", line: 232, scope: !132, file: !5, type: !8)
+!139 = !DILocalVariable(name: "d", line: 232, scope: !132, file: !5, type: !8)
+!140 = !DILocalVariable(name: "xi", line: 233, scope: !132, file: !5, type: !20)
+!141 = !DILocalVariable(name: "eta", line: 233, scope: !132, file: !5, type: !20)
+!142 = !DILocalVariable(name: "zeta", line: 233, scope: !132, file: !5, type: !20)
+!143 = !DILocalVariable(name: "u_exact", line: 233, scope: !132, file: !5, type: !91)
+!144 = !DILocalVariable(name: "add", line: 233, scope: !132, file: !5, type: !20)
+!145 = distinct !DISubprogram(name: "exact_solution", line: 643, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 644, file: !1, scope: !5, type: !146, variables: !148)
!146 = !DISubroutineType(types: !147)
!147 = !{null, !20, !20, !20, !105}
!148 = !{!149, !150, !151, !152, !153}
-!149 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "xi", line: 643, arg: 1, scope: !145, file: !5, type: !20)
-!150 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "eta", line: 643, arg: 2, scope: !145, file: !5, type: !20)
-!151 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "zeta", line: 643, arg: 3, scope: !145, file: !5, type: !20)
-!152 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "dtemp", line: 644, arg: 4, scope: !145, file: !5, type: !105)
-!153 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 653, scope: !145, file: !5, type: !8)
-!154 = !DISubprogram(name: "set_constants", line: 2191, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2191, file: !1, scope: !5, type: !115, variables: !2)
-!155 = !DISubprogram(name: "lhsinit", line: 855, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 855, file: !1, scope: !5, type: !115, variables: !156)
+!149 = !DILocalVariable(name: "xi", line: 643, arg: 1, scope: !145, file: !5, type: !20)
+!150 = !DILocalVariable(name: "eta", line: 643, arg: 2, scope: !145, file: !5, type: !20)
+!151 = !DILocalVariable(name: "zeta", line: 643, arg: 3, scope: !145, file: !5, type: !20)
+!152 = !DILocalVariable(name: "dtemp", line: 644, arg: 4, scope: !145, file: !5, type: !105)
+!153 = !DILocalVariable(name: "m", line: 653, scope: !145, file: !5, type: !8)
+!154 = distinct !DISubprogram(name: "set_constants", line: 2191, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2191, file: !1, scope: !5, type: !115, variables: !2)
+!155 = distinct !DISubprogram(name: "lhsinit", line: 855, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 855, file: !1, scope: !5, type: !115, variables: !156)
!156 = !{!157, !158, !159, !160, !161}
-!157 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 857, scope: !155, file: !5, type: !8)
-!158 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 857, scope: !155, file: !5, type: !8)
-!159 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 857, scope: !155, file: !5, type: !8)
-!160 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 857, scope: !155, file: !5, type: !8)
-!161 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n", line: 857, scope: !155, file: !5, type: !8)
-!162 = !DISubprogram(name: "initialize", line: 669, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 669, file: !1, scope: !5, type: !115, variables: !163)
+!157 = !DILocalVariable(name: "i", line: 857, scope: !155, file: !5, type: !8)
+!158 = !DILocalVariable(name: "j", line: 857, scope: !155, file: !5, type: !8)
+!159 = !DILocalVariable(name: "k", line: 857, scope: !155, file: !5, type: !8)
+!160 = !DILocalVariable(name: "m", line: 857, scope: !155, file: !5, type: !8)
+!161 = !DILocalVariable(name: "n", line: 857, scope: !155, file: !5, type: !8)
+!162 = distinct !DISubprogram(name: "initialize", line: 669, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 669, file: !1, scope: !5, type: !115, variables: !163)
!163 = !{!164, !165, !166, !167, !168, !169, !170, !171, !172, !173, !174, !179, !180, !181, !182}
-!164 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 679, scope: !162, file: !5, type: !8)
-!165 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 679, scope: !162, file: !5, type: !8)
-!166 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 679, scope: !162, file: !5, type: !8)
-!167 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 679, scope: !162, file: !5, type: !8)
-!168 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ix", line: 679, scope: !162, file: !5, type: !8)
-!169 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "iy", line: 679, scope: !162, file: !5, type: !8)
-!170 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "iz", line: 679, scope: !162, file: !5, type: !8)
-!171 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xi", line: 680, scope: !162, file: !5, type: !20)
-!172 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "eta", line: 680, scope: !162, file: !5, type: !20)
-!173 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "zeta", line: 680, scope: !162, file: !5, type: !20)
-!174 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Pface", line: 680, scope: !162, file: !5, type: !175)
+!164 = !DILocalVariable(name: "i", line: 679, scope: !162, file: !5, type: !8)
+!165 = !DILocalVariable(name: "j", line: 679, scope: !162, file: !5, type: !8)
+!166 = !DILocalVariable(name: "k", line: 679, scope: !162, file: !5, type: !8)
+!167 = !DILocalVariable(name: "m", line: 679, scope: !162, file: !5, type: !8)
+!168 = !DILocalVariable(name: "ix", line: 679, scope: !162, file: !5, type: !8)
+!169 = !DILocalVariable(name: "iy", line: 679, scope: !162, file: !5, type: !8)
+!170 = !DILocalVariable(name: "iz", line: 679, scope: !162, file: !5, type: !8)
+!171 = !DILocalVariable(name: "xi", line: 680, scope: !162, file: !5, type: !20)
+!172 = !DILocalVariable(name: "eta", line: 680, scope: !162, file: !5, type: !20)
+!173 = !DILocalVariable(name: "zeta", line: 680, scope: !162, file: !5, type: !20)
+!174 = !DILocalVariable(name: "Pface", line: 680, scope: !162, file: !5, type: !175)
!175 = !DICompositeType(tag: DW_TAG_array_type, size: 1920, align: 64, baseType: !20, elements: !176)
!176 = !{!177, !178, !93}
!177 = !DISubrange(count: 2)
!178 = !DISubrange(count: 3)
-!179 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Pxi", line: 680, scope: !162, file: !5, type: !20)
-!180 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Peta", line: 680, scope: !162, file: !5, type: !20)
-!181 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Pzeta", line: 680, scope: !162, file: !5, type: !20)
-!182 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "temp", line: 680, scope: !162, file: !5, type: !91)
-!183 = !DISubprogram(name: "exact_rhs", line: 301, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 301, file: !1, scope: !5, type: !115, variables: !184)
+!179 = !DILocalVariable(name: "Pxi", line: 680, scope: !162, file: !5, type: !20)
+!180 = !DILocalVariable(name: "Peta", line: 680, scope: !162, file: !5, type: !20)
+!181 = !DILocalVariable(name: "Pzeta", line: 680, scope: !162, file: !5, type: !20)
+!182 = !DILocalVariable(name: "temp", line: 680, scope: !162, file: !5, type: !91)
+!183 = distinct !DISubprogram(name: "exact_rhs", line: 301, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 301, file: !1, scope: !5, type: !115, variables: !184)
!184 = !{!185, !186, !187, !188, !189, !190, !191, !192, !193, !194, !195, !196, !197, !198, !199}
-!185 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dtemp", line: 310, scope: !183, file: !5, type: !91)
-!186 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xi", line: 310, scope: !183, file: !5, type: !20)
-!187 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "eta", line: 310, scope: !183, file: !5, type: !20)
-!188 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "zeta", line: 310, scope: !183, file: !5, type: !20)
-!189 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dtpp", line: 310, scope: !183, file: !5, type: !20)
-!190 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 311, scope: !183, file: !5, type: !8)
-!191 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 311, scope: !183, file: !5, type: !8)
-!192 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 311, scope: !183, file: !5, type: !8)
-!193 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 311, scope: !183, file: !5, type: !8)
-!194 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ip1", line: 311, scope: !183, file: !5, type: !8)
-!195 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "im1", line: 311, scope: !183, file: !5, type: !8)
-!196 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "jp1", line: 311, scope: !183, file: !5, type: !8)
-!197 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "jm1", line: 311, scope: !183, file: !5, type: !8)
-!198 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "km1", line: 311, scope: !183, file: !5, type: !8)
-!199 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "kp1", line: 311, scope: !183, file: !5, type: !8)
-!200 = !DISubprogram(name: "adi", line: 210, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 210, file: !1, scope: !5, type: !115, variables: !2)
-!201 = !DISubprogram(name: "add", line: 187, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 187, file: !1, scope: !5, type: !115, variables: !202)
+!185 = !DILocalVariable(name: "dtemp", line: 310, scope: !183, file: !5, type: !91)
+!186 = !DILocalVariable(name: "xi", line: 310, scope: !183, file: !5, type: !20)
+!187 = !DILocalVariable(name: "eta", line: 310, scope: !183, file: !5, type: !20)
+!188 = !DILocalVariable(name: "zeta", line: 310, scope: !183, file: !5, type: !20)
+!189 = !DILocalVariable(name: "dtpp", line: 310, scope: !183, file: !5, type: !20)
+!190 = !DILocalVariable(name: "m", line: 311, scope: !183, file: !5, type: !8)
+!191 = !DILocalVariable(name: "i", line: 311, scope: !183, file: !5, type: !8)
+!192 = !DILocalVariable(name: "j", line: 311, scope: !183, file: !5, type: !8)
+!193 = !DILocalVariable(name: "k", line: 311, scope: !183, file: !5, type: !8)
+!194 = !DILocalVariable(name: "ip1", line: 311, scope: !183, file: !5, type: !8)
+!195 = !DILocalVariable(name: "im1", line: 311, scope: !183, file: !5, type: !8)
+!196 = !DILocalVariable(name: "jp1", line: 311, scope: !183, file: !5, type: !8)
+!197 = !DILocalVariable(name: "jm1", line: 311, scope: !183, file: !5, type: !8)
+!198 = !DILocalVariable(name: "km1", line: 311, scope: !183, file: !5, type: !8)
+!199 = !DILocalVariable(name: "kp1", line: 311, scope: !183, file: !5, type: !8)
+!200 = distinct !DISubprogram(name: "adi", line: 210, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 210, file: !1, scope: !5, type: !115, variables: !2)
+!201 = distinct !DISubprogram(name: "add", line: 187, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 187, file: !1, scope: !5, type: !115, variables: !202)
!202 = !{!203, !204, !205, !206}
-!203 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 193, scope: !201, file: !5, type: !8)
-!204 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 193, scope: !201, file: !5, type: !8)
-!205 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 193, scope: !201, file: !5, type: !8)
-!206 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 193, scope: !201, file: !5, type: !8)
-!207 = !DISubprogram(name: "z_solve", line: 3457, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3457, file: !1, scope: !5, type: !115, variables: !2)
-!208 = !DISubprogram(name: "z_backsubstitute", line: 3480, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3480, file: !1, scope: !5, type: !115, variables: !209)
+!203 = !DILocalVariable(name: "i", line: 193, scope: !201, file: !5, type: !8)
+!204 = !DILocalVariable(name: "j", line: 193, scope: !201, file: !5, type: !8)
+!205 = !DILocalVariable(name: "k", line: 193, scope: !201, file: !5, type: !8)
+!206 = !DILocalVariable(name: "m", line: 193, scope: !201, file: !5, type: !8)
+!207 = distinct !DISubprogram(name: "z_solve", line: 3457, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3457, file: !1, scope: !5, type: !115, variables: !2)
+!208 = distinct !DISubprogram(name: "z_backsubstitute", line: 3480, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3480, file: !1, scope: !5, type: !115, variables: !209)
!209 = !{!210, !211, !212, !213, !214}
-!210 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3492, scope: !208, file: !5, type: !8)
-!211 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 3492, scope: !208, file: !5, type: !8)
-!212 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3492, scope: !208, file: !5, type: !8)
-!213 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 3492, scope: !208, file: !5, type: !8)
-!214 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n", line: 3492, scope: !208, file: !5, type: !8)
-!215 = !DISubprogram(name: "z_solve_cell", line: 3512, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3512, file: !1, scope: !5, type: !115, variables: !216)
+!210 = !DILocalVariable(name: "i", line: 3492, scope: !208, file: !5, type: !8)
+!211 = !DILocalVariable(name: "j", line: 3492, scope: !208, file: !5, type: !8)
+!212 = !DILocalVariable(name: "k", line: 3492, scope: !208, file: !5, type: !8)
+!213 = !DILocalVariable(name: "m", line: 3492, scope: !208, file: !5, type: !8)
+!214 = !DILocalVariable(name: "n", line: 3492, scope: !208, file: !5, type: !8)
+!215 = distinct !DISubprogram(name: "z_solve_cell", line: 3512, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3512, file: !1, scope: !5, type: !115, variables: !216)
!216 = !{!217, !218, !219, !220}
-!217 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3527, scope: !215, file: !5, type: !8)
-!218 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 3527, scope: !215, file: !5, type: !8)
-!219 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3527, scope: !215, file: !5, type: !8)
-!220 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ksize", line: 3527, scope: !215, file: !5, type: !8)
-!221 = !DISubprogram(name: "binvrhs", line: 3154, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3154, file: !1, scope: !5, type: !222, variables: !225)
+!217 = !DILocalVariable(name: "i", line: 3527, scope: !215, file: !5, type: !8)
+!218 = !DILocalVariable(name: "j", line: 3527, scope: !215, file: !5, type: !8)
+!219 = !DILocalVariable(name: "k", line: 3527, scope: !215, file: !5, type: !8)
+!220 = !DILocalVariable(name: "ksize", line: 3527, scope: !215, file: !5, type: !8)
+!221 = distinct !DISubprogram(name: "binvrhs", line: 3154, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3154, file: !1, scope: !5, type: !222, variables: !225)
!222 = !DISubroutineType(types: !223)
!223 = !{null, !224, !105}
!224 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !91)
!225 = !{!226, !227, !228, !229}
-!226 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "lhs", line: 3154, arg: 1, scope: !221, file: !5, type: !224)
-!227 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "r", line: 3154, arg: 2, scope: !221, file: !5, type: !105)
-!228 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "pivot", line: 3159, scope: !221, file: !5, type: !20)
-!229 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "coeff", line: 3159, scope: !221, file: !5, type: !20)
-!230 = !DISubprogram(name: "matmul_sub", line: 2841, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2842, file: !1, scope: !5, type: !231, variables: !233)
+!226 = !DILocalVariable(name: "lhs", line: 3154, arg: 1, scope: !221, file: !5, type: !224)
+!227 = !DILocalVariable(name: "r", line: 3154, arg: 2, scope: !221, file: !5, type: !105)
+!228 = !DILocalVariable(name: "pivot", line: 3159, scope: !221, file: !5, type: !20)
+!229 = !DILocalVariable(name: "coeff", line: 3159, scope: !221, file: !5, type: !20)
+!230 = distinct !DISubprogram(name: "matmul_sub", line: 2841, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2842, file: !1, scope: !5, type: !231, variables: !233)
!231 = !DISubroutineType(types: !232)
!232 = !{null, !224, !224, !224}
!233 = !{!234, !235, !236, !237}
-!234 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ablock", line: 2841, arg: 1, scope: !230, file: !5, type: !224)
-!235 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "bblock", line: 2841, arg: 2, scope: !230, file: !5, type: !224)
-!236 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "cblock", line: 2842, arg: 3, scope: !230, file: !5, type: !224)
-!237 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 2851, scope: !230, file: !5, type: !8)
-!238 = !DISubprogram(name: "matvec_sub", line: 2814, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2814, file: !1, scope: !5, type: !239, variables: !241)
+!234 = !DILocalVariable(name: "ablock", line: 2841, arg: 1, scope: !230, file: !5, type: !224)
+!235 = !DILocalVariable(name: "bblock", line: 2841, arg: 2, scope: !230, file: !5, type: !224)
+!236 = !DILocalVariable(name: "cblock", line: 2842, arg: 3, scope: !230, file: !5, type: !224)
+!237 = !DILocalVariable(name: "j", line: 2851, scope: !230, file: !5, type: !8)
+!238 = distinct !DISubprogram(name: "matvec_sub", line: 2814, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2814, file: !1, scope: !5, type: !239, variables: !241)
!239 = !DISubroutineType(types: !240)
!240 = !{null, !224, !105, !105}
!241 = !{!242, !243, !244, !245}
-!242 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ablock", line: 2814, arg: 1, scope: !238, file: !5, type: !224)
-!243 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "avec", line: 2814, arg: 2, scope: !238, file: !5, type: !105)
-!244 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "bvec", line: 2814, arg: 3, scope: !238, file: !5, type: !105)
-!245 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2823, scope: !238, file: !5, type: !8)
-!246 = !DISubprogram(name: "binvcrhs", line: 2885, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2885, file: !1, scope: !5, type: !247, variables: !249)
+!242 = !DILocalVariable(name: "ablock", line: 2814, arg: 1, scope: !238, file: !5, type: !224)
+!243 = !DILocalVariable(name: "avec", line: 2814, arg: 2, scope: !238, file: !5, type: !105)
+!244 = !DILocalVariable(name: "bvec", line: 2814, arg: 3, scope: !238, file: !5, type: !105)
+!245 = !DILocalVariable(name: "i", line: 2823, scope: !238, file: !5, type: !8)
+!246 = distinct !DISubprogram(name: "binvcrhs", line: 2885, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2885, file: !1, scope: !5, type: !247, variables: !249)
!247 = !DISubroutineType(types: !248)
!248 = !{null, !224, !224, !105}
!249 = !{!250, !251, !252, !253, !254}
-!250 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "lhs", line: 2885, arg: 1, scope: !246, file: !5, type: !224)
-!251 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 2885, arg: 2, scope: !246, file: !5, type: !224)
-!252 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "r", line: 2885, arg: 3, scope: !246, file: !5, type: !105)
-!253 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "pivot", line: 2890, scope: !246, file: !5, type: !20)
-!254 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "coeff", line: 2890, scope: !246, file: !5, type: !20)
-!255 = !DISubprogram(name: "lhsz", line: 1475, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1475, file: !1, scope: !5, type: !115, variables: !256)
+!250 = !DILocalVariable(name: "lhs", line: 2885, arg: 1, scope: !246, file: !5, type: !224)
+!251 = !DILocalVariable(name: "c", line: 2885, arg: 2, scope: !246, file: !5, type: !224)
+!252 = !DILocalVariable(name: "r", line: 2885, arg: 3, scope: !246, file: !5, type: !105)
+!253 = !DILocalVariable(name: "pivot", line: 2890, scope: !246, file: !5, type: !20)
+!254 = !DILocalVariable(name: "coeff", line: 2890, scope: !246, file: !5, type: !20)
+!255 = distinct !DISubprogram(name: "lhsz", line: 1475, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1475, file: !1, scope: !5, type: !115, variables: !256)
!256 = !{!257, !258, !259}
-!257 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 1484, scope: !255, file: !5, type: !8)
-!258 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 1484, scope: !255, file: !5, type: !8)
-!259 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 1484, scope: !255, file: !5, type: !8)
-!260 = !DISubprogram(name: "y_solve", line: 3299, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3299, file: !1, scope: !5, type: !115, variables: !2)
-!261 = !DISubprogram(name: "y_backsubstitute", line: 3323, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3323, file: !1, scope: !5, type: !115, variables: !262)
+!257 = !DILocalVariable(name: "i", line: 1484, scope: !255, file: !5, type: !8)
+!258 = !DILocalVariable(name: "j", line: 1484, scope: !255, file: !5, type: !8)
+!259 = !DILocalVariable(name: "k", line: 1484, scope: !255, file: !5, type: !8)
+!260 = distinct !DISubprogram(name: "y_solve", line: 3299, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3299, file: !1, scope: !5, type: !115, variables: !2)
+!261 = distinct !DISubprogram(name: "y_backsubstitute", line: 3323, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3323, file: !1, scope: !5, type: !115, variables: !262)
!262 = !{!263, !264, !265, !266, !267}
-!263 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3335, scope: !261, file: !5, type: !8)
-!264 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 3335, scope: !261, file: !5, type: !8)
-!265 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3335, scope: !261, file: !5, type: !8)
-!266 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 3335, scope: !261, file: !5, type: !8)
-!267 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n", line: 3335, scope: !261, file: !5, type: !8)
-!268 = !DISubprogram(name: "y_solve_cell", line: 3355, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3355, file: !1, scope: !5, type: !115, variables: !269)
+!263 = !DILocalVariable(name: "i", line: 3335, scope: !261, file: !5, type: !8)
+!264 = !DILocalVariable(name: "j", line: 3335, scope: !261, file: !5, type: !8)
+!265 = !DILocalVariable(name: "k", line: 3335, scope: !261, file: !5, type: !8)
+!266 = !DILocalVariable(name: "m", line: 3335, scope: !261, file: !5, type: !8)
+!267 = !DILocalVariable(name: "n", line: 3335, scope: !261, file: !5, type: !8)
+!268 = distinct !DISubprogram(name: "y_solve_cell", line: 3355, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3355, file: !1, scope: !5, type: !115, variables: !269)
!269 = !{!270, !271, !272, !273}
-!270 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3370, scope: !268, file: !5, type: !8)
-!271 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 3370, scope: !268, file: !5, type: !8)
-!272 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3370, scope: !268, file: !5, type: !8)
-!273 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "jsize", line: 3370, scope: !268, file: !5, type: !8)
-!274 = !DISubprogram(name: "lhsy", line: 1181, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1181, file: !1, scope: !5, type: !115, variables: !275)
+!270 = !DILocalVariable(name: "i", line: 3370, scope: !268, file: !5, type: !8)
+!271 = !DILocalVariable(name: "j", line: 3370, scope: !268, file: !5, type: !8)
+!272 = !DILocalVariable(name: "k", line: 3370, scope: !268, file: !5, type: !8)
+!273 = !DILocalVariable(name: "jsize", line: 3370, scope: !268, file: !5, type: !8)
+!274 = distinct !DISubprogram(name: "lhsy", line: 1181, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1181, file: !1, scope: !5, type: !115, variables: !275)
!275 = !{!276, !277, !278}
-!276 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 1190, scope: !274, file: !5, type: !8)
-!277 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 1190, scope: !274, file: !5, type: !8)
-!278 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 1190, scope: !274, file: !5, type: !8)
-!279 = !DISubprogram(name: "x_solve", line: 2658, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2658, file: !1, scope: !5, type: !115, variables: !2)
-!280 = !DISubprogram(name: "x_backsubstitute", line: 2684, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2684, file: !1, scope: !5, type: !115, variables: !281)
+!276 = !DILocalVariable(name: "i", line: 1190, scope: !274, file: !5, type: !8)
+!277 = !DILocalVariable(name: "j", line: 1190, scope: !274, file: !5, type: !8)
+!278 = !DILocalVariable(name: "k", line: 1190, scope: !274, file: !5, type: !8)
+!279 = distinct !DISubprogram(name: "x_solve", line: 2658, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2658, file: !1, scope: !5, type: !115, variables: !2)
+!280 = distinct !DISubprogram(name: "x_backsubstitute", line: 2684, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2684, file: !1, scope: !5, type: !115, variables: !281)
!281 = !{!282, !283, !284, !285, !286}
-!282 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2696, scope: !280, file: !5, type: !8)
-!283 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 2696, scope: !280, file: !5, type: !8)
-!284 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 2696, scope: !280, file: !5, type: !8)
-!285 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 2696, scope: !280, file: !5, type: !8)
-!286 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n", line: 2696, scope: !280, file: !5, type: !8)
-!287 = !DISubprogram(name: "x_solve_cell", line: 2716, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2716, file: !1, scope: !5, type: !115, variables: !288)
+!282 = !DILocalVariable(name: "i", line: 2696, scope: !280, file: !5, type: !8)
+!283 = !DILocalVariable(name: "j", line: 2696, scope: !280, file: !5, type: !8)
+!284 = !DILocalVariable(name: "k", line: 2696, scope: !280, file: !5, type: !8)
+!285 = !DILocalVariable(name: "m", line: 2696, scope: !280, file: !5, type: !8)
+!286 = !DILocalVariable(name: "n", line: 2696, scope: !280, file: !5, type: !8)
+!287 = distinct !DISubprogram(name: "x_solve_cell", line: 2716, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2716, file: !1, scope: !5, type: !115, variables: !288)
!288 = !{!289, !290, !291, !292}
-!289 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2728, scope: !287, file: !5, type: !8)
-!290 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 2728, scope: !287, file: !5, type: !8)
-!291 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 2728, scope: !287, file: !5, type: !8)
-!292 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "isize", line: 2728, scope: !287, file: !5, type: !8)
-!293 = !DISubprogram(name: "lhsx", line: 898, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 898, file: !1, scope: !5, type: !115, variables: !294)
+!289 = !DILocalVariable(name: "i", line: 2728, scope: !287, file: !5, type: !8)
+!290 = !DILocalVariable(name: "j", line: 2728, scope: !287, file: !5, type: !8)
+!291 = !DILocalVariable(name: "k", line: 2728, scope: !287, file: !5, type: !8)
+!292 = !DILocalVariable(name: "isize", line: 2728, scope: !287, file: !5, type: !8)
+!293 = distinct !DISubprogram(name: "lhsx", line: 898, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 898, file: !1, scope: !5, type: !115, variables: !294)
!294 = !{!295, !296, !297}
-!295 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 907, scope: !293, file: !5, type: !8)
-!296 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 907, scope: !293, file: !5, type: !8)
-!297 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 907, scope: !293, file: !5, type: !8)
+!295 = !DILocalVariable(name: "i", line: 907, scope: !293, file: !5, type: !8)
+!296 = !DILocalVariable(name: "j", line: 907, scope: !293, file: !5, type: !8)
+!297 = !DILocalVariable(name: "k", line: 907, scope: !293, file: !5, type: !8)
!298 = !{!299, !304, !305, !309, !310, !311, !312, !313, !314, !315, !316, !317, !318, !319, !320, !321, !322, !323, !324, !325, !326, !327, !328, !329, !330, !331, !332, !333, !334, !335, !336, !337, !338, !339, !340, !341, !342, !343, !347, !350, !351, !352, !353, !354, !355, !356, !360, !361, !362, !363, !364, !365, !366, !367, !368, !369, !370, !371, !372, !373, !374, !375, !376, !377, !378, !379, !380, !381, !382, !383, !384, !385, !386, !387, !388, !389, !390, !391, !392, !393, !394, !395, !396, !397, !398, !399, !400, !401, !402, !403, !404, !405, !406, !407, !408, !409, !410, !411, !412, !413, !414, !415, !416, !417, !418, !419, !422, !426, !427, !430, !431, !434, !435, !436, !437}
!299 = !DIGlobalVariable(name: "grid_points", line: 28, isLocal: true, isDefinition: true, scope: null, file: !300, type: !302, variable: [3 x i32]* @grid_points)
!300 = !DIFile(filename: "./header.h", directory: "/home/hfinkel/src/NPB2.3-omp-C/BT")
diff --git a/test/CodeGen/PowerPC/pr24546.ll b/test/CodeGen/PowerPC/pr24546.ll
index 3bb638af2343..06f6bc93da99 100644
--- a/test/CodeGen/PowerPC/pr24546.ll
+++ b/test/CodeGen/PowerPC/pr24546.ll
@@ -6,7 +6,7 @@
@php_intpow10.powers = external unnamed_addr constant [23 x double], align 8
; Function Attrs: nounwind
-define double @_php_math_round(double %value, i32 signext %places, i32 signext %mode) #0 {
+define double @_php_math_round(double %value, i32 signext %places, i32 signext %mode) #0 !dbg !6 {
entry:
br i1 undef, label %if.then, label %if.else, !dbg !32
@@ -62,23 +62,23 @@ attributes #3 = { nounwind }
!3 = !{!4}
!4 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!5 = !{!6, !18}
-!6 = !DISubprogram(name: "_php_math_round", scope: !1, file: !1, line: 15, type: !7, isLocal: false, isDefinition: true, scopeLine: 16, flags: DIFlagPrototyped, isOptimized: true, function: double (double, i32, i32)* @_php_math_round, variables: !10)
+!6 = distinct !DISubprogram(name: "_php_math_round", scope: !1, file: !1, line: 15, type: !7, isLocal: false, isDefinition: true, scopeLine: 16, flags: DIFlagPrototyped, isOptimized: true, variables: !10)
!7 = !DISubroutineType(types: !8)
!8 = !{!4, !4, !9, !9}
!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !{!11, !12, !13, !14, !15, !16, !17}
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", arg: 1, scope: !6, file: !1, line: 15, type: !4)
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "places", arg: 2, scope: !6, file: !1, line: 15, type: !9)
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "mode", arg: 3, scope: !6, file: !1, line: 15, type: !9)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f1", scope: !6, file: !1, line: 17, type: !4)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f2", scope: !6, file: !1, line: 17, type: !4)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "tmp_value", scope: !6, file: !1, line: 18, type: !4)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "precision_places", scope: !6, file: !1, line: 19, type: !9)
-!18 = !DISubprogram(name: "php_intpow10", scope: !1, file: !1, line: 1, type: !19, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !21)
+!11 = !DILocalVariable(name: "value", arg: 1, scope: !6, file: !1, line: 15, type: !4)
+!12 = !DILocalVariable(name: "places", arg: 2, scope: !6, file: !1, line: 15, type: !9)
+!13 = !DILocalVariable(name: "mode", arg: 3, scope: !6, file: !1, line: 15, type: !9)
+!14 = !DILocalVariable(name: "f1", scope: !6, file: !1, line: 17, type: !4)
+!15 = !DILocalVariable(name: "f2", scope: !6, file: !1, line: 17, type: !4)
+!16 = !DILocalVariable(name: "tmp_value", scope: !6, file: !1, line: 18, type: !4)
+!17 = !DILocalVariable(name: "precision_places", scope: !6, file: !1, line: 19, type: !9)
+!18 = distinct !DISubprogram(name: "php_intpow10", scope: !1, file: !1, line: 1, type: !19, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !21)
!19 = !DISubroutineType(types: !20)
!20 = !{!4, !9}
!21 = !{!22}
-!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "power", arg: 1, scope: !18, file: !1, line: 1, type: !9)
+!22 = !DILocalVariable(name: "power", arg: 1, scope: !18, file: !1, line: 1, type: !9)
!23 = !{!24}
!24 = !DIGlobalVariable(name: "powers", scope: !18, file: !1, line: 3, type: !25, isLocal: true, isDefinition: true, variable: [23 x double]* @php_intpow10.powers)
!25 = !DICompositeType(tag: DW_TAG_array_type, baseType: !26, size: 1472, align: 64, elements: !27)
diff --git a/test/CodeGen/PowerPC/pr24636.ll b/test/CodeGen/PowerPC/pr24636.ll
new file mode 100644
index 000000000000..cc51dd38f9e2
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr24636.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+@c = external global i32, align 4
+@b = external global [1 x i32], align 4
+
+; Function Attrs: nounwind
+define void @fn2() #0 align 4 {
+ br i1 undef, label %.lr.ph, label %4
+
+; We used to crash because a bad DAGCombine was creating i32-typed SETCC nodes,
+; even when crbits are enabled.
+; CHECK-LABEL: @fn2
+; CHECK: blr
+
+.lr.ph: ; preds = %0
+ br i1 undef, label %.lr.ph.split, label %.preheader
+
+.preheader: ; preds = %.preheader, %.lr.ph
+ br i1 undef, label %.lr.ph.split, label %.preheader
+
+.lr.ph.split: ; preds = %.preheader, %.lr.ph
+ br i1 undef, label %._crit_edge, label %.lr.ph.split.split
+
+.lr.ph.split.split: ; preds = %.lr.ph.split.split, %.lr.ph.split
+ %1 = phi i32 [ %2, %.lr.ph.split.split ], [ undef, %.lr.ph.split ]
+ %2 = and i32 %1, and (i32 and (i32 and (i32 and (i32 and (i32 and (i32 and (i32 zext (i1 select (i1 icmp eq ([1 x i32]* bitcast (i32* @c to [1 x i32]*), [1 x i32]* @b), i1 true, i1 false) to i32), i32 zext (i1 select (i1 icmp eq ([1 x i32]* bitcast (i32* @c to [1 x i32]*), [1 x i32]* @b), i1 true, i1 false) to i32)), i32 zext (i1 select (i1 icmp eq ([1 x i32]* bitcast (i32* @c to [1 x i32]*), [1 x i32]* @b), i1 true, i1 false) to i32)), i32 zext (i1 select (i1 icmp eq ([1 x i32]* bitcast (i32* @c to [1 x i32]*), [1 x i32]* @b), i1 true, i1 false) to i32)), i32 zext (i1 select (i1 icmp eq ([1 x i32]* bitcast (i32* @c to [1 x i32]*), [1 x i32]* @b), i1 true, i1 false) to i32)), i32 zext (i1 select (i1 icmp eq ([1 x i32]* bitcast (i32* @c to [1 x i32]*), [1 x i32]* @b), i1 true, i1 false) to i32)), i32 zext (i1 select (i1 icmp eq ([1 x i32]* bitcast (i32* @c to [1 x i32]*), [1 x i32]* @b), i1 true, i1 false) to i32)), i32 zext (i1 select (i1 icmp eq ([1 x i32]* bitcast (i32* @c to [1 x i32]*), [1 x i32]* @b), i1 true, i1 false) to i32))
+ %3 = icmp slt i32 undef, 4
+ br i1 %3, label %.lr.ph.split.split, label %._crit_edge
+
+._crit_edge: ; preds = %.lr.ph.split.split, %.lr.ph.split
+ %.lcssa = phi i32 [ undef, %.lr.ph.split ], [ %2, %.lr.ph.split.split ]
+ br label %4
+
+; <label>:4 ; preds = %._crit_edge, %0
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="ppc64le" }
+
diff --git a/test/CodeGen/PowerPC/pr25157-peephole.ll b/test/CodeGen/PowerPC/pr25157-peephole.ll
new file mode 100644
index 000000000000..c5bd49b492cc
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr25157-peephole.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Verify peephole simplification of splats and swaps. Bugpoint-reduced
+; test from Eric Schweitz.
+
+%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625 = type <{ [28 x i8] }>
+%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626 = type <{ [64 x i8] }>
+
+@.BSS38 = external global %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, align 32
+@_main1_2_ = external global %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, section ".comm", align 16
+
+define void @aercalc_() {
+L.entry:
+ br i1 undef, label %L.LB38_2426, label %L.LB38_2911
+
+L.LB38_2911:
+ br i1 undef, label %L.LB38_2140, label %L.LB38_2640
+
+L.LB38_2640:
+ unreachable
+
+L.LB38_2426:
+ br i1 undef, label %L.LB38_2438, label %L.LB38_2920
+
+L.LB38_2920:
+ br i1 undef, label %L.LB38_2438, label %L.LB38_2921
+
+L.LB38_2921:
+ br label %L.LB38_2140
+
+L.LB38_2140:
+ ret void
+
+L.LB38_2438:
+ br i1 undef, label %L.LB38_2451, label %L.LB38_2935
+
+L.LB38_2935:
+ br i1 undef, label %L.LB38_2451, label %L.LB38_2936
+
+L.LB38_2936:
+ unreachable
+
+L.LB38_2451:
+ br i1 undef, label %L.LB38_2452, label %L.LB38_2937
+
+L.LB38_2937:
+ unreachable
+
+L.LB38_2452:
+ %0 = load float, float* bitcast (i8* getelementptr inbounds (%struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625, %struct.BSS38.51.4488.9911.14348.16813.20264.24701.28152.31603.35054.39491.44914.45407.46393.46886.47872.49351.49844.50830.51323.52309.53295.53788.54281.55267.55760.59211.61625* @.BSS38, i64 0, i32 0, i64 16) to float*), align 16
+ %1 = fpext float %0 to double
+ %2 = insertelement <2 x double> undef, double %1, i32 1
+ store <2 x double> %2, <2 x double>* bitcast (i8* getelementptr inbounds (%struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626, %struct_main1_2_.491.4928.10351.14788.17253.20704.25141.28592.32043.35494.39931.45354.45847.46833.47326.48312.49791.50284.51270.51763.52749.53735.54228.54721.55707.56200.59651.61626* @_main1_2_, i64 0, i32 0, i64 32) to <2 x double>*), align 16
+ unreachable
+}
+
+; CHECK-LABEL: @aercalc_
+; CHECK: lxsspx
+; CHECK: xxspltd
+; CHECK: stxvd2x
+; CHECK-NOT: xxswapd
diff --git a/test/CodeGen/PowerPC/preincprep-nontrans-crash.ll b/test/CodeGen/PowerPC/preincprep-nontrans-crash.ll
new file mode 100644
index 000000000000..cfec302d4690
--- /dev/null
+++ b/test/CodeGen/PowerPC/preincprep-nontrans-crash.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64-i64:64-n32:64"
+target triple = "powerpc64le-linux"
+
+%struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144 = type <{ [220 x i8] }>
+
+@.BSS1 = external unnamed_addr global %struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144, align 32
+
+; Function Attrs: noinline nounwind
+define void @ety2_() #0 {
+
+; This test case used to crash because the preinc prep pass would assume that
+; if X-Y could be simplified to a constant, than so could Y-X. While not
+; desirable, we cannot actually make this guarantee.
+; CHECK-LABEL: @ety2_
+
+L.entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = sext i32 %0 to i64
+ %2 = shl nsw i64 %1, 3
+ %3 = add nsw i64 %2, 8
+ br label %L.LB1_425
+
+L.LB1_425: ; preds = %L.LB1_427, %L.entry
+ %4 = phi i64 [ %21, %L.LB1_427 ], [ undef, %L.entry ]
+ br i1 undef, label %L.LB1_427, label %L.LB1_816
+
+L.LB1_816: ; preds = %L.LB1_425
+ switch i32 undef, label %L.LB1_432 [
+ i32 30, label %L.LB1_805
+ i32 10, label %L.LB1_451
+ i32 20, label %L.LB1_451
+ ]
+
+L.LB1_451: ; preds = %L.LB1_816, %L.LB1_816
+ unreachable
+
+L.LB1_432: ; preds = %L.LB1_816
+ %.in.31 = lshr i64 %4, 32
+ %5 = trunc i64 %.in.31 to i32
+ br i1 undef, label %L.LB1_769, label %L.LB1_455
+
+L.LB1_455: ; preds = %L.LB1_432
+ unreachable
+
+L.LB1_769: ; preds = %L.LB1_432
+ %6 = sext i32 %5 to i64
+ %7 = add nsw i64 %6, 2
+ %8 = add nsw i64 %6, -1
+ %9 = mul i64 %8, %1
+ %10 = add i64 %9, %7
+ %11 = shl i64 %10, 3
+ %12 = getelementptr i8, i8* undef, i64 %11
+ %13 = mul nsw i64 %6, %1
+ %14 = add i64 %7, %13
+ %15 = shl i64 %14, 3
+ %16 = getelementptr i8, i8* undef, i64 %15
+ br i1 undef, label %L.LB1_662, label %L.LB1_662.prol
+
+L.LB1_662.prol: ; preds = %L.LB1_662.prol, %L.LB1_769
+ %indvars.iv.next20.prol = add nuw nsw i64 undef, 1
+ br i1 undef, label %L.LB1_662, label %L.LB1_662.prol
+
+L.LB1_662: ; preds = %L.LB1_437.2, %L.LB1_662.prol, %L.LB1_769
+ %indvars.iv19 = phi i64 [ %indvars.iv.next20.3, %L.LB1_437.2 ], [ 0, %L.LB1_769 ], [ %indvars.iv.next20.prol, %L.LB1_662.prol ]
+ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+ %17 = mul i64 %indvars.iv.next20, %3
+ %18 = getelementptr i8, i8* %16, i64 %17
+ %19 = bitcast i8* %18 to double*
+ store double 0.000000e+00, double* %19, align 8
+ %indvars.iv.next20.1 = add nsw i64 %indvars.iv19, 2
+ %20 = mul i64 %indvars.iv.next20.1, %3
+ br i1 undef, label %L.LB1_437.2, label %L.LB1_824.2
+
+L.LB1_427: ; preds = %L.LB1_425
+ %21 = load i64, i64* bitcast (i8* getelementptr inbounds (%struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144, %struct.BSS1.0.9.28.39.43.46.47.54.56.57.64.65.69.71.144* @.BSS1, i64 0, i32 0, i64 8) to i64*), align 8
+ br label %L.LB1_425
+
+L.LB1_805: ; preds = %L.LB1_816
+ ret void
+
+L.LB1_824.2: ; preds = %L.LB1_662
+ %22 = getelementptr i8, i8* %12, i64 %20
+ %23 = bitcast i8* %22 to double*
+ store double 0.000000e+00, double* %23, align 8
+ br label %L.LB1_437.2
+
+L.LB1_437.2: ; preds = %L.LB1_824.2, %L.LB1_662
+ %indvars.iv.next20.3 = add nsw i64 %indvars.iv19, 4
+ br label %L.LB1_662
+}
+
+attributes #0 = { noinline nounwind }
+
diff --git a/test/CodeGen/PowerPC/qpx-unal-cons-lds.ll b/test/CodeGen/PowerPC/qpx-unal-cons-lds.ll
new file mode 100644
index 000000000000..606c0551a56c
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-unal-cons-lds.ll
@@ -0,0 +1,217 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+ br label %vector.body
+
+; CHECK-LABEL: @foo
+; Make sure that the offset constants we use are all even (only the last should be odd).
+; CHECK-DAG: li {{[0-9]+}}, 1056
+; CHECK-DAG: li {{[0-9]+}}, 1088
+; CHECK-DAG: li {{[0-9]+}}, 1152
+; CHECK-DAG: li {{[0-9]+}}, 1216
+; CHECK-DAG: li {{[0-9]+}}, 1280
+; CHECK-DAG: li {{[0-9]+}}, 1344
+; CHECK-DAG: li {{[0-9]+}}, 1408
+; CHECK-DAG: li {{[0-9]+}}, 1472
+; CHECK-DAG: li {{[0-9]+}}, 1536
+; CHECK-DAG: li {{[0-9]+}}, 1600
+; CHECK-DAG: li {{[0-9]+}}, 1568
+; CHECK-DAG: li {{[0-9]+}}, 1664
+; CHECK-DAG: li {{[0-9]+}}, 1632
+; CHECK-DAG: li {{[0-9]+}}, 1728
+; CHECK-DAG: li {{[0-9]+}}, 1696
+; CHECK-DAG: li {{[0-9]+}}, 1792
+; CHECK-DAG: li {{[0-9]+}}, 1760
+; CHECK-DAG: li {{[0-9]+}}, 1856
+; CHECK-DAG: li {{[0-9]+}}, 1824
+; CHECK-DAG: li {{[0-9]+}}, 1920
+; CHECK-DAG: li {{[0-9]+}}, 1888
+; CHECK-DAG: li {{[0-9]+}}, 1984
+; CHECK-DAG: li {{[0-9]+}}, 1952
+; CHECK-DAG: li {{[0-9]+}}, 2016
+; CHECK-DAG: li {{[0-9]+}}, 1024
+; CHECK-DAG: li {{[0-9]+}}, 1120
+; CHECK-DAG: li {{[0-9]+}}, 1184
+; CHECK-DAG: li {{[0-9]+}}, 1248
+; CHECK-DAG: li {{[0-9]+}}, 1312
+; CHECK-DAG: li {{[0-9]+}}, 1376
+; CHECK-DAG: li {{[0-9]+}}, 1440
+; CHECK-DAG: li {{[0-9]+}}, 1504
+; CHECK-DAG: li {{[0-9]+}}, 2047
+; CHECK: blr
+
+vector.body: ; preds = %vector.body, %entry
+ %index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
+ %0 = shl i64 %index, 1
+ %1 = getelementptr inbounds double, double* %b, i64 %0
+ %2 = bitcast double* %1 to <8 x double>*
+ %wide.vec = load <8 x double>, <8 x double>* %2, align 8
+ %strided.vec = shufflevector <8 x double> %wide.vec, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %3 = fadd <4 x double> %strided.vec, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %4 = getelementptr inbounds double, double* %a, i64 %index
+ %5 = bitcast double* %4 to <4 x double>*
+ store <4 x double> %3, <4 x double>* %5, align 8
+ %index.next = or i64 %index, 4
+ %6 = shl i64 %index.next, 1
+ %7 = getelementptr inbounds double, double* %b, i64 %6
+ %8 = bitcast double* %7 to <8 x double>*
+ %wide.vec.1 = load <8 x double>, <8 x double>* %8, align 8
+ %strided.vec.1 = shufflevector <8 x double> %wide.vec.1, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %9 = fadd <4 x double> %strided.vec.1, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %10 = getelementptr inbounds double, double* %a, i64 %index.next
+ %11 = bitcast double* %10 to <4 x double>*
+ store <4 x double> %9, <4 x double>* %11, align 8
+ %index.next.1 = or i64 %index, 8
+ %12 = shl i64 %index.next.1, 1
+ %13 = getelementptr inbounds double, double* %b, i64 %12
+ %14 = bitcast double* %13 to <8 x double>*
+ %wide.vec.2 = load <8 x double>, <8 x double>* %14, align 8
+ %strided.vec.2 = shufflevector <8 x double> %wide.vec.2, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %15 = fadd <4 x double> %strided.vec.2, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %16 = getelementptr inbounds double, double* %a, i64 %index.next.1
+ %17 = bitcast double* %16 to <4 x double>*
+ store <4 x double> %15, <4 x double>* %17, align 8
+ %index.next.2 = or i64 %index, 12
+ %18 = shl i64 %index.next.2, 1
+ %19 = getelementptr inbounds double, double* %b, i64 %18
+ %20 = bitcast double* %19 to <8 x double>*
+ %wide.vec.3 = load <8 x double>, <8 x double>* %20, align 8
+ %strided.vec.3 = shufflevector <8 x double> %wide.vec.3, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %21 = fadd <4 x double> %strided.vec.3, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %22 = getelementptr inbounds double, double* %a, i64 %index.next.2
+ %23 = bitcast double* %22 to <4 x double>*
+ store <4 x double> %21, <4 x double>* %23, align 8
+ %index.next.3 = or i64 %index, 16
+ %24 = shl i64 %index.next.3, 1
+ %25 = getelementptr inbounds double, double* %b, i64 %24
+ %26 = bitcast double* %25 to <8 x double>*
+ %wide.vec.4 = load <8 x double>, <8 x double>* %26, align 8
+ %strided.vec.4 = shufflevector <8 x double> %wide.vec.4, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %27 = fadd <4 x double> %strided.vec.4, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %28 = getelementptr inbounds double, double* %a, i64 %index.next.3
+ %29 = bitcast double* %28 to <4 x double>*
+ store <4 x double> %27, <4 x double>* %29, align 8
+ %index.next.4 = or i64 %index, 20
+ %30 = shl i64 %index.next.4, 1
+ %31 = getelementptr inbounds double, double* %b, i64 %30
+ %32 = bitcast double* %31 to <8 x double>*
+ %wide.vec.5 = load <8 x double>, <8 x double>* %32, align 8
+ %strided.vec.5 = shufflevector <8 x double> %wide.vec.5, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %33 = fadd <4 x double> %strided.vec.5, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %34 = getelementptr inbounds double, double* %a, i64 %index.next.4
+ %35 = bitcast double* %34 to <4 x double>*
+ store <4 x double> %33, <4 x double>* %35, align 8
+ %index.next.5 = or i64 %index, 24
+ %36 = shl i64 %index.next.5, 1
+ %37 = getelementptr inbounds double, double* %b, i64 %36
+ %38 = bitcast double* %37 to <8 x double>*
+ %wide.vec.6 = load <8 x double>, <8 x double>* %38, align 8
+ %strided.vec.6 = shufflevector <8 x double> %wide.vec.6, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %39 = fadd <4 x double> %strided.vec.6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %40 = getelementptr inbounds double, double* %a, i64 %index.next.5
+ %41 = bitcast double* %40 to <4 x double>*
+ store <4 x double> %39, <4 x double>* %41, align 8
+ %index.next.6 = or i64 %index, 28
+ %42 = shl i64 %index.next.6, 1
+ %43 = getelementptr inbounds double, double* %b, i64 %42
+ %44 = bitcast double* %43 to <8 x double>*
+ %wide.vec.7 = load <8 x double>, <8 x double>* %44, align 8
+ %strided.vec.7 = shufflevector <8 x double> %wide.vec.7, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %45 = fadd <4 x double> %strided.vec.7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %46 = getelementptr inbounds double, double* %a, i64 %index.next.6
+ %47 = bitcast double* %46 to <4 x double>*
+ store <4 x double> %45, <4 x double>* %47, align 8
+ %index.next.7 = or i64 %index, 32
+ %48 = shl i64 %index.next.7, 1
+ %49 = getelementptr inbounds double, double* %b, i64 %48
+ %50 = bitcast double* %49 to <8 x double>*
+ %wide.vec.8 = load <8 x double>, <8 x double>* %50, align 8
+ %strided.vec.8 = shufflevector <8 x double> %wide.vec.8, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %51 = fadd <4 x double> %strided.vec.8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %52 = getelementptr inbounds double, double* %a, i64 %index.next.7
+ %53 = bitcast double* %52 to <4 x double>*
+ store <4 x double> %51, <4 x double>* %53, align 8
+ %index.next.8 = or i64 %index, 36
+ %54 = shl i64 %index.next.8, 1
+ %55 = getelementptr inbounds double, double* %b, i64 %54
+ %56 = bitcast double* %55 to <8 x double>*
+ %wide.vec.9 = load <8 x double>, <8 x double>* %56, align 8
+ %strided.vec.9 = shufflevector <8 x double> %wide.vec.9, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %57 = fadd <4 x double> %strided.vec.9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %58 = getelementptr inbounds double, double* %a, i64 %index.next.8
+ %59 = bitcast double* %58 to <4 x double>*
+ store <4 x double> %57, <4 x double>* %59, align 8
+ %index.next.9 = or i64 %index, 40
+ %60 = shl i64 %index.next.9, 1
+ %61 = getelementptr inbounds double, double* %b, i64 %60
+ %62 = bitcast double* %61 to <8 x double>*
+ %wide.vec.10 = load <8 x double>, <8 x double>* %62, align 8
+ %strided.vec.10 = shufflevector <8 x double> %wide.vec.10, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %63 = fadd <4 x double> %strided.vec.10, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %64 = getelementptr inbounds double, double* %a, i64 %index.next.9
+ %65 = bitcast double* %64 to <4 x double>*
+ store <4 x double> %63, <4 x double>* %65, align 8
+ %index.next.10 = or i64 %index, 44
+ %66 = shl i64 %index.next.10, 1
+ %67 = getelementptr inbounds double, double* %b, i64 %66
+ %68 = bitcast double* %67 to <8 x double>*
+ %wide.vec.11 = load <8 x double>, <8 x double>* %68, align 8
+ %strided.vec.11 = shufflevector <8 x double> %wide.vec.11, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %69 = fadd <4 x double> %strided.vec.11, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %70 = getelementptr inbounds double, double* %a, i64 %index.next.10
+ %71 = bitcast double* %70 to <4 x double>*
+ store <4 x double> %69, <4 x double>* %71, align 8
+ %index.next.11 = or i64 %index, 48
+ %72 = shl i64 %index.next.11, 1
+ %73 = getelementptr inbounds double, double* %b, i64 %72
+ %74 = bitcast double* %73 to <8 x double>*
+ %wide.vec.12 = load <8 x double>, <8 x double>* %74, align 8
+ %strided.vec.12 = shufflevector <8 x double> %wide.vec.12, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %75 = fadd <4 x double> %strided.vec.12, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %76 = getelementptr inbounds double, double* %a, i64 %index.next.11
+ %77 = bitcast double* %76 to <4 x double>*
+ store <4 x double> %75, <4 x double>* %77, align 8
+ %index.next.12 = or i64 %index, 52
+ %78 = shl i64 %index.next.12, 1
+ %79 = getelementptr inbounds double, double* %b, i64 %78
+ %80 = bitcast double* %79 to <8 x double>*
+ %wide.vec.13 = load <8 x double>, <8 x double>* %80, align 8
+ %strided.vec.13 = shufflevector <8 x double> %wide.vec.13, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %81 = fadd <4 x double> %strided.vec.13, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %82 = getelementptr inbounds double, double* %a, i64 %index.next.12
+ %83 = bitcast double* %82 to <4 x double>*
+ store <4 x double> %81, <4 x double>* %83, align 8
+ %index.next.13 = or i64 %index, 56
+ %84 = shl i64 %index.next.13, 1
+ %85 = getelementptr inbounds double, double* %b, i64 %84
+ %86 = bitcast double* %85 to <8 x double>*
+ %wide.vec.14 = load <8 x double>, <8 x double>* %86, align 8
+ %strided.vec.14 = shufflevector <8 x double> %wide.vec.14, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %87 = fadd <4 x double> %strided.vec.14, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %88 = getelementptr inbounds double, double* %a, i64 %index.next.13
+ %89 = bitcast double* %88 to <4 x double>*
+ store <4 x double> %87, <4 x double>* %89, align 8
+ %index.next.14 = or i64 %index, 60
+ %90 = shl i64 %index.next.14, 1
+ %91 = getelementptr inbounds double, double* %b, i64 %90
+ %92 = bitcast double* %91 to <8 x double>*
+ %wide.vec.15 = load <8 x double>, <8 x double>* %92, align 8
+ %strided.vec.15 = shufflevector <8 x double> %wide.vec.15, <8 x double> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %93 = fadd <4 x double> %strided.vec.15, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %94 = getelementptr inbounds double, double* %a, i64 %index.next.14
+ %95 = bitcast double* %94 to <4 x double>*
+ store <4 x double> %93, <4 x double>* %95, align 8
+ %index.next.15 = add nsw i64 %index, 64
+ %96 = icmp eq i64 %index.next.15, 1600
+ br i1 %96, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+
diff --git a/test/CodeGen/PowerPC/retaddr2.ll b/test/CodeGen/PowerPC/retaddr2.ll
index 8581f6cb9a38..1038cd03d154 100644
--- a/test/CodeGen/PowerPC/retaddr2.ll
+++ b/test/CodeGen/PowerPC/retaddr2.ll
@@ -10,11 +10,11 @@ entry:
}
; CHECK-LABEL: @test1
-; CHECK: mflr 0
+; CHECK: mflr {{[0-9]+}}
; CHECK: std 0, 16(1)
; CHECK-DAG: ld 3, 64(1)
-; CHECK-DAG: ld 0, 16(1)
-; CHECK: mtlr 0
+; CHECK-DAG: ld [[SR:[0-9]+]], 16(1)
+; CHECK: mtlr [[SR]]
; CHECK: blr
; Function Attrs: nounwind readnone
diff --git a/test/CodeGen/PowerPC/rm-zext.ll b/test/CodeGen/PowerPC/rm-zext.ll
index 97c546c0145f..df5fe4f7a154 100644
--- a/test/CodeGen/PowerPC/rm-zext.ll
+++ b/test/CodeGen/PowerPC/rm-zext.ll
@@ -9,7 +9,7 @@ entry:
%shr2 = lshr i32 %mul, 5
ret i32 %shr2
-; CHECK-LABEL @foo
+; CHECK-LABEL: @foo
; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
; CHECK: blr
}
@@ -23,7 +23,7 @@ entry:
%or = or i32 %shr, %shl
ret i32 %or
-; CHECK-LABEL @test6
+; CHECK-LABEL: @test6
; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
; CHECK: blr
}
@@ -34,7 +34,7 @@ entry:
%cond = select i1 %cmp, i32 %a, i32 %b
ret i32 %cond
-; CHECK-LABEL @min
+; CHECK-LABEL: @min
; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
; CHECK: blr
}
diff --git a/test/CodeGen/PowerPC/rotl-rotr-crash.ll b/test/CodeGen/PowerPC/rotl-rotr-crash.ll
new file mode 100644
index 000000000000..3fbb67ecf25e
--- /dev/null
+++ b/test/CodeGen/PowerPC/rotl-rotr-crash.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8
+
+; Ensure this does not crash
+
+; Function Attrs: norecurse nounwind
+define <4 x i32> @func1 (<4 x i32> %a) {
+entry:
+ %0 = lshr <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+ %1 = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
+ %2 = or <4 x i32> %1, %0
+ ret <4 x i32> %2
+}
diff --git a/test/CodeGen/PowerPC/sdiv-pow2.ll b/test/CodeGen/PowerPC/sdiv-pow2.ll
index 5ec019dfb4af..d1f60da6c740 100644
--- a/test/CodeGen/PowerPC/sdiv-pow2.ll
+++ b/test/CodeGen/PowerPC/sdiv-pow2.ll
@@ -9,7 +9,7 @@ entry:
%div = sdiv i32 %a, 8
ret i32 %div
-; CHECK-LABEL @foo4
+; CHECK-LABEL: @foo4
; CHECK: srawi [[REG1:[0-9]+]], 3, 3
; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
; CHECK: extsw 3, [[REG2]]
@@ -22,12 +22,12 @@ entry:
%div = sdiv i64 %a, 8
ret i64 %div
-; CHECK-LABEL @foo8
+; CHECK-LABEL: @foo8
; CHECK: sradi [[REG1:[0-9]+]], 3, 3
; CHECK: addze 3, [[REG1]]
; CHECK: blr
-; CHECK-32-LABEL @foo8
+; CHECK-32-LABEL: @foo8
; CHECK-32-NOT: sradi
; CHECK-32: blr
}
@@ -58,7 +58,7 @@ entry:
; CHECK: neg 3, [[REG2]]
; CHECK: blr
-; CHECK-32-LABEL @foo8n
+; CHECK-32-LABEL: @foo8n
; CHECK-32-NOT: sradi
; CHECK-32: blr
}
diff --git a/test/CodeGen/PowerPC/selectiondag-extload-computeknownbits.ll b/test/CodeGen/PowerPC/selectiondag-extload-computeknownbits.ll
new file mode 100644
index 000000000000..79dccaa98ca1
--- /dev/null
+++ b/test/CodeGen/PowerPC/selectiondag-extload-computeknownbits.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=powerpc64-bgq-linux < %s
+
+; Check that llc does not crash due to an illegal APInt operation
+
+define i1 @f(i8* %ptr) {
+ entry:
+ %val = load i8, i8* %ptr, align 8, !range !0
+ %tobool = icmp eq i8 %val, 0
+ ret i1 %tobool
+}
+
+!0 = !{i8 0, i8 2}
diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll
index 4afb8fee1776..b7dd78085eb1 100644
--- a/test/CodeGen/PowerPC/seteq-0.ll
+++ b/test/CodeGen/PowerPC/seteq-0.ll
@@ -5,7 +5,7 @@ define i32 @eq0(i32 %a) {
%tmp.2 = zext i1 %tmp.1 to i32 ; <i32> [#uses=1]
ret i32 %tmp.2
-; CHECK: cntlz [[REG:r[0-9]+]], r3
+; CHECK: cntlzw [[REG:r[0-9]+]], r3
; CHECK: rlwinm r3, [[REG]], 27, 31, 31
; CHECK: blr
}
diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll
index dcbdd69d5d50..8c6682ca706e 100644
--- a/test/CodeGen/PowerPC/sjlj.ll
+++ b/test/CodeGen/PowerPC/sjlj.ll
@@ -74,24 +74,24 @@ return: ; preds = %if.end, %if.then
; CHECK-DAG: std [[REGA]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill
; CHECK-DAG: std 1, 16([[REGA]])
; CHECK-DAG: std 2, 24([[REGA]])
-; CHECK: bcl 20, 31, .LBB1_1
+; CHECK: bcl 20, 31, .LBB1_5
; CHECK: li 3, 1
-; CHECK: #EH_SjLj_Setup .LBB1_1
-; CHECK: b .LBB1_2
+; CHECK: #EH_SjLj_Setup .LBB1_5
+; CHECK: b .LBB1_1
-; CHECK: .LBB1_1:
-; CHECK: mflr [[REGL:[0-9]+]]
-; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload
-; CHECK: std [[REGL]], 8([[REG2]])
-; CHECK: li 3, 0
-
-; CHECK: .LBB1_2:
+; CHECK: .LBB1_4:
; CHECK: lfd
; CHECK: lvx
; CHECK: ld
; CHECK: blr
+; CHECK: .LBB1_5:
+; CHECK: mflr [[REGL:[0-9]+]]
+; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload
+; CHECK: std [[REGL]], 8([[REG2]])
+; CHECK: li 3, 0
+
; CHECK-NOAV: @main
; CHECK-NOAV-NOT: stvx
; CHECK-NOAV: bcl
diff --git a/test/CodeGen/PowerPC/stack-realign.ll b/test/CodeGen/PowerPC/stack-realign.ll
index e91b563af72e..1c93d665c16d 100644
--- a/test/CodeGen/PowerPC/stack-realign.ll
+++ b/test/CodeGen/PowerPC/stack-realign.ll
@@ -29,7 +29,7 @@ entry:
; CHECK-LABEL: @goo
-; CHECK-DAG: mflr 0
+; CHECK-DAG: mflr {{[0-9]+}}
; CHECK-DAG: clrldi [[REG:[0-9]+]], 1, 59
; CHECK-DAG: std 30, -16(1)
; CHECK-DAG: mr 30, 1
@@ -44,14 +44,14 @@ entry:
; CHECK: std 3, 48(30)
; CHECK: ld 1, 0(1)
-; CHECK-DAG: ld 0, 16(1)
+; CHECK-DAG: ld [[SR:[0-9]+]], 16(1)
; CHECK-DAG: ld 30, -16(1)
-; CHECK-DAG: mtlr 0
+; CHECK-DAG: mtlr [[SR]]
; CHECK: blr
; CHECK-FP-LABEL: @goo
-; CHECK-FP-DAG: mflr 0
+; CHECK-FP-DAG: mflr {{[0-9]+}}
; CHECK-FP-DAG: clrldi [[REG:[0-9]+]], 1, 59
; CHECK-FP-DAG: std 31, -8(1)
; CHECK-FP-DAG: std 30, -16(1)
@@ -70,14 +70,14 @@ entry:
; CHECK-FP: std 3, 48(30)
; CHECK-FP: ld 1, 0(1)
-; CHECK-FP-DAG: ld 0, 16(1)
+; CHECK-FP-DAG: ld [[SR:[0-9]+]], 16(1)
; CHECK-FP-DAG: ld 31, -8(1)
; CHECK-FP-DAG: ld 30, -16(1)
-; CHECK-FP-DAG: mtlr 0
+; CHECK-FP-DAG: mtlr [[SR]]
; CHECK-FP: blr
; CHECK-32-LABEL: @goo
-; CHECK-32-DAG: mflr 0
+; CHECK-32-DAG: mflr {{[0-9]+}}
; CHECK-32-DAG: clrlwi [[REG:[0-9]+]], 1, 27
; CHECK-32-DAG: stw 30, -8(1)
; CHECK-32-DAG: mr 30, 1
@@ -86,7 +86,7 @@ entry:
; CHECK-32: stwux 1, 1, 0
; CHECK-32-PIC-LABEL: @goo
-; CHECK-32-PIC-DAG: mflr 0
+; CHECK-32-PIC-DAG: mflr {{[0-9]+}}
; CHECK-32-PIC-DAG: clrlwi [[REG:[0-9]+]], 1, 27
; CHECK-32-PIC-DAG: stw 29, -12(1)
; CHECK-32-PIC-DAG: mr 29, 1
@@ -114,7 +114,7 @@ entry:
; CHECK-DAG: lis [[REG1:[0-9]+]], -13
; CHECK-DAG: clrldi [[REG3:[0-9]+]], 1, 59
-; CHECK-DAG: mflr 0
+; CHECK-DAG: mflr {{[0-9]+}}
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51808
; CHECK-DAG: std 30, -16(1)
; CHECK-DAG: mr 30, 1
@@ -130,7 +130,7 @@ entry:
; CHECK-32-DAG: lis [[REG1:[0-9]+]], -13
; CHECK-32-DAG: clrlwi [[REG3:[0-9]+]], 1, 27
-; CHECK-32-DAG: mflr 0
+; CHECK-32-DAG: mflr {{[0-9]+}}
; CHECK-32-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51904
; CHECK-32-DAG: stw 30, -8(1)
; CHECK-32-DAG: mr 30, 1
@@ -144,7 +144,7 @@ entry:
; CHECK-32-PIC-DAG: lis [[REG1:[0-9]+]], -13
; CHECK-32-PIC-DAG: clrlwi [[REG3:[0-9]+]], 1, 27
-; CHECK-32-PIC-DAG: mflr 0
+; CHECK-32-PIC-DAG: mflr {{[0-9]+}}
; CHECK-32-PIC-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51904
; CHECK-32-PIC-DAG: stw 29, -12(1)
; CHECK-32-PIC-DAG: mr 29, 1
@@ -174,7 +174,7 @@ entry:
; CHECK-LABEL: @loo
-; CHECK-DAG: mflr 0
+; CHECK-DAG: mflr {{[0-9]+}}
; CHECK-DAG: clrldi [[REG:[0-9]+]], 1, 59
; CHECK-DAG: std 30, -32(1)
; CHECK-DAG: mr 30, 1
@@ -190,7 +190,7 @@ entry:
; CHECK-FP-LABEL: @loo
-; CHECK-FP-DAG: mflr 0
+; CHECK-FP-DAG: mflr {{[0-9]+}}
; CHECK-FP-DAG: clrldi [[REG:[0-9]+]], 1, 59
; CHECK-FP-DAG: std 31, -24(1)
; CHECK-FP-DAG: std 30, -32(1)
diff --git a/test/CodeGen/PowerPC/stackmap-frame-setup.ll b/test/CodeGen/PowerPC/stackmap-frame-setup.ll
new file mode 100644
index 000000000000..487da00faa1c
--- /dev/null
+++ b/test/CodeGen/PowerPC/stackmap-frame-setup.ll
@@ -0,0 +1,20 @@
+; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -stop-after machine-sink %s | FileCheck %s --check-prefix=ISEL
+; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -fast-isel -fast-isel-abort=1 -stop-after machine-sink %s | FileCheck %s --check-prefix=FAST-ISEL
+
+define void @caller_meta_leaf() {
+entry:
+ %metadata = alloca i64, i32 3, align 8
+ store i64 11, i64* %metadata
+ store i64 12, i64* %metadata
+ store i64 13, i64* %metadata
+; ISEL: ADJCALLSTACKDOWN 0, implicit-def
+; ISEL-NEXT: STACKMAP
+; ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+; FAST-ISEL: ADJCALLSTACKDOWN 0, implicit-def
+; FAST-ISEL-NEXT: STACKMAP
+; FAST-ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def
+ ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
diff --git a/test/CodeGen/PowerPC/swaps-le-5.ll b/test/CodeGen/PowerPC/swaps-le-5.ll
index 5cd739a0efa9..3e13bd16c23b 100644
--- a/test/CodeGen/PowerPC/swaps-le-5.ll
+++ b/test/CodeGen/PowerPC/swaps-le-5.ll
@@ -15,11 +15,11 @@ entry:
}
; CHECK-LABEL: @bar0
-; CHECK-DAG: xxswapd {{[0-9]+}}, 1
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
; CHECK: xxpermdi [[REG3:[0-9]+]], [[REG2]], [[REG1]], 1
; CHECK: stxvd2x [[REG3]]
+; CHECK-NOT: xxswapd
define void @bar1(double %y) {
entry:
@@ -30,11 +30,11 @@ entry:
}
; CHECK-LABEL: @bar1
-; CHECK-DAG: xxswapd {{[0-9]+}}, 1
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: xxspltd [[REG2:[0-9]+]]
; CHECK: xxmrghd [[REG3:[0-9]+]], [[REG1]], [[REG2]]
; CHECK: stxvd2x [[REG3]]
+; CHECK-NOT: xxswapd
define void @baz0() {
entry:
diff --git a/test/CodeGen/PowerPC/swaps-le-6.ll b/test/CodeGen/PowerPC/swaps-le-6.ll
new file mode 100644
index 000000000000..df88322e4fd8
--- /dev/null
+++ b/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
+
+; These tests verify that VSX swap optimization works when loading a scalar
+; into a vector register.
+
+
+@x = global <2 x double> <double 9.970000e+01, double -1.032220e+02>, align 16
+@z = global <2 x double> <double 2.332000e+01, double 3.111111e+01>, align 16
+@y = global double 1.780000e+00, align 8
+
+define void @bar0() {
+entry:
+ %0 = load <2 x double>, <2 x double>* @x, align 16
+ %1 = load double, double* @y, align 8
+ %vecins = insertelement <2 x double> %0, double %1, i32 0
+ store <2 x double> %vecins, <2 x double>* @z, align 16
+ ret void
+}
+
+; CHECK-LABEL: @bar0
+; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
+; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
+; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
+; CHECK: stxvd2x [[REG5]]
+
+define void @bar1() {
+entry:
+ %0 = load <2 x double>, <2 x double>* @x, align 16
+ %1 = load double, double* @y, align 8
+ %vecins = insertelement <2 x double> %0, double %1, i32 1
+ store <2 x double> %vecins, <2 x double>* @z, align 16
+ ret void
+}
+
+; CHECK-LABEL: @bar1
+; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
+; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
+; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
+; CHECK: stxvd2x [[REG5]]
+
diff --git a/test/CodeGen/PowerPC/unal-vec-ldst.ll b/test/CodeGen/PowerPC/unal-vec-ldst.ll
new file mode 100644
index 000000000000..260e7f6115f9
--- /dev/null
+++ b/test/CodeGen/PowerPC/unal-vec-ldst.ll
@@ -0,0 +1,580 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
+entry:
+ %r = load <16 x i8>, <16 x i8>* %p, align 1
+ ret <16 x i8> %r
+
+; CHECK-LABEL: @test_l_v16i8
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
+; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
+; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
+define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 {
+entry:
+ %r = load <32 x i8>, <32 x i8>* %p, align 1
+ ret <32 x i8> %r
+
+; CHECK-LABEL: @test_l_v32i8
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
+; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
+; CHECK-DAG: vperm 3, [[REG5]], [[REG4]], [[REG3]]
+; CHECK-DAG: vperm 2, [[REG6]], [[REG5]], [[REG3]]
+; CHECK: blr
+}
+
+define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 {
+entry:
+ %r = load <8 x i16>, <8 x i16>* %p, align 2
+ ret <8 x i16> %r
+
+; CHECK-LABEL: @test_l_v8i16
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
+; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
+; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
+define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 {
+entry:
+ %r = load <16 x i16>, <16 x i16>* %p, align 2
+ ret <16 x i16> %r
+
+; CHECK-LABEL: @test_l_v16i16
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
+; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
+; CHECK-DAG: vperm 3, [[REG5]], [[REG4]], [[REG3]]
+; CHECK-DAG: vperm 2, [[REG6]], [[REG5]], [[REG3]]
+; CHECK: blr
+}
+
+define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 {
+entry:
+ %r = load <4 x i32>, <4 x i32>* %p, align 4
+ ret <4 x i32> %r
+
+; CHECK-LABEL: @test_l_v4i32
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
+; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
+; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
+define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 {
+entry:
+ %r = load <8 x i32>, <8 x i32>* %p, align 4
+ ret <8 x i32> %r
+
+; CHECK-LABEL: @test_l_v8i32
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
+; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
+; CHECK-DAG: vperm 3, [[REG5]], [[REG4]], [[REG3]]
+; CHECK-DAG: vperm 2, [[REG6]], [[REG5]], [[REG3]]
+; CHECK: blr
+}
+
+define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 {
+entry:
+ %r = load <2 x i64>, <2 x i64>* %p, align 8
+ ret <2 x i64> %r
+
+; CHECK-LABEL: @test_l_v2i64
+; CHECK: lxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 {
+entry:
+ %r = load <4 x i64>, <4 x i64>* %p, align 8
+ ret <4 x i64> %r
+
+; CHECK-LABEL: @test_l_v4i64
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: lxvd2x 34, 0, 3
+; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
+; CHECK: blr
+}
+
+define <4 x float> @test_l_v4float(<4 x float>* %p) #0 {
+entry:
+ %r = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %r
+
+; CHECK-LABEL: @test_l_v4float
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
+; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
+; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
+define <8 x float> @test_l_v8float(<8 x float>* %p) #0 {
+entry:
+ %r = load <8 x float>, <8 x float>* %p, align 4
+ ret <8 x float> %r
+
+; CHECK-LABEL: @test_l_v8float
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
+; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
+; CHECK-DAG: vperm 3, [[REG5]], [[REG4]], [[REG3]]
+; CHECK-DAG: vperm 2, [[REG6]], [[REG5]], [[REG3]]
+; CHECK: blr
+}
+
+define <2 x double> @test_l_v2double(<2 x double>* %p) #0 {
+entry:
+ %r = load <2 x double>, <2 x double>* %p, align 8
+ ret <2 x double> %r
+
+; CHECK-LABEL: @test_l_v2double
+; CHECK: lxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x double> @test_l_v4double(<4 x double>* %p) #0 {
+entry:
+ %r = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %r
+
+; CHECK-LABEL: @test_l_v4double
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: lxvd2x 34, 0, 3
+; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
+; CHECK: blr
+}
+
+define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 {
+entry:
+ %r = load <16 x i8>, <16 x i8>* %p, align 1
+ ret <16 x i8> %r
+
+; CHECK-LABEL: @test_l_p8v16i8
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 {
+entry:
+ %r = load <32 x i8>, <32 x i8>* %p, align 1
+ ret <32 x i8> %r
+
+; CHECK-LABEL: @test_l_p8v32i8
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: lxvw4x 34, 0, 3
+; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
+; CHECK: blr
+}
+
+define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 {
+entry:
+ %r = load <8 x i16>, <8 x i16>* %p, align 2
+ ret <8 x i16> %r
+
+; CHECK-LABEL: @test_l_p8v8i16
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 {
+entry:
+ %r = load <16 x i16>, <16 x i16>* %p, align 2
+ ret <16 x i16> %r
+
+; CHECK-LABEL: @test_l_p8v16i16
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: lxvw4x 34, 0, 3
+; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
+; CHECK: blr
+}
+
+define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 {
+entry:
+ %r = load <4 x i32>, <4 x i32>* %p, align 4
+ ret <4 x i32> %r
+
+; CHECK-LABEL: @test_l_p8v4i32
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 {
+entry:
+ %r = load <8 x i32>, <8 x i32>* %p, align 4
+ ret <8 x i32> %r
+
+; CHECK-LABEL: @test_l_p8v8i32
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: lxvw4x 34, 0, 3
+; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
+; CHECK: blr
+}
+
+define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 {
+entry:
+ %r = load <2 x i64>, <2 x i64>* %p, align 8
+ ret <2 x i64> %r
+
+; CHECK-LABEL: @test_l_p8v2i64
+; CHECK: lxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 {
+entry:
+ %r = load <4 x i64>, <4 x i64>* %p, align 8
+ ret <4 x i64> %r
+
+; CHECK-LABEL: @test_l_p8v4i64
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: lxvd2x 34, 0, 3
+; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
+; CHECK: blr
+}
+
+define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 {
+entry:
+ %r = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %r
+
+; CHECK-LABEL: @test_l_p8v4float
+; CHECK: lxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 {
+entry:
+ %r = load <8 x float>, <8 x float>* %p, align 4
+ ret <8 x float> %r
+
+; CHECK-LABEL: @test_l_p8v8float
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: lxvw4x 34, 0, 3
+; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
+; CHECK: blr
+}
+
+define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 {
+entry:
+ %r = load <2 x double>, <2 x double>* %p, align 8
+ ret <2 x double> %r
+
+; CHECK-LABEL: @test_l_p8v2double
+; CHECK: lxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 {
+entry:
+ %r = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %r
+
+; CHECK-LABEL: @test_l_p8v4double
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: lxvd2x 34, 0, 3
+; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
+; CHECK: blr
+}
+
+define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
+entry:
+ %r = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %r
+
+; CHECK-LABEL: @test_l_qv4float
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: qvlpclsx 0, 0, 3
+; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlfsx [[REG3:[0-9]+]], 0, 3
+; CHECK: qvfperm 1, [[REG3]], [[REG2]], 0
+; CHECK: blr
+}
+
+define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
+entry:
+ %r = load <8 x float>, <8 x float>* %p, align 4
+ ret <8 x float> %r
+
+; CHECK-LABEL: @test_l_qv8float
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK-DAG: qvlfsx [[REG3:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 3, [[REG2]]
+; CHECK-DAG: qvlpclsx [[REG5:[0-5]+]], 0, 3
+; CHECK-DAG: qvlfsx [[REG6:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 2, [[REG4]], [[REG3]], [[REG5]]
+; CHECK-DAG: qvfperm 1, [[REG6]], [[REG4]], [[REG5]]
+; CHECK: blr
+}
+
+define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
+entry:
+ %r = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %r
+
+; CHECK-LABEL: @test_l_qv4double
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: qvlpcldx 0, 0, 3
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlfdx [[REG3:[0-9]+]], 0, 3
+; CHECK: qvfperm 1, [[REG3]], [[REG2]], 0
+; CHECK: blr
+}
+
+define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
+entry:
+ %r = load <8 x double>, <8 x double>* %p, align 8
+ ret <8 x double> %r
+
+; CHECK-LABEL: @test_l_qv8double
+; CHECK-DAG: li [[REG1:[0-9]+]], 63
+; CHECK-DAG: li [[REG2:[0-9]+]], 32
+; CHECK-DAG: qvlfdx [[REG3:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 3, [[REG2]]
+; CHECK-DAG: qvlpcldx [[REG5:[0-5]+]], 0, 3
+; CHECK-DAG: qvlfdx [[REG6:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 2, [[REG4]], [[REG3]], [[REG5]]
+; CHECK-DAG: qvfperm 1, [[REG6]], [[REG4]], [[REG5]]
+; CHECK: blr
+}
+
+define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
+entry:
+ store <16 x i8> %v, <16 x i8>* %p, align 1
+ ret void
+
+; CHECK-LABEL: @test_s_v16i8
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 {
+entry:
+ store <32 x i8> %v, <32 x i8>* %p, align 1
+ ret void
+
+; CHECK-LABEL: @test_s_v32i8
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
+; CHECK-DAG: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
+entry:
+ store <8 x i16> %v, <8 x i16>* %p, align 2
+ ret void
+
+; CHECK-LABEL: @test_s_v8i16
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 {
+entry:
+ store <16 x i16> %v, <16 x i16>* %p, align 2
+ ret void
+
+; CHECK-LABEL: @test_s_v16i16
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
+; CHECK-DAG: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
+entry:
+ store <4 x i32> %v, <4 x i32>* %p, align 4
+ ret void
+
+; CHECK-LABEL: @test_s_v4i32
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 {
+entry:
+ store <8 x i32> %v, <8 x i32>* %p, align 4
+ ret void
+
+; CHECK-LABEL: @test_s_v8i32
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
+; CHECK-DAG: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 {
+entry:
+ store <2 x i64> %v, <2 x i64>* %p, align 8
+ ret void
+
+; CHECK-LABEL: @test_s_v2i64
+; CHECK: stxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
+entry:
+ store <4 x i64> %v, <4 x i64>* %p, align 8
+ ret void
+
+; CHECK-LABEL: @test_s_v4i64
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: stxvd2x 35, 3, [[REG1]]
+; CHECK-DAG: stxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: @test_s_v4float
+; CHECK: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 {
+entry:
+ store <8 x float> %v, <8 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: @test_s_v8float
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
+; CHECK-DAG: stxvw4x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 {
+entry:
+ store <2 x double> %v, <2 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: @test_s_v2double
+; CHECK: stxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: @test_s_v4double
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK-DAG: stxvd2x 35, 3, [[REG1]]
+; CHECK-DAG: stxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: @test_s_qv4float
+; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 1, 3
+; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 1, 2
+; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 1, 1
+; CHECK-DAG: stfs 1, 0(3)
+; CHECK-DAG: stfs [[REG1]], 12(3)
+; CHECK-DAG: stfs [[REG2]], 8(3)
+; CHECK-DAG: stfs [[REG3]], 4(3)
+; CHECK: blr
+}
+
+define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
+entry:
+ store <8 x float> %v, <8 x float>* %p, align 4
+ ret void
+
+; CHECK-LABEL: @test_s_qv8float
+; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 2, 3
+; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 2, 2
+; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 2, 1
+; CHECK-DAG: qvesplati [[REG4:[0-9]+]], 1, 3
+; CHECK-DAG: qvesplati [[REG5:[0-9]+]], 1, 2
+; CHECK-DAG: qvesplati [[REG6:[0-9]+]], 1, 1
+; CHECK-DAG: stfs 2, 16(3)
+; CHECK-DAG: stfs 1, 0(3)
+; CHECK-DAG: stfs [[REG1]], 28(3)
+; CHECK-DAG: stfs [[REG2]], 24(3)
+; CHECK-DAG: stfs [[REG3]], 20(3)
+; CHECK-DAG: stfs [[REG4]], 12(3)
+; CHECK-DAG: stfs [[REG5]], 8(3)
+; CHECK-DAG: stfs [[REG6]], 4(3)
+; CHECK: blr
+}
+
+define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: @test_s_qv4double
+; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 1, 3
+; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 1, 2
+; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 1, 1
+; CHECK-DAG: stfd 1, 0(3)
+; CHECK-DAG: stfd [[REG1]], 24(3)
+; CHECK-DAG: stfd [[REG2]], 16(3)
+; CHECK-DAG: stfd [[REG3]], 8(3)
+; CHECK: blr
+}
+
+define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
+entry:
+ store <8 x double> %v, <8 x double>* %p, align 8
+ ret void
+
+; CHECK-LABEL: @test_s_qv8double
+; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 2, 3
+; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 2, 2
+; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 2, 1
+; CHECK-DAG: qvesplati [[REG4:[0-9]+]], 1, 3
+; CHECK-DAG: qvesplati [[REG5:[0-9]+]], 1, 2
+; CHECK-DAG: qvesplati [[REG6:[0-9]+]], 1, 1
+; CHECK-DAG: stfd 2, 32(3)
+; CHECK-DAG: stfd 1, 0(3)
+; CHECK-DAG: stfd [[REG1]], 56(3)
+; CHECK-DAG: stfd [[REG2]], 48(3)
+; CHECK-DAG: stfd [[REG3]], 40(3)
+; CHECK-DAG: stfd [[REG4]], 24(3)
+; CHECK-DAG: stfd [[REG5]], 16(3)
+; CHECK-DAG: stfd [[REG6]], 8(3)
+; CHECK: blr
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr7" }
+attributes #1 = { nounwind "target-cpu"="a2q" }
+attributes #2 = { nounwind "target-cpu"="pwr8" }
+
diff --git a/test/CodeGen/PowerPC/unal-vec-negarith.ll b/test/CodeGen/PowerPC/unal-vec-negarith.ll
new file mode 100644
index 000000000000..faac891f5c6f
--- /dev/null
+++ b/test/CodeGen/PowerPC/unal-vec-negarith.ll
@@ -0,0 +1,17 @@
+; RUN: llc -debug-only=isel <%s >%t 2>&1 && FileCheck <%t %s
+; REQUIRES: asserts
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
+entry:
+ %r = load <16 x i8>, <16 x i8>* %p, align 1
+ ret <16 x i8> %r
+
+; CHECK-NOT: v4i32,ch = llvm.ppc.altivec.lvx{{.*}}<LD31[%p+4294967281](align=1)>
+; CHECK: v4i32,ch = llvm.ppc.altivec.lvx{{.*}}<LD31[%p+-15](align=1)>
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr7" }
+
diff --git a/test/CodeGen/PowerPC/unwind-dw2-g.ll b/test/CodeGen/PowerPC/unwind-dw2-g.ll
index 8bd158867c79..e44da85f5b36 100644
--- a/test/CodeGen/PowerPC/unwind-dw2-g.ll
+++ b/test/CodeGen/PowerPC/unwind-dw2-g.ll
@@ -3,7 +3,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind
-define void @foo() #0 {
+define void @foo() #0 !dbg !4 {
entry:
call void @llvm.eh.unwind.init(), !dbg !9
ret void, !dbg !10
@@ -21,11 +21,11 @@ attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "/tmp/unwind-dw2.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "/tmp/unwind-dw2.c", directory: "/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
diff --git a/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll b/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
new file mode 100644
index 000000000000..3d4789360f55
--- /dev/null
+++ b/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
@@ -0,0 +1,114 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
+; RUN: --check-prefix=CHECK-BE
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
+; RUN: --check-prefix=CHECK-P7
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @geti(<4 x i32> %a, i32 signext %b) {
+entry:
+ %vecext = extractelement <4 x i32> %a, i32 %b
+ ret i32 %vecext
+; CHECK-LABEL: @geti
+; CHECK-P7-LABEL: @geti
+; CHECK-BE-LABEL: @geti
+; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 2
+; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5
+; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 2
+; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
+; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]]
+; CHECK-DAG: li [[ONEREG:[0-9]+]], 1
+; CHECK-DAG: and [[ELEMSREG:[0-9]+]], [[ONEREG]], 5
+; CHECK-DAG: sldi [[SHAMREG:[0-9]+]], [[ELEMSREG]], 5
+; CHECK: mfvsrd [[TOGPR:[0-9]+]],
+; CHECK: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]]
+; CHECK: extsw 3, [[RSHREG]]
+; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2
+; CHECK-P7-DAG: stxvw4x 34,
+; CHECK-P7: lwax 3, [[ELEMOFFREG]],
+; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 2
+; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 2
+; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]
+; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
+; CHECK-BE-DAG: li [[IMMREG:[0-9]+]], 1
+; CHECK-BE-DAG: andc [[ANDCREG:[0-9]+]], [[IMMREG]], 5
+; CHECK-BE-DAG: sldi [[SHAMREG:[0-9]+]], [[ANDCREG]], 5
+; CHECK-BE: mfvsrd [[TOGPR:[0-9]+]],
+; CHECK-BE: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]]
+; CHECk-BE: extsw 3, [[RSHREG]]
+}
+
+; Function Attrs: norecurse nounwind readnone
+define i64 @getl(<2 x i64> %a, i32 signext %b) {
+entry:
+ %vecext = extractelement <2 x i64> %a, i32 %b
+ ret i64 %vecext
+; CHECK-LABEL: @getl
+; CHECK-P7-LABEL: @getl
+; CHECK-BE-LABEL: @getl
+; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 1
+; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5
+; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3
+; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
+; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]]
+; CHECK: mfvsrd 3,
+; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 3
+; CHECK-P7-DAG: stxvd2x 34,
+; CHECK-P7: ldx 3, [[ELEMOFFREG]],
+; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 1
+; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3
+; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]
+; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
+; CHECK-BE: mfvsrd 3,
+}
+
+; Function Attrs: norecurse nounwind readnone
+define float @getf(<4 x float> %a, i32 signext %b) {
+entry:
+ %vecext = extractelement <4 x float> %a, i32 %b
+ ret float %vecext
+; CHECK-LABEL: @getf
+; CHECK-P7-LABEL: @getf
+; CHECK-BE-LABEL: @getf
+; CHECK: li [[IMMREG:[0-9]+]], 3
+; CHECK: xor [[TRUNCREG:[0-9]+]], [[IMMREG]], 5
+; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[TRUNCREG]]
+; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
+; CHECK: xscvspdpn 1,
+; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2
+; CHECK-P7-DAG: stxvw4x 34,
+; CHECK-P7: lfsx 1, [[ELEMOFFREG]],
+; CHECK-BE: sldi [[ELNOREG:[0-9]+]], 5, 2
+; CHECK-BE: lvsl [[SHMSKREG:[0-9]+]], 0, [[ELNOREG]]
+; CHECK-BE: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
+; CHECK-BE: xscvspdpn 1,
+}
+
+; Function Attrs: norecurse nounwind readnone
+define double @getd(<2 x double> %a, i32 signext %b) {
+entry:
+ %vecext = extractelement <2 x double> %a, i32 %b
+ ret double %vecext
+; CHECK-LABEL: @getd
+; CHECK-P7-LABEL: @getd
+; CHECK-BE-LABEL: @getd
+; CHECK: li [[TRUNCREG:[0-9]+]], 1
+; CHECK: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5
+; CHECK: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3
+; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
+; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
+; FIXME: the instruction below is a redundant regclass copy, to be removed
+; CHECK: xxlor 1,
+; CHECK-P7-DAG: andi. [[ANDREG:[0-9]+]], 5, 1
+; CHECK-P7-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3
+; CHECK-P7-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]
+; CHECK-P7-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
+; FIXME: the instruction below is a redundant regclass copy, to be removed
+; CHECK-P7: xxlor 1,
+; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 1
+; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3
+; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]
+; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
+; FIXME: the instruction below is a redundant regclass copy, to be removed
+; CHECK-BE: xxlor 1,
+}
diff --git a/test/CodeGen/PowerPC/vec-asm-disabled.ll b/test/CodeGen/PowerPC/vec-asm-disabled.ll
new file mode 100644
index 000000000000..333ccce6b89f
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec-asm-disabled.ll
@@ -0,0 +1,14 @@
+; RUN: not llc -mcpu=pwr7 -o /dev/null %s 2>&1 | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <4 x i32> @testi1(<4 x i32> %b1, <4 x i32> %b2) #0 {
+entry:
+ %0 = call <4 x i32> asm "xxland $0, $1, $2", "=^wd,^wd,^wd"(<4 x i32> %b1, <4 x i32> %b2) #0
+ ret <4 x i32> %0
+
+; CHECK: error: couldn't allocate output register for constraint 'wd'
+}
+
+attributes #0 = { nounwind "target-features"="-vsx" }
+
diff --git a/test/CodeGen/PowerPC/vec_add_sub_quadword.ll b/test/CodeGen/PowerPC/vec_add_sub_quadword.ll
index f7ebf479755c..9e79b52c4049 100644
--- a/test/CodeGen/PowerPC/vec_add_sub_quadword.ll
+++ b/test/CodeGen/PowerPC/vec_add_sub_quadword.ll
@@ -14,7 +14,7 @@ define <1 x i128> @increment_by_one(<1 x i128> %x) nounwind {
%result = add <1 x i128> %x, <i128 1>
ret <1 x i128> %result
; CHECK-LABEL: @increment_by_one
-; CHECK vadduqm 2, 2, 3
+; CHECK: vadduqm 2, 2, 3
}
define <1 x i128> @increment_by_val(<1 x i128> %x, i128 %val) nounwind {
@@ -37,7 +37,7 @@ define <1 x i128> @decrement_by_one(<1 x i128> %x) nounwind {
%result = sub <1 x i128> %x, <i128 1>
ret <1 x i128> %result
; CHECK-LABEL: @decrement_by_one
-; CHECK vsubuqm 2, 2, 3
+; CHECK: vsubuqm 2, 2, 3
}
define <1 x i128> @decrement_by_val(<1 x i128> %x, i128 %val) nounwind {
@@ -46,7 +46,7 @@ define <1 x i128> @decrement_by_val(<1 x i128> %x, i128 %val) nounwind {
%result = sub <1 x i128> %x, %tmpvec2
ret <1 x i128> %result
; CHECK-LABEL: @decrement_by_val
-; CHECK vsubuqm 2, 2, 3
+; CHECK: vsubuqm 2, 2, 3
}
declare <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x,
diff --git a/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll b/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll
new file mode 100644
index 000000000000..db92f20c352a
--- /dev/null
+++ b/test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=ppc64 -mtriple=ppc64-apple-darwin < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}_merge_8_float_zero_stores:
+; CHECK: li [[ZEROREG:r[0-9]+]], 0
+; CHECK-DAG: std [[ZEROREG]], 0([[PTR:r[0-9]+]])
+; CHECK-DAG: std [[ZEROREG]], 8([[PTR]])
+; CHECK-DAG: std [[ZEROREG]], 16([[PTR]])
+; CHECK-DAG: std [[ZEROREG]], 24([[PTR]])
+; CHECK: blr
+define void @merge_8_float_zero_stores(float* %ptr) {
+ %idx0 = getelementptr float, float* %ptr, i64 0
+ %idx1 = getelementptr float, float* %ptr, i64 1
+ %idx2 = getelementptr float, float* %ptr, i64 2
+ %idx3 = getelementptr float, float* %ptr, i64 3
+ %idx4 = getelementptr float, float* %ptr, i64 4
+ %idx5 = getelementptr float, float* %ptr, i64 5
+ %idx6 = getelementptr float, float* %ptr, i64 6
+ %idx7 = getelementptr float, float* %ptr, i64 7
+ store float 0.0, float* %idx0, align 4
+ store float 0.0, float* %idx1, align 4
+ store float 0.0, float* %idx2, align 4
+ store float 0.0, float* %idx3, align 4
+ store float 0.0, float* %idx4, align 4
+ store float 0.0, float* %idx5, align 4
+ store float 0.0, float* %idx6, align 4
+ store float 0.0, float* %idx7, align 4
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
index dceb2516c696..b2eefb666760 100644
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -1226,11 +1226,10 @@ define <2 x i32> @test80(i32 %v) {
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test80
-; CHECK-LE-DAG: addi [[R1:[0-9]+]], 1, -16
+; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
-; CHECK-LE-DAG: lxvd2x [[V1:[0-9]+]], 0, [[R1]]
; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
-; CHECK-LE-DAG: xxswapd 34, [[V1]]
+; CHECK-LE-DAG: xxspltd 34, [[R1]]
; CHECK-LE-DAG: xxswapd 35, [[V2]]
; CHECK-LE: vaddudm 2, 2, 3
; CHECK-LE: blr
diff --git a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 6c89b1092bdf..97e1548f965f 100644
--- a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -33,12 +33,8 @@ define double @teste0(<2 x double>* %p1) {
%r = extractelement <2 x double> %v, i32 0
ret double %r
-; FIXME: Swap optimization will collapse this into lxvd2x 1, 0, 3.
-
; CHECK-LABEL: teste0
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 1, 0
+; CHECK: lxvd2x 1, 0, 3
}
define double @teste1(<2 x double>* %p1) {
diff --git a/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
index 102970885963..c2cb71c58881 100644
--- a/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
+++ b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
@@ -55,8 +55,7 @@ entry:
ret void
; CHECK-LABEL: @intToFlt
; CHECK: lxsiwax [[REGLD2:[0-9]+]],
-; FIXME: the below will change when the VSX form is implemented
-; CHECK: fcfids {{[0-9]}}, [[REGLD2]]
+; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]]
}
; Function Attrs: nounwind
@@ -108,8 +107,7 @@ entry:
ret void
; CHECK-LABEL: @uIntToFlt
; CHECK: lxsiwzx [[REGLD4:[0-9]+]],
-; FIXME: the below will change when the VSX form is implemented
-; CHECK: fcfidus {{[0-9]+}}, [[REGLD4]]
+; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]]
}
; Function Attrs: nounwind
diff --git a/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
index dcfa0e788867..4f767c7ca78f 100644
--- a/test/CodeGen/PowerPC/vsx_shuffle_le.ll
+++ b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
@@ -8,8 +8,7 @@ define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: test00
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 1
+; CHECK: xxspltd 34, 0, 0
}
define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
@@ -58,9 +57,7 @@ define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
ret <2 x double> %v3
; CHECK-LABEL: @test10
-; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 34, 0
+; CHECK: lxvd2x 34, 0, 3
}
define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
@@ -71,8 +68,7 @@ define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: @test11
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 0
+; CHECK: xxspltd 34, 0, 1
}
define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
@@ -139,8 +135,7 @@ define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: @test22
; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 1
+; CHECK: xxspltd 34, 0, 0
}
define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
@@ -189,9 +184,7 @@ define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
ret <2 x double> %v3
; CHECK-LABEL: @test32
-; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxswapd 34, 0
+; CHECK: lxvd2x 34, 0, 4
}
define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
@@ -202,6 +195,5 @@ define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
; CHECK-LABEL: @test33
; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxswapd 0, 0
-; CHECK: xxspltd 34, 0, 0
+; CHECK: xxspltd 34, 0, 1
}
diff --git a/test/CodeGen/SPARC/2011-01-22-SRet.ll b/test/CodeGen/SPARC/2011-01-22-SRet.ll
index ae9764e82084..678544ebf2cb 100644
--- a/test/CodeGen/SPARC/2011-01-22-SRet.ll
+++ b/test/CodeGen/SPARC/2011-01-22-SRet.ll
@@ -19,8 +19,8 @@ entry:
define i32 @test() nounwind {
entry:
;CHECK-LABEL: test:
-;CHECK: st {{.+}}, [%sp+64]
;CHECK: call make_foo
+;CHECK: st {{.+}}, [%sp+64]
;CHECK: unimp 12
%f = alloca %struct.foo_t, align 8
call void @make_foo(%struct.foo_t* noalias sret %f, i32 10, i32 20, i32 30) nounwind
diff --git a/test/CodeGen/SPARC/32abi.ll b/test/CodeGen/SPARC/32abi.ll
new file mode 100644
index 000000000000..7ac1de5c0904
--- /dev/null
+++ b/test/CodeGen/SPARC/32abi.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -march=sparc -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc < %s -march=sparcel -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
+
+; CHECK-LABEL: intarg:
+; The save/restore frame is not strictly necessary here, but we would need to
+; refer to %o registers instead.
+; CHECK: save %sp, -96, %sp
+; CHECK: ld [%fp+96], [[R2:%[gilo][0-7]]]
+; CHECK: ld [%fp+92], [[R1:%[gilo][0-7]]]
+; CHECK: stb %i0, [%i4]
+; CHECK: stb %i1, [%i4]
+; CHECK: sth %i2, [%i4]
+; CHECK: st %i3, [%i4]
+; CHECK: st %i4, [%i4]
+; CHECK: st %i5, [%i4]
+; CHECK: st [[R1]], [%i4]
+; CHECK: st [[R2]], [%i4]
+; CHECK: restore
+define void @intarg(i8 %a0, ; %i0
+ i8 %a1, ; %i1
+ i16 %a2, ; %i2
+ i32 %a3, ; %i3
+ i8* %a4, ; %i4
+ i32 %a5, ; %i5
+ i32 signext %a6, ; [%fp+92]
+ i8* %a7) { ; [%fp+96]
+ store i8 %a0, i8* %a4
+ store i8 %a1, i8* %a4
+ %p16 = bitcast i8* %a4 to i16*
+ store i16 %a2, i16* %p16
+ %p32 = bitcast i8* %a4 to i32*
+ store i32 %a3, i32* %p32
+ %pp = bitcast i8* %a4 to i8**
+ store i8* %a4, i8** %pp
+ store i32 %a5, i32* %p32
+ store i32 %a6, i32* %p32
+ store i8* %a7, i8** %pp
+ ret void
+}
+
+; CHECK-LABEL: call_intarg:
+; CHECK: save %sp, -104, %sp
+; Use %o0-%o5 for outgoing arguments
+; CHECK: mov 5, %o5
+; CHECK: st %i0, [%sp+92]
+; CHECK: call intarg
+; CHECK-NOT: add %sp
+; CHECK: restore
+define void @call_intarg(i32 %i0, i8* %i1) {
+ call void @intarg(i8 0, i8 1, i16 2, i32 3, i8* undef, i32 5, i32 %i0, i8* %i1)
+ ret void
+}
+
+;; Verify doubles starting with an even reg, starting with an odd reg,
+;; straddling the boundary of regs and mem, and floats in regs and mem.
+;
+; CHECK-LABEL: floatarg:
+; CHECK: save %sp, -120, %sp
+; CHECK: mov %i5, %g2
+; CHECK-NEXT: ld [%fp+92], %g3
+; CHECK-NEXT: mov %i4, %i5
+; CHECK-NEXT: std %g2, [%fp+-24]
+; CHECK-NEXT: mov %i3, %i4
+; CHECK-NEXT: std %i4, [%fp+-16]
+; CHECK-NEXT: std %i0, [%fp+-8]
+; CHECK-NEXT: st %i2, [%fp+-28]
+; CHECK-NEXT: ld [%fp+104], %f0
+; CHECK-NEXT: ldd [%fp+96], %f2
+; CHECK-NEXT: ld [%fp+-28], %f1
+; CHECK-NEXT: ldd [%fp+-8], %f4
+; CHECK-NEXT: ldd [%fp+-16], %f6
+; CHECK-NEXT: ldd [%fp+-24], %f8
+; CHECK-NEXT: fstod %f1, %f10
+; CHECK-NEXT: faddd %f4, %f10, %f4
+; CHECK-NEXT: faddd %f6, %f4, %f4
+; CHECK-NEXT: faddd %f8, %f4, %f4
+; CHECK-NEXT: faddd %f2, %f4, %f2
+; CHECK-NEXT: fstod %f0, %f0
+; CHECK-NEXT: faddd %f0, %f2, %f0
+; CHECK-NEXT: restore
+define double @floatarg(double %a0, ; %i0,%i1
+ float %a1, ; %i2
+ double %a2, ; %i3, %i4
+ double %a3, ; %i5, [%fp+92] (using 4 bytes)
+ double %a4, ; [%fp+96] (using 8 bytes)
+ float %a5) { ; [%fp+104] (using 4 bytes)
+ %d1 = fpext float %a1 to double
+ %s1 = fadd double %a0, %d1
+ %s2 = fadd double %a2, %s1
+ %s3 = fadd double %a3, %s2
+ %s4 = fadd double %a4, %s3
+ %d5 = fpext float %a5 to double
+ %s5 = fadd double %d5, %s4
+ ret double %s5
+}
+
+; CHECK-LABEL: call_floatarg:
+; CHECK: save %sp, -112, %sp
+; CHECK: mov %i2, %o1
+; CHECK-NEXT: mov %i1, %o0
+; CHECK-NEXT: st %i0, [%sp+104]
+; CHECK-NEXT: std %o0, [%sp+96]
+; CHECK-NEXT: st %o1, [%sp+92]
+; CHECK-NEXT: mov %i0, %o2
+; CHECK-NEXT: mov %o0, %o3
+; CHECK-NEXT: mov %o1, %o4
+; CHECK-NEXT: mov %o0, %o5
+; CHECK-NEXT: call floatarg
+; CHECK: std %f0, [%i4]
+; CHECK: restore
+define void @call_floatarg(float %f1, double %d2, float %f5, double *%p) {
+ %r = call double @floatarg(double %d2, float %f1, double %d2, double %d2,
+ double %d2, float %f1)
+ store double %r, double* %p
+ ret void
+}
+
+;; i64 arguments should effectively work the same as double: split
+;; into two locations. This is different for little-endian vs big
+;; endian, since the 64-bit math needs to be split
+; CHECK-LABEL: i64arg:
+; CHECK: save %sp, -96, %sp
+; CHECK-BE: ld [%fp+100], %g2
+; CHECK-BE-NEXT: ld [%fp+96], %g3
+; CHECK-BE-NEXT: ld [%fp+92], %g4
+; CHECK-BE-NEXT: addcc %i1, %i2, %i1
+; CHECK-BE-NEXT: addxcc %i0, 0, %i0
+; CHECK-BE-NEXT: addcc %i4, %i1, %i1
+; CHECK-BE-NEXT: addxcc %i3, %i0, %i0
+; CHECK-BE-NEXT: addcc %g4, %i1, %i1
+; CHECK-BE-NEXT: ld [%fp+104], %i2
+; CHECK-BE-NEXT: addxcc %i5, %i0, %i0
+; CHECK-BE-NEXT: addcc %g2, %i1, %i1
+; CHECK-BE-NEXT: addxcc %g3, %i0, %i0
+; CHECK-BE-NEXT: addcc %i2, %i1, %i1
+; CHECK-BE-NEXT: addxcc %i0, 0, %i0
+;
+; CHECK-LE: ld [%fp+96], %g2
+; CHECK-LE-NEXT: ld [%fp+100], %g3
+; CHECK-LE-NEXT: ld [%fp+92], %g4
+; CHECK-LE-NEXT: addcc %i0, %i2, %i0
+; CHECK-LE-NEXT: addxcc %i1, 0, %i1
+; CHECK-LE-NEXT: addcc %i3, %i0, %i0
+; CHECK-LE-NEXT: addxcc %i4, %i1, %i1
+; CHECK-LE-NEXT: addcc %i5, %i0, %i0
+; CHECK-LE-NEXT: ld [%fp+104], %i2
+; CHECK-LE-NEXT: addxcc %g4, %i1, %i1
+; CHECK-LE-NEXT: addcc %g2, %i0, %i0
+; CHECK-LE-NEXT: addxcc %g3, %i1, %i1
+; CHECK-LE-NEXT: addcc %i2, %i0, %i0
+; CHECK-LE-NEXT: addxcc %i1, 0, %i1
+; CHECK-NEXT: restore
+
+
+define i64 @i64arg(i64 %a0, ; %i0,%i1
+ i32 %a1, ; %i2
+ i64 %a2, ; %i3, %i4
+ i64 %a3, ; %i5, [%fp+92] (using 4 bytes)
+ i64 %a4, ; [%fp+96] (using 8 bytes)
+ i32 %a5) { ; [%fp+104] (using 4 bytes)
+ %a1L = zext i32 %a1 to i64
+ %s1 = add i64 %a0, %a1L
+ %s2 = add i64 %a2, %s1
+ %s3 = add i64 %a3, %s2
+ %s4 = add i64 %a4, %s3
+ %a5L = zext i32 %a5 to i64
+ %s5 = add i64 %a5L, %s4
+ ret i64 %s5
+}
+
+; CHECK-LABEL: call_i64arg:
+; CHECK: save %sp, -112, %sp
+; CHECK: st %i0, [%sp+104]
+; CHECK-NEXT: st %i2, [%sp+100]
+; CHECK-NEXT: st %i1, [%sp+96]
+; CHECK-NEXT: st %i2, [%sp+92]
+; CHECK-NEXT: mov %i1, %o0
+; CHECK-NEXT: mov %i2, %o1
+; CHECK-NEXT: mov %i0, %o2
+; CHECK-NEXT: mov %i1, %o3
+; CHECK-NEXT: mov %i2, %o4
+; CHECK-NEXT: mov %i1, %o5
+; CHECK-NEXT: call i64arg
+; CHECK: std %o0, [%i3]
+; CHECK-NEXT: restore
+
+define void @call_i64arg(i32 %a0, i64 %a1, i64* %p) {
+ %r = call i64 @i64arg(i64 %a1, i32 %a0, i64 %a1, i64 %a1, i64 %a1, i32 %a0)
+ store i64 %r, i64* %p
+ ret void
+}
diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll
index 7c08998a1427..96104ecc3c68 100644
--- a/test/CodeGen/SPARC/64abi.ll
+++ b/test/CodeGen/SPARC/64abi.ll
@@ -1,19 +1,19 @@
; RUN: llc < %s -march=sparcv9 -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck %s
-; CHECK: intarg
+; CHECK-LABEL: intarg:
; The save/restore frame is not strictly necessary here, but we would need to
; refer to %o registers instead.
; CHECK: save %sp, -128, %sp
+; CHECK: ldx [%fp+2231], [[R2:%[gilo][0-7]]]
+; CHECK: ld [%fp+2227], [[R1:%[gilo][0-7]]]
; CHECK: stb %i0, [%i4]
; CHECK: stb %i1, [%i4]
; CHECK: sth %i2, [%i4]
; CHECK: st %i3, [%i4]
; CHECK: stx %i4, [%i4]
; CHECK: st %i5, [%i4]
-; CHECK: ld [%fp+2227], [[R:%[gilo][0-7]]]
-; CHECK: st [[R]], [%i4]
-; CHECK: ldx [%fp+2231], [[R:%[gilo][0-7]]]
-; CHECK: stx [[R]], [%i4]
+; CHECK: st [[R1]], [%i4]
+; CHECK: stx [[R2]], [%i4]
; CHECK: restore
define void @intarg(i8 %a0, ; %i0
i8 %a1, ; %i1
@@ -37,14 +37,14 @@ define void @intarg(i8 %a0, ; %i0
ret void
}
-; CHECK: call_intarg
+; CHECK-LABEL: call_intarg:
; 16 saved + 8 args.
; CHECK: save %sp, -192, %sp
; Sign-extend and store the full 64 bits.
; CHECK: sra %i0, 0, [[R:%[gilo][0-7]]]
-; CHECK: stx [[R]], [%sp+2223]
; Use %o0-%o5 for outgoing arguments
; CHECK: mov 5, %o5
+; CHECK: stx [[R]], [%sp+2223]
; CHECK: call intarg
; CHECK-NOT: add %sp
; CHECK: restore
@@ -53,13 +53,13 @@ define void @call_intarg(i32 %i0, i8* %i1) {
ret void
}
-; CHECK: floatarg
+; CHECK-LABEL: floatarg:
; CHECK: save %sp, -128, %sp
+; CHECK: ld [%fp+2307], [[F:%f[0-9]+]]
; CHECK: fstod %f1,
; CHECK: faddd %f2,
; CHECK: faddd %f4,
; CHECK: faddd %f6,
-; CHECK: ld [%fp+2307], [[F:%f[0-9]+]]
; CHECK: fadds %f31, [[F]]
define double @floatarg(float %a0, ; %f1
double %a1, ; %d2
@@ -89,12 +89,12 @@ define double @floatarg(float %a0, ; %f1
ret double %s17
}
-; CHECK: call_floatarg
+; CHECK-LABEL: call_floatarg:
; CHECK: save %sp, -272, %sp
-; Store 4 bytes, right-aligned in slot.
-; CHECK: st %f1, [%sp+2307]
; Store 8 bytes in full slot.
; CHECK: std %f2, [%sp+2311]
+; Store 4 bytes, right-aligned in slot.
+; CHECK: st %f1, [%sp+2307]
; CHECK: fmovd %f2, %f4
; CHECK: call floatarg
; CHECK-NOT: add %sp
@@ -109,12 +109,12 @@ define void @call_floatarg(float %f1, double %d2, float %f5, double *%p) {
ret void
}
-; CHECK: mixedarg
+; CHECK-LABEL: mixedarg:
+; CHECK: ldx [%fp+2247]
+; CHECK: ldx [%fp+2231]
; CHECK: fstod %f3
; CHECK: faddd %f6
; CHECK: faddd %f16
-; CHECK: ldx [%fp+2231]
-; CHECK: ldx [%fp+2247]
define void @mixedarg(i8 %a0, ; %i0
float %a1, ; %f3
i16 %a2, ; %i2
@@ -133,7 +133,7 @@ define void @mixedarg(i8 %a0, ; %i0
ret void
}
-; CHECK: call_mixedarg
+; CHECK-LABEL: call_mixedarg:
; CHECK: stx %i2, [%sp+2247]
; CHECK: stx %i0, [%sp+2223]
; CHECK: fmovd %f2, %f6
@@ -157,7 +157,7 @@ define void @call_mixedarg(i64 %i0, double %f2, i16* %i2) {
; The inreg attribute is used to indicate 32-bit sized struct elements that
; share an 8-byte slot.
-; CHECK: inreg_fi
+; CHECK-LABEL: inreg_fi:
; CHECK: fstoi %f1
; CHECK: srlx %i0, 32, [[R:%[gilo][0-7]]]
; CHECK: sub [[R]],
@@ -168,7 +168,7 @@ define i32 @inreg_fi(i32 inreg %a0, ; high bits of %i0
ret i32 %rv
}
-; CHECK: call_inreg_fi
+; CHECK-LABEL: call_inreg_fi:
; Allocate space for 6 arguments, even when only 2 are used.
; CHECK: save %sp, -176, %sp
; CHECK: sllx %i1, 32, %o0
@@ -179,7 +179,7 @@ define void @call_inreg_fi(i32* %p, i32 %i1, float %f5) {
ret void
}
-; CHECK: inreg_ff
+; CHECK-LABEL: inreg_ff:
; CHECK: fsubs %f0, %f1, %f0
define float @inreg_ff(float inreg %a0, ; %f0
float inreg %a1) { ; %f1
@@ -187,7 +187,7 @@ define float @inreg_ff(float inreg %a0, ; %f0
ret float %rv
}
-; CHECK: call_inreg_ff
+; CHECK-LABEL: call_inreg_ff:
; CHECK: fmovs %f3, %f0
; CHECK: fmovs %f5, %f1
; CHECK: call inreg_ff
@@ -196,7 +196,7 @@ define void @call_inreg_ff(i32* %p, float %f3, float %f5) {
ret void
}
-; CHECK: inreg_if
+; CHECK-LABEL: inreg_if:
; CHECK: fstoi %f0
; CHECK: sub %i0
define i32 @inreg_if(float inreg %a0, ; %f0
@@ -206,7 +206,7 @@ define i32 @inreg_if(float inreg %a0, ; %f0
ret i32 %rv
}
-; CHECK: call_inreg_if
+; CHECK-LABEL: call_inreg_if:
; CHECK: fmovs %f3, %f0
; CHECK: mov %i2, %o0
; CHECK: call inreg_if
@@ -216,7 +216,7 @@ define void @call_inreg_if(i32* %p, float %f3, i32 %i2) {
}
; The frontend shouldn't do this. Just pass i64 instead.
-; CHECK: inreg_ii
+; CHECK-LABEL: inreg_ii:
; CHECK: srlx %i0, 32, [[R:%[gilo][0-7]]]
; CHECK: sub %i0, [[R]], %i0
define i32 @inreg_ii(i32 inreg %a0, ; high bits of %i0
@@ -225,7 +225,7 @@ define i32 @inreg_ii(i32 inreg %a0, ; high bits of %i0
ret i32 %rv
}
-; CHECK: call_inreg_ii
+; CHECK-LABEL: call_inreg_ii:
; CHECK: srl %i2, 0, [[R2:%[gilo][0-7]]]
; CHECK: sllx %i1, 32, [[R1:%[gilo][0-7]]]
; CHECK: or [[R1]], [[R2]], %o0
@@ -236,7 +236,7 @@ define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) {
}
; Structs up to 32 bytes in size can be returned in registers.
-; CHECK: ret_i64_pair
+; CHECK-LABEL: ret_i64_pair:
; CHECK: ldx [%i2], %i0
; CHECK: ldx [%i3], %i1
define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) {
@@ -248,7 +248,7 @@ define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) {
ret { i64, i64 } %rv2
}
-; CHECK: call_ret_i64_pair
+; CHECK-LABEL: call_ret_i64_pair:
; CHECK: call ret_i64_pair
; CHECK: stx %o0, [%i0]
; CHECK: stx %o1, [%i0]
@@ -263,7 +263,7 @@ define void @call_ret_i64_pair(i64* %i0) {
}
; This is not a C struct, the i32 member uses 8 bytes, but the float only 4.
-; CHECK: ret_i32_float_pair
+; CHECK-LABEL: ret_i32_float_pair:
; CHECK: ld [%i2], %i0
; CHECK: ld [%i3], %f2
define { i32, float } @ret_i32_float_pair(i32 %a0, i32 %a1,
@@ -276,7 +276,7 @@ define { i32, float } @ret_i32_float_pair(i32 %a0, i32 %a1,
ret { i32, float } %rv2
}
-; CHECK: call_ret_i32_float_pair
+; CHECK-LABEL: call_ret_i32_float_pair:
; CHECK: call ret_i32_float_pair
; CHECK: st %o0, [%i0]
; CHECK: st %f2, [%i1]
@@ -291,10 +291,10 @@ define void @call_ret_i32_float_pair(i32* %i0, float* %i1) {
}
; This is a C struct, each member uses 4 bytes.
-; CHECK: ret_i32_float_packed
+; CHECK-LABEL: ret_i32_float_packed:
; CHECK: ld [%i2], [[R:%[gilo][0-7]]]
-; CHECK: sllx [[R]], 32, %i0
; CHECK: ld [%i3], %f1
+; CHECK: sllx [[R]], 32, %i0
define inreg { i32, float } @ret_i32_float_packed(i32 %a0, i32 %a1,
i32* %p, float* %q) {
%r1 = load i32, i32* %p
@@ -305,7 +305,7 @@ define inreg { i32, float } @ret_i32_float_packed(i32 %a0, i32 %a1,
ret { i32, float } %rv2
}
-; CHECK: call_ret_i32_float_packed
+; CHECK-LABEL: call_ret_i32_float_packed:
; CHECK: call ret_i32_float_packed
; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
; CHECK: st [[R]], [%i0]
@@ -322,7 +322,7 @@ define void @call_ret_i32_float_packed(i32* %i0, float* %i1) {
; The C frontend should use i64 to return { i32, i32 } structs, but verify that
; we don't miscompile thi case where both struct elements are placed in %i0.
-; CHECK: ret_i32_packed
+; CHECK-LABEL: ret_i32_packed:
; CHECK: ld [%i2], [[R1:%[gilo][0-7]]]
; CHECK: ld [%i3], [[R2:%[gilo][0-7]]]
; CHECK: sllx [[R2]], 32, [[R3:%[gilo][0-7]]]
@@ -337,7 +337,7 @@ define inreg { i32, i32 } @ret_i32_packed(i32 %a0, i32 %a1,
ret { i32, i32 } %rv2
}
-; CHECK: call_ret_i32_packed
+; CHECK-LABEL: call_ret_i32_packed:
; CHECK: call ret_i32_packed
; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
; CHECK: st [[R]], [%i0]
@@ -353,31 +353,31 @@ define void @call_ret_i32_packed(i32* %i0, i32* %i1) {
}
; The return value must be sign-extended to 64 bits.
-; CHECK: ret_sext
+; CHECK-LABEL: ret_sext:
; CHECK: sra %i0, 0, %i0
define signext i32 @ret_sext(i32 %a0) {
ret i32 %a0
}
-; CHECK: ret_zext
+; CHECK-LABEL: ret_zext:
; CHECK: srl %i0, 0, %i0
define zeroext i32 @ret_zext(i32 %a0) {
ret i32 %a0
}
-; CHECK: ret_nosext
+; CHECK-LABEL: ret_nosext:
; CHECK-NOT: sra
define signext i32 @ret_nosext(i32 signext %a0) {
ret i32 %a0
}
-; CHECK: ret_nozext
+; CHECK-LABEL: ret_nozext:
; CHECK-NOT: srl
define signext i32 @ret_nozext(i32 signext %a0) {
ret i32 %a0
}
-; CHECK-LABEL: test_register_directive
+; CHECK-LABEL: test_register_directive:
; CHECK: .register %g2, #scratch
; CHECK: .register %g3, #scratch
; CHECK: add %i0, 2, %g2
@@ -391,7 +391,7 @@ entry:
ret i32 %2
}
-; CHECK-LABEL: test_large_stack
+; CHECK-LABEL: test_large_stack:
; CHECK: sethi 16, %g1
; CHECK: xor %g1, -176, %g1
@@ -412,7 +412,7 @@ entry:
declare i32 @use_buf(i32, i8*)
-; CHECK-LABEL: test_fp128_args
+; CHECK-LABEL: test_fp128_args:
; CHECK-DAG: std %f0, [%fp+{{.+}}]
; CHECK-DAG: std %f2, [%fp+{{.+}}]
; CHECK-DAG: std %f6, [%fp+{{.+}}]
@@ -428,7 +428,7 @@ entry:
declare i64 @receive_fp128(i64 %a, ...)
-; CHECK-LABEL: test_fp128_variable_args
+; CHECK-LABEL: test_fp128_variable_args:
; CHECK-DAG: std %f4, [%sp+[[Offset0:[0-9]+]]]
; CHECK-DAG: std %f6, [%sp+[[Offset1:[0-9]+]]]
; CHECK-DAG: ldx [%sp+[[Offset0]]], %o2
@@ -440,7 +440,7 @@ entry:
ret i64 %0
}
-; CHECK-LABEL: test_call_libfunc
+; CHECK-LABEL: test_call_libfunc:
; CHECK: st %f1, [%fp+[[Offset0:[0-9]+]]]
; CHECK: fmovs %f3, %f1
; CHECK: call cosf
diff --git a/test/CodeGen/SPARC/basictest.ll b/test/CodeGen/SPARC/basictest.ll
index 3792100b2e63..889f5144413f 100644
--- a/test/CodeGen/SPARC/basictest.ll
+++ b/test/CodeGen/SPARC/basictest.ll
@@ -71,12 +71,12 @@ define i64 @signed_multiply_32x32_64(i32 %a, i32 %b) {
}
; CHECK-LABEL: unsigned_multiply_32x32_64:
-; CHECK: umul %o0, %o1, %o2
-; CHECK: rd %y, %o2
;FIXME: the smul in the output is totally redundant and should not there.
-; CHECK: smul %o0, %o1, %o1
+; CHECK: smul %o0, %o1, %o2
+; CHECK: umul %o0, %o1, %o0
+; CHECK: rd %y, %o0
; CHECK: retl
-; CHECK: mov %o2, %o0
+; CHECK: mov %o2, %o1
define i64 @unsigned_multiply_32x32_64(i32 %a, i32 %b) {
%xa = zext i32 %a to i64
%xb = zext i32 %b to i64
@@ -84,3 +84,16 @@ define i64 @unsigned_multiply_32x32_64(i32 %a, i32 %b) {
ret i64 %r
}
+; CHECK-LABEL: load_store_64bit:
+; CHECK: ldd [%o0], %o2
+; CHECK: addcc %o3, 3, %o5
+; CHECK: addxcc %o2, 0, %o4
+; CHECK: retl
+; CHECK: std %o4, [%o1]
+define void @load_store_64bit(i64* %x, i64* %y) {
+entry:
+ %0 = load i64, i64* %x
+ %add = add nsw i64 %0, 3
+ store i64 %add, i64* %y
+ ret void
+}
diff --git a/test/CodeGen/SPARC/float-constants.ll b/test/CodeGen/SPARC/float-constants.ll
new file mode 100644
index 000000000000..b3686ebdf440
--- /dev/null
+++ b/test/CodeGen/SPARC/float-constants.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=sparc | FileCheck %s
+
+; TODO: actually fix the codegen to be optimal. At least we don't
+; crash for now, though...
+
+;; Bitcast should not do a runtime conversion, but rather emit a
+;; constant into integer registers directly.
+
+; CHECK-LABEL: bitcast:
+; TODO-CHECK: sethi 1049856, %o0
+; TODO-CHECK: sethi 0, %o1
+define <2 x i32> @bitcast() {
+ %1 = bitcast double 5.0 to <2 x i32>
+ ret <2 x i32> %1
+}
+
+;; Same thing for a call using a double (which gets passed in integer
+;; registers)
+
+; CHECK-LABEL: test_call
+; TODO-CHECK: sethi 1049856, %o0
+; TODO-CHECK: sethi 0, %o1
+declare void @a(double)
+define void @test_call() {
+ call void @a(double 5.0)
+ ret void
+}
+
+;; And for a libcall emitted from the pow intrinsic. (libcall
+;; emission happens after SelectionDAG type legalization, so is a bit
+;; different than a normal function call. This was crashing before,
+;; due to an earlier broken workaround for this issue.)
+
+; CHECK-LABEL: test_intrins_call
+; TODO-CHECK: sethi 1049856, %o0
+; TODO-CHECK: sethi 0, %o1
+declare double @llvm.pow.f64(double, double)
+define double @test_intrins_call() {
+ %1 = call double @llvm.pow.f64(double 2.0, double 2.0)
+ ret double %1
+}
diff --git a/test/CodeGen/SPARC/float.ll b/test/CodeGen/SPARC/float.ll
index d7a79cb05a82..c4cc04420ad7 100644
--- a/test/CodeGen/SPARC/float.ll
+++ b/test/CodeGen/SPARC/float.ll
@@ -53,20 +53,18 @@ declare double @get_double()
declare double @llvm.fabs.f64(double) nounwind readonly
; V8-LABEL: test_v9_floatreg:
-; V8: fsubd {{.+}}, {{.+}}, {{.+}}
-; V8: faddd {{.+}}, {{.+}}, [[R:%f(((1|2)?(0|2|4|6|8))|30)]]
+; V8: fsubd {{.+}}, {{.+}}, [[R:%f(((1|2)?(0|2|4|6|8))|30)]]
; V8: std [[R]], [%{{.+}}]
; V8: ldd [%{{.+}}], %f0
+; V8: faddd {{.+}}, {{.+}}, {{.+}}
; V9-LABEL: test_v9_floatreg:
; V9: fsubd {{.+}}, {{.+}}, {{.+}}
-; V9: faddd {{.+}}, {{.+}}, [[R:%f((3(2|4|6|8))|((4|5)(0|2|4|6|8))|(60|62))]]
-; V9: fmovd [[R]], %f0
+; V9: faddd {{.+}}, {{.+}}, %f0
; SPARC64-LABEL: test_v9_floatreg:
; SPARC64: fsubd {{.+}}, {{.+}}, {{.+}}
-; SPARC64: faddd {{.+}}, {{.+}}, [[R:%f((3(2|4|6|8))|((4|5)(0|2|4|6|8))|(60|62))]]
-; SPARC64: fmovd [[R]], %f0
+; SPARC64: faddd {{.+}}, {{.+}}, %f0
define double @test_v9_floatreg() {
entry:
diff --git a/test/CodeGen/SPARC/fp128.ll b/test/CodeGen/SPARC/fp128.ll
index c864cb7d599b..e0eaf93a733e 100644
--- a/test/CodeGen/SPARC/fp128.ll
+++ b/test/CodeGen/SPARC/fp128.ll
@@ -45,14 +45,14 @@ entry:
; HARD: std %f{{.+}}, [%[[S1:.+]]]
; HARD-DAG: ldd [%[[S0]]], %f{{.+}}
; HARD-DAG: ldd [%[[S1]]], %f{{.+}}
-; HARD: jmp %o7+12
+; HARD: jmp {{%[oi]7}}+12
; SOFT-LABEL: f128_spill
; SOFT: std %f{{.+}}, [%[[S0:.+]]]
; SOFT: std %f{{.+}}, [%[[S1:.+]]]
; SOFT-DAG: ldd [%[[S0]]], %f{{.+}}
; SOFT-DAG: ldd [%[[S1]]], %f{{.+}}
-; SOFT: jmp %o7+12
+; SOFT: jmp {{%[oi]7}}+12
define void @f128_spill(fp128* noalias sret %scalar.result, fp128* byval %a) {
entry:
diff --git a/test/CodeGen/SPARC/inlineasm.ll b/test/CodeGen/SPARC/inlineasm.ll
index 526cde8de8b4..d54c5c6bc780 100644
--- a/test/CodeGen/SPARC/inlineasm.ll
+++ b/test/CodeGen/SPARC/inlineasm.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s
+; RUN: llc -march=sparc <%s | FileCheck %s
; CHECK-LABEL: test_constraint_r
; CHECK: add %o1, %o0, %o0
@@ -8,7 +8,7 @@ entry:
ret i32 %0
}
-; CHECK-LABEL: test_constraint_I
+; CHECK-LABEL: test_constraint_I:
; CHECK: add %o0, 1023, %o0
define i32 @test_constraint_I(i32 %a) {
entry:
@@ -16,7 +16,7 @@ entry:
ret i32 %0
}
-; CHECK-LABEL: test_constraint_I_neg
+; CHECK-LABEL: test_constraint_I_neg:
; CHECK: add %o0, -4096, %o0
define i32 @test_constraint_I_neg(i32 %a) {
entry:
@@ -24,7 +24,7 @@ entry:
ret i32 %0
}
-; CHECK-LABEL: test_constraint_I_largeimm
+; CHECK-LABEL: test_constraint_I_largeimm:
; CHECK: sethi 9, [[R0:%[gilo][0-7]]]
; CHECK: or [[R0]], 784, [[R1:%[gilo][0-7]]]
; CHECK: add %o0, [[R1]], %o0
@@ -34,12 +34,51 @@ entry:
ret i32 %0
}
-; CHECK-LABEL: test_constraint_reg
+; CHECK-LABEL: test_constraint_reg:
; CHECK: ldda [%o1] 43, %g2
-; CHECK: ldda [%o1] 43, %g3
+; CHECK: ldda [%o1] 43, %g4
define void @test_constraint_reg(i32 %s, i32* %ptr) {
entry:
%0 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={r2},r,n"(i32* %ptr, i32 43)
- %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g3},r,n"(i32* %ptr, i32 43)
+ %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g4},r,n"(i32* %ptr, i32 43)
ret void
}
+
+;; Ensure that i64 args to asm are allocated to the IntPair register class.
+;; Also checks that register renaming for leaf proc works.
+; CHECK-LABEL: test_constraint_r_i64:
+; CHECK: mov %o0, %o5
+; CHECK: sra %o5, 31, %o4
+; CHECK: std %o4, [%o1]
+define i32 @test_constraint_r_i64(i32 %foo, i64* %out, i32 %o) {
+entry:
+ %conv = sext i32 %foo to i64
+ tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out)
+ ret i32 %o
+}
+
+;; Same test without leaf-proc opt
+; CHECK-LABEL: test_constraint_r_i64_noleaf:
+; CHECK: mov %i0, %i5
+; CHECK: sra %i5, 31, %i4
+; CHECK: std %i4, [%i1]
+define i32 @test_constraint_r_i64_noleaf(i32 %foo, i64* %out, i32 %o) #0 {
+entry:
+ %conv = sext i32 %foo to i64
+ tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out)
+ ret i32 %o
+}
+attributes #0 = { "no-frame-pointer-elim"="true" }
+
+;; Ensures that tied in and out gets allocated properly.
+; CHECK-LABEL: test_i64_inout:
+; CHECK: sethi 0, %o2
+; CHECK: mov 5, %o3
+; CHECK: xor %o2, %g0, %o2
+; CHECK: mov %o2, %o0
+; CHECK: ret
+define i64 @test_i64_inout() {
+entry:
+ %0 = call i64 asm sideeffect "xor $1, %g0, $0", "=r,0,~{i1}"(i64 5);
+ ret i64 %0
+}
diff --git a/test/CodeGen/SPARC/missing-sret.ll b/test/CodeGen/SPARC/missing-sret.ll
new file mode 100644
index 000000000000..683d840bd250
--- /dev/null
+++ b/test/CodeGen/SPARC/missing-sret.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=sparc -filetype=obj < %s > /dev/null 2> %t2
+
+define void @mul_double_cc({ double, double }* noalias sret %agg.result, double %a, double %b, double %c, double %d) {
+entry:
+ call void @__muldc3({ double, double }* sret %agg.result, double %a, double %b, double %c, double %d)
+ ret void
+}
+
+declare void @__muldc3({ double, double }*, double, double, double, double)
diff --git a/test/CodeGen/SPARC/reserved-regs.ll b/test/CodeGen/SPARC/reserved-regs.ll
new file mode 100644
index 000000000000..fe208015827b
--- /dev/null
+++ b/test/CodeGen/SPARC/reserved-regs.ll
@@ -0,0 +1,135 @@
+; RUN: llc -march=sparc < %s | FileCheck %s
+
+@g = common global [32 x i32] zeroinitializer, align 16
+@h = common global [16 x i64] zeroinitializer, align 16
+
+;; Ensures that we don't use registers which are supposed to be reserved.
+
+; CHECK-LABEL: use_all_i32_regs:
+; CHECK-NOT: %g0
+; CHECK-NOT: %g1
+; CHECK-NOT: %g5
+; CHECK-NOT: %g6
+; CHECK-NOT: %g7
+; CHECK-NOT: %o6
+; CHECK-NOT: %i6
+; CHECK-NOT: %i7
+; CHECK: ret
+define void @use_all_i32_regs() {
+entry:
+ %0 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16
+ %1 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4
+ %2 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8
+ %3 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4
+ %4 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16
+ %5 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4
+ %6 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8
+ %7 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4
+ %8 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16
+ %9 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4
+ %10 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8
+ %11 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4
+ %12 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16
+ %13 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4
+ %14 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8
+ %15 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4
+ %16 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16
+ %17 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4
+ %18 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8
+ %19 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4
+ %20 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16
+ %21 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4
+ %22 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8
+ %23 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4
+ %24 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16
+ %25 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4
+ %26 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8
+ %27 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4
+ %28 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16
+ %29 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4
+ %30 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8
+ %31 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4
+ store volatile i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16
+ store volatile i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4
+ store volatile i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8
+ store volatile i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4
+ store volatile i32 %5, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16
+ store volatile i32 %6, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4
+ store volatile i32 %7, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8
+ store volatile i32 %8, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4
+ store volatile i32 %9, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16
+ store volatile i32 %10, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4
+ store volatile i32 %11, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8
+ store volatile i32 %12, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4
+ store volatile i32 %13, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16
+ store volatile i32 %14, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4
+ store volatile i32 %15, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8
+ store volatile i32 %16, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4
+ store volatile i32 %17, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16
+ store volatile i32 %18, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4
+ store volatile i32 %19, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8
+ store volatile i32 %20, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4
+ store volatile i32 %21, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16
+ store volatile i32 %22, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4
+ store volatile i32 %23, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8
+ store volatile i32 %24, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4
+ store volatile i32 %25, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16
+ store volatile i32 %26, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4
+ store volatile i32 %27, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8
+ store volatile i32 %28, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4
+ store volatile i32 %29, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16
+ store volatile i32 %30, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4
+ store volatile i32 %31, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8
+ store volatile i32 %0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4
+ ret void
+}
+
+
+; CHECK-LABEL: use_all_i64_regs:
+; CHECK-NOT: %g0
+; CHECK-NOT: %g1
+; CHECK-NOT: %g4
+; CHECK-NOT: %g5
+; CHECK-NOT: %g6
+; CHECK-NOT: %g7
+; CHECK-NOT: %o6
+; CHECK-NOT: %o7
+; CHECK-NOT: %i6
+; CHECK-NOT: %i7
+; CHECK: ret
+define void @use_all_i64_regs() {
+entry:
+ %0 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16
+ %1 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4
+ %2 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8
+ %3 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4
+ %4 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16
+ %5 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4
+ %6 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8
+ %7 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4
+ %8 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16
+ %9 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4
+ %10 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8
+ %11 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4
+ %12 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16
+ %13 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4
+ %14 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8
+ %15 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4
+ store volatile i64 %1, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16
+ store volatile i64 %2, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4
+ store volatile i64 %3, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8
+ store volatile i64 %4, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4
+ store volatile i64 %5, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16
+ store volatile i64 %6, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4
+ store volatile i64 %7, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8
+ store volatile i64 %8, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4
+ store volatile i64 %9, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16
+ store volatile i64 %10, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4
+ store volatile i64 %11, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8
+ store volatile i64 %12, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4
+ store volatile i64 %13, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16
+ store volatile i64 %14, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4
+ store volatile i64 %15, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8
+ store volatile i64 %0, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4
+ ret void
+}
diff --git a/test/CodeGen/SPARC/select-mask.ll b/test/CodeGen/SPARC/select-mask.ll
new file mode 100644
index 000000000000..2e69a3b9be53
--- /dev/null
+++ b/test/CodeGen/SPARC/select-mask.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=sparc < %s
+
+;; getBooleanContents on Sparc used to claim that no bits mattered
+;; other than the first for SELECT. Thus, the 'trunc' got eliminated
+;; as redundant. But, cmp does NOT ignore the other bits!
+
+; CHECK-LABEL: select_mask:
+; CHECK: ldub [%o0], [[R:%[goli][0-7]]]
+; CHECK: and [[R]], 1, [[V:%[goli][0-7]]]
+; CHECK: cmp [[V]], 0
+define i32 @select_mask(i8* %this) {
+entry:
+ %bf.load2 = load i8, i8* %this, align 4
+ %bf.cast5 = trunc i8 %bf.load2 to i1
+ %cond = select i1 %bf.cast5, i32 2, i32 0
+ ret i32 %cond
+}
diff --git a/test/CodeGen/SPARC/spill.ll b/test/CodeGen/SPARC/spill.ll
new file mode 100644
index 000000000000..a461de9640bd
--- /dev/null
+++ b/test/CodeGen/SPARC/spill.ll
@@ -0,0 +1,64 @@
+; RUN: llc -march=sparc < %s | FileCheck %s
+
+;; Ensure that spills and reloads work for various types on
+;; sparcv8.
+
+;; For i32/i64 tests, use an asm statement which clobbers most
+;; registers to ensure the spill will happen.
+
+; CHECK-LABEL: test_i32_spill:
+; CHECK: and %i0, %i1, %o0
+; CHECK: st %o0, [%fp+{{.+}}]
+; CHECK: add %o0, %o0, %g0
+; CHECK: ld [%fp+{{.+}}, %i0
+define i32 @test_i32_spill(i32 %a, i32 %b) {
+entry:
+ %r0 = and i32 %a, %b
+ ; The clobber list has all registers except g0/o0. (Only o0 is usable.)
+ %0 = call i32 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{o7}"(i32 %r0)
+ ret i32 %r0
+}
+
+; CHECK-LABEL: test_i64_spill:
+; CHECK: and %i0, %i2, %o0
+; CHECK: and %i1, %i3, %o1
+; CHECK: std %o0, [%fp+{{.+}}]
+; CHECK: add %o0, %o0, %g0
+; CHECK: ldd [%fp+{{.+}}, %i0
+define i64 @test_i64_spill(i64 %a, i64 %b) {
+entry:
+ %r0 = and i64 %a, %b
+ ; The clobber list has all registers except g0,g1,o0,o1. (Only o0/o1 are a usable pair)
+ ; So, o0/o1 must be used.
+ %0 = call i64 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o2},~{o3},~{o4},~{o5},~{o7}"(i64 %r0)
+ ret i64 %r0
+}
+
+;; For float/double tests, a call is a suitable clobber as *all* FPU
+;; registers are caller-save on sparcv8.
+
+; CHECK-LABEL: test_float_spill:
+; CHECK: fadds %f1, %f0, [[R:%[f][0-31]]]
+; CHECK: st [[R]], [%fp+{{.+}}]
+; CHECK: call
+; CHECK: ld [%fp+{{.+}}, %f0
+declare float @foo_float(float)
+define float @test_float_spill(float %a, float %b) {
+entry:
+ %r0 = fadd float %a, %b
+ %0 = call float @foo_float(float %r0)
+ ret float %r0
+}
+
+; CHECK-LABEL: test_double_spill:
+; CHECK: faddd %f2, %f0, [[R:%[f][0-31]]]
+; CHECK: std [[R]], [%fp+{{.+}}]
+; CHECK: call
+; CHECK: ldd [%fp+{{.+}}, %f0
+declare double @foo_double(double)
+define double @test_double_spill(double %a, double %b) {
+entry:
+ %r0 = fadd double %a, %b
+ %0 = call double @foo_double(double %r0)
+ ret double %r0
+}
diff --git a/test/CodeGen/SPARC/stack-align.ll b/test/CodeGen/SPARC/stack-align.ll
new file mode 100644
index 000000000000..2554ee821fcd
--- /dev/null
+++ b/test/CodeGen/SPARC/stack-align.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=sparc < %s | FileCheck %s
+declare void @stack_realign_helper(i32 %a, i32* %b)
+
+@foo = global i32 1
+
+;; This is a function where we have a local variable of 64-byte
+;; alignment. We want to see that the stack is aligned (the initial
+;; andn), that the local var is accessed via stack pointer (to %o0), and that
+;; the argument is accessed via frame pointer not stack pointer (to %o1).
+
+;; CHECK-LABEL: stack_realign:
+;; CHECK: andn %sp, 63, %sp
+;; CHECK-NEXT: ld [%fp+92], %o0
+;; CHECK-NEXT: call stack_realign_helper
+;; CHECK-NEXT: add %sp, 128, %o1
+
+define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) {
+entry:
+ %aligned = alloca i32, align 64
+ call void @stack_realign_helper(i32 %g, i32* %aligned)
+ ret void
+}
diff --git a/test/CodeGen/SPARC/tls.ll b/test/CodeGen/SPARC/tls.ll
index a70637b283f5..8ebd36833ba1 100644
--- a/test/CodeGen/SPARC/tls.ll
+++ b/test/CodeGen/SPARC/tls.ll
@@ -103,10 +103,10 @@ entry:
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_PC22 _GLOBAL_OFFSET_TABLE_ 0x4
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_PC10 _GLOBAL_OFFSET_TABLE_ 0x8
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDO_HIX22 local_symbol 0x0
-; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDO_LOX10 local_symbol 0x0
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDM_HI22 local_symbol 0x0
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDM_LO10 local_symbol 0x0
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDM_ADD local_symbol 0x0
+; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDO_LOX10 local_symbol 0x0
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDM_CALL local_symbol 0x0
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDO_ADD local_symbol 0x0
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_PC22 _GLOBAL_OFFSET_TABLE_ 0x4
diff --git a/test/CodeGen/SPARC/varargs.ll b/test/CodeGen/SPARC/varargs.ll
index c2d1e98b698b..576acc284fb9 100644
--- a/test/CodeGen/SPARC/varargs.ll
+++ b/test/CodeGen/SPARC/varargs.ll
@@ -67,8 +67,8 @@ declare void @llvm.va_start(i8*)
; CHECK: call_1d
; The fixed-arg double goes in %d2, the second goes in %o2.
; CHECK: sethi 1048576
-; CHECK: , %o2
; CHECK: , %f2
+; CHECK: , %o2
define i32 @call_1d() #0 {
entry:
%call = call double (i8*, double, ...) @varargsfunc(i8* undef, double 1.000000e+00, double 2.000000e+00)
diff --git a/test/CodeGen/SystemZ/alloca-03.ll b/test/CodeGen/SystemZ/alloca-03.ll
new file mode 100644
index 000000000000..ece1198ad62f
--- /dev/null
+++ b/test/CodeGen/SystemZ/alloca-03.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Allocate 8 bytes, no need to align stack.
+define void @f0() {
+; CHECK-LABEL: f0:
+; CHECK: aghi %r15, -168
+; CHECK-NOT: nil
+; CHECK: mvghi 160(%r15), 10
+; CHECK: aghi %r15, 168
+ %x = alloca i64
+ store volatile i64 10, i64* %x
+ ret void
+}
+
+; Allocate %len * 8, no need to align stack.
+define void @f1(i64 %len) {
+; CHECK-LABEL: f1:
+; CHECK: sllg %r0, %r2, 3
+; CHECK: lgr %r1, %r15
+; CHECK: sgr %r1, %r0
+; CHECK-NOT: ngr
+; CHECK: lgr %r15, %r1
+; CHECK: la %r1, 160(%r1)
+; CHECK: mvghi 0(%r1), 10
+ %x = alloca i64, i64 %len
+ store volatile i64 10, i64* %x
+ ret void
+}
+
+; Static alloca, align 128.
+define void @f2() {
+; CHECK-LABEL: f2:
+; CHECK: aghi %r1, -128
+; CHECK: lgr %r15, %r1
+; CHECK: la %r1, 280(%r1)
+; CHECK: nill %r1, 65408
+; CHECK: mvghi 0(%r1), 10
+ %x = alloca i64, i64 1, align 128
+ store volatile i64 10, i64* %x, align 128
+ ret void
+}
+
+; Dynamic alloca, align 128.
+define void @f3(i64 %len) {
+; CHECK-LABEL: f3:
+; CHECK: sllg %r1, %r2, 3
+; CHECK: la %r0, 120(%r1)
+; CHECK: lgr %r1, %r15
+; CHECK: sgr %r1, %r0
+; CHECK: lgr %r15, %r1
+; CHECK: la %r1, 280(%r1)
+; CHECK: nill %r1, 65408
+; CHECK: mvghi 0(%r1), 10
+ %x = alloca i64, i64 %len, align 128
+ store volatile i64 10, i64* %x, align 128
+ ret void
+}
+
+; Static alloca w/out alignment - part of frame.
+define void @f4() {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r15, -168
+; CHECK: mvhi 164(%r15), 10
+; CHECK: aghi %r15, 168
+ %x = alloca i32
+ store volatile i32 10, i32* %x
+ ret void
+}
+
+; Static alloca of one i32, aligned by 128.
+define void @f5() {
+; CHECK-LABEL: f5:
+
+; CHECK: lgr %r1, %r15
+; CHECK: aghi %r1, -128
+; CHECK: lgr %r15, %r1
+; CHECK: la %r1, 280(%r1)
+; CHECK: nill %r1, 65408
+; CHECK: mvhi 0(%r1), 10
+ %x = alloca i32, i64 1, align 128
+ store volatile i32 10, i32* %x
+ ret void
+}
+
diff --git a/test/CodeGen/SystemZ/alloca-04.ll b/test/CodeGen/SystemZ/alloca-04.ll
new file mode 100644
index 000000000000..86c77493d3e9
--- /dev/null
+++ b/test/CodeGen/SystemZ/alloca-04.ll
@@ -0,0 +1,14 @@
+; Check the "no-realign-stack" function attribute. We should get a warning.
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu -debug-only=codegen 2>&1 | \
+; RUN: FileCheck %s
+; REQUIRES: asserts
+
+define void @f6() "no-realign-stack" {
+ %x = alloca i64, i64 1, align 128
+ store volatile i64 10, i64* %x, align 128
+ ret void
+}
+
+; CHECK: Warning: requested alignment 128 exceeds the stack alignment 8
+; CHECK-NOT: nill
diff --git a/test/CodeGen/SystemZ/args-01.ll b/test/CodeGen/SystemZ/args-01.ll
index 3105503eda53..113110faf341 100644
--- a/test/CodeGen/SystemZ/args-01.ll
+++ b/test/CodeGen/SystemZ/args-01.ll
@@ -30,12 +30,12 @@ define void @foo() {
;
; CHECK-FLOAT-LABEL: foo:
; CHECK-FLOAT: lzer %f0
-; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: lcdfr %f4, %f0
; CHECK-FLOAT: brasl %r14, bar@PLT
;
; CHECK-DOUBLE-LABEL: foo:
; CHECK-DOUBLE: lzdr %f2
-; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: lcdfr %f6, %f2
; CHECK-DOUBLE: brasl %r14, bar@PLT
;
; CHECK-FP128-1-LABEL: foo:
diff --git a/test/CodeGen/SystemZ/args-02.ll b/test/CodeGen/SystemZ/args-02.ll
index 8686df88e679..89b080e821bf 100644
--- a/test/CodeGen/SystemZ/args-02.ll
+++ b/test/CodeGen/SystemZ/args-02.ll
@@ -31,12 +31,12 @@ define void @foo() {
;
; CHECK-FLOAT-LABEL: foo:
; CHECK-FLOAT: lzer %f0
-; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: lcdfr %f4, %f0
; CHECK-FLOAT: brasl %r14, bar@PLT
;
; CHECK-DOUBLE-LABEL: foo:
; CHECK-DOUBLE: lzdr %f2
-; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: lcdfr %f6, %f2
; CHECK-DOUBLE: brasl %r14, bar@PLT
;
; CHECK-FP128-1-LABEL: foo:
diff --git a/test/CodeGen/SystemZ/args-03.ll b/test/CodeGen/SystemZ/args-03.ll
index d7d3ea105df7..a52782f4c183 100644
--- a/test/CodeGen/SystemZ/args-03.ll
+++ b/test/CodeGen/SystemZ/args-03.ll
@@ -31,12 +31,12 @@ define void @foo() {
;
; CHECK-FLOAT-LABEL: foo:
; CHECK-FLOAT: lzer %f0
-; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: lcdfr %f4, %f0
; CHECK-FLOAT: brasl %r14, bar@PLT
;
; CHECK-DOUBLE-LABEL: foo:
; CHECK-DOUBLE: lzdr %f2
-; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: lcdfr %f6, %f2
; CHECK-DOUBLE: brasl %r14, bar@PLT
;
; CHECK-FP128-1-LABEL: foo:
diff --git a/test/CodeGen/SystemZ/args-04.ll b/test/CodeGen/SystemZ/args-04.ll
index 48a2cf491049..475cceb106e5 100644
--- a/test/CodeGen/SystemZ/args-04.ll
+++ b/test/CodeGen/SystemZ/args-04.ll
@@ -1,7 +1,7 @@
; Test incoming GPR, FPR and stack arguments when no extension type is given.
; This type of argument is used for passing structures, etc.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s
; Do some arithmetic so that we can see the register being used.
define i8 @f1(i8 %r2) {
diff --git a/test/CodeGen/SystemZ/args-07.ll b/test/CodeGen/SystemZ/args-07.ll
index 29d9b319ffc0..44a31fadd6d2 100644
--- a/test/CodeGen/SystemZ/args-07.ll
+++ b/test/CodeGen/SystemZ/args-07.ll
@@ -1,6 +1,6 @@
; Test multiple return values (LLVM ABI extension)
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs| FileCheck %s
; Up to four integer return values fit into GPRs.
define { i64, i64, i64, i64 } @f1() {
diff --git a/test/CodeGen/SystemZ/asm-17.ll b/test/CodeGen/SystemZ/asm-17.ll
index 533b5e90d62d..acf2aff45429 100644
--- a/test/CodeGen/SystemZ/asm-17.ll
+++ b/test/CodeGen/SystemZ/asm-17.ll
@@ -1,6 +1,7 @@
; Test explicit register names.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -no-integrated-as \
+; RUN: | FileCheck %s
; Test i32 GPRs.
define i32 @f1() {
diff --git a/test/CodeGen/SystemZ/asm-18.ll b/test/CodeGen/SystemZ/asm-18.ll
index 999984be88d4..7909253d188c 100644
--- a/test/CodeGen/SystemZ/asm-18.ll
+++ b/test/CodeGen/SystemZ/asm-18.ll
@@ -1,7 +1,8 @@
; Test high-word operations, using "h" constraints to force a high
; register and "r" constraints to force a low register.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -no-integrated-as | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z196 \
+; RUN: -no-integrated-as | FileCheck %s
; Test loads and stores involving mixtures of high and low registers.
define void @f1(i32 *%ptr1, i32 *%ptr2) {
diff --git a/test/CodeGen/SystemZ/dag-combine-01.ll b/test/CodeGen/SystemZ/dag-combine-01.ll
new file mode 100644
index 000000000000..a56a118dadaa
--- /dev/null
+++ b/test/CodeGen/SystemZ/dag-combine-01.ll
@@ -0,0 +1,97 @@
+; Test that MergeConsecutiveStores() does not during DAG combining
+; incorrectly drop a chain dependency to a store previously chained to
+; one of two combined loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+@A = common global [2048 x float] zeroinitializer, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main(i32 signext %argc, i8** nocapture readnone %argv) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv24 = phi i64 [ 0, %entry ], [ %indvars.iv.next25, %for.body ]
+ %sum.018 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+ %0 = trunc i64 %indvars.iv24 to i32
+ %conv = sitofp i32 %0 to float
+ %arrayidx = getelementptr inbounds [2048 x float], [2048 x float]* @A, i64 0, i64 %indvars.iv24
+ store float %conv, float* %arrayidx, align 4
+ %add = fadd float %sum.018, %conv
+ %indvars.iv.next25 = add nuw nsw i64 %indvars.iv24, 1
+ %exitcond26 = icmp eq i64 %indvars.iv.next25, 2048
+ br i1 %exitcond26, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ br label %for.body.3.lr.ph.i.preheader
+
+for.body.3.lr.ph.i.preheader: ; preds = %complex_transpose.exit, %for.end
+ %i.116 = phi i32 [ 0, %for.end ], [ %inc9, %complex_transpose.exit ]
+ br label %for.body.3.lr.ph.i
+
+for.body.3.lr.ph.i: ; preds = %for.body.3.lr.ph.i.preheader, %for.inc.40.i
+ %indvars.iv19 = phi i32 [ 1, %for.body.3.lr.ph.i.preheader ], [ %indvars.iv.next20, %for.inc.40.i ]
+ %indvars.iv57.i = phi i64 [ 1, %for.body.3.lr.ph.i.preheader ], [ %indvars.iv.next58.i, %for.inc.40.i ]
+ %1 = shl nsw i64 %indvars.iv57.i, 1
+ %2 = shl nsw i64 %indvars.iv57.i, 6
+ br label %for.body.3.i
+
+for.body.3.i: ; preds = %for.body.3.i, %for.body.3.lr.ph.i
+; CHECK-LABEL: .LBB0_5:
+; CHECK-NOT: stfh %r{{.*}}, 0(%r{{.*}})
+; CHECK: lg %r{{.*}}, -4(%r{{.*}})
+; Overlapping load should go before the store
+ %indvars.iv.i = phi i64 [ 0, %for.body.3.lr.ph.i ], [ %indvars.iv.next.i, %for.body.3.i ]
+ %3 = shl nsw i64 %indvars.iv.i, 6
+ %4 = add nuw nsw i64 %3, %1
+ %arrayidx.i = getelementptr inbounds [2048 x float], [2048 x float]* @A, i64 0, i64 %4
+ %5 = bitcast float* %arrayidx.i to i32*
+ %6 = load i32, i32* %5, align 4
+ %arrayidx9.i = getelementptr inbounds float, float* getelementptr inbounds ([2048 x float], [2048 x float]* @A, i64 0, i64 1), i64 %4
+ %7 = bitcast float* %arrayidx9.i to i32*
+ %8 = load i32, i32* %7, align 4
+ %9 = shl nsw i64 %indvars.iv.i, 1
+ %10 = add nuw nsw i64 %9, %2
+ %arrayidx14.i = getelementptr inbounds [2048 x float], [2048 x float]* @A, i64 0, i64 %10
+ %11 = bitcast float* %arrayidx14.i to i32*
+ %12 = load i32, i32* %11, align 4
+ %arrayidx19.i = getelementptr inbounds float, float* getelementptr inbounds ([2048 x float], [2048 x float]* @A, i64 0, i64 1), i64 %10
+ %13 = bitcast float* %arrayidx19.i to i32*
+ %14 = load i32, i32* %13, align 4
+ store i32 %6, i32* %11, align 4
+ store i32 %8, i32* %13, align 4
+ store i32 %12, i32* %5, align 4
+ store i32 %14, i32* %7, align 4
+ %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next.i to i32
+ %exitcond21 = icmp eq i32 %lftr.wideiv, %indvars.iv19
+ br i1 %exitcond21, label %for.inc.40.i, label %for.body.3.i
+
+for.inc.40.i: ; preds = %for.body.3.i
+ %indvars.iv.next58.i = add nuw nsw i64 %indvars.iv57.i, 1
+ %indvars.iv.next20 = add nuw nsw i32 %indvars.iv19, 1
+ %exitcond22 = icmp eq i64 %indvars.iv.next58.i, 32
+ br i1 %exitcond22, label %complex_transpose.exit, label %for.body.3.lr.ph.i
+
+complex_transpose.exit: ; preds = %for.inc.40.i
+ %inc9 = add nuw nsw i32 %i.116, 1
+ %exitcond23 = icmp eq i32 %inc9, 10
+ br i1 %exitcond23, label %for.body.14.preheader, label %for.body.3.lr.ph.i.preheader
+
+for.body.14.preheader: ; preds = %complex_transpose.exit
+ br label %for.body.14
+
+for.body.14: ; preds = %for.body.14.preheader, %for.body.14
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body.14 ], [ 0, %for.body.14.preheader ]
+ %sum.115 = phi float [ %add17, %for.body.14 ], [ 0.000000e+00, %for.body.14.preheader ]
+ %arrayidx16 = getelementptr inbounds [2048 x float], [2048 x float]* @A, i64 0, i64 %indvars.iv
+ %15 = load float, float* %arrayidx16, align 4
+ %add17 = fadd float %sum.115, %15
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 2048
+ br i1 %exitcond, label %for.end.20, label %for.body.14
+
+for.end.20: ; preds = %for.body.14
+ ret i32 0
+}
diff --git a/test/CodeGen/SystemZ/fp-abs-01.ll b/test/CodeGen/SystemZ/fp-abs-01.ll
index 3b143d93315b..3bb3ede457f3 100644
--- a/test/CodeGen/SystemZ/fp-abs-01.ll
+++ b/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -7,7 +7,7 @@
declare float @llvm.fabs.f32(float %f)
define float @f1(float %f) {
; CHECK-LABEL: f1:
-; CHECK: lpebr %f0, %f0
+; CHECK: lpdfr %f0, %f0
; CHECK: br %r14
%res = call float @llvm.fabs.f32(float %f)
ret float %res
@@ -17,7 +17,7 @@ define float @f1(float %f) {
declare double @llvm.fabs.f64(double %f)
define double @f2(double %f) {
; CHECK-LABEL: f2:
-; CHECK: lpdbr %f0, %f0
+; CHECK: lpdfr %f0, %f0
; CHECK: br %r14
%res = call double @llvm.fabs.f64(double %f)
ret double %res
diff --git a/test/CodeGen/SystemZ/fp-abs-02.ll b/test/CodeGen/SystemZ/fp-abs-02.ll
index e831ddb86fea..b2d2cfd52b6a 100644
--- a/test/CodeGen/SystemZ/fp-abs-02.ll
+++ b/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -7,7 +7,7 @@
declare float @llvm.fabs.f32(float %f)
define float @f1(float %f) {
; CHECK-LABEL: f1:
-; CHECK: lnebr %f0, %f0
+; CHECK: lndfr %f0, %f0
; CHECK: br %r14
%abs = call float @llvm.fabs.f32(float %f)
%res = fsub float -0.0, %abs
@@ -18,7 +18,7 @@ define float @f1(float %f) {
declare double @llvm.fabs.f64(double %f)
define double @f2(double %f) {
; CHECK-LABEL: f2:
-; CHECK: lndbr %f0, %f0
+; CHECK: lndfr %f0, %f0
; CHECK: br %r14
%abs = call double @llvm.fabs.f64(double %f)
%res = fsub double -0.0, %abs
diff --git a/test/CodeGen/SystemZ/fp-add-02.ll b/test/CodeGen/SystemZ/fp-add-02.ll
index 5be1ad79d453..4f98742197bd 100644
--- a/test/CodeGen/SystemZ/fp-add-02.ll
+++ b/test/CodeGen/SystemZ/fp-add-02.ll
@@ -2,7 +2,7 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
declare double @foo()
; Check register addition.
diff --git a/test/CodeGen/SystemZ/fp-cmp-02.ll b/test/CodeGen/SystemZ/fp-cmp-02.ll
index 94a256777c75..0808ddd8db48 100644
--- a/test/CodeGen/SystemZ/fp-cmp-02.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-02.ll
@@ -3,7 +3,7 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs\
; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @foo()
@@ -164,8 +164,7 @@ define i64 @f8(i64 %a, i64 %b, double %f) {
; CHECK-SCALAR: ltdbr %f0, %f0
; CHECK-SCALAR-NEXT: je
; CHECK-SCALAR: lgr %r2, %r3
-; CHECK-VECTOR: lzdr %f1
-; CHECK-VECTOR-NEXT: cdbr %f0, %f1
+; CHECK-VECTOR: ltdbr %f0, %f0
; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%cond = fcmp oeq double %f, 0.0
diff --git a/test/CodeGen/SystemZ/fp-cmp-05.ll b/test/CodeGen/SystemZ/fp-cmp-05.ll
new file mode 100644
index 000000000000..c8eb18c6e6ba
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-cmp-05.ll
@@ -0,0 +1,80 @@
+; Test that floating-point instructions that set cc are used to
+; eliminate compares for load complement, load negative and load
+; positive.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Load complement (sign-bit flipped).
+; Test f32
+define float @f1(float %a, float %b, float %f) {
+; CHECK-LABEL: f1:
+; CHECK: lcebr
+; CHECK-NEXT: je
+ %neg = fsub float -0.0, %f
+ %cond = fcmp oeq float %neg, 0.0
+ %res = select i1 %cond, float %a, float %b
+ ret float %res
+}
+
+; Test f64
+define double @f2(double %a, double %b, double %f) {
+; CHECK-LABEL: f2:
+; CHECK: lcdbr
+; CHECK-NEXT: je
+ %neg = fsub double -0.0, %f
+ %cond = fcmp oeq double %neg, 0.0
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
+
+; Negation of floating-point absolute.
+; Test f32
+declare float @llvm.fabs.f32(float %f)
+define float @f3(float %a, float %b, float %f) {
+; CHECK-LABEL: f3:
+; CHECK: lnebr
+; CHECK-NEXT: je
+ %abs = call float @llvm.fabs.f32(float %f)
+ %neg = fsub float -0.0, %abs
+ %cond = fcmp oeq float %neg, 0.0
+ %res = select i1 %cond, float %a, float %b
+ ret float %res
+}
+
+; Test f64
+declare double @llvm.fabs.f64(double %f)
+define double @f4(double %a, double %b, double %f) {
+; CHECK-LABEL: f4:
+; CHECK: lndbr
+; CHECK-NEXT: je
+ %abs = call double @llvm.fabs.f64(double %f)
+ %neg = fsub double -0.0, %abs
+ %cond = fcmp oeq double %neg, 0.0
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
+
+; Absolute floating-point value.
+; Test f32
+define float @f5(float %a, float %b, float %f) {
+; CHECK-LABEL: f5:
+; CHECK: lpebr
+; CHECK-NEXT: je
+ %abs = call float @llvm.fabs.f32(float %f)
+ %cond = fcmp oeq float %abs, 0.0
+ %res = select i1 %cond, float %a, float %b
+ ret float %res
+}
+
+; Test f64
+define double @f6(double %a, double %b, double %f) {
+; CHECK-LABEL: f6:
+; CHECK: lpdbr
+; CHECK-NEXT: je
+ %abs = call double @llvm.fabs.f64(double %f)
+ %cond = fcmp oeq double %abs, 0.0
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
+
diff --git a/test/CodeGen/SystemZ/fp-const-02.ll b/test/CodeGen/SystemZ/fp-const-02.ll
index 96f857895ecf..942465c06600 100644
--- a/test/CodeGen/SystemZ/fp-const-02.ll
+++ b/test/CodeGen/SystemZ/fp-const-02.ll
@@ -6,7 +6,7 @@
define float @f1() {
; CHECK-LABEL: f1:
; CHECK: lzer [[REGISTER:%f[0-5]+]]
-; CHECK: lcebr %f0, [[REGISTER]]
+; CHECK: lcdfr %f0, [[REGISTER]]
; CHECK: br %r14
ret float -0.0
}
@@ -15,7 +15,7 @@ define float @f1() {
define double @f2() {
; CHECK-LABEL: f2:
; CHECK: lzdr [[REGISTER:%f[0-5]+]]
-; CHECK: lcdbr %f0, [[REGISTER]]
+; CHECK: lcdfr %f0, [[REGISTER]]
; CHECK: br %r14
ret double -0.0
}
diff --git a/test/CodeGen/SystemZ/fp-libcall.ll b/test/CodeGen/SystemZ/fp-libcall.ll
new file mode 100644
index 000000000000..75250b811cba
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-libcall.ll
@@ -0,0 +1,273 @@
+; Test that library calls are emitted for LLVM IR intrinsics
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define float @f1(float %x, i32 %y) {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, __powisf2@PLT
+ %tmp = call float @llvm.powi.f32(float %x, i32 %y)
+ ret float %tmp
+}
+
+define double @f2(double %x, i32 %y) {
+; CHECK-LABEL: f2:
+; CHECK: brasl %r14, __powidf2@PLT
+ %tmp = call double @llvm.powi.f64(double %x, i32 %y)
+ ret double %tmp
+}
+
+define fp128 @f3(fp128 %x, i32 %y) {
+; CHECK-LABEL: f3:
+; CHECK: brasl %r14, __powitf2@PLT
+ %tmp = call fp128 @llvm.powi.f128(fp128 %x, i32 %y)
+ ret fp128 %tmp
+}
+
+define float @f4(float %x, float %y) {
+; CHECK-LABEL: f4:
+; CHECK: brasl %r14, powf@PLT
+ %tmp = call float @llvm.pow.f32(float %x, float %y)
+ ret float %tmp
+}
+
+define double @f5(double %x, double %y) {
+; CHECK-LABEL: f5:
+; CHECK: brasl %r14, pow@PLT
+ %tmp = call double @llvm.pow.f64(double %x, double %y)
+ ret double %tmp
+}
+
+define fp128 @f6(fp128 %x, fp128 %y) {
+; CHECK-LABEL: f6:
+; CHECK: brasl %r14, powl@PLT
+ %tmp = call fp128 @llvm.pow.f128(fp128 %x, fp128 %y)
+ ret fp128 %tmp
+}
+
+define float @f7(float %x) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, sinf@PLT
+ %tmp = call float @llvm.sin.f32(float %x)
+ ret float %tmp
+}
+
+define double @f8(double %x) {
+; CHECK-LABEL: f8:
+; CHECK: brasl %r14, sin@PLT
+ %tmp = call double @llvm.sin.f64(double %x)
+ ret double %tmp
+}
+
+define fp128 @f9(fp128 %x) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, sinl@PLT
+ %tmp = call fp128 @llvm.sin.f128(fp128 %x)
+ ret fp128 %tmp
+}
+
+define float @f10(float %x) {
+; CHECK-LABEL: f10:
+; CHECK: brasl %r14, cosf@PLT
+ %tmp = call float @llvm.cos.f32(float %x)
+ ret float %tmp
+}
+
+define double @f11(double %x) {
+; CHECK-LABEL: f11:
+; CHECK: brasl %r14, cos@PLT
+ %tmp = call double @llvm.cos.f64(double %x)
+ ret double %tmp
+}
+
+define fp128 @f12(fp128 %x) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, cosl@PLT
+ %tmp = call fp128 @llvm.cos.f128(fp128 %x)
+ ret fp128 %tmp
+}
+
+define float @f13(float %x) {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, expf@PLT
+ %tmp = call float @llvm.exp.f32(float %x)
+ ret float %tmp
+}
+
+define double @f14(double %x) {
+; CHECK-LABEL: f14:
+; CHECK: brasl %r14, exp@PLT
+ %tmp = call double @llvm.exp.f64(double %x)
+ ret double %tmp
+}
+
+define fp128 @f15(fp128 %x) {
+; CHECK-LABEL: f15:
+; CHECK: brasl %r14, expl@PLT
+ %tmp = call fp128 @llvm.exp.f128(fp128 %x)
+ ret fp128 %tmp
+}
+
+define float @f16(float %x) {
+; CHECK-LABEL: f16:
+; CHECK: brasl %r14, exp2f@PLT
+ %tmp = call float @llvm.exp2.f32(float %x)
+ ret float %tmp
+}
+
+define double @f17(double %x) {
+; CHECK-LABEL: f17:
+; CHECK: brasl %r14, exp2@PLT
+ %tmp = call double @llvm.exp2.f64(double %x)
+ ret double %tmp
+}
+
+define fp128 @f18(fp128 %x) {
+; CHECK-LABEL: f18:
+; CHECK: brasl %r14, exp2l@PLT
+ %tmp = call fp128 @llvm.exp2.f128(fp128 %x)
+ ret fp128 %tmp
+}
+
+define float @f19(float %x) {
+; CHECK-LABEL: f19:
+; CHECK: brasl %r14, logf@PLT
+ %tmp = call float @llvm.log.f32(float %x)
+ ret float %tmp
+}
+
+define double @f20(double %x) {
+; CHECK-LABEL: f20:
+; CHECK: brasl %r14, log@PLT
+ %tmp = call double @llvm.log.f64(double %x)
+ ret double %tmp
+}
+
+define fp128 @f21(fp128 %x) {
+; CHECK-LABEL: f21:
+; CHECK: brasl %r14, logl@PLT
+ %tmp = call fp128 @llvm.log.f128(fp128 %x)
+ ret fp128 %tmp
+}
+
+define float @f22(float %x) {
+; CHECK-LABEL: f22:
+; CHECK: brasl %r14, log2f@PLT
+ %tmp = call float @llvm.log2.f32(float %x)
+ ret float %tmp
+}
+
+define double @f23(double %x) {
+; CHECK-LABEL: f23:
+; CHECK: brasl %r14, log2@PLT
+ %tmp = call double @llvm.log2.f64(double %x)
+ ret double %tmp
+}
+
+define fp128 @f24(fp128 %x) {
+; CHECK-LABEL: f24:
+; CHECK: brasl %r14, log2l@PLT
+ %tmp = call fp128 @llvm.log2.f128(fp128 %x)
+ ret fp128 %tmp
+}
+
+define float @f25(float %x) {
+; CHECK-LABEL: f25:
+; CHECK: brasl %r14, log10f@PLT
+ %tmp = call float @llvm.log10.f32(float %x)
+ ret float %tmp
+}
+
+define double @f26(double %x) {
+; CHECK-LABEL: f26:
+; CHECK: brasl %r14, log10@PLT
+ %tmp = call double @llvm.log10.f64(double %x)
+ ret double %tmp
+}
+
+define fp128 @f27(fp128 %x) {
+; CHECK-LABEL: f27:
+; CHECK: brasl %r14, log10l@PLT
+ %tmp = call fp128 @llvm.log10.f128(fp128 %x)
+ ret fp128 %tmp
+}
+
+define float @f28(float %x, float %y) {
+; CHECK-LABEL: f28:
+; CHECK: brasl %r14, fminf@PLT
+ %tmp = call float @llvm.minnum.f32(float %x, float %y)
+ ret float %tmp
+}
+
+define double @f29(double %x, double %y) {
+; CHECK-LABEL: f29:
+; CHECK: brasl %r14, fmin@PLT
+ %tmp = call double @llvm.minnum.f64(double %x, double %y)
+ ret double %tmp
+}
+
+define fp128 @f30(fp128 %x, fp128 %y) {
+; CHECK-LABEL: f30:
+; CHECK: brasl %r14, fminl@PLT
+ %tmp = call fp128 @llvm.minnum.f128(fp128 %x, fp128 %y)
+ ret fp128 %tmp
+}
+
+define float @f31(float %x, float %y) {
+; CHECK-LABEL: f31:
+; CHECK: brasl %r14, fmaxf@PLT
+ %tmp = call float @llvm.maxnum.f32(float %x, float %y)
+ ret float %tmp
+}
+
+define double @f32(double %x, double %y) {
+; CHECK-LABEL: f32:
+; CHECK: brasl %r14, fmax@PLT
+ %tmp = call double @llvm.maxnum.f64(double %x, double %y)
+ ret double %tmp
+}
+
+define fp128 @f33(fp128 %x, fp128 %y) {
+; CHECK-LABEL: f33:
+; CHECK: brasl %r14, fmaxl@PLT
+ %tmp = call fp128 @llvm.maxnum.f128(fp128 %x, fp128 %y)
+ ret fp128 %tmp
+}
+
+declare float @llvm.powi.f32(float, i32)
+declare double @llvm.powi.f64(double, i32)
+declare fp128 @llvm.powi.f128(fp128, i32)
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)
+declare fp128 @llvm.pow.f128(fp128, fp128)
+
+declare float @llvm.sin.f32(float)
+declare double @llvm.sin.f64(double)
+declare fp128 @llvm.sin.f128(fp128)
+declare float @llvm.cos.f32(float)
+declare double @llvm.cos.f64(double)
+declare fp128 @llvm.cos.f128(fp128)
+
+declare float @llvm.exp.f32(float)
+declare double @llvm.exp.f64(double)
+declare fp128 @llvm.exp.f128(fp128)
+declare float @llvm.exp2.f32(float)
+declare double @llvm.exp2.f64(double)
+declare fp128 @llvm.exp2.f128(fp128)
+
+declare float @llvm.log.f32(float)
+declare double @llvm.log.f64(double)
+declare fp128 @llvm.log.f128(fp128)
+declare float @llvm.log2.f32(float)
+declare double @llvm.log2.f64(double)
+declare fp128 @llvm.log2.f128(fp128)
+declare float @llvm.log10.f32(float)
+declare double @llvm.log10.f64(double)
+declare fp128 @llvm.log10.f128(fp128)
+
+declare float @llvm.minnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
+declare fp128 @llvm.minnum.f128(fp128, fp128)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.maxnum.f64(double, double)
+declare fp128 @llvm.maxnum.f128(fp128, fp128)
+
diff --git a/test/CodeGen/SystemZ/fp-move-05.ll b/test/CodeGen/SystemZ/fp-move-05.ll
index da12af6d68c1..0864deee5137 100644
--- a/test/CodeGen/SystemZ/fp-move-05.ll
+++ b/test/CodeGen/SystemZ/fp-move-05.ll
@@ -1,6 +1,6 @@
; Test 128-bit floating-point loads.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s
; Check loads with no offset.
define double @f1(i64 %src) {
diff --git a/test/CodeGen/SystemZ/fp-neg-01.ll b/test/CodeGen/SystemZ/fp-neg-01.ll
index fe2e5f67cf5b..b9810f9f34d3 100644
--- a/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -6,7 +6,7 @@
; Test f32.
define float @f1(float %f) {
; CHECK-LABEL: f1:
-; CHECK: lcebr %f0, %f0
+; CHECK: lcdfr %f0, %f0
; CHECK: br %r14
%res = fsub float -0.0, %f
ret float %res
@@ -15,7 +15,7 @@ define float @f1(float %f) {
; Test f64.
define double @f2(double %f) {
; CHECK-LABEL: f2:
-; CHECK: lcdbr %f0, %f0
+; CHECK: lcdfr %f0, %f0
; CHECK: br %r14
%res = fsub double -0.0, %f
ret double %res
diff --git a/test/CodeGen/SystemZ/fp-sincos-01.ll b/test/CodeGen/SystemZ/fp-sincos-01.ll
new file mode 100644
index 000000000000..cd182a590eee
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-sincos-01.ll
@@ -0,0 +1,56 @@
+; Test that combined sin/cos library call is emitted when appropriate
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s --check-prefix=CHECK-NOOPT
+; RUN: llc < %s -mtriple=s390x-linux-gnu -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-OPT
+
+define float @f1(float %x) {
+; CHECK-OPT-LABEL: f1:
+; CHECK-OPT: brasl %r14, sincosf@PLT
+; CHECK-OPT: le %f0, 164(%r15)
+; CHECK-OPT: aeb %f0, 160(%r15)
+
+; CHECK-NOOPT-LABEL: f1:
+; CHECK-NOOPT: brasl %r14, sinf@PLT
+; CHECK-NOOPT: brasl %r14, cosf@PLT
+ %tmp1 = call float @sinf(float %x)
+ %tmp2 = call float @cosf(float %x)
+ %add = fadd float %tmp1, %tmp2
+ ret float %add
+}
+
+define double @f2(double %x) {
+; CHECK-OPT-LABEL: f2:
+; CHECK-OPT: brasl %r14, sincos@PLT
+; CHECK-OPT: ld %f0, 168(%r15)
+; CHECK-OPT: adb %f0, 160(%r15)
+
+; CHECK-NOOPT-LABEL: f2:
+; CHECK-NOOPT: brasl %r14, sin@PLT
+; CHECK-NOOPT: brasl %r14, cos@PLT
+ %tmp1 = call double @sin(double %x)
+ %tmp2 = call double @cos(double %x)
+ %add = fadd double %tmp1, %tmp2
+ ret double %add
+}
+
+define fp128 @f3(fp128 %x) {
+; CHECK-OPT-LABEL: f3:
+; CHECK-OPT: brasl %r14, sincosl@PLT
+; CHECK-OPT: axbr
+
+; CHECK-NOOPT-LABEL: f3:
+; CHECK-NOOPT: brasl %r14, sinl@PLT
+; CHECK-NOOPT: brasl %r14, cosl@PLT
+ %tmp1 = call fp128 @sinl(fp128 %x)
+ %tmp2 = call fp128 @cosl(fp128 %x)
+ %add = fadd fp128 %tmp1, %tmp2
+ ret fp128 %add
+}
+
+declare float @sinf(float) readonly
+declare double @sin(double) readonly
+declare fp128 @sinl(fp128) readonly
+declare float @cosf(float) readonly
+declare double @cos(double) readonly
+declare fp128 @cosl(fp128) readonly
+
diff --git a/test/CodeGen/SystemZ/insert-05.ll b/test/CodeGen/SystemZ/insert-05.ll
index b76859a568f3..1ea8a64e28e3 100644
--- a/test/CodeGen/SystemZ/insert-05.ll
+++ b/test/CodeGen/SystemZ/insert-05.ll
@@ -214,8 +214,8 @@ define i64 @f18(i32 %a) {
; The truncation here isn't free; we need an explicit zero extension.
define i64 @f19(i32 %a) {
; CHECK-LABEL: f19:
-; CHECK: llgcr %r2, %r2
-; CHECK: oihl %r2, 1
+; CHECK: llcr %r2, %r2
+; CHECK: iihf %r2, 1
; CHECK: br %r14
%trunc = trunc i32 %a to i8
%ext = zext i8 %trunc to i64
diff --git a/test/CodeGen/SystemZ/int-cmp-44.ll b/test/CodeGen/SystemZ/int-cmp-44.ll
index 97d48521254d..a87dccd4ac2a 100644
--- a/test/CodeGen/SystemZ/int-cmp-44.ll
+++ b/test/CodeGen/SystemZ/int-cmp-44.ll
@@ -1,7 +1,8 @@
; Test that compares are omitted if CC already has the right value
; (z10 version).
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -no-integrated-as | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -no-integrated-as \
+; RUN: -verify-machineinstrs| FileCheck %s
declare void @foo()
diff --git a/test/CodeGen/SystemZ/int-cmp-51.ll b/test/CodeGen/SystemZ/int-cmp-51.ll
new file mode 100644
index 000000000000..85a0e4b4d3a7
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-51.ll
@@ -0,0 +1,34 @@
+; Check that modelling of CC/CCRegs does not stop MachineCSE from
+; removing a compare. MachineCSE will not extend a live range of an
+; allocatable or reserved phys reg.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @bar(i8)
+
+; Check the low end of the CH range.
+define void @f1(i32 %lhs) {
+; CHECK-LABEL: BB#1:
+; CHECK-NOT: cijlh %r0, 1, .LBB0_3
+
+entry:
+ %and188 = and i32 %lhs, 255
+ %cmp189 = icmp ult i32 %and188, 2
+ br i1 %cmp189, label %if.then.191, label %if.else.201
+
+if.then.191:
+ %cmp194 = icmp eq i32 %and188, 1
+ br i1 %cmp194, label %if.then.196, label %if.else.198
+
+if.then.196:
+ call void @bar(i8 1);
+ br label %if.else.201
+
+if.else.198:
+ call void @bar(i8 0);
+ br label %if.else.201
+
+if.else.201:
+ ret void
+}
+
diff --git a/test/CodeGen/SystemZ/int-cmp-52.ll b/test/CodeGen/SystemZ/int-cmp-52.ll
new file mode 100644
index 000000000000..a0b72371d1c5
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-52.ll
@@ -0,0 +1,24 @@
+; This used to crash the backend due to a failed assertion.
+; No particular output expected, but must compile.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu
+
+define void @test(i16 *%input, i32 *%result) {
+entry:
+ %0 = load i16, i16* %input, align 2
+ %1 = zext i16 %0 to i32
+ %2 = icmp slt i32 %1, 0
+ br i1 %2, label %if.then, label %if.else
+
+if.then:
+ store i32 1, i32* %result, align 4
+ br label %return
+
+if.else:
+ store i32 0, i32* %result, align 4
+ br label %return
+
+return:
+ ret void
+}
+
diff --git a/test/CodeGen/SystemZ/memchr-01.ll b/test/CodeGen/SystemZ/memchr-01.ll
index c51690b9848d..f7509c4f256b 100644
--- a/test/CodeGen/SystemZ/memchr-01.ll
+++ b/test/CodeGen/SystemZ/memchr-01.ll
@@ -1,6 +1,6 @@
; Test memchr using SRST, with a weird but usable prototype.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s
declare i8 *@memchr(i8 *%src, i16 %char, i32 %len)
diff --git a/test/CodeGen/SystemZ/spill-01.ll b/test/CodeGen/SystemZ/spill-01.ll
index a59c06f192b6..9be4420fd839 100644
--- a/test/CodeGen/SystemZ/spill-01.ll
+++ b/test/CodeGen/SystemZ/spill-01.ll
@@ -1,7 +1,7 @@
; Test spilling using MVC. The tests here assume z10 register pressure,
; without the high words being available.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -verify-machineinstrs | FileCheck %s
declare void @foo()
diff --git a/test/CodeGen/SystemZ/vec-args-04.ll b/test/CodeGen/SystemZ/vec-args-04.ll
index 3a25404934e2..5176d80f08fb 100644
--- a/test/CodeGen/SystemZ/vec-args-04.ll
+++ b/test/CodeGen/SystemZ/vec-args-04.ll
@@ -21,17 +21,25 @@ define void @foo() {
; CHECK-VEC-DAG: vrepib %v31, 8
; CHECK-VEC: brasl %r14, bar@PLT
;
+
+; CHECK-STACK: .LCPI0_0:
+; CHECK-STACK: .quad 795741901033570304 # 0xb0b0b0b00000000
+; CHECK-STACK: .quad 868082074056920076 # 0xc0c0c0c0c0c0c0c
+; CHECK-STACK: .LCPI0_1:
+; CHECK-STACK: .quad 648518346341351424 # 0x900000000000000
+; CHECK-STACK: .quad 723390690146385920 # 0xa0a000000000000
+
; CHECK-STACK-LABEL: foo:
; CHECK-STACK: aghi %r15, -192
-; CHECK-STACK-DAG: llihh [[REG1:%r[0-9]+]], 2304
-; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
-; CHECK-STACK-DAG: llihh [[REG2:%r[0-9]+]], 2570
-; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
-; CHECK-STACK-DAG: llihf [[REG3:%r[0-9]+]], 185273099
-; CHECK-STACK-DAG: stg [[REG3]], 176(%r15)
-; CHECK-STACK-DAG: llihf [[REG4:%r[0-9]+]], 202116108
-; CHECK-STACK-DAG: oilf [[REG4]], 202116108
-; CHECK-STACK-DAG: stg [[REG4]], 176(%r15)
+
+; CHECK-STACK-DAG: larl [[REG1:%r[0-9]+]], .LCPI0_0
+; CHECK-STACK-DAG: vl [[VREG0:%v[0-9]+]], 0([[REG1]])
+; CHECK-STACK-DAG: vst [[VREG0]], 176(%r15)
+
+; CHECK-STACK-DAG: larl [[REG2:%r[0-9]+]], .LCPI0_1
+; CHECK-STACK-DAG: vl [[VREG1:%v[0-9]+]], 0([[REG2]])
+; CHECK-STACK-DAG: vst [[VREG1]], 160(%r15)
+
; CHECK-STACK: brasl %r14, bar@PLT
call void @bar (<1 x i8> <i8 1>,
diff --git a/test/CodeGen/SystemZ/vec-args-05.ll b/test/CodeGen/SystemZ/vec-args-05.ll
index cd1448b8611e..8c5ff8414292 100644
--- a/test/CodeGen/SystemZ/vec-args-05.ll
+++ b/test/CodeGen/SystemZ/vec-args-05.ll
@@ -14,12 +14,14 @@ define void @foo() {
; CHECK-VEC-DAG: vrepib %v26, 2
; CHECK-VEC: brasl %r14, bar@PLT
;
+; CHECK-STACK: .LCPI0_0:
+; CHECK-STACK: .quad 217020518463700992 # 0x303030300000000
+; CHECK-STACK: .quad 289360691284934656 # 0x404040400000000
; CHECK-STACK-LABEL: foo:
; CHECK-STACK: aghi %r15, -176
-; CHECK-STACK-DAG: llihf [[REG1:%r[0-9]+]], 50529027
-; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
-; CHECK-STACK-DAG: llihf [[REG2:%r[0-9]+]], 67372036
-; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
+; CHECK-STACK-DAG: larl [[REG1:%r[0-9]+]], .LCPI0_0
+; CHECK-STACK-DAG: vl [[VREG:%v[0-9]+]], 0([[REG1]])
+; CHECK-STACK-DAG: vst [[VREG]], 160(%r15)
; CHECK-STACK: brasl %r14, bar@PLT
call void (<4 x i8>, <4 x i8>, ...) @bar
diff --git a/test/CodeGen/SystemZ/vec-perm-12.ll b/test/CodeGen/SystemZ/vec-perm-12.ll
new file mode 100644
index 000000000000..b70b13d90682
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-12.ll
@@ -0,0 +1,43 @@
+; Test inserting a truncated value into a vector element
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-CODE %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+
+define <4 x i32> @f1(<4 x i32> %x, i64 %y) {
+; CHECK-CODE-LABEL: f1:
+; CHECK-CODE: vlvgf [[ELT:%v[0-9]+]], %r2, 0
+; CHECK-CODE: larl [[REG:%r[0-5]]],
+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE: vperm %v24, %v24, [[ELT]], [[MASK]]
+; CHECK-CODE: br %r14
+
+; CHECK-VECTOR: .byte 12
+; CHECK-VECTOR-NEXT: .byte 13
+; CHECK-VECTOR-NEXT: .byte 14
+; CHECK-VECTOR-NEXT: .byte 15
+; CHECK-VECTOR-NEXT: .byte 8
+; CHECK-VECTOR-NEXT: .byte 9
+; CHECK-VECTOR-NEXT: .byte 10
+; CHECK-VECTOR-NEXT: .byte 11
+; CHECK-VECTOR-NEXT: .byte 4
+; CHECK-VECTOR-NEXT: .byte 5
+; CHECK-VECTOR-NEXT: .byte 6
+; CHECK-VECTOR-NEXT: .byte 7
+; CHECK-VECTOR-NEXT: .byte 16
+; CHECK-VECTOR-NEXT: .byte 17
+; CHECK-VECTOR-NEXT: .byte 18
+; CHECK-VECTOR-NEXT: .byte 19
+
+ %elt0 = extractelement <4 x i32> %x, i32 3
+ %elt1 = extractelement <4 x i32> %x, i32 2
+ %elt2 = extractelement <4 x i32> %x, i32 1
+ %elt3 = trunc i64 %y to i32
+ %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
+ %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
+ %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
+ %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3
+ ret <4 x i32> %vec3
+}
+
diff --git a/test/CodeGen/SystemZ/vec-perm-13.ll b/test/CodeGen/SystemZ/vec-perm-13.ll
new file mode 100644
index 000000000000..708d8de53f86
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-13.ll
@@ -0,0 +1,38 @@
+; Test vector shuffles on vectors with implicitly extended elements
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-CODE %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+
+define <4 x i16> @f1(<4 x i16> %x) {
+; CHECK-CODE-LABEL: f1:
+; CHECK-CODE: larl [[REG:%r[0-5]]],
+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE: vgbm [[ELT:%v[0-9]+]], 0
+; CHECK-CODE: vperm %v24, %v24, [[ELT]], [[MASK]]
+; CHECK-CODE: br %r14
+
+; CHECK-VECTOR: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .byte 6
+; CHECK-VECTOR-NEXT: .byte 7
+; CHECK-VECTOR-NEXT: .byte 16
+; CHECK-VECTOR-NEXT: .byte 17
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+
+ %elt = extractelement <4 x i16> %x, i32 3
+ %vec1 = insertelement <4 x i16> undef, i16 %elt, i32 2
+ %vec2 = insertelement <4 x i16> %vec1, i16 0, i32 3
+ ret <4 x i16> %vec2
+}
+
diff --git a/test/CodeGen/SystemZ/xor-01.ll b/test/CodeGen/SystemZ/xor-01.ll
index e0aaffbb257e..281f386ce955 100644
--- a/test/CodeGen/SystemZ/xor-01.ll
+++ b/test/CodeGen/SystemZ/xor-01.ll
@@ -1,6 +1,6 @@
; Test 32-bit XORs in which the second operand is variable.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
declare i32 @foo()
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 0fd1a9e1e232..8ec4d5b9865b 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -51,9 +51,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!0 = !DILocation(line: 46, scope: !1)
!1 = distinct !DILexicalBlock(line: 44, column: 0, file: !101, scope: !2)
!2 = distinct !DILexicalBlock(line: 44, column: 0, file: !101, scope: !3)
-!3 = !DISubprogram(name: "getClosestDiagonal3", linkageName: "_Z19getClosestDiagonal3ii", line: 44, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !101, scope: null, type: !6)
+!3 = distinct !DISubprogram(name: "getClosestDiagonal3", linkageName: "_Z19getClosestDiagonal3ii", line: 44, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !101, scope: null, type: !6)
!4 = !DIFile(filename: "ggEdgeDiscrepancy.cc", directory: "/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src")
-!5 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", isOptimized: true, emissionKind: 0, file: !101, enums: !102, retainedTypes: !102, subprograms: !103)
+!5 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", isOptimized: true, emissionKind: 0, file: !101, enums: !102, retainedTypes: !102, subprograms: !103)
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !22, !22}
!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "ggVector3", line: 66, size: 192, align: 32, file: !99, elements: !10)
@@ -87,12 +87,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!34 = !DIDerivedType(tag: DW_TAG_const_type, size: 192, align: 32, file: !101, scope: !4, baseType: !8)
!35 = !DISubprogram(name: "y", linkageName: "_ZNK9ggVector31yEv", line: 83, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
!36 = !DISubprogram(name: "z", linkageName: "_ZNK9ggVector31zEv", line: 84, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
-!37 = !DISubprogram(name: "x", linkageName: "_ZN9ggVector31xEv", line: 85, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
+!37 = distinct !DISubprogram(name: "x", linkageName: "_ZN9ggVector31xEv", line: 85, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
!38 = !DISubroutineType(types: !39)
!39 = !{!40, !19}
!40 = !DIDerivedType(tag: DW_TAG_reference_type, name: "double", size: 32, align: 32, file: !101, scope: !4, baseType: !13)
-!41 = !DISubprogram(name: "y", linkageName: "_ZN9ggVector31yEv", line: 86, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
-!42 = !DISubprogram(name: "z", linkageName: "_ZN9ggVector31zEv", line: 87, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
+!41 = distinct !DISubprogram(name: "y", linkageName: "_ZN9ggVector31yEv", line: 86, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
+!42 = distinct !DISubprogram(name: "z", linkageName: "_ZN9ggVector31zEv", line: 87, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
!43 = !DISubprogram(name: "SetX", linkageName: "_ZN9ggVector34SetXEd", line: 88, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !44)
!44 = !DISubroutineType(types: !45)
!45 = !{null, !19, !13}
@@ -127,7 +127,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!74 = !DISubprogram(name: "operator/=", linkageName: "_ZN9ggVector3dVEd", line: 324, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !72)
!75 = !DISubprogram(name: "length", linkageName: "_ZNK9ggVector36lengthEv", line: 121, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
!76 = !DISubprogram(name: "squaredLength", linkageName: "_ZNK9ggVector313squaredLengthEv", line: 122, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
-!77 = !DISubprogram(name: "MakeUnitVector", linkageName: "_ZN9ggVector314MakeUnitVectorEv", line: 217, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !24)
+!77 = distinct !DISubprogram(name: "MakeUnitVector", linkageName: "_ZN9ggVector314MakeUnitVectorEv", line: 217, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !24)
!78 = !DISubprogram(name: "Perturb", linkageName: "_ZNK9ggVector37PerturbEdd", line: 126, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !79)
!79 = !DISubroutineType(types: !80)
!80 = !{!8, !33, !13, !13}
@@ -141,7 +141,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!88 = !DISubprogram(name: "indexOfMinAbsComponent", linkageName: "_ZNK9ggVector322indexOfMinAbsComponentEv", line: 137, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !86)
!89 = !DISubprogram(name: "indexOfMaxComponent", linkageName: "_ZNK9ggVector319indexOfMaxComponentEv", line: 146, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !86)
!90 = !DISubprogram(name: "indexOfMaxAbsComponent", linkageName: "_ZNK9ggVector322indexOfMaxAbsComponentEv", line: 150, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !86)
-!91 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vx", line: 46, scope: !1, file: !4, type: !13)
+!91 = !DILocalVariable(name: "vx", line: 46, scope: !1, file: !4, type: !13)
!92 = !DILocation(line: 48, scope: !1)
!93 = !DILocation(line: 218, scope: !94, inlinedAt: !96)
!94 = distinct !DILexicalBlock(line: 217, column: 0, file: !101, scope: !95)
diff --git a/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll b/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll
index cba1ca68569f..1ba7cb795d11 100644
--- a/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll
+++ b/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumbv6m-apple-unknown-macho < %s | FileCheck --check-prefix=V6M %s
+; RUN: llc -mtriple=thumbv6m-apple-unknown-macho -mattr=+strict-align < %s | FileCheck --check-prefix=V6M %s
; RUN: llc -mtriple=thumbv7m-apple-unknown-macho < %s | FileCheck --check-prefix=V7M %s
define i32 @split_load(i32* %p) nounwind {
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index 0d534589ae0a..c5d1044e9d69 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -32,10 +32,10 @@ define void @test100() {
; Smallest stack for which we use a constant pool
define void @test2() {
; CHECK-LABEL: test2:
-; CHECK: ldr r0,
-; CHECK: add sp, r0
-; EABI: ldr r0,
-; EABI: add sp, r0
+; CHECK: ldr [[TEMP:r[0-7]]],
+; CHECK: add sp, [[TEMP]]
+; EABI: ldr [[TEMP:r[0-7]]],
+; EABI: add sp, [[TEMP]]
; IOS: subs r4, r7, #4
; IOS: mov sp, r4
%tmp = alloca [ 1528 x i8 ] , align 4
@@ -44,12 +44,12 @@ define void @test2() {
define i32 @test3() {
; CHECK-LABEL: test3:
-; CHECK: ldr r1,
-; CHECK: add sp, r1
-; CHECK: ldr r1,
-; CHECK: add r1, sp
-; EABI: ldr r1,
-; EABI: add sp, r1
+; CHECK: ldr [[TEMP:r[0-7]]],
+; CHECK: add sp, [[TEMP]]
+; CHECK: ldr [[TEMP]],
+; CHECK: add [[TEMP]], sp
+; EABI: ldr [[TEMP:r[0-7]]],
+; EABI: add sp, [[TEMP]]
; IOS: subs r4, r7, #4
; IOS: mov sp, r4
%retval = alloca i32, align 4
diff --git a/test/CodeGen/Thumb/ldm-stm-base-materialization-thumb2.ll b/test/CodeGen/Thumb/ldm-stm-base-materialization-thumb2.ll
new file mode 100644
index 000000000000..7901a158a959
--- /dev/null
+++ b/test/CodeGen/Thumb/ldm-stm-base-materialization-thumb2.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mattr=-neon < %s -verify-machineinstrs -o - | FileCheck %s
+
+target triple = "thumbv7a-none--eabi"
+
+@a = external global i32*
+@b = external global i32*
+
+; Function Attrs: nounwind
+define void @foo24() #0 {
+entry:
+; CHECK-LABEL: foo24:
+; We use '[rl0-9]*' to allow 'r0'..'r12', 'lr'
+; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
+; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
+; CHECK: add{{s?}}{{(\.w)?}} [[NLB:[rl0-9]+]], [[LB]], #4
+; CHECK: adds [[SB]], #4
+; CHECK-NEXT: ldm{{(\.w)?}} [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]], [[R5:[rl0-9]+]], [[R6:[rl0-9]+]]}
+; CHECK-NEXT: stm{{(\.w)?}} [[SB]], {[[R1]], [[R2]], [[R3]], [[R4]], [[R5]], [[R6]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false)
+ ret void
+}
+
+define void @foo28() #0 {
+entry:
+; CHECK-LABEL: foo28:
+; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
+; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
+; CHECK: add{{(\.w)?}} [[NLB:[rl0-9]+]], [[LB]], #4
+; CHECK: adds [[SB]], #4
+; CHECK-NEXT: ldm{{(\.w)?}} [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]]}
+; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!, {[[R1]], [[R2]], [[R3]]}
+; CHECK-NEXT: ldm{{(\.w)?}} [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
+; CHECK-NEXT: stm{{(\.w)?}} [[SB]], {[[R1]], [[R2]], [[R3]], [[R4]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
+ ret void
+}
+
+define void @foo32() #0 {
+entry:
+; CHECK-LABEL: foo32:
+; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
+; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
+; CHECK: add{{(\.w)?}} [[NLB:[rl0-9]+]], [[LB]], #4
+; CHECK: adds [[SB]], #4
+; CHECK-NEXT: ldm{{(\.w)?}} [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
+; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!, {[[R1]], [[R2]], [[R3]], [[R4]]}
+; CHECK-NEXT: ldm{{(\.w)?}} [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
+; CHECK-NEXT: stm{{(\.w)?}} [[SB]], {[[R1]], [[R2]], [[R3]], [[R4]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false)
+ ret void
+}
+
+define void @foo36() #0 {
+entry:
+; CHECK-LABEL: foo36:
+; CHECK: movt [[LB:[rl0-9]+]], :upper16:b
+; CHECK: movt [[SB:[rl0-9]+]], :upper16:a
+; CHECK: add{{(\.w)?}} [[NLB:[rl0-9]+]], [[LB]], #4
+; CHECK: adds [[SB]], #4
+; CHECK-NEXT: ldm{{(\.w)?}} [[NLB]]!, {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]]}
+; CHECK-NEXT: stm{{(\.w)?}} [[SB]]!, {[[R1]], [[R2]], [[R3]], [[R4]]}
+; CHECK-NEXT: ldm{{(\.w)?}} [[NLB]], {[[R1:[rl0-9]+]], [[R2:[rl0-9]+]], [[R3:[rl0-9]+]], [[R4:[rl0-9]+]], [[R5:[rl0-9]+]]}
+; CHECK-NEXT: stm{{(\.w)?}} [[SB]], {[[R1]], [[R2]], [[R3]], [[R4]], [[R5]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false)
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
diff --git a/test/CodeGen/Thumb/ldm-stm-base-materialization.ll b/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
index 916e5ea299a3..0be796eb8f8d 100644
--- a/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
+++ b/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
@@ -6,15 +6,17 @@ target triple = "thumbv6m-none--eabi"
@b = external global i32*
; Function Attrs: nounwind
-define void @foo() #0 {
+define void @foo24() #0 {
entry:
-; CHECK-LABEL: foo:
-; CHECK: ldr r[[SB:[0-9]]], .LCPI
+; CHECK-LABEL: foo24:
; CHECK: ldr r[[LB:[0-9]]], .LCPI
; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
-; CHECK-NEXT: ldm r[[NLB]],
+; CHECK: ldr r[[SB:[0-9]]], .LCPI
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
-; CHECK-NEXT: stm r[[NSB]]
+; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
+; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
+; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
+; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
%0 = load i32*, i32** @a, align 4
%arrayidx = getelementptr inbounds i32, i32* %0, i32 1
%1 = bitcast i32* %arrayidx to i8*
@@ -25,5 +27,70 @@ entry:
ret void
}
+define void @foo28() #0 {
+entry:
+; CHECK-LABEL: foo28:
+; CHECK: ldr r[[LB:[0-9]]], .LCPI
+; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
+; CHECK: ldr r[[SB:[0-9]]], .LCPI
+; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
+; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
+; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
+; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]], r[[R4:[0-9]]]}
+; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]], r[[R4]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false)
+ ret void
+}
+
+define void @foo32() #0 {
+entry:
+; CHECK-LABEL: foo32:
+; CHECK: ldr r[[LB:[0-9]]], .LCPI
+; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
+; CHECK: ldr r[[SB:[0-9]]], .LCPI
+; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
+; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]], r[[R4:[0-9]]]}
+; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]], r[[R4]]}
+; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]], r[[R4:[0-9]]]}
+; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]], r[[R4]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false)
+ ret void
+}
+
+define void @foo36() #0 {
+entry:
+; CHECK-LABEL: foo36:
+; CHECK: ldr r[[LB:[0-9]]], .LCPI
+; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
+; CHECK: ldr r[[SB:[0-9]]], .LCPI
+; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
+; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
+; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
+; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
+; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
+; CHECK-NEXT: ldm r[[NLB]]!, {r[[R1:[0-9]]], r[[R2:[0-9]]], r[[R3:[0-9]]]}
+; CHECK-NEXT: stm r[[NSB]]!, {r[[R1]], r[[R2]], r[[R3]]}
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
+ %1 = bitcast i32* %arrayidx to i8*
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
+ %3 = bitcast i32* %arrayidx1 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false)
+ ret void
+}
+
; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
diff --git a/test/CodeGen/Thumb/pop.ll b/test/CodeGen/Thumb/pop.ll
index 3c539c690170..851f793e5ee0 100644
--- a/test/CodeGen/Thumb/pop.ll
+++ b/test/CodeGen/Thumb/pop.ll
@@ -3,9 +3,9 @@
define void @t(i8* %a, ...) nounwind {
; CHECK-LABEL: t:
-; CHECK: pop {r3}
+; CHECK: pop {[[POP_REG:r[0-3]]]}
; CHECK-NEXT: add sp, #12
-; CHECK-NEXT: bx r3
+; CHECK-NEXT: bx [[POP_REG]]
entry:
%a.addr = alloca i8, i32 4
call void @llvm.va_start(i8* %a.addr)
diff --git a/test/CodeGen/Thumb/segmented-stacks.ll b/test/CodeGen/Thumb/segmented-stacks.ll
index 09f5db852bf4..251c29534727 100644
--- a/test/CodeGen/Thumb/segmented-stacks.ll
+++ b/test/CodeGen/Thumb/segmented-stacks.ll
@@ -12,7 +12,7 @@ define void @test_basic() #0 {
call void @dummy_use (i32* %mem, i32 10)
ret void
-; Thumb-android: test_basic:
+; Thumb-android-LABEL: test_basic:
; Thumb-android: push {r4, r5}
; Thumb-android-NEXT: mov r5, sp
@@ -32,7 +32,11 @@ define void @test_basic() #0 {
; Thumb-android: pop {r4, r5}
-; Thumb-linux: test_basic:
+; Thumb-android: .align 2
+; Thumb-android: .LCPI0_0:
+; Thumb-android-NEXT: .long __STACK_LIMIT
+
+; Thumb-linux-LABEL: test_basic:
; Thumb-linux: push {r4, r5}
; Thumb-linux-NEXT: mov r5, sp
@@ -61,7 +65,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
call void @dummy_use (i32* %mem, i32 10)
ret i32 %result
-; Thumb-android: test_nested:
+; Thumb-android-LABEL: test_nested:
; Thumb-android: push {r4, r5}
; Thumb-android-NEXT: mov r5, sp
@@ -81,7 +85,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
; Thumb-android: pop {r4, r5}
-; Thumb-linux: test_nested:
+; Thumb-linux-LABEL: test_nested:
; Thumb-linux: push {r4, r5}
; Thumb-linux-NEXT: mov r5, sp
@@ -108,7 +112,7 @@ define void @test_large() #0 {
call void @dummy_use (i32* %mem, i32 0)
ret void
-; Thumb-android: test_large:
+; Thumb-android-LABEL: test_large:
; Thumb-android: push {r4, r5}
; Thumb-android-NEXT: mov r5, sp
@@ -129,7 +133,7 @@ define void @test_large() #0 {
; Thumb-android: pop {r4, r5}
-; Thumb-linux: test_large:
+; Thumb-linux-LABEL: test_large:
; Thumb-linux: push {r4, r5}
; Thumb-linux-NEXT: mov r5, sp
@@ -157,7 +161,7 @@ define fastcc void @test_fastcc() #0 {
call void @dummy_use (i32* %mem, i32 10)
ret void
-; Thumb-android: test_fastcc:
+; Thumb-android-LABEL: test_fastcc:
; Thumb-android: push {r4, r5}
; Thumb-android-NEXT: mov r5, sp
@@ -177,7 +181,7 @@ define fastcc void @test_fastcc() #0 {
; Thumb-android: pop {r4, r5}
-; Thumb-linux: test_fastcc:
+; Thumb-linux-LABEL: test_fastcc:
; Thumb-linux: push {r4, r5}
; Thumb-linux-NEXT: mov r5, sp
@@ -204,7 +208,7 @@ define fastcc void @test_fastcc_large() #0 {
call void @dummy_use (i32* %mem, i32 0)
ret void
-; Thumb-android: test_fastcc_large:
+; Thumb-android-LABEL: test_fastcc_large:
; Thumb-android: push {r4, r5}
; Thumb-android-NEXT: mov r5, sp
@@ -225,7 +229,7 @@ define fastcc void @test_fastcc_large() #0 {
; Thumb-android: pop {r4, r5}
-; Thumb-linux: test_fastcc_large:
+; Thumb-linux-LABEL: test_fastcc_large:
; Thumb-linux: push {r4, r5}
; Thumb-linux-NEXT: mov r5, sp
diff --git a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
deleted file mode 100644
index da2f3f09b281..000000000000
--- a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
-@d = external global [64 x i32]
-@s = external global [64 x i32]
-
-; Function Attrs: nounwind
-define void @t1() #0 {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: ldr r[[LB:[0-9]]],
-; CHECK-NEXT: ldm r[[LB]]!,
-; CHECK-NEXT: ldr r[[SB:[0-9]]],
-; CHECK-NEXT: stm r[[SB]]!,
-; CHECK-NEXT: ldrb {{.*}}, [r[[LB]]]
-; CHECK-NEXT: strb {{.*}}, [r[[SB]]]
- tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false)
- ret void
-}
-
-; Function Attrs: nounwind
-define void @t2() #0 {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: ldr r[[LB:[0-9]]],
-; CHECK-NEXT: ldm r[[LB]]!,
-; CHECK-NEXT: ldr r[[SB:[0-9]]],
-; CHECK-NEXT: stm r[[SB]]!,
-; CHECK-NEXT: ldrh {{.*}}, [r[[LB]]]
-; CHECK-NEXT: ldrb {{.*}}, [r[[LB]], #2]
-; CHECK-NEXT: strb {{.*}}, [r[[SB]], #2]
-; CHECK-NEXT: strh {{.*}}, [r[[SB]]]
- tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
- ret void
-}
-
-; Function Attrs: nounwind
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
diff --git a/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
new file mode 100644
index 000000000000..fb4ee8dba7a9
--- /dev/null
+++ b/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
@@ -0,0 +1,691 @@
+; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V4T
+; RUN: llc %s -o - -enable-shrink-wrap=true -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE --check-prefix=ENABLE-V5T
+; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumb-macho \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V4T
+; RUN: llc %s -o - -enable-shrink-wrap=false -ifcvt-fn-start=1 -ifcvt-fn-stop=0 -mtriple=thumbv5-macho \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE --check-prefix=DISABLE-V5T
+;
+; Note: Lots of tests use inline asm instead of regular calls.
+; This allows to have a better control on what the allocation will do.
+; Otherwise, we may have spill right in the entry block, defeating
+; shrink-wrapping. Moreover, some of the inline asm statements (nop)
+; are here to ensure that the related paths do not end up as critical
+; edges.
+; Also disable the late if-converter as it makes harder to reason on
+; the diffs.
+
+; Initial motivating example: Simple diamond with a call just on one side.
+; CHECK-LABEL: foo:
+;
+; Compare the arguments and jump to exit.
+; No prologue needed.
+; ENABLE: cmp r0, r1
+; ENABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: push {r7, lr}
+; CHECK: sub sp, #8
+;
+; Compare the arguments and jump to exit.
+; After the prologue is set.
+; DISABLE: cmp r0, r1
+; DISABLE-NEXT: bge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Store %a in the alloca.
+; CHECK: str r0, [sp, #4]
+; Set the alloca address in the second argument.
+; Set the first argument to zero.
+; CHECK: movs r0, #0
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: bl
+;
+; With shrink-wrapping, epilogue is just after the call.
+; ENABLE-NEXT: add sp, #8
+; ENABLE-V5T-NEXT: pop {r7, pc}
+; ENABLE-V4T-NEXT: pop {r7}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: mov lr, r1
+;
+; CHECK: [[EXIT_LABEL]]:
+;
+; Without shrink-wrapping, epilogue is in the exit block.
+; Epilogue code. (What we pop does not matter.)
+; DISABLE: add sp, #8
+; DISABLE-V5T-NEXT: pop {r7, pc}
+; DISABLE-V4T-NEXT: pop {r7}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+;
+; ENABLE-NEXT: bx lr
+define i32 @foo(i32 %a, i32 %b) {
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+false:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+ ret i32 %tmp.0
+}
+
+
+; Same, but the final BB is non-trivial, so we don't duplicate the return inst.
+; CHECK-LABEL: bar:
+;
+; With shrink-wrapping, epilogue is just after the call.
+; CHECK: bl
+; ENABLE-NEXT: add sp, #8
+; ENABLE-NEXT: pop {r7}
+; ENABLE-NEXT: pop {r0}
+; ENABLE-NEXT: mov lr, r0
+;
+; CHECK: movs r0, #42
+;
+; Without shrink-wrapping, epilogue is in the exit block.
+; Epilogue code. (What we pop does not matter.)
+; DISABLE: add sp, #8
+; DISABLE-V5T-NEXT: pop {r7, pc}
+; DISABLE-V4T-NEXT: pop {r7}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+;
+; ENABLE-NEXT: bx lr
+define i32 @bar(i32 %a, i32 %b) {
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+false:
+ ret i32 42
+}
+
+; Function Attrs: optsize
+declare i32 @doSomething(i32, i32*)
+
+
+; Check that we do not perform the restore inside the loop whereas the save
+; is outside.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
+;
+; Shrink-wrapping allows to skip the prologue in the else case.
+; ENABLE: cmp r0, #0
+; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4, lr}
+;
+; DISABLE: cmp r0, #0
+; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; SUM is in r0 because it is coalesced with the second
+; argument on the else path.
+; CHECK: movs [[SUM:r0]], #0
+; CHECK-NEXT: movs [[IV:r[0-9]+]], #10
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
+; CHECK: movs [[TMP:r[0-9]+]], #1
+; CHECK: adds [[SUM]], [[TMP]], [[SUM]]
+; CHECK-NEXT: subs [[IV]], [[IV]], #1
+; CHECK-NEXT: cmp [[IV]], #0
+; CHECK-NEXT: bne [[LOOP]]
+;
+; Next BB.
+; SUM << 3.
+; CHECK: lsls [[SUM]], [[SUM]], #3
+;
+; Duplicated epilogue.
+; DISABLE-V5T: pop {r4, pc}
+; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]]
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsls r0, r1, #1
+; DISABLE-V5T-NEXT: pop {r4, pc}
+; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
+; DISABLE-V4T-NEXT: pop {r4}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
+; ENABLE-NEXT: bx lr
+define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare i32 @something(...)
+
+; Check that we do not perform the shrink-wrapping inside the loop even
+; though that would be legal. The cost model must prevent that.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4
+; This is the nop.
+; CHECK: mov r8, r8
+; CHECK: movs [[SUM:r0]], #0
+; CHECK-NEXT: movs [[IV:r[0-9]+]], #10
+; Next BB.
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body
+; CHECK: movs [[TMP:r[0-9]+]], #1
+; CHECK: adds [[SUM]], [[TMP]], [[SUM]]
+; CHECK-NEXT: subs [[IV]], [[IV]], #1
+; CHECK-NEXT: cmp [[IV]], #0
+; CHECK-NEXT: bne [[LOOP_LABEL]]
+; Next BB.
+; CHECK: @ %for.exit
+; This is the nop.
+; CHECK: mov r8, r8
+; CHECK: pop {r4
+define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
+entry:
+ br label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
+ %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
+ %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.03
+ %inc = add nuw nsw i32 %i.04, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.exit, label %for.body
+
+for.exit:
+ tail call void asm "nop", ""()
+ br label %for.end
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+; Check with a more complex case that we do not have save within the loop and
+; restore outside.
+; CHECK-LABEL: loopInfoSaveOutsideLoop:
+;
+; ENABLE: cmp r0, #0
+; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4, lr}
+;
+; DISABLE: cmp r0, #0
+; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; SUM is in r0 because it is coalesced with the second
+; argument on the else path.
+; CHECK: movs [[SUM:r0]], #0
+; CHECK-NEXT: movs [[IV:r[0-9]+]], #10
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
+; CHECK: movs [[TMP:r[0-9]+]], #1
+; CHECK: adds [[SUM]], [[TMP]], [[SUM]]
+; CHECK-NEXT: subs [[IV]], [[IV]], #1
+; CHECK-NEXT: cmp [[IV]], #0
+; CHECK-NEXT: bne [[LOOP]]
+;
+; Next BB.
+; SUM << 3.
+; CHECK: lsls [[SUM]], [[SUM]], #3
+; ENABLE-V5T-NEXT: pop {r4, pc}
+; ENABLE-V4T-NEXT: pop {r4}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
+;
+; Duplicated epilogue.
+; DISABLE-V5T: pop {r4, pc}
+; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]]
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsls r0, r1, #1
+; DISABLE-V5T-NEXT: pop {r4, pc}
+; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
+; DISABLE-V4T-NEXT: pop {r4}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
+; ENABLE-NEXT: bx lr
+define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ tail call void asm "nop", "~{r4}"()
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare void @somethingElse(...)
+
+; Check with a more complex case that we do not have restore within the loop and
+; save outside.
+; CHECK-LABEL: loopInfoRestoreOutsideLoop:
+;
+; ENABLE: cmp r0, #0
+; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4, lr}
+;
+; DISABLE-NEXT: cmp r0, #0
+; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; SUM is in r0 because it is coalesced with the second
+; argument on the else path.
+; CHECK: movs [[SUM:r0]], #0
+; CHECK-NEXT: movs [[IV:r[0-9]+]], #10
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
+; CHECK: movs [[TMP:r[0-9]+]], #1
+; CHECK: adds [[SUM]], [[TMP]], [[SUM]]
+; CHECK-NEXT: subs [[IV]], [[IV]], #1
+; CHECK-NEXT: cmp [[IV]], #0
+; CHECK-NEXT: bne [[LOOP]]
+;
+; Next BB.
+; SUM << 3.
+; CHECK: lsls [[SUM]], [[SUM]], #3
+; ENABLE-V5T-NEXT: pop {r4, pc}
+; ENABLE-V4T-NEXT: pop {r4}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
+;
+; Duplicated epilogue.
+; DISABLE-V5T: pop {r4, pc}
+; DISABLE-V4T: b [[END_LABEL:LBB[0-9_]+]]
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsls r0, r1, #1
+; DISABLE-V5T-NEXT: pop {r4, pc}
+; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
+; DISABLE-V4T-NEXT: pop {r4}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
+; ENABLE-NEXT: bx lr
+define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ tail call void asm "nop", "~{r4}"()
+ br label %for.body
+
+for.body: ; preds = %for.body, %if.then
+ %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
+ %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+; Check that we handle function with no frame information correctly.
+; CHECK-LABEL: emptyFrame:
+; CHECK: @ %entry
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: bx lr
+define i32 @emptyFrame() {
+entry:
+ ret i32 0
+}
+
+; Check that we handle inline asm correctly.
+; CHECK-LABEL: inlineAsm:
+;
+; ENABLE: cmp r0, #0
+; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: r4.
+; CHECK: push {r4, lr}
+;
+; DISABLE: cmp r0, #0
+; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: movs [[IV:r[0-9]+]], #10
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
+; CHECK: movs r4, #1
+; CHECK: subs [[IV]], [[IV]], #1
+; CHECK-NEXT: cmp [[IV]], #0
+; CHECK-NEXT: bne [[LOOP]]
+;
+; Next BB.
+; CHECK: movs r0, #0
+; ENABLE-V5T-NEXT: pop {r4, pc}
+; ENABLE-V4T-NEXT: pop {r4}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
+;
+; Duplicated epilogue.
+; DISABLE-V5T-NEXT: pop {r4, pc}
+; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsls r0, r1, #1
+; DISABLE-V5T-NEXT: pop {r4, pc}
+; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
+; DISABLE-V4T-NEXT: pop {r4}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+;
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
+; ENABLE-NEXT: bx lr
+define i32 @inlineAsm(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ tail call void asm sideeffect "movs r4, #1", "~{r4}"()
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.exit, label %for.body
+
+for.exit:
+ tail call void asm "nop", ""()
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %for.body, %if.else
+ %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
+ ret i32 %sum.0
+}
+
+; Check that we handle calls to variadic functions correctly.
+; CHECK-LABEL: callVariadicFunc:
+;
+; ENABLE: cmp r0, #0
+; ENABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: push {[[TMP:r[0-9]+]], lr}
+; CHECK: sub sp, #16
+;
+; DISABLE: cmp r0, #0
+; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Setup of the varags.
+; CHECK: mov [[TMP_SP:r[0-9]+]], sp
+; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]]]
+; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #4]
+; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #8]
+; Thumb has quite a strange way for moving stuff
+; in around. Oh well, match the current sequence.
+; CHECK: push {r1}
+; CHECK-NEXT: pop {r0}
+; CHECK: push {r1}
+; CHECK-NEXT: pop {r2}
+; CHECK: push {r1}
+; CHECK-NEXT: pop {r3}
+; CHECK-NEXT: bl
+; CHECK-NEXT: lsls r0, r0, #3
+;
+; ENABLE-NEXT: add sp, #16
+; ENABLE-V5T-NEXT: pop {[[TMP]], pc}
+; ENABLE-V4T-NEXT: pop {[[TMP]]}
+; ENABLE-V4T-NEXT: pop {r1}
+; ENABLE-V4T-NEXT: bx r1
+;
+; Duplicated epilogue.
+; DISABLE-V5T-NEXT: add sp, #16
+; DISABLE-V5T-NEXT: pop {[[TMP]], pc}
+; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
+;
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: lsls r0, r1, #1
+;
+; Epilogue code.
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
+; ENABLE-NEXT: bx lr
+;
+; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
+; DISABLE-NEXT: add sp, #16
+; DISABLE-V5T-NEXT: pop {[[TMP]], pc}
+; DISABLE-V4T-NEXT: pop {[[TMP]]}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+define i32 @callVariadicFunc(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
+ %shl = shl i32 %call, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
+ ret i32 %sum.0
+}
+
+declare i32 @someVariadicFunc(i32, ...)
+
+; Make sure we do not insert unreachable code after noreturn function.
+; Although this is not incorrect to insert such code, it is useless
+; and it hurts the binary size.
+;
+; CHECK-LABEL: noreturn:
+; DISABLE: push
+;
+; CHECK: movs [[TMP:r[0-9]+]], #255
+; CHECK-NEXT: tst r0, [[TMP]]
+; CHECK-NEXT: bne [[ABORT:LBB[0-9_]+]]
+;
+; CHECK: movs r0, #42
+;
+; ENABLE-NEXT: bx lr
+;
+; DISABLE-NEXT: pop
+;;
+; CHECK: [[ABORT]]: @ %if.abort
+;
+; ENABLE: push
+;
+; CHECK: bl
+; ENABLE-NOT: pop
+define i32 @noreturn(i8 signext %bad_thing) {
+entry:
+ %tobool = icmp eq i8 %bad_thing, 0
+ br i1 %tobool, label %if.end, label %if.abort
+
+if.abort:
+ %call = tail call i32 asm sideeffect "movs $0, #1", "=r,~{r4}"()
+ tail call void @abort() #0
+ unreachable
+
+if.end:
+ ret i32 42
+}
+
+declare void @abort() #0
+
+define i32 @b_to_bx(i32 %value) {
+; CHECK-LABEL: b_to_bx:
+; DISABLE: push {r7, lr}
+; CHECK: cmp r1, #49
+; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]]
+; ENABLE: push {r7, lr}
+
+; CHECK: bl
+; DISABLE-V5-NEXT: pop {r7, pc}
+; DISABLE-V4T-NEXT: b [[END_LABEL:LBB[0-9_]+]]
+
+; ENABLE-V5-NEXT: pop {r7, pc}
+; ENABLE-V4-NEXT: pop {r7}
+; ENABLE-V4-NEXT: pop {r1}
+; ENABLE-V4-NEXT: bx r1
+
+; CHECK: [[ELSE_LABEL]]: @ %if.else
+; CHECK-NEXT: lsls r0, r1, #1
+; DISABLE-V5-NEXT: pop {r7, pc}
+; DISABLE-V4T-NEXT: [[END_LABEL]]: @ %if.end
+; DISABLE-V4T-NEXT: pop {r7}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+
+; ENABLE-V5T-NEXT: {{LBB[0-9_]+}}: @ %if.end
+; ENABLE-NEXT: bx lr
+
+entry:
+ %cmp = icmp slt i32 %value, 50
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %div = sdiv i32 5000, %value
+ br label %if.end
+
+if.else:
+ %mul = shl nsw i32 %value, 1
+ br label %if.end
+
+if.end:
+ %value.addr.0 = phi i32 [ %div, %if.then ], [ %mul, %if.else ]
+ ret i32 %value.addr.0
+}
+
+define i1 @beq_to_bx(i32* %y, i32 %head) {
+; CHECK-LABEL: beq_to_bx:
+; DISABLE: push {r4, lr}
+; CHECK: cmp r2, #0
+; CHECK-NEXT: beq [[EXIT_LABEL:LBB[0-9_]+]]
+; ENABLE: push {r4, lr}
+
+; CHECK: tst r3, r4
+; ENABLE-NEXT: pop {r4}
+; ENABLE-NEXT: pop {r3}
+; ENABLE-NEXT: mov lr, r3
+; CHECK-NEXT: beq [[EXIT_LABEL]]
+
+; CHECK: str r1, [r2]
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: [[EXIT_LABEL]]: @ %cleanup
+; ENABLE-NEXT: bx lr
+; DISABLE-V5-NEXT: pop {r4, pc}
+; DISABLE-V4T-NEXT: pop {r4}
+; DISABLE-V4T-NEXT: pop {r1}
+; DISABLE-V4T-NEXT: bx r1
+
+entry:
+ %cmp = icmp eq i32* %y, null
+ br i1 %cmp, label %cleanup, label %if.end
+
+if.end:
+ %z = load i32, i32* %y, align 4
+ %and = and i32 %z, 2
+ %cmp2 = icmp eq i32 %and, 0
+ br i1 %cmp2, label %cleanup, label %if.end4
+
+if.end4:
+ store i32 %head, i32* %y, align 4
+ br label %cleanup
+
+cleanup:
+ %retval.0 = phi i1 [ 0, %if.end4 ], [ 1, %entry ], [ 1, %if.end ]
+ ret i1 %retval.0
+}
+
+attributes #0 = { noreturn nounwind }
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index 1c7b631741b7..45b42125e166 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -32,12 +32,12 @@ bb7: ; preds = %bb
call void @llvm.va_end( i8* %va.upgrd.4 )
ret void
-; The return sequence should pop the lr to r3, recover the stack space used to
+; The return sequence should pop the lr to r0-3, recover the stack space used to
; store variadic argument registers, then return via r3. Possibly there is a pop
; before this, but only if the function happened to use callee-saved registers.
-; CHECK: pop {r3}
+; CHECK: pop {[[POP_REG:r[0-3]]]}
; CHECK: add sp, #[[IMM]]
-; CHECK: bx r3
+; CHECK: bx [[POP_REG]]
}
declare void @llvm.va_start(i8*)
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index 893a45d8f722..fb32a2cac3a1 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -15,11 +15,11 @@ entry:
%6 = bitcast i32* %sp3 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
%7 = load <4 x i32>, <4 x i32>* %6, align 16 ; <<4 x i32>> [#uses=1]
%8 = bitcast i32* %dp to i8* ; <i8*> [#uses=1]
- tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1)
+ tail call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4.p0i8.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
@sbuf = common global [16 x i32] zeroinitializer, align 16 ; <[16 x i32]*> [#uses=5]
@dbuf = common global [16 x i32] zeroinitializer ; <[16 x i32]*> [#uses=2]
@@ -45,7 +45,7 @@ bb2: ; preds = %bb
%3 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
%4 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
%5 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
- tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
+ tail call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
ret i32 0
}
@@ -53,15 +53,15 @@ bb2: ; preds = %bb
; Make sure the DPair register class can spill.
define void @pr12389(i8* %p) nounwind ssp {
entry:
- %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %p, i32 1)
+ %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %p, i32 1)
tail call void asm sideeffect "", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
- tail call void @llvm.arm.neon.vst1.v4f32(i8* %p, <4 x float> %vld1, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %vld1, i32 1)
ret void
}
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind
; <rdar://problem/11101911>
; When an strd is expanded into two str instructions, make sure the first str
diff --git a/test/CodeGen/Thumb2/emit-unwinding.ll b/test/CodeGen/Thumb2/emit-unwinding.ll
new file mode 100644
index 000000000000..1f1ea1b48af0
--- /dev/null
+++ b/test/CodeGen/Thumb2/emit-unwinding.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple thumbv7em-apple-unknown-eabi-macho %s -o - -O0 | FileCheck %s
+
+; CHECK: add.w r11, sp, #{{[1-9]+}}
+
+define void @foo1() {
+ call void asm sideeffect "", "~{r4}"()
+ call void @foo2()
+ ret void
+}
+
+declare void @foo2()
diff --git a/test/CodeGen/Thumb2/float-cmp.ll b/test/CodeGen/Thumb2/float-cmp.ll
index 88d6c3b0adb8..77b0999337c6 100644
--- a/test/CodeGen/Thumb2/float-cmp.ll
+++ b/test/CodeGen/Thumb2/float-cmp.ll
@@ -81,8 +81,9 @@ define i1 @cmp_f_ord(float %a, float %b) {
}
define i1 @cmp_f_ugt(float %a, float %b) {
; CHECK-LABEL: cmp_f_ugt:
-; NONE: bl __aeabi_fcmpgt
-; NONE: bl __aeabi_fcmpun
+; NONE: bl __aeabi_fcmple
+; NONE: cmp r0, #0
+; NONE-NEXT: it eq
; HARD: vcmpe.f32
; HARD: movhi r0, #1
%1 = fcmp ugt float %a, %b
@@ -90,8 +91,9 @@ define i1 @cmp_f_ugt(float %a, float %b) {
}
define i1 @cmp_f_uge(float %a, float %b) {
; CHECK-LABEL: cmp_f_uge:
-; NONE: bl __aeabi_fcmpge
-; NONE: bl __aeabi_fcmpun
+; NONE: bl __aeabi_fcmplt
+; NONE: cmp r0, #0
+; NONE-NEXT: it eq
; HARD: vcmpe.f32
; HARD: movpl r0, #1
%1 = fcmp uge float %a, %b
@@ -99,8 +101,9 @@ define i1 @cmp_f_uge(float %a, float %b) {
}
define i1 @cmp_f_ult(float %a, float %b) {
; CHECK-LABEL: cmp_f_ult:
-; NONE: bl __aeabi_fcmplt
-; NONE: bl __aeabi_fcmpun
+; NONE: bl __aeabi_fcmpge
+; NONE: cmp r0, #0
+; NONE-NEXT: it eq
; HARD: vcmpe.f32
; HARD: movlt r0, #1
%1 = fcmp ult float %a, %b
@@ -108,8 +111,9 @@ define i1 @cmp_f_ult(float %a, float %b) {
}
define i1 @cmp_f_ule(float %a, float %b) {
; CHECK-LABEL: cmp_f_ule:
-; NONE: bl __aeabi_fcmple
-; NONE: bl __aeabi_fcmpun
+; NONE: bl __aeabi_fcmpgt
+; NONE: cmp r0, #0
+; NONE-NEXT: it eq
; HARD: vcmpe.f32
; HARD: movle r0, #1
%1 = fcmp ule float %a, %b
@@ -214,10 +218,8 @@ define i1 @cmp_d_ord(double %a, double %b) {
}
define i1 @cmp_d_ugt(double %a, double %b) {
; CHECK-LABEL: cmp_d_ugt:
-; NONE: bl __aeabi_dcmpgt
-; NONE: bl __aeabi_dcmpun
-; SP: bl __aeabi_dcmpgt
-; SP: bl __aeabi_dcmpun
+; NONE: bl __aeabi_dcmple
+; SP: bl __aeabi_dcmple
; DP: vcmpe.f64
; DP: movhi r0, #1
%1 = fcmp ugt double %a, %b
@@ -226,10 +228,8 @@ define i1 @cmp_d_ugt(double %a, double %b) {
define i1 @cmp_d_ult(double %a, double %b) {
; CHECK-LABEL: cmp_d_ult:
-; NONE: bl __aeabi_dcmplt
-; NONE: bl __aeabi_dcmpun
-; SP: bl __aeabi_dcmplt
-; SP: bl __aeabi_dcmpun
+; NONE: bl __aeabi_dcmpge
+; SP: bl __aeabi_dcmpge
; DP: vcmpe.f64
; DP: movlt r0, #1
%1 = fcmp ult double %a, %b
@@ -268,10 +268,8 @@ define i1 @cmp_d_ueq(double %a, double %b) {
define i1 @cmp_d_uge(double %a, double %b) {
; CHECK-LABEL: cmp_d_uge:
-; NONE: bl __aeabi_dcmpge
-; NONE: bl __aeabi_dcmpun
-; SP: bl __aeabi_dcmpge
-; SP: bl __aeabi_dcmpun
+; NONE: bl __aeabi_dcmplt
+; SP: bl __aeabi_dcmplt
; DP: vcmpe.f64
; DP: movpl r0, #1
%1 = fcmp uge double %a, %b
@@ -280,10 +278,8 @@ define i1 @cmp_d_uge(double %a, double %b) {
define i1 @cmp_d_ule(double %a, double %b) {
; CHECK-LABEL: cmp_d_ule:
-; NONE: bl __aeabi_dcmple
-; NONE: bl __aeabi_dcmpun
-; SP: bl __aeabi_dcmple
-; SP: bl __aeabi_dcmpun
+; NONE: bl __aeabi_dcmpgt
+; SP: bl __aeabi_dcmpgt
; DP: vcmpe.f64
; DP: movle r0, #1
%1 = fcmp ule double %a, %b
diff --git a/test/CodeGen/Thumb2/float-intrinsics-double.ll b/test/CodeGen/Thumb2/float-intrinsics-double.ll
index 01a23bd0fe69..657d1b172da9 100644
--- a/test/CodeGen/Thumb2/float-intrinsics-double.ll
+++ b/test/CodeGen/Thumb2/float-intrinsics-double.ll
@@ -109,9 +109,12 @@ declare double @llvm.fabs.f64(double %Val)
define double @abs_d(double %a) {
; CHECK-LABEL: abs_d:
; NONE: bic r1, r1, #-2147483648
-; SP: bl __aeabi_dcmpgt
-; SP: bl __aeabi_dcmpun
-; SP: bl __aeabi_dsub
+; SP: vldr d1, .LCPI{{.*}}
+; SP: vmov r0, r1, d0
+; SP: vmov r2, r3, d1
+; SP: lsrs r2, r3, #31
+; SP: bfi r1, r2, #31, #1
+; SP: vmov d0, r0, r1
; DP: vabs.f64 d0, d0
%1 = call double @llvm.fabs.f64(double %a)
ret double %1
@@ -216,7 +219,7 @@ define i16 @d_to_h(double %a) {
declare double @llvm.convert.from.fp16.f64(i16 %a)
define double @h_to_d(i16 %a) {
; CHECK-LABEL: h_to_d:
-; NONE: bl __gnu_h2f_ieee
+; NONE: bl __aeabi_h2f
; NONE: bl __aeabi_f2d
; SP: vcvt{{[bt]}}.f32.f16
; SP: bl __aeabi_f2d
diff --git a/test/CodeGen/Thumb2/float-intrinsics-float.ll b/test/CodeGen/Thumb2/float-intrinsics-float.ll
index ec1bcd3708ac..847aeacd2f91 100644
--- a/test/CodeGen/Thumb2/float-intrinsics-float.ll
+++ b/test/CodeGen/Thumb2/float-intrinsics-float.ll
@@ -205,7 +205,7 @@ define float @fmuladd_f(float %a, float %b, float %c) {
declare i16 @llvm.convert.to.fp16.f32(float %a)
define i16 @f_to_h(float %a) {
; CHECK-LABEL: f_to_h:
-; SOFT: bl __gnu_f2h_ieee
+; SOFT: bl __aeabi_f2h
; HARD: vcvt{{[bt]}}.f16.f32
%1 = call i16 @llvm.convert.to.fp16.f32(float %a)
ret i16 %1
@@ -214,7 +214,7 @@ define i16 @f_to_h(float %a) {
declare float @llvm.convert.from.fp16.f32(i16 %a)
define float @h_to_f(i16 %a) {
; CHECK-LABEL: h_to_f:
-; SOFT: bl __gnu_h2f_ieee
+; SOFT: bl __aeabi_h2f
; HARD: vcvt{{[bt]}}.f32.f16
%1 = call float @llvm.convert.from.fp16.f32(i16 %a)
ret float %1
diff --git a/test/CodeGen/Thumb2/ifcvt-compare.ll b/test/CodeGen/Thumb2/ifcvt-compare.ll
index 8af139a5ef6e..7b5ce4fa3f5f 100644
--- a/test/CodeGen/Thumb2/ifcvt-compare.ll
+++ b/test/CodeGen/Thumb2/ifcvt-compare.ll
@@ -19,7 +19,8 @@ f:
define void @f1(i32 %x) optsize {
; CHECK-LABEL: f1:
; CHECK: cmp r0, #1
- ; CHECK: it eq
+ ; CHECK: it ne
+ ; CHECK-NEXT: bxne lr
%p = icmp eq i32 %x, 1
br i1 %p, label %t, label %f
@@ -34,7 +35,8 @@ f:
define void @f2(i32 %x) {
; CHECK-LABEL: f2:
; CHECK: cmp r0, #0
- ; CHECK: it eq
+ ; CHECK: it ne
+ ; CHECK-NEXT: bxne lr
%p = icmp eq i32 %x, 0
br i1 %p, label %t, label %f
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 2b1caa393072..c57274ea5599 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -59,10 +59,10 @@ bb1:
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
%tmp1 = shl i32 %indvar, 2
%gep1 = getelementptr i8, i8* %ptr1, i32 %tmp1
- %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
+ %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %gep1, i32 1)
%tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
%gep2 = getelementptr i8, i8* %ptr2, i32 %tmp1
- call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
+ call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
%indvar.next = add i32 %indvar, 1
%cond = icmp eq i32 %indvar.next, 10
br i1 %cond, label %bb2, label %bb1
@@ -73,9 +73,9 @@ bb2:
; CHECK-NOT: LCPI1_0:
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/pic-load.ll b/test/CodeGen/Thumb2/pic-load.ll
index 53d456c53452..cfdad03dcd58 100644
--- a/test/CodeGen/Thumb2/pic-load.ll
+++ b/test/CodeGen/Thumb2/pic-load.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -relocation-model=pic | FileCheck %s --check-prefix=CHECK --check-prefix=PIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -mcpu=swift -mattr=+no-movt | FileCheck %s --check-prefix=CHECK --check-prefix=PIC-NOMOVT
%struct.anon = type { void ()* }
%struct.one_atexit_routine = type { %struct.anon, i32, i8* }
@@ -8,7 +9,14 @@
define hidden i32 @atexit(void ()* %func) nounwind {
entry:
; CHECK-LABEL: atexit:
-; CHECK: add r0, pc
+; CHECK-PIC: add r0, pc
+; CHECK-NOMOVT: ldr r[[REGNUM:[0-9]+]], LCPI0_0
+; CHECK-NOMOVT: LPC0_0:
+; CHECK-NOMOVT: add r[[REGNUM]], pc
+; CHECK-NOMOVT: ldr r1, [r[[REGNUM]]
+; CHECK-NOMOVT: blx _atexit_common
+; CHECK-NOMOVT: LCPI0_0:
+; CHECK-NOMOVT: .long L___dso_handle$non_lazy_ptr-(LPC0_0+4)
%r = alloca %struct.one_atexit_routine, align 4 ; <%struct.one_atexit_routine*> [#uses=3]
%0 = getelementptr %struct.one_atexit_routine, %struct.one_atexit_routine* %r, i32 0, i32 0, i32 0 ; <void ()**> [#uses=1]
store void ()* %func, void ()** %0, align 4
diff --git a/test/CodeGen/Thumb2/setjmp_longjmp.ll b/test/CodeGen/Thumb2/setjmp_longjmp.ll
new file mode 100644
index 000000000000..9e0fad00c140
--- /dev/null
+++ b/test/CodeGen/Thumb2/setjmp_longjmp.ll
@@ -0,0 +1,89 @@
+; RUN: llc %s -o - | FileCheck %s
+target triple = "thumbv7-apple-ios"
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*)
+declare void @llvm.eh.sjlj.longjmp(i8*)
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.stacksave()
+@g = external global i32
+
+; CHECK-LABEL: double_foobar
+;
+; setjmp sequence:
+; CHECK: mov [[PCREG:r[0-9]+]], pc
+; CHECK-NEXT: adds [[PCREG]], [[PCREG]], #7
+; CHECK-NEXT: str [[PCREG]], {{\[}}[[BUFREG:r[0-9]+]], #4]
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: b [[LABEL:L[a-zA-Z0-9]+]]
+; CHECK-NEXT: movs r0, #1
+; CHECK-NEXT: [[LABEL]]:
+;
+; setjmp sequence 2:
+; CHECK: mov [[PCREG:r[0-9]+]], pc
+; CHECK-NEXT: adds [[PCREG]], [[PCREG]], #7
+; CHECK-NEXT: str [[PCREG]], {{\[}}[[BUFREG:r[0-9]+]], #4]
+; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: b [[LABEL:L[a-zA-Z0-9]+]]
+; CHECK-NEXT: movs r0, #1
+; CHECK-NEXT: [[LABEL]]:
+
+; longjmp sequence:
+; CHECK: ldr [[TEMPREG:r[0-9]+]], [{{\s*}}[[BUFREG:r[0-9]+]], #8]
+; CHECK-NEXT: mov sp, [[TEMPREG]]
+; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4]
+; CHECK-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}}
+; CHECK-NEXT: bx [[DESTREG]]
+;
+; longjmp sequence2:
+; CHECK: ldr [[TEMPREG:r[0-9]+]], [{{\s*}}[[BUFREG:r[0-9]+]], #8]
+; CHECK-NEXT: mov sp, [[TEMPREG]]
+; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4]
+; CHECK-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}}
+; CHECK-NEXT: bx [[DESTREG]]
+define void @double_foobar() {
+entry:
+ %buf = alloca [5 x i8*], align 4
+ %bufptr = bitcast [5 x i8*]* %buf to i8*
+ %arraydecay = getelementptr inbounds [5 x i8*], [5 x i8*]* %buf, i32 0, i32 0
+
+ %fa = tail call i8* @llvm.frameaddress(i32 0)
+ store i8* %fa, i8** %arraydecay, align 4
+ %ss = tail call i8* @llvm.stacksave()
+ %ssgep = getelementptr [5 x i8*], [5 x i8*]* %buf, i32 0, i32 2
+ store i8* %ss, i8** %ssgep, align 4
+
+ %setjmpres = call i32 @llvm.eh.sjlj.setjmp(i8* %bufptr)
+ %tobool = icmp ne i32 %setjmpres, 0
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then:
+ store volatile i32 1, i32* @g, align 4
+ br label %if.end
+
+if.else:
+ store volatile i32 0, i32* @g, align 4
+ call void @llvm.eh.sjlj.longjmp(i8* %bufptr)
+ unreachable
+
+if.end:
+ %fa2 = tail call i8* @llvm.frameaddress(i32 0)
+ store i8* %fa2, i8** %arraydecay, align 4
+ %ss2 = tail call i8* @llvm.stacksave()
+ store i8* %ss2, i8** %ssgep, align 4
+
+ %setjmpres2 = call i32 @llvm.eh.sjlj.setjmp(i8* %bufptr)
+ %tobool2 = icmp ne i32 %setjmpres2, 0
+ br i1 %tobool2, label %if2.then, label %if2.else
+
+if2.then:
+ store volatile i32 3, i32* @g, align 4
+ br label %if2.end
+
+if2.else:
+ store volatile i32 2, i32* @g, align 4
+ call void @llvm.eh.sjlj.longjmp(i8* %bufptr)
+ unreachable
+
+if2.end:
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
index da1057b8bb4a..eb48ffb7d80e 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -24,11 +24,10 @@ cond_next:
define i32 @t2(i32 %a, i32 %b) nounwind {
entry:
-; Do not if-convert when branches go to the different loops.
; CHECK-LABEL: t2:
-; CHECK-NOT: ite gt
-; CHECK-NOT: subgt
-; CHECK-NOT: suble
+; CHECK: ite gt
+; CHECK: subgt
+; CHECK: suble
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp1434, label %bb17, label %bb.outer
@@ -73,9 +72,10 @@ entry:
define void @t3(i32 %a, i32 %b) nounwind {
entry:
; CHECK-LABEL: t3:
-; CHECK: itt ge
-; CHECK: movge r0, r1
-; CHECK: blge {{_?}}foo
+; CHECK: it lt
+; CHECK-NEXT: bxlt lr
+; CHECK: mov r0, r1
+; CHECK: bl {{_?}}foo
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 1d2ba0008be8..4a76e100b658 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -8,7 +8,7 @@ entry:
; CHECK: it ne
; CHECK: cmpne
; CHECK: it hi
-; CHECK: pophi {r7, pc}
+; CHECK: bxhi lr
%tmp1 = icmp ult i32 %X, 4 ; <i1> [#uses=1]
%tmp4 = icmp eq i32 %Y, 0 ; <i1> [#uses=1]
%tmp7 = or i1 %tmp4, %tmp1 ; <i1> [#uses=1]
@@ -69,7 +69,7 @@ define fastcc void @t1(%struct.SString* %word, i8 signext %c) {
entry:
; CHECK-LABEL: t1:
; CHECK: it ne
-; CHECK: popne {r7, pc}
+; CHECK: bxne lr
%tmp1 = icmp eq %struct.SString* %word, null ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %cond_false
diff --git a/test/CodeGen/Thumb2/thumb2-mulhi.ll b/test/CodeGen/Thumb2/thumb2-mulhi.ll
index db9b644d4f92..273abb8a7b0c 100644
--- a/test/CodeGen/Thumb2/thumb2-mulhi.ll
+++ b/test/CodeGen/Thumb2/thumb2-mulhi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2dsp %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+dsp %s -o - | FileCheck %s
define i32 @smulhi(i32 %x, i32 %y) {
; CHECK: smulhi
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
index 8573d39f09f6..5ddaf9353f92 100644
--- a/test/CodeGen/Thumb2/thumb2-smla.ll
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp -arm-use-mulops=false %s -o - | FileCheck %s -check-prefix=NO_MULOPS
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+dsp %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+dsp -arm-use-mulops=false %s -o - | FileCheck %s -check-prefix=NO_MULOPS
define i32 @f3(i32 %a, i16 %x, i32 %y) {
; CHECK: f3
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
index 937f7737f2b3..a196a3c79ae9 100644
--- a/test/CodeGen/Thumb2/thumb2-smul.ll
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+t2dsp %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2,+t2xtpk,+dsp %s -o - | FileCheck %s
@x = weak global i16 0 ; <i16*> [#uses=1]
@y = weak global i16 0 ; <i16*> [#uses=0]
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index e0f7b5bd919c..f408242ea01f 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -7,7 +7,7 @@
%quux = type { i32 (...)**, %baz*, i32 }
%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK-LABEL: aaa:
@@ -18,30 +18,30 @@ entry:
%aligned_vec = alloca <4 x float>, align 16
%"alloca point" = bitcast i32 0 to i32
%vecptr = bitcast <4 x float>* %aligned_vec to i8*
- %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %vecptr, i32 1) nounwind
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* %vecptr, i32 1) nounwind
store float 6.300000e+01, float* undef, align 4
- %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 0.000000e+00, float* undef, align 4
- %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
- %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
+ %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
+ %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
%val173 = load <4 x float>, <4 x float>* undef ; <<4 x float>> [#uses=1]
br label %bb4
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index 4afea894aebc..e091a6529cd1 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -3,17 +3,17 @@
; rdar://11318438
define zeroext i8 @test1(i32 %A.u) {
-; A8: test1
+; A8-LABEL: test1:
; A8: uxtb r0, r0
%B.u = trunc i32 %A.u to i8
ret i8 %B.u
}
define zeroext i32 @test2(i32 %A.u, i32 %B.u) {
-; A8: test2
+; A8-LABEL: test2:
; A8: uxtab r0, r0, r1
-; M3: test2
+; M3-LABEL: test2:
; M3: uxtb r1, r1
; M3-NOT: uxtab
; M3: add r0, r1
@@ -24,7 +24,7 @@ define zeroext i32 @test2(i32 %A.u, i32 %B.u) {
}
define zeroext i32 @test3(i32 %A.u) {
-; A8-LABEL: test3
+; A8-LABEL: test3:
; A8: ubfx r0, r0, #8, #16
%B.u = lshr i32 %A.u, 8
%C.u = shl i32 %A.u, 24
diff --git a/test/CodeGen/Thumb2/v8_IT_1.ll b/test/CodeGen/Thumb2/v8_IT_1.ll
index 30dbb4802b6d..948f159c343d 100644
--- a/test/CodeGen/Thumb2/v8_IT_1.ll
+++ b/test/CodeGen/Thumb2/v8_IT_1.ll
@@ -6,12 +6,12 @@
;CHECK: bx
define <16 x i8> @select_s_v_v(i32 %avail, i8* %bar) {
entry:
- %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1)
+ %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %bar, i32 1)
%and = and i32 %avail, 1
%tobool = icmp eq i32 %and, 0
%vld1. = select i1 %tobool, <16 x i8> %vld1, <16 x i8> zeroinitializer
ret <16 x i8> %vld1.
}
-declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 )
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* , i32 )
diff --git a/test/CodeGen/Thumb2/v8_IT_3.ll b/test/CodeGen/Thumb2/v8_IT_3.ll
index 3ccee5fbb8ca..78b51a033084 100644
--- a/test/CodeGen/Thumb2/v8_IT_3.ll
+++ b/test/CodeGen/Thumb2/v8_IT_3.ll
@@ -35,7 +35,7 @@ bb:
br i1 %tmp4, label %bb1, label %bb8
bb1:
-; CHECK: %bb6
+; CHECK: %entry
; CHECK: it eq
; CHECK-NEXT: ldreq
; CHECK-NEXT: it eq
@@ -54,8 +54,9 @@ bb1:
bb4:
; CHECK-PIC: cmp
; CHECK-PIC: cmp
+; CHECK-PIC: cmp
; CHECK-PIC-NEXT: bne
-; CHECK-PIC-NEXT: %bb4
+; CHECK-PIC: %bb6
; CHECK-PIC-NEXT: movs
; CHECK-PIC-NEXT: add
; CHECK-PIC-NEXT: pop
diff --git a/test/CodeGen/Thumb2/v8_IT_5.ll b/test/CodeGen/Thumb2/v8_IT_5.ll
index 78b80d7dcdef..6a7a7a0b0aa0 100644
--- a/test/CodeGen/Thumb2/v8_IT_5.ll
+++ b/test/CodeGen/Thumb2/v8_IT_5.ll
@@ -7,9 +7,11 @@
; CHECK-NEXT: %if.else163
; CHECK-NEXT: mov.w
; CHECK-NEXT: b
+; CHECK: [[JUMPTARGET]]:{{.*}}%if.else173
+; CHECK-NEXT: mov.w
+; CHECK-NEXT: bx lr
; CHECK-NEXT: %if.else145
; CHECK-NEXT: mov.w
-; CHECK: [[JUMPTARGET]]:{{.*}}%if.else173
%struct.hc = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/WebAssembly/call.ll b/test/CodeGen/WebAssembly/call.ll
new file mode 100644
index 000000000000..9158ccec0979
--- /dev/null
+++ b/test/CodeGen/WebAssembly/call.ll
@@ -0,0 +1,127 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic call operations assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare i32 @i32_nullary()
+declare i32 @i32_unary(i32)
+declare i32 @i32_binary(i32, i32)
+declare i64 @i64_nullary()
+declare float @float_nullary()
+declare double @double_nullary()
+declare void @void_nullary()
+
+; CHECK-LABEL: call_i32_nullary:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: {{^}} i32.call $push[[NUM:[0-9]+]]=, i32_nullary{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @call_i32_nullary() {
+ %r = call i32 @i32_nullary()
+ ret i32 %r
+}
+
+; CHECK-LABEL: call_i64_nullary:
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: {{^}} i64.call $push[[NUM:[0-9]+]]=, i64_nullary{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @call_i64_nullary() {
+ %r = call i64 @i64_nullary()
+ ret i64 %r
+}
+
+; CHECK-LABEL: call_float_nullary:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: {{^}} f32.call $push[[NUM:[0-9]+]]=, float_nullary{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @call_float_nullary() {
+ %r = call float @float_nullary()
+ ret float %r
+}
+
+; CHECK-LABEL: call_double_nullary:
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: {{^}} f64.call $push[[NUM:[0-9]+]]=, double_nullary{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @call_double_nullary() {
+ %r = call double @double_nullary()
+ ret double %r
+}
+
+; CHECK-LABEL: call_void_nullary:
+; CHECK-NEXT: {{^}} call void_nullary{{$}}
+; CHECK-NEXT: return{{$}}
+define void @call_void_nullary() {
+ call void @void_nullary()
+ ret void
+}
+
+; CHECK-LABEL: call_i32_unary:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: {{^}} i32.call $push[[NUM:[0-9]+]]=, i32_unary, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @call_i32_unary(i32 %a) {
+ %r = call i32 @i32_unary(i32 %a)
+ ret i32 %r
+}
+
+; CHECK-LABEL: call_i32_binary:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: {{^}} i32.call $push[[NUM:[0-9]+]]=, i32_binary, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @call_i32_binary(i32 %a, i32 %b) {
+ %r = call i32 @i32_binary(i32 %a, i32 %b)
+ ret i32 %r
+}
+
+; CHECK-LABEL: call_indirect_void:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: {{^}} call_indirect $0{{$}}
+; CHECK-NEXT: return{{$}}
+define void @call_indirect_void(void ()* %callee) {
+ call void %callee()
+ ret void
+}
+
+; CHECK-LABEL: call_indirect_i32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: {{^}} i32.call_indirect $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @call_indirect_i32(i32 ()* %callee) {
+ %t = call i32 %callee()
+ ret i32 %t
+}
+
+; CHECK-LABEL: tail_call_void_nullary:
+; CHECK-NEXT: {{^}} call void_nullary{{$}}
+; CHECK-NEXT: return{{$}}
+define void @tail_call_void_nullary() {
+ tail call void @void_nullary()
+ ret void
+}
+
+; CHECK-LABEL: fastcc_tail_call_void_nullary:
+; CHECK-NEXT: {{^}} call void_nullary{{$}}
+; CHECK-NEXT: return{{$}}
+define void @fastcc_tail_call_void_nullary() {
+ tail call fastcc void @void_nullary()
+ ret void
+}
+
+; CHECK-LABEL: coldcc_tail_call_void_nullary:
+; CHECK-NEXT: {{^}} call void_nullary
+; CHECK-NEXT: return{{$}}
+define void @coldcc_tail_call_void_nullary() {
+ tail call coldcc void @void_nullary()
+ ret void
+}
+
+; FIXME test the following:
+; - More argument combinations.
+; - Tail call.
+; - Interesting returns (struct, multiple).
+; - Vararg.
diff --git a/test/CodeGen/WebAssembly/cfg-stackify.ll b/test/CodeGen/WebAssembly/cfg-stackify.ll
new file mode 100644
index 000000000000..71f3551347bf
--- /dev/null
+++ b/test/CodeGen/WebAssembly/cfg-stackify.ll
@@ -0,0 +1,1102 @@
+; RUN: llc < %s -asm-verbose=false -disable-block-placement -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs | FileCheck -check-prefix=OPT %s
+
+; Test the CFG stackifier pass.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare void @something()
+
+; Test that loops are made contiguous, even in the presence of split backedges.
+
+; CHECK-LABEL: test0:
+; CHECK: loop
+; CHECK-NOT: br
+; CHECK: i32.add
+; CHECK-NEXT: i32.ge_s
+; CHECK-NEXT: br_if
+; CHECK-NOT: br
+; CHECK: call
+; CHECK: br BB0_1{{$}}
+; CHECK: return{{$}}
+; OPT-LABEL: test0:
+; OPT: loop
+; OPT-NOT: br
+; OPT: i32.add
+; OPT-NEXT: i32.ge_s
+; OPT-NEXT: br_if
+; OPT-NOT: br
+; OPT: call
+; OPT: br BB0_1{{$}}
+; OPT: return{{$}}
+define void @test0(i32 %n) {
+entry:
+ br label %header
+
+header:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %back ]
+ %i.next = add i32 %i, 1
+
+ %c = icmp slt i32 %i.next, %n
+ br i1 %c, label %back, label %exit
+
+exit:
+ ret void
+
+back:
+ call void @something()
+ br label %header
+}
+
+; Same as test0, but the branch condition is reversed.
+
+; CHECK-LABEL: test1:
+; CHECK: loop
+; CHECK-NOT: br
+; CHECK: i32.add
+; CHECK-NEXT: i32.ge_s
+; CHECK-NEXT: br_if
+; CHECK-NOT: br
+; CHECK: call
+; CHECK: br BB1_1{{$}}
+; CHECK: return{{$}}
+; OPT-LABEL: test1:
+; OPT: loop
+; OPT-NOT: br
+; OPT: i32.add
+; OPT-NEXT: i32.ge_s
+; OPT-NEXT: br_if
+; OPT-NOT: br
+; OPT: call
+; OPT: br BB1_1{{$}}
+; OPT: return{{$}}
+define void @test1(i32 %n) {
+entry:
+ br label %header
+
+header:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %back ]
+ %i.next = add i32 %i, 1
+
+ %c = icmp sge i32 %i.next, %n
+ br i1 %c, label %exit, label %back
+
+exit:
+ ret void
+
+back:
+ call void @something()
+ br label %header
+}
+
+; Test that a simple loop is handled as expected.
+
+; CHECK-LABEL: test2:
+; CHECK: block BB2_2{{$}}
+; CHECK: br_if {{[^,]*}}, BB2_2{{$}}
+; CHECK: BB2_1:
+; CHECK: br_if ${{[0-9]+}}, BB2_1{{$}}
+; CHECK: BB2_2:
+; CHECK: return{{$}}
+; OPT-LABEL: test2:
+; OPT: block BB2_2{{$}}
+; OPT: br_if {{[^,]*}}, BB2_2{{$}}
+; OPT: BB2_1:
+; OPT: br_if ${{[0-9]+}}, BB2_1{{$}}
+; OPT: BB2_2:
+; OPT: return{{$}}
+define void @test2(double* nocapture %p, i32 %n) {
+entry:
+ %cmp.4 = icmp sgt i32 %n, 0
+ br i1 %cmp.4, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds double, double* %p, i32 %i.05
+ %0 = load double, double* %arrayidx, align 8
+ %mul = fmul double %0, 3.200000e+00
+ store double %mul, double* %arrayidx, align 8
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: doublediamond:
+; CHECK: block BB3_5{{$}}
+; CHECK: block BB3_2{{$}}
+; CHECK: br_if $0, BB3_2{{$}}
+; CHECK: block BB3_4{{$}}
+; CHECK: br_if $1, BB3_4{{$}}
+; CHECK: br BB3_5{{$}}
+; CHECK: BB3_4:
+; CHECK: BB3_5:
+; CHECK: return ${{[0-9]+}}{{$}}
+; OPT-LABEL: doublediamond:
+; OPT: block BB3_5{{$}}
+; OPT: block BB3_4{{$}}
+; OPT: br_if {{[^,]*}}, BB3_4{{$}}
+; OPT: block BB3_3{{$}}
+; OPT: br_if {{[^,]*}}, BB3_3{{$}}
+; OPT: br BB3_5{{$}}
+; OPT: BB3_4:
+; OPT: BB3_5:
+; OPT: return ${{[0-9]+}}{{$}}
+define i32 @doublediamond(i32 %a, i32 %b, i32* %p) {
+entry:
+ %c = icmp eq i32 %a, 0
+ %d = icmp eq i32 %b, 0
+ store volatile i32 0, i32* %p
+ br i1 %c, label %true, label %false
+true:
+ store volatile i32 1, i32* %p
+ br label %exit
+false:
+ store volatile i32 2, i32* %p
+ br i1 %d, label %ft, label %ff
+ft:
+ store volatile i32 3, i32* %p
+ br label %exit
+ff:
+ store volatile i32 4, i32* %p
+ br label %exit
+exit:
+ store volatile i32 5, i32* %p
+ ret i32 0
+}
+
+; CHECK-LABEL: triangle:
+; CHECK: block BB4_2{{$}}
+; CHECK: br_if $1, BB4_2{{$}}
+; CHECK: BB4_2:
+; CHECK: return ${{[0-9]+}}{{$}}
+; OPT-LABEL: triangle:
+; OPT: block BB4_2{{$}}
+; OPT: br_if $1, BB4_2{{$}}
+; OPT: BB4_2:
+; OPT: return ${{[0-9]+}}{{$}}
+define i32 @triangle(i32* %p, i32 %a) {
+entry:
+ %c = icmp eq i32 %a, 0
+ store volatile i32 0, i32* %p
+ br i1 %c, label %true, label %exit
+true:
+ store volatile i32 1, i32* %p
+ br label %exit
+exit:
+ store volatile i32 2, i32* %p
+ ret i32 0
+}
+
+; CHECK-LABEL: diamond:
+; CHECK: block BB5_3{{$}}
+; CHECK: block BB5_2{{$}}
+; CHECK: br_if $1, BB5_2{{$}}
+; CHECK: br BB5_3{{$}}
+; CHECK: BB5_2:
+; CHECK: BB5_3:
+; CHECK: return ${{[0-9]+}}{{$}}
+; OPT-LABEL: diamond:
+; OPT: block BB5_3{{$}}
+; OPT: block BB5_2{{$}}
+; OPT: br_if {{[^,]*}}, BB5_2{{$}}
+; OPT: br BB5_3{{$}}
+; OPT: BB5_2:
+; OPT: BB5_3:
+; OPT: return ${{[0-9]+}}{{$}}
+define i32 @diamond(i32* %p, i32 %a) {
+entry:
+ %c = icmp eq i32 %a, 0
+ store volatile i32 0, i32* %p
+ br i1 %c, label %true, label %false
+true:
+ store volatile i32 1, i32* %p
+ br label %exit
+false:
+ store volatile i32 2, i32* %p
+ br label %exit
+exit:
+ store volatile i32 3, i32* %p
+ ret i32 0
+}
+
+; CHECK-LABEL: single_block:
+; CHECK-NOT: br
+; CHECK: return $pop{{[0-9]+}}{{$}}
+; OPT-LABEL: single_block:
+; OPT-NOT: br
+; OPT: return $pop{{[0-9]+}}{{$}}
+define i32 @single_block(i32* %p) {
+entry:
+ store volatile i32 0, i32* %p
+ ret i32 0
+}
+
+; CHECK-LABEL: minimal_loop:
+; CHECK-NOT: br
+; CHECK: BB7_1:
+; CHECK: i32.store $discard=, 0($0), $pop{{[0-9]+}}{{$}}
+; CHECK: br BB7_1{{$}}
+; CHECK: BB7_2:
+; OPT-LABEL: minimal_loop:
+; OPT-NOT: br
+; OPT: BB7_1:
+; OPT: i32.store $discard=, 0($0), $pop{{[0-9]+}}{{$}}
+; OPT: br BB7_1{{$}}
+; OPT: BB7_2:
+define i32 @minimal_loop(i32* %p) {
+entry:
+ store volatile i32 0, i32* %p
+ br label %loop
+loop:
+ store volatile i32 1, i32* %p
+ br label %loop
+}
+
+; CHECK-LABEL: simple_loop:
+; CHECK-NOT: br
+; CHECK: BB8_1:
+; CHECK: loop BB8_2{{$}}
+; CHECK: br_if $pop{{[0-9]+}}, BB8_1{{$}}
+; CHECK: BB8_2:
+; CHECK: return ${{[0-9]+}}{{$}}
+; OPT-LABEL: simple_loop:
+; OPT-NOT: br
+; OPT: BB8_1:
+; OPT: loop BB8_2{{$}}
+; OPT: br_if {{[^,]*}}, BB8_1{{$}}
+; OPT: BB8_2:
+; OPT: return ${{[0-9]+}}{{$}}
+define i32 @simple_loop(i32* %p, i32 %a) {
+entry:
+ %c = icmp eq i32 %a, 0
+ store volatile i32 0, i32* %p
+ br label %loop
+loop:
+ store volatile i32 1, i32* %p
+ br i1 %c, label %loop, label %exit
+exit:
+ store volatile i32 2, i32* %p
+ ret i32 0
+}
+
+; CHECK-LABEL: doubletriangle:
+; CHECK: block BB9_4{{$}}
+; CHECK: br_if $0, BB9_4{{$}}
+; CHECK: block BB9_3{{$}}
+; CHECK: br_if $1, BB9_3{{$}}
+; CHECK: BB9_3:
+; CHECK: BB9_4:
+; CHECK: return ${{[0-9]+}}{{$}}
+; OPT-LABEL: doubletriangle:
+; OPT: block BB9_4{{$}}
+; OPT: br_if $0, BB9_4{{$}}
+; OPT: block BB9_3{{$}}
+; OPT: br_if $1, BB9_3{{$}}
+; OPT: BB9_3:
+; OPT: BB9_4:
+; OPT: return ${{[0-9]+}}{{$}}
+define i32 @doubletriangle(i32 %a, i32 %b, i32* %p) {
+entry:
+ %c = icmp eq i32 %a, 0
+ %d = icmp eq i32 %b, 0
+ store volatile i32 0, i32* %p
+ br i1 %c, label %true, label %exit
+true:
+ store volatile i32 2, i32* %p
+ br i1 %d, label %tt, label %tf
+tt:
+ store volatile i32 3, i32* %p
+ br label %tf
+tf:
+ store volatile i32 4, i32* %p
+ br label %exit
+exit:
+ store volatile i32 5, i32* %p
+ ret i32 0
+}
+
+; CHECK-LABEL: ifelse_earlyexits:
+; CHECK: block BB10_4{{$}}
+; CHECK: block BB10_2{{$}}
+; CHECK: br_if $0, BB10_2{{$}}
+; CHECK: br BB10_4{{$}}
+; CHECK: BB10_2:
+; CHECK: br_if $1, BB10_4{{$}}
+; CHECK: BB10_4:
+; CHECK: return ${{[0-9]+}}{{$}}
+; OPT-LABEL: ifelse_earlyexits:
+; OPT: block BB10_4{{$}}
+; OPT: block BB10_3{{$}}
+; OPT: br_if {{[^,]*}}, BB10_3{{$}}
+; OPT: br_if $1, BB10_4{{$}}
+; OPT: br BB10_4{{$}}
+; OPT: BB10_3:
+; OPT: BB10_4:
+; OPT: return ${{[0-9]+}}{{$}}
+define i32 @ifelse_earlyexits(i32 %a, i32 %b, i32* %p) {
+entry:
+ %c = icmp eq i32 %a, 0
+ %d = icmp eq i32 %b, 0
+ store volatile i32 0, i32* %p
+ br i1 %c, label %true, label %false
+true:
+ store volatile i32 1, i32* %p
+ br label %exit
+false:
+ store volatile i32 2, i32* %p
+ br i1 %d, label %ft, label %exit
+ft:
+ store volatile i32 3, i32* %p
+ br label %exit
+exit:
+ store volatile i32 4, i32* %p
+ ret i32 0
+}
+
+; CHECK-LABEL: doublediamond_in_a_loop:
+; CHECK: BB11_1:
+; CHECK: loop BB11_7{{$}}
+; CHECK: block BB11_6{{$}}
+; CHECK: block BB11_3{{$}}
+; CHECK: br_if $0, BB11_3{{$}}
+; CHECK: br BB11_6{{$}}
+; CHECK: BB11_3:
+; CHECK: block BB11_5{{$}}
+; CHECK: br_if $1, BB11_5{{$}}
+; CHECK: br BB11_6{{$}}
+; CHECK: BB11_5:
+; CHECK: BB11_6:
+; CHECK: br BB11_1{{$}}
+; CHECK: BB11_7:
+; OPT-LABEL: doublediamond_in_a_loop:
+; OPT: BB11_1:
+; OPT: loop BB11_7{{$}}
+; OPT: block BB11_6{{$}}
+; OPT: block BB11_5{{$}}
+; OPT: br_if {{[^,]*}}, BB11_5{{$}}
+; OPT: block BB11_4{{$}}
+; OPT: br_if {{[^,]*}}, BB11_4{{$}}
+; OPT: br BB11_6{{$}}
+; OPT: BB11_4:
+; OPT: br BB11_6{{$}}
+; OPT: BB11_5:
+; OPT: BB11_6:
+; OPT: br BB11_1{{$}}
+; OPT: BB11_7:
+define i32 @doublediamond_in_a_loop(i32 %a, i32 %b, i32* %p) {
+entry:
+ br label %header
+header:
+ %c = icmp eq i32 %a, 0
+ %d = icmp eq i32 %b, 0
+ store volatile i32 0, i32* %p
+ br i1 %c, label %true, label %false
+true:
+ store volatile i32 1, i32* %p
+ br label %exit
+false:
+ store volatile i32 2, i32* %p
+ br i1 %d, label %ft, label %ff
+ft:
+ store volatile i32 3, i32* %p
+ br label %exit
+ff:
+ store volatile i32 4, i32* %p
+ br label %exit
+exit:
+ store volatile i32 5, i32* %p
+ br label %header
+}
+
+; Test that nested loops are handled.
+
+; CHECK-LABEL: test3:
+; CHECK: loop
+; CHECK-NEXT: br_if
+; CHECK-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK-NEXT: loop
+; OPT-LABEL: test3:
+; OPT: loop
+; OPT-NEXT: br_if
+; OPT-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
+; OPT-NEXT: loop
+declare void @bar()
+define void @test3(i32 %w) {
+entry:
+ br i1 undef, label %outer.ph, label %exit
+
+outer.ph:
+ br label %outer
+
+outer:
+ %tobool = icmp eq i32 undef, 0
+ br i1 %tobool, label %inner, label %unreachable
+
+unreachable:
+ unreachable
+
+inner:
+ %c = icmp eq i32 undef, %w
+ br i1 %c, label %if.end, label %inner
+
+exit:
+ ret void
+
+if.end:
+ call void @bar()
+ br label %outer
+}
+
+; Test switch lowering and block placement.
+
+; CHECK-LABEL: test4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK: block BB13_8{{$}}
+; CHECK-NEXT: block BB13_7{{$}}
+; CHECK-NEXT: block BB13_4{{$}}
+; CHECK: br_if $pop{{[0-9]*}}, BB13_4{{$}}
+; CHECK-NEXT: block BB13_3{{$}}
+; CHECK: br_if $pop{{[0-9]*}}, BB13_3{{$}}
+; CHECK: br_if $pop{{[0-9]*}}, BB13_7{{$}}
+; CHECK-NEXT: BB13_3:
+; CHECK-NEXT: return{{$}}
+; CHECK-NEXT: BB13_4:
+; CHECK: br_if $pop{{[0-9]*}}, BB13_8{{$}}
+; CHECK: br_if $pop{{[0-9]*}}, BB13_7{{$}}
+; CHECK-NEXT: return{{$}}
+; CHECK-NEXT: BB13_7:
+; CHECK-NEXT: return{{$}}
+; CHECK-NEXT: BB13_8:
+; CHECK-NEXT: return{{$}}
+; OPT-LABEL: test4:
+; OPT-NEXT: .param i32{{$}}
+; OPT: block BB13_8{{$}}
+; OPT-NEXT: block BB13_7{{$}}
+; OPT-NEXT: block BB13_4{{$}}
+; OPT: br_if $pop{{[0-9]*}}, BB13_4{{$}}
+; OPT-NEXT: block BB13_3{{$}}
+; OPT: br_if $pop{{[0-9]*}}, BB13_3{{$}}
+; OPT: br_if $pop{{[0-9]*}}, BB13_7{{$}}
+; OPT-NEXT: BB13_3:
+; OPT-NEXT: return{{$}}
+; OPT-NEXT: BB13_4:
+; OPT: br_if $pop{{[0-9]*}}, BB13_8{{$}}
+; OPT: br_if $pop{{[0-9]*}}, BB13_7{{$}}
+; OPT-NEXT: return{{$}}
+; OPT-NEXT: BB13_7:
+; OPT-NEXT: return{{$}}
+; OPT-NEXT: BB13_8:
+; OPT-NEXT: return{{$}}
+define void @test4(i32 %t) {
+entry:
+ switch i32 %t, label %default [
+ i32 0, label %bb2
+ i32 2, label %bb2
+ i32 4, label %bb1
+ i32 622, label %bb0
+ ]
+
+bb0:
+ ret void
+
+bb1:
+ ret void
+
+bb2:
+ ret void
+
+default:
+ ret void
+}
+
+; Test a case where the BLOCK needs to be placed before the LOOP in the
+; same basic block.
+
+; CHECK-LABEL: test5:
+; CHECK: BB14_1:
+; CHECK-NEXT: block BB14_4{{$}}
+; CHECK-NEXT: loop BB14_3{{$}}
+; CHECK: br_if {{[^,]*}}, BB14_4{{$}}
+; CHECK: br_if {{[^,]*}}, BB14_1{{$}}
+; CHECK-NEXT: BB14_3:
+; CHECK: return{{$}}
+; CHECK-NEXT: BB14_4:
+; CHECK: return{{$}}
+; OPT-LABEL: test5:
+; OPT: BB14_1:
+; OPT-NEXT: block BB14_4{{$}}
+; OPT-NEXT: loop BB14_3{{$}}
+; OPT: br_if {{[^,]*}}, BB14_4{{$}}
+; OPT: br_if {{[^,]*}}, BB14_1{{$}}
+; OPT-NEXT: BB14_3:
+; OPT: return{{$}}
+; OPT-NEXT: BB14_4:
+; OPT: return{{$}}
+define void @test5(i1 %p, i1 %q) {
+entry:
+ br label %header
+
+header:
+ store volatile i32 0, i32* null
+ br i1 %p, label %more, label %alt
+
+more:
+ store volatile i32 1, i32* null
+ br i1 %q, label %header, label %return
+
+alt:
+ store volatile i32 2, i32* null
+ ret void
+
+return:
+ store volatile i32 3, i32* null
+ ret void
+}
+
+; Test an interesting case of a loop with multiple exits, which
+; aren't to layout successors of the loop, and one of which is to a successors
+; which has another predecessor.
+
+; CHECK-LABEL: test6:
+; CHECK: BB15_1:
+; CHECK-NEXT: block BB15_6{{$}}
+; CHECK-NEXT: block BB15_5{{$}}
+; CHECK-NEXT: loop BB15_4{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB15_6{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB15_5{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB15_1{{$}}
+; CHECK-NEXT: BB15_4:
+; CHECK-NOT: block
+; CHECK: return{{$}}
+; CHECK-NEXT: BB15_5:
+; CHECK-NOT: block
+; CHECK: BB15_6:
+; CHECK-NOT: block
+; CHECK: return{{$}}
+; OPT-LABEL: test6:
+; OPT: BB15_1:
+; OPT-NEXT: block BB15_6{{$}}
+; OPT-NEXT: block BB15_5{{$}}
+; OPT-NEXT: loop BB15_4{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB15_6{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB15_5{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB15_1{{$}}
+; OPT-NEXT: BB15_4:
+; OPT-NOT: block
+; OPT: return{{$}}
+; OPT-NEXT: BB15_5:
+; OPT-NOT: block
+; OPT: BB15_6:
+; OPT-NOT: block
+; OPT: return{{$}}
+define void @test6(i1 %p, i1 %q) {
+entry:
+ br label %header
+
+header:
+ store volatile i32 0, i32* null
+ br i1 %p, label %more, label %second
+
+more:
+ store volatile i32 1, i32* null
+ br i1 %q, label %evenmore, label %first
+
+evenmore:
+ store volatile i32 1, i32* null
+ br i1 %q, label %header, label %return
+
+return:
+ store volatile i32 2, i32* null
+ ret void
+
+first:
+ store volatile i32 3, i32* null
+ br label %second
+
+second:
+ store volatile i32 4, i32* null
+ ret void
+}
+
+; Test a case where there are multiple backedges and multiple loop exits
+; that end in unreachable.
+
+; CHECK-LABEL: test7:
+; CHECK: BB16_1:
+; CHECK-NEXT: loop BB16_5{{$}}
+; CHECK-NOT: block
+; CHECK: block BB16_4{{$}}
+; CHECK: br_if {{[^,]*}}, BB16_4{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB16_1{{$}}
+; CHECK-NOT: block
+; CHECK: unreachable
+; CHECK_NEXT: BB16_4:
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB16_1{{$}}
+; CHECK-NEXT: BB16_5:
+; CHECK-NOT: block
+; CHECK: unreachable
+; OPT-LABEL: test7:
+; OPT: BB16_1:
+; OPT-NEXT: loop BB16_5{{$}}
+; OPT-NOT: block
+; OPT: block BB16_4{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB16_4{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB16_1{{$}}
+; OPT-NOT: block
+; OPT: unreachable
+; OPT_NEXT: BB16_4:
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB16_1{{$}}
+; OPT-NEXT: BB16_5:
+; OPT-NOT: block
+; OPT: unreachable
+define void @test7(i1 %tobool2, i1 %tobool9) {
+entry:
+ store volatile i32 0, i32* null
+ br label %loop
+
+loop:
+ store volatile i32 1, i32* null
+ br i1 %tobool2, label %l1, label %l0
+
+l0:
+ store volatile i32 2, i32* null
+ br i1 %tobool9, label %loop, label %u0
+
+l1:
+ store volatile i32 3, i32* null
+ br i1 %tobool9, label %loop, label %u1
+
+u0:
+ store volatile i32 4, i32* null
+ unreachable
+
+u1:
+ store volatile i32 5, i32* null
+ unreachable
+}
+
+; Test an interesting case using nested loops and switches.
+
+; CHECK-LABEL: test8:
+; CHECK: BB17_1:
+; CHECK-NEXT: loop BB17_4{{$}}
+; CHECK-NEXT: block BB17_3{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB17_3{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB17_1{{$}}
+; CHECK-NEXT: BB17_3:
+; CHECK-NEXT: loop BB17_4{{$}}
+; CHECK-NEXT: br_if {{[^,]*}}, BB17_3{{$}}
+; CHECK-NEXT: br BB17_1{{$}}
+; CHECK-NEXT: BB17_4:
+; OPT-LABEL: test8:
+; OPT: BB17_1:
+; OPT-NEXT: loop BB17_4{{$}}
+; OPT-NEXT: block BB17_3{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB17_3{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB17_1{{$}}
+; OPT-NEXT: BB17_3:
+; OPT-NEXT: loop BB17_4{{$}}
+; OPT-NEXT: br_if {{[^,]*}}, BB17_3{{$}}
+; OPT-NEXT: br BB17_1{{$}}
+; OPT-NEXT: BB17_4:
+define i32 @test8() {
+bb:
+ br label %bb1
+
+bb1:
+ br i1 undef, label %bb2, label %bb3
+
+bb2:
+ switch i8 undef, label %bb1 [
+ i8 44, label %bb2
+ ]
+
+bb3:
+ switch i8 undef, label %bb1 [
+ i8 44, label %bb2
+ ]
+}
+
+; Test an interesting case using nested loops that share a bottom block.
+
+; CHECK-LABEL: test9:
+; CHECK: BB18_1:
+; CHECK-NEXT: loop BB18_5{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB18_5{{$}}
+; CHECK-NEXT: BB18_2:
+; CHECK-NEXT: loop BB18_5{{$}}
+; CHECK-NOT: block
+; CHECK: block BB18_4{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB18_4{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB18_2{{$}}
+; CHECK-NEXT: br BB18_1{{$}}
+; CHECK-NEXT: BB18_4:
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB18_2{{$}}
+; CHECK-NEXT: br BB18_1{{$}}
+; CHECK-NEXT: BB18_5:
+; CHECK-NOT: block
+; CHECK: return{{$}}
+; OPT-LABEL: test9:
+; OPT: BB18_1:
+; OPT-NEXT: loop BB18_5{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB18_5{{$}}
+; OPT-NEXT: BB18_2:
+; OPT-NEXT: loop BB18_5{{$}}
+; OPT-NOT: block
+; OPT: block BB18_4{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB18_4{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB18_2{{$}}
+; OPT-NEXT: br BB18_1{{$}}
+; OPT-NEXT: BB18_4:
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB18_2{{$}}
+; OPT-NEXT: br BB18_1{{$}}
+; OPT-NEXT: BB18_5:
+; OPT-NOT: block
+; OPT: return{{$}}
+declare i1 @a()
+define void @test9() {
+entry:
+ store volatile i32 0, i32* null
+ br label %header
+
+header:
+ store volatile i32 1, i32* null
+ %call4 = call i1 @a()
+ br i1 %call4, label %header2, label %end
+
+header2:
+ store volatile i32 2, i32* null
+ %call = call i1 @a()
+ br i1 %call, label %if.then, label %if.else
+
+if.then:
+ store volatile i32 3, i32* null
+ %call3 = call i1 @a()
+ br i1 %call3, label %header2, label %header
+
+if.else:
+ store volatile i32 4, i32* null
+ %call2 = call i1 @a()
+ br i1 %call2, label %header2, label %header
+
+end:
+ store volatile i32 5, i32* null
+ ret void
+}
+
+; Test an interesting case involving nested loops sharing a loop bottom,
+; and loop exits to a block with unreachable.
+
+; CHECK-LABEL: test10:
+; CHECK: BB19_1:
+; CHECK-NEXT: loop BB19_7{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB19_1{{$}}
+; CHECK-NEXT: BB19_2:
+; CHECK-NEXT: block BB19_6{{$}}
+; CHECK-NEXT: loop BB19_5{{$}}
+; CHECK-NOT: block
+; CHECK: BB19_3:
+; CHECK-NEXT: loop BB19_5{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB19_1{{$}}
+; CHECK-NOT: block
+; CHECK: tableswitch {{[^,]*}}, BB19_3, BB19_3, BB19_5, BB19_1, BB19_2, BB19_6{{$}}
+; CHECK-NEXT: BB19_5:
+; CHECK-NEXT: return{{$}}
+; CHECK-NEXT: BB19_6:
+; CHECK-NOT: block
+; CHECK: br BB19_1{{$}}
+; CHECK-NEXT: BB19_7:
+; OPT-LABEL: test10:
+; OPT: BB19_1:
+; OPT-NEXT: loop BB19_7{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB19_1{{$}}
+; OPT-NEXT: BB19_2:
+; OPT-NEXT: block BB19_6{{$}}
+; OPT-NEXT: loop BB19_5{{$}}
+; OPT-NOT: block
+; OPT: BB19_3:
+; OPT-NEXT: loop BB19_5{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB19_1{{$}}
+; OPT-NOT: block
+; OPT: tableswitch {{[^,]*}}, BB19_3, BB19_3, BB19_5, BB19_1, BB19_2, BB19_6{{$}}
+; OPT-NEXT: BB19_5:
+; OPT-NEXT: return{{$}}
+; OPT-NEXT: BB19_6:
+; OPT-NOT: block
+; OPT: br BB19_1{{$}}
+; OPT-NEXT: BB19_7:
+define void @test10() {
+bb0:
+ br label %bb1
+
+bb1:
+ %tmp = phi i32 [ 2, %bb0 ], [ 3, %bb3 ]
+ %tmp3 = phi i32 [ undef, %bb0 ], [ %tmp11, %bb3 ]
+ %tmp4 = icmp eq i32 %tmp3, 0
+ br i1 %tmp4, label %bb4, label %bb2
+
+bb2:
+ br label %bb3
+
+bb3:
+ %tmp11 = phi i32 [ 1, %bb5 ], [ 0, %bb2 ]
+ br label %bb1
+
+bb4:
+ %tmp6 = phi i32 [ %tmp9, %bb5 ], [ 4, %bb1 ]
+ %tmp7 = phi i32 [ %tmp6, %bb5 ], [ %tmp, %bb1 ]
+ br label %bb5
+
+bb5:
+ %tmp9 = phi i32 [ %tmp6, %bb5 ], [ %tmp7, %bb4 ]
+ switch i32 %tmp9, label %bb2 [
+ i32 0, label %bb5
+ i32 1, label %bb6
+ i32 3, label %bb4
+ i32 4, label %bb3
+ ]
+
+bb6:
+ ret void
+}
+
+; Test a CFG DAG with interesting merging.
+
+; CHECK-LABEL: test11:
+; CHECK: block BB20_8{{$}}
+; CHECK-NEXT: block BB20_7{{$}}
+; CHECK-NEXT: block BB20_6{{$}}
+; CHECK-NEXT: block BB20_4{{$}}
+; CHECK-NEXT: br_if {{[^,]*}}, BB20_4{{$}}
+; CHECK-NOT: block
+; CHECK: block BB20_3{{$}}
+; CHECK: br_if {{[^,]*}}, BB20_3{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB20_6{{$}}
+; CHECK-NEXT: BB20_3:
+; CHECK-NOT: block
+; CHECK: return{{$}}
+; CHECK-NEXT: BB20_4:
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB20_8{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB20_7{{$}}
+; CHECK-NEXT: BB20_6:
+; CHECK-NOT: block
+; CHECK: return{{$}}
+; CHECK-NEXT: BB20_7:
+; CHECK-NOT: block
+; CHECK: return{{$}}
+; CHECK-NEXT: BB20_8:
+; CHECK-NOT: block
+; CHECK: return{{$}}
+; OPT-LABEL: test11:
+; OPT: block BB20_8{{$}}
+; OPT-NEXT: block BB20_4{{$}}
+; OPT-NEXT: br_if $0, BB20_4{{$}}
+; OPT-NOT: block
+; OPT: block BB20_3{{$}}
+; OPT: br_if $0, BB20_3{{$}}
+; OPT-NOT: block
+; OPT: br_if $0, BB20_8{{$}}
+; OPT-NEXT: BB20_3:
+; OPT-NOT: block
+; OPT: return{{$}}
+; OPT-NEXT: BB20_4:
+; OPT-NOT: block
+; OPT: block BB20_6{{$}}
+; OPT-NOT: block
+; OPT: br_if $pop9, BB20_6{{$}}
+; OPT-NOT: block
+; OPT: return{{$}}
+; OPT-NEXT: BB20_6:
+; OPT-NOT: block
+; OPT: br_if $0, BB20_8{{$}}
+; OPT-NOT: block
+; OPT: return{{$}}
+; OPT-NEXT: BB20_8:
+; OPT-NOT: block
+; OPT: return{{$}}
+define void @test11() {
+bb0:
+ store volatile i32 0, i32* null
+ br i1 undef, label %bb1, label %bb4
+bb1:
+ store volatile i32 1, i32* null
+ br i1 undef, label %bb3, label %bb2
+bb2:
+ store volatile i32 2, i32* null
+ br i1 undef, label %bb3, label %bb7
+bb3:
+ store volatile i32 3, i32* null
+ ret void
+bb4:
+ store volatile i32 4, i32* null
+ br i1 undef, label %bb8, label %bb5
+bb5:
+ store volatile i32 5, i32* null
+ br i1 undef, label %bb6, label %bb7
+bb6:
+ store volatile i32 6, i32* null
+ ret void
+bb7:
+ store volatile i32 7, i32* null
+ ret void
+bb8:
+ store volatile i32 8, i32* null
+ ret void
+}
+
+; CHECK-LABEL: test12:
+; CHECK: BB21_1:
+; CHECK-NEXT: loop BB21_8{{$}}
+; CHECK-NOT: block
+; CHECK: block BB21_7{{$}}
+; CHECK-NEXT: block BB21_6{{$}}
+; CHECK-NEXT: block BB21_4{{$}}
+; CHECK: br_if {{[^,]*}}, BB21_4{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB21_7{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB21_7{{$}}
+; CHECK-NEXT: br BB21_6{{$}}
+; CHECK-NEXT: BB21_4:
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB21_7{{$}}
+; CHECK-NOT: block
+; CHECK: br_if {{[^,]*}}, BB21_7{{$}}
+; CHECK-NEXT: BB21_6:
+; CHECK-NEXT: return{{$}}
+; CHECK-NEXT: BB21_7:
+; CHECK-NOT: block
+; CHECK: br BB21_1{{$}}
+; CHECK-NEXT: BB21_8:
+; OPT-LABEL: test12:
+; OPT: BB21_1:
+; OPT-NEXT: loop BB21_8{{$}}
+; OPT-NOT: block
+; OPT: block BB21_7{{$}}
+; OPT-NEXT: block BB21_6{{$}}
+; OPT-NEXT: block BB21_4{{$}}
+; OPT: br_if {{[^,]*}}, BB21_4{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB21_7{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB21_7{{$}}
+; OPT-NEXT: br BB21_6{{$}}
+; OPT-NEXT: BB21_4:
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB21_7{{$}}
+; OPT-NOT: block
+; OPT: br_if {{[^,]*}}, BB21_7{{$}}
+; OPT-NEXT: BB21_6:
+; OPT-NEXT: return{{$}}
+; OPT-NEXT: BB21_7:
+; OPT-NOT: block
+; OPT: br BB21_1{{$}}
+; OPT-NEXT: BB21_8:
+define void @test12(i8* %arg) {
+bb:
+ br label %bb1
+
+bb1:
+ %tmp = phi i32 [ 0, %bb ], [ %tmp5, %bb4 ]
+ %tmp2 = getelementptr i8, i8* %arg, i32 %tmp
+ %tmp3 = load i8, i8* %tmp2
+ switch i8 %tmp3, label %bb7 [
+ i8 42, label %bb4
+ i8 76, label %bb4
+ i8 108, label %bb4
+ i8 104, label %bb4
+ ]
+
+bb4:
+ %tmp5 = add i32 %tmp, 1
+ br label %bb1
+
+bb7:
+ ret void
+}
+
+; A block can be "branched to" from another even if it is also reachable via
+; fallthrough from the other. This would normally be optimized away, so use
+; optnone to disable optimizations to test this case.
+
+; CHECK-LABEL: test13:
+; CHECK-NEXT: .local i32{{$}}
+; CHECK: block BB22_2{{$}}
+; CHECK: br_if $pop4, BB22_2{{$}}
+; CHECK-NEXT: return{{$}}
+; CHECK-NEXT: BB22_2:
+; CHECK: block BB22_4{{$}}
+; CHECK-NEXT: br_if $0, BB22_4{{$}}
+; CHECK: BB22_4:
+; CHECK: block BB22_5{{$}}
+; CHECK: br_if $pop6, BB22_5{{$}}
+; CHECK-NEXT: BB22_5:
+; CHECK-NEXT: unreachable{{$}}
+; OPT-LABEL: test13:
+; OPT-NEXT: .local i32{{$}}
+; OPT: block BB22_2{{$}}
+; OPT: br_if $pop4, BB22_2{{$}}
+; OPT-NEXT: return{{$}}
+; OPT-NEXT: BB22_2:
+; OPT: block BB22_4{{$}}
+; OPT-NEXT: br_if $0, BB22_4{{$}}
+; OPT: BB22_4:
+; OPT: block BB22_5{{$}}
+; OPT: br_if $pop6, BB22_5{{$}}
+; OPT-NEXT: BB22_5:
+; OPT-NEXT: unreachable{{$}}
+define void @test13() noinline optnone {
+bb:
+ br i1 undef, label %bb5, label %bb2
+bb1:
+ unreachable
+bb2:
+ br i1 undef, label %bb3, label %bb4
+bb3:
+ br label %bb4
+bb4:
+ %tmp = phi i1 [ false, %bb2 ], [ false, %bb3 ]
+ br i1 %tmp, label %bb1, label %bb1
+bb5:
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/comparisons_f32.ll b/test/CodeGen/WebAssembly/comparisons_f32.ll
new file mode 100644
index 000000000000..6df37ea1c6dd
--- /dev/null
+++ b/test/CodeGen/WebAssembly/comparisons_f32.ll
@@ -0,0 +1,181 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic 32-bit floating-point comparison operations assemble as
+; expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: ord_f32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f32.eq $push[[NUM0:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f32.eq $push[[NUM1:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.and $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
+; CHECK-NEXT: return $pop[[NUM2]]{{$}}
+define i32 @ord_f32(float %x, float %y) {
+ %a = fcmp ord float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: uno_f32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM0:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
+; CHECK-NEXT: return $pop[[NUM2]]{{$}}
+define i32 @uno_f32(float %x, float %y) {
+ %a = fcmp uno float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: oeq_f32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f32.eq $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @oeq_f32(float %x, float %y) {
+ %a = fcmp oeq float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: une_f32:
+; CHECK: f32.ne $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @une_f32(float %x, float %y) {
+ %a = fcmp une float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: olt_f32:
+; CHECK: f32.lt $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @olt_f32(float %x, float %y) {
+ %a = fcmp olt float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ole_f32:
+; CHECK: f32.le $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ole_f32(float %x, float %y) {
+ %a = fcmp ole float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ogt_f32:
+; CHECK: f32.gt $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ogt_f32(float %x, float %y) {
+ %a = fcmp ogt float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: oge_f32:
+; CHECK: f32.ge $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @oge_f32(float %x, float %y) {
+ %a = fcmp oge float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; Expanded comparisons, which also check for NaN.
+
+; CHECK-LABEL: ueq_f32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f32.eq $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @ueq_f32(float %x, float %y) {
+ %a = fcmp ueq float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: one_f32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f32.eq $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f32.eq $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.and $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.and $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]
+define i32 @one_f32(float %x, float %y) {
+ %a = fcmp one float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ult_f32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f32.lt $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @ult_f32(float %x, float %y) {
+ %a = fcmp ult float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ule_f32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f32.le $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @ule_f32(float %x, float %y) {
+ %a = fcmp ule float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ugt_f32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f32.gt $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @ugt_f32(float %x, float %y) {
+ %a = fcmp ugt float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: uge_f32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f32.ge $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f32.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @uge_f32(float %x, float %y) {
+ %a = fcmp uge float %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
diff --git a/test/CodeGen/WebAssembly/comparisons_f64.ll b/test/CodeGen/WebAssembly/comparisons_f64.ll
new file mode 100644
index 000000000000..f5acc64b667c
--- /dev/null
+++ b/test/CodeGen/WebAssembly/comparisons_f64.ll
@@ -0,0 +1,181 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic 64-bit floating-point comparison operations assemble as
+; expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: ord_f64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f64.eq $push[[NUM0:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f64.eq $push[[NUM1:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.and $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
+; CHECK-NEXT: return $pop[[NUM2]]{{$}}
+define i32 @ord_f64(double %x, double %y) {
+ %a = fcmp ord double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: uno_f64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM0:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM1]]{{$}}
+; CHECK-NEXT: return $pop[[NUM2]]{{$}}
+define i32 @uno_f64(double %x, double %y) {
+ %a = fcmp uno double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: oeq_f64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f64.eq $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @oeq_f64(double %x, double %y) {
+ %a = fcmp oeq double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: une_f64:
+; CHECK: f64.ne $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @une_f64(double %x, double %y) {
+ %a = fcmp une double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: olt_f64:
+; CHECK: f64.lt $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @olt_f64(double %x, double %y) {
+ %a = fcmp olt double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ole_f64:
+; CHECK: f64.le $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ole_f64(double %x, double %y) {
+ %a = fcmp ole double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ogt_f64:
+; CHECK: f64.gt $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ogt_f64(double %x, double %y) {
+ %a = fcmp ogt double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: oge_f64:
+; CHECK: f64.ge $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @oge_f64(double %x, double %y) {
+ %a = fcmp oge double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; Expanded comparisons, which also check for NaN.
+
+; CHECK-LABEL: ueq_f64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f64.eq $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @ueq_f64(double %x, double %y) {
+ %a = fcmp ueq double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: one_f64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f64.eq $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f64.eq $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.and $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.and $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]
+define i32 @one_f64(double %x, double %y) {
+ %a = fcmp one double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ult_f64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f64.lt $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @ult_f64(double %x, double %y) {
+ %a = fcmp ult double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ule_f64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f64.le $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @ule_f64(double %x, double %y) {
+ %a = fcmp ule double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ugt_f64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f64.gt $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @ugt_f64(double %x, double %y) {
+ %a = fcmp ugt double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: uge_f64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: f64.ge $push[[NUM0:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM1:[0-9]+]]=, $0, $0{{$}}
+; CHECK-NEXT: f64.ne $push[[NUM2:[0-9]+]]=, $1, $1{{$}}
+; CHECK-NEXT: i32.or $push[[NUM3:[0-9]+]]=, $pop[[NUM1]], $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.or $push[[NUM4:[0-9]+]]=, $pop[[NUM0]], $pop[[NUM3]]{{$}}
+; CHECK-NEXT: return $pop[[NUM4]]{{$}}
+define i32 @uge_f64(double %x, double %y) {
+ %a = fcmp uge double %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
diff --git a/test/CodeGen/WebAssembly/comparisons_i32.ll b/test/CodeGen/WebAssembly/comparisons_i32.ll
new file mode 100644
index 000000000000..b724cec1cc63
--- /dev/null
+++ b/test/CodeGen/WebAssembly/comparisons_i32.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic 32-bit integer comparison operations assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: eq_i32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.eq $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @eq_i32(i32 %x, i32 %y) {
+ %a = icmp eq i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ne_i32:
+; CHECK: i32.ne $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ne_i32(i32 %x, i32 %y) {
+ %a = icmp ne i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: slt_i32:
+; CHECK: i32.lt_s $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @slt_i32(i32 %x, i32 %y) {
+ %a = icmp slt i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: sle_i32:
+; CHECK: i32.le_s $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @sle_i32(i32 %x, i32 %y) {
+ %a = icmp sle i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ult_i32:
+; CHECK: i32.lt_u $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ult_i32(i32 %x, i32 %y) {
+ %a = icmp ult i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ule_i32:
+; CHECK: i32.le_u $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ule_i32(i32 %x, i32 %y) {
+ %a = icmp ule i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: sgt_i32:
+; CHECK: i32.gt_s $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @sgt_i32(i32 %x, i32 %y) {
+ %a = icmp sgt i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: sge_i32:
+; CHECK: i32.ge_s $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @sge_i32(i32 %x, i32 %y) {
+ %a = icmp sge i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ugt_i32:
+; CHECK: i32.gt_u $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ugt_i32(i32 %x, i32 %y) {
+ %a = icmp ugt i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: uge_i32:
+; CHECK: i32.ge_u $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @uge_i32(i32 %x, i32 %y) {
+ %a = icmp uge i32 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
diff --git a/test/CodeGen/WebAssembly/comparisons_i64.ll b/test/CodeGen/WebAssembly/comparisons_i64.ll
new file mode 100644
index 000000000000..898591999bec
--- /dev/null
+++ b/test/CodeGen/WebAssembly/comparisons_i64.ll
@@ -0,0 +1,98 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic 64-bit integer comparison operations assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: eq_i64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i64.eq $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @eq_i64(i64 %x, i64 %y) {
+ %a = icmp eq i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ne_i64:
+; CHECK: i64.ne $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ne_i64(i64 %x, i64 %y) {
+ %a = icmp ne i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: slt_i64:
+; CHECK: i64.lt_s $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @slt_i64(i64 %x, i64 %y) {
+ %a = icmp slt i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: sle_i64:
+; CHECK: i64.le_s $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @sle_i64(i64 %x, i64 %y) {
+ %a = icmp sle i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ult_i64:
+; CHECK: i64.lt_u $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ult_i64(i64 %x, i64 %y) {
+ %a = icmp ult i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ule_i64:
+; CHECK: i64.le_u $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ule_i64(i64 %x, i64 %y) {
+ %a = icmp ule i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: sgt_i64:
+; CHECK: i64.gt_s $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @sgt_i64(i64 %x, i64 %y) {
+ %a = icmp sgt i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: sge_i64:
+; CHECK: i64.ge_s $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @sge_i64(i64 %x, i64 %y) {
+ %a = icmp sge i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: ugt_i64:
+; CHECK: i64.gt_u $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ugt_i64(i64 %x, i64 %y) {
+ %a = icmp ugt i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+; CHECK-LABEL: uge_i64:
+; CHECK: i64.ge_u $push[[NUM:[0-9]+]]=, $0, $1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @uge_i64(i64 %x, i64 %y) {
+ %a = icmp uge i64 %x, %y
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
diff --git a/test/CodeGen/WebAssembly/conv.ll b/test/CodeGen/WebAssembly/conv.ll
new file mode 100644
index 000000000000..e1acaca2c9ec
--- /dev/null
+++ b/test/CodeGen/WebAssembly/conv.ll
@@ -0,0 +1,255 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic conversion operations assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: i32_wrap_i64:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.wrap/i64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @i32_wrap_i64(i64 %x) {
+ %a = trunc i64 %x to i32
+ ret i32 %a
+}
+
+; CHECK-LABEL: i64_extend_s_i32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.extend_s/i32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @i64_extend_s_i32(i32 %x) {
+ %a = sext i32 %x to i64
+ ret i64 %a
+}
+
+; CHECK-LABEL: i64_extend_u_i32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.extend_u/i32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @i64_extend_u_i32(i32 %x) {
+ %a = zext i32 %x to i64
+ ret i64 %a
+}
+
+; CHECK-LABEL: i32_trunc_s_f32:
+; CHECK-NEXT: .param f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.trunc_s/f32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @i32_trunc_s_f32(float %x) {
+ %a = fptosi float %x to i32
+ ret i32 %a
+}
+
+; CHECK-LABEL: i32_trunc_u_f32:
+; CHECK-NEXT: .param f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.trunc_u/f32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @i32_trunc_u_f32(float %x) {
+ %a = fptoui float %x to i32
+ ret i32 %a
+}
+
+; CHECK-LABEL: i32_trunc_s_f64:
+; CHECK-NEXT: .param f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.trunc_s/f64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @i32_trunc_s_f64(double %x) {
+ %a = fptosi double %x to i32
+ ret i32 %a
+}
+
+; CHECK-LABEL: i32_trunc_u_f64:
+; CHECK-NEXT: .param f64{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.trunc_u/f64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @i32_trunc_u_f64(double %x) {
+ %a = fptoui double %x to i32
+ ret i32 %a
+}
+
+; CHECK-LABEL: i64_trunc_s_f32:
+; CHECK-NEXT: .param f32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.trunc_s/f32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @i64_trunc_s_f32(float %x) {
+ %a = fptosi float %x to i64
+ ret i64 %a
+}
+
+; CHECK-LABEL: i64_trunc_u_f32:
+; CHECK-NEXT: .param f32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.trunc_u/f32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @i64_trunc_u_f32(float %x) {
+ %a = fptoui float %x to i64
+ ret i64 %a
+}
+
+; CHECK-LABEL: i64_trunc_s_f64:
+; CHECK-NEXT: .param f64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.trunc_s/f64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @i64_trunc_s_f64(double %x) {
+ %a = fptosi double %x to i64
+ ret i64 %a
+}
+
+; CHECK-LABEL: i64_trunc_u_f64:
+; CHECK-NEXT: .param f64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.trunc_u/f64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @i64_trunc_u_f64(double %x) {
+ %a = fptoui double %x to i64
+ ret i64 %a
+}
+
+; CHECK-LABEL: f32_convert_s_i32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.convert_s/i32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @f32_convert_s_i32(i32 %x) {
+ %a = sitofp i32 %x to float
+ ret float %a
+}
+
+; CHECK-LABEL: f32_convert_u_i32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.convert_u/i32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @f32_convert_u_i32(i32 %x) {
+ %a = uitofp i32 %x to float
+ ret float %a
+}
+
+; CHECK-LABEL: f64_convert_s_i32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.convert_s/i32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @f64_convert_s_i32(i32 %x) {
+ %a = sitofp i32 %x to double
+ ret double %a
+}
+
+; CHECK-LABEL: f64_convert_u_i32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.convert_u/i32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @f64_convert_u_i32(i32 %x) {
+ %a = uitofp i32 %x to double
+ ret double %a
+}
+
+; CHECK-LABEL: f32_convert_s_i64:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.convert_s/i64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @f32_convert_s_i64(i64 %x) {
+ %a = sitofp i64 %x to float
+ ret float %a
+}
+
+; CHECK-LABEL: f32_convert_u_i64:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.convert_u/i64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @f32_convert_u_i64(i64 %x) {
+ %a = uitofp i64 %x to float
+ ret float %a
+}
+
+; CHECK-LABEL: f64_convert_s_i64:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.convert_s/i64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @f64_convert_s_i64(i64 %x) {
+ %a = sitofp i64 %x to double
+ ret double %a
+}
+
+; CHECK-LABEL: f64_convert_u_i64:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.convert_u/i64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @f64_convert_u_i64(i64 %x) {
+ %a = uitofp i64 %x to double
+ ret double %a
+}
+
+; CHECK-LABEL: f64_promote_f32:
+; CHECK-NEXT: .param f32{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.promote/f32 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @f64_promote_f32(float %x) {
+ %a = fpext float %x to double
+ ret double %a
+}
+
+; CHECK-LABEL: f32_demote_f64:
+; CHECK-NEXT: .param f64{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.demote/f64 $push[[NUM:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @f32_demote_f64(double %x) {
+ %a = fptrunc double %x to float
+ ret float %a
+}
+
+; If the high its are unused, LLVM will optimize sext/zext into anyext, which
+; we need to patterm-match back to a specific instruction.
+
+; CHECK-LABEL: anyext:
+; CHECK: i64.extend_u/i32 $push0=, $0{{$}}
+define i64 @anyext(i32 %x) {
+ %y = sext i32 %x to i64
+ %w = shl i64 %y, 32
+ ret i64 %w
+}
+
+; CHECK-LABEL: bitcast_i32_to_float:
+; CHECK: f32.reinterpret/i32 $push0=, $0{{$}}
+define float @bitcast_i32_to_float(i32 %a) {
+ %t = bitcast i32 %a to float
+ ret float %t
+}
+
+; CHECK-LABEL: bitcast_float_to_i32:
+; CHECK: i32.reinterpret/f32 $push0=, $0{{$}}
+define i32 @bitcast_float_to_i32(float %a) {
+ %t = bitcast float %a to i32
+ ret i32 %t
+}
+
+; CHECK-LABEL: bitcast_i64_to_double:
+; CHECK: f64.reinterpret/i64 $push0=, $0{{$}}
+define double @bitcast_i64_to_double(i64 %a) {
+ %t = bitcast i64 %a to double
+ ret double %t
+}
+
+; CHECK-LABEL: bitcast_double_to_i64:
+; CHECK: i64.reinterpret/f64 $push0=, $0{{$}}
+define i64 @bitcast_double_to_i64(double %a) {
+ %t = bitcast double %a to i64
+ ret i64 %t
+}
diff --git a/test/CodeGen/WebAssembly/copysign-casts.ll b/test/CodeGen/WebAssembly/copysign-casts.ll
new file mode 100644
index 000000000000..760e49133018
--- /dev/null
+++ b/test/CodeGen/WebAssembly/copysign-casts.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; DAGCombiner oddly folds casts into the rhs of copysign. Test that they get
+; unfolded.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare double @copysign(double, double) nounwind readnone
+declare float @copysignf(float, float) nounwind readnone
+
+; CHECK-LABEL: fold_promote:
+; CHECK: f64.promote/f32 $push0=, $1{{$}}
+; CHECK: f64.copysign $push1=, $0, $pop0{{$}}
+define double @fold_promote(double %a, float %b) {
+ %c = fpext float %b to double
+ %t = call double @copysign(double %a, double %c)
+ ret double %t
+}
+
+; CHECK-LABEL: fold_demote:{{$}}
+; CHECK: f32.demote/f64 $push0=, $1{{$}}
+; CHECK: f32.copysign $push1=, $0, $pop0{{$}}
+define float @fold_demote(float %a, double %b) {
+ %c = fptrunc double %b to float
+ %t = call float @copysignf(float %a, float %c)
+ ret float %t
+}
diff --git a/test/CodeGen/WebAssembly/cpus.ll b/test/CodeGen/WebAssembly/cpus.ll
new file mode 100644
index 000000000000..2b77c5f475c8
--- /dev/null
+++ b/test/CodeGen/WebAssembly/cpus.ll
@@ -0,0 +1,17 @@
+; This tests that llc accepts all valid WebAssembly CPUs.
+
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=mvp 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -mcpu=mvp 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=bleeding-edge 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -mcpu=bleeding-edge 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
+
+; CHECK-NOT: {{.*}} is not a recognized processor for this target
+; INVALID: {{.*}} is not a recognized processor for this target
+
+define i32 @f(i32 %i_like_the_web) {
+ ret i32 %i_like_the_web
+}
diff --git a/test/CodeGen/WebAssembly/dead-vreg.ll b/test/CodeGen/WebAssembly/dead-vreg.ll
new file mode 100644
index 000000000000..b03e1569fde6
--- /dev/null
+++ b/test/CodeGen/WebAssembly/dead-vreg.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs | FileCheck %s
+
+; Check that unused vregs aren't assigned registers.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+define void @foo(i32* nocapture %a, i32 %w, i32 %h) {
+; CHECK-LABEL: foo:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .local i32, i32, i32, i32, i32, i32, i32{{$}}
+entry:
+ %cmp.19 = icmp sgt i32 %h, 0
+ br i1 %cmp.19, label %for.cond.1.preheader.lr.ph, label %for.end.7
+
+for.cond.1.preheader.lr.ph:
+ %cmp2.17 = icmp sgt i32 %w, 0
+ br label %for.cond.1.preheader
+
+for.cond.1.preheader:
+ %y.020 = phi i32 [ 0, %for.cond.1.preheader.lr.ph ], [ %inc6, %for.inc.5 ]
+ br i1 %cmp2.17, label %for.body.3.lr.ph, label %for.inc.5
+
+for.body.3.lr.ph:
+ %mul4 = mul nsw i32 %y.020, %w
+ br label %for.body.3
+
+for.body.3:
+ %x.018 = phi i32 [ 0, %for.body.3.lr.ph ], [ %inc, %for.body.3 ]
+ %mul = mul nsw i32 %x.018, %y.020
+ %add = add nsw i32 %x.018, %mul4
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %add
+ store i32 %mul, i32* %arrayidx, align 4
+ %inc = add nuw nsw i32 %x.018, 1
+ %exitcond = icmp eq i32 %inc, %w
+ br i1 %exitcond, label %for.inc.5.loopexit, label %for.body.3
+
+for.inc.5.loopexit:
+ br label %for.inc.5
+
+for.inc.5:
+ %inc6 = add nuw nsw i32 %y.020, 1
+ %exitcond22 = icmp eq i32 %inc6, %h
+ br i1 %exitcond22, label %for.end.7.loopexit, label %for.cond.1.preheader
+
+for.end.7.loopexit:
+ br label %for.end.7
+
+for.end.7:
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/f32.ll b/test/CodeGen/WebAssembly/f32.ll
new file mode 100644
index 000000000000..777010064cdb
--- /dev/null
+++ b/test/CodeGen/WebAssembly/f32.ll
@@ -0,0 +1,154 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic 32-bit floating-point operations assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare float @llvm.fabs.f32(float)
+declare float @llvm.copysign.f32(float, float)
+declare float @llvm.sqrt.f32(float)
+declare float @llvm.ceil.f32(float)
+declare float @llvm.floor.f32(float)
+declare float @llvm.trunc.f32(float)
+declare float @llvm.nearbyint.f32(float)
+declare float @llvm.rint.f32(float)
+declare float @llvm.fma.f32(float, float, float)
+
+; CHECK-LABEL: fadd32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.add $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @fadd32(float %x, float %y) {
+ %a = fadd float %x, %y
+ ret float %a
+}
+
+; CHECK-LABEL: fsub32:
+; CHECK: f32.sub $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @fsub32(float %x, float %y) {
+ %a = fsub float %x, %y
+ ret float %a
+}
+
+; CHECK-LABEL: fmul32:
+; CHECK: f32.mul $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @fmul32(float %x, float %y) {
+ %a = fmul float %x, %y
+ ret float %a
+}
+
+; CHECK-LABEL: fdiv32:
+; CHECK: f32.div $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @fdiv32(float %x, float %y) {
+ %a = fdiv float %x, %y
+ ret float %a
+}
+
+; CHECK-LABEL: fabs32:
+; CHECK: f32.abs $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @fabs32(float %x) {
+ %a = call float @llvm.fabs.f32(float %x)
+ ret float %a
+}
+
+; CHECK-LABEL: fneg32:
+; CHECK: f32.neg $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @fneg32(float %x) {
+ %a = fsub float -0., %x
+ ret float %a
+}
+
+; CHECK-LABEL: copysign32:
+; CHECK: f32.copysign $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @copysign32(float %x, float %y) {
+ %a = call float @llvm.copysign.f32(float %x, float %y)
+ ret float %a
+}
+
+; CHECK-LABEL: sqrt32:
+; CHECK: f32.sqrt $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @sqrt32(float %x) {
+ %a = call float @llvm.sqrt.f32(float %x)
+ ret float %a
+}
+
+; CHECK-LABEL: ceil32:
+; CHECK: f32.ceil $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @ceil32(float %x) {
+ %a = call float @llvm.ceil.f32(float %x)
+ ret float %a
+}
+
+; CHECK-LABEL: floor32:
+; CHECK: f32.floor $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @floor32(float %x) {
+ %a = call float @llvm.floor.f32(float %x)
+ ret float %a
+}
+
+; CHECK-LABEL: trunc32:
+; CHECK: f32.trunc $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @trunc32(float %x) {
+ %a = call float @llvm.trunc.f32(float %x)
+ ret float %a
+}
+
+; CHECK-LABEL: nearest32:
+; CHECK: f32.nearest $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @nearest32(float %x) {
+ %a = call float @llvm.nearbyint.f32(float %x)
+ ret float %a
+}
+
+; CHECK-LABEL: nearest32_via_rint:
+; CHECK: f32.nearest $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @nearest32_via_rint(float %x) {
+ %a = call float @llvm.rint.f32(float %x)
+ ret float %a
+}
+
+; Min and max tests. LLVM currently only forms fminnan and fmaxnan nodes in
+; cases where there's a single fcmp with a select and it can prove that one
+; of the arms is never NaN, so we only test that case. In the future if LLVM
+; learns to form fminnan/fmaxnan in more cases, we can write more general
+; tests.
+
+; CHECK-LABEL: fmin32:
+; CHECK: f32.min $push1=, $0, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define float @fmin32(float %x) {
+ %a = fcmp ult float %x, 0.0
+ %b = select i1 %a, float %x, float 0.0
+ ret float %b
+}
+
+; CHECK-LABEL: fmax32:
+; CHECK: f32.max $push1=, $0, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define float @fmax32(float %x) {
+ %a = fcmp ugt float %x, 0.0
+ %b = select i1 %a, float %x, float 0.0
+ ret float %b
+}
+
+; CHECK-LABEL: fma32:
+; CHECK: {{^}} f32.call $push0=, fmaf, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @fma32(float %a, float %b, float %c) {
+ %d = call float @llvm.fma.f32(float %a, float %b, float %c)
+ ret float %d
+}
diff --git a/test/CodeGen/WebAssembly/f64.ll b/test/CodeGen/WebAssembly/f64.ll
new file mode 100644
index 000000000000..302ee79389b3
--- /dev/null
+++ b/test/CodeGen/WebAssembly/f64.ll
@@ -0,0 +1,154 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic 64-bit floating-point operations assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare double @llvm.fabs.f64(double)
+declare double @llvm.copysign.f64(double, double)
+declare double @llvm.sqrt.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.floor.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.rint.f64(double)
+declare double @llvm.fma.f64(double, double, double)
+
+; CHECK-LABEL: fadd64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.add $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @fadd64(double %x, double %y) {
+ %a = fadd double %x, %y
+ ret double %a
+}
+
+; CHECK-LABEL: fsub64:
+; CHECK: f64.sub $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @fsub64(double %x, double %y) {
+ %a = fsub double %x, %y
+ ret double %a
+}
+
+; CHECK-LABEL: fmul64:
+; CHECK: f64.mul $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @fmul64(double %x, double %y) {
+ %a = fmul double %x, %y
+ ret double %a
+}
+
+; CHECK-LABEL: fdiv64:
+; CHECK: f64.div $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @fdiv64(double %x, double %y) {
+ %a = fdiv double %x, %y
+ ret double %a
+}
+
+; CHECK-LABEL: fabs64:
+; CHECK: f64.abs $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @fabs64(double %x) {
+ %a = call double @llvm.fabs.f64(double %x)
+ ret double %a
+}
+
+; CHECK-LABEL: fneg64:
+; CHECK: f64.neg $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @fneg64(double %x) {
+ %a = fsub double -0., %x
+ ret double %a
+}
+
+; CHECK-LABEL: copysign64:
+; CHECK: f64.copysign $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @copysign64(double %x, double %y) {
+ %a = call double @llvm.copysign.f64(double %x, double %y)
+ ret double %a
+}
+
+; CHECK-LABEL: sqrt64:
+; CHECK: f64.sqrt $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @sqrt64(double %x) {
+ %a = call double @llvm.sqrt.f64(double %x)
+ ret double %a
+}
+
+; CHECK-LABEL: ceil64:
+; CHECK: f64.ceil $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @ceil64(double %x) {
+ %a = call double @llvm.ceil.f64(double %x)
+ ret double %a
+}
+
+; CHECK-LABEL: floor64:
+; CHECK: f64.floor $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @floor64(double %x) {
+ %a = call double @llvm.floor.f64(double %x)
+ ret double %a
+}
+
+; CHECK-LABEL: trunc64:
+; CHECK: f64.trunc $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @trunc64(double %x) {
+ %a = call double @llvm.trunc.f64(double %x)
+ ret double %a
+}
+
+; CHECK-LABEL: nearest64:
+; CHECK: f64.nearest $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @nearest64(double %x) {
+ %a = call double @llvm.nearbyint.f64(double %x)
+ ret double %a
+}
+
+; CHECK-LABEL: nearest64_via_rint:
+; CHECK: f64.nearest $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @nearest64_via_rint(double %x) {
+ %a = call double @llvm.rint.f64(double %x)
+ ret double %a
+}
+
+; Min and max tests. LLVM currently only forms fminnan and fmaxnan nodes in
+; cases where there's a single fcmp with a select and it can prove that one
+; of the arms is never NaN, so we only test that case. In the future if LLVM
+; learns to form fminnan/fmaxnan in more cases, we can write more general
+; tests.
+
+; CHECK-LABEL: fmin64:
+; CHECK: f64.min $push1=, $0, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define double @fmin64(double %x) {
+ %a = fcmp ult double %x, 0.0
+ %b = select i1 %a, double %x, double 0.0
+ ret double %b
+}
+
+; CHECK-LABEL: fmax64:
+; CHECK: f64.max $push1=, $0, $pop0{{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define double @fmax64(double %x) {
+ %a = fcmp ugt double %x, 0.0
+ %b = select i1 %a, double %x, double 0.0
+ ret double %b
+}
+
+; CHECK-LABEL: fma64:
+; CHECK: {{^}} f64.call $push0=, fma, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @fma64(double %a, double %b, double %c) {
+ %d = call double @llvm.fma.f64(double %a, double %b, double %c)
+ ret double %d
+}
diff --git a/test/CodeGen/WebAssembly/fast-isel.ll b/test/CodeGen/WebAssembly/fast-isel.ll
new file mode 100644
index 000000000000..07d78c1415e5
--- /dev/null
+++ b/test/CodeGen/WebAssembly/fast-isel.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -asm-verbose=false \
+; RUN: -fast-isel -fast-isel-abort=1 -verify-machineinstrs \
+; RUN: | FileCheck %s
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; This tests very minimal fast-isel functionality.
+
+; CHECK-LABEL: immediate_f32:
+; CHECK: f32.const $push{{[0-9]+}}=, 0x1.4p1{{$}}
+define float @immediate_f32() {
+ ret float 2.5
+}
+
+; CHECK-LABEL: immediate_f64:
+; CHECK: f64.const $push{{[0-9]+}}=, 0x1.4p1{{$}}
+define double @immediate_f64() {
+ ret double 2.5
+}
diff --git a/test/CodeGen/WebAssembly/frem.ll b/test/CodeGen/WebAssembly/frem.ll
new file mode 100644
index 000000000000..688370313b48
--- /dev/null
+++ b/test/CodeGen/WebAssembly/frem.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that the frem instruction works.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: frem32:
+; CHECK-NEXT: .param f32, f32{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: {{^}} f32.call $push0=, fmodf, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @frem32(float %x, float %y) {
+ %a = frem float %x, %y
+ ret float %a
+}
+
+; CHECK-LABEL: frem64:
+; CHECK-NEXT: .param f64, f64{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: {{^}} f64.call $push0=, fmod, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @frem64(double %x, double %y) {
+ %a = frem double %x, %y
+ ret double %a
+}
diff --git a/test/CodeGen/WebAssembly/func.ll b/test/CodeGen/WebAssembly/func.ll
new file mode 100644
index 000000000000..6f42dc744ac7
--- /dev/null
+++ b/test/CodeGen/WebAssembly/func.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic functions assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: f0:
+; CHECK: return{{$}}
+; CHECK: .size f0,
+define void @f0() {
+ ret void
+}
+
+; CHECK-LABEL: f1:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push[[NUM:[0-9]+]]=, 0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+; CHECK: .size f1,
+define i32 @f1() {
+ ret i32 0
+}
+
+; CHECK-LABEL: f2:
+; CHECK-NEXT: .param i32, f32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push[[NUM:[0-9]+]]=, 0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+; CHECK: .size f2,
+define i32 @f2(i32 %p1, float %p2) {
+ ret i32 0
+}
+
+; CHECK-LABEL: f3:
+; CHECK-NEXT: .param i32, f32{{$}}
+; CHECK-NOT: local
+; CHECK-NEXT: return{{$}}
+; CHECK: .size f3,
+define void @f3(i32 %p1, float %p2) {
+ ret void
+}
+
+; CHECK-LABEL: f4:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: local
+define i32 @f4(i32 %x) {
+entry:
+ %c = trunc i32 %x to i1
+ br i1 %c, label %true, label %false
+true:
+ ret i32 0
+false:
+ ret i32 1
+}
+
+; CHECK-LABEL: f5:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: unreachable
+define float @f5() {
+ unreachable
+}
diff --git a/test/CodeGen/WebAssembly/global.ll b/test/CodeGen/WebAssembly/global.ll
new file mode 100644
index 000000000000..5f149ed067c8
--- /dev/null
+++ b/test/CodeGen/WebAssembly/global.ll
@@ -0,0 +1,177 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that globals assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-NOT: llvm.used
+; CHECK-NOT: llvm.metadata
+@llvm.used = appending global [1 x i32*] [i32* @g], section "llvm.metadata"
+
+; CHECK: foo:
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK-NEXT: i32.load $push1=, answer($pop0){{$}}
+; CHECK-NEXT: return $pop1{{$}}
+define i32 @foo() {
+ %a = load i32, i32* @answer
+ ret i32 %a
+}
+
+; CHECK-LABEL: call_memcpy:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: call memcpy, $0, $1, $2{{$}}
+; CHECK-NEXT: return $0{{$}}
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret i8* %p
+}
+
+; CHECK: .type g,@object
+; CHECK: .align 2{{$}}
+; CHECK-NEXT: g:
+; CHECK-NEXT: .int32 1337{{$}}
+; CHECK-NEXT: .size g, 4{{$}}
+@g = private global i32 1337
+
+; CHECK-LABEL: ud:
+; CHECK-NEXT: .zero 4{{$}}
+; CHECK-NEXT: .size ud, 4{{$}}
+@ud = internal global i32 undef
+
+; CHECK: .type nil,@object
+; CHECK-NEXT: .lcomm nil,4,2{{$}}
+@nil = internal global i32 zeroinitializer
+
+; CHECK: .type z,@object
+; CHECK-NEXT: .lcomm z,4,2{{$}}
+@z = internal global i32 0
+
+; CHECK-NEXT: .type one,@object
+; CHECK-NEXT: .align 2{{$}}
+; CHECK-NEXT: one:
+; CHECK-NEXT: .int32 1{{$}}
+; CHECK-NEXT: .size one, 4{{$}}
+@one = internal global i32 1
+
+; CHECK: .type answer,@object
+; CHECK: .align 2{{$}}
+; CHECK-NEXT: answer:
+; CHECK-NEXT: .int32 42{{$}}
+; CHECK-NEXT: .size answer, 4{{$}}
+@answer = internal global i32 42
+
+; CHECK: .type u32max,@object
+; CHECK: .align 2{{$}}
+; CHECK-NEXT: u32max:
+; CHECK-NEXT: .int32 4294967295{{$}}
+; CHECK-NEXT: .size u32max, 4{{$}}
+@u32max = internal global i32 -1
+
+; CHECK: .type ud64,@object
+; CHECK: .align 3{{$}}
+; CHECK-NEXT: ud64:
+; CHECK-NEXT: .zero 8{{$}}
+; CHECK-NEXT: .size ud64, 8{{$}}
+@ud64 = internal global i64 undef
+
+; CHECK: .type nil64,@object
+; CHECK: .lcomm nil64,8,3{{$}}
+@nil64 = internal global i64 zeroinitializer
+
+; CHECK: .type z64,@object
+; CHECK: .lcomm z64,8,3{{$}}
+@z64 = internal global i64 0
+
+; CHECK: .type twoP32,@object
+; CHECK: .align 3{{$}}
+; CHECK-NEXT: twoP32:
+; CHECK-NEXT: .int64 4294967296{{$}}
+; CHECK-NEXT: .size twoP32, 8{{$}}
+@twoP32 = internal global i64 4294967296
+
+; CHECK: .type u64max,@object
+; CHECK: .align 3{{$}}
+; CHECK-NEXT: u64max:
+; CHECK-NEXT: .int64 -1{{$}}
+; CHECK-NEXT: .size u64max, 8{{$}}
+@u64max = internal global i64 -1
+
+; CHECK: .type f32ud,@object
+; CHECK: .align 2{{$}}
+; CHECK-NEXT: f32ud:
+; CHECK-NEXT: .zero 4{{$}}
+; CHECK-NEXT: .size f32ud, 4{{$}}
+@f32ud = internal global float undef
+
+; CHECK: .type f32nil,@object
+; CHECK: .lcomm f32nil,4,2{{$}}
+@f32nil = internal global float zeroinitializer
+
+; CHECK: .type f32z,@object
+; CHECK: .lcomm f32z,4,2{{$}}
+@f32z = internal global float 0.0
+
+; CHECK: .type f32nz,@object
+; CHECK: .align 2{{$}}
+; CHECK: f32nz:
+; CHECK: .int32 2147483648{{$}}
+; CHECK: .size f32nz, 4{{$}}
+@f32nz = internal global float -0.0
+
+; CHECK: .type f32two,@object
+; CHECK: .align 2{{$}}
+; CHECK-NEXT: f32two:
+; CHECK-NEXT: .int32 1073741824{{$}}
+; CHECK-NEXT: .size f32two, 4{{$}}
+@f32two = internal global float 2.0
+
+; CHECK: .type f64ud,@object
+; CHECK: .align 3{{$}}
+; CHECK-NEXT: f64ud:
+; CHECK-NEXT: .zero 8{{$}}
+; CHECK-NEXT: .size f64ud, 8{{$}}
+@f64ud = internal global double undef
+
+; CHECK: .type f64nil,@object
+; CHECK: .lcomm f64nil,8,3{{$}}
+@f64nil = internal global double zeroinitializer
+
+; CHECK: .type f64z,@object
+; CHECK: .lcomm f64z,8,3{{$}}
+@f64z = internal global double 0.0
+
+; CHECK: .type f64nz,@object
+; CHECK: .align 3{{$}}
+; CHECK-NEXT: f64nz:
+; CHECK-NEXT: .int64 -9223372036854775808{{$}}
+; CHECK-NEXT: .size f64nz, 8{{$}}
+@f64nz = internal global double -0.0
+
+; CHECK: .type f64two,@object
+; CHECK: .align 3{{$}}
+; CHECK-NEXT: f64two:
+; CHECK-NEXT: .int64 4611686018427387904{{$}}
+; CHECK-NEXT: .size f64two, 8{{$}}
+@f64two = internal global double 2.0
+
+; Indexing into a global array produces a relocation.
+; CHECK: .type arr,@object
+; CHECK: .type ptr,@object
+; CHECK: ptr:
+; CHECK-NEXT: .int32 arr+80
+; CHECK-NEXT: .size ptr, 4
+@arr = global [128 x i32] zeroinitializer, align 16
+@ptr = global i32* getelementptr inbounds ([128 x i32], [128 x i32]* @arr, i32 0, i32 20), align 4
+
+; Constant global.
+; CHECK: .type rom,@object{{$}}
+; CHECK: .section .rodata,"a",@progbits{{$}}
+; CHECK: .globl rom{{$}}
+; CHECK: .align 4{{$}}
+; CHECK: rom:
+; CHECK: .zero 512{{$}}
+; CHECK: .size rom, 512{{$}}
+@rom = constant [128 x i32] zeroinitializer, align 16
diff --git a/test/CodeGen/WebAssembly/globl.ll b/test/CodeGen/WebAssembly/globl.ll
new file mode 100644
index 000000000000..a5dc028c1db4
--- /dev/null
+++ b/test/CodeGen/WebAssembly/globl.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK: .globl foo
+; CHECK-LABEL: foo:
+define void @foo() {
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/i32.ll b/test/CodeGen/WebAssembly/i32.ll
new file mode 100644
index 000000000000..ab29b0472bf2
--- /dev/null
+++ b/test/CodeGen/WebAssembly/i32.ll
@@ -0,0 +1,190 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic 32-bit integer operations assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i32 @llvm.ctpop.i32(i32)
+
+; CHECK-LABEL: add32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.add $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @add32(i32 %x, i32 %y) {
+ %a = add i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: sub32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.sub $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @sub32(i32 %x, i32 %y) {
+ %a = sub i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: mul32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.mul $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @mul32(i32 %x, i32 %y) {
+ %a = mul i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: sdiv32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.div_s $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @sdiv32(i32 %x, i32 %y) {
+ %a = sdiv i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: udiv32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.div_u $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @udiv32(i32 %x, i32 %y) {
+ %a = udiv i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: srem32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.rem_s $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @srem32(i32 %x, i32 %y) {
+ %a = srem i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: urem32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.rem_u $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @urem32(i32 %x, i32 %y) {
+ %a = urem i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: and32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.and $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @and32(i32 %x, i32 %y) {
+ %a = and i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: or32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.or $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @or32(i32 %x, i32 %y) {
+ %a = or i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: xor32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.xor $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @xor32(i32 %x, i32 %y) {
+ %a = xor i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: shl32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.shl $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @shl32(i32 %x, i32 %y) {
+ %a = shl i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: shr32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.shr_u $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @shr32(i32 %x, i32 %y) {
+ %a = lshr i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: sar32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.shr_s $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @sar32(i32 %x, i32 %y) {
+ %a = ashr i32 %x, %y
+ ret i32 %a
+}
+
+; CHECK-LABEL: clz32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.clz $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @clz32(i32 %x) {
+ %a = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ ret i32 %a
+}
+
+; CHECK-LABEL: clz32_zero_undef:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.clz $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @clz32_zero_undef(i32 %x) {
+ %a = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ ret i32 %a
+}
+
+; CHECK-LABEL: ctz32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.ctz $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @ctz32(i32 %x) {
+ %a = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+ ret i32 %a
+}
+
+; CHECK-LABEL: ctz32_zero_undef:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.ctz $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @ctz32_zero_undef(i32 %x) {
+ %a = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ ret i32 %a
+}
+
+; CHECK-LABEL: popcnt32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.popcnt $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @popcnt32(i32 %x) {
+ %a = call i32 @llvm.ctpop.i32(i32 %x)
+ ret i32 %a
+}
diff --git a/test/CodeGen/WebAssembly/i64.ll b/test/CodeGen/WebAssembly/i64.ll
new file mode 100644
index 000000000000..769f74266754
--- /dev/null
+++ b/test/CodeGen/WebAssembly/i64.ll
@@ -0,0 +1,190 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic 64-bit integer operations assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i64 @llvm.ctpop.i64(i64)
+
+; CHECK-LABEL: add64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.add $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @add64(i64 %x, i64 %y) {
+ %a = add i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: sub64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.sub $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @sub64(i64 %x, i64 %y) {
+ %a = sub i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: mul64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.mul $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @mul64(i64 %x, i64 %y) {
+ %a = mul i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: sdiv64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.div_s $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @sdiv64(i64 %x, i64 %y) {
+ %a = sdiv i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: udiv64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.div_u $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @udiv64(i64 %x, i64 %y) {
+ %a = udiv i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: srem64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.rem_s $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @srem64(i64 %x, i64 %y) {
+ %a = srem i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: urem64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.rem_u $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @urem64(i64 %x, i64 %y) {
+ %a = urem i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: and64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.and $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @and64(i64 %x, i64 %y) {
+ %a = and i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: or64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.or $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @or64(i64 %x, i64 %y) {
+ %a = or i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: xor64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.xor $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @xor64(i64 %x, i64 %y) {
+ %a = xor i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: shl64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.shl $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @shl64(i64 %x, i64 %y) {
+ %a = shl i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: shr64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.shr_u $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @shr64(i64 %x, i64 %y) {
+ %a = lshr i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: sar64:
+; CHECK-NEXT: .param i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.shr_s $push0=, $0, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @sar64(i64 %x, i64 %y) {
+ %a = ashr i64 %x, %y
+ ret i64 %a
+}
+
+; CHECK-LABEL: clz64:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.clz $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @clz64(i64 %x) {
+ %a = call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+ ret i64 %a
+}
+
+; CHECK-LABEL: clz64_zero_undef:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.clz $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @clz64_zero_undef(i64 %x) {
+ %a = call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+ ret i64 %a
+}
+
+; CHECK-LABEL: ctz64:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.ctz $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @ctz64(i64 %x) {
+ %a = call i64 @llvm.cttz.i64(i64 %x, i1 false)
+ ret i64 %a
+}
+
+; CHECK-LABEL: ctz64_zero_undef:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.ctz $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @ctz64_zero_undef(i64 %x) {
+ %a = call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ ret i64 %a
+}
+
+; CHECK-LABEL: popcnt64:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.popcnt $push0=, $0{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @popcnt64(i64 %x) {
+ %a = call i64 @llvm.ctpop.i64(i64 %x)
+ ret i64 %a
+}
diff --git a/test/CodeGen/WebAssembly/ident.ll b/test/CodeGen/WebAssembly/ident.ll
new file mode 100644
index 000000000000..1e0dc2aa6725
--- /dev/null
+++ b/test/CodeGen/WebAssembly/ident.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test llvm.ident.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK: .ident "hello world"
+
+!llvm.ident = !{!0}
+
+!0 = !{!"hello world"}
diff --git a/test/CodeGen/WebAssembly/immediates.ll b/test/CodeGen/WebAssembly/immediates.ll
new file mode 100644
index 000000000000..abab11f2254e
--- /dev/null
+++ b/test/CodeGen/WebAssembly/immediates.ll
@@ -0,0 +1,198 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic immediates assemble as expected.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: zero_i32:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push[[NUM:[0-9]+]]=, 0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @zero_i32() {
+ ret i32 0
+}
+
+; CHECK-LABEL: one_i32:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push[[NUM:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @one_i32() {
+ ret i32 1
+}
+
+; CHECK-LABEL: max_i32:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push[[NUM:[0-9]+]]=, 2147483647{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @max_i32() {
+ ret i32 2147483647
+}
+
+; CHECK-LABEL: min_i32:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push[[NUM:[0-9]+]]=, -2147483648{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @min_i32() {
+ ret i32 -2147483648
+}
+
+; CHECK-LABEL: zero_i64:
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.const $push[[NUM:[0-9]+]]=, 0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @zero_i64() {
+ ret i64 0
+}
+
+; CHECK-LABEL: one_i64:
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.const $push[[NUM:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @one_i64() {
+ ret i64 1
+}
+
+; CHECK-LABEL: max_i64:
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.const $push[[NUM:[0-9]+]]=, 9223372036854775807{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @max_i64() {
+ ret i64 9223372036854775807
+}
+
+; CHECK-LABEL: min_i64:
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.const $push[[NUM:[0-9]+]]=, -9223372036854775808{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @min_i64() {
+ ret i64 -9223372036854775808
+}
+
+; CHECK-LABEL: negzero_f32:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.const $push[[NUM:[0-9]+]]=, -0x0p0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @negzero_f32() {
+ ret float -0.0
+}
+
+; CHECK-LABEL: zero_f32:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.const $push[[NUM:[0-9]+]]=, 0x0p0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @zero_f32() {
+ ret float 0.0
+}
+
+; CHECK-LABEL: one_f32:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.const $push[[NUM:[0-9]+]]=, 0x1p0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @one_f32() {
+ ret float 1.0
+}
+
+; CHECK-LABEL: two_f32:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.const $push[[NUM:[0-9]+]]=, 0x1p1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @two_f32() {
+ ret float 2.0
+}
+
+; CHECK-LABEL: nan_f32:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.const $push[[NUM:[0-9]+]]=, nan{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @nan_f32() {
+ ret float 0x7FF8000000000000
+}
+
+; CHECK-LABEL: negnan_f32:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.const $push[[NUM:[0-9]+]]=, -nan{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @negnan_f32() {
+ ret float 0xFFF8000000000000
+}
+
+; CHECK-LABEL: inf_f32:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.const $push[[NUM:[0-9]+]]=, infinity{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @inf_f32() {
+ ret float 0x7FF0000000000000
+}
+
+; CHECK-LABEL: neginf_f32:
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.const $push[[NUM:[0-9]+]]=, -infinity{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @neginf_f32() {
+ ret float 0xFFF0000000000000
+}
+
+; CHECK-LABEL: negzero_f64:
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.const $push[[NUM:[0-9]+]]=, -0x0p0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @negzero_f64() {
+ ret double -0.0
+}
+
+; CHECK-LABEL: zero_f64:
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.const $push[[NUM:[0-9]+]]=, 0x0p0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @zero_f64() {
+ ret double 0.0
+}
+
+; CHECK-LABEL: one_f64:
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.const $push[[NUM:[0-9]+]]=, 0x1p0{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @one_f64() {
+ ret double 1.0
+}
+
+; CHECK-LABEL: two_f64:
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.const $push[[NUM:[0-9]+]]=, 0x1p1{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @two_f64() {
+ ret double 2.0
+}
+
+; CHECK-LABEL: nan_f64:
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.const $push[[NUM:[0-9]+]]=, nan{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @nan_f64() {
+ ret double 0x7FF8000000000000
+}
+
+; CHECK-LABEL: negnan_f64:
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.const $push[[NUM:[0-9]+]]=, -nan{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @negnan_f64() {
+ ret double 0xFFF8000000000000
+}
+
+; CHECK-LABEL: inf_f64:
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.const $push[[NUM:[0-9]+]]=, infinity{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @inf_f64() {
+ ret double 0x7FF0000000000000
+}
+
+; CHECK-LABEL: neginf_f64:
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.const $push[[NUM:[0-9]+]]=, -infinity{{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @neginf_f64() {
+ ret double 0xFFF0000000000000
+}
diff --git a/test/CodeGen/WebAssembly/inline-asm.ll b/test/CodeGen/WebAssembly/inline-asm.ll
new file mode 100644
index 000000000000..fc066c4b812f
--- /dev/null
+++ b/test/CodeGen/WebAssembly/inline-asm.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test basic inline assembly.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: foo:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: #APP{{$}}
+; CHECK-NEXT: # $0 = aaa($0){{$}}
+; CHECK-NEXT: #NO_APP{{$}}
+; CHECK-NEXT: return $0{{$}}
+define i32 @foo(i32 %r) {
+entry:
+ %0 = tail call i32 asm sideeffect "# $0 = aaa($1)", "=r,r"(i32 %r) #0, !srcloc !0
+ ret i32 %0
+}
+
+; CHECK-LABEL: bar:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: #APP{{$}}
+; CHECK-NEXT: # 0($1) = bbb(0($0)){{$}}
+; CHECK-NEXT: #NO_APP{{$}}
+; CHECK-NEXT: return{{$}}
+define void @bar(i32* %r, i32* %s) {
+entry:
+ tail call void asm sideeffect "# $0 = bbb($1)", "=*m,*m"(i32* %s, i32* %r) #0, !srcloc !1
+ ret void
+}
+
+; CHECK-LABEL: imm:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: .local i32{{$}}
+; CHECK-NEXT: #APP{{$}}
+; CHECK-NEXT: # $0 = ccc(42){{$}}
+; CHECK-NEXT: #NO_APP{{$}}
+; CHECK-NEXT: return $0{{$}}
+define i32 @imm() {
+entry:
+ %0 = tail call i32 asm sideeffect "# $0 = ccc($1)", "=r,i"(i32 42) #0, !srcloc !2
+ ret i32 %0
+}
+
+; CHECK-LABEL: foo_i64:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: #APP{{$}}
+; CHECK-NEXT: # $0 = aaa($0){{$}}
+; CHECK-NEXT: #NO_APP{{$}}
+; CHECK-NEXT: return $0{{$}}
+define i64 @foo_i64(i64 %r) {
+entry:
+ %0 = tail call i64 asm sideeffect "# $0 = aaa($1)", "=r,r"(i64 %r) #0, !srcloc !0
+ ret i64 %0
+}
+
+; CHECK-LABEL: X_i16:
+; CHECK: foo $1{{$}}
+; CHECK: i32.store16 $discard=, 0($0), $1{{$}}
+define void @X_i16(i16 * %t) {
+ call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"(i16* %t)
+ ret void
+}
+
+; CHECK-LABEL: X_ptr:
+; CHECK: foo $1{{$}}
+; CHECK: i32.store $discard=, 0($0), $1{{$}}
+define void @X_ptr(i16 ** %t) {
+ call void asm sideeffect "foo $0", "=*X,~{dirflag},~{fpsr},~{flags},~{memory}"(i16** %t)
+ ret void
+}
+
+; CHECK-LABEL: funcname:
+; CHECK: foo funcname{{$}}
+define void @funcname() {
+ tail call void asm sideeffect "foo $0", "i"(void ()* nonnull @funcname) #0, !srcloc !0
+ ret void
+}
+
+; CHECK-LABEL: varname:
+; CHECK: foo gv+37{{$}}
+@gv = global [0 x i8] zeroinitializer
+define void @varname() {
+ tail call void asm sideeffect "foo $0", "i"(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @gv, i64 0, i64 37)) #0, !srcloc !0
+ ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{i32 47}
+!1 = !{i32 145}
+!2 = !{i32 231}
diff --git a/test/CodeGen/WebAssembly/legalize.ll b/test/CodeGen/WebAssembly/legalize.ll
new file mode 100644
index 000000000000..e780b2ee36ca
--- /dev/null
+++ b/test/CodeGen/WebAssembly/legalize.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test various types and operators that need to be legalized.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: shl_i3:
+; CHECK: i32.const $push0=, 7{{$}}
+; CHECK: i32.and $push1=, $1, $pop0{{$}}
+; CHECK: i32.shl $push2=, $0, $pop1{{$}}
+define i3 @shl_i3(i3 %a, i3 %b, i3* %p) {
+ %t = shl i3 %a, %b
+ ret i3 %t
+}
+
+; CHECK-LABEL: shl_i53:
+; CHECK: i64.const $push0=, 9007199254740991{{$}}
+; CHECK: i64.and $push1=, $1, $pop0{{$}}
+; CHECK: i64.shl $push2=, $0, $pop1{{$}}
+define i53 @shl_i53(i53 %a, i53 %b, i53* %p) {
+ %t = shl i53 %a, %b
+ ret i53 %t
+}
+
+; CHECK-LABEL: sext_in_reg_i32_i64:
+; CHECK: i64.shl
+; CHECK: i64.shr_s
+define i64 @sext_in_reg_i32_i64(i64 %a) {
+ %b = shl i64 %a, 32
+ %c = ashr i64 %b, 32
+ ret i64 %c
+}
+
+; CHECK-LABEL: fpext_f32_f64:
+; CHECK: f32.load $push0=, 0($0){{$}}
+; CHECK: f64.promote/f32 $push1=, $pop0{{$}}
+; CHECK: return $pop1{{$}}
+define double @fpext_f32_f64(float *%p) {
+ %v = load float, float* %p
+ %e = fpext float %v to double
+ ret double %e
+}
+
+; CHECK-LABEL: fpconv_f64_f32:
+; CHECK: f64.load $push0=, 0($0){{$}}
+; CHECK: f32.demote/f64 $push1=, $pop0{{$}}
+; CHECK: return $pop1{{$}}
+define float @fpconv_f64_f32(double *%p) {
+ %v = load double, double* %p
+ %e = fptrunc double %v to float
+ ret float %e
+}
+
+; Check that big shifts work. This generates a big pile of code from the
+; legalizer; the main thing here is that we don't abort.
+
+; CHECK-LABEL: bigshift:
+define i1024 @bigshift(i1024 %a, i1024 %b) {
+ %c = shl i1024 %a, %b
+ ret i1024 %c
+}
diff --git a/test/CodeGen/WebAssembly/load-ext.ll b/test/CodeGen/WebAssembly/load-ext.ll
new file mode 100644
index 000000000000..0ffcd38a8666
--- /dev/null
+++ b/test/CodeGen/WebAssembly/load-ext.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that extending loads are assembled properly.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: sext_i8_i32:
+; CHECK: i32.load8_s $push0=, 0($0){{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @sext_i8_i32(i8 *%p) {
+ %v = load i8, i8* %p
+ %e = sext i8 %v to i32
+ ret i32 %e
+}
+
+; CHECK-LABEL: zext_i8_i32:
+; CHECK: i32.load8_u $push0=, 0($0){{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @zext_i8_i32(i8 *%p) {
+ %v = load i8, i8* %p
+ %e = zext i8 %v to i32
+ ret i32 %e
+}
+
+; CHECK-LABEL: sext_i16_i32:
+; CHECK: i32.load16_s $push0=, 0($0){{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @sext_i16_i32(i16 *%p) {
+ %v = load i16, i16* %p
+ %e = sext i16 %v to i32
+ ret i32 %e
+}
+
+; CHECK-LABEL: zext_i16_i32:
+; CHECK: i32.load16_u $push0=, 0($0){{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @zext_i16_i32(i16 *%p) {
+ %v = load i16, i16* %p
+ %e = zext i16 %v to i32
+ ret i32 %e
+}
+
+; CHECK-LABEL: sext_i8_i64:
+; CHECK: i64.load8_s $push0=, 0($0){{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @sext_i8_i64(i8 *%p) {
+ %v = load i8, i8* %p
+ %e = sext i8 %v to i64
+ ret i64 %e
+}
+
+; CHECK-LABEL: zext_i8_i64:
+; CHECK: i64.load8_u $push0=, 0($0){{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @zext_i8_i64(i8 *%p) {
+ %v = load i8, i8* %p
+ %e = zext i8 %v to i64
+ ret i64 %e
+}
+
+; CHECK-LABEL: sext_i16_i64:
+; CHECK: i64.load16_s $push0=, 0($0){{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @sext_i16_i64(i16 *%p) {
+ %v = load i16, i16* %p
+ %e = sext i16 %v to i64
+ ret i64 %e
+}
+
+; CHECK-LABEL: zext_i16_i64:
+; CHECK: i64.load16_u $push0=, 0($0){{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @zext_i16_i64(i16 *%p) {
+ %v = load i16, i16* %p
+ %e = zext i16 %v to i64
+ ret i64 %e
+}
+
+; CHECK-LABEL: sext_i32_i64:
+; CHECK: i64.load32_s $push0=, 0($0){{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @sext_i32_i64(i32 *%p) {
+ %v = load i32, i32* %p
+ %e = sext i32 %v to i64
+ ret i64 %e
+}
+
+; CHECK-LABEL: zext_i32_i64:
+; CHECK: i64.load32_u $push0=, 0($0){{$}}
+; CHECK: return $pop0{{$}}
+define i64 @zext_i32_i64(i32 *%p) {
+ %v = load i32, i32* %p
+ %e = zext i32 %v to i64
+ ret i64 %e
+}
diff --git a/test/CodeGen/WebAssembly/load-store-i1.ll b/test/CodeGen/WebAssembly/load-store-i1.ll
new file mode 100644
index 000000000000..37b514729479
--- /dev/null
+++ b/test/CodeGen/WebAssembly/load-store-i1.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that i1 extending loads and truncating stores are assembled properly.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: load_u_i1_i32:
+; CHECK: i32.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM0]]{{$}}
+define i32 @load_u_i1_i32(i1* %p) {
+ %v = load i1, i1* %p
+ %e = zext i1 %v to i32
+ ret i32 %e
+}
+
+; CHECK-LABEL: load_s_i1_i32:
+; CHECK: i32.const $[[NUM1:[0-9]+]]=, 31{{$}}
+; CHECK-NEXT: i32.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}}
+; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}}
+; CHECK-NEXT: return $pop[[NUM3]]{{$}}
+define i32 @load_s_i1_i32(i1* %p) {
+ %v = load i1, i1* %p
+ %e = sext i1 %v to i32
+ ret i32 %e
+}
+
+; CHECK-LABEL: load_u_i1_i64:
+; CHECK: i64.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM0]]{{$}}
+define i64 @load_u_i1_i64(i1* %p) {
+ %v = load i1, i1* %p
+ %e = zext i1 %v to i64
+ ret i64 %e
+}
+
+; CHECK-LABEL: load_s_i1_i64:
+; CHECK: i64.const $[[NUM1:[0-9]+]]=, 63{{$}}
+; CHECK-NEXT: i64.load8_u $push[[NUM0:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: shl $push[[NUM2:[0-9]+]]=, $pop[[NUM0]], $[[NUM1]]{{$}}
+; CHECK-NEXT: shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM1]]{{$}}
+; CHECK-NEXT: return $pop[[NUM3]]{{$}}
+define i64 @load_s_i1_i64(i1* %p) {
+ %v = load i1, i1* %p
+ %e = sext i1 %v to i64
+ ret i64 %e
+}
+
+; CHECK-LABEL: store_i32_i1:
+; CHECK: i32.const $push[[NUM0:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: i32.and $push[[NUM1:[0-9]+]]=, $1, $pop[[NUM0]]{{$}}
+; CHECK-NEXT: i32.store8 $discard=, 0($0), $pop[[NUM1]]{{$}}
+define void @store_i32_i1(i1* %p, i32 %v) {
+ %t = trunc i32 %v to i1
+ store i1 %t, i1* %p
+ ret void
+}
+
+; CHECK-LABEL: store_i64_i1:
+; CHECK: i64.const $push[[NUM0:[0-9]+]]=, 1{{$}}
+; CHECK-NEXT: i64.and $push[[NUM1:[0-9]+]]=, $1, $pop[[NUM0]]{{$}}
+; CHECK-NEXT: i64.store8 $discard=, 0($0), $pop[[NUM1]]{{$}}
+define void @store_i64_i1(i1* %p, i64 %v) {
+ %t = trunc i64 %v to i1
+ store i1 %t, i1* %p
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/load.ll b/test/CodeGen/WebAssembly/load.ll
new file mode 100644
index 000000000000..aa8ae689e0d1
--- /dev/null
+++ b/test/CodeGen/WebAssembly/load.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic loads are assembled properly.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: ldi32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i32 @ldi32(i32 *%p) {
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
+; CHECK-LABEL: ldi64:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define i64 @ldi64(i64 *%p) {
+ %v = load i64, i64* %p
+ ret i64 %v
+}
+
+; CHECK-LABEL: ldf32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define float @ldf32(float *%p) {
+ %v = load float, float* %p
+ ret float %v
+}
+
+; CHECK-LABEL: ldf64:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.load $push[[NUM:[0-9]+]]=, 0($0){{$}}
+; CHECK-NEXT: return $pop[[NUM]]{{$}}
+define double @ldf64(double *%p) {
+ %v = load double, double* %p
+ ret double %v
+}
diff --git a/test/CodeGen/WebAssembly/loop-idiom.ll b/test/CodeGen/WebAssembly/loop-idiom.ll
new file mode 100644
index 000000000000..2906df20a229
--- /dev/null
+++ b/test/CodeGen/WebAssembly/loop-idiom.ll
@@ -0,0 +1,53 @@
+; RUN: opt -loop-idiom -S < %s -march=wasm32 | FileCheck %s
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+
+; Make sure loop-idiom doesn't create memcpy or memset. These aren't well
+; supported in WebAssembly for now.
+;
+; TODO Check the patterns are recognized once memcpy / memset are supported.
+
+; CHECK-LABEL: @cpy(
+; CHECK-NOT: llvm.memcpy
+; CHECK: load
+; CHECK: store
+define void @cpy(i64 %Size) {
+bb.nph:
+ %Base = alloca i8, i32 10000
+ %Dest = alloca i8, i32 10000
+ br label %for.body
+
+for.body:
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ %DestI = getelementptr i8, i8* %Dest, i64 %indvar
+ %V = load i8, i8* %I.0.014, align 1
+ store i8 %V, i8* %DestI, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: @set(
+; CHECK-NOT: llvm.memset
+; CHECK: store
+define void @set(i8* %Base, i64 %Size) {
+bb.nph:
+ br label %for.body
+
+for.body:
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
+ %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
+ store i8 0, i8* %I.0.014, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %Size
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/memory-addr32.ll b/test/CodeGen/WebAssembly/memory-addr32.ll
new file mode 100644
index 000000000000..e2dd556bddc0
--- /dev/null
+++ b/test/CodeGen/WebAssembly/memory-addr32.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic memory operations assemble as expected with 32-bit addresses.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare i32 @llvm.wasm.memory.size.i32() nounwind readonly
+declare void @llvm.wasm.grow.memory.i32(i32) nounwind
+
+; CHECK-LABEL: memory_size:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: memory_size $push0={{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @memory_size() {
+ %a = call i32 @llvm.wasm.memory.size.i32()
+ ret i32 %a
+}
+
+; CHECK-LABEL: grow_memory:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK: grow_memory $0{{$}}
+; CHECK-NEXT: return{{$}}
+define void @grow_memory(i32 %n) {
+ call void @llvm.wasm.grow.memory.i32(i32 %n)
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/memory-addr64.ll b/test/CodeGen/WebAssembly/memory-addr64.ll
new file mode 100644
index 000000000000..5de1f2b11cfd
--- /dev/null
+++ b/test/CodeGen/WebAssembly/memory-addr64.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic memory operations assemble as expected with 64-bit addresses.
+
+target datalayout = "e-p:64:64-i64:64-n32:64-S128"
+target triple = "wasm64-unknown-unknown"
+
+declare i64 @llvm.wasm.memory.size.i64() nounwind readonly
+declare void @llvm.wasm.grow.memory.i64(i64) nounwind
+
+; CHECK-LABEL: memory_size:
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: memory_size $push0={{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @memory_size() {
+ %a = call i64 @llvm.wasm.memory.size.i64()
+ ret i64 %a
+}
+
+; CHECK-LABEL: grow_memory:
+; CHECK-NEXT: .param i64{{$}}
+; CHECK: grow_memory $0{{$}}
+; CHECK-NEXT: return{{$}}
+define void @grow_memory(i64 %n) {
+ call void @llvm.wasm.grow.memory.i64(i64 %n)
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/offset-folding.ll b/test/CodeGen/WebAssembly/offset-folding.ll
new file mode 100644
index 000000000000..2b4e8a90b0f0
--- /dev/null
+++ b/test/CodeGen/WebAssembly/offset-folding.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that constant offsets can be folded into global addresses.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; FIXME: make this 'external' and make sure it still works. WebAssembly
+; currently only supports linking single files, so 'external' makes
+; little sense.
+@x = global [0 x i32] zeroinitializer
+@y = global [50 x i32] zeroinitializer
+
+; Test basic constant offsets of both defined and external symbols.
+
+; CHECK-LABEL: test0:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push0=, x+188{{$}}
+; CHECK=NEXT: return $pop0{{$}}
+define i32* @test0() {
+ ret i32* getelementptr ([0 x i32], [0 x i32]* @x, i32 0, i32 47)
+}
+
+; CHECK-LABEL: test1:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push0=, y+188{{$}}
+; CHECK=NEXT: return $pop0{{$}}
+define i32* @test1() {
+ ret i32* getelementptr ([50 x i32], [50 x i32]* @y, i32 0, i32 47)
+}
+
+; Test zero offsets.
+
+; CHECK-LABEL: test2:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push0=, x{{$}}
+; CHECK=NEXT: return $pop0{{$}}
+define i32* @test2() {
+ ret i32* getelementptr ([0 x i32], [0 x i32]* @x, i32 0, i32 0)
+}
+
+; CHECK-LABEL: test3:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push0=, y{{$}}
+; CHECK=NEXT: return $pop0{{$}}
+define i32* @test3() {
+ ret i32* getelementptr ([50 x i32], [50 x i32]* @y, i32 0, i32 0)
+}
diff --git a/test/CodeGen/WebAssembly/offset.ll b/test/CodeGen/WebAssembly/offset.ll
new file mode 100644
index 000000000000..75a0bc9ab6c6
--- /dev/null
+++ b/test/CodeGen/WebAssembly/offset.ll
@@ -0,0 +1,185 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test constant load and store address offsets.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; With an nuw add, we can fold an offset.
+
+; CHECK-LABEL: load_i32_with_folded_offset:
+; CHECK: i32.load $push0=, 24($0){{$}}
+define i32 @load_i32_with_folded_offset(i32* %p) {
+ %q = ptrtoint i32* %p to i32
+ %r = add nuw i32 %q, 24
+ %s = inttoptr i32 %r to i32*
+ %t = load i32, i32* %s
+ ret i32 %t
+}
+
+; Without nuw, and even with nsw, we can't fold an offset.
+
+; CHECK-LABEL: load_i32_with_unfolded_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i32.load $push2=, 0($pop1){{$}}
+define i32 @load_i32_with_unfolded_offset(i32* %p) {
+ %q = ptrtoint i32* %p to i32
+ %r = add nsw i32 %q, 24
+ %s = inttoptr i32 %r to i32*
+ %t = load i32, i32* %s
+ ret i32 %t
+}
+
+; Same as above but with i64.
+
+; CHECK-LABEL: load_i64_with_folded_offset:
+; CHECK: i64.load $push0=, 24($0){{$}}
+define i64 @load_i64_with_folded_offset(i64* %p) {
+ %q = ptrtoint i64* %p to i32
+ %r = add nuw i32 %q, 24
+ %s = inttoptr i32 %r to i64*
+ %t = load i64, i64* %s
+ ret i64 %t
+}
+
+; Same as above but with i64.
+
+; CHECK-LABEL: load_i64_with_unfolded_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i64.load $push2=, 0($pop1){{$}}
+define i64 @load_i64_with_unfolded_offset(i64* %p) {
+ %q = ptrtoint i64* %p to i32
+ %r = add nsw i32 %q, 24
+ %s = inttoptr i32 %r to i64*
+ %t = load i64, i64* %s
+ ret i64 %t
+}
+
+; Same as above but with store.
+
+; CHECK-LABEL: store_i32_with_folded_offset:
+; CHECK: i32.store $discard=, 24($0), $pop0{{$}}
+define void @store_i32_with_folded_offset(i32* %p) {
+ %q = ptrtoint i32* %p to i32
+ %r = add nuw i32 %q, 24
+ %s = inttoptr i32 %r to i32*
+ store i32 0, i32* %s
+ ret void
+}
+
+; Same as above but with store.
+
+; CHECK-LABEL: store_i32_with_unfolded_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i32_with_unfolded_offset(i32* %p) {
+ %q = ptrtoint i32* %p to i32
+ %r = add nsw i32 %q, 24
+ %s = inttoptr i32 %r to i32*
+ store i32 0, i32* %s
+ ret void
+}
+
+; Same as above but with store with i64.
+
+; CHECK-LABEL: store_i64_with_folded_offset:
+; CHECK: i64.store $discard=, 24($0), $pop0{{$}}
+define void @store_i64_with_folded_offset(i64* %p) {
+ %q = ptrtoint i64* %p to i32
+ %r = add nuw i32 %q, 24
+ %s = inttoptr i32 %r to i64*
+ store i64 0, i64* %s
+ ret void
+}
+
+; Same as above but with store with i64.
+
+; CHECK-LABEL: store_i64_with_unfolded_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i64_with_unfolded_offset(i64* %p) {
+ %q = ptrtoint i64* %p to i32
+ %r = add nsw i32 %q, 24
+ %s = inttoptr i32 %r to i64*
+ store i64 0, i64* %s
+ ret void
+}
+
+; When loading from a fixed address, materialize a zero.
+
+; CHECK-LABEL: load_i32_from_numeric_address
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.load $push1=, 42($pop0){{$}}
+define i32 @load_i32_from_numeric_address() {
+ %s = inttoptr i32 42 to i32*
+ %t = load i32, i32* %s
+ ret i32 %t
+}
+
+; CHECK-LABEL: load_i32_from_global_address
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.load $push1=, gv($pop0){{$}}
+@gv = global i32 0
+define i32 @load_i32_from_global_address() {
+ %t = load i32, i32* @gv
+ ret i32 %t
+}
+
+; CHECK-LABEL: store_i32_to_numeric_address:
+; CHECK: i32.const $0=, 0{{$}}
+; CHECK: i32.store $discard=, 42($0), $0{{$}}
+define void @store_i32_to_numeric_address() {
+ %s = inttoptr i32 42 to i32*
+ store i32 0, i32* %s
+ ret void
+}
+
+; CHECK-LABEL: store_i32_to_global_address:
+; CHECK: i32.const $0=, 0{{$}}
+; CHECK: i32.store $discard=, gv($0), $0{{$}}
+define void @store_i32_to_global_address() {
+ store i32 0, i32* @gv
+ ret void
+}
+
+; Fold an offset into a sign-extending load.
+
+; CHECK-LABEL: load_i8_s_with_folded_offset:
+; CHECK: i32.load8_s $push0=, 24($0){{$}}
+define i32 @load_i8_s_with_folded_offset(i8* %p) {
+ %q = ptrtoint i8* %p to i32
+ %r = add nuw i32 %q, 24
+ %s = inttoptr i32 %r to i8*
+ %t = load i8, i8* %s
+ %u = sext i8 %t to i32
+ ret i32 %u
+}
+
+; Fold an offset into a zero-extending load.
+
+; CHECK-LABEL: load_i8_u_with_folded_offset:
+; CHECK: i32.load8_u $push0=, 24($0){{$}}
+define i32 @load_i8_u_with_folded_offset(i8* %p) {
+ %q = ptrtoint i8* %p to i32
+ %r = add nuw i32 %q, 24
+ %s = inttoptr i32 %r to i8*
+ %t = load i8, i8* %s
+ %u = zext i8 %t to i32
+ ret i32 %u
+}
+
+; Fold an offset into a truncating store.
+
+; CHECK-LABEL: store_i8_with_folded_offset:
+; CHECK: i32.store8 $discard=, 24($0), $pop0{{$}}
+define void @store_i8_with_folded_offset(i8* %p) {
+ %q = ptrtoint i8* %p to i32
+ %r = add nuw i32 %q, 24
+ %s = inttoptr i32 %r to i8*
+ store i8 0, i8* %s
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/phi.ll b/test/CodeGen/WebAssembly/phi.ll
new file mode 100644
index 000000000000..bae8a7c9e3b8
--- /dev/null
+++ b/test/CodeGen/WebAssembly/phi.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs | FileCheck %s
+
+; Test that phis are lowered.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; Basic phi triangle.
+
+; CHECK-LABEL: test0:
+; CHECK: div_s $[[NUM0:[0-9]+]]=, $0, $pop[[NUM1:[0-9]+]]{{$}}
+; CHECK: return $[[NUM0]]{{$}}
+define i32 @test0(i32 %p) {
+entry:
+ %t = icmp slt i32 %p, 0
+ br i1 %t, label %true, label %done
+true:
+ %a = sdiv i32 %p, 3
+ br label %done
+done:
+ %s = phi i32 [ %a, %true ], [ %p, %entry ]
+ ret i32 %s
+}
+
+; Swap phis.
+
+; CHECK-LABEL: test1:
+; CHECK: BB1_1:
+; CHECK: copy_local $[[NUM0:[0-9]+]]=, $[[NUM1:[0-9]+]]{{$}}
+; CHECK: copy_local $[[NUM1]]=, $[[NUM2:[0-9]+]]{{$}}
+; CHECK: copy_local $[[NUM2]]=, $[[NUM0]]{{$}}
+define i32 @test1(i32 %n) {
+entry:
+ br label %loop
+
+loop:
+ %a = phi i32 [ 0, %entry ], [ %b, %loop ]
+ %b = phi i32 [ 1, %entry ], [ %a, %loop ]
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+
+ %i.next = add i32 %i, 1
+ %t = icmp slt i32 %i.next, %n
+ br i1 %t, label %loop, label %exit
+
+exit:
+ ret i32 %a
+}
diff --git a/test/CodeGen/WebAssembly/reg-stackify.ll b/test/CodeGen/WebAssembly/reg-stackify.ll
new file mode 100644
index 000000000000..1c1b1e193f7a
--- /dev/null
+++ b/test/CodeGen/WebAssembly/reg-stackify.ll
@@ -0,0 +1,126 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs | FileCheck %s
+
+; Test the register stackifier pass.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; No because of pointer aliasing.
+
+; CHECK-LABEL: no0:
+; CHECK: return $1{{$}}
+define i32 @no0(i32* %p, i32* %q) {
+ %t = load i32, i32* %q
+ store i32 0, i32* %p
+ ret i32 %t
+}
+
+; No because of side effects.
+
+; CHECK-LABEL: no1:
+; CHECK: return $1{{$}}
+define i32 @no1(i32* %p, i32* dereferenceable(4) %q) {
+ %t = load volatile i32, i32* %q, !invariant.load !0
+ store volatile i32 0, i32* %p
+ ret i32 %t
+}
+
+; Yes because of invariant load and no side effects.
+
+; CHECK-LABEL: yes0:
+; CHECK: return $pop0{{$}}
+define i32 @yes0(i32* %p, i32* dereferenceable(4) %q) {
+ %t = load i32, i32* %q, !invariant.load !0
+ store i32 0, i32* %p
+ ret i32 %t
+}
+
+; Yes because of no intervening side effects.
+
+; CHECK-LABEL: yes1:
+; CHECK: return $pop0{{$}}
+define i32 @yes1(i32* %q) {
+ %t = load volatile i32, i32* %q
+ ret i32 %t
+}
+
+; Don't schedule stack uses into the stack. To reduce register pressure, the
+; scheduler might be tempted to move the definition of $2 down. However, this
+; would risk getting incorrect liveness if the instructions are later
+; rearranged to make the stack contiguous.
+
+; CHECK-LABEL: stack_uses:
+; CHECK-NEXT: .param i32, i32, i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: .local i32, i32{{$}}
+; CHECK-NEXT: i32.const $5=, 2{{$}}
+; CHECK-NEXT: i32.const $4=, 1{{$}}
+; CHECK-NEXT: block BB4_2{{$}}
+; CHECK-NEXT: i32.lt_s $push0=, $0, $4{{$}}
+; CHECK-NEXT: i32.lt_s $push1=, $1, $5{{$}}
+; CHECK-NEXT: i32.xor $push4=, $pop0, $pop1{{$}}
+; CHECK-NEXT: i32.lt_s $push2=, $2, $4{{$}}
+; CHECK-NEXT: i32.lt_s $push3=, $3, $5{{$}}
+; CHECK-NEXT: i32.xor $push5=, $pop2, $pop3{{$}}
+; CHECK-NEXT: i32.xor $push6=, $pop4, $pop5{{$}}
+; CHECK-NEXT: i32.ne $push7=, $pop6, $4{{$}}
+; CHECK-NEXT: br_if $pop7, BB4_2{{$}}
+; CHECK-NEXT: i32.const $push8=, 0{{$}}
+; CHECK-NEXT: return $pop8{{$}}
+; CHECK-NEXT: BB4_2:
+; CHECK-NEXT: return $4{{$}}
+define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) {
+entry:
+ %c = icmp sle i32 %x, 0
+ %d = icmp sle i32 %y, 1
+ %e = icmp sle i32 %z, 0
+ %f = icmp sle i32 %w, 1
+ %g = xor i1 %c, %d
+ %h = xor i1 %e, %f
+ %i = xor i1 %g, %h
+ br i1 %i, label %true, label %false
+true:
+ ret i32 0
+false:
+ ret i32 1
+}
+
+; Test an interesting case where the load has multiple uses and cannot
+; be trivially stackified.
+
+; CHECK-LABEL: multiple_uses:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .local i32{{$}}
+; CHECK-NEXT: i32.load $3=, 0($2){{$}}
+; CHECK-NEXT: block BB5_3{{$}}
+; CHECK-NEXT: i32.ge_u $push0=, $3, $1{{$}}
+; CHECK-NEXT: br_if $pop0, BB5_3{{$}}
+; CHECK-NEXT: i32.lt_u $push1=, $3, $0{{$}}
+; CHECK-NEXT: br_if $pop1, BB5_3{{$}}
+; CHECK-NEXT: i32.store $discard=, 0($2), $3{{$}}
+; CHECK-NEXT: BB5_3:
+; CHECK-NEXT: return{{$}}
+define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
+bb:
+ br label %loop
+
+loop:
+ %tmp7 = load i32, i32* %arg2
+ %tmp8 = inttoptr i32 %tmp7 to i32*
+ %tmp9 = icmp uge i32* %tmp8, %arg1
+ %tmp10 = icmp ult i32* %tmp8, %arg0
+ %tmp11 = or i1 %tmp9, %tmp10
+ br i1 %tmp11, label %back, label %then
+
+then:
+ store i32 %tmp7, i32* %arg2
+ br label %back
+
+back:
+ br i1 undef, label %return, label %loop
+
+return:
+ ret void
+}
+
+!0 = !{}
diff --git a/test/CodeGen/WebAssembly/return-int32.ll b/test/CodeGen/WebAssembly/return-int32.ll
new file mode 100644
index 000000000000..663cef4e459d
--- /dev/null
+++ b/test/CodeGen/WebAssembly/return-int32.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: return_i32:
+; CHECK: return $0{{$}}
+define i32 @return_i32(i32 %p) {
+ ret i32 %p
+}
diff --git a/test/CodeGen/WebAssembly/return-void.ll b/test/CodeGen/WebAssembly/return-void.ll
new file mode 100644
index 000000000000..4933bfcb87e6
--- /dev/null
+++ b/test/CodeGen/WebAssembly/return-void.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: return_void:
+; CHECK: return{{$}}
+define void @return_void() {
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/returned.ll b/test/CodeGen/WebAssembly/returned.ll
new file mode 100644
index 000000000000..e208e198c73d
--- /dev/null
+++ b/test/CodeGen/WebAssembly/returned.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that the "returned" attribute is optimized effectively.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: _Z3foov:
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push0=, 1{{$}}
+; CHECK-NEXT: {{^}} i32.call $push1=, _Znwm, $pop0{{$}}
+; CHECK-NEXT: {{^}} i32.call $push2=, _ZN5AppleC1Ev, $pop1{{$}}
+; CHECK-NEXT: return $pop2{{$}}
+%class.Apple = type { i8 }
+declare noalias i8* @_Znwm(i32)
+declare %class.Apple* @_ZN5AppleC1Ev(%class.Apple* returned)
+define %class.Apple* @_Z3foov() {
+entry:
+ %call = tail call noalias i8* @_Znwm(i32 1)
+ %0 = bitcast i8* %call to %class.Apple*
+ %call1 = tail call %class.Apple* @_ZN5AppleC1Ev(%class.Apple* %0)
+ ret %class.Apple* %0
+}
+
+; CHECK-LABEL: _Z3barPvS_l:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: {{^}} i32.call $push0=, memcpy, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+declare i8* @memcpy(i8* returned, i8*, i32)
+define i8* @_Z3barPvS_l(i8* %p, i8* %s, i32 %n) {
+entry:
+ %call = tail call i8* @memcpy(i8* %p, i8* %s, i32 %n)
+ ret i8* %p
+}
+
+; Test that the optimization isn't performed on constant arguments.
+
+; CHECK-LABEL: test_constant_arg:
+; CHECK-NEXT: i32.const $push0=, global{{$}}
+; CHECK-NEXT: {{^}} i32.call $discard=, returns_arg, $pop0{{$}}
+; CHECK-NEXT: return{{$}}
+@global = external global i32
+@addr = global i32* @global
+define void @test_constant_arg() {
+ %call = call i32* @returns_arg(i32* @global)
+ ret void
+}
+declare i32* @returns_arg(i32* returned)
diff --git a/test/CodeGen/WebAssembly/select.ll b/test/CodeGen/WebAssembly/select.ll
new file mode 100644
index 000000000000..1b1d7aed7154
--- /dev/null
+++ b/test/CodeGen/WebAssembly/select.ll
@@ -0,0 +1,135 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -fast-isel | FileCheck %s
+
+; Test that wasm select instruction is selected from LLVM select instruction.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: select_i32_bool:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.select $push0=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @select_i32_bool(i1 zeroext %a, i32 %b, i32 %c) {
+ %cond = select i1 %a, i32 %b, i32 %c
+ ret i32 %cond
+}
+
+; CHECK-LABEL: select_i32_eq:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.select $push0=, $0, $2, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @select_i32_eq(i32 %a, i32 %b, i32 %c) {
+ %cmp = icmp eq i32 %a, 0
+ %cond = select i1 %cmp, i32 %b, i32 %c
+ ret i32 %cond
+}
+
+; CHECK-LABEL: select_i32_ne:
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.select $push0=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i32 @select_i32_ne(i32 %a, i32 %b, i32 %c) {
+ %cmp = icmp ne i32 %a, 0
+ %cond = select i1 %cmp, i32 %b, i32 %c
+ ret i32 %cond
+}
+
+; CHECK-LABEL: select_i64_bool:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.select $push0=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @select_i64_bool(i1 zeroext %a, i64 %b, i64 %c) {
+ %cond = select i1 %a, i64 %b, i64 %c
+ ret i64 %cond
+}
+
+; CHECK-LABEL: select_i64_eq:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.select $push0=, $0, $2, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @select_i64_eq(i32 %a, i64 %b, i64 %c) {
+ %cmp = icmp eq i32 %a, 0
+ %cond = select i1 %cmp, i64 %b, i64 %c
+ ret i64 %cond
+}
+
+; CHECK-LABEL: select_i64_ne:
+; CHECK-NEXT: .param i32, i64, i64{{$}}
+; CHECK-NEXT: .result i64{{$}}
+; CHECK-NEXT: i64.select $push0=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define i64 @select_i64_ne(i32 %a, i64 %b, i64 %c) {
+ %cmp = icmp ne i32 %a, 0
+ %cond = select i1 %cmp, i64 %b, i64 %c
+ ret i64 %cond
+}
+
+; CHECK-LABEL: select_f32_bool:
+; CHECK-NEXT: .param i32, f32, f32{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.select $push0=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @select_f32_bool(i1 zeroext %a, float %b, float %c) {
+ %cond = select i1 %a, float %b, float %c
+ ret float %cond
+}
+
+; CHECK-LABEL: select_f32_eq:
+; CHECK-NEXT: .param i32, f32, f32{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.select $push0=, $0, $2, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @select_f32_eq(i32 %a, float %b, float %c) {
+ %cmp = icmp eq i32 %a, 0
+ %cond = select i1 %cmp, float %b, float %c
+ ret float %cond
+}
+
+; CHECK-LABEL: select_f32_ne:
+; CHECK-NEXT: .param i32, f32, f32{{$}}
+; CHECK-NEXT: .result f32{{$}}
+; CHECK-NEXT: f32.select $push0=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define float @select_f32_ne(i32 %a, float %b, float %c) {
+ %cmp = icmp ne i32 %a, 0
+ %cond = select i1 %cmp, float %b, float %c
+ ret float %cond
+}
+
+; CHECK-LABEL: select_f64_bool:
+; CHECK-NEXT: .param i32, f64, f64{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.select $push0=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @select_f64_bool(i1 zeroext %a, double %b, double %c) {
+ %cond = select i1 %a, double %b, double %c
+ ret double %cond
+}
+
+; CHECK-LABEL: select_f64_eq:
+; CHECK-NEXT: .param i32, f64, f64{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.select $push0=, $0, $2, $1{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @select_f64_eq(i32 %a, double %b, double %c) {
+ %cmp = icmp eq i32 %a, 0
+ %cond = select i1 %cmp, double %b, double %c
+ ret double %cond
+}
+
+; CHECK-LABEL: select_f64_ne:
+; CHECK-NEXT: .param i32, f64, f64{{$}}
+; CHECK-NEXT: .result f64{{$}}
+; CHECK-NEXT: f64.select $push0=, $0, $1, $2{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+define double @select_f64_ne(i32 %a, double %b, double %c) {
+ %cmp = icmp ne i32 %a, 0
+ %cond = select i1 %cmp, double %b, double %c
+ ret double %cond
+}
diff --git a/test/CodeGen/WebAssembly/signext-zeroext.ll b/test/CodeGen/WebAssembly/signext-zeroext.ll
new file mode 100644
index 000000000000..40d49af0ccc7
--- /dev/null
+++ b/test/CodeGen/WebAssembly/signext-zeroext.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test zeroext and signext ABI keywords
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: z2s_func:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: .local i32{{$}}
+; CHECK-NEXT: i32.const $[[NUM0:[0-9]+]]=, 24{{$}}
+; CHECK-NEXT: i32.shl $push[[NUM2:[0-9]+]]=, $0, $[[NUM0]]{{$}}
+; CHECK-NEXT: i32.shr_s $push[[NUM3:[0-9]+]]=, $pop[[NUM2]], $[[NUM0]]{{$}}
+; CHECK-NEXT: return $pop[[NUM3]]{{$}}
+define signext i8 @z2s_func(i8 zeroext %t) {
+ ret i8 %t
+}
+
+; CHECK-LABEL: s2z_func:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push[[NUM0:[0-9]+]]=, 255{{$}}
+; CHECK-NEXT: i32.and $push[[NUM1:[0-9]+]]=, $0, $pop[[NUM0]]{{$}}
+; CHECK-NEXT: return $pop[[NUM1]]{{$}}
+define zeroext i8 @s2z_func(i8 signext %t) {
+ ret i8 %t
+}
+
+; CHECK-LABEL: z2s_call:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: i32.const $push[[NUM0:[0-9]+]]=, 255{{$}}
+; CHECK-NEXT: i32.and $push[[NUM1:[0-9]+]]=, $0, $pop[[NUM0]]{{$}}
+; CHECK-NEXT: call $push[[NUM2:[0-9]+]]=, z2s_func, $pop[[NUM1]]{{$}}
+; CHECK-NEXT: return $pop[[NUM2]]{{$}}
+define i32 @z2s_call(i32 %t) {
+ %s = trunc i32 %t to i8
+ %u = call signext i8 @z2s_func(i8 zeroext %s)
+ %v = sext i8 %u to i32
+ ret i32 %v
+}
+
+; CHECK-LABEL: s2z_call:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: .local i32{{$}}
+; CHECK-NEXT: i32.const $[[NUM0:[0-9]+]]=, 24{{$}}
+; CHECK-NEXT: i32.shl $push[[NUM1:[0-9]+]]=, $0, $[[NUM0]]{{$}}
+; CHECK-NEXT: i32.shr_s $push[[NUM2:[0-9]+]]=, $pop[[NUM1]], $[[NUM0]]{{$}}
+; CHECK-NEXT: call $push[[NUM3:[0-9]]]=, s2z_func, $pop[[NUM2]]{{$}}
+; CHECK-NEXT: i32.shl $push[[NUM4:[0-9]+]]=, $pop[[NUM3]], $[[NUM0]]{{$}}
+; CHECK-NEXT: i32.shr_s $push[[NUM5:[0-9]+]]=, $pop[[NUM4]], $[[NUM0]]{{$}}
+; CHECK-NEXT: return $pop[[NUM5]]{{$}}
+define i32 @s2z_call(i32 %t) {
+ %s = trunc i32 %t to i8
+ %u = call zeroext i8 @s2z_func(i8 signext %s)
+ %v = sext i8 %u to i32
+ ret i32 %v
+}
diff --git a/test/CodeGen/WebAssembly/store-results.ll b/test/CodeGen/WebAssembly/store-results.ll
new file mode 100644
index 000000000000..73479e544db9
--- /dev/null
+++ b/test/CodeGen/WebAssembly/store-results.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that the wasm-store-results pass makes users of stored values use the
+; result of store expressions to reduce get_local/set_local traffic.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: single_block:
+; CHECK-NOT: .local
+; CHECK: i32.const $push{{[0-9]+}}=, 0{{$}}
+; CHECK: i32.store $push[[STORE:[0-9]+]]=, 0($0), $pop{{[0-9]+}}{{$}}
+; CHECK: return $pop[[STORE]]{{$}}
+define i32 @single_block(i32* %p) {
+entry:
+ store i32 0, i32* %p
+ ret i32 0
+}
+
+; Test interesting corner cases for wasm-store-results, in which the operand of
+; a store ends up getting used by a phi, which needs special handling in the
+; dominance test, since phis use their operands on their incoming edges.
+
+%class.Vec3 = type { float, float, float }
+
+@pos = global %class.Vec3 zeroinitializer, align 4
+
+; CHECK-LABEL: foo:
+; CHECK: i32.store $discard=, pos($0), $0{{$}}
+define void @foo() {
+for.body.i:
+ br label %for.body5.i
+
+for.body5.i:
+ %i.0168.i = phi i32 [ 0, %for.body.i ], [ %inc.i, %for.body5.i ]
+ %conv6.i = sitofp i32 %i.0168.i to float
+ store volatile float 0.0, float* getelementptr inbounds (%class.Vec3, %class.Vec3* @pos, i32 0, i32 0)
+ %inc.i = add nuw nsw i32 %i.0168.i, 1
+ %exitcond.i = icmp eq i32 %inc.i, 256
+ br i1 %exitcond.i, label %for.cond.cleanup4.i, label %for.body5.i
+
+for.cond.cleanup4.i:
+ ret void
+}
+
+; CHECK-LABEL: bar:
+; CHECK: i32.store $discard=, pos($0), $0{{$}}
+define void @bar() {
+for.body.i:
+ br label %for.body5.i
+
+for.body5.i:
+ %i.0168.i = phi float [ 0.0, %for.body.i ], [ %inc.i, %for.body5.i ]
+ store volatile float 0.0, float* getelementptr inbounds (%class.Vec3, %class.Vec3* @pos, i32 0, i32 0)
+ %inc.i = fadd float %i.0168.i, 1.0
+ %exitcond.i = fcmp oeq float %inc.i, 256.0
+ br i1 %exitcond.i, label %for.cond.cleanup4.i, label %for.body5.i
+
+for.cond.cleanup4.i:
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/store-trunc.ll b/test/CodeGen/WebAssembly/store-trunc.ll
new file mode 100644
index 000000000000..c12b716dfd59
--- /dev/null
+++ b/test/CodeGen/WebAssembly/store-trunc.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that truncating stores are assembled properly.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: trunc_i8_i32:
+; CHECK: i32.store8 $discard=, 0($0), $1{{$}}
+define void @trunc_i8_i32(i8 *%p, i32 %v) {
+ %t = trunc i32 %v to i8
+ store i8 %t, i8* %p
+ ret void
+}
+
+; CHECK-LABEL: trunc_i16_i32:
+; CHECK: i32.store16 $discard=, 0($0), $1{{$}}
+define void @trunc_i16_i32(i16 *%p, i32 %v) {
+ %t = trunc i32 %v to i16
+ store i16 %t, i16* %p
+ ret void
+}
+
+; CHECK-LABEL: trunc_i8_i64:
+; CHECK: i64.store8 $discard=, 0($0), $1{{$}}
+define void @trunc_i8_i64(i8 *%p, i64 %v) {
+ %t = trunc i64 %v to i8
+ store i8 %t, i8* %p
+ ret void
+}
+
+; CHECK-LABEL: trunc_i16_i64:
+; CHECK: i64.store16 $discard=, 0($0), $1{{$}}
+define void @trunc_i16_i64(i16 *%p, i64 %v) {
+ %t = trunc i64 %v to i16
+ store i16 %t, i16* %p
+ ret void
+}
+
+; CHECK-LABEL: trunc_i32_i64:
+; CHECK: i64.store32 $discard=, 0($0), $1{{$}}
+define void @trunc_i32_i64(i32 *%p, i64 %v) {
+ %t = trunc i64 %v to i32
+ store i32 %t, i32* %p
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/store.ll b/test/CodeGen/WebAssembly/store.ll
new file mode 100644
index 000000000000..442caedef3a7
--- /dev/null
+++ b/test/CodeGen/WebAssembly/store.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Test that basic stores are assembled properly.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: sti32:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti32(i32 *%p, i32 %v) {
+ store i32 %v, i32* %p
+ ret void
+}
+
+; CHECK-LABEL: sti64:
+; CHECK-NEXT: .param i32, i64{{$}}
+; CHECK-NEXT: i64.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @sti64(i64 *%p, i64 %v) {
+ store i64 %v, i64* %p
+ ret void
+}
+
+; CHECK-LABEL: stf32:
+; CHECK-NEXT: .param i32, f32{{$}}
+; CHECK-NEXT: f32.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @stf32(float *%p, float %v) {
+ store float %v, float* %p
+ ret void
+}
+
+; CHECK-LABEL: stf64:
+; CHECK-NEXT: .param i32, f64{{$}}
+; CHECK-NEXT: f64.store $discard=, 0($0), $1{{$}}
+; CHECK-NEXT: return{{$}}
+define void @stf64(double *%p, double %v) {
+ store double %v, double* %p
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/switch.ll b/test/CodeGen/WebAssembly/switch.ll
new file mode 100644
index 000000000000..7f6f6efff7d6
--- /dev/null
+++ b/test/CodeGen/WebAssembly/switch.ll
@@ -0,0 +1,174 @@
+; RUN: llc < %s -asm-verbose=false -disable-block-placement -verify-machineinstrs | FileCheck %s
+
+; Test switch instructions. Block placement is disabled because it reorders
+; the blocks in a way that isn't interesting here.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare void @foo0()
+declare void @foo1()
+declare void @foo2()
+declare void @foo3()
+declare void @foo4()
+declare void @foo5()
+
+; CHECK-LABEL: bar32:
+; CHECK: block BB0_8{{$}}
+; CHECK: block BB0_7{{$}}
+; CHECK: block BB0_6{{$}}
+; CHECK: block BB0_5{{$}}
+; CHECK: block BB0_4{{$}}
+; CHECK: block BB0_3{{$}}
+; CHECK: block BB0_2{{$}}
+; CHECK: tableswitch {{[^,]*}}, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_4, BB0_4, BB0_4, BB0_4, BB0_4, BB0_4, BB0_5, BB0_6, BB0_7{{$}}
+; CHECK: BB0_2:
+; CHECK: call foo0
+; CHECK: BB0_3:
+; CHECK: call foo1
+; CHECK: BB0_4:
+; CHECK: call foo2
+; CHECK: BB0_5:
+; CHECK: call foo3
+; CHECK: BB0_6:
+; CHECK: call foo4
+; CHECK: BB0_7:
+; CHECK: call foo5
+; CHECK: BB0_8:
+; CHECK: return{{$}}
+define void @bar32(i32 %n) {
+entry:
+ switch i32 %n, label %sw.epilog [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb
+ i32 3, label %sw.bb
+ i32 4, label %sw.bb
+ i32 5, label %sw.bb
+ i32 6, label %sw.bb
+ i32 7, label %sw.bb.1
+ i32 8, label %sw.bb.1
+ i32 9, label %sw.bb.1
+ i32 10, label %sw.bb.1
+ i32 11, label %sw.bb.1
+ i32 12, label %sw.bb.1
+ i32 13, label %sw.bb.1
+ i32 14, label %sw.bb.1
+ i32 15, label %sw.bb.2
+ i32 16, label %sw.bb.2
+ i32 17, label %sw.bb.2
+ i32 18, label %sw.bb.2
+ i32 19, label %sw.bb.2
+ i32 20, label %sw.bb.2
+ i32 21, label %sw.bb.3
+ i32 22, label %sw.bb.4
+ i32 23, label %sw.bb.5
+ ]
+
+sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry
+ tail call void @foo0()
+ br label %sw.epilog
+
+sw.bb.1: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
+ tail call void @foo1()
+ br label %sw.epilog
+
+sw.bb.2: ; preds = %entry, %entry, %entry, %entry, %entry, %entry
+ tail call void @foo2()
+ br label %sw.epilog
+
+sw.bb.3: ; preds = %entry
+ tail call void @foo3()
+ br label %sw.epilog
+
+sw.bb.4: ; preds = %entry
+ tail call void @foo4()
+ br label %sw.epilog
+
+sw.bb.5: ; preds = %entry
+ tail call void @foo5()
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %sw.bb.5, %sw.bb.4, %sw.bb.3, %sw.bb.2, %sw.bb.1, %sw.bb
+ ret void
+}
+
+; CHECK-LABEL: bar64:
+; CHECK: block BB1_8{{$}}
+; CHECK: block BB1_7{{$}}
+; CHECK: block BB1_6{{$}}
+; CHECK: block BB1_5{{$}}
+; CHECK: block BB1_4{{$}}
+; CHECK: block BB1_3{{$}}
+; CHECK: block BB1_2{{$}}
+; CHECK: tableswitch {{[^,]*}}, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_4, BB1_4, BB1_4, BB1_4, BB1_4, BB1_4, BB1_5, BB1_6, BB1_7{{$}}
+; CHECK: BB1_2:
+; CHECK: call foo0
+; CHECK: BB1_3:
+; CHECK: call foo1
+; CHECK: BB1_4:
+; CHECK: call foo2
+; CHECK: BB1_5:
+; CHECK: call foo3
+; CHECK: BB1_6:
+; CHECK: call foo4
+; CHECK: BB1_7:
+; CHECK: call foo5
+; CHECK: BB1_8:
+; CHECK: return{{$}}
+define void @bar64(i64 %n) {
+entry:
+ switch i64 %n, label %sw.epilog [
+ i64 0, label %sw.bb
+ i64 1, label %sw.bb
+ i64 2, label %sw.bb
+ i64 3, label %sw.bb
+ i64 4, label %sw.bb
+ i64 5, label %sw.bb
+ i64 6, label %sw.bb
+ i64 7, label %sw.bb.1
+ i64 8, label %sw.bb.1
+ i64 9, label %sw.bb.1
+ i64 10, label %sw.bb.1
+ i64 11, label %sw.bb.1
+ i64 12, label %sw.bb.1
+ i64 13, label %sw.bb.1
+ i64 14, label %sw.bb.1
+ i64 15, label %sw.bb.2
+ i64 16, label %sw.bb.2
+ i64 17, label %sw.bb.2
+ i64 18, label %sw.bb.2
+ i64 19, label %sw.bb.2
+ i64 20, label %sw.bb.2
+ i64 21, label %sw.bb.3
+ i64 22, label %sw.bb.4
+ i64 23, label %sw.bb.5
+ ]
+
+sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry
+ tail call void @foo0()
+ br label %sw.epilog
+
+sw.bb.1: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
+ tail call void @foo1()
+ br label %sw.epilog
+
+sw.bb.2: ; preds = %entry, %entry, %entry, %entry, %entry, %entry
+ tail call void @foo2()
+ br label %sw.epilog
+
+sw.bb.3: ; preds = %entry
+ tail call void @foo3()
+ br label %sw.epilog
+
+sw.bb.4: ; preds = %entry
+ tail call void @foo4()
+ br label %sw.epilog
+
+sw.bb.5: ; preds = %entry
+ tail call void @foo5()
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %sw.bb.5, %sw.bb.4, %sw.bb.3, %sw.bb.2, %sw.bb.1, %sw.bb
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/unreachable.ll b/test/CodeGen/WebAssembly/unreachable.ll
new file mode 100644
index 000000000000..414767e5c35d
--- /dev/null
+++ b/test/CodeGen/WebAssembly/unreachable.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -fast-isel -verify-machineinstrs | FileCheck %s
+
+; Test that LLVM unreachable instruction and trap intrinsic are lowered to
+; wasm unreachable
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+declare void @llvm.trap()
+declare void @llvm.debugtrap()
+declare void @abort()
+
+; CHECK-LABEL: f1:
+; CHECK: call abort
+; CHECK: unreachable
+define i32 @f1() {
+ call void @abort()
+ unreachable
+}
+
+; CHECK-LABEL: f2:
+; CHECK: unreachable
+define void @f2() {
+ call void @llvm.trap()
+ ret void
+}
+
+; CHECK-LABEL: f3:
+; CHECK: unreachable
+define void @f3() {
+ call void @llvm.debugtrap()
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/unused-argument.ll b/test/CodeGen/WebAssembly/unused-argument.ll
new file mode 100644
index 000000000000..e7851b216cb4
--- /dev/null
+++ b/test/CodeGen/WebAssembly/unused-argument.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+
+; Make sure that argument offsets are correct even if some arguments are unused.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: unused_first:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: return $1{{$}}
+define i32 @unused_first(i32 %x, i32 %y) {
+ ret i32 %y
+}
+
+; CHECK-LABEL: unused_second:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: return $0{{$}}
+define i32 @unused_second(i32 %x, i32 %y) {
+ ret i32 %x
+}
+
+; CHECK-LABEL: call_something:
+; CHECK-NEXT: {{^}} i32.call $discard=, return_something{{$}}
+; CHECK-NEXT: return{{$}}
+declare i32 @return_something()
+define void @call_something() {
+ call i32 @return_something()
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/userstack.ll b/test/CodeGen/WebAssembly/userstack.ll
new file mode 100644
index 000000000000..6e01e36cf9fa
--- /dev/null
+++ b/test/CodeGen/WebAssembly/userstack.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -fast-isel | FileCheck %s
+
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: alloca32:
+; Check that there is an extra local for the stack pointer.
+; CHECK: .local i32, i32, i32, i32{{$}}
+define void @alloca32() {
+ ; CHECK: i32.const [[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]])
+ ; CHECK-NEXT: i32.const [[L2:.+]]=, 16
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]]
+ %retval = alloca i32
+ ; CHECK: i32.const $push[[L3:.+]]=, 0
+ ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
+ store i32 0, i32* %retval
+ ; CHECK: i32.const [[L4:.+]]=, 16
+ ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L4]]
+ ; CHECK-NEXT: i32.const [[L5:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0([[L5]]), [[SP]]
+ ret void
+}
+
+; CHECK-LABEL: alloca3264:
+; CHECK: .local i32, i32, i32, i32{{$}}
+define void @alloca3264() {
+ ; CHECK: i32.const [[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]])
+ ; CHECK-NEXT: i32.const [[L2:.+]]=, 16
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]]
+ %r1 = alloca i32
+ %r2 = alloca double
+ ; CHECK: i32.const $push[[L3:.+]]=, 0
+ ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
+ store i32 0, i32* %r1
+ ; CHECK: i64.const $push[[L4:.+]]=, 0
+ ; CHECK: i64.store {{.*}}=, 0([[SP]]), $pop[[L4]]
+ store double 0.0, double* %r2
+ ; CHECK: i32.const [[L4:.+]]=, 16
+ ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L4]]
+ ; CHECK-NEXT: i32.const [[L5:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0([[L5]]), [[SP]]
+ ret void
+}
+
+; CHECK-LABEL: allocarray:
+; CHECK: .local i32, i32, i32, i32, i32, i32{{$}}
+define void @allocarray() {
+ ; CHECK: i32.const [[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]])
+ ; CHECK-NEXT: i32.const [[L2:.+]]=, 32
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]]
+ %r = alloca [5 x i32]
+ ; CHECK: i32.const $push[[L3:.+]]=, 1
+ ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
+ %p = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 0
+ store i32 1, i32* %p
+ ; CHECK: i32.const $push[[L4:.+]]=, 4
+ ; CHECK: i32.const [[L5:.+]]=, 12
+ ; CHECK: i32.add [[L5]]=, [[SP]], [[L5]]
+ ; CHECK: i32.add $push[[L6:.+]]=, [[L5]], $pop[[L4]]
+ ; CHECK: i32.store {{.*}}=, 0($pop[[L6]]), ${{.+}}
+ %p2 = getelementptr [5 x i32], [5 x i32]* %r, i32 0, i32 1
+ store i32 1, i32* %p2
+ ; CHECK: i32.const [[L7:.+]]=, 32
+ ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L7]]
+ ; CHECK-NEXT: i32.const [[L8:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0([[L7]]), [[SP]]
+ ret void
+}
+
+define void @dynamic_alloca(i32 %alloc) {
+ ; TODO: Support frame pointers
+ ;%r = alloca i32, i32 %alloc
+ ;store i32 0, i32* %r
+ ret void
+}
+; TODO: test aligned alloc
diff --git a/test/CodeGen/WebAssembly/varargs.ll b/test/CodeGen/WebAssembly/varargs.ll
new file mode 100644
index 000000000000..c564d9420742
--- /dev/null
+++ b/test/CodeGen/WebAssembly/varargs.ll
@@ -0,0 +1,123 @@
+; RUN: llc < %s -asm-verbose=false -verify-machineinstrs | FileCheck %s
+
+; Test varargs constructs.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; Test va_start.
+
+; TODO: Test va_start.
+
+;define void @start(i8** %ap, ...) {
+;entry:
+; %0 = bitcast i8** %ap to i8*
+; call void @llvm.va_start(i8* %0)
+; ret void
+;}
+
+; Test va_end.
+
+; CHECK-LABEL: end:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: return{{$}}
+define void @end(i8** %ap) {
+entry:
+ %0 = bitcast i8** %ap to i8*
+ call void @llvm.va_end(i8* %0)
+ ret void
+}
+
+; Test va_copy.
+
+; CHECK-LABEL: copy:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: i32.load $push0=, 0($1){{$}}
+; CHECK-NEXT: i32.store $discard=, 0($0), $pop0{{$}}
+; CHECK-NEXT: return{{$}}
+define void @copy(i8** %ap, i8** %bp) {
+entry:
+ %0 = bitcast i8** %ap to i8*
+ %1 = bitcast i8** %bp to i8*
+ call void @llvm.va_copy(i8* %0, i8* %1)
+ ret void
+}
+
+; Test va_arg with an i8 argument.
+
+; CHECK-LABEL: arg_i8:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: .local i32{{$}}
+; CHECK-NEXT: i32.load $1=, 0($0){{$}}
+; CHECK-NEXT: i32.const $push0=, 4{{$}}
+; CHECK-NEXT: i32.add $push1=, $1, $pop0{{$}}
+; CHECK-NEXT: i32.store $discard=, 0($0), $pop1{{$}}
+; CHECK-NEXT: i32.load $push2=, 0($1){{$}}
+; CHECK-NEXT: return $pop2{{$}}
+define i8 @arg_i8(i8** %ap) {
+entry:
+ %t = va_arg i8** %ap, i8
+ ret i8 %t
+}
+
+; Test va_arg with an i32 argument.
+
+; CHECK-LABEL: arg_i32:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK-NEXT: .result i32{{$}}
+; CHECK-NEXT: .local i32{{$}}
+; CHECK-NEXT: i32.load $push0=, 0($0){{$}}
+; CHECK-NEXT: i32.const $push1=, 3{{$}}
+; CHECK-NEXT: i32.add $push2=, $pop0, $pop1{{$}}
+; CHECK-NEXT: i32.const $push3=, -4{{$}}
+; CHECK-NEXT: i32.and $1=, $pop2, $pop3{{$}}
+; CHECK-NEXT: i32.const $push4=, 4{{$}}
+; CHECK-NEXT: i32.add $push5=, $1, $pop4{{$}}
+; CHECK-NEXT: i32.store $discard=, 0($0), $pop5{{$}}
+; CHECK-NEXT: i32.load $push6=, 0($1){{$}}
+; CHECK-NEXT: return $pop6{{$}}
+define i32 @arg_i32(i8** %ap) {
+entry:
+ %t = va_arg i8** %ap, i32
+ ret i32 %t
+}
+
+; Test va_arg with an i128 argument.
+
+; CHECK-LABEL: arg_i128:
+; CHECK-NEXT: .param i32, i32{{$}}
+; CHECK-NEXT: .local
+; CHECK: i32.and
+; CHECK: i64.load
+; CHECK: i64.load
+; CHECK: return{{$}}
+define i128 @arg_i128(i8** %ap) {
+entry:
+ %t = va_arg i8** %ap, i128
+ ret i128 %t
+}
+
+; Test a varargs call with no actual arguments.
+
+declare void @callee(...)
+
+; CHECK-LABEL: caller_none:
+; CHECK-NEXT: call callee{{$}}
+; CHECK-NEXT: return{{$}}
+define void @caller_none() {
+ call void (...) @callee()
+ ret void
+}
+
+; CHECK-LABEL: caller_some
+define void @caller_some() {
+ ; TODO: Fix interaction between register coalescer and reg stackifier,
+ ; or disable coalescer.
+ ;call void (...) @callee(i32 0, double 2.0)
+ ret void
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+declare void @llvm.va_copy(i8*, i8*)
diff --git a/test/CodeGen/WebAssembly/vtable.ll b/test/CodeGen/WebAssembly/vtable.ll
new file mode 100644
index 000000000000..38298bc474b5
--- /dev/null
+++ b/test/CodeGen/WebAssembly/vtable.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -asm-verbose=false | FileCheck %s --check-prefix=TYPEINFONAME
+; RUN: llc < %s -asm-verbose=false | FileCheck %s --check-prefix=VTABLE
+; RUN: llc < %s -asm-verbose=false | FileCheck %s --check-prefix=TYPEINFO
+
+; Test that simple vtables assemble as expected.
+;
+; The class hierarchy is:
+; struct A;
+; struct B : public A;
+; struct C : public A;
+; struct D : public B;
+; Each with a virtual dtor and method foo.
+
+target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+%struct.D = type { %struct.B }
+
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTVN10__cxxabiv120__si_class_type_infoE = external global i8*
+
+; TYPEINFONAME-LABEL: _ZTS1A:
+; TYPEINFONAME-NEXT: .asciz "1A"
+@_ZTS1A = constant [3 x i8] c"1A\00"
+; TYPEINFONAME-LABEL: _ZTS1B:
+; TYPEINFONAME-NEXT: .asciz "1B"
+@_ZTS1B = constant [3 x i8] c"1B\00"
+; TYPEINFONAME-LABEL: _ZTS1C:
+; TYPEINFONAME-NEXT: .asciz "1C"
+@_ZTS1C = constant [3 x i8] c"1C\00"
+; TYPEINFONAME-LABEL: _ZTS1D:
+; TYPEINFONAME-NEXT: .asciz "1D"
+@_ZTS1D = constant [3 x i8] c"1D\00"
+
+; VTABLE: .type _ZTV1A,@object
+; VTABLE-NEXT: .section .data.rel.ro,"aw",@progbits
+; VTABLE-NEXT: .globl _ZTV1A
+; VTABLE-LABEL: _ZTV1A:
+; VTABLE-NEXT: .int32 0
+; VTABLE-NEXT: .int32 _ZTI1A
+; VTABLE-NEXT: .int32 _ZN1AD2Ev
+; VTABLE-NEXT: .int32 _ZN1AD0Ev
+; VTABLE-NEXT: .int32 _ZN1A3fooEv
+; VTABLE-NEXT: .size _ZTV1A, 20
+@_ZTV1A = constant [5 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* bitcast (%struct.A* (%struct.A*)* @_ZN1AD2Ev to i8*), i8* bitcast (void (%struct.A*)* @_ZN1AD0Ev to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 4
+; VTABLE: .type _ZTV1B,@object
+; VTABLE-NEXT: .globl _ZTV1B
+; VTABLE-LABEL: _ZTV1B:
+; VTABLE-NEXT: .int32 0
+; VTABLE-NEXT: .int32 _ZTI1B
+; VTABLE-NEXT: .int32 _ZN1AD2Ev
+; VTABLE-NEXT: .int32 _ZN1BD0Ev
+; VTABLE-NEXT: .int32 _ZN1B3fooEv
+; VTABLE-NEXT: .size _ZTV1B, 20
+@_ZTV1B = constant [5 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1B to i8*), i8* bitcast (%struct.A* (%struct.A*)* @_ZN1AD2Ev to i8*), i8* bitcast (void (%struct.B*)* @_ZN1BD0Ev to i8*), i8* bitcast (void (%struct.B*)* @_ZN1B3fooEv to i8*)], align 4
+; VTABLE: .type _ZTV1C,@object
+; VTABLE-NEXT: .globl _ZTV1C
+; VTABLE-LABEL: _ZTV1C:
+; VTABLE-NEXT: .int32 0
+; VTABLE-NEXT: .int32 _ZTI1C
+; VTABLE-NEXT: .int32 _ZN1AD2Ev
+; VTABLE-NEXT: .int32 _ZN1CD0Ev
+; VTABLE-NEXT: .int32 _ZN1C3fooEv
+; VTABLE-NEXT: .size _ZTV1C, 20
+@_ZTV1C = constant [5 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1C to i8*), i8* bitcast (%struct.A* (%struct.A*)* @_ZN1AD2Ev to i8*), i8* bitcast (void (%struct.C*)* @_ZN1CD0Ev to i8*), i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)], align 4
+; VTABLE: .type _ZTV1D,@object
+; VTABLE-NEXT: .globl _ZTV1D
+; VTABLE-LABEL: _ZTV1D:
+; VTABLE-NEXT: .int32 0
+; VTABLE-NEXT: .int32 _ZTI1D
+; VTABLE-NEXT: .int32 _ZN1AD2Ev
+; VTABLE-NEXT: .int32 _ZN1DD0Ev
+; VTABLE-NEXT: .int32 _ZN1D3fooEv
+; VTABLE-NEXT: .size _ZTV1D, 20
+@_ZTV1D = constant [5 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1D to i8*), i8* bitcast (%struct.A* (%struct.A*)* @_ZN1AD2Ev to i8*), i8* bitcast (void (%struct.D*)* @_ZN1DD0Ev to i8*), i8* bitcast (void (%struct.D*)* @_ZN1D3fooEv to i8*)], align 4
+
+; TYPEINFO: .type _ZTI1A,@object
+; TYPEINFO: .globl _ZTI1A
+; TYPEINFO-LABEL: _ZTI1A:
+; TYPEINFO-NEXT: .int32 _ZTVN10__cxxabiv117__class_type_infoE+8
+; TYPEINFO-NEXT: .int32 _ZTS1A
+; TYPEINFO-NEXT: .size _ZTI1A, 8
+@_ZTI1A = constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) }
+; TYPEINFO: .type _ZTI1B,@object
+; TYPEINFO: .globl _ZTI1B
+; TYPEINFO-LABEL: _ZTI1B:
+; TYPEINFO-NEXT: .int32 _ZTVN10__cxxabiv120__si_class_type_infoE+8
+; TYPEINFO-NEXT: .int32 _ZTS1B
+; TYPEINFO-NEXT: .int32 _ZTI1A
+; TYPEINFO-NEXT: .size _ZTI1B, 12
+@_ZTI1B = constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1B, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*) }
+; TYPEINFO: .type _ZTI1C,@object
+; TYPEINFO: .globl _ZTI1C
+; TYPEINFO-LABEL: _ZTI1C:
+; TYPEINFO-NEXT: .int32 _ZTVN10__cxxabiv120__si_class_type_infoE+8
+; TYPEINFO-NEXT: .int32 _ZTS1C
+; TYPEINFO-NEXT: .int32 _ZTI1A
+; TYPEINFO-NEXT: .size _ZTI1C, 12
+@_ZTI1C = constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1C, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*) }
+; TYPEINFO: .type _ZTI1D,@object
+; TYPEINFO: .globl _ZTI1D
+; TYPEINFO-LABEL: _ZTI1D:
+; TYPEINFO-NEXT: .int32 _ZTVN10__cxxabiv120__si_class_type_infoE+8
+; TYPEINFO-NEXT: .int32 _ZTS1D
+; TYPEINFO-NEXT: .int32 _ZTI1B
+; TYPEINFO-NEXT: .size _ZTI1D, 12
+@_ZTI1D = constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1D, i32 0, i32 0), i8* bitcast ({ i8*, i8*, i8* }* @_ZTI1B to i8*) }
+
+@g = global i32 0, align 4
+
+define void @_ZN1A3fooEv(%struct.A* %this) {
+entry:
+ store i32 2, i32* @g, align 4
+ ret void
+}
+
+define void @_ZN1B3fooEv(%struct.B* %this) {
+entry:
+ store i32 4, i32* @g, align 4
+ ret void
+}
+
+define void @_ZN1C3fooEv(%struct.C* %this) {
+entry:
+ store i32 6, i32* @g, align 4
+ ret void
+}
+
+define void @_ZN1D3fooEv(%struct.D* %this) {
+entry:
+ store i32 8, i32* @g, align 4
+ ret void
+}
+
+define linkonce_odr void @_ZN1AD0Ev(%struct.A* %this) {
+entry:
+ %0 = bitcast %struct.A* %this to i8*
+ tail call void @_ZdlPv(i8* %0)
+ ret void
+}
+
+define linkonce_odr void @_ZN1BD0Ev(%struct.B* %this) {
+entry:
+ %0 = bitcast %struct.B* %this to i8*
+ tail call void @_ZdlPv(i8* %0)
+ ret void
+}
+
+define linkonce_odr void @_ZN1CD0Ev(%struct.C* %this) {
+entry:
+ %0 = bitcast %struct.C* %this to i8*
+ tail call void @_ZdlPv(i8* %0)
+ ret void
+}
+
+define linkonce_odr %struct.A* @_ZN1AD2Ev(%struct.A* returned %this) {
+entry:
+ ret %struct.A* %this
+}
+
+define linkonce_odr void @_ZN1DD0Ev(%struct.D* %this) {
+entry:
+ %0 = bitcast %struct.D* %this to i8*
+ tail call void @_ZdlPv(i8* %0)
+ ret void
+}
+
+declare void @_ZdlPv(i8*)
diff --git a/test/CodeGen/WinEH/cppeh-alloca-sink.ll b/test/CodeGen/WinEH/cppeh-alloca-sink.ll
deleted file mode 100644
index f215dca2ddd3..000000000000
--- a/test/CodeGen/WinEH/cppeh-alloca-sink.ll
+++ /dev/null
@@ -1,180 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test describes two difficult cases in sinking allocas into child frames.
-; We don't currently do this optimization, but we'll need to tweak these tests
-; when we do.
-
-; This test is based on the following code:
-;
-; // In this case we can sink the alloca from the parent into the catch because
-; // the lifetime is limited to the catch.
-; extern "C" void may_throw();
-; extern "C" void sink_alloca_to_catch() {
-; try {
-; may_throw();
-; } catch (int) {
-; volatile int only_used_in_catch = 42;
-; }
-; }
-;
-; // In this case we cannot. The variable should live as long as the parent
-; // frame lives.
-; extern "C" void use_catch_var(int *);
-; extern "C" void dont_sink_alloca_to_catch(int n) {
-; int live_in_out_catch = 0;
-; while (n > 0) {
-; try {
-; may_throw();
-; } catch (int) {
-; use_catch_var(&live_in_out_catch);
-; }
-; n--;
-; }
-; }
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-declare void @may_throw() #1
-declare i32 @__CxxFrameHandler3(...)
-declare i32 @llvm.eh.typeid.for(i8*) #2
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-declare void @llvm.eh.endcatch() #3
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchHandlerType = type { i32, i8* }
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
-
-; Function Attrs: uwtable
-define void @sink_alloca_to_catch() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %0 = alloca i32
- %only_used_in_catch = alloca i32, align 4
- invoke void @may_throw()
- to label %try.cont unwind label %lpad
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- %2 = extractvalue { i8*, i32 } %1, 1
- %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #3
- %matches = icmp eq i32 %2, %3
- br i1 %matches, label %catch, label %eh.resume
-
-catch: ; preds = %lpad
- %4 = extractvalue { i8*, i32 } %1, 0
- call void @llvm.eh.begincatch(i8* %4, i8* null) #3
- store volatile i32 42, i32* %only_used_in_catch, align 4
- tail call void @llvm.eh.endcatch() #3
- br label %try.cont
-
-try.cont: ; preds = %entry, %catch
- ret void
-
-eh.resume: ; preds = %lpad
- resume { i8*, i32 } %1
-}
-
-; CHECK-LABEL: define void @sink_alloca_to_catch()
-; CHECK: call void (...) @llvm.localescape(i32* %only_used_in_catch)
-
-declare void @use_catch_var(i32*) #1
-
-; Function Attrs: uwtable
-define void @dont_sink_alloca_to_catch(i32 %n) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %0 = alloca i32
- %n.addr = alloca i32, align 4
- %live_in_out_catch = alloca i32, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- store i32 %n, i32* %n.addr, align 4
- br label %while.cond
-
-while.cond: ; preds = %try.cont, %entry
- %1 = load i32, i32* %n.addr, align 4
- %cmp = icmp sgt i32 %1, 0
- br i1 %cmp, label %while.body, label %while.end
-
-while.body: ; preds = %while.cond
- invoke void @may_throw()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %while.body
- br label %try.cont
-
-lpad: ; preds = %while.body
- %2 = landingpad { i8*, i32 }
- catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
- %3 = extractvalue { i8*, i32 } %2, 0
- store i8* %3, i8** %exn.slot
- %4 = extractvalue { i8*, i32 } %2, 1
- store i32 %4, i32* %ehselector.slot
- br label %catch.dispatch
-
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot
- %5 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #3
- %matches = icmp eq i32 %sel, %5
- br i1 %matches, label %catch, label %eh.resume
-
-catch: ; preds = %catch.dispatch
- %exn = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn, i8* null) #3
- invoke void @use_catch_var(i32* %live_in_out_catch)
- to label %invoke.cont2 unwind label %lpad1
-
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.endcatch() #3
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %invoke.cont
- %6 = load i32, i32* %0
- %7 = load i32, i32* %n.addr, align 4
- %dec = add nsw i32 %7, -1
- store i32 %dec, i32* %n.addr, align 4
- br label %while.cond
-
-lpad1: ; preds = %catch
- %8 = landingpad { i8*, i32 }
- cleanup
- %9 = extractvalue { i8*, i32 } %8, 0
- store i8* %9, i8** %exn.slot
- %10 = extractvalue { i8*, i32 } %8, 1
- store i32 %10, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #3
- br label %eh.resume
-
-while.end: ; preds = %while.cond
- ret void
-
-eh.resume: ; preds = %lpad1, %catch.dispatch
- %exn3 = load i8*, i8** %exn.slot
- %sel4 = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0
- %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
- resume { i8*, i32 } %lpad.val5
-}
-
-; CHECK-LABEL: define void @dont_sink_alloca_to_catch(i32 %n)
-; CHECK: call void (...) @llvm.localescape(i32* %live_in_out_catch)
-
-; CHECK-LABEL: define internal i8* @sink_alloca_to_catch.catch(i8*, i8*)
-; CHECK: %only_used_in_catch.i8 = call i8* @llvm.localrecover({{.*}}, i32 0)
-; CHECK: %only_used_in_catch = bitcast
-
-; CHECK-LABEL: define internal i8* @dont_sink_alloca_to_catch.catch(i8*, i8*)
-; CHECK: %live_in_out_catch.i8 = call i8* @llvm.localrecover({{.*}}, i32 0)
-; CHECK: %live_in_out_catch = bitcast
-
-
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
diff --git a/test/CodeGen/WinEH/cppeh-catch-all-win32.ll b/test/CodeGen/WinEH/cppeh-catch-all-win32.ll
deleted file mode 100644
index b2e84b90d69f..000000000000
--- a/test/CodeGen/WinEH/cppeh-catch-all-win32.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; RUN: opt -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; extern "C" void may_throw();
-; extern "C" void handle_exception();
-; extern "C" void test() {
-; try {
-; may_throw();
-; } catch (...) {
-; handle_exception();
-; }
-; }
-
-target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "i686-pc-windows-msvc"
-
-; The function entry in this case remains unchanged.
-; CHECK: define void @test()
-; CHECK: entry:
-; CHECK: invoke void @may_throw()
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-define void @test() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- invoke void @may_throw()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- br label %try.cont
-
-; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* ()* @test.catch)
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* null
- %1 = extractvalue { i8*, i32 } %0, 0
- store i8* %1, i8** %exn.slot
- %2 = extractvalue { i8*, i32 } %0, 1
- store i32 %2, i32* %ehselector.slot
- br label %catch
-
-; CHECK-NOT: catch:
-; CHECK-NOT: @handle_exception()
-
-catch: ; preds = %lpad
- %exn = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn, i8* null) #1
- call void @handle_exception()
- call void @llvm.eh.endcatch() #1
- br label %try.cont
-
-try.cont: ; preds = %catch, %invoke.cont
- ret void
-
-; CHECK: }
-}
-
-; CHECK: define internal i8* @test.catch()
-; CHECK: call i8* @llvm.frameaddress(i32 1)
-; CHECK: call i8* @llvm.x86.seh.recoverfp(i8* bitcast (void ()* @test to i8*), i8* %{{.*}})
-; CHECK: call void @handle_exception()
-; CHECK: ret i8* blockaddress(@test, %try.cont)
-; CHECK: }
-
-
-declare void @may_throw() #0
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #1
-
-declare void @handle_exception() #0
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #1
-
-attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind }
diff --git a/test/CodeGen/WinEH/cppeh-catch-all.ll b/test/CodeGen/WinEH/cppeh-catch-all.ll
deleted file mode 100644
index 266dd3e305ca..000000000000
--- a/test/CodeGen/WinEH/cppeh-catch-all.ll
+++ /dev/null
@@ -1,97 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; void test()
-; {
-; try {
-; may_throw();
-; } catch (...) {
-; handle_exception();
-; }
-; }
-;
-; Parts of the IR have been hand-edited to simplify the test case.
-; The full IR will be restored when Windows C++ EH support is complete.
-
-; ModuleID = 'catch-all.cpp'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-; The function entry in this case remains unchanged.
-; CHECK: define void @_Z4testv()
-; CHECK: entry:
-; CHECK: invoke void @_Z9may_throwv()
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-; Function Attrs: uwtable
-define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- invoke void @_Z9may_throwv()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- br label %try.cont
-
-; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @_Z4testv.catch)
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
-
-lpad: ; preds = %entry
- %tmp = landingpad { i8*, i32 }
- catch i8* null
- %tmp1 = extractvalue { i8*, i32 } %tmp, 0
- store i8* %tmp1, i8** %exn.slot
- %tmp2 = extractvalue { i8*, i32 } %tmp, 1
- store i32 %tmp2, i32* %ehselector.slot
- br label %catch
-
-; CHECK-NOT: catch:
-
-catch: ; preds = %lpad
- %exn = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn, i8* null) #2
- call void @_Z16handle_exceptionv()
- br label %invoke.cont2
-
-; CHECK-NOT: invoke.cont2:
-
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.endcatch()
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %invoke.cont
- ret void
-
-; CHECK: }
-}
-
-; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*)
-; CHECK: entry:
-; CHECK: call void @_Z16handle_exceptionv()
-; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont)
-; CHECK: }
-
-declare void @_Z9may_throwv() #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-declare void @llvm.eh.begincatch(i8*, i8*)
-
-declare void @_Z16handle_exceptionv() #1
-
-declare void @llvm.eh.endcatch()
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { noinline noreturn nounwind }
-attributes #3 = { nounwind }
-attributes #4 = { noreturn nounwind }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.7.0 (trunk 226027)"}
diff --git a/test/CodeGen/WinEH/cppeh-catch-and-throw.ll b/test/CodeGen/WinEH/cppeh-catch-and-throw.ll
deleted file mode 100644
index d604b86deb35..000000000000
--- a/test/CodeGen/WinEH/cppeh-catch-and-throw.ll
+++ /dev/null
@@ -1,143 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; class Obj {
-; public:
-; ~Obj();
-; };
-;
-; void test(void)
-; {
-; try {
-; Obj o;
-; throw 1;
-; } catch (...) {
-; throw;
-; }
-; }
-
-; ModuleID = 'cppeh-catch-and-throw.cpp'
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
-%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
-%eh.ThrowInfo = type { i32, i32, i32, i32 }
-%class.Obj = type { i8 }
-
-$"\01??_R0H@8" = comdat any
-
-$"_CT??_R0H@84" = comdat any
-
-$_CTA1H = comdat any
-
-$_TI1H = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@__ImageBase = external constant i8
-@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
-@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
-@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
-
-; This is just a minimal check to verify that main was handled by WinEHPrepare.
-; CHECK: define void @"\01?test@@YAXXZ"()
-; CHECK: entry:
-; CHECK: call void (...) @llvm.localescape
-; CHECK: invoke void @_CxxThrowException
-; CHECK: }
-
-; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %o = alloca %class.Obj, align 1
- %tmp = alloca i32, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- store i32 1, i32* %tmp
- %0 = bitcast i32* %tmp to i8*
- invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #3
- to label %unreachable unwind label %lpad
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- catch i8* null
- %2 = extractvalue { i8*, i32 } %1, 0
- store i8* %2, i8** %exn.slot
- %3 = extractvalue { i8*, i32 } %1, 1
- store i32 %3, i32* %ehselector.slot
- call void @"\01??1Obj@@QEAA@XZ"(%class.Obj* %o) #2
- br label %catch
-
-catch: ; preds = %lpad
- %exn = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn, i8* null) #2
- invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #3
- to label %unreachable unwind label %lpad1
-
-lpad1: ; preds = %catch
- %4 = landingpad { i8*, i32 }
- cleanup
- %5 = extractvalue { i8*, i32 } %4, 0
- store i8* %5, i8** %exn.slot
- %6 = extractvalue { i8*, i32 } %4, 1
- store i32 %6, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #2
- br label %eh.resume
-
-try.cont: ; No predecessors!
- ret void
-
-eh.resume: ; preds = %lpad1
- %exn2 = load i8*, i8** %exn.slot
- %sel = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn2, 0
- %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
- resume { i8*, i32 } %lpad.val3
-
-unreachable: ; preds = %catch, %entry
- unreachable
-}
-
-; Verify that we inserted a stub invoke into the outlined cleanup handler.
-;
-; CHECK-LABEL: define internal void @"\01?test@@YAXXZ.cleanup"(i8*, i8*)
-; CHECK: entry:
-; CHECK: call i8* @llvm.localrecover
-; CHECK: call void @"\01??1Obj@@QEAA@XZ"
-; CHECK: invoke void @llvm.donothing()
-; CHECK: to label %[[SPLIT_LABEL:.+]] unwind label %[[LPAD_LABEL:.+]]
-;
-; CHECK: [[SPLIT_LABEL]]
-;
-; CHECK: [[LPAD_LABEL]]
-; CHECK: landingpad { i8*, i32 }
-; CHECK: cleanup
-; CHECK: unreachable
-; CHECK: }
-
-declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind
-declare void @"\01??1Obj@@QEAA@XZ"(%class.Obj*) #1
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #2
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind }
-attributes #3 = { noreturn }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 235214) (llvm/trunk 235213)"}
diff --git a/test/CodeGen/WinEH/cppeh-catch-scalar.ll b/test/CodeGen/WinEH/cppeh-catch-scalar.ll
deleted file mode 100644
index 3b5ab746d63c..000000000000
--- a/test/CodeGen/WinEH/cppeh-catch-scalar.ll
+++ /dev/null
@@ -1,126 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; void test()
-; {
-; try {
-; may_throw();
-; } catch (int i) {
-; handle_int(i);
-; }
-; }
-;
-; Parts of the IR have been hand-edited to simplify the test case.
-; The full IR will be restored when Windows C++ EH support is complete.
-
-;ModuleID = 'cppeh-catch-scalar.cpp'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-@_ZTIi = external constant i8*
-
-; The function entry will be rewritten like this.
-; CHECK: define void @_Z4testv()
-; CHECK: entry:
-; CHECK: [[I_PTR:\%.+]] = alloca i32, align 4
-; CHECK: call void (...) @llvm.localescape(i32* [[I_PTR]])
-; CHECK: invoke void @_Z9may_throwv()
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-; Function Attrs: uwtable
-define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %i = alloca i32, align 4
- invoke void @_Z9may_throwv()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- br label %try.cont
-
-; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
-
-lpad: ; preds = %entry
- %tmp = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %tmp1 = extractvalue { i8*, i32 } %tmp, 0
- store i8* %tmp1, i8** %exn.slot
- %tmp2 = extractvalue { i8*, i32 } %tmp, 1
- store i32 %tmp2, i32* %ehselector.slot
- br label %catch.dispatch
-
-; CHECK-NOT: catch-dispatch:
-
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot
- %tmp3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #3
- %matches = icmp eq i32 %sel, %tmp3
- br i1 %matches, label %catch, label %eh.resume
-
-; CHECK-NOT: catch:
-
-catch: ; preds = %catch.dispatch
- %exn11 = load i8*, i8** %exn.slot
- %i.i8 = bitcast i32* %i to i8*
- call void @llvm.eh.begincatch(i8* %exn11, i8* %i.i8) #3
- %tmp7 = load i32, i32* %i, align 4
- call void @_Z10handle_inti(i32 %tmp7)
- br label %invoke.cont2
-
-; CHECK-NOT: invoke.cont2:
-
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.endcatch() #3
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %invoke.cont
- ret void
-
-; CHECK-NOT: eh.resume:
-
-eh.resume: ; preds = %catch.dispatch
- %exn3 = load i8*, i8** %exn.slot
- %sel4 = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0
- %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
- resume { i8*, i32 } %lpad.val5
-
-; CHECK: }
-}
-
-; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 0)
-; CHECK: [[I_PTR1:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
-; CHECK: [[TMP:\%.+]] = load i32, i32* [[I_PTR1]], align 4
-; CHECK: call void @_Z10handle_inti(i32 [[TMP]])
-; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont)
-; CHECK: }
-
-declare void @_Z9may_throwv() #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
-
-declare void @llvm.eh.begincatch(i8*, i8*)
-
-declare void @llvm.eh.endcatch()
-
-declare void @_Z10handle_inti(i32) #1
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.7.0 (trunk 227474) (llvm/trunk 227508)"}
diff --git a/test/CodeGen/WinEH/cppeh-catch-unwind.ll b/test/CodeGen/WinEH/cppeh-catch-unwind.ll
deleted file mode 100644
index 8fdda9bbc02a..000000000000
--- a/test/CodeGen/WinEH/cppeh-catch-unwind.ll
+++ /dev/null
@@ -1,240 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test was generated from the following source:
-;
-; void test() {
-; try {
-; SomeClass obj;
-; may_throw();
-; try {
-; may_throw();
-; } catch (int) {
-; handle_exception();
-; }
-; } catch (int) {
-; handle_exception();
-; }
-; }
-;
-; The code above was compiled with the -O2 option.
-
-; ModuleID = 'catch-unwind.cpp'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%class.SomeClass = type { i8 }
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-
-
-; CHECK-LABEL: define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-; CHECK: entry:
-; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass
-; CHECK: [[TMP0:\%.+]] = alloca i32, align 4
-; CHECK: [[TMP1:\%.+]] = alloca i32, align 4
-; CHECK: call void (...) @llvm.localescape(i32* [[TMP1]], %class.SomeClass* [[OBJ_PTR]], i32* [[TMP0]])
-; CHECK: %call = invoke %class.SomeClass* @"\01??0SomeClass@@QEAA@XZ"(%class.SomeClass* %obj)
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %obj = alloca %class.SomeClass, align 1
- %0 = alloca i32, align 4
- %1 = alloca i32, align 4
- %call = invoke %class.SomeClass* @"\01??0SomeClass@@QEAA@XZ"(%class.SomeClass* %obj)
- to label %invoke.cont unwind label %lpad
-
-; CHECK: invoke.cont:
-; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
-; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
-
-invoke.cont: ; preds = %entry
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont2 unwind label %lpad1
-
-; CHECK: invoke.cont2:
-; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
-; CHECK: to label %try.cont unwind label %[[LPAD3_LABEL:lpad[0-9]*]]
-
-invoke.cont2: ; preds = %invoke.cont
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %try.cont unwind label %lpad3
-
-; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: [[LPAD_VAL:\%.+]] = landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont15]
-
-lpad: ; preds = %entry
- %2 = landingpad { i8*, i32 }
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- %3 = extractvalue { i8*, i32 } %2, 0
- %4 = extractvalue { i8*, i32 } %2, 1
- br label %catch.dispatch7
-
-; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %invoke.cont
-; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
-; CHECK-NEXT: indirectbr i8* [[RECOVER1]], [label %try.cont15]
-
-lpad1: ; preds = %invoke.cont
- %5 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- %6 = extractvalue { i8*, i32 } %5, 0
- %7 = extractvalue { i8*, i32 } %5, 1
- br label %ehcleanup
-
-; CHECK: [[LPAD3_LABEL]]:{{[ ]+}}; preds = %invoke.cont2
-; CHECK: [[LPAD3_VAL:\%.+]] = landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER3:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1", i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup")
-; CHECK-NEXT: indirectbr i8* [[RECOVER3]], [label %try.cont, label %try.cont15]
-
-lpad3: ; preds = %invoke.cont2
- %8 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- %9 = extractvalue { i8*, i32 } %8, 0
- %10 = extractvalue { i8*, i32 } %8, 1
- %11 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
- %matches = icmp eq i32 %10, %11
- br i1 %matches, label %catch, label %ehcleanup
-
-; CHECK-NOT: catch:
-catch: ; preds = %lpad3
- %12 = bitcast i32* %0 to i8*
- call void @llvm.eh.begincatch(i8* %9, i8* %12) #3
- invoke void @"\01?handle_exception@@YAXXZ"()
- to label %invoke.cont6 unwind label %lpad5
-
-; CHECK-NOT: invoke.cont6:
-invoke.cont6: ; preds = %catch
- call void @llvm.eh.endcatch() #3
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %invoke.cont6
- call void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass* %obj) #3
- br label %try.cont15
-
-; CHECK-NOT: lpad5:
-lpad5: ; preds = %catch
- %13 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- %14 = extractvalue { i8*, i32 } %13, 0
- %15 = extractvalue { i8*, i32 } %13, 1
- call void @llvm.eh.endcatch() #3
- br label %ehcleanup
-
-; CHECK-NOT: ehcleanup
-ehcleanup: ; preds = %lpad5, %lpad3, %lpad1
- %exn.slot.0 = phi i8* [ %14, %lpad5 ], [ %9, %lpad3 ], [ %6, %lpad1 ]
- %ehselector.slot.0 = phi i32 [ %15, %lpad5 ], [ %10, %lpad3 ], [ %7, %lpad1 ]
- call void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass* %obj) #3
- br label %catch.dispatch7
-
-; CHECK-NOT: catch.dispatch7:
-catch.dispatch7: ; preds = %ehcleanup, %lpad
- %exn.slot.1 = phi i8* [ %exn.slot.0, %ehcleanup ], [ %3, %lpad ]
- %ehselector.slot.1 = phi i32 [ %ehselector.slot.0, %ehcleanup ], [ %4, %lpad ]
- %16 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
- %matches9 = icmp eq i32 %ehselector.slot.1, %16
- br i1 %matches9, label %catch10, label %eh.resume
-
-; CHECK-NOT: catch10:
-catch10: ; preds = %catch.dispatch7
- %17 = bitcast i32* %1 to i8*
- call void @llvm.eh.begincatch(i8* %exn.slot.1, i8* %17) #3
- call void @"\01?handle_exception@@YAXXZ"()
- br label %invoke.cont13
-
-; CHECK-NOT: invoke.cont13:
-invoke.cont13: ; preds = %catch10
- call void @llvm.eh.endcatch() #3
- br label %try.cont15
-
-try.cont15: ; preds = %invoke.cont13, %try.cont
- ret void
-
-; CHECK-NOT: eh.resume
-eh.resume: ; preds = %catch.dispatch7
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.1, 0
- %lpad.val18 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.1, 1
- resume { i8*, i32 } %lpad.val18
-
-; CHECK: }
-}
-
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_TMP1:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
-; CHECK: [[TMP1_PTR:\%.+]] = bitcast i8* [[RECOVER_TMP1]] to i32*
-; CHECK: call void @"\01?handle_exception@@YAXXZ"()
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont15)
-; CHECK: }
-
-; CHECK-LABEL: define internal void @"\01?test@@YAXXZ.cleanup"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_OBJ:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[OBJ_PTR:\%.+]] = bitcast i8* %obj.i8 to %class.SomeClass*
-; CHECK: call void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass* [[OBJ_PTR]])
-; CHECK: ret void
-; CHECK: }
-
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch.1"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_TMP0:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
-; CHECK: [[TMP0_PTR:\%.+]] = bitcast i8* [[RECOVER_TMP0]] to i32*
-; CHECK: invoke void @"\01?handle_exception@@YAXXZ"()
-; CHECK: to label %invoke.cont6 unwind label %[[LPAD5_LABEL:lpad[0-9]+]]
-;
-; CHECK: invoke.cont6: ; preds = %entry
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
-;
-; CHECK: [[LPAD5_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: [[LPAD5_VAL:\%.+]] = landingpad { i8*, i32 }
-; CHECK: cleanup
-; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK: }
-
-declare %class.SomeClass* @"\01??0SomeClass@@QEAA@XZ"(%class.SomeClass* returned) #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-declare void @"\01?may_throw@@YAXXZ"() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-
-declare void @"\01?handle_exception@@YAXXZ"() #1
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #3
-
-; Function Attrs: nounwind
-declare void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass*) #4
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-attributes #4 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 232069) (llvm/trunk 232070)"}
diff --git a/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll b/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll
deleted file mode 100644
index 7e5f659f2a4f..000000000000
--- a/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll
+++ /dev/null
@@ -1,91 +0,0 @@
-; RUN: opt -winehprepare -S < %s | FileCheck %s
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-; Modified based on this code:
-; struct HasDtor {
-; ~HasDtor();
-; };
-; extern "C" void may_throw();
-; int main() {
-; try {
-; HasDtor o;
-; may_throw();
-; } catch (int) {
-; }
-; }
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchHandlerType = type { i32, i8* }
-%struct.HasDtor = type { i8 }
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
-
-define i32 @main() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %o = alloca %struct.HasDtor, align 1
- invoke void @may_throw()
- to label %invoke.cont2 unwind label %lpad1
-
-invoke.cont2: ; preds = %invoke.cont
- call void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* %o)
- br label %try.cont
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- %1 = extractvalue { i8*, i32 } %0, 0
- %2 = extractvalue { i8*, i32 } %0, 1
- br label %catch.dispatch
-
-lpad1: ; preds = %invoke.cont
- %3 = landingpad { i8*, i32 }
- cleanup
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- %4 = extractvalue { i8*, i32 } %3, 0
- %5 = extractvalue { i8*, i32 } %3, 1
- invoke void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* %o)
- to label %catch.dispatch unwind label %lpad
-
-catch.dispatch: ; preds = %lpad1, %lpad
- %exn.slot.0 = phi i8* [ %4, %lpad1 ], [ %1, %lpad ]
- %ehselector.slot.0 = phi i32 [ %5, %lpad1 ], [ %2, %lpad ]
- %6 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*))
- %matches = icmp eq i32 %ehselector.slot.0, %6
- br i1 %matches, label %catch, label %eh.resume
-
-catch: ; preds = %catch.dispatch
- call void @llvm.eh.begincatch(i8* %exn.slot.0, i8* null)
- call void @llvm.eh.endcatch()
- br label %try.cont
-
-try.cont: ; preds = %catch, %invoke.cont2
- ret i32 0
-
-eh.resume: ; preds = %catch.dispatch
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
- %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
- resume { i8*, i32 } %lpad.val5
-}
-
-; CHECK-LABEL: define i32 @main()
-; CHECK: @llvm.eh.actions(i32 0, void (i8*, i8*)* @main.cleanup, i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 -1, i8* (i8*, i8*)* @main.catch)
-
-; CHECK-LABEL: define internal void @main.cleanup(i8*, i8*)
-; CHECK: call void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* %{{.*}})
-; CHECK: ret void
-
-declare void @may_throw()
-
-declare i32 @__CxxFrameHandler3(...)
-
-declare void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor*)
-
-declare i32 @llvm.eh.typeid.for(i8*)
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture)
-declare void @llvm.eh.endcatch()
diff --git a/test/CodeGen/WinEH/cppeh-demote-liveout.ll b/test/CodeGen/WinEH/cppeh-demote-liveout.ll
deleted file mode 100644
index 309952bfc94b..000000000000
--- a/test/CodeGen/WinEH/cppeh-demote-liveout.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S < %s | FileCheck %s
-
-; Notionally based on this C++ source:
-; int liveout_catch(int p) {
-; int val = p + 1;
-; try {
-; might_throw();
-; } catch (int) {
-; val++;
-; }
-; return val;
-; }
-
-declare void @llvm.eh.begincatch(i8*, i8*)
-declare void @llvm.eh.endcatch()
-declare void @might_throw()
-declare i32 @__CxxFrameHandler3(...)
-declare i32 @llvm.eh.typeid.for(i8*)
-
-@typeinfo.int = external global i32
-
-define i32 @liveout_catch(i32 %p) personality i32 (...)* @__CxxFrameHandler3 {
-entry:
- %val.entry = add i32 %p, 1
- invoke void @might_throw()
- to label %ret unwind label %lpad
-
-lpad:
- %ehvals = landingpad { i8*, i32 }
- cleanup
- catch i32* @typeinfo.int
- %ehptr = extractvalue { i8*, i32 } %ehvals, 0
- %sel = extractvalue { i8*, i32 } %ehvals, 1
- %int_sel = call i32 @llvm.eh.typeid.for(i8* bitcast (i32* @typeinfo.int to i8*))
- %match = icmp eq i32 %sel, %int_sel
- br i1 %match, label %catchit, label %resume
-
-catchit:
- call void @llvm.eh.begincatch(i8* %ehptr, i8* null)
- %val.lpad = add i32 %val.entry, 1
- call void @llvm.eh.endcatch()
- br label %ret
-
-ret:
- %rv = phi i32 [%val.entry, %entry], [%val.lpad, %catchit]
- ret i32 %rv
-
-resume:
- resume {i8*, i32} %ehvals
-}
-
-; CHECK-LABEL: define i32 @liveout_catch(i32 %p)
-; CHECK: %val.entry = add i32 %p, 1
-; CHECK-NEXT: store i32 %val.entry, i32* %val.entry.reg2mem
-; CHECK: invoke void @might_throw()
-;
-; CHECK: landingpad
-; CHECK: indirectbr i8* {{.*}}, [label %catchit.split]
-;
-; CHECK: catchit.split:
-; CHECK: load i32, i32* %val.lpad.reg2mem
-; CHECK: br label %ret
-;
-; CHECK: ret:
-; CHECK: %rv = phi i32 [ {{.*}}, %entry ], [ {{.*}}, %catchit.split ]
-; CHECK: ret i32
-
-; CHECK-LABEL: define internal i8* @liveout_catch.catch(i8*, i8*)
-; CHECK: %[[val:[^ ]*]] = load i32, i32*
-; CHECK-NEXT: %[[val_lpad:[^ ]*]] = add i32 %[[val]], 1
-; CHECK-NEXT: store i32 %[[val_lpad]], i32*
-; CHECK: ret i8* blockaddress(@liveout_catch, %catchit.split)
diff --git a/test/CodeGen/WinEH/cppeh-frame-vars.ll b/test/CodeGen/WinEH/cppeh-frame-vars.ll
deleted file mode 100644
index c2dbd8ecab60..000000000000
--- a/test/CodeGen/WinEH/cppeh-frame-vars.ll
+++ /dev/null
@@ -1,272 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; struct SomeData {
-; int a;
-; int b;
-; };
-;
-; void may_throw();
-; void does_not_throw(int i);
-; void dump(int *, int, SomeData&);
-;
-; void test() {
-; int NumExceptions = 0;
-; int ExceptionVal[10];
-; SomeData Data = { 0, 0 };
-;
-; for (int i = 0; i < 10; ++i) {
-; try {
-; may_throw();
-; Data.a += i;
-; }
-; catch (int e) {
-; ExceptionVal[NumExceptions] = e;
-; ++NumExceptions;
-; if (e == i)
-; Data.b += e;
-; else
-; Data.a += e;
-; }
-; does_not_throw(NumExceptions);
-; }
-; dump(ExceptionVal, NumExceptions, Data);
-; }
-
-; ModuleID = 'cppeh-frame-vars.cpp'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%struct.SomeData = type { i32, i32 }
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-
-; The function entry should be rewritten like this.
-; CHECK: define void @"\01?test@@YAXXZ"()
-; CHECK: entry:
-; CHECK: [[NUMEXCEPTIONS_PTR:\%.+]] = alloca i32, align 4
-; CHECK: [[EXCEPTIONVAL_PTR:\%.+]] = alloca [10 x i32], align 16
-; CHECK: [[DATA_PTR:\%.+]] = alloca %struct.SomeData, align 4
-; CHECK: [[I_PTR:\%.+]] = alloca i32, align 4
-; CHECK: [[E_PTR:\%.+]] = alloca i32, align 4
-; CHECK: store i32 0, i32* [[NUMEXCEPTIONS_PTR]], align 4
-; CHECK: [[TMP:\%.+]] = bitcast %struct.SomeData* [[DATA_PTR]] to i8*
-; CHECK: call void @llvm.memset(i8* [[TMP]], i8 0, i64 8, i32 4, i1 false)
-; CHECK: store i32 0, i32* [[I_PTR]], align 4
-; CHECK: call void (...) @llvm.localescape(i32* [[E_PTR]], i32* [[NUMEXCEPTIONS_PTR]], [10 x i32]* [[EXCEPTIONVAL_PTR]], i32* [[I_PTR]], %struct.SomeData* [[DATA_PTR]])
-; CHECK: br label %for.cond
-
-; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %NumExceptions = alloca i32, align 4
- %ExceptionVal = alloca [10 x i32], align 16
- %Data = alloca %struct.SomeData, align 4
- %i = alloca i32, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %e = alloca i32, align 4
- store i32 0, i32* %NumExceptions, align 4
- %tmp = bitcast %struct.SomeData* %Data to i8*
- call void @llvm.memset(i8* %tmp, i8 0, i64 8, i32 4, i1 false)
- store i32 0, i32* %i, align 4
- br label %for.cond
-
-for.cond: ; preds = %for.inc, %entry
- %tmp1 = load i32, i32* %i, align 4
- %cmp = icmp slt i32 %tmp1, 10
- br i1 %cmp, label %for.body, label %for.end
-
-; CHECK: for.body:
-; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-for.body: ; preds = %for.cond
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %for.body
- %tmp2 = load i32, i32* %i, align 4
- %a = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 0
- %tmp3 = load i32, i32* %a, align 4
- %add = add nsw i32 %tmp3, %tmp2
- store i32 %add, i32* %a, align 4
- br label %try.cont
-
-; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %for.body
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
-
-lpad: ; preds = %for.body
- %tmp4 = landingpad { i8*, i32 }
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- %tmp5 = extractvalue { i8*, i32 } %tmp4, 0
- store i8* %tmp5, i8** %exn.slot
- %tmp6 = extractvalue { i8*, i32 } %tmp4, 1
- store i32 %tmp6, i32* %ehselector.slot
- br label %catch.dispatch
-
-; CHECK-NOT: catch.dispatch:
-
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot
- %tmp7 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #1
- %matches = icmp eq i32 %sel, %tmp7
- br i1 %matches, label %catch, label %eh.resume
-
-; CHECK-NOT: catch:
-
-catch: ; preds = %catch.dispatch
- %exn = load i8*, i8** %exn.slot
- %e.i8 = bitcast i32* %e to i8*
- call void @llvm.eh.begincatch(i8* %exn, i8* %e.i8) #1
- %tmp11 = load i32, i32* %e, align 4
- %tmp12 = load i32, i32* %NumExceptions, align 4
- %idxprom = sext i32 %tmp12 to i64
- %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i32 0, i64 %idxprom
- store i32 %tmp11, i32* %arrayidx, align 4
- %tmp13 = load i32, i32* %NumExceptions, align 4
- %inc = add nsw i32 %tmp13, 1
- store i32 %inc, i32* %NumExceptions, align 4
- %tmp14 = load i32, i32* %e, align 4
- %tmp15 = load i32, i32* %i, align 4
- %cmp1 = icmp eq i32 %tmp14, %tmp15
- br i1 %cmp1, label %if.then, label %if.else
-
-; CHECK-NOT: if.then:
-
-if.then: ; preds = %catch
- %tmp16 = load i32, i32* %e, align 4
- %b = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 1
- %tmp17 = load i32, i32* %b, align 4
- %add2 = add nsw i32 %tmp17, %tmp16
- store i32 %add2, i32* %b, align 4
- br label %if.end
-
-; CHECK-NOT: if.else:
-
-if.else: ; preds = %catch
- %tmp18 = load i32, i32* %e, align 4
- %a3 = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 0
- %tmp19 = load i32, i32* %a3, align 4
- %add4 = add nsw i32 %tmp19, %tmp18
- store i32 %add4, i32* %a3, align 4
- br label %if.end
-
-; CHECK-NOT: if.end:
-
-if.end: ; preds = %if.else, %if.then
- call void @llvm.eh.endcatch() #1
- br label %try.cont
-
-try.cont: ; preds = %if.end, %invoke.cont
- %tmp20 = load i32, i32* %NumExceptions, align 4
- call void @"\01?does_not_throw@@YAXH@Z"(i32 %tmp20)
- br label %for.inc
-
-for.inc: ; preds = %try.cont
- %tmp21 = load i32, i32* %i, align 4
- %inc5 = add nsw i32 %tmp21, 1
- store i32 %inc5, i32* %i, align 4
- br label %for.cond
-
-for.end: ; preds = %for.cond
- %tmp22 = load i32, i32* %NumExceptions, align 4
- %arraydecay = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i32 0, i32 0
- call void @"\01?dump@@YAXPEAHHAEAUSomeData@@@Z"(i32* %arraydecay, i32 %tmp22, %struct.SomeData* dereferenceable(8) %Data)
- ret void
-
-; CHECK-NOT: eh.resume:
-
-eh.resume: ; preds = %catch.dispatch
- %exn6 = load i8*, i8** %exn.slot
- %sel7 = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn6, 0
- %lpad.val8 = insertvalue { i8*, i32 } %lpad.val, i32 %sel7, 1
- resume { i8*, i32 } %lpad.val8
-
-; CHECK: }
-}
-
-; The following catch handler should be outlined.
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_E:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
-; CHECK: [[E_PTR1:\%.+]] = bitcast i8* [[RECOVER_E]] to i32*
-; CHECK: [[RECOVER_NUMEXCEPTIONS:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[NUMEXCEPTIONS_PTR1:\%.+]] = bitcast i8* [[RECOVER_NUMEXCEPTIONS]] to i32*
-; CHECK: [[RECOVER_EXCEPTIONVAL:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
-; CHECK: [[EXCEPTIONVAL_PTR1:\%.+]] = bitcast i8* [[RECOVER_EXCEPTIONVAL]] to [10 x i32]*
-; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 3)
-; CHECK: [[I_PTR1:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
-; CHECK: [[RECOVER_DATA:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 4)
-; CHECK: [[DATA_PTR1:\%.+]] = bitcast i8* [[RECOVER_DATA]] to %struct.SomeData*
-; CHECK: [[TMP:\%.+]] = load i32, i32* [[E_PTR1]], align 4
-; CHECK: [[TMP1:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_PTR]], align 4
-; CHECK: [[IDXPROM:\%.+]] = sext i32 [[TMP1]] to i64
-; CHECK: [[ARRAYIDX:\%.+]] = getelementptr inbounds [10 x i32], [10 x i32]* [[EXCEPTIONVAL_PTR1]], i32 0, i64 [[IDXPROM]]
-; CHECK: store i32 [[TMP]], i32* [[ARRAYIDX]], align 4
-; CHECK: [[TMP2:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_PTR1]], align 4
-; CHECK: [[INC:\%.+]] = add nsw i32 [[TMP2]], 1
-; CHECK: store i32 [[INC]], i32* [[NUMEXCEPTIONS_PTR]], align 4
-; CHECK: [[TMP3:\%.+]] = load i32, i32* [[E_PTR1]], align 4
-; CHECK: [[TMP4:\%.+]] = load i32, i32* [[I_PTR1]], align 4
-; CHECK: [[CMP:\%.+]] = icmp eq i32 [[TMP3]], [[TMP4]]
-; CHECK: br i1 [[CMP]], label %if.then, label %if.else
-;
-; CHECK: if.then: ; preds = %entry
-; CHECK: [[TMP5:\%.+]] = load i32, i32* [[E_PTR1]], align 4
-; CHECK: [[B_PTR:\%.+]] = getelementptr inbounds %struct.SomeData, %struct.SomeData* [[DATA_PTR1]], i32 0, i32 1
-; CHECK: [[TMP6:\%.+]] = load i32, i32* [[B_PTR]], align 4
-; CHECK: %add2 = add nsw i32 [[TMP6]], [[TMP5]]
-; CHECK: store i32 [[ADD:\%.+]], i32* [[B_PTR]], align 4
-; CHECK: br label %if.end
-;
-; CHECK: if.else: ; preds = %entry
-; CHECK: [[TMP7:\%.+]] = load i32, i32* %e, align 4
-; CHECK: [[A3:\%.+]] = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 0
-; CHECK: [[TMP8:\%.+]] = load i32, i32* %a3, align 4
-; CHECK: [[ADD1:\%.+]] = add nsw i32 [[TMP8]], [[TMP7]]
-; CHECK: store i32 [[ADD1]], i32* [[A3]], align 4
-; CHECK: br label %if.end
-;
-; CHECK: if.end: ; preds = %if.else, %if.then
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
-; CHECK: }
-
-
-; Function Attrs: nounwind
-declare void @llvm.memset(i8* nocapture, i8, i64, i32, i1) #1
-
-declare void @"\01?may_throw@@YAXXZ"() #2
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #3
-
-declare void @llvm.eh.begincatch(i8*, i8*)
-
-declare void @llvm.eh.endcatch()
-
-declare void @"\01?does_not_throw@@YAXH@Z"(i32) #2
-
-declare void @"\01?dump@@YAXPEAHHAEAUSomeData@@@Z"(i32*, i32, %struct.SomeData* dereferenceable(8)) #2
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readnone }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 228868)"}
diff --git a/test/CodeGen/WinEH/cppeh-inalloca.ll b/test/CodeGen/WinEH/cppeh-inalloca.ll
deleted file mode 100644
index 649c5e72e2dd..000000000000
--- a/test/CodeGen/WinEH/cppeh-inalloca.ll
+++ /dev/null
@@ -1,194 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is built from the following code:
-; struct A {
-; A(int a);
-; A(const A &o);
-; ~A();
-; int a;
-; };
-;
-; void may_throw();
-;
-; int test(A a) {
-; try {
-; may_throw();
-; }
-; catch (int e) {
-; return a.a + e;
-; }
-; return 0;
-; }
-;
-; The test was built for a 32-bit Windows target and then the reference to
-; the inalloca instruction was manually sunk into the landingpad.
-
-; ModuleID = 'cppeh-inalloca.cpp'
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%struct.A = type { i32 }
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-
-; The function entry should be rewritten like this.
-; CHECK: define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca)
-; CHECK: entry:
-; CHECK: [[TMP_REGMEM:\%.+]] = alloca <{ %struct.A }>*
-; CHECK: store <{ %struct.A }>* %0, <{ %struct.A }>** [[TMP_REGMEM]]
-; CHECK: [[RETVAL:\%.+]] = alloca i32, align 4
-; CHECK: [[E_PTR:\%.+]] = alloca i32, align 4
-; CHECK: [[CLEANUP_SLOT:\%.+]] = alloca i32
-; CHECK: call void (...) @llvm.localescape(i32* %e, <{ %struct.A }>** [[TMP_REGMEM]], i32* [[RETVAL]], i32* [[CLEANUP_SLOT]])
-; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %retval = alloca i32, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %e = alloca i32, align 4
- %cleanup.dest.slot = alloca i32
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- br label %try.cont
-
-; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER:\%recover.*]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAHUA@@@Z.catch", i32 0, void (i8*, i8*)* @"\01?test@@YAHUA@@@Z.cleanup")
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %cleanup]
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- %2 = extractvalue { i8*, i32 } %1, 0
- store i8* %2, i8** %exn.slot
- %3 = extractvalue { i8*, i32 } %1, 1
- store i32 %3, i32* %ehselector.slot
- br label %catch.dispatch
-
-; CHECK-NOT: catch.dispatch:
-
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot
- %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
- %matches = icmp eq i32 %sel, %4
- br i1 %matches, label %catch, label %ehcleanup
-
-; CHECK-NOT: catch:
-
-catch: ; preds = %catch.dispatch
- %exn = load i8*, i8** %exn.slot
- %e.i8 = bitcast i32* %e to i8*
- call void @llvm.eh.begincatch(i8* %exn, i8* %e.i8) #3
- %a = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* %0, i32 0, i32 0
- %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
- %tmp8 = load i32, i32* %a1, align 4
- %tmp9 = load i32, i32* %e, align 4
- %add = add nsw i32 %tmp8, %tmp9
- store i32 %add, i32* %retval
- store i32 1, i32* %cleanup.dest.slot
- call void @llvm.eh.endcatch() #3
- br label %cleanup
-
-try.cont: ; preds = %invoke.cont
- store i32 0, i32* %retval
- store i32 1, i32* %cleanup.dest.slot
- br label %cleanup
-
-; The cleanup block should be re-written like this.
-; CHECK: cleanup:{{[ ]+}}; preds = %[[LPAD_LABEL]], %try.cont
-; CHECK: %a2 = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* %0, i32 0, i32 0
-; CHECK: call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %a2)
-; CHECK: [[TMP1:\%.+]] = load i32, i32* [[RETVAL]]
-; CHECK: ret i32 [[TMP1]]
-
-cleanup: ; preds = %try.cont, %catch
- %a2 = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* %0, i32 0, i32 0
- call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %a2) #3
- %tmp10 = load i32, i32* %retval
- ret i32 %tmp10
-
-; CHECK-NOT: ehcleanup:
-
-ehcleanup: ; preds = %catch.dispatch
- %a3 = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* %0, i32 0, i32 0
- call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %a3) #3
- br label %eh.resume
-
-; CHECK-NOT: eh.resume:
-
-eh.resume: ; preds = %ehcleanup
- %exn2 = load i8*, i8** %exn.slot
- %sel3 = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn2, 0
- %lpad.val4 = insertvalue { i8*, i32 } %lpad.val, i32 %sel3, 1
- resume { i8*, i32 } %lpad.val4
-
-; CHECK: }
-}
-
-; The following catch handler should be outlined.
-; CHECK: define internal i8* @"\01?test@@YAHUA@@@Z.catch"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_E:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 0)
-; CHECK: [[E_PTR:\%.+]] = bitcast i8* [[RECOVER_E]] to i32*
-; CHECK: [[RECOVER_EH_TEMP:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 1)
-; CHECK: [[EH_TEMP:\%.+]] = bitcast i8* [[RECOVER_EH_TEMP]] to <{ %struct.A }>**
-; CHECK: [[RECOVER_RETVAL:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 2)
-; CHECK: [[RETVAL1:\%.+]] = bitcast i8* [[RECOVER_RETVAL]] to i32*
-; CHECK: [[RECOVER_CLEANUPSLOT:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 3)
-; CHECK: [[CLEANUPSLOT1:\%.+]] = bitcast i8* [[RECOVER_CLEANUPSLOT]] to i32*
-; CHECK: [[E_I8PTR:\%.+]] = bitcast i32* [[E_PTR]] to i8*
-; CHECK: [[TMP_RELOAD:\%.+]] = load <{ %struct.A }>*, <{ %struct.A }>** [[EH_TEMP]]
-; CHECK: [[RECOVER_A:\%.+]] = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* [[TMP_RELOAD]], i32 0, i32 0
-; CHECK: [[A1:\%.+]] = getelementptr inbounds %struct.A, %struct.A* [[RECOVER_A]], i32 0, i32 0
-; CHECK: [[TMP2:\%.+]] = load i32, i32* [[A1]], align 4
-; CHECK: [[TMP3:\%.+]] = load i32, i32* [[E_PTR]], align 4
-; CHECK: [[ADD:\%.+]] = add nsw i32 [[TMP2]], [[TMP3]]
-; CHECK: store i32 [[ADD]], i32* [[RETVAL1]]
-; CHECK: store i32 1, i32* [[CLEANUPSLOT1]]
-; CHECK: ret i8* blockaddress(@"\01?test@@YAHUA@@@Z", %cleanup)
-; CHECK: }
-
-; The following cleanup handler should be outlined.
-; CHECK: define internal void @"\01?test@@YAHUA@@@Z.cleanup"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_EH_TEMP1:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 1)
-; CHECK: [[EH_TEMP1:\%.+]] = bitcast i8* [[RECOVER_EH_TEMP]] to <{ %struct.A }>**
-; CHECK: [[TMP_RELOAD1:\%.+]] = load <{ %struct.A }>*, <{ %struct.A }>** [[EH_TEMP1]]
-; CHECK: [[A3:\%.+]] = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* [[TMP_RELOAD1]], i32 0, i32 0
-; CHECK: call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* [[A3]])
-; CHECK: ret void
-; CHECK: }
-
-declare void @"\01?may_throw@@YAXXZ"() #0
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #1
-
-declare void @llvm.eh.begincatch(i8*, i8*)
-
-declare void @llvm.eh.endcatch()
-
-; Function Attrs: nounwind
-declare x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A*) #2
-
-attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.7.0 (trunk 228868)"}
diff --git a/test/CodeGen/WinEH/cppeh-min-unwind.ll b/test/CodeGen/WinEH/cppeh-min-unwind.ll
deleted file mode 100644
index 98d6d6fcacb6..000000000000
--- a/test/CodeGen/WinEH/cppeh-min-unwind.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test was generated from the following source:
-;
-; class SomeClass {
-; public:
-; SomeClass();
-; ~SomeClass();
-; };
-;
-; void test() {
-; SomeClass obj;
-; may_throw();
-; }
-
-
-; ModuleID = 'min-unwind.cpp'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%class.SomeClass = type { [28 x i32] }
-
-; The function entry should be rewritten like this.
-; CHECK: define void @_Z4testv()
-; CHECK: entry:
-; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass, align 4
-; CHECK: call void @_ZN9SomeClassC1Ev(%class.SomeClass* [[OBJ_PTR]])
-; CHECK: call void (...) @llvm.localescape(%class.SomeClass* [[OBJ_PTR]])
-; CHECK: invoke void @_Z9may_throwv()
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-; Function Attrs: uwtable
-define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %obj = alloca %class.SomeClass, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- call void @_ZN9SomeClassC1Ev(%class.SomeClass* %obj)
- invoke void @_Z9may_throwv()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- call void @_ZN9SomeClassD1Ev(%class.SomeClass* %obj)
- ret void
-
-; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @_Z4testv.cleanup)
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], []
-
-lpad: ; preds = %entry
- %tmp = landingpad { i8*, i32 }
- cleanup
- %tmp1 = extractvalue { i8*, i32 } %tmp, 0
- store i8* %tmp1, i8** %exn.slot
- %tmp2 = extractvalue { i8*, i32 } %tmp, 1
- store i32 %tmp2, i32* %ehselector.slot
- call void @_ZN9SomeClassD1Ev(%class.SomeClass* %obj)
- br label %eh.resume
-
-; CHECK-NOT: eh.resume:
-
-eh.resume: ; preds = %lpad
- %exn = load i8*, i8** %exn.slot
- %sel = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
- %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
- resume { i8*, i32 } %lpad.val2
-
-; CHECK: }
-}
-
-; This cleanup handler should be outlined.
-; CHECK: define internal void @_Z4testv.cleanup(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_OBJ:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 0)
-; CHECK: [[OBJ_PTR1:\%.+]] = bitcast i8* [[RECOVER_OBJ]] to %class.SomeClass*
-; CHECK: call void @_ZN9SomeClassD1Ev(%class.SomeClass* [[OBJ_PTR1]])
-; CHECK: ret void
-; CHECK: }
-
-declare void @_ZN9SomeClassC1Ev(%class.SomeClass*) #1
-
-declare void @_Z9may_throwv() #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-declare void @_ZN9SomeClassD1Ev(%class.SomeClass*) #1
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { noinline noreturn nounwind }
-attributes #3 = { noreturn nounwind }
-attributes #4 = { nounwind }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.7.0 (trunk 226027)"}
diff --git a/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll b/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll
deleted file mode 100644
index c69633f17e28..000000000000
--- a/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll
+++ /dev/null
@@ -1,106 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; void test()
-; {
-; try {
-; Obj o;
-; may_throw();
-; } catch (...) {
-; }
-; }
-;
-; The purpose of this test is to verify that we create separate catch and
-; cleanup handlers. When compiling for the C++ 11 standard, this isn't
-; strictly necessary, since calling the destructor from the catch handler
-; would be logically equivalent to calling it from a cleanup handler.
-; However, if the -std=c++98 option is used, an exception in the cleanup
-; code should terminate the process (the MSVCRT runtime will do that) but
-; if the destructor is called from the catch handler, it wouldn't terminate
-; the process
-
-
-; ModuleID = 'cppeh-mixed-catch-and-cleanup.cpp'
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%class.Obj = type { i8 }
-
-; This just verifies that the function was processed by WinEHPrepare.
-;
-; CHECK-LABEL: define void @"\01?test@@YAXXZ"()
-; CHECK: entry:
-; CHECK: call void (...) @llvm.localescape
-; CHECK: }
-
-; Function Attrs: nounwind uwtable
-define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %o = alloca %class.Obj, align 1
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- call void @"\01??1Obj@@QEAA@XZ"(%class.Obj* %o) #3
- br label %try.cont
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* null
- %1 = extractvalue { i8*, i32 } %0, 0
- store i8* %1, i8** %exn.slot
- %2 = extractvalue { i8*, i32 } %0, 1
- store i32 %2, i32* %ehselector.slot
- call void @"\01??1Obj@@QEAA@XZ"(%class.Obj* %o) #3
- %exn = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn, i8* null) #3
- call void @llvm.eh.endcatch() #3
- br label %try.cont
-
-try.cont: ; preds = %catch, %invoke.cont
- ret void
-}
-
-; Verify that a cleanup handler was created and that it calls ~Obj().
-; CHECK-LABEL: define internal void @"\01?test@@YAXXZ.cleanup"(i8*, i8*)
-; CHECK: entry:
-; CHECK: @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
-; CHECK: call void @"\01??1Obj@@QEAA@XZ"
-; CHECK: ret void
-; CHECK: }
-
-; Verify that a catch handler was created and that it does not call ~Obj().
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
-; CHECK: entry:
-; CHECK-NOT: call void @"\01??1Obj@@QEAA@XZ"
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
-; CHECK: }
-
-
-
-declare void @"\01?may_throw@@YAXXZ"() #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind
-declare void @"\01??1Obj@@QEAA@XZ"(%class.Obj*) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #3
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 235779) (llvm/trunk 235769)"}
diff --git a/test/CodeGen/WinEH/cppeh-multi-catch.ll b/test/CodeGen/WinEH/cppeh-multi-catch.ll
deleted file mode 100644
index 266cdea20cdb..000000000000
--- a/test/CodeGen/WinEH/cppeh-multi-catch.ll
+++ /dev/null
@@ -1,226 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; void test()
-; {
-; try {
-; may_throw();
-; } catch (int i) {
-; handle_int(i);
-; } catch (long long ll) {
-; handle_long_long(ll);
-; } catch (SomeClass &obj) {
-; handle_obj(&obj);
-; } catch (...) {
-; handle_exception();
-; }
-; }
-;
-; The catch handlers were edited to insert 'ret void' after the endcatch call.
-
-; ModuleID = 'catch-with-type.cpp'
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.HandlerMapEntry = type { i32, i32 }
-%rtti.TypeDescriptor3 = type { i8**, i8*, [4 x i8] }
-%rtti.TypeDescriptor15 = type { i8**, i8*, [16 x i8] }
-%class.SomeClass = type { i8 }
-
-$"\01??_R0H@8" = comdat any
-
-$"\01??_R0_J@8" = comdat any
-
-$"\01??_R0?AVSomeClass@@@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@__ImageBase = external constant i8
-@llvm.eh.handlermapentry.H = private unnamed_addr constant %eh.HandlerMapEntry { i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"
-@"\01??_R0_J@8" = linkonce_odr global %rtti.TypeDescriptor3 { i8** @"\01??_7type_info@@6B@", i8* null, [4 x i8] c"._J\00" }, comdat
-@llvm.eh.handlermapentry._J = private unnamed_addr constant %eh.HandlerMapEntry { i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor3* @"\01??_R0_J@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"
-@"\01??_R0?AVSomeClass@@@8" = linkonce_odr global %rtti.TypeDescriptor15 { i8** @"\01??_7type_info@@6B@", i8* null, [16 x i8] c".?AVSomeClass@@\00" }, comdat
-@"llvm.eh.handlermapentry.reference.?AVSomeClass@@" = private unnamed_addr constant %eh.HandlerMapEntry { i32 8, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor15* @"\01??_R0?AVSomeClass@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"
-
-
-; CHECK: define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-; CHECK: entry:
-; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass*, align 8
-; CHECK: [[LL_PTR:\%.+]] = alloca i64, align 8
-; CHECK: [[I_PTR:\%.+]] = alloca i32, align 4
-; CHECK: call void (...) @llvm.localescape(i32* [[I_PTR]], i64* [[LL_PTR]], %class.SomeClass** [[OBJ_PTR]])
-; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %obj = alloca %class.SomeClass*, align 8
- %ll = alloca i64, align 8
- %i = alloca i32, align 4
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- br label %try.cont
-
-; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry.H
-; CHECK-NEXT: catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry._J
-; CHECK-NEXT: catch %eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@"
-; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(
-; CHECK-SAME: i32 1, i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry.H to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch",
-; CHECK-SAME: i32 1, i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry._J to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1",
-; CHECK-SAME: i32 1, i8* bitcast (%eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.2",
-; CHECK-SAME: i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.3")
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %ret]
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry.H
- catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry._J
- catch %eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@"
- catch i8* null
- %1 = extractvalue { i8*, i32 } %0, 0
- store i8* %1, i8** %exn.slot
- %2 = extractvalue { i8*, i32 } %0, 1
- store i32 %2, i32* %ehselector.slot
- br label %catch.dispatch
-
-; CHECK-NOT: catch.dispatch:
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot
- %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry.H to i8*)) #3
- %matches = icmp eq i32 %sel, %3
- br i1 %matches, label %catch14, label %catch.fallthrough
-
-ret:
- ret void
-
-; CHECK-NOT: catch14:
-; CHECK: ret:
-; CHECK-NEXT: ret void
-catch14: ; preds = %catch.dispatch
- %exn15 = load i8*, i8** %exn.slot
- %4 = bitcast i32* %i to i8*
- call void @llvm.eh.begincatch(i8* %exn15, i8* %4) #3
- %5 = load i32, i32* %i, align 4
- call void @"\01?handle_int@@YAXH@Z"(i32 %5)
- call void @llvm.eh.endcatch() #3
- br label %ret
-
-try.cont: ; preds = %invoke.cont
- br label %ret
-
-; CHECK-NOT: catch.fallthrough:
-catch.fallthrough: ; preds = %catch.dispatch
- %6 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry._J to i8*)) #3
- %matches1 = icmp eq i32 %sel, %6
- br i1 %matches1, label %catch10, label %catch.fallthrough2
-
-; CHECK-NOT: catch10:
-catch10: ; preds = %catch.fallthrough
- %exn11 = load i8*, i8** %exn.slot
- %7 = bitcast i64* %ll to i8*
- call void @llvm.eh.begincatch(i8* %exn11, i8* %7) #3
- %8 = load i64, i64* %ll, align 8
- call void @"\01?handle_long_long@@YAX_J@Z"(i64 %8)
- call void @llvm.eh.endcatch() #3
- br label %ret
-
-; CHECK-NOT: catch.fallthrough2:
-catch.fallthrough2: ; preds = %catch.fallthrough
- %9 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@" to i8*)) #3
- %matches3 = icmp eq i32 %sel, %9
- br i1 %matches3, label %catch6, label %catch
-
-; CHECK-NOT: catch6:
-catch6: ; preds = %catch.fallthrough2
- %exn7 = load i8*, i8** %exn.slot
- %10 = bitcast %class.SomeClass** %obj to i8*
- call void @llvm.eh.begincatch(i8* %exn7, i8* %10) #3
- %11 = load %class.SomeClass*, %class.SomeClass** %obj, align 8
- call void @"\01?handle_obj@@YAXPEAVSomeClass@@@Z"(%class.SomeClass* %11)
- call void @llvm.eh.endcatch() #3
- br label %ret
-
-; CHECK-NOT: catch:
-catch: ; preds = %catch.fallthrough2
- %exn = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn, i8* null) #3
- call void @"\01?handle_exception@@YAXXZ"() call void @llvm.eh.endcatch() #3
- br label %ret
-; CHECK: }
-}
-
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
-; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
-; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
-; CHECK: call void @"\01?handle_int@@YAXH@Z"(i32 [[TMP1]])
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %ret)
-; CHECK: }
-
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch.1"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_LL:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[LL_PTR:\%.+]] = bitcast i8* [[RECOVER_LL]] to i64*
-; CHECK: [[TMP2:\%.+]] = load i64, i64* [[LL_PTR]], align 8
-; CHECK: call void @"\01?handle_long_long@@YAX_J@Z"(i64 [[TMP2]])
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %ret)
-; CHECK: }
-
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch.2"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_OBJ:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
-; CHECK: [[OBJ_PTR:\%.+]] = bitcast i8* [[RECOVER_OBJ]] to %class.SomeClass**
-; CHECK: [[TMP3:\%.+]] = load %class.SomeClass*, %class.SomeClass** [[OBJ_PTR]], align 8
-; CHECK: call void @"\01?handle_obj@@YAXPEAVSomeClass@@@Z"(%class.SomeClass* [[TMP3]])
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %ret)
-; CHECK: }
-
-; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch.3"(i8*, i8*)
-; CHECK: entry:
-; CHECK: call void @"\01?handle_exception@@YAXXZ"()
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %ret)
-; CHECK: }
-
-
-declare void @"\01?may_throw@@YAXXZ"() #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-
-declare void @"\01?handle_exception@@YAXXZ"() #1
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #3
-
-declare void @"\01?handle_obj@@YAXPEAVSomeClass@@@Z"(%class.SomeClass*) #1
-
-declare void @"\01?handle_long_long@@YAX_J@Z"(i64) #1
-
-declare void @"\01?handle_int@@YAXH@Z"(i32) #1
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 233155) (llvm/trunk 233153)"}
diff --git a/test/CodeGen/WinEH/cppeh-nested-1.ll b/test/CodeGen/WinEH/cppeh-nested-1.ll
deleted file mode 100644
index d525d8a1a67e..000000000000
--- a/test/CodeGen/WinEH/cppeh-nested-1.ll
+++ /dev/null
@@ -1,194 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-;void test()
-;{
-; try {
-; try {
-; may_throw();
-; } catch (int i) {
-; handle_int(i);
-; }
-; } catch (float f) {
-; handle_float(f);
-; }
-; done();
-;}
-
-; ModuleID = 'cppeh-nested-1.cpp'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-
-$"\01??_R0M@8" = comdat any
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0M@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".M\00" }, comdat
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-
-; CHECK: define void @"\01?test@@YAXXZ"()
-; CHECK: entry:
-; CHECK: %i = alloca i32, align 4
-; CHECK: %f = alloca float, align 4
-; CHECK: call void (...) @llvm.localescape(float* %f, i32* %i)
-; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %i = alloca i32, align 4
- %f = alloca float, align 4
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- br label %try.cont
-
-; CHECK: [[LPAD_LABEL]]:
-; CHECK: landingpad { i8*, i32 }
-; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; CHECK: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
-; CHECK: indirectbr i8* [[RECOVER]], [label %try.cont, label %try.cont10]
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
- %1 = extractvalue { i8*, i32 } %0, 0
- store i8* %1, i8** %exn.slot
- %2 = extractvalue { i8*, i32 } %0, 1
- store i32 %2, i32* %ehselector.slot
- br label %catch.dispatch
-
-; CHECK-NOT: catch.dispatch:
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot
- %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
- %matches = icmp eq i32 %sel, %3
- br i1 %matches, label %catch, label %catch.dispatch3
-
-; CHECK-NOT: catch:
-catch: ; preds = %catch.dispatch
- %exn = load i8*, i8** %exn.slot
- %4 = bitcast i32* %i to i8*
- call void @llvm.eh.begincatch(i8* %exn, i8* %4) #3
- %5 = load i32, i32* %i, align 4
- invoke void @"\01?handle_int@@YAXH@Z"(i32 %5)
- to label %invoke.cont2 unwind label %lpad1
-
-; CHECK-NOT: invoke.cont2:
-invoke.cont2: ; preds = %catch
- call void @llvm.eh.endcatch() #3
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont2, %invoke.cont
- br label %try.cont10
-
-; CHECK-NOT: lpad1:
-lpad1: ; preds = %catch
- %6 = landingpad { i8*, i32 }
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
- %7 = extractvalue { i8*, i32 } %6, 0
- store i8* %7, i8** %exn.slot
- %8 = extractvalue { i8*, i32 } %6, 1
- store i32 %8, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #3
- br label %catch.dispatch3
-
-; CHECK-NOT: catch.dispatch3:
-catch.dispatch3: ; preds = %lpad1, %catch.dispatch
- %sel4 = load i32, i32* %ehselector.slot
- %9 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)) #3
- %matches5 = icmp eq i32 %sel4, %9
- br i1 %matches5, label %catch6, label %eh.resume
-
-; CHECK-NOT: catch6:
-catch6: ; preds = %catch.dispatch3
- %exn7 = load i8*, i8** %exn.slot
- %10 = bitcast float* %f to i8*
- call void @llvm.eh.begincatch(i8* %exn7, i8* %10) #3
- %11 = load float, float* %f, align 4
- call void @"\01?handle_float@@YAXM@Z"(float %11)
- call void @llvm.eh.endcatch() #3
- br label %try.cont10
-
-try.cont10: ; preds = %catch6, %try.cont
- call void @"\01?done@@YAXXZ"()
- ret void
-
-; CHECK-NOT: eh.resume:
-eh.resume: ; %catch.dispatch3
- %exn11 = load i8*, i8** %exn.slot
- %sel12 = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn11, 0
- %lpad.val13 = insertvalue { i8*, i32 } %lpad.val, i32 %sel12, 1
- resume { i8*, i32 } %lpad.val13
-; CHECK: }
-}
-
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_F1:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
-; CHECK: [[F_PTR1:\%.+]] = bitcast i8* [[RECOVER_F1]] to float*
-; CHECK: [[TMP2:\%.+]] = load float, float* [[F_PTR1]], align 4
-; CHECK: call void @"\01?handle_float@@YAXM@Z"(float [[TMP2]])
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont10)
-; CHECK: }
-
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch.1"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
-; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
-; CHECK: invoke void @"\01?handle_int@@YAXH@Z"(i32 [[TMP1]])
-; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
-;
-; CHECK: invoke.cont2:
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
-;
-; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 }
-; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; CHECK: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
-; CHECK: indirectbr i8* [[RECOVER1]], []
-;
-; CHECK: }
-
-
-declare void @"\01?may_throw@@YAXXZ"() #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-
-declare void @"\01?handle_int@@YAXH@Z"(i32) #1
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #3
-
-declare void @"\01?handle_float@@YAXM@Z"(float) #1
-
-declare void @"\01?done@@YAXXZ"() #1
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 232069) (llvm/trunk 232070)"}
diff --git a/test/CodeGen/WinEH/cppeh-nested-2.ll b/test/CodeGen/WinEH/cppeh-nested-2.ll
deleted file mode 100644
index 2764e7478c71..000000000000
--- a/test/CodeGen/WinEH/cppeh-nested-2.ll
+++ /dev/null
@@ -1,324 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; class Inner {
-; public:
-; Inner();
-; ~Inner();
-; };
-; class Outer {
-; public:
-; Outer();
-; ~Outer();
-; };
-; void test() {
-; try {
-; Outer outer;
-; try {
-; Inner inner;
-; may_throw();
-; } catch (int i) {
-; handle_int(i);
-; }
-; } catch (float f) {
-; handle_float(f);
-; }
-; done();
-; }
-
-; ModuleID = 'nested-2.cpp'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%class.Outer = type { i8 }
-%class.Inner = type { i8 }
-
-@_ZTIf = external constant i8*
-@_ZTIi = external constant i8*
-
-; The function entry should be rewritten like this.
-; CHECK: define void @_Z4testv()
-; CHECK: entry:
-; CHECK: %outer = alloca %class.Outer, align 1
-; CHECK: %inner = alloca %class.Inner, align 1
-; CHECK: %i = alloca i32, align 4
-; CHECK: %f = alloca float, align 4
-; CHECK: call void (...) @llvm.localescape(float* %f, i32* %i, %class.Outer* %outer, %class.Inner* %inner)
-; CHECK: invoke void @_ZN5OuterC1Ev(%class.Outer* %outer)
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-; Function Attrs: uwtable
-define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %outer = alloca %class.Outer, align 1
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %inner = alloca %class.Inner, align 1
- %i = alloca i32, align 4
- %f = alloca float, align 4
- invoke void @_ZN5OuterC1Ev(%class.Outer* %outer)
- to label %invoke.cont unwind label %lpad
-
-; CHECK: invoke.cont:
-; CHECK: invoke void @_ZN5InnerC1Ev(%class.Inner* %inner)
-; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
-
-invoke.cont: ; preds = %entry
- invoke void @_ZN5InnerC1Ev(%class.Inner* %inner)
- to label %invoke.cont2 unwind label %lpad1
-
-; CHECK: invoke.cont2:
-; CHECK: invoke void @_Z9may_throwv()
-; CHECK: to label %invoke.cont4 unwind label %[[LPAD3_LABEL:lpad[0-9]*]]
-
-invoke.cont2: ; preds = %invoke.cont
- invoke void @_Z9may_throwv()
- to label %invoke.cont4 unwind label %lpad3
-
-; CHECK: invoke.cont4:
-; CHECK: invoke void @_ZN5InnerD1Ev(%class.Inner* %inner)
-; CHECK: to label %invoke.cont5 unwind label %[[LPAD1_LABEL]]
-
-invoke.cont4: ; preds = %invoke.cont2
- invoke void @_ZN5InnerD1Ev(%class.Inner* %inner)
- to label %invoke.cont5 unwind label %lpad1
-
-; CHECK: invoke.cont5:
-; CHECK: br label %try.cont
-
-invoke.cont5: ; preds = %invoke.cont4
- br label %try.cont
-
-; CHECK: [[LPAD_LABEL]]:
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont19]
-
-lpad: ; preds = %try.cont, %entry
- %tmp = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIf to i8*)
- %tmp1 = extractvalue { i8*, i32 } %tmp, 0
- store i8* %tmp1, i8** %exn.slot
- %tmp2 = extractvalue { i8*, i32 } %tmp, 1
- store i32 %tmp2, i32* %ehselector.slot
- br label %catch.dispatch11
-
-; CHECK: [[LPAD1_LABEL]]:
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
-; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
-; CHECK-NEXT: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(
-; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 1, i8* (i8*, i8*)* @_Z4testv.catch.1,
-; CHECK-SAME: i32 0, void (i8*, i8*)* @_Z4testv.cleanup,
-; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
-; CHECK-NEXT: indirectbr i8* [[RECOVER1]], [label %try.cont, label %try.cont19]
-
-lpad1: ; preds = %invoke.cont4, %invoke.cont
- %tmp3 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i8** @_ZTIi to i8*)
- catch i8* bitcast (i8** @_ZTIf to i8*)
- %tmp4 = extractvalue { i8*, i32 } %tmp3, 0
- store i8* %tmp4, i8** %exn.slot
- %tmp5 = extractvalue { i8*, i32 } %tmp3, 1
- store i32 %tmp5, i32* %ehselector.slot
- br label %catch.dispatch
-
-; CHECK: [[LPAD3_LABEL]]:
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
-; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
-; CHECK-NEXT: [[RECOVER3:\%.+]] = call i8* (...) @llvm.eh.actions(
-; CHECK-SAME: i32 0, void (i8*, i8*)* @_Z4testv.cleanup.2,
-; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 1, i8* (i8*, i8*)* @_Z4testv.catch.1,
-; CHECK-SAME: i32 0, void (i8*, i8*)* @_Z4testv.cleanup,
-; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
-; CHECK-NEXT: indirectbr i8* [[RECOVER3]], [label %try.cont, label %try.cont19]
-
-lpad3: ; preds = %invoke.cont2
- %tmp6 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i8** @_ZTIi to i8*)
- catch i8* bitcast (i8** @_ZTIf to i8*)
- %tmp7 = extractvalue { i8*, i32 } %tmp6, 0
- store i8* %tmp7, i8** %exn.slot
- %tmp8 = extractvalue { i8*, i32 } %tmp6, 1
- store i32 %tmp8, i32* %ehselector.slot
- call void @_ZN5InnerD1Ev(%class.Inner* %inner)
- br label %catch.dispatch
-
-; CHECK-NOT: catch.dispatch:
-
-catch.dispatch: ; preds = %lpad3, %lpad1
- %sel = load i32, i32* %ehselector.slot
- %tmp9 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #4
- %matches = icmp eq i32 %sel, %tmp9
- br i1 %matches, label %catch, label %ehcleanup
-
-; CHECK-NOT: catch:
-
-catch: ; preds = %catch.dispatch
- %exn = load i8*, i8** %exn.slot
- %i.i8 = bitcast i32* %i to i8*
- call void @llvm.eh.begincatch(i8* %exn, i8* %i.i8) #4
- %tmp13 = load i32, i32* %i, align 4
- invoke void @_Z10handle_inti(i32 %tmp13)
- to label %invoke.cont8 unwind label %lpad7
-
-; CHECK-NOT: invoke.cont8:
-
-invoke.cont8: ; preds = %catch
- call void @llvm.eh.endcatch() #4
- br label %try.cont
-
-; CHECK: try.cont:
-; CHECK: invoke void @_ZN5OuterD1Ev(%class.Outer* %outer)
-; CHECK: to label %invoke.cont9 unwind label %[[LPAD_LABEL]]
-
-try.cont: ; preds = %invoke.cont8, %invoke.cont5
- invoke void @_ZN5OuterD1Ev(%class.Outer* %outer)
- to label %invoke.cont9 unwind label %lpad
-
-invoke.cont9: ; preds = %try.cont
- br label %try.cont19
-
-; CHECK-NOT: lpad7:
-
-lpad7: ; preds = %catch
- %tmp14 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i8** @_ZTIf to i8*)
- %tmp15 = extractvalue { i8*, i32 } %tmp14, 0
- store i8* %tmp15, i8** %exn.slot
- %tmp16 = extractvalue { i8*, i32 } %tmp14, 1
- store i32 %tmp16, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #4
- br label %ehcleanup
-
-; CHECK-NOT: ehcleanup: ; preds = %lpad7, %catch.dispatch
-
-ehcleanup: ; preds = %lpad7, %catch.dispatch
- call void @_ZN5OuterD1Ev(%class.Outer* %outer)
- br label %catch.dispatch11
-
-; CHECK-NOT: catch.dispatch11:
-
-catch.dispatch11: ; preds = %ehcleanup, %lpad
- %sel12 = load i32, i32* %ehselector.slot
- %tmp17 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*)) #4
- %matches13 = icmp eq i32 %sel12, %tmp17
- br i1 %matches13, label %catch14, label %eh.resume
-
-; CHECK-NOT: catch14:
-
-catch14: ; preds = %catch.dispatch11
- %exn15 = load i8*, i8** %exn.slot
- %f.i8 = bitcast float* %f to i8*
- call void @llvm.eh.begincatch(i8* %exn15, i8* %f.i8) #4
- %tmp21 = load float, float* %f, align 4
- call void @_Z12handle_floatf(float %tmp21)
- call void @llvm.eh.endcatch() #4
- br label %try.cont19
-
-try.cont19: ; preds = %catch14, %invoke.cont9
- call void @_Z4donev()
- ret void
-
-; CHECK-NOT: eh.resume:
-
-eh.resume: ; preds = %catch.dispatch11
- %exn20 = load i8*, i8** %exn.slot
- %sel21 = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn20, 0
- %lpad.val22 = insertvalue { i8*, i32 } %lpad.val, i32 %sel21, 1
- resume { i8*, i32 } %lpad.val22
-
-; CHECK: }
-}
-
-; This catch handler should be outlined.
-; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_F:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 0)
-; CHECK: [[F_PTR:\%.+]] = bitcast i8* [[RECOVER_F]] to float*
-; CHECK: [[TMP:\%.+]] = load float, float* [[F_PTR]], align 4
-; CHECK: call void @_Z12handle_floatf(float [[TMP]])
-; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont19)
-; CHECK: }
-
-; This catch handler should be outlined.
-; CHECK: define internal i8* @_Z4testv.catch.1(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 1)
-; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
-; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
-; CHECK: invoke void @_Z10handle_inti(i32 [[TMP1]])
-; CHECK: to label %invoke.cont8 unwind label %[[LPAD7_LABEL:lpad[0-9]*]]
-;
-; CHECK: invoke.cont8: ; preds = %entry
-; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont)
-;
-; CHECK: [[LPAD7_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: [[LPAD7_VAL:\%.+]] = landingpad { i8*, i32 }
-; (FIXME) The nested handler body isn't being populated yet.
-; CHECK: }
-
-; This cleanup handler should be outlined.
-; CHECK: define internal void @_Z4testv.cleanup(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_OUTER:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 2)
-; CHECK: [[OUTER_PTR:\%.+]] = bitcast i8* [[RECOVER_OUTER]] to %class.Outer*
-; CHECK: call void @_ZN5OuterD1Ev(%class.Outer* [[OUTER_PTR]])
-; CHECK: ret void
-; CHECK: }
-
-; This cleanup handler should be outlined.
-; CHECK: define internal void @_Z4testv.cleanup.2(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_INNER:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 3)
-; CHECK: [[INNER_PTR:\%.+]] = bitcast i8* [[RECOVER_INNER]] to %class.Inner*
-; CHECK: call void @_ZN5InnerD1Ev(%class.Inner* [[INNER_PTR]])
-; CHECK: ret void
-; CHECK: }
-
-
-
-declare void @_ZN5OuterC1Ev(%class.Outer*) #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-declare void @_ZN5InnerC1Ev(%class.Inner*) #1
-
-declare void @_Z9may_throwv() #1
-
-declare void @_ZN5InnerD1Ev(%class.Inner*) #1
-
-declare void @llvm.eh.begincatch(i8*, i8*)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #3
-
-declare void @_Z10handle_inti(i32) #1
-
-declare void @llvm.eh.endcatch()
-
-declare void @_ZN5OuterD1Ev(%class.Outer*) #1
-
-declare void @_Z12handle_floatf(float) #1
-
-declare void @_Z4donev() #1
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { noinline noreturn nounwind }
-attributes #3 = { nounwind readnone }
-attributes #4 = { nounwind }
-attributes #5 = { noreturn nounwind }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.7.0 (trunk 226027)"}
diff --git a/test/CodeGen/WinEH/cppeh-nested-3.ll b/test/CodeGen/WinEH/cppeh-nested-3.ll
deleted file mode 100644
index 88759f406fb1..000000000000
--- a/test/CodeGen/WinEH/cppeh-nested-3.ll
+++ /dev/null
@@ -1,260 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-;void test()
-;{
-; try {
-; try {
-; may_throw();
-; } catch (int i) {
-; try {
-; may_throw();
-; }
-; catch (int j) {
-; i = j;
-; }
-; handle_int(i);
-; }
-; } catch (float f) {
-; handle_float(f);
-; }
-; done();
-;}
-
-; ModuleID = 'cppeh-nested-3.cpp'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-
-$"\01??_R0M@8" = comdat any
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0M@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".M\00" }, comdat
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-
-; CHECK: define void @"\01?test@@YAXXZ"()
-; CHECK: entry:
-; CHECK: %i = alloca i32, align 4
-; CHECK: %j = alloca i32, align 4
-; CHECK: %f = alloca float, align 4
-; CHECK: call void (...) @llvm.localescape(i32* %j, i32* %i, float* %f)
-; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
-; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
-
-; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %i = alloca i32, align 4
- %j = alloca i32, align 4
- %f = alloca float, align 4
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- br label %try.cont10
-
-; CHECK: [[LPAD_LABEL]]:
-; CHECK: landingpad { i8*, i32 }
-; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; CHECK: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.2", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1")
-; CHECK: indirectbr i8* [[RECOVER]], [label %try.cont10, label %try.cont19]
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
- %1 = extractvalue { i8*, i32 } %0, 0
- store i8* %1, i8** %exn.slot
- %2 = extractvalue { i8*, i32 } %0, 1
- store i32 %2, i32* %ehselector.slot
- br label %catch.dispatch
-
-; CHECK-NOT: catch.dispatch:
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot
- %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
- %matches = icmp eq i32 %sel, %3
- br i1 %matches, label %catch, label %catch.dispatch11
-
-; CHECK-NOT: catch:
-catch: ; preds = %catch.dispatch
- %exn = load i8*, i8** %exn.slot
- %4 = bitcast i32* %i to i8*
- call void @llvm.eh.begincatch(i8* %exn, i8* %4) #3
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont2 unwind label %lpad1
-
-; CHECK-NOT: invoke.cont2:
-invoke.cont2: ; preds = %catch
- br label %try.cont
-
-; CHECK-NOT: lpad1:
-lpad1: ; preds = %catch
- %5 = landingpad { i8*, i32 }
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
- %6 = extractvalue { i8*, i32 } %5, 0
- store i8* %6, i8** %exn.slot
- %7 = extractvalue { i8*, i32 } %5, 1
- store i32 %7, i32* %ehselector.slot
- br label %catch.dispatch3
-
-; CHECK-NOT: catch.dispatch3:
-catch.dispatch3: ; preds = %lpad1
- %sel4 = load i32, i32* %ehselector.slot
- %8 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
- %matches5 = icmp eq i32 %sel4, %8
- br i1 %matches5, label %catch6, label %catch.dispatch11
-
-; CHECK-NOT: catch6:
-catch6: ; preds = %catch.dispatch3
- %exn7 = load i8*, i8** %exn.slot
- %9 = bitcast i32* %j to i8*
- call void @llvm.eh.begincatch(i8* %exn7, i8* %9) #3
- %10 = load i32, i32* %j, align 4
- store i32 %10, i32* %i, align 4
- call void @llvm.eh.endcatch() #3
- br label %try.cont
-
-; CHECK-NOT: try.cont:
-try.cont: ; preds = %catch6, %invoke.cont2
- %11 = load i32, i32* %i, align 4
- invoke void @"\01?handle_int@@YAXH@Z"(i32 %11)
- to label %invoke.cont9 unwind label %lpad8
-
-; CHECK-NOT: invoke.cont9:
-invoke.cont9: ; preds = %try.cont
- call void @llvm.eh.endcatch() #3
- br label %try.cont10
-
-try.cont10: ; preds = %invoke.cont9, %invoke.cont
- br label %try.cont19
-
-; CHECK-NOT: lpad8:
-lpad8: ; preds = %try.cont
- %12 = landingpad { i8*, i32 }
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
- %13 = extractvalue { i8*, i32 } %12, 0
- store i8* %13, i8** %exn.slot
- %14 = extractvalue { i8*, i32 } %12, 1
- store i32 %14, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #3
- br label %catch.dispatch11
-
-; CHECK-NOT: catch.dispatch11:
-catch.dispatch11: ; preds = %lpad8, %catch.dispatch3, %catch.dispatch
- %sel12 = load i32, i32* %ehselector.slot
- %15 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)) #3
- %matches13 = icmp eq i32 %sel12, %15
- br i1 %matches13, label %catch14, label %eh.resume
-
-; CHECK-NOT: catch14:
-catch14: ; preds = %catch.dispatch11
- %exn15 = load i8*, i8** %exn.slot
- %16 = bitcast float* %f to i8*
- call void @llvm.eh.begincatch(i8* %exn15, i8* %16) #3
- %17 = load float, float* %f, align 4
- call void @"\01?handle_float@@YAXM@Z"(float %17)
- call void @llvm.eh.endcatch() #3
- br label %try.cont19
-
-try.cont19: ; preds = %catch14, %try.cont10
- call void @"\01?done@@YAXXZ"()
- ret void
-
-; CHECK-NOT: eh.resume:
-eh.resume: ; preds = %lpad16, %catch.dispatch11
- %exn20 = load i8*, i8** %exn.slot
- %sel21 = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn20, 0
- %lpad.val22 = insertvalue { i8*, i32 } %lpad.val, i32 %sel21, 1
- resume { i8*, i32 } %lpad.val22
-; CHECK: }
-}
-
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_J:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
-; CHECK: [[J_PTR:\%.+]] = bitcast i8* [[RECOVER_J]] to i32*
-; CHECK: [[RECOVER_I1:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[I_PTR1:\%.+]] = bitcast i8* [[RECOVER_I1]] to i32*
-; CHECK: [[TMP3:\%.+]] = load i32, i32* [[J_PTR]], align 4
-; CHECK: store i32 [[TMP3]], i32* [[I_PTR1]]
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ.catch.2", %invoke.cont2)
-; CHECK: }
-
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch.1"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_F:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
-; CHECK: [[F_PTR:\%.+]] = bitcast i8* [[RECOVER_F]] to float*
-; CHECK: [[TMP2:\%.+]] = load float, float* [[F_PTR]], align 4
-; CHECK: call void @"\01?handle_float@@YAXM@Z"(float [[TMP2]])
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont19)
-; CHECK: }
-
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch.2"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
-; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
-; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
-;
-; CHECK: invoke.cont2: ; preds = %[[LPAD1_LABEL]], %entry
-; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
-; CHECK: invoke void @"\01?handle_int@@YAXH@Z"(i32 [[TMP1]])
-; CHECK: to label %invoke.cont9 unwind label %[[LPAD8_LABEL:lpad[0-9]*]]
-;
-; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %entry
-; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 }
-; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; CHECK: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1")
-; CHECK: indirectbr i8* [[RECOVER1]], [label %invoke.cont2]
-;
-; CHECK: invoke.cont9:
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont10)
-;
-; CHECK: [[LPAD8_LABEL]]:{{[ ]+}}; preds = %invoke.cont2
-; CHECK: [[LPAD8_VAL:\%.+]] = landingpad { i8*, i32 }
-; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
-; CHECK: [[RECOVER2:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1")
-; CHECK: indirectbr i8* [[RECOVER2]], []
-;
-; CHECK: }
-
-declare void @"\01?may_throw@@YAXXZ"() #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #3
-
-declare void @"\01?handle_int@@YAXH@Z"(i32) #1
-
-declare void @"\01?handle_float@@YAXM@Z"(float) #1
-
-declare void @"\01?done@@YAXXZ"() #1
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 232069) (llvm/trunk 232070)"}
diff --git a/test/CodeGen/WinEH/cppeh-nested-rethrow.ll b/test/CodeGen/WinEH/cppeh-nested-rethrow.ll
deleted file mode 100644
index 53f532c8eb16..000000000000
--- a/test/CodeGen/WinEH/cppeh-nested-rethrow.ll
+++ /dev/null
@@ -1,212 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test was generated from the following code.
-;
-; void test1() {
-; try {
-; try {
-; throw 1;
-; } catch(...) { throw; }
-; } catch (...) { }
-; }
-; void test2() {
-; try {
-; throw 1;
-; } catch(...) {
-; try {
-; throw;
-; } catch (...) {}
-; }
-; }
-;
-; These two functions result in functionally equivalent code, but the last
-; catch block contains a call to llvm.eh.endcatch that tripped up processing
-; during development.
-;
-; The main purpose of this test is to verify that we can correctly
-; handle the case of nested landing pads that return directly to a block in
-; the parent function.
-
-; ModuleID = 'cppeh-nested-rethrow.cpp'
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
-%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
-%eh.ThrowInfo = type { i32, i32, i32, i32 }
-
-$"\01??_R0H@8" = comdat any
-
-$"_CT??_R0H@84" = comdat any
-
-$_CTA1H = comdat any
-
-$_TI1H = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@__ImageBase = external constant i8
-@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
-@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
-@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
-
-; CHECK-LABEL: define void @"\01?test1@@YAXXZ"()
-; CHECK: entry:
-; CHECK: call void (...) @llvm.localescape
-
-; Function Attrs: nounwind uwtable
-define void @"\01?test1@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %tmp = alloca i32, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- store i32 1, i32* %tmp
- %0 = bitcast i32* %tmp to i8*
- invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #2
- to label %unreachable unwind label %lpad
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- catch i8* null
- %2 = extractvalue { i8*, i32 } %1, 0
- store i8* %2, i8** %exn.slot
- %3 = extractvalue { i8*, i32 } %1, 1
- store i32 %3, i32* %ehselector.slot
- br label %catch
-
-catch: ; preds = %lpad
- %exn = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn, i8* null) #1
- invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #2
- to label %unreachable unwind label %lpad1
-
-lpad1: ; preds = %catch
- %4 = landingpad { i8*, i32 }
- catch i8* null
- %5 = extractvalue { i8*, i32 } %4, 0
- store i8* %5, i8** %exn.slot
- %6 = extractvalue { i8*, i32 } %4, 1
- store i32 %6, i32* %ehselector.slot
- br label %catch2
-
-catch2: ; preds = %lpad1
- %exn3 = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn3, i8* null) #1
- call void @llvm.eh.endcatch() #1
- br label %try.cont.4
-
-; This block should not be eliminated.
-; CHECK: try.cont.4:
-try.cont.4: ; preds = %catch2, %try.cont
- ret void
-
-try.cont: ; No predecessors!
- br label %try.cont.4
-
-unreachable: ; preds = %catch, %entry
- unreachable
-; CHECK: }
-}
-
-declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #1
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #1
-
-; CHECK-LABEL: define void @"\01?test2@@YAXXZ"()
-; CHECK: entry:
-; CHECK: call void (...) @llvm.localescape
-
-; Function Attrs: nounwind uwtable
-define void @"\01?test2@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %tmp = alloca i32, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- store i32 1, i32* %tmp
- %0 = bitcast i32* %tmp to i8*
- invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #2
- to label %unreachable unwind label %lpad
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- catch i8* null
- %2 = extractvalue { i8*, i32 } %1, 0
- store i8* %2, i8** %exn.slot
- %3 = extractvalue { i8*, i32 } %1, 1
- store i32 %3, i32* %ehselector.slot
- br label %catch
-
-catch: ; preds = %lpad
- %exn = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn, i8* null) #1
- invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #2
- to label %unreachable unwind label %lpad1
-
-lpad1: ; preds = %catch
- %4 = landingpad { i8*, i32 }
- catch i8* null
- %5 = extractvalue { i8*, i32 } %4, 0
- store i8* %5, i8** %exn.slot
- %6 = extractvalue { i8*, i32 } %4, 1
- store i32 %6, i32* %ehselector.slot
- br label %catch2
-
-catch2: ; preds = %lpad1
- %exn3 = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn3, i8* null) #1
- call void @llvm.eh.endcatch() #1
- br label %try.cont
-
-; This block should not be eliminated.
-; CHECK: try.cont:
-; The endcatch call should be eliminated.
-; CHECK-NOT: call void @llvm.eh.endcatch()
-try.cont: ; preds = %catch2
- call void @llvm.eh.endcatch() #1
- br label %try.cont.4
-
-try.cont.4: ; preds = %try.cont
- ret void
-
-unreachable: ; preds = %catch, %entry
- unreachable
-; CHECK: }
-}
-
-; The outlined test1.catch handler should return to a valid block address.
-; CHECK-LABEL: define internal i8* @"\01?test1@@YAXXZ.catch"(i8*, i8*)
-; CHECK-NOT: ret i8* inttoptr (i32 1 to i8*)
-; CHECK: }
-
-; The outlined test1.catch1 handler should not contain a return instruction.
-; CHECK-LABEL: define internal i8* @"\01?test1@@YAXXZ.catch.1"(i8*, i8*)
-; CHECK-NOT: ret
-; CHECK: }
-
-; The outlined test2.catch handler should return to a valid block address.
-; CHECK-LABEL: define internal i8* @"\01?test2@@YAXXZ.catch"(i8*, i8*)
-; CHECK-NOT: ret i8* inttoptr (i32 1 to i8*)
-; CHECK: }
-
-; The outlined test2.catch2 handler should not contain a return instruction.
-; CHECK-LABEL: define internal i8* @"\01?test2@@YAXXZ.catch.2"(i8*, i8*)
-; CHECK-NOT: ret
-; CHECK: }
-
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind }
-attributes #2 = { noreturn }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 236059)"}
diff --git a/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll b/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
deleted file mode 100644
index 7b474c9d38a3..000000000000
--- a/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
+++ /dev/null
@@ -1,278 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; struct SomeData {
-; int a;
-; int b;
-; };
-;
-; void may_throw();
-; void does_not_throw(int i);
-; void dump(int *, int, SomeData&);
-;
-; void test() {
-; int NumExceptions = 0;
-; int ExceptionVal[10];
-; SomeData Data = { 0, 0 };
-;
-; for (int i = 0; i < 10; ++i) {
-; try {
-; may_throw();
-; Data.a += i;
-; }
-; catch (int e) {
-; ExceptionVal[NumExceptions] = e;
-; ++NumExceptions;
-; if (e == i)
-; Data.b += e;
-; else
-; Data.a += e;
-; }
-; does_not_throw(NumExceptions);
-; }
-; dump(ExceptionVal, NumExceptions, Data);
-; }
-;
-; Unlike the cppeh-frame-vars.ll test, this test was generated using -O2
-; optimization, which results in non-alloca values being used in the
-; catch handler.
-
-; ModuleID = 'cppeh-frame-vars.cpp'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%struct.SomeData = type { i32, i32 }
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-
-; The function entry should be rewritten like this.
-; CHECK: define void @"\01?test@@YAXXZ"()
-; CHECK: entry:
-; CHECK: [[NUMEXCEPTIONS_REGMEM:\%.+]] = alloca i32
-; CHECK: [[I_REGMEM:\%.+]] = alloca i32
-; CHECK: [[B_REGMEM:\%.+]] = alloca i32*
-; CHECK: [[A_REGMEM:\%.+]] = alloca i32*
-; CHECK: [[E_PTR:\%.+]] = alloca i32, align 4
-; CHECK: [[EXCEPTIONVAL:\%.+]] = alloca [10 x i32], align 16
-; CHECK: [[DATA_PTR:\%.+]] = alloca i64, align 8
-; CHECK: [[TMPCAST:\%.+]] = bitcast i64* [[DATA_PTR]] to %struct.SomeData*
-; CHECK: [[TMP:\%.+]] = bitcast [10 x i32]* [[EXCEPTIONVAL]] to i8*
-; CHECK: call void @llvm.lifetime.start(i64 40, i8* [[TMP]])
-; CHECK: store i64 0, i64* [[DATA_PTR]], align 8
-; CHECK: [[A_PTR:\%.+]] = bitcast i64* [[DATA_PTR]] to i32*
-; CHECK: store i32* [[A_PTR]], i32** [[A_REGMEM]]
-; CHECK: [[B_PTR:\%.+]] = getelementptr inbounds %struct.SomeData, %struct.SomeData* [[TMPCAST]], i64 0, i32 1
-; CHECK: store i32* [[B_PTR]], i32** [[B_REGMEM]]
-; CHECK: call void (...) @llvm.localescape(i32* %e, i32* %NumExceptions.020.reg2mem, [10 x i32]* [[EXCEPTIONVAL]], i32* %inc.reg2mem, i32* [[I_REGMEM]], i32** [[A_REGMEM]], i32** [[B_REGMEM]])
-; CHECK: br label %for.body
-
-; Function Attrs: uwtable
-define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %e = alloca i32, align 4
- %ExceptionVal = alloca [10 x i32], align 16
- %Data = alloca i64, align 8
- %tmpcast = bitcast i64* %Data to %struct.SomeData*
- %0 = bitcast [10 x i32]* %ExceptionVal to i8*
- call void @llvm.lifetime.start(i64 40, i8* %0) #1
- store i64 0, i64* %Data, align 8
- %a = bitcast i64* %Data to i32*
- %b = getelementptr inbounds %struct.SomeData, %struct.SomeData* %tmpcast, i64 0, i32 1
- br label %for.body
-
-; CHECK: for.body:
-; CHECK: [[NUMEXCEPTIONS_PHI:\%.*]] = phi i32 [ 0, %entry ], [ {{\%NumExceptions.*}}, %try.cont ]
-; CHECK: [[I_PHI:\%.*]] = phi i32 [ 0, %entry ], [ {{\%inc.*}}, %try.cont ]
-; CHECK: store i32 [[I_PHI]], i32* [[I_REGMEM]]
-; CHECK: store i32 [[NUMEXCEPTIONS_PHI]], i32* [[NUMEXCEPTIONS_REGMEM]]
-; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
-for.body: ; preds = %entry, %try.cont
- %NumExceptions.020 = phi i32 [ 0, %entry ], [ %NumExceptions.1, %try.cont ]
- %i.019 = phi i32 [ 0, %entry ], [ %inc5, %try.cont ]
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont unwind label %lpad
-
-; CHECK: invoke.cont: ; preds = %for.body
-; CHECK: [[A_RELOAD:\%.+]] = load i32*, i32** [[A_REGMEM]]
-; CHECK: [[TMP1:\%.+]] = load i32, i32* [[A_RELOAD]], align 8
-; CHECK: [[I_RELOAD:\%.+]] = load i32, i32* [[I_REGMEM]]
-; CHECK: [[ADD:\%.+]] = add nsw i32 [[TMP1]], [[I_RELOAD]]
-; CHECK: [[A_RELOAD1:\%.+]] = load i32*, i32** [[A_REGMEM]]
-; CHECK: [[NUMEXCEPTIONS_RELOAD:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_REGMEM]]
-; CHECK: br label %try.cont
-invoke.cont: ; preds = %for.body
- %1 = load i32, i32* %a, align 8, !tbaa !2
- %add = add nsw i32 %1, %i.019
- store i32 %add, i32* %a, align 8, !tbaa !2
- br label %try.cont
-
-; CHECK: [[LPAD_LABEL:lpad[0-9]*]]:{{[ ]+}}; preds = %for.body
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %[[SPLIT_RECOVER_BB:.*]]]
-
-lpad: ; preds = %for.body
- %2 = landingpad { i8*, i32 }
- catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
- %3 = extractvalue { i8*, i32 } %2, 1
- %4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #1
- %matches = icmp eq i32 %3, %4
- br i1 %matches, label %catch, label %eh.resume
-
-; CHECK-NOT: catch:
-
-catch: ; preds = %lpad
- %5 = extractvalue { i8*, i32 } %2, 0
- %e.i8 = bitcast i32* %e to i8*
- call void @llvm.eh.begincatch(i8* %5, i8* %e.i8) #1
- %tmp8 = load i32, i32* %e, align 4, !tbaa !7
- %idxprom = sext i32 %NumExceptions.020 to i64
- %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i64 0, i64 %idxprom
- store i32 %tmp8, i32* %arrayidx, align 4, !tbaa !7
- %inc = add nsw i32 %NumExceptions.020, 1
- %cmp1 = icmp eq i32 %tmp8, %i.019
- br i1 %cmp1, label %if.then, label %if.else
-
-if.then: ; preds = %catch
- %tmp9 = load i32, i32* %b, align 4, !tbaa !8
- %add2 = add nsw i32 %tmp9, %i.019
- store i32 %add2, i32* %b, align 4, !tbaa !8
- br label %if.end
-
-; CHECK-NOT: if.else:
-
-if.else: ; preds = %catch
- %tmp10 = load i32, i32* %a, align 8, !tbaa !2
- %add4 = add nsw i32 %tmp10, %tmp8
- store i32 %add4, i32* %a, align 8, !tbaa !2
- br label %if.end
-
-; CHECK-NOT: if.end:
-; CHECK: [[SPLIT_RECOVER_BB]]:
-; CHECK: [[INC_RELOAD:\%.*]] = load i32, i32*
-; CHECK: br label %try.cont
-
-if.end: ; preds = %if.else, %if.then
- tail call void @llvm.eh.endcatch() #1
- br label %try.cont
-
-; CHECK: try.cont:{{[ ]+}}; preds = %[[SPLIT_RECOVER_BB]], %invoke.cont
-; CHECK: [[NUMEXCEPTIONS_PHI:\%.*]] = phi i32 [ [[NUMEXCEPTIONS_RELOAD]], %invoke.cont ], [ [[INC_RELOAD]], %[[SPLIT_RECOVER_BB]] ]
-; CHECK: tail call void @"\01?does_not_throw@@YAXH@Z"(i32 [[NUMEXCEPTIONS_PHI]])
-; CHECK: [[I_RELOAD:\%.+]] = load i32, i32* [[I_REGMEM]]
-; CHECK: [[INC:\%.+]] = add nuw nsw i32 [[I_RELOAD]], 1
-; CHECK: [[CMP:\%.+]] = icmp slt i32 [[INC]], 10
-; CHECK: br i1 [[CMP]], label %for.body, label %for.end
-
-try.cont: ; preds = %if.end, %invoke.cont
- %NumExceptions.1 = phi i32 [ %NumExceptions.020, %invoke.cont ], [ %inc, %if.end ]
- tail call void @"\01?does_not_throw@@YAXH@Z"(i32 %NumExceptions.1)
- %inc5 = add nuw nsw i32 %i.019, 1
- %cmp = icmp slt i32 %inc5, 10
- br i1 %cmp, label %for.body, label %for.end
-
-for.end: ; preds = %try.cont
- %NumExceptions.1.lcssa = phi i32 [ %NumExceptions.1, %try.cont ]
- %arraydecay = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i64 0, i64 0
- call void @"\01?dump@@YAXPEAHHAEAUSomeData@@@Z"(i32* %arraydecay, i32 %NumExceptions.1.lcssa, %struct.SomeData* dereferenceable(8) %tmpcast)
- call void @llvm.lifetime.end(i64 40, i8* %0) #1
- ret void
-
-eh.resume: ; preds = %lpad
- %.lcssa = phi { i8*, i32 } [ %2, %lpad ]
- resume { i8*, i32 } %.lcssa
-}
-
-; The following catch handler should be outlined.
-; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
-; CHECK: entry:
-; CHECK: [[RECOVER_E:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
-; CHECK: [[E_PTR:\%.+]] = bitcast i8* [[RECOVER_E]] to i32*
-; CHECK: [[RECOVER_NUMEXCEPTIONS:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
-; CHECK: [[NUMEXCEPTIONS_REGMEM:\%.+]] = bitcast i8* [[RECOVER_NUMEXCEPTIONS]] to i32*
-; CHECK: [[RECOVER_EXCEPTIONVAL:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
-; CHECK: [[EXCEPTIONVAL:\%.+]] = bitcast i8* [[RECOVER_EXCEPTIONVAL]] to [10 x i32]*
-; CHECK: [[RECOVER_INC:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 3)
-; CHECK: [[INC_REGMEM:\%.+]] = bitcast i8* [[RECOVER_INC]] to i32*
-; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 4)
-; CHECK: [[I_REGMEM:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
-; CHECK: [[RECOVER_A:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 5)
-; CHECK: [[A_REGMEM:\%.+]] = bitcast i8* [[RECOVER_A]] to i32**
-; CHECK: [[RECOVER_B:\%.+]] = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 6)
-; CHECK: [[B_REGMEM:\%.+]] = bitcast i8* [[RECOVER_B]] to i32**
-; CHECK: [[E_I8PTR:\%.+]] = bitcast i32* [[E_PTR]] to i8*
-; CHECK: [[TMP:\%.+]] = load i32, i32* [[E_PTR]], align 4
-; CHECK: [[NUMEXCEPTIONS_RELOAD:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_REGMEM]]
-; CHECK: [[IDXPROM:\%.+]] = sext i32 [[NUMEXCEPTIONS_RELOAD]] to i64
-; CHECK: [[ARRAYIDX:\%.+]] = getelementptr inbounds [10 x i32], [10 x i32]* [[EXCEPTIONVAL]], i64 0, i64 [[IDXPROM]]
-; CHECK: store i32 [[TMP]], i32* [[ARRAYIDX]], align 4
-; CHECK: [[NUMEXCEPTIONS_RELOAD:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_REGMEM]]
-; CHECK: [[INC:\%.+]] = add nsw i32 [[NUMEXCEPTIONS_RELOAD]], 1
-; CHECK: [[CMP:\%.+]] = icmp eq i32 [[TMP]], [[I_RELOAD]]
-; CHECK: br i1 [[CMP]], label %if.then, label %if.else
-;
-; CHECK: if.then:{{[ ]+}}; preds = %entry
-; CHECK: [[B_RELOAD:\%.+]] = load i32*, i32** [[B_REGMEM]]
-; CHECK: [[TMP1:\%.+]] = load i32, i32* [[B_RELOAD]], align 4
-; CHECK: [[I_RELOAD:\%.+]] = load i32, i32* [[I_REGMEM]]
-; CHECK: [[ADD:\%.+]] = add nsw i32 [[TMP1]], [[I_RELOAD]]
-; CHECK: [[B_RELOAD:\%.+]] = load i32*, i32** [[B_REGMEM]]
-; CHECK: store i32 [[ADD]], i32* [[B_RELOAD]], align 4
-; CHECK: br label %if.end
-;
-; CHECK: if.else:{{[ ]+}}; preds = %entry
-; CHECK: [[A_RELOAD:\%.+]] = load i32*, i32** [[A_REGMEM]]
-; CHECK: [[TMP2:\%.+]] = load i32, i32* [[A_RELOAD]], align 8
-; CHECK: [[ADD2:\%.+]] = add nsw i32 [[TMP2]], [[TMP]]
-; CHECK: [[A_RELOAD:\%.+]] = load i32*, i32** [[A_REGMEM]]
-; CHECK: store i32 [[ADD2]], i32* [[A_RELOAD]], align 8
-; CHECK: br label %if.end
-;
-; CHECK: if.end:{{[ ]+}}; preds = %if.else, %if.then
-; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %[[SPLIT_RECOVER_BB]])
-; CHECK: }
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture) #1
-
-declare void @"\01?may_throw@@YAXXZ"() #2
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #3
-
-declare void @llvm.eh.begincatch(i8*, i8*)
-
-declare void @llvm.eh.endcatch()
-
-declare void @"\01?does_not_throw@@YAXH@Z"(i32) #2
-
-declare void @"\01?dump@@YAXPEAHHAEAUSomeData@@@Z"(i32*, i32, %struct.SomeData* dereferenceable(8)) #2
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.end(i64, i8* nocapture) #1
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readnone }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 228868)"}
-!2 = !{!3, !4, i64 0}
-!3 = !{!"?AUSomeData@@", !4, i64 0, !4, i64 4}
-!4 = !{!"int", !5, i64 0}
-!5 = !{!"omnipotent char", !6, i64 0}
-!6 = !{!"Simple C/C++ TBAA"}
-!7 = !{!4, !4, i64 0}
-!8 = !{!3, !4, i64 4}
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll b/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
deleted file mode 100644
index 31b5e58562b2..000000000000
--- a/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-; This test case is equivalent to:
-; extern "C" void may_throw();
-; extern "C" void test_catch_all() {
-; try {
-; may_throw();
-; } catch (...) {
-; }
-; }
-
-declare void @may_throw() #1
-declare i32 @__CxxFrameHandler3(...)
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #2
-declare void @llvm.eh.endcatch() #2
-
-; Function Attrs: nounwind uwtable
-define void @test_catch_all() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- invoke void @may_throw()
- to label %try.cont unwind label %lpad
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* null
- %1 = extractvalue { i8*, i32 } %0, 0
- tail call void @llvm.eh.begincatch(i8* %1, i8* null) #2
- tail call void @llvm.eh.endcatch() #2
- br label %try.cont
-
-try.cont: ; preds = %entry, %lpad
- ret void
-}
-
-; CHECK-LABEL: $handlerMap$0$test_catch_all:
-; CHECK: .long {{[0-9]+}}
-; CHECK: .long 0
-; CHECK: .long 0
-; CHECK: .long test_catch_all.catch@IMGREL
-; CHECK: .long .Ltest_catch_all.catch$parent_frame_offset
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind }
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll b/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
deleted file mode 100644
index 2d31a1d5cf4f..000000000000
--- a/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
+++ /dev/null
@@ -1,165 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-; Verify that we get the right frame escape label when the catch comes after the
-; parent function.
-
-; This test case is equivalent to:
-; int main() {
-; try {
-; throw 42;
-; } catch (int e) {
-; printf("e: %d\n", e);
-; }
-; }
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
-%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
-%eh.ThrowInfo = type { i32, i32, i32, i32 }
-%eh.CatchHandlerType = type { i32, i8* }
-
-$"\01??_R0H@8" = comdat any
-
-$"_CT??_R0H@84" = comdat any
-
-$_CTA1H = comdat any
-
-$_TI1H = comdat any
-
-$"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@__ImageBase = external constant i8
-@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
-@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
-@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
-@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
-@"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [7 x i8] c"e: %d\0A\00", comdat, align 1
-
-declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
-
-; Function Attrs: uwtable
-define i32 @main() #1 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %tmp.i = alloca i32, align 4
- %e = alloca i32, align 4
- %0 = bitcast i32* %tmp.i to i8*
- store i32 42, i32* %tmp.i, align 4, !tbaa !2
- call void (...) @llvm.localescape(i32* %e)
- invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #6
- to label %.noexc unwind label %lpad1
-
-.noexc: ; preds = %entry
- unreachable
-
-lpad1: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 0, i8* (i8*, i8*)* @main.catch)
- indirectbr i8* %recover, [label %try.cont.split]
-
-try.cont.split: ; preds = %lpad1
- ret i32 0
-}
-
-; CHECK-LABEL: main:
-; CHECK: .seh_handlerdata
-; CHECK: .long ($cppxdata$main)@IMGREL
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-
-; Function Attrs: nounwind
-declare i32 @printf(i8* nocapture readonly, ...) #4
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #3
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture) #3
-
-; Function Attrs: nounwind
-declare i8* @llvm.eh.actions(...) #3
-
-define internal i8* @main.catch(i8*, i8*) #5 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %e.i8 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @main to i8*), i8* %1, i32 0)
- %e = bitcast i8* %e.i8 to i32*
- %2 = bitcast i32* %e to i8*
- %3 = load i32, i32* %e, align 4, !tbaa !2
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@", i64 0, i64 0), i32 %3)
- invoke void @llvm.donothing()
- to label %entry.split unwind label %stub
-
-entry.split: ; preds = %entry
- ret i8* blockaddress(@main, %try.cont.split)
-
-stub: ; preds = %entry
- %4 = landingpad { i8*, i32 }
- cleanup
- %recover = call i8* (...) @llvm.eh.actions()
- unreachable
-}
-
-; CHECK-LABEL: main.catch:
-; CHECK: .seh_handlerdata
-; CHECK: .long ($cppxdata$main)@IMGREL
-
-; CHECK: .align 4
-; CHECK-NEXT: $cppxdata$main:
-; CHECK-NEXT: .long 429065506
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long ($stateUnwindMap$main)@IMGREL
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long ($tryMap$main)@IMGREL
-; CHECK-NEXT: .long 3
-; CHECK-NEXT: .long ($ip2state$main)@IMGREL
-; CHECK-NEXT: .long 40
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 1
-
-; Make sure we get the right frame escape label.
-
-; CHECK: $handlerMap$0$main:
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long "??_R0H@8"@IMGREL
-; CHECK-NEXT: .long .Lmain$frame_escape_0
-; CHECK-NEXT: .long main.catch@IMGREL
-; CHECK-NEXT: .long .Lmain.catch$parent_frame_offset
-
-; Function Attrs: nounwind readnone
-declare void @llvm.donothing() #2
-
-; Function Attrs: nounwind
-declare void @llvm.localescape(...) #3
-
-; Function Attrs: nounwind readnone
-declare i8* @llvm.localrecover(i8*, i8*, i32) #2
-
-attributes #0 = { noreturn uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="main" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-attributes #4 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #5 = { "wineh-parent"="main" }
-attributes #6 = { noreturn }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 "}
-!2 = !{!3, !3, i64 0}
-!3 = !{!"int", !4, i64 0}
-!4 = !{!"omnipotent char", !5, i64 0}
-!5 = !{!"Simple C/C++ TBAA"}
-
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch.ll b/test/CodeGen/WinEH/cppeh-prepared-catch.ll
deleted file mode 100644
index a5d86dceea93..000000000000
--- a/test/CodeGen/WinEH/cppeh-prepared-catch.ll
+++ /dev/null
@@ -1,232 +0,0 @@
-; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X64
-; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X86
-
-; This test case is equivalent to:
-; void f() {
-; try {
-; try {
-; may_throw();
-; } catch (int &) {
-; may_throw();
-; }
-; may_throw();
-; } catch (double) {
-; }
-; }
-
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchHandlerType = type { i32, i8* }
-
-$"\01??_R0N@8" = comdat any
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0N@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".N\00" }, comdat
-@llvm.eh.handlertype.N.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0N@8" to i8*) }, section "llvm.metadata"
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@llvm.eh.handlertype.H.8 = private unnamed_addr constant %eh.CatchHandlerType { i32 8, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
-
-define internal i8* @"\01?f@@YAXXZ.catch"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %.i8 = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?f@@YAXXZ" to i8*), i8* %1, i32 0)
- %bc2 = bitcast i8* %.i8 to i32**
- %bc3 = bitcast i32** %bc2 to i8*
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont2 unwind label %lpad1
-
-invoke.cont2: ; preds = %entry
- ret i8* blockaddress(@"\01?f@@YAXXZ", %try.cont)
-
-lpad1: ; preds = %entry
- %lp4 = landingpad { i8*, i32 }
- cleanup
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
- %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
- indirectbr i8* %recover, [label %invoke.cont2]
-}
-
-; CHECK-LABEL: "?f@@YAXXZ.catch":
-; No code should be generated for the indirectbr.
-; CHECK-NOT: jmp{{[ql]}} *
-; X64: .seh_handlerdata
-; X64-NEXT: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
-
-
-define internal i8* @"\01?f@@YAXXZ.catch1"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %.i8 = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?f@@YAXXZ" to i8*), i8* %1, i32 1)
- %2 = bitcast i8* %.i8 to double*
- %3 = bitcast double* %2 to i8*
- invoke void () @llvm.donothing()
- to label %done unwind label %lpad
-
-done:
- ret i8* blockaddress(@"\01?f@@YAXXZ", %try.cont8)
-
-lpad: ; preds = %entry
- %4 = landingpad { i8*, i32 }
- cleanup
- %recover = call i8* (...) @llvm.eh.actions()
- unreachable
-}
-
-; CHECK-LABEL: "?f@@YAXXZ.catch1":
-; No code should be generated for the indirectbr.
-; CHECK-NOT: jmp{{[ql]}} *
-; X64: ".L?f@@YAXXZ.catch1$parent_frame_offset" = 16
-; X64: movq %rdx, 16(%rsp)
-; X64: .seh_handlerdata
-; X64: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
-
-define void @"\01?f@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %0 = alloca i32*, align 8
- %1 = alloca double, align 8
- call void (...) @llvm.localescape(i32** %0, double* %1)
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont unwind label %lpad2
-
-invoke.cont: ; preds = %entry
- br label %try.cont
-
-lpad2: ; preds = %entry
- %2 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.8
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
- %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.8 to i8*), i32 0, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
- indirectbr i8* %recover, [label %try.cont, label %try.cont8]
-
-try.cont: ; preds = %lpad2, %invoke.cont
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %try.cont8 unwind label %lpad1
-
-lpad1:
- %3 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
- %recover2 = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
- indirectbr i8* %recover2, [label %try.cont8]
-
-try.cont8: ; preds = %lpad2, %try.cont
- ret void
-}
-
-; CHECK-LABEL: "?f@@YAXXZ":
-; No code should be generated for the indirectbr.
-; CHECK-NOT: jmp{{[ql]}} *
-
-; X64: .seh_handlerdata
-; X64-NEXT: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
-; X86: .section .xdata,"dr"
-
-; CHECK: .align 4
-
-; X64: "$cppxdata$?f@@YAXXZ":
-; X64-NEXT: .long 429065506
-; X64-NEXT: .long 4
-; X64-NEXT: .long ("$stateUnwindMap$?f@@YAXXZ")@IMGREL
-; X64-NEXT: .long 2
-; X64-NEXT: .long ("$tryMap$?f@@YAXXZ")@IMGREL
-; X64-NEXT: .long 6
-; X64-NEXT: .long ("$ip2state$?f@@YAXXZ")@IMGREL
-; X64-NEXT: .long 32
-; X64-NEXT: .long 0
-; X64-NEXT: .long 1
-
-; X86: "L__ehtable$?f@@YAXXZ":
-; X86-NEXT: .long 429065506
-; X86-NEXT: .long 4
-; X86-NEXT: .long ("$stateUnwindMap$?f@@YAXXZ")
-; X86-NEXT: .long 2
-; X86-NEXT: .long ("$tryMap$?f@@YAXXZ")
-; X86-NEXT: .long 0
-; X86-NEXT: .long 0
-; X86-NEXT: .long 0
-; X86-NEXT: .long 1
-
-
-; CHECK-NEXT:"$stateUnwindMap$?f@@YAXXZ":
-; CHECK-NEXT: .long -1
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long -1
-; CHECK-NEXT: .long 0
-; CHECK-NEXT:"$tryMap$?f@@YAXXZ":
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long ("$handlerMap$0$?f@@YAXXZ")
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long 3
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long ("$handlerMap$1$?f@@YAXXZ")
-; CHECK-NEXT:"$handlerMap$0$?f@@YAXXZ":
-; CHECK-NEXT: .long 8
-; CHECK-NEXT: .long "??_R0H@8"
-; CHECK-NEXT: .long "{{.?}}L?f@@YAXXZ$frame_escape_0"
-; CHECK-NEXT: .long "?f@@YAXXZ.catch"
-; X64-NEXT: .long ".L?f@@YAXXZ.catch$parent_frame_offset"
-; CHECK-NEXT:"$handlerMap$1$?f@@YAXXZ":
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long "??_R0N@8"
-; CHECK-NEXT: .long "{{.?}}L?f@@YAXXZ$frame_escape_1"
-; CHECK-NEXT: .long "?f@@YAXXZ.catch1"
-; X64-NEXT: .long ".L?f@@YAXXZ.catch1$parent_frame_offset"
-
-; X64-NEXT:"$ip2state$?f@@YAXXZ":
-; X64-NEXT: .long .Lfunc_begin0
-; X64-NEXT: .long 2
-; X64-NEXT: .long .Ltmp0
-; X64-NEXT: .long 0
-; X64-NEXT: .long .Lfunc_begin1
-; X64-NEXT: .long 3
-; X64-NEXT: .long .Lfunc_begin2
-; X64-NEXT: .long -1
-; X64-NEXT: .long .Ltmp13
-; X64-NEXT: .long 1
-; X64-NEXT: .long .Ltmp16
-; X64-NEXT: .long 0
-
-
-; X86: "___ehhandler$?f@@YAXXZ": # @"__ehhandler$?f@@YAXXZ"
-; X86: movl $"L__ehtable$?f@@YAXXZ", %eax
-; X86: jmp ___CxxFrameHandler3 # TAILCALL
-
-
-declare void @"\01?may_throw@@YAXXZ"() #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #3
-
-; Function Attrs: nounwind
-declare i8* @llvm.eh.actions(...) #3
-
-; Function Attrs: nounwind
-declare void @llvm.localescape(...) #3
-
-; Function Attrs: nounwind readnone
-declare i8* @llvm.localrecover(i8*, i8*, i32) #2
-
-declare void @llvm.donothing()
-
-attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?f@@YAXXZ" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-attributes #4 = { "wineh-parent"="?f@@YAXXZ" }
diff --git a/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll b/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
deleted file mode 100644
index b5cfd65030ab..000000000000
--- a/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
+++ /dev/null
@@ -1,245 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
-%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
-%eh.ThrowInfo = type { i32, i32, i32, i32 }
-%struct.S = type { i8 }
-
-$"\01??_DS@@QEAA@XZ" = comdat any
-
-$"\01??_R0H@8" = comdat any
-
-$"_CT??_R0H@84" = comdat any
-
-$_CTA1H = comdat any
-
-$_TI1H = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@__ImageBase = external constant i8
-@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
-@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
-@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
-
-
-; CHECK-LABEL: "?test1@@YAXXZ":
-; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long ("$cppxdata$?test1@@YAXXZ")@IMGREL
-; CHECK-NEXT: .align 4
-; CHECK-NEXT:"$cppxdata$?test1@@YAXXZ":
-; CHECK-NEXT: .long 429065506
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long ("$stateUnwindMap$?test1@@YAXXZ")@IMGREL
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long ("$ip2state$?test1@@YAXXZ")@IMGREL
-; CHECK-NEXT: .long 32
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 1
-; CHECK-NEXT:"$stateUnwindMap$?test1@@YAXXZ":
-; CHECK-NEXT: .long -1
-; CHECK-NEXT: .long "?test1@@YAXXZ.cleanup"@IMGREL
-; CHECK-NEXT:"$ip2state$?test1@@YAXXZ":
-; CHECK-NEXT: .long .Lfunc_begin0@IMGREL
-; CHECK-NEXT: .long -1
-; CHECK-NEXT: .long .Ltmp0@IMGREL
-; CHECK-NEXT: .long 0
-
-define void @"\01?test1@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %unwindhelp = alloca i64
- %tmp = alloca i32, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- store i32 0, i32* %tmp
- %0 = bitcast i32* %tmp to i8*
- call void (...) @llvm.localescape()
- store volatile i64 -2, i64* %unwindhelp
- %1 = bitcast i64* %unwindhelp to i8*
- call void @llvm.eh.unwindhelp(i8* %1)
- invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #8
- to label %unreachable unwind label %lpad1
-
-lpad1: ; preds = %entry
- %2 = landingpad { i8*, i32 }
- cleanup
- %recover = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test1@@YAXXZ.cleanup")
- indirectbr i8* %recover, []
-
-unreachable: ; preds = %entry
- unreachable
-}
-
-declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind
-define linkonce_odr void @"\01??_DS@@QEAA@XZ"(%struct.S* %this) unnamed_addr #1 comdat align 2 {
-entry:
- %this.addr = alloca %struct.S*, align 8
- store %struct.S* %this, %struct.S** %this.addr, align 8
- %this1 = load %struct.S*, %struct.S** %this.addr
- call void @"\01??1S@@QEAA@XZ"(%struct.S* %this1) #4
- ret void
-}
-
-; CHECK-LABEL: "?test2@@YAX_N@Z":
-; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long ("$cppxdata$?test2@@YAX_N@Z")@IMGREL
-; CHECK-NEXT: .align 4
-; CHECK-NEXT:"$cppxdata$?test2@@YAX_N@Z":
-; CHECK-NEXT: .long 429065506
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long ("$stateUnwindMap$?test2@@YAX_N@Z")@IMGREL
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 4
-; CHECK-NEXT: .long ("$ip2state$?test2@@YAX_N@Z")@IMGREL
-; CHECK-NEXT: .long 40
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 1
-; CHECK-NEXT:"$stateUnwindMap$?test2@@YAX_N@Z":
-; CHECK-NEXT: .long -1
-; CHECK-NEXT: .long "?test2@@YAX_N@Z.cleanup"@IMGREL
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long "?test2@@YAX_N@Z.cleanup1"@IMGREL
-; CHECK-NEXT:"$ip2state$?test2@@YAX_N@Z":
-; CHECK-NEXT: .long .Lfunc_begin1@IMGREL
-; CHECK-NEXT: .long -1
-; CHECK-NEXT: .long .Ltmp7@IMGREL
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long .Ltmp9@IMGREL
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long .Ltmp12@IMGREL
-; CHECK-NEXT: .long 0
-
-define void @"\01?test2@@YAX_N@Z"(i1 zeroext %b) #2 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
- %b.addr = alloca i8, align 1
- %s = alloca %struct.S, align 1
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %s1 = alloca %struct.S, align 1
- %frombool = zext i1 %b to i8
- store i8 %frombool, i8* %b.addr, align 1
- call void (...) @llvm.localescape(%struct.S* %s, %struct.S* %s1)
- call void @"\01?may_throw@@YAXXZ"()
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont unwind label %lpad1
-
-invoke.cont: ; preds = %entry
- %1 = load i8, i8* %b.addr, align 1
- %tobool = trunc i8 %1 to i1
- br i1 %tobool, label %if.then, label %if.else
-
-if.then: ; preds = %invoke.cont
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont3 unwind label %lpad3
-
-invoke.cont3: ; preds = %if.then
- call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s1) #4
- br label %if.end
-
-lpad1: ; preds = %entry, %if.end
- %2 = landingpad { i8*, i32 }
- cleanup
- %recover = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup")
- indirectbr i8* %recover, []
-
-lpad3: ; preds = %if.then
- %3 = landingpad { i8*, i32 }
- cleanup
- %recover4 = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup1", i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup")
- indirectbr i8* %recover4, []
-
-if.else: ; preds = %invoke.cont
- call void @"\01?dont_throw@@YAXXZ"() #4
- br label %if.end
-
-if.end: ; preds = %if.else, %invoke.cont3
- invoke void @"\01?may_throw@@YAXXZ"()
- to label %invoke.cont4 unwind label %lpad1
-
-invoke.cont4: ; preds = %if.end
- call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s) #4
- ret void
-}
-
-declare void @"\01?may_throw@@YAXXZ"() #3
-
-; Function Attrs: nounwind
-declare void @"\01?dont_throw@@YAXXZ"() #1
-
-; Function Attrs: nounwind
-declare void @"\01??1S@@QEAA@XZ"(%struct.S*) #1
-
-; Function Attrs: nounwind
-declare i8* @llvm.eh.actions(...) #4
-
-define internal void @"\01?test1@@YAXXZ.cleanup"(i8*, i8*) #5 {
-entry:
- %s = alloca %struct.S, align 1
- call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s) #4
- ret void
-}
-
-; Function Attrs: nounwind
-declare void @llvm.localescape(...) #4
-
-; Function Attrs: nounwind readnone
-declare i8* @llvm.localrecover(i8*, i8*, i32) #6
-
-; Function Attrs: nounwind
-declare void @llvm.eh.unwindhelp(i8*) #4
-
-define internal void @"\01?test2@@YAX_N@Z.cleanup"(i8*, i8*) #7 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %s.i8 = call i8* @llvm.localrecover(i8* bitcast (void (i1)* @"\01?test2@@YAX_N@Z" to i8*), i8* %1, i32 0)
- %s = bitcast i8* %s.i8 to %struct.S*
- call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s) #4
- invoke void @llvm.donothing()
- to label %entry.split unwind label %stub
-
-entry.split: ; preds = %entry
- ret void
-
-stub: ; preds = %entry
- %2 = landingpad { i8*, i32 }
- cleanup
- unreachable
-}
-
-define internal void @"\01?test2@@YAX_N@Z.cleanup1"(i8*, i8*) #7 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %s1.i8 = call i8* @llvm.localrecover(i8* bitcast (void (i1)* @"\01?test2@@YAX_N@Z" to i8*), i8* %1, i32 1)
- %s1 = bitcast i8* %s1.i8 to %struct.S*
- call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s1) #4
- invoke void @llvm.donothing()
- to label %entry.split unwind label %stub
-
-entry.split: ; preds = %entry
- ret void
-
-stub: ; preds = %entry
- %2 = landingpad { i8*, i32 }
- cleanup
- unreachable
-}
-
-declare void @llvm.donothing()
-
-attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?test1@@YAXXZ" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?test2@@YAX_N@Z" }
-attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { nounwind }
-attributes #5 = { "wineh-parent"="?test1@@YAXXZ" }
-attributes #6 = { nounwind readnone }
-attributes #7 = { "wineh-parent"="?test2@@YAX_N@Z" }
-attributes #8 = { noreturn }
diff --git a/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll b/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll
deleted file mode 100644
index 87ccc9d9dedd..000000000000
--- a/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll
+++ /dev/null
@@ -1,110 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following source, built with -O2
-;
-; void f() {
-; try {
-; g();
-; try {
-; throw;
-; } catch (int) {
-; }
-; } catch (...) {
-; }
-; }
-;
-
-; ModuleID = '<stdin>'
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchHandlerType = type { i32, i8* }
-%eh.ThrowInfo = type { i32, i32, i32, i32 }
-
-$"\01??_R0H@8" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
-
-; CHECK-LABEL: define void @"\01?f@@YAXXZ"()
-; CHECK: entry:
-; CHECK: call void (...) @llvm.localescape()
-; CHECK: invoke void @"\01?g@@YAXXZ"()
-
-; Function Attrs: nounwind
-define void @"\01?f@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- invoke void @"\01?g@@YAXXZ"()
- to label %invoke.cont unwind label %lpad
-
-; CHECK-LABEL: invoke.cont:
-; CHECK: invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null)
-; CHECK: to label %unreachable unwind label %[[LPAD1_LABEL:lpad[0-9]+]]
-
-invoke.cont: ; preds = %entry
- invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #4
- to label %unreachable unwind label %lpad1
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* null
- %1 = extractvalue { i8*, i32 } %0, 0
- br label %catch2
-
-; Note: Even though this landing pad has two catch clauses, it only has one action because both
-; handlers do the same thing.
-; CHECK: [[LPAD1_LABEL]]:
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
-; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch")
-; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont4]
-
-lpad1: ; preds = %invoke.cont
- %2 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- catch i8* null
- %3 = extractvalue { i8*, i32 } %2, 0
- br label %catch2
-
-catch2: ; preds = %lpad1, %lpad
- %exn.slot.0 = phi i8* [ %3, %lpad1 ], [ %1, %lpad ]
- tail call void @llvm.eh.begincatch(i8* %exn.slot.0, i8* null) #3
- tail call void @llvm.eh.endcatch() #3
- br label %try.cont4
-
-try.cont4: ; preds = %catch, %catch2
- ret void
-
-unreachable: ; preds = %invoke.cont
- unreachable
-
-; CHECK: }
-}
-
-declare void @"\01?g@@YAXXZ"() #1
-
-declare i32 @__CxxFrameHandler3(...)
-
-declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #3
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-attributes #4 = { noreturn }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.7.0 (trunk 235112) (llvm/trunk 235121)"}
diff --git a/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll b/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll
deleted file mode 100644
index 092135368158..000000000000
--- a/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll
+++ /dev/null
@@ -1,394 +0,0 @@
-; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
-
-; This test is based on the following code:
-;
-; int main(void) {
-; try {
-; try {
-; throw 'a';
-; } catch (char c) {
-; printf("%c\n", c);
-; }
-; throw 1;
-; } catch(int x) {
-; printf("%d\n", x);
-; } catch(...) {
-; printf("...\n");
-; }
-; try {
-; try {
-; throw 'b';
-; } catch (char c) {
-; printf("%c\n", c);
-; }
-; throw 2;
-; } catch(int x) {
-; printf("%d\n", x);
-; } catch (char c) {
-; printf("%c\n", c);
-; } catch(...) {
-; printf("...\n");
-; }
-; return 0;
-; }
-
-; This test is just checking for failures in processing the IR.
-; Extensive handler matching is not required.
-
-; ModuleID = 'cppeh-similar-catch-blocks.cpp'
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchHandlerType = type { i32, i8* }
-%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
-%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
-%eh.ThrowInfo = type { i32, i32, i32, i32 }
-
-$"\01??_R0H@8" = comdat any
-
-$"\01??_R0D@8" = comdat any
-
-$"_CT??_R0D@81" = comdat any
-
-$_CTA1D = comdat any
-
-$_TI1D = comdat any
-
-$"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@" = comdat any
-
-$"_CT??_R0H@84" = comdat any
-
-$_CTA1H = comdat any
-
-$_TI1H = comdat any
-
-$"\01??_C@_04MPPNMCOK@?4?4?4?6?$AA@" = comdat any
-
-$"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@" = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
-@"\01??_R0D@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".D\00" }, comdat
-@llvm.eh.handlertype.D.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0D@8" to i8*) }, section "llvm.metadata"
-@__ImageBase = external constant i8
-@"_CT??_R0D@81" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0D@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 1, i32 0 }, section ".xdata", comdat
-@_CTA1D = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0D@81" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
-@_TI1D = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1D to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
-@"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@" = linkonce_odr unnamed_addr constant [4 x i8] c"%c\0A\00", comdat, align 1
-@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
-@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
-@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
-@"\01??_C@_04MPPNMCOK@?4?4?4?6?$AA@" = linkonce_odr unnamed_addr constant [5 x i8] c"...\0A\00", comdat, align 1
-@"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [4 x i8] c"%d\0A\00", comdat, align 1
-
-; This is just a minimal check to verify that main was handled by WinEHPrepare.
-; CHECK: define i32 @main()
-; CHECK: entry:
-; CHECK: call void (...) @llvm.localescape(i32* [[X_PTR:\%.+]], i32* [[X2_PTR:\%.+]], i8* [[C2_PTR:\%.+]], i8* [[C3_PTR:\%.+]], i8* [[C_PTR:\%.+]])
-; CHECK: invoke void @_CxxThrowException
-; CHECK: }
-
-; Function Attrs: uwtable
-define i32 @main() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %retval = alloca i32, align 4
- %tmp = alloca i8, align 1
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %c = alloca i8, align 1
- %tmp3 = alloca i32, align 4
- %x = alloca i32, align 4
- %tmp20 = alloca i8, align 1
- %c28 = alloca i8, align 1
- %tmp34 = alloca i32, align 4
- %c48 = alloca i8, align 1
- %x56 = alloca i32, align 4
- store i32 0, i32* %retval
- store i8 97, i8* %tmp
- invoke void @_CxxThrowException(i8* %tmp, %eh.ThrowInfo* @_TI1D) #4
- to label %unreachable unwind label %lpad
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.D.0
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- catch i8* null
- %1 = extractvalue { i8*, i32 } %0, 0
- store i8* %1, i8** %exn.slot
- %2 = extractvalue { i8*, i32 } %0, 1
- store i32 %2, i32* %ehselector.slot
- br label %catch.dispatch
-
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot
- %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)) #2
- %matches = icmp eq i32 %sel, %3
- br i1 %matches, label %catch, label %catch.dispatch5
-
-catch: ; preds = %catch.dispatch
- %exn = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn, i8* %c) #2
- %4 = load i8, i8* %c, align 1
- %conv = sext i8 %4 to i32
- %call = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@", i32 0, i32 0), i32 %conv)
- to label %invoke.cont unwind label %lpad2
-
-invoke.cont: ; preds = %catch
- call void @llvm.eh.endcatch() #2
- br label %try.cont
-
-try.cont: ; preds = %invoke.cont
- store i32 1, i32* %tmp3
- %5 = bitcast i32* %tmp3 to i8*
- invoke void @_CxxThrowException(i8* %5, %eh.ThrowInfo* @_TI1H) #4
- to label %unreachable unwind label %lpad4
-
-lpad2: ; preds = %catch
- %6 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- catch i8* null
- %7 = extractvalue { i8*, i32 } %6, 0
- store i8* %7, i8** %exn.slot
- %8 = extractvalue { i8*, i32 } %6, 1
- store i32 %8, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #2
- br label %catch.dispatch5
-
-lpad4: ; preds = %try.cont
- %9 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- catch i8* null
- %10 = extractvalue { i8*, i32 } %9, 0
- store i8* %10, i8** %exn.slot
- %11 = extractvalue { i8*, i32 } %9, 1
- store i32 %11, i32* %ehselector.slot
- br label %catch.dispatch5
-
-catch.dispatch5: ; preds = %lpad4, %lpad2, %catch.dispatch
- %sel6 = load i32, i32* %ehselector.slot
- %12 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #2
- %matches7 = icmp eq i32 %sel6, %12
- br i1 %matches7, label %catch13, label %catch8
-
-catch13: ; preds = %catch.dispatch5
- %exn14 = load i8*, i8** %exn.slot
- %13 = bitcast i32* %x to i8*
- call void @llvm.eh.begincatch(i8* %exn14, i8* %13) #2
- %14 = load i32, i32* %x, align 4
- %call18 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@", i32 0, i32 0), i32 %14)
- to label %invoke.cont17 unwind label %lpad16
-
-invoke.cont17: ; preds = %catch13
- call void @llvm.eh.endcatch() #2
- br label %try.cont19
-
-try.cont19: ; preds = %invoke.cont17, %invoke.cont11
- store i8 98, i8* %tmp20
- invoke void @_CxxThrowException(i8* %tmp20, %eh.ThrowInfo* @_TI1D) #4
- to label %unreachable unwind label %lpad21
-
-catch8: ; preds = %catch.dispatch5
- %exn9 = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn9, i8* null) #2
- %call12 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @"\01??_C@_04MPPNMCOK@?4?4?4?6?$AA@", i32 0, i32 0))
- to label %invoke.cont11 unwind label %lpad10
-
-invoke.cont11: ; preds = %catch8
- call void @llvm.eh.endcatch() #2
- br label %try.cont19
-
-lpad10: ; preds = %catch8
- %15 = landingpad { i8*, i32 }
- cleanup
- %16 = extractvalue { i8*, i32 } %15, 0
- store i8* %16, i8** %exn.slot
- %17 = extractvalue { i8*, i32 } %15, 1
- store i32 %17, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #2
- br label %eh.resume
-
-lpad16: ; preds = %catch13
- %18 = landingpad { i8*, i32 }
- cleanup
- %19 = extractvalue { i8*, i32 } %18, 0
- store i8* %19, i8** %exn.slot
- %20 = extractvalue { i8*, i32 } %18, 1
- store i32 %20, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #2
- br label %eh.resume
-
-lpad21: ; preds = %try.cont19
- %21 = landingpad { i8*, i32 }
- catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)
- catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
- catch i8* null
- %22 = extractvalue { i8*, i32 } %21, 0
- store i8* %22, i8** %exn.slot
- %23 = extractvalue { i8*, i32 } %21, 1
- store i32 %23, i32* %ehselector.slot
- br label %catch.dispatch22
-
-catch.dispatch22: ; preds = %lpad21
- %sel23 = load i32, i32* %ehselector.slot
- %24 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)) #2
- %matches24 = icmp eq i32 %sel23, %24
- br i1 %matches24, label %catch25, label %catch.dispatch36
-
-catch25: ; preds = %catch.dispatch22
- %exn26 = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn26, i8* %c28) #2
- %25 = load i8, i8* %c28, align 1
- %conv29 = sext i8 %25 to i32
- %call32 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@", i32 0, i32 0), i32 %conv29)
- to label %invoke.cont31 unwind label %lpad30
-
-invoke.cont31: ; preds = %catch25
- call void @llvm.eh.endcatch() #2
- br label %try.cont33
-
-try.cont33: ; preds = %invoke.cont31
- store i32 2, i32* %tmp34
- %26 = bitcast i32* %tmp34 to i8*
- invoke void @_CxxThrowException(i8* %26, %eh.ThrowInfo* @_TI1H) #4
- to label %unreachable unwind label %lpad35
-
-lpad30: ; preds = %catch25
- %27 = landingpad { i8*, i32 }
- catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
- catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)
- catch i8* null
- %28 = extractvalue { i8*, i32 } %27, 0
- store i8* %28, i8** %exn.slot
- %29 = extractvalue { i8*, i32 } %27, 1
- store i32 %29, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #2
- br label %catch.dispatch36
-
-lpad35: ; preds = %try.cont33
- %30 = landingpad { i8*, i32 }
- catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
- catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)
- catch i8* null
- %31 = extractvalue { i8*, i32 } %30, 0
- store i8* %31, i8** %exn.slot
- %32 = extractvalue { i8*, i32 } %30, 1
- store i32 %32, i32* %ehselector.slot
- br label %catch.dispatch36
-
-catch.dispatch36: ; preds = %lpad35, %lpad30, %catch.dispatch22
- %sel37 = load i32, i32* %ehselector.slot
- %33 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #2
- %matches38 = icmp eq i32 %sel37, %33
- br i1 %matches38, label %catch53, label %catch.fallthrough
-
-catch53: ; preds = %catch.dispatch36
- %exn54 = load i8*, i8** %exn.slot
- %34 = bitcast i32* %x56 to i8*
- call void @llvm.eh.begincatch(i8* %exn54, i8* %34) #2
- %35 = load i32, i32* %x56, align 4
- %call59 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@", i32 0, i32 0), i32 %35)
- to label %invoke.cont58 unwind label %lpad57
-
-invoke.cont58: ; preds = %catch53
- call void @llvm.eh.endcatch() #2
- br label %try.cont60
-
-try.cont60: ; preds = %invoke.cont58, %invoke.cont51, %invoke.cont43
- ret i32 0
-
-catch.fallthrough: ; preds = %catch.dispatch36
- %36 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)) #2
- %matches39 = icmp eq i32 %sel37, %36
- br i1 %matches39, label %catch45, label %catch40
-
-catch45: ; preds = %catch.fallthrough
- %exn46 = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn46, i8* %c48) #2
- %37 = load i8, i8* %c48, align 1
- %conv49 = sext i8 %37 to i32
- %call52 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@", i32 0, i32 0), i32 %conv49)
- to label %invoke.cont51 unwind label %lpad50
-
-invoke.cont51: ; preds = %catch45
- call void @llvm.eh.endcatch() #2
- br label %try.cont60
-
-catch40: ; preds = %catch.fallthrough
- %exn41 = load i8*, i8** %exn.slot
- call void @llvm.eh.begincatch(i8* %exn41, i8* null) #2
- %call44 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @"\01??_C@_04MPPNMCOK@?4?4?4?6?$AA@", i32 0, i32 0))
- to label %invoke.cont43 unwind label %lpad42
-
-invoke.cont43: ; preds = %catch40
- call void @llvm.eh.endcatch() #2
- br label %try.cont60
-
-lpad42: ; preds = %catch40
- %38 = landingpad { i8*, i32 }
- cleanup
- %39 = extractvalue { i8*, i32 } %38, 0
- store i8* %39, i8** %exn.slot
- %40 = extractvalue { i8*, i32 } %38, 1
- store i32 %40, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #2
- br label %eh.resume
-
-lpad50: ; preds = %catch45
- %41 = landingpad { i8*, i32 }
- cleanup
- %42 = extractvalue { i8*, i32 } %41, 0
- store i8* %42, i8** %exn.slot
- %43 = extractvalue { i8*, i32 } %41, 1
- store i32 %43, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #2
- br label %eh.resume
-
-lpad57: ; preds = %catch53
- %44 = landingpad { i8*, i32 }
- cleanup
- %45 = extractvalue { i8*, i32 } %44, 0
- store i8* %45, i8** %exn.slot
- %46 = extractvalue { i8*, i32 } %44, 1
- store i32 %46, i32* %ehselector.slot
- call void @llvm.eh.endcatch() #2
- br label %eh.resume
-
-eh.resume: ; preds = %lpad57, %lpad50, %lpad42, %lpad16, %lpad10
- %exn61 = load i8*, i8** %exn.slot
- %sel62 = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn61, 0
- %lpad.val63 = insertvalue { i8*, i32 } %lpad.val, i32 %sel62, 1
- resume { i8*, i32 } %lpad.val63
-
-unreachable: ; preds = %try.cont33, %try.cont19, %try.cont, %entry
- unreachable
-}
-
-declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #1
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #2
-
-declare i32 @printf(i8*, ...) #3
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #2
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
-attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { noreturn }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 235214) (llvm/trunk 235213)"}
diff --git a/test/CodeGen/WinEH/cppeh-state-calc-1.ll b/test/CodeGen/WinEH/cppeh-state-calc-1.ll
deleted file mode 100644
index abc5d5292cf7..000000000000
--- a/test/CodeGen/WinEH/cppeh-state-calc-1.ll
+++ /dev/null
@@ -1,289 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-; This test was generated from the following code.
-;
-; void test() {
-; try {
-; try {
-; try {
-; two();
-; throw 2;
-; } catch (int x) {
-; catch_two();
-; }
-; a();
-; throw 'a';
-; } catch (char c) {
-; catch_a();
-; }
-; one();
-; throw 1;
-; } catch(int x) {
-; catch_one();
-; } catch(...) {
-; catch_all();
-; }
-; }
-;
-; The function calls before the throws were declared as 'noexcept' and are
-; just here to make blocks easier to identify in the IR.
-
-; ModuleID = '<stdin>'
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
-%eh.CatchHandlerType = type { i32, i8* }
-%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
-%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
-%eh.ThrowInfo = type { i32, i32, i32, i32 }
-
-$"\01??_R0H@8" = comdat any
-
-$"\01??_R0D@8" = comdat any
-
-$"_CT??_R0H@84" = comdat any
-
-$_CTA1H = comdat any
-
-$_TI1H = comdat any
-
-$"_CT??_R0D@81" = comdat any
-
-$_CTA1D = comdat any
-
-$_TI1D = comdat any
-
-@"\01??_7type_info@@6B@" = external constant i8*
-@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
-@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
-@"\01??_R0D@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".D\00" }, comdat
-@llvm.eh.handlertype.D.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0D@8" to i8*) }, section "llvm.metadata"
-@__ImageBase = external constant i8
-@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
-@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
-@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
-@"_CT??_R0D@81" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0D@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 1, i32 0 }, section ".xdata", comdat
-@_CTA1D = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0D@81" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
-@_TI1D = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1D to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
-
-; Function Attrs: nounwind uwtable
-define void @"\01?test@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %tmp = alloca i32, align 4
- %x = alloca i32, align 4
- %tmp2 = alloca i8, align 1
- %c = alloca i8, align 1
- %tmp11 = alloca i32, align 4
- %x21 = alloca i32, align 4
- call void @"\01?two@@YAXXZ"() #3
- store i32 2, i32* %tmp
- %0 = bitcast i32* %tmp to i8*
- call void (...) @llvm.localescape(i32* %x, i8* %c, i32* %x21)
- invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #5
- to label %unreachable unwind label %lpad
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.D.0
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- catch i8* null
- %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2", i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch3")
- indirectbr i8* %recover, [label %try.cont, label %try.cont10, label %try.cont22]
-
-try.cont: ; preds = %lpad
- call void @"\01?a@@YAXXZ"() #3
- store i8 97, i8* %tmp2
- invoke void @_CxxThrowException(i8* %tmp2, %eh.ThrowInfo* @_TI1D) #5
- to label %unreachable unwind label %lpad3
-
-lpad3: ; preds = %try.cont
- %2 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.D.0
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- catch i8* null
- %recover1 = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2", i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch3")
- indirectbr i8* %recover1, [label %try.cont10, label %try.cont22]
-
-try.cont10: ; preds = %lpad3, %lpad
- call void @"\01?one@@YAXXZ"() #3
- store i32 1, i32* %tmp11
- %3 = bitcast i32* %tmp11 to i8*
- invoke void @_CxxThrowException(i8* %3, %eh.ThrowInfo* @_TI1H) #5
- to label %unreachable unwind label %lpad12
-
-lpad12: ; preds = %try.cont10
- %4 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- catch i8* null
- %recover2 = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2", i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch3")
- indirectbr i8* %recover2, [label %try.cont22]
-
-try.cont22: ; preds = %lpad12, %lpad3, %lpad
- ret void
-
-unreachable: ; preds = %try.cont10, %try.cont, %entry
- unreachable
-}
-
-; Function Attrs: nounwind
-declare void @"\01?two@@YAXXZ"() #1
-
-declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
-
-declare i32 @__CxxFrameHandler3(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #2
-
-; Function Attrs: nounwind
-declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
-
-; Function Attrs: nounwind
-declare void @"\01?catch_two@@YAXXZ"() #1
-
-; Function Attrs: nounwind
-declare void @llvm.eh.endcatch() #3
-
-; Function Attrs: nounwind
-declare void @"\01?a@@YAXXZ"() #1
-
-; Function Attrs: nounwind
-declare void @"\01?catch_a@@YAXXZ"() #1
-
-; Function Attrs: nounwind
-declare void @"\01?one@@YAXXZ"() #1
-
-; Function Attrs: nounwind
-declare void @"\01?catch_all@@YAXXZ"() #1
-
-; Function Attrs: nounwind
-declare void @"\01?catch_one@@YAXXZ"() #1
-
-; Function Attrs: nounwind
-declare i8* @llvm.eh.actions(...) #3
-
-define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %x.i8 = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
- %x = bitcast i8* %x.i8 to i32*
- %2 = bitcast i32* %x to i8*
- call void @"\01?catch_two@@YAXXZ"() #3
- invoke void @llvm.donothing()
- to label %entry.split unwind label %stub
-
-entry.split: ; preds = %entry
- ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
-
-stub: ; preds = %entry
- %3 = landingpad { i8*, i32 }
- cleanup
- %recover = call i8* (...) @llvm.eh.actions()
- unreachable
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.donothing() #2
-
-define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- call void @"\01?catch_a@@YAXXZ"() #3
- invoke void @llvm.donothing()
- to label %entry.split unwind label %stub
-
-entry.split: ; preds = %entry
- ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont10)
-
-stub: ; preds = %entry
- %2 = landingpad { i8*, i32 }
- cleanup
- %recover = call i8* (...) @llvm.eh.actions()
- unreachable
-}
-
-define internal i8* @"\01?test@@YAXXZ.catch2"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- %x21.i8 = call i8* @llvm.localrecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
- %x21 = bitcast i8* %x21.i8 to i32*
- %2 = bitcast i32* %x21 to i8*
- call void @"\01?catch_one@@YAXXZ"() #3
- invoke void @llvm.donothing()
- to label %entry.split unwind label %stub
-
-entry.split: ; preds = %entry
- ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont22)
-
-stub: ; preds = %entry
- %3 = landingpad { i8*, i32 }
- cleanup
- %recover = call i8* (...) @llvm.eh.actions()
- unreachable
-}
-
-define internal i8* @"\01?test@@YAXXZ.catch3"(i8*, i8*) #4 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
-entry:
- call void @"\01?catch_all@@YAXXZ"() #3
- invoke void @llvm.donothing()
- to label %entry.split unwind label %stub
-
-entry.split: ; preds = %entry
- ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont22)
-
-stub: ; preds = %entry
- %2 = landingpad { i8*, i32 }
- cleanup
- %recover = call i8* (...) @llvm.eh.actions()
- unreachable
-}
-
-; Function Attrs: nounwind
-declare void @llvm.localescape(...) #3
-
-; Function Attrs: nounwind readnone
-declare i8* @llvm.localrecover(i8*, i8*, i32) #2
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?test@@YAXXZ" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-attributes #4 = { "wineh-parent"="?test@@YAXXZ" }
-attributes #5 = { noreturn }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 (trunk 236059)"}
-
-; CHECK-LABEL: "$cppxdata$?test@@YAXXZ":
-; CHECK-NEXT: .long 429065506
-; CHECK-NEXT: .long
-; CHECK-NEXT: .long ("$stateUnwindMap$?test@@YAXXZ")@IMGREL
-; CHECK-NEXT: .long
-; CHECK-NEXT: .long ("$tryMap$?test@@YAXXZ")@IMGREL
-; CHECK-NEXT: .long
-; CHECK-NEXT: .long ("$ip2state$?test@@YAXXZ")@IMGREL
-; CHECK-NEXT: .long 40
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 1
-; CHECK: "$stateUnwindMap$?test@@YAXXZ":
-; CHECK: "$tryMap$?test@@YAXXZ":
-; CHECK: "$handlerMap$0$?test@@YAXXZ":
-; CHECK: "$ip2state$?test@@YAXXZ":
-; CHECK-NEXT: .long .Lfunc_begin0@IMGREL
-; CHECK-NEXT: .long -1
-; CHECK-NEXT: .long .Ltmp0@IMGREL
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long .Ltmp3@IMGREL
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long .Ltmp6@IMGREL
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long .Lfunc_begin1@IMGREL
-; CHECK-NEXT: .long 3
-; CHECK-NEXT: .long .Lfunc_begin2@IMGREL
-; CHECK-NEXT: .long 4
-; CHECK-NEXT: .long .Lfunc_begin3@IMGREL
-; CHECK-NEXT: .long 5
-; CHECK-NEXT: .long .Lfunc_begin4@IMGREL
-; CHECK-NEXT: .long 6
diff --git a/test/CodeGen/WinEH/seh-catch-all.ll b/test/CodeGen/WinEH/seh-catch-all.ll
deleted file mode 100644
index 5ac2295a5b41..000000000000
--- a/test/CodeGen/WinEH/seh-catch-all.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; RUN: opt -S -winehprepare < %s | FileCheck %s
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-@str.__except = internal unnamed_addr constant [9 x i8] c"__except\00", align 1
-
-; Function Attrs: uwtable
-
-declare i32 @puts(i8*)
-
-define void @may_crash() {
-entry:
- store volatile i32 42, i32* null, align 4
- ret void
-}
-
-declare i32 @__C_specific_handler(...)
-
-; Function Attrs: nounwind readnone
-declare i8* @llvm.frameaddress(i32)
-
-; Function Attrs: uwtable
-define void @seh_catch_all() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- invoke void @may_crash()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- br label %__try.cont
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* null
- %1 = extractvalue { i8*, i32 } %0, 0
- store i8* %1, i8** %exn.slot
- %2 = extractvalue { i8*, i32 } %0, 1
- store i32 %2, i32* %ehselector.slot
- br label %__except
-
-__except: ; preds = %lpad
- %call = call i32 @puts(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @str.__except, i32 0, i32 0))
- br label %__try.cont
-
-__try.cont: ; preds = %__except, %invoke.cont
- ret void
-}
-
-; CHECK-LABEL: define void @seh_catch_all()
-; CHECK: landingpad
-; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* blockaddress(@seh_catch_all, %lpad.split))
-; CHECK-NEXT: indirectbr
-;
-; CHECK: lpad.split:
-; CHECK-NOT: extractvalue
-; CHECK: call i32 @puts
diff --git a/test/CodeGen/WinEH/seh-exception-code.ll b/test/CodeGen/WinEH/seh-exception-code.ll
deleted file mode 100644
index 2998e7982133..000000000000
--- a/test/CodeGen/WinEH/seh-exception-code.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; RUN: opt -winehprepare -S < %s | FileCheck %s
-
-; WinEHPrepare was crashing during phi demotion.
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc18.0.0"
-
-declare i32 @__C_specific_handler(...)
-
-@str = linkonce_odr unnamed_addr constant [16 x i8] c"caught it! %lx\0A\00", align 1
-
-; Function Attrs: nounwind uwtable
-declare void @maycrash()
-
-; Function Attrs: nounwind
-declare i32 @printf(i8* nocapture readonly, ...)
-
-; Function Attrs: nounwind uwtable
-define void @doit() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
-entry:
- invoke void @maycrash()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- invoke void @maycrash()
- to label %__try.cont unwind label %lpad.1
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* null
- %1 = extractvalue { i8*, i32 } %0, 0
- br label %__except
-
-lpad.1: ; preds = %invoke.cont, %lpad
- %2 = landingpad { i8*, i32 }
- catch i8* null
- %3 = extractvalue { i8*, i32 } %2, 0
- br label %__except
-
-__except: ; preds = %lpad, %lpad.1
- %exn.slot.0 = phi i8* [ %3, %lpad.1 ], [ %1, %lpad ]
- %4 = ptrtoint i8* %exn.slot.0 to i64
- %5 = trunc i64 %4 to i32
- %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str, i64 0, i64 0), i32 %5)
- br label %__try.cont
-
-__try.cont: ; preds = %invoke.cont, %__except
- ret void
-}
-
-; CHECK-LABEL: define void @doit()
-; CHECK: landingpad
-; CHECK: indirectbr i8* %{{[^,]*}}, [label %[[except_split1:.*]]]
-; CHECK: [[except_split1]]:
-; CHECK: call i32 @llvm.eh.exceptioncode()
-; CHECK: br label %__except
-;
-; CHECK: landingpad
-; CHECK: indirectbr i8* %{{[^,]*}}, [label %[[except_split2:.*]]]
-; CHECK: [[except_split2]]:
-; CHECK: call i32 @llvm.eh.exceptioncode()
-; CHECK: br label %__except
-;
-; CHECK: __except:
-; CHECK: phi
-; CHECK: call i32 (i8*, ...) @printf
diff --git a/test/CodeGen/WinEH/seh-exception-code2.ll b/test/CodeGen/WinEH/seh-exception-code2.ll
deleted file mode 100644
index 0356956502c0..000000000000
--- a/test/CodeGen/WinEH/seh-exception-code2.ll
+++ /dev/null
@@ -1,91 +0,0 @@
-; RUN: opt -winehprepare -S < %s | FileCheck %s
-
-; WinEHPrepare was crashing during phi demotion.
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc18.0.0"
-
-declare i32 @__C_specific_handler(...)
-
-@str = linkonce_odr unnamed_addr constant [16 x i8] c"caught it! %lx\0A\00", align 1
-
-declare void @maycrash()
-declare void @finally(i1 %abnormal)
-declare i32 @printf(i8* nocapture readonly, ...)
-declare i32 @llvm.eh.typeid.for(i8*)
-
-; Function Attrs: nounwind uwtable
-define void @doit() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
-entry:
- invoke void @maycrash()
- to label %invoke.cont unwind label %lpad.1
-
-invoke.cont: ; preds = %entry
- invoke void @maycrash()
- to label %__try.cont unwind label %lpad
-
-lpad: ; preds = %entry
- %lp0 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@doit@@" to i8*)
- %ehptr.0 = extractvalue { i8*, i32 } %lp0, 0
- %ehsel.0 = extractvalue { i8*, i32 } %lp0, 1
- call void @finally(i1 true)
- br label %ehdispatch
-
-lpad.1: ; preds = %invoke.cont, %lpad
- %lp1 = landingpad { i8*, i32 }
- catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@doit@@" to i8*)
- %ehptr.1 = extractvalue { i8*, i32 } %lp1, 0
- %ehsel.1 = extractvalue { i8*, i32 } %lp1, 1
- br label %ehdispatch
-
-ehdispatch:
- %ehptr.2 = phi i8* [ %ehptr.0, %lpad ], [ %ehptr.1, %lpad.1 ]
- %ehsel.2 = phi i32 [ %ehsel.0, %lpad ], [ %ehsel.1, %lpad.1 ]
- %mysel = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@doit@@" to i8*))
- %matches = icmp eq i32 %ehsel.2, %mysel
- br i1 %matches, label %__except, label %eh.resume
-
-__except: ; preds = %lpad, %lpad.1
- %t4 = ptrtoint i8* %ehptr.2 to i64
- %t5 = trunc i64 %t4 to i32
- %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @str, i64 0, i64 0), i32 %t5)
- br label %__try.cont
-
-__try.cont: ; preds = %invoke.cont, %__except
- call void @finally(i1 false)
- ret void
-
-eh.resume:
- %ehvals0 = insertvalue { i8*, i32 } undef, i8* %ehptr.2, 0
- %ehvals = insertvalue { i8*, i32 } %ehvals0, i32 %ehsel.2, 1
- resume { i8*, i32 } %ehvals
-}
-
-define internal i32 @"\01?filt$0@0@doit@@"(i8* %exception_pointers, i8* %frame_pointer) #1 {
-entry:
- %0 = bitcast i8* %exception_pointers to { i32*, i8* }*
- %1 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %0, i32 0, i32 0
- %2 = load i32*, i32** %1
- %3 = load i32, i32* %2
- %cmp = icmp eq i32 %3, -1073741819
- %4 = zext i1 %cmp to i32
- ret i32 %4
-}
-
-; CHECK-LABEL: define void @doit()
-; CHECK: %lp0 = landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: catch i8*
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions({{.*}})
-; CHECK-NEXT: indirectbr i8* %{{[^,]*}}, [label %__except]
-;
-; CHECK: %lp1 = landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8*
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions({{.*}})
-; CHECK-NEXT: indirectbr i8* %{{[^,]*}}, [label %__except]
-;
-; CHECK: __except:
-; CHECK: call i32 @llvm.eh.exceptioncode()
-; CHECK: call i32 (i8*, ...) @printf
diff --git a/test/CodeGen/WinEH/seh-inlined-finally.ll b/test/CodeGen/WinEH/seh-inlined-finally.ll
deleted file mode 100644
index 157adf0c8183..000000000000
--- a/test/CodeGen/WinEH/seh-inlined-finally.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: opt -S -winehprepare < %s | FileCheck %s
-
-; Check that things work when the mid-level optimizer inlines the finally
-; block.
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-%struct._RTL_CRITICAL_SECTION = type { %struct._RTL_CRITICAL_SECTION_DEBUG*, i32, i32, i8*, i8*, i64 }
-%struct._RTL_CRITICAL_SECTION_DEBUG = type { i16, i16, %struct._RTL_CRITICAL_SECTION*, %struct._LIST_ENTRY, i32, i32, i32, i16, i16 }
-%struct._LIST_ENTRY = type { %struct._LIST_ENTRY*, %struct._LIST_ENTRY* }
-
-declare i32 @puts(i8*)
-declare void @may_crash()
-declare i32 @__C_specific_handler(...)
-declare i8* @llvm.localrecover(i8*, i8*, i32) #1
-declare i8* @llvm.localaddress()
-declare void @llvm.localescape(...)
-declare dllimport void @EnterCriticalSection(%struct._RTL_CRITICAL_SECTION*)
-declare dllimport void @LeaveCriticalSection(%struct._RTL_CRITICAL_SECTION*)
-
-define void @use_finally() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
-entry:
- invoke void @may_crash()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- %call.i = tail call i32 @puts(i8* null)
- ret void
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- cleanup
- %call.i2 = tail call i32 @puts(i8* null)
- resume { i8*, i32 } %0
-}
-
-; CHECK-LABEL: define void @use_finally()
-; CHECK: invoke void @may_crash()
-;
-; CHECK: landingpad
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @use_finally.cleanup)
-; CHECK-NEXT: indirectbr i8* %recover, []
-
-; Function Attrs: nounwind uwtable
-define i32 @call_may_crash_locked() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
-entry:
- %p = alloca %struct._RTL_CRITICAL_SECTION, align 8
- call void (...) @llvm.localescape(%struct._RTL_CRITICAL_SECTION* %p)
- call void @EnterCriticalSection(%struct._RTL_CRITICAL_SECTION* %p)
- invoke void @may_crash()
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- %tmp2 = call i8* @llvm.localaddress()
- %tmp3 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @call_may_crash_locked to i8*), i8* %tmp2, i32 0) #2
- %tmp6 = bitcast i8* %tmp3 to %struct._RTL_CRITICAL_SECTION*
- call void @LeaveCriticalSection(%struct._RTL_CRITICAL_SECTION* %tmp6)
- ret i32 42
-
-lpad: ; preds = %entry
- %tmp7 = landingpad { i8*, i32 }
- cleanup
- %tmp8 = call i8* @llvm.localaddress()
- %tmp9 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @call_may_crash_locked to i8*), i8* %tmp8, i32 0)
- %tmp12 = bitcast i8* %tmp9 to %struct._RTL_CRITICAL_SECTION*
- call void @LeaveCriticalSection(%struct._RTL_CRITICAL_SECTION* %tmp12)
- resume { i8*, i32 } %tmp7
-}
-
-; CHECK-LABEL: define i32 @call_may_crash_locked()
-; CHECK: invoke void @may_crash()
-;
-; CHECK: landingpad
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @call_may_crash_locked.cleanup)
-; CHECK-NEXT: indirectbr i8* %recover, []
-
-; CHECK-LABEL: define internal void @call_may_crash_locked.cleanup(i8*, i8*)
-; CHECK: %tmp9 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @call_may_crash_locked to i8*), i8* %1, i32 0)
-; CHECK: %tmp12 = bitcast i8* %tmp9 to %struct._RTL_CRITICAL_SECTION*
-; CHECK: call void @LeaveCriticalSection(%struct._RTL_CRITICAL_SECTION* %tmp12)
diff --git a/test/CodeGen/WinEH/seh-outlined-finally-win32.ll b/test/CodeGen/WinEH/seh-outlined-finally-win32.ll
deleted file mode 100644
index 3649433c4b61..000000000000
--- a/test/CodeGen/WinEH/seh-outlined-finally-win32.ll
+++ /dev/null
@@ -1,172 +0,0 @@
-; RUN: opt -S -winehprepare < %s | FileCheck %s
-
-; Test case based on this code:
-;
-; extern "C" int _abnormal_termination();
-; #pragma intrinsic(_abnormal_termination)
-; extern "C" int printf(const char *, ...);
-; extern "C" void may_crash() {
-; *(volatile int *)0 = 42;
-; }
-; int main() {
-; int myres = 0;
-; __try {
-; __try {
-; may_crash();
-; } __finally {
-; printf("inner finally %d\n", _abnormal_termination());
-; may_crash();
-; }
-; } __finally {
-; printf("outer finally %d\n", _abnormal_termination());
-; }
-; }
-;
-; Note that if the inner finally crashes, the outer finally still runs. There
-; is nothing like a std::terminate call in this situation.
-
-target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "i686-pc-windows-msvc"
-
-$"\01??_C@_0BC@LHHILCPN@outer?5finally?5?$CFd?6?$AA@" = comdat any
-
-$"\01??_C@_0BC@JELAHKN@inner?5finally?5?$CFd?6?$AA@" = comdat any
-
-@"\01??_C@_0BC@LHHILCPN@outer?5finally?5?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [18 x i8] c"outer finally %d\0A\00", comdat, align 1
-@"\01??_C@_0BC@JELAHKN@inner?5finally?5?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [18 x i8] c"inner finally %d\0A\00", comdat, align 1
-
-; Function Attrs: nounwind
-define void @may_crash() #0 {
-entry:
- store volatile i32 42, i32* null, align 4
- ret void
-}
-
-; Function Attrs: nounwind
-define i32 @main() #0 personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
-entry:
- %myres = alloca i32, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- store i32 0, i32* %myres, align 4
- invoke void @may_crash() #4
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- %0 = call i8* @llvm.frameaddress(i32 0)
- invoke void @"\01?fin$1@0@main@@"(i8 zeroext 0, i8* %0) #4
- to label %invoke.cont.2 unwind label %lpad.1
-
-invoke.cont.2: ; preds = %invoke.cont
- %1 = call i8* @llvm.frameaddress(i32 0)
- call void @"\01?fin$0@0@main@@"(i8 zeroext 0, i8* %1)
- ret i32 0
-
-lpad: ; preds = %entry
- %2 = landingpad { i8*, i32 }
- cleanup
- %3 = extractvalue { i8*, i32 } %2, 0
- store i8* %3, i8** %exn.slot
- %4 = extractvalue { i8*, i32 } %2, 1
- store i32 %4, i32* %ehselector.slot
- %5 = call i8* @llvm.frameaddress(i32 0)
- invoke void @"\01?fin$1@0@main@@"(i8 zeroext 1, i8* %5) #4
- to label %invoke.cont.3 unwind label %lpad.1
-
-lpad.1: ; preds = %lpad, %invoke.cont
- %6 = landingpad { i8*, i32 }
- cleanup
- %7 = extractvalue { i8*, i32 } %6, 0
- store i8* %7, i8** %exn.slot
- %8 = extractvalue { i8*, i32 } %6, 1
- store i32 %8, i32* %ehselector.slot
- br label %ehcleanup
-
-invoke.cont.3: ; preds = %lpad
- br label %ehcleanup
-
-ehcleanup: ; preds = %invoke.cont.3, %lpad.1
- %9 = call i8* @llvm.frameaddress(i32 0)
- call void @"\01?fin$0@0@main@@"(i8 zeroext 1, i8* %9)
- br label %eh.resume
-
-eh.resume: ; preds = %ehcleanup
- %exn = load i8*, i8** %exn.slot
- %sel = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
- %lpad.val.4 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
- resume { i8*, i32 } %lpad.val.4
-}
-
-; CHECK-LABEL: define i32 @main()
-; CHECK: invoke void @may_crash()
-;
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void ()* @main.cleanup)
-; CHECK-NEXT: indirectbr
-;
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void ()* @main.cleanup.1)
-; CHECK-NEXT: indirectbr
-
-; CHECK-LABEL: define internal void @main.cleanup()
-; CHECK: call i8* @llvm.frameaddress(i32 1)
-; CHECK: call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @main to i8*), i8* %{{.*}})
-; CHECK: call void @"\01?fin$1@0@main@@"(i8 zeroext 1, i8* %{{.*}})
-; CHECK: call void @"\01?fin$0@0@main@@"(i8 zeroext 1, i8* %{{.*}})
-
-; CHECK-LABEL: define internal void @main.cleanup.1()
-; CHECK: call i8* @llvm.frameaddress(i32 1)
-; CHECK: call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @main to i8*), i8* %{{.*}})
-; CHECK: call void @"\01?fin$0@0@main@@"(i8 zeroext 1, i8* %{{.*}})
-
-; Function Attrs: noinline nounwind
-define internal void @"\01?fin$0@0@main@@"(i8 zeroext %abnormal_termination, i8* %frame_pointer) #1 {
-entry:
- %frame_pointer.addr = alloca i8*, align 4
- %abnormal_termination.addr = alloca i8, align 1
- %0 = call i8* @llvm.frameaddress(i32 1)
- %1 = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @main to i8*), i8* %0)
- store i8* %frame_pointer, i8** %frame_pointer.addr, align 4
- store i8 %abnormal_termination, i8* %abnormal_termination.addr, align 1
- %2 = zext i8 %abnormal_termination to i32
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @"\01??_C@_0BC@LHHILCPN@outer?5finally?5?$CFd?6?$AA@", i32 0, i32 0), i32 %2)
- ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i8* @llvm.frameaddress(i32) #2
-
-; Function Attrs: nounwind readnone
-declare i8* @llvm.x86.seh.recoverfp(i8*, i8*) #2
-
-declare i32 @printf(i8*, ...) #3
-
-; Function Attrs: noinline nounwind
-define internal void @"\01?fin$1@0@main@@"(i8 zeroext %abnormal_termination, i8* %frame_pointer) #1 {
-entry:
- %frame_pointer.addr = alloca i8*, align 4
- %abnormal_termination.addr = alloca i8, align 1
- %0 = call i8* @llvm.frameaddress(i32 1)
- %1 = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @main to i8*), i8* %0)
- store i8* %frame_pointer, i8** %frame_pointer.addr, align 4
- store i8 %abnormal_termination, i8* %abnormal_termination.addr, align 1
- %2 = zext i8 %abnormal_termination to i32
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @"\01??_C@_0BC@JELAHKN@inner?5finally?5?$CFd?6?$AA@", i32 0, i32 0), i32 %2)
- call void @may_crash()
- ret void
-}
-
-declare i32 @_except_handler3(...)
-
-attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { noinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #4 = { noinline }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.7.0 "}
diff --git a/test/CodeGen/WinEH/seh-outlined-finally.ll b/test/CodeGen/WinEH/seh-outlined-finally.ll
deleted file mode 100644
index 529f85b9602b..000000000000
--- a/test/CodeGen/WinEH/seh-outlined-finally.ll
+++ /dev/null
@@ -1,155 +0,0 @@
-; RUN: opt -S -winehprepare -mtriple=x86_64-windows-msvc < %s | FileCheck %s
-
-; Test case based on this code:
-;
-; extern "C" int _abnormal_termination();
-; #pragma intrinsic(_abnormal_termination)
-; extern "C" int printf(const char *, ...);
-; extern "C" void may_crash() {
-; *(volatile int *)0 = 42;
-; }
-; int main() {
-; int myres = 0;
-; __try {
-; __try {
-; may_crash();
-; } __finally {
-; printf("inner finally %d\n", _abnormal_termination());
-; may_crash();
-; }
-; } __finally {
-; printf("outer finally %d\n", _abnormal_termination());
-; }
-; }
-;
-; Note that if the inner finally crashes, the outer finally still runs. There
-; is nothing like a std::terminate call in this situation.
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-@str_outer_finally = linkonce_odr unnamed_addr constant [18 x i8] c"outer finally %d\0A\00", align 1
-@str_inner_finally = linkonce_odr unnamed_addr constant [18 x i8] c"inner finally %d\0A\00", align 1
-
-; Function Attrs: nounwind uwtable
-define void @may_crash() #0 {
-entry:
- store volatile i32 42, i32* null, align 4
- ret void
-}
-
-; Function Attrs: uwtable
-define i32 @main() #1 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
-entry:
- %myres = alloca i32, align 4
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- store i32 0, i32* %myres, align 4
- invoke void @may_crash() #4
- to label %invoke.cont unwind label %lpad
-
-invoke.cont: ; preds = %entry
- %0 = call i8* @llvm.localaddress()
- invoke void @"\01?fin$1@0@main@@"(i1 zeroext false, i8* %0) #4
- to label %invoke.cont2 unwind label %lpad1
-
-invoke.cont2: ; preds = %invoke.cont
- %1 = call i8* @llvm.localaddress()
- call void @"\01?fin$0@0@main@@"(i1 zeroext false, i8* %1)
- ret i32 0
-
-lpad: ; preds = %entry
- %2 = landingpad { i8*, i32 }
- cleanup
- %3 = extractvalue { i8*, i32 } %2, 0
- store i8* %3, i8** %exn.slot
- %4 = extractvalue { i8*, i32 } %2, 1
- store i32 %4, i32* %ehselector.slot
- %5 = call i8* @llvm.localaddress()
- invoke void @"\01?fin$1@0@main@@"(i1 zeroext true, i8* %5) #4
- to label %invoke.cont3 unwind label %lpad1
-
-lpad1: ; preds = %lpad, %invoke.cont
- %6 = landingpad { i8*, i32 }
- cleanup
- %7 = extractvalue { i8*, i32 } %6, 0
- store i8* %7, i8** %exn.slot
- %8 = extractvalue { i8*, i32 } %6, 1
- store i32 %8, i32* %ehselector.slot
- br label %ehcleanup
-
-invoke.cont3: ; preds = %lpad
- br label %ehcleanup
-
-ehcleanup: ; preds = %invoke.cont3, %lpad1
- %9 = call i8* @llvm.localaddress()
- call void @"\01?fin$0@0@main@@"(i1 zeroext true, i8* %9)
- br label %eh.resume
-
-eh.resume: ; preds = %ehcleanup
- %exn = load i8*, i8** %exn.slot
- %sel = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
- %lpad.val4 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
- resume { i8*, i32 } %lpad.val4
-}
-
-; CHECK-NOT: define internal void @
-
-; CHECK-LABEL: define i32 @main()
-; CHECK: invoke void @may_crash()
-;
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i1, i8*)* @"\01?fin$1@0@main@@", i32 0, void (i1, i8*)* @"\01?fin$0@0@main@@")
-; CHECK-NEXT: indirectbr
-;
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i1, i8*)* @"\01?fin$0@0@main@@")
-; CHECK-NEXT: indirectbr
-
-; There should not be any *new* cleanup helpers, just the existing ones.
-; CHECK-NOT: define internal void @
-; CHECK: define internal void @"\01?fin$0@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer)
-; CHECK-NOT: define internal void @
-; CHECK: define internal void @"\01?fin$1@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer)
-; CHECK-NOT: define internal void @
-
-define internal void @"\01?fin$0@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer) #2 {
-entry:
- %frame_pointer.addr = alloca i8*, align 8
- %abnormal_termination.addr = alloca i8, align 1
- store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
- %frombool = zext i1 %abnormal_termination to i8
- store i8 %frombool, i8* %abnormal_termination.addr, align 1
- %0 = zext i1 %abnormal_termination to i32
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @str_outer_finally, i32 0, i32 0), i32 %0)
- ret void
-}
-
-declare i32 @printf(i8*, ...) #2
-
-define internal void @"\01?fin$1@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer) #2 {
-entry:
- %frame_pointer.addr = alloca i8*, align 8
- %abnormal_termination.addr = alloca i8, align 1
- store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
- %frombool = zext i1 %abnormal_termination to i8
- store i8 %frombool, i8* %abnormal_termination.addr, align 1
- %0 = zext i1 %abnormal_termination to i32
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @str_inner_finally, i32 0, i32 0), i32 %0)
- call void @may_crash()
- ret void
-}
-
-declare i32 @__C_specific_handler(...)
-
-; Function Attrs: nounwind readnone
-declare i8* @llvm.localaddress() #3
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readnone }
-attributes #4 = { noinline }
diff --git a/test/CodeGen/WinEH/seh-prepared-basic.ll b/test/CodeGen/WinEH/seh-prepared-basic.ll
deleted file mode 100644
index b6a30309f1c1..000000000000
--- a/test/CodeGen/WinEH/seh-prepared-basic.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-; Test case based on this code:
-; extern "C" unsigned long _exception_code();
-; extern "C" int filt(unsigned long);
-; extern "C" void g();
-; extern "C" void do_except() {
-; __try {
-; g();
-; } __except(filt(_exception_code())) {
-; }
-; }
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-; Function Attrs: uwtable
-define void @do_except() #0 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
-entry:
- call void (...) @llvm.localescape()
- invoke void @g() #5
- to label %__try.cont unwind label %lpad1
-
-lpad1: ; preds = %entry
- %ehvals = landingpad { i8*, i32 }
- catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@do_except@@" to i8*)
- %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@do_except@@" to i8*), i32 -1, i8* blockaddress(@do_except, %__try.cont))
- indirectbr i8* %recover, [label %__try.cont]
-
-__try.cont: ; preds = %lpad1, %entry
- ret void
-}
-
-; CHECK-LABEL: do_except:
-; CHECK: .seh_handler __C_specific_handler
-; CHECK-NOT: jmpq *
-; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long .Ltmp{{.*}}
-; CHECK-NEXT: .long .Ltmp{{.*}}
-; CHECK-NEXT: .long "?filt$0@0@do_except@@"@IMGREL
-; CHECK-NEXT: .long .Ltmp{{.*}}@IMGREL
-
-; Function Attrs: noinline nounwind
-define internal i32 @"\01?filt$0@0@do_except@@"(i8* nocapture readonly %exception_pointers, i8* nocapture readnone %frame_pointer) #1 {
-entry:
- %0 = bitcast i8* %exception_pointers to i32**
- %1 = load i32*, i32** %0, align 8
- %2 = load i32, i32* %1, align 4
- %call = tail call i32 @filt(i32 %2) #4
- ret i32 %call
-}
-
-declare i32 @filt(i32) #2
-
-declare void @g() #2
-
-declare i32 @__C_specific_handler(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #3
-
-; Function Attrs: nounwind
-declare i8* @llvm.eh.actions(...) #4
-
-; Function Attrs: nounwind
-declare void @llvm.localescape(...) #4
-
-; Function Attrs: nounwind readnone
-declare i8* @llvm.localrecover(i8*, i8*, i32) #3
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="do_except" }
-attributes #1 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readnone }
-attributes #4 = { nounwind }
-attributes #5 = { noinline }
-
-!llvm.module.flags = !{!0}
-!llvm.ident = !{!1}
-
-!0 = !{i32 1, !"PIC Level", i32 2}
-!1 = !{!"clang version 3.7.0 "}
diff --git a/test/CodeGen/WinEH/seh-resume-phi.ll b/test/CodeGen/WinEH/seh-resume-phi.ll
deleted file mode 100644
index d2bd64167d22..000000000000
--- a/test/CodeGen/WinEH/seh-resume-phi.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; RUN: opt -S -winehprepare < %s | FileCheck %s
-
-target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc"
-
-declare void @might_crash(i8* %ehptr)
-declare i32 @filt()
-declare void @cleanup()
-declare i32 @__C_specific_handler(...)
-declare i32 @llvm.eh.typeid.for(i8*)
-
-define void @resume_phi() personality i32 (...)* @__C_specific_handler {
-entry:
- invoke void @might_crash(i8* null)
- to label %return unwind label %lpad1
-
-lpad1:
- %ehvals1 = landingpad { i8*, i32 }
- catch i32 ()* @filt
- %ehptr1 = extractvalue { i8*, i32 } %ehvals1, 0
- %ehsel1 = extractvalue { i8*, i32 } %ehvals1, 1
- %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
- %matches = icmp eq i32 %ehsel1, %filt_sel
- br i1 %matches, label %__except, label %eh.resume
-
-__except:
- invoke void @might_crash(i8* %ehptr1)
- to label %return unwind label %lpad2
-
-lpad2:
- %ehvals2 = landingpad { i8*, i32 }
- cleanup
- %ehptr2 = extractvalue { i8*, i32 } %ehvals2, 0
- %ehsel2 = extractvalue { i8*, i32 } %ehvals2, 1
- call void @cleanup()
- br label %eh.resume
-
-return:
- ret void
-
-eh.resume:
- %ehptr.phi = phi i8* [ %ehptr1, %lpad1 ], [ %ehptr2, %lpad2 ]
- %ehsel.phi = phi i32 [ %ehsel1, %lpad1 ], [ %ehsel2, %lpad2 ]
- %ehval.phi1 = insertvalue { i8*, i32 } undef, i8* %ehptr.phi, 0
- %ehval.phi2 = insertvalue { i8*, i32 } %ehval.phi1, i32 %ehsel.phi, 1
- resume { i8*, i32 } %ehval.phi2
-}
-
-; CHECK-LABEL: define void @resume_phi()
-; CHECK: invoke void @might_crash(i8* null)
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i32 ()* @filt
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(
-; CHECK-SAME: i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@resume_phi, %__except))
-; CHECK-NEXT: indirectbr {{.*}} [label %__except]
-;
-; CHECK: __except:
-; CHECK: call i32 @llvm.eh.exceptioncode()
-; CHECK: invoke void @might_crash(i8* %{{.*}})
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @resume_phi.cleanup)
-; CHECK-NEXT: indirectbr {{.*}} []
-
-; CHECK-LABEL: define internal void @resume_phi.cleanup(i8*, i8*)
-; CHECK: call void @cleanup()
diff --git a/test/CodeGen/WinEH/seh-simple.ll b/test/CodeGen/WinEH/seh-simple.ll
deleted file mode 100644
index 060186484aec..000000000000
--- a/test/CodeGen/WinEH/seh-simple.ll
+++ /dev/null
@@ -1,233 +0,0 @@
-; RUN: opt -S -winehprepare -mtriple=x86_64-windows-msvc < %s \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=X64
-
-; This test should also pass in 32-bit using _except_handler3.
-; RUN: sed -e 's/__C_specific_handler/_except_handler3/' %s \
-; RUN: | opt -S -winehprepare -mtriple=i686-windows-msvc \
-; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=X86
-
-declare void @cleanup()
-declare i32 @filt()
-declare void @might_crash()
-declare i32 @__C_specific_handler(...)
-declare i32 @llvm.eh.typeid.for(i8*)
-
-define i32 @simple_except_store() personality i32 (...)* @__C_specific_handler {
-entry:
- %retval = alloca i32
- store i32 0, i32* %retval
- invoke void @might_crash()
- to label %return unwind label %lpad
-
-lpad:
- %ehvals = landingpad { i8*, i32 }
- catch i32 ()* @filt
- %sel = extractvalue { i8*, i32 } %ehvals, 1
- %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
- %matches = icmp eq i32 %sel, %filt_sel
- br i1 %matches, label %__except, label %eh.resume
-
-__except:
- store i32 1, i32* %retval
- br label %return
-
-return:
- %r = load i32, i32* %retval
- ret i32 %r
-
-eh.resume:
- resume { i8*, i32 } %ehvals
-}
-
-; CHECK-LABEL: define i32 @simple_except_store()
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i32 ()* @filt
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@simple_except_store, %__except))
-; CHECK-NEXT: indirectbr {{.*}} [label %__except]
-
-define i32 @catch_all() personality i32 (...)* @__C_specific_handler {
-entry:
- %retval = alloca i32
- store i32 0, i32* %retval
- invoke void @might_crash()
- to label %return unwind label %lpad
-
-lpad:
- %ehvals = landingpad { i8*, i32 }
- catch i8* null
- store i32 1, i32* %retval
- br label %return
-
-return:
- %r = load i32, i32* %retval
- ret i32 %r
-}
-
-; CHECK-LABEL: define i32 @catch_all()
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* blockaddress(@catch_all, %lpad.split))
-; CHECK-NEXT: indirectbr {{.*}} [label %lpad.split]
-;
-; CHECK: lpad.split:
-; CHECK: store i32 1, i32* %retval
-
-
-define i32 @except_phi() personality i32 (...)* @__C_specific_handler {
-entry:
- invoke void @might_crash()
- to label %return unwind label %lpad
-
-lpad:
- %ehvals = landingpad { i8*, i32 }
- catch i32 ()* @filt
- %sel = extractvalue { i8*, i32 } %ehvals, 1
- %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
- %matches = icmp eq i32 %sel, %filt_sel
- br i1 %matches, label %return, label %eh.resume
-
-return:
- %r = phi i32 [0, %entry], [1, %lpad]
- ret i32 %r
-
-eh.resume:
- resume { i8*, i32 } %ehvals
-}
-
-; CHECK-LABEL: define i32 @except_phi()
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i32 ()* @filt
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@except_phi, %lpad.return_crit_edge))
-; CHECK-NEXT: indirectbr {{.*}} [label %lpad.return_crit_edge]
-;
-; CHECK: lpad.return_crit_edge:
-; CHECK: br label %return
-;
-; CHECK: return:
-; CHECK-NEXT: %r = phi i32 [ 0, %entry ], [ 1, %lpad.return_crit_edge ]
-; CHECK-NEXT: ret i32 %r
-
-define i32 @except_join() personality i32 (...)* @__C_specific_handler {
-entry:
- invoke void @might_crash()
- to label %return unwind label %lpad
-
-lpad:
- %ehvals = landingpad { i8*, i32 }
- catch i32 ()* @filt
- %sel = extractvalue { i8*, i32 } %ehvals, 1
- %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
- %matches = icmp eq i32 %sel, %filt_sel
- br i1 %matches, label %return, label %eh.resume
-
-return:
- ret i32 0
-
-eh.resume:
- resume { i8*, i32 } %ehvals
-}
-
-; CHECK-LABEL: define i32 @except_join()
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: catch i32 ()* @filt
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@except_join, %lpad.return_crit_edge))
-; CHECK-NEXT: indirectbr {{.*}} [label %lpad.return_crit_edge]
-;
-; CHECK: lpad.return_crit_edge:
-; CHECK: br label %return
-;
-; CHECK: return:
-; CHECK-NEXT: ret i32 0
-
-define i32 @lpad_phi() personality i32 (...)* @__C_specific_handler {
-entry:
- invoke void @might_crash()
- to label %cont unwind label %lpad
-
-cont:
- invoke void @might_crash()
- to label %return unwind label %lpad
-
-lpad:
- %ncalls.1 = phi i32 [ 0, %entry ], [ 1, %cont ]
- %ehvals = landingpad { i8*, i32 }
- catch i32 ()* @filt
- %sel = extractvalue { i8*, i32 } %ehvals, 1
- %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
- %matches = icmp eq i32 %sel, %filt_sel
- br i1 %matches, label %return, label %eh.resume
-
-return:
- %r = phi i32 [2, %cont], [%ncalls.1, %lpad]
- ret i32 %r
-
-eh.resume:
- resume { i8*, i32 } %ehvals
-}
-
-; CHECK-LABEL: define i32 @lpad_phi()
-; CHECK: alloca i32
-; CHECK: store i32 0, i32*
-; CHECK: invoke void @might_crash()
-; CHECK: store i32 1, i32*
-; CHECK: invoke void @might_crash()
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: catch i32 ()* @filt
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void ({{.*}})* @lpad_phi.cleanup, i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@lpad_phi, %lpad.return_crit_edge))
-; CHECK-NEXT: indirectbr {{.*}} [label %lpad.return_crit_edge]
-;
-; CHECK: lpad.return_crit_edge:
-; CHECK: load i32, i32*
-; CHECK: br label %return
-;
-; CHECK: return:
-; CHECK-NEXT: %r = phi i32 [ 2, %cont ], [ %{{.*}}, %lpad.return_crit_edge ]
-; CHECK-NEXT: ret i32 %r
-
-define i32 @cleanup_and_except() personality i32 (...)* @__C_specific_handler {
-entry:
- invoke void @might_crash()
- to label %return unwind label %lpad
-
-lpad:
- %ehvals = landingpad { i8*, i32 }
- cleanup
- catch i32 ()* @filt
- call void @cleanup()
- %sel = extractvalue { i8*, i32 } %ehvals, 1
- %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
- %matches = icmp eq i32 %sel, %filt_sel
- br i1 %matches, label %return, label %eh.resume
-
-return:
- %r = phi i32 [0, %entry], [1, %lpad]
- ret i32 %r
-
-eh.resume:
- resume { i8*, i32 } %ehvals
-}
-
-; CHECK-LABEL: define i32 @cleanup_and_except()
-; CHECK: landingpad { i8*, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: catch i32 ()* @filt
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions(
-; CHECK: i32 0, void ({{.*}})* @cleanup_and_except.cleanup,
-; CHECK: i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@cleanup_and_except, %lpad.return_crit_edge))
-; CHECK-NEXT: indirectbr {{.*}} [label %lpad.return_crit_edge]
-;
-; CHECK: lpad.return_crit_edge:
-; CHECK: br label %return
-;
-; CHECK: return:
-; CHECK-NEXT: %r = phi i32 [ 0, %entry ], [ 1, %lpad.return_crit_edge ]
-; CHECK-NEXT: ret i32 %r
-
-; FIXME: This cleanup is an artifact of bad demotion.
-; X64-LABEL: define internal void @lpad_phi.cleanup(i8*, i8*)
-; X86-LABEL: define internal void @lpad_phi.cleanup()
-; X86: call i8* @llvm.frameaddress(i32 1)
-; CHECK: call i8* @llvm.localrecover({{.*}})
-; CHECK: load i32
-; CHECK: store i32 %{{.*}}, i32*
diff --git a/test/CodeGen/WinEH/wineh-cloning.ll b/test/CodeGen/WinEH/wineh-cloning.ll
new file mode 100644
index 000000000000..c13e0a163641
--- /dev/null
+++ b/test/CodeGen/WinEH/wineh-cloning.ll
@@ -0,0 +1,391 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -S -winehprepare < %s | FileCheck %s
+
+declare i32 @__CxxFrameHandler3(...)
+declare i32 @__C_specific_handler(...)
+
+declare void @f()
+
+declare void @llvm.foo(i32) nounwind
+declare void @llvm.bar() nounwind
+declare i32 @llvm.qux() nounwind
+declare i1 @llvm.baz() nounwind
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ ; %x def colors: {entry} subset of use colors; must spill
+ %x = call i32 @llvm.qux()
+ invoke void @f()
+ to label %noreturn unwind label %catch.switch
+catch.switch:
+ %cs = catchswitch within none [label %catch] unwind to caller
+catch:
+ %cp = catchpad within %cs []
+ br label %noreturn
+noreturn:
+ ; %x use colors: {entry, cleanup}
+ call void @llvm.foo(i32 %x)
+ unreachable
+}
+; Need two copies of the call to @h, one under entry and one under catch.
+; Currently we generate a load for each, though we shouldn't need one
+; for the use in entry's copy.
+; CHECK-LABEL: define void @test1(
+; CHECK: entry:
+; CHECK: %x = call i32 @llvm.qux()
+; CHECK: invoke void @f()
+; CHECK: to label %[[EntryCopy:[^ ]+]] unwind label %catch
+; CHECK: catch.switch:
+; CHECK: %cs = catchswitch within none [label %catch] unwind to caller
+; CHECK: catch:
+; CHECK: catchpad within %cs []
+; CHECK-NEXT: call void @llvm.foo(i32 %x)
+; CHECK: [[EntryCopy]]:
+; CHECK: call void @llvm.foo(i32 %x)
+
+
+define void @test2() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @f()
+ to label %exit unwind label %cleanup
+cleanup:
+ cleanuppad within none []
+ br label %exit
+exit:
+ call void @llvm.bar()
+ ret void
+}
+; Need two copies of %exit's call to @f -- the subsequent ret is only
+; valid when coming from %entry, but on the path from %cleanup, this
+; might be a valid call to @f which might dynamically not return.
+; CHECK-LABEL: define void @test2(
+; CHECK: entry:
+; CHECK: invoke void @f()
+; CHECK: to label %[[exit:[^ ]+]] unwind label %cleanup
+; CHECK: cleanup:
+; CHECK: cleanuppad within none []
+; CHECK: call void @llvm.bar()
+; CHECK-NEXT: unreachable
+; CHECK: [[exit]]:
+; CHECK: call void @llvm.bar()
+; CHECK-NEXT: ret void
+
+
+define void @test3() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @f()
+ to label %invoke.cont unwind label %catch.switch
+invoke.cont:
+ invoke void @f()
+ to label %exit unwind label %cleanup
+catch.switch:
+ %cs = catchswitch within none [label %catch] unwind to caller
+catch:
+ catchpad within %cs []
+ br label %shared
+cleanup:
+ cleanuppad within none []
+ br label %shared
+shared:
+ call void @llvm.bar()
+ br label %exit
+exit:
+ ret void
+}
+; Need two copies of %shared's call to @f (similar to @test2 but
+; the two regions here are siblings, not parent-child).
+; CHECK-LABEL: define void @test3(
+; CHECK: invoke void @f()
+; CHECK: invoke void @f()
+; CHECK: to label %[[exit:[^ ]+]] unwind
+; CHECK: catch:
+; CHECK: catchpad within %cs []
+; CHECK-NEXT: call void @llvm.bar()
+; CHECK-NEXT: unreachable
+; CHECK: cleanup:
+; CHECK: cleanuppad within none []
+; CHECK: call void @llvm.bar()
+; CHECK-NEXT: unreachable
+; CHECK: [[exit]]:
+; CHECK: ret void
+
+
+define void @test4() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @f()
+ to label %shared unwind label %catch.switch
+catch.switch:
+ %cs = catchswitch within none [label %catch] unwind to caller
+catch:
+ catchpad within %cs []
+ br label %shared
+shared:
+ %x = call i32 @llvm.qux()
+ %i = call i32 @llvm.qux()
+ %zero.trip = icmp eq i32 %i, 0
+ br i1 %zero.trip, label %exit, label %loop
+loop:
+ %i.loop = phi i32 [ %i, %shared ], [ %i.dec, %loop.tail ]
+ %b = call i1 @llvm.baz()
+ br i1 %b, label %left, label %right
+left:
+ %y = call i32 @llvm.qux()
+ br label %loop.tail
+right:
+ call void @llvm.foo(i32 %x)
+ br label %loop.tail
+loop.tail:
+ %i.dec = sub i32 %i.loop, 1
+ %done = icmp eq i32 %i.dec, 0
+ br i1 %done, label %exit, label %loop
+exit:
+ call void @llvm.foo(i32 %x)
+ unreachable
+}
+; Make sure we can clone regions that have internal control
+; flow and SSA values. Here we need two copies of everything
+; from %shared to %exit.
+; CHECK-LABEL: define void @test4(
+; CHECK: entry:
+; CHECK: to label %[[shared_E:[^ ]+]] unwind label %catch.switch
+; CHECK: catch:
+; CHECK: catchpad within %cs []
+; CHECK: [[x_C:%[^ ]+]] = call i32 @llvm.qux()
+; CHECK: [[i_C:%[^ ]+]] = call i32 @llvm.qux()
+; CHECK: [[zt_C:%[^ ]+]] = icmp eq i32 [[i_C]], 0
+; CHECK: br i1 [[zt_C]], label %[[exit_C:[^ ]+]], label %[[loop_C:[^ ]+]]
+; CHECK: [[shared_E]]:
+; CHECK: [[x_E:%[^ ]+]] = call i32 @llvm.qux()
+; CHECK: [[i_E:%[^ ]+]] = call i32 @llvm.qux()
+; CHECK: [[zt_E:%[^ ]+]] = icmp eq i32 [[i_E]], 0
+; CHECK: br i1 [[zt_E]], label %[[exit_E:[^ ]+]], label %[[loop_E:[^ ]+]]
+; CHECK: [[loop_C]]:
+; CHECK: [[iloop_C:%[^ ]+]] = phi i32 [ [[i_C]], %catch ], [ [[idec_C:%[^ ]+]], %[[looptail_C:[^ ]+]] ]
+; CHECK: [[b_C:%[^ ]+]] = call i1 @llvm.baz()
+; CHECK: br i1 [[b_C]], label %[[left_C:[^ ]+]], label %[[right_C:[^ ]+]]
+; CHECK: [[loop_E]]:
+; CHECK: [[iloop_E:%[^ ]+]] = phi i32 [ [[i_E]], %[[shared_E]] ], [ [[idec_E:%[^ ]+]], %[[looptail_E:[^ ]+]] ]
+; CHECK: [[b_E:%[^ ]+]] = call i1 @llvm.baz()
+; CHECK: br i1 [[b_E]], label %[[left_E:[^ ]+]], label %[[right_E:[^ ]+]]
+; CHECK: [[left_C]]:
+; CHECK: [[y_C:%[^ ]+]] = call i32 @llvm.qux()
+; CHECK: br label %[[looptail_C]]
+; CHECK: [[left_E]]:
+; CHECK: [[y_E:%[^ ]+]] = call i32 @llvm.qux()
+; CHECK: br label %[[looptail_E]]
+; CHECK: [[right_C]]:
+; CHECK: call void @llvm.foo(i32 [[x_C]])
+; CHECK: br label %[[looptail_C]]
+; CHECK: [[right_E]]:
+; CHECK: call void @llvm.foo(i32 [[x_E]])
+; CHECK: br label %[[looptail_E]]
+; CHECK: [[looptail_C]]:
+; CHECK: [[idec_C]] = sub i32 [[iloop_C]], 1
+; CHECK: [[done_C:%[^ ]+]] = icmp eq i32 [[idec_C]], 0
+; CHECK: br i1 [[done_C]], label %[[exit_C]], label %[[loop_C]]
+; CHECK: [[looptail_E]]:
+; CHECK: [[idec_E]] = sub i32 [[iloop_E]], 1
+; CHECK: [[done_E:%[^ ]+]] = icmp eq i32 [[idec_E]], 0
+; CHECK: br i1 [[done_E]], label %[[exit_E]], label %[[loop_E]]
+; CHECK: [[exit_C]]:
+; CHECK: call void @llvm.foo(i32 [[x_C]])
+; CHECK: unreachable
+; CHECK: [[exit_E]]:
+; CHECK: call void @llvm.foo(i32 [[x_E]])
+; CHECK: unreachable
+
+
+define void @test5() personality i32 (...)* @__C_specific_handler {
+entry:
+ invoke void @f()
+ to label %exit unwind label %outer
+outer:
+ %o = cleanuppad within none []
+ %x = call i32 @llvm.qux()
+ invoke void @f() [ "funclet"(token %o) ]
+ to label %outer.ret unwind label %catch.switch
+catch.switch:
+ %cs = catchswitch within %o [label %inner] unwind to caller
+inner:
+ %i = catchpad within %cs []
+ catchret from %i to label %outer.post-inner
+outer.post-inner:
+ call void @llvm.foo(i32 %x)
+ br label %outer.ret
+outer.ret:
+ cleanupret from %o unwind to caller
+exit:
+ ret void
+}
+; Simple nested case (catch-inside-cleanup). Nothing needs
+; to be cloned. The def and use of %x are both in %outer
+; and so don't need to be spilled.
+; CHECK-LABEL: define void @test5(
+; CHECK: outer:
+; CHECK: %x = call i32 @llvm.qux()
+; CHECK-NEXT: invoke void @f()
+; CHECK-NEXT: to label %outer.ret unwind label %catch.switch
+; CHECK: inner:
+; CHECK-NEXT: %i = catchpad within %cs []
+; CHECK-NEXT: catchret from %i to label %outer.post-inner
+; CHECK: outer.post-inner:
+; CHECK-NEXT: call void @llvm.foo(i32 %x)
+; CHECK-NEXT: br label %outer.ret
+
+
+define void @test9() personality i32 (...)* @__C_specific_handler {
+entry:
+ invoke void @f()
+ to label %invoke.cont unwind label %left
+invoke.cont:
+ invoke void @f()
+ to label %unreachable unwind label %right
+left:
+ %cp.left = cleanuppad within none []
+ call void @llvm.foo(i32 1)
+ invoke void @f() [ "funclet"(token %cp.left) ]
+ to label %unreachable unwind label %right
+right:
+ %cp.right = cleanuppad within none []
+ call void @llvm.foo(i32 2)
+ invoke void @f() [ "funclet"(token %cp.right) ]
+ to label %unreachable unwind label %left
+unreachable:
+ unreachable
+}
+; This is an irreducible loop with two funclets that enter each other.
+; CHECK-LABEL: define void @test9(
+; CHECK: entry:
+; CHECK: to label %invoke.cont unwind label %[[LEFT:.+]]
+; CHECK: invoke.cont:
+; CHECK: to label %[[UNREACHABLE_ENTRY:.+]] unwind label %[[RIGHT:.+]]
+; CHECK: [[LEFT]]:
+; CHECK: call void @llvm.foo(i32 1)
+; CHECK: invoke void @f()
+; CHECK: to label %[[UNREACHABLE_LEFT:.+]] unwind label %[[RIGHT]]
+; CHECK: [[RIGHT]]:
+; CHECK: call void @llvm.foo(i32 2)
+; CHECK: invoke void @f()
+; CHECK: to label %[[UNREACHABLE_RIGHT:.+]] unwind label %[[LEFT]]
+; CHECK: [[UNREACHABLE_RIGHT]]:
+; CHECK: unreachable
+; CHECK: [[UNREACHABLE_LEFT]]:
+; CHECK: unreachable
+; CHECK: [[UNREACHABLE_ENTRY]]:
+; CHECK: unreachable
+
+
+define void @test10() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @f()
+ to label %unreachable unwind label %inner
+inner:
+ %cleanup = cleanuppad within none []
+ ; make sure we don't overlook this cleanupret and try to process
+ ; successor %outer as a child of inner.
+ cleanupret from %cleanup unwind label %outer
+outer:
+ %cs = catchswitch within none [label %catch.body] unwind to caller
+
+catch.body:
+ %catch = catchpad within %cs []
+ catchret from %catch to label %exit
+exit:
+ ret void
+unreachable:
+ unreachable
+}
+; CHECK-LABEL: define void @test10(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: invoke
+; CHECK-NEXT: to label %unreachable unwind label %inner
+; CHECK: inner:
+; CHECK-NEXT: %cleanup = cleanuppad within none []
+; CHECK-NEXT: cleanupret from %cleanup unwind label %outer
+; CHECK: outer:
+; CHECK-NEXT: %cs = catchswitch within none [label %catch.body] unwind to caller
+; CHECK: catch.body:
+; CHECK-NEXT: %catch = catchpad within %cs []
+; CHECK-NEXT: catchret from %catch to label %exit
+; CHECK: exit:
+; CHECK-NEXT: ret void
+
+define void @test11() personality i32 (...)* @__C_specific_handler {
+entry:
+ invoke void @f()
+ to label %exit unwind label %cleanup.outer
+cleanup.outer:
+ %outer = cleanuppad within none []
+ invoke void @f() [ "funclet"(token %outer) ]
+ to label %outer.cont unwind label %cleanup.inner
+outer.cont:
+ br label %merge
+cleanup.inner:
+ %inner = cleanuppad within %outer []
+ br label %merge
+merge:
+ call void @llvm.bar()
+ unreachable
+exit:
+ ret void
+}
+; merge.end will get cloned for outer and inner, but is implausible
+; from inner, so the call @f() in inner's copy of merge should be
+; rewritten to call @f()
+; CHECK-LABEL: define void @test11()
+; CHECK: %inner = cleanuppad within %outer []
+; CHECK-NEXT: call void @llvm.bar()
+; CHECK-NEXT: unreachable
+
+define void @test12() personality i32 (...)* @__CxxFrameHandler3 !dbg !5 {
+entry:
+ invoke void @f()
+ to label %cont unwind label %left, !dbg !8
+cont:
+ invoke void @f()
+ to label %exit unwind label %right
+left:
+ cleanuppad within none []
+ br label %join
+right:
+ cleanuppad within none []
+ br label %join
+join:
+ ; This call will get cloned; make sure we can handle cloning
+ ; instructions with debug metadata attached.
+ call void @llvm.bar(), !dbg !9
+ unreachable
+exit:
+ ret void
+}
+
+; CHECK-LABEL: define void @test13()
+; CHECK: ret void
+define void @test13() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ ret void
+
+unreachable:
+ cleanuppad within none []
+ unreachable
+}
+
+;; Debug info (from test12)
+
+; Make sure the DISubprogram doesn't get cloned
+; CHECK-LABEL: !llvm.module.flags
+; CHECK-NOT: !DISubprogram
+; CHECK: !{{[0-9]+}} = distinct !DISubprogram(name: "test12"
+; CHECK-NOT: !DISubprogram
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{!1}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !2, producer: "compiler", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !3, subprograms: !4)
+!2 = !DIFile(filename: "test.cpp", directory: ".")
+!3 = !{}
+!4 = !{!5}
+!5 = distinct !DISubprogram(name: "test12", scope: !2, file: !2, type: !6, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, variables: !3)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !DILocation(line: 1, scope: !5)
+!9 = !DILocation(line: 2, scope: !5)
diff --git a/test/CodeGen/WinEH/wineh-demotion.ll b/test/CodeGen/WinEH/wineh-demotion.ll
new file mode 100644
index 000000000000..411952d84bb6
--- /dev/null
+++ b/test/CodeGen/WinEH/wineh-demotion.ll
@@ -0,0 +1,356 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -S -winehprepare < %s | FileCheck %s
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @f()
+
+declare i32 @g()
+
+declare void @h(i32)
+
+declare i1 @i()
+
+declare void @llvm.bar() nounwind
+
+; CHECK-LABEL: @test1(
+define void @test1(i1 %B) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ ; Spill slot should be inserted here
+ ; CHECK: [[Slot:%[^ ]+]] = alloca
+ ; Can't store for %phi at these defs because the lifetimes overlap
+ ; CHECK-NOT: store
+ %x = call i32 @g()
+ %y = call i32 @g()
+ br i1 %B, label %left, label %right
+left:
+ ; CHECK: left:
+ ; CHECK-NEXT: store i32 %x, i32* [[Slot]]
+ ; CHECK-NEXT: invoke void @f
+ invoke void @f()
+ to label %exit unwind label %merge
+right:
+ ; CHECK: right:
+ ; CHECK-NEXT: store i32 %y, i32* [[Slot]]
+ ; CHECK-NEXT: invoke void @f
+ invoke void @f()
+ to label %exit unwind label %merge
+merge:
+ ; CHECK: merge:
+ ; CHECK-NOT: = phi
+ %phi = phi i32 [ %x, %left ], [ %y, %right ]
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %cp = catchpad within %cs1 []
+ ; CHECK: catch:
+ ; CHECK: [[Reload:%[^ ]+]] = load i32, i32* [[Slot]]
+ ; CHECK-NEXT: call void @h(i32 [[Reload]])
+ call void @h(i32 %phi) [ "funclet"(token %cp) ]
+ catchret from %cp to label %exit
+
+exit:
+ ret void
+}
+
+; CHECK-LABEL: @test2(
+define void @test2(i1 %B) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ br i1 %B, label %left, label %right
+left:
+ ; Need two stores here because %x and %y interfere so they need 2 slots
+ ; CHECK: left:
+ ; CHECK: store i32 1, i32* [[Slot1:%[^ ]+]]
+ ; CHECK: store i32 1, i32* [[Slot2:%[^ ]+]]
+ ; CHECK-NEXT: invoke void @f
+ invoke void @f()
+ to label %exit unwind label %merge.inner
+right:
+ ; Need two stores here because %x and %y interfere so they need 2 slots
+ ; CHECK: right:
+ ; CHECK-DAG: store i32 2, i32* [[Slot1]]
+ ; CHECK-DAG: store i32 2, i32* [[Slot2]]
+ ; CHECK: invoke void @f
+ invoke void @f()
+ to label %exit unwind label %merge.inner
+merge.inner:
+ ; CHECK: merge.inner:
+ ; CHECK-NOT: = phi
+ ; CHECK: catchswitch within none
+ %x = phi i32 [ 1, %left ], [ 2, %right ]
+ %cs1 = catchswitch within none [label %catch.inner] unwind label %merge.outer
+
+catch.inner:
+ %cpinner = catchpad within %cs1 []
+ ; Need just one store here because only %y is affected
+ ; CHECK: catch.inner:
+ %z = call i32 @g() [ "funclet"(token %cpinner) ]
+ ; CHECK: store i32 %z
+ ; CHECK-NEXT: invoke void @f
+ invoke void @f() [ "funclet"(token %cpinner) ]
+ to label %catchret.inner unwind label %merge.outer
+
+catchret.inner:
+ catchret from %cpinner to label %exit
+
+merge.outer:
+ %y = phi i32 [ %x, %merge.inner ], [ %z, %catch.inner ]
+ ; CHECK: merge.outer:
+ ; CHECK-NOT: = phi
+ ; CHECK: catchswitch within none
+ %cs2 = catchswitch within none [label %catch.outer] unwind to caller
+
+catch.outer:
+ %cpouter = catchpad within %cs2 []
+ ; CHECK: catch.outer:
+ ; CHECK: [[CatchPad:%[^ ]+]] = catchpad within %cs2 []
+ ; Need to load x and y from two different slots since they're both live
+ ; and can have different values (if we came from catch.inner)
+ ; CHECK-DAG: load i32, i32* [[Slot1]]
+ ; CHECK-DAG: load i32, i32* [[Slot2]]
+ ; CHECK: catchret from [[CatchPad]] to label
+ call void @h(i32 %x) [ "funclet"(token %cpouter) ]
+ call void @h(i32 %y) [ "funclet"(token %cpouter) ]
+ catchret from %cpouter to label %exit
+
+exit:
+ ret void
+}
+
+; test4: don't need stores for %phi.inner, as its only use is to feed %phi.outer
+; %phi.outer needs stores in %left, %right, and %join
+; CHECK-LABEL: @test4(
+define void @test4(i1 %B) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ ; CHECK: entry:
+ ; CHECK: [[Slot:%[^ ]+]] = alloca
+ ; CHECK-NEXT: br
+ br i1 %B, label %left, label %right
+left:
+ ; CHECK: left:
+ ; CHECK-NOT: store
+ ; CHECK: store i32 %l, i32* [[Slot]]
+ ; CHECK-NEXT: invoke void @f
+ %l = call i32 @g()
+ invoke void @f()
+ to label %join unwind label %catchpad.inner
+right:
+ ; CHECK: right:
+ ; CHECK-NOT: store
+ ; CHECK: store i32 %r, i32* [[Slot]]
+ ; CHECK-NEXT: invoke void @f
+ %r = call i32 @g()
+ invoke void @f()
+ to label %join unwind label %catchpad.inner
+catchpad.inner:
+ ; CHECK: catchpad.inner:
+ ; CHECK-NEXT: catchswitch within none
+ %phi.inner = phi i32 [ %l, %left ], [ %r, %right ]
+ %cs1 = catchswitch within none [label %catch.inner] unwind label %catchpad.outer
+catch.inner:
+ %cp1 = catchpad within %cs1 []
+ catchret from %cp1 to label %join
+join:
+ ; CHECK: join:
+ ; CHECK-NOT: store
+ ; CHECK: store i32 %j, i32* [[Slot]]
+ ; CHECK-NEXT: invoke void @f
+ %j = call i32 @g()
+ invoke void @f()
+ to label %exit unwind label %catchpad.outer
+
+catchpad.outer:
+ ; CHECK: catchpad.outer:
+ ; CHECK-NEXT: catchswitch within none
+ %phi.outer = phi i32 [ %phi.inner, %catchpad.inner ], [ %j, %join ]
+ %cs2 = catchswitch within none [label %catch.outer] unwind to caller
+catch.outer:
+ ; CHECK: catch.outer:
+ ; CHECK: [[Reload:%[^ ]+]] = load i32, i32* [[Slot]]
+ ; CHECK: call void @h(i32 [[Reload]])
+ %cp2 = catchpad within %cs2 []
+ call void @h(i32 %phi.outer) [ "funclet"(token %cp2) ]
+ catchret from %cp2 to label %exit
+exit:
+ ret void
+}
+
+; CHECK-LABEL: @test5(
+define void @test5() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ ; need store for %phi.cleanup
+ ; CHECK: entry:
+ ; CHECK: store i32 1, i32* [[CleanupSlot:%[^ ]+]]
+ ; CHECK-NEXT: invoke void @f
+ invoke void @f()
+ to label %invoke.cont unwind label %cleanup
+
+invoke.cont:
+ ; need store for %phi.cleanup
+ ; CHECK: invoke.cont:
+ ; CHECK-NEXT: store i32 2, i32* [[CleanupSlot]]
+ ; CHECK-NEXT: invoke void @f
+ invoke void @f()
+ to label %invoke.cont2 unwind label %cleanup
+
+cleanup:
+ ; cleanup phi can be loaded at cleanup entry
+ ; CHECK: cleanup:
+ ; CHECK-NEXT: cleanuppad within none []
+ ; CHECK: [[CleanupReload:%[^ ]+]] = load i32, i32* [[CleanupSlot]]
+ %phi.cleanup = phi i32 [ 1, %entry ], [ 2, %invoke.cont ]
+ %cp = cleanuppad within none []
+ %b = call i1 @i() [ "funclet"(token %cp) ]
+ br i1 %b, label %left, label %right
+
+left:
+ ; CHECK: left:
+ ; CHECK: call void @h(i32 [[CleanupReload]]
+ call void @h(i32 %phi.cleanup) [ "funclet"(token %cp) ]
+ br label %merge
+
+right:
+ ; CHECK: right:
+ ; CHECK: call void @h(i32 [[CleanupReload]]
+ call void @h(i32 %phi.cleanup) [ "funclet"(token %cp) ]
+ br label %merge
+
+merge:
+ ; need store for %phi.catch
+ ; CHECK: merge:
+ ; CHECK-NEXT: store i32 [[CleanupReload]], i32* [[CatchSlot:%[^ ]+]]
+ ; CHECK-NEXT: cleanupret
+ cleanupret from %cp unwind label %catchswitch
+
+invoke.cont2:
+ ; need store for %phi.catch
+ ; CHECK: invoke.cont2:
+ ; CHECK-NEXT: store i32 3, i32* [[CatchSlot]]
+ ; CHECK-NEXT: invoke void @f
+ invoke void @f()
+ to label %exit unwind label %catchswitch
+
+catchswitch:
+ ; CHECK: catchswitch:
+ ; CHECK-NEXT: catchswitch within none
+ %phi.catch = phi i32 [ %phi.cleanup, %merge ], [ 3, %invoke.cont2 ]
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ ; CHECK: catch:
+ ; CHECK: catchpad within %cs1
+ ; CHECK: [[CatchReload:%[^ ]+]] = load i32, i32* [[CatchSlot]]
+ ; CHECK: call void @h(i32 [[CatchReload]]
+ %cp2 = catchpad within %cs1 []
+ call void @h(i32 %phi.catch) [ "funclet"(token %cp2) ]
+ catchret from %cp2 to label %exit
+
+exit:
+ ret void
+}
+
+; We used to demote %x, but we don't need to anymore.
+; CHECK-LABEL: @test6(
+define void @test6() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ ; CHECK: entry:
+ ; CHECK: %x = invoke i32 @g()
+ ; CHECK-NEXT: to label %loop unwind label %to_caller
+ %x = invoke i32 @g()
+ to label %loop unwind label %to_caller
+to_caller:
+ %cp1 = cleanuppad within none []
+ cleanupret from %cp1 unwind to caller
+loop:
+ invoke void @f()
+ to label %loop unwind label %cleanup
+cleanup:
+ ; CHECK: cleanup:
+ ; CHECK: call void @h(i32 %x)
+ %cp2 = cleanuppad within none []
+ call void @h(i32 %x) [ "funclet"(token %cp2) ]
+ cleanupret from %cp2 unwind to caller
+}
+
+; CHECK-LABEL: @test7(
+define void @test7() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ ; %x is an EH pad phi, so gets stored in pred here
+ ; CHECK: entry:
+ ; CHECK: store i32 1, i32* [[SlotX:%[^ ]+]]
+ ; CHECK: invoke void @f()
+ invoke void @f()
+ to label %invoke.cont unwind label %catchpad
+invoke.cont:
+ ; %x is an EH pad phi, so gets stored in pred here
+ ; CHECK: invoke.cont:
+ ; CHECK: store i32 2, i32* [[SlotX]]
+ ; CHECK: invoke void @f()
+ invoke void @f()
+ to label %exit unwind label %catchpad
+catchpad:
+ ; %x phi should be eliminated
+ ; CHECK: catchpad:
+ ; CHECK-NEXT: catchswitch within none
+ %x = phi i32 [ 1, %entry ], [ 2, %invoke.cont ]
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+catch:
+ ; CHECK: catch:
+ ; CHECK-NEXT: %[[CatchPad:[^ ]+]] = catchpad within %cs1 []
+ %cp = catchpad within %cs1 []
+ %b = call i1 @i() [ "funclet"(token %cp) ]
+ br i1 %b, label %left, label %right
+left:
+ ; Edge from %left to %join needs to be split so that
+ ; the load of %x can be inserted *after* the catchret
+ ; CHECK: left:
+ ; CHECK-NEXT: catchret from %[[CatchPad]] to label %[[SplitLeft:[^ ]+]]
+ catchret from %cp to label %join
+ ; CHECK: [[SplitLeft]]:
+ ; CHECK: [[LoadX:%[^ ]+]] = load i32, i32* [[SlotX]]
+ ; CHECK: br label %join
+right:
+ ; Edge from %right to %join needs to be split so that
+ ; the load of %y can be inserted *after* the catchret
+ ; CHECK: right:
+ ; CHECK: %y = call i32 @g()
+ ; CHECK: catchret from %[[CatchPad]] to label %join
+ %y = call i32 @g() [ "funclet"(token %cp) ]
+ catchret from %cp to label %join
+join:
+ ; CHECK: join:
+ ; CHECK: %phi = phi i32 [ [[LoadX]], %[[SplitLeft]] ], [ %y, %right ]
+ %phi = phi i32 [ %x, %left ], [ %y, %right ]
+ call void @h(i32 %phi)
+ br label %exit
+exit:
+ ret void
+}
+
+; CHECK-LABEL: @test8(
+define void @test8() personality i32 (...)* @__CxxFrameHandler3 { entry:
+ invoke void @f()
+ to label %done unwind label %cleanup1
+ invoke void @f()
+ to label %done unwind label %cleanup2
+
+done:
+ ret void
+
+cleanup1:
+ ; CHECK: [[CleanupPad1:%[^ ]+]] = cleanuppad within none []
+ ; CHECK-NEXT: call void @llvm.bar()
+ ; CHECK-NEXT: cleanupret from [[CleanupPad1]]
+ %cp0 = cleanuppad within none []
+ br label %cleanupexit
+
+cleanup2:
+ ; CHECK: cleanuppad within none []
+ ; CHECK-NEXT: call void @llvm.bar()
+ ; CHECK-NEXT: unreachable
+ %cp1 = cleanuppad within none []
+ br label %cleanupexit
+
+cleanupexit:
+ call void @llvm.bar()
+ cleanupret from %cp0 unwind label %cleanup2
+}
diff --git a/test/CodeGen/WinEH/wineh-intrinsics-invalid.ll b/test/CodeGen/WinEH/wineh-intrinsics-invalid.ll
new file mode 100644
index 000000000000..17d6e70ad1eb
--- /dev/null
+++ b/test/CodeGen/WinEH/wineh-intrinsics-invalid.ll
@@ -0,0 +1,26 @@
+; RUN: sed -e s/.T1:// %s | not opt -lint -disable-output 2>&1 | FileCheck --check-prefix=CHECK1 %s
+; RUN: sed -e s/.T2:// %s | not opt -lint -disable-output 2>&1 | FileCheck --check-prefix=CHECK2 %s
+
+target triple = "x86_64-pc-windows-msvc"
+
+declare void @f()
+
+;T1: declare i8* @llvm.eh.exceptionpointer.p0i8(i32)
+;T1:
+;T1: define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+;T1: call i8* @llvm.eh.exceptionpointer.p0i8(i32 0)
+;T1: ret void
+;T1: }
+;CHECK1: Intrinsic has incorrect argument type!
+;CHECK1-NEXT: i8* (i32)* @llvm.eh.exceptionpointer.p0i8
+
+;T2: declare i8* @llvm.eh.exceptionpointer.p0i8(token)
+;T2:
+;T2: define void @test2() personality i32 (...)* @__CxxFrameHandler3 {
+;T2: call i8* @llvm.eh.exceptionpointer.p0i8(token undef)
+;T2: ret void
+;T2: }
+;CHECK2: eh.exceptionpointer argument must be a catchpad
+;CHECK2-NEXT: call i8* @llvm.eh.exceptionpointer.p0i8(token undef)
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/CodeGen/WinEH/wineh-intrinsics.ll b/test/CodeGen/WinEH/wineh-intrinsics.ll
new file mode 100644
index 000000000000..3658792a3843
--- /dev/null
+++ b/test/CodeGen/WinEH/wineh-intrinsics.ll
@@ -0,0 +1,44 @@
+; RUN: opt -lint -disable-output < %s
+
+; This test is meant to prove that the verifier does not report errors for correct
+; use of the llvm.eh.exceptionpointer intrinsic.
+
+target triple = "x86_64-pc-windows-msvc"
+
+declare i8* @llvm.eh.exceptionpointer.p0i8(token)
+declare i8 addrspace(1)* @llvm.eh.exceptionpointer.p1i8(token)
+
+declare void @f(...)
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void (...) @f(i32 1)
+ to label %exit unwind label %catchpad
+catchpad:
+ %cs1 = catchswitch within none [label %do_catch] unwind to caller
+do_catch:
+ %catch = catchpad within %cs1 [i32 1]
+ %exn = call i8* @llvm.eh.exceptionpointer.p0i8(token %catch)
+ call void (...) @f(i8* %exn)
+ catchret from %catch to label %exit
+exit:
+ ret void
+}
+
+define void @test2() personality i32 (...)* @ProcessManagedException {
+entry:
+ invoke void (...) @f(i32 1)
+ to label %exit unwind label %catchpad
+catchpad:
+ %cs1 = catchswitch within none [label %do_catch] unwind to caller
+do_catch:
+ %catch = catchpad within %cs1 [i32 1]
+ %exn = call i8 addrspace(1)* @llvm.eh.exceptionpointer.p1i8(token %catch)
+ call void (...) @f(i8 addrspace(1)* %exn)
+ catchret from %catch to label %exit
+exit:
+ ret void
+}
+
+declare i32 @__CxxFrameHandler3(...)
+declare i32 @ProcessManagedException(...)
diff --git a/test/CodeGen/WinEH/wineh-no-demotion.ll b/test/CodeGen/WinEH/wineh-no-demotion.ll
new file mode 100644
index 000000000000..4fb84db89093
--- /dev/null
+++ b/test/CodeGen/WinEH/wineh-no-demotion.ll
@@ -0,0 +1,130 @@
+; RUN: opt -mtriple=x86_x64-pc-windows-msvc -S -winehprepare -disable-demotion -disable-cleanups < %s | FileCheck %s
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare i32 @__C_specific_handler(...)
+
+declare void @f()
+
+declare i32 @g()
+
+declare void @h(i32)
+
+; CHECK-LABEL: @test1(
+define void @test1(i1 %bool) personality i32 (...)* @__C_specific_handler {
+entry:
+ invoke void @f()
+ to label %invoke.cont unwind label %left
+
+invoke.cont:
+ invoke void @f()
+ to label %exit unwind label %inner
+
+left:
+ %0 = cleanuppad within none []
+ br i1 %bool, label %shared, label %cleanupret
+
+cleanupret:
+ cleanupret from %0 unwind label %right
+
+right:
+ %1 = cleanuppad within none []
+ br label %shared
+
+shared:
+ %x = call i32 @g()
+ invoke void @f() [ "funclet"(token %0) ]
+ to label %shared.cont unwind label %inner
+
+shared.cont:
+ unreachable
+
+inner:
+ %phi = phi i32 [ %x, %shared ], [ 0, %invoke.cont ]
+ %i = cleanuppad within none []
+ call void @h(i32 %phi)
+ unreachable
+
+; CHECK: %phi = phi i32 [ %x, %shared ], [ 0, %invoke.cont ], [ %x.for.left, %shared.for.left ]
+; CHECK: %i = cleanuppad within none []
+; CHECK: call void @h(i32 %phi)
+
+exit:
+ unreachable
+}
+
+; CHECK-LABEL: @test2(
+define void @test2(i1 %bool) personality i32 (...)* @__C_specific_handler {
+entry:
+ invoke void @f()
+ to label %shared.cont unwind label %left
+
+left:
+ %0 = cleanuppad within none []
+ br i1 %bool, label %shared, label %cleanupret
+
+cleanupret:
+ cleanupret from %0 unwind label %right
+
+right:
+ %1 = cleanuppad within none []
+ br label %shared
+
+shared:
+ %x = call i32 @g()
+ invoke void @f() [ "funclet"(token %0) ]
+ to label %shared.cont unwind label %inner
+
+shared.cont:
+ unreachable
+
+inner:
+ %i = cleanuppad within none []
+ call void @h(i32 %x)
+ unreachable
+
+; CHECK: %x1 = phi i32 [ %x.for.left, %shared.for.left ], [ %x, %shared ]
+; CHECK: %i = cleanuppad within none []
+; CHECK: call void @h(i32 %x1)
+
+exit:
+ unreachable
+}
+
+; CHECK-LABEL: @test4(
+define void @test4(i1 %x) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @f()
+ to label %invoke.cont1 unwind label %left
+
+invoke.cont1:
+ invoke void @f()
+ to label %exit unwind label %right
+
+left:
+ %0 = cleanuppad within none []
+ br label %shared
+
+right:
+ %1 = cleanuppad within none []
+ br i1 %x, label %shared, label %right.other
+
+right.other:
+ br label %shared
+
+shared:
+ %phi = phi i32 [ 1, %left ], [ 0, %right ], [ -1, %right.other ]
+ call void @h(i32 %phi)
+ unreachable
+
+; CHECK: %phi = phi i32 [ 0, %right ], [ -1, %right.other ]
+; CHECK: call void @h(i32 %phi)
+
+; CHECK: %phi.for.left = phi i32 [ 1, %left ]
+; CHECK: call void @h(i32 %phi.for.left)
+
+exit:
+ unreachable
+}
+
+declare void @__std_terminate()
diff --git a/test/CodeGen/WinEH/wineh-statenumbering-cleanups.ll b/test/CodeGen/WinEH/wineh-statenumbering-cleanups.ll
new file mode 100644
index 000000000000..f5889f03965b
--- /dev/null
+++ b/test/CodeGen/WinEH/wineh-statenumbering-cleanups.ll
@@ -0,0 +1,62 @@
+; RUN: sed -e s/.Cxx:// %s | opt -mtriple=x86-pc-windows-msvc -S -x86-winehstate | FileCheck %s
+; RUN: sed -e s/.SEH:// %s | opt -mtriple=x86-pc-windows-msvc -S -x86-winehstate | FileCheck %s
+
+declare i32 @__CxxFrameHandler3(...)
+declare i32 @_except_handler3(...)
+declare void @dummy_filter()
+
+declare void @f(i32)
+
+; CHECK-LABEL: define void @test2(
+;Cxx: define void @test2(i1 %b) personality i32 (...)* @__CxxFrameHandler3 {
+;SEH: define void @test2(i1 %b) personality i32 (...)* @_except_handler3 {
+entry:
+ ; CHECK: entry:
+ ; CHECK: store i32 1
+ ; CHECK: invoke void @f(i32 1)
+ invoke void @f(i32 1)
+ to label %exit unwind label %cleanup.pad
+cleanup.pad:
+ %cleanup = cleanuppad within none []
+ br i1 %b, label %left, label %right
+left:
+ cleanupret from %cleanup unwind label %catch.pad
+right:
+ cleanupret from %cleanup unwind label %catch.pad
+catch.pad:
+ %cs1 = catchswitch within none [label %catch.body] unwind to caller
+catch.body:
+;Cxx: %catch = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+;SEH: %catch = catchpad within %cs1 [void ()* @dummy_filter]
+ catchret from %catch to label %exit
+exit:
+ ret void
+}
+
+; CHECK-LABEL: define void @test3(
+;Cxx: define void @test3() personality i32 (...)* @__CxxFrameHandler3 {
+;SEH: define void @test3() personality i32 (...)* @_except_handler3 {
+entry:
+ ; CHECK: entry:
+ ; CHECK: store i32 0
+ ; CHECK: invoke void @f(i32 1)
+ invoke void @f(i32 1)
+ to label %exit unwind label %cleanup.pad
+cleanup.pad:
+ ; CHECK: cleanup.pad:
+ ; CHECK: store i32 1
+ ; CHECK: invoke void @f(i32 0)
+ %cleanup = cleanuppad within none []
+ invoke void @f(i32 0)
+ to label %unreachable unwind label %catch.pad
+unreachable:
+ unreachable
+catch.pad:
+ %cs1 = catchswitch within none [label %catch.body] unwind to caller
+catch.body:
+;Cxx: %catch = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+;SEH: %catch = catchpad within %cs1 [void ()* @dummy_filter]
+ catchret from %catch to label %exit
+exit:
+ ret void
+}
diff --git a/test/CodeGen/WinEH/wineh-statenumbering.ll b/test/CodeGen/WinEH/wineh-statenumbering.ll
new file mode 100644
index 000000000000..dab7fde61a66
--- /dev/null
+++ b/test/CodeGen/WinEH/wineh-statenumbering.ll
@@ -0,0 +1,148 @@
+; RUN: opt -mtriple=i686-pc-windows-msvc -S -x86-winehstate < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i8*, i32, i32, i32, i32, i8* }
+%eh.CatchableTypeArray.1 = type { i32, [1 x %eh.CatchableType*] }
+%eh.ThrowInfo = type { i32, i8*, i8*, i8* }
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i32 -1, i32 0, i32 4, i8* null }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x %eh.CatchableType*] [%eh.CatchableType* @"_CT??_R0H@84"] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i8* null, i8* null, i8* bitcast (%eh.CatchableTypeArray.1* @_CTA1H to i8*) }, section ".xdata", comdat
+
+define i32 @main() #0 personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %tmp = alloca i32, align 4
+ ; CHECK: entry:
+ ; CHECK: store i32 -1
+ ; CHECK: call void @g(i32 3)
+ call void @g(i32 3)
+ store i32 0, i32* %tmp, align 4
+ %0 = bitcast i32* %tmp to i8*
+ ; CHECK: store i32 0
+ ; CHECK: invoke void @_CxxThrowException(
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* nonnull @_TI1H) #1
+ to label %unreachable.for.entry unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ ; CHECK: catch:
+ ; CHECK: store i32 2
+ ; CHECK: invoke void @_CxxThrowException(
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+ to label %unreachable unwind label %catch.dispatch.1
+
+catch.dispatch.1: ; preds = %catch
+ %cs2 = catchswitch within %1 [label %catch.3] unwind to caller
+catch.3: ; preds = %catch.dispatch.1
+ %2 = catchpad within %cs2 [i8* null, i32 u0x40, i8* null]
+ ; CHECK: catch.3:
+ ; CHECK: store i32 3
+ ; CHECK: call void @g(i32 1)
+ call void @g(i32 1)
+ catchret from %2 to label %try.cont
+
+try.cont: ; preds = %catch.3
+ ; CHECK: try.cont:
+ ; CHECK: store i32 1
+ ; CHECK: call void @g(i32 2)
+ call void @g(i32 2)
+ unreachable
+
+unreachable: ; preds = %catch
+ unreachable
+
+unreachable.for.entry: ; preds = %entry
+ unreachable
+}
+
+define i32 @nopads() #0 personality i32 (...)* @__CxxFrameHandler3 {
+ ret i32 0
+}
+
+; CHECK-LABEL: define i32 @nopads()
+; CHECK-NEXT: ret i32 0
+; CHECK-NOT: __ehhandler$nopads
+
+; CHECK-LABEL: define void @PR25926()
+define void @PR25926() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ ; CHECK: entry:
+ ; CHECK: store i32 -1
+ ; CHECK: store i32 0
+ ; CHECK: invoke void @_CxxThrowException(
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null)
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+ ; CHECK: catch:
+ ; CHECK: store i32 3
+ ; CHECK: invoke void @_CxxThrowException(
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) [ "funclet"(token %1) ]
+ to label %unreachable1 unwind label %catch.dispatch1
+
+catch.dispatch1: ; preds = %catch
+ %2 = catchswitch within %1 [label %catch2] unwind label %ehcleanup
+
+catch2: ; preds = %catch.dispatch1
+ %3 = catchpad within %2 [i8* null, i32 64, i8* null]
+ catchret from %3 to label %try.cont
+
+try.cont: ; preds = %catch2
+ ; CHECK: try.cont:
+ ; CHECK: store i32 1
+ ; CHECK: call void @dtor()
+ call void @dtor() #3 [ "funclet"(token %1) ]
+ catchret from %1 to label %try.cont4
+
+try.cont4: ; preds = %try.cont
+ ret void
+
+ehcleanup: ; preds = %catch.dispatch1
+ %4 = cleanuppad within %1 []
+ ; CHECK: ehcleanup:
+ ; CHECK: store i32 -1
+ ; CHECK: call void @dtor()
+ call void @dtor() #3 [ "funclet"(token %4) ]
+ cleanupret from %4 unwind to caller
+
+unreachable: ; preds = %entry
+ unreachable
+
+unreachable1: ; preds = %catch
+ unreachable
+}
+
+declare void @g(i32) #0
+
+declare void @dtor()
+
+declare x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.8.0 (trunk 245153) (llvm/trunk 245238)"}
diff --git a/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
index 795d4647a3f6..609dbc155ed9 100644
--- a/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
+++ b/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s
; PR933
+; REQUIRES: default_triple
define fastcc i1 @test() {
ret i1 true
diff --git a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
index dd670648daf6..332816e22cda 100644
--- a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
+++ b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
@@ -6,8 +6,8 @@
define i32 @test(i32 %argc, i8** %argv) nounwind {
entry:
; CHECK: cmpl $2
-; CHECK-NEXT: jne
-; CHECK-NEXT: %bb2
+; CHECK-NEXT: je
+; CHECK-NEXT: %entry
switch i32 %argc, label %UnifiedReturnBlock [
i32 1, label %bb
diff --git a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
index 62c503da35a6..65b577b1e7d7 100644
--- a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
+++ b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s
-@__gthrw_pthread_once = weak alias i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
+@__gthrw_pthread_once = weak alias i32 (i32*, void ()*), i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
define weak i32 @pthread_once(i32*, void ()*) {
ret i32 0
diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
index a9e3f33ec618..2ca003e052aa 100644
--- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
+++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -8,7 +8,7 @@ target triple = "i386-pc-linux-gnu"
@__resp = thread_local global %struct.__res_state* @_res ; <%struct.__res_state**> [#uses=1]
@_res = global %struct.__res_state zeroinitializer, section ".bss" ; <%struct.__res_state*> [#uses=1]
-@__libc_resp = hidden thread_local alias %struct.__res_state** @__resp ; <%struct.__res_state**> [#uses=2]
+@__libc_resp = hidden thread_local alias %struct.__res_state*, %struct.__res_state** @__resp ; <%struct.__res_state**> [#uses=2]
define i32 @foo() {
; CHECK-LABEL: foo:
diff --git a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
index d60d0c2fb0bc..d484b45a5763 100644
--- a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
@@ -6,7 +6,7 @@
%struct.locale_data = type { i8*, i8*, i32, i32, { void (%struct.locale_data*)*, %struct.anon }, i32, i32, i32, [0 x %struct.locale_data_value] }
%struct.locale_data_value = type { i32* }
-@wcstoll_l = alias i64 (i32*, i32**, i32, %struct.__locale_struct*)* @__wcstoll_l
+@wcstoll_l = alias i64 (i32*, i32**, i32, %struct.__locale_struct*), i64 (i32*, i32**, i32, %struct.__locale_struct*)* @__wcstoll_l
define i64 @____wcstoll_l_internal(i32* %nptr, i32** %endptr, i32 %base, i32 %group, %struct.__locale_struct* %loc) nounwind {
entry:
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index 422d68e7ff49..de95e7925f08 100644
--- a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=x86 | not grep movsd
-; RUN: llc < %s -march=x86 | grep movw
-; RUN: llc < %s -march=x86 | grep addw
+; RUN: llc < %s -march=x86 | FileCheck %s
; These transforms are turned off for load volatiles and stores.
; Check that they weren't turned off for all loads and stores!
+; CHECK-LABEL: f:
+; CHECK-NOT: movsd
+; CHECK: movw
+; CHECK: addw
@atomic = global double 0.000000e+00 ; <double*> [#uses=1]
@atomic2 = global double 0.000000e+00 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index 757f1ff68253..84d373d70a2d 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,18 +1,19 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpcklpd
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpckhpd
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,+mmx | FileCheck %s
; originally from PR2687, but things don't work that way any more.
; there are no MMX instructions here; we use XMM.
define <2 x double> @a(<2 x i32> %x) nounwind {
entry:
+; CHECK-LABEL: a
+; CHECK-NOT: unpcklpd
%y = sitofp <2 x i32> %x to <2 x double>
ret <2 x double> %y
}
define <2 x i32> @b(<2 x double> %x) nounwind {
entry:
+; CHECK-LABEL: b
+; CHECK-NOT: unpckhpd
%y = fptosi <2 x double> %x to <2 x i32>
ret <2 x i32> %y
}
@@ -21,12 +22,18 @@ entry:
define <2 x double> @a2(x86_mmx %x) nounwind {
entry:
+; CHECK-LABEL: a2
+; CHECK: cvtpi2pd
+; CHECK-NOT: cvtpi2pd
%y = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %x)
ret <2 x double> %y
}
define x86_mmx @b2(<2 x double> %x) nounwind {
entry:
+; CHECK-LABEL: b2
+; CHECK: cvttpd2pi
+; CHECK-NOT: cvttpd2pi
%y = tail call x86_mmx @llvm.x86.sse.cvttpd2pi (<2 x double> %x)
ret x86_mmx %y
}
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index 6c177e5b5f5a..2abb5ba7cd52 100644
--- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -76,15 +76,15 @@ declare i64 @strlen(i8*) nounwind readonly
declare void @llvm.stackrestore(i8*) nounwind
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s1", line: 2, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !2, type: !3)
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !17, enums: !18, retainedTypes: !18)
+!0 = !DILocalVariable(name: "s1", line: 2, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !2, type: !3)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !17, enums: !18, retainedTypes: !18)
!3 = !DISubroutineType(types: !4)
!4 = !{!5, !6}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !5)
!7 = !DILocation(line: 2, scope: !1)
-!8 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "str.0", line: 3, scope: !1, file: !2, type: !9)
+!8 = !DILocalVariable(name: "str.0", line: 3, scope: !1, file: !2, type: !9)
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, scope: !2, baseType: !10)
!10 = !DICompositeType(tag: DW_TAG_array_type, size: 8, align: 8, scope: !2, baseType: !5, elements: !11)
!11 = !{!12}
diff --git a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 7c87598d0d9c..609be3bb2e54 100644
--- a/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -4,15 +4,23 @@
; a shr (X, -8) that gets subsequently "optimized away" as undef
; PR4254
+; after fixing PR24373
+; shlq $56, %rdi
+; sarq $48, %rdi
+; folds into
+; movsbq %dil, %rax
+; shlq $8, %rax
+; which is better for x86
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
define i64 @foo(i64 %b) nounwind readnone {
entry:
; CHECK-LABEL: foo:
-; CHECK: shlq $56, %rdi
-; CHECK: sarq $48, %rdi
-; CHECK: leaq 1(%rdi), %rax
+; CHECK: movsbq %dil, %rax
+; CHECK: shlq $8, %rax
+; CHECK: orq $1, %rax
%shl = shl i64 %b, 56 ; <i64> [#uses=1]
%shr = ashr i64 %shl, 48 ; <i64> [#uses=1]
%add5 = or i64 %shr, 1 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index dfb98bb1ab39..a74aa2dd4623 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,7 +1,9 @@
; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
-; CHECK: subq $40, %rsp
-; CHECK: movaps %xmm8, 16(%rsp)
-; CHECK: movaps %xmm7, (%rsp)
+; CHECK: pushq %rbp
+; CHECK: subq $32, %rsp
+; CHECK: leaq 32(%rsp), %rbp
+; CHECK: movaps %xmm8, -16(%rbp)
+; CHECK: movaps %xmm7, -32(%rbp)
define i32 @a() nounwind {
entry:
diff --git a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
index 8bb3dc63a3b9..71a560a63ec5 100644
--- a/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
+++ b/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; REQUIRES: default_triple
define <2 x i64> @_mm_insert_epi16(<2 x i64> %a, i32 %b, i32 %imm) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
index 92419fcb8b81..e26a8608a496 100644
--- a/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
+++ b/test/CodeGen/X86/2009-06-06-ConcatVectors.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; REQUIRES: default_triple
define <2 x i64> @_mm_movpi64_pi64(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2009-10-16-Scope.ll b/test/CodeGen/X86/2009-10-16-Scope.ll
index bda7340b3643..06a56ad90205 100644
--- a/test/CodeGen/X86/2009-10-16-Scope.ll
+++ b/test/CodeGen/X86/2009-10-16-Scope.ll
@@ -24,9 +24,9 @@ declare i32 @foo(i32) ssp
!0 = !DILocation(line: 5, column: 2, scope: !1)
!1 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !2)
-!2 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
-!4 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "count_", line: 5, scope: !5, file: !3, type: !6)
+!2 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
+!4 = !DILocalVariable(name: "count_", line: 5, scope: !5, file: !3, type: !6)
!5 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !1)
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!7 = !DILocation(line: 6, column: 1, scope: !2)
diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll
index db56ae65d51e..c15e7a79bfa1 100644
--- a/test/CodeGen/X86/2010-01-18-DbgValue.ll
+++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -6,7 +6,7 @@
%struct.Pt = type { double, double }
%struct.Rect = type { %struct.Pt, %struct.Pt }
-define double @foo(%struct.Rect* byval %my_r0) nounwind ssp {
+define double @foo(%struct.Rect* byval %my_r0) nounwind ssp !dbg !1 {
entry:
;CHECK: DEBUG_VALUE
%retval = alloca double ; <double*> [#uses=2]
@@ -31,10 +31,10 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!21}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "my_r0", line: 11, arg: 0, scope: !1, file: !2, type: !7)
-!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !19, scope: !2, type: !4, function: double (%struct.Rect*)* @foo)
+!0 = !DILocalVariable(name: "my_r0", line: 11, arg: 1, scope: !1, file: !2, type: !7)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !19, scope: !2, type: !4)
!2 = !DIFile(filename: "b2.c", directory: "/tmp/")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !19, enums: !20, retainedTypes: !20, subprograms: !18)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !19, enums: !20, retainedTypes: !20, subprograms: !18)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !7}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
diff --git a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
index e65edac86ecc..eb077c074bc2 100644
--- a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
+++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
@@ -1,4 +1,6 @@
; RUN: llc -O1 < %s
+; REQUIRES: default_triple
+
; ModuleID = 'pr6157.bc'
; formerly crashed in SelectionDAGBuilder
@@ -16,7 +18,7 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !15, enums: !16, retainedTypes: !16)
+!0 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !15, enums: !16, retainedTypes: !16)
!1 = !DIDerivedType(tag: DW_TAG_const_type, size: 192, align: 64, file: !15, scope: !0, baseType: !2)
!2 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 1, size: 192, align: 64, file: !15, scope: !0, elements: !3)
!3 = !{!4, !6, !7}
@@ -24,9 +26,9 @@ declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!6 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 1, size: 64, align: 64, offset: 64, file: !15, scope: !2, baseType: !5)
!7 = !DIDerivedType(tag: DW_TAG_member, name: "z", line: 1, size: 64, align: 64, offset: 128, file: !15, scope: !2, baseType: !5)
-!8 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "t", line: 5, scope: !9, file: !0, type: !2)
+!8 = !DILocalVariable(name: "t", line: 5, scope: !9, file: !0, type: !2)
!9 = distinct !DILexicalBlock(line: 0, column: 0, file: null, scope: !10)
-!10 = !DISubprogram(name: "foo", linkageName: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !0, type: !11)
+!10 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !0, type: !11)
!11 = !DISubroutineType(types: !12)
!12 = !{!13}
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 3b99e91915f0..f157d5011b02 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -7,7 +7,7 @@
%0 = type { double }
-define hidden %0 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone {
+define hidden %0 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone !dbg !1 {
entry:
tail call void @llvm.dbg.value(metadata float %a, i64 0, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
tail call void @llvm.dbg.value(metadata float %b, i64 0, metadata !11, metadata !DIExpression()), !dbg !DILocation(scope: !1)
@@ -199,10 +199,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!48}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1921, arg: 0, scope: !1, file: !2, type: !9)
-!1 = !DISubprogram(name: "__divsc3", linkageName: "__divsc3", line: 1922, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 1922, file: !45, scope: !2, type: !4, function: %0 (float, float, float, float)* @__divsc3, variables: !43)
+!0 = !DILocalVariable(name: "a", line: 1921, arg: 1, scope: !1, file: !2, type: !9)
+!1 = distinct !DISubprogram(name: "__divsc3", linkageName: "__divsc3", line: 1922, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 1922, file: !45, scope: !2, type: !4, variables: !43)
!2 = !DIFile(filename: "libgcc2.c", directory: "/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !45, enums: !47, retainedTypes: !47, subprograms: !44, imports: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !45, enums: !47, retainedTypes: !47, subprograms: !44, imports: null)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !9, !9, !9, !9}
!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "SCtype", line: 170, file: !46, scope: !7, baseType: !8)
@@ -210,14 +210,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "complex float", size: 64, align: 32, encoding: DW_ATE_complex_float)
!9 = !DIDerivedType(tag: DW_TAG_typedef, name: "SFtype", line: 167, file: !46, scope: !7, baseType: !10)
!10 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 1921, arg: 0, scope: !1, file: !2, type: !9)
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 1921, arg: 0, scope: !1, file: !2, type: !9)
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "d", line: 1921, arg: 0, scope: !1, file: !2, type: !9)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "denom", line: 1923, scope: !15, file: !2, type: !9)
+!11 = !DILocalVariable(name: "b", line: 1921, arg: 2, scope: !1, file: !2, type: !9)
+!12 = !DILocalVariable(name: "c", line: 1921, arg: 3, scope: !1, file: !2, type: !9)
+!13 = !DILocalVariable(name: "d", line: 1921, arg: 4, scope: !1, file: !2, type: !9)
+!14 = !DILocalVariable(name: "denom", line: 1923, scope: !15, file: !2, type: !9)
!15 = distinct !DILexicalBlock(line: 1922, column: 0, file: !45, scope: !1)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ratio", line: 1923, scope: !15, file: !2, type: !9)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 1923, scope: !15, file: !2, type: !9)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 1923, scope: !15, file: !2, type: !9)
+!16 = !DILocalVariable(name: "ratio", line: 1923, scope: !15, file: !2, type: !9)
+!17 = !DILocalVariable(name: "x", line: 1923, scope: !15, file: !2, type: !9)
+!18 = !DILocalVariable(name: "y", line: 1923, scope: !15, file: !2, type: !9)
!19 = !DILocation(line: 1929, scope: !15)
!20 = !DILocation(line: 1931, scope: !15)
!21 = !DILocation(line: 1932, scope: !15)
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 3670c556aa79..a34e7bd9fe43 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-darwin10"
@llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.a*)* @bar to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-define i8* @bar(%struct.a* %myvar) nounwind optsize noinline ssp {
+define i8* @bar(%struct.a* %myvar) nounwind optsize noinline ssp !dbg !9 {
entry:
tail call void @llvm.dbg.value(metadata %struct.a* %myvar, i64 0, metadata !8, metadata !DIExpression()), !dbg !DILocation(scope: !9)
%0 = getelementptr inbounds %struct.a, %struct.a* %myvar, i64 0, i32 0, !dbg !28 ; <i32*> [#uses=1]
@@ -26,14 +26,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!0 = !DIGlobalVariable(name: "ret", line: 7, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !3)
!1 = !DIFile(filename: "foo.c", directory: "/tmp/")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !36, enums: !37, retainedTypes: !37, subprograms: !32, globals: !31, imports: !37)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !36, enums: !37, retainedTypes: !37, subprograms: !32, globals: !31, imports: !37)
!3 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!4 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 12, arg: 0, scope: !5, file: !1, type: !3)
-!5 = !DISubprogram(name: "foo", linkageName: "foo", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 13, file: !36, scope: !1, type: !6, function: void (i32)* @foo, variables: !33)
+!4 = !DILocalVariable(name: "x", line: 12, arg: 1, scope: !5, file: !1, type: !3)
+!5 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 13, file: !36, scope: !1, type: !6, variables: !33)
!6 = !DISubroutineType(types: !7)
!7 = !{null, !3}
-!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "myvar", line: 17, arg: 0, scope: !9, file: !1, type: !13)
-!9 = !DISubprogram(name: "bar", linkageName: "bar", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 17, file: !36, scope: !1, type: !10, function: i8* (%struct.a*)* @bar, variables: !34)
+!8 = !DILocalVariable(name: "myvar", line: 17, arg: 1, scope: !9, file: !1, type: !13)
+!9 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 17, file: !36, scope: !1, type: !10, variables: !34)
!10 = !DISubroutineType(types: !11)
!11 = !{!12, !13}
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !36, scope: !1, baseType: null)
@@ -42,15 +42,15 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!15 = !{!16, !17}
!16 = !DIDerivedType(tag: DW_TAG_member, name: "c", line: 3, size: 32, align: 32, file: !36, scope: !14, baseType: !3)
!17 = !DIDerivedType(tag: DW_TAG_member, name: "d", line: 4, size: 64, align: 64, offset: 64, file: !36, scope: !14, baseType: !13)
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 22, arg: 0, scope: !19, file: !1, type: !3)
-!19 = !DISubprogram(name: "main", linkageName: "main", line: 22, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 22, file: !36, scope: !1, type: !20, variables: !35)
+!18 = !DILocalVariable(name: "argc", line: 22, arg: 1, scope: !19, file: !1, type: !3)
+!19 = distinct !DISubprogram(name: "main", linkageName: "main", line: 22, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 22, file: !36, scope: !1, type: !20, variables: !35)
!20 = !DISubroutineType(types: !21)
!21 = !{!3, !3, !22}
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !36, scope: !1, baseType: !23)
!23 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !36, scope: !1, baseType: !24)
!24 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 22, arg: 0, scope: !19, file: !1, type: !22)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "e", line: 23, scope: !27, file: !1, type: !14)
+!25 = !DILocalVariable(name: "argv", line: 22, arg: 2, scope: !19, file: !1, type: !22)
+!26 = !DILocalVariable(name: "e", line: 23, scope: !27, file: !1, type: !14)
!27 = distinct !DILexicalBlock(line: 22, column: 0, file: !36, scope: !19)
!28 = !DILocation(line: 18, scope: !29)
!29 = distinct !DILexicalBlock(line: 17, column: 0, file: !36, scope: !9)
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index c5201614fdd1..7967d45c2ee8 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -2,7 +2,7 @@
; RUN: llc -mtriple=x86_64-apple-darwin -regalloc=basic < %s | FileCheck %s
; Test to check separate label for inlined function argument.
-define i32 @foo(i32 %y) nounwind optsize ssp {
+define i32 @foo(i32 %y) nounwind optsize ssp !dbg !1 {
entry:
tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
%0 = tail call i32 (...) @zoo(i32 %y) nounwind, !dbg !9 ; <i32> [#uses=1]
@@ -13,10 +13,10 @@ declare i32 @zoo(...)
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-define i32 @bar(i32 %x) nounwind optsize ssp {
+define i32 @bar(i32 %x) nounwind optsize ssp !dbg !8 {
entry:
tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !7, metadata !DIExpression()), !dbg !DILocation(scope: !8)
- tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !1)
+ tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !1, inlinedAt: !DILocation(scope: !8))
%0 = tail call i32 (...) @zoo(i32 1) nounwind, !dbg !12 ; <i32> [#uses=1]
%1 = add nsw i32 %0, %x, !dbg !13 ; <i32> [#uses=1]
ret i32 %1, !dbg !13
@@ -25,15 +25,15 @@ entry:
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!20}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 2, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 2, file: !18, scope: !2, type: !4, function: i32 (i32)* @foo, variables: !15)
+!0 = !DILocalVariable(name: "y", line: 2, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 2, file: !18, scope: !2, type: !4, variables: !15)
!2 = !DIFile(filename: "f.c", directory: "/tmp")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !18, enums: !19, retainedTypes: !19, subprograms: !17, imports: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !18, enums: !19, retainedTypes: !19, subprograms: !17, imports: null)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!7 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 6, arg: 0, scope: !8, file: !2, type: !6)
-!8 = !DISubprogram(name: "bar", linkageName: "bar", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 6, file: !18, scope: !2, type: !4, function: i32 (i32)* @bar, variables: !16)
+!7 = !DILocalVariable(name: "x", line: 6, arg: 1, scope: !8, file: !2, type: !6)
+!8 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 6, file: !18, scope: !2, type: !4, variables: !16)
!9 = !DILocation(line: 3, scope: !10)
!10 = distinct !DILexicalBlock(line: 2, column: 0, file: !18, scope: !1)
!11 = !{i32 1}
@@ -46,7 +46,7 @@ entry:
!18 = !DIFile(filename: "f.c", directory: "/tmp")
!19 = !{}
-;CHECK: DEBUG_VALUE: bar:x <- E
+;CHECK: DEBUG_VALUE: bar:x <- %E
;CHECK: Ltmp
;CHECK: DEBUG_VALUE: foo:y <- 1{{$}}
!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index 757c92808e11..1be800cdfcf0 100644
--- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -7,8 +7,8 @@ target triple = "x86_64-apple-darwin10.2"
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.foo*, i32)* @_ZN3foo3bazEi to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-define i32 @_ZN3foo3bazEi(%struct.foo* nocapture %this, i32 %x) nounwind readnone optsize noinline ssp align 2 {
-;CHECK: DEBUG_VALUE: baz:this <- RDI{{$}}
+define i32 @_ZN3foo3bazEi(%struct.foo* nocapture %this, i32 %x) nounwind readnone optsize noinline ssp align 2 !dbg !8 {
+;CHECK: DEBUG_VALUE: baz:this <- %RDI{{$}}
entry:
tail call void @llvm.dbg.value(metadata %struct.foo* %this, i64 0, metadata !15, metadata !DIExpression()), !dbg !DILocation(scope: !8)
tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !16, metadata !DIExpression()), !dbg !DILocation(scope: !8)
@@ -23,35 +23,35 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!34}
!llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 0, scope: !1, file: !3, type: !12)
-!1 = !DISubprogram(name: "bar", linkageName: "_ZN3foo3barEi", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 11, file: !31, scope: !2, type: !9, function: i32 (%struct.foo*, i32)* null)
+!0 = !DILocalVariable(name: "this", line: 11, arg: 1, scope: !1, file: !3, type: !12)
+!1 = distinct !DISubprogram(name: "bar", linkageName: "_ZN3foo3barEi", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 11, file: !31, scope: !2, type: !9)
!2 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 3, size: 32, align: 32, file: !31, scope: !3, elements: !5)
!3 = !DIFile(filename: "foo.cp", directory: "/tmp/")
-!4 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 LLVM build", isOptimized: true, emissionKind: 0, file: !31, enums: !32, retainedTypes: !32, subprograms: !33)
+!4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 LLVM build", isOptimized: true, emissionKind: 0, file: !31, enums: !32, retainedTypes: !32, subprograms: !33)
!5 = !{!6, !1, !8}
!6 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 8, size: 32, align: 32, file: !31, scope: !2, baseType: !7)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DISubprogram(name: "baz", linkageName: "_ZN3foo3bazEi", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 15, file: !31, scope: !2, type: !9, function: i32 (%struct.foo*, i32)* @_ZN3foo3bazEi)
+!8 = distinct !DISubprogram(name: "baz", linkageName: "_ZN3foo3bazEi", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 15, file: !31, scope: !2, type: !9)
!9 = !DISubroutineType(types: !10)
!10 = !{!7, !11, !7}
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !31, scope: !3, baseType: !2)
!12 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !31, scope: !3, baseType: !13)
!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !31, scope: !3, baseType: !2)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 11, arg: 0, scope: !1, file: !3, type: !7)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 15, arg: 0, scope: !8, file: !3, type: !12)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 15, arg: 0, scope: !8, file: !3, type: !7)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 19, arg: 0, scope: !18, file: !3, type: !7)
-!18 = !DISubprogram(name: "main", linkageName: "main", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 19, file: !31, scope: !3, type: !19)
+!14 = !DILocalVariable(name: "x", line: 11, arg: 2, scope: !1, file: !3, type: !7)
+!15 = !DILocalVariable(name: "this", line: 15, arg: 1, scope: !8, file: !3, type: !12)
+!16 = !DILocalVariable(name: "x", line: 15, arg: 2, scope: !8, file: !3, type: !7)
+!17 = !DILocalVariable(name: "argc", line: 19, arg: 1, scope: !18, file: !3, type: !7)
+!18 = distinct !DISubprogram(name: "main", linkageName: "main", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 19, file: !31, scope: !3, type: !19)
!19 = !DISubroutineType(types: !20)
!20 = !{!7, !7, !21}
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !31, scope: !3, baseType: !22)
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !31, scope: !3, baseType: !23)
!23 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 19, arg: 0, scope: !18, file: !3, type: !21)
-!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 20, scope: !26, file: !3, type: !2)
+!24 = !DILocalVariable(name: "argv", line: 19, arg: 2, scope: !18, file: !3, type: !21)
+!25 = !DILocalVariable(name: "a", line: 20, scope: !26, file: !3, type: !2)
!26 = distinct !DILexicalBlock(line: 19, column: 0, file: !31, scope: !27)
!27 = distinct !DILexicalBlock(line: 19, column: 0, file: !31, scope: !18)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 21, scope: !26, file: !3, type: !7)
+!28 = !DILocalVariable(name: "b", line: 21, scope: !26, file: !3, type: !7)
!29 = !DILocation(line: 16, scope: !30)
!30 = distinct !DILexicalBlock(line: 15, column: 0, file: !31, scope: !8)
!31 = !DIFile(filename: "foo.cp", directory: "/tmp/")
diff --git a/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
index 3ce36eec400a..5e565a1a667f 100644
--- a/test/CodeGen/X86/2010-07-06-DbgCrash.ll
+++ b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
@@ -1,18 +1,19 @@
; RUN: llc -O0 -relocation-model pic < %s -o /dev/null
+; REQUIRES: default_triple
; PR7545
@.str = private constant [4 x i8] c"one\00", align 1 ; <[4 x i8]*> [#uses=1]
@.str1 = private constant [4 x i8] c"two\00", align 1 ; <[5 x i8]*> [#uses=1]
@C.9.2167 = internal constant [2 x i8*] [i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0)]
!38 = !DIFile(filename: "pbmsrch.c", directory: "/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch")
-!39 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !109, enums: !108, retainedTypes: !108)
+!39 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !109, enums: !108, retainedTypes: !108)
!46 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !109, baseType: !47)
!47 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!97 = !DISubprogram(name: "main", linkageName: "main", line: 73, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !39, type: !98)
+!97 = distinct !DISubprogram(name: "main", linkageName: "main", line: 73, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !39, type: !98)
!98 = !DISubroutineType(types: !99)
!99 = !{!100}
!100 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!101 = !{[2 x i8*]* @C.9.2167}
-!102 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "find_strings", line: 75, scope: !103, file: !38, type: !104)
+!102 = !DILocalVariable(name: "find_strings", line: 75, scope: !103, file: !38, type: !104)
!103 = distinct !DILexicalBlock(line: 73, column: 0, file: null, scope: !97)
!104 = !DICompositeType(tag: DW_TAG_array_type, size: 85312, align: 64, file: !109, baseType: !46, elements: !105)
!105 = !{!106}
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index 6129e78fd348..d305d678c596 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -3,7 +3,7 @@
%struct.SVal = type { i8*, i32 }
-define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp {
+define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp !dbg !17 {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !DIExpression()), !dbg !24
@@ -31,7 +31,7 @@ return: ; preds = %bb2
ret i32 %.0, !dbg !29
}
-define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 {
+define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 !dbg !16 {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !DIExpression()), !dbg !34
@@ -47,7 +47,7 @@ return: ; preds = %entry
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define i32 @main() nounwind ssp {
+define i32 @main() nounwind ssp !dbg !20 {
entry:
%0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3]
%v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4]
@@ -81,7 +81,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!0 = !DISubprogram(name: "SVal", line: 11, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !47, scope: !1, type: !14)
!1 = !DICompositeType(tag: DW_TAG_structure_type, name: "SVal", line: 1, size: 128, align: 64, file: !47, scope: !2, elements: !4)
!2 = !DIFile(filename: "small.cc", directory: "/Users/manav/R8248330")
-!3 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !46, imports: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !46, imports: null)
!4 = !{!5, !7, !0, !9}
!5 = !DIDerivedType(tag: DW_TAG_member, name: "Data", line: 7, size: 64, align: 64, file: !47, scope: !1, baseType: !6)
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !47, scope: !2, baseType: null)
@@ -94,35 +94,35 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !DISubroutineType(types: !15)
!15 = !{null, !12}
-!16 = !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !47, scope: !1, type: !14, function: void (%struct.SVal*)* @_ZN4SValC1Ev)
-!17 = !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 16, file: !47, scope: !2, type: !18, function: i32 (i32, %struct.SVal*)* @_Z3fooi4SVal)
+!16 = distinct !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !47, scope: !1, type: !14)
+!17 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 16, file: !47, scope: !2, type: !18)
!18 = !DISubroutineType(types: !19)
!19 = !{!13, !13, !1}
-!20 = !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 23, file: !47, scope: !2, type: !21, function: i32 ()* @main)
+!20 = distinct !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 23, file: !47, scope: !2, type: !21)
!21 = !DISubroutineType(types: !22)
!22 = !{!13}
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 16, arg: 0, scope: !17, file: !2, type: !13)
+!23 = !DILocalVariable(name: "i", line: 16, arg: 1, scope: !17, file: !2, type: !13)
!24 = !DILocation(line: 16, scope: !17)
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "location", line: 16, arg: 0, scope: !17, file: !2, type: !26)
+!25 = !DILocalVariable(name: "location", line: 16, arg: 2, scope: !17, file: !2, type: !26)
!26 = !DIDerivedType(tag: DW_TAG_reference_type, name: "SVal", size: 64, align: 64, file: !47, scope: !2, baseType: !1)
!27 = !DILocation(line: 17, scope: !28)
!28 = distinct !DILexicalBlock(line: 16, column: 0, file: !47, scope: !17)
!29 = !DILocation(line: 18, scope: !28)
!30 = !DILocation(line: 20, scope: !28)
-!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 0, scope: !16, file: !2, type: !32)
+!31 = !DILocalVariable(name: "this", line: 11, arg: 1, scope: !16, file: !2, type: !32)
!32 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !47, scope: !2, baseType: !33)
!33 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !47, scope: !2, baseType: !1)
!34 = !DILocation(line: 11, scope: !16)
!35 = !DILocation(line: 11, scope: !36)
!36 = distinct !DILexicalBlock(line: 11, column: 0, file: !47, scope: !37)
!37 = distinct !DILexicalBlock(line: 11, column: 0, file: !47, scope: !16)
-!38 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "v", line: 24, scope: !39, file: !2, type: !1)
+!38 = !DILocalVariable(name: "v", line: 24, scope: !39, file: !2, type: !1)
!39 = distinct !DILexicalBlock(line: 23, column: 0, file: !47, scope: !40)
!40 = distinct !DILexicalBlock(line: 23, column: 0, file: !47, scope: !20)
!41 = !DILocation(line: 24, scope: !39)
!42 = !DILocation(line: 25, scope: !39)
!43 = !DILocation(line: 26, scope: !39)
-!44 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 26, scope: !39, file: !2, type: !13)
+!44 = !DILocalVariable(name: "k", line: 26, scope: !39, file: !2, type: !13)
!45 = !DILocation(line: 27, scope: !39)
!47 = !DIFile(filename: "small.cc", directory: "/Users/manav/R8248330")
!48 = !{}
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
index d94bd1c79f91..4303ca991a86 100644
--- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -2,12 +2,12 @@
; Radar 8286101
; CHECK: .file {{[0-9]+}} "<stdin>"
-define i32 @foo() nounwind ssp {
+define i32 @foo() nounwind ssp !dbg !0 {
entry:
ret i32 42, !dbg !8
}
-define i32 @bar() nounwind ssp {
+define i32 @bar() nounwind ssp !dbg !6 {
entry:
ret i32 21, !dbg !10
}
@@ -15,13 +15,13 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!17}
-!0 = !DISubprogram(name: "foo", linkageName: "foo", line: 53, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !14, scope: !1, type: !3, function: i32 ()* @foo)
+!0 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 53, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !14, scope: !1, type: !3)
!1 = !DIFile(filename: "", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 114084)", isOptimized: false, emissionKind: 0, file: !15, enums: !16, retainedTypes: !16, subprograms: !13)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 114084)", isOptimized: false, emissionKind: 0, file: !15, enums: !16, retainedTypes: !16, subprograms: !13)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !15, scope: !7, type: !3, function: i32 ()* @bar)
+!6 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !15, scope: !7, type: !3)
!7 = !DIFile(filename: "bug.c", directory: "/private/tmp")
!8 = !DILocation(line: 53, column: 13, scope: !9)
!9 = distinct !DILexicalBlock(line: 53, column: 11, file: !14, scope: !0)
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index 124cc9a430e8..b091003585c2 100644
--- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -6,7 +6,7 @@ target triple = "i386-apple-darwin11.0.0"
%struct.bar = type { i32, i32 }
-define i32 @foo(%struct.bar* nocapture %i) nounwind readnone optsize noinline ssp {
+define i32 @foo(%struct.bar* nocapture %i) nounwind readnone optsize noinline ssp !dbg !0 {
; CHECK: TAG_formal_parameter
entry:
tail call void @llvm.dbg.value(metadata %struct.bar* %i, i64 0, metadata !6, metadata !DIExpression()), !dbg !12
@@ -18,13 +18,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!19}
-!0 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !17, scope: !1, type: !3, function: i32 (%struct.bar*)* @foo, variables: !16)
+!0 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !17, scope: !1, type: !3, variables: !16)
!1 = !DIFile(filename: "one.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 117922)", isOptimized: true, emissionKind: 0, file: !17, enums: !18, retainedTypes: !18, subprograms: !15, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 117922)", isOptimized: true, emissionKind: 0, file: !17, enums: !18, retainedTypes: !18, subprograms: !15, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 3, arg: 0, scope: !0, file: !1, type: !7)
+!6 = !DILocalVariable(name: "i", line: 3, arg: 1, scope: !0, file: !1, type: !7)
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !17, scope: !1, baseType: !8)
!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "bar", line: 2, size: 64, align: 32, file: !17, scope: !1, elements: !9)
!9 = !{!10, !11}
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 0ded66fa3bf9..661ec94fee4e 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -20,7 +20,7 @@ target triple = "x86_64-apple-darwin10.0.0"
@.str1 = private unnamed_addr constant [14 x i8] c"m=%u, z_s=%d\0A\00"
@str = internal constant [21 x i8] c"Failing test vector:\00"
-define i64 @gcd(i64 %a, i64 %b) nounwind readnone optsize noinline ssp {
+define i64 @gcd(i64 %a, i64 %b) nounwind readnone optsize noinline ssp !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata i64 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !18
tail call void @llvm.dbg.value(metadata i64 %b, i64 0, metadata !11, metadata !DIExpression()), !dbg !19
@@ -38,7 +38,7 @@ if.then: ; preds = %while.body
ret i64 %b.addr.0, !dbg !23
}
-define i32 @main() nounwind optsize ssp {
+define i32 @main() nounwind optsize ssp !dbg !6 {
entry:
%call = tail call i32 @rand() nounwind optsize, !dbg !24
tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !14, metadata !DIExpression()), !dbg !24
@@ -78,24 +78,24 @@ declare i32 @puts(i8* nocapture) nounwind
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!33}
-!0 = !DISubprogram(name: "gcd", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !31, scope: !1, type: !3, function: i64 (i64, i64)* @gcd, variables: !29)
+!0 = distinct !DISubprogram(name: "gcd", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !31, scope: !1, type: !3, variables: !29)
!1 = !DIFile(filename: "rem_small.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 124117)", isOptimized: true, emissionKind: 1, file: !31, enums: !32, retainedTypes: !32, subprograms: !28, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 124117)", isOptimized: true, emissionKind: 1, file: !31, enums: !32, retainedTypes: !32, subprograms: !28, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "main", line: 25, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !31, scope: !1, type: !7, function: i32 ()* @main, variables: !30)
+!6 = distinct !DISubprogram(name: "main", line: 25, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !31, scope: !1, type: !7, variables: !30)
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 5, arg: 0, scope: !0, file: !1, type: !5)
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 0, scope: !0, file: !1, type: !5)
-!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 6, scope: !13, file: !1, type: !5)
+!10 = !DILocalVariable(name: "a", line: 5, arg: 1, scope: !0, file: !1, type: !5)
+!11 = !DILocalVariable(name: "b", line: 5, arg: 2, scope: !0, file: !1, type: !5)
+!12 = !DILocalVariable(name: "c", line: 6, scope: !13, file: !1, type: !5)
!13 = distinct !DILexicalBlock(line: 5, column: 52, file: !31, scope: !0)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 26, scope: !15, file: !1, type: !16)
+!14 = !DILocalVariable(name: "m", line: 26, scope: !15, file: !1, type: !16)
!15 = distinct !DILexicalBlock(line: 25, column: 12, file: !31, scope: !6)
!16 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "z_s", line: 27, scope: !15, file: !1, type: !9)
+!17 = !DILocalVariable(name: "z_s", line: 27, scope: !15, file: !1, type: !9)
!18 = !DILocation(line: 5, column: 41, scope: !0)
!19 = !DILocation(line: 5, column: 49, scope: !0)
!20 = !DILocation(line: 7, column: 5, scope: !13)
diff --git a/test/CodeGen/X86/2011-10-21-widen-cmp.ll b/test/CodeGen/X86/2011-10-21-widen-cmp.ll
index 2fe645b07815..cb4648c382f7 100644
--- a/test/CodeGen/X86/2011-10-21-widen-cmp.ll
+++ b/test/CodeGen/X86/2011-10-21-widen-cmp.ll
@@ -1,15 +1,23 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
-
-target triple = "x86_64-unknown-linux-gnu"
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
; Check that a <4 x float> compare is generated and that we are
; not stuck in an endless loop.
-; CHECK: cmp_2_floats
-; CHECK: cmpordps
-; CHECK: ret
-
define void @cmp_2_floats() {
+; CHECK-LABEL: cmp_2_floats:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpordps %xmm0, %xmm0
+; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; CHECK-NEXT: psllq $32, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-NEXT: pslld $31, %xmm0
+; CHECK-NEXT: blendvps %xmm0, %xmm0
+; CHECK-NEXT: movlps %xmm0, (%rax)
+; CHECK-NEXT: retq
entry:
%0 = fcmp oeq <2 x float> undef, undef
%1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef
@@ -17,11 +25,13 @@ entry:
ret void
}
-; CHECK: cmp_2_doubles
-; CHECK: cmpordpd
-; CHECK: blendvpd
-; CHECK: ret
define void @cmp_2_doubles() {
+; CHECK-LABEL: cmp_2_doubles:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpordpd %xmm0, %xmm0
+; CHECK-NEXT: blendvpd %xmm0, %xmm0
+; CHECK-NEXT: movapd %xmm0, (%rax)
+; CHECK-NEXT: retq
entry:
%0 = fcmp oeq <2 x double> undef, undef
%1 = select <2 x i1> %0, <2 x double> undef, <2 x double> undef
@@ -29,11 +39,11 @@ entry:
ret void
}
-; CHECK: mp_11193
-; CHECK: psraw $15
-; CHECK: ret
-define void @mp_11193(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET)
-nounwind {
+define void @mp_11193(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
+; CHECK-LABEL: mp_11193:
+; CHECK: # BB#0: # %allocas
+; CHECK-NEXT: movl $-1082130432, (%rsi) # imm = 0xFFFFFFFFBF800000
+; CHECK-NEXT: retq
allocas:
%bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
%t = extractelement <8 x i1> %bincmp, i32 0
diff --git a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
index 2a1a5c9fb3ea..e6ba7551421d 100644
--- a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
+++ b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
@@ -4,13 +4,14 @@
define void @test(<4 x i32>* nocapture %p) nounwind {
; CHECK-LABEL: test:
; CHECK: vpxor %xmm0, %xmm0, %xmm0
- ; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
- ; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
+ ; CHECK-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0
+ ; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
; CHECK-NEXT: ret
- %a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> <i32 -8, i32 -9, i32 -10, i32 -11>, <4 x i32> zeroinitializer) nounwind
- %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
- %c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- store <4 x i32> %c, <4 x i32>* %p, align 1
+ %a = load <4 x i32>, <4 x i32>* %p, align 1
+ %b = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a, <4 x i32> zeroinitializer) nounwind
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
+ %d = shufflevector <8 x i32> %c, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ store <4 x i32> %d, <4 x i32>* %p, align 1
ret void
}
diff --git a/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
index 78cdfcf0e1f0..539d5547d5f1 100644
--- a/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
+++ b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
@@ -1,13 +1,17 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
-target triple = "x86_64-unknown-linux-gnu"
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
; Check that the booleans are converted using zext and not via sext.
; 0x1 means that we only look at the first bit.
-;CHECK: 0x1
-;CHECK-LABEL: ui_to_fp_conv:
-;CHECK: ret
define void @ui_to_fp_conv(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
+; CHECK-LABEL: ui_to_fp_conv:
+; CHECK: # BB#0: # %allocas
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,0.000000e+00,0.000000e+00]
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: movups %xmm1, 16(%rsi)
+; CHECK-NEXT: movups %xmm0, (%rsi)
+; CHECK-NEXT: retq
allocas:
%bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
%bool2float = uitofp <8 x i1> %bincmp to <8 x float>
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
index 677c902668bc..92ec107a0079 100644
--- a/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -2,20 +2,20 @@
define void @endless_loop() {
; CHECK-LABEL: endless_loop:
-; CHECK-NEXT: # BB#0:
-; CHECK-NEXT: vmovaps (%eax), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vmovsldup %xmm0, %xmm0 # xmm0 = xmm0[0,0,2,2]
-; CHECK-NEXT: vmovddup %xmm0, %xmm1 # xmm1 = xmm0[0,0]
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vblendps $128, %ymm1, %ymm2, %ymm1 # ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
-; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vblendps $1, %ymm0, %ymm2, %ymm0 # ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
-; CHECK-NEXT: vmovaps %ymm0, (%eax)
-; CHECK-NEXT: vmovaps %ymm1, (%eax)
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retl
+; CHECK-NEXT: # BB#0:
+; CHECK-NEXT: vmovaps (%eax), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
+; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
+; CHECK-NEXT: vmovaps %ymm0, (%eax)
+; CHECK-NEXT: vmovaps %ymm1, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
entry:
%0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32
%1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
diff --git a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
index a19aa52f302f..816577be15e7 100644
--- a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
+++ b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
@@ -26,6 +26,5 @@ if.end: ; preds = %if.then, %entry
ret void
; CHECK-LABEL: fn1:
-; CHECK: shrq $32, [[REG:%.*]]
-; CHECK: sete
+; CHECK: jb
}
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
index d1c0266941fd..eb237847e1bc 100644
--- a/test/CodeGen/X86/2012-1-10-buildvector.ll
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+avx -mtriple=i686-unknown-unknown | FileCheck %s
define void @bad_cast() {
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index a27db95ba127..50b486c6f925 100644
--- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -14,7 +14,7 @@
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double %tolsq, %struct.hgstruct.2.29* nocapture byval align 8 %hg) nounwind uwtable readonly ssp {
+define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double %tolsq, %struct.hgstruct.2.29* nocapture byval align 8 %hg) nounwind uwtable readonly ssp !dbg !14 {
entry:
call void @llvm.dbg.declare(metadata %struct.hgstruct.2.29* %hg, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !14)
%type = getelementptr inbounds %struct.node.0.27, %struct.node.0.27* %p, i64 0, i32 0
@@ -38,15 +38,15 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 168918) (llvm/trunk 168920)", isOptimized: true, emissionKind: 0, file: !11, enums: !2, retainedTypes: !2, subprograms: !13, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 168918) (llvm/trunk 168920)", isOptimized: true, emissionKind: 0, file: !11, enums: !2, retainedTypes: !2, subprograms: !13, globals: !2)
!2 = !{}
-!4 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "hg", line: 725, arg: 4, scope: !14, file: !5, type: !6)
+!4 = !DILocalVariable(name: "hg", line: 725, arg: 4, scope: !14, file: !5, type: !6)
!5 = !DIFile(filename: "MultiSource/Benchmarks/Olden/bh/newbh.c", directory: "MultiSource/Benchmarks/Olden/bh")
!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "hgstruct", line: 492, file: !11, baseType: !7)
!7 = !DICompositeType(tag: DW_TAG_structure_type, line: 487, size: 512, align: 64, file: !11)
!11 = !DIFile(filename: "MultiSource/Benchmarks/Olden/bh/newbh.c", directory: "MultiSource/Benchmarks/Olden/bh")
!12 = !{i32 1, !"Debug Info Version", i32 3}
!13 = !{!14}
-!14 = !DISubprogram(name: "subdivp", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !11, scope: !5, type: !15, function: i16 (%struct.node.0.27*, double, double, %struct.hgstruct.2.29* )* @subdivp)
+!14 = distinct !DISubprogram(name: "subdivp", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !11, scope: !5, type: !15)
!15 = !DISubroutineType(types: !16)
!16 = !{null}
diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
index 22227faab942..7ed416e36c22 100644
--- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -14,7 +14,7 @@
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define i32 @AttachGalley(%union.rec** nocapture %suspend_pt) nounwind uwtable ssp {
+define i32 @AttachGalley(%union.rec** nocapture %suspend_pt) nounwind uwtable ssp !dbg !21 {
entry:
%num14075 = alloca [20 x i8], align 16
br label %if.end33
@@ -65,10 +65,10 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!35}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 168918) (llvm/trunk 168920)", isOptimized: true, emissionKind: 0, file: !19, enums: !2, retainedTypes: !2, subprograms: !20, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 168918) (llvm/trunk 168920)", isOptimized: true, emissionKind: 0, file: !19, enums: !2, retainedTypes: !2, subprograms: !20, globals: !2)
!1 = !{!2}
!2 = !{}
-!4 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "num1", line: 815, scope: !5, file: !14, type: !15)
+!4 = !DILocalVariable(name: "num1", line: 815, scope: !5, file: !14, type: !15)
!5 = distinct !DILexicalBlock(line: 815, column: 0, file: !14, scope: !6)
!6 = distinct !DILexicalBlock(line: 812, column: 0, file: !14, scope: !7)
!7 = distinct !DILexicalBlock(line: 807, column: 0, file: !14, scope: !8)
@@ -86,7 +86,7 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
!19 = !DIFile(filename: "MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", directory: "MultiSource/Benchmarks/MiBench/consumer-typeset")
!20 = !{!21}
-!21 = !DISubprogram(name: "AttachGalley", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !19, scope: !14, type: !22, function: i32 (%union.rec**)* @AttachGalley)
+!21 = distinct !DISubprogram(name: "AttachGalley", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !19, scope: !14, type: !22)
!22 = !DISubroutineType(types: !23)
!23 = !{null}
@@ -99,7 +99,7 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
%"class.__gnu_cxx::hash_map" = type { %"class.__gnu_cxx::hashtable" }
%"class.__gnu_cxx::hashtable" = type { i64, i64, i64, i64, i64, i64 }
-define void @main() uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @main() uwtable ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !37 {
entry:
%X = alloca %"class.__gnu_cxx::hash_map", align 8
br i1 undef, label %cond.true, label %cond.end
@@ -134,11 +134,11 @@ declare void @_Znwm()
!llvm.dbg.cu = !{!30}
-!30 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169129) (llvm/trunk 169135)", isOptimized: true, emissionKind: 0, file: !34, enums: !2, retainedTypes: !2, subprograms: !36)
-!31 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "X", line: 29, scope: !37, type: !32)
+!30 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169129) (llvm/trunk 169135)", isOptimized: true, emissionKind: 0, file: !34, enums: !2, retainedTypes: !2, subprograms: !36)
+!31 = !DILocalVariable(name: "X", line: 29, scope: !37, type: !32)
!32 = !DIDerivedType(tag: DW_TAG_typedef, name: "HM", line: 28, file: !34, baseType: null)
!33 = !DIFile(filename: "SingleSource/Benchmarks/Shootout-C++/hash.cpp", directory: "SingleSource/Benchmarks/Shootout-C++")
!34 = !DIFile(filename: "SingleSource/Benchmarks/Shootout-C++/hash.cpp", directory: "SingleSource/Benchmarks/Shootout-C++")
!35 = !{i32 1, !"Debug Info Version", i32 3}
!36 = !{!37}
-!37 = !DISubprogram(name: "main", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !19, scope: !14, type: !22, function: void ()* @main)
+!37 = distinct !DISubprogram(name: "main", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !19, scope: !14, type: !22)
diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
index 77c017eb0e36..3f7a10ae035b 100644
--- a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
@@ -11,7 +11,7 @@
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define void @test() unnamed_addr uwtable ssp align 2 {
+define void @test() unnamed_addr uwtable ssp align 2 !dbg !2 {
entry:
%callback = alloca %struct.btCompoundLeafCallback, align 8
br i1 undef, label %if.end, label %if.then
@@ -36,10 +36,10 @@ invoke.cont44: ; preds = %if.end
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 168984) (llvm/trunk 168983)", isOptimized: true, emissionKind: 0, file: !6, subprograms: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 168984) (llvm/trunk 168983)", isOptimized: true, emissionKind: 0, file: !6, subprograms: !1)
!1 = !{!2}
-!2 = !DISubprogram(name: "test", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !6, scope: !5, type: !7, function: void ()* @test)
-!3 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "callback", line: 214, scope: !2, type: !4)
+!2 = distinct !DISubprogram(name: "test", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !6, scope: !5, type: !7)
+!3 = !DILocalVariable(name: "callback", line: 214, scope: !2, type: !4)
!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "btCompoundLeafCallback", line: 90, size: 512, align: 64, file: !6)
!5 = !DIFile(filename: "MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", directory: "MultiSource/Benchmarks/Bullet")
!6 = !DIFile(filename: "MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", directory: "MultiSource/Benchmarks/Bullet")
diff --git a/test/CodeGen/X86/3dnow-intrinsics.ll b/test/CodeGen/X86/3dnow-intrinsics.ll
index 0b27bf2d1853..fe8b95ec4655 100644
--- a/test/CodeGen/X86/3dnow-intrinsics.ll
+++ b/test/CodeGen/X86/3dnow-intrinsics.ll
@@ -277,7 +277,7 @@ entry:
declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone {
-; CHECK: pswapd
+; CHECK: pswapd {{.*#+}} mm0 = mem[1,0]
entry:
%0 = bitcast <2 x float> %a to x86_mmx
%1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
@@ -286,7 +286,7 @@ entry:
}
define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
-; CHECK: pswapd
+; CHECK: pswapd {{.*#+}} mm0 = mem[1,0]
entry:
%0 = bitcast <2 x i32> %a to x86_mmx
%1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
diff --git a/test/CodeGen/X86/GC/alloc_loop.ll b/test/CodeGen/X86/GC/alloc_loop.ll
index 2a505e80aac8..b924e1cee069 100644
--- a/test/CodeGen/X86/GC/alloc_loop.ll
+++ b/test/CodeGen/X86/GC/alloc_loop.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; REQUIRES: default_triple
declare i8* @llvm_gc_allocate(i32)
diff --git a/test/CodeGen/X86/GC/cg-O0.ll b/test/CodeGen/X86/GC/cg-O0.ll
index b4929425e94a..1a390c9eb1c1 100644
--- a/test/CodeGen/X86/GC/cg-O0.ll
+++ b/test/CodeGen/X86/GC/cg-O0.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -O0
+; REQUIRES: default_triple
define i32 @main() {
entry:
diff --git a/test/CodeGen/X86/GC/dynamic-frame-size.ll b/test/CodeGen/X86/GC/dynamic-frame-size.ll
index a3583d46a29a..9ec9b8b08507 100644
--- a/test/CodeGen/X86/GC/dynamic-frame-size.ll
+++ b/test/CodeGen/X86/GC/dynamic-frame-size.ll
@@ -17,12 +17,12 @@ define void @test(i8* %ptr) gc "erlang" {
; CHECK: .note.gc
; CHECK-NEXT: .align 8
; safe point count
-; CHECK .short 1
-; CHECK .long .Ltmp0
+; CHECK: .short 1
+; CHECK: .long .Ltmp0
; stack frame size (in words)
-; CHECK .short -1
+; CHECK: .short -1
; stack arity (arguments on the stack)
-; CHECK .short 0
+; CHECK: .short 0
; live root count
-; CHECK .short 0
+; CHECK: .short 0
diff --git a/test/CodeGen/X86/GC/lower_gcroot.ll b/test/CodeGen/X86/GC/lower_gcroot.ll
index c2d418ac50ef..8cccd78100f5 100644
--- a/test/CodeGen/X86/GC/lower_gcroot.ll
+++ b/test/CodeGen/X86/GC/lower_gcroot.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; REQUIRES: default_triple
%Env = type i8*
diff --git a/test/CodeGen/X86/MachineBranchProb.ll b/test/CodeGen/X86/MachineBranchProb.ll
index 408c6b9151c3..ee1c658d4c55 100644
--- a/test/CodeGen/X86/MachineBranchProb.ll
+++ b/test/CodeGen/X86/MachineBranchProb.ll
@@ -18,9 +18,9 @@ for.cond2: ; preds = %for.inc, %for.cond
%or.cond = or i1 %tobool, %cmp4
br i1 %or.cond, label %for.inc20, label %for.inc, !prof !0
; CHECK: BB#1: derived from LLVM BB %for.cond2
-; CHECK: Successors according to CFG: BB#3(56008718) BB#4(3615818718)
+; CHECK: Successors according to CFG: BB#3({{[0-9a-fx/= ]+}}1.53%) BB#4({{[0-9a-fx/= ]+}}98.47%)
; CHECK: BB#4: derived from LLVM BB %for.cond2
-; CHECK: Successors according to CFG: BB#3(56008718) BB#2(3559810000)
+; CHECK: Successors according to CFG: BB#3({{[0-9a-fx/= ]+}}1.55%) BB#2({{[0-9a-fx/= ]+}}98.45%)
for.inc: ; preds = %for.cond2
%shl = shl i32 %bit.0, 1
diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll
index 6f057c5f18e6..457d9beb37d5 100644
--- a/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -3,7 +3,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"
-define i32 @foo(i32 %i, i32* nocapture %c) nounwind uwtable readonly ssp {
+define i32 @foo(i32 %i, i32* nocapture %c) nounwind uwtable readonly ssp !dbg !1 {
tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !6, metadata !DIExpression()), !dbg !12
%ab = load i32, i32* %c, align 1, !dbg !14
tail call void @llvm.dbg.value(metadata i32* %c, i64 0, metadata !7, metadata !DIExpression()), !dbg !13
@@ -28,17 +28,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 1, file: !20, enums: !21, retainedTypes: !21, subprograms: !18, imports: null)
-!1 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !20, scope: !2, type: !3, function: i32 (i32, i32*)* @foo, variables: !19)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 1, file: !20, enums: !21, retainedTypes: !21, subprograms: !18, imports: null)
+!1 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !20, scope: !2, type: !3, variables: !19)
!2 = !DIFile(filename: "a.c", directory: "/private/tmp")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 2, arg: 1, scope: !1, file: !2, type: !5)
-!7 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 2, arg: 2, scope: !1, file: !2, type: !8)
+!6 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !1, file: !2, type: !5)
+!7 = !DILocalVariable(name: "c", line: 2, arg: 2, scope: !1, file: !2, type: !8)
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !0, baseType: !9)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 3, scope: !11, file: !2, type: !9)
+!10 = !DILocalVariable(name: "a", line: 3, scope: !11, file: !2, type: !9)
!11 = distinct !DILexicalBlock(line: 2, column: 25, file: !20, scope: !1)
!12 = !DILocation(line: 2, column: 13, scope: !1)
!13 = !DILocation(line: 2, column: 22, scope: !1)
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index c8f249b7529d..70af4184e8a2 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -1,13 +1,10 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
-; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s
%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
%struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
-; CHECK: merge_const_store
+; CHECK-LABEL: merge_const_store:
; save 1,2,3 ... as one big integer.
; CHECK: movabsq $578437695752307201
; CHECK: ret
@@ -42,7 +39,7 @@ define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwt
}
; No vectors because we use noimplicitfloat
-; CHECK: merge_const_store_no_vec
+; CHECK-LABEL: merge_const_store_no_vec:
; CHECK-NOT: vmovups
; CHECK: ret
define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimplicitfloat{
@@ -76,7 +73,7 @@ define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimp
}
; Move the constants using a single vector store.
-; CHECK: merge_const_store_vec
+; CHECK-LABEL: merge_const_store_vec:
; CHECK: vmovups
; CHECK: ret
define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
@@ -110,7 +107,7 @@ define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind
}
; Move the first 4 constants as a single vector. Move the rest as scalars.
-; CHECK: merge_nonconst_store
+; CHECK-LABEL: merge_nonconst_store:
; CHECK: movl $67305985
; CHECK: movb
; CHECK: movb
@@ -291,12 +288,16 @@ block4: ; preds = %4, %.lr.ph
ret void
}
-;; On x86, even unaligned copies can be merged to vector ops.
+;; On x86, even unaligned copies should be merged to vector ops.
+;; TODO: however, this cannot happen at the moment, due to brokenness
+;; in MergeConsecutiveStores. See UseAA FIXME in DAGCombiner.cpp
+;; visitSTORE.
+
; CHECK-LABEL: merge_loads_no_align:
; load:
-; CHECK: vmovups
+; CHECK-NOT: vmovups ;; TODO
; store:
-; CHECK: vmovups
+; CHECK-NOT: vmovups ;; TODO
; CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
%a1 = icmp sgt i32 %count, 0
@@ -335,7 +336,7 @@ block4: ; preds = %4, %.lr.ph
; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy.
-; CHECK: MergeLoadStoreBaseIndexOffset
+; CHECK-LABEL: MergeLoadStoreBaseIndexOffset:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
@@ -367,7 +368,7 @@ define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
; Make sure that we merge the consecutive load/store sequence below and use a
; word (16 bit) instead of a byte copy even if there are intermediate sign
; extensions.
-; CHECK: MergeLoadStoreBaseIndexOffsetSext
+; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext:
; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK: movw [[REG]], (%{{.*}})
define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
@@ -399,7 +400,7 @@ define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
; However, we can only merge ignore sign extensions when they are on all memory
; computations;
-; CHECK: loadStoreBaseIndexOffsetSextNoSex
+; CHECK-LABEL: loadStoreBaseIndexOffsetSextNoSex:
; CHECK-NOT: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]]
; CHECK-NOT: movw [[REG]], (%{{.*}})
define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
@@ -481,10 +482,8 @@ define void @merge_vec_extract_stores(<8 x float> %v1, <8 x float> %v2, <4 x flo
ret void
; CHECK-LABEL: merge_vec_extract_stores
-; CHECK: vmovaps %xmm0, 48(%rdi)
-; CHECK-NEXT: vextractf128 $1, %ymm0, 64(%rdi)
-; CHECK-NEXT: vmovaps %xmm1, 80(%rdi)
-; CHECK-NEXT: vextractf128 $1, %ymm1, 96(%rdi)
+; CHECK: vmovups %ymm0, 48(%rdi)
+; CHECK-NEXT: vmovups %ymm1, 80(%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
}
diff --git a/test/CodeGen/X86/StackColoring-dbg.ll b/test/CodeGen/X86/StackColoring-dbg.ll
index 98c27f44fabc..91fe7f819383 100644
--- a/test/CodeGen/X86/StackColoring-dbg.ll
+++ b/test/CodeGen/X86/StackColoring-dbg.ll
@@ -27,9 +27,9 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_C89, producer: "clang", isOptimized: true, emissionKind: 0, file: !1, enums: !{}, retainedTypes: !{})
+!0 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "clang", isOptimized: true, emissionKind: 0, file: !1, enums: !{}, retainedTypes: !{})
!1 = !DIFile(filename: "t.c", directory: "")
!16 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!2 = !DISubprogram()
-!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 16, scope: !2, file: !1, type: !16)
+!2 = distinct !DISubprogram()
+!22 = !DILocalVariable(name: "x", line: 16, scope: !2, file: !1, type: !16)
!23 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/add-nsw-sext.ll b/test/CodeGen/X86/add-nsw-sext.ll
new file mode 100644
index 000000000000..0a6f6c315c13
--- /dev/null
+++ b/test/CodeGen/X86/add-nsw-sext.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; The fundamental problem: an add separated from other arithmetic by a sext can't
+; be combined with the later instructions. However, if the first add is 'nsw',
+; then we can promote the sext ahead of that add to allow optimizations.
+
+define i64 @add_nsw_consts(i32 %i) {
+; CHECK-LABEL: add_nsw_consts:
+; CHECK: # BB#0:
+; CHECK-NEXT: movslq %edi, %rax
+; CHECK-NEXT: addq $12, %rax
+; CHECK-NEXT: retq
+
+ %add = add nsw i32 %i, 5
+ %ext = sext i32 %add to i64
+ %idx = add i64 %ext, 7
+ ret i64 %idx
+}
+
+; An x86 bonus: If we promote the sext ahead of the 'add nsw',
+; we allow LEA formation and eliminate an add instruction.
+
+define i64 @add_nsw_sext_add(i32 %i, i64 %x) {
+; CHECK-LABEL: add_nsw_sext_add:
+; CHECK: # BB#0:
+; CHECK-NEXT: movslq %edi, %rax
+; CHECK-NEXT: leaq 5(%rax,%rsi), %rax
+; CHECK-NEXT: retq
+
+ %add = add nsw i32 %i, 5
+ %ext = sext i32 %add to i64
+ %idx = add i64 %x, %ext
+ ret i64 %idx
+}
+
+; Throw in a scale (left shift) because an LEA can do that too.
+; Use a negative constant (LEA displacement) to verify that's handled correctly.
+
+define i64 @add_nsw_sext_lsh_add(i32 %i, i64 %x) {
+; CHECK-LABEL: add_nsw_sext_lsh_add:
+; CHECK: # BB#0:
+; CHECK-NEXT: movslq %edi, %rax
+; CHECK-NEXT: leaq -40(%rsi,%rax,8), %rax
+; CHECK-NEXT: retq
+
+ %add = add nsw i32 %i, -5
+ %ext = sext i32 %add to i64
+ %shl = shl i64 %ext, 3
+ %idx = add i64 %x, %shl
+ ret i64 %idx
+}
+
+; Don't promote the sext if it has no users. The wider add instruction needs an
+; extra byte to encode.
+
+define i64 @add_nsw_sext(i32 %i, i64 %x) {
+; CHECK-LABEL: add_nsw_sext:
+; CHECK: # BB#0:
+; CHECK-NEXT: addl $5, %edi
+; CHECK-NEXT: movslq %edi, %rax
+; CHECK-NEXT: retq
+
+ %add = add nsw i32 %i, 5
+ %ext = sext i32 %add to i64
+ ret i64 %ext
+}
+
+; The typical use case: a 64-bit system where an 'int' is used as an index into an array.
+
+define i8* @gep8(i32 %i, i8* %x) {
+; CHECK-LABEL: gep8:
+; CHECK: # BB#0:
+; CHECK-NEXT: movslq %edi, %rax
+; CHECK-NEXT: leaq 5(%rax,%rsi), %rax
+; CHECK-NEXT: retq
+
+ %add = add nsw i32 %i, 5
+ %ext = sext i32 %add to i64
+ %idx = getelementptr i8, i8* %x, i64 %ext
+ ret i8* %idx
+}
+
+define i16* @gep16(i32 %i, i16* %x) {
+; CHECK-LABEL: gep16:
+; CHECK: # BB#0:
+; CHECK-NEXT: movslq %edi, %rax
+; CHECK-NEXT: leaq -10(%rsi,%rax,2), %rax
+; CHECK-NEXT: retq
+
+ %add = add nsw i32 %i, -5
+ %ext = sext i32 %add to i64
+ %idx = getelementptr i16, i16* %x, i64 %ext
+ ret i16* %idx
+}
+
+define i32* @gep32(i32 %i, i32* %x) {
+; CHECK-LABEL: gep32:
+; CHECK: # BB#0:
+; CHECK-NEXT: movslq %edi, %rax
+; CHECK-NEXT: leaq 20(%rsi,%rax,4), %rax
+; CHECK-NEXT: retq
+
+ %add = add nsw i32 %i, 5
+ %ext = sext i32 %add to i64
+ %idx = getelementptr i32, i32* %x, i64 %ext
+ ret i32* %idx
+}
+
+define i64* @gep64(i32 %i, i64* %x) {
+; CHECK-LABEL: gep64:
+; CHECK: # BB#0:
+; CHECK-NEXT: movslq %edi, %rax
+; CHECK-NEXT: leaq -40(%rsi,%rax,8), %rax
+; CHECK-NEXT: retq
+
+ %add = add nsw i32 %i, -5
+ %ext = sext i32 %add to i64
+ %idx = getelementptr i64, i64* %x, i64 %ext
+ ret i64* %idx
+}
+
+; LEA can't scale by 16, but the adds can still be combined into an LEA.
+
+define i128* @gep128(i32 %i, i128* %x) {
+; CHECK-LABEL: gep128:
+; CHECK: # BB#0:
+; CHECK-NEXT: movslq %edi, %rax
+; CHECK-NEXT: shlq $4, %rax
+; CHECK-NEXT: leaq 80(%rax,%rsi), %rax
+; CHECK-NEXT: retq
+
+ %add = add nsw i32 %i, 5
+ %ext = sext i32 %add to i64
+ %idx = getelementptr i128, i128* %x, i64 %ext
+ ret i128* %idx
+}
+
+; A bigger win can be achieved when there is more than one use of the
+; sign extended value. In this case, we can eliminate sign extension
+; instructions plus use more efficient addressing modes for memory ops.
+
+define void @PR20134(i32* %a, i32 %i) {
+; CHECK-LABEL: PR20134:
+; CHECK: # BB#0:
+; CHECK-NEXT: movslq %esi, %rax
+; CHECK-NEXT: movl 4(%rdi,%rax,4), %ecx
+; CHECK-NEXT: addl 8(%rdi,%rax,4), %ecx
+; CHECK-NEXT: movl %ecx, (%rdi,%rax,4)
+; CHECK-NEXT: retq
+
+ %add1 = add nsw i32 %i, 1
+ %idx1 = sext i32 %add1 to i64
+ %gep1 = getelementptr i32, i32* %a, i64 %idx1
+ %load1 = load i32, i32* %gep1, align 4
+
+ %add2 = add nsw i32 %i, 2
+ %idx2 = sext i32 %add2 to i64
+ %gep2 = getelementptr i32, i32* %a, i64 %idx2
+ %load2 = load i32, i32* %gep2, align 4
+
+ %add3 = add i32 %load1, %load2
+ %idx3 = sext i32 %i to i64
+ %gep3 = getelementptr i32, i32* %a, i64 %idx3
+ store i32 %add3, i32* %gep3, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index 3f19a064323c..50c7b929c827 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -2,7 +2,7 @@
; RUN: -relocation-model=pic | FileCheck %s
@thread_var = thread_local global i32 42, align 4
-@thread_alias = thread_local(localdynamic) alias i32* @thread_var
+@thread_alias = thread_local(localdynamic) alias i32, i32* @thread_var
; CHECK-LABEL: get_thread_var
define i32* @get_thread_var() {
@@ -19,10 +19,10 @@ define i32* @get_thread_alias() {
@bar = global i32 42
; CHECK-DAG: .globl foo1
-@foo1 = alias i32* @bar
+@foo1 = alias i32, i32* @bar
; CHECK-DAG: .globl foo2
-@foo2 = alias i32* @bar
+@foo2 = alias i32, i32* @bar
%FunTy = type i32()
@@ -30,35 +30,35 @@ define i32 @foo_f() {
ret i32 0
}
; CHECK-DAG: .weak bar_f
-@bar_f = weak alias %FunTy* @foo_f
+@bar_f = weak alias %FunTy, %FunTy* @foo_f
-@bar_l = linkonce_odr alias i32* @bar
+@bar_l = linkonce_odr alias i32, i32* @bar
; CHECK-DAG: .weak bar_l
-@bar_i = internal alias i32* @bar
+@bar_i = internal alias i32, i32* @bar
; CHECK-DAG: .globl A
-@A = alias bitcast (i32* @bar to i64*)
+@A = alias i64, bitcast (i32* @bar to i64*)
; CHECK-DAG: .globl bar_h
; CHECK-DAG: .hidden bar_h
-@bar_h = hidden alias i32* @bar
+@bar_h = hidden alias i32, i32* @bar
; CHECK-DAG: .globl bar_p
; CHECK-DAG: .protected bar_p
-@bar_p = protected alias i32* @bar
+@bar_p = protected alias i32, i32* @bar
; CHECK-DAG: test2 = bar+4
-@test2 = alias getelementptr(i32, i32 *@bar, i32 1)
+@test2 = alias i32, getelementptr(i32, i32* @bar, i32 1)
; CHECK-DAG: test3 = 42
-@test3 = alias inttoptr(i32 42 to i32*)
+@test3 = alias i32, inttoptr(i32 42 to i32*)
; CHECK-DAG: test4 = bar
-@test4 = alias inttoptr(i64 ptrtoint (i32* @bar to i64) to i32*)
+@test4 = alias i32, inttoptr(i64 ptrtoint (i32* @bar to i64) to i32*)
; CHECK-DAG: test5 = test2-bar
-@test5 = alias inttoptr(i32 sub (i32 ptrtoint (i32* @test2 to i32),
+@test5 = alias i32, inttoptr(i32 sub (i32 ptrtoint (i32* @test2 to i32),
i32 ptrtoint (i32* @bar to i32)) to i32*)
; CHECK-DAG: .globl test
diff --git a/test/CodeGen/X86/and-encoding.ll b/test/CodeGen/X86/and-encoding.ll
new file mode 100644
index 000000000000..f7bbac2a4bd9
--- /dev/null
+++ b/test/CodeGen/X86/and-encoding.ll
@@ -0,0 +1,41 @@
+; RUN: llc -show-mc-encoding < %s | FileCheck %s
+
+; Test that the direct object emission selects the and variant with 8 bit
+; immediate.
+; We used to get this wrong when using direct object emission, but not when
+; reading assembly.
+
+
+target triple = "x86_64-pc-linux"
+
+define void @f1() {
+; CHECK-LABEL: f1:
+; CHECK: andq $-32, %rsp # encoding: [0x48,0x83,0xe4,0xe0]
+ %foo = alloca i8, align 32
+ ret void
+}
+
+define void @f2(i1 *%x, i16 *%y) {
+; CHECK-LABEL: f2:
+; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
+ %a = load i1, i1* %x
+ %b = zext i1 %a to i16
+ store i16 %b, i16* %y
+ ret void
+}
+
+define i32 @f3(i1 *%x) {
+; CHECK-LABEL: f3:
+; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
+ %a = load i1, i1* %x
+ %b = zext i1 %a to i32
+ ret i32 %b
+}
+
+define i64 @f4(i1 *%x) {
+; CHECK-LABEL: f4:
+; CHECK: andl $1, %eax # encoding: [0x83,0xe0,0x01]
+ %a = load i1, i1* %x
+ %b = zext i1 %a to i64
+ ret i64 %b
+}
diff --git a/test/CodeGen/X86/atomic-flags.ll b/test/CodeGen/X86/atomic-flags.ll
new file mode 100644
index 000000000000..e0c4a915965c
--- /dev/null
+++ b/test/CodeGen/X86/atomic-flags.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s
+
+; Make sure that flags are properly preserved despite atomic optimizations.
+
+define i32 @atomic_and_flags_1(i8* %p, i32 %a, i32 %b) {
+; CHECK-LABEL: atomic_and_flags_1:
+
+ ; Generate flags value, and use it.
+ ; CHECK: cmpl
+ ; CHECK-NEXT: jne
+ %cmp = icmp eq i32 %a, %b
+ br i1 %cmp, label %L1, label %L2
+
+L1:
+ ; The following pattern will get folded.
+ ; CHECK: incb
+ %1 = load atomic i8, i8* %p seq_cst, align 1
+ %2 = add i8 %1, 1 ; This forces the INC instruction to be generated.
+ store atomic i8 %2, i8* %p release, align 1
+
+ ; Use the comparison result again. We need to rematerialize the comparison
+ ; somehow. This test checks that cmpl gets emitted again, but any
+ ; rematerialization would work (the optimizer used to clobber the flags with
+ ; the add).
+ ; CHECK-NEXT: cmpl
+ ; CHECK-NEXT: jne
+ br i1 %cmp, label %L3, label %L4
+
+L2:
+ ret i32 2
+
+L3:
+ ret i32 3
+
+L4:
+ ret i32 4
+}
+
+; Same as above, but using 2 as immediate to avoid the INC instruction.
+define i32 @atomic_and_flags_2(i8* %p, i32 %a, i32 %b) {
+; CHECK-LABEL: atomic_and_flags_2:
+ ; CHECK: cmpl
+ ; CHECK-NEXT: jne
+ %cmp = icmp eq i32 %a, %b
+ br i1 %cmp, label %L1, label %L2
+L1:
+ ; CHECK: addb
+ %1 = load atomic i8, i8* %p seq_cst, align 1
+ %2 = add i8 %1, 2
+ store atomic i8 %2, i8* %p release, align 1
+ ; CHECK-NEXT: cmpl
+ ; CHECK-NEXT: jne
+ br i1 %cmp, label %L3, label %L4
+L2:
+ ret i32 2
+L3:
+ ret i32 3
+L4:
+ ret i32 4
+}
diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll
index 4989bc14ef86..d5d3fa6db5e8 100644
--- a/test/CodeGen/X86/atomic-minmax-i6432.ll
+++ b/test/CodeGen/X86/atomic-minmax-i6432.ll
@@ -8,7 +8,7 @@ define void @atomic_maxmin_i6432() {
%1 = atomicrmw max i64* @sc64, i64 5 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: seta
+; LINUX: sbbl
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock cmpxchg8b
@@ -16,7 +16,7 @@ define void @atomic_maxmin_i6432() {
%2 = atomicrmw min i64* @sc64, i64 6 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: setb
+; LINUX: sbbl
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock cmpxchg8b
@@ -24,7 +24,7 @@ define void @atomic_maxmin_i6432() {
%3 = atomicrmw umax i64* @sc64, i64 7 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: seta
+; LINUX: sbbl
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock cmpxchg8b
@@ -32,7 +32,7 @@ define void @atomic_maxmin_i6432() {
%4 = atomicrmw umin i64* @sc64, i64 8 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: setb
+; LINUX: sbbl
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock cmpxchg8b
diff --git a/test/CodeGen/X86/atomic-non-integer.ll b/test/CodeGen/X86/atomic-non-integer.ll
new file mode 100644
index 000000000000..98fcd96d3e4c
--- /dev/null
+++ b/test/CodeGen/X86/atomic-non-integer.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck %s
+
+; Note: This test is testing that the lowering for atomics matches what we
+; currently emit for non-atomics + the atomic restriction. The presence of
+; particular lowering detail in these tests should not be read as requiring
+; that detail for correctness unless it's related to the atomicity itself.
+; (Specifically, there were reviewer questions about the lowering for halfs
+; and their calling convention which remain unresolved.)
+
+define void @store_half(half* %fptr, half %v) {
+; CHECK-LABEL: @store_half
+; CHECK: movq %rdi, %rbx
+; CHECK: callq __gnu_f2h_ieee
+; CHECK: movw %ax, (%rbx)
+ store atomic half %v, half* %fptr unordered, align 2
+ ret void
+}
+
+define void @store_float(float* %fptr, float %v) {
+; CHECK-LABEL: @store_float
+; CHECK: movd %xmm0, %eax
+; CHECK: movl %eax, (%rdi)
+ store atomic float %v, float* %fptr unordered, align 4
+ ret void
+}
+
+define void @store_double(double* %fptr, double %v) {
+; CHECK-LABEL: @store_double
+; CHECK: movd %xmm0, %rax
+; CHECK: movq %rax, (%rdi)
+ store atomic double %v, double* %fptr unordered, align 8
+ ret void
+}
+
+define void @store_fp128(fp128* %fptr, fp128 %v) {
+; CHECK-LABEL: @store_fp128
+; CHECK: callq __sync_lock_test_and_set_16
+ store atomic fp128 %v, fp128* %fptr unordered, align 16
+ ret void
+}
+
+define half @load_half(half* %fptr) {
+; CHECK-LABEL: @load_half
+; CHECK: movw (%rdi), %ax
+; CHECK: movzwl %ax, %edi
+; CHECK: jmp __gnu_h2f_ieee
+ %v = load atomic half, half* %fptr unordered, align 2
+ ret half %v
+}
+
+define float @load_float(float* %fptr) {
+; CHECK-LABEL: @load_float
+; CHECK: movl (%rdi), %eax
+; CHECK: movd %eax, %xmm0
+ %v = load atomic float, float* %fptr unordered, align 4
+ ret float %v
+}
+
+define double @load_double(double* %fptr) {
+; CHECK-LABEL: @load_double
+; CHECK: movq (%rdi), %rax
+; CHECK: movd %rax, %xmm0
+ %v = load atomic double, double* %fptr unordered, align 8
+ ret double %v
+}
+
+define fp128 @load_fp128(fp128* %fptr) {
+; CHECK-LABEL: @load_fp128
+; CHECK: callq __sync_val_compare_and_swap_16
+ %v = load atomic fp128, fp128* %fptr unordered, align 16
+ ret fp128 %v
+}
+
+
+; sanity check the seq_cst lowering since that's the
+; interesting one from an ordering perspective on x86.
+
+define void @store_float_seq_cst(float* %fptr, float %v) {
+; CHECK-LABEL: @store_float_seq_cst
+; CHECK: movd %xmm0, %eax
+; CHECK: xchgl %eax, (%rdi)
+ store atomic float %v, float* %fptr seq_cst, align 4
+ ret void
+}
+
+define void @store_double_seq_cst(double* %fptr, double %v) {
+; CHECK-LABEL: @store_double_seq_cst
+; CHECK: movd %xmm0, %rax
+; CHECK: xchgq %rax, (%rdi)
+ store atomic double %v, double* %fptr seq_cst, align 8
+ ret void
+}
+
+define float @load_float_seq_cst(float* %fptr) {
+; CHECK-LABEL: @load_float_seq_cst
+; CHECK: movl (%rdi), %eax
+; CHECK: movd %eax, %xmm0
+ %v = load atomic float, float* %fptr seq_cst, align 4
+ ret float %v
+}
+
+define double @load_double_seq_cst(double* %fptr) {
+; CHECK-LABEL: @load_double_seq_cst
+; CHECK: movq (%rdi), %rax
+; CHECK: movd %rax, %xmm0
+ %v = load atomic double, double* %fptr seq_cst, align 8
+ ret double %v
+}
diff --git a/test/CodeGen/X86/atomic128.ll b/test/CodeGen/X86/atomic128.ll
index dea7d482f989..c41269b0b606 100644
--- a/test/CodeGen/X86/atomic128.ll
+++ b/test/CodeGen/X86/atomic128.ll
@@ -119,16 +119,9 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rsi, %rax
-; CHECK: setbe [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: setle [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: setg
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
@@ -151,16 +144,9 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rsi, %rax
-; CHECK: setae [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: setge [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: setge
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
@@ -183,16 +169,9 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rsi, %rax
-; CHECK: setbe [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: setbe [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: seta
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
@@ -215,16 +194,9 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rax, %rsi
-; CHECK: setb [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: seta [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: setb
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
diff --git a/test/CodeGen/X86/atomic_mi.ll b/test/CodeGen/X86/atomic_mi.ll
index 7a6204fc8930..356d9dcff6fa 100644
--- a/test/CodeGen/X86/atomic_mi.ll
+++ b/test/CodeGen/X86/atomic_mi.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64
-; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32
-; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
; This file checks that atomic (non-seq_cst) stores of immediate values are
; done in one mov instruction and not 2. More precisely, it makes sure that the
@@ -14,7 +14,11 @@
; The binary operations supported are currently add, and, or, xor.
; sub is not supported because they are translated by an addition of the
; negated immediate.
-; Finally, we also check the same kind of pattern for inc/dec
+;
+; We also check the same patterns:
+; - For inc/dec.
+; - For register instead of immediate operands.
+; - For floating point operations.
; seq_cst stores are left as (lock) xchgl, but we try to check every other
; attribute at least once.
@@ -25,10 +29,10 @@
; an implicit lock prefix, so making it explicit is not required.
define void @store_atomic_imm_8(i8* %p) {
-; X64-LABEL: store_atomic_imm_8
+; X64-LABEL: store_atomic_imm_8:
; X64: movb
; X64-NOT: movb
-; X32-LABEL: store_atomic_imm_8
+; X32-LABEL: store_atomic_imm_8:
; X32: movb
; X32-NOT: movb
store atomic i8 42, i8* %p release, align 1
@@ -36,10 +40,10 @@ define void @store_atomic_imm_8(i8* %p) {
}
define void @store_atomic_imm_16(i16* %p) {
-; X64-LABEL: store_atomic_imm_16
+; X64-LABEL: store_atomic_imm_16:
; X64: movw
; X64-NOT: movw
-; X32-LABEL: store_atomic_imm_16
+; X32-LABEL: store_atomic_imm_16:
; X32: movw
; X32-NOT: movw
store atomic i16 42, i16* %p monotonic, align 2
@@ -47,12 +51,12 @@ define void @store_atomic_imm_16(i16* %p) {
}
define void @store_atomic_imm_32(i32* %p) {
-; X64-LABEL: store_atomic_imm_32
+; X64-LABEL: store_atomic_imm_32:
; X64: movl
; X64-NOT: movl
; On 32 bits, there is an extra movl for each of those functions
; (probably for alignment reasons).
-; X32-LABEL: store_atomic_imm_32
+; X32-LABEL: store_atomic_imm_32:
; X32: movl 4(%esp), %eax
; X32: movl
; X32-NOT: movl
@@ -61,12 +65,12 @@ define void @store_atomic_imm_32(i32* %p) {
}
define void @store_atomic_imm_64(i64* %p) {
-; X64-LABEL: store_atomic_imm_64
+; X64-LABEL: store_atomic_imm_64:
; X64: movq
; X64-NOT: movq
; These are implemented with a CAS loop on 32 bit architectures, and thus
; cannot be optimized in the same way as the others.
-; X32-LABEL: store_atomic_imm_64
+; X32-LABEL: store_atomic_imm_64:
; X32: cmpxchg8b
store atomic i64 42, i64* %p release, align 8
ret void
@@ -75,7 +79,7 @@ define void @store_atomic_imm_64(i64* %p) {
; If an immediate is too big to fit in 32 bits, it cannot be store in one mov,
; even on X64, one must use movabsq that can only target a register.
define void @store_atomic_imm_64_big(i64* %p) {
-; X64-LABEL: store_atomic_imm_64_big
+; X64-LABEL: store_atomic_imm_64_big:
; X64: movabsq
; X64: movq
store atomic i64 100000000000, i64* %p monotonic, align 8
@@ -84,9 +88,9 @@ define void @store_atomic_imm_64_big(i64* %p) {
; It would be incorrect to replace a lock xchgl by a movl
define void @store_atomic_imm_32_seq_cst(i32* %p) {
-; X64-LABEL: store_atomic_imm_32_seq_cst
+; X64-LABEL: store_atomic_imm_32_seq_cst:
; X64: xchgl
-; X32-LABEL: store_atomic_imm_32_seq_cst
+; X32-LABEL: store_atomic_imm_32_seq_cst:
; X32: xchgl
store atomic i32 42, i32* %p seq_cst, align 4
ret void
@@ -94,12 +98,12 @@ define void @store_atomic_imm_32_seq_cst(i32* %p) {
; ----- ADD -----
-define void @add_8(i8* %p) {
-; X64-LABEL: add_8
+define void @add_8i(i8* %p) {
+; X64-LABEL: add_8i:
; X64-NOT: lock
; X64: addb
; X64-NOT: movb
-; X32-LABEL: add_8
+; X32-LABEL: add_8i:
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
@@ -109,12 +113,27 @@ define void @add_8(i8* %p) {
ret void
}
-define void @add_16(i16* %p) {
+define void @add_8r(i8* %p, i8 %v) {
+; X64-LABEL: add_8r:
+; X64-NOT: lock
+; X64: addb
+; X64-NOT: movb
+; X32-LABEL: add_8r:
+; X32-NOT: lock
+; X32: addb
+; X32-NOT: movb
+ %1 = load atomic i8, i8* %p seq_cst, align 1
+ %2 = add i8 %1, %v
+ store atomic i8 %2, i8* %p release, align 1
+ ret void
+}
+
+define void @add_16i(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treat 16 bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: add_16
+; X64-LABEL: add_16i:
; X64-NOT: addw
-; X32-LABEL: add_16
+; X32-LABEL: add_16i:
; X32-NOT: addw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 2
@@ -122,12 +141,25 @@ define void @add_16(i16* %p) {
ret void
}
-define void @add_32(i32* %p) {
-; X64-LABEL: add_32
+define void @add_16r(i16* %p, i16 %v) {
+; Currently the transformation is not done on 16 bit accesses, as the backend
+; treat 16 bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: add_16r:
+; X64-NOT: addw
+; X32-LABEL: add_16r:
+; X32-NOT: addw [.*], (
+ %1 = load atomic i16, i16* %p acquire, align 2
+ %2 = add i16 %1, %v
+ store atomic i16 %2, i16* %p release, align 2
+ ret void
+}
+
+define void @add_32i(i32* %p) {
+; X64-LABEL: add_32i:
; X64-NOT: lock
; X64: addl
; X64-NOT: movl
-; X32-LABEL: add_32
+; X32-LABEL: add_32i:
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
@@ -137,23 +169,94 @@ define void @add_32(i32* %p) {
ret void
}
-define void @add_64(i64* %p) {
-; X64-LABEL: add_64
+define void @add_32r(i32* %p, i32 %v) {
+; X64-LABEL: add_32r:
+; X64-NOT: lock
+; X64: addl
+; X64-NOT: movl
+; X32-LABEL: add_32r:
+; X32-NOT: lock
+; X32: addl
+; X32-NOT: movl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = add i32 %1, %v
+ store atomic i32 %2, i32* %p monotonic, align 4
+ ret void
+}
+
+; The following is a corner case where the load is added to itself. The pattern
+; matching should not fold this. We only test with 32-bit add, but the same
+; applies to other sizes and operations.
+define void @add_32r_self(i32* %p) {
+; X64-LABEL: add_32r_self:
+; X64-NOT: lock
+; X64: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
+; X64: addl %[[R]], %[[R]]
+; X64: movl %[[R]], (%[[M]])
+; X32-LABEL: add_32r_self:
+; X32-NOT: lock
+; X32: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
+; X32: addl %[[R]], %[[R]]
+; X32: movl %[[R]], (%[[M]])
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = add i32 %1, %1
+ store atomic i32 %2, i32* %p monotonic, align 4
+ ret void
+}
+
+; The following is a corner case where the load's result is returned. The
+; optimizer isn't allowed to duplicate the load because it's atomic.
+define i32 @add_32r_ret_load(i32* %p, i32 %v) {
+; X64-LABEL: add_32r_ret_load:
+; X64-NOT: lock
+; X64: movl (%rdi), %eax
+; X64-NEXT: addl %eax, %esi
+; X64-NEXT: movl %esi, (%rdi)
+; X64-NEXT: retq
+; X32-LABEL: add_32r_ret_load:
+; X32-NOT: lock
+; X32: movl 4(%esp), %[[P:[a-z]+]]
+; X32-NEXT: movl (%[[P]]),
+; X32-NOT: %[[P]]
+; More code here, we just don't want it to load from P.
+; X32: movl %{{.*}}, (%[[P]])
+; X32-NEXT: retl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = add i32 %1, %v
+ store atomic i32 %2, i32* %p monotonic, align 4
+ ret i32 %1
+}
+
+define void @add_64i(i64* %p) {
+; X64-LABEL: add_64i:
; X64-NOT: lock
; X64: addq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
-; X32-LABEL: add_64
+; X32-LABEL: add_64i:
%1 = load atomic i64, i64* %p acquire, align 8
%2 = add i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
-define void @add_32_seq_cst(i32* %p) {
-; X64-LABEL: add_32_seq_cst
+define void @add_64r(i64* %p, i64 %v) {
+; X64-LABEL: add_64r:
+; X64-NOT: lock
+; X64: addq
+; X64-NOT: movq
+; We do not check X86-32 as it cannot do 'addq'.
+; X32-LABEL: add_64r:
+ %1 = load atomic i64, i64* %p acquire, align 8
+ %2 = add i64 %1, %v
+ store atomic i64 %2, i64* %p release, align 8
+ ret void
+}
+
+define void @add_32i_seq_cst(i32* %p) {
+; X64-LABEL: add_32i_seq_cst:
; X64: xchgl
-; X32-LABEL: add_32_seq_cst
+; X32-LABEL: add_32i_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = add i32 %1, 2
@@ -161,14 +264,25 @@ define void @add_32_seq_cst(i32* %p) {
ret void
}
+define void @add_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: add_32r_seq_cst:
+; X64: xchgl
+; X32-LABEL: add_32r_seq_cst:
+; X32: xchgl
+ %1 = load atomic i32, i32* %p monotonic, align 4
+ %2 = add i32 %1, %v
+ store atomic i32 %2, i32* %p seq_cst, align 4
+ ret void
+}
+
; ----- AND -----
-define void @and_8(i8* %p) {
-; X64-LABEL: and_8
+define void @and_8i(i8* %p) {
+; X64-LABEL: and_8i:
; X64-NOT: lock
; X64: andb
; X64-NOT: movb
-; X32-LABEL: and_8
+; X32-LABEL: and_8i:
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
@@ -178,12 +292,27 @@ define void @and_8(i8* %p) {
ret void
}
-define void @and_16(i16* %p) {
+define void @and_8r(i8* %p, i8 %v) {
+; X64-LABEL: and_8r:
+; X64-NOT: lock
+; X64: andb
+; X64-NOT: movb
+; X32-LABEL: and_8r:
+; X32-NOT: lock
+; X32: andb
+; X32-NOT: movb
+ %1 = load atomic i8, i8* %p monotonic, align 1
+ %2 = and i8 %1, %v
+ store atomic i8 %2, i8* %p release, align 1
+ ret void
+}
+
+define void @and_16i(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treat 16 bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: and_16
+; X64-LABEL: and_16i:
; X64-NOT: andw
-; X32-LABEL: and_16
+; X32-LABEL: and_16i:
; X32-NOT: andw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = and i16 %1, 2
@@ -191,12 +320,25 @@ define void @and_16(i16* %p) {
ret void
}
-define void @and_32(i32* %p) {
-; X64-LABEL: and_32
+define void @and_16r(i16* %p, i16 %v) {
+; Currently the transformation is not done on 16 bit accesses, as the backend
+; treat 16 bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: and_16r:
+; X64-NOT: andw
+; X32-LABEL: and_16r:
+; X32-NOT: andw [.*], (
+ %1 = load atomic i16, i16* %p acquire, align 2
+ %2 = and i16 %1, %v
+ store atomic i16 %2, i16* %p release, align 2
+ ret void
+}
+
+define void @and_32i(i32* %p) {
+; X64-LABEL: and_32i:
; X64-NOT: lock
; X64: andl
; X64-NOT: movl
-; X32-LABEL: and_32
+; X32-LABEL: and_32i:
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
@@ -206,23 +348,51 @@ define void @and_32(i32* %p) {
ret void
}
-define void @and_64(i64* %p) {
-; X64-LABEL: and_64
+define void @and_32r(i32* %p, i32 %v) {
+; X64-LABEL: and_32r:
+; X64-NOT: lock
+; X64: andl
+; X64-NOT: movl
+; X32-LABEL: and_32r:
+; X32-NOT: lock
+; X32: andl
+; X32-NOT: movl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = and i32 %1, %v
+ store atomic i32 %2, i32* %p release, align 4
+ ret void
+}
+
+define void @and_64i(i64* %p) {
+; X64-LABEL: and_64i:
; X64-NOT: lock
; X64: andq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
-; X32-LABEL: and_64
+; X32-LABEL: and_64i:
%1 = load atomic i64, i64* %p acquire, align 8
%2 = and i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
-define void @and_32_seq_cst(i32* %p) {
-; X64-LABEL: and_32_seq_cst
+define void @and_64r(i64* %p, i64 %v) {
+; X64-LABEL: and_64r:
+; X64-NOT: lock
+; X64: andq
+; X64-NOT: movq
+; We do not check X86-32 as it cannot do 'andq'.
+; X32-LABEL: and_64r:
+ %1 = load atomic i64, i64* %p acquire, align 8
+ %2 = and i64 %1, %v
+ store atomic i64 %2, i64* %p release, align 8
+ ret void
+}
+
+define void @and_32i_seq_cst(i32* %p) {
+; X64-LABEL: and_32i_seq_cst:
; X64: xchgl
-; X32-LABEL: and_32_seq_cst
+; X32-LABEL: and_32i_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = and i32 %1, 2
@@ -230,14 +400,25 @@ define void @and_32_seq_cst(i32* %p) {
ret void
}
+define void @and_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: and_32r_seq_cst:
+; X64: xchgl
+; X32-LABEL: and_32r_seq_cst:
+; X32: xchgl
+ %1 = load atomic i32, i32* %p monotonic, align 4
+ %2 = and i32 %1, %v
+ store atomic i32 %2, i32* %p seq_cst, align 4
+ ret void
+}
+
; ----- OR -----
-define void @or_8(i8* %p) {
-; X64-LABEL: or_8
+define void @or_8i(i8* %p) {
+; X64-LABEL: or_8i:
; X64-NOT: lock
; X64: orb
; X64-NOT: movb
-; X32-LABEL: or_8
+; X32-LABEL: or_8i:
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
@@ -247,10 +428,25 @@ define void @or_8(i8* %p) {
ret void
}
-define void @or_16(i16* %p) {
-; X64-LABEL: or_16
+define void @or_8r(i8* %p, i8 %v) {
+; X64-LABEL: or_8r:
+; X64-NOT: lock
+; X64: orb
+; X64-NOT: movb
+; X32-LABEL: or_8r:
+; X32-NOT: lock
+; X32: orb
+; X32-NOT: movb
+ %1 = load atomic i8, i8* %p acquire, align 1
+ %2 = or i8 %1, %v
+ store atomic i8 %2, i8* %p release, align 1
+ ret void
+}
+
+define void @or_16i(i16* %p) {
+; X64-LABEL: or_16i:
; X64-NOT: orw
-; X32-LABEL: or_16
+; X32-LABEL: or_16i:
; X32-NOT: orw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = or i16 %1, 2
@@ -258,12 +454,23 @@ define void @or_16(i16* %p) {
ret void
}
-define void @or_32(i32* %p) {
-; X64-LABEL: or_32
+define void @or_16r(i16* %p, i16 %v) {
+; X64-LABEL: or_16r:
+; X64-NOT: orw
+; X32-LABEL: or_16r:
+; X32-NOT: orw [.*], (
+ %1 = load atomic i16, i16* %p acquire, align 2
+ %2 = or i16 %1, %v
+ store atomic i16 %2, i16* %p release, align 2
+ ret void
+}
+
+define void @or_32i(i32* %p) {
+; X64-LABEL: or_32i:
; X64-NOT: lock
; X64: orl
; X64-NOT: movl
-; X32-LABEL: or_32
+; X32-LABEL: or_32i:
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
@@ -273,23 +480,51 @@ define void @or_32(i32* %p) {
ret void
}
-define void @or_64(i64* %p) {
-; X64-LABEL: or_64
+define void @or_32r(i32* %p, i32 %v) {
+; X64-LABEL: or_32r:
+; X64-NOT: lock
+; X64: orl
+; X64-NOT: movl
+; X32-LABEL: or_32r:
+; X32-NOT: lock
+; X32: orl
+; X32-NOT: movl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = or i32 %1, %v
+ store atomic i32 %2, i32* %p release, align 4
+ ret void
+}
+
+define void @or_64i(i64* %p) {
+; X64-LABEL: or_64i:
; X64-NOT: lock
; X64: orq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
-; X32-LABEL: or_64
+; X32-LABEL: or_64i:
%1 = load atomic i64, i64* %p acquire, align 8
%2 = or i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
-define void @or_32_seq_cst(i32* %p) {
-; X64-LABEL: or_32_seq_cst
+define void @or_64r(i64* %p, i64 %v) {
+; X64-LABEL: or_64r:
+; X64-NOT: lock
+; X64: orq
+; X64-NOT: movq
+; We do not check X86-32 as it cannot do 'orq'.
+; X32-LABEL: or_64r:
+ %1 = load atomic i64, i64* %p acquire, align 8
+ %2 = or i64 %1, %v
+ store atomic i64 %2, i64* %p release, align 8
+ ret void
+}
+
+define void @or_32i_seq_cst(i32* %p) {
+; X64-LABEL: or_32i_seq_cst:
; X64: xchgl
-; X32-LABEL: or_32_seq_cst
+; X32-LABEL: or_32i_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = or i32 %1, 2
@@ -297,14 +532,25 @@ define void @or_32_seq_cst(i32* %p) {
ret void
}
+define void @or_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: or_32r_seq_cst:
+; X64: xchgl
+; X32-LABEL: or_32r_seq_cst:
+; X32: xchgl
+ %1 = load atomic i32, i32* %p monotonic, align 4
+ %2 = or i32 %1, %v
+ store atomic i32 %2, i32* %p seq_cst, align 4
+ ret void
+}
+
; ----- XOR -----
-define void @xor_8(i8* %p) {
-; X64-LABEL: xor_8
+define void @xor_8i(i8* %p) {
+; X64-LABEL: xor_8i:
; X64-NOT: lock
; X64: xorb
; X64-NOT: movb
-; X32-LABEL: xor_8
+; X32-LABEL: xor_8i:
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
@@ -314,10 +560,25 @@ define void @xor_8(i8* %p) {
ret void
}
-define void @xor_16(i16* %p) {
-; X64-LABEL: xor_16
+define void @xor_8r(i8* %p, i8 %v) {
+; X64-LABEL: xor_8r:
+; X64-NOT: lock
+; X64: xorb
+; X64-NOT: movb
+; X32-LABEL: xor_8r:
+; X32-NOT: lock
+; X32: xorb
+; X32-NOT: movb
+ %1 = load atomic i8, i8* %p acquire, align 1
+ %2 = xor i8 %1, %v
+ store atomic i8 %2, i8* %p release, align 1
+ ret void
+}
+
+define void @xor_16i(i16* %p) {
+; X64-LABEL: xor_16i:
; X64-NOT: xorw
-; X32-LABEL: xor_16
+; X32-LABEL: xor_16i:
; X32-NOT: xorw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = xor i16 %1, 2
@@ -325,12 +586,23 @@ define void @xor_16(i16* %p) {
ret void
}
-define void @xor_32(i32* %p) {
-; X64-LABEL: xor_32
+define void @xor_16r(i16* %p, i16 %v) {
+; X64-LABEL: xor_16r:
+; X64-NOT: xorw
+; X32-LABEL: xor_16r:
+; X32-NOT: xorw [.*], (
+ %1 = load atomic i16, i16* %p acquire, align 2
+ %2 = xor i16 %1, %v
+ store atomic i16 %2, i16* %p release, align 2
+ ret void
+}
+
+define void @xor_32i(i32* %p) {
+; X64-LABEL: xor_32i:
; X64-NOT: lock
; X64: xorl
; X64-NOT: movl
-; X32-LABEL: xor_32
+; X32-LABEL: xor_32i:
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
@@ -340,23 +612,51 @@ define void @xor_32(i32* %p) {
ret void
}
-define void @xor_64(i64* %p) {
-; X64-LABEL: xor_64
+define void @xor_32r(i32* %p, i32 %v) {
+; X64-LABEL: xor_32r:
+; X64-NOT: lock
+; X64: xorl
+; X64-NOT: movl
+; X32-LABEL: xor_32r:
+; X32-NOT: lock
+; X32: xorl
+; X32-NOT: movl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = xor i32 %1, %v
+ store atomic i32 %2, i32* %p release, align 4
+ ret void
+}
+
+define void @xor_64i(i64* %p) {
+; X64-LABEL: xor_64i:
; X64-NOT: lock
; X64: xorq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
-; X32-LABEL: xor_64
+; X32-LABEL: xor_64i:
%1 = load atomic i64, i64* %p acquire, align 8
%2 = xor i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
-define void @xor_32_seq_cst(i32* %p) {
-; X64-LABEL: xor_32_seq_cst
+define void @xor_64r(i64* %p, i64 %v) {
+; X64-LABEL: xor_64r:
+; X64-NOT: lock
+; X64: xorq
+; X64-NOT: movq
+; We do not check X86-32 as it cannot do 'xorq'.
+; X32-LABEL: xor_64r:
+ %1 = load atomic i64, i64* %p acquire, align 8
+ %2 = xor i64 %1, %v
+ store atomic i64 %2, i64* %p release, align 8
+ ret void
+}
+
+define void @xor_32i_seq_cst(i32* %p) {
+; X64-LABEL: xor_32i_seq_cst:
; X64: xchgl
-; X32-LABEL: xor_32_seq_cst
+; X32-LABEL: xor_32i_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = xor i32 %1, 2
@@ -364,18 +664,29 @@ define void @xor_32_seq_cst(i32* %p) {
ret void
}
+define void @xor_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: xor_32r_seq_cst:
+; X64: xchgl
+; X32-LABEL: xor_32r_seq_cst:
+; X32: xchgl
+ %1 = load atomic i32, i32* %p monotonic, align 4
+ %2 = xor i32 %1, %v
+ store atomic i32 %2, i32* %p seq_cst, align 4
+ ret void
+}
+
; ----- INC -----
define void @inc_8(i8* %p) {
-; X64-LABEL: inc_8
+; X64-LABEL: inc_8:
; X64-NOT: lock
; X64: incb
; X64-NOT: movb
-; X32-LABEL: inc_8
+; X32-LABEL: inc_8:
; X32-NOT: lock
; X32: incb
; X32-NOT: movb
-; SLOW_INC-LABEL: inc_8
+; SLOW_INC-LABEL: inc_8:
; SLOW_INC-NOT: incb
; SLOW_INC-NOT: movb
%1 = load atomic i8, i8* %p seq_cst, align 1
@@ -387,11 +698,11 @@ define void @inc_8(i8* %p) {
define void @inc_16(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treat 16 bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: inc_16
+; X64-LABEL: inc_16:
; X64-NOT: incw
-; X32-LABEL: inc_16
+; X32-LABEL: inc_16:
; X32-NOT: incw
-; SLOW_INC-LABEL: inc_16
+; SLOW_INC-LABEL: inc_16:
; SLOW_INC-NOT: incw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 1
@@ -400,15 +711,15 @@ define void @inc_16(i16* %p) {
}
define void @inc_32(i32* %p) {
-; X64-LABEL: inc_32
+; X64-LABEL: inc_32:
; X64-NOT: lock
; X64: incl
; X64-NOT: movl
-; X32-LABEL: inc_32
+; X32-LABEL: inc_32:
; X32-NOT: lock
; X32: incl
; X32-NOT: movl
-; SLOW_INC-LABEL: inc_32
+; SLOW_INC-LABEL: inc_32:
; SLOW_INC-NOT: incl
; SLOW_INC-NOT: movl
%1 = load atomic i32, i32* %p acquire, align 4
@@ -418,13 +729,13 @@ define void @inc_32(i32* %p) {
}
define void @inc_64(i64* %p) {
-; X64-LABEL: inc_64
+; X64-LABEL: inc_64:
; X64-NOT: lock
; X64: incq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'incq'.
-; X32-LABEL: inc_64
-; SLOW_INC-LABEL: inc_64
+; X32-LABEL: inc_64:
+; SLOW_INC-LABEL: inc_64:
; SLOW_INC-NOT: incq
; SLOW_INC-NOT: movq
%1 = load atomic i64, i64* %p acquire, align 8
@@ -434,9 +745,9 @@ define void @inc_64(i64* %p) {
}
define void @inc_32_seq_cst(i32* %p) {
-; X64-LABEL: inc_32_seq_cst
+; X64-LABEL: inc_32_seq_cst:
; X64: xchgl
-; X32-LABEL: inc_32_seq_cst
+; X32-LABEL: inc_32_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = add i32 %1, 1
@@ -447,15 +758,15 @@ define void @inc_32_seq_cst(i32* %p) {
; ----- DEC -----
define void @dec_8(i8* %p) {
-; X64-LABEL: dec_8
+; X64-LABEL: dec_8:
; X64-NOT: lock
; X64: decb
; X64-NOT: movb
-; X32-LABEL: dec_8
+; X32-LABEL: dec_8:
; X32-NOT: lock
; X32: decb
; X32-NOT: movb
-; SLOW_INC-LABEL: dec_8
+; SLOW_INC-LABEL: dec_8:
; SLOW_INC-NOT: decb
; SLOW_INC-NOT: movb
%1 = load atomic i8, i8* %p seq_cst, align 1
@@ -467,11 +778,11 @@ define void @dec_8(i8* %p) {
define void @dec_16(i16* %p) {
; Currently the transformation is not done on 16 bit accesses, as the backend
; treat 16 bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: dec_16
+; X64-LABEL: dec_16:
; X64-NOT: decw
-; X32-LABEL: dec_16
+; X32-LABEL: dec_16:
; X32-NOT: decw
-; SLOW_INC-LABEL: dec_16
+; SLOW_INC-LABEL: dec_16:
; SLOW_INC-NOT: decw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = sub i16 %1, 1
@@ -480,15 +791,15 @@ define void @dec_16(i16* %p) {
}
define void @dec_32(i32* %p) {
-; X64-LABEL: dec_32
+; X64-LABEL: dec_32:
; X64-NOT: lock
; X64: decl
; X64-NOT: movl
-; X32-LABEL: dec_32
+; X32-LABEL: dec_32:
; X32-NOT: lock
; X32: decl
; X32-NOT: movl
-; SLOW_INC-LABEL: dec_32
+; SLOW_INC-LABEL: dec_32:
; SLOW_INC-NOT: decl
; SLOW_INC-NOT: movl
%1 = load atomic i32, i32* %p acquire, align 4
@@ -498,13 +809,13 @@ define void @dec_32(i32* %p) {
}
define void @dec_64(i64* %p) {
-; X64-LABEL: dec_64
+; X64-LABEL: dec_64:
; X64-NOT: lock
; X64: decq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'decq'.
-; X32-LABEL: dec_64
-; SLOW_INC-LABEL: dec_64
+; X32-LABEL: dec_64:
+; SLOW_INC-LABEL: dec_64:
; SLOW_INC-NOT: decq
; SLOW_INC-NOT: movq
%1 = load atomic i64, i64* %p acquire, align 8
@@ -514,12 +825,157 @@ define void @dec_64(i64* %p) {
}
define void @dec_32_seq_cst(i32* %p) {
-; X64-LABEL: dec_32_seq_cst
+; X64-LABEL: dec_32_seq_cst:
; X64: xchgl
-; X32-LABEL: dec_32_seq_cst
+; X32-LABEL: dec_32_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = sub i32 %1, 1
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
+
+; ----- FADD -----
+
+define void @fadd_32r(float* %loc, float %val) {
+; X64-LABEL: fadd_32r:
+; X64-NOT: lock
+; X64-NOT: mov
+; X64: addss (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: movss %[[XMM]], (%[[M]])
+; X32-LABEL: fadd_32r:
+; Don't check x86-32.
+; LLVM's SSE handling is conservative on x86-32 even without using atomics.
+ %floc = bitcast float* %loc to i32*
+ %1 = load atomic i32, i32* %floc seq_cst, align 4
+ %2 = bitcast i32 %1 to float
+ %add = fadd float %2, %val
+ %3 = bitcast float %add to i32
+ store atomic i32 %3, i32* %floc release, align 4
+ ret void
+}
+
+define void @fadd_64r(double* %loc, double %val) {
+; X64-LABEL: fadd_64r:
+; X64-NOT: lock
+; X64-NOT: mov
+; X64: addsd (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: movsd %[[XMM]], (%[[M]])
+; X32-LABEL: fadd_64r:
+; Don't check x86-32 (see comment above).
+ %floc = bitcast double* %loc to i64*
+ %1 = load atomic i64, i64* %floc seq_cst, align 8
+ %2 = bitcast i64 %1 to double
+ %add = fadd double %2, %val
+ %3 = bitcast double %add to i64
+ store atomic i64 %3, i64* %floc release, align 8
+ ret void
+}
+
+@glob32 = global float 0.000000e+00, align 4
+@glob64 = global double 0.000000e+00, align 8
+
+; Floating-point add to a global using an immediate.
+define void @fadd_32g() {
+; X64-LABEL: fadd_32g:
+; X64-NOT: lock
+; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: addss glob32(%rip), %[[XMM]]
+; X64-NEXT: movss %[[XMM]], glob32(%rip)
+; X32-LABEL: fadd_32g:
+; Don't check x86-32 (see comment above).
+ %i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
+ %f = bitcast i32 %i to float
+ %add = fadd float %f, 1.000000e+00
+ %s = bitcast float %add to i32
+ store atomic i32 %s, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
+ ret void
+}
+
+define void @fadd_64g() {
+; X64-LABEL: fadd_64g:
+; X64-NOT: lock
+; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: addsd glob64(%rip), %[[XMM]]
+; X64-NEXT: movsd %[[XMM]], glob64(%rip)
+; X32-LABEL: fadd_64g:
+; Don't check x86-32 (see comment above).
+ %i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
+ %f = bitcast i64 %i to double
+ %add = fadd double %f, 1.000000e+00
+ %s = bitcast double %add to i64
+ store atomic i64 %s, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
+ ret void
+}
+
+; Floating-point add to a hard-coded immediate location using an immediate.
+define void @fadd_32imm() {
+; X64-LABEL: fadd_32imm:
+; X64-NOT: lock
+; X64: movl $3735928559, %e[[M:[a-z]+]]
+; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: addss (%r[[M]]), %[[XMM]]
+; X64-NEXT: movss %[[XMM]], (%r[[M]])
+; X32-LABEL: fadd_32imm:
+; Don't check x86-32 (see comment above).
+ %i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
+ %f = bitcast i32 %i to float
+ %add = fadd float %f, 1.000000e+00
+ %s = bitcast float %add to i32
+ store atomic i32 %s, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
+ ret void
+}
+
+define void @fadd_64imm() {
+; X64-LABEL: fadd_64imm:
+; X64-NOT: lock
+; X64: movl $3735928559, %e[[M:[a-z]+]]
+; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: addsd (%r[[M]]), %[[XMM]]
+; X64-NEXT: movsd %[[XMM]], (%r[[M]])
+; X32-LABEL: fadd_64imm:
+; Don't check x86-32 (see comment above).
+ %i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
+ %f = bitcast i64 %i to double
+ %add = fadd double %f, 1.000000e+00
+ %s = bitcast double %add to i64
+ store atomic i64 %s, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
+ ret void
+}
+
+; Floating-point add to a stack location.
+define void @fadd_32stack() {
+; X64-LABEL: fadd_32stack:
+; X64-NOT: lock
+; X64: movss .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: addss [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]]
+; X64-NEXT: movss %[[XMM]], [[STACKOFF]](%rsp)
+; X32-LABEL: fadd_32stack:
+; Don't check x86-32 (see comment above).
+ %ptr = alloca i32, align 4
+ %bc3 = bitcast i32* %ptr to float*
+ %load = load atomic i32, i32* %ptr acquire, align 4
+ %bc0 = bitcast i32 %load to float
+ %fadd = fadd float 1.000000e+00, %bc0
+ %bc1 = bitcast float %fadd to i32
+ store atomic i32 %bc1, i32* %ptr release, align 4
+ ret void
+}
+
+define void @fadd_64stack() {
+; X64-LABEL: fadd_64stack:
+; X64-NOT: lock
+; X64: movsd .{{[A-Z0-9_]+}}(%rip), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: addsd [[STACKOFF:-?[0-9]+]](%rsp), %[[XMM]]
+; X64-NEXT: movsd %[[XMM]], [[STACKOFF]](%rsp)
+; X32-LABEL: fadd_64stack:
+; Don't check x86-32 (see comment above).
+ %ptr = alloca i64, align 8
+ %bc3 = bitcast i64* %ptr to double*
+ %load = load atomic i64, i64* %ptr acquire, align 8
+ %bc0 = bitcast i64 %load to double
+ %fadd = fadd double 1.000000e+00, %bc0
+ %bc1 = bitcast double %fadd to i64
+ store atomic i64 %bc1, i64* %ptr release, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/avg.ll b/test/CodeGen/X86/avg.ll
new file mode 100644
index 000000000000..f1c636a73305
--- /dev/null
+++ b/test/CodeGen/X86/avg.ll
@@ -0,0 +1,724 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512BW
+
+define void @avg_v4i8(<4 x i8>* %a, <4 x i8>* %b) {
+; SSE2-LABEL: avg_v4i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: pavgb %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: avg_v4i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX2-NEXT: vpavgb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vmovd %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v4i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovd (%rdi), %xmm0
+; AVX512BW-NEXT: vmovd (%rsi), %xmm1
+; AVX512BW-NEXT: vpavgb %xmm0, %xmm1, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <4 x i8>, <4 x i8>* %a
+ %2 = load <4 x i8>, <4 x i8>* %b
+ %3 = zext <4 x i8> %1 to <4 x i32>
+ %4 = zext <4 x i8> %2 to <4 x i32>
+ %5 = add nuw nsw <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1>
+ %6 = add nuw nsw <4 x i32> %5, %4
+ %7 = lshr <4 x i32> %6, <i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <4 x i32> %7 to <4 x i8>
+ store <4 x i8> %8, <4 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v8i8(<8 x i8>* %a, <8 x i8>* %b) {
+; SSE2-LABEL: avg_v8i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: pavgb %xmm0, %xmm1
+; SSE2-NEXT: movq %xmm1, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: avg_v8i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX2-NEXT: vpavgb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v8i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovq (%rdi), %xmm0
+; AVX512BW-NEXT: vmovq (%rsi), %xmm1
+; AVX512BW-NEXT: vpavgb %xmm0, %xmm1, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <8 x i8>, <8 x i8>* %a
+ %2 = load <8 x i8>, <8 x i8>* %b
+ %3 = zext <8 x i8> %1 to <8 x i32>
+ %4 = zext <8 x i8> %2 to <8 x i32>
+ %5 = add nuw nsw <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %6 = add nuw nsw <8 x i32> %5, %4
+ %7 = lshr <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <8 x i32> %7 to <8 x i8>
+ store <8 x i8> %8, <8 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v16i8(<16 x i8>* %a, <16 x i8>* %b) {
+; SSE2-LABEL: avg_v16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa (%rsi), %xmm0
+; SSE2-NEXT: pavgb (%rdi), %xmm0
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: avg_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa (%rsi), %xmm0
+; AVX-NEXT: vpavgb (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rax)
+; AVX-NEXT: retq
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = load <16 x i8>, <16 x i8>* %b
+ %3 = zext <16 x i8> %1 to <16 x i32>
+ %4 = zext <16 x i8> %2 to <16 x i32>
+ %5 = add nuw nsw <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %6 = add nuw nsw <16 x i32> %5, %4
+ %7 = lshr <16 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <16 x i32> %7 to <16 x i8>
+ store <16 x i8> %8, <16 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) {
+; AVX2-LABEL: avg_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa (%rsi), %ymm0
+; AVX2-NEXT: vpavgb (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v32i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0
+; AVX512BW-NEXT: vpavgb (%rdi), %ymm0, %ymm0
+; AVX512BW-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <32 x i8>, <32 x i8>* %a
+ %2 = load <32 x i8>, <32 x i8>* %b
+ %3 = zext <32 x i8> %1 to <32 x i32>
+ %4 = zext <32 x i8> %2 to <32 x i32>
+ %5 = add nuw nsw <32 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %6 = add nuw nsw <32 x i32> %5, %4
+ %7 = lshr <32 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <32 x i32> %7 to <32 x i8>
+ store <32 x i8> %8, <32 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) {
+; AVX512BW-LABEL: avg_v64i8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0
+; AVX512BW-NEXT: vpavgb (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <64 x i8>, <64 x i8>* %a
+ %2 = load <64 x i8>, <64 x i8>* %b
+ %3 = zext <64 x i8> %1 to <64 x i32>
+ %4 = zext <64 x i8> %2 to <64 x i32>
+ %5 = add nuw nsw <64 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %6 = add nuw nsw <64 x i32> %5, %4
+ %7 = lshr <64 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <64 x i32> %7 to <64 x i8>
+ store <64 x i8> %8, <64 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v4i16(<4 x i16>* %a, <4 x i16>* %b) {
+; SSE2-LABEL: avg_v4i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: pavgw %xmm0, %xmm1
+; SSE2-NEXT: movq %xmm1, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: avg_v4i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX2-NEXT: vpavgw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v4i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovq (%rdi), %xmm0
+; AVX512BW-NEXT: vmovq (%rsi), %xmm1
+; AVX512BW-NEXT: vpavgw %xmm0, %xmm1, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <4 x i16>, <4 x i16>* %a
+ %2 = load <4 x i16>, <4 x i16>* %b
+ %3 = zext <4 x i16> %1 to <4 x i32>
+ %4 = zext <4 x i16> %2 to <4 x i32>
+ %5 = add nuw nsw <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1>
+ %6 = add nuw nsw <4 x i32> %5, %4
+ %7 = lshr <4 x i32> %6, <i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <4 x i32> %7 to <4 x i16>
+ store <4 x i16> %8, <4 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v8i16(<8 x i16>* %a, <8 x i16>* %b) {
+; SSE2-LABEL: avg_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa (%rsi), %xmm0
+; SSE2-NEXT: pavgw (%rdi), %xmm0
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: avg_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa (%rsi), %xmm0
+; AVX-NEXT: vpavgw (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rax)
+; AVX-NEXT: retq
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = load <8 x i16>, <8 x i16>* %b
+ %3 = zext <8 x i16> %1 to <8 x i32>
+ %4 = zext <8 x i16> %2 to <8 x i32>
+ %5 = add nuw nsw <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %6 = add nuw nsw <8 x i32> %5, %4
+ %7 = lshr <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <8 x i32> %7 to <8 x i16>
+ store <8 x i16> %8, <8 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) {
+; AVX2-LABEL: avg_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa (%rsi), %ymm0
+; AVX2-NEXT: vpavgw (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v16i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0
+; AVX512BW-NEXT: vpavgw (%rdi), %ymm0, %ymm0
+; AVX512BW-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <16 x i16>, <16 x i16>* %a
+ %2 = load <16 x i16>, <16 x i16>* %b
+ %3 = zext <16 x i16> %1 to <16 x i32>
+ %4 = zext <16 x i16> %2 to <16 x i32>
+ %5 = add nuw nsw <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %6 = add nuw nsw <16 x i32> %5, %4
+ %7 = lshr <16 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <16 x i32> %7 to <16 x i16>
+ store <16 x i16> %8, <16 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) {
+; AVX512BW-LABEL: avg_v32i16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqu16 (%rsi), %zmm0
+; AVX512BW-NEXT: vpavgw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <32 x i16>, <32 x i16>* %a
+ %2 = load <32 x i16>, <32 x i16>* %b
+ %3 = zext <32 x i16> %1 to <32 x i32>
+ %4 = zext <32 x i16> %2 to <32 x i32>
+ %5 = add nuw nsw <32 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %6 = add nuw nsw <32 x i32> %5, %4
+ %7 = lshr <32 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <32 x i32> %7 to <32 x i16>
+ store <32 x i16> %8, <32 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v4i8_2(<4 x i8>* %a, <4 x i8>* %b) {
+; SSE2-LABEL: avg_v4i8_2:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: pavgb %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: avg_v4i8_2:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX2-NEXT: vpavgb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovd %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v4i8_2:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovd (%rdi), %xmm0
+; AVX512BW-NEXT: vmovd (%rsi), %xmm1
+; AVX512BW-NEXT: vpavgb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <4 x i8>, <4 x i8>* %a
+ %2 = load <4 x i8>, <4 x i8>* %b
+ %3 = zext <4 x i8> %1 to <4 x i32>
+ %4 = zext <4 x i8> %2 to <4 x i32>
+ %5 = add nuw nsw <4 x i32> %3, %4
+ %6 = add nuw nsw <4 x i32> %5, <i32 1, i32 1, i32 1, i32 1>
+ %7 = lshr <4 x i32> %6, <i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <4 x i32> %7 to <4 x i8>
+ store <4 x i8> %8, <4 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v8i8_2(<8 x i8>* %a, <8 x i8>* %b) {
+; SSE2-LABEL: avg_v8i8_2:
+; SSE2: # BB#0:
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: pavgb %xmm0, %xmm1
+; SSE2-NEXT: movq %xmm1, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: avg_v8i8_2:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX2-NEXT: vpavgb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v8i8_2:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovq (%rdi), %xmm0
+; AVX512BW-NEXT: vmovq (%rsi), %xmm1
+; AVX512BW-NEXT: vpavgb %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <8 x i8>, <8 x i8>* %a
+ %2 = load <8 x i8>, <8 x i8>* %b
+ %3 = zext <8 x i8> %1 to <8 x i32>
+ %4 = zext <8 x i8> %2 to <8 x i32>
+ %5 = add nuw nsw <8 x i32> %3, %4
+ %6 = add nuw nsw <8 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %7 = lshr <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <8 x i32> %7 to <8 x i8>
+ store <8 x i8> %8, <8 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v16i8_2(<16 x i8>* %a, <16 x i8>* %b) {
+; SSE2-LABEL: avg_v16i8_2:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: pavgb (%rsi), %xmm0
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: avg_v16i8_2:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpavgb (%rsi), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rax)
+; AVX-NEXT: retq
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = load <16 x i8>, <16 x i8>* %b
+ %3 = zext <16 x i8> %1 to <16 x i32>
+ %4 = zext <16 x i8> %2 to <16 x i32>
+ %5 = add nuw nsw <16 x i32> %3, %4
+ %6 = add nuw nsw <16 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %7 = lshr <16 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <16 x i32> %7 to <16 x i8>
+ store <16 x i8> %8, <16 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v32i8_2(<32 x i8>* %a, <32 x i8>* %b) {
+; AVX2-LABEL: avg_v32i8_2:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa (%rdi), %ymm0
+; AVX2-NEXT: vpavgb (%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v32i8_2:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
+; AVX512BW-NEXT: vpavgb (%rsi), %ymm0, %ymm0
+; AVX512BW-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <32 x i8>, <32 x i8>* %a
+ %2 = load <32 x i8>, <32 x i8>* %b
+ %3 = zext <32 x i8> %1 to <32 x i32>
+ %4 = zext <32 x i8> %2 to <32 x i32>
+ %5 = add nuw nsw <32 x i32> %3, %4
+ %6 = add nuw nsw <32 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %7 = lshr <32 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <32 x i32> %7 to <32 x i8>
+ store <32 x i8> %8, <32 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v64i8_2(<64 x i8>* %a, <64 x i8>* %b) {
+; AVX512BW-LABEL: avg_v64i8_2:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0
+; AVX512BW-NEXT: vpavgb %zmm0, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <64 x i8>, <64 x i8>* %a
+ %2 = load <64 x i8>, <64 x i8>* %b
+ %3 = zext <64 x i8> %1 to <64 x i32>
+ %4 = zext <64 x i8> %2 to <64 x i32>
+ %5 = add nuw nsw <64 x i32> %4, %4
+ %6 = add nuw nsw <64 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %7 = lshr <64 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <64 x i32> %7 to <64 x i8>
+ store <64 x i8> %8, <64 x i8>* undef, align 4
+ ret void
+}
+
+
+define void @avg_v4i16_2(<4 x i16>* %a, <4 x i16>* %b) {
+; SSE2-LABEL: avg_v4i16_2:
+; SSE2: # BB#0:
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: pavgw %xmm0, %xmm1
+; SSE2-NEXT: movq %xmm1, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: avg_v4i16_2:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v4i16_2:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovq (%rdi), %xmm0
+; AVX512BW-NEXT: vmovq (%rsi), %xmm1
+; AVX512BW-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <4 x i16>, <4 x i16>* %a
+ %2 = load <4 x i16>, <4 x i16>* %b
+ %3 = zext <4 x i16> %1 to <4 x i32>
+ %4 = zext <4 x i16> %2 to <4 x i32>
+ %5 = add nuw nsw <4 x i32> %3, %4
+ %6 = add nuw nsw <4 x i32> %5, <i32 1, i32 1, i32 1, i32 1>
+ %7 = lshr <4 x i32> %6, <i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <4 x i32> %7 to <4 x i16>
+ store <4 x i16> %8, <4 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v8i16_2(<8 x i16>* %a, <8 x i16>* %b) {
+; SSE2-LABEL: avg_v8i16_2:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: pavgw (%rsi), %xmm0
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: avg_v8i16_2:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpavgw (%rsi), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rax)
+; AVX-NEXT: retq
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = load <8 x i16>, <8 x i16>* %b
+ %3 = zext <8 x i16> %1 to <8 x i32>
+ %4 = zext <8 x i16> %2 to <8 x i32>
+ %5 = add nuw nsw <8 x i32> %3, %4
+ %6 = add nuw nsw <8 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %7 = lshr <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <8 x i32> %7 to <8 x i16>
+ store <8 x i16> %8, <8 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v16i16_2(<16 x i16>* %a, <16 x i16>* %b) {
+; AVX2-LABEL: avg_v16i16_2:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa (%rdi), %ymm0
+; AVX2-NEXT: vpavgw (%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v16i16_2:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
+; AVX512BW-NEXT: vpavgw (%rsi), %ymm0, %ymm0
+; AVX512BW-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <16 x i16>, <16 x i16>* %a
+ %2 = load <16 x i16>, <16 x i16>* %b
+ %3 = zext <16 x i16> %1 to <16 x i32>
+ %4 = zext <16 x i16> %2 to <16 x i32>
+ %5 = add nuw nsw <16 x i32> %3, %4
+ %6 = add nuw nsw <16 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %7 = lshr <16 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <16 x i32> %7 to <16 x i16>
+ store <16 x i16> %8, <16 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) {
+; AVX512BW-LABEL: avg_v32i16_2:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BW-NEXT: vpavgw (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <32 x i16>, <32 x i16>* %a
+ %2 = load <32 x i16>, <32 x i16>* %b
+ %3 = zext <32 x i16> %1 to <32 x i32>
+ %4 = zext <32 x i16> %2 to <32 x i32>
+ %5 = add nuw nsw <32 x i32> %3, %4
+ %6 = add nuw nsw <32 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %7 = lshr <32 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %8 = trunc <32 x i32> %7 to <32 x i16>
+ store <32 x i16> %8, <32 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v4i8_const(<4 x i8>* %a) {
+; SSE2-LABEL: avg_v4i8_const:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: pavgb {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movd %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: avg_v4i8_const:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vmovd %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v4i8_const:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovd (%rdi), %xmm0
+; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vmovd %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <4 x i8>, <4 x i8>* %a
+ %2 = zext <4 x i8> %1 to <4 x i32>
+ %3 = add nuw nsw <4 x i32> %2, <i32 1, i32 2, i32 3, i32 4>
+ %4 = lshr <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1>
+ %5 = trunc <4 x i32> %4 to <4 x i8>
+ store <4 x i8> %5, <4 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v8i8_const(<8 x i8>* %a) {
+; SSE2-LABEL: avg_v8i8_const:
+; SSE2: # BB#0:
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: pavgb {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movq %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: avg_v8i8_const:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v8i8_const:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovq (%rdi), %xmm0
+; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <8 x i8>, <8 x i8>* %a
+ %2 = zext <8 x i8> %1 to <8 x i32>
+ %3 = add nuw nsw <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %4 = lshr <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %5 = trunc <8 x i32> %4 to <8 x i8>
+ store <8 x i8> %5, <8 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v16i8_const(<16 x i8>* %a) {
+; SSE2-LABEL: avg_v16i8_const:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: pavgb {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: avg_v16i8_const:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpavgb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rax)
+; AVX-NEXT: retq
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = zext <16 x i8> %1 to <16 x i32>
+ %3 = add nuw nsw <16 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %4 = lshr <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %5 = trunc <16 x i32> %4 to <16 x i8>
+ store <16 x i8> %5, <16 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v32i8_const(<32 x i8>* %a) {
+; AVX2-LABEL: avg_v32i8_const:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa (%rdi), %ymm0
+; AVX2-NEXT: vpavgb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v32i8_const:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
+; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <32 x i8>, <32 x i8>* %a
+ %2 = zext <32 x i8> %1 to <32 x i32>
+ %3 = add nuw nsw <32 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %4 = lshr <32 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %5 = trunc <32 x i32> %4 to <32 x i8>
+ store <32 x i8> %5, <32 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v64i8_const(<64 x i8>* %a) {
+; AVX512BW-LABEL: avg_v64i8_const:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0
+; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <64 x i8>, <64 x i8>* %a
+ %2 = zext <64 x i8> %1 to <64 x i32>
+ %3 = add nuw nsw <64 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %4 = lshr <64 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %5 = trunc <64 x i32> %4 to <64 x i8>
+ store <64 x i8> %5, <64 x i8>* undef, align 4
+ ret void
+}
+
+define void @avg_v4i16_const(<4 x i16>* %a) {
+; SSE2-LABEL: avg_v4i16_const:
+; SSE2: # BB#0:
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: pavgw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movq %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX2-LABEL: avg_v4i16_const:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: vpavgw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v4i16_const:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovq (%rdi), %xmm0
+; AVX512BW-NEXT: vpavgw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <4 x i16>, <4 x i16>* %a
+ %2 = zext <4 x i16> %1 to <4 x i32>
+ %3 = add nuw nsw <4 x i32> %2, <i32 1, i32 2, i32 3, i32 4>
+ %4 = lshr <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1>
+ %5 = trunc <4 x i32> %4 to <4 x i16>
+ store <4 x i16> %5, <4 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v8i16_const(<8 x i16>* %a) {
+; SSE2-LABEL: avg_v8i16_const:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: pavgw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: avg_v8i16_const:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpavgw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rax)
+; AVX-NEXT: retq
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = zext <8 x i16> %1 to <8 x i32>
+ %3 = add nuw nsw <8 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %4 = lshr <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %5 = trunc <8 x i32> %4 to <8 x i16>
+ store <8 x i16> %5, <8 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v16i16_const(<16 x i16>* %a) {
+; AVX2-LABEL: avg_v16i16_const:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa (%rdi), %ymm0
+; AVX2-NEXT: vpavgw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v16i16_const:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
+; AVX512BW-NEXT: vpavgw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <16 x i16>, <16 x i16>* %a
+ %2 = zext <16 x i16> %1 to <16 x i32>
+ %3 = add nuw nsw <16 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %4 = lshr <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %5 = trunc <16 x i32> %4 to <16 x i16>
+ store <16 x i16> %5, <16 x i16>* undef, align 4
+ ret void
+}
+
+define void @avg_v32i16_const(<32 x i16>* %a) {
+; AVX512BW-LABEL: avg_v32i16_const:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
+; AVX512BW-NEXT: vpavgw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax)
+; AVX512BW-NEXT: retq
+ %1 = load <32 x i16>, <32 x i16>* %a
+ %2 = zext <32 x i16> %1 to <32 x i32>
+ %3 = add nuw nsw <32 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+ %4 = lshr <32 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %5 = trunc <32 x i32> %4 to <32 x i16>
+ store <32 x i16> %5, <32 x i16>* undef, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/avx-cvt-2.ll b/test/CodeGen/X86/avx-cvt-2.ll
index 583c7d5947bf..c849312f2367 100644
--- a/test/CodeGen/X86/avx-cvt-2.ll
+++ b/test/CodeGen/X86/avx-cvt-2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
; Check that we generate vector conversion from float to narrower int types
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index 6df3e5324c11..27339898efdb 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
@@ -113,8 +114,7 @@ define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
define void @fpext() nounwind uwtable {
; CHECK-LABEL: fpext:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vcvtss2sd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: retq
%f = alloca float, align 4
@@ -138,7 +138,7 @@ declare double @llvm.nearbyint.f64(double %p)
define float @floor_f32(float %a) {
; CHECK-LABEL: floor_f32:
; CHECK: # BB#0:
-; CHECK-NEXT: vroundss $1, %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call float @llvm.floor.f32(float %a)
ret float %res
diff --git a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
index e2f690bff232..4867869863e3 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -143,3 +143,69 @@ define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxbd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxbq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxbw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxdq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxwd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
+; CHECK-LABEL: test_x86_sse41_pmovsxwq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 28a0272ecf02..206be2396cba 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -1,8 +1,9 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s
define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdec:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaesdec %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -13,7 +14,7 @@ declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesdeclast:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -24,7 +25,7 @@ declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind read
define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenc:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaesenc %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -35,7 +36,7 @@ declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_aesni_aesenclast:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaesenclast %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -46,7 +47,7 @@ declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind read
define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aesimc:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaesimc %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
@@ -57,7 +58,7 @@ declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaeskeygenassist $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
@@ -68,7 +69,7 @@ declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readno
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_add_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -79,7 +80,7 @@ declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
@@ -90,7 +91,7 @@ declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounw
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cmp_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
@@ -101,7 +102,7 @@ declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounw
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comieq_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -114,7 +115,7 @@ declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comige_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
@@ -127,7 +128,7 @@ declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comigt_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -140,7 +141,7 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comile_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
@@ -153,7 +154,7 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comilt_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -166,7 +167,7 @@ declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_comineq_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomisd %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
@@ -179,7 +180,7 @@ declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readn
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
@@ -190,7 +191,7 @@ declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
@@ -201,7 +202,7 @@ declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
@@ -212,7 +213,7 @@ declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
@@ -223,7 +224,7 @@ declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2dq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
@@ -234,7 +235,7 @@ declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtps2pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
@@ -245,7 +246,7 @@ declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtsd2si %xmm0, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
@@ -256,7 +257,7 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
@@ -267,7 +268,7 @@ declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
; CHECK-NEXT: retl
@@ -279,7 +280,7 @@ declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnon
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse2_cvtss2sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
@@ -290,7 +291,7 @@ declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
@@ -301,7 +302,7 @@ declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttps2dq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
@@ -312,7 +313,7 @@ declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvttsd2si:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvttsd2si %xmm0, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
@@ -323,7 +324,7 @@ declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_div_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -335,7 +336,7 @@ declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -346,7 +347,7 @@ declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_max_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -357,7 +358,7 @@ declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vminpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -368,7 +369,7 @@ declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_min_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -379,7 +380,7 @@ declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_movmsk_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmovmskpd %xmm0, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
@@ -392,7 +393,7 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_mul_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -403,7 +404,7 @@ declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_packssdw_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
@@ -414,7 +415,7 @@ declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind rea
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packsswb_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
@@ -425,7 +426,7 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind rea
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_packuswb_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
@@ -436,7 +437,7 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_b:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -447,7 +448,7 @@ declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_padds_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -458,7 +459,7 @@ declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_b:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -469,7 +470,7 @@ declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnon
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_paddus_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -480,7 +481,7 @@ declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnon
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_b:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -491,7 +492,7 @@ declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pavg_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -502,7 +503,7 @@ declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmadd_wd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
@@ -513,7 +514,7 @@ declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnon
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxs_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -524,7 +525,7 @@ declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pmaxu_b:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -535,7 +536,7 @@ declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmins_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -546,7 +547,7 @@ declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_pminu_b:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -557,7 +558,7 @@ declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmovmskb %xmm0, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
@@ -568,7 +569,7 @@ declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulh_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -579,7 +580,7 @@ declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulhu_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -590,7 +591,7 @@ declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnon
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_pmulu_dq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
@@ -601,7 +602,7 @@ declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnon
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psad_bw:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
@@ -612,7 +613,7 @@ declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_d:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -623,7 +624,7 @@ declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_q:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -634,7 +635,7 @@ declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psll_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -645,7 +646,7 @@ declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_d:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpslld $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
@@ -656,7 +657,7 @@ declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_q:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsllq $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
@@ -667,7 +668,7 @@ declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_pslli_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
@@ -678,7 +679,7 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_d:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -689,7 +690,7 @@ declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psra_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -700,7 +701,7 @@ declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_d:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsrad $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
@@ -711,7 +712,7 @@ declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrai_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsraw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
@@ -722,7 +723,7 @@ declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_d:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -733,7 +734,7 @@ declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_q:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -744,7 +745,7 @@ declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psrl_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -755,7 +756,7 @@ declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_d:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsrld $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
@@ -766,7 +767,7 @@ declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_q:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlq $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
@@ -777,7 +778,7 @@ declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse2_psrli_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsrlw $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
@@ -788,7 +789,7 @@ declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_b:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -799,7 +800,7 @@ declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubs_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -810,7 +811,7 @@ declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_b:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -821,7 +822,7 @@ declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnon
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse2_psubus_w:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -832,7 +833,7 @@ declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnon
define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtpd %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
@@ -843,7 +844,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_sqrt_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
@@ -854,7 +855,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse2_storel_dq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovlps %xmm0, (%eax)
; CHECK-NEXT: retl
@@ -867,7 +868,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
; add operation forces the execution domain.
; CHECK-LABEL: test_x86_sse2_storeu_dq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpaddb LCPI77_0, %xmm0, %xmm0
; CHECK-NEXT: vmovdqu %xmm0, (%eax)
@@ -882,7 +883,7 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
; fadd operation forces the execution domain.
; CHECK-LABEL: test_x86_sse2_storeu_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
@@ -898,7 +899,7 @@ declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_sub_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -909,7 +910,7 @@ declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -922,7 +923,7 @@ declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomige_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
@@ -935,7 +936,7 @@ declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -948,7 +949,7 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomile_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
@@ -961,7 +962,7 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -974,7 +975,7 @@ declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomisd %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
@@ -987,7 +988,7 @@ declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind read
define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_addsub_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -998,7 +999,7 @@ declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwi
define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_addsub_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1009,7 +1010,7 @@ declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind
define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_hadd_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -1020,7 +1021,7 @@ declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind
define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_hadd_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vhaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1031,7 +1032,7 @@ declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind re
define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse3_hsub_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -1042,7 +1043,7 @@ declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind
define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse3_hsub_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vhsubps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1053,7 +1054,7 @@ declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind re
define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
; CHECK-LABEL: test_x86_sse3_ldu_dq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vlddqu (%eax), %xmm0
; CHECK-NEXT: retl
@@ -1065,7 +1066,7 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_x86_sse41_blendvpd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
@@ -1076,7 +1077,7 @@ declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x d
define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_x86_sse41_blendvps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
@@ -1087,7 +1088,7 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x floa
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_dppd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
@@ -1098,7 +1099,7 @@ declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwi
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_dpps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
@@ -1109,7 +1110,7 @@ declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_insertps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
@@ -1121,7 +1122,7 @@ declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounw
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_mpsadbw:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
@@ -1132,7 +1133,7 @@ declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind rea
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_packusdw:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
@@ -1143,7 +1144,7 @@ declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readno
define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse41_pblendvb:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
@@ -1154,7 +1155,7 @@ declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) noun
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_phminposuw:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vphminposuw %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
@@ -1165,7 +1166,7 @@ declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxsb:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -1176,7 +1177,7 @@ declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxsd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -1187,7 +1188,7 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxud:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -1198,7 +1199,7 @@ declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pmaxuw:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -1209,7 +1210,7 @@ declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse41_pminsb:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -1220,7 +1221,7 @@ declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pminsd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -1231,7 +1232,7 @@ declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pminud:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -1242,7 +1243,7 @@ declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_sse41_pminuw:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -1251,75 +1252,9 @@ define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
-define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxbd:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
-; CHECK-NEXT: retl
- %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxbq:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
-; CHECK-NEXT: retl
- %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxbw:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
-; CHECK-NEXT: retl
- %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxdq:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
-; CHECK-NEXT: retl
- %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxwd:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
-; CHECK-NEXT: retl
- %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovsxwq:
-; CHECK: # BB#0:
-; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
-; CHECK-NEXT: retl
- %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
-
-
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
@@ -1330,7 +1265,7 @@ declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
@@ -1341,7 +1276,7 @@ declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxbw:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
@@ -1352,7 +1287,7 @@ declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxdq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
@@ -1363,7 +1298,7 @@ declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
@@ -1374,7 +1309,7 @@ declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_sse41_pmovzxwq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
@@ -1385,7 +1320,7 @@ declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_sse41_pmuldq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
@@ -1396,7 +1331,7 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestc:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vptest %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -1409,7 +1344,7 @@ declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestnzc:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vptest %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1422,7 +1357,7 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_sse41_ptestz:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vptest %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1435,7 +1370,7 @@ declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse41_round_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vroundpd $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
@@ -1446,7 +1381,7 @@ declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readno
define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse41_round_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vroundps $7, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
@@ -1457,7 +1392,7 @@ declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_sse41_round_sd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
@@ -1468,7 +1403,7 @@ declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) n
define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse41_round_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
@@ -1479,7 +1414,7 @@ declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) noun
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestri128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
@@ -1493,7 +1428,7 @@ declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nou
define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovdqa (%eax), %xmm0
@@ -1511,7 +1446,7 @@ define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestria128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
@@ -1526,7 +1461,7 @@ declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestric128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
@@ -1541,7 +1476,7 @@ declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
@@ -1556,7 +1491,7 @@ declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestris128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
@@ -1571,7 +1506,7 @@ declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
@@ -1586,7 +1521,7 @@ declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
; CHECK-NEXT: vpcmpestrm $7, %xmm1, %xmm0
@@ -1599,7 +1534,7 @@ declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i
define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: movl $7, %edx
@@ -1613,7 +1548,7 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2
define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistri128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: retl
@@ -1625,7 +1560,7 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind read
define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: vmovdqa (%ecx), %xmm0
@@ -1641,7 +1576,7 @@ define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistria128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1654,7 +1589,7 @@ declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistric128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -1667,7 +1602,7 @@ declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: seto %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1680,7 +1615,7 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistris128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: sets %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1693,7 +1628,7 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1706,7 +1641,7 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpistrm $7, %xmm1, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
@@ -1717,7 +1652,7 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwin
define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0
; CHECK-NEXT: retl
@@ -1729,7 +1664,7 @@ define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_add_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1740,7 +1675,7 @@ declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_cmp_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
@@ -1751,7 +1686,7 @@ declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind
define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_cmp_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
@@ -1762,7 +1697,7 @@ declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind
define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comieq_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1775,7 +1710,7 @@ declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comige_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1788,7 +1723,7 @@ declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comigt_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1801,7 +1736,7 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comile_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1814,7 +1749,7 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comilt_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -1827,7 +1762,7 @@ declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_comineq_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcomiss %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
@@ -1840,7 +1775,7 @@ declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtsi2ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl $7, %eax
; CHECK-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
; CHECK-NEXT: retl
@@ -1852,7 +1787,7 @@ declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtss2si:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtss2si %xmm0, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
@@ -1863,7 +1798,7 @@ declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvttss2si:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvttss2si %xmm0, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
@@ -1874,7 +1809,7 @@ declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_div_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1885,7 +1820,7 @@ declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind read
define void @test_x86_sse_ldmxcsr(i8* %a0) {
; CHECK-LABEL: test_x86_sse_ldmxcsr:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vldmxcsr (%eax)
; CHECK-NEXT: retl
@@ -1898,7 +1833,7 @@ declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1909,7 +1844,7 @@ declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_max_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1920,7 +1855,7 @@ declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1931,7 +1866,7 @@ declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_min_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1942,7 +1877,7 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind read
define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_movmsk_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmovmskps %xmm0, %eax
; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
@@ -1954,7 +1889,7 @@ declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_mul_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -1965,7 +1900,7 @@ declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vrcpps %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
@@ -1976,7 +1911,7 @@ declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rcp_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
@@ -1987,7 +1922,7 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vrsqrtps %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
@@ -1998,7 +1933,7 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_rsqrt_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
@@ -2009,7 +1944,7 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
@@ -2020,7 +1955,7 @@ declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_sqrt_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
@@ -2031,7 +1966,7 @@ declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
define void @test_x86_sse_stmxcsr(i8* %a0) {
; CHECK-LABEL: test_x86_sse_stmxcsr:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vstmxcsr (%eax)
; CHECK-NEXT: retl
@@ -2043,7 +1978,7 @@ declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_storeu_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovups %xmm0, (%eax)
; CHECK-NEXT: retl
@@ -2055,7 +1990,7 @@ declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_sub_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
@@ -2066,7 +2001,7 @@ declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind read
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomieq_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -2079,7 +2014,7 @@ declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomige_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setae %al
; CHECK-NEXT: movzbl %al, %eax
@@ -2092,7 +2027,7 @@ declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomigt_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -2105,7 +2040,7 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomile_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setbe %al
; CHECK-NEXT: movzbl %al, %eax
@@ -2118,7 +2053,7 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomilt_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -2131,7 +2066,7 @@ declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_sse_ucomineq_ss:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vucomiss %xmm1, %xmm0
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
@@ -2144,7 +2079,7 @@ declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnon
define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpabsb %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
@@ -2155,7 +2090,7 @@ declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpabsd %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
@@ -2166,7 +2101,7 @@ declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpabsw %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
@@ -2177,7 +2112,7 @@ declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vphaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -2188,7 +2123,7 @@ declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind rea
define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vphaddsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -2199,7 +2134,7 @@ declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind re
define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vphaddw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -2210,7 +2145,7 @@ declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind rea
define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vphsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -2221,7 +2156,7 @@ declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind rea
define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vphsubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -2232,7 +2167,7 @@ declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind re
define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vphsubw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -2243,7 +2178,7 @@ declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind rea
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
@@ -2254,7 +2189,7 @@ declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind
define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -2265,7 +2200,7 @@ declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind
define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -2276,7 +2211,7 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind rea
define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_b_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsignb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -2287,7 +2222,7 @@ declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind rea
define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_d_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsignd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -2298,7 +2233,7 @@ declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind rea
define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_ssse3_psign_w_128:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpsignw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -2309,7 +2244,7 @@ declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind rea
define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
@@ -2320,7 +2255,7 @@ declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nou
define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_addsub_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
@@ -2331,7 +2266,7 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
@@ -2342,7 +2277,7 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4
define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_x86_avx_blendv_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
@@ -2353,7 +2288,7 @@ declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x f
define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
@@ -2364,7 +2299,7 @@ declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) no
define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
@@ -2373,7 +2308,7 @@ define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %ymm1
; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %ymm1
@@ -2446,7 +2381,7 @@ declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounw
define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2psy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
@@ -2458,7 +2393,7 @@ declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtpd2dqy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
@@ -2470,7 +2405,7 @@ declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
@@ -2481,7 +2416,7 @@ declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
@@ -2492,7 +2427,7 @@ declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
@@ -2503,7 +2438,7 @@ declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
@@ -2514,7 +2449,7 @@ declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
@@ -2526,7 +2461,7 @@ declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
@@ -2537,7 +2472,7 @@ declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_dp_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
@@ -2548,7 +2483,7 @@ declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwi
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
@@ -2559,7 +2494,7 @@ declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounw
define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hadd_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
@@ -2570,7 +2505,7 @@ declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind
define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
@@ -2581,7 +2516,7 @@ declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounw
define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_hsub_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
@@ -2592,7 +2527,7 @@ declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind
define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_ldu_dq_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vlddqu (%eax), %ymm0
; CHECK-NEXT: retl
@@ -2602,107 +2537,107 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
-define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
+define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
; CHECK-LABEL: test_x86_avx_maskload_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0
; CHECK-NEXT: retl
- %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
+declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
-define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
+define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
; CHECK-LABEL: test_x86_avx_maskload_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0
; CHECK-NEXT: retl
- %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
-declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
+declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
-define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
+define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
; CHECK-LABEL: test_x86_avx_maskload_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovps (%eax), %xmm0, %xmm0
; CHECK-NEXT: retl
- %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
+declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
-define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
+define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
; CHECK-LABEL: test_x86_avx_maskload_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovps (%eax), %ymm0, %ymm0
; CHECK-NEXT: retl
- %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
-declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
-define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
+define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax)
; CHECK-NEXT: retl
- call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
+ call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
ret void
}
-declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
+declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
-define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
+define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
- call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
+ call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
ret void
}
-declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
+declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
-define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
+define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovps %xmm1, %xmm0, (%eax)
; CHECK-NEXT: retl
- call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
+ call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
ret void
}
-declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
+declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
-define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
+define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmaskmovps %ymm1, %ymm0, (%eax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
- call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
+ call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
ret void
}
-declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_max_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
@@ -2713,7 +2648,7 @@ declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwi
define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_max_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
@@ -2724,7 +2659,7 @@ declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind
define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_min_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vminpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
@@ -2735,7 +2670,7 @@ declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwi
define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_min_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
@@ -2746,7 +2681,7 @@ declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind
define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmovmskpd %ymm0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
@@ -2758,7 +2693,7 @@ declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vmovmskps %ymm0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
@@ -2775,7 +2710,7 @@ declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestc_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vptest %ymm1, %ymm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -2789,7 +2724,7 @@ declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestnzc_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vptest %ymm1, %ymm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -2803,7 +2738,7 @@ declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestz_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vptest %ymm1, %ymm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -2817,7 +2752,7 @@ declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rcp_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vrcpps %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
@@ -2828,7 +2763,7 @@ declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_round_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vroundpd $7, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
@@ -2839,7 +2774,7 @@ declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind read
define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_round_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vroundps $7, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
@@ -2850,7 +2785,7 @@ declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readno
define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vrsqrtps %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
@@ -2861,7 +2796,7 @@ declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtpd %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
@@ -2872,7 +2807,7 @@ declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtps %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
@@ -2885,7 +2820,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; FIXME: unfortunately the execution domain fix pass changes this to vmovups and its hard to force with no 256-bit integer instructions
; add operation forces the execution domain.
; CHECK-LABEL: test_x86_avx_storeu_dq_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
@@ -2905,7 +2840,7 @@ declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
; add operation forces the execution domain.
; CHECK-LABEL: test_x86_avx_storeu_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
@@ -2921,7 +2856,7 @@ declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_storeu_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovups %ymm0, (%eax)
; CHECK-NEXT: vzeroupper
@@ -2934,7 +2869,7 @@ declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
; CHECK-NEXT: retl
@@ -2946,7 +2881,7 @@ declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
; CHECK-NEXT: retl
@@ -2958,7 +2893,7 @@ declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
@@ -2969,7 +2904,7 @@ declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>,
define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
@@ -2980,7 +2915,7 @@ declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8
define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
; CHECK-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
@@ -2991,7 +2926,7 @@ declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) noun
define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
@@ -3002,7 +2937,7 @@ declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnon
define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
@@ -3013,7 +2948,7 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea
define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
@@ -3024,7 +2959,7 @@ declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
@@ -3035,7 +2970,7 @@ declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readn
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
@@ -3046,7 +2981,7 @@ declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwi
define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
@@ -3054,10 +2989,18 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64>
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
+define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
+; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256_2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpermilpd {{.*}}, %ymm0, %ymm0 ## ymm0 = ymm0[1,0,2,3]
+; CHECK-NEXT: retl
+ %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
@@ -3065,7 +3008,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
}
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vpermilps (%eax), %xmm0, %xmm0
; CHECK-NEXT: retl
@@ -3078,7 +3021,7 @@ declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind
define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
@@ -3089,7 +3032,7 @@ declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) noun
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -3102,7 +3045,7 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon
define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %ymm1, %ymm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -3116,7 +3059,7 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea
define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -3129,7 +3072,7 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: andl $1, %eax
@@ -3143,7 +3086,7 @@ declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readn
define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -3156,7 +3099,7 @@ declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %ymm1, %ymm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -3170,7 +3113,7 @@ declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind r
define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -3183,7 +3126,7 @@ declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnon
define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: seta %al
; CHECK-NEXT: movzbl %al, %eax
@@ -3197,7 +3140,7 @@ declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind rea
define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -3210,7 +3153,7 @@ declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnon
define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestpd %ymm1, %ymm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -3224,7 +3167,7 @@ declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind rea
define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %xmm1, %xmm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -3237,7 +3180,7 @@ declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vtestps %ymm1, %ymm0
; CHECK-NEXT: sete %al
; CHECK-NEXT: movzbl %al, %eax
@@ -3251,7 +3194,7 @@ declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readn
define void @test_x86_avx_vzeroall() {
; CHECK-LABEL: test_x86_avx_vzeroall:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vzeroall
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
@@ -3263,7 +3206,7 @@ declare void @llvm.x86.avx.vzeroall() nounwind
define void @test_x86_avx_vzeroupper() {
; CHECK-LABEL: test_x86_avx_vzeroupper:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
@@ -3276,7 +3219,7 @@ declare void @llvm.x86.avx.vzeroupper() nounwind
define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
; CHECK-LABEL: monitor:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -3290,7 +3233,7 @@ declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
define void @mwait(i32 %E, i32 %H) nounwind {
; CHECK-LABEL: mwait:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: mwait
@@ -3302,7 +3245,7 @@ declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
define void @sfence() nounwind {
; CHECK-LABEL: sfence:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: sfence
; CHECK-NEXT: retl
tail call void @llvm.x86.sse.sfence()
@@ -3312,7 +3255,7 @@ declare void @llvm.x86.sse.sfence() nounwind
define void @lfence() nounwind {
; CHECK-LABEL: lfence:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: lfence
; CHECK-NEXT: retl
tail call void @llvm.x86.sse2.lfence()
@@ -3322,7 +3265,7 @@ declare void @llvm.x86.sse2.lfence() nounwind
define void @mfence() nounwind {
; CHECK-LABEL: mfence:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: mfence
; CHECK-NEXT: retl
tail call void @llvm.x86.sse2.mfence()
@@ -3332,7 +3275,7 @@ declare void @llvm.x86.sse2.mfence() nounwind
define void @clflush(i8* %p) nounwind {
; CHECK-LABEL: clflush:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: clflush (%eax)
; CHECK-NEXT: retl
@@ -3343,7 +3286,7 @@ declare void @llvm.x86.sse2.clflush(i8*) nounwind
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
; CHECK-LABEL: crc32_32_8:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: crc32b {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: retl
@@ -3354,7 +3297,7 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
; CHECK-LABEL: crc32_32_16:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: crc32w {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: retl
@@ -3365,7 +3308,7 @@ declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: crc32_32_32:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: crc32l {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: retl
@@ -3376,9 +3319,9 @@ declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; CHECK-LABEL: movnt_dq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: vpaddq LCPI282_0, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq LCPI277_0, %xmm0, %xmm0
; CHECK-NEXT: vmovntdq %ymm0, (%eax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
@@ -3391,7 +3334,7 @@ declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
; CHECK-LABEL: movnt_ps:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vmovntps %ymm0, (%eax)
; CHECK-NEXT: vzeroupper
@@ -3404,7 +3347,7 @@ declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
; add operation forces the execution domain.
; CHECK-LABEL: movnt_pd:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
@@ -3421,7 +3364,7 @@ declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
; Check for pclmulqdq
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_pclmulqdq:
-; CHECK: # BB#0:
+; CHECK: ## BB#0:
; CHECK-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/avx-isa-check.ll b/test/CodeGen/X86/avx-isa-check.ll
new file mode 100644
index 000000000000..77bfbd4bb423
--- /dev/null
+++ b/test/CodeGen/X86/avx-isa-check.ll
@@ -0,0 +1,570 @@
+; check AVX2 instructions that are disabled in case avx512VL/avx512BW present
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512bw -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null
+
+define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %x = and <4 x i64> %a2, %b
+ ret <4 x i64> %x
+}
+
+define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %x = and <2 x i64> %a2, %b
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %x = and <4 x i64> %a, %y
+ ret <4 x i64> %x
+}
+
+define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
+ %x = and <2 x i64> %a, %y
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %x = or <4 x i64> %a2, %b
+ ret <4 x i64> %x
+}
+
+define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %x = xor <4 x i64> %a2, %b
+ ret <4 x i64> %x
+}
+
+define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %x = or <2 x i64> %a2, %b
+ ret <2 x i64> %x
+}
+
+define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %x = xor <2 x i64> %a2, %b
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+ %x = add <4 x i64> %i, %j
+ ret <4 x i64> %x
+}
+
+define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %x = add <8 x i32> %i, %j
+ ret <8 x i32> %x
+}
+
+define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %x = add <16 x i16> %i, %j
+ ret <16 x i16> %x
+}
+
+define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %x = add <32 x i8> %i, %j
+ ret <32 x i8> %x
+}
+
+define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+ %x = sub <4 x i64> %i, %j
+ ret <4 x i64> %x
+}
+
+define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %x = sub <8 x i32> %i, %j
+ ret <8 x i32> %x
+}
+
+define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %x = sub <16 x i16> %i, %j
+ ret <16 x i16> %x
+}
+
+define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %x = sub <32 x i8> %i, %j
+ ret <32 x i8> %x
+}
+
+define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %x = mul <16 x i16> %i, %j
+ ret <16 x i16> %x
+}
+
+define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %bincmp = icmp slt <8 x i32> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i32>
+ ret <8 x i32> %x
+}
+
+define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %bincmp = icmp eq <32 x i8> %i, %j
+ %x = sext <32 x i1> %bincmp to <32 x i8>
+ ret <32 x i8> %x
+}
+
+define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %bincmp = icmp eq <16 x i16> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i16>
+ ret <16 x i16> %x
+}
+
+define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %bincmp = icmp slt <32 x i8> %i, %j
+ %x = sext <32 x i1> %bincmp to <32 x i8>
+ ret <32 x i8> %x
+}
+
+define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %bincmp = icmp slt <16 x i16> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i16>
+ ret <16 x i16> %x
+}
+
+define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %bincmp = icmp eq <8 x i32> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i32>
+ ret <8 x i32> %x
+}
+
+define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
+ %x = add <2 x i64> %i, %j
+ ret <2 x i64> %x
+}
+
+define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
+ %x = add <4 x i32> %i, %j
+ ret <4 x i32> %x
+}
+
+define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %x = add <8 x i16> %i, %j
+ ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+ %x = add <16 x i8> %i, %j
+ ret <16 x i8> %x
+}
+
+define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
+ %x = sub <2 x i64> %i, %j
+ ret <2 x i64> %x
+}
+
+define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
+ %x = sub <4 x i32> %i, %j
+ ret <4 x i32> %x
+}
+
+define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %x = sub <8 x i16> %i, %j
+ ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+ %x = sub <16 x i8> %i, %j
+ ret <16 x i8> %x
+}
+
+define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %x = mul <8 x i16> %i, %j
+ ret <8 x i16> %x
+}
+
+define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %bincmp = icmp slt <8 x i16> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i16>
+ ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+ %bincmp = icmp slt <16 x i8> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i8>
+ ret <16 x i8> %x
+}
+
+define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+ %bincmp = icmp eq <8 x i16> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i16>
+ ret <8 x i16> %x
+}
+
+define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+ %bincmp = icmp eq <16 x i8> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i8>
+ ret <16 x i8> %x
+}
+
+define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+ ret <16 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i8> %shuffle
+}
+
+define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
+ %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+ ret <4 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
+ ret <8 x i32> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
+ ret <4 x double> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
+ %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
+ %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
+ %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+ %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
+ ret <2 x double> %bitcast64
+}
+
+define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
+ %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
+ ret <16 x i16> %shuffle
+}
+
+define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
+ %r1 = extractelement <2 x i64> %x, i32 0
+ %r2 = extractelement <2 x i64> %x, i32 1
+ store i64 %r2, i64* %dst, align 1
+ ret i64 %r1
+}
+
+define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
+ %r1 = extractelement <4 x i32> %x, i32 1
+ %r2 = extractelement <4 x i32> %x, i32 3
+ store i32 %r2, i32* %dst, align 1
+ ret i32 %r1
+}
+
+define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
+ %r1 = extractelement <8 x i16> %x, i32 1
+ %r2 = extractelement <8 x i16> %x, i32 3
+ store i16 %r2, i16* %dst, align 1
+ ret i16 %r1
+}
+
+define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
+ %r1 = extractelement <16 x i8> %x, i32 1
+ %r2 = extractelement <16 x i8> %x, i32 3
+ store i8 %r2, i8* %dst, align 1
+ ret i8 %r1
+}
+
+define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
+ %val = load i64, i64* %ptr
+ %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
+ %r2 = insertelement <2 x i64> %r1, i64 %y, i32 3
+ ret <2 x i64> %r2
+}
+
+define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
+ %val = load i32, i32* %ptr
+ %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
+ %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
+ ret <4 x i32> %r2
+}
+
+define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
+ %val = load i16, i16* %ptr
+ %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
+ %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
+ ret <8 x i16> %r2
+}
+
+define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
+ %val = load i8, i8* %ptr
+ %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
+ %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
+ ret <16 x i8> %r2
+}
+
+define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
+ ret <4 x i32> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x i16> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
+; vmovshdup 256 test
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+ ret <8 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
+; vmovshdup 128 test
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ ret <4 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
+; vmovsldup 256 test
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ ret <8 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
+; vmovsldup 128 test
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x float> %shuffle
+}
+
+define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
+ %a = load double, double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
+ %a = load double, double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+ ret <2 x double> %shuffle
+}
+
+define void @store_floats(<4 x float> %x, i64* %p) {
+ %a = fadd <4 x float> %x, %x
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %c = bitcast <2 x float> %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_double(<2 x double> %x, i64* %p) {
+ %a = fadd <2 x double> %x, %x
+ %b = extractelement <2 x double> %a, i32 0
+ %c = bitcast double %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_h_double(<2 x double> %x, i64* %p) {
+ %a = fadd <2 x double> %x, %x
+ %b = extractelement <2 x double> %a, i32 1
+ %c = bitcast double %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define <2 x double> @test39(double* %ptr) nounwind {
+ %a = load double, double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %shuffle
+ }
+
+define <2 x double> @test40(<2 x double>* %ptr) nounwind {
+ %v = load <2 x double>, <2 x double>* %ptr
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %shuffle
+ }
+
+define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
+ %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x double> %shuffle
+}
+
+define <8 x i32> @ashr_v8i32(<8 x i32> %a, <8 x i32> %b) {
+ %shift = ashr <8 x i32> %a, %b
+ ret <8 x i32> %shift
+}
+
+define <8 x i32> @lshr_v8i32(<8 x i32> %a, <8 x i32> %b) {
+ %shift = lshr <8 x i32> %a, %b
+ ret <8 x i32> %shift
+}
+
+define <8 x i32> @shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
+ %shift = shl <8 x i32> %a, %b
+ ret <8 x i32> %shift
+}
+
+define <8 x i32> @ashr_const_v8i32(<8 x i32> %a) {
+ %shift = ashr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x i32> %shift
+}
+
+define <8 x i32> @lshr_const_v8i32(<8 x i32> %a) {
+ %shift = lshr <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x i32> %shift
+}
+
+define <8 x i32> @shl_const_v8i32(<8 x i32> %a) {
+ %shift = shl <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x i32> %shift
+}
+
+define <4 x i64> @ashr_v4i64(<4 x i64> %a, <4 x i64> %b) {
+ %shift = ashr <4 x i64> %a, %b
+ ret <4 x i64> %shift
+}
+
+define <4 x i64> @lshr_v4i64(<4 x i64> %a, <4 x i64> %b) {
+ %shift = lshr <4 x i64> %a, %b
+ ret <4 x i64> %shift
+}
+
+define <4 x i64> @shl_v4i64(<4 x i64> %a, <4 x i64> %b) {
+ %shift = shl <4 x i64> %a, %b
+ ret <4 x i64> %shift
+}
+
+define <4 x i64> @ashr_const_v4i64(<4 x i64> %a) {
+ %shift = ashr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
+ ret <4 x i64> %shift
+}
+
+define <4 x i64> @lshr_const_v4i64(<4 x i64> %a) {
+ %shift = lshr <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
+ ret <4 x i64> %shift
+}
+
+define <4 x i64> @shl_const_v4i64(<4 x i64> %a) {
+ %shift = shl <4 x i64> %a, <i64 3, i64 3, i64 3, i64 3>
+ ret <4 x i64> %shift
+}
+
+define <16 x i16> @ashr_v16i16(<16 x i16> %a, <16 x i16> %b) {
+ %shift = ashr <16 x i16> %a, %b
+ ret <16 x i16> %shift
+}
+
+define <16 x i16> @lshr_v16i16(<16 x i16> %a, <16 x i16> %b) {
+ %shift = lshr <16 x i16> %a, %b
+ ret <16 x i16> %shift
+}
+
+define <16 x i16> @shl_v16i16(<16 x i16> %a, <16 x i16> %b) {
+ %shift = shl <16 x i16> %a, %b
+ ret <16 x i16> %shift
+}
+
+define <16 x i16> @ashr_const_v16i16(<16 x i16> %a) {
+ %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <16 x i16> %shift
+}
+
+define <16 x i16> @lshr_const_v16i16(<16 x i16> %a) {
+ %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <16 x i16> %shift
+}
+
+define <16 x i16> @shl_const_v16i16(<16 x i16> %a) {
+ %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <16 x i16> %shift
+}
+
+define <4 x i32> @ashr_v4i32(<4 x i32> %a, <4 x i32> %b) {
+ %shift = ashr <4 x i32> %a, %b
+ ret <4 x i32> %shift
+}
+
+define <4 x i32> @shl_const_v4i32(<4 x i32> %a) {
+ %shift = shl <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x i32> %shift
+}
+
+define <2 x i64> @ashr_v2i64(<2 x i64> %a, <2 x i64> %b) {
+ %shift = ashr <2 x i64> %a, %b
+ ret <2 x i64> %shift
+}
+
+define <2 x i64> @shl_const_v2i64(<2 x i64> %a) {
+ %shift = shl <2 x i64> %a, <i64 3, i64 3>
+ ret <2 x i64> %shift
+}
+
+define <8 x i16> @ashr_v8i16(<8 x i16> %a, <8 x i16> %b) {
+ %shift = ashr <8 x i16> %a, %b
+ ret <8 x i16> %shift
+}
+
+define <8 x i16> @lshr_v8i16(<8 x i16> %a, <8 x i16> %b) {
+ %shift = lshr <8 x i16> %a, %b
+ ret <8 x i16> %shift
+}
+
+define <8 x i16> @shl_v8i16(<8 x i16> %a, <8 x i16> %b) {
+ %shift = shl <8 x i16> %a, %b
+ ret <8 x i16> %shift
+}
+
+define <8 x i16> @ashr_const_v8i16(<8 x i16> %a) {
+ %shift = ashr <8 x i16> %a,<i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <8 x i16> %shift
+}
+
+define <8 x i16> @lshr_const_v8i16(<8 x i16> %a) {
+ %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <8 x i16> %shift
+}
+
+define <8 x i16> @shl_const_v8i16(<8 x i16> %a) {
+ %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <8 x i16> %shift
+}
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index d2f213bac7bb..d7eceb7cce66 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -88,7 +88,7 @@ entry:
ret void
}
-declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
; CHECK_O0: _f_f
; CHECK-O0: vmovss LCPI
@@ -105,7 +105,7 @@ cif_mask_mixed: ; preds = %allocas
br i1 undef, label %cif_mixed_test_all, label %cif_mixed_test_any_check
cif_mixed_test_all: ; preds = %cif_mask_mixed
- call void @llvm.x86.avx.maskstore.ps.256(i8* undef, <8 x float> <float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, <8 x float> undef) nounwind
+ call void @llvm.x86.avx.maskstore.ps.256(i8* undef, <8 x i32> <i32 -1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <8 x float> undef) nounwind
unreachable
cif_mixed_test_any_check: ; preds = %cif_mask_mixed
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index e71ac473b44d..e9e7d5aea273 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -1,4 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andpd256:
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index 83585b536095..033a95276608 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
;;; Shift left
diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll
index 78b4888cfa16..fae5b41abfa6 100755
--- a/test/CodeGen/X86/avx-shuffle-x86_32.ll
+++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -1,8 +1,26 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s
+; Avoid unnecessary vinsertf128
define <4 x i64> @test1(<4 x i64> %a) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-NEXT: retl
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
ret <4 x i64>%b
- ; CHECK-LABEL: test1:
- ; CHECK-NOT: vinsertf128
- }
+}
+
+define <8 x i16> @test2(<4 x i16>* %v) nounwind {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: retl
+ %v9 = load <4 x i16>, <4 x i16> * %v, align 8
+ %v10 = shufflevector <4 x i16> %v9, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %v11 = shufflevector <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %v10, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %v11
+}
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 3ea7e386c426..ebaaf0e8d00d 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -1,26 +1,34 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
-
-; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
-; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: funcA:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <32 x i8> %shuffle
}
-; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
-; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: funcB:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <16 x i16> %shuffle
}
-; CHECK: vmovq
-; CHECK-NEXT: vmovddup %xmm
-; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
+; CHECK-LABEL: funcC:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovq %rdi, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
@@ -29,9 +37,12 @@ entry:
ret <4 x i64> %vecinit6.i
}
-; CHECK: vmovddup %xmm
-; CHECK-NEXT: vinsertf128 $1
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
+; CHECK-LABEL: funcD:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%vecinit.i = insertelement <4 x double> undef, double %q, i32 0
%vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
@@ -43,8 +54,23 @@ entry:
; Test this turns into a broadcast:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
-; CHECK: vbroadcastss
define <8 x float> @funcE() nounwind {
+; CHECK-LABEL: funcE:
+; CHECK: ## BB#0: ## %for_exit499
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: ## implicit-def: %YMM0
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne LBB4_2
+; CHECK-NEXT: ## BB#1: ## %load.i1247
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: andq $-32, %rsp
+; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520
+; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
+; CHECK-NEXT: movq %rbp, %rsp
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249
+; CHECK-NEXT: retq
allocas:
%udx495 = alloca [18 x [18 x float]], align 32
br label %for_test505.preheader
@@ -69,29 +95,79 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex
ret <8 x float> %load_broadcast12281250
}
-; CHECK: vpermilps $4
-; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcF(i32 %val) nounwind {
+; CHECK-LABEL: funcF:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovd %edi, %xmm0
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
%ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
%ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
%tmp = bitcast <8 x i32> %ret7 to <8 x float>
ret <8 x float> %tmp
}
-; CHECK: vpermilps $0
-; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: funcG:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x float> %shuffle
}
-; CHECK: vextractf128 $1
-; CHECK-NEXT: vpermilps $85
-; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: funcH:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x float> %shuffle
}
+define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
+; CHECK-LABEL: splat_load_2f64_11:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: retq
+ %x = load <2 x double>, <2 x double>* %ptr
+ %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %x1
+}
+
+define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
+; CHECK-LABEL: splat_load_4f64_2222:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; CHECK-NEXT: retq
+ %x = load <4 x double>, <4 x double>* %ptr
+ %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x double> %x1
+}
+
+define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
+; CHECK-LABEL: splat_load_4f32_0000:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %x = load <4 x float>, <4 x float>* %ptr
+ %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ ret <4 x float> %x1
+}
+
+define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
+; CHECK-LABEL: splat_load_8f32_77777777:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
+; CHECK-NEXT: retq
+ %x = load <8 x float>, <8 x float>* %ptr
+ %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+ ret <8 x float> %x1
+}
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 8b8c11b85875..86b0628aa0bc 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -1,7 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-; CHECK: vbroadcastsd (%
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: A:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 8
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -11,8 +15,11 @@ entry:
ret <4 x i64> %vecinit6.i
}
-; CHECK: vbroadcastss (%
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: B:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
@@ -22,8 +29,11 @@ entry:
ret <8 x i32> %vecinit6.i
}
-; CHECK: vbroadcastsd (%
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: C:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load double, double* %ptr, align 8
%vecinit.i = insertelement <4 x double> undef, double %q, i32 0
@@ -33,8 +43,11 @@ entry:
ret <4 x double> %vecinit6.i
}
-; CHECK: vbroadcastss (%
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: D:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load float, float* %ptr, align 4
%vecinit.i = insertelement <8 x float> undef, float %q, i32 0
@@ -46,8 +59,11 @@ entry:
;;;; 128-bit versions
-; CHECK: vbroadcastss (%
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: e:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load float, float* %ptr, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -57,12 +73,14 @@ entry:
ret <4 x float> %vecinit6.i
}
-
-; CHECK: _e2
-; CHECK-NOT: vbroadcastss
-; CHECK: ret
+; Don't broadcast constants on pre-AVX2 hardware.
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
- %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
+; CHECK-LABEL: _e2:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
+; CHECK-NEXT: retq
+entry:
+ %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
@@ -70,8 +88,11 @@ define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
}
-; CHECK: vbroadcastss (%
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: F:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
@@ -81,12 +102,158 @@ entry:
ret <4 x i32> %vecinit6.i
}
+; FIXME: Pointer adjusted broadcasts
+
+define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i32_4i32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_4i32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_8i32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i32>, <8 x i32>* %ptr
+ %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x i32> %ret
+}
+
+define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f32_4f32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_4f32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_8f32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x float>, <8 x float>* %ptr
+ %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x float> %ret
+}
+
+define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_4i64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovapd (%rdi), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i64>, <4 x i64>* %ptr
+ %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x i64> %ret
+}
+
+define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_4f64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x double>, <4 x double>* %ptr
+ %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x double> %ret
+}
+
; Unsupported vbroadcasts
-; CHECK: _G
-; CHECK-NOT: broadcast (%
-; CHECK: ret
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: G:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; CHECK-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 8
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
@@ -94,18 +261,21 @@ entry:
ret <2 x i64> %vecinit2.i
}
-; CHECK: _H
-; CHECK-NOT: broadcast
-; CHECK: ret
define <4 x i32> @H(<4 x i32> %a) {
+; CHECK-LABEL: H:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: retq
+entry:
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
ret <4 x i32> %x
}
-; CHECK: _I
-; CHECK-NOT: broadcast (%
-; CHECK: ret
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: I:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; CHECK-NEXT: retq
entry:
%q = load double, double* %ptr, align 4
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
@@ -113,10 +283,13 @@ entry:
ret <2 x double> %vecinit2.i
}
-; CHECK: _RR
-; CHECK: vbroadcastss (%
-; CHECK: ret
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _RR:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
+; CHECK-NEXT: movl (%rsi), %eax
+; CHECK-NEXT: movl %eax, (%rax)
+; CHECK-NEXT: retq
entry:
%q = load float, float* %ptr, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -129,11 +302,11 @@ entry:
ret <4 x float> %vecinit6.i
}
-
-; CHECK: _RR2
-; CHECK: vbroadcastss (%
-; CHECK: ret
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _RR2:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load float, float* %ptr, align 4
%v = insertelement <4 x float> undef, float %q, i32 0
@@ -141,16 +314,15 @@ entry:
ret <4 x float> %t
}
-
; These tests check that a vbroadcast instruction is used when we have a splat
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).
-; CHECK-LABEL: splat_concat1
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastss (%
-; CHECK-NEXT: ret
define <8 x float> @splat_concat1(float* %p) {
+; CHECK-LABEL: splat_concat1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
+; CHECK-NEXT: retq
%1 = load float, float* %p, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0
%3 = insertelement <4 x float> %2, float %1, i32 1
@@ -160,11 +332,11 @@ define <8 x float> @splat_concat1(float* %p) {
ret <8 x float> %6
}
-; CHECK-LABEL: splat_concat2
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastss (%
-; CHECK-NEXT: ret
define <8 x float> @splat_concat2(float* %p) {
+; CHECK-LABEL: splat_concat2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
+; CHECK-NEXT: retq
%1 = load float, float* %p, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0
%3 = insertelement <4 x float> %2, float %1, i32 1
@@ -178,11 +350,11 @@ define <8 x float> @splat_concat2(float* %p) {
ret <8 x float> %10
}
-; CHECK-LABEL: splat_concat3
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastsd (%
-; CHECK-NEXT: ret
define <4 x double> @splat_concat3(double* %p) {
+; CHECK-LABEL: splat_concat3:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
+; CHECK-NEXT: retq
%1 = load double, double* %p, align 8
%2 = insertelement <2 x double> undef, double %1, i32 0
%3 = insertelement <2 x double> %2, double %1, i32 1
@@ -190,11 +362,11 @@ define <4 x double> @splat_concat3(double* %p) {
ret <4 x double> %4
}
-; CHECK-LABEL: splat_concat4
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastsd (%
-; CHECK-NEXT: ret
define <4 x double> @splat_concat4(double* %p) {
+; CHECK-LABEL: splat_concat4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
+; CHECK-NEXT: retq
%1 = load double, double* %p, align 8
%2 = insertelement <2 x double> undef, double %1, i32 0
%3 = insertelement <2 x double> %2, double %1, i32 1
@@ -203,4 +375,3 @@ define <4 x double> @splat_concat4(double* %p) {
%6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x double> %6
}
-
diff --git a/test/CodeGen/X86/avx-vperm2x128.ll b/test/CodeGen/X86/avx-vperm2x128.ll
index 4e43f6f51921..0958008d9a3e 100644
--- a/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/test/CodeGen/X86/avx-vperm2x128.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
@@ -262,13 +263,13 @@ entry:
ret <8 x float> %shuffle
}
-;; Test zero mask generation.
+;; Test zero mask generation.
;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
;; Prefer xor+vblendpd over vperm2f128 because that has better performance.
define <4 x double> @vperm2z_0x08(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x08:
-; ALL: # BB#0:
+; ALL: ## BB#0:
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
@@ -277,7 +278,7 @@ define <4 x double> @vperm2z_0x08(<4 x double> %a) {
define <4 x double> @vperm2z_0x18(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x18:
-; ALL: # BB#0:
+; ALL: ## BB#0:
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT: retq
@@ -287,7 +288,7 @@ define <4 x double> @vperm2z_0x18(<4 x double> %a) {
define <4 x double> @vperm2z_0x28(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x28:
-; ALL: # BB#0:
+; ALL: ## BB#0:
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -296,7 +297,7 @@ define <4 x double> @vperm2z_0x28(<4 x double> %a) {
define <4 x double> @vperm2z_0x38(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x38:
-; ALL: # BB#0:
+; ALL: ## BB#0:
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT: retq
@@ -306,8 +307,9 @@ define <4 x double> @vperm2z_0x38(<4 x double> %a) {
define <4 x double> @vperm2z_0x80(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x80:
-; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
+; ALL: ## BB#0:
+; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x double> %s
@@ -315,7 +317,7 @@ define <4 x double> @vperm2z_0x80(<4 x double> %a) {
define <4 x double> @vperm2z_0x81(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x81:
-; ALL: # BB#0:
+; ALL: ## BB#0:
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
@@ -324,8 +326,9 @@ define <4 x double> @vperm2z_0x81(<4 x double> %a) {
define <4 x double> @vperm2z_0x82(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x82:
-; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
+; ALL: ## BB#0:
+; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x double> %s
@@ -333,7 +336,7 @@ define <4 x double> @vperm2z_0x82(<4 x double> %a) {
define <4 x double> @vperm2z_0x83(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x83:
-; ALL: # BB#0:
+; ALL: ## BB#0:
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
@@ -343,10 +346,21 @@ define <4 x double> @vperm2z_0x83(<4 x double> %a) {
;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection.
define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
-; ALL-LABEL: vperm2z_int_0x83:
-; ALL: # BB#0:
-; AVX1: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
-; AVX2: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX1-LABEL: vperm2z_int_0x83:
+; AVX1: ## BB#0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: vperm2z_int_0x83:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
%s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
%c = add <4 x i64> %b, %s
ret <4 x i64> %c
diff --git a/test/CodeGen/X86/avx-win64.ll b/test/CodeGen/X86/avx-win64.ll
index dc6bd594450f..64bc398a97ea 100644
--- a/test/CodeGen/X86/avx-win64.ll
+++ b/test/CodeGen/X86/avx-win64.ll
@@ -42,6 +42,4 @@ safe_if_after_false: ; preds = %safe_if_run_false,
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
-declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
-declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/avx.ll b/test/CodeGen/X86/avx.ll
index f71ec5c10e69..341dd867e4ff 100644
--- a/test/CodeGen/X86/avx.ll
+++ b/test/CodeGen/X86/avx.ll
@@ -32,7 +32,7 @@ define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocap
; On X32, account for the argument's move to registers
; X32: movl 4(%esp), %eax
; CHECK-NOT: mov
-; CHECK: insertps $48
+; CHECK: vinsertps $48, (%{{...}}), {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; CHECK-NEXT: ret
%1 = load <4 x float>, <4 x float>* %pb, align 16
%2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
@@ -46,7 +46,7 @@ define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>
; X32: movl 4(%esp), %eax
; CHECK-NOT: mov
;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: insertps $96, 4(%{{...}}), %
+; CHECK: vinsertps $32, 4(%{{...}}), {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; CHECK-NEXT: ret
%1 = load <4 x float>, <4 x float>* %pb, align 16
%2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
@@ -60,7 +60,7 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
; X32: movl 8(%esp), %ecx
; CHECK-NOT: mov
;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: vinsertps $192, 12(%{{...}},%{{...}}), %
+; CHECK: vinsertps $0, 12(%{{...}},%{{...}}), {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
; CHECK-NEXT: ret
%1 = getelementptr inbounds <4 x float>, <4 x float>* %pb, i64 %index
%2 = load <4 x float>, <4 x float>* %1, align 16
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
index 9b6d5aa5eeae..176292768253 100755
--- a/test/CodeGen/X86/avx2-conversions.ll
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -1,136 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
-; CHECK: trunc4
-; CHECK: vpermd
-; CHECK-NOT: vinsert
-; CHECK: ret
define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
+; CHECK-LABEL: trunc4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,6,u,u,u,u>
+; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%B = trunc <4 x i64> %A to <4 x i32>
ret <4 x i32>%B
}
-; CHECK: trunc8
-; CHECK: vpshufb
-; CHECK-NOT: vinsert
-; CHECK: ret
-
define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
+; CHECK-LABEL: trunc8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%B = trunc <8 x i32> %A to <8 x i16>
ret <8 x i16>%B
}
-; CHECK: sext4
-; CHECK: vpmovsxdq
-; CHECK-NOT: vinsert
-; CHECK: ret
define <4 x i64> @sext4(<4 x i32> %A) nounwind {
+; CHECK-LABEL: sext4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0
+; CHECK-NEXT: retq
%B = sext <4 x i32> %A to <4 x i64>
ret <4 x i64>%B
}
-; CHECK: sext8
-; CHECK: vpmovsxwd
-; CHECK-NOT: vinsert
-; CHECK: ret
define <8 x i32> @sext8(<8 x i16> %A) nounwind {
+; CHECK-LABEL: sext8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
+; CHECK-NEXT: retq
%B = sext <8 x i16> %A to <8 x i32>
ret <8 x i32>%B
}
-; CHECK: zext4
-; CHECK: vpmovzxdq
-; CHECK-NOT: vinsert
-; CHECK: ret
define <4 x i64> @zext4(<4 x i32> %A) nounwind {
+; CHECK-LABEL: zext4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; CHECK-NEXT: retq
%B = zext <4 x i32> %A to <4 x i64>
ret <4 x i64>%B
}
-; CHECK: zext8
-; CHECK: vpmovzxwd
-; CHECK-NOT: vinsert
-; CHECK: ret
define <8 x i32> @zext8(<8 x i16> %A) nounwind {
+; CHECK-LABEL: zext8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT: retq
%B = zext <8 x i16> %A to <8 x i32>
ret <8 x i32>%B
}
-; CHECK: zext_8i8_8i32
-; CHECK: vpmovzxwd
-; CHECK: vpand
-; CHECK: ret
+
define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
- %B = zext <8 x i8> %A to <8 x i32>
+; CHECK-LABEL: zext_8i8_8i32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT: retq
+ %B = zext <8 x i8> %A to <8 x i32>
ret <8 x i32>%B
}
-; CHECK-LABEL: zext_16i8_16i16:
-; CHECK: vpmovzxbw
-; CHECK-NOT: vinsert
-; CHECK: ret
define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
+; CHECK-LABEL: zext_16i8_16i16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; CHECK-NEXT: retq
%t = zext <16 x i8> %z to <16 x i16>
ret <16 x i16> %t
}
-; CHECK-LABEL: sext_16i8_16i16:
-; CHECK: vpmovsxbw
-; CHECK-NOT: vinsert
-; CHECK: ret
define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
+; CHECK-LABEL: sext_16i8_16i16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT: retq
%t = sext <16 x i8> %z to <16 x i16>
ret <16 x i16> %t
}
-; CHECK-LABEL: trunc_16i16_16i8:
-; CHECK: vpshufb
-; CHECK: vpshufb
-; CHECK: vpunpcklqdq
-; CHECK: ret
define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
+; CHECK-LABEL: trunc_16i16_16i8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%t = trunc <16 x i16> %z to <16 x i8>
ret <16 x i8> %t
}
-; CHECK: load_sext_test1
-; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}}
-; CHECK: ret
define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
+; CHECK-LABEL: load_sext_test1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxdq (%rdi), %ymm0
+; CHECK-NEXT: retq
%X = load <4 x i32>, <4 x i32>* %ptr
%Y = sext <4 x i32> %X to <4 x i64>
ret <4 x i64>%Y
}
-; CHECK: load_sext_test2
-; CHECK: vpmovsxbq (%r{{[^,]*}}), %ymm{{.*}}
-; CHECK: ret
define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
+; CHECK-LABEL: load_sext_test2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxbq (%rdi), %ymm0
+; CHECK-NEXT: retq
%X = load <4 x i8>, <4 x i8>* %ptr
%Y = sext <4 x i8> %X to <4 x i64>
ret <4 x i64>%Y
}
-; CHECK: load_sext_test3
-; CHECK: vpmovsxwq (%r{{[^,]*}}), %ymm{{.*}}
-; CHECK: ret
define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
+; CHECK-LABEL: load_sext_test3:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxwq (%rdi), %ymm0
+; CHECK-NEXT: retq
%X = load <4 x i16>, <4 x i16>* %ptr
%Y = sext <4 x i16> %X to <4 x i64>
ret <4 x i64>%Y
}
-; CHECK: load_sext_test4
-; CHECK: vpmovsxwd (%r{{[^,]*}}), %ymm{{.*}}
-; CHECK: ret
define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
+; CHECK-LABEL: load_sext_test4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxwd (%rdi), %ymm0
+; CHECK-NEXT: retq
%X = load <8 x i16>, <8 x i16>* %ptr
%Y = sext <8 x i16> %X to <8 x i32>
ret <8 x i32>%Y
}
-; CHECK: load_sext_test5
-; CHECK: vpmovsxbd (%r{{[^,]*}}), %ymm{{.*}}
-; CHECK: ret
define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
+; CHECK-LABEL: load_sext_test5:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxbd (%rdi), %ymm0
+; CHECK-NEXT: retq
%X = load <8 x i8>, <8 x i8>* %ptr
%Y = sext <8 x i8> %X to <8 x i32>
ret <8 x i32>%Y
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
index a30d8371775c..36b6da5ef960 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
@@ -83,3 +83,123 @@ define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
+ ; CHECK-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
+ ; CHECK: ## BB#0:
+ ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+ ; CHECK-NEXT: retl
+ %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
+
+
+define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
+ ; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps:
+ ; CHECK: ## BB#0:
+ ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+ ; CHECK-NEXT: retl
+ %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
+ ; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
+ ; CHECK: ## BB#0:
+ ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+ ; CHECK-NEXT: retl
+ %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
+
+
+define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
+; CHECK-LABEL: test_x86_avx2_pbroadcastb_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
+
+
+define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
+; CHECK-LABEL: test_x86_avx2_pbroadcastb_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
+; CHECK-NEXT: retl
+ %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
+
+
+define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
+; CHECK-LABEL: test_x86_avx2_pbroadcastw_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
+
+
+define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
+; CHECK-LABEL: test_x86_avx2_pbroadcastw_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
+; CHECK-NEXT: retl
+ %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
+; CHECK-LABEL: test_x86_avx2_pbroadcastd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
+; CHECK-LABEL: test_x86_avx2_pbroadcastd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retl
+ %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
+
+
+define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
+; CHECK-LABEL: test_x86_avx2_pbroadcastq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
+; CHECK-LABEL: test_x86_avx2_pbroadcastq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retl
+ %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 5b607afef91c..606aca9dc02b 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -641,30 +641,6 @@ define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
-define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
- ; CHECK: vbroadcastsd
- %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
-
-
-define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
- ; CHECK: vbroadcastss
- %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
-
-
-define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
- ; CHECK: vbroadcastss
- %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
- ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
-
-
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpblendd
%res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
@@ -681,70 +657,6 @@ define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
-define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
- ; CHECK: vpbroadcastb
- %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
- ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
-
-
-define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
- ; CHECK: vpbroadcastb
- %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0) ; <<32 x i8>> [#uses=1]
- ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
-
-
-define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
- ; CHECK: vpbroadcastw
- %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
-
-
-define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
- ; CHECK: vpbroadcastw
- %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0) ; <<16 x i16>> [#uses=1]
- ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
-
-
-define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
- ; CHECK: vbroadcastss
- %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
-
-
-define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
- ; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
- %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0) ; <<8 x i32>> [#uses=1]
- ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
-
-
-define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
- ; CHECK: vpbroadcastq
- %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
-
-
-define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
- ; CHECK: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
- %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
-
-
define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
; Check that the arguments are swapped between the intrinsic definition
; and its lowering. Indeed, the offsets are the first source in
@@ -756,15 +668,15 @@ define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
-define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
+define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) {
; Check that the arguments are swapped between the intrinsic definition
; and its lowering. Indeed, the offsets are the first source in
; the instruction.
; CHECK: vpermps %ymm0, %ymm1, %ymm0
- %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
-declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
diff --git a/test/CodeGen/X86/avx2-nontemporal.ll b/test/CodeGen/X86/avx2-nontemporal.ll
index 4d28a979712a..058358f13b86 100644
--- a/test/CodeGen/X86/avx2-nontemporal.ll
+++ b/test/CodeGen/X86/avx2-nontemporal.ll
@@ -1,21 +1,18 @@
; RUN: llc < %s -march=x86 -mattr=+avx2 | FileCheck %s
-define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) {
-; CHECK: vmovntps
+define void @f(<8 x float> %A, i8* %B, <4 x double> %C, <4 x i64> %E) {
+; CHECK: vmovntps %y
%cast = bitcast i8* %B to <8 x float>*
%A2 = fadd <8 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
- store <8 x float> %A2, <8 x float>* %cast, align 16, !nontemporal !0
-; CHECK: vmovntdq
+ store <8 x float> %A2, <8 x float>* %cast, align 32, !nontemporal !0
+; CHECK: vmovntdq %y
%cast1 = bitcast i8* %B to <4 x i64>*
%E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
- store <4 x i64> %E2, <4 x i64>* %cast1, align 16, !nontemporal !0
-; CHECK: vmovntpd
+ store <4 x i64> %E2, <4 x i64>* %cast1, align 32, !nontemporal !0
+; CHECK: vmovntpd %y
%cast2 = bitcast i8* %B to <4 x double>*
%C2 = fadd <4 x double> %C, <double 0x0, double 0x0, double 0x0, double 0x4200000000000000>
- store <4 x double> %C2, <4 x double>* %cast2, align 16, !nontemporal !0
-; CHECK: movnti
- %cast3 = bitcast i8* %B to i32*
- store i32 %D, i32* %cast3, align 16, !nontemporal !0
+ store <4 x double> %C2, <4 x double>* %cast2, align 32, !nontemporal !0
ret void
}
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 94dcdcabdd33..6b77edb155a4 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -1,7 +1,11 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s
-; CHECK: vpbroadcastb (%
define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: BB16:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load i8, i8* %ptr, align 4
%q0 = insertelement <16 x i8> undef, i8 %q, i32 0
@@ -22,8 +26,12 @@ entry:
%qf = insertelement <16 x i8> %qe, i8 %q, i32 15
ret <16 x i8> %qf
}
-; CHECK: vpbroadcastb (%
+
define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: BB32:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i8, i8* %ptr, align 4
%q0 = insertelement <32 x i8> undef, i8 %q, i32 0
@@ -61,9 +69,12 @@ entry:
%q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
ret <32 x i8> %q2f
}
-; CHECK: vpbroadcastw (%
define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: W16:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load i16, i16* %ptr, align 4
%q0 = insertelement <8 x i16> undef, i16 %q, i32 0
@@ -76,8 +87,12 @@ entry:
%q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
ret <8 x i16> %q7
}
-; CHECK: vpbroadcastw (%
+
define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: WW16:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i16, i16* %ptr, align 4
%q0 = insertelement <16 x i16> undef, i16 %q, i32 0
@@ -98,8 +113,12 @@ entry:
%qf = insertelement <16 x i16> %qe, i16 %q, i32 15
ret <16 x i16> %qf
}
-; CHECK: vbroadcastss (%
+
define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: D32:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%q0 = insertelement <4 x i32> undef, i32 %q, i32 0
@@ -108,8 +127,12 @@ entry:
%q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
ret <4 x i32> %q3
}
-; CHECK: vbroadcastss (%
+
define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: DD32:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%q0 = insertelement <8 x i32> undef, i32 %q, i32 0
@@ -122,16 +145,24 @@ entry:
%q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
ret <8 x i32> %q7
}
-; CHECK: vpbroadcastq (%
+
define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: Q64:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0
+; CHECK-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 4
%q0 = insertelement <2 x i64> undef, i64 %q, i32 0
%q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
ret <2 x i64> %q1
}
-; CHECK: vbroadcastsd (%
+
define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: QQ64:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
+; CHECK-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 4
%q0 = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -141,9 +172,214 @@ entry:
ret <4 x i64> %q3
}
+; FIXME: Pointer adjusted broadcasts
+
+define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb 1(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <16 x i8>, <16 x i8>* %ptr
+ %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i8> %ret
+}
+
+define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <16 x i8>, <16 x i8>* %ptr
+ %ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <32 x i8> %ret
+}
+
+define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <32 x i8>, <32 x i8>* %ptr
+ %ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <32 x i8> %ret
+}
+
+define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i16_8i16_11111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw 2(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i16>, <8 x i16>* %ptr
+ %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i16> %ret
+}
+
+define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i16>, <8 x i16>* %ptr
+ %ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i16> %ret
+}
+
+define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <16 x i16>, <16 x i16>* %ptr
+ %ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i16> %ret
+}
+
+define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i32_4i32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_4i32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i32>, <4 x i32>* %ptr
+ %ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8i32_8i32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x i32>, <8 x i32>* %ptr
+ %ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x i32> %ret
+}
+
+define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f32_4f32_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_4f32_33333333:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x float>, <4 x float>* %ptr
+ %ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x float> %ret
+}
+
+define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_8f32_8f32_55555555:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <8 x float>, <8 x float>* %ptr
+ %ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <8 x float> %ret
+}
+
+define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastq 8(%rdi), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_2i64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x i64>, <2 x i64>* %ptr
+ %ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4i64_4i64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x i64>, <4 x i64>* %ptr
+ %ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x i64> %ret
+}
+
+define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_2f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_2f64_1111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <2 x double>, <2 x double>* %ptr
+ %ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x double> %ret
+}
+
+define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: load_splat_4f64_4f64_2222:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; CHECK-NEXT: retq
+entry:
+ %ld = load <4 x double>, <4 x double>* %ptr
+ %ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+ ret <4 x double> %ret
+}
+
; make sure that we still don't support broadcast double into 128-bit vector
; this used to crash
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: I:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; CHECK-NEXT: retq
entry:
%q = load double, double* %ptr, align 4
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
@@ -151,28 +387,33 @@ entry:
ret <2 x double> %vecinit2.i
}
-; CHECK: V111
-; CHECK: vpbroadcastd
-; CHECK: ret
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
+; CHECK-LABEL: V111:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %g
}
-; CHECK: V113
-; CHECK: vbroadcastss
-; CHECK: ret
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
+; CHECK-LABEL: V113:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
+; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
ret <8 x float> %g
}
-; CHECK: _e2
-; CHECK: vbroadcastss
-; CHECK: ret
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _e2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
@@ -180,10 +421,11 @@ define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
ret <4 x float> %vecinit6.i
}
-; CHECK: _e4
-; CHECK-NOT: broadcast
-; CHECK: ret
define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _e4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
+; CHECK-NEXT: retq
%vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
%vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
%vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
@@ -197,6 +439,17 @@ define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
define void @crash() nounwind alwaysinline {
+; CHECK-LABEL: crash:
+; CHECK: ## BB#0: ## %WGLoopsEntry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je LBB31_1
+; CHECK-NEXT: ## BB#2: ## %ret
+; CHECK-NEXT: retq
+; CHECK-NEXT: .align 4, 0x90
+; CHECK-NEXT: LBB31_1: ## %footer349VF
+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp LBB31_1
WGLoopsEntry:
br i1 undef, label %ret, label %footer329VF
@@ -223,135 +476,151 @@ ret:
ret void
}
-; CHECK: _inreg0
-; CHECK: broadcastss
-; CHECK: ret
define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _inreg0:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovd %edi, %xmm0
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%in = insertelement <8 x i32> undef, i32 %scalar, i32 0
%wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
ret <8 x i32> %wide
}
-; CHECK: _inreg1
-; CHECK: broadcastss
-; CHECK: ret
define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _inreg1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%in = insertelement <8 x float> undef, float %scalar, i32 0
%wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %wide
}
-; CHECK: _inreg2
-; CHECK: broadcastss
-; CHECK: ret
define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _inreg2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+; CHECK-NEXT: retq
%in = insertelement <4 x float> undef, float %scalar, i32 0
%wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %wide
}
-; CHECK: _inreg3
-; CHECK: broadcastsd
-; CHECK: ret
define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
+; CHECK-LABEL: _inreg3:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%in = insertelement <4 x double> undef, double %scalar, i32 0
%wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %wide
}
-;CHECK-LABEL: _inreg8xfloat:
-;CHECK: vbroadcastss
-;CHECK: ret
define <8 x float> @_inreg8xfloat(<8 x float> %a) {
+; CHECK-LABEL: _inreg8xfloat:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %b
}
-;CHECK-LABEL: _inreg4xfloat:
-;CHECK: vbroadcastss
-;CHECK: ret
define <4 x float> @_inreg4xfloat(<4 x float> %a) {
+; CHECK-LABEL: _inreg4xfloat:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %b
}
-;CHECK-LABEL: _inreg16xi16:
-;CHECK: vpbroadcastw
-;CHECK: ret
define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
+; CHECK-LABEL: _inreg16xi16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
ret <16 x i16> %b
}
-;CHECK-LABEL: _inreg8xi16:
-;CHECK: vpbroadcastw
-;CHECK: ret
define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
+; CHECK-LABEL: _inreg8xi16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %b
}
-
-;CHECK-LABEL: _inreg4xi64:
-;CHECK: vbroadcastsd
-;CHECK: ret
define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
+; CHECK-LABEL: _inreg4xi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
ret <4 x i64> %b
}
-;CHECK-LABEL: _inreg2xi64:
-;CHECK: vpbroadcastq
-;CHECK: ret
define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
+; CHECK-LABEL: _inreg2xi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %b
}
-;CHECK-LABEL: _inreg4xdouble:
-;CHECK: vbroadcastsd
-;CHECK: ret
define <4 x double> @_inreg4xdouble(<4 x double> %a) {
+; CHECK-LABEL: _inreg4xdouble:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %b
}
-;CHECK-LABEL: _inreg2xdouble:
-;CHECK: vmovddup
-;CHECK: ret
define <2 x double> @_inreg2xdouble(<2 x double> %a) {
+; CHECK-LABEL: _inreg2xdouble:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; CHECK-NEXT: retq
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %b
}
-;CHECK-LABEL: _inreg8xi32:
-;CHECK: vbroadcastss
-;CHECK: ret
define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
+; CHECK-LABEL: _inreg8xi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
ret <8 x i32> %b
}
-;CHECK-LABEL: _inreg4xi32:
-;CHECK: vbroadcastss
-;CHECK: ret
define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
+; CHECK-LABEL: _inreg4xi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %b
}
-;CHECK-LABEL: _inreg32xi8:
-;CHECK: vpbroadcastb
-;CHECK: ret
define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
+; CHECK-LABEL: _inreg32xi8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
ret <32 x i8> %b
}
-;CHECK-LABEL: _inreg16xi8:
-;CHECK: vpbroadcastb
-;CHECK: ret
define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
+; CHECK-LABEL: _inreg16xi8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
+; CHECK-NEXT: retq
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %b
}
@@ -360,11 +629,11 @@ define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
; formed from a concat_vectors (via the shufflevector) of two BUILD_VECTORs
; (via the insertelements).
-; CHECK-LABEL: splat_concat1
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastss
-; CHECK-NEXT: ret
define <8 x float> @splat_concat1(float %f) {
+; CHECK-LABEL: splat_concat1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%1 = insertelement <4 x float> undef, float %f, i32 0
%2 = insertelement <4 x float> %1, float %f, i32 1
%3 = insertelement <4 x float> %2, float %f, i32 2
@@ -373,11 +642,11 @@ define <8 x float> @splat_concat1(float %f) {
ret <8 x float> %5
}
-; CHECK-LABEL: splat_concat2
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastss
-; CHECK-NEXT: ret
define <8 x float> @splat_concat2(float %f) {
+; CHECK-LABEL: splat_concat2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: retq
%1 = insertelement <4 x float> undef, float %f, i32 0
%2 = insertelement <4 x float> %1, float %f, i32 1
%3 = insertelement <4 x float> %2, float %f, i32 2
@@ -390,22 +659,22 @@ define <8 x float> @splat_concat2(float %f) {
ret <8 x float> %9
}
-; CHECK-LABEL: splat_concat3
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastsd
-; CHECK-NEXT: ret
define <4 x double> @splat_concat3(double %d) {
+; CHECK-LABEL: splat_concat3:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%1 = insertelement <2 x double> undef, double %d, i32 0
%2 = insertelement <2 x double> %1, double %d, i32 1
%3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
ret <4 x double> %3
}
-; CHECK-LABEL: splat_concat4
-; CHECK-NOT: vinsertf128
-; CHECK: vbroadcastsd
-; CHECK-NEXT: ret
define <4 x double> @splat_concat4(double %d) {
+; CHECK-LABEL: splat_concat4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: retq
%1 = insertelement <2 x double> undef, double %d, i32 0
%2 = insertelement <2 x double> %1, double %d, i32 1
%3 = insertelement <2 x double> undef, double %d, i32 0
diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll
index 1ecd1007905a..9220e4f269cd 100644
--- a/test/CodeGen/X86/avx512-arith.ll
+++ b/test/CodeGen/X86/avx512-arith.ll
@@ -1,4 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl| FileCheck --check-prefix=CHECK --check-prefix=SKX %s
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
@@ -83,18 +88,54 @@ entry:
}
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
-; CHECK-LABEL: imulq512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
-; CHECK-NEXT: vpsrlq $32, %zmm0, %zmm3
-; CHECK-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
-; CHECK-NEXT: vpsllq $32, %zmm3, %zmm3
-; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm2
-; CHECK-NEXT: vpsrlq $32, %zmm1, %zmm1
-; CHECK-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vpsllq $32, %zmm0, %zmm0
-; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; CHECK-NEXT: retq
+; AVX512F-LABEL: imulq512:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm3
+; AVX512F-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
+; AVX512F-NEXT: vpsllq $32, %zmm3, %zmm3
+; AVX512F-NEXT: vpaddq %zmm3, %zmm2, %zmm2
+; AVX512F-NEXT: vpsrlq $32, %zmm1, %zmm1
+; AVX512F-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: vpsllq $32, %zmm0, %zmm0
+; AVX512F-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: imulq512:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
+; AVX512VL-NEXT: vpsrlq $32, %zmm0, %zmm3
+; AVX512VL-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
+; AVX512VL-NEXT: vpsllq $32, %zmm3, %zmm3
+; AVX512VL-NEXT: vpaddq %zmm3, %zmm2, %zmm2
+; AVX512VL-NEXT: vpsrlq $32, %zmm1, %zmm1
+; AVX512VL-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
+; AVX512VL-NEXT: vpsllq $32, %zmm0, %zmm0
+; AVX512VL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: imulq512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm2
+; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm3
+; AVX512BW-NEXT: vpmuludq %zmm3, %zmm1, %zmm3
+; AVX512BW-NEXT: vpsllq $32, %zmm3, %zmm3
+; AVX512BW-NEXT: vpaddq %zmm3, %zmm2, %zmm2
+; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm1
+; AVX512BW-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpsllq $32, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: imulq512:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpmullq %zmm0, %zmm1, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: imulq512:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0
+; SKX-NEXT: retq
%z = mul <8 x i64>%x, %y
ret <8 x i64>%z
}
@@ -463,10 +504,13 @@ entry:
ret <8 x i64>%d
}
-; CHECK-LABEL: test_mask_vaddps
-; CHECK: vaddps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
-; CHECK: ret
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
+; CHECK-LABEL: test_mask_vaddps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -475,10 +519,13 @@ define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
ret <16 x float> %r
}
-; CHECK-LABEL: test_mask_vmulps
-; CHECK: vmulps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
-; CHECK: ret
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
+; CHECK-LABEL: test_mask_vmulps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -487,10 +534,13 @@ define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
ret <16 x float> %r
}
-; CHECK-LABEL: test_mask_vminps
-; CHECK: vminps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
-; CHECK: ret
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
+; CHECK-LABEL: test_mask_vminps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -500,10 +550,41 @@ define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
ret <16 x float> %r
}
-; CHECK-LABEL: test_mask_vminpd
-; CHECK: vminpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
-; CHECK: ret
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
+; AVX512F-LABEL: test_mask_vminpd:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: test_mask_vminpd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
+; AVX512VL-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: test_mask_vminpd:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: test_mask_vminpd:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: test_mask_vminpd:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
+; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
+; SKX-NEXT: retq
<8 x double> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
@@ -513,10 +594,13 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
ret <8 x double> %r
}
-; CHECK-LABEL: test_mask_vmaxps
-; CHECK: vmaxps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
-; CHECK: ret
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
+; CHECK-LABEL: test_mask_vmaxps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -526,10 +610,41 @@ define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
ret <16 x float> %r
}
-; CHECK-LABEL: test_mask_vmaxpd
-; CHECK: vmaxpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
-; CHECK: ret
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
+; AVX512F-LABEL: test_mask_vmaxpd:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: test_mask_vmaxpd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512VL-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
+; AVX512VL-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: test_mask_vmaxpd:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: test_mask_vmaxpd:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: test_mask_vmaxpd:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; SKX-NEXT: vpcmpneqd %ymm4, %ymm3, %k1
+; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
+; SKX-NEXT: retq
<8 x double> %j, <8 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
@@ -539,10 +654,13 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
ret <8 x double> %r
}
-; CHECK-LABEL: test_mask_vsubps
-; CHECK: vsubps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
-; CHECK: ret
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
+; CHECK-LABEL: test_mask_vsubps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -551,10 +669,13 @@ define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
ret <16 x float> %r
}
-; CHECK-LABEL: test_mask_vdivps
-; CHECK: vdivps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
-; CHECK: ret
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
+; CHECK-LABEL: test_mask_vdivps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
+; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
<16 x float> %j, <16 x i32> %mask1)
nounwind readnone {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
@@ -563,10 +684,13 @@ define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
ret <16 x float> %r
}
-; CHECK-LABEL: test_mask_vaddpd
-; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
-; CHECK: ret
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
+; CHECK-LABEL: test_mask_vaddpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm4, %zmm4, %zmm4
+; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1
+; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
<8 x double> %j, <8 x i64> %mask1)
nounwind readnone {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
@@ -575,10 +699,13 @@ define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
ret <8 x double> %r
}
-; CHECK-LABEL: test_maskz_vaddpd
-; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}}}
-; CHECK: ret
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
+; CHECK-LABEL: test_maskz_vaddpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
+; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
<8 x i64> %mask1) nounwind readnone {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%x = fadd <8 x double> %i, %j
@@ -586,10 +713,13 @@ define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
ret <8 x double> %r
}
-; CHECK-LABEL: test_mask_fold_vaddpd
-; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}.*}}
-; CHECK: ret
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
+; CHECK-LABEL: test_mask_fold_vaddpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1
+; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
<8 x double>* %j, <8 x i64> %mask1)
nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
@@ -599,10 +729,13 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
ret <8 x double> %r
}
-; CHECK-LABEL: test_maskz_fold_vaddpd
-; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}.*}}
-; CHECK: ret
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
+; CHECK-LABEL: test_maskz_fold_vaddpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
+; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
<8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load <8 x double>, <8 x double>* %j, align 8
@@ -611,10 +744,11 @@ define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
ret <8 x double> %r
}
-; CHECK-LABEL: test_broadcast_vaddpd
-; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
-; CHECK: ret
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
+; CHECK-LABEL: test_broadcast_vaddpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%tmp = load double, double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
@@ -623,10 +757,14 @@ define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind
ret <8 x double> %x
}
-; CHECK-LABEL: test_mask_broadcast_vaddpd
-; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]}.*}}
-; CHECK: ret
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
+; CHECK-LABEL: test_mask_broadcast_vaddpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1
+; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
double* %j, <8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load double, double* %j
@@ -638,10 +776,13 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double>
ret <8 x double> %r
}
-; CHECK-LABEL: test_maskz_broadcast_vaddpd
-; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]} {z}.*}}
-; CHECK: ret
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
+; CHECK-LABEL: test_maskz_broadcast_vaddpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1
+; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
<8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%tmp = load double, double* %j
@@ -652,3 +793,104 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
ret <8 x double> %r
}
+
+define <16 x float> @test_fxor(<16 x float> %a) {
+; AVX512F-LABEL: test_fxor:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: test_fxor:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: test_fxor:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpxord {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: test_fxor:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: test_fxor:
+; SKX: ## BB#0:
+; SKX-NEXT: vxorps {{.*}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: retq
+
+ %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+ ret <16 x float>%res
+}
+
+define <8 x float> @test_fxor_8f32(<8 x float> %a) {
+; CHECK-LABEL: test_fxor_8f32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+ ret <8 x float>%res
+}
+
+define <8 x double> @fabs_v8f64(<8 x double> %p)
+; AVX512F-LABEL: fabs_v8f64:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fabs_v8f64:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: fabs_v8f64:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: fabs_v8f64:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: fabs_v8f64:
+; SKX: ## BB#0:
+; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: retq
+{
+ %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
+ ret <8 x double> %t
+}
+declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
+
+define <16 x float> @fabs_v16f32(<16 x float> %p)
+; AVX512F-LABEL: fabs_v16f32:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fabs_v16f32:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: fabs_v16f32:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: fabs_v16f32:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: fabs_v16f32:
+; SKX: ## BB#0:
+; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: retq
+{
+ %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
+ ret <16 x float> %t
+}
+declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
diff --git a/test/CodeGen/X86/avx512-bugfix-25270.ll b/test/CodeGen/X86/avx512-bugfix-25270.ll
new file mode 100644
index 000000000000..d024475274b4
--- /dev/null
+++ b/test/CodeGen/X86/avx512-bugfix-25270.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+declare void @Print__512(<16 x i32>) #0
+
+define void @bar__512(<16 x i32>* %var) #0 {
+; CHECK-LABEL: bar__512:
+; CHECK: ## BB#0: ## %allocas
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $112, %rsp
+; CHECK-NEXT: movq %rdi, %rbx
+; CHECK-NEXT: vmovdqu32 (%rbx), %zmm0
+; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
+; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
+; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx)
+; CHECK-NEXT: callq _Print__512
+; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload
+; CHECK-NEXT: callq _Print__512
+; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0
+; CHECK-NEXT: vmovdqa32 %zmm0, (%rbx)
+; CHECK-NEXT: addq $112, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+allocas:
+ %var_load_load = load <16 x i32>, <16 x i32>* %var, align 1
+ store <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>, <16 x i32>* %var, align 64
+ call void @Print__512(<16 x i32> %var_load_load)
+ ; %var_load_load value should be reloaded
+ call void @Print__512(<16 x i32> %var_load_load)
+ store <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32>* %var, align 64
+ ret void
+}
+
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/avx512-build-vector.ll b/test/CodeGen/X86/avx512-build-vector.ll
index e5373c575c1a..0f89aa71162e 100644
--- a/test/CodeGen/X86/avx512-build-vector.ll
+++ b/test/CodeGen/X86/avx512-build-vector.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
define <16 x i32> @test2(<16 x i32> %x) {
diff --git a/test/CodeGen/X86/avx512-calling-conv.ll b/test/CodeGen/X86/avx512-calling-conv.ll
index edb6bef1a4ac..a61aeba5aff9 100644
--- a/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/test/CodeGen/X86/avx512-calling-conv.ll
@@ -1,55 +1,167 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX
; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
-; KNL-LABEL: test1
-; KNL: vxorps
define <16 x i1> @test1() {
+; ALL_X64-LABEL: test1:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; ALL_X64-NEXT: retq
+;
+; KNL_X32-LABEL: test1:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; KNL_X32-NEXT: retl
ret <16 x i1> zeroinitializer
}
-; SKX-LABEL: test2
-; SKX: vpmovb2m
-; SKX: vpmovb2m
-; SKX: kandw
-; SKX: vpmovm2b
-; KNL-LABEL: test2
-; KNL: vpmovsxbd
-; KNL: vpmovsxbd
-; KNL: vpandd
-; KNL: vpmovdb
define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
+; KNL-LABEL: test2:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test2:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
+; SKX-NEXT: vpmovb2m %xmm1, %k0
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: kandw %k0, %k1, %k0
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: retq
+;
+; KNL_X32-LABEL: test2:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_X32-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL_X32-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL_X32-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL_X32-NEXT: vptestmd %zmm1, %zmm1, %k1 {%k1}
+; KNL_X32-NEXT: vpbroadcastd LCPI1_0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
+; KNL_X32-NEXT: retl
%c = and <16 x i1>%a, %b
ret <16 x i1> %c
}
-; SKX-LABEL: test3
-; SKX: vpmovw2m
-; SKX: vpmovw2m
-; SKX: kandb
-; SKX: vpmovm2w
define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
+; KNL-LABEL: test3:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovqw %zmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test3:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
+; SKX-NEXT: vpmovw2m %xmm1, %k0
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: kandb %k0, %k1, %k0
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: retq
+;
+; KNL_X32-LABEL: test3:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL_X32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [63,0,63,0,63,0,63,0,63,0,63,0,63,0,63,0]
+; KNL_X32-NEXT: vpsllvq %zmm2, %zmm1, %zmm1
+; KNL_X32-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL_X32-NEXT: vpsllvq %zmm2, %zmm0, %zmm0
+; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL_X32-NEXT: vptestmq %zmm1, %zmm1, %k1 {%k1}
+; KNL_X32-NEXT: vpbroadcastd LCPI2_1, %zmm0
+; KNL_X32-NEXT: vmovdqu64 %zmm0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0
+; KNL_X32-NEXT: retl
%c = and <8 x i1>%a, %b
ret <8 x i1> %c
}
-; SKX-LABEL: test4
-; SKX: vpmovd2m
-; SKX: vpmovd2m
-; SKX: kandw
-; SKX: vpmovm2d
define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) {
+; KNL-LABEL: test4:
+; KNL: ## BB#0:
+; KNL-NEXT: vandps %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test4:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: vpslld $31, %xmm1, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: kandw %k1, %k0, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: retq
+;
+; KNL_X32-LABEL: test4:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0
+; KNL_X32-NEXT: retl
%c = and <4 x i1>%a, %b
ret <4 x i1> %c
}
-; SKX-LABEL: test5
-; SKX: vpcmpgtd
-; SKX: vpmovm2w
-; SKX: call
-; SKX: vpmovzxwd
declare <8 x i1> @func8xi1(<8 x i1> %a)
+
define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) {
+; KNL-LABEL: test5:
+; KNL: ## BB#0:
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: Ltmp0:
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: callq _func8xi1
+; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; KNL-NEXT: vpslld $31, %ymm0, %ymm0
+; KNL-NEXT: vpsrad $31, %ymm0, %ymm0
+; KNL-NEXT: popq %rax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test5:
+; SKX: ## BB#0:
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: Ltmp0:
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: callq _func8xi1
+; SKX-NEXT: vpmovzxwd %xmm0, %ymm0
+; SKX-NEXT: vpslld $31, %ymm0, %ymm0
+; SKX-NEXT: vpsrad $31, %ymm0, %ymm0
+; SKX-NEXT: popq %rax
+; SKX-NEXT: retq
+;
+; KNL_X32-LABEL: test5:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: subl $12, %esp
+; KNL_X32-NEXT: Ltmp0:
+; KNL_X32-NEXT: .cfi_def_cfa_offset 16
+; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
+; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
+; KNL_X32-NEXT: calll L_func8xi1$stub
+; KNL_X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; KNL_X32-NEXT: vpslld $31, %ymm0, %ymm0
+; KNL_X32-NEXT: vpsrad $31, %ymm0, %ymm0
+; KNL_X32-NEXT: addl $12, %esp
+; KNL_X32-NEXT: retl
%cmpRes = icmp sgt <8 x i32>%a, %b
%resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
%res = sext <8 x i1>%resi to <8 x i32>
@@ -58,14 +170,50 @@ define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) {
declare <16 x i1> @func16xi1(<16 x i1> %a)
-; KNL-LABEL: test6
-; KNL: vpbroadcastd
-; KNL: vpmovdb
-; KNL: call
-; KNL: vpmovzxbd
-; KNL: vpslld $31, %zmm
-; KNL: vpsrad $31, %zmm
define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
+; KNL-LABEL: test6:
+; KNL: ## BB#0:
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: Ltmp1:
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: callq _func16xi1
+; KNL-NEXT: vpmovzxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vpsrad $31, %zmm0, %zmm0
+; KNL-NEXT: popq %rax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test6:
+; SKX: ## BB#0:
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: Ltmp1:
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: callq _func16xi1
+; SKX-NEXT: vpmovzxbd %xmm0, %zmm0
+; SKX-NEXT: vpslld $31, %zmm0, %zmm0
+; SKX-NEXT: vpsrad $31, %zmm0, %zmm0
+; SKX-NEXT: popq %rax
+; SKX-NEXT: retq
+;
+; KNL_X32-LABEL: test6:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: subl $12, %esp
+; KNL_X32-NEXT: Ltmp1:
+; KNL_X32-NEXT: .cfi_def_cfa_offset 16
+; KNL_X32-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
+; KNL_X32-NEXT: vpbroadcastd LCPI5_0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
+; KNL_X32-NEXT: calll L_func16xi1$stub
+; KNL_X32-NEXT: vpmovzxbd %xmm0, %zmm0
+; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL_X32-NEXT: vpsrad $31, %zmm0, %zmm0
+; KNL_X32-NEXT: addl $12, %esp
+; KNL_X32-NEXT: retl
%cmpRes = icmp sgt <16 x i32>%a, %b
%resi = call <16 x i1> @func16xi1(<16 x i1> %cmpRes)
%res = sext <16 x i1>%resi to <16 x i32>
@@ -73,82 +221,265 @@ define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
}
declare <4 x i1> @func4xi1(<4 x i1> %a)
-; SKX-LABEL: test7
-; SKX: vpmovm2d
-; SKX: call
-; SKX: vpslld $31, %xmm
-; SKX: vpsrad $31, %xmm
define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) {
+; KNL-LABEL: test7:
+; KNL: ## BB#0:
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: Ltmp2:
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; KNL-NEXT: callq _func4xi1
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: popq %rax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test7:
+; SKX: ## BB#0:
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: Ltmp2:
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: callq _func4xi1
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpsrad $31, %xmm0, %xmm0
+; SKX-NEXT: popq %rax
+; SKX-NEXT: retq
+;
+; KNL_X32-LABEL: test7:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: subl $12, %esp
+; KNL_X32-NEXT: Ltmp2:
+; KNL_X32-NEXT: .cfi_def_cfa_offset 16
+; KNL_X32-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; KNL_X32-NEXT: calll L_func4xi1$stub
+; KNL_X32-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL_X32-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL_X32-NEXT: addl $12, %esp
+; KNL_X32-NEXT: retl
%cmpRes = icmp sgt <4 x i32>%a, %b
%resi = call <4 x i1> @func4xi1(<4 x i1> %cmpRes)
%res = sext <4 x i1>%resi to <4 x i32>
ret <4 x i32> %res
}
-; SKX-LABEL: test7a
-; SKX: call
-; SKX: vpmovw2m %xmm0, %k0
-; SKX: kandb
define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
+; KNL-LABEL: test7a:
+; KNL: ## BB#0:
+; KNL-NEXT: pushq %rax
+; KNL-NEXT: Ltmp3:
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: callq _func8xi1
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: movb $85, %al
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovqw %zmm0, %xmm0
+; KNL-NEXT: popq %rax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test7a:
+; SKX: ## BB#0:
+; SKX-NEXT: pushq %rax
+; SKX-NEXT: Ltmp3:
+; SKX-NEXT: .cfi_def_cfa_offset 16
+; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: callq _func8xi1
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k0
+; SKX-NEXT: movb $85, %al
+; SKX-NEXT: kmovb %eax, %k1
+; SKX-NEXT: kandb %k1, %k0, %k0
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: popq %rax
+; SKX-NEXT: retq
+;
+; KNL_X32-LABEL: test7a:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: subl $12, %esp
+; KNL_X32-NEXT: Ltmp3:
+; KNL_X32-NEXT: .cfi_def_cfa_offset 16
+; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
+; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
+; KNL_X32-NEXT: calll L_func8xi1$stub
+; KNL_X32-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL_X32-NEXT: vpsllvq LCPI7_0, %zmm0, %zmm0
+; KNL_X32-NEXT: movb $85, %al
+; KNL_X32-NEXT: movzbl %al, %eax
+; KNL_X32-NEXT: kmovw %eax, %k1
+; KNL_X32-NEXT: vptestmq %zmm0, %zmm0, %k1 {%k1}
+; KNL_X32-NEXT: vpbroadcastd LCPI7_1, %zmm0
+; KNL_X32-NEXT: vmovdqu64 %zmm0, %zmm0 {%k1} {z}
+; KNL_X32-NEXT: vpmovqw %zmm0, %xmm0
+; KNL_X32-NEXT: addl $12, %esp
+; KNL_X32-NEXT: retl
%cmpRes = icmp sgt <8 x i32>%a, %b
%resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
%res = and <8 x i1>%resi, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
ret <8 x i1> %res
}
-
-; KNL_X32-LABEL: test8
-; KNL_X32: testb $1, 4(%esp)
-; KNL_X32:jne
-
-; KNL-LABEL: test8
-; KNL: testb $1, %dil
-; KNL:jne
-
define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) {
+; ALL_X64-LABEL: test8:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: testb $1, %dil
+; ALL_X64-NEXT: jne LBB8_2
+; ALL_X64-NEXT: ## BB#1:
+; ALL_X64-NEXT: vmovaps %zmm1, %zmm0
+; ALL_X64-NEXT: LBB8_2:
+; ALL_X64-NEXT: retq
+;
+; KNL_X32-LABEL: test8:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
+; KNL_X32-NEXT: jne LBB8_2
+; KNL_X32-NEXT: ## BB#1:
+; KNL_X32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_X32-NEXT: LBB8_2:
+; KNL_X32-NEXT: retl
%res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
ret <16 x i8> %res
}
-; KNL-LABEL: test9
-; KNL: vucomisd
-; KNL: setb
define i1 @test9(double %a, double %b) {
+; ALL_X64-LABEL: test9:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: vucomisd %xmm0, %xmm1
+; ALL_X64-NEXT: setb %al
+; ALL_X64-NEXT: retq
+;
+; KNL_X32-LABEL: test9:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0
+; KNL_X32-NEXT: vucomisd {{[0-9]+}}(%esp), %xmm0
+; KNL_X32-NEXT: setb %al
+; KNL_X32-NEXT: retl
%c = fcmp ugt double %a, %b
ret i1 %c
}
-; KNL_X32-LABEL: test10
-; KNL_X32: testb $1, 12(%esp)
-; KNL_X32: cmovnel
-
-; KNL-LABEL: test10
-; KNL: testb $1, %dl
-; KNL: cmovel
define i32 @test10(i32 %a, i32 %b, i1 %cond) {
+; ALL_X64-LABEL: test10:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: testb $1, %dl
+; ALL_X64-NEXT: cmovel %esi, %edi
+; ALL_X64-NEXT: movl %edi, %eax
+; ALL_X64-NEXT: retq
+;
+; KNL_X32-LABEL: test10:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
+; KNL_X32-NEXT: leal {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; KNL_X32-NEXT: cmovnel %eax, %ecx
+; KNL_X32-NEXT: movl (%ecx), %eax
+; KNL_X32-NEXT: retl
%c = select i1 %cond, i32 %a, i32 %b
ret i32 %c
}
-; KNL-LABEL: test11
-; KNL: cmp
-; KNL: setg
define i1 @test11(i32 %a, i32 %b) {
+; ALL_X64-LABEL: test11:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: cmpl %esi, %edi
+; ALL_X64-NEXT: setg %al
+; ALL_X64-NEXT: retq
+;
+; KNL_X32-LABEL: test11:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: setg %al
+; KNL_X32-NEXT: retl
%c = icmp sgt i32 %a, %b
ret i1 %c
}
-; KNL-LABEL: test12
-; KNL: callq _test11
-;; return value in %al
-; KNL: movzbl %al, %ebx
-; KNL: callq _test10
-; KNL: testb $1, %bl
-
define i32 @test12(i32 %a1, i32 %a2, i32 %b1) {
+; ALL_X64-LABEL: test12:
+; ALL_X64: ## BB#0:
+; ALL_X64-NEXT: pushq %rbp
+; ALL_X64-NEXT: Ltmp4:
+; ALL_X64-NEXT: .cfi_def_cfa_offset 16
+; ALL_X64-NEXT: pushq %r14
+; ALL_X64-NEXT: Ltmp5:
+; ALL_X64-NEXT: .cfi_def_cfa_offset 24
+; ALL_X64-NEXT: pushq %rbx
+; ALL_X64-NEXT: Ltmp6:
+; ALL_X64-NEXT: .cfi_def_cfa_offset 32
+; ALL_X64-NEXT: Ltmp7:
+; ALL_X64-NEXT: .cfi_offset %rbx, -32
+; ALL_X64-NEXT: Ltmp8:
+; ALL_X64-NEXT: .cfi_offset %r14, -24
+; ALL_X64-NEXT: Ltmp9:
+; ALL_X64-NEXT: .cfi_offset %rbp, -16
+; ALL_X64-NEXT: movl %esi, %r14d
+; ALL_X64-NEXT: movl %edi, %ebp
+; ALL_X64-NEXT: movl %edx, %esi
+; ALL_X64-NEXT: callq _test11
+; ALL_X64-NEXT: movzbl %al, %ebx
+; ALL_X64-NEXT: movl %ebp, %edi
+; ALL_X64-NEXT: movl %r14d, %esi
+; ALL_X64-NEXT: movl %ebx, %edx
+; ALL_X64-NEXT: callq _test10
+; ALL_X64-NEXT: xorl %ecx, %ecx
+; ALL_X64-NEXT: testb $1, %bl
+; ALL_X64-NEXT: cmovel %ecx, %eax
+; ALL_X64-NEXT: popq %rbx
+; ALL_X64-NEXT: popq %r14
+; ALL_X64-NEXT: popq %rbp
+; ALL_X64-NEXT: retq
+;
+; KNL_X32-LABEL: test12:
+; KNL_X32: ## BB#0:
+; KNL_X32-NEXT: pushl %ebx
+; KNL_X32-NEXT: Ltmp4:
+; KNL_X32-NEXT: .cfi_def_cfa_offset 8
+; KNL_X32-NEXT: pushl %edi
+; KNL_X32-NEXT: Ltmp5:
+; KNL_X32-NEXT: .cfi_def_cfa_offset 12
+; KNL_X32-NEXT: pushl %esi
+; KNL_X32-NEXT: Ltmp6:
+; KNL_X32-NEXT: .cfi_def_cfa_offset 16
+; KNL_X32-NEXT: subl $16, %esp
+; KNL_X32-NEXT: Ltmp7:
+; KNL_X32-NEXT: .cfi_def_cfa_offset 32
+; KNL_X32-NEXT: Ltmp8:
+; KNL_X32-NEXT: .cfi_offset %esi, -16
+; KNL_X32-NEXT: Ltmp9:
+; KNL_X32-NEXT: .cfi_offset %edi, -12
+; KNL_X32-NEXT: Ltmp10:
+; KNL_X32-NEXT: .cfi_offset %ebx, -8
+; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; KNL_X32-NEXT: movl %edi, (%esp)
+; KNL_X32-NEXT: calll _test11
+; KNL_X32-NEXT: movb %al, %bl
+; KNL_X32-NEXT: movzbl %bl, %eax
+; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; KNL_X32-NEXT: movl %edi, (%esp)
+; KNL_X32-NEXT: calll _test10
+; KNL_X32-NEXT: xorl %ecx, %ecx
+; KNL_X32-NEXT: testb $1, %bl
+; KNL_X32-NEXT: cmovel %ecx, %eax
+; KNL_X32-NEXT: addl $16, %esp
+; KNL_X32-NEXT: popl %esi
+; KNL_X32-NEXT: popl %edi
+; KNL_X32-NEXT: popl %ebx
+; KNL_X32-NEXT: retl
%cond = call i1 @test11(i32 %a1, i32 %b1)
%res = call i32 @test10(i32 %a1, i32 %a2, i1 %cond)
%res1 = select i1 %cond, i32 %res, i32 0
ret i32 %res1
-} \ No newline at end of file
+}
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index a211bcd38c9c..586a29545014 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s
; CHECK-LABEL: sitof32
; CHECK: vcvtdq2ps %zmm
@@ -8,6 +8,70 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind {
ret <16 x float> %b
}
+; CHECK-LABEL: sltof864
+; CHECK: vcvtqq2pd
+define <8 x double> @sltof864(<8 x i64> %a) {
+ %b = sitofp <8 x i64> %a to <8 x double>
+ ret <8 x double> %b
+}
+
+; CHECK-LABEL: sltof464
+; CHECK: vcvtqq2pd
+define <4 x double> @sltof464(<4 x i64> %a) {
+ %b = sitofp <4 x i64> %a to <4 x double>
+ ret <4 x double> %b
+}
+
+; CHECK-LABEL: sltof2f32
+; CHECK: vcvtqq2ps
+define <2 x float> @sltof2f32(<2 x i64> %a) {
+ %b = sitofp <2 x i64> %a to <2 x float>
+ ret <2 x float>%b
+}
+
+; CHECK-LABEL: sltof4f32_mem
+; CHECK: vcvtqq2psy (%rdi)
+define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
+ %a1 = load <4 x i64>, <4 x i64>* %a, align 8
+ %b = sitofp <4 x i64> %a1 to <4 x float>
+ ret <4 x float>%b
+}
+
+; CHECK-LABEL: f64tosl
+; CHECK: vcvttpd2qq
+define <4 x i64> @f64tosl(<4 x double> %a) {
+ %b = fptosi <4 x double> %a to <4 x i64>
+ ret <4 x i64> %b
+}
+
+; CHECK-LABEL: f32tosl
+; CHECK: vcvttps2qq
+define <4 x i64> @f32tosl(<4 x float> %a) {
+ %b = fptosi <4 x float> %a to <4 x i64>
+ ret <4 x i64> %b
+}
+
+; CHECK-LABEL: sltof432
+; CHECK: vcvtqq2ps
+define <4 x float> @sltof432(<4 x i64> %a) {
+ %b = sitofp <4 x i64> %a to <4 x float>
+ ret <4 x float> %b
+}
+
+; CHECK-LABEL: ultof432
+; CHECK: vcvtuqq2ps
+define <4 x float> @ultof432(<4 x i64> %a) {
+ %b = uitofp <4 x i64> %a to <4 x float>
+ ret <4 x float> %b
+}
+
+; CHECK-LABEL: ultof64
+; CHECK: vcvtuqq2pd
+define <8 x double> @ultof64(<8 x i64> %a) {
+ %b = uitofp <8 x i64> %a to <8 x double>
+ ret <8 x double> %b
+}
+
; CHECK-LABEL: fptosi00
; CHECK: vcvttps2dq %zmm
; CHECK: ret
@@ -64,16 +128,39 @@ define <8 x i32> @fptosi01(<8 x double> %a) {
ret <8 x i32> %b
}
+; CHECK-LABEL: fptosi03
+; CHECK: vcvttpd2dq %ymm
+; CHECK: ret
+define <4 x i32> @fptosi03(<4 x double> %a) {
+ %b = fptosi <4 x double> %a to <4 x i32>
+ ret <4 x i32> %b
+}
+
; CHECK-LABEL: fptrunc00
; CHECK: vcvtpd2ps %zmm
; CHECK-NEXT: vcvtpd2ps %zmm
-; CHECK-NEXT: vinsertf64x4 $1
+; CHECK-NEXT: vinsertf
; CHECK: ret
define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
%a = fptrunc <16 x double> %b to <16 x float>
ret <16 x float> %a
}
+; CHECK-LABEL: fptrunc01
+; CHECK: vcvtpd2ps %ymm
+define <4 x float> @fptrunc01(<4 x double> %b) {
+ %a = fptrunc <4 x double> %b to <4 x float>
+ ret <4 x float> %a
+}
+
+; CHECK-LABEL: fptrunc02
+; CHECK: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
+define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
+ %a = fptrunc <4 x double> %b to <4 x float>
+ %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
+ ret <4 x float> %c
+}
+
; CHECK-LABEL: fpext00
; CHECK: vcvtps2pd %ymm0, %zmm0
; CHECK: ret
@@ -82,6 +169,16 @@ define <8 x double> @fpext00(<8 x float> %b) nounwind {
ret <8 x double> %a
}
+; CHECK-LABEL: fpext01
+; CHECK: vcvtps2pd %xmm0, %ymm0 {%k1} {z}
+; CHECK: ret
+define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) {
+ %a = fpext <4 x float> %b to <4 x double>
+ %mask = fcmp ogt <4 x double>%a1, %b1
+ %c = select <4 x i1>%mask, <4 x double>%a, <4 x double>zeroinitializer
+ ret <4 x double> %c
+}
+
; CHECK-LABEL: funcA
; CHECK: vcvtsi2sdq (%rdi){{.*}} encoding: [0x62
; CHECK: ret
@@ -182,12 +279,14 @@ define i32 @float_to_int(float %x) {
ret i32 %res
}
-; CHECK-LABEL: uitof64
-; CHECK: vcvtudq2pd
-; CHECK: vextracti64x4
-; CHECK: vcvtudq2pd
-; CHECK: ret
define <16 x double> @uitof64(<16 x i32> %a) nounwind {
+; CHECK-LABEL: uitof64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm2
+; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%b = uitofp <16 x i32> %a to <16 x double>
ret <16 x double> %b
}
@@ -257,7 +356,7 @@ define double @uitofp03(i32 %a) nounwind {
}
; CHECK-LABEL: @sitofp_16i1_float
-; CHECK: vpbroadcastd
+; CHECK: vpmovm2d
; CHECK: vcvtdq2ps
define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
%mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -301,7 +400,7 @@ define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
; CHECK-LABEL: @sitofp_8i1_double
-; CHECK: vpbroadcastq
+; CHECK: vpmovm2d
; CHECK: vcvtdq2pd
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer
@@ -310,7 +409,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
}
; CHECK-LABEL: @uitofp_16i8
-; CHECK: vpmovzxbd
+; CHECK: vpmovzxbd
; CHECK: vcvtudq2ps
define <16 x float> @uitofp_16i8(<16 x i8>%a) {
%b = uitofp <16 x i8> %a to <16 x float>
diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll
new file mode 100644
index 000000000000..bc1509684475
--- /dev/null
+++ b/test/CodeGen/X86/avx512-ext.ll
@@ -0,0 +1,1835 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+
+define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_8x8mem_to_8x16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
+; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x8mem_to_8x16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = zext <8 x i8> %a to <8 x i16>
+ %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_8x8mem_to_8x16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
+; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
+; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_8x8mem_to_8x16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i16>
+ %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+
+define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_16x8mem_to_16x16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
+; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_16x8mem_to_16x16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = zext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_16x8mem_to_16x16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vpmovsxbw (%rdi), %ymm1
+; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
+; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_16x8mem_to_16x16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = sext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
+; KNL-LABEL: zext_16x8_to_16x16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_16x8_to_16x16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovzxbw %xmm0, %ymm0
+; SKX-NEXT: retq
+ %x = zext <16 x i8> %a to <16 x i16>
+ ret <16 x i16> %x
+}
+
+define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_16x8_to_16x16_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
+; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
+; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_16x8_to_16x16_mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
+; SKX-NEXT: vpmovb2m %xmm1, %k1
+; SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = zext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
+; ALL-LABEL: sext_16x8_to_16x16:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxbw %xmm0, %ymm0
+; ALL-NEXT: retq
+ %x = sext <16 x i8> %a to <16 x i16>
+ ret <16 x i16> %x
+}
+
+define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_16x8_to_16x16_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
+; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
+; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
+; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_16x8_to_16x16_mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
+; SKX-NEXT: vpmovb2m %xmm1, %k1
+; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = sext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_32x8mem_to_32x16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
+; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
+; KNL-NEXT: vpand %ymm2, %ymm3, %ymm2
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
+; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
+; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_32x8mem_to_32x16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
+; SKX-NEXT: vpmovb2m %ymm0, %k1
+; SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <32 x i8>,<32 x i8> *%i,align 1
+ %x = zext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_32x8mem_to_32x16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm1
+; KNL-NEXT: vpmovsxbw (%rdi), %ymm2
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
+; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
+; KNL-NEXT: vpand %ymm2, %ymm3, %ymm2
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
+; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1
+; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_32x8mem_to_32x16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
+; SKX-NEXT: vpmovb2m %ymm0, %k1
+; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <32 x i8>,<32 x i8> *%i,align 1
+ %x = sext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
+; KNL-LABEL: zext_32x8_to_32x16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_32x8_to_32x16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovzxbw %ymm0, %zmm0
+; SKX-NEXT: retq
+ %x = zext <32 x i8> %a to <32 x i16>
+ ret <32 x i16> %x
+}
+
+define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_32x8_to_32x16_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
+; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
+; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0
+; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
+; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
+; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_32x8_to_32x16_mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
+; SKX-NEXT: vpmovb2m %ymm1, %k1
+; SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = zext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
+; KNL-LABEL: sext_32x8_to_32x16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbw %xmm0, %ymm2
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT: vpmovsxbw %xmm0, %ymm1
+; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_32x8_to_32x16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovsxbw %ymm0, %zmm0
+; SKX-NEXT: retq
+ %x = sext <32 x i8> %a to <32 x i16>
+ ret <32 x i16> %x
+}
+
+define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_32x8_to_32x16_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
+; KNL-NEXT: vpmovsxbw %xmm2, %ymm2
+; KNL-NEXT: vpmovsxbw %xmm0, %ymm0
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
+; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
+; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0
+; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; KNL-NEXT: vpsllw $15, %ymm1, %ymm1
+; KNL-NEXT: vpsraw $15, %ymm1, %ymm1
+; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_32x8_to_32x16_mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
+; SKX-NEXT: vpmovb2m %ymm1, %k1
+; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = sext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_4x8mem_to_4x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_4x8mem_to_4x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = zext <4 x i8> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_4x8mem_to_4x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxbd (%rdi), %xmm1
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_4x8mem_to_4x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = sext <4 x i8> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_8x8mem_to_8x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x8mem_to_8x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = zext <8 x i8> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_8x8mem_to_8x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_8x8mem_to_8x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_16x8mem_to_16x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_16x8mem_to_16x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = zext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_16x8mem_to_16x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_16x8mem_to_16x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = sext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_16x8_to_16x32_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_16x8_to_16x32_mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
+; SKX-NEXT: vpmovb2m %xmm1, %k1
+; SKX-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = zext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_16x8_to_16x32_mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_16x8_to_16x32_mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
+; SKX-NEXT: vpmovb2m %xmm1, %k1
+; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = sext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
+; ALL-LABEL: zext_16x8_to_16x32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovzxbd %xmm0, %zmm0
+; ALL-NEXT: retq
+ %x = zext <16 x i8> %i to <16 x i32>
+ ret <16 x i32> %x
+}
+
+define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
+; ALL-LABEL: sext_16x8_to_16x32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxbd %xmm0, %zmm0
+; ALL-NEXT: retq
+ %x = sext <16 x i8> %i to <16 x i32>
+ ret <16 x i32> %x
+}
+
+define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_2x8mem_to_2x64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; KNL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_2x8mem_to_2x64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
+; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <2 x i8>,<2 x i8> *%i,align 1
+ %x = zext <2 x i8> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_2x8mem_to_2x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; KNL-NEXT: vpmovsxbq (%rdi), %xmm1
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_2x8mem_to_2x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
+; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <2 x i8>,<2 x i8> *%i,align 1
+ %x = sext <2 x i8> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
+; ALL-LABEL: sext_2x8mem_to_2x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxbq (%rdi), %xmm0
+; ALL-NEXT: retq
+ %a = load <2 x i8>,<2 x i8> *%i,align 1
+ %x = sext <2 x i8> %a to <2 x i64>
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_4x8mem_to_4x64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_4x8mem_to_4x64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = zext <4 x i8> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_4x8mem_to_4x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL-NEXT: vpmovsxbq (%rdi), %ymm1
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_4x8mem_to_4x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = sext <4 x i8> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
+; ALL-LABEL: sext_4x8mem_to_4x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxbq (%rdi), %ymm0
+; ALL-NEXT: retq
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = sext <4 x i8> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_8x8mem_to_8x64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x8mem_to_8x64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = zext <8 x i8> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_8x8mem_to_8x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_8x8mem_to_8x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
+; ALL-LABEL: sext_8x8mem_to_8x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
+; ALL-NEXT: retq
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i64>
+ ret <8 x i64> %x
+}
+
+define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_4x16mem_to_4x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_4x16mem_to_4x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = zext <4 x i16> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_4x16mem_to_4x32mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxwd (%rdi), %xmm1
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_4x16mem_to_4x32mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
+; ALL-LABEL: sext_4x16mem_to_4x32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxwd (%rdi), %xmm0
+; ALL-NEXT: retq
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i32>
+ ret <4 x i32> %x
+}
+
+
+define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_8x16mem_to_8x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x16mem_to_8x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = zext <8 x i16> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_8x16mem_to_8x32mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_8x16mem_to_8x32mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
+; ALL-LABEL: sext_8x16mem_to_8x32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxwd (%rdi), %ymm0
+; ALL-NEXT: retq
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %x
+}
+
+define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_8x16_to_8x32mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x16_to_8x32mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
+; SKX-NEXT: vpmovw2m %xmm1, %k1
+; SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = zext <8 x i16> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
+; KNL-LABEL: zext_8x16_to_8x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x16_to_8x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovzxwd %xmm0, %ymm0
+; SKX-NEXT: retq
+ %x = zext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %x
+}
+
+define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_16x16mem_to_16x32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_16x16mem_to_16x32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <16 x i16>,<16 x i16> *%i,align 1
+ %x = zext <16 x i16> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_16x16mem_to_16x32mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_16x16mem_to_16x32mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <16 x i16>,<16 x i16> *%i,align 1
+ %x = sext <16 x i16> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
+; ALL-LABEL: sext_16x16mem_to_16x32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxwd (%rdi), %zmm0
+; ALL-NEXT: retq
+ %a = load <16 x i16>,<16 x i16> *%i,align 1
+ %x = sext <16 x i16> %a to <16 x i32>
+ ret <16 x i32> %x
+}
+define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_16x16_to_16x32mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_16x16_to_16x32mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm1, %xmm1
+; SKX-NEXT: vpmovb2m %xmm1, %k1
+; SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = zext <16 x i16> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
+; ALL-LABEL: zext_16x16_to_16x32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovzxwd %ymm0, %zmm0
+; ALL-NEXT: retq
+ %x = zext <16 x i16> %a to <16 x i32>
+ ret <16 x i32> %x
+}
+
+define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_2x16mem_to_2x64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_2x16mem_to_2x64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
+; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <2 x i16>,<2 x i16> *%i,align 1
+ %x = zext <2 x i16> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_2x16mem_to_2x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; KNL-NEXT: vpmovsxwq (%rdi), %xmm1
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_2x16mem_to_2x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
+; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <2 x i16>,<2 x i16> *%i,align 1
+ %x = sext <2 x i16> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
+; ALL-LABEL: sext_2x16mem_to_2x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxwq (%rdi), %xmm0
+; ALL-NEXT: retq
+ %a = load <2 x i16>,<2 x i16> *%i,align 1
+ %x = sext <2 x i16> %a to <2 x i64>
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_4x16mem_to_4x64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_4x16mem_to_4x64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = zext <4 x i16> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_4x16mem_to_4x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL-NEXT: vpmovsxwq (%rdi), %ymm1
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_4x16mem_to_4x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
+; ALL-LABEL: sext_4x16mem_to_4x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxwq (%rdi), %ymm0
+; ALL-NEXT: retq
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_8x16mem_to_8x64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x16mem_to_8x64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = zext <8 x i16> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_8x16mem_to_8x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_8x16mem_to_8x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
+; ALL-LABEL: sext_8x16mem_to_8x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxwq (%rdi), %zmm0
+; ALL-NEXT: retq
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i64>
+ ret <8 x i64> %x
+}
+
+define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_8x16_to_8x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x16_to_8x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
+; SKX-NEXT: vpmovw2m %xmm1, %k1
+; SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = zext <8 x i16> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
+; ALL-LABEL: zext_8x16_to_8x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovzxwq %xmm0, %zmm0
+; ALL-NEXT: retq
+ %ret = zext <8 x i16> %a to <8 x i64>
+ ret <8 x i64> %ret
+}
+
+define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_2x32mem_to_2x64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; KNL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_2x32mem_to_2x64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
+; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <2 x i32>,<2 x i32> *%i,align 1
+ %x = zext <2 x i32> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_2x32mem_to_2x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsllq $63, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; KNL-NEXT: vpmovsxdq (%rdi), %xmm1
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_2x32mem_to_2x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
+; SKX-NEXT: vpmovq2m %xmm0, %k1
+; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <2 x i32>,<2 x i32> *%i,align 1
+ %x = sext <2 x i32> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
+; ALL-LABEL: sext_2x32mem_to_2x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxdq (%rdi), %xmm0
+; ALL-NEXT: retq
+ %a = load <2 x i32>,<2 x i32> *%i,align 1
+ %x = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_4x32mem_to_4x64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_4x32mem_to_4x64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i32>,<4 x i32> *%i,align 1
+ %x = zext <4 x i32> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_4x32mem_to_4x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
+; KNL-NEXT: vpmovsxdq (%rdi), %ymm1
+; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_4x32mem_to_4x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <4 x i32>,<4 x i32> *%i,align 1
+ %x = sext <4 x i32> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
+; ALL-LABEL: sext_4x32mem_to_4x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxdq (%rdi), %ymm0
+; ALL-NEXT: retq
+ %a = load <4 x i32>,<4 x i32> *%i,align 1
+ %x = sext <4 x i32> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
+; ALL-LABEL: sext_4x32_to_4x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxdq %xmm0, %ymm0
+; ALL-NEXT: retq
+ %x = sext <4 x i32> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_4x32_to_4x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %xmm1, %xmm1
+; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
+; KNL-NEXT: vpmovsxdq %xmm1, %ymm1
+; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_4x32_to_4x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX-NEXT: vpmovd2m %xmm1, %k1
+; SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = zext <4 x i32> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_8x32mem_to_8x64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x32mem_to_8x64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i32>,<8 x i32> *%i,align 1
+ %x = zext <8 x i32> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: sext_8x32mem_to_8x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_8x32mem_to_8x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k1
+; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = load <8 x i32>,<8 x i32> *%i,align 1
+ %x = sext <8 x i32> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
+; ALL-LABEL: sext_8x32mem_to_8x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxdq (%rdi), %zmm0
+; ALL-NEXT: retq
+ %a = load <8 x i32>,<8 x i32> *%i,align 1
+ %x = sext <8 x i32> %a to <8 x i64>
+ ret <8 x i64> %x
+}
+
+define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
+; ALL-LABEL: sext_8x32_to_8x64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxdq %ymm0, %zmm0
+; ALL-NEXT: retq
+ %x = sext <8 x i32> %a to <8 x i64>
+ ret <8 x i64> %x
+}
+
+define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
+; KNL-LABEL: zext_8x32_to_8x64mask:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
+; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8x32_to_8x64mask:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
+; SKX-NEXT: vpmovw2m %xmm1, %k1
+; SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %x = zext <8 x i32> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
+; ALL-LABEL: fptrunc_test:
+; ALL: ## BB#0:
+; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0
+; ALL-NEXT: retq
+ %b = fptrunc <8 x double> %a to <8 x float>
+ ret <8 x float> %b
+}
+
+define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
+; ALL-LABEL: fpext_test:
+; ALL: ## BB#0:
+; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
+; ALL-NEXT: retq
+ %b = fpext <8 x float> %a to <8 x double>
+ ret <8 x double> %b
+}
+
+define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
+; ALL-LABEL: zext_16i1_to_16xi32:
+; ALL: ## BB#0:
+; ALL-NEXT: kmovw %edi, %k1
+; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; ALL-NEXT: retq
+ %a = bitcast i16 %b to <16 x i1>
+ %c = zext <16 x i1> %a to <16 x i32>
+ ret <16 x i32> %c
+}
+
+define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
+; KNL-LABEL: zext_8i1_to_8xi64:
+; KNL: ## BB#0:
+; KNL-NEXT: movzbl %dil, %eax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: zext_8i1_to_8xi64:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb %edi, %k1
+; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT: retq
+ %a = bitcast i8 %b to <8 x i1>
+ %c = zext <8 x i1> %a to <8 x i64>
+ ret <8 x i64> %c
+}
+
+define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
+; KNL-LABEL: trunc_16i8_to_16i1:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_16i8_to_16i1:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k0
+; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: retq
+ %mask_b = trunc <16 x i8>%a to <16 x i1>
+ %mask = bitcast <16 x i1> %mask_b to i16
+ ret i16 %mask
+}
+
+define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
+; KNL-LABEL: trunc_16i32_to_16i1:
+; KNL: ## BB#0:
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_16i32_to_16i1:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %zmm0, %zmm0
+; SKX-NEXT: vpmovd2m %zmm0, %k0
+; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: retq
+ %mask_b = trunc <16 x i32>%a to <16 x i1>
+ %mask = bitcast <16 x i1> %mask_b to i16
+ ret i16 %mask
+}
+
+define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
+; KNL-LABEL: trunc_4i32_to_4i1:
+; KNL: ## BB#0:
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_4i32_to_4i1:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: vpslld $31, %xmm1, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k1
+; SKX-NEXT: kandw %k1, %k0, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: retq
+ %mask_a = trunc <4 x i32>%a to <4 x i1>
+ %mask_b = trunc <4 x i32>%b to <4 x i1>
+ %a_and_b = and <4 x i1>%mask_a, %mask_b
+ %res = sext <4 x i1>%a_and_b to <4 x i32>
+ ret <4 x i32>%res
+}
+
+
+define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
+; KNL-LABEL: trunc_8i16_to_8i1:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_8i16_to_8i1:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k0
+; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: retq
+ %mask_b = trunc <8 x i16>%a to <8 x i1>
+ %mask = bitcast <8 x i1> %mask_b to i8
+ ret i8 %mask
+}
+
+define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+; KNL-LABEL: sext_8i1_8i32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
+; KNL-NEXT: knotw %k0, %k1
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_8i1_8i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
+; SKX-NEXT: knotb %k0, %k0
+; SKX-NEXT: vpmovm2d %k0, %ymm0
+; SKX-NEXT: retq
+ %x = icmp slt <8 x i32> %a1, %a2
+ %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+ %y = sext <8 x i1> %x1 to <8 x i32>
+ ret <8 x i32> %y
+}
+
+
+define i16 @trunc_i32_to_i1(i32 %a) {
+; ALL-LABEL: trunc_i32_to_i1:
+; ALL: ## BB#0:
+; ALL-NEXT: andl $1, %edi
+; ALL-NEXT: kmovw %edi, %k0
+; ALL-NEXT: movw $-4, %ax
+; ALL-NEXT: kmovw %eax, %k1
+; ALL-NEXT: korw %k0, %k1, %k0
+; ALL-NEXT: kmovw %k0, %eax
+; ALL-NEXT: retq
+ %a_i = trunc i32 %a to i1
+ %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
+ %res = bitcast <16 x i1> %maskv to i16
+ ret i16 %res
+}
+
+define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+; KNL-LABEL: sext_8i1_8i16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_8i1_8i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: retq
+ %x = icmp slt <8 x i32> %a1, %a2
+ %y = sext <8 x i1> %x to <8 x i16>
+ ret <8 x i16> %y
+}
+
+define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
+; KNL-LABEL: sext_16i1_16i32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_16i1_16i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
+; SKX-NEXT: vpmovm2d %k0, %zmm0
+; SKX-NEXT: retq
+ %x = icmp slt <16 x i32> %a1, %a2
+ %y = sext <16 x i1> %x to <16 x i32>
+ ret <16 x i32> %y
+}
+
+define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+; KNL-LABEL: sext_8i1_8i64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
+; KNL-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: sext_8i1_8i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
+; SKX-NEXT: vpmovm2q %k0, %zmm0
+; SKX-NEXT: retq
+ %x = icmp slt <8 x i32> %a1, %a2
+ %y = sext <8 x i1> %x to <8 x i64>
+ ret <8 x i64> %y
+}
+
+define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
+; ALL-LABEL: extload_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovsxbq (%rdi), %zmm0
+; ALL-NEXT: vmovdqa64 %zmm0, (%rsi)
+; ALL-NEXT: retq
+ %sign_load = load <8 x i8>, <8 x i8>* %a
+ %c = sext <8 x i8> %sign_load to <8 x i64>
+ store <8 x i64> %c, <8 x i64>* %res
+ ret void
+}
+
+define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
+; KNL-LABEL: test21:
+; KNL: ## BB#0:
+; KNL-NEXT: pushq %rbp
+; KNL-NEXT: pushq %r15
+; KNL-NEXT: pushq %r14
+; KNL-NEXT: pushq %r13
+; KNL-NEXT: pushq %r12
+; KNL-NEXT: pushq %rbx
+; KNL-NEXT: vpmovsxbd %xmm7, %zmm7
+; KNL-NEXT: vpslld $31, %zmm7, %zmm7
+; KNL-NEXT: vpmovsxbd %xmm6, %zmm6
+; KNL-NEXT: vpslld $31, %zmm6, %zmm6
+; KNL-NEXT: vpmovsxbd %xmm5, %zmm5
+; KNL-NEXT: vpslld $31, %zmm5, %zmm5
+; KNL-NEXT: vpmovsxbd %xmm4, %zmm4
+; KNL-NEXT: vpslld $31, %zmm4, %zmm4
+; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
+; KNL-NEXT: kshiftlw $14, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kshiftlw $15, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kshiftlw $13, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kshiftlw $12, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %edi
+; KNL-NEXT: kshiftlw $11, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: kshiftlw $10, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %r13d
+; KNL-NEXT: kshiftlw $9, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %r8d
+; KNL-NEXT: kshiftlw $8, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %r10d
+; KNL-NEXT: kshiftlw $7, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %r11d
+; KNL-NEXT: kshiftlw $6, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %ebx
+; KNL-NEXT: kshiftlw $5, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %ebp
+; KNL-NEXT: kshiftlw $4, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %r14d
+; KNL-NEXT: kshiftlw $3, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %r15d
+; KNL-NEXT: kshiftlw $2, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %r9d
+; KNL-NEXT: kshiftlw $1, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kmovw %k1, %r12d
+; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
+; KNL-NEXT: kshiftlw $0, %k0, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vmovd %eax, %xmm4
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kshiftlw $14, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $1, %edx, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; KNL-NEXT: kshiftlw $15, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $2, %ecx, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kshiftlw $13, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $3, %edi, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %edi
+; KNL-NEXT: kshiftlw $12, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $4, %esi, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %esi
+; KNL-NEXT: kshiftlw $11, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $5, %r13d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r13d
+; KNL-NEXT: kshiftlw $10, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $6, %r8d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r8d
+; KNL-NEXT: kshiftlw $9, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $7, %r10d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r10d
+; KNL-NEXT: kshiftlw $8, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $8, %r11d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r11d
+; KNL-NEXT: kshiftlw $7, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $9, %ebx, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %ebx
+; KNL-NEXT: kshiftlw $6, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $10, %ebp, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %ebp
+; KNL-NEXT: kshiftlw $5, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $11, %r14d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r14d
+; KNL-NEXT: kshiftlw $4, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $12, %r15d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r15d
+; KNL-NEXT: kshiftlw $3, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $13, %r9d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; KNL-NEXT: kshiftlw $2, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $14, %r12d, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r12d
+; KNL-NEXT: kshiftlw $1, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
+; KNL-NEXT: kmovw %k0, %r9d
+; KNL-NEXT: vptestmd %zmm6, %zmm6, %k0
+; KNL-NEXT: kshiftlw $0, %k1, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vmovd %ecx, %xmm5
+; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kshiftlw $14, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; KNL-NEXT: kshiftlw $15, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $2, %edi, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kshiftlw $13, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $3, %esi, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %edi
+; KNL-NEXT: kshiftlw $12, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $4, %r13d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kshiftlw $11, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $5, %r8d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r8d
+; KNL-NEXT: kshiftlw $10, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $6, %r10d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r13d
+; KNL-NEXT: kshiftlw $9, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $7, %r11d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; KNL-NEXT: kshiftlw $8, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $8, %ebx, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %ebx
+; KNL-NEXT: kshiftlw $7, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $9, %ebp, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %ebp
+; KNL-NEXT: kshiftlw $6, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $10, %r14d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r10d
+; KNL-NEXT: kshiftlw $5, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $11, %r15d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r11d
+; KNL-NEXT: kshiftlw $4, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: kshiftlw $3, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $13, %r12d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r14d
+; KNL-NEXT: kshiftlw $2, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $14, %r9d, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r9d
+; KNL-NEXT: kshiftlw $1, %k0, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: vpinsrb $15, %edx, %xmm5, %xmm5
+; KNL-NEXT: kmovw %k1, %r15d
+; KNL-NEXT: vptestmd %zmm7, %zmm7, %k1
+; KNL-NEXT: kshiftlw $0, %k0, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vmovd %eax, %xmm6
+; KNL-NEXT: kmovw %k0, %r12d
+; KNL-NEXT: kshiftlw $14, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kshiftlw $15, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $2, %edi, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: kshiftlw $13, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $3, %ecx, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: kshiftlw $12, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $4, %r8d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r8d
+; KNL-NEXT: kshiftlw $11, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $5, %r13d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r13d
+; KNL-NEXT: kshiftlw $10, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; KNL-NEXT: kmovw %k0, %edi
+; KNL-NEXT: kshiftlw $9, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $7, %ebx, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %ebx
+; KNL-NEXT: kshiftlw $8, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $8, %ebp, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %ebp
+; KNL-NEXT: kshiftlw $7, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $9, %r10d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r10d
+; KNL-NEXT: kshiftlw $6, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $10, %r11d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r11d
+; KNL-NEXT: kshiftlw $5, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $11, %esi, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %esi
+; KNL-NEXT: kshiftlw $4, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $12, %r14d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r14d
+; KNL-NEXT: kshiftlw $3, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $13, %r9d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r9d
+; KNL-NEXT: kshiftlw $2, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $14, %r15d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r15d
+; KNL-NEXT: kshiftlw $1, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vpinsrb $15, %r12d, %xmm6, %xmm6
+; KNL-NEXT: kmovw %k0, %r12d
+; KNL-NEXT: kshiftlw $0, %k1, %k0
+; KNL-NEXT: kshiftrw $15, %k0, %k0
+; KNL-NEXT: vmovd %edx, %xmm7
+; KNL-NEXT: kmovw %k0, %edx
+; KNL-NEXT: vpinsrb $1, %eax, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $2, %ecx, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $3, %r8d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $4, %r13d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $5, %edi, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $6, %ebx, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $7, %ebp, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $8, %r10d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $9, %r11d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $10, %esi, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $11, %r14d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $12, %r9d, %xmm7, %xmm7
+; KNL-NEXT: vpinsrb $13, %r15d, %xmm7, %xmm7
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
+; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
+; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
+; KNL-NEXT: vpand %ymm0, %ymm4, %ymm0
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero
+; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
+; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
+; KNL-NEXT: vpand %ymm1, %ymm4, %ymm1
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero
+; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
+; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
+; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2
+; KNL-NEXT: vpinsrb $14, %r12d, %xmm7, %xmm4
+; KNL-NEXT: vpinsrb $15, %edx, %xmm4, %xmm4
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
+; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
+; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
+; KNL-NEXT: vpand %ymm3, %ymm4, %ymm3
+; KNL-NEXT: popq %rbx
+; KNL-NEXT: popq %r12
+; KNL-NEXT: popq %r13
+; KNL-NEXT: popq %r14
+; KNL-NEXT: popq %r15
+; KNL-NEXT: popq %rbp
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test21:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %zmm2, %zmm2
+; SKX-NEXT: vpmovb2m %zmm2, %k1
+; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; SKX-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; SKX-NEXT: vmovdqu16 %zmm0, %zmm3 {%k1}
+; SKX-NEXT: kshiftrq $32, %k1, %k1
+; SKX-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1}
+; SKX-NEXT: vmovaps %zmm3, %zmm0
+; SKX-NEXT: vmovaps %zmm2, %zmm1
+; SKX-NEXT: retq
+ %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
+ ret <64 x i16> %ret
+}
+
diff --git a/test/CodeGen/X86/avx512-extract-subvector.ll b/test/CodeGen/X86/avx512-extract-subvector.ll
new file mode 100644
index 000000000000..703f7832588c
--- /dev/null
+++ b/test/CodeGen/X86/avx512-extract-subvector.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
+
+
+define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
+; SKX-LABEL: extract_subvector128_v32i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; SKX-NEXT: retq
+ %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <8 x i16> %r1
+}
+
+define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
+; SKX-LABEL: extract_subvector128_v32i16_first_element:
+; SKX: ## BB#0:
+; SKX-NEXT: retq
+ %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %r1
+}
+
+define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
+; SKX-LABEL: extract_subvector128_v64i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; SKX-NEXT: retq
+ %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
+ ret <16 x i8> %r1
+}
+
+define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
+; SKX-LABEL: extract_subvector128_v64i8_first_element:
+; SKX: ## BB#0:
+; SKX-NEXT: retq
+ %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %r1
+}
+
+
+define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
+; SKX-LABEL: extract_subvector256_v32i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: retq
+ %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <16 x i16> %r1
+}
+
+define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
+; SKX-LABEL: extract_subvector256_v64i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: retq
+ %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ ret <32 x i8> %r1
+}
diff --git a/test/CodeGen/X86/avx512-fma.ll b/test/CodeGen/X86/avx512-fma.ll
index ed046de005cf..9279441a23c7 100644
--- a/test/CodeGen/X86/avx512-fma.ll
+++ b/test/CodeGen/X86/avx512-fma.ll
@@ -1,81 +1,93 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=SKX
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
-; CHECK-LABEL: test_x86_fmadd_ps_z
-; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0
-; CHECK: ret
define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+; ALL-LABEL: test_x86_fmadd_ps_z:
+; ALL: ## BB#0:
+; ALL-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
+; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fadd <16 x float> %x, %a2
ret <16 x float> %res
}
-; CHECK-LABEL: test_x86_fmsub_ps_z
-; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0
-; CHECK: ret
define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+; ALL-LABEL: test_x86_fmsub_ps_z:
+; ALL: ## BB#0:
+; ALL-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
+; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %x, %a2
ret <16 x float> %res
}
-; CHECK-LABEL: test_x86_fnmadd_ps_z
-; CHECK: vfnmadd213ps %zmm2, %zmm1, %zmm0
-; CHECK: ret
define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+; ALL-LABEL: test_x86_fnmadd_ps_z:
+; ALL: ## BB#0:
+; ALL-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0
+; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %a2, %x
ret <16 x float> %res
}
-; CHECK-LABEL: test_x86_fnmsub_ps_z
-; CHECK: vfnmsub213ps %zmm2, %zmm1, %zmm0
-; CHECK: ret
define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+; ALL-LABEL: test_x86_fnmsub_ps_z:
+; ALL: ## BB#0:
+; ALL-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
+; ALL-NEXT: retq
%x = fmul <16 x float> %a0, %a1
- %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
+ %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
- float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
- float -0.000000e+00>, %x
+ float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
+ float -0.000000e+00>, %x
%res = fsub <16 x float> %y, %a2
ret <16 x float> %res
}
-; CHECK-LABEL: test_x86_fmadd_pd_z
-; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0
-; CHECK: ret
define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+; ALL-LABEL: test_x86_fmadd_pd_z:
+; ALL: ## BB#0:
+; ALL-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
+; ALL-NEXT: retq
%x = fmul <8 x double> %a0, %a1
%res = fadd <8 x double> %x, %a2
ret <8 x double> %res
}
-; CHECK-LABEL: test_x86_fmsub_pd_z
-; CHECK: vfmsub213pd %zmm2, %zmm1, %zmm0
-; CHECK: ret
define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+; ALL-LABEL: test_x86_fmsub_pd_z:
+; ALL: ## BB#0:
+; ALL-NEXT: vfmsub213pd %zmm2, %zmm1, %zmm0
+; ALL-NEXT: retq
%x = fmul <8 x double> %a0, %a1
%res = fsub <8 x double> %x, %a2
ret <8 x double> %res
}
define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
-; CHECK-LABEL: test_x86_fmsub_213:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: test_x86_fmsub_213:
+; ALL: ## BB#0:
+; ALL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
+; ALL-NEXT: vmovaps %zmm1, %zmm0
+; ALL-NEXT: retq
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
ret double %res
}
define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
-; CHECK-LABEL: test_x86_fmsub_213_m:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test_x86_fmsub_213_m:
+; KNL: ## BB#0:
+; KNL-NEXT: vfmsub213sd (%rdi), %xmm0, %xmm1
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_x86_fmsub_213_m:
+; SKX: ## BB#0:
+; SKX-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0
+; SKX-NEXT: retq
%a2 = load double , double *%a2_ptr
%x = fmul double %a0, %a1
%res = fsub double %x, %a2
@@ -83,11 +95,11 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
}
define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
-; CHECK-LABEL: test_x86_fmsub_231_m:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: test_x86_fmsub_231_m:
+; ALL: ## BB#0:
+; ALL-NEXT: vfmsub231sd (%rdi), %xmm0, %xmm1
+; ALL-NEXT: vmovaps %zmm1, %zmm0
+; ALL-NEXT: retq
%a2 = load double , double *%a2_ptr
%x = fmul double %a0, %a2
%res = fsub double %x, %a1
@@ -95,21 +107,21 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
}
define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
-; CHECK-LABEL: test231_br:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: test231_br:
+; ALL: ## BB#0:
+; ALL-NEXT: vfmadd231ps {{.*}}(%rip){1to16}, %zmm0, %zmm1
+; ALL-NEXT: vmovaps %zmm1, %zmm0
+; ALL-NEXT: retq
%b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
%b2 = fadd <16 x float> %b1, %a2
ret <16 x float> %b2
}
define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
-; CHECK-LABEL: test213_br:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: test213_br:
+; ALL: ## BB#0:
+; ALL-NEXT: vfmadd213ps {{.*}}(%rip){1to16}, %zmm1, %zmm0
+; ALL-NEXT: retq
%b1 = fmul <16 x float> %a1, %a2
%b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
ret <16 x float> %b2
@@ -117,16 +129,17 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
;mask (a*c+b , a)
define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
-; CHECK-LABEL: test_x86_fmadd132_ps:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
-; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
-; CHECK-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
-; CHECK-NEXT: retq
+; KNL-LABEL: test_x86_fmadd132_ps:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
+; KNL-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd132_ps:
; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd132ps (%rdi), %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq
@@ -139,17 +152,18 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <1
;mask (a*c+b , b)
define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
-; CHECK-LABEL: test_x86_fmadd231_ps:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
-; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
-; CHECK-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test_x86_fmadd231_ps:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd231_ps:
; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd231ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
@@ -163,17 +177,18 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <1
;mask (b*a+c , b)
define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
-; CHECK-LABEL: test_x86_fmadd213_ps:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsxbd %xmm2, %zmm2
-; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm2, %zmm2
-; CHECK-NEXT: vptestmd %zmm2, %zmm2, %k1
-; CHECK-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test_x86_fmadd213_ps:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
;
; SKX-LABEL: test_x86_fmadd213_ps:
; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm2, %xmm2
; SKX-NEXT: vpmovb2m %xmm2, %k1
; SKX-NEXT: vfmadd213ps (%rdi), %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index 3fca5a89a6a4..3bc67cceaab5 100644
--- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
@@ -14,7 +15,7 @@ define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8*
; CHECK-LABEL: gather_mask_dps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
@@ -29,7 +30,7 @@ define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %b
; CHECK-LABEL: gather_mask_dpd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
@@ -44,7 +45,7 @@ define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %ba
; CHECK-LABEL: gather_mask_qps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
@@ -59,7 +60,7 @@ define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %b
; CHECK-LABEL: gather_mask_qpd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
@@ -86,7 +87,7 @@ define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %ba
; CHECK-LABEL: gather_mask_dd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
@@ -101,7 +102,7 @@ define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base,
; CHECK-LABEL: gather_mask_qd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
@@ -116,7 +117,7 @@ define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base,
; CHECK-LABEL: gather_mask_qq:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
@@ -131,7 +132,7 @@ define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base,
; CHECK-LABEL: gather_mask_dq:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
@@ -239,8 +240,8 @@ define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %
define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK: ## BB#0:
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
-; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
@@ -256,7 +257,7 @@ declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK-LABEL: prefetch:
; CHECK: ## BB#0:
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
@@ -278,12 +279,12 @@ define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
-; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
+ %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}
@@ -311,12 +312,12 @@ define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
-; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
- %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
+ %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}
@@ -329,7 +330,7 @@ define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
@@ -347,12 +348,12 @@ define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1,
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
-; CHECK-NEXT: vgatherqps (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vgatherqps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 0)
+ %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
@@ -363,7 +364,7 @@ define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x
; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
@@ -383,12 +384,12 @@ define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1,
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
-; CHECK-NEXT: vgatherqps (%rdi,%ymm1,0), %xmm0 {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vgatherqps (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 0)
+ %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
@@ -400,7 +401,7 @@ define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
@@ -419,12 +420,12 @@ define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
-; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+ %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}
@@ -452,12 +453,12 @@ define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
-; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,0), %ymm0 {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %ymm0 {%k1}
; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
- %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+ %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
%res2 = fadd <4 x double> %res, %res1
ret <4 x double> %res2
}
@@ -485,12 +486,12 @@ define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1,
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
-; CHECK-NEXT: vgatherdps (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vgatherdps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 0)
+ %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
%res2 = fadd <4 x float> %res, %res1
ret <4 x float> %res2
}
@@ -501,14 +502,14 @@ define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: kxnorw %k2, %k2, %k2
+; CHECK-NEXT: kxnorw %k0, %k0, %k2
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
-; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,0), %xmm0 {%k1}
+; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
- %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 0)
+ %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 2)
%res2 = add <4 x i32> %res, %res1
ret <4 x i32> %res2
}
@@ -521,12 +522,12 @@ define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1,
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
-; CHECK-NEXT: vgatherdps (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
+; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
- %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 0)
+ %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 2)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
@@ -538,13 +539,13 @@ define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm2
-; CHECK-NEXT: kmovw %k1, %k2
+; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
-; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,0), %ymm0 {%k1}
+; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
- %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 0)
+ %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 2)
%res2 = add <8 x i32> %res, %res1
ret <8 x i32> %res2
}
@@ -555,11 +556,11 @@ define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: kxnorw %k2, %k2, %k2
-; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: kxnorw %k0, %k0, %k2
+; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 0)
+ call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
ret void
}
@@ -570,11 +571,11 @@ define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 0)
+ call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
ret void
}
@@ -585,11 +586,11 @@ define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 0)
+ call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
ret void
}
@@ -600,11 +601,11 @@ define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 0)
+ call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
ret void
}
@@ -615,11 +616,11 @@ define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 0)
+ call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
ret void
}
@@ -630,11 +631,11 @@ define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: kxnorw %k2, %k2, %k2
-; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: kxnorw %k0, %k0, %k2
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 0)
+ call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
ret void
}
@@ -645,11 +646,11 @@ define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 0)
+ call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
ret void
}
@@ -660,11 +661,11 @@ define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 0)
+ call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
ret void
}
@@ -675,11 +676,11 @@ define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: kxnorw %k2, %k2, %k2
-; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: kxnorw %k0, %k0, %k2
+; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 0)
+ call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
ret void
}
@@ -690,11 +691,11 @@ define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: kxnorw %k2, %k2, %k2
-; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: kxnorw %k0, %k0, %k2
+; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 0)
+ call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
ret void
}
@@ -705,11 +706,11 @@ define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 0)
+ call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
ret void
}
@@ -720,11 +721,11 @@ define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: kxnorw %k2, %k2, %k2
-; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,0) {%k2}
+; CHECK-NEXT: kxnorw %k0, %k0, %k2
+; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 0)
+ call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
ret void
}
@@ -735,11 +736,11 @@ define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 0)
+ call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
ret void
}
@@ -750,11 +751,11 @@ define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 0)
+ call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
ret void
}
@@ -765,11 +766,11 @@ define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 0)
+ call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
ret void
}
@@ -780,11 +781,11 @@ define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %esi, %k1
-; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,0) {%k1}
-; CHECK-NEXT: kxnorw %k1, %k1, %k1
+; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
+; CHECK-NEXT: kxnorw %k0, %k0, %k1
; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT: retq
- call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 0)
+ call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
ret void
}
diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll
index 6f985f0bf3a7..41ec62c7e047 100644
--- a/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/test/CodeGen/X86/avx512-insert-extract.ll
@@ -12,14 +12,24 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
ret <16 x float> %rrr3
}
-;CHECK-LABEL: test2:
-;KNL: vinsertf32x4 $0
-;SKX: vinsertf64x2 $0
-;CHECK: vextractf32x4 $3
-;KNL: vinsertf32x4 $3
-;SKX: vinsertf64x2 $3
-;CHECK: ret
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
+; KNL-LABEL: test2:
+; KNL: ## BB#0:
+; KNL-NEXT: vmovhpd (%rdi), %xmm0, %xmm2
+; KNL-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
+; KNL-NEXT: vextractf32x4 $3, %zmm0, %xmm2
+; KNL-NEXT: vmovsd %xmm1, %xmm2, %xmm1
+; KNL-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test2:
+; SKX: ## BB#0:
+; SKX-NEXT: vmovhpd (%rdi), %xmm0, %xmm2
+; SKX-NEXT: vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
+; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm2
+; SKX-NEXT: vmovsd %xmm1, %xmm2, %xmm1
+; SKX-NEXT: vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
%rrr = load double, double* %br
%rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
%rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
@@ -36,12 +46,22 @@ define <16 x float> @test3(<16 x float> %x) nounwind {
ret <16 x float> %rrr2
}
-;CHECK-LABEL: test4:
-;CHECK: vextracti32x4 $2
-;KNL: vinserti32x4 $0
-;SKX: vinserti64x2 $0
-;CHECK: ret
define <8 x i64> @test4(<8 x i64> %x) nounwind {
+; KNL-LABEL: test4:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; KNL-NEXT: vmovq %xmm1, %rax
+; KNL-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
+; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test4:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti64x2 $2, %zmm0, %xmm1
+; SKX-NEXT: vmovq %xmm1, %rax
+; SKX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
+; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
%eee = extractelement <8 x i64> %x, i32 4
%rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
ret <8 x i64> %rrr2
@@ -142,7 +162,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
;CHECK: andl $1, %eax
;CHECK: kmovw %eax, %k0
;CHECK: movw $-4
-;CHECK: korw
+;CHECK: korw
define i16 @test13(i32 %a, i32 %b) {
%cmp_res = icmp ult i32 %a, %b
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
@@ -211,3 +231,476 @@ define i8 @test17(i1 *%addr, i8 %a) {
ret i8 %x2
}
+define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
+; SKX-LABEL: extract_v8i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrq $1, %xmm0, %rax
+; SKX-NEXT: vextracti64x2 $1, %zmm0, %xmm0
+; SKX-NEXT: vpextrq $1, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <8 x i64> %x, i32 1
+ %r2 = extractelement <8 x i64> %x, i32 3
+ store i64 %r2, i64* %dst, align 1
+ ret i64 %r1
+}
+
+define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
+; SKX-LABEL: extract_v4i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrq $1, %xmm0, %rax
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
+; SKX-NEXT: vpextrq $1, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <4 x i64> %x, i32 1
+ %r2 = extractelement <4 x i64> %x, i32 3
+ store i64 %r2, i64* %dst, align 1
+ ret i64 %r1
+}
+
+define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
+; SKX-LABEL: extract_v2i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vmovq %xmm0, %rax
+; SKX-NEXT: vpextrq $1, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <2 x i64> %x, i32 0
+ %r2 = extractelement <2 x i64> %x, i32 1
+ store i64 %r2, i64* %dst, align 1
+ ret i64 %r1
+}
+
+define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
+; SKX-LABEL: extract_v16i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrd $1, %xmm0, %eax
+; SKX-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; SKX-NEXT: vpextrd $1, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <16 x i32> %x, i32 1
+ %r2 = extractelement <16 x i32> %x, i32 5
+ store i32 %r2, i32* %dst, align 1
+ ret i32 %r1
+}
+
+define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
+; SKX-LABEL: extract_v8i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrd $1, %xmm0, %eax
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
+; SKX-NEXT: vpextrd $1, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <8 x i32> %x, i32 1
+ %r2 = extractelement <8 x i32> %x, i32 5
+ store i32 %r2, i32* %dst, align 1
+ ret i32 %r1
+}
+
+define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
+; SKX-LABEL: extract_v4i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrd $1, %xmm0, %eax
+; SKX-NEXT: vpextrd $3, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <4 x i32> %x, i32 1
+ %r2 = extractelement <4 x i32> %x, i32 3
+ store i32 %r2, i32* %dst, align 1
+ ret i32 %r1
+}
+
+define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
+; SKX-LABEL: extract_v32i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrw $1, %xmm0, %eax
+; SKX-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; SKX-NEXT: vpextrw $1, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <32 x i16> %x, i32 1
+ %r2 = extractelement <32 x i16> %x, i32 9
+ store i16 %r2, i16* %dst, align 1
+ ret i16 %r1
+}
+
+define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
+; SKX-LABEL: extract_v16i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrw $1, %xmm0, %eax
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
+; SKX-NEXT: vpextrw $1, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <16 x i16> %x, i32 1
+ %r2 = extractelement <16 x i16> %x, i32 9
+ store i16 %r2, i16* %dst, align 1
+ ret i16 %r1
+}
+
+define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
+; SKX-LABEL: extract_v8i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrw $1, %xmm0, %eax
+; SKX-NEXT: vpextrw $3, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <8 x i16> %x, i32 1
+ %r2 = extractelement <8 x i16> %x, i32 3
+ store i16 %r2, i16* %dst, align 1
+ ret i16 %r1
+}
+
+define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
+; SKX-LABEL: extract_v64i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrb $1, %xmm0, %eax
+; SKX-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; SKX-NEXT: vpextrb $1, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <64 x i8> %x, i32 1
+ %r2 = extractelement <64 x i8> %x, i32 17
+ store i8 %r2, i8* %dst, align 1
+ ret i8 %r1
+}
+
+define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
+; SKX-LABEL: extract_v32i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrb $1, %xmm0, %eax
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
+; SKX-NEXT: vpextrb $1, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <32 x i8> %x, i32 1
+ %r2 = extractelement <32 x i8> %x, i32 17
+ store i8 %r2, i8* %dst, align 1
+ ret i8 %r1
+}
+
+define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
+; SKX-LABEL: extract_v16i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpextrb $1, %xmm0, %eax
+; SKX-NEXT: vpextrb $3, %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %r1 = extractelement <16 x i8> %x, i32 1
+ %r2 = extractelement <16 x i8> %x, i32 3
+ store i8 %r2, i8* %dst, align 1
+ ret i8 %r1
+}
+
+define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
+; SKX-LABEL: insert_v8i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
+; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: vextracti64x2 $1, %zmm0, %xmm1
+; SKX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
+; SKX-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %val = load i64, i64* %ptr
+ %r1 = insertelement <8 x i64> %x, i64 %val, i32 1
+ %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3
+ ret <8 x i64> %r2
+}
+
+define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
+; SKX-LABEL: insert_v4i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
+; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; SKX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
+; SKX-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %val = load i64, i64* %ptr
+ %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
+ %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3
+ ret <4 x i64> %r2
+}
+
+define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
+; SKX-LABEL: insert_v2i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm0
+; SKX-NEXT: vpinsrq $3, %rdi, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %val = load i64, i64* %ptr
+ %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
+ %r2 = insertelement <2 x i64> %r1, i64 %y, i32 3
+ ret <2 x i64> %r2
+}
+
+define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
+; SKX-LABEL: insert_v16i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
+; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: vextracti32x4 $1, %zmm0, %xmm1
+; SKX-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %val = load i32, i32* %ptr
+ %r1 = insertelement <16 x i32> %x, i32 %val, i32 1
+ %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5
+ ret <16 x i32> %r2
+}
+
+define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
+; KNL-LABEL: insert_v8i32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: insert_v8i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
+; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; SKX-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %val = load i32, i32* %ptr
+ %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
+ %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5
+ ret <8 x i32> %r2
+}
+
+define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
+; KNL-LABEL: insert_v4i32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm0
+; KNL-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: insert_v4i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm0
+; SKX-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %val = load i32, i32* %ptr
+ %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
+ %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
+ ret <4 x i32> %r2
+}
+
+define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, i16* %ptr) {
+; KNL-LABEL: insert_v32i16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm2
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2
+; KNL-NEXT: vpinsrw $1, %edi, %xmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: insert_v32i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
+; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: vextracti32x4 $1, %zmm0, %xmm1
+; SKX-NEXT: vpinsrw $1, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %val = load i16, i16* %ptr
+ %r1 = insertelement <32 x i16> %x, i16 %val, i32 1
+ %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9
+ ret <32 x i16> %r2
+}
+
+define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
+; KNL-LABEL: insert_v16i16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT: vpinsrw $1, %edi, %xmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: insert_v16i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
+; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; SKX-NEXT: vpinsrw $1, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %val = load i16, i16* %ptr
+ %r1 = insertelement <16 x i16> %x, i16 %val, i32 1
+ %r2 = insertelement <16 x i16> %r1, i16 %y, i32 9
+ ret <16 x i16> %r2
+}
+
+define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
+; KNL-LABEL: insert_v8i16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm0
+; KNL-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: insert_v8i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm0
+; SKX-NEXT: vpinsrw $5, %edi, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %val = load i16, i16* %ptr
+ %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
+ %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
+ ret <8 x i16> %r2
+}
+
+define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
+; KNL-LABEL: insert_v64i8:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm2
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2
+; KNL-NEXT: vpinsrb $2, %edi, %xmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: insert_v64i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1
+; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; SKX-NEXT: vpinsrb $2, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $3, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %val = load i8, i8* %ptr
+ %r1 = insertelement <64 x i8> %x, i8 %val, i32 1
+ %r2 = insertelement <64 x i8> %r1, i8 %y, i32 50
+ ret <64 x i8> %r2
+}
+
+define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
+; SKX-LABEL: insert_v32i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1
+; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; SKX-NEXT: vpinsrb $1, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %val = load i8, i8* %ptr
+ %r1 = insertelement <32 x i8> %x, i8 %val, i32 1
+ %r2 = insertelement <32 x i8> %r1, i8 %y, i32 17
+ ret <32 x i8> %r2
+}
+
+define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
+; KNL-LABEL: insert_v16i8:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: insert_v16i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0
+; SKX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; SKX-NEXT: retq
+ %val = load i8, i8* %ptr
+ %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
+ %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
+ ret <16 x i8> %r2
+}
+
+define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
+; KNL-LABEL: test_insert_128_v8i64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
+; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v8i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
+; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %r = insertelement <8 x i64> %x, i64 %y, i32 1
+ ret <8 x i64> %r
+}
+
+define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
+; KNL-LABEL: test_insert_128_v16i32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm1
+; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v16i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm1
+; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %r = insertelement <16 x i32> %x, i32 %y, i32 1
+ ret <16 x i32> %r
+}
+
+define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
+; KNL-LABEL: test_insert_128_v8f64:
+; KNL: ## BB#0:
+; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v8f64:
+; SKX: ## BB#0:
+; SKX-NEXT: vunpcklpd %xmm1, %xmm0, %xmm1
+; SKX-NEXT: vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %r = insertelement <8 x double> %x, double %y, i32 1
+ ret <8 x double> %r
+}
+
+define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
+; KNL-LABEL: test_insert_128_v16f32:
+; KNL: ## BB#0:
+; KNL-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm1
+; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v16f32:
+; SKX: ## BB#0:
+; SKX-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm1
+; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %r = insertelement <16 x float> %x, float %y, i32 1
+ ret <16 x float> %r
+}
+
+define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
+; KNL-LABEL: test_insert_128_v16i16:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v16i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; SKX-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %r = insertelement <16 x i16> %x, i16 %y, i32 10
+ ret <16 x i16> %r
+}
+
+define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
+; KNL-LABEL: test_insert_128_v32i8:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v32i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; SKX-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %r = insertelement <32 x i8> %x, i8 %y, i32 20
+ ret <32 x i8> %r
+}
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 7642cd4e6c5c..764e13638485 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -1,60 +1,94 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
-; CHECK-LABEL: test_kortestz
-; CHECK: kortestw
-; CHECK: sete
define i32 @test_kortestz(i16 %a0, i16 %a1) {
+; CHECK-LABEL: test_kortestz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k0
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kortestw %k0, %k1
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
ret i32 %res
}
declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
-; CHECK-LABEL: test_kortestc
-; CHECK: kortestw
-; CHECK: sbbl
define i32 @test_kortestc(i16 %a0, i16 %a1) {
+; CHECK-LABEL: test_kortestc:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k0
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kortestw %k0, %k1
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
ret i32 %res
}
declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
-; CHECK-LABEL: test_kand
-; CHECK: kandw
-; CHECK: kandw
define i16 @test_kand(i16 %a0, i16 %a1) {
+; CHECK-LABEL: test_kand:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movw $8, %ax
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: kandw %k0, %k1, %k0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
ret i16 %t2
}
declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
-; CHECK-LABEL: test_knot
-; CHECK: knotw
define i16 @test_knot(i16 %a0) {
+; CHECK-LABEL: test_knot:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: knotw %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
ret i16 %res
}
declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
-; CHECK-LABEL: unpckbw_test
-; CHECK: kunpckbw
-; CHECK:ret
define i16 @unpckbw_test(i16 %a0, i16 %a1) {
+; CHECK-LABEL: unpckbw_test:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kunpckbw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
ret i16 %res
}
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
- ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
+; CHECK-LABEL: test_rcp_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vrcp14ps %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
- ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
+; CHECK-LABEL: test_rcp_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vrcp14pd %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}
@@ -63,7 +97,10 @@ declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i
declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
define <8 x double> @test7(<8 x double> %a) {
-; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
+; CHECK-LABEL: test7:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
ret <8 x double>%res
}
@@ -71,121 +108,246 @@ define <8 x double> @test7(<8 x double> %a) {
declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
define <16 x float> @test8(<16 x float> %a) {
-; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
+; CHECK-LABEL: test8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
ret <16 x float>%res
}
define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
- ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
+; CHECK-LABEL: test_rsqrt_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vrsqrt14ps %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
- ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
+; CHECK-LABEL: test_rsqrt14_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vrsqrt14ss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
- ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
+; CHECK-LABEL: test_rcp14_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
- ; CHECK-LABEL: test_sqrt_pd_512
- ; CHECK: vsqrtpd
- %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
+; CHECK-LABEL: test_sqrt_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsqrtpd %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
- ; CHECK-LABEL: test_sqrt_ps_512
- ; CHECK: vsqrtps
- %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
+; CHECK-LABEL: test_sqrt_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsqrtps %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
- ; CHECK-LABEL: test_sqrt_round_ps_512
- ; CHECK: vsqrtps {rz-sae}
- %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
+; CHECK-LABEL: test_sqrt_round_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
define <8 x double> @test_getexp_pd_512(<8 x double> %a0) {
- ; CHECK-LABEL: test_getexp_pd_512
- ; CHECK: vgetexppd
- %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
+; CHECK-LABEL: test_getexp_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vgetexppd %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
ret <8 x double> %res
}
define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) {
- ; CHECK-LABEL: test_getexp_round_pd_512
- ; CHECK: vgetexppd {sae}
- %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
+; CHECK-LABEL: test_getexp_round_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vgetexppd {sae}, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
define <16 x float> @test_getexp_ps_512(<16 x float> %a0) {
- ; CHECK-LABEL: test_getexp_ps_512
- ; CHECK: vgetexpps
- %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
+; CHECK-LABEL: test_getexp_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vgetexpps %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) {
- ; CHECK-LABEL: test_getexp_round_ps_512
- ; CHECK: vgetexpps {sae}
- %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+; CHECK-LABEL: test_getexp_round_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vgetexpps {sae}, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vsqrtss {{.*}}encoding: [0x62
- %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_sqrt_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vsqrtss %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vsqrtss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vsqrtss {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
+; CHECK-NEXT: vsqrtss {rz-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 2)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 3)
+
+ %res.1 = fadd <4 x float> %res0, %res1
+ %res.2 = fadd <4 x float> %res2, %res3
+ %res = fadd <4 x float> %res.1, %res.2
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone
-define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vsqrtsd {{.*}}encoding: [0x62
- %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
+define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_sqrt_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vsqrtsd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vsqrtsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vsqrtsd {ru-sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
+; CHECK-NEXT: vsqrtsd {rz-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddpd %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 2)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 3)
+
+ %res.1 = fadd <2 x double> %res0, %res1
+ %res.2 = fadd <2 x double> %res2, %res3
+ %res = fadd <2 x double> %res.1, %res.2
ret <2 x double> %res
}
-declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
- ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
+; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtsd2si %xmm0, %rax
+; CHECK-NEXT: retq
%res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
ret i64 %res
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
- ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
+; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
-define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
- ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
- %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
- ret i64 %res
+define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvttsd2si %xmm0, %rcx
+; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: retq
+ %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8) ;
+ %res2 = add i64 %res0, %res1
+ ret i64 %res2
}
-declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone
+define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvttsd2usi:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvttsd2usi %xmm0, %ecx
+; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: retq
+ %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
+ %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvttsd2si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvttsd2si %xmm0, %ecx
+; CHECK-NEXT: vcvttsd2si {sae}, %xmm0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: retq
+ %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
+ %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
+
+
+
+define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvttsd2usi %xmm0, %rcx
+; CHECK-NEXT: vcvttsd2usi {sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: retq
+ %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8) ;
+ %res2 = add i64 %res0, %res1
+ ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone
define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
- ; CHECK: vcvtss2si {{.*}}encoding: [0x62
+; CHECK-LABEL: test_x86_sse_cvtss2si64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtss2si %xmm0, %rax
+; CHECK-NEXT: retq
%res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
ret i64 %res
}
@@ -193,37 +355,139 @@ declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
- ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
+; CHECK-LABEL: test_x86_sse_cvtsi642ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
-define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
- ; CHECK: vcvttss2si {{.*}}encoding: [0x62
- %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
- ret i64 %res
+define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvttss2si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %ecx
+; CHECK-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: retq
+ %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
+ %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvttss2si64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvttss2si %xmm0, %rcx
+; CHECK-NEXT: vcvttss2si {sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: retq
+ %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8) ;
+ %res2 = add i64 %res0, %res1
+ ret i64 %res2
+}
+declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvttss2usi:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %ecx
+; CHECK-NEXT: vcvttss2usi %xmm0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: retq
+ %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
+ %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
+
+define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
+; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvttss2usi %xmm0, %rcx
+; CHECK-NEXT: vcvttss2usi {sae}, %xmm0, %rax
+; CHECK-NEXT: addq %rcx, %rax
+; CHECK-NEXT: retq
+ %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4) ;
+ %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8) ;
+ %res2 = add i64 %res0, %res1
+ ret i64 %res2
}
-declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone
define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
- ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
+; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtsd2usi %xmm0, %rax
+; CHECK-NEXT: retq
%res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
ret i64 %res
}
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
- ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
+; CHECK-LABEL: test_x86_vcvtph2ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
+
+define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
+; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
+; CHECK-LABEL: test_x86_vcvtph2ps_512_rrk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
+; CHECK-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
+; CHECK-LABEL: test_x86_vcvtph2ps_512_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
- ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
+; CHECK-LABEL: test_x86_vcvtps2ph_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtps2ph $2, %zmm0, %ymm0
+; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
ret <16 x i16> %res
}
@@ -231,65 +495,124 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
- ; CHECK: vbroadcastss
+; CHECK-LABEL: test_x86_vbroadcast_ss_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastss (%rdi), %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
- ; CHECK: vbroadcastsd
+; CHECK-LABEL: test_x86_vbroadcast_sd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
-define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
- ; CHECK: vbroadcastss
- %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly
-
-define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
- ; CHECK: vbroadcastsd
- %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
- ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
-
-define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
- ; CHECK: vpbroadcastd
- %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
+define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask ) {
+; CHECK-LABEL: test_x86_vbroadcast_ss_ps_512:
+; CHECK: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vbroadcastss %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
+
+ %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 -1)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly
+
+
+define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask ) {
+; CHECK-LABEL: test_x86_vbroadcast_sd_pd_512:
+; CHECK: kmovw %eax, %k1
+; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vbroadcastsd %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
+
+ %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 -1)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 %mask)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res2, %res3
+ ret <8 x double> %res4
+}
+declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly
+
+define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
+ %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
+ %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res2, %res3
+ ret <16 x i32> %res4
}
-declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
+declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)
define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
- ; CHECK: vpbroadcastd
+; CHECK-LABEL: test_x86_pbroadcastd_i32_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastd %edi, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
-define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
- ; CHECK: vpbroadcastq
- %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
+define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
+ %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
+ %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
}
-declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
+declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)
define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
- ; CHECK: vpbroadcastq
+; CHECK-LABEL: test_x86_pbroadcastq_i64_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
define <16 x i32> @test_conflict_d(<16 x i32> %a) {
- ; CHECK: movw $-1, %ax
- ; CHECK: vpxor
- ; CHECK: vpconflictd
+; CHECK-LABEL: test_conflict_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpconflictd %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
@@ -297,9 +620,10 @@ define <16 x i32> @test_conflict_d(<16 x i32> %a) {
declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
define <8 x i64> @test_conflict_q(<8 x i64> %a) {
- ; CHECK: movb $-1, %al
- ; CHECK: vpxor
- ; CHECK: vpconflictq
+; CHECK-LABEL: test_conflict_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpconflictq %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
@@ -307,21 +631,32 @@ define <8 x i64> @test_conflict_q(<8 x i64> %a) {
declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
- ; CHECK: vpconflictd
+; CHECK-LABEL: test_maskz_conflict_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
- ; CHECK: vpconflictq
+; CHECK-LABEL: test_mask_conflict_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret <8 x i64> %res
}
define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
- ; CHECK: movw $-1, %ax
- ; CHECK: vpxor
- ; CHECK: vplzcntd
+; CHECK-LABEL: test_lzcnt_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vplzcntd %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
@@ -329,9 +664,10 @@ define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
- ; CHECK: movb $-1, %al
- ; CHECK: vpxor
- ; CHECK: vplzcntq
+; CHECK-LABEL: test_lzcnt_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vplzcntq %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
@@ -340,37 +676,34 @@ declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) no
define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
- ; CHECK: vplzcntd
+; CHECK-LABEL: test_mask_lzcnt_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret <16 x i32> %res
}
define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
- ; CHECK: vplzcntq
+; CHECK-LABEL: test_mask_lzcnt_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret <8 x i64> %res
}
-define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
- ; CHECK-LABEL: test_ctlz_d
- ; CHECK: vplzcntd
- %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
- ret <16 x i32> %res
-}
-
-declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly
-
-define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
- ; CHECK-LABEL: test_ctlz_q
- ; CHECK: vplzcntq
- %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
- ret <8 x i64> %res
-}
-
-declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
-
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK: vblendmps %zmm1, %zmm0
+; CHECK-LABEL: test_x86_mask_blend_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
@@ -378,14 +711,23 @@ define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x
declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
- ; CHECK: vblendmpd %zmm1, %zmm0
+; CHECK-LABEL: test_x86_mask_blend_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}
define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
- ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
- ; CHECK: vblendmpd (%
+; CHECK-LABEL: test_x86_mask_blend_pd_512_memop:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
%b = load <8 x double>, <8 x double>* %ptr
%res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
@@ -393,28 +735,45 @@ define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x doub
declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly
define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
- ; CHECK: vpblendmd
+; CHECK-LABEL: test_x86_mask_blend_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly
define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
- ; CHECK: vpblendmq
+; CHECK-LABEL: test_x86_mask_blend_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpblendmq %zmm1, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
- ;CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
+; CHECK-LABEL: test_cmpps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
- ;CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
+; CHECK-LABEL: test_cmppd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
ret i8 %res
}
@@ -422,7 +781,10 @@ declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) no
; fp min - max
define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
- ; CHECK: vmaxpd
+; CHECK-LABEL: test_vmaxpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double>zeroinitializer, i8 -1, i32 4)
ret <8 x double> %res
@@ -431,7 +793,10 @@ declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>
<8 x double>, i8, i32)
define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
- ; CHECK: vminpd
+; CHECK-LABEL: test_vminpd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vminpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double>zeroinitializer, i8 -1, i32 4)
ret <8 x double> %res
@@ -441,11 +806,14 @@ declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>
declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpabsd{{.*}}{%k1}
define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpabsd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpabsd %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
%res2 = add <16 x i32> %res, %res1
@@ -454,11 +822,15 @@ define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32>
declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpabsq{{.*}}{%k1}
define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpabsq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpabsq %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
%res2 = add <8 x i64> %res, %res1
@@ -466,21 +838,33 @@ define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x
}
define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
- ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
+; CHECK-LABEL: test_vptestmq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
- ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
+; CHECK-LABEL: test_vptestmd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
-; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
+; CHECK-LABEL: test_store1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovups %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
ret void
}
@@ -488,7 +872,11 @@ define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
-; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
+; CHECK-LABEL: test_store2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovupd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
ret void
}
@@ -565,32 +953,45 @@ declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_valign_q:
-; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
- %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
+; CHECK: ## BB#0:
+; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
; CHECK-LABEL: test_mask_valign_q:
-; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
- %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
ret <8 x i64> %res
}
-declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
+declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_maskz_valign_d:
-; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
- %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
-declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
- ; CHECK-LABEL: test_mask_store_ss
- ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
+; CHECK-LABEL: test_mask_store_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovss %xmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
ret void
}
@@ -598,15 +999,22 @@ define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
-; CHECK-LABEL: test_pcmpeq_d
-; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
+; CHECK-LABEL: test_pcmpeq_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
-; CHECK-LABEL: test_mask_pcmpeq_d
-; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
+; CHECK-LABEL: test_mask_pcmpeq_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
}
@@ -614,15 +1022,23 @@ define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
-; CHECK-LABEL: test_pcmpeq_q
-; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
+; CHECK-LABEL: test_pcmpeq_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
-; CHECK-LABEL: test_mask_pcmpeq_q
-; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
+; CHECK-LABEL: test_mask_pcmpeq_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res
}
@@ -630,15 +1046,22 @@ define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
-; CHECK-LABEL: test_pcmpgt_d
-; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
+; CHECK-LABEL: test_pcmpgt_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
}
define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
-; CHECK-LABEL: test_mask_pcmpgt_d
-; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
+; CHECK-LABEL: test_mask_pcmpgt_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
}
@@ -646,15 +1069,23 @@ define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
-; CHECK-LABEL: test_pcmpgt_q
-; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
+; CHECK-LABEL: test_pcmpgt_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
}
define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
-; CHECK-LABEL: test_mask_pcmpgt_q
-; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
+; CHECK-LABEL: test_mask_pcmpgt_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res
}
@@ -662,58 +1093,95 @@ define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK_LABEL: test_cmp_d_512
-; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
+; CHECK-LABEL: test_cmp_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r8d
+; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r9d
+; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r10d
+; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %esi
+; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: vmovd %r8d, %xmm0
+; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
-; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
%res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
%vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
-; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
%res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
%vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
-; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
%res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
%vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
-; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
%res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
%vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
-; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
%res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
%vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
-; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
%res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
%vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
-; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
%res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
%vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
ret <8 x i16> %vec7
}
define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
-; CHECK_LABEL: test_mask_cmp_d_512
-; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
+; CHECK-LABEL: test_mask_cmp_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r8d
+; CHECK-NEXT: vpcmpltd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r9d
+; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r10d
+; CHECK-NEXT: vpcmpunordd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %esi
+; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpcmpnled %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vpcmpordd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: vmovd %r8d, %xmm0
+; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
-; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
%res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
%vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
-; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
%res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
%vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
-; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
%res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
%vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
-; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
%res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
%vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
-; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
%res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
%vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
-; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
%res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
%vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
-; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
%res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
%vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
ret <8 x i16> %vec7
@@ -722,58 +1190,95 @@ define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask)
declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK_LABEL: test_ucmp_d_512
-; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
+; CHECK-LABEL: test_ucmp_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r8d
+; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r9d
+; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r10d
+; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %esi
+; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: vmovd %r8d, %xmm0
+; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
-; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
%res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
%vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
-; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
%res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
%vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
-; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
%res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
%vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
-; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
%res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
%vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
-; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
%res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
%vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
-; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
%res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
%vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
-; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
%res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
%vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
ret <8 x i16> %vec7
}
define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
-; CHECK_LABEL: test_mask_ucmp_d_512
-; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
+; CHECK-LABEL: test_mask_ucmp_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpcmpequd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r8d
+; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r9d
+; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r10d
+; CHECK-NEXT: vpcmpunordud %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %esi
+; CHECK-NEXT: vpcmpnequd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vpcmpordud %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: vmovd %r8d, %xmm0
+; CHECK-NEXT: vpinsrw $1, %r9d, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $2, %r10d, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $3, %esi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $4, %edi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
-; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
%res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
%vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
-; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
%res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
%vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
-; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
%res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
%vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
-; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
%res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
%vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
-; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
%res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
%vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
-; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
%res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
%vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
-; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
%res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
%vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
ret <8 x i16> %vec7
@@ -782,58 +1287,112 @@ define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask
declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
-; CHECK_LABEL: test_cmp_q_512
-; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
+; CHECK-LABEL: test_cmp_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r8d
+; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r9d
+; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r10d
+; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r11d
+; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: movzbl %r8b, %esi
+; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r9b, %esi
+; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r10b, %esi
+; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r11b, %esi
+; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %dil, %esi
+; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
-; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
%res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
%vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
-; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
%res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
%vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
-; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
%res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
%vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
-; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
%res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
%vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
-; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
%res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
%vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
-; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
%res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
%vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
-; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
%res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
%vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
ret <8 x i8> %vec7
}
define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
-; CHECK_LABEL: test_mask_cmp_q_512
-; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
+; CHECK-LABEL: test_mask_cmp_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r8d
+; CHECK-NEXT: vpcmpltq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r9d
+; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r10d
+; CHECK-NEXT: vpcmpunordq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r11d
+; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: movzbl %r8b, %esi
+; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r9b, %esi
+; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r10b, %esi
+; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r11b, %esi
+; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %dil, %esi
+; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
-; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
%res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
%vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
-; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
%res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
%vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
-; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
%res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
%vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
-; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
%res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
%vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
-; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
%res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
%vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
-; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
%res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
%vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
-; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
%res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
%vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
ret <8 x i8> %vec7
@@ -842,58 +1401,112 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
-; CHECK_LABEL: test_ucmp_q_512
-; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
+; CHECK-LABEL: test_ucmp_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r8d
+; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r9d
+; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r10d
+; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %r11d
+; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: movzbl %r8b, %esi
+; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r9b, %esi
+; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r10b, %esi
+; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r11b, %esi
+; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %dil, %esi
+; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
-; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
%res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
%vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
-; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
%res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
%vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
-; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
%res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
%vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
-; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
%res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
%vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
-; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
%res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
%vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
-; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
%res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
%vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
-; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
%res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
%vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
ret <8 x i8> %vec7
}
define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
-; CHECK_LABEL: test_mask_ucmp_q_512
-; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
+; CHECK-LABEL: test_mask_ucmp_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpcmpequq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r8d
+; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r9d
+; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r10d
+; CHECK-NEXT: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %r11d
+; CHECK-NEXT: vpcmpnequq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %edx
+; CHECK-NEXT: movzbl %r8b, %esi
+; CHECK-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r9b, %esi
+; CHECK-NEXT: vpinsrb $2, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r10b, %esi
+; CHECK-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %r11b, %esi
+; CHECK-NEXT: vpinsrb $6, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %dil, %esi
+; CHECK-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %cl, %eax
+; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; CHECK-NEXT: movzbl %dl, %eax
+; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
-; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
%res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
%vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
-; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
%res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
%vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
-; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
%res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
%vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
-; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
%res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
%vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
-; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
%res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
%vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
-; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
%res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
%vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
-; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
%res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
%vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
ret <8 x i8> %vec7
@@ -903,57 +1516,77 @@ declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounw
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4:
-; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
- %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
+declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextracti64x4:
-; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
- %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
ret <4 x i64> %res
}
-declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
+declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4:
-; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
- %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
-; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
- %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
+; CHECK: ## BB#0:
+; CHECK-NEXT: vextractf64x4 $2, %zmm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
ret <4 x double> %res
}
-declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
+declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
- ; CHECK-LABEL: test_x86_avx512_pslli_d
- ; CHECK: vpslld
+; CHECK-LABEL: test_x86_avx512_pslli_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpslld $7, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
- ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_pslli_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpslld $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
- ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_pslli_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -961,22 +1594,33 @@ define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
- ; CHECK-LABEL: test_x86_avx512_pslli_q
- ; CHECK: vpsllq
+; CHECK-LABEL: test_x86_avx512_pslli_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
- ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_pslli_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
- ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_pslli_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -984,22 +1628,31 @@ define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
- ; CHECK-LABEL: test_x86_avx512_psrli_d
- ; CHECK: vpsrld
+; CHECK-LABEL: test_x86_avx512_psrli_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
- ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrli_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
- ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrli_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -1007,22 +1660,33 @@ define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
- ; CHECK-LABEL: test_x86_avx512_psrli_q
- ; CHECK: vpsrlq
+; CHECK-LABEL: test_x86_avx512_psrli_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
- ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrli_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
- ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrli_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -1030,22 +1694,31 @@ define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
- ; CHECK-LABEL: test_x86_avx512_psrai_d
- ; CHECK: vpsrad
+; CHECK-LABEL: test_x86_avx512_psrai_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
- ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrai_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
- ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrai_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -1053,22 +1726,33 @@ define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
- ; CHECK-LABEL: test_x86_avx512_psrai_q
- ; CHECK: vpsraq
+; CHECK-LABEL: test_x86_avx512_psrai_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
- ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrai_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
- ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrai_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -1076,22 +1760,31 @@ define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psll_d
- ; CHECK: vpslld
+; CHECK-LABEL: test_x86_avx512_psll_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psll_d
- ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psll_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
- ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psll_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -1099,22 +1792,33 @@ define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i
declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psll_q
- ; CHECK: vpsllq
+; CHECK-LABEL: test_x86_avx512_psll_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psll_q
- ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psll_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
- ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psll_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -1122,22 +1826,31 @@ define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8
declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psrl_d
- ; CHECK: vpsrld
+; CHECK-LABEL: test_x86_avx512_psrl_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
- ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrl_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
- ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrl_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -1145,22 +1858,33 @@ define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i
declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psrl_q
- ; CHECK: vpsrlq
+; CHECK-LABEL: test_x86_avx512_psrl_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
- ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrl_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
- ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrl_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -1168,22 +1892,31 @@ define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8
declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psra_d
- ; CHECK: vpsrad
+; CHECK-LABEL: test_x86_avx512_psra_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psra_d
- ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psra_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
- ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psra_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -1191,22 +1924,33 @@ define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i
declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psra_q
- ; CHECK: vpsraq
+; CHECK-LABEL: test_x86_avx512_psra_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psra_q
- ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psra_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
- ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psra_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -1214,22 +1958,31 @@ define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8
declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psllv_d
- ; CHECK: vpsllvd
+; CHECK-LABEL: test_x86_avx512_psllv_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
- ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psllv_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
- ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psllv_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -1237,22 +1990,33 @@ define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1,
declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psllv_q
- ; CHECK: vpsllvq
+; CHECK-LABEL: test_x86_avx512_psllv_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
- ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psllv_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
- ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psllv_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -1261,22 +2025,31 @@ declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>,
define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psrav_d
- ; CHECK: vpsravd
+; CHECK-LABEL: test_x86_avx512_psrav_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
- ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrav_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
- ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrav_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -1284,22 +2057,33 @@ define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1,
declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psrav_q
- ; CHECK: vpsravq
+; CHECK-LABEL: test_x86_avx512_psrav_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
- ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrav_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
- ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrav_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -1307,22 +2091,31 @@ define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8
declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psrlv_d
- ; CHECK: vpsrlvd
+; CHECK-LABEL: test_x86_avx512_psrlv_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
- ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrlv_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}
define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
- ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -1330,22 +2123,33 @@ define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1,
declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
- ; CHECK-LABEL: test_x86_avx512_psrlv_q
- ; CHECK: vpsrlvq
+; CHECK-LABEL: test_x86_avx512_psrlv_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
- ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_x86_avx512_mask_psrlv_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
- ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
- ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -1353,8 +2157,10 @@ define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8
declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
- ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
- ; CHECK: vpsrlvq (%
+; CHECK-LABEL: test_x86_avx512_psrlv_q_memop:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
@@ -1365,64 +2171,80 @@ declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK-LABEL: test_vsubps_rn
- ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
+; CHECK-LABEL: test_vsubps_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK-LABEL: test_vsubps_rd
- ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
+; CHECK-LABEL: test_vsubps_rd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK-LABEL: test_vsubps_ru
- ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
+; CHECK-LABEL: test_vsubps_ru:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK-LABEL: test_vsubps_rz
- ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
+; CHECK-LABEL: test_vsubps_rz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK-LABEL: test_vmulps_rn
- ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
+; CHECK-LABEL: test_vmulps_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK-LABEL: test_vmulps_rd
- ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
+; CHECK-LABEL: test_vmulps_rd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK-LABEL: test_vmulps_ru
- ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
+; CHECK-LABEL: test_vmulps_ru:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK-LABEL: test_vmulps_rz
- ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
+; CHECK-LABEL: test_vmulps_rz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
@@ -1430,32 +2252,44 @@ define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
;; mask float
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ; CHECK-LABEL: test_vmulps_mask_rn
- ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
+; CHECK-LABEL: test_vmulps_mask_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ; CHECK-LABEL: test_vmulps_mask_rd
- ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
+; CHECK-LABEL: test_vmulps_mask_rd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ; CHECK-LABEL: test_vmulps_mask_ru
- ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
+; CHECK-LABEL: test_vmulps_mask_ru:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ; CHECK-LABEL: test_vmulps_mask_rz
- ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
+; CHECK-LABEL: test_vmulps_mask_rz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
@@ -1463,32 +2297,48 @@ define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16
;; With Passthru value
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
- ; CHECK-LABEL: test_vmulps_mask_passthru_rn
- ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
+; CHECK-LABEL: test_vmulps_mask_passthru_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
- ; CHECK-LABEL: test_vmulps_mask_passthru_rd
- ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
+; CHECK-LABEL: test_vmulps_mask_passthru_rd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
- ; CHECK-LABEL: test_vmulps_mask_passthru_ru
- ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
+; CHECK-LABEL: test_vmulps_mask_passthru_ru:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
- ; CHECK-LABEL: test_vmulps_mask_passthru_rz
- ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
+; CHECK-LABEL: test_vmulps_mask_passthru_rz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
<16 x float> %passthru, i16 %mask, i32 3)
ret <16 x float> %res
@@ -1496,47 +2346,69 @@ define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float>
;; mask double
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
- ; CHECK-LABEL: test_vmulpd_mask_rn
- ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
+; CHECK-LABEL: test_vmulpd_mask_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 0)
ret <8 x double> %res
}
define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
- ; CHECK-LABEL: test_vmulpd_mask_rd
- ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
+; CHECK-LABEL: test_vmulpd_mask_rd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 1)
ret <8 x double> %res
}
define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
- ; CHECK-LABEL: test_vmulpd_mask_ru
- ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
+; CHECK-LABEL: test_vmulpd_mask_ru:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 2)
ret <8 x double> %res
}
define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
- ; CHECK-LABEL: test_vmulpd_mask_rz
- ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
+; CHECK-LABEL: test_vmulpd_mask_rz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
<8 x double> zeroinitializer, i8 %mask, i32 3)
ret <8 x double> %res
}
define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_xor_epi32
- ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
+; CHECK-LABEL: test_xor_epi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_xor_epi32
- ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
+; CHECK-LABEL: test_mask_xor_epi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
@@ -1544,15 +2416,21 @@ define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %
declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_or_epi32
- ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
+; CHECK-LABEL: test_or_epi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_or_epi32
- ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
+; CHECK-LABEL: test_mask_or_epi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
@@ -1560,15 +2438,21 @@ define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %p
declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_and_epi32
- ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
+; CHECK-LABEL: test_and_epi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_and_epi32
- ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
+; CHECK-LABEL: test_mask_and_epi32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
@@ -1576,15 +2460,22 @@ define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %
declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
- ;CHECK-LABEL: test_xor_epi64
- ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
+; CHECK-LABEL: test_xor_epi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_xor_epi64
- ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
+; CHECK-LABEL: test_mask_xor_epi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
@@ -1592,15 +2483,22 @@ define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %pass
declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
- ;CHECK-LABEL: test_or_epi64
- ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
+; CHECK-LABEL: test_or_epi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_or_epi64
- ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
+; CHECK-LABEL: test_mask_or_epi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
@@ -1608,15 +2506,22 @@ define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passT
declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
- ;CHECK-LABEL: test_and_epi64
- ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
+; CHECK-LABEL: test_and_epi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_and_epi64
- ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
+; CHECK-LABEL: test_mask_and_epi64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
@@ -1625,53 +2530,73 @@ declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i6
define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_mask_add_epi32_rr
- ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
+; CHECK-LABEL: test_mask_add_epi32_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_add_epi32_rrk
- ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
+; CHECK-LABEL: test_mask_add_epi32_rrk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
- ;CHECK-LABEL: test_mask_add_epi32_rrkz
- ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
+; CHECK-LABEL: test_mask_add_epi32_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
- ;CHECK-LABEL: test_mask_add_epi32_rm
- ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
+; CHECK-LABEL: test_mask_add_epi32_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_add_epi32_rmk
- ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
+; CHECK-LABEL: test_mask_add_epi32_rmk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
- ;CHECK-LABEL: test_mask_add_epi32_rmkz
- ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
+; CHECK-LABEL: test_mask_add_epi32_rmkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
- ;CHECK-LABEL: test_mask_add_epi32_rmb
- ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
+; CHECK-LABEL: test_mask_add_epi32_rmb:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -1680,8 +2605,12 @@ define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
}
define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_add_epi32_rmbk
- ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
+; CHECK-LABEL: test_mask_add_epi32_rmbk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -1690,8 +2619,11 @@ define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i3
}
define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
- ;CHECK-LABEL: test_mask_add_epi32_rmbkz
- ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
+; CHECK-LABEL: test_mask_add_epi32_rmbkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -1702,53 +2634,73 @@ define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %ma
declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_mask_sub_epi32_rr
- ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
+; CHECK-LABEL: test_mask_sub_epi32_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi32_rrk
- ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
+; CHECK-LABEL: test_mask_sub_epi32_rrk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi32_rrkz
- ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
+; CHECK-LABEL: test_mask_sub_epi32_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
- ;CHECK-LABEL: test_mask_sub_epi32_rm
- ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
+; CHECK-LABEL: test_mask_sub_epi32_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi32_rmk
- ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
+; CHECK-LABEL: test_mask_sub_epi32_rmk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi32_rmkz
- ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
+; CHECK-LABEL: test_mask_sub_epi32_rmkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
- ;CHECK-LABEL: test_mask_sub_epi32_rmb
- ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
+; CHECK-LABEL: test_mask_sub_epi32_rmb:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -1757,8 +2709,12 @@ define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
}
define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi32_rmbk
- ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
+; CHECK-LABEL: test_mask_sub_epi32_rmbk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -1767,8 +2723,11 @@ define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i3
}
define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
- ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
+; CHECK-LABEL: test_mask_sub_epi32_rmbkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -1779,53 +2738,77 @@ define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %ma
declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
- ;CHECK-LABEL: test_mask_add_epi64_rr
- ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
+; CHECK-LABEL: test_mask_add_epi64_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_add_epi64_rrk
- ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
+; CHECK-LABEL: test_mask_add_epi64_rrk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
- ;CHECK-LABEL: test_mask_add_epi64_rrkz
- ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
+; CHECK-LABEL: test_mask_add_epi64_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
- ;CHECK-LABEL: test_mask_add_epi64_rm
- ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
+; CHECK-LABEL: test_mask_add_epi64_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_add_epi64_rmk
- ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
+; CHECK-LABEL: test_mask_add_epi64_rmk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
- ;CHECK-LABEL: test_mask_add_epi64_rmkz
- ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
+; CHECK-LABEL: test_mask_add_epi64_rmkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
- ;CHECK-LABEL: test_mask_add_epi64_rmb
- ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
+; CHECK-LABEL: test_mask_add_epi64_rmb:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -1834,8 +2817,13 @@ define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
}
define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_add_epi64_rmbk
- ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
+; CHECK-LABEL: test_mask_add_epi64_rmbk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -1844,8 +2832,12 @@ define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64>
}
define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
- ;CHECK-LABEL: test_mask_add_epi64_rmbkz
- ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
+; CHECK-LABEL: test_mask_add_epi64_rmbkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -1856,53 +2848,77 @@ define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask)
declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
- ;CHECK-LABEL: test_mask_sub_epi64_rr
- ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
+; CHECK-LABEL: test_mask_sub_epi64_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi64_rrk
- ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
+; CHECK-LABEL: test_mask_sub_epi64_rrk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi64_rrkz
- ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
+; CHECK-LABEL: test_mask_sub_epi64_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
- ;CHECK-LABEL: test_mask_sub_epi64_rm
- ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
+; CHECK-LABEL: test_mask_sub_epi64_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi64_rmk
- ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
+; CHECK-LABEL: test_mask_sub_epi64_rmk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi64_rmkz
- ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
+; CHECK-LABEL: test_mask_sub_epi64_rmkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
- ;CHECK-LABEL: test_mask_sub_epi64_rmb
- ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
+; CHECK-LABEL: test_mask_sub_epi64_rmb:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -1911,8 +2927,13 @@ define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
}
define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi64_rmbk
- ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
+; CHECK-LABEL: test_mask_sub_epi64_rmbk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -1921,8 +2942,12 @@ define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64>
}
define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
- ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
- ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
+; CHECK-LABEL: test_mask_sub_epi64_rmbkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -1933,53 +2958,77 @@ define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask)
declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_mask_mul_epi32_rr
- ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
+; CHECK-LABEL: test_mask_mul_epi32_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epi32_rrk
- ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
+; CHECK-LABEL: test_mask_mul_epi32_rrk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epi32_rrkz
- ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
+; CHECK-LABEL: test_mask_mul_epi32_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
- ;CHECK-LABEL: test_mask_mul_epi32_rm
- ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
+; CHECK-LABEL: test_mask_mul_epi32_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epi32_rmk
- ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
+; CHECK-LABEL: test_mask_mul_epi32_rmk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epi32_rmkz
- ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
+; CHECK-LABEL: test_mask_mul_epi32_rmkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
- ;CHECK-LABEL: test_mask_mul_epi32_rmb
- ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
+; CHECK-LABEL: test_mask_mul_epi32_rmb:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -1989,8 +3038,13 @@ define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
}
define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epi32_rmbk
- ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
+; CHECK-LABEL: test_mask_mul_epi32_rmbk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -2000,8 +3054,12 @@ define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64>
}
define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
- ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
+; CHECK-LABEL: test_mask_mul_epi32_rmbkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -2013,53 +3071,77 @@ define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask
declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_mask_mul_epu32_rr
- ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
+; CHECK-LABEL: test_mask_mul_epu32_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epu32_rrk
- ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
+; CHECK-LABEL: test_mask_mul_epu32_rrk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epu32_rrkz
- ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
+; CHECK-LABEL: test_mask_mul_epu32_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
- ;CHECK-LABEL: test_mask_mul_epu32_rm
- ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
+; CHECK-LABEL: test_mask_mul_epu32_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epu32_rmk
- ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
+; CHECK-LABEL: test_mask_mul_epu32_rmk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epu32_rmkz
- ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
+; CHECK-LABEL: test_mask_mul_epu32_rmkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret < 8 x i64> %res
}
define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
- ;CHECK-LABEL: test_mask_mul_epu32_rmb
- ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
+; CHECK-LABEL: test_mask_mul_epu32_rmb:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -2069,8 +3151,13 @@ define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
}
define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epu32_rmbk
- ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
+; CHECK-LABEL: test_mask_mul_epu32_rmbk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -2080,8 +3167,12 @@ define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64>
}
define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
- ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
- ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
+; CHECK-LABEL: test_mask_mul_epu32_rmbkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
@@ -2093,53 +3184,73 @@ define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
- ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
+; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
- ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
+; CHECK-LABEL: test_mask_mullo_epi32_rrk_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
- ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
- ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
+; CHECK-LABEL: test_mask_mullo_epi32_rrkz_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
- ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
- ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
+; CHECK-LABEL: test_mask_mullo_epi32_rm_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
- ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
+; CHECK-LABEL: test_mask_mullo_epi32_rmk_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
- ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
- ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
+; CHECK-LABEL: test_mask_mullo_epi32_rmkz_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret < 16 x i32> %res
}
define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
- ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
- ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
+; CHECK-LABEL: test_mask_mullo_epi32_rmb_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -2148,8 +3259,12 @@ define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
}
define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
- ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
- ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
+; CHECK-LABEL: test_mask_mullo_epi32_rmbk_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -2158,8 +3273,11 @@ define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <1
}
define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
- ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
- ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
+; CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -2170,359 +3288,515 @@ define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i
declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
- ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
- ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
- ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
- ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
- ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_add_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
- ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
- ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
- ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
- ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
- ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_add_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
- ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
- ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
- ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
- ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}
define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_add_round_ps_current
- ;CHECK: vaddps %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_add_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
- ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
- ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
- ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
- ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
- ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_sub_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
- ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
- ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
- ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
- ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}
define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_sub_round_ps_current
- ;CHECK: vsubps %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_sub_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
- ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
- ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
- ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
- ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
- ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_div_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
- ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
- ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
- ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
- ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
- ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_div_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
- ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
- ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
- ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
- ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
ret <16 x float> %res
}
define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_div_round_ps_current
- ;CHECK: vdivps %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_div_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
- ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_min_round_ps_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
- ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_min_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
- ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_min_round_ps_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
- ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_min_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_min_round_ps_sae
- ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_min_round_ps_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_min_round_ps_current
- ;CHECK: vminps %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_min_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
- ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_max_round_ps_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
- ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-LABEL: test_mm512_maskz_max_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
- ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_max_round_ps_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
- ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
- ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-LABEL: test_mm512_mask_max_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}
define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_max_round_ps_sae
- ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_max_round_ps_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
- ;CHECK-LABEL: test_mm512_max_round_ps_current
- ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
+; CHECK-LABEL: test_mm512_max_round_ps_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}
@@ -2531,50 +3805,81 @@ declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>
declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_ss_rn
-; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_ss_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
ret <4 x float> %res
}
define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_ss_rd
-; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_ss_rd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
ret <4 x float> %res
}
define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_ss_ru
-; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_ss_ru:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
ret <4 x float> %res
}
define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_ss_rz
-; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_ss_rz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
ret <4 x float> %res
}
define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_ss_current
-; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_ss_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
ret <4 x float> %res
}
define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
-; CHECK-LABEL: test_maskz_add_ss_rn
-; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-LABEL: test_maskz_add_ss_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
ret <4 x float> %res
}
define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
-; CHECK-LABEL: test_add_ss_rn
-; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-LABEL: test_add_ss_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
ret <4 x float> %res
}
@@ -2582,50 +3887,81 @@ define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_sd_rn
-; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_sd_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
ret <2 x double> %res
}
define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_sd_rd
-; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_sd_rd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
ret <2 x double> %res
}
define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_sd_ru
-; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_sd_ru:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
ret <2 x double> %res
}
define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_sd_rz
-; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_sd_rz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
ret <2 x double> %res
}
define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_add_sd_current
-; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_add_sd_current:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
}
define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
-; CHECK-LABEL: test_maskz_add_sd_rn
-; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-LABEL: test_maskz_add_sd_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
ret <2 x double> %res
}
define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_add_sd_rn
-; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-LABEL: test_add_sd_rn:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
ret <2 x double> %res
}
@@ -2633,86 +3969,130 @@ define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_max_ss_sae
-; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_max_ss_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
ret <4 x float> %res
}
define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
-; CHECK-LABEL: test_maskz_max_ss_sae
-; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-LABEL: test_maskz_max_ss_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
ret <4 x float> %res
}
define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
-; CHECK-LABEL: test_max_ss_sae
-; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
+; CHECK-LABEL: test_max_ss_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmaxss {sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
ret <4 x float> %res
}
define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_max_ss
-; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_max_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
ret <4 x float> %res
}
define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
-; CHECK-LABEL: test_maskz_max_ss
-; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-LABEL: test_maskz_max_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
ret <4 x float> %res
}
define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
-; CHECK-LABEL: test_max_ss
-; CHECK: vmaxss %xmm1, %xmm0, %xmm0
+; CHECK-LABEL: test_max_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
ret <4 x float> %res
}
declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_max_sd_sae
-; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_max_sd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
ret <2 x double> %res
}
define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
-; CHECK-LABEL: test_maskz_max_sd_sae
-; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-LABEL: test_maskz_max_sd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
ret <2 x double> %res
}
define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_max_sd_sae
-; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
+; CHECK-LABEL: test_max_sd_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
ret <2 x double> %res
}
define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
-; CHECK-LABEL: test_mask_max_sd
-; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-LABEL: test_mask_max_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
ret <2 x double> %res
}
define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
-; CHECK-LABEL: test_maskz_max_sd
-; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-LABEL: test_maskz_max_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
ret <2 x double> %res
}
define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
-; CHECK-LABEL: test_max_sd
-; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
+; CHECK-LABEL: test_max_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
ret <2 x double> %res
}
@@ -2720,8 +4100,8 @@ define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2sd32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -2730,8 +4110,8 @@ declare <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double>, i32, i32) nounwin
define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -2740,8 +4120,8 @@ declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwin
define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2ss32:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -2750,8 +4130,8 @@ declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind
define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -2760,8 +4140,8 @@ declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind
define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT: retq
{
%res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -2770,9 +4150,9 @@ define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b)
define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr)
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem:
; CHECK: ## BB#0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: movl (%rdi), %eax
+; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT: retq
{
%b = load i32, i32* %ptr
%res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1]
@@ -2782,8 +4162,8 @@ define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32*
define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b)
; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
+; CHECK-NEXT: retq
{
%res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -2793,7 +4173,7 @@ define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, i32* %ptr)
; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem:
; CHECK: ## BB#0:
; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: retq
{
%b = load i32, i32* %ptr
%res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
@@ -2804,8 +4184,8 @@ declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind r
define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
; CHECK-LABEL: _mm_cvt_roundu64_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT: retq
{
%res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -2814,8 +4194,8 @@ define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b)
define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b)
; CHECK-LABEL: _mm_cvtu64_ss:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
+; CHECK-NEXT: retq
{
%res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -2825,8 +4205,8 @@ declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind
define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
+; CHECK-NEXT: retq
{
%res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -2836,8 +4216,8 @@ declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind read
define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0
+; CHECK-NEXT: retq
{
%res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -2846,8 +4226,8 @@ define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b)
define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b)
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
+; CHECK-NEXT: retq
{
%res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -2855,7 +4235,10 @@ define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone
define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
- ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
+; CHECK-LABEL: test_vpmaxq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
<8 x i64>zeroinitializer, i8 -1)
ret <8 x i64> %res
@@ -2863,7 +4246,10 @@ define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
- ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
+; CHECK-LABEL: test_vpminud:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
<16 x i32>zeroinitializer, i16 -1)
ret <16 x i32> %res
@@ -2871,29 +4257,39 @@ define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
- ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
+; CHECK-LABEL: test_vpmaxsd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
<16 x i32>zeroinitializer, i16 -1)
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512
-; CHECK-NOT: call
-; CHECK: vpmaxsd %zmm
-; CHECK: {%k1}
define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512
-; CHECK-NOT: call
-; CHECK: vpmaxsq %zmm
-; CHECK: {%k1}
define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
@@ -2902,11 +4298,14 @@ define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %
declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512
-; CHECK-NOT: call
-; CHECK: vpmaxud %zmm
-; CHECK: {%k1}
define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
@@ -2915,11 +4314,15 @@ define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32
declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512
-; CHECK-NOT: call
-; CHECK: vpmaxuq %zmm
-; CHECK: {%k1}
define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
@@ -2928,11 +4331,14 @@ define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %
declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512
-; CHECK-NOT: call
-; CHECK: vpminsd %zmm
-; CHECK: {%k1}
define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmins_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
@@ -2941,22 +4347,29 @@ define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32
declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512
-; CHECK-NOT: call
-; CHECK: vpminsq %zmm
-; CHECK: {%k1}
define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmins_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}
-; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512
-; CHECK-NOT: call
-; CHECK: vpminud %zmm
-; CHECK: {%k1}
define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pminu_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
@@ -2965,11 +4378,15 @@ define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32
declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512
-; CHECK-NOT: call
-; CHECK: vpminuq %zmm
-; CHECK: {%k1}
define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pminu_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
@@ -2978,24 +4395,34 @@ define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %
declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermi2d {{.*}}{%k1}
-define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermi2pd {{.*}}{%k1}
define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vaddpd %zmm1, %zmm3, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
%res2 = fadd <8 x double> %res, %res1
@@ -3004,11 +4431,15 @@ define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0,
declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermi2ps {{.*}}{%k1}
define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
@@ -3017,11 +4448,16 @@ define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0,
declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermi2q {{.*}}{%k1}
define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
@@ -3030,37 +4466,54 @@ define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i
declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermt2d {{.*}}{%k1} {z}
-define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm2
+; CHECK-NEXT: vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpermt2d %zmm1, %zmm0, %zmm1
+; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %x2 = load <16 x i32>, <16 x i32>* %x2p
%res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
- %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}
declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermt2pd {{.*}}{%k1} {z}
-define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
+define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm2
+; CHECK-NEXT: vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpermt2pd %zmm1, %zmm0, %zmm1
+; CHECK-NEXT: vaddpd %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %x2s = load double, double* %x2ptr
+ %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
+ %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
%res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
- %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
+ %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}
declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermt2ps {{.*}}{%k1} {z}
define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpermt2ps %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vaddps %zmm1, %zmm3, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
%res2 = fadd <16 x float> %res, %res1
@@ -3070,11 +4523,16 @@ define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0,
declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermt2q {{.*}}{%k1} {z}
define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpermt2q %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpaddq %zmm1, %zmm3, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
%res2 = add <8 x i64> %res, %res1
@@ -3083,12 +4541,15 @@ define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x
declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermt2d {{.*}}{%k1}
-; CHECK-NOT: {z}
define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm1, %zmm3
+; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm3 {%k1}
+; CHECK-NEXT: vpermt2d %zmm2, %zmm0, %zmm1
+; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
%res2 = add <16 x i32> %res, %res1
@@ -3096,11 +4557,15 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16
}
declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vscalefpd{{.*}}{%k1}
define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_scalef_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vscalefpd {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vscalefpd {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3)
%res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
%res2 = fadd <8 x double> %res, %res1
@@ -3108,13 +4573,1849 @@ define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8
}
declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vscalefps{{.*}}{%k1}
define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vscalefps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vscalefps {rn-sae}, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2)
%res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}
+
+declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
+; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
+; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
+; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
+; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[2],k1[2],zmm2[4],k1[4],zmm2[6],k1[6]
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm3 = k1[0],zmm0[0],k1[2],zmm0[2],k1[4],zmm0[4],k1[6],zmm0[6]
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm2 = zmm2[1],k1[1],zmm2[3],k1[3],zmm2[5],k1[5],zmm2[7],k1[7]
+; CHECK-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm2[2],k1[2],zmm2[3],k1[3],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[14],k1[14],zmm2[15],k1[15]
+; CHECK-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpunpckldq {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[12],k1[12],zmm2[13],k1[13]
+; CHECK-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovqb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqb %zmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovqb %zmm0, (%rdi)
+; CHECK-NEXT: vpmovqb %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsqb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqb %zmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsqb %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovusqb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqb %zmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovusqb %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovqw %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqw %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqw %zmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovqw %zmm0, (%rdi)
+; CHECK-NEXT: vpmovqw %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsqw %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqw %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqw %zmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsqw %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovusqw %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqw %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqw %zmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovusqw %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovqd %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovqd %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovqd %zmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i32> %res0, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovqd %zmm0, (%rdi)
+; CHECK-NEXT: vpmovqd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsqd %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsqd %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqd %zmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i32> %res0, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsqd %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovusqd %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovusqd %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqd %zmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res0 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i32> %res0, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovusqd %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovdb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdb %zmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmov_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovdb %zmm0, (%rdi)
+; CHECK-NEXT: vpmovdb %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmov.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsdb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdb %zmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsdb %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovusdb %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdb %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdb %zmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovusdb %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovdw %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovdw %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovdw %zmm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i16> %res0, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmov_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovdw %zmm0, (%rdi)
+; CHECK-NEXT: vpmovdw %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsdw %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsdw %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdw %zmm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i16> %res0, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsdw %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovusdw %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovusdw %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdw %zmm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i16> %res0, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovusdw %zmm0, (%rdi)
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
+
+define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 2)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 0)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtps2dq {ru-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtps2dq {rn-sae}, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtps2pd %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtps2pd {sae}, %ymm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtps2udq {ru-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtps2udq {rn-sae}, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 2)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 0)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+
+declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 0)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+
+declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32)
+define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32)
+define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_getexp_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vgetexpss %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm4 {%k1} {z}
+; CHECK-NEXT: vgetexpss {sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddps %xmm0, %xmm4, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
+
+ %res.1 = fadd <4 x float> %res0, %res1
+ %res.2 = fadd <4 x float> %res2, %res3
+ %res = fadd <4 x float> %res.1, %res.2
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
+define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_getexp_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vgetexpsd %xmm1, %xmm0, %xmm4
+; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vgetexpsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vaddpd %xmm2, %xmm3, %xmm1
+; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
+
+ %res.1 = fadd <2 x double> %res0, %res1
+ %res.2 = fadd <2 x double> %res2, %res3
+ %res = fadd <2 x double> %res.1, %res.2
+ ret <2 x double> %res
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
+
+define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: shlb $7, %al
+; CHECK-NEXT: sarb $7, %al
+; CHECK-NEXT: retq
+
+ %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
+ ret i8 %res4
+}
+
+define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_sd_all:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcmpunordsd {sae}, %xmm1, %xmm0, %k0
+; CHECK-NEXT: vcmplesd %xmm1, %xmm0, %k1
+; CHECK-NEXT: korw %k0, %k1, %k0
+; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k1
+; CHECK-NEXT: vcmpneqsd %xmm1, %xmm0, %k2
+; CHECK-NEXT: korw %k1, %k2, %k1
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k2
+; CHECK-NEXT: kandw %k2, %k1, %k1
+; CHECK-NEXT: korw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: shlb $7, %al
+; CHECK-NEXT: sarb $7, %al
+; CHECK-NEXT: retq
+
+ %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
+ %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
+ %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
+ %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
+
+ %res11 = or i8 %res1, %res2
+ %res12 = or i8 %res3, %res4
+ %res13 = or i8 %res11, %res12
+ ret i8 %res13
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
+
+define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: shlb $7, %al
+; CHECK-NEXT: sarb $7, %al
+; CHECK-NEXT: retq
+
+ %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
+ ret i8 %res2
+}
+
+
+define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss_all:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcmpless %xmm1, %xmm0, %k1
+; CHECK-NEXT: vcmpunordss {sae}, %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k1
+; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k2
+; CHECK-NEXT: kandw %k2, %k1, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: shlb $7, %al
+; CHECK-NEXT: sarb $7, %al
+; CHECK-NEXT: retq
+ %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
+ %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
+ %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
+ %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
+
+ %res11 = and i8 %res1, %res2
+ %res12 = and i8 %res3, %res4
+ %res13 = and i8 %res11, %res12
+ ret i8 %res13
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
+; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
+; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
+; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
+
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res3, %res2
+ ret <8 x double> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
+; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[4,5,2,3],zmm1[2,3,0,1]
+; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vgetmantpd $11,{sae}, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_getmant_sd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm2, %zmm3
+; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
+; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
+; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
+; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
+; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> zeroinitializer, i8 %x3, i32 4)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 8)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 -1, i32 4)
+ %res11 = fadd <2 x double> %res, %res1
+ %res12 = fadd <2 x double> %res2, %res3
+ %res13 = fadd <2 x double> %res11, %res12
+ ret <2 x double> %res13
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ss:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
+; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> zeroinitializer, i8 %x3, i32 4)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 8)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 -1, i32 4)
+ %res11 = fadd <4 x float> %res, %res1
+ %res12 = fadd <4 x float> %res2, %res3
+ %res13 = fadd <4 x float> %res11, %res12
+ ret <4 x float> %res13
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshufpd {{.*#+}} zmm2 = zmm2[0],k1[1],zmm2[3],k1[2],zmm2[5],k1[4],zmm2[6],k1[6]
+; CHECK-NEXT: vshufpd {{.*#+}} zmm3 = k1[0],zmm0[1],k1[3],zmm0[2],k1[5],zmm0[4],k1[6],zmm0[6]
+; CHECK-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
+
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res3, %res2
+ ret <8 x double> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vshufps {{.*#+}} zmm2 = zmm2[2,1],k1[1,0],zmm2[6,5],k1[5,4],zmm2[10,9],k1[9,8],zmm2[14,13],k1[13,12]
+; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 = zmm1[0,1,3,2,5,4,6,6]
+; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = k1[0,1,3,2,5,4,6,6]
+; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6]
+; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res3, %res2
+ ret <8 x double> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermilps {{.*#+}} zmm1 = zmm1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
+; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = k1[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
+; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
+; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res3, %res2
+ ret <16 x float> %res4
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res2, %res3
+ ret <8 x double> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
+
+define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8)
+
+define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res2, %res3
+ ret <16 x i32> %res4
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res2, %res3
+ ret <8 x double> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float>, <4 x float>, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<4 x float> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ss2sd_round:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vcvtss2sd {sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<4 x float> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double>, <2 x double>, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<2 x double> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_sd2ss_round:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vcvtsd2ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vcvtsd2ss {rn-sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<2 x double> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
+
+define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1}
+; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
+
+define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z}
+; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-NEXT: vmovsldup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-NEXT: vmovshdup {{.*#+}} zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movddup_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovddup {{.*#+}} zmm1 = zmm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vmovddup {{.*#+}} zmm2 = zmm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res2, %res3
+ ret <8 x double> %res4
+}
+
+define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_avx512_comi_sd_eq_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_avx512_comi_sd_eq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcomisd %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_avx512_ucomi_sd_eq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_avx512_comi_sd_lt_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcomisd {sae}, %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt_sae:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vucomisd {sae}, %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_avx512_comi_sd_lt:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcomisd %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_avx512_ucomi_sd_lt:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
+ ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
+
+define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: test_x86_avx512_ucomi_ss_lt:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vucomiss %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
+ ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
+declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ ret <4 x float> %res
+}
+
+define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2)
+ ret <4 x float> %res
+}
+
+define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
+define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1)
+ ret <2 x double> %res
+}
+
+define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2)
+ ret <2 x double> %res
+}
+
+define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk:
+; CHECK: ## BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ ret <2 x double> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
+; CHECK: kmovw %edi, %k1
+; CHECK: vshuff32x4 $0, %zmm0, %zmm0, %zmm2 {%k1} {z}
+; CHECK: vshuff32x4 $0, %zmm0, %zmm0, %zmm1 {%k1}
+; CHECK: vshuff32x4 $0, %zmm0, %zmm0, %zmm0
+; CHECK: vaddps %zmm1, %zmm0, %zmm0
+; CHECK: vaddps %zmm0, %zmm2, %zmm0
+
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
+ %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
+ %res4 = fadd <16 x float> %res1, %res2
+ %res5 = fadd <16 x float> %res3, %res4
+ ret <16 x float> %res5
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
+; CHECK: kmovw %eax, %k1
+; CHECK: vshuff64x2 $68, %zmm0, %zmm0, %zmm2 {%k1} {z}
+; CHECK: vshuff64x2 $68, %zmm0, %zmm0, %zmm1 {%k1}
+; CHECK: vshuff64x2 $68, %zmm0, %zmm0, %zmm0
+; CHECK: vaddpd %zmm1, %zmm0, %zmm0
+; CHECK: vaddpd %zmm0, %zmm2, %zmm0
+
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
+ %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
+ %res4 = fadd <8 x double> %res1, %res2
+ %res5 = fadd <8 x double> %res3, %res4
+ ret <8 x double> %res5
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
+; CHECK: kmovw %edi, %k1
+; CHECK: vshufi32x4 $0, %zmm0, %zmm0, %zmm2 {%k1} {z}
+; CHECK: vshufi32x4 $0, %zmm0, %zmm0, %zmm1 {%k1}
+; CHECK: vshufi32x4 $0, %zmm0, %zmm0, %zmm0
+; CHECK: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK: vpaddd %zmm0, %zmm2, %zmm0
+
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
+ %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
+ %res4 = add <16 x i32> %res1, %res2
+ %res5 = add <16 x i32> %res3, %res4
+ ret <16 x i32> %res5
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
+; CHECK: kmovw %eax, %k1
+; CHECK: vshufi64x2 $68, %zmm0, %zmm0, %zmm2 {%k1} {z}
+; CHECK: vshufi64x2 $68, %zmm0, %zmm0, %zmm1 {%k1}
+; CHECK: vshufi64x2 $68, %zmm0, %zmm0, %zmm0
+; CHECK: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK: vpaddq %zmm0, %zmm2, %zmm0
+
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
+ %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
+ %res4 = add <8 x i64> %res1, %res2
+ %res5 = add <8 x i64> %res3, %res4
+ ret <8 x i64> %res5
+}
+
diff --git a/test/CodeGen/X86/avx512-logic.ll b/test/CodeGen/X86/avx512-logic.ll
index 140ce3b1ec56..c973b706e8fc 100644
--- a/test/CodeGen/X86/avx512-logic.ll
+++ b/test/CodeGen/X86/avx512-logic.ll
@@ -1,9 +1,14 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+
-; CHECK-LABEL: vpandd
-; CHECK: vpandd %zmm
-; CHECK: ret
define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: vpandd:
+; ALL: ## BB#0: ## %entry
+; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; ALL-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
%a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
@@ -12,10 +17,12 @@ entry:
ret <16 x i32> %x
}
-; CHECK-LABEL: vpord
-; CHECK: vpord %zmm
-; CHECK: ret
define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: vpord:
+; ALL: ## BB#0: ## %entry
+; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; ALL-NEXT: vpord %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
%a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
@@ -24,10 +31,12 @@ entry:
ret <16 x i32> %x
}
-; CHECK-LABEL: vpxord
-; CHECK: vpxord %zmm
-; CHECK: ret
define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: vpxord:
+; ALL: ## BB#0: ## %entry
+; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; ALL-NEXT: vpxord %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
%a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
@@ -36,10 +45,12 @@ entry:
ret <16 x i32> %x
}
-; CHECK-LABEL: vpandq
-; CHECK: vpandq %zmm
-; CHECK: ret
define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: vpandq:
+; ALL: ## BB#0: ## %entry
+; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; ALL-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
%a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
@@ -47,10 +58,12 @@ entry:
ret <8 x i64> %x
}
-; CHECK-LABEL: vporq
-; CHECK: vporq %zmm
-; CHECK: ret
define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: vporq:
+; ALL: ## BB#0: ## %entry
+; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; ALL-NEXT: vporq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
%a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
@@ -58,10 +71,12 @@ entry:
ret <8 x i64> %x
}
-; CHECK-LABEL: vpxorq
-; CHECK: vpxorq %zmm
-; CHECK: ret
define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: vpxorq:
+; ALL: ## BB#0: ## %entry
+; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; ALL-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
entry:
; Force the execution domain with an add.
%a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
@@ -70,28 +85,31 @@ entry:
}
-; CHECK-LABEL: orq_broadcast
-; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
-; CHECK: ret
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
+; ALL-LABEL: orq_broadcast:
+; ALL: ## BB#0:
+; ALL-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; ALL-NEXT: retq
%b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
ret <8 x i64> %b
}
-; CHECK-LABEL: andd512fold
-; CHECK: vpandd (%
-; CHECK: ret
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
+; ALL-LABEL: andd512fold:
+; ALL: ## BB#0: ## %entry
+; ALL-NEXT: vpandd (%rdi), %zmm0, %zmm0
+; ALL-NEXT: retq
entry:
%a = load <16 x i32>, <16 x i32>* %x, align 4
%b = and <16 x i32> %y, %a
ret <16 x i32> %b
}
-; CHECK-LABEL: andqbrst
-; CHECK: vpandq (%rdi){1to8}, %zmm
-; CHECK: ret
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
+; ALL-LABEL: andqbrst:
+; ALL: ## BB#0: ## %entry
+; ALL-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
+; ALL-NEXT: retq
entry:
%a = load i64, i64* %ap, align 8
%b = insertelement <8 x i64> undef, i64 %a, i32 0
@@ -99,3 +117,93 @@ entry:
%d = and <8 x i64> %p1, %c
ret <8 x i64>%d
}
+
+define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; KNL-LABEL: and_v64i8:
+; KNL: ## BB#0:
+; KNL-NEXT: vandps %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vandps %ymm3, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: and_v64i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %res = and <64 x i8> %a, %b
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; KNL-LABEL: or_v64i8:
+; KNL: ## BB#0:
+; KNL-NEXT: vorps %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vorps %ymm3, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: or_v64i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %res = or <64 x i8> %a, %b
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
+; KNL-LABEL: xor_v64i8:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorps %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vxorps %ymm3, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: xor_v64i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %res = xor <64 x i8> %a, %b
+ ret <64 x i8> %res
+}
+
+define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; KNL-LABEL: and_v32i16:
+; KNL: ## BB#0:
+; KNL-NEXT: vandps %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vandps %ymm3, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: and_v32i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %res = and <32 x i16> %a, %b
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; KNL-LABEL: or_v32i16:
+; KNL: ## BB#0:
+; KNL-NEXT: vorps %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vorps %ymm3, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: or_v32i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %res = or <32 x i16> %a, %b
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
+; KNL-LABEL: xor_v32i16:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorps %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vxorps %ymm3, %ymm1, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: xor_v32i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %res = xor <32 x i16> %a, %b
+ ret <32 x i16> %res
+}
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index d2efd7d6db6e..015c70a6ba08 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -1,39 +1,48 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL --check-prefix=CHECK
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX --check-prefix=CHECK
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; CHECK-LABEL: mask16
-; CHECK: kmovw
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw
define i16 @mask16(i16 %x) {
+; CHECK-LABEL: mask16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: knotw %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <16 x i1> %m1 to i16
ret i16 %ret
}
-; CHECK-LABEL: mask8
-; KNL: kmovw
-; KNL-NEXT: knotw
-; KNL-NEXT: kmovw
-; SKX: kmovb
-; SKX-NEXT: knotb
-; SKX-NEXT: kmovb
-
define i8 @mask8(i8 %x) {
+; KNL-LABEL: mask8:
+; KNL: ## BB#0:
+; KNL-NEXT: movzbl %dil, %eax
+; KNL-NEXT: kmovw %eax, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: mask8:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb %edi, %k0
+; SKX-NEXT: knotb %k0, %k0
+; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <8 x i1> %m1 to i8
ret i8 %ret
}
-; CHECK-LABEL: mask16_mem
-; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
-; CHECK: ret
-
define void @mask16_mem(i16* %ptr) {
+; CHECK-LABEL: mask16_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw (%rdi), %k0
+; CHECK-NEXT: knotw %k0, %k0
+; CHECK-NEXT: kmovw %k0, (%rdi)
+; CHECK-NEXT: retq
%x = load i16, i16* %ptr, align 4
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -42,15 +51,20 @@ define void @mask16_mem(i16* %ptr) {
ret void
}
-; CHECK-LABEL: mask8_mem
-; KNL: kmovw ([[ARG1]]), %k{{[0-7]}}
-; KNL-NEXT: knotw
-; KNL-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
-; SKX: kmovb ([[ARG1]]), %k{{[0-7]}}
-; SKX-NEXT: knotb
-; SKX-NEXT: kmovb %k{{[0-7]}}, ([[ARG1]])
-
define void @mask8_mem(i8* %ptr) {
+; KNL-LABEL: mask8_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw (%rdi), %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: kmovw %k0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: mask8_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb (%rdi), %k0
+; SKX-NEXT: knotb %k0, %k0
+; SKX-NEXT: kmovb %k0, (%rdi)
+; SKX-NEXT: retq
%x = load i8, i8* %ptr, align 4
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -59,11 +73,16 @@ define void @mask8_mem(i8* %ptr) {
ret void
}
-; CHECK-LABEL: mand16
-; CHECK: kandw
-; CHECK: kxorw
-; CHECK: korw
define i16 @mand16(i16 %x, i16 %y) {
+; CHECK-LABEL: mand16:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: kandw %k1, %k0, %k2
+; CHECK-NEXT: kxorw %k1, %k0, %k0
+; CHECK-NEXT: korw %k0, %k2, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
%ma = bitcast i16 %x to <16 x i1>
%mb = bitcast i16 %y to <16 x i1>
%mc = and <16 x i1> %ma, %mb
@@ -73,56 +92,68 @@ define i16 @mand16(i16 %x, i16 %y) {
ret i16 %ret
}
-; CHECK-LABEL: shuf_test1
-; CHECK: kshiftrw $8
define i8 @shuf_test1(i16 %v) nounwind {
+; KNL-LABEL: shuf_test1:
+; KNL: ## BB#0:
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: kshiftrw $8, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: shuf_test1:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovw %edi, %k0
+; SKX-NEXT: kshiftrw $8, %k0, %k0
+; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: retq
%v1 = bitcast i16 %v to <16 x i1>
%mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%mask1 = bitcast <8 x i1> %mask to i8
ret i8 %mask1
}
-; CHECK-LABEL: zext_test1
-; CHECK: kshiftlw
-; CHECK: kshiftrw
-; CHECK: kmovw
-
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
+; CHECK-LABEL: zext_test1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
+; CHECK-NEXT: kshiftlw $10, %k0, %k0
+; CHECK-NEXT: kshiftrw $15, %k0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retq
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
%res = zext i1 %cmp_res.i1 to i32
ret i32 %res
-}
-
-; CHECK-LABEL: zext_test2
-; CHECK: kshiftlw
-; CHECK: kshiftrw
-; CHECK: kmovw
-
-define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
+}define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
%res = zext i1 %cmp_res.i1 to i16
ret i16 %res
-}
-
-; CHECK-LABEL: zext_test3
-; CHECK: kshiftlw
-; CHECK: kshiftrw
-; CHECK: kmovw
-
-define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
+}define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
%res = zext i1 %cmp_res.i1 to i8
ret i8 %res
}
-; CHECK-LABEL: conv1
-; KNL: kmovw %k0, %eax
-; KNL: movb %al, (%rdi)
-; SKX: kmovb %k0, (%rdi)
define i8 @conv1(<8 x i1>* %R) {
+; KNL-LABEL: conv1:
+; KNL: ## BB#0: ## %entry
+; KNL-NEXT: kxnorw %k0, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
+; KNL-NEXT: movb $-2, %al
+; KNL-NEXT: retq
+;
+; SKX-LABEL: conv1:
+; SKX: ## BB#0: ## %entry
+; SKX-NEXT: kxnorw %k0, %k0, %k0
+; SKX-NEXT: kmovb %k0, (%rdi)
+; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: movb $-2, %al
+; SKX-NEXT: retq
entry:
store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
@@ -133,12 +164,27 @@ entry:
ret i8 %mask_convert
}
-; SKX-LABEL: test4
-; SKX: vpcmpgt
-; SKX: knot
-; SKX: vpcmpgt
-; SKX: vpmovm2d
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
+; KNL-LABEL: test4:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vpslld $31, %xmm0, %xmm0
+; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1
+; KNL-NEXT: vpmovqd %zmm1, %ymm1
+; KNL-NEXT: vpslld $31, %xmm1, %xmm1
+; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
+; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test4:
+; SKX: ## BB#0:
+; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0
+; SKX-NEXT: knotw %k0, %k1
+; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: retq
%x_gt_y = icmp sgt <4 x i64> %x, %y
%x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
%res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
@@ -146,30 +192,27 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1
ret <4 x i32> %resse
}
-; SKX-LABEL: test5
-; SKX: vpcmpgt
-; SKX: knot
-; SKX: vpcmpgt
-; SKX: vpmovm2q
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
+; KNL-LABEL: test5:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1
+; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test5:
+; SKX: ## BB#0:
+; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0
+; SKX-NEXT: knotw %k0, %k1
+; SKX-NEXT: vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: retq
%x_gt_y = icmp slt <2 x i64> %x, %y
%x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
%res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
%resse = sext <2 x i1>%res to <2 x i64>
ret <2 x i64> %resse
-}
-
-; KNL-LABEL: test6
-; KNL: vpmovsxbd
-; KNL: vpandd
-; KNL: kmovw %eax, %k1
-; KNL vptestmd {{.*}}, %k0 {%k1}
-
-; SKX-LABEL: test6
-; SKX: vpmovb2m
-; SKX: kmovw %eax, %k1
-; SKX: kandw
-define void @test6(<16 x i1> %mask) {
+}define void @test6(<16 x i1> %mask) {
allocas:
%a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
%b = bitcast <16 x i1> %a to i16
@@ -182,19 +225,30 @@ true:
false:
ret void
}
-
-; KNL-LABEL: test7
-; KNL: vpmovsxwq
-; KNL: vpandq
-; KNL: vptestmq {{.*}}, %k0
-; KNL: korw
-
-; SKX-LABEL: test7
-; SKX: vpmovw2m
-; SKX: kmovb %eax, %k1
-; SKX: korb
-
define void @test7(<8 x i1> %mask) {
+; KNL-LABEL: test7:
+; KNL: ## BB#0: ## %allocas
+; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT: movb $85, %al
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: korw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: testb %al, %al
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test7:
+; SKX: ## BB#0: ## %allocas
+; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT: vpmovw2m %xmm0, %k0
+; SKX-NEXT: movb $85, %al
+; SKX-NEXT: kmovb %eax, %k1
+; SKX-NEXT: korb %k1, %k0, %k0
+; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: testb %al, %al
+; SKX-NEXT: retq
allocas:
%a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
%b = bitcast <8 x i1> %a to i8
@@ -207,22 +261,35 @@ true:
false:
ret void
}
-
-; KNL-LABEL: test8
-; KNL: vpxord %zmm2, %zmm2, %zmm2
-; KNL: jg
-; KNL: vpcmpltud %zmm2, %zmm1, %k1
-; KNL: jmp
-; KNL: vpcmpgtd %zmm2, %zmm0, %k1
-
-; SKX-LABEL: test8
-; SKX: jg
-; SKX: vpcmpltud {{.*}}, %k0
-; SKX: vpmovm2b
-; SKX: vpcmpgtd {{.*}}, %k0
-; SKX: vpmovm2b
-
define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
+; KNL-LABEL: test8:
+; KNL: ## BB#0:
+; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; KNL-NEXT: cmpl %esi, %edi
+; KNL-NEXT: jg LBB14_1
+; KNL-NEXT: ## BB#2:
+; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
+; KNL-NEXT: jmp LBB14_3
+; KNL-NEXT: LBB14_1:
+; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
+; KNL-NEXT: LBB14_3:
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test8:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; SKX-NEXT: cmpl %esi, %edi
+; SKX-NEXT: jg LBB14_1
+; SKX-NEXT: ## BB#2:
+; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: retq
+; SKX-NEXT: LBB14_1:
+; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: retq
%cond = icmp sgt i32 %a1, %b1
%cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
%cmp2 = icmp ult <16 x i32> %b, zeroinitializer
@@ -230,91 +297,121 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
%res = sext <16 x i1> %mix to <16 x i8>
ret <16 x i8> %res
}
-
-; KNL-LABEL: test9
-; KNL: jg
-; KNL: vpmovsxbd %xmm1, %zmm0
-; KNL: jmp
-; KNL: vpmovsxbd %xmm0, %zmm0
-
-; SKX-LABEL: test9
-; SKX: vpmovb2m %xmm1, %k0
-; SKX: vpmovm2b %k0, %xmm0
-; SKX: retq
-; SKX: vpmovb2m %xmm0, %k0
-; SKX: vpmovm2b %k0, %xmm0
-
define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
+; KNL-LABEL: test9:
+; KNL: ## BB#0:
+; KNL-NEXT: cmpl %esi, %edi
+; KNL-NEXT: jg LBB15_1
+; KNL-NEXT: ## BB#2:
+; KNL-NEXT: vpmovsxbd %xmm1, %zmm0
+; KNL-NEXT: jmp LBB15_3
+; KNL-NEXT: LBB15_1:
+; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
+; KNL-NEXT: LBB15_3:
+; KNL-NEXT: vpslld $31, %zmm0, %zmm0
+; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test9:
+; SKX: ## BB#0:
+; SKX-NEXT: cmpl %esi, %edi
+; SKX-NEXT: jg LBB15_1
+; SKX-NEXT: ## BB#2:
+; SKX-NEXT: vpsllw $7, %xmm1, %xmm0
+; SKX-NEXT: jmp LBB15_3
+; SKX-NEXT: LBB15_1:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: LBB15_3:
+; SKX-NEXT: vpmovb2m %xmm0, %k0
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: retq
%mask = icmp sgt i32 %a1, %b1
%c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
ret <16 x i1>%c
-}
-
-; KNL-LABEL: test10
-; KNL: jg
-; KNL: vpmovsxwq %xmm1, %zmm0
-; KNL: jmp
-; KNL: vpmovsxwq %xmm0, %zmm0
-
-; SKX-LABEL: test10
-; SKX: jg
-; SKX: vpmovw2m %xmm1, %k0
-; SKX: vpmovm2w %k0, %xmm0
-; SKX: retq
-; SKX: vpmovw2m %xmm0, %k0
-; SKX: vpmovm2w %k0, %xmm0
-define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
+}define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
%mask = icmp sgt i32 %a1, %b1
%c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
ret <8 x i1>%c
}
-; SKX-LABEL: test11
-; SKX: jg
-; SKX: vpmovd2m %xmm1, %k0
-; SKX: vpmovm2d %k0, %xmm0
-; SKX: retq
-; SKX: vpmovd2m %xmm0, %k0
-; SKX: vpmovm2d %k0, %xmm0
define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
+; KNL-LABEL: test11:
+; KNL: ## BB#0:
+; KNL-NEXT: cmpl %esi, %edi
+; KNL-NEXT: jg LBB17_2
+; KNL-NEXT: ## BB#1:
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: LBB17_2:
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test11:
+; SKX: ## BB#0:
+; SKX-NEXT: cmpl %esi, %edi
+; SKX-NEXT: jg LBB17_1
+; SKX-NEXT: ## BB#2:
+; SKX-NEXT: vpslld $31, %xmm1, %xmm0
+; SKX-NEXT: jmp LBB17_3
+; SKX-NEXT: LBB17_1:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: LBB17_3:
+; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: vpmovm2d %k0, %xmm0
+; SKX-NEXT: retq
%mask = icmp sgt i32 %a1, %b1
%c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
ret <4 x i1>%c
}
-; KNL-LABEL: test12
-; KNL: movl %edi, %eax
define i32 @test12(i32 %x, i32 %y) {
+; CHECK-LABEL: test12:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
%a = bitcast i16 21845 to <16 x i1>
%b = extractelement <16 x i1> %a, i32 0
%c = select i1 %b, i32 %x, i32 %y
ret i32 %c
}
-; KNL-LABEL: test13
-; KNL: movl %esi, %eax
define i32 @test13(i32 %x, i32 %y) {
+; CHECK-LABEL: test13:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: retq
%a = bitcast i16 21845 to <16 x i1>
%b = extractelement <16 x i1> %a, i32 3
%c = select i1 %b, i32 %x, i32 %y
ret i32 %c
-}
-
-; SKX-LABEL: test14
-; SKX: movb $11, %al
-; SKX: kmovb %eax, %k0
-; SKX: vpmovm2d %k0, %xmm0
-
-define <4 x i1> @test14() {
+}define <4 x i1> @test14() {
%a = bitcast i16 21845 to <16 x i1>
%b = extractelement <16 x i1> %a, i32 2
%c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
ret <4 x i1> %c
}
-; KNL-LABEL: test15
-; KNL: cmovgw
define <16 x i1> @test15(i32 %x, i32 %y) {
+; KNL-LABEL: test15:
+; KNL: ## BB#0:
+; KNL-NEXT: cmpl %esi, %edi
+; KNL-NEXT: movw $21845, %ax ## imm = 0x5555
+; KNL-NEXT: movw $1, %cx
+; KNL-NEXT: cmovgw %ax, %cx
+; KNL-NEXT: kmovw %ecx, %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test15:
+; SKX: ## BB#0:
+; SKX-NEXT: cmpl %esi, %edi
+; SKX-NEXT: movw $21845, %ax ## imm = 0x5555
+; SKX-NEXT: movw $1, %cx
+; SKX-NEXT: cmovgw %ax, %cx
+; SKX-NEXT: kmovw %ecx, %k0
+; SKX-NEXT: vpmovm2b %k0, %xmm0
+; SKX-NEXT: retq
%a = bitcast i16 21845 to <16 x i1>
%b = bitcast i16 1 to <16 x i1>
%mask = icmp sgt i32 %x, %y
@@ -322,27 +419,914 @@ define <16 x i1> @test15(i32 %x, i32 %y) {
ret <16 x i1> %c
}
-; SKX-LABEL: test16
-; SKX: kxnorw %k1, %k1, %k1
-; SKX: kshiftrw $15, %k1, %k1
-; SKX: kshiftlq $5, %k1, %k1
-; SKX: korq %k1, %k0, %k0
-; SKX: vpmovm2b %k0, %zmm0
define <64 x i8> @test16(i64 %x) {
+; KNL-LABEL: test16:
+; KNL: ## BB#0:
+; KNL-NEXT: pushq %rbp
+; KNL-NEXT: Ltmp0:
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: Ltmp1:
+; KNL-NEXT: .cfi_offset %rbp, -16
+; KNL-NEXT: movq %rsp, %rbp
+; KNL-NEXT: Ltmp2:
+; KNL-NEXT: .cfi_def_cfa_register %rbp
+; KNL-NEXT: pushq %r15
+; KNL-NEXT: pushq %r14
+; KNL-NEXT: pushq %r13
+; KNL-NEXT: pushq %r12
+; KNL-NEXT: pushq %rbx
+; KNL-NEXT: andq $-32, %rsp
+; KNL-NEXT: subq $128, %rsp
+; KNL-NEXT: Ltmp3:
+; KNL-NEXT: .cfi_offset %rbx, -56
+; KNL-NEXT: Ltmp4:
+; KNL-NEXT: .cfi_offset %r12, -48
+; KNL-NEXT: Ltmp5:
+; KNL-NEXT: .cfi_offset %r13, -40
+; KNL-NEXT: Ltmp6:
+; KNL-NEXT: .cfi_offset %r14, -32
+; KNL-NEXT: Ltmp7:
+; KNL-NEXT: .cfi_offset %r15, -24
+; KNL-NEXT: movq %rdi, %rax
+; KNL-NEXT: shrq $32, %rax
+; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp)
+; KNL-NEXT: movl $271, %eax ## imm = 0x10F
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: movl %edi, %ecx
+; KNL-NEXT: andl $1, %ecx
+; KNL-NEXT: vmovd %ecx, %xmm0
+; KNL-NEXT: movl $257, %ecx ## imm = 0x101
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $258, %ecx ## imm = 0x102
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $259, %ecx ## imm = 0x103
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $260, %ecx ## imm = 0x104
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $261, %ecx ## imm = 0x105
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $262, %ecx ## imm = 0x106
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $263, %ecx ## imm = 0x107
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $264, %ecx ## imm = 0x108
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $265, %ecx ## imm = 0x109
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $266, %ecx ## imm = 0x10A
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $267, %ecx ## imm = 0x10B
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $268, %ecx ## imm = 0x10C
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $269, %ecx ## imm = 0x10D
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
+; KNL-NEXT: movl $270, %ecx ## imm = 0x10E
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
+; KNL-NEXT: movl $1, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0
+; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d
+; KNL-NEXT: movq %r15, %rdx
+; KNL-NEXT: shrq $17, %rdx
+; KNL-NEXT: andb $1, %dl
+; KNL-NEXT: je LBB22_2
+; KNL-NEXT: ## BB#1:
+; KNL-NEXT: movb $-1, %dl
+; KNL-NEXT: LBB22_2:
+; KNL-NEXT: movq %r15, %r11
+; KNL-NEXT: shrq $16, %r11
+; KNL-NEXT: andb $1, %r11b
+; KNL-NEXT: je LBB22_4
+; KNL-NEXT: ## BB#3:
+; KNL-NEXT: movb $-1, %r11b
+; KNL-NEXT: LBB22_4:
+; KNL-NEXT: movq %r15, %r10
+; KNL-NEXT: shrq $18, %r10
+; KNL-NEXT: andb $1, %r10b
+; KNL-NEXT: je LBB22_6
+; KNL-NEXT: ## BB#5:
+; KNL-NEXT: movb $-1, %r10b
+; KNL-NEXT: LBB22_6:
+; KNL-NEXT: movq %r15, %r9
+; KNL-NEXT: shrq $19, %r9
+; KNL-NEXT: andb $1, %r9b
+; KNL-NEXT: je LBB22_8
+; KNL-NEXT: ## BB#7:
+; KNL-NEXT: movb $-1, %r9b
+; KNL-NEXT: LBB22_8:
+; KNL-NEXT: movq %r15, %rbx
+; KNL-NEXT: shrq $20, %rbx
+; KNL-NEXT: andb $1, %bl
+; KNL-NEXT: je LBB22_10
+; KNL-NEXT: ## BB#9:
+; KNL-NEXT: movb $-1, %bl
+; KNL-NEXT: LBB22_10:
+; KNL-NEXT: movq %r15, %r12
+; KNL-NEXT: shrq $21, %r12
+; KNL-NEXT: andb $1, %r12b
+; KNL-NEXT: je LBB22_12
+; KNL-NEXT: ## BB#11:
+; KNL-NEXT: movb $-1, %r12b
+; KNL-NEXT: LBB22_12:
+; KNL-NEXT: movq %r15, %r14
+; KNL-NEXT: shrq $22, %r14
+; KNL-NEXT: andb $1, %r14b
+; KNL-NEXT: je LBB22_14
+; KNL-NEXT: ## BB#13:
+; KNL-NEXT: movb $-1, %r14b
+; KNL-NEXT: LBB22_14:
+; KNL-NEXT: movq %r15, %r8
+; KNL-NEXT: shrq $23, %r8
+; KNL-NEXT: andb $1, %r8b
+; KNL-NEXT: je LBB22_16
+; KNL-NEXT: ## BB#15:
+; KNL-NEXT: movb $-1, %r8b
+; KNL-NEXT: LBB22_16:
+; KNL-NEXT: movq %r15, %r13
+; KNL-NEXT: shrq $24, %r13
+; KNL-NEXT: andb $1, %r13b
+; KNL-NEXT: je LBB22_18
+; KNL-NEXT: ## BB#17:
+; KNL-NEXT: movb $-1, %r13b
+; KNL-NEXT: LBB22_18:
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $25, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_20
+; KNL-NEXT: ## BB#19:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_20:
+; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $26, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_22
+; KNL-NEXT: ## BB#21:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_22:
+; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: movl $272, %esi ## imm = 0x110
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $27, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_24
+; KNL-NEXT: ## BB#23:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_24:
+; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: movl $273, %eax ## imm = 0x111
+; KNL-NEXT: bextrl %esi, %edi, %esi
+; KNL-NEXT: movq %r15, %rcx
+; KNL-NEXT: shrq $28, %rcx
+; KNL-NEXT: andb $1, %cl
+; KNL-NEXT: je LBB22_26
+; KNL-NEXT: ## BB#25:
+; KNL-NEXT: movb $-1, %cl
+; KNL-NEXT: LBB22_26:
+; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vmovd %esi, %xmm2
+; KNL-NEXT: movl $274, %esi ## imm = 0x112
+; KNL-NEXT: movq %r15, %rcx
+; KNL-NEXT: shrq $29, %rcx
+; KNL-NEXT: andb $1, %cl
+; KNL-NEXT: je LBB22_28
+; KNL-NEXT: ## BB#27:
+; KNL-NEXT: movb $-1, %cl
+; KNL-NEXT: LBB22_28:
+; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; KNL-NEXT: bextrl %esi, %edi, %eax
+; KNL-NEXT: movzbl %r11b, %esi
+; KNL-NEXT: movq %r15, %rcx
+; KNL-NEXT: shrq $30, %rcx
+; KNL-NEXT: andb $1, %cl
+; KNL-NEXT: je LBB22_30
+; KNL-NEXT: ## BB#29:
+; KNL-NEXT: movb $-1, %cl
+; KNL-NEXT: LBB22_30:
+; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; KNL-NEXT: movl $275, %eax ## imm = 0x113
+; KNL-NEXT: bextrl %eax, %edi, %r11d
+; KNL-NEXT: movzbl %dl, %edx
+; KNL-NEXT: vmovd %esi, %xmm3
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $31, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_32
+; KNL-NEXT: ## BB#31:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_32:
+; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
+; KNL-NEXT: movl $276, %eax ## imm = 0x114
+; KNL-NEXT: bextrl %eax, %edi, %esi
+; KNL-NEXT: movl $277, %r11d ## imm = 0x115
+; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r10b, %r10d
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_34
+; KNL-NEXT: ## BB#33:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_34:
+; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2
+; KNL-NEXT: bextrl %r11d, %edi, %edx
+; KNL-NEXT: movl $278, %r11d ## imm = 0x116
+; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r9b, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: movq %r15, %rcx
+; KNL-NEXT: shlq $63, %rcx
+; KNL-NEXT: sarq $63, %rcx
+; KNL-NEXT: vmovd %ecx, %xmm4
+; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb $2, %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_36
+; KNL-NEXT: ## BB#35:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_36:
+; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %r11d, %edi, %edx
+; KNL-NEXT: movl $279, %r9d ## imm = 0x117
+; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3
+; KNL-NEXT: movzbl %bl, %ebx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb $3, %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_38
+; KNL-NEXT: ## BB#37:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_38:
+; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %r9d, %edi, %edx
+; KNL-NEXT: movl $280, %esi ## imm = 0x118
+; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r12b, %ebx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb $4, %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_40
+; KNL-NEXT: ## BB#39:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_40:
+; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %esi, %edi, %ecx
+; KNL-NEXT: movl $281, %edx ## imm = 0x119
+; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r14b, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb $5, %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_42
+; KNL-NEXT: ## BB#41:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_42:
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %ecx
+; KNL-NEXT: movl $282, %edx ## imm = 0x11A
+; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r8b, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %bl
+; KNL-NEXT: shrb $6, %bl
+; KNL-NEXT: andb $1, %bl
+; KNL-NEXT: je LBB22_44
+; KNL-NEXT: ## BB#43:
+; KNL-NEXT: movb $-1, %bl
+; KNL-NEXT: LBB22_44:
+; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %eax
+; KNL-NEXT: movl $283, %ecx ## imm = 0x11B
+; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r13b, %esi
+; KNL-NEXT: movzbl %bl, %edx
+; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %bl
+; KNL-NEXT: shrb $7, %bl
+; KNL-NEXT: je LBB22_46
+; KNL-NEXT: ## BB#45:
+; KNL-NEXT: movb $-1, %bl
+; KNL-NEXT: LBB22_46:
+; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: movl $284, %edx ## imm = 0x11C
+; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
+; KNL-NEXT: movzbl %al, %esi
+; KNL-NEXT: movzbl %bl, %eax
+; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $8, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_48
+; KNL-NEXT: ## BB#47:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_48:
+; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %ecx
+; KNL-NEXT: movl $285, %edx ## imm = 0x11D
+; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
+; KNL-NEXT: movzbl %sil, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $9, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_50
+; KNL-NEXT: ## BB#49:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_50:
+; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %ecx
+; KNL-NEXT: movl $286, %edx ## imm = 0x11E
+; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
+; KNL-NEXT: movzbl %sil, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $10, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_52
+; KNL-NEXT: ## BB#51:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_52:
+; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %edx
+; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; KNL-NEXT: movzbl %cl, %ecx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $11, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_54
+; KNL-NEXT: ## BB#53:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_54:
+; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
+; KNL-NEXT: shrl $31, %edi
+; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; KNL-NEXT: movzbl %cl, %ecx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $12, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_56
+; KNL-NEXT: ## BB#55:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_56:
+; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; KNL-NEXT: movzbl %cl, %ecx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $13, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_58
+; KNL-NEXT: ## BB#57:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_58:
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; KNL-NEXT: movzbl %cl, %ecx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $14, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB22_60
+; KNL-NEXT: ## BB#59:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB22_60:
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2
+; KNL-NEXT: shrq $15, %r15
+; KNL-NEXT: andb $1, %r15b
+; KNL-NEXT: je LBB22_62
+; KNL-NEXT: ## BB#61:
+; KNL-NEXT: movb $-1, %r15b
+; KNL-NEXT: LBB22_62:
+; KNL-NEXT: movzbl %r15b, %eax
+; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
+; KNL-NEXT: leaq -40(%rbp), %rsp
+; KNL-NEXT: popq %rbx
+; KNL-NEXT: popq %r12
+; KNL-NEXT: popq %r13
+; KNL-NEXT: popq %r14
+; KNL-NEXT: popq %r15
+; KNL-NEXT: popq %rbp
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test16:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovq %rdi, %k0
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kshiftlq $5, %k1, %k1
+; SKX-NEXT: korq %k1, %k0, %k0
+; SKX-NEXT: vpmovm2b %k0, %zmm0
+; SKX-NEXT: retq
%a = bitcast i64 %x to <64 x i1>
%b = insertelement <64 x i1>%a, i1 true, i32 5
%c = sext <64 x i1>%b to <64 x i8>
ret <64 x i8>%c
}
-; SKX-LABEL: test17
-; SKX: setg %al
-; SKX: andl $1, %eax
-; SKX: kmovw %eax, %k1
-; SKX: kshiftlq $5, %k1, %k1
-; SKX: korq %k1, %k0, %k0
-; SKX: vpmovm2b %k0, %zmm0
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
+; KNL-LABEL: test17:
+; KNL: ## BB#0:
+; KNL-NEXT: pushq %rbp
+; KNL-NEXT: Ltmp8:
+; KNL-NEXT: .cfi_def_cfa_offset 16
+; KNL-NEXT: Ltmp9:
+; KNL-NEXT: .cfi_offset %rbp, -16
+; KNL-NEXT: movq %rsp, %rbp
+; KNL-NEXT: Ltmp10:
+; KNL-NEXT: .cfi_def_cfa_register %rbp
+; KNL-NEXT: pushq %r15
+; KNL-NEXT: pushq %r14
+; KNL-NEXT: pushq %r13
+; KNL-NEXT: pushq %r12
+; KNL-NEXT: pushq %rbx
+; KNL-NEXT: andq $-32, %rsp
+; KNL-NEXT: subq $128, %rsp
+; KNL-NEXT: Ltmp11:
+; KNL-NEXT: .cfi_offset %rbx, -56
+; KNL-NEXT: Ltmp12:
+; KNL-NEXT: .cfi_offset %r12, -48
+; KNL-NEXT: Ltmp13:
+; KNL-NEXT: .cfi_offset %r13, -40
+; KNL-NEXT: Ltmp14:
+; KNL-NEXT: .cfi_offset %r14, -32
+; KNL-NEXT: Ltmp15:
+; KNL-NEXT: .cfi_offset %r15, -24
+; KNL-NEXT: movq %rdi, %rax
+; KNL-NEXT: shrq $32, %rax
+; KNL-NEXT: movl %eax, {{[0-9]+}}(%rsp)
+; KNL-NEXT: movl %edi, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: vmovd %eax, %xmm0
+; KNL-NEXT: movl $257, %eax ## imm = 0x101
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $258, %eax ## imm = 0x102
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $259, %eax ## imm = 0x103
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $260, %eax ## imm = 0x104
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $261, %eax ## imm = 0x105
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $262, %eax ## imm = 0x106
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $263, %eax ## imm = 0x107
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $264, %eax ## imm = 0x108
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $265, %eax ## imm = 0x109
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $266, %eax ## imm = 0x10A
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $267, %eax ## imm = 0x10B
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $268, %eax ## imm = 0x10C
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $269, %eax ## imm = 0x10D
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $270, %eax ## imm = 0x10E
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; KNL-NEXT: movl $271, %eax ## imm = 0x10F
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
+; KNL-NEXT: cmpl %edx, %esi
+; KNL-NEXT: setg %al
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm0
+; KNL-NEXT: movl {{[0-9]+}}(%rsp), %r15d
+; KNL-NEXT: movq %r15, %rdx
+; KNL-NEXT: shrq $17, %rdx
+; KNL-NEXT: andb $1, %dl
+; KNL-NEXT: je LBB23_2
+; KNL-NEXT: ## BB#1:
+; KNL-NEXT: movb $-1, %dl
+; KNL-NEXT: LBB23_2:
+; KNL-NEXT: movq %r15, %r11
+; KNL-NEXT: shrq $16, %r11
+; KNL-NEXT: andb $1, %r11b
+; KNL-NEXT: je LBB23_4
+; KNL-NEXT: ## BB#3:
+; KNL-NEXT: movb $-1, %r11b
+; KNL-NEXT: LBB23_4:
+; KNL-NEXT: movq %r15, %r10
+; KNL-NEXT: shrq $18, %r10
+; KNL-NEXT: andb $1, %r10b
+; KNL-NEXT: je LBB23_6
+; KNL-NEXT: ## BB#5:
+; KNL-NEXT: movb $-1, %r10b
+; KNL-NEXT: LBB23_6:
+; KNL-NEXT: movq %r15, %r9
+; KNL-NEXT: shrq $19, %r9
+; KNL-NEXT: andb $1, %r9b
+; KNL-NEXT: je LBB23_8
+; KNL-NEXT: ## BB#7:
+; KNL-NEXT: movb $-1, %r9b
+; KNL-NEXT: LBB23_8:
+; KNL-NEXT: movq %r15, %rbx
+; KNL-NEXT: shrq $20, %rbx
+; KNL-NEXT: andb $1, %bl
+; KNL-NEXT: je LBB23_10
+; KNL-NEXT: ## BB#9:
+; KNL-NEXT: movb $-1, %bl
+; KNL-NEXT: LBB23_10:
+; KNL-NEXT: movq %r15, %r12
+; KNL-NEXT: shrq $21, %r12
+; KNL-NEXT: andb $1, %r12b
+; KNL-NEXT: je LBB23_12
+; KNL-NEXT: ## BB#11:
+; KNL-NEXT: movb $-1, %r12b
+; KNL-NEXT: LBB23_12:
+; KNL-NEXT: movq %r15, %r14
+; KNL-NEXT: shrq $22, %r14
+; KNL-NEXT: andb $1, %r14b
+; KNL-NEXT: je LBB23_14
+; KNL-NEXT: ## BB#13:
+; KNL-NEXT: movb $-1, %r14b
+; KNL-NEXT: LBB23_14:
+; KNL-NEXT: movq %r15, %r8
+; KNL-NEXT: shrq $23, %r8
+; KNL-NEXT: andb $1, %r8b
+; KNL-NEXT: je LBB23_16
+; KNL-NEXT: ## BB#15:
+; KNL-NEXT: movb $-1, %r8b
+; KNL-NEXT: LBB23_16:
+; KNL-NEXT: movq %r15, %r13
+; KNL-NEXT: shrq $24, %r13
+; KNL-NEXT: andb $1, %r13b
+; KNL-NEXT: je LBB23_18
+; KNL-NEXT: ## BB#17:
+; KNL-NEXT: movb $-1, %r13b
+; KNL-NEXT: LBB23_18:
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $25, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_20
+; KNL-NEXT: ## BB#19:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_20:
+; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $26, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_22
+; KNL-NEXT: ## BB#21:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_22:
+; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: movl $272, %esi ## imm = 0x110
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $27, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_24
+; KNL-NEXT: ## BB#23:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_24:
+; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: movl $273, %eax ## imm = 0x111
+; KNL-NEXT: bextrl %esi, %edi, %esi
+; KNL-NEXT: movq %r15, %rcx
+; KNL-NEXT: shrq $28, %rcx
+; KNL-NEXT: andb $1, %cl
+; KNL-NEXT: je LBB23_26
+; KNL-NEXT: ## BB#25:
+; KNL-NEXT: movb $-1, %cl
+; KNL-NEXT: LBB23_26:
+; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: bextrl %eax, %edi, %eax
+; KNL-NEXT: vmovd %esi, %xmm2
+; KNL-NEXT: movl $274, %esi ## imm = 0x112
+; KNL-NEXT: movq %r15, %rcx
+; KNL-NEXT: shrq $29, %rcx
+; KNL-NEXT: andb $1, %cl
+; KNL-NEXT: je LBB23_28
+; KNL-NEXT: ## BB#27:
+; KNL-NEXT: movb $-1, %cl
+; KNL-NEXT: LBB23_28:
+; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; KNL-NEXT: bextrl %esi, %edi, %eax
+; KNL-NEXT: movzbl %r11b, %esi
+; KNL-NEXT: movq %r15, %rcx
+; KNL-NEXT: shrq $30, %rcx
+; KNL-NEXT: andb $1, %cl
+; KNL-NEXT: je LBB23_30
+; KNL-NEXT: ## BB#29:
+; KNL-NEXT: movb $-1, %cl
+; KNL-NEXT: LBB23_30:
+; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; KNL-NEXT: movl $275, %eax ## imm = 0x113
+; KNL-NEXT: bextrl %eax, %edi, %r11d
+; KNL-NEXT: movzbl %dl, %edx
+; KNL-NEXT: vmovd %esi, %xmm3
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $31, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_32
+; KNL-NEXT: ## BB#31:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_32:
+; KNL-NEXT: movq %rax, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ## 8-byte Spill
+; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
+; KNL-NEXT: movl $276, %eax ## imm = 0x114
+; KNL-NEXT: bextrl %eax, %edi, %esi
+; KNL-NEXT: movl $277, %r11d ## imm = 0x115
+; KNL-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r10b, %r10d
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_34
+; KNL-NEXT: ## BB#33:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_34:
+; KNL-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2
+; KNL-NEXT: bextrl %r11d, %edi, %edx
+; KNL-NEXT: movl $278, %r11d ## imm = 0x116
+; KNL-NEXT: vpinsrb $2, %r10d, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r9b, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: movq %r15, %rcx
+; KNL-NEXT: shlq $63, %rcx
+; KNL-NEXT: sarq $63, %rcx
+; KNL-NEXT: vmovd %ecx, %xmm4
+; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb $2, %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_36
+; KNL-NEXT: ## BB#35:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_36:
+; KNL-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %r11d, %edi, %edx
+; KNL-NEXT: movl $279, %r9d ## imm = 0x117
+; KNL-NEXT: vpinsrb $3, %esi, %xmm3, %xmm3
+; KNL-NEXT: movzbl %bl, %ebx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb $3, %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_38
+; KNL-NEXT: ## BB#37:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_38:
+; KNL-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %r9d, %edi, %edx
+; KNL-NEXT: movl $280, %esi ## imm = 0x118
+; KNL-NEXT: vpinsrb $4, %ebx, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r12b, %ebx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb $4, %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_40
+; KNL-NEXT: ## BB#39:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_40:
+; KNL-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %esi, %edi, %ecx
+; KNL-NEXT: movl $281, %edx ## imm = 0x119
+; KNL-NEXT: vpinsrb $5, %ebx, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r14b, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %al
+; KNL-NEXT: shrb $5, %al
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_42
+; KNL-NEXT: ## BB#41:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_42:
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %ecx
+; KNL-NEXT: movl $282, %edx ## imm = 0x11A
+; KNL-NEXT: vpinsrb $6, %esi, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r8b, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %bl
+; KNL-NEXT: shrb $6, %bl
+; KNL-NEXT: andb $1, %bl
+; KNL-NEXT: je LBB23_44
+; KNL-NEXT: ## BB#43:
+; KNL-NEXT: movb $-1, %bl
+; KNL-NEXT: LBB23_44:
+; KNL-NEXT: vpinsrb $9, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %eax
+; KNL-NEXT: movl $283, %ecx ## imm = 0x11B
+; KNL-NEXT: vpinsrb $7, %esi, %xmm3, %xmm3
+; KNL-NEXT: movzbl %r13b, %esi
+; KNL-NEXT: movzbl %bl, %edx
+; KNL-NEXT: vpinsrb $6, %edx, %xmm4, %xmm4
+; KNL-NEXT: movb %r15b, %bl
+; KNL-NEXT: shrb $7, %bl
+; KNL-NEXT: je LBB23_46
+; KNL-NEXT: ## BB#45:
+; KNL-NEXT: movb $-1, %bl
+; KNL-NEXT: LBB23_46:
+; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; KNL-NEXT: bextrl %ecx, %edi, %ecx
+; KNL-NEXT: movl $284, %edx ## imm = 0x11C
+; KNL-NEXT: vpinsrb $8, %esi, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rax ## 8-byte Reload
+; KNL-NEXT: movzbl %al, %esi
+; KNL-NEXT: movzbl %bl, %eax
+; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $8, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_48
+; KNL-NEXT: ## BB#47:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_48:
+; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %ecx
+; KNL-NEXT: movl $285, %edx ## imm = 0x11D
+; KNL-NEXT: vpinsrb $9, %esi, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
+; KNL-NEXT: movzbl %sil, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $9, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_50
+; KNL-NEXT: ## BB#49:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_50:
+; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %ecx
+; KNL-NEXT: movl $286, %edx ## imm = 0x11E
+; KNL-NEXT: vpinsrb $10, %esi, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rsi ## 8-byte Reload
+; KNL-NEXT: movzbl %sil, %esi
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $10, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_52
+; KNL-NEXT: ## BB#51:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_52:
+; KNL-NEXT: vpinsrb $13, %ecx, %xmm2, %xmm2
+; KNL-NEXT: bextrl %edx, %edi, %edx
+; KNL-NEXT: vpinsrb $11, %esi, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; KNL-NEXT: movzbl %cl, %ecx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $11, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_54
+; KNL-NEXT: ## BB#53:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_54:
+; KNL-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
+; KNL-NEXT: shrl $31, %edi
+; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; KNL-NEXT: movzbl %cl, %ecx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $12, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_56
+; KNL-NEXT: ## BB#55:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_56:
+; KNL-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm3
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; KNL-NEXT: movzbl %cl, %ecx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $13, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_58
+; KNL-NEXT: ## BB#57:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_58:
+; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; KNL-NEXT: vpinsrb $14, %ecx, %xmm3, %xmm2
+; KNL-NEXT: movq {{[0-9]+}}(%rsp), %rcx ## 8-byte Reload
+; KNL-NEXT: movzbl %cl, %ecx
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm3
+; KNL-NEXT: movq %r15, %rax
+; KNL-NEXT: shrq $14, %rax
+; KNL-NEXT: andb $1, %al
+; KNL-NEXT: je LBB23_60
+; KNL-NEXT: ## BB#59:
+; KNL-NEXT: movb $-1, %al
+; KNL-NEXT: LBB23_60:
+; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; KNL-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm1
+; KNL-NEXT: movzbl %al, %eax
+; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm2
+; KNL-NEXT: shrq $15, %r15
+; KNL-NEXT: andb $1, %r15b
+; KNL-NEXT: je LBB23_62
+; KNL-NEXT: ## BB#61:
+; KNL-NEXT: movb $-1, %r15b
+; KNL-NEXT: LBB23_62:
+; KNL-NEXT: movzbl %r15b, %eax
+; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
+; KNL-NEXT: leaq -40(%rbp), %rsp
+; KNL-NEXT: popq %rbx
+; KNL-NEXT: popq %r12
+; KNL-NEXT: popq %r13
+; KNL-NEXT: popq %r14
+; KNL-NEXT: popq %r15
+; KNL-NEXT: popq %rbp
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test17:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovq %rdi, %k0
+; SKX-NEXT: cmpl %edx, %esi
+; SKX-NEXT: setg %al
+; SKX-NEXT: andl $1, %eax
+; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kshiftlq $5, %k1, %k1
+; SKX-NEXT: korq %k1, %k0, %k0
+; SKX-NEXT: vpmovm2b %k0, %zmm0
+; SKX-NEXT: retq
%a = bitcast i64 %x to <64 x i1>
%b = icmp sgt i32 %y, %z
%c = insertelement <64 x i1>%a, i1 %b, i32 5
@@ -350,8 +1334,38 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
ret <64 x i8>%d
}
-; KNL-LABEL: test18
define <8 x i1> @test18(i8 %a, i16 %y) {
+; KNL-LABEL: test18:
+; KNL: ## BB#0:
+; KNL-NEXT: movzbl %dil, %eax
+; KNL-NEXT: kmovw %eax, %k0
+; KNL-NEXT: kmovw %esi, %k1
+; KNL-NEXT: kshiftlw $7, %k1, %k2
+; KNL-NEXT: kshiftrw $15, %k2, %k2
+; KNL-NEXT: kshiftlw $6, %k1, %k1
+; KNL-NEXT: kshiftrw $15, %k1, %k1
+; KNL-NEXT: kshiftlw $6, %k1, %k1
+; KNL-NEXT: korw %k1, %k0, %k0
+; KNL-NEXT: kshiftlw $7, %k2, %k1
+; KNL-NEXT: korw %k1, %k0, %k1
+; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovqw %zmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test18:
+; SKX: ## BB#0:
+; SKX-NEXT: kmovb %edi, %k0
+; SKX-NEXT: kmovw %esi, %k1
+; SKX-NEXT: kshiftlw $6, %k1, %k2
+; SKX-NEXT: kshiftrw $15, %k2, %k2
+; SKX-NEXT: kshiftlw $7, %k1, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kshiftlb $7, %k1, %k1
+; SKX-NEXT: kshiftlb $6, %k2, %k2
+; SKX-NEXT: korb %k2, %k0, %k0
+; SKX-NEXT: korb %k1, %k0, %k0
+; SKX-NEXT: vpmovm2w %k0, %xmm0
+; SKX-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%b1 = bitcast i16 %y to <16 x i1>
%el1 = extractelement <16 x i1>%b1, i32 8
@@ -360,50 +1374,76 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
%d = insertelement <8 x i1>%c, i1 %el2, i32 6
ret <8 x i1>%d
}
-
-; KNL-LABEL: test19
-; KNL: movzbl %dil, %eax
-; KNL: kmovw %eax, %k0
-; KNL: kshiftlw $13, %k0, %k0
-; KNL: kshiftrw $15, %k0, %k0
-; KNL: kmovw %k0, %eax
-; KNL: andl $1, %eax
-; KNL: testb %al, %al
-
-define <8 x i1> @test19(i8 %a) {
- %b = bitcast i8 %a to <8 x i1>
- %c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef>
- ret <8 x i1> %c
+define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
+; KNL-LABEL: test21:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; KNL-NEXT: vpsllw $15, %ymm3, %ymm3
+; KNL-NEXT: vpsraw $15, %ymm3, %ymm3
+; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0
+; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
+; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
+; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test21:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %ymm1, %ymm1
+; SKX-NEXT: vpmovb2m %ymm1, %k1
+; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; SKX-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
}
-; KNL-LABEL: test20
-; KNL: movzbl %dil, %eax
-; KNL: kmovw %eax, %k0
-; KNL: kshiftlw $13, %k0, %k1
-; KNL: kshiftrw $15, %k1, %k1
-; KNL: kshiftlw $12, %k0, %k0
-; KNL: kshiftrw $15, %k0, %k0
-; KNL: kshiftlw $4, %k0, %k0
-; KNL: kshiftlw $1, %k1, %k2
-; KNL: korw %k0, %k2, %k0
-; KNL: kshiftlw $6, %k1, %k1
-; KNL: korw %k1, %k0, %k1
-define <8 x i1> @test20(i8 %a, i16 %y) {
- %b = bitcast i8 %a to <8 x i1>
- %c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
- ret <8 x i1> %c
+define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
+; KNL-LABEL: test22:
+; KNL: ## BB#0:
+; KNL-NEXT: vpextrd $3, %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vpextrd $2, %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vpextrd $1, %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vmovd %xmm0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test22:
+; SKX: ## BB#0:
+; SKX-NEXT: vpslld $31, %xmm0, %xmm0
+; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: kmovb %k0, (%rdi)
+; SKX-NEXT: retq
+ store <4 x i1> %a, <4 x i1>* %addr
+ ret void
}
-; KNL-LABEL: test21
-; KNL: vpand %ymm
-; KNL: vextracti128 $1, %ymm2
-; KNL: vpand %ymm
-
-; SKX-LABEL: test21
-; SKX: vpmovb2m
-; SKX: vmovdqu16 {{.*}}%k1
-
-define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
+define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
+; KNL-LABEL: test23:
+; KNL: ## BB#0:
+; KNL-NEXT: vpextrq $1, %xmm0, %rax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: vmovq %xmm0, %rax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test23:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllq $63, %xmm0, %xmm0
+; SKX-NEXT: vpmovq2m %xmm0, %k0
+; SKX-NEXT: kmovb %k0, (%rdi)
+; SKX-NEXT: retq
+ store <2 x i1> %a, <2 x i1>* %addr
+ ret void
}
diff --git a/test/CodeGen/X86/avx512-skx-insert-subvec.ll b/test/CodeGen/X86/avx512-skx-insert-subvec.ll
new file mode 100644
index 000000000000..c54010cd91b9
--- /dev/null
+++ b/test/CodeGen/X86/avx512-skx-insert-subvec.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512dq -mattr=+avx512vl| FileCheck %s
+
+define <8 x i1> @test(<2 x i1> %a) {
+; CHECK-LABEL: test:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
+; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: kshiftlb $2, %k0, %k0
+; CHECK-NEXT: vpmovm2w %k0, %xmm0
+; CHECK-NEXT: retq
+ %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x i1> %res
+}
+
+define <8 x i1> @test1(<2 x i1> %a) {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
+; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: kshiftlb $4, %k0, %k0
+; CHECK-NEXT: vpmovm2w %k0, %xmm0
+; CHECK-NEXT: retq
+ %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
+ ret <8 x i1> %res
+}
+
+define <8 x i1> @test2(<2 x i1> %a) {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
+; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: vpmovm2q %k0, %zmm0
+; CHECK-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,0,1],zmm0[0,1,0,1]
+; CHECK-NEXT: vpsllq $63, %zmm0, %zmm0
+; CHECK-NEXT: vpmovq2m %zmm0, %k0
+; CHECK-NEXT: vpmovm2w %k0, %xmm0
+; CHECK-NEXT: retq
+ %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
+ ret <8 x i1> %res
+}
+
+define <8 x i1> @test3(<4 x i1> %a) {
+; CHECK-LABEL: test3:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
+; CHECK-NEXT: vpmovd2m %xmm0, %k0
+; CHECK-NEXT: kshiftlb $4, %k0, %k0
+; CHECK-NEXT: kshiftrb $4, %k0, %k0
+; CHECK-NEXT: vpmovm2w %k0, %xmm0
+; CHECK-NEXT: retq
+
+ %res = shufflevector <4 x i1> %a, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i1> %res
+}
+
+define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
+; CHECK-LABEL: test4:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
+; CHECK-NEXT: vpmovd2m %xmm0, %k0
+; CHECK-NEXT: kshiftlb $4, %k0, %k0
+; CHECK-NEXT: kshiftrb $4, %k0, %k1
+; CHECK-NEXT: korb %k0, %k1, %k0
+; CHECK-NEXT: vpmovm2w %k0, %xmm0
+; CHECK-NEXT: retq
+
+ %res = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i1> %res
+}
+
+define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
+; CHECK-LABEL: test5:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
+; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kshiftrw $2, %k0, %k1
+; CHECK-NEXT: korw %k0, %k1, %k0
+; CHECK-NEXT: vpmovm2d %k0, %xmm0
+; CHECK-NEXT: retq
+
+ %res = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i1> %res
+}
+
+define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
+; CHECK-LABEL: test6:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
+; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: kshiftlw $2, %k0, %k0
+; CHECK-NEXT: kshiftrw $2, %k0, %k1
+; CHECK-NEXT: korw %k0, %k1, %k0
+; CHECK-NEXT: kunpckbw %k0, %k0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %xmm0
+; CHECK-NEXT: retq
+
+ %res = shufflevector <2 x i1> %a, <2 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i1> %res
+}
+
+define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
+; CHECK-LABEL: test7:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
+; CHECK-NEXT: vpmovd2m %xmm0, %k0
+; CHECK-NEXT: kshiftlb $4, %k0, %k0
+; CHECK-NEXT: kshiftrb $4, %k0, %k1
+; CHECK-NEXT: korb %k0, %k1, %k0
+; CHECK-NEXT: kunpckbw %k0, %k0, %k0
+; CHECK-NEXT: kunpckwd %k0, %k0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %ymm0
+; CHECK-NEXT: retq
+
+ %res = shufflevector <4 x i1> %a, <4 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i1> %res
+}
+
+define <64 x i1> @test8(<8 x i1> %a, <8 x i1>%b) {
+; CHECK-LABEL: test8:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1
+; CHECK-NEXT: vpmovw2m %xmm1, %k0
+; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
+; CHECK-NEXT: vpmovw2m %xmm0, %k1
+; CHECK-NEXT: kunpckdq %k1, %k0, %k0
+; CHECK-NEXT: vpmovm2b %k0, %zmm0
+; CHECK-NEXT: retq
+
+ %res = shufflevector <8 x i1> %a, <8 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <64 x i1> %res
+}
+
diff --git a/test/CodeGen/X86/avx512-trunc-ext.ll b/test/CodeGen/X86/avx512-trunc-ext.ll
deleted file mode 100644
index f25458972e42..000000000000
--- a/test/CodeGen/X86/avx512-trunc-ext.ll
+++ /dev/null
@@ -1,961 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
-
-
-; KNL-LABEL: trunc_16x32_to_16x8
-; KNL: vpmovdb
-; KNL: ret
-define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
- %x = trunc <16 x i32> %i to <16 x i8>
- ret <16 x i8> %x
-}
-
-; KNL-LABEL: trunc_8x64_to_8x16
-; KNL: vpmovqw
-; KNL: ret
-define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
- %x = trunc <8 x i64> %i to <8 x i16>
- ret <8 x i16> %x
-}
-
-;SKX-LABEL: zext_8x8mem_to_8x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = zext <8 x i8> %a to <8 x i16>
- %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
- ret <8 x i16> %ret
-}
-
-;SKX-LABEL: sext_8x8mem_to_8x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = sext <8 x i8> %a to <8 x i16>
- %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
- ret <8 x i16> %ret
-}
-
-;SKX-LABEL: zext_16x8mem_to_16x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %xmm0, %k1
-;SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
- %a = load <16 x i8>,<16 x i8> *%i,align 1
- %x = zext <16 x i8> %a to <16 x i16>
- %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
- ret <16 x i16> %ret
-}
-
-;SKX-LABEL: sext_16x8mem_to_16x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %xmm0, %k1
-;SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
- %a = load <16 x i8>,<16 x i8> *%i,align 1
- %x = sext <16 x i8> %a to <16 x i16>
- %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
- ret <16 x i16> %ret
-}
-
-;SKX-LABEL: zext_16x8_to_16x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovzxbw %xmm0, %ymm0
-;SKX-NEXT: retq
-define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
- %x = zext <16 x i8> %a to <16 x i16>
- ret <16 x i16> %x
-}
-
-;SKX-LABEL: zext_16x8_to_16x16_mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %xmm1, %k1
-;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
- %x = zext <16 x i8> %a to <16 x i16>
- %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
- ret <16 x i16> %ret
-}
-
-;SKX-LABEL: sext_16x8_to_16x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxbw %xmm0, %ymm0
-;SKX-NEXT: retq
-define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
- %x = sext <16 x i8> %a to <16 x i16>
- ret <16 x i16> %x
-}
-
-;SKX-LABEL: sext_16x8_to_16x16_mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %xmm1, %k1
-;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
- %x = sext <16 x i8> %a to <16 x i16>
- %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
- ret <16 x i16> %ret
-}
-
-;SKX-LABEL: zext_32x8mem_to_32x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %ymm0, %k1
-;SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
- %a = load <32 x i8>,<32 x i8> *%i,align 1
- %x = zext <32 x i8> %a to <32 x i16>
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-;SKX-LABEL: sext_32x8mem_to_32x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %ymm0, %k1
-;SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
- %a = load <32 x i8>,<32 x i8> *%i,align 1
- %x = sext <32 x i8> %a to <32 x i16>
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-;SKX-LABEL: zext_32x8_to_32x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovzxbw %ymm0, %zmm0
-;SKX-NEXT: retq
-define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
- %x = zext <32 x i8> %a to <32 x i16>
- ret <32 x i16> %x
-}
-
-;SKX-LABEL: zext_32x8_to_32x16_mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %ymm1, %k1
-;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
- %x = zext <32 x i8> %a to <32 x i16>
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-;SKX-LABEL: sext_32x8_to_32x16:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxbw %ymm0, %zmm0
-;SKX-NEXT: retq
-define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
- %x = sext <32 x i8> %a to <32 x i16>
- ret <32 x i16> %x
-}
-
-;SKX-LABEL: sext_32x8_to_32x16_mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %ymm1, %k1
-;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
- %x = sext <32 x i8> %a to <32 x i16>
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-;SKX-LABEL: zext_4x8mem_to_4x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = zext <4 x i8> %a to <4 x i32>
- %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
- ret <4 x i32> %ret
-}
-
-;SKX-LABEL: sext_4x8mem_to_4x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = sext <4 x i8> %a to <4 x i32>
- %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
- ret <4 x i32> %ret
-}
-
-;SKX-LABEL: zext_8x8mem_to_8x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = zext <8 x i8> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-;SKX-LABEL: sext_8x8mem_to_8x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = sext <8 x i8> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-;KNL-LABEL: zext_16x8mem_to_16x32:
-;KNL: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
-;KNL-NEXT: retq
-define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
- %a = load <16 x i8>,<16 x i8> *%i,align 1
- %x = zext <16 x i8> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-;KNL-LABEL: sext_16x8mem_to_16x32:
-;KNL: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
-;KNL-NEXT: retq
-define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
- %a = load <16 x i8>,<16 x i8> *%i,align 1
- %x = sext <16 x i8> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-;KNL-LABEL: zext_16x8_to_16x32_mask:
-;KNL: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
-;KNL-NEXT: retq
-define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
- %x = zext <16 x i8> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-;KNL-LABEL: sext_16x8_to_16x32_mask:
-;KNL: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
-;KNL-NEXT: retq
-define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
- %x = sext <16 x i8> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-; KNL-LABEL: zext_16x8_to_16x32
-; KNL: vpmovzxbd {{.*}}%zmm
-; KNL: ret
-define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
- %x = zext <16 x i8> %i to <16 x i32>
- ret <16 x i32> %x
-}
-
-; KNL-LABEL: sext_16x8_to_16x32
-; KNL: vpmovsxbd {{.*}}%zmm
-; KNL: ret
-define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
- %x = sext <16 x i8> %i to <16 x i32>
- ret <16 x i32> %x
-}
-
-;SKX-LABEL: zext_2x8mem_to_2x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovq2m %xmm0, %k1
-;SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
- %a = load <2 x i8>,<2 x i8> *%i,align 1
- %x = zext <2 x i8> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-;SKX-LABEL: sext_2x8mem_to_2x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovq2m %xmm0, %k1
-;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
- %a = load <2 x i8>,<2 x i8> *%i,align 1
- %x = sext <2 x i8> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-;SKX-LABEL: sext_2x8mem_to_2x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxbq (%rdi), %xmm0
-;SKX-NEXT: retq
-define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
- %a = load <2 x i8>,<2 x i8> *%i,align 1
- %x = sext <2 x i8> %a to <2 x i64>
- ret <2 x i64> %x
-}
-
-;SKX-LABEL: zext_4x8mem_to_4x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = zext <4 x i8> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-;SKX-LABEL: sext_4x8mem_to_4x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = sext <4 x i8> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-;SKX-LABEL: sext_4x8mem_to_4x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxbq (%rdi), %ymm0
-;SKX-NEXT: retq
-define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = sext <4 x i8> %a to <4 x i64>
- ret <4 x i64> %x
-}
-
-;KNL-LABEL: zext_8x8mem_to_8x64:
-;KNL: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
-;KNL-NEXT: retq
-define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = zext <8 x i8> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-;KNL-LABEL: sext_8x8mem_to_8x64mask:
-;KNL: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
-;KNL-NEXT: retq
-define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = sext <8 x i8> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-;KNL-LABEL: sext_8x8mem_to_8x64:
-;KNL: vpmovsxbq (%rdi), %zmm0
-;KNL-NEXT: retq
-define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = sext <8 x i8> %a to <8 x i64>
- ret <8 x i64> %x
-}
-
-;SKX-LABEL: zext_4x16mem_to_4x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = zext <4 x i16> %a to <4 x i32>
- %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
- ret <4 x i32> %ret
-}
-
-;SKX-LABEL: sext_4x16mem_to_4x32mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = sext <4 x i16> %a to <4 x i32>
- %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
- ret <4 x i32> %ret
-}
-
-;SKX-LABEL: sext_4x16mem_to_4x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxwd (%rdi), %xmm0
-;SKX-NEXT: retq
-define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = sext <4 x i16> %a to <4 x i32>
- ret <4 x i32> %x
-}
-
-
-;SKX-LABEL: zext_8x16mem_to_8x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = zext <8 x i16> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-;SKX-LABEL: sext_8x16mem_to_8x32mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = sext <8 x i16> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-;SKX-LABEL: sext_8x16mem_to_8x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxwd (%rdi), %ymm0
-;SKX-NEXT: retq
-define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = sext <8 x i16> %a to <8 x i32>
- ret <8 x i32> %x
-}
-
-;SKX-LABEL: zext_8x16_to_8x32mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm1, %k1
-;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
- %x = zext <8 x i16> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-;SKX-LABEL: zext_8x16_to_8x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovzxwd %xmm0, %ymm0
-;SKX-NEXT: retq
-define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
- %x = zext <8 x i16> %a to <8 x i32>
- ret <8 x i32> %x
-}
-
-;SKX-LABEL: zext_16x16mem_to_16x32:
-;KNL-LABEL: zext_16x16mem_to_16x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %xmm0, %k1
-;SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
-;KNL: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
- %a = load <16 x i16>,<16 x i16> *%i,align 1
- %x = zext <16 x i16> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-;SKX-LABEL: sext_16x16mem_to_16x32mask:
-;KNL-LABEL: sext_16x16mem_to_16x32mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %xmm0, %k1
-;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
-;KNL: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
- %a = load <16 x i16>,<16 x i16> *%i,align 1
- %x = sext <16 x i16> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-;SKX-LABEL: sext_16x16mem_to_16x32:
-;KNL-LABEL: sext_16x16mem_to_16x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxwd (%rdi), %zmm0
-;KNL: vpmovsxwd (%rdi), %zmm0
-;SKX-NEXT: retq
-define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
- %a = load <16 x i16>,<16 x i16> *%i,align 1
- %x = sext <16 x i16> %a to <16 x i32>
- ret <16 x i32> %x
-}
-;SKX-LABEL: zext_16x16_to_16x32mask:
-;KNL-LABEL: zext_16x16_to_16x32mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovb2m %xmm1, %k1
-;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
-;KNL: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
- %x = zext <16 x i16> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-;SKX-LABEL: zext_16x16_to_16x32:
-;KNL-LABEL: zext_16x16_to_16x32:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovzxwd %ymm0, %zmm0
-;KNL: vpmovzxwd %ymm0, %zmm0
-;SKX-NEXT: retq
-define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
- %x = zext <16 x i16> %a to <16 x i32>
- ret <16 x i32> %x
-}
-
-;SKX-LABEL: zext_2x16mem_to_2x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovq2m %xmm0, %k1
-;SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
- %a = load <2 x i16>,<2 x i16> *%i,align 1
- %x = zext <2 x i16> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-
-;SKX-LABEL: sext_2x16mem_to_2x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovq2m %xmm0, %k1
-;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
- %a = load <2 x i16>,<2 x i16> *%i,align 1
- %x = sext <2 x i16> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-
-;SKX-LABEL: sext_2x16mem_to_2x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxwq (%rdi), %xmm0
-;SKX-NEXT: retq
-define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
- %a = load <2 x i16>,<2 x i16> *%i,align 1
- %x = sext <2 x i16> %a to <2 x i64>
- ret <2 x i64> %x
-}
-
-;SKX-LABEL: zext_4x16mem_to_4x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = zext <4 x i16> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-;SKX-LABEL: sext_4x16mem_to_4x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = sext <4 x i16> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-;SKX-LABEL: sext_4x16mem_to_4x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxwq (%rdi), %ymm0
-;SKX-NEXT: retq
-define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = sext <4 x i16> %a to <4 x i64>
- ret <4 x i64> %x
-}
-
-;SKX-LABEL: zext_8x16mem_to_8x64:
-;KNL-LABEL: zext_8x16mem_to_8x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
-;KNL: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = zext <8 x i16> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-;SKX-LABEL: sext_8x16mem_to_8x64mask:
-;KNL-LABEL: sext_8x16mem_to_8x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
-;KNL: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = sext <8 x i16> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-;SKX-LABEL: sext_8x16mem_to_8x64:
-;KNL-LABEL: sext_8x16mem_to_8x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxwq (%rdi), %zmm0
-;KNL: vpmovsxwq (%rdi), %zmm0
-;SKX-NEXT: retq
-define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = sext <8 x i16> %a to <8 x i64>
- ret <8 x i64> %x
-}
-
-;SKX-LABEL: zext_8x16_to_8x64mask:
-;KNL-LABEL: zext_8x16_to_8x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm1, %k1
-;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
-;KNL: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
- %x = zext <8 x i16> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-;SKX-LABEL: zext_8x16_to_8x64:
-;KNL-LABEL: zext_8x16_to_8x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovzxwq %xmm0, %zmm0
-;KNL: vpmovzxwq %xmm0, %zmm0
-;SKX-NEXT: retq
-; KNL: ret
-define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
- %ret = zext <8 x i16> %a to <8 x i64>
- ret <8 x i64> %ret
-}
-
-;SKX-LABEL: zext_2x32mem_to_2x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovq2m %xmm0, %k1
-;SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
- %a = load <2 x i32>,<2 x i32> *%i,align 1
- %x = zext <2 x i32> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-
-;SKX-LABEL: sext_2x32mem_to_2x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovq2m %xmm0, %k1
-;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
- %a = load <2 x i32>,<2 x i32> *%i,align 1
- %x = sext <2 x i32> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-
-;SKX-LABEL: sext_2x32mem_to_2x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxdq (%rdi), %xmm0
-;SKX-NEXT: retq
-define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
- %a = load <2 x i32>,<2 x i32> *%i,align 1
- %x = sext <2 x i32> %a to <2 x i64>
- ret <2 x i64> %x
-}
-
-;SKX-LABEL: zext_4x32mem_to_4x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i32>,<4 x i32> *%i,align 1
- %x = zext <4 x i32> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-;SKX-LABEL: sext_4x32mem_to_4x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm0, %k1
-;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
- %a = load <4 x i32>,<4 x i32> *%i,align 1
- %x = sext <4 x i32> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-;SKX-LABEL: sext_4x32mem_to_4x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxdq (%rdi), %ymm0
-;SKX-NEXT: retq
-define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
- %a = load <4 x i32>,<4 x i32> *%i,align 1
- %x = sext <4 x i32> %a to <4 x i64>
- ret <4 x i64> %x
-}
-
-;SKX-LABEL: sext_4x32_to_4x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxdq %xmm0, %ymm0
-;SKX-NEXT: retq
-define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
- %x = sext <4 x i32> %a to <4 x i64>
- ret <4 x i64> %x
-}
-
-;SKX-LABEL: zext_4x32_to_4x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovd2m %xmm1, %k1
-;SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z}
-;SKX-NEXT: retq
-define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
- %x = zext <4 x i32> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-;SKX-LABEL: zext_8x32mem_to_8x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i32>,<8 x i32> *%i,align 1
- %x = zext <8 x i32> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-;SKX-LABEL: sext_8x32mem_to_8x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm0, %k1
-;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
- %a = load <8 x i32>,<8 x i32> *%i,align 1
- %x = sext <8 x i32> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-;SKX-LABEL: sext_8x32mem_to_8x64:
-;KNL-LABEL: sext_8x32mem_to_8x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxdq (%rdi), %zmm0
-;KNL: vpmovsxdq (%rdi), %zmm0
-;SKX-NEXT: retq
-define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
- %a = load <8 x i32>,<8 x i32> *%i,align 1
- %x = sext <8 x i32> %a to <8 x i64>
- ret <8 x i64> %x
-}
-
-;SKX-LABEL: sext_8x32_to_8x64:
-;KNL-LABEL: sext_8x32_to_8x64:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovsxdq %ymm0, %zmm0
-;KNL: vpmovsxdq %ymm0, %zmm0
-;SKX-NEXT: retq
-define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
- %x = sext <8 x i32> %a to <8 x i64>
- ret <8 x i64> %x
-}
-
-;SKX-LABEL: zext_8x32_to_8x64mask:
-;KNL-LABEL: zext_8x32_to_8x64mask:
-;SKX: ## BB#0:
-;SKX-NEXT: vpmovw2m %xmm1, %k1
-;SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
-;KNL: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
-;SKX-NEXT: retq
-define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
- %x = zext <8 x i32> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-;KNL-LABEL: fptrunc_test
-;KNL: vcvtpd2ps {{.*}}%zmm
-;KNL: ret
-define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
- %b = fptrunc <8 x double> %a to <8 x float>
- ret <8 x float> %b
-}
-
-;KNL-LABEL: fpext_test
-;KNL: vcvtps2pd {{.*}}%zmm
-;KNL: ret
-define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
- %b = fpext <8 x float> %a to <8 x double>
- ret <8 x double> %b
-}
-
-; KNL-LABEL: zext_16i1_to_16xi32
-; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
-; KNL: ret
-define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
- %a = bitcast i16 %b to <16 x i1>
- %c = zext <16 x i1> %a to <16 x i32>
- ret <16 x i32> %c
-}
-
-; KNL-LABEL: zext_8i1_to_8xi64
-; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
-; KNL: ret
-define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
- %a = bitcast i8 %b to <8 x i1>
- %c = zext <8 x i1> %a to <8 x i64>
- ret <8 x i64> %c
-}
-
-; KNL-LABEL: trunc_16i8_to_16i1
-; KNL: vpmovsxbd
-; KNL: vpandd
-; KNL: vptestmd
-; KNL: ret
-; SKX-LABEL: trunc_16i8_to_16i1
-; SKX: vpmovb2m %xmm
-define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
- %mask_b = trunc <16 x i8>%a to <16 x i1>
- %mask = bitcast <16 x i1> %mask_b to i16
- ret i16 %mask
-}
-
-; KNL-LABEL: trunc_16i32_to_16i1
-; KNL: vpandd
-; KNL: vptestmd
-; KNL: ret
-; SKX-LABEL: trunc_16i32_to_16i1
-; SKX: vpmovd2m %zmm
-define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
- %mask_b = trunc <16 x i32>%a to <16 x i1>
- %mask = bitcast <16 x i1> %mask_b to i16
- ret i16 %mask
-}
-
-; SKX-LABEL: trunc_4i32_to_4i1
-; SKX: vpmovd2m %xmm
-; SKX: kandw
-; SKX: vpmovm2d
-define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
- %mask_a = trunc <4 x i32>%a to <4 x i1>
- %mask_b = trunc <4 x i32>%b to <4 x i1>
- %a_and_b = and <4 x i1>%mask_a, %mask_b
- %res = sext <4 x i1>%a_and_b to <4 x i32>
- ret <4 x i32>%res
-}
-
-; KNL-LABEL: trunc_8i16_to_8i1
-; KNL: vpmovsxwq
-; KNL: vpandq LCP{{.*}}(%rip){1to8}
-; KNL: vptestmq
-; KNL: ret
-
-; SKX-LABEL: trunc_8i16_to_8i1
-; SKX: vpmovw2m %xmm
-define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
- %mask_b = trunc <8 x i16>%a to <8 x i1>
- %mask = bitcast <8 x i1> %mask_b to i8
- ret i8 %mask
-}
-
-; KNL-LABEL: sext_8i1_8i32
-; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
-; SKX: vpmovm2d
-; KNL: ret
-define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
- %x = icmp slt <8 x i32> %a1, %a2
- %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
- %y = sext <8 x i1> %x1 to <8 x i32>
- ret <8 x i32> %y
-}
-
-; KNL-LABEL: trunc_v16i32_to_v16i16
-; KNL: vpmovdw
-; KNL: ret
-define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
- %1 = trunc <16 x i32> %x to <16 x i16>
- ret <16 x i16> %1
-}
-
-; KNL-LABEL: trunc_i32_to_i1
-; KNL: movw $-4, %ax
-; KNL: kmovw %eax, %k1
-; KNL: korw
-define i16 @trunc_i32_to_i1(i32 %a) {
- %a_i = trunc i32 %a to i1
- %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
- %res = bitcast <16 x i1> %maskv to i16
- ret i16 %res
-}
-
-; KNL-LABEL: sext_8i1_8i16
-; SKX: vpmovm2w
-; KNL: ret
-define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
- %x = icmp slt <8 x i32> %a1, %a2
- %y = sext <8 x i1> %x to <8 x i16>
- ret <8 x i16> %y
-}
-
-; KNL-LABEL: sext_16i1_16i32
-; SKX: vpmovm2d
-; KNL: ret
-define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
- %x = icmp slt <16 x i32> %a1, %a2
- %y = sext <16 x i1> %x to <16 x i32>
- ret <16 x i32> %y
-}
-
-; KNL-LABEL: sext_8i1_8i64
-; SKX: vpmovm2q
-; KNL: ret
-define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
- %x = icmp slt <8 x i32> %a1, %a2
- %y = sext <8 x i1> %x to <8 x i64>
- ret <8 x i64> %y
-}
-
-; KNL-LABEL: @extload_v8i64
-; KNL: vpmovsxbq
-define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
- %sign_load = load <8 x i8>, <8 x i8>* %a
- %c = sext <8 x i8> %sign_load to <8 x i64>
- store <8 x i64> %c, <8 x i64>* %res
- ret void
-}
-
-;SKX-LABEL: test21:
-;SKX: vmovdqu16 %zmm0, %zmm3 {%k1}
-;SKX-NEXT: kshiftrq $32, %k1, %k1
-;SKX-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1}
-define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
- %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
- ret <64 x i16> %ret
-}
-
diff --git a/test/CodeGen/X86/avx512-trunc.ll b/test/CodeGen/X86/avx512-trunc.ll
new file mode 100644
index 000000000000..e4e5c2b8a1d5
--- /dev/null
+++ b/test/CodeGen/X86/avx512-trunc.ll
@@ -0,0 +1,488 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+
+ attributes #0 = { nounwind }
+
+define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
+; ALL-LABEL: trunc_16x32_to_16x8:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovdb %zmm0, %xmm0
+; ALL-NEXT: retq
+ %x = trunc <16 x i32> %i to <16 x i8>
+ ret <16 x i8> %x
+}
+
+define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
+; ALL-LABEL: trunc_8x64_to_8x16:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovqw %zmm0, %xmm0
+; ALL-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i16>
+ ret <8 x i16> %x
+}
+
+define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
+; ALL-LABEL: trunc_v16i32_to_v16i16:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovdw %zmm0, %ymm0
+; ALL-NEXT: retq
+ %1 = trunc <16 x i32> %x to <16 x i16>
+ ret <16 x i16> %1
+}
+
+define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
+; ALL-LABEL: trunc_qb_512:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovqw %zmm0, %xmm0
+; ALL-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i8>
+ ret <8 x i8> %x
+}
+
+define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
+; ALL-LABEL: trunc_qb_512_mem:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovqb %zmm0, (%rdi)
+; ALL-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i8>
+ store <8 x i8> %x, <8 x i8>* %res
+ ret void
+}
+
+define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
+; KNL-LABEL: trunc_qb_256:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_qb_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i8>
+ ret <4 x i8> %x
+}
+
+define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
+; KNL-LABEL: trunc_qb_256_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; KNL-NEXT: vmovd %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_qb_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqb %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i8>
+ store <4 x i8> %x, <4 x i8>* %res
+ ret void
+}
+
+define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
+; ALL-LABEL: trunc_qb_128:
+; ALL: ## BB#0:
+; ALL-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i8>
+ ret <2 x i8> %x
+}
+
+define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
+; KNL-LABEL: trunc_qb_128_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; KNL-NEXT: vmovd %xmm0, %eax
+; KNL-NEXT: movw %ax, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_qb_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqb %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i8>
+ store <2 x i8> %x, <2 x i8>* %res
+ ret void
+}
+
+define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
+; ALL-LABEL: trunc_qw_512:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovqw %zmm0, %xmm0
+; ALL-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i16>
+ ret <8 x i16> %x
+}
+
+define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
+; ALL-LABEL: trunc_qw_512_mem:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovqw %zmm0, (%rdi)
+; ALL-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i16>
+ store <8 x i16> %x, <8 x i16>* %res
+ ret void
+}
+
+define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
+; KNL-LABEL: trunc_qw_256:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_qw_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i16>
+ ret <4 x i16> %x
+}
+
+define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
+; KNL-LABEL: trunc_qw_256_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; KNL-NEXT: vmovq %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_qw_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqw %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i16>
+ store <4 x i16> %x, <4 x i16>* %res
+ ret void
+}
+
+define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
+; ALL-LABEL: trunc_qw_128:
+; ALL: ## BB#0:
+; ALL-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i16>
+ ret <2 x i16> %x
+}
+
+define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
+; KNL-LABEL: trunc_qw_128_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; KNL-NEXT: vmovd %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_qw_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqw %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i16>
+ store <2 x i16> %x, <2 x i16>* %res
+ ret void
+}
+
+define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
+; ALL-LABEL: trunc_qd_512:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovqd %zmm0, %ymm0
+; ALL-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i32>
+ ret <8 x i32> %x
+}
+
+define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
+; ALL-LABEL: trunc_qd_512_mem:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovqd %zmm0, (%rdi)
+; ALL-NEXT: retq
+ %x = trunc <8 x i64> %i to <8 x i32>
+ store <8 x i32> %x, <8 x i32>* %res
+ ret void
+}
+
+define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
+; KNL-LABEL: trunc_qd_256:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_qd_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i32>
+ ret <4 x i32> %x
+}
+
+define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
+; KNL-LABEL: trunc_qd_256_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovqd %zmm0, %ymm0
+; KNL-NEXT: vmovaps %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_qd_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i64> %i to <4 x i32>
+ store <4 x i32> %x, <4 x i32>* %res
+ ret void
+}
+
+define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
+; ALL-LABEL: trunc_qd_128:
+; ALL: ## BB#0:
+; ALL-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i32>
+ ret <2 x i32> %x
+}
+
+define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
+; KNL-LABEL: trunc_qd_128_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL-NEXT: vmovq %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_qd_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovqd %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <2 x i64> %i to <2 x i32>
+ store <2 x i32> %x, <2 x i32>* %res
+ ret void
+}
+
+define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
+; ALL-LABEL: trunc_db_512:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovdb %zmm0, %xmm0
+; ALL-NEXT: retq
+ %x = trunc <16 x i32> %i to <16 x i8>
+ ret <16 x i8> %x
+}
+
+define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
+; ALL-LABEL: trunc_db_512_mem:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovdb %zmm0, (%rdi)
+; ALL-NEXT: retq
+ %x = trunc <16 x i32> %i to <16 x i8>
+ store <16 x i8> %x, <16 x i8>* %res
+ ret void
+}
+
+define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
+; KNL-LABEL: trunc_db_256:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_db_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <8 x i32> %i to <8 x i8>
+ ret <8 x i8> %x
+}
+
+define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
+; KNL-LABEL: trunc_db_256_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; KNL-NEXT: vmovq %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_db_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdb %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <8 x i32> %i to <8 x i8>
+ store <8 x i8> %x, <8 x i8>* %res
+ ret void
+}
+
+define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
+; ALL-LABEL: trunc_db_128:
+; ALL: ## BB#0:
+; ALL-NEXT: retq
+ %x = trunc <4 x i32> %i to <4 x i8>
+ ret <4 x i8> %x
+}
+
+define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
+; KNL-LABEL: trunc_db_128_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; KNL-NEXT: vmovd %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_db_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdb %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i32> %i to <4 x i8>
+ store <4 x i8> %x, <4 x i8>* %res
+ ret void
+}
+
+define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
+; ALL-LABEL: trunc_dw_512:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovdw %zmm0, %ymm0
+; ALL-NEXT: retq
+ %x = trunc <16 x i32> %i to <16 x i16>
+ ret <16 x i16> %x
+}
+
+define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
+; ALL-LABEL: trunc_dw_512_mem:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovdw %zmm0, (%rdi)
+; ALL-NEXT: retq
+ %x = trunc <16 x i32> %i to <16 x i16>
+ store <16 x i16> %x, <16 x i16>* %res
+ ret void
+}
+
+define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
+; KNL-LABEL: trunc_dw_256:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_dw_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <8 x i32> %i to <8 x i16>
+ ret <8 x i16> %x
+}
+
+define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
+; KNL-LABEL: trunc_dw_256_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovdw %zmm0, %ymm0
+; KNL-NEXT: vmovaps %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_dw_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <8 x i32> %i to <8 x i16>
+ store <8 x i16> %x, <8 x i16>* %res
+ ret void
+}
+
+define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
+; KNL-LABEL: trunc_dw_128_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; KNL-NEXT: vmovq %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_dw_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovdw %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <4 x i32> %i to <4 x i16>
+ store <4 x i16> %x, <4 x i16>* %res
+ ret void
+}
+
+define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
+; KNL-LABEL: trunc_wb_512:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_wb_512:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %zmm0, %ymm0
+; SKX-NEXT: retq
+ %x = trunc <32 x i16> %i to <32 x i8>
+ ret <32 x i8> %x
+}
+
+define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
+; KNL-LABEL: trunc_wb_512_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
+; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: vmovaps %ymm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_wb_512_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %zmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <32 x i16> %i to <32 x i8>
+ store <32 x i8> %x, <32 x i8>* %res
+ ret void
+}
+
+define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
+; KNL-LABEL: trunc_wb_256:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_wb_256:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %ymm0, %xmm0
+; SKX-NEXT: retq
+ %x = trunc <16 x i16> %i to <16 x i8>
+ ret <16 x i8> %x
+}
+
+define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
+; KNL-LABEL: trunc_wb_256_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vmovaps %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_wb_256_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %ymm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <16 x i16> %i to <16 x i8>
+ store <16 x i8> %x, <16 x i8>* %res
+ ret void
+}
+
+define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
+; ALL-LABEL: trunc_wb_128:
+; ALL: ## BB#0:
+; ALL-NEXT: retq
+ %x = trunc <8 x i16> %i to <8 x i8>
+ ret <8 x i8> %x
+}
+
+define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
+; KNL-LABEL: trunc_wb_128_mem:
+; KNL: ## BB#0:
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; KNL-NEXT: vmovq %xmm0, (%rdi)
+; KNL-NEXT: retq
+;
+; SKX-LABEL: trunc_wb_128_mem:
+; SKX: ## BB#0:
+; SKX-NEXT: vpmovwb %xmm0, (%rdi)
+; SKX-NEXT: retq
+ %x = trunc <8 x i16> %i to <8 x i8>
+ store <8 x i8> %x, <8 x i8>* %res
+ ret void
+}
diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll b/test/CodeGen/X86/avx512-vbroadcast.ll
index 854f1019f0f8..4f679f9aca6f 100644
--- a/test/CodeGen/X86/avx512-vbroadcast.ll
+++ b/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -1,47 +1,54 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
define <16 x i32> @_inreg16xi32(i32 %a) {
-; CHECK-LABEL: _inreg16xi32:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastd %edi, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: _inreg16xi32:
+; ALL: # BB#0:
+; ALL-NEXT: vpbroadcastd %edi, %zmm0
+; ALL-NEXT: retq
%b = insertelement <16 x i32> undef, i32 %a, i32 0
%c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
ret <16 x i32> %c
}
define <8 x i64> @_inreg8xi64(i64 %a) {
-; CHECK-LABEL: _inreg8xi64:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: _inreg8xi64:
+; ALL: # BB#0:
+; ALL-NEXT: vpbroadcastq %rdi, %zmm0
+; ALL-NEXT: retq
%b = insertelement <8 x i64> undef, i64 %a, i32 0
%c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
ret <8 x i64> %c
}
-;CHECK-LABEL: _ss16xfloat_v4
-;CHECK: vbroadcastss %xmm0, %zmm0
-;CHECK: ret
define <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
+; ALL-LABEL: _ss16xfloat_v4:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
+; ALL-NEXT: retq
%b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
ret <16 x float> %b
}
define <16 x float> @_inreg16xfloat(float %a) {
-; CHECK-LABEL: _inreg16xfloat:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: _inreg16xfloat:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
+; ALL-NEXT: retq
%b = insertelement <16 x float> undef, float %a, i32 0
%c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
ret <16 x float> %c
}
-;CHECK-LABEL: _ss16xfloat_mask:
-;CHECK: vbroadcastss %xmm0, %zmm1 {%k1}
-;CHECK: ret
define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
+; ALL-LABEL: _ss16xfloat_mask:
+; ALL: # BB#0:
+; ALL-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; ALL-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
+; ALL-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
+; ALL-NEXT: vmovaps %zmm1, %zmm0
+; ALL-NEXT: retq
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%b = insertelement <16 x float> undef, float %a, i32 0
%c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
@@ -49,10 +56,13 @@ define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %m
ret <16 x float> %r
}
-;CHECK-LABEL: _ss16xfloat_maskz:
-;CHECK: vbroadcastss %xmm0, %zmm0 {%k1} {z}
-;CHECK: ret
define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
+; ALL-LABEL: _ss16xfloat_maskz:
+; ALL: # BB#0:
+; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
+; ALL-NEXT: retq
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%b = insertelement <16 x float> undef, float %a, i32 0
%c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
@@ -60,20 +70,24 @@ define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
ret <16 x float> %r
}
-;CHECK-LABEL: _ss16xfloat_load:
-;CHECK: vbroadcastss (%{{.*}}, %zmm
-;CHECK: ret
define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
+; ALL-LABEL: _ss16xfloat_load:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastss (%rdi), %zmm0
+; ALL-NEXT: retq
%a = load float, float* %a.ptr
%b = insertelement <16 x float> undef, float %a, i32 0
%c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
ret <16 x float> %c
}
-;CHECK-LABEL: _ss16xfloat_mask_load:
-;CHECK: vbroadcastss (%rdi), %zmm0 {%k1}
-;CHECK: ret
define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
+; ALL-LABEL: _ss16xfloat_mask_load:
+; ALL: # BB#0:
+; ALL-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; ALL-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
+; ALL-NEXT: retq
%a = load float, float* %a.ptr
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%b = insertelement <16 x float> undef, float %a, i32 0
@@ -82,10 +96,13 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16
ret <16 x float> %r
}
-;CHECK-LABEL: _ss16xfloat_maskz_load:
-;CHECK: vbroadcastss (%rdi), %zmm0 {%k1} {z}
-;CHECK: ret
define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
+; ALL-LABEL: _ss16xfloat_maskz_load:
+; ALL: # BB#0:
+; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; ALL-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; ALL-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
+; ALL-NEXT: retq
%a = load float, float* %a.ptr
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%b = insertelement <16 x float> undef, float %a, i32 0
@@ -95,19 +112,23 @@ define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1)
}
define <8 x double> @_inreg8xdouble(double %a) {
-; CHECK-LABEL: _inreg8xdouble:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: _inreg8xdouble:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastsd %xmm0, %zmm0
+; ALL-NEXT: retq
%b = insertelement <8 x double> undef, double %a, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
ret <8 x double> %c
}
-;CHECK-LABEL: _sd8xdouble_mask:
-;CHECK: vbroadcastsd %xmm0, %zmm1 {%k1}
-;CHECK: ret
define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
+; ALL-LABEL: _sd8xdouble_mask:
+; ALL: # BB#0:
+; ALL-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; ALL-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
+; ALL-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
+; ALL-NEXT: vmovaps %zmm1, %zmm0
+; ALL-NEXT: retq
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%b = insertelement <8 x double> undef, double %a, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
@@ -115,10 +136,13 @@ define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %m
ret <8 x double> %r
}
-;CHECK-LABEL: _sd8xdouble_maskz:
-;CHECK: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
-;CHECK: ret
define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
+; ALL-LABEL: _sd8xdouble_maskz:
+; ALL: # BB#0:
+; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; ALL-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
+; ALL-NEXT: retq
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%b = insertelement <8 x double> undef, double %a, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
@@ -126,20 +150,24 @@ define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
ret <8 x double> %r
}
-;CHECK-LABEL: _sd8xdouble_load:
-;CHECK: vbroadcastsd (%rdi), %zmm
-;CHECK: ret
define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
+; ALL-LABEL: _sd8xdouble_load:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastsd (%rdi), %zmm0
+; ALL-NEXT: retq
%a = load double, double* %a.ptr
%b = insertelement <8 x double> undef, double %a, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
ret <8 x double> %c
}
-;CHECK-LABEL: _sd8xdouble_mask_load:
-;CHECK: vbroadcastsd (%rdi), %zmm0 {%k1}
-;CHECK: ret
define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
+; ALL-LABEL: _sd8xdouble_mask_load:
+; ALL: # BB#0:
+; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
+; ALL-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
+; ALL-NEXT: retq
%a = load double, double* %a.ptr
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%b = insertelement <8 x double> undef, double %a, i32 0
@@ -149,9 +177,12 @@ define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8
}
define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
-; CHECK-LABEL: _sd8xdouble_maskz_load:
-; CHECK: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
-; CHECK: ret
+; ALL-LABEL: _sd8xdouble_maskz_load:
+; ALL: # BB#0:
+; ALL-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
+; ALL-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
+; ALL-NEXT: retq
%a = load double, double* %a.ptr
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%b = insertelement <8 x double> undef, double %a, i32 0
@@ -161,32 +192,32 @@ define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1)
}
define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
-; CHECK-LABEL: _xmm16xi32:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: _xmm16xi32:
+; ALL: # BB#0:
+; ALL-NEXT: vpbroadcastd %xmm0, %zmm0
+; ALL-NEXT: retq
%b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
ret <16 x i32> %b
}
define <16 x float> @_xmm16xfloat(<16 x float> %a) {
-; CHECK-LABEL: _xmm16xfloat:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: _xmm16xfloat:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
+; ALL-NEXT: retq
%b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
ret <16 x float> %b
}
define <16 x i32> @test_vbroadcast() {
-; CHECK-LABEL: test_vbroadcast:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
-; CHECK-NEXT: vcmpunordps %zmm0, %zmm0, %k1
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK-NEXT: knotw %k1, %k1
-; CHECK-NEXT: vmovdqu32 %zmm0, %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; ALL-LABEL: test_vbroadcast:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; ALL-NEXT: vcmpunordps %zmm0, %zmm0, %k1
+; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; ALL-NEXT: knotw %k1, %k1
+; ALL-NEXT: vmovdqu32 %zmm0, %zmm0 {%k1} {z}
+; ALL-NEXT: retq
entry:
%0 = sext <16 x i1> zeroinitializer to <16 x i32>
%1 = fcmp uno <16 x float> undef, zeroinitializer
@@ -198,10 +229,10 @@ entry:
; We implement the set1 intrinsics with vector initializers. Verify that the
; IR generated will produce broadcasts at the end.
define <8 x double> @test_set1_pd(double %d) #2 {
-; CHECK-LABEL: test_set1_pd:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: test_set1_pd:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: vbroadcastsd %xmm0, %zmm0
+; ALL-NEXT: retq
entry:
%vecinit.i = insertelement <8 x double> undef, double %d, i32 0
%vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
@@ -215,10 +246,10 @@ entry:
}
define <8 x i64> @test_set1_epi64(i64 %d) #2 {
-; CHECK-LABEL: test_set1_epi64:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq %rdi, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: test_set1_epi64:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: vpbroadcastq %rdi, %zmm0
+; ALL-NEXT: retq
entry:
%vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
%vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
@@ -232,10 +263,10 @@ entry:
}
define <16 x float> @test_set1_ps(float %f) #2 {
-; CHECK-LABEL: test_set1_ps:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: test_set1_ps:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
+; ALL-NEXT: retq
entry:
%vecinit.i = insertelement <16 x float> undef, float %f, i32 0
%vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
@@ -257,10 +288,10 @@ entry:
}
define <16 x i32> @test_set1_epi32(i32 %f) #2 {
-; CHECK-LABEL: test_set1_epi32:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd %edi, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: test_set1_epi32:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: vpbroadcastd %edi, %zmm0
+; ALL-NEXT: retq
entry:
%vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
%vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
@@ -284,10 +315,10 @@ entry:
; We implement the scalar broadcast intrinsics with vector initializers.
; Verify that the IR generated will produce the broadcast at the end.
define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
-; CHECK-LABEL: test_mm512_broadcastsd_pd:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
-; CHECK-NEXT: retq
+; ALL-LABEL: test_mm512_broadcastsd_pd:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: vbroadcastsd %xmm0, %zmm0
+; ALL-NEXT: retq
entry:
%0 = extractelement <2 x double> %a, i32 0
%vecinit.i = insertelement <8 x double> undef, double %0, i32 0
@@ -301,30 +332,69 @@ entry:
ret <8 x double> %vecinit7.i
}
-; CHECK-LABEL: test1
-; CHECK: vbroadcastss
define <16 x float> @test1(<8 x float>%a) {
+; ALL-LABEL: test1:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastss %xmm0, %zmm0
+; ALL-NEXT: retq
%res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer
ret <16 x float>%res
}
-; CHECK-LABEL: test2
-; CHECK: vbroadcastsd
define <8 x double> @test2(<4 x double>%a) {
+; ALL-LABEL: test2:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastsd %xmm0, %zmm0
+; ALL-NEXT: retq
%res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer
ret <8 x double>%res
}
-; CHECK-LABEL: test3
-; CHECK: vpbroadcastd
-define <16 x i32> @test3(<8 x i32>%a) {
+define <64 x i8> @_invec32xi8(<32 x i8>%a) {
+; AVX512F-LABEL: _invec32xi8:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX512F-NEXT: vmovaps %zmm0, %zmm1
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: _invec32xi8:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer
+ ret <64 x i8>%res
+}
+
+define <32 x i16> @_invec16xi16(<16 x i16>%a) {
+; AVX512F-LABEL: _invec16xi16:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
+; AVX512F-NEXT: vmovaps %zmm0, %zmm1
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: _invec16xi16:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vpbroadcastw %xmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer
+ ret <32 x i16>%res
+}
+
+define <16 x i32> @_invec8xi32(<8 x i32>%a) {
+; ALL-LABEL: _invec8xi32:
+; ALL: # BB#0:
+; ALL-NEXT: vpbroadcastd %xmm0, %zmm0
+; ALL-NEXT: retq
%res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
ret <16 x i32>%res
}
-; CHECK-LABEL: test4
-; CHECK: vpbroadcastq
-define <8 x i64> @test4(<4 x i64>%a) {
+define <8 x i64> @_invec4xi64(<4 x i64>%a) {
+; ALL-LABEL: _invec4xi64:
+; ALL: # BB#0:
+; ALL-NEXT: vpbroadcastq %xmm0, %zmm0
+; ALL-NEXT: retq
%res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
ret <8 x i64>%res
}
+
diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll
index 6a4a3aa7e371..a8c558df9de8 100644
--- a/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -152,7 +152,6 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
ret <8 x i32> %max
}
-
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; KNL-LABEL: test12:
; KNL: ## BB#0:
@@ -166,6 +165,32 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
ret i16 %res1
}
+define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
+; SKX-LABEL: test12_v32i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
+; SKX-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
+; SKX-NEXT: kunpckwd %k0, %k1, %k0
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: retq
+ %res = icmp eq <32 x i32> %a, %b
+ %res1 = bitcast <32 x i1> %res to i32
+ ret i32 %res1
+}
+
+define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
+; SKX-LABEL: test12_v64i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vpcmpeqw %zmm2, %zmm0, %k0
+; SKX-NEXT: vpcmpeqw %zmm3, %zmm1, %k1
+; SKX-NEXT: kunpckdq %k0, %k1, %k0
+; SKX-NEXT: kmovq %k0, %rax
+; SKX-NEXT: retq
+ %res = icmp eq <64 x i16> %a, %b
+ %res1 = bitcast <64 x i1> %res to i64
+ ret i64 %res1
+}
+
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
; KNL-LABEL: test13:
; KNL: ## BB#0:
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
index 71bf63ed44d0..5f3d16d4efbb 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1,15 +1,51 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding| FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
-; CHECK-LABEL: test_pcmpeq_b
-; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
+; AVX512BW-LABEL: test_pcmpeq_b:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_pcmpeq_b:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .Ltmp0:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
ret i64 %res
}
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
-; CHECK-LABEL: test_mask_pcmpeq_b
-; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
+; AVX512BW-LABEL: test_mask_pcmpeq_b:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_pcmpeq_b:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .Ltmp1:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
ret i64 %res
}
@@ -17,15 +53,35 @@ define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
-; CHECK-LABEL: test_pcmpeq_w
-; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
+; AVX512BW-LABEL: test_pcmpeq_w:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_pcmpeq_w:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
-; CHECK-LABEL: test_mask_pcmpeq_w
-; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
+; AVX512BW-LABEL: test_mask_pcmpeq_w:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_pcmpeq_w:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
ret i32 %res
}
@@ -33,15 +89,49 @@ define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
-; CHECK-LABEL: test_pcmpgt_b
-; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 ##
+; AVX512BW-LABEL: test_pcmpgt_b:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_pcmpgt_b:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .Ltmp2:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
ret i64 %res
}
define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
-; CHECK-LABEL: test_mask_pcmpgt_b
-; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ##
+; AVX512BW-LABEL: test_mask_pcmpgt_b:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_pcmpgt_b:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .Ltmp3:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
ret i64 %res
}
@@ -49,357 +139,839 @@ define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
-; CHECK-LABEL: test_pcmpgt_w
-; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 ##
+; AVX512BW-LABEL: test_pcmpgt_w:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_pcmpgt_w:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
ret i32 %res
}
define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
-; CHECK-LABEL: test_mask_pcmpgt_w
-; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ##
+; AVX512BW-LABEL: test_mask_pcmpgt_w:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_pcmpgt_w:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
ret i32 %res
}
declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
-define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
-; CHECK_LABEL: test_cmp_b_512
-; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
+define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
+; AVX512BW-LABEL: test_cmp_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rdx
+; AVX512BW-NEXT: addq %rcx, %rdx
+; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rdx, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_cmp_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $68, %esp
+; AVX512F-32-NEXT: .Ltmp4:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
+; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: addl (%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $68, %esp
+; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
- %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
-; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
%res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
- %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
-; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
+ %ret1 = add i64 %res0, %res1
%res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
- %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
-; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
+ %ret2 = add i64 %ret1, %res2
%res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
- %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
-; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
+ %ret3 = add i64 %ret2, %res3
%res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
- %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
-; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
+ %ret4 = add i64 %ret3, %res4
%res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
- %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
-; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
+ %ret5 = add i64 %ret4, %res5
%res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
- %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
-; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
+ %ret6 = add i64 %ret5, %res6
%res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
- %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
- ret <8 x i64> %vec7
-}
-
-define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
-; CHECK_LABEL: test_mask_cmp_b_512
-; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
+ %ret7 = add i64 %ret6, %res7
+ ret i64 %ret7
+}
+
+define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
+; AVX512BW-LABEL: test_mask_cmp_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rdx
+; AVX512BW-NEXT: addq %rcx, %rdx
+; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rdx, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_cmp_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $68, %esp
+; AVX512F-32-NEXT: .Ltmp5:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $68, %esp
+; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
- %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
-; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
%res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
- %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
-; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
+ %ret1 = add i64 %res0, %res1
%res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
- %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
-; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
+ %ret2 = add i64 %ret1, %res2
%res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
- %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
-; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
+ %ret3 = add i64 %ret2, %res3
%res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
- %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
-; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
+ %ret4 = add i64 %ret3, %res4
%res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
- %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
-; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
+ %ret5 = add i64 %ret4, %res5
%res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
- %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
-; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
+ %ret6 = add i64 %ret5, %res6
%res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
- %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
- ret <8 x i64> %vec7
+ %ret7 = add i64 %ret6, %res7
+ ret i64 %ret7
}
declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
-define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
-; CHECK_LABEL: test_ucmp_b_512
-; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
+define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
+; AVX512BW-LABEL: test_ucmp_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rdx
+; AVX512BW-NEXT: addq %rcx, %rdx
+; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rdx, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_ucmp_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $68, %esp
+; AVX512F-32-NEXT: .Ltmp6:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
+; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: addl (%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $68, %esp
+; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
- %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
-; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
%res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
- %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
-; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
+ %ret1 = add i64 %res0, %res1
%res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
- %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
-; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
+ %ret2 = add i64 %ret1, %res2
%res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
- %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
-; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
+ %ret3 = add i64 %ret2, %res3
%res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
- %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
-; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
+ %ret4 = add i64 %ret3, %res4
%res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
- %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
-; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
+ %ret5 = add i64 %ret4, %res5
%res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
- %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
-; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
+ %ret6 = add i64 %ret5, %res6
%res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
- %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
- ret <8 x i64> %vec7
-}
-
-define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
-; CHECK_LABEL: test_mask_ucmp_b_512
-; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
+ %ret7 = add i64 %ret6, %res7
+ ret i64 %ret7
+}
+
+define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
+; AVX512BW-LABEL: test_mask_x86_avx512_ucmp_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rcx
+; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rdx
+; AVX512BW-NEXT: addq %rcx, %rdx
+; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rdx, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $68, %esp
+; AVX512F-32-NEXT: .Ltmp7:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $68, %esp
+; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
- %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
-; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
%res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
- %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
-; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
+ %ret1 = add i64 %res0, %res1
%res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
- %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
-; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
+ %ret2 = add i64 %ret1, %res2
%res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
- %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
-; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
+ %ret3 = add i64 %ret2, %res3
%res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
- %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
-; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
+ %ret4 = add i64 %ret3, %res4
%res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
- %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
-; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
+ %ret5 = add i64 %ret4, %res5
%res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
- %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
-; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
+ %ret6 = add i64 %ret5, %res6
%res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
- %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
- ret <8 x i64> %vec7
+ %ret7 = add i64 %ret6, %res7
+ ret i64 %ret7
}
declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
-define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
-; CHECK_LABEL: test_cmp_w_512
-; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
+define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
+; AVX512BW-LABEL: test_cmp_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %edx
+; AVX512BW-NEXT: addl %ecx, %edx
+; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %edx, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_cmp_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %edx
+; AVX512F-32-NEXT: addl %ecx, %edx
+; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %edx, %eax
+; AVX512F-32-NEXT: retl
%res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
- %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
-; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
%res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
- %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
-; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
+ %ret1 = add i32 %res0, %res1
%res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
- %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
-; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
+ %ret2 = add i32 %ret1, %res2
%res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
- %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
-; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
+ %ret3 = add i32 %ret2, %res3
%res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
- %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
-; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
+ %ret4 = add i32 %ret3, %res4
%res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
- %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
-; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
+ %ret5 = add i32 %ret4, %res5
%res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
- %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
-; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
+ %ret6 = add i32 %ret5, %res6
%res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
- %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
- ret <8 x i32> %vec7
-}
-
-define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
-; CHECK_LABEL: test_mask_cmp_w_512
-; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret7 = add i32 %ret6, %res7
+ ret i32 %ret7
+}
+
+define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
+; AVX512BW-LABEL: test_mask_cmp_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %edx
+; AVX512BW-NEXT: addl %ecx, %edx
+; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %edx, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_cmp_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %edx
+; AVX512F-32-NEXT: addl %ecx, %edx
+; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %edx, %eax
+; AVX512F-32-NEXT: retl
%res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
- %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
-; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
%res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
- %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
-; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
+ %ret1 = add i32 %res0, %res1
%res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
- %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
-; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret2 = add i32 %ret1, %res2
%res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
- %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
-; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret3 = add i32 %ret2, %res3
%res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
- %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
-; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret4 = add i32 %ret3, %res4
%res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
- %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
-; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
+ %ret5 = add i32 %ret4, %res5
%res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
- %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
-; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret6 = add i32 %ret5, %res6
%res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
- %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
- ret <8 x i32> %vec7
+ %ret7 = add i32 %ret6, %res7
+ ret i32 %ret7
}
declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
-define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
-; CHECK_LABEL: test_ucmp_w_512
-; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
+define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
+; AVX512BW-LABEL: test_ucmp_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %edx
+; AVX512BW-NEXT: addl %ecx, %edx
+; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %edx, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_ucmp_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %edx
+; AVX512F-32-NEXT: addl %ecx, %edx
+; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %edx, %eax
+; AVX512F-32-NEXT: retl
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
- %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
-; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
%res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
- %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
-; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
+ %ret1 = add i32 %res0, %res1
%res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
- %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
-; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
+ %ret2 = add i32 %ret1, %res2
%res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
- %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
-; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
+ %ret3 = add i32 %ret2, %res3
%res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
- %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
-; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
+ %ret4 = add i32 %ret3, %res4
%res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
- %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
-; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
+ %ret5 = add i32 %ret4, %res5
%res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
- %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
-; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
+ %ret6 = add i32 %ret5, %res6
%res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
- %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
- ret <8 x i32> %vec7
-}
-
-define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
-; CHECK_LABEL: test_mask_ucmp_w_512
-; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret7 = add i32 %ret6, %res7
+ ret i32 %ret7
+}
+
+define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
+; AVX512BW-LABEL: test_mask_ucmp_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %ecx
+; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %edx
+; AVX512BW-NEXT: addl %ecx, %edx
+; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %edx, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_ucmp_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: addl %eax, %ecx
+; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %edx
+; AVX512F-32-NEXT: addl %ecx, %edx
+; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %edx, %eax
+; AVX512F-32-NEXT: retl
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
- %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
-; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
%res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
- %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
-; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret1 = add i32 %res0, %res1
%res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
- %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
-; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret2 = add i32 %ret1, %res2
%res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
- %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
-; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret3 = add i32 %ret2, %res3
%res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
- %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
-; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret4 = add i32 %ret3, %res4
%res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
- %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
-; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret5 = add i32 %ret4, %res5
%res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
- %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
-; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
+ %ret6 = add i32 %ret5, %res6
%res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
- %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
- ret <8 x i32> %vec7
+ %ret7 = add i32 %ret6, %res7
+ ret i32 %ret7
}
declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
-; CHECK-LABEL: test_x86_mask_blend_b_256
-; CHECK: vpblendmb
-define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) {
- %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1]
- ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly
-
-; CHECK-LABEL: test_x86_mask_blend_w_256
-define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) {
- ; CHECK: vpblendmw
- %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1]
- ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly
-
-; CHECK-LABEL: test_x86_mask_blend_b_512
-; CHECK: vpblendmb
-define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
- %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
- ret <64 x i8> %res
-}
declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
-; CHECK-LABEL: test_x86_mask_blend_w_512
define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
- ; CHECK: vpblendmw
- %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
+; AVX512BW-LABEL: test_x86_mask_blend_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_x86_mask_blend_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
ret <32 x i16> %res
}
declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
-; CHECK-LABEL: test_x86_mask_blend_b_128
-; CHECK: vpblendmb
-define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) {
- %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1]
- ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly
-
-; CHECK-LABEL: test_x86_mask_blend_w_128
-define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) {
- ; CHECK: vpblendmw
- %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %res
+define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
+; AVX512BW-LABEL: test_x86_mask_blend_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_x86_mask_blend_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1}
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
+ ret <64 x i8> %res
}
-declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly
define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_mask_packs_epi32_rr_512
- ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
+; AVX512BW-LABEL: test_mask_packs_epi32_rr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi32_rrk_512
- ;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
+; AVX512BW-LABEL: test_mask_packs_epi32_rrk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi32_rrkz_512
- ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
+; AVX512BW-LABEL: test_mask_packs_epi32_rrkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
- ;CHECK-LABEL: test_mask_packs_epi32_rm_512
- ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
+; AVX512BW-LABEL: test_mask_packs_epi32_rm_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi32_rmk_512
- ;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
+; AVX512BW-LABEL: test_mask_packs_epi32_rmk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi32_rmkz_512
- ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
+; AVX512BW-LABEL: test_mask_packs_epi32_rmkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
- ;CHECK-LABEL: test_mask_packs_epi32_rmb_512
- ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
+; AVX512BW-LABEL: test_mask_packs_epi32_rmb_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -408,8 +980,20 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
}
define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi32_rmbk_512
- ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
+; AVX512BW-LABEL: test_mask_packs_epi32_rmbk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -418,8 +1002,18 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <3
}
define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512
- ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
+; AVX512BW-LABEL: test_mask_packs_epi32_rmbkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -430,45 +1024,110 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i
declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
- ;CHECK-LABEL: test_mask_packs_epi16_rr_512
- ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1]
+; AVX512BW-LABEL: test_mask_packs_epi16_rr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi16_rrk_512
- ;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1]
+; AVX512BW-LABEL: test_mask_packs_epi16_rrk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi16_rrkz_512
- ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1]
+; AVX512BW-LABEL: test_mask_packs_epi16_rrkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
- ;CHECK-LABEL: test_mask_packs_epi16_rm_512
- ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07]
+; AVX512BW-LABEL: test_mask_packs_epi16_rm_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi16_rmk_512
- ;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f]
+; AVX512BW-LABEL: test_mask_packs_epi16_rmk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
- ;CHECK-LABEL: test_mask_packs_epi16_rmkz_512
- ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07]
+; AVX512BW-LABEL: test_mask_packs_epi16_rmkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
@@ -478,53 +1137,118 @@ declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64
define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
- ;CHECK-LABEL: test_mask_packus_epi32_rr_512
- ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_packus_epi32_rr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi32_rrk_512
- ;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-LABEL: test_mask_packus_epi32_rrk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi32_rrkz_512
- ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_packus_epi32_rrkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
- ;CHECK-LABEL: test_mask_packus_epi32_rm_512
- ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_packus_epi32_rm_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi32_rmk_512
- ;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-LABEL: test_mask_packus_epi32_rmk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi32_rmkz_512
- ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_packus_epi32_rmkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
- ;CHECK-LABEL: test_mask_packus_epi32_rmb_512
- ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_packus_epi32_rmb_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -533,8 +1257,20 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
}
define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi32_rmbk_512
- ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
+; AVX512BW-LABEL: test_mask_packus_epi32_rmbk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -543,8 +1279,18 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <
}
define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512
- ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_packus_epi32_rmbkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
@@ -555,45 +1301,110 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b,
declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
- ;CHECK-LABEL: test_mask_packus_epi16_rr_512
- ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_packus_epi16_rr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi16_rrk_512
- ;CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-LABEL: test_mask_packus_epi16_rrk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi16_rrkz_512
- ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_packus_epi16_rrkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
- ;CHECK-LABEL: test_mask_packus_epi16_rm_512
- ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_packus_epi16_rm_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi16_rmk_512
- ;CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-LABEL: test_mask_packus_epi16_rmk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
ret <64 x i8> %res
}
define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
- ;CHECK-LABEL: test_mask_packus_epi16_rmkz_512
- ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_packus_epi16_rmkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
ret <64 x i8> %res
@@ -602,45 +1413,102 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %pt
declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
- ;CHECK-LABEL: test_mask_adds_epi16_rr_512
- ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_adds_epi16_rrk_512
- ;CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
- ;CHECK-LABEL: test_mask_adds_epi16_rrkz_512
- ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
- ;CHECK-LABEL: test_mask_adds_epi16_rm_512
- ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_adds_epi16_rmk_512
- ;CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
- ;CHECK-LABEL: test_mask_adds_epi16_rmkz_512
- ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
@@ -649,45 +1517,102 @@ define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr
declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
- ;CHECK-LABEL: test_mask_subs_epi16_rr_512
- ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_subs_epi16_rr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_subs_epi16_rrk_512
- ;CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
- ;CHECK-LABEL: test_mask_subs_epi16_rrkz_512
- ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
- ;CHECK-LABEL: test_mask_subs_epi16_rm_512
- ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_subs_epi16_rmk_512
- ;CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
- ;CHECK-LABEL: test_mask_subs_epi16_rmkz_512
- ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
@@ -696,45 +1621,102 @@ define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr
declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
- ;CHECK-LABEL: test_mask_adds_epu16_rr_512
- ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_adds_epu16_rr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_adds_epu16_rrk_512
- ;CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
- ;CHECK-LABEL: test_mask_adds_epu16_rrkz_512
- ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
- ;CHECK-LABEL: test_mask_adds_epu16_rm_512
- ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_adds_epu16_rmk_512
- ;CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
- ;CHECK-LABEL: test_mask_adds_epu16_rmkz_512
- ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
@@ -743,45 +1725,102 @@ define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr
declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
- ;CHECK-LABEL: test_mask_subs_epu16_rr_512
- ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_subs_epu16_rr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_subs_epu16_rrk_512
- ;CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
- ;CHECK-LABEL: test_mask_subs_epu16_rrkz_512
- ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
- ;CHECK-LABEL: test_mask_subs_epu16_rm_512
- ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0
+; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
- ;CHECK-LABEL: test_mask_subs_epu16_rmk_512
- ;CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
ret <32 x i16> %res
}
define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
- ;CHECK-LABEL: test_mask_subs_epu16_rmkz_512
- ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: retl
%b = load <32 x i16>, <32 x i16>* %ptr_b
%res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
ret <32 x i16> %res
@@ -791,11 +1830,24 @@ declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <3
declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_512
-; CHECK-NOT: call
-; CHECK: vpmaxsb %zmm
-; CHECK: {%k1}
define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res2 = add <64 x i8> %res, %res1
@@ -804,11 +1856,22 @@ define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %
declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_512
-; CHECK-NOT: call
-; CHECK: vpmaxsw %zmm
-; CHECK: {%k1}
define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -817,11 +1880,24 @@ define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16
declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_512
-; CHECK-NOT: call
-; CHECK: vpmaxub %zmm
-; CHECK: {%k1}
define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res2 = add <64 x i8> %res, %res1
@@ -830,11 +1906,22 @@ define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %
declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_512
-; CHECK-NOT: call
-; CHECK: vpmaxuw %zmm
-; CHECK: {%k1}
define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -843,11 +1930,24 @@ define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16
declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_512
-; CHECK-NOT: call
-; CHECK: vpminsb %zmm
-; CHECK: {%k1}
define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res2 = add <64 x i8> %res, %res1
@@ -856,11 +1956,22 @@ define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %
declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_512
-; CHECK-NOT: call
-; CHECK: vpminsw %zmm
-; CHECK: {%k1}
define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -869,11 +1980,24 @@ define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16
declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_512
-; CHECK-NOT: call
-; CHECK: vpminub %zmm
-; CHECK: {%k1}
define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res2 = add <64 x i8> %res, %res1
@@ -882,11 +2006,22 @@ define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %
declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_512
-; CHECK-NOT: call
-; CHECK: vpminuw %zmm
-; CHECK: {%k1}
define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -895,11 +2030,24 @@ define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16
declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermt2w %zmm{{.*}}{%k1}
define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
+; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
+; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
+; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
+; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
+; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -908,11 +2056,24 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32
declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermt2w %zmm{{.*}}{%k1} {z}
define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
+; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
+; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
+; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
+; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -921,11 +2082,24 @@ define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <3
declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermi2w %zmm{{.*}}{%k1}
define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
+; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
+; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
+; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
+; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
+; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -934,11 +2108,24 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32
declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512
-; CHECK-NOT: call
-; CHECK: vpavgb %zmm
-; CHECK: {%k1}
define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res2 = add <64 x i8> %res, %res1
@@ -947,11 +2134,22 @@ define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x
declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_512
-; CHECK-NOT: call
-; CHECK: vpavgw %zmm
-; CHECK: {%k1}
define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -960,11 +2158,24 @@ define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16>
declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpshufb %zmm{{.*}}{%k1}
define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
%res2 = add <64 x i8> %res, %res1
@@ -973,11 +2184,22 @@ define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %
declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpabsw{{.*}}{%k1}
define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpabsw %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpabsw %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpabsw %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vpabsw %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -986,11 +2208,24 @@ define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16>
declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpabsb{{.*}}{%k1}
define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpabsb %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpabsb %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpabsb %zmm0, %zmm1 {%k1}
+; AVX512F-32-NEXT: vpabsb %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
%res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
%res2 = add <64 x i8> %res, %res1
@@ -999,12 +2234,22 @@ define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x
declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: {%k1}
-; CHECK: vpmulhuw {{.*}}encoding: [0x62
define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -1013,12 +2258,22 @@ define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i1
declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: {%k1}
-; CHECK: vpmulhw {{.*}}encoding: [0x62
define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
@@ -1027,14 +2282,627 @@ define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16
declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_512
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: {%k1}
-; CHECK: vpmulhrsw {{.*}}encoding: [0x62
define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
%res2 = add <32 x i16> %res, %res1
ret <32 x i16> %res2
}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
+; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
+; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512F-32-NEXT: retl
+ %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
+ %res3 = add <32 x i8> %res0, %res1
+ %res4 = add <32 x i8> %res3, %res2
+ ret <32 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
+
+define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi)
+; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax)
+; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) {%k1}
+; AVX512F-32-NEXT: retl
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
+ ret void
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
+; AVX512BW-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
+; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0
+; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
+; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
+; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm0
+; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512F-32-NEXT: retl
+ %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
+ %res3 = add <32 x i8> %res0, %res1
+ %res4 = add <32 x i8> %res3, %res2
+ ret <32 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
+
+define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi)
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx)
+; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT: retl
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
+ ret void
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
+; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
+; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
+; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
+; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
+; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm0
+; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX512F-32-NEXT: retl
+ %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
+ %res3 = add <32 x i8> %res0, %res1
+ %res4 = add <32 x i8> %res3, %res2
+ ret <32 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
+
+define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi)
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx)
+; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT: retl
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
+ ret void
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = add <32 x i16> %res, %res1
+ ret <32 x i16> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovw %edi, %k1
+; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
+; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
+; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
+ %res2 = add <64 x i8> %res, %res1
+ ret <64 x i8> %res2
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
+; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
+; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
+ %res2 = add <64 x i8> %res, %res1
+ ret <64 x i8> %res2
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
+; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
+; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = add <32 x i16> %res, %res1
+ ret <32 x i16> %res2
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
+; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
+; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = add <32 x i16> %res, %res1
+ ret <32 x i16> %res2
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_palignr_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_palignr_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
+; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
+; AVX512F-32-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
+ %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
+ %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
+ %res3 = add <64 x i8> %res, %res1
+ %res4 = add <64 x i8> %res3, %res2
+ ret <64 x i8> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_dq_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpslldq $8, %zmm0, %zmm1
+; AVX512BW-NEXT: vpslldq $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_dq_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpslldq $8, %zmm0, %zmm1
+; AVX512F-32-NEXT: vpslldq $4, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
+ %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_dq_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsrldq $8, %zmm0, %zmm1
+; AVX512BW-NEXT: vpsrldq $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_dq_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpsrldq $8, %zmm0, %zmm1
+; AVX512F-32-NEXT: vpsrldq $4, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
+ %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
+
+define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
+; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
+; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
+ %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
+
+define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
+; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
+ %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
+ ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
+
+define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
+; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k0
+; AVX512BW-NEXT: kmovq %rsi, %k1
+; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .Ltmp8:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
+ %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
+ ret i64 %res
+}
+
+declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
+
+define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
+; AVX512BW-LABEL: test_int_x86_avx512_cvtb2mask_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtb2mask_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $12, %esp
+; AVX512F-32-NEXT: .Ltmp9:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
+; AVX512F-32-NEXT: kmovq %k0, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $12, %esp
+; AVX512F-32-NEXT: retl
+ %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
+ ret i64 %res
+}
+
+declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
+
+define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
+; AVX512BW-LABEL: test_int_x86_avx512_cvtw2mask_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpmovw2m %zmm0, %k0
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtw2mask_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpmovw2m %zmm0, %k0
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: retl
+ %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
+ ret i32 %res
+}
+
+declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64)
+
+define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) {
+; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovq %rdi, %k0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0
+; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0)
+ ret <64 x i8> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32)
+
+define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) {
+; AVX512BW-LABEL: test_int_x86_avx512_cvtmask2w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_cvtmask2w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: vpmovm2w %k0, %zmm0
+; AVX512F-32-NEXT: retl
+ %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i8, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index f5413896789a..1db6756c23a8 100644
--- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -3763,6 +3763,38 @@ define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16>
ret <16 x i16> %res2
}
+; CHECK-LABEL: test_x86_mask_blend_b_256
+; CHECK: vpblendmb
+define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) {
+ %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly
+
+; CHECK-LABEL: test_x86_mask_blend_w_256
+define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) {
+ ; CHECK: vpblendmw
+ %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly
+
+; CHECK-LABEL: test_x86_mask_blend_b_128
+; CHECK: vpblendmb
+define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) {
+ %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly
+
+; CHECK-LABEL: test_x86_mask_blend_w_128
+define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) {
+ ; CHECK: vpblendmw
+ %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly
+
declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_128
@@ -3843,3 +3875,719 @@ define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i
%res2 = add <16 x i16> %res, %res1
ret <16 x i16> %res2
}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
+; CHECK: vpmovwb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovwb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
+; CHECK: vpmovwb %xmm0, (%rdi)
+; CHECK: vpmovwb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
+; CHECK: vpmovswb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovswb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
+; CHECK: vpmovswb %xmm0, (%rdi)
+; CHECK: vpmovswb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
+; CHECK: vpmovuswb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovuswb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
+; CHECK: vpmovuswb %xmm0, (%rdi)
+; CHECK: vpmovuswb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
+; CHECK: vpmovwb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovwb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovwb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)
+
+define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
+; CHECK: vpmovwb %ymm0, (%rdi)
+; CHECK: vpmovwb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
+; CHECK: vpmovswb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovswb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovswb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)
+
+define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
+; CHECK: vpmovswb %ymm0, (%rdi)
+; CHECK: vpmovswb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
+; CHECK: vpmovuswb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovuswb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovuswb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)
+
+define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
+; CHECK: vpmovuswb %ymm0, (%rdi)
+; CHECK: vpmovuswb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
+; CHECK: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[8],k1[8],xmm2[9],k1[9],xmm2[10],k1[10],xmm2[11],k1[11],xmm2[12],k1[12],xmm2[13],k1[13],xmm2[14],k1[14],xmm2[15],k1[15]
+; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x68,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
+ %res2 = add <16 x i8> %res, %res1
+ ret <16 x i8> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
+; CHECK: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3],xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7]
+; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x60,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
+ %res2 = add <16 x i8> %res, %res1
+ ret <16 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
+; CHECK: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15],ymm2[24],k1[24],ymm2[25],k1[25],ymm2[26],k1[26],ymm2[27],k1[27],ymm2[28],k1[28],ymm2[29],k1[29],ymm2[30],k1[30],ymm2[31],k1[31]
+; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x68,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+ %res2 = add <32 x i8> %res, %res1
+ ret <32 x i8> %res2
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
+; CHECK: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[16],k1[16],ymm2[17],k1[17],ymm2[18],k1[18],ymm2[19],k1[19],ymm2[20],k1[20],ymm2[21],k1[21],ymm2[22],k1[22],ymm2[23],k1[23]
+; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x60,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
+ %res2 = add <32 x i8> %res, %res1
+ ret <32 x i8> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
+; CHECK: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1],xmm2[2],k1[2],xmm2[3],k1[3]
+; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x61,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
+; CHECK: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[4],k1[4],xmm2[5],k1[5],xmm2[6],k1[6],xmm2[7],k1[7]
+; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x69,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = add <8 x i16> %res, %res1
+ ret <8 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
+; CHECK: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[2],k1[2],ymm2[3],k1[3],ymm2[8],k1[8],ymm2[9],k1[9],ymm2[10],k1[10],ymm2[11],k1[11]
+; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x61,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
+; CHECK: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[4],k1[4],ymm2[5],k1[5],ymm2[6],k1[6],ymm2[7],k1[7],ymm2[12],k1[12],ymm2[13],k1[13],ymm2[14],k1[14],ymm2[15],k1[15]
+; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x69,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res2 = add <16 x i16> %res, %res1
+ ret <16 x i16> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_palignr_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
+ %res3 = add <16 x i8> %res, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_palignr_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
+ %res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
+ %res2 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1)
+ %res3 = add <32 x i8> %res, %res1
+ %res4 = add <32 x i8> %res3, %res2
+ ret <32 x i8> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res2, %res3
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpbroadcastb %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
+; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
+ %res2 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask)
+ %res3 = add <32 x i8> %res, %res1
+ %res4 = add <32 x i8> %res2, %res3
+ ret <32 x i8> %res4
+}
+
+declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
+ %res2 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i8> %res, %res1
+ %res4 = add <16 x i8> %res2, %res3
+ ret <16 x i8> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
+ %res2 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res2, %res3
+ ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
+ %res2 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res2, %res3
+ ret <8 x i16> %res4
+}
+
+declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovq %rdi, %k1 ## encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
+; CHECK-NEXT: vpbroadcastb %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xd0]
+; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0]
+; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
+; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
+ %res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
+ %res2 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask)
+ %res3 = add <64 x i8> %res, %res1
+ %res4 = add <64 x i8> %res2, %res3
+ ret <64 x i8> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
+; CHECK-NEXT: vpbroadcastw %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xd0]
+; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0]
+; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
+; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
+ %res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
+ %res2 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res2, %res3
+ ret <32 x i16> %res4
+}
+
+declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
+
+define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovb2m %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
+ ret i16 %res
+}
+
+declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
+
+define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovb2m %ymm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
+ ret i32 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
+
+define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovw2m %xmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
+ ret i8 %res
+}
+
+declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
+
+define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovw2m %ymm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
+ ret i16 %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16)
+
+define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2b_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: vpmovm2b %k0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0)
+ ret <16 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32)
+
+define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2b_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k0
+; CHECK-NEXT: vpmovm2b %k0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0)
+ ret <32 x i8> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8)
+
+define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k0
+; CHECK-NEXT: vpmovm2w %k0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0)
+ ret <8 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16)
+
+define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: vpmovm2w %k0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0)
+ ret <16 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res2, %res3
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i8, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i8 %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 -1)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i8 3, <8 x i16> zeroinitializer, i8 %x3)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res2, %res3
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i8, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i8 %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 -1)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i8 3, <16 x i16> zeroinitializer, i16 %x3)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
diff --git a/test/CodeGen/X86/avx512cd-intrinsics.ll b/test/CodeGen/X86/avx512cd-intrinsics.ll
new file mode 100644
index 000000000000..29f17bbc0190
--- /dev/null
+++ b/test/CodeGen/X86/avx512cd-intrinsics.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s
+
+define <16 x i32> @test_x86_vbroadcastmw_512(i16 %a0) {
+ ; CHECK: test_x86_vbroadcastmw_512
+ ; CHECK: vpbroadcastmw2d %k0, %zmm0
+ %res = call <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16 %a0) ;
+ ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.x86.avx512.broadcastmw.512(i16)
+
+define <8 x i64> @test_x86_broadcastmb_512(i8 %a0) {
+ ; CHECK: test_x86_broadcastmb_512
+ ; CHECK: vpbroadcastmb2q %k0, %zmm0
+ %res = call <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8 %a0) ;
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.broadcastmb.512(i8)
+
diff --git a/test/CodeGen/X86/avx512cdvl-intrinsics.ll b/test/CodeGen/X86/avx512cdvl-intrinsics.ll
new file mode 100644
index 000000000000..14e91e1a8768
--- /dev/null
+++ b/test/CodeGen/X86/avx512cdvl-intrinsics.ll
@@ -0,0 +1,179 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s
+
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readonly
+
+declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vplzcntd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vplzcntd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vplzcntd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+ %res3 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res2 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res2, %res3
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vplzcntd %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vplzcntd %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vplzcntq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vplzcntq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vplzcntq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vplzcntq %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpconflictd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpconflictd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpconflictd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+ %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res2 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res2, %res3
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpconflictd %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpconflictd %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpconflictq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpconflictq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpconflictq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpconflictq %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
+ ; CHECK: test_x86_vbroadcastmw_256
+ ; CHECK: vpbroadcastmw2d %k0, %ymm0
+ %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
+
+define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
+ ; CHECK: test_x86_vbroadcastmw_128
+ ; CHECK: vpbroadcastmw2d %k0, %xmm0
+ %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
+
+define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
+ ; CHECK: test_x86_broadcastmb_256
+ ; CHECK: vpbroadcastmb2q %k0, %ymm0
+ %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
+
+define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
+ ; CHECK: test_x86_broadcastmb_128
+ ; CHECK: vpbroadcastmb2q %k0, %xmm0
+ %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)
diff --git a/test/CodeGen/X86/avx512dq-intrinsics.ll b/test/CodeGen/X86/avx512dq-intrinsics.ll
new file mode 100644
index 000000000000..a59fe393f556
--- /dev/null
+++ b/test/CodeGen/X86/avx512dq-intrinsics.ll
@@ -0,0 +1,667 @@
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s
+
+declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtpd2qq {rn-sae}, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtps2qq {rn-sae}, %ymm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtps2uqq {rn-sae}, %ymm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
+
+define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)
+
+define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i64> %res, %res1
+ ret <8 x i64> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)
+
+define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vreducepd {{.*}}{%k1}
+; CHECK: vreducepd
+; CHECK: {sae}
+define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
+ %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vreduceps
+; CHECK: {sae}
+; CKECK: {%k1}
+; CHECK: vreduceps
+define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
+ %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrangepd
+; CKECK: {%k1}
+; CHECK: vrangepd
+; CHECK: {sae}
+define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
+ %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrangeps
+; CKECK: {%k1}
+; CHECK: vrangeps
+; CHECK: {sae}
+define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
+ %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vreducess
+; CKECK: {%k1}
+; CHECK: vreducess
+; CHECK: {sae}
+define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+ %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrangess
+; CHECK: {sae}
+; CKECK: {%k1}
+; CHECK: vrangess
+; CHECK: {sae}
+define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+ %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vreducesd
+; CKECK: {%k1}
+; CHECK: vreducesd
+; CHECK: {sae}
+define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+ %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrangesd
+; CKECK: {%k1}
+; CHECK: vrangesd
+; CHECK: {sae}
+define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+ %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+
+declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res2, %res3
+ ret <2 x double> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res3, %res2
+ ret <8 x double> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
+}
+
+declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_pd_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vfpclasspd
+; CHECK: {%k1}
+; CHECK: vfpclasspd
+; CHECK: kmovb %k0
+define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
+ %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
+ %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ps_512
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vfpclassps
+; CHECK: vfpclassps
+; CHECK: {%k1}
+; CHECK: kmov
+define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
+ %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
+ %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
+ %res2 = add i16 %res, %res1
+ ret i16 %res2
+}
+
+declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vfpclasssd
+; CHECK: %k0 {%k1}
+; CHECK: vfpclasssd
+; CHECK: %k0
+define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
+ %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
+ %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss
+; CHECK-NOT: call
+; CHECK: kmovw
+; CHECK: vfpclassss
+; CHECK: %k0
+; CHECK: {%k1}
+; CHECK: kmovw
+; CHECK: vfpclassss
+; CHECK: %k0
+define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
+ %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
+ %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 %x3)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res3, %res2
+ ret <16 x float> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)
+
+define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovd2m %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
+ ret i16 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)
+
+define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovq2m %zmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
+ ret i8 %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16)
+
+define <16 x i32>@test_int_x86_avx512_cvtmask2d_512(i16 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: vpmovm2d %k0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16 %x0)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8)
+
+define <8 x i64>@test_int_x86_avx512_cvtmask2q_512(i8 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k0
+; CHECK-NEXT: vpmovm2q %k0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8 %x0)
+ ret <8 x i64> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, <16 x float> %x2, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
+; CHECK: kmovw %edi, %k1
+; CHECK: vshuff32x4 $68, %zmm0, %zmm0, %zmm2 {%k1} {z}
+; CHECK: vshuff32x4 $68, %zmm0, %zmm0, %zmm1 {%k1}
+; CHECK: vshuff32x4 $68, %zmm0, %zmm0, %zmm0
+; CHECK: vaddps %zmm1, %zmm0, %zmm0
+; CHECK: vaddps %zmm0, %zmm2, %zmm0
+
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 -1)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask)
+ %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
+ %res4 = fadd <16 x float> %res1, %res2
+ %res5 = fadd <16 x float> %res3, %res4
+ ret <16 x float> %res5
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
+; CHECK: kmovb %edi, %k1
+; CHECK: vshuff64x2 $0, %zmm0, %zmm0, %zmm2 {%k1} {z}
+; CHECK: vshuff64x2 $0, %zmm0, %zmm0, %zmm1 {%k1}
+; CHECK: vshuff64x2 $0, %zmm0, %zmm0, %zmm0
+; CHECK: vaddpd %zmm1, %zmm0, %zmm0
+; CHECK: vaddpd %zmm0, %zmm2, %zmm0
+
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 -1)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask)
+ %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
+ %res4 = fadd <8 x double> %res1, %res2
+ %res5 = fadd <8 x double> %res3, %res4
+ ret <8 x double> %res5
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
+; CHECK: kmovw %edi, %k1
+; CHECK: vshufi32x4 $68, %zmm0, %zmm0, %zmm2 {%k1} {z}
+; CHECK: vshufi32x4 $68, %zmm0, %zmm0, %zmm1 {%k1}
+; CHECK: vshufi32x4 $68, %zmm0, %zmm0, %zmm0
+; CHECK: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK: vpaddd %zmm0, %zmm2, %zmm0
+
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 -1)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask)
+ %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
+ %res4 = add <16 x i32> %res1, %res2
+ %res5 = add <16 x i32> %res3, %res4
+ ret <16 x i32> %res5
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
+; CHECK: kmovb %edi, %k1
+; CHECK: vshufi64x2 $0, %zmm0, %zmm0, %zmm2 {%k1} {z}
+; CHECK: vshufi64x2 $0, %zmm0, %zmm0, %zmm1 {%k1}
+; CHECK: vshufi64x2 $0, %zmm0, %zmm0, %zmm0
+; CHECK: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK: vpaddq %zmm0, %zmm2, %zmm0
+
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 -1)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)
+ %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
+ %res4 = add <8 x i64> %res1, %res2
+ %res5 = add <8 x i64> %res3, %res4
+ ret <8 x i64> %res5
+}
diff --git a/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/test/CodeGen/X86/avx512dqvl-intrinsics.ll
index c577abee6640..2065322009da 100644
--- a/test/CodeGen/X86/avx512dqvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -1134,7 +1134,7 @@ define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
;CHECK-LABEL: test_mask_xor_ps_rmbk_512
- ;CHECK: vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f]
+ ;CHECK: vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1}
%q = load float, float* %ptr_b
%vecinit.i = insertelement <16 x float> undef, float %q, i32 0
%b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
@@ -1144,7 +1144,7 @@ define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <
define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
;CHECK-LABEL: test_mask_xor_ps_rmbkz_512
- ;CHECK: vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07]
+ ;CHECK: vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
%q = load float, float* %ptr_b
%vecinit.i = insertelement <16 x float> undef, float %q, i32 0
%b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
@@ -1152,4 +1152,816 @@ define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b,
ret <16 x float> %res
}
-declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) \ No newline at end of file
+declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
+
+declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtpd2qq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtpd2qq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtpd2qq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtpd2qq %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtpd2uqq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtpd2uqq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtpd2uqq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtpd2uqq %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtps2qq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtps2qq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtps2qq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtps2qq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtps2uqq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtps2uqq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtps2uqq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtps2uqq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_qq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_cvt_qq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttpd2qq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttpd2uqq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvttpd2uqq %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttps2qq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_128(<2 x i64> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 %x2)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtuqq2pd.128(<2 x i64> %x0, <2 x double> %x1, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_256(<4 x i64> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtuqq2pd %ymm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 %x2)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtuqq2pd.256(<4 x i64> %x0, <4 x double> %x1, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtuqq2ps %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64> %x0, <4 x float> %x1, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttps2uqq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvttps2uqq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vreducepd {{.*}}{%k1}
+; CHECK: vreducepd
+define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
+ %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vreducepd {{.*}}{%k1}
+; CHECK: vreducepd
+define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
+ %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vreduceps {{.*}}{%k1}
+; CHECK: vreduceps
+define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
+ %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vreduceps {{.*}}{%k1}
+; CHECK: vreduceps
+define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
+ %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrangepd {{.*}}{%k1}
+; CHECK: vrangepd
+define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+ %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrangepd {{.*}}{%k1}
+; CHECK: vrangepd
+define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
+ %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrangeps {{.*}}{%k1}
+; CHECK: vrangeps
+define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+ %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrangeps {{.*}}{%k1}
+; CHECK: vrangeps
+define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
+ %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res3, %res2
+ ret <2 x double> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
+ %res3 = fadd <4 x double> %res, %res1
+ %res4 = fadd <4 x double> %res2, %res3
+ ret <4 x double> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float>, i32, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ps_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vfpclassps
+; CHECK: {%k1}
+; CHECK: vfpclassps
+; CHECK: kmovb %k0
+define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0, i8 %x1) {
+ %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 %x1)
+ %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 -1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float>, i32, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ps_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vfpclassps
+; CHECK: {%k1}
+; CHECK: vfpclassps
+; CHECK: kmovb %k0
+define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0, i8 %x1) {
+ %res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 %x1)
+ %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 -1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double>, i32, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_pd_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vfpclasspd
+; CHECK: {%k1}
+; CHECK: vfpclasspd
+; CHECK: kmovb %k0
+define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0, i8 %x1) {
+ %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 %x1)
+ %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 -1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double>, i32, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_pd_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vfpclasspd
+; CHECK: {%k1}
+; CHECK: vfpclasspd
+; CHECK: kmovb %k0
+define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) {
+ %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 %x1)
+ %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 -1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vbroadcastf32x2 %xmm0, %ymm0
+; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %x3)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x2.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res3, %res2
+ ret <8 x float> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_broadcasti32x2_128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.broadcasti32x2.128(<4 x i32> %x0, <4 x i32> %x2, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
+
+define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovd2m %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
+ ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32>)
+
+define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovd2m %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
+ ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
+
+define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovq2m %xmm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
+ ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
+
+define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovq2m %ymm0, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: retq
+ %res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
+ ret i8 %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8)
+
+define <4 x i32>@test_int_x86_avx512_cvtmask2d_128(i8 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k0
+; CHECK-NEXT: vpmovm2d %k0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.cvtmask2d.128(i8 %x0)
+ ret <4 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8)
+
+define <8 x i32>@test_int_x86_avx512_cvtmask2d_256(i8 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k0
+; CHECK-NEXT: vpmovm2d %k0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.cvtmask2d.256(i8 %x0)
+ ret <8 x i32> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8)
+
+define <2 x i64>@test_int_x86_avx512_cvtmask2q_128(i8 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k0
+; CHECK-NEXT: vpmovm2q %k0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.cvtmask2q.128(i8 %x0)
+ ret <2 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8)
+
+define <4 x i64>@test_int_x86_avx512_cvtmask2q_256(i8 %x0) {
+; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k0
+; CHECK-NEXT: vpmovm2q %k0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.cvtmask2q.256(i8 %x0)
+ ret <4 x i64> %res
+}
+declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
+; CHECK: kmovb %edi, %k1
+; CHECK: vshuff64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z}
+; CHECK: vshuff64x2 $0, %ymm0, %ymm0, %ymm1 {%k1}
+; CHECK: vshuff64x2 $0, %ymm0, %ymm0, %ymm0
+; CHECK: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK: vaddpd %ymm0, %ymm2, %ymm0
+
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 -1)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> %x2, i8 %mask)
+ %res3 = call <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double> %x0, <4 x double> zeroinitializer, i8 %mask)
+ %res4 = fadd <4 x double> %res1, %res2
+ %res5 = fadd <4 x double> %res3, %res4
+ ret <4 x double> %res5
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
+; CHECK: kmovb %edi, %k1
+; CHECK: vshufi64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z}
+; CHECK: vshufi64x2 $0, %ymm0, %ymm0, %ymm1 {%k1}
+; CHECK: vshufi64x2 $0, %ymm0, %ymm0, %ymm0
+; CHECK: vpaddq %ymm1, %ymm0, %ymm0
+; CHECK: vpaddq %ymm0, %ymm2, %ymm0
+
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 -1)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask)
+ %res3 = call <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
+ %res4 = add <4 x i64> %res1, %res2
+ %res5 = add <4 x i64> %res3, %res4
+ ret <4 x i64> %res5
+}
diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll
index fb7c93dc53b3..d9e8728c5ca6 100644
--- a/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -1867,7 +1867,7 @@ define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i
define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
;CHECK-LABEL: test_mask_xor_epi32_rmbkz_128
- ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
+ ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -2299,7 +2299,7 @@ define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1
define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_add_ps_256
- ;CHECK: vaddps %ymm1, %ymm0, %ymm2 {%k1}
+ ;CHECK: vaddps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}
@@ -2321,7 +2321,7 @@ define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_add_ps_128
- ;CHECK: vaddps %xmm1, %xmm0, %xmm2 {%k1}
+ ;CHECK: vaddps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}
@@ -2343,7 +2343,7 @@ define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1
define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_ps_256
- ;CHECK: vsubps %ymm1, %ymm0, %ymm2 {%k1}
+ ;CHECK: vsubps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}
@@ -2365,7 +2365,7 @@ define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_sub_ps_128
- ;CHECK: vsubps %xmm1, %xmm0, %xmm2 {%k1}
+ ;CHECK: vsubps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}
@@ -2387,7 +2387,7 @@ define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1
define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_mul_ps_256
- ;CHECK: vmulps %ymm1, %ymm0, %ymm2 {%k1}
+ ;CHECK: vmulps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}
@@ -2409,7 +2409,7 @@ define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_mul_ps_128
- ;CHECK: vmulps %xmm1, %xmm0, %xmm2 {%k1}
+ ;CHECK: vmulps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}
@@ -2431,7 +2431,7 @@ define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1
define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_div_ps_256
- ;CHECK: vdivps %ymm1, %ymm0, %ymm2 {%k1}
+ ;CHECK: vdivps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}
@@ -2453,7 +2453,7 @@ define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_div_ps_128
- ;CHECK: vdivps %xmm1, %xmm0, %xmm2 {%k1}
+ ;CHECK: vdivps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}
@@ -2475,7 +2475,7 @@ define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1
define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_max_ps_256
- ;CHECK: vmaxps %ymm1, %ymm0, %ymm2 {%k1}
+ ;CHECK: vmaxps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}
@@ -2497,7 +2497,7 @@ define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_max_ps_128
- ;CHECK: vmaxps %xmm1, %xmm0, %xmm2 {%k1}
+ ;CHECK: vmaxps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}
@@ -2519,7 +2519,7 @@ define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1
define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_min_ps_256
- ;CHECK: vminps %ymm1, %ymm0, %ymm2 {%k1}
+ ;CHECK: vminps %ymm1, %ymm0, %ymm2 {%k1}
%res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
ret <8 x float> %res
}
@@ -2541,7 +2541,7 @@ define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
;CHECK-LABEL: test_mm512_mask_min_ps_128
- ;CHECK: vminps %xmm1, %xmm0, %xmm2 {%k1}
+ ;CHECK: vminps %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
ret <4 x float> %res
}
@@ -2591,9 +2591,9 @@ declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>
declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_128
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpmaxsd %xmm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2 ,i8 %mask)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
@@ -2604,9 +2604,9 @@ define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %
declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_256
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpmaxsd %ymm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
@@ -2617,9 +2617,9 @@ define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %
declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_128
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpmaxsq %xmm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
@@ -2630,9 +2630,9 @@ define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %
declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_256
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpmaxsq %ymm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
@@ -2643,9 +2643,9 @@ define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %
declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_128
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpmaxud %xmm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2,i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
@@ -2656,9 +2656,9 @@ define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %
declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_256
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpmaxud %ymm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
@@ -2669,9 +2669,9 @@ define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %
declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_128
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpmaxuq %xmm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
@@ -2682,9 +2682,9 @@ define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %
declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_256
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpmaxuq %ymm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
@@ -2695,9 +2695,9 @@ define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %
declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_128
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpminsd %xmm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
@@ -2708,9 +2708,9 @@ define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %
declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_256
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpminsd %ymm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
@@ -2721,9 +2721,9 @@ define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %
declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_128
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpminsq %xmm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
@@ -2734,9 +2734,9 @@ define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %
declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_256
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpminsq %ymm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
@@ -2747,9 +2747,9 @@ define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %
declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_128
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpminud %xmm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
@@ -2760,9 +2760,9 @@ define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %
declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_256
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpminud %ymm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
@@ -2773,9 +2773,9 @@ define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %
declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_128
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpminuq %xmm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
@@ -2786,9 +2786,9 @@ define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %
declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_256
-; CHECK-NOT: call
+; CHECK-NOT: call
; CHECK: vpminuq %ymm
-; CHECK: {%k1}
+; CHECK: {%k1}
define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
@@ -2799,8 +2799,8 @@ define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %
declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_128
-; CHECK-NOT: call
-; CHECK: kmov
+; CHECK-NOT: call
+; CHECK: kmov
; CHECK: vpermt2d %xmm{{.*}}{%k1}
; CHECK-NOT: {z}
define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
@@ -2813,8 +2813,8 @@ define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i
declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_128
-; CHECK-NOT: call
-; CHECK: kmov
+; CHECK-NOT: call
+; CHECK: kmov
; CHECK: vpermt2d %xmm{{.*}}{%k1} {z}
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
%res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
@@ -2826,8 +2826,8 @@ define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x
declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_256
-; CHECK-NOT: call
-; CHECK: kmov
+; CHECK-NOT: call
+; CHECK: kmov
; CHECK: vpermt2d %ymm{{.*}}{%k1}
; CHECK-NOT: {z}
define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
@@ -2840,8 +2840,8 @@ define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i
declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_256
-; CHECK-NOT: call
-; CHECK: kmov
+; CHECK-NOT: call
+; CHECK: kmov
; CHECK: vpermt2d {{.*}}{%k1} {z}
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
%res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
@@ -2853,9 +2853,9 @@ define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x
declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_128
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermi2pd %xmm{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2pd %xmm{{.*}}{%k1}
define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
%res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
@@ -2866,9 +2866,9 @@ define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0,
declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_256
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermi2pd %ymm{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2pd %ymm{{.*}}{%k1}
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
%res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
@@ -2879,9 +2879,9 @@ define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0,
declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_128
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermi2ps %xmm{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2ps %xmm{{.*}}{%k1}
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
@@ -2892,9 +2892,9 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <
declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_256
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpermi2ps %ymm{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpermi2ps %ymm{{.*}}{%k1}
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
%res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
@@ -2905,9 +2905,9 @@ define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <
declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_128
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpabsq{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpabsq{{.*}}{%k1}
define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
%res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
@@ -2918,9 +2918,9 @@ define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x
declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_256
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpabsq{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpabsq{{.*}}{%k1}
define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
%res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
@@ -2931,9 +2931,9 @@ define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x
declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_128
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpabsd{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpabsd{{.*}}{%k1}
define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
%res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
@@ -2944,9 +2944,9 @@ define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x
declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_256
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vpabsd{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vpabsd{{.*}}{%k1}
define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
%res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
@@ -2958,9 +2958,9 @@ define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x
declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_128
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vscalefpd{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vscalefpd{{.*}}{%k1}
define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
%res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
%res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
@@ -2971,9 +2971,9 @@ define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2
declare <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_256
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vscalefpd{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vscalefpd{{.*}}{%k1}
define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
%res = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
%res1 = call <4 x double> @llvm.x86.avx512.mask.scalef.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
@@ -2983,9 +2983,9 @@ define <4 x double>@test_int_x86_avx512_mask_scalef_pd_256(<4 x double> %x0, <4
declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_128
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vscalefps{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vscalefps{{.*}}{%k1}
define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
%res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
%res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
@@ -2995,12 +2995,2809 @@ define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x
declare <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_256
-; CHECK-NOT: call
-; CHECK: kmov
-; CHECK: vscalefps{{.*}}{%k1}
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vscalefps{{.*}}{%k1}
define <8 x float>@test_int_x86_avx512_mask_scalef_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
%res = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
%res1 = call <8 x float> @llvm.x86.avx512.mask.scalef.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
-} \ No newline at end of file
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
+; CHECK: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1]
+; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1]
+ %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
+; CHECK: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3]
+; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+ %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
+; CHECK: vunpckhps %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[2],k1[2],xmm2[3],k1[3]
+; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+ %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
+; CHECK: ## BB#0:
+; CHECK: vunpckhps %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7]
+; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+ %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
+; CHECK: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0]
+; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
+ %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
+; CHECK: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2]
+; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+ %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
+; CHECK: vunpcklps %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1]
+; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
+; CHECK: vunpcklps %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5]
+; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+ %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
+; CHECK: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[2],k1[2],xmm2[3],k1[3]
+; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
+; CHECK: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0],xmm2[1],k1[1]
+; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
+; CHECK: ## BB#0:
+; CHECK: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[2],k1[2],ymm2[3],k1[3],ymm2[6],k1[6],ymm2[7],k1[7]
+; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
+; CHECK: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[1],k1[1],ymm2[4],k1[4],ymm2[5],k1[5]
+; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
+; CHECK: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[1],k1[1]
+; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[1],xmm1[1]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
+; CHECK: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[0]
+; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xc1]
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
+; CHECK: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[0],ymm2[2],k1[2]
+; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
+; CHECK: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[1],k1[1],ymm2[3],k1[3]
+; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xc1]
+; CHECK-NEXT: ## ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
+; CHECK: vpmovqb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_128:
+; CHECK: vpmovqb %xmm0, (%rdi)
+; CHECK: vpmovqb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
+; CHECK: vpmovsqb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
+; CHECK: vpmovsqb %xmm0, (%rdi)
+; CHECK: vpmovsqb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
+; CHECK: vpmovusqb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.128(<2 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
+; CHECK: vpmovusqb %xmm0, (%rdi)
+; CHECK: vpmovusqb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
+; CHECK: vpmovqb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qb_mem_256:
+; CHECK: vpmovqb %ymm0, (%rdi)
+; CHECK: vpmovqb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
+; CHECK: vpmovsqb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
+; CHECK: vpmovsqb %ymm0, (%rdi)
+; CHECK: vpmovsqb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
+; CHECK: vpmovusqb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.256(<4 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
+; CHECK: vpmovusqb %ymm0, (%rdi)
+; CHECK: vpmovusqb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_128:
+; CHECK: vpmovqw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_128:
+; CHECK: vpmovqw %xmm0, (%rdi)
+; CHECK: vpmovqw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_128:
+; CHECK: vpmovsqw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
+; CHECK: vpmovsqw %xmm0, (%rdi)
+; CHECK: vpmovsqw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_128:
+; CHECK: vpmovusqw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.128(<2 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
+; CHECK: vpmovusqw %xmm0, (%rdi)
+; CHECK: vpmovusqw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_256:
+; CHECK: vpmovqw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qw_mem_256:
+; CHECK: vpmovqw %ymm0, (%rdi)
+; CHECK: vpmovqw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_256:
+; CHECK: vpmovsqw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
+; CHECK: vpmovsqw %ymm0, (%rdi)
+; CHECK: vpmovsqw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_256:
+; CHECK: vpmovusqw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.256(<4 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
+; CHECK: vpmovusqw %ymm0, (%rdi)
+; CHECK: vpmovusqw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_128:
+; CHECK: vpmovqd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqd %xmm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_128:
+; CHECK: vpmovqd %xmm0, (%rdi)
+; CHECK: vpmovqd %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_128:
+; CHECK: vpmovsqd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqd %xmm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
+; CHECK: vpmovsqd %xmm0, (%rdi)
+; CHECK: vpmovsqd %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_128:
+; CHECK: vpmovusqd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqd %xmm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.128(<2 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
+; CHECK: vpmovusqd %xmm0, (%rdi)
+; CHECK: vpmovusqd %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
+; CHECK: vpmovqd %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovqd %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovqd %ymm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_qd_mem_256:
+; CHECK: vpmovqd %ymm0, (%rdi)
+; CHECK: vpmovqd %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovs_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_256:
+; CHECK: vpmovsqd %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsqd %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsqd %ymm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovs.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
+; CHECK: vpmovsqd %ymm0, (%rdi)
+; CHECK: vpmovsqd %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovus_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_256:
+; CHECK: vpmovusqd %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusqd %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusqd %ymm0, %xmm0
+ %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovus.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res3 = add <4 x i32> %res0, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
+; CHECK: vpmovusqd %ymm0, (%rdi)
+; CHECK: vpmovusqd %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_128:
+; CHECK: vpmovdb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_128:
+; CHECK: vpmovdb %xmm0, (%rdi)
+; CHECK: vpmovdb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
+; CHECK: vpmovsdb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
+; CHECK: vpmovsdb %xmm0, (%rdi)
+; CHECK: vpmovsdb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
+; CHECK: vpmovusdb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdb %xmm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.128(<4 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
+; CHECK: vpmovusdb %xmm0, (%rdi)
+; CHECK: vpmovusdb %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_256:
+; CHECK: vpmovdb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_db_mem_256:
+; CHECK: vpmovdb %ymm0, (%rdi)
+; CHECK: vpmovdb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
+; CHECK: vpmovsdb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
+; CHECK: vpmovsdb %ymm0, (%rdi)
+; CHECK: vpmovsdb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
+; CHECK: vpmovusdb %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdb %ymm0, %xmm0
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.256(<8 x i32> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
+; CHECK: vpmovusdb %ymm0, (%rdi)
+; CHECK: vpmovusdb %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_128:
+; CHECK: vpmovdw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_128:
+; CHECK: vpmovdw %xmm0, (%rdi)
+; CHECK: vpmovdw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_128:
+; CHECK: vpmovsdw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
+; CHECK: vpmovsdw %xmm0, (%rdi)
+; CHECK: vpmovsdw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_128:
+; CHECK: vpmovusdw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdw %xmm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.128(<4 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
+; CHECK: vpmovusdw %xmm0, (%rdi)
+; CHECK: vpmovusdw %xmm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_256:
+; CHECK: vpmovdw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovdw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovdw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmov_dw_mem_256:
+; CHECK: vpmovdw %ymm0, (%rdi)
+; CHECK: vpmovdw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_256:
+; CHECK: vpmovsdw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsdw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsdw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
+; CHECK: vpmovsdw %ymm0, (%rdi)
+; CHECK: vpmovsdw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_dw_256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_256:
+; CHECK: vpmovusdw %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovusdw %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovusdw %ymm0, %xmm0
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.dw.256(<8 x i32> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
+; CHECK: vpmovusdw %ymm0, (%rdi)
+; CHECK: vpmovusdw %ymm0, (%rdi) {%k1}
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 %x2)
+ ret void
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtpd2dq %ymm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps(<2 x double> %x0, <4 x float> %x1, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtpd2udq %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtpd2udq %ymm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtps2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2dq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtps2dq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtps2pd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtps2pd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtps2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtps2udq %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2udq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtps2udq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtps2udq %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_128(<2 x double> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttpd2udq %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.128(<2 x double> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtudq2pd %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double>, i32, <2 x double>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrndscalepd {{.*}}{%k1}
+; CHECK: vrndscalepd
+define <2 x double>@test_int_x86_avx512_mask_rndscale_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
+ %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.rndscale.pd.128(<2 x double> %x0, i32 88, <2 x double> %x2, i8 -1)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double>, i32, <4 x double>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_pd_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrndscalepd {{.*}}{%k1}
+; CHECK: vrndscalepd
+define <4 x double>@test_int_x86_avx512_mask_rndscale_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
+ %res = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.rndscale.pd.256(<4 x double> %x0, i32 88, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float>, i32, <4 x float>, i8)
+; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_128
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrndscaleps {{.*}}{%k1}
+; CHECK: vrndscaleps
+define <4 x float>@test_int_x86_avx512_mask_rndscale_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
+ %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.rndscale.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float>, i32, <8 x float>, i8)
+
+; CHECK-LABEL: @test_int_x86_avx512_mask_rndscale_ps_256
+; CHECK-NOT: call
+; CHECK: kmov
+; CHECK: vrndscaleps {{.*}}{%k1}
+; CHECK: vrndscaleps
+define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
+ %res = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 5, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.rndscale.ps.256(<8 x float> %x0, i32 66, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: ## ymm3 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
+; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: ## ymm3 = ymm0[0,1],ymm1[2,3]
+; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
+ %res3 = fadd <4 x double> %res, %res1
+ %res4 = fadd <4 x double> %res2, %res3
+ ret <4 x double> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm0[0,1],ymm1[2,3]
+; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3]
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vgetmantpd $11, %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %x3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> zeroinitializer, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 -1)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res2, %res3
+ ret <2 x double> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_getmant_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_getmant_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vgetmantpd $11, %ymm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float>, i32, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_getmant_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vgetmantps $11, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ps.128(<4 x float> %x0, i32 11, <4 x float> %x2, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_getmant_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_getmant_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vgetmantps $11, %ymm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.getmant.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[0],k1[1]
+; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: ## xmm3 = k1[0],xmm0[1]
+; CHECK-NEXT: vshufpd $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[1]
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 %x4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> %x3, i8 -1)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 22, <2 x double> zeroinitializer, i8 %x4)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res2, %res3
+ ret <2 x double> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[0],k1[1],ymm2[3],k1[2]
+; CHECK-NEXT: vshufpd $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
+ %res2 = fadd <4 x double> %res, %res1
+ ret <4 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: ## xmm2 = xmm2[2,1],k1[1,0]
+; CHECK-NEXT: vshufps $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[2,1],xmm1[1,0]
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: ## ymm2 = ymm2[2,1],k1[1,0],ymm2[6,5],k1[5,4]
+; CHECK-NEXT: vshufps $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: valignd $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vpaddd %xmm3, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: valignd $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: valignq $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_valign_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: valignq $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: ## ymm1 = ymm1[0,1,3,2]
+; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: ## ymm2 = k1[0,1,3,2]
+; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2]
+; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
+ %res3 = fadd <4 x double> %res, %res1
+ %res4 = fadd <4 x double> %res2, %res3
+ ret <4 x double> %res4
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: ## xmm1 = xmm1[1,0]
+; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: ## xmm2 = k1[1,0]
+; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[1,0]
+; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res3, %res2
+ ret <2 x double> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: ## ymm1 = ymm1[2,1,1,0,6,5,5,4]
+; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: ## ymm2 = k1[2,1,1,0,6,5,5,4]
+; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4]
+; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res3, %res2
+ ret <8 x float> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: ## xmm1 = xmm1[2,1,1,0]
+; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: ## xmm2 = k1[2,1,1,0]
+; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0]
+; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
+ %res3 = fadd <4 x double> %res, %res1
+ %res4 = fadd <4 x double> %res2, %res3
+ ret <4 x double> %res4
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res3, %res2
+ ret <2 x double> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddps %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res3, %res2
+ ret <8 x float> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+
+ %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res2, %res3
+ ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)
+
+define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
+ %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
+ %res2 = add <4 x i32> %res, %res1
+ ret <4 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)
+
+define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1}
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)
+
+define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <2 x i64> %res, %res1
+ ret <2 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1}
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)
+
+define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps %zmm0, %zmm3
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
+ %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
+ %res2 = add <4 x i64> %res, %res1
+ ret <4 x i64> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpbroadcastd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
+ %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res2, %res3
+ ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
+ %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res2, %res3
+ ret <4 x i32> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1)
+ %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask)
+ %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res2, %res3
+ ret <4 x i64> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1)
+ %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask)
+ %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res2, %res3
+ ret <2 x i64> %res4
+}
+
+define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
+ ; CHECK: test_x86_vcvtph2ps_128
+ ; CHECK: vcvtph2ps %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) {
+ ; CHECK: test_x86_vcvtph2ps_128_rrk
+ ; CHECK: vcvtph2ps %xmm0, %xmm1 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask)
+ ret <4 x float> %res
+}
+
+
+define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) {
+ ; CHECK: test_x86_vcvtph2ps_128_rrkz
+ ; CHECK: vcvtph2ps %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly
+
+define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
+ ; CHECK: test_x86_vcvtph2ps_256
+ ; CHECK: vcvtph2ps %xmm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) {
+ ; CHECK: test_x86_vcvtph2ps_256_rrk
+ ; CHECK: vcvtph2ps %xmm0, %ymm1 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) {
+ ; CHECK: test_x86_vcvtph2ps_256_rrkz
+ ; CHECK: vcvtph2ps %xmm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly
+
+define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
+ ; CHECK: test_x86_vcvtps2ph_128
+ ; CHECK: vcvtps2ph $2, %xmm0, %xmm0
+ %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+
+declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float>, i32, <8 x i16>, i8) nounwind readonly
+
+define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
+ ; CHECK: test_x86_vcvtps2ph_256
+ ; CHECK: vcvtps2ph $2, %ymm0, %xmm0
+ %res = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly
+
+declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2]
+; CHECK-NEXT: vmovsldup %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2]
+; CHECK-NEXT: vmovsldup %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vmovsldup %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3]
+; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3]
+; CHECK-NEXT: vmovshdup %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-NEXT: vmovshdup %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovddup %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: ## xmm1 = xmm0[0,0]
+; CHECK-NEXT: vmovddup %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: ## xmm2 = xmm0[0,0]
+; CHECK-NEXT: vmovddup %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[0,0]
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res2, %res3
+ ret <2 x double> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovddup %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2]
+; CHECK-NEXT: vmovddup %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2]
+; CHECK-NEXT: vmovddup %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2]
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
+ %res3 = fadd <4 x double> %res, %res1
+ %res4 = fadd <4 x double> %res2, %res3
+ ret <4 x double> %res4
+}
+
+define <8 x float> @test_rsqrt_ps_256_rr(<8 x float> %a0) {
+; CHECK-LABEL: test_rsqrt_ps_256_rr:
+; CHECK: vrsqrt14ps %ymm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_rsqrt_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
+; CHECK-LABEL: test_rsqrt_ps_256_rrkz:
+; CHECK: vrsqrt14ps %ymm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_rsqrt_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+; CHECK-LABEL: test_rsqrt_ps_256_rrk:
+; CHECK: vrsqrt14ps %ymm0, %ymm1 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <4 x float> @test_rsqrt_ps_128_rr(<4 x float> %a0) {
+; CHECK-LABEL: test_rsqrt_ps_128_rr:
+; CHECK: vrsqrt14ps %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_rsqrt_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
+; CHECK-LABEL: test_rsqrt_ps_128_rrkz:
+; CHECK: vrsqrt14ps %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_rsqrt_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+; CHECK-LABEL: test_rsqrt_ps_128_rrk:
+; CHECK: vrsqrt14ps %xmm0, %xmm1 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.rsqrt14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.rsqrt14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @test_rcp_ps_256_rr(<8 x float> %a0) {
+; CHECK-LABEL: test_rcp_ps_256_rr:
+; CHECK: vrcp14ps %ymm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_rcp_ps_256_rrkz(<8 x float> %a0, i8 %mask) {
+; CHECK-LABEL: test_rcp_ps_256_rrkz:
+; CHECK: vrcp14ps %ymm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_rcp_ps_256_rrk(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+; CHECK-LABEL: test_rcp_ps_256_rrk:
+; CHECK: vrcp14ps %ymm0, %ymm1 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float> %a0, <8 x float> %a1, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <4 x float> @test_rcp_ps_128_rr(<4 x float> %a0) {
+; CHECK-LABEL: test_rcp_ps_128_rr:
+; CHECK: vrcp14ps %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_rcp_ps_128_rrkz(<4 x float> %a0, i8 %mask) {
+; CHECK-LABEL: test_rcp_ps_128_rrkz:
+; CHECK: vrcp14ps %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_rcp_ps_128_rrk(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+; CHECK-LABEL: test_rcp_ps_128_rrk:
+; CHECK: vrcp14ps %xmm0, %xmm1 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.rcp14.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+declare <4 x float> @llvm.x86.avx512.rcp14.ps.128(<4 x float>, <4 x float>, i8) nounwind readnone
+
+
+define <4 x double> @test_rsqrt_pd_256_rr(<4 x double> %a0) {
+; CHECK-LABEL: test_rsqrt_pd_256_rr:
+; CHECK: vrsqrt14pd %ymm0, %ymm0
+ %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_rsqrt_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
+; CHECK-LABEL: test_rsqrt_pd_256_rrkz:
+; CHECK: vrsqrt14pd %ymm0, %ymm0 {%k1} {z}
+ %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_rsqrt_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
+; CHECK-LABEL: test_rsqrt_pd_256_rrk:
+; CHECK: vrsqrt14pd %ymm0, %ymm1 {%k1}
+ %res = call <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <2 x double> @test_rsqrt_pd_128_rr(<2 x double> %a0) {
+; CHECK-LABEL: test_rsqrt_pd_128_rr:
+; CHECK: vrsqrt14pd %xmm0, %xmm0
+ %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_rsqrt_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
+; CHECK-LABEL: test_rsqrt_pd_128_rrkz:
+; CHECK: vrsqrt14pd %xmm0, %xmm0 {%k1} {z}
+ %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_rsqrt_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
+; CHECK-LABEL: test_rsqrt_pd_128_rrk:
+; CHECK: vrsqrt14pd %xmm0, %xmm1 {%k1}
+ %res = call <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
+ ret <2 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.rsqrt14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+declare <2 x double> @llvm.x86.avx512.rsqrt14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @test_rcp_pd_256_rr(<4 x double> %a0) {
+; CHECK-LABEL: test_rcp_pd_256_rr:
+; CHECK: vrcp14pd %ymm0, %ymm0
+ %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_rcp_pd_256_rrkz(<4 x double> %a0, i8 %mask) {
+; CHECK-LABEL: test_rcp_pd_256_rrkz:
+; CHECK: vrcp14pd %ymm0, %ymm0 {%k1} {z}
+ %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_rcp_pd_256_rrk(<4 x double> %a0, <4 x double> %a1, i8 %mask) {
+; CHECK-LABEL: test_rcp_pd_256_rrk:
+; CHECK: vrcp14pd %ymm0, %ymm1 {%k1}
+ %res = call <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %mask)
+ ret <4 x double> %res
+}
+
+define <2 x double> @test_rcp_pd_128_rr(<2 x double> %a0) {
+; CHECK-LABEL: test_rcp_pd_128_rr:
+; CHECK: vrcp14pd %xmm0, %xmm0
+ %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 -1)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_rcp_pd_128_rrkz(<2 x double> %a0, i8 %mask) {
+; CHECK-LABEL: test_rcp_pd_128_rrkz:
+; CHECK: vrcp14pd %xmm0, %xmm0 {%k1} {z}
+ %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> zeroinitializer, i8 %mask)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_rcp_pd_128_rrk(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
+; CHECK-LABEL: test_rcp_pd_128_rrk:
+; CHECK: vrcp14pd %xmm0, %xmm1 {%k1}
+ %res = call <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double> %a0, <2 x double> %a1, i8 %mask)
+ ret <2 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.rcp14.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+declare <2 x double> @llvm.x86.avx512.rcp14.pd.128(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask ) {
+; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256:
+; CHECK: kmovw %eax, %k1
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+
+ %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
+ %res3 = fadd <4 x double> %res, %res1
+ %res4 = fadd <4 x double> %res2, %res3
+ ret <4 x double> %res4
+}
+declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly
+
+define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask ) {
+; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256:
+; CHECK: kmovw %eax, %k1
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
+
+ %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask ) {
+; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128:
+; CHECK: kmovw %eax, %k1
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
+
+ %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly
+
+
+declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
+; CHECK: kmovw %eax, %k1
+; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z}
+; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm1 {%k1}
+; CHECK: vshuff32x4 $0, %ymm0, %ymm0, %ymm0
+; CHECK: vaddps %ymm1, %ymm0, %ymm0
+; CHECK: vaddps %ymm0, %ymm2, %ymm0
+
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
+ %res3 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask)
+ %res4 = fadd <8 x float> %res1, %res2
+ %res5 = fadd <8 x float> %res3, %res4
+ ret <8 x float> %res5
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
+; CHECK: kmovw %eax, %k1
+; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z}
+; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm1 {%k1}
+; CHECK: vshufi32x4 $0, %ymm0, %ymm0, %ymm0
+; CHECK: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK: vpaddd %ymm0, %ymm2, %ymm0
+
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
+ %res3 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
+ %res4 = add <8 x i32> %res1, %res2
+ %res5 = add <8 x i32> %res3, %res4
+ ret <8 x i32> %res5
+}
diff --git a/test/CodeGen/X86/bit-piece-comment.ll b/test/CodeGen/X86/bit-piece-comment.ll
new file mode 100644
index 000000000000..6ce858b11dcf
--- /dev/null
+++ b/test/CodeGen/X86/bit-piece-comment.ll
@@ -0,0 +1,64 @@
+; RUN: llc -filetype=asm < %s
+;
+; We check that we don't crash when printing assembly comments that include
+; a DW_OP_bit_piece
+;
+; Regenerate from
+; void fn1() {
+; struct {
+; int dword[2];
+; } u;
+; u.dword[1] = 0;
+; };
+; via clang++ -g -fno-integrated-as -Os
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+%struct.anon = type { [2 x i32] }
+
+; Function Attrs: norecurse nounwind optsize readnone uwtable
+define void @_Z3fn1v() #0 !dbg !4 {
+entry:
+ tail call void @llvm.dbg.declare(metadata %struct.anon* undef, metadata !8, metadata !19), !dbg !20
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !8, metadata !21), !dbg !20
+ ret void, !dbg !22
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { norecurse nounwind optsize readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 256088) (llvm/trunk 256097)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.cpp", directory: "/mnt/extra")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "fn1", linkageName: "_Z3fn1v", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !7)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{!8}
+!8 = !DILocalVariable(name: "u", scope: !4, file: !1, line: 4, type: !9)
+!9 = !DICompositeType(tag: DW_TAG_structure_type, scope: !4, file: !1, line: 2, size: 64, align: 32, elements: !10)
+!10 = !{!11}
+!11 = !DIDerivedType(tag: DW_TAG_member, name: "dword", scope: !9, file: !1, line: 3, baseType: !12, size: 64, align: 32)
+!12 = !DICompositeType(tag: DW_TAG_array_type, baseType: !13, size: 64, align: 32, elements: !14)
+!13 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!14 = !{!15}
+!15 = !DISubrange(count: 2)
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 3}
+!18 = !{!"clang version 3.8.0 (trunk 256088) (llvm/trunk 256097)"}
+!19 = !DIExpression()
+!20 = !DILocation(line: 4, column: 5, scope: !4)
+!21 = !DIExpression(DW_OP_bit_piece, 32, 32)
+!22 = !DILocation(line: 6, column: 1, scope: !4)
diff --git a/test/CodeGen/X86/bitreverse.ll b/test/CodeGen/X86/bitreverse.ll
new file mode 100644
index 000000000000..e3bc8ace38ab
--- /dev/null
+++ b/test/CodeGen/X86/bitreverse.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 %s -o - | FileCheck %s
+
+; These tests just check that the plumbing is in place for @llvm.bitreverse. The
+; actual output is massive at the moment as llvm.bitreverse is not yet legal.
+
+declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone
+
+define <2 x i16> @f(<2 x i16> %a) {
+; CHECK-LABEL: f:
+; CHECK: shll
+ %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
+ ret <2 x i16> %b
+}
+
+declare i8 @llvm.bitreverse.i8(i8) readnone
+
+define i8 @g(i8 %a) {
+; CHECK-LABEL: g:
+; CHECK: shlb
+ %b = call i8 @llvm.bitreverse.i8(i8 %a)
+ ret i8 %b
+}
diff --git a/test/CodeGen/X86/branchfolding-catchpads.ll b/test/CodeGen/X86/branchfolding-catchpads.ll
new file mode 100644
index 000000000000..0468b3c314f6
--- /dev/null
+++ b/test/CodeGen/X86/branchfolding-catchpads.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @throw()
+declare i16 @f()
+
+define i16 @test1(i16 %a, i8* %b) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %cmp = icmp eq i16 %a, 10
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %call1 = invoke i16 @f()
+ to label %cleanup unwind label %catch.dispatch
+
+if.else:
+ %call2 = invoke i16 @f()
+ to label %cleanup unwind label %catch.dispatch
+
+catch.dispatch:
+ %cs = catchswitch within none [ label %catch, label %catch.2 ] unwind to caller
+
+catch:
+ catchpad within %cs [i8* null, i32 8, i8* null]
+ call void @throw() noreturn
+ br label %unreachable
+
+catch.2:
+ catchpad within %cs [i8* null, i32 64, i8* null]
+ store i8 1, i8* %b
+ call void @throw() noreturn
+ br label %unreachable
+
+cleanup:
+ %retval = phi i16 [ %call1, %if.then ], [ %call2, %if.else ]
+ ret i16 %retval
+
+unreachable:
+ unreachable
+}
+
+; This test verifies the case where two funclet blocks meet the old criteria
+; to be placed at the end. The order of the blocks is not important for the
+; purposes of this test. The failure mode is an infinite loop during
+; compilation.
+;
+; CHECK-LABEL: .def test1;
+
+define i16 @test2(i16 %a, i8* %b) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %cmp = icmp eq i16 %a, 10
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %call1 = invoke i16 @f()
+ to label %cleanup unwind label %catch.dispatch
+
+if.else:
+ %call2 = invoke i16 @f()
+ to label %cleanup unwind label %catch.dispatch
+
+catch.dispatch:
+ %cs = catchswitch within none [ label %catch, label %catch.2, label %catch.3 ] unwind to caller
+
+catch:
+ catchpad within %cs [i8* null, i32 8, i8* null]
+ call void @throw() noreturn
+ br label %unreachable
+
+catch.2:
+ %c2 = catchpad within %cs [i8* null, i32 32, i8* null]
+ store i8 1, i8* %b
+ catchret from %c2 to label %cleanup
+
+catch.3:
+ %c3 = catchpad within %cs [i8* null, i32 64, i8* null]
+ store i8 2, i8* %b
+ catchret from %c3 to label %cleanup
+
+cleanup:
+ %retval = phi i16 [ %call1, %if.then ], [ %call2, %if.else ], [ -1, %catch.2 ], [ -1, %catch.3 ]
+ ret i16 %retval
+
+unreachable:
+ unreachable
+}
+
+; This test verifies the case where three funclet blocks all meet the old
+; criteria to be placed at the end. The order of the blocks is not important
+; for the purposes of this test. The failure mode is an infinite loop during
+; compilation.
+;
+; CHECK-LABEL: .def test2;
+
diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll
index 73dbe1f650a1..fd7290d58179 100644
--- a/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/test/CodeGen/X86/buildvec-insertvec.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s
define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
diff --git a/test/CodeGen/X86/catchpad-realign-savexmm.ll b/test/CodeGen/X86/catchpad-realign-savexmm.ll
new file mode 100644
index 000000000000..1160101792ff
--- /dev/null
+++ b/test/CodeGen/X86/catchpad-realign-savexmm.ll
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple=x86_64-pc-windows-msvc -verify-machineinstrs < %s | FileCheck %s
+
+; We should store -2 into UnwindHelp in a slot immediately after the last XMM
+; CSR save.
+
+declare void @g()
+declare i32 @__CxxFrameHandler3(...)
+
+@fp_global = global double 0.0
+
+define void @f() personality i32 (...)* @__CxxFrameHandler3 {
+ %v = load double, double* @fp_global
+ call void @g()
+ %v1 = fadd double %v, 1.0
+ store double %v1, double* @fp_global
+ invoke void @g()
+ to label %return unwind label %catch.dispatch
+
+return:
+ ret void
+
+catch.dispatch:
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %p = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ catchret from %p to label %return
+}
+
+; CHECK: f: # @f
+; CHECK: pushq %rbp
+; CHECK: .seh_pushreg 5
+; CHECK: subq $64, %rsp
+; CHECK: .seh_stackalloc 64
+; CHECK: leaq 64(%rsp), %rbp
+; CHECK: .seh_setframe 5, 64
+; CHECK: movaps %xmm6, -16(%rbp) # 16-byte Spill
+; CHECK: .seh_savexmm 6, 48
+; CHECK: .seh_endprologue
+; CHECK: movq $-2, -24(%rbp)
+; CHECK: movsd fp_global(%rip), %xmm6 # xmm6 = mem[0],zero
+; CHECK: callq g
+; CHECK: addsd __real@3ff0000000000000(%rip), %xmm6
+; CHECK: movsd %xmm6, fp_global(%rip)
+; CHECK: .Ltmp{{.*}}
+; CHECK: callq g
+; CHECK: .Ltmp{{.*}}
+; CHECK: .LBB{{.*}} # Block address taken
+; CHECK: movaps -16(%rbp), %xmm6
+; CHECK: addq $64, %rsp
+; CHECK: popq %rbp
+; CHECK: retq
+; CHECK: .seh_handlerdata
diff --git a/test/CodeGen/X86/catchpad-regmask.ll b/test/CodeGen/X86/catchpad-regmask.ll
new file mode 100644
index 000000000000..0d436f6eb595
--- /dev/null
+++ b/test/CodeGen/X86/catchpad-regmask.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s | FileCheck %s
+
+; Based on this code:
+;
+; extern "C" int array[4];
+; extern "C" void global_array(int idx1, int idx2, int idx3) {
+; try {
+; array[idx1] = 111;
+; throw;
+; } catch (...) {
+; array[idx2] = 222;
+; }
+; array[idx3] = 333;
+; }
+; extern "C" __declspec(dllimport) int imported;
+; extern "C" void access_imported() {
+; try {
+; imported = 111;
+; throw;
+; } catch (...) {
+; imported = 222;
+; }
+; imported = 333;
+; }
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+
+@array = external global [4 x i32], align 16
+@imported = external dllimport global i32, align 4
+
+; Function Attrs: uwtable
+define void @global_array(i32 %idx1, i32 %idx2, i32 %idx3) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %idxprom = sext i32 %idx1 to i64
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* @array, i64 0, i64 %idxprom
+ store i32 111, i32* %arrayidx, align 4, !tbaa !2
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ %idxprom1 = sext i32 %idx2 to i64
+ %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32]* @array, i64 0, i64 %idxprom1
+ store i32 222, i32* %arrayidx2, align 4, !tbaa !2
+ catchret from %0 to label %try.cont
+
+try.cont: ; preds = %catch
+ %idxprom3 = sext i32 %idx3 to i64
+ %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32]* @array, i64 0, i64 %idxprom3
+ store i32 333, i32* %arrayidx4, align 4, !tbaa !2
+ ret void
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+; CHECK-LABEL: global_array: # @global_array
+; CHECK: pushq %rbp
+; First array access
+; CHECK: movslq %ecx, %[[idx:[^ ]*]]
+; CHECK: leaq array(%rip), %[[base:[^ ]*]]
+; CHECK: movl $111, (%[[base]],%[[idx]],4)
+; Might throw an exception and return to below...
+; CHECK: callq _CxxThrowException
+; Third array access must remat the address of array
+; CHECK: movslq {{.*}}, %[[idx:[^ ]*]]
+; CHECK: leaq array(%rip), %[[base:[^ ]*]]
+; CHECK: movl $333, (%[[base]],%[[idx]],4)
+; CHECK: popq %rbp
+; CHECK: retq
+
+; CHECK: "?catch$2@?0?global_array@4HA":
+; CHECK: pushq %rbp
+; CHECK: movslq {{.*}}, %[[idx:[^ ]*]]
+; CHECK: leaq array(%rip), %[[base:[^ ]*]]
+; CHECK: movl $222, (%[[base]],%[[idx]],4)
+; CHECK: popq %rbp
+; CHECK: retq # CATCHRET
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: uwtable
+define void @access_imported() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ store i32 111, i32* @imported, align 4, !tbaa !2
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ store i32 222, i32* @imported, align 4, !tbaa !2
+ catchret from %0 to label %try.cont
+
+try.cont: ; preds = %catch
+ store i32 333, i32* @imported, align 4, !tbaa !2
+ ret void
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+; CHECK-LABEL: access_imported: # @access_imported
+; CHECK: pushq %rbp
+; CHECK: movq __imp_imported(%rip), %[[base:[^ ]*]]
+; CHECK: movl $111, (%[[base]])
+; Might throw an exception and return to below...
+; CHECK: callq _CxxThrowException
+; Third access must reload the address of imported
+; CHECK: movq __imp_imported(%rip), %[[base:[^ ]*]]
+; CHECK: movl $333, (%[[base]])
+; CHECK: popq %rbp
+; CHECK: retq
+
+; CHECK: "?catch$2@?0?access_imported@4HA":
+; CHECK: pushq %rbp
+; CHECK: movq __imp_imported(%rip), %[[base:[^ ]*]]
+; CHECK: movl $222, (%[[base]])
+; CHECK: popq %rbp
+; CHECK: retq # CATCHRET
+
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0 "}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/catchpad-weight.ll b/test/CodeGen/X86/catchpad-weight.ll
new file mode 100644
index 000000000000..60939bc6b03e
--- /dev/null
+++ b/test/CodeGen/X86/catchpad-weight.ll
@@ -0,0 +1,82 @@
+; RUN: llc -march=x86-64 -print-machineinstrs=expand-isel-pseudos %s -o /dev/null 2>&1 | FileCheck %s
+
+; Check if the edge weight to the catchpad is calculated correctly.
+
+; CHECK: Successors according to CFG: BB#2(0x7ffff100 / 0x80000000 = 100.00%) BB#1(0x00000800 / 0x80000000 = 0.00%) BB#3(0x00000400 / 0x80000000 = 0.00%) BB#4(0x00000200 / 0x80000000 = 0.00%) BB#5(0x00000100 / 0x80000000 = 0.00%)
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--windows-msvc18.0.0"
+
+%rtti.TypeDescriptor7 = type { i8**, i8*, [8 x i8] }
+%struct.HasDtor = type { i8 }
+
+$"\01??_R0?AUA@@@8" = comdat any
+
+$"\01??_R0?AUB@@@8" = comdat any
+
+$"\01??_R0?AUC@@@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0?AUA@@@8" = linkonce_odr global %rtti.TypeDescriptor7 { i8** @"\01??_7type_info@@6B@", i8* null, [8 x i8] c".?AUA@@\00" }, comdat
+@"\01??_R0?AUB@@@8" = linkonce_odr global %rtti.TypeDescriptor7 { i8** @"\01??_7type_info@@6B@", i8* null, [8 x i8] c".?AUB@@\00" }, comdat
+@"\01??_R0?AUC@@@8" = linkonce_odr global %rtti.TypeDescriptor7 { i8** @"\01??_7type_info@@6B@", i8* null, [8 x i8] c".?AUC@@\00" }, comdat
+
+; Function Attrs: uwtable
+define i32 @main() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %o = alloca %struct.HasDtor, align 1
+ %0 = getelementptr inbounds %struct.HasDtor, %struct.HasDtor* %o, i64 0, i32 0
+ call void @llvm.lifetime.start(i64 1, i8* %0) #4
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch.5] unwind label %catch.dispatch.1
+
+catch.5: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [%rtti.TypeDescriptor7* @"\01??_R0?AUA@@@8", i32 0, i8* null]
+ catchret from %1 to label %try.cont
+
+try.cont: ; preds = %entry, %catch, %catch.3, %catch.5
+ call void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* nonnull %o) #4
+ call void @llvm.lifetime.end(i64 1, i8* %0) #4
+ ret i32 0
+
+catch.dispatch.1: ; preds = %catch.dispatch
+ %cs2 = catchswitch within none [label %catch.3] unwind label %catch.dispatch.2
+
+catch.3: ; preds = %catch.dispatch.1
+ %2 = catchpad within %cs2 [%rtti.TypeDescriptor7* @"\01??_R0?AUB@@@8", i32 0, i8* null]
+ catchret from %2 to label %try.cont
+
+catch.dispatch.2: ; preds = %catch.dispatch.1
+ %cs3 = catchswitch within none [label %catch] unwind label %ehcleanup
+
+catch: ; preds = %catch.dispatch.2
+ %3 = catchpad within %cs3 [%rtti.TypeDescriptor7* @"\01??_R0?AUC@@@8", i32 0, i8* null]
+ catchret from %3 to label %try.cont
+
+ehcleanup: ; preds = %catchendblock
+ %4 = cleanuppad within none []
+ call void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* nonnull %o) #4 [ "funclet"(token %4) ]
+ cleanupret from %4 unwind to caller
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @"\01?may_throw@@YAXXZ"() #2
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind
+declare void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor*) #3
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind argmemonly }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind }
diff --git a/test/CodeGen/X86/catchret-empty-fallthrough.ll b/test/CodeGen/X86/catchret-empty-fallthrough.ll
new file mode 100644
index 000000000000..7ad103303171
--- /dev/null
+++ b/test/CodeGen/X86/catchret-empty-fallthrough.ll
@@ -0,0 +1,53 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+; BranchFolding used to remove our empty landingpad block, which is
+; undesirable.
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+declare i32 @__C_specific_handler(...)
+
+declare void @bar()
+
+define void @foo(i1 %cond) personality i32 (...)* @__C_specific_handler {
+entry:
+ br i1 %cond, label %return, label %try
+
+try: ; preds = %entry
+ invoke void @bar()
+ to label %fallthrough unwind label %dispatch
+
+dispatch: ; preds = %try
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %dispatch
+ %0 = catchpad within %cs1 [i8* null]
+ catchret from %0 to label %return
+
+fallthrough: ; preds = %try
+ unreachable
+
+return: ; preds = %catch, %entry
+ ret void
+}
+
+; CHECK-LABEL: foo: # @foo
+; CHECK: testb $1, %cl
+; CHECK: je .LBB0_[[try:[0-9]+]]
+; CHECK: .LBB0_[[return:[0-9]+]]:
+; CHECK: retq
+; CHECK: .LBB0_[[try]]:
+; CHECK: .Ltmp0:
+; CHECK: callq bar
+; CHECK: .Ltmp1:
+; CHECK: .LBB0_[[catch:[0-9]+]]:
+
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .Lfoo$parent_frame_offset = 32
+; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16
+; CHECK-NEXT: .Llsda_begin0:
+; CHECK-NEXT: .long .Ltmp0@IMGREL+1
+; CHECK-NEXT: .long .Ltmp1@IMGREL+1
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .LBB0_[[catch]]@IMGREL
diff --git a/test/CodeGen/X86/catchret-fallthrough.ll b/test/CodeGen/X86/catchret-fallthrough.ll
new file mode 100644
index 000000000000..6a94b290e823
--- /dev/null
+++ b/test/CodeGen/X86/catchret-fallthrough.ll
@@ -0,0 +1,42 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+; We used to have an issue where we inserted an MBB between invoke.cont.3 and
+; its fallthrough target of ret void.
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc18.0.0"
+
+@some_global = global i32 0
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @g()
+
+define void @f() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @g()
+ to label %invoke.cont.3 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ catchret from %0 to label %nrvo.skipdtor
+
+invoke.cont.3: ; preds = %entry
+ store i32 123, i32* @some_global
+ br label %nrvo.skipdtor
+
+nrvo.skipdtor: ; preds = %invoke.cont.3, %invoke.cont.4
+ ret void
+}
+
+; CHECK-LABEL: _f: # @f
+; CHECK: calll _g
+; CHECK: movl $123, _some_global
+; CHECK-NOT: jmp
+; CHECK-NOT: movl {{.*}}, %esp
+; CHECK: retl
+; CHECK: addl $12, %ebp
+; CHECK: jmp LBB0_{{.*}}
diff --git a/test/CodeGen/X86/cleanuppad-inalloca.ll b/test/CodeGen/X86/cleanuppad-inalloca.ll
new file mode 100644
index 000000000000..2e34ada52e6b
--- /dev/null
+++ b/test/CodeGen/X86/cleanuppad-inalloca.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s | FileCheck %s
+
+; Based on this C++:
+; struct A {
+; int x;
+; A();
+; A(const A &a);
+; ~A();
+; };
+; extern "C" void takes_two(A a1, A a2);
+; extern "C" void passes_two() { takes_two(A(), A()); }
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686--windows-msvc"
+
+%struct.A = type { i32 }
+
+define void @passes_two() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %argmem = alloca inalloca <{ %struct.A, %struct.A }>, align 4
+ %0 = getelementptr inbounds <{ %struct.A, %struct.A }>, <{ %struct.A, %struct.A }>* %argmem, i32 0, i32 1
+ %call = call x86_thiscallcc %struct.A* @"\01??0A@@QAE@XZ"(%struct.A* %0)
+ %1 = getelementptr inbounds <{ %struct.A, %struct.A }>, <{ %struct.A, %struct.A }>* %argmem, i32 0, i32 0
+ %call1 = invoke x86_thiscallcc %struct.A* @"\01??0A@@QAE@XZ"(%struct.A* %1)
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ call void @takes_two(<{ %struct.A, %struct.A }>* inalloca nonnull %argmem)
+ ret void
+
+ehcleanup: ; preds = %entry
+ %2 = cleanuppad within none []
+ call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %0) [ "funclet"(token %2) ]
+ cleanupret from %2 unwind to caller
+}
+
+; CHECK: _passes_two:
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
+; CHECK: subl ${{[0-9]+}}, %esp
+; CHECK: movl $8, %eax
+; CHECK: calll __chkstk
+; CHECK: calll "??0A@@QAE@XZ"
+; CHECK: calll "??0A@@QAE@XZ"
+; CHECK: calll _takes_two
+; ESP must be restored via EBP due to "dynamic" alloca.
+; CHECK: leal -{{[0-9]+}}(%ebp), %esp
+; CHECK: popl %ebp
+; CHECK: retl
+
+; CHECK: "?dtor$2@?0?passes_two@4HA":
+; CHECK: pushl %ebp
+; CHECK: subl $8, %esp
+; CHECK: addl $12, %ebp
+; CHECK: {{movl|leal}} -{{[0-9]+}}(%ebp), %ecx
+; CHECK: calll "??1A@@QAE@XZ"
+; CHECK: addl $8, %esp
+; CHECK: retl
+
+declare void @takes_two(<{ %struct.A, %struct.A }>* inalloca) #0
+
+declare x86_thiscallcc %struct.A* @"\01??0A@@QAE@XZ"(%struct.A* returned) #0
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A*) #0
+
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/cleanuppad-large-codemodel.ll b/test/CodeGen/X86/cleanuppad-large-codemodel.ll
new file mode 100644
index 000000000000..8ffb97d8dd68
--- /dev/null
+++ b/test/CodeGen/X86/cleanuppad-large-codemodel.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=x86_64-pc-windows-msvc -code-model=large -o - < %s | FileCheck %s
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @bar()
+
+define void @foo() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @bar()
+ to label %exit unwind label %cleanup
+cleanup:
+ %c = cleanuppad within none []
+ call void @bar() [ "funclet"(token %c) ]
+ cleanupret from %c unwind to caller
+exit:
+ ret void
+}
+
+; CHECK: foo: # @foo
+; CHECK: movabsq $bar, %[[reg:[^ ]*]]
+; CHECK: callq *%[[reg]]
+; CHECK: retq
+
+; CHECK: "?dtor$2@?0?foo@4HA":
+; CHECK: movabsq $bar, %[[reg:[^ ]*]]
+; CHECK: callq *%[[reg]]
+; CHECK: retq # CLEANUPRET
diff --git a/test/CodeGen/X86/cleanuppad-realign.ll b/test/CodeGen/X86/cleanuppad-realign.ll
new file mode 100644
index 000000000000..5a565cc1570f
--- /dev/null
+++ b/test/CodeGen/X86/cleanuppad-realign.ll
@@ -0,0 +1,78 @@
+; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s
+; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s
+
+declare i32 @__CxxFrameHandler3(...)
+declare void @Dtor(i64* %o)
+declare void @f(i32)
+
+define void @realigned_cleanup() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ ; Overalign %o to cause stack realignment.
+ %o = alloca i64, align 32
+ invoke void @f(i32 1)
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ call void @Dtor(i64* %o)
+ ret void
+
+ehcleanup: ; preds = %entry
+ %0 = cleanuppad within none []
+ call void @Dtor(i64* %o) [ "funclet"(token %0) ]
+ cleanupret from %0 unwind to caller
+}
+
+; X86-LABEL: _realigned_cleanup: # @realigned_cleanup
+; X86: pushl %ebp
+; X86: movl %esp, %ebp
+; X86: pushl %ebx
+; X86: pushl %edi
+; X86: pushl %esi
+; X86: andl $-32, %esp
+; X86: subl $96, %esp
+; X86: movl %esp, %esi
+; EBP will reload from this offset.
+; X86: movl %ebp, 28(%esi)
+; The last EH reg field is the state number, so dtor adjust is this +4.
+; X86: movl $-1, 72(%esi)
+
+; X86-LABEL: "?dtor$2@?0?realigned_cleanup@4HA":
+; X86: pushl %ebp
+; X86: leal -76(%ebp), %esi
+; X86: movl 28(%esi), %ebp
+; We used to have a bug where we clobbered ESI after the prologue.
+; X86-NOT: movl {{.*}}, %esi
+; X86: popl %ebp
+; X86: retl # CLEANUPRET
+
+; X64-LABEL: realigned_cleanup: # @realigned_cleanup
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: pushq %rbx
+; X64: .seh_pushreg 3
+; X64: subq $104, %rsp
+; X64: .seh_stackalloc 104
+; X64: leaq 96(%rsp), %rbp
+; X64: .seh_setframe 5, 96
+; X64: .seh_endprologue
+; X64: andq $-32, %rsp
+; X64: movq %rsp, %rbx
+; RBP will reload from this offset.
+; X64: movq %rbp, 56(%rbx)
+; X64: movq $-2, (%rbp)
+
+; X64-LABEL: "?dtor$2@?0?realigned_cleanup@4HA":
+; X64: movq %rdx, 16(%rsp)
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: pushq %rbx
+; X64: .seh_pushreg 3
+; X64: subq $40, %rsp
+; X64: .seh_stackalloc 40
+; X64: leaq 96(%rdx), %rbp
+; X64: .seh_endprologue
+; X64: andq $-32, %rdx
+; X64: movq %rdx, %rbx
+; X64-NOT: mov{{.*}}, %rbx
+; X64: popq %rbp
+; X64: retq # CLEANUPRET
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index 6a6f5256f44d..4a094480c931 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
declare i8 @llvm.cttz.i8(i8, i1)
declare i16 @llvm.cttz.i16(i16, i1)
@@ -10,131 +10,151 @@ declare i32 @llvm.ctlz.i32(i32, i1)
declare i64 @llvm.ctlz.i64(i64, i1)
define i8 @cttz_i8(i8 %x) {
+; CHECK-LABEL: cttz_i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: bsfl %eax, %eax
+; CHECK-NEXT: retq
%tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
ret i8 %tmp
-; CHECK-LABEL: cttz_i8:
-; CHECK: bsfl
-; CHECK-NOT: cmov
-; CHECK: ret
}
define i16 @cttz_i16(i16 %x) {
+; CHECK-LABEL: cttz_i16:
+; CHECK: # BB#0:
+; CHECK-NEXT: bsfw %di, %ax
+; CHECK-NEXT: retq
%tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
ret i16 %tmp
-; CHECK-LABEL: cttz_i16:
-; CHECK: bsfw
-; CHECK-NOT: cmov
-; CHECK: ret
}
define i32 @cttz_i32(i32 %x) {
+; CHECK-LABEL: cttz_i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: bsfl %edi, %eax
+; CHECK-NEXT: retq
%tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
ret i32 %tmp
-; CHECK-LABEL: cttz_i32:
-; CHECK: bsfl
-; CHECK-NOT: cmov
-; CHECK: ret
}
define i64 @cttz_i64(i64 %x) {
+; CHECK-LABEL: cttz_i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: bsfq %rdi, %rax
+; CHECK-NEXT: retq
%tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
ret i64 %tmp
-; CHECK-LABEL: cttz_i64:
-; CHECK: bsfq
-; CHECK-NOT: cmov
-; CHECK: ret
}
define i8 @ctlz_i8(i8 %x) {
-entry:
+; CHECK-LABEL: ctlz_i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: bsrl %eax, %eax
+; CHECK-NEXT: xorl $7, %eax
+; CHECK-NEXT: retq
%tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
ret i8 %tmp2
-; CHECK-LABEL: ctlz_i8:
-; CHECK: bsrl
-; CHECK-NOT: cmov
-; CHECK: xorl $7,
-; CHECK: ret
}
define i16 @ctlz_i16(i16 %x) {
-entry:
+; CHECK-LABEL: ctlz_i16:
+; CHECK: # BB#0:
+; CHECK-NEXT: bsrw %di, %ax
+; CHECK-NEXT: xorl $15, %eax
+; CHECK-NEXT: retq
%tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
ret i16 %tmp2
-; CHECK-LABEL: ctlz_i16:
-; CHECK: bsrw
-; CHECK-NOT: cmov
-; CHECK: xorl $15,
-; CHECK: ret
}
define i32 @ctlz_i32(i32 %x) {
+; CHECK-LABEL: ctlz_i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: bsrl %edi, %eax
+; CHECK-NEXT: xorl $31, %eax
+; CHECK-NEXT: retq
%tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
ret i32 %tmp
-; CHECK-LABEL: ctlz_i32:
-; CHECK: bsrl
-; CHECK-NOT: cmov
-; CHECK: xorl $31,
-; CHECK: ret
}
define i64 @ctlz_i64(i64 %x) {
+; CHECK-LABEL: ctlz_i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: bsrq %rdi, %rax
+; CHECK-NEXT: xorq $63, %rax
+; CHECK-NEXT: retq
%tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
ret i64 %tmp
-; CHECK-LABEL: ctlz_i64:
-; CHECK: bsrq
-; CHECK-NOT: cmov
-; CHECK: xorq $63,
-; CHECK: ret
}
-define i32 @ctlz_i32_cmov(i32 %n) {
-entry:
-; Generate a cmov to handle zero inputs when necessary.
-; CHECK-LABEL: ctlz_i32_cmov:
-; CHECK: bsrl
-; CHECK: cmov
-; CHECK: xorl $31,
-; CHECK: ret
+define i32 @ctlz_i32_zero_test(i32 %n) {
+; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
+
+; CHECK-LABEL: ctlz_i32_zero_test:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $32, %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: je .LBB8_2
+; CHECK-NEXT: # BB#1: # %cond.false
+; CHECK-NEXT: bsrl %edi, %eax
+; CHECK-NEXT: xorl $31, %eax
+; CHECK-NEXT: .LBB8_2: # %cond.end
+; CHECK-NEXT: retq
%tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
ret i32 %tmp1
}
define i32 @ctlz_i32_fold_cmov(i32 %n) {
-entry:
; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
+; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
+; codegen doesn't know how to delete the movl and je.
+
; CHECK-LABEL: ctlz_i32_fold_cmov:
-; CHECK: bsrl
-; CHECK-NOT: cmov
-; CHECK: xorl $31,
-; CHECK: ret
+; CHECK: # BB#0:
+; CHECK-NEXT: orl $1, %edi
+; CHECK-NEXT: movl $32, %eax
+; CHECK-NEXT: je .LBB9_2
+; CHECK-NEXT: # BB#1: # %cond.false
+; CHECK-NEXT: bsrl %edi, %eax
+; CHECK-NEXT: xorl $31, %eax
+; CHECK-NEXT: .LBB9_2: # %cond.end
+; CHECK-NEXT: retq
%or = or i32 %n, 1
%tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
ret i32 %tmp1
}
define i32 @ctlz_bsr(i32 %n) {
-entry:
; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
; the most significant bit, which is what 'bsr' does natively.
+
; CHECK-LABEL: ctlz_bsr:
-; CHECK: bsrl
-; CHECK-NOT: xorl
-; CHECK: ret
+; CHECK: # BB#0:
+; CHECK-NEXT: bsrl %edi, %eax
+; CHECK-NEXT: retq
%ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
%bsr = xor i32 %ctlz, 31
ret i32 %bsr
}
-define i32 @ctlz_bsr_cmov(i32 %n) {
-entry:
-; Same as ctlz_bsr, but ensure this happens even when there is a potential
-; zero.
-; CHECK-LABEL: ctlz_bsr_cmov:
-; CHECK: bsrl
-; CHECK-NOT: xorl
-; CHECK: ret
+define i32 @ctlz_bsr_zero_test(i32 %n) {
+; Generate a test and branch to handle zero inputs because bsr/bsf are very slow.
+; FIXME: The compare and branch are produced late in IR (by CodeGenPrepare), and
+; codegen doesn't know how to combine the $32 and $31 into $63.
+
+; CHECK-LABEL: ctlz_bsr_zero_test:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $32, %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: je .LBB11_2
+; CHECK-NEXT: # BB#1: # %cond.false
+; CHECK-NEXT: bsrl %edi, %eax
+; CHECK-NEXT: xorl $31, %eax
+; CHECK-NEXT: .LBB11_2: # %cond.end
+; CHECK-NEXT: xorl $31, %eax
+; CHECK-NEXT: retq
%ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
%bsr = xor i32 %ctlz, 31
ret i32 %bsr
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index 584179aacbc9..eb9a29011428 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -211,3 +211,47 @@ define zeroext i1 @test15(i32 %bf.load, i32 %n) {
; CHECK: shrl $16, %edi
; CHECK: cmpl %esi, %edi
}
+
+define i8 @test16(i16 signext %L) {
+ %lshr = lshr i16 %L, 15
+ %trunc = trunc i16 %lshr to i8
+ %not = xor i8 %trunc, 1
+ ret i8 %not
+
+; CHECK-LABEL: test16:
+; CHECK: testw %di, %di
+; CHECK: setns %al
+}
+
+define i8 @test17(i32 %L) {
+ %lshr = lshr i32 %L, 31
+ %trunc = trunc i32 %lshr to i8
+ %not = xor i8 %trunc, 1
+ ret i8 %not
+
+; CHECK-LABEL: test17:
+; CHECK: testl %edi, %edi
+; CHECK: setns %al
+}
+
+define i8 @test18(i64 %L) {
+ %lshr = lshr i64 %L, 63
+ %trunc = trunc i64 %lshr to i8
+ %not = xor i8 %trunc, 1
+ ret i8 %not
+
+; CHECK-LABEL: test18:
+; CHECK: testq %rdi, %rdi
+; CHECK: setns %al
+}
+
+define zeroext i1 @test19(i32 %L) {
+ %lshr = lshr i32 %L, 31
+ %trunc = trunc i32 %lshr to i1
+ %not = xor i1 %trunc, 1
+ ret i1 %not
+
+; CHECK-LABEL: test19:
+; CHECK: testl %edi, %edi
+; CHECK: setns %al
+}
diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
index 61123930887b..e21ba2a14cf5 100644
--- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -1,24 +1,72 @@
-; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386
+; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
-declare i32 @bar()
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s -check-prefix=x8664-sahf
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664-sahf
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s -check-prefix=x8664-sahf
-define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
-; CHECK-LABEL: test_intervening_call:
-; CHECK: cmpxchg
-; CHECK: pushf[[LQ:[lq]]]
-; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]]
+declare i32 @foo()
+declare i32 @bar(i64)
-; CHECK-NEXT: call[[LQ]] bar
+define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
+; i386-LABEL: test_intervening_call:
+; i386: cmpxchg8b
+; i386-NEXT: pushl %eax
+; i386-NEXT: seto %al
+; i386-NEXT: lahf
+; i386-NEXT: movl %eax, [[FLAGS:%.*]]
+; i386-NEXT: popl %eax
+; i386-NEXT: movl %edx, 4(%esp)
+; i386-NEXT: movl %eax, (%esp)
+; i386-NEXT: calll bar
+; i386-NEXT: movl [[FLAGS]], %eax
+; i386-NEXT: addb $127, %al
+; i386-NEXT: sahf
+; i386-NEXT: jne
+
+; i386f-LABEL: test_intervening_call:
+; i386f: cmpxchg8b
+; i386f-NEXT: movl %eax, (%esp)
+; i386f-NEXT: movl %edx, 4(%esp)
+; i386f-NEXT: seto %al
+; i386f-NEXT: lahf
+; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
+; i386f-NEXT: calll bar
+; i386f-NEXT: movl [[FLAGS]], %eax
+; i386f-NEXT: addb $127, %al
+; i386f-NEXT: sahf
+; i386f-NEXT: jne
+
+; x8664-LABEL: test_intervening_call:
+; x8664: cmpxchgq
+; x8664: pushfq
+; x8664-NEXT: popq [[FLAGS:%.*]]
+; x8664-NEXT: movq %rax, %rdi
+; x8664-NEXT: callq bar
+; x8664-NEXT: pushq [[FLAGS]]
+; x8664-NEXT: popfq
+; x8664-NEXT: jne
+
+; x8664-sahf-LABEL: test_intervening_call:
+; x8664-sahf: cmpxchgq
+; x8664-sahf: pushq %rax
+; x8664-sahf-NEXT: seto %al
+; x8664-sahf-NEXT: lahf
+; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
+; x8664-sahf-NEXT: popq %rax
+; x8664-sahf-NEXT: movq %rax, %rdi
+; x8664-sahf-NEXT: callq bar
+; x8664-sahf-NEXT: movq [[FLAGS]], %rax
+; x8664-sahf-NEXT: addb $127, %al
+; x8664-sahf-NEXT: sahf
+; x8664-sahf-NEXT: jne
-; CHECK-NEXT: push[[LQ]] [[FLAGS]]
-; CHECK-NEXT: popf[[LQ]]
-; CHECK-NEXT: jne
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
+ %v = extractvalue { i64, i1 } %cx, 0
%p = extractvalue { i64, i1 } %cx, 1
- call i32 @bar()
+ call i32 @bar(i64 %v)
br i1 %p, label %t, label %f
t:
@@ -30,10 +78,22 @@ f:
; Interesting in producing a clobber without any function calls.
define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) {
-; CHECK-LABEL: test_control_flow:
+; i386-LABEL: test_control_flow:
+; i386: cmpxchg
+; i386-NEXT: jne
+
+; i386f-LABEL: test_control_flow:
+; i386f: cmpxchg
+; i386f-NEXT: jne
+
+; x8664-LABEL: test_control_flow:
+; x8664: cmpxchg
+; x8664-NEXT: jne
+
+; x8664-sahf-LABEL: test_control_flow:
+; x8664-sahf: cmpxchg
+; x8664-sahf-NEXT: jne
-; CHECK: cmpxchg
-; CHECK-NEXT: jne
entry:
%cmp = icmp sgt i32 %i, %j
br i1 %cmp, label %loop_start, label %cond.end
@@ -67,20 +127,54 @@ cond.end:
; This one is an interesting case because CMOV doesn't have a chain
; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) {
-; CHECK-LABEL: test_feed_cmov:
-
-; CHECK: cmpxchg
-; CHECK: pushf[[LQ:[lq]]]
-; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]]
-
-; CHECK-NEXT: call[[LQ]] bar
+; i386-LABEL: test_feed_cmov:
+; i386: cmpxchgl
+; i386-NEXT: seto %al
+; i386-NEXT: lahf
+; i386-NEXT: movl %eax, [[FLAGS:%.*]]
+; i386-NEXT: calll foo
+; i386-NEXT: pushl %eax
+; i386-NEXT: movl [[FLAGS]], %eax
+; i386-NEXT: addb $127, %al
+; i386-NEXT: sahf
+; i386-NEXT: popl %eax
+
+; i386f-LABEL: test_feed_cmov:
+; i386f: cmpxchgl
+; i386f-NEXT: seto %al
+; i386f-NEXT: lahf
+; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
+; i386f-NEXT: calll foo
+; i386f-NEXT: pushl %eax
+; i386f-NEXT: movl [[FLAGS]], %eax
+; i386f-NEXT: addb $127, %al
+; i386f-NEXT: sahf
+; i386f-NEXT: popl %eax
+
+; x8664-LABEL: test_feed_cmov:
+; x8664: cmpxchg
+; x8664: pushfq
+; x8664-NEXT: popq [[FLAGS:%.*]]
+; x8664-NEXT: callq foo
+; x8664-NEXT: pushq [[FLAGS]]
+; x8664-NEXT: popfq
+
+; x8664-sahf-LABEL: test_feed_cmov:
+; x8664-sahf: cmpxchgl
+; x8664-sahf: seto %al
+; x8664-sahf-NEXT: lahf
+; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
+; x8664-sahf-NEXT: callq foo
+; x8664-sahf-NEXT: pushq %rax
+; x8664-sahf-NEXT: movq [[FLAGS]], %rax
+; x8664-sahf-NEXT: addb $127, %al
+; x8664-sahf-NEXT: sahf
+; x8664-sahf-NEXT: popq %rax
-; CHECK-NEXT: push[[LQ]] [[FLAGS]]
-; CHECK-NEXT: popf[[LQ]]
%res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
%success = extractvalue { i32, i1 } %res, 1
- %rhs = call i32 @bar()
+ %rhs = call i32 @foo()
%ret = select i1 %success, i32 %new, i32 %rhs
ret i32 %ret
diff --git a/test/CodeGen/X86/coal-sections.ll b/test/CodeGen/X86/coal-sections.ll
new file mode 100644
index 000000000000..05b2a8c8bf87
--- /dev/null
+++ b/test/CodeGen/X86/coal-sections.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+
+; Check that *coal* sections are not emitted.
+
+; CHECK: .section __TEXT,__text,regular,pure_instructions{{$}}
+; CHECK-NEXT: .globl _foo
+
+; CHECK: .section __TEXT,__const{{$}}
+; CHECK-NEXT: .globl _a
+
+; CHECK: .section __DATA,__data{{$}}
+; CHECK-NEXT: .globl _b
+
+@a = weak_odr constant [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 16
+@b = weak global i32 5, align 4
+@g = common global i32* null, align 8
+
+; Function Attrs: nounwind ssp uwtable
+define weak i32* @foo() {
+entry:
+ store i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), i32** @g, align 8
+ ret i32* @b
+}
diff --git a/test/CodeGen/X86/coalescer-win64.ll b/test/CodeGen/X86/coalescer-win64.ll
new file mode 100644
index 000000000000..ff084ae5b9e0
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-win64.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -verify-coalescing | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+@fnptr = external global void ()*
+
+define void @test1() {
+entry:
+ %p = load void ()*, void ()** @fnptr
+ tail call void %p()
+ ret void
+}
+
+; CHECK-LABEL: test1{{$}}
+; CHECK: .seh_proc test1{{$}}
+; CHECK: rex64 jmpq *fnptr(%rip)
+; CHECK: .seh_endproc
diff --git a/test/CodeGen/X86/code_placement_cold_loop_blocks.ll b/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
new file mode 100644
index 000000000000..592d1ce45bb6
--- /dev/null
+++ b/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
@@ -0,0 +1,122 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+
+define void @foo() !prof !1 {
+; Test if a cold block in a loop will be placed at the end of the function
+; chain.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq b
+; CHECK: callq c
+; CHECK: callq e
+; CHECK: callq f
+; CHECK: callq d
+
+entry:
+ br label %header
+
+header:
+ call void @b()
+ %call = call zeroext i1 @a()
+ br i1 %call, label %if.then, label %if.else, !prof !4
+
+if.then:
+ call void @c()
+ br label %if.end
+
+if.else:
+ call void @d()
+ br label %if.end
+
+if.end:
+ call void @e()
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %header, label %end, !prof !5
+
+end:
+ call void @f()
+ ret void
+}
+
+define void @nested_loop_0() !prof !1 {
+; Test if a block that is cold in the inner loop but not cold in the outer loop
+; will merged to the outer loop chain.
+;
+; CHECK-LABEL: nested_loop_0:
+; CHECK: callq c
+; CHECK: callq d
+; CHECK: callq e
+; CHECK: callq b
+; CHECK: callq f
+
+entry:
+ br label %header
+
+header:
+ call void @b()
+ %call4 = call zeroext i1 @a()
+ br i1 %call4, label %header2, label %end
+
+header2:
+ call void @c()
+ %call = call zeroext i1 @a()
+ br i1 %call, label %if.then, label %if.else, !prof !2
+
+if.then:
+ call void @d()
+ %call3 = call zeroext i1 @a()
+ br i1 %call3, label %header2, label %header, !prof !3
+
+if.else:
+ call void @e()
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %header2, label %header, !prof !3
+
+end:
+ call void @f()
+ ret void
+}
+
+define void @nested_loop_1() !prof !1 {
+; Test if a cold block in an inner loop will be placed at the end of the
+; function chain.
+;
+; CHECK-LABEL: nested_loop_1:
+; CHECK: callq b
+; CHECK: callq c
+; CHECK: callq e
+; CHECK: callq d
+
+entry:
+ br label %header
+
+header:
+ call void @b()
+ br label %header2
+
+header2:
+ call void @c()
+ %call = call zeroext i1 @a()
+ br i1 %call, label %end, label %if.else, !prof !4
+
+if.else:
+ call void @d()
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %header2, label %header, !prof !5
+
+end:
+ call void @e()
+ ret void
+}
+
+declare zeroext i1 @a()
+declare void @b()
+declare void @c()
+declare void @d()
+declare void @e()
+declare void @f()
+
+!1 = !{!"function_entry_count", i64 1}
+!2 = !{!"branch_weights", i32 100, i32 1}
+!3 = !{!"branch_weights", i32 1, i32 10}
+!4 = !{!"branch_weights", i32 1000, i32 1}
+!5 = !{!"branch_weights", i32 100, i32 1}
diff --git a/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll b/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
new file mode 100644
index 000000000000..79b4883fb1d6
--- /dev/null
+++ b/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
@@ -0,0 +1,123 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+
+define void @foo() {
+; Test that when determining the edge probability from a node in an inner loop
+; to a node in an outer loop, the weights on edges in the inner loop should be
+; ignored if we are building the chain for the outer loop.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq c
+; CHECK: callq b
+
+entry:
+ %call = call zeroext i1 @a()
+ br i1 %call, label %if.then, label %if.else, !prof !1
+
+if.then:
+ %call1 = call zeroext i1 @a()
+ br i1 %call1, label %while.body, label %if.end.1, !prof !1
+
+while.body:
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %if.then.1, label %while.cond
+
+if.then.1:
+ call void @d()
+ br label %while.cond
+
+while.cond:
+ %call3 = call zeroext i1 @a()
+ br i1 %call3, label %while.body, label %if.end
+
+if.end.1:
+ call void @d()
+ br label %if.end
+
+if.else:
+ call void @b()
+ br label %if.end
+
+if.end:
+ call void @c()
+ ret void
+}
+
+define void @bar() {
+; Test that when determining the edge probability from a node in a loop to a
+; node in its peer loop, the weights on edges in the first loop should be
+; ignored.
+;
+; CHECK-LABEL: bar:
+; CHECK: callq c
+; CHECK: callq b
+
+entry:
+ %call = call zeroext i1 @a()
+ br i1 %call, label %if.then, label %if.else, !prof !1
+
+if.then:
+ %call1 = call zeroext i1 @a()
+ br i1 %call1, label %if.then, label %while.body, !prof !2
+
+while.body:
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %while.body, label %if.end, !prof !2
+
+if.else:
+ call void @b()
+ br label %if.end
+
+if.end:
+ call void @c()
+ ret void
+}
+
+define void @par() {
+; Test that when determining the edge probability from a node in a loop to a
+; node in its outer loop, the weights on edges in the outer loop should be
+; ignored if we are building the chain for the inner loop.
+;
+; CHECK-LABEL: par:
+; CHECK: callq c
+; CHECK: callq d
+; CHECK: callq b
+
+entry:
+ br label %if.cond
+
+if.cond:
+ %call = call zeroext i1 @a()
+ br i1 %call, label %if.then, label %if.else, !prof !3
+
+if.then:
+ call void @b()
+ br label %if.end
+
+if.else:
+ call void @c()
+ %call1 = call zeroext i1 @a()
+ br i1 %call1, label %if.end, label %exit, !prof !4
+
+if.end:
+ call void @d()
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %if.cond, label %if.end.2, !prof !2
+
+if.end.2:
+ call void @e()
+ br label %if.cond
+
+exit:
+ ret void
+}
+
+declare zeroext i1 @a()
+declare void @b()
+declare void @c()
+declare void @d()
+declare void @e()
+
+!1 = !{!"branch_weights", i32 10, i32 1}
+!2 = !{!"branch_weights", i32 100, i32 1}
+!3 = !{!"branch_weights", i32 1, i32 100}
+!4 = !{!"branch_weights", i32 1, i32 1}
diff --git a/test/CodeGen/X86/code_placement_loop_rotation.ll b/test/CodeGen/X86/code_placement_loop_rotation.ll
new file mode 100644
index 000000000000..3ec5961486e8
--- /dev/null
+++ b/test/CodeGen/X86/code_placement_loop_rotation.ll
@@ -0,0 +1,80 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -precise-rotation-cost < %s | FileCheck %s -check-prefix=CHECK-PROFILE
+
+define void @foo() {
+; Test that not all edges in the loop chain are fall through without profile
+; data.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq e
+; CHECK: callq f
+; CHECK: callq g
+; CHECK: callq h
+
+entry:
+ br label %header
+
+header:
+ call void @e()
+ %call = call zeroext i1 @a()
+ br i1 %call, label %if.then, label %if.else, !prof !2
+
+if.then:
+ call void @f()
+ br label %if.end
+
+if.else:
+ call void @g()
+ br label %if.end
+
+if.end:
+ call void @h()
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %header, label %end
+
+end:
+ ret void
+}
+
+define void @bar() !prof !1 {
+; Test that all edges in the loop chain are fall through with profile data.
+;
+; CHECK-PROFILE-LABEL: bar:
+; CHECK-PROFILE: callq g
+; CHECK-PROFILE: callq h
+; CHECK-PROFILE: callq e
+; CHECK-PROFILE: callq f
+
+entry:
+ br label %header
+
+header:
+ call void @e()
+ %call = call zeroext i1 @a()
+ br i1 %call, label %if.then, label %if.else, !prof !2
+
+if.then:
+ call void @f()
+ br label %if.end
+
+if.else:
+ call void @g()
+ br label %if.end
+
+if.end:
+ call void @h()
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %header, label %end
+
+end:
+ ret void
+}
+
+declare zeroext i1 @a()
+declare void @e()
+declare void @f()
+declare void @g()
+declare void @h()
+
+!1 = !{!"function_entry_count", i64 1}
+!2 = !{!"branch_weights", i32 16, i32 16}
diff --git a/test/CodeGen/X86/code_placement_loop_rotation2.ll b/test/CodeGen/X86/code_placement_loop_rotation2.ll
new file mode 100644
index 000000000000..6d8b3c99cd05
--- /dev/null
+++ b/test/CodeGen/X86/code_placement_loop_rotation2.ll
@@ -0,0 +1,122 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -precise-rotation-cost < %s | FileCheck %s -check-prefix=CHECK-PROFILE
+
+define void @foo() {
+; Test a nested loop case when profile data is not available.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq b
+; CHECK: callq c
+; CHECK: callq d
+; CHECK: callq e
+; CHECK: callq f
+; CHECK: callq g
+; CHECK: callq h
+
+entry:
+ br label %header
+
+header:
+ call void @b()
+ %call = call zeroext i1 @a()
+ br i1 %call, label %if.then, label %if.else, !prof !2
+
+if.then:
+ br label %header2
+
+header2:
+ call void @c()
+ %call1 = call zeroext i1 @a()
+ br i1 %call1, label %if.then2, label %if.else2, !prof !2
+
+if.then2:
+ call void @d()
+ br label %if.end2
+
+if.else2:
+ call void @e()
+ br label %if.end2
+
+if.end2:
+ call void @f()
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %header2, label %if.end
+
+if.else:
+ call void @g()
+ br label %if.end
+
+if.end:
+ call void @h()
+ %call3 = call zeroext i1 @a()
+ br i1 %call3, label %header, label %end
+
+end:
+ ret void
+}
+
+define void @bar() !prof !1 {
+; Test a nested loop case when profile data is available.
+;
+; CHECK-PROFILE-LABEL: bar:
+; CHECK-PROFILE: callq e
+; CHECK-PROFILE: callq f
+; CHECK-PROFILE: callq c
+; CHECK-PROFILE: callq d
+; CHECK-PROFILE: callq h
+; CHECK-PROFILE: callq b
+; CHECK-PROFILE: callq g
+
+entry:
+ br label %header
+
+header:
+ call void @b()
+ %call = call zeroext i1 @a()
+ br i1 %call, label %if.then, label %if.else, !prof !2
+
+if.then:
+ br label %header2
+
+header2:
+ call void @c()
+ %call1 = call zeroext i1 @a()
+ br i1 %call1, label %if.then2, label %if.else2, !prof !2
+
+if.then2:
+ call void @d()
+ br label %if.end2
+
+if.else2:
+ call void @e()
+ br label %if.end2
+
+if.end2:
+ call void @f()
+ %call2 = call zeroext i1 @a()
+ br i1 %call2, label %header2, label %if.end
+
+if.else:
+ call void @g()
+ br label %if.end
+
+if.end:
+ call void @h()
+ %call3 = call zeroext i1 @a()
+ br i1 %call3, label %header, label %end
+
+end:
+ ret void
+}
+
+declare zeroext i1 @a()
+declare void @b()
+declare void @c()
+declare void @d()
+declare void @e()
+declare void @f()
+declare void @g()
+declare void @h()
+
+!1 = !{!"function_entry_count", i64 1}
+!2 = !{!"branch_weights", i32 16, i32 16}
diff --git a/test/CodeGen/X86/codegen-prepare-cast.ll b/test/CodeGen/X86/codegen-prepare-cast.ll
index 1ab8017e8858..c5c2d64f63d8 100644
--- a/test/CodeGen/X86/codegen-prepare-cast.ll
+++ b/test/CodeGen/X86/codegen-prepare-cast.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: @_Dmain
; CHECK: load i8, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0)
-; CHECK ret
+; CHECK: ret
define fastcc i32 @_Dmain(%"char[][]" %unnamed) {
entry:
%tmp = getelementptr [7 x i8], [7 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
diff --git a/test/CodeGen/X86/coff-comdat.ll b/test/CodeGen/X86/coff-comdat.ll
index 18f418959ec9..712825a99100 100644
--- a/test/CodeGen/X86/coff-comdat.ll
+++ b/test/CodeGen/X86/coff-comdat.ll
@@ -53,7 +53,7 @@ define x86_fastcallcc void @f8() comdat($f8) {
$vftable = comdat largest
@some_name = private unnamed_addr constant [2 x i8*] zeroinitializer, comdat($vftable)
-@vftable = alias getelementptr([2 x i8*], [2 x i8*]* @some_name, i32 0, i32 1)
+@vftable = alias i8*, getelementptr([2 x i8*], [2 x i8*]* @some_name, i32 0, i32 1)
; CHECK: .section .text,"xr",discard,_f1
; CHECK: .globl _f1
diff --git a/test/CodeGen/X86/combine-and.ll b/test/CodeGen/X86/combine-and.ll
index bb46ac539171..fddf18d1bdb0 100644
--- a/test/CodeGen/X86/combine-and.ll
+++ b/test/CodeGen/X86/combine-and.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s
;
; Verify that the DAGCombiner is able to fold a vector AND into a blend
diff --git a/test/CodeGen/X86/combine-avx-intrinsics.ll b/test/CodeGen/X86/combine-avx-intrinsics.ll
index f610f7fcb91e..64e081523c1f 100644
--- a/test/CodeGen/X86/combine-avx-intrinsics.ll
+++ b/test/CodeGen/X86/combine-avx-intrinsics.ll
@@ -19,24 +19,6 @@ define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0) {
; CHECK: ret
-define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
- %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a0, <4 x double> %a1)
- ret <4 x double> %1
-}
-; CHECK-LABEL: test_x86_avx_blendv_pd_256
-; CHECK-NOT: vblendvpd
-; CHECK: ret
-
-
-define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
- %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a0, <8 x float> %a1)
- ret <8 x float> %1
-}
-; CHECK-LABEL: test_x86_avx_blendv_ps_256
-; CHECK-NOT: vblendvps
-; CHECK: ret
-
-
define <4 x double> @test2_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
%1 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 0)
ret <4 x double> %1
@@ -55,24 +37,6 @@ define <8 x float> @test2_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1)
; CHECK: ret
-define <4 x double> @test2_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
- %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> zeroinitializer)
- ret <4 x double> %1
-}
-; CHECK-LABEL: test2_x86_avx_blendv_pd_256
-; CHECK-NOT: vblendvpd
-; CHECK: ret
-
-
-define <8 x float> @test2_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
- %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer)
- ret <8 x float> %1
-}
-; CHECK-LABEL: test2_x86_avx_blendv_ps_256
-; CHECK-NOT: vblendvps
-; CHECK: ret
-
-
define <4 x double> @test3_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
%1 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 -1)
ret <4 x double> %1
@@ -91,29 +55,6 @@ define <8 x float> @test3_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1)
; CHECK: ret
-define <4 x double> @test3_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1) {
- %Mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <4 x double>
- %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %Mask)
- ret <4 x double> %1
-}
-; CHECK-LABEL: test3_x86_avx_blendv_pd_256
-; CHECK-NOT: vblendvpd
-; CHECK: ret
-
-
-define <8 x float> @test3_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1) {
- %Mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <8 x float>
- %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %Mask)
- ret <8 x float> %1
-}
-; CHECK-LABEL: test3_x86_avx_blendv_ps_256
-; CHECK-NOT: vblendvps
-; CHECK: ret
-
-
-
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32)
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32)
-declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
-declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
diff --git a/test/CodeGen/X86/combine-avx2-intrinsics.ll b/test/CodeGen/X86/combine-avx2-intrinsics.ll
index 8794f8b86849..2714b26c9141 100644
--- a/test/CodeGen/X86/combine-avx2-intrinsics.ll
+++ b/test/CodeGen/X86/combine-avx2-intrinsics.ll
@@ -3,56 +3,6 @@
; Verify that the backend correctly combines AVX2 builtin intrinsics.
-define <8 x i32> @test_psra_1(<8 x i32> %A) {
- %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 3)
- %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
- %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 2)
- ret <8 x i32> %3
-}
-; CHECK-LABEL: test_psra_1
-; CHECK: vpsrad $8, %ymm0, %ymm0
-; CHECK-NEXT: ret
-
-define <16 x i16> @test_psra_2(<16 x i16> %A) {
- %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 3)
- %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
- %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 2)
- ret <16 x i16> %3
-}
-; CHECK-LABEL: test_psra_2
-; CHECK: vpsraw $8, %ymm0, %ymm0
-; CHECK-NEXT: ret
-
-define <16 x i16> @test_psra_3(<16 x i16> %A) {
- %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
- %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
- %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
- ret <16 x i16> %3
-}
-; CHECK-LABEL: test_psra_3
-; CHECK-NOT: vpsraw
-; CHECK: ret
-
-define <8 x i32> @test_psra_4(<8 x i32> %A) {
- %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
- %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
- %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
- ret <8 x i32> %3
-}
-; CHECK-LABEL: test_psra_4
-; CHECK-NOT: vpsrad
-; CHECK: ret
-
-
-define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
- %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a0, <32 x i8> %a1)
- ret <32 x i8> %res
-}
-; CHECK-LABEL: test_x86_avx2_pblendvb
-; CHECK-NOT: vpblendvb
-; CHECK: ret
-
-
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0) {
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a0, i32 7)
ret <16 x i16> %res
@@ -80,15 +30,6 @@ define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0) {
; CHECK: ret
-define <32 x i8> @test2_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
- %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> zeroinitializer)
- ret <32 x i8> %res
-}
-; CHECK-LABEL: test2_x86_avx2_pblendvb
-; CHECK-NOT: vpblendvb
-; CHECK: ret
-
-
define <16 x i16> @test2_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 0)
ret <16 x i16> %res
@@ -116,16 +57,6 @@ define <8 x i32> @test2_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: ret
-define <32 x i8> @test3_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
- %1 = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <32 x i8>
- %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %1)
- ret <32 x i8> %res
-}
-; CHECK-LABEL: test3_x86_avx2_pblendvb
-; CHECK-NOT: vpblendvb
-; CHECK: ret
-
-
define <16 x i16> @test3_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 -1)
ret <16 x i16> %res
@@ -153,12 +84,7 @@ define <8 x i32> @test3_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: ret
-declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32)
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32)
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32)
-declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>)
-declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32)
-declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>)
-declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32)
diff --git a/test/CodeGen/X86/combine-multiplies.ll b/test/CodeGen/X86/combine-multiplies.ll
new file mode 100644
index 000000000000..5e51edbf52f9
--- /dev/null
+++ b/test/CodeGen/X86/combine-multiplies.ll
@@ -0,0 +1,163 @@
+; RUN: llc < %s -mattr=sse2 -mtriple=i386-unknown-linux-gnu | FileCheck %s
+
+; Source file looks something like this:
+;
+; typedef int AAA[100][100];
+;
+; void testCombineMultiplies(AAA a,int lll)
+; {
+; int LOC = lll + 5;
+;
+; a[LOC][LOC] = 11;
+;
+; a[LOC][20] = 22;
+; a[LOC+20][20] = 33;
+; }
+;
+; We want to make sure we don't generate 2 multiply instructions,
+; one for a[LOC][] and one for a[LOC+20]. visitMUL in DAGCombiner.cpp
+; should combine the instructions in such a way to avoid the extra
+; multiply.
+;
+; Output looks roughly like this:
+;
+; movl 8(%esp), %eax
+; movl 12(%esp), %ecx
+; imull $400, %ecx, %edx # imm = 0x190
+; leal (%edx,%eax), %esi
+; movl $11, 2020(%esi,%ecx,4)
+; movl $22, 2080(%edx,%eax)
+; movl $33, 10080(%edx,%eax)
+;
+; CHECK-LABEL: testCombineMultiplies
+; CHECK: imull $400, [[ARG1:%[a-z]+]], [[MUL:%[a-z]+]] # imm = 0x190
+; CHECK-NEXT: leal ([[MUL]],[[ARG2:%[a-z]+]]), [[LEA:%[a-z]+]]
+; CHECK-NEXT: movl $11, {{[0-9]+}}([[LEA]],[[ARG1]],4)
+; CHECK-NEXT: movl $22, {{[0-9]+}}([[MUL]],[[ARG2]])
+; CHECK-NEXT: movl $33, {{[0-9]+}}([[MUL]],[[ARG2]])
+; CHECK: retl
+;
+
+; Function Attrs: nounwind
+define void @testCombineMultiplies([100 x i32]* nocapture %a, i32 %lll) {
+entry:
+ %add = add nsw i32 %lll, 5
+ %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %a, i32 %add, i32 %add
+ store i32 11, i32* %arrayidx1, align 4
+ %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %a, i32 %add, i32 20
+ store i32 22, i32* %arrayidx3, align 4
+ %add4 = add nsw i32 %lll, 25
+ %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %a, i32 %add4, i32 20
+ store i32 33, i32* %arrayidx6, align 4
+ ret void
+}
+
+
+; Test for the same optimization on vector multiplies.
+;
+; Source looks something like this:
+;
+; typedef int v4int __attribute__((__vector_size__(16)));
+;
+; v4int x;
+; v4int v2, v3;
+; void testCombineMultiplies_splat(v4int v1) {
+; v2 = (v1 + (v4int){ 11, 11, 11, 11 }) * (v4int) {22, 22, 22, 22};
+; v3 = (v1 + (v4int){ 33, 33, 33, 33 }) * (v4int) {22, 22, 22, 22};
+; x = (v1 + (v4int){ 11, 11, 11, 11 });
+; }
+;
+; Output looks something like this:
+;
+; testCombineMultiplies_splat: # @testCombineMultiplies_splat
+; # BB#0: # %entry
+; movdqa .LCPI1_0, %xmm1 # xmm1 = [11,11,11,11]
+; paddd %xmm0, %xmm1
+; movdqa .LCPI1_1, %xmm2 # xmm2 = [22,22,22,22]
+; pshufd $245, %xmm0, %xmm3 # xmm3 = xmm0[1,1,3,3]
+; pmuludq %xmm2, %xmm0
+; pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
+; pmuludq %xmm2, %xmm3
+; pshufd $232, %xmm3, %xmm2 # xmm2 = xmm3[0,2,2,3]
+; punpckldq %xmm2, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; movdqa .LCPI1_2, %xmm2 # xmm2 = [242,242,242,242]
+; paddd %xmm0, %xmm2
+; paddd .LCPI1_3, %xmm0
+; movdqa %xmm2, v2
+; movdqa %xmm0, v3
+; movdqa %xmm1, x
+; retl
+;
+; Again, we want to make sure we don't generate two different multiplies.
+; We should have a single multiply for "v1 * {22, 22, 22, 22}" (made up of two
+; pmuludq instructions), followed by two adds. Without this optimization, we'd
+; do 2 adds, followed by 2 multiplies (i.e. 4 pmuludq instructions).
+;
+; CHECK-LABEL: testCombineMultiplies_splat
+; CHECK: movdqa .LCPI1_0, [[C11:%xmm[0-9]]]
+; CHECK-NEXT: paddd %xmm0, [[C11]]
+; CHECK-NEXT: movdqa .LCPI1_1, [[C22:%xmm[0-9]]]
+; CHECK-NEXT: pshufd $245, %xmm0, [[T1:%xmm[0-9]]]
+; CHECK-NEXT: pmuludq [[C22]], [[T2:%xmm[0-9]]]
+; CHECK-NEXT: pshufd $232, [[T2]], [[T3:%xmm[0-9]]]
+; CHECK-NEXT: pmuludq [[C22]], [[T4:%xmm[0-9]]]
+; CHECK-NEXT: pshufd $232, [[T4]], [[T5:%xmm[0-9]]]
+; CHECK-NEXT: punpckldq [[T5]], [[T6:%xmm[0-9]]]
+; CHECK-NEXT: movdqa .LCPI1_2, [[C242:%xmm[0-9]]]
+; CHECK-NEXT: paddd [[T6]], [[C242]]
+; CHECK-NEXT: paddd .LCPI1_3, [[C726:%xmm[0-9]]]
+; CHECK-NEXT: movdqa [[C242]], v2
+; CHECK-NEXT: [[C726]], v3
+; CHECK-NEXT: [[C11]], x
+; CHECK-NEXT: retl
+
+@v2 = common global <4 x i32> zeroinitializer, align 16
+@v3 = common global <4 x i32> zeroinitializer, align 16
+@x = common global <4 x i32> zeroinitializer, align 16
+
+; Function Attrs: nounwind
+define void @testCombineMultiplies_splat(<4 x i32> %v1) {
+entry:
+ %add1 = add <4 x i32> %v1, <i32 11, i32 11, i32 11, i32 11>
+ %mul1 = mul <4 x i32> %add1, <i32 22, i32 22, i32 22, i32 22>
+ %add2 = add <4 x i32> %v1, <i32 33, i32 33, i32 33, i32 33>
+ %mul2 = mul <4 x i32> %add2, <i32 22, i32 22, i32 22, i32 22>
+ store <4 x i32> %mul1, <4 x i32>* @v2, align 16
+ store <4 x i32> %mul2, <4 x i32>* @v3, align 16
+ store <4 x i32> %add1, <4 x i32>* @x, align 16
+ ret void
+}
+
+; Finally, check the non-splatted vector case. This is very similar
+; to the previous test case, except for the vector values.
+;
+; CHECK-LABEL: testCombineMultiplies_non_splat
+; CHECK: movdqa .LCPI2_0, [[C11:%xmm[0-9]]]
+; CHECK-NEXT: paddd %xmm0, [[C11]]
+; CHECK-NEXT: movdqa .LCPI2_1, [[C22:%xmm[0-9]]]
+; CHECK-NEXT: pshufd $245, %xmm0, [[T1:%xmm[0-9]]]
+; CHECK-NEXT: pmuludq [[C22]], [[T2:%xmm[0-9]]]
+; CHECK-NEXT: pshufd $232, [[T2]], [[T3:%xmm[0-9]]]
+; CHECK-NEXT: pshufd $245, [[C22]], [[T7:%xmm[0-9]]]
+; CHECK-NEXT: pmuludq [[T1]], [[T7]]
+; CHECK-NEXT: pshufd $232, [[T7]], [[T5:%xmm[0-9]]]
+; CHECK-NEXT: punpckldq [[T5]], [[T6:%xmm[0-9]]]
+; CHECK-NEXT: movdqa .LCPI2_2, [[C242:%xmm[0-9]]]
+; CHECK-NEXT: paddd [[T6]], [[C242]]
+; CHECK-NEXT: paddd .LCPI2_3, [[C726:%xmm[0-9]]]
+; CHECK-NEXT: movdqa [[C242]], v2
+; CHECK-NEXT: [[C726]], v3
+; CHECK-NEXT: [[C11]], x
+; CHECK-NEXT: retl
+; Function Attrs: nounwind
+define void @testCombineMultiplies_non_splat(<4 x i32> %v1) {
+entry:
+ %add1 = add <4 x i32> %v1, <i32 11, i32 22, i32 33, i32 44>
+ %mul1 = mul <4 x i32> %add1, <i32 22, i32 33, i32 44, i32 55>
+ %add2 = add <4 x i32> %v1, <i32 33, i32 44, i32 55, i32 66>
+ %mul2 = mul <4 x i32> %add2, <i32 22, i32 33, i32 44, i32 55>
+ store <4 x i32> %mul1, <4 x i32>* @v2, align 16
+ store <4 x i32> %mul2, <4 x i32>* @v3, align 16
+ store <4 x i32> %add1, <4 x i32>* @x, align 16
+ ret void
+}
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll
index 970f1762c1b8..e17cfbeeee12 100644
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
diff --git a/test/CodeGen/X86/combine-sse2-intrinsics.ll b/test/CodeGen/X86/combine-sse2-intrinsics.ll
deleted file mode 100644
index fa500e5d8d67..000000000000
--- a/test/CodeGen/X86/combine-sse2-intrinsics.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
-
-; Verify that the backend correctly combines SSE2 builtin intrinsics.
-
-
-define <4 x i32> @test_psra_1(<4 x i32> %A) {
- %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 3)
- %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
- %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 2)
- ret <4 x i32> %3
-}
-; CHECK-LABEL: test_psra_1
-; CHECK: psrad $8, %xmm0
-; CHECK-NEXT: ret
-
-define <8 x i16> @test_psra_2(<8 x i16> %A) {
- %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 3)
- %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
- %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 2)
- ret <8 x i16> %3
-}
-; CHECK-LABEL: test_psra_2
-; CHECK: psraw $8, %xmm0
-; CHECK-NEXT: ret
-
-define <4 x i32> @test_psra_3(<4 x i32> %A) {
- %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
- %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
- %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
- ret <4 x i32> %3
-}
-; CHECK-LABEL: test_psra_3
-; CHECK-NOT: psrad
-; CHECK: ret
-
-
-define <8 x i16> @test_psra_4(<8 x i16> %A) {
- %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
- %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
- %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
- ret <8 x i16> %3
-}
-; CHECK-LABEL: test_psra_4
-; CHECK-NOT: psraw
-; CHECK: ret
-
-
-declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>)
-declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32)
-declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32)
-
diff --git a/test/CodeGen/X86/combine-sse41-intrinsics.ll b/test/CodeGen/X86/combine-sse41-intrinsics.ll
index 254991aec094..1916883c201b 100644
--- a/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -19,33 +19,6 @@ define <4 x float> @test_x86_sse41_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: ret
-define <2 x double> @test_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
- %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer)
- ret <2 x double> %1
-}
-; CHECK-LABEL: test_x86_sse41_blendv_pd
-; CHECK-NOT: blendvpd
-; CHECK: ret
-
-
-define <4 x float> @test_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
- %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer)
- ret <4 x float> %1
-}
-; CHECK-LABEL: test_x86_sse41_blendv_ps
-; CHECK-NOT: blendvps
-; CHECK: ret
-
-
-define <16 x i8> @test_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
- %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> zeroinitializer)
- ret <16 x i8> %1
-}
-; CHECK-LABEL: test_x86_sse41_pblendv_b
-; CHECK-NOT: pblendvb
-; CHECK: ret
-
-
define <8 x i16> @test_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
%1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 0)
ret <8 x i16> %1
@@ -75,39 +48,6 @@ define <4 x float> @test2_x86_sse41_blend_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-NEXT: ret
-define <2 x double> @test2_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
- %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <2 x double>
- %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %Mask )
- ret <2 x double> %1
-}
-; CHECK-LABEL: test2_x86_sse41_blendv_pd
-; CHECK-NOT: blendvpd
-; CHECK: movaps %xmm1, %xmm0
-; CHECK-NEXT: ret
-
-
-define <4 x float> @test2_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
- %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x float>
- %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %Mask)
- ret <4 x float> %1
-}
-; CHECK-LABEL: test2_x86_sse41_blendv_ps
-; CHECK-NOT: blendvps
-; CHECK: movaps %xmm1, %xmm0
-; CHECK-NEXT: ret
-
-
-define <16 x i8> @test2_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
- %Mask = bitcast <2 x i64> <i64 -1, i64 -1> to <16 x i8>
- %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %Mask)
- ret <16 x i8> %1
-}
-; CHECK-LABEL: test2_x86_sse41_pblendv_b
-; CHECK-NOT: pblendvb
-; CHECK: movaps %xmm1, %xmm0
-; CHECK-NEXT: ret
-
-
define <8 x i16> @test2_x86_sse41_pblend_w(<8 x i16> %a0, <8 x i16> %a1) {
%1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 -1)
ret <8 x i16> %1
@@ -136,33 +76,6 @@ define <4 x float> @test3_x86_sse41_blend_ps(<4 x float> %a0) {
; CHECK: ret
-define <2 x double> @test3_x86_sse41_blendv_pd(<2 x double> %a0, <2 x double> %a1) {
- %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a0, <2 x double> %a1 )
- ret <2 x double> %1
-}
-; CHECK-LABEL: test3_x86_sse41_blendv_pd
-; CHECK-NOT: blendvpd
-; CHECK: ret
-
-
-define <4 x float> @test3_x86_sse41_blendv_ps(<4 x float> %a0, <4 x float> %a1) {
- %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a0, <4 x float> %a1)
- ret <4 x float> %1
-}
-; CHECK-LABEL: test3_x86_sse41_blendv_ps
-; CHECK-NOT: blendvps
-; CHECK: ret
-
-
-define <16 x i8> @test3_x86_sse41_pblendv_b(<16 x i8> %a0, <16 x i8> %a1) {
- %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> %a1)
- ret <16 x i8> %1
-}
-; CHECK-LABEL: test3_x86_sse41_pblendv_b
-; CHECK-NOT: pblendvb
-; CHECK: ret
-
-
define <8 x i16> @test3_x86_sse41_pblend_w(<8 x i16> %a0) {
%1 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a0, i32 7)
ret <8 x i16> %1
@@ -174,9 +87,5 @@ define <8 x i16> @test3_x86_sse41_pblend_w(<8 x i16> %a0) {
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32)
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32)
-declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
-declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32)
-declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>)
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 5b01e2f4e90d..656c385e2bc7 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -39,7 +39,7 @@ define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8
entry:
; DARWIN-LABEL: t3:
; DARWIN: shlq $32, %rcx
-; DARWIN-NEXT: leaq (%rax,%rcx), %rax
+; DARWIN-NEXT: orq %rcx, %rax
; DARWIN-NEXT: shll $8
; DARWIN-NOT: leaq
%tmp21 = zext i32 %lb to i64
diff --git a/test/CodeGen/X86/constant-hoisting-and.ll b/test/CodeGen/X86/constant-hoisting-and.ll
new file mode 100644
index 000000000000..611445f4a249
--- /dev/null
+++ b/test/CodeGen/X86/constant-hoisting-and.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -O3 -march=x86-64 |FileCheck %s
+define i64 @foo(i1 %z, i64 %data1, i64 %data2)
+{
+; If constant 4294967294 is hoisted to a variable, then we won't be able to use
+; the implicit zero extension of 32-bit operations to handle the AND.
+entry:
+ %val1 = and i64 %data1, 4294967294
+ br i1 %z, label %End, label %L_val2
+
+; CHECK: andl $-2, {{.*}}
+; CHECK: andl $-2, {{.*}}
+L_val2:
+ %val2 = and i64 %data2, 4294967294
+ br label %End
+
+End:
+ %p1 = phi i64 [%val1,%entry], [%val2,%L_val2]
+ ret i64 %p1
+}
diff --git a/test/CodeGen/X86/constant-hoisting-cmp.ll b/test/CodeGen/X86/constant-hoisting-cmp.ll
new file mode 100644
index 000000000000..4e9e49487287
--- /dev/null
+++ b/test/CodeGen/X86/constant-hoisting-cmp.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -O3 -march=x86-64 |FileCheck %s
+define i64 @foo(i64 %data1, i64 %data2, i64 %data3)
+{
+; If constant 4294967295 is hoisted to a variable, then we won't be able to
+; use a shift right by 32 to optimize the compare.
+entry:
+ %val1 = add i64 %data3, 1
+ %x = icmp ugt i64 %data1, 4294967295
+ br i1 %x, label %End, label %L_val2
+
+; CHECK: shrq $32, {{.*}}
+; CHECK: shrq $32, {{.*}}
+L_val2:
+ %val2 = add i64 %data3, 2
+ %y = icmp ugt i64 %data2, 4294967295
+ br i1 %y, label %End, label %L_val3
+
+L_val3:
+ %val3 = add i64 %data3, 3
+ br label %End
+
+End:
+ %p1 = phi i64 [%val1,%entry], [%val2,%L_val2], [%val3,%L_val3]
+ ret i64 %p1
+}
diff --git a/test/CodeGen/X86/copysign-constant-magnitude.ll b/test/CodeGen/X86/copysign-constant-magnitude.ll
index 537d6298ddf4..6c577a2cfcc7 100644
--- a/test/CodeGen/X86/copysign-constant-magnitude.ll
+++ b/test/CodeGen/X86/copysign-constant-magnitude.ll
@@ -5,13 +5,13 @@ target triple = "x86_64-apple-macosx10.10.0"
define void @test_copysign_const_magnitude_d(double %X) {
; CHECK: [[SIGNMASK:L.+]]:
-; CHECK-NEXT: .quad -9223372036854775808 ## double -0.000000e+00
-; CHECK-NEXT: .quad 0 ## double 0.000000e+00
+; CHECK-NEXT: .quad -9223372036854775808 ## double -0
+; CHECK-NEXT: .quad 0 ## double 0
; CHECK: [[ZERO:L.+]]:
; CHECK-NEXT: .space 16
; CHECK: [[ONE:L.+]]:
-; CHECK-NEXT: .quad 4607182418800017408 ## double 1.000000e+00
-; CHECK-NEXT: .quad 0 ## double 0.000000e+00
+; CHECK-NEXT: .quad 4607182418800017408 ## double 1
+; CHECK-NEXT: .quad 0 ## double 0
; CHECK-LABEL: test_copysign_const_magnitude_d:
; CHECK: id
@@ -50,17 +50,17 @@ define void @test_copysign_const_magnitude_d(double %X) {
define void @test_copysign_const_magnitude_f(float %X) {
; CHECK: [[SIGNMASK:L.+]]:
-; CHECK-NEXT: .long 2147483648 ## float -0.000000e+00
-; CHECK-NEXT: .long 0 ## float 0.000000e+00
-; CHECK-NEXT: .long 0 ## float 0.000000e+00
-; CHECK-NEXT: .long 0 ## float 0.000000e+00
+; CHECK-NEXT: .long 2147483648 ## float -0
+; CHECK-NEXT: .long 0 ## float 0
+; CHECK-NEXT: .long 0 ## float 0
+; CHECK-NEXT: .long 0 ## float 0
; CHECK: [[ZERO:L.+]]:
; CHECK-NEXT: .space 16
; CHECK: [[ONE:L.+]]:
-; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
-; CHECK-NEXT: .long 0 ## float 0.000000e+00
-; CHECK-NEXT: .long 0 ## float 0.000000e+00
-; CHECK-NEXT: .long 0 ## float 0.000000e+00
+; CHECK-NEXT: .long 1065353216 ## float 1
+; CHECK-NEXT: .long 0 ## float 0
+; CHECK-NEXT: .long 0 ## float 0
+; CHECK-NEXT: .long 0 ## float 0
; CHECK-LABEL: test_copysign_const_magnitude_f:
; CHECK: id
diff --git a/test/CodeGen/X86/cppeh-nounwind.ll b/test/CodeGen/X86/cppeh-nounwind.ll
deleted file mode 100644
index d9bc001a92df..000000000000
--- a/test/CodeGen/X86/cppeh-nounwind.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck %s
-
-; Sometimes invokes of nounwind functions make it through to CodeGen, especially
-; at -O0, where Clang sometimes optimistically annotates functions as nounwind.
-; WinEHPrepare ends up outlining functions, and emitting references to LSDA
-; labels. Make sure we emit the LSDA in that case.
-
-declare i32 @__CxxFrameHandler3(...)
-declare void @nounwind_func() nounwind
-declare void @cleanup()
-
-define void @should_emit_tables() personality i32 (...)* @__CxxFrameHandler3 {
-entry:
- invoke void @nounwind_func()
- to label %done unwind label %lpad
-
-done:
- ret void
-
-lpad:
- %vals = landingpad { i8*, i32 }
- cleanup
- call void @cleanup()
- resume { i8*, i32 } %vals
-}
-
-; CHECK: _should_emit_tables:
-; CHECK: calll _nounwind_func
-; CHECK: retl
-
-; CHECK: L__ehtable$should_emit_tables:
-
-; CHECK: ___ehhandler$should_emit_tables:
-; CHECK: movl $L__ehtable$should_emit_tables, %eax
-; CHECK: jmp ___CxxFrameHandler3 # TAILCALL
diff --git a/test/CodeGen/X86/cxx_tlscc64.ll b/test/CodeGen/X86/cxx_tlscc64.ll
new file mode 100644
index 000000000000..c229521cc9a4
--- /dev/null
+++ b/test/CodeGen/X86/cxx_tlscc64.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck --check-prefix=SHRINK %s
+%struct.S = type { i8 }
+
+@sg = internal thread_local global %struct.S zeroinitializer, align 1
+@__dso_handle = external global i8
+@__tls_guard = internal thread_local unnamed_addr global i1 false
+
+declare void @_ZN1SC1Ev(%struct.S*)
+declare void @_ZN1SD1Ev(%struct.S*)
+declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
+
+; Every GPR should be saved - except rdi, rax, and rsp
+; CHECK-LABEL: _ZTW2sg
+; CHECK: pushq %r11
+; CHECK: pushq %r10
+; CHECK: pushq %r9
+; CHECK: pushq %r8
+; CHECK: pushq %rsi
+; CHECK: pushq %rdx
+; CHECK: pushq %rcx
+; CHECK: pushq %rbx
+; CHECK: callq
+; CHECK: jne
+; CHECK: callq
+; CHECK: tlv_atexit
+; CHECK: callq
+; CHECK: popq %rbx
+; CHECK: popq %rcx
+; CHECK: popq %rdx
+; CHECK: popq %rsi
+; CHECK: popq %r8
+; CHECK: popq %r9
+; CHECK: popq %r10
+; CHECK: popq %r11
+; SHRINK-LABEL: _ZTW2sg
+; SHRINK: callq
+; SHRINK: jne
+; SHRINK: pushq %r11
+; SHRINK: pushq %r10
+; SHRINK: pushq %r9
+; SHRINK: pushq %r8
+; SHRINK: pushq %rsi
+; SHRINK: pushq %rdx
+; SHRINK: pushq %rcx
+; SHRINK: pushq %rbx
+; SHRINK: callq
+; SHRINK: tlv_atexit
+; SHRINK: popq %rbx
+; SHRINK: popq %rcx
+; SHRINK: popq %rdx
+; SHRINK: popq %rsi
+; SHRINK: popq %r8
+; SHRINK: popq %r9
+; SHRINK: popq %r10
+; SHRINK: popq %r11
+; SHRINK: LBB{{.*}}:
+; SHRINK: callq
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() {
+ %.b.i = load i1, i1* @__tls_guard, align 1
+ br i1 %.b.i, label %__tls_init.exit, label %init.i
+
+init.i:
+ store i1 true, i1* @__tls_guard, align 1
+ tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2
+ %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2
+ br label %__tls_init.exit
+
+__tls_init.exit:
+ ret %struct.S* @sg
+}
diff --git a/test/CodeGen/X86/dag-fmf-cse.ll b/test/CodeGen/X86/dag-fmf-cse.ll
new file mode 100644
index 000000000000..ac8c5000aba4
--- /dev/null
+++ b/test/CodeGen/X86/dag-fmf-cse.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=fma -enable-unsafe-fp-math -enable-fmf-dag=1 | FileCheck %s
+
+; If fast-math-flags are propagated correctly, the mul1 expression
+; should be recognized as a factor in the last fsub, so we should
+; see a mul and add, not a mul and fma:
+; a * b - (-a * b) ---> (a * b) + (a * b)
+
+define float @fmf_should_not_break_cse(float %a, float %b) {
+; CHECK-LABEL: fmf_should_not_break_cse:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+
+ %mul1 = fmul fast float %a, %b
+ %nega = fsub fast float 0.0, %a
+ %mul2 = fmul fast float %nega, %b
+ %abx2 = fsub fast float %mul1, %mul2
+ ret float %abx2
+}
+
diff --git a/test/CodeGen/X86/dag-merge-fast-accesses.ll b/test/CodeGen/X86/dag-merge-fast-accesses.ll
new file mode 100644
index 000000000000..867881d83d3f
--- /dev/null
+++ b/test/CodeGen/X86/dag-merge-fast-accesses.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-slow-unaligned-mem-16 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-unaligned-mem-16 | FileCheck %s --check-prefix=SLOW
+
+; Verify that the DAGCombiner is creating unaligned 16-byte loads and stores
+; if and only if those are fast.
+
+define void @merge_const_vec_store(i64* %ptr) {
+; FAST-LABEL: merge_const_vec_store:
+; FAST: # BB#0:
+; FAST-NEXT: xorps %xmm0, %xmm0
+; FAST-NEXT: movups %xmm0, (%rdi)
+; FAST-NEXT: retq
+;
+; SLOW-LABEL: merge_const_vec_store:
+; SLOW: # BB#0:
+; SLOW-NEXT: movq $0, (%rdi)
+; SLOW-NEXT: movq $0, 8(%rdi)
+; SLOW-NEXT: retq
+
+ %idx0 = getelementptr i64, i64* %ptr, i64 0
+ %idx1 = getelementptr i64, i64* %ptr, i64 1
+
+ store i64 0, i64* %idx0, align 8
+ store i64 0, i64* %idx1, align 8
+ ret void
+}
+
+
+define void @merge_vec_element_store(<4 x double> %v, double* %ptr) {
+; FAST-LABEL: merge_vec_element_store:
+; FAST: # BB#0:
+; FAST-NEXT: movups %xmm0, (%rdi)
+; FAST-NEXT: retq
+;
+; SLOW-LABEL: merge_vec_element_store:
+; SLOW: # BB#0:
+; SLOW-NEXT: movlpd %xmm0, (%rdi)
+; SLOW-NEXT: movhpd %xmm0, 8(%rdi)
+; SLOW-NEXT: retq
+
+ %vecext0 = extractelement <4 x double> %v, i32 0
+ %vecext1 = extractelement <4 x double> %v, i32 1
+
+ %idx0 = getelementptr double, double* %ptr, i64 0
+ %idx1 = getelementptr double, double* %ptr, i64 1
+
+ store double %vecext0, double* %idx0, align 8
+ store double %vecext1, double* %idx1, align 8
+ ret void
+}
+
+
+;; TODO: FAST *should* be:
+;; movups (%rdi), %xmm0
+;; movups %xmm0, 40(%rdi)
+;; ..but is not currently. See the UseAA FIXME in DAGCombiner.cpp
+;; visitSTORE.
+
+define void @merge_vec_load_and_stores(i64 *%ptr) {
+; FAST-LABEL: merge_vec_load_and_stores:
+; FAST: # BB#0:
+; FAST-NEXT: movq (%rdi), %rax
+; FAST-NEXT: movq 8(%rdi), %rcx
+; FAST-NEXT: movq %rax, 40(%rdi)
+; FAST-NEXT: movq %rcx, 48(%rdi)
+; FAST-NEXT: retq
+;
+; SLOW-LABEL: merge_vec_load_and_stores:
+; SLOW: # BB#0:
+; SLOW-NEXT: movq (%rdi), %rax
+; SLOW-NEXT: movq 8(%rdi), %rcx
+; SLOW-NEXT: movq %rax, 40(%rdi)
+; SLOW-NEXT: movq %rcx, 48(%rdi)
+; SLOW-NEXT: retq
+
+ %idx0 = getelementptr i64, i64* %ptr, i64 0
+ %idx1 = getelementptr i64, i64* %ptr, i64 1
+
+ %ld0 = load i64, i64* %idx0, align 4
+ %ld1 = load i64, i64* %idx1, align 4
+
+ %idx4 = getelementptr i64, i64* %ptr, i64 5
+ %idx5 = getelementptr i64, i64* %ptr, i64 6
+
+ store i64 %ld0, i64* %idx4, align 4
+ store i64 %ld1, i64* %idx5, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/X86/darwin-tls.ll b/test/CodeGen/X86/darwin-tls.ll
new file mode 100644
index 000000000000..ca9a998ccc75
--- /dev/null
+++ b/test/CodeGen/X86/darwin-tls.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+
+@a = thread_local global i32 4, align 4
+
+define i32 @f2(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) {
+entry:
+; Parameters are in %edi, %esi, %edx, %ecx, %r8d, there is no need to save
+; these parameters except the one in %edi, before making the TLS call.
+; %edi is used to pass parameter to the TLS call.
+; CHECK-NOT: movl %r8d
+; CHECK-NOT: movl %ecx
+; CHECK-NOT: movl %edx
+; CHECK-NOT: movl %esi
+; CHECK: movq {{.*}}TLVP{{.*}}, %rdi
+; CHECK-NEXT: callq
+; CHECK-NEXT: movl (%rax),
+; CHECK-NOT: movl {{.*}}, %esi
+; CHECK-NOT: movl {{.*}}, %edx
+; CHECK-NOT: movl {{.*}}, %ecx
+; CHECK-NOT: movl {{.*}}, %r8d
+; CHECK: callq
+ %0 = load i32, i32* @a, align 4
+ %call = tail call i32 @f3(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5)
+ %add = add nsw i32 %call, %0
+ ret i32 %add
+}
+
+declare i32 @f3(i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
index 20d0129c3e89..54bd48926834 100644
--- a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
+++ b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
@@ -48,7 +48,7 @@
@.str2 = private unnamed_addr constant [2 x i8] c"-\00", align 1
; Function Attrs: uwtable
-define void @_Z3barii(i32 %param1, i32 %param2) #0 {
+define void @_Z3barii(i32 %param1, i32 %param2) #0 !dbg !24 {
entry:
%var1 = alloca %struct.AAA3, align 1
%var2 = alloca %struct.AAA3, align 1
@@ -113,7 +113,7 @@ attributes #2 = { nounwind readnone }
!llvm.module.flags = !{!44, !45}
!llvm.ident = !{!46}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !23, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !23, globals: !2, imports: !2)
!1 = !DIFile(filename: "dbg-changes-codegen-branch-folding.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
@@ -137,26 +137,26 @@ attributes #2 = { nounwind readnone }
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !22)
!22 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !"_ZTS4AAA3")
!23 = !{!24, !35, !40}
-!24 = !DISubprogram(name: "bar", linkageName: "_Z3barii", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !1, scope: !25, type: !26, function: void (i32, i32)* @_Z3barii, variables: !29)
+!24 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barii", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !1, scope: !25, type: !26, variables: !29)
!25 = !DIFile(filename: "dbg-changes-codegen-branch-folding.cpp", directory: "/tmp/dbginfo")
!26 = !DISubroutineType(types: !27)
!27 = !{null, !28, !28}
!28 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!29 = !{!30, !31, !32, !33, !34}
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "param1", line: 11, arg: 1, scope: !24, file: !25, type: !28)
-!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "param2", line: 11, arg: 2, scope: !24, file: !25, type: !28)
-!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "temp", line: 12, scope: !24, file: !25, type: !15)
-!33 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var1", line: 17, scope: !24, file: !25, type: !"_ZTS4AAA3")
-!34 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var2", line: 18, scope: !24, file: !25, type: !"_ZTS4AAA3")
-!35 = !DISubprogram(name: "operator=", linkageName: "_ZN4AAA3aSEPKc", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !"_ZTS4AAA3", type: !12, declaration: !17, variables: !36)
+!30 = !DILocalVariable(name: "param1", line: 11, arg: 1, scope: !24, file: !25, type: !28)
+!31 = !DILocalVariable(name: "param2", line: 11, arg: 2, scope: !24, file: !25, type: !28)
+!32 = !DILocalVariable(name: "temp", line: 12, scope: !24, file: !25, type: !15)
+!33 = !DILocalVariable(name: "var1", line: 17, scope: !24, file: !25, type: !"_ZTS4AAA3")
+!34 = !DILocalVariable(name: "var2", line: 18, scope: !24, file: !25, type: !"_ZTS4AAA3")
+!35 = distinct !DISubprogram(name: "operator=", linkageName: "_ZN4AAA3aSEPKc", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !"_ZTS4AAA3", type: !12, declaration: !17, variables: !36)
!36 = !{!37, !39}
-!37 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
+!37 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
!38 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS4AAA3")
-!39 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
-!40 = !DISubprogram(name: "AAA3", linkageName: "_ZN4AAA3C2EPKc", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !"_ZTS4AAA3", type: !12, declaration: !11, variables: !41)
+!39 = !DILocalVariable(name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
+!40 = distinct !DISubprogram(name: "AAA3", linkageName: "_ZN4AAA3C2EPKc", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !"_ZTS4AAA3", type: !12, declaration: !11, variables: !41)
!41 = !{!42, !43}
-!42 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
-!43 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
+!42 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
+!43 = !DILocalVariable(name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
!44 = !{i32 2, !"Dwarf Version", i32 4}
!45 = !{i32 2, !"Debug Info Version", i32 3}
!46 = !{!"clang version 3.5.0 "}
@@ -169,36 +169,36 @@ attributes #2 = { nounwind readnone }
!53 = distinct !DILexicalBlock(line: 14, column: 0, file: !1, scope: !51)
!54 = !DILocation(line: 16, scope: !53)
!55 = !DILocation(line: 17, scope: !24)
-!56 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
+!56 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
!57 = !DILocation(line: 0, scope: !40, inlinedAt: !55)
!58 = !{i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0)}
-!59 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
+!59 = !DILocalVariable(name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
!60 = !DILocation(line: 5, scope: !40, inlinedAt: !55)
!61 = !DILocation(line: 5, scope: !62, inlinedAt: !55)
!62 = distinct !DILexicalBlock(line: 5, column: 0, file: !1, scope: !40)
!63 = !DILocation(line: 18, scope: !24)
-!64 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
+!64 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
!65 = !DILocation(line: 0, scope: !40, inlinedAt: !63)
-!66 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
+!66 = !DILocalVariable(name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
!67 = !DILocation(line: 5, scope: !40, inlinedAt: !63)
!68 = !DILocation(line: 5, scope: !62, inlinedAt: !63)
!69 = !DILocation(line: 20, scope: !70)
!70 = distinct !DILexicalBlock(line: 20, column: 0, file: !1, scope: !24)
-!71 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
+!71 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
!72 = !DILocation(line: 21, scope: !70)
!73 = !DILocation(line: 0, scope: !35, inlinedAt: !72)
!74 = !{i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0)}
-!75 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
+!75 = !DILocalVariable(name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
!76 = !DILocation(line: 6, scope: !35, inlinedAt: !72)
-!77 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
+!77 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
!78 = !DILocation(line: 23, scope: !70)
!79 = !DILocation(line: 0, scope: !35, inlinedAt: !78)
!80 = !{i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0)}
-!81 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
+!81 = !DILocalVariable(name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
!82 = !DILocation(line: 6, scope: !35, inlinedAt: !78)
-!83 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
+!83 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
!84 = !DILocation(line: 24, scope: !24)
!85 = !DILocation(line: 0, scope: !35, inlinedAt: !84)
-!86 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
+!86 = !DILocalVariable(name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
!87 = !DILocation(line: 6, scope: !35, inlinedAt: !84)
!88 = !DILocation(line: 25, scope: !24)
diff --git a/test/CodeGen/X86/dbg-changes-codegen.ll b/test/CodeGen/X86/dbg-changes-codegen.ll
index b15e4bd4bf2d..bee86b4617c7 100644
--- a/test/CodeGen/X86/dbg-changes-codegen.ll
+++ b/test/CodeGen/X86/dbg-changes-codegen.ll
@@ -44,7 +44,7 @@
define zeroext i1 @_ZN3Foo3batEv(%struct.Foo* %this) #0 align 2 {
entry:
%0 = load %struct.Foo*, %struct.Foo** @pfoo, align 8
- tail call void @llvm.dbg.value(metadata %struct.Foo* %0, i64 0, metadata !62, metadata !DIExpression()), !dbg !DILocation(scope: !DISubprogram())
+ tail call void @llvm.dbg.value(metadata %struct.Foo* %0, i64 0, metadata !62, metadata !DIExpression()), !dbg !DILocation(scope: !1)
%cmp.i = icmp eq %struct.Foo* %0, %this
ret i1 %cmp.i
}
@@ -53,7 +53,7 @@ entry:
define void @_Z3bazv() #1 {
entry:
%0 = load %struct.Wibble*, %struct.Wibble** @wibble1, align 8
- tail call void @llvm.dbg.value(metadata %struct.Flibble* undef, i64 0, metadata !65, metadata !DIExpression()), !dbg !DILocation(scope: !DISubprogram())
+ tail call void @llvm.dbg.value(metadata %struct.Flibble* undef, i64 0, metadata !65, metadata !DIExpression()), !dbg !DILocation(scope: !1)
%1 = load %struct.Wibble*, %struct.Wibble** @wibble2, align 8
%cmp.i = icmp ugt %struct.Wibble* %1, %0
br i1 %cmp.i, label %if.then.i, label %_ZN7Flibble3barEP6Wibble.exit
@@ -75,9 +75,10 @@ attributes #0 = { nounwind readonly uwtable "less-precise-fpmad"="false" "no-fra
attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
+!1 = distinct !DISubprogram()
!17 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: null)
!45 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
-!62 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "arg", line: 4, arg: 2, scope: !DISubprogram(), type: !17)
+!62 = !DILocalVariable(name: "arg", line: 4, arg: 2, scope: !1, type: !17)
!64 = !{%struct.Flibble* undef}
-!65 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 13, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !DISubprogram(), type: !45)
+!65 = !DILocalVariable(name: "this", line: 13, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !1, type: !45)
diff --git a/test/CodeGen/X86/dbg-combine.ll b/test/CodeGen/X86/dbg-combine.ll
index 5eb2ea9df513..3e78c316a06f 100644
--- a/test/CodeGen/X86/dbg-combine.ll
+++ b/test/CodeGen/X86/dbg-combine.ll
@@ -24,7 +24,7 @@
; ModuleID = 'dbg-combine.c'
; Function Attrs: nounwind uwtable
-define i32 @foo() #0 {
+define i32 @foo() #0 !dbg !4 {
entry:
%elems = alloca i32, align 4
%saved_stack = alloca i8*
@@ -74,11 +74,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 227074)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 227074)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "dbg-combine.c", directory: "/home/probinson/projects/scratch")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 ()* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "dbg-combine.c", directory: "/home/probinson/projects/scratch")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
@@ -86,12 +86,12 @@ attributes #2 = { nounwind }
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.7.0 (trunk 227074)"}
-!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "elems", line: 3, scope: !4, file: !5, type: !8)
+!12 = !DILocalVariable(name: "elems", line: 3, scope: !4, file: !5, type: !8)
!13 = !DIExpression()
!14 = !DILocation(line: 3, column: 8, scope: !4)
!15 = !DILocation(line: 4, column: 15, scope: !4)
!16 = !DILocation(line: 4, column: 4, scope: !4)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "array1", line: 4, scope: !4, file: !5, type: !18)
+!17 = !DILocalVariable(name: "array1", line: 4, scope: !4, file: !5, type: !18)
!18 = !DICompositeType(tag: DW_TAG_array_type, align: 32, baseType: !8, elements: !19)
!19 = !{!20}
!20 = !DISubrange(count: -1)
@@ -105,7 +105,7 @@ attributes #2 = { nounwind }
!28 = !DILocation(line: 7, column: 13, scope: !4)
!29 = !DILocation(line: 8, column: 15, scope: !4)
!30 = !DILocation(line: 8, column: 4, scope: !4)
-!31 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "array2", line: 8, scope: !4, file: !5, type: !18)
+!31 = !DILocalVariable(name: "array2", line: 8, scope: !4, file: !5, type: !18)
!32 = !DILocation(line: 8, column: 8, scope: !4)
!33 = !DILocation(line: 9, column: 4, scope: !4)
!34 = !DILocation(line: 9, column: 13, scope: !4)
diff --git a/test/CodeGen/X86/debugloc-argsize.ll b/test/CodeGen/X86/debugloc-argsize.ll
new file mode 100644
index 000000000000..0283154abab2
--- /dev/null
+++ b/test/CodeGen/X86/debugloc-argsize.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+
+; CHECK-LABEL: _Z3foov:
+; CHECK: .loc 1 4 3 prologue_end
+; CHECK: .cfi_escape 0x2e, 0x10
+define void @_Z3foov() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !4 {
+entry:
+ tail call void @_Z3bariii(i32 0, i32 1, i32 2) #1, !dbg !10
+ invoke void @_Z3bariii(i32 4, i32 5, i32 6) #1
+ to label %try.cont unwind label %lpad, !dbg !11
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 }
+ catch i8* null, !dbg !13
+ %1 = extractvalue { i8*, i32 } %0, 0, !dbg !13
+ %2 = tail call i8* @__cxa_begin_catch(i8* %1) #2, !dbg !14
+ tail call void @__cxa_end_catch(), !dbg !15
+ br label %try.cont, !dbg !15
+
+try.cont: ; preds = %entry, %lpad
+ ret void, !dbg !17
+}
+
+; Function Attrs: optsize
+declare void @_Z3bariii(i32, i32, i32) #0
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { optsize }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 249520)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "foo.cpp", directory: "foo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 249520)"}
+!10 = !DILocation(line: 4, column: 3, scope: !4)
+!11 = !DILocation(line: 6, column: 5, scope: !12)
+!12 = distinct !DILexicalBlock(scope: !4, file: !1, line: 5, column: 7)
+!13 = !DILocation(line: 10, column: 1, scope: !12)
+!14 = !DILocation(line: 7, column: 3, scope: !12)
+!15 = !DILocation(line: 9, column: 3, scope: !16)
+!16 = distinct !DILexicalBlock(scope: !4, file: !1, line: 7, column: 17)
+!17 = !DILocation(line: 10, column: 1, scope: !4)
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index fd07a3f55100..9543d6c4d749 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -94,3 +94,35 @@ define i8 @test9(i8 %x) nounwind {
; CHECK: shrl $11
; CHECK: ret
}
+
+define i32 @testsize1(i32 %x) minsize nounwind {
+entry:
+ %div = sdiv i32 %x, 32
+ ret i32 %div
+; CHECK-LABEL: testsize1:
+; CHECK: divl
+}
+
+define i32 @testsize2(i32 %x) minsize nounwind {
+entry:
+ %div = sdiv i32 %x, 33
+ ret i32 %div
+; CHECK-LABEL: testsize2:
+; CHECK: divl
+}
+
+define i32 @testsize3(i32 %x) minsize nounwind {
+entry:
+ %div = udiv i32 %x, 32
+ ret i32 %div
+; CHECK-LABEL: testsize3:
+; CHECK: shrl
+}
+
+define i32 @testsize4(i32 %x) minsize nounwind {
+entry:
+ %div = udiv i32 %x, 33
+ ret i32 %div
+; CHECK-LABEL: testsize4:
+; CHECK: divl
+}
diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll
index bb5e92f98c7d..58e25f923971 100644
--- a/test/CodeGen/X86/dllexport-x86_64.ll
+++ b/test/CodeGen/X86/dllexport-x86_64.ll
@@ -53,22 +53,22 @@ define weak_odr dllexport void @weak1() {
; CHECK: .globl alias
; CHECK: alias = notExported
-@alias = dllexport alias void()* @notExported
+@alias = dllexport alias void(), void()* @notExported
; CHECK: .globl alias2
; CHECK: alias2 = f1
-@alias2 = dllexport alias void()* @f1
+@alias2 = dllexport alias void(), void()* @f1
; CHECK: .globl alias3
; CHECK: alias3 = notExported
-@alias3 = dllexport alias void()* @notExported
+@alias3 = dllexport alias void(), void()* @notExported
; CHECK: .weak weak_alias
; CHECK: weak_alias = f1
-@weak_alias = weak_odr dllexport alias void()* @f1
+@weak_alias = weak_odr dllexport alias void(), void()* @f1
@blob = global [6 x i8] c"\B8*\00\00\00\C3", section ".text", align 16
-@blob_alias = dllexport alias bitcast ([6 x i8]* @blob to i32 ()*)
+@blob_alias = dllexport alias i32 (), bitcast ([6 x i8]* @blob to i32 ()*)
; CHECK: .section .drectve
; WIN32: /EXPORT:f1
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index 915567de5bf7..cde0955410b7 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -74,19 +74,19 @@ define weak_odr dllexport void @weak1() {
; CHECK: .globl _alias
; CHECK: _alias = _notExported
-@alias = dllexport alias void()* @notExported
+@alias = dllexport alias void(), void()* @notExported
; CHECK: .globl _alias2
; CHECK: _alias2 = _f1
-@alias2 = dllexport alias void()* @f1
+@alias2 = dllexport alias void(), void()* @f1
; CHECK: .globl _alias3
; CHECK: _alias3 = _notExported
-@alias3 = dllexport alias void()* @notExported
+@alias3 = dllexport alias void(), void()* @notExported
; CHECK: .weak _weak_alias
; CHECK: _weak_alias = _f1
-@weak_alias = weak_odr dllexport alias void()* @f1
+@weak_alias = weak_odr dllexport alias void(), void()* @f1
; CHECK: .section .drectve
; CHECK-CL-NOT: not_exported
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
index 27b8b1552ec1..31d2724aade3 100644
--- a/test/CodeGen/X86/dwarf-comp-dir.ll
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!5}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 143523)", isOptimized: true, emissionKind: 0, file: !4, enums: !2, retainedTypes: !7, subprograms: !2, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 143523)", isOptimized: true, emissionKind: 0, file: !4, enums: !2, retainedTypes: !7, subprograms: !2, globals: !2)
!2 = !{}
!3 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
!4 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
index 2925f243b0e3..b0334d6a63ef 100644
--- a/test/CodeGen/X86/dynamic-allocas-VLAs.ll
+++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mcpu=generic -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN
+; RUN: llc < %s -mcpu=generic -stackrealign -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN
; rdar://11496434
; no VLAs or dynamic alignment
diff --git a/test/CodeGen/X86/eh-null-personality.ll b/test/CodeGen/X86/eh-null-personality.ll
new file mode 100644
index 000000000000..536f060db8d9
--- /dev/null
+++ b/test/CodeGen/X86/eh-null-personality.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
+
+; We should treat non-Function personalities as the unknown personality, which
+; is usually Itanium.
+
+declare void @g()
+declare void @terminate(i8*)
+
+define void @f() personality i8* null {
+ invoke void @g()
+ to label %ret unwind label %lpad
+ret:
+ ret void
+lpad:
+ %vals = landingpad { i8*, i32 } catch i8* null
+ %ptr = extractvalue { i8*, i32 } %vals, 0
+ call void @terminate(i8* %ptr)
+ unreachable
+}
+
+; CHECK: f:
+; CHECK: callq g
+; CHECK: retq
+; CHECK: movq %rax, %rdi
+; CHECK: callq terminate
diff --git a/test/CodeGen/X86/eh_frame.ll b/test/CodeGen/X86/eh_frame.ll
index 3b792b235cb5..0472e773df56 100644
--- a/test/CodeGen/X86/eh_frame.ll
+++ b/test/CodeGen/X86/eh_frame.ll
@@ -7,8 +7,8 @@
@bar1 = constant i8* bitcast (i32* @foo to i8*), section "my_bar1", align 8
-; STATIC: .section .eh_frame,"a",@progbits
+; STATIC: .section .eh_frame,"a",@unwind
; STATIC: .section my_bar1,"a",@progbits
-; PIC: .section .eh_frame,"a",@progbits
+; PIC: .section .eh_frame,"a",@unwind
; PIC: .section my_bar1,"aw",@progbits
diff --git a/test/CodeGen/X86/emutls-pic.ll b/test/CodeGen/X86/emutls-pic.ll
new file mode 100644
index 000000000000..11676aff1892
--- /dev/null
+++ b/test/CodeGen/X86/emutls-pic.ll
@@ -0,0 +1,168 @@
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=i386-linux-android -relocation-model=pic | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck -check-prefix=X64 %s
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+; X32-LABEL: my_get_xyz:
+; X32: movl my_emutls_v_xyz@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll my_emutls_get_address@PLT
+; X64-LABEL: my_get_xyz:
+; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq my_emutls_get_address@PLT
+; X64-NEXT: movl (%rax), %eax
+
+entry:
+ %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+ %0 = bitcast i8* %call to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+@i = thread_local global i32 15
+@j = internal thread_local global i32 42
+@k = internal thread_local global i32 0, align 8
+
+define i32 @f1() {
+entry:
+ %tmp1 = load i32, i32* @i
+ ret i32 %tmp1
+}
+
+; X32-LABEL: f1:
+; X32: movl __emutls_v.i@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X64-LABEL: f1:
+; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+; X64-NEXT: movl (%rax), %eax
+
+@i2 = external thread_local global i32
+
+define i32* @f2() {
+entry:
+ ret i32* @i
+}
+
+; X32-LABEL: f2:
+; X64-LABEL: f2:
+
+
+define i32 @f3() {
+entry:
+ %tmp1 = load i32, i32* @i ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+; X32-LABEL: f3:
+; X64-LABEL: f3:
+
+
+define i32* @f4() nounwind {
+entry:
+ ret i32* @i
+}
+
+; X32-LABEL: f4:
+; X64-LABEL: f4:
+
+
+define i32 @f5() nounwind {
+entry:
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @k, align 4
+ %add = add nsw i32 %0, %1
+ ret i32 %add
+}
+
+; X32-LABEL: f5:
+; X32: movl __emutls_v.j@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X32-NEXT: movl (%eax), %esi
+; X32-NEXT: movl __emutls_v.k@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X32-NEXT: addl (%eax), %esi
+; X32-NEXT: movl %esi, %eax
+
+; X64-LABEL: f5:
+; X64: movq __emutls_v.j@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+; X64-NEXT: movl (%rax), %ebx
+; X64-NEXT: movq __emutls_v.k@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+; X64-NEXT: addl (%rax), %ebx
+; X64-NEXT: movl %ebx, %eax
+
+;;;;; 32-bit targets
+
+; X32: .data
+; X32-LABEL: __emutls_v.i:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i
+
+; X32: .section .rodata,
+; X32-LABEL: __emutls_t.i:
+; X32-NEXT: .long 15
+
+; X32: .data
+; X32-LABEL: __emutls_v.j:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.j
+
+; X32: .section .rodata,
+; X32-LABEL: __emutls_t.j:
+; X32-NEXT: .long 42
+
+; X32: .data
+; X32-LABEL: __emutls_v.k:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 8
+; X32-NEXT: .long 0
+; X32-NEXT: .long 0
+
+; X32-NOT: __emutls_t.k:
+
+;;;;; 64-bit targets
+
+; X64: .data
+; X64-LABEL: __emutls_v.i:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i
+
+; X64: .section .rodata,
+; X64-LABEL: __emutls_t.i:
+; X64-NEXT: .long 15
+
+; X64: .data
+; X64-LABEL: __emutls_v.j:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.j
+
+; X64: .section .rodata,
+; X64-LABEL: __emutls_t.j:
+; X64-NEXT: .long 42
+
+; X64: .data
+; X64-LABEL: __emutls_v.k:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 8
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad 0
+
+; X64-NOT: __emutls_t.k:
diff --git a/test/CodeGen/X86/emutls-pie.ll b/test/CodeGen/X86/emutls-pie.ll
new file mode 100644
index 000000000000..45e5c38c0d8a
--- /dev/null
+++ b/test/CodeGen/X86/emutls-pie.ll
@@ -0,0 +1,131 @@
+; RUN: llc < %s -emulated-tls -march=x86 -mcpu=generic -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mcpu=generic -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X64 %s
+; RUN: llc < %s -emulated-tls -march=x86 -mcpu=generic -mtriple=i386-linux-android -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mcpu=generic -mtriple=x86_64-linux-android -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X64 %s
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+; X32-LABEL: my_get_xyz:
+; X32: movl my_emutls_v_xyz@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll my_emutls_get_address@PLT
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
+; X64-LABEL: my_get_xyz:
+; X64: movq my_emutls_v_xyz@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq my_emutls_get_address@PLT
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+
+entry:
+ %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+ %0 = bitcast i8* %call to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+@i = thread_local global i32 15
+@i2 = external thread_local global i32
+
+define i32 @f1() {
+; X32-LABEL: f1:
+; X32: movl __emutls_v.i@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
+; X64-LABEL: f1:
+; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+
+entry:
+ %tmp1 = load i32, i32* @i
+ ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32-LABEL: f2:
+; X32: movl __emutls_v.i@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X64-LABEL: f2:
+; X64: movq __emutls_v.i@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+
+entry:
+ ret i32* @i
+}
+
+define i32 @f3() {
+; X32-LABEL: f3:
+; X32: movl __emutls_v.i2@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X64-LABEL: f3:
+; X64: movq __emutls_v.i2@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+
+entry:
+ %tmp1 = load i32, i32* @i2
+ ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32-LABEL: f4:
+; X32: movl __emutls_v.i2@GOT(%ebx), %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: calll __emutls_get_address@PLT
+; X64-LABEL: f4:
+; X64: movq __emutls_v.i2@GOTPCREL(%rip), %rdi
+; X64-NEXT: callq __emutls_get_address@PLT
+
+entry:
+ ret i32* @i2
+}
+
+;;;;; 32-bit targets
+
+; X32: .data
+; X32-LABEL: __emutls_v.i:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i
+
+; X32: .section .rodata,
+; X32-LABEL: __emutls_t.i:
+; X32-NEXT: .long 15
+
+; X32-NOT: __emutls_v.i2
+; X32-NOT: __emutls_t.i2
+
+;;;;; 64-bit targets
+
+; X64: .data
+; X64-LABEL: __emutls_v.i:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i
+
+; X64: .section .rodata,
+; X64-LABEL: __emutls_t.i:
+; X64-NEXT: .long 15
+
+; X64-NOT: __emutls_v.i2
+; X64-NOT: __emutls_t.i2
diff --git a/test/CodeGen/X86/emutls.ll b/test/CodeGen/X86/emutls.ll
new file mode 100644
index 000000000000..9266fe962df2
--- /dev/null
+++ b/test/CodeGen/X86/emutls.ll
@@ -0,0 +1,347 @@
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64 %s
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=x86-linux-android | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-android | FileCheck -check-prefix=X64 %s
+
+; Copied from tls.ll; emulated TLS model is not implemented
+; for *-pc-win32 and *-pc-winows targets yet.
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+; X32-LABEL: my_get_xyz:
+; X32: movl $my_emutls_v_xyz, (%esp)
+; X32-NEXT: calll my_emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+; X64-LABEL: my_get_xyz:
+; X64: movl $my_emutls_v_xyz, %edi
+; X64-NEXT: callq my_emutls_get_address
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+
+entry:
+ %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+ %0 = bitcast i8* %call to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+@i1 = thread_local global i32 15
+@i2 = external thread_local global i32
+@i3 = internal thread_local global i32 15
+@i4 = hidden thread_local global i32 15
+@i5 = external hidden thread_local global i32
+@s1 = thread_local global i16 15
+@b1 = thread_local global i8 0
+
+define i32 @f1() {
+; X32-LABEL: f1:
+; X32: movl $__emutls_v.i1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+; X64-LABEL: f1:
+; X64: movl $__emutls_v.i1, %edi
+; X64-NEXT: callq __emutls_get_address
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+
+entry:
+ %tmp1 = load i32, i32* @i1
+ ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32-LABEL: f2:
+; X32: movl $__emutls_v.i1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+; X64-LABEL: f2:
+; X64: movl $__emutls_v.i1, %edi
+; X64-NEXT: callq __emutls_get_address
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+
+entry:
+ ret i32* @i1
+}
+
+define i32 @f3() nounwind {
+; X32-LABEL: f3:
+; X32: movl $__emutls_v.i2, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i32, i32* @i2
+ ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32-LABEL: f4:
+; X32: movl $__emutls_v.i2, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ ret i32* @i2
+}
+
+define i32 @f5() nounwind {
+; X32-LABEL: f5:
+; X32: movl $__emutls_v.i3, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i32, i32* @i3
+ ret i32 %tmp1
+}
+
+define i32* @f6() {
+; X32-LABEL: f6:
+; X32: movl $__emutls_v.i3, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ ret i32* @i3
+}
+
+define i32 @f7() {
+; X32-LABEL: f7:
+; X32: movl $__emutls_v.i4, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i32, i32* @i4
+ ret i32 %tmp1
+}
+
+define i32* @f8() {
+; X32-LABEL: f8:
+; X32: movl $__emutls_v.i4, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ ret i32* @i4
+}
+
+define i32 @f9() {
+; X32-LABEL: f9:
+; X32: movl $__emutls_v.i5, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i32, i32* @i5
+ ret i32 %tmp1
+}
+
+define i32* @f10() {
+; X32-LABEL: f10:
+; X32: movl $__emutls_v.i5, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ ret i32* @i5
+}
+
+define i16 @f11() {
+; X32-LABEL: f11:
+; X32: movl $__emutls_v.s1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movzwl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i16, i16* @s1
+ ret i16 %tmp1
+}
+
+define i32 @f12() {
+; X32-LABEL: f12:
+; X32: movl $__emutls_v.s1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movswl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i16, i16* @s1
+ %tmp2 = sext i16 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i8 @f13() {
+; X32-LABEL: f13:
+; X32: movl $__emutls_v.b1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movb (%eax), %al
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i8, i8* @b1
+ ret i8 %tmp1
+}
+
+define i32 @f14() {
+; X32-LABEL: f14:
+; X32: movl $__emutls_v.b1, (%esp)
+; X32-NEXT: calll __emutls_get_address
+; X32-NEXT: movsbl (%eax), %eax
+; X32-NEXT: addl $12, %esp
+; X32-NEXT: retl
+
+entry:
+ %tmp1 = load i8, i8* @b1
+ %tmp2 = sext i8 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+;;;;;;;;;;;;;; 32-bit __emutls_v. and __emutls_t.
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.i1:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i1
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.i1:
+; X32-NEXT: .long 15
+
+; X32-NOT: __emutls_v.i2
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.i3:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i3
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.i3:
+; X32-NEXT: .long 15
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.i4:
+; X32-NEXT: .long 4
+; X32-NEXT: .long 4
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.i4
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.i4:
+; X32-NEXT: .long 15
+
+; X32-NOT: __emutls_v.i5:
+; X32 .hidden __emutls_v.i5
+; X32-NOT: __emutls_v.i5:
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.s1:
+; X32-NEXT: .long 2
+; X32-NEXT: .long 2
+; X32-NEXT: .long 0
+; X32-NEXT: .long __emutls_t.s1
+
+; X32 .section .rodata,
+; X32-LABEL: __emutls_t.s1:
+; X32-NEXT: .short 15
+
+; X32 .section .data.rel.local,
+; X32-LABEL: __emutls_v.b1:
+; X32-NEXT: .long 1
+; X32-NEXT: .long 1
+; X32-NEXT: .long 0
+; X32-NEXT: .long 0
+
+; X32-NOT: __emutls_t.b1
+
+;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t.
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.i1:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i1
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.i1:
+; X64-NEXT: .long 15
+
+; X64-NOT: __emutls_v.i2
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.i3:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i3
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.i3:
+; X64-NEXT: .long 15
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.i4:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.i4
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.i4:
+; X64-NEXT: .long 15
+
+; X64-NOT: __emutls_v.i5:
+; X64 .hidden __emutls_v.i5
+; X64-NOT: __emutls_v.i5:
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.s1:
+; X64-NEXT: .quad 2
+; X64-NEXT: .quad 2
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.s1
+
+; X64 .section .rodata,
+; X64-LABEL: __emutls_t.s1:
+; X64-NEXT: .short 15
+
+; X64 .section .data.rel.local,
+; X64-LABEL: __emutls_v.b1:
+; X64-NEXT: .quad 1
+; X64-NEXT: .quad 1
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad 0
+
+; X64-NOT: __emutls_t.b1
diff --git a/test/CodeGen/X86/emutls_generic.ll b/test/CodeGen/X86/emutls_generic.ll
new file mode 100644
index 000000000000..b99a195426c2
--- /dev/null
+++ b/test/CodeGen/X86/emutls_generic.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -emulated-tls -mtriple=i686-linux-android -relocation-model=pic \
+; RUN: | FileCheck -check-prefix=X86_32 %s
+; RUN: llc < %s -emulated-tls -mtriple=x86_64-linux-android -march=x86 -relocation-model=pic \
+; RUN: | FileCheck -check-prefix=X86_32 %s
+; RUN: llc < %s -emulated-tls -mtriple=x86_64-linux-android -relocation-model=pic \
+; RUN: | FileCheck -check-prefix=X86_64 %s
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic \
+; RUN: | FileCheck %s
+
+; Make sure that TLS symbols are emitted in expected order.
+
+@external_x = external thread_local global i32, align 8
+@external_y = thread_local global i8 7, align 2
+@internal_y = internal thread_local global i64 9, align 16
+
+define i32* @get_external_x() {
+entry:
+ ret i32* @external_x
+}
+
+define i8* @get_external_y() {
+entry:
+ ret i8* @external_y
+}
+
+define i64* @get_internal_y() {
+entry:
+ ret i64* @internal_y
+}
+
+; CHECK-LABEL: get_external_x:
+; CHECK-NOT: _tls_get_address
+; CHECK: __emutls_get_address
+; CHECK-LABEL: get_external_y:
+; CHECK: __emutls_get_address
+; CHECK-NOT: _tls_get_address
+; CHECK-LABEL: get_internal_y:
+; CHECK-NOT: __emutls_t.external_x:
+; CHECK-NOT: __emutls_v.external_x:
+; CHECK-LABEL: __emutls_v.external_y:
+; CHECK-LABEL: __emutls_t.external_y:
+; CHECK: __emutls_t.external_y
+; CHECK-LABEL: __emutls_v.internal_y:
+; CHECK-LABEL: __emutls_t.internal_y:
+; CHECK: __emutls_t.internal_y
+
+; X86_32-LABEL: get_external_x:
+; X86_32: movl __emutls_v.external_x
+; X86_32: calll __emutls_get_address
+; X86_32-LABEL: get_external_y:
+; X86_32: movl __emutls_v.external_y
+; X86_32: calll __emutls_get_address
+; X86_32-LABEL: get_internal_y:
+; X86_32: movl __emutls_v.internal_y
+; X86_32: calll __emutls_get_address
+; X86_32-NOT: __emutls_t.external_x
+; X86_32-NOT: __emutls_v.external_x:
+; X86_32: .data
+; X86_32: .align 4
+; X86_32-LABEL: __emutls_v.external_y:
+; X86_32-NEXT: .long 1
+; X86_32-NEXT: .long 2
+; X86_32-NEXT: .long 0
+; X86_32-NEXT: .long __emutls_t.external_y
+; X86_32: .section .rodata,
+; X86_32-LABEL: __emutls_t.external_y:
+; X86_32-NEXT: .byte 7
+; X86_32: .data
+; X86_32: .align 4
+; X86_32-LABEL: __emutls_v.internal_y:
+; X86_32-NEXT: .long 8
+; X86_32-NEXT: .long 16
+; X86_32-NEXT: .long 0
+; X86_32-NEXT: .long __emutls_t.internal_y
+; X86_32-LABEL: __emutls_t.internal_y:
+; X86_32-NEXT: .quad 9
+; X86_64-LABEL: get_external_x:
+; X86_64: __emutls_v.external_x
+; X86_64: __emutls_get_address
+; X86_64-LABEL: get_external_y:
+; X86_64: __emutls_v.external_y
+; X86_64: __emutls_get_address
+; X86_64-LABEL: get_internal_y:
+; X86_64: __emutls_v.internal_y
+; X86_64: __emutls_get_address
+; X86_64-NOT: __emutls_t.external_x
+; X86_64-NOT: __emutls_v.external_x:
+; X86_64: .align 8
+; X86_64-LABEL: __emutls_v.external_y:
+; X86_64-NEXT: .quad 1
+; X86_64-NEXT: .quad 2
+; X86_64-NEXT: .quad 0
+; X86_64-NEXT: .quad __emutls_t.external_y
+; X86_64-NOT: __emutls_v.external_x:
+; X86_64: .section .rodata,
+; X86_64-LABEL: __emutls_t.external_y:
+; X86_64-NEXT: .byte 7
+; X86_64: .data
+; X86_64: .align 8
+; X86_64-LABEL: __emutls_v.internal_y:
+; X86_64-NEXT: .quad 8
+; X86_64-NEXT: .quad 16
+; X86_64-NEXT: .quad 0
+; X86_64-NEXT: .quad __emutls_t.internal_y
+; X86_64: .section .rodata,
+; X86_64-LABEL: __emutls_t.internal_y:
+; X86_64-NEXT: .quad 9
diff --git a/test/CodeGen/X86/exedeps-movq.ll b/test/CodeGen/X86/exedeps-movq.ll
index a5873be6f27f..c1c60981edf5 100644
--- a/test/CodeGen/X86/exedeps-movq.ll
+++ b/test/CodeGen/X86/exedeps-movq.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX
@@ -66,3 +67,21 @@ define void @store_int(<4 x i32> %x, <2 x float>* %p) {
ret void
}
+define void @store_h_double(<2 x double> %x, i64* %p) {
+; SSE-LABEL: store_h_double:
+; SSE: # BB#0:
+; SSE-NEXT: addpd %xmm0, %xmm0
+; SSE-NEXT: movhpd %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_h_double:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovhpd %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = fadd <2 x double> %x, %x
+ %b = extractelement <2 x double> %a, i32 1
+ %c = bitcast double %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
diff --git a/test/CodeGen/X86/expand-vr64-gr64-copy.mir b/test/CodeGen/X86/expand-vr64-gr64-copy.mir
new file mode 100644
index 000000000000..8ce1c7eaae70
--- /dev/null
+++ b/test/CodeGen/X86/expand-vr64-gr64-copy.mir
@@ -0,0 +1,36 @@
+# RUN: llc -run-pass postrapseudos -mtriple=x86_64-unknown-unknown -mattr=+3dnow -o /dev/null %s | FileCheck %s
+# This test verifies that the ExpandPostRA pass expands the GR64 <-> VR64
+# copies into appropriate MMX_MOV instructions.
+
+--- |
+
+ define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone {
+ entry:
+ %0 = bitcast <2 x i32> %a to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0)
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ ret <2 x i32> %2
+ }
+
+ declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
+
+...
+---
+name: test_pswapdsi
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: %xmm0
+
+ %xmm0 = PSHUFDri killed %xmm0, -24
+ MOVPQI2QImr %rsp, 1, _, -8, _, killed %xmm0
+ %mm0 = PSWAPDrm %rsp, 1, _, -8, _
+ ; CHECK: %rax = MMX_MOVD64from64rr %mm0
+ ; CHECK-NEXT: %mm0 = MMX_MOVD64to64rr %rax
+ %rax = COPY %mm0
+ %mm0 = COPY %rax
+ MMX_MOVQ64mr %rsp, 1, _, -16, _, killed %mm0
+ %xmm0 = MOVQI2PQIrm %rsp, 1, _, -16, _
+ %xmm0 = PSHUFDri killed %xmm0, -44
+ RETQ %xmm0
+...
diff --git a/test/CodeGen/X86/extractelement-legalization-cycle.ll b/test/CodeGen/X86/extractelement-legalization-cycle.ll
new file mode 100644
index 000000000000..d75f03ba1680
--- /dev/null
+++ b/test/CodeGen/X86/extractelement-legalization-cycle.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; When the extractelement is converted to a load the store can be re-used.
+; This will, however, introduce a cycle into the selection DAG (the load
+; of the extractelement index is dependent on the store, and so after the
+; conversion it becomes dependent on the new load, which is dependent on
+; the index). Make sure we skip the store, and conservatively instead
+; use a store to the stack.
+
+define float @foo(i32* %i, <4 x float>* %v) {
+; CHECK-LABEL: foo:
+; CHECK: movaps %xmm0, -[[OFFSET:[0-9]+]](%rsp)
+; CHECK: movss -[[OFFSET]](%rsp,{{.*}}), %xmm0 {{.*}}
+; CHECK-NEXT: retq
+ %1 = load <4 x float>, <4 x float>* %v, align 16
+ %mul = fmul <4 x float> %1, %1
+ store <4 x float> %mul, <4 x float>* %v, align 16
+ %2 = load i32, i32* %i, align 4
+ %vecext = extractelement <4 x float> %mul, i32 %2
+ ret float %vecext
+}
diff --git a/test/CodeGen/X86/extractelement-shuffle.ll b/test/CodeGen/X86/extractelement-shuffle.ll
index d1ba9a845800..1b04c41d5c6f 100644
--- a/test/CodeGen/X86/extractelement-shuffle.ll
+++ b/test/CodeGen/X86/extractelement-shuffle.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; REQUIRES: default_triple
; Examples that exhibits a bug in DAGCombine. The case is triggered by the
; following program. The bug is DAGCombine assumes that the bit convert
diff --git a/test/CodeGen/X86/fadd-combines.ll b/test/CodeGen/X86/fadd-combines.ll
new file mode 100644
index 000000000000..2df0e06dc252
--- /dev/null
+++ b/test/CodeGen/X86/fadd-combines.ll
@@ -0,0 +1,224 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+define float @fadd_zero_f32(float %x) #0 {
+; CHECK-LABEL: fadd_zero_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: retq
+ %y = fadd float %x, 0.0
+ ret float %y
+}
+
+define <4 x float> @fadd_zero_4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fadd_zero_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: retq
+ %y = fadd <4 x float> %x, zeroinitializer
+ ret <4 x float> %y
+}
+
+; CHECK: float 3
+define float @fadd_2const_f32(float %x) #0 {
+; CHECK-LABEL: fadd_2const_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: addss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd float %x, 1.0
+ %z = fadd float %y, 2.0
+ ret float %z
+}
+
+; CHECK: float 5
+; CHECK: float 5
+; CHECK: float 5
+; CHECK: float 5
+define <4 x float> @fadd_2const_4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fadd_2const_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: addps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %z = fadd <4 x float> %y, <float 4.0, float 3.0, float 2.0, float 1.0>
+ ret <4 x float> %z
+}
+
+; CHECK: float 3
+define float @fadd_x_fmul_x_c_f32(float %x) #0 {
+; CHECK-LABEL: fadd_x_fmul_x_c_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fmul float %x, 2.0
+ %z = fadd float %x, %y
+ ret float %z
+}
+
+; CHECK: float 2
+; CHECK: float 3
+; CHECK: float 4
+; CHECK: float 5
+define <4 x float> @fadd_x_fmul_x_c_4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fadd_x_fmul_x_c_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %z = fadd <4 x float> %x, %y
+ ret <4 x float> %z
+}
+
+; CHECK: float 3
+define float @fadd_fmul_x_c_x_f32(float %x) #0 {
+; CHECK-LABEL: fadd_fmul_x_c_x_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fmul float %x, 2.0
+ %z = fadd float %y, %x
+ ret float %z
+}
+
+; CHECK: float 2
+; CHECK: float 3
+; CHECK: float 4
+; CHECK: float 5
+define <4 x float> @fadd_fmul_x_c_x_4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fadd_fmul_x_c_x_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %z = fadd <4 x float> %y, %x
+ ret <4 x float> %z
+}
+
+; CHECK: float 4
+define float @fadd_fadd_x_x_fmul_x_c_f32(float %x) #0 {
+; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd float %x, %x
+ %z = fmul float %x, 2.0
+ %w = fadd float %y, %z
+ ret float %w
+}
+
+; CHECK: float 3
+; CHECK: float 4
+; CHECK: float 5
+; CHECK: float 6
+define <4 x float> @fadd_fadd_x_x_fmul_x_c_4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fadd_fadd_x_x_fmul_x_c_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd <4 x float> %x, %x
+ %z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %w = fadd <4 x float> %y, %z
+ ret <4 x float> %w
+}
+
+; CHECK: float 4
+define float @fadd_fmul_x_c_fadd_x_x_f32(float %x) #0 {
+; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd float %x, %x
+ %z = fmul float %x, 2.0
+ %w = fadd float %z, %y
+ ret float %w
+}
+
+; CHECK: float 3
+; CHECK: float 4
+; CHECK: float 5
+; CHECK: float 6
+define <4 x float> @fadd_fmul_x_c_fadd_x_x_4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fadd_fmul_x_c_fadd_x_x_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd <4 x float> %x, %x
+ %z = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %w = fadd <4 x float> %z, %y
+ ret <4 x float> %w
+}
+
+; CHECK: float 3
+define float @fadd_x_fadd_x_x_f32(float %x) #0 {
+; CHECK-LABEL: fadd_x_fadd_x_x_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd float %x, %x
+ %z = fadd float %x, %y
+ ret float %z
+}
+
+; CHECK: float 3
+; CHECK: float 3
+; CHECK: float 3
+; CHECK: float 3
+define <4 x float> @fadd_x_fadd_x_x_4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fadd_x_fadd_x_x_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd <4 x float> %x, %x
+ %z = fadd <4 x float> %x, %y
+ ret <4 x float> %z
+}
+
+; CHECK: float 3
+define float @fadd_fadd_x_x_x_f32(float %x) #0 {
+; CHECK-LABEL: fadd_fadd_x_x_x_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd float %x, %x
+ %z = fadd float %y, %x
+ ret float %z
+}
+
+; CHECK: float 3
+; CHECK: float 3
+; CHECK: float 3
+; CHECK: float 3
+define <4 x float> @fadd_fadd_x_x_x_4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fadd_fadd_x_x_x_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd <4 x float> %x, %x
+ %z = fadd <4 x float> %y, %x
+ ret <4 x float> %z
+}
+
+; CHECK: float 4
+define float @fadd_fadd_x_x_fadd_x_x_f32(float %x) #0 {
+; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd float %x, %x
+ %z = fadd float %y, %y
+ ret float %z
+}
+
+; CHECK: float 4
+; CHECK: float 4
+; CHECK: float 4
+; CHECK: float 4
+define <4 x float> @fadd_fadd_x_x_fadd_x_x_4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fadd_fadd_x_x_fadd_x_x_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %y = fadd <4 x float> %x, %x
+ %z = fadd <4 x float> %y, %y
+ ret <4 x float> %z
+}
+
+attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/fast-isel-bitcasts-avx.ll b/test/CodeGen/X86/fast-isel-bitcasts-avx.ll
new file mode 100644
index 000000000000..03cefbc86822
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-bitcasts-avx.ll
@@ -0,0 +1,244 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -fast-isel-abort=1 -asm-verbose=0 | FileCheck %s
+;
+; Bitcasts between 256-bit vector types are no-ops since no instruction is
+; needed for the conversion.
+
+define <4 x i64> @v8i32_to_v4i64(<8 x i32> %a) {
+;CHECK-LABEL: v8i32_to_v4i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i32> %a to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @v16i16_to_v4i64(<16 x i16> %a) {
+;CHECK-LABEL: v16i16_to_v4i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i16> %a to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @v32i8_to_v4i64(<32 x i8> %a) {
+;CHECK-LABEL: v32i8_to_v4i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i8> %a to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @v4f64_to_v4i64(<4 x double> %a) {
+;CHECK-LABEL: v4f64_to_v4i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x double> %a to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @v8f32_to_v4i64(<8 x float> %a) {
+;CHECK-LABEL: v8f32_to_v4i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x float> %a to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <8 x i32> @v4i64_to_v8i32(<4 x i64> %a) {
+;CHECK-LABEL: v4i64_to_v8i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i64> %a to <8 x i32>
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @v16i16_to_v8i32(<16 x i16> %a) {
+;CHECK-LABEL: v16i16_to_v8i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i16> %a to <8 x i32>
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @v32i8_to_v8i32(<32 x i8> %a) {
+;CHECK-LABEL: v32i8_to_v8i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i8> %a to <8 x i32>
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @v4f64_to_v8i32(<4 x double> %a) {
+;CHECK-LABEL: v4f64_to_v8i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x double> %a to <8 x i32>
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @v8f32_to_v8i32(<8 x float> %a) {
+;CHECK-LABEL: v8f32_to_v8i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x float> %a to <8 x i32>
+ ret <8 x i32> %1
+}
+
+define <16 x i16> @v4i64_to_v16i16(<4 x i64> %a) {
+;CHECK-LABEL: v4i64_to_v16i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i64> %a to <16 x i16>
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @v8i32_to_v16i16(<8 x i32> %a) {
+;CHECK-LABEL: v8i32_to_v16i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i32> %a to <16 x i16>
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @v32i8_to_v16i16(<32 x i8> %a) {
+;CHECK-LABEL: v32i8_to_v16i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i8> %a to <16 x i16>
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @v4f64_to_v16i16(<4 x double> %a) {
+;CHECK-LABEL: v4f64_to_v16i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x double> %a to <16 x i16>
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @v8f32_to_v16i16(<8 x float> %a) {
+;CHECK-LABEL: v8f32_to_v16i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x float> %a to <16 x i16>
+ ret <16 x i16> %1
+}
+
+define <32 x i8> @v16i16_to_v32i8(<16 x i16> %a) {
+;CHECK-LABEL: v16i16_to_v32i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i16> %a to <32 x i8>
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @v4i64_to_v32i8(<4 x i64> %a) {
+;CHECK-LABEL: v4i64_to_v32i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i64> %a to <32 x i8>
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @v8i32_to_v32i8(<8 x i32> %a) {
+;CHECK-LABEL: v8i32_to_v32i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i32> %a to <32 x i8>
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @v4f64_to_v32i8(<4 x double> %a) {
+;CHECK-LABEL: v4f64_to_v32i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x double> %a to <32 x i8>
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @v8f32_to_v32i8(<8 x float> %a) {
+;CHECK-LABEL: v8f32_to_v32i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x float> %a to <32 x i8>
+ ret <32 x i8> %1
+}
+
+define <8 x float> @v32i8_to_v8f32(<32 x i8> %a) {
+;CHECK-LABEL: v32i8_to_v8f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i8> %a to <8 x float>
+ ret <8 x float> %1
+}
+
+define <8 x float> @v16i16_to_v8f32(<16 x i16> %a) {
+;CHECK-LABEL: v16i16_to_v8f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i16> %a to <8 x float>
+ ret <8 x float> %1
+}
+
+define <8 x float> @v4i64_to_v8f32(<4 x i64> %a) {
+;CHECK-LABEL: v4i64_to_v8f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i64> %a to <8 x float>
+ ret <8 x float> %1
+}
+
+define <8 x float> @v8i32_to_v8f32(<8 x i32> %a) {
+;CHECK-LABEL: v8i32_to_v8f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i32> %a to <8 x float>
+ ret <8 x float> %1
+}
+
+define <8 x float> @v4f64_to_v8f32(<4 x double> %a) {
+;CHECK-LABEL: v4f64_to_v8f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x double> %a to <8 x float>
+ ret <8 x float> %1
+}
+
+define <4 x double> @v8f32_to_v4f64(<8 x float> %a) {
+;CHECK-LABEL: v8f32_to_v4f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x float> %a to <4 x double>
+ ret <4 x double> %1
+}
+
+define <4 x double> @v32i8_to_v4f64(<32 x i8> %a) {
+;CHECK-LABEL: v32i8_to_v4f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <32 x i8> %a to <4 x double>
+ ret <4 x double> %1
+}
+
+define <4 x double> @v16i16_to_v4f64(<16 x i16> %a) {
+;CHECK-LABEL: v16i16_to_v4f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i16> %a to <4 x double>
+ ret <4 x double> %1
+}
+
+define <4 x double> @v4i64_to_v4f64(<4 x i64> %a) {
+;CHECK-LABEL: v4i64_to_v4f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i64> %a to <4 x double>
+ ret <4 x double> %1
+}
+
+define <4 x double> @v8i32_to_v4f64(<8 x i32> %a) {
+;CHECK-LABEL: v8i32_to_v4f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i32> %a to <4 x double>
+ ret <4 x double> %1
+}
diff --git a/test/CodeGen/X86/fast-isel-bitcasts.ll b/test/CodeGen/X86/fast-isel-bitcasts.ll
new file mode 100644
index 000000000000..892b517fe873
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-bitcasts.ll
@@ -0,0 +1,245 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel -fast-isel-abort=1 -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -fast-isel-abort=1 -asm-verbose=0 | FileCheck %s
+;
+; Bitcasts between 128-bit vector types are no-ops since no instruction is
+; needed for the conversion.
+
+define <2 x i64> @v4i32_to_v2i64(<4 x i32> %a) {
+;CHECK-LABEL: v4i32_to_v2i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i32> %a to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @v8i16_to_v2i64(<8 x i16> %a) {
+;CHECK-LABEL: v8i16_to_v2i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i16> %a to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @v16i8_to_v2i64(<16 x i8> %a) {
+;CHECK-LABEL: v16i8_to_v2i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i8> %a to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @v2f64_to_v2i64(<2 x double> %a) {
+;CHECK-LABEL: v2f64_to_v2i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x double> %a to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @v4f32_to_v2i64(<4 x float> %a) {
+;CHECK-LABEL: v4f32_to_v2i64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x float> %a to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <4 x i32> @v2i64_to_v4i32(<2 x i64> %a) {
+;CHECK-LABEL: v2i64_to_v4i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x i64> %a to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @v8i16_to_v4i32(<8 x i16> %a) {
+;CHECK-LABEL: v8i16_to_v4i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i16> %a to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @v16i8_to_v4i32(<16 x i8> %a) {
+;CHECK-LABEL: v16i8_to_v4i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i8> %a to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @v2f64_to_v4i32(<2 x double> %a) {
+;CHECK-LABEL: v2f64_to_v4i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x double> %a to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @v4f32_to_v4i32(<4 x float> %a) {
+;CHECK-LABEL: v4f32_to_v4i32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x float> %a to <4 x i32>
+ ret <4 x i32> %1
+}
+
+define <8 x i16> @v2i64_to_v8i16(<2 x i64> %a) {
+;CHECK-LABEL: v2i64_to_v8i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x i64> %a to <8 x i16>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @v4i32_to_v8i16(<4 x i32> %a) {
+;CHECK-LABEL: v4i32_to_v8i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i32> %a to <8 x i16>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @v16i8_to_v8i16(<16 x i8> %a) {
+;CHECK-LABEL: v16i8_to_v8i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i8> %a to <8 x i16>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @v2f64_to_v8i16(<2 x double> %a) {
+;CHECK-LABEL: v2f64_to_v8i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x double> %a to <8 x i16>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @v4f32_to_v8i16(<4 x float> %a) {
+;CHECK-LABEL: v4f32_to_v8i16:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x float> %a to <8 x i16>
+ ret <8 x i16> %1
+}
+
+define <16 x i8> @v8i16_to_v16i8(<8 x i16> %a) {
+;CHECK-LABEL: v8i16_to_v16i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i16> %a to <16 x i8>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @v2i64_to_v16i8(<2 x i64> %a) {
+;CHECK-LABEL: v2i64_to_v16i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x i64> %a to <16 x i8>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @v4i32_to_v16i8(<4 x i32> %a) {
+;CHECK-LABEL: v4i32_to_v16i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i32> %a to <16 x i8>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @v2f64_to_v16i8(<2 x double> %a) {
+;CHECK-LABEL: v2f64_to_v16i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x double> %a to <16 x i8>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @v4f32_to_v16i8(<4 x float> %a) {
+;CHECK-LABEL: v4f32_to_v16i8:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x float> %a to <16 x i8>
+ ret <16 x i8> %1
+}
+
+define <4 x float> @v16i8_to_v4f32(<16 x i8> %a) {
+;CHECK-LABEL: v16i8_to_v4f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i8> %a to <4 x float>
+ ret <4 x float> %1
+}
+
+define <4 x float> @v8i16_to_v4f32(<8 x i16> %a) {
+;CHECK-LABEL: v8i16_to_v4f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i16> %a to <4 x float>
+ ret <4 x float> %1
+}
+
+define <4 x float> @v2i64_to_v4f32(<2 x i64> %a) {
+;CHECK-LABEL: v2i64_to_v4f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x i64> %a to <4 x float>
+ ret <4 x float> %1
+}
+
+define <4 x float> @v4i32_to_v4f32(<4 x i32> %a) {
+;CHECK-LABEL: v4i32_to_v4f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i32> %a to <4 x float>
+ ret <4 x float> %1
+}
+
+define <4 x float> @v2f64_to_v4f32(<2 x double> %a) {
+;CHECK-LABEL: v2f64_to_v4f32:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x double> %a to <4 x float>
+ ret <4 x float> %1
+}
+
+define <2 x double> @v4f32_to_v2f64(<4 x float> %a) {
+;CHECK-LABEL: v4f32_to_v2f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x float> %a to <2 x double>
+ ret <2 x double> %1
+}
+
+define <2 x double> @v16i8_to_v2f64(<16 x i8> %a) {
+;CHECK-LABEL: v16i8_to_v2f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <16 x i8> %a to <2 x double>
+ ret <2 x double> %1
+}
+
+define <2 x double> @v8i16_to_v2f64(<8 x i16> %a) {
+;CHECK-LABEL: v8i16_to_v2f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <8 x i16> %a to <2 x double>
+ ret <2 x double> %1
+}
+
+define <2 x double> @v2i64_to_v2f64(<2 x i64> %a) {
+;CHECK-LABEL: v2i64_to_v2f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <2 x i64> %a to <2 x double>
+ ret <2 x double> %1
+}
+
+define <2 x double> @v4i32_to_v2f64(<4 x i32> %a) {
+;CHECK-LABEL: v4i32_to_v2f64:
+;CHECK-NEXT: .cfi_startproc
+;CHECK-NEXT: ret
+ %1 = bitcast <4 x i32> %a to <2 x double>
+ ret <2 x double> %1
+}
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll
index d7b64ed3a5b8..e262448468eb 100644
--- a/test/CodeGen/X86/fast-isel-cmp-branch.ll
+++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll
@@ -1,5 +1,18 @@
-; RUN: llc -O0 -mtriple=x86_64-linux -asm-verbose=false < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=x86_64-windows-itanium -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=x86_64-linux -asm-verbose=false -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=x86_64-windows-itanium -asm-verbose=false -verify-machineinstrs < %s | FileCheck %s
+
+; Fast-isel mustn't add a block to the MBB successor/predecessor list twice.
+; The machine verifier will catch and complain about this case.
+; CHECK-LABEL: baz
+; CHECK: retq
+define void @baz() {
+entry:
+ br i1 undef, label %exit, label %exit
+
+exit:
+ ret void
+}
+
; rdar://8337108
; Fast-isel shouldn't try to look through the compare because it's in a
diff --git a/test/CodeGen/X86/fast-isel-deadcode.ll b/test/CodeGen/X86/fast-isel-deadcode.ll
new file mode 100644
index 000000000000..0a53d60f8352
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-deadcode.ll
@@ -0,0 +1,147 @@
+; RUN: llc < %s | FileCheck %s
+;
+; Generated with clang -O2 -S -emit-llvm
+;
+; /* Test 1 */
+; extern "C" bool bar (long double);
+; __attribute__((optnone))
+; extern "C" bool foo(long double x, long double y)
+; {
+; return (x == y) || (bar(x));
+; }
+;
+; /* Test 2 */
+; struct FVector {
+; float x, y, z;
+; inline __attribute__((always_inline)) FVector(float f): x(f), y(f), z(f) {}
+; inline __attribute__((always_inline)) FVector func(float p) const
+; {
+; if( x == 1.f ) {
+; return *this;
+; } else if( x < p ) {
+; return FVector(0.f);
+; }
+; return FVector(x);
+; }
+; };
+;
+; __attribute__((optnone))
+; int main()
+; {
+; FVector v(1.0);
+; v = v.func(1.e-8);
+; return 0;
+; }
+;
+; ModuleID = 'test.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.FVector = type { float, float, float }
+
+define zeroext i1 @foo(x86_fp80 %x, x86_fp80 %y) noinline optnone {
+entry:
+ %x.addr = alloca x86_fp80, align 16
+ %y.addr = alloca x86_fp80, align 16
+ store x86_fp80 %x, x86_fp80* %x.addr, align 16
+ store x86_fp80 %y, x86_fp80* %y.addr, align 16
+ %0 = load x86_fp80, x86_fp80* %x.addr, align 16
+ %1 = load x86_fp80, x86_fp80* %y.addr, align 16
+ %cmp = fcmp oeq x86_fp80 %0, %1
+
+; Test 1
+; Make sure that there is no dead code generated
+; from Fast-ISel Phi-node handling. We should only
+; see one movb of the constant 1, feeding the PHI
+; node in lor.end. This covers the code path with
+; handlePHINodesInSuccessorBlocks() returning true.
+;
+; CHECK-LABEL: foo:
+; CHECK: movb $1,
+; CHECK-NOT: movb $1,
+; CHECK-LABEL: .LBB0_1:
+
+ br i1 %cmp, label %lor.end, label %lor.rhs
+
+lor.rhs: ; preds = %entry
+ %2 = load x86_fp80, x86_fp80* %x.addr, align 16
+ %call = call zeroext i1 @bar(x86_fp80 %2)
+ br label %lor.end
+
+lor.end: ; preds = %lor.rhs, %entry
+ %3 = phi i1 [ true, %entry ], [ %call, %lor.rhs ]
+ ret i1 %3
+}
+
+declare zeroext i1 @bar(x86_fp80)
+
+define i32 @main() noinline optnone {
+entry:
+ %retval = alloca i32, align 4
+ %v = alloca %struct.FVector, align 4
+ %ref.tmp = alloca %struct.FVector, align 4
+ %tmp = alloca { <2 x float>, float }, align 8
+ store i32 0, i32* %retval, align 4
+ %0 = bitcast %struct.FVector* %v to i8*
+ call void @llvm.lifetime.start(i64 12, i8* %0) nounwind
+ %x.i = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 0
+ store float 1.000000e+00, float* %x.i, align 4
+ %y.i = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 1
+ store float 1.000000e+00, float* %y.i, align 4
+ %z.i = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 2
+ store float 1.000000e+00, float* %z.i, align 4
+ %x.i.1 = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 0
+ %1 = load float, float* %x.i.1, align 4
+ %cmp.i = fcmp oeq float %1, 1.000000e+00
+ br i1 %cmp.i, label %if.then.i, label %if.else.i
+
+if.then.i: ; preds = %entry
+ %retval.sroa.0.0..sroa_cast.i = bitcast %struct.FVector* %v to <2 x float>*
+ %retval.sroa.0.0.copyload.i = load <2 x float>, <2 x float>* %retval.sroa.0.0..sroa_cast.i, align 4
+ %retval.sroa.6.0..sroa_idx16.i = getelementptr inbounds %struct.FVector, %struct.FVector* %v, i64 0, i32 2
+ %retval.sroa.6.0.copyload.i = load float, float* %retval.sroa.6.0..sroa_idx16.i, align 4
+ br label %func.exit
+
+if.else.i: ; preds = %entry
+
+; Test 2
+; In order to feed the first PHI node in func.exit handlePHINodesInSuccessorBlocks()
+; generates a local value instruction, but it cannot handle the second PHI node and
+; returns false to let SelectionDAGISel handle both cases. Make sure the generated
+; local value instruction is removed.
+; CHECK-LABEL: main:
+; CHECK-LABEL: .LBB1_2:
+; CHECK: xorps [[REG:%xmm[0-7]]], [[REG]]
+; CHECK-NOT: xorps [[REG]], [[REG]]
+; CHECK-LABEL: .LBB1_3:
+
+ %cmp3.i = fcmp olt float %1, 0x3E45798EE0000000
+ br i1 %cmp3.i, label %func.exit, label %if.end.5.i
+
+if.end.5.i: ; preds = %if.else.i
+ %retval.sroa.0.0.vec.insert13.i = insertelement <2 x float> undef, float %1, i32 0
+ %retval.sroa.0.4.vec.insert15.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert13.i, float %1, i32 1
+ br label %func.exit
+
+func.exit: ; preds = %if.then.i, %if.else.i, %if.end.5.i
+ %retval.sroa.6.0.i = phi float [ %retval.sroa.6.0.copyload.i, %if.then.i ], [ %1, %if.end.5.i ], [ 0.000000e+00, %if.else.i ]
+ %retval.sroa.0.0.i = phi <2 x float> [ %retval.sroa.0.0.copyload.i, %if.then.i ], [ %retval.sroa.0.4.vec.insert15.i, %if.end.5.i ], [ zeroinitializer, %if.else.i ]
+ %.fca.0.insert.i = insertvalue { <2 x float>, float } undef, <2 x float> %retval.sroa.0.0.i, 0
+ %.fca.1.insert.i = insertvalue { <2 x float>, float } %.fca.0.insert.i, float %retval.sroa.6.0.i, 1
+ store { <2 x float>, float } %.fca.1.insert.i, { <2 x float>, float }* %tmp, align 8
+ %2 = bitcast { <2 x float>, float }* %tmp to i8*
+ %3 = bitcast %struct.FVector* %ref.tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 12, i32 4, i1 false)
+ %4 = bitcast %struct.FVector* %v to i8*
+ %5 = bitcast %struct.FVector* %ref.tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %5, i64 12, i32 4, i1 false)
+ %6 = bitcast %struct.FVector* %v to i8*
+ call void @llvm.lifetime.end(i64 12, i8* %6) nounwind
+ ret i32 0
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) argmemonly nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) argmemonly nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) argmemonly nounwind
diff --git a/test/CodeGen/X86/fast-isel-emutls.ll b/test/CodeGen/X86/fast-isel-emutls.ll
new file mode 100644
index 000000000000..cb8012c0fa39
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-emutls.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -emulated-tls -march=x86 -relocation-model=pic -mtriple=i686-unknown-linux-gnu -fast-isel | FileCheck %s
+; PR3654
+
+@v = thread_local global i32 0
+define i32 @f() nounwind {
+entry:
+ %t = load i32, i32* @v
+ %s = add i32 %t, 1
+ ret i32 %s
+}
+
+; CHECK-LABEL: f:
+; CHECK: movl __emutls_v.v@GOT(%ebx), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: calll __emutls_get_address@PLT
+; CHECK-NEXT: movl (%eax), %eax
+
+@alias = internal alias i32, i32* @v
+define i32 @f_alias() nounwind {
+entry:
+ %t = load i32, i32* @v
+ %s = add i32 %t, 1
+ ret i32 %s
+}
+
+; CHECK-LABEL: f_alias:
+; CHECK: movl __emutls_v.v@GOT(%ebx), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: calll __emutls_get_address@PLT
+; CHECK-NEXT: movl (%eax), %eax
+
+; Use my_emutls_get_address like __emutls_get_address.
+@my_emutls_v_xyz = external global i8*, align 4
+declare i8* @my_emutls_get_address(i8*)
+
+define i32 @my_get_xyz() {
+entry:
+ %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
+ %0 = bitcast i8* %call to i32*
+ %1 = load i32, i32* %0, align 4
+ ret i32 %1
+}
+
+; CHECK-LABEL: my_get_xyz:
+; CHECK: movl my_emutls_v_xyz@GOT(%ebx), %eax
+; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: calll my_emutls_get_address@PLT
+; CHECK-NEXT: movl (%eax), %eax
diff --git a/test/CodeGen/X86/fast-isel-nontemporal.ll b/test/CodeGen/X86/fast-isel-nontemporal.ll
new file mode 100644
index 000000000000..6a174dbf5a8a
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4a -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE4A
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+
+define void @test_nti32(i32* nocapture %ptr, i32 %X) {
+; ALL-LABEL: test_nti32:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: movntil %esi, (%rdi)
+; ALL-NEXT: retq
+entry:
+ store i32 %X, i32* %ptr, align 4, !nontemporal !1
+ ret void
+}
+
+define void @test_nti64(i64* nocapture %ptr, i64 %X) {
+; ALL-LABEL: test_nti64:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: movntiq %rsi, (%rdi)
+; ALL-NEXT: retq
+entry:
+ store i64 %X, i64* %ptr, align 8, !nontemporal !1
+ ret void
+}
+
+define void @test_ntfloat(float* nocapture %ptr, float %X) {
+; SSE2-LABEL: test_ntfloat:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movss %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSE4A-LABEL: test_ntfloat:
+; SSE4A: # BB#0: # %entry
+; SSE4A-NEXT: movntss %xmm0, (%rdi)
+; SSE4A-NEXT: retq
+;
+; AVX-LABEL: test_ntfloat:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovss %xmm0, (%rdi)
+; AVX-NEXT: retq
+entry:
+ store float %X, float* %ptr, align 4, !nontemporal !1
+ ret void
+}
+
+define void @test_ntdouble(double* nocapture %ptr, double %X) {
+; SSE2-LABEL: test_ntdouble:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movsd %xmm0, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSE4A-LABEL: test_ntdouble:
+; SSE4A: # BB#0: # %entry
+; SSE4A-NEXT: movntsd %xmm0, (%rdi)
+; SSE4A-NEXT: retq
+;
+; AVX-LABEL: test_ntdouble:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovsd %xmm0, (%rdi)
+; AVX-NEXT: retq
+entry:
+ store double %X, double* %ptr, align 8, !nontemporal !1
+ ret void
+}
+
+define void @test_nt4xfloat(<4 x float>* nocapture %ptr, <4 x float> %X) {
+; SSE-LABEL: test_nt4xfloat:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movntps %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_nt4xfloat:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovntps %xmm0, (%rdi)
+; AVX-NEXT: retq
+entry:
+ store <4 x float> %X, <4 x float>* %ptr, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_nt2xdouble(<2 x double>* nocapture %ptr, <2 x double> %X) {
+; SSE-LABEL: test_nt2xdouble:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movntpd %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_nt2xdouble:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovntpd %xmm0, (%rdi)
+; AVX-NEXT: retq
+entry:
+ store <2 x double> %X, <2 x double>* %ptr, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_nt2xi64(<2 x i64>* nocapture %ptr, <2 x i64> %X) {
+; SSE-LABEL: test_nt2xi64:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movntdq %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_nt2xi64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovntdq %xmm0, (%rdi)
+; AVX-NEXT: retq
+entry:
+ store <2 x i64> %X, <2 x i64>* %ptr, align 16, !nontemporal !1
+ ret void
+}
+
+!1 = !{i32 1}
diff --git a/test/CodeGen/X86/fast-isel-stackcheck.ll b/test/CodeGen/X86/fast-isel-stackcheck.ll
new file mode 100644
index 000000000000..3b7318fa77d9
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-stackcheck.ll
@@ -0,0 +1,44 @@
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; selectiondag stack protector uses a GuardReg which the fast-isel stack
+; protection code did not but the state was not reset properly.
+; The optnone attribute on @bar forces fast-isel.
+
+; CHECK-LABEL: foo:
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
+; CHECK-NOT: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
+define void @foo() #0 {
+entry:
+ %_tags = alloca [3 x i32], align 4
+ ret void
+}
+
+; CHECK-LABEL: bar:
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
+define void @bar() #1 {
+entry:
+ %vt = alloca [2 x double], align 16
+ br i1 undef, label %cleanup.4091, label %for.cond.3850
+
+unreachable:
+ unreachable
+
+for.cond.3850:
+ br i1 undef, label %land.rhs.3853, label %land.end.3857
+
+land.rhs.3853:
+ br label %land.end.3857
+
+land.end.3857:
+ %0 = phi i1 [ false, %for.cond.3850 ], [ false, %land.rhs.3853 ]
+ br i1 %0, label %unreachable, label %unreachable
+
+cleanup.4091:
+ ret void
+}
+
+attributes #0 = { ssp }
+attributes #1 = { noinline optnone ssp }
diff --git a/test/CodeGen/X86/fast-isel-tls.ll b/test/CodeGen/X86/fast-isel-tls.ll
index 18bb9c13ff01..0b7a5d9759d2 100644
--- a/test/CodeGen/X86/fast-isel-tls.ll
+++ b/test/CodeGen/X86/fast-isel-tls.ll
@@ -13,7 +13,7 @@ entry:
; CHECK: leal v@TLSGD
; CHECK: __tls_get_addr
-@alias = internal alias i32* @v
+@alias = internal alias i32, i32* @v
define i32 @f_alias() nounwind {
entry:
%t = load i32, i32* @v
diff --git a/test/CodeGen/X86/fdiv-combine.ll b/test/CodeGen/X86/fdiv-combine.ll
index b65e9d01ab8b..d9d9ac401fb5 100644
--- a/test/CodeGen/X86/fdiv-combine.ll
+++ b/test/CodeGen/X86/fdiv-combine.ll
@@ -1,9 +1,11 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
-; Anything more than one division using a single divisor operand
+; More than one 'arcp' division using a single divisor operand
; should be converted into a reciprocal and multiplication.
-define float @div1_arcp(float %x, float %y, float %z) #0 {
+; Don't do anything for just one division.
+
+define float @div1_arcp(float %x, float %y, float %z) {
; CHECK-LABEL: div1_arcp:
; CHECK: # BB#0:
; CHECK-NEXT: divss %xmm1, %xmm0
@@ -12,13 +14,15 @@ define float @div1_arcp(float %x, float %y, float %z) #0 {
ret float %div1
}
-define float @div2_arcp(float %x, float %y, float %z) #0 {
-; CHECK-LABEL: div2_arcp:
+; All math instructions are 'arcp', so optimize.
+
+define float @div2_arcp_all(float %x, float %y, float %z) {
+; CHECK-LABEL: div2_arcp_all:
; CHECK: # BB#0:
; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT: divss %xmm2, %xmm3
-; CHECK-NEXT: mulss %xmm1, %xmm0
; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: mulss %xmm1, %xmm0
; CHECK-NEXT: mulss %xmm3, %xmm0
; CHECK-NEXT: retq
%div1 = fdiv arcp float %x, %z
@@ -27,10 +31,57 @@ define float @div2_arcp(float %x, float %y, float %z) #0 {
ret float %div2
}
+; The first division is not 'arcp', so do not optimize.
+
+define float @div2_arcp_partial1(float %x, float %y, float %z) {
+; CHECK-LABEL: div2_arcp_partial1:
+; CHECK: # BB#0:
+; CHECK-NEXT: divss %xmm2, %xmm0
+; CHECK-NEXT: mulss %xmm1, %xmm0
+; CHECK-NEXT: divss %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %div1 = fdiv float %x, %z
+ %mul = fmul arcp float %div1, %y
+ %div2 = fdiv arcp float %mul, %z
+ ret float %div2
+}
+
+; The second division is not 'arcp', so do not optimize.
+
+define float @div2_arcp_partial2(float %x, float %y, float %z) {
+; CHECK-LABEL: div2_arcp_partial2:
+; CHECK: # BB#0:
+; CHECK-NEXT: divss %xmm2, %xmm0
+; CHECK-NEXT: mulss %xmm1, %xmm0
+; CHECK-NEXT: divss %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %div1 = fdiv arcp float %x, %z
+ %mul = fmul arcp float %div1, %y
+ %div2 = fdiv float %mul, %z
+ ret float %div2
+}
+
+; The multiply is not 'arcp', but that does not prevent optimizing the divisions.
+
+define float @div2_arcp_partial3(float %x, float %y, float %z) {
+; CHECK-LABEL: div2_arcp_partial3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: divss %xmm2, %xmm3
+; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: mulss %xmm1, %xmm0
+; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %div1 = fdiv arcp float %x, %z
+ %mul = fmul float %div1, %y
+ %div2 = fdiv arcp float %mul, %z
+ ret float %div2
+}
+
; If the reciprocal is already calculated, we should not
; generate an extra multiplication by 1.0.
-define double @div3_arcp(double %x, double %y, double %z) #0 {
+define double @div3_arcp(double %x, double %y, double %z) {
; CHECK-LABEL: div3_arcp:
; CHECK: # BB#0:
; CHECK-NEXT: movsd{{.*#+}} xmm2 = mem[0],zero
@@ -44,7 +95,7 @@ define double @div3_arcp(double %x, double %y, double %z) #0 {
ret double %ret
}
-define void @PR24141() #0 {
+define void @PR24141() {
; CHECK-LABEL: PR24141:
; CHECK: callq
; CHECK-NEXT: divsd
@@ -57,11 +108,9 @@ while.body:
%call = call { double, double } @g(double %x.0)
%xv0 = extractvalue { double, double } %call, 0
%xv1 = extractvalue { double, double } %call, 1
- %div = fdiv double %xv0, %xv1
+ %div = fdiv arcp double %xv0, %xv1
br label %while.body
}
declare { double, double } @g(double)
-; FIXME: If the backend understands 'arcp', then this attribute is unnecessary.
-attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/fdiv.ll b/test/CodeGen/X86/fdiv.ll
index 0749682e2f68..226e6d269c3b 100644
--- a/test/CodeGen/X86/fdiv.ll
+++ b/test/CodeGen/X86/fdiv.ll
@@ -1,41 +1,69 @@
-; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -enable-unsafe-fp-math | FileCheck %s
define double @exact(double %x) {
; Exact division by a constant converted to multiplication.
-; CHECK: @exact
-; CHECK: mulsd
+; CHECK-LABEL: exact:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
%div = fdiv double %x, 2.0
ret double %div
}
define double @inexact(double %x) {
; Inexact division by a constant converted to multiplication.
-; CHECK: @inexact
-; CHECK: mulsd
- %div = fdiv double %x, 0x41DFFFFFFFC00000
+; CHECK-LABEL: inexact:
+; CHECK: # BB#0:
+; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %div = fdiv double %x, 0x41DFFFFFFFC00000
ret double %div
}
define double @funky(double %x) {
; No conversion to multiplication if too funky.
-; CHECK: @funky
-; CHECK: divsd
+; CHECK-LABEL: funky:
+; CHECK: # BB#0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: divsd %xmm1, %xmm0
+; CHECK-NEXT: retq
%div = fdiv double %x, 0.0
ret double %div
}
define double @denormal1(double %x) {
; Don't generate multiplication by a denormal.
-; CHECK: @denormal1
-; CHECK: divsd
+; CHECK-LABEL: denormal1:
+; CHECK: # BB#0:
+; CHECK-NEXT: divsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
%div = fdiv double %x, 0x7FD0000000000001
ret double %div
}
define double @denormal2(double %x) {
; Don't generate multiplication by a denormal.
-; CHECK: @denormal
-; CHECK: divsd
+; CHECK-LABEL: denormal2:
+; CHECK: # BB#0:
+; CHECK-NEXT: divsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
%div = fdiv double %x, 0x7FEFFFFFFFFFFFFF
ret double %div
}
+
+; Deleting the negates does not require unsafe-fp-math.
+
+define float @double_negative(float %x, float %y) #0 {
+; CHECK-LABEL: double_negative:
+; CHECK: # BB#0:
+; CHECK-NEXT: divss %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %neg1 = fsub float -0.0, %x
+ %neg2 = fsub float -0.0, %y
+ %div = fdiv float %neg1, %neg2
+ ret float %div
+}
+
+attributes #0 = { "unsafe-fp-math"="false" }
+
diff --git a/test/CodeGen/X86/fixup-lea.ll b/test/CodeGen/X86/fixup-lea.ll
new file mode 100644
index 000000000000..1ddc099ffd62
--- /dev/null
+++ b/test/CodeGen/X86/fixup-lea.ll
@@ -0,0 +1,34 @@
+;RUN: llc < %s -march=x86 | FileCheck %s
+
+define void @foo(i32 inreg %dns) minsize {
+entry:
+; CHECK-LABEL: foo
+; CHECK: dec
+ br label %for.body
+
+for.body:
+ %i.05 = phi i16 [ %dec, %for.body ], [ 0, %entry ]
+ %dec = add i16 %i.05, -1
+ %conv = zext i16 %dec to i32
+ %cmp = icmp slt i32 %conv, %dns
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+define void @bar(i32 inreg %dns) minsize {
+entry:
+; CHECK-LABEL: bar
+; CHECK: inc
+ br label %for.body
+
+for.body:
+ %i.05 = phi i16 [ %inc, %for.body ], [ 0, %entry ]
+ %inc = add i16 %i.05, 1
+ %conv = zext i16 %inc to i32
+ %cmp = icmp slt i32 %conv, %dns
+ br i1 %cmp, label %for.body, label %for.end
+for.end:
+ ret void
+}
diff --git a/test/CodeGen/X86/float-asmprint.ll b/test/CodeGen/X86/float-asmprint.ll
index 5de9700fc064..0108430ee93e 100644
--- a/test/CodeGen/X86/float-asmprint.ll
+++ b/test/CodeGen/X86/float-asmprint.ll
@@ -9,6 +9,8 @@
@var64 = global double -0.0, align 8
@var32 = global float -0.0, align 4
@var16 = global half -0.0, align 2
+@var4f32 = global <4 x float> <float -0.0, float 0.0, float 1.0, float 2.0>
+@var4f16 = global <4 x half> <half -0.0, half 0.0, half 1.0, half 2.0>
; CHECK: var128:
; CHECK-NEXT: .quad 0 # fp128 -0
@@ -39,3 +41,16 @@
; CHECK-NEXT: .short 32768 # half -0
; CHECK-NEXT: .size
+; CHECK: var4f32:
+; CHECK-NEXT: .long 2147483648 # float -0
+; CHECK-NEXT: .long 0 # float 0
+; CHECK-NEXT: .long 1065353216 # float 1
+; CHECK-NEXT: .long 1073741824 # float 2
+; CHECK-NEXT: .size
+
+; CHECK: var4f16:
+; CHECK-NEXT: .short 32768 # half -0
+; CHECK-NEXT: .short 0 # half 0
+; CHECK-NEXT: .short 15360 # half 1
+; CHECK-NEXT: .short 16384 # half 2
+; CHECK-NEXT: .size
diff --git a/test/CodeGen/X86/floor-soft-float.ll b/test/CodeGen/X86/floor-soft-float.ll
index 7bb738513f54..3b28ecc6379d 100644
--- a/test/CodeGen/X86/floor-soft-float.ll
+++ b/test/CodeGen/X86/floor-soft-float.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
declare float @llvm.floor.f32(float)
; CHECK-SOFT-FLOAT: callq floorf
-; CHECK-HARD-FLOAT: roundss $1, %xmm0, %xmm0
+; CHECK-HARD-FLOAT: roundss $9, %xmm0, %xmm0
define float @myfloor(float %a) {
%val = tail call float @llvm.floor.f32(float %a)
ret float %val
diff --git a/test/CodeGen/X86/fma-commute-x86.ll b/test/CodeGen/X86/fma-commute-x86.ll
new file mode 100644
index 000000000000..162a97ac025c
--- /dev/null
+++ b/test/CodeGen/X86/fma-commute-x86.ll
@@ -0,0 +1,761 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s
+; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
+
+attributes #0 = { nounwind }
+
+declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_baa_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vfmadd213ss %xmm1, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_aba_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfmadd132ss (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_bba_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vfmadd213ss (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_baa_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfmadd132ps (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_aba_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfmadd231ps (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_bba_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vfmadd213ps (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_baa_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %ymm0
+; CHECK-NEXT: vfmadd132ps (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_aba_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %ymm0
+; CHECK-NEXT: vfmadd231ps (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_bba_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %ymm0
+; CHECK-NEXT: vfmadd213ps (%rcx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_baa_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vfmadd213sd %xmm1, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_aba_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfmadd132sd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_bba_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
+; CHECK-NEXT: vfmadd213sd (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_baa_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfmadd132pd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_aba_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfmadd231pd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_bba_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
+; CHECK-NEXT: vfmadd213pd (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_baa_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %ymm0
+; CHECK-NEXT: vfmadd132pd (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_aba_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %ymm0
+; CHECK-NEXT: vfmadd231pd (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmadd_bba_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %ymm0
+; CHECK-NEXT: vfmadd213pd (%rcx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+
+declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_baa_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vfnmadd213ss %xmm1, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_aba_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfnmadd132ss (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_bba_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vfnmadd213ss (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_baa_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfnmadd132ps (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_aba_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfnmadd231ps (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_bba_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vfnmadd213ps (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_baa_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %ymm0
+; CHECK-NEXT: vfnmadd132ps (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_aba_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %ymm0
+; CHECK-NEXT: vfnmadd231ps (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_bba_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %ymm0
+; CHECK-NEXT: vfnmadd213ps (%rcx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_baa_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vfnmadd213sd %xmm1, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_aba_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfnmadd132sd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_bba_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
+; CHECK-NEXT: vfnmadd213sd (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_baa_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfnmadd132pd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_aba_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfnmadd231pd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_bba_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
+; CHECK-NEXT: vfnmadd213pd (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_baa_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %ymm0
+; CHECK-NEXT: vfnmadd132pd (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_aba_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %ymm0
+; CHECK-NEXT: vfnmadd231pd (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmadd_bba_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %ymm0
+; CHECK-NEXT: vfnmadd213pd (%rcx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+
+declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_baa_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vfmsub213ss %xmm1, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_aba_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfmsub132ss (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_bba_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vfmsub213ss (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_baa_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfmsub132ps (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_aba_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfmsub231ps (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_bba_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vfmsub213ps (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_baa_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %ymm0
+; CHECK-NEXT: vfmsub132ps (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_aba_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %ymm0
+; CHECK-NEXT: vfmsub231ps (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_bba_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %ymm0
+; CHECK-NEXT: vfmsub213ps (%rcx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_baa_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vfmsub213sd %xmm1, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_aba_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfmsub132sd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_bba_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
+; CHECK-NEXT: vfmsub213sd (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_baa_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfmsub132pd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_aba_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfmsub231pd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_bba_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
+; CHECK-NEXT: vfmsub213pd (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_baa_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %ymm0
+; CHECK-NEXT: vfmsub132pd (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_aba_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %ymm0
+; CHECK-NEXT: vfmsub231pd (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fmsub_bba_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %ymm0
+; CHECK-NEXT: vfmsub213pd (%rcx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+
+declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_baa_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vfnmsub213ss %xmm1, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_aba_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfnmsub132ss (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_bba_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vfnmsub213ss (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_baa_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfnmsub132ps (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_aba_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %xmm0
+; CHECK-NEXT: vfnmsub231ps (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_bba_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %xmm0
+; CHECK-NEXT: vfnmsub213ps (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
+ ret <4 x float> %res
+}
+
+declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_baa_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %ymm0
+; CHECK-NEXT: vfnmsub132ps (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_aba_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rcx), %ymm0
+; CHECK-NEXT: vfnmsub231ps (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_bba_ps_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps (%rdx), %ymm0
+; CHECK-NEXT: vfnmsub213ps (%rcx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
+ ret <8 x float> %res
+}
+
+declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_baa_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
+; CHECK-NEXT: vfnmsub213sd %xmm1, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_aba_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfnmsub132sd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_bba_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
+; CHECK-NEXT: vfnmsub213sd (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_baa_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfnmsub132pd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_aba_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %xmm0
+; CHECK-NEXT: vfnmsub231pd (%rdx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_bba_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %xmm0
+; CHECK-NEXT: vfnmsub213pd (%rcx), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
+ ret <2 x double> %res
+}
+
+declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_baa_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %ymm0
+; CHECK-NEXT: vfnmsub132pd (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_aba_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rcx), %ymm0
+; CHECK-NEXT: vfnmsub231pd (%rdx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_x86_fnmsub_bba_pd_y:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovapd (%rdx), %ymm0
+; CHECK-NEXT: vfnmsub213pd (%rcx), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
+ ret <4 x double> %res
+}
+
diff --git a/test/CodeGen/X86/fma-do-not-commute.ll b/test/CodeGen/X86/fma-do-not-commute.ll
index 1f6a19cfff83..89be0795d206 100644
--- a/test/CodeGen/X86/fma-do-not-commute.ll
+++ b/test/CodeGen/X86/fma-do-not-commute.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx"
; CHECK-LABEL: test1:
; %arg lives in xmm0 and it shouldn't be redefined until it is used in the FMA.
-; CHECK-NOT {{.*}}, %xmm0
+; CHECK-NOT: {{.*}}, %xmm0
; %addr lives in rdi.
; %addr2 lives in rsi.
; CHECK: vmovss (%rsi), [[ADDR2:%xmm[0-9]+]]
diff --git a/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll b/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll
index f7d0cdf3c65a..8d0318bb93e0 100644
--- a/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll
+++ b/test/CodeGen/X86/fma-intrinsics-phi-213-to-231.ll
@@ -1,8 +1,337 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s
-; CHECK-LABEL: fmaddsubpd_loop:
-; CHECK: vfmaddsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+; CHECK-LABEL: fmaddsubpd_loop_128:
+; CHECK: vfmaddsub231pd %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <2 x double> @fmaddsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <2 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubaddpd_loop_128:
+; CHECK: vfmsubadd231pd %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <2 x double> @fmsubaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <2 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fmaddpd_loop_128:
+; CHECK: vfmadd231pd %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <2 x double> @fmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <2 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubpd_loop_128:
+; CHECK: vfmsub231pd %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <2 x double> @fmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <2 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fnmaddpd_loop_128:
+; CHECK: vfnmadd231pd %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <2 x double> @fnmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <2 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fnmsubpd_loop_128:
+; CHECK: vfnmsub231pd %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <2 x double> @fnmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <2 x double> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <2 x double> %c.addr.0
+}
+
+declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
+
+
+; CHECK-LABEL: fmaddsubps_loop_128:
+; CHECK: vfmaddsub231ps %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmaddsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <4 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubaddps_loop_128:
+; CHECK: vfmsubadd231ps %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmsubaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <4 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fmaddps_loop_128:
+; CHECK: vfmadd231ps %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <4 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubps_loop_128:
+; CHECK: vfmsub231ps %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <4 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fnmaddps_loop_128:
+; CHECK: vfnmadd231ps %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fnmaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <4 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fnmsubps_loop_128:
+; CHECK: vfnmsub231ps %xmm1, %xmm0, %xmm2
+; CHECK: vmovaps %xmm2, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fnmsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <4 x float> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <4 x float> %c.addr.0
+}
+
+declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
+
+; CHECK-LABEL: fmaddsubpd_loop_256:
+; CHECK: vfmaddsub231pd %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <4 x double> @fmaddsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
br label %for.cond
@@ -24,9 +353,11 @@ for.end:
ret <4 x double> %c.addr.0
}
-; CHECK-LABEL: fmsubaddpd_loop:
-; CHECK: vfmsubadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+; CHECK-LABEL: fmsubaddpd_loop_256:
+; CHECK: vfmsubadd231pd %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <4 x double> @fmsubaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
br label %for.cond
@@ -48,9 +379,11 @@ for.end:
ret <4 x double> %c.addr.0
}
-; CHECK-LABEL: fmaddpd_loop:
-; CHECK: vfmadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+; CHECK-LABEL: fmaddpd_loop_256:
+; CHECK: vfmadd231pd %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <4 x double> @fmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
br label %for.cond
@@ -72,9 +405,11 @@ for.end:
ret <4 x double> %c.addr.0
}
-; CHECK-LABEL: fmsubpd_loop:
-; CHECK: vfmsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+; CHECK-LABEL: fmsubpd_loop_256:
+; CHECK: vfmsub231pd %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <4 x double> @fmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
br label %for.cond
@@ -96,15 +431,71 @@ for.end:
ret <4 x double> %c.addr.0
}
+; CHECK-LABEL: fnmaddpd_loop_256:
+; CHECK: vfnmadd231pd %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <4 x double> @fnmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <4 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fnmsubpd_loop_256:
+; CHECK: vfnmsub231pd %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <4 x double> @fnmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <4 x double> %c.addr.0
+}
+
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
-; CHECK-LABEL: fmaddsubps_loop:
-; CHECK: vfmaddsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+; CHECK-LABEL: fmaddsubps_loop_256:
+; CHECK: vfmaddsub231ps %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <8 x float> @fmaddsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
br label %for.cond
@@ -126,9 +517,11 @@ for.end:
ret <8 x float> %c.addr.0
}
-; CHECK-LABEL: fmsubaddps_loop:
-; CHECK: vfmsubadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+; CHECK-LABEL: fmsubaddps_loop_256:
+; CHECK: vfmsubadd231ps %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <8 x float> @fmsubaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
br label %for.cond
@@ -150,9 +543,11 @@ for.end:
ret <8 x float> %c.addr.0
}
-; CHECK-LABEL: fmaddps_loop:
-; CHECK: vfmadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+; CHECK-LABEL: fmaddps_loop_256:
+; CHECK: vfmadd231ps %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <8 x float> @fmaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
br label %for.cond
@@ -174,9 +569,11 @@ for.end:
ret <8 x float> %c.addr.0
}
-; CHECK-LABEL: fmsubps_loop:
-; CHECK: vfmsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
-define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+; CHECK-LABEL: fmsubps_loop_256:
+; CHECK: vfmsub231ps %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <8 x float> @fmsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
br label %for.cond
@@ -198,7 +595,61 @@ for.end:
ret <8 x float> %c.addr.0
}
+; CHECK-LABEL: fnmaddps_loop_256:
+; CHECK: vfnmadd231ps %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <8 x float> @fnmaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <8 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fnmsubps_loop_256:
+; CHECK: vfnmsub231ps %ymm1, %ymm0, %ymm2
+; CHECK: vmovaps %ymm2, %ymm0
+; CHECK-NEXT: retq
+define <8 x float> @fnmsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, %iter
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ br label %for.inc
+
+for.inc:
+ %0 = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret <8 x float> %c.addr.0
+}
+
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
diff --git a/test/CodeGen/X86/fma-intrinsics-x86.ll b/test/CodeGen/X86/fma-intrinsics-x86.ll
index 881436386bac..cf4c8933fcab 100644
--- a/test/CodeGen/X86/fma-intrinsics-x86.ll
+++ b/test/CodeGen/X86/fma-intrinsics-x86.ll
@@ -1,95 +1,149 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
+; RUN: llc < %s -mtriple=x86_64-pc-windows -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; VFMADD
define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmadd_ss:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ss:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
+
+define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0
+;
+; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
+; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
+;
+; CHECK-FMA4-NEXT: vfmaddss %xmm2, %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
+ ret <4 x float> %res
+}
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmadd_sd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_sd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
+
+define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0
+;
+; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
+; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
+;
+; CHECK-FMA4-NEXT: vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0
+;
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
+ ret <2 x double> %res
+}
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmadd_ps:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmadd_pd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
}
@@ -97,90 +151,144 @@ declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4
; VFMSUB
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ss:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsub_ss:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ss:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
+
+define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0
+;
+; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
+; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
+;
+; CHECK-FMA4-NEXT: vfmsubss %xmm2, %xmm0, %xmm1, %xmm0
+;
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
+ ret <4 x float> %res
+}
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsub_sd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_sd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
+
+define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_bac_sd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0
+;
+; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
+; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
+;
+; CHECK-FMA4-NEXT: vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0
+;
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
+ ret <2 x double> %res
+}
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsub_ps:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsub_pd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
}
@@ -188,90 +296,144 @@ declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4
; VFNMADD
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ss:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmadd_ss:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ss:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
+
+define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0
+;
+; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
+; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
+;
+; CHECK-FMA4-NEXT: vfnmaddss %xmm2, %xmm0, %xmm1, %xmm0
+;
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
+ ret <4 x float> %res
+}
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmadd_sd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_sd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
+
+define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0
+;
+; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
+; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
+;
+; CHECK-FMA4-NEXT: vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0
+;
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
+ ret <2 x double> %res
+}
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
}
@@ -279,90 +441,144 @@ declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4
; VFNMSUB
define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ss:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmsub_ss:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ss:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
+
+define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0
+;
+; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
+; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
+;
+; CHECK-FMA4-NEXT: vfnmsubss %xmm2, %xmm0, %xmm1, %xmm0
+;
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
+ ret <4 x float> %res
+}
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmsub_sd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_sd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
+
+define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
+; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0
+;
+; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
+; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0
+;
+; CHECK-FMA4-NEXT: vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0
+;
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
+ ret <2 x double> %res
+}
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
}
@@ -370,60 +586,72 @@ declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4
; VFMADDSUB
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
}
@@ -431,60 +659,72 @@ declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>,
; VFMSUBADD
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %xmm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0
+;
; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
+;
+; CHECK-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
-; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256:
-; CHECK-FMA: # BB#0:
+; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
+; CHECK-NEXT: # BB#0:
+;
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vmovapd (%{{(rcx|rdx)}}), %ymm{{0|1}}
+; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0
+;
; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0
-; CHECK-FMA-NEXT: retq
;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd_256:
-; CHECK-FMA4: # BB#0:
-; CHECK-FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK-FMA4-NEXT: retq
+; CHECK-FMA4-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
+;
+; CHECK-NEXT: retq
%res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
ret <4 x double> %res
}
diff --git a/test/CodeGen/X86/fma-scalar-memfold.ll b/test/CodeGen/X86/fma-scalar-memfold.ll
new file mode 100644
index 000000000000..0ceaa562a5d4
--- /dev/null
+++ b/test/CodeGen/X86/fma-scalar-memfold.ll
@@ -0,0 +1,383 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s
+
+attributes #0 = { nounwind }
+
+declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
+
+declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
+
+define void @fmadd_aab_ss(float* %a, float* %b) #0 {
+; CHECK-LABEL: fmadd_aab_ss:
+; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfmadd213ss (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load float, float* %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, float* %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+ %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
+
+ %sr = extractelement <4 x float> %vr, i32 0
+ store float %sr, float* %a
+ ret void
+}
+
+define void @fmadd_aba_ss(float* %a, float* %b) #0 {
+; CHECK-LABEL: fmadd_aba_ss:
+; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfmadd132ss (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load float, float* %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, float* %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+ %vr = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
+
+ %sr = extractelement <4 x float> %vr, i32 0
+ store float %sr, float* %a
+ ret void
+}
+
+define void @fmsub_aab_ss(float* %a, float* %b) #0 {
+; CHECK-LABEL: fmsub_aab_ss:
+; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfmsub213ss (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load float, float* %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, float* %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+ %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
+
+ %sr = extractelement <4 x float> %vr, i32 0
+ store float %sr, float* %a
+ ret void
+}
+
+define void @fmsub_aba_ss(float* %a, float* %b) #0 {
+; CHECK-LABEL: fmsub_aba_ss:
+; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfmsub132ss (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load float, float* %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, float* %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+ %vr = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
+
+ %sr = extractelement <4 x float> %vr, i32 0
+ store float %sr, float* %a
+ ret void
+}
+
+define void @fnmadd_aab_ss(float* %a, float* %b) #0 {
+; CHECK-LABEL: fnmadd_aab_ss:
+; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfnmadd213ss (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load float, float* %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, float* %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+ %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
+
+ %sr = extractelement <4 x float> %vr, i32 0
+ store float %sr, float* %a
+ ret void
+}
+
+define void @fnmadd_aba_ss(float* %a, float* %b) #0 {
+; CHECK-LABEL: fnmadd_aba_ss:
+; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfnmadd132ss (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load float, float* %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, float* %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+ %vr = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
+
+ %sr = extractelement <4 x float> %vr, i32 0
+ store float %sr, float* %a
+ ret void
+}
+
+define void @fnmsub_aab_ss(float* %a, float* %b) #0 {
+; CHECK-LABEL: fnmsub_aab_ss:
+; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfnmsub213ss (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load float, float* %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, float* %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+ %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
+
+ %sr = extractelement <4 x float> %vr, i32 0
+ store float %sr, float* %a
+ ret void
+}
+
+define void @fnmsub_aba_ss(float* %a, float* %b) #0 {
+; CHECK-LABEL: fnmsub_aba_ss:
+; CHECK: vmovss (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfnmsub132ss (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovss %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load float, float* %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, float* %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+ %vr = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
+
+ %sr = extractelement <4 x float> %vr, i32 0
+ store float %sr, float* %a
+ ret void
+}
+
+define void @fmadd_aab_sd(double* %a, double* %b) #0 {
+; CHECK-LABEL: fmadd_aab_sd:
+; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfmadd213sd (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load double, double* %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, double* %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+ %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
+
+ %sr = extractelement <2 x double> %vr, i32 0
+ store double %sr, double* %a
+ ret void
+}
+
+define void @fmadd_aba_sd(double* %a, double* %b) #0 {
+; CHECK-LABEL: fmadd_aba_sd:
+; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfmadd132sd (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load double, double* %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, double* %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+ %vr = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
+
+ %sr = extractelement <2 x double> %vr, i32 0
+ store double %sr, double* %a
+ ret void
+}
+
+define void @fmsub_aab_sd(double* %a, double* %b) #0 {
+; CHECK-LABEL: fmsub_aab_sd:
+; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfmsub213sd (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load double, double* %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, double* %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+ %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
+
+ %sr = extractelement <2 x double> %vr, i32 0
+ store double %sr, double* %a
+ ret void
+}
+
+define void @fmsub_aba_sd(double* %a, double* %b) #0 {
+; CHECK-LABEL: fmsub_aba_sd:
+; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfmsub132sd (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load double, double* %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, double* %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+ %vr = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
+
+ %sr = extractelement <2 x double> %vr, i32 0
+ store double %sr, double* %a
+ ret void
+}
+
+define void @fnmadd_aab_sd(double* %a, double* %b) #0 {
+; CHECK-LABEL: fnmadd_aab_sd:
+; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfnmadd213sd (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load double, double* %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, double* %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+ %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
+
+ %sr = extractelement <2 x double> %vr, i32 0
+ store double %sr, double* %a
+ ret void
+}
+
+define void @fnmadd_aba_sd(double* %a, double* %b) #0 {
+; CHECK-LABEL: fnmadd_aba_sd:
+; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfnmadd132sd (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load double, double* %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, double* %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+ %vr = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
+
+ %sr = extractelement <2 x double> %vr, i32 0
+ store double %sr, double* %a
+ ret void
+}
+
+define void @fnmsub_aab_sd(double* %a, double* %b) #0 {
+; CHECK-LABEL: fnmsub_aab_sd:
+; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfnmsub213sd (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load double, double* %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, double* %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+ %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
+
+ %sr = extractelement <2 x double> %vr, i32 0
+ store double %sr, double* %a
+ ret void
+}
+
+define void @fnmsub_aba_sd(double* %a, double* %b) #0 {
+; CHECK-LABEL: fnmsub_aba_sd:
+; CHECK: vmovsd (%rcx), %[[XMM:xmm[0-9]+]]
+; CHECK-NEXT: vfnmsub132sd (%rdx), %[[XMM]], %[[XMM]]
+; CHECK-NEXT: vmovlpd %[[XMM]], (%rcx)
+; CHECK-NEXT: ret
+ %a.val = load double, double* %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, double* %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+ %vr = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
+
+ %sr = extractelement <2 x double> %vr, i32 0
+ store double %sr, double* %a
+ ret void
+}
+
+
diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll
index a27b760face7..76a4acf00f90 100644
--- a/test/CodeGen/X86/fma_patterns.ll
+++ b/test/CodeGen/X86/fma_patterns.ll
@@ -1,212 +1,1195 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 -fp-contract=fast | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4
-
-; CHECK: test_x86_fmadd_ps
-; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmadd_ps
-; CHECK_FMA4: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK_FMA4: ret
-define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
- %x = fmul <4 x float> %a0, %a1
- %res = fadd <4 x float> %x, %a2
- ret <4 x float> %res
-}
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512
-; CHECK: test_x86_fmsub_ps
-; CHECK: fmsub213ps %xmm2, %xmm1, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmsub_ps
-; CHECK_FMA4: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK_FMA4: ret
-define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
- %x = fmul <4 x float> %a0, %a1
- %res = fsub <4 x float> %x, %a2
- ret <4 x float> %res
-}
+;
+; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
+;
-; CHECK: test_x86_fnmadd_ps
-; CHECK: fnmadd213ps %xmm2, %xmm1, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fnmadd_ps
-; CHECK_FMA4: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK_FMA4: ret
-define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
- %x = fmul <4 x float> %a0, %a1
- %res = fsub <4 x float> %a2, %x
- ret <4 x float> %res
+define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
+; FMA-LABEL: test_f32_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f32_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f32_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x = fmul float %a0, %a1
+ %res = fadd float %x, %a2
+ ret float %res
}
-; CHECK: test_x86_fnmsub_ps
-; CHECK: fnmsub213ps %xmm2, %xmm1, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fnmsub_ps
-; CHECK_FMA4: fnmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK_FMA4: ret
-define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+; FMA-LABEL: test_4f32_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_4f32_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_4f32_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
%x = fmul <4 x float> %a0, %a1
- %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
- %res = fsub <4 x float> %y, %a2
+ %res = fadd <4 x float> %x, %a2
ret <4 x float> %res
}
-; CHECK: test_x86_fmadd_ps_y
-; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmadd_ps_y
-; CHECK_FMA4: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK_FMA4: ret
-define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+; FMA-LABEL: test_8f32_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_8f32_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_8f32_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
%x = fmul <8 x float> %a0, %a1
%res = fadd <8 x float> %x, %a2
ret <8 x float> %res
}
-; CHECK: test_x86_fmsub_ps_y
-; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmsub_ps_y
-; CHECK_FMA4: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK_FMA4: ret
-define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
- %x = fmul <8 x float> %a0, %a1
- %res = fsub <8 x float> %x, %a2
- ret <8 x float> %res
-}
-
-; CHECK: test_x86_fnmadd_ps_y
-; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fnmadd_ps_y
-; CHECK_FMA4: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK_FMA4: ret
-define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
- %x = fmul <8 x float> %a0, %a1
- %res = fsub <8 x float> %a2, %x
- ret <8 x float> %res
+define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
+; FMA-LABEL: test_f64_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f64_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f64_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x = fmul double %a0, %a1
+ %res = fadd double %x, %a2
+ ret double %res
}
-; CHECK: test_x86_fnmsub_ps_y
-; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0
-; CHECK: ret
-define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
- %x = fmul <8 x float> %a0, %a1
- %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
- %res = fsub <8 x float> %y, %a2
- ret <8 x float> %res
+define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+; FMA-LABEL: test_2f64_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_2f64_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_2f64_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %x = fmul <2 x double> %a0, %a1
+ %res = fadd <2 x double> %x, %a2
+ ret <2 x double> %res
}
-; CHECK: test_x86_fmadd_pd_y
-; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmadd_pd_y
-; CHECK_FMA4: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK_FMA4: ret
-define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+; FMA-LABEL: test_4f64_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_4f64_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_4f64_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
%x = fmul <4 x double> %a0, %a1
%res = fadd <4 x double> %x, %a2
ret <4 x double> %res
}
-; CHECK: test_x86_fmsub_pd_y
-; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmsub_pd_y
-; CHECK_FMA4: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; CHECK_FMA4: ret
-define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
- %x = fmul <4 x double> %a0, %a1
- %res = fsub <4 x double> %x, %a2
- ret <4 x double> %res
+;
+; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
+;
+
+define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
+; FMA-LABEL: test_f32_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f32_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f32_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x = fmul float %a0, %a1
+ %res = fsub float %x, %a2
+ ret float %res
+}
+
+define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+; FMA-LABEL: test_4f32_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_4f32_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_4f32_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %x = fmul <4 x float> %a0, %a1
+ %res = fsub <4 x float> %x, %a2
+ ret <4 x float> %res
}
-; CHECK: test_x86_fmsub_pd
-; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmsub_pd
-; CHECK_FMA4: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK_FMA4: ret
-define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+; FMA-LABEL: test_8f32_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_8f32_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_8f32_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %x = fmul <8 x float> %a0, %a1
+ %res = fsub <8 x float> %x, %a2
+ ret <8 x float> %res
+}
+
+define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
+; FMA-LABEL: test_f64_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f64_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f64_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x = fmul double %a0, %a1
+ %res = fsub double %x, %a2
+ ret double %res
+}
+
+define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+; FMA-LABEL: test_2f64_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_2f64_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_2f64_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
%x = fmul <2 x double> %a0, %a1
%res = fsub <2 x double> %x, %a2
ret <2 x double> %res
}
-; CHECK: test_x86_fnmadd_ss
-; CHECK: vfnmadd213ss %xmm2, %xmm1, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fnmadd_ss
-; CHECK_FMA4: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK_FMA4: ret
-define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) {
+define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+; FMA-LABEL: test_4f64_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_4f64_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_4f64_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %x = fmul <4 x double> %a0, %a1
+ %res = fsub <4 x double> %x, %a2
+ ret <4 x double> %res
+}
+
+;
+; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
+;
+
+define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
+; FMA-LABEL: test_f32_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f32_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f32_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
%x = fmul float %a0, %a1
%res = fsub float %a2, %x
ret float %res
}
-; CHECK: test_x86_fnmadd_sd
-; CHECK: vfnmadd213sd %xmm2, %xmm1, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fnmadd_sd
-; CHECK_FMA4: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK_FMA4: ret
-define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) {
+define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+; FMA-LABEL: test_4f32_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_4f32_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_4f32_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %x = fmul <4 x float> %a0, %a1
+ %res = fsub <4 x float> %a2, %x
+ ret <4 x float> %res
+}
+
+define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+; FMA-LABEL: test_8f32_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_8f32_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_8f32_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %x = fmul <8 x float> %a0, %a1
+ %res = fsub <8 x float> %a2, %x
+ ret <8 x float> %res
+}
+
+define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
+; FMA-LABEL: test_f64_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f64_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f64_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
%x = fmul double %a0, %a1
%res = fsub double %a2, %x
ret double %res
}
-; CHECK: test_x86_fmsub_sd
-; CHECK: vfmsub213sd %xmm2, %xmm1, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmsub_sd
-; CHECK_FMA4: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK_FMA4: ret
-define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) {
- %x = fmul double %a0, %a1
- %res = fsub double %x, %a2
- ret double %res
+define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+; FMA-LABEL: test_2f64_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_2f64_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_2f64_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %x = fmul <2 x double> %a0, %a1
+ %res = fsub <2 x double> %a2, %x
+ ret <2 x double> %res
}
-; CHECK: test_x86_fnmsub_ss
-; CHECK: vfnmsub213ss %xmm2, %xmm1, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fnmsub_ss
-; CHECK_FMA4: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
-; CHECK_FMA4: ret
-define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
- %x = fsub float -0.000000e+00, %a0
- %y = fmul float %x, %a1
+define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+; FMA-LABEL: test_4f64_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_4f64_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_4f64_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %x = fmul <4 x double> %a0, %a1
+ %res = fsub <4 x double> %a2, %x
+ ret <4 x double> %res
+}
+
+;
+; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
+;
+
+define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
+; FMA-LABEL: test_f32_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f32_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f32_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x = fmul float %a0, %a1
+ %y = fsub float -0.000000e+00, %x
%res = fsub float %y, %a2
ret float %res
}
-; CHECK: test_x86_fmadd_ps_load
-; CHECK: vmovaps (%rdi), %xmm2
-; CHECK: vfmadd213ps %xmm1, %xmm2, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmadd_ps_load
-; CHECK_FMA4: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
-; CHECK_FMA4: ret
-define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
- %x = load <4 x float>, <4 x float>* %a0
- %y = fmul <4 x float> %x, %a1
- %res = fadd <4 x float> %y, %a2
+define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+; FMA-LABEL: test_4f32_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_4f32_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_4f32_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %x = fmul <4 x float> %a0, %a1
+ %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
+ %res = fsub <4 x float> %y, %a2
ret <4 x float> %res
}
-; CHECK: test_x86_fmsub_ps_load
-; CHECK: vmovaps (%rdi), %xmm2
-; CHECK: fmsub213ps %xmm1, %xmm2, %xmm0
-; CHECK: ret
-; CHECK_FMA4: test_x86_fmsub_ps_load
-; CHECK_FMA4: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
-; CHECK_FMA4: ret
-define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
+define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+; FMA-LABEL: test_8f32_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_8f32_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_8f32_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %x = fmul <8 x float> %a0, %a1
+ %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
+ %res = fsub <8 x float> %y, %a2
+ ret <8 x float> %res
+}
+
+define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
+; FMA-LABEL: test_f64_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f64_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f64_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x = fmul double %a0, %a1
+ %y = fsub double -0.000000e+00, %x
+ %res = fsub double %y, %a2
+ ret double %res
+}
+
+define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+; FMA-LABEL: test_2f64_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_2f64_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_2f64_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %x = fmul <2 x double> %a0, %a1
+ %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
+ %res = fsub <2 x double> %y, %a2
+ ret <2 x double> %res
+}
+
+define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+; FMA-LABEL: test_4f64_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_4f64_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_4f64_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %x = fmul <4 x double> %a0, %a1
+ %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
+ %res = fsub <4 x double> %y, %a2
+ ret <4 x double> %res
+}
+
+;
+; Load Folding Patterns
+;
+
+define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
+; FMA-LABEL: test_4f32_fmadd_load:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd132ps (%rdi), %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_4f32_fmadd_load:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_4f32_fmadd_load:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %xmm2
+; AVX512-NEXT: vfmadd213ps %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vmovaps %zmm2, %zmm0
+; AVX512-NEXT: retq
%x = load <4 x float>, <4 x float>* %a0
%y = fmul <4 x float> %x, %a1
- %res = fsub <4 x float> %y, %a2
+ %res = fadd <4 x float> %y, %a2
ret <4 x float> %res
}
+define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) {
+; FMA-LABEL: test_2f64_fmsub_load:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub132pd (%rdi), %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_2f64_fmsub_load:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_2f64_fmsub_load:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %xmm2
+; AVX512-NEXT: vfmsub213pd %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vmovaps %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %x = load <2 x double>, <2 x double>* %a0
+ %y = fmul <2 x double> %x, %a1
+ %res = fsub <2 x double> %y, %a2
+ ret <2 x double> %res
+}
+
+;
+; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
+;
+
+define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_add_x_one_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_add_x_one_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_add_x_one_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
+ %m = fmul <4 x float> %a, %y
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_y_add_x_one:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_y_add_x_one:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_y_add_x_one:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
+ %m = fmul <4 x float> %y, %a
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_add_x_negone_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_add_x_negone_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_add_x_negone_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
+ %m = fmul <4 x float> %a, %y
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_y_add_x_negone:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_y_add_x_negone:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_y_add_x_negone:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
+ %m = fmul <4 x float> %y, %a
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_sub_one_x_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_sub_one_x_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_sub_one_x_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
+ %m = fmul <4 x float> %s, %y
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_y_sub_one_x:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_y_sub_one_x:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_y_sub_one_x:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
+ %m = fmul <4 x float> %y, %s
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_sub_negone_x_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_sub_negone_x_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_sub_negone_x_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
+ %m = fmul <4 x float> %s, %y
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_y_sub_negone_x:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_y_sub_negone_x:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_y_sub_negone_x:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
+ %m = fmul <4 x float> %y, %s
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_sub_x_one_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_sub_x_one_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_sub_x_one_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
+ %m = fmul <4 x float> %s, %y
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_y_sub_x_one:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_y_sub_x_one:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_y_sub_x_one:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
+ %m = fmul <4 x float> %y, %s
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_sub_x_negone_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_sub_x_negone_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_sub_x_negone_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
+ %m = fmul <4 x float> %s, %y
+ ret <4 x float> %m
+}
+
+define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
+; FMA-LABEL: test_v4f32_mul_y_sub_x_negone:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_mul_y_sub_x_negone:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_mul_y_sub_x_negone:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ps %xmm1, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
+ %m = fmul <4 x float> %y, %s
+ ret <4 x float> %m
+}
+
+;
+; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
+;
+
+define float @test_f32_interp(float %x, float %y, float %t) {
+; FMA-LABEL: test_f32_interp:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
+; FMA-NEXT: vfmadd213ss %xmm1, %xmm2, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f32_interp:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
+; FMA4-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f32_interp:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213ss %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vfmadd213ss %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vmovaps %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %t1 = fsub float 1.0, %t
+ %tx = fmul float %x, %t
+ %ty = fmul float %y, %t1
+ %r = fadd float %tx, %ty
+ ret float %r
+}
+
+define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
+; FMA-LABEL: test_v4f32_interp:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ps %xmm1, %xmm2, %xmm1
+; FMA-NEXT: vfmadd213ps %xmm1, %xmm2, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_interp:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
+; FMA4-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_interp:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps %zmm2, %zmm3
+; AVX512-NEXT: vfnmadd213ps %xmm1, %xmm1, %xmm3
+; AVX512-NEXT: vfmadd213ps %xmm3, %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
+ %tx = fmul <4 x float> %x, %t
+ %ty = fmul <4 x float> %y, %t1
+ %r = fadd <4 x float> %tx, %ty
+ ret <4 x float> %r
+}
+
+define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
+; FMA-LABEL: test_v8f32_interp:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ps %ymm1, %ymm2, %ymm1
+; FMA-NEXT: vfmadd213ps %ymm1, %ymm2, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f32_interp:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
+; FMA4-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f32_interp:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps %zmm2, %zmm3
+; AVX512-NEXT: vfnmadd213ps %ymm1, %ymm1, %ymm3
+; AVX512-NEXT: vfmadd213ps %ymm3, %ymm2, %ymm0
+; AVX512-NEXT: retq
+ %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
+ %tx = fmul <8 x float> %x, %t
+ %ty = fmul <8 x float> %y, %t1
+ %r = fadd <8 x float> %tx, %ty
+ ret <8 x float> %r
+}
+
+define double @test_f64_interp(double %x, double %y, double %t) {
+; FMA-LABEL: test_f64_interp:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
+; FMA-NEXT: vfmadd213sd %xmm1, %xmm2, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f64_interp:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
+; FMA4-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f64_interp:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213sd %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vfmadd213sd %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vmovaps %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %t1 = fsub double 1.0, %t
+ %tx = fmul double %x, %t
+ %ty = fmul double %y, %t1
+ %r = fadd double %tx, %ty
+ ret double %r
+}
+
+define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
+; FMA-LABEL: test_v2f64_interp:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213pd %xmm1, %xmm2, %xmm1
+; FMA-NEXT: vfmadd213pd %xmm1, %xmm2, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v2f64_interp:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
+; FMA4-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v2f64_interp:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps %zmm2, %zmm3
+; AVX512-NEXT: vfnmadd213pd %xmm1, %xmm1, %xmm3
+; AVX512-NEXT: vfmadd213pd %xmm3, %xmm2, %xmm0
+; AVX512-NEXT: retq
+ %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
+ %tx = fmul <2 x double> %x, %t
+ %ty = fmul <2 x double> %y, %t1
+ %r = fadd <2 x double> %tx, %ty
+ ret <2 x double> %r
+}
+
+define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
+; FMA-LABEL: test_v4f64_interp:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213pd %ymm1, %ymm2, %ymm1
+; FMA-NEXT: vfmadd213pd %ymm1, %ymm2, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f64_interp:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
+; FMA4-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f64_interp:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps %zmm2, %zmm3
+; AVX512-NEXT: vfnmadd213pd %ymm1, %ymm1, %ymm3
+; AVX512-NEXT: vfmadd213pd %ymm3, %ymm2, %ymm0
+; AVX512-NEXT: retq
+ %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
+ %tx = fmul <4 x double> %x, %t
+ %ty = fmul <4 x double> %y, %t1
+ %r = fadd <4 x double> %tx, %ty
+ ret <4 x double> %r
+}
+
+;
+; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
+;
+
+define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; FMA-LABEL: test_v4f32_fneg_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_fneg_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_fneg_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %mul = fmul <4 x float> %a0, %a1
+ %add = fadd <4 x float> %mul, %a2
+ %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+ ret <4 x float> %neg
+}
+
+define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; FMA-LABEL: test_v4f64_fneg_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f64_fneg_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f64_fneg_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %mul = fmul <4 x double> %a0, %a1
+ %sub = fsub <4 x double> %mul, %a2
+ %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+ ret <4 x double> %neg
+}
+
+define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; FMA-LABEL: test_v4f32_fneg_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_fneg_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_fneg_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %mul = fmul <4 x float> %a0, %a1
+ %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+ %add = fadd <4 x float> %neg0, %a2
+ %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+ ret <4 x float> %neg1
+}
+
+define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; FMA-LABEL: test_v4f64_fneg_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f64_fneg_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f64_fneg_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %mul = fmul <4 x double> %a0, %a1
+ %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+ %sub = fsub <4 x double> %neg0, %a2
+ %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+ ret <4 x double> %neg1
+}
+
+;
+; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+;
+
+define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
+; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
+; FMA: # BB#0:
+; FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
+; FMA4: # BB#0:
+; FMA4-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
+ %a = fadd <4 x float> %m0, %m1
+ ret <4 x float> %a
+}
+
+;
+; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+;
+
+define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
+; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+ %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
+ %a = fadd <4 x float> %m1, %y
+ ret <4 x float> %a
+}
+
+; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
+
+define double @test_f64_fneg_fmul(double %x, double %y) #0 {
+; FMA-LABEL: test_f64_fneg_fmul:
+; FMA: # BB#0:
+; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_f64_fneg_fmul:
+; FMA4: # BB#0:
+; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_f64_fneg_fmul:
+; AVX512: # BB#0:
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %m = fmul nsz double %x, %y
+ %n = fsub double -0.0, %m
+ ret double %n
+}
+
+define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
+; FMA-LABEL: test_v4f32_fneg_fmul:
+; FMA: # BB#0:
+; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f32_fneg_fmul:
+; FMA4: # BB#0:
+; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f32_fneg_fmul:
+; AVX512: # BB#0:
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %m = fmul nsz <4 x float> %x, %y
+ %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m
+ ret <4 x float> %n
+}
+
+define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
+; FMA-LABEL: test_v4f64_fneg_fmul:
+; FMA: # BB#0:
+; FMA-NEXT: vxorpd %ymm2, %ymm2, %ymm2
+; FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v4f64_fneg_fmul:
+; FMA4: # BB#0:
+; FMA4-NEXT: vxorpd %ymm2, %ymm2, %ymm2
+; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v4f64_fneg_fmul:
+; AVX512: # BB#0:
+; AVX512-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %m = fmul nsz <4 x double> %x, %y
+ %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
+ ret <4 x double> %n
+}
+
+define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 {
+; ALL-LABEL: test_v4f64_fneg_fmul_no_nsz:
+; ALL: # BB#0:
+; ALL-NEXT: vmulpd %ymm1, %ymm0, %ymm0
+; ALL-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
+; ALL-NEXT: retq
+ %m = fmul <4 x double> %x, %y
+ %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
+ ret <4 x double> %n
+}
+
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/fma_patterns_wide.ll b/test/CodeGen/X86/fma_patterns_wide.ll
index 04db2d76cd8c..7b6509ad51c7 100644
--- a/test/CodeGen/X86/fma_patterns_wide.ll
+++ b/test/CodeGen/X86/fma_patterns_wide.ll
@@ -1,84 +1,821 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 -fp-contract=fast | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4
-
-; CHECK-LABEL: test_x86_fmadd_ps_y_wide
-; CHECK: vfmadd213ps
-; CHECK: vfmadd213ps
-; CHECK: ret
-; CHECK_FMA4-LABEL: test_x86_fmadd_ps_y_wide
-; CHECK_FMA4: vfmaddps
-; CHECK_FMA4: vfmaddps
-; CHECK_FMA4: ret
-define <16 x float> @test_x86_fmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512
+
+;
+; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
+;
+
+define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+; FMA-LABEL: test_16f32_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ps %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfmadd213ps %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_16f32_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_16f32_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fadd <16 x float> %x, %a2
ret <16 x float> %res
}
-; CHECK-LABEL: test_x86_fmsub_ps_y_wide
-; CHECK: vfmsub213ps
-; CHECK: vfmsub213ps
-; CHECK: ret
-; CHECK_FMA4-LABEL: test_x86_fmsub_ps_y_wide
-; CHECK_FMA4: vfmsubps
-; CHECK_FMA4: vfmsubps
-; CHECK_FMA4: ret
-define <16 x float> @test_x86_fmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+; FMA-LABEL: test_8f64_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213pd %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfmadd213pd %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_8f64_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_8f64_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x = fmul <8 x double> %a0, %a1
+ %res = fadd <8 x double> %x, %a2
+ ret <8 x double> %res
+}
+
+;
+; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
+;
+
+define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+; FMA-LABEL: test_16f32_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfmsub213ps %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_16f32_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_16f32_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %x, %a2
ret <16 x float> %res
}
-; CHECK-LABEL: test_x86_fnmadd_ps_y_wide
-; CHECK: vfnmadd213ps
-; CHECK: vfnmadd213ps
-; CHECK: ret
-; CHECK_FMA4-LABEL: test_x86_fnmadd_ps_y_wide
-; CHECK_FMA4: vfnmaddps
-; CHECK_FMA4: vfnmaddps
-; CHECK_FMA4: ret
-define <16 x float> @test_x86_fnmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+; FMA-LABEL: test_8f64_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213pd %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfmsub213pd %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_8f64_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_8f64_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213pd %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x = fmul <8 x double> %a0, %a1
+ %res = fsub <8 x double> %x, %a2
+ ret <8 x double> %res
+}
+
+;
+; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
+;
+
+define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+; FMA-LABEL: test_16f32_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ps %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfnmadd213ps %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_16f32_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_16f32_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%res = fsub <16 x float> %a2, %x
ret <16 x float> %res
}
-; CHECK-LABEL: test_x86_fnmsub_ps_y_wide
-; CHECK: vfnmsub213ps
-; CHECK: vfnmsub213ps
-; CHECK: ret
-define <16 x float> @test_x86_fnmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+; FMA-LABEL: test_8f64_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213pd %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfnmadd213pd %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_8f64_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_8f64_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %x = fmul <8 x double> %a0, %a1
+ %res = fsub <8 x double> %a2, %x
+ ret <8 x double> %res
+}
+
+;
+; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
+;
+
+define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+; FMA-LABEL: test_16f32_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213ps %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfnmsub213ps %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_16f32_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_16f32_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
%x = fmul <16 x float> %a0, %a1
%y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
%res = fsub <16 x float> %y, %a2
ret <16 x float> %res
}
-; CHECK-LABEL: test_x86_fmadd_pd_y_wide
-; CHECK: vfmadd213pd
-; CHECK: vfmadd213pd
-; CHECK: ret
-; CHECK_FMA4-LABEL: test_x86_fmadd_pd_y_wide
-; CHECK_FMA4: vfmaddpd
-; CHECK_FMA4: vfmaddpd
-; CHECK_FMA4: ret
-define <8 x double> @test_x86_fmadd_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+; FMA-LABEL: test_8f64_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213pd %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfnmsub213pd %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_8f64_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_8f64_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
%x = fmul <8 x double> %a0, %a1
- %res = fadd <8 x double> %x, %a2
+ %y = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
+ %res = fsub <8 x double> %y, %a2
ret <8 x double> %res
}
-; CHECK-LABEL: test_x86_fmsub_pd_y_wide
-; CHECK: vfmsub213pd
-; CHECK: vfmsub213pd
-; CHECK: ret
-; CHECK_FMA4-LABEL: test_x86_fmsub_pd_y_wide
-; CHECK_FMA4: vfmsubpd
-; CHECK_FMA4: vfmsubpd
-; CHECK_FMA4: ret
-define <8 x double> @test_x86_fmsub_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
- %x = fmul <8 x double> %a0, %a1
- %res = fsub <8 x double> %x, %a2
+;
+; Load Folding Patterns
+;
+
+define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1, <16 x float> %a2) {
+; FMA-LABEL: test_16f32_fmadd_load:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd132ps (%rdi), %ymm2, %ymm0
+; FMA-NEXT: vfmadd132ps 32(%rdi), %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_16f32_fmadd_load:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_16f32_fmadd_load:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %zmm2
+; AVX512-NEXT: vfmadd213ps %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vmovaps %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %x = load <16 x float>, <16 x float>* %a0
+ %y = fmul <16 x float> %x, %a1
+ %res = fadd <16 x float> %y, %a2
+ ret <16 x float> %res
+}
+
+define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <8 x double> %a2) {
+; FMA-LABEL: test_8f64_fmsub_load:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub132pd (%rdi), %ymm2, %ymm0
+; FMA-NEXT: vfmsub132pd 32(%rdi), %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_8f64_fmsub_load:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
+; FMA4-NEXT: vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_8f64_fmsub_load:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovapd (%rdi), %zmm2
+; AVX512-NEXT: vfmsub213pd %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vmovaps %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %x = load <8 x double>, <8 x double>* %a0
+ %y = fmul <8 x double> %x, %a1
+ %res = fsub <8 x double> %y, %a2
ret <8 x double> %res
}
+
+;
+; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
+;
+
+define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
+; FMA-LABEL: test_v16f32_mul_add_x_one_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ps %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfmadd213ps %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_add_x_one_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_add_x_one_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ps %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
+ %m = fmul <16 x float> %a, %y
+ ret <16 x float> %m
+}
+
+define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_add_x_one:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213pd %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfmadd213pd %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_add_x_one:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_add_x_one:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213pd %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
+ %m = fmul <8 x double> %y, %a
+ ret <8 x double> %m
+}
+
+define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
+; FMA-LABEL: test_v16f32_mul_add_x_negone_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfmsub213ps %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_add_x_negone_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_add_x_negone_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
+ %m = fmul <16 x float> %a, %y
+ ret <16 x float> %m
+}
+
+define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_add_x_negone:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213pd %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfmsub213pd %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_add_x_negone:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_add_x_negone:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213pd %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
+ %m = fmul <8 x double> %y, %a
+ ret <8 x double> %m
+}
+
+define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
+; FMA-LABEL: test_v16f32_mul_sub_one_x_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ps %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfnmadd213ps %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_one_x_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_one_x_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213ps %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
+ %m = fmul <16 x float> %s, %y
+ ret <16 x float> %m
+}
+
+define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_sub_one_x:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213pd %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfnmadd213pd %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_one_x:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_one_x:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213pd %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
+ %m = fmul <8 x double> %y, %s
+ ret <8 x double> %m
+}
+
+define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
+; FMA-LABEL: test_v16f32_mul_sub_negone_x_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213ps %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfnmsub213ps %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213ps %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0,float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
+ %m = fmul <16 x float> %s, %y
+ ret <16 x float> %m
+}
+
+define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_sub_negone_x:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213pd %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfnmsub213pd %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213pd %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
+ %m = fmul <8 x double> %y, %s
+ ret <8 x double> %m
+}
+
+define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
+; FMA-LABEL: test_v16f32_mul_sub_x_one_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfmsub213ps %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_x_one_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_x_one_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
+ %m = fmul <16 x float> %s, %y
+ ret <16 x float> %m
+}
+
+define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_sub_x_one:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213pd %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfmsub213pd %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_x_one:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_x_one:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213pd %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
+ %m = fmul <8 x double> %y, %s
+ ret <8 x double> %m
+}
+
+define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
+; FMA-LABEL: test_v16f32_mul_sub_x_negone_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213ps %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfmadd213ps %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213ps %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
+ %m = fmul <16 x float> %s, %y
+ ret <16 x float> %m
+}
+
+define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
+; FMA-LABEL: test_v8f64_mul_y_sub_x_negone:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213pd %ymm2, %ymm2, %ymm0
+; FMA-NEXT: vfmadd213pd %ymm3, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213pd %zmm1, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
+ %m = fmul <8 x double> %y, %s
+ ret <8 x double> %m
+}
+
+;
+; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
+;
+
+define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
+; FMA-LABEL: test_v16f32_interp:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213ps %ymm3, %ymm5, %ymm3
+; FMA-NEXT: vfnmadd213ps %ymm2, %ymm4, %ymm2
+; FMA-NEXT: vfmadd213ps %ymm2, %ymm4, %ymm0
+; FMA-NEXT: vfmadd213ps %ymm3, %ymm5, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_interp:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
+; FMA4-NEXT: vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
+; FMA4-NEXT: vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_interp:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps %zmm2, %zmm3
+; AVX512-NEXT: vfnmadd213ps %zmm1, %zmm1, %zmm3
+; AVX512-NEXT: vfmadd213ps %zmm3, %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
+ %tx = fmul <16 x float> %x, %t
+ %ty = fmul <16 x float> %y, %t1
+ %r = fadd <16 x float> %tx, %ty
+ ret <16 x float> %r
+}
+
+define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
+; FMA-LABEL: test_v8f64_interp:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213pd %ymm3, %ymm5, %ymm3
+; FMA-NEXT: vfnmadd213pd %ymm2, %ymm4, %ymm2
+; FMA-NEXT: vfmadd213pd %ymm2, %ymm4, %ymm0
+; FMA-NEXT: vfmadd213pd %ymm3, %ymm5, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_interp:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
+; FMA4-NEXT: vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
+; FMA4-NEXT: vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_interp:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps %zmm2, %zmm3
+; AVX512-NEXT: vfnmadd213pd %zmm1, %zmm1, %zmm3
+; AVX512-NEXT: vfmadd213pd %zmm3, %zmm2, %zmm0
+; AVX512-NEXT: retq
+ %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
+ %tx = fmul <8 x double> %x, %t
+ %ty = fmul <8 x double> %y, %t1
+ %r = fadd <8 x double> %tx, %ty
+ ret <8 x double> %r
+}
+
+;
+; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
+;
+
+define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
+; FMA-LABEL: test_v16f32_fneg_fmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmsub213ps %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfnmsub213ps %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_fneg_fmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_fneg_fmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %mul = fmul <16 x float> %a0, %a1
+ %add = fadd <16 x float> %mul, %a2
+ %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
+ ret <16 x float> %neg
+}
+
+define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
+; FMA-LABEL: test_v8f64_fneg_fmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfnmadd213pd %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfnmadd213pd %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_fneg_fmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_fneg_fmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %mul = fmul <8 x double> %a0, %a1
+ %sub = fsub <8 x double> %mul, %a2
+ %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+ ret <8 x double> %neg
+}
+
+define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
+; FMA-LABEL: test_v16f32_fneg_fnmadd:
+; FMA: # BB#0:
+; FMA-NEXT: vfmsub213ps %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfmsub213ps %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_fneg_fnmadd:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_fneg_fnmadd:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmsub213ps %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %mul = fmul <16 x float> %a0, %a1
+ %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+ %add = fadd <16 x float> %neg0, %a2
+ %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
+ ret <16 x float> %neg1
+}
+
+define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
+; FMA-LABEL: test_v8f64_fneg_fnmsub:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd213pd %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfmadd213pd %ymm5, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_fneg_fnmsub:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_fneg_fnmsub:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %mul = fmul <8 x double> %a0, %a1
+ %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+ %sub = fsub <8 x double> %neg0, %a2
+ %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+ ret <8 x double> %neg1
+}
+
+;
+; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
+;
+
+define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
+; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
+; FMA: # BB#0:
+; FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
+; FMA4: # BB#0:
+; FMA4-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; FMA4-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
+ %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
+ %a = fadd <16 x float> %m0, %m1
+ ret <16 x float> %a
+}
+
+;
+; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
+;
+
+define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
+; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
+; FMA: # BB#0:
+; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %ymm2, %ymm0
+; FMA-NEXT: vfmadd132ps {{.*}}(%rip), %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
+; FMA4: # BB#0:
+; FMA4-NEXT: vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
+; FMA4-NEXT: vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
+; AVX512: # BB#0:
+; AVX512-NEXT: vfmadd231ps {{.*}}(%rip), %zmm0, %zmm1
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %m0 = fmul <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
+ %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
+ %a = fadd <16 x float> %m1, %y
+ ret <16 x float> %a
+}
+
+; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
+
+define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
+; FMA-LABEL: test_v16f32_fneg_fmul:
+; FMA: # BB#0:
+; FMA-NEXT: vxorps %ymm4, %ymm4, %ymm4
+; FMA-NEXT: vfnmsub213ps %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfnmsub213ps %ymm4, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v16f32_fneg_fmul:
+; FMA4: # BB#0:
+; FMA4-NEXT: vxorps %ymm4, %ymm4, %ymm4
+; FMA4-NEXT: vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v16f32_fneg_fmul:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %m = fmul nsz <16 x float> %x, %y
+ %n = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %m
+ ret <16 x float> %n
+}
+
+define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
+; FMA-LABEL: test_v8f64_fneg_fmul:
+; FMA: # BB#0:
+; FMA-NEXT: vxorpd %ymm4, %ymm4, %ymm4
+; FMA-NEXT: vfnmsub213pd %ymm4, %ymm2, %ymm0
+; FMA-NEXT: vfnmsub213pd %ymm4, %ymm3, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_fneg_fmul:
+; FMA4: # BB#0:
+; FMA4-NEXT: vxorpd %ymm4, %ymm4, %ymm4
+; FMA4-NEXT: vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_fneg_fmul:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; AVX512-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ %m = fmul nsz <8 x double> %x, %y
+ %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
+ ret <8 x double> %n
+}
+
+define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) #0 {
+; FMA-LABEL: test_v8f64_fneg_fmul_no_nsz:
+; FMA: # BB#0:
+; FMA-NEXT: vmulpd %ymm3, %ymm1, %ymm1
+; FMA-NEXT: vmulpd %ymm2, %ymm0, %ymm0
+; FMA-NEXT: vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; FMA-NEXT: vxorpd %ymm2, %ymm0, %ymm0
+; FMA-NEXT: vxorpd %ymm2, %ymm1, %ymm1
+; FMA-NEXT: retq
+;
+; FMA4-LABEL: test_v8f64_fneg_fmul_no_nsz:
+; FMA4: # BB#0:
+; FMA4-NEXT: vmulpd %ymm3, %ymm1, %ymm1
+; FMA4-NEXT: vmulpd %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; FMA4-NEXT: vxorpd %ymm2, %ymm0, %ymm0
+; FMA4-NEXT: vxorpd %ymm2, %ymm1, %ymm1
+; FMA4-NEXT: retq
+;
+; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vxorpd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
+ %m = fmul <8 x double> %x, %y
+ %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
+ ret <8 x double> %n
+}
+
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/fmaxnum.ll b/test/CodeGen/X86/fmaxnum.ll
index 23678c46dba0..ebfbd064572a 100644
--- a/test/CodeGen/X86/fmaxnum.ll
+++ b/test/CodeGen/X86/fmaxnum.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=x86 -mtriple=i386-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
declare float @fmaxf(float, float)
declare double @fmax(double, double)
@@ -7,44 +8,232 @@ declare float @llvm.maxnum.f32(float, float)
declare double @llvm.maxnum.f64(double, double)
declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
+declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
+declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
+
+; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
+
; CHECK-LABEL: @test_fmaxf
-; CHECK: calll fmaxf
+; SSE: movaps %xmm0, %xmm2
+; SSE-NEXT: cmpunordss %xmm2, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm3
+; SSE-NEXT: andps %xmm1, %xmm3
+; SSE-NEXT: maxss %xmm0, %xmm1
+; SSE-NEXT: andnps %xmm1, %xmm2
+; SSE-NEXT: orps %xmm3, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vmaxss %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define float @test_fmaxf(float %x, float %y) {
%z = call float @fmaxf(float %x, float %y) readnone
ret float %z
}
+; CHECK-LABEL: @test_fmaxf_minsize
+; CHECK: jmp fmaxf
+define float @test_fmaxf_minsize(float %x, float %y) minsize {
+ %z = call float @fmaxf(float %x, float %y) readnone
+ ret float %z
+}
+
+; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
+
; CHECK-LABEL: @test_fmax
-; CHECK: calll fmax
+; SSE: movapd %xmm0, %xmm2
+; SSE-NEXT: cmpunordsd %xmm2, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm3
+; SSE-NEXT: andpd %xmm1, %xmm3
+; SSE-NEXT: maxsd %xmm0, %xmm1
+; SSE-NEXT: andnpd %xmm1, %xmm2
+; SSE-NEXT: orpd %xmm3, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define double @test_fmax(double %x, double %y) {
%z = call double @fmax(double %x, double %y) readnone
ret double %z
}
; CHECK-LABEL: @test_fmaxl
-; CHECK: calll fmaxl
+; CHECK: callq fmaxl
define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
%z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
ret x86_fp80 %z
}
; CHECK-LABEL: @test_intrinsic_fmaxf
-; CHECK: calll fmaxf
+; SSE: movaps %xmm0, %xmm2
+; SSE-NEXT: cmpunordss %xmm2, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm3
+; SSE-NEXT: andps %xmm1, %xmm3
+; SSE-NEXT: maxss %xmm0, %xmm1
+; SSE-NEXT: andnps %xmm1, %xmm2
+; SSE-NEXT: orps %xmm3, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vmaxss %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define float @test_intrinsic_fmaxf(float %x, float %y) {
%z = call float @llvm.maxnum.f32(float %x, float %y) readnone
ret float %z
}
+
; CHECK-LABEL: @test_intrinsic_fmax
-; CHECK: calll fmax
+; SSE: movapd %xmm0, %xmm2
+; SSE-NEXT: cmpunordsd %xmm2, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm3
+; SSE-NEXT: andpd %xmm1, %xmm3
+; SSE-NEXT: maxsd %xmm0, %xmm1
+; SSE-NEXT: andnpd %xmm1, %xmm2
+; SSE-NEXT: orpd %xmm3, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vmaxsd %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define double @test_intrinsic_fmax(double %x, double %y) {
%z = call double @llvm.maxnum.f64(double %x, double %y) readnone
ret double %z
}
; CHECK-LABEL: @test_intrinsic_fmaxl
-; CHECK: calll fmaxl
+; CHECK: callq fmaxl
define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
%z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
ret x86_fp80 %z
}
+
+; CHECK-LABEL: @test_intrinsic_fmax_v2f32
+; SSE: movaps %xmm1, %xmm2
+; SSE-NEXT: maxps %xmm0, %xmm2
+; SSE-NEXT: cmpunordps %xmm0, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm1
+; SSE-NEXT: andnps %xmm2, %xmm0
+; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vmaxps %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
+define <2 x float> @test_intrinsic_fmax_v2f32(<2 x float> %x, <2 x float> %y) {
+ %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
+ ret <2 x float> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmax_v4f32
+; SSE: movaps %xmm1, %xmm2
+; SSE-NEXT: maxps %xmm0, %xmm2
+; SSE-NEXT: cmpunordps %xmm0, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm1
+; SSE-NEXT: andnps %xmm2, %xmm0
+; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vmaxps %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
+define <4 x float> @test_intrinsic_fmax_v4f32(<4 x float> %x, <4 x float> %y) {
+ %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
+ ret <4 x float> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmax_v2f64
+; SSE: movapd %xmm1, %xmm2
+; SSE-NEXT: maxpd %xmm0, %xmm2
+; SSE-NEXT: cmpunordpd %xmm0, %xmm0
+; SSE-NEXT: andpd %xmm0, %xmm1
+; SSE-NEXT: andnpd %xmm2, %xmm0
+; SSE-NEXT: orpd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vmaxpd %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
+define <2 x double> @test_intrinsic_fmax_v2f64(<2 x double> %x, <2 x double> %y) {
+ %z = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
+ ret <2 x double> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmax_v4f64
+; SSE: movapd %xmm2, %xmm4
+; SSE-NEXT: maxpd %xmm0, %xmm4
+; SSE-NEXT: cmpunordpd %xmm0, %xmm0
+; SSE-NEXT: andpd %xmm0, %xmm2
+; SSE-NEXT: andnpd %xmm4, %xmm0
+; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: movapd %xmm3, %xmm2
+; SSE-NEXT: maxpd %xmm1, %xmm2
+; SSE-NEXT: cmpunordpd %xmm1, %xmm1
+; SSE-NEXT: andpd %xmm1, %xmm3
+; SSE-NEXT: andnpd %xmm2, %xmm1
+; SSE-NEXT: orpd %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX: vmaxpd %ymm0, %ymm1, %ymm2
+; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
+; AVX-NEXT: retq
+define <4 x double> @test_intrinsic_fmax_v4f64(<4 x double> %x, <4 x double> %y) {
+ %z = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
+ ret <4 x double> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmax_v8f64
+; SSE: movapd %xmm4, %xmm8
+; SSE-NEXT: maxpd %xmm0, %xmm8
+; SSE-NEXT: cmpunordpd %xmm0, %xmm0
+; SSE-NEXT: andpd %xmm0, %xmm4
+; SSE-NEXT: andnpd %xmm8, %xmm0
+; SSE-NEXT: orpd %xmm4, %xmm0
+; SSE-NEXT: movapd %xmm5, %xmm4
+; SSE-NEXT: maxpd %xmm1, %xmm4
+; SSE-NEXT: cmpunordpd %xmm1, %xmm1
+; SSE-NEXT: andpd %xmm1, %xmm5
+; SSE-NEXT: andnpd %xmm4, %xmm1
+; SSE-NEXT: orpd %xmm5, %xmm1
+; SSE-NEXT: movapd %xmm6, %xmm4
+; SSE-NEXT: maxpd %xmm2, %xmm4
+; SSE-NEXT: cmpunordpd %xmm2, %xmm2
+; SSE-NEXT: andpd %xmm2, %xmm6
+; SSE-NEXT: andnpd %xmm4, %xmm2
+; SSE-NEXT: orpd %xmm6, %xmm2
+; SSE-NEXT: movapd %xmm7, %xmm4
+; SSE-NEXT: maxpd %xmm3, %xmm4
+; SSE-NEXT: cmpunordpd %xmm3, %xmm3
+; SSE-NEXT: andpd %xmm3, %xmm7
+; SSE-NEXT: andnpd %xmm4, %xmm3
+; SSE-NEXT: orpd %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX: vmaxpd %ymm0, %ymm2, %ymm4
+; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vblendvpd %ymm0, %ymm2, %ymm4, %ymm0
+; AVX-NEXT: vmaxpd %ymm1, %ymm3, %ymm2
+; AVX-NEXT: vcmpunordpd %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vblendvpd %ymm1, %ymm3, %ymm2, %ymm1
+; AVX-NEXT: retq
+define <8 x double> @test_intrinsic_fmax_v8f64(<8 x double> %x, <8 x double> %y) {
+ %z = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
+ ret <8 x double> %z
+}
+
diff --git a/test/CodeGen/X86/fminnum.ll b/test/CodeGen/X86/fminnum.ll
index 1e33cf4696af..afe8b804f267 100644
--- a/test/CodeGen/X86/fminnum.ll
+++ b/test/CodeGen/X86/fminnum.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=x86 -mtriple=i386-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
declare float @fminf(float, float)
declare double @fmin(double, double)
@@ -10,85 +11,219 @@ declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80)
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
+declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
+; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
+
; CHECK-LABEL: @test_fminf
-; CHECK: jmp fminf
+; SSE: movaps %xmm0, %xmm2
+; SSE-NEXT: cmpunordss %xmm2, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm3
+; SSE-NEXT: andps %xmm1, %xmm3
+; SSE-NEXT: minss %xmm0, %xmm1
+; SSE-NEXT: andnps %xmm1, %xmm2
+; SSE-NEXT: orps %xmm3, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vminss %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define float @test_fminf(float %x, float %y) {
%z = call float @fminf(float %x, float %y) readnone
ret float %z
}
+; FIXME: As the vector tests show, the SSE run shouldn't need this many moves.
+
; CHECK-LABEL: @test_fmin
-; CHECK: jmp fmin
+; SSE: movapd %xmm0, %xmm2
+; SSE-NEXT: cmpunordsd %xmm2, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm3
+; SSE-NEXT: andpd %xmm1, %xmm3
+; SSE-NEXT: minsd %xmm0, %xmm1
+; SSE-NEXT: andnpd %xmm1, %xmm2
+; SSE-NEXT: orpd %xmm3, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vminsd %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define double @test_fmin(double %x, double %y) {
%z = call double @fmin(double %x, double %y) readnone
ret double %z
}
; CHECK-LABEL: @test_fminl
-; CHECK: calll fminl
+; CHECK: callq fminl
define x86_fp80 @test_fminl(x86_fp80 %x, x86_fp80 %y) {
%z = call x86_fp80 @fminl(x86_fp80 %x, x86_fp80 %y) readnone
ret x86_fp80 %z
}
; CHECK-LABEL: @test_intrinsic_fminf
-; CHECK: jmp fminf
+; SSE: movaps %xmm0, %xmm2
+; SSE-NEXT: cmpunordss %xmm2, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm3
+; SSE-NEXT: andps %xmm1, %xmm3
+; SSE-NEXT: minss %xmm0, %xmm1
+; SSE-NEXT: andnps %xmm1, %xmm2
+; SSE-NEXT: orps %xmm3, %xmm2
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vminss %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define float @test_intrinsic_fminf(float %x, float %y) {
%z = call float @llvm.minnum.f32(float %x, float %y) readnone
ret float %z
}
; CHECK-LABEL: @test_intrinsic_fmin
-; CHECK: jmp fmin
+; SSE: movapd %xmm0, %xmm2
+; SSE-NEXT: cmpunordsd %xmm2, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm3
+; SSE-NEXT: andpd %xmm1, %xmm3
+; SSE-NEXT: minsd %xmm0, %xmm1
+; SSE-NEXT: andnpd %xmm1, %xmm2
+; SSE-NEXT: orpd %xmm3, %xmm2
+; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vminsd %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define double @test_intrinsic_fmin(double %x, double %y) {
%z = call double @llvm.minnum.f64(double %x, double %y) readnone
ret double %z
}
; CHECK-LABEL: @test_intrinsic_fminl
-; CHECK: calll fminl
+; CHECK: callq fminl
define x86_fp80 @test_intrinsic_fminl(x86_fp80 %x, x86_fp80 %y) {
%z = call x86_fp80 @llvm.minnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
ret x86_fp80 %z
}
; CHECK-LABEL: @test_intrinsic_fmin_v2f32
-; CHECK: calll fminf
-; CHECK: calll fminf
+; SSE: movaps %xmm1, %xmm2
+; SSE-NEXT: minps %xmm0, %xmm2
+; SSE-NEXT: cmpunordps %xmm0, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm1
+; SSE-NEXT: andnps %xmm2, %xmm0
+; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vminps %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define <2 x float> @test_intrinsic_fmin_v2f32(<2 x float> %x, <2 x float> %y) {
%z = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
ret <2 x float> %z
}
; CHECK-LABEL: @test_intrinsic_fmin_v4f32
-; CHECK: calll fminf
-; CHECK: calll fminf
-; CHECK: calll fminf
-; CHECK: calll fminf
+; SSE: movaps %xmm1, %xmm2
+; SSE-NEXT: minps %xmm0, %xmm2
+; SSE-NEXT: cmpunordps %xmm0, %xmm0
+; SSE-NEXT: andps %xmm0, %xmm1
+; SSE-NEXT: andnps %xmm2, %xmm0
+; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vminps %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define <4 x float> @test_intrinsic_fmin_v4f32(<4 x float> %x, <4 x float> %y) {
%z = call <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
ret <4 x float> %z
}
; CHECK-LABEL: @test_intrinsic_fmin_v2f64
-; CHECK: calll fmin
-; CHECK: calll fmin
+; SSE: movapd %xmm1, %xmm2
+; SSE-NEXT: minpd %xmm0, %xmm2
+; SSE-NEXT: cmpunordpd %xmm0, %xmm0
+; SSE-NEXT: andpd %xmm0, %xmm1
+; SSE-NEXT: andnpd %xmm2, %xmm0
+; SSE-NEXT: orpd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX: vminpd %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: retq
define <2 x double> @test_intrinsic_fmin_v2f64(<2 x double> %x, <2 x double> %y) {
%z = call <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
ret <2 x double> %z
}
+; CHECK-LABEL: @test_intrinsic_fmin_v4f64
+; SSE: movapd %xmm2, %xmm4
+; SSE-NEXT: minpd %xmm0, %xmm4
+; SSE-NEXT: cmpunordpd %xmm0, %xmm0
+; SSE-NEXT: andpd %xmm0, %xmm2
+; SSE-NEXT: andnpd %xmm4, %xmm0
+; SSE-NEXT: orpd %xmm2, %xmm0
+; SSE-NEXT: movapd %xmm3, %xmm2
+; SSE-NEXT: minpd %xmm1, %xmm2
+; SSE-NEXT: cmpunordpd %xmm1, %xmm1
+; SSE-NEXT: andpd %xmm1, %xmm3
+; SSE-NEXT: andnpd %xmm2, %xmm1
+; SSE-NEXT: orpd %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX: vminpd %ymm0, %ymm1, %ymm2
+; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0
+; AVX-NEXT: retq
+define <4 x double> @test_intrinsic_fmin_v4f64(<4 x double> %x, <4 x double> %y) {
+ %z = call <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y) readnone
+ ret <4 x double> %z
+}
+
; CHECK-LABEL: @test_intrinsic_fmin_v8f64
-; CHECK: calll fmin
-; CHECK: calll fmin
-; CHECK: calll fmin
-; CHECK: calll fmin
-; CHECK: calll fmin
-; CHECK: calll fmin
-; CHECK: calll fmin
-; CHECK: calll fmin
+; SSE: movapd %xmm4, %xmm8
+; SSE-NEXT: minpd %xmm0, %xmm8
+; SSE-NEXT: cmpunordpd %xmm0, %xmm0
+; SSE-NEXT: andpd %xmm0, %xmm4
+; SSE-NEXT: andnpd %xmm8, %xmm0
+; SSE-NEXT: orpd %xmm4, %xmm0
+; SSE-NEXT: movapd %xmm5, %xmm4
+; SSE-NEXT: minpd %xmm1, %xmm4
+; SSE-NEXT: cmpunordpd %xmm1, %xmm1
+; SSE-NEXT: andpd %xmm1, %xmm5
+; SSE-NEXT: andnpd %xmm4, %xmm1
+; SSE-NEXT: orpd %xmm5, %xmm1
+; SSE-NEXT: movapd %xmm6, %xmm4
+; SSE-NEXT: minpd %xmm2, %xmm4
+; SSE-NEXT: cmpunordpd %xmm2, %xmm2
+; SSE-NEXT: andpd %xmm2, %xmm6
+; SSE-NEXT: andnpd %xmm4, %xmm2
+; SSE-NEXT: orpd %xmm6, %xmm2
+; SSE-NEXT: movapd %xmm7, %xmm4
+; SSE-NEXT: minpd %xmm3, %xmm4
+; SSE-NEXT: cmpunordpd %xmm3, %xmm3
+; SSE-NEXT: andpd %xmm3, %xmm7
+; SSE-NEXT: andnpd %xmm4, %xmm3
+; SSE-NEXT: orpd %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX: vminpd %ymm0, %ymm2, %ymm4
+; AVX-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0
+; AVX-NEXT: vblendvpd %ymm0, %ymm2, %ymm4, %ymm0
+; AVX-NEXT: vminpd %ymm1, %ymm3, %ymm2
+; AVX-NEXT: vcmpunordpd %ymm1, %ymm1, %ymm1
+; AVX-NEXT: vblendvpd %ymm1, %ymm3, %ymm2, %ymm1
+; AVX-NEXT: retq
define <8 x double> @test_intrinsic_fmin_v8f64(<8 x double> %x, <8 x double> %y) {
%z = call <8 x double> @llvm.minnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
ret <8 x double> %z
diff --git a/test/CodeGen/X86/fmul-combines.ll b/test/CodeGen/X86/fmul-combines.ll
index 7d75611e1330..564ce42fdb75 100644
--- a/test/CodeGen/X86/fmul-combines.ll
+++ b/test/CodeGen/X86/fmul-combines.ll
@@ -56,10 +56,10 @@ define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
}
; We should be able to pre-multiply the two constant vectors.
-; CHECK: float 5.000000e+00
-; CHECK: float 1.200000e+01
-; CHECK: float 2.100000e+01
-; CHECK: float 3.200000e+01
+; CHECK: float 5
+; CHECK: float 12
+; CHECK: float 21
+; CHECK: float 32
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat:
; CHECK: mulps
; CHECK-NOT: mulps
@@ -71,10 +71,10 @@ define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 {
}
; Same as above, but reverse operands to make sure non-canonical form is also handled.
-; CHECK: float 5.000000e+00
-; CHECK: float 1.200000e+01
-; CHECK: float 2.100000e+01
-; CHECK: float 3.200000e+01
+; CHECK: float 5
+; CHECK: float 12
+; CHECK: float 21
+; CHECK: float 32
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical:
; CHECK: mulps
; CHECK-NOT: mulps
@@ -86,15 +86,13 @@ define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x)
}
; More than one use of a constant multiply should not inhibit the optimization.
-; Instead of a chain of 2 dependent mults, this test will have 2 independent mults.
-; CHECK: float 5.000000e+00
-; CHECK: float 1.200000e+01
-; CHECK: float 2.100000e+01
-; CHECK: float 3.200000e+01
+; Instead of a chain of 2 dependent mults, this test will have 2 independent mults.
+; CHECK: float 6
+; CHECK: float 14
+; CHECK: float 24
+; CHECK: float 36
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use:
; CHECK: mulps
-; CHECK: mulps
-; CHECK: addps
; CHECK: ret
define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 {
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
@@ -112,10 +110,10 @@ define <4 x float> @PR22698_splats(<4 x float> %a) #0 {
%mul3 = fmul fast <4 x float> %a, %mul2
ret <4 x float> %mul3
-; CHECK: float 2.400000e+01
-; CHECK: float 2.400000e+01
-; CHECK: float 2.400000e+01
-; CHECK: float 2.400000e+01
+; CHECK: float 24
+; CHECK: float 24
+; CHECK: float 24
+; CHECK: float 24
; CHECK-LABEL: PR22698_splats:
; CHECK: mulps
; CHECK: ret
@@ -128,10 +126,10 @@ define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 {
%mul3 = fmul fast <4 x float> %a, %mul2
ret <4 x float> %mul3
-; CHECK: float 4.500000e+01
-; CHECK: float 1.200000e+02
-; CHECK: float 2.310000e+02
-; CHECK: float 3.840000e+02
+; CHECK: float 45
+; CHECK: float 120
+; CHECK: float 231
+; CHECK: float 384
; CHECK-LABEL: PR22698_no_splats:
; CHECK: mulps
; CHECK: ret
diff --git a/test/CodeGen/X86/fold-load-binops.ll b/test/CodeGen/X86/fold-load-binops.ll
index 6d501c74fe57..43966f60718b 100644
--- a/test/CodeGen/X86/fold-load-binops.ll
+++ b/test/CodeGen/X86/fold-load-binops.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
diff --git a/test/CodeGen/X86/fold-load-unops.ll b/test/CodeGen/X86/fold-load-unops.ll
index fcde0218158a..bedda3f297da 100644
--- a/test/CodeGen/X86/fold-load-unops.ll
+++ b/test/CodeGen/X86/fold-load-unops.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
diff --git a/test/CodeGen/X86/fold-push.ll b/test/CodeGen/X86/fold-push.ll
new file mode 100644
index 000000000000..eaf91351021f
--- /dev/null
+++ b/test/CodeGen/X86/fold-push.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL
+; RUN: llc < %s -mtriple=i686-windows -mattr=call-reg-indirect | FileCheck %s -check-prefix=CHECK -check-prefix=SLM
+
+declare void @foo(i32 %r)
+
+define void @test(i32 %a, i32 %b) optsize nounwind {
+; CHECK-LABEL: test:
+; CHECK: movl [[EAX:%e..]], (%esp)
+; CHECK-NEXT: pushl [[EAX]]
+; CHECK-NEXT: calll
+; CHECK-NEXT: addl $4, %esp
+; CHECK: nop
+; NORMAL: pushl (%esp)
+; SLM: movl (%esp), [[RELOAD:%e..]]
+; SLM-NEXT: pushl [[RELOAD]]
+; CHECK: calll
+; CHECK-NEXT: addl $4, %esp
+ %c = add i32 %a, %b
+ call void @foo(i32 %c)
+ call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
+ call void @foo(i32 %c)
+ ret void
+}
+
+define void @test_min(i32 %a, i32 %b) minsize nounwind {
+; CHECK-LABEL: test_min:
+; CHECK: movl [[EAX:%e..]], (%esp)
+; CHECK-NEXT: pushl [[EAX]]
+; CHECK-NEXT: calll
+; CHECK-NEXT: popl
+; CHECK: nop
+; CHECK: pushl (%esp)
+; CHECK: calll
+; CHECK-NEXT: popl
+ %c = add i32 %a, %b
+ call void @foo(i32 %c)
+ call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
+ call void @foo(i32 %c)
+ ret void
+}
diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll
index a9ba20f45e84..d0cf34170081 100644
--- a/test/CodeGen/X86/force-align-stack-alloca.ll
+++ b/test/CodeGen/X86/force-align-stack-alloca.ll
@@ -3,7 +3,7 @@
; arbitrarily force alignment up to 32-bytes for i386 hoping that this will
; exceed any ABI provisions.
;
-; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -stackrealign -stack-alignment=32 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/force-align-stack.ll b/test/CodeGen/X86/force-align-stack.ll
index ffcbf8a908c8..fa94ad4dcd86 100644
--- a/test/CodeGen/X86/force-align-stack.ll
+++ b/test/CodeGen/X86/force-align-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=static -force-align-stack | FileCheck %s
+; RUN: llc < %s -relocation-model=static -stackrealign | FileCheck %s
; Tests to make sure that we always align the stack out to the minimum needed -
; in this case 16-bytes.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll
index 27af5738ca3e..fa31b9c9e128 100644
--- a/test/CodeGen/X86/fp-fast.ll
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
define float @test1(float %a) {
diff --git a/test/CodeGen/X86/fp-logic.ll b/test/CodeGen/X86/fp-logic.ll
new file mode 100644
index 000000000000..64c3f6b79a23
--- /dev/null
+++ b/test/CodeGen/X86/fp-logic.ll
@@ -0,0 +1,264 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck %s
+
+; PR22428: https://llvm.org/bugs/show_bug.cgi?id=22428
+; f1, f2, f3, and f4 should use an integer logic instruction.
+; f9 and f10 should use an FP (SSE) logic instruction.
+;
+; f5, f6, f7, and f8 are less clear.
+;
+; For f5 and f6, we can save a register move by using an FP logic instruction,
+; but we may need to calculate the relative costs of an SSE op vs. int op vs.
+; scalar <-> SSE register moves.
+;
+; For f7 and f8, the SSE instructions don't take immediate operands, so if we
+; use one of those, we either have to load a constant from memory or move the
+; scalar immediate value from an integer register over to an SSE register.
+; Optimizing for size may affect that decision. Also, note that there are no
+; scalar versions of the FP logic ops, so if we want to fold a load into a
+; logic op, we have to load or splat a 16-byte vector constant.
+
+; 1 FP operand, 1 int operand, int result
+
+define i32 @f1(float %x, i32 %y) {
+; CHECK-LABEL: f1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = and i32 %bc1, %y
+ ret i32 %and
+}
+
+; Swap operands of the logic op.
+
+define i32 @f2(float %x, i32 %y) {
+; CHECK-LABEL: f2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = and i32 %y, %bc1
+ ret i32 %and
+}
+
+; 1 FP operand, 1 constant operand, int result
+
+define i32 @f3(float %x) {
+; CHECK-LABEL: f3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = and i32 %bc1, 1
+ ret i32 %and
+}
+
+; Swap operands of the logic op.
+
+define i32 @f4(float %x) {
+; CHECK-LABEL: f4:
+; CHECK: # BB#0:
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: andl $2, %eax
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = and i32 2, %bc1
+ ret i32 %and
+}
+
+; 1 FP operand, 1 integer operand, FP result
+
+define float @f5(float %x, i32 %y) {
+; CHECK-LABEL: f5:
+; CHECK: # BB#0:
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = and i32 %bc1, %y
+ %bc2 = bitcast i32 %and to float
+ ret float %bc2
+}
+
+; Swap operands of the logic op.
+
+define float @f6(float %x, i32 %y) {
+; CHECK-LABEL: f6:
+; CHECK: # BB#0:
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: andl %edi, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = and i32 %y, %bc1
+ %bc2 = bitcast i32 %and to float
+ ret float %bc2
+}
+
+; 1 FP operand, 1 constant operand, FP result
+
+define float @f7(float %x) {
+; CHECK-LABEL: f7:
+; CHECK: # BB#0:
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = and i32 %bc1, 3
+ %bc2 = bitcast i32 %and to float
+ ret float %bc2
+}
+
+; Swap operands of the logic op.
+
+define float @f8(float %x) {
+; CHECK-LABEL: f8:
+; CHECK: # BB#0:
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = and i32 4, %bc1
+ %bc2 = bitcast i32 %and to float
+ ret float %bc2
+}
+
+; 2 FP operands, int result
+
+define i32 @f9(float %x, float %y) {
+; CHECK-LABEL: f9:
+; CHECK: # BB#0:
+; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %bc2 = bitcast float %y to i32
+ %and = and i32 %bc1, %bc2
+ ret i32 %and
+}
+
+; 2 FP operands, FP result
+
+define float @f10(float %x, float %y) {
+; CHECK-LABEL: f10:
+; CHECK: # BB#0:
+; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %bc2 = bitcast float %y to i32
+ %and = and i32 %bc1, %bc2
+ %bc3 = bitcast i32 %and to float
+ ret float %bc3
+}
+
+define float @or(float %x, float %y) {
+; CHECK-LABEL: or:
+; CHECK: # BB#0:
+; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %bc2 = bitcast float %y to i32
+ %and = or i32 %bc1, %bc2
+ %bc3 = bitcast i32 %and to float
+ ret float %bc3
+}
+
+define float @xor(float %x, float %y) {
+; CHECK-LABEL: xor:
+; CHECK: # BB#0:
+; CHECK-NEXT: xorps %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %bc2 = bitcast float %y to i32
+ %and = xor i32 %bc1, %bc2
+ %bc3 = bitcast i32 %and to float
+ ret float %bc3
+}
+
+define float @f7_or(float %x) {
+; CHECK-LABEL: f7_or:
+; CHECK: # BB#0:
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = or i32 %bc1, 3
+ %bc2 = bitcast i32 %and to float
+ ret float %bc2
+}
+
+define float @f7_xor(float %x) {
+; CHECK-LABEL: f7_xor:
+; CHECK: # BB#0:
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: xorps %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = xor i32 %bc1, 3
+ %bc2 = bitcast i32 %and to float
+ ret float %bc2
+}
+
+; Make sure that doubles work too.
+
+define double @doubles(double %x, double %y) {
+; CHECK-LABEL: doubles:
+; CHECK: # BB#0:
+; CHECK-NEXT: andpd %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast double %x to i64
+ %bc2 = bitcast double %y to i64
+ %and = and i64 %bc1, %bc2
+ %bc3 = bitcast i64 %and to double
+ ret double %bc3
+}
+
+define double @f7_double(double %x) {
+; CHECK-LABEL: f7_double:
+; CHECK: # BB#0:
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: andpd %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast double %x to i64
+ %and = and i64 %bc1, 3
+ %bc2 = bitcast i64 %and to double
+ ret double %bc2
+}
+
+; Grabbing the sign bit is a special case that could be handled
+; by movmskps/movmskpd, but if we're not shifting it over, then
+; a simple FP logic op is cheaper.
+
+define float @movmsk(float %x) {
+; CHECK-LABEL: movmsk:
+; CHECK: # BB#0:
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: andps %xmm1, %xmm0
+; CHECK-NEXT: retq
+
+ %bc1 = bitcast float %x to i32
+ %and = and i32 %bc1, 2147483648
+ %bc2 = bitcast i32 %and to float
+ ret float %bc2
+}
+
diff --git a/test/CodeGen/X86/fp128-calling-conv.ll b/test/CodeGen/X86/fp128-calling-conv.ll
new file mode 100644
index 000000000000..e1dab30847c8
--- /dev/null
+++ b/test/CodeGen/X86/fp128-calling-conv.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
+
+; __float128 myFP128 = 1.0L; // x86_64-linux-android
+@myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16
+
+; The first few parameters are passed in registers and the other are on stack.
+
+define fp128 @TestParam_FP128_0(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
+entry:
+ ret fp128 %d0
+; CHECK-LABEL: TestParam_FP128_0:
+; CHECK-NOT: mov
+; CHECK: retq
+}
+
+define fp128 @TestParam_FP128_1(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
+entry:
+ ret fp128 %d1
+; CHECK-LABEL: TestParam_FP128_1:
+; CHECK: movaps %xmm1, %xmm0
+; CHECK-NEXT: retq
+}
+
+define fp128 @TestParam_FP128_7(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
+entry:
+ ret fp128 %d7
+; CHECK-LABEL: TestParam_FP128_7:
+; CHECK: movaps %xmm7, %xmm0
+; CHECK-NEXT: retq
+}
+
+define fp128 @TestParam_FP128_8(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
+entry:
+ ret fp128 %d8
+; CHECK-LABEL: TestParam_FP128_8:
+; CHECK: movaps 8(%rsp), %xmm0
+; CHECK-NEXT: retq
+}
+
+define fp128 @TestParam_FP128_9(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
+entry:
+ ret fp128 %d9
+; CHECK-LABEL: TestParam_FP128_9:
+; CHECK: movaps 24(%rsp), %xmm0
+; CHECK-NEXT: retq
+}
diff --git a/test/CodeGen/X86/fp128-cast.ll b/test/CodeGen/X86/fp128-cast.ll
new file mode 100644
index 000000000000..73878e31d0ef
--- /dev/null
+++ b/test/CodeGen/X86/fp128-cast.ll
@@ -0,0 +1,279 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
+
+; Check soft floating point conversion function calls.
+
+@vi32 = common global i32 0, align 4
+@vi64 = common global i64 0, align 8
+@vu32 = common global i32 0, align 4
+@vu64 = common global i64 0, align 8
+@vf32 = common global float 0.000000e+00, align 4
+@vf64 = common global double 0.000000e+00, align 8
+@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16
+
+define void @TestFPExtF32_F128() {
+entry:
+ %0 = load float, float* @vf32, align 4
+ %conv = fpext float %0 to fp128
+ store fp128 %conv, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: TestFPExtF32_F128:
+; CHECK: movss vf32(%rip), %xmm0
+; CHECK-NEXT: callq __extendsftf2
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @TestFPExtF64_F128() {
+entry:
+ %0 = load double, double* @vf64, align 8
+ %conv = fpext double %0 to fp128
+ store fp128 %conv, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: TestFPExtF64_F128:
+; CHECK: movsd vf64(%rip), %xmm0
+; CHECK-NEXT: callq __extenddftf2
+; CHECK-NEXT: movapd %xmm0, vf128(%rip)
+; CHECK: ret
+}
+
+define void @TestFPToSIF128_I32() {
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %conv = fptosi fp128 %0 to i32
+ store i32 %conv, i32* @vi32, align 4
+ ret void
+; CHECK-LABEL: TestFPToSIF128_I32:
+; CHECK: movaps vf128(%rip), %xmm0
+; CHECK-NEXT: callq __fixtfsi
+; CHECK-NEXT: movl %eax, vi32(%rip)
+; CHECK: retq
+}
+
+define void @TestFPToUIF128_U32() {
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %conv = fptoui fp128 %0 to i32
+ store i32 %conv, i32* @vu32, align 4
+ ret void
+; CHECK-LABEL: TestFPToUIF128_U32:
+; CHECK: movaps vf128(%rip), %xmm0
+; CHECK-NEXT: callq __fixunstfsi
+; CHECK-NEXT: movl %eax, vu32(%rip)
+; CHECK: retq
+}
+
+define void @TestFPToSIF128_I64() {
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %conv = fptosi fp128 %0 to i32
+ %conv1 = sext i32 %conv to i64
+ store i64 %conv1, i64* @vi64, align 8
+ ret void
+; CHECK-LABEL: TestFPToSIF128_I64:
+; CHECK: movaps vf128(%rip), %xmm0
+; CHECK-NEXT: callq __fixtfsi
+; CHECK-NEXT: cltq
+; CHECK-NEXT: movq %rax, vi64(%rip)
+; CHECK: retq
+}
+
+define void @TestFPToUIF128_U64() {
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %conv = fptoui fp128 %0 to i32
+ %conv1 = zext i32 %conv to i64
+ store i64 %conv1, i64* @vu64, align 8
+ ret void
+; CHECK-LABEL: TestFPToUIF128_U64:
+; CHECK: movaps vf128(%rip), %xmm0
+; CHECK-NEXT: callq __fixunstfsi
+; CHECK-NEXT: movl %eax, %eax
+; CHECK-NEXT: movq %rax, vu64(%rip)
+; CHECK: retq
+}
+
+define void @TestFPTruncF128_F32() {
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %conv = fptrunc fp128 %0 to float
+ store float %conv, float* @vf32, align 4
+ ret void
+; CHECK-LABEL: TestFPTruncF128_F32:
+; CHECK: movaps vf128(%rip), %xmm0
+; CHECK-NEXT: callq __trunctfsf2
+; CHECK-NEXT: movss %xmm0, vf32(%rip)
+; CHECK: retq
+}
+
+define void @TestFPTruncF128_F64() {
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %conv = fptrunc fp128 %0 to double
+ store double %conv, double* @vf64, align 8
+ ret void
+; CHECK-LABEL: TestFPTruncF128_F64:
+; CHECK: movapd vf128(%rip), %xmm0
+; CHECK-NEXT: callq __trunctfdf2
+; CHECK-NEXT: movsd %xmm0, vf64(%rip)
+; CHECK: retq
+}
+
+define void @TestSIToFPI32_F128() {
+entry:
+ %0 = load i32, i32* @vi32, align 4
+ %conv = sitofp i32 %0 to fp128
+ store fp128 %conv, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: TestSIToFPI32_F128:
+; CHECK: movl vi32(%rip), %edi
+; CHECK-NEXT: callq __floatsitf
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @TestUIToFPU32_F128() #2 {
+entry:
+ %0 = load i32, i32* @vu32, align 4
+ %conv = uitofp i32 %0 to fp128
+ store fp128 %conv, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: TestUIToFPU32_F128:
+; CHECK: movl vu32(%rip), %edi
+; CHECK-NEXT: callq __floatunsitf
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @TestSIToFPI64_F128(){
+entry:
+ %0 = load i64, i64* @vi64, align 8
+ %conv = sitofp i64 %0 to fp128
+ store fp128 %conv, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: TestSIToFPI64_F128:
+; CHECK: movq vi64(%rip), %rdi
+; CHECK-NEXT: callq __floatditf
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @TestUIToFPU64_F128() #2 {
+entry:
+ %0 = load i64, i64* @vu64, align 8
+ %conv = uitofp i64 %0 to fp128
+ store fp128 %conv, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: TestUIToFPU64_F128:
+; CHECK: movq vu64(%rip), %rdi
+; CHECK-NEXT: callq __floatunditf
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define i32 @TestConst128(fp128 %v) {
+entry:
+ %cmp = fcmp ogt fp128 %v, 0xL00000000000000003FFF000000000000
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK-LABEL: TestConst128:
+; CHECK: movaps {{.*}}, %xmm1
+; CHECK-NEXT: callq __gttf2
+; CHECK-NEXT: test
+; CHECK: retq
+}
+
+; C code:
+; struct TestBits_ieee_ext {
+; unsigned v1;
+; unsigned v2;
+; };
+; union TestBits_LDU {
+; FP128 ld;
+; struct TestBits_ieee_ext bits;
+; };
+; int TestBits128(FP128 ld) {
+; union TestBits_LDU u;
+; u.ld = ld * ld;
+; return ((u.bits.v1 | u.bits.v2) == 0);
+; }
+define i32 @TestBits128(fp128 %ld) {
+entry:
+ %mul = fmul fp128 %ld, %ld
+ %0 = bitcast fp128 %mul to i128
+ %shift = lshr i128 %0, 32
+ %or5 = or i128 %shift, %0
+ %or = trunc i128 %or5 to i32
+ %cmp = icmp eq i32 %or, 0
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK-LABEL: TestBits128:
+; CHECK: movaps %xmm0, %xmm1
+; CHECK-NEXT: callq __multf3
+; CHECK-NEXT: movaps %xmm0, (%rsp)
+; CHECK-NEXT: movq (%rsp),
+; CHECK-NEXT: movq %
+; CHECK-NEXT: shrq $32,
+; CHECK: orl
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK: retq
+;
+; If TestBits128 fails due to any llvm or clang change,
+; please make sure the original simplified C code will
+; be compiled into correct IL and assembly code, not
+; just this TestBits128 test case. Better yet, try to
+; test the whole libm and its test cases.
+}
+
+; C code: (compiled with -target x86_64-linux-android)
+; typedef long double __float128;
+; __float128 TestPair128(unsigned long a, unsigned long b) {
+; unsigned __int128 n;
+; unsigned __int128 v1 = ((unsigned __int128)a << 64);
+; unsigned __int128 v2 = (unsigned __int128)b;
+; n = (v1 | v2) + 3;
+; return *(__float128*)&n;
+; }
+define fp128 @TestPair128(i64 %a, i64 %b) {
+entry:
+ %conv = zext i64 %a to i128
+ %shl = shl nuw i128 %conv, 64
+ %conv1 = zext i64 %b to i128
+ %or = or i128 %shl, %conv1
+ %add = add i128 %or, 3
+ %0 = bitcast i128 %add to fp128
+ ret fp128 %0
+; CHECK-LABEL: TestPair128:
+; CHECK: addq $3, %rsi
+; CHECK: movq %rsi, -24(%rsp)
+; CHECK: movq %rdi, -16(%rsp)
+; CHECK: movaps -24(%rsp), %xmm0
+; CHECK-NEXT: retq
+}
+
+define fp128 @TestTruncCopysign(fp128 %x, i32 %n) {
+entry:
+ %cmp = icmp sgt i32 %n, 50000
+ br i1 %cmp, label %if.then, label %cleanup
+
+if.then: ; preds = %entry
+ %conv = fptrunc fp128 %x to double
+ %call = tail call double @copysign(double 0x7FF0000000000000, double %conv) #2
+ %conv1 = fpext double %call to fp128
+ br label %cleanup
+
+cleanup: ; preds = %entry, %if.then
+ %retval.0 = phi fp128 [ %conv1, %if.then ], [ %x, %entry ]
+ ret fp128 %retval.0
+; CHECK-LABEL: TestTruncCopysign:
+; CHECK: callq __trunctfdf2
+; CHECK-NEXT: andpd {{.*}}, %xmm0
+; CHECK-NEXT: orpd {{.*}}, %xmm0
+; CHECK-NEXT: callq __extenddftf2
+; CHECK: retq
+}
+
+declare double @copysign(double, double) #1
+
+attributes #2 = { nounwind readnone }
diff --git a/test/CodeGen/X86/fp128-compare.ll b/test/CodeGen/X86/fp128-compare.ll
new file mode 100644
index 000000000000..b5d4fbe1b74e
--- /dev/null
+++ b/test/CodeGen/X86/fp128-compare.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
+
+define i32 @TestComp128GT(fp128 %d1, fp128 %d2) {
+entry:
+ %cmp = fcmp ogt fp128 %d1, %d2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK-LABEL: TestComp128GT:
+; CHECK: callq __gttf2
+; CHECK: setg %al
+; CHECK: movzbl %al, %eax
+; CHECK: retq
+}
+
+define i32 @TestComp128GE(fp128 %d1, fp128 %d2) {
+entry:
+ %cmp = fcmp oge fp128 %d1, %d2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK-LABEL: TestComp128GE:
+; CHECK: callq __getf2
+; CHECK: testl %eax, %eax
+; CHECK: setns %al
+; CHECK: movzbl %al, %eax
+; CHECK: retq
+}
+
+define i32 @TestComp128LT(fp128 %d1, fp128 %d2) {
+entry:
+ %cmp = fcmp olt fp128 %d1, %d2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK-LABEL: TestComp128LT:
+; CHECK: callq __lttf2
+; CHECK-NEXT: shrl $31, %eax
+; CHECK: retq
+;
+; The 'shrl' is a special optimization in llvm to combine
+; the effect of 'fcmp olt' and 'zext'. The main purpose is
+; to test soften call to __lttf2.
+}
+
+define i32 @TestComp128LE(fp128 %d1, fp128 %d2) {
+entry:
+ %cmp = fcmp ole fp128 %d1, %d2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK-LABEL: TestComp128LE:
+; CHECK: callq __letf2
+; CHECK-NEXT: testl %eax, %eax
+; CHECK: setle %al
+; CHECK: movzbl %al, %eax
+; CHECK: retq
+}
+
+define i32 @TestComp128EQ(fp128 %d1, fp128 %d2) {
+entry:
+ %cmp = fcmp oeq fp128 %d1, %d2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK-LABEL: TestComp128EQ:
+; CHECK: callq __eqtf2
+; CHECK-NEXT: testl %eax, %eax
+; CHECK: sete %al
+; CHECK: movzbl %al, %eax
+; CHECK: retq
+}
+
+define i32 @TestComp128NE(fp128 %d1, fp128 %d2) {
+entry:
+ %cmp = fcmp une fp128 %d1, %d2
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK-LABEL: TestComp128NE:
+; CHECK: callq __netf2
+; CHECK-NEXT: testl %eax, %eax
+; CHECK: setne %al
+; CHECK: movzbl %al, %eax
+; CHECK: retq
+}
+
+define fp128 @TestMax(fp128 %x, fp128 %y) {
+entry:
+ %cmp = fcmp ogt fp128 %x, %y
+ %cond = select i1 %cmp, fp128 %x, fp128 %y
+ ret fp128 %cond
+; CHECK-LABEL: TestMax:
+; CHECK: movaps %xmm1
+; CHECK: movaps %xmm0
+; CHECK: callq __gttf2
+; CHECK: movaps {{.*}}, %xmm0
+; CHECK: testl %eax, %eax
+; CHECK: movaps {{.*}}, %xmm0
+; CHECK: retq
+}
diff --git a/test/CodeGen/X86/fp128-i128.ll b/test/CodeGen/X86/fp128-i128.ll
new file mode 100644
index 000000000000..77160674ab20
--- /dev/null
+++ b/test/CodeGen/X86/fp128-i128.ll
@@ -0,0 +1,320 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
+
+; These tests were generated from simplified libm C code.
+; When compiled for the x86_64-linux-android target,
+; long double is mapped to f128 type that should be passed
+; in SSE registers. When the f128 type calling convention
+; problem was fixed, old llvm code failed to handle f128 values
+; in several f128/i128 type operations. These unit tests hopefully
+; will catch regression in any future change in this area.
+; To modified or enhance these test cases, please consult libm
+; code pattern and compile with -target x86_64-linux-android
+; to generate IL. The __float128 keyword if not accepted by
+; clang, just define it to "long double".
+;
+
+; typedef long double __float128;
+; union IEEEl2bits {
+; __float128 e;
+; struct {
+; unsigned long manl :64;
+; unsigned long manh :48;
+; unsigned int exp :15;
+; unsigned int sign :1;
+; } bits;
+; struct {
+; unsigned long manl :64;
+; unsigned long manh :48;
+; unsigned int expsign :16;
+; } xbits;
+; };
+
+; C code:
+; void foo(__float128 x);
+; void TestUnionLD1(__float128 s, unsigned long n) {
+; union IEEEl2bits u;
+; __float128 w;
+; u.e = s;
+; u.bits.manh = n;
+; w = u.e;
+; foo(w);
+; }
+define void @TestUnionLD1(fp128 %s, i64 %n) #0 {
+entry:
+ %0 = bitcast fp128 %s to i128
+ %1 = zext i64 %n to i128
+ %bf.value = shl nuw i128 %1, 64
+ %bf.shl = and i128 %bf.value, 5192296858534809181786422619668480
+ %bf.clear = and i128 %0, -5192296858534809181786422619668481
+ %bf.set = or i128 %bf.shl, %bf.clear
+ %2 = bitcast i128 %bf.set to fp128
+ tail call void @foo(fp128 %2) #2
+ ret void
+; CHECK-LABEL: TestUnionLD1:
+; CHECK: movaps %xmm0, -24(%rsp)
+; CHECK-NEXT: movq -24(%rsp), %rax
+; CHECK-NEXT: movabsq $281474976710655, %rcx
+; CHECK-NEXT: andq %rdi, %rcx
+; CHECK-NEXT: movabsq $-281474976710656, %rdx
+; CHECK-NEXT: andq -16(%rsp), %rdx
+; CHECK-NEXT: movq %rax, -40(%rsp)
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: movq %rdx, -32(%rsp)
+; CHECK-NEXT: movaps -40(%rsp), %xmm0
+; CHECK-NEXT: jmp foo
+}
+
+; C code:
+; __float128 TestUnionLD2(__float128 s) {
+; union IEEEl2bits u;
+; __float128 w;
+; u.e = s;
+; u.bits.manl = 0;
+; w = u.e;
+; return w;
+; }
+define fp128 @TestUnionLD2(fp128 %s) #0 {
+entry:
+ %0 = bitcast fp128 %s to i128
+ %bf.clear = and i128 %0, -18446744073709551616
+ %1 = bitcast i128 %bf.clear to fp128
+ ret fp128 %1
+; CHECK-LABEL: TestUnionLD2:
+; CHECK: movaps %xmm0, -24(%rsp)
+; CHECK-NEXT: movq -16(%rsp), %rax
+; CHECK-NEXT: movq %rax, -32(%rsp)
+; CHECK-NEXT: movq $0, -40(%rsp)
+; CHECK-NEXT: movaps -40(%rsp), %xmm0
+; CHECK-NEXT: retq
+}
+
+; C code:
+; __float128 TestI128_1(__float128 x)
+; {
+; union IEEEl2bits z;
+; z.e = x;
+; z.bits.sign = 0;
+; return (z.e < 0.1L) ? 1.0L : 2.0L;
+; }
+define fp128 @TestI128_1(fp128 %x) #0 {
+entry:
+ %0 = bitcast fp128 %x to i128
+ %bf.clear = and i128 %0, 170141183460469231731687303715884105727
+ %1 = bitcast i128 %bf.clear to fp128
+ %cmp = fcmp olt fp128 %1, 0xL999999999999999A3FFB999999999999
+ %cond = select i1 %cmp, fp128 0xL00000000000000003FFF000000000000, fp128 0xL00000000000000004000000000000000
+ ret fp128 %cond
+; CHECK-LABEL: TestI128_1:
+; CHECK: movaps %xmm0,
+; CHECK: movabsq $9223372036854775807,
+; CHECK: callq __lttf2
+; CHECK: testl %eax, %eax
+; CHECK: movaps {{.*}}, %xmm0
+; CHECK: retq
+}
+
+; C code:
+; __float128 TestI128_2(__float128 x, __float128 y)
+; {
+; unsigned short hx;
+; union IEEEl2bits ge_u;
+; ge_u.e = x;
+; hx = ge_u.xbits.expsign;
+; return (hx & 0x8000) == 0 ? x : y;
+; }
+define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 {
+entry:
+ %0 = bitcast fp128 %x to i128
+ %cmp = icmp sgt i128 %0, -1
+ %cond = select i1 %cmp, fp128 %x, fp128 %y
+ ret fp128 %cond
+; CHECK-LABEL: TestI128_2:
+; CHECK: movaps %xmm0, -24(%rsp)
+; CHECK-NEXT: cmpq $0, -16(%rsp)
+; CHECK-NEXT: jns
+; CHECK: movaps %xmm1, %xmm0
+; CHECK: retq
+}
+
+; C code:
+; __float128 TestI128_3(__float128 x, int *ex)
+; {
+; union IEEEl2bits u;
+; u.e = x;
+; if (u.bits.exp == 0) {
+; u.e *= 0x1.0p514;
+; u.bits.exp = 0x3ffe;
+; }
+; return (u.e);
+; }
+define fp128 @TestI128_3(fp128 %x, i32* nocapture readnone %ex) #0 {
+entry:
+ %0 = bitcast fp128 %x to i128
+ %bf.cast = and i128 %0, 170135991163610696904058773219554885632
+ %cmp = icmp eq i128 %bf.cast, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %mul = fmul fp128 %x, 0xL00000000000000004201000000000000
+ %1 = bitcast fp128 %mul to i128
+ %bf.clear4 = and i128 %1, -170135991163610696904058773219554885633
+ %bf.set = or i128 %bf.clear4, 85060207136517546210586590865283612672
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %u.sroa.0.0 = phi i128 [ %bf.set, %if.then ], [ %0, %entry ]
+ %2 = bitcast i128 %u.sroa.0.0 to fp128
+ ret fp128 %2
+; CHECK-LABEL: TestI128_3:
+; CHECK: movaps %xmm0,
+; CHECK: movabsq $9223090561878065152,
+; CHECK: testq
+; CHECK: callq __multf3
+; CHECK-NEXT: movaps %xmm0
+; CHECK: movabsq $-9223090561878065153,
+; CHECK: movabsq $4611123068473966592,
+; CHECK: retq
+}
+
+; C code:
+; __float128 TestI128_4(__float128 x)
+; {
+; union IEEEl2bits u;
+; __float128 df;
+; u.e = x;
+; u.xbits.manl = 0;
+; df = u.e;
+; return x + df;
+; }
+define fp128 @TestI128_4(fp128 %x) #0 {
+entry:
+ %0 = bitcast fp128 %x to i128
+ %bf.clear = and i128 %0, -18446744073709551616
+ %1 = bitcast i128 %bf.clear to fp128
+ %add = fadd fp128 %1, %x
+ ret fp128 %add
+; CHECK-LABEL: TestI128_4:
+; CHECK: movaps %xmm0, %xmm1
+; CHECK-NEXT: movaps %xmm1, 16(%rsp)
+; CHECK-NEXT: movq 24(%rsp), %rax
+; CHECK-NEXT: movq %rax, 8(%rsp)
+; CHECK-NEXT: movq $0, (%rsp)
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: callq __addtf3
+; CHECK: retq
+}
+
+@v128 = common global i128 0, align 16
+@v128_2 = common global i128 0, align 16
+
+; C code:
+; unsigned __int128 v128, v128_2;
+; void TestShift128_2() {
+; v128 = ((v128 << 96) | v128_2);
+; }
+define void @TestShift128_2() #2 {
+entry:
+ %0 = load i128, i128* @v128, align 16
+ %shl = shl i128 %0, 96
+ %1 = load i128, i128* @v128_2, align 16
+ %or = or i128 %shl, %1
+ store i128 %or, i128* @v128, align 16
+ ret void
+; CHECK-LABEL: TestShift128_2:
+; CHECK: movq v128(%rip), %rax
+; CHECK-NEXT: shlq $32, %rax
+; CHECK-NEXT: movq v128_2(%rip), %rcx
+; CHECK-NEXT: orq v128_2+8(%rip), %rax
+; CHECK-NEXT: movq %rcx, v128(%rip)
+; CHECK-NEXT: movq %rax, v128+8(%rip)
+; CHECK-NEXT: retq
+}
+
+define fp128 @acosl(fp128 %x) #0 {
+entry:
+ %0 = bitcast fp128 %x to i128
+ %bf.clear = and i128 %0, -18446744073709551616
+ %1 = bitcast i128 %bf.clear to fp128
+ %add = fadd fp128 %1, %x
+ ret fp128 %add
+; CHECK-LABEL: acosl:
+; CHECK: movaps %xmm0, %xmm1
+; CHECK-NEXT: movaps %xmm1, 16(%rsp)
+; CHECK-NEXT: movq 24(%rsp), %rax
+; CHECK-NEXT: movq %rax, 8(%rsp)
+; CHECK-NEXT: movq $0, (%rsp)
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: callq __addtf3
+; CHECK: retq
+}
+
+; Compare i128 values and check i128 constants.
+define fp128 @TestComp(fp128 %x, fp128 %y) #0 {
+entry:
+ %0 = bitcast fp128 %x to i128
+ %cmp = icmp sgt i128 %0, -1
+ %cond = select i1 %cmp, fp128 %x, fp128 %y
+ ret fp128 %cond
+; CHECK-LABEL: TestComp:
+; CHECK: movaps %xmm0, -24(%rsp)
+; CHECK-NEXT: cmpq $0, -16(%rsp)
+; CHECK-NEXT: jns
+; CHECK: movaps %xmm1, %xmm0
+; CHECK: retq
+}
+
+declare void @foo(fp128) #1
+
+; Test logical operations on fp128 values.
+define fp128 @TestFABS_LD(fp128 %x) #0 {
+entry:
+ %call = tail call fp128 @fabsl(fp128 %x) #2
+ ret fp128 %call
+; CHECK-LABEL: TestFABS_LD
+; CHECK: andps {{.*}}, %xmm0
+; CHECK-NEXT: retq
+}
+
+declare fp128 @fabsl(fp128) #1
+
+declare fp128 @copysignl(fp128, fp128) #1
+
+; Test more complicated logical operations generated from copysignl.
+define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, { fp128, fp128 }* byval nocapture readonly align 16 %z) #0 {
+entry:
+ %z.realp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 0
+ %z.real = load fp128, fp128* %z.realp, align 16
+ %z.imagp = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %z, i64 0, i32 1
+ %z.imag4 = load fp128, fp128* %z.imagp, align 16
+ %cmp = fcmp ogt fp128 %z.real, %z.imag4
+ %sub = fsub fp128 %z.imag4, %z.imag4
+ br i1 %cmp, label %if.then, label %cleanup
+
+if.then: ; preds = %entry
+ %call = tail call fp128 @fabsl(fp128 %sub) #2
+ br label %cleanup
+
+cleanup: ; preds = %entry, %if.then
+ %z.real.sink = phi fp128 [ %z.real, %if.then ], [ %sub, %entry ]
+ %call.sink = phi fp128 [ %call, %if.then ], [ %z.real, %entry ]
+ %call5 = tail call fp128 @copysignl(fp128 %z.real.sink, fp128 %z.imag4) #2
+ %0 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 0
+ %1 = getelementptr inbounds { fp128, fp128 }, { fp128, fp128 }* %agg.result, i64 0, i32 1
+ store fp128 %call.sink, fp128* %0, align 16
+ store fp128 %call5, fp128* %1, align 16
+ ret void
+; CHECK-LABEL: TestCopySign
+; CHECK-NOT: call
+; CHECK: callq __subtf3
+; CHECK-NOT: call
+; CHECK: callq __gttf2
+; CHECK-NOT: call
+; CHECK: andps {{.*}}, %xmm0
+; CHECK: retq
+}
+
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
diff --git a/test/CodeGen/X86/fp128-libcalls.ll b/test/CodeGen/X86/fp128-libcalls.ll
new file mode 100644
index 000000000000..ee5fa447448c
--- /dev/null
+++ b/test/CodeGen/X86/fp128-libcalls.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
+
+; Check all soft floating point library function calls.
+
+@vf64 = common global double 0.000000e+00, align 8
+@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16
+
+define void @Test128Add(fp128 %d1, fp128 %d2) {
+entry:
+ %add = fadd fp128 %d1, %d2
+ store fp128 %add, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: Test128Add:
+; CHECK: callq __addtf3
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @Test128_1Add(fp128 %d1){
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %add = fadd fp128 %0, %d1
+ store fp128 %add, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: Test128_1Add:
+; CHECK: movaps %xmm0, %xmm1
+; CHECK-NEXT: movaps vf128(%rip), %xmm0
+; CHECK-NEXT: callq __addtf3
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @Test128Sub(fp128 %d1, fp128 %d2){
+entry:
+ %sub = fsub fp128 %d1, %d2
+ store fp128 %sub, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: Test128Sub:
+; CHECK: callq __subtf3
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @Test128_1Sub(fp128 %d1){
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %sub = fsub fp128 %0, %d1
+ store fp128 %sub, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: Test128_1Sub:
+; CHECK: movaps %xmm0, %xmm1
+; CHECK-NEXT: movaps vf128(%rip), %xmm0
+; CHECK-NEXT: callq __subtf3
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @Test128Mul(fp128 %d1, fp128 %d2){
+entry:
+ %mul = fmul fp128 %d1, %d2
+ store fp128 %mul, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: Test128Mul:
+; CHECK: callq __multf3
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @Test128_1Mul(fp128 %d1){
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %mul = fmul fp128 %0, %d1
+ store fp128 %mul, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: Test128_1Mul:
+; CHECK: movaps %xmm0, %xmm1
+; CHECK-NEXT: movaps vf128(%rip), %xmm0
+; CHECK-NEXT: callq __multf3
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @Test128Div(fp128 %d1, fp128 %d2){
+entry:
+ %div = fdiv fp128 %d1, %d2
+ store fp128 %div, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: Test128Div:
+; CHECK: callq __divtf3
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
+
+define void @Test128_1Div(fp128 %d1){
+entry:
+ %0 = load fp128, fp128* @vf128, align 16
+ %div = fdiv fp128 %0, %d1
+ store fp128 %div, fp128* @vf128, align 16
+ ret void
+; CHECK-LABEL: Test128_1Div:
+; CHECK: movaps %xmm0, %xmm1
+; CHECK-NEXT: movaps vf128(%rip), %xmm0
+; CHECK-NEXT: callq __divtf3
+; CHECK-NEXT: movaps %xmm0, vf128(%rip)
+; CHECK: retq
+}
diff --git a/test/CodeGen/X86/fp128-load.ll b/test/CodeGen/X86/fp128-load.ll
new file mode 100644
index 000000000000..73bacf87275e
--- /dev/null
+++ b/test/CodeGen/X86/fp128-load.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
+
+; __float128 myFP128 = 1.0L; // x86_64-linux-android
+@my_fp128 = global fp128 0xL00000000000000003FFF000000000000, align 16
+
+define fp128 @get_fp128() {
+entry:
+ %0 = load fp128, fp128* @my_fp128, align 16
+ ret fp128 %0
+; CHECK-LABEL: get_fp128:
+; CHECK: movaps my_fp128(%rip), %xmm0
+; CHECK-NEXT: retq
+}
+
+@TestLoadExtend.data = internal unnamed_addr constant [2 x float] [float 0x3FB99999A0000000, float 0x3FC99999A0000000], align 4
+
+define fp128 @TestLoadExtend(fp128 %x, i32 %n) {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds [2 x float], [2 x float]* @TestLoadExtend.data, i64 0, i64 %idxprom
+ %0 = load float, float* %arrayidx, align 4
+ %conv = fpext float %0 to fp128
+ ret fp128 %conv
+; CHECK-LABEL: TestLoadExtend:
+; CHECK: movslq %edi, %rax
+; CHECK-NEXT: movss TestLoadExtend.data(,%rax,4), %xmm0
+; CHECK-NEXT: callq __extendsftf2
+; CHECK: retq
+}
+
+; CHECK-LABEL: my_fp128:
+; CHECK-NEXT: .quad 0
+; CHECK-NEXT: .quad 4611404543450677248
+; CHECK-NEXT: .size my_fp128, 16
diff --git a/test/CodeGen/X86/fp128-store.ll b/test/CodeGen/X86/fp128-store.ll
new file mode 100644
index 000000000000..ca3af637cff5
--- /dev/null
+++ b/test/CodeGen/X86/fp128-store.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s
+
+; __float128 myFP128 = 1.0L; // x86_64-linux-android
+@myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16
+
+define void @set_FP128(fp128 %x) {
+entry:
+ store fp128 %x, fp128* @myFP128, align 16
+ ret void
+; CHECK-LABEL: set_FP128:
+; CHECK: movaps %xmm0, myFP128(%rip)
+; CHECK-NEXT: retq
+}
diff --git a/test/CodeGen/X86/fpcmp-soft-fp.ll b/test/CodeGen/X86/fpcmp-soft-fp.ll
new file mode 100644
index 000000000000..58d57017d18a
--- /dev/null
+++ b/test/CodeGen/X86/fpcmp-soft-fp.ll
@@ -0,0 +1,127 @@
+; RUN: llc < %s -march=x86 -mcpu=pentium -mtriple=x86-linux-gnu -float-abi=soft | FileCheck %s
+
+define i1 @test1(double %d) #0 {
+entry:
+ %cmp = fcmp ule double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test1:
+; CHECK: calll __gtdf2
+; CHECK: setle
+; CHECK: retl
+
+define i1 @test2(double %d) #0 {
+entry:
+ %cmp = fcmp ult double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test2:
+; CHECK: calll __gedf2
+; CHECK: sets
+; CHECK: retl
+
+define i1 @test3(double %d) #0 {
+entry:
+ %cmp = fcmp ugt double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test3:
+; CHECK: calll __ledf2
+; CHECK: setg
+; CHECK: retl
+
+define i1 @test4(double %d) #0 {
+entry:
+ %cmp = fcmp uge double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test4:
+; CHECK: calll __ltdf2
+; CHECK: setns
+; CHECK: retl
+
+define i1 @test5(double %d) #0 {
+entry:
+ %cmp = fcmp ole double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test5:
+; CHECK: calll __ledf2
+; CHECK: setle
+; CHECK: retl
+
+define i1 @test6(double %d) #0 {
+entry:
+ %cmp = fcmp olt double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test6:
+; CHECK: calll __ltdf2
+; CHECK: sets
+; CHECK: retl
+
+define i1 @test7(double %d) #0 {
+entry:
+ %cmp = fcmp ogt double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test7:
+; CHECK: calll __gtdf2
+; CHECK: setg
+; CHECK: retl
+
+define i1 @test8(double %d) #0 {
+entry:
+ %cmp = fcmp oge double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test8:
+; CHECK: calll __gedf2
+; CHECK: setns
+; CHECK: retl
+
+define i1 @test9(double %d) #0 {
+entry:
+ %cmp = fcmp oeq double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test9:
+; CHECK: calll __eqdf2
+; CHECK: sete
+; CHECK: retl
+
+define i1 @test10(double %d) #0 {
+entry:
+ %cmp = fcmp ueq double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test10:
+; CHECK: calll __eqdf2
+; CHECK: sete
+; CHECK: calll __unorddf2
+; CHECK: setne
+; CHECK: retl
+
+define i1 @test11(double %d) #0 {
+entry:
+ %cmp = fcmp one double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test11:
+; CHECK: calll __gtdf2
+; CHECK: setg
+; CHECK: calll __ltdf2
+; CHECK: sets
+; CHECK: retl
+
+define i1 @test12(double %d) #0 {
+entry:
+ %cmp = fcmp une double %d, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: test12:
+; CHECK: calll __nedf2
+; CHECK: setne
+; CHECK: retl
+
+attributes #0 = { "use-soft-float"="true" }
diff --git a/test/CodeGen/X86/fpstack-debuginstr-kill.ll b/test/CodeGen/X86/fpstack-debuginstr-kill.ll
index 34398414a76c..2ee67dc190bd 100644
--- a/test/CodeGen/X86/fpstack-debuginstr-kill.ll
+++ b/test/CodeGen/X86/fpstack-debuginstr-kill.ll
@@ -3,7 +3,7 @@
@g1 = global double 0.000000e+00, align 8
@g2 = global i32 0, align 4
-define void @_Z16fpuop_arithmeticjj(i32, i32) {
+define void @_Z16fpuop_arithmeticjj(i32, i32) !dbg !4 {
entry:
switch i32 undef, label %sw.bb.i1921 [
]
@@ -43,27 +43,27 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!24, !25}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (http://llvm.org/git/clang 8444ae7cfeaefae031f8fedf0d1435ca3b14d90b) (http://llvm.org/git/llvm 886f0101a7d176543b831f5efb74c03427244a55)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !21, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (http://llvm.org/git/clang 8444ae7cfeaefae031f8fedf0d1435ca3b14d90b) (http://llvm.org/git/llvm 886f0101a7d176543b831f5efb74c03427244a55)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !21, imports: !2)
!1 = !DIFile(filename: "fpu_ieee.cpp", directory: "x87stackifier")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "fpuop_arithmetic", linkageName: "_Z16fpuop_arithmeticjj", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !6, type: !7, function: void (i32, i32)* @_Z16fpuop_arithmeticjj, variables: !10)
+!4 = distinct !DISubprogram(name: "fpuop_arithmetic", linkageName: "_Z16fpuop_arithmeticjj", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !6, type: !7, variables: !10)
!5 = !DIFile(filename: "f1.cpp", directory: "x87stackifier")
!6 = !DIFile(filename: "f1.cpp", directory: "x87stackifier")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9, !9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
!10 = !{!11, !12, !13, !18, !20}
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 11, arg: 1, scope: !4, file: !6, type: !9)
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 11, arg: 2, scope: !4, file: !6, type: !9)
-!13 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 14, scope: !4, file: !6, type: !14)
+!11 = !DILocalVariable(name: "", line: 11, arg: 1, scope: !4, file: !6, type: !9)
+!12 = !DILocalVariable(name: "", line: 11, arg: 2, scope: !4, file: !6, type: !9)
+!13 = !DILocalVariable(name: "x", line: 14, scope: !4, file: !6, type: !14)
!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "fpu_extended", line: 3, file: !5, baseType: !15)
!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "fpu_register", line: 2, file: !5, baseType: !16)
!16 = !DIDerivedType(tag: DW_TAG_typedef, name: "uae_f64", line: 1, file: !5, baseType: !17)
!17 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 15, scope: !4, file: !6, type: !19)
+!18 = !DILocalVariable(name: "a", line: 15, scope: !4, file: !6, type: !19)
!19 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "value", line: 16, scope: !4, file: !6, type: !14)
+!20 = !DILocalVariable(name: "value", line: 16, scope: !4, file: !6, type: !14)
!21 = !{!22, !23}
!22 = !DIGlobalVariable(name: "g1", line: 5, isLocal: false, isDefinition: true, scope: null, file: !6, type: !14, variable: double* @g1)
!23 = !DIGlobalVariable(name: "g2", line: 6, isLocal: false, isDefinition: true, scope: null, file: !6, type: !19, variable: i32* @g2)
diff --git a/test/CodeGen/X86/frameescape.ll b/test/CodeGen/X86/frameescape.ll
deleted file mode 100644
index 179a936304ba..000000000000
--- a/test/CodeGen/X86/frameescape.ll
+++ /dev/null
@@ -1,128 +0,0 @@
-; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s --check-prefix=X86
-; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=X64
-
-declare void @llvm.localescape(...)
-declare i8* @llvm.frameaddress(i32)
-declare i8* @llvm.localrecover(i8*, i8*, i32)
-declare i32 @printf(i8*, ...)
-
-@str = internal constant [10 x i8] c"asdf: %d\0A\00"
-
-define void @print_framealloc_from_fp(i8* %fp) {
- %a.i8 = call i8* @llvm.localrecover(i8* bitcast (void()* @alloc_func to i8*), i8* %fp, i32 0)
- %a = bitcast i8* %a.i8 to i32*
- %a.val = load i32, i32* %a
- call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %a.val)
- %b.i8 = call i8* @llvm.localrecover(i8* bitcast (void()* @alloc_func to i8*), i8* %fp, i32 1)
- %b = bitcast i8* %b.i8 to i32*
- %b.val = load i32, i32* %b
- call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %b.val)
- store i32 42, i32* %b
- %b2 = getelementptr i32, i32* %b, i32 1
- %b2.val = load i32, i32* %b2
- call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %b2.val)
- ret void
-}
-
-; X64-LABEL: print_framealloc_from_fp:
-; X64: movq %rcx, %[[parent_fp:[a-z]+]]
-; X64: movl .Lalloc_func$frame_escape_0(%[[parent_fp]]), %edx
-; X64: leaq {{.*}}(%rip), %[[str:[a-z]+]]
-; X64: movq %[[str]], %rcx
-; X64: callq printf
-; X64: movl .Lalloc_func$frame_escape_1(%[[parent_fp]]), %edx
-; X64: movq %[[str]], %rcx
-; X64: callq printf
-; X64: movl $42, .Lalloc_func$frame_escape_1(%[[parent_fp]])
-; X64: retq
-
-; X86-LABEL: print_framealloc_from_fp:
-; X86: pushl %esi
-; X86: subl $8, %esp
-; X86: movl 16(%esp), %esi
-; X86: movl Lalloc_func$frame_escape_0(%esi), %eax
-; X86: movl %eax, 4(%esp)
-; X86: movl $_str, (%esp)
-; X86: calll _printf
-; X86: movl Lalloc_func$frame_escape_1(%esi), %eax
-; X86: movl %eax, 4(%esp)
-; X86: movl $_str, (%esp)
-; X86: calll _printf
-; X86: movl $42, Lalloc_func$frame_escape_1(%esi)
-; X86: movl $4, %eax
-; X86: movl Lalloc_func$frame_escape_1(%esi,%eax), %eax
-; X86: movl %eax, 4(%esp)
-; X86: movl $_str, (%esp)
-; X86: calll _printf
-; X86: addl $8, %esp
-; X86: popl %esi
-; X86: retl
-
-define void @alloc_func() {
- %a = alloca i32
- %b = alloca i32, i32 2
- call void (...) @llvm.localescape(i32* %a, i32* %b)
- store i32 42, i32* %a
- store i32 13, i32* %b
- %fp = call i8* @llvm.frameaddress(i32 0)
- call void @print_framealloc_from_fp(i8* %fp)
- ret void
-}
-
-; X64-LABEL: alloc_func:
-; X64: subq $48, %rsp
-; X64: .seh_stackalloc 48
-; X64: leaq 48(%rsp), %rbp
-; X64: .seh_setframe 5, 48
-; X64: .Lalloc_func$frame_escape_0 = 44
-; X64: .Lalloc_func$frame_escape_1 = 36
-; X64: movl $42, -4(%rbp)
-; X64: movl $13, -12(%rbp)
-; X64: leaq -48(%rbp), %rcx
-; X64: callq print_framealloc_from_fp
-; X64: retq
-
-; X86-LABEL: alloc_func:
-; X86: pushl %ebp
-; X86: movl %esp, %ebp
-; X86: subl $16, %esp
-; X86: Lalloc_func$frame_escape_0 = -4
-; X86: Lalloc_func$frame_escape_1 = -12
-; X86: movl $42, -4(%ebp)
-; X86: movl $13, -12(%ebp)
-; X86: movl %ebp, (%esp)
-; X86: calll _print_framealloc_from_fp
-; X86: addl $16, %esp
-; X86: popl %ebp
-; X86: retl
-
-; Helper to make this a complete program so it can be compiled and tested.
-define i32 @main() {
- call void @alloc_func()
- ret i32 0
-}
-
-define void @alloc_func_no_frameaddr() {
- %a = alloca i32
- %b = alloca i32
- call void (...) @llvm.localescape(i32* %a, i32* %b)
- store i32 42, i32* %a
- store i32 13, i32* %b
- call void @print_framealloc_from_fp(i8* null)
- ret void
-}
-
-; X64-LABEL: alloc_func_no_frameaddr:
-; X64: subq $40, %rsp
-; X64: .seh_stackalloc 40
-; X64: .seh_endprologue
-; X64: .Lalloc_func_no_frameaddr$frame_escape_0 = 36
-; X64: .Lalloc_func_no_frameaddr$frame_escape_1 = 32
-; X64: movl $42, 36(%rsp)
-; X64: movl $13, 32(%rsp)
-; X64: xorl %ecx, %ecx
-; X64: callq print_framealloc_from_fp
-; X64: addq $40, %rsp
-; X64: retq
-
-; X86-LABEL: alloc_func_no_frameaddr:
diff --git a/test/CodeGen/X86/frem-msvc32.ll b/test/CodeGen/X86/frem-msvc32.ll
new file mode 100644
index 000000000000..01144eb44de4
--- /dev/null
+++ b/test/CodeGen/X86/frem-msvc32.ll
@@ -0,0 +1,12 @@
+; Make sure that 32-bit FREM is promoted to 64-bit FREM on 32-bit MSVC.
+
+; MSVC does not have a 32-bit fmodf function, so it must be promoted to
+; a 64-bit fmod rtlib call.
+; RUN: llc -mtriple=i686-pc-windows-msvc -O0 < %s | FileCheck %s
+
+; CHECK: @do_frem32
+; CHECK: {{_fmod$}}
+define float @do_frem32(float %a, float %b) {
+ %val = frem float %a, %b
+ ret float %val
+}
diff --git a/test/CodeGen/X86/funclet-layout.ll b/test/CodeGen/X86/funclet-layout.ll
new file mode 100644
index 000000000000..0942645cf5a4
--- /dev/null
+++ b/test/CodeGen/X86/funclet-layout.ll
@@ -0,0 +1,158 @@
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = internal global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }
+
+define void @test1(i1 %B) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @g()
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch:
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %cp = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ br label %catch.loop
+
+catch.loop:
+ br i1 %B, label %catchret, label %catch.loop
+
+catchret:
+ catchret from %cp to label %try.cont
+
+try.cont:
+ ret void
+
+unreachable:
+ unreachable
+}
+
+; CHECK-LABEL: test1:
+
+; The entry funclet contains %entry and %try.cont
+; CHECK: # %entry
+; CHECK: # %try.cont
+; CHECK: retq
+
+; The catch funclet contains %catch and %catchret
+; CHECK: # %catch{{$}}
+; CHECK: # %catchret
+; CHECK: retq
+
+declare void @g()
+
+
+define i32 @test2(i1 %B) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1 ["funclet"(token %0)]
+ to label %unreachable unwind label %catch.dispatch.1
+
+catch.dispatch.1: ; preds = %catch
+ %cs2 = catchswitch within %0 [label %catch.3] unwind to caller
+
+catch.3: ; preds = %catch.dispatch.1
+ %1 = catchpad within %cs2 [i8* null, i32 64, i8* null]
+ catchret from %1 to label %try.cont
+
+try.cont: ; preds = %catch.3
+ catchret from %0 to label %try.cont.5
+
+try.cont.5: ; preds = %try.cont
+ ret i32 0
+
+unreachable: ; preds = %catch, %entry
+ unreachable
+}
+
+; CHECK-LABEL: test2:
+
+; The parent function contains %entry and %try.cont.5
+; CHECK: .seh_proc
+; CHECK: # %entry
+; CHECK: # %try.cont.5
+; CHECK: retq
+
+; The inner catch funclet contains %catch.3
+; CHECK: .seh_proc
+; CHECK: # %catch.3{{$}}
+; CHECK: retq
+
+; The outer catch funclet contains %catch
+; CHECK: .seh_proc
+; CHECK: # %catch{{$}}
+; CHECK: callq _CxxThrowException
+; CHECK: # %unreachable
+; CHECK: ud2
+
+
+define void @test3(i1 %V) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch.2] unwind label %catch.dispatch.1
+
+catch.2: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
+ tail call void @exit(i32 0) #2 [ "funclet"(token %0) ]
+ unreachable
+
+catch.dispatch.1: ; preds = %catch.dispatch
+ %cs2 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch.1
+ %1 = catchpad within %cs2 [i8* null, i32 64, i8* null]
+ tail call void @exit(i32 0) #2 [ "funclet"(token %1) ]
+ unreachable
+
+try.cont: ; preds = %entry
+ br i1 %V, label %exit_one, label %exit_two
+
+exit_one:
+ tail call void @exit(i32 0)
+ unreachable
+
+exit_two:
+ tail call void @exit(i32 0)
+ unreachable
+}
+
+; CHECK-LABEL: test3:
+
+; The entry funclet contains %entry and %try.cont
+; CHECK: # %entry
+; CHECK: # %try.cont
+; CHECK: callq exit
+; CHECK-NOT: # exit_one
+; CHECK-NOT: # exit_two
+; CHECK: ud2
+
+; The catch(...) funclet contains %catch.2
+; CHECK: # %catch.2{{$}}
+; CHECK: callq exit
+; CHECK: ud2
+
+; The catch(int) funclet contains %catch
+; CHECK: # %catch{{$}}
+; CHECK: callq exit
+; CHECK: ud2
+
+declare void @exit(i32) noreturn nounwind
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/CodeGen/X86/function-alias.ll b/test/CodeGen/X86/function-alias.ll
new file mode 100644
index 000000000000..d68d75d5578a
--- /dev/null
+++ b/test/CodeGen/X86/function-alias.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; "data" constant
+@0 = private constant <{ i8, i8 }> <{i8 15, i8 11}>, section ".text"
+
+; function-typed alias
+@ud2 = alias void (), bitcast (<{ i8, i8 }>* @0 to void ()*)
+
+; Check that "ud2" is emitted as a function symbol.
+; CHECK: .type{{.*}}ud2,@function
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
index 82064c2a3907..92ea539bcf77 100644
--- a/test/CodeGen/X86/gcc_except_table.ll
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -18,9 +18,9 @@ define i32 @main() uwtable optsize ssp personality i8* bitcast (i32 (...)* @__gx
; MINGW64: .seh_setframe 5, 32
; MINGW64: callq _Unwind_Resume
; MINGW64: .seh_handlerdata
+; MINGW64: .seh_endproc
; MINGW64: GCC_except_table0:
; MINGW64: Lexception0:
-; MINGW64: .seh_endproc
; MINGW32: .cfi_startproc
; MINGW32: .cfi_personality 0, ___gxx_personality_v0
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 82547a606742..92440f2b3316 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -117,7 +117,7 @@ bb7:
; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
-; DARWIN: .section __TEXT,__const_coal,coalesced
+; DARWIN: .section __TEXT,__const{{$}}
; DARWIN: _G2:
; DARWIN: .long 42
@@ -176,7 +176,6 @@ bb7:
; LINUX: .weak "foo bar"
; LINUX: "foo bar":
-; DARWIN: .section __DATA,__datacoal_nt,coalesced
; DARWIN: .globl "_foo bar"
; DARWIN: .weak_definition "_foo bar"
; DARWIN: "_foo bar":
@@ -190,7 +189,7 @@ bb7:
; LINUX: .byte 1
; LINUX: .size G6, 1
-; DARWIN: .section __TEXT,__const_coal,coalesced
+; DARWIN: .section __TEXT,__const{{$}}
; DARWIN: .globl _G6
; DARWIN: .weak_definition _G6
; DARWIN:_G6:
@@ -239,7 +238,7 @@ bb7:
@G10 = weak global [100 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=0]
-; DARWIN: .section __DATA,__datacoal_nt,coalesced
+; DARWIN: .section __DATA,__data{{$}}
; DARWIN: .globl _G10
; DARWIN: .weak_definition _G10
; DARWIN: .align 5
diff --git a/test/CodeGen/X86/h-register-store.ll b/test/CodeGen/X86/h-register-store.ll
index 0adb2b148c39..0e6a0236d2c3 100644
--- a/test/CodeGen/X86/h-register-store.ll
+++ b/test/CodeGen/X86/h-register-store.ll
@@ -7,6 +7,15 @@
; X64-NEXT: movb %ah, (%rsi)
; X64-NOT: mov
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s -check-prefix=X32
+; X32: mov
+; X32-NEXT: movb %ah, (%esi)
+; X32: mov
+; X32-NEXT: movb %ah, (%esi)
+; X32: mov
+; X32-NEXT: movb %ah, (%esi)
+; X32-NOT: mov
+
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
; W64-NOT: mov
; W64: movb %ch, (%rdx)
@@ -16,14 +25,14 @@
; W64: movb %ch, (%rdx)
; W64-NOT: mov
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; X32-NOT: mov
-; X32: movb %ah, (%e
-; X32-NOT: mov
-; X32: movb %ah, (%e
-; X32-NOT: mov
-; X32: movb %ah, (%e
-; X32-NOT: mov
+; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86
+; X86-NOT: mov
+; X86: movb %ah, (%e
+; X86-NOT: mov
+; X86: movb %ah, (%e
+; X86-NOT: mov
+; X86: movb %ah, (%e
+; X86-NOT: mov
; Use h-register extract and store.
diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll
index 6a5ccaa1e76f..9b72916ea743 100644
--- a/test/CodeGen/X86/h-registers-0.ll
+++ b/test/CodeGen/X86/h-registers-0.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mattr=-bmi -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mattr=-bmi -mtriple=x86_64-linux-gnux32 | FileCheck %s -check-prefix=X86-64
; RUN: llc < %s -mattr=-bmi -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
; RUN: llc < %s -mattr=-bmi -march=x86 | FileCheck %s -check-prefix=X86-32
diff --git a/test/CodeGen/X86/h-registers-1.ll b/test/CodeGen/X86/h-registers-1.ll
index 7254325a9265..469d5517b40b 100644
--- a/test/CodeGen/X86/h-registers-1.ll
+++ b/test/CodeGen/X86/h-registers-1.ll
@@ -1,4 +1,5 @@
; RUN: llc -mattr=-bmi < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc -mattr=-bmi < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
; LLVM creates virtual registers for values live across blocks
; based on the type of the value. Make sure that the extracts
diff --git a/test/CodeGen/X86/h-registers-3.ll b/test/CodeGen/X86/h-registers-3.ll
index 29d0c280c4fb..58b02b7df21f 100644
--- a/test/CodeGen/X86/h-registers-3.ll
+++ b/test/CodeGen/X86/h-registers-3.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=x86 | grep mov | count 1
; RUN: llc < %s -march=x86-64 | grep mov | count 1
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | grep mov | count 1
define zeroext i8 @foo() nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/half.ll b/test/CodeGen/X86/half.ll
index 8a726370f19a..3b2518e28f58 100644
--- a/test/CodeGen/X86/half.ll
+++ b/test/CodeGen/X86/half.ll
@@ -77,7 +77,7 @@ define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
-; CHECK-LIBCALL-NEXT: popq %rdx
+; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
@@ -127,7 +127,7 @@ define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]
; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0
; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]
-; CHECK-LIBCALL-NEXT: popq %rdx
+; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq
; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
diff --git a/test/CodeGen/X86/hhvm-cc.ll b/test/CodeGen/X86/hhvm-cc.ll
new file mode 100644
index 000000000000..3b729ed72f1c
--- /dev/null
+++ b/test/CodeGen/X86/hhvm-cc.ll
@@ -0,0 +1,241 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare hhvmcc i64 @bar(i64, i64, i64) nounwind
+
+; Simply check we can modify %rbx and %rbp before returning via call to bar.
+define hhvmcc i64 @foo(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: foo:
+; CHECK-DAG: movl $1, %ebx
+; CHECK-DAG: movl $3, %ebp
+; CHECK: jmp bar
+ %ret = musttail call hhvmcc i64 @bar(i64 1, i64 %b, i64 3)
+ ret i64 %ret
+}
+
+; Check that we can read and modify %rbx returned from PHP function.
+define hhvmcc i64 @mod_return(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: mod_return:
+; CHECK-NEXT: {{^#.*}}
+; CHECK-NEXT: callq bar
+; CHECK-NEXT: incq %rbx
+ %tmp = call hhvmcc i64 @bar(i64 %a, i64 %b, i64 %c)
+ %retval = add i64 %tmp, 1
+ ret i64 %retval
+}
+
+%rettype = type { i64, i64, i64, i64, i64, i64, i64,
+ i64, i64, i64, i64, i64, i64, i64
+}
+
+; Check that we can return up to 14 64-bit args in registers.
+define hhvmcc %rettype @return_all(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: return_all:
+; CHECK-DAG: movl $1, %ebx
+; CHECK-DAG: movl $2, %ebp
+; CHECK-DAG: movl $3, %edi
+; CHECK-DAG: movl $4, %esi
+; CHECK-DAG: movl $5, %edx
+; CHECK-DAG: movl $6, %ecx
+; CHECK-DAG: movl $7, %r8
+; CHECK-DAG: movl $8, %r9
+; CHECK-DAG: movl $9, %eax
+; CHECK-DAG: movl $10, %r10
+; CHECK-DAG: movl $11, %r11
+; CHECK-DAG: movl $12, %r13
+; CHECK-DAG: movl $13, %r14
+; CHECK-DAG: movl $14, %r15
+; CHECK: retq
+ %r1 = insertvalue %rettype zeroinitializer, i64 1, 0
+ %r2 = insertvalue %rettype %r1, i64 2, 1
+ %r3 = insertvalue %rettype %r2, i64 3, 2
+ %r4 = insertvalue %rettype %r3, i64 4, 3
+ %r5 = insertvalue %rettype %r4, i64 5, 4
+ %r6 = insertvalue %rettype %r5, i64 6, 5
+ %r7 = insertvalue %rettype %r6, i64 7, 6
+ %r8 = insertvalue %rettype %r7, i64 8, 7
+ %r9 = insertvalue %rettype %r8, i64 9, 8
+ %r10 = insertvalue %rettype %r9, i64 10, 9
+ %r11 = insertvalue %rettype %r10, i64 11, 10
+ %r12 = insertvalue %rettype %r11, i64 12, 11
+ %r13 = insertvalue %rettype %r12, i64 13, 12
+ %r14 = insertvalue %rettype %r13, i64 14, 13
+ ret %rettype %r14
+}
+
+declare hhvmcc void @return_all_tc(i64, i64, i64, i64, i64, i64, i64, i64,
+ i64, i64, i64, i64, i64, i64, i64)
+
+; Check that we can return up to 14 64-bit args in registers via tail call.
+define hhvmcc void @test_return_all_tc(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: test_return_all_tc:
+; CHECK-NEXT: {{^#.*}}
+; CHECK-DAG: movl $1, %ebx
+; CHECK-DAG: movl $3, %ebp
+; CHECK-DAG: movl $4, %r15
+; CHECK-DAG: movl $5, %edi
+; CHECK-DAG: movl $6, %esi
+; CHECK-DAG: movl $7, %edx
+; CHECK-DAG: movl $8, %ecx
+; CHECK-DAG: movl $9, %r8
+; CHECK-DAG: movl $10, %r9
+; CHECK-DAG: movl $11, %eax
+; CHECK-DAG: movl $12, %r10
+; CHECK-DAG: movl $13, %r11
+; CHECK-DAG: movl $14, %r13
+; CHECK-DAG: movl $15, %r14
+; CHECK: jmp return_all_tc
+ tail call hhvmcc void @return_all_tc(
+ i64 1, i64 %b, i64 3, i64 4, i64 5, i64 6, i64 7,
+ i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15)
+ ret void
+}
+
+declare hhvmcc {i64, i64} @php_short(i64, i64, i64, i64)
+
+define hhvmcc i64 @test_php_short(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: test_php_short:
+; CHECK-NEXT: {{^#.*}}
+; CHECK-NEXT: movl $42, %r15
+; CHECK-NEXT: callq php_short
+; CHECK-NEXT: leaq (%rbp,%r12), %rbx
+; CHECK-NEXT: retq
+ %pair = call hhvmcc {i64, i64} @php_short(i64 %a, i64 %b, i64 %c, i64 42)
+ %fp = extractvalue {i64, i64} %pair, 1
+ %rv = add i64 %fp, %b
+ ret i64 %rv
+}
+
+declare hhvmcc %rettype @php_all(i64, i64, i64, i64, i64, i64, i64,
+ i64, i64, i64, i64, i64, i64, i64, i64)
+
+; Check that we can pass 15 arguments in registers.
+; Also check that %r12 (2nd arg) is not spilled.
+define hhvmcc i64 @test_php_all(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: test_php_all:
+; CHECK-NEXT: {{^#.*}}
+; CHECK-NOT: sub
+; CHECK-NOT: sub
+; CHECK-DAG: movl $1, %ebx
+; CHECK-DAG: movl $3, %ebp
+; CHECK-DAG: movl $4, %r15
+; CHECK-DAG: movl $5, %edi
+; CHECK-DAG: movl $6, %esi
+; CHECK-DAG: movl $7, %edx
+; CHECK-DAG: movl $8, %ecx
+; CHECK-DAG: movl $9, %r8
+; CHECK-DAG: movl $10, %r9
+; CHECK-DAG: movl $11, %eax
+; CHECK-DAG: movl $12, %r10
+; CHECK-DAG: movl $13, %r11
+; CHECK-DAG: movl $14, %r13
+; CHECK-DAG: movl $15, %r14
+; CHECK: callq php_all
+ %pair = call hhvmcc %rettype @php_all(
+ i64 1, i64 %b, i64 3, i64 4, i64 5, i64 6, i64 7,
+ i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15)
+ %fp = extractvalue %rettype %pair, 1
+ %rv = add i64 %fp, %b
+ ret i64 %rv
+}
+
+declare hhvmcc void @svcreq(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
+ i64, i64)
+
+define hhvmcc void @test_svcreq(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: test_svcreq:
+; CHECK-DAG: movl $42, %r10
+; CHECK-DAG: movl $1, %edi
+; CHECK-DAG: movl $2, %esi
+; CHECK-DAG: movl $3, %edx
+; CHECK-DAG: movl $4, %ecx
+; CHECK-DAG: movl $5, %r8
+; CHECK-DAG: movl $6, %r9
+; CHECK: jmp svcreq
+ tail call hhvmcc void @svcreq(i64 %a, i64 %b, i64 %c, i64 undef, i64 1,
+ i64 2, i64 3, i64 4, i64 5, i64 6, i64 undef,
+ i64 42)
+ ret void
+}
+
+declare hhvm_ccc void @helper_short(i64, i64, i64, i64, i64, i64, i64)
+
+; Pass all arguments in registers and check that we don't adjust stack
+; for the call.
+define hhvmcc void @test_helper_short(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: test_helper_short:
+; CHECK-NOT: push
+; CHECK-NOT: sub
+; CHECK-DAG: movl $1, %edi
+; CHECK-DAG: movl $2, %esi
+; CHECK-DAG: movl $3, %edx
+; CHECK-DAG: movl $4, %ecx
+; CHECK-DAG: movl $5, %r8
+; CHECK-DAG: movl $6, %r9
+; CHECK: callq helper_short
+ call hhvm_ccc void @helper_short(i64 %c, i64 1, i64 2, i64 3, i64 4,
+ i64 5, i64 6)
+ ret void
+}
+
+declare hhvm_ccc void @helper(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)
+
+define hhvmcc void @test_helper(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: test_helper:
+; CHECK-DAG: movl $1, %edi
+; CHECK-DAG: movl $2, %esi
+; CHECK-DAG: movl $3, %edx
+; CHECK-DAG: movl $4, %ecx
+; CHECK-DAG: movl $5, %r8
+; CHECK-DAG: movl $6, %r9
+; CHECK: callq helper
+ call hhvm_ccc void @helper(i64 %c, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6,
+ i64 7, i64 8, i64 9)
+ ret void
+}
+
+; When we enter function with HHVM calling convention, the stack is aligned
+; at 16 bytes. This means we align objects on the stack differently and
+; adjust the stack differently for calls.
+declare hhvm_ccc void @stack_helper(i64, i64, i64)
+declare hhvm_ccc void @stack_helper2(<2 x double>, i64)
+
+define hhvmcc void @test_stack_helper(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: test_stack_helper:
+; CHECK-NOT: push
+; CHECK: subq $32, %rsp
+; CHECK: movaps 16(%rsp), %xmm0
+; CHECK: callq stack_helper2
+ %t1 = alloca <2 x double>, align 16
+ %t2 = alloca i64, align 8
+ %t3 = alloca i64, align 8
+ %load3 = load i64, i64 *%t3
+ call hhvm_ccc void @stack_helper(i64 %c, i64 %load3, i64 42)
+ %load = load <2 x double>, <2 x double> *%t1
+ %load2 = load i64, i64 *%t2
+ call hhvm_ccc void @stack_helper2(<2 x double> %load, i64 %load2)
+ ret void
+}
+
+; Check that we are not adjusting the stack before calling the helper.
+define hhvmcc void @test_stack_helper2(i64 %a, i64 %b, i64 %c) nounwind {
+entry:
+; CHECK-LABEL: test_stack_helper2:
+; CHECK-NOT: push
+; CHECK-NOT: subq
+ call hhvm_ccc void @stack_helper(i64 %c, i64 7, i64 42)
+ ret void
+}
+
diff --git a/test/CodeGen/X86/i386-shrink-wrapping.ll b/test/CodeGen/X86/i386-shrink-wrapping.ll
new file mode 100644
index 000000000000..748c397143c5
--- /dev/null
+++ b/test/CodeGen/X86/i386-shrink-wrapping.ll
@@ -0,0 +1,113 @@
+; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-apple-macosx"
+
+@a = common global i32 0, align 4
+@d = internal unnamed_addr global i1 false
+@b = common global i32 0, align 4
+@e = common global i8 0, align 1
+@f = common global i8 0, align 1
+@c = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+
+
+; Check that we are clobbering the flags when they are live-in of the
+; prologue block and the prologue needs to adjust the stack.
+; PR25607.
+;
+; CHECK-LABEL: eflagsLiveInPrologue:
+;
+; DISABLE: pushl
+; DISABLE-NEXT: pushl
+; DISABLE-NEXT: subl $20, %esp
+;
+; CHECK: movl L_a$non_lazy_ptr, [[A:%[a-z]+]]
+; CHECK-NEXT: cmpl $0, ([[A]])
+; CHECK-NEXT: je [[PREHEADER_LABEL:LBB[0-9_]+]]
+;
+; CHECK: movb $1, _d
+;
+; CHECK: [[PREHEADER_LABEL]]:
+; CHECK-NEXT: movl L_b$non_lazy_ptr, [[B:%[a-z]+]]
+; CHECK-NEXT: movl ([[B]]), [[TMP1:%[a-z]+]]
+; CHECK-NEXT: testl [[TMP1]], [[TMP1]]
+; CHECK-NEXT: je [[FOREND_LABEL:LBB[0-9_]+]]
+;
+; Skip the loop.
+; [...]
+;
+; The for.end block is split to accomadate the different selects.
+; We are interested in the one with the call, so skip until the branch.
+; CHECK: [[FOREND_LABEL]]:
+; CHECK-NEXT: movb _d, [[D:%[a-z]+]]
+; [...]
+; CHECK: jne [[CALL_LABEL:LBB[0-9_]+]]
+;
+; CHECK: movb $6, [[D]]
+;
+; CHECK: [[CALL_LABEL]]
+;
+; ENABLE-NEXT: pushl
+; ENABLE-NEXT: pushl
+; We must not use sub here otherwise we will clobber the eflags.
+; ENABLE-NEXT: leal -20(%esp), %esp
+;
+; CHECK-NEXT: L_e$non_lazy_ptr, [[E:%[a-z]+]]
+; CHECK-NEXT: movb [[D]], ([[E]])
+; CHECK-NEXT: L_f$non_lazy_ptr, [[F:%[a-z]+]]
+; CHECK-NEXT: movsbl ([[F]]), [[CONV:%[a-z]+]]
+; CHECK-NEXT: movl $6, [[CONV:%[a-z]+]]
+; The eflags is used in the next instruction.
+; If that instruction disappear, we are not exercising the bug
+; anymore.
+; CHECK-NEXT: cmovnel {{%[a-z]+}}, [[CONV]]
+;
+; Skip all the crust of vaarg lowering.
+; CHECK: calll L_varfunc$stub
+; Set the return value to 0.
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: addl $20, %esp
+; CHECK-NEXT: popl
+; CHECK-NEXT: popl
+; CHECK-NEXT: retl
+define i32 @eflagsLiveInPrologue() #0 {
+entry:
+ %tmp = load i32, i32* @a, align 4
+ %tobool = icmp eq i32 %tmp, 0
+ br i1 %tobool, label %for.cond.preheader, label %if.then
+
+if.then: ; preds = %entry
+ store i1 true, i1* @d, align 1
+ br label %for.cond.preheader
+
+for.cond.preheader: ; preds = %if.then, %entry
+ %tmp1 = load i32, i32* @b, align 4
+ %tobool14 = icmp eq i32 %tmp1, 0
+ br i1 %tobool14, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %for.cond.preheader
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ br label %for.body
+
+for.end: ; preds = %for.cond.preheader
+ %.b3 = load i1, i1* @d, align 1
+ %tmp2 = select i1 %.b3, i8 0, i8 6
+ store i8 %tmp2, i8* @e, align 1
+ %tmp3 = load i8, i8* @f, align 1
+ %conv = sext i8 %tmp3 to i32
+ %add = add nsw i32 %conv, 1
+ %rem = srem i32 %tmp1, %add
+ store i32 %rem, i32* @c, align 4
+ %conv2 = select i1 %.b3, i32 0, i32 6
+ %call = tail call i32 (i8*, ...) @varfunc(i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %conv2) #1
+ ret i32 0
+}
+
+; Function Attrs: nounwind
+declare i32 @varfunc(i8* nocapture readonly, ...) #0
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/X86/immediate_merging.ll b/test/CodeGen/X86/immediate_merging.ll
new file mode 100644
index 000000000000..8aef9c279b31
--- /dev/null
+++ b/test/CodeGen/X86/immediate_merging.ll
@@ -0,0 +1,82 @@
+; RUN: llc -o - -mtriple=i386-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+@e = common global i32 0, align 4
+@x = common global i32 0, align 4
+@f = common global i32 0, align 4
+@h = common global i32 0, align 4
+@i = common global i32 0, align 4
+
+; Test -Os to make sure immediates with multiple users don't get pulled in to
+; instructions.
+define i32 @foo() optsize {
+; CHECK-LABEL: foo:
+; CHECK: movl $1234, [[R1:%[a-z]+]]
+; CHECK-NOT: movl $1234, a
+; CHECK-NOT: movl $1234, b
+; CHECK-NOT: movl $12, c
+; CHECK-NOT: cmpl $12, e
+; CHECK: movl [[R1]], a
+; CHECK: movl [[R1]], b
+
+entry:
+ store i32 1234, i32* @a
+ store i32 1234, i32* @b
+ store i32 12, i32* @c
+ %0 = load i32, i32* @e
+ %cmp = icmp eq i32 %0, 12
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 1, i32* @x
+ br label %if.end
+
+; New block.. Make sure 1234 isn't live across basic blocks from before.
+; CHECK: movl $1234, f
+; CHECK: movl $555, [[R3:%[a-z]+]]
+; CHECK-NOT: movl $555, h
+; CHECK-NOT: addl $555, i
+; CHECK: movl [[R3]], h
+; CHECK: addl [[R3]], i
+
+if.end: ; preds = %if.then, %entry
+ store i32 1234, i32* @f
+ store i32 555, i32* @h
+ %1 = load i32, i32* @i
+ %add1 = add nsw i32 %1, 555
+ store i32 %add1, i32* @i
+ ret i32 0
+}
+
+; Test -O2 to make sure that all immediates get pulled in to their users.
+define i32 @foo2() {
+; CHECK-LABEL: foo2:
+; CHECK: movl $1234, a
+; CHECK: movl $1234, b
+
+entry:
+ store i32 1234, i32* @a
+ store i32 1234, i32* @b
+
+ ret i32 0
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #1
+
+@AA = common global [100 x i8] zeroinitializer, align 1
+
+; memset gets lowered in DAG. Constant merging should hoist all the
+; immediates used to store to the individual memory locations. Make
+; sure we don't directly store the immediates.
+define void @foomemset() optsize {
+; CHECK-LABEL: foomemset:
+; CHECK-NOT: movl ${{.*}}, AA
+; CHECK: mov{{l|q}} %{{e|r}}ax, AA
+
+entry:
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([100 x i8], [100 x i8]* @AA, i32 0, i32 0), i8 33, i32 24, i32 1, i1 false)
+ ret void
+}
diff --git a/test/CodeGen/X86/implicit-null-check.ll b/test/CodeGen/X86/implicit-null-check.ll
index fd7a902eefc1..8b905f5d23b6 100644
--- a/test/CodeGen/X86/implicit-null-check.ll
+++ b/test/CodeGen/X86/implicit-null-check.ll
@@ -101,6 +101,40 @@ define i32 @imp_null_check_hoist_over_unrelated_load(i32* %x, i32* %y, i32* %z)
ret i32 %t1
}
+define i32 @imp_null_check_via_mem_comparision(i32* %x, i32 %val) {
+; CHECK-LABEL: _imp_null_check_via_mem_comparision
+; CHECK: Ltmp9:
+; CHECK: cmpl %esi, 4(%rdi)
+; CHECK: jge LBB4_2
+; CHECK: movl $100, %eax
+; CHECK: retq
+; CHECK: Ltmp8:
+; CHECK: movl $42, %eax
+; CHECK: retq
+; CHECK: LBB4_2:
+; CHECK: movl $200, %eax
+; CHECK: retq
+
+ entry:
+ %c = icmp eq i32* %x, null
+ br i1 %c, label %is_null, label %not_null, !make.implicit !0
+
+ is_null:
+ ret i32 42
+
+ not_null:
+ %x.loc = getelementptr i32, i32* %x, i32 1
+ %t = load i32, i32* %x.loc
+ %m = icmp slt i32 %t, %val
+ br i1 %m, label %ret_100, label %ret_200
+
+ ret_100:
+ ret i32 100
+
+ ret_200:
+ ret i32 200
+}
+
!0 = !{}
; CHECK-LABEL: __LLVM_FaultMaps:
@@ -113,7 +147,7 @@ define i32 @imp_null_check_hoist_over_unrelated_load(i32* %x, i32* %y, i32* %z)
; CHECK-NEXT: .short 0
; # functions:
-; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 5
; FunctionAddr:
; CHECK-NEXT: .quad _imp_null_check_add_result
@@ -167,9 +201,22 @@ define i32 @imp_null_check_hoist_over_unrelated_load(i32* %x, i32* %y, i32* %z)
; Fault[0].HandlerOffset:
; CHECK-NEXT: .long Ltmp0-_imp_null_check_load
+; FunctionAddr:
+; CHECK-NEXT: .quad _imp_null_check_via_mem_comparision
+; NumFaultingPCs
+; CHECK-NEXT: .long 1
+; Reserved:
+; CHECK-NEXT: .long 0
+; Fault[0].Type:
+; CHECK-NEXT: .long 1
+; Fault[0].FaultOffset:
+; CHECK-NEXT: .long Ltmp9-_imp_null_check_via_mem_comparision
+; Fault[0].HandlerOffset:
+; CHECK-NEXT: .long Ltmp8-_imp_null_check_via_mem_comparision
+
; OBJDUMP: FaultMap table:
; OBJDUMP-NEXT: Version: 0x1
-; OBJDUMP-NEXT: NumFunctions: 4
+; OBJDUMP-NEXT: NumFunctions: 5
; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1
; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 5
; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1
diff --git a/test/CodeGen/X86/imul.ll b/test/CodeGen/X86/imul.ll
index c64b4e302b92..9d4d19332dbb 100644
--- a/test/CodeGen/X86/imul.ll
+++ b/test/CodeGen/X86/imul.ll
@@ -108,3 +108,66 @@ define i64 @mul40_64(i64 %A) {
%mul = mul i64 %A, 40
ret i64 %mul
}
+
+define i32 @mul4_32_minsize(i32 %A) minsize {
+; X64-LABEL: mul4_32_minsize:
+; X64: leal
+; X86-LABEL: mul4_32_minsize:
+; X86: shll
+ %mul = mul i32 %A, 4
+ ret i32 %mul
+}
+
+define i32 @mul40_32_minsize(i32 %A) minsize {
+; X64-LABEL: mul40_32_minsize:
+; X64: imull
+; X86-LABEL: mul40_32_minsize:
+; X86: imull
+ %mul = mul i32 %A, 40
+ ret i32 %mul
+}
+
+define i32 @mul33_32(i32 %A) {
+; X64-LABEL: mul33_32:
+; X64: shll
+; X64-NEXT: leal
+; X86-LABEL: mul33_32:
+; X86: shll
+; X86-NEXT: addl
+ %mul = mul i32 %A, 33
+ ret i32 %mul
+}
+
+define i32 @mul31_32(i32 %A) {
+; X64-LABEL: mul31_32:
+; X64: shll
+; X64-NEXT: subl
+; X86-LABEL: mul31_32:
+; X86: shll
+; X86-NEXT: subl
+ %mul = mul i32 %A, 31
+ ret i32 %mul
+}
+
+define i32 @mul0_32(i32 %A) {
+; X64-LABEL: mul0_32:
+; X64: xorl %eax, %eax
+ %mul = mul i32 %A, 0
+ ret i32 %mul
+}
+
+define i32 @mul4294967295_32(i32 %A) {
+; X64-LABEL: mul4294967295_32:
+; X64: negl %edi
+; X64-NEXT: movl %edi, %eax
+ %mul = mul i32 %A, 4294967295
+ ret i32 %mul
+}
+
+define i64 @mul18446744073709551615_64(i64 %A) {
+; X64-LABEL: mul18446744073709551615_64:
+; X64: negq %rdi
+; X64-NEXT: movq %rdi, %rax
+ %mul = mul i64 %A, 18446744073709551615
+ ret i64 %mul
+}
diff --git a/test/CodeGen/X86/inalloca-stdcall.ll b/test/CodeGen/X86/inalloca-stdcall.ll
index e5f6ea70e9cb..4f7e4092a99c 100644
--- a/test/CodeGen/X86/inalloca-stdcall.ll
+++ b/test/CodeGen/X86/inalloca-stdcall.ll
@@ -14,8 +14,9 @@ define void @g() {
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
-; CHECK: movl $13, (%esp)
-; CHECK: movl $42, 4(%esp)
+; CHECK: movl %esp, %eax
+; CHECK: movl $13, (%eax)
+; CHECK: movl $42, 4(%eax)
call x86_stdcallcc void @f(%Foo* inalloca %b)
; CHECK: calll _f@8
; CHECK-NOT: %esp
diff --git a/test/CodeGen/X86/inalloca.ll b/test/CodeGen/X86/inalloca.ll
index 904366219ab7..e523c945a69f 100644
--- a/test/CodeGen/X86/inalloca.ll
+++ b/test/CodeGen/X86/inalloca.ll
@@ -14,8 +14,9 @@ entry:
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
-; CHECK: movl $13, (%esp)
-; CHECK: movl $42, 4(%esp)
+; CHECK: movl %esp, %eax
+; CHECK: movl $13, (%eax)
+; CHECK: movl $42, 4(%eax)
call void @f(%Foo* inalloca %b)
; CHECK: calll _f
ret void
@@ -33,8 +34,9 @@ entry:
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
-; CHECK: movl $13, (%esp)
-; CHECK: movl $42, 4(%esp)
+; CHECK: movl %esp, %eax
+; CHECK: movl $13, (%eax)
+; CHECK: movl $42, 4(%eax)
call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b)
; CHECK: movl $1, %eax
; CHECK: calll _inreg_with_inalloca
@@ -53,8 +55,9 @@ entry:
%f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
-; CHECK-DAG: movl $13, (%esp)
-; CHECK-DAG: movl $42, 4(%esp)
+; CHECK: movl %esp, %eax
+; CHECK-DAG: movl $13, (%eax)
+; CHECK-DAG: movl $42, 4(%eax)
call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b)
; CHECK-DAG: xorl %ecx, %ecx
; CHECK: calll _thiscall_with_inalloca
diff --git a/test/CodeGen/X86/inconsistent_landingpad.ll b/test/CodeGen/X86/inconsistent_landingpad.ll
new file mode 100644
index 000000000000..495e999c4a95
--- /dev/null
+++ b/test/CodeGen/X86/inconsistent_landingpad.ll
@@ -0,0 +1,30 @@
+; RUN: not llvm-as -disable-output <%s 2>&1 | FileCheck %s
+
+define void @test() personality i32 (...)* @dummy_personality {
+; CHECK: The landingpad instruction should have a consistent result type inside a function
+entry:
+ invoke void @dummy1()
+ to label %next unwind label %unwind1
+
+unwind1:
+ %lp1 = landingpad token
+ cleanup
+ br label %return
+
+next:
+ invoke void @dummy2()
+ to label %return unwind label %unwind2
+
+unwind2:
+ %lp2 = landingpad { i8*, i32 }
+ cleanup
+ br label %return
+
+return:
+ ret void
+}
+
+declare void @dummy1()
+declare void @dummy2()
+
+declare i32 @dummy_personality(...)
diff --git a/test/CodeGen/X86/inline-asm-2addr.ll b/test/CodeGen/X86/inline-asm-2addr.ll
index 4a2c7fc5ebac..079f883186fb 100644
--- a/test/CodeGen/X86/inline-asm-2addr.ll
+++ b/test/CodeGen/X86/inline-asm-2addr.ll
@@ -1,9 +1,18 @@
-; RUN: llc < %s -march=x86-64 | not grep movq
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
define i64 @t(i64 %a, i64 %b) nounwind ssp {
entry:
+; CHECK-LABEL: t:
%asmtmp = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %a) nounwind ; <i64> [#uses=1]
+; CHECK: #APP
+; CHECK-NEXT: rorq %[[REG1:.*]]
+; CHECK-NEXT: #NO_APP
%asmtmp1 = tail call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i64 %b) nounwind ; <i64> [#uses=1]
+; CHECK-NEXT: #APP
+; CHECK-NEXT: rorq %[[REG2:.*]]
+; CHECK-NEXT: #NO_APP
%0 = add i64 %asmtmp1, %asmtmp ; <i64> [#uses=1]
+; CHECK-NEXT: leaq (%[[REG2]],%[[REG1]]), %rax
ret i64 %0
+; CHECK: retq
}
diff --git a/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll b/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
index b55571bcba09..970b9943948f 100644
--- a/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
+++ b/test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -force-align-stack -mtriple i386-apple-darwin -mcpu=i486 | FileCheck %s
+; RUN: llc < %s -stackrealign -mtriple i386-apple-darwin -mcpu=i486 | FileCheck %s
%struct.foo = type { [88 x i8] }
diff --git a/test/CodeGen/X86/inline-sse.ll b/test/CodeGen/X86/inline-sse.ll
new file mode 100644
index 000000000000..78d6b762b5e5
--- /dev/null
+++ b/test/CodeGen/X86/inline-sse.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
+
+; PR16133 - we must treat XMM registers as v4f32 as SSE1 targets don't permit other vector types.
+
+define void @nop() nounwind {
+; X32-LABEL: nop:
+; X32: # BB#0:
+; X32-NEXT: pushl %ebp
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: andl $-16, %esp
+; X32-NEXT: subl $32, %esp
+; X32-NEXT: #APP
+; X32-NEXT: #NO_APP
+; X32-NEXT: movaps %xmm0, (%esp)
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
+;
+; X64-LABEL: nop:
+; X64: # BB#0:
+; X64-NEXT: subq $24, %rsp
+; X64-NEXT: #APP
+; X64-NEXT: #NO_APP
+; X64-NEXT: movaps %xmm0, (%rsp)
+; X64-NEXT: addq $24, %rsp
+; X64-NEXT: retq
+ %1 = alloca <4 x float>, align 16
+ %2 = call <4 x float> asm "", "=x,~{dirflag},~{fpsr},~{flags}"()
+ store <4 x float> %2, <4 x float>* %1, align 16
+ ret void
+}
diff --git a/test/CodeGen/X86/insertps-from-constantpool.ll b/test/CodeGen/X86/insertps-from-constantpool.ll
new file mode 100644
index 000000000000..cfcfeacad067
--- /dev/null
+++ b/test/CodeGen/X86/insertps-from-constantpool.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=i686-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=X32
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=X64
+
+; Test for case where insertps folds the load of an insertion element from a constant pool.
+
+define <4 x float> @fold_from_constantpool(<4 x float> %a) {
+; X32-LABEL: fold_from_constantpool:
+; X32: # BB#0:
+; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
+; X32-NEXT: retl
+;
+; X64-LABEL: fold_from_constantpool:
+; X64: # BB#0:
+; X64-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
+; X64-NEXT: retq
+ %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> <float 0.0, float 1.0, float 0.0, float 0.0>, i8 64)
+ ret <4 x float> %1
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/insertps-unfold-load-bug.ll b/test/CodeGen/X86/insertps-unfold-load-bug.ll
new file mode 100644
index 000000000000..bf7c4bc4d7b9
--- /dev/null
+++ b/test/CodeGen/X86/insertps-unfold-load-bug.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=i686-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s -check-prefix=X32
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s -check-prefix=X64
+
+; Test for case where insertps was folding the load of the insertion element, but a later optimization
+; was then manipulating the load.
+
+define <4 x float> @insertps_unfold(<4 x float>* %v0, <4 x float>* %v1) {
+; X32-LABEL: insertps_unfold:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: movaps (%eax), %xmm0
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; X32-NEXT: addps %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: insertps_unfold:
+; X64: # BB#0:
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: movaps (%rdi), %xmm0
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; X64-NEXT: addps %xmm1, %xmm0
+; X64-NEXT: retq
+ %a = getelementptr inbounds <4 x float>, <4 x float>* %v1, i64 0, i64 1
+ %b = load float, float* %a, align 4
+ %c = insertelement <4 x float> undef, float %b, i32 0
+ %d = load <4 x float>, <4 x float>* %v1, align 16
+ %e = load <4 x float>, <4 x float>* %v0, align 16
+ %f = shufflevector <4 x float> %e, <4 x float> %d, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+ %g = fadd <4 x float> %c, %f
+ ret <4 x float> %g
+}
diff --git a/test/CodeGen/X86/int-intrinsic.ll b/test/CodeGen/X86/int-intrinsic.ll
index 45a9b0f15c67..b253e6c5f3b0 100644
--- a/test/CodeGen/X86/int-intrinsic.ll
+++ b/test/CodeGen/X86/int-intrinsic.ll
@@ -11,7 +11,7 @@ bb.entry:
ret void
}
-; CHECK: int $-128
+; CHECK: int $128
; CHECK: ret
define void @primitive_int128 () {
bb.entry:
diff --git a/test/CodeGen/X86/late-address-taken.ll b/test/CodeGen/X86/late-address-taken.ll
new file mode 100644
index 000000000000..f98c53595abb
--- /dev/null
+++ b/test/CodeGen/X86/late-address-taken.ll
@@ -0,0 +1,68 @@
+; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s -enable-shrink-wrap=false | FileCheck %s
+; Make sure shrink-wrapping does not break the lowering of exception handling.
+; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s -enable-shrink-wrap=true | FileCheck %s
+
+; Repro cases from PR25168
+
+; test @catchret - catchret target is not address-taken until PEI
+; splits it into lea/mov followed by ret. Make sure the MBB is
+; handled, both by tempting BranchFolding to merge it with %early_out
+; and delete it, and by checking that we emit a proper reference
+; to it in the LEA
+
+declare void @ProcessCLRException()
+declare void @f()
+
+define void @catchret(i1 %b) personality void ()* @ProcessCLRException {
+entry:
+ br i1 %b, label %body, label %early_out
+early_out:
+ ret void
+body:
+ invoke void @f()
+ to label %exit unwind label %catch.pad
+catch.pad:
+ %cs1 = catchswitch within none [label %catch.body] unwind to caller
+catch.body:
+ %catch = catchpad within %cs1 [i32 33554467]
+ catchret from %catch to label %exit
+exit:
+ ret void
+}
+; CHECK-LABEL: catchret: # @catchret
+; CHECK: [[Exit:^[^ :]+]]: # Block address taken
+; CHECK-NEXT: # %exit
+; CHECK: # %catch.body
+; CHECK: .seh_endprolog
+; CHECK: leaq [[Exit]](%rip), %rax
+; CHECK: retq # CATCHRET
+
+
+; test @setjmp - similar to @catchret, but the MBB in question
+; is the one generated when the setjmp's block is split
+
+@buf = internal global [5 x i8*] zeroinitializer
+declare i8* @llvm.frameaddress(i32) nounwind readnone
+declare i8* @llvm.stacksave() nounwind
+declare i32 @llvm.eh.sjlj.setjmp(i8*) nounwind
+declare void @llvm.eh.sjlj.longjmp(i8*) nounwind
+
+define void @setjmp(i1 %b) nounwind {
+entry:
+ br i1 %b, label %early_out, label %sj
+early_out:
+ ret void
+sj:
+ %fp = call i8* @llvm.frameaddress(i32 0)
+ store i8* %fp, i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @buf, i64 0, i64 0), align 16
+ %sp = call i8* @llvm.stacksave()
+ store i8* %sp, i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @buf, i64 0, i64 2), align 16
+ call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([5 x i8*]* @buf to i8*))
+ ret void
+}
+; CHECK-LABEL: setjmp: # @setjmp
+; CHECK: # %sj
+; CHECK: leaq [[Label:\..+]](%rip), %[[Reg:.+]]{{$}}
+; CHECK-NEXT: movq %[[Reg]], buf
+; CHECK: {{^}}[[Label]]: # Block address taken
+; CHECK-NEXT: # %sj
diff --git a/test/CodeGen/X86/lea-opt.ll b/test/CodeGen/X86/lea-opt.ll
new file mode 100644
index 000000000000..571f2d9084c4
--- /dev/null
+++ b/test/CodeGen/X86/lea-opt.ll
@@ -0,0 +1,131 @@
+; RUN: llc < %s -mtriple=x86_64-linux -enable-x86-lea-opt | FileCheck %s
+
+%struct.anon1 = type { i32, i32, i32 }
+%struct.anon2 = type { i32, [32 x i32], i32 }
+
+@arr1 = external global [65 x %struct.anon1], align 16
+@arr2 = external global [65 x %struct.anon2], align 16
+
+define void @test1(i64 %x) nounwind {
+entry:
+ %a = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 0
+ %tmp = load i32, i32* %a, align 4
+ %b = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 1
+ %tmp1 = load i32, i32* %b, align 4
+ %sub = sub i32 %tmp, %tmp1
+ %c = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 2
+ %tmp2 = load i32, i32* %c, align 4
+ %add = add nsw i32 %sub, %tmp2
+ switch i32 %add, label %sw.epilog [
+ i32 1, label %sw.bb.1
+ i32 2, label %sw.bb.2
+ ]
+
+sw.bb.1: ; preds = %entry
+ store i32 111, i32* %b, align 4
+ store i32 222, i32* %c, align 4
+ br label %sw.epilog
+
+sw.bb.2: ; preds = %entry
+ store i32 333, i32* %b, align 4
+ store i32 444, i32* %c, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
+ ret void
+; CHECK-LABEL: test1:
+; CHECK: leaq (%rdi,%rdi,2), [[REG1:%[a-z]+]]
+; CHECK: movl arr1(,[[REG1]],4), {{.*}}
+; CHECK: leaq arr1+4(,[[REG1]],4), [[REG2:%[a-z]+]]
+; CHECK: subl arr1+4(,[[REG1]],4), {{.*}}
+; CHECK: leaq arr1+8(,[[REG1]],4), [[REG3:%[a-z]+]]
+; CHECK: addl arr1+8(,[[REG1]],4), {{.*}}
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, ([[REG3]])
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, ([[REG3]])
+}
+
+define void @test2(i64 %x) nounwind optsize {
+entry:
+ %a = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 0
+ %tmp = load i32, i32* %a, align 4
+ %b = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 1
+ %tmp1 = load i32, i32* %b, align 4
+ %sub = sub i32 %tmp, %tmp1
+ %c = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 2
+ %tmp2 = load i32, i32* %c, align 4
+ %add = add nsw i32 %sub, %tmp2
+ switch i32 %add, label %sw.epilog [
+ i32 1, label %sw.bb.1
+ i32 2, label %sw.bb.2
+ ]
+
+sw.bb.1: ; preds = %entry
+ store i32 111, i32* %b, align 4
+ store i32 222, i32* %c, align 4
+ br label %sw.epilog
+
+sw.bb.2: ; preds = %entry
+ store i32 333, i32* %b, align 4
+ store i32 444, i32* %c, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
+ ret void
+; CHECK-LABEL: test2:
+; CHECK: leaq (%rdi,%rdi,2), [[REG1:%[a-z]+]]
+; CHECK: leaq arr1+4(,[[REG1]],4), [[REG2:%[a-z]+]]
+; CHECK: movl -4([[REG2]]), {{.*}}
+; CHECK: subl ([[REG2]]), {{.*}}
+; CHECK: leaq arr1+8(,[[REG1]],4), [[REG3:%[a-z]+]]
+; CHECK: addl ([[REG3]]), {{.*}}
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, ([[REG3]])
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, ([[REG3]])
+}
+
+; Check that LEA optimization pass takes into account a resultant address
+; displacement when choosing a LEA instruction for replacing a redundant
+; address recalculation.
+
+define void @test3(i64 %x) nounwind optsize {
+entry:
+ %a = getelementptr inbounds [65 x %struct.anon2], [65 x %struct.anon2]* @arr2, i64 0, i64 %x, i32 2
+ %tmp = load i32, i32* %a, align 4
+ %b = getelementptr inbounds [65 x %struct.anon2], [65 x %struct.anon2]* @arr2, i64 0, i64 %x, i32 0
+ %tmp1 = load i32, i32* %b, align 4
+ %add = add nsw i32 %tmp, %tmp1
+ switch i32 %add, label %sw.epilog [
+ i32 1, label %sw.bb.1
+ i32 2, label %sw.bb.2
+ ]
+
+sw.bb.1: ; preds = %entry
+ store i32 111, i32* %a, align 4
+ store i32 222, i32* %b, align 4
+ br label %sw.epilog
+
+sw.bb.2: ; preds = %entry
+ store i32 333, i32* %a, align 4
+ store i32 444, i32* %b, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
+ ret void
+; CHECK-LABEL: test3:
+; CHECK: imulq {{.*}}, [[REG1:%[a-z]+]]
+; CHECK: leaq arr2+132([[REG1]]), [[REG2:%[a-z]+]]
+; CHECK: leaq arr2([[REG1]]), [[REG3:%[a-z]+]]
+
+; REG3's definition is closer to movl than REG2's, but the pass still chooses
+; REG2 because it provides the resultant address displacement fitting 1 byte.
+
+; CHECK: movl ([[REG2]]), {{.*}}
+; CHECK: addl ([[REG3]]), {{.*}}
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, ([[REG3]])
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, ([[REG3]])
+}
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
index 8ed58f119c4f..4a1dd86abc45 100644
--- a/test/CodeGen/X86/lit.local.cfg
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -4,7 +4,7 @@
#
# It should be possible to remove this override once all the bots have cycled
# cleanly.
-config.suffixes = ['.ll', '.test', '.txt']
+config.suffixes = ['.ll', '.mir', '.test', '.txt']
if not 'X86' in config.root.targets:
config.unsupported = True
diff --git a/test/CodeGen/X86/localescape.ll b/test/CodeGen/X86/localescape.ll
new file mode 100644
index 000000000000..3cd174df0b71
--- /dev/null
+++ b/test/CodeGen/X86/localescape.ll
@@ -0,0 +1,143 @@
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s --check-prefix=X86
+
+declare i8* @llvm.frameaddress(i32)
+declare void @llvm.localescape(...)
+declare i8* @llvm.localaddress()
+declare i8* @llvm.localrecover(i8*, i8*, i32)
+declare i32 @printf(i8*, ...)
+
+@str = internal constant [10 x i8] c"asdf: %d\0A\00"
+
+define void @print_framealloc_from_fp(i8* %fp) {
+ %a.i8 = call i8* @llvm.localrecover(i8* bitcast (void(i32)* @alloc_func to i8*), i8* %fp, i32 0)
+ %a = bitcast i8* %a.i8 to i32*
+ %a.val = load i32, i32* %a
+ call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %a.val)
+ %b.i8 = call i8* @llvm.localrecover(i8* bitcast (void(i32)* @alloc_func to i8*), i8* %fp, i32 1)
+ %b = bitcast i8* %b.i8 to i32*
+ %b.val = load i32, i32* %b
+ call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %b.val)
+ store i32 42, i32* %b
+ %b2 = getelementptr i32, i32* %b, i32 1
+ %b2.val = load i32, i32* %b2
+ call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %b2.val)
+ ret void
+}
+
+; X64-LABEL: print_framealloc_from_fp:
+; X64: movq %rcx, %[[parent_fp:[a-z]+]]
+; X64: movl .Lalloc_func$frame_escape_0(%[[parent_fp]]), %edx
+; X64: leaq {{.*}}(%rip), %[[str:[a-z]+]]
+; X64: movq %[[str]], %rcx
+; X64: callq printf
+; X64: movl .Lalloc_func$frame_escape_1(%[[parent_fp]]), %edx
+; X64: movq %[[str]], %rcx
+; X64: callq printf
+; X64: movl $42, .Lalloc_func$frame_escape_1(%[[parent_fp]])
+; X64: retq
+
+; X86-LABEL: print_framealloc_from_fp:
+; X86: pushl %esi
+; X86: subl $8, %esp
+; X86: movl 16(%esp), %esi
+; X86: movl Lalloc_func$frame_escape_0(%esi), %eax
+; X86: movl %eax, 4(%esp)
+; X86: movl $_str, (%esp)
+; X86: calll _printf
+; X86: movl Lalloc_func$frame_escape_1(%esi), %eax
+; X86: movl %eax, 4(%esp)
+; X86: movl $_str, (%esp)
+; X86: calll _printf
+; X86: movl $42, Lalloc_func$frame_escape_1(%esi)
+; X86: movl $4, %eax
+; X86: movl Lalloc_func$frame_escape_1(%esi,%eax), %eax
+; X86: movl %eax, 4(%esp)
+; X86: movl $_str, (%esp)
+; X86: calll _printf
+; X86: addl $8, %esp
+; X86: popl %esi
+; X86: retl
+
+define void @alloc_func(i32 %n) {
+ %a = alloca i32
+ %b = alloca i32, i32 2
+ call void (...) @llvm.localescape(i32* %a, i32* %b)
+ store i32 42, i32* %a
+ store i32 13, i32* %b
+
+ ; Force usage of EBP with a dynamic alloca.
+ alloca i8, i32 %n
+
+ %lp = call i8* @llvm.localaddress()
+ call void @print_framealloc_from_fp(i8* %lp)
+ ret void
+}
+
+; X64-LABEL: alloc_func:
+; X64: pushq %rbp
+; X64: subq $16, %rsp
+; X64: .seh_stackalloc 16
+; X64: leaq 16(%rsp), %rbp
+; X64: .seh_setframe 5, 16
+; X64: .Lalloc_func$frame_escape_0 = -4
+; X64: .Lalloc_func$frame_escape_1 = -12
+; X64: movl $42, -4(%rbp)
+; X64: movl $13, -12(%rbp)
+; X64: movq %rbp, %rcx
+; X64: callq print_framealloc_from_fp
+; X64: retq
+
+; X86-LABEL: alloc_func:
+; X86: pushl %ebp
+; X86: movl %esp, %ebp
+; X86: subl $12, %esp
+; X86: Lalloc_func$frame_escape_0 = -4
+; X86: Lalloc_func$frame_escape_1 = -12
+; X86: movl $42, -4(%ebp)
+; X86: movl $13, -12(%ebp)
+; X86: pushl %ebp
+; X86: calll _print_framealloc_from_fp
+; X86: movl %ebp, %esp
+; X86: popl %ebp
+; X86: retl
+
+; Helper to make this a complete program so it can be compiled and tested.
+define i32 @main() {
+ call void @alloc_func(i32 3)
+ ret i32 0
+}
+
+define void @alloc_func_no_frameaddr() {
+ %a = alloca i32
+ %b = alloca i32
+ call void (...) @llvm.localescape(i32* %a, i32* %b)
+ store i32 42, i32* %a
+ store i32 13, i32* %b
+ call void @print_framealloc_from_fp(i8* null)
+ ret void
+}
+
+; X64-LABEL: alloc_func_no_frameaddr:
+; X64: subq $40, %rsp
+; X64: .seh_stackalloc 40
+; X64: .seh_endprologue
+; X64: .Lalloc_func_no_frameaddr$frame_escape_0 = 36
+; X64: .Lalloc_func_no_frameaddr$frame_escape_1 = 32
+; X64: movl $42, 36(%rsp)
+; X64: movl $13, 32(%rsp)
+; X64: xorl %ecx, %ecx
+; X64: callq print_framealloc_from_fp
+; X64: addq $40, %rsp
+; X64: retq
+
+; X86-LABEL: alloc_func_no_frameaddr:
+; X86: subl $12, %esp
+; X86: Lalloc_func_no_frameaddr$frame_escape_0 = 8
+; X86: Lalloc_func_no_frameaddr$frame_escape_1 = 4
+; X86: movl $42, 8(%esp)
+; X86: movl $13, 4(%esp)
+; X86: movl $0, (%esp)
+; X86: calll _print_framealloc_from_fp
+; X86: addl $12, %esp
+; X86: retl
diff --git a/test/CodeGen/X86/lower-vec-shift-2.ll b/test/CodeGen/X86/lower-vec-shift-2.ll
index fb8fbba71fca..281461577004 100644
--- a/test/CodeGen/X86/lower-vec-shift-2.ll
+++ b/test/CodeGen/X86/lower-vec-shift-2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE2
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
index 1765ed7871d8..97451e5573fe 100644
--- a/test/CodeGen/X86/lsr-static-addr.ll
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -18,7 +18,7 @@
; ATOM-NEXT: movsd A(,%rax,8)
; ATOM-NEXT: mulsd
; ATOM-NEXT: movsd
-; ATOM-NEXT: leaq 1(%rax), %rax
+; ATOM-NEXT: incq %rax
@A = external global [0 x double]
diff --git a/test/CodeGen/X86/machine-combiner-int-vec.ll b/test/CodeGen/X86/machine-combiner-int-vec.ll
new file mode 100644
index 000000000000..dc1ce77e13b7
--- /dev/null
+++ b/test/CodeGen/X86/machine-combiner-int-vec.ll
@@ -0,0 +1,112 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse2 < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx2 < %s | FileCheck %s --check-prefix=AVX
+
+; Verify that 128-bit vector logical ops are reassociated.
+
+define <4 x i32> @reassociate_and_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; SSE-LABEL: reassociate_and_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: paddd %xmm1, %xmm0
+; SSE-NEXT: pand %xmm3, %xmm2
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_and_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %t0 = add <4 x i32> %x0, %x1
+ %t1 = and <4 x i32> %x2, %t0
+ %t2 = and <4 x i32> %x3, %t1
+ ret <4 x i32> %t2
+}
+
+define <4 x i32> @reassociate_or_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; SSE-LABEL: reassociate_or_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: paddd %xmm1, %xmm0
+; SSE-NEXT: por %xmm3, %xmm2
+; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_or_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %t0 = add <4 x i32> %x0, %x1
+ %t1 = or <4 x i32> %x2, %t0
+ %t2 = or <4 x i32> %x3, %t1
+ ret <4 x i32> %t2
+}
+
+define <4 x i32> @reassociate_xor_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
+; SSE-LABEL: reassociate_xor_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: paddd %xmm1, %xmm0
+; SSE-NEXT: pxor %xmm3, %xmm2
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_xor_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %t0 = add <4 x i32> %x0, %x1
+ %t1 = xor <4 x i32> %x2, %t0
+ %t2 = xor <4 x i32> %x3, %t1
+ ret <4 x i32> %t2
+}
+
+; Verify that 256-bit vector logical ops are reassociated.
+
+define <8 x i32> @reassociate_and_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; AVX-LABEL: reassociate_and_v8i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpand %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+
+ %t0 = add <8 x i32> %x0, %x1
+ %t1 = and <8 x i32> %x2, %t0
+ %t2 = and <8 x i32> %x3, %t1
+ ret <8 x i32> %t2
+}
+
+define <8 x i32> @reassociate_or_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; AVX-LABEL: reassociate_or_v8i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpor %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+
+ %t0 = add <8 x i32> %x0, %x1
+ %t1 = or <8 x i32> %x2, %t0
+ %t2 = or <8 x i32> %x3, %t1
+ ret <8 x i32> %t2
+}
+
+define <8 x i32> @reassociate_xor_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
+; AVX-LABEL: reassociate_xor_v8i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpxor %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+
+ %t0 = add <8 x i32> %x0, %x1
+ %t1 = xor <8 x i32> %x2, %t0
+ %t2 = xor <8 x i32> %x3, %t1
+ ret <8 x i32> %t2
+}
+
diff --git a/test/CodeGen/X86/machine-combiner-int.ll b/test/CodeGen/X86/machine-combiner-int.ll
new file mode 100644
index 000000000000..4a1ba1a980ae
--- /dev/null
+++ b/test/CodeGen/X86/machine-combiner-int.ll
@@ -0,0 +1,194 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -stop-after machine-combiner -o /dev/null 2>&1 | FileCheck %s --check-prefix=DEAD
+
+; Verify that integer multiplies are reassociated. The first multiply in
+; each test should be independent of the result of the preceding add (lea).
+
+; TODO: This test does not actually test i16 machine instruction reassociation
+; because the operands are being promoted to i32 types.
+
+define i16 @reassociate_muls_i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3) {
+; CHECK-LABEL: reassociate_muls_i16:
+; CHECK: # BB#0:
+; CHECK-NEXT: leal (%rdi,%rsi), %eax
+; CHECK-NEXT: imull %ecx, %edx
+; CHECK-NEXT: imull %edx, %eax
+; CHECK-NEXT: retq
+ %t0 = add i16 %x0, %x1
+ %t1 = mul i16 %x2, %t0
+ %t2 = mul i16 %x3, %t1
+ ret i16 %t2
+}
+
+define i32 @reassociate_muls_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: reassociate_muls_i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: leal (%rdi,%rsi), %eax
+; CHECK-NEXT: imull %ecx, %edx
+; CHECK-NEXT: imull %edx, %eax
+; CHECK-NEXT: retq
+
+; DEAD: ADD32rr
+; DEAD-NEXT: IMUL32rr{{.*}}implicit-def dead %eflags
+; DEAD-NEXT: IMUL32rr{{.*}}implicit-def dead %eflags
+
+ %t0 = add i32 %x0, %x1
+ %t1 = mul i32 %x2, %t0
+ %t2 = mul i32 %x3, %t1
+ ret i32 %t2
+}
+
+define i64 @reassociate_muls_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
+; CHECK-LABEL: reassociate_muls_i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: leaq (%rdi,%rsi), %rax
+; CHECK-NEXT: imulq %rcx, %rdx
+; CHECK-NEXT: imulq %rdx, %rax
+; CHECK-NEXT: retq
+ %t0 = add i64 %x0, %x1
+ %t1 = mul i64 %x2, %t0
+ %t2 = mul i64 %x3, %t1
+ ret i64 %t2
+}
+
+; Verify that integer 'ands' are reassociated. The first 'and' in
+; each test should be independent of the result of the preceding sub.
+
+define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
+; CHECK-LABEL: reassociate_ands_i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: subb %sil, %dil
+; CHECK-NEXT: andb %cl, %dl
+; CHECK-NEXT: andb %dil, %dl
+; CHECK_NEXT: movb %dx, %ax
+; CHECK_NEXT: retq
+ %t0 = sub i8 %x0, %x1
+ %t1 = and i8 %x2, %t0
+ %t2 = and i8 %x3, %t1
+ ret i8 %t2
+}
+
+; TODO: No way to test i16? These appear to always get promoted to i32.
+
+define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: reassociate_ands_i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: andl %ecx, %edx
+; CHECK-NEXT: andl %edi, %edx
+; CHECK_NEXT: movl %edx, %eax
+; CHECK_NEXT: retq
+ %t0 = sub i32 %x0, %x1
+ %t1 = and i32 %x2, %t0
+ %t2 = and i32 %x3, %t1
+ ret i32 %t2
+}
+
+define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
+; CHECK-LABEL: reassociate_ands_i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: subq %rsi, %rdi
+; CHECK-NEXT: andq %rcx, %rdx
+; CHECK-NEXT: andq %rdi, %rdx
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK_NEXT: retq
+ %t0 = sub i64 %x0, %x1
+ %t1 = and i64 %x2, %t0
+ %t2 = and i64 %x3, %t1
+ ret i64 %t2
+}
+
+; Verify that integer 'ors' are reassociated. The first 'or' in
+; each test should be independent of the result of the preceding sub.
+
+define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
+; CHECK-LABEL: reassociate_ors_i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: subb %sil, %dil
+; CHECK-NEXT: orb %cl, %dl
+; CHECK-NEXT: orb %dil, %dl
+; CHECK_NEXT: movb %dx, %ax
+; CHECK_NEXT: retq
+ %t0 = sub i8 %x0, %x1
+ %t1 = or i8 %x2, %t0
+ %t2 = or i8 %x3, %t1
+ ret i8 %t2
+}
+
+; TODO: No way to test i16? These appear to always get promoted to i32.
+
+define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: reassociate_ors_i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: orl %ecx, %edx
+; CHECK-NEXT: orl %edi, %edx
+; CHECK_NEXT: movl %edx, %eax
+; CHECK_NEXT: retq
+ %t0 = sub i32 %x0, %x1
+ %t1 = or i32 %x2, %t0
+ %t2 = or i32 %x3, %t1
+ ret i32 %t2
+}
+
+define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
+; CHECK-LABEL: reassociate_ors_i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: subq %rsi, %rdi
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: orq %rdi, %rdx
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK_NEXT: retq
+ %t0 = sub i64 %x0, %x1
+ %t1 = or i64 %x2, %t0
+ %t2 = or i64 %x3, %t1
+ ret i64 %t2
+}
+
+; Verify that integer 'xors' are reassociated. The first 'xor' in
+; each test should be independent of the result of the preceding sub.
+
+define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
+; CHECK-LABEL: reassociate_xors_i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: subb %sil, %dil
+; CHECK-NEXT: xorb %cl, %dl
+; CHECK-NEXT: xorb %dil, %dl
+; CHECK_NEXT: movb %dx, %ax
+; CHECK_NEXT: retq
+ %t0 = sub i8 %x0, %x1
+ %t1 = xor i8 %x2, %t0
+ %t2 = xor i8 %x3, %t1
+ ret i8 %t2
+}
+
+; TODO: No way to test i16? These appear to always get promoted to i32.
+
+define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: reassociate_xors_i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: subl %esi, %edi
+; CHECK-NEXT: xorl %ecx, %edx
+; CHECK-NEXT: xorl %edi, %edx
+; CHECK_NEXT: movl %edx, %eax
+; CHECK_NEXT: retq
+ %t0 = sub i32 %x0, %x1
+ %t1 = xor i32 %x2, %t0
+ %t2 = xor i32 %x3, %t1
+ ret i32 %t2
+}
+
+define i64 @reassociate_xors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
+; CHECK-LABEL: reassociate_xors_i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: subq %rsi, %rdi
+; CHECK-NEXT: xorq %rcx, %rdx
+; CHECK-NEXT: xorq %rdi, %rdx
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK_NEXT: retq
+ %t0 = sub i64 %x0, %x1
+ %t1 = xor i64 %x2, %t0
+ %t2 = xor i64 %x3, %t1
+ ret i64 %t2
+}
+
diff --git a/test/CodeGen/X86/machine-combiner.ll b/test/CodeGen/X86/machine-combiner.ll
index 0943bebbb099..3fbb233696c8 100644
--- a/test/CodeGen/X86/machine-combiner.ll
+++ b/test/CodeGen/X86/machine-combiner.ll
@@ -144,7 +144,7 @@ define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
ret float %t2
}
-; Verify that SSE and AVX scalar single-precison multiplies are reassociated.
+; Verify that SSE and AVX scalar single-precision multiplies are reassociated.
define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
@@ -166,7 +166,7 @@ define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
ret float %t2
}
-; Verify that SSE and AVX scalar double-precison adds are reassociated.
+; Verify that SSE and AVX scalar double-precision adds are reassociated.
define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
@@ -188,7 +188,7 @@ define double @reassociate_adds_double(double %x0, double %x1, double %x2, doubl
ret double %t2
}
-; Verify that SSE and AVX scalar double-precison multiplies are reassociated.
+; Verify that SSE and AVX scalar double-precision multiplies are reassociated.
define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
@@ -210,3 +210,464 @@ define double @reassociate_muls_double(double %x0, double %x1, double %x2, doubl
ret double %t2
}
+; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.
+
+define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; SSE-LABEL: reassociate_adds_v4f32:
+; SSE: # BB#0:
+; SSE-NEXT: mulps %xmm1, %xmm0
+; SSE-NEXT: addps %xmm3, %xmm2
+; SSE-NEXT: addps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds_v4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddps %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fmul <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %x2, %t0
+ %t2 = fadd <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
+; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.
+
+define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
+; SSE-LABEL: reassociate_adds_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: mulpd %xmm1, %xmm0
+; SSE-NEXT: addpd %xmm3, %xmm2
+; SSE-NEXT: addpd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddpd %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fmul <2 x double> %x0, %x1
+ %t1 = fadd <2 x double> %x2, %t0
+ %t2 = fadd <2 x double> %x3, %t1
+ ret <2 x double> %t2
+}
+
+; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.
+
+define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; SSE-LABEL: reassociate_muls_v4f32:
+; SSE: # BB#0:
+; SSE-NEXT: addps %xmm1, %xmm0
+; SSE-NEXT: mulps %xmm3, %xmm2
+; SSE-NEXT: mulps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_muls_v4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulps %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fmul <4 x float> %x2, %t0
+ %t2 = fmul <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
+; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.
+
+define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
+; SSE-LABEL: reassociate_muls_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: mulpd %xmm3, %xmm2
+; SSE-NEXT: mulpd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_muls_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulpd %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fadd <2 x double> %x0, %x1
+ %t1 = fmul <2 x double> %x2, %t0
+ %t2 = fmul <2 x double> %x3, %t1
+ ret <2 x double> %t2
+}
+
+; Verify that AVX 256-bit vector single-precision adds are reassociated.
+
+define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
+; AVX-LABEL: reassociate_adds_v8f32:
+; AVX: # BB#0:
+; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vaddps %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %t0 = fmul <8 x float> %x0, %x1
+ %t1 = fadd <8 x float> %x2, %t0
+ %t2 = fadd <8 x float> %x3, %t1
+ ret <8 x float> %t2
+}
+
+; Verify that AVX 256-bit vector double-precision adds are reassociated.
+
+define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
+; AVX-LABEL: reassociate_adds_v4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %t0 = fmul <4 x double> %x0, %x1
+ %t1 = fadd <4 x double> %x2, %t0
+ %t2 = fadd <4 x double> %x3, %t1
+ ret <4 x double> %t2
+}
+
+; Verify that AVX 256-bit vector single-precision multiplies are reassociated.
+
+define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
+; AVX-LABEL: reassociate_muls_v8f32:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmulps %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %t0 = fadd <8 x float> %x0, %x1
+ %t1 = fmul <8 x float> %x2, %t0
+ %t2 = fmul <8 x float> %x3, %t1
+ ret <8 x float> %t2
+}
+
+; Verify that AVX 256-bit vector double-precision multiplies are reassociated.
+
+define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
+; AVX-LABEL: reassociate_muls_v4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmulpd %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %t0 = fadd <4 x double> %x0, %x1
+ %t1 = fmul <4 x double> %x2, %t0
+ %t2 = fmul <4 x double> %x3, %t1
+ ret <4 x double> %t2
+}
+
+; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
+
+define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
+; SSE-LABEL: reassociate_mins_single:
+; SSE: # BB#0:
+; SSE-NEXT: divss %xmm1, %xmm0
+; SSE-NEXT: minss %xmm3, %xmm2
+; SSE-NEXT: minss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_mins_single:
+; AVX: # BB#0:
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fdiv float %x0, %x1
+ %cmp1 = fcmp olt float %x2, %t0
+ %sel1 = select i1 %cmp1, float %x2, float %t0
+ %cmp2 = fcmp olt float %x3, %sel1
+ %sel2 = select i1 %cmp2, float %x3, float %sel1
+ ret float %sel2
+}
+
+; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.
+
+define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
+; SSE-LABEL: reassociate_maxs_single:
+; SSE: # BB#0:
+; SSE-NEXT: divss %xmm1, %xmm0
+; SSE-NEXT: maxss %xmm3, %xmm2
+; SSE-NEXT: maxss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_maxs_single:
+; AVX: # BB#0:
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fdiv float %x0, %x1
+ %cmp1 = fcmp ogt float %x2, %t0
+ %sel1 = select i1 %cmp1, float %x2, float %t0
+ %cmp2 = fcmp ogt float %x3, %sel1
+ %sel2 = select i1 %cmp2, float %x3, float %sel1
+ ret float %sel2
+}
+
+; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.
+
+define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
+; SSE-LABEL: reassociate_mins_double:
+; SSE: # BB#0:
+; SSE-NEXT: divsd %xmm1, %xmm0
+; SSE-NEXT: minsd %xmm3, %xmm2
+; SSE-NEXT: minsd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_mins_double:
+; AVX: # BB#0:
+; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminsd %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fdiv double %x0, %x1
+ %cmp1 = fcmp olt double %x2, %t0
+ %sel1 = select i1 %cmp1, double %x2, double %t0
+ %cmp2 = fcmp olt double %x3, %sel1
+ %sel2 = select i1 %cmp2, double %x3, double %sel1
+ ret double %sel2
+}
+
+; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.
+
+define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
+; SSE-LABEL: reassociate_maxs_double:
+; SSE: # BB#0:
+; SSE-NEXT: divsd %xmm1, %xmm0
+; SSE-NEXT: maxsd %xmm3, %xmm2
+; SSE-NEXT: maxsd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_maxs_double:
+; AVX: # BB#0:
+; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxsd %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fdiv double %x0, %x1
+ %cmp1 = fcmp ogt double %x2, %t0
+ %sel1 = select i1 %cmp1, double %x2, double %t0
+ %cmp2 = fcmp ogt double %x3, %sel1
+ %sel2 = select i1 %cmp2, double %x3, double %sel1
+ ret double %sel2
+}
+
+; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.
+
+define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; SSE-LABEL: reassociate_mins_v4f32:
+; SSE: # BB#0:
+; SSE-NEXT: addps %xmm1, %xmm0
+; SSE-NEXT: minps %xmm3, %xmm2
+; SSE-NEXT: minps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_mins_v4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminps %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fadd <4 x float> %x0, %x1
+ %cmp1 = fcmp olt <4 x float> %x2, %t0
+ %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
+ %cmp2 = fcmp olt <4 x float> %x3, %sel1
+ %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
+ ret <4 x float> %sel2
+}
+
+; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.
+
+define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; SSE-LABEL: reassociate_maxs_v4f32:
+; SSE: # BB#0:
+; SSE-NEXT: addps %xmm1, %xmm0
+; SSE-NEXT: maxps %xmm3, %xmm2
+; SSE-NEXT: maxps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_maxs_v4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxps %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fadd <4 x float> %x0, %x1
+ %cmp1 = fcmp ogt <4 x float> %x2, %t0
+ %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
+ %cmp2 = fcmp ogt <4 x float> %x3, %sel1
+ %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
+ ret <4 x float> %sel2
+}
+
+; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.
+
+define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
+; SSE-LABEL: reassociate_mins_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: minpd %xmm3, %xmm2
+; SSE-NEXT: minpd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_mins_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminpd %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fadd <2 x double> %x0, %x1
+ %cmp1 = fcmp olt <2 x double> %x2, %t0
+ %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
+ %cmp2 = fcmp olt <2 x double> %x3, %sel1
+ %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
+ ret <2 x double> %sel2
+}
+
+; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.
+
+define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
+; SSE-LABEL: reassociate_maxs_v2f64:
+; SSE: # BB#0:
+; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: maxpd %xmm3, %xmm2
+; SSE-NEXT: maxpd %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_maxs_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxpd %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %t0 = fadd <2 x double> %x0, %x1
+ %cmp1 = fcmp ogt <2 x double> %x2, %t0
+ %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
+ %cmp2 = fcmp ogt <2 x double> %x3, %sel1
+ %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
+ ret <2 x double> %sel2
+}
+
+; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.
+
+define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
+; AVX-LABEL: reassociate_mins_v8f32:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminps %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %t0 = fadd <8 x float> %x0, %x1
+ %cmp1 = fcmp olt <8 x float> %x2, %t0
+ %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
+ %cmp2 = fcmp olt <8 x float> %x3, %sel1
+ %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
+ ret <8 x float> %sel2
+}
+
+; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.
+
+define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
+; AVX-LABEL: reassociate_maxs_v8f32:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxps %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %t0 = fadd <8 x float> %x0, %x1
+ %cmp1 = fcmp ogt <8 x float> %x2, %t0
+ %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
+ %cmp2 = fcmp ogt <8 x float> %x3, %sel1
+ %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
+ ret <8 x float> %sel2
+}
+
+; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.
+
+define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
+; AVX-LABEL: reassociate_mins_v4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vminpd %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %t0 = fadd <4 x double> %x0, %x1
+ %cmp1 = fcmp olt <4 x double> %x2, %t0
+ %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
+ %cmp2 = fcmp olt <4 x double> %x3, %sel1
+ %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
+ ret <4 x double> %sel2
+}
+
+; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.
+
+define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
+; AVX-LABEL: reassociate_maxs_v4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vmaxpd %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %t0 = fadd <4 x double> %x0, %x1
+ %cmp1 = fcmp ogt <4 x double> %x2, %t0
+ %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
+ %cmp2 = fcmp ogt <4 x double> %x3, %sel1
+ %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
+ ret <4 x double> %sel2
+}
+
+; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
+; Verify that reassociation is not happening needlessly or wrongly.
+
+declare double @bar()
+
+define double @reassociate_adds_from_calls() {
+; AVX-LABEL: reassociate_adds_from_calls:
+; AVX: callq bar
+; AVX-NEXT: vmovsd %xmm0, 16(%rsp)
+; AVX-NEXT: callq bar
+; AVX-NEXT: vmovsd %xmm0, 8(%rsp)
+; AVX-NEXT: callq bar
+; AVX-NEXT: vmovsd %xmm0, (%rsp)
+; AVX-NEXT: callq bar
+; AVX-NEXT: vmovsd 8(%rsp), %xmm1
+; AVX: vaddsd 16(%rsp), %xmm1, %xmm1
+; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0
+; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
+
+ %x0 = call double @bar()
+ %x1 = call double @bar()
+ %x2 = call double @bar()
+ %x3 = call double @bar()
+ %t0 = fadd double %x0, %x1
+ %t1 = fadd double %t0, %x2
+ %t2 = fadd double %t1, %x3
+ ret double %t2
+}
+
+define double @already_reassociated() {
+; AVX-LABEL: already_reassociated:
+; AVX: callq bar
+; AVX-NEXT: vmovsd %xmm0, 16(%rsp)
+; AVX-NEXT: callq bar
+; AVX-NEXT: vmovsd %xmm0, 8(%rsp)
+; AVX-NEXT: callq bar
+; AVX-NEXT: vmovsd %xmm0, (%rsp)
+; AVX-NEXT: callq bar
+; AVX-NEXT: vmovsd 8(%rsp), %xmm1
+; AVX: vaddsd 16(%rsp), %xmm1, %xmm1
+; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0
+; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
+
+ %x0 = call double @bar()
+ %x1 = call double @bar()
+ %x2 = call double @bar()
+ %x3 = call double @bar()
+ %t0 = fadd double %x0, %x1
+ %t1 = fadd double %x2, %x3
+ %t2 = fadd double %t0, %t1
+ ret double %t2
+}
+
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
index aaed0f0a23dc..143a1c3787a0 100644
--- a/test/CodeGen/X86/machine-cp.ll
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -66,29 +66,23 @@ while.end: ; preds = %while.body, %entry
;
; CHECK-LABEL: foo:
; CHECK: psllw $7,
-; CHECK: psllw $7,
-; CHECK-NEXT: pand
-; CHECK-NEXT: pcmpgtb
-; CHECK-NEXT: pand %xmm{{[0-9]+}}, [[SRC:%xmm[0-9]+]]
-; Machine propagation used to delete the first copy as the
-; first few uses were <undef>.
-; CHECK-NEXT: movdqa [[SRC]], [[CPY1:%xmm[0-9]+]]
-; CHECK-NEXT: movdqa [[SRC]], [[CPY2:%xmm[0-9]+]]
-; CHECK-NEXT: punpckhbw [[SRC]],
-; Check that CPY1 is not redefined.
-; CHECK-NOT: , [[CPY1]]
-; undef use, we do not care.
-; CHECK: punpcklwd [[CPY1]],
-; Check that CPY1 is not redefined.
-; CHECK-NOT: , [[CPY1]]
-; CHECK: punpcklbw [[CPY2]], [[CPY2]]
-; CHECK-NEXT: punpckhwd [[CPY2]], [[CPY2]]
-; CHECK-NEXT pslld $31, [[CPY2]]
+; CHECK: psllw $7, [[SRC1:%xmm[0-9]+]]
+; CHECK-NEXT: pand {{.*}}(%rip), [[SRC1]]
+; CHECK-NEXT: pcmpgtb [[SRC1]], [[SRC2:%xmm[0-9]+]]
+; CHECK-NEXT: pand %xmm{{[0-9]+}}, [[SRC2]]
+; CHECK-NEXT: movdqa [[SRC2]], [[CPY1:%xmm[0-9]+]]
+; CHECK-NEXT: punpcklbw %xmm{{[0-9]+}}, [[CPY1]]
; Check that CPY1 is not redefined.
-; CHECK-NOT: , [[CPY1]]
-; CHECK: punpcklbw [[CPY1]], [[CPY1]]
-; CHECK-NEXT: punpcklwd [[CPY1]], [[CPY1]]
-; CHECK-NEXT pslld $31, [[CPY1]]
+; CHECK-NOT: , [[CPY1]]
+; CHECK: punpckhwd %xmm{{[0-9]+}}, [[CPY1]]
+; CHECK-NEXT: pslld $31, [[CPY1]]
+; CHECK-NEXT: psrad $31, [[CPY1]]
+; CHECK: punpckhbw %xmm{{[0-9]+}}, [[CPY2:%xmm[0-9]+]]
+; Check that CPY2 is not redefined.
+; CHECK-NOT: , [[CPY2]]
+; CHECK: punpckhwd %xmm{{[0-9]+}}, [[CPY2]]
+; CHECK-NEXT: pslld $31, [[CPY2]]
+; CHECK-NEXT: psrad $31, [[CPY2]]
define <16 x float> @foo(<16 x float> %x) {
bb:
%v3 = icmp slt <16 x i32> undef, zeroinitializer
diff --git a/test/CodeGen/X86/machine-trace-metrics-crash.ll b/test/CodeGen/X86/machine-trace-metrics-crash.ll
index 1d0ee79f04a9..048260c51fe3 100644
--- a/test/CodeGen/X86/machine-trace-metrics-crash.ll
+++ b/test/CodeGen/X86/machine-trace-metrics-crash.ll
@@ -54,9 +54,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: 1)
!1 = !DIFile(filename: "24199.cpp", directory: "/bin")
!2 = !{i32 2, !"Debug Info Version", i32 3}
-!3 = !DISubprogram(linkageName: "foo", file: !1, line: 18, isLocal: false, isDefinition: true, scopeLine: 18, function: void (%struct.A*)* @foo)
+!3 = distinct !DISubprogram(linkageName: "foo", file: !1, line: 18, isLocal: false, isDefinition: true, scopeLine: 18)
!4 = !DIExpression()
-!5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, scope: !3, flags: DIFlagArtificial | DIFlagObjectPointer)
+!5 = !DILocalVariable(name: "this", arg: 1, scope: !3, flags: DIFlagArtificial | DIFlagObjectPointer)
!6 = !DILocation(line: 0, scope: !3)
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
index de16e5ddc06b..b7280d87d3b7 100644
--- a/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1,19 +1,51 @@
-; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s -check-prefix=KNL
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_64
+; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_32
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=SKX
+; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=SKX_32
+; RUN: opt -mtriple=x86_64-apple-darwin -codegenprepare -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; KNL-LABEL: test1
-; KNL: kxnorw %k1, %k1, %k1
-; KNL: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+
+; SCALAR-LABEL: test1
+; SCALAR: extractelement <16 x float*>
+; SCALAR-NEXT: load float
+; SCALAR-NEXT: insertelement <16 x float>
+; SCALAR-NEXT: extractelement <16 x float*>
+; SCALAR-NEXT: load float
+
define <16 x float> @test1(float* %base, <16 x i32> %ind) {
+; KNL_64-LABEL: test1:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test1:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test1:
+; SKX: # BB#0:
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
-
+
%res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
ret <16 x float>%res
}
@@ -21,11 +53,41 @@ define <16 x float> @test1(float* %base, <16 x i32> %ind) {
declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
-
-; KNL-LABEL: test2
-; KNL: kmovw %esi, %k1
-; KNL: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+
+
+; SCALAR-LABEL: test2
+; SCALAR: extractelement <16 x float*>
+; SCALAR-NEXT: load float
+; SCALAR-NEXT: insertelement <16 x float>
+; SCALAR-NEXT: br label %else
+; SCALAR: else:
+; SCALAR-NEXT: %res.phi.else = phi
+; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i32 1
+; SCALAR-NEXT: %ToLoad1 = icmp eq i1 %Mask1, true
+; SCALAR-NEXT: br i1 %ToLoad1, label %cond.load1, label %else2
+
define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
+; KNL_64-LABEL: test2:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: kmovw %esi, %k1
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test2:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test2:
+; SKX: # BB#0:
+; SKX-NEXT: kmovw %esi, %k1
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
@@ -37,10 +99,28 @@ define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
ret <16 x float> %res
}
-; KNL-LABEL: test3
-; KNL: kmovw %esi, %k1
-; KNL: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
+; KNL_64-LABEL: test3:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: kmovw %esi, %k1
+; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test3:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test3:
+; SKX: # BB#0:
+; SKX-NEXT: kmovw %esi, %k1
+; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
@@ -52,13 +132,38 @@ define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
ret <16 x i32> %res
}
-; KNL-LABEL: test4
-; KNL: kmovw %esi, %k1
-; KNL: kmovw
-; KNL: vpgatherdd
-; KNL: vpgatherdd
define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
+; KNL_64-LABEL: test4:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: kmovw %esi, %k1
+; KNL_64-NEXT: kmovw %k1, %k2
+; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm2
+; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
+; KNL_64-NEXT: vpaddd %zmm2, %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test4:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; KNL_32-NEXT: kmovw %k1, %k2
+; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm2
+; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
+; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test4:
+; SKX: # BB#0:
+; SKX-NEXT: kmovw %esi, %k1
+; SKX-NEXT: kmovw %k1, %k2
+; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2}
+; SKX-NEXT: vmovaps %zmm1, %zmm2
+; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
+; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm0
+; SKX-NEXT: retq
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
@@ -71,12 +176,46 @@ define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
ret <16 x i32> %res
}
-; KNL-LABEL: test5
-; KNL: kmovw %k1, %k2
-; KNL: vpscatterdd {{.*}}%k2
-; KNL: vpscatterdd {{.*}}%k1
+
+; SCALAR-LABEL: test5
+; SCALAR: %Mask0 = extractelement <16 x i1> %imask, i32 0
+; SCALAR-NEXT: %ToStore0 = icmp eq i1 %Mask0, true
+; SCALAR-NEXT: br i1 %ToStore0, label %cond.store, label %else
+; SCALAR: cond.store:
+; SCALAR-NEXT: %Elt0 = extractelement <16 x i32> %val, i32 0
+; SCALAR-NEXT: %Ptr0 = extractelement <16 x i32*> %gep.random, i32 0
+; SCALAR-NEXT: store i32 %Elt0, i32* %Ptr0, align 4
+; SCALAR-NEXT: br label %else
+; SCALAR: else:
+; SCALAR-NEXT: %Mask1 = extractelement <16 x i1> %imask, i32 1
+; SCALAR-NEXT: %ToStore1 = icmp eq i1 %Mask1, true
+; SCALAR-NEXT: br i1 %ToStore1, label %cond.store1, label %else2
define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
+; KNL_64-LABEL: test5:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: kmovw %esi, %k1
+; KNL_64-NEXT: kmovw %k1, %k2
+; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2}
+; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test5:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; KNL_32-NEXT: kmovw %k1, %k2
+; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2}
+; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test5:
+; SKX: # BB#0:
+; SKX-NEXT: kmovw %esi, %k1
+; SKX-NEXT: kmovw %k1, %k2
+; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2}
+; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
+; SKX-NEXT: retq
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
@@ -91,12 +230,44 @@ define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
declare void @llvm.masked.scatter.v8i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
declare void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )
-; KNL-LABEL: test6
-; KNL: kxnorw %k1, %k1, %k1
-; KNL: kxnorw %k2, %k2, %k2
-; KNL: vpgatherqd (,%zmm{{.*}}), %ymm{{.*}} {%k2}
-; KNL: vpscatterqd %ymm{{.*}}, (,%zmm{{.*}}) {%k1}
+
+; SCALAR-LABEL: test6
+; SCALAR: store i32 %Elt0, i32* %Ptr01, align 4
+; SCALAR-NEXT: %Elt1 = extractelement <8 x i32> %a1, i32 1
+; SCALAR-NEXT: %Ptr12 = extractelement <8 x i32*> %ptr, i32 1
+; SCALAR-NEXT: store i32 %Elt1, i32* %Ptr12, align 4
+; SCALAR-NEXT: %Elt2 = extractelement <8 x i32> %a1, i32 2
+; SCALAR-NEXT: %Ptr23 = extractelement <8 x i32*> %ptr, i32 2
+; SCALAR-NEXT: store i32 %Elt2, i32* %Ptr23, align 4
+
define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
+; KNL_64-LABEL: test6:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: kxnorw %k0, %k0, %k2
+; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
+; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
+; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test6:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm2
+; KNL_32-NEXT: kxnorw %k0, %k0, %k2
+; KNL_32-NEXT: vpgatherqd (,%zmm2), %ymm1 {%k2}
+; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm2) {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test6:
+; SKX: # BB#0:
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: kxnorw %k0, %k0, %k2
+; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
+; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
+; SKX-NEXT: vmovaps %zmm2, %zmm0
+; SKX-NEXT: retq
%a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
@@ -104,13 +275,41 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
ret <8 x i32>%a
}
-; In this case the index should be promoted to <8 x i64> for KNL
-; KNL-LABEL: test7
-; KNL: vpmovsxdq %ymm0, %zmm0
-; KNL: kmovw %k1, %k2
-; KNL: vpgatherqd {{.*}} {%k2}
-; KNL: vpgatherqd {{.*}} {%k1}
define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
+;
+; KNL_64-LABEL: test7:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: movzbl %sil, %eax
+; KNL_64-NEXT: kmovw %eax, %k1
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_64-NEXT: kmovw %k1, %k2
+; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k2}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm2
+; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
+; KNL_64-NEXT: vpaddd %ymm2, %ymm1, %ymm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test7:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_32-NEXT: kmovw %k1, %k2
+; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k2}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm2
+; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
+; KNL_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test7:
+; SKX: # BB#0:
+; SKX-NEXT: kmovb %esi, %k1
+; SKX-NEXT: kmovw %k1, %k2
+; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm1 {%k2}
+; SKX-NEXT: vmovaps %zmm1, %zmm2
+; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1}
+; SKX-NEXT: vpaddd %ymm2, %ymm1, %ymm0
+; SKX-NEXT: retq
%broadcast.splatinsert = insertelement <8 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <8 x i32*> %broadcast.splatinsert, <8 x i32*> undef, <8 x i32> zeroinitializer
@@ -125,18 +324,1751 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
; No uniform base in this case, index <8 x i64> contains addresses,
; each gather call will be split into two
-; KNL-LABEL: test8
-; KNL: kshiftrw $8, %k1, %k2
-; KNL: vpgatherqd
-; KNL: vpgatherqd
-; KNL: vinserti64x4
-; KNL: vpgatherqd
-; KNL: vpgatherqd
-; KNL: vinserti64x4
define <16 x i32> @test8(<16 x i32*> %ptr.random, <16 x i32> %ind, i16 %mask) {
+; KNL_64-LABEL: test8:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: kmovw %edi, %k1
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: kmovw %k2, %k3
+; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k3}
+; KNL_64-NEXT: kmovw %k1, %k3
+; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k3}
+; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm4
+; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
+; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
+; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
+; KNL_64-NEXT: vpaddd %zmm0, %zmm4, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test8:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; KNL_32-NEXT: kmovw %k1, %k2
+; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm2
+; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
+; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test8:
+; SKX: # BB#0:
+; SKX-NEXT: kmovw %edi, %k1
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: kmovw %k2, %k3
+; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k3}
+; SKX-NEXT: kmovw %k1, %k3
+; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k3}
+; SKX-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm4
+; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
+; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
+; SKX-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm0
+; SKX-NEXT: vpaddd %zmm0, %zmm4, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test8:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: kmovw %k1, %k2
+; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm2
+; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
+; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
+; SKX_32-NEXT: retl
+
%imask = bitcast i16 %mask to <16 x i1>
%gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
%gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
%res = add <16 x i32> %gt1, %gt2
ret <16 x i32> %res
}
+
+%struct.RT = type { i8, [10 x [20 x i32]], i8 }
+%struct.ST = type { i32, double, %struct.RT }
+
+; Masked gather for agregate types
+; Test9 and Test10 should give the same result (scalar and vector indices in GEP)
+
+
+define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
+; KNL_64-LABEL: test9:
+; KNL_64: # BB#0: # %entry
+; KNL_64-NEXT: vpbroadcastq %rdi, %zmm2
+; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1
+; KNL_64-NEXT: vpbroadcastq {{.*}}(%rip), %zmm3
+; KNL_64-NEXT: vpmuludq %zmm3, %zmm1, %zmm4
+; KNL_64-NEXT: vpsrlq $32, %zmm1, %zmm1
+; KNL_64-NEXT: vpmuludq %zmm3, %zmm1, %zmm1
+; KNL_64-NEXT: vpsllq $32, %zmm1, %zmm1
+; KNL_64-NEXT: vpaddq %zmm1, %zmm4, %zmm1
+; KNL_64-NEXT: vpbroadcastq {{.*}}(%rip), %zmm3
+; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm4
+; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0
+; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0
+; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0
+; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm0
+; KNL_64-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test9:
+; KNL_32: # BB#0: # %entry
+; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2
+; KNL_32-NEXT: vpbroadcastd .LCPI8_0, %ymm3
+; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1
+; KNL_32-NEXT: vpmovqd %zmm0, %ymm0
+; KNL_32-NEXT: vpbroadcastd .LCPI8_1, %ymm3
+; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0
+; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; KNL_32-NEXT: vpbroadcastd .LCPI8_2, %ymm1
+; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test9:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX-NEXT: retq
+entry:
+ %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
+ %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
+
+ %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32><i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <8 x i32> %ind5, <8 x i64> <i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13>
+ %res = call <8 x i32 > @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
+; KNL_64-LABEL: test10:
+; KNL_64: # BB#0: # %entry
+; KNL_64-NEXT: vpbroadcastq %rdi, %zmm2
+; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1
+; KNL_64-NEXT: vpbroadcastq {{.*}}(%rip), %zmm3
+; KNL_64-NEXT: vpmuludq %zmm3, %zmm1, %zmm4
+; KNL_64-NEXT: vpsrlq $32, %zmm1, %zmm1
+; KNL_64-NEXT: vpmuludq %zmm3, %zmm1, %zmm1
+; KNL_64-NEXT: vpsllq $32, %zmm1, %zmm1
+; KNL_64-NEXT: vpaddq %zmm1, %zmm4, %zmm1
+; KNL_64-NEXT: vpbroadcastq {{.*}}(%rip), %zmm3
+; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm4
+; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0
+; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0
+; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0
+; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm0
+; KNL_64-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test10:
+; KNL_32: # BB#0: # %entry
+; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2
+; KNL_32-NEXT: vpbroadcastd .LCPI9_0, %ymm3
+; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1
+; KNL_32-NEXT: vpmovqd %zmm0, %ymm0
+; KNL_32-NEXT: vpbroadcastd .LCPI9_1, %ymm3
+; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0
+; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; KNL_32-NEXT: vpbroadcastd .LCPI9_2, %ymm1
+; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test10:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX-NEXT: retq
+entry:
+ %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
+ %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
+
+ %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13
+ %res = call <8 x i32 > @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+ ret <8 x i32> %res
+}
+
+; Splat index in GEP, requires broadcast
+define <16 x float> @test11(float* %base, i32 %ind) {
+; KNL_64-LABEL: test11:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpbroadcastd %esi, %zmm1
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test11:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm1
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test11:
+; SKX: # BB#0:
+; SKX-NEXT: vpbroadcastd %esi, %zmm1
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
+; SKX-NEXT: retq
+
+ %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
+ %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+
+ %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+; We are checking the uniform base here. It is taken directly from input to vgatherdps
+define <16 x float> @test12(float* %base, <16 x i32> %ind) {
+; KNL_64-LABEL: test12:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test12:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test12:
+; SKX: # BB#0:
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+; The same as the previous, but the mask is undefined
+define <16 x float> @test13(float* %base, <16 x i32> %ind) {
+; KNL_64-LABEL: test13:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test13:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test13:
+; SKX: # BB#0:
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> undef, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+; The base pointer is not splat, can't find unform base
+define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) {
+; KNL_64-LABEL: test14:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
+; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL_64-NEXT: vpbroadcastq %xmm0, %zmm0
+; KNL_64-NEXT: vmovd %esi, %xmm1
+; KNL_64-NEXT: vpbroadcastd %xmm1, %ymm1
+; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1
+; KNL_64-NEXT: vpsllq $2, %zmm1, %zmm1
+; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; KNL_64-NEXT: kshiftrw $8, %k0, %k1
+; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm1 {%k1}
+; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm2 {%k1}
+; KNL_64-NEXT: vinsertf64x4 $1, %ymm1, %zmm2, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test14:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm1
+; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL_32-NEXT: vpbroadcastd %xmm0, %zmm0
+; KNL_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1
+; KNL_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1
+; KNL_32-NEXT: vgatherdps (,%zmm1), %zmm0 {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test14:
+; SKX: # BB#0:
+; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
+; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: vpbroadcastq %xmm0, %zmm0
+; SKX-NEXT: vmovd %esi, %xmm1
+; SKX-NEXT: vpbroadcastd %xmm1, %ymm1
+; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX-NEXT: vpsllq $2, %zmm1, %zmm1
+; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX-NEXT: kshiftrw $8, %k0, %k1
+; SKX-NEXT: vgatherqps (,%zmm0), %ymm1 {%k1}
+; SKX-NEXT: vgatherqps (,%zmm0), %ymm2 {%k1}
+; SKX-NEXT: vinsertf32x8 $1, %ymm1, %zmm2, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test14:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm1
+; SKX_32-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; SKX_32-NEXT: vpbroadcastd %xmm0, %zmm0
+; SKX_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1
+; SKX_32-NEXT: vpaddd %zmm1, %zmm0, %zmm1
+; SKX_32-NEXT: vgatherdps (,%zmm1), %zmm0 {%k1}
+; SKX_32-NEXT: retl
+
+ %broadcast.splatinsert = insertelement <16 x float*> %vec, float* %base, i32 1
+ %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+
+ %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> undef, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
+declare <4 x double> @llvm.masked.gather.v4f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
+
+; Gather smaller than existing instruction
+define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
+;
+; KNL_64-LABEL: test15:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2
+; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm0
+; KNL_64-NEXT: vpsllq $63, %zmm0, %zmm0
+; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test15:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2
+; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm0
+; KNL_32-NEXT: vpsllvq .LCPI14_0, %zmm0, %zmm0
+; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test15:
+; SKX: # BB#0:
+; SKX-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX-NEXT: vpmovd2m %xmm1, %k1
+; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test15:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX_32-NEXT: vpmovd2m %xmm1, %k1
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
+
+ %sext_ind = sext <4 x i32> %ind to <4 x i64>
+ %gep.random = getelementptr float, float* %base, <4 x i64> %sext_ind
+ %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef)
+ ret <4 x float>%res
+}
+
+; Gather smaller than existing instruction
+define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x double> %src0) {
+;
+; KNL_64-LABEL: test16:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
+; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
+; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
+; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test16:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
+; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
+; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_32-NEXT: vpsllvq .LCPI15_0, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
+; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test16:
+; SKX: # BB#0:
+; SKX-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX-NEXT: vpmovd2m %xmm1, %k1
+; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1}
+; SKX-NEXT: vmovaps %zmm2, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test16:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX_32-NEXT: vpmovd2m %xmm1, %k1
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1}
+; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: retl
+
+ %sext_ind = sext <4 x i32> %ind to <4 x i64>
+ %gep.random = getelementptr double, double* %base, <4 x i64> %sext_ind
+ %res = call <4 x double> @llvm.masked.gather.v4f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0)
+ ret <4 x double>%res
+}
+
+define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x double> %src0) {
+;
+; KNL_64-LABEL: test17:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1}
+; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test17:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpsllvq .LCPI16_0, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
+; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test17:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX-NEXT: vpmovq2m %xmm1, %k1
+; SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1}
+; SKX-NEXT: vmovaps %zmm2, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test17:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX_32-NEXT: vpmovq2m %xmm1, %k1
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1}
+; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: retl
+
+ %sext_ind = sext <2 x i32> %ind to <2 x i64>
+ %gep.random = getelementptr double, double* %base, <2 x i64> %sext_ind
+ %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0)
+ ret <2 x double>%res
+}
+
+declare void @llvm.masked.scatter.v4i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> )
+declare void @llvm.masked.scatter.v4f64(<4 x double> , <4 x double*> , i32 , <4 x i1> )
+declare void @llvm.masked.scatter.v2i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> )
+declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
+declare void @llvm.masked.scatter.v2f32(<2 x float> , <2 x float*> , i32 , <2 x i1> )
+
+define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
+;
+; KNL_64-LABEL: test18:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
+; KNL_64-NEXT: vpmovsxdq %ymm2, %zmm2
+; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test18:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
+; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
+; KNL_32-NEXT: vpmovsxdq %ymm2, %zmm2
+; KNL_32-NEXT: vpsllvq .LCPI17_0, %zmm2, %zmm2
+; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test18:
+; SKX: # BB#0:
+; SKX-NEXT: vpslld $31, %xmm2, %xmm2
+; SKX-NEXT: vpmovd2m %xmm2, %k1
+; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test18:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
+; SKX_32-NEXT: vpmovd2m %xmm2, %k1
+; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1}
+; SKX_32-NEXT: retl
+ call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+ ret void
+}
+
+define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind) {
+;
+; KNL_64-LABEL: test19:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
+; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
+; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1
+; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm2,8) {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test19:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
+; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
+; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpsllvq .LCPI18_0, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test19:
+; SKX: # BB#0:
+; SKX-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX-NEXT: vpmovd2m %xmm1, %k1
+; SKX-NEXT: vscatterqpd %ymm0, (%rdi,%ymm2,8) {%k1}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test19:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1
+; SKX_32-NEXT: vpmovd2m %xmm1, %k1
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vscatterqpd %ymm0, (%eax,%ymm2,8) {%k1}
+; SKX_32-NEXT: retl
+ %gep = getelementptr double, double* %ptr, <4 x i64> %ind
+ call void @llvm.masked.scatter.v4f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask)
+ ret void
+}
+
+; Data type requires widening
+define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
+;
+; KNL_64-LABEL: test20:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; KNL_64-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
+; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
+; KNL_64-NEXT: vpmovsxdq %ymm2, %zmm2
+; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test20:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; KNL_32-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
+; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
+; KNL_32-NEXT: vpmovsxdq %ymm2, %zmm2
+; KNL_32-NEXT: vpsllvq .LCPI19_0, %zmm2, %zmm2
+; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_32-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test20:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
+; SKX-NEXT: vpmovq2m %xmm2, %k0
+; SKX-NEXT: kshiftlw $2, %k0, %k0
+; SKX-NEXT: kshiftrw $2, %k0, %k1
+; SKX-NEXT: vscatterqps %xmm0, (,%ymm1) {%k1}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test20:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
+; SKX_32-NEXT: vpmovq2m %xmm2, %k0
+; SKX_32-NEXT: kshiftlw $2, %k0, %k0
+; SKX_32-NEXT: kshiftrw $2, %k0, %k1
+; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1}
+; SKX_32-NEXT: retl
+ call void @llvm.masked.scatter.v2f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
+ ret void
+}
+
+; Data type requires promotion
+define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
+;
+; KNL_64-LABEL: test21:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test21:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_32-NEXT: vpsllvq .LCPI20_0, %zmm2, %zmm2
+; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test21:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
+; SKX-NEXT: vpmovq2m %xmm2, %k0
+; SKX-NEXT: kshiftlw $2, %k0, %k0
+; SKX-NEXT: kshiftrw $2, %k0, %k1
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test21:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
+; SKX_32-NEXT: vpmovq2m %xmm2, %k0
+; SKX_32-NEXT: kshiftlw $2, %k0, %k0
+; SKX_32-NEXT: kshiftrw $2, %k0, %k1
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
+; SKX_32-NEXT: retl
+ call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
+ ret void
+}
+
+; The result type requires widening
+declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
+
+define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float> %src0) {
+;
+;
+; KNL_64-LABEL: test22:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; KNL_64-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
+; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_64-NEXT: vpmovsxdq %ymm1, %zmm1
+; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
+; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test22:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; KNL_32-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
+; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
+; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
+; KNL_32-NEXT: vpsllvq .LCPI21_0, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
+; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test22:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX-NEXT: vpmovq2m %xmm1, %k0
+; SKX-NEXT: kshiftlw $2, %k0, %k0
+; SKX-NEXT: kshiftrw $2, %k0, %k1
+; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1}
+; SKX-NEXT: vmovaps %zmm2, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test22:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX_32-NEXT: vpmovq2m %xmm1, %k0
+; SKX_32-NEXT: kshiftlw $2, %k0, %k0
+; SKX_32-NEXT: kshiftrw $2, %k0, %k1
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1}
+; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: retl
+ %sext_ind = sext <2 x i32> %ind to <2 x i64>
+ %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
+ %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
+ ret <2 x float>%res
+}
+
+declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
+declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
+
+define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) {
+;
+; KNL_64-LABEL: test23:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
+; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test23:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpsllvq .LCPI22_0, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
+; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test23:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX-NEXT: vpmovq2m %xmm1, %k1
+; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
+; SKX-NEXT: vmovaps %zmm2, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test23:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX_32-NEXT: vpmovq2m %xmm1, %k1
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
+; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: retl
+ %sext_ind = sext <2 x i32> %ind to <2 x i64>
+ %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
+ %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
+ ret <2 x i32>%res
+}
+
+define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
+; KNL_64-LABEL: test24:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: movb $3, %al
+; KNL_64-NEXT: movzbl %al, %eax
+; KNL_64-NEXT: kmovw %eax, %k1
+; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test24:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; KNL_32-NEXT: vinserti32x4 $0, .LCPI23_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq .LCPI23_1, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test24:
+; SKX: # BB#0:
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test24:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
+ %sext_ind = sext <2 x i32> %ind to <2 x i64>
+ %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
+ %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
+ ret <2 x i32>%res
+}
+
+define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %src0) {
+;
+; KNL_64-LABEL: test25:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
+; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1}
+; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test25:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpsllvq .LCPI24_0, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
+; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test25:
+; SKX: # BB#0:
+; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX-NEXT: vpmovq2m %xmm1, %k1
+; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1}
+; SKX-NEXT: vmovaps %zmm2, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test25:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1
+; SKX_32-NEXT: vpmovq2m %xmm1, %k1
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1}
+; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: retl
+ %sext_ind = sext <2 x i32> %ind to <2 x i64>
+ %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
+ %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0)
+ ret <2 x i64>%res
+}
+
+define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
+;
+; KNL_64-LABEL: test26:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: movb $3, %al
+; KNL_64-NEXT: movzbl %al, %eax
+; KNL_64-NEXT: kmovw %eax, %k1
+; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test26:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; KNL_32-NEXT: vinserti32x4 $0, .LCPI25_0, %zmm2, %zmm2
+; KNL_32-NEXT: vpsllvq .LCPI25_1, %zmm2, %zmm2
+; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test26:
+; SKX: # BB#0:
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test26:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
+ %sext_ind = sext <2 x i32> %ind to <2 x i64>
+ %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
+ %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0)
+ ret <2 x i64>%res
+}
+
+; Result type requires widening; all-ones mask
+define <2 x float> @test27(float* %base, <2 x i32> %ind) {
+;
+; KNL_64-LABEL: test27:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_64-NEXT: movb $3, %al
+; KNL_64-NEXT: movzbl %al, %eax
+; KNL_64-NEXT: kmovw %eax, %k1
+; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test27:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
+; KNL_32-NEXT: movb $3, %cl
+; KNL_32-NEXT: movzbl %cl, %ecx
+; KNL_32-NEXT: kmovw %ecx, %k1
+; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test27:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; SKX-NEXT: movb $3, %al
+; SKX-NEXT: kmovb %eax, %k1
+; SKX-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
+; SKX-NEXT: retq
+ %sext_ind = sext <2 x i32> %ind to <2 x i64>
+ %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
+ %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
+ ret <2 x float>%res
+}
+
+; Data type requires promotion, mask is all-ones
+define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
+;
+;
+; KNL_64-LABEL: test28:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_64-NEXT: movb $3, %al
+; KNL_64-NEXT: movzbl %al, %eax
+; KNL_64-NEXT: kmovw %eax, %k1
+; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test28:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; KNL_32-NEXT: vinserti32x4 $0, .LCPI27_0, %zmm2, %zmm2
+; KNL_32-NEXT: vpsllvq .LCPI27_1, %zmm2, %zmm2
+; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
+; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test28:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: movb $3, %al
+; SKX-NEXT: kmovb %eax, %k1
+; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test28:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX_32-NEXT: movb $3, %al
+; SKX_32-NEXT: kmovb %eax, %k1
+; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
+; SKX_32-NEXT: retl
+ call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>)
+ ret void
+}
+
+
+; SCALAR-LABEL: test29
+; SCALAR: extractelement <16 x float*>
+; SCALAR-NEXT: load float
+; SCALAR-NEXT: insertelement <16 x float>
+; SCALAR-NEXT: extractelement <16 x float*>
+; SCALAR-NEXT: load float
+
+define <16 x float> @test29(float* %base, <16 x i32> %ind) {
+; KNL_64-LABEL: test29:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: movw $44, %ax
+; KNL_64-NEXT: kmovw %eax, %k1
+; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; KNL_64-NEXT: vmovaps %zmm1, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test29:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: movw $44, %cx
+; KNL_32-NEXT: kmovw %ecx, %k1
+; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test29:
+; SKX: # BB#0:
+; SKX-NEXT: movw $44, %ax
+; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: retq
+
+ %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
+ %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+; Check non-power-of-2 case. It should be scalarized.
+declare <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
+define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
+; KNL_64-LABEL: test30:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: andl $1, %edx
+; KNL_64-NEXT: kmovw %edx, %k1
+; KNL_64-NEXT: andl $1, %esi
+; KNL_64-NEXT: kmovw %esi, %k2
+; KNL_64-NEXT: movl %edi, %eax
+; KNL_64-NEXT: andl $1, %eax
+; KNL_64-NEXT: kmovw %eax, %k0
+; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
+; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
+; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
+; KNL_64-NEXT: # implicit-def: %XMM0
+; KNL_64-NEXT: testb $1, %dil
+; KNL_64-NEXT: je .LBB29_2
+; KNL_64-NEXT: # BB#1: # %cond.load
+; KNL_64-NEXT: vmovq %xmm1, %rax
+; KNL_64-NEXT: vmovd (%rax), %xmm0
+; KNL_64-NEXT: .LBB29_2: # %else
+; KNL_64-NEXT: kmovw %k2, %eax
+; KNL_64-NEXT: movl %eax, %ecx
+; KNL_64-NEXT: andl $1, %ecx
+; KNL_64-NEXT: testb %cl, %cl
+; KNL_64-NEXT: je .LBB29_4
+; KNL_64-NEXT: # BB#3: # %cond.load1
+; KNL_64-NEXT: vpextrq $1, %xmm1, %rcx
+; KNL_64-NEXT: vpinsrd $1, (%rcx), %xmm0, %xmm0
+; KNL_64-NEXT: .LBB29_4: # %else2
+; KNL_64-NEXT: kmovw %k1, %ecx
+; KNL_64-NEXT: movl %ecx, %edx
+; KNL_64-NEXT: andl $1, %edx
+; KNL_64-NEXT: testb %dl, %dl
+; KNL_64-NEXT: je .LBB29_6
+; KNL_64-NEXT: # BB#5: # %cond.load4
+; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
+; KNL_64-NEXT: vmovq %xmm1, %rdx
+; KNL_64-NEXT: vpinsrd $2, (%rdx), %xmm0, %xmm0
+; KNL_64-NEXT: .LBB29_6: # %else5
+; KNL_64-NEXT: kmovw %k0, %edx
+; KNL_64-NEXT: vmovd %edx, %xmm1
+; KNL_64-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; KNL_64-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
+; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
+; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test30:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: andl $1, %eax
+; KNL_32-NEXT: kmovw %eax, %k1
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: andl $1, %eax
+; KNL_32-NEXT: kmovw %eax, %k2
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: movl %eax, %ecx
+; KNL_32-NEXT: andl $1, %ecx
+; KNL_32-NEXT: kmovw %ecx, %k0
+; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
+; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+; KNL_32-NEXT: # implicit-def: %XMM0
+; KNL_32-NEXT: testb $1, %al
+; KNL_32-NEXT: je .LBB29_2
+; KNL_32-NEXT: # BB#1: # %cond.load
+; KNL_32-NEXT: vmovd %xmm1, %eax
+; KNL_32-NEXT: vmovd (%eax), %xmm0
+; KNL_32-NEXT: .LBB29_2: # %else
+; KNL_32-NEXT: kmovw %k2, %eax
+; KNL_32-NEXT: movl %eax, %ecx
+; KNL_32-NEXT: andl $1, %ecx
+; KNL_32-NEXT: testb %cl, %cl
+; KNL_32-NEXT: je .LBB29_4
+; KNL_32-NEXT: # BB#3: # %cond.load1
+; KNL_32-NEXT: vpextrd $1, %xmm1, %ecx
+; KNL_32-NEXT: vpinsrd $1, (%ecx), %xmm0, %xmm0
+; KNL_32-NEXT: .LBB29_4: # %else2
+; KNL_32-NEXT: kmovw %k1, %ecx
+; KNL_32-NEXT: movl %ecx, %edx
+; KNL_32-NEXT: andl $1, %edx
+; KNL_32-NEXT: testb %dl, %dl
+; KNL_32-NEXT: je .LBB29_6
+; KNL_32-NEXT: # BB#5: # %cond.load4
+; KNL_32-NEXT: vpextrd $2, %xmm1, %edx
+; KNL_32-NEXT: vpinsrd $2, (%edx), %xmm0, %xmm0
+; KNL_32-NEXT: .LBB29_6: # %else5
+; KNL_32-NEXT: kmovw %k0, %edx
+; KNL_32-NEXT: vmovd %edx, %xmm1
+; KNL_32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; KNL_32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
+; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
+; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test30:
+; SKX: # BB#0:
+; SKX-NEXT: vpslld $31, %xmm2, %xmm2
+; SKX-NEXT: vpmovd2m %xmm2, %k1
+; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
+; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
+; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SKX-NEXT: # implicit-def: %XMM0
+; SKX-NEXT: andb $1, %al
+; SKX-NEXT: je .LBB29_2
+; SKX-NEXT: # BB#1: # %cond.load
+; SKX-NEXT: vmovq %xmm1, %rax
+; SKX-NEXT: vmovd (%rax), %xmm0
+; SKX-NEXT: .LBB29_2: # %else
+; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SKX-NEXT: andb $1, %al
+; SKX-NEXT: je .LBB29_4
+; SKX-NEXT: # BB#3: # %cond.load1
+; SKX-NEXT: vpextrq $1, %xmm1, %rax
+; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
+; SKX-NEXT: .LBB29_4: # %else2
+; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
+; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; SKX-NEXT: andb $1, %al
+; SKX-NEXT: je .LBB29_6
+; SKX-NEXT: # BB#5: # %cond.load4
+; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
+; SKX-NEXT: vmovq %xmm1, %rax
+; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
+; SKX-NEXT: .LBB29_6: # %else5
+; SKX-NEXT: vmovdqa32 %xmm0, %xmm3 {%k1}
+; SKX-NEXT: vmovaps %zmm3, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test30:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: subl $12, %esp
+; SKX_32-NEXT: .Ltmp0:
+; SKX_32-NEXT: .cfi_def_cfa_offset 16
+; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2
+; SKX_32-NEXT: vpmovd2m %xmm2, %k1
+; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp)
+; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
+; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2
+; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; SKX_32-NEXT: # implicit-def: %XMM1
+; SKX_32-NEXT: andb $1, %al
+; SKX_32-NEXT: je .LBB29_2
+; SKX_32-NEXT: # BB#1: # %cond.load
+; SKX_32-NEXT: vmovd %xmm2, %eax
+; SKX_32-NEXT: vmovd (%eax), %xmm1
+; SKX_32-NEXT: .LBB29_2: # %else
+; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp)
+; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
+; SKX_32-NEXT: andb $1, %al
+; SKX_32-NEXT: je .LBB29_4
+; SKX_32-NEXT: # BB#3: # %cond.load1
+; SKX_32-NEXT: vpextrd $1, %xmm2, %eax
+; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
+; SKX_32-NEXT: .LBB29_4: # %else2
+; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm0
+; SKX_32-NEXT: kmovb %k1, (%esp)
+; SKX_32-NEXT: movb (%esp), %al
+; SKX_32-NEXT: andb $1, %al
+; SKX_32-NEXT: je .LBB29_6
+; SKX_32-NEXT: # BB#5: # %cond.load4
+; SKX_32-NEXT: vpextrd $2, %xmm2, %eax
+; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
+; SKX_32-NEXT: .LBB29_6: # %else5
+; SKX_32-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; SKX_32-NEXT: addl $12, %esp
+; SKX_32-NEXT: retl
+
+ %sext_ind = sext <3 x i32> %ind to <3 x i64>
+ %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
+ %res = call <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
+ ret <3 x i32>%res
+}
+
+declare <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
+
+; KNL-LABEL: test31
+; KNL: vpgatherqq
+; KNL: vpgatherqq
+define <16 x float*> @test31(<16 x float**> %ptrs) {
+; KNL_64-LABEL: test31:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: kxnorw %k0, %k0, %k1
+; KNL_64-NEXT: kxnorw %k0, %k0, %k2
+; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2}
+; KNL_64-NEXT: kshiftrw $8, %k1, %k1
+; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1}
+; KNL_64-NEXT: vmovaps %zmm2, %zmm0
+; KNL_64-NEXT: vmovaps %zmm3, %zmm1
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test31:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: kxnorw %k0, %k0, %k1
+; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1}
+; KNL_32-NEXT: vmovaps %zmm1, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test31:
+; SKX: # BB#0:
+; SKX-NEXT: kxnorw %k0, %k0, %k1
+; SKX-NEXT: kxnorw %k0, %k0, %k2
+; SKX-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2}
+; SKX-NEXT: kshiftrw $8, %k1, %k1
+; SKX-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1}
+; SKX-NEXT: vmovaps %zmm2, %zmm0
+; SKX-NEXT: vmovaps %zmm3, %zmm1
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test31:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
+
+ %res = call <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
+ ret <16 x float*>%res
+}
+
+define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) {
+; KNL_64-LABEL: test_gather_16i32:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm2
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
+; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
+; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test_gather_16i32:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
+; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test_gather_16i32:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpslld $31, %zmm2, %zmm2
+; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT: vextracti32x8 $1, %zmm3, %ymm2
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
+; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
+; SKX-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test_gather_16i32:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
+; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
+; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: retl
+ %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
+ ret <16 x i32> %res
+}
+define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
+; KNL_64-LABEL: test_gather_16i64:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1}
+; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2}
+; KNL_64-NEXT: vmovaps %zmm3, %zmm0
+; KNL_64-NEXT: vmovaps %zmm4, %zmm1
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test_gather_16i64:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: pushl %ebp
+; KNL_32-NEXT: .Ltmp0:
+; KNL_32-NEXT: .cfi_def_cfa_offset 8
+; KNL_32-NEXT: .Ltmp1:
+; KNL_32-NEXT: .cfi_offset %ebp, -8
+; KNL_32-NEXT: movl %esp, %ebp
+; KNL_32-NEXT: .Ltmp2:
+; KNL_32-NEXT: .cfi_def_cfa_register %ebp
+; KNL_32-NEXT: andl $-64, %esp
+; KNL_32-NEXT: subl $64, %esp
+; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1
+; KNL_32-NEXT: kshiftrw $8, %k1, %k2
+; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1}
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2}
+; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: movl %ebp, %esp
+; KNL_32-NEXT: popl %ebp
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test_gather_16i64:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpslld $31, %zmm2, %zmm2
+; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1}
+; SKX-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2}
+; SKX-NEXT: vmovaps %zmm3, %zmm0
+; SKX-NEXT: vmovaps %zmm4, %zmm1
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test_gather_16i64:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: pushl %ebp
+; SKX_32-NEXT: .Ltmp1:
+; SKX_32-NEXT: .cfi_def_cfa_offset 8
+; SKX_32-NEXT: .Ltmp2:
+; SKX_32-NEXT: .cfi_offset %ebp, -8
+; SKX_32-NEXT: movl %esp, %ebp
+; SKX_32-NEXT: .Ltmp3:
+; SKX_32-NEXT: .cfi_def_cfa_register %ebp
+; SKX_32-NEXT: andl $-64, %esp
+; SKX_32-NEXT: subl $64, %esp
+; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
+; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1
+; SKX_32-NEXT: kshiftrw $8, %k1, %k2
+; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1}
+; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2}
+; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: movl %ebp, %esp
+; SKX_32-NEXT: popl %ebp
+; SKX_32-NEXT: retl
+ %res = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
+ ret <16 x i64> %res
+}
+declare <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
+define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) {
+; KNL_64-LABEL: test_gather_16f32:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm2
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2}
+; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm3 {%k1}
+; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm3, %zmm0
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test_gather_16f32:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1}
+; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test_gather_16f32:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpslld $31, %zmm2, %zmm2
+; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT: vextractf32x8 $1, %zmm3, %ymm2
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2}
+; SKX-NEXT: vgatherqps (,%zmm0), %ymm3 {%k1}
+; SKX-NEXT: vinsertf32x8 $1, %ymm2, %zmm3, %zmm0
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test_gather_16f32:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
+; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1}
+; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: retl
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
+ ret <16 x float> %res
+}
+define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) {
+; KNL_64-LABEL: test_gather_16f64:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1}
+; KNL_64-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2}
+; KNL_64-NEXT: vmovaps %zmm3, %zmm0
+; KNL_64-NEXT: vmovaps %zmm4, %zmm1
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test_gather_16f64:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: pushl %ebp
+; KNL_32-NEXT: .Ltmp3:
+; KNL_32-NEXT: .cfi_def_cfa_offset 8
+; KNL_32-NEXT: .Ltmp4:
+; KNL_32-NEXT: .cfi_offset %ebp, -8
+; KNL_32-NEXT: movl %esp, %ebp
+; KNL_32-NEXT: .Ltmp5:
+; KNL_32-NEXT: .cfi_def_cfa_register %ebp
+; KNL_32-NEXT: andl $-64, %esp
+; KNL_32-NEXT: subl $64, %esp
+; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1
+; KNL_32-NEXT: kshiftrw $8, %k1, %k2
+; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1}
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2}
+; KNL_32-NEXT: vmovaps %zmm2, %zmm0
+; KNL_32-NEXT: movl %ebp, %esp
+; KNL_32-NEXT: popl %ebp
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test_gather_16f64:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpslld $31, %zmm2, %zmm2
+; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1}
+; SKX-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2}
+; SKX-NEXT: vmovaps %zmm3, %zmm0
+; SKX-NEXT: vmovaps %zmm4, %zmm1
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test_gather_16f64:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: pushl %ebp
+; SKX_32-NEXT: .Ltmp4:
+; SKX_32-NEXT: .cfi_def_cfa_offset 8
+; SKX_32-NEXT: .Ltmp5:
+; SKX_32-NEXT: .cfi_offset %ebp, -8
+; SKX_32-NEXT: movl %esp, %ebp
+; SKX_32-NEXT: .Ltmp6:
+; SKX_32-NEXT: .cfi_def_cfa_register %ebp
+; SKX_32-NEXT: andl $-64, %esp
+; SKX_32-NEXT: subl $64, %esp
+; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
+; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
+; SKX_32-NEXT: kshiftrw $8, %k1, %k2
+; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1}
+; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2}
+; SKX_32-NEXT: vmovaps %zmm2, %zmm0
+; SKX_32-NEXT: movl %ebp, %esp
+; SKX_32-NEXT: popl %ebp
+; SKX_32-NEXT: retl
+ %res = call <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
+ ret <16 x double> %res
+}
+declare <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
+define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) {
+; KNL_64-LABEL: test_scatter_16i32:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1}
+; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm0
+; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k2}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test_scatter_16i32:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test_scatter_16i32:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpslld $31, %zmm2, %zmm2
+; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1}
+; SKX-NEXT: vextracti32x8 $1, %zmm3, %ymm0
+; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k2}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test_scatter_16i32:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
+; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1}
+; SKX_32-NEXT: retl
+ call void @llvm.masked.scatter.v16i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask)
+ ret void
+}
+define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
+; KNL_64-LABEL: test_scatter_16i64:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1}
+; KNL_64-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test_scatter_16i64:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: pushl %ebp
+; KNL_32-NEXT: .Ltmp6:
+; KNL_32-NEXT: .cfi_def_cfa_offset 8
+; KNL_32-NEXT: .Ltmp7:
+; KNL_32-NEXT: .cfi_offset %ebp, -8
+; KNL_32-NEXT: movl %esp, %ebp
+; KNL_32-NEXT: .Ltmp8:
+; KNL_32-NEXT: .cfi_def_cfa_register %ebp
+; KNL_32-NEXT: andl $-64, %esp
+; KNL_32-NEXT: subl $64, %esp
+; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1
+; KNL_32-NEXT: kshiftrw $8, %k1, %k2
+; KNL_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1}
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2}
+; KNL_32-NEXT: movl %ebp, %esp
+; KNL_32-NEXT: popl %ebp
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test_scatter_16i64:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpslld $31, %zmm2, %zmm2
+; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1}
+; SKX-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test_scatter_16i64:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: pushl %ebp
+; SKX_32-NEXT: .Ltmp7:
+; SKX_32-NEXT: .cfi_def_cfa_offset 8
+; SKX_32-NEXT: .Ltmp8:
+; SKX_32-NEXT: .cfi_offset %ebp, -8
+; SKX_32-NEXT: movl %esp, %ebp
+; SKX_32-NEXT: .Ltmp9:
+; SKX_32-NEXT: .cfi_def_cfa_register %ebp
+; SKX_32-NEXT: andl $-64, %esp
+; SKX_32-NEXT: subl $64, %esp
+; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
+; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1
+; SKX_32-NEXT: kshiftrw $8, %k1, %k2
+; SKX_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1}
+; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2}
+; SKX_32-NEXT: movl %ebp, %esp
+; SKX_32-NEXT: popl %ebp
+; SKX_32-NEXT: retl
+ call void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask)
+define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) {
+; KNL_64-LABEL: test_scatter_16f32:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1}
+; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm0
+; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k2}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test_scatter_16f32:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1}
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test_scatter_16f32:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpslld $31, %zmm2, %zmm2
+; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1}
+; SKX-NEXT: vextractf32x8 $1, %zmm3, %ymm0
+; SKX-NEXT: vscatterqps %ymm0, (,%zmm1) {%k2}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test_scatter_16f32:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
+; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1}
+; SKX_32-NEXT: retl
+ call void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask)
+define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) {
+; KNL_64-LABEL: test_scatter_16f64:
+; KNL_64: # BB#0:
+; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
+; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
+; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
+; KNL_64-NEXT: kshiftrw $8, %k1, %k2
+; KNL_64-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1}
+; KNL_64-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2}
+; KNL_64-NEXT: retq
+;
+; KNL_32-LABEL: test_scatter_16f64:
+; KNL_32: # BB#0:
+; KNL_32-NEXT: pushl %ebp
+; KNL_32-NEXT: .Ltmp9:
+; KNL_32-NEXT: .cfi_def_cfa_offset 8
+; KNL_32-NEXT: .Ltmp10:
+; KNL_32-NEXT: .cfi_offset %ebp, -8
+; KNL_32-NEXT: movl %esp, %ebp
+; KNL_32-NEXT: .Ltmp11:
+; KNL_32-NEXT: .cfi_def_cfa_register %ebp
+; KNL_32-NEXT: andl $-64, %esp
+; KNL_32-NEXT: subl $64, %esp
+; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
+; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1
+; KNL_32-NEXT: kshiftrw $8, %k1, %k2
+; KNL_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1}
+; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2}
+; KNL_32-NEXT: movl %ebp, %esp
+; KNL_32-NEXT: popl %ebp
+; KNL_32-NEXT: retl
+;
+; SKX-LABEL: test_scatter_16f64:
+; SKX: # BB#0:
+; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
+; SKX-NEXT: vpslld $31, %zmm2, %zmm2
+; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
+; SKX-NEXT: kshiftrw $8, %k1, %k2
+; SKX-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1}
+; SKX-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2}
+; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test_scatter_16f64:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: pushl %ebp
+; SKX_32-NEXT: .Ltmp10:
+; SKX_32-NEXT: .cfi_def_cfa_offset 8
+; SKX_32-NEXT: .Ltmp11:
+; SKX_32-NEXT: .cfi_offset %ebp, -8
+; SKX_32-NEXT: movl %esp, %ebp
+; SKX_32-NEXT: .Ltmp12:
+; SKX_32-NEXT: .cfi_def_cfa_register %ebp
+; SKX_32-NEXT: andl $-64, %esp
+; SKX_32-NEXT: subl $64, %esp
+; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
+; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
+; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1
+; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
+; SKX_32-NEXT: kshiftrw $8, %k1, %k2
+; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1}
+; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2}
+; SKX_32-NEXT: movl %ebp, %esp
+; SKX_32-NEXT: popl %ebp
+; SKX_32-NEXT: retl
+ call void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask)
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index 6c16e634a59f..c29933e266b2 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -1,7 +1,8 @@
-; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s -check-prefix=AVX512
-; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
-; RUN: opt -mtriple=x86_64-apple-darwin -codegenprepare -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=AVX_SCALAR
-; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=skx < %s | FileCheck %s -check-prefix=SKX
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s --check-prefix=AVX512
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core-avx2 < %s | FileCheck %s --check-prefix=AVX2
+; RUN: opt -mtriple=x86_64-apple-darwin -codegenprepare -mcpu=corei7-avx -S < %s | FileCheck %s --check-prefix=AVX_SCALAR
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=skx < %s | FileCheck %s --check-prefix=SKX
; AVX512-LABEL: test1
; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
@@ -139,18 +140,55 @@ define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double
ret <4 x double> %res
}
-; AVX2-LABEL: test11
+; AVX2-LABEL: test11a
; AVX2: vmaskmovps
; AVX2: vblendvps
-; SKX-LABEL: test11
-; SKX: vmovaps {{.*}}{%k1}
-define <8 x float> @test11(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
+; SKX-LABEL: test11a
+; SKX: vmovaps (%rdi), %ymm1 {%k1}
+; AVX512-LABEL: test11a
+; AVX512: kshiftlw $8
+; AVX512: kshiftrw $8
+; AVX512: vmovups (%rdi), %zmm1 {%k1}
+define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
%res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst)
ret <8 x float> %res
}
+; SKX-LABEL: test11b
+; SKX: vmovdqu32 (%rdi), %ymm1 {%k1}
+; AVX512-LABEL: test11b
+; AVX512: kshiftlw $8
+; AVX512: kshiftrw $8
+; AVX512: vmovdqu32 (%rdi), %zmm1 {%k1}
+define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
+ %res = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst)
+ ret <8 x i32> %res
+}
+
+; SKX-LABEL: test11c
+; SKX: vmovaps (%rdi), %ymm0 {%k1} {z}
+; AVX512-LABEL: test11c
+; AVX512: kshiftlw $8
+; AVX512: kshiftrw $8
+; AVX512: vmovups (%rdi), %zmm0 {%k1} {z}
+define <8 x float> @test11c(<8 x i1> %mask, <8 x float>* %addr) {
+ %res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 32, <8 x i1> %mask, <8 x float> zeroinitializer)
+ ret <8 x float> %res
+}
+
+; SKX-LABEL: test11d
+; SKX: vmovdqu32 (%rdi), %ymm0 {%k1} {z}
+; AVX512-LABEL: test11d
+; AVX512: kshiftlw $8
+; AVX512: kshiftrw $8
+; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
+define <8 x i32> @test11d(<8 x i1> %mask, <8 x i32>* %addr) {
+ %res = call <8 x i32> @llvm.masked.load.v8i32(<8 x i32>* %addr, i32 4, <8 x i1> %mask, <8 x i32> zeroinitializer)
+ ret <8 x i32> %res
+}
+
; AVX2-LABEL: test12
; AVX2: vpmaskmovd %ymm
@@ -190,10 +228,13 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; AVX2-LABEL: test15
; AVX2: vpmaskmovd
-; SKX-LABEL: test15
-; SKX: kshiftl
-; SKX: kshiftr
-; SKX: vmovdqu32 {{.*}}{%k1}
+; SKX-LABEL: test15:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
+; SKX-NEXT: vpmovqd %xmm1, (%rdi) {%k1}
+; SKX-NEXT: retq
define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
@@ -232,12 +273,58 @@ define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
; AVX2-LABEL: test18
; AVX2: vmaskmovps
; AVX2-NOT: blend
+; AVX2: ret
define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
+; SKX-LABEL: test18:
+; SKX: ## BB#0:
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
+; SKX-NEXT: kshiftlw $2, %k0, %k0
+; SKX-NEXT: kshiftrw $2, %k0, %k1
+; SKX-NEXT: vmovups (%rdi), %xmm0 {%k1} {z}
+; SKX-NEXT: retq
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>undef)
ret <2 x float> %res
}
+; AVX_SCALAR-LABEL: test19
+; AVX_SCALAR: load <4 x float>, <4 x float>* %addr, align 4
+
+define <4 x float> @test19(<4 x i32> %trigger, <4 x float>* %addr) {
+ %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+ %res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>, <4 x float>undef)
+ ret <4 x float> %res
+}
+
+; AVX_SCALAR-LABEL: test20
+; AVX_SCALAR: load float, {{.*}}, align 4
+; AVX_SCALAR: insertelement <4 x float> undef, float
+; AVX_SCALAR: select <4 x i1> <i1 true, i1 false, i1 true, i1 true>
+
+define <4 x float> @test20(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %src0) {
+ %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+ %res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 16, <4 x i1><i1 true, i1 false, i1 true, i1 true>, <4 x float> %src0)
+ ret <4 x float> %res
+}
+
+; AVX_SCALAR-LABEL: test21
+; AVX_SCALAR: store <4 x i32> %val
+define void @test21(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
+ %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+ call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>)
+ ret void
+}
+
+; AVX_SCALAR-LABEL: test22
+; AVX_SCALAR: extractelement <4 x i32> %val, i32 0
+; AVX_SCALAR: store i32
+define void @test22(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
+ %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+ call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>)
+ ret void
+}
declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
@@ -251,6 +338,7 @@ declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i
declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>)
declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
+declare <8 x i32> @llvm.masked.load.v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
declare <2 x float> @llvm.masked.load.v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
declare <8 x double> @llvm.masked.load.v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
@@ -260,3 +348,415 @@ declare void @llvm.masked.store.v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>
declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
+declare <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>)
+
+; AVX512-LABEL: test23
+; AVX512: vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
+; AVX512: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+
+define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
+ %mask = icmp eq <16 x i32*> %trigger, zeroinitializer
+ %res = call <16 x i32*> @llvm.masked.load.v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
+ ret <16 x i32*> %res
+}
+
+%mystruct = type { i16, i16, [1 x i8*] }
+
+declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>)
+
+define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
+; AVX512-LABEL: test24:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+; AVX512-NEXT: kshiftrw $8, %k1, %k1
+; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: test24:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vpmaskmovq (%rdi), %ymm1, %ymm4
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vpmaskmovq 96(%rdi), %ymm1, %ymm3
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vpmaskmovq 64(%rdi), %ymm1, %ymm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: vpmaskmovq 32(%rdi), %ymm0, %ymm1
+; AVX2-NEXT: vmovdqa %ymm4, %ymm0
+; AVX2-NEXT: retq
+;
+; SKX-LABEL: test24:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z}
+; SKX-NEXT: kshiftrw $8, %k1, %k1
+; SKX-NEXT: vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
+; SKX-NEXT: retq
+ %res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
+ ret <16 x %mystruct*> %res
+}
+
+define void @test_store_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
+; AVX512-LABEL: test_store_16i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vmovdqu64 %zmm1, (%rdi) {%k1}
+; AVX512-NEXT: kshiftrw $8, %k1, %k1
+; AVX512-NEXT: vmovdqu64 %zmm2, 64(%rdi) {%k1}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: test_store_16i64:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
+; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
+; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vpmaskmovq %ymm1, %ymm5, (%rdi)
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vpmaskmovq %ymm4, %ymm1, 96(%rdi)
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vpmaskmovq %ymm3, %ymm1, 64(%rdi)
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: vpmaskmovq %ymm2, %ymm0, 32(%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SKX-LABEL: test_store_16i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vmovdqu64 %zmm1, (%rdi) {%k1}
+; SKX-NEXT: kshiftrw $8, %k1, %k1
+; SKX-NEXT: vmovdqu64 %zmm2, 64(%rdi) {%k1}
+; SKX-NEXT: retq
+ call void @llvm.masked.store.v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32 4, <16 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32, <16 x i1> %mask)
+define void @test_store_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
+; AVX512-LABEL: test_store_16f64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vmovupd %zmm1, (%rdi) {%k1}
+; AVX512-NEXT: kshiftrw $8, %k1, %k1
+; AVX512-NEXT: vmovupd %zmm2, 64(%rdi) {%k1}
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: test_store_16f64:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
+; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
+; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmaskmovpd %ymm1, %ymm5, (%rdi)
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vmaskmovpd %ymm4, %ymm1, 96(%rdi)
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vmaskmovpd %ymm3, %ymm1, 64(%rdi)
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: vmaskmovpd %ymm2, %ymm0, 32(%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SKX-LABEL: test_store_16f64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vmovupd %zmm1, (%rdi) {%k1}
+; SKX-NEXT: kshiftrw $8, %k1, %k1
+; SKX-NEXT: vmovupd %zmm2, 64(%rdi) {%k1}
+; SKX-NEXT: retq
+ call void @llvm.masked.store.v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32 4, <16 x i1> %mask)
+ ret void
+}
+declare void @llvm.masked.store.v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32, <16 x i1> %mask)
+define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
+; AVX512-LABEL: test_load_16i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
+; AVX512-NEXT: kshiftrw $8, %k1, %k1
+; AVX512-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: vmovaps %zmm2, %zmm1
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: test_load_16i64:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
+; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
+; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vpmaskmovq (%rdi), %ymm5, %ymm9
+; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[1,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm7, %xmm7
+; AVX2-NEXT: vpsrad $31, %xmm7, %xmm7
+; AVX2-NEXT: vpmovsxdq %xmm7, %ymm7
+; AVX2-NEXT: vpmaskmovq 32(%rdi), %ymm7, %ymm8
+; AVX2-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm6, %xmm6
+; AVX2-NEXT: vpsrad $31, %xmm6, %xmm6
+; AVX2-NEXT: vpmovsxdq %xmm6, %ymm6
+; AVX2-NEXT: vpmaskmovq 64(%rdi), %ymm6, %ymm10
+; AVX2-NEXT: vblendvpd %ymm5, %ymm9, %ymm1, %ymm5
+; AVX2-NEXT: vblendvpd %ymm7, %ymm8, %ymm2, %ymm1
+; AVX2-NEXT: vblendvpd %ymm6, %ymm10, %ymm3, %ymm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: vpmaskmovq 96(%rdi), %ymm0, %ymm3
+; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vmovapd %ymm5, %ymm0
+; AVX2-NEXT: retq
+;
+; SKX-LABEL: test_load_16i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
+; SKX-NEXT: kshiftrw $8, %k1, %k1
+; SKX-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vmovaps %zmm2, %zmm1
+; SKX-NEXT: retq
+ %res = call <16 x i64> @llvm.masked.load.v16i64(<16 x i64>* %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
+ ret <16 x i64> %res
+}
+declare <16 x i64> @llvm.masked.load.v16i64(<16 x i64>* %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
+define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0) {
+; AVX512-LABEL: test_load_16f64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512-NEXT: vmovupd (%rdi), %zmm1 {%k1}
+; AVX512-NEXT: kshiftrw $8, %k1, %k1
+; AVX512-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: vmovaps %zmm2, %zmm1
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: test_load_16f64:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm5, %xmm5
+; AVX2-NEXT: vpsrad $31, %xmm5, %xmm5
+; AVX2-NEXT: vpmovsxdq %xmm5, %ymm5
+; AVX2-NEXT: vmaskmovpd (%rdi), %ymm5, %ymm9
+; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[1,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm7, %xmm7
+; AVX2-NEXT: vpsrad $31, %xmm7, %xmm7
+; AVX2-NEXT: vpmovsxdq %xmm7, %ymm7
+; AVX2-NEXT: vmaskmovpd 32(%rdi), %ymm7, %ymm8
+; AVX2-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm6, %xmm6
+; AVX2-NEXT: vpsrad $31, %xmm6, %xmm6
+; AVX2-NEXT: vpmovsxdq %xmm6, %ymm6
+; AVX2-NEXT: vmaskmovpd 64(%rdi), %ymm6, %ymm10
+; AVX2-NEXT: vblendvpd %ymm5, %ymm9, %ymm1, %ymm5
+; AVX2-NEXT: vblendvpd %ymm7, %ymm8, %ymm2, %ymm1
+; AVX2-NEXT: vblendvpd %ymm6, %ymm10, %ymm3, %ymm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: vmaskmovpd 96(%rdi), %ymm0, %ymm3
+; AVX2-NEXT: vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vmovapd %ymm5, %ymm0
+; AVX2-NEXT: retq
+;
+; SKX-LABEL: test_load_16f64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
+; SKX-NEXT: vpmovb2m %xmm0, %k1
+; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
+; SKX-NEXT: kshiftrw $8, %k1, %k1
+; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vmovaps %zmm2, %zmm1
+; SKX-NEXT: retq
+ %res = call <16 x double> @llvm.masked.load.v16f64(<16 x double>* %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
+ ret <16 x double> %res
+}
+declare <16 x double> @llvm.masked.load.v16f64(<16 x double>* %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
+
+define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) {
+; AVX512-LABEL: test_load_32f64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX512-NEXT: vpmovsxbd %xmm5, %zmm5
+; AVX512-NEXT: vpslld $31, %zmm5, %zmm5
+; AVX512-NEXT: vptestmd %zmm5, %zmm5, %k1
+; AVX512-NEXT: vmovupd 128(%rdi), %zmm3 {%k1}
+; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512-NEXT: vmovupd (%rdi), %zmm1 {%k2}
+; AVX512-NEXT: kshiftrw $8, %k1, %k1
+; AVX512-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
+; AVX512-NEXT: kshiftrw $8, %k2, %k1
+; AVX512-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
+; AVX512-NEXT: vmovaps %zmm1, %zmm0
+; AVX512-NEXT: vmovaps %zmm2, %zmm1
+; AVX512-NEXT: vmovaps %zmm3, %zmm2
+; AVX512-NEXT: vmovaps %zmm4, %zmm3
+; AVX512-NEXT: retq
+;
+; AVX2-LABEL: test_load_32f64:
+; AVX2: ## BB#0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: Ltmp0:
+; AVX2-NEXT: .cfi_def_cfa_offset 16
+; AVX2-NEXT: Ltmp1:
+; AVX2-NEXT: .cfi_offset %rbp, -16
+; AVX2-NEXT: movq %rsp, %rbp
+; AVX2-NEXT: Ltmp2:
+; AVX2-NEXT: .cfi_def_cfa_register %rbp
+; AVX2-NEXT: andq $-32, %rsp
+; AVX2-NEXT: subq $32, %rsp
+; AVX2-NEXT: vpshufd {{.*#+}} xmm8 = xmm0[1,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm8, %xmm8
+; AVX2-NEXT: vpsrad $31, %xmm8, %xmm8
+; AVX2-NEXT: vpmovsxdq %xmm8, %ymm8
+; AVX2-NEXT: vmaskmovpd 32(%rsi), %ymm8, %ymm9
+; AVX2-NEXT: vpshufd {{.*#+}} xmm10 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm10 = xmm10[0],zero,zero,zero,xmm10[1],zero,zero,zero,xmm10[2],zero,zero,zero,xmm10[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm10, %xmm10
+; AVX2-NEXT: vpsrad $31, %xmm10, %xmm10
+; AVX2-NEXT: vpmovsxdq %xmm10, %ymm10
+; AVX2-NEXT: vmaskmovpd 64(%rsi), %ymm10, %ymm11
+; AVX2-NEXT: vpshufd {{.*#+}} xmm12 = xmm0[3,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm12 = xmm12[0],zero,zero,zero,xmm12[1],zero,zero,zero,xmm12[2],zero,zero,zero,xmm12[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm12, %xmm12
+; AVX2-NEXT: vpsrad $31, %xmm12, %xmm12
+; AVX2-NEXT: vpmovsxdq %xmm12, %ymm12
+; AVX2-NEXT: vmaskmovpd 96(%rsi), %ymm12, %ymm13
+; AVX2-NEXT: vblendvpd %ymm8, %ymm9, %ymm2, %ymm8
+; AVX2-NEXT: vblendvpd %ymm10, %ymm11, %ymm3, %ymm9
+; AVX2-NEXT: vblendvpd %ymm12, %ymm13, %ymm4, %ymm11
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm3, %xmm3
+; AVX2-NEXT: vpsrad $31, %xmm3, %xmm3
+; AVX2-NEXT: vpmovsxdq %xmm3, %ymm3
+; AVX2-NEXT: vmaskmovpd 160(%rsi), %ymm3, %ymm10
+; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm4, %xmm4
+; AVX2-NEXT: vpsrad $31, %xmm4, %xmm4
+; AVX2-NEXT: vpmovsxdq %xmm4, %ymm4
+; AVX2-NEXT: vmaskmovpd 192(%rsi), %ymm4, %ymm12
+; AVX2-NEXT: vblendvpd %ymm3, %ymm10, %ymm6, %ymm3
+; AVX2-NEXT: vmovapd 16(%rbp), %ymm6
+; AVX2-NEXT: vblendvpd %ymm4, %ymm12, %ymm7, %ymm4
+; AVX2-NEXT: vpshufd {{.*#+}} xmm7 = xmm2[3,1,2,3]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm7, %xmm7
+; AVX2-NEXT: vpsrad $31, %xmm7, %xmm7
+; AVX2-NEXT: vpmovsxdq %xmm7, %ymm7
+; AVX2-NEXT: vmaskmovpd 224(%rsi), %ymm7, %ymm10
+; AVX2-NEXT: vblendvpd %ymm7, %ymm10, %ymm6, %ymm6
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: vmaskmovpd (%rsi), %ymm0, %ymm7
+; AVX2-NEXT: vblendvpd %ymm0, %ymm7, %ymm1, %ymm0
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
+; AVX2-NEXT: vmaskmovpd 128(%rsi), %ymm1, %ymm2
+; AVX2-NEXT: vblendvpd %ymm1, %ymm2, %ymm5, %ymm1
+; AVX2-NEXT: vmovapd %ymm1, 128(%rdi)
+; AVX2-NEXT: vmovapd %ymm0, (%rdi)
+; AVX2-NEXT: vmovapd %ymm6, 224(%rdi)
+; AVX2-NEXT: vmovapd %ymm4, 192(%rdi)
+; AVX2-NEXT: vmovapd %ymm3, 160(%rdi)
+; AVX2-NEXT: vmovapd %ymm11, 96(%rdi)
+; AVX2-NEXT: vmovapd %ymm9, 64(%rdi)
+; AVX2-NEXT: vmovapd %ymm8, 32(%rdi)
+; AVX2-NEXT: movq %rdi, %rax
+; AVX2-NEXT: movq %rbp, %rsp
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; SKX-LABEL: test_load_32f64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
+; SKX-NEXT: vpmovb2m %ymm0, %k1
+; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
+; SKX-NEXT: kshiftrd $16, %k1, %k2
+; SKX-NEXT: vmovupd 128(%rdi), %zmm3 {%k2}
+; SKX-NEXT: kshiftrw $8, %k1, %k1
+; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
+; SKX-NEXT: kshiftrw $8, %k2, %k1
+; SKX-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
+; SKX-NEXT: vmovaps %zmm1, %zmm0
+; SKX-NEXT: vmovaps %zmm2, %zmm1
+; SKX-NEXT: vmovaps %zmm3, %zmm2
+; SKX-NEXT: vmovaps %zmm4, %zmm3
+; SKX-NEXT: retq
+ %res = call <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
+ ret <32 x double> %res
+}
+declare <32 x double> @llvm.masked.load.v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
diff --git a/test/CodeGen/X86/materialize.ll b/test/CodeGen/X86/materialize.ll
new file mode 100644
index 000000000000..695bf0fa5b98
--- /dev/null
+++ b/test/CodeGen/X86/materialize.ll
@@ -0,0 +1,184 @@
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
+; RUN: llc -mtriple=x86_64-pc-win32 -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECKWIN64
+
+define i32 @one32_nooptsize() {
+entry:
+ ret i32 1
+
+; When not optimizing for size, use mov.
+; CHECK32-LABEL: one32_nooptsize:
+; CHECK32: movl $1, %eax
+; CHECK32-NEXT: retl
+; CHECK64-LABEL: one32_nooptsize:
+; CHECK64: movl $1, %eax
+; CHECK64-NEXT: retq
+}
+
+define i32 @one32() optsize {
+entry:
+ ret i32 1
+
+; CHECK32-LABEL: one32:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: retl
+
+; FIXME: Figure out the best approach in 64-bit mode.
+; CHECK64-LABEL: one32:
+; CHECK64: movl $1, %eax
+; CHECK64-NEXT: retq
+}
+
+define i32 @one32_minsize() minsize {
+entry:
+ ret i32 1
+
+; On 32-bit, xor-inc is preferred over push-pop.
+; CHECK32-LABEL: one32_minsize:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: retl
+
+; On 64-bit we don't do xor-inc yet, so push-pop it is. Note that we have to
+; pop into a 64-bit register even when we just need 32 bits.
+; CHECK64-LABEL: one32_minsize:
+; CHECK64: pushq $1
+; CHECK64: .cfi_adjust_cfa_offset 8
+; CHECK64: popq %rax
+; CHECK64: .cfi_adjust_cfa_offset -8
+; CHECK64-NEXT: retq
+}
+
+define i64 @one64_minsize() minsize {
+entry:
+ ret i64 1
+; On 64-bit we don't do xor-inc yet, so push-pop it is.
+; CHECK64-LABEL: one64_minsize:
+; CHECK64: pushq $1
+; CHECK64: .cfi_adjust_cfa_offset 8
+; CHECK64: popq %rax
+; CHECK64: .cfi_adjust_cfa_offset -8
+; CHECK64-NEXT: retq
+
+; On Win64 we can't adjust the stack unless there's a frame pointer.
+; CHECKWIN64-LABEL: one64_minsize:
+; CHECKWIN64: movl $1, %eax
+; CHECKWIN64-NEXT: retq
+}
+
+define i32 @minus_one32() optsize {
+entry:
+ ret i32 -1
+
+; CHECK32-LABEL: minus_one32:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: retl
+}
+
+define i32 @minus_one32_minsize() minsize {
+entry:
+ ret i32 -1
+
+; xor-dec is preferred over push-pop.
+; CHECK32-LABEL: minus_one32_minsize:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: retl
+}
+
+define i16 @one16() optsize {
+entry:
+ ret i16 1
+
+; CHECK32-LABEL: one16:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: retl
+}
+
+define i16 @minus_one16() optsize {
+entry:
+ ret i16 -1
+
+; CHECK32-LABEL: minus_one16:
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: retl
+}
+
+define i32 @minus_five32() minsize {
+entry:
+ ret i32 -5
+
+; CHECK32-LABEL: minus_five32:
+; CHECK32: pushl $-5
+; CHECK32: popl %eax
+; CHECK32: retl
+}
+
+define i64 @minus_five64() minsize {
+entry:
+ ret i64 -5
+
+; CHECK64-LABEL: minus_five64:
+; CHECK64: pushq $-5
+; CHECK64: .cfi_adjust_cfa_offset 8
+; CHECK64: popq %rax
+; CHECK64: .cfi_adjust_cfa_offset -8
+; CHECK64: retq
+}
+
+define i32 @rematerialize_minus_one() optsize {
+entry:
+ ; Materialize -1 (thiscall forces it into %ecx).
+ tail call x86_thiscallcc void @f(i32 -1)
+
+ ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
+ ; spilling it to the stack.
+ tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
+
+ ; -1 should be re-materialized here instead of getting spilled above.
+ ret i32 -1
+
+; CHECK32-LABEL: rematerialize_minus_one
+; CHECK32: xorl %ecx, %ecx
+; CHECK32-NEXT: decl %ecx
+; CHECK32: calll
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NOT: %eax
+; CHECK32: retl
+}
+
+define i32 @rematerialize_minus_one_eflags(i32 %x) optsize {
+entry:
+ ; Materialize -1 (thiscall forces it into %ecx).
+ tail call x86_thiscallcc void @f(i32 -1)
+
+ ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
+ ; spilling it to the stack.
+ tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
+
+ ; Define eflags.
+ %a = icmp ne i32 %x, 123
+ %b = zext i1 %a to i32
+ ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
+ ; It must therefore not use the xor-dec lowering.
+ %c = select i1 %a, i32 %b, i32 -1
+ ret i32 %c
+
+; CHECK32-LABEL: rematerialize_minus_one_eflags
+; CHECK32: xorl %ecx, %ecx
+; CHECK32-NEXT: decl %ecx
+; CHECK32: calll
+; CHECK32: cmpl
+; CHECK32: setne
+; CHECK32-NOT: xorl
+; CHECK32: movl $-1
+; CHECK32: cmov
+; CHECK32: retl
+}
+
+declare x86_thiscallcc void @f(i32)
diff --git a/test/CodeGen/X86/mcu-abi.ll b/test/CodeGen/X86/mcu-abi.ll
new file mode 100644
index 000000000000..966fd4521f2d
--- /dev/null
+++ b/test/CodeGen/X86/mcu-abi.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -mtriple=i686-pc-elfiamcu | FileCheck %s
+
+%struct.st12_t = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+; CHECK-LABEL: test_ints:
+; CHECK: addl %edx, %eax
+; CHECK-NEXT: imull %ecx, %eax
+; CHECK-NEXT: addl 4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @test_ints(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+entry:
+ %r1 = add i32 %b, %a
+ %r2 = mul i32 %c, %r1
+ %r3 = add i32 %d, %r2
+ ret i32 %r3
+}
+
+; CHECK-LABEL: test_floats:
+; CHECK: addl %edx, %eax
+; CHECK-NEXT: imull %ecx, %eax
+; CHECK-NEXT: addl 4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @test_floats(i32 %a, i32 %b, float %c, float %d) #0 {
+entry:
+ %ci = bitcast float %c to i32
+ %di = bitcast float %d to i32
+ %r1 = add i32 %b, %a
+ %r2 = mul i32 %ci, %r1
+ %r3 = add i32 %di, %r2
+ ret i32 %r3
+}
+
+; CHECK-LABEL: test_doubles:
+; CHECK: addl 4(%esp), %eax
+; CHECK-NEXT: adcl 8(%esp), %edx
+; CHECK-NEXT: retl
+define double @test_doubles(double %d1, double %d2) #0 {
+entry:
+ %d1i = bitcast double %d1 to i64
+ %d2i = bitcast double %d2 to i64
+ %r = add i64 %d1i, %d2i
+ %rd = bitcast i64 %r to double
+ ret double %rd
+}
+
+; CHECK-LABEL: test_mixed_doubles:
+; CHECK: addl %ecx, %eax
+; CHECK-NEXT: adcl $0, %edx
+; CHECK-NEXT: retl
+define double @test_mixed_doubles(double %d2, i32 %i) #0 {
+entry:
+ %iext = zext i32 %i to i64
+ %d2i = bitcast double %d2 to i64
+ %r = add i64 %iext, %d2i
+ %rd = bitcast i64 %r to double
+ ret double %rd
+}
+
+; CHECK-LABEL: ret_large_struct:
+; CHECK: pushl %esi
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: leal 8(%esp), %edx
+; CHECK-NEXT: movl $48, %ecx
+; CHECK-NEXT: calll memcpy
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: popl %esi
+; CHECK-NOT: retl $4
+; CHECK-NEXT: retl
+define void @ret_large_struct(%struct.st12_t* noalias nocapture sret %agg.result, %struct.st12_t* byval nocapture readonly align 4 %r) #0 {
+entry:
+ %0 = bitcast %struct.st12_t* %agg.result to i8*
+ %1 = bitcast %struct.st12_t* %r to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 48, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: var_args:
+; CHECK: movl 4(%esp), %eax
+; CHECK-NEXT: retl
+define i32 @var_args(i32 %i1, ...) #0 {
+entry:
+ ret i32 %i1
+}
+
+; CHECK-LABEL: test_lib_args:
+; CHECK: movl %edx, %eax
+; CHECK: calll __fixsfsi
+define i32 @test_lib_args(float %a, float %b) #0 {
+ %ret = fptosi float %b to i32
+ ret i32 %ret
+}
+
+; CHECK-LABEL: test_fp128:
+; CHECK: movl (%eax), %e[[CX:..]]
+; CHECK-NEXT: movl 4(%eax), %e[[DX:..]]
+; CHECK-NEXT: movl 8(%eax), %e[[SI:..]]
+; CHECK-NEXT: movl 12(%eax), %e[[AX:..]]
+; CHECK-NEXT: movl %e[[AX]], 12(%esp)
+; CHECK-NEXT: movl %e[[SI]], 8(%esp)
+; CHECK-NEXT: movl %e[[DX]], 4(%esp)
+; CHECK-NEXT: movl %e[[CX]], (%esp)
+; CHECK-NEXT: calll __fixtfsi
+define i32 @test_fp128(fp128* %ptr) #0 {
+ %v = load fp128, fp128* %ptr
+ %ret = fptosi fp128 %v to i32
+ ret i32 %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+attributes #0 = { nounwind "use-soft-float"="true"}
+attributes #1 = { nounwind argmemonly }
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 0111c0d433f1..7ef61c9a677b 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -3,6 +3,8 @@
; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1
; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s -check-prefix=NHM_64
+
@.str = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"
@.str2 = internal constant [30 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 4
@@ -179,13 +181,25 @@ entry:
; NOSSE: movl $2021161080
; NOSSE: movl $2021161080
+;;; TODO: (1) Some of the loads and stores are certainly unaligned and (2) the first load and first
+;;; store overlap with the second load and second store respectively.
+;;;
+;;; Is either of the sequences ideal?
+
; X86-64-LABEL: t4:
-; X86-64: movabsq $8680820740569200760, %rax
-; X86-64: movq %rax
-; X86-64: movq %rax
-; X86-64: movq %rax
-; X86-64: movw $120
-; X86-64: movl $2021161080
+; X86-64: movabsq $33909456017848440, %rax ## imm = 0x78787878787878
+; X86-64: movq %rax, -10(%rsp)
+; X86-64: movabsq $8680820740569200760, %rax ## imm = 0x7878787878787878
+; X86-64: movq %rax, -16(%rsp)
+; X86-64: movq %rax, -24(%rsp)
+; X86-64: movq %rax, -32(%rsp)
+
+; NHM_64-LABEL: t4:
+; NHM_64: movups _.str2+14(%rip), %xmm0
+; NHM_64: movups %xmm0, -26(%rsp)
+; NHM_64: movups _.str2(%rip), %xmm0
+; NHM_64: movaps %xmm0, -40(%rsp)
+
%tmp1 = alloca [30 x i8]
%tmp2 = bitcast [30 x i8]* %tmp1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1, i1 false)
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index f582571252b5..4351014192bb 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=DARWIN
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p256i8.p256i8.i64(i8 addrspace(256)* nocapture, i8 addrspace(256)* nocapture, i64, i32, i1) nounwind
; Variable memcpy's should lower to calls.
@@ -59,6 +60,26 @@ entry:
; DARWIN: movq
}
+define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize noredzone {
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
+ ret void
+; LINUX-LABEL: test3_minsize:
+; LINUX: memcpy
+
+; DARWIN-LABEL: test3_minsize:
+; DARWIN: memcpy
+}
+
+define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind optsize minsize noredzone {
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
+ ret void
+; LINUX-LABEL: test3_minsize_optsize:
+; LINUX: memcpy
+
+; DARWIN-LABEL: test3_minsize_optsize:
+; DARWIN: memcpy
+}
+
; Large constant memcpy's should be inlined when not optimizing for size.
define void @test4(i8* nocapture %A, i8* nocapture %B) nounwind noredzone {
entry:
@@ -118,3 +139,15 @@ define void @PR15348(i8* %a, i8* %b) {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false)
ret void
}
+
+; Memcpys from / to address space 256 should be lowered to appropriate loads /
+; stores if small enough.
+define void @addrspace256(i8 addrspace(256)* %a, i8 addrspace(256)* %b) nounwind {
+ tail call void @llvm.memcpy.p256i8.p256i8.i64(i8 addrspace(256)* %a, i8 addrspace(256)* %b, i64 16, i32 8, i1 false)
+ ret void
+; LINUX-LABEL: addrspace256:
+; LINUX: movq %gs:
+; LINUX: movq %gs:
+; LINUX: movq {{.*}}, %gs:
+; LINUX: movq {{.*}}, %gs:
+}
diff --git a/test/CodeGen/X86/merge-store-partially-alias-loads.ll b/test/CodeGen/X86/merge-store-partially-alias-loads.ll
new file mode 100644
index 000000000000..8e148aa76d38
--- /dev/null
+++ b/test/CodeGen/X86/merge-store-partially-alias-loads.ll
@@ -0,0 +1,52 @@
+; REQUIRES: asserts
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck -check-prefix=X86 %s
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -debug-only=isel < %s 2>&1 | FileCheck -check-prefix=DBGDAG %s
+
+; It's OK to merge the load / store of the first 2 components, but
+; they must not be placed on the same chain after merging.
+
+; X86-LABEL: {{^}}merge_store_partial_overlap_load:
+; X86-DAG: movw ([[BASEREG:%[a-z]+]]), [[LO2:%[a-z]+]]
+; X86-DAG: movb 2([[BASEREG]]), [[HI1:%[a-z]+]]
+
+; X86-NEXT: movw [[LO2]], 1([[BASEREG]])
+; X86-NEXT: movb [[HI1]], 3([[BASEREG]])
+; X86-NEXT: retq
+
+; DBGDAG-LABEL: Optimized lowered selection DAG: BB#0 'merge_store_partial_overlap_load:'
+; DBGDAG: [[ENTRYTOKEN:t[0-9]+]]: ch = EntryToken
+; DBGDAG-DAG: [[BASEPTR:t[0-9]+]]: i64,ch = CopyFromReg [[ENTRYTOKEN]],
+; DBGDAG-DAG: [[ADDPTR:t[0-9]+]]: i64 = add [[BASEPTR]], Constant:i64<2>
+
+; DBGDAG-DAG: [[LD2:t[0-9]+]]: i16,ch = load<LD2[%tmp81](align=1)> [[ENTRYTOKEN]], [[BASEPTR]], undef:i64
+; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load<LD1[%tmp12]> [[ENTRYTOKEN]], [[ADDPTR]], undef:i64
+
+; DBGDAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1
+
+; DBGDAG-DAG: [[ST2:t[0-9]+]]: ch = store<ST2[%tmp10](align=1)> [[LOADTOKEN]], [[LD2]], t{{[0-9]+}}, undef:i64
+; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<ST1[%tmp14]> [[ST2]], [[LD1]], t{{[0-9]+}}, undef:i64
+; DBGDAG: X86ISD::RET_FLAG [[ST1]],
+
+; DBGDAG: Type-legalized selection DAG: BB#0 'merge_store_partial_overlap_load:'
+define void @merge_store_partial_overlap_load([4 x i8]* %tmp) {
+ %tmp8 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 0
+ %tmp10 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 1
+ %tmp12 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i8 2
+ %tmp14 = getelementptr [4 x i8], [4 x i8]* %tmp, i32 0, i8 3
+
+ %tmp9 = load i8, i8* %tmp8, align 1 ; base + 0
+ %tmp11 = load i8, i8* %tmp10, align 1 ; base + 1
+ %tmp13 = load i8, i8* %tmp12, align 1 ; base + 2
+
+ store i8 %tmp9, i8* %tmp10, align 1 ; base + 1
+ store i8 %tmp11, i8* %tmp12, align 1 ; base + 2
+ store i8 %tmp13, i8* %tmp14, align 1 ; base + 3
+
+; Should emit
+; load base + 0, base + 1
+; store base + 1, base + 2
+; load base + 2
+; store base + 3
+
+ ret void
+}
diff --git a/test/CodeGen/X86/misched-code-difference-with-debug.ll b/test/CodeGen/X86/misched-code-difference-with-debug.ll
index 0f1f382c49a8..0a1ea830a41d 100644
--- a/test/CodeGen/X86/misched-code-difference-with-debug.ll
+++ b/test/CodeGen/X86/misched-code-difference-with-debug.ll
@@ -43,7 +43,7 @@ entry:
; CHECK-LABEL: test_with_debug
; CHECK: movl [[A]], [[B]]
; CHECK-NEXT: movl [[A]], [[C]]
-define void @test_with_debug() {
+define void @test_with_debug() !dbg !13 {
entry:
%c = alloca %class.C, align 1
%0 = load i8, i8* @argc, align 1
@@ -62,26 +62,26 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22, !23}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !20, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !20, imports: !2)
!1 = !DIFile(filename: "test.cpp", directory: "")
!2 = !{}
!3 = !{!4}
!4 = !DICompositeType(tag: DW_TAG_class_type, name: "C", line: 2, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS1C")
!5 = !{!6}
-!6 = !DISubprogram(name: "test", file: !1, scope: !"_ZTS1C", type: !7)
+!6 = !DISubprogram(name: "test", file: !1, scope: !"_ZTS1C", type: !7, isDefinition: false)
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !10, !11, !11, !11, null}
!9 = !DIBasicType(encoding: DW_ATE_signed, size: 32, align: 32, name: "int")
!10 = !DIDerivedType(baseType: !"_ZTS1C", tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!12 = !{!13}
-!13 = !DISubprogram(name: "test_with_debug", linkageName: "test_with_debug", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !14, type: !15, function: void ()* @test_with_debug, variables: !17)
+!13 = distinct !DISubprogram(name: "test_with_debug", linkageName: "test_with_debug", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !14, type: !15, variables: !17)
!14 = !DIFile(filename: "test.cpp", directory: "")
!15 = !DISubroutineType(types: !16)
!16 = !{null}
!17 = !{!18, !19}
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 7, scope: !13, file: !14, type: !"_ZTS1C")
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "lc", line: 8, scope: !13, file: !14, type: !11)
+!18 = !DILocalVariable(name: "c", line: 7, scope: !13, file: !14, type: !"_ZTS1C")
+!19 = !DILocalVariable(name: "lc", line: 8, scope: !13, file: !14, type: !11)
!20 = !{!21}
!21 = !DIGlobalVariable(name: "argc", line: 1, isLocal: false, isDefinition: true, scope: null, file: !14, type: !11, variable: i8* @argc)
!22 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/CodeGen/X86/mmx-arg-passing-x86-64.ll b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
index 2727e3eb0280..9841381b560d 100644
--- a/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
+++ b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86-64
;
; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7].
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index 4e0031076200..67ccb9e32dde 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | FileCheck %s --check-prefix=X86-32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86-64
;
diff --git a/test/CodeGen/X86/mmx-coalescing.ll b/test/CodeGen/X86/mmx-coalescing.ll
new file mode 100644
index 000000000000..a515e5ee3754
--- /dev/null
+++ b/test/CodeGen/X86/mmx-coalescing.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s
+
+%SA = type <{ %union.anon, i32, [4 x i8], i8*, i8*, i8*, i32, [4 x i8] }>
+%union.anon = type { <1 x i64> }
+
+; Check that extra movd (copy) instructions aren't generated.
+
+define i32 @test(%SA* %pSA, i16* %A, i32 %B, i32 %C, i32 %D, i8* %E) {
+entry:
+; CHECK-LABEL: test
+; CHECK: # BB#0:
+; CHECK-NEXT: pshufw
+; CHECK-NEXT: movd
+; CHECK-NOT: movd
+; CHECK-NEXT: testl
+ %shl = shl i32 1, %B
+ %shl1 = shl i32 %C, %B
+ %shl2 = shl i32 1, %D
+ %v = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 0, i32 0
+ %v0 = load <1 x i64>, <1 x i64>* %v, align 8
+ %SA0 = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 1
+ %v1 = load i32, i32* %SA0, align 4
+ %SA1 = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 3
+ %v2 = load i8*, i8** %SA1, align 8
+ %SA2 = getelementptr inbounds %SA, %SA* %pSA, i64 0, i32 4
+ %v3 = load i8*, i8** %SA2, align 8
+ %v4 = bitcast <1 x i64> %v0 to <4 x i16>
+ %v5 = bitcast <4 x i16> %v4 to x86_mmx
+ %v6 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v5, i8 -18)
+ %v7 = bitcast x86_mmx %v6 to <4 x i16>
+ %v8 = bitcast <4 x i16> %v7 to <1 x i64>
+ %v9 = extractelement <1 x i64> %v8, i32 0
+ %v10 = bitcast i64 %v9 to <2 x i32>
+ %v11 = extractelement <2 x i32> %v10, i32 0
+ %cmp = icmp eq i32 %v11, 0
+ br i1 %cmp, label %if.A, label %if.B
+
+if.A:
+; CHECK: %if.A
+; CHECK-NEXT: movd
+; CHECK-NEXT: psllq
+ %pa = phi <1 x i64> [ %v8, %entry ], [ %vx, %if.C ]
+ %v17 = extractelement <1 x i64> %pa, i32 0
+ %v18 = bitcast i64 %v17 to x86_mmx
+ %v19 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %v18, i32 %B) #2
+ %v20 = bitcast x86_mmx %v19 to i64
+ %v21 = insertelement <1 x i64> undef, i64 %v20, i32 0
+ %cmp3 = icmp eq i64 %v20, 0
+ br i1 %cmp3, label %if.C, label %merge
+
+if.B:
+ %v34 = bitcast <1 x i64> %v8 to <4 x i16>
+ %v35 = bitcast <4 x i16> %v34 to x86_mmx
+ %v36 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v35, i8 -18)
+ %v37 = bitcast x86_mmx %v36 to <4 x i16>
+ %v38 = bitcast <4 x i16> %v37 to <1 x i64>
+ br label %if.C
+
+if.C:
+ %vx = phi <1 x i64> [ %v21, %if.A ], [ %v38, %if.B ]
+ %cvt = bitcast <1 x i64> %vx to <2 x i32>
+ %ex = extractelement <2 x i32> %cvt, i32 0
+ %cmp2 = icmp eq i32 %ex, 0
+ br i1 %cmp2, label %if.A, label %merge
+
+merge:
+; CHECK: %merge
+; CHECK-NOT: movd
+; CHECK-NEXT: pshufw
+ %vy = phi <1 x i64> [ %v21, %if.A ], [ %vx, %if.C ]
+ %v130 = bitcast <1 x i64> %vy to <4 x i16>
+ %v131 = bitcast <4 x i16> %v130 to x86_mmx
+ %v132 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v131, i8 -18)
+ %v133 = bitcast x86_mmx %v132 to <4 x i16>
+ %v134 = bitcast <4 x i16> %v133 to <1 x i64>
+ %v135 = extractelement <1 x i64> %v134, i32 0
+ %v136 = bitcast i64 %v135 to <2 x i32>
+ %v137 = extractelement <2 x i32> %v136, i32 0
+ ret i32 %v137
+}
+
+
+declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
diff --git a/test/CodeGen/X86/mmx-intrinsics.ll b/test/CodeGen/X86/mmx-intrinsics.ll
index 39d481b16e7a..7647fccb5803 100644
--- a/test/CodeGen/X86/mmx-intrinsics.ll
+++ b/test/CodeGen/X86/mmx-intrinsics.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s
-; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
+; RUN: llc < %s -march=x86 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64
declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: phaddw
+; ALL-LABEL: @test1
+; ALL: phaddw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -22,7 +23,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pcmpgtd
+; ALL-LABEL: @test88
+; ALL: pcmpgtd
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -38,7 +40,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pcmpgtw
+; ALL-LABEL: @test87
+; ALL: pcmpgtw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -54,7 +57,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pcmpgtb
+; ALL-LABEL: @test86
+; ALL: pcmpgtb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -70,7 +74,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pcmpeqd
+; ALL-LABEL: @test85
+; ALL: pcmpeqd
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -86,7 +91,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pcmpeqw
+; ALL-LABEL: @test84
+; ALL: pcmpeqw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -102,7 +108,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pcmpeqb
+; ALL-LABEL: @test83
+; ALL: pcmpeqb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -118,7 +125,9 @@ entry:
declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: punpckldq
+; ALL-LABEL: @test82
+; X86: punpckldq {{.*#+}} mm0 = mm0[0],mem[0]
+; X64: punpckldq {{.*#+}} mm0 = mm0[0],mm1[0]
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -134,7 +143,9 @@ entry:
declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: punpcklwd
+; ALL-LABEL: @test81
+; X86: punpcklwd {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1]
+; X64: punpcklwd {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1]
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -150,7 +161,9 @@ entry:
declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: punpcklbw
+; ALL-LABEL: @test80
+; X86: punpcklbw {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
+; X64: punpcklbw {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -166,7 +179,9 @@ entry:
declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: punpckhdq
+; ALL-LABEL: @test79
+; X86: punpckhdq {{.*#+}} mm0 = mm0[1],mem[1]
+; X64: punpckhdq {{.*#+}} mm0 = mm0[1],mm1[1]
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -182,7 +197,9 @@ entry:
declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: punpckhwd
+; ALL-LABEL: @test78
+; X86: punpckhwd {{.*#+}} mm0 = mm0[2],mem[2],mm0[3],mem[3]
+; X64: punpckhwd {{.*#+}} mm0 = mm0[2],mm1[2],mm0[3],mm1[3]
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -198,7 +215,9 @@ entry:
declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: punpckhbw
+; ALL-LABEL: @test77
+; X86: punpckhbw {{.*#+}} mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
+; X64: punpckhbw {{.*#+}} mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7]
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -214,7 +233,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: packuswb
+; ALL-LABEL: @test76
+; ALL: packuswb
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -230,7 +250,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: packssdw
+; ALL-LABEL: @test75
+; ALL: packssdw
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -246,7 +267,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: packsswb
+; ALL-LABEL: @test74
+; ALL: packsswb
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -262,7 +284,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: psrad
+; ALL-LABEL: @test73
+; ALL: psrad
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
@@ -276,7 +299,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: psraw
+; ALL-LABEL: @test72
+; ALL: psraw
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
@@ -290,7 +314,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: psrlq
+; ALL-LABEL: @test71
+; ALL: psrlq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var.i = bitcast i64 %0 to x86_mmx
@@ -302,7 +327,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: psrld
+; ALL-LABEL: @test70
+; ALL: psrld
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
@@ -316,7 +342,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: psrlw
+; ALL-LABEL: @test69
+; ALL: psrlw
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
@@ -330,7 +357,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: psllq
+; ALL-LABEL: @test68
+; ALL: psllq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var.i = bitcast i64 %0 to x86_mmx
@@ -342,7 +370,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: pslld
+; ALL-LABEL: @test67
+; ALL: pslld
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
@@ -356,7 +385,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: psllw
+; ALL-LABEL: @test66
+; ALL: psllw
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
@@ -370,7 +400,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psrad
+; ALL-LABEL: @test65
+; ALL: psrad
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
@@ -386,7 +417,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psraw
+; ALL-LABEL: @test64
+; ALL: psraw
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
@@ -402,7 +434,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psrlq
+; ALL-LABEL: @test63
+; ALL: psrlq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var.i = bitcast i64 %0 to x86_mmx
@@ -416,7 +449,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psrld
+; ALL-LABEL: @test62
+; ALL: psrld
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
@@ -432,7 +466,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psrlw
+; ALL-LABEL: @test61
+; ALL: psrlw
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
@@ -448,7 +483,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psllq
+; ALL-LABEL: @test60
+; ALL: psllq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var.i = bitcast i64 %0 to x86_mmx
@@ -462,7 +498,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pslld
+; ALL-LABEL: @test59
+; ALL: pslld
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
@@ -478,7 +515,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psllw
+; ALL-LABEL: @test58
+; ALL: psllw
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
@@ -494,7 +532,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pxor
+; ALL-LABEL: @test56
+; ALL: pxor
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -510,7 +549,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: por
+; ALL-LABEL: @test55
+; ALL: por
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -526,7 +566,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pandn
+; ALL-LABEL: @test54
+; ALL: pandn
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -542,7 +583,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pand
+; ALL-LABEL: @test53
+; ALL: pand
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -558,7 +600,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmullw
+; ALL-LABEL: @test52
+; ALL: pmullw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -572,7 +615,8 @@ entry:
}
define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmullw
+; ALL-LABEL: @test51
+; ALL: pmullw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -588,7 +632,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmulhw
+; ALL-LABEL: @test50
+; ALL: pmulhw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -604,7 +649,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmaddwd
+; ALL-LABEL: @test49
+; ALL: pmaddwd
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -620,7 +666,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psubusw
+; ALL-LABEL: @test48
+; ALL: psubusw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -636,7 +683,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psubusb
+; ALL-LABEL: @test47
+; ALL: psubusb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -652,7 +700,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psubsw
+; ALL-LABEL: @test46
+; ALL: psubsw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -668,7 +717,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psubsb
+; ALL-LABEL: @test45
+; ALL: psubsb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -682,7 +732,8 @@ entry:
}
define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psubq
+; ALL-LABEL: @test44
+; ALL: psubq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var = bitcast i64 %0 to x86_mmx
@@ -698,7 +749,8 @@ declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psubd
+; ALL-LABEL: @test43
+; ALL: psubd
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -714,7 +766,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psubw
+; ALL-LABEL: @test42
+; ALL: psubw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -730,7 +783,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psubb
+; ALL-LABEL: @test41
+; ALL: psubb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -746,7 +800,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: paddusw
+; ALL-LABEL: @test40
+; ALL: paddusw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -762,7 +817,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: paddusb
+; ALL-LABEL: @test39
+; ALL: paddusb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -778,7 +834,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: paddsw
+; ALL-LABEL: @test38
+; ALL: paddsw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -794,7 +851,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: paddsb
+; ALL-LABEL: @test37
+; ALL: paddsb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -810,7 +868,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: paddq
+; ALL-LABEL: @test36
+; ALL: paddq
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var = bitcast i64 %0 to x86_mmx
@@ -824,7 +883,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: paddd
+; ALL-LABEL: @test35
+; ALL: paddd
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -840,7 +900,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: paddw
+; ALL-LABEL: @test34
+; ALL: paddw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -856,7 +917,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: paddb
+; ALL-LABEL: @test33
+; ALL: paddb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -872,7 +934,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psadbw
+; ALL-LABEL: @test32
+; ALL: psadbw
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -886,7 +949,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pminsw
+; ALL-LABEL: @test31
+; ALL: pminsw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -902,7 +966,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pminub
+; ALL-LABEL: @test30
+; ALL: pminub
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -918,7 +983,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmaxsw
+; ALL-LABEL: @test29
+; ALL: pmaxsw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -934,7 +1000,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmaxub
+; ALL-LABEL: @test28
+; ALL: pmaxub
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -950,7 +1017,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pavgw
+; ALL-LABEL: @test27
+; ALL: pavgw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -966,7 +1034,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pavgb
+; ALL-LABEL: @test26
+; ALL: pavgb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -982,7 +1051,8 @@ entry:
declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
-; CHECK: movntq
+; ALL-LABEL: @test25
+; ALL: movntq
entry:
%mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
%0 = extractelement <1 x i64> %a, i32 0
@@ -994,7 +1064,8 @@ entry:
declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: pmovmskb
+; ALL-LABEL: @test24
+; ALL: pmovmskb
entry:
%0 = bitcast <1 x i64> %a to <8 x i8>
%mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
@@ -1005,7 +1076,8 @@ entry:
declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
-; CHECK: maskmovq
+; ALL-LABEL: @test23
+; ALL: maskmovq
entry:
%0 = bitcast <1 x i64> %n to <8 x i8>
%1 = bitcast <1 x i64> %d to <8 x i8>
@@ -1018,7 +1090,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmulhuw
+; ALL-LABEL: @test22
+; ALL: pmulhuw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -1034,7 +1107,9 @@ entry:
declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: pshufw
+; ALL-LABEL: @test21
+; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
+; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%1 = bitcast <4 x i16> %0 to x86_mmx
@@ -1046,9 +1121,10 @@ entry:
}
define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: test21_2
-; CHECK: pshufw
-; CHECK: movd
+; ALL-LABEL: @test21_2
+; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0]
+; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0]
+; ALL: movd
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%1 = bitcast <4 x i16> %0 to x86_mmx
@@ -1062,7 +1138,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmuludq
+; ALL-LABEL: @test20
+; ALL: pmuludq
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -1076,7 +1153,8 @@ entry:
declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: cvtpi2pd
+; ALL-LABEL: @test19
+; ALL: cvtpi2pd
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%1 = bitcast <2 x i32> %0 to x86_mmx
@@ -1087,7 +1165,8 @@ entry:
declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
-; CHECK: cvttpd2pi
+; ALL-LABEL: @test18
+; ALL: cvttpd2pi
entry:
%0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
%1 = bitcast x86_mmx %0 to <2 x i32>
@@ -1099,7 +1178,8 @@ entry:
declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
-; CHECK: cvtpd2pi
+; ALL-LABEL: @test17
+; ALL: cvtpd2pi
entry:
%0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
%1 = bitcast x86_mmx %0 to <2 x i32>
@@ -1111,7 +1191,8 @@ entry:
declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: palignr
+; ALL-LABEL: @test16
+; ALL: palignr
entry:
%0 = extractelement <1 x i64> %a, i32 0
%mmx_var = bitcast i64 %0 to x86_mmx
@@ -1125,7 +1206,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: pabsd
+; ALL-LABEL: @test15
+; ALL: pabsd
entry:
%0 = bitcast <1 x i64> %a to <2 x i32>
%1 = bitcast <2 x i32> %0 to x86_mmx
@@ -1139,7 +1221,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: pabsw
+; ALL-LABEL: @test14
+; ALL: pabsw
entry:
%0 = bitcast <1 x i64> %a to <4 x i16>
%1 = bitcast <4 x i16> %0 to x86_mmx
@@ -1153,7 +1236,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
-; CHECK: pabsb
+; ALL-LABEL: @test13
+; ALL: pabsb
entry:
%0 = bitcast <1 x i64> %a to <8 x i8>
%1 = bitcast <8 x i8> %0 to x86_mmx
@@ -1167,7 +1251,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psignd
+; ALL-LABEL: @test12
+; ALL: psignd
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -1183,7 +1268,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psignw
+; ALL-LABEL: @test11
+; ALL: psignw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -1199,7 +1285,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: psignb
+; ALL-LABEL: @test10
+; ALL: psignb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -1215,7 +1302,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pshufb
+; ALL-LABEL: @test9
+; ALL: pshufb
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -1231,7 +1319,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmulhrsw
+; ALL-LABEL: @test8
+; ALL: pmulhrsw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -1247,7 +1336,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: pmaddubsw
+; ALL-LABEL: @test7
+; ALL: pmaddubsw
entry:
%0 = bitcast <1 x i64> %b to <8 x i8>
%1 = bitcast <1 x i64> %a to <8 x i8>
@@ -1263,7 +1353,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: phsubsw
+; ALL-LABEL: @test6
+; ALL: phsubsw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -1279,7 +1370,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: phsubd
+; ALL-LABEL: @test5
+; ALL: phsubd
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -1295,7 +1387,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: phsubw
+; ALL-LABEL: @test4
+; ALL: phsubw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -1311,7 +1404,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: phaddsw
+; ALL-LABEL: @test3
+; ALL: phaddsw
entry:
%0 = bitcast <1 x i64> %b to <4 x i16>
%1 = bitcast <1 x i64> %a to <4 x i16>
@@ -1327,7 +1421,8 @@ entry:
declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
-; CHECK: phaddd
+; ALL-LABEL: @test2
+; ALL: phaddd
entry:
%0 = bitcast <1 x i64> %b to <2 x i32>
%1 = bitcast <1 x i64> %a to <2 x i32>
@@ -1341,16 +1436,18 @@ entry:
}
define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
-; CHECK: cvtpi2ps
+; ALL-LABEL: @test89
+; ALL: cvtpi2ps
%c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b)
ret <4 x float> %c
}
declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
-; CHECK-LABEL: test90
+; ALL-LABEL: test90
define void @test90() {
-; CHECK: emms
+; ALL-LABEL: @test90
+; ALL: emms
call void @llvm.x86.mmx.emms()
ret void
}
diff --git a/test/CodeGen/X86/mmx-only.ll b/test/CodeGen/X86/mmx-only.ll
new file mode 100644
index 000000000000..35598d5f6e19
--- /dev/null
+++ b/test/CodeGen/X86/mmx-only.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+mmx,-sse | FileCheck %s
+
+; Test that turning off sse doesn't turn off mmx.
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone {
+; CHECK-LABEL: @test88
+; CHECK: pcmpgtd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
diff --git a/test/CodeGen/X86/movntdq-no-avx.ll b/test/CodeGen/X86/movntdq-no-avx.ll
index cc35e201e6b3..2bf09dd6f581 100644
--- a/test/CodeGen/X86/movntdq-no-avx.ll
+++ b/test/CodeGen/X86/movntdq-no-avx.ll
@@ -5,7 +5,7 @@
define void @test(<2 x i64>* nocapture %a, <2 x i64> %b) nounwind optsize {
entry:
- store <2 x i64> %b, <2 x i64>* %a, align 16, !nontemporal !0
+ store <2 x i64> %b, <2 x i64>* %a, align 32, !nontemporal !0
ret void
}
diff --git a/test/CodeGen/X86/movpc32-check.ll b/test/CodeGen/X86/movpc32-check.ll
new file mode 100644
index 000000000000..606af3c898f4
--- /dev/null
+++ b/test/CodeGen/X86/movpc32-check.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=i686-pc-linux -relocation-model=pic | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i686-pc-linux"
+
+; Function Attrs: nounwind
+define void @test() #0 !dbg !4 {
+entry:
+ call void bitcast (void (...)* @bar to void ()*)(), !dbg !11
+ ret void, !dbg !12
+}
+
+declare void @bar(...) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="i686" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="i686" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (http://llvm.org/git/clang.git 3490ab8630d5643f71f1f04e46984f05b27b8d67) (http://llvm.org/git/llvm.git d2643e2ff955ed234944fe3c6b4ffc1250085843)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.c", directory: "movpc-test")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"PIC Level", i32 2}
+!10 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git 3490ab8630d5643f71f1f04e46984f05b27b8d67) (http://llvm.org/git/llvm.git d2643e2ff955ed234944fe3c6b4ffc1250085843)"}
+!11 = !DILocation(line: 4, column: 3, scope: !4)
+!12 = !DILocation(line: 5, column: 1, scope: !4)
+
+; CHECK: calll .L0$pb
+; CHECK-NEXT: .Ltmp3:
+; CHECK-NEXT: .cfi_adjust_cfa_offset 4
+; CHECK-NEXT: .L0$pb:
+; CHECK-NEXT: popl
+; CHECK-NEXT: .Ltmp4:
+; CHECK-NEXT: .cfi_adjust_cfa_offset -4
diff --git a/test/CodeGen/X86/movtopush.ll b/test/CodeGen/X86/movtopush.ll
index b02f9ec45e7f..de4c87cf30ad 100644
--- a/test/CodeGen/X86/movtopush.ll
+++ b/test/CodeGen/X86/movtopush.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
+; RUN: llc < %s -mtriple=i686-windows -stackrealign -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
%class.Class = type { i32 }
%struct.s = type { i64 }
@@ -357,3 +357,26 @@ entry:
call void @good(i32 9, i32 10, i32 11, i32 12)
ret void
}
+
+; Make sure the add does not prevent folding loads into pushes.
+; val1 and val2 will not be folded into pushes since they have
+; an additional use, but val3 should be.
+; NORMAL-LABEL: test13:
+; NORMAL: movl ([[P1:%e..]]), [[V1:%e..]]
+; NORMAL-NEXT: movl ([[P2:%e..]]), [[V2:%e..]]
+; NORMAL-NEXT: , [[ADD:%e..]]
+; NORMAL-NEXT: pushl [[ADD]]
+; NORMAL-NEXT: pushl ([[P3:%e..]])
+; NORMAL-NEXT: pushl [[V2]]
+; NORMAL-NEXT: pushl [[V1]]
+; NORMAL-NEXT: calll _good
+; NORMAL: movl [[P3]], %eax
+define i32* @test13(i32* inreg %ptr1, i32* inreg %ptr2, i32* inreg %ptr3) optsize {
+entry:
+ %val1 = load i32, i32* %ptr1
+ %val2 = load i32, i32* %ptr2
+ %val3 = load i32, i32* %ptr3
+ %add = add i32 %val1, %val2
+ call void @good(i32 %val1, i32 %val2, i32 %val3, i32 %add)
+ ret i32* %ptr3
+}
diff --git a/test/CodeGen/X86/mult-alt-x86.ll b/test/CodeGen/X86/mult-alt-x86.ll
index 5174f85adb9f..1c83fedad3ce 100644
--- a/test/CodeGen/X86/mult-alt-x86.ll
+++ b/test/CodeGen/X86/mult-alt-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -no-integrated-as
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 -no-integrated-as
; ModuleID = 'mult-alt-x86.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i686-pc-win32"
diff --git a/test/CodeGen/X86/musttail-varargs.ll b/test/CodeGen/X86/musttail-varargs.ll
index 3613f4c08cce..247d78776b80 100644
--- a/test/CodeGen/X86/musttail-varargs.ll
+++ b/test/CodeGen/X86/musttail-varargs.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-linux | FileCheck %s --check-prefix=LINUX
+; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-linux-gnux32 | FileCheck %s --check-prefix=LINUX-X32
; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-windows | FileCheck %s --check-prefix=WINDOWS
; RUN: llc < %s -enable-tail-merge=0 -mtriple=i686-windows | FileCheck %s --check-prefix=X86
@@ -57,6 +58,40 @@ define void @f_thunk(i8* %this, ...) {
; LINUX-DAG: movb {{.*}}, %al
; LINUX: jmpq *{{.*}} # TAILCALL
+; LINUX-X32-LABEL: f_thunk:
+; LINUX-X32-DAG: movl %edi, {{.*}}
+; LINUX-X32-DAG: movq %rsi, {{.*}}
+; LINUX-X32-DAG: movq %rdx, {{.*}}
+; LINUX-X32-DAG: movq %rcx, {{.*}}
+; LINUX-X32-DAG: movq %r8, {{.*}}
+; LINUX-X32-DAG: movq %r9, {{.*}}
+; LINUX-X32-DAG: movb %al, {{.*}}
+; LINUX-X32-DAG: movaps %xmm0, {{[0-9]*}}(%esp)
+; LINUX-X32-DAG: movaps %xmm1, {{[0-9]*}}(%esp)
+; LINUX-X32-DAG: movaps %xmm2, {{[0-9]*}}(%esp)
+; LINUX-X32-DAG: movaps %xmm3, {{[0-9]*}}(%esp)
+; LINUX-X32-DAG: movaps %xmm4, {{[0-9]*}}(%esp)
+; LINUX-X32-DAG: movaps %xmm5, {{[0-9]*}}(%esp)
+; LINUX-X32-DAG: movaps %xmm6, {{[0-9]*}}(%esp)
+; LINUX-X32-DAG: movaps %xmm7, {{[0-9]*}}(%esp)
+; LINUX-X32: callq get_f
+; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm0
+; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm1
+; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm2
+; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm3
+; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm4
+; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm5
+; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm6
+; LINUX-X32-DAG: movaps {{[0-9]*}}(%esp), %xmm7
+; LINUX-X32-DAG: movl {{.*}}, %edi
+; LINUX-X32-DAG: movq {{.*}}, %rsi
+; LINUX-X32-DAG: movq {{.*}}, %rdx
+; LINUX-X32-DAG: movq {{.*}}, %rcx
+; LINUX-X32-DAG: movq {{.*}}, %r8
+; LINUX-X32-DAG: movq {{.*}}, %r9
+; LINUX-X32-DAG: movb {{.*}}, %al
+; LINUX-X32: jmpq *{{.*}} # TAILCALL
+
; WINDOWS-LABEL: f_thunk:
; WINDOWS-NOT: mov{{.}}ps
; WINDOWS-DAG: movq %rdx, {{.*}}
@@ -92,6 +127,10 @@ define void @g_thunk(i8* %fptr_i8, ...) {
; LINUX-NOT: movq
; LINUX: jmpq *%rdi # TAILCALL
+; LINUX-X32-LABEL: g_thunk:
+; LINUX-X32-DAG: movl %edi, %[[REG:e[abcd]x|ebp|esi|edi|r8|r9|r1[0-5]]]
+; LINUX-X32-DAG: jmpq *%[[REG]] # TAILCALL
+
; WINDOWS-LABEL: g_thunk:
; WINDOWS-NOT: movq
; WINDOWS: jmpq *%rcx # TAILCALL
@@ -130,6 +169,10 @@ else:
; LINUX: jne
; LINUX: jmpq *{{.*}} # TAILCALL
; LINUX: jmpq *{{.*}} # TAILCALL
+; LINUX-X32-LABEL: h_thunk:
+; LINUX-X32: jne
+; LINUX-X32: jmpq *{{.*}} # TAILCALL
+; LINUX-X32: jmpq *{{.*}} # TAILCALL
; WINDOWS-LABEL: h_thunk:
; WINDOWS: jne
; WINDOWS: jmpq *{{.*}} # TAILCALL
diff --git a/test/CodeGen/X86/nontemporal-2.ll b/test/CodeGen/X86/nontemporal-2.ll
index 8c08b3c163c0..c9767f88488c 100644
--- a/test/CodeGen/X86/nontemporal-2.ll
+++ b/test/CodeGen/X86/nontemporal-2.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; Make sure that we generate non-temporal stores for the test cases below.
; We use xorps for zeroing, so domain information isn't available anymore.
@@ -300,4 +300,19 @@ define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
ret void
}
+; 256-bit NT stores require 256-bit alignment.
+; FIXME: For AVX, we could lower this to 2x movntps %xmm. Taken further, we
+; could even scalarize to movnti when we have 1-alignment: nontemporal is
+; probably always worth even some 20 instruction scalarization.
+define void @test_unaligned_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
+; CHECK-LABEL: test_unaligned_v8f32:
+; SSE: movntps %xmm
+; SSE: movntps %xmm
+; AVX-NOT: movnt
+; AVX: vmovups %ymm
+ %r = fadd <8 x float> %a, %b
+ store <8 x float> %r, <8 x float>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
!1 = !{i32 1}
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
index f9385df36421..9a2f23596f79 100644
--- a/test/CodeGen/X86/nontemporal.ll
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple x86_64-unknown-unknown | FileCheck %s
-define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
+define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E, i64 %F) {
; CHECK: movntps
%cast = bitcast i8* %B to <4 x float>*
%A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
@@ -13,9 +13,12 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
%cast2 = bitcast i8* %B to <2 x double>*
%C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
-; CHECK: movnti
+; CHECK: movntil
%cast3 = bitcast i8* %B to i32*
- store i32 %D, i32* %cast3, align 16, !nontemporal !0
+ store i32 %D, i32* %cast3, align 1, !nontemporal !0
+; CHECK: movntiq
+ %cast4 = bitcast i8* %B to i64*
+ store i64 %F, i64* %cast4, align 1, !nontemporal !0
ret void
}
diff --git a/test/CodeGen/X86/null-streamer.ll b/test/CodeGen/X86/null-streamer.ll
index 3f5abfd40f29..e80f3fcbe58d 100644
--- a/test/CodeGen/X86/null-streamer.ll
+++ b/test/CodeGen/X86/null-streamer.ll
@@ -14,11 +14,11 @@ define void @f1() {
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !13}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: " ", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: " ", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
!1 = !DIFile(filename: "file.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !1, type: !6, function: i32 ()* null, variables: !2)
+!4 = distinct !DISubprogram(name: "", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !1, type: !6, variables: !2)
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
!8 = !DIBasicType(tag: DW_TAG_base_type, size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/CodeGen/X86/opt-ext-uses.ll b/test/CodeGen/X86/opt-ext-uses.ll
index 5d05ad9c4544..39e6fd0e6a59 100644
--- a/test/CodeGen/X86/opt-ext-uses.ll
+++ b/test/CodeGen/X86/opt-ext-uses.ll
@@ -1,4 +1,10 @@
-; RUN: llc < %s -march=x86 | grep movw | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; This test should get one and only one register to register mov.
+; CHECK-LABEL: t:
+; CHECK: movw
+; CHECK-NOT: movw
+; CHECK: ret
define signext i16 @t() {
entry:
diff --git a/test/CodeGen/X86/or-branch.ll b/test/CodeGen/X86/or-branch.ll
index 9db948adb465..4899a0fc7e88 100644
--- a/test/CodeGen/X86/or-branch.ll
+++ b/test/CodeGen/X86/or-branch.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i386-unknown-unknown -jump-is-expensive=0 | FileCheck %s --check-prefix=JUMP2
-; RUN: llc < %s -mtriple=i386-unknown-unknown -jump-is-expensive=1 | FileCheck %s --check-prefix=JUMP1
+; RUN: llc < %s -mtriple=i386-unknown-unknown -jump-is-expensive=0 | FileCheck %s --check-prefix=JUMP2 --check-prefix=CHECK
+; RUN: llc < %s -mtriple=i386-unknown-unknown -jump-is-expensive=1 | FileCheck %s --check-prefix=JUMP1 --check-prefix=CHECK
define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
; JUMP2-LABEL: foo:
@@ -25,4 +25,30 @@ UnifiedReturnBlock:
ret void
}
+; If the branch is unpredictable, don't add another branch
+; regardless of whether they are expensive or not.
+
+define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind {
+; CHECK-LABEL: unpredictable:
+; CHECK-DAG: sete
+; CHECK-DAG: setl
+; CHECK: orb
+; CHECK: jne
+entry:
+ %tmp1 = icmp eq i32 %X, 0
+ %tmp3 = icmp slt i32 %Y, 5
+ %tmp4 = or i1 %tmp3, %tmp1
+ br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock, !unpredictable !0
+
+cond_true:
+ %tmp5 = tail call i32 (...) @bar( )
+ ret void
+
+UnifiedReturnBlock:
+ ret void
+}
+
declare i32 @bar(...)
+
+!0 = !{}
+
diff --git a/test/CodeGen/X86/or-lea.ll b/test/CodeGen/X86/or-lea.ll
new file mode 100644
index 000000000000..f45a639ffa2c
--- /dev/null
+++ b/test/CodeGen/X86/or-lea.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; InstCombine and DAGCombiner transform an 'add' into an 'or'
+; if there are no common bits from the incoming operands.
+; LEA instruction selection should be able to see through that
+; transform and reduce add/shift/or instruction counts.
+
+define i32 @or_shift1_and1(i32 %x, i32 %y) {
+; CHECK-LABEL: or_shift1_and1:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leal (%rsi,%rdi,2), %eax
+; CHECK-NEXT: retq
+
+ %shl = shl i32 %x, 1
+ %and = and i32 %y, 1
+ %or = or i32 %and, %shl
+ ret i32 %or
+}
+
+define i32 @or_shift1_and1_swapped(i32 %x, i32 %y) {
+; CHECK-LABEL: or_shift1_and1_swapped:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leal (%rsi,%rdi,2), %eax
+; CHECK-NEXT: retq
+
+ %shl = shl i32 %x, 1
+ %and = and i32 %y, 1
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
+
+define i32 @or_shift2_and1(i32 %x, i32 %y) {
+; CHECK-LABEL: or_shift2_and1:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leal (%rsi,%rdi,4), %eax
+; CHECK-NEXT: retq
+
+ %shl = shl i32 %x, 2
+ %and = and i32 %y, 1
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
+
+define i32 @or_shift3_and1(i32 %x, i32 %y) {
+; CHECK-LABEL: or_shift3_and1:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leal (%rsi,%rdi,8), %eax
+; CHECK-NEXT: retq
+
+ %shl = shl i32 %x, 3
+ %and = and i32 %y, 1
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
+
+define i32 @or_shift3_and7(i32 %x, i32 %y) {
+; CHECK-LABEL: or_shift3_and7:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl $7, %esi
+; CHECK-NEXT: leal (%rsi,%rdi,8), %eax
+; CHECK-NEXT: retq
+
+ %shl = shl i32 %x, 3
+ %and = and i32 %y, 7
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
+
+; The shift is too big for an LEA.
+
+define i32 @or_shift4_and1(i32 %x, i32 %y) {
+; CHECK-LABEL: or_shift4_and1:
+; CHECK: # BB#0:
+; CHECK-NEXT: shll $4, %edi
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leal (%rsi,%rdi), %eax
+; CHECK-NEXT: retq
+
+ %shl = shl i32 %x, 4
+ %and = and i32 %y, 1
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
+
+; The mask is too big for the shift, so the 'or' isn't equivalent to an 'add'.
+
+define i32 @or_shift3_and8(i32 %x, i32 %y) {
+; CHECK-LABEL: or_shift3_and8:
+; CHECK: # BB#0:
+; CHECK-NEXT: leal (,%rdi,8), %eax
+; CHECK-NEXT: andl $8, %esi
+; CHECK-NEXT: orl %esi, %eax
+; CHECK-NEXT: retq
+
+ %shl = shl i32 %x, 3
+ %and = and i32 %y, 8
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
+
+; 64-bit operands should work too.
+
+define i64 @or_shift1_and1_64(i64 %x, i64 %y) {
+; CHECK-LABEL: or_shift1_and1_64:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl $1, %esi
+; CHECK-NEXT: leaq (%rsi,%rdi,2), %rax
+; CHECK-NEXT: retq
+
+ %shl = shl i64 %x, 1
+ %and = and i64 %y, 1
+ %or = or i64 %and, %shl
+ ret i64 %or
+}
+
diff --git a/test/CodeGen/X86/palignr.ll b/test/CodeGen/X86/palignr.ll
index dfa2cedf45a2..d75506cadfa2 100644
--- a/test/CodeGen/X86/palignr.ll
+++ b/test/CodeGen/X86/palignr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86 -mcpu=core2 -mattr=+ssse3 | FileCheck %s
; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck --check-prefix=CHECK-YONAH %s
diff --git a/test/CodeGen/X86/patchpoint-verifiable.mir b/test/CodeGen/X86/patchpoint-verifiable.mir
new file mode 100644
index 000000000000..300ecaf002f2
--- /dev/null
+++ b/test/CodeGen/X86/patchpoint-verifiable.mir
@@ -0,0 +1,42 @@
+# RUN: llc -mtriple=x86_64-apple-darwin -stop-after branch-folder -start-after branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# This test verifies that the machine verifier won't report an error when
+# verifying the PATCHPOINT instruction.
+
+--- |
+
+ define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+ entry:
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2)
+ ret void
+ }
+
+ declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
+
+...
+---
+name: small_patchpoint_codegen
+tracksRegLiveness: true
+liveins:
+ - { reg: '%rdi' }
+ - { reg: '%rsi' }
+frameInfo:
+ hasPatchPoint: true
+ stackSize: 8
+ adjustsStack: true
+ hasCalls: true
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16 }
+body: |
+ bb.0.entry:
+ liveins: %rdi, %rsi, %rbp
+
+ frame-setup PUSH64r killed %rbp, implicit-def %rsp, implicit %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 16
+ CFI_INSTRUCTION .cfi_offset %rbp, -16
+ %rbp = frame-setup MOV64rr %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_register %rbp
+ ; CHECK: PATCHPOINT 5, 5, 0, 2, 0, %rdi, %rsi, csr_64, implicit-def dead early-clobber %r11, implicit-def %rsp, implicit-def dead %rax
+ PATCHPOINT 5, 5, 0, 2, 0, %rdi, %rsi, csr_64, implicit-def dead early-clobber %r11, implicit-def %rsp, implicit-def dead %rax
+ %rbp = POP64r implicit-def %rsp, implicit %rsp
+ RETQ
+...
diff --git a/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
new file mode 100644
index 000000000000..bf457814079c
--- /dev/null
+++ b/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
@@ -0,0 +1,190 @@
+; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s
+
+; The peephole optimizer can elide some physical register copies such as
+; EFLAGS. Make sure the flags are used directly, instead of needlessly using
+; lahf, when possible.
+
+@L = external global i32
+@M = external global i8
+declare i32 @bar(i64)
+
+; CHECK-LABEL: plus_one
+; CHECK-NOT: seto
+; CHECK-NOT: lahf
+; CHECK-NOT: sahf
+; CHECK-NOT: pushf
+; CHECK-NOT: popf
+; CHECK: incl L
+define i1 @plus_one() {
+entry:
+ %loaded_L = load i32, i32* @L
+ %val = add nsw i32 %loaded_L, 1 ; N.B. will emit inc.
+ store i32 %val, i32* @L
+ %loaded_M = load i8, i8* @M
+ %masked = and i8 %loaded_M, 8
+ %M_is_true = icmp ne i8 %masked, 0
+ %L_is_false = icmp eq i32 %val, 0
+ %cond = and i1 %L_is_false, %M_is_true
+ br i1 %cond, label %exit2, label %exit
+
+exit:
+ ret i1 true
+
+exit2:
+ ret i1 false
+}
+
+; CHECK-LABEL: plus_forty_two
+; CHECK-NOT: seto
+; CHECK-NOT: lahf
+; CHECK-NOT: sahf
+; CHECK-NOT: pushf
+; CHECK-NOT: popf
+; CHECK: addl $42,
+define i1 @plus_forty_two() {
+entry:
+ %loaded_L = load i32, i32* @L
+ %val = add nsw i32 %loaded_L, 42 ; N.B. won't emit inc.
+ store i32 %val, i32* @L
+ %loaded_M = load i8, i8* @M
+ %masked = and i8 %loaded_M, 8
+ %M_is_true = icmp ne i8 %masked, 0
+ %L_is_false = icmp eq i32 %val, 0
+ %cond = and i1 %L_is_false, %M_is_true
+ br i1 %cond, label %exit2, label %exit
+
+exit:
+ ret i1 true
+
+exit2:
+ ret i1 false
+}
+
+; CHECK-LABEL: minus_one
+; CHECK-NOT: seto
+; CHECK-NOT: lahf
+; CHECK-NOT: sahf
+; CHECK-NOT: pushf
+; CHECK-NOT: popf
+; CHECK: decl L
+define i1 @minus_one() {
+entry:
+ %loaded_L = load i32, i32* @L
+ %val = add nsw i32 %loaded_L, -1 ; N.B. will emit dec.
+ store i32 %val, i32* @L
+ %loaded_M = load i8, i8* @M
+ %masked = and i8 %loaded_M, 8
+ %M_is_true = icmp ne i8 %masked, 0
+ %L_is_false = icmp eq i32 %val, 0
+ %cond = and i1 %L_is_false, %M_is_true
+ br i1 %cond, label %exit2, label %exit
+
+exit:
+ ret i1 true
+
+exit2:
+ ret i1 false
+}
+
+; CHECK-LABEL: minus_forty_two
+; CHECK-NOT: seto
+; CHECK-NOT: lahf
+; CHECK-NOT: sahf
+; CHECK-NOT: pushf
+; CHECK-NOT: popf
+; CHECK: addl $-42,
+define i1 @minus_forty_two() {
+entry:
+ %loaded_L = load i32, i32* @L
+ %val = add nsw i32 %loaded_L, -42 ; N.B. won't emit dec.
+ store i32 %val, i32* @L
+ %loaded_M = load i8, i8* @M
+ %masked = and i8 %loaded_M, 8
+ %M_is_true = icmp ne i8 %masked, 0
+ %L_is_false = icmp eq i32 %val, 0
+ %cond = and i1 %L_is_false, %M_is_true
+ br i1 %cond, label %exit2, label %exit
+
+exit:
+ ret i1 true
+
+exit2:
+ ret i1 false
+}
+
+; CHECK-LABEL: test_intervening_call:
+; CHECK: cmpxchg
+; CHECK: seto %al
+; CHECK-NEXT: lahf
+; CHECK: call{{[lq]}} bar
+; CHECK: addb $127, %al
+; CHECK-NEXT: sahf
+define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
+ ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS.
+ %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
+ %v = extractvalue { i64, i1 } %cx, 0
+ %p = extractvalue { i64, i1 } %cx, 1
+ call i32 @bar(i64 %v)
+ br i1 %p, label %t, label %f
+
+t:
+ ret i64 42
+
+f:
+ ret i64 0
+}
+
+; CHECK-LABEL: test_two_live_flags:
+; CHECK: cmpxchg
+; CHECK: seto %al
+; CHECK-NEXT: lahf
+; Save result of the first cmpxchg into D.
+; CHECK-NEXT: mov{{[lq]}} %[[AX:[er]ax]], %[[D:[re]d[xi]]]
+; CHECK: cmpxchg
+; CHECK-NEXT: sete %al
+; Save result of the second cmpxchg onto the stack.
+; CHECK-NEXT: push{{[lq]}} %[[AX]]
+; Restore result of the first cmpxchg from D, put it back in EFLAGS.
+; CHECK-NEXT: mov{{[lq]}} %[[D]], %[[AX]]
+; CHECK-NEXT: addb $127, %al
+; CHECK-NEXT: sahf
+; Restore result of the second cmpxchg from the stack.
+; CHECK-NEXT: pop{{[lq]}} %[[AX]]
+; Test from EFLAGS restored from first cmpxchg, jump if that fails.
+; CHECK-NEXT: jne
+; Fallthrough to test the second cmpxchg's result.
+; CHECK: testb %al, %al
+; CHECK-NEXT: je
+define i64 @test_two_live_flags(
+ i64* %foo0, i64 %bar0, i64 %baz0,
+ i64* %foo1, i64 %bar1, i64 %baz1) {
+ %cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst
+ %p0 = extractvalue { i64, i1 } %cx0, 1
+ %cx1 = cmpxchg i64* %foo1, i64 %bar1, i64 %baz1 seq_cst seq_cst
+ %p1 = extractvalue { i64, i1 } %cx1, 1
+ %flag = and i1 %p0, %p1
+ br i1 %flag, label %t, label %f
+
+t:
+ ret i64 42
+
+f:
+ ret i64 0
+}
+
+; CHECK-LABEL: asm_clobbering_flags:
+; CHECK: test
+; CHECK-NEXT: setg
+; CHECK-NEXT: #APP
+; CHECK-NEXT: bsfl
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movl
+; CHECK-NEXT: ret
+define i1 @asm_clobbering_flags(i32* %mem) {
+ %val = load i32, i32* %mem, align 4
+ %cmp = icmp sgt i32 %val, 0
+ %res = tail call i32 asm "bsfl $1,$0", "=r,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %val)
+ store i32 %res, i32* %mem, align 4
+ ret i1 %cmp
+}
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index dbe5bd646c7f..37b6fdf7cfeb 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
define <16 x i8> @mul8c(<16 x i8> %i) nounwind {
; SSE2-LABEL: mul8c:
@@ -34,16 +35,34 @@ define <16 x i8> @mul8c(<16 x i8> %i) nounwind {
; SSE41-NEXT: packuswb %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
+;
+; AVX2-LABEL: mul8c:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
entry:
%A = mul <16 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
ret <16 x i8> %A
}
define <8 x i16> @mul16c(<8 x i16> %i) nounwind {
-; ALL-LABEL: mul16c:
-; ALL: # BB#0: # %entry
-; ALL-NEXT: pmullw {{.*}}(%rip), %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: mul16c:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: mul16c:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: retq
entry:
%A = mul <8 x i16> %i, < i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117 >
ret <8 x i16> %A
@@ -65,22 +84,38 @@ define <4 x i32> @a(<4 x i32> %i) nounwind {
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
+;
+; AVX2-LABEL: a:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
entry:
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
ret <4 x i32> %A
}
define <2 x i64> @b(<2 x i64> %i) nounwind {
-; ALL-LABEL: b:
-; ALL: # BB#0: # %entry
-; ALL-NEXT: movdqa {{.*#+}} xmm1 = [117,117]
-; ALL-NEXT: movdqa %xmm0, %xmm2
-; ALL-NEXT: pmuludq %xmm1, %xmm2
-; ALL-NEXT: psrlq $32, %xmm0
-; ALL-NEXT: pmuludq %xmm1, %xmm0
-; ALL-NEXT: psllq $32, %xmm0
-; ALL-NEXT: paddq %xmm2, %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: b:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [117,117]
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pmuludq %xmm1, %xmm2
+; SSE-NEXT: psrlq $32, %xmm0
+; SSE-NEXT: pmuludq %xmm1, %xmm0
+; SSE-NEXT: psllq $32, %xmm0
+; SSE-NEXT: paddq %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: b:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [117,117]
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; AVX2-NEXT: retq
entry:
%A = mul <2 x i64> %i, < i64 117, i64 117 >
ret <2 x i64> %A
@@ -123,16 +158,34 @@ define <16 x i8> @mul8(<16 x i8> %i, <16 x i8> %j) nounwind {
; SSE41-NEXT: packuswb %xmm0, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
+;
+; AVX2-LABEL: mul8:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1
+; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
entry:
%A = mul <16 x i8> %i, %j
ret <16 x i8> %A
}
define <8 x i16> @mul16(<8 x i16> %i, <8 x i16> %j) nounwind {
-; ALL-LABEL: mul16:
-; ALL: # BB#0: # %entry
-; ALL-NEXT: pmullw %xmm1, %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: mul16:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmullw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: mul16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
entry:
%A = mul <8 x i16> %i, %j
ret <8 x i16> %A
@@ -154,26 +207,44 @@ define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: retq
+;
+; AVX2-LABEL: c:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
entry:
%A = mul <4 x i32> %i, %j
ret <4 x i32> %A
}
define <2 x i64> @d(<2 x i64> %i, <2 x i64> %j) nounwind {
-; ALL-LABEL: d:
-; ALL: # BB#0: # %entry
-; ALL-NEXT: movdqa %xmm0, %xmm2
-; ALL-NEXT: pmuludq %xmm1, %xmm2
-; ALL-NEXT: movdqa %xmm1, %xmm3
-; ALL-NEXT: psrlq $32, %xmm3
-; ALL-NEXT: pmuludq %xmm0, %xmm3
-; ALL-NEXT: psllq $32, %xmm3
-; ALL-NEXT: paddq %xmm3, %xmm2
-; ALL-NEXT: psrlq $32, %xmm0
-; ALL-NEXT: pmuludq %xmm1, %xmm0
-; ALL-NEXT: psllq $32, %xmm0
-; ALL-NEXT: paddq %xmm2, %xmm0
-; ALL-NEXT: retq
+; SSE-LABEL: d:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pmuludq %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: psrlq $32, %xmm3
+; SSE-NEXT: pmuludq %xmm0, %xmm3
+; SSE-NEXT: psllq $32, %xmm3
+; SSE-NEXT: paddq %xmm3, %xmm2
+; SSE-NEXT: psrlq $32, %xmm0
+; SSE-NEXT: pmuludq %xmm1, %xmm0
+; SSE-NEXT: psllq $32, %xmm0
+; SSE-NEXT: paddq %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: d:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpsllq $32, %xmm3, %xmm3
+; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
+; AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; AVX2-NEXT: retq
entry:
%A = mul <2 x i64> %i, %j
ret <2 x i64> %A
@@ -210,6 +281,17 @@ define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE41-NEXT: pmulld {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
; SSE41-NEXT: addq $40, %rsp
; SSE41-NEXT: retq
+;
+; AVX2-LABEL: e:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: subq $40, %rsp
+; AVX2-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX2-NEXT: callq foo
+; AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; AVX2-NEXT: vpmulld {{[0-9]+}}(%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX2-NEXT: addq $40, %rsp
+; AVX2-NEXT: retq
entry:
; Use a call to force spills.
call void @foo()
@@ -218,27 +300,47 @@ entry:
}
define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind {
-; ALL-LABEL: f:
-; ALL: # BB#0: # %entry
-; ALL-NEXT: subq $40, %rsp
-; ALL-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
-; ALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
-; ALL-NEXT: callq foo
-; ALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
-; ALL-NEXT: movdqa %xmm0, %xmm2
-; ALL-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
-; ALL-NEXT: pmuludq %xmm3, %xmm2
-; ALL-NEXT: movdqa %xmm3, %xmm1
-; ALL-NEXT: psrlq $32, %xmm1
-; ALL-NEXT: pmuludq %xmm0, %xmm1
-; ALL-NEXT: psllq $32, %xmm1
-; ALL-NEXT: paddq %xmm1, %xmm2
-; ALL-NEXT: psrlq $32, %xmm0
-; ALL-NEXT: pmuludq %xmm3, %xmm0
-; ALL-NEXT: psllq $32, %xmm0
-; ALL-NEXT: paddq %xmm2, %xmm0
-; ALL-NEXT: addq $40, %rsp
-; ALL-NEXT: retq
+; SSE-LABEL: f:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: subq $40, %rsp
+; SSE-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT: callq foo
+; SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
+; SSE-NEXT: pmuludq %xmm3, %xmm2
+; SSE-NEXT: movdqa %xmm3, %xmm1
+; SSE-NEXT: psrlq $32, %xmm1
+; SSE-NEXT: pmuludq %xmm0, %xmm1
+; SSE-NEXT: psllq $32, %xmm1
+; SSE-NEXT: paddq %xmm1, %xmm2
+; SSE-NEXT: psrlq $32, %xmm0
+; SSE-NEXT: pmuludq %xmm3, %xmm0
+; SSE-NEXT: psllq $32, %xmm0
+; SSE-NEXT: paddq %xmm2, %xmm0
+; SSE-NEXT: addq $40, %rsp
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: f:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: subq $40, %rsp
+; AVX2-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX2-NEXT: callq foo
+; AVX2-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
+; AVX2-NEXT: vmovdqa (%rsp), %xmm3 # 16-byte Reload
+; AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm0
+; AVX2-NEXT: vpsrlq $32, %xmm2, %xmm1
+; AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
+; AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
+; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlq $32, %xmm3, %xmm1
+; AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsllq $32, %xmm1, %xmm1
+; AVX2-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: addq $40, %rsp
+; AVX2-NEXT: retq
entry:
; Use a call to force spills.
call void @foo()
@@ -247,31 +349,76 @@ entry:
}
define <4 x i64> @b1(<4 x i64> %i) nounwind {
-; AVX2-LABEL: @b1
-; AVX2: vpbroadcastq
-; AVX2-NEXT: vpmuludq
-; AVX2-NEXT: vpsrlq $32
-; AVX2-NEXT: vpmuludq
-; AVX2-NEXT: vpsllq $32
-; AVX2-NEXT: vpaddq
-; AVX2-NEXT: retq
+; SSE-LABEL: b1:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [117,117]
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: pmuludq %xmm2, %xmm3
+; SSE-NEXT: psrlq $32, %xmm0
+; SSE-NEXT: pmuludq %xmm2, %xmm0
+; SSE-NEXT: psllq $32, %xmm0
+; SSE-NEXT: paddq %xmm3, %xmm0
+; SSE-NEXT: movdqa %xmm1, %xmm3
+; SSE-NEXT: pmuludq %xmm2, %xmm3
+; SSE-NEXT: psrlq $32, %xmm1
+; SSE-NEXT: pmuludq %xmm2, %xmm1
+; SSE-NEXT: psllq $32, %xmm1
+; SSE-NEXT: paddq %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: b1:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: retq
entry:
%A = mul <4 x i64> %i, < i64 117, i64 117, i64 117, i64 117 >
ret <4 x i64> %A
}
define <4 x i64> @b2(<4 x i64> %i, <4 x i64> %j) nounwind {
-; AVX2-LABEL: @b2
-; AVX2: vpmuludq
-; AVX2-NEXT: vpsrlq $32
-; AVX2-NEXT: vpmuludq
-; AVX2-NEXT: vpsllq $32
-; AVX2-NEXT: vpaddq
-; AVX2-NEXT: vpsrlq $32
-; AVX2-NEXT: vpmuludq
-; AVX2-NEXT: vpsllq $32
-; AVX2-NEXT: vpaddq
-; AVX2-NEXT: retq
+; SSE-LABEL: b2:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa %xmm0, %xmm4
+; SSE-NEXT: pmuludq %xmm2, %xmm4
+; SSE-NEXT: movdqa %xmm2, %xmm5
+; SSE-NEXT: psrlq $32, %xmm5
+; SSE-NEXT: pmuludq %xmm0, %xmm5
+; SSE-NEXT: psllq $32, %xmm5
+; SSE-NEXT: paddq %xmm5, %xmm4
+; SSE-NEXT: psrlq $32, %xmm0
+; SSE-NEXT: pmuludq %xmm2, %xmm0
+; SSE-NEXT: psllq $32, %xmm0
+; SSE-NEXT: paddq %xmm4, %xmm0
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: pmuludq %xmm3, %xmm2
+; SSE-NEXT: movdqa %xmm3, %xmm4
+; SSE-NEXT: psrlq $32, %xmm4
+; SSE-NEXT: pmuludq %xmm1, %xmm4
+; SSE-NEXT: psllq $32, %xmm4
+; SSE-NEXT: paddq %xmm4, %xmm2
+; SSE-NEXT: psrlq $32, %xmm1
+; SSE-NEXT: pmuludq %xmm3, %xmm1
+; SSE-NEXT: psllq $32, %xmm1
+; SSE-NEXT: paddq %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX2-LABEL: b2:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpsrlq $32, %ymm1, %ymm3
+; AVX2-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpsllq $32, %ymm3, %ymm3
+; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: retq
entry:
%A = mul <4 x i64> %i, %j
ret <4 x i64> %A
diff --git a/test/CodeGen/X86/pop-stack-cleanup.ll b/test/CodeGen/X86/pop-stack-cleanup.ll
new file mode 100644
index 000000000000..bcf7594065f3
--- /dev/null
+++ b/test/CodeGen/X86/pop-stack-cleanup.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX64
+
+declare void @param1(i32 %a)
+declare i32 @param2_ret(i32 %a, i32 %b)
+declare i64 @param2_ret64(i32 %a, i32 %b)
+declare void @param2(i32 %a, i32 %b)
+declare void @param3(i32 %a, i32 %b, i32 %c)
+declare void @param8(i64, i64, i64, i64, i64, i64, i64, i64)
+
+
+define void @test() minsize nounwind {
+; CHECK-LABEL: test:
+; CHECK: calll _param1
+; CHECK-NEXT: popl %eax
+; CHECK: calll _param2
+; CHECK-NEXT: popl %eax
+; CHECK-NEXT: popl %ecx
+; CHECK: calll _param2_ret
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %edx
+; CHECK-NEXT: pushl %eax
+; CHECK: calll _param3
+; CHECK-NEXT: addl $12, %esp
+; CHECK: calll _param2_ret64
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %ecx
+ call void @param1(i32 1)
+ call void @param2(i32 1, i32 2)
+ %ret = call i32 @param2_ret(i32 1, i32 2)
+ call void @param3(i32 1, i32 2, i32 %ret)
+ %ret64 = call i64 @param2_ret64(i32 1, i32 2)
+ ret void
+}
+
+define void @negative(i32 %k) {
+; CHECK-LABEL: negative:
+; CHECK: calll _param1
+; CHECK-NEXT: addl $4, %esp
+; CHECK: calll _param2
+; CHECK-NEXT: addl $8, %esp
+; CHECK: calll _param3
+; CHECK-NEXT: movl %ebp, %esp
+ %v = alloca i32, i32 %k
+ call void @param1(i32 1)
+ call void @param2(i32 1, i32 2)
+ call void @param3(i32 1, i32 2, i32 3)
+ ret void
+}
+
+define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize nounwind {
+; CHECK-LABEL: spill:
+; CHECK-DAG: movl %ecx,
+; CHECK-DAG: movl %edx,
+; CHECK: calll _param2_ret
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %edx
+; CHECK-DAG: movl {{.*}}, %ecx
+; CHECK-DAG: movl {{.*}}, %edx
+; CHECK: calll _spill
+ %i = call i32 @param2_ret(i32 1, i32 2)
+ call void @spill(i32 %a, i32 %b, i32 %c)
+ ret void
+}
+
+define void @test_linux64(i32 %size) minsize nounwind {
+; LINUX64-LABEL: test_linux64:
+; LINUX64: pushq %rbp
+; LINUX64: callq param8
+; LINUX64-NEXT: popq %rax
+; LINUX64-NEXT: popq %rcx
+
+ %a = alloca i64, i32 %size, align 8
+ call void @param8(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8)
+ ret void
+}
diff --git a/test/CodeGen/X86/powi.ll b/test/CodeGen/X86/powi.ll
index c3d68312ce15..17d3e3e7d33c 100644
--- a/test/CodeGen/X86/powi.ll
+++ b/test/CodeGen/X86/powi.ll
@@ -1,10 +1,38 @@
-; RUN: llc %s -march=x86 -mcpu=yonah -o - | grep mulsd | count 6
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
; Ideally this would compile to 5 multiplies.
-define double @_Z3f10d(double %a) nounwind readonly ssp noredzone {
-entry:
- %0 = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
- ret double %0
+define double @pow_wrapper(double %a) nounwind readonly ssp noredzone {
+; CHECK-LABEL: pow_wrapper:
+; CHECK: # BB#0:
+; CHECK-NEXT: movapd %xmm0, %xmm1
+; CHECK-NEXT: mulsd %xmm1, %xmm1
+; CHECK-NEXT: mulsd %xmm1, %xmm0
+; CHECK-NEXT: mulsd %xmm1, %xmm1
+; CHECK-NEXT: mulsd %xmm1, %xmm0
+; CHECK-NEXT: mulsd %xmm1, %xmm1
+; CHECK-NEXT: mulsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
+ ret double %ret
+}
+
+define double @pow_wrapper_optsize(double %a) optsize {
+; CHECK-LABEL: pow_wrapper_optsize:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $15, %edi
+; CHECK-NEXT: jmp
+ %ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
+ ret double %ret
+}
+
+define double @pow_wrapper_minsize(double %a) minsize {
+; CHECK-LABEL: pow_wrapper_minsize:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $128, %edi
+; CHECK-NEXT: jmp
+ %ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; <double> [#uses=1]
+ ret double %ret
}
declare double @llvm.powi.f64(double, i32) nounwind readonly
diff --git a/test/CodeGen/X86/pr11415.ll b/test/CodeGen/X86/pr11415.ll
index 6c32a2206a7e..73c497014116 100644
--- a/test/CodeGen/X86/pr11415.ll
+++ b/test/CodeGen/X86/pr11415.ll
@@ -4,15 +4,17 @@
; defining %0 before it was read. This caused us to omit the
; movq -8(%rsp), %rdx
+; CHECK: pushq %rax
; CHECK: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movq %rcx, %rax
-; CHECK-NEXT: movq %rax, -8(%rsp)
-; CHECK-NEXT: movq -8(%rsp), %rdx
+; CHECK-NEXT: movq %rax, (%rsp)
+; CHECK-NEXT: movq (%rsp), %rdx
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: movq %rdx, -8(%rsp)
+; CHECK-NEXT: movq %rdx, (%rsp)
+; CHECK-NEXT: popq %rcx
; CHECK-NEXT: ret
define i64 @foo() {
diff --git a/test/CodeGen/X86/pr11468.ll b/test/CodeGen/X86/pr11468.ll
index f721df11586b..7a2cc5b1a60d 100644
--- a/test/CodeGen/X86/pr11468.ll
+++ b/test/CodeGen/X86/pr11468.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -stackrealign -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
; PR11468
define void @f(i64 %sz) uwtable {
diff --git a/test/CodeGen/X86/pr11985.ll b/test/CodeGen/X86/pr11985.ll
index fa378502f724..aae00de112d3 100644
--- a/test/CodeGen/X86/pr11985.ll
+++ b/test/CodeGen/X86/pr11985.ll
@@ -1,6 +1,28 @@
-; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=prescott | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=prescott | FileCheck %s --check-prefix=PRESCOTT
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM
+
+;;; TODO: (1) Some of the loads and stores are certainly unaligned and (2) the first load and first
+;;; store overlap with the second load and second store respectively.
+;;;
+;;; Is either of these sequences ideal?
define float @foo(i8* nocapture %buf, float %a, float %b) nounwind uwtable {
+; PRESCOTT-LABEL: foo:
+; PRESCOTT: # BB#0: # %entry
+; PRESCOTT-NEXT: movq .Ltmp0+14(%rip), %rax
+; PRESCOTT-NEXT: movq %rax, 14(%rdi)
+; PRESCOTT-NEXT: movq .Ltmp0+8(%rip), %rax
+; PRESCOTT-NEXT: movq %rax, 8(%rdi)
+; PRESCOTT-NEXT: movq .Ltmp0(%rip), %rax
+; PRESCOTT-NEXT: movq %rax, (%rdi)
+;
+; NEHALEM-LABEL: foo:
+; NEHALEM: # BB#0: # %entry
+; NEHALEM-NEXT: movq .Ltmp0+14(%rip), %rax
+; NEHALEM-NEXT: movq %rax, 14(%rdi)
+; NEHALEM-NEXT: movups .Ltmp0(%rip), %xmm2
+; NEHALEM-NEXT: movups %xmm2, (%rdi)
+
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf, i8* blockaddress(@foo, %out), i64 22, i32 1, i1 false)
br label %out
@@ -8,12 +30,6 @@ entry:
out: ; preds = %entry
%add = fadd float %a, %b
ret float %add
-; CHECK: foo
-; CHECK: movw .L{{.*}}+20(%rip), %{{.*}}
-; CHECK: movl .L{{.*}}+16(%rip), %{{.*}}
-; CHECK: movq .L{{.*}}+8(%rip), %{{.*}}
-; CHECK: movq .L{{.*}}(%rip), %{{.*}}
-; CHECK: ret
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/pr13577.ll b/test/CodeGen/X86/pr13577.ll
index a6b721a7a6f1..2228fbbaa53b 100644
--- a/test/CodeGen/X86/pr13577.ll
+++ b/test/CodeGen/X86/pr13577.ll
@@ -6,10 +6,7 @@
; CHECK-NEXT: .long 2139095040
; CHECK-LABEL: foo:
-; CHECK: movq {{.*}}, %rax
-; CHECK: shlq $48, %rax
-; CHECK: sets %al
-; CHECK: testb %al, %al
+; CHECK: testb $-128, -15(%rsp)
; CHECK: flds LCPI0_0(%rip)
; CHECK: flds LCPI0_1(%rip)
; CHECK: fcmovne %st(1), %st(0)
diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll
index 95d7deb34170..9fc754aa1128 100644
--- a/test/CodeGen/X86/pr15267.ll
+++ b/test/CodeGen/X86/pr15267.ll
@@ -1,138 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s
define <4 x i3> @test1(<4 x i3>* %in) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $3, %ecx
+; CHECK-NEXT: andl $7, %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $7, %edx
+; CHECK-NEXT: vmovd %edx, %xmm0
+; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $6, %ecx
+; CHECK-NEXT: andl $7, %ecx
+; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: shrl $9, %eax
+; CHECK-NEXT: andl $7, %eax
+; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; CHECK-NEXT: retq
%ret = load <4 x i3>, <4 x i3>* %in, align 1
ret <4 x i3> %ret
}
-; CHECK-LABEL: test1
-; CHECK: movzwl
-; CHECK: shrl $3
-; CHECK: andl $7
-; CHECK: andl $7
-; CHECK: vmovd
-; CHECK: pinsrd $1
-; CHECK: shrl $6
-; CHECK: andl $7
-; CHECK: pinsrd $2
-; CHECK: shrl $9
-; CHECK: andl $7
-; CHECK: pinsrd $3
-; CHECK: ret
define <4 x i1> @test2(<4 x i1>* %in) nounwind {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0:
+; CHECK-NEXT: movzbl (%rdi), %eax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: vmovd %edx, %xmm0
+; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $2, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: shrl $3, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; CHECK-NEXT: retq
%ret = load <4 x i1>, <4 x i1>* %in, align 1
ret <4 x i1> %ret
}
-; CHECK-LABEL: test2
-; CHECK: movzbl
-; CHECK: shrl
-; CHECK: andl $1
-; CHECK: andl $1
-; CHECK: vmovd
-; CHECK: pinsrd $1
-; CHECK: shrl $2
-; CHECK: andl $1
-; CHECK: pinsrd $2
-; CHECK: shrl $3
-; CHECK: andl $1
-; CHECK: pinsrd $3
-; CHECK: ret
-
define <4 x i64> @test3(<4 x i1>* %in) nounwind {
+; CHECK-LABEL: test3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movzbl (%rdi), %eax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shlq $62, %rcx
+; CHECK-NEXT: sarq $63, %rcx
+; CHECK-NEXT: movq %rax, %rdx
+; CHECK-NEXT: shlq $63, %rdx
+; CHECK-NEXT: sarq $63, %rdx
+; CHECK-NEXT: vmovd %edx, %xmm0
+; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shlq $61, %rcx
+; CHECK-NEXT: sarq $63, %rcx
+; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: shlq $60, %rax
+; CHECK-NEXT: sarq $63, %rax
+; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%wide.load35 = load <4 x i1>, <4 x i1>* %in, align 1
%sext = sext <4 x i1> %wide.load35 to <4 x i64>
ret <4 x i64> %sext
}
-; CHECK-LABEL: test3
-; CHECK: movzbl
-; CHECK: movq
-; CHECK: shlq
-; CHECK: sarq
-; CHECK: movq
-; CHECK: shlq
-; CHECK: sarq
-; CHECK: vmovd
-; CHECK: vpinsrd
-; CHECK: movq
-; CHECK: shlq
-; CHECK: sarq
-; CHECK: vpinsrd
-; CHECK: shlq
-; CHECK: sarq
-; CHECK: vpinsrd
-; CHECK: vpmovsxdq
-; CHECK: vmovd
-; CHECK: vpinsrd
-; CHECK: vpmovsxdq
-; CHECK: vinsertf128
-; CHECK: ret
-
define <16 x i4> @test4(<16 x i4>* %in) nounwind {
+; CHECK-LABEL: test4:
+; CHECK: # BB#0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $4, %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $15, %edx
+; CHECK-NEXT: vmovd %edx, %xmm0
+; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $8, %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $12, %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $16, %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $20, %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $24, %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: shrl $28, %ecx
+; CHECK-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq $32, %rcx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq $36, %rcx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq $40, %rcx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq $44, %rcx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq $48, %rcx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq $52, %rcx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq $56, %rcx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; CHECK-NEXT: shrq $60, %rax
+; CHECK-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; CHECK-NEXT: retq
%ret = load <16 x i4>, <16 x i4>* %in, align 1
ret <16 x i4> %ret
}
-
-; CHECK-LABEL: test4
-; CHECK: movl
-; CHECK-NEXT: shrl
-; CHECK-NEXT: andl
-; CHECK-NEXT: movl
-; CHECK-NEXT: andl
-; CHECK-NEXT: vmovd
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movl
-; CHECK-NEXT: shrl
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movl
-; CHECK-NEXT: shrl
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movl
-; CHECK-NEXT: shrl
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movl
-; CHECK-NEXT: shrl
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movl
-; CHECK-NEXT: shrl
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movl
-; CHECK-NEXT: shrl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movq
-; CHECK-NEXT: shrq
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movq
-; CHECK-NEXT: shrq
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movq
-; CHECK-NEXT: shrq
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movq
-; CHECK-NEXT: shrq
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movq
-; CHECK-NEXT: shrq
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movq
-; CHECK-NEXT: shrq
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: movq
-; CHECK-NEXT: shrq
-; CHECK-NEXT: andl
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: shrq
-; CHECK-NEXT: vpinsrb
-; CHECK-NEXT: retq
diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll
index 98f951f1b10c..08c393d29d69 100644
--- a/test/CodeGen/X86/pr17631.ll
+++ b/test/CodeGen/X86/pr17631.ll
@@ -30,5 +30,5 @@ define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) {
; CHECK: foo
; CHECK-NOT: vzeroupper
-; CHECK: _ftol2
+; CHECK: {{cvtt|fist}}
; CHECK: ret
diff --git a/test/CodeGen/X86/pr21529.ll b/test/CodeGen/X86/pr21529.ll
deleted file mode 100644
index 655bc844f503..000000000000
--- a/test/CodeGen/X86/pr21529.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc -show-mc-encoding < %s | FileCheck %s
-
-; Test that the direct object emission selects the and variant with 8 bit
-; immediate.
-; We used to get this wrong when using direct object emission, but not when
-; reading assembly.
-
-; CHECK: andq $-32, %rsp # encoding: [0x48,0x83,0xe4,0xe0]
-
-target triple = "x86_64-pc-linux"
-
-define void @f() {
- %foo = alloca i8, align 32
- ret void
-}
diff --git a/test/CodeGen/X86/pr22019.ll b/test/CodeGen/X86/pr22019.ll
index 4cee5d704d3a..cfc53cb6be0b 100644
--- a/test/CodeGen/X86/pr22019.ll
+++ b/test/CodeGen/X86/pr22019.ll
@@ -20,4 +20,4 @@ define void @pselect() {
@var = global i32 0
; CHECK: alias = var
-@alias = alias i32* @var
+@alias = alias i32, i32* @var
diff --git a/test/CodeGen/X86/pr23900.ll b/test/CodeGen/X86/pr23900.ll
deleted file mode 100644
index cbc77161c042..000000000000
--- a/test/CodeGen/X86/pr23900.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc -filetype=obj %s -o %t.o
-; RUN: llvm-nm %t.o | FileCheck %s
-
-; Test that it doesn't crash (and produces an object file).
-; This use to pass a symbol with a null name to code that expected a valid
-; C string.
-
-; CHECK: U __CxxFrameHandler3
-; CHECK: T f
-; CHECK: t f.cleanup
-; CHECK: U g
-; CHECK: U h
-
-
-target triple = "x86_64-pc-windows-msvc18.0.0"
-define void @f(i32 %x) personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
- invoke void @h()
- to label %invoke.cont unwind label %lpad
-invoke.cont:
- ret void
-lpad:
- landingpad { i8*, i32 }
- cleanup
- call void @g(i32 %x)
- ret void
-}
-declare void @h()
-declare i32 @__CxxFrameHandler3(...)
-declare void @g(i32 %x)
diff --git a/test/CodeGen/X86/pr24139.ll b/test/CodeGen/X86/pr24139.ll
new file mode 100644
index 000000000000..fbe55abcbf7c
--- /dev/null
+++ b/test/CodeGen/X86/pr24139.ll
@@ -0,0 +1,148 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+
+; Check that we do not get excessive spilling from splitting of constant live ranges.
+
+; CHECK-LABEL: PR24139:
+; CHECK: # 16-byte Spill
+; CHECK-NOT: # 16-byte Spill
+; CHECK: retq
+
+define <2 x double> @PR24139(<2 x double> %arg, <2 x double> %arg1, <2 x double> %arg2) {
+ %tmp = bitcast <2 x double> %arg to <4 x float>
+ %tmp3 = fmul <4 x float> %tmp, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
+ %tmp4 = bitcast <2 x double> %arg to <4 x i32>
+ %tmp5 = and <4 x i32> %tmp4, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp6 = or <4 x i32> %tmp5, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
+ %tmp7 = bitcast <4 x i32> %tmp6 to <4 x float>
+ %tmp8 = fadd <4 x float> %tmp3, %tmp7
+ %tmp9 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp8) #2
+ %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>
+ %tmp11 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp9) #2
+ %tmp12 = fmul <4 x float> %tmp11, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
+ %tmp13 = fsub <4 x float> %tmp, %tmp12
+ %tmp14 = fmul <4 x float> %tmp11, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
+ %tmp15 = fsub <4 x float> %tmp13, %tmp14
+ %tmp16 = fmul <4 x float> %tmp15, %tmp15
+ %tmp17 = fmul <4 x float> %tmp15, %tmp16
+ %tmp18 = fmul <4 x float> %tmp16, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
+ %tmp19 = fadd <4 x float> %tmp18, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
+ %tmp20 = fmul <4 x float> %tmp16, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
+ %tmp21 = fadd <4 x float> %tmp20, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
+ %tmp22 = fmul <4 x float> %tmp16, %tmp19
+ %tmp23 = fadd <4 x float> %tmp22, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
+ %tmp24 = fmul <4 x float> %tmp16, %tmp21
+ %tmp25 = fadd <4 x float> %tmp24, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
+ %tmp26 = fmul <4 x float> %tmp16, %tmp23
+ %tmp27 = fadd <4 x float> %tmp26, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %tmp28 = fmul <4 x float> %tmp17, %tmp25
+ %tmp29 = fadd <4 x float> %tmp15, %tmp28
+ %tmp30 = and <2 x i64> %tmp10, <i64 4294967297, i64 4294967297>
+ %tmp31 = bitcast <2 x i64> %tmp30 to <4 x i32>
+ %tmp32 = icmp eq <4 x i32> %tmp31, zeroinitializer
+ %tmp33 = sext <4 x i1> %tmp32 to <4 x i32>
+ %tmp34 = bitcast <4 x i32> %tmp33 to <4 x float>
+ %tmp35 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp27, <4 x float> %tmp29, <4 x float> %tmp34) #2
+ %tmp36 = and <2 x i64> %tmp10, <i64 8589934594, i64 8589934594>
+ %tmp37 = bitcast <2 x i64> %tmp36 to <4 x i32>
+ %tmp38 = icmp eq <4 x i32> %tmp37, zeroinitializer
+ %tmp39 = sext <4 x i1> %tmp38 to <4 x i32>
+ %tmp40 = bitcast <4 x float> %tmp35 to <4 x i32>
+ %tmp41 = xor <4 x i32> %tmp40, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp42 = bitcast <4 x i32> %tmp41 to <4 x float>
+ %tmp43 = bitcast <4 x i32> %tmp39 to <4 x float>
+ %tmp44 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp42, <4 x float> %tmp35, <4 x float> %tmp43) #2
+ %tmp45 = bitcast <2 x double> %arg1 to <4 x float>
+ %tmp46 = fmul <4 x float> %tmp45, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
+ %tmp47 = bitcast <2 x double> %arg1 to <4 x i32>
+ %tmp48 = and <4 x i32> %tmp47, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp49 = or <4 x i32> %tmp48, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
+ %tmp50 = bitcast <4 x i32> %tmp49 to <4 x float>
+ %tmp51 = fadd <4 x float> %tmp46, %tmp50
+ %tmp52 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp51) #2
+ %tmp53 = bitcast <4 x i32> %tmp52 to <2 x i64>
+ %tmp54 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp52) #2
+ %tmp55 = fmul <4 x float> %tmp54, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
+ %tmp56 = fsub <4 x float> %tmp45, %tmp55
+ %tmp57 = fmul <4 x float> %tmp54, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
+ %tmp58 = fsub <4 x float> %tmp56, %tmp57
+ %tmp59 = fmul <4 x float> %tmp58, %tmp58
+ %tmp60 = fmul <4 x float> %tmp58, %tmp59
+ %tmp61 = fmul <4 x float> %tmp59, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
+ %tmp62 = fadd <4 x float> %tmp61, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
+ %tmp63 = fmul <4 x float> %tmp59, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
+ %tmp64 = fadd <4 x float> %tmp63, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
+ %tmp65 = fmul <4 x float> %tmp59, %tmp62
+ %tmp66 = fadd <4 x float> %tmp65, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
+ %tmp67 = fmul <4 x float> %tmp59, %tmp64
+ %tmp68 = fadd <4 x float> %tmp67, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
+ %tmp69 = fmul <4 x float> %tmp59, %tmp66
+ %tmp70 = fadd <4 x float> %tmp69, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %tmp71 = fmul <4 x float> %tmp60, %tmp68
+ %tmp72 = fadd <4 x float> %tmp58, %tmp71
+ %tmp73 = and <2 x i64> %tmp53, <i64 4294967297, i64 4294967297>
+ %tmp74 = bitcast <2 x i64> %tmp73 to <4 x i32>
+ %tmp75 = icmp eq <4 x i32> %tmp74, zeroinitializer
+ %tmp76 = sext <4 x i1> %tmp75 to <4 x i32>
+ %tmp77 = bitcast <4 x i32> %tmp76 to <4 x float>
+ %tmp78 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp70, <4 x float> %tmp72, <4 x float> %tmp77) #2
+ %tmp79 = and <2 x i64> %tmp53, <i64 8589934594, i64 8589934594>
+ %tmp80 = bitcast <2 x i64> %tmp79 to <4 x i32>
+ %tmp81 = icmp eq <4 x i32> %tmp80, zeroinitializer
+ %tmp82 = sext <4 x i1> %tmp81 to <4 x i32>
+ %tmp83 = bitcast <4 x float> %tmp78 to <4 x i32>
+ %tmp84 = xor <4 x i32> %tmp83, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp85 = bitcast <4 x i32> %tmp84 to <4 x float>
+ %tmp86 = bitcast <4 x i32> %tmp82 to <4 x float>
+ %tmp87 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp85, <4 x float> %tmp78, <4 x float> %tmp86) #2
+ %tmp88 = fadd <4 x float> %tmp44, %tmp87
+ %tmp89 = bitcast <2 x double> %arg2 to <4 x float>
+ %tmp90 = fmul <4 x float> %tmp89, <float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000, float 0x3FE45F3060000000>
+ %tmp91 = bitcast <2 x double> %arg2 to <4 x i32>
+ %tmp92 = and <4 x i32> %tmp91, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp93 = or <4 x i32> %tmp92, <i32 1056964608, i32 1056964608, i32 1056964608, i32 1056964608>
+ %tmp94 = bitcast <4 x i32> %tmp93 to <4 x float>
+ %tmp95 = fadd <4 x float> %tmp90, %tmp94
+ %tmp96 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %tmp95) #2
+ %tmp97 = bitcast <4 x i32> %tmp96 to <2 x i64>
+ %tmp98 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp96) #2
+ %tmp99 = fmul <4 x float> %tmp98, <float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000, float 0x3FF921FB40000000>
+ %tmp100 = fsub <4 x float> %tmp89, %tmp99
+ %tmp101 = fmul <4 x float> %tmp98, <float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000, float 0x3E74442D00000000>
+ %tmp102 = fsub <4 x float> %tmp100, %tmp101
+ %tmp103 = fmul <4 x float> %tmp102, %tmp102
+ %tmp104 = fmul <4 x float> %tmp102, %tmp103
+ %tmp105 = fmul <4 x float> %tmp103, <float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000, float 0xBF56493260000000>
+ %tmp106 = fadd <4 x float> %tmp105, <float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000, float 0x3FA55406C0000000>
+ %tmp107 = fmul <4 x float> %tmp103, <float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000, float 0xBF29918DC0000000>
+ %tmp108 = fadd <4 x float> %tmp107, <float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000, float 0x3F81106840000000>
+ %tmp109 = fmul <4 x float> %tmp103, %tmp106
+ %tmp110 = fadd <4 x float> %tmp109, <float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000, float 0xBFDFFFFBE0000000>
+ %tmp111 = fmul <4 x float> %tmp103, %tmp108
+ %tmp112 = fadd <4 x float> %tmp111, <float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000, float 0xBFC5555420000000>
+ %tmp113 = fmul <4 x float> %tmp103, %tmp110
+ %tmp114 = fadd <4 x float> %tmp113, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ %tmp115 = fmul <4 x float> %tmp104, %tmp112
+ %tmp116 = fadd <4 x float> %tmp102, %tmp115
+ %tmp117 = and <2 x i64> %tmp97, <i64 4294967297, i64 4294967297>
+ %tmp118 = bitcast <2 x i64> %tmp117 to <4 x i32>
+ %tmp119 = icmp eq <4 x i32> %tmp118, zeroinitializer
+ %tmp120 = sext <4 x i1> %tmp119 to <4 x i32>
+ %tmp121 = bitcast <4 x i32> %tmp120 to <4 x float>
+ %tmp122 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp114, <4 x float> %tmp116, <4 x float> %tmp121) #2
+ %tmp123 = and <2 x i64> %tmp97, <i64 8589934594, i64 8589934594>
+ %tmp124 = bitcast <2 x i64> %tmp123 to <4 x i32>
+ %tmp125 = icmp eq <4 x i32> %tmp124, zeroinitializer
+ %tmp126 = sext <4 x i1> %tmp125 to <4 x i32>
+ %tmp127 = bitcast <4 x float> %tmp122 to <4 x i32>
+ %tmp128 = xor <4 x i32> %tmp127, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %tmp129 = bitcast <4 x i32> %tmp128 to <4 x float>
+ %tmp130 = bitcast <4 x i32> %tmp126 to <4 x float>
+ %tmp131 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %tmp129, <4 x float> %tmp122, <4 x float> %tmp130) #2
+ %tmp132 = fadd <4 x float> %tmp88, %tmp131
+ %tmp133 = bitcast <4 x float> %tmp132 to <2 x double>
+ ret <2 x double> %tmp133
+}
+
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>)
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
diff --git a/test/CodeGen/X86/pr24602.ll b/test/CodeGen/X86/pr24602.ll
new file mode 100644
index 000000000000..9c029aeefec9
--- /dev/null
+++ b/test/CodeGen/X86/pr24602.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; PR24602: Make sure we don't barf on non-foldable code (with opaque constants).
+
+; CHECK-LABEL: pr24602:
+; CHECK-NEXT: # BB#0
+; CHECK-NEXT: movabsq $-10000000000, [[CST:%[a-z0-9]+]]
+; CHECK-NEXT: imulq [[CST]], %rsi
+; CHECK-NEXT: leaq (%rdi,%rsi,8), %rax
+; CHECK-NEXT: movq [[CST]], (%rdi,%rsi,8)
+; CHECK-NEXT: retq
+define i64* @pr24602(i64* %p, i64 %n) nounwind {
+ %mul = mul nsw i64 %n, -10000000000
+ %add.ptr = getelementptr inbounds i64, i64* %p, i64 %mul
+ store i64 -10000000000, i64* %add.ptr
+ ret i64* %add.ptr
+}
diff --git a/test/CodeGen/X86/pr25828.ll b/test/CodeGen/X86/pr25828.ll
new file mode 100644
index 000000000000..8fbabc7d0c6d
--- /dev/null
+++ b/test/CodeGen/X86/pr25828.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=i686-pc-windows-msvc -relocation-model=pic | FileCheck %s
+; MOVPC32r should not generate CFI under windows
+
+; CHECK-LABEL: _foo:
+; CHECK-NOT: .cfi_adjust_cfa_offset
+define void @foo(i8) {
+entry-block:
+ switch i8 %0, label %bb2 [
+ i8 1, label %bb1
+ i8 2, label %bb2
+ i8 3, label %bb3
+ i8 4, label %bb4
+ i8 5, label %bb5
+ ]
+
+bb1:
+ ret void
+
+bb2:
+ ret void
+
+bb3:
+ ret void
+
+bb4:
+ ret void
+
+bb5:
+ ret void
+}
diff --git a/test/CodeGen/X86/prolog-push-seq.ll b/test/CodeGen/X86/prolog-push-seq.ll
new file mode 100644
index 000000000000..f23791aef922
--- /dev/null
+++ b/test/CodeGen/X86/prolog-push-seq.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc18.0.0"
+
+declare x86_thiscallcc void @bar(i32 %a, i32 %b)
+
+define fastcc void @foo(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: foo:
+; CHECK: subl $64, %esp
+; CHECK-NEXT: pushl
+; CHECK-NEXT: calll _bar
+ %local = alloca i32, i32 16
+ call x86_thiscallcc void @bar(i32 %a, i32 %b)
+ call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
+ ret void
+}
+
+attributes #0 = { nounwind optsize "no-frame-pointer-elim-non-leaf"} \ No newline at end of file
diff --git a/test/CodeGen/X86/pseudo_cmov_lower.ll b/test/CodeGen/X86/pseudo_cmov_lower.ll
new file mode 100644
index 000000000000..c59e3478ff51
--- /dev/null
+++ b/test/CodeGen/X86/pseudo_cmov_lower.ll
@@ -0,0 +1,267 @@
+; RUN: llc < %s -mtriple=i386-linux-gnu -o - | FileCheck %s
+
+; This test checks that only a single js gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; CHECK-LABEL: foo1:
+; CHECK: js
+; CHECK-NOT: js
+define i32 @foo1(i32 %v1, i32 %v2, i32 %v3) nounwind {
+entry:
+ %cmp = icmp slt i32 %v1, 0
+ %v2.v3 = select i1 %cmp, i32 %v2, i32 %v3
+ %v1.v2 = select i1 %cmp, i32 %v1, i32 %v2
+ %sub = sub i32 %v1.v2, %v2.v3
+ ret i32 %sub
+}
+
+; This test checks that only a single js gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR. This makes
+; sure the code for the lowering for opposite conditions gets tested.
+; CHECK-LABEL: foo11:
+; CHECK: js
+; CHECK-NOT: js
+; CHECK-NOT: jns
+define i32 @foo11(i32 %v1, i32 %v2, i32 %v3) nounwind {
+entry:
+ %cmp1 = icmp slt i32 %v1, 0
+ %v2.v3 = select i1 %cmp1, i32 %v2, i32 %v3
+ %cmp2 = icmp sge i32 %v1, 0
+ %v1.v2 = select i1 %cmp2, i32 %v1, i32 %v2
+ %sub = sub i32 %v1.v2, %v2.v3
+ ret i32 %sub
+}
+
+; This test checks that only a single js gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; CHECK-LABEL: foo2:
+; CHECK: js
+; CHECK-NOT: js
+define i32 @foo2(i8 %v1, i8 %v2, i8 %v3) nounwind {
+entry:
+ %cmp = icmp slt i8 %v1, 0
+ %v2.v3 = select i1 %cmp, i8 %v2, i8 %v3
+ %v1.v2 = select i1 %cmp, i8 %v1, i8 %v2
+ %t1 = sext i8 %v2.v3 to i32
+ %t2 = sext i8 %v1.v2 to i32
+ %sub = sub i32 %t1, %t2
+ ret i32 %sub
+}
+
+; This test checks that only a single js gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; CHECK-LABEL: foo3:
+; CHECK: js
+; CHECK-NOT: js
+define i32 @foo3(i16 %v1, i16 %v2, i16 %v3) nounwind {
+entry:
+ %cmp = icmp slt i16 %v1, 0
+ %v2.v3 = select i1 %cmp, i16 %v2, i16 %v3
+ %v1.v2 = select i1 %cmp, i16 %v1, i16 %v2
+ %t1 = sext i16 %v2.v3 to i32
+ %t2 = sext i16 %v1.v2 to i32
+ %sub = sub i32 %t1, %t2
+ ret i32 %sub
+}
+
+; This test checks that only a single js gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; CHECK-LABEL: foo4:
+; CHECK: js
+; CHECK-NOT: js
+define float @foo4(i32 %v1, float %v2, float %v3, float %v4) nounwind {
+entry:
+ %cmp = icmp slt i32 %v1, 0
+ %t1 = select i1 %cmp, float %v2, float %v3
+ %t2 = select i1 %cmp, float %v3, float %v4
+ %sub = fsub float %t1, %t2
+ ret float %sub
+}
+
+; This test checks that only a single je gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; CHECK-LABEL: foo5:
+; CHECK: je
+; CHECK-NOT: je
+define double @foo5(i32 %v1, double %v2, double %v3, double %v4) nounwind {
+entry:
+ %cmp = icmp eq i32 %v1, 0
+ %t1 = select i1 %cmp, double %v2, double %v3
+ %t2 = select i1 %cmp, double %v3, double %v4
+ %sub = fsub double %t1, %t2
+ ret double %sub
+}
+
+; This test checks that only a single je gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; CHECK-LABEL: foo6:
+; CHECK: je
+; CHECK-NOT: je
+define <4 x float> @foo6(i32 %v1, <4 x float> %v2, <4 x float> %v3, <4 x float> %v4) nounwind {
+entry:
+ %cmp = icmp eq i32 %v1, 0
+ %t1 = select i1 %cmp, <4 x float> %v2, <4 x float> %v3
+ %t2 = select i1 %cmp, <4 x float> %v3, <4 x float> %v4
+ %sub = fsub <4 x float> %t1, %t2
+ ret <4 x float> %sub
+}
+
+; This test checks that only a single je gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; CHECK-LABEL: foo7:
+; CHECK: je
+; CHECK-NOT: je
+define <2 x double> @foo7(i32 %v1, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4) nounwind {
+entry:
+ %cmp = icmp eq i32 %v1, 0
+ %t1 = select i1 %cmp, <2 x double> %v2, <2 x double> %v3
+ %t2 = select i1 %cmp, <2 x double> %v3, <2 x double> %v4
+ %sub = fsub <2 x double> %t1, %t2
+ ret <2 x double> %sub
+}
+
+; This test checks that only a single ja gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR. This combines
+; all the supported types together into one long string of selects based
+; on the same condition.
+; CHECK-LABEL: foo8:
+; CHECK: ja
+; CHECK-NOT: ja
+define void @foo8(i32 %v1,
+ i8 %v2, i8 %v3,
+ i16 %v12, i16 %v13,
+ i32 %v22, i32 %v23,
+ float %v32, float %v33,
+ double %v42, double %v43,
+ <4 x float> %v52, <4 x float> %v53,
+ <2 x double> %v62, <2 x double> %v63,
+ <8 x float> %v72, <8 x float> %v73,
+ <4 x double> %v82, <4 x double> %v83,
+ <16 x float> %v92, <16 x float> %v93,
+ <8 x double> %v102, <8 x double> %v103,
+ i8 * %dst) nounwind {
+entry:
+ %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 2
+ %a11 = bitcast i8* %add.ptr11 to i16*
+
+ %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4
+ %a21 = bitcast i8* %add.ptr21 to i32*
+
+ %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8
+ %a31 = bitcast i8* %add.ptr31 to float*
+
+ %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16
+ %a41 = bitcast i8* %add.ptr41 to double*
+
+ %add.ptr51 = getelementptr inbounds i8, i8* %dst, i32 32
+ %a51 = bitcast i8* %add.ptr51 to <4 x float>*
+
+ %add.ptr61 = getelementptr inbounds i8, i8* %dst, i32 48
+ %a61 = bitcast i8* %add.ptr61 to <2 x double>*
+
+ %add.ptr71 = getelementptr inbounds i8, i8* %dst, i32 64
+ %a71 = bitcast i8* %add.ptr71 to <8 x float>*
+
+ %add.ptr81 = getelementptr inbounds i8, i8* %dst, i32 128
+ %a81 = bitcast i8* %add.ptr81 to <4 x double>*
+
+ %add.ptr91 = getelementptr inbounds i8, i8* %dst, i32 64
+ %a91 = bitcast i8* %add.ptr91 to <16 x float>*
+
+ %add.ptr101 = getelementptr inbounds i8, i8* %dst, i32 128
+ %a101 = bitcast i8* %add.ptr101 to <8 x double>*
+
+ ; These operations are necessary, because select of two single use loads
+ ; ends up getting optimized into a select of two leas, followed by a
+ ; single load of the selected address.
+ %t13 = xor i16 %v13, 11
+ %t23 = xor i32 %v23, 1234
+ %t33 = fadd float %v33, %v32
+ %t43 = fadd double %v43, %v42
+ %t53 = fadd <4 x float> %v53, %v52
+ %t63 = fadd <2 x double> %v63, %v62
+ %t73 = fsub <8 x float> %v73, %v72
+ %t83 = fsub <4 x double> %v83, %v82
+ %t93 = fsub <16 x float> %v93, %v92
+ %t103 = fsub <8 x double> %v103, %v102
+
+ %cmp = icmp ugt i32 %v1, 31
+ %t11 = select i1 %cmp, i16 %v12, i16 %t13
+ %t21 = select i1 %cmp, i32 %v22, i32 %t23
+ %t31 = select i1 %cmp, float %v32, float %t33
+ %t41 = select i1 %cmp, double %v42, double %t43
+ %t51 = select i1 %cmp, <4 x float> %v52, <4 x float> %t53
+ %t61 = select i1 %cmp, <2 x double> %v62, <2 x double> %t63
+ %t71 = select i1 %cmp, <8 x float> %v72, <8 x float> %t73
+ %t81 = select i1 %cmp, <4 x double> %v82, <4 x double> %t83
+ %t91 = select i1 %cmp, <16 x float> %v92, <16 x float> %t93
+ %t101 = select i1 %cmp, <8 x double> %v102, <8 x double> %t103
+
+ store i16 %t11, i16* %a11, align 2
+ store i32 %t21, i32* %a21, align 4
+ store float %t31, float* %a31, align 4
+ store double %t41, double* %a41, align 8
+ store <4 x float> %t51, <4 x float>* %a51, align 16
+ store <2 x double> %t61, <2 x double>* %a61, align 16
+ store <8 x float> %t71, <8 x float>* %a71, align 32
+ store <4 x double> %t81, <4 x double>* %a81, align 32
+ store <16 x float> %t91, <16 x float>* %a91, align 32
+ store <8 x double> %t101, <8 x double>* %a101, align 32
+
+ ret void
+}
+
+; This test checks that only a single ja gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; on the same condition.
+; Contrary to my expectations, this doesn't exercise the code for
+; CMOV_V8I1, CMOV_V16I1, CMOV_V32I1, or CMOV_V64I1. Instead the selects all
+; get lowered into vector length number of selects, which all eventually turn
+; into a huge number of CMOV_GR8, which are all contiguous, so the optimization
+; kicks in as long as CMOV_GR8 is supported. I couldn't find a way to get
+; CMOV_V*I1 pseudo-opcodes to get generated. If a way exists to get CMOV_V*1
+; pseudo-opcodes to be generated, this test should be replaced with one that
+; tests those opcodes.
+;
+; CHECK-LABEL: foo9:
+; CHECK: ja
+; CHECK-NOT: ja
+define void @foo9(i32 %v1,
+ <8 x i1> %v12, <8 x i1> %v13,
+ <16 x i1> %v22, <16 x i1> %v23,
+ <32 x i1> %v32, <32 x i1> %v33,
+ <64 x i1> %v42, <64 x i1> %v43,
+ i8 * %dst) nounwind {
+entry:
+ %add.ptr11 = getelementptr inbounds i8, i8* %dst, i32 0
+ %a11 = bitcast i8* %add.ptr11 to <8 x i1>*
+
+ %add.ptr21 = getelementptr inbounds i8, i8* %dst, i32 4
+ %a21 = bitcast i8* %add.ptr21 to <16 x i1>*
+
+ %add.ptr31 = getelementptr inbounds i8, i8* %dst, i32 8
+ %a31 = bitcast i8* %add.ptr31 to <32 x i1>*
+
+ %add.ptr41 = getelementptr inbounds i8, i8* %dst, i32 16
+ %a41 = bitcast i8* %add.ptr41 to <64 x i1>*
+
+ ; These operations are necessary, because select of two single use loads
+ ; ends up getting optimized into a select of two leas, followed by a
+ ; single load of the selected address.
+ %t13 = xor <8 x i1> %v13, %v12
+ %t23 = xor <16 x i1> %v23, %v22
+ %t33 = xor <32 x i1> %v33, %v32
+ %t43 = xor <64 x i1> %v43, %v42
+
+ %cmp = icmp ugt i32 %v1, 31
+ %t11 = select i1 %cmp, <8 x i1> %v12, <8 x i1> %t13
+ %t21 = select i1 %cmp, <16 x i1> %v22, <16 x i1> %t23
+ %t31 = select i1 %cmp, <32 x i1> %v32, <32 x i1> %t33
+ %t41 = select i1 %cmp, <64 x i1> %v42, <64 x i1> %t43
+
+ store <8 x i1> %t11, <8 x i1>* %a11, align 16
+ store <16 x i1> %t21, <16 x i1>* %a21, align 4
+ store <32 x i1> %t31, <32 x i1>* %a31, align 8
+ store <64 x i1> %t41, <64 x i1>* %a41, align 16
+
+ ret void
+}
diff --git a/test/CodeGen/X86/pseudo_cmov_lower1.ll b/test/CodeGen/X86/pseudo_cmov_lower1.ll
new file mode 100644
index 000000000000..4ce131bb8645
--- /dev/null
+++ b/test/CodeGen/X86/pseudo_cmov_lower1.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=i386-linux-gnu -mattr=+sse2 -o - | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -o - | FileCheck %s
+
+; This test checks that only a single jae gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; CHECK-LABEL: foo1:
+; CHECK: jae
+; CHECK-NOT: jae
+define double @foo1(float %p1, double %p2, double %p3) nounwind {
+entry:
+ %c1 = fcmp oge float %p1, 0.000000e+00
+ %d0 = fadd double %p2, 1.25e0
+ %d1 = fadd double %p3, 1.25e0
+ %d2 = select i1 %c1, double %d0, double %d1
+ %d3 = select i1 %c1, double %d0, double %p2
+ %d4 = select i1 %c1, double %p3, double %d1
+ %d5 = fsub double %d2, %d3
+ %d6 = fadd double %d5, %d4
+ ret double %d6
+}
+
+; This test checks that only a single jae gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR.
+; CHECK-LABEL: foo2:
+; CHECK: jae
+; CHECK-NOT: jae
+define float @foo2(float %p1, float %p2, float %p3) nounwind {
+entry:
+ %c1 = fcmp oge float %p1, 0.000000e+00
+ %d0 = fadd float %p2, 1.25e0
+ %d1 = fadd float %p3, 1.25e0
+ %d2 = select i1 %c1, float %d0, float %d1
+ %d3 = select i1 %c1, float %d1, float %p2
+ %d4 = select i1 %c1, float %d0, float %p3
+ %d5 = fsub float %d2, %d3
+ %d6 = fadd float %d5, %d4
+ ret float %d6
+}
+
diff --git a/test/CodeGen/X86/pseudo_cmov_lower2.ll b/test/CodeGen/X86/pseudo_cmov_lower2.ll
new file mode 100644
index 000000000000..0133963b36d0
--- /dev/null
+++ b/test/CodeGen/X86/pseudo_cmov_lower2.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -o - | FileCheck %s
+
+; This test checks that only a single jae gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR. The tricky part
+; of this test is that it tests the special PHI operand rewriting code in
+; X86TargetLowering::EmitLoweredSelect.
+;
+; CHECK-LABEL: foo1:
+; CHECK: jae
+; CHECK-NOT: jae
+define double @foo1(float %p1, double %p2, double %p3) nounwind {
+entry:
+ %c1 = fcmp oge float %p1, 0.000000e+00
+ %d0 = fadd double %p2, 1.25e0
+ %d1 = fadd double %p3, 1.25e0
+ %d2 = select i1 %c1, double %d0, double %d1
+ %d3 = select i1 %c1, double %d2, double %p2
+ %d4 = select i1 %c1, double %d3, double %p3
+ %d5 = fsub double %d2, %d3
+ %d6 = fadd double %d5, %d4
+ ret double %d6
+}
+
+; This test checks that only a single jae gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR. The tricky part
+; of this test is that it tests the special PHI operand rewriting code in
+; X86TargetLowering::EmitLoweredSelect.
+;
+; CHECK-LABEL: foo2:
+; CHECK: jae
+; CHECK-NOT: jae
+define double @foo2(float %p1, double %p2, double %p3) nounwind {
+entry:
+ %c1 = fcmp oge float %p1, 0.000000e+00
+ %d0 = fadd double %p2, 1.25e0
+ %d1 = fadd double %p3, 1.25e0
+ %d2 = select i1 %c1, double %d0, double %d1
+ %d3 = select i1 %c1, double %p2, double %d2
+ %d4 = select i1 %c1, double %p3, double %d3
+ %d5 = fsub double %d2, %d3
+ %d6 = fadd double %d5, %d4
+ ret double %d6
+}
+
+; This test checks that only a single js gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR. The tricky part
+; of this test is that it tests the special PHI operand rewriting code in
+; X86TargetLowering::EmitLoweredSelect. It also tests to make sure all
+; the operands of the resulting instructions are from the proper places.
+;
+; CHECK-LABEL: foo3:
+; CHECK: js
+; CHECK-NOT: js
+; CHECK-LABEL: # BB#1:
+; CHECK-DAG: movapd %xmm2, %xmm1
+; CHECK-DAG: movapd %xmm2, %xmm0
+; CHECK-LABEL:.LBB2_2:
+; CHECK: divsd %xmm1, %xmm0
+; CHECK: ret
+define double @foo3(i32 %p1, double %p2, double %p3,
+ double %p4, double %p5) nounwind {
+entry:
+ %c1 = icmp slt i32 %p1, 0
+ %d2 = select i1 %c1, double %p2, double %p3
+ %d3 = select i1 %c1, double %p3, double %p4
+ %d4 = select i1 %c1, double %d2, double %d3
+ %d5 = fdiv double %d4, %d3
+ ret double %d5
+}
+
+; This test checks that only a single js gets generated in the final code
+; for lowering the CMOV pseudos that get created for this IR. The tricky part
+; of this test is that it tests the special PHI operand rewriting code in
+; X86TargetLowering::EmitLoweredSelect. It also tests to make sure all
+; the operands of the resulting instructions are from the proper places
+; when the "opposite condition" handling code in the compiler is used.
+; This should be the same code as foo3 above, because we use the opposite
+; condition code in the second two selects, but we also swap the operands
+; of the selects to give the same actual computation.
+;
+; CHECK-LABEL: foo4:
+; CHECK: js
+; CHECK-NOT: js
+; CHECK-LABEL: # BB#1:
+; CHECK-DAG: movapd %xmm2, %xmm1
+; CHECK-DAG: movapd %xmm2, %xmm0
+; CHECK-LABEL:.LBB3_2:
+; CHECK: divsd %xmm1, %xmm0
+; CHECK: ret
+define double @foo4(i32 %p1, double %p2, double %p3,
+ double %p4, double %p5) nounwind {
+entry:
+ %c1 = icmp slt i32 %p1, 0
+ %d2 = select i1 %c1, double %p2, double %p3
+ %c2 = icmp sge i32 %p1, 0
+ %d3 = select i1 %c2, double %p4, double %p3
+ %d4 = select i1 %c2, double %d3, double %d2
+ %d5 = fdiv double %d4, %d3
+ ret double %d5
+}
diff --git a/test/CodeGen/X86/psubus.ll b/test/CodeGen/X86/psubus.ll
index 4b83b55997e2..c6d118d6da69 100644
--- a/test/CodeGen/X86/psubus.ll
+++ b/test/CodeGen/X86/psubus.ll
@@ -1,11 +1,22 @@
-; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSSE3
-; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
-; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
define void @test1(i16* nocapture %head) nounwind {
+; SSE-LABEL: test1:
+; SSE: ## BB#0: ## %vector.ph
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: psubusw {{.*}}(%rip), %xmm0
+; SSE-NEXT: movdqu %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test1:
+; AVX: ## BB#0: ## %vector.ph
+; AVX-NEXT: vmovdqu (%rdi), %xmm0
+; AVX-NEXT: vpsubusw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX-NEXT: retq
vector.ph:
%0 = getelementptr inbounds i16, i16* %head, i64 0
%1 = bitcast i16* %0 to <8 x i16>*
@@ -15,30 +26,22 @@ vector.ph:
%5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
store <8 x i16> %5, <8 x i16>* %1, align 2
ret void
-
-; SSSE3: @test1
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movdqu (%rdi), %xmm0
-; SSSE3-NEXT: psubusw LCPI0_0(%rip), %xmm0
-; SSSE3-NEXT: movdqu %xmm0, (%rdi)
-; SSSE3-NEXT: retq
-
-; AVX1: @test1
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqu (%rdi), %xmm0
-; AVX1-NEXT: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX1-NEXT: retq
-
-; AVX2: @test1
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %xmm0
-; AVX2-NEXT: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX2-NEXT: retq
}
define void @test2(i16* nocapture %head) nounwind {
+; SSE-LABEL: test2:
+; SSE: ## BB#0: ## %vector.ph
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: psubusw {{.*}}(%rip), %xmm0
+; SSE-NEXT: movdqu %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test2:
+; AVX: ## BB#0: ## %vector.ph
+; AVX-NEXT: vmovdqu (%rdi), %xmm0
+; AVX-NEXT: vpsubusw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX-NEXT: retq
vector.ph:
%0 = getelementptr inbounds i16, i16* %head, i64 0
%1 = bitcast i16* %0 to <8 x i16>*
@@ -48,30 +51,46 @@ vector.ph:
%5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
store <8 x i16> %5, <8 x i16>* %1, align 2
ret void
-
-; SSSE3: @test2
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movdqu (%rdi), %xmm0
-; SSSE3-NEXT: psubusw LCPI1_0(%rip), %xmm0
-; SSSE3-NEXT: movdqu %xmm0, (%rdi)
-; SSSE3-NEXT: retq
-
-; AVX1: @test2
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqu (%rdi), %xmm0
-; AVX1-NEXT: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX1-NEXT: retq
-
-; AVX2: @test2
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %xmm0
-; AVX2-NEXT: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX2-NEXT: retq
}
define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
+; SSE2-LABEL: test3:
+; SSE2: ## BB#0: ## %vector.ph
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: movdqu (%rdi), %xmm1
+; SSE2-NEXT: psubusw %xmm0, %xmm1
+; SSE2-NEXT: movdqu %xmm1, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test3:
+; SSSE3: ## BB#0: ## %vector.ph
+; SSSE3-NEXT: movd %esi, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: movdqu (%rdi), %xmm1
+; SSSE3-NEXT: psubusw %xmm0, %xmm1
+; SSSE3-NEXT: movdqu %xmm1, (%rdi)
+; SSSE3-NEXT: retq
+;
+; AVX1-LABEL: test3:
+; AVX1: ## BB#0: ## %vector.ph
+; AVX1-NEXT: vmovd %esi, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vmovdqu (%rdi), %xmm1
+; AVX1-NEXT: vpsubusw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test3:
+; AVX2: ## BB#0: ## %vector.ph
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu (%rdi), %xmm1
+; AVX2-NEXT: vpsubusw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT: retq
vector.ph:
%0 = insertelement <8 x i16> undef, i16 %w, i32 0
%broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -83,36 +102,22 @@ vector.ph:
%6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5
store <8 x i16> %6, <8 x i16>* %2, align 2
ret void
-
-; SSSE3: @test3
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %esi, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
-; SSSE3-NEXT: movdqu (%rdi), %xmm1
-; SSSE3-NEXT: psubusw %xmm0, %xmm1
-; SSSE3-NEXT: movdqu %xmm1, (%rdi)
-; SSSE3-NEXT: retq
-
-; AVX1: @test3
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovd %esi, %xmm0
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
-; AVX1-NEXT: vmovdqu (%rdi), %xmm1
-; AVX1-NEXT: vpsubusw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX1-NEXT: retq
-
-; AVX2: @test3
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovd %esi, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX2-NEXT: vmovdqu (%rdi), %xmm1
-; AVX2-NEXT: vpsubusw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX2-NEXT: retq
}
define void @test4(i8* nocapture %head) nounwind {
+; SSE-LABEL: test4:
+; SSE: ## BB#0: ## %vector.ph
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: psubusb {{.*}}(%rip), %xmm0
+; SSE-NEXT: movdqu %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test4:
+; AVX: ## BB#0: ## %vector.ph
+; AVX-NEXT: vmovdqu (%rdi), %xmm0
+; AVX-NEXT: vpsubusb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX-NEXT: retq
vector.ph:
%0 = getelementptr inbounds i8, i8* %head, i64 0
%1 = bitcast i8* %0 to <16 x i8>*
@@ -122,30 +127,22 @@ vector.ph:
%5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
store <16 x i8> %5, <16 x i8>* %1, align 1
ret void
-
-; SSSE3: @test4
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movdqu (%rdi), %xmm0
-; SSSE3-NEXT: psubusb LCPI3_0(%rip), %xmm0
-; SSSE3-NEXT: movdqu %xmm0, (%rdi)
-; SSSE3-NEXT: retq
-
-; AVX1: @test4
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqu (%rdi), %xmm0
-; AVX1-NEXT: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX1-NEXT: retq
-
-; AVX2: @test4
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %xmm0
-; AVX2-NEXT: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX2-NEXT: retq
}
define void @test5(i8* nocapture %head) nounwind {
+; SSE-LABEL: test5:
+; SSE: ## BB#0: ## %vector.ph
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: psubusb {{.*}}(%rip), %xmm0
+; SSE-NEXT: movdqu %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test5:
+; AVX: ## BB#0: ## %vector.ph
+; AVX-NEXT: vmovdqu (%rdi), %xmm0
+; AVX-NEXT: vpsubusb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX-NEXT: retq
vector.ph:
%0 = getelementptr inbounds i8, i8* %head, i64 0
%1 = bitcast i8* %0 to <16 x i8>*
@@ -155,30 +152,49 @@ vector.ph:
%5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
store <16 x i8> %5, <16 x i8>* %1, align 1
ret void
-
-; SSSE3: @test5
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movdqu (%rdi), %xmm0
-; SSSE3-NEXT: psubusb LCPI4_0(%rip), %xmm0
-; SSSE3-NEXT: movdqu %xmm0, (%rdi)
-; SSSE3-NEXT: retq
-
-; AVX1: @test5
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqu (%rdi), %xmm0
-; AVX1-NEXT: vpsubusb LCPI4_0(%rip), %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX1-NEXT: retq
-
-; AVX2: @test5
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %xmm0
-; AVX2-NEXT: vpsubusb LCPI4_0(%rip), %xmm0
-; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX2-NEXT: retq
}
define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
+; SSE2-LABEL: test6:
+; SSE2: ## BB#0: ## %vector.ph
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: movdqu (%rdi), %xmm1
+; SSE2-NEXT: psubusb %xmm0, %xmm1
+; SSE2-NEXT: movdqu %xmm1, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test6:
+; SSSE3: ## BB#0: ## %vector.ph
+; SSSE3-NEXT: movd %esi, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pshufb %xmm1, %xmm0
+; SSSE3-NEXT: movdqu (%rdi), %xmm1
+; SSSE3-NEXT: psubusb %xmm0, %xmm1
+; SSSE3-NEXT: movdqu %xmm1, (%rdi)
+; SSSE3-NEXT: retq
+;
+; AVX1-LABEL: test6:
+; AVX1: ## BB#0: ## %vector.ph
+; AVX1-NEXT: vmovd %esi, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu (%rdi), %xmm1
+; AVX1-NEXT: vpsubusb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test6:
+; AVX2: ## BB#0: ## %vector.ph
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu (%rdi), %xmm1
+; AVX2-NEXT: vpsubusb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT: retq
vector.ph:
%0 = insertelement <16 x i8> undef, i8 %w, i32 0
%broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -190,38 +206,41 @@ vector.ph:
%6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5
store <16 x i8> %6, <16 x i8>* %2, align 1
ret void
-
-; SSSE3: @test6
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %esi, %xmm0
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: pshufb %xmm1, %xmm0
-; SSSE3-NEXT: movdqu (%rdi), %xmm1
-; SSSE3-NEXT: psubusb %xmm0, %xmm1
-; SSSE3-NEXT: movdqu %xmm1, (%rdi)
-; SSSE3-NEXT: retq
-
-; AVX1: @test6
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovd %esi, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1
-; AVX1-NEXT: vpshufb %xmm1, %xmm0
-; AVX1-NEXT: vmovdqu (%rdi), %xmm1
-; AVX1-NEXT: vpsubusb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX1-NEXT: retq
-
-; AVX2: @test6
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovd %esi, %xmm0
-; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
-; AVX2-NEXT: vmovdqu (%rdi), %xmm1
-; AVX2-NEXT: vpsubusb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX2-NEXT: retq
}
define void @test7(i16* nocapture %head) nounwind {
+; SSE-LABEL: test7:
+; SSE: ## BB#0: ## %vector.ph
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: movdqu 16(%rdi), %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT: psubusw %xmm2, %xmm0
+; SSE-NEXT: psubusw %xmm2, %xmm1
+; SSE-NEXT: movdqu %xmm1, 16(%rdi)
+; SSE-NEXT: movdqu %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test7:
+; AVX1: ## BB#0: ## %vector.ph
+; AVX1-NEXT: vmovups (%rdi), %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test7:
+; AVX2: ## BB#0: ## %vector.ph
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpsubusw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
vector.ph:
%0 = getelementptr inbounds i16, i16* %head, i64 0
%1 = bitcast i16* %0 to <16 x i16>*
@@ -231,17 +250,47 @@ vector.ph:
%5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
store <16 x i16> %5, <16 x i16>* %1, align 2
ret void
-
-; AVX2: @test7
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; AVX2-NEXT: vpsubusw LCPI6_0(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
}
define void @test8(i16* nocapture %head) nounwind {
+; SSE-LABEL: test8:
+; SSE: ## BB#0: ## %vector.ph
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: movdqu 16(%rdi), %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; SSE-NEXT: psubusw %xmm2, %xmm0
+; SSE-NEXT: psubusw %xmm2, %xmm1
+; SSE-NEXT: movdqu %xmm1, 16(%rdi)
+; SSE-NEXT: movdqu %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test8:
+; AVX1: ## BB#0: ## %vector.ph
+; AVX1-NEXT: vmovups (%rdi), %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [65534,65534,65534,65534,65534,65534,65534,65534]
+; AVX1-NEXT: vpcmpgtw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpgtw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32769,32769,32769,32769,32769,32769,32769,32769]
+; AVX1-NEXT: vpaddw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test8:
+; AVX2: ## BB#0: ## %vector.ph
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpsubusw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
vector.ph:
%0 = getelementptr inbounds i16, i16* %head, i64 0
%1 = bitcast i16* %0 to <16 x i16>*
@@ -252,16 +301,63 @@ vector.ph:
store <16 x i16> %5, <16 x i16>* %1, align 2
ret void
-; AVX2: @test8
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; AVX2-NEXT: vpsubusw LCPI7_0(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
}
define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
+; SSE2-LABEL: test9:
+; SSE2: ## BB#0: ## %vector.ph
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: movdqu (%rdi), %xmm1
+; SSE2-NEXT: movdqu 16(%rdi), %xmm2
+; SSE2-NEXT: psubusw %xmm0, %xmm1
+; SSE2-NEXT: psubusw %xmm0, %xmm2
+; SSE2-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE2-NEXT: movdqu %xmm1, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test9:
+; SSSE3: ## BB#0: ## %vector.ph
+; SSSE3-NEXT: movd %esi, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: movdqu (%rdi), %xmm1
+; SSSE3-NEXT: movdqu 16(%rdi), %xmm2
+; SSSE3-NEXT: psubusw %xmm0, %xmm1
+; SSSE3-NEXT: psubusw %xmm0, %xmm2
+; SSSE3-NEXT: movdqu %xmm2, 16(%rdi)
+; SSSE3-NEXT: movdqu %xmm1, (%rdi)
+; SSSE3-NEXT: retq
+;
+; AVX1-LABEL: test9:
+; AVX1: ## BB#0: ## %vector.ph
+; AVX1-NEXT: vmovups (%rdi), %ymm0
+; AVX1-NEXT: vmovd %esi, %xmm1
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm3
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm2, %xmm4
+; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test9:
+; AVX2: ## BB#0: ## %vector.ph
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT: vmovdqu (%rdi), %ymm1
+; AVX2-NEXT: vpsubusw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
vector.ph:
%0 = insertelement <16 x i16> undef, i16 %w, i32 0
%broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -273,19 +369,41 @@ vector.ph:
%6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5
store <16 x i16> %6, <16 x i16>* %2, align 2
ret void
-
-; AVX2: @test9
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovd %esi, %xmm0
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: vmovdqu (%rdi), %ymm1
-; AVX2-NEXT: vpsubusw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
}
define void @test10(i8* nocapture %head) nounwind {
+; SSE-LABEL: test10:
+; SSE: ## BB#0: ## %vector.ph
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: movdqu 16(%rdi), %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; SSE-NEXT: psubusb %xmm2, %xmm0
+; SSE-NEXT: psubusb %xmm2, %xmm1
+; SSE-NEXT: movdqu %xmm1, 16(%rdi)
+; SSE-NEXT: movdqu %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test10:
+; AVX1: ## BB#0: ## %vector.ph
+; AVX1-NEXT: vmovups (%rdi), %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test10:
+; AVX2: ## BB#0: ## %vector.ph
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpsubusb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
vector.ph:
%0 = getelementptr inbounds i8, i8* %head, i64 0
%1 = bitcast i8* %0 to <32 x i8>*
@@ -296,16 +414,47 @@ vector.ph:
store <32 x i8> %5, <32 x i8>* %1, align 1
ret void
-; AVX2: @test10
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; AVX2-NEXT: vpsubusb LCPI9_0(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
}
define void @test11(i8* nocapture %head) nounwind {
+; SSE-LABEL: test11:
+; SSE: ## BB#0: ## %vector.ph
+; SSE-NEXT: movdqu (%rdi), %xmm0
+; SSE-NEXT: movdqu 16(%rdi), %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT: psubusb %xmm2, %xmm0
+; SSE-NEXT: psubusb %xmm2, %xmm1
+; SSE-NEXT: movdqu %xmm1, 16(%rdi)
+; SSE-NEXT: movdqu %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test11:
+; AVX1: ## BB#0: ## %vector.ph
+; AVX1-NEXT: vmovups (%rdi), %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [254,254,254,254,254,254,254,254,254,254,254,254,254,254,254,254]
+; AVX1-NEXT: vpcmpgtb %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpgtb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
+; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test11:
+; AVX2: ## BB#0: ## %vector.ph
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpsubusb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
vector.ph:
%0 = getelementptr inbounds i8, i8* %head, i64 0
%1 = bitcast i8* %0 to <32 x i8>*
@@ -315,17 +464,66 @@ vector.ph:
%5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
store <32 x i8> %5, <32 x i8>* %1, align 1
ret void
-
-; AVX2: @test11
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqu (%rdi), %ymm0
-; AVX2-NEXT: vpsubusb LCPI10_0(%rip), %ymm0, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
}
define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
+; SSE2-LABEL: test12:
+; SSE2: ## BB#0: ## %vector.ph
+; SSE2-NEXT: movd %esi, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: movdqu (%rdi), %xmm1
+; SSE2-NEXT: movdqu 16(%rdi), %xmm2
+; SSE2-NEXT: psubusb %xmm0, %xmm1
+; SSE2-NEXT: psubusb %xmm0, %xmm2
+; SSE2-NEXT: movdqu %xmm2, 16(%rdi)
+; SSE2-NEXT: movdqu %xmm1, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: test12:
+; SSSE3: ## BB#0: ## %vector.ph
+; SSSE3-NEXT: movd %esi, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pshufb %xmm1, %xmm0
+; SSSE3-NEXT: movdqu (%rdi), %xmm1
+; SSSE3-NEXT: movdqu 16(%rdi), %xmm2
+; SSSE3-NEXT: psubusb %xmm0, %xmm1
+; SSSE3-NEXT: psubusb %xmm0, %xmm2
+; SSSE3-NEXT: movdqu %xmm2, 16(%rdi)
+; SSSE3-NEXT: movdqu %xmm1, (%rdi)
+; SSSE3-NEXT: retq
+;
+; AVX1-LABEL: test12:
+; AVX1: ## BB#0: ## %vector.ph
+; AVX1-NEXT: vmovups (%rdi), %ymm0
+; AVX1-NEXT: vmovd %esi, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm3
+; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
+; AVX1-NEXT: vpmaxub %xmm1, %xmm2, %xmm4
+; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test12:
+; AVX2: ## BB#0: ## %vector.ph
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT: vmovdqu (%rdi), %ymm1
+; AVX2-NEXT: vpsubusb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
vector.ph:
%0 = insertelement <32 x i8> undef, i8 %w, i32 0
%broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -337,14 +535,4 @@ vector.ph:
%6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5
store <32 x i8> %6, <32 x i8>* %2, align 1
ret void
-
-; AVX2: @test12
-; AVX2: # BB#0:
-; AVX2-NEXT: vmovd %esi, %xmm0
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqu (%rdi), %ymm1
-; AVX2-NEXT: vpsubusb %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
}
diff --git a/test/CodeGen/X86/push-cfi-debug.ll b/test/CodeGen/X86/push-cfi-debug.ll
new file mode 100644
index 000000000000..cc00fab525ab
--- /dev/null
+++ b/test/CodeGen/X86/push-cfi-debug.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+
+
+; Function Attrs: optsize
+declare void @foo(i32, i32) #0
+declare x86_stdcallcc void @stdfoo(i32, i32) #0
+
+; CHECK-LABEL: test1:
+; CHECK: subl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: pushl $2
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: pushl $1
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: calll foo
+; CHECK: addl $16, %esp
+; CHECK: .cfi_adjust_cfa_offset -16
+; CHECK: subl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: pushl $4
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: pushl $3
+; CHECK: .cfi_adjust_cfa_offset 4
+; CHECK: calll stdfoo
+; CHECK: .cfi_adjust_cfa_offset -8
+; CHECK: addl $8, %esp
+; CHECK: .cfi_adjust_cfa_offset -8
+define void @test1() #0 !dbg !4 {
+entry:
+ tail call void @foo(i32 1, i32 2) #1, !dbg !10
+ tail call x86_stdcallcc void @stdfoo(i32 3, i32 4) #1, !dbg !11
+ ret void, !dbg !12
+}
+
+attributes #0 = { nounwind optsize }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250289)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "foo.c", directory: "foo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "test1", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 250289)"}
+!10 = !DILocation(line: 4, column: 3, scope: !4)
+!11 = !DILocation(line: 5, column: 3, scope: !4)
+!12 = !DILocation(line: 6, column: 1, scope: !4)
diff --git a/test/CodeGen/X86/push-cfi-obj.ll b/test/CodeGen/X86/push-cfi-obj.ll
new file mode 100644
index 000000000000..33291ec3318a
--- /dev/null
+++ b/test/CodeGen/X86/push-cfi-obj.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i686-darwin-macosx10.7 -filetype=obj | llvm-readobj -sections | FileCheck -check-prefix=DARWIN %s
+
+; On darwin, check that we manage to generate the compact unwind section
+; DARWIN: Name: __compact_unwind
+; DARWIN: Segment: __LD
+
+; LINUX: Name: .eh_frame
+; LINUX-NEXT: Type: SHT_PROGBITS (0x1)
+; LINUX-NEXT: Flags [ (0x2)
+; LINUX-NEXT: SHF_ALLOC (0x2)
+; LINUX-NEXT: ]
+; LINUX-NEXT: Address: 0x0
+; LINUX-NEXT: Offset: 0x68
+; LINUX-NEXT: Size: 64
+; LINUX-NEXT: Link: 0
+; LINUX-NEXT: Info: 0
+; LINUX-NEXT: AddressAlignment: 4
+; LINUX-NEXT: EntrySize: 0
+; LINUX-NEXT: Relocations [
+; LINUX-NEXT: ]
+; LINUX-NEXT: SectionData (
+; LINUX-NEXT: 0000: 1C000000 00000000 017A504C 5200017C |.........zPLR..||
+; LINUX-NEXT: 0010: 08070000 00000000 1B0C0404 88010000 |................|
+; LINUX-NEXT: 0020: 1C000000 24000000 00000000 1D000000 |....$...........|
+; LINUX-NEXT: 0030: 04000000 00410E08 8502420D 05432E10 |.....A....B..C..|
+; LINUX-NEXT: )
+
+declare i32 @__gxx_personality_v0(...)
+declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
+
+define void @test() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @good(i32 1, i32 2, i32 3, i32 4)
+ to label %continue unwind label %cleanup
+continue:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+attributes #0 = { optsize "no-frame-pointer-elim"="true" }
diff --git a/test/CodeGen/X86/push-cfi.ll b/test/CodeGen/X86/push-cfi.ll
new file mode 100644
index 000000000000..6389708f42cc
--- /dev/null
+++ b/test/CodeGen/X86/push-cfi.ll
@@ -0,0 +1,304 @@
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX -check-prefix=CHECK
+; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=DARWIN -check-prefix=CHECK
+
+declare i32 @__gxx_personality_v0(...)
+declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
+declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f)
+declare void @empty()
+
+; When we use an invoke, we expect a .cfi_escape GNU_ARGS_SIZE
+; with size 16 before the invocation. Without FP, we also expect
+; .cfi_adjust_cfa_offset after each push.
+; Darwin should not generate pushes in either circumstance.
+; CHECK-LABEL: test1_nofp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl $4
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: pushl $3
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: pushl $2
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: pushl $1
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; DARWIN-NOT: .cfi_escape
+; DARWIN-NOT: pushl
+define void @test1_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @good(i32 1, i32 2, i32 3, i32 4)
+ to label %continue unwind label %cleanup
+continue:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+; CHECK-LABEL: test1_fp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl $4
+; LINUX-NEXT: pushl $3
+; LINUX-NEXT: pushl $2
+; LINUX-NEXT: pushl $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; DARWIN: pushl %ebp
+; DARWIN-NOT: .cfi_escape
+; DARWIN-NOT: pushl
+define void @test1_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @good(i32 1, i32 2, i32 3, i32 4)
+ to label %continue unwind label %cleanup
+continue:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+; If the function has no handlers, we don't need to generate GNU_ARGS_SIZE,
+; even if it has an unwind table. Without FP, we still need cfi_adjust_cfa_offset,
+; so darwin should not generate pushes.
+; CHECK-LABEL: test2_nofp:
+; LINUX-NOT: .cfi_escape
+; LINUX: pushl $4
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: pushl $3
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: pushl $2
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: pushl $1
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; DARWIN-NOT: .cfi_escape
+; DARWIN-NOT: pushl
+define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ call void @good(i32 1, i32 2, i32 3, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test2_fp:
+; CHECK-NOT: .cfi_escape
+; CHECK-NOT: .cfi_adjust_cfa_offset
+; CHECK: pushl $4
+; CHECK-NEXT: pushl $3
+; CHECK-NEXT: pushl $2
+; CHECK-NEXT: pushl $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $24, %esp
+define void @test2_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ call void @good(i32 1, i32 2, i32 3, i32 4)
+ ret void
+}
+
+; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
+; cfi_adjust_cfa_offset.
+; CHECK-LABEL: test3_nofp:
+; LINUX-NOT: .cfi_escape
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX-NOT: pushl
+; LINUX: retl
+define void @test3_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @empty()
+ to label %continue unwind label %cleanup
+continue:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or
+; cfi_adjust_cfa_offset.
+; CHECK-LABEL: test3_fp:
+; LINUX: pushl %ebp
+; LINUX-NOT: .cfi_escape
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX-NOT: pushl
+; LINUX: retl
+define void @test3_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @empty()
+ to label %continue unwind label %cleanup
+continue:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+; Different sized stacks need different GNU_ARGS_SIZEs
+; CHECK-LABEL: test4:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl $4
+; LINUX-NEXT: pushl $3
+; LINUX-NEXT: pushl $2
+; LINUX-NEXT: pushl $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_escape 0x2e, 0x20
+; LINUX: subl $8, %esp
+; LINUX-NEXT: pushl $11
+; LINUX-NEXT: pushl $10
+; LINUX-NEXT: pushl $9
+; LINUX-NEXT: pushl $8
+; LINUX-NEXT: pushl $7
+; LINUX-NEXT: pushl $6
+; LINUX-NEXT: calll large
+; LINUX-NEXT: addl $32, %esp
+define void @test4() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @good(i32 1, i32 2, i32 3, i32 4)
+ to label %continue1 unwind label %cleanup
+continue1:
+ invoke void @large(i32 6, i32 7, i32 8, i32 9, i32 10, i32 11)
+ to label %continue2 unwind label %cleanup
+continue2:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call
+; without parameters, but don't need to adjust the cfa offset
+; CHECK-LABEL: test5_nofp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl $4
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: pushl $3
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: pushl $2
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: pushl $1
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX: .cfi_escape 0x2e, 0x00
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX: call
+define void @test5_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @good(i32 1, i32 2, i32 3, i32 4)
+ to label %continue1 unwind label %cleanup
+continue1:
+ invoke void @empty()
+ to label %continue2 unwind label %cleanup
+continue2:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+; CHECK-LABEL: test5_fp:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX-NEXT: pushl $4
+; LINUX-NEXT: pushl $3
+; LINUX-NEXT: pushl $2
+; LINUX-NEXT: pushl $1
+; LINUX-NEXT: call
+; LINUX-NEXT: addl $16, %esp
+; LINUX: .cfi_escape 0x2e, 0x00
+; LINUX-NOT: .cfi_adjust_cfa_offset
+; LINUX: call
+define void @test5_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @good(i32 1, i32 2, i32 3, i32 4)
+ to label %continue1 unwind label %cleanup
+continue1:
+ invoke void @empty()
+ to label %continue2 unwind label %cleanup
+continue2:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+; FIXME: This is actually inefficient - we don't need to repeat the .cfi_escape twice.
+; CHECK-LABEL: test6:
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: call
+; LINUX: .cfi_escape 0x2e, 0x10
+; LINUX: call
+define void @test6() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @good(i32 1, i32 2, i32 3, i32 4)
+ to label %continue1 unwind label %cleanup
+continue1:
+ invoke void @good(i32 5, i32 6, i32 7, i32 8)
+ to label %continue2 unwind label %cleanup
+continue2:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+; Darwin should generate pushes in the presense of FP and an unwind table,
+; but not FP and invoke.
+; CHECK-LABEL: test7:
+; DARWIN: pushl %ebp
+; DARWIN: movl %esp, %ebp
+; DARWIN: .cfi_def_cfa_register %ebp
+; DARWIN-NOT: .cfi_adjust_cfa_offset
+; DARWIN: pushl $4
+; DARWIN-NEXT: pushl $3
+; DARWIN-NEXT: pushl $2
+; DARWIN-NEXT: pushl $1
+; DARWIN-NEXT: call
+define void @test7() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ call void @good(i32 1, i32 2, i32 3, i32 4)
+ ret void
+}
+
+; CHECK-LABEL: test8:
+; DARWIN: pushl %ebp
+; DARWIN: movl %esp, %ebp
+; DARWIN-NOT: .cfi_adjust_cfa_offset
+; DARWIN-NOT: pushl
+define void @test8() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @good(i32 1, i32 2, i32 3, i32 4)
+ to label %continue unwind label %cleanup
+continue:
+ ret void
+cleanup:
+ landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+attributes #0 = { optsize }
+attributes #1 = { optsize "no-frame-pointer-elim"="true" }
diff --git a/test/CodeGen/X86/ragreedy-hoist-spill.ll b/test/CodeGen/X86/ragreedy-hoist-spill.ll
index e7dda5349568..46b65bd24fc0 100644
--- a/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-macosx -regalloc=greedy | FileCheck %s
-; This testing case is reduced from 254.gap SyFgets funciton.
+; This testing case is reduced from 254.gap SyFgets function.
; We make sure a spill is not hoisted to a hotter outer loop.
%struct.TMP.1 = type { %struct.TMP.2*, %struct.TMP.2*, [1024 x i8] }
diff --git a/test/CodeGen/X86/rem_crash.ll b/test/CodeGen/X86/rem_crash.ll
new file mode 100644
index 000000000000..8363b22ab65f
--- /dev/null
+++ b/test/CodeGen/X86/rem_crash.ll
@@ -0,0 +1,257 @@
+; RUN: llc < %s
+
+define i8 @test_minsize_uu8(i8 %x) minsize optsize {
+entry:
+ %0 = udiv i8 %x, 10
+ %1 = urem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_minsize_ss8(i8 %x) minsize optsize {
+entry:
+ %0 = sdiv i8 %x, 10
+ %1 = srem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_minsize_us8(i8 %x) minsize optsize {
+entry:
+ %0 = udiv i8 %x, 10
+ %1 = srem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_minsize_su8(i8 %x) minsize optsize {
+entry:
+ %0 = sdiv i8 %x, 10
+ %1 = urem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i16 @test_minsize_uu16(i16 %x) minsize optsize {
+entry:
+ %0 = udiv i16 %x, 10
+ %1 = urem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_minsize_ss16(i16 %x) minsize optsize {
+entry:
+ %0 = sdiv i16 %x, 10
+ %1 = srem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_minsize_us16(i16 %x) minsize optsize {
+entry:
+ %0 = udiv i16 %x, 10
+ %1 = srem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_minsize_su16(i16 %x) minsize optsize {
+entry:
+ %0 = sdiv i16 %x, 10
+ %1 = urem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i32 @test_minsize_uu32(i32 %x) minsize optsize {
+entry:
+ %0 = udiv i32 %x, 10
+ %1 = urem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_minsize_ss32(i32 %x) minsize optsize {
+entry:
+ %0 = sdiv i32 %x, 10
+ %1 = srem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_minsize_us32(i32 %x) minsize optsize {
+entry:
+ %0 = udiv i32 %x, 10
+ %1 = srem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_minsize_su32(i32 %x) minsize optsize {
+entry:
+ %0 = sdiv i32 %x, 10
+ %1 = urem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i64 @test_minsize_uu64(i64 %x) minsize optsize {
+entry:
+ %0 = udiv i64 %x, 10
+ %1 = urem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_minsize_ss64(i64 %x) minsize optsize {
+entry:
+ %0 = sdiv i64 %x, 10
+ %1 = srem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_minsize_us64(i64 %x) minsize optsize {
+entry:
+ %0 = udiv i64 %x, 10
+ %1 = srem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_minsize_su64(i64 %x) minsize optsize {
+entry:
+ %0 = sdiv i64 %x, 10
+ %1 = urem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i8 @test_uu8(i8 %x) optsize {
+entry:
+ %0 = udiv i8 %x, 10
+ %1 = urem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_ss8(i8 %x) optsize {
+entry:
+ %0 = sdiv i8 %x, 10
+ %1 = srem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_us8(i8 %x) optsize {
+entry:
+ %0 = udiv i8 %x, 10
+ %1 = srem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i8 @test_su8(i8 %x) optsize {
+entry:
+ %0 = sdiv i8 %x, 10
+ %1 = urem i8 %x, 10
+ %res = add i8 %0, %1
+ ret i8 %res
+}
+
+define i16 @test_uu16(i16 %x) optsize {
+entry:
+ %0 = udiv i16 %x, 10
+ %1 = urem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_ss16(i16 %x) optsize {
+entry:
+ %0 = sdiv i16 %x, 10
+ %1 = srem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_us16(i16 %x) optsize {
+entry:
+ %0 = udiv i16 %x, 10
+ %1 = srem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i16 @test_su16(i16 %x) optsize {
+entry:
+ %0 = sdiv i16 %x, 10
+ %1 = urem i16 %x, 10
+ %res = add i16 %0, %1
+ ret i16 %res
+}
+
+define i32 @test_uu32(i32 %x) optsize {
+entry:
+ %0 = udiv i32 %x, 10
+ %1 = urem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_ss32(i32 %x) optsize {
+entry:
+ %0 = sdiv i32 %x, 10
+ %1 = srem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_us32(i32 %x) optsize {
+entry:
+ %0 = udiv i32 %x, 10
+ %1 = srem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i32 @test_su32(i32 %x) optsize {
+entry:
+ %0 = sdiv i32 %x, 10
+ %1 = urem i32 %x, 10
+ %res = add i32 %0, %1
+ ret i32 %res
+}
+
+define i64 @test_uu64(i64 %x) optsize {
+entry:
+ %0 = udiv i64 %x, 10
+ %1 = urem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_ss64(i64 %x) optsize {
+entry:
+ %0 = sdiv i64 %x, 10
+ %1 = srem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_us64(i64 %x) optsize {
+entry:
+ %0 = udiv i64 %x, 10
+ %1 = srem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
+
+define i64 @test_su64(i64 %x) optsize {
+entry:
+ %0 = sdiv i64 %x, 10
+ %1 = urem i64 %x, 10
+ %res = add i64 %0, %1
+ ret i64 %res
+}
diff --git a/test/CodeGen/X86/remat-invalid-liveness.ll b/test/CodeGen/X86/remat-invalid-liveness.ll
deleted file mode 100644
index c6b43b0dd3e4..000000000000
--- a/test/CodeGen/X86/remat-invalid-liveness.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: llc %s -mcpu=core2 -o - | FileCheck %s
-; This test was failing while tracking the liveness in the register scavenger
-; during the branching folding pass. The allocation of the subregisters was
-; incorrect.
-; I.e., the faulty pattern looked like:
-; CH = movb 64
-; ECX = movl 3 <- CH was killed here.
-; CH = subb CH, ...
-;
-; This reduced test case triggers the crash before the fix, but does not
-; strictly speaking check that the resulting code is correct.
-; To check that the code is actually correct we would need to check the
-; liveness of the produced code.
-;
-; Currently, we check that after ECX = movl 3, we do not have subb CH,
-; whereas CH could have been redefine in between and that would have been
-; totally fine.
-; <rdar://problem/16582185>
-target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
-target triple = "i386-apple-macosx10.9"
-
-%struct.A = type { %struct.B, %struct.C, %struct.D*, [1 x i8*] }
-%struct.B = type { i32, [4 x i8] }
-%struct.C = type { i128 }
-%struct.D = type { {}*, [0 x i32] }
-%union.E = type { i32 }
-
-; CHECK-LABEL: __XXX1:
-; CHECK: movl $3, %ecx
-; CHECK-NOT: subb %{{[a-z]+}}, %ch
-; Function Attrs: nounwind optsize ssp
-define fastcc void @__XXX1(%struct.A* %ht) #0 {
-entry:
- %const72 = bitcast i128 72 to i128
- %const3 = bitcast i128 3 to i128
- switch i32 undef, label %if.end196 [
- i32 1, label %sw.bb.i
- i32 3, label %sw.bb2.i
- ]
-
-sw.bb.i: ; preds = %entry
- %call.i.i.i = tail call i32 undef(%struct.A* %ht, i8 zeroext 22, i32 undef, i32 0, %struct.D* undef)
- %bf.load.i.i = load i128, i128* undef, align 4
- %bf.lshr.i.i = lshr i128 %bf.load.i.i, %const72
- %shl1.i.i = shl nuw nsw i128 %bf.lshr.i.i, 8
- %shl.i.i = trunc i128 %shl1.i.i to i32
- br i1 undef, label %cond.false10.i.i, label %__XXX2.exit.i.i
-
-__XXX2.exit.i.i: ; preds = %sw.bb.i
- %extract11.i.i.i = lshr i128 %bf.load.i.i, %const3
- %extract.t12.i.i.i = trunc i128 %extract11.i.i.i to i32
- %bf.cast7.i.i.i = and i32 %extract.t12.i.i.i, 3
- %arrayidx.i.i.i = getelementptr inbounds %struct.A, %struct.A* %ht, i32 0, i32 3, i32 %bf.cast7.i.i.i
- br label %cond.end12.i.i
-
-cond.false10.i.i: ; preds = %sw.bb.i
- %arrayidx.i6.i.i = getelementptr inbounds %struct.A, %struct.A* %ht, i32 0, i32 3, i32 0
- br label %cond.end12.i.i
-
-cond.end12.i.i: ; preds = %cond.false10.i.i, %__XXX2.exit.i.i
- %.sink.in.i.i = phi i8** [ %arrayidx.i.i.i, %__XXX2.exit.i.i ], [ %arrayidx.i6.i.i, %cond.false10.i.i ]
- %.sink.i.i = load i8*, i8** %.sink.in.i.i, align 4
- %tmp = bitcast i8* %.sink.i.i to %union.E*
- br i1 undef, label %for.body.i.i, label %if.end196
-
-for.body.i.i: ; preds = %for.body.i.i, %cond.end12.i.i
- %weak.i.i = getelementptr inbounds %union.E, %union.E* %tmp, i32 undef, i32 0
- %tmp1 = load i32, i32* %weak.i.i, align 4
- %cmp36.i.i = icmp ne i32 %tmp1, %shl.i.i
- %or.cond = and i1 %cmp36.i.i, false
- br i1 %or.cond, label %for.body.i.i, label %if.end196
-
-sw.bb2.i: ; preds = %entry
- %bf.lshr.i85.i = lshr i128 undef, %const72
- br i1 undef, label %if.end196, label %__XXX2.exit.i95.i
-
-__XXX2.exit.i95.i: ; preds = %sw.bb2.i
- %extract11.i.i91.i = lshr i128 undef, %const3
- br label %if.end196
-
-if.end196: ; preds = %__XXX2.exit.i95.i, %sw.bb2.i, %for.body.i.i, %cond.end12.i.i, %entry
- ret void
-}
-
-attributes #0 = { nounwind optsize ssp "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
diff --git a/test/CodeGen/X86/rodata-relocs.ll b/test/CodeGen/X86/rodata-relocs.ll
index 9228ea1f621f..6379ef1bf737 100644
--- a/test/CodeGen/X86/rodata-relocs.ll
+++ b/test/CodeGen/X86/rodata-relocs.ll
@@ -32,15 +32,15 @@ target triple = "x86_64-unknown-linux-gnu"
; PIC: .section .rodata.cst16,"aM",@progbits,16
; PIC: e:
; PIC: e1:
-; PIC: .section .data.rel.ro.local,"aw",@progbits
+; PIC: .section .data.rel.ro,"aw",@progbits
; PIC: p:
; PIC: t:
-; PIC: .section .data.rel.ro,"aw",@progbits
+; PIC-NOT: .section
; PIC: p1:
; PIC: t1:
-; PIC: .section .data.rel,"aw",@progbits
+; PIC: .data
; PIC: p2:
; PIC: t2:
-; PIC: .section .data.rel.local,"aw",@progbits
+; PIC-NOT: .section
; PIC: p3:
; PIC: t3:
diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll
index 69f4bfb9f47d..15a11d1d6a96 100644
--- a/test/CodeGen/X86/rounding-ops.ll
+++ b/test/CodeGen/X86/rounding-ops.ll
@@ -6,10 +6,10 @@ define float @test1(float %x) nounwind {
ret float %call
; CHECK-SSE-LABEL: test1:
-; CHECK-SSE: roundss $1
+; CHECK-SSE: roundss $9
; CHECK-AVX-LABEL: test1:
-; CHECK-AVX: vroundss $1
+; CHECK-AVX: vroundss $9
}
declare float @floorf(float) nounwind readnone
@@ -19,10 +19,10 @@ define double @test2(double %x) nounwind {
ret double %call
; CHECK-SSE-LABEL: test2:
-; CHECK-SSE: roundsd $1
+; CHECK-SSE: roundsd $9
; CHECK-AVX-LABEL: test2:
-; CHECK-AVX: vroundsd $1
+; CHECK-AVX: vroundsd $9
}
declare double @floor(double) nounwind readnone
@@ -58,10 +58,10 @@ define float @test5(float %x) nounwind {
ret float %call
; CHECK-SSE-LABEL: test5:
-; CHECK-SSE: roundss $2
+; CHECK-SSE: roundss $10
; CHECK-AVX-LABEL: test5:
-; CHECK-AVX: vroundss $2
+; CHECK-AVX: vroundss $10
}
declare float @ceilf(float) nounwind readnone
@@ -71,10 +71,10 @@ define double @test6(double %x) nounwind {
ret double %call
; CHECK-SSE-LABEL: test6:
-; CHECK-SSE: roundsd $2
+; CHECK-SSE: roundsd $10
; CHECK-AVX-LABEL: test6:
-; CHECK-AVX: vroundsd $2
+; CHECK-AVX: vroundsd $10
}
declare double @ceil(double) nounwind readnone
@@ -110,10 +110,10 @@ define float @test9(float %x) nounwind {
ret float %call
; CHECK-SSE-LABEL: test9:
-; CHECK-SSE: roundss $3
+; CHECK-SSE: roundss $11
; CHECK-AVX-LABEL: test9:
-; CHECK-AVX: vroundss $3
+; CHECK-AVX: vroundss $11
}
declare float @truncf(float) nounwind readnone
@@ -123,10 +123,10 @@ define double @test10(double %x) nounwind {
ret double %call
; CHECK-SSE-LABEL: test10:
-; CHECK-SSE: roundsd $3
+; CHECK-SSE: roundsd $11
; CHECK-AVX-LABEL: test10:
-; CHECK-AVX: vroundsd $3
+; CHECK-AVX: vroundsd $11
}
declare double @trunc(double) nounwind readnone
diff --git a/test/CodeGen/X86/safestack.ll b/test/CodeGen/X86/safestack.ll
new file mode 100644
index 000000000000..1ff9a050aefb
--- /dev/null
+++ b/test/CodeGen/X86/safestack.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mtriple=i386-linux < %s -o - | FileCheck --check-prefix=LINUX-I386 %s
+; RUN: llc -mtriple=x86_64-linux < %s -o - | FileCheck --check-prefix=LINUX-X64 %s
+; RUN: llc -mtriple=i386-linux-android < %s -o - | FileCheck --check-prefix=ANDROID-I386 %s
+; RUN: llc -mtriple=x86_64-linux-android < %s -o - | FileCheck --check-prefix=ANDROID-X64 %s
+
+define void @_Z1fv() safestack {
+entry:
+ %x = alloca i32, align 4
+ %0 = bitcast i32* %x to i8*
+ call void @_Z7CapturePi(i32* nonnull %x)
+ ret void
+}
+
+declare void @_Z7CapturePi(i32*)
+
+; LINUX-X64: movq __safestack_unsafe_stack_ptr@GOTTPOFF(%rip), %[[A:.*]]
+; LINUX-X64: movq %fs:(%[[A]]), %[[B:.*]]
+; LINUX-X64: leaq -16(%[[B]]), %[[C:.*]]
+; LINUX-X64: movq %[[C]], %fs:(%[[A]])
+
+; LINUX-I386: movl __safestack_unsafe_stack_ptr@INDNTPOFF, %[[A:.*]]
+; LINUX-I386: movl %gs:(%[[A]]), %[[B:.*]]
+; LINUX-I386: leal -16(%[[B]]), %[[C:.*]]
+; LINUX-I386: movl %[[C]], %gs:(%[[A]])
+
+; ANDROID-I386: movl %gs:36, %[[A:.*]]
+; ANDROID-I386: leal -16(%[[A]]), %[[B:.*]]
+; ANDROID-I386: movl %[[B]], %gs:36
+
+; ANDROID-X64: movq %fs:72, %[[A:.*]]
+; ANDROID-X64: leaq -16(%[[A]]), %[[B:.*]]
+; ANDROID-X64: movq %[[B]], %fs:72
diff --git a/test/CodeGen/X86/sar_fold.ll b/test/CodeGen/X86/sar_fold.ll
new file mode 100644
index 000000000000..bd0d0c7057d3
--- /dev/null
+++ b/test/CodeGen/X86/sar_fold.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s
+
+define i32 @shl16sar15(i32 %a) #0 {
+; CHECK-LABEL: shl16sar15:
+; CHECK: # BB#0:
+; CHECK-NEXT: movswl {{[0-9]+}}(%esp), %eax
+ %1 = shl i32 %a, 16
+ %2 = ashr exact i32 %1, 15
+ ret i32 %2
+}
+
+define i32 @shl16sar17(i32 %a) #0 {
+; CHECK-LABEL: shl16sar17:
+; CHECK: # BB#0:
+; CHECK-NEXT: movswl {{[0-9]+}}(%esp), %eax
+ %1 = shl i32 %a, 16
+ %2 = ashr exact i32 %1, 17
+ ret i32 %2
+}
+
+define i32 @shl24sar23(i32 %a) #0 {
+; CHECK-LABEL: shl24sar23:
+; CHECK: # BB#0:
+; CHECK-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+ %1 = shl i32 %a, 24
+ %2 = ashr exact i32 %1, 23
+ ret i32 %2
+}
+
+define i32 @shl24sar25(i32 %a) #0 {
+; CHECK-LABEL: shl24sar25:
+; CHECK: # BB#0:
+; CHECK-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+ %1 = shl i32 %a, 24
+ %2 = ashr exact i32 %1, 25
+ ret i32 %2
+}
diff --git a/test/CodeGen/X86/sar_fold64.ll b/test/CodeGen/X86/sar_fold64.ll
new file mode 100644
index 000000000000..7b33bb8c0616
--- /dev/null
+++ b/test/CodeGen/X86/sar_fold64.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+define i32 @shl48sar47(i64 %a) #0 {
+; CHECK-LABEL: shl48sar47:
+; CHECK: # BB#0:
+; CHECK-NEXT: movswq %di, %rax
+ %1 = shl i64 %a, 48
+ %2 = ashr exact i64 %1, 47
+ %3 = trunc i64 %2 to i32
+ ret i32 %3
+}
+
+define i32 @shl48sar49(i64 %a) #0 {
+; CHECK-LABEL: shl48sar49:
+; CHECK: # BB#0:
+; CHECK-NEXT: movswq %di, %rax
+ %1 = shl i64 %a, 48
+ %2 = ashr exact i64 %1, 49
+ %3 = trunc i64 %2 to i32
+ ret i32 %3
+}
+
+define i32 @shl56sar55(i64 %a) #0 {
+; CHECK-LABEL: shl56sar55:
+; CHECK: # BB#0:
+; CHECK-NEXT: movsbq %dil, %rax
+ %1 = shl i64 %a, 56
+ %2 = ashr exact i64 %1, 55
+ %3 = trunc i64 %2 to i32
+ ret i32 %3
+}
+
+define i32 @shl56sar57(i64 %a) #0 {
+; CHECK-LABEL: shl56sar57:
+; CHECK: # BB#0:
+; CHECK-NEXT: movsbq %dil, %rax
+ %1 = shl i64 %a, 56
+ %2 = ashr exact i64 %1, 57
+ %3 = trunc i64 %2 to i32
+ ret i32 %3
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/scalar-fp-to-i64.ll b/test/CodeGen/X86/scalar-fp-to-i64.ll
new file mode 100644
index 000000000000..d112d2340bdb
--- /dev/null
+++ b/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -0,0 +1,151 @@
+; Check that scalar FP conversions to signed and unsigned int64 are using
+; reasonable sequences, across platforms and target switches.
+;
+; The signed case is straight forward, and the tests here basically
+; ensure successful compilation (f80 with avx512 was broken at one point).
+;
+; For the unsigned case there are many possible sequences, so to avoid
+; a fragile test we just check for the presence of a few key instructions.
+; AVX512 on Intel64 can use vcvtts[ds]2usi directly for float and double.
+; Otherwise the sequence will involve an FP subtract (fsub, subss or subsd),
+; and a truncating conversion (cvtts[ds]2si, fisttp, or fnstcw+fist). When
+; both a subtract and fnstcw are needed, they can occur in either order.
+;
+; The interesting subtargets are AVX512F (vcvtts[ds]2usi), SSE3 (fisttp),
+; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only).
+;
+; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64
+; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64
+; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64
+; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=-sse | FileCheck %s --check-prefix=CHECK --check-prefix=X87
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=-sse | FileCheck %s --check-prefix=CHECK --check-prefix=X87
+
+; CHECK-LABEL: f_to_u64
+; X87-DAG: fsub
+; X87-DAG: fnstcw
+; X87: fist
+; SSE2_32-DAG: {{subss|fsub}}
+; SSE2_32-DAG: fnstcw
+; SSE2_32: fist
+; SSE2_64: subss
+; SSE2_64: cvttss2si
+; SSE3_32: {{subss|fsub}}
+; SSE3_32: fistt
+; SSE3_64: subss
+; SSE3_64: cvttss2si
+; AVX512_32: {{subss|fsub}}
+; AVX512_32: fistt
+; AVX512_64: vcvttss2usi
+; CHECK: ret
+define i64 @f_to_u64(float %a) nounwind {
+ %r = fptoui float %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: f_to_s64
+; X87: fnstcw
+; X87: fist
+; SSE2_32: fnstcw
+; SSE2_32: fist
+; SSE2_64: cvttss2si
+; SSE3_32: fistt
+; SSE3_64: cvttss2si
+; AVX512_32: fistt
+; AVX512_64: vcvttss2si
+; CHECK: ret
+define i64 @f_to_s64(float %a) nounwind {
+ %r = fptosi float %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: d_to_u64
+; X87-DAG: fsub
+; X87-DAG: fnstcw
+; X87: fist
+; SSE2_32-DAG: {{subsd|fsub}}
+; SSE2_32-DAG: fnstcw
+; SSE2_32: fist
+; SSE2_64: subsd
+; SSE2_64: cvttsd2si
+; SSE3_32: {{subsd|fsub}}
+; SSE3_32: fistt
+; SSE3_64: subsd
+; SSE3_64: cvttsd2si
+; AVX512_32: {{subsd|fsub}}
+; AVX512_32: fistt
+; AVX512_64: vcvttsd2usi
+; CHECK: ret
+define i64 @d_to_u64(double %a) nounwind {
+ %r = fptoui double %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: d_to_s64
+; X87: fnstcw
+; X87: fist
+; SSE2_32: fnstcw
+; SSE2_32: fist
+; SSE2_64: cvttsd2si
+; SSE3_32: fistt
+; SSE3_64: cvttsd2si
+; AVX512_32: fistt
+; AVX512_64: vcvttsd2si
+; CHECK: ret
+define i64 @d_to_s64(double %a) nounwind {
+ %r = fptosi double %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: x_to_u64
+; CHECK-DAG: fsub
+; X87-DAG: fnstcw
+; SSE2_32-DAG: fnstcw
+; SSE2_64-DAG: fnstcw
+; CHECK: fist
+; CHECK: ret
+define i64 @x_to_u64(x86_fp80 %a) nounwind {
+ %r = fptoui x86_fp80 %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: x_to_s64
+; X87: fnstcw
+; X87: fist
+; SSE2_32: fnstcw
+; SSE2_32: fist
+; SSE2_64: fnstcw
+; SSE2_64: fist
+; SSE3_32: fistt
+; SSE3_64: fistt
+; AVX512_32: fistt
+; AVX512_64: fistt
+; CHECK: ret
+define i64 @x_to_s64(x86_fp80 %a) nounwind {
+ %r = fptosi x86_fp80 %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: t_to_u64
+; CHECK: __fixunstfdi
+; CHECK: ret
+define i64 @t_to_u64(fp128 %a) nounwind {
+ %r = fptoui fp128 %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: t_to_s64
+; CHECK: __fixtfdi
+; CHECK: ret
+define i64 @t_to_s64(fp128 %a) nounwind {
+ %r = fptosi fp128 %a to i64
+ ret i64 %r
+}
diff --git a/test/CodeGen/X86/scalar-int-to-fp.ll b/test/CodeGen/X86/scalar-int-to-fp.ll
new file mode 100644
index 000000000000..93039859cdfb
--- /dev/null
+++ b/test/CodeGen/X86/scalar-int-to-fp.ll
@@ -0,0 +1,132 @@
+; Verify that scalar integer conversions to FP compile successfully
+; (at one time long double failed with avx512f), and that reasonable
+; instruction sequences are selected based on subtarget features.
+; Due to the plethora of reasonable sequences we just check for
+; one key instruction, usually a cvt or fild, allowing the test
+; to be relatively easily updated when sequences are improved.
+;
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=-sse | FileCheck %s --check-prefix=CHECK --check-prefix=X87
+
+; CHECK-LABEL: u32_to_f
+; AVX512_32: vcvtusi2ssl
+; AVX512_64: vcvtusi2ssl
+; SSE2_32: cvtsd2ss
+; SSE2_64: cvtsi2ssq
+; X87: fildll
+define float @u32_to_f(i32 %a) nounwind {
+ %r = uitofp i32 %a to float
+ ret float %r
+}
+
+; CHECK-LABEL: s32_to_f
+; AVX512_32: vcvtsi2ssl
+; AVX512_64: vcvtsi2ssl
+; SSE2_32: cvtsi2ssl
+; SSE2_64: cvtsi2ssl
+; X87: fildl
+define float @s32_to_f(i32 %a) nounwind {
+ %r = sitofp i32 %a to float
+ ret float %r
+}
+
+; CHECK-LABEL: u32_to_d
+; AVX512_32: vcvtusi2sdl
+; AVX512_64: vcvtusi2sdl
+; SSE2_32: subsd
+; SSE2_64: cvtsi2sdq
+; X87: fildll
+define double @u32_to_d(i32 %a) nounwind {
+ %r = uitofp i32 %a to double
+ ret double %r
+}
+
+; CHECK-LABEL: s32_to_d
+; AVX512_32: vcvtsi2sdl
+; AVX512_64: vcvtsi2sdl
+; SSE2_32: cvtsi2sdl
+; SSE2_64: cvtsi2sdl
+; X87: fildl
+define double @s32_to_d(i32 %a) nounwind {
+ %r = sitofp i32 %a to double
+ ret double %r
+}
+
+; CHECK-LABEL: u32_to_x
+; AVX512_32: vsubsd
+; AVX512_64: vsubsd
+; SSE2_32: subsd
+; SSE2_64: fildll
+; X87: fildll
+define x86_fp80 @u32_to_x(i32 %a) nounwind {
+ %r = uitofp i32 %a to x86_fp80
+ ret x86_fp80 %r
+}
+
+; CHECK-LABEL: s32_to_x
+; CHECK: fildl
+define x86_fp80 @s32_to_x(i32 %a) nounwind {
+ %r = sitofp i32 %a to x86_fp80
+ ret x86_fp80 %r
+}
+
+; CHECK-LABEL: u64_to_f
+; AVX512_32: fildll
+; AVX512_64: vcvtusi2ssq
+; SSE2_32: fildll
+; SSE2_64: cvtsi2ssq
+; X87: fildll
+define float @u64_to_f(i64 %a) nounwind {
+ %r = uitofp i64 %a to float
+ ret float %r
+}
+
+; CHECK-LABEL: s64_to_f
+; AVX512_32: fildll
+; AVX512_64: vcvtsi2ssq
+; SSE2_32: fildll
+; SSE2_64: cvtsi2ssq
+; X87: fildll
+define float @s64_to_f(i64 %a) nounwind {
+ %r = sitofp i64 %a to float
+ ret float %r
+}
+
+; CHECK-LABEL: u64_to_d
+; AVX512_32: vpunpckldq
+; AVX512_64: vcvtusi2sdq
+; SSE2_32: punpckldq
+; SSE2_64: punpckldq
+; X87: fildll
+define double @u64_to_d(i64 %a) nounwind {
+ %r = uitofp i64 %a to double
+ ret double %r
+}
+
+; CHECK-LABEL: s64_to_d
+; AVX512_32: fildll
+; AVX512_64: vcvtsi2sdq
+; SSE2_32: fildll
+; SSE2_64: cvtsi2sdq
+; X87: fildll
+define double @s64_to_d(i64 %a) nounwind {
+ %r = sitofp i64 %a to double
+ ret double %r
+}
+
+; CHECK-LABEL: u64_to_x
+; CHECK: fildll
+define x86_fp80 @u64_to_x(i64 %a) nounwind {
+ %r = uitofp i64 %a to x86_fp80
+ ret x86_fp80 %r
+}
+
+; CHECK-LABEL: s64_to_x
+; CHECK: fildll
+define x86_fp80 @s64_to_x(i64 %a) nounwind {
+ %r = sitofp i64 %a to x86_fp80
+ ret x86_fp80 %r
+}
diff --git a/test/CodeGen/X86/sdiv-pow2.ll b/test/CodeGen/X86/sdiv-pow2.ll
new file mode 100644
index 000000000000..e89f76931e18
--- /dev/null
+++ b/test/CodeGen/X86/sdiv-pow2.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+; No attributes, should not use idiv
+define i32 @test1(i32 inreg %x) {
+entry:
+ %div = sdiv i32 %x, 16
+ ret i32 %div
+; CHECK-LABEL: test1:
+; CHECK-NOT: idivl
+; CHECK: ret
+}
+
+; Has minsize (-Oz) attribute, should generate idiv
+define i32 @test2(i32 inreg %x) minsize {
+entry:
+ %div = sdiv i32 %x, 16
+ ret i32 %div
+; CHECK-LABEL: test2:
+; CHECK: idivl
+; CHECK: ret
+}
+
+; Has optsize (-Os) attribute, should not generate idiv
+define i32 @test3(i32 inreg %x) optsize {
+entry:
+ %div = sdiv i32 %x, 16
+ ret i32 %div
+; CHECK-LABEL: test3:
+; CHECK-NOT: idivl
+; CHECK: ret
+}
+
+
diff --git a/test/CodeGen/X86/seh-catch-all-win32.ll b/test/CodeGen/X86/seh-catch-all-win32.ll
index a4ea8ab78c79..e8da7ab971b1 100644
--- a/test/CodeGen/X86/seh-catch-all-win32.ll
+++ b/test/CodeGen/X86/seh-catch-all-win32.ll
@@ -22,23 +22,16 @@ entry:
to label %__try.cont unwind label %lpad
lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i32 ()* @"filt$main" to i8*)
- %1 = extractvalue { i8*, i32 } %0, 1
- %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @"filt$main" to i8*)) #4
- %matches = icmp eq i32 %1, %2
- br i1 %matches, label %__except, label %eh.resume
+ %cs1 = catchswitch within none [label %__except] unwind to caller
__except: ; preds = %lpad
- %3 = load i32, i32* %__exceptioncode, align 4
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str, i32 0, i32 0), i32 %3) #4
- br label %__try.cont
+ %p = catchpad within %cs1 [i8* bitcast (i32 ()* @"filt$main" to i8*)]
+ %code = load i32, i32* %__exceptioncode, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str, i32 0, i32 0), i32 %code) #4 [ "funclet"(token %p) ]
+ catchret from %p to label %__try.cont
__try.cont: ; preds = %entry, %__except
ret i32 0
-
-eh.resume: ; preds = %lpad
- resume { i8*, i32 } %0
}
define internal i32 @"filt$main"() {
@@ -68,33 +61,31 @@ entry:
; CHECK: pushl %esi
; CHECK: Lmain$frame_escape_0 = [[code_offs:[-0-9]+]]
-; CHECK: Lmain$frame_escape_1 = [[reg_offs:[-0-9]+]]
-; CHECK: movl %esp, [[reg_offs]](%ebp)
+; CHECK: movl %esp, [[reg_offs:[-0-9]+]](%ebp)
; CHECK: movl $L__ehtable$main,
-; EH state 0
+; EH state 0
; CHECK: movl $0, -16(%ebp)
; CHECK: calll _crash
; CHECK: popl %esi
; CHECK: popl %edi
; CHECK: popl %ebx
; CHECK: retl
-; CHECK: # Block address taken
-; stackrestore
+; CHECK: LBB0_[[lpbb:[0-9]+]]: # %__except{{$}}
+; stackrestore
; CHECK: movl -24(%ebp), %esp
-; EH state -1
+; EH state -1
; CHECK: movl [[code_offs]](%ebp), %[[code:[a-z]+]]
-; CHECK: movl $-1, -16(%ebp)
; CHECK-DAG: movl %[[code]], 4(%esp)
; CHECK-DAG: movl $_str, (%esp)
; CHECK: calll _printf
; CHECK: .section .xdata,"dr"
-; CHECK: Lmain$parent_frame_offset = Lmain$frame_escape_1
+; CHECK: Lmain$parent_frame_offset = [[reg_offs]]
; CHECK: .align 4
; CHECK: L__ehtable$main
; CHECK-NEXT: .long -1
; CHECK-NEXT: .long _filt$main
-; CHECK-NEXT: .long Ltmp{{[0-9]+}}
+; CHECK-NEXT: .long LBB0_[[lpbb]]
; CHECK-LABEL: _filt$main:
; CHECK: pushl %ebp
diff --git a/test/CodeGen/X86/seh-catch-all.ll b/test/CodeGen/X86/seh-catch-all.ll
index 1c1a3c2139d6..c6a2e4a1094a 100644
--- a/test/CodeGen/X86/seh-catch-all.ll
+++ b/test/CodeGen/X86/seh-catch-all.ll
@@ -2,6 +2,7 @@
@str = linkonce_odr unnamed_addr constant [27 x i8] c"GetExceptionCode(): 0x%lx\0A\00", align 1
+declare i32 @llvm.eh.exceptioncode(token)
declare i32 @__C_specific_handler(...)
declare void @crash()
declare i32 @printf(i8* nocapture readonly, ...) nounwind
@@ -11,20 +12,17 @@ entry:
invoke void @crash()
to label %__try.cont unwind label %lpad
-lpad:
- %0 = landingpad { i8*, i32 }
- catch i8* null
- %1 = extractvalue { i8*, i32 } %0, 0
- %2 = ptrtoint i8* %1 to i64
- %3 = trunc i64 %2 to i32
- call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str, i64 0, i64 0), i32 %3)
- br label %__try.cont
-
__try.cont:
ret i32 0
-eh.resume:
- resume { i8*, i32 } %0
+lpad:
+ %cs1 = catchswitch within none [label %catchall] unwind to caller
+
+catchall:
+ %p = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ %code = call i32 @llvm.eh.exceptioncode(token %p)
+ call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str, i64 0, i64 0), i32 %code) [ "funclet"(token %p) ]
+ catchret from %p to label %__try.cont
}
; Check that we can get the exception code from eax to the printf.
@@ -32,14 +30,17 @@ eh.resume:
; CHECK-LABEL: main:
; CHECK: callq crash
; CHECK: retq
-; CHECK: # Block address taken
+; CHECK: .LBB0_2: # %catchall
; CHECK: leaq str(%rip), %rcx
; CHECK: movl %eax, %edx
; CHECK: callq printf
; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 1
+; CHECK-NEXT: .Lmain$parent_frame_offset
+; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16
+; CHECK-NEXT: .Llsda_begin0:
; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL
; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL+1
; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL
+; CHECK-NEXT: .long .LBB0_2@IMGREL
+; CHECK-NEXT: .Llsda_end0:
diff --git a/test/CodeGen/X86/seh-catchpad.ll b/test/CodeGen/X86/seh-catchpad.ll
new file mode 100644
index 000000000000..d9b4c5c6bcf5
--- /dev/null
+++ b/test/CodeGen/X86/seh-catchpad.ll
@@ -0,0 +1,198 @@
+; RUN: llc < %s | FileCheck %s
+
+; Based on the source:
+; extern "C" int puts(const char *);
+; extern "C" int printf(const char *, ...);
+; extern "C" int do_div(int a, int b) { return a / b; }
+; extern "C" int filt();
+; int main() {
+; __try {
+; __try {
+; do_div(1, 0);
+; } __except (1) {
+; __try {
+; do_div(1, 0);
+; } __finally {
+; puts("finally");
+; }
+; }
+; } __except (filt()) {
+; puts("caught");
+; }
+; return 0;
+; }
+
+; ModuleID = 't.cpp'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+$"\01??_C@_07MKBLAIAL@finally?$AA@" = comdat any
+
+$"\01??_C@_06IBDBCMGJ@caught?$AA@" = comdat any
+
+@"\01??_C@_07MKBLAIAL@finally?$AA@" = linkonce_odr unnamed_addr constant [8 x i8] c"finally\00", comdat, align 1
+@"\01??_C@_06IBDBCMGJ@caught?$AA@" = linkonce_odr unnamed_addr constant [7 x i8] c"caught\00", comdat, align 1
+
+; Function Attrs: nounwind readnone
+define i32 @do_div(i32 %a, i32 %b) #0 {
+entry:
+ %div = sdiv i32 %a, %b
+ ret i32 %div
+}
+
+define i32 @main() #1 personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
+entry:
+ %call = invoke i32 @do_div(i32 1, i32 0) #4
+ to label %__try.cont.12 unwind label %catch.dispatch
+
+__except.2: ; preds = %__except
+ %call4 = invoke i32 @do_div(i32 1, i32 0) #4
+ to label %invoke.cont.3 unwind label %ehcleanup
+
+invoke.cont.3: ; preds = %__except.2
+ invoke fastcc void @"\01?fin$0@0@main@@"() #4
+ to label %__try.cont.12 unwind label %catch.dispatch.7
+
+__except.9: ; preds = %__except.ret
+ %call11 = tail call i32 @puts(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @"\01??_C@_06IBDBCMGJ@caught?$AA@", i64 0, i64 0))
+ br label %__try.cont.12
+
+__try.cont.12: ; preds = %invoke.cont.3, %entry, %__except.9
+ ret i32 0
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %__except] unwind label %catch.dispatch.7
+
+__except: ; preds = %catch.dispatch
+ %cp1 = catchpad within %cs1 [i8* null]
+ catchret from %cp1 to label %__except.2
+
+ehcleanup: ; preds = %__except.2
+ %cp2 = cleanuppad within none []
+ invoke fastcc void @"\01?fin$0@0@main@@"() #4 [ "funclet"(token %cp2) ]
+ to label %invoke.cont.6 unwind label %catch.dispatch.7
+
+invoke.cont.6: ; preds = %ehcleanup
+ cleanupret from %cp2 unwind label %catch.dispatch.7
+
+catch.dispatch.7:
+ %cs2 = catchswitch within none [label %__except.ret] unwind to caller
+
+__except.ret: ; preds = %catch.dispatch.7
+ %cp3 = catchpad within %cs2 [i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@main@@" to i8*)]
+ catchret from %cp3 to label %__except.9
+}
+
+; CHECK: main: # @main
+; CHECK: .seh_proc main
+; CHECK: .seh_handler __C_specific_handler, @unwind, @except
+; CHECK: pushq %rbp
+; CHECK: .seh_pushreg 5
+; CHECK: subq $32, %rsp
+; CHECK: .seh_stackalloc 32
+; CHECK: leaq 32(%rsp), %rbp
+; CHECK: .seh_setframe 5, 32
+; CHECK: .seh_endprologue
+; CHECK: .Ltmp0:
+; CHECK: movl $1, %ecx
+; CHECK: xorl %edx, %edx
+; CHECK: callq do_div
+; CHECK: .Ltmp1:
+; CHECK: .LBB1_[[epilogue:[0-9]+]]: # %__try.cont.12
+; CHECK: xorl %eax, %eax
+; CHECK: addq $32, %rsp
+; CHECK: popq %rbp
+; CHECK: retq
+; CHECK: .LBB1_[[except1bb:[0-9]+]]: # %__except
+; CHECK: .Ltmp2:
+; CHECK: movl $1, %ecx
+; CHECK: xorl %edx, %edx
+; CHECK: callq do_div
+; CHECK: .Ltmp3:
+; CHECK: callq "?fin$0@0@main@@"
+; CHECK: jmp .LBB1_[[epilogue]]
+; CHECK: .LBB1_[[except2bb:[0-9]+]]: # %__except.ret
+; CHECK: leaq "??_C@_06IBDBCMGJ@caught?$AA@"(%rip), %rcx
+; CHECK: callq puts
+; CHECK: jmp .LBB1_[[epilogue]]
+
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .Lmain$parent_frame_offset = 32
+; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16
+; CHECK-NEXT: .Llsda_begin0:
+; CHECK-NEXT: .long .Ltmp0@IMGREL+1
+; CHECK-NEXT: .long .Ltmp1@IMGREL+1
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .LBB1_[[except1bb]]@IMGREL
+; CHECK-NEXT: .long .Ltmp0@IMGREL+1
+; CHECK-NEXT: .long .Ltmp1@IMGREL+1
+; CHECK-NEXT: .long "?filt$0@0@main@@"@IMGREL
+; CHECK-NEXT: .long .LBB1_[[except2bb]]@IMGREL
+; CHECK-NEXT: .long .Ltmp2@IMGREL+1
+; CHECK-NEXT: .long .Ltmp3@IMGREL+1
+; CHECK-NEXT: .long "?dtor$[[finbb:[0-9]+]]@?0?main@4HA"@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long .Ltmp2@IMGREL+1
+; CHECK-NEXT: .long .Ltmp3@IMGREL+1
+; CHECK-NEXT: .long "?filt$0@0@main@@"@IMGREL
+; CHECK-NEXT: .long .LBB1_3@IMGREL
+; CHECK-NEXT: .long .Ltmp6@IMGREL+1
+; CHECK-NEXT: .long .Ltmp7@IMGREL+1
+; CHECK-NEXT: .long "?filt$0@0@main@@"@IMGREL
+; CHECK-NEXT: .long .LBB1_3@IMGREL
+; CHECK-NEXT: .Llsda_end0:
+
+; CHECK: .text
+; CHECK: .seh_endproc
+
+; CHECK: "?dtor$[[finbb]]@?0?main@4HA":
+; CHECK: .seh_proc "?dtor$[[finbb]]@?0?main@4HA"
+; CHECK: .seh_handler __C_specific_handler, @unwind, @except
+; CHECK: .LBB1_[[finbb]]: # %ehcleanup
+; CHECK: movq %rdx, 16(%rsp)
+; CHECK: pushq %rbp
+; CHECK: .seh_pushreg 5
+; CHECK: subq $32, %rsp
+; CHECK: .seh_stackalloc 32
+; CHECK: leaq 32(%rdx), %rbp
+; CHECK: .seh_endprologue
+; CHECK: callq "?fin$0@0@main@@"
+; CHECK: nop
+; CHECK: addq $32, %rsp
+; CHECK: popq %rbp
+; CHECK: retq
+; CHECK: .seh_handlerdata
+; CHECK: .seh_endproc
+
+define internal i32 @"\01?filt$0@0@main@@"(i8* nocapture readnone %exception_pointers, i8* nocapture readnone %frame_pointer) #1 {
+entry:
+ %call = tail call i32 @filt()
+ ret i32 %call
+}
+
+; CHECK: "?filt$0@0@main@@": # @"\01?filt$0@0@main@@"
+; CHECK: .seh_proc "?filt$0@0@main@@"
+; CHECK: .seh_endprologue
+; CHECK: rex64 jmp filt # TAILCALL
+; CHECK: .seh_handlerdata
+
+declare i32 @filt() #1
+
+declare i32 @__C_specific_handler(...)
+
+; Function Attrs: noinline nounwind
+define internal fastcc void @"\01?fin$0@0@main@@"() #2 {
+entry:
+ %call = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @"\01??_C@_07MKBLAIAL@finally?$AA@", i64 0, i64 0)) #5
+ ret void
+}
+
+; Function Attrs: nounwind
+declare i32 @puts(i8* nocapture readonly) #3
+
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { noinline }
+attributes #5 = { nounwind }
diff --git a/test/CodeGen/X86/seh-except-finally.ll b/test/CodeGen/X86/seh-except-finally.ll
index 0630d001bb76..b29788cd015d 100644
--- a/test/CodeGen/X86/seh-except-finally.ll
+++ b/test/CodeGen/X86/seh-except-finally.ll
@@ -38,84 +38,63 @@ entry:
%exn.slot = alloca i8*
%ehselector.slot = alloca i32
invoke void @crash() #5
- to label %invoke.cont unwind label %lpad
+ to label %invoke.cont unwind label %__finally
invoke.cont: ; preds = %entry
%0 = call i8* @llvm.localaddress()
invoke void @"\01?fin$0@0@use_both@@"(i1 zeroext false, i8* %0) #5
- to label %invoke.cont2 unwind label %lpad1
+ to label %invoke.cont2 unwind label %catch.dispatch
invoke.cont2: ; preds = %invoke.cont
br label %__try.cont
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@use_both@@" to i8*)
- %2 = extractvalue { i8*, i32 } %1, 0
- store i8* %2, i8** %exn.slot
- %3 = extractvalue { i8*, i32 } %1, 1
- store i32 %3, i32* %ehselector.slot
- %4 = call i8* @llvm.localaddress()
- invoke void @"\01?fin$0@0@use_both@@"(i1 zeroext true, i8* %4) #5
- to label %invoke.cont3 unwind label %lpad1
-
-lpad1: ; preds = %lpad, %invoke.cont
- %5 = landingpad { i8*, i32 }
- catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@use_both@@" to i8*)
- %6 = extractvalue { i8*, i32 } %5, 0
- store i8* %6, i8** %exn.slot
- %7 = extractvalue { i8*, i32 } %5, 1
- store i32 %7, i32* %ehselector.slot
- br label %catch.dispatch
+__finally: ; preds = %entry
+ %cleanuppad = cleanuppad within none []
+ %locals = call i8* @llvm.localaddress()
+ invoke void @"\01?fin$0@0@use_both@@"(i1 zeroext true, i8* %locals) #5 [ "funclet"(token %cleanuppad) ]
+ to label %invoke.cont3 unwind label %catch.dispatch
-invoke.cont3: ; preds = %lpad
- br label %catch.dispatch
+invoke.cont3: ; preds = %__finally
+ cleanupret from %cleanuppad unwind label %catch.dispatch
catch.dispatch: ; preds = %invoke.cont3, %lpad1
- %sel = load i32, i32* %ehselector.slot
- %8 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@use_both@@" to i8*)) #6
- %matches = icmp eq i32 %sel, %8
- br i1 %matches, label %__except, label %eh.resume
+ %cs1 = catchswitch within none [label %__except] unwind to caller
__except: ; preds = %catch.dispatch
- %call = call i32 @puts(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @"\01??_C@_08MLCMLGHM@__except?$AA@", i32 0, i32 0))
- br label %__try.cont
+ %catchpad = catchpad within %cs1 [i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@use_both@@" to i8*)]
+ %call = call i32 @puts(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @"\01??_C@_08MLCMLGHM@__except?$AA@", i32 0, i32 0)) [ "funclet"(token %catchpad) ]
+ catchret from %catchpad to label %__try.cont
__try.cont: ; preds = %__except, %invoke.cont2
ret void
-
-eh.resume: ; preds = %catch.dispatch
- %exn = load i8*, i8** %exn.slot
- %sel4 = load i32, i32* %ehselector.slot
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
- %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
- resume { i8*, i32 } %lpad.val5
}
; CHECK-LABEL: use_both:
; CHECK: .Ltmp0
; CHECK: callq crash
; CHECK: .Ltmp1
-; CHECK: .Ltmp3
-; CHECK: callq "?fin$0@0@use_both@@"
; CHECK: .Ltmp4
+; CHECK: callq "?fin$0@0@use_both@@"
+; CHECK: .Ltmp5
; CHECK: retq
;
; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 3
-; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .Luse_both$parent_frame_offset
+; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16
+; CHECK-NEXT: .Llsda_begin0:
+; CHECK-NEXT: .long .Ltmp0@IMGREL+1
; CHECK-NEXT: .long .Ltmp1@IMGREL+1
-; CHECK-NEXT: .long "?fin$0@0@use_both@@"@IMGREL
+; CHECK-NEXT: .long "?dtor$2@?0?use_both@4HA"@IMGREL
; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long .Ltmp0@IMGREL+1
; CHECK-NEXT: .long .Ltmp1@IMGREL+1
; CHECK-NEXT: .long "?filt$0@0@use_both@@"@IMGREL
-; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL
-; CHECK-NEXT: .long .Ltmp3@IMGREL
+; CHECK-NEXT: .long .LBB0_{{[0-9]+}}@IMGREL
; CHECK-NEXT: .long .Ltmp4@IMGREL+1
+; CHECK-NEXT: .long .Ltmp5@IMGREL+1
; CHECK-NEXT: .long "?filt$0@0@use_both@@"@IMGREL
-; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL
+; CHECK-NEXT: .long .LBB0_{{[0-9]+}}@IMGREL
+; CHECK-NEXT: .Llsda_end0:
; Function Attrs: noinline nounwind
define internal i32 @"\01?filt$0@0@use_both@@"(i8* %exception_pointers, i8* %frame_pointer) #2 {
diff --git a/test/CodeGen/X86/seh-exception-code.ll b/test/CodeGen/X86/seh-exception-code.ll
new file mode 100644
index 000000000000..20e1544e0b59
--- /dev/null
+++ b/test/CodeGen/X86/seh-exception-code.ll
@@ -0,0 +1,38 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -O0 < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+declare void @f(i32)
+declare i32 @__C_specific_handler(...)
+declare i32 @llvm.eh.exceptioncode(token)
+
+define void @ehcode() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
+entry:
+ invoke void @f(i32 0)
+ to label %__try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs = catchswitch within none [label %__except] unwind to caller
+
+__except: ; preds = %catch.dispatch
+ %pad = catchpad within %cs [i8* null]
+ catchret from %pad to label %__except.1
+
+__except.1: ; preds = %__except
+ %code = call i32 @llvm.eh.exceptioncode(token %pad)
+ call void @f(i32 %code)
+ br label %__try.cont
+
+__try.cont: ; preds = %entry, %__except.1
+ ret void
+}
+
+; CHECK-LABEL: ehcode:
+; CHECK: xorl %ecx, %ecx
+; CHECK: callq f
+
+; CHECK: # %__except
+; CHECK: movl %eax, %ecx
+; CHECK-NEXT: callq f
diff --git a/test/CodeGen/X86/seh-filter.ll b/test/CodeGen/X86/seh-filter.ll
deleted file mode 100644
index 37ed15841a93..000000000000
--- a/test/CodeGen/X86/seh-filter.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc -O0 -mtriple=x86_64-windows-msvc < %s | FileCheck %s
-
-declare void @g()
-define void @f() personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*) {
- invoke void @g() to label %return unwind label %lpad
-
-return:
- ret void
-
-lpad:
- %ehptrs = landingpad {i8*, i32}
- filter [0 x i8*] zeroinitializer
- call void @__cxa_call_unexpected(i8* null)
- unreachable
-}
-declare i32 @__C_specific_handler(...)
-declare void @__cxa_call_unexpected(i8*)
-
-; We don't emit entries for filters.
-; CHECK: .seh_handlerdata
-; CHECK: .long 0
diff --git a/test/CodeGen/X86/seh-finally.ll b/test/CodeGen/X86/seh-finally.ll
index 350cd932f481..2ef1c984851c 100644
--- a/test/CodeGen/X86/seh-finally.ll
+++ b/test/CodeGen/X86/seh-finally.ll
@@ -17,50 +17,42 @@ invoke.cont: ; preds = %entry
ret i32 0
lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- cleanup
- %1 = extractvalue { i8*, i32 } %0, 0
- %2 = extractvalue { i8*, i32 } %0, 1
- %call2 = invoke i32 @puts(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @str_recovered, i64 0, i64 0))
- to label %invoke.cont1 unwind label %terminate.lpad
-
-invoke.cont1: ; preds = %lpad
- resume { i8*, i32 } %0
-
-terminate.lpad: ; preds = %lpad
- %3 = landingpad { i8*, i32 }
- catch i8* null
- call void @abort()
- unreachable
+ %p = cleanuppad within none []
+ %call2 = call i32 @puts(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @str_recovered, i64 0, i64 0)) [ "funclet"(token %p) ]
+ cleanupret from %p unwind to caller
}
; X64-LABEL: main:
; X64: retq
; X64: .seh_handlerdata
-; X64-NEXT: .long 1
-; X64-NEXT: .long .Ltmp0@IMGREL
-; X64-NEXT: .long .Ltmp1@IMGREL
-; X64-NEXT: .long main.cleanup@IMGREL
-; X64-NEXT: .long 0
-
-; X64-LABEL: main.cleanup:
+; X64-NEXT: .Lmain$parent_frame_offset = 32
+; X64-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16 # Number of call sites
+; X64-NEXT: .Llsda_begin0:
+; X64-NEXT: .long .Ltmp0@IMGREL+1 # LabelStart
+; X64-NEXT: .long .Ltmp1@IMGREL+1 # LabelEnd
+; X64-NEXT: .long "?dtor$2@?0?main@4HA"@IMGREL # FinallyFunclet
+; X64-NEXT: .long 0 # Null
+; X64-NEXT: .Llsda_end0:
+
+; X64-LABEL: "?dtor$2@?0?main@4HA":
; X64: callq puts
; X64: retq
; X86-LABEL: _main:
; X86: retl
-; X86: .section .xdata,"dr"
-; X86: L__ehtable$main:
-; X86-NEXT: .long -1
-; X86-NEXT: .long 0
-; X86-NEXT: .long _main.cleanup
-
-; X86-LABEL: _main.cleanup:
+; X86-LABEL: "?dtor$2@?0?main@4HA":
+; X86: LBB0_2:
; X86: calll _puts
; X86: retl
+; X86: .section .xdata,"dr"
+; X86: L__ehtable$main:
+; X86-NEXT: .long -1 # ToState
+; X86-NEXT: .long 0 # Null
+; X86-NEXT: .long "?dtor$2@?0?main@4HA" # FinallyFunclet
+
declare i32 @__C_specific_handler(...)
declare i32 @puts(i8*)
diff --git a/test/CodeGen/X86/seh-safe-div-win32.ll b/test/CodeGen/X86/seh-safe-div-win32.ll
index b1bcde2c7ff3..643af3a472fb 100644
--- a/test/CodeGen/X86/seh-safe-div-win32.ll
+++ b/test/CodeGen/X86/seh-safe-div-win32.ll
@@ -28,35 +28,25 @@ entry:
%r = alloca i32, align 4
store i32 42, i32* %r
invoke void @try_body(i32* %r, i32* %n, i32* %d)
- to label %__try.cont unwind label %lpad
-
-lpad:
- %vals = landingpad { i8*, i32 }
- catch i8* bitcast (i32 ()* @safe_div_filt0 to i8*)
- catch i8* bitcast (i32 ()* @safe_div_filt1 to i8*)
- %ehptr = extractvalue { i8*, i32 } %vals, 0
- %sel = extractvalue { i8*, i32 } %vals, 1
- %filt0_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @safe_div_filt0 to i8*))
- %is_filt0 = icmp eq i32 %sel, %filt0_val
- br i1 %is_filt0, label %handler0, label %eh.dispatch1
-
-eh.dispatch1:
- %filt1_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @safe_div_filt1 to i8*))
- %is_filt1 = icmp eq i32 %sel, %filt1_val
- br i1 %is_filt1, label %handler1, label %eh.resume
+ to label %__try.cont unwind label %lpad0
+
+lpad0:
+ %cs0 = catchswitch within none [label %handler0] unwind label %lpad1
handler0:
- call void @puts(i8* getelementptr ([27 x i8], [27 x i8]* @str1, i32 0, i32 0))
+ %p0 = catchpad within %cs0 [i8* bitcast (i32 ()* @safe_div_filt0 to i8*)]
+ call void @puts(i8* getelementptr ([27 x i8], [27 x i8]* @str1, i32 0, i32 0)) [ "funclet"(token %p0) ]
store i32 -1, i32* %r, align 4
- br label %__try.cont
+ catchret from %p0 to label %__try.cont
+
+lpad1:
+ %cs1 = catchswitch within none [label %handler1] unwind to caller
handler1:
- call void @puts(i8* getelementptr ([29 x i8], [29 x i8]* @str2, i32 0, i32 0))
+ %p1 = catchpad within %cs1 [i8* bitcast (i32 ()* @safe_div_filt1 to i8*)]
+ call void @puts(i8* getelementptr ([29 x i8], [29 x i8]* @str2, i32 0, i32 0)) [ "funclet"(token %p1) ]
store i32 -2, i32* %r, align 4
- br label %__try.cont
-
-eh.resume:
- resume { i8*, i32 } %vals
+ catchret from %p1 to label %__try.cont
__try.cont:
%safe_ret = load i32, i32* %r, align 4
@@ -75,15 +65,13 @@ __try.cont:
; Landing pad code
-; CHECK: [[handler0:Ltmp[0-9]+]]: # Block address taken
-; CHECK: # %handler0
+; CHECK: [[handler0:LBB0_[0-9]+]]: # %handler0
; Restore SP
; CHECK: movl {{.*}}(%ebp), %esp
; CHECK: calll _puts
; CHECK: jmp [[cont_bb]]
-; CHECK: [[handler1:Ltmp[0-9]+]]: # Block address taken
-; CHECK: # %handler1
+; CHECK: [[handler1:LBB0_[0-9]+]]: # %handler1
; Restore SP
; CHECK: movl {{.*}}(%ebp), %esp
; CHECK: calll _puts
diff --git a/test/CodeGen/X86/seh-safe-div.ll b/test/CodeGen/X86/seh-safe-div.ll
index 699e58ee8bae..60918cf07058 100644
--- a/test/CodeGen/X86/seh-safe-div.ll
+++ b/test/CodeGen/X86/seh-safe-div.ll
@@ -27,35 +27,25 @@ define i32 @safe_div(i32* %n, i32* %d) personality i8* bitcast (i32 (...)* @__C_
entry:
%r = alloca i32, align 4
invoke void @try_body(i32* %r, i32* %n, i32* %d)
- to label %__try.cont unwind label %lpad
-
-lpad:
- %vals = landingpad { i8*, i32 }
- catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*)
- catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*)
- %ehptr = extractvalue { i8*, i32 } %vals, 0
- %sel = extractvalue { i8*, i32 } %vals, 1
- %filt0_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*))
- %is_filt0 = icmp eq i32 %sel, %filt0_val
- br i1 %is_filt0, label %handler0, label %eh.dispatch1
-
-eh.dispatch1:
- %filt1_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*))
- %is_filt1 = icmp eq i32 %sel, %filt1_val
- br i1 %is_filt1, label %handler1, label %eh.resume
+ to label %__try.cont unwind label %lpad0
+
+lpad0:
+ %cs0 = catchswitch within none [label %handler0] unwind label %lpad1
handler0:
- call void @puts(i8* getelementptr ([27 x i8], [27 x i8]* @str1, i32 0, i32 0))
+ %p0 = catchpad within %cs0 [i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*)]
+ call void @puts(i8* getelementptr ([27 x i8], [27 x i8]* @str1, i32 0, i32 0)) [ "funclet"(token %p0) ]
store i32 -1, i32* %r, align 4
- br label %__try.cont
+ catchret from %p0 to label %__try.cont
+
+lpad1:
+ %cs1 = catchswitch within none [label %handler1] unwind to caller
handler1:
- call void @puts(i8* getelementptr ([29 x i8], [29 x i8]* @str2, i32 0, i32 0))
+ %p1 = catchpad within %cs1 [i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*)]
+ call void @puts(i8* getelementptr ([29 x i8], [29 x i8]* @str2, i32 0, i32 0)) [ "funclet"(token %p1) ]
store i32 -2, i32* %r, align 4
- br label %__try.cont
-
-eh.resume:
- resume { i8*, i32 } %vals
+ catchret from %p1 to label %__try.cont
__try.cont:
%safe_ret = load i32, i32* %r, align 4
@@ -68,7 +58,7 @@ __try.cont:
; CHECK: .seh_proc safe_div
; CHECK: .seh_handler __C_specific_handler, @unwind, @except
; CHECK: .Ltmp0:
-; CHECK: leaq [[rloc:.*\(%rsp\)]], %rcx
+; CHECK: leaq [[rloc:.*\(%rbp\)]], %rcx
; CHECK: callq try_body
; CHECK-NEXT: .Ltmp1
; CHECK: [[cont_bb:\.LBB0_[0-9]+]]:
@@ -77,32 +67,32 @@ __try.cont:
; Landing pad code
-; CHECK: [[handler0:\.Ltmp[0-9]+]]: # Block address taken
-; CHECK: # %handler0
+; CHECK: [[handler0:\.LBB0_[0-9]+]]: # %handler0
; CHECK: callq puts
; CHECK: movl $-1, [[rloc]]
; CHECK: jmp [[cont_bb]]
-; CHECK: [[handler1:\.Ltmp[0-9]+]]: # Block address taken
-; CHECK: # %handler1
+; CHECK: [[handler1:\.LBB0_[0-9]+]]: # %handler1
; CHECK: callq puts
; CHECK: movl $-2, [[rloc]]
; CHECK: jmp [[cont_bb]]
; CHECK: .seh_handlerdata
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .Lsafe_div$parent_frame_offset
+; CHECK-NEXT: .long (.Llsda_end0-.Llsda_begin0)/16
+; CHECK-NEXT: .Llsda_begin0:
+; CHECK-NEXT: .long .Ltmp0@IMGREL+1
; CHECK-NEXT: .long .Ltmp1@IMGREL+1
; CHECK-NEXT: .long safe_div_filt0@IMGREL
; CHECK-NEXT: .long [[handler0]]@IMGREL
-; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long .Ltmp0@IMGREL+1
; CHECK-NEXT: .long .Ltmp1@IMGREL+1
; CHECK-NEXT: .long safe_div_filt1@IMGREL
; CHECK-NEXT: .long [[handler1]]@IMGREL
+; CHECK-NEXT: .Llsda_end0:
; CHECK: .text
; CHECK: .seh_endproc
-
define void @try_body(i32* %r, i32* %n, i32* %d) {
entry:
%0 = load i32, i32* %n, align 4
diff --git a/test/CodeGen/X86/seh-stack-realign-win32.ll b/test/CodeGen/X86/seh-stack-realign-win32.ll
deleted file mode 100644
index f3ab71803ca7..000000000000
--- a/test/CodeGen/X86/seh-stack-realign-win32.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s
-
-; 32-bit catch-all has to use a filter function because that's how it saves the
-; exception code.
-
-@str = linkonce_odr unnamed_addr constant [27 x i8] c"GetExceptionCode(): 0x%lx\0A\00", align 1
-
-declare i32 @_except_handler3(...)
-declare void @crash()
-declare i32 @printf(i8* nocapture readonly, ...) nounwind
-declare i32 @llvm.eh.typeid.for(i8*)
-declare i8* @llvm.frameaddress(i32)
-declare i8* @llvm.localrecover(i8*, i8*, i32)
-declare void @llvm.localescape(...)
-declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
-
-define i32 @main() personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
-entry:
- ; The EH code allocation is overaligned, triggering realignment.
- %__exceptioncode = alloca i32, align 8
- call void (...) @llvm.localescape(i32* %__exceptioncode)
- invoke void @crash() #5
- to label %__try.cont unwind label %lpad
-
-lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i32 ()* @"filt$main" to i8*)
- %1 = extractvalue { i8*, i32 } %0, 1
- %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @"filt$main" to i8*)) #4
- %matches = icmp eq i32 %1, %2
- br i1 %matches, label %__except, label %eh.resume
-
-__except: ; preds = %lpad
- %3 = load i32, i32* %__exceptioncode, align 4
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str, i32 0, i32 0), i32 %3) #4
- br label %__try.cont
-
-__try.cont: ; preds = %entry, %__except
- ret i32 0
-
-eh.resume: ; preds = %lpad
- resume { i8*, i32 } %0
-}
-
-define internal i32 @"filt$main"() {
-entry:
- %ebp = tail call i8* @llvm.frameaddress(i32 1)
- %parentfp = tail call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @main to i8*), i8* %ebp)
- %code.i8 = tail call i8* @llvm.localrecover(i8* bitcast (i32 ()* @main to i8*), i8* %parentfp, i32 0)
- %__exceptioncode = bitcast i8* %code.i8 to i32*
- %info.addr = getelementptr inbounds i8, i8* %ebp, i32 -20
- %0 = bitcast i8* %info.addr to i32***
- %1 = load i32**, i32*** %0, align 4
- %2 = load i32*, i32** %1, align 4
- %3 = load i32, i32* %2, align 4
- store i32 %3, i32* %__exceptioncode, align 4
- ret i32 1
-}
-
-; Check that we can get the exception code from eax to the printf.
-
-; CHECK-LABEL: _main:
-; CHECK: Lmain$frame_escape_0 = [[code_offs:[-0-9]+]]
-; CHECK: Lmain$frame_escape_1 = [[reg_offs:[-0-9]+]]
-; CHECK: movl %esp, [[reg_offs]](%esi)
-; CHECK: movl $L__ehtable$main,
-; EH state 0
-; CHECK: movl $0, 40(%esi)
-; CHECK: calll _crash
-; CHECK: retl
-; CHECK: # Block address taken
-; stackrestore
-; CHECK: movl -24(%ebp), %esp
-; CHECK: movl $Lmain$parent_frame_offset, %eax
-; CHECK: negl %eax
-; CHECK: leal -24(%ebp,%eax), %esi
-; CHECK: movl 12(%esi), %ebp # 4-byte Reload
-; EH state -1
-; CHECK: movl [[code_offs]](%esi), %[[code:[a-z]+]]
-; CHECK: movl $-1, 40(%esi)
-; CHECK-DAG: movl %[[code]], 4(%esp)
-; CHECK-DAG: movl $_str, (%esp)
-; CHECK: calll _printf
-
-; CHECK: .section .xdata,"dr"
-; CHECK: Lmain$parent_frame_offset = Lmain$frame_escape_1
-; CHECK: L__ehtable$main
-; CHECK-NEXT: .long -1
-; CHECK-NEXT: .long _filt$main
-; CHECK-NEXT: .long Ltmp{{[0-9]+}}
-
-; CHECK-LABEL: _filt$main:
-; CHECK: pushl %ebp
-; CHECK: movl %esp, %ebp
-; CHECK: movl (%ebp), %[[oldebp:[a-z]+]]
-; CHECK: movl -20(%[[oldebp]]), %[[ehinfo:[a-z]+]]
-; CHECK: movl (%[[ehinfo]]), %[[ehrec:[a-z]+]]
-; CHECK: movl (%[[ehrec]]), %[[ehcode:[a-z]+]]
-; CHECK: movl %[[ehcode]], {{.*}}(%{{.*}})
diff --git a/test/CodeGen/X86/seh-stack-realign.ll b/test/CodeGen/X86/seh-stack-realign.ll
index f2fb28a081f9..654cad347f6b 100644
--- a/test/CodeGen/X86/seh-stack-realign.ll
+++ b/test/CodeGen/X86/seh-stack-realign.ll
@@ -23,23 +23,16 @@ entry:
to label %__try.cont unwind label %lpad
lpad: ; preds = %entry
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i32 ()* @"filt$main" to i8*)
- %1 = extractvalue { i8*, i32 } %0, 1
- %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @"filt$main" to i8*)) #4
- %matches = icmp eq i32 %1, %2
- br i1 %matches, label %__except, label %eh.resume
+ %cs1 = catchswitch within none [label %__except] unwind to caller
__except: ; preds = %lpad
- %3 = load i32, i32* %__exceptioncode, align 4
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str, i32 0, i32 0), i32 %3) #4
- br label %__try.cont
+ %p = catchpad within %cs1 [i8* bitcast (i32 ()* @"filt$main" to i8*)]
+ %code = load i32, i32* %__exceptioncode, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str, i32 0, i32 0), i32 %code) #4 [ "funclet"(token %p) ]
+ catchret from %p to label %__try.cont
__try.cont: ; preds = %entry, %__except
ret i32 0
-
-eh.resume: ; preds = %lpad
- resume { i8*, i32 } %0
}
define internal i32 @"filt$main"() {
@@ -61,35 +54,30 @@ entry:
; CHECK-LABEL: _main:
; CHECK: Lmain$frame_escape_0 = [[code_offs:[-0-9]+]]
-; CHECK: Lmain$frame_escape_1 = [[reg_offs:[-0-9]+]]
-; CHECK: movl %esp, [[reg_offs]](%esi)
+; CHECK: movl %esp, [[reg_offs:[-0-9]+]](%esi)
; CHECK: movl $L__ehtable$main,
; EH state 0
; CHECK: movl $0, 40(%esi)
; CHECK: calll _crash
; CHECK: retl
-; CHECK: # Block address taken
+; CHECK: LBB0_[[lpbb:[0-9]+]]: # %__except
; Restore ESP
; CHECK: movl -24(%ebp), %esp
; Restore ESI
-; CHECK: movl $Lmain$parent_frame_offset, %eax
-; CHECK: negl %eax
-; CHECK: leal -24(%ebp,%eax), %esi
+; CHECK: leal -44(%ebp), %esi
; Restore EBP
-; CHECK: movl 12(%esi), %ebp # 4-byte Reload
-; EH state -1
+; CHECK: movl 12(%esi), %ebp
; CHECK: movl [[code_offs]](%esi), %[[code:[a-z]+]]
-; CHECK: movl $-1, 40(%esi)
; CHECK-DAG: movl %[[code]], 4(%esp)
; CHECK-DAG: movl $_str, (%esp)
; CHECK: calll _printf
; CHECK: .section .xdata,"dr"
-; CHECK: Lmain$parent_frame_offset = Lmain$frame_escape_1
+; CHECK: Lmain$parent_frame_offset = [[reg_offs]]
; CHECK: L__ehtable$main
; CHECK-NEXT: .long -1
; CHECK-NEXT: .long _filt$main
-; CHECK-NEXT: .long Ltmp{{[0-9]+}}
+; CHECK-NEXT: .long LBB0_[[lpbb]]
; CHECK-LABEL: _filt$main:
; CHECK: pushl %ebp
diff --git a/test/CodeGen/X86/setcc-lowering.ll b/test/CodeGen/X86/setcc-lowering.ll
index 3149fb51576f..77739e72fcc8 100644
--- a/test/CodeGen/X86/setcc-lowering.ll
+++ b/test/CodeGen/X86/setcc-lowering.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
; Verify that we don't crash during codegen due to a wrong lowering
diff --git a/test/CodeGen/X86/setcc.ll b/test/CodeGen/X86/setcc.ll
index 6f1ddbdc6aca..b4847c54ffaf 100644
--- a/test/CodeGen/X86/setcc.ll
+++ b/test/CodeGen/X86/setcc.ll
@@ -34,3 +34,23 @@ entry:
%iftmp.2.0 = select i1 %0, i64 64, i64 0 ; <i64> [#uses=1]
ret i64 %iftmp.2.0
}
+
+@v4 = common global i32 0, align 4
+
+define i32 @t4(i32 %a) {
+entry:
+; CHECK-LABEL: t4:
+; CHECK: movq _v4@GOTPCREL(%rip), %rax
+; CHECK: cmpl $1, (%rax)
+; CHECK: sbbl %eax, %eax
+; CHECK: andl $32768, %eax
+; CHECK: leal 65536(%rax,%rax), %eax
+ %0 = load i32, i32* @v4, align 4
+ %not.tobool = icmp eq i32 %0, 0
+ %conv.i = sext i1 %not.tobool to i16
+ %call.lobit = lshr i16 %conv.i, 15
+ %add.i.1 = add nuw nsw i16 %call.lobit, 1
+ %conv4.2 = zext i16 %add.i.1 to i32
+ %add = shl nuw nsw i32 %conv4.2, 16
+ ret i32 %add
+}
diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll
index 63b6ec55fac8..fdeddffdfb0e 100644
--- a/test/CodeGen/X86/shift-bmi2.ll
+++ b/test/CodeGen/X86/shift-bmi2.ll
@@ -30,11 +30,10 @@ entry:
%x = load i32, i32* %p
%shl = shl i32 %x, %shamt
; BMI2: shl32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: shl32p
-; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i32 %shl
}
@@ -75,7 +74,7 @@ entry:
%x = load i64, i64* %p
%shl = shl i64 %x, %shamt
; BMI264: shl64p
-; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i64 %shl
}
@@ -107,11 +106,10 @@ entry:
%x = load i32, i32* %p
%shl = lshr i32 %x, %shamt
; BMI2: lshr32p
-; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: lshr32p
-; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i32 %shl
}
@@ -130,7 +128,7 @@ entry:
%x = load i64, i64* %p
%shl = lshr i64 %x, %shamt
; BMI264: lshr64p
-; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i64 %shl
}
@@ -153,10 +151,10 @@ entry:
%shl = ashr i32 %x, %shamt
; BMI2: ashr32p
; Source order scheduling prevents folding, rdar:14208996.
-; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: ashr32p
-; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i32 %shl
}
@@ -175,7 +173,7 @@ entry:
%x = load i64, i64* %p
%shl = ashr i64 %x, %shamt
; BMI264: ashr64p
-; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
+; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i64 %shl
}
diff --git a/test/CodeGen/X86/shrink-wrap-chkstk.ll b/test/CodeGen/X86/shrink-wrap-chkstk.ll
new file mode 100644
index 000000000000..c0b2b45e676f
--- /dev/null
+++ b/test/CodeGen/X86/shrink-wrap-chkstk.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s
+
+; chkstk cannot come before the usual prologue, since it adjusts ESP.
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+%struct.S = type { [12 x i8] }
+
+define x86_thiscallcc void @call_inalloca(i1 %x) {
+entry:
+ %argmem = alloca inalloca <{ %struct.S }>, align 4
+ %argidx1 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 0
+ %argidx2 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0, i32 0, i32 1
+ store i8 42, i8* %argidx2, align 4
+ br i1 %x, label %bb1, label %bb2
+
+bb1:
+ store i8 42, i8* %argidx1, align 4
+ br label %bb2
+
+bb2:
+ call void @inalloca_params(<{ %struct.S }>* inalloca nonnull %argmem)
+ ret void
+}
+
+; CHECK-LABEL: _call_inalloca: # @call_inalloca
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
+; CHECK: movl $12, %eax
+; CHECK: calll __chkstk
+; CHECK: calll _inalloca_params
+; CHECK: movl %ebp, %esp
+; CHECK: popl %ebp
+; CHECK: retl
+
+declare void @inalloca_params(<{ %struct.S }>* inalloca)
diff --git a/test/CodeGen/X86/slow-div.ll b/test/CodeGen/X86/slow-div.ll
index 52223824bf96..82928521ac2b 100644
--- a/test/CodeGen/X86/slow-div.ll
+++ b/test/CodeGen/X86/slow-div.ll
@@ -25,4 +25,19 @@ entry:
ret i64 %div
}
+; Verify that no extra code is generated when optimizing for size.
+
+define i32 @div32_optsize(i32 %a, i32 %b) optsize {
+; DIV32-LABEL: div32_optsize:
+; DIV32-NOT: divb
+ %div = sdiv i32 %a, %b
+ ret i32 %div
+}
+
+define i32 @div32_minsize(i32 %a, i32 %b) minsize {
+; DIV32-LABEL: div32_minsize:
+; DIV32-NOT: divb
+ %div = sdiv i32 %a, %b
+ ret i32 %div
+}
diff --git a/test/CodeGen/X86/slow-unaligned-mem.ll b/test/CodeGen/X86/slow-unaligned-mem.ll
new file mode 100644
index 000000000000..27cbef681b7e
--- /dev/null
+++ b/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -0,0 +1,95 @@
+; Intel chips with slow unaligned memory accesses
+
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefix=SLOW
+
+; Intel chips with fast unaligned memory accesses
+
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake 2>&1 | FileCheck %s --check-prefix=FAST
+
+; AMD chips with slow unaligned memory accesses
+
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefix=SLOW
+
+; AMD chips with fast unaligned memory accesses
+
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
+
+; Other chips with slow unaligned memory accesses
+
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefix=SLOW
+
+; Verify that the slow/fast unaligned memory attribute is set correctly for each CPU model.
+; Slow chips use 4-byte stores. Fast chips with SSE or later use something other than 4-byte stores.
+; Chips that don't have SSE use 4-byte stores either way, so they're not tested.
+
+; Also verify that SSE4.2 or SSE4a imply fast unaligned accesses.
+
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4.2 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4a 2>&1 | FileCheck %s --check-prefix=FAST
+
+define void @store_zeros(i8* %a) {
+; SLOW-NOT: not a recognized processor
+; SLOW-LABEL: store_zeros:
+; SLOW: # BB#0:
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+; SLOW-NEXT: movl
+;
+; FAST-NOT: not a recognized processor
+; FAST-LABEL: store_zeros:
+; FAST: # BB#0:
+; FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; FAST-NOT: movl
+ call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
diff --git a/test/CodeGen/X86/soft-fp.ll b/test/CodeGen/X86/soft-fp.ll
index fa38d1044a48..138e66c394ba 100644
--- a/test/CodeGen/X86/soft-fp.ll
+++ b/test/CodeGen/X86/soft-fp.ll
@@ -1,7 +1,14 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,+soft-float | FileCheck %s
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+soft-float | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse,+soft-float \
+; RUN: | FileCheck %s --check-prefix=SOFT1 --check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2,+soft-float \
+; RUN: | FileCheck %s --check-prefix=SOFT2 --check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse \
+; RUN: | FileCheck %s --check-prefix=SSE1 --check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 \
+; RUN: | FileCheck %s --check-prefix=SSE2 --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-gnux32 -mattr=+mmx,+sse2,+soft-float | FileCheck %s
-; CHECK-NOT: xmm{[0-9]+}
+; CHECK-NOT: xmm{{[0-9]+}}
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
@@ -14,6 +21,8 @@ entry:
call void @bar(%struct.__va_list_tag* %va3) nounwind
call void @llvm.va_end(i8* %va12)
ret i32 undef
+; CHECK-LABEL: t1:
+; CHECK: ret{{[lq]}}
}
declare void @llvm.va_start(i8*) nounwind
@@ -26,4 +35,23 @@ define float @t2(float %a, float %b) nounwind readnone {
entry:
%0 = fadd float %a, %b ; <float> [#uses=1]
ret float %0
+; CHECK-LABEL: t2:
+; SOFT1-NOT: xmm{{[0-9]+}}
+; SOFT2-NOT: xmm{{[0-9]+}}
+; SSE1: xmm{{[0-9]+}}
+; SSE2: xmm{{[0-9]+}}
+; CHECK: ret{{[lq]}}
+}
+
+; soft-float means no SSE instruction and passing fp128 as pair of i64.
+define fp128 @t3(fp128 %a, fp128 %b) nounwind readnone {
+entry:
+ %0 = fadd fp128 %b, %a
+ ret fp128 %0
+; CHECK-LABEL: t3:
+; SOFT1-NOT: xmm{{[0-9]+}}
+; SOFT2-NOT: xmm{{[0-9]+}}
+; SSE1: xmm{{[0-9]+}}
+; SSE2: xmm{{[0-9]+}}
+; CHECK: ret{{[lq]}}
}
diff --git a/test/CodeGen/X86/soft-sitofp.ll b/test/CodeGen/X86/soft-sitofp.ll
new file mode 100644
index 000000000000..acb4bb906e70
--- /dev/null
+++ b/test/CodeGen/X86/soft-sitofp.ll
@@ -0,0 +1,169 @@
+; RUN: llc -mtriple=i386-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+
+; Function Attrs: nounwind
+; CHECK-LABEL: s64_to_d:
+; CHECK: call{{l|q}} __floatdidf
+define double @s64_to_d(i64 %n) #0 {
+entry:
+ %conv = sitofp i64 %n to double
+ ret double %conv
+}
+
+; CHECK-LABEL: s64_to_f:
+; CHECK: call{{l|q}} __floatdisf
+define float @s64_to_f(i64 %n) #0 {
+entry:
+ %conv = sitofp i64 %n to float
+ ret float %conv
+}
+
+; CHECK-LABEL: s32_to_d:
+; CHECK: call{{l|q}} __floatsidf
+define double @s32_to_d(i32 %n) #0 {
+entry:
+ %conv = sitofp i32 %n to double
+ ret double %conv
+}
+
+; CHECK-LABEL: s32_to_f:
+; CHECK: call{{l|q}} __floatsisf
+define float @s32_to_f(i32 %n) #0 {
+entry:
+ %conv = sitofp i32 %n to float
+ ret float %conv
+}
+
+; CHECK-LABEL: u64_to_d:
+; CHECK: call{{l|q}} __floatundidf
+define double @u64_to_d(i64 %n) #0 {
+entry:
+ %conv = uitofp i64 %n to double
+ ret double %conv
+}
+
+; CHECK-LABEL: u64_to_f:
+; CHECK: call{{l|q}} __floatundisf
+define float @u64_to_f(i64 %n) #0 {
+entry:
+ %conv = uitofp i64 %n to float
+ ret float %conv
+}
+
+; CHECK-LABEL: u32_to_d:
+; CHECK: call{{l|q}} __floatunsidf
+define double @u32_to_d(i32 %n) #0 {
+entry:
+ %conv = uitofp i32 %n to double
+ ret double %conv
+}
+
+; CHECK-LABEL: u32_to_f:
+; CHECK: call{{l|q}} __floatunsisf
+define float @u32_to_f(i32 %n) #0 {
+entry:
+ %conv = uitofp i32 %n to float
+ ret float %conv
+}
+
+; CHECK-LABEL: d_to_s64:
+; CHECK: call{{l|q}} __fixdfdi
+define i64 @d_to_s64(double %n) #0 {
+entry:
+ %conv = fptosi double %n to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: d_to_s32:
+; CHECK: call{{l|q}} __fixdfsi
+define i32 @d_to_s32(double %n) #0 {
+entry:
+ %conv = fptosi double %n to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: f_to_s64:
+; CHECK: call{{l|q}} __fixsfdi
+define i64 @f_to_s64(float %n) #0 {
+entry:
+ %conv = fptosi float %n to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: f_to_s32:
+; CHECK: call{{l|q}} __fixsfsi
+define i32 @f_to_s32(float %n) #0 {
+entry:
+ %conv = fptosi float %n to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: d_to_u64:
+; CHECK: call{{l|q}} __fixunsdfdi
+define i64 @d_to_u64(double %n) #0 {
+entry:
+ %conv = fptoui double %n to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: d_to_u32:
+; CHECK: call{{l|q}} __fixunsdfsi
+define i32 @d_to_u32(double %n) #0 {
+entry:
+ %conv = fptoui double %n to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: f_to_u64:
+; CHECK: call{{l|q}} __fixunssfdi
+define i64 @f_to_u64(float %n) #0 {
+entry:
+ %conv = fptoui float %n to i64
+ ret i64 %conv
+}
+
+; CHECK-LABEL: f_to_u32:
+; CHECK: call{{l|q}} __fixunssfsi
+define i32 @f_to_u32(float %n) #0 {
+entry:
+ %conv = fptoui float %n to i32
+ ret i32 %conv
+}
+
+; CHECK-LABEL: f_to_s8:
+; CHECK: call{{l|q}} __fixsfsi
+define i8 @f_to_s8(float %f, i8 %i) #0 {
+entry:
+ %conv = fptosi float %f to i8
+ %add = add i8 %conv, %i
+ ret i8 %add
+}
+
+; CHECK-LABEL: f_to_u8:
+; CHECK: call{{l|q}} __fixunssfsi
+define i8 @f_to_u8(float %f, i8 %i) #0 {
+entry:
+ %conv = fptoui float %f to i8
+ %add = add i8 %conv, %i
+ ret i8 %add
+}
+
+; CHECK-LABEL: f_to_s16:
+; CHECK: call{{l|q}} __fixsfsi
+define i16 @f_to_s16(float %f, i16 %i) #0 {
+entry:
+ %conv = fptosi float %f to i16
+ %add = add i16 %conv, %i
+ ret i16 %add
+}
+
+; CHECK-LABEL: f_to_u16:
+; CHECK: call{{l|q}} __fixunssfsi
+define i16 @f_to_u16(float %f, i16 %i) #0 {
+entry:
+ %conv = fptoui float %f to i16
+ %add = add i16 %conv, %i
+ ret i16 %add
+}
+
+attributes #0 = { nounwind "use-soft-float"="true" }
diff --git a/test/CodeGen/X86/splat-for-size.ll b/test/CodeGen/X86/splat-for-size.ll
index 635aa821d78a..277472f49b3a 100644
--- a/test/CodeGen/X86/splat-for-size.ll
+++ b/test/CodeGen/X86/splat-for-size.ll
@@ -1,141 +1,191 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s -check-prefix=CHECK --check-prefix=AVX
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s -check-prefix=CHECK --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
-; Check constant loads of every 128-bit and 256-bit vector type
+; Check constant loads of every 128-bit and 256-bit vector type
; for size optimization using splat ops available with AVX and AVX2.
; There is no AVX broadcast from double to 128-bit vector because movddup has been around since SSE3 (grrr).
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
+; CHECK-LABEL: splat_v2f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%add = fadd <2 x double> %x, <double 1.0, double 1.0>
ret <2 x double> %add
-; CHECK-LABEL: splat_v2f64
-; CHECK: vmovddup
-; CHECK: vaddpd
-; CHECK-NEXT: retq
}
-define <4 x double> @splat_v4f64(<4 x double> %x) #0 {
+define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
+; CHECK-LABEL: splat_v4f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
ret <4 x double> %add
-; CHECK-LABEL: splat_v4f64
-; CHECK: vbroadcastsd
-; CHECK-NEXT: vaddpd
-; CHECK-NEXT: retq
}
define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: splat_v4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
ret <4 x float> %add
-; CHECK-LABEL: splat_v4f32
-; CHECK: vbroadcastss
-; CHECK-NEXT: vaddps
-; CHECK-NEXT: retq
}
-define <8 x float> @splat_v8f32(<8 x float> %x) #0 {
+define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
+; CHECK-LABEL: splat_v8f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
+; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
ret <8 x float> %add
-; CHECK-LABEL: splat_v8f32
-; CHECK: vbroadcastss
-; CHECK-NEXT: vaddps
-; CHECK-NEXT: retq
}
; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
-define <2 x i64> @splat_v2i64(<2 x i64> %x) #0 {
+define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
+; CHECK-LABEL: splat_v2i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
+; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
%add = add <2 x i64> %x, <i64 1, i64 1>
ret <2 x i64> %add
-; CHECK-LABEL: splat_v2i64
-; CHECK: vmovddup
-; CHECK: vpaddq
-; CHECK-NEXT: retq
}
; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
; and then we fake it: use vmovddup to splat 64-bit value.
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
+; AVX-LABEL: splat_v4i64:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
+; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: splat_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
%add = add <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %add
-; CHECK-LABEL: splat_v4i64
-; AVX: vmovddup
-; AVX: vpaddq
-; AVX: vpaddq
-; AVX2: vpbroadcastq
-; AVX2: vpaddq
-; CHECK: retq
}
; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
-define <4 x i32> @splat_v4i32(<4 x i32> %x) #0 {
+define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
+; AVX-LABEL: splat_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: splat_v4i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
%add = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %add
-; CHECK-LABEL: splat_v4i32
-; AVX: vbroadcastss
-; AVX2: vpbroadcastd
-; CHECK-NEXT: vpaddd
-; CHECK-NEXT: retq
}
; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
+; AVX-LABEL: splat_v8i32:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: splat_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
%add = add <8 x i32> %x, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %add
-; CHECK-LABEL: splat_v8i32
-; AVX: vbroadcastss
-; AVX: vpaddd
-; AVX: vpaddd
-; AVX2: vpbroadcastd
-; AVX2: vpaddd
-; CHECK: retq
}
; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
-define <8 x i16> @splat_v8i16(<8 x i16> %x) #0 {
+define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
+; AVX-LABEL: splat_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: splat_v8i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw {{.*}}(%rip), %xmm1
+; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
%add = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %add
-; CHECK-LABEL: splat_v8i16
-; AVX-NOT: broadcast
-; AVX2: vpbroadcastw
-; CHECK: vpaddw
-; CHECK-NEXT: retq
}
; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
+; AVX-LABEL: splat_v16i16:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
+; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: splat_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
%add = add <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <16 x i16> %add
-; CHECK-LABEL: splat_v16i16
-; AVX-NOT: broadcast
-; AVX: vpaddw
-; AVX: vpaddw
-; AVX2: vpbroadcastw
-; AVX2: vpaddw
-; CHECK: retq
}
; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
-define <16 x i8> @splat_v16i8(<16 x i8> %x) #0 {
+define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
+; AVX-LABEL: splat_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: splat_v16i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb {{.*}}(%rip), %xmm1
+; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
%add = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <16 x i8> %add
-; CHECK-LABEL: splat_v16i8
-; AVX-NOT: broadcast
-; AVX2: vpbroadcastb
-; CHECK: vpaddb
-; CHECK-NEXT: retq
}
; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
+; AVX-LABEL: splat_v32i8:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: splat_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
%add = add <32 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <32 x i8> %add
-; CHECK-LABEL: splat_v32i8
-; AVX-NOT: broadcast
-; AVX: vpaddb
-; AVX: vpaddb
-; AVX2: vpbroadcastb
-; AVX2: vpaddb
-; CHECK: retq
}
; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
@@ -144,7 +194,7 @@ define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
@A = common global <3 x i64> zeroinitializer, align 32
-define <8 x i64> @pr23259() #0 {
+define <8 x i64> @pr23259() #1 {
entry:
%0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
%1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
@@ -153,3 +203,4 @@ entry:
}
attributes #0 = { optsize }
+attributes #1 = { minsize }
diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll
index 9b851db8121c..386409a674ef 100644
--- a/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!sqrtf,!vec-sqrtf,!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE
@@ -99,8 +100,8 @@ define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
; ESTIMATE-LABEL: reciprocal_square_root_v4f32:
; ESTIMATE: # BB#0:
; ESTIMATE-NEXT: vrsqrtps %xmm0, %xmm1
-; ESTIMATE-NEXT: vmulps %xmm1, %xmm1, %xmm2
-; ESTIMATE-NEXT: vmulps %xmm0, %xmm2, %xmm0
+; ESTIMATE-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; ESTIMATE-NEXT: vmulps %xmm0, %xmm1, %xmm0
; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %xmm1, %xmm1
; ESTIMATE-NEXT: vmulps %xmm1, %xmm0, %xmm0
@@ -124,8 +125,8 @@ define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 {
; ESTIMATE-LABEL: reciprocal_square_root_v8f32:
; ESTIMATE: # BB#0:
; ESTIMATE-NEXT: vrsqrtps %ymm0, %ymm1
-; ESTIMATE-NEXT: vmulps %ymm1, %ymm1, %ymm2
-; ESTIMATE-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; ESTIMATE-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; ESTIMATE-NEXT: vmulps %ymm0, %ymm1, %ymm0
; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0
; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; ESTIMATE-NEXT: vmulps %ymm1, %ymm0, %ymm0
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 9441cc0002fb..4fbb6e42ccae 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=nehalem | FileCheck %s
define <4 x float> @a(<4 x float>* %y) nounwind {
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index e4d0373299fb..f0341277851d 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
; Some of these patterns can be matched as SSE min or max. Some of
-; then can be matched provided that the operands are swapped.
+; them can be matched provided that the operands are swapped.
; Some of them can't be matched at all and require a comparison
; and a conditional branch.
diff --git a/test/CodeGen/X86/sse-only.ll b/test/CodeGen/X86/sse-only.ll
new file mode 100644
index 000000000000..3fe9faaba850
--- /dev/null
+++ b/test/CodeGen/X86/sse-only.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=x86 -mattr=+sse2,-mmx | FileCheck %s
+
+; Test that turning off mmx doesn't turn off sse
+
+define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movapd (%ecx), %xmm0
+; CHECK-NEXT: movlpd {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT: movapd %xmm0, (%eax)
+; CHECK-NEXT: retl
+ %tmp3 = load <2 x double>, <2 x double>* %A, align 16
+ %tmp7 = insertelement <2 x double> undef, double %B, i32 0
+ %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
+ store <2 x double> %tmp9, <2 x double>* %r, align 16
+ ret void
+}
diff --git a/test/CodeGen/X86/sse-scalar-fp-arith-unary.ll b/test/CodeGen/X86/sse-scalar-fp-arith-unary.ll
index fab4f90279e8..63751e1ab7e1 100644
--- a/test/CodeGen/X86/sse-scalar-fp-arith-unary.ll
+++ b/test/CodeGen/X86/sse-scalar-fp-arith-unary.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck --check-prefix=SSE %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse4.1 < %s | FileCheck --check-prefix=SSE %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck --check-prefix=AVX %s
diff --git a/test/CodeGen/X86/sse2-vector-shifts.ll b/test/CodeGen/X86/sse2-vector-shifts.ll
index 45028cf4bd37..d1c7adb6263b 100644
--- a/test/CodeGen/X86/sse2-vector-shifts.ll
+++ b/test/CodeGen/X86/sse2-vector-shifts.ll
@@ -1,367 +1,373 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 | FileCheck %s
; SSE2 Logical Shift Left
define <8 x i16> @test_sllw_1(<8 x i16> %InVec) {
+; CHECK-LABEL: test_sllw_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: retq
entry:
%shl = shl <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
ret <8 x i16> %shl
}
-; CHECK-LABEL: test_sllw_1:
-; CHECK-NOT: psllw $0, %xmm0
-; CHECK: ret
-
define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
+; CHECK-LABEL: test_sllw_2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: paddw %xmm0, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %shl
}
-; CHECK-LABEL: test_sllw_2:
-; CHECK: paddw %xmm0, %xmm0
-; CHECK-NEXT: ret
-
define <8 x i16> @test_sllw_3(<8 x i16> %InVec) {
+; CHECK-LABEL: test_sllw_3:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psllw $15, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = shl <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
ret <8 x i16> %shl
}
-; CHECK-LABEL: test_sllw_3:
-; CHECK: psllw $15, %xmm0
-; CHECK-NEXT: ret
-
define <4 x i32> @test_slld_1(<4 x i32> %InVec) {
+; CHECK-LABEL: test_slld_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: retq
entry:
%shl = shl <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
ret <4 x i32> %shl
}
-; CHECK-LABEL: test_slld_1:
-; CHECK-NOT: pslld $0, %xmm0
-; CHECK: ret
-
define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
+; CHECK-LABEL: test_slld_2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: paddd %xmm0, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %shl
}
-; CHECK-LABEL: test_slld_2:
-; CHECK: paddd %xmm0, %xmm0
-; CHECK-NEXT: ret
-
define <4 x i32> @test_slld_3(<4 x i32> %InVec) {
+; CHECK-LABEL: test_slld_3:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: pslld $31, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = shl <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %shl
}
-; CHECK-LABEL: test_slld_3:
-; CHECK: pslld $31, %xmm0
-; CHECK-NEXT: ret
-
define <2 x i64> @test_sllq_1(<2 x i64> %InVec) {
+; CHECK-LABEL: test_sllq_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: retq
entry:
%shl = shl <2 x i64> %InVec, <i64 0, i64 0>
ret <2 x i64> %shl
}
-; CHECK-LABEL: test_sllq_1:
-; CHECK-NOT: psllq $0, %xmm0
-; CHECK: ret
-
define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
+; CHECK-LABEL: test_sllq_2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: paddq %xmm0, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = shl <2 x i64> %InVec, <i64 1, i64 1>
ret <2 x i64> %shl
}
-; CHECK-LABEL: test_sllq_2:
-; CHECK: paddq %xmm0, %xmm0
-; CHECK-NEXT: ret
-
define <2 x i64> @test_sllq_3(<2 x i64> %InVec) {
+; CHECK-LABEL: test_sllq_3:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psllq $63, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = shl <2 x i64> %InVec, <i64 63, i64 63>
ret <2 x i64> %shl
}
-; CHECK-LABEL: test_sllq_3:
-; CHECK: psllq $63, %xmm0
-; CHECK-NEXT: ret
-
; SSE2 Arithmetic Shift
define <8 x i16> @test_sraw_1(<8 x i16> %InVec) {
+; CHECK-LABEL: test_sraw_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: retq
entry:
%shl = ashr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
ret <8 x i16> %shl
}
-; CHECK-LABEL: test_sraw_1:
-; CHECK-NOT: psraw $0, %xmm0
-; CHECK: ret
-
define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
+; CHECK-LABEL: test_sraw_2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psraw $1, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %shl
}
-; CHECK-LABEL: test_sraw_2:
-; CHECK: psraw $1, %xmm0
-; CHECK-NEXT: ret
-
define <8 x i16> @test_sraw_3(<8 x i16> %InVec) {
+; CHECK-LABEL: test_sraw_3:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psraw $15, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = ashr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
ret <8 x i16> %shl
}
-; CHECK-LABEL: test_sraw_3:
-; CHECK: psraw $15, %xmm0
-; CHECK-NEXT: ret
-
define <4 x i32> @test_srad_1(<4 x i32> %InVec) {
+; CHECK-LABEL: test_srad_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: retq
entry:
%shl = ashr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
ret <4 x i32> %shl
}
-; CHECK-LABEL: test_srad_1:
-; CHECK-NOT: psrad $0, %xmm0
-; CHECK: ret
-
define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
+; CHECK-LABEL: test_srad_2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psrad $1, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %shl
}
-; CHECK-LABEL: test_srad_2:
-; CHECK: psrad $1, %xmm0
-; CHECK-NEXT: ret
-
define <4 x i32> @test_srad_3(<4 x i32> %InVec) {
+; CHECK-LABEL: test_srad_3:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psrad $31, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = ashr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %shl
}
-; CHECK-LABEL: test_srad_3:
-; CHECK: psrad $31, %xmm0
-; CHECK-NEXT: ret
-
; SSE Logical Shift Right
define <8 x i16> @test_srlw_1(<8 x i16> %InVec) {
+; CHECK-LABEL: test_srlw_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: retq
entry:
%shl = lshr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
ret <8 x i16> %shl
}
-; CHECK-LABEL: test_srlw_1:
-; CHECK-NOT: psrlw $0, %xmm0
-; CHECK: ret
-
define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
+; CHECK-LABEL: test_srlw_2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psrlw $1, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %shl
}
-; CHECK-LABEL: test_srlw_2:
-; CHECK: psrlw $1, %xmm0
-; CHECK-NEXT: ret
-
define <8 x i16> @test_srlw_3(<8 x i16> %InVec) {
+; CHECK-LABEL: test_srlw_3:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psrlw $15, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = lshr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
ret <8 x i16> %shl
}
-; CHECK-LABEL: test_srlw_3:
-; CHECK: psrlw $15, %xmm0
-; CHECK-NEXT: ret
-
define <4 x i32> @test_srld_1(<4 x i32> %InVec) {
+; CHECK-LABEL: test_srld_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: retq
entry:
%shl = lshr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
ret <4 x i32> %shl
}
-; CHECK-LABEL: test_srld_1:
-; CHECK-NOT: psrld $0, %xmm0
-; CHECK: ret
-
define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
+; CHECK-LABEL: test_srld_2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psrld $1, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %shl
}
-; CHECK-LABEL: test_srld_2:
-; CHECK: psrld $1, %xmm0
-; CHECK-NEXT: ret
-
define <4 x i32> @test_srld_3(<4 x i32> %InVec) {
+; CHECK-LABEL: test_srld_3:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psrld $31, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = lshr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %shl
}
-; CHECK-LABEL: test_srld_3:
-; CHECK: psrld $31, %xmm0
-; CHECK-NEXT: ret
-
define <2 x i64> @test_srlq_1(<2 x i64> %InVec) {
+; CHECK-LABEL: test_srlq_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: retq
entry:
%shl = lshr <2 x i64> %InVec, <i64 0, i64 0>
ret <2 x i64> %shl
}
-; CHECK-LABEL: test_srlq_1:
-; CHECK-NOT: psrlq $0, %xmm0
-; CHECK: ret
-
define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
+; CHECK-LABEL: test_srlq_2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psrlq $1, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = lshr <2 x i64> %InVec, <i64 1, i64 1>
ret <2 x i64> %shl
}
-; CHECK-LABEL: test_srlq_2:
-; CHECK: psrlq $1, %xmm0
-; CHECK-NEXT: ret
-
define <2 x i64> @test_srlq_3(<2 x i64> %InVec) {
+; CHECK-LABEL: test_srlq_3:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: psrlq $63, %xmm0
+; CHECK-NEXT: retq
entry:
%shl = lshr <2 x i64> %InVec, <i64 63, i64 63>
ret <2 x i64> %shl
}
-; CHECK-LABEL: test_srlq_3:
-; CHECK: psrlq $63, %xmm0
-; CHECK-NEXT: ret
-
-
-; CHECK-LABEL: sra_sra_v4i32:
-; CHECK: psrad $6, %xmm0
-; CHECK-NEXT: retq
define <4 x i32> @sra_sra_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: sra_sra_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: psrad $6, %xmm0
+; CHECK-NEXT: retq
%sra0 = ashr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
%sra1 = ashr <4 x i32> %sra0, <i32 4, i32 4, i32 4, i32 4>
ret <4 x i32> %sra1
}
-; CHECK-LABEL: @srl_srl_v4i32
-; CHECK: psrld $6, %xmm0
-; CHECK-NEXT: ret
define <4 x i32> @srl_srl_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: srl_srl_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: psrld $6, %xmm0
+; CHECK-NEXT: retq
%srl0 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
%srl1 = lshr <4 x i32> %srl0, <i32 4, i32 4, i32 4, i32 4>
ret <4 x i32> %srl1
}
-; CHECK-LABEL: @srl_shl_v4i32
-; CHECK: andps
-; CHECK-NEXT: retq
define <4 x i32> @srl_shl_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: srl_shl_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
%srl0 = shl <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
%srl1 = lshr <4 x i32> %srl0, <i32 4, i32 4, i32 4, i32 4>
ret <4 x i32> %srl1
}
-; CHECK-LABEL: @srl_sra_31_v4i32
-; CHECK: psrld $31, %xmm0
-; CHECK-NEXT: ret
define <4 x i32> @srl_sra_31_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: srl_sra_31_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: psrld $31, %xmm0
+; CHECK-NEXT: retq
%sra = ashr <4 x i32> %x, %y
%srl1 = lshr <4 x i32> %sra, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %srl1
}
-; CHECK-LABEL: @shl_shl_v4i32
-; CHECK: pslld $6, %xmm0
-; CHECK-NEXT: ret
define <4 x i32> @shl_shl_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: shl_shl_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: pslld $6, %xmm0
+; CHECK-NEXT: retq
%shl0 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
%shl1 = shl <4 x i32> %shl0, <i32 4, i32 4, i32 4, i32 4>
ret <4 x i32> %shl1
}
-; CHECK-LABEL: @shl_sra_v4i32
-; CHECK: andps
-; CHECK-NEXT: ret
define <4 x i32> @shl_sra_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: shl_sra_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
%shl0 = ashr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
%shl1 = shl <4 x i32> %shl0, <i32 4, i32 4, i32 4, i32 4>
ret <4 x i32> %shl1
}
-; CHECK-LABEL: @shl_srl_v4i32
-; CHECK: pslld $3, %xmm0
-; CHECK-NEXT: pand
-; CHECK-NEXT: ret
define <4 x i32> @shl_srl_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: shl_srl_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: pslld $3, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
%shl0 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
%shl1 = shl <4 x i32> %shl0, <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %shl1
}
-; CHECK-LABEL: @shl_zext_srl_v4i32
-; CHECK: andps
-; CHECK-NEXT: ret
define <4 x i32> @shl_zext_srl_v4i32(<4 x i16> %x) nounwind {
+; CHECK-LABEL: shl_zext_srl_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: retq
%srl = lshr <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
%zext = zext <4 x i16> %srl to <4 x i32>
%shl = shl <4 x i32> %zext, <i32 2, i32 2, i32 2, i32 2>
ret <4 x i32> %shl
}
-; CHECK: @sra_trunc_srl_v4i32
-; CHECK: psrad $19, %xmm0
-; CHECK-NEXT: retq
define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: sra_trunc_srl_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: psrad $19, %xmm0
+; CHECK-NEXT: retq
%srl = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
%trunc = trunc <4 x i32> %srl to <4 x i16>
%sra = ashr <4 x i16> %trunc, <i16 3, i16 3, i16 3, i16 3>
ret <4 x i16> %sra
}
-; CHECK-LABEL: @shl_zext_shl_v4i32
-; CHECK: pand
-; CHECK-NEXT: pslld $19, %xmm0
-; CHECK-NEXT: ret
define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
+; CHECK-LABEL: shl_zext_shl_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: pslld $19, %xmm0
+; CHECK-NEXT: retq
%shl0 = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
%ext = zext <4 x i16> %shl0 to <4 x i32>
%shl1 = shl <4 x i32> %ext, <i32 17, i32 17, i32 17, i32 17>
ret <4 x i32> %shl1
}
-; CHECK-LABEL: @sra_v4i32
-; CHECK: psrad $3, %xmm0
-; CHECK-NEXT: ret
define <4 x i32> @sra_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: sra_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: psrad $3, %xmm0
+; CHECK-NEXT: retq
%sra = ashr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %sra
}
-; CHECK-LABEL: @srl_v4i32
-; CHECK: psrld $3, %xmm0
-; CHECK-NEXT: ret
define <4 x i32> @srl_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: srl_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: psrld $3, %xmm0
+; CHECK-NEXT: retq
%sra = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %sra
}
-; CHECK-LABEL: @shl_v4i32
-; CHECK: pslld $3, %xmm0
-; CHECK-NEXT: ret
define <4 x i32> @shl_v4i32(<4 x i32> %x) nounwind {
+; CHECK-LABEL: shl_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: pslld $3, %xmm0
+; CHECK-NEXT: retq
%sra = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %sra
}
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index d3ee3c6f0454..ed84905b1907 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Tests for SSE2 and below, without SSE3+.
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
diff --git a/test/CodeGen/X86/sse3-avx-addsub-2.ll b/test/CodeGen/X86/sse3-avx-addsub-2.ll
index 71efa3f8f105..79317e4576b9 100644
--- a/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -1,11 +1,20 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX
; Verify that we correctly generate 'addsub' instructions from
; a sequence of vector extracts + float add/sub + vector inserts.
define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test1:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test1:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 0
%2 = extractelement <4 x float> %B, i32 0
%sub = fsub float %1, %2
@@ -24,13 +33,17 @@ define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
%vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2
ret <4 x float> %vecinsert4
}
-; CHECK-LABEL: test1
-; SSE: addsubps
-; AVX: vaddsubps
-; CHECK-NEXT: ret
-
define <4 x float> @test2(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test2:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test2:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 2
%2 = extractelement <4 x float> %B, i32 2
%sub2 = fsub float %1, %2
@@ -41,13 +54,17 @@ define <4 x float> @test2(<4 x float> %A, <4 x float> %B) {
%vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
ret <4 x float> %vecinsert2
}
-; CHECK-LABEL: test2
-; SSE: addsubps
-; AVX: vaddsubps
-; CHECK-NEXT: ret
-
define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test3:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test3:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 0
%2 = extractelement <4 x float> %B, i32 0
%sub = fsub float %1, %2
@@ -58,13 +75,17 @@ define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
%vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 3
ret <4 x float> %vecinsert2
}
-; CHECK-LABEL: test3
-; SSE: addsubps
-; AVX: vaddsubps
-; CHECK-NEXT: ret
-
define <4 x float> @test4(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test4:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test4:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 2
%2 = extractelement <4 x float> %B, i32 2
%sub = fsub float %1, %2
@@ -75,13 +96,17 @@ define <4 x float> @test4(<4 x float> %A, <4 x float> %B) {
%vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 1
ret <4 x float> %vecinsert2
}
-; CHECK-LABEL: test4
-; SSE: addsubps
-; AVX: vaddsubps
-; CHECK-NEXT: ret
-
define <4 x float> @test5(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test5:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test5:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 0
%2 = extractelement <4 x float> %B, i32 0
%sub2 = fsub float %1, %2
@@ -92,13 +117,17 @@ define <4 x float> @test5(<4 x float> %A, <4 x float> %B) {
%vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 1
ret <4 x float> %vecinsert2
}
-; CHECK-LABEL: test5
-; SSE: addsubps
-; AVX: vaddsubps
-; CHECK-NEXT: ret
-
define <4 x float> @test6(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test6:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test6:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 0
%2 = extractelement <4 x float> %B, i32 0
%sub = fsub float %1, %2
@@ -117,13 +146,18 @@ define <4 x float> @test6(<4 x float> %A, <4 x float> %B) {
%vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2
ret <4 x float> %vecinsert4
}
-; CHECK-LABEL: test6
-; SSE: addsubps
-; AVX: vaddsubps
-; CHECK-NEXT: ret
-
define <4 x double> @test7(<4 x double> %A, <4 x double> %B) {
+; SSE-LABEL: test7:
+; SSE: # BB#0:
+; SSE-NEXT: addsubpd %xmm2, %xmm0
+; SSE-NEXT: addsubpd %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test7:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
%1 = extractelement <4 x double> %A, i32 0
%2 = extractelement <4 x double> %B, i32 0
%sub = fsub double %1, %2
@@ -142,15 +176,17 @@ define <4 x double> @test7(<4 x double> %A, <4 x double> %B) {
%vecinsert4 = insertelement <4 x double> %vecinsert3, double %sub2, i32 2
ret <4 x double> %vecinsert4
}
-; CHECK-LABEL: test7
-; SSE: addsubpd
-; SSE-NEXT: addsubpd
-; AVX: vaddsubpd
-; AVX-NOT: vaddsubpd
-; CHECK: ret
-
define <2 x double> @test8(<2 x double> %A, <2 x double> %B) {
+; SSE-LABEL: test8:
+; SSE: # BB#0:
+; SSE-NEXT: addsubpd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test8:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = extractelement <2 x double> %A, i32 0
%2 = extractelement <2 x double> %B, i32 0
%sub = fsub double %1, %2
@@ -161,13 +197,18 @@ define <2 x double> @test8(<2 x double> %A, <2 x double> %B) {
%vecinsert2 = insertelement <2 x double> %vecinsert1, double %add, i32 1
ret <2 x double> %vecinsert2
}
-; CHECK-LABEL: test8
-; SSE: addsubpd
-; AVX: vaddsubpd
-; CHECK: ret
-
define <8 x float> @test9(<8 x float> %A, <8 x float> %B) {
+; SSE-LABEL: test9:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm2, %xmm0
+; SSE-NEXT: addsubps %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test9:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
%1 = extractelement <8 x float> %A, i32 0
%2 = extractelement <8 x float> %B, i32 0
%sub = fsub float %1, %2
@@ -202,65 +243,118 @@ define <8 x float> @test9(<8 x float> %A, <8 x float> %B) {
%vecinsert8 = insertelement <8 x float> %vecinsert7, float %sub4, i32 6
ret <8 x float> %vecinsert8
}
-; CHECK-LABEL: test9
-; SSE: addsubps
-; SSE-NEXT: addsubps
-; AVX: vaddsubps
-; AVX-NOT: vaddsubps
-; CHECK: ret
-
; Verify that we don't generate addsub instruction for the following
; functions.
+
define <4 x float> @test10(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test10:
+; SSE: # BB#0:
+; SSE-NEXT: subss %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test10:
+; AVX: # BB#0:
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 0
%2 = extractelement <4 x float> %B, i32 0
%sub = fsub float %1, %2
%vecinsert1 = insertelement <4 x float> undef, float %sub, i32 0
ret <4 x float> %vecinsert1
}
-; CHECK-LABEL: test10
-; CHECK-NOT: addsubps
-; CHECK: ret
-
define <4 x float> @test11(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test11:
+; SSE: # BB#0:
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE-NEXT: subss %xmm1, %xmm0
+; SSE-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test11:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 2
%2 = extractelement <4 x float> %B, i32 2
%sub = fsub float %1, %2
%vecinsert1 = insertelement <4 x float> undef, float %sub, i32 2
ret <4 x float> %vecinsert1
}
-; CHECK-LABEL: test11
-; CHECK-NOT: addsubps
-; CHECK: ret
-
define <4 x float> @test12(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test12:
+; SSE: # BB#0:
+; SSE-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT: addss %xmm0, %xmm1
+; SSE-NEXT: movsldup {{.*#+}} xmm0 = xmm1[0,0,2,2]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test12:
+; AVX: # BB#0:
+; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 1
%2 = extractelement <4 x float> %B, i32 1
%add = fadd float %1, %2
%vecinsert1 = insertelement <4 x float> undef, float %add, i32 1
ret <4 x float> %vecinsert1
}
-; CHECK-LABEL: test12
-; CHECK-NOT: addsubps
-; CHECK: ret
-
define <4 x float> @test13(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test13:
+; SSE: # BB#0:
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; SSE-NEXT: addss %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test13:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,0]
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 3
%2 = extractelement <4 x float> %B, i32 3
%add = fadd float %1, %2
%vecinsert1 = insertelement <4 x float> undef, float %add, i32 3
ret <4 x float> %vecinsert1
}
-; CHECK-LABEL: test13
-; CHECK-NOT: addsubps
-; CHECK: ret
-
define <4 x float> @test14(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test14:
+; SSE: # BB#0:
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: subss %xmm1, %xmm2
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE-NEXT: subss %xmm1, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1,1,3]
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test14:
+; AVX: # BB#0:
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm0[0],xmm2[3]
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 0
%2 = extractelement <4 x float> %B, i32 0
%sub = fsub float %1, %2
@@ -271,12 +365,32 @@ define <4 x float> @test14(<4 x float> %A, <4 x float> %B) {
%vecinsert2 = insertelement <4 x float> %vecinsert1, float %sub2, i32 2
ret <4 x float> %vecinsert2
}
-; CHECK-LABEL: test14
-; CHECK-NOT: addsubps
-; CHECK: ret
-
define <4 x float> @test15(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test15:
+; SSE: # BB#0:
+; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; SSE-NEXT: addss %xmm0, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0,2,1]
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test15:
+; AVX: # BB#0:
+; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm2[0],xmm0[2,3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 1
%2 = extractelement <4 x float> %B, i32 1
%add = fadd float %1, %2
@@ -287,12 +401,43 @@ define <4 x float> @test15(<4 x float> %A, <4 x float> %B) {
%vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
ret <4 x float> %vecinsert2
}
-; CHECK-LABEL: test15
-; CHECK-NOT: addsubps
-; CHECK: ret
-
define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test16:
+; SSE: # BB#0:
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: subss %xmm0, %xmm2
+; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: shufpd {{.*#+}} xmm3 = xmm3[1,0]
+; SSE-NEXT: movapd %xmm1, %xmm4
+; SSE-NEXT: shufpd {{.*#+}} xmm4 = xmm4[1,0]
+; SSE-NEXT: subss %xmm4, %xmm3
+; SSE-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE-NEXT: addss %xmm0, %xmm4
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; SSE-NEXT: addss %xmm0, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test16:
+; AVX: # BB#0:
+; AVX-NEXT: vsubss %xmm0, %xmm0, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
+; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
+; AVX-NEXT: vsubss %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; AVX-NEXT: vaddss %xmm0, %xmm4, %xmm4
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm4[0],xmm2[2,3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX-NEXT: retq
%1 = extractelement <4 x float> %A, i32 0
%2 = extractelement <4 x float> %B, i32 0
%sub = fsub float %1, undef
@@ -311,11 +456,17 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
%vecinsert4 = insertelement <4 x float> %vecinsert3, float %sub2, i32 2
ret <4 x float> %vecinsert4
}
-; CHECK-LABEL: test16
-; CHECK-NOT: addsubps
-; CHECK: ret
define <2 x float> @test_v2f32(<2 x float> %v0, <2 x float> %v1) {
+; SSE-LABEL: test_v2f32:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_v2f32:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%v2 = extractelement <2 x float> %v0, i32 0
%v3 = extractelement <2 x float> %v1, i32 0
%v4 = extractelement <2 x float> %v0, i32 1
@@ -326,6 +477,3 @@ define <2 x float> @test_v2f32(<2 x float> %v0, <2 x float> %v1) {
%res1 = insertelement <2 x float> %res0, float %add, i32 1
ret <2 x float> %res1
}
-; CHECK-LABEL: test_v2f32
-; CHECK: addsubps %xmm1, %xmm0
-; CHECK-NEXT: retq
diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll
index 76141fc876ae..8665edf8f1d5 100644
--- a/test/CodeGen/X86/sse3-avx-addsub.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=SSE -check-prefix=CHECK
-; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX -check-prefix=CHECK
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX
; Test ADDSUB ISel patterns.
@@ -35,109 +36,207 @@
; }
define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
+; SSE-LABEL: test1:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test1:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%sub = fsub <4 x float> %A, %B
%add = fadd <4 x float> %A, %B
%vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x float> %vecinit6
}
-; CHECK-LABEL: test1
-; SSE: addsubps
-; AVX: vaddsubps
-; CHECK-NEXT: ret
-
define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
+; SSE-LABEL: test2:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm2, %xmm0
+; SSE-NEXT: addsubps %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test2:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
%sub = fsub <8 x float> %A, %B
%add = fadd <8 x float> %A, %B
%vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x float> %vecinit14
}
-; CHECK-LABEL: test2
-; SSE: addsubps
-; SSE-NEXT: addsubps
-; AVX: vaddsubps
-; AVX-NOT: vaddsubps
-; CHECK: ret
-
define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
+; SSE-LABEL: test3:
+; SSE: # BB#0:
+; SSE-NEXT: addsubpd %xmm2, %xmm0
+; SSE-NEXT: addsubpd %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test3:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
%sub = fsub <4 x double> %A, %B
%add = fadd <4 x double> %A, %B
%vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x double> %vecinit6
}
-; CHECK-LABEL: test3
-; SSE: addsubpd
-; SSE: addsubpd
-; AVX: vaddsubpd
-; AVX-NOT: vaddsubpd
-; CHECK: ret
-
define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
+; SSE-LABEL: test4:
+; SSE: # BB#0:
+; SSE-NEXT: addsubpd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test4:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%add = fadd <2 x double> %A, %B
%sub = fsub <2 x double> %A, %B
%vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
ret <2 x double> %vecinit2
}
-; CHECK-LABEL: test4
-; SSE: addsubpd
-; AVX: vaddsubpd
-; CHECK-NEXT: ret
-
define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
+; SSE-LABEL: test1b:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test1b:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = load <4 x float>, <4 x float>* %B
%add = fadd <4 x float> %A, %1
%sub = fsub <4 x float> %A, %1
%vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x float> %vecinit6
}
-; CHECK-LABEL: test1b
-; SSE: addsubps
-; AVX: vaddsubps
-; CHECK-NEXT: ret
-
define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
+; SSE-LABEL: test2b:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps (%rdi), %xmm0
+; SSE-NEXT: addsubps 16(%rdi), %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test2b:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0
+; AVX-NEXT: retq
%1 = load <8 x float>, <8 x float>* %B
%add = fadd <8 x float> %A, %1
%sub = fsub <8 x float> %A, %1
%vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
ret <8 x float> %vecinit14
}
-; CHECK-LABEL: test2b
-; SSE: addsubps
-; SSE-NEXT: addsubps
-; AVX: vaddsubps
-; AVX-NOT: vaddsubps
-; CHECK: ret
-
define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
+; SSE-LABEL: test3b:
+; SSE: # BB#0:
+; SSE-NEXT: addsubpd (%rdi), %xmm0
+; SSE-NEXT: addsubpd 16(%rdi), %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test3b:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0
+; AVX-NEXT: retq
%1 = load <4 x double>, <4 x double>* %B
%add = fadd <4 x double> %A, %1
%sub = fsub <4 x double> %A, %1
%vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x double> %vecinit6
}
-; CHECK-LABEL: test3b
-; SSE: addsubpd
-; SSE: addsubpd
-; AVX: vaddsubpd
-; AVX-NOT: vaddsubpd
-; CHECK: ret
-
define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
+; SSE-LABEL: test4b:
+; SSE: # BB#0:
+; SSE-NEXT: addsubpd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test4b:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
%1 = load <2 x double>, <2 x double>* %B
%sub = fsub <2 x double> %A, %1
%add = fadd <2 x double> %A, %1
%vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
ret <2 x double> %vecinit2
}
-; CHECK-LABEL: test4b
-; SSE: addsubpd
-; AVX: vaddsubpd
-; CHECK-NEXT: ret
+define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
+; SSE-LABEL: test1c:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test1c:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = load <4 x float>, <4 x float>* %B
+ %add = fadd <4 x float> %A, %1
+ %sub = fsub <4 x float> %A, %1
+ %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ ret <4 x float> %vecinit6
+}
+
+define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
+; SSE-LABEL: test2c:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps (%rdi), %xmm0
+; SSE-NEXT: addsubps 16(%rdi), %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test2c:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0
+; AVX-NEXT: retq
+ %1 = load <8 x float>, <8 x float>* %B
+ %add = fadd <8 x float> %A, %1
+ %sub = fsub <8 x float> %A, %1
+ %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+ ret <8 x float> %vecinit14
+}
+
+define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
+; SSE-LABEL: test3c:
+; SSE: # BB#0:
+; SSE-NEXT: addsubpd (%rdi), %xmm0
+; SSE-NEXT: addsubpd 16(%rdi), %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test3c:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0
+; AVX-NEXT: retq
+ %1 = load <4 x double>, <4 x double>* %B
+ %add = fadd <4 x double> %A, %1
+ %sub = fsub <4 x double> %A, %1
+ %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ ret <4 x double> %vecinit6
+}
+
+define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
+; SSE-LABEL: test4c:
+; SSE: # BB#0:
+; SSE-NEXT: addsubpd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test4c:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = load <2 x double>, <2 x double>* %B
+ %sub = fsub <2 x double> %A, %1
+ %add = fadd <2 x double> %A, %1
+ %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
+ ret <2 x double> %vecinit2
+}
diff --git a/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll
new file mode 100644
index 000000000000..217be9aeae3a
--- /dev/null
+++ b/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=X64
+
+; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse3-builtins.c
+
+define <2 x double> @test_mm_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
+; X32-LABEL: test_mm_addsub_pd:
+; X32: # BB#0:
+; X32-NEXT: addsubpd %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_addsub_pd:
+; X64: # BB#0:
+; X64-NEXT: addsubpd %xmm1, %xmm0
+; X64-NEXT: retq
+ %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @test_mm_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
+; X32-LABEL: test_mm_addsub_ps:
+; X32: # BB#0:
+; X32-NEXT: addsubps %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_addsub_ps:
+; X64: # BB#0:
+; X64-NEXT: addsubps %xmm1, %xmm0
+; X64-NEXT: retq
+ %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_mm_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
+; X32-LABEL: test_mm_hadd_pd:
+; X32: # BB#0:
+; X32-NEXT: haddpd %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hadd_pd:
+; X64: # BB#0:
+; X64-NEXT: haddpd %xmm1, %xmm0
+; X64-NEXT: retq
+ %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @test_mm_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
+; X32-LABEL: test_mm_hadd_ps:
+; X32: # BB#0:
+; X32-NEXT: haddps %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hadd_ps:
+; X64: # BB#0:
+; X64-NEXT: haddps %xmm1, %xmm0
+; X64-NEXT: retq
+ %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_mm_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
+; X32-LABEL: test_mm_hsub_pd:
+; X32: # BB#0:
+; X32-NEXT: hsubpd %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hsub_pd:
+; X64: # BB#0:
+; X64-NEXT: hsubpd %xmm1, %xmm0
+; X64-NEXT: retq
+ %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @test_mm_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
+; X32-LABEL: test_mm_hsub_ps:
+; X32: # BB#0:
+; X32-NEXT: hsubps %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hsub_ps:
+; X64: # BB#0:
+; X64-NEXT: hsubps %xmm1, %xmm0
+; X64-NEXT: retq
+ %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x i64> @test_mm_lddqu_si128(i8* %a0) {
+; X32-LABEL: test_mm_lddqu_si128:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: lddqu (%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_lddqu_si128:
+; X64: # BB#0:
+; X64-NEXT: lddqu (%rdi), %xmm0
+; X64-NEXT: retq
+ %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
+ %res = bitcast <16 x i8> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
+
+define <2 x double> @test_mm_loaddup_pd(double* %a0) {
+; X32-LABEL: test_mm_loaddup_pd:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movddup (%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_loaddup_pd:
+; X64: # BB#0:
+; X64-NEXT: movddup (%rdi), %xmm0
+; X64-NEXT: retq
+ %ld = load double, double* %a0
+ %res0 = insertelement <2 x double> undef, double %ld, i32 0
+ %res1 = insertelement <2 x double> %res0, double %ld, i32 1
+ ret <2 x double> %res1
+}
+
+define <2 x double> @test_mm_movedup_pd(<2 x double> %a0) {
+; X32-LABEL: test_mm_movedup_pd:
+; X32: # BB#0:
+; X32-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_movedup_pd:
+; X64: # BB#0:
+; X64-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT: retq
+ %res = shufflevector <2 x double> %a0, <2 x double> %a0, <2 x i32> zeroinitializer
+ ret <2 x double> %res
+}
+
+define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) {
+; X32-LABEL: test_mm_movehdup_ps:
+; X32: # BB#0:
+; X32-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_movehdup_ps:
+; X64: # BB#0:
+; X64-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-NEXT: retq
+ %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) {
+; X32-LABEL: test_mm_moveldup_ps:
+; X32: # BB#0:
+; X32-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_moveldup_ps:
+; X64: # BB#0:
+; X64-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; X64-NEXT: retq
+ %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x float> %res
+}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 398675276c66..2c24478706e6 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; These are tests for SSE3 codegen.
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 --mattr=+sse3 | FileCheck %s --check-prefix=X64
@@ -269,8 +270,10 @@ entry:
define <4 x i32> @t17() nounwind {
; X64-LABEL: t17:
; X64: ## BB#0: ## %entry
-; X64-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
-; X64-NEXT: andpd {{.*}}(%rip), %xmm0
+; X64-NEXT: movaps (%rax), %xmm0
+; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
entry:
%tmp1 = load <4 x float>, <4 x float>* undef, align 16
diff --git a/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
index 6fab98e70a89..75f69ffd6db9 100644
--- a/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
+++ b/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
@@ -42,7 +42,6 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
-
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: mpsadbw
%res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
@@ -59,3 +58,49 @@ define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
+ ; CHECK: pmovsxbd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
+ ; CHECK: pmovsxbq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
+ ; CHECK: pmovsxbw
+ %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
+ ; CHECK: pmovsxdq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
+ ; CHECK: pmovsxwd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
+ ; CHECK: pmovsxwq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/sse41-intrinsics-x86.ll b/test/CodeGen/X86/sse41-intrinsics-x86.ll
index 771e4024336c..ceff4f9782e9 100644
--- a/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -162,54 +162,6 @@ define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
-define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
- ; CHECK: pmovsxbd
- %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
- ; CHECK: pmovsxbq
- %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
- ; CHECK: pmovsxbw
- %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
- ; CHECK: pmovsxdq
- %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
- ; CHECK: pmovsxwd
- %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
- ; CHECK: pmovsxwq
- %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
-
-
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; CHECK: pmovzxbd
%res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll b/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
index a16e79277143..a7e48d8ac038 100644
--- a/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
+++ b/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
@@ -1,109 +1,188 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
define <8 x i16> @test_llvm_x86_sse41_pmovsxbw(<16 x i8>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbw
-; SSE41: pmovsxbw (%rdi), %xmm0
-; AVX: vpmovsxbw (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovsxbw:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovsxbw (%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovsxbw:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovsxbw (%rdi), %xmm0
+; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %1)
- ret <8 x i16> %2
+ %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = sext <8 x i8> %2 to <8 x i16>
+ ret <8 x i16> %3
}
define <4 x i32> @test_llvm_x86_sse41_pmovsxbd(<16 x i8>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbd
-; SSE41: pmovsxbd (%rdi), %xmm0
-; AVX: vpmovsxbd (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovsxbd:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovsxbd (%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovsxbd:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
+; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %1)
- ret <4 x i32> %2
+ %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i8> %2 to <4 x i32>
+ ret <4 x i32> %3
}
define <2 x i64> @test_llvm_x86_sse41_pmovsxbq(<16 x i8>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbq
-; SSE41: pmovsxbq (%rdi), %xmm0
-; AVX: vpmovsxbq (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovsxbq:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovsxbq:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
+; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %1)
- ret <2 x i64> %2
+ %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
}
define <4 x i32> @test_llvm_x86_sse41_pmovsxwd(<8 x i16>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwd
-; SSE41: pmovsxwd (%rdi), %xmm0
-; AVX: vpmovsxwd (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovsxwd:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovsxwd (%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovsxwd:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
+; AVX-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a, align 1
- %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1)
- ret <4 x i32> %2
+ %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i16> %2 to <4 x i32>
+ ret <4 x i32> %3
}
define <2 x i64> @test_llvm_x86_sse41_pmovsxwq(<8 x i16>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwq
-; SSE41: pmovsxwq (%rdi), %xmm0
-; AVX: vpmovsxwq (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovsxwq:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovsxwq (%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovsxwq:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovsxwq (%rdi), %xmm0
+; AVX-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a, align 1
- %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %1)
- ret <2 x i64> %2
+ %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
}
define <2 x i64> @test_llvm_x86_sse41_pmovsxdq(<4 x i32>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovsxdq
-; SSE41: pmovsxdq (%rdi), %xmm0
-; AVX: vpmovsxdq (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovsxdq:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovsxdq (%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovsxdq:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovsxdq (%rdi), %xmm0
+; AVX-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %a, align 1
- %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %1)
- ret <2 x i64> %2
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
}
define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbw
-; SSE41: pmovzxbw (%rdi), %xmm0
-; AVX: vpmovzxbw (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovzxbw:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovzxbw:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1)
ret <8 x i16> %2
}
define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbd
-; SSE41: pmovzxbd (%rdi), %xmm0
-; AVX: vpmovzxbd (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovzxbd:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovzxbd:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1)
ret <4 x i32> %2
}
define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbq
-; SSE41: pmovzxbq (%rdi), %xmm0
-; AVX: vpmovzxbq (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovzxbq:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovzxbq:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1)
ret <2 x i64> %2
}
define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwd
-; SSE41: pmovzxwd (%rdi), %xmm0
-; AVX: vpmovzxwd (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovzxwd:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovzxwd:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1)
ret <4 x i32> %2
}
define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwq
-; SSE41: pmovzxwq (%rdi), %xmm0
-; AVX: vpmovzxwq (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovzxwq:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovzxwq:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; AVX-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1)
ret <2 x i64> %2
}
define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
-; CHECK-LABEL: test_llvm_x86_sse41_pmovzxdq
-; SSE41: pmovzxdq (%rdi), %xmm0
-; AVX: vpmovzxdq (%rdi), %xmm0
+; SSE41-LABEL: test_llvm_x86_sse41_pmovzxdq:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_llvm_x86_sse41_pmovzxdq:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; AVX-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1)
ret <2 x i64> %2
@@ -115,9 +194,3 @@ declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>)
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>)
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>)
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>)
-declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>)
-declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>)
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>)
-declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>)
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>)
-declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>)
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 8532c012aa9b..0a83a9753b81 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -31,49 +31,6 @@ define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind {
ret <16 x i8> %tmp1
}
-define <2 x i64> @pmovsxbd_1(i32* %p) nounwind {
-; X32-LABEL: pmovsxbd_1:
-; X32: ## BB#0: ## %entry
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: pmovsxbd (%eax), %xmm0
-; X32-NEXT: retl
-;
-; X64-LABEL: pmovsxbd_1:
-; X64: ## BB#0: ## %entry
-; X64-NEXT: pmovsxbd (%rdi), %xmm0
-; X64-NEXT: retq
-entry:
- %0 = load i32, i32* %p, align 4
- %1 = insertelement <4 x i32> undef, i32 %0, i32 0
- %2 = insertelement <4 x i32> %1, i32 0, i32 1
- %3 = insertelement <4 x i32> %2, i32 0, i32 2
- %4 = insertelement <4 x i32> %3, i32 0, i32 3
- %5 = bitcast <4 x i32> %4 to <16 x i8>
- %6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone
- %7 = bitcast <4 x i32> %6 to <2 x i64>
- ret <2 x i64> %7
-}
-
-define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly {
-; X32-LABEL: pmovsxwd_1:
-; X32: ## BB#0: ## %entry
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: pmovsxwd (%eax), %xmm0
-; X32-NEXT: retl
-;
-; X64-LABEL: pmovsxwd_1:
-; X64: ## BB#0: ## %entry
-; X64-NEXT: pmovsxwd (%rdi), %xmm0
-; X64-NEXT: retq
-entry:
- %0 = load i64, i64* %p ; <i64> [#uses=1]
- %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0 ; <<2 x i64>> [#uses=1]
- %1 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1]
- %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone ; <<4 x i32>> [#uses=1]
- %3 = bitcast <4 x i32> %2 to <2 x i64> ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %3
-}
-
define <2 x i64> @pmovzxbq_1() nounwind {
; X32-LABEL: pmovzxbq_1:
; X32: ## BB#0: ## %entry
@@ -94,8 +51,6 @@ entry:
ret <2 x i64> %3
}
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
define i32 @extractps_1(<4 x float> %v) nounwind {
@@ -137,7 +92,7 @@ define float @ext_1(<4 x float> %v) nounwind {
; X32: ## BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; X32-NEXT: addss LCPI7_0, %xmm0
+; X32-NEXT: addss LCPI5_0, %xmm0
; X32-NEXT: movss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -204,7 +159,7 @@ declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) noun
define <4 x float> @blendps_not_insertps_1(<4 x float> %t1, float %t2) nounwind {
; X32-LABEL: blendps_not_insertps_1:
; X32: ## BB#0:
-; X32-NEXT: movss {{.*#+}} xmm1
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT: retl
;
@@ -839,12 +794,12 @@ define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocap
; X32-LABEL: insertps_from_vector_load:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; X32-NEXT: insertps $48, (%{{...}}), {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_vector_load:
; X64: ## BB#0:
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
+; X64-NEXT: insertps $48, (%{{...}}), {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X64-NEXT: retq
%1 = load <4 x float>, <4 x float>* %pb, align 16
%2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
@@ -857,12 +812,12 @@ define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>
; X32-LABEL: insertps_from_vector_load_offset:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[1],xmm0[3]
+; X32-NEXT: insertps $32, 4(%{{...}}), {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_vector_load_offset:
; X64: ## BB#0:
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[1],xmm0[3]
+; X64-NEXT: insertps $32, 4(%{{...}}), {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; X64-NEXT: retq
%1 = load <4 x float>, <4 x float>* %pb, align 16
%2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
@@ -876,13 +831,13 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: shll $4, %ecx
-; X32-NEXT: insertps {{.*#+}} xmm0 = mem[3],xmm0[1,2,3]
+; X32-NEXT: insertps $0, 12(%{{...}},%{{...}}), {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_vector_load_offset_2:
; X64: ## BB#0:
; X64-NEXT: shlq $4, %rsi
-; X64-NEXT: insertps {{.*#+}} xmm0 = mem[3],xmm0[1,2,3]
+; X64-NEXT: insertps $0, 12(%{{...}},%{{...}}), {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
; X64-NEXT: retq
%1 = getelementptr inbounds <4 x float>, <4 x float>* %pb, i64 %index
%2 = load <4 x float>, <4 x float>* %1, align 16
@@ -1013,12 +968,12 @@ define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
; X32-LABEL: pr20087:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[2]
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[0]
; X32-NEXT: retl
;
; X64-LABEL: pr20087:
; X64: ## BB#0:
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[2]
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[0]
; X64-NEXT: retq
%load = load <4 x float> , <4 x float> *%ptr
%ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2>
diff --git a/test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll
new file mode 100644
index 000000000000..f93a16a5eb3d
--- /dev/null
+++ b/test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=X64
+
+; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse4a-builtins.c
+
+define <2 x i64> @test_mm_extracti_si64(<2 x i64> %x) {
+; X32-LABEL: test_mm_extracti_si64:
+; X32: # BB#0:
+; X32-NEXT: extrq $2, $3, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_extracti_si64:
+; X64: # BB#0:
+; X64-NEXT: extrq $2, $3, %xmm0
+; X64-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind readnone
+
+define <2 x i64> @test_mm_extract_si64(<2 x i64> %x, <2 x i64> %y) {
+; X32-LABEL: test_mm_extract_si64:
+; X32: # BB#0:
+; X32-NEXT: extrq %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_extract_si64:
+; X64: # BB#0:
+; X64-NEXT: extrq %xmm1, %xmm0
+; X64-NEXT: retq
+ %bc = bitcast <2 x i64> %y to <16 x i8>
+ %res = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %bc)
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind readnone
+
+define <2 x i64> @test_mm_inserti_si64(<2 x i64> %x, <2 x i64> %y) {
+; X32-LABEL: test_mm_inserti_si64:
+; X32: # BB#0:
+; X32-NEXT: insertq $6, $5, %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_inserti_si64:
+; X64: # BB#0:
+; X64-NEXT: insertq $6, $5, %xmm1, %xmm0
+; X64-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 5, i8 6)
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind readnone
+
+define <2 x i64> @test_mm_insert_si64(<2 x i64> %x, <2 x i64> %y) {
+; X32-LABEL: test_mm_insert_si64:
+; X32: # BB#0:
+; X32-NEXT: insertq %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_insert_si64:
+; X64: # BB#0:
+; X64-NEXT: insertq %xmm1, %xmm0
+; X64-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define void @test_stream_sd(i8* %p, <2 x double> %a) {
+; X32-LABEL: test_stream_sd:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movntsd %xmm0, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_stream_sd:
+; X64: # BB#0:
+; X64-NEXT: movntsd %xmm0, (%rdi)
+; X64-NEXT: retq
+ call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a)
+ ret void
+}
+declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>) nounwind readnone
+
+define void @test_mm_stream_ss(i8* %p, <4 x float> %a) {
+; X32-LABEL: test_mm_stream_ss:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movntss %xmm0, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_stream_ss:
+; X64: # BB#0:
+; X64-NEXT: movntss %xmm0, (%rdi)
+; X64-NEXT: retq
+ call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a)
+ ret void
+}
+declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/sse_partial_update.ll b/test/CodeGen/X86/sse_partial_update.ll
index 377c3b7d6ead..8d61428420f6 100644
--- a/test/CodeGen/X86/sse_partial_update.ll
+++ b/test/CodeGen/X86/sse_partial_update.ll
@@ -90,3 +90,36 @@ entry:
declare void @callee2(float, float)
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+define <2 x double> @load_fold_cvtss2sd_int(<4 x float> *%a) {
+; CHECK-LABEL: load_fold_cvtss2sd_int:
+; CHECK: movaps (%rdi), %xmm1
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %ld = load <4 x float>, <4 x float> *%a
+ %x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %ld)
+ ret <2 x double> %x
+}
+
+define <2 x double> @load_fold_cvtss2sd_int_optsize(<4 x float> *%a) optsize {
+; CHECK-LABEL: load_fold_cvtss2sd_int_optsize:
+; CHECK: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ld = load <4 x float>, <4 x float> *%a
+ %x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %ld)
+ ret <2 x double> %x
+}
+
+define <2 x double> @load_fold_cvtss2sd_int_minsize(<4 x float> *%a) minsize {
+; CHECK-LABEL: load_fold_cvtss2sd_int_minsize:
+; CHECK: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ld = load <4 x float>, <4 x float> *%a
+ %x = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %ld)
+ ret <2 x double> %x
+}
+
+declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll b/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll
new file mode 100644
index 000000000000..4f7ff20c6e0d
--- /dev/null
+++ b/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll
@@ -0,0 +1,290 @@
+; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X64
+
+; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c
+
+define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
+; X32-LABEL: test_mm_abs_epi8:
+; X32: # BB#0:
+; X32-NEXT: pabsb %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_abs_epi8:
+; X64: # BB#0:
+; X64-NEXT: pabsb %xmm0, %xmm0
+; X64-NEXT: retq
+ %arg = bitcast <2 x i64> %a0 to <16 x i8>
+ %call = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %arg)
+ %res = bitcast <16 x i8> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
+; X32-LABEL: test_mm_abs_epi16:
+; X32: # BB#0:
+; X32-NEXT: pabsw %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_abs_epi16:
+; X64: # BB#0:
+; X64-NEXT: pabsw %xmm0, %xmm0
+; X64-NEXT: retq
+ %arg = bitcast <2 x i64> %a0 to <8 x i16>
+ %call = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %arg)
+ %res = bitcast <8 x i16> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
+; X32-LABEL: test_mm_abs_epi32:
+; X32: # BB#0:
+; X32-NEXT: pabsd %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_abs_epi32:
+; X64: # BB#0:
+; X64-NEXT: pabsd %xmm0, %xmm0
+; X64-NEXT: retq
+ %arg = bitcast <2 x i64> %a0 to <4 x i32>
+ %call = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %arg)
+ %res = bitcast <4 x i32> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
+
+define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_alignr_epi8:
+; X32: # BB#0:
+; X32-NEXT: palignr {{.*#}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
+; X32-NEXT: movdqa %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_alignr_epi8:
+; X64: # BB#0:
+; X64-NEXT: palignr {{.*#}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
+; X64-NEXT: movdqa %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
+ %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
+ %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
+ %res = bitcast <16 x i8> %shuf to <2 x i64>
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_hadd_epi16:
+; X32: # BB#0:
+; X32-NEXT: phaddw %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hadd_epi16:
+; X64: # BB#0:
+; X64-NEXT: phaddw %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
+ %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
+ %call = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
+ %res = bitcast <8 x i16> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_hadd_epi32:
+; X32: # BB#0:
+; X32-NEXT: phaddd %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hadd_epi32:
+; X64: # BB#0:
+; X64-NEXT: phaddd %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
+ %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
+ %call = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
+ %res = bitcast <4 x i32> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_hadds_epi16:
+; X32: # BB#0:
+; X32-NEXT: phaddsw %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hadds_epi16:
+; X64: # BB#0:
+; X64-NEXT: phaddsw %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
+ %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
+ %call = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
+ %res = bitcast <8 x i16> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_hsub_epi16:
+; X32: # BB#0:
+; X32-NEXT: phsubw %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hsub_epi16:
+; X64: # BB#0:
+; X64-NEXT: phsubw %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
+ %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
+ %call = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
+ %res = bitcast <8 x i16> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_hsub_epi32:
+; X32: # BB#0:
+; X32-NEXT: phsubd %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hsub_epi32:
+; X64: # BB#0:
+; X64-NEXT: phsubd %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
+ %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
+ %call = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
+ %res = bitcast <4 x i32> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_hsubs_epi16:
+; X32: # BB#0:
+; X32-NEXT: phsubsw %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_hsubs_epi16:
+; X64: # BB#0:
+; X64-NEXT: phsubsw %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
+ %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
+ %call = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
+ %res = bitcast <8 x i16> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_maddubs_epi16:
+; X32: # BB#0:
+; X32-NEXT: pmaddubsw %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_maddubs_epi16:
+; X64: # BB#0:
+; X64-NEXT: pmaddubsw %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
+ %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
+ %call = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %arg0, <16 x i8> %arg1)
+ %res = bitcast <8 x i16> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_mulhrs_epi16:
+; X32: # BB#0:
+; X32-NEXT: pmulhrsw %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mulhrs_epi16:
+; X64: # BB#0:
+; X64-NEXT: pmulhrsw %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
+ %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
+ %call = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %arg0, <8 x i16> %arg1)
+ %res = bitcast <8 x i16> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_shuffle_epi8:
+; X32: # BB#0:
+; X32-NEXT: pshufb %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_shuffle_epi8:
+; X64: # BB#0:
+; X64-NEXT: pshufb %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
+ %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
+ %call = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
+ %res = bitcast <16 x i8> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_sign_epi8:
+; X32: # BB#0:
+; X32-NEXT: psignb %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_sign_epi8:
+; X64: # BB#0:
+; X64-NEXT: psignb %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
+ %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
+ %call = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %arg0, <16 x i8> %arg1)
+ %res = bitcast <16 x i8> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_sign_epi16:
+; X32: # BB#0:
+; X32-NEXT: psignw %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_sign_epi16:
+; X64: # BB#0:
+; X64-NEXT: psignw %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
+ %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
+ %call = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %arg0, <8 x i16> %arg1)
+ %res = bitcast <8 x i16> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
+; X32-LABEL: test_mm_sign_epi32:
+; X32: # BB#0:
+; X32-NEXT: psignd %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_sign_epi32:
+; X64: # BB#0:
+; X64-NEXT: psignd %xmm1, %xmm0
+; X64-NEXT: retq
+ %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
+ %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
+ %call = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %arg0, <4 x i32> %arg1)
+ %res = bitcast <4 x i32> %call to <2 x i64>
+ ret <2 x i64> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/X86/stack-align-memcpy.ll b/test/CodeGen/X86/stack-align-memcpy.ll
index 0cc3aa848891..129fb0c6b1f6 100644
--- a/test/CodeGen/X86/stack-align-memcpy.ll
+++ b/test/CodeGen/X86/stack-align-memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -force-align-stack -mtriple i386-apple-darwin -mcpu=i486 | FileCheck %s
+; RUN: llc < %s -stackrealign -mtriple i386-apple-darwin -mcpu=i486 | FileCheck %s
%struct.foo = type { [88 x i8] }
diff --git a/test/CodeGen/X86/stack-folding-adx-x86_64.ll b/test/CodeGen/X86/stack-folding-adx-x86_64.ll
new file mode 100644
index 000000000000..5f109f09aa19
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-adx-x86_64.ll
@@ -0,0 +1,45 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+adx < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests.
+;
+; By including a nop call with sideeffects we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
+
+define i8 @stack_fold_addcarry_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) {
+ ;CHECK-LABEL: stack_fold_addcarry_u32
+ ;CHECK: adcxl {{-?[0-9]*}}(%rsp), %ecx {{.*#+}} 4-byte Folded Reload
+ %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = tail call i8 @llvm.x86.addcarry.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3)
+ ret i8 %2;
+}
+declare i8 @llvm.x86.addcarry.u32(i8, i32, i32, i8*)
+
+define i8 @stack_fold_addcarry_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) {
+ ;CHECK-LABEL: stack_fold_addcarry_u64
+ ;CHECK: adcxq {{-?[0-9]*}}(%rsp), %rcx {{.*#+}} 8-byte Folded Reload
+ %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = tail call i8 @llvm.x86.addcarry.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3)
+ ret i8 %2;
+}
+declare i8 @llvm.x86.addcarry.u64(i8, i64, i64, i8*)
+
+define i8 @stack_fold_addcarryx_u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3) {
+ ;CHECK-LABEL: stack_fold_addcarryx_u32
+ ;CHECK: adcxl {{-?[0-9]*}}(%rsp), %ecx {{.*#+}} 4-byte Folded Reload
+ %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = tail call i8 @llvm.x86.addcarryx.u32(i8 %a0, i32 %a1, i32 %a2, i8* %a3)
+ ret i8 %2;
+}
+declare i8 @llvm.x86.addcarryx.u32(i8, i32, i32, i8*)
+
+define i8 @stack_fold_addcarryx_u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3) {
+ ;CHECK-LABEL: stack_fold_addcarryx_u64
+ ;CHECK: adcxq {{-?[0-9]*}}(%rsp), %rcx {{.*#+}} 8-byte Folded Reload
+ %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = tail call i8 @llvm.x86.addcarryx.u64(i8 %a0, i64 %a1, i64 %a2, i8* %a3)
+ ret i8 %2;
+}
+declare i8 @llvm.x86.addcarryx.u64(i8, i64, i64, i8*)
diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll
index 63aa742bdf01..b86ec0ea22ff 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+f16c < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -946,7 +946,15 @@ define <8 x float> @stack_fold_insertf128(<4 x float> %a0, <4 x float> %a1) {
ret <8 x float> %2
}
-; TODO stack_fold_insertps
+define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_insertps
+ ;CHECK: vinsertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3]
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_maxpd
@@ -1411,7 +1419,7 @@ declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readno
define double @stack_fold_roundsd(double %a0) optsize {
;CHECK-LABEL: stack_fold_roundsd
- ;CHECK: vroundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ ;CHECK: vroundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call double @llvm.floor.f64(double %a0)
ret double %2
@@ -1423,7 +1431,7 @@ declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) n
define float @stack_fold_roundss(float %a0) optsize {
;CHECK-LABEL: stack_fold_roundss
- ;CHECK: vroundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ ;CHECK: vroundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call float @llvm.floor.f32(float %a0)
ret float %2
@@ -1494,7 +1502,7 @@ define <8 x float> @stack_fold_shufps_ymm(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_shufps_ymm
;CHECK: vshufps $148, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
+ %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 13, i32 14>
ret <8 x float> %2
}
diff --git a/test/CodeGen/X86/stack-folding-fp-sse42.ll b/test/CodeGen/X86/stack-folding-fp-sse42.ll
index f9fcbaabdebb..105115bc7d25 100644
--- a/test/CodeGen/X86/stack-folding-fp-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-fp-sse42.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.2 < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -314,7 +314,7 @@ define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
-define float @stack_fold_cvtsd2ss(double %a0) optsize {
+define float @stack_fold_cvtsd2ss(double %a0) minsize {
;CHECK-LABEL: stack_fold_cvtsd2ss
;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
@@ -331,7 +331,7 @@ define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
-define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
+define double @stack_fold_cvtsi2sd(i32 %a0) minsize {
;CHECK-LABEL: stack_fold_cvtsi2sd
;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -365,7 +365,7 @@ define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
-define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
+define float @stack_fold_cvtsi2ss(i32 %a0) minsize {
;CHECK-LABEL: stack_fold_cvtsi2ss
;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
@@ -399,7 +399,7 @@ define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
-define double @stack_fold_cvtss2sd(float %a0) optsize {
+define double @stack_fold_cvtss2sd(float %a0) minsize {
;CHECK-LABEL: stack_fold_cvtss2sd
;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
@@ -637,7 +637,15 @@ define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
-; TODO stack_fold_insertps
+define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_insertps
+ ;CHECK: insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3]
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
;CHECK-LABEL: stack_fold_maxpd
@@ -886,7 +894,7 @@ declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
define double @stack_fold_roundsd(double %a0) optsize {
;CHECK-LABEL: stack_fold_roundsd
- ;CHECK: roundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ ;CHECK: roundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call double @llvm.floor.f64(double %a0)
ret double %2
@@ -896,9 +904,9 @@ declare double @llvm.floor.f64(double) nounwind readnone
; TODO stack_fold_roundsd_int
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
-define float @stack_fold_roundss(float %a0) optsize {
+define float @stack_fold_roundss(float %a0) minsize {
;CHECK-LABEL: stack_fold_roundss
- ;CHECK: roundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ ;CHECK: roundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call float @llvm.floor.f32(float %a0)
ret float %2
@@ -968,7 +976,7 @@ declare double @llvm.sqrt.f64(double) nounwind readnone
; TODO stack_fold_sqrtsd_int
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
-define float @stack_fold_sqrtss(float %a0) optsize {
+define float @stack_fold_sqrtss(float %a0) minsize {
;CHECK-LABEL: stack_fold_sqrtss
;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
diff --git a/test/CodeGen/X86/stack-folding-int-avx1.ll b/test/CodeGen/X86/stack-folding-int-avx1.ll
index fec297d5e9d4..15ffb1d2dcc5 100644
--- a/test/CodeGen/X86/stack-folding-int-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+aes,+pclmul < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+aes,+pclmul < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -671,55 +671,55 @@ define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbd
;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
- ret <4 x i32> %2
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i8> %2 to <4 x i32>
+ ret <4 x i32> %3
}
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbq
- ;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ ;CHECK: vpmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
- ret <2 x i64> %2
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
}
-declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbw
;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
- ret <8 x i16> %2
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = sext <8 x i8> %2 to <8 x i16>
+ ret <8 x i16> %3
}
-declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pmovsxdq
;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
- ret <2 x i64> %2
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
}
-declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovsxwd
;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
- ret <4 x i32> %2
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i16> %2 to <4 x i32>
+ ret <4 x i32> %3
}
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovsxwq
;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
- ret <2 x i64> %2
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
}
-declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovzxbd
diff --git a/test/CodeGen/X86/stack-folding-int-avx2.ll b/test/CodeGen/X86/stack-folding-int-avx2.ll
index a164fbbc7a6a..235a10ed4678 100644
--- a/test/CodeGen/X86/stack-folding-int-avx2.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -12,7 +12,7 @@ define <4 x double> @stack_fold_broadcastsd_ymm(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_broadcastsd_ymm
;CHECK: vbroadcastsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
+ %2 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
; fadd forces execution domain
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %3
@@ -23,7 +23,7 @@ define <4 x float> @stack_fold_broadcastss(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_broadcastss
;CHECK: vbroadcastss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
+ %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
; fadd forces execution domain
%3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
ret <4 x float> %3
@@ -34,7 +34,7 @@ define <8 x float> @stack_fold_broadcastss_ymm(<4 x float> %a0) {
;CHECK-LABEL: stack_fold_broadcastss_ymm
;CHECK: vbroadcastss {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
+ %2 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
; fadd forces execution domain
%3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
ret <8 x float> %3
@@ -286,81 +286,73 @@ define <16 x i8> @stack_fold_pbroadcastb(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pbroadcastb
;CHECK: vpbroadcastb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %2
}
-declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
define <32 x i8> @stack_fold_pbroadcastb_ymm(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pbroadcastb_ymm
;CHECK: vpbroadcastb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <32 x i32> zeroinitializer
ret <32 x i8> %2
}
-declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
define <4 x i32> @stack_fold_pbroadcastd(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pbroadcastd
;CHECK: vpbroadcastd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
; add forces execution domain
%3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %3
}
-declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
define <8 x i32> @stack_fold_pbroadcastd_ymm(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pbroadcastd_ymm
;CHECK: vpbroadcastd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <8 x i32> zeroinitializer
; add forces execution domain
%3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %3
}
-declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
define <2 x i64> @stack_fold_pbroadcastq(<2 x i64> %a0) {
;CHECK-LABEL: stack_fold_pbroadcastq
;CHECK: vpbroadcastq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
+ %2 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
; add forces execution domain
%3 = add <2 x i64> %2, <i64 1, i64 1>
ret <2 x i64> %3
}
-declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
define <4 x i64> @stack_fold_pbroadcastq_ymm(<2 x i64> %a0) {
;CHECK-LABEL: stack_fold_pbroadcastq_ymm
;CHECK: vpbroadcastq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
+ %2 = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> zeroinitializer
; add forces execution domain
%3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %3
}
-declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
define <8 x i16> @stack_fold_pbroadcastw(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pbroadcastw
;CHECK: vpbroadcastw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %2
}
-declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
define <16 x i16> @stack_fold_pbroadcastw_ymm(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pbroadcastw_ymm
;CHECK: vpbroadcastw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <16 x i32> zeroinitializer
ret <16 x i16> %2
}
-declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
define <32 x i8> @stack_fold_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1) {
;CHECK-LABEL: stack_fold_pcmpeqb
@@ -455,28 +447,28 @@ declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
define <4 x double> @stack_fold_permpd(<4 x double> %a0) {
;CHECK-LABEL: stack_fold_permpd
- ;CHECK: vpermpd $255, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vpermpd $235, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
; fadd forces execution domain
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %3
}
-define <8 x float> @stack_fold_permps(<8 x float> %a0, <8 x float> %a1) {
+define <8 x float> @stack_fold_permps(<8 x i32> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_permps
;CHECK: vpermps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x float> %a0)
+ %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
ret <8 x float> %2
}
-declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
define <4 x i64> @stack_fold_permq(<4 x i64> %a0) {
;CHECK-LABEL: stack_fold_permq
- ;CHECK: vpermq $255, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vpermq $235, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %2 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
; add forces execution domain
%3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
ret <4 x i64> %3
@@ -684,28 +676,25 @@ define <16 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbw
;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0)
+ %2 = sext <16 x i8> %a0 to <16 x i16>
ret <16 x i16> %2
}
-declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
define <4 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pmovsxdq
;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0)
+ %2 = sext <4 x i32> %a0 to <4 x i64>
ret <4 x i64> %2
}
-declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
define <8 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovsxwd
;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0)
+ %2 = sext <8 x i16> %a0 to <8 x i32>
ret <8 x i32> %2
}
-declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
define <4 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovsxwq
diff --git a/test/CodeGen/X86/stack-folding-int-sse42.ll b/test/CodeGen/X86/stack-folding-int-sse42.ll
index e814ae6df501..f732607851fc 100644
--- a/test/CodeGen/X86/stack-folding-int-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-int-sse42.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.2,+aes,+pclmul < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+pclmul < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -698,55 +698,55 @@ define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbd
;CHECK: pmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
- ret <4 x i32> %2
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i8> %2 to <4 x i32>
+ ret <4 x i32> %3
}
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbq
;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
- ret <2 x i64> %2
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
}
-declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbw
;CHECK: pmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
- ret <8 x i16> %2
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = sext <8 x i8> %2 to <8 x i16>
+ ret <8 x i16> %3
}
-declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pmovsxdq
;CHECK: pmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
- ret <2 x i64> %2
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
}
-declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovsxwd
;CHECK: pmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
- ret <4 x i32> %2
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = sext <4 x i16> %2 to <4 x i32>
+ ret <4 x i32> %3
}
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovsxwq
;CHECK: pmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
- ret <2 x i64> %2
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %3 = sext <2 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
}
-declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovzxbd
diff --git a/test/CodeGen/X86/stack-folding-mmx.ll b/test/CodeGen/X86/stack-folding-mmx.ll
index 8a5d4e2770dc..3b1a4956726f 100644
--- a/test/CodeGen/X86/stack-folding-mmx.ll
+++ b/test/CodeGen/X86/stack-folding-mmx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+ssse3 | FileCheck %s
define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_cvtpd2pi
@@ -59,6 +59,33 @@ declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
; TODO stack_fold_movq_load
; TODO stack_fold_movq_store
+define x86_mmx @stack_fold_pabsb(x86_mmx %a0) {
+ ;CHECK-LABEL: stack_fold_pabsb
+ ;CHECK: pabsb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pabsd(x86_mmx %a0) {
+ ;CHECK-LABEL: stack_fold_pabsd
+ ;CHECK: pabsd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pabsw(x86_mmx %a0) {
+ ;CHECK-LABEL: stack_fold_pabsw
+ ;CHECK: pabsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
+
define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) {
;CHECK-LABEL: stack_fold_packssdw
;CHECK: packssdw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
@@ -158,6 +185,15 @@ define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) {
}
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
+define x86_mmx @stack_fold_palignr(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_palignr
+ ;CHECK: palignr $1, {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a, x86_mmx %b, i8 1) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
+
define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) {
;CHECK-LABEL: stack_fold_pand
;CHECK: pand {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
@@ -248,8 +284,71 @@ define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) {
}
declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
+define x86_mmx @stack_fold_phaddd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_phaddd
+ ;CHECK: phaddd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_phaddsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_phaddsw
+ ;CHECK: phaddsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_phaddw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_phaddw
+ ;CHECK: phaddw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_phsubd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_phsubd
+ ;CHECK: phsubd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_phsubsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_phsubsw
+ ;CHECK: phsubsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_phsubw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_phsubw
+ ;CHECK: phsubw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
+
; TODO stack_fold_pinsrw
+define x86_mmx @stack_fold_pmaddubsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmaddubsw
+ ;CHECK: pmaddubsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
+
define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) {
;CHECK-LABEL: stack_fold_pmaddwd
;CHECK: pmaddwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
@@ -295,6 +394,15 @@ define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) {
}
declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
+define x86_mmx @stack_fold_pmulhrsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmulhrsw
+ ;CHECK: pmulhrsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
+
define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) {
;CHECK-LABEL: stack_fold_pmulhuw
;CHECK: pmulhuw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
@@ -349,7 +457,16 @@ define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) {
}
declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
-define x86_mmx @stack_fold_pshufw(x86_mmx %a, x86_mmx %b) {
+define x86_mmx @stack_fold_pshufb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pshufb
+ ;CHECK: pshufb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pshufw(x86_mmx %a) {
;CHECK-LABEL: stack_fold_pshufw
;CHECK: pshufw $1, {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
%1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
@@ -358,6 +475,33 @@ define x86_mmx @stack_fold_pshufw(x86_mmx %a, x86_mmx %b) {
}
declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
+define x86_mmx @stack_fold_psignb(x86_mmx %a0, x86_mmx %a1) {
+ ;CHECK-LABEL: stack_fold_psignb
+ ;CHECK: psignb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psignd(x86_mmx %a0, x86_mmx %a1) {
+ ;CHECK-LABEL: stack_fold_psignd
+ ;CHECK: psignd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psignw(x86_mmx %a0, x86_mmx %a1) {
+ ;CHECK-LABEL: stack_fold_psignw
+ ;CHECK: psignw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
+
define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) {
;CHECK-LABEL: stack_fold_pslld
;CHECK: pslld {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
diff --git a/test/CodeGen/X86/stack-folding-x86_64.ll b/test/CodeGen/X86/stack-folding-x86_64.ll
index 211227916a09..f96880d0237a 100644
--- a/test/CodeGen/X86/stack-folding-x86_64.ll
+++ b/test/CodeGen/X86/stack-folding-x86_64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/stack-folding-xop.ll b/test/CodeGen/X86/stack-folding-xop.ll
index 44a0d1dc6582..d0c48b400804 100644
--- a/test/CodeGen/X86/stack-folding-xop.ll
+++ b/test/CodeGen/X86/stack-folding-xop.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+xop < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/stack-probe-size.ll b/test/CodeGen/X86/stack-probe-size.ll
index 21482c3abded..4d1f88d11172 100644
--- a/test/CodeGen/X86/stack-probe-size.ll
+++ b/test/CodeGen/X86/stack-probe-size.ll
@@ -6,10 +6,9 @@
; stack probe size equals the page size (4096 bytes for all x86 targets), and
; this is unlikely to change in the future.
;
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s
target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
-target triple = "i686-pc-windows-msvc"
define i32 @test1() "stack-probe-size"="0" {
%buffer = alloca [4095 x i8]
diff --git a/test/CodeGen/X86/stack-protector-dbginfo.ll b/test/CodeGen/X86/stack-protector-dbginfo.ll
index 3aba19464b9d..237b96603c00 100644
--- a/test/CodeGen/X86/stack-protector-dbginfo.ll
+++ b/test/CodeGen/X86/stack-protector-dbginfo.ll
@@ -8,7 +8,7 @@
@a = external global { i64, [56 x i8] }, align 32
; Function Attrs: nounwind sspreq
-define i32 @_Z18read_response_sizev() #0 {
+define i32 @_Z18read_response_sizev() #0 !dbg !9 {
entry:
tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !23, metadata !DIExpression()), !dbg !39
%0 = load i64, i64* getelementptr inbounds ({ i64, [56 x i8] }, { i64, [56 x i8] }* @a, i32 0, i32 0), align 8, !dbg !40
@@ -25,7 +25,7 @@ attributes #0 = { sspreq }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!21, !72}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !5, subprograms: !8, globals: !20, imports: !5)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !5, subprograms: !8, globals: !20, imports: !5)
!1 = !DIFile(filename: "<unknown>", directory: "/Users/matt/ryan_bug")
!2 = !{!3}
!3 = !DICompositeType(tag: DW_TAG_enumeration_type, line: 20, size: 32, align: 32, file: !1, scope: !4, elements: !6)
@@ -34,22 +34,22 @@ attributes #0 = { sspreq }
!6 = !{!7}
!7 = !DIEnumerator(name: "max_frame_size", value: 0) ; [ DW_TAG_enumerator ] [max_frame_size :: 0]
!8 = !{!9, !24, !41, !65}
-!9 = !DISubprogram(name: "read_response_size", linkageName: "_Z18read_response_sizev", line: 27, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 27, file: !1, scope: !10, type: !11, function: i32 ()* @_Z18read_response_sizev, variables: !14)
+!9 = distinct !DISubprogram(name: "read_response_size", linkageName: "_Z18read_response_sizev", line: 27, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 27, file: !1, scope: !10, type: !11, variables: !14)
!10 = !DIFile(filename: "<unknown>", directory: "/Users/matt/ryan_bug")
!11 = !DISubroutineType(types: !12)
!12 = !{!13}
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !{!15, !19}
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 28, scope: !9, file: !10, type: !16)
+!15 = !DILocalVariable(name: "b", line: 28, scope: !9, file: !10, type: !16)
!16 = !DICompositeType(tag: DW_TAG_structure_type, name: "B", line: 16, size: 32, align: 32, file: !1, elements: !17)
!17 = !{!18}
!18 = !DIDerivedType(tag: DW_TAG_member, name: "end_of_file", line: 17, size: 32, align: 32, file: !1, scope: !16, baseType: !13)
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 29, scope: !9, file: !10, type: !13)
+!19 = !DILocalVariable(name: "c", line: 29, scope: !9, file: !10, type: !13)
!20 = !{}
!21 = !{i32 2, !"Dwarf Version", i32 2}
!22 = !{i64* getelementptr inbounds ({ i64, [56 x i8] }, { i64, [56 x i8] }* @a, i32 0, i32 0)}
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p2", line: 12, arg: 2, scope: !24, file: !10, type: !32)
-!24 = !DISubprogram(name: "min<unsigned long long>", linkageName: "_ZN3__13minIyEERKT_S3_RS1_", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !1, scope: !25, type: !27, templateParams: !33, variables: !35)
+!23 = !DILocalVariable(name: "p2", line: 12, arg: 2, scope: !24, file: !10, type: !32)
+!24 = distinct !DISubprogram(name: "min<unsigned long long>", linkageName: "_ZN3__13minIyEERKT_S3_RS1_", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !1, scope: !25, type: !27, templateParams: !33, variables: !35)
!25 = !DINamespace(name: "__1", line: 1, file: !26, scope: null)
!26 = !DIFile(filename: "main.cpp", directory: "/Users/matt/ryan_bug")
!27 = !DISubroutineType(types: !28)
@@ -61,12 +61,12 @@ attributes #0 = { sspreq }
!33 = !{!34}
!34 = !DITemplateTypeParameter(name: "_Tp", type: !31)
!35 = !{!36, !37}
-!36 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 12, arg: 1, scope: !24, file: !10, type: !29)
-!37 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p2", line: 12, arg: 2, scope: !24, file: !10, type: !32)
+!36 = !DILocalVariable(name: "p1", line: 12, arg: 1, scope: !24, file: !10, type: !29)
+!37 = !DILocalVariable(name: "p2", line: 12, arg: 2, scope: !24, file: !10, type: !32)
!38 = !DILocation(line: 33, scope: !9)
!39 = !DILocation(line: 12, scope: !24, inlinedAt: !38)
!40 = !DILocation(line: 9, scope: !41, inlinedAt: !59)
-!41 = !DISubprogram(name: "min<unsigned long long, __1::A>", linkageName: "_ZN3__13minIyNS_1AEEERKT_S4_RS2_T0_", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !1, scope: !25, type: !42, templateParams: !53, variables: !55)
+!41 = distinct !DISubprogram(name: "min<unsigned long long, __1::A>", linkageName: "_ZN3__13minIyNS_1AEEERKT_S4_RS2_T0_", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !1, scope: !25, type: !42, templateParams: !53, variables: !55)
!42 = !DISubroutineType(types: !43)
!43 = !{!29, !29, !32, !44}
!44 = !DICompositeType(tag: DW_TAG_structure_type, name: "A", size: 8, align: 8, file: !1, scope: !25, elements: !45)
@@ -80,17 +80,17 @@ attributes #0 = { sspreq }
!53 = !{!34, !54}
!54 = !DITemplateTypeParameter(name: "_Compare", type: !44)
!55 = !{!56, !57, !58}
-!56 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 7, arg: 1, scope: !41, file: !10, type: !29)
-!57 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p2", line: 7, arg: 2, scope: !41, file: !10, type: !32)
-!58 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p3", line: 8, arg: 3, scope: !41, file: !10, type: !44)
+!56 = !DILocalVariable(name: "p1", line: 7, arg: 1, scope: !41, file: !10, type: !29)
+!57 = !DILocalVariable(name: "p2", line: 7, arg: 2, scope: !41, file: !10, type: !32)
+!58 = !DILocalVariable(name: "p3", line: 8, arg: 3, scope: !41, file: !10, type: !44)
!59 = !DILocation(line: 13, scope: !24, inlinedAt: !38)
!63 = !{i32 undef}
-!64 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 1, arg: 2, scope: !65, file: !10, type: !50)
-!65 = !DISubprogram(name: "operator()", linkageName: "_ZN3__11AclERKiS2_", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !25, type: !47, declaration: !46, variables: !66)
+!64 = !DILocalVariable(name: "p1", line: 1, arg: 2, scope: !65, file: !10, type: !50)
+!65 = distinct !DISubprogram(name: "operator()", linkageName: "_ZN3__11AclERKiS2_", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !25, type: !47, declaration: !46, variables: !66)
!66 = !{!67, !69, !70}
-!67 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !65, type: !68)
+!67 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !65, type: !68)
!68 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !44)
-!69 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 1, arg: 2, scope: !65, file: !10, type: !50)
-!70 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 2, arg: 3, scope: !65, file: !10, type: !50)
+!69 = !DILocalVariable(name: "p1", line: 1, arg: 2, scope: !65, file: !10, type: !50)
+!70 = !DILocalVariable(name: "", line: 2, arg: 3, scope: !65, file: !10, type: !50)
!71 = !DILocation(line: 1, scope: !65, inlinedAt: !40)
!72 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/stack-protector-weight.ll b/test/CodeGen/X86/stack-protector-weight.ll
index 4220a4c46a0a..dea66d28e3dd 100644
--- a/test/CodeGen/X86/stack-protector-weight.ll
+++ b/test/CodeGen/X86/stack-protector-weight.ll
@@ -2,13 +2,13 @@
; RUN: llc -mtriple=x86_64-apple-darwin -print-machineinstrs=expand-isel-pseudos -enable-selectiondag-sp=false %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=IR
; SELDAG: # Machine code for function test_branch_weights:
-; SELDAG: Successors according to CFG: BB#[[SUCCESS:[0-9]+]](1048575) BB#[[FAILURE:[0-9]+]](1)
+; SELDAG: Successors according to CFG: BB#[[SUCCESS:[0-9]+]]({{[0-9a-fx/= ]+}}100.00%) BB#[[FAILURE:[0-9]+]]
; SELDAG: BB#[[FAILURE]]:
; SELDAG: CALL64pcrel32 <es:__stack_chk_fail>
; SELDAG: BB#[[SUCCESS]]:
; IR: # Machine code for function test_branch_weights:
-; IR: Successors according to CFG: BB#[[SUCCESS:[0-9]+]](1048575) BB#[[FAILURE:[0-9]+]](1)
+; IR: Successors according to CFG: BB#[[SUCCESS:[0-9]+]]({{[0-9a-fx/= ]+}}100.00%) BB#[[FAILURE:[0-9]+]]
; IR: BB#[[SUCCESS]]:
; IR: BB#[[FAILURE]]:
; IR: CALL64pcrel32 <ga:@__stack_chk_fail>
diff --git a/test/CodeGen/X86/stackmap-frame-setup.ll b/test/CodeGen/X86/stackmap-frame-setup.ll
new file mode 100644
index 000000000000..076e2482f8ba
--- /dev/null
+++ b/test/CodeGen/X86/stackmap-frame-setup.ll
@@ -0,0 +1,20 @@
+; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=x86_64-apple-darwin -mcpu=corei7 -stop-after machine-sink %s | FileCheck %s --check-prefix=ISEL
+; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort=1 -stop-after machine-sink %s | FileCheck %s --check-prefix=FAST-ISEL
+
+define void @caller_meta_leaf() {
+entry:
+ %metadata = alloca i64, i32 3, align 8
+ store i64 11, i64* %metadata
+ store i64 12, i64* %metadata
+ store i64 13, i64* %metadata
+; ISEL: ADJCALLSTACKDOWN64 0, 0, implicit-def
+; ISEL-NEXT: STACKMAP
+; ISEL-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+; FAST-ISEL: ADJCALLSTACKDOWN64 0, 0, implicit-def
+; FAST-ISEL-NEXT: STACKMAP
+; FAST-ISEL-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def
+ ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
diff --git a/test/CodeGen/X86/statepoint-allocas.ll b/test/CodeGen/X86/statepoint-allocas.ll
index 4af33e1f5478..fa2621e7d2fe 100644
--- a/test/CodeGen/X86/statepoint-allocas.ll
+++ b/test/CodeGen/X86/statepoint-allocas.ll
@@ -16,12 +16,12 @@ define i32 addrspace(1)* @test(i32 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK: movq %rdi, (%rsp)
; CHECK: callq return_i1
; CHECK: movq (%rsp), %rax
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
%alloca = alloca i32 addrspace(1)*, align 8
store i32 addrspace(1)* %ptr, i32 addrspace(1)** %alloca
- call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)** %alloca)
+ call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)** %alloca)
%rel = load i32 addrspace(1)*, i32 addrspace(1)** %alloca
ret i32 addrspace(1)* %rel
}
@@ -33,16 +33,16 @@ define i32 addrspace(1)* @test2(i32 addrspace(1)* %ptr) gc "statepoint-example"
; CHECK: movq %rdi, (%rsp)
; CHECK: callq return_i1
; CHECK: xorl %eax, %eax
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
%alloca = alloca i32 addrspace(1)*, align 8
store i32 addrspace(1)* %ptr, i32 addrspace(1)** %alloca
- call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 1, i32 addrspace(1)** %alloca)
+ call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 1, i32 addrspace(1)** %alloca)
ret i32 addrspace(1)* null
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
; CHECK-LABEL: .section .llvm_stackmaps
diff --git a/test/CodeGen/X86/statepoint-call-lowering.ll b/test/CodeGen/X86/statepoint-call-lowering.ll
index 8f352b7728c3..a8fa3cb37782 100644
--- a/test/CodeGen/X86/statepoint-call-lowering.ll
+++ b/test/CodeGen/X86/statepoint-call-lowering.ll
@@ -5,10 +5,13 @@
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
+%struct = type { i64, i64 }
+
declare zeroext i1 @return_i1()
declare zeroext i32 @return_i32()
declare i32* @return_i32ptr()
declare float @return_float()
+declare %struct @return_struct()
declare void @varargf(i32, ...)
define i1 @test_i1_return() gc "statepoint-example" {
@@ -17,11 +20,11 @@ define i1 @test_i1_return() gc "statepoint-example" {
; state arguments to the statepoint
; CHECK: pushq %rax
; CHECK: callq return_i1
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
- %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
ret i1 %call1
}
@@ -29,11 +32,11 @@ define i32 @test_i32_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32_return
; CHECK: pushq %rax
; CHECK: callq return_i32
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 0, i32 0, i32 0)
- %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
ret i32 %call1
}
@@ -41,11 +44,11 @@ define i32* @test_i32ptr_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32ptr_return
; CHECK: pushq %rax
; CHECK: callq return_i32ptr
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 0, i32 0, i32 0)
- %call1 = call i32* @llvm.experimental.gc.result.p0i32(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call i32* @llvm.experimental.gc.result.p0i32(token %safepoint_token)
ret i32* %call1
}
@@ -56,23 +59,35 @@ define float @test_float_return() gc "statepoint-example" {
; CHECK: popq %rax
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, float ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 0, i32 0, i32 0)
- %call1 = call float @llvm.experimental.gc.result.f32(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, float ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call float @llvm.experimental.gc.result.f32(token %safepoint_token)
ret float %call1
}
+define %struct @test_struct_return() gc "statepoint-example" {
+; CHECK-LABEL: test_struct_return
+; CHECK: pushq %rax
+; CHECK: callq return_struct
+; CHECK: popq %rcx
+; CHECK: retq
+entry:
+ %safepoint_token = tail call token (i64, i32, %struct ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_structf(i64 0, i32 0, %struct ()* @return_struct, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call %struct @llvm.experimental.gc.result.struct(token %safepoint_token)
+ ret %struct %call1
+}
+
define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
; CHECK-LABEL: test_relocate
; Check that an ununsed relocate has no code-generation impact
; CHECK: pushq %rax
; CHECK: callq return_i1
-; CHECK-NEXT: .Ltmp9:
-; CHECK-NEXT: popq %rdx
+; CHECK-NEXT: .Ltmp11:
+; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)
- %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
- %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)
+ %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
+ %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
ret i1 %call2
}
@@ -81,7 +96,7 @@ define void @test_void_vararg() gc "statepoint-example" {
; Check a statepoint wrapping a *void* returning vararg function works
; CHECK: callq varargf
entry:
- %safepoint_token = tail call i32 (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0)
+ %safepoint_token = tail call token (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0)
;; if we try to use the result from a statepoint wrapping a
;; non-void-returning varargf, we will experience a crash.
ret void
@@ -92,26 +107,54 @@ define i1 @test_i1_return_patchable() gc "statepoint-example" {
; A patchable variant of test_i1_return
; CHECK: pushq %rax
; CHECK: nopl
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 3, i1 ()*null, i32 0, i32 0, i32 0, i32 0)
- %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 3, i1 ()*null, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
ret i1 %call1
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i1 @llvm.experimental.gc.result.i1(i32)
+declare void @consume(i32 addrspace(1)* %obj)
+
+define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint-example" {
+; CHECK-LABEL: test_cross_bb
+; CHECK: movq
+; CHECK: callq return_i1
+; CHECK: %left
+; CHECK: movq
+; CHECK-NEXT: callq consume
+; CHECK: retq
+entry:
+ %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)
+ br i1 %external_cond, label %left, label %right
+
+left:
+ %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
+ %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+ call void @consume(i32 addrspace(1)* %call1)
+ ret i1 %call2
+
+right:
+ ret i1 true
+}
+
+
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i1 @llvm.experimental.gc.result.i1(token)
+
+declare token @llvm.experimental.gc.statepoint.p0f_i32f(i64, i32, i32 ()*, i32, i32, ...)
+declare i32 @llvm.experimental.gc.result.i32(token)
-declare i32 @llvm.experimental.gc.statepoint.p0f_i32f(i64, i32, i32 ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.result.i32(i32)
+declare token @llvm.experimental.gc.statepoint.p0f_p0i32f(i64, i32, i32* ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.result.p0i32(token)
-declare i32 @llvm.experimental.gc.statepoint.p0f_p0i32f(i64, i32, i32* ()*, i32, i32, ...)
-declare i32* @llvm.experimental.gc.result.p0i32(i32)
+declare token @llvm.experimental.gc.statepoint.p0f_f32f(i64, i32, float ()*, i32, i32, ...)
+declare float @llvm.experimental.gc.result.f32(token)
-declare i32 @llvm.experimental.gc.statepoint.p0f_f32f(i64, i32, float ()*, i32, i32, ...)
-declare float @llvm.experimental.gc.result.f32(i32)
+declare token @llvm.experimental.gc.statepoint.p0f_structf(i64, i32, %struct ()*, i32, i32, ...)
+declare %struct @llvm.experimental.gc.result.struct(token)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64, i32, void (i32, ...)*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64, i32, void (i32, ...)*, i32, i32, ...)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
diff --git a/test/CodeGen/X86/statepoint-far-call.ll b/test/CodeGen/X86/statepoint-far-call.ll
index cd8dd0f35a20..2ebf38c5c019 100644
--- a/test/CodeGen/X86/statepoint-far-call.ll
+++ b/test/CodeGen/X86/statepoint-far-call.ll
@@ -14,9 +14,9 @@ define void @test_far_call() gc "statepoint-example" {
; CHECK: retq
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* inttoptr (i64 140727162896504 to void ()*), i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* inttoptr (i64 140727162896504 to void ()*), i32 0, i32 0, i32 0, i32 0)
ret void
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/CodeGen/X86/statepoint-forward.ll b/test/CodeGen/X86/statepoint-forward.ll
index 698229e705f4..d97bc0c75602 100644
--- a/test/CodeGen/X86/statepoint-forward.ll
+++ b/test/CodeGen/X86/statepoint-forward.ll
@@ -25,8 +25,8 @@ entry:
%before = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p
%cmp1 = call i1 @f(i32 addrspace(1)* %before)
call void @llvm.assume(i1 %cmp1)
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
- %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(token %safepoint_token, i32 7, i32 7)
%after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
@@ -44,8 +44,8 @@ entry:
%cmp1 = call i1 @f(i32 addrspace(1)* %v)
call void @llvm.assume(i1 %cmp1)
store i32 addrspace(1)* %v, i32 addrspace(1)* addrspace(1)* %p
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
- %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(token %safepoint_token, i32 7, i32 7)
%after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
@@ -72,7 +72,7 @@ entry:
%before = load i32 addrspace(1)*, i32 addrspace(1)** %p
%cmp1 = call i1 @f(i32 addrspace(1)* %before)
call void @llvm.assume(i1 %cmp1)
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0)
%after = load i32 addrspace(1)*, i32 addrspace(1)** %p
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
@@ -90,7 +90,7 @@ entry:
%cmp1 = call i1 @f(i32 addrspace(1)* %v)
call void @llvm.assume(i1 %cmp1)
store i32 addrspace(1)* %v, i32 addrspace(1)** %p
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0)
%after = load i32 addrspace(1)*, i32 addrspace(1)** %p
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
@@ -102,5 +102,5 @@ entry:
}
declare void @llvm.assume(i1)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32, i32, i32) #3
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(token, i32, i32) #3
diff --git a/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll b/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
index 61b8ded2c472..b4ba0964fdd6 100644
--- a/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
+++ b/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
@@ -18,11 +18,11 @@ define i1 @test_i1_return() gc "statepoint-example" {
; state arguments to the statepoint
; CHECK: pushq %rax
; CHECK: callq return_i1
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0)
- %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
ret i1 %call1
}
@@ -30,11 +30,11 @@ define i32 @test_i32_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32_return
; CHECK: pushq %rax
; CHECK: callq return_i32
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 1, i32 0, i32 0)
- %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 1, i32 0, i32 0)
+ %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
ret i32 %call1
}
@@ -42,11 +42,11 @@ define i32* @test_i32ptr_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32ptr_return
; CHECK: pushq %rax
; CHECK: callq return_i32ptr
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 1, i32 0, i32 0)
- %call1 = call i32* @llvm.experimental.gc.result.p0i32(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 1, i32 0, i32 0)
+ %call1 = call i32* @llvm.experimental.gc.result.p0i32(token %safepoint_token)
ret i32* %call1
}
@@ -57,8 +57,8 @@ define float @test_float_return() gc "statepoint-example" {
; CHECK: popq %rax
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, float ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 1, i32 0, i32 0)
- %call1 = call float @llvm.experimental.gc.result.f32(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, float ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 1, i32 0, i32 0)
+ %call1 = call float @llvm.experimental.gc.result.f32(token %safepoint_token)
ret float %call1
}
@@ -68,12 +68,12 @@ define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
; CHECK: pushq %rax
; CHECK: callq return_i1
; CHECK-NEXT: .Ltmp9:
-; CHECK-NEXT: popq %rdx
+; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
entry:
- %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0, i32 addrspace(1)* %a)
- %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
- %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0, i32 addrspace(1)* %a)
+ %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
+ %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
ret i1 %call2
}
@@ -82,7 +82,7 @@ define void @test_void_vararg() gc "statepoint-example" {
; Check a statepoint wrapping a *void* returning vararg function works
; CHECK: callq varargf
entry:
- %safepoint_token = tail call i32 (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 1, i32 42, i32 43, i32 0, i32 0)
+ %safepoint_token = tail call token (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 1, i32 42, i32 43, i32 0, i32 0)
;; if we try to use the result from a statepoint wrapping a
;; non-void-returning varargf, we will experience a crash.
ret void
@@ -92,12 +92,12 @@ define i32 @test_transition_args() gc "statepoint-example" {
; CHECK-LABEL: test_transition_args
; CHECK: pushq %rax
; CHECK: callq return_i32
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
%val = alloca i32
- %safepoint_token = call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 1, i32 2, i32* %val, i64 42, i32 0)
- %call1 = call i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token)
+ %safepoint_token = call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 1, i32 2, i32* %val, i64 42, i32 0)
+ %call1 = call i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
ret i32 %call1
}
@@ -105,29 +105,29 @@ define i32 @test_transition_args_2() gc "statepoint-example" {
; CHECK-LABEL: test_transition_args_2
; CHECK: pushq %rax
; CHECK: callq return_i32
-; CHECK: popq %rdx
+; CHECK: popq %rcx
; CHECK: retq
entry:
%val = alloca i32
%arg = alloca i8
- %safepoint_token = call i32 (i64, i32, i32 (i32, i8*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32p0i8f(i64 0, i32 0, i32 (i32, i8*)* @return_i32_with_args, i32 2, i32 1, i32 0, i8* %arg, i32 2, i32* %val, i64 42, i32 0)
- %call1 = call i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token)
+ %safepoint_token = call token (i64, i32, i32 (i32, i8*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32p0i8f(i64 0, i32 0, i32 (i32, i8*)* @return_i32_with_args, i32 2, i32 1, i32 0, i8* %arg, i32 2, i32* %val, i64 42, i32 0)
+ %call1 = call i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
ret i32 %call1
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i1 @llvm.experimental.gc.result.i1(i32)
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i1 @llvm.experimental.gc.result.i1(token)
-declare i32 @llvm.experimental.gc.statepoint.p0f_i32f(i64, i32, i32 ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_i32i32p0i8f(i64, i32, i32 (i32, i8*)*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.result.i32(i32)
+declare token @llvm.experimental.gc.statepoint.p0f_i32f(i64, i32, i32 ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_i32i32p0i8f(i64, i32, i32 (i32, i8*)*, i32, i32, ...)
+declare i32 @llvm.experimental.gc.result.i32(token)
-declare i32 @llvm.experimental.gc.statepoint.p0f_p0i32f(i64, i32, i32* ()*, i32, i32, ...)
-declare i32* @llvm.experimental.gc.result.p0i32(i32)
+declare token @llvm.experimental.gc.statepoint.p0f_p0i32f(i64, i32, i32* ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.result.p0i32(token)
-declare i32 @llvm.experimental.gc.statepoint.p0f_f32f(i64, i32, float ()*, i32, i32, ...)
-declare float @llvm.experimental.gc.result.f32(i32)
+declare token @llvm.experimental.gc.statepoint.p0f_f32f(i64, i32, float ()*, i32, i32, ...)
+declare float @llvm.experimental.gc.result.f32(token)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64, i32, void (i32, ...)*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64, i32, void (i32, ...)*, i32, i32, ...)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) \ No newline at end of file
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) \ No newline at end of file
diff --git a/test/CodeGen/X86/statepoint-invoke.ll b/test/CodeGen/X86/statepoint-invoke.ll
index 81b9ab89ebca..1d38b2facc73 100644
--- a/test/CodeGen/X86/statepoint-invoke.ll
+++ b/test/CodeGen/X86/statepoint-invoke.ll
@@ -14,13 +14,13 @@ entry:
; CHECK: Ltmp{{[0-9]+}}:
; CHECK: callq some_call
; CHECK: Ltmp{{[0-9]+}}:
- %0 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1)
+ %0 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1)
to label %invoke_safepoint_normal_dest unwind label %exceptional_return
invoke_safepoint_normal_dest:
; CHECK: movq
- %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 13, i32 13)
- %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 14, i32 14)
+ %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 13, i32 13)
+ %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 14, i32 14)
br label %normal_return
normal_return:
@@ -31,11 +31,10 @@ exceptional_return:
; CHECK: Ltmp{{[0-9]+}}:
; CHECK: movq
; CHECK: retq
- %landing_pad = landingpad { i8*, i32 }
+ %landing_pad = landingpad token
cleanup
- %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
- %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13)
- %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14)
+ %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 13, i32 13)
+ %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 14, i32 14)
ret i64 addrspace(1)* %obj1.relocated1
}
; CHECK-LABEL: GCC_except_table{{[0-9]+}}:
@@ -51,22 +50,21 @@ entry:
; CHECK: .Ltmp{{[0-9]+}}:
; CHECK: callq some_other_call
; CHECK: .Ltmp{{[0-9]+}}:
- %0 = invoke i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 0, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @some_other_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1)
+ %0 = invoke token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 0, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @some_other_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1)
to label %normal_return unwind label %exceptional_return
normal_return:
; CHECK: popq
; CHECK: retq
- %ret_val = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32 %0)
+ %ret_val = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token %0)
ret i64 addrspace(1)* %ret_val
exceptional_return:
; CHECK: .Ltmp{{[0-9]+}}:
; CHECK: movq
- %landing_pad = landingpad { i8*, i32 }
+ %landing_pad = landingpad token
cleanup
- %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
- %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13)
+ %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 13, i32 13)
ret i64 addrspace(1)* %obj.relocated
}
; CHECK-LABEL: GCC_except_table{{[0-9]+}}:
@@ -85,14 +83,14 @@ left:
; CHECK: movq %rdx, 8(%rsp)
; CHECK: movq
; CHECK: callq some_call
- %sp1 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2)
+ %sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2)
to label %left.relocs unwind label %exceptional_return.left
left.relocs:
; CHECK: movq (%rsp),
; CHECK: movq 8(%rsp), [[REGVAL2:%[a-z]+]]
- %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 13, i32 13)
- %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 14, i32 14)
+ %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 13, i32 13)
+ %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 14, i32 14)
br label %normal_return
right:
@@ -100,37 +98,35 @@ right:
; CHECK: movq
; CHECK: movq %rdx, (%rsp)
; CHECK: callq some_call
- %sp2 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3)
+ %sp2 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3)
to label %right.relocs unwind label %exceptional_return.right
right.relocs:
; CHECK: movq (%rsp), [[REGVAL2]]
; CHECK: movq
- %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 13, i32 13)
- %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 14, i32 14)
+ %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 13, i32 13)
+ %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 14, i32 14)
br label %normal_return
normal_return:
; CHECK-LABEL: %normal_return
; CHECK: cmoveq {{.*}}[[REGVAL2]]{{.*}}
- ; CHECK retq
+ ; CHECK: retq
%a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs]
%a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs]
%ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2
ret i64 addrspace(1)* %ret
exceptional_return.left:
- %landing_pad = landingpad { i8*, i32 }
+ %landing_pad = landingpad token
cleanup
- %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
- %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13)
+ %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 13, i32 13)
ret i64 addrspace(1)* %val.relocated2
exceptional_return.right:
- %landing_pad1 = landingpad { i8*, i32 }
+ %landing_pad1 = landingpad token
cleanup
- %relocate_token1 = extractvalue { i8*, i32 } %landing_pad1, 1
- %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token1, i32 13, i32 13)
+ %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad1, i32 13, i32 13)
ret i64 addrspace(1)* %val.relocated3
}
@@ -139,7 +135,7 @@ define i64 addrspace(1)* @test_null_undef(i64 addrspace(1)* %val1)
; CHECK-LABEL: test_null_undef:
entry:
; CHECK: callq some_call
- %sp1 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* null, i64 addrspace(1)* undef)
+ %sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* null, i64 addrspace(1)* undef)
to label %normal_return unwind label %exceptional_return
normal_return:
@@ -147,16 +143,15 @@ normal_return:
; CHECK: xorl %eax, %eax
; CHECK-NEXT: popq
; CHECK-NEXT: retq
- %null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 13, i32 13)
- %undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 14, i32 14)
+ %null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 13, i32 13)
+ %undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 14, i32 14)
ret i64 addrspace(1)* %null.relocated
exceptional_return:
- %landing_pad = landingpad { i8*, i32 }
+ %landing_pad = landingpad token
cleanup
- %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
- %null.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13)
- %undef.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14)
+ %null.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 13, i32 13)
+ %undef.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 14, i32 14)
ret i64 addrspace(1)* %null.relocated2
}
@@ -168,14 +163,14 @@ entry:
%aa = addrspacecast i32* %a to i32 addrspace(1)*
%c = inttoptr i64 15 to i64 addrspace(1)*
; CHECK: callq
- %sp = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %aa, i64 addrspace(1)* %c)
+ %sp = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %aa, i64 addrspace(1)* %c)
to label %normal_return unwind label %exceptional_return
normal_return:
; CHECK: leaq
; CHECK-NEXT: popq
; CHECK-NEXT: retq
- %aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %sp, i32 13, i32 13)
+ %aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %sp, i32 13, i32 13)
%aa.converted = bitcast i32 addrspace(1)* %aa.rel to i64 addrspace(1)*
ret i64 addrspace(1)* %aa.converted
@@ -183,16 +178,15 @@ exceptional_return:
; CHECK: movl $15
; CHECK-NEXT: popq
; CHECK-NEXT: retq
- %landing_pad = landingpad { i8*, i32 }
+ %landing_pad = landingpad token
cleanup
- %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
- %aa.rel2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14)
+ %aa.rel2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 14, i32 14)
ret i64 addrspace(1)* %aa.rel2
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...)
-declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32, i32, i32)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
-declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32)
+declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token, i32, i32)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
+declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token)
diff --git a/test/CodeGen/X86/statepoint-stack-usage.ll b/test/CodeGen/X86/statepoint-stack-usage.ll
index a4aa747af8cf..d4784212810f 100644
--- a/test/CodeGen/X86/statepoint-stack-usage.ll
+++ b/test/CodeGen/X86/statepoint-stack-usage.ll
@@ -16,17 +16,17 @@ define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 a
; CHECK: movq %rsi, (%rsp)
; There should be no more than three moves
; CHECK-NOT: movq
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
- %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 12)
- %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 13)
- %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 14)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+ %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 12)
+ %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 13)
+ %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 14)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
- %safepoint_token2 = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
- %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 14)
- %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 13)
- %c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 12)
+ %safepoint_token2 = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+ %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 14)
+ %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 13)
+ %c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 12)
; CHECK: callq
ret i32 1
}
@@ -39,17 +39,17 @@ define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrsp
; CHECK: movq %rdi, 16(%rsp)
; CHECK: movq %rdx, 8(%rsp)
; CHECK: movq %rsi, (%rsp)
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
- %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 12)
- %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 13)
- %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 14)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+ %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 12)
+ %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 13)
+ %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 14)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
- %safepoint_token2 = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 addrspace(1)* %a1, i32 0, i32 addrspace(1)* %c1, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
- %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 14)
- %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 13)
- %c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 12)
+ %safepoint_token2 = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 addrspace(1)* %a1, i32 0, i32 addrspace(1)* %c1, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+ %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 14)
+ %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 13)
+ %c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 12)
; CHECK: callq
ret i32 1
}
@@ -63,25 +63,25 @@ entry:
; CHECK: movq %rdx, 8(%rsp)
; CHECK: movq %rsi, (%rsp)
; CHECK: callq
- %safepoint_token = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+ %safepoint_token = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
to label %normal_return unwind label %exceptional_return
normal_return:
- %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 12)
- %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 13)
- %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 14)
+ %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 12)
+ %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 13)
+ %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 14)
; Should work even through bitcasts
%c1.casted = bitcast i32 addrspace(1)* %c1 to i8 addrspace(1)*
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
; CHECK: callq
- %safepoint_token2 = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %c1.casted, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+ %safepoint_token2 = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %c1.casted, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
to label %normal_return2 unwind label %exceptional_return2
normal_return2:
- %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 14)
- %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 13)
- %c2 = tail call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %safepoint_token2, i32 12, i32 12)
+ %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 14)
+ %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 13)
+ %c2 = tail call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 12, i32 12)
ret i32 1
exceptional_return:
@@ -96,10 +96,10 @@ exceptional_return2:
}
; Function Attrs: nounwind
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
-declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32, i32, i32) #3
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) #3
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare i32 @"personality_function"()
diff --git a/test/CodeGen/X86/statepoint-stackmap-format.ll b/test/CodeGen/X86/statepoint-stackmap-format.ll
index e18476cee53c..4f8b2ce6efd9 100644
--- a/test/CodeGen/X86/statepoint-stackmap-format.ll
+++ b/test/CodeGen/X86/statepoint-stackmap-format.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple="x86_64-pc-linux-gnu" | FileCheck %s
-; RUN: llc < %s -mtriple="x86_64-pc-win64-coff" | FileCheck %s
+; RUN: llc < %s -mtriple="x86_64-pc-unknown-elf" | FileCheck %s
; This test is a sanity check to ensure statepoints are generating StackMap
; sections correctly. This is not intended to be a rigorous test of the
@@ -11,7 +11,7 @@ declare zeroext i1 @return_i1()
define i1 @test(i32 addrspace(1)* %ptr_base, i32 %arg)
gc "statepoint-example" {
-; CHECK-LABEL: test
+; CHECK-LABEL: test:
; Do we see two spills for the local values and the store to the
; alloca?
; CHECK: subq $40, %rsp
@@ -25,11 +25,11 @@ entry:
%metadata1 = alloca i32 addrspace(1)*, i32 2, align 8
store i32 addrspace(1)* null, i32 addrspace(1)** %metadata1
%ptr_derived = getelementptr i32, i32 addrspace(1)* %ptr_base, i32 %arg
- %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* null, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* %ptr_derived, i32 addrspace(1)* null)
- %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
- %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 9)
- %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 10)
- %c = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 11, i32 11)
+ %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* null, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* %ptr_derived, i32 addrspace(1)* null)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+ %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 9, i32 9)
+ %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 9, i32 10)
+ %c = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 11, i32 11)
;
ret i1 %call1
}
@@ -53,11 +53,11 @@ define i1 @test_derived_arg(i32 addrspace(1)* %ptr_base,
entry:
%metadata1 = alloca i32 addrspace(1)*, i32 2, align 8
store i32 addrspace(1)* null, i32 addrspace(1)** %metadata1
- %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* null, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* %ptr_derived, i32 addrspace(1)* null)
- %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
- %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 9)
- %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 10)
- %c = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 11, i32 11)
+ %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* null, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* %ptr_derived, i32 addrspace(1)* null)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+ %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 9, i32 9)
+ %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 9, i32 10)
+ %c = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 11, i32 11)
;
ret i1 %call1
}
@@ -66,15 +66,15 @@ entry:
define i1 @test_id() gc "statepoint-example" {
; CHECK-LABEL: test_id
entry:
- %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 237, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
- %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 237, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
ret i1 %call1
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i1 @llvm.experimental.gc.result.i1(i32)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i1 @llvm.experimental.gc.result.i1(token)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3
; CHECK-LABEL: .section .llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
@@ -94,18 +94,19 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
; CHECK-NEXT: .quad 40
; CHECK-NEXT: .quad test_derived_arg
; CHECK-NEXT: .quad 40
+; CHECK-NEXT: .quad test_id
+; CHECK-NEXT: .quad 8
;
; test
;
-; Large Constants
-; Statepoint ID only
-; CHECK: .quad 0
+; Statepoint ID
+; CHECK-NEXT: .quad 0
; Callsites
; Constant arguments
-; CHECK: .long .Ltmp1-test
+; CHECK-NEXT: .long .Ltmp1-test
; CHECK: .short 0
; CHECK: .short 11
; SmallConstant (0)
@@ -123,8 +124,8 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 2
-; Direct Spill Slot [RSP+0]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+0]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 16
@@ -143,23 +144,23 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 0
-; Direct Spill Slot [RSP+16]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+16]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 16
-; Direct Spill Slot [RSP+8]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+8]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 8
-; Direct Spill Slot [RSP+16]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+16]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 16
-; Direct Spill Slot [RSP+16]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+16]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 16
@@ -171,15 +172,13 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
;
; test_derived_arg
-;
-; Large Constants
-; Statepoint ID only
-; CHECK: .quad 0
+; Statepoint ID
+; CHECK-NEXT: .quad 0
; Callsites
; Constant arguments
-; CHECK: .long .Ltmp3-test_derived_arg
+; CHECK-NEXT: .long .Ltmp3-test_derived_arg
; CHECK: .short 0
; CHECK: .short 11
; SmallConstant (0)
@@ -192,8 +191,8 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 2
-; Direct Spill Slot [RSP+0]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+0]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 16
@@ -212,23 +211,23 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 0
-; Direct Spill Slot [RSP+16]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+16]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 16
-; Direct Spill Slot [RSP+8]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+8]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 8
-; Direct Spill Slot [RSP+16]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+16]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 16
-; Direct Spill Slot [RSP+16]
-; CHECK: .byte 2
+; Indirect Spill Slot [RSP+16]
+; CHECK: .byte 3
; CHECK: .byte 8
; CHECK: .short 7
; CHECK: .long 16
@@ -239,13 +238,12 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
; CHECK: .align 8
; Records for the test_id function:
-; No large constants
; The Statepoint ID:
-; CHECK: .quad 237
+; CHECK-NEXT: .quad 237
; Instruction Offset
-; CHECK: .long .Ltmp5-test_id
+; CHECK-NEXT: .long .Ltmp5-test_id
; Reserved:
; CHECK: .short 0
diff --git a/test/CodeGen/X86/stdarg.ll b/test/CodeGen/X86/stdarg.ll
index 18d502ad5834..42cbcb1008d3 100644
--- a/test/CodeGen/X86/stdarg.ll
+++ b/test/CodeGen/X86/stdarg.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; CHECK: testb %al, %al
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
@@ -8,6 +7,15 @@ entry:
%ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2]
%ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2]
call void @llvm.va_start(i8* %ap12)
+; CHECK: testb %al, %al
+
+; These test for specific offsets, which is very fragile. Still, the test needs
+; to ensure that va_list has the correct element types.
+;
+; CHECK-DAG: movq {{.*}}, 192(%rsp)
+; CHECK-DAG: movq {{.*}}, 184(%rsp)
+; CHECK-DAG: movl {{.*}}, 180(%rsp)
+; CHECK-DAG: movl {{.*}}, 176(%rsp)
%ap3 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
call void @bar(%struct.__va_list_tag* %ap3) nounwind
call void @llvm.va_end(i8* %ap12)
diff --git a/test/CodeGen/X86/stores-merging.ll b/test/CodeGen/X86/stores-merging.ll
index d6daa573b4ae..9e479bd71b98 100644
--- a/test/CodeGen/X86/stores-merging.ll
+++ b/test/CodeGen/X86/stores-merging.ll
@@ -7,17 +7,51 @@ target triple = "x86_64-unknown-linux-gnu"
@e = common global %structTy zeroinitializer, align 4
-; CHECK-LABEL: f
-define void @f() {
-entry:
+;; Ensure that MergeConsecutiveStores doesn't incorrectly reorder
+;; store operations. The first test stores in increasing address
+;; order, the second in decreasing -- but in both cases should have
+;; the same result in memory in the end.
-; CHECK: movabsq $528280977409, %rax
+; CHECK-LABEL: redundant_stores_merging:
+; CHECK: movl $123, e+8(%rip)
+; CHECK: movabsq $1958505086977, %rax
; CHECK: movq %rax, e+4(%rip)
-; CHECK: movl $456, e+8(%rip)
-
+define void @redundant_stores_merging() {
+entry:
store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
ret void
}
+;; This variant tests PR25154.
+; CHECK-LABEL: redundant_stores_merging_reverse:
+; CHECK: movl $123, e+8(%rip)
+; CHECK: movabsq $1958505086977, %rax
+; CHECK: movq %rax, e+4(%rip)
+define void @redundant_stores_merging_reverse() {
+entry:
+ store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
+ store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
+ store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
+ ret void
+}
+
+@b = common global [8 x i8] zeroinitializer, align 2
+
+;; The 2-byte store to offset 3 overlaps the 2-byte store to offset 2;
+;; these must not be reordered in MergeConsecutiveStores such that the
+;; store to 3 comes first (e.g. by merging the stores to 0 and 2 into
+;; a movl, after the store to 3).
+
+;; CHECK-LABEL: overlapping_stores_merging:
+;; CHECK: movw $0, b+2(%rip)
+;; CHECK: movw $2, b+3(%rip)
+;; CHECK: movw $1, b(%rip)
+define void @overlapping_stores_merging() {
+entry:
+ store i16 0, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 2) to i16*), align 2
+ store i16 2, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 3) to i16*), align 1
+ store i16 1, i16* bitcast (i8* getelementptr inbounds ([8 x i8], [8 x i8]* @b, i64 0, i64 0) to i16*), align 2
+ ret void
+}
diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll
index 2cf3aafe5471..6a2cbe1ec6ca 100644
--- a/test/CodeGen/X86/switch-bt.ll
+++ b/test/CodeGen/X86/switch-bt.ll
@@ -5,7 +5,7 @@
; CHECK: movabsq $2305843009482129440, %r
; CHECK-NEXT: btq %rax, %r
-; CHECK-NEXT: jae
+; CHECK-NEXT: jb
; CHECK: movl $671088640, %e
; CHECK-NEXT: btq %rax, %r
; CHECK-NEXT: jae
@@ -145,13 +145,13 @@ sw.epilog:
; CHECK: cmpl $10
; CHECK: je
; CHECK: cmpl $20
+; CHECK: je
+; CHECK: cmpl $30
; CHECK: jne
; CHECK: cmpl $40
; CHECK: je
; CHECK: cmpl $50
-; CHECK: jne
-; CHECK: cmpl $30
-; CHECK: jne
+; CHECK: je
; CHECK: cmpl $60
; CHECK: jne
}
diff --git a/test/CodeGen/X86/switch-edge-weight.ll b/test/CodeGen/X86/switch-edge-weight.ll
new file mode 100644
index 000000000000..b8cb7b1280ad
--- /dev/null
+++ b/test/CodeGen/X86/switch-edge-weight.ll
@@ -0,0 +1,281 @@
+; RUN: llc -march=x86-64 -print-machineinstrs=expand-isel-pseudos %s -o /dev/null 2>&1 | FileCheck %s
+
+declare void @foo(i32)
+
+; CHECK-LABEL: test
+
+define void @test(i32 %x) nounwind {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 1, label %sw.bb
+ i32 155, label %sw.bb
+ i32 156, label %sw.bb
+ i32 157, label %sw.bb
+ i32 158, label %sw.bb
+ i32 159, label %sw.bb
+ i32 1134, label %sw.bb
+ i32 1140, label %sw.bb
+ ], !prof !1
+
+sw.bb:
+ call void @foo(i32 0)
+ br label %sw.epilog
+
+sw.default:
+ call void @foo(i32 1)
+ br label %sw.epilog
+
+sw.epilog:
+ ret void
+
+; Check if weights are correctly assigned to edges generated from switch
+; statement.
+;
+; CHECK: BB#0:
+; BB#0 to BB#4: [0, 1133] (65 = 60 + 5)
+; BB#0 to BB#5: [1134, UINT32_MAX] (25 = 20 + 5)
+; CHECK: Successors according to CFG: BB#4({{[0-9a-fx/= ]+}}72.22%) BB#5({{[0-9a-fx/= ]+}}27.78%)
+;
+; CHECK: BB#4:
+; BB#4 to BB#1: [155, 159] (50)
+; BB#4 to BB#5: [0, 1133] - [155, 159] (15 = 10 + 5)
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}76.92%) BB#7({{[0-9a-fx/= ]+}}23.08%)
+;
+; CHECK: BB#5:
+; BB#5 to BB#1: {1140} (10)
+; BB#5 to BB#6: [1134, UINT32_MAX] - {1140} (15 = 10 + 5)
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}40.00%) BB#6({{[0-9a-fx/= ]+}}60.00%)
+;
+; CHECK: BB#6:
+; BB#6 to BB#1: {1134} (10)
+; BB#6 to BB#2: [1134, UINT32_MAX] - {1134, 1140} (5)
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}66.67%) BB#2({{[0-9a-fx/= ]+}}33.33%)
+}
+
+; CHECK-LABEL: test2
+
+define void @test2(i32 %x) nounwind {
+entry:
+
+; In this switch statement, there is an edge from jump table to default
+; statement.
+
+ switch i32 %x, label %sw.default [
+ i32 1, label %sw.bb
+ i32 10, label %sw.bb2
+ i32 11, label %sw.bb3
+ i32 12, label %sw.bb4
+ i32 13, label %sw.bb5
+ i32 14, label %sw.bb5
+ ], !prof !3
+
+sw.bb:
+ call void @foo(i32 0)
+ br label %sw.epilog
+
+sw.bb2:
+ call void @foo(i32 2)
+ br label %sw.epilog
+
+sw.bb3:
+ call void @foo(i32 3)
+ br label %sw.epilog
+
+sw.bb4:
+ call void @foo(i32 4)
+ br label %sw.epilog
+
+sw.bb5:
+ call void @foo(i32 5)
+ br label %sw.epilog
+
+sw.default:
+ call void @foo(i32 1)
+ br label %sw.epilog
+
+sw.epilog:
+ ret void
+
+; Check if weights are correctly assigned to edges generated from switch
+; statement.
+;
+; CHECK: BB#0:
+; BB#0 to BB#6: {0} + [15, UINT32_MAX] (5)
+; BB#0 to BB#8: [1, 14] (jump table) (65 = 60 + 5)
+; CHECK: Successors according to CFG: BB#6({{[0-9a-fx/= ]+}}7.14%) BB#8({{[0-9a-fx/= ]+}}92.86%
+;
+; CHECK: BB#8:
+; BB#8 to BB#1: {1} (10)
+; BB#8 to BB#6: [2, 9] (5)
+; BB#8 to BB#2: {10} (10)
+; BB#8 to BB#3: {11} (10)
+; BB#8 to BB#4: {12} (10)
+; BB#8 to BB#5: {13, 14} (20)
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}15.38%) BB#6({{[0-9a-fx/= ]+}}7.69%) BB#2({{[0-9a-fx/= ]+}}15.38%) BB#3({{[0-9a-fx/= ]+}}15.38%) BB#4({{[0-9a-fx/= ]+}}15.38%) BB#5({{[0-9a-fx/= ]+}}30.77%)
+}
+
+; CHECK-LABEL: test3
+
+define void @test3(i32 %x) nounwind {
+entry:
+
+; In this switch statement, there is no edge from jump table to default
+; statement.
+
+ switch i32 %x, label %sw.default [
+ i32 10, label %sw.bb
+ i32 11, label %sw.bb2
+ i32 12, label %sw.bb3
+ i32 13, label %sw.bb4
+ i32 14, label %sw.bb5
+ ], !prof !2
+
+sw.bb:
+ call void @foo(i32 0)
+ br label %sw.epilog
+
+sw.bb2:
+ call void @foo(i32 2)
+ br label %sw.epilog
+
+sw.bb3:
+ call void @foo(i32 3)
+ br label %sw.epilog
+
+sw.bb4:
+ call void @foo(i32 4)
+ br label %sw.epilog
+
+sw.bb5:
+ call void @foo(i32 5)
+ br label %sw.epilog
+
+sw.default:
+ call void @foo(i32 1)
+ br label %sw.epilog
+
+sw.epilog:
+ ret void
+
+; Check if weights are correctly assigned to edges generated from switch
+; statement.
+;
+; CHECK: BB#0:
+; BB#0 to BB#6: [0, 9] + [15, UINT32_MAX] {10}
+; BB#0 to BB#8: [10, 14] (jump table) (50)
+; CHECK: Successors according to CFG: BB#6({{[0-9a-fx/= ]+}}16.67%) BB#8({{[0-9a-fx/= ]+}}83.33%)
+;
+; CHECK: BB#8:
+; BB#8 to BB#1: {10} (10)
+; BB#8 to BB#2: {11} (10)
+; BB#8 to BB#3: {12} (10)
+; BB#8 to BB#4: {13} (10)
+; BB#8 to BB#5: {14} (10)
+; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}20.00%) BB#2({{[0-9a-fx/= ]+}}20.00%) BB#3({{[0-9a-fx/= ]+}}20.00%) BB#4({{[0-9a-fx/= ]+}}20.00%) BB#5({{[0-9a-fx/= ]+}}20.00%)
+}
+
+; CHECK-LABEL: test4
+
+define void @test4(i32 %x) nounwind {
+entry:
+
+; In this switch statement, there is no edge from bit test to default basic
+; block.
+
+ switch i32 %x, label %sw.default [
+ i32 1, label %sw.bb
+ i32 111, label %sw.bb2
+ i32 112, label %sw.bb3
+ i32 113, label %sw.bb3
+ i32 114, label %sw.bb2
+ i32 115, label %sw.bb2
+ ], !prof !3
+
+sw.bb:
+ call void @foo(i32 0)
+ br label %sw.epilog
+
+sw.bb2:
+ call void @foo(i32 2)
+ br label %sw.epilog
+
+sw.bb3:
+ call void @foo(i32 3)
+ br label %sw.epilog
+
+sw.default:
+ call void @foo(i32 1)
+ br label %sw.epilog
+
+sw.epilog:
+ ret void
+
+; Check if weights are correctly assigned to edges generated from switch
+; statement.
+;
+; CHECK: BB#0:
+; BB#0 to BB#6: [0, 110] + [116, UINT32_MAX] (20)
+; BB#0 to BB#7: [111, 115] (bit test) (50)
+; CHECK: Successors according to CFG: BB#6({{[0-9a-fx/= ]+}}28.57%) BB#7({{[0-9a-fx/= ]+}}71.43%)
+;
+; CHECK: BB#7:
+; BB#7 to BB#2: {111, 114, 115} (30)
+; BB#7 to BB#3: {112, 113} (20)
+; CHECK: Successors according to CFG: BB#2({{[0-9a-fx/= ]+}}60.00%) BB#3({{[0-9a-fx/= ]+}}40.00%)
+}
+
+; CHECK-LABEL: test5
+
+define void @test5(i32 %x) nounwind {
+entry:
+
+; In this switch statement, there is an edge from jump table to default basic
+; block.
+
+ switch i32 %x, label %sw.default [
+ i32 1, label %sw.bb
+ i32 5, label %sw.bb2
+ i32 7, label %sw.bb3
+ i32 9, label %sw.bb4
+ i32 31, label %sw.bb5
+ ], !prof !2
+
+sw.bb:
+ call void @foo(i32 0)
+ br label %sw.epilog
+
+sw.bb2:
+ call void @foo(i32 1)
+ br label %sw.epilog
+
+sw.bb3:
+ call void @foo(i32 2)
+ br label %sw.epilog
+
+sw.bb4:
+ call void @foo(i32 3)
+ br label %sw.epilog
+
+sw.bb5:
+ call void @foo(i32 4)
+ br label %sw.epilog
+
+sw.default:
+ call void @foo(i32 5)
+ br label %sw.epilog
+
+sw.epilog:
+ ret void
+
+; Check if weights are correctly assigned to edges generated from switch
+; statement.
+;
+; CHECK: BB#0:
+; BB#0 to BB#6: [10, UINT32_MAX] (15)
+; BB#0 to BB#8: [1, 5, 7, 9] (jump table) (45)
+; CHECK: Successors according to CFG: BB#8({{[0-9a-fx/= ]+}}25.00%) BB#9({{[0-9a-fx/= ]+}}75.00%)
+}
+
+!1 = !{!"branch_weights", i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10}
+!2 = !{!"branch_weights", i32 10, i32 10, i32 10, i32 10, i32 10, i32 10}
+!3 = !{!"branch_weights", i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10}
diff --git a/test/CodeGen/X86/switch-jump-table.ll b/test/CodeGen/X86/switch-jump-table.ll
index a84fb4aafd17..896a067da230 100644
--- a/test/CodeGen/X86/switch-jump-table.ll
+++ b/test/CodeGen/X86/switch-jump-table.ll
@@ -1,17 +1,18 @@
-; RUN: llc -mtriple=i686-pc-gnu-linux < %s | FileCheck %s
+; RUN: llc -mtriple=i686-pc-gnu-linux < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -mtriple=i686-pc-gnu-linux -print-machineinstrs=expand-isel-pseudos %s -o /dev/null 2>&1 | FileCheck %s -check-prefix=CHECK-JT-PROB
; An unreachable default destination is replaced with the most popular case label.
-define void @sum2(i32 %x, i32* %to) {
-; CHECK-LABEL: sum2:
+define void @foo(i32 %x, i32* %to) {
+; CHECK-LABEL: foo:
; CHECK: movl 4(%esp), [[REG:%e[a-z]{2}]]
; CHECK: cmpl $3, [[REG]]
-; CHECK: jbe .LBB0_1
+; CHECK: ja .LBB0_6
+; CHECK-NEXT: # BB#1:
+; CHECK-NEXT: jmpl *.LJTI0_0(,[[REG]],4)
; CHECK: movl $4
; CHECK: retl
-; CHECK-LABEL: .LBB0_1:
-; CHECK-NEXT: jmpl *.LJTI0_0(,[[REG]],4)
entry:
switch i32 %x, label %default [
@@ -48,5 +49,44 @@ default:
; CHECK-NEXT: .long .LBB0_3
; CHECK-NEXT: .long .LBB0_4
; CHECK-NEXT: .long .LBB0_5
-; CHECK-NOT: .long
}
+
+; Check if branch probabilities are correctly assigned to the jump table.
+
+define void @bar(i32 %x, i32* %to) {
+; CHECK-JT-PROB-LABEL: bar:
+; CHECK-JT-PROB: Successors according to CFG: BB#6({{[0-9a-fx/= ]+}}14.29%) BB#8({{[0-9a-fx/= ]+}}85.71%)
+; CHECK-JT-PROB: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}16.67%) BB#2({{[0-9a-fx/= ]+}}16.67%) BB#3({{[0-9a-fx/= ]+}}16.67%) BB#4({{[0-9a-fx/= ]+}}16.67%) BB#5({{[0-9a-fx/= ]+}}33.33%)
+
+entry:
+ switch i32 %x, label %default [
+ i32 0, label %bb0
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+ i32 4, label %bb4
+ i32 5, label %bb4
+ ], !prof !1
+bb0:
+ store i32 0, i32* %to
+ br label %exit
+bb1:
+ store i32 1, i32* %to
+ br label %exit
+bb2:
+ store i32 2, i32* %to
+ br label %exit
+bb3:
+ store i32 3, i32* %to
+ br label %exit
+bb4:
+ store i32 4, i32* %to
+ br label %exit
+default:
+ store i32 5, i32* %to
+ br label %exit
+exit:
+ ret void
+}
+
+!1 = !{!"branch_weights", i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16}
diff --git a/test/CodeGen/X86/switch-order-weight.ll b/test/CodeGen/X86/switch-order-weight.ll
index 207e0b3f707b..8c0c1a7d8108 100644
--- a/test/CodeGen/X86/switch-order-weight.ll
+++ b/test/CodeGen/X86/switch-order-weight.ll
@@ -13,8 +13,8 @@ entry:
; CHECK-LABEL: test1:
; CHECK-NOT: unr
; CHECK: cmpl $10
-; CHECK: bar
; CHECK: cmpl $20
+; CHECK: bar
if.then:
tail call void @unr(i32 23) noreturn nounwind
diff --git a/test/CodeGen/X86/switch.ll b/test/CodeGen/X86/switch.ll
index 748fd6f238b1..46587341ea74 100644
--- a/test/CodeGen/X86/switch.ll
+++ b/test/CodeGen/X86/switch.ll
@@ -51,7 +51,7 @@ return: ret void
; CHECK-LABEL: simple_ranges
; CHECK: leal -100
; CHECK: cmpl $4
-; CHECK: jae
+; CHECK: jb
; CHECK: cmpl $3
; CHECK: ja
@@ -90,7 +90,7 @@ return: ret void
; but with 6-8, the whole switch is suitable for a jump table.
; CHECK-LABEL: jt_is_better
; CHECK: cmpl $8
-; CHECK: jbe
+; CHECK: ja
; CHECK: jmpq *.LJTI
}
@@ -107,7 +107,6 @@ entry:
i32 2, label %bb2
i32 5, label %bb2
i32 8, label %bb2
-
]
bb0: tail call void @g(i32 0) br label %return
bb1: tail call void @g(i32 1) br label %return
@@ -116,6 +115,10 @@ return: ret void
; This could be lowered as a jump table, but bit tests is more efficient.
; CHECK-LABEL: bt_is_better
+; The bit test on 2,5,8 is unnecessary as all cases cover the rage [0, 8].
+; The range check guarantees that cases other than 0,3,6 and 1,4,7 must be
+; in 2,5,8.
+;
; 73 = 2^0 + 2^3 + 2^6
; CHECK: movl $73
; CHECK: btl
@@ -123,7 +126,74 @@ return: ret void
; CHECK: movl $146
; CHECK: btl
; 292 = 2^2 + 2^5 + 2^8
-; CHECK: movl $292
+; CHECK-NOT: movl $292
+; CHECK-NOT: btl
+}
+
+define void @bt_is_better2(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 3, label %bb0
+ i32 6, label %bb0
+ i32 1, label %bb1
+ i32 4, label %bb1
+ i32 7, label %bb1
+ i32 2, label %bb2
+ i32 8, label %bb2
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+return: ret void
+
+; This will also be lowered as bit test, but as the range [0,8] is not fully
+; covered (5 missing), the default statement can be jumped to and we end up
+; with one more branch.
+; CHECK-LABEL: bt_is_better2
+;
+; 73 = 2^0 + 2^3 + 2^6
+; CHECK: movl $73
+; CHECK: btl
+; 146 = 2^1 + 2^4 + 2^7
+; CHECK: movl $146
+; CHECK: btl
+; 260 = 2^2 + 2^8
+; CHECK: movl $260
+; CHECK: btl
+}
+
+define void @bt_is_better3(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 10, label %bb0
+ i32 13, label %bb0
+ i32 16, label %bb0
+ i32 11, label %bb1
+ i32 14, label %bb1
+ i32 17, label %bb1
+ i32 12, label %bb2
+ i32 18, label %bb2
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+return: ret void
+
+; We don't have to subtract 10 from the case value to let the range become
+; [0, 8], as each value in the range [10, 18] can be represented by bits in a
+; word. Then we still need a branch to jump to the default statement for the
+; range [0, 10).
+; CHECK-LABEL: bt_is_better3
+;
+; 74752 = 2^10 + 2^13 + 2^16
+; CHECK: movl $74752
+; CHECK: btl
+; 149504 = 2^11 + 2^14 + 2^17
+; CHECK: movl $149504
+; CHECK: btl
+; 266240 = 2^12 + 2^15 + 2^18
+; CHECK: movl $266240
; CHECK: btl
}
@@ -410,6 +480,9 @@ return: ret void
; Cases 1,4,7 have a very large branch weight (which shouldn't overflow), so
; their bit test should come first. 0,3,6 and 2,5,8,9 both have a weight of 12,
; but the latter set has more cases, so should be tested for earlier.
+; The bit test on 0,3,6 is unnecessary as all cases cover the rage [0, 9].
+; The range check guarantees that cases other than 1,4,7 and 2,5,8,9 must be
+; in 0,3,6.
; CHECK-LABEL: bt_order_by_weight
; 146 = 2^1 + 2^4 + 2^7
@@ -419,8 +492,8 @@ return: ret void
; CHECK: movl $804
; CHECK: btl
; 73 = 2^0 + 2^3 + 2^6
-; CHECK: movl $73
-; CHECK: btl
+; CHECK-NOT: movl $73
+; CHECK-NOT: btl
}
!1 = !{!"branch_weights",
diff --git a/test/CodeGen/X86/swizzle-2.ll b/test/CodeGen/X86/swizzle-2.ll
index 697af843abb1..fd81573edec9 100644
--- a/test/CodeGen/X86/swizzle-2.ll
+++ b/test/CodeGen/X86/swizzle-2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
; Test that we correctly fold a shuffle that performs a swizzle of another
diff --git a/test/CodeGen/X86/system-intrinsics-64-xsave.ll b/test/CodeGen/X86/system-intrinsics-64-xsave.ll
new file mode 100644
index 000000000000..feec9516220b
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics-64-xsave.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave | FileCheck %s
+
+define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsave
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsave (%rdi)
+ call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsave(i8*, i32, i32)
+
+define void @test_xsave64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsave64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsave64 (%rdi)
+ call void @llvm.x86.xsave64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsave64(i8*, i32, i32)
+
+define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstor
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xrstor (%rdi)
+ call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstor(i8*, i32, i32)
+
+define void @test_xrstor64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstor64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xrstor64 (%rdi)
+ call void @llvm.x86.xrstor64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstor64(i8*, i32, i32)
diff --git a/test/CodeGen/X86/system-intrinsics-64-xsavec.ll b/test/CodeGen/X86/system-intrinsics-64-xsavec.ll
new file mode 100644
index 000000000000..068034886515
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics-64-xsavec.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s
+
+define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsavec
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsavec (%rdi)
+ call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsavec(i8*, i32, i32)
+
+define void @test_xsavec64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsavec64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsavec64 (%rdi)
+ call void @llvm.x86.xsavec64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsavec64(i8*, i32, i32)
diff --git a/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll b/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
new file mode 100644
index 000000000000..ee0a5360da8e
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsaveopt | FileCheck %s
+
+define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaveopt
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsaveopt (%rdi)
+ call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaveopt(i8*, i32, i32)
+
+define void @test_xsaveopt64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaveopt64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsaveopt64 (%rdi)
+ call void @llvm.x86.xsaveopt64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaveopt64(i8*, i32, i32)
diff --git a/test/CodeGen/X86/system-intrinsics-64-xsaves.ll b/test/CodeGen/X86/system-intrinsics-64-xsaves.ll
new file mode 100644
index 000000000000..5c1c5be4e7e2
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics-64-xsaves.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s
+
+define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaves
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsaves (%rdi)
+ call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaves(i8*, i32, i32)
+
+define void @test_xsaves64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaves64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xsaves64 (%rdi)
+ call void @llvm.x86.xsaves64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaves64(i8*, i32, i32)
+
+define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstors
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xrstors (%rdi)
+ call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstors(i8*, i32, i32)
+
+define void @test_xrstors64(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstors64
+; CHECK: movl %edx, %eax
+; CHECK: movl %esi, %edx
+; CHECK: xrstors64 (%rdi)
+ call void @llvm.x86.xrstors64(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstors64(i8*, i32, i32)
diff --git a/test/CodeGen/X86/system-intrinsics-64.ll b/test/CodeGen/X86/system-intrinsics-64.ll
index 96c441773390..e18a79c2b614 100644
--- a/test/CodeGen/X86/system-intrinsics-64.ll
+++ b/test/CodeGen/X86/system-intrinsics-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fxsr | FileCheck %s
define void @test_fxsave(i8* %ptr) {
; CHECK-LABEL: test_fxsave
diff --git a/test/CodeGen/X86/system-intrinsics-xsave.ll b/test/CodeGen/X86/system-intrinsics-xsave.ll
new file mode 100644
index 000000000000..ff9fb7e247a4
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics-xsave.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave | FileCheck %s
+
+define void @test_xsave(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsave
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xsave (%ecx)
+ call void @llvm.x86.xsave(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsave(i8*, i32, i32)
+
+define void @test_xrstor(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstor
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xrstor (%ecx)
+ call void @llvm.x86.xrstor(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstor(i8*, i32, i32)
diff --git a/test/CodeGen/X86/system-intrinsics-xsavec.ll b/test/CodeGen/X86/system-intrinsics-xsavec.ll
new file mode 100644
index 000000000000..4a55ea9531b1
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics-xsavec.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsavec | FileCheck %s
+
+define void @test_xsavec(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsavec
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xsavec (%ecx)
+ call void @llvm.x86.xsavec(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsavec(i8*, i32, i32)
diff --git a/test/CodeGen/X86/system-intrinsics-xsaveopt.ll b/test/CodeGen/X86/system-intrinsics-xsaveopt.ll
new file mode 100644
index 000000000000..f9bd7acd5a7c
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics-xsaveopt.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaveopt | FileCheck %s
+
+define void @test_xsaveopt(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaveopt
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xsaveopt (%ecx)
+ call void @llvm.x86.xsaveopt(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaveopt(i8*, i32, i32)
diff --git a/test/CodeGen/X86/system-intrinsics-xsaves.ll b/test/CodeGen/X86/system-intrinsics-xsaves.ll
new file mode 100644
index 000000000000..ca1c5c1a9ed0
--- /dev/null
+++ b/test/CodeGen/X86/system-intrinsics-xsaves.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+xsave,+xsaves | FileCheck %s
+
+define void @test_xsaves(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xsaves
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xsaves (%ecx)
+ call void @llvm.x86.xsaves(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsaves(i8*, i32, i32)
+
+define void @test_xrstors(i8* %ptr, i32 %hi, i32 %lo) {
+; CHECK-LABEL: test_xrstors
+; CHECK: movl 8(%esp), %edx
+; CHECK: movl 12(%esp), %eax
+; CHECK: movl 4(%esp), %ecx
+; CHECK: xrstors (%ecx)
+ call void @llvm.x86.xrstors(i8* %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstors(i8*, i32, i32)
diff --git a/test/CodeGen/X86/system-intrinsics.ll b/test/CodeGen/X86/system-intrinsics.ll
index 84fcd052d7db..90dc9cd21e67 100644
--- a/test/CodeGen/X86/system-intrinsics.ll
+++ b/test/CodeGen/X86/system-intrinsics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fxsr | FileCheck %s
define void @test_fxsave(i8* %ptr) {
; CHECK-LABEL: test_fxsave
diff --git a/test/CodeGen/X86/tail-dup-catchret.ll b/test/CodeGen/X86/tail-dup-catchret.ll
new file mode 100644
index 000000000000..3eeb24d20f2d
--- /dev/null
+++ b/test/CodeGen/X86/tail-dup-catchret.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+define void @f() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ catchret from %0 to label %try.cont
+
+try.cont: ; preds = %entry, %catch
+ %b.0 = phi i1 [ false, %catch ], [ true, %entry ]
+ tail call void @h(i1 zeroext %b.0)
+ ret void
+}
+
+; CHECK-LABEL: _f:
+; CHECK: calll _g
+; CHECK: calll _h
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @h(i1 zeroext)
diff --git a/test/CodeGen/X86/tail-merge-wineh.ll b/test/CodeGen/X86/tail-merge-wineh.ll
new file mode 100644
index 000000000000..69c2fda6949b
--- /dev/null
+++ b/test/CodeGen/X86/tail-merge-wineh.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s | FileCheck %s
+
+; Started from this code:
+; void f() {
+; try {
+; try {
+; throw 42;
+; } catch (int) {
+; }
+; try {
+; throw 42;
+; } catch (int) {
+; }
+; } catch (int) {
+; }
+; }
+
+; Don't tail merge the calls.
+; CHECK: calll __CxxThrowException@8
+; CHECK: calll __CxxThrowException@8
+
+; ModuleID = 'cppeh-pingpong.cpp'
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i8*, i32, i32, i32, i32, i8* }
+%eh.CatchableTypeArray.1 = type { i32, [1 x %eh.CatchableType*] }
+%eh.ThrowInfo = type { i32, i8*, i8*, i8* }
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i32 -1, i32 0, i32 4, i8* null }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x %eh.CatchableType*] [%eh.CatchableType* @"_CT??_R0H@84"] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i8* null, i8* null, i8* bitcast (%eh.CatchableTypeArray.1* @_CTA1H to i8*) }, section ".xdata", comdat
+
+define void @"\01?f@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %i = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %tmp1 = alloca i32, align 4
+ store i32 0, i32* %i, align 4
+ store i32 42, i32* %tmp, align 4
+ %0 = bitcast i32* %tmp to i8*
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #1
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind label %catch.dispatch.7
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
+ catchret from %1 to label %catchret.dest
+
+catchret.dest: ; preds = %catch
+ br label %try.cont
+
+try.cont: ; preds = %catchret.dest
+ store i32 42, i32* %tmp1, align 4
+ %2 = bitcast i32* %tmp1 to i8*
+ invoke void @_CxxThrowException(i8* %2, %eh.ThrowInfo* @_TI1H) #1
+ to label %unreachable unwind label %catch.dispatch.2
+
+catch.dispatch.2: ; preds = %try.cont
+ %cs2 = catchswitch within none [label %catch.4] unwind label %catch.dispatch.7
+
+catch.4: ; preds = %catch.dispatch.2
+ %3 = catchpad within %cs2 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
+ catchret from %3 to label %catchret.dest.5
+
+catchret.dest.5: ; preds = %catch.4
+ br label %try.cont.6
+
+try.cont.6: ; preds = %catchret.dest.5
+ br label %try.cont.11
+
+catch.dispatch.7:
+ %cs3 = catchswitch within none [label %catch.9] unwind to caller
+
+catch.9: ; preds = %catch.dispatch.7
+ %4 = catchpad within %cs3 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
+ catchret from %4 to label %catchret.dest.10
+
+catchret.dest.10: ; preds = %catch.9
+ br label %try.cont.11
+
+try.cont.11: ; preds = %catchret.dest.10, %try.cont.6
+ ret void
+
+unreachable: ; preds = %try.cont, %entry
+ unreachable
+}
+
+declare x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noreturn }
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index f590176d9815..bf778e5bad2b 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -277,8 +277,8 @@ declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
; CHECK-LABEL: foo:
; CHECK: callq func
-; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: popq
+; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: ret
define void @foo(i1* %V) nounwind {
@@ -371,6 +371,44 @@ return:
ret void
}
+; two_minsize - Same as two, but with minsize instead of optsize.
+
+; CHECK-LABEL: two_minsize:
+; CHECK-NOT: XYZ
+; CHECK: ret
+; CHECK: movl $0, XYZ(%rip)
+; CHECK: movl $1, XYZ(%rip)
+; CHECK-NOT: XYZ
+
+define void @two_minsize() nounwind minsize {
+entry:
+ %0 = icmp eq i32 undef, 0
+ br i1 %0, label %bbx, label %bby
+
+bby:
+ switch i32 undef, label %bb7 [
+ i32 16, label %return
+ ]
+
+bb7:
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
+ unreachable
+
+bbx:
+ switch i32 undef, label %bb12 [
+ i32 128, label %return
+ ]
+
+bb12:
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
+ unreachable
+
+return:
+ ret void
+}
+
; two_nosize - Same as two, but without the optsize attribute.
; Now two instructions are enough to be tail-duplicated.
diff --git a/test/CodeGen/X86/tailcall-mem-intrinsics.ll b/test/CodeGen/X86/tailcall-mem-intrinsics.ll
index 0e0ab5c478fc..8e1e4f464baa 100644
--- a/test/CodeGen/X86/tailcall-mem-intrinsics.ll
+++ b/test/CodeGen/X86/tailcall-mem-intrinsics.ll
@@ -8,8 +8,8 @@ entry:
ret void
}
-; CHECK-LABEL: tail_memset
-; CHECK; jmp memmove
+; CHECK-LABEL: tail_memmove
+; CHECK: jmp memmove
define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
entry:
tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
diff --git a/test/CodeGen/X86/tailcall-msvc-conventions.ll b/test/CodeGen/X86/tailcall-msvc-conventions.ll
new file mode 100644
index 000000000000..98b02c9c07e8
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-msvc-conventions.ll
@@ -0,0 +1,189 @@
+; RUN: llc -mtriple=i686-unknown-linux-gnu -O1 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-linux-gnu -O0 < %s | FileCheck %s
+
+; The MSVC family of x86 calling conventions makes tail calls really tricky.
+; Tests of all the various combinations should live here.
+
+declare i32 @cdecl_i32()
+declare void @cdecl_void()
+
+; Don't allow tail calling these cdecl functions, because we need to clear the
+; incoming stack arguments for these argument-clearing conventions.
+
+define x86_thiscallcc void @thiscall_cdecl_notail(i32 %a, i32 %b, i32 %c) {
+ tail call void @cdecl_void()
+ ret void
+}
+; CHECK-LABEL: thiscall_cdecl_notail
+; CHECK: calll cdecl_void
+; CHECK: retl $8
+
+define x86_stdcallcc void @stdcall_cdecl_notail(i32 %a, i32 %b, i32 %c) {
+ tail call void @cdecl_void()
+ ret void
+}
+; CHECK-LABEL: stdcall_cdecl_notail
+; CHECK: calll cdecl_void
+; CHECK: retl $12
+
+define x86_vectorcallcc void @vectorcall_cdecl_notail(i32 inreg %a, i32 inreg %b, i32 %c) {
+ tail call void @cdecl_void()
+ ret void
+}
+; CHECK-LABEL: vectorcall_cdecl_notail
+; CHECK: calll cdecl_void
+; CHECK: retl $4
+
+define x86_fastcallcc void @fastcall_cdecl_notail(i32 inreg %a, i32 inreg %b, i32 %c) {
+ tail call void @cdecl_void()
+ ret void
+}
+; CHECK-LABEL: fastcall_cdecl_notail
+; CHECK: calll cdecl_void
+; CHECK: retl $4
+
+
+; Tail call to/from callee pop functions can work under the right circumstances:
+
+declare x86_thiscallcc void @no_args_method(i8*)
+declare x86_thiscallcc void @one_arg_method(i8*, i32)
+declare x86_thiscallcc void @two_args_method(i8*, i32, i32)
+declare void @ccall_func()
+declare void @ccall_func1(i32)
+
+define x86_thiscallcc void @thiscall_thiscall_tail(i8* %this) {
+entry:
+ tail call x86_thiscallcc void @no_args_method(i8* %this)
+ ret void
+}
+; CHECK-LABEL: thiscall_thiscall_tail:
+; CHECK: jmp no_args_method
+
+define x86_thiscallcc void @thiscall_thiscall_tail2(i8* %this, i32 %a, i32 %b) {
+entry:
+ tail call x86_thiscallcc void @two_args_method(i8* %this, i32 %a, i32 %b)
+ ret void
+}
+; @two_args_method will take care of popping %a and %b from the stack for us.
+; CHECK-LABEL: thiscall_thiscall_tail2:
+; CHECK: jmp two_args_method
+
+define x86_thiscallcc void @thiscall_thiscall_notail(i8* %this, i32 %a, i32 %b, i32 %x) {
+entry:
+ tail call x86_thiscallcc void @two_args_method(i8* %this, i32 %a, i32 %b)
+ ret void
+}
+; @two_args_method would not pop %x.
+; CHECK-LABEL: thiscall_thiscall_notail:
+; CHECK: calll two_args_method
+; CHECK: retl $12
+
+define x86_thiscallcc void @thiscall_thiscall_notail2(i8* %this, i32 %a) {
+entry:
+ tail call x86_thiscallcc void @no_args_method(i8* %this)
+ ret void
+}
+; @no_args_method would not pop %x for us. Make sure this is checked even
+; when there are no arguments to the call.
+; CHECK-LABEL: thiscall_thiscall_notail2:
+; CHECK: calll no_args_method
+; CHECK: retl $4
+
+define void @ccall_thiscall_tail(i8* %x) {
+entry:
+ tail call x86_thiscallcc void @no_args_method(i8* %x)
+ ret void
+}
+; Tail calling from ccall to thiscall works.
+; CHECK-LABEL: ccall_thiscall_tail:
+; CHECK: jmp no_args_method
+
+define void @ccall_thiscall_notail(i8* %x, i32 %y) {
+entry:
+ tail call x86_thiscallcc void @one_arg_method(i8* %x, i32 %y);
+ ret void
+}
+; @one_arg_method would pop %y off the stack.
+; CHECK-LABEL: ccall_thiscall_notail:
+; CHECK: calll one_arg_method
+
+define x86_thiscallcc void @thiscall_ccall_tail(i8* %this) {
+entry:
+ tail call void @ccall_func()
+ ret void
+}
+; Tail call from thiscall to ccall works if no arguments need popping.
+; CHECK-LABEL: thiscall_ccall_tail:
+; CHECK: jmp ccall_func
+
+define x86_thiscallcc void @thiscall_ccall_notail(i8* %this, i32 %x) {
+entry:
+ tail call void @ccall_func1(i32 %x)
+ ret void
+}
+; No tail call: %x needs to be popped.
+; CHECK-LABEL: thiscall_ccall_notail:
+; CHECK: calll ccall_func1
+; CHECK: retl $4
+
+%S = type { i32 (...)** }
+define x86_thiscallcc void @tailcall_through_pointer(%S* %this, i32 %a) {
+entry:
+ %0 = bitcast %S* %this to void (%S*, i32)***
+ %vtable = load void (%S*, i32)**, void (%S*, i32)*** %0
+ %1 = load void (%S*, i32)*, void (%S*, i32)** %vtable
+ tail call x86_thiscallcc void %1(%S* %this, i32 %a)
+ ret void
+}
+; Tail calling works through function pointers too.
+; CHECK-LABEL: tailcall_through_pointer:
+; CHECK: jmpl
+
+define x86_stdcallcc void @stdcall_cdecl_tail() {
+ tail call void @ccall_func()
+ ret void
+}
+; stdcall to cdecl works if no arguments need popping.
+; CHECK-LABEL: stdcall_cdecl_tail
+; CHECK: jmp ccall_func
+
+define x86_vectorcallcc void @vectorcall_cdecl_tail(i32 inreg %a, i32 inreg %b) {
+ tail call void @ccall_func()
+ ret void
+}
+; vectorcall to cdecl works if no arguments need popping.
+; CHECK-LABEL: vectorcall_cdecl_tail
+; CHECK: jmp ccall_func
+
+define x86_fastcallcc void @fastcall_cdecl_tail(i32 inreg %a, i32 inreg %b) {
+ tail call void @ccall_func()
+ ret void
+}
+; fastcall to cdecl works if no arguments need popping.
+; CHECK-LABEL: fastcall_cdecl_tail
+; CHECK: jmp ccall_func
+
+define x86_stdcallcc void @stdcall_thiscall_notail(i8* %this, i32 %a, i32 %b) {
+ tail call x86_thiscallcc void @two_args_method(i8* %this, i32 %a, i32 %b)
+ ret void
+}
+; two_args_method will not pop %this.
+; CHECK-LABEL: stdcall_thiscall_notail
+; CHECK: calll two_args_method
+
+define x86_stdcallcc void @stdcall_thiscall_tail(i32 %a, i32 %b) {
+ tail call x86_thiscallcc void @two_args_method(i8* null, i32 %a, i32 %b)
+ ret void
+}
+; The callee pop amounts match up.
+; CHECK-LABEL: stdcall_thiscall_tail
+; CHECK: jmp two_args_method
+
+declare x86_fastcallcc void @fastcall2(i32 inreg %a, i32 inreg %b)
+define void @cdecl_fastcall_tail(i32 %a, i32 %b) {
+ tail call x86_fastcallcc void @fastcall2(i32 %a, i32 %b)
+ ret void
+}
+; fastcall2 won't pop anything.
+; CHECK-LABEL: cdecl_fastcall_tail
+; CHECK: jmp fastcall2
diff --git a/test/CodeGen/X86/tailcall-readnone.ll b/test/CodeGen/X86/tailcall-readnone.ll
new file mode 100644
index 000000000000..b43f69120e7c
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -o - %s | FileCheck %s
+
+define void @f(i32** %p) unnamed_addr {
+entry:
+ %v = tail call i32* @g()
+ store i32* %v, i32** %p, align 8
+ ret void
+}
+; CHECK-LABEL: f:
+; CHECK: callq g
+; CHECK: movq %rax, (%rbx)
+
+declare i32* @g() #2
+
+attributes #2 = { nounwind readnone }
diff --git a/test/CodeGen/X86/tls-android-negative.ll b/test/CodeGen/X86/tls-android-negative.ll
new file mode 100644
index 000000000000..e90b8914ab28
--- /dev/null
+++ b/test/CodeGen/X86/tls-android-negative.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck %s
+
+; Make sure that some symboles are not emitted in emulated TLS model.
+
+@external_x = external thread_local global i32
+@external_y = thread_local global i32 7
+@internal_y = internal thread_local global i32 9
+@internal_y0 = internal thread_local global i32 0
+
+define i32* @get_external_x() {
+entry:
+ ret i32* @external_x
+}
+
+define i32* @get_external_y() {
+entry:
+ ret i32* @external_y
+}
+
+define i32* @get_internal_y() {
+entry:
+ ret i32* @internal_y
+}
+
+define i32* @get_internal_y0() {
+entry:
+ ret i32* @internal_y0
+}
+
+; no direct access to emulated TLS variables.
+; no definition of emulated TLS variables.
+; no initializer for external TLS variables, __emutls_t.external_x
+; no initializer for 0-initialized TLS variables, __emutls_t.internal_y0
+; not global linkage for __emutls_t.external_y
+
+; CHECK-NOT: external_x@TLS
+; CHECK-NOT: external_y@TLS
+; CHECK-NOT: internal_y@TLS
+; CHECK-NOT: .size external_x
+; CHECK-NOT: .size external_y
+; CHECK-NOT: .size internal_y
+; CHECK-NOT: .size internal_y0
+; CHECK-NOT: __emutls_v.external_x:
+; CHECK-NOT: __emutls_t.external_x:
+; CHECK-NOT: __emutls_t.internal_y0:
+; CHECK-NOT: global __emutls_t.external_y
+; CHECK-NOT: global __emutls_v.internal_y
+; CHECK-NOT: global __emutls_v.internal_y0
+
+; CHECK: __emutls_t.external_y
+
+; CHECK-NOT: external_x@TLS
+; CHECK-NOT: external_y@TLS
+; CHECK-NOT: internal_y@TLS
+; CHECK-NOT: .size external_x
+; CHECK-NOT: .size external_y
+; CHECK-NOT: .size internal_y
+; CHECK-NOT: .size internal_y0
+; CHECK-NOT: __emutls_v.external_x:
+; CHECK-NOT: __emutls_t.external_x:
+; CHECK-NOT: __emutls_t.internal_y0:
+; CHECK-NOT: global __emutls_t.external_y
+; CHECK-NOT: global __emutls_v.internal_y
+; CHECK-NOT: global __emutls_v.internal_y0
diff --git a/test/CodeGen/X86/tls-android.ll b/test/CodeGen/X86/tls-android.ll
new file mode 100644
index 000000000000..4156c7b3f5b9
--- /dev/null
+++ b/test/CodeGen/X86/tls-android.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -emulated-tls -march=x86 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck %s
+; RUN: llc < %s -emulated-tls -march=x86-64 -mtriple=x86_64-linux-android -relocation-model=pic | FileCheck -check-prefix=X64 %s
+
+; Make sure that TLS symboles are emitted in expected order.
+
+@external_x = external thread_local global i32
+@external_y = thread_local global i32 7
+@internal_y = internal thread_local global i32 9
+
+define i32* @get_external_x() {
+entry:
+ ret i32* @external_x
+}
+
+define i32* @get_external_y() {
+entry:
+ ret i32* @external_y
+}
+
+define i32* @get_internal_y() {
+entry:
+ ret i32* @internal_y
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 32-bit mode
+; CHECK-LABEL: get_external_x:
+; CHECK: __emutls_v.external_x
+; CHECK: __emutls_get_address
+
+; CHECK-LABEL: get_external_y:
+; CHECK: __emutls_v.external_y
+; CHECK: __emutls_get_address
+
+; CHECK-LABEL: get_internal_y:
+; CHECK: __emutls_v.internal_y
+; CHECK: __emutls_get_address
+
+; CHECK-NOT: __emutls_v.external_x:
+
+; CHECK: .align 4
+; CHECK-LABEL: __emutls_v.external_y:
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long __emutls_t.external_y
+; CHECK-LABEL: __emutls_t.external_y:
+; CHECK-NEXT: .long 7
+
+; CHECK: .align 4
+; CHECK-LABEL: __emutls_v.internal_y:
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long __emutls_t.internal_y
+; CHECK-LABEL: __emutls_t.internal_y:
+; CHECK-NEXT: .long 9
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 64-bit mode
+; X64-LABEL: get_external_x:
+; X64: __emutls_v.external_x
+; X64: __emutls_get_address
+
+; X64-LABEL: get_external_y:
+; X64: __emutls_v.external_y
+; X64: __emutls_get_address
+
+; X64-LABEL: get_internal_y:
+; X64: __emutls_v.internal_y
+; X64: __emutls_get_address
+
+; X64-NOT: __emutls_v.external_x:
+
+; X64: .align 8
+; X64-LABEL: __emutls_v.external_y:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.external_y
+; X64-LABEL: __emutls_t.external_y:
+; X64-NEXT: .long 7
+
+; X64: .align 8
+; X64-LABEL: __emutls_v.internal_y:
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 4
+; X64-NEXT: .quad 0
+; X64-NEXT: .quad __emutls_t.internal_y
+; X64-LABEL: __emutls_t.internal_y:
+; X64-NEXT: .long 9
diff --git a/test/CodeGen/X86/tls-models.ll b/test/CodeGen/X86/tls-models.ll
index 0fd785328211..2377da4f025a 100644
--- a/test/CodeGen/X86/tls-models.ll
+++ b/test/CodeGen/X86/tls-models.ll
@@ -18,6 +18,8 @@
@external_le = external thread_local(localexec) global i32
@internal_le = internal thread_local(localexec) global i32 42
+; See test cases for emulated model in emutls.ll, emutls-pic.ll and emutls-pie.ll.
+
; ----- no model specified -----
define i32* @f1() {
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
index 10fe1e94bbdc..235230e3c6a8 100644
--- a/test/CodeGen/X86/tls-pie.ll
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -36,9 +36,13 @@ entry:
define i32 @f3() {
; X32-LABEL: f3:
; X32: calll .L{{[0-9]+}}$pb
+; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: .L{{[0-9]+}}$pb:
; X32-NEXT: popl %eax
; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: .cfi_adjust_cfa_offset -4
+; X32-NEXT: .Ltmp{{[0-9]+}}:
; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %eax
; X32-NEXT: movl i2@GOTNTPOFF(%eax), %eax
; X32-NEXT: movl %gs:(%eax), %eax
@@ -56,9 +60,13 @@ entry:
define i32* @f4() {
; X32-LABEL: f4:
; X32: calll .L{{[0-9]+}}$pb
+; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: .L{{[0-9]+}}$pb:
; X32-NEXT: popl %ecx
; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: .cfi_adjust_cfa_offset -4
+; X32-NEXT: .Ltmp{{[0-9]+}}:
; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %ecx
; X32-NEXT: movl %gs:0, %eax
; X32-NEXT: addl i2@GOTNTPOFF(%ecx), %eax
diff --git a/test/CodeGen/X86/token_landingpad.ll b/test/CodeGen/X86/token_landingpad.ll
new file mode 100644
index 000000000000..087b68bfce8a
--- /dev/null
+++ b/test/CodeGen/X86/token_landingpad.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s
+
+; This test verifies that SelectionDAG can handle landingPad of token type and not crash LLVM.
+
+define void @test() personality i32 (...)* @dummy_personality {
+entry:
+ invoke void @dummy()
+ to label %return unwind label %unwind
+
+unwind: ; preds = %entry
+ %lp = landingpad token
+ cleanup
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+declare void @dummy()
+
+declare i32 @dummy_personality(...)
diff --git a/test/CodeGen/X86/trunc-store.ll b/test/CodeGen/X86/trunc-store.ll
new file mode 100644
index 000000000000..646b4b2c336d
--- /dev/null
+++ b/test/CodeGen/X86/trunc-store.ll
@@ -0,0 +1,49 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; With optimization at O2 we actually get the legalized function optimized
+; away through legalization and stack coloring, but check that we do all of
+; that here and don't crash during legalization.
+
+; Original program:
+; typedef enum { A, B, C, D } P;
+; struct { P x[2]; } a;
+
+; void fn2();
+; void fn1() {
+; int b;
+; unsigned c;
+; for (;; c++) {
+; fn2();
+; unsigned n;
+; for (; c; c++) {
+; b = a.x[c] == A || a.x[c] == B || a.x[c] == D;
+; if (b) n++;
+; }
+; if (n)
+; for (;;)
+; ;
+; }
+; }
+
+define void @fn1() {
+; CHECK-LABEL: fn1
+; CHECK: movb $0, {{.*}}(%rsp)
+; CHECK: cmpq $8, %rax
+for.cond:
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %for.cond
+ %x42 = bitcast <4 x i4> zeroinitializer to i16
+ %x43 = icmp ne i16 %x42, 0
+ %x44 = select i1 %x43, i32 undef, i32 0
+ %x72 = bitcast <4 x i1> zeroinitializer to i4
+ %x73 = icmp ne i4 %x72, 0
+ %x74 = select i1 %x73, i32 %x44, i32 undef
+ %x84 = select i1 undef, i32 undef, i32 %x74
+ %x88 = icmp eq i64 undef, 8
+ br i1 %x88, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+ %0 = select i1 undef, i32 undef, i32 %x84
+ ret void
+}
diff --git a/test/CodeGen/X86/unaligned-32-byte-memops.ll b/test/CodeGen/X86/unaligned-32-byte-memops.ll
index d979c16f4abd..b9deb058cb3f 100644
--- a/test/CodeGen/X86/unaligned-32-byte-memops.ll
+++ b/test/CodeGen/X86/unaligned-32-byte-memops.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,+slow-unaligned-mem-32 | FileCheck %s --check-prefix=AVXSLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,-slow-unaligned-mem-32 | FileCheck %s --check-prefix=AVXFAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX2
@@ -75,12 +76,12 @@ define <8 x float> @combine_16_byte_loads_no_intrinsic(<4 x float>* %ptr) {
ret <8 x float> %v3
}
+; If the first load is 32-byte aligned, then the loads should be merged in all cases.
+
define <8 x float> @combine_16_byte_loads_aligned(<4 x float>* %ptr) {
-;; FIXME: The first load is 32-byte aligned, so the second load should get merged.
; AVXSLOW-LABEL: combine_16_byte_loads_aligned:
; AVXSLOW: # BB#0:
-; AVXSLOW-NEXT: vmovaps 48(%rdi), %xmm0
-; AVXSLOW-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVXSLOW-NEXT: vmovaps 48(%rdi), %ymm0
; AVXSLOW-NEXT: retq
;
; AVXFAST-LABEL: combine_16_byte_loads_aligned:
diff --git a/test/CodeGen/X86/unaligned-spill-folding.ll b/test/CodeGen/X86/unaligned-spill-folding.ll
index 33e2daf9dc1b..dee94bce15a5 100644
--- a/test/CodeGen/X86/unaligned-spill-folding.ll
+++ b/test/CodeGen/X86/unaligned-spill-folding.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=4 -relocation-model=pic < %s | FileCheck %s -check-prefix=UNALIGNED
; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=16 -relocation-model=pic < %s | FileCheck %s -check-prefix=ALIGNED
-; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=4 -force-align-stack -relocation-model=pic < %s | FileCheck %s -check-prefix=FORCEALIGNED
+; RUN: llc -mtriple=i386-unknown-freebsd -mcpu=core2 -stack-alignment=4 -stackrealign -relocation-model=pic < %s | FileCheck %s -check-prefix=FORCEALIGNED
@arr = internal unnamed_addr global [32 x i32] zeroinitializer, align 16
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index c018a49d135e..c41e529aa954 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -10,7 +10,7 @@
; CHECK-NEXT: idivl
; CHECK-NEXT: .loc 1 4 3
-define i32 @foo(i32 %w, i32 %x, i32 %y, i32 %z) nounwind {
+define i32 @foo(i32 %w, i32 %x, i32 %y, i32 %z) nounwind !dbg !1 {
entry:
%a = add i32 %w, %x, !dbg !8
%b = sdiv i32 %a, %y
@@ -21,10 +21,10 @@ entry:
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!12}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !10, scope: !2, type: !4, function: i32 (i32, i32, i32, i32)* @foo)
+!0 = !DILocalVariable(name: "x", line: 1, arg: 2, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !10, scope: !2, type: !4)
!2 = !DIFile(filename: "test.c", directory: "/dir")
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "producer", isOptimized: false, emissionKind: 0, file: !10, enums: !11, retainedTypes: !11, subprograms: !9)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "producer", isOptimized: false, emissionKind: 0, file: !10, enums: !11, retainedTypes: !11, subprograms: !9)
!4 = !DISubroutineType(types: !5)
!5 = !{!6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll
index 7beed52295ee..3b7160c71869 100644
--- a/test/CodeGen/X86/v2f32.ll
+++ b/test/CodeGen/X86/v2f32.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -o - | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -o - | FileCheck %s --check-prefix=X32
diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll
index 1ba11f51baa2..dda50b7b94b7 100644
--- a/test/CodeGen/X86/vec_cast2.ll
+++ b/test/CodeGen/X86/vec_cast2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
@@ -46,17 +47,19 @@ define <4 x float> @foo1_4(<4 x i8> %src) {
define <8 x float> @foo2_8(<8 x i8> %src) {
; CHECK-LABEL: foo2_8:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; CHECK-NEXT: vandps LCPI2_0, %ymm0, %ymm0
+; CHECK-NEXT: vpand LCPI2_0, %xmm0, %xmm0
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-NEXT: retl
;
; CHECK-WIDE-LABEL: foo2_8:
; CHECK-WIDE: ## BB#0:
; CHECK-WIDE-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-WIDE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-WIDE-NEXT: retl
@@ -96,25 +99,25 @@ define <8 x i8> @foo3_8(<8 x float> %src) {
; CHECK-WIDE: ## BB#0:
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
-; CHECK-WIDE-NEXT: vmovshdup %xmm0, %xmm2 ## xmm2 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0]
+; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps $231, %xmm0, %xmm2 ## xmm2 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vmovshdup %xmm0, %xmm2 ## xmm2 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0]
+; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps $231, %xmm0, %xmm0 ## xmm0 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
; CHECK-WIDE-NEXT: vzeroupper
@@ -133,13 +136,13 @@ define <4 x i8> @foo3_4(<4 x float> %src) {
; CHECK-WIDE: ## BB#0:
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
-; CHECK-WIDE-NEXT: vmovshdup %xmm0, %xmm2 ## xmm2 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0]
+; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps $231, %xmm0, %xmm0 ## xmm0 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
; CHECK-WIDE-NEXT: retl
diff --git a/test/CodeGen/X86/vec_cmp_sint-128.ll b/test/CodeGen/X86/vec_cmp_sint-128.ll
new file mode 100644
index 000000000000..1407f71de714
--- /dev/null
+++ b/test/CodeGen/X86/vec_cmp_sint-128.ll
@@ -0,0 +1,722 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
+
+;
+; Equal
+;
+
+define <2 x i64> @eq_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: eq_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: eq_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: eq_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: eq_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: eq_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomeqq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp eq <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @eq_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: eq_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: eq_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: eq_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomeqd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp eq <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @eq_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: eq_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: eq_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: eq_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomeqw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp eq <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @eq_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: eq_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: eq_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: eq_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomeqb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp eq <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Not Equal
+;
+
+define <2 x i64> @ne_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: ne_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: ne_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: ne_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE42-NEXT: pxor %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: ne_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ne_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomneqq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ne <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @ne_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: ne_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ne_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ne_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomneqd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ne <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @ne_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: ne_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ne_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ne_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomneqw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ne <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @ne_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: ne_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ne_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ne_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomneqb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ne <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Greater Than Or Equal
+;
+
+define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: ge_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: ge_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE41-NEXT: pand %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: ge_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm1
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE42-NEXT: pxor %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: ge_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ge_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgeq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sge <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @ge_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: ge_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ge_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ge_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomged %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sge <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @ge_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: ge_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtw %xmm0, %xmm1
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ge_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ge_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgew %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sge <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @ge_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: ge_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtb %xmm0, %xmm1
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ge_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ge_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgeb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sge <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Greater Than
+;
+
+define <2 x i64> @gt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: gt_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: gt_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm3, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: gt_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: gt_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: gt_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgtq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sgt <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @gt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: gt_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gt_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: gt_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgtd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sgt <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @gt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: gt_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gt_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: gt_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgtw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sgt <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @gt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: gt_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gt_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: gt_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgtb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sgt <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Less Than Or Equal
+;
+
+define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: le_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: le_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: le_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE42-NEXT: pxor %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: le_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: le_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomleq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sle <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @le_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: le_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: le_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: le_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomled %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sle <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @le_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: le_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: le_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: le_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomlew %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sle <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @le_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: le_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtb %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: le_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: le_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomleb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp sle <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Less Than
+;
+
+define <2 x i64> @lt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: lt_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: lt_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: pand %xmm3, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: lt_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm1
+; SSE42-NEXT: movdqa %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: lt_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: lt_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomltq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp slt <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @lt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: lt_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: lt_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: lt_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomltd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp slt <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @lt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: lt_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtw %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: lt_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: lt_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomltw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp slt <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @lt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: lt_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpgtb %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: lt_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: lt_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomltb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp slt <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
diff --git a/test/CodeGen/X86/vec_cmp_uint-128.ll b/test/CodeGen/X86/vec_cmp_uint-128.ll
new file mode 100644
index 000000000000..8bed14e7e5f5
--- /dev/null
+++ b/test/CodeGen/X86/vec_cmp_uint-128.ll
@@ -0,0 +1,860 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
+
+;
+; Equal
+;
+
+define <2 x i64> @eq_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: eq_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: eq_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: eq_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: eq_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: eq_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomeqq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp eq <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @eq_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: eq_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: eq_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: eq_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomeqd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp eq <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @eq_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: eq_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: eq_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: eq_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomeqw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp eq <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @eq_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: eq_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: eq_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: eq_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomeqb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp eq <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Not Equal
+;
+
+define <2 x i64> @ne_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: ne_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: ne_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: ne_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: pcmpeqq %xmm1, %xmm0
+; SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE42-NEXT: pxor %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: ne_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ne_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomneqq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ne <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @ne_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: ne_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ne_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ne_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomneqd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ne <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @ne_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: ne_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ne_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ne_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomneqw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ne <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @ne_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: ne_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ne_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ne_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomneqb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ne <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Greater Than Or Equal
+;
+
+define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: ge_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: ge_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE41-NEXT: pand %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: ge_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: pxor %xmm1, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: ge_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ge_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgeuq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp uge <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @ge_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE2-LABEL: ge_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: ge_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxud %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: ge_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxud %xmm0, %xmm1
+; SSE42-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: ge_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ge_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgeud %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp uge <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @ge_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: ge_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: psubusw %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: ge_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxuw %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: ge_v8i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxuw %xmm0, %xmm1
+; SSE42-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: ge_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ge_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgeuw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp uge <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @ge_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: ge_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pmaxub %xmm0, %xmm1
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ge_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: ge_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgeub %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp uge <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Greater Than
+;
+
+define <2 x i64> @gt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: gt_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: gt_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm3, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: gt_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: pxor %xmm2, %xmm1
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: gt_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: gt_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgtuq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ugt <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @gt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: gt_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: gt_v4i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: gt_v4i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: gt_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgtud %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: gt_v4i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX512-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+ %1 = icmp ugt <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @gt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: gt_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gt_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: gt_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgtuw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ugt <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @gt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: gt_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pcmpgtb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: gt_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: gt_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomgtub %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ugt <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Less Than Or Equal
+;
+
+define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: le_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: le_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: le_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: pxor %xmm2, %xmm1
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE42-NEXT: pxor %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: le_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: le_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomleuq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ule <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @le_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE2-LABEL: le_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: le_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminud %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: le_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminud %xmm0, %xmm1
+; SSE42-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: le_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: le_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomleud %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ule <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @le_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: le_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: psubusw %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: le_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminuw %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: le_v8i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminuw %xmm0, %xmm1
+; SSE42-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: le_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: le_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomleuw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ule <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @le_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: le_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pminub %xmm0, %xmm1
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: le_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: le_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomleub %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ule <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+;
+; Less Than
+;
+
+define <2 x i64> @lt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: lt_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pand %xmm3, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: lt_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pxor %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: pand %xmm3, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: lt_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: pxor %xmm1, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: lt_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: lt_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomltuq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ult <2 x i64> %a, %b
+ %2 = sext <2 x i1> %1 to <2 x i64>
+ ret <2 x i64> %2
+}
+
+define <4 x i32> @lt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE-LABEL: lt_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: lt_v4i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: lt_v4i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: lt_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomltud %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: lt_v4i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
+ %1 = icmp ult <4 x i32> %a, %b
+ %2 = sext <4 x i1> %1 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @lt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE-LABEL: lt_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pcmpgtw %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: lt_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: lt_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomltuw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ult <8 x i16> %a, %b
+ %2 = sext <8 x i1> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @lt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE-LABEL: lt_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pcmpgtb %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: lt_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: lt_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpcomltub %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+ %1 = icmp ult <16 x i8> %a, %b
+ %2 = sext <16 x i1> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index 318aca1d54cb..66114bc9c6bc 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
@@ -5,25 +6,63 @@ declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @footz(<2 x i64> %a) nounwind {
+; CHECK-LABEL: footz:
+; CHECK: # BB#0:
+; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: bsfq %rax, %rax
+; CHECK-NEXT: movd %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: bsfq %rax, %rax
+; CHECK-NEXT: movd %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
%c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
-; CHECK-LABEL: footz
-; CHECK: bsfq
-; CHECK: bsfq
}
define <2 x i64> @foolz(<2 x i64> %a) nounwind {
+; CHECK-LABEL: foolz:
+; CHECK: # BB#0:
+; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: bsrq %rax, %rax
+; CHECK-NEXT: xorq $63, %rax
+; CHECK-NEXT: movd %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: bsrq %rax, %rax
+; CHECK-NEXT: xorq $63, %rax
+; CHECK-NEXT: movd %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
%c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
-; CHECK-LABEL: foolz
-; CHECK: bsrq
-; CHECK: xorq $63
-; CHECK: bsrq
-; CHECK: xorq $63
}
define <2 x i64> @foopop(<2 x i64> %a) nounwind {
+; CHECK-LABEL: foopop:
+; CHECK: # BB#0:
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlq $1, %xmm1
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
+; CHECK-NEXT: psubq %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm1, %xmm2
+; CHECK-NEXT: psrlq $2, %xmm0
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: paddq %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlq $4, %xmm1
+; CHECK-NEXT: paddq %xmm0, %xmm1
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm0
+; CHECK-NEXT: psadbw %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
%c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
ret <2 x i64> %c
}
@@ -33,35 +72,73 @@ declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
define <2 x i32> @promtz(<2 x i32> %a) nounwind {
+; CHECK-LABEL: promtz:
+; CHECK: # BB#0:
+; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: bsfq %rax, %rax
+; CHECK-NEXT: movl $64, %ecx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: movd %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: bsfq %rax, %rax
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: movd %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
%c = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
ret <2 x i32> %c
-; CHECK: .quad 4294967296
-; CHECK: .quad 4294967296
-; CHECK-LABEL: promtz
-; CHECK: bsfq
-; CHECK: cmov
-; CHECK: bsfq
-; CHECK: cmov
}
define <2 x i32> @promlz(<2 x i32> %a) nounwind {
+; CHECK-LABEL: promlz:
+; CHECK: # BB#0:
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: bsrq %rax, %rax
+; CHECK-NEXT: movl $127, %ecx
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: xorq $63, %rax
+; CHECK-NEXT: movd %rax, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: movd %xmm0, %rax
+; CHECK-NEXT: bsrq %rax, %rax
+; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: xorq $63, %rax
+; CHECK-NEXT: movd %rax, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: psubq {{.*}}(%rip), %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
%c = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
ret <2 x i32> %c
-; CHECK: .quad 4294967295
-; CHECK: .quad 4294967295
-; CHECK: .quad 32
-; CHECK: .quad 32
-; CHECK-LABEL: promlz
-; CHECK: pand
-; CHECK: bsrq
-; CHECK: xorq $63
-; CHECK: bsrq
-; CHECK: xorq $63
-; CHECK: psub
}
define <2 x i32> @prompop(<2 x i32> %a) nounwind {
+; CHECK-LABEL: prompop:
+; CHECK: # BB#0:
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlq $1, %xmm1
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
+; CHECK-NEXT: psubq %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm1, %xmm2
+; CHECK-NEXT: psrlq $2, %xmm0
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: paddq %xmm2, %xmm0
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlq $4, %xmm1
+; CHECK-NEXT: paddq %xmm0, %xmm1
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
+; CHECK-NEXT: pxor %xmm0, %xmm0
+; CHECK-NEXT: psadbw %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
%c = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
ret <2 x i32> %c
}
diff --git a/test/CodeGen/X86/vec_extract-avx.ll b/test/CodeGen/X86/vec_extract-avx.ll
index fbb84170dc83..abb07233d35e 100644
--- a/test/CodeGen/X86/vec_extract-avx.ll
+++ b/test/CodeGen/X86/vec_extract-avx.ll
@@ -1,14 +1,18 @@
-target triple = "x86_64-unknown-unknown"
-
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; When extracting multiple consecutive elements from a larger
; vector into a smaller one, do it efficiently. We should use
; an EXTRACT_SUBVECTOR node internally rather than a bunch of
-; single element extractions.
+; single element extractions.
; Extracting the low elements only requires using the right kind of store.
define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+; CHECK-LABEL: low_v8f32_to_v4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovaps %xmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%ext0 = extractelement <8 x float> %v, i32 0
%ext1 = extractelement <8 x float> %v, i32 1
%ext2 = extractelement <8 x float> %v, i32 2
@@ -19,15 +23,15 @@ define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
%ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
store <4 x float> %ins3, <4 x float>* %ptr, align 16
ret void
-
-; CHECK-LABEL: low_v8f32_to_v4f32
-; CHECK: vmovaps
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
}
-; Extracting the high elements requires just one AVX instruction.
+; Extracting the high elements requires just one AVX instruction.
define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+; CHECK-LABEL: high_v8f32_to_v4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%ext0 = extractelement <8 x float> %v, i32 4
%ext1 = extractelement <8 x float> %v, i32 5
%ext2 = extractelement <8 x float> %v, i32 6
@@ -38,17 +42,17 @@ define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
%ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
store <4 x float> %ins3, <4 x float>* %ptr, align 16
ret void
-
-; CHECK-LABEL: high_v8f32_to_v4f32
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
}
; Make sure element type doesn't alter the codegen. Note that
; if we were actually using the vector in this function and
; have AVX2, we should generate vextracti128 (the int version).
define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
+; CHECK-LABEL: high_v8i32_to_v4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%ext0 = extractelement <8 x i32> %v, i32 4
%ext1 = extractelement <8 x i32> %v, i32 5
%ext2 = extractelement <8 x i32> %v, i32 6
@@ -59,24 +63,86 @@ define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
%ins3 = insertelement <4 x i32> %ins2, i32 %ext3, i32 3
store <4 x i32> %ins3, <4 x i32>* %ptr, align 16
ret void
-
-; CHECK-LABEL: high_v8i32_to_v4i32
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
}
; Make sure that element size doesn't alter the codegen.
define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
+; CHECK-LABEL: high_v4f64_to_v2f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%ext0 = extractelement <4 x double> %v, i32 2
%ext1 = extractelement <4 x double> %v, i32 3
%ins0 = insertelement <2 x double> undef, double %ext0, i32 0
%ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
store <2 x double> %ins1, <2 x double>* %ptr, align 16
ret void
+}
+
+; PR25320 Make sure that a widened (possibly legalized) vector correctly zero-extends upper elements.
+; FIXME - Ideally these should just call VMOVD/VMOVQ/VMOVSS/VMOVSD
+
+define void @legal_vzmovl_2i32_8i32(<2 x i32>* %in, <8 x i32>* %out) {
+; CHECK-LABEL: legal_vzmovl_2i32_8i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; CHECK-NEXT: vmovaps %ymm0, (%rsi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %ld = load <2 x i32>, <2 x i32>* %in, align 8
+ %ext = extractelement <2 x i32> %ld, i64 0
+ %ins = insertelement <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %ext, i64 0
+ store <8 x i32> %ins, <8 x i32>* %out, align 32
+ ret void
+}
+
+define void @legal_vzmovl_2i64_4i64(<2 x i64>* %in, <4 x i64>* %out) {
+; CHECK-LABEL: legal_vzmovl_2i64_4i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovupd (%rdi), %xmm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; CHECK-NEXT: vmovapd %ymm0, (%rsi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %ld = load <2 x i64>, <2 x i64>* %in, align 8
+ %ext = extractelement <2 x i64> %ld, i64 0
+ %ins = insertelement <4 x i64> <i64 undef, i64 0, i64 0, i64 0>, i64 %ext, i64 0
+ store <4 x i64> %ins, <4 x i64>* %out, align 32
+ ret void
+}
-; CHECK-LABEL: high_v4f64_to_v2f64
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+define void @legal_vzmovl_2f32_8f32(<2 x float>* %in, <8 x float>* %out) {
+; CHECK-LABEL: legal_vzmovl_2f32_8f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; CHECK-NEXT: vmovaps %ymm0, (%rsi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %ld = load <2 x float>, <2 x float>* %in, align 8
+ %ext = extractelement <2 x float> %ld, i64 0
+ %ins = insertelement <8 x float> <float undef, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, float %ext, i64 0
+ store <8 x float> %ins, <8 x float>* %out, align 32
+ ret void
+}
+
+define void @legal_vzmovl_2f64_4f64(<2 x double>* %in, <4 x double>* %out) {
+; CHECK-LABEL: legal_vzmovl_2f64_4f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovupd (%rdi), %xmm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; CHECK-NEXT: vmovapd %ymm0, (%rsi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %ld = load <2 x double>, <2 x double>* %in, align 8
+ %ext = extractelement <2 x double> %ld, i64 0
+ %ins = insertelement <4 x double> <double undef, double 0.0, double 0.0, double 0.0>, double %ext, i64 0
+ store <4 x double> %ins, <4 x double>* %out, align 32
+ ret void
}
diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll
index 960b5f27cf53..54f33b2bd224 100644
--- a/test/CodeGen/X86/vec_fabs.ll
+++ b/test/CodeGen/X86/vec_fabs.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
-
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s
define <2 x double> @fabs_v2f64(<2 x double> %p)
{
diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll
index 3b1b2f5c1c77..7834b2804247 100644
--- a/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/test/CodeGen/X86/vec_fp_to_int.ll
@@ -10,19 +10,19 @@
; Double to Signed Integer
;
-define <2 x i64> @fptosi_2vf64(<2 x double> %a) {
-; SSE2-LABEL: fptosi_2vf64:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptosi_2vf64:
+define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
+; SSE-LABEL: fptosi_2f64_to_2i64:
+; SSE: # BB#0:
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptosi_2f64_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm1
@@ -35,19 +35,19 @@ define <2 x i64> @fptosi_2vf64(<2 x double> %a) {
ret <2 x i64> %cvt
}
-define <4 x i32> @fptosi_2vf64_i32(<2 x double> %a) {
-; SSE2-LABEL: fptosi_2vf64_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
-; SSE2-NEXT: retq
+define <4 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
+; SSE-LABEL: fptosi_2f64_to_2i32:
+; SSE: # BB#0:
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_2vf64_i32:
+; AVX-LABEL: fptosi_2f64_to_2i32:
; AVX: # BB#0:
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm1
@@ -62,26 +62,53 @@ define <4 x i32> @fptosi_2vf64_i32(<2 x double> %a) {
ret <4 x i32> %ext
}
-define <4 x i64> @fptosi_4vf64(<4 x double> %a) {
-; SSE2-LABEL: fptosi_4vf64:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; SSE2-NEXT: cvttsd2si %xmm1, %rax
-; SSE2-NEXT: movd %rax, %xmm3
-; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
-; SSE2-NEXT: cvttsd2si %xmm1, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm3, %xmm1
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptosi_4vf64:
+define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
+; SSE-LABEL: fptosi_4f64_to_2i32:
+; SSE: # BB#0:
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptosi_4f64_to_2i32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %cvt = fptosi <4 x double> %ext to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
+; SSE-LABEL: fptosi_4f64_to_4i64:
+; SSE: # BB#0:
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm2
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: cvttsd2si %xmm1, %rax
+; SSE-NEXT: movd %rax, %xmm3
+; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE-NEXT: cvttsd2si %xmm1, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptosi_4f64_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vcvttsd2si %xmm1, %rax
@@ -102,27 +129,27 @@ define <4 x i64> @fptosi_4vf64(<4 x double> %a) {
ret <4 x i64> %cvt
}
-define <4 x i32> @fptosi_4vf64_i32(<4 x double> %a) {
-; SSE2-LABEL: fptosi_4vf64_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvttsd2si %xmm1, %rax
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
-; SSE2-NEXT: cvttsd2si %xmm1, %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptosi_4vf64_i32:
+define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
+; SSE-LABEL: fptosi_4f64_to_4i32:
+; SSE: # BB#0:
+; SSE-NEXT: cvttsd2si %xmm1, %rax
+; SSE-NEXT: movd %rax, %xmm2
+; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE-NEXT: cvttsd2si %xmm1, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm2
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptosi_4f64_to_4i32:
; AVX: # BB#0:
; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT: vzeroupper
@@ -135,33 +162,33 @@ define <4 x i32> @fptosi_4vf64_i32(<4 x double> %a) {
; Double to Unsigned Integer
;
-define <2 x i64> @fptoui_2vf64(<2 x double> %a) {
-; SSE2-LABEL: fptoui_2vf64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: subsd %xmm2, %xmm1
-; SSE2-NEXT: cvttsd2si %xmm1, %rax
-; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE2-NEXT: xorq %rcx, %rax
-; SSE2-NEXT: cvttsd2si %xmm0, %rdx
-; SSE2-NEXT: ucomisd %xmm2, %xmm0
-; SSE2-NEXT: cmovaeq %rax, %rdx
-; SSE2-NEXT: movd %rdx, %xmm1
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: movapd %xmm0, %xmm3
-; SSE2-NEXT: subsd %xmm2, %xmm3
-; SSE2-NEXT: cvttsd2si %xmm3, %rax
-; SSE2-NEXT: xorq %rcx, %rax
-; SSE2-NEXT: cvttsd2si %xmm0, %rcx
-; SSE2-NEXT: ucomisd %xmm2, %xmm0
-; SSE2-NEXT: cmovaeq %rax, %rcx
-; SSE2-NEXT: movd %rcx, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptoui_2vf64:
+define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
+; SSE-LABEL: fptoui_2f64_to_2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: subsd %xmm2, %xmm1
+; SSE-NEXT: cvttsd2si %xmm1, %rax
+; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: ucomisd %xmm2, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rdx
+; SSE-NEXT: movd %rdx, %xmm1
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: movapd %xmm0, %xmm3
+; SSE-NEXT: subsd %xmm2, %xmm3
+; SSE-NEXT: cvttsd2si %xmm3, %rax
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-NEXT: ucomisd %xmm2, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movd %rcx, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_2f64_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
@@ -186,33 +213,33 @@ define <2 x i64> @fptoui_2vf64(<2 x double> %a) {
ret <2 x i64> %cvt
}
-define <4 x i32> @fptoui_2vf64_i32(<2 x double> %a) {
-; SSE2-LABEL: fptoui_2vf64_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT: movapd %xmm0, %xmm2
-; SSE2-NEXT: subsd %xmm1, %xmm2
-; SSE2-NEXT: cvttsd2si %xmm2, %rax
-; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE2-NEXT: xorq %rcx, %rax
-; SSE2-NEXT: cvttsd2si %xmm0, %rdx
-; SSE2-NEXT: ucomisd %xmm1, %xmm0
-; SSE2-NEXT: cmovaeq %rax, %rdx
-; SSE2-NEXT: movd %rdx, %xmm2
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: movapd %xmm0, %xmm3
-; SSE2-NEXT: subsd %xmm1, %xmm3
-; SSE2-NEXT: cvttsd2si %xmm3, %rax
-; SSE2-NEXT: xorq %rcx, %rax
-; SSE2-NEXT: cvttsd2si %xmm0, %rcx
-; SSE2-NEXT: ucomisd %xmm1, %xmm0
-; SSE2-NEXT: cmovaeq %rax, %rcx
-; SSE2-NEXT: movd %rcx, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptoui_2vf64_i32:
+define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
+; SSE-LABEL: fptoui_2f64_to_2i32:
+; SSE: # BB#0:
+; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: subsd %xmm1, %xmm2
+; SSE-NEXT: cvttsd2si %xmm2, %rax
+; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: ucomisd %xmm1, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rdx
+; SSE-NEXT: movd %rdx, %xmm2
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: movapd %xmm0, %xmm3
+; SSE-NEXT: subsd %xmm1, %xmm3
+; SSE-NEXT: cvttsd2si %xmm3, %rax
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-NEXT: ucomisd %xmm1, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movd %rcx, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_2f64_to_2i32:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
@@ -239,51 +266,101 @@ define <4 x i32> @fptoui_2vf64_i32(<2 x double> %a) {
ret <4 x i32> %ext
}
-define <4 x i64> @fptoui_4vf64(<4 x double> %a) {
-; SSE2-LABEL: fptoui_4vf64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movapd %xmm0, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
-; SSE2-NEXT: subsd %xmm3, %xmm0
-; SSE2-NEXT: cvttsd2si %xmm0, %rcx
-; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttsd2si %xmm2, %rdx
-; SSE2-NEXT: ucomisd %xmm3, %xmm2
-; SSE2-NEXT: cmovaeq %rcx, %rdx
-; SSE2-NEXT: movd %rdx, %xmm0
-; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
-; SSE2-NEXT: movapd %xmm2, %xmm4
-; SSE2-NEXT: subsd %xmm3, %xmm4
-; SSE2-NEXT: cvttsd2si %xmm4, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttsd2si %xmm2, %rdx
-; SSE2-NEXT: ucomisd %xmm3, %xmm2
-; SSE2-NEXT: cmovaeq %rcx, %rdx
-; SSE2-NEXT: movd %rdx, %xmm2
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE2-NEXT: movapd %xmm1, %xmm2
-; SSE2-NEXT: subsd %xmm3, %xmm2
-; SSE2-NEXT: cvttsd2si %xmm2, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttsd2si %xmm1, %rdx
-; SSE2-NEXT: ucomisd %xmm3, %xmm1
-; SSE2-NEXT: cmovaeq %rcx, %rdx
-; SSE2-NEXT: movd %rdx, %xmm2
-; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
-; SSE2-NEXT: movapd %xmm1, %xmm4
-; SSE2-NEXT: subsd %xmm3, %xmm4
-; SSE2-NEXT: cvttsd2si %xmm4, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttsd2si %xmm1, %rax
-; SSE2-NEXT: ucomisd %xmm3, %xmm1
-; SSE2-NEXT: cmovaeq %rcx, %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE2-NEXT: movdqa %xmm2, %xmm1
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptoui_4vf64:
+define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
+; SSE-LABEL: fptoui_4f64_to_2i32:
+; SSE: # BB#0:
+; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: subsd %xmm1, %xmm2
+; SSE-NEXT: cvttsd2si %xmm2, %rax
+; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: ucomisd %xmm1, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rdx
+; SSE-NEXT: movd %rdx, %xmm2
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: movapd %xmm0, %xmm3
+; SSE-NEXT: subsd %xmm1, %xmm3
+; SSE-NEXT: cvttsd2si %xmm3, %rax
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: ucomisd %xmm1, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rdx
+; SSE-NEXT: movd %rdx, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: ucomisd %xmm1, %xmm0
+; SSE-NEXT: cmovbq %rax, %rcx
+; SSE-NEXT: movd %rcx, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_4f64_to_2i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT: vcvttsd2si %xmm1, %rax
+; AVX-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-NEXT: vmovd %ecx, %xmm0
+; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
+; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %cvt = fptoui <4 x double> %ext to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
+; SSE-LABEL: fptoui_4f64_to_4i64:
+; SSE: # BB#0:
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; SSE-NEXT: subsd %xmm3, %xmm0
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttsd2si %xmm2, %rdx
+; SSE-NEXT: ucomisd %xmm3, %xmm2
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm0
+; SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
+; SSE-NEXT: movapd %xmm2, %xmm4
+; SSE-NEXT: subsd %xmm3, %xmm4
+; SSE-NEXT: cvttsd2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttsd2si %xmm2, %rdx
+; SSE-NEXT: ucomisd %xmm3, %xmm2
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm2
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT: movapd %xmm1, %xmm2
+; SSE-NEXT: subsd %xmm3, %xmm2
+; SSE-NEXT: cvttsd2si %xmm2, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttsd2si %xmm1, %rdx
+; SSE-NEXT: ucomisd %xmm3, %xmm1
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm2
+; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE-NEXT: movapd %xmm1, %xmm4
+; SSE-NEXT: subsd %xmm3, %xmm4
+; SSE-NEXT: cvttsd2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttsd2si %xmm1, %rax
+; SSE-NEXT: ucomisd %xmm3, %xmm1
+; SSE-NEXT: cmovaeq %rcx, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_4f64_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
@@ -326,53 +403,53 @@ define <4 x i64> @fptoui_4vf64(<4 x double> %a) {
ret <4 x i64> %cvt
}
-define <4 x i32> @fptoui_4vf64_i32(<4 x double> %a) {
-; SSE2-LABEL: fptoui_4vf64_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE2-NEXT: movapd %xmm1, %xmm3
-; SSE2-NEXT: subsd %xmm2, %xmm3
-; SSE2-NEXT: cvttsd2si %xmm3, %rcx
-; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttsd2si %xmm1, %rdx
-; SSE2-NEXT: ucomisd %xmm2, %xmm1
-; SSE2-NEXT: cmovaeq %rcx, %rdx
-; SSE2-NEXT: movd %rdx, %xmm3
-; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
-; SSE2-NEXT: movapd %xmm1, %xmm4
-; SSE2-NEXT: subsd %xmm2, %xmm4
-; SSE2-NEXT: cvttsd2si %xmm4, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttsd2si %xmm1, %rdx
-; SSE2-NEXT: ucomisd %xmm2, %xmm1
-; SSE2-NEXT: cmovaeq %rcx, %rdx
-; SSE2-NEXT: movd %rdx, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
-; SSE2-NEXT: movapd %xmm0, %xmm3
-; SSE2-NEXT: subsd %xmm2, %xmm3
-; SSE2-NEXT: cvttsd2si %xmm3, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttsd2si %xmm0, %rdx
-; SSE2-NEXT: ucomisd %xmm2, %xmm0
-; SSE2-NEXT: cmovaeq %rcx, %rdx
-; SSE2-NEXT: movd %rdx, %xmm3
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: movapd %xmm0, %xmm4
-; SSE2-NEXT: subsd %xmm2, %xmm4
-; SSE2-NEXT: cvttsd2si %xmm4, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttsd2si %xmm0, %rax
-; SSE2-NEXT: ucomisd %xmm2, %xmm0
-; SSE2-NEXT: cmovaeq %rcx, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptoui_4vf64_i32:
+define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
+; SSE-LABEL: fptoui_4f64_to_4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-NEXT: movapd %xmm1, %xmm3
+; SSE-NEXT: subsd %xmm2, %xmm3
+; SSE-NEXT: cvttsd2si %xmm3, %rcx
+; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttsd2si %xmm1, %rdx
+; SSE-NEXT: ucomisd %xmm2, %xmm1
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm3
+; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE-NEXT: movapd %xmm1, %xmm4
+; SSE-NEXT: subsd %xmm2, %xmm4
+; SSE-NEXT: cvttsd2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttsd2si %xmm1, %rdx
+; SSE-NEXT: ucomisd %xmm2, %xmm1
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; SSE-NEXT: movapd %xmm0, %xmm3
+; SSE-NEXT: subsd %xmm2, %xmm3
+; SSE-NEXT: cvttsd2si %xmm3, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttsd2si %xmm0, %rdx
+; SSE-NEXT: ucomisd %xmm2, %xmm0
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm3
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: movapd %xmm0, %xmm4
+; SSE-NEXT: subsd %xmm2, %xmm4
+; SSE-NEXT: cvttsd2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: ucomisd %xmm2, %xmm0
+; SSE-NEXT: cmovaeq %rcx, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_4f64_to_4i32:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vcvttsd2si %xmm1, %rax
@@ -395,13 +472,13 @@ define <4 x i32> @fptoui_4vf64_i32(<4 x double> %a) {
; Float to Signed Integer
;
-define <4 x i32> @fptosi_4vf32(<4 x float> %a) {
-; SSE2-LABEL: fptosi_4vf32:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE2-NEXT: retq
+define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
+; SSE-LABEL: fptosi_4f32_to_4i32:
+; SSE: # BB#0:
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_4vf32:
+; AVX-LABEL: fptosi_4f32_to_4i32:
; AVX: # BB#0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: retq
@@ -409,19 +486,19 @@ define <4 x i32> @fptosi_4vf32(<4 x float> %a) {
ret <4 x i32> %cvt
}
-define <2 x i64> @fptosi_4vf32_i64(<4 x float> %a) {
-; SSE2-LABEL: fptosi_4vf32_i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvttss2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: cvttss2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
+define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
+; SSE-LABEL: fptosi_2f32_to_2i64:
+; SSE: # BB#0:
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_4vf32_i64:
+; AVX-LABEL: fptosi_2f32_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm1
@@ -430,19 +507,45 @@ define <2 x i64> @fptosi_4vf32_i64(<4 x float> %a) {
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
- %shuf = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
+ %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%cvt = fptosi <2 x float> %shuf to <2 x i64>
ret <2 x i64> %cvt
}
-define <8 x i32> @fptosi_8vf32(<8 x float> %a) {
-; SSE2-LABEL: fptosi_8vf32:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
-; SSE2-NEXT: retq
+define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
+; SSE-LABEL: fptosi_4f32_to_2i64:
+; SSE: # BB#0:
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptosi_4f32_to_2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-NEXT: vcvttss2si %xmm1, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %cvt = fptosi <4 x float> %a to <4 x i64>
+ %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %shuf
+}
+
+define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
+; SSE-LABEL: fptosi_8f32_to_8i32:
+; SSE: # BB#0:
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: cvttps2dq %xmm1, %xmm1
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_8vf32:
+; AVX-LABEL: fptosi_8f32_to_8i32:
; AVX: # BB#0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: retq
@@ -450,28 +553,28 @@ define <8 x i32> @fptosi_8vf32(<8 x float> %a) {
ret <8 x i32> %cvt
}
-define <4 x i64> @fptosi_8vf32_i64(<8 x float> %a) {
-; SSE2-LABEL: fptosi_8vf32_i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvttss2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; SSE2-NEXT: cvttss2si %xmm1, %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
-; SSE2-NEXT: cvttss2si %xmm1, %rax
-; SSE2-NEXT: movd %rax, %xmm3
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: cvttss2si %xmm0, %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptosi_8vf32_i64:
+define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
+; SSE-LABEL: fptosi_4f32_to_4i64:
+; SSE: # BB#0:
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm2
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE-NEXT: cvttss2si %xmm1, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; SSE-NEXT: cvttss2si %xmm1, %rax
+; SSE-NEXT: movd %rax, %xmm3
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptosi_4f32_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT: vcvttss2si %xmm1, %rax
@@ -488,38 +591,81 @@ define <4 x i64> @fptosi_8vf32_i64(<8 x float> %a) {
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
- %shuf = shufflevector <8 x float> %a, <8 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%cvt = fptosi <4 x float> %shuf to <4 x i64>
ret <4 x i64> %cvt
}
+define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
+; SSE-LABEL: fptosi_8f32_to_4i64:
+; SSE: # BB#0:
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm2
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE-NEXT: cvttss2si %xmm1, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; SSE-NEXT: cvttss2si %xmm1, %rax
+; SSE-NEXT: movd %rax, %xmm3
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptosi_8f32_to_4i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; AVX-NEXT: vcvttss2si %xmm1, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX-NEXT: vcvttss2si %xmm2, %rax
+; AVX-NEXT: vmovq %rax, %xmm2
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vcvttss2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm2
+; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vcvttss2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = fptosi <8 x float> %a to <8 x i64>
+ %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i64> %shuf
+}
+
;
; Float to Unsigned Integer
;
-define <4 x i32> @fptoui_4vf32(<4 x float> %a) {
-; SSE2-LABEL: fptoui_4vf32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
-; SSE2-NEXT: cvttss2si %xmm1, %rax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
-; SSE2-NEXT: cvttss2si %xmm2, %rax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: cvttss2si %xmm0, %rax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: cvttss2si %xmm0, %rax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptoui_4vf32:
+define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
+; SSE-LABEL: fptoui_4f32_to_4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; SSE-NEXT: cvttss2si %xmm1, %rax
+; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE-NEXT: cvttss2si %xmm2, %rax
+; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_4f32_to_4i32:
; AVX: # BB#0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vcvttss2si %xmm1, %rax
@@ -537,33 +683,33 @@ define <4 x i32> @fptoui_4vf32(<4 x float> %a) {
ret <4 x i32> %cvt
}
-define <2 x i64> @fptoui_4vf32_i64(<4 x float> %a) {
-; SSE2-LABEL: fptoui_4vf32_i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: subss %xmm2, %xmm1
-; SSE2-NEXT: cvttss2si %xmm1, %rax
-; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
-; SSE2-NEXT: xorq %rcx, %rax
-; SSE2-NEXT: cvttss2si %xmm0, %rdx
-; SSE2-NEXT: ucomiss %xmm2, %xmm0
-; SSE2-NEXT: cmovaeq %rax, %rdx
-; SSE2-NEXT: movd %rdx, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: subss %xmm2, %xmm3
-; SSE2-NEXT: cvttss2si %xmm3, %rax
-; SSE2-NEXT: xorq %rcx, %rax
-; SSE2-NEXT: cvttss2si %xmm0, %rcx
-; SSE2-NEXT: ucomiss %xmm2, %xmm0
-; SSE2-NEXT: cmovaeq %rax, %rcx
-; SSE2-NEXT: movd %rcx, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptoui_4vf32_i64:
+define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
+; SSE-LABEL: fptoui_2f32_to_2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: subss %xmm2, %xmm1
+; SSE-NEXT: cvttss2si %xmm1, %rax
+; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rdx
+; SSE-NEXT: ucomiss %xmm2, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rdx
+; SSE-NEXT: movd %rdx, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: subss %xmm2, %xmm3
+; SSE-NEXT: cvttss2si %xmm3, %rax
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rcx
+; SSE-NEXT: ucomiss %xmm2, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movd %rcx, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_2f32_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm2
@@ -584,50 +730,102 @@ define <2 x i64> @fptoui_4vf32_i64(<4 x float> %a) {
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT: retq
- %shuf = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
+ %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%cvt = fptoui <2 x float> %shuf to <2 x i64>
ret <2 x i64> %cvt
}
-define <8 x i32> @fptoui_8vf32(<8 x float> %a) {
-; SSE2-LABEL: fptoui_8vf32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE2-NEXT: cvttss2si %xmm0, %rax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movaps %xmm2, %xmm3
-; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
-; SSE2-NEXT: cvttss2si %xmm3, %rax
-; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
-; SSE2-NEXT: cvttss2si %xmm2, %rax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
-; SSE2-NEXT: cvttss2si %xmm2, %rax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; SSE2-NEXT: cvttss2si %xmm2, %rax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
-; SSE2-NEXT: cvttss2si %xmm3, %rax
-; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE2-NEXT: cvttss2si %xmm1, %rax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
-; SSE2-NEXT: cvttss2si %xmm1, %rax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: movdqa %xmm2, %xmm1
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptoui_8vf32:
+define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
+; SSE-LABEL: fptoui_4f32_to_2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: subss %xmm2, %xmm1
+; SSE-NEXT: cvttss2si %xmm1, %rax
+; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rdx
+; SSE-NEXT: ucomiss %xmm2, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rdx
+; SSE-NEXT: movd %rdx, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: subss %xmm2, %xmm3
+; SSE-NEXT: cvttss2si %xmm3, %rax
+; SSE-NEXT: xorq %rcx, %rax
+; SSE-NEXT: cvttss2si %xmm0, %rcx
+; SSE-NEXT: ucomiss %xmm2, %xmm0
+; SSE-NEXT: cmovaeq %rax, %rcx
+; SSE-NEXT: movd %rcx, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_4f32_to_2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX-NEXT: vsubss %xmm2, %xmm1, %xmm3
+; AVX-NEXT: vcvttss2si %xmm3, %rax
+; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm1, %rdx
+; AVX-NEXT: vucomiss %xmm2, %xmm1
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vsubss %xmm2, %xmm0, %xmm1
+; AVX-NEXT: vcvttss2si %xmm1, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-NEXT: vucomiss %xmm2, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rcx
+; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: vmovq %rdx, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %cvt = fptoui <4 x float> %a to <4 x i64>
+ %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %shuf
+}
+
+define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
+; SSE-LABEL: fptoui_8f32_to_8i32:
+; SSE: # BB#0:
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: movaps %xmm2, %xmm3
+; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT: cvttss2si %xmm3, %rax
+; SSE-NEXT: movd %eax, %xmm3
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; SSE-NEXT: cvttss2si %xmm2, %rax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
+; SSE-NEXT: cvttss2si %xmm2, %rax
+; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT: movaps %xmm1, %xmm2
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; SSE-NEXT: cvttss2si %xmm2, %rax
+; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: movaps %xmm1, %xmm3
+; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT: cvttss2si %xmm3, %rax
+; SSE-NEXT: movd %eax, %xmm3
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE-NEXT: cvttss2si %xmm1, %rax
+; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE-NEXT: cvttss2si %xmm1, %rax
+; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_8f32_to_8i32:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
@@ -658,54 +856,54 @@ define <8 x i32> @fptoui_8vf32(<8 x float> %a) {
ret <8 x i32> %cvt
}
-define <4 x i64> @fptoui_8vf32_i64(<8 x float> %a) {
-; SSE2-LABEL: fptoui_8vf32_i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: subss %xmm1, %xmm2
-; SSE2-NEXT: cvttss2si %xmm2, %rcx
-; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttss2si %xmm0, %rdx
-; SSE2-NEXT: ucomiss %xmm1, %xmm0
-; SSE2-NEXT: cmovaeq %rcx, %rdx
-; SSE2-NEXT: movd %rdx, %xmm2
-; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
-; SSE2-NEXT: movaps %xmm3, %xmm4
-; SSE2-NEXT: subss %xmm1, %xmm4
-; SSE2-NEXT: cvttss2si %xmm4, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttss2si %xmm3, %rdx
-; SSE2-NEXT: ucomiss %xmm1, %xmm3
-; SSE2-NEXT: cmovaeq %rcx, %rdx
-; SSE2-NEXT: movd %rdx, %xmm3
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
-; SSE2-NEXT: movaps %xmm3, %xmm4
-; SSE2-NEXT: subss %xmm1, %xmm4
-; SSE2-NEXT: cvttss2si %xmm4, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttss2si %xmm3, %rdx
-; SSE2-NEXT: ucomiss %xmm1, %xmm3
-; SSE2-NEXT: cmovaeq %rcx, %rdx
-; SSE2-NEXT: movd %rdx, %xmm3
-; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
-; SSE2-NEXT: movapd %xmm0, %xmm4
-; SSE2-NEXT: subss %xmm1, %xmm4
-; SSE2-NEXT: cvttss2si %xmm4, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: cvttss2si %xmm0, %rax
-; SSE2-NEXT: ucomiss %xmm1, %xmm0
-; SSE2-NEXT: cmovaeq %rcx, %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: fptoui_8vf32_i64:
+define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
+; SSE-LABEL: fptoui_4f32_to_4i64:
+; SSE: # BB#0:
+; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: subss %xmm1, %xmm2
+; SSE-NEXT: cvttss2si %xmm2, %rcx
+; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttss2si %xmm0, %rdx
+; SSE-NEXT: ucomiss %xmm1, %xmm0
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm2
+; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT: movaps %xmm3, %xmm4
+; SSE-NEXT: subss %xmm1, %xmm4
+; SSE-NEXT: cvttss2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttss2si %xmm3, %rdx
+; SSE-NEXT: ucomiss %xmm1, %xmm3
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm3
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
+; SSE-NEXT: movaps %xmm3, %xmm4
+; SSE-NEXT: subss %xmm1, %xmm4
+; SSE-NEXT: cvttss2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttss2si %xmm3, %rdx
+; SSE-NEXT: ucomiss %xmm1, %xmm3
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm3
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: movapd %xmm0, %xmm4
+; SSE-NEXT: subss %xmm1, %xmm4
+; SSE-NEXT: cvttss2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: ucomiss %xmm1, %xmm0
+; SSE-NEXT: cmovaeq %rcx, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_4f32_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
@@ -744,22 +942,113 @@ define <4 x i64> @fptoui_8vf32_i64(<8 x float> %a) {
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: retq
- %shuf = shufflevector <8 x float> %a, <8 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%cvt = fptoui <4 x float> %shuf to <4 x i64>
ret <4 x i64> %cvt
}
+define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
+; SSE-LABEL: fptoui_8f32_to_4i64:
+; SSE: # BB#0:
+; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: subss %xmm1, %xmm2
+; SSE-NEXT: cvttss2si %xmm2, %rcx
+; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttss2si %xmm0, %rdx
+; SSE-NEXT: ucomiss %xmm1, %xmm0
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm2
+; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT: movaps %xmm3, %xmm4
+; SSE-NEXT: subss %xmm1, %xmm4
+; SSE-NEXT: cvttss2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttss2si %xmm3, %rdx
+; SSE-NEXT: ucomiss %xmm1, %xmm3
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm3
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT: movaps %xmm0, %xmm3
+; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
+; SSE-NEXT: movaps %xmm3, %xmm4
+; SSE-NEXT: subss %xmm1, %xmm4
+; SSE-NEXT: cvttss2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttss2si %xmm3, %rdx
+; SSE-NEXT: ucomiss %xmm1, %xmm3
+; SSE-NEXT: cmovaeq %rcx, %rdx
+; SSE-NEXT: movd %rdx, %xmm3
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT: movapd %xmm0, %xmm4
+; SSE-NEXT: subss %xmm1, %xmm4
+; SSE-NEXT: cvttss2si %xmm4, %rcx
+; SSE-NEXT: xorq %rax, %rcx
+; SSE-NEXT: cvttss2si %xmm0, %rax
+; SSE-NEXT: ucomiss %xmm1, %xmm0
+; SSE-NEXT: cmovaeq %rcx, %rax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fptoui_8f32_to_4i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vsubss %xmm1, %xmm2, %xmm3
+; AVX-NEXT: vcvttss2si %xmm3, %rax
+; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm2, %rdx
+; AVX-NEXT: vucomiss %xmm1, %xmm2
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
+; AVX-NEXT: vsubss %xmm1, %xmm3, %xmm4
+; AVX-NEXT: vcvttss2si %xmm4, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm3, %rdx
+; AVX-NEXT: vucomiss %xmm1, %xmm3
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm3
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vcvttss2si %xmm3, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rdx
+; AVX-NEXT: vucomiss %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm3
+; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm4
+; AVX-NEXT: vcvttss2si %xmm4, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-NEXT: vucomiss %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rcx
+; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = fptoui <8 x float> %a to <8 x i64>
+ %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i64> %shuf
+}
+
;
; Constant Folding
;
-define <2 x i64> @fptosi_2vf64c() {
-; SSE2-LABEL: fptosi_2vf64c:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
-; SSE2-NEXT: retq
+define <2 x i64> @fptosi_2f64_to_2i64_const() {
+; SSE-LABEL: fptosi_2f64_to_2i64_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_2vf64c:
+; AVX-LABEL: fptosi_2f64_to_2i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
; AVX-NEXT: retq
@@ -767,13 +1056,13 @@ define <2 x i64> @fptosi_2vf64c() {
ret <2 x i64> %cvt
}
-define <4 x i32> @fptosi_2vf64c_i32() {
-; SSE2-LABEL: fptosi_2vf64c_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
-; SSE2-NEXT: retq
+define <4 x i32> @fptosi_2f64_to_2i32_const() {
+; SSE-LABEL: fptosi_2f64_to_2i32_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_2vf64c_i32:
+; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT: retq
@@ -782,14 +1071,14 @@ define <4 x i32> @fptosi_2vf64c_i32() {
ret <4 x i32> %ext
}
-define <4 x i64> @fptosi_4vf64c() {
-; SSE2-LABEL: fptosi_4vf64c:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
-; SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
-; SSE2-NEXT: retq
+define <4 x i64> @fptosi_4f64_to_4i64_const() {
+; SSE-LABEL: fptosi_4f64_to_4i64_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_4vf64c:
+; AVX-LABEL: fptosi_4f64_to_4i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
; AVX-NEXT: retq
@@ -797,13 +1086,13 @@ define <4 x i64> @fptosi_4vf64c() {
ret <4 x i64> %cvt
}
-define <4 x i32> @fptosi_4vf64c_i32() {
-; SSE2-LABEL: fptosi_4vf64c_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
-; SSE2-NEXT: retq
+define <4 x i32> @fptosi_4f64_to_4i32_const() {
+; SSE-LABEL: fptosi_4f64_to_4i32_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_4vf64c_i32:
+; AVX-LABEL: fptosi_4f64_to_4i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; AVX-NEXT: retq
@@ -811,13 +1100,13 @@ define <4 x i32> @fptosi_4vf64c_i32() {
ret <4 x i32> %cvt
}
-define <2 x i64> @fptoui_2vf64c() {
-; SSE2-LABEL: fptoui_2vf64c:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,4]
-; SSE2-NEXT: retq
+define <2 x i64> @fptoui_2f64_to_2i64_const() {
+; SSE-LABEL: fptoui_2f64_to_2i64_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptoui_2vf64c:
+; AVX-LABEL: fptoui_2f64_to_2i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
; AVX-NEXT: retq
@@ -825,13 +1114,13 @@ define <2 x i64> @fptoui_2vf64c() {
ret <2 x i64> %cvt
}
-define <4 x i32> @fptoui_2vf64c_i32(<2 x double> %a) {
-; SSE2-LABEL: fptoui_2vf64c_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
-; SSE2-NEXT: retq
+define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
+; SSE-LABEL: fptoui_2f64_to_2i32_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptoui_2vf64c_i32:
+; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT: retq
@@ -840,14 +1129,14 @@ define <4 x i32> @fptoui_2vf64c_i32(<2 x double> %a) {
ret <4 x i32> %ext
}
-define <4 x i64> @fptoui_4vf64c(<4 x double> %a) {
-; SSE2-LABEL: fptoui_4vf64c:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,4]
-; SSE2-NEXT: movaps {{.*#+}} xmm1 = [6,8]
-; SSE2-NEXT: retq
+define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
+; SSE-LABEL: fptoui_4f64_to_4i64_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptoui_4vf64c:
+; AVX-LABEL: fptoui_4f64_to_4i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
; AVX-NEXT: retq
@@ -855,13 +1144,13 @@ define <4 x i64> @fptoui_4vf64c(<4 x double> %a) {
ret <4 x i64> %cvt
}
-define <4 x i32> @fptoui_4vf64c_i32(<4 x double> %a) {
-; SSE2-LABEL: fptoui_4vf64c_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
-; SSE2-NEXT: retq
+define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
+; SSE-LABEL: fptoui_4f64_to_4i32_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptoui_4vf64c_i32:
+; AVX-LABEL: fptoui_4f64_to_4i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
; AVX-NEXT: retq
@@ -869,13 +1158,13 @@ define <4 x i32> @fptoui_4vf64c_i32(<4 x double> %a) {
ret <4 x i32> %cvt
}
-define <4 x i32> @fptosi_4vf32c() {
-; SSE2-LABEL: fptosi_4vf32c:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
-; SSE2-NEXT: retq
+define <4 x i32> @fptosi_4f32_to_4i32_const() {
+; SSE-LABEL: fptosi_4f32_to_4i32_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_4vf32c:
+; AVX-LABEL: fptosi_4f32_to_4i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; AVX-NEXT: retq
@@ -883,14 +1172,14 @@ define <4 x i32> @fptosi_4vf32c() {
ret <4 x i32> %cvt
}
-define <4 x i64> @fptosi_4vf32c_i64() {
-; SSE2-LABEL: fptosi_4vf32c_i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
-; SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,3]
-; SSE2-NEXT: retq
+define <4 x i64> @fptosi_4f32_to_4i64_const() {
+; SSE-LABEL: fptosi_4f32_to_4i64_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_4vf32c_i64:
+; AVX-LABEL: fptosi_4f32_to_4i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
; AVX-NEXT: retq
@@ -898,14 +1187,14 @@ define <4 x i64> @fptosi_4vf32c_i64() {
ret <4 x i64> %cvt
}
-define <8 x i32> @fptosi_8vf32c(<8 x float> %a) {
-; SSE2-LABEL: fptosi_8vf32c:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
-; SSE2-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
-; SSE2-NEXT: retq
+define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
+; SSE-LABEL: fptosi_8f32_to_8i32_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptosi_8vf32c:
+; AVX-LABEL: fptosi_8f32_to_8i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
; AVX-NEXT: retq
@@ -913,13 +1202,13 @@ define <8 x i32> @fptosi_8vf32c(<8 x float> %a) {
ret <8 x i32> %cvt
}
-define <4 x i32> @fptoui_4vf32c(<4 x float> %a) {
-; SSE2-LABEL: fptoui_4vf32c:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
-; SSE2-NEXT: retq
+define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
+; SSE-LABEL: fptoui_4f32_to_4i32_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptoui_4vf32c:
+; AVX-LABEL: fptoui_4f32_to_4i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
; AVX-NEXT: retq
@@ -927,14 +1216,14 @@ define <4 x i32> @fptoui_4vf32c(<4 x float> %a) {
ret <4 x i32> %cvt
}
-define <4 x i64> @fptoui_4vf32c_i64() {
-; SSE2-LABEL: fptoui_4vf32c_i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,2]
-; SSE2-NEXT: movaps {{.*#+}} xmm1 = [4,8]
-; SSE2-NEXT: retq
+define <4 x i64> @fptoui_4f32_to_4i64_const() {
+; SSE-LABEL: fptoui_4f32_to_4i64_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptoui_4vf32c_i64:
+; AVX-LABEL: fptoui_4f32_to_4i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX-NEXT: retq
@@ -942,14 +1231,14 @@ define <4 x i64> @fptoui_4vf32c_i64() {
ret <4 x i64> %cvt
}
-define <8 x i32> @fptoui_8vf32c(<8 x float> %a) {
-; SSE2-LABEL: fptoui_8vf32c:
-; SSE2: # BB#0:
-; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
-; SSE2-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
-; SSE2-NEXT: retq
+define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
+; SSE-LABEL: fptoui_8f32_to_8i32_const:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
+; SSE-NEXT: retq
;
-; AVX-LABEL: fptoui_8vf32c:
+; AVX-LABEL: fptoui_8f32_to_8i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
; AVX-NEXT: retq
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 4018a21090e7..14b57e76dc8f 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86 -mattr=+sse2,+ssse3 | FileCheck %s
; There are no MMX operations in @t1
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index 4a3d08813904..fd98791815e7 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -11,20 +11,20 @@
; Signed Integer to Double
;
-define <2 x double> @sitofp_2vf64(<2 x i64> %a) {
-; SSE2-LABEL: sitofp_2vf64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_2vf64:
+define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
+; SSE-LABEL: sitofp_2i64_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: cvtsi2sdq %rax, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_2i64_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vpextrq $1, %xmm0, %rax
; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
@@ -37,13 +37,13 @@ define <2 x double> @sitofp_2vf64(<2 x i64> %a) {
ret <2 x double> %cvt
}
-define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) {
-; SSE2-LABEL: sitofp_2vf64_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
-; SSE2-NEXT: retq
+define <2 x double> @sitofp_2i32_to_2f64(<4 x i32> %a) {
+; SSE-LABEL: sitofp_2i32_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
;
-; AVX-LABEL: sitofp_2vf64_i32:
+; AVX-LABEL: sitofp_2i32_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
@@ -52,15 +52,31 @@ define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) {
ret <2 x double> %cvt
}
-define <2 x double> @sitofp_2vf64_i16(<8 x i16> %a) {
-; SSE2-LABEL: sitofp_2vf64_i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_2vf64_i16:
+define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
+; SSE-LABEL: sitofp_4i32_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_4i32_to_2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cvt = sitofp <4 x i32> %a to <4 x double>
+ %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %shuf
+}
+
+define <2 x double> @sitofp_2i16_to_2f64(<8 x i16> %a) {
+; SSE-LABEL: sitofp_2i16_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_2i16_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
@@ -70,16 +86,42 @@ define <2 x double> @sitofp_2vf64_i16(<8 x i16> %a) {
ret <2 x double> %cvt
}
-define <2 x double> @sitofp_2vf64_i8(<16 x i8> %a) {
-; SSE2-LABEL: sitofp_2vf64_i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $24, %xmm0
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_2vf64_i8:
+define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
+; SSE-LABEL: sitofp_8i16_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_8i16_to_2f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sitofp_8i16_to_2f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %cvt = sitofp <8 x i16> %a to <8 x double>
+ %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %shuf
+}
+
+define <2 x double> @sitofp_2i8_to_2f64(<16 x i8> %a) {
+; SSE-LABEL: sitofp_2i8_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $24, %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_2i8_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
@@ -89,28 +131,56 @@ define <2 x double> @sitofp_2vf64_i8(<16 x i8> %a) {
ret <2 x double> %cvt
}
-define <4 x double> @sitofp_4vf64(<4 x i64> %a) {
-; SSE2-LABEL: sitofp_4vf64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; SSE2-NEXT: movapd %xmm2, %xmm0
-; SSE2-NEXT: movapd %xmm3, %xmm1
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: sitofp_4vf64:
+define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
+; SSE-LABEL: sitofp_16i8_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $24, %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_16i8_to_2f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sitofp_16i8_to_2f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %cvt = sitofp <16 x i8> %a to <16 x double>
+ %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %shuf
+}
+
+define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
+; SSE-LABEL: sitofp_4i64_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: cvtsi2sdq %rax, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: cvtsi2sdq %rax, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE-NEXT: movapd %xmm2, %xmm0
+; SSE-NEXT: movapd %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_4i64_to_4f64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
@@ -127,7 +197,7 @@ define <4 x double> @sitofp_4vf64(<4 x i64> %a) {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: sitofp_4vf64:
+; AVX2-LABEL: sitofp_4i64_to_4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
@@ -147,16 +217,16 @@ define <4 x double> @sitofp_4vf64(<4 x i64> %a) {
ret <4 x double> %cvt
}
-define <4 x double> @sitofp_4vf64_i32(<4 x i32> %a) {
-; SSE2-LABEL: sitofp_4vf64_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_4vf64_i32:
+define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) {
+; SSE-LABEL: sitofp_4i32_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_4i32_to_4f64:
; AVX: # BB#0:
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT: retq
@@ -164,17 +234,17 @@ define <4 x double> @sitofp_4vf64_i32(<4 x i32> %a) {
ret <4 x double> %cvt
}
-define <4 x double> @sitofp_4vf64_i16(<8 x i16> %a) {
-; SSE2-LABEL: sitofp_4vf64_i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_4vf64_i16:
+define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) {
+; SSE-LABEL: sitofp_4i16_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT: psrad $16, %xmm1
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_4i16_to_4f64:
; AVX: # BB#0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
@@ -184,18 +254,44 @@ define <4 x double> @sitofp_4vf64_i16(<8 x i16> %a) {
ret <4 x double> %cvt
}
-define <4 x double> @sitofp_4vf64_i8(<16 x i8> %a) {
-; SSE2-LABEL: sitofp_4vf64_i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: psrad $24, %xmm1
-; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_4vf64_i8:
+define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
+; SSE-LABEL: sitofp_8i16_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT: psrad $16, %xmm1
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_8i16_to_4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sitofp_8i16_to_4f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX2-NEXT: retq
+ %cvt = sitofp <8 x i16> %a to <8 x double>
+ %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x double> %shuf
+}
+
+define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) {
+; SSE-LABEL: sitofp_4i8_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT: psrad $24, %xmm1
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_4i8_to_4f64:
; AVX: # BB#0:
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
@@ -205,28 +301,56 @@ define <4 x double> @sitofp_4vf64_i8(<16 x i8> %a) {
ret <4 x double> %cvt
}
+define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
+; SSE-LABEL: sitofp_16i8_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT: psrad $24, %xmm1
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_16i8_to_4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sitofp_16i8_to_4f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX2-NEXT: retq
+ %cvt = sitofp <16 x i8> %a to <16 x double>
+ %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x double> %shuf
+}
+
;
; Unsigned Integer to Double
;
-define <2 x double> @uitofp_2vf64(<2 x i64> %a) {
-; SSE2-LABEL: uitofp_2vf64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
-; SSE2-NEXT: subpd %xmm3, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; SSE2-NEXT: addpd %xmm4, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: subpd %xmm3, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
-; SSE2-NEXT: addpd %xmm2, %xmm1
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: uitofp_2vf64:
+define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
+; SSE-LABEL: uitofp_2i64_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
+; SSE-NEXT: subpd %xmm3, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE-NEXT: addpd %xmm4, %xmm0
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT: subpd %xmm3, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; SSE-NEXT: addpd %xmm2, %xmm1
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_2i64_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
@@ -243,26 +367,26 @@ define <2 x double> @uitofp_2vf64(<2 x i64> %a) {
ret <2 x double> %cvt
}
-define <2 x double> @uitofp_2vf64_i32(<4 x i32> %a) {
-; SSE2-LABEL: uitofp_2vf64_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
-; SSE2-NEXT: subpd %xmm3, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; SSE2-NEXT: addpd %xmm4, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: subpd %xmm3, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
-; SSE2-NEXT: addpd %xmm2, %xmm1
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: uitofp_2vf64_i32:
+define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) {
+; SSE-LABEL: uitofp_2i32_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
+; SSE-NEXT: subpd %xmm3, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE-NEXT: addpd %xmm4, %xmm0
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT: subpd %xmm3, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; SSE-NEXT: addpd %xmm2, %xmm1
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_2i32_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
@@ -281,21 +405,64 @@ define <2 x double> @uitofp_2vf64_i32(<4 x i32> %a) {
ret <2 x double> %cvt
}
-define <2 x double> @uitofp_2vf64_i16(<8 x i16> %a) {
-; SSE2-LABEL: uitofp_2vf64_i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
-; SSE2-NEXT: pand .LCPI10_0(%rip), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: uitofp_2vf64_i16:
+define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
+; SSE-LABEL: uitofp_4i32_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
+; SSE-NEXT: subpd %xmm3, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE-NEXT: addpd %xmm4, %xmm0
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT: subpd %xmm3, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; SSE-NEXT: addpd %xmm2, %xmm1
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_4i32_to_2f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: uitofp_4i32_to_2f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
+; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %cvt = uitofp <4 x i32> %a to <4 x double>
+ %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %shuf
+}
+
+define <2 x double> @uitofp_2i16_to_2f64(<8 x i16> %a) {
+; SSE-LABEL: uitofp_2i16_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_2i16_to_2f64:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX-NEXT: vpand .LCPI10_0(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
%shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
@@ -303,22 +470,44 @@ define <2 x double> @uitofp_2vf64_i16(<8 x i16> %a) {
ret <2 x double> %cvt
}
-define <2 x double> @uitofp_2vf64_i8(<16 x i8> %a) {
-; SSE2-LABEL: uitofp_2vf64_i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE2-NEXT: pand .LCPI11_0(%rip), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: uitofp_2vf64_i8:
+define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
+; SSE-LABEL: uitofp_8i16_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_8i16_to_2f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: uitofp_8i16_to_2f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %cvt = uitofp <8 x i16> %a to <8 x double>
+ %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %shuf
+}
+
+define <2 x double> @uitofp_2i8_to_2f64(<16 x i8> %a) {
+; SSE-LABEL: uitofp_2i8_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_2i8_to_2f64:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT: vpand .LCPI11_0(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
%shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
@@ -326,34 +515,62 @@ define <2 x double> @uitofp_2vf64_i8(<16 x i8> %a) {
ret <2 x double> %cvt
}
-define <4 x double> @uitofp_4vf64(<4 x i64> %a) {
-; SSE2-LABEL: uitofp_4vf64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
-; SSE2-NEXT: subpd %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
-; SSE2-NEXT: addpd %xmm5, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE2-NEXT: subpd %xmm4, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
-; SSE2-NEXT: addpd %xmm3, %xmm5
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: subpd %xmm4, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
-; SSE2-NEXT: addpd %xmm5, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE2-NEXT: subpd %xmm4, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
-; SSE2-NEXT: addpd %xmm3, %xmm2
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: uitofp_4vf64:
+define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
+; SSE-LABEL: uitofp_16i8_to_2f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_16i8_to_2f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: uitofp_16i8_to_2f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %cvt = uitofp <16 x i8> %a to <16 x double>
+ %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %shuf
+}
+
+define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
+; SSE-LABEL: uitofp_4i64_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
+; SSE-NEXT: subpd %xmm4, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
+; SSE-NEXT: addpd %xmm5, %xmm0
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE-NEXT: subpd %xmm4, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
+; SSE-NEXT: addpd %xmm3, %xmm5
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: subpd %xmm4, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
+; SSE-NEXT: addpd %xmm5, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE-NEXT: subpd %xmm4, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
+; SSE-NEXT: addpd %xmm3, %xmm2
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_4i64_to_4f64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
@@ -377,7 +594,7 @@ define <4 x double> @uitofp_4vf64(<4 x i64> %a) {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: uitofp_4vf64:
+; AVX2-LABEL: uitofp_4i64_to_4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
@@ -404,54 +621,54 @@ define <4 x double> @uitofp_4vf64(<4 x i64> %a) {
ret <4 x double> %cvt
}
-define <4 x double> @uitofp_4vf64_i32(<4 x i32> %a) {
-; SSE2-LABEL: uitofp_4vf64_i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
-; SSE2-NEXT: subpd %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
-; SSE2-NEXT: addpd %xmm5, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT: subpd %xmm4, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
-; SSE2-NEXT: addpd %xmm1, %xmm5
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
-; SSE2-NEXT: pand .LCPI13_2(%rip), %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: subpd %xmm4, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
-; SSE2-NEXT: addpd %xmm2, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
-; SSE2-NEXT: subpd %xmm4, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
-; SSE2-NEXT: addpd %xmm5, %xmm2
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: uitofp_4vf64_i32:
+define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
+; SSE-LABEL: uitofp_4i32_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT: movapd {{.*#+}} xmm5 = [4.503600e+15,1.934281e+25]
+; SSE-NEXT: subpd %xmm5, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
+; SSE-NEXT: addpd %xmm6, %xmm0
+; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; SSE-NEXT: subpd %xmm5, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[2,3,0,1]
+; SSE-NEXT: addpd %xmm4, %xmm6
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm6[0]
+; SSE-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: subpd %xmm5, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; SSE-NEXT: addpd %xmm2, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; SSE-NEXT: subpd %xmm5, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
+; SSE-NEXT: addpd %xmm4, %xmm2
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_4i32_to_4f64:
; AVX1: # BB#0:
-; AVX1-NEXT: vpand .LCPI13_0(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT: vmulpd .LCPI13_1(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: uitofp_4vf64_i32:
+; AVX2-LABEL: uitofp_4i32_to_4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
-; AVX2-NEXT: vbroadcastsd .LCPI13_0(%rip), %ymm2
+; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastd .LCPI13_1(%rip), %xmm2
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
@@ -460,18 +677,18 @@ define <4 x double> @uitofp_4vf64_i32(<4 x i32> %a) {
ret <4 x double> %cvt
}
-define <4 x double> @uitofp_4vf64_i16(<8 x i16> %a) {
-; SSE2-LABEL: uitofp_4vf64_i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: uitofp_4vf64_i16:
+define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) {
+; SSE-LABEL: uitofp_4i16_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_4i16_to_4f64:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
@@ -481,19 +698,46 @@ define <4 x double> @uitofp_4vf64_i16(<8 x i16> %a) {
ret <4 x double> %cvt
}
-define <4 x double> @uitofp_4vf64_i8(<16 x i8> %a) {
-; SSE2-LABEL: uitofp_4vf64_i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: uitofp_4vf64_i8:
+define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) {
+; SSE-LABEL: uitofp_8i16_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_8i16_to_4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: uitofp_8i16_to_4f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX2-NEXT: retq
+ %cvt = uitofp <8 x i16> %a to <8 x double>
+ %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x double> %shuf
+}
+
+define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) {
+; SSE-LABEL: uitofp_4i8_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_4i8_to_4f64:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
@@ -503,38 +747,86 @@ define <4 x double> @uitofp_4vf64_i8(<16 x i8> %a) {
ret <4 x double> %cvt
}
+define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
+; SSE-LABEL: uitofp_16i8_to_4f64:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_16i8_to_4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: uitofp_16i8_to_4f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX2-NEXT: retq
+ %cvt = uitofp <16 x i8> %a to <16 x double>
+ %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x double> %shuf
+}
+
;
; Signed Integer to Float
;
-define <4 x float> @sitofp_4vf32(<4 x i32> %a) {
-; SSE2-LABEL: sitofp_4vf32:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
-; SSE2-NEXT: retq
+define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
+; SSE-LABEL: sitofp_2i64_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
;
-; AVX-LABEL: sitofp_4vf32:
+; AVX-LABEL: sitofp_2i64_to_4f32:
; AVX: # BB#0:
-; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX-NEXT: retq
- %cvt = sitofp <4 x i32> %a to <4 x float>
- ret <4 x float> %cvt
+ %cvt = sitofp <2 x i64> %a to <2 x float>
+ %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x float> %ext
}
-define <4 x float> @sitofp_4vf32_i64(<2 x i64> %a) {
-; SSE2-LABEL: sitofp_4vf32_i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_4vf32_i64:
+define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
+; SSE-LABEL: sitofp_4i64_to_4f32_undef:
+; SSE: # BB#0:
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_4i64_to_4f32_undef:
; AVX: # BB#0:
; AVX-NEXT: vpextrq $1, %xmm0, %rax
; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
@@ -546,20 +838,34 @@ define <4 x float> @sitofp_4vf32_i64(<2 x i64> %a) {
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX-NEXT: retq
- %cvt = sitofp <2 x i64> %a to <2 x float>
- %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
- ret <4 x float> %ext
+ %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %cvt = sitofp <4 x i64> %ext to <4 x float>
+ ret <4 x float> %cvt
+}
+
+define <4 x float> @sitofp_4i32_to_4f32(<4 x i32> %a) {
+; SSE-LABEL: sitofp_4i32_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_4i32_to_4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
+ %cvt = sitofp <4 x i32> %a to <4 x float>
+ ret <4 x float> %cvt
}
-define <4 x float> @sitofp_4vf32_i16(<8 x i16> %a) {
-; SSE2-LABEL: sitofp_4vf32_i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_4vf32_i16:
+define <4 x float> @sitofp_4i16_to_4f32(<8 x i16> %a) {
+; SSE-LABEL: sitofp_4i16_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_4i16_to_4f32:
; AVX: # BB#0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -569,16 +875,45 @@ define <4 x float> @sitofp_4vf32_i16(<8 x i16> %a) {
ret <4 x float> %cvt
}
-define <4 x float> @sitofp_4vf32_i8(<16 x i8> %a) {
-; SSE2-LABEL: sitofp_4vf32_i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $24, %xmm0
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_4vf32_i8:
+define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
+; SSE-LABEL: sitofp_8i16_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_8i16_to_4f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sitofp_8i16_to_4f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %cvt = sitofp <8 x i16> %a to <8 x float>
+ %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %shuf
+}
+
+define <4 x float> @sitofp_4i8_to_4f32(<16 x i8> %a) {
+; SSE-LABEL: sitofp_4i8_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $24, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_4i8_to_4f32:
; AVX: # BB#0:
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -588,43 +923,59 @@ define <4 x float> @sitofp_4vf32_i8(<16 x i8> %a) {
ret <4 x float> %cvt
}
-define <8 x float> @sitofp_8vf32(<8 x i32> %a) {
-; SSE2-LABEL: sitofp_8vf32:
-; SSE2: # BB#0:
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
-; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: sitofp_8vf32:
-; AVX: # BB#0:
-; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX-NEXT: retq
- %cvt = sitofp <8 x i32> %a to <8 x float>
- ret <8 x float> %cvt
+define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
+; SSE-LABEL: sitofp_16i8_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $24, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_16i8_to_4f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sitofp_16i8_to_4f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %cvt = sitofp <16 x i8> %a to <16 x float>
+ %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %shuf
}
-define <4 x float> @sitofp_4vf32_4i64(<4 x i64> %a) {
-; SSE2-LABEL: sitofp_4vf32_4i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: sitofp_4vf32_4i64:
+define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
+; SSE-LABEL: sitofp_4i64_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_4i64_to_4f32:
; AVX1: # BB#0:
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
@@ -642,7 +993,7 @@ define <4 x float> @sitofp_4vf32_4i64(<4 x i64> %a) {
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
-; AVX2-LABEL: sitofp_4vf32_4i64:
+; AVX2-LABEL: sitofp_4i64_to_4f32:
; AVX2: # BB#0:
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
@@ -663,20 +1014,34 @@ define <4 x float> @sitofp_4vf32_4i64(<4 x i64> %a) {
ret <4 x float> %cvt
}
-define <8 x float> @sitofp_8vf32_i16(<8 x i16> %a) {
-; SSE2-LABEL: sitofp_8vf32_i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: sitofp_8vf32_i16:
+define <8 x float> @sitofp_8i32_to_8f32(<8 x i32> %a) {
+; SSE-LABEL: sitofp_8i32_to_8f32:
+; SSE: # BB#0:
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sitofp_8i32_to_8f32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = sitofp <8 x i32> %a to <8 x float>
+ ret <8 x float> %cvt
+}
+
+define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) {
+; SSE-LABEL: sitofp_8i16_to_8f32:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT: psrad $16, %xmm1
+; SSE-NEXT: cvtdq2ps %xmm1, %xmm2
+; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_8i16_to_8f32:
; AVX1: # BB#0:
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
@@ -685,7 +1050,7 @@ define <8 x float> @sitofp_8vf32_i16(<8 x i16> %a) {
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: sitofp_8vf32_i16:
+; AVX2-LABEL: sitofp_8i16_to_8f32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
@@ -694,22 +1059,22 @@ define <8 x float> @sitofp_8vf32_i16(<8 x i16> %a) {
ret <8 x float> %cvt
}
-define <8 x float> @sitofp_8vf32_i8(<16 x i8> %a) {
-; SSE2-LABEL: sitofp_8vf32_i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $24, %xmm1
-; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $24, %xmm0
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: sitofp_8vf32_i8:
+define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) {
+; SSE-LABEL: sitofp_8i8_to_8f32:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $24, %xmm1
+; SSE-NEXT: cvtdq2ps %xmm1, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $24, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: sitofp_8i8_to_8f32:
; AVX1: # BB#0:
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
@@ -718,9 +1083,9 @@ define <8 x float> @sitofp_8vf32_i8(<16 x i8> %a) {
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: sitofp_8vf32_i8:
+; AVX2-LABEL: sitofp_8i8_to_8f32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpmovzxbd %xmm0, %ymm0
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vpslld $24, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
@@ -730,125 +1095,124 @@ define <8 x float> @sitofp_8vf32_i8(<16 x i8> %a) {
ret <8 x float> %cvt
}
+define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) {
+; SSE-LABEL: sitofp_16i8_to_8f32:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $24, %xmm1
+; SSE-NEXT: cvtdq2ps %xmm1, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT: psrad $24, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
;
-; Unsigned Integer to Float
-;
-
-define <4 x float> @uitofp_4vf32(<4 x i32> %a) {
-; SSE2-LABEL: uitofp_4vf32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: por .LCPI24_1(%rip), %xmm1
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: por .LCPI24_2(%rip), %xmm0
-; SSE2-NEXT: addps .LCPI24_3(%rip), %xmm0
-; SSE2-NEXT: addps %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: uitofp_4vf32:
+; AVX1-LABEL: sitofp_16i8_to_8f32:
; AVX1: # BB#0:
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; AVX1-NEXT: vaddps .LCPI24_2(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: uitofp_4vf32:
+; AVX2-LABEL: sitofp_16i8_to_8f32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastd .LCPI24_0(%rip), %xmm1
-; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd .LCPI24_1(%rip), %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
-; AVX2-NEXT: vbroadcastss .LCPI24_2(%rip), %xmm2
-; AVX2-NEXT: vaddps %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: retq
- %cvt = uitofp <4 x i32> %a to <4 x float>
- ret <4 x float> %cvt
+ %cvt = sitofp <16 x i8> %a to <16 x float>
+ %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x float> %shuf
}
-define <4 x float> @uitofp_4vf32_i64(<2 x i64> %a) {
-; SSE2-LABEL: uitofp_4vf32_i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: andl $1, %ecx
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: js .LBB25_1
-; SSE2-NEXT: # BB#2:
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE2-NEXT: jmp .LBB25_3
-; SSE2-NEXT: .LBB25_1:
-; SSE2-NEXT: shrq %rax
-; SSE2-NEXT: orq %rax, %rcx
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0
-; SSE2-NEXT: addss %xmm0, %xmm0
-; SSE2-NEXT: .LBB25_3:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: andl $1, %ecx
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: js .LBB25_4
-; SSE2-NEXT: # BB#5:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: retq
-; SSE2-NEXT: .LBB25_4:
-; SSE2-NEXT: shrq %rax
-; SSE2-NEXT: orq %rax, %rcx
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ssq %rcx, %xmm1
-; SSE2-NEXT: addss %xmm1, %xmm1
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: uitofp_4vf32_i64:
+;
+; Unsigned Integer to Float
+;
+
+define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
+; SSE-LABEL: uitofp_2i64_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: js .LBB38_1
+; SSE-NEXT: # BB#2:
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: jmp .LBB38_3
+; SSE-NEXT: .LBB38_1:
+; SSE-NEXT: shrq %rax
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rcx, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: .LBB38_3:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: js .LBB38_4
+; SSE-NEXT: # BB#5:
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+; SSE-NEXT: .LBB38_4:
+; SSE-NEXT: shrq %rax
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rcx, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_2i64_to_4f32:
; AVX: # BB#0:
; AVX-NEXT: vpextrq $1, %xmm0, %rax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: andl $1, %ecx
; AVX-NEXT: testq %rax, %rax
-; AVX-NEXT: js .LBB25_1
+; AVX-NEXT: js .LBB38_1
; AVX-NEXT: # BB#2:
; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; AVX-NEXT: jmp .LBB25_3
-; AVX-NEXT: .LBB25_1:
+; AVX-NEXT: jmp .LBB38_3
+; AVX-NEXT: .LBB38_1:
; AVX-NEXT: shrq %rax
; AVX-NEXT: orq %rax, %rcx
; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1
-; AVX-NEXT: .LBB25_3:
+; AVX-NEXT: .LBB38_3:
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: movl %eax, %ecx
; AVX-NEXT: andl $1, %ecx
; AVX-NEXT: testq %rax, %rax
-; AVX-NEXT: js .LBB25_4
+; AVX-NEXT: js .LBB38_4
; AVX-NEXT: # BB#5:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
-; AVX-NEXT: jmp .LBB25_6
-; AVX-NEXT: .LBB25_4:
+; AVX-NEXT: jmp .LBB38_6
+; AVX-NEXT: .LBB38_4:
; AVX-NEXT: shrq %rax
; AVX-NEXT: orq %rax, %rcx
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; AVX-NEXT: .LBB25_6:
+; AVX-NEXT: .LBB38_6:
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: testq %rax, %rax
-; AVX-NEXT: js .LBB25_8
+; AVX-NEXT: js .LBB38_8
; AVX-NEXT: # BB#7:
; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; AVX-NEXT: .LBB25_8:
+; AVX-NEXT: .LBB38_8:
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX-NEXT: retq
@@ -857,15 +1221,147 @@ define <4 x float> @uitofp_4vf32_i64(<2 x i64> %a) {
ret <4 x float> %ext
}
-define <4 x float> @uitofp_4vf32_i16(<8 x i16> %a) {
-; SSE2-LABEL: uitofp_4vf32_i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: uitofp_4vf32_i16:
+define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
+; SSE-LABEL: uitofp_4i64_to_4f32_undef:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: js .LBB39_2
+; SSE-NEXT: # BB#1:
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: .LBB39_2:
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: js .LBB39_3
+; SSE-NEXT: # BB#4:
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: jmp .LBB39_5
+; SSE-NEXT: .LBB39_3:
+; SSE-NEXT: shrq %rax
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rcx, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: .LBB39_5:
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: js .LBB39_6
+; SSE-NEXT: # BB#7:
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: jmp .LBB39_8
+; SSE-NEXT: .LBB39_6:
+; SSE-NEXT: shrq %rax
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rcx, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm1
+; SSE-NEXT: .LBB39_8:
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_4i64_to_4f32_undef:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: andl $1, %ecx
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB39_1
+; AVX-NEXT: # BB#2:
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: jmp .LBB39_3
+; AVX-NEXT: .LBB39_1:
+; AVX-NEXT: shrq %rax
+; AVX-NEXT: orq %rax, %rcx
+; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX-NEXT: .LBB39_3:
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: andl $1, %ecx
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB39_4
+; AVX-NEXT: # BB#5:
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: jmp .LBB39_6
+; AVX-NEXT: .LBB39_4:
+; AVX-NEXT: shrq %rax
+; AVX-NEXT: orq %rax, %rcx
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: .LBB39_6:
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB39_8
+; AVX-NEXT: # BB#7:
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: .LBB39_8:
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-NEXT: retq
+ %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %cvt = uitofp <4 x i64> %ext to <4 x float>
+ ret <4 x float> %cvt
+}
+
+define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) {
+; SSE-LABEL: uitofp_4i32_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; SSE-NEXT: pand %xmm0, %xmm1
+; SSE-NEXT: por {{.*}}(%rip), %xmm1
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: por {{.*}}(%rip), %xmm0
+; SSE-NEXT: addps {{.*}}(%rip), %xmm0
+; SSE-NEXT: addps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_4i32_to_4f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: uitofp_4i32_to_4f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
+; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vaddps %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+ %cvt = uitofp <4 x i32> %a to <4 x float>
+ ret <4 x float> %cvt
+}
+
+define <4 x float> @uitofp_4i16_to_4f32(<8 x i16> %a) {
+; SSE-LABEL: uitofp_4i16_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_4i16_to_4f32:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -875,16 +1371,45 @@ define <4 x float> @uitofp_4vf32_i16(<8 x i16> %a) {
ret <4 x float> %cvt
}
-define <4 x float> @uitofp_4vf32_i8(<16 x i8> %a) {
-; SSE2-LABEL: uitofp_4vf32_i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX-LABEL: uitofp_4vf32_i8:
+define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
+; SSE-LABEL: uitofp_8i16_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_8i16_to_4f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: uitofp_8i16_to_4f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %cvt = uitofp <8 x i16> %a to <8 x float>
+ %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %shuf
+}
+
+define <4 x float> @uitofp_4i8_to_4f32(<16 x i8> %a) {
+; SSE-LABEL: uitofp_4i8_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uitofp_4i8_to_4f32:
; AVX: # BB#0:
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
@@ -894,186 +1419,168 @@ define <4 x float> @uitofp_4vf32_i8(<16 x i8> %a) {
ret <4 x float> %cvt
}
-define <8 x float> @uitofp_8vf32(<8 x i32> %a) {
-; SSE2-LABEL: uitofp_8vf32:
-; SSE2: # BB#0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: pand %xmm2, %xmm3
-; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
-; SSE2-NEXT: por %xmm4, %xmm3
-; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
-; SSE2-NEXT: por %xmm5, %xmm0
-; SSE2-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
-; SSE2-NEXT: addps %xmm6, %xmm0
-; SSE2-NEXT: addps %xmm3, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: por %xmm4, %xmm2
-; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm5, %xmm1
-; SSE2-NEXT: addps %xmm6, %xmm1
-; SSE2-NEXT: addps %xmm2, %xmm1
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: uitofp_8vf32:
+define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
+; SSE-LABEL: uitofp_16i8_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_16i8_to_4f32:
; AVX1: # BB#0:
-; AVX1-NEXT: vandps .LCPI28_0(%rip), %ymm0, %ymm1
-; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX1-NEXT: vmulps .LCPI28_1(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
-; AVX2-LABEL: uitofp_8vf32:
+; AVX2-LABEL: uitofp_16i8_to_4f32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastd .LCPI28_0(%rip), %ymm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
-; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
-; AVX2-NEXT: vpbroadcastd .LCPI28_1(%rip), %ymm2
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
-; AVX2-NEXT: vbroadcastss .LCPI28_2(%rip), %ymm2
-; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
- %cvt = uitofp <8 x i32> %a to <8 x float>
- ret <8 x float> %cvt
+ %cvt = uitofp <16 x i8> %a to <16 x float>
+ %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %shuf
}
-define <4 x float> @uitofp_4vf32_4i64(<4 x i64> %a) {
-; SSE2-LABEL: uitofp_4vf32_4i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: andl $1, %ecx
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: js .LBB29_1
-; SSE2-NEXT: # BB#2:
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE2-NEXT: jmp .LBB29_3
-; SSE2-NEXT: .LBB29_1:
-; SSE2-NEXT: shrq %rax
-; SSE2-NEXT: orq %rax, %rcx
-; SSE2-NEXT: cvtsi2ssq %rcx, %xmm3
-; SSE2-NEXT: addss %xmm3, %xmm3
-; SSE2-NEXT: .LBB29_3:
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: andl $1, %ecx
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: js .LBB29_4
-; SSE2-NEXT: # BB#5:
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE2-NEXT: jmp .LBB29_6
-; SSE2-NEXT: .LBB29_4:
-; SSE2-NEXT: shrq %rax
-; SSE2-NEXT: orq %rax, %rcx
-; SSE2-NEXT: cvtsi2ssq %rcx, %xmm2
-; SSE2-NEXT: addss %xmm2, %xmm2
-; SSE2-NEXT: .LBB29_6:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: andl $1, %ecx
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: js .LBB29_7
-; SSE2-NEXT: # BB#8:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE2-NEXT: jmp .LBB29_9
-; SSE2-NEXT: .LBB29_7:
-; SSE2-NEXT: shrq %rax
-; SSE2-NEXT: orq %rax, %rcx
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ssq %rcx, %xmm1
-; SSE2-NEXT: addss %xmm1, %xmm1
-; SSE2-NEXT: .LBB29_9:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: andl $1, %ecx
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: js .LBB29_10
-; SSE2-NEXT: # BB#11:
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE2-NEXT: jmp .LBB29_12
-; SSE2-NEXT: .LBB29_10:
-; SSE2-NEXT: shrq %rax
-; SSE2-NEXT: orq %rax, %rcx
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0
-; SSE2-NEXT: addss %xmm0, %xmm0
-; SSE2-NEXT: .LBB29_12:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: uitofp_4vf32_4i64:
+define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
+; SSE-LABEL: uitofp_4i64_to_4f32:
+; SSE: # BB#0:
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: js .LBB45_1
+; SSE-NEXT: # BB#2:
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: jmp .LBB45_3
+; SSE-NEXT: .LBB45_1:
+; SSE-NEXT: shrq %rax
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: cvtsi2ssq %rcx, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: .LBB45_3:
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: js .LBB45_4
+; SSE-NEXT: # BB#5:
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: jmp .LBB45_6
+; SSE-NEXT: .LBB45_4:
+; SSE-NEXT: shrq %rax
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: cvtsi2ssq %rcx, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm2
+; SSE-NEXT: .LBB45_6:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: js .LBB45_7
+; SSE-NEXT: # BB#8:
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: jmp .LBB45_9
+; SSE-NEXT: .LBB45_7:
+; SSE-NEXT: shrq %rax
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rcx, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm1
+; SSE-NEXT: .LBB45_9:
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: js .LBB45_10
+; SSE-NEXT: # BB#11:
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: jmp .LBB45_12
+; SSE-NEXT: .LBB45_10:
+; SSE-NEXT: shrq %rax
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rcx, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: .LBB45_12:
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_4i64_to_4f32:
; AVX1: # BB#0:
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB29_1
+; AVX1-NEXT: js .LBB45_1
; AVX1-NEXT: # BB#2:
; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; AVX1-NEXT: jmp .LBB29_3
-; AVX1-NEXT: .LBB29_1:
+; AVX1-NEXT: jmp .LBB45_3
+; AVX1-NEXT: .LBB45_1:
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: orq %rax, %rcx
; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: .LBB29_3:
+; AVX1-NEXT: .LBB45_3:
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB29_4
+; AVX1-NEXT: js .LBB45_4
; AVX1-NEXT: # BB#5:
; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
-; AVX1-NEXT: jmp .LBB29_6
-; AVX1-NEXT: .LBB29_4:
+; AVX1-NEXT: jmp .LBB45_6
+; AVX1-NEXT: .LBB45_4:
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: orq %rax, %rcx
; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: .LBB29_6:
+; AVX1-NEXT: .LBB45_6:
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB29_7
+; AVX1-NEXT: js .LBB45_7
; AVX1-NEXT: # BB#8:
; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
-; AVX1-NEXT: jmp .LBB29_9
-; AVX1-NEXT: .LBB29_7:
+; AVX1-NEXT: jmp .LBB45_9
+; AVX1-NEXT: .LBB45_7:
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: orq %rax, %rcx
; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: .LBB29_9:
+; AVX1-NEXT: .LBB45_9:
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB29_10
+; AVX1-NEXT: js .LBB45_10
; AVX1-NEXT: # BB#11:
; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB29_10:
+; AVX1-NEXT: .LBB45_10:
; AVX1-NEXT: shrq %rax
; AVX1-NEXT: orq %rax, %rcx
; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
@@ -1082,65 +1589,65 @@ define <4 x float> @uitofp_4vf32_4i64(<4 x i64> %a) {
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
-; AVX2-LABEL: uitofp_4vf32_4i64:
+; AVX2-LABEL: uitofp_4i64_to_4f32:
; AVX2: # BB#0:
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: testq %rax, %rax
-; AVX2-NEXT: js .LBB29_1
+; AVX2-NEXT: js .LBB45_1
; AVX2-NEXT: # BB#2:
; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
-; AVX2-NEXT: jmp .LBB29_3
-; AVX2-NEXT: .LBB29_1:
+; AVX2-NEXT: jmp .LBB45_3
+; AVX2-NEXT: .LBB45_1:
; AVX2-NEXT: shrq %rax
; AVX2-NEXT: orq %rax, %rcx
; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: .LBB29_3:
+; AVX2-NEXT: .LBB45_3:
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: testq %rax, %rax
-; AVX2-NEXT: js .LBB29_4
+; AVX2-NEXT: js .LBB45_4
; AVX2-NEXT: # BB#5:
; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
-; AVX2-NEXT: jmp .LBB29_6
-; AVX2-NEXT: .LBB29_4:
+; AVX2-NEXT: jmp .LBB45_6
+; AVX2-NEXT: .LBB45_4:
; AVX2-NEXT: shrq %rax
; AVX2-NEXT: orq %rax, %rcx
; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: .LBB29_6:
+; AVX2-NEXT: .LBB45_6:
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: testq %rax, %rax
-; AVX2-NEXT: js .LBB29_7
+; AVX2-NEXT: js .LBB45_7
; AVX2-NEXT: # BB#8:
; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
-; AVX2-NEXT: jmp .LBB29_9
-; AVX2-NEXT: .LBB29_7:
+; AVX2-NEXT: jmp .LBB45_9
+; AVX2-NEXT: .LBB45_7:
; AVX2-NEXT: shrq %rax
; AVX2-NEXT: orq %rax, %rcx
; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: .LBB29_9:
+; AVX2-NEXT: .LBB45_9:
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: testq %rax, %rax
-; AVX2-NEXT: js .LBB29_10
+; AVX2-NEXT: js .LBB45_10
; AVX2-NEXT: # BB#11:
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
-; AVX2-NEXT: .LBB29_10:
+; AVX2-NEXT: .LBB45_10:
; AVX2-NEXT: shrq %rax
; AVX2-NEXT: orq %rax, %rcx
; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
@@ -1152,20 +1659,69 @@ define <4 x float> @uitofp_4vf32_4i64(<4 x i64> %a) {
ret <4 x float> %cvt
}
-define <8 x float> @uitofp_8vf32_i16(<8 x i16> %a) {
-; SSE2-LABEL: uitofp_8vf32_i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand .LCPI30_0(%rip), %xmm0
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: uitofp_8vf32_i16:
+define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) {
+; SSE-LABEL: uitofp_8i32_to_8f32:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: pand %xmm2, %xmm3
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
+; SSE-NEXT: por %xmm4, %xmm3
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
+; SSE-NEXT: por %xmm5, %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
+; SSE-NEXT: addps %xmm6, %xmm0
+; SSE-NEXT: addps %xmm3, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm2
+; SSE-NEXT: por %xmm4, %xmm2
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: por %xmm5, %xmm1
+; SSE-NEXT: addps %xmm6, %xmm1
+; SSE-NEXT: addps %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_8i32_to_8f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm1
+; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: uitofp_8i32_to_8f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
+; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+ %cvt = uitofp <8 x i32> %a to <8 x float>
+ ret <8 x float> %cvt
+}
+
+define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) {
+; SSE-LABEL: uitofp_8i16_to_8f32:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
+; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_8i16_to_8f32:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
@@ -1174,7 +1730,7 @@ define <8 x float> @uitofp_8vf32_i16(<8 x i16> %a) {
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: uitofp_8vf32_i16:
+; AVX2-LABEL: uitofp_8i16_to_8f32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
@@ -1183,36 +1739,35 @@ define <8 x float> @uitofp_8vf32_i16(<8 x i16> %a) {
ret <8 x float> %cvt
}
-define <8 x float> @uitofp_8vf32_i8(<16 x i8> %a) {
-; SSE2-LABEL: uitofp_8vf32_i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand .LCPI31_0(%rip), %xmm0
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: uitofp_8vf32_i8:
+define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) {
+; SSE-LABEL: uitofp_8i8_to_8f32:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
+; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_8i8_to_8f32:
; AVX1: # BB#0:
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandps .LCPI31_0(%rip), %ymm0, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: uitofp_8vf32_i8:
+; AVX2-LABEL: uitofp_8i8_to_8f32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT: vpbroadcastd .LCPI31_0(%rip), %ymm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: retq
%shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -1220,28 +1775,61 @@ define <8 x float> @uitofp_8vf32_i8(<16 x i8> %a) {
ret <8 x float> %cvt
}
+define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) {
+; SSE-LABEL: uitofp_16i8_to_8f32:
+; SSE: # BB#0:
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
+; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
+; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: uitofp_16i8_to_8f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: uitofp_16i8_to_8f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %cvt = uitofp <16 x i8> %a to <16 x float>
+ %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x float> %shuf
+}
+
;
; Aggregates
;
%Arguments = type <{ <8 x i8>, <8 x i16>, <8 x float>* }>
-define void @aggregate_sitofp_8f32_i16(%Arguments* nocapture readonly %a0) {
-; SSE2-LABEL: aggregate_sitofp_8f32_i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: movq 24(%rdi), %rax
-; SSE2-NEXT: movdqu 8(%rdi), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
-; SSE2-NEXT: movaps %xmm0, (%rax)
-; SSE2-NEXT: movaps %xmm1, 16(%rax)
-; SSE2-NEXT: retq
-;
-; AVX1-LABEL: aggregate_sitofp_8f32_i16:
+define void @aggregate_sitofp_8i16_to_8f32(%Arguments* nocapture readonly %a0) {
+; SSE-LABEL: aggregate_sitofp_8i16_to_8f32:
+; SSE: # BB#0:
+; SSE-NEXT: movq 24(%rdi), %rax
+; SSE-NEXT: movdqu 8(%rdi), %xmm0
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT: psrad $16, %xmm1
+; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
+; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE-NEXT: psrad $16, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: movaps %xmm0, 16(%rax)
+; SSE-NEXT: movaps %xmm1, (%rax)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: aggregate_sitofp_8i16_to_8f32:
; AVX1: # BB#0:
; AVX1-NEXT: movq 24(%rdi), %rax
; AVX1-NEXT: vmovdqu 8(%rdi), %xmm0
@@ -1254,7 +1842,7 @@ define void @aggregate_sitofp_8f32_i16(%Arguments* nocapture readonly %a0) {
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
-; AVX2-LABEL: aggregate_sitofp_8f32_i16:
+; AVX2-LABEL: aggregate_sitofp_8i16_to_8f32:
; AVX2: # BB#0:
; AVX2-NEXT: movq 24(%rdi), %rax
; AVX2-NEXT: vpmovsxwd 8(%rdi), %ymm0
diff --git a/test/CodeGen/X86/vec_minmax_sint.ll b/test/CodeGen/X86/vec_minmax_sint.ll
new file mode 100644
index 000000000000..419eb2bed743
--- /dev/null
+++ b/test/CodeGen/X86/vec_minmax_sint.ll
@@ -0,0 +1,2090 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
+
+;
+; Signed Maximum (GT)
+;
+
+define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: max_gt_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pxor %xmm0, %xmm3
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm5, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm2, %xmm1
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: blendvpd %xmm2, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_gt_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sgt <2 x i64> %a, %b
+ %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: max_gt_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm0, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: movdqa %xmm3, %xmm5
+; SSE41-NEXT: pxor %xmm0, %xmm5
+; SSE41-NEXT: movdqa %xmm1, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: movdqa %xmm6, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm5
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pxor %xmm0, %xmm4
+; SSE41-NEXT: pxor %xmm8, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm6
+; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm7, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
+; SSE41-NEXT: por %xmm4, %xmm0
+; SSE41-NEXT: blendvpd %xmm8, %xmm2
+; SSE41-NEXT: movdqa %xmm5, %xmm0
+; SSE41-NEXT: blendvpd %xmm1, %xmm3
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: movapd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v4i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm4
+; SSE42-NEXT: movdqa %xmm1, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT: blendvpd %xmm4, %xmm2
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: blendvpd %xmm1, %xmm3
+; SSE42-NEXT: movapd %xmm2, %xmm0
+; SSE42-NEXT: movapd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_gt_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sgt <4 x i64> %a, %b
+ %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %2
+}
+
+define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: max_gt_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxsd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxsd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_gt_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sgt <4 x i32> %a, %b
+ %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: max_gt_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxsd %xmm2, %xmm0
+; SSE41-NEXT: pmaxsd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v8i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxsd %xmm2, %xmm0
+; SSE42-NEXT: pmaxsd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_gt_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sgt <8 x i32> %a, %b
+ %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: max_gt_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pmaxsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sgt <8 x i16> %a, %b
+ %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: max_gt_v16i16:
+; SSE: # BB#0:
+; SSE-NEXT: pmaxsw %xmm2, %xmm0
+; SSE-NEXT: pmaxsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: max_gt_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sgt <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: max_gt_v16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxsb %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v16i8:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_gt_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sgt <16 x i8> %a, %b
+ %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %2
+}
+
+define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: max_gt_v32i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v32i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxsb %xmm2, %xmm0
+; SSE41-NEXT: pmaxsb %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v32i8:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxsb %xmm2, %xmm0
+; SSE42-NEXT: pmaxsb %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_gt_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sgt <32 x i8> %a, %b
+ %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %2
+}
+
+;
+; Signed Maximum (GE)
+;
+
+define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: max_ge_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: pxor %xmm0, %xmm3
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm5, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE41-NEXT: por %xmm0, %xmm3
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm2, %xmm1
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm1, %xmm3
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm3
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE42-NEXT: pxor %xmm3, %xmm0
+; SSE42-NEXT: blendvpd %xmm2, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_ge_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sge <2 x i64> %a, %b
+ %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: max_ge_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm8
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: movdqa %xmm1, %xmm5
+; SSE41-NEXT: pxor %xmm0, %xmm5
+; SSE41-NEXT: movdqa %xmm3, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: movdqa %xmm6, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm5
+; SSE41-NEXT: pcmpeqd %xmm9, %xmm9
+; SSE41-NEXT: pxor %xmm9, %xmm5
+; SSE41-NEXT: movdqa %xmm8, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm0
+; SSE41-NEXT: pxor %xmm9, %xmm0
+; SSE41-NEXT: blendvpd %xmm8, %xmm2
+; SSE41-NEXT: movdqa %xmm5, %xmm0
+; SSE41-NEXT: blendvpd %xmm1, %xmm3
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: movapd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v4i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm4
+; SSE42-NEXT: movdqa %xmm3, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm5
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE42-NEXT: pxor %xmm0, %xmm5
+; SSE42-NEXT: movdqa %xmm2, %xmm6
+; SSE42-NEXT: pcmpgtq %xmm4, %xmm6
+; SSE42-NEXT: pxor %xmm6, %xmm0
+; SSE42-NEXT: blendvpd %xmm4, %xmm2
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: blendvpd %xmm1, %xmm3
+; SSE42-NEXT: movapd %xmm2, %xmm0
+; SSE42-NEXT: movapd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_ge_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sge <4 x i64> %a, %b
+ %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %2
+}
+
+define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: max_ge_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxsd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxsd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_ge_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sge <4 x i32> %a, %b
+ %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: max_ge_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm7
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm7
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm6
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxsd %xmm2, %xmm0
+; SSE41-NEXT: pmaxsd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v8i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxsd %xmm2, %xmm0
+; SSE42-NEXT: pmaxsd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_ge_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sge <8 x i32> %a, %b
+ %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: max_ge_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pmaxsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sge <8 x i16> %a, %b
+ %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: max_ge_v16i16:
+; SSE: # BB#0:
+; SSE-NEXT: pmaxsw %xmm2, %xmm0
+; SSE-NEXT: pmaxsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: max_ge_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sge <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: max_ge_v16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxsb %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v16i8:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxsb %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_ge_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sge <16 x i8> %a, %b
+ %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %2
+}
+
+define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: max_ge_v32i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm7
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm7
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm7
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm6
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v32i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxsb %xmm2, %xmm0
+; SSE41-NEXT: pmaxsb %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v32i8:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxsb %xmm2, %xmm0
+; SSE42-NEXT: pmaxsb %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_ge_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sge <32 x i8> %a, %b
+ %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %2
+}
+
+;
+; Signed Minimum (LT)
+;
+
+define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: min_lt_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: pxor %xmm0, %xmm3
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm5, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm2, %xmm1
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: movdqa %xmm1, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT: blendvpd %xmm2, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_lt_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp slt <2 x i64> %a, %b
+ %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: min_lt_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: movdqa %xmm1, %xmm5
+; SSE41-NEXT: pxor %xmm0, %xmm5
+; SSE41-NEXT: movdqa %xmm3, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: movdqa %xmm6, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm5
+; SSE41-NEXT: movdqa %xmm8, %xmm4
+; SSE41-NEXT: pxor %xmm0, %xmm4
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm6
+; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm7, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
+; SSE41-NEXT: por %xmm4, %xmm0
+; SSE41-NEXT: blendvpd %xmm8, %xmm2
+; SSE41-NEXT: movdqa %xmm5, %xmm0
+; SSE41-NEXT: blendvpd %xmm1, %xmm3
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: movapd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v4i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm4
+; SSE42-NEXT: movdqa %xmm3, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm5
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
+; SSE42-NEXT: blendvpd %xmm4, %xmm2
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: blendvpd %xmm1, %xmm3
+; SSE42-NEXT: movapd %xmm2, %xmm0
+; SSE42-NEXT: movapd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_lt_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp slt <4 x i64> %a, %b
+ %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %2
+}
+
+define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: min_lt_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminsd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminsd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_lt_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp slt <4 x i32> %a, %b
+ %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: min_lt_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminsd %xmm2, %xmm0
+; SSE41-NEXT: pminsd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v8i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminsd %xmm2, %xmm0
+; SSE42-NEXT: pminsd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_lt_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp slt <8 x i32> %a, %b
+ %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: min_lt_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pminsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp slt <8 x i16> %a, %b
+ %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: min_lt_v16i16:
+; SSE: # BB#0:
+; SSE-NEXT: pminsw %xmm2, %xmm0
+; SSE-NEXT: pminsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: min_lt_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp slt <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: min_lt_v16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminsb %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v16i8:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminsb %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_lt_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp slt <16 x i8> %a, %b
+ %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %2
+}
+
+define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: min_lt_v32i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v32i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminsb %xmm2, %xmm0
+; SSE41-NEXT: pminsb %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v32i8:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminsb %xmm2, %xmm0
+; SSE42-NEXT: pminsb %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_lt_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp slt <32 x i8> %a, %b
+ %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %2
+}
+
+;
+; Signed Minimum (LE)
+;
+
+define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: min_le_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pxor %xmm0, %xmm3
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm5, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE41-NEXT: por %xmm0, %xmm3
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm2, %xmm1
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE42-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE42-NEXT: pxor %xmm3, %xmm0
+; SSE42-NEXT: blendvpd %xmm2, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_le_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sle <2 x i64> %a, %b
+ %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: min_le_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm8
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0]
+; SSE41-NEXT: movdqa %xmm3, %xmm5
+; SSE41-NEXT: pxor %xmm0, %xmm5
+; SSE41-NEXT: movdqa %xmm1, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: movdqa %xmm6, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm5
+; SSE41-NEXT: pcmpeqd %xmm9, %xmm9
+; SSE41-NEXT: pxor %xmm9, %xmm5
+; SSE41-NEXT: movdqa %xmm2, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: pxor %xmm8, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm0
+; SSE41-NEXT: pxor %xmm9, %xmm0
+; SSE41-NEXT: blendvpd %xmm8, %xmm2
+; SSE41-NEXT: movdqa %xmm5, %xmm0
+; SSE41-NEXT: blendvpd %xmm1, %xmm3
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: movapd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v4i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm4
+; SSE42-NEXT: movdqa %xmm1, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm5
+; SSE42-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE42-NEXT: pxor %xmm6, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT: pxor %xmm6, %xmm0
+; SSE42-NEXT: blendvpd %xmm4, %xmm2
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: blendvpd %xmm1, %xmm3
+; SSE42-NEXT: movapd %xmm2, %xmm0
+; SSE42-NEXT: movapd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_le_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sle <4 x i64> %a, %b
+ %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %2
+}
+
+define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: min_le_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminsd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminsd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_le_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sle <4 x i32> %a, %b
+ %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: min_le_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
+; SSE2-NEXT: movdqa %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm7
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm7
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm6
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminsd %xmm2, %xmm0
+; SSE41-NEXT: pminsd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v8i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminsd %xmm2, %xmm0
+; SSE42-NEXT: pminsd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_le_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sle <8 x i32> %a, %b
+ %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: min_le_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: pminsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sle <8 x i16> %a, %b
+ %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: min_le_v16i16:
+; SSE: # BB#0:
+; SSE-NEXT: pminsw %xmm2, %xmm0
+; SSE-NEXT: pminsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: min_le_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sle <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: min_le_v16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminsb %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v16i8:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminsb %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_le_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp sle <16 x i8> %a, %b
+ %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %2
+}
+
+define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: min_le_v32i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
+; SSE2-NEXT: movdqa %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm7
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm7
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm6
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v32i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminsb %xmm2, %xmm0
+; SSE41-NEXT: pminsb %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v32i8:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminsb %xmm2, %xmm0
+; SSE42-NEXT: pminsb %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_le_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp sle <32 x i8> %a, %b
+ %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %2
+}
+
+;
+; Constant Folding
+;
+
+define <2 x i64> @max_gt_v2i64c() {
+; SSE-LABEL: max_gt_v2i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v2i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
+; AVX-NEXT: retq
+ %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
+ %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
+ %3 = icmp sgt <2 x i64> %1, %2
+ %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+ ret <2 x i64> %4
+}
+
+define <4 x i64> @max_gt_v4i64c() {
+; SSE-LABEL: max_gt_v4i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v4i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
+ %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
+ %3 = icmp sgt <4 x i64> %1, %2
+ %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
+ ret <4 x i64> %4
+}
+
+define <4 x i32> @max_gt_v4i32c() {
+; SSE-LABEL: max_gt_v4i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v4i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
+ %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
+ %3 = icmp sgt <4 x i32> %1, %2
+ %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+ ret <4 x i32> %4
+}
+
+define <8 x i32> @max_gt_v8i32c() {
+; SSE-LABEL: max_gt_v8i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
+ %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
+ %3 = icmp sgt <8 x i32> %1, %2
+ %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
+ ret <8 x i32> %4
+}
+
+define <8 x i16> @max_gt_v8i16c() {
+; SSE-LABEL: max_gt_v8i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v8i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
+ %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
+ %3 = icmp sgt <8 x i16> %1, %2
+ %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+ ret <8 x i16> %4
+}
+
+define <16 x i16> @max_gt_v16i16c() {
+; SSE-LABEL: max_gt_v16i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
+ %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
+ %3 = icmp sgt <16 x i16> %1, %2
+ %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
+ ret <16 x i16> %4
+}
+
+define <16 x i8> @max_gt_v16i8c() {
+; SSE-LABEL: max_gt_v16i8c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v16i8c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
+ %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
+ %3 = icmp sgt <16 x i8> %1, %2
+ %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+ ret <16 x i8> %4
+}
+
+define <2 x i64> @max_ge_v2i64c() {
+; SSE-LABEL: max_ge_v2i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v2i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
+; AVX-NEXT: retq
+ %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
+ %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
+ %3 = icmp sge <2 x i64> %1, %2
+ %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+ ret <2 x i64> %4
+}
+
+define <4 x i64> @max_ge_v4i64c() {
+; SSE-LABEL: max_ge_v4i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v4i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
+ %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
+ %3 = icmp sge <4 x i64> %1, %2
+ %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
+ ret <4 x i64> %4
+}
+
+define <4 x i32> @max_ge_v4i32c() {
+; SSE-LABEL: max_ge_v4i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v4i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
+ %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
+ %3 = icmp sge <4 x i32> %1, %2
+ %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+ ret <4 x i32> %4
+}
+
+define <8 x i32> @max_ge_v8i32c() {
+; SSE-LABEL: max_ge_v8i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
+ %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
+ %3 = icmp sge <8 x i32> %1, %2
+ %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
+ ret <8 x i32> %4
+}
+
+define <8 x i16> @max_ge_v8i16c() {
+; SSE-LABEL: max_ge_v8i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v8i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
+ %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
+ %3 = icmp sge <8 x i16> %1, %2
+ %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+ ret <8 x i16> %4
+}
+
+define <16 x i16> @max_ge_v16i16c() {
+; SSE-LABEL: max_ge_v16i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
+ %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
+ %3 = icmp sge <16 x i16> %1, %2
+ %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
+ ret <16 x i16> %4
+}
+
+define <16 x i8> @max_ge_v16i8c() {
+; SSE-LABEL: max_ge_v16i8c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v16i8c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
+ %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
+ %3 = icmp sge <16 x i8> %1, %2
+ %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+ ret <16 x i8> %4
+}
+
+define <2 x i64> @min_lt_v2i64c() {
+; SSE-LABEL: min_lt_v2i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v2i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
+; AVX-NEXT: retq
+ %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
+ %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
+ %3 = icmp slt <2 x i64> %1, %2
+ %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+ ret <2 x i64> %4
+}
+
+define <4 x i64> @min_lt_v4i64c() {
+; SSE-LABEL: min_lt_v4i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v4i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
+ %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
+ %3 = icmp slt <4 x i64> %1, %2
+ %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
+ ret <4 x i64> %4
+}
+
+define <4 x i32> @min_lt_v4i32c() {
+; SSE-LABEL: min_lt_v4i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v4i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
+ %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
+ %3 = icmp slt <4 x i32> %1, %2
+ %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+ ret <4 x i32> %4
+}
+
+define <8 x i32> @min_lt_v8i32c() {
+; SSE-LABEL: min_lt_v8i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
+ %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
+ %3 = icmp slt <8 x i32> %1, %2
+ %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
+ ret <8 x i32> %4
+}
+
+define <8 x i16> @min_lt_v8i16c() {
+; SSE-LABEL: min_lt_v8i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v8i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
+ %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
+ %3 = icmp slt <8 x i16> %1, %2
+ %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+ ret <8 x i16> %4
+}
+
+define <16 x i16> @min_lt_v16i16c() {
+; SSE-LABEL: min_lt_v16i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
+ %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
+ %3 = icmp slt <16 x i16> %1, %2
+ %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
+ ret <16 x i16> %4
+}
+
+define <16 x i8> @min_lt_v16i8c() {
+; SSE-LABEL: min_lt_v16i8c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v16i8c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
+ %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
+ %3 = icmp slt <16 x i8> %1, %2
+ %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+ ret <16 x i8> %4
+}
+
+define <2 x i64> @min_le_v2i64c() {
+; SSE-LABEL: min_le_v2i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v2i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
+; AVX-NEXT: retq
+ %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
+ %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
+ %3 = icmp sle <2 x i64> %1, %2
+ %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+ ret <2 x i64> %4
+}
+
+define <4 x i64> @min_le_v4i64c() {
+; SSE-LABEL: min_le_v4i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v4i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
+ %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
+ %3 = icmp sle <4 x i64> %1, %2
+ %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
+ ret <4 x i64> %4
+}
+
+define <4 x i32> @min_le_v4i32c() {
+; SSE-LABEL: min_le_v4i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v4i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
+ %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
+ %3 = icmp sle <4 x i32> %1, %2
+ %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+ ret <4 x i32> %4
+}
+
+define <8 x i32> @min_le_v8i32c() {
+; SSE-LABEL: min_le_v8i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
+ %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
+ %3 = icmp sle <8 x i32> %1, %2
+ %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
+ ret <8 x i32> %4
+}
+
+define <8 x i16> @min_le_v8i16c() {
+; SSE-LABEL: min_le_v8i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v8i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
+ %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
+ %3 = icmp sle <8 x i16> %1, %2
+ %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+ ret <8 x i16> %4
+}
+
+define <16 x i16> @min_le_v16i16c() {
+; SSE-LABEL: min_le_v16i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
+ %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
+ %3 = icmp sle <16 x i16> %1, %2
+ %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
+ ret <16 x i16> %4
+}
+
+define <16 x i8> @min_le_v16i8c() {
+; SSE-LABEL: min_le_v16i8c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v16i8c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
+ %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
+ %3 = icmp sle <16 x i8> %1, %2
+ %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+ ret <16 x i8> %4
+}
diff --git a/test/CodeGen/X86/vec_minmax_uint.ll b/test/CodeGen/X86/vec_minmax_uint.ll
new file mode 100644
index 000000000000..6e48423c1520
--- /dev/null
+++ b/test/CodeGen/X86/vec_minmax_uint.ll
@@ -0,0 +1,2229 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
+
+;
+; Unsigned Maximum (GT)
+;
+
+define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: max_gt_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pxor %xmm0, %xmm3
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm5, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm2, %xmm1
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: movdqa %xmm1, %xmm3
+; SSE42-NEXT: pxor %xmm0, %xmm3
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
+; SSE42-NEXT: blendvpd %xmm2, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_gt_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ugt <2 x i64> %a, %b
+ %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: max_gt_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm0, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: movdqa %xmm3, %xmm5
+; SSE41-NEXT: pxor %xmm0, %xmm5
+; SSE41-NEXT: movdqa %xmm1, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: movdqa %xmm6, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm5
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pxor %xmm0, %xmm4
+; SSE41-NEXT: pxor %xmm8, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm6
+; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm7, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
+; SSE41-NEXT: por %xmm4, %xmm0
+; SSE41-NEXT: blendvpd %xmm8, %xmm2
+; SSE41-NEXT: movdqa %xmm5, %xmm0
+; SSE41-NEXT: blendvpd %xmm1, %xmm3
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: movapd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v4i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm4
+; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: movdqa %xmm3, %xmm6
+; SSE42-NEXT: pxor %xmm0, %xmm6
+; SSE42-NEXT: movdqa %xmm1, %xmm5
+; SSE42-NEXT: pxor %xmm0, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE42-NEXT: movdqa %xmm2, %xmm6
+; SSE42-NEXT: pxor %xmm0, %xmm6
+; SSE42-NEXT: pxor %xmm4, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
+; SSE42-NEXT: blendvpd %xmm4, %xmm2
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: blendvpd %xmm1, %xmm3
+; SSE42-NEXT: movapd %xmm2, %xmm0
+; SSE42-NEXT: movapd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_gt_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3
+; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ugt <4 x i64> %a, %b
+ %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %2
+}
+
+define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: max_gt_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxud %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxud %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_gt_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ugt <4 x i32> %a, %b
+ %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: max_gt_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxud %xmm2, %xmm0
+; SSE41-NEXT: pmaxud %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v8i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxud %xmm2, %xmm0
+; SSE42-NEXT: pmaxud %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_gt_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ugt <8 x i32> %a, %b
+ %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: max_gt_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxuw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v8i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxuw %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_gt_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ugt <8 x i16> %a, %b
+ %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: max_gt_v16i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pcmpgtw %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtw %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_gt_v16i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxuw %xmm2, %xmm0
+; SSE41-NEXT: pmaxuw %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_gt_v16i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxuw %xmm2, %xmm0
+; SSE42-NEXT: pmaxuw %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_gt_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ugt <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: max_gt_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ugt <16 x i8> %a, %b
+ %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %2
+}
+
+define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: max_gt_v32i8:
+; SSE: # BB#0:
+; SSE-NEXT: pmaxub %xmm2, %xmm0
+; SSE-NEXT: pmaxub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: max_gt_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_gt_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_gt_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ugt <32 x i8> %a, %b
+ %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %2
+}
+
+;
+; Unsigned Maximum (GE)
+;
+
+define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: max_ge_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: pxor %xmm0, %xmm3
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm5, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE41-NEXT: por %xmm0, %xmm3
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm2, %xmm1
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: pxor %xmm3, %xmm0
+; SSE42-NEXT: pxor %xmm1, %xmm3
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm3
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE42-NEXT: pxor %xmm3, %xmm0
+; SSE42-NEXT: blendvpd %xmm2, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_ge_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp uge <2 x i64> %a, %b
+ %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: max_ge_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm8
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: movdqa %xmm1, %xmm5
+; SSE41-NEXT: pxor %xmm0, %xmm5
+; SSE41-NEXT: movdqa %xmm3, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: movdqa %xmm6, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm5
+; SSE41-NEXT: pcmpeqd %xmm9, %xmm9
+; SSE41-NEXT: pxor %xmm9, %xmm5
+; SSE41-NEXT: movdqa %xmm8, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm0
+; SSE41-NEXT: pxor %xmm9, %xmm0
+; SSE41-NEXT: blendvpd %xmm8, %xmm2
+; SSE41-NEXT: movdqa %xmm5, %xmm0
+; SSE41-NEXT: blendvpd %xmm1, %xmm3
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: movapd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v4i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm4
+; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: movdqa %xmm1, %xmm6
+; SSE42-NEXT: pxor %xmm0, %xmm6
+; SSE42-NEXT: movdqa %xmm3, %xmm5
+; SSE42-NEXT: pxor %xmm0, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE42-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE42-NEXT: pxor %xmm6, %xmm5
+; SSE42-NEXT: movdqa %xmm4, %xmm7
+; SSE42-NEXT: pxor %xmm0, %xmm7
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm7, %xmm0
+; SSE42-NEXT: pxor %xmm6, %xmm0
+; SSE42-NEXT: blendvpd %xmm4, %xmm2
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: blendvpd %xmm1, %xmm3
+; SSE42-NEXT: movapd %xmm2, %xmm0
+; SSE42-NEXT: movapd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_ge_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm5
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp uge <4 x i64> %a, %b
+ %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %2
+}
+
+define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: max_ge_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxud %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxud %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_ge_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp uge <4 x i32> %a, %b
+ %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: max_ge_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: pxor %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm0, %xmm8
+; SSE2-NEXT: pxor %xmm6, %xmm8
+; SSE2-NEXT: pxor %xmm2, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxud %xmm2, %xmm0
+; SSE41-NEXT: pmaxud %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v8i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxud %xmm2, %xmm0
+; SSE42-NEXT: pmaxud %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_ge_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp uge <8 x i32> %a, %b
+ %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: max_ge_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psubusw %xmm0, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpeqw %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxuw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v8i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxuw %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: max_ge_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp uge <8 x i16> %a, %b
+ %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: max_ge_v16i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: psubusw %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpeqw %xmm5, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: psubusw %xmm0, %xmm6
+; SSE2-NEXT: pcmpeqw %xmm5, %xmm6
+; SSE2-NEXT: pand %xmm6, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: max_ge_v16i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pmaxuw %xmm2, %xmm0
+; SSE41-NEXT: pmaxuw %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: max_ge_v16i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pmaxuw %xmm2, %xmm0
+; SSE42-NEXT: pmaxuw %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: max_ge_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp uge <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: max_ge_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp uge <16 x i8> %a, %b
+ %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %2
+}
+
+define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: max_ge_v32i8:
+; SSE: # BB#0:
+; SSE-NEXT: pmaxub %xmm2, %xmm0
+; SSE-NEXT: pmaxub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: max_ge_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: max_ge_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: max_ge_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp uge <32 x i8> %a, %b
+ %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %2
+}
+
+;
+; Unsigned Minimum (LT)
+;
+
+define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: min_lt_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: pxor %xmm0, %xmm3
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm5, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
+; SSE41-NEXT: por %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm2, %xmm1
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: movdqa %xmm2, %xmm3
+; SSE42-NEXT: pxor %xmm0, %xmm3
+; SSE42-NEXT: pxor %xmm1, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
+; SSE42-NEXT: blendvpd %xmm2, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_lt_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ult <2 x i64> %a, %b
+ %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: min_lt_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: movdqa %xmm1, %xmm5
+; SSE41-NEXT: pxor %xmm0, %xmm5
+; SSE41-NEXT: movdqa %xmm3, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: movdqa %xmm6, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm5
+; SSE41-NEXT: movdqa %xmm8, %xmm4
+; SSE41-NEXT: pxor %xmm0, %xmm4
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm6
+; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm7, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
+; SSE41-NEXT: por %xmm4, %xmm0
+; SSE41-NEXT: blendvpd %xmm8, %xmm2
+; SSE41-NEXT: movdqa %xmm5, %xmm0
+; SSE41-NEXT: blendvpd %xmm1, %xmm3
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: movapd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v4i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm4
+; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: movdqa %xmm1, %xmm6
+; SSE42-NEXT: pxor %xmm0, %xmm6
+; SSE42-NEXT: movdqa %xmm3, %xmm5
+; SSE42-NEXT: pxor %xmm0, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE42-NEXT: movdqa %xmm4, %xmm6
+; SSE42-NEXT: pxor %xmm0, %xmm6
+; SSE42-NEXT: pxor %xmm2, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
+; SSE42-NEXT: blendvpd %xmm4, %xmm2
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: blendvpd %xmm1, %xmm3
+; SSE42-NEXT: movapd %xmm2, %xmm0
+; SSE42-NEXT: movapd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_lt_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ult <4 x i64> %a, %b
+ %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %2
+}
+
+define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: min_lt_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminud %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminud %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_lt_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ult <4 x i32> %a, %b
+ %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: min_lt_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminud %xmm2, %xmm0
+; SSE41-NEXT: pminud %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v8i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminud %xmm2, %xmm0
+; SSE42-NEXT: pminud %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_lt_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ult <8 x i32> %a, %b
+ %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: min_lt_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminuw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v8i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminuw %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_lt_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ult <8 x i16> %a, %b
+ %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: min_lt_v16i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: pcmpgtw %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtw %xmm5, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_lt_v16i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminuw %xmm2, %xmm0
+; SSE41-NEXT: pminuw %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_lt_v16i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminuw %xmm2, %xmm0
+; SSE42-NEXT: pminuw %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_lt_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ult <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: min_lt_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ult <16 x i8> %a, %b
+ %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %2
+}
+
+define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: min_lt_v32i8:
+; SSE: # BB#0:
+; SSE-NEXT: pminub %xmm2, %xmm0
+; SSE-NEXT: pminub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: min_lt_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_lt_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_lt_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ult <32 x i8> %a, %b
+ %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %2
+}
+
+;
+; Unsigned Minimum (LE)
+;
+
+define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: min_le_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pxor %xmm0, %xmm3
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm5, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE41-NEXT: por %xmm0, %xmm3
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE41-NEXT: pxor %xmm3, %xmm0
+; SSE41-NEXT: blendvpd %xmm2, %xmm1
+; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v2i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm2
+; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: movdqa %xmm1, %xmm0
+; SSE42-NEXT: pxor %xmm3, %xmm0
+; SSE42-NEXT: pxor %xmm2, %xmm3
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm3
+; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE42-NEXT: pxor %xmm3, %xmm0
+; SSE42-NEXT: blendvpd %xmm2, %xmm1
+; SSE42-NEXT: movapd %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_le_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ule <2 x i64> %a, %b
+ %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: min_le_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm8
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE41-NEXT: movdqa %xmm3, %xmm5
+; SSE41-NEXT: pxor %xmm0, %xmm5
+; SSE41-NEXT: movdqa %xmm1, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: movdqa %xmm6, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm5
+; SSE41-NEXT: pcmpeqd %xmm9, %xmm9
+; SSE41-NEXT: pxor %xmm9, %xmm5
+; SSE41-NEXT: movdqa %xmm2, %xmm6
+; SSE41-NEXT: pxor %xmm0, %xmm6
+; SSE41-NEXT: pxor %xmm8, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm7
+; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
+; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
+; SSE41-NEXT: pand %xmm4, %xmm6
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
+; SSE41-NEXT: por %xmm6, %xmm0
+; SSE41-NEXT: pxor %xmm9, %xmm0
+; SSE41-NEXT: blendvpd %xmm8, %xmm2
+; SSE41-NEXT: movdqa %xmm5, %xmm0
+; SSE41-NEXT: blendvpd %xmm1, %xmm3
+; SSE41-NEXT: movapd %xmm2, %xmm0
+; SSE41-NEXT: movapd %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v4i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: movdqa %xmm0, %xmm4
+; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE42-NEXT: movdqa %xmm3, %xmm6
+; SSE42-NEXT: pxor %xmm0, %xmm6
+; SSE42-NEXT: movdqa %xmm1, %xmm5
+; SSE42-NEXT: pxor %xmm0, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE42-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE42-NEXT: pxor %xmm6, %xmm5
+; SSE42-NEXT: movdqa %xmm2, %xmm7
+; SSE42-NEXT: pxor %xmm0, %xmm7
+; SSE42-NEXT: pxor %xmm4, %xmm0
+; SSE42-NEXT: pcmpgtq %xmm7, %xmm0
+; SSE42-NEXT: pxor %xmm6, %xmm0
+; SSE42-NEXT: blendvpd %xmm4, %xmm2
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: blendvpd %xmm1, %xmm3
+; SSE42-NEXT: movapd %xmm2, %xmm0
+; SSE42-NEXT: movapd %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_le_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v4i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3
+; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ule <4 x i64> %a, %b
+ %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %2
+}
+
+define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: min_le_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pxor %xmm0, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminud %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v4i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminud %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_le_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ule <4 x i32> %a, %b
+ %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: min_le_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm7
+; SSE2-NEXT: pxor %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm8
+; SSE2-NEXT: pxor %xmm6, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminud %xmm2, %xmm0
+; SSE41-NEXT: pminud %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v8i32:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminud %xmm2, %xmm0
+; SSE42-NEXT: pminud %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_le_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v8i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ule <8 x i32> %a, %b
+ %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: min_le_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psubusw %xmm1, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpeqw %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminuw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v8i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminuw %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: min_le_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ule <8 x i16> %a, %b
+ %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %2
+}
+
+define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: min_le_v16i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: psubusw %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpeqw %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: psubusw %xmm2, %xmm5
+; SSE2-NEXT: pcmpeqw %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: min_le_v16i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pminuw %xmm2, %xmm0
+; SSE41-NEXT: pminuw %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; SSE42-LABEL: min_le_v16i16:
+; SSE42: # BB#0:
+; SSE42-NEXT: pminuw %xmm2, %xmm0
+; SSE42-NEXT: pminuw %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: min_le_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v16i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ule <16 x i16> %a, %b
+ %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %2
+}
+
+define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: min_le_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = icmp ule <16 x i8> %a, %b
+ %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %2
+}
+
+define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: min_le_v32i8:
+; SSE: # BB#0:
+; SSE-NEXT: pminub %xmm2, %xmm0
+; SSE-NEXT: pminub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: min_le_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: min_le_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: min_le_v32i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+ %1 = icmp ule <32 x i8> %a, %b
+ %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %2
+}
+
+;
+; Constant Folding
+;
+
+define <2 x i64> @max_gt_v2i64c() {
+; SSE-LABEL: max_gt_v2i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v2i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
+; AVX-NEXT: retq
+ %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
+ %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
+ %3 = icmp ugt <2 x i64> %1, %2
+ %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+ ret <2 x i64> %4
+}
+
+define <4 x i64> @max_gt_v4i64c() {
+; SSE-LABEL: max_gt_v4i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v4i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
+ %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
+ %3 = icmp ugt <4 x i64> %1, %2
+ %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
+ ret <4 x i64> %4
+}
+
+define <4 x i32> @max_gt_v4i32c() {
+; SSE-LABEL: max_gt_v4i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v4i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
+ %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
+ %3 = icmp ugt <4 x i32> %1, %2
+ %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+ ret <4 x i32> %4
+}
+
+define <8 x i32> @max_gt_v8i32c() {
+; SSE-LABEL: max_gt_v8i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
+ %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
+ %3 = icmp ugt <8 x i32> %1, %2
+ %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
+ ret <8 x i32> %4
+}
+
+define <8 x i16> @max_gt_v8i16c() {
+; SSE-LABEL: max_gt_v8i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v8i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
+ %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
+ %3 = icmp ugt <8 x i16> %1, %2
+ %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+ ret <8 x i16> %4
+}
+
+define <16 x i16> @max_gt_v16i16c() {
+; SSE-LABEL: max_gt_v16i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
+ %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
+ %3 = icmp ugt <16 x i16> %1, %2
+ %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
+ ret <16 x i16> %4
+}
+
+define <16 x i8> @max_gt_v16i8c() {
+; SSE-LABEL: max_gt_v16i8c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_gt_v16i8c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
+ %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
+ %3 = icmp ugt <16 x i8> %1, %2
+ %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+ ret <16 x i8> %4
+}
+
+define <2 x i64> @max_ge_v2i64c() {
+; SSE-LABEL: max_ge_v2i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v2i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
+; AVX-NEXT: retq
+ %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
+ %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
+ %3 = icmp uge <2 x i64> %1, %2
+ %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+ ret <2 x i64> %4
+}
+
+define <4 x i64> @max_ge_v4i64c() {
+; SSE-LABEL: max_ge_v4i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v4i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
+ %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
+ %3 = icmp uge <4 x i64> %1, %2
+ %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
+ ret <4 x i64> %4
+}
+
+define <4 x i32> @max_ge_v4i32c() {
+; SSE-LABEL: max_ge_v4i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v4i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
+ %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
+ %3 = icmp uge <4 x i32> %1, %2
+ %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+ ret <4 x i32> %4
+}
+
+define <8 x i32> @max_ge_v8i32c() {
+; SSE-LABEL: max_ge_v8i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
+ %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
+ %3 = icmp uge <8 x i32> %1, %2
+ %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
+ ret <8 x i32> %4
+}
+
+define <8 x i16> @max_ge_v8i16c() {
+; SSE-LABEL: max_ge_v8i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v8i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
+ %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
+ %3 = icmp uge <8 x i16> %1, %2
+ %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+ ret <8 x i16> %4
+}
+
+define <16 x i16> @max_ge_v16i16c() {
+; SSE-LABEL: max_ge_v16i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
+ %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
+ %3 = icmp uge <16 x i16> %1, %2
+ %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
+ ret <16 x i16> %4
+}
+
+define <16 x i8> @max_ge_v16i8c() {
+; SSE-LABEL: max_ge_v16i8c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: max_ge_v16i8c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
+ %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
+ %3 = icmp uge <16 x i8> %1, %2
+ %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+ ret <16 x i8> %4
+}
+
+define <2 x i64> @min_lt_v2i64c() {
+; SSE-LABEL: min_lt_v2i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v2i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
+; AVX-NEXT: retq
+ %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
+ %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
+ %3 = icmp ult <2 x i64> %1, %2
+ %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+ ret <2 x i64> %4
+}
+
+define <4 x i64> @min_lt_v4i64c() {
+; SSE-LABEL: min_lt_v4i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v4i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
+ %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
+ %3 = icmp ult <4 x i64> %1, %2
+ %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
+ ret <4 x i64> %4
+}
+
+define <4 x i32> @min_lt_v4i32c() {
+; SSE-LABEL: min_lt_v4i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v4i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
+ %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
+ %3 = icmp ult <4 x i32> %1, %2
+ %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+ ret <4 x i32> %4
+}
+
+define <8 x i32> @min_lt_v8i32c() {
+; SSE-LABEL: min_lt_v8i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
+ %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
+ %3 = icmp ult <8 x i32> %1, %2
+ %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
+ ret <8 x i32> %4
+}
+
+define <8 x i16> @min_lt_v8i16c() {
+; SSE-LABEL: min_lt_v8i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v8i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
+ %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i32 0
+ %3 = icmp ult <8 x i16> %1, %2
+ %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+ ret <8 x i16> %4
+}
+
+define <16 x i16> @min_lt_v16i16c() {
+; SSE-LABEL: min_lt_v16i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
+ %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i32 0
+ %3 = icmp ult <16 x i16> %1, %2
+ %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
+ ret <16 x i16> %4
+}
+
+define <16 x i8> @min_lt_v16i8c() {
+; SSE-LABEL: min_lt_v16i8c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_lt_v16i8c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
+ %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i32 0
+ %3 = icmp ult <16 x i8> %1, %2
+ %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+ ret <16 x i8> %4
+}
+
+define <2 x i64> @min_le_v2i64c() {
+; SSE-LABEL: min_le_v2i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v2i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
+; AVX-NEXT: retq
+ %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
+ %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
+ %3 = icmp ule <2 x i64> %1, %2
+ %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
+ ret <2 x i64> %4
+}
+
+define <4 x i64> @min_le_v4i64c() {
+; SSE-LABEL: min_le_v4i64c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v4i64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
+ %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
+ %3 = icmp ule <4 x i64> %1, %2
+ %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
+ ret <4 x i64> %4
+}
+
+define <4 x i32> @min_le_v4i32c() {
+; SSE-LABEL: min_le_v4i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v4i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
+; AVX-NEXT: retq
+ %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
+ %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
+ %3 = icmp ule <4 x i32> %1, %2
+ %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
+ ret <4 x i32> %4
+}
+
+define <8 x i32> @min_le_v8i32c() {
+; SSE-LABEL: min_le_v8i32c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v8i32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
+ %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
+ %3 = icmp ule <8 x i32> %1, %2
+ %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
+ ret <8 x i32> %4
+}
+
+define <8 x i16> @min_le_v8i16c() {
+; SSE-LABEL: min_le_v8i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v8i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
+; AVX-NEXT: retq
+ %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
+ %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
+ %3 = icmp ule <8 x i16> %1, %2
+ %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
+ ret <8 x i16> %4
+}
+
+define <16 x i16> @min_le_v16i16c() {
+; SSE-LABEL: min_le_v16i16c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v16i16c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
+ %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
+ %3 = icmp ule <16 x i16> %1, %2
+ %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
+ ret <16 x i16> %4
+}
+
+define <16 x i8> @min_le_v16i8c() {
+; SSE-LABEL: min_le_v16i8c:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: min_le_v16i8c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
+; AVX-NEXT: retq
+ %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
+ %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
+ %3 = icmp ule <16 x i8> %1, %2
+ %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
+ ret <16 x i8> %4
+}
diff --git a/test/CodeGen/X86/vec_sdiv_to_shift.ll b/test/CodeGen/X86/vec_sdiv_to_shift.ll
index 56855d3c44eb..7f71a0c2ea5b 100644
--- a/test/CodeGen/X86/vec_sdiv_to_shift.ll
+++ b/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -13,6 +13,19 @@ entry:
ret <8 x i16> %0
}
+define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
+entry:
+; CHECK: sdiv_vec8x16_minsize
+; CHECK: psraw $15
+; CHECK: vpsrlw $11
+; CHECK: vpaddw
+; CHECK: vpsraw $5
+; CHECK: ret
+ %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+ ret <8 x i16> %0
+}
+
+
define <4 x i32> @sdiv_zero(<4 x i32> %var) {
entry:
; CHECK: sdiv_zero
diff --git a/test/CodeGen/X86/vec_trunc_sext.ll b/test/CodeGen/X86/vec_trunc_sext.ll
index dcfe423eb748..66af87c78187 100644
--- a/test/CodeGen/X86/vec_trunc_sext.ll
+++ b/test/CodeGen/X86/vec_trunc_sext.ll
@@ -1,5 +1,5 @@
-; RUN: llc %s -mtriple=x86_64-unknown-unknown -mattr='-sse4.1' -o - | FileCheck %s -check-prefix=NO_SSE_41
-; RUN: llc %s -mtriple=x86_64-unknown-unknown -mattr='+sse4.1' -o - | FileCheck %s -check-prefix=SSE_41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse4.1 | FileCheck %s --check-prefix=NO_SSE_41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE_41
; PR20472 ( http://llvm.org/bugs/show_bug.cgi?id=20472 )
; When sexting a trunc'd vector value, we can't eliminate the zext.
@@ -9,22 +9,23 @@
; but that is beyond our current codegen capabilities.
define <4 x i32> @trunc_sext(<4 x i16>* %in) {
+; NO_SSE_41-LABEL: trunc_sext:
+; NO_SSE_41: # BB#0:
+; NO_SSE_41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; NO_SSE_41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; NO_SSE_41-NEXT: pslld $24, %xmm0
+; NO_SSE_41-NEXT: psrad $24, %xmm0
+; NO_SSE_41-NEXT: retq
+;
+; SSE_41-LABEL: trunc_sext:
+; SSE_41: # BB#0:
+; SSE_41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; SSE_41-NEXT: pslld $24, %xmm0
+; SSE_41-NEXT: psrad $24, %xmm0
+; SSE_41-NEXT: retq
%load = load <4 x i16>, <4 x i16>* %in
%trunc = trunc <4 x i16> %load to <4 x i8>
%sext = sext <4 x i8> %trunc to <4 x i32>
ret <4 x i32> %sext
-
-; NO_SSE_41-LABEL: trunc_sext:
-; NO_SSE_41: movq (%rdi), %xmm0
-; NO_SSE_41-NEXT: punpcklwd %xmm0, %xmm0
-; NO_SSE_41-NEXT: pslld $24, %xmm0
-; NO_SSE_41-NEXT: psrad $24, %xmm0
-; NO_SSE_41-NEXT: retq
-
-; SSE_41-LABEL: trunc_sext:
-; SSE_41: pmovzxwd (%rdi), %xmm0
-; SSE_41-NEXT: pslld $24, %xmm0
-; SSE_41-NEXT: psrad $24, %xmm0
-; SSE_41-NEXT: retq
}
diff --git a/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll b/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
new file mode 100644
index 000000000000..1f36d064f873
--- /dev/null
+++ b/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
@@ -0,0 +1,130 @@
+; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=CST
+; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+sse4.1 \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=CST
+; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=CST
+; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx2 \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+
+; CST: [[MASKCSTADDR:.LCPI[0-9_]+]]:
+; CST-NEXT: .long 65535 # 0xffff
+; CST-NEXT: .long 65535 # 0xffff
+; CST-NEXT: .long 65535 # 0xffff
+; CST-NEXT: .long 65535 # 0xffff
+
+; CST: [[FPMASKCSTADDR:.LCPI[0-9_]+]]:
+; CST-NEXT: .long 1199570944 # float 65536
+; CST-NEXT: .long 1199570944 # float 65536
+; CST-NEXT: .long 1199570944 # float 65536
+; CST-NEXT: .long 1199570944 # float 65536
+
+; AVX2: [[FPMASKCSTADDR:.LCPI[0-9_]+]]:
+; AVX2-NEXT: .long 1199570944 # float 65536
+
+; AVX2: [[MASKCSTADDR:.LCPI[0-9_]+]]:
+; AVX2-NEXT: .long 65535 # 0xffff
+
+define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
+; SSE-LABEL: test_uitofp_v4i32_to_v4f32:
+; SSE: # BB#0:
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; SSE-NEXT: andps %xmm0, %xmm1
+; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: mulps [[FPMASKCSTADDR]](%rip), %xmm0
+; SSE-NEXT: addps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_uitofp_v4i32_to_v4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vandps [[MASKCSTADDR]](%rip), %xmm0, %xmm1
+; AVX-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: vmulps [[FPMASKCSTADDR]](%rip), %xmm0, %xmm0
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: test_uitofp_v4i32_to_v4f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX2-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX2-NEXT: vbroadcastss [[FPMASKCSTADDR]](%rip), %xmm2
+; AVX2-NEXT: vmulps %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd [[MASKCSTADDR]](%rip), %xmm2
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+ %tmp = uitofp <4 x i32> %arg to <4 x float>
+ ret <4 x float> %tmp
+}
+
+; AVX: [[MASKCSTADDR_v8:.LCPI[0-9_]+]]:
+; AVX-NEXT: .long 65535 # 0xffff
+; AVX-NEXT: .long 65535 # 0xffff
+; AVX-NEXT: .long 65535 # 0xffff
+; AVX-NEXT: .long 65535 # 0xffff
+
+; AVX: [[FPMASKCSTADDR_v8:.LCPI[0-9_]+]]:
+; AVX-NEXT: .long 1199570944 # float 65536
+; AVX-NEXT: .long 1199570944 # float 65536
+; AVX-NEXT: .long 1199570944 # float 65536
+; AVX-NEXT: .long 1199570944 # float 65536
+
+; AVX2: [[FPMASKCSTADDR_v8:.LCPI[0-9_]+]]:
+; AVX2-NEXT: .long 1199570944 # float 65536
+
+; AVX2: [[MASKCSTADDR_v8:.LCPI[0-9_]+]]:
+; AVX2-NEXT: .long 65535 # 0xffff
+
+define <8 x float> @test_uitofp_v8i32_to_v8f32(<8 x i32> %arg) {
+; SSE-LABEL: test_uitofp_v8i32_to_v8f32:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psrld $16, %xmm2
+; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
+; SSE-NEXT: movaps {{.*#+}} xmm3 = [6.553600e+04,6.553600e+04,6.553600e+04,6.553600e+04]
+; SSE-NEXT: mulps %xmm3, %xmm2
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [65535,65535,65535,65535]
+; SSE-NEXT: pand %xmm4, %xmm0
+; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE-NEXT: addps %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: psrld $16, %xmm2
+; SSE-NEXT: cvtdq2ps %xmm2, %xmm2
+; SSE-NEXT: mulps %xmm3, %xmm2
+; SSE-NEXT: pand %xmm4, %xmm1
+; SSE-NEXT: cvtdq2ps %xmm1, %xmm1
+; SSE-NEXT: addps %xmm2, %xmm1
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_uitofp_v8i32_to_v8f32:
+; AVX: # BB#0:
+; AVX-NEXT: vandps [[MASKCSTADDR_v8]](%rip), %ymm0, %ymm1
+; AVX-NEXT: vcvtdq2ps %ymm1, %ymm1
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm2
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX-NEXT: vmulps [[FPMASKCSTADDR_v8]](%rip), %ymm0, %ymm0
+; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: test_uitofp_v8i32_to_v8f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrld $16, %ymm0, %ymm1
+; AVX2-NEXT: vcvtdq2ps %ymm1, %ymm1
+; AVX2-NEXT: vbroadcastss [[FPMASKCSTADDR_v8]](%rip), %ymm2
+; AVX2-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpbroadcastd [[MASKCSTADDR_v8]](%rip), %ymm2
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+ %tmp = uitofp <8 x i32> %arg to <8 x float>
+ ret <8 x float> %tmp
+}
diff --git a/test/CodeGen/X86/vec_uint_to_fp.ll b/test/CodeGen/X86/vec_uint_to_fp.ll
index 46cfcd9a9a12..ce0c11b2fa2a 100644
--- a/test/CodeGen/X86/vec_uint_to_fp.ll
+++ b/test/CodeGen/X86/vec_uint_to_fp.ll
@@ -23,10 +23,10 @@
; CST-NEXT: .long 1392508928 ## 0x53000000
; CST: [[MAGICCSTADDR:LCPI0_[0-9]+]]:
-; CST-NEXT: .long 3539992704 ## float -5.497642e+11
-; CST-NEXT: .long 3539992704 ## float -5.497642e+11
-; CST-NEXT: .long 3539992704 ## float -5.497642e+11
-; CST-NEXT: .long 3539992704 ## float -5.497642e+11
+; CST-NEXT: .long 3539992704 ## float -5.49764202E+11
+; CST-NEXT: .long 3539992704 ## float -5.49764202E+11
+; CST-NEXT: .long 3539992704 ## float -5.49764202E+11
+; CST-NEXT: .long 3539992704 ## float -5.49764202E+11
; AVX2: [[LOWCSTADDR:LCPI0_[0-9]+]]:
; AVX2-NEXT: .long 1258291200 ## 0x4b000000
diff --git a/test/CodeGen/X86/vector-blend.ll b/test/CodeGen/X86/vector-blend.ll
index e15daaa54a33..aaf81f2f9bb6 100644
--- a/test/CodeGen/X86/vector-blend.ll
+++ b/test/CodeGen/X86/vector-blend.ll
@@ -255,31 +255,32 @@ entry:
define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE2-LABEL: vsel_i8:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,255,255,255,255,255,255,255]
-; SSE2-NEXT: andps %xmm2, %xmm0
-; SSE2-NEXT: andnps %xmm1, %xmm2
-; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
+; SSE2-NEXT: andps %xmm2, %xmm1
+; SSE2-NEXT: andnps %xmm0, %xmm2
+; SSE2-NEXT: orps %xmm1, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i8:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8,9,10,11,12,13,14,15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[12],zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,xmm1[9,10,11],zero,xmm1[13,14,15]
; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,255,255,255,255,255,255,255]
-; SSE41-NEXT: pblendvb %xmm2, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
+; SSE41-NEXT: pblendvb %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i8:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,255,255,255,255,255,255,255]
-; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
@@ -623,49 +624,52 @@ entry:
define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; SSE2-LABEL: constant_pblendvb_avx2:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movaps {{.*#+}} xmm4 = [0,0,255,0,255,255,255,0,255,255,255,255,255,255,255,255]
+; SSE2-NEXT: movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE2-NEXT: movaps %xmm4, %xmm5
-; SSE2-NEXT: andnps %xmm2, %xmm5
-; SSE2-NEXT: andps %xmm4, %xmm0
-; SSE2-NEXT: orps %xmm5, %xmm0
-; SSE2-NEXT: andps %xmm4, %xmm1
-; SSE2-NEXT: andnps %xmm3, %xmm4
-; SSE2-NEXT: orps %xmm4, %xmm1
+; SSE2-NEXT: andnps %xmm0, %xmm5
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: orps %xmm2, %xmm5
+; SSE2-NEXT: andps %xmm4, %xmm3
+; SSE2-NEXT: andnps %xmm1, %xmm4
+; SSE2-NEXT: orps %xmm3, %xmm4
+; SSE2-NEXT: movaps %xmm5, %xmm0
+; SSE2-NEXT: movaps %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_pblendvb_avx2:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,128,3,128,128,128,7,128,128,128,128,128,128,128,128]
-; SSSE3-NEXT: pshufb %xmm4, %xmm2
-; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [128,128,2,128,4,5,6,128,8,9,10,11,12,13,14,15]
-; SSSE3-NEXT: pshufb %xmm5, %xmm0
+; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [128,128,2,128,4,5,6,128,128,128,10,128,12,13,14,128]
+; SSSE3-NEXT: pshufb %xmm4, %xmm0
+; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [0,1,128,3,128,128,128,7,8,9,128,11,128,128,128,15]
+; SSSE3-NEXT: pshufb %xmm5, %xmm2
; SSSE3-NEXT: por %xmm2, %xmm0
-; SSSE3-NEXT: pshufb %xmm4, %xmm3
-; SSSE3-NEXT: pshufb %xmm5, %xmm1
+; SSSE3-NEXT: pshufb %xmm4, %xmm1
+; SSSE3-NEXT: pshufb %xmm5, %xmm3
; SSSE3-NEXT: por %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm4
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,255,255,255,255,255,255,255,255]
-; SSE41-NEXT: pblendvb %xmm4, %xmm2
-; SSE41-NEXT: pblendvb %xmm1, %xmm3
-; SSE41-NEXT: movdqa %xmm2, %xmm0
-; SSE41-NEXT: movdqa %xmm3, %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
+; SSE41-NEXT: pblendvb %xmm2, %xmm4
+; SSE41-NEXT: pblendvb %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_pblendvb_avx2:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,255,0,255,255,255,0,255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vmovdqa .LCPI18_0(%rip), %xmm4 # xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,0,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
entry:
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll
index 2e482a0f1430..1117e206e5b0 100644
--- a/test/CodeGen/X86/vector-idiv.ll
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=SSE41
; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s --check-prefix=AVX
diff --git a/test/CodeGen/X86/vector-lzcnt-128.ll b/test/CodeGen/X86/vector-lzcnt-128.ll
index b43188b7c6ea..8bf0af68e6dc 100644
--- a/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -1,13 +1,13 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl | FileCheck %s --check-prefix=AVX512VLCD --check-prefix=ALL --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=AVX512CD --check-prefix=ALL --check-prefix=AVX512
-target triple = "x86_64-unknown-unknown"
-
-define <2 x i64> @testv2i64(<2 x i64> %in) {
+define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movd %xmm0, %rax
@@ -16,13 +16,13 @@ define <2 x i64> @testv2i64(<2 x i64> %in) {
; SSE2-NEXT: cmoveq %rcx, %rax
; SSE2-NEXT: xorq $63, %rax
; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: pshufd $78, %xmm0, %xmm0 # xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: movd %xmm0, %rax
; SSE2-NEXT: bsrq %rax, %rax
; SSE2-NEXT: cmoveq %rcx, %rax
; SSE2-NEXT: xorq $63, %rax
; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -34,13 +34,13 @@ define <2 x i64> @testv2i64(<2 x i64> %in) {
; SSE3-NEXT: cmoveq %rcx, %rax
; SSE3-NEXT: xorq $63, %rax
; SSE3-NEXT: movd %rax, %xmm1
-; SSE3-NEXT: pshufd $78, %xmm0, %xmm0 # xmm0 = xmm0[2,3,0,1]
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE3-NEXT: movd %xmm0, %rax
; SSE3-NEXT: bsrq %rax, %rax
; SSE3-NEXT: cmoveq %rcx, %rax
; SSE3-NEXT: xorq $63, %rax
; SSE3-NEXT: movd %rax, %xmm0
-; SSE3-NEXT: punpcklqdq %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: retq
;
@@ -52,16 +52,15 @@ define <2 x i64> @testv2i64(<2 x i64> %in) {
; SSSE3-NEXT: cmoveq %rcx, %rax
; SSSE3-NEXT: xorq $63, %rax
; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: pshufd $78, %xmm0, %xmm0 # xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSSE3-NEXT: movd %xmm0, %rax
; SSSE3-NEXT: bsrq %rax, %rax
; SSSE3-NEXT: cmoveq %rcx, %rax
; SSSE3-NEXT: xorq $63, %rax
; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: punpcklqdq %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
-
;
; SSE41-LABEL: testv2i64:
; SSE41: # BB#0:
@@ -94,11 +93,22 @@ define <2 x i64> @testv2i64(<2 x i64> %in) {
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv2i64:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vplzcntq %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv2i64:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0)
ret <2 x i64> %out
}
-define <2 x i64> @testv2i64u(<2 x i64> %in) {
+define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64u:
; SSE2: # BB#0:
; SSE2-NEXT: movd %xmm0, %rax
@@ -169,11 +179,22 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) {
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv2i64u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vplzcntq %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv2i64u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 -1)
ret <2 x i64> %out
}
-define <4 x i32> @testv4i32(<4 x i32> %in) {
+define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-LABEL: testv4i32:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
@@ -320,11 +341,22 @@ define <4 x i32> @testv4i32(<4 x i32> %in) {
; AVX-NEXT: xorl $31, %eax
; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv4i32:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vplzcntd %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv4i32:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %in, i1 0)
ret <4 x i32> %out
}
-define <4 x i32> @testv4i32u(<4 x i32> %in) {
+define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE2-LABEL: testv4i32u:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
@@ -446,11 +478,22 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) {
; AVX-NEXT: xorl $31, %eax
; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv4i32u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vplzcntd %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv4i32u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %in, i1 -1)
ret <4 x i32> %out
}
-define <8 x i16> @testv8i16(<8 x i16> %in) {
+define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-LABEL: testv8i16:
; SSE2: # BB#0:
; SSE2-NEXT: pextrw $7, %xmm0, %eax
@@ -697,11 +740,27 @@ define <8 x i16> @testv8i16(<8 x i16> %in) {
; AVX-NEXT: xorl $15, %ecx
; AVX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv8i16:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxwd %xmm0, %ymm0
+; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
+; AVX512VLCD-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv8i16:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512CD-NEXT: retq
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %in, i1 0)
ret <8 x i16> %out
}
-define <8 x i16> @testv8i16u(<8 x i16> %in) {
+define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE2-LABEL: testv8i16u:
; SSE2: # BB#0:
; SSE2-NEXT: pextrw $7, %xmm0, %eax
@@ -903,29 +962,46 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) {
; AVX-NEXT: xorl $15, %eax
; AVX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv8i16u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxwd %xmm0, %ymm0
+; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
+; AVX512VLCD-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv8i16u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512CD-NEXT: retq
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %in, i1 -1)
ret <8 x i16> %out
}
-define <16 x i8> @testv16i8(<16 x i8> %in) {
+define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE2-LABEL: testv16i8:
; SSE2: # BB#0:
-; SSE2: pushq %rbp
-; SSE2: movaps %xmm0, -24(%rsp)
-; SSE2-NEXT: movzbl -9(%rsp), %eax
+; SSE2-NEXT: pushq %rbp
+; SSE2-NEXT: pushq %rbx
+; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: bsrl %eax, %ecx
; SSE2-NEXT: movl $15, %eax
; SSE2-NEXT: cmovel %eax, %ecx
; SSE2-NEXT: xorl $7, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: movzbl -10(%rsp), %ebx
-; SSE2-NEXT: movzbl -11(%rsp), %edi
-; SSE2-NEXT: movzbl -12(%rsp), %r9d
-; SSE2-NEXT: movzbl -13(%rsp), %edx
-; SSE2-NEXT: movzbl -14(%rsp), %r11d
-; SSE2-NEXT: movzbl -15(%rsp), %esi
-; SSE2-NEXT: movzbl -16(%rsp), %r8d
-; SSE2-NEXT: movzbl -17(%rsp), %ecx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %r11d
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: bsrl %ecx, %ecx
; SSE2-NEXT: cmovel %eax, %ecx
; SSE2-NEXT: xorl $7, %ecx
@@ -935,10 +1011,10 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; SSE2-NEXT: cmovel %eax, %ecx
; SSE2-NEXT: xorl $7, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
-; SSE2-NEXT: movzbl -18(%rsp), %edx
-; SSE2-NEXT: movzbl -19(%rsp), %ecx
-; SSE2-NEXT: movzbl -20(%rsp), %r10d
-; SSE2-NEXT: movzbl -21(%rsp), %ebp
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %r10d
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebp
; SSE2-NEXT: bsrl %ebp, %ebp
; SSE2-NEXT: cmovel %eax, %ebp
; SSE2-NEXT: xorl $7, %ebp
@@ -958,8 +1034,8 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; SSE2-NEXT: cmovel %eax, %ecx
; SSE2-NEXT: xorl $7, %ecx
; SSE2-NEXT: movd %ecx, %xmm3
-; SSE2-NEXT: movzbl -22(%rsp), %esi
-; SSE2-NEXT: movzbl -23(%rsp), %ecx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: bsrl %ecx, %ecx
; SSE2-NEXT: cmovel %eax, %ecx
; SSE2-NEXT: xorl $7, %ecx
@@ -999,7 +1075,7 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; SSE2-NEXT: cmovel %eax, %ecx
; SSE2-NEXT: xorl $7, %ecx
; SSE2-NEXT: movd %ecx, %xmm4
-; SSE2-NEXT: movzbl -24(%rsp), %ecx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: bsrl %ecx, %ecx
; SSE2-NEXT: cmovel %eax, %ecx
; SSE2-NEXT: xorl $7, %ecx
@@ -1014,22 +1090,23 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
;
; SSE3-LABEL: testv16i8:
; SSE3: # BB#0:
-; SSE3: pushq %rbp
-; SSE3: movaps %xmm0, -24(%rsp)
-; SSE3-NEXT: movzbl -9(%rsp), %eax
+; SSE3-NEXT: pushq %rbp
+; SSE3-NEXT: pushq %rbx
+; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE3-NEXT: bsrl %eax, %ecx
; SSE3-NEXT: movl $15, %eax
; SSE3-NEXT: cmovel %eax, %ecx
; SSE3-NEXT: xorl $7, %ecx
; SSE3-NEXT: movd %ecx, %xmm0
-; SSE3-NEXT: movzbl -10(%rsp), %ebx
-; SSE3-NEXT: movzbl -11(%rsp), %edi
-; SSE3-NEXT: movzbl -12(%rsp), %r9d
-; SSE3-NEXT: movzbl -13(%rsp), %edx
-; SSE3-NEXT: movzbl -14(%rsp), %r11d
-; SSE3-NEXT: movzbl -15(%rsp), %esi
-; SSE3-NEXT: movzbl -16(%rsp), %r8d
-; SSE3-NEXT: movzbl -17(%rsp), %ecx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r11d
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE3-NEXT: bsrl %ecx, %ecx
; SSE3-NEXT: cmovel %eax, %ecx
; SSE3-NEXT: xorl $7, %ecx
@@ -1039,10 +1116,10 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; SSE3-NEXT: cmovel %eax, %ecx
; SSE3-NEXT: xorl $7, %ecx
; SSE3-NEXT: movd %ecx, %xmm2
-; SSE3-NEXT: movzbl -18(%rsp), %edx
-; SSE3-NEXT: movzbl -19(%rsp), %ecx
-; SSE3-NEXT: movzbl -20(%rsp), %r10d
-; SSE3-NEXT: movzbl -21(%rsp), %ebp
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r10d
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebp
; SSE3-NEXT: bsrl %ebp, %ebp
; SSE3-NEXT: cmovel %eax, %ebp
; SSE3-NEXT: xorl $7, %ebp
@@ -1062,8 +1139,8 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; SSE3-NEXT: cmovel %eax, %ecx
; SSE3-NEXT: xorl $7, %ecx
; SSE3-NEXT: movd %ecx, %xmm3
-; SSE3-NEXT: movzbl -22(%rsp), %esi
-; SSE3-NEXT: movzbl -23(%rsp), %ecx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE3-NEXT: bsrl %ecx, %ecx
; SSE3-NEXT: cmovel %eax, %ecx
; SSE3-NEXT: xorl $7, %ecx
@@ -1103,7 +1180,7 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; SSE3-NEXT: cmovel %eax, %ecx
; SSE3-NEXT: xorl $7, %ecx
; SSE3-NEXT: movd %ecx, %xmm4
-; SSE3-NEXT: movzbl -24(%rsp), %ecx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSE3-NEXT: bsrl %ecx, %ecx
; SSE3-NEXT: cmovel %eax, %ecx
; SSE3-NEXT: xorl $7, %ecx
@@ -1118,22 +1195,23 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
;
; SSSE3-LABEL: testv16i8:
; SSSE3: # BB#0:
-; SSSE3: pushq %rbp
-; SSSE3: movaps %xmm0, -24(%rsp)
-; SSSE3-NEXT: movzbl -9(%rsp), %eax
+; SSSE3-NEXT: pushq %rbp
+; SSSE3-NEXT: pushq %rbx
+; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: bsrl %eax, %ecx
; SSSE3-NEXT: movl $15, %eax
; SSSE3-NEXT: cmovel %eax, %ecx
; SSSE3-NEXT: xorl $7, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
-; SSSE3-NEXT: movzbl -10(%rsp), %ebx
-; SSSE3-NEXT: movzbl -11(%rsp), %edi
-; SSSE3-NEXT: movzbl -12(%rsp), %r9d
-; SSSE3-NEXT: movzbl -13(%rsp), %edx
-; SSSE3-NEXT: movzbl -14(%rsp), %r11d
-; SSSE3-NEXT: movzbl -15(%rsp), %esi
-; SSSE3-NEXT: movzbl -16(%rsp), %r8d
-; SSSE3-NEXT: movzbl -17(%rsp), %ecx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r11d
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSSE3-NEXT: bsrl %ecx, %ecx
; SSSE3-NEXT: cmovel %eax, %ecx
; SSSE3-NEXT: xorl $7, %ecx
@@ -1143,10 +1221,10 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; SSSE3-NEXT: cmovel %eax, %ecx
; SSSE3-NEXT: xorl $7, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
-; SSSE3-NEXT: movzbl -18(%rsp), %edx
-; SSSE3-NEXT: movzbl -19(%rsp), %ecx
-; SSSE3-NEXT: movzbl -20(%rsp), %r10d
-; SSSE3-NEXT: movzbl -21(%rsp), %ebp
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r10d
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebp
; SSSE3-NEXT: bsrl %ebp, %ebp
; SSSE3-NEXT: cmovel %eax, %ebp
; SSSE3-NEXT: xorl $7, %ebp
@@ -1166,8 +1244,8 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; SSSE3-NEXT: cmovel %eax, %ecx
; SSSE3-NEXT: xorl $7, %ecx
; SSSE3-NEXT: movd %ecx, %xmm3
-; SSSE3-NEXT: movzbl -22(%rsp), %esi
-; SSSE3-NEXT: movzbl -23(%rsp), %ecx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSSE3-NEXT: bsrl %ecx, %ecx
; SSSE3-NEXT: cmovel %eax, %ecx
; SSSE3-NEXT: xorl $7, %ecx
@@ -1207,7 +1285,7 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; SSSE3-NEXT: cmovel %eax, %ecx
; SSSE3-NEXT: xorl $7, %ecx
; SSSE3-NEXT: movd %ecx, %xmm4
-; SSSE3-NEXT: movzbl -24(%rsp), %ecx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; SSSE3-NEXT: bsrl %ecx, %ecx
; SSSE3-NEXT: cmovel %eax, %ecx
; SSSE3-NEXT: xorl $7, %ecx
@@ -1390,27 +1468,43 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
; AVX-NEXT: xorl $7, %ecx
; AVX-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv16i8:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VLCD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv16i8:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512CD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512CD-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %in, i1 0)
ret <16 x i8> %out
}
-define <16 x i8> @testv16i8u(<16 x i8> %in) {
+define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
; SSE2-LABEL: testv16i8u:
; SSE2: # BB#0:
-; SSE2: pushq %rbx
-; SSE2: movaps %xmm0, -16(%rsp)
-; SSE2-NEXT: movzbl -1(%rsp), %eax
+; SSE2-NEXT: pushq %rbx
+; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: bsrl %eax, %eax
; SSE2-NEXT: xorl $7, %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzbl -2(%rsp), %edi
-; SSE2-NEXT: movzbl -3(%rsp), %edx
-; SSE2-NEXT: movzbl -4(%rsp), %r9d
-; SSE2-NEXT: movzbl -5(%rsp), %eax
-; SSE2-NEXT: movzbl -6(%rsp), %r10d
-; SSE2-NEXT: movzbl -7(%rsp), %ecx
-; SSE2-NEXT: movzbl -8(%rsp), %r8d
-; SSE2-NEXT: movzbl -9(%rsp), %esi
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %r10d
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSE2-NEXT: bsrl %esi, %esi
; SSE2-NEXT: xorl $7, %esi
; SSE2-NEXT: movd %esi, %xmm1
@@ -1418,10 +1512,10 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; SSE2-NEXT: bsrl %eax, %eax
; SSE2-NEXT: xorl $7, %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzbl -10(%rsp), %eax
-; SSE2-NEXT: movzbl -11(%rsp), %esi
-; SSE2-NEXT: movzbl -12(%rsp), %r11d
-; SSE2-NEXT: movzbl -13(%rsp), %ebx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %r11d
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebx
; SSE2-NEXT: bsrl %ebx, %ebx
; SSE2-NEXT: xorl $7, %ebx
; SSE2-NEXT: movd %ebx, %xmm2
@@ -1437,8 +1531,8 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; SSE2-NEXT: bsrl %ecx, %ecx
; SSE2-NEXT: xorl $7, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: movzbl -14(%rsp), %ecx
-; SSE2-NEXT: movzbl -15(%rsp), %edx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE2-NEXT: bsrl %edx, %edx
; SSE2-NEXT: xorl $7, %edx
; SSE2-NEXT: movd %edx, %xmm1
@@ -1470,7 +1564,7 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; SSE2-NEXT: bsrl %r8d, %eax
; SSE2-NEXT: xorl $7, %eax
; SSE2-NEXT: movd %eax, %xmm4
-; SSE2-NEXT: movzbl -16(%rsp), %eax
+; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: bsrl %eax, %eax
; SSE2-NEXT: xorl $7, %eax
; SSE2-NEXT: movd %eax, %xmm0
@@ -1483,20 +1577,20 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
;
; SSE3-LABEL: testv16i8u:
; SSE3: # BB#0:
-; SSE3: pushq %rbx
-; SSE3: movaps %xmm0, -16(%rsp)
-; SSE3-NEXT: movzbl -1(%rsp), %eax
+; SSE3-NEXT: pushq %rbx
+; SSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE3-NEXT: bsrl %eax, %eax
; SSE3-NEXT: xorl $7, %eax
; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: movzbl -2(%rsp), %edi
-; SSE3-NEXT: movzbl -3(%rsp), %edx
-; SSE3-NEXT: movzbl -4(%rsp), %r9d
-; SSE3-NEXT: movzbl -5(%rsp), %eax
-; SSE3-NEXT: movzbl -6(%rsp), %r10d
-; SSE3-NEXT: movzbl -7(%rsp), %ecx
-; SSE3-NEXT: movzbl -8(%rsp), %r8d
-; SSE3-NEXT: movzbl -9(%rsp), %esi
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r10d
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSE3-NEXT: bsrl %esi, %esi
; SSE3-NEXT: xorl $7, %esi
; SSE3-NEXT: movd %esi, %xmm1
@@ -1504,10 +1598,10 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; SSE3-NEXT: bsrl %eax, %eax
; SSE3-NEXT: xorl $7, %eax
; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: movzbl -10(%rsp), %eax
-; SSE3-NEXT: movzbl -11(%rsp), %esi
-; SSE3-NEXT: movzbl -12(%rsp), %r11d
-; SSE3-NEXT: movzbl -13(%rsp), %ebx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r11d
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebx
; SSE3-NEXT: bsrl %ebx, %ebx
; SSE3-NEXT: xorl $7, %ebx
; SSE3-NEXT: movd %ebx, %xmm2
@@ -1523,8 +1617,8 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; SSE3-NEXT: bsrl %ecx, %ecx
; SSE3-NEXT: xorl $7, %ecx
; SSE3-NEXT: movd %ecx, %xmm0
-; SSE3-NEXT: movzbl -14(%rsp), %ecx
-; SSE3-NEXT: movzbl -15(%rsp), %edx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSE3-NEXT: bsrl %edx, %edx
; SSE3-NEXT: xorl $7, %edx
; SSE3-NEXT: movd %edx, %xmm1
@@ -1556,7 +1650,7 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; SSE3-NEXT: bsrl %r8d, %eax
; SSE3-NEXT: xorl $7, %eax
; SSE3-NEXT: movd %eax, %xmm4
-; SSE3-NEXT: movzbl -16(%rsp), %eax
+; SSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE3-NEXT: bsrl %eax, %eax
; SSE3-NEXT: xorl $7, %eax
; SSE3-NEXT: movd %eax, %xmm0
@@ -1569,20 +1663,20 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
;
; SSSE3-LABEL: testv16i8u:
; SSSE3: # BB#0:
-; SSSE3: pushq %rbx
-; SSSE3: movaps %xmm0, -16(%rsp)
-; SSSE3-NEXT: movzbl -1(%rsp), %eax
+; SSSE3-NEXT: pushq %rbx
+; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: bsrl %eax, %eax
; SSSE3-NEXT: xorl $7, %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzbl -2(%rsp), %edi
-; SSSE3-NEXT: movzbl -3(%rsp), %edx
-; SSSE3-NEXT: movzbl -4(%rsp), %r9d
-; SSSE3-NEXT: movzbl -5(%rsp), %eax
-; SSSE3-NEXT: movzbl -6(%rsp), %r10d
-; SSSE3-NEXT: movzbl -7(%rsp), %ecx
-; SSSE3-NEXT: movzbl -8(%rsp), %r8d
-; SSSE3-NEXT: movzbl -9(%rsp), %esi
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r9d
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r10d
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r8d
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; SSSE3-NEXT: bsrl %esi, %esi
; SSSE3-NEXT: xorl $7, %esi
; SSSE3-NEXT: movd %esi, %xmm1
@@ -1590,10 +1684,10 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; SSSE3-NEXT: bsrl %eax, %eax
; SSSE3-NEXT: xorl $7, %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzbl -10(%rsp), %eax
-; SSSE3-NEXT: movzbl -11(%rsp), %esi
-; SSSE3-NEXT: movzbl -12(%rsp), %r11d
-; SSSE3-NEXT: movzbl -13(%rsp), %ebx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %r11d
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ebx
; SSSE3-NEXT: bsrl %ebx, %ebx
; SSSE3-NEXT: xorl $7, %ebx
; SSSE3-NEXT: movd %ebx, %xmm2
@@ -1609,8 +1703,8 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; SSSE3-NEXT: bsrl %ecx, %ecx
; SSSE3-NEXT: xorl $7, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
-; SSSE3-NEXT: movzbl -14(%rsp), %ecx
-; SSSE3-NEXT: movzbl -15(%rsp), %edx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx
; SSSE3-NEXT: bsrl %edx, %edx
; SSSE3-NEXT: xorl $7, %edx
; SSSE3-NEXT: movd %edx, %xmm1
@@ -1642,7 +1736,7 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; SSSE3-NEXT: bsrl %r8d, %eax
; SSSE3-NEXT: xorl $7, %eax
; SSSE3-NEXT: movd %eax, %xmm4
-; SSSE3-NEXT: movzbl -16(%rsp), %eax
+; SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: bsrl %eax, %eax
; SSSE3-NEXT: xorl $7, %eax
; SSSE3-NEXT: movd %eax, %xmm0
@@ -1789,11 +1883,27 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) {
; AVX-NEXT: xorl $7, %eax
; AVX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv16i8u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VLCD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv16i8u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512CD-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512CD-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %in, i1 -1)
ret <16 x i8> %out
}
-define <2 x i64> @foldv2i64() {
+define <2 x i64> @foldv2i64() nounwind {
; SSE-LABEL: foldv2i64:
; SSE: # BB#0:
; SSE-NEXT: movl $55, %eax
@@ -1805,11 +1915,23 @@ define <2 x i64> @foldv2i64() {
; AVX-NEXT: movl $55, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv2i64:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: movl $55, %eax
+; AVX512VLCD-NEXT: vmovq %rax, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv2i64:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: movl $55, %eax
+; AVX512CD-NEXT: vmovq %rax, %xmm0
+; AVX512CD-NEXT: retq
%out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 0)
ret <2 x i64> %out
}
-define <2 x i64> @foldv2i64u() {
+define <2 x i64> @foldv2i64u() nounwind {
; SSE-LABEL: foldv2i64u:
; SSE: # BB#0:
; SSE-NEXT: movl $55, %eax
@@ -1821,11 +1943,23 @@ define <2 x i64> @foldv2i64u() {
; AVX-NEXT: movl $55, %eax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv2i64u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: movl $55, %eax
+; AVX512VLCD-NEXT: vmovq %rax, %xmm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv2i64u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: movl $55, %eax
+; AVX512CD-NEXT: vmovq %rax, %xmm0
+; AVX512CD-NEXT: retq
%out = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> <i64 256, i64 -1>, i1 -1)
ret <2 x i64> %out
}
-define <4 x i32> @foldv4i32() {
+define <4 x i32> @foldv4i32() nounwind {
; SSE-LABEL: foldv4i32:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
@@ -1835,11 +1969,21 @@ define <4 x i32> @foldv4i32() {
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv4i32:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv4i32:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512CD-NEXT: retq
%out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 0)
ret <4 x i32> %out
}
-define <4 x i32> @foldv4i32u() {
+define <4 x i32> @foldv4i32u() nounwind {
; SSE-LABEL: foldv4i32u:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [23,0,32,24]
@@ -1849,11 +1993,21 @@ define <4 x i32> @foldv4i32u() {
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv4i32u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv4i32u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
+; AVX512CD-NEXT: retq
%out = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> <i32 256, i32 -1, i32 0, i32 255>, i1 -1)
ret <4 x i32> %out
}
-define <8 x i16> @foldv8i16() {
+define <8 x i16> @foldv8i16() nounwind {
; SSE-LABEL: foldv8i16:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
@@ -1863,11 +2017,21 @@ define <8 x i16> @foldv8i16() {
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv8i16:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv8i16:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512CD-NEXT: retq
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 0)
ret <8 x i16> %out
}
-define <8 x i16> @foldv8i16u() {
+define <8 x i16> @foldv8i16u() nounwind {
; SSE-LABEL: foldv8i16u:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
@@ -1877,11 +2041,21 @@ define <8 x i16> @foldv8i16u() {
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv8i16u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv8i16u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
+; AVX512CD-NEXT: retq
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88>, i1 -1)
ret <8 x i16> %out
}
-define <16 x i8> @foldv16i8() {
+define <16 x i8> @foldv16i8() nounwind {
; SSE-LABEL: foldv16i8:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
@@ -1891,11 +2065,21 @@ define <16 x i8> @foldv16i8() {
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv16i8:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv16i8:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512CD-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 0)
ret <16 x i8> %out
}
-define <16 x i8> @foldv16i8u() {
+define <16 x i8> @foldv16i8u() nounwind {
; SSE-LABEL: foldv16i8u:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
@@ -1905,6 +2089,16 @@ define <16 x i8> @foldv16i8u() {
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv16i8u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv16i8u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
+; AVX512CD-NEXT: retq
%out = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32>, i1 -1)
ret <16 x i8> %out
}
diff --git a/test/CodeGen/X86/vector-lzcnt-256.ll b/test/CodeGen/X86/vector-lzcnt-256.ll
index 48abe1290528..1608bf53748d 100644
--- a/test/CodeGen/X86/vector-lzcnt-256.ll
+++ b/test/CodeGen/X86/vector-lzcnt-256.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s --check-prefix=AVX512VLCD --check-prefix=ALL --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=AVX512CD --check-prefix=ALL --check-prefix=AVX512
-target triple = "x86_64-unknown-unknown"
-
-define <4 x i64> @testv4i64(<4 x i64> %in) {
+define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -61,11 +62,22 @@ define <4 x i64> @testv4i64(<4 x i64> %in) {
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv4i64:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv4i64:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
ret <4 x i64> %out
}
-define <4 x i64> @testv4i64u(<4 x i64> %in) {
+define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -113,11 +125,22 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) {
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv4i64u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vplzcntq %ymm0, %ymm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv4i64u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
ret <4 x i64> %out
}
-define <8 x i32> @testv8i32(<8 x i32> %in) {
+define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -211,11 +234,22 @@ define <8 x i32> @testv8i32(<8 x i32> %in) {
; AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv8i32:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv8i32:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
ret <8 x i32> %out
}
-define <8 x i32> @testv8i32u(<8 x i32> %in) {
+define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -291,11 +325,22 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) {
; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv8i32u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vplzcntd %ymm0, %ymm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv8i32u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: retq
+
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
ret <8 x i32> %out
}
-define <16 x i16> @testv16i16(<16 x i16> %in) {
+define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -469,11 +514,27 @@ define <16 x i16> @testv16i16(<16 x i16> %in) {
; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv16i16:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxwd %ymm0, %zmm0
+; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512VLCD-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv16i16:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxwd %ymm0, %zmm0
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512CD-NEXT: retq
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0)
ret <16 x i16> %out
}
-define <16 x i16> @testv16i16u(<16 x i16> %in) {
+define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -613,11 +674,27 @@ define <16 x i16> @testv16i16u(<16 x i16> %in) {
; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv16i16u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vpmovzxwd %ymm0, %zmm0
+; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512VLCD-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512VLCD-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv16i16u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vpmovzxwd %ymm0, %zmm0
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512CD-NEXT: retq
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1)
ret <16 x i16> %out
}
-define <32 x i8> @testv32i8(<32 x i8> %in) {
+define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -951,11 +1028,41 @@ define <32 x i8> @testv32i8(<32 x i8> %in) {
; AVX2-NEXT: vpinsrb $15, %ecx, %xmm2, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv32i8:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512VLCD-NEXT: vpmovzxbd %xmm1, %zmm1
+; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX512VLCD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX512VLCD-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv32i8:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512CD-NEXT: vpmovzxbd %xmm1, %zmm1
+; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512CD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512CD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX512CD-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512CD-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
ret <32 x i8> %out
}
-define <32 x i8> @testv32i8u(<32 x i8> %in) {
+define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -1223,78 +1330,188 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) {
; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VLCD-LABEL: testv32i8u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512VLCD-NEXT: vpmovzxbd %xmm1, %zmm1
+; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX512VLCD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512VLCD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512VLCD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512VLCD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX512VLCD-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: testv32i8u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512CD-NEXT: vpmovzxbd %xmm1, %zmm1
+; AVX512CD-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512CD-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512CD-NEXT: vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512CD-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX512CD-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512CD-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512CD-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX512CD-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512CD-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
ret <32 x i8> %out
}
-define <4 x i64> @foldv4i64() {
+define <4 x i64> @foldv4i64() nounwind {
; AVX-LABEL: foldv4i64:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv4i64:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv4i64:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
+; AVX512CD-NEXT: retq
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
ret <4 x i64> %out
}
-define <4 x i64> @foldv4i64u() {
+define <4 x i64> @foldv4i64u() nounwind {
; AVX-LABEL: foldv4i64u:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv4i64u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv4i64u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
+; AVX512CD-NEXT: retq
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
ret <4 x i64> %out
}
-define <8 x i32> @foldv8i32() {
+define <8 x i32> @foldv8i32() nounwind {
; AVX-LABEL: foldv8i32:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv8i32:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv8i32:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX512CD-NEXT: retq
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
ret <8 x i32> %out
}
-define <8 x i32> @foldv8i32u() {
+define <8 x i32> @foldv8i32u() nounwind {
; AVX-LABEL: foldv8i32u:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv8i32u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv8i32u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
+; AVX512CD-NEXT: retq
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
ret <8 x i32> %out
}
-define <16 x i16> @foldv16i16() {
+define <16 x i16> @foldv16i16() nounwind {
; AVX-LABEL: foldv16i16:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv16i16:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv16i16:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX512CD-NEXT: retq
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
ret <16 x i16> %out
}
-define <16 x i16> @foldv16i16u() {
+define <16 x i16> @foldv16i16u() nounwind {
; AVX-LABEL: foldv16i16u:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv16i16u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv16i16u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
+; AVX512CD-NEXT: retq
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
ret <16 x i16> %out
}
-define <32 x i8> @foldv32i8() {
+define <32 x i8> @foldv32i8() nounwind {
; AVX-LABEL: foldv32i8:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv32i8:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv32i8:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX512CD-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
ret <32 x i8> %out
}
-define <32 x i8> @foldv32i8u() {
+define <32 x i8> @foldv32i8u() nounwind {
; AVX-LABEL: foldv32i8u:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
; AVX-NEXT: retq
+;
+; AVX512VLCD-LABEL: foldv32i8u:
+; AVX512VLCD: ## BB#0:
+; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX512VLCD-NEXT: retq
+;
+; AVX512CD-LABEL: foldv32i8u:
+; AVX512CD: ## BB#0:
+; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
+; AVX512CD-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
ret <32 x i8> %out
}
diff --git a/test/CodeGen/X86/vector-lzcnt-512.ll b/test/CodeGen/X86/vector-lzcnt-512.ll
new file mode 100644
index 000000000000..20ea86e5d439
--- /dev/null
+++ b/test/CodeGen/X86/vector-lzcnt-512.ll
@@ -0,0 +1,219 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
+
+define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
+; ALL-LABEL: testv8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vplzcntq %zmm0, %zmm0
+; ALL-NEXT: retq
+ %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
+ ret <8 x i64> %out
+}
+
+define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
+; ALL-LABEL: testv8i64u:
+; ALL: ## BB#0:
+; ALL-NEXT: vplzcntq %zmm0, %zmm0
+; ALL-NEXT: retq
+ %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
+ ret <8 x i64> %out
+}
+
+define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
+; ALL-LABEL: testv16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vplzcntd %zmm0, %zmm0
+; ALL-NEXT: retq
+ %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0)
+ ret <16 x i32> %out
+}
+
+define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
+; ALL-LABEL: testv16i32u:
+; ALL: ## BB#0:
+; ALL-NEXT: vplzcntd %zmm0, %zmm0
+; ALL-NEXT: retq
+ %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1)
+ ret <16 x i32> %out
+}
+
+define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
+; ALL-LABEL: testv32i16:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovzxwd %ymm0, %zmm0
+; ALL-NEXT: vplzcntd %zmm0, %zmm0
+; ALL-NEXT: vpmovdw %zmm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; ALL-NEXT: vpsubw %ymm2, %ymm0, %ymm0
+; ALL-NEXT: vpmovzxwd %ymm1, %zmm1
+; ALL-NEXT: vplzcntd %zmm1, %zmm1
+; ALL-NEXT: vpmovdw %zmm1, %ymm1
+; ALL-NEXT: vpsubw %ymm2, %ymm1, %ymm1
+; ALL-NEXT: retq
+;
+; AVX512BW-LABEL: testv32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmovzxwd %ymm1, %zmm1
+; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovdw %zmm1, %ymm1
+; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %ymm2, %ymm1, %ymm1
+; AVX512BW-NEXT: vpmovzxwd %ymm0, %zmm0
+; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: vpsubw %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 0)
+ ret <32 x i16> %out
+}
+
+define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
+; ALL-LABEL: testv32i16u:
+; ALL: ## BB#0:
+; ALL-NEXT: vpmovzxwd %ymm0, %zmm0
+; ALL-NEXT: vplzcntd %zmm0, %zmm0
+; ALL-NEXT: vpmovdw %zmm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; ALL-NEXT: vpsubw %ymm2, %ymm0, %ymm0
+; ALL-NEXT: vpmovzxwd %ymm1, %zmm1
+; ALL-NEXT: vplzcntd %zmm1, %zmm1
+; ALL-NEXT: vpmovdw %zmm1, %ymm1
+; ALL-NEXT: vpsubw %ymm2, %ymm1, %ymm1
+; ALL-NEXT: retq
+;
+; AVX512BW-LABEL: testv32i16u:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vpmovzxwd %ymm1, %zmm1
+; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovdw %zmm1, %ymm1
+; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpsubw %ymm2, %ymm1, %ymm1
+; AVX512BW-NEXT: vpmovzxwd %ymm0, %zmm0
+; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: vpsubw %ymm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 -1)
+ ret <32 x i16> %out
+}
+
+define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
+; ALL-LABEL: testv64i8:
+; ALL: ## BB#0:
+; ALL-NEXT: vextractf128 $1, %ymm0, %xmm2
+; ALL-NEXT: vpmovzxbd %xmm2, %zmm2
+; ALL-NEXT: vplzcntd %zmm2, %zmm2
+; ALL-NEXT: vpmovdb %zmm2, %xmm2
+; ALL-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; ALL-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; ALL-NEXT: vpmovzxbd %xmm0, %zmm0
+; ALL-NEXT: vplzcntd %zmm0, %zmm0
+; ALL-NEXT: vpmovdb %zmm0, %xmm0
+; ALL-NEXT: vpsubb %xmm3, %xmm0, %xmm0
+; ALL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: vextractf128 $1, %ymm1, %xmm2
+; ALL-NEXT: vpmovzxbd %xmm2, %zmm2
+; ALL-NEXT: vplzcntd %zmm2, %zmm2
+; ALL-NEXT: vpmovdb %zmm2, %xmm2
+; ALL-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; ALL-NEXT: vpmovzxbd %xmm1, %zmm1
+; ALL-NEXT: vplzcntd %zmm1, %zmm1
+; ALL-NEXT: vpmovdb %zmm1, %xmm1
+; ALL-NEXT: vpsubb %xmm3, %xmm1, %xmm1
+; ALL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; ALL-NEXT: retq
+;
+; AVX512BW-LABEL: testv64i8:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
+; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmovzxbd %xmm1, %zmm1
+; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512BW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
+; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
+; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vpsubb %xmm3, %xmm0, %xmm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 0)
+ ret <64 x i8> %out
+}
+
+define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
+; ALL-LABEL: testv64i8u:
+; ALL: ## BB#0:
+; ALL-NEXT: vextractf128 $1, %ymm0, %xmm2
+; ALL-NEXT: vpmovzxbd %xmm2, %zmm2
+; ALL-NEXT: vplzcntd %zmm2, %zmm2
+; ALL-NEXT: vpmovdb %zmm2, %xmm2
+; ALL-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; ALL-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; ALL-NEXT: vpmovzxbd %xmm0, %zmm0
+; ALL-NEXT: vplzcntd %zmm0, %zmm0
+; ALL-NEXT: vpmovdb %zmm0, %xmm0
+; ALL-NEXT: vpsubb %xmm3, %xmm0, %xmm0
+; ALL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: vextractf128 $1, %ymm1, %xmm2
+; ALL-NEXT: vpmovzxbd %xmm2, %zmm2
+; ALL-NEXT: vplzcntd %zmm2, %zmm2
+; ALL-NEXT: vpmovdb %zmm2, %xmm2
+; ALL-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; ALL-NEXT: vpmovzxbd %xmm1, %zmm1
+; ALL-NEXT: vplzcntd %zmm1, %zmm1
+; ALL-NEXT: vpmovdb %zmm1, %xmm1
+; ALL-NEXT: vpsubb %xmm3, %xmm1, %xmm1
+; ALL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; ALL-NEXT: retq
+;
+; AVX512BW-LABEL: testv64i8u:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
+; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
+; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmovzxbd %xmm1, %zmm1
+; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
+; AVX512BW-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512BW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
+; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
+; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
+; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmovzxbd %xmm0, %zmm0
+; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vpsubb %xmm3, %xmm0, %xmm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 -1)
+ ret <64 x i8> %out
+}
+
+declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
+declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
+declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
+declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)
diff --git a/test/CodeGen/X86/vector-merge-store-fp-constants.ll b/test/CodeGen/X86/vector-merge-store-fp-constants.ll
new file mode 100644
index 000000000000..a6fb32d48a7c
--- /dev/null
+++ b/test/CodeGen/X86/vector-merge-store-fp-constants.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefix=DEFAULTCPU -check-prefix=ALL %s
+; RUN: llc -march=x86-64 -mcpu=x86-64 -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefix=X8664CPU -check-prefix=ALL %s
+
+
+; ALL-LABEL: {{^}}merge_8_float_zero_stores:
+
+; DEFAULTCPU-DAG: movq $0, ([[PTR:%[a-z]+]])
+; DEFAULTCPU-DAG: movq $0, 8([[PTR]])
+; DEFAULTCPU-DAG: movq $0, 16([[PTR]])
+; DEFAULTCPU-DAG: movq $0, 24([[PTR]])
+
+; X8664CPU: xorps [[ZEROREG:%xmm[0-9]+]], [[ZEROREG]]
+; X8664CPU-DAG: movups [[ZEROREG]], ([[PTR:%[a-z]+]])
+; X8664CPU-DAG: movups [[ZEROREG]], 16([[PTR:%[a-z]+]])
+
+; ALL: retq
+define void @merge_8_float_zero_stores(float* %ptr) {
+ %idx0 = getelementptr float, float* %ptr, i64 0
+ %idx1 = getelementptr float, float* %ptr, i64 1
+ %idx2 = getelementptr float, float* %ptr, i64 2
+ %idx3 = getelementptr float, float* %ptr, i64 3
+ %idx4 = getelementptr float, float* %ptr, i64 4
+ %idx5 = getelementptr float, float* %ptr, i64 5
+ %idx6 = getelementptr float, float* %ptr, i64 6
+ %idx7 = getelementptr float, float* %ptr, i64 7
+ store float 0.0, float* %idx0, align 4
+ store float 0.0, float* %idx1, align 4
+ store float 0.0, float* %idx2, align 4
+ store float 0.0, float* %idx3, align 4
+ store float 0.0, float* %idx4, align 4
+ store float 0.0, float* %idx5, align 4
+ store float 0.0, float* %idx6, align 4
+ store float 0.0, float* %idx7, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/vector-popcnt-128.ll b/test/CodeGen/X86/vector-popcnt-128.ll
index fef445de04ab..358bd4018290 100644
--- a/test/CodeGen/X86/vector-popcnt-128.ll
+++ b/test/CodeGen/X86/vector-popcnt-128.ll
@@ -1,13 +1,12 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-target triple = "x86_64-unknown-unknown"
-
-define <2 x i64> @testv2i64(<2 x i64> %in) {
+define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -93,13 +92,13 @@ define <2 x i64> @testv2i64(<2 x i64> %in) {
; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %in)
ret <2 x i64> %out
}
-define <4 x i32> @testv4i32(<4 x i32> %in) {
+define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-LABEL: testv4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -208,16 +207,16 @@ define <4 x i32> @testv4i32(<4 x i32> %in) {
; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: vpsadbw %xmm2, %xmm1, %xmm2
+; AVX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
%out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %in)
ret <4 x i32> %out
}
-define <8 x i16> @testv8i16(<8 x i16> %in) {
+define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-LABEL: testv8i16:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -316,7 +315,7 @@ define <8 x i16> @testv8i16(<8 x i16> %in) {
ret <8 x i16> %out
}
-define <16 x i8> @testv16i8(<16 x i8> %in) {
+define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE2-LABEL: testv16i8:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -400,7 +399,7 @@ define <16 x i8> @testv16i8(<16 x i8> %in) {
ret <16 x i8> %out
}
-define <2 x i64> @foldv2i64() {
+define <2 x i64> @foldv2i64() nounwind {
; SSE-LABEL: foldv2i64:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,64]
@@ -414,7 +413,7 @@ define <2 x i64> @foldv2i64() {
ret <2 x i64> %out
}
-define <4 x i32> @foldv4i32() {
+define <4 x i32> @foldv4i32() nounwind {
; SSE-LABEL: foldv4i32:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,32,0,8]
@@ -428,7 +427,7 @@ define <4 x i32> @foldv4i32() {
ret <4 x i32> %out
}
-define <8 x i16> @foldv8i16() {
+define <8 x i16> @foldv8i16() nounwind {
; SSE-LABEL: foldv8i16:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,16,0,8,0,3,2,3]
@@ -442,7 +441,7 @@ define <8 x i16> @foldv8i16() {
ret <8 x i16> %out
}
-define <16 x i8> @foldv16i8() {
+define <16 x i8> @foldv16i8() nounwind {
; SSE-LABEL: foldv16i8:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1]
diff --git a/test/CodeGen/X86/vector-popcnt-256.ll b/test/CodeGen/X86/vector-popcnt-256.ll
index 7ce4f712483a..b0e39bdf49f9 100644
--- a/test/CodeGen/X86/vector-popcnt-256.ll
+++ b/test/CodeGen/X86/vector-popcnt-256.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-target triple = "x86_64-unknown-unknown"
-
-define <4 x i64> @testv4i64(<4 x i64> %in) {
+define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -16,14 +15,14 @@ define <4 x i64> @testv4i64(<4 x i64> %in) {
; AVX1-NEXT: vpshufb %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpsadbw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm5
; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpsadbw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -38,13 +37,13 @@ define <4 x i64> @testv4i64(<4 x i64> %in) {
; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpsadbw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %in)
ret <4 x i64> %out
}
-define <8 x i32> @testv8i32(<8 x i32> %in) {
+define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -58,9 +57,9 @@ define <8 x i32> @testv8i32(<8 x i32> %in) {
; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; AVX1-NEXT: vpsadbw %xmm5, %xmm3, %xmm5
+; AVX1-NEXT: vpsadbw %xmm3, %xmm5, %xmm5
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; AVX1-NEXT: vpsadbw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpsadbw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm5
; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm5
@@ -69,9 +68,9 @@ define <8 x i32> @testv8i32(<8 x i32> %in) {
; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpaddb %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; AVX1-NEXT: vpsadbw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; AVX1-NEXT: vpsadbw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpsadbw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -88,16 +87,16 @@ define <8 x i32> @testv8i32(<8 x i32> %in) {
; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
-; AVX2-NEXT: vpsadbw %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
-; AVX2-NEXT: vpsadbw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %in)
ret <8 x i32> %out
}
-define <16 x i16> @testv16i16(<16 x i16> %in) {
+define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
@@ -142,7 +141,7 @@ define <16 x i16> @testv16i16(<16 x i16> %in) {
ret <16 x i16> %out
}
-define <32 x i8> @testv32i8(<32 x i8> %in) {
+define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -178,38 +177,38 @@ define <32 x i8> @testv32i8(<32 x i8> %in) {
ret <32 x i8> %out
}
-define <4 x i64> @foldv4i64() {
-; AVX-LABEL: foldv4i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,64,0,8]
-; AVX-NEXT: retq
+define <4 x i64> @foldv4i64() nounwind {
+; ALL-LABEL: foldv4i64:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [1,64,0,8]
+; ALL-NEXT: retq
%out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>)
ret <4 x i64> %out
}
-define <8 x i32> @foldv8i32() {
-; AVX-LABEL: foldv8i32:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,32,0,8,16,3,2,3]
-; AVX-NEXT: retq
+define <8 x i32> @foldv8i32() nounwind {
+; ALL-LABEL: foldv8i32:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [1,32,0,8,16,3,2,3]
+; ALL-NEXT: retq
%out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>)
ret <8 x i32> %out
}
-define <16 x i16> @foldv16i16() {
-; AVX-LABEL: foldv16i16:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,16,0,8,0,3,2,3,15,7,1,1,1,1,1,1]
-; AVX-NEXT: retq
+define <16 x i16> @foldv16i16() nounwind {
+; ALL-LABEL: foldv16i16:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [1,16,0,8,0,3,2,3,15,7,1,1,1,1,1,1]
+; ALL-NEXT: retq
%out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>)
ret <16 x i16> %out
}
-define <32 x i8> @foldv32i8() {
-; AVX-LABEL: foldv32i8:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1,1,1,0,0,1,2,3,4,5,6,7,8,2,2,3,7]
-; AVX-NEXT: retq
+define <32 x i8> @foldv32i8() nounwind {
+; ALL-LABEL: foldv32i8:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [0,8,0,8,0,3,2,3,7,7,1,1,1,1,1,1,1,1,0,0,1,2,3,4,5,6,7,8,2,2,3,7]
+; ALL-NEXT: retq
%out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>)
ret <32 x i8> %out
}
diff --git a/test/CodeGen/X86/vector-popcnt-512.ll b/test/CodeGen/X86/vector-popcnt-512.ll
new file mode 100644
index 000000000000..54b7af6830c0
--- /dev/null
+++ b/test/CodeGen/X86/vector-popcnt-512.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
+
+define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
+; ALL-LABEL: testv8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; ALL-NEXT: vpextrq $1, %xmm1, %rax
+; ALL-NEXT: popcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm2
+; ALL-NEXT: vmovq %xmm1, %rax
+; ALL-NEXT: popcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm1
+; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; ALL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; ALL-NEXT: vpextrq $1, %xmm2, %rax
+; ALL-NEXT: popcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm3
+; ALL-NEXT: vmovq %xmm2, %rax
+; ALL-NEXT: popcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm2
+; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; ALL-NEXT: vpextrq $1, %xmm2, %rax
+; ALL-NEXT: popcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm3
+; ALL-NEXT: vmovq %xmm2, %rax
+; ALL-NEXT: popcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm2
+; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vpextrq $1, %xmm0, %rax
+; ALL-NEXT: popcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm3
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: popcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm0
+; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; ALL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %out = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %in)
+ ret <8 x i64> %out
+}
+
+define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
+; ALL-LABEL: testv16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; ALL-NEXT: vpextrd $1, %xmm1, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vmovd %xmm1, %ecx
+; ALL-NEXT: popcntl %ecx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm2
+; ALL-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; ALL-NEXT: vpextrd $2, %xmm1, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; ALL-NEXT: vpextrd $3, %xmm1, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
+; ALL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; ALL-NEXT: vpextrd $1, %xmm2, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vmovd %xmm2, %ecx
+; ALL-NEXT: popcntl %ecx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm3
+; ALL-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $2, %xmm2, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $3, %xmm2, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
+; ALL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; ALL-NEXT: vpextrd $1, %xmm2, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vmovd %xmm2, %ecx
+; ALL-NEXT: popcntl %ecx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm3
+; ALL-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $2, %xmm2, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $3, %xmm2, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
+; ALL-NEXT: vpextrd $1, %xmm0, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vmovd %xmm0, %ecx
+; ALL-NEXT: popcntl %ecx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm3
+; ALL-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $2, %xmm0, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $3, %xmm0, %eax
+; ALL-NEXT: popcntl %eax, %eax
+; ALL-NEXT: vpinsrd $3, %eax, %xmm3, %xmm0
+; ALL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %out = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %in)
+ ret <16 x i32> %out
+}
+
+define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
+; ALL-LABEL: testv32i16:
+; ALL: ## BB#0:
+; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; ALL-NEXT: vpand %ymm2, %ymm0, %ymm3
+; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; ALL-NEXT: vpand %ymm2, %ymm0, %ymm0
+; ALL-NEXT: vpshufb %ymm0, %ymm4, %ymm0
+; ALL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vpsllw $8, %ymm0, %ymm3
+; ALL-NEXT: vpaddb %ymm0, %ymm3, %ymm0
+; ALL-NEXT: vpsrlw $8, %ymm0, %ymm0
+; ALL-NEXT: vpand %ymm2, %ymm1, %ymm3
+; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; ALL-NEXT: vpshufb %ymm1, %ymm4, %ymm1
+; ALL-NEXT: vpaddb %ymm3, %ymm1, %ymm1
+; ALL-NEXT: vpsllw $8, %ymm1, %ymm2
+; ALL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
+; ALL-NEXT: vpsrlw $8, %ymm1, %ymm1
+; ALL-NEXT: retq
+ %out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
+ ret <32 x i16> %out
+}
+
+define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
+; ALL-LABEL: testv64i8:
+; ALL: ## BB#0:
+; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; ALL-NEXT: vpand %ymm2, %ymm0, %ymm3
+; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; ALL-NEXT: vpand %ymm2, %ymm0, %ymm0
+; ALL-NEXT: vpshufb %ymm0, %ymm4, %ymm0
+; ALL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vpand %ymm2, %ymm1, %ymm3
+; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; ALL-NEXT: vpshufb %ymm1, %ymm4, %ymm1
+; ALL-NEXT: vpaddb %ymm3, %ymm1, %ymm1
+; ALL-NEXT: retq
+ %out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
+ ret <64 x i8> %out
+}
+
+declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)
+declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
+declare <32 x i16> @llvm.ctpop.v32i16(<32 x i16>)
+declare <64 x i8> @llvm.ctpop.v64i8(<64 x i8>)
diff --git a/test/CodeGen/X86/vector-rotate-128.ll b/test/CodeGen/X86/vector-rotate-128.ll
new file mode 100644
index 000000000000..4ad4aa46c5a0
--- /dev/null
+++ b/test/CodeGen/X86/vector-rotate-128.ll
@@ -0,0 +1,1595 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+;
+; Just one 32-bit run to make sure we do reasonable things for i64 rotates.
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE2
+
+;
+; Variable Rotates
+;
+
+define <2 x i64> @var_rotate_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE2-LABEL: var_rotate_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [64,64]
+; SSE2-NEXT: psubq %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: psllq %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psllq %xmm1, %xmm3
+; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrlq %xmm3, %xmm1
+; SSE2-NEXT: psrlq %xmm2, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: orpd %xmm4, %xmm1
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: var_rotate_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [64,64]
+; SSE41-NEXT: psubq %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: psllq %xmm1, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: psllq %xmm1, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm3[0,1,2,3],xmm4[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlq %xmm2, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE41-NEXT: psrlq %xmm2, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: por %xmm4, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: var_rotate_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_rotate_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpsrlvq %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: var_rotate_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vprotq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: var_rotate_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [64,0,64,0]
+; X32-SSE-NEXT: psubq %xmm1, %xmm2
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm4
+; X32-SSE-NEXT: psllq %xmm3, %xmm4
+; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psllq %xmm1, %xmm3
+; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrlq %xmm3, %xmm1
+; X32-SSE-NEXT: movq {{.*#+}} xmm2 = xmm2[0],zero
+; X32-SSE-NEXT: psrlq %xmm2, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X32-SSE-NEXT: orpd %xmm4, %xmm1
+; X32-SSE-NEXT: movapd %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %b64 = sub <2 x i64> <i64 64, i64 64>, %b
+ %shl = shl <2 x i64> %a, %b
+ %lshr = lshr <2 x i64> %a, %b64
+ %or = or <2 x i64> %shl, %lshr
+ ret <2 x i64> %or
+}
+
+define <4 x i32> @var_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+; SSE2-LABEL: var_rotate_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32]
+; SSE2-NEXT: psubd %xmm1, %xmm2
+; SSE2-NEXT: pslld $23, %xmm1
+; SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
+; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm0, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: psrld %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: psrlq $32, %xmm3
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: psrld %xmm3, %xmm5
+; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm5[0],xmm4[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,3,2,3]
+; SSE2-NEXT: pxor %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm5 = xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: psrld %xmm5, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; SSE2-NEXT: psrld %xmm2, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm0[0],xmm6[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: var_rotate_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32]
+; SSE41-NEXT: psubd %xmm1, %xmm2
+; SSE41-NEXT: pslld $23, %xmm1
+; SSE41-NEXT: paddd {{.*}}(%rip), %xmm1
+; SSE41-NEXT: cvttps2dq %xmm1, %xmm1
+; SSE41-NEXT: pmulld %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: psrld %xmm3, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: psrlq $32, %xmm3
+; SSE41-NEXT: movdqa %xmm0, %xmm5
+; SSE41-NEXT: psrld %xmm3, %xmm5
+; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm4[4,5,6,7]
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero
+; SSE41-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: psrld %xmm2, %xmm3
+; SSE41-NEXT: psrld %xmm4, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: var_rotate_v4i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32,32,32,32]
+; AVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
+; AVX1-NEXT: vpmulld %xmm0, %xmm1, %xmm1
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm3 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrld %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm4
+; AVX1-NEXT: vpsrld %xmm4, %xmm0, %xmm4
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm4 = xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; AVX1-NEXT: vpsrld %xmm4, %xmm0, %xmm4
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
+; AVX1-NEXT: vpsrld %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
+; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_rotate_v4i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpsrlvd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: var_rotate_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vprotd %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: var_rotate_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32]
+; X32-SSE-NEXT: psubd %xmm1, %xmm2
+; X32-SSE-NEXT: pslld $23, %xmm1
+; X32-SSE-NEXT: paddd .LCPI1_1, %xmm1
+; X32-SSE-NEXT: cvttps2dq %xmm1, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X32-SSE-NEXT: pmuludq %xmm0, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; X32-SSE-NEXT: pmuludq %xmm3, %xmm4
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X32-SSE-NEXT: movdqa %xmm0, %xmm4
+; X32-SSE-NEXT: psrld %xmm3, %xmm4
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: psrlq $32, %xmm3
+; X32-SSE-NEXT: movdqa %xmm0, %xmm5
+; X32-SSE-NEXT: psrld %xmm3, %xmm5
+; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm5[0],xmm4[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,3,2,3]
+; X32-SSE-NEXT: pxor %xmm4, %xmm4
+; X32-SSE-NEXT: movdqa %xmm2, %xmm5
+; X32-SSE-NEXT: punpckhdq {{.*#+}} xmm5 = xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm6
+; X32-SSE-NEXT: psrld %xmm5, %xmm6
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X32-SSE-NEXT: psrld %xmm2, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm6 = xmm0[0],xmm6[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %b32 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %b
+ %shl = shl <4 x i32> %a, %b
+ %lshr = lshr <4 x i32> %a, %b32
+ %or = or <4 x i32> %shl, %lshr
+ ret <4 x i32> %or
+}
+
+define <8 x i16> @var_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
+; SSE2-LABEL: var_rotate_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; SSE2-NEXT: psubw %xmm1, %xmm3
+; SSE2-NEXT: psllw $12, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psraw $15, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: psllw $8, %xmm4
+; SSE2-NEXT: pand %xmm2, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm4, %xmm2
+; SSE2-NEXT: paddw %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: psraw $15, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: psllw $4, %xmm2
+; SSE2-NEXT: pand %xmm4, %xmm2
+; SSE2-NEXT: por %xmm5, %xmm2
+; SSE2-NEXT: paddw %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: psraw $15, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: psllw $2, %xmm2
+; SSE2-NEXT: pand %xmm4, %xmm2
+; SSE2-NEXT: por %xmm5, %xmm2
+; SSE2-NEXT: paddw %xmm1, %xmm1
+; SSE2-NEXT: psraw $15, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: psllw $1, %xmm2
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: psllw $12, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm1
+; SSE2-NEXT: psraw $15, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: psrlw $8, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: paddw %xmm3, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm1
+; SSE2-NEXT: psraw $15, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: paddw %xmm3, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm1
+; SSE2-NEXT: psraw $15, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: psrlw $2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: paddw %xmm3, %xmm3
+; SSE2-NEXT: psraw $15, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: var_rotate_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
+; SSE41-NEXT: psubw %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psllw $12, %xmm0
+; SSE41-NEXT: psllw $4, %xmm1
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm4
+; SSE41-NEXT: paddw %xmm4, %xmm4
+; SSE41-NEXT: movdqa %xmm3, %xmm6
+; SSE41-NEXT: psllw $8, %xmm6
+; SSE41-NEXT: movdqa %xmm3, %xmm5
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pblendvb %xmm6, %xmm5
+; SSE41-NEXT: movdqa %xmm5, %xmm1
+; SSE41-NEXT: psllw $4, %xmm1
+; SSE41-NEXT: movdqa %xmm4, %xmm0
+; SSE41-NEXT: pblendvb %xmm1, %xmm5
+; SSE41-NEXT: movdqa %xmm5, %xmm1
+; SSE41-NEXT: psllw $2, %xmm1
+; SSE41-NEXT: paddw %xmm4, %xmm4
+; SSE41-NEXT: movdqa %xmm4, %xmm0
+; SSE41-NEXT: pblendvb %xmm1, %xmm5
+; SSE41-NEXT: movdqa %xmm5, %xmm1
+; SSE41-NEXT: psllw $1, %xmm1
+; SSE41-NEXT: paddw %xmm4, %xmm4
+; SSE41-NEXT: movdqa %xmm4, %xmm0
+; SSE41-NEXT: pblendvb %xmm1, %xmm5
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: psllw $12, %xmm0
+; SSE41-NEXT: psllw $4, %xmm2
+; SSE41-NEXT: por %xmm0, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm1
+; SSE41-NEXT: paddw %xmm1, %xmm1
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: psrlw $8, %xmm4
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: pblendvb %xmm4, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm2
+; SSE41-NEXT: psrlw $4, %xmm2
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pblendvb %xmm2, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm2
+; SSE41-NEXT: psrlw $2, %xmm2
+; SSE41-NEXT: paddw %xmm1, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pblendvb %xmm2, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm2
+; SSE41-NEXT: psrlw $1, %xmm2
+; SSE41-NEXT: paddw %xmm1, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pblendvb %xmm2, %xmm3
+; SSE41-NEXT: por %xmm5, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: var_rotate_v8i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
+; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $12, %xmm1, %xmm3
+; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm3
+; AVX1-NEXT: vpsllw $8, %xmm0, %xmm4
+; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm1
+; AVX1-NEXT: vpsllw $4, %xmm1, %xmm4
+; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm4
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw $1, %xmm1, %xmm4
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw $12, %xmm2, %xmm3
+; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2
+; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm3
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4
+; AVX1-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_rotate_v8i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
+; AVX2-NEXT: vpshufb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX2-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: var_rotate_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vprotw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: var_rotate_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; X32-SSE-NEXT: psubw %xmm1, %xmm3
+; X32-SSE-NEXT: psllw $12, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm4
+; X32-SSE-NEXT: psllw $8, %xmm4
+; X32-SSE-NEXT: pand %xmm2, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm2
+; X32-SSE-NEXT: por %xmm4, %xmm2
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm4
+; X32-SSE-NEXT: psraw $15, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm5
+; X32-SSE-NEXT: pandn %xmm2, %xmm5
+; X32-SSE-NEXT: psllw $4, %xmm2
+; X32-SSE-NEXT: pand %xmm4, %xmm2
+; X32-SSE-NEXT: por %xmm5, %xmm2
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm4
+; X32-SSE-NEXT: psraw $15, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm5
+; X32-SSE-NEXT: pandn %xmm2, %xmm5
+; X32-SSE-NEXT: psllw $2, %xmm2
+; X32-SSE-NEXT: pand %xmm4, %xmm2
+; X32-SSE-NEXT: por %xmm5, %xmm2
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: psraw $15, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm4
+; X32-SSE-NEXT: pandn %xmm2, %xmm4
+; X32-SSE-NEXT: psllw $1, %xmm2
+; X32-SSE-NEXT: pand %xmm1, %xmm2
+; X32-SSE-NEXT: psllw $12, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm1
+; X32-SSE-NEXT: psraw $15, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm5
+; X32-SSE-NEXT: psrlw $8, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: paddw %xmm3, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm1
+; X32-SSE-NEXT: psraw $15, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm5
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: paddw %xmm3, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm1
+; X32-SSE-NEXT: psraw $15, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm5
+; X32-SSE-NEXT: psrlw $2, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: paddw %xmm3, %xmm3
+; X32-SSE-NEXT: psraw $15, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm1
+; X32-SSE-NEXT: pandn %xmm0, %xmm1
+; X32-SSE-NEXT: psrlw $1, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
+ %b16 = sub <8 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, %b
+ %shl = shl <8 x i16> %a, %b
+ %lshr = lshr <8 x i16> %a, %b16
+ %or = or <8 x i16> %shl, %lshr
+ ret <8 x i16> %or
+}
+
+define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
+; SSE2-LABEL: var_rotate_v16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; SSE2-NEXT: psubb %xmm1, %xmm4
+; SSE2-NEXT: psllw $5, %xmm1
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: psllw $4, %xmm5
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm5
+; SSE2-NEXT: pand %xmm2, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm5, %xmm2
+; SSE2-NEXT: paddb %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm6
+; SSE2-NEXT: psllw $2, %xmm2
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: por %xmm6, %xmm2
+; SSE2-NEXT: paddb %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: pandn %xmm2, %xmm1
+; SSE2-NEXT: paddb %xmm2, %xmm2
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: psllw $5, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pandn %xmm0, %xmm6
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: paddb %xmm4, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pandn %xmm0, %xmm6
+; SSE2-NEXT: psrlw $2, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: paddb %xmm4, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: var_rotate_v16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; SSE41-NEXT: psubb %xmm3, %xmm2
+; SSE41-NEXT: psllw $5, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm5
+; SSE41-NEXT: psllw $4, %xmm5
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm5
+; SSE41-NEXT: movdqa %xmm1, %xmm4
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: pblendvb %xmm5, %xmm4
+; SSE41-NEXT: movdqa %xmm4, %xmm5
+; SSE41-NEXT: psllw $2, %xmm5
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm5
+; SSE41-NEXT: paddb %xmm3, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: pblendvb %xmm5, %xmm4
+; SSE41-NEXT: movdqa %xmm4, %xmm5
+; SSE41-NEXT: paddb %xmm5, %xmm5
+; SSE41-NEXT: paddb %xmm3, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: pblendvb %xmm5, %xmm4
+; SSE41-NEXT: psllw $5, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: paddb %xmm3, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm5
+; SSE41-NEXT: psrlw $4, %xmm5
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm5
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: pblendvb %xmm5, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: psrlw $2, %xmm2
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: pblendvb %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: psrlw $1, %xmm2
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE41-NEXT: paddb %xmm3, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: pblendvb %xmm2, %xmm1
+; SSE41-NEXT: por %xmm4, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: var_rotate_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX-NEXT: vpsubb %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $4, %xmm0, %xmm3
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
+; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm3
+; AVX-NEXT: vpsllw $2, %xmm3, %xmm4
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm4, %xmm4
+; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm1, %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vpaddb %xmm3, %xmm3, %xmm4
+; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm1, %xmm4, %xmm3, %xmm1
+; AVX-NEXT: vpsllw $5, %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm3
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm4
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm4, %xmm4
+; AVX-NEXT: vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: var_rotate_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vprotb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: var_rotate_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; X32-SSE-NEXT: psubb %xmm1, %xmm4
+; X32-SSE-NEXT: psllw $5, %xmm1
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pxor %xmm2, %xmm2
+; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm5
+; X32-SSE-NEXT: psllw $4, %xmm5
+; X32-SSE-NEXT: pand .LCPI3_1, %xmm5
+; X32-SSE-NEXT: pand %xmm2, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm2
+; X32-SSE-NEXT: por %xmm5, %xmm2
+; X32-SSE-NEXT: paddb %xmm1, %xmm1
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm2, %xmm6
+; X32-SSE-NEXT: psllw $2, %xmm2
+; X32-SSE-NEXT: pand .LCPI3_2, %xmm2
+; X32-SSE-NEXT: pand %xmm5, %xmm2
+; X32-SSE-NEXT: por %xmm6, %xmm2
+; X32-SSE-NEXT: paddb %xmm1, %xmm1
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm1
+; X32-SSE-NEXT: pandn %xmm2, %xmm1
+; X32-SSE-NEXT: paddb %xmm2, %xmm2
+; X32-SSE-NEXT: pand %xmm5, %xmm2
+; X32-SSE-NEXT: psllw $5, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtb %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm0, %xmm6
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI3_3, %xmm0
+; X32-SSE-NEXT: pand %xmm5, %xmm0
+; X32-SSE-NEXT: por %xmm6, %xmm0
+; X32-SSE-NEXT: paddb %xmm4, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtb %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm0, %xmm6
+; X32-SSE-NEXT: psrlw $2, %xmm0
+; X32-SSE-NEXT: pand .LCPI3_4, %xmm0
+; X32-SSE-NEXT: pand %xmm5, %xmm0
+; X32-SSE-NEXT: por %xmm6, %xmm0
+; X32-SSE-NEXT: paddb %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtb %xmm4, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psrlw $1, %xmm0
+; X32-SSE-NEXT: pand .LCPI3_5, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
+ %b8 = sub <16 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, %b
+ %shl = shl <16 x i8> %a, %b
+ %lshr = lshr <16 x i8> %a, %b8
+ %or = or <16 x i8> %shl, %lshr
+ ret <16 x i8> %or
+}
+
+;
+; Constant Rotates
+;
+
+define <2 x i64> @constant_rotate_v2i64(<2 x i64> %a) nounwind {
+; SSE2-LABEL: constant_rotate_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psllq $14, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psllq $4, %xmm1
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrlq $50, %xmm1
+; SSE2-NEXT: psrlq $60, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: orpd %xmm2, %xmm1
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: constant_rotate_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psllq $14, %xmm1
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: psllq $4, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlq $50, %xmm1
+; SSE41-NEXT: psrlq $60, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: por %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: constant_rotate_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsllq $14, %xmm0, %xmm1
+; AVX1-NEXT: vpsllq $4, %xmm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpsrlq $50, %xmm0, %xmm2
+; AVX1-NEXT: vpsrlq $60, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: constant_rotate_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_rotate_v2i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshlq %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_rotate_v2i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; X32-SSE-LABEL: constant_rotate_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: psllq $14, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psllq $4, %xmm1
+; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrlq $50, %xmm1
+; X32-SSE-NEXT: psrlq $60, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X32-SSE-NEXT: orpd %xmm2, %xmm1
+; X32-SSE-NEXT: movapd %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <2 x i64> %a, <i64 4, i64 14>
+ %lshr = lshr <2 x i64> %a, <i64 60, i64 50>
+ %or = or <2 x i64> %shl, %lshr
+ ret <2 x i64> %or
+}
+
+define <4 x i32> @constant_rotate_v4i32(<4 x i32> %a) nounwind {
+; SSE2-LABEL: constant_rotate_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,32,64,128]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pmuludq %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrld $25, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psrld $27, %xmm3
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psrld $26, %xmm3
+; SSE2-NEXT: psrld $28, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: constant_rotate_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [16,32,64,128]
+; SSE41-NEXT: pmulld %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: psrld $25, %xmm2
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: psrld $27, %xmm3
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: psrld $26, %xmm2
+; SSE41-NEXT: psrld $28, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: constant_rotate_v4i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vpsrld $25, %xmm0, %xmm2
+; AVX1-NEXT: vpsrld $27, %xmm0, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpsrld $26, %xmm0, %xmm3
+; AVX1-NEXT: vpsrld $28, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: constant_rotate_v4i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_rotate_v4i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_rotate_v4i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; X32-SSE-LABEL: constant_rotate_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,32,64,128]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: pmuludq %xmm1, %xmm2
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X32-SSE-NEXT: pmuludq %xmm1, %xmm3
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrld $25, %xmm1
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psrld $27, %xmm3
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psrld $26, %xmm3
+; X32-SSE-NEXT: psrld $28, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
+ %lshr = lshr <4 x i32> %a, <i32 28, i32 27, i32 26, i32 25>
+ %or = or <4 x i32> %shl, %lshr
+ ret <4 x i32> %or
+}
+
+define <8 x i16> @constant_rotate_v8i16(<8 x i16> %a) nounwind {
+; SSE2-LABEL: constant_rotate_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
+; SSE2-NEXT: pmullw %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: psrlw $8, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,0,0,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,0,0,65535,65535,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: psrlw $2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [65535,0,65535,0,65535,0,65535,0]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm3, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: constant_rotate_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
+; SSE41-NEXT: pmullw %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psrlw $8, %xmm3
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [256,61680,57568,53456,49344,45232,41120,37008]
+; SSE41-NEXT: pblendvb %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psrlw $4, %xmm3
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [512,57824,49600,41376,33152,24928,16704,8480]
+; SSE41-NEXT: pblendvb %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psrlw $2, %xmm3
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [1024,50112,33664,17216,768,49856,33408,16960]
+; SSE41-NEXT: pblendvb %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psrlw $1, %xmm3
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [2048,34688,1792,34432,1536,34176,1280,33920]
+; SSE41-NEXT: pblendvb %xmm3, %xmm1
+; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: constant_rotate_v8i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [256,61680,57568,53456,49344,45232,41120,37008]
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [512,57824,49600,41376,33152,24928,16704,8480]
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1024,50112,33664,17216,768,49856,33408,16960]
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2048,34688,1792,34432,1536,34176,1280,33920]
+; AVX1-NEXT: vpblendvb %xmm3, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: constant_rotate_v8i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: constant_rotate_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm1
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpsubw {{.*}}(%rip), %xmm2, %xmm2
+; XOP-NEXT: vpshlw %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: constant_rotate_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
+; X32-SSE-NEXT: pmullw %xmm0, %xmm2
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
+; X32-SSE-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psrlw $8, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,0,0,0]
+; X32-SSE-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,0,0,65535,65535,0]
+; X32-SSE-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psrlw $2, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [65535,0,65535,0,65535,0,65535,0]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: pand %xmm3, %xmm1
+; X32-SSE-NEXT: psrlw $1, %xmm0
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: por %xmm2, %xmm3
+; X32-SSE-NEXT: por %xmm3, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+ %lshr = lshr <8 x i16> %a, <i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9>
+ %or = or <8 x i16> %shl, %lshr
+ ret <8 x i16> %or
+}
+
+define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind {
+; SSE2-LABEL: constant_rotate_v16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
+; SSE2-NEXT: psllw $5, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: psllw $4, %xmm4
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm4
+; SSE2-NEXT: pand %xmm1, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: paddb %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: psllw $2, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: por %xmm5, %xmm1
+; SSE2-NEXT: paddb %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm4, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: paddb %xmm1, %xmm1
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
+; SSE2-NEXT: psllw $5, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pandn %xmm0, %xmm6
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: paddb %xmm4, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pandn %xmm0, %xmm6
+; SSE2-NEXT: psrlw $2, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: paddb %xmm4, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: constant_rotate_v16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
+; SSE41-NEXT: psllw $5, %xmm0
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psllw $4, %xmm3
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: pblendvb %xmm3, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: psllw $2, %xmm3
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
+; SSE41-NEXT: paddb %xmm0, %xmm0
+; SSE41-NEXT: pblendvb %xmm3, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: paddb %xmm3, %xmm3
+; SSE41-NEXT: paddb %xmm0, %xmm0
+; SSE41-NEXT: pblendvb %xmm3, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
+; SSE41-NEXT: psllw $5, %xmm0
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psrlw $4, %xmm3
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
+; SSE41-NEXT: pblendvb %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psrlw $2, %xmm3
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
+; SSE41-NEXT: paddb %xmm0, %xmm0
+; SSE41-NEXT: pblendvb %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psrlw $1, %xmm3
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm3
+; SSE41-NEXT: paddb %xmm0, %xmm0
+; SSE41-NEXT: pblendvb %xmm3, %xmm1
+; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: constant_rotate_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
+; AVX-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vpsllw $2, %xmm2, %xmm3
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
+; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm3
+; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendvb %xmm1, %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpsllw $5, %xmm2, %xmm2
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
+; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $2, %xmm0, %xmm3
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
+; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm3
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3
+; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: constant_rotate_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm1
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpsubb {{.*}}(%rip), %xmm2, %xmm2
+; XOP-NEXT: vpshlb %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: constant_rotate_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
+; X32-SSE-NEXT: psllw $5, %xmm3
+; X32-SSE-NEXT: pxor %xmm2, %xmm2
+; X32-SSE-NEXT: pxor %xmm1, %xmm1
+; X32-SSE-NEXT: pcmpgtb %xmm3, %xmm1
+; X32-SSE-NEXT: movdqa %xmm0, %xmm4
+; X32-SSE-NEXT: psllw $4, %xmm4
+; X32-SSE-NEXT: pand .LCPI7_1, %xmm4
+; X32-SSE-NEXT: pand %xmm1, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm1
+; X32-SSE-NEXT: por %xmm4, %xmm1
+; X32-SSE-NEXT: paddb %xmm3, %xmm3
+; X32-SSE-NEXT: pxor %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtb %xmm3, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm5
+; X32-SSE-NEXT: pandn %xmm1, %xmm5
+; X32-SSE-NEXT: psllw $2, %xmm1
+; X32-SSE-NEXT: pand .LCPI7_2, %xmm1
+; X32-SSE-NEXT: pand %xmm4, %xmm1
+; X32-SSE-NEXT: por %xmm5, %xmm1
+; X32-SSE-NEXT: paddb %xmm3, %xmm3
+; X32-SSE-NEXT: pxor %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtb %xmm3, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm3
+; X32-SSE-NEXT: pandn %xmm1, %xmm3
+; X32-SSE-NEXT: paddb %xmm1, %xmm1
+; X32-SSE-NEXT: pand %xmm4, %xmm1
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
+; X32-SSE-NEXT: psllw $5, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtb %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm0, %xmm6
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI7_4, %xmm0
+; X32-SSE-NEXT: pand %xmm5, %xmm0
+; X32-SSE-NEXT: por %xmm6, %xmm0
+; X32-SSE-NEXT: paddb %xmm4, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtb %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm0, %xmm6
+; X32-SSE-NEXT: psrlw $2, %xmm0
+; X32-SSE-NEXT: pand .LCPI7_5, %xmm0
+; X32-SSE-NEXT: pand %xmm5, %xmm0
+; X32-SSE-NEXT: por %xmm6, %xmm0
+; X32-SSE-NEXT: paddb %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtb %xmm4, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psrlw $1, %xmm0
+; X32-SSE-NEXT: pand .LCPI7_6, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>
+ %lshr = lshr <16 x i8> %a, <i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+ %or = or <16 x i8> %shl, %lshr
+ ret <16 x i8> %or
+}
+
+;
+; Uniform Constant Rotates
+;
+
+define <2 x i64> @splatconstant_rotate_v2i64(<2 x i64> %a) nounwind {
+; SSE-LABEL: splatconstant_rotate_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllq $14, %xmm1
+; SSE-NEXT: psrlq $50, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatconstant_rotate_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllq $14, %xmm0, %xmm1
+; AVX-NEXT: vpsrlq $50, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_rotate_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vprotq $14, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_rotate_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psllq $14, %xmm1
+; X32-SSE-NEXT: psrlq $50, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <2 x i64> %a, <i64 14, i64 14>
+ %lshr = lshr <2 x i64> %a, <i64 50, i64 50>
+ %or = or <2 x i64> %shl, %lshr
+ ret <2 x i64> %or
+}
+
+define <4 x i32> @splatconstant_rotate_v4i32(<4 x i32> %a) nounwind {
+; SSE-LABEL: splatconstant_rotate_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: pslld $4, %xmm1
+; SSE-NEXT: psrld $28, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatconstant_rotate_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpslld $4, %xmm0, %xmm1
+; AVX-NEXT: vpsrld $28, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_rotate_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vprotd $4, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_rotate_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: pslld $4, %xmm1
+; X32-SSE-NEXT: psrld $28, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <4 x i32> %a, <i32 4, i32 4, i32 4, i32 4>
+ %lshr = lshr <4 x i32> %a, <i32 28, i32 28, i32 28, i32 28>
+ %or = or <4 x i32> %shl, %lshr
+ ret <4 x i32> %or
+}
+
+define <8 x i16> @splatconstant_rotate_v8i16(<8 x i16> %a) nounwind {
+; SSE-LABEL: splatconstant_rotate_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllw $7, %xmm1
+; SSE-NEXT: psrlw $9, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatconstant_rotate_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllw $7, %xmm0, %xmm1
+; AVX-NEXT: vpsrlw $9, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_rotate_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vprotw $7, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_rotate_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psllw $7, %xmm1
+; X32-SSE-NEXT: psrlw $9, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ %lshr = lshr <8 x i16> %a, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ %or = or <8 x i16> %shl, %lshr
+ ret <8 x i16> %or
+}
+
+define <16 x i8> @splatconstant_rotate_v16i8(<16 x i8> %a) nounwind {
+; SSE-LABEL: splatconstant_rotate_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllw $4, %xmm1
+; SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE-NEXT: psrlw $4, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatconstant_rotate_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_rotate_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vprotb $4, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_rotate_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psllw $4, %xmm1
+; X32-SSE-NEXT: pand .LCPI11_0, %xmm1
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI11_1, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ %lshr = lshr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ %or = or <16 x i8> %shl, %lshr
+ ret <16 x i8> %or
+}
+
+;
+; Masked Uniform Constant Rotates
+;
+
+define <2 x i64> @splatconstant_rotate_mask_v2i64(<2 x i64> %a) nounwind {
+; SSE-LABEL: splatconstant_rotate_mask_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllq $15, %xmm1
+; SSE-NEXT: psrlq $49, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatconstant_rotate_mask_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllq $15, %xmm0, %xmm1
+; AVX-NEXT: vpsrlq $49, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_rotate_mask_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vprotq $15, %xmm0, %xmm0
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_rotate_mask_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psllq $15, %xmm1
+; X32-SSE-NEXT: psrlq $49, %xmm0
+; X32-SSE-NEXT: pand .LCPI12_0, %xmm0
+; X32-SSE-NEXT: pand .LCPI12_1, %xmm1
+; X32-SSE-NEXT: por %xmm0, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <2 x i64> %a, <i64 15, i64 15>
+ %lshr = lshr <2 x i64> %a, <i64 49, i64 49>
+ %rmask = and <2 x i64> %lshr, <i64 255, i64 127>
+ %lmask = and <2 x i64> %shl, <i64 65, i64 33>
+ %or = or <2 x i64> %lmask, %rmask
+ ret <2 x i64> %or
+}
+
+define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
+; SSE-LABEL: splatconstant_rotate_mask_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: pslld $4, %xmm1
+; SSE-NEXT: psrld $28, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatconstant_rotate_mask_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpslld $4, %xmm0, %xmm1
+; AVX-NEXT: vpsrld $28, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_rotate_mask_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vprotd $4, %xmm0, %xmm0
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_rotate_mask_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: pslld $4, %xmm1
+; X32-SSE-NEXT: psrld $28, %xmm0
+; X32-SSE-NEXT: pand .LCPI13_0, %xmm0
+; X32-SSE-NEXT: pand .LCPI13_1, %xmm1
+; X32-SSE-NEXT: por %xmm0, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <4 x i32> %a, <i32 4, i32 4, i32 4, i32 4>
+ %lshr = lshr <4 x i32> %a, <i32 28, i32 28, i32 28, i32 28>
+ %rmask = and <4 x i32> %lshr, <i32 127, i32 255, i32 511, i32 1023>
+ %lmask = and <4 x i32> %shl, <i32 1023, i32 511, i32 255, i32 127>
+ %or = or <4 x i32> %lmask, %rmask
+ ret <4 x i32> %or
+}
+
+define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind {
+; SSE-LABEL: splatconstant_rotate_mask_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllw $5, %xmm1
+; SSE-NEXT: psrlw $11, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatconstant_rotate_mask_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllw $5, %xmm0, %xmm1
+; AVX-NEXT: vpsrlw $11, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_rotate_mask_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vprotw $5, %xmm0, %xmm0
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_rotate_mask_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psllw $5, %xmm1
+; X32-SSE-NEXT: psrlw $11, %xmm0
+; X32-SSE-NEXT: pand .LCPI14_0, %xmm0
+; X32-SSE-NEXT: pand .LCPI14_1, %xmm1
+; X32-SSE-NEXT: por %xmm0, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <8 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+ %lshr = lshr <8 x i16> %a, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+ %rmask = and <8 x i16> %lshr, <i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55>
+ %lmask = and <8 x i16> %shl, <i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33>
+ %or = or <8 x i16> %lmask, %rmask
+ ret <8 x i16> %or
+}
+
+define <16 x i8> @splatconstant_rotate_mask_v16i8(<16 x i8> %a) nounwind {
+; SSE-LABEL: splatconstant_rotate_mask_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: psllw $4, %xmm1
+; SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE-NEXT: psrlw $4, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: splatconstant_rotate_mask_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_rotate_mask_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vprotb $4, %xmm0, %xmm0
+; XOP-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_rotate_mask_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psllw $4, %xmm1
+; X32-SSE-NEXT: pand .LCPI15_0, %xmm1
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI15_1, %xmm0
+; X32-SSE-NEXT: pand .LCPI15_2, %xmm0
+; X32-SSE-NEXT: pand .LCPI15_3, %xmm1
+; X32-SSE-NEXT: por %xmm0, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE-NEXT: retl
+ %shl = shl <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ %lshr = lshr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ %rmask = and <16 x i8> %lshr, <i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55>
+ %lmask = and <16 x i8> %shl, <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
+ %or = or <16 x i8> %lmask, %rmask
+ ret <16 x i8> %or
+}
diff --git a/test/CodeGen/X86/vector-rotate-256.ll b/test/CodeGen/X86/vector-rotate-256.ll
new file mode 100644
index 000000000000..379b5fcb635f
--- /dev/null
+++ b/test/CodeGen/X86/vector-rotate-256.ll
@@ -0,0 +1,1089 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+
+;
+; Variable Rotates
+;
+
+define <4 x i64> @var_rotate_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
+; AVX1-LABEL: var_rotate_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [64,64]
+; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpsubq %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm6
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
+; AVX1-NEXT: vpsllq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm6
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm6[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT: vpsrlq %xmm2, %xmm5, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT: vpsrlq %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm4[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_rotate_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpsubq %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlvq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_rotate_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vprotq %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vprotq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_rotate_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vprotq %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vprotq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+ %b64 = sub <4 x i64> <i64 64, i64 64, i64 64, i64 64>, %b
+ %shl = shl <4 x i64> %a, %b
+ %lshr = lshr <4 x i64> %a, %b64
+ %or = or <4 x i64> %shl, %lshr
+ ret <4 x i64> %or
+}
+
+define <8 x i32> @var_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
+; AVX1-LABEL: var_rotate_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32]
+; AVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpsubd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpslld $23, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
+; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vcvttps2dq %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpmulld %xmm6, %xmm4, %xmm4
+; AVX1-NEXT: vpslld $23, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vcvttps2dq %xmm1, %xmm1
+; AVX1-NEXT: vpmulld %xmm0, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrld %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm5
+; AVX1-NEXT: vpsrld %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm7 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
+; AVX1-NEXT: vpsrld %xmm7, %xmm6, %xmm7
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
+; AVX1-NEXT: vpsrld %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm7[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrld %xmm4, %xmm0, %xmm4
+; AVX1-NEXT: vpsrlq $32, %xmm2, %xmm6
+; AVX1-NEXT: vpsrld %xmm6, %xmm0, %xmm6
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm5[2],xmm2[3],xmm5[3]
+; AVX1-NEXT: vpsrld %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
+; AVX1-NEXT: vpsrld %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm5[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3],xmm0[4,5],xmm4[6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_rotate_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpsubd %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_rotate_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vprotd %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vprotd %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_rotate_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vprotd %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vprotd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+ %b32 = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, %b
+ %shl = shl <8 x i32> %a, %b
+ %lshr = lshr <8 x i32> %a, %b32
+ %or = or <8 x i32> %shl, %lshr
+ ret <8 x i32> %or
+}
+
+define <16 x i16> @var_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
+; AVX1-LABEL: var_rotate_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
+; AVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpsubw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsllw $12, %xmm4, %xmm5
+; AVX1-NEXT: vpsllw $4, %xmm4, %xmm4
+; AVX1-NEXT: vpor %xmm5, %xmm4, %xmm5
+; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpsllw $8, %xmm4, %xmm7
+; AVX1-NEXT: vpblendvb %xmm5, %xmm7, %xmm4, %xmm5
+; AVX1-NEXT: vpsllw $4, %xmm5, %xmm7
+; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpsllw $2, %xmm5, %xmm7
+; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpsllw $1, %xmm5, %xmm7
+; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpsllw $12, %xmm1, %xmm6
+; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpor %xmm6, %xmm1, %xmm1
+; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm6
+; AVX1-NEXT: vpsllw $8, %xmm0, %xmm7
+; AVX1-NEXT: vpblendvb %xmm1, %xmm7, %xmm0, %xmm1
+; AVX1-NEXT: vpsllw $4, %xmm1, %xmm7
+; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw $2, %xmm1, %xmm7
+; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw $1, %xmm1, %xmm7
+; AVX1-NEXT: vpaddw %xmm6, %xmm6, %xmm6
+; AVX1-NEXT: vpblendvb %xmm6, %xmm7, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm1, %ymm1
+; AVX1-NEXT: vpsllw $12, %xmm3, %xmm5
+; AVX1-NEXT: vpsllw $4, %xmm3, %xmm3
+; AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpaddw %xmm3, %xmm3, %xmm5
+; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm6
+; AVX1-NEXT: vpblendvb %xmm3, %xmm6, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm3, %xmm4
+; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw $2, %xmm3, %xmm4
+; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw $1, %xmm3, %xmm4
+; AVX1-NEXT: vpaddw %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpsllw $12, %xmm2, %xmm4
+; AVX1-NEXT: vpsllw $4, %xmm2, %xmm2
+; AVX1-NEXT: vpor %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm4
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm5
+; AVX1-NEXT: vpblendvb %xmm2, %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm2
+; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_rotate_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT: vpsubw %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm3[4],ymm1[5],ymm3[5],ymm1[6],ymm3[6],ymm1[7],ymm3[7],ymm1[12],ymm3[12],ymm1[13],ymm3[13],ymm1[14],ymm3[14],ymm1[15],ymm3[15]
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT: vpsllvd %ymm4, %ymm5, %ymm4
+; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[8],ymm3[8],ymm1[9],ymm3[9],ymm1[10],ymm3[10],ymm1[11],ymm3[11]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpsrld $16, %ymm1, %ymm1
+; AVX2-NEXT: vpackusdw %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm3[4],ymm2[5],ymm3[5],ymm2[6],ymm3[6],ymm2[7],ymm3[7],ymm2[12],ymm3[12],ymm2[13],ymm3[13],ymm2[14],ymm3[14],ymm2[15],ymm3[15]
+; AVX2-NEXT: vpsrlvd %ymm4, %ymm5, %ymm4
+; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm3[0],ymm2[1],ymm3[1],ymm2[2],ymm3[2],ymm2[3],ymm3[3],ymm2[8],ymm3[8],ymm2[9],ymm3[9],ymm2[10],ymm3[10],ymm2[11],ymm3[11]
+; AVX2-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT: vpackusdw %ymm4, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_rotate_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vprotw %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vprotw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_rotate_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vprotw %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vprotw %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+ %b16 = sub <16 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, %b
+ %shl = shl <16 x i16> %a, %b
+ %lshr = lshr <16 x i16> %a, %b16
+ %or = or <16 x i16> %shl, %lshr
+ ret <16 x i16> %or
+}
+
+define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
+; AVX1-LABEL: var_rotate_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpsubb %xmm4, %xmm3, %xmm9
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpsllw $4, %xmm5, %xmm6
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX1-NEXT: vpand %xmm7, %xmm6, %xmm6
+; AVX1-NEXT: vpsllw $5, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm5, %xmm6
+; AVX1-NEXT: vpsllw $2, %xmm6, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm6
+; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm6, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $4, %xmm0, %xmm4
+; AVX1-NEXT: vpand %xmm7, %xmm4, %xmm4
+; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm0, %xmm4
+; AVX1-NEXT: vpsllw $2, %xmm4, %xmm6
+; AVX1-NEXT: vpand %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddb %xmm3, %xmm3, %xmm4
+; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendvb %xmm1, %xmm4, %xmm3, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vpsrlw $4, %xmm5, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $5, %xmm9, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm5
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm5
+; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm5
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX1-NEXT: vpand %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsllw $5, %xmm8, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3
+; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
+; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
+; AVX1-NEXT: vpand %xmm7, %xmm3, %xmm3
+; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_rotate_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX2-NEXT: vpsubb %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT: vpsllw $4, %ymm0, %ymm3
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm3
+; AVX2-NEXT: vpsllw $2, %ymm3, %ymm4
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
+; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpblendvb %ymm1, %ymm4, %ymm3, %ymm3
+; AVX2-NEXT: vpaddb %ymm3, %ymm3, %ymm4
+; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpblendvb %ymm1, %ymm4, %ymm3, %ymm1
+; AVX2-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm3
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm4
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
+; AVX2-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpaddb %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_rotate_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vprotb %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vprotb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_rotate_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vprotb %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vprotb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+ %b8 = sub <32 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, %b
+ %shl = shl <32 x i8> %a, %b
+ %lshr = lshr <32 x i8> %a, %b8
+ %or = or <32 x i8> %shl, %lshr
+ ret <32 x i8> %or
+}
+
+;
+; Constant Rotates
+;
+
+define <4 x i64> @constant_rotate_v4i64(<4 x i64> %a) nounwind {
+; AVX1-LABEL: constant_rotate_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $60, %xmm1, %xmm2
+; AVX1-NEXT: vpsllq $50, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpsllq $14, %xmm0, %xmm3
+; AVX1-NEXT: vpsllq $4, %xmm0, %xmm4
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vpsrlq $2, %xmm1, %xmm3
+; AVX1-NEXT: vpsrlq $14, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpsrlq $50, %xmm0, %xmm3
+; AVX1-NEXT: vpsrlq $60, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: constant_rotate_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_rotate_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm2, %xmm3
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm4
+; XOPAVX1-NEXT: vpshlq %xmm4, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm3, %xmm3
+; XOPAVX1-NEXT: vpshlq %xmm3, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_rotate_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm1
+; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <4 x i64> %a, <i64 4, i64 14, i64 50, i64 60>
+ %lshr = lshr <4 x i64> %a, <i64 60, i64 50, i64 14, i64 2>
+ %or = or <4 x i64> %shl, %lshr
+ ret <4 x i64> %or
+}
+
+define <8 x i32> @constant_rotate_v8i32(<8 x i32> %a) nounwind {
+; AVX1-LABEL: constant_rotate_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpsrld $21, %xmm2, %xmm3
+; AVX1-NEXT: vpsrld $23, %xmm2, %xmm4
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpsrld $22, %xmm2, %xmm4
+; AVX1-NEXT: vpsrld $24, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
+; AVX1-NEXT: vpsrld $25, %xmm0, %xmm3
+; AVX1-NEXT: vpsrld $27, %xmm0, %xmm4
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpsrld $26, %xmm0, %xmm4
+; AVX1-NEXT: vpsrld $28, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: constant_rotate_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_rotate_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm2, %xmm3
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm2, %xmm2
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_rotate_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm1
+; XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+ %lshr = lshr <8 x i32> %a, <i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21>
+ %or = or <8 x i32> %shl, %lshr
+ ret <8 x i32> %or
+}
+
+define <16 x i16> @constant_rotate_v8i16(<16 x i16> %a) nounwind {
+; AVX1-LABEL: constant_rotate_v8i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [32896,28784,24672,20560,16448,12336,8224,4112]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,57568,49344,41120,32896,24672,16448,8224]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $2, %xmm2, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [512,49600,33152,16704,256,49344,32896,16448]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1024,33664,768,33408,512,33152,256,32896]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,61680,57568,53456,49344,45232,41120,37008]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [512,57824,49600,41376,33152,24928,16704,8480]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1024,50112,33664,17216,768,49856,33408,16960]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [2048,34688,1792,34432,1536,34176,1280,33920]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: constant_rotate_v8i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm1
+; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1]
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT: vpsrlvd %ymm4, %ymm5, %ymm4
+; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX2-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT: vpackusdw %ymm4, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_rotate_v8i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm2, %xmm3
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm3, %xmm4
+; XOPAVX1-NEXT: vpshlw %xmm4, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm3, %xmm3
+; XOPAVX1-NEXT: vpshlw %xmm3, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_rotate_v8i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm2, %xmm3
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
+; XOPAVX2-NEXT: vpshlw %xmm3, %xmm4, %xmm3
+; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshlw %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+ %lshr = lshr <16 x i16> %a, <i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1>
+ %or = or <16 x i16> %shl, %lshr
+ ret <16 x i16> %or
+}
+
+define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind {
+; AVX1-LABEL: constant_rotate_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllw $4, %xmm1, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX1-NEXT: vpand %xmm8, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
+; AVX1-NEXT: vpsllw $5, %xmm4, %xmm4
+; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpsllw $2, %xmm2, %xmm5
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm5
+; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm7
+; AVX1-NEXT: vpblendvb %xmm7, %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm5
+; AVX1-NEXT: vpaddb %xmm7, %xmm7, %xmm3
+; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $4, %xmm0, %xmm5
+; AVX1-NEXT: vpand %xmm8, %xmm5, %xmm5
+; AVX1-NEXT: vpblendvb %xmm4, %xmm5, %xmm0, %xmm4
+; AVX1-NEXT: vpsllw $2, %xmm4, %xmm5
+; AVX1-NEXT: vpand %xmm6, %xmm5, %xmm5
+; AVX1-NEXT: vpblendvb %xmm7, %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpaddb %xmm4, %xmm4, %xmm5
+; AVX1-NEXT: vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm9
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm8 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm8, %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
+; AVX1-NEXT: vpsllw $5, %xmm5, %xmm5
+; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
+; AVX1-NEXT: vpaddb %xmm5, %xmm5, %xmm7
+; AVX1-NEXT: vpblendvb %xmm7, %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpaddb %xmm7, %xmm7, %xmm2
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3
+; AVX1-NEXT: vpand %xmm8, %xmm3, %xmm3
+; AVX1-NEXT: vpblendvb %xmm5, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm3
+; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
+; AVX1-NEXT: vpblendvb %xmm7, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm3
+; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpblendvb %xmm2, %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm9, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: constant_rotate_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
+; AVX2-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX2-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpsllw $2, %ymm2, %ymm3
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm3
+; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpblendvb %ymm1, %ymm3, %ymm2, %ymm1
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
+; AVX2-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm3
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $2, %ymm0, %ymm3
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm3
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3
+; AVX2-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_rotate_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm3
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm1
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm3, %xmm3
+; XOPAVX1-NEXT: vpshlb %xmm3, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm3, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_rotate_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,8,7,6,5,4,3,2,1]
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm3
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm1
+; XOPAVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX2-NEXT: vpsubb {{.*}}(%rip), %xmm3, %xmm3
+; XOPAVX2-NEXT: vpshlb %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm3, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>
+ %lshr = lshr <32 x i8> %a, <i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+ %or = or <32 x i8> %shl, %lshr
+ ret <32 x i8> %or
+}
+
+;
+; Uniform Constant Rotates
+;
+
+define <4 x i64> @splatconstant_rotate_v4i64(<4 x i64> %a) nounwind {
+; AVX1-LABEL: splatconstant_rotate_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsllq $14, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllq $14, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpsrlq $50, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlq $50, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_rotate_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllq $14, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlq $50, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_rotate_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vprotq $14, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vprotq $14, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_rotate_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vprotq $14, %xmm0, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; XOPAVX2-NEXT: vprotq $14, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <4 x i64> %a, <i64 14, i64 14, i64 14, i64 14>
+ %lshr = lshr <4 x i64> %a, <i64 50, i64 50, i64 50, i64 50>
+ %or = or <4 x i64> %shl, %lshr
+ ret <4 x i64> %or
+}
+
+define <8 x i32> @splatconstant_rotate_v8i32(<8 x i32> %a) nounwind {
+; AVX1-LABEL: splatconstant_rotate_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpslld $4, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpslld $4, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpsrld $28, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $28, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_rotate_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslld $4, %ymm0, %ymm1
+; AVX2-NEXT: vpsrld $28, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_rotate_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_rotate_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+ %lshr = lshr <8 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
+ %or = or <8 x i32> %shl, %lshr
+ ret <8 x i32> %or
+}
+
+define <16 x i16> @splatconstant_rotate_v16i16(<16 x i16> %a) nounwind {
+; AVX1-LABEL: splatconstant_rotate_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsllw $7, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllw $7, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpsrlw $9, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $9, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_rotate_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllw $7, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlw $9, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_rotate_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vprotw $7, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vprotw $7, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_rotate_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vprotw $7, %xmm0, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; XOPAVX2-NEXT: vprotw $7, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+ %lshr = lshr <16 x i16> %a, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ %or = or <16 x i16> %shl, %lshr
+ ret <16 x i16> %or
+}
+
+define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
+; AVX1-LABEL: splatconstant_rotate_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllw $4, %xmm1, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $4, %xmm0, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_rotate_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_rotate_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_rotate_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ %lshr = lshr <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ %or = or <32 x i8> %shl, %lshr
+ ret <32 x i8> %or
+}
+
+;
+; Masked Uniform Constant Rotates
+;
+
+define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
+; AVX1-LABEL: splatconstant_rotate_mask_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsllq $15, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllq $15, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlq $49, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_rotate_mask_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllq $15, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlq $49, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vprotq $15, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vprotq $15, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_rotate_mask_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vprotq $15, %xmm0, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; XOPAVX2-NEXT: vprotq $15, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <4 x i64> %a, <i64 15, i64 15, i64 15, i64 15>
+ %lshr = lshr <4 x i64> %a, <i64 49, i64 49, i64 49, i64 49>
+ %rmask = and <4 x i64> %lshr, <i64 255, i64 127, i64 127, i64 255>
+ %lmask = and <4 x i64> %shl, <i64 33, i64 65, i64 129, i64 257>
+ %or = or <4 x i64> %lmask, %rmask
+ ret <4 x i64> %or
+}
+
+define <8 x i32> @splatconstant_rotate_mask_v8i32(<8 x i32> %a) nounwind {
+; AVX1-LABEL: splatconstant_rotate_mask_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpslld $4, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpslld $4, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpsrld $28, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $28, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_rotate_mask_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslld $4, %ymm0, %ymm1
+; AVX2-NEXT: vpsrld $28, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_rotate_mask_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vprotd $4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_rotate_mask_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; XOPAVX2-NEXT: vprotd $4, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+ %lshr = lshr <8 x i32> %a, <i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28, i32 28>
+ %rmask = and <8 x i32> %lshr, <i32 3, i32 7, i32 15, i32 31, i32 63, i32 127, i32 255, i32 511>
+ %lmask = and <8 x i32> %shl, <i32 511, i32 255, i32 127, i32 63, i32 31, i32 15, i32 7, i32 3>
+ %or = or <8 x i32> %lmask, %rmask
+ ret <8 x i32> %or
+}
+
+define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind {
+; AVX1-LABEL: splatconstant_rotate_mask_v16i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsllw $5, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllw $5, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vpsrlw $11, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $11, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_rotate_mask_v16i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllw $5, %ymm0, %ymm1
+; AVX2-NEXT: vpsrlw $11, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_rotate_mask_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vprotw $5, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vprotw $5, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_rotate_mask_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vprotw $5, %xmm0, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; XOPAVX2-NEXT: vprotw $5, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <16 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+ %lshr = lshr <16 x i16> %a, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+ %rmask = and <16 x i16> %lshr, <i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55, i16 55>
+ %lmask = and <16 x i16> %shl, <i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33, i16 33>
+ %or = or <16 x i16> %lmask, %rmask
+ ret <16 x i16> %or
+}
+
+define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind {
+; AVX1-LABEL: splatconstant_rotate_mask_v32i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllw $4, %xmm1, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $4, %xmm0, %xmm4
+; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vandps {{.*}}(%rip), %ymm2, %ymm1
+; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splatconstant_rotate_mask_v32i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
+; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_rotate_mask_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vprotb $4, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_rotate_mask_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; XOPAVX2-NEXT: vprotb $4, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+ %shl = shl <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ %lshr = lshr <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+ %rmask = and <32 x i8> %lshr, <i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55, i8 55>
+ %lmask = and <32 x i8> %shl, <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
+ %or = or <32 x i8> %lmask, %rmask
+ ret <32 x i8> %or
+}
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 8e79493ddd07..b63c3f084b22 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -1,19 +1,348 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
;
; Just one 32-bit run to make sure we do reasonable things there.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=i686 -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
+
+define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_16i8_to_8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_16i8_to_8i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: psraw $8, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_16i8_to_8i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: sext_16i8_to_8i16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_16i8_to_8i16:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %C = sext <8 x i8> %B to <8 x i16>
+ ret <8 x i16> %C
+}
+
+define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_16i8_to_16i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: psraw $8, %xmm2
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; SSE2-NEXT: psraw $8, %xmm1
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_16i8_to_16i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: psraw $8, %xmm2
+; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; SSSE3-NEXT: psraw $8, %xmm1
+; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_16i8_to_16i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: sext_16i8_to_16i16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sext_16i8_to_16i16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_16i8_to_16i16:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm2
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm1
+; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = sext <16 x i8> %A to <16 x i16>
+ ret <16 x i16> %B
+}
+
+define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_16i8_to_4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: psrad $24, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_16i8_to_4i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: psrad $24, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_16i8_to_4i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: sext_16i8_to_4i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_16i8_to_4i32:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %C = sext <4 x i8> %B to <4 x i32>
+ ret <4 x i32> %C
+}
+
+define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_16i8_to_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: psrad $24, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: psrad $24, %xmm1
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_16i8_to_8i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: psrad $24, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: psrad $24, %xmm1
+; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_16i8_to_8i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbd %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: sext_16i8_to_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sext_16i8_to_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-NEXT: vpslld $24, %ymm0, %ymm0
+; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_16i8_to_8i32:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm2
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm1
+; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %C = sext <8 x i8> %B to <8 x i32>
+ ret <8 x i32> %C
+}
+
+define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_16i8_to_2i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: psrad $24, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_16i8_to_2i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: psrad $24, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_16i8_to_2i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbq %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: sext_16i8_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxbq %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_16i8_to_2i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+ %C = sext <2 x i8> %B to <2 x i64>
+ ret <2 x i64> %C
+}
+
+define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_16i8_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: psrad $24, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: psrld $16, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: psrad $24, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_16i8_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: movdqa %xmm2, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: psrad $24, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: psrld $16, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: psrad $24, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_16i8_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbq %xmm0, %xmm2
+; SSE41-NEXT: psrld $16, %xmm0
+; SSE41-NEXT: pmovsxbq %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: sext_16i8_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sext_16i8_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpslld $24, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_16i8_to_4i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm2
+; X32-SSE41-NEXT: psrld $16, %xmm0
+; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm1
+; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %C = sext <4 x i8> %B to <4 x i64>
+ ret <4 x i64> %C
+}
+
+define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_8i16_to_4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_8i16_to_4i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: psrad $16, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_8i16_to_4i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: sext_8i16_to_4i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_8i16_to_4i32:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %C = sext <4 x i16> %B to <4 x i32>
+ ret <4 x i32> %C
+}
define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
@@ -22,8 +351,7 @@ define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: psrad $16, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: psrad $16, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
@@ -58,7 +386,151 @@ define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
; X32-SSE41-NEXT: retl
entry:
%B = sext <8 x i16> %A to <8 x i32>
- ret <8 x i32>%B
+ ret <8 x i32> %B
+}
+
+define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_8i16_to_2i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_8i16_to_2i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: psrad $16, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_8i16_to_2i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxwq %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: sext_8i16_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxwq %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_8i16_to_2i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %C = sext <2 x i16> %B to <2 x i64>
+ ret <2 x i64> %C
+}
+
+define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_8i16_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: psrad $16, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_8i16_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT: movdqa %xmm2, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: psrad $16, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: psrad $16, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_8i16_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxwq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT: pmovsxwq %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: sext_8i16_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: sext_8i16_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: vpslld $16, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_8i16_to_4i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm2
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm1
+; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %C = sext <4 x i16> %B to <4 x i64>
+ ret <4 x i64> %C
+}
+
+define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_4i32_to_2i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_4i32_to_2i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_4i32_to_2i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: sext_4i32_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_4i32_to_2i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %C = sext <2 x i32> %B to <2 x i64>
+ ret <2 x i64> %C
}
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
@@ -114,87 +586,217 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
; X32-SSE41-NEXT: retl
entry:
%B = sext <4 x i32> %A to <4 x i64>
- ret <4 x i64>%B
+ ret <4 x i64> %B
}
-define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
-; SSE2-LABEL: sext_2i8_to_i32:
+define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
+; SSE-LABEL: load_sext_2i1_to_2i64:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movzbl (%rdi), %eax
+; SSE-NEXT: movq %rax, %rcx
+; SSE-NEXT: shlq $62, %rcx
+; SSE-NEXT: sarq $63, %rcx
+; SSE-NEXT: movd %rcx, %xmm1
+; SSE-NEXT: shlq $63, %rax
+; SSE-NEXT: sarq $63, %rax
+; SSE-NEXT: movd %rax, %xmm0
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: load_sext_2i1_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: movzbl (%rdi), %eax
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $62, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: shlq $63, %rax
+; AVX-NEXT: sarq $63, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movzbl (%eax), %eax
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $31, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: movd %ecx, %xmm0
+; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
+; X32-SSE41-NEXT: shll $30, %eax
+; X32-SSE41-NEXT: sarl $31, %eax
+; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0
+; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <2 x i1>, <2 x i1>* %ptr
+ %Y = sext <2 x i1> %X to <2 x i64>
+ ret <2 x i64> %Y
+}
+
+define <2 x i64> @load_sext_2i8_to_2i64(<2 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_2i8_to_2i64:
; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movzwl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: psraw $8, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: psrad $24, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: sext_2i8_to_i32:
+; SSSE3-LABEL: load_sext_2i8_to_2i64:
; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movzwl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: psraw $8, %xmm0
-; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: psrad $24, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: sext_2i8_to_i32:
+; SSE41-LABEL: load_sext_2i8_to_2i64:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
-; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: sext_2i8_to_i32:
+; AVX-LABEL: load_sext_2i8_to_2i64:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
; AVX-NEXT: retq
;
-; X32-SSE41-LABEL: sext_2i8_to_i32:
+; X32-SSE41-LABEL: load_sext_2i8_to_2i64:
; X32-SSE41: # BB#0: # %entry
-; X32-SSE41: pmovsxbw %xmm0, %xmm0
-; X32-SSE41-NEXT: movd %xmm0, %eax
-; X32-SSE41-NEXT: popl %edx
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0
; X32-SSE41-NEXT: retl
entry:
- %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
- %Ex = sext <2 x i8> %Shuf to <2 x i16>
- %Bc = bitcast <2 x i16> %Ex to i32
- ret i32 %Bc
+ %X = load <2 x i8>, <2 x i8>* %ptr
+ %Y = sext <2 x i8> %X to <2 x i64>
+ ret <2 x i64> %Y
}
-define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
-; SSE2-LABEL: load_sext_test1:
+define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
+; SSE2-LABEL: load_sext_4i1_to_4i32:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: movzbl (%rdi), %eax
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shlq $60, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shlq $61, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: shlq $63, %rax
+; SSE2-NEXT: sarq $63, %rax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: load_sext_test1:
+; SSSE3-LABEL: load_sext_4i1_to_4i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: psrad $16, %xmm0
+; SSSE3-NEXT: movzbl (%rdi), %eax
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shlq $60, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shlq $61, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: shlq $63, %rax
+; SSSE3-NEXT: sarq $63, %rax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: load_sext_test1:
+; SSE41-LABEL: load_sext_4i1_to_4i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovsxwd (%rdi), %xmm0
+; SSE41-NEXT: movzbl (%rdi), %eax
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $62, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: movq %rax, %rdx
+; SSE41-NEXT: shlq $63, %rdx
+; SSE41-NEXT: sarq $63, %rdx
+; SSE41-NEXT: movd %edx, %xmm0
+; SSE41-NEXT: pinsrd $1, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $61, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
+; SSE41-NEXT: shlq $60, %rax
+; SSE41-NEXT: sarq $63, %rax
+; SSE41-NEXT: pinsrd $3, %eax, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: load_sext_test1:
+; AVX-LABEL: load_sext_4i1_to_4i32:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
+; AVX-NEXT: movzbl (%rdi), %eax
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $62, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: movq %rax, %rdx
+; AVX-NEXT: shlq $63, %rdx
+; AVX-NEXT: sarq $63, %rdx
+; AVX-NEXT: vmovd %edx, %xmm0
+; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $61, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX-NEXT: shlq $60, %rax
+; AVX-NEXT: sarq $63, %rax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; X32-SSE41-LABEL: load_sext_test1:
+; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0
+; X32-SSE41-NEXT: movl (%eax), %eax
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $30, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: movl %eax, %edx
+; X32-SSE41-NEXT: shll $31, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: movd %edx, %xmm0
+; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $29, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0
+; X32-SSE41-NEXT: shll $28, %eax
+; X32-SSE41-NEXT: sarl $31, %eax
+; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
; X32-SSE41-NEXT: retl
entry:
- %X = load <4 x i16>, <4 x i16>* %ptr
- %Y = sext <4 x i16> %X to <4 x i32>
- ret <4 x i32>%Y
+ %X = load <4 x i1>, <4 x i1>* %ptr
+ %Y = sext <4 x i1> %X to <4 x i32>
+ ret <4 x i32> %Y
}
-define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
-; SSE2-LABEL: load_sext_test2:
+define <4 x i32> @load_sext_4i8_to_4i32(<4 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_4i8_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -202,7 +804,7 @@ define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: load_sext_test2:
+; SSSE3-LABEL: load_sext_4i8_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -210,17 +812,17 @@ define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: load_sext_test2:
+; SSE41-LABEL: load_sext_4i8_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbd (%rdi), %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: load_sext_test2:
+; AVX-LABEL: load_sext_4i8_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
; AVX-NEXT: retq
;
-; X32-SSE41-LABEL: load_sext_test2:
+; X32-SSE41-LABEL: load_sext_4i8_to_4i32:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0
@@ -228,57 +830,2662 @@ define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
entry:
%X = load <4 x i8>, <4 x i8>* %ptr
%Y = sext <4 x i8> %X to <4 x i32>
- ret <4 x i32>%Y
+ ret <4 x i32> %Y
}
-define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
-; SSE2-LABEL: load_sext_test3:
+define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
+; SSE2-LABEL: load_sext_4i1_to_4i64:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movzwl (%rdi), %eax
+; SSE2-NEXT: movzbl (%rdi), %eax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $3, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: shrl $2, %eax
+; SSE2-NEXT: andl $1, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
+; SSE2-NEXT: psllq $63, %xmm0
+; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3]
+; SSE2-NEXT: psllq $63, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_4i1_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movzbl (%rdi), %eax
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $3, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: shrl $2, %eax
+; SSSE3-NEXT: andl $1, %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
+; SSSE3-NEXT: psllq $63, %xmm0
+; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3]
+; SSSE3-NEXT: psllq $63, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_4i1_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movzbl (%rdi), %eax
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: movl %eax, %edx
+; SSE41-NEXT: andl $1, %edx
+; SSE41-NEXT: movd %edx, %xmm1
+; SSE41-NEXT: pinsrd $1, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $2, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrd $2, %ecx, %xmm1
+; SSE41-NEXT: shrl $3, %eax
+; SSE41-NEXT: andl $1, %eax
+; SSE41-NEXT: pinsrd $3, %eax, %xmm1
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT: psllq $63, %xmm0
+; SSE41-NEXT: psrad $31, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; SSE41-NEXT: psllq $63, %xmm1
+; SSE41-NEXT: psrad $31, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_4i1_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: movzbl (%rdi), %eax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shlq $62, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: movq %rax, %rdx
+; AVX1-NEXT: shlq $63, %rdx
+; AVX1-NEXT: sarq $63, %rdx
+; AVX1-NEXT: vmovd %edx, %xmm0
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shlq $61, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: shlq $60, %rax
+; AVX1-NEXT: sarq $63, %rax
+; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_4i1_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: movzbl (%rdi), %eax
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $60, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $61, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm1
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $62, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm1
+; AVX2-NEXT: shlq $63, %rax
+; AVX2-NEXT: sarq $63, %rax
+; AVX2-NEXT: vmovq %rax, %xmm2
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_4i1_to_4i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movzbl (%eax), %eax
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: movl %eax, %edx
+; X32-SSE41-NEXT: andl $1, %edx
+; X32-SSE41-NEXT: movd %edx, %xmm1
+; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $2, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm1
+; X32-SSE41-NEXT: shrl $3, %eax
+; X32-SSE41-NEXT: andl $1, %eax
+; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm1
+; X32-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
+; X32-SSE41-NEXT: psllq $63, %xmm0
+; X32-SSE41-NEXT: psrad $31, %xmm0
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; X32-SSE41-NEXT: psllq $63, %xmm1
+; X32-SSE41-NEXT: psrad $31, %xmm1
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <4 x i1>, <4 x i1>* %ptr
+ %Y = sext <4 x i1> %X to <4 x i64>
+ ret <4 x i64> %Y
+}
+
+define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_4i8_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movsbq 1(%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: movsbq (%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movsbq 3(%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm2
+; SSE2-NEXT: movsbq 2(%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_4i8_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movsbq 1(%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: movsbq (%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movsbq 3(%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm2
+; SSSE3-NEXT: movsbq 2(%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_4i8_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
+; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_4i8_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_4i8_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_4i8_to_4i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0
+; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <4 x i8>, <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i64>
+ ret <4 x i64> %Y
+}
+
+define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
+; SSE2-LABEL: load_sext_8i1_to_8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movsbq (%rdi), %rax
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shrq $7, %rcx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shlq $60, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shlq $58, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shlq $57, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shlq $61, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: shlq $59, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: shlq $63, %rax
+; SSE2-NEXT: sarq $63, %rax
; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_8i1_to_8i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movsbq (%rdi), %rax
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shrq $7, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shlq $60, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shlq $58, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shlq $57, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shlq $61, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: shlq $59, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: shlq $63, %rax
+; SSSE3-NEXT: sarq $63, %rax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_8i1_to_8i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movsbq (%rdi), %rax
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $62, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: movq %rax, %rdx
+; SSE41-NEXT: shlq $63, %rdx
+; SSE41-NEXT: sarq $63, %rdx
+; SSE41-NEXT: movd %edx, %xmm0
+; SSE41-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $61, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrw $2, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $60, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrw $3, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $59, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrw $4, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $58, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrw $5, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $57, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrw $6, %ecx, %xmm0
+; SSE41-NEXT: shrq $7, %rax
+; SSE41-NEXT: pinsrw $7, %eax, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: load_sext_8i1_to_8i16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: movsbq (%rdi), %rax
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $62, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: movq %rax, %rdx
+; AVX-NEXT: shlq $63, %rdx
+; AVX-NEXT: sarq $63, %rdx
+; AVX-NEXT: vmovd %edx, %xmm0
+; AVX-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $61, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $60, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $59, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $58, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $57, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; AVX-NEXT: shrq $7, %rax
+; AVX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_8i1_to_8i16:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movsbl (%eax), %eax
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $30, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: movl %eax, %edx
+; X32-SSE41-NEXT: shll $31, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: movd %edx, %xmm0
+; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $29, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $28, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $27, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $26, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $25, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm0
+; X32-SSE41-NEXT: shrl $7, %eax
+; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <8 x i1>, <8 x i1>* %ptr
+ %Y = sext <8 x i1> %X to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <8 x i16> @load_sext_8i8_to_8i16(<8 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_8i8_to_8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_8i8_to_8i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: psraw $8, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_8i8_to_8i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbw (%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: load_sext_8i8_to_8i16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxbw (%rdi), %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_8i8_to_8i16:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <8 x i8>, <8 x i8>* %ptr
+ %Y = sext <8 x i8> %X to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
+; SSE2-LABEL: load_sext_8i1_to_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movzbl (%rdi), %eax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $6, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $2, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $4, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $5, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $3, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: shrl $7, %eax
+; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pslld $31, %xmm0
+; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_8i1_to_8i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movzbl (%rdi), %eax
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $6, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $2, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $4, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $5, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $3, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: shrl $7, %eax
+; SSSE3-NEXT: movzwl %ax, %eax
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: pslld $31, %xmm0
+; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: pslld $31, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_8i1_to_8i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movzbl (%rdi), %eax
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: movl %eax, %edx
+; SSE41-NEXT: andl $1, %edx
+; SSE41-NEXT: movd %edx, %xmm1
+; SSE41-NEXT: pinsrw $1, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $2, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrw $2, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $3, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrw $3, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $4, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrw $4, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $5, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrw $5, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $6, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrw $6, %ecx, %xmm1
+; SSE41-NEXT: shrl $7, %eax
+; SSE41-NEXT: movzwl %ax, %eax
+; SSE41-NEXT: pinsrw $7, %eax, %xmm1
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE41-NEXT: pslld $31, %xmm0
+; SSE41-NEXT: psrad $31, %xmm0
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE41-NEXT: pslld $31, %xmm1
+; SSE41-NEXT: psrad $31, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_8i1_to_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: movsbq (%rdi), %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shlq $58, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: movq %rax, %rdx
+; AVX1-NEXT: shlq $59, %rdx
+; AVX1-NEXT: sarq $63, %rdx
+; AVX1-NEXT: vmovd %edx, %xmm0
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shlq $57, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq $7, %rcx
+; AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shlq $62, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: movq %rax, %rdx
+; AVX1-NEXT: shlq $63, %rdx
+; AVX1-NEXT: sarq $63, %rdx
+; AVX1-NEXT: vmovd %edx, %xmm1
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shlq $61, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: shlq $60, %rax
+; AVX1-NEXT: sarq $63, %rax
+; AVX1-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_8i1_to_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: movsbq (%rdi), %rax
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $58, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: movq %rax, %rdx
+; AVX2-NEXT: shlq $59, %rdx
+; AVX2-NEXT: sarq $63, %rdx
+; AVX2-NEXT: vmovd %edx, %xmm0
+; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $57, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shrq $7, %rcx
+; AVX2-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $62, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: movq %rax, %rdx
+; AVX2-NEXT: shlq $63, %rdx
+; AVX2-NEXT: sarq $63, %rdx
+; AVX2-NEXT: vmovd %edx, %xmm1
+; AVX2-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $61, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: shlq $60, %rax
+; AVX2-NEXT: sarq $63, %rax
+; AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_8i1_to_8i32:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movzbl (%eax), %eax
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: movl %eax, %edx
+; X32-SSE41-NEXT: andl $1, %edx
+; X32-SSE41-NEXT: movd %edx, %xmm1
+; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $2, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $3, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $4, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $5, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $6, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm1
+; X32-SSE41-NEXT: shrl $7, %eax
+; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm1
+; X32-SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X32-SSE41-NEXT: pslld $31, %xmm0
+; X32-SSE41-NEXT: psrad $31, %xmm0
+; X32-SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE41-NEXT: pslld $31, %xmm1
+; X32-SSE41-NEXT: psrad $31, %xmm1
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <8 x i1>, <8 x i1>* %ptr
+ %Y = sext <8 x i1> %X to <8 x i32>
+ ret <8 x i32> %Y
+}
+
+define <8 x i32> @load_sext_8i8_to_8i32(<8 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_8i8_to_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: psrad $24, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: psrad $24, %xmm1
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: load_sext_test3:
+; SSSE3-LABEL: load_sext_8i8_to_8i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movzwl (%rdi), %eax
-; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: psrad $24, %xmm0
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: psrad $24, %xmm1
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: load_sext_test3:
+; SSE41-LABEL: load_sext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
+; SSE41-NEXT: pmovsxbd (%rdi), %xmm0
+; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1
; SSE41-NEXT: retq
;
-; AVX-LABEL: load_sext_test3:
+; AVX1-LABEL: load_sext_8i8_to_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0
+; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_8i8_to_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_8i8_to_8i32:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0
+; X32-SSE41-NEXT: pmovsxbd 4(%eax), %xmm1
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <8 x i8>, <8 x i8>* %ptr
+ %Y = sext <8 x i8> %X to <8 x i32>
+ ret <8 x i32> %Y
+}
+
+define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
+; SSE2-LABEL: load_sext_16i1_to_16i8:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pushq %rbp
+; SSE2-NEXT: pushq %r15
+; SSE2-NEXT: pushq %r14
+; SSE2-NEXT: pushq %r13
+; SSE2-NEXT: pushq %r12
+; SSE2-NEXT: pushq %rbx
+; SSE2-NEXT: movswq (%rdi), %rax
+; SSE2-NEXT: movq %rax, %r8
+; SSE2-NEXT: movq %rax, %r9
+; SSE2-NEXT: movq %rax, %r10
+; SSE2-NEXT: movq %rax, %r11
+; SSE2-NEXT: movq %rax, %r14
+; SSE2-NEXT: movq %rax, %r15
+; SSE2-NEXT: movq %rax, %r12
+; SSE2-NEXT: movq %rax, %r13
+; SSE2-NEXT: movq %rax, %rbx
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: movq %rax, %rdx
+; SSE2-NEXT: movq %rax, %rsi
+; SSE2-NEXT: movq %rax, %rdi
+; SSE2-NEXT: movq %rax, %rbp
+; SSE2-NEXT: shlq $49, %rbp
+; SSE2-NEXT: sarq $63, %rbp
+; SSE2-NEXT: movd %ebp, %xmm0
+; SSE2-NEXT: movq %rax, %rbp
+; SSE2-NEXT: movsbq %al, %rax
+; SSE2-NEXT: shlq $57, %r8
+; SSE2-NEXT: sarq $63, %r8
+; SSE2-NEXT: movd %r8d, %xmm1
+; SSE2-NEXT: shlq $53, %r9
+; SSE2-NEXT: sarq $63, %r9
+; SSE2-NEXT: movd %r9d, %xmm2
+; SSE2-NEXT: shlq $61, %r10
+; SSE2-NEXT: sarq $63, %r10
+; SSE2-NEXT: movd %r10d, %xmm3
+; SSE2-NEXT: shlq $51, %r11
+; SSE2-NEXT: sarq $63, %r11
+; SSE2-NEXT: movd %r11d, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: shlq $59, %r14
+; SSE2-NEXT: sarq $63, %r14
+; SSE2-NEXT: movd %r14d, %xmm5
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: shlq $55, %r15
+; SSE2-NEXT: sarq $63, %r15
+; SSE2-NEXT: movd %r15d, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: shlq $63, %r12
+; SSE2-NEXT: sarq $63, %r12
+; SSE2-NEXT: movd %r12d, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSE2-NEXT: shlq $50, %r13
+; SSE2-NEXT: sarq $63, %r13
+; SSE2-NEXT: movd %r13d, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: shlq $58, %rbx
+; SSE2-NEXT: sarq $63, %rbx
+; SSE2-NEXT: movd %ebx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSE2-NEXT: shlq $54, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: shlq $62, %rdx
+; SSE2-NEXT: sarq $63, %rdx
+; SSE2-NEXT: movd %edx, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: shlq $52, %rsi
+; SSE2-NEXT: sarq $63, %rsi
+; SSE2-NEXT: movd %esi, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSE2-NEXT: shlq $60, %rdi
+; SSE2-NEXT: sarq $63, %rdi
+; SSE2-NEXT: movd %edi, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
+; SSE2-NEXT: shrq $15, %rbp
+; SSE2-NEXT: movd %ebp, %xmm1
+; SSE2-NEXT: shrq $7, %rax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: popq %r12
+; SSE2-NEXT: popq %r13
+; SSE2-NEXT: popq %r14
+; SSE2-NEXT: popq %r15
+; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_16i1_to_16i8:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pushq %rbp
+; SSSE3-NEXT: pushq %r15
+; SSSE3-NEXT: pushq %r14
+; SSSE3-NEXT: pushq %r13
+; SSSE3-NEXT: pushq %r12
+; SSSE3-NEXT: pushq %rbx
+; SSSE3-NEXT: movswq (%rdi), %rax
+; SSSE3-NEXT: movq %rax, %r8
+; SSSE3-NEXT: movq %rax, %r9
+; SSSE3-NEXT: movq %rax, %r10
+; SSSE3-NEXT: movq %rax, %r11
+; SSSE3-NEXT: movq %rax, %r14
+; SSSE3-NEXT: movq %rax, %r15
+; SSSE3-NEXT: movq %rax, %r12
+; SSSE3-NEXT: movq %rax, %r13
+; SSSE3-NEXT: movq %rax, %rbx
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: movq %rax, %rdx
+; SSSE3-NEXT: movq %rax, %rsi
+; SSSE3-NEXT: movq %rax, %rdi
+; SSSE3-NEXT: movq %rax, %rbp
+; SSSE3-NEXT: shlq $49, %rbp
+; SSSE3-NEXT: sarq $63, %rbp
+; SSSE3-NEXT: movd %ebp, %xmm0
+; SSSE3-NEXT: movq %rax, %rbp
+; SSSE3-NEXT: movsbq %al, %rax
+; SSSE3-NEXT: shlq $57, %r8
+; SSSE3-NEXT: sarq $63, %r8
+; SSSE3-NEXT: movd %r8d, %xmm1
+; SSSE3-NEXT: shlq $53, %r9
+; SSSE3-NEXT: sarq $63, %r9
+; SSSE3-NEXT: movd %r9d, %xmm2
+; SSSE3-NEXT: shlq $61, %r10
+; SSSE3-NEXT: sarq $63, %r10
+; SSSE3-NEXT: movd %r10d, %xmm3
+; SSSE3-NEXT: shlq $51, %r11
+; SSSE3-NEXT: sarq $63, %r11
+; SSSE3-NEXT: movd %r11d, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: shlq $59, %r14
+; SSSE3-NEXT: sarq $63, %r14
+; SSSE3-NEXT: movd %r14d, %xmm5
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSSE3-NEXT: shlq $55, %r15
+; SSSE3-NEXT: sarq $63, %r15
+; SSSE3-NEXT: movd %r15d, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSSE3-NEXT: shlq $63, %r12
+; SSSE3-NEXT: sarq $63, %r12
+; SSSE3-NEXT: movd %r12d, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSSE3-NEXT: shlq $50, %r13
+; SSSE3-NEXT: sarq $63, %r13
+; SSSE3-NEXT: movd %r13d, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: shlq $58, %rbx
+; SSSE3-NEXT: sarq $63, %rbx
+; SSSE3-NEXT: movd %ebx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSSE3-NEXT: shlq $54, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: shlq $62, %rdx
+; SSSE3-NEXT: sarq $63, %rdx
+; SSSE3-NEXT: movd %edx, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: shlq $52, %rsi
+; SSSE3-NEXT: sarq $63, %rsi
+; SSSE3-NEXT: movd %esi, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSSE3-NEXT: shlq $60, %rdi
+; SSSE3-NEXT: sarq $63, %rdi
+; SSSE3-NEXT: movd %edi, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
+; SSSE3-NEXT: shrq $15, %rbp
+; SSSE3-NEXT: movd %ebp, %xmm1
+; SSSE3-NEXT: shrq $7, %rax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: popq %rbx
+; SSSE3-NEXT: popq %r12
+; SSSE3-NEXT: popq %r13
+; SSSE3-NEXT: popq %r14
+; SSSE3-NEXT: popq %r15
+; SSSE3-NEXT: popq %rbp
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_16i1_to_16i8:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movswq (%rdi), %rax
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $62, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: movq %rax, %rdx
+; SSE41-NEXT: shlq $63, %rdx
+; SSE41-NEXT: sarq $63, %rdx
+; SSE41-NEXT: movd %edx, %xmm0
+; SSE41-NEXT: pinsrb $1, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $61, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $2, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $60, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $3, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $59, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $4, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $58, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $5, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $57, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $6, %ecx, %xmm0
+; SSE41-NEXT: movsbq %al, %rcx
+; SSE41-NEXT: shrq $7, %rcx
+; SSE41-NEXT: pinsrb $7, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $55, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $8, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $54, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $9, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $53, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $10, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $52, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $11, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $51, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $12, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $50, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $13, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $49, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $14, %ecx, %xmm0
+; SSE41-NEXT: shrq $15, %rax
+; SSE41-NEXT: pinsrb $15, %eax, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: load_sext_16i1_to_16i8:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
+; AVX-NEXT: movswq (%rdi), %rax
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $62, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: movq %rax, %rdx
+; AVX-NEXT: shlq $63, %rdx
+; AVX-NEXT: sarq $63, %rdx
+; AVX-NEXT: vmovd %edx, %xmm0
+; AVX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $61, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $60, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $59, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $58, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $57, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movsbq %al, %rcx
+; AVX-NEXT: shrq $7, %rcx
+; AVX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $55, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $54, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $53, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $52, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $51, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $50, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
+; AVX-NEXT: movq %rax, %rcx
+; AVX-NEXT: shlq $49, %rcx
+; AVX-NEXT: sarq $63, %rcx
+; AVX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; AVX-NEXT: shrq $15, %rax
+; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; X32-SSE41-LABEL: load_sext_test3:
+; X32-SSE41-LABEL: load_sext_16i1_to_16i8:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0
+; X32-SSE41-NEXT: movswl (%eax), %eax
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $30, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: movl %eax, %edx
+; X32-SSE41-NEXT: shll $31, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: movd %edx, %xmm0
+; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $29, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $28, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $27, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $26, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $25, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm0
+; X32-SSE41-NEXT: movsbl %al, %ecx
+; X32-SSE41-NEXT: shrl $7, %ecx
+; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $23, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $22, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $21, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $20, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $19, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $18, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm0
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $17, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm0
+; X32-SSE41-NEXT: shrl $15, %eax
+; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm0
; X32-SSE41-NEXT: retl
entry:
- %X = load <2 x i8>, <2 x i8>* %ptr
- %Y = sext <2 x i8> %X to <2 x i64>
- ret <2 x i64>%Y
+ %X = load <16 x i1>, <16 x i1>* %ptr
+ %Y = sext <16 x i1> %X to <16 x i8>
+ ret <16 x i8> %Y
}
-define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
-; SSE2-LABEL: load_sext_test4:
+define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
+; SSE2-LABEL: load_sext_16i1_to_16i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movzwl (%rdi), %eax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $14, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $6, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $10, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $2, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $12, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $4, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $13, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $5, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $9, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $11, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $3, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $7, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: shrl $15, %eax
+; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psllw $15, %xmm0
+; SSE2-NEXT: psraw $15, %xmm0
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; SSE2-NEXT: psllw $15, %xmm1
+; SSE2-NEXT: psraw $15, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_16i1_to_16i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movzwl (%rdi), %eax
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $14, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $6, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $10, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $2, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $12, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $4, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $8, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $13, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $5, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $9, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $11, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $3, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSSE3-NEXT: movl %eax, %ecx
+; SSSE3-NEXT: shrl $7, %ecx
+; SSSE3-NEXT: andl $1, %ecx
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: shrl $15, %eax
+; SSSE3-NEXT: movzwl %ax, %eax
+; SSSE3-NEXT: movd %eax, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: psllw $15, %xmm0
+; SSSE3-NEXT: psraw $15, %xmm0
+; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; SSSE3-NEXT: psllw $15, %xmm1
+; SSSE3-NEXT: psraw $15, %xmm1
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_16i1_to_16i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movzwl (%rdi), %eax
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: movl %eax, %edx
+; SSE41-NEXT: andl $1, %edx
+; SSE41-NEXT: movd %edx, %xmm1
+; SSE41-NEXT: pinsrb $1, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $2, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $2, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $3, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $3, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $4, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $4, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $5, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $5, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $6, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $6, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $7, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $7, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $8, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $8, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $9, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $9, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $10, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $10, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $11, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $11, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $12, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $12, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $13, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $13, %ecx, %xmm1
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: shrl $14, %ecx
+; SSE41-NEXT: andl $1, %ecx
+; SSE41-NEXT: pinsrb $14, %ecx, %xmm1
+; SSE41-NEXT: shrl $15, %eax
+; SSE41-NEXT: movzwl %ax, %eax
+; SSE41-NEXT: pinsrb $15, %eax, %xmm1
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; SSE41-NEXT: psllw $15, %xmm0
+; SSE41-NEXT: psraw $15, %xmm0
+; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE41-NEXT: psllw $15, %xmm1
+; SSE41-NEXT: psraw $15, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_16i1_to_16i16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: pushq %rbp
+; AVX1-NEXT: .Ltmp0:
+; AVX1-NEXT: .cfi_def_cfa_offset 16
+; AVX1-NEXT: pushq %r15
+; AVX1-NEXT: .Ltmp1:
+; AVX1-NEXT: .cfi_def_cfa_offset 24
+; AVX1-NEXT: pushq %r14
+; AVX1-NEXT: .Ltmp2:
+; AVX1-NEXT: .cfi_def_cfa_offset 32
+; AVX1-NEXT: pushq %r13
+; AVX1-NEXT: .Ltmp3:
+; AVX1-NEXT: .cfi_def_cfa_offset 40
+; AVX1-NEXT: pushq %r12
+; AVX1-NEXT: .Ltmp4:
+; AVX1-NEXT: .cfi_def_cfa_offset 48
+; AVX1-NEXT: pushq %rbx
+; AVX1-NEXT: .Ltmp5:
+; AVX1-NEXT: .cfi_def_cfa_offset 56
+; AVX1-NEXT: .Ltmp6:
+; AVX1-NEXT: .cfi_offset %rbx, -56
+; AVX1-NEXT: .Ltmp7:
+; AVX1-NEXT: .cfi_offset %r12, -48
+; AVX1-NEXT: .Ltmp8:
+; AVX1-NEXT: .cfi_offset %r13, -40
+; AVX1-NEXT: .Ltmp9:
+; AVX1-NEXT: .cfi_offset %r14, -32
+; AVX1-NEXT: .Ltmp10:
+; AVX1-NEXT: .cfi_offset %r15, -24
+; AVX1-NEXT: .Ltmp11:
+; AVX1-NEXT: .cfi_offset %rbp, -16
+; AVX1-NEXT: movswq (%rdi), %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shlq $55, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: vmovd %ecx, %xmm0
+; AVX1-NEXT: movq %rax, %r8
+; AVX1-NEXT: movq %rax, %r10
+; AVX1-NEXT: movq %rax, %r11
+; AVX1-NEXT: movq %rax, %r14
+; AVX1-NEXT: movq %rax, %r15
+; AVX1-NEXT: movq %rax, %r9
+; AVX1-NEXT: movq %rax, %r12
+; AVX1-NEXT: movq %rax, %r13
+; AVX1-NEXT: movq %rax, %rbx
+; AVX1-NEXT: movq %rax, %rdi
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: movq %rax, %rdx
+; AVX1-NEXT: movq %rax, %rsi
+; AVX1-NEXT: movsbq %al, %rbp
+; AVX1-NEXT: shlq $54, %rax
+; AVX1-NEXT: sarq $63, %rax
+; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX1-NEXT: shlq $53, %r8
+; AVX1-NEXT: sarq $63, %r8
+; AVX1-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0
+; AVX1-NEXT: shlq $52, %r10
+; AVX1-NEXT: sarq $63, %r10
+; AVX1-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0
+; AVX1-NEXT: shlq $51, %r11
+; AVX1-NEXT: sarq $63, %r11
+; AVX1-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0
+; AVX1-NEXT: shlq $50, %r14
+; AVX1-NEXT: sarq $63, %r14
+; AVX1-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0
+; AVX1-NEXT: shlq $49, %r15
+; AVX1-NEXT: sarq $63, %r15
+; AVX1-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0
+; AVX1-NEXT: shrq $15, %r9
+; AVX1-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0
+; AVX1-NEXT: shlq $63, %r13
+; AVX1-NEXT: sarq $63, %r13
+; AVX1-NEXT: vmovd %r13d, %xmm1
+; AVX1-NEXT: shlq $62, %r12
+; AVX1-NEXT: sarq $63, %r12
+; AVX1-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1
+; AVX1-NEXT: shlq $61, %rbx
+; AVX1-NEXT: sarq $63, %rbx
+; AVX1-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1
+; AVX1-NEXT: shlq $60, %rdi
+; AVX1-NEXT: sarq $63, %rdi
+; AVX1-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1
+; AVX1-NEXT: shlq $59, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: shlq $58, %rdx
+; AVX1-NEXT: sarq $63, %rdx
+; AVX1-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1
+; AVX1-NEXT: shlq $57, %rsi
+; AVX1-NEXT: sarq $63, %rsi
+; AVX1-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1
+; AVX1-NEXT: shrq $7, %rbp
+; AVX1-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: popq %rbx
+; AVX1-NEXT: popq %r12
+; AVX1-NEXT: popq %r13
+; AVX1-NEXT: popq %r14
+; AVX1-NEXT: popq %r15
+; AVX1-NEXT: popq %rbp
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_16i1_to_16i16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: .Ltmp0:
+; AVX2-NEXT: .cfi_def_cfa_offset 16
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: .Ltmp1:
+; AVX2-NEXT: .cfi_def_cfa_offset 24
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: .Ltmp2:
+; AVX2-NEXT: .cfi_def_cfa_offset 32
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: .Ltmp3:
+; AVX2-NEXT: .cfi_def_cfa_offset 40
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: .Ltmp4:
+; AVX2-NEXT: .cfi_def_cfa_offset 48
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: .Ltmp5:
+; AVX2-NEXT: .cfi_def_cfa_offset 56
+; AVX2-NEXT: .Ltmp6:
+; AVX2-NEXT: .cfi_offset %rbx, -56
+; AVX2-NEXT: .Ltmp7:
+; AVX2-NEXT: .cfi_offset %r12, -48
+; AVX2-NEXT: .Ltmp8:
+; AVX2-NEXT: .cfi_offset %r13, -40
+; AVX2-NEXT: .Ltmp9:
+; AVX2-NEXT: .cfi_offset %r14, -32
+; AVX2-NEXT: .Ltmp10:
+; AVX2-NEXT: .cfi_offset %r15, -24
+; AVX2-NEXT: .Ltmp11:
+; AVX2-NEXT: .cfi_offset %rbp, -16
+; AVX2-NEXT: movswq (%rdi), %rax
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $55, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vmovd %ecx, %xmm0
+; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: movq %rax, %r10
+; AVX2-NEXT: movq %rax, %r11
+; AVX2-NEXT: movq %rax, %r14
+; AVX2-NEXT: movq %rax, %r15
+; AVX2-NEXT: movq %rax, %r9
+; AVX2-NEXT: movq %rax, %r12
+; AVX2-NEXT: movq %rax, %r13
+; AVX2-NEXT: movq %rax, %rbx
+; AVX2-NEXT: movq %rax, %rdi
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: movq %rax, %rdx
+; AVX2-NEXT: movq %rax, %rsi
+; AVX2-NEXT: movsbq %al, %rbp
+; AVX2-NEXT: shlq $54, %rax
+; AVX2-NEXT: sarq $63, %rax
+; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; AVX2-NEXT: shlq $53, %r8
+; AVX2-NEXT: sarq $63, %r8
+; AVX2-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0
+; AVX2-NEXT: shlq $52, %r10
+; AVX2-NEXT: sarq $63, %r10
+; AVX2-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0
+; AVX2-NEXT: shlq $51, %r11
+; AVX2-NEXT: sarq $63, %r11
+; AVX2-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0
+; AVX2-NEXT: shlq $50, %r14
+; AVX2-NEXT: sarq $63, %r14
+; AVX2-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0
+; AVX2-NEXT: shlq $49, %r15
+; AVX2-NEXT: sarq $63, %r15
+; AVX2-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0
+; AVX2-NEXT: shrq $15, %r9
+; AVX2-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0
+; AVX2-NEXT: shlq $63, %r13
+; AVX2-NEXT: sarq $63, %r13
+; AVX2-NEXT: vmovd %r13d, %xmm1
+; AVX2-NEXT: shlq $62, %r12
+; AVX2-NEXT: sarq $63, %r12
+; AVX2-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1
+; AVX2-NEXT: shlq $61, %rbx
+; AVX2-NEXT: sarq $63, %rbx
+; AVX2-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1
+; AVX2-NEXT: shlq $60, %rdi
+; AVX2-NEXT: sarq $63, %rdi
+; AVX2-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1
+; AVX2-NEXT: shlq $59, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: shlq $58, %rdx
+; AVX2-NEXT: sarq $63, %rdx
+; AVX2-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1
+; AVX2-NEXT: shlq $57, %rsi
+; AVX2-NEXT: sarq $63, %rsi
+; AVX2-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1
+; AVX2-NEXT: shrq $7, %rbp
+; AVX2-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_16i1_to_16i16:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movzwl (%eax), %eax
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: movl %eax, %edx
+; X32-SSE41-NEXT: andl $1, %edx
+; X32-SSE41-NEXT: movd %edx, %xmm1
+; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $2, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $3, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $4, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $5, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $6, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $7, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $8, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $9, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $10, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $11, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $12, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $13, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shrl $14, %ecx
+; X32-SSE41-NEXT: andl $1, %ecx
+; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1
+; X32-SSE41-NEXT: shrl $15, %eax
+; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm1
+; X32-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; X32-SSE41-NEXT: psllw $15, %xmm0
+; X32-SSE41-NEXT: psraw $15, %xmm0
+; X32-SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE41-NEXT: psllw $15, %xmm1
+; X32-SSE41-NEXT: psraw $15, %xmm1
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <16 x i1>, <16 x i1>* %ptr
+ %Y = sext <16 x i1> %X to <16 x i16>
+ ret <16 x i16> %Y
+}
+
+define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
+; SSE2-LABEL: load_sext_32i1_to_32i8:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pushq %rbp
+; SSE2-NEXT: pushq %r15
+; SSE2-NEXT: pushq %r14
+; SSE2-NEXT: pushq %r13
+; SSE2-NEXT: pushq %r12
+; SSE2-NEXT: pushq %rbx
+; SSE2-NEXT: movswq (%rdi), %rbx
+; SSE2-NEXT: movq %rbx, %r10
+; SSE2-NEXT: movq %rbx, %r8
+; SSE2-NEXT: movq %rbx, %r9
+; SSE2-NEXT: movq %rbx, %r11
+; SSE2-NEXT: movq %rbx, %r14
+; SSE2-NEXT: movq %rbx, %r15
+; SSE2-NEXT: movq %rbx, %r12
+; SSE2-NEXT: movq %rbx, %r13
+; SSE2-NEXT: movq %rbx, %rdx
+; SSE2-NEXT: movq %rbx, %rsi
+; SSE2-NEXT: movq %rbx, %rcx
+; SSE2-NEXT: movq %rbx, %rbp
+; SSE2-NEXT: movq %rbx, %rax
+; SSE2-NEXT: shlq $49, %rax
+; SSE2-NEXT: sarq $63, %rax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movq %rbx, %rax
+; SSE2-NEXT: shlq $57, %r10
+; SSE2-NEXT: sarq $63, %r10
+; SSE2-NEXT: movd %r10d, %xmm15
+; SSE2-NEXT: movq %rbx, %r10
+; SSE2-NEXT: movsbq %bl, %rbx
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
+; SSE2-NEXT: shlq $53, %r8
+; SSE2-NEXT: sarq $63, %r8
+; SSE2-NEXT: movd %r8d, %xmm8
+; SSE2-NEXT: shlq $61, %r9
+; SSE2-NEXT: sarq $63, %r9
+; SSE2-NEXT: movd %r9d, %xmm2
+; SSE2-NEXT: shlq $51, %r11
+; SSE2-NEXT: sarq $63, %r11
+; SSE2-NEXT: movd %r11d, %xmm9
+; SSE2-NEXT: shlq $59, %r14
+; SSE2-NEXT: sarq $63, %r14
+; SSE2-NEXT: movd %r14d, %xmm5
+; SSE2-NEXT: shlq $55, %r15
+; SSE2-NEXT: sarq $63, %r15
+; SSE2-NEXT: movd %r15d, %xmm10
+; SSE2-NEXT: shlq $63, %r12
+; SSE2-NEXT: sarq $63, %r12
+; SSE2-NEXT: movd %r12d, %xmm0
+; SSE2-NEXT: shlq $50, %r13
+; SSE2-NEXT: sarq $63, %r13
+; SSE2-NEXT: movd %r13d, %xmm11
+; SSE2-NEXT: shlq $58, %rdx
+; SSE2-NEXT: sarq $63, %rdx
+; SSE2-NEXT: movd %edx, %xmm4
+; SSE2-NEXT: shlq $54, %rsi
+; SSE2-NEXT: sarq $63, %rsi
+; SSE2-NEXT: movd %esi, %xmm12
+; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm6
+; SSE2-NEXT: shlq $52, %rbp
+; SSE2-NEXT: sarq $63, %rbp
+; SSE2-NEXT: movd %ebp, %xmm13
+; SSE2-NEXT: shlq $60, %rax
+; SSE2-NEXT: sarq $63, %rax
+; SSE2-NEXT: movd %eax, %xmm7
+; SSE2-NEXT: shrq $15, %r10
+; SSE2-NEXT: movd %r10d, %xmm14
+; SSE2-NEXT: shrq $7, %rbx
+; SSE2-NEXT: movd %ebx, %xmm3
+; SSE2-NEXT: movswq 2(%rdi), %rdx
+; SSE2-NEXT: movq %rdx, %r8
+; SSE2-NEXT: movq %rdx, %r9
+; SSE2-NEXT: movq %rdx, %r10
+; SSE2-NEXT: movq %rdx, %r11
+; SSE2-NEXT: movq %rdx, %r14
+; SSE2-NEXT: movq %rdx, %r15
+; SSE2-NEXT: movq %rdx, %r12
+; SSE2-NEXT: movq %rdx, %r13
+; SSE2-NEXT: movq %rdx, %rbx
+; SSE2-NEXT: movq %rdx, %rax
+; SSE2-NEXT: movq %rdx, %rcx
+; SSE2-NEXT: movq %rdx, %rsi
+; SSE2-NEXT: movq %rdx, %rdi
+; SSE2-NEXT: movq %rdx, %rbp
+; SSE2-NEXT: shlq $49, %rbp
+; SSE2-NEXT: sarq $63, %rbp
+; SSE2-NEXT: movd %ebp, %xmm1
+; SSE2-NEXT: movq %rdx, %rbp
+; SSE2-NEXT: movsbq %dl, %rdx
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7]
+; SSE2-NEXT: shlq $57, %r8
+; SSE2-NEXT: sarq $63, %r8
+; SSE2-NEXT: movd %r8d, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
+; SSE2-NEXT: shlq $53, %r9
+; SSE2-NEXT: sarq $63, %r9
+; SSE2-NEXT: movd %r9d, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
+; SSE2-NEXT: shlq $61, %r10
+; SSE2-NEXT: sarq $63, %r10
+; SSE2-NEXT: movd %r10d, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSE2-NEXT: shlq $51, %r11
+; SSE2-NEXT: sarq $63, %r11
+; SSE2-NEXT: movd %r11d, %xmm5
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: shlq $59, %r14
+; SSE2-NEXT: sarq $63, %r14
+; SSE2-NEXT: movd %r14d, %xmm6
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE2-NEXT: shlq $55, %r15
+; SSE2-NEXT: sarq $63, %r15
+; SSE2-NEXT: movd %r15d, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSE2-NEXT: shlq $63, %r12
+; SSE2-NEXT: sarq $63, %r12
+; SSE2-NEXT: movd %r12d, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; SSE2-NEXT: shlq $50, %r13
+; SSE2-NEXT: sarq $63, %r13
+; SSE2-NEXT: movd %r13d, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE2-NEXT: shlq $58, %rbx
+; SSE2-NEXT: sarq $63, %rbx
+; SSE2-NEXT: movd %ebx, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; SSE2-NEXT: shlq $54, %rax
+; SSE2-NEXT: sarq $63, %rax
+; SSE2-NEXT: movd %eax, %xmm5
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: sarq $63, %rcx
+; SSE2-NEXT: movd %ecx, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: shlq $52, %rsi
+; SSE2-NEXT: sarq $63, %rsi
+; SSE2-NEXT: movd %esi, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE2-NEXT: shlq $60, %rdi
+; SSE2-NEXT: sarq $63, %rdi
+; SSE2-NEXT: movd %edi, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT: shrq $15, %rbp
+; SSE2-NEXT: movd %ebp, %xmm2
+; SSE2-NEXT: shrq $7, %rdx
+; SSE2-NEXT: movd %edx, %xmm5
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: popq %r12
+; SSE2-NEXT: popq %r13
+; SSE2-NEXT: popq %r14
+; SSE2-NEXT: popq %r15
+; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_32i1_to_32i8:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pushq %rbp
+; SSSE3-NEXT: pushq %r15
+; SSSE3-NEXT: pushq %r14
+; SSSE3-NEXT: pushq %r13
+; SSSE3-NEXT: pushq %r12
+; SSSE3-NEXT: pushq %rbx
+; SSSE3-NEXT: movswq (%rdi), %rbx
+; SSSE3-NEXT: movq %rbx, %r10
+; SSSE3-NEXT: movq %rbx, %r8
+; SSSE3-NEXT: movq %rbx, %r9
+; SSSE3-NEXT: movq %rbx, %r11
+; SSSE3-NEXT: movq %rbx, %r14
+; SSSE3-NEXT: movq %rbx, %r15
+; SSSE3-NEXT: movq %rbx, %r12
+; SSSE3-NEXT: movq %rbx, %r13
+; SSSE3-NEXT: movq %rbx, %rdx
+; SSSE3-NEXT: movq %rbx, %rsi
+; SSSE3-NEXT: movq %rbx, %rcx
+; SSSE3-NEXT: movq %rbx, %rbp
+; SSSE3-NEXT: movq %rbx, %rax
+; SSSE3-NEXT: shlq $49, %rax
+; SSSE3-NEXT: sarq $63, %rax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: movq %rbx, %rax
+; SSSE3-NEXT: shlq $57, %r10
+; SSSE3-NEXT: sarq $63, %r10
+; SSSE3-NEXT: movd %r10d, %xmm15
+; SSSE3-NEXT: movq %rbx, %r10
+; SSSE3-NEXT: movsbq %bl, %rbx
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
+; SSSE3-NEXT: shlq $53, %r8
+; SSSE3-NEXT: sarq $63, %r8
+; SSSE3-NEXT: movd %r8d, %xmm8
+; SSSE3-NEXT: shlq $61, %r9
+; SSSE3-NEXT: sarq $63, %r9
+; SSSE3-NEXT: movd %r9d, %xmm2
+; SSSE3-NEXT: shlq $51, %r11
+; SSSE3-NEXT: sarq $63, %r11
+; SSSE3-NEXT: movd %r11d, %xmm9
+; SSSE3-NEXT: shlq $59, %r14
+; SSSE3-NEXT: sarq $63, %r14
+; SSSE3-NEXT: movd %r14d, %xmm5
+; SSSE3-NEXT: shlq $55, %r15
+; SSSE3-NEXT: sarq $63, %r15
+; SSSE3-NEXT: movd %r15d, %xmm10
+; SSSE3-NEXT: shlq $63, %r12
+; SSSE3-NEXT: sarq $63, %r12
+; SSSE3-NEXT: movd %r12d, %xmm0
+; SSSE3-NEXT: shlq $50, %r13
+; SSSE3-NEXT: sarq $63, %r13
+; SSSE3-NEXT: movd %r13d, %xmm11
+; SSSE3-NEXT: shlq $58, %rdx
+; SSSE3-NEXT: sarq $63, %rdx
+; SSSE3-NEXT: movd %edx, %xmm4
+; SSSE3-NEXT: shlq $54, %rsi
+; SSSE3-NEXT: sarq $63, %rsi
+; SSSE3-NEXT: movd %esi, %xmm12
+; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm6
+; SSSE3-NEXT: shlq $52, %rbp
+; SSSE3-NEXT: sarq $63, %rbp
+; SSSE3-NEXT: movd %ebp, %xmm13
+; SSSE3-NEXT: shlq $60, %rax
+; SSSE3-NEXT: sarq $63, %rax
+; SSSE3-NEXT: movd %eax, %xmm7
+; SSSE3-NEXT: shrq $15, %r10
+; SSSE3-NEXT: movd %r10d, %xmm14
+; SSSE3-NEXT: shrq $7, %rbx
+; SSSE3-NEXT: movd %ebx, %xmm3
+; SSSE3-NEXT: movswq 2(%rdi), %rdx
+; SSSE3-NEXT: movq %rdx, %r8
+; SSSE3-NEXT: movq %rdx, %r9
+; SSSE3-NEXT: movq %rdx, %r10
+; SSSE3-NEXT: movq %rdx, %r11
+; SSSE3-NEXT: movq %rdx, %r14
+; SSSE3-NEXT: movq %rdx, %r15
+; SSSE3-NEXT: movq %rdx, %r12
+; SSSE3-NEXT: movq %rdx, %r13
+; SSSE3-NEXT: movq %rdx, %rbx
+; SSSE3-NEXT: movq %rdx, %rax
+; SSSE3-NEXT: movq %rdx, %rcx
+; SSSE3-NEXT: movq %rdx, %rsi
+; SSSE3-NEXT: movq %rdx, %rdi
+; SSSE3-NEXT: movq %rdx, %rbp
+; SSSE3-NEXT: shlq $49, %rbp
+; SSSE3-NEXT: sarq $63, %rbp
+; SSSE3-NEXT: movd %ebp, %xmm1
+; SSSE3-NEXT: movq %rdx, %rbp
+; SSSE3-NEXT: movsbq %dl, %rdx
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7]
+; SSSE3-NEXT: shlq $57, %r8
+; SSSE3-NEXT: sarq $63, %r8
+; SSSE3-NEXT: movd %r8d, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
+; SSSE3-NEXT: shlq $53, %r9
+; SSSE3-NEXT: sarq $63, %r9
+; SSSE3-NEXT: movd %r9d, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
+; SSSE3-NEXT: shlq $61, %r10
+; SSSE3-NEXT: sarq $63, %r10
+; SSSE3-NEXT: movd %r10d, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSSE3-NEXT: shlq $51, %r11
+; SSSE3-NEXT: sarq $63, %r11
+; SSSE3-NEXT: movd %r11d, %xmm5
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: shlq $59, %r14
+; SSSE3-NEXT: sarq $63, %r14
+; SSSE3-NEXT: movd %r14d, %xmm6
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSSE3-NEXT: shlq $55, %r15
+; SSSE3-NEXT: sarq $63, %r15
+; SSSE3-NEXT: movd %r15d, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSSE3-NEXT: shlq $63, %r12
+; SSSE3-NEXT: sarq $63, %r12
+; SSSE3-NEXT: movd %r12d, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; SSSE3-NEXT: shlq $50, %r13
+; SSSE3-NEXT: sarq $63, %r13
+; SSSE3-NEXT: movd %r13d, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSSE3-NEXT: shlq $58, %rbx
+; SSSE3-NEXT: sarq $63, %rbx
+; SSSE3-NEXT: movd %ebx, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; SSSE3-NEXT: shlq $54, %rax
+; SSSE3-NEXT: sarq $63, %rax
+; SSSE3-NEXT: movd %eax, %xmm5
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: sarq $63, %rcx
+; SSSE3-NEXT: movd %ecx, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSSE3-NEXT: shlq $52, %rsi
+; SSSE3-NEXT: sarq $63, %rsi
+; SSSE3-NEXT: movd %esi, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSSE3-NEXT: shlq $60, %rdi
+; SSSE3-NEXT: sarq $63, %rdi
+; SSSE3-NEXT: movd %edi, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSSE3-NEXT: shrq $15, %rbp
+; SSSE3-NEXT: movd %ebp, %xmm2
+; SSSE3-NEXT: shrq $7, %rdx
+; SSSE3-NEXT: movd %edx, %xmm5
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSSE3-NEXT: popq %rbx
+; SSSE3-NEXT: popq %r12
+; SSSE3-NEXT: popq %r13
+; SSSE3-NEXT: popq %r14
+; SSSE3-NEXT: popq %r15
+; SSSE3-NEXT: popq %rbp
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_32i1_to_32i8:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movswq (%rdi), %rax
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $62, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: movq %rax, %rdx
+; SSE41-NEXT: shlq $63, %rdx
+; SSE41-NEXT: sarq $63, %rdx
+; SSE41-NEXT: movd %edx, %xmm0
+; SSE41-NEXT: pinsrb $1, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $61, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $2, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $60, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $3, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $59, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $4, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $58, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $5, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $57, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $6, %ecx, %xmm0
+; SSE41-NEXT: movsbq %al, %rcx
+; SSE41-NEXT: shrq $7, %rcx
+; SSE41-NEXT: pinsrb $7, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $55, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $8, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $54, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $9, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $53, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $10, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $52, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $11, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $51, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $12, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $50, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $13, %ecx, %xmm0
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $49, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $14, %ecx, %xmm0
+; SSE41-NEXT: shrq $15, %rax
+; SSE41-NEXT: pinsrb $15, %eax, %xmm0
+; SSE41-NEXT: movswq 2(%rdi), %rax
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $62, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: movq %rax, %rdx
+; SSE41-NEXT: shlq $63, %rdx
+; SSE41-NEXT: sarq $63, %rdx
+; SSE41-NEXT: movd %edx, %xmm1
+; SSE41-NEXT: pinsrb $1, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $61, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $2, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $60, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $3, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $59, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $4, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $58, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $5, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $57, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $6, %ecx, %xmm1
+; SSE41-NEXT: movsbq %al, %rcx
+; SSE41-NEXT: shrq $7, %rcx
+; SSE41-NEXT: pinsrb $7, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $55, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $8, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $54, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $9, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $53, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $10, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $52, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $11, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $51, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $12, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $50, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $13, %ecx, %xmm1
+; SSE41-NEXT: movq %rax, %rcx
+; SSE41-NEXT: shlq $49, %rcx
+; SSE41-NEXT: sarq $63, %rcx
+; SSE41-NEXT: pinsrb $14, %ecx, %xmm1
+; SSE41-NEXT: shrq $15, %rax
+; SSE41-NEXT: pinsrb $15, %eax, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_32i1_to_32i8:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: pushq %rbp
+; AVX1-NEXT: pushq %r15
+; AVX1-NEXT: pushq %r14
+; AVX1-NEXT: pushq %r13
+; AVX1-NEXT: pushq %r12
+; AVX1-NEXT: pushq %rbx
+; AVX1-NEXT: movslq (%rdi), %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shlq $47, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: vmovd %ecx, %xmm0
+; AVX1-NEXT: movq %rax, %r8
+; AVX1-NEXT: movq %rax, %rdx
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: movq %rax, %rdi
+; AVX1-NEXT: movq %rax, %r13
+; AVX1-NEXT: movq %rax, %rsi
+; AVX1-NEXT: movq %rax, %r10
+; AVX1-NEXT: movq %rax, %r11
+; AVX1-NEXT: movq %rax, %r9
+; AVX1-NEXT: movq %rax, %rbx
+; AVX1-NEXT: movq %rax, %r14
+; AVX1-NEXT: movq %rax, %r15
+; AVX1-NEXT: movq %rax, %r12
+; AVX1-NEXT: movq %rax, %rbp
+; AVX1-NEXT: shlq $46, %rbp
+; AVX1-NEXT: sarq $63, %rbp
+; AVX1-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rbp
+; AVX1-NEXT: shlq $45, %r8
+; AVX1-NEXT: sarq $63, %r8
+; AVX1-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %r8
+; AVX1-NEXT: shlq $44, %rdx
+; AVX1-NEXT: sarq $63, %rdx
+; AVX1-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rdx
+; AVX1-NEXT: shlq $43, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shlq $42, %rdi
+; AVX1-NEXT: sarq $63, %rdi
+; AVX1-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rdi
+; AVX1-NEXT: shlq $41, %r13
+; AVX1-NEXT: sarq $63, %r13
+; AVX1-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %r13
+; AVX1-NEXT: shlq $40, %rsi
+; AVX1-NEXT: sarq $63, %rsi
+; AVX1-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rsi
+; AVX1-NEXT: shlq $39, %r10
+; AVX1-NEXT: sarq $63, %r10
+; AVX1-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %r10
+; AVX1-NEXT: shlq $38, %r11
+; AVX1-NEXT: sarq $63, %r11
+; AVX1-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0
+; AVX1-NEXT: movsbq %al, %r11
+; AVX1-NEXT: shlq $37, %r9
+; AVX1-NEXT: sarq $63, %r9
+; AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %r9
+; AVX1-NEXT: shlq $36, %rbx
+; AVX1-NEXT: sarq $63, %rbx
+; AVX1-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rbx
+; AVX1-NEXT: shlq $35, %r14
+; AVX1-NEXT: sarq $63, %r14
+; AVX1-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %r14
+; AVX1-NEXT: shlq $34, %r15
+; AVX1-NEXT: sarq $63, %r15
+; AVX1-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %r15
+; AVX1-NEXT: shlq $33, %r12
+; AVX1-NEXT: sarq $63, %r12
+; AVX1-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %r12
+; AVX1-NEXT: shrq $31, %rbp
+; AVX1-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0
+; AVX1-NEXT: movq %rax, %rbp
+; AVX1-NEXT: shlq $63, %rdx
+; AVX1-NEXT: sarq $63, %rdx
+; AVX1-NEXT: vmovd %edx, %xmm1
+; AVX1-NEXT: movq %rax, %rdx
+; AVX1-NEXT: movswq %ax, %rax
+; AVX1-NEXT: shlq $62, %r8
+; AVX1-NEXT: sarq $63, %r8
+; AVX1-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1
+; AVX1-NEXT: shlq $61, %rcx
+; AVX1-NEXT: sarq $63, %rcx
+; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
+; AVX1-NEXT: shlq $60, %rdi
+; AVX1-NEXT: sarq $63, %rdi
+; AVX1-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1
+; AVX1-NEXT: shlq $59, %r13
+; AVX1-NEXT: sarq $63, %r13
+; AVX1-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1
+; AVX1-NEXT: shlq $58, %rsi
+; AVX1-NEXT: sarq $63, %rsi
+; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1
+; AVX1-NEXT: shlq $57, %r10
+; AVX1-NEXT: sarq $63, %r10
+; AVX1-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1
+; AVX1-NEXT: shrq $7, %r11
+; AVX1-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1
+; AVX1-NEXT: shlq $55, %r9
+; AVX1-NEXT: sarq $63, %r9
+; AVX1-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1
+; AVX1-NEXT: shlq $54, %rbx
+; AVX1-NEXT: sarq $63, %rbx
+; AVX1-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1
+; AVX1-NEXT: shlq $53, %r14
+; AVX1-NEXT: sarq $63, %r14
+; AVX1-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
+; AVX1-NEXT: shlq $52, %r15
+; AVX1-NEXT: sarq $63, %r15
+; AVX1-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1
+; AVX1-NEXT: shlq $51, %r12
+; AVX1-NEXT: sarq $63, %r12
+; AVX1-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1
+; AVX1-NEXT: shlq $50, %rbp
+; AVX1-NEXT: sarq $63, %rbp
+; AVX1-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1
+; AVX1-NEXT: shlq $49, %rdx
+; AVX1-NEXT: sarq $63, %rdx
+; AVX1-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1
+; AVX1-NEXT: shrq $15, %rax
+; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: popq %rbx
+; AVX1-NEXT: popq %r12
+; AVX1-NEXT: popq %r13
+; AVX1-NEXT: popq %r14
+; AVX1-NEXT: popq %r15
+; AVX1-NEXT: popq %rbp
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_32i1_to_32i8:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: movslq (%rdi), %rax
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $47, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vmovd %ecx, %xmm0
+; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: movq %rax, %rdx
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: movq %rax, %rdi
+; AVX2-NEXT: movq %rax, %r13
+; AVX2-NEXT: movq %rax, %rsi
+; AVX2-NEXT: movq %rax, %r10
+; AVX2-NEXT: movq %rax, %r11
+; AVX2-NEXT: movq %rax, %r9
+; AVX2-NEXT: movq %rax, %rbx
+; AVX2-NEXT: movq %rax, %r14
+; AVX2-NEXT: movq %rax, %r15
+; AVX2-NEXT: movq %rax, %r12
+; AVX2-NEXT: movq %rax, %rbp
+; AVX2-NEXT: shlq $46, %rbp
+; AVX2-NEXT: sarq $63, %rbp
+; AVX2-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rbp
+; AVX2-NEXT: shlq $45, %r8
+; AVX2-NEXT: sarq $63, %r8
+; AVX2-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %r8
+; AVX2-NEXT: shlq $44, %rdx
+; AVX2-NEXT: sarq $63, %rdx
+; AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rdx
+; AVX2-NEXT: shlq $43, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rcx
+; AVX2-NEXT: shlq $42, %rdi
+; AVX2-NEXT: sarq $63, %rdi
+; AVX2-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rdi
+; AVX2-NEXT: shlq $41, %r13
+; AVX2-NEXT: sarq $63, %r13
+; AVX2-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %r13
+; AVX2-NEXT: shlq $40, %rsi
+; AVX2-NEXT: sarq $63, %rsi
+; AVX2-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rsi
+; AVX2-NEXT: shlq $39, %r10
+; AVX2-NEXT: sarq $63, %r10
+; AVX2-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %r10
+; AVX2-NEXT: shlq $38, %r11
+; AVX2-NEXT: sarq $63, %r11
+; AVX2-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0
+; AVX2-NEXT: movsbq %al, %r11
+; AVX2-NEXT: shlq $37, %r9
+; AVX2-NEXT: sarq $63, %r9
+; AVX2-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %r9
+; AVX2-NEXT: shlq $36, %rbx
+; AVX2-NEXT: sarq $63, %rbx
+; AVX2-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rbx
+; AVX2-NEXT: shlq $35, %r14
+; AVX2-NEXT: sarq $63, %r14
+; AVX2-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %r14
+; AVX2-NEXT: shlq $34, %r15
+; AVX2-NEXT: sarq $63, %r15
+; AVX2-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %r15
+; AVX2-NEXT: shlq $33, %r12
+; AVX2-NEXT: sarq $63, %r12
+; AVX2-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %r12
+; AVX2-NEXT: shrq $31, %rbp
+; AVX2-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0
+; AVX2-NEXT: movq %rax, %rbp
+; AVX2-NEXT: shlq $63, %rdx
+; AVX2-NEXT: sarq $63, %rdx
+; AVX2-NEXT: vmovd %edx, %xmm1
+; AVX2-NEXT: movq %rax, %rdx
+; AVX2-NEXT: movswq %ax, %rax
+; AVX2-NEXT: shlq $62, %r8
+; AVX2-NEXT: sarq $63, %r8
+; AVX2-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1
+; AVX2-NEXT: shlq $61, %rcx
+; AVX2-NEXT: sarq $63, %rcx
+; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
+; AVX2-NEXT: shlq $60, %rdi
+; AVX2-NEXT: sarq $63, %rdi
+; AVX2-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1
+; AVX2-NEXT: shlq $59, %r13
+; AVX2-NEXT: sarq $63, %r13
+; AVX2-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1
+; AVX2-NEXT: shlq $58, %rsi
+; AVX2-NEXT: sarq $63, %rsi
+; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1
+; AVX2-NEXT: shlq $57, %r10
+; AVX2-NEXT: sarq $63, %r10
+; AVX2-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1
+; AVX2-NEXT: shrq $7, %r11
+; AVX2-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1
+; AVX2-NEXT: shlq $55, %r9
+; AVX2-NEXT: sarq $63, %r9
+; AVX2-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1
+; AVX2-NEXT: shlq $54, %rbx
+; AVX2-NEXT: sarq $63, %rbx
+; AVX2-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1
+; AVX2-NEXT: shlq $53, %r14
+; AVX2-NEXT: sarq $63, %r14
+; AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
+; AVX2-NEXT: shlq $52, %r15
+; AVX2-NEXT: sarq $63, %r15
+; AVX2-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1
+; AVX2-NEXT: shlq $51, %r12
+; AVX2-NEXT: sarq $63, %r12
+; AVX2-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1
+; AVX2-NEXT: shlq $50, %rbp
+; AVX2-NEXT: sarq $63, %rbp
+; AVX2-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1
+; AVX2-NEXT: shlq $49, %rdx
+; AVX2-NEXT: sarq $63, %rdx
+; AVX2-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1
+; AVX2-NEXT: shrq $15, %rax
+; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: pushl %esi
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: movswl (%eax), %ecx
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $30, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: movl %ecx, %esi
+; X32-SSE41-NEXT: shll $31, %esi
+; X32-SSE41-NEXT: sarl $31, %esi
+; X32-SSE41-NEXT: movd %esi, %xmm0
+; X32-SSE41-NEXT: pinsrb $1, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $29, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $2, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $28, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $3, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $27, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $4, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $26, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $5, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $25, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $6, %edx, %xmm0
+; X32-SSE41-NEXT: movsbl %cl, %edx
+; X32-SSE41-NEXT: shrl $7, %edx
+; X32-SSE41-NEXT: pinsrb $7, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $23, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $8, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $22, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $9, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $21, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $10, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $20, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $11, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $19, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $12, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $18, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $13, %edx, %xmm0
+; X32-SSE41-NEXT: movl %ecx, %edx
+; X32-SSE41-NEXT: shll $17, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: pinsrb $14, %edx, %xmm0
+; X32-SSE41-NEXT: shrl $15, %ecx
+; X32-SSE41-NEXT: pinsrb $15, %ecx, %xmm0
+; X32-SSE41-NEXT: movswl 2(%eax), %eax
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $30, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: movl %eax, %edx
+; X32-SSE41-NEXT: shll $31, %edx
+; X32-SSE41-NEXT: sarl $31, %edx
+; X32-SSE41-NEXT: movd %edx, %xmm1
+; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $29, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $28, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $27, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $26, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $25, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm1
+; X32-SSE41-NEXT: movsbl %al, %ecx
+; X32-SSE41-NEXT: shrl $7, %ecx
+; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $23, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $22, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $21, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $20, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $19, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $18, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1
+; X32-SSE41-NEXT: movl %eax, %ecx
+; X32-SSE41-NEXT: shll $17, %ecx
+; X32-SSE41-NEXT: sarl $31, %ecx
+; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1
+; X32-SSE41-NEXT: shrl $15, %eax
+; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm1
+; X32-SSE41-NEXT: popl %esi
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <32 x i1>, <32 x i1>* %ptr
+ %Y = sext <32 x i1> %X to <32 x i8>
+ ret <32 x i8> %Y
+}
+
+define <16 x i16> @load_sext_16i8_to_16i16(<16 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_16i8_to_16i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm0
+; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_16i8_to_16i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: psraw $8, %xmm0
+; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: psraw $8, %xmm1
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_16i8_to_16i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbw (%rdi), %xmm0
+; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_16i8_to_16i16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0
+; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_16i8_to_16i16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_16i8_to_16i16:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0
+; X32-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <16 x i8>, <16 x i8>* %ptr
+ %Y = sext <16 x i8> %X to <16 x i16>
+ ret <16 x i16> %Y
+}
+
+define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) {
+; SSE2-LABEL: load_sext_2i16_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
@@ -288,7 +3495,7 @@ define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: load_sext_test4:
+; SSSE3-LABEL: load_sext_2i16_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
@@ -298,17 +3505,17 @@ define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: load_sext_test4:
+; SSE41-LABEL: load_sext_2i16_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxwq (%rdi), %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: load_sext_test4:
+; AVX-LABEL: load_sext_2i16_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovsxwq (%rdi), %xmm0
; AVX-NEXT: retq
;
-; X32-SSE41-LABEL: load_sext_test4:
+; X32-SSE41-LABEL: load_sext_2i16_to_2i64:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0
@@ -316,11 +3523,159 @@ define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
entry:
%X = load <2 x i16>, <2 x i16>* %ptr
%Y = sext <2 x i16> %X to <2 x i64>
- ret <2 x i64>%Y
+ ret <2 x i64> %Y
+}
+
+define <4 x i32> @load_sext_4i16_to_4i32(<4 x i16> *%ptr) {
+; SSE2-LABEL: load_sext_4i16_to_4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_4i16_to_4i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: psrad $16, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_4i16_to_4i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxwd (%rdi), %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: load_sext_4i16_to_4i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_4i16_to_4i32:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <4 x i16>, <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
+; SSE2-LABEL: load_sext_4i16_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movswq 2(%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: movswq (%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movswq 6(%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm2
+; SSE2-NEXT: movswq 4(%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_4i16_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movswq 2(%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: movswq (%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movswq 6(%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm2
+; SSSE3-NEXT: movswq 4(%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_4i16_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxwq (%rdi), %xmm0
+; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_4i16_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_4i16_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0
+; X32-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <4 x i16>, <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i64>
+ ret <4 x i64> %Y
}
-define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
-; SSE2-LABEL: load_sext_test5:
+define <8 x i32> @load_sext_8i16_to_8i32(<8 x i16> *%ptr) {
+; SSE2-LABEL: load_sext_8i16_to_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_8i16_to_8i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: psrad $16, %xmm0
+; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: psrad $16, %xmm1
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_8i16_to_8i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxwd (%rdi), %xmm0
+; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_8i16_to_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0
+; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_8i16_to_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_8i16_to_8i32:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0
+; X32-SSE41-NEXT: pmovsxwd 8(%eax), %xmm1
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <8 x i16>, <8 x i16>* %ptr
+ %Y = sext <8 x i16> %X to <8 x i32>
+ ret <8 x i32> %Y
+}
+
+define <2 x i64> @load_sext_2i32_to_2i64(<2 x i32> *%ptr) {
+; SSE2-LABEL: load_sext_2i32_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -328,7 +3683,7 @@ define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: load_sext_test5:
+; SSSE3-LABEL: load_sext_2i32_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: movdqa %xmm0, %xmm1
@@ -336,17 +3691,17 @@ define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: load_sext_test5:
+; SSE41-LABEL: load_sext_2i32_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxdq (%rdi), %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: load_sext_test5:
+; AVX-LABEL: load_sext_2i32_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovsxdq (%rdi), %xmm0
; AVX-NEXT: retq
;
-; X32-SSE41-LABEL: load_sext_test5:
+; X32-SSE41-LABEL: load_sext_2i32_to_2i64:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0
@@ -354,43 +3709,105 @@ define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
entry:
%X = load <2 x i32>, <2 x i32>* %ptr
%Y = sext <2 x i32> %X to <2 x i64>
- ret <2 x i64>%Y
+ ret <2 x i64> %Y
}
-define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
-; SSE2-LABEL: load_sext_test6:
+define <4 x i64> @load_sext_4i32_to_4i64(<4 x i32> *%ptr) {
+; SSE2-LABEL: load_sext_4i32_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_sext_4i32_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa (%rdi), %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: movdqa %xmm1, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_sext_4i32_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxdq (%rdi), %xmm0
+; SSE41-NEXT: pmovsxdq 8(%rdi), %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_sext_4i32_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovsxdq (%rdi), %xmm0
+; AVX1-NEXT: vpmovsxdq 8(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_sext_4i32_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovsxdq (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; X32-SSE41-LABEL: load_sext_4i32_to_4i64:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0
+; X32-SSE41-NEXT: pmovsxdq 8(%eax), %xmm1
+; X32-SSE41-NEXT: retl
+entry:
+ %X = load <4 x i32>, <4 x i32>* %ptr
+ %Y = sext <4 x i32> %X to <4 x i64>
+ ret <4 x i64> %Y
+}
+
+define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_2i8_to_i32:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: load_sext_test6:
+; SSSE3-LABEL: sext_2i8_to_i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: psraw $8, %xmm0
+; SSSE3-NEXT: movd %xmm0, %eax
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: load_sext_test6:
+; SSE41-LABEL: sext_2i8_to_i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovsxbw (%rdi), %xmm0
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
+; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: retq
;
-; AVX-LABEL: load_sext_test6:
+; AVX-LABEL: sext_2i8_to_i32:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vpmovsxbw (%rdi), %xmm0
+; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
+; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
;
-; X32-SSE41-LABEL: load_sext_test6:
+; X32-SSE41-LABEL: sext_2i8_to_i32:
; X32-SSE41: # BB#0: # %entry
-; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0
+; X32-SSE41-NEXT: pushl %eax
+; X32-SSE41-NEXT: .Ltmp0:
+; X32-SSE41-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
+; X32-SSE41-NEXT: movd %xmm0, %eax
+; X32-SSE41-NEXT: popl %ecx
; X32-SSE41-NEXT: retl
entry:
- %X = load <8 x i8>, <8 x i8>* %ptr
- %Y = sext <8 x i8> %X to <8 x i16>
- ret <8 x i16>%Y
+ %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+ %Ex = sext <2 x i8> %Shuf to <2 x i16>
+ %Bc = bitcast <2 x i16> %Ex to i32
+ ret i32 %Bc
}
define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
@@ -460,57 +3877,6 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
ret <4 x i64> %extmask
}
-define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
-; SSE2-LABEL: sext_16i8_to_16i16:
-; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: psraw $8, %xmm0
-; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: psraw $8, %xmm1
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: sext_16i8_to_16i16:
-; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: psraw $8, %xmm0
-; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: psraw $8, %xmm1
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: sext_16i8_to_16i16:
-; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovsxbw (%rdi), %xmm0
-; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1
-; SSE41-NEXT: retq
-;
-; AVX1-LABEL: sext_16i8_to_16i16:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0
-; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: sext_16i8_to_16i16:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; X32-SSE41-LABEL: sext_16i8_to_16i16:
-; X32-SSE41: # BB#0: # %entry
-; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0
-; X32-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1
-; X32-SSE41-NEXT: retl
-entry:
- %X = load <16 x i8>, <16 x i8>* %ptr
- %Y = sext <16 x i8> %X to <16 x i16>
- ret <16 x i16> %Y
-}
-
define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
; SSE2-LABEL: sext_4i8_to_4i64:
; SSE2: # BB#0:
@@ -577,125 +3943,3 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
%extmask = sext <4 x i8> %mask to <4 x i64>
ret <4 x i64> %extmask
}
-
-define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
-; SSE2-LABEL: load_sext_4i8_to_4i64:
-; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsbq 1(%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: movsbq (%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT: movsbq 3(%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: movsbq 2(%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: load_sext_4i8_to_4i64:
-; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsbq 1(%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: movsbq (%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT: movsbq 3(%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm2
-; SSSE3-NEXT: movsbq 2(%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: load_sext_4i8_to_4i64:
-; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
-; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1
-; SSE41-NEXT: retq
-;
-; AVX1-LABEL: load_sext_4i8_to_4i64:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0
-; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_sext_4i8_to_4i64:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; X32-SSE41-LABEL: load_sext_4i8_to_4i64:
-; X32-SSE41: # BB#0: # %entry
-; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0
-; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1
-; X32-SSE41-NEXT: retl
-entry:
- %X = load <4 x i8>, <4 x i8>* %ptr
- %Y = sext <4 x i8> %X to <4 x i64>
- ret <4 x i64>%Y
-}
-
-define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
-; SSE2-LABEL: load_sext_4i16_to_4i64:
-; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movswq 2(%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: movswq (%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE2-NEXT: movswq 6(%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: movswq 4(%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: load_sext_4i16_to_4i64:
-; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movswq 2(%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: movswq (%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT: movswq 6(%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm2
-; SSSE3-NEXT: movswq 4(%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: load_sext_4i16_to_4i64:
-; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovsxwq (%rdi), %xmm0
-; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1
-; SSE41-NEXT: retq
-;
-; AVX1-LABEL: load_sext_4i16_to_4i64:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0
-; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_sext_4i16_to_4i64:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
-; X32-SSE41: # BB#0: # %entry
-; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0
-; X32-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1
-; X32-SSE41-NEXT: retl
-entry:
- %X = load <4 x i16>, <4 x i16>* %ptr
- %Y = sext <4 x i16> %X to <4 x i64>
- ret <4 x i64>%Y
-}
diff --git a/test/CodeGen/X86/vector-shift-ashr-128.ll b/test/CodeGen/X86/vector-shift-ashr-128.ll
index 61b30154950d..771445df85e0 100644
--- a/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1,59 +1,115 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+
+;
+; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE2
;
; Variable Shifts
;
-define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: var_shift_v2i64:
; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: movd %xmm1, %rcx
-; SSE2-NEXT: sarq %cl, %rax
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rcx
-; SSE2-NEXT: sarq %cl, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: psrlq %xmm3, %xmm4
+; SSE2-NEXT: psrlq %xmm1, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrlq %xmm3, %xmm2
+; SSE2-NEXT: psrlq %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSE2-NEXT: xorpd %xmm4, %xmm2
+; SSE2-NEXT: psubq %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: var_shift_v2i64:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrq $1, %xmm0, %rax
-; SSE41-NEXT: pextrq $1, %xmm1, %rcx
-; SSE41-NEXT: sarq %cl, %rax
-; SSE41-NEXT: movd %rax, %xmm2
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: movd %xmm1, %rcx
-; SSE41-NEXT: sarq %cl, %rax
-; SSE41-NEXT: movd %rax, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE41-NEXT: movdqa %xmm2, %xmm3
+; SSE41-NEXT: psrlq %xmm1, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; SSE41-NEXT: psrlq %xmm4, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: psrlq %xmm1, %xmm3
+; SSE41-NEXT: psrlq %xmm4, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: psubq %xmm2, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: var_shift_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vpextrq $1, %xmm0, %rax
-; AVX-NEXT: vpextrq $1, %xmm1, %rcx
-; AVX-NEXT: sarq %cl, %rax
-; AVX-NEXT: vmovq %rax, %xmm2
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: vmovq %xmm1, %rcx
-; AVX-NEXT: sarq %cl, %rax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX-NEXT: retq
+; AVX1-LABEL: var_shift_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpsrlq %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpsrlq %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_shift_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: var_shift_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; XOP-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX512-NEXT: vpsrlvq %xmm1, %xmm2, %xmm3
+; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: psrlq %xmm2, %xmm4
+; X32-SSE-NEXT: movq {{.*#+}} xmm5 = xmm1[0],zero
+; X32-SSE-NEXT: psrlq %xmm5, %xmm3
+; X32-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm3[0],xmm4[1]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrlq %xmm2, %xmm1
+; X32-SSE-NEXT: psrlq %xmm5, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X32-SSE-NEXT: xorpd %xmm4, %xmm1
+; X32-SSE-NEXT: psubq %xmm4, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = ashr <2 x i64> %a, %b
ret <2 x i64> %shift
}
-define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: var_shift_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm1, %xmm2
@@ -119,11 +175,52 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v4i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vpshad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v4i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psrldq {{.*#+}} xmm2 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psrad %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psrlq $32, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm4
+; X32-SSE-NEXT: psrad %xmm2, %xmm4
+; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: movdqa %xmm1, %xmm4
+; X32-SSE-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm5
+; X32-SSE-NEXT: psrad %xmm4, %xmm5
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE-NEXT: psrad %xmm1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm0[0],xmm5[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE-NEXT: retl
%shift = ashr <4 x i32> %a, %b
ret <4 x i32> %shift
}
-define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: var_shift_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: psllw $12, %xmm1
@@ -216,11 +313,58 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
+;
+; XOP-LABEL: var_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; XOP-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psllw $12, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psraw $8, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psraw $4, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psraw $2, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: psraw $15, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: pandn %xmm0, %xmm2
+; X32-SSE-NEXT: psraw $1, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%shift = ashr <8 x i16> %a, %b
ret <8 x i16> %shift
}
-define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: var_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
@@ -342,6 +486,99 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: var_shift_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOP-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512-NEXT: vpsraw $4, %xmm3, %xmm4
+; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX512-NEXT: vpsraw $2, %xmm3, %xmm4
+; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX512-NEXT: vpsraw $1, %xmm3, %xmm4
+; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
+; AVX512-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512-NEXT: vpsraw $4, %xmm0, %xmm3
+; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpsraw $2, %xmm0, %xmm3
+; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpsraw $1, %xmm0, %xmm3
+; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; X32-SSE-NEXT: psllw $5, %xmm1
+; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtw %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm2, %xmm6
+; X32-SSE-NEXT: psraw $4, %xmm2
+; X32-SSE-NEXT: pand %xmm5, %xmm2
+; X32-SSE-NEXT: por %xmm6, %xmm2
+; X32-SSE-NEXT: paddw %xmm4, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtw %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm2, %xmm6
+; X32-SSE-NEXT: psraw $2, %xmm2
+; X32-SSE-NEXT: pand %xmm5, %xmm2
+; X32-SSE-NEXT: por %xmm6, %xmm2
+; X32-SSE-NEXT: paddw %xmm4, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtw %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm4
+; X32-SSE-NEXT: pandn %xmm2, %xmm4
+; X32-SSE-NEXT: psraw $1, %xmm2
+; X32-SSE-NEXT: pand %xmm5, %xmm2
+; X32-SSE-NEXT: por %xmm4, %xmm2
+; X32-SSE-NEXT: psrlw $8, %xmm2
+; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: pxor %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtw %xmm1, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm5
+; X32-SSE-NEXT: psraw $4, %xmm0
+; X32-SSE-NEXT: pand %xmm4, %xmm0
+; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: pxor %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtw %xmm1, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm5
+; X32-SSE-NEXT: psraw $2, %xmm0
+; X32-SSE-NEXT: pand %xmm4, %xmm0
+; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: pcmpgtw %xmm1, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm1
+; X32-SSE-NEXT: pandn %xmm0, %xmm1
+; X32-SSE-NEXT: psraw $1, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: psrlw $8, %xmm0
+; X32-SSE-NEXT: packuswb %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%shift = ashr <16 x i8> %a, %b
ret <16 x i8> %shift
}
@@ -350,71 +587,65 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; Uniform Variable Shifts
;
-define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: splatvar_shift_v2i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: movd %xmm2, %rcx
-; SSE2-NEXT: sarq %cl, %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rcx
-; SSE2-NEXT: sarq %cl, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
+; SSE-LABEL: splatvar_shift_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; SSE-NEXT: psrlq %xmm1, %xmm2
+; SSE-NEXT: psrlq %xmm1, %xmm0
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: psubq %xmm2, %xmm0
+; SSE-NEXT: retq
;
-; SSE41-LABEL: splatvar_shift_v2i64:
-; SSE41: # BB#0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; SSE41-NEXT: pextrq $1, %xmm0, %rax
-; SSE41-NEXT: pextrq $1, %xmm1, %rcx
-; SSE41-NEXT: sarq %cl, %rax
-; SSE41-NEXT: movd %rax, %xmm2
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: movd %xmm1, %rcx
-; SSE41-NEXT: sarq %cl, %rax
-; SSE41-NEXT: movd %rax, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE41-NEXT: retq
+; AVX-LABEL: splatvar_shift_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
;
-; AVX1-LABEL: splatvar_shift_v2i64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: vpextrq $1, %xmm1, %rcx
-; AVX1-NEXT: sarq %cl, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: vmovq %xmm1, %rcx
-; AVX1-NEXT: sarq %cl, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT: retq
+; XOPAVX1-LABEL: splatvar_shift_v2i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
;
-; AVX2-LABEL: splatvar_shift_v2i64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: vpextrq $1, %xmm1, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: vmovq %xmm1, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm0
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX2-NEXT: retq
+; XOPAVX2-LABEL: splatvar_shift_v2i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; XOPAVX2-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX512-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X32-SSE-NEXT: psrlq %xmm1, %xmm2
+; X32-SSE-NEXT: psrlq %xmm1, %xmm0
+; X32-SSE-NEXT: pxor %xmm2, %xmm0
+; X32-SSE-NEXT: psubq %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
%shift = ashr <2 x i64> %a, %splat
ret <2 x i64> %shift
}
-define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm2, %xmm2
@@ -435,12 +666,33 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_shift_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOP-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: xorps %xmm2, %xmm2
+; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X32-SSE-NEXT: psrad %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
%shift = ashr <4 x i32> %a, %splat
ret <4 x i32> %shift
}
-define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: movd %xmm1, %eax
@@ -462,12 +714,34 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; XOP-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: movzwl %ax, %eax
+; X32-SSE-NEXT: movd %eax, %xmm1
+; X32-SSE-NEXT: psraw %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i16> %a, %splat
ret <8 x i16> %shift
}
-define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -626,6 +900,113 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v16i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v16i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512-NEXT: vpsraw $4, %xmm3, %xmm4
+; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX512-NEXT: vpsraw $2, %xmm3, %xmm4
+; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX512-NEXT: vpsraw $1, %xmm3, %xmm4
+; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
+; AVX512-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512-NEXT: vpsraw $4, %xmm0, %xmm3
+; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpsraw $2, %xmm0, %xmm3
+; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpsraw $1, %xmm0, %xmm3
+; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; X32-SSE-NEXT: pshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,4,4]
+; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; X32-SSE-NEXT: psllw $5, %xmm3
+; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
+; X32-SSE-NEXT: pxor %xmm2, %xmm2
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtw %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm1, %xmm6
+; X32-SSE-NEXT: psraw $4, %xmm1
+; X32-SSE-NEXT: pand %xmm5, %xmm1
+; X32-SSE-NEXT: por %xmm6, %xmm1
+; X32-SSE-NEXT: paddw %xmm4, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtw %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm1, %xmm6
+; X32-SSE-NEXT: psraw $2, %xmm1
+; X32-SSE-NEXT: pand %xmm5, %xmm1
+; X32-SSE-NEXT: por %xmm6, %xmm1
+; X32-SSE-NEXT: paddw %xmm4, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtw %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm4
+; X32-SSE-NEXT: pandn %xmm1, %xmm4
+; X32-SSE-NEXT: psraw $1, %xmm1
+; X32-SSE-NEXT: pand %xmm5, %xmm1
+; X32-SSE-NEXT: por %xmm4, %xmm1
+; X32-SSE-NEXT: psrlw $8, %xmm1
+; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: pxor %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtw %xmm3, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm5
+; X32-SSE-NEXT: psraw $4, %xmm0
+; X32-SSE-NEXT: pand %xmm4, %xmm0
+; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: paddw %xmm3, %xmm3
+; X32-SSE-NEXT: pxor %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtw %xmm3, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm5
+; X32-SSE-NEXT: psraw $2, %xmm0
+; X32-SSE-NEXT: pand %xmm4, %xmm0
+; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: paddw %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtw %xmm3, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psraw $1, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: psrlw $8, %xmm0
+; X32-SSE-NEXT: packuswb %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i8> %a, %splat
ret <16 x i8> %shift
@@ -635,46 +1016,83 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; Constant Shifts
;
-define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: constant_shift_v2i64:
; SSE2: # BB#0:
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: sarq %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: sarq $7, %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrlq $7, %xmm1
+; SSE2-NEXT: psrlq $1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd {{.*#+}} xmm0 = [4611686018427387904,72057594037927936]
+; SSE2-NEXT: xorpd %xmm0, %xmm1
+; SSE2-NEXT: psubq %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_shift_v2i64:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrq $1, %xmm0, %rax
-; SSE41-NEXT: sarq $7, %rax
-; SSE41-NEXT: movd %rax, %xmm1
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: sarq %rax
-; SSE41-NEXT: movd %rax, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrlq $7, %xmm1
+; SSE41-NEXT: psrlq $1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4611686018427387904,72057594037927936]
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: psubq %xmm1, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: constant_shift_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vpextrq $1, %xmm0, %rax
-; AVX-NEXT: sarq $7, %rax
-; AVX-NEXT: vmovq %rax, %xmm1
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: sarq %rax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX-NEXT: retq
+; AVX1-LABEL: constant_shift_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm1
+; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [4611686018427387904,72057594037927936]
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: constant_shift_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [4611686018427387904,72057594037927936]
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubq %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: constant_shift_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [4611686018427387904,72057594037927936]
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psrlq $7, %xmm2
+; X32-SSE-NEXT: psrlq $1, %xmm1
+; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrlq $7, %xmm1
+; X32-SSE-NEXT: psrlq $1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X32-SSE-NEXT: xorpd %xmm2, %xmm1
+; X32-SSE-NEXT: psubq %xmm2, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = ashr <2 x i64> %a, <i64 1, i64 7>
ret <2 x i64> %shift
}
-define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: constant_shift_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -720,11 +1138,42 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v4i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v4i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrad $7, %xmm1
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: psrad $5, %xmm2
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: psrad $6, %xmm2
+; X32-SSE-NEXT: psrad $4, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE-NEXT: retl
%shift = ashr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
ret <4 x i32> %shift
}
-define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE2-LABEL: constant_shift_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -789,11 +1238,41 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
+;
+; XOP-LABEL: constant_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
+; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psraw $4, %xmm1
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
+; X32-SSE-NEXT: psraw $2, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,0,65535,0,65535,0]
+; X32-SSE-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE-NEXT: pand %xmm0, %xmm1
+; X32-SSE-NEXT: psraw $1, %xmm2
+; X32-SSE-NEXT: pandn %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
}
-define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: constant_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
@@ -918,6 +1397,101 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: constant_shift_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX512-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX512-NEXT: vpsraw $4, %xmm3, %xmm4
+; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX512-NEXT: vpsraw $2, %xmm3, %xmm4
+; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm3
+; AVX512-NEXT: vpsraw $1, %xmm3, %xmm4
+; AVX512-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm2, %xmm4, %xmm3, %xmm2
+; AVX512-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX512-NEXT: vpsraw $4, %xmm0, %xmm3
+; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpsraw $2, %xmm0, %xmm3
+; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpsraw $1, %xmm0, %xmm3
+; AVX512-NEXT: vpaddw %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX512-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; X32-SSE-NEXT: psllw $5, %xmm3
+; X32-SSE-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15]
+; X32-SSE-NEXT: pxor %xmm2, %xmm2
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtw %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm1, %xmm6
+; X32-SSE-NEXT: psraw $4, %xmm1
+; X32-SSE-NEXT: pand %xmm5, %xmm1
+; X32-SSE-NEXT: por %xmm6, %xmm1
+; X32-SSE-NEXT: paddw %xmm4, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtw %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm6
+; X32-SSE-NEXT: pandn %xmm1, %xmm6
+; X32-SSE-NEXT: psraw $2, %xmm1
+; X32-SSE-NEXT: pand %xmm5, %xmm1
+; X32-SSE-NEXT: por %xmm6, %xmm1
+; X32-SSE-NEXT: paddw %xmm4, %xmm4
+; X32-SSE-NEXT: pxor %xmm5, %xmm5
+; X32-SSE-NEXT: pcmpgtw %xmm4, %xmm5
+; X32-SSE-NEXT: movdqa %xmm5, %xmm4
+; X32-SSE-NEXT: pandn %xmm1, %xmm4
+; X32-SSE-NEXT: psraw $1, %xmm1
+; X32-SSE-NEXT: pand %xmm5, %xmm1
+; X32-SSE-NEXT: por %xmm4, %xmm1
+; X32-SSE-NEXT: psrlw $8, %xmm1
+; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: pxor %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtw %xmm3, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm5
+; X32-SSE-NEXT: psraw $4, %xmm0
+; X32-SSE-NEXT: pand %xmm4, %xmm0
+; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: paddw %xmm3, %xmm3
+; X32-SSE-NEXT: pxor %xmm4, %xmm4
+; X32-SSE-NEXT: pcmpgtw %xmm3, %xmm4
+; X32-SSE-NEXT: movdqa %xmm4, %xmm5
+; X32-SSE-NEXT: pandn %xmm0, %xmm5
+; X32-SSE-NEXT: psraw $2, %xmm0
+; X32-SSE-NEXT: pand %xmm4, %xmm0
+; X32-SSE-NEXT: por %xmm5, %xmm0
+; X32-SSE-NEXT: paddw %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtw %xmm3, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psraw $1, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: psrlw $8, %xmm0
+; X32-SSE-NEXT: packuswb %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
}
@@ -926,7 +1500,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; Uniform Constant Shifts
;
-define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: splatconstant_shift_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -958,11 +1532,35 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; AVX2-NEXT: vpsrlq $7, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrad $7, %xmm0, %xmm1
+; AVX512-NEXT: vpsrlq $7, %xmm0, %xmm0
+; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrad $7, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; X32-SSE-NEXT: psrlq $7, %xmm0
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE-NEXT: retl
%shift = ashr <2 x i64> %a, <i64 7, i64 7>
ret <2 x i64> %shift
}
-define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v4i32:
; SSE: # BB#0:
; SSE-NEXT: psrad $5, %xmm0
@@ -972,11 +1570,26 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; AVX: # BB#0:
; AVX-NEXT: vpsrad $5, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpsrad $5, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrad $5, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psrad $5, %xmm0
+; X32-SSE-NEXT: retl
%shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %shift
}
-define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v8i16:
; SSE: # BB#0:
; SSE-NEXT: psraw $3, %xmm0
@@ -986,11 +1599,26 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; AVX: # BB#0:
; AVX-NEXT: vpsraw $3, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpsraw $3, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsraw $3, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psraw $3, %xmm0
+; X32-SSE-NEXT: retl
%shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
}
-define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v16i8:
; SSE: # BB#0:
; SSE-NEXT: psrlw $3, %xmm0
@@ -1008,6 +1636,31 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psrlw $3, %xmm0
+; X32-SSE-NEXT: pand .LCPI15_0, %xmm0
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; X32-SSE-NEXT: pxor %xmm1, %xmm0
+; X32-SSE-NEXT: psubb %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
}
diff --git a/test/CodeGen/X86/vector-shift-ashr-256.ll b/test/CodeGen/X86/vector-shift-ashr-256.ll
index e4642558e0e4..0b9c318da047 100644
--- a/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1,65 +1,83 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Variable Shifts
;
-define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: var_shift_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpextrq $1, %xmm2, %rax
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrq $1, %xmm3, %rcx
-; AVX1-NEXT: sarq %cl, %rax
-; AVX1-NEXT: vmovq %rax, %xmm4
-; AVX1-NEXT: vmovq %xmm2, %rax
-; AVX1-NEXT: vmovq %xmm3, %rcx
-; AVX1-NEXT: sarq %cl, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: vpextrq $1, %xmm1, %rcx
-; AVX1-NEXT: sarq %cl, %rax
-; AVX1-NEXT: vmovq %rax, %xmm3
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: vmovq %xmm1, %rcx
-; AVX1-NEXT: sarq %cl, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
+; AVX1-NEXT: vpsrlq %xmm5, %xmm3, %xmm6
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpsrlq %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpsrlq %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm5[4,5,6,7]
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsubq %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpsrlq %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpsrlq %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: var_shift_v4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpextrq $1, %xmm2, %rax
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrq $1, %xmm3, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm4
-; AVX2-NEXT: vmovq %xmm2, %rax
-; AVX2-NEXT: vmovq %xmm3, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: vpextrq $1, %xmm1, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm3
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: vmovq %xmm1, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm0
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; XOPAVX1-NEXT: vpshaq %xmm2, %xmm4, %xmm2
+; XOPAVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
+; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX512-NEXT: vpsrlvq %ymm1, %ymm2, %ymm3
+; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubq %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = ashr <4 x i64> %a, %b
ret <4 x i64> %shift
}
-define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: var_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -94,11 +112,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; XOPAVX1-NEXT: vpshad %xmm2, %xmm4, %xmm2
+; XOPAVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
+; XOPAVX1-NEXT: vpshad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = ashr <8 x i32> %a, %b
ret <8 x i32> %shift
}
-define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: var_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -147,11 +187,40 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; XOPAVX1-NEXT: vpshaw %xmm2, %xmm4, %xmm2
+; XOPAVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; XOPAVX1-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
+; XOPAVX2-NEXT: vpshaw %xmm2, %xmm4, %xmm2
+; XOPAVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; XOPAVX2-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
%shift = ashr <16 x i16> %a, %b
ret <16 x i16> %shift
}
-define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: var_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -234,6 +303,58 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; XOPAVX1-NEXT: vpshab %xmm2, %xmm4, %xmm2
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
+; XOPAVX2-NEXT: vpshab %xmm2, %xmm4, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512-NEXT: vpsraw $4, %ymm3, %ymm4
+; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512-NEXT: vpsraw $2, %ymm3, %ymm4
+; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512-NEXT: vpsraw $1, %ymm3, %ymm4
+; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512-NEXT: vpsraw $4, %ymm0, %ymm3
+; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpsraw $2, %ymm0, %ymm3
+; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpsraw $1, %ymm0, %ymm3
+; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = ashr <32 x i8> %a, %b
ret <32 x i8> %shift
}
@@ -242,65 +363,64 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; Uniform Variable Shifts
;
-define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX1-NEXT: vpextrq $1, %xmm1, %rax
-; AVX1-NEXT: movb %al, %cl
-; AVX1-NEXT: sarq %cl, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm3
-; AVX1-NEXT: vmovq %xmm2, %rsi
-; AVX1-NEXT: vmovq %xmm1, %rdx
-; AVX1-NEXT: movb %dl, %cl
-; AVX1-NEXT: sarq %cl, %rsi
-; AVX1-NEXT: vmovq %rsi, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; AVX1-NEXT: vpextrq $1, %xmm0, %rsi
-; AVX1-NEXT: movb %al, %cl
-; AVX1-NEXT: sarq %cl, %rsi
-; AVX1-NEXT: vmovq %rsi, %xmm2
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: movb %dl, %cl
-; AVX1-NEXT: sarq %cl, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splatvar_shift_v4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpextrq $1, %xmm2, %rax
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrq $1, %xmm3, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm4
-; AVX2-NEXT: vmovq %xmm2, %rax
-; AVX2-NEXT: vmovq %xmm3, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: vpextrq $1, %xmm1, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm3
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: vmovq %xmm1, %rcx
-; AVX2-NEXT: sarq %cl, %rax
-; AVX2-NEXT: vmovq %rax, %xmm0
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX512-NEXT: vpsrlq %xmm1, %ymm2, %ymm2
+; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = ashr <4 x i64> %a, %splat
ret <4 x i64> %shift
}
-define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
@@ -317,12 +437,36 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOPAVX2-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vpsrad %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i32> %a, %splat
ret <8 x i32> %shift
}
-define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -341,12 +485,39 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vmovd %xmm1, %eax
+; XOPAVX1-NEXT: movzwl %ax, %eax
+; XOPAVX1-NEXT: vmovd %eax, %xmm1
+; XOPAVX1-NEXT: vpsraw %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vmovd %xmm1, %eax
+; XOPAVX2-NEXT: movzwl %ax, %eax
+; XOPAVX2-NEXT: vmovd %eax, %xmm1
+; XOPAVX2-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vmovd %eax, %xmm1
+; AVX512-NEXT: vpsraw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i16> %a, %splat
ret <16 x i16> %shift
}
-define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
@@ -424,6 +595,59 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %ymm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
+; XOPAVX2-NEXT: vpshab %xmm2, %xmm4, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512-NEXT: vpsraw $4, %ymm3, %ymm4
+; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512-NEXT: vpsraw $2, %ymm3, %ymm4
+; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512-NEXT: vpsraw $1, %ymm3, %ymm4
+; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512-NEXT: vpsraw $4, %ymm0, %ymm3
+; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpsraw $2, %ymm0, %ymm3
+; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpsraw $1, %ymm0, %ymm3
+; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = ashr <32 x i8> %a, %splat
ret <32 x i8> %shift
@@ -433,51 +657,64 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; Constant Shifts
;
-define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: constant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrq $1, %xmm1, %rax
-; AVX1-NEXT: sarq $62, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vmovq %xmm1, %rax
-; AVX1-NEXT: sarq $31, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: sarq $7, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: sarq %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vpsrlq $62, %xmm1, %xmm2
+; AVX1-NEXT: vpsrlq $31, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4294967296,2]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm2
+; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4611686018427387904,72057594037927936]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm1, %rax
-; AVX2-NEXT: sarq $62, %rax
-; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vmovq %xmm1, %rax
-; AVX2-NEXT: sarq $31, %rax
-; AVX2-NEXT: vmovq %rax, %xmm1
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: sarq $7, %rax
-; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: sarq %rax
-; AVX2-NEXT: vmovq %rax, %xmm0
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
+; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vpshaq %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
+; XOPAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [4611686018427387904,72057594037927936,4294967296,2]
+; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
ret <4 x i64> %shift
}
-define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: constant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrad $7, %xmm0, %xmm1
@@ -500,11 +737,29 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpshad {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
ret <8 x i32> %shift
}
-define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -549,11 +804,39 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vpshaw %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vpshaw %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpshaw %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift
}
-define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: constant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
@@ -630,6 +913,55 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; XOPAVX2-NEXT: vpshab %xmm1, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512-NEXT: vpsraw $4, %ymm3, %ymm4
+; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512-NEXT: vpsraw $2, %ymm3, %ymm4
+; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512-NEXT: vpsraw $1, %ymm3, %ymm4
+; AVX512-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512-NEXT: vpsraw $4, %ymm0, %ymm3
+; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpsraw $2, %ymm0, %ymm3
+; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpsraw $1, %ymm0, %ymm3
+; AVX512-NEXT: vpaddw %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
@@ -638,7 +970,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; Uniform Constant Shifts
;
-define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -657,11 +989,36 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; AVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshaq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
+; XOPAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrad $7, %ymm0, %ymm1
+; AVX512-NEXT: vpsrlq $7, %ymm0, %ymm0
+; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512-NEXT: retq
%shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
-define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrad $5, %xmm0, %xmm1
@@ -674,11 +1031,29 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrad $5, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpsrad $5, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpsrad $5, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrad $5, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrad $5, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
-define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vpsraw $3, %xmm0, %xmm1
@@ -691,11 +1066,29 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsraw $3, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpsraw $3, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpsraw $3, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsraw $3, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsraw $3, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
-define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -720,6 +1113,34 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshab %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; XOPAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
diff --git a/test/CodeGen/X86/vector-shift-ashr-512.ll b/test/CodeGen/X86/vector-shift-ashr-512.ll
new file mode 100644
index 000000000000..147e58f4710e
--- /dev/null
+++ b/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -0,0 +1,378 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+;
+; Variable Shifts
+;
+
+define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; ALL-LABEL: var_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsravq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = ashr <8 x i64> %a, %b
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; ALL-LABEL: var_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsravd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = ashr <16 x i32> %a, %b
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512DQ-LABEL: var_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsravd %ymm5, %ymm6, %ymm5
+; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsravd %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsravd %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT: vpsrld $16, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsravd %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: var_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = ashr <32 x i16> %a, %b
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512DQ-LABEL: var_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpsraw $2, %ymm5, %ymm6
+; AVX512DQ-NEXT: vpaddw %ymm4, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm6, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpsraw $1, %ymm5, %ymm6
+; AVX512DQ-NEXT: vpaddw %ymm4, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm6, %ymm5, %ymm4
+; AVX512DQ-NEXT: vpsrlw $8, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $5, %ymm3, %ymm2
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsraw $2, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsraw $1, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
+; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512DQ-NEXT: vpsraw $4, %ymm1, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsraw $2, %ymm1, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsraw $1, %ymm1, %ymm4
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+ %shift = ashr <64 x i8> %a, %b
+ ret <64 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; ALL-LABEL: splatvar_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsraq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = ashr <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; ALL-LABEL: splatvar_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; ALL-NEXT: vpsrad %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = ashr <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vmovd %xmm2, %eax
+; AVX512DQ-NEXT: movzwl %ax, %eax
+; AVX512DQ-NEXT: vmovd %eax, %xmm2
+; AVX512DQ-NEXT: vpsraw %xmm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw %xmm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vmovd %xmm1, %eax
+; AVX512BW-NEXT: movzwl %ax, %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm1
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = ashr <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsraw $2, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsraw $1, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm6, %ymm6, %ymm7
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsrlw $8, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
+; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
+; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
+; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512DQ-NEXT: vpsraw $4, %ymm1, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsraw $2, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsraw $1, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+ %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = ashr <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
+; ALL-LABEL: constant_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsravq {{.*}}(%rip), %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = ashr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
+; ALL-LABEL: constant_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsravd {{.*}}(%rip), %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = ashr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
+; AVX512DQ-LABEL: constant_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsravd %ymm4, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsravd %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsravd %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsrld $16, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsravd %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackusdw %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = ashr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
+; AVX512DQ-LABEL: constant_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm2[8],ymm0[9],ymm2[9],ymm0[10],ymm2[10],ymm0[11],ymm2[11],ymm0[12],ymm2[12],ymm0[13],ymm2[13],ymm0[14],ymm2[14],ymm0[15],ymm2[15],ymm0[24],ymm2[24],ymm0[25],ymm2[25],ymm0[26],ymm2[26],ymm0[27],ymm2[27],ymm0[28],ymm2[28],ymm0[29],ymm2[29],ymm0[30],ymm2[30],ymm0[31],ymm2[31]
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsraw $2, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm3, %ymm3, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsraw $1, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm6, %ymm6, %ymm7
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsrlw $8, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],ymm0[21],ymm2[21],ymm0[22],ymm2[22],ymm0[23],ymm2[23]
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512DQ-NEXT: vpsraw $4, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8
+; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5
+; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9
+; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3
+; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX512DQ-NEXT: vpsraw $4, %ymm1, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsraw $2, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsraw $1, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackuswb %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+ %shift = ashr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <64 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
+; ALL-LABEL: splatconstant_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsraq $7, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
+; ALL-LABEL: splatconstant_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsrad $5, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
+; AVX512DQ-LABEL: splatconstant_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsraw $3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsraw $3, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
+; AVX512DQ-LABEL: splatconstant_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
+; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512DQ-NEXT: vpxor %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsubb %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpxor %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsubb %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_shift_v64i8:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <64 x i8> %shift
+}
diff --git a/test/CodeGen/X86/vector-shift-lshr-128.ll b/test/CodeGen/X86/vector-shift-lshr-128.ll
index ca55800e2713..86e54612ae74 100644
--- a/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -1,13 +1,20 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+;
+; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE2
;
; Variable Shifts
;
-define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: var_shift_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
@@ -39,11 +46,39 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v2i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v2i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: psrlq %xmm3, %xmm2
+; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
+; X32-SSE-NEXT: psrlq %xmm1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; X32-SSE-NEXT: movapd %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <2 x i64> %a, %b
ret <2 x i64> %shift
}
-define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: var_shift_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm1, %xmm2
@@ -109,11 +144,52 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v4i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v4i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psrldq {{.*#+}} xmm2 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psrld %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psrlq $32, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm4
+; X32-SSE-NEXT: psrld %xmm2, %xmm4
+; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: movdqa %xmm1, %xmm4
+; X32-SSE-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm5
+; X32-SSE-NEXT: psrld %xmm4, %xmm5
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE-NEXT: psrld %xmm1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm5 = xmm0[0],xmm5[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE-NEXT: retl
%shift = lshr <4 x i32> %a, %b
ret <4 x i32> %shift
}
-define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: var_shift_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: psllw $12, %xmm1
@@ -206,11 +282,58 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
+;
+; XOP-LABEL: var_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psllw $12, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psrlw $8, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psrlw $2, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: psraw $15, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: pandn %xmm0, %xmm2
+; X32-SSE-NEXT: psrlw $1, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <8 x i16> %a, %b
ret <8 x i16> %shift
}
-define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: var_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: psllw $5, %xmm1
@@ -281,6 +404,60 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: var_shift_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psllw $5, %xmm1
+; X32-SSE-NEXT: pxor %xmm2, %xmm2
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI3_0, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm1, %xmm1
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psrlw $2, %xmm0
+; X32-SSE-NEXT: pand .LCPI3_1, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm1, %xmm1
+; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE-NEXT: pandn %xmm0, %xmm1
+; X32-SSE-NEXT: psrlw $1, %xmm0
+; X32-SSE-NEXT: pand .LCPI3_2, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <16 x i8> %a, %b
ret <16 x i8> %shift
}
@@ -289,7 +466,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; Uniform Variable Shifts
;
-define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-LABEL: splatvar_shift_v2i64:
; SSE: # BB#0:
; SSE-NEXT: psrlq %xmm1, %xmm0
@@ -299,12 +476,28 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_shift_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
+; X32-SSE-NEXT: psrlq %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
%shift = lshr <2 x i64> %a, %splat
ret <2 x i64> %shift
}
-define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm2, %xmm2
@@ -325,12 +518,33 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_shift_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: xorps %xmm2, %xmm2
+; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X32-SSE-NEXT: psrld %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
%shift = lshr <4 x i32> %a, %splat
ret <4 x i32> %shift
}
-define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: movd %xmm1, %eax
@@ -352,12 +566,34 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: movzwl %ax, %eax
+; X32-SSE-NEXT: movd %eax, %xmm1
+; X32-SSE-NEXT: psrlw %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i16> %a, %splat
ret <8 x i16> %shift
}
-define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -454,6 +690,74 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX2-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v16i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v16i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; X32-SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,4,4,4]
+; X32-SSE-NEXT: psllw $5, %xmm2
+; X32-SSE-NEXT: pxor %xmm1, %xmm1
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI7_0, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm2, %xmm2
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psrlw $2, %xmm0
+; X32-SSE-NEXT: pand .LCPI7_1, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm2, %xmm2
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: pandn %xmm0, %xmm2
+; X32-SSE-NEXT: psrlw $1, %xmm0
+; X32-SSE-NEXT: pand .LCPI7_2, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i8> %a, %splat
ret <16 x i8> %shift
@@ -463,7 +767,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; Constant Shifts
;
-define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: constant_shift_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -492,61 +796,118 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v2i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v2i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrlq $7, %xmm1
+; X32-SSE-NEXT: psrlq $1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X32-SSE-NEXT: movapd %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <2 x i64> %a, <i64 1, i64 7>
ret <2 x i64> %shift
}
-define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: constant_shift_v4i32:
; SSE2: # BB#0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $7, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrld $5, %xmm2
-; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrld $6, %xmm2
-; SSE2-NEXT: psrld $4, %xmm0
-; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: retq
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrld $7, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrld $5, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrld $6, %xmm2
+; SSE2-NEXT: psrld $4, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
;
; SSE41-LABEL: constant_shift_v4i32:
-; SSE41: # BB#0:
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $7, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrld $5, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrld $6, %xmm1
-; SSE41-NEXT: psrld $4, %xmm0
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; SSE41-NEXT: retq
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrld $7, %xmm1
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: psrld $5, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrld $6, %xmm1
+; SSE41-NEXT: psrld $4, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_shift_v4i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpsrld $7, %xmm0, %xmm1
-; AVX1-NEXT: vpsrld $5, %xmm0, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; AVX1-NEXT: vpsrld $6, %xmm0, %xmm2
-; AVX1-NEXT: vpsrld $4, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: retq
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrld $7, %xmm0, %xmm1
+; AVX1-NEXT: vpsrld $5, %xmm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpsrld $6, %xmm0, %xmm2
+; AVX1-NEXT: vpsrld $4, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_shift_v4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v4i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v4i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrld $7, %xmm1
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: psrld $5, %xmm2
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: psrld $6, %xmm2
+; X32-SSE-NEXT: psrld $4, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE-NEXT: retl
%shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
ret <4 x i32> %shift
}
-define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE2-LABEL: constant_shift_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -611,11 +972,41 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
+;
+; XOP-LABEL: constant_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
+; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrlw $4, %xmm1
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
+; X32-SSE-NEXT: psrlw $2, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm0 = [65535,0,65535,0,65535,0,65535,0]
+; X32-SSE-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE-NEXT: pand %xmm0, %xmm1
+; X32-SSE-NEXT: psrlw $1, %xmm2
+; X32-SSE-NEXT: pandn %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
}
-define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: constant_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
@@ -686,6 +1077,62 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: constant_shift_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $2, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; X32-SSE-NEXT: psllw $5, %xmm2
+; X32-SSE-NEXT: pxor %xmm1, %xmm1
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psrlw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI11_1, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm2, %xmm2
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psrlw $2, %xmm0
+; X32-SSE-NEXT: pand .LCPI11_2, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm2, %xmm2
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: pandn %xmm0, %xmm2
+; X32-SSE-NEXT: psrlw $1, %xmm0
+; X32-SSE-NEXT: pand .LCPI11_3, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
}
@@ -694,7 +1141,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; Uniform Constant Shifts
;
-define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v2i64:
; SSE: # BB#0:
; SSE-NEXT: psrlq $7, %xmm0
@@ -704,11 +1151,26 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $7, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpsrlq $7, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlq $7, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psrlq $7, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <2 x i64> %a, <i64 7, i64 7>
ret <2 x i64> %shift
}
-define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v4i32:
; SSE: # BB#0:
; SSE-NEXT: psrld $5, %xmm0
@@ -718,11 +1180,26 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; AVX: # BB#0:
; AVX-NEXT: vpsrld $5, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpsrld $5, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrld $5, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psrld $5, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %shift
}
-define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v8i16:
; SSE: # BB#0:
; SSE-NEXT: psrlw $3, %xmm0
@@ -732,11 +1209,26 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpsrlw $3, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psrlw $3, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
}
-define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v16i8:
; SSE: # BB#0:
; SSE-NEXT: psrlw $3, %xmm0
@@ -748,6 +1240,25 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOP-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psrlw $3, %xmm0
+; X32-SSE-NEXT: pand .LCPI15_0, %xmm0
+; X32-SSE-NEXT: retl
%shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
}
diff --git a/test/CodeGen/X86/vector-shift-lshr-256.ll b/test/CodeGen/X86/vector-shift-lshr-256.ll
index bb0cceed7720..ecc68cf2e278 100644
--- a/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -1,11 +1,14 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Variable Shifts
;
-define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: var_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -25,11 +28,33 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; XOPAVX1-NEXT: vpshlq %xmm2, %xmm4, %xmm2
+; XOPAVX1-NEXT: vpsubq %xmm1, %xmm3, %xmm1
+; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <4 x i64> %a, %b
ret <4 x i64> %shift
}
-define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: var_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -64,11 +89,33 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubd %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; XOPAVX1-NEXT: vpshld %xmm2, %xmm4, %xmm2
+; XOPAVX1-NEXT: vpsubd %xmm1, %xmm3, %xmm1
+; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <8 x i32> %a, %b
ret <8 x i32> %shift
}
-define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: var_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -117,11 +164,40 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; XOPAVX1-NEXT: vpshlw %xmm2, %xmm4, %xmm2
+; XOPAVX1-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX2-NEXT: vpsubw %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
+; XOPAVX2-NEXT: vpshlw %xmm2, %xmm4, %xmm2
+; XOPAVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1
+; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
%shift = lshr <16 x i16> %a, %b
ret <16 x i16> %shift
}
-define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: var_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -171,6 +247,46 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; XOPAVX1-NEXT: vpshlb %xmm2, %xmm4, %xmm2
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
+; XOPAVX2-NEXT: vpshlb %xmm2, %xmm4, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <32 x i8> %a, %b
ret <32 x i8> %shift
}
@@ -179,7 +295,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; Uniform Variable Shifts
;
-define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -192,12 +308,30 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = lshr <4 x i64> %a, %splat
ret <4 x i64> %shift
}
-define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
@@ -214,12 +348,36 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOPAVX2-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vpsrld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i32> %a, %splat
ret <8 x i32> %shift
}
-define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -238,12 +396,39 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vmovd %xmm1, %eax
+; XOPAVX1-NEXT: movzwl %ax, %eax
+; XOPAVX1-NEXT: vmovd %eax, %xmm1
+; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vmovd %xmm1, %eax
+; XOPAVX2-NEXT: movzwl %ax, %eax
+; XOPAVX2-NEXT: vmovd %eax, %xmm1
+; XOPAVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vmovd %eax, %xmm1
+; AVX512-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i16> %a, %splat
ret <16 x i16> %shift
}
-define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
@@ -292,6 +477,47 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %ymm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; XOPAVX2-NEXT: vpsubb %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
+; XOPAVX2-NEXT: vpshlb %xmm2, %xmm4, %xmm2
+; XOPAVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = lshr <32 x i8> %a, %splat
ret <32 x i8> %shift
@@ -301,7 +527,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; Constant Shifts
;
-define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: constant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -318,11 +544,32 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vpshlq %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpsubq {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
ret <4 x i64> %shift
}
-define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: constant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrld $7, %xmm0, %xmm1
@@ -345,11 +592,29 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
ret <8 x i32> %shift
}
-define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -394,11 +659,39 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vpshlw %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vpshlw %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vpsubw {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
%shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift
}
-define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: constant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -446,6 +739,43 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
@@ -454,7 +784,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; Uniform Constant Shifts
;
-define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm1
@@ -467,11 +797,29 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpsrlq $7, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpsrlq $7, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlq $7, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlq $7, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
-define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrld $5, %xmm0, %xmm1
@@ -484,11 +832,29 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrld $5, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpsrld $5, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpsrld $5, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrld $5, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrld $5, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
-define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vpsrlw $3, %xmm0, %xmm1
@@ -501,11 +867,29 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpsrlw $3, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpsrlw $3, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
-define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -522,6 +906,28 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; AVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpsubb {{.*}}(%rip), %xmm1, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsrlw $3, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
diff --git a/test/CodeGen/X86/vector-shift-lshr-512.ll b/test/CodeGen/X86/vector-shift-lshr-512.ll
new file mode 100644
index 000000000000..68644e61b0e5
--- /dev/null
+++ b/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -0,0 +1,317 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+
+;
+; Variable Shifts
+;
+
+define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; ALL-LABEL: var_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = lshr <8 x i64> %a, %b
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; ALL-LABEL: var_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = lshr <16 x i32> %a, %b
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512DQ-LABEL: var_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsrlvd %ymm5, %ymm6, %ymm5
+; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT: vpsrld $16, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsrlvd %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: var_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = lshr <32 x i16> %a, %b
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512DQ-LABEL: var_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm4
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm4
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX512DQ-NEXT: vpand %ymm6, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm4
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512DQ-NEXT: vpand %ymm7, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $5, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $2, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm6, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $1, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+
+ %shift = lshr <64 x i8> %a, %b
+ ret <64 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; ALL-LABEL: splatvar_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsrlq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = lshr <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; ALL-LABEL: splatvar_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; ALL-NEXT: vpsrld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = lshr <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vmovd %xmm2, %eax
+; AVX512DQ-NEXT: movzwl %ax, %eax
+; AVX512DQ-NEXT: vmovd %eax, %xmm2
+; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vmovd %xmm1, %eax
+; AVX512BW-NEXT: movzwl %ax, %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm1
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = lshr <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm3
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm3
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX512DQ-NEXT: vpand %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm3
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512DQ-NEXT: vpand %ymm7, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpaddb %ymm6, %ymm6, %ymm8
+; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm3
+; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $2, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $1, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+ %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = lshr <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
+; ALL-LABEL: constant_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = lshr <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
+; ALL-LABEL: constant_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = lshr <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
+; AVX512DQ-LABEL: constant_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsrlvd %ymm4, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11]
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsrlvd %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsrld $16, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsrlvd %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackusdw %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = lshr <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
+; AVX512DQ-LABEL: constant_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512DQ-NEXT: vpsllw $5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm4, %ymm4, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $1, %ymm0, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm6, %ymm6, %ymm8
+; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $2, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrlw $1, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm7, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+ %shift = lshr <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <64 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
+; ALL-LABEL: splatconstant_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsrlq $7, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
+; ALL-LABEL: splatconstant_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsrld $5, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
+; AVX512DQ-LABEL: splatconstant_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
+; AVX512DQ-LABEL: splatconstant_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
+; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrlw $3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_shift_v64i8:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <64 x i8> %shift
+}
diff --git a/test/CodeGen/X86/vector-shift-shl-128.ll b/test/CodeGen/X86/vector-shift-shl-128.ll
index 6dbd9eab2a72..9b59c6224ef2 100644
--- a/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -1,13 +1,20 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+;
+; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE2
;
; Variable Shifts
;
-define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE2-LABEL: var_shift_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
@@ -39,11 +46,37 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v2i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v2i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: psllq %xmm3, %xmm2
+; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
+; X32-SSE-NEXT: psllq %xmm1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; X32-SSE-NEXT: movapd %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <2 x i64> %a, %b
ret <2 x i64> %shift
}
-define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: var_shift_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: pslld $23, %xmm1
@@ -79,11 +112,41 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v4i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v4i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pslld $23, %xmm1
+; X32-SSE-NEXT: paddd .LCPI1_0, %xmm1
+; X32-SSE-NEXT: cvttps2dq %xmm1, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; X32-SSE-NEXT: pmuludq %xmm0, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X32-SSE-NEXT: pmuludq %xmm2, %xmm0
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <4 x i32> %a, %b
ret <4 x i32> %shift
}
-define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: var_shift_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: psllw $12, %xmm1
@@ -176,11 +239,56 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
+;
+; XOP-LABEL: var_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psllw $12, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psllw $8, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psllw $4, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: psraw $15, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm3
+; X32-SSE-NEXT: pandn %xmm0, %xmm3
+; X32-SSE-NEXT: psllw $2, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm3, %xmm0
+; X32-SSE-NEXT: paddw %xmm1, %xmm1
+; X32-SSE-NEXT: psraw $15, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: pandn %xmm0, %xmm2
+; X32-SSE-NEXT: psllw $1, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <8 x i16> %a, %b
ret <8 x i16> %shift
}
-define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: var_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: psllw $5, %xmm1
@@ -248,6 +356,56 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: var_shift_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX512-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsllw $2, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: var_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psllw $5, %xmm1
+; X32-SSE-NEXT: pxor %xmm2, %xmm2
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psllw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI3_0, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm1, %xmm1
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psllw $2, %xmm0
+; X32-SSE-NEXT: pand .LCPI3_1, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm1, %xmm1
+; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE-NEXT: pandn %xmm0, %xmm1
+; X32-SSE-NEXT: paddb %xmm0, %xmm0
+; X32-SSE-NEXT: pand %xmm2, %xmm0
+; X32-SSE-NEXT: por %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <16 x i8> %a, %b
ret <16 x i8> %shift
}
@@ -256,7 +414,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; Uniform Variable Shifts
;
-define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
+define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
; SSE-LABEL: splatvar_shift_v2i64:
; SSE: # BB#0:
; SSE-NEXT: psllq %xmm1, %xmm0
@@ -266,12 +424,28 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; AVX: # BB#0:
; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_shift_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
+; X32-SSE-NEXT: psllq %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
%shift = shl <2 x i64> %a, %splat
ret <2 x i64> %shift
}
-define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
+define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm2, %xmm2
@@ -292,12 +466,33 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_shift_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOP-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: xorps %xmm2, %xmm2
+; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X32-SSE-NEXT: pslld %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
%shift = shl <4 x i32> %a, %splat
ret <4 x i32> %shift
}
-define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
+define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: movd %xmm1, %eax
@@ -319,12 +514,34 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatvar_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOP-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; XOP-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: movzwl %ax, %eax
+; X32-SSE-NEXT: movd %eax, %xmm1
+; X32-SSE-NEXT: psllw %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i16> %a, %splat
ret <8 x i16> %shift
}
-define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
+define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
; SSE2-LABEL: splatvar_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
@@ -417,6 +634,69 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; AVX2-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v16i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v16i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX512-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsllw $2, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatvar_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; X32-SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,4,4,4]
+; X32-SSE-NEXT: psllw $5, %xmm2
+; X32-SSE-NEXT: pxor %xmm1, %xmm1
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psllw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI7_0, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm2, %xmm2
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psllw $2, %xmm0
+; X32-SSE-NEXT: pand .LCPI7_1, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm2, %xmm2
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: pandn %xmm0, %xmm2
+; X32-SSE-NEXT: paddb %xmm0, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i8> %a, %splat
ret <16 x i8> %shift
@@ -426,7 +706,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; Constant Shifts
;
-define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
+define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE2-LABEL: constant_shift_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
@@ -455,11 +735,35 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v2i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v2i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psllq $7, %xmm1
+; X32-SSE-NEXT: psllq $1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X32-SSE-NEXT: movapd %xmm1, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <2 x i64> %a, <i64 1, i64 7>
ret <2 x i64> %shift
}
-define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
+define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE2-LABEL: constant_shift_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,32,64,128]
@@ -486,11 +790,38 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v4i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v4i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [16,32,64,128]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X32-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X32-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE-NEXT: retl
%shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
ret <4 x i32> %shift
}
-define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
+define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE-LABEL: constant_shift_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pmullw {{.*}}(%rip), %xmm0
@@ -500,11 +831,27 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; AVX: # BB#0:
; AVX-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: constant_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
+; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pmullw .LCPI10_0, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
}
-define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
+define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE2-LABEL: constant_shift_v16i8:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
@@ -572,6 +919,58 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: constant_shift_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512-NEXT: vpsllw $5, %xmm1, %xmm1
+; AVX512-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpsllw $2, %xmm0, %xmm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpaddb %xmm0, %xmm0, %xmm2
+; AVX512-NEXT: vpaddb %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: constant_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; X32-SSE-NEXT: psllw $5, %xmm2
+; X32-SSE-NEXT: pxor %xmm1, %xmm1
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psllw $4, %xmm0
+; X32-SSE-NEXT: pand .LCPI11_1, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm2, %xmm2
+; X32-SSE-NEXT: pxor %xmm3, %xmm3
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3
+; X32-SSE-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE-NEXT: pandn %xmm0, %xmm4
+; X32-SSE-NEXT: psllw $2, %xmm0
+; X32-SSE-NEXT: pand .LCPI11_2, %xmm0
+; X32-SSE-NEXT: pand %xmm3, %xmm0
+; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: paddb %xmm2, %xmm2
+; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm1
+; X32-SSE-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE-NEXT: pandn %xmm0, %xmm2
+; X32-SSE-NEXT: paddb %xmm0, %xmm0
+; X32-SSE-NEXT: pand %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
}
@@ -580,7 +979,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; Uniform Constant Shifts
;
-define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
+define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v2i64:
; SSE: # BB#0:
; SSE-NEXT: psllq $7, %xmm0
@@ -590,11 +989,26 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; AVX: # BB#0:
; AVX-NEXT: vpsllq $7, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v2i64:
+; XOP: # BB#0:
+; XOP-NEXT: vpsllq $7, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v2i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllq $7, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v2i64:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psllq $7, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <2 x i64> %a, <i64 7, i64 7>
ret <2 x i64> %shift
}
-define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
+define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v4i32:
; SSE: # BB#0:
; SSE-NEXT: pslld $5, %xmm0
@@ -604,11 +1018,26 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; AVX: # BB#0:
; AVX-NEXT: vpslld $5, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v4i32:
+; XOP: # BB#0:
+; XOP-NEXT: vpslld $5, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v4i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpslld $5, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v4i32:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pslld $5, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
ret <4 x i32> %shift
}
-define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
+define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v8i16:
; SSE: # BB#0:
; SSE-NEXT: psllw $3, %xmm0
@@ -618,11 +1047,26 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; AVX: # BB#0:
; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v8i16:
+; XOP: # BB#0:
+; XOP-NEXT: vpsllw $3, %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v8i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v8i16:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psllw $3, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <8 x i16> %shift
}
-define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
+define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
; SSE-LABEL: splatconstant_shift_v16i8:
; SSE: # BB#0:
; SSE-NEXT: psllw $3, %xmm0
@@ -634,6 +1078,23 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; AVX-NEXT: vpsllw $3, %xmm0, %xmm0
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; XOP-LABEL: splatconstant_shift_v16i8:
+; XOP: # BB#0:
+; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0
+; XOP-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v16i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X32-SSE-LABEL: splatconstant_shift_v16i8:
+; X32-SSE: # BB#0:
+; X32-SSE-NEXT: psllw $3, %xmm0
+; X32-SSE-NEXT: pand .LCPI15_0, %xmm0
+; X32-SSE-NEXT: retl
%shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <16 x i8> %shift
}
diff --git a/test/CodeGen/X86/vector-shift-shl-256.ll b/test/CodeGen/X86/vector-shift-shl-256.ll
index b287875f6541..3daf24f1a82e 100644
--- a/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -1,11 +1,15 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Variable Shifts
;
-define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: var_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -25,11 +29,30 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vpshlq %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <4 x i64> %a, %b
ret <4 x i64> %shift
}
-define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: var_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -50,11 +73,30 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vpshld %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <8 x i32> %a, %b
ret <8 x i32> %shift
}
-define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: var_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
@@ -103,11 +145,34 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vpshlw %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vpshlw %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vpshlw %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
%shift = shl <16 x i16> %a, %b
ret <16 x i16> %shift
}
-define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: var_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -153,6 +218,39 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: var_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; XOPAVX1-NEXT: vpshlb %xmm2, %xmm3, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: var_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
+; XOPAVX2-NEXT: vpshlb %xmm2, %xmm3, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: var_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <32 x i8> %a, %b
ret <32 x i8> %shift
}
@@ -161,7 +259,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; Uniform Variable Shifts
;
-define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
+define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -174,12 +272,30 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllq %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
%shift = shl <4 x i64> %a, %splat
ret <4 x i64> %shift
}
-define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
+define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
@@ -196,12 +312,36 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; AVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; XOPAVX2-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vpslld %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i32> %a, %splat
ret <8 x i32> %shift
}
-define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
+define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
@@ -220,12 +360,39 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vmovd %xmm1, %eax
+; XOPAVX1-NEXT: movzwl %ax, %eax
+; XOPAVX1-NEXT: vmovd %eax, %xmm1
+; XOPAVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vmovd %xmm1, %eax
+; XOPAVX2-NEXT: movzwl %ax, %eax
+; XOPAVX2-NEXT: vmovd %eax, %xmm1
+; XOPAVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovd %xmm1, %eax
+; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: vmovd %eax, %xmm1
+; AVX512-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i16> %a, %splat
ret <16 x i16> %shift
}
-define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
+define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
; AVX1-LABEL: splatvar_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
@@ -270,6 +437,42 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatvar_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm2, %xmm2
+; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatvar_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpbroadcastb %xmm1, %ymm1
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
+; XOPAVX2-NEXT: vpshlb %xmm3, %xmm2, %xmm2
+; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatvar_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpbroadcastb %xmm1, %ymm1
+; AVX512-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
%shift = shl <32 x i8> %a, %splat
ret <32 x i8> %shift
@@ -279,7 +482,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; Constant Shifts
;
-define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
+define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: constant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -296,11 +499,29 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
ret <4 x i64> %shift
}
-define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
+define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: constant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
@@ -313,11 +534,29 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
ret <8 x i32> %shift
}
-define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
+define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: constant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm1
@@ -330,11 +569,30 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: retq
%shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift
}
-define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
+define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: constant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -378,6 +636,40 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; AVX2-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: constant_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: constant_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; XOPAVX2-NEXT: vpshlb %xmm2, %xmm1, %xmm1
+; XOPAVX2-NEXT: vpshlb %xmm2, %xmm0, %xmm0
+; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: constant_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512-NEXT: vpsllw $5, %ymm1, %ymm1
+; AVX512-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: vpaddb %ymm0, %ymm0, %ymm2
+; AVX512-NEXT: vpaddb %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <32 x i8> %shift
}
@@ -386,7 +678,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; Uniform Constant Shifts
;
-define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
+define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vpsllq $7, %xmm0, %xmm1
@@ -399,11 +691,29 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllq $7, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v4i64:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpsllq $7, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpsllq $7, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v4i64:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllq $7, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v4i64:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllq $7, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %shift
}
-define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
+define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vpslld $5, %xmm0, %xmm1
@@ -416,11 +726,29 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpslld $5, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v8i32:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpslld $5, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpslld $5, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v8i32:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpslld $5, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v8i32:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpslld $5, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
ret <8 x i32> %shift
}
-define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
+define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vpsllw $3, %xmm0, %xmm1
@@ -433,11 +761,29 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v16i16:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vpsllw $3, %xmm0, %xmm1
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT: vpsllw $3, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v16i16:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v16i16:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
-define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
+define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
; AVX1-LABEL: splatconstant_shift_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
@@ -454,6 +800,27 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; AVX2-NEXT: vpsllw $3, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
+;
+; XOPAVX1-LABEL: splatconstant_shift_v32i8:
+; XOPAVX1: # BB#0:
+; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; XOPAVX1-NEXT: vpshlb %xmm2, %xmm1, %xmm1
+; XOPAVX1-NEXT: vpshlb %xmm2, %xmm0, %xmm0
+; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; XOPAVX1-NEXT: retq
+;
+; XOPAVX2-LABEL: splatconstant_shift_v32i8:
+; XOPAVX2: # BB#0:
+; XOPAVX2-NEXT: vpsllw $3, %ymm0, %ymm0
+; XOPAVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; XOPAVX2-NEXT: retq
+;
+; AVX512-LABEL: splatconstant_shift_v32i8:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vpsllw $3, %ymm0, %ymm0
+; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
%shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <32 x i8> %shift
}
diff --git a/test/CodeGen/X86/vector-shift-shl-512.ll b/test/CodeGen/X86/vector-shift-shl-512.ll
new file mode 100644
index 000000000000..26ddb1c127e1
--- /dev/null
+++ b/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -0,0 +1,293 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+
+;
+; Variable Shifts
+;
+
+define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; ALL-LABEL: var_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsllvq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = shl <8 x i64> %a, %b
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; ALL-LABEL: var_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = shl <16 x i32> %a, %b
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512DQ-LABEL: var_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm6 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsllvd %ymm5, %ymm6, %ymm5
+; AVX512DQ-NEXT: vpsrld $16, %ymm5, %ymm5
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11]
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsllvd %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpackusdw %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm3[4],ymm4[4],ymm3[5],ymm4[5],ymm3[6],ymm4[6],ymm3[7],ymm4[7],ymm3[12],ymm4[12],ymm3[13],ymm4[13],ymm3[14],ymm4[14],ymm3[15],ymm4[15]
+; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} ymm5 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX512DQ-NEXT: vpsllvd %ymm2, %ymm5, %ymm2
+; AVX512DQ-NEXT: vpsrld $16, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[8],ymm4[8],ymm3[9],ymm4[9],ymm3[10],ymm4[10],ymm3[11],ymm4[11]
+; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512DQ-NEXT: vpsllvd %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsrld $16, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: var_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = shl <32 x i16> %a, %b
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512DQ-LABEL: var_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm4
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512DQ-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm4
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX512DQ-NEXT: vpand %ymm6, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm4
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $4, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $5, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $2, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm6, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm3, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+ %shift = shl <64 x i8> %a, %b
+ ret <64 x i8> %shift
+}
+
+;
+; Uniform Variable Shifts
+;
+
+define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
+; ALL-LABEL: splatvar_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+ %shift = shl <8 x i64> %a, %splat
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
+; ALL-LABEL: splatvar_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; ALL-NEXT: vmovss %xmm1, %xmm2, %xmm1
+; ALL-NEXT: vpslld %xmm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %shift = shl <16 x i32> %a, %splat
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vmovd %xmm2, %eax
+; AVX512DQ-NEXT: movzwl %ax, %eax
+; AVX512DQ-NEXT: vmovd %eax, %xmm2
+; AVX512DQ-NEXT: vpsllw %xmm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw %xmm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatvar_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vmovd %xmm1, %eax
+; AVX512BW-NEXT: movzwl %ax, %eax
+; AVX512BW-NEXT: vmovd %eax, %xmm1
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %shift = shl <32 x i16> %a, %splat
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
+; AVX512DQ-LABEL: splatvar_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm3
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpsllw $5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm3
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX512DQ-NEXT: vpand %ymm5, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpaddb %ymm2, %ymm2, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm3
+; AVX512DQ-NEXT: vpaddb %ymm6, %ymm6, %ymm7
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $4, %ymm1, %ymm3
+; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $2, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+
+ %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %shift = shl <64 x i8> %a, %splat
+ ret <64 x i8> %shift
+}
+
+;
+; Constant Shifts
+;
+
+define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
+; ALL-LABEL: constant_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
+; ALL-LABEL: constant_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
+; AVX512DQ-LABEL: constant_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
+; AVX512DQ-NEXT: vpmullw %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpmullw %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: constant_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
+; AVX512DQ-LABEL: constant_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0,0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0]
+; AVX512DQ-NEXT: vpsllw $5, %ymm4, %ymm4
+; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm4, %ymm4, %ymm6
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddb %ymm0, %ymm0, %ymm2
+; AVX512DQ-NEXT: vpaddb %ymm6, %ymm6, %ymm7
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $4, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpsllw $2, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpand %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb %ymm1, %ymm1, %ymm2
+; AVX512DQ-NEXT: vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+ %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
+ ret <64 x i8> %shift
+}
+
+;
+; Uniform Constant Shifts
+;
+
+define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
+; ALL-LABEL: splatconstant_shift_v8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vpsllq $7, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+ ret <8 x i64> %shift
+}
+
+define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
+; ALL-LABEL: splatconstant_shift_v16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vpslld $5, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ ret <16 x i32> %shift
+}
+
+define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
+; AVX512DQ-LABEL: splatconstant_shift_v32i16:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $3, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_shift_v32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+ ret <32 x i16> %shift
+}
+
+define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
+; AVX512DQ-LABEL: splatconstant_shift_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpsllw $3, %ymm0, %ymm0
+; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
+; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpsllw $3, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: splatconstant_shift_v64i8:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+ %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <64 x i8> %shift
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 124d6e8c8ba2..13a9543ddd90 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
@@ -469,6 +470,20 @@ define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(
ret <16 x i8> %shuffle
}
+define <16 x i8> @shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz(<16 x i8> %a) {
+; SSE-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
+; SSE: # BB#0:
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_00_01_02_zz_04_05_06_zz_08_09_10_zz_12_13_14_zz:
+; AVX: # BB#0:
+; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
+ ret <16 x i8> %shuffle
+}
+
define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
; SSE2: # BB#0:
@@ -1356,3 +1371,264 @@ define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
%bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8>
ret <16 x i8> %bitcast8
}
+
+define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
+; SSE2-LABEL: insert_dup_mem_v16i8_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_mem_v16i8_i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pshufb %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_mem_v16i8_i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pshufb %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_mem_v16i8_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v16i8_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
+ %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %tmp3
+}
+
+define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
+; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsbl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movsbl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pshufb %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movsbl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pshufb %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: movsbl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v16i8_sext_i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i8, i8* %ptr, align 1
+ %tmp1 = sext i8 %tmp to i32
+ %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
+ %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
+ %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> zeroinitializer
+ ret <16 x i8> %tmp4
+}
+
+define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(i32* %ptr) {
+; SSE2-LABEL: insert_dup_elt1_mem_v16i8_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_elt1_mem_v16i8_i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt1_mem_v16i8_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb 1(%rdi), %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
+ %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i8> %tmp3
+}
+
+define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(i32* %ptr) {
+; SSE2-LABEL: insert_dup_elt2_mem_v16i8_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_elt2_mem_v16i8_i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt2_mem_v16i8_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb 2(%rdi), %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
+ %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <16 x i8> %tmp3
+}
+
+define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
+; SSE2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsbl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movsbl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movsbl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: movsbl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: movsbl (%rdi), %eax
+; AVX2-NEXT: shrl $8, %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i8, i8* %ptr, align 1
+ %tmp1 = sext i8 %tmp to i32
+ %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
+ %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
+ %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i8> %tmp4
+}
+
+define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
+; SSE2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsbl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movsbl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movsbl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: movsbl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: movsbl (%rdi), %eax
+; AVX2-NEXT: shrl $16, %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i8, i8* %ptr, align 1
+ %tmp1 = sext i8 %tmp to i32
+ %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
+ %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
+ %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <16 x i8> %tmp4
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll
index ee68df581bfd..1d32f9e38523 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -1,9 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VL
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -23,6 +25,11 @@ define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2i64_00:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
ret <2 x i64> %shuffle
}
@@ -67,6 +74,11 @@ define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %xmm0
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2i64_22:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
ret <2 x i64> %shuffle
}
@@ -135,6 +147,7 @@ define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: retq
+
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
ret <2 x double> %shuffle
}
@@ -191,6 +204,7 @@ define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX-NEXT: retq
+
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
ret <2 x double> %shuffle
}
@@ -329,6 +343,11 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2i64_03:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
}
@@ -366,6 +385,11 @@ define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2i64_03_copy:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
}
@@ -516,6 +540,11 @@ define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2i64_21:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
@@ -553,6 +582,11 @@ define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2i64_21_copy:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
@@ -725,6 +759,12 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2i64_z1:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %shuffle
}
@@ -750,11 +790,23 @@ define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v2f64_1z:
-; AVX: # BB#0:
-; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v2f64_1z:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v2f64_1z:
+; AVX2: # BB#0:
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2f64_1z:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
ret <2 x double> %shuffle
}
@@ -767,11 +819,23 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v2f64_z0:
-; AVX: # BB#0:
-; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v2f64_z0:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v2f64_z0:
+; AVX2: # BB#0:
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2f64_z0:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
ret <2 x double> %shuffle
}
@@ -817,11 +881,23 @@ define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE-NEXT: retq
;
-; AVX-LABEL: shuffle_v2f64_bitcast_1z:
-; AVX: # BB#0:
-; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v2f64_bitcast_1z:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v2f64_bitcast_1z:
+; AVX2: # BB#0:
+; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2f64_bitcast_1z:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; AVX512VL-NEXT: retq
%shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
%bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
%shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
@@ -829,6 +905,66 @@ define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
ret <2 x double> %bitcast64
}
+define <2 x i64> @shuffle_v2i64_bitcast_z123(<2 x i64> %x) {
+; SSE2-LABEL: shuffle_v2i64_bitcast_z123:
+; SSE2: # BB#0:
+; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v2i64_bitcast_z123:
+; SSE3: # BB#0:
+; SSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v2i64_bitcast_z123:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v2i64_bitcast_z123:
+; SSE41: # BB#0:
+; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v2i64_bitcast_z123:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v2i64_bitcast_z123:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v2i64_bitcast_z123:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovss {{.*}}(%rip), %xmm1
+; AVX512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX512VL-NEXT: retq
+ %bitcast32 = bitcast <2 x i64> %x to <4 x float>
+ %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x i64>
+ %and = and <2 x i64> %bitcast64, <i64 -4294967296, i64 -1>
+ ret <2 x i64> %and
+}
+
define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; SSE-LABEL: insert_reg_and_zero_v2i64:
; SSE: # BB#0:
@@ -850,10 +986,20 @@ define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
-; AVX-LABEL: insert_mem_and_zero_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: retq
+; AVX1-LABEL: insert_mem_and_zero_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_and_zero_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_mem_and_zero_v2i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovq (%rdi), %xmm0
+; AVX512VL-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
@@ -881,10 +1027,20 @@ define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
-; AVX-LABEL: insert_mem_and_zero_v2f64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT: retq
+; AVX1-LABEL: insert_mem_and_zero_v2f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_and_zero_v2f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_mem_and_zero_v2f64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
+; AVX512VL-NEXT: retq
%a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
@@ -927,6 +1083,12 @@ define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; AVX2-NEXT: vmovq %rdi, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_reg_lo_v2i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovq %rdi, %xmm1
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX512VL-NEXT: retq
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
@@ -965,6 +1127,12 @@ define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_mem_lo_v2i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovq (%rdi), %xmm1
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX512VL-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
@@ -995,11 +1163,23 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
-; AVX-LABEL: insert_mem_hi_v2i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX-NEXT: retq
+; AVX1-LABEL: insert_mem_hi_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_hi_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_mem_hi_v2i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovq (%rdi), %xmm1
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
@@ -1013,10 +1193,20 @@ define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: insert_reg_lo_v2f64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
-; AVX-NEXT: retq
+; AVX1-LABEL: insert_reg_lo_v2f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_reg_lo_v2f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_reg_lo_v2f64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %shuffle
@@ -1071,8 +1261,6 @@ define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
}
define <2 x double> @insert_dup_reg_v2f64(double %a) {
-; FIXME: We should match movddup for SSE3 and higher here.
-;
; SSE2-LABEL: insert_dup_reg_v2f64:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
@@ -1101,6 +1289,7 @@ define <2 x double> @insert_dup_reg_v2f64(double %a) {
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
ret <2 x double> %shuffle
}
+
define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
; SSE2-LABEL: insert_dup_mem_v2f64:
; SSE2: # BB#0:
@@ -1133,6 +1322,66 @@ define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
ret <2 x double> %shuffle
}
+define <2 x double> @insert_dup_mem128_v2f64(<2 x double>* %ptr) nounwind {
+; SSE2-LABEL: insert_dup_mem128_v2f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps (%rdi), %xmm0
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: insert_dup_mem128_v2f64:
+; SSE3: # BB#0:
+; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_mem128_v2f64:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_mem128_v2f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: insert_dup_mem128_v2f64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; AVX-NEXT: retq
+ %v = load <2 x double>, <2 x double>* %ptr
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %shuffle
+}
+
+
+define <2 x i64> @insert_dup_mem_v2i64(i64* %ptr) {
+; SSE-LABEL: insert_dup_mem_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_mem_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_dup_mem_v2i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpbroadcastq (%rdi), %xmm0
+; AVX512VL-NEXT: retq
+ %tmp = load i64, i64* %ptr, align 1
+ %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
+ %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer
+ ret <2 x i64> %tmp2
+}
+
define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; SSE-LABEL: shuffle_mem_v2f64_10:
; SSE: # BB#0:
@@ -1144,6 +1393,7 @@ define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; AVX-NEXT: retq
+
%a = load <2 x double>, <2 x double>* %ptr
%shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
ret <2 x double> %shuffle
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 8612a5afa3d2..35c3b708fd08 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
@@ -952,6 +953,43 @@ define <4 x float> @shuffle_v4f32_0zz3(<4 x float> %a) {
ret <4 x float> %shuffle
}
+define <4 x float> @shuffle_v4f32_0z2z(<4 x float> %v) {
+; SSE2-LABEL: shuffle_v4f32_0z2z:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4f32_0z2z:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4f32_0z2z:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4f32_0z2z:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_0z2z:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x float> %v, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 2, i32 4>
+ ret <4 x float> %shuffle
+}
+
define <4 x float> @shuffle_v4f32_u051(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: shuffle_v4f32_u051:
; SSE: # BB#0:
@@ -1591,6 +1629,43 @@ define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %bitcast32
}
+define <4 x float> @shuffle_v4f32_bitcast_4401(<4 x float> %a, <4 x i32> %b) {
+; SSE-LABEL: shuffle_v4f32_bitcast_4401:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_bitcast_4401:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+ %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+ %2 = bitcast <4 x i32> %1 to <2 x double>
+ %3 = bitcast <4 x float> %a to <2 x double>
+ %4 = shufflevector <2 x double> %2, <2 x double> %3, <2 x i32> <i32 0, i32 2>
+ %5 = bitcast <2 x double> %4 to <4 x float>
+ ret <4 x float> %5
+}
+
+define <4 x float> @shuffle_v4f32_bitcast_0045(<4 x float> %a, <4 x i32> %b) {
+; SSE-LABEL: shuffle_v4f32_bitcast_0045:
+; SSE: # BB#0:
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_bitcast_0045:
+; AVX: # BB#0:
+; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,1]
+; AVX-NEXT: retq
+ %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+ %2 = bitcast <4 x i32> %b to <4 x float>
+ %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 1, i32 0, i32 4, i32 5>
+ ret <4 x float> %3
+}
+
define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
; SSE-LABEL: insert_reg_and_zero_v4i32:
; SSE: # BB#0:
@@ -1875,6 +1950,23 @@ define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
ret <4 x float> %shuffle
}
+define <4 x i32> @insert_dup_mem_v4i32(i32* %ptr) {
+; SSE-LABEL: insert_dup_mem_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: insert_dup_mem_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vbroadcastss (%rdi), %xmm0
+; AVX-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %tmp2
+}
+
;
; Shuffle to logical bit shifts
;
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 6a29d33d6c5e..168b3e33bfcf 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
@@ -2145,3 +2146,254 @@ define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
ret <8 x i16> %shuffle
}
+
+define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
+; SSE2-LABEL: insert_dup_mem_v8i16_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_mem_v8i16_i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_mem_v8i16_i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_mem_v8i16_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v8i16_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
+; SSE2-LABEL: insert_dup_mem_v8i16_sext_i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movswl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_mem_v8i16_sext_i16:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movswl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_mem_v8i16_sext_i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movswl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: movswl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: movswl (%rdi), %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i16, i16* %ptr, align 2
+ %tmp1 = sext i16 %tmp to i32
+ %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
+ %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
+ %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
+; SSE2-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
+; SSE2-LABEL: insert_dup_elt3_mem_v8i16_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_i32:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_elt3_mem_v8i16_i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
+ %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
+; SSE2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movswl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movswl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movswl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: movswl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: movswl (%rdi), %eax
+; AVX2-NEXT: shrl $16, %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i16, i16* %ptr, align 2
+ %tmp1 = sext i16 %tmp to i32
+ %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
+ %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
+ %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
+; SSE2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: movswl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movswl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movswl (%rdi), %eax
+; SSE41-NEXT: movd %eax, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: movswl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: movswl (%rdi), %eax
+; AVX2-NEXT: shrl $16, %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %tmp = load i16, i16* %ptr, align 2
+ %tmp1 = sext i16 %tmp to i32
+ %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
+ %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
+ %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <8 x i16> %tmp4
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
index df4994da6932..7e3dc6e294f8 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
@@ -158,11 +159,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
-; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm1
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
@@ -1439,11 +1440,10 @@ define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_z
define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
@@ -1702,21 +1702,21 @@ define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_1
; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
ret <16 x i16> %shuffle
@@ -1726,21 +1726,21 @@ define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_1
; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
ret <16 x i16> %shuffle
@@ -2444,13 +2444,13 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2
;
; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
; AVX2: # BB#0:
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
@@ -2498,13 +2498,13 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3
;
; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
; AVX2: # BB#0:
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -2647,13 +2647,13 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1
;
; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
; AVX2: # BB#0:
-; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
@@ -2674,13 +2674,13 @@ define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1
;
; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
; AVX2: # BB#0:
-; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
@@ -3250,6 +3250,90 @@ define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_u
ret <16 x i16> %shuffle
}
+define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_0_16_1_17_2_18_3_19:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
+; ALL-LABEL: shuffle_v16i16_4_20_5_21_6_22_7_23_u_u_u_u_u_u_u_u:
+; ALL: # BB#0:
+; ALL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
+; ALL-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
+; ALL: # BB#0:
+; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
; ALL: # BB#0:
@@ -3261,3 +3345,112 @@ define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
ret <16 x i16> %i0
}
+define <16 x i16> @concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
+; AVX1: # BB#0:
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_v16i16_0_1_2_3_4_5_6_7_24_25_26_27_28_29_30_31:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: retq
+ %alo = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %shuf = shufflevector <8 x i16> %alo, <8 x i16> %bhi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i16> %shuf
+}
+
+define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc(<16 x i16> %a, <16 x i16> %b) {
+; ALL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: retq
+ %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %bc0hi = bitcast <8 x i16> %ahi to <16 x i8>
+ %bc1hi = bitcast <8 x i16> %bhi to <16 x i8>
+ %shuffle8 = shufflevector <16 x i8> %bc0hi, <16 x i8> %bc1hi, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %shuffle16 = bitcast <32 x i8> %shuffle8 to <16 x i16>
+ ret <16 x i16> %shuffle16
+}
+
+define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
+; AVX1-LABEL: insert_dup_mem_v16i16_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v16i16_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> zeroinitializer
+ ret <16 x i16> %tmp3
+}
+
+define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
+; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: movswl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: movswl (%rdi), %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT: retq
+ %tmp = load i16, i16* %ptr, align 2
+ %tmp1 = sext i16 %tmp to i32
+ %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
+ %tmp3 = bitcast <4 x i32> %tmp2 to <8 x i16>
+ %tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> undef, <16 x i32> zeroinitializer
+ ret <16 x i16> %tmp4
+}
+
+define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
+; AVX1-LABEL: insert_dup_elt1_mem_v16i16_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i16> %tmp3
+}
+
+define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
+; AVX1-LABEL: insert_dup_elt3_mem_v16i16_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
+ %tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <16 x i16> %tmp3
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll
index a0f43de75630..161a21cef030 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
@@ -324,7 +325,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -947,6 +948,24 @@ define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_
ret <32 x i8> %shuffle
}
+define <32 x i8> @shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255]
+; AVX1-NEXT: vandps %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
+; AVX2: # BB#0:
+; AVX2-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
+ ret <32 x i8> %shuffle
+}
+
define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
; AVX1: # BB#0:
@@ -1737,7 +1756,8 @@ define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_
; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1754,7 +1774,8 @@ define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_
; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
; AVX1: # BB#0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1956,3 +1977,186 @@ define <32 x i8> @shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
ret <32 x i8> %shuffle
}
+
+define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,12,12,13,13,14,14,15,15]
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,12,12,13,13,14,14,15,15]
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
+; ALL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; ALL: # BB#0:
+; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @insert_dup_mem_v32i8_i32(i32* %ptr) {
+; AVX1-LABEL: insert_dup_mem_v32i8_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v32i8_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
+ %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <32 x i32> zeroinitializer
+ ret <32 x i8> %tmp3
+}
+
+define <32 x i8> @insert_dup_mem_v32i8_sext_i8(i8* %ptr) {
+; AVX1-LABEL: insert_dup_mem_v32i8_sext_i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: movsbl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v32i8_sext_i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0
+; AVX2-NEXT: retq
+ %tmp = load i8, i8* %ptr, align 1
+ %tmp1 = sext i8 %tmp to i32
+ %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
+ %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
+ %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <32 x i32> zeroinitializer
+ ret <32 x i8> %tmp4
+}
+
+define <32 x i8> @insert_dup_elt1_mem_v32i8_i32(i32* %ptr) {
+; AVX1-LABEL: insert_dup_elt1_mem_v32i8_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt1_mem_v32i8_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb 1(%rdi), %ymm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
+ %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <32 x i8> %tmp3
+}
+
+define <32 x i8> @insert_dup_elt3_mem_v32i8_i32(i32* %ptr) {
+; AVX1-LABEL: insert_dup_elt3_mem_v32i8_i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt3_mem_v32i8_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastb 3(%rdi), %ymm0
+; AVX2-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
+ %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ ret <32 x i8> %tmp3
+}
+
+define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(i8* %ptr) {
+; AVX1-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
+; AVX1: # BB#0:
+; AVX1-NEXT: movsbl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: movsbl (%rdi), %eax
+; AVX2-NEXT: shrl $8, %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT: retq
+ %tmp = load i8, i8* %ptr, align 1
+ %tmp1 = sext i8 %tmp to i32
+ %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
+ %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
+ %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <32 x i8> %tmp4
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 62bf288a870d..7e33f5f3aa86 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+; RUN: llc < %s -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL
target triple = "x86_64-unknown-unknown"
@@ -14,6 +16,11 @@ define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_0000:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x double> %shuffle
}
@@ -29,6 +36,11 @@ define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_0001:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
ret <4 x double> %shuffle
}
@@ -46,6 +58,11 @@ define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_0020:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
ret <4 x double> %shuffle
}
@@ -62,6 +79,11 @@ define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_0300:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
ret <4 x double> %shuffle
}
@@ -78,6 +100,11 @@ define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_1000:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
ret <4 x double> %shuffle
}
@@ -93,6 +120,11 @@ define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_2200:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
ret <4 x double> %shuffle
}
@@ -109,6 +141,11 @@ define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_3330:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
ret <4 x double> %shuffle
}
@@ -124,6 +161,11 @@ define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_3210:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x double> %shuffle
}
@@ -133,6 +175,7 @@ define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT: retq
+
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
ret <4 x double> %shuffle
}
@@ -146,6 +189,16 @@ define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
ret <4 x double> %shuffle
}
+define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64mem_0022:
+; ALL: # BB#0:
+; ALL-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
+; ALL-NEXT: retq
+ %a = load <4 x double>, <4 x double>* %ptr
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x double> %shuffle
+}
+
define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL: # BB#0:
@@ -183,17 +236,11 @@ define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
}
define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
-; AVX1-LABEL: shuffle_v4f64_0423:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v4f64_0423:
-; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
-; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
-; AVX2-NEXT: retq
+; ALL-LABEL: shuffle_v4f64_0423:
+; ALL: # BB#0:
+; ALL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
ret <4 x double> %shuffle
}
@@ -273,19 +320,39 @@ define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
}
define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
-; ALL-LABEL: shuffle_v4f64_0145:
-; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT: retq
+; AVX1-LABEL: shuffle_v4f64_0145:
+; AVX1: # BB#0:
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_0145:
+; AVX2: # BB#0:
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_0145:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
-; ALL-LABEL: shuffle_v4f64_4501:
-; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; ALL-NEXT: retq
+; AVX1-LABEL: shuffle_v4f64_4501:
+; AVX1: # BB#0:
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_4501:
+; AVX2: # BB#0:
+; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_4501:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x double> %shuffle
}
@@ -300,31 +367,67 @@ define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
}
define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
-; ALL-LABEL: shuffle_v4f64_1054:
-; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; ALL-NEXT: retq
+; AVX1-LABEL: shuffle_v4f64_1054:
+; AVX1: # BB#0:
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_1054:
+; AVX2: # BB#0:
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_1054:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
-; ALL-LABEL: shuffle_v4f64_3254:
-; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; ALL-NEXT: retq
+; AVX1-LABEL: shuffle_v4f64_3254:
+; AVX1: # BB#0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_3254:
+; AVX2: # BB#0:
+; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_3254:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
ret <4 x double> %shuffle
}
define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
-; ALL-LABEL: shuffle_v4f64_3276:
-; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; ALL-NEXT: retq
+; AVX1-LABEL: shuffle_v4f64_3276:
+; AVX1: # BB#0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_3276:
+; AVX2: # BB#0:
+; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_3276:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
ret <4 x double> %shuffle
}
@@ -353,6 +456,13 @@ define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_0415:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; AVX512VL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
ret <4 x double> %shuffle
}
@@ -366,6 +476,65 @@ define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
ret <4 x double> %shuffle
}
+define <4 x double> @shuffle_v4f64_15uu(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_15uu:
+; ALL: # BB#0:
+; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+ ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_11uu:
+; ALL: # BB#0:
+; ALL-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
+ ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_22uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_22uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_22uu:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
+; AVX512VL-NEXT: retq
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
+ ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_3333:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4f64_3333:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4f64_3333:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
+; AVX512VL-NEXT: retq
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x double> %shuffle
+}
+
define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1: # BB#0:
@@ -377,6 +546,11 @@ define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0000:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %shuffle
}
@@ -392,6 +566,11 @@ define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0001:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
ret <4 x i64> %shuffle
}
@@ -409,6 +588,11 @@ define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0020:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
ret <4 x i64> %shuffle
}
@@ -425,6 +609,11 @@ define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0112:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
ret <4 x i64> %shuffle
}
@@ -441,6 +630,11 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0300:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
ret <4 x i64> %shuffle
}
@@ -457,6 +651,11 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_1000:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
ret <4 x i64> %shuffle
}
@@ -472,6 +671,11 @@ define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_2200:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
ret <4 x i64> %shuffle
}
@@ -488,6 +692,11 @@ define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_3330:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
ret <4 x i64> %shuffle
}
@@ -503,6 +712,11 @@ define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_3210:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i64> %shuffle
}
@@ -520,6 +734,12 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0124:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %ymm1
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
ret <4 x i64> %shuffle
}
@@ -527,17 +747,24 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0142:
; AVX1: # BB#0:
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0142:
; AVX2: # BB#0:
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0142:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm1
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
ret <4 x i64> %shuffle
}
@@ -548,16 +775,23 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0412:
; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
-; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0412:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
ret <4 x i64> %shuffle
}
@@ -577,15 +811,31 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_4012:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x i64> %shuffle
}
define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
-; ALL-LABEL: shuffle_v4i64_0145:
-; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT: retq
+; AVX1-LABEL: shuffle_v4i64_0145:
+; AVX1: # BB#0:
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i64_0145:
+; AVX2: # BB#0:
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0145:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i64> %shuffle
}
@@ -604,15 +854,32 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0451:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
ret <4 x i64> %shuffle
}
define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
-; ALL-LABEL: shuffle_v4i64_4501:
-; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; ALL-NEXT: retq
+; AVX1-LABEL: shuffle_v4i64_4501:
+; AVX1: # BB#0:
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i64_4501:
+; AVX2: # BB#0:
+; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_4501:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm1, %ymm0
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x i64> %shuffle
}
@@ -631,6 +898,13 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_4015:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm1, %ymm1
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
ret <4 x i64> %shuffle
}
@@ -648,6 +922,12 @@ define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_2u35:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
ret <4 x i64> %shuffle
}
@@ -668,6 +948,13 @@ define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_1251:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
ret <4 x i64> %shuffle
}
@@ -684,6 +971,12 @@ define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_1054:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
ret <4 x i64> %shuffle
}
@@ -700,6 +993,12 @@ define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_3254:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
ret <4 x i64> %shuffle
}
@@ -716,6 +1015,12 @@ define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_3276:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
ret <4 x i64> %shuffle
}
@@ -732,6 +1037,12 @@ define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_1076:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
ret <4 x i64> %shuffle
}
@@ -750,6 +1061,13 @@ define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_0415:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
ret <4 x i64> %shuffle
}
@@ -765,6 +1083,11 @@ define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_z4z6:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
ret <4 x i64> %shuffle
}
@@ -780,6 +1103,11 @@ define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_5zuz:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
ret <4 x i64> %shuffle
}
@@ -794,10 +1122,74 @@ define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_40u2:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
ret <4 x i64> %shuffle
}
+define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) {
+; ALL-LABEL: shuffle_v4i64_15uu:
+; ALL: # BB#0:
+; ALL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
+ ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
+; ALL-LABEL: shuffle_v4i64_11uu:
+; ALL: # BB#0:
+; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
+ ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_22uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i64_22uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_22uu:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
+; AVX512VL-NEXT: retq
+ %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
+ ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_3333:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i64_3333:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_v4i64_3333:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
+; AVX512VL-NEXT: retq
+ %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x i64> %shuffle
+}
+
define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: stress_test1:
; ALL: retq
@@ -820,10 +1212,20 @@ define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
}
define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
-; ALL-LABEL: insert_mem_and_zero_v4i64:
-; ALL: # BB#0:
-; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: retq
+; AVX1-LABEL: insert_mem_and_zero_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_and_zero_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_mem_and_zero_v4i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovq (%rdi), %xmm0
+; AVX512VL-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <4 x i64> undef, i64 %a, i64 0
%shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -831,21 +1233,43 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
}
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
-; ALL-LABEL: insert_reg_and_zero_v4f64:
-; ALL: # BB#0:
-; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; ALL-NEXT: retq
+; AVX1-LABEL: insert_reg_and_zero_v4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_reg_and_zero_v4f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_reg_and_zero_v4f64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
+; AVX512VL-NEXT: retq
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x double> %shuffle
}
define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
-; ALL-LABEL: insert_mem_and_zero_v4f64:
-; ALL: # BB#0:
-; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: retq
+; AVX1-LABEL: insert_mem_and_zero_v4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_and_zero_v4f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_mem_and_zero_v4f64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovsd (%rdi), %xmm0
+; AVX512VL-NEXT: retq
%a = load double, double* %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -864,10 +1288,20 @@ define <4 x double> @splat_mem_v4f64(double* %ptr) {
}
define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
-; ALL-LABEL: splat_mem_v4i64:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
-; ALL-NEXT: retq
+; AVX1-LABEL: splat_mem_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splat_mem_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: splat_mem_v4i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
+; AVX512VL-NEXT: retq
%a = load i64, i64* %ptr
%v = insertelement <4 x i64> undef, i64 %a, i64 0
%shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
@@ -896,6 +1330,11 @@ define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: splat_v4f64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
+; AVX512VL-NEXT: retq
%1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %1
}
@@ -911,44 +1350,67 @@ define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
+; AVX512VL-NEXT: retq
%v = load <2 x i64>, <2 x i64>* %ptr
%shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %shuffle
}
define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
-; AVX1-LABEL: splat_mem_v4f64_from_v2f64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: splat_mem_v4f64_from_v2f64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX2-NEXT: retq
+; ALL-LABEL: splat_mem_v4f64_from_v2f64:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
+; ALL-NEXT: retq
%v = load <2 x double>, <2 x double>* %ptr
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x double> %shuffle
}
define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
-; ALL-LABEL: splat128_mem_v4i64_from_v2i64:
-; ALL: # BB#0:
-; ALL-NEXT: vmovaps (%rdi), %xmm0
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: retq
+; AVX1-LABEL: splat128_mem_v4i64_from_v2i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovaps (%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splat128_mem_v4i64_from_v2i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovaps (%rdi), %xmm0
+; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovdqa64 (%rdi), %xmm0
+; AVX512VL-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
%v = load <2 x i64>, <2 x i64>* %ptr
%shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
ret <4 x i64> %shuffle
}
define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
-; ALL-LABEL: splat128_mem_v4f64_from_v2f64:
-; ALL: # BB#0:
-; ALL-NEXT: vmovaps (%rdi), %xmm0
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: retq
+; AVX1-LABEL: splat128_mem_v4f64_from_v2f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovaps (%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: splat128_mem_v4f64_from_v2f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovaps (%rdi), %xmm0
+; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: splat128_mem_v4f64_from_v2f64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vmovapd (%rdi), %xmm0
+; AVX512VL-NEXT: vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
%v = load <2 x double>, <2 x double>* %ptr
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
ret <4 x double> %shuffle
@@ -964,6 +1426,11 @@ define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: bitcast_v4f64_0426:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX512VL-NEXT: retq
%shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
%bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
%shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
@@ -972,3 +1439,69 @@ define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
%bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
ret <4 x double> %bitcast64
}
+
+define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
+; AVX1-LABEL: concat_v4i64_0167:
+; AVX1: # BB#0:
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_v4i64_0167:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: concat_v4i64_0167:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512VL-NEXT: retq
+ %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
+ %a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
+ %shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i64> %shuffle64
+}
+
+define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
+; AVX1-LABEL: concat_v4i64_0145_bc:
+; AVX1: # BB#0:
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_v4i64_0145_bc:
+; AVX2: # BB#0:
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: concat_v4i64_0145_bc:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; AVX512VL-NEXT: retq
+ %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
+ %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
+ %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>
+ %bc1lo = bitcast <2 x i64> %a1lo to <4 x i32>
+ %shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
+ ret <4 x i64> %shuffle64
+}
+
+define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) {
+; AVX1-LABEL: insert_dup_mem_v4i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_dup_mem_v4i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: insert_dup_mem_v4i64:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpbroadcastq (%rdi), %ymm0
+; AVX512VL-NEXT: retq
+ %tmp = load i64, i64* %ptr, align 1
+ %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
+ %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer
+ ret <4 x i64> %tmp2
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll
index bc72e0a66177..e8b886afd1ae 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
@@ -72,10 +73,10 @@ define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00040000:
; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8f32_00040000:
@@ -830,6 +831,87 @@ define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
ret <8 x float> %shuffle
}
+define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_uuuu1111:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_44444444:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8f32_44444444:
+; AVX2: # BB#0:
+; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+ ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_1188uuuu:
+; ALL: # BB#0:
+; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_uuuu3210:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_uuuu1188:
+; ALL: # BB#0:
+; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
+; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8>
+ ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_1111uuuu:
+; ALL: # BB#0:
+; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_5555uuuu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8f32_5555uuuu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x float> %shuffle
+}
+
define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000000:
; AVX1: # BB#0:
@@ -899,10 +981,10 @@ define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00040000:
; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_00040000:
@@ -1895,6 +1977,73 @@ define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
ret <8 x i32> %shuffle
}
+define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_uuuu1111:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_uuuu1111:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) {
+; ALL-LABEL: shuffle_v8i32_2222uuuu:
+; ALL: # BB#0:
+; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
+; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
+; ALL: # BB#0:
+; ALL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_44444444:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_44444444:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_5555uuuu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_5555uuuu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x i32> %shuffle
+}
+
define <8 x float> @splat_mem_v8f32_2(float* %p) {
; ALL-LABEL: splat_mem_v8f32_2:
; ALL: # BB#0:
@@ -2098,3 +2247,59 @@ define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
ret <8 x i32> %shuffle
}
+define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: concat_v8i32_0123CDEF:
+; AVX1: # BB#0:
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_v8i32_0123CDEF:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: retq
+ %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %shuf
+}
+
+define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
+; ALL-LABEL: concat_v8i32_4567CDEF_bc:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: retq
+ %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
+ %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
+ %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
+ ret <8 x i32> %shuffle32
+}
+
+define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
+; ALL-LABEL: concat_v8f32_4567CDEF_bc:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT: retq
+ %a0 = bitcast <8 x float> %f0 to <4 x i64>
+ %a1 = bitcast <8 x float> %f1 to <8 x i32>
+ %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+ %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
+ %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
+ %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
+ ret <8 x float> %shuffle32
+}
+
+define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
+; ALL-LABEL: insert_dup_mem_v8i32:
+; ALL: # BB#0:
+; ALL-NEXT: vbroadcastss (%rdi), %ymm0
+; ALL-NEXT: retq
+ %tmp = load i32, i32* %ptr, align 4
+ %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
+ ret <8 x i32> %tmp2
+}
diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 406d52406d95..bef54b05041b 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
@@ -12,6 +13,25 @@ define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d
ret <16 x float> %shuffle
}
+define <16 x float> @shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz(<16 x float> %a, <16 x float> %b) {
+; ALL-LABEL: shuffle_v16f32_00_zz_01_zz_04_zz_05_zz_08_zz_09_zz_0c_zz_0d_zz:
+; ALL: # BB#0:
+; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <16 x i32><i32 0, i32 16, i32 1, i32 16, i32 4, i32 16, i32 5, i32 16, i32 8, i32 16, i32 9, i32 16, i32 12, i32 16, i32 13, i32 16>
+ ret <16 x float> %shuffle
+}
+
+define <16 x float> @shuffle_v16f32_vunpcklps_swap(<16 x float> %a, <16 x float> %b) {
+; ALL-LABEL: shuffle_v16f32_vunpcklps_swap:
+; ALL: # BB#0:
+; ALL-NEXT: vunpcklps {{.*#+}} zmm0 = zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[12],zmm0[12],zmm1[13],zmm0[13]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 20, i32 4, i32 21, i32 5, i32 24, i32 8, i32 25, i32 9, i32 28, i32 12, i32 29, i32 13>
+ ret <16 x float> %shuffle
+}
+
define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
; ALL: # BB#0:
@@ -21,6 +41,16 @@ define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1
ret <16 x i32> %shuffle
}
+define <16 x i32> @shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d(<16 x i32> %a, <16 x i32> %b) {
+; ALL-LABEL: shuffle_v16i32_zz_10_zz_11_zz_14_zz_15_zz_18_zz_19_zz_1c_zz_1d:
+; ALL: # BB#0:
+; ALL-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; ALL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x i32> zeroinitializer, <16 x i32> %b, <16 x i32><i32 15, i32 16, i32 13, i32 17, i32 11, i32 20, i32 9, i32 21, i32 7, i32 24, i32 5, i32 25, i32 3, i32 28, i32 1, i32 29>
+ ret <16 x i32> %shuffle
+}
+
define <16 x float> @shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x float> %a, <16 x float> %b) {
; ALL-LABEL: shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL: # BB#0:
@@ -30,6 +60,16 @@ define <16 x float> @shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f
ret <16 x float> %shuffle
}
+define <16 x float> @shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f(<16 x float> %a, <16 x float> %b) {
+; ALL-LABEL: shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz_1f:
+; ALL: # BB#0:
+; ALL-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; ALL-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x float> zeroinitializer, <16 x float> %b, <16 x i32><i32 0, i32 18, i32 0, i32 19, i32 4, i32 22, i32 4, i32 23, i32 6, i32 26, i32 6, i32 27, i32 8, i32 30, i32 8, i32 31>
+ ret <16 x float> %shuffle
+}
+
define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
; ALL: # BB#0:
@@ -38,3 +78,97 @@ define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1
%shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
ret <16 x i32> %shuffle
}
+
+define <16 x i32> @shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz(<16 x i32> %a, <16 x i32> %b) {
+; ALL-LABEL: shuffle_v16i32_02_zz_03_zz_06_zz_07_zz_0a_zz_0b_zz_0e_zz_0f_zz:
+; ALL: # BB#0:
+; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; ALL-NEXT: vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <16 x i32> %a, <16 x i32> zeroinitializer, <16 x i32><i32 2, i32 30, i32 3, i32 28, i32 6, i32 26, i32 7, i32 24, i32 10, i32 22, i32 11, i32 20, i32 14, i32 18, i32 15, i32 16>
+ ret <16 x i32> %shuffle
+}
+
+define <16 x float> @shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x float> %a) {
+; ALL-LABEL: shuffle_v16f32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
+; ALL-NEXT: vpermps %zmm0, %zmm1, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
+ ret <16 x float> %c
+}
+
+define <16 x i32> @shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01(<16 x i32> %a) {
+; ALL-LABEL: shuffle_v16i32_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1>
+; ALL-NEXT: vpermd %zmm0, %zmm1, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
+ ret <16 x i32> %c
+}
+
+define <16 x i32> @shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32> %b) {
+; ALL-LABEL: shuffle_v16i32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
+; ALL-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+ ret <16 x i32> %c
+}
+
+define <16 x float> @shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float> %b) {
+; ALL-LABEL: shuffle_v16f32_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
+; ALL-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+ ret <16 x float> %c
+}
+
+define <16 x float> @shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x float> %a, <16 x float>* %b) {
+; ALL-LABEL: shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
+; ALL-NEXT: vpermt2ps (%rdi), %zmm1, %zmm0
+; ALL-NEXT: retq
+ %c = load <16 x float>, <16 x float>* %b
+ %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+ ret <16 x float> %d
+}
+
+define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32>* %b) {
+; ALL-LABEL: shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqa32 {{.*#+}} zmm1 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24]
+; ALL-NEXT: vpermt2d (%rdi), %zmm1, %zmm0
+; ALL-NEXT: retq
+ %c = load <16 x i32>, <16 x i32>* %b
+ %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
+ ret <16 x i32> %d
+}
+
+define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
+; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqa32 {{.*#+}} zmm2 = <0,1,2,19,u,u,u,u,u,u,u,u,u,u,u,u>
+; ALL-NEXT: vpermt2d %zmm1, %zmm2, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i32> %c
+}
+
+define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
+; ALL-LABEL: shuffle_v16f32_extract_256:
+; ALL: # BB#0:
+; ALL-NEXT: vmovups (%rsi), %zmm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: retq
+ %ptr_a = bitcast float* %a to <16 x float>*
+ %v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
+ %v2 = shufflevector <16 x float> %v_a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x float> %v2
+}
diff --git a/test/CodeGen/X86/vector-shuffle-512-v32.ll b/test/CodeGen/X86/vector-shuffle-512-v32.ll
new file mode 100644
index 000000000000..ab809beb4b48
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-512-v32.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+
+target triple = "x86_64-unknown-unknown"
+
+define <32 x i16> @shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f(<32 x i16> %a) {
+; ALL-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqu16 {{.*#+}} zmm1 = <2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,1,2,5,u,u,7,u,10,1,0,5,u,4,7,u,10,31>
+; ALL-NEXT: vpermw %zmm0, %zmm1, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1, i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1, i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 31>
+ ret <32 x i16> %c
+}
+
+define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38(<32 x i16> %a, <32 x i16> %b) {
+; ALL-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqu16 {{.*#+}} zmm2 = [15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,24,15,31,14,22,13,29,4,28,11,27,10,26,9,25,8,56]
+; ALL-NEXT: vpermt2w %zmm1, %zmm2, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24, i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 56>
+ ret <32 x i16> %c
+}
+
+define <32 x i16> @shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u(<32 x i16> %a, <32 x i16> %b) {
+; ALL-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqu16 {{.*#+}} zmm2 = <0,32,1,33,2,34,3,35,8,40,9,41,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+; ALL-NEXT: vpermt2w %zmm1, %zmm2, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i16> %c
+}
+
+define <32 x i16> @shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u(<32 x i16> %a, <32 x i16> %b) {
+; ALL-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovdqu16 {{.*#+}} zmm2 = <4,36,5,37,6,38,7,39,12,44,13,45,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+; ALL-NEXT: vpermt2w %zmm1, %zmm2, %zmm0
+; ALL-NEXT: retq
+ %c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i16> %c
+}
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 62d4af7809b6..631968f6afa2 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1,1452 +1,2209 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
target triple = "x86_64-unknown-unknown"
define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00000000:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_00000000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00000000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00000010:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_00000010:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00000010:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00000200:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_00000200:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00000200:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00003000:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_00003000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00003000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00040000:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_00040000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00040000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00500000:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,0]
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_00500000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00500000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_06000000:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,0,0]
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_06000000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_06000000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_70000000:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,0,0,0]
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_70000000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: movl $7, %eax
+; AVX512F-NEXT: vpinsrq $0, %rax, %xmm1, %xmm2
+; AVX512F-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm1
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_70000000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-32-NEXT: movl $7, %eax
+; AVX512F-32-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
+; AVX512F-32-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; AVX512F-32-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_01014545:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_01014545:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_01014545:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00112233:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,1,1]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_00112233:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00112233:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00001111:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8f64_00001111:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00001111:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_81a3c5e7:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_81a3c5e7:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,2,11,4,13,6,15]
+; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_81a3c5e7:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,2,0,11,0,4,0,13,0,6,0,15,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_08080808:
-; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_08080808:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
+; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_08080808:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_08084c4c:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vbroadcastsd %xmm3, %ymm3
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3]
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_08084c4c:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
+; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_08084c4c:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_8823cc67:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vbroadcastsd %xmm3, %ymm3
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3]
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_8823cc67:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
+; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_8823cc67:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_9832dc76:
-; ALL: # BB#0:
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm1[0,1],ymm0[2,3]
-; ALL-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_9832dc76:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
+; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_9832dc76:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_9810dc54:
-; ALL: # BB#0:
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm2
-; ALL-NEXT: vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_9810dc54:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
+; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_9810dc54:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_08194c5d:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,0,2,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,1,1,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_08194c5d:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
+; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_08194c5d:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_2a3b6e7f:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[2,1,3,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_2a3b6e7f:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
+; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_2a3b6e7f:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_08192a3b:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_08192a3b:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
+; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_08192a3b:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_08991abb:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[0,0,1,1]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm0[0],ymm2[1,2,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_08991abb:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
+; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_08991abb:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_091b2d3f:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_091b2d3f:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
+; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_091b2d3f:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_09ab1def:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermilpd {{.*#+}} ymm3 = ymm0[1,0,2,2]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_09ab1def:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
+; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_09ab1def:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00014445:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_00014445:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,4,4,4,5]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00014445:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,4,0,4,0,4,0,5,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00204464:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_00204464:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,4,6,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00204464:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,4,0,6,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_03004744:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_03004744:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,7,4,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_03004744:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,7,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_10005444:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_10005444:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,5,4,4,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_10005444:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,5,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_22006644:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_22006644:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,6,4,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_22006644:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_33307774:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_33307774:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,7,7,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_33307774:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,7,0,7,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_32107654:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_32107654:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,7,6,5,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_32107654:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00234467:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_00234467:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,4,4,6,7]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00234467:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,4,0,4,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00224466:
-; ALL: # BB#0:
-; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_00224466:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,4,4,6,6]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00224466:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_10325476:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_10325476:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,5,4,7,6]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_10325476:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,5,0,4,0,7,0,6,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_11335577:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_11335577:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,5,5,7,7]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_11335577:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,5,0,5,0,7,0,7,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_10235467:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_10235467:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,3,5,4,6,7]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_10235467:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,3,0,5,0,4,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_10225466:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_10225466:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,2,5,4,6,6]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_10225466:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,2,0,5,0,4,0,6,0,6,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00015444:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_00015444:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00015444:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00204644:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_00204644:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00204644:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_03004474:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_03004474:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_03004474:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_10004444:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_10004444:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_10004444:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_22006446:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,0,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_22006446:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_22006446:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_33307474:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,3,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_33307474:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_33307474:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_32104567:
-; ALL: # BB#0:
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_32104567:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_32104567:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00236744:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_00236744:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00236744:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00226644:
-; ALL: # BB#0:
-; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_00226644:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00226644:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_10324567:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_10324567:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,4,5,6,7]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_10324567:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,4,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_11334567:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_11334567:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,4,5,6,7]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_11334567:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,4,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_01235467:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_01235467:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,7]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_01235467:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_01235466:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_01235466:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,6]
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_01235466:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,6,0]
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_002u6u44:
-; ALL: # BB#0:
-; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_002u6u44:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_002u6u44:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_00uu66uu:
-; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_00uu66uu:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_00uu66uu:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_103245uu:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_103245uu:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,3,2,4,5,u,u>
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_103245uu:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u>
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_1133uu67:
-; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_1133uu67:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,1,3,3,u,u,6,7>
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_1133uu67:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0>
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_0uu354uu:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_0uu354uu:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,u,u,3,5,4,u,u>
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_0uu354uu:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u>
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_uuu3uu66:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
-; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_uuu3uu66:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <u,u,u,3,u,u,6,6>
+; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_uuu3uu66:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0>
+; AVX512F-32-NEXT: vpermpd %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_c348cda0:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm2[0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vbroadcastsd %xmm1, %ymm4
-; ALL-NEXT: vblendpd {{.*#+}} ymm4 = ymm3[0,1,2],ymm4[3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm4[0],ymm2[1,2],ymm4[3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2],ymm3[3]
-; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_c348cda0:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,11,12,0,4,5,2,8]
+; AVX512F-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_c348cda0:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,0,11,0,12,0,0,0,4,0,5,0,2,0,8,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
ret <8 x double> %shuffle
}
define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_f511235a:
-; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vblendpd {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[2,3,1,3]
-; ALL-NEXT: vmovddup {{.*#+}} ymm4 = ymm1[0,0,2,2]
-; ALL-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_f511235a:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,5,1,1,2,3,5,10]
+; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_f511235a:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,0,5,0,1,0,1,0,2,0,3,0,5,0,10,0]
+; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
ret <8 x double> %shuffle
}
define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00000000:
-; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00000000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00000000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00000010:
-; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00000010:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00000010:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00000200:
-; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00000200:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00000200:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00003000:
-; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00003000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00003000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00040000:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00040000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00040000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00500000:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,0]
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00500000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00500000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_06000000:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,0,0]
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_06000000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_06000000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_70000000:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,0,0,0]
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_70000000:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: movl $7, %eax
+; AVX512F-NEXT: vpinsrq $0, %rax, %xmm1, %xmm2
+; AVX512F-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm1
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_70000000:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512F-32-NEXT: movl $7, %eax
+; AVX512F-32-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
+; AVX512F-32-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; AVX512F-32-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_01014545:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+; AVX512F-LABEL: shuffle_v8i64_01014545:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_01014545:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-32-NEXT: retl
+
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00112233:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,1,1]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,3,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00112233:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00112233:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00001111:
-; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00001111:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00001111:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_81a3c5e7:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_81a3c5e7:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,2,11,4,13,6,15]
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_81a3c5e7:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,2,0,11,0,4,0,13,0,6,0,15,0]
+; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_08080808:
-; ALL: # BB#0:
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_08080808:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_08080808:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
+; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_08084c4c:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm2
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vpbroadcastq %xmm3, %ymm3
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5],ymm3[6,7]
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_08084c4c:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_08084c4c:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
+; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_8823cc67:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
-; ALL-NEXT: vpbroadcastq %xmm3, %ymm3
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
-; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_8823cc67:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_8823cc67:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
+; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_9832dc76:
-; ALL: # BB#0:
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; ALL-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_9832dc76:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_9832dc76:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
+; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_9810dc54:
-; ALL: # BB#0:
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm2
-; ALL-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_9810dc54:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_9810dc54:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
+; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_08194c5d:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,0,2,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,1,1,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_08194c5d:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_08194c5d:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
+; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_2a3b6e7f:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,1,3,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_2a3b6e7f:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_2a3b6e7f:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
+; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_08192a3b:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm1[0,2,2,3]
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_08192a3b:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_08192a3b:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
+; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_08991abb:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm1[0,0,1,1]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm0[0,1],ymm2[2,3,4,5,6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_08991abb:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_08991abb:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
+; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_091b2d3f:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_091b2d3f:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_091b2d3f:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
+; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_09ab1def:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3,4,5,6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_09ab1def:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_09ab1def:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00014445:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00014445:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,4,4,4,5]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00014445:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,4,0,4,0,4,0,5,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00204464:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00204464:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,4,6,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00204464:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,4,0,6,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_03004744:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_03004744:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,7,4,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_03004744:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,7,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_10005444:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_10005444:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,5,4,4,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_10005444:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,5,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_22006644:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_22006644:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,6,4,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_22006644:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_33307774:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_33307774:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,7,7,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_33307774:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,7,0,7,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_32107654:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_32107654:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,7,6,5,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_32107654:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00234467:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00234467:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,4,4,6,7]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00234467:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,4,0,4,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00224466:
-; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00224466:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,4,4,6,6]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00224466:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_10325476:
-; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_10325476:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,5,4,7,6]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_10325476:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,5,0,4,0,7,0,6,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_11335577:
-; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_11335577:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,5,5,7,7]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_11335577:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,5,0,5,0,7,0,7,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_10235467:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_10235467:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,3,5,4,6,7]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_10235467:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,3,0,5,0,4,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_10225466:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,2,2]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,2]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_10225466:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,2,2,5,4,6,6]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_10225466:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,2,0,5,0,4,0,6,0,6,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00015444:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00015444:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00015444:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00204644:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00204644:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00204644:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_03004474:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,3,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_03004474:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_03004474:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_10004444:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_10004444:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_10004444:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_22006446:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,0,0,2]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_22006446:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_22006446:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_33307474:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,3,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_33307474:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_33307474:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_32104567:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_32104567:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_32104567:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00236744:
-; ALL: # BB#0:
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00236744:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00236744:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00226644:
-; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00226644:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00226644:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_10324567:
-; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_10324567:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,4,5,6,7]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_10324567:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,4,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_11334567:
-; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_11334567:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,4,5,6,7]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_11334567:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,4,0,5,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_01235467:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,0,2,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_01235467:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,7]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_01235467:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,7,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_01235466:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,0,2,2]
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_01235466:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,6]
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_01235466:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,6,0]
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_002u6u44:
-; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,0,0]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_002u6u44:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_002u6u44:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_00uu66uu:
-; ALL: # BB#0:
-; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_00uu66uu:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_00uu66uu:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_103245uu:
-; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_103245uu:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,3,2,4,5,u,u>
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_103245uu:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u>
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_1133uu67:
-; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_1133uu67:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,1,3,3,u,u,6,7>
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_1133uu67:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0>
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_0uu354uu:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_0uu354uu:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,u,u,3,5,4,u,u>
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_0uu354uu:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u>
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_uuu3uu66:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_uuu3uu66:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = <u,u,u,3,u,u,6,6>
+; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_uuu3uu66:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm1 = <u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0>
+; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
ret <8 x i64> %shuffle
}
define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_6caa87e5:
-; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; ALL-NEXT: vpblendd {{.*#+}} ymm3 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1],ymm0[2,3],ymm3[4,5],ymm0[6,7]
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
-; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_6caa87e5:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,4,2,2,0,15,6,13]
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_6caa87e5:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,0,4,0,2,0,2,0,0,0,15,0,6,0,13,0]
+; AVX512F-32-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
ret <8 x i64> %shuffle
}
define <8 x double> @shuffle_v8f64_082a4c6e(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_082a4c6e:
-; ALL: # BB#0:
-; ALL-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_082a4c6e:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_082a4c6e:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
ret <8 x double> %shuffle
}
+define <8 x double> @shuffle_v8f64_0z2z4z6z(<8 x double> %a, <8 x double> %b) {
+;
+; AVX512F-LABEL: shuffle_v8f64_0z2z4z6z:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_0z2z4z6z:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512F-32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; AVX512F-32-NEXT: retl
+ %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32><i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6, i32 8>
+ ret <8 x double> %shuffle
+}
+
define <8 x i64> @shuffle_v8i64_082a4c6e(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_082a4c6e:
-; ALL: # BB#0:
-; ALL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_082a4c6e:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_082a4c6e:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
ret <8 x i64> %shuffle
}
+define <8 x i64> @shuffle_v8i64_z8zazcze(<8 x i64> %a, <8 x i64> %b) {
+;
+; AVX512F-LABEL: shuffle_v8i64_z8zazcze:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_z8zazcze:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; AVX512F-32-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; AVX512F-32-NEXT: retl
+ %shuffle = shufflevector <8 x i64> zeroinitializer, <8 x i64> %b, <8 x i32><i32 7, i32 8, i32 5, i32 10, i32 3, i32 12, i32 1, i32 14>
+ ret <8 x i64> %shuffle
+}
+
define <8 x double> @shuffle_v8f64_193b5d7f(<8 x double> %a, <8 x double> %b) {
-; ALL-LABEL: shuffle_v8f64_193b5d7f:
-; ALL: # BB#0:
-; ALL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8f64_193b5d7f:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_193b5d7f:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
ret <8 x double> %shuffle
}
+define <8 x double> @shuffle_v8f64_z9zbzdzf(<8 x double> %a, <8 x double> %b) {
+;
+; AVX512F-LABEL: shuffle_v8f64_z9zbzdzf:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; AVX512F-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8f64_z9zbzdzf:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; AVX512F-32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; AVX512F-32-NEXT: retl
+ %shuffle = shufflevector <8 x double> zeroinitializer, <8 x double> %b, <8 x i32><i32 0, i32 9, i32 0, i32 11, i32 0, i32 13, i32 0, i32 15>
+ ret <8 x double> %shuffle
+}
+
define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
-; ALL-LABEL: shuffle_v8i64_193b5d7f:
-; ALL: # BB#0:
-; ALL-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
-; ALL-NEXT: retq
+;
+; AVX512F-LABEL: shuffle_v8i64_193b5d7f:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_193b5d7f:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
ret <8 x i64> %shuffle
}
+
+define <8 x i64> @shuffle_v8i64_1z3z5z7z(<8 x i64> %a, <8 x i64> %b) {
+;
+; AVX512F-LABEL: shuffle_v8i64_1z3z5z7z:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_1z3z5z7z:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512F-32-NEXT: vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; AVX512F-32-NEXT: retl
+ %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 15, i32 5, i32 8, i32 7, i32 15>
+ ret <8 x i64> %shuffle
+}
+
+define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) nounwind {
+; AVX512F-LABEL: test_vshuff64x2_512:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_vshuff64x2_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
+; AVX512F-32-NEXT: retl
+ %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind {
+; AVX512F-LABEL: test_vshuff64x2_512_maskz:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
+; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2
+; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
+; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_vshuff64x2_512_maskz:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
+; AVX512F-32-NEXT: vpsllvq .LCPI122_0, %zmm2, %zmm2
+; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
+; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
+; AVX512F-32-NEXT: retl
+ %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
+ %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
+ ret <8 x double> %res
+}
+
+define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind {
+; AVX512F-LABEL: test_vshufi64x2_512_mask:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpmovsxwq %xmm2, %zmm2
+; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm2
+; AVX512F-NEXT: vptestmq %zmm2, %zmm2, %k1
+; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_vshufi64x2_512_mask:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2
+; AVX512F-32-NEXT: vpsllvq .LCPI123_0, %zmm2, %zmm2
+; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1
+; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
+; AVX512F-32-NEXT: retl
+ %y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
+ %res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x
+ ret <8 x i64> %res
+}
+
+define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr) nounwind {
+; AVX512F-LABEL: test_vshuff64x2_512_mem:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_vshuff64x2_512_mem:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
+; AVX512F-32-NEXT: retl
+ %x1 = load <8 x double>,<8 x double> *%ptr,align 1
+ %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
+; AVX512F-LABEL: test_vshuff64x2_512_mem_mask:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
+; AVX512F-32-NEXT: vpsllvq .LCPI125_0, %zmm1, %zmm1
+; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
+; AVX512F-32-NEXT: retl
+ %x1 = load <8 x double>,<8 x double> *%ptr,align 1
+ %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
+ %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> %x
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind {
+; AVX512F-LABEL: test_vshuff64x2_512_mem_maskz:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpmovsxwq %xmm1, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1
+; AVX512F-32-NEXT: vpsllvq .LCPI126_0, %zmm1, %zmm1
+; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
+; AVX512F-32-NEXT: retl
+ %x1 = load <8 x double>,<8 x double> *%ptr,align 1
+ %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9>
+ %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
+ ret <8 x double> %res
+}
+
+define <16 x float> @test_vshuff32x4_512(<16 x float> %x, <16 x float> %x1) nounwind {
+; AVX512F-LABEL: test_vshuff32x4_512:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
+; AVX512F-NEXT: retq
+;
+; AVX512F-32-LABEL: test_vshuff32x4_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
+; AVX512F-32-NEXT: retl
+ %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
+ ret <16 x float> %res
+}
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll
index 92c59e2fca08..75ce9753525b 100644
--- a/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
diff --git a/test/CodeGen/X86/vector-shuffle-mmx.ll b/test/CodeGen/X86/vector-shuffle-mmx.ll
index dbccd2694b07..37f9ea98949f 100644
--- a/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck --check-prefix=X32 %s
; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse2 | FileCheck --check-prefix=X64 %s
diff --git a/test/CodeGen/X86/vector-shuffle-sse1.ll b/test/CodeGen/X86/vector-shuffle-sse1.ll
index 66e53bbb7502..548de4ce6ea3 100644
--- a/test/CodeGen/X86/vector-shuffle-sse1.ll
+++ b/test/CodeGen/X86/vector-shuffle-sse1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=-sse2 | FileCheck %s --check-prefix=SSE1
target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/vector-shuffle-sse4a.ll b/test/CodeGen/X86/vector-shuffle-sse4a.ll
index 26062335cc16..eec915d91bbb 100644
--- a/test/CodeGen/X86/vector-shuffle-sse4a.ll
+++ b/test/CodeGen/X86/vector-shuffle-sse4a.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+sse4a | FileCheck %s --check-prefix=ALL --check-prefix=BTVER2
@@ -5,6 +6,35 @@
; EXTRQI
;
+; A length of zero is equivalent to a bit length of 64.
+define <2 x i64> @extrqi_len0_idx0(<2 x i64> %a) {
+; ALL-LABEL: extrqi_len0_idx0:
+; ALL: # BB#0:
+; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 0, i8 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @extrqi_len8_idx16(<2 x i64> %a) {
+; ALL-LABEL: extrqi_len8_idx16:
+; ALL: # BB#0:
+; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 8, i8 16)
+ ret <2 x i64> %1
+}
+
+; If the length + index exceeds the bottom 64 bits the result is undefined.
+define <2 x i64> @extrqi_len32_idx48(<2 x i64> %a) {
+; ALL-LABEL: extrqi_len32_idx48:
+; ALL: # BB#0:
+; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a, i8 32, i8 48)
+ ret <2 x i64> %1
+}
+
define <16 x i8> @shuf_0zzzuuuuuuuuuuuu(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_0zzzuuuuuuuuuuuu:
; BTVER1: # BB#0:
@@ -36,6 +66,24 @@ define <16 x i8> @shuf_0zzzzzzz1zzzzzzz(<16 x i8> %a0) {
ret <16 x i8> %s
}
+define <16 x i8> @shuf_2zzzzzzz3zzzzzzz(<16 x i8> %a0) {
+; BTVER1-LABEL: shuf_2zzzzzzz3zzzzzzz:
+; BTVER1: # BB#0:
+; BTVER1-NEXT: movaps %xmm0, %xmm1
+; BTVER1-NEXT: extrq {{.*#+}} xmm1 = xmm1[3],zero,zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; BTVER1-NEXT: extrq {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; BTVER1-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; BTVER1-NEXT: retq
+;
+; BTVER2-LABEL: shuf_2zzzzzzz3zzzzzzz:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsrld $16, %xmm0, %xmm0
+; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; BTVER2-NEXT: retq
+ %s = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ ret <16 x i8> %s
+}
+
define <16 x i8> @shuf_01zzuuuuuuuuuuuu(<16 x i8> %a0) {
; BTVER1-LABEL: shuf_01zzuuuuuuuuuuuu:
; BTVER1: # BB#0:
@@ -139,6 +187,35 @@ define <4 x i32> @shuf_0z1z(<4 x i32> %a0) {
; INSERTQI
;
+; A length of zero is equivalent to a bit length of 64.
+define <2 x i64> @insertqi_len0_idx0(<2 x i64> %a, <2 x i64> %b) {
+; ALL-LABEL: insertqi_len0_idx0:
+; ALL: # BB#0:
+; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6,7],xmm0[u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 0, i8 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @insertqi_len8_idx16(<2 x i64> %a, <2 x i64> %b) {
+; ALL-LABEL: insertqi_len8_idx16:
+; ALL: # BB#0:
+; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 8, i8 16)
+ ret <2 x i64> %1
+}
+
+; If the length + index exceeds the bottom 64 bits the result is undefined
+define <2 x i64> @insertqi_len32_idx48(<2 x i64> %a, <2 x i64> %b) {
+; ALL-LABEL: insertqi_len32_idx48:
+; ALL: # BB#0:
+; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a, <2 x i64> %b, i8 32, i8 48)
+ ret <2 x i64> %1
+}
+
define <16 x i8> @shuf_0_0_2_3_uuuu_uuuu_uuuu(<16 x i8> %a0, <16 x i8> %a1) {
; ALL-LABEL: shuf_0_0_2_3_uuuu_uuuu_uuuu:
; ALL: # BB#0:
@@ -219,3 +296,66 @@ define <8 x i16> @shuf_089uuuuu(<8 x i16> %a0, <8 x i16> %a1) {
%s = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %s
}
+
+;
+; Special Cases
+;
+
+; Out of range.
+define <16 x i8> @shuffle_8_18_uuuuuuuuuuuuuu(<16 x i8> %a, <16 x i8> %b) {
+; BTVER1-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
+; BTVER1: # BB#0:
+; BTVER1-NEXT: psrld $16, %xmm1
+; BTVER1-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; BTVER1-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; BTVER1-NEXT: retq
+;
+; BTVER2-LABEL: shuffle_8_18_uuuuuuuuuuuuuu:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpsrld $16, %xmm1, %xmm1
+; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; BTVER2-NEXT: retq
+ %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
+; BTVER1-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; BTVER1: # BB#0:
+; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
+; BTVER1-NEXT: retq
+;
+; BTVER2-LABEL: shuffle_uu_0_5_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,5,5,4,4,5,5,4,4,5,5,6,6,7,7]
+; BTVER2-NEXT: retq
+ %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 0, i32 5, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
+; BTVER1-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; BTVER1: # BB#0:
+; BTVER1-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
+; BTVER1-NEXT: retq
+;
+; BTVER2-LABEL: shuffle_uu_16_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,xmm0[4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
+; BTVER2-NEXT: retq
+ %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 16, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %v) {
+; ALL-LABEL: shuffle_uu_uu_4_16_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; ALL: # BB#0:
+; ALL-NEXT: extrq {{.*#+}} xmm0 = xmm0[2,3,4],zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
+ %1 = shufflevector <16 x i8> %v, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 undef, i32 4, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i8> %1
+}
+
+declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
diff --git a/test/CodeGen/X86/vector-shuffle-v1.ll b/test/CodeGen/X86/vector-shuffle-v1.ll
new file mode 100644
index 000000000000..a387f894a067
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -0,0 +1,439 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq| FileCheck %s --check-prefix=VL_BW_DQ
+
+target triple = "x86_64-unknown-unknown"
+
+define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) {
+; AVX512F-LABEL: shuf2i1_1_0:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf2i1_1_0:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
+; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
+; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
+; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
+; VL_BW_DQ-NEXT: retq
+ %b = shufflevector <2 x i1> %a, <2 x i1> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i1> %b
+}
+
+define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) {
+; AVX512F-LABEL: shuf2i1_1_2:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: movl $1, %eax
+; AVX512F-NEXT: vmovq %rax, %xmm1
+; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf2i1_1_2:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
+; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
+; VL_BW_DQ-NEXT: movb $1, %al
+; VL_BW_DQ-NEXT: kmovb %eax, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm1
+; VL_BW_DQ-NEXT: vpalignr $8, %xmm0, %xmm1, %xmm0
+; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
+; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
+; VL_BW_DQ-NEXT: retq
+ %b = shufflevector <2 x i1> %a, <2 x i1> <i1 1, i1 0>, <2 x i32> <i32 1, i32 2>
+ ret <2 x i1> %b
+}
+
+
+define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) {
+; AVX512F-LABEL: shuf4i1_3_2_10:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf4i1_3_2_10:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: vpslld $31, %xmm0, %xmm0
+; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %xmm0
+; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; VL_BW_DQ-NEXT: vpslld $31, %xmm0, %xmm0
+; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %xmm0
+; VL_BW_DQ-NEXT: retq
+ %b = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i1> %b
+}
+
+define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %a1, <8 x i64> %b1) {
+; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
+; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,1,0,3,7,7,0]
+; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
+; VL_BW_DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
+; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
+; VL_BW_DQ-NEXT: retq
+ %a2 = icmp eq <8 x i64> %a, %a1
+ %b2 = icmp eq <8 x i64> %b, %b1
+ %c = shufflevector <8 x i1> %a2, <8 x i1> %b2, <8 x i32> <i32 3, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
+ ret <8 x i1> %c
+}
+
+define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<16 x i32> %a, <16 x i32> %b, <16 x i32> %a1, <16 x i32> %b1) {
+; AVX512F-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
+; AVX512F-NEXT: vpcmpeqd %zmm3, %zmm1, %k2
+; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0
+; AVX512F-NEXT: vmovdqu32 %zmm0, %zmm1 {%k2} {z}
+; AVX512F-NEXT: vmovdqu32 %zmm0, %zmm2 {%k1} {z}
+; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm3 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512F-NEXT: vpermt2d %zmm1, %zmm3, %zmm2
+; AVX512F-NEXT: vpslld $31, %zmm2, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vmovdqu32 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
+; VL_BW_DQ-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
+; VL_BW_DQ-NEXT: vpmovm2d %k1, %zmm0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm1
+; VL_BW_DQ-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; VL_BW_DQ-NEXT: vpermt2d %zmm0, %zmm2, %zmm1
+; VL_BW_DQ-NEXT: vpslld $31, %zmm1, %zmm0
+; VL_BW_DQ-NEXT: vpmovd2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2b %k0, %xmm0
+; VL_BW_DQ-NEXT: retq
+ %a2 = icmp eq <16 x i32> %a, %a1
+ %b2 = icmp eq <16 x i32> %b, %b1
+ %c = shufflevector <16 x i1> %a2, <16 x i1> %b2, <16 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
+ ret <16 x i1> %c
+}
+
+define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<32 x i1> %a) {
+; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
+; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
+; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: vpsllw $7, %ymm0, %ymm0
+; VL_BW_DQ-NEXT: vpmovb2m %ymm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2w %k0, %zmm0
+; VL_BW_DQ-NEXT: vmovdqu16 {{.*#+}} zmm1 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0,3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; VL_BW_DQ-NEXT: vpermw %zmm0, %zmm1, %zmm0
+; VL_BW_DQ-NEXT: vpsllw $15, %zmm0, %zmm0
+; VL_BW_DQ-NEXT: vpmovw2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2b %k0, %ymm0
+; VL_BW_DQ-NEXT: retq
+ %b = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> <i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0, i32 3, i32 6, i32 22, i32 12, i32 3, i32 7, i32 7, i32 0, i32 3, i32 6, i32 1, i32 13, i32 3, i32 21, i32 7, i32 0>
+ ret <32 x i1> %b
+}
+
+define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) {
+; AVX512F-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: movzbl %dil, %eax
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
+; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
+; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1
+; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1
+; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf8i1_u_2_u_u_2_u_2_u:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1
+; VL_BW_DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
+; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
+; VL_BW_DQ-NEXT: retq
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector < 8 x i1> %b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef>
+ ret <8 x i1> %c
+}
+
+define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
+; AVX512F-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: movzbl %dil, %eax
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = <8,2,10,u,3,u,2,u>
+; VL_BW_DQ-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
+; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: retq
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector < 8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 10, i32 2, i32 9, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
+ %d = bitcast <8 x i1> %c to i8
+ ret i8 %d
+}
+
+define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
+; AVX512F-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: movzbl %dil, %eax
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vshufi64x2 $8, %zmm0, %zmm0, %zmm0 # zmm0 = zmm0[0,1,4,5,0,1,0,1]
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT: vshufi64x2 $8, %zmm0, %zmm0, %zmm0 # zmm0 = zmm0[0,1,4,5,0,1,0,1]
+; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
+; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: retq
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector < 8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
+ %d = bitcast <8 x i1> %c to i8
+ ret i8 %d
+}
+
+define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
+; AVX512F-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: movzbl %dil, %eax
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
+; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,6,1,0,3,7,7,0]
+; VL_BW_DQ-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
+; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: retq
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector <8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
+ %d = bitcast <8 x i1>%c to i8
+ ret i8 %d
+}
+
+define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
+; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: movzbl %dil, %eax
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
+; AVX512F-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,10,4,5,6,7]
+; VL_BW_DQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
+; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
+; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: retq
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector <8 x i1> zeroinitializer, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 10, i32 3, i32 7, i32 7, i32 0>
+ %d = bitcast <8 x i1>%c to i8
+ ret i8 %d
+}
+
+define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
+; AVX512F-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: movzbl %dil, %eax
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: movb $51, %al
+; AVX512F-NEXT: movzbl %al, %eax
+; AVX512F-NEXT: kmovw %eax, %k2
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0
+; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm1 {%k2} {z}
+; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
+; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: kmovb %edi, %k0
+; VL_BW_DQ-NEXT: movb $51, %al
+; VL_BW_DQ-NEXT: kmovb %eax, %k1
+; VL_BW_DQ-NEXT: vpmovm2q %k1, %zmm0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm1
+; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [9,6,1,0,3,7,7,1]
+; VL_BW_DQ-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
+; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: retq
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 1>
+ %c1 = bitcast <8 x i1>%c to i8
+ ret i8 %c1
+}
+
+define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
+; AVX512F-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
+; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
+; AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: vpsllw $15, %xmm0, %xmm0
+; VL_BW_DQ-NEXT: vpmovw2m %xmm0, %k0
+; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
+; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [9,1,2,3,4,5,6,7]
+; VL_BW_DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm2
+; VL_BW_DQ-NEXT: vpermt2q %zmm0, %zmm1, %zmm2
+; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
+; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
+; VL_BW_DQ-NEXT: kmovb %k0, %eax
+; VL_BW_DQ-NEXT: retq
+ %c = shufflevector <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1> %a, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
+ %c1 = bitcast <8 x i1>%c to i8
+ ret i8 %c1
+}
+
+
+define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
+; AVX512F-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: kmovw %edi, %k1
+; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: kmovw %edi, %k0
+; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm0
+; VL_BW_DQ-NEXT: vpbroadcastd %xmm0, %zmm0
+; VL_BW_DQ-NEXT: vpslld $31, %zmm0, %zmm0
+; VL_BW_DQ-NEXT: vpmovd2m %zmm0, %k0
+; VL_BW_DQ-NEXT: kmovw %k0, %eax
+; VL_BW_DQ-NEXT: retq
+ %b = bitcast i16 %a to <16 x i1>
+ %c = shufflevector < 16 x i1> %b, <16 x i1> undef, <16 x i32> zeroinitializer
+ %d = bitcast <16 x i1> %c to i16
+ ret i16 %d
+}
+
+define i64 @shuf64i1_zero(i64 %a) {
+; AVX512F-LABEL: shuf64i1_zero:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: pushq %rbp
+; AVX512F-NEXT: .Ltmp0:
+; AVX512F-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-NEXT: .Ltmp1:
+; AVX512F-NEXT: .cfi_offset %rbp, -16
+; AVX512F-NEXT: movq %rsp, %rbp
+; AVX512F-NEXT: .Ltmp2:
+; AVX512F-NEXT: .cfi_def_cfa_register %rbp
+; AVX512F-NEXT: andq $-32, %rsp
+; AVX512F-NEXT: subq $32, %rsp
+; AVX512F-NEXT: movb $0, (%rsp)
+; AVX512F-NEXT: movl (%rsp), %ecx
+; AVX512F-NEXT: movq %rcx, %rax
+; AVX512F-NEXT: shlq $32, %rax
+; AVX512F-NEXT: orq %rcx, %rax
+; AVX512F-NEXT: movq %rbp, %rsp
+; AVX512F-NEXT: popq %rbp
+; AVX512F-NEXT: retq
+;
+; VL_BW_DQ-LABEL: shuf64i1_zero:
+; VL_BW_DQ: # BB#0:
+; VL_BW_DQ-NEXT: kxorq %k0, %k0, %k0
+; VL_BW_DQ-NEXT: kmovq %k0, %rax
+; VL_BW_DQ-NEXT: retq
+ %b = bitcast i64 %a to <64 x i1>
+ %c = shufflevector < 64 x i1> zeroinitializer, <64 x i1> undef, <64 x i32> zeroinitializer
+ %d = bitcast <64 x i1> %c to i64
+ ret i64 %d
+}
diff --git a/test/CodeGen/X86/vector-trunc.ll b/test/CodeGen/X86/vector-trunc.ll
index 2480e676cad0..8c02c5a5433f 100644
--- a/test/CodeGen/X86/vector-trunc.ll
+++ b/test/CodeGen/X86/vector-trunc.ll
@@ -1,38 +1,590 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
-define <4 x i32> @trunc2x2i64(<2 x i64> %a, <2 x i64> %b) {
-; SSE2-LABEL: trunc2x2i64:
+define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
+; SSE2-LABEL: trunc8i64_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: trunc2x2i64:
+; SSSE3-LABEL: trunc8i64_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: trunc2x2i64:
+; SSE41-LABEL: trunc8i64_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT: retq
;
-; AVX-LABEL: trunc2x2i64:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; AVX-NEXT: retq
+; AVX1-LABEL: trunc8i64_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i64_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
+; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i64_8i32:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %0 = trunc <8 x i64> %a to <8 x i32>
+ ret <8 x i32> %0
+}
+
+define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
+; SSE2-LABEL: trunc8i64_8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pextrw $4, %xmm1, %eax
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE2-NEXT: pextrw $4, %xmm0, %ecx
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pextrw $4, %xmm3, %edx
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: pextrw $4, %xmm2, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc8i64_8i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pextrw $4, %xmm1, %eax
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pextrw $4, %xmm3, %edx
+; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT: pextrw $4, %xmm2, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i64_8i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: packusdw %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i64_8i16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i64_8i16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
+; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i64_8i16:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %0 = trunc <8 x i64> %a to <8 x i16>
+ ret <8 x i16> %0
+}
+
+define void @trunc8i64_8i8(<8 x i64> %a) {
+; SSE-LABEL: trunc8i64_8i8:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; SSE-NEXT: pand %xmm4, %xmm3
+; SSE-NEXT: pand %xmm4, %xmm2
+; SSE-NEXT: packuswb %xmm3, %xmm2
+; SSE-NEXT: pand %xmm4, %xmm1
+; SSE-NEXT: pand %xmm4, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: packuswb %xmm2, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: movq %xmm0, (%rax)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc8i64_8i8:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vmovq %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i64_8i8:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
+; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i64_8i8:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmovqb %zmm0, (%rax)
+; AVX512BW-NEXT: retq
+entry:
+ %0 = trunc <8 x i64> %a to <8 x i8>
+ store <8 x i8> %0, <8 x i8>* undef, align 4
+ ret void
+}
+
+define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
+; SSE2-LABEL: trunc8i32_8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc8i32_8i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i32_8i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT: pshufb %xmm2, %xmm1
+; SSE41-NEXT: pshufb %xmm2, %xmm0
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i32_8i16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i32_8i16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i32_8i16:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %0 = trunc <8 x i32> %a to <8 x i16>
+ ret <8 x i16> %0
+}
+define void @trunc8i32_8i8(<8 x i32> %a) {
+; SSE2-LABEL: trunc8i32_8i8:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: packuswb %xmm0, %xmm0
+; SSE2-NEXT: movq %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc8i32_8i8:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movq %xmm0, (%rax)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc8i32_8i8:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
+; SSE41-NEXT: pshufb %xmm2, %xmm1
+; SSE41-NEXT: pshufb %xmm2, %xmm0
+; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE41-NEXT: movq %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc8i32_8i8:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vmovq %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc8i32_8i8:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vmovq %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: trunc8i32_8i8:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vmovq %xmm0, (%rax)
+; AVX512BW-NEXT: retq
+entry:
+ %0 = trunc <8 x i32> %a to <8 x i8>
+ store <8 x i8> %0, <8 x i8>* undef, align 4
+ ret void
+}
+define void @trunc16i32_16i8(<16 x i32> %a) {
+; SSE-LABEL: trunc16i32_16i8:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE-NEXT: pand %xmm4, %xmm3
+; SSE-NEXT: pand %xmm4, %xmm2
+; SSE-NEXT: packuswb %xmm3, %xmm2
+; SSE-NEXT: pand %xmm4, %xmm1
+; SSE-NEXT: pand %xmm4, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: packuswb %xmm2, %xmm0
+; SSE-NEXT: movdqu %xmm0, (%rax)
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: trunc16i32_16i8:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vandps %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc16i32_16i8:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
+; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vmovdqu %xmm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: trunc16i32_16i8:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512BW-NEXT: retq
+entry:
+ %0 = trunc <16 x i32> %a to <16 x i8>
+ store <16 x i8> %0, <16 x i8>* undef, align 4
+ ret void
+}
+
+define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: trunc2x4i64_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc2x4i64_8i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x4i64_8i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc2x4i64_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc2x4i64_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
+; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x4i64_8i32:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512BW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %0 = trunc <4 x i64> %a to <4 x i32>
+ %1 = trunc <4 x i64> %b to <4 x i32>
+ %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: trunc2x4i64_8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pextrw $4, %xmm1, %eax
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE2-NEXT: pextrw $4, %xmm0, %ecx
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pextrw $4, %xmm3, %edx
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: pextrw $4, %xmm2, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movd %ecx, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc2x4i64_8i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pextrw $4, %xmm1, %eax
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pextrw $4, %xmm3, %edx
+; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT: pextrw $4, %xmm2, %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movd %ecx, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x4i64_8i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pextrw $4, %xmm0, %eax
+; SSE41-NEXT: pinsrw $1, %eax, %xmm0
+; SSE41-NEXT: movd %xmm1, %eax
+; SSE41-NEXT: pinsrw $2, %eax, %xmm0
+; SSE41-NEXT: pextrw $4, %xmm1, %eax
+; SSE41-NEXT: pinsrw $3, %eax, %xmm0
+; SSE41-NEXT: movd %xmm2, %eax
+; SSE41-NEXT: pinsrw $4, %eax, %xmm0
+; SSE41-NEXT: pextrw $4, %xmm2, %eax
+; SSE41-NEXT: pinsrw $5, %eax, %xmm0
+; SSE41-NEXT: movd %xmm3, %eax
+; SSE41-NEXT: pinsrw $6, %eax, %xmm0
+; SSE41-NEXT: pextrw $4, %xmm3, %eax
+; SSE41-NEXT: pinsrw $7, %eax, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc2x4i64_8i16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc2x4i64_8i16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
+; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x4i64_8i16:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512BW-NEXT: retq
+entry:
+ %0 = trunc <4 x i64> %a to <4 x i16>
+ %1 = trunc <4 x i64> %b to <4 x i16>
+ %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: trunc2x2i64_4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc2x2i64_4i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x2i64_4i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc2x2i64_4i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc2x2i64_4i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: trunc2x2i64_4i32:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512BW-NEXT: retq
entry:
%0 = trunc <2 x i64> %a to <2 x i32>
%1 = trunc <2 x i64> %b to <2 x i32>
@@ -40,28 +592,32 @@ entry:
ret <4 x i32> %2
}
-define i64 @trunc2i64(<2 x i64> %inval) {
-; SSE-LABEL: trunc2i64:
+define i64 @trunc2i64_i64(<2 x i64> %inval) {
+; SSE-LABEL: trunc2i64_i64:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: retq
;
-; AVX-LABEL: trunc2i64:
+; AVX-LABEL: trunc2i64_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
-
-
+;
+; AVX512BW-LABEL: trunc2i64_i64:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: retq
entry:
%0 = trunc <2 x i64> %inval to <2 x i32>
%1 = bitcast <2 x i32> %0 to i64
ret i64 %1
}
-define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) {
-; SSE2-LABEL: trunc2x4i32:
+define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: trunc2x4i32_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
@@ -72,7 +628,7 @@ define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: trunc2x4i32:
+; SSSE3-LABEL: trunc2x4i32_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
@@ -80,7 +636,7 @@ define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) {
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: trunc2x4i32:
+; SSE41-LABEL: trunc2x4i32_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: pshufb %xmm2, %xmm1
@@ -88,17 +644,21 @@ define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
-; AVX-LABEL: trunc2x4i32:
+; AVX-LABEL: trunc2x4i32_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
-
-
-
-
+;
+; AVX512BW-LABEL: trunc2x4i32_8i16:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512BW-NEXT: retq
entry:
%0 = trunc <4 x i32> %a to <4 x i16>
%1 = trunc <4 x i32> %b to <4 x i16>
@@ -107,8 +667,8 @@ entry:
}
; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
-define i64 @trunc4i32(<4 x i32> %inval) {
-; SSE2-LABEL: trunc4i32:
+define i64 @trunc4i32_i64(<4 x i32> %inval) {
+; SSE2-LABEL: trunc4i32_i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
@@ -116,35 +676,37 @@ define i64 @trunc4i32(<4 x i32> %inval) {
; SSE2-NEXT: movd %xmm0, %rax
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: trunc4i32:
+; SSSE3-LABEL: trunc4i32_i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: movd %xmm0, %rax
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: trunc4i32:
+; SSE41-LABEL: trunc4i32_i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: movd %xmm0, %rax
; SSE41-NEXT: retq
;
-; AVX-LABEL: trunc4i32:
+; AVX-LABEL: trunc4i32_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
-
-
-
-
+;
+; AVX512BW-LABEL: trunc4i32_i64:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: retq
entry:
%0 = trunc <4 x i32> %inval to <4 x i16>
%1 = bitcast <4 x i16> %0 to i64
ret i64 %1
}
-define <16 x i8> @trunc2x8i16(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: trunc2x8i16:
+define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: trunc2x8i16_16i8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm1
@@ -152,7 +714,7 @@ define <16 x i8> @trunc2x8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: trunc2x8i16:
+; SSSE3-LABEL: trunc2x8i16_16i8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm2, %xmm1
@@ -160,7 +722,7 @@ define <16 x i8> @trunc2x8i16(<8 x i16> %a, <8 x i16> %b) {
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: trunc2x8i16:
+; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm2, %xmm1
@@ -168,17 +730,21 @@ define <16 x i8> @trunc2x8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
-; AVX-LABEL: trunc2x8i16:
+; AVX-LABEL: trunc2x8i16_16i8:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
-
-
-
-
+;
+; AVX512BW-LABEL: trunc2x8i16_16i8:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512BW-NEXT: retq
entry:
%0 = trunc <8 x i16> %a to <8 x i8>
%1 = trunc <8 x i16> %b to <8 x i8>
@@ -187,51 +753,58 @@ entry:
}
; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
-define i64 @trunc8i16(<8 x i16> %inval) {
-; SSE2-LABEL: trunc8i16:
+define i64 @trunc8i16_i64(<8 x i16> %inval) {
+; SSE2-LABEL: trunc8i16_i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movd %xmm0, %rax
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: trunc8i16:
+; SSSE3-LABEL: trunc8i16_i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movd %xmm0, %rax
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: trunc8i16:
+; SSE41-LABEL: trunc8i16_i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: movd %xmm0, %rax
; SSE41-NEXT: retq
;
-; AVX-LABEL: trunc8i16:
+; AVX-LABEL: trunc8i16_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
-
-
-
-
+;
+; AVX512BW-LABEL: trunc8i16_i64:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vmovq %xmm0, %rax
+; AVX512BW-NEXT: retq
entry:
%0 = trunc <8 x i16> %inval to <8 x i8>
%1 = bitcast <8 x i8> %0 to i64
ret i64 %1
}
-define <16 x i8> @trunc16i64_const() {
-; SSE-LABEL: trunc16i64_const:
+define <16 x i8> @trunc16i64_16i8_const() {
+; SSE-LABEL: trunc16i64_16i8_const:
; SSE: # BB#0: # %entry
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: trunc16i64_const:
+; AVX-LABEL: trunc16i64_16i8_const:
; AVX: # BB#0: # %entry
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
+;
+; AVX512BW-LABEL: trunc16i64_16i8_const:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
entry:
%0 = trunc <16 x i64> zeroinitializer to <16 x i8>
diff --git a/test/CodeGen/X86/vector-tzcnt-128.ll b/test/CodeGen/X86/vector-tzcnt-128.ll
index 422fe052d38b..f1714d4845de 100644
--- a/test/CodeGen/X86/vector-tzcnt-128.ll
+++ b/test/CodeGen/X86/vector-tzcnt-128.ll
@@ -1,13 +1,12 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-target triple = "x86_64-unknown-unknown"
-
-define <2 x i64> @testv2i64(<2 x i64> %in) {
+define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movd %xmm0, %rax
@@ -87,7 +86,7 @@ define <2 x i64> @testv2i64(<2 x i64> %in) {
ret <2 x i64> %out
}
-define <2 x i64> @testv2i64u(<2 x i64> %in) {
+define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64u:
; SSE2: # BB#0:
; SSE2-NEXT: movd %xmm0, %rax
@@ -152,1521 +151,755 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) {
ret <2 x i64> %out
}
-define <4 x i32> @testv4i32(<4 x i32> %in) {
+define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-LABEL: testv4i32:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: movl $32, %ecx
-; SSE2-NEXT: cmovel %ecx, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm2, %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: cmovel %ecx, %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: cmovel %ecx, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: cmovel %ecx, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: psubd %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
+; SSE2-NEXT: psubd {{.*}}(%rip), %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: psubd %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm0, %xmm3
+; SSE2-NEXT: psrld $2, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
+; SSE2-NEXT: paddd %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: psrld $4, %xmm0
+; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: psadbw %xmm1, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: psadbw %xmm1, %xmm0
+; SSE2-NEXT: packuswb %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv4i32:
; SSE3: # BB#0:
-; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSE3-NEXT: movd %xmm1, %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: movl $32, %ecx
-; SSE3-NEXT: cmovel %ecx, %eax
-; SSE3-NEXT: movd %eax, %xmm1
-; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; SSE3-NEXT: movd %xmm2, %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: cmovel %ecx, %eax
-; SSE3-NEXT: movd %eax, %xmm2
-; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE3-NEXT: movd %xmm0, %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: cmovel %ecx, %eax
-; SSE3-NEXT: movd %eax, %xmm1
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE3-NEXT: movd %xmm0, %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: cmovel %ecx, %eax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: pxor %xmm1, %xmm1
+; SSE3-NEXT: pxor %xmm2, %xmm2
+; SSE3-NEXT: psubd %xmm0, %xmm2
+; SSE3-NEXT: pand %xmm0, %xmm2
+; SSE3-NEXT: psubd {{.*}}(%rip), %xmm2
+; SSE3-NEXT: movdqa %xmm2, %xmm0
+; SSE3-NEXT: psrld $1, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE3-NEXT: psubd %xmm0, %xmm2
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: movdqa %xmm2, %xmm3
+; SSE3-NEXT: pand %xmm0, %xmm3
+; SSE3-NEXT: psrld $2, %xmm2
+; SSE3-NEXT: pand %xmm0, %xmm2
+; SSE3-NEXT: paddd %xmm3, %xmm2
+; SSE3-NEXT: movdqa %xmm2, %xmm0
+; SSE3-NEXT: psrld $4, %xmm0
+; SSE3-NEXT: paddd %xmm2, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE3-NEXT: psadbw %xmm1, %xmm2
+; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT: psadbw %xmm1, %xmm0
+; SSE3-NEXT: packuswb %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv4i32:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSSE3-NEXT: movd %xmm1, %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: movl $32, %ecx
-; SSSE3-NEXT: cmovel %ecx, %eax
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; SSSE3-NEXT: movd %xmm2, %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: cmovel %ecx, %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSSE3-NEXT: movd %xmm0, %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: cmovel %ecx, %eax
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSSE3-NEXT: movd %xmm0, %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: cmovel %ecx, %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: psubd %xmm0, %xmm2
+; SSSE3-NEXT: pand %xmm0, %xmm2
+; SSSE3-NEXT: psubd {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm2, %xmm4
+; SSSE3-NEXT: pand %xmm3, %xmm4
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm0, %xmm5
+; SSSE3-NEXT: pshufb %xmm4, %xmm5
+; SSSE3-NEXT: psrlw $4, %xmm2
+; SSSE3-NEXT: pand %xmm3, %xmm2
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: paddb %xmm5, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: psadbw %xmm1, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: psadbw %xmm1, %xmm0
+; SSSE3-NEXT: packuswb %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv4i32:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: movl $32, %ecx
-; SSE41-NEXT: cmovel %ecx, %eax
-; SSE41-NEXT: movd %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: movd %edx, %xmm1
-; SSE41-NEXT: pinsrd $1, %eax, %xmm1
-; SSE41-NEXT: pextrd $2, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: cmovel %ecx, %eax
-; SSE41-NEXT: pinsrd $2, %eax, %xmm1
-; SSE41-NEXT: pextrd $3, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: cmovel %ecx, %eax
-; SSE41-NEXT: pinsrd $3, %eax, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: psubd %xmm0, %xmm2
+; SSE41-NEXT: pand %xmm0, %xmm2
+; SSE41-NEXT: psubd {{.*}}(%rip), %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm0, %xmm5
+; SSE41-NEXT: pshufb %xmm4, %xmm5
+; SSE41-NEXT: psrlw $4, %xmm2
+; SSE41-NEXT: pand %xmm3, %xmm2
+; SSE41-NEXT: pshufb %xmm2, %xmm0
+; SSE41-NEXT: paddb %xmm5, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE41-NEXT: psadbw %xmm1, %xmm2
+; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE41-NEXT: psadbw %xmm1, %xmm0
+; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv4i32:
-; AVX: # BB#0:
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: movl $32, %ecx
-; AVX-NEXT: cmovel %ecx, %eax
-; AVX-NEXT: vmovd %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vmovd %edx, %xmm1
-; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: cmovel %ecx, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: cmovel %ecx, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: testv4i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: testv4i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpshufb %xmm0, %xmm4, %xmm0
+; AVX2-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: retq
%out = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %in, i1 0)
ret <4 x i32> %out
}
-define <4 x i32> @testv4i32u(<4 x i32> %in) {
+define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE2-LABEL: testv4i32u:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm2, %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: psubd %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
+; SSE2-NEXT: psubd {{.*}}(%rip), %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: psubd %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm0, %xmm3
+; SSE2-NEXT: psrld $2, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
+; SSE2-NEXT: paddd %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: psrld $4, %xmm0
+; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: psadbw %xmm1, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: psadbw %xmm1, %xmm0
+; SSE2-NEXT: packuswb %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv4i32u:
; SSE3: # BB#0:
-; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSE3-NEXT: movd %xmm1, %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: movd %eax, %xmm1
-; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; SSE3-NEXT: movd %xmm2, %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: movd %eax, %xmm2
-; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE3-NEXT: movd %xmm0, %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: movd %eax, %xmm1
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE3-NEXT: movd %xmm0, %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: pxor %xmm1, %xmm1
+; SSE3-NEXT: pxor %xmm2, %xmm2
+; SSE3-NEXT: psubd %xmm0, %xmm2
+; SSE3-NEXT: pand %xmm0, %xmm2
+; SSE3-NEXT: psubd {{.*}}(%rip), %xmm2
+; SSE3-NEXT: movdqa %xmm2, %xmm0
+; SSE3-NEXT: psrld $1, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE3-NEXT: psubd %xmm0, %xmm2
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: movdqa %xmm2, %xmm3
+; SSE3-NEXT: pand %xmm0, %xmm3
+; SSE3-NEXT: psrld $2, %xmm2
+; SSE3-NEXT: pand %xmm0, %xmm2
+; SSE3-NEXT: paddd %xmm3, %xmm2
+; SSE3-NEXT: movdqa %xmm2, %xmm0
+; SSE3-NEXT: psrld $4, %xmm0
+; SSE3-NEXT: paddd %xmm2, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE3-NEXT: movdqa %xmm0, %xmm2
+; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE3-NEXT: psadbw %xmm1, %xmm2
+; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT: psadbw %xmm1, %xmm0
+; SSE3-NEXT: packuswb %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv4i32u:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSSE3-NEXT: movd %xmm1, %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
-; SSSE3-NEXT: movd %xmm2, %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSSE3-NEXT: movd %xmm0, %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSSE3-NEXT: movd %xmm0, %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: psubd %xmm0, %xmm2
+; SSSE3-NEXT: pand %xmm0, %xmm2
+; SSSE3-NEXT: psubd {{.*}}(%rip), %xmm2
+; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm2, %xmm4
+; SSSE3-NEXT: pand %xmm3, %xmm4
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm0, %xmm5
+; SSSE3-NEXT: pshufb %xmm4, %xmm5
+; SSSE3-NEXT: psrlw $4, %xmm2
+; SSSE3-NEXT: pand %xmm3, %xmm2
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: paddb %xmm5, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: psadbw %xmm1, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: psadbw %xmm1, %xmm0
+; SSSE3-NEXT: packuswb %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv4i32u:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: movd %xmm0, %ecx
-; SSE41-NEXT: bsfl %ecx, %ecx
-; SSE41-NEXT: movd %ecx, %xmm1
-; SSE41-NEXT: pinsrd $1, %eax, %xmm1
-; SSE41-NEXT: pextrd $2, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrd $2, %eax, %xmm1
-; SSE41-NEXT: pextrd $3, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrd $3, %eax, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: psubd %xmm0, %xmm2
+; SSE41-NEXT: pand %xmm0, %xmm2
+; SSE41-NEXT: psubd {{.*}}(%rip), %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: pand %xmm3, %xmm4
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm0, %xmm5
+; SSE41-NEXT: pshufb %xmm4, %xmm5
+; SSE41-NEXT: psrlw $4, %xmm2
+; SSE41-NEXT: pand %xmm3, %xmm2
+; SSE41-NEXT: pshufb %xmm2, %xmm0
+; SSE41-NEXT: paddb %xmm5, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE41-NEXT: psadbw %xmm1, %xmm2
+; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE41-NEXT: psadbw %xmm1, %xmm0
+; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: testv4i32u:
-; AVX: # BB#0:
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vmovd %xmm0, %ecx
-; AVX-NEXT: bsfl %ecx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm1
-; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: testv4i32u:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: testv4i32u:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpsubd %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %xmm3, %xmm4, %xmm3
+; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpshufb %xmm0, %xmm4, %xmm0
+; AVX2-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: retq
%out = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %in, i1 -1)
ret <4 x i32> %out
}
-define <8 x i16> @testv8i16(<8 x i16> %in) {
+define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-LABEL: testv8i16:
; SSE2: # BB#0:
-; SSE2-NEXT: pextrw $7, %xmm0, %eax
-; SSE2-NEXT: bsfw %ax, %cx
-; SSE2-NEXT: movw $16, %ax
-; SSE2-NEXT: cmovew %ax, %cx
-; SSE2-NEXT: movd %ecx, %xmm1
-; SSE2-NEXT: pextrw $3, %xmm0, %ecx
-; SSE2-NEXT: bsfw %cx, %cx
-; SSE2-NEXT: cmovew %ax, %cx
-; SSE2-NEXT: movd %ecx, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: pextrw $5, %xmm0, %ecx
-; SSE2-NEXT: bsfw %cx, %cx
-; SSE2-NEXT: cmovew %ax, %cx
-; SSE2-NEXT: movd %ecx, %xmm3
-; SSE2-NEXT: pextrw $1, %xmm0, %ecx
-; SSE2-NEXT: bsfw %cx, %cx
-; SSE2-NEXT: cmovew %ax, %cx
-; SSE2-NEXT: movd %ecx, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSE2-NEXT: pextrw $6, %xmm0, %ecx
-; SSE2-NEXT: bsfw %cx, %cx
-; SSE2-NEXT: cmovew %ax, %cx
-; SSE2-NEXT: movd %ecx, %xmm2
-; SSE2-NEXT: pextrw $2, %xmm0, %ecx
-; SSE2-NEXT: bsfw %cx, %cx
-; SSE2-NEXT: cmovew %ax, %cx
-; SSE2-NEXT: movd %ecx, %xmm3
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT: pextrw $4, %xmm0, %ecx
-; SSE2-NEXT: bsfw %cx, %cx
-; SSE2-NEXT: cmovew %ax, %cx
-; SSE2-NEXT: movd %ecx, %xmm2
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: bsfw %cx, %cx
-; SSE2-NEXT: cmovew %ax, %cx
-; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psubw %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: psubw {{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: psubw %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
+; SSE2-NEXT: psrlw $2, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: paddw %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrlw $4, %xmm2
+; SSE2-NEXT: paddw %xmm1, %xmm2
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: psllw $8, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
+; SSE2-NEXT: psrlw $8, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv8i16:
; SSE3: # BB#0:
-; SSE3-NEXT: pextrw $7, %xmm0, %eax
-; SSE3-NEXT: bsfw %ax, %cx
-; SSE3-NEXT: movw $16, %ax
-; SSE3-NEXT: cmovew %ax, %cx
-; SSE3-NEXT: movd %ecx, %xmm1
-; SSE3-NEXT: pextrw $3, %xmm0, %ecx
-; SSE3-NEXT: bsfw %cx, %cx
-; SSE3-NEXT: cmovew %ax, %cx
-; SSE3-NEXT: movd %ecx, %xmm2
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE3-NEXT: pextrw $5, %xmm0, %ecx
-; SSE3-NEXT: bsfw %cx, %cx
-; SSE3-NEXT: cmovew %ax, %cx
-; SSE3-NEXT: movd %ecx, %xmm3
-; SSE3-NEXT: pextrw $1, %xmm0, %ecx
-; SSE3-NEXT: bsfw %cx, %cx
-; SSE3-NEXT: cmovew %ax, %cx
-; SSE3-NEXT: movd %ecx, %xmm1
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSE3-NEXT: pextrw $6, %xmm0, %ecx
-; SSE3-NEXT: bsfw %cx, %cx
-; SSE3-NEXT: cmovew %ax, %cx
-; SSE3-NEXT: movd %ecx, %xmm2
-; SSE3-NEXT: pextrw $2, %xmm0, %ecx
-; SSE3-NEXT: bsfw %cx, %cx
-; SSE3-NEXT: cmovew %ax, %cx
-; SSE3-NEXT: movd %ecx, %xmm3
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE3-NEXT: pextrw $4, %xmm0, %ecx
-; SSE3-NEXT: bsfw %cx, %cx
-; SSE3-NEXT: cmovew %ax, %cx
-; SSE3-NEXT: movd %ecx, %xmm2
-; SSE3-NEXT: movd %xmm0, %ecx
-; SSE3-NEXT: bsfw %cx, %cx
-; SSE3-NEXT: cmovew %ax, %cx
-; SSE3-NEXT: movd %ecx, %xmm0
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE3-NEXT: pxor %xmm1, %xmm1
+; SSE3-NEXT: psubw %xmm0, %xmm1
+; SSE3-NEXT: pand %xmm0, %xmm1
+; SSE3-NEXT: psubw {{.*}}(%rip), %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE3-NEXT: psubw %xmm0, %xmm1
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: movdqa %xmm1, %xmm2
+; SSE3-NEXT: pand %xmm0, %xmm2
+; SSE3-NEXT: psrlw $2, %xmm1
+; SSE3-NEXT: pand %xmm0, %xmm1
+; SSE3-NEXT: paddw %xmm2, %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm2
+; SSE3-NEXT: psrlw $4, %xmm2
+; SSE3-NEXT: paddw %xmm1, %xmm2
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE3-NEXT: movdqa %xmm2, %xmm0
+; SSE3-NEXT: psllw $8, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
+; SSE3-NEXT: psrlw $8, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv8i16:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pextrw $7, %xmm0, %eax
-; SSSE3-NEXT: bsfw %ax, %cx
-; SSSE3-NEXT: movw $16, %ax
-; SSSE3-NEXT: cmovew %ax, %cx
-; SSSE3-NEXT: movd %ecx, %xmm1
-; SSSE3-NEXT: pextrw $3, %xmm0, %ecx
-; SSSE3-NEXT: bsfw %cx, %cx
-; SSSE3-NEXT: cmovew %ax, %cx
-; SSSE3-NEXT: movd %ecx, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT: pextrw $5, %xmm0, %ecx
-; SSSE3-NEXT: bsfw %cx, %cx
-; SSSE3-NEXT: cmovew %ax, %cx
-; SSSE3-NEXT: movd %ecx, %xmm3
-; SSSE3-NEXT: pextrw $1, %xmm0, %ecx
-; SSSE3-NEXT: bsfw %cx, %cx
-; SSSE3-NEXT: cmovew %ax, %cx
-; SSSE3-NEXT: movd %ecx, %xmm1
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSSE3-NEXT: pextrw $6, %xmm0, %ecx
-; SSSE3-NEXT: bsfw %cx, %cx
-; SSSE3-NEXT: cmovew %ax, %cx
-; SSSE3-NEXT: movd %ecx, %xmm2
-; SSSE3-NEXT: pextrw $2, %xmm0, %ecx
-; SSSE3-NEXT: bsfw %cx, %cx
-; SSSE3-NEXT: cmovew %ax, %cx
-; SSSE3-NEXT: movd %ecx, %xmm3
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
-; SSSE3-NEXT: bsfw %cx, %cx
-; SSSE3-NEXT: cmovew %ax, %cx
-; SSSE3-NEXT: movd %ecx, %xmm2
-; SSSE3-NEXT: movd %xmm0, %ecx
-; SSSE3-NEXT: bsfw %cx, %cx
-; SSSE3-NEXT: cmovew %ax, %cx
-; SSSE3-NEXT: movd %ecx, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: psubw %xmm0, %xmm1
+; SSSE3-NEXT: pand %xmm0, %xmm1
+; SSSE3-NEXT: psubw {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm1, %xmm2
+; SSSE3-NEXT: pand %xmm0, %xmm2
+; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSSE3-NEXT: pshufb %xmm2, %xmm4
+; SSSE3-NEXT: psrlw $4, %xmm1
+; SSSE3-NEXT: pand %xmm0, %xmm1
+; SSSE3-NEXT: pshufb %xmm1, %xmm3
+; SSSE3-NEXT: paddb %xmm4, %xmm3
+; SSSE3-NEXT: movdqa %xmm3, %xmm0
+; SSSE3-NEXT: psllw $8, %xmm0
+; SSSE3-NEXT: paddb %xmm3, %xmm0
+; SSSE3-NEXT: psrlw $8, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv8i16:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrw $1, %xmm0, %eax
-; SSE41-NEXT: bsfw %ax, %cx
-; SSE41-NEXT: movw $16, %ax
-; SSE41-NEXT: cmovew %ax, %cx
-; SSE41-NEXT: movd %xmm0, %edx
-; SSE41-NEXT: bsfw %dx, %dx
-; SSE41-NEXT: cmovew %ax, %dx
-; SSE41-NEXT: movd %edx, %xmm1
-; SSE41-NEXT: pinsrw $1, %ecx, %xmm1
-; SSE41-NEXT: pextrw $2, %xmm0, %ecx
-; SSE41-NEXT: bsfw %cx, %cx
-; SSE41-NEXT: cmovew %ax, %cx
-; SSE41-NEXT: pinsrw $2, %ecx, %xmm1
-; SSE41-NEXT: pextrw $3, %xmm0, %ecx
-; SSE41-NEXT: bsfw %cx, %cx
-; SSE41-NEXT: cmovew %ax, %cx
-; SSE41-NEXT: pinsrw $3, %ecx, %xmm1
-; SSE41-NEXT: pextrw $4, %xmm0, %ecx
-; SSE41-NEXT: bsfw %cx, %cx
-; SSE41-NEXT: cmovew %ax, %cx
-; SSE41-NEXT: pinsrw $4, %ecx, %xmm1
-; SSE41-NEXT: pextrw $5, %xmm0, %ecx
-; SSE41-NEXT: bsfw %cx, %cx
-; SSE41-NEXT: cmovew %ax, %cx
-; SSE41-NEXT: pinsrw $5, %ecx, %xmm1
-; SSE41-NEXT: pextrw $6, %xmm0, %ecx
-; SSE41-NEXT: bsfw %cx, %cx
-; SSE41-NEXT: cmovew %ax, %cx
-; SSE41-NEXT: pinsrw $6, %ecx, %xmm1
-; SSE41-NEXT: pextrw $7, %xmm0, %ecx
-; SSE41-NEXT: bsfw %cx, %cx
-; SSE41-NEXT: cmovew %ax, %cx
-; SSE41-NEXT: pinsrw $7, %ecx, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: psubw %xmm0, %xmm1
+; SSE41-NEXT: pand %xmm0, %xmm1
+; SSE41-NEXT: psubw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: pand %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pshufb %xmm2, %xmm4
+; SSE41-NEXT: psrlw $4, %xmm1
+; SSE41-NEXT: pand %xmm0, %xmm1
+; SSE41-NEXT: pshufb %xmm1, %xmm3
+; SSE41-NEXT: paddb %xmm4, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: psllw $8, %xmm0
+; SSE41-NEXT: paddb %xmm3, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testv8i16:
; AVX: # BB#0:
-; AVX-NEXT: vpextrw $1, %xmm0, %eax
-; AVX-NEXT: bsfw %ax, %cx
-; AVX-NEXT: movw $16, %ax
-; AVX-NEXT: cmovew %ax, %cx
-; AVX-NEXT: vmovd %xmm0, %edx
-; AVX-NEXT: bsfw %dx, %dx
-; AVX-NEXT: cmovew %ax, %dx
-; AVX-NEXT: vmovd %edx, %xmm1
-; AVX-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $2, %xmm0, %ecx
-; AVX-NEXT: bsfw %cx, %cx
-; AVX-NEXT: cmovew %ax, %cx
-; AVX-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $3, %xmm0, %ecx
-; AVX-NEXT: bsfw %cx, %cx
-; AVX-NEXT: cmovew %ax, %cx
-; AVX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $4, %xmm0, %ecx
-; AVX-NEXT: bsfw %cx, %cx
-; AVX-NEXT: cmovew %ax, %cx
-; AVX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $5, %xmm0, %ecx
-; AVX-NEXT: bsfw %cx, %cx
-; AVX-NEXT: cmovew %ax, %cx
-; AVX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $6, %xmm0, %ecx
-; AVX-NEXT: bsfw %cx, %cx
-; AVX-NEXT: cmovew %ax, %cx
-; AVX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $7, %xmm0, %ecx
-; AVX-NEXT: bsfw %cx, %cx
-; AVX-NEXT: cmovew %ax, %cx
-; AVX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsubw %xmm0, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsllw $8, %xmm0, %xmm1
+; AVX-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: retq
%out = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %in, i1 0)
ret <8 x i16> %out
}
-define <8 x i16> @testv8i16u(<8 x i16> %in) {
+define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE2-LABEL: testv8i16u:
; SSE2: # BB#0:
-; SSE2-NEXT: pextrw $7, %xmm0, %eax
-; SSE2-NEXT: bsfw %ax, %ax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pextrw $3, %xmm0, %eax
-; SSE2-NEXT: bsfw %ax, %ax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: pextrw $5, %xmm0, %eax
-; SSE2-NEXT: bsfw %ax, %ax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pextrw $1, %xmm0, %eax
-; SSE2-NEXT: bsfw %ax, %ax
-; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT: pextrw $6, %xmm0, %eax
-; SSE2-NEXT: bsfw %ax, %ax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pextrw $2, %xmm0, %eax
-; SSE2-NEXT: bsfw %ax, %ax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: pextrw $4, %xmm0, %eax
-; SSE2-NEXT: bsfw %ax, %ax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: bsfw %ax, %ax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psubw %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: psubw {{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: psubw %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
+; SSE2-NEXT: psrlw $2, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: paddw %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrlw $4, %xmm2
+; SSE2-NEXT: paddw %xmm1, %xmm2
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: psllw $8, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
+; SSE2-NEXT: psrlw $8, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv8i16u:
; SSE3: # BB#0:
-; SSE3-NEXT: pextrw $7, %xmm0, %eax
-; SSE3-NEXT: bsfw %ax, %ax
-; SSE3-NEXT: movd %eax, %xmm1
-; SSE3-NEXT: pextrw $3, %xmm0, %eax
-; SSE3-NEXT: bsfw %ax, %ax
-; SSE3-NEXT: movd %eax, %xmm2
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE3-NEXT: pextrw $5, %xmm0, %eax
-; SSE3-NEXT: bsfw %ax, %ax
-; SSE3-NEXT: movd %eax, %xmm1
-; SSE3-NEXT: pextrw $1, %xmm0, %eax
-; SSE3-NEXT: bsfw %ax, %ax
-; SSE3-NEXT: movd %eax, %xmm3
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE3-NEXT: pextrw $6, %xmm0, %eax
-; SSE3-NEXT: bsfw %ax, %ax
-; SSE3-NEXT: movd %eax, %xmm1
-; SSE3-NEXT: pextrw $2, %xmm0, %eax
-; SSE3-NEXT: bsfw %ax, %ax
-; SSE3-NEXT: movd %eax, %xmm2
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE3-NEXT: pextrw $4, %xmm0, %eax
-; SSE3-NEXT: bsfw %ax, %ax
-; SSE3-NEXT: movd %eax, %xmm1
-; SSE3-NEXT: movd %xmm0, %eax
-; SSE3-NEXT: bsfw %ax, %ax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE3-NEXT: pxor %xmm1, %xmm1
+; SSE3-NEXT: psubw %xmm0, %xmm1
+; SSE3-NEXT: pand %xmm0, %xmm1
+; SSE3-NEXT: psubw {{.*}}(%rip), %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE3-NEXT: psubw %xmm0, %xmm1
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: movdqa %xmm1, %xmm2
+; SSE3-NEXT: pand %xmm0, %xmm2
+; SSE3-NEXT: psrlw $2, %xmm1
+; SSE3-NEXT: pand %xmm0, %xmm1
+; SSE3-NEXT: paddw %xmm2, %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm2
+; SSE3-NEXT: psrlw $4, %xmm2
+; SSE3-NEXT: paddw %xmm1, %xmm2
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm2
+; SSE3-NEXT: movdqa %xmm2, %xmm0
+; SSE3-NEXT: psllw $8, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
+; SSE3-NEXT: psrlw $8, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv8i16u:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pextrw $7, %xmm0, %eax
-; SSSE3-NEXT: bsfw %ax, %ax
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: pextrw $3, %xmm0, %eax
-; SSSE3-NEXT: bsfw %ax, %ax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT: pextrw $5, %xmm0, %eax
-; SSSE3-NEXT: bsfw %ax, %ax
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: pextrw $1, %xmm0, %eax
-; SSSE3-NEXT: bsfw %ax, %ax
-; SSSE3-NEXT: movd %eax, %xmm3
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSSE3-NEXT: pextrw $6, %xmm0, %eax
-; SSSE3-NEXT: bsfw %ax, %ax
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: pextrw $2, %xmm0, %eax
-; SSSE3-NEXT: bsfw %ax, %ax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT: pextrw $4, %xmm0, %eax
-; SSSE3-NEXT: bsfw %ax, %ax
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: movd %xmm0, %eax
-; SSSE3-NEXT: bsfw %ax, %ax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: psubw %xmm0, %xmm1
+; SSSE3-NEXT: pand %xmm0, %xmm1
+; SSSE3-NEXT: psubw {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm1, %xmm2
+; SSSE3-NEXT: pand %xmm0, %xmm2
+; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSSE3-NEXT: pshufb %xmm2, %xmm4
+; SSSE3-NEXT: psrlw $4, %xmm1
+; SSSE3-NEXT: pand %xmm0, %xmm1
+; SSSE3-NEXT: pshufb %xmm1, %xmm3
+; SSSE3-NEXT: paddb %xmm4, %xmm3
+; SSSE3-NEXT: movdqa %xmm3, %xmm0
+; SSSE3-NEXT: psllw $8, %xmm0
+; SSSE3-NEXT: paddb %xmm3, %xmm0
+; SSSE3-NEXT: psrlw $8, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv8i16u:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrw $1, %xmm0, %eax
-; SSE41-NEXT: bsfw %ax, %ax
-; SSE41-NEXT: movd %xmm0, %ecx
-; SSE41-NEXT: bsfw %cx, %cx
-; SSE41-NEXT: movd %ecx, %xmm1
-; SSE41-NEXT: pinsrw $1, %eax, %xmm1
-; SSE41-NEXT: pextrw $2, %xmm0, %eax
-; SSE41-NEXT: bsfw %ax, %ax
-; SSE41-NEXT: pinsrw $2, %eax, %xmm1
-; SSE41-NEXT: pextrw $3, %xmm0, %eax
-; SSE41-NEXT: bsfw %ax, %ax
-; SSE41-NEXT: pinsrw $3, %eax, %xmm1
-; SSE41-NEXT: pextrw $4, %xmm0, %eax
-; SSE41-NEXT: bsfw %ax, %ax
-; SSE41-NEXT: pinsrw $4, %eax, %xmm1
-; SSE41-NEXT: pextrw $5, %xmm0, %eax
-; SSE41-NEXT: bsfw %ax, %ax
-; SSE41-NEXT: pinsrw $5, %eax, %xmm1
-; SSE41-NEXT: pextrw $6, %xmm0, %eax
-; SSE41-NEXT: bsfw %ax, %ax
-; SSE41-NEXT: pinsrw $6, %eax, %xmm1
-; SSE41-NEXT: pextrw $7, %xmm0, %eax
-; SSE41-NEXT: bsfw %ax, %ax
-; SSE41-NEXT: pinsrw $7, %eax, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: psubw %xmm0, %xmm1
+; SSE41-NEXT: pand %xmm0, %xmm1
+; SSE41-NEXT: psubw {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm1, %xmm2
+; SSE41-NEXT: pand %xmm0, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm3, %xmm4
+; SSE41-NEXT: pshufb %xmm2, %xmm4
+; SSE41-NEXT: psrlw $4, %xmm1
+; SSE41-NEXT: pand %xmm0, %xmm1
+; SSE41-NEXT: pshufb %xmm1, %xmm3
+; SSE41-NEXT: paddb %xmm4, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: psllw $8, %xmm0
+; SSE41-NEXT: paddb %xmm3, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: testv8i16u:
; AVX: # BB#0:
-; AVX-NEXT: vpextrw $1, %xmm0, %eax
-; AVX-NEXT: bsfw %ax, %ax
-; AVX-NEXT: vmovd %xmm0, %ecx
-; AVX-NEXT: bsfw %cx, %cx
-; AVX-NEXT: vmovd %ecx, %xmm1
-; AVX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $2, %xmm0, %eax
-; AVX-NEXT: bsfw %ax, %ax
-; AVX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $3, %xmm0, %eax
-; AVX-NEXT: bsfw %ax, %ax
-; AVX-NEXT: vpinsrw $3, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $4, %xmm0, %eax
-; AVX-NEXT: bsfw %ax, %ax
-; AVX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $5, %xmm0, %eax
-; AVX-NEXT: bsfw %ax, %ax
-; AVX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $6, %xmm0, %eax
-; AVX-NEXT: bsfw %ax, %ax
-; AVX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrw $7, %xmm0, %eax
-; AVX-NEXT: bsfw %ax, %ax
-; AVX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsubw %xmm0, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsllw $8, %xmm0, %xmm1
+; AVX-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
; AVX-NEXT: retq
%out = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %in, i1 -1)
ret <8 x i16> %out
}
-define <16 x i8> @testv16i8(<16 x i8> %in) {
+define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
; SSE2-LABEL: testv16i8:
; SSE2: # BB#0:
-; SSE2: pushq %rbp
-; SSE2: pushq %r14
-; SSE2: pushq %rbx
-; SSE2: movaps %xmm0, -16(%rsp)
-; SSE2-NEXT: movzbl -1(%rsp), %eax
-; SSE2-NEXT: bsfl %eax, %edx
-; SSE2-NEXT: movl $32, %eax
-; SSE2-NEXT: cmovel %eax, %edx
-; SSE2-NEXT: cmpl $32, %edx
-; SSE2-NEXT: movl $8, %ecx
-; SSE2-NEXT: cmovel %ecx, %edx
-; SSE2-NEXT: movd %edx, %xmm0
-; SSE2-NEXT: movzbl -2(%rsp), %r14d
-; SSE2-NEXT: movzbl -3(%rsp), %ebx
-; SSE2-NEXT: movzbl -4(%rsp), %r9d
-; SSE2-NEXT: movzbl -5(%rsp), %edi
-; SSE2-NEXT: movzbl -6(%rsp), %r11d
-; SSE2-NEXT: movzbl -7(%rsp), %edx
-; SSE2-NEXT: movzbl -8(%rsp), %r8d
-; SSE2-NEXT: movzbl -9(%rsp), %esi
-; SSE2-NEXT: bsfl %esi, %esi
-; SSE2-NEXT: cmovel %eax, %esi
-; SSE2-NEXT: cmpl $32, %esi
-; SSE2-NEXT: cmovel %ecx, %esi
-; SSE2-NEXT: movd %esi, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: bsfl %edi, %esi
-; SSE2-NEXT: cmovel %eax, %esi
-; SSE2-NEXT: cmpl $32, %esi
-; SSE2-NEXT: cmovel %ecx, %esi
-; SSE2-NEXT: movd %esi, %xmm2
-; SSE2-NEXT: movzbl -10(%rsp), %edi
-; SSE2-NEXT: movzbl -11(%rsp), %esi
-; SSE2-NEXT: movzbl -12(%rsp), %r10d
-; SSE2-NEXT: movzbl -13(%rsp), %ebp
-; SSE2-NEXT: bsfl %ebp, %ebp
-; SSE2-NEXT: cmovel %eax, %ebp
-; SSE2-NEXT: cmpl $32, %ebp
-; SSE2-NEXT: cmovel %ecx, %ebp
-; SSE2-NEXT: movd %ebp, %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: bsfl %ebx, %ebx
-; SSE2-NEXT: cmovel %eax, %ebx
-; SSE2-NEXT: cmpl $32, %ebx
-; SSE2-NEXT: cmovel %ecx, %ebx
-; SSE2-NEXT: movd %ebx, %xmm1
-; SSE2-NEXT: bsfl %esi, %esi
-; SSE2-NEXT: cmovel %eax, %esi
-; SSE2-NEXT: cmpl $32, %esi
-; SSE2-NEXT: cmovel %ecx, %esi
-; SSE2-NEXT: movd %esi, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: bsfl %edx, %edx
-; SSE2-NEXT: cmovel %eax, %edx
-; SSE2-NEXT: cmpl $32, %edx
-; SSE2-NEXT: cmovel %ecx, %edx
-; SSE2-NEXT: movd %edx, %xmm3
-; SSE2-NEXT: movzbl -14(%rsp), %edx
-; SSE2-NEXT: movzbl -15(%rsp), %esi
-; SSE2-NEXT: bsfl %esi, %esi
-; SSE2-NEXT: cmovel %eax, %esi
-; SSE2-NEXT: cmpl $32, %esi
-; SSE2-NEXT: cmovel %ecx, %esi
-; SSE2-NEXT: movd %esi, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: bsfl %r14d, %esi
-; SSE2-NEXT: cmovel %eax, %esi
-; SSE2-NEXT: cmpl $32, %esi
-; SSE2-NEXT: cmovel %ecx, %esi
-; SSE2-NEXT: movd %esi, %xmm0
-; SSE2-NEXT: bsfl %edi, %esi
-; SSE2-NEXT: cmovel %eax, %esi
-; SSE2-NEXT: cmpl $32, %esi
-; SSE2-NEXT: cmovel %ecx, %esi
-; SSE2-NEXT: movd %esi, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT: bsfl %r11d, %esi
-; SSE2-NEXT: cmovel %eax, %esi
-; SSE2-NEXT: cmpl $32, %esi
-; SSE2-NEXT: cmovel %ecx, %esi
-; SSE2-NEXT: movd %esi, %xmm0
-; SSE2-NEXT: bsfl %edx, %edx
-; SSE2-NEXT: cmovel %eax, %edx
-; SSE2-NEXT: cmpl $32, %edx
-; SSE2-NEXT: cmovel %ecx, %edx
-; SSE2-NEXT: movd %edx, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE2-NEXT: bsfl %r9d, %edx
-; SSE2-NEXT: cmovel %eax, %edx
-; SSE2-NEXT: cmpl $32, %edx
-; SSE2-NEXT: cmovel %ecx, %edx
-; SSE2-NEXT: movd %edx, %xmm0
-; SSE2-NEXT: bsfl %r10d, %edx
-; SSE2-NEXT: cmovel %eax, %edx
-; SSE2-NEXT: cmpl $32, %edx
-; SSE2-NEXT: cmovel %ecx, %edx
-; SSE2-NEXT: movd %edx, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT: bsfl %r8d, %edx
-; SSE2-NEXT: cmovel %eax, %edx
-; SSE2-NEXT: cmpl $32, %edx
-; SSE2-NEXT: cmovel %ecx, %edx
-; SSE2-NEXT: movd %edx, %xmm4
-; SSE2-NEXT: movzbl -16(%rsp), %edx
-; SSE2-NEXT: bsfl %edx, %edx
-; SSE2-NEXT: cmovel %eax, %edx
-; SSE2-NEXT: cmpl $32, %edx
-; SSE2-NEXT: cmovel %ecx, %edx
-; SSE2-NEXT: movd %edx, %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: popq %r14
-; SSE2-NEXT: popq %rbp
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: psubb {{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
+; SSE2-NEXT: psrlw $2, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: paddb %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: paddb %xmm1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv16i8:
; SSE3: # BB#0:
-; SSE3: pushq %rbp
-; SSE3: pushq %r14
-; SSE3: pushq %rbx
-; SSE3: movaps %xmm0, -16(%rsp)
-; SSE3-NEXT: movzbl -1(%rsp), %eax
-; SSE3-NEXT: bsfl %eax, %edx
-; SSE3-NEXT: movl $32, %eax
-; SSE3-NEXT: cmovel %eax, %edx
-; SSE3-NEXT: cmpl $32, %edx
-; SSE3-NEXT: movl $8, %ecx
-; SSE3-NEXT: cmovel %ecx, %edx
-; SSE3-NEXT: movd %edx, %xmm0
-; SSE3-NEXT: movzbl -2(%rsp), %r14d
-; SSE3-NEXT: movzbl -3(%rsp), %ebx
-; SSE3-NEXT: movzbl -4(%rsp), %r9d
-; SSE3-NEXT: movzbl -5(%rsp), %edi
-; SSE3-NEXT: movzbl -6(%rsp), %r11d
-; SSE3-NEXT: movzbl -7(%rsp), %edx
-; SSE3-NEXT: movzbl -8(%rsp), %r8d
-; SSE3-NEXT: movzbl -9(%rsp), %esi
-; SSE3-NEXT: bsfl %esi, %esi
-; SSE3-NEXT: cmovel %eax, %esi
-; SSE3-NEXT: cmpl $32, %esi
-; SSE3-NEXT: cmovel %ecx, %esi
-; SSE3-NEXT: movd %esi, %xmm1
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE3-NEXT: bsfl %edi, %esi
-; SSE3-NEXT: cmovel %eax, %esi
-; SSE3-NEXT: cmpl $32, %esi
-; SSE3-NEXT: cmovel %ecx, %esi
-; SSE3-NEXT: movd %esi, %xmm2
-; SSE3-NEXT: movzbl -10(%rsp), %edi
-; SSE3-NEXT: movzbl -11(%rsp), %esi
-; SSE3-NEXT: movzbl -12(%rsp), %r10d
-; SSE3-NEXT: movzbl -13(%rsp), %ebp
-; SSE3-NEXT: bsfl %ebp, %ebp
-; SSE3-NEXT: cmovel %eax, %ebp
-; SSE3-NEXT: cmpl $32, %ebp
-; SSE3-NEXT: cmovel %ecx, %ebp
-; SSE3-NEXT: movd %ebp, %xmm0
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE3-NEXT: bsfl %ebx, %ebx
-; SSE3-NEXT: cmovel %eax, %ebx
-; SSE3-NEXT: cmpl $32, %ebx
-; SSE3-NEXT: cmovel %ecx, %ebx
-; SSE3-NEXT: movd %ebx, %xmm1
-; SSE3-NEXT: bsfl %esi, %esi
-; SSE3-NEXT: cmovel %eax, %esi
-; SSE3-NEXT: cmpl $32, %esi
-; SSE3-NEXT: cmovel %ecx, %esi
-; SSE3-NEXT: movd %esi, %xmm2
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE3-NEXT: bsfl %edx, %edx
-; SSE3-NEXT: cmovel %eax, %edx
-; SSE3-NEXT: cmpl $32, %edx
-; SSE3-NEXT: cmovel %ecx, %edx
-; SSE3-NEXT: movd %edx, %xmm3
-; SSE3-NEXT: movzbl -14(%rsp), %edx
-; SSE3-NEXT: movzbl -15(%rsp), %esi
-; SSE3-NEXT: bsfl %esi, %esi
-; SSE3-NEXT: cmovel %eax, %esi
-; SSE3-NEXT: cmpl $32, %esi
-; SSE3-NEXT: cmovel %ecx, %esi
-; SSE3-NEXT: movd %esi, %xmm1
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE3-NEXT: bsfl %r14d, %esi
-; SSE3-NEXT: cmovel %eax, %esi
-; SSE3-NEXT: cmpl $32, %esi
-; SSE3-NEXT: cmovel %ecx, %esi
-; SSE3-NEXT: movd %esi, %xmm0
-; SSE3-NEXT: bsfl %edi, %esi
-; SSE3-NEXT: cmovel %eax, %esi
-; SSE3-NEXT: cmpl $32, %esi
-; SSE3-NEXT: cmovel %ecx, %esi
-; SSE3-NEXT: movd %esi, %xmm3
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE3-NEXT: bsfl %r11d, %esi
-; SSE3-NEXT: cmovel %eax, %esi
-; SSE3-NEXT: cmpl $32, %esi
-; SSE3-NEXT: cmovel %ecx, %esi
-; SSE3-NEXT: movd %esi, %xmm0
-; SSE3-NEXT: bsfl %edx, %edx
-; SSE3-NEXT: cmovel %eax, %edx
-; SSE3-NEXT: cmpl $32, %edx
-; SSE3-NEXT: cmovel %ecx, %edx
-; SSE3-NEXT: movd %edx, %xmm2
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE3-NEXT: bsfl %r9d, %edx
-; SSE3-NEXT: cmovel %eax, %edx
-; SSE3-NEXT: cmpl $32, %edx
-; SSE3-NEXT: cmovel %ecx, %edx
-; SSE3-NEXT: movd %edx, %xmm0
-; SSE3-NEXT: bsfl %r10d, %edx
-; SSE3-NEXT: cmovel %eax, %edx
-; SSE3-NEXT: cmpl $32, %edx
-; SSE3-NEXT: cmovel %ecx, %edx
-; SSE3-NEXT: movd %edx, %xmm3
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE3-NEXT: bsfl %r8d, %edx
-; SSE3-NEXT: cmovel %eax, %edx
-; SSE3-NEXT: cmpl $32, %edx
-; SSE3-NEXT: cmovel %ecx, %edx
-; SSE3-NEXT: movd %edx, %xmm4
-; SSE3-NEXT: movzbl -16(%rsp), %edx
-; SSE3-NEXT: bsfl %edx, %edx
-; SSE3-NEXT: cmovel %eax, %edx
-; SSE3-NEXT: cmpl $32, %edx
-; SSE3-NEXT: cmovel %ecx, %edx
-; SSE3-NEXT: movd %edx, %xmm0
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE3-NEXT: popq %rbx
-; SSE3-NEXT: popq %r14
-; SSE3-NEXT: popq %rbp
+; SSE3-NEXT: pxor %xmm1, %xmm1
+; SSE3-NEXT: psubb %xmm0, %xmm1
+; SSE3-NEXT: pand %xmm0, %xmm1
+; SSE3-NEXT: psubb {{.*}}(%rip), %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE3-NEXT: psubb %xmm0, %xmm1
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE3-NEXT: movdqa %xmm1, %xmm2
+; SSE3-NEXT: pand %xmm0, %xmm2
+; SSE3-NEXT: psrlw $2, %xmm1
+; SSE3-NEXT: pand %xmm0, %xmm1
+; SSE3-NEXT: paddb %xmm2, %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: paddb %xmm1, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv16i8:
; SSSE3: # BB#0:
-; SSSE3: pushq %rbp
-; SSSE3: pushq %r14
-; SSSE3: pushq %rbx
-; SSSE3: movaps %xmm0, -16(%rsp)
-; SSSE3-NEXT: movzbl -1(%rsp), %eax
-; SSSE3-NEXT: bsfl %eax, %edx
-; SSSE3-NEXT: movl $32, %eax
-; SSSE3-NEXT: cmovel %eax, %edx
-; SSSE3-NEXT: cmpl $32, %edx
-; SSSE3-NEXT: movl $8, %ecx
-; SSSE3-NEXT: cmovel %ecx, %edx
-; SSSE3-NEXT: movd %edx, %xmm0
-; SSSE3-NEXT: movzbl -2(%rsp), %r14d
-; SSSE3-NEXT: movzbl -3(%rsp), %ebx
-; SSSE3-NEXT: movzbl -4(%rsp), %r9d
-; SSSE3-NEXT: movzbl -5(%rsp), %edi
-; SSSE3-NEXT: movzbl -6(%rsp), %r11d
-; SSSE3-NEXT: movzbl -7(%rsp), %edx
-; SSSE3-NEXT: movzbl -8(%rsp), %r8d
-; SSSE3-NEXT: movzbl -9(%rsp), %esi
-; SSSE3-NEXT: bsfl %esi, %esi
-; SSSE3-NEXT: cmovel %eax, %esi
-; SSSE3-NEXT: cmpl $32, %esi
-; SSSE3-NEXT: cmovel %ecx, %esi
-; SSSE3-NEXT: movd %esi, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: bsfl %edi, %esi
-; SSSE3-NEXT: cmovel %eax, %esi
-; SSSE3-NEXT: cmpl $32, %esi
-; SSSE3-NEXT: cmovel %ecx, %esi
-; SSSE3-NEXT: movd %esi, %xmm2
-; SSSE3-NEXT: movzbl -10(%rsp), %edi
-; SSSE3-NEXT: movzbl -11(%rsp), %esi
-; SSSE3-NEXT: movzbl -12(%rsp), %r10d
-; SSSE3-NEXT: movzbl -13(%rsp), %ebp
-; SSSE3-NEXT: bsfl %ebp, %ebp
-; SSSE3-NEXT: cmovel %eax, %ebp
-; SSSE3-NEXT: cmpl $32, %ebp
-; SSSE3-NEXT: cmovel %ecx, %ebp
-; SSSE3-NEXT: movd %ebp, %xmm0
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT: bsfl %ebx, %ebx
-; SSSE3-NEXT: cmovel %eax, %ebx
-; SSSE3-NEXT: cmpl $32, %ebx
-; SSSE3-NEXT: cmovel %ecx, %ebx
-; SSSE3-NEXT: movd %ebx, %xmm1
-; SSSE3-NEXT: bsfl %esi, %esi
-; SSSE3-NEXT: cmovel %eax, %esi
-; SSSE3-NEXT: cmpl $32, %esi
-; SSSE3-NEXT: cmovel %ecx, %esi
-; SSSE3-NEXT: movd %esi, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT: bsfl %edx, %edx
-; SSSE3-NEXT: cmovel %eax, %edx
-; SSSE3-NEXT: cmpl $32, %edx
-; SSSE3-NEXT: cmovel %ecx, %edx
-; SSSE3-NEXT: movd %edx, %xmm3
-; SSSE3-NEXT: movzbl -14(%rsp), %edx
-; SSSE3-NEXT: movzbl -15(%rsp), %esi
-; SSSE3-NEXT: bsfl %esi, %esi
-; SSSE3-NEXT: cmovel %eax, %esi
-; SSSE3-NEXT: cmpl $32, %esi
-; SSSE3-NEXT: cmovel %ecx, %esi
-; SSSE3-NEXT: movd %esi, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: bsfl %r14d, %esi
-; SSSE3-NEXT: cmovel %eax, %esi
-; SSSE3-NEXT: cmpl $32, %esi
-; SSSE3-NEXT: cmovel %ecx, %esi
-; SSSE3-NEXT: movd %esi, %xmm0
-; SSSE3-NEXT: bsfl %edi, %esi
-; SSSE3-NEXT: cmovel %eax, %esi
-; SSSE3-NEXT: cmpl $32, %esi
-; SSSE3-NEXT: cmovel %ecx, %esi
-; SSSE3-NEXT: movd %esi, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSSE3-NEXT: bsfl %r11d, %esi
-; SSSE3-NEXT: cmovel %eax, %esi
-; SSSE3-NEXT: cmpl $32, %esi
-; SSSE3-NEXT: cmovel %ecx, %esi
-; SSSE3-NEXT: movd %esi, %xmm0
-; SSSE3-NEXT: bsfl %edx, %edx
-; SSSE3-NEXT: cmovel %eax, %edx
-; SSSE3-NEXT: cmpl $32, %edx
-; SSSE3-NEXT: cmovel %ecx, %edx
-; SSSE3-NEXT: movd %edx, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSSE3-NEXT: bsfl %r9d, %edx
-; SSSE3-NEXT: cmovel %eax, %edx
-; SSSE3-NEXT: cmpl $32, %edx
-; SSSE3-NEXT: cmovel %ecx, %edx
-; SSSE3-NEXT: movd %edx, %xmm0
-; SSSE3-NEXT: bsfl %r10d, %edx
-; SSSE3-NEXT: cmovel %eax, %edx
-; SSSE3-NEXT: cmpl $32, %edx
-; SSSE3-NEXT: cmovel %ecx, %edx
-; SSSE3-NEXT: movd %edx, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSSE3-NEXT: bsfl %r8d, %edx
-; SSSE3-NEXT: cmovel %eax, %edx
-; SSSE3-NEXT: cmpl $32, %edx
-; SSSE3-NEXT: cmovel %ecx, %edx
-; SSSE3-NEXT: movd %edx, %xmm4
-; SSSE3-NEXT: movzbl -16(%rsp), %edx
-; SSSE3-NEXT: bsfl %edx, %edx
-; SSSE3-NEXT: cmovel %eax, %edx
-; SSSE3-NEXT: cmpl $32, %edx
-; SSSE3-NEXT: cmovel %ecx, %edx
-; SSSE3-NEXT: movd %edx, %xmm0
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT: popq %rbx
-; SSSE3-NEXT: popq %r14
-; SSSE3-NEXT: popq %rbp
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: psubb %xmm0, %xmm1
+; SSSE3-NEXT: pand %xmm0, %xmm1
+; SSSE3-NEXT: psubb {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm1, %xmm3
+; SSSE3-NEXT: pand %xmm2, %xmm3
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSSE3-NEXT: pshufb %xmm3, %xmm4
+; SSSE3-NEXT: psrlw $4, %xmm1
+; SSSE3-NEXT: pand %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm1, %xmm0
+; SSSE3-NEXT: paddb %xmm4, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv16i8:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrb $1, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %edx
-; SSE41-NEXT: movl $32, %eax
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: movl $8, %ecx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pextrb $0, %xmm0, %esi
-; SSE41-NEXT: bsfl %esi, %esi
-; SSE41-NEXT: cmovel %eax, %esi
-; SSE41-NEXT: cmpl $32, %esi
-; SSE41-NEXT: cmovel %ecx, %esi
-; SSE41-NEXT: movd %esi, %xmm1
-; SSE41-NEXT: pinsrb $1, %edx, %xmm1
-; SSE41-NEXT: pextrb $2, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $2, %edx, %xmm1
-; SSE41-NEXT: pextrb $3, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $3, %edx, %xmm1
-; SSE41-NEXT: pextrb $4, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $4, %edx, %xmm1
-; SSE41-NEXT: pextrb $5, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $5, %edx, %xmm1
-; SSE41-NEXT: pextrb $6, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $6, %edx, %xmm1
-; SSE41-NEXT: pextrb $7, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $7, %edx, %xmm1
-; SSE41-NEXT: pextrb $8, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $8, %edx, %xmm1
-; SSE41-NEXT: pextrb $9, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $9, %edx, %xmm1
-; SSE41-NEXT: pextrb $10, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $10, %edx, %xmm1
-; SSE41-NEXT: pextrb $11, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $11, %edx, %xmm1
-; SSE41-NEXT: pextrb $12, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $12, %edx, %xmm1
-; SSE41-NEXT: pextrb $13, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $13, %edx, %xmm1
-; SSE41-NEXT: pextrb $14, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $14, %edx, %xmm1
-; SSE41-NEXT: pextrb $15, %xmm0, %edx
-; SSE41-NEXT: bsfl %edx, %edx
-; SSE41-NEXT: cmovel %eax, %edx
-; SSE41-NEXT: cmpl $32, %edx
-; SSE41-NEXT: cmovel %ecx, %edx
-; SSE41-NEXT: pinsrb $15, %edx, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: psubb %xmm0, %xmm1
+; SSE41-NEXT: pand %xmm0, %xmm1
+; SSE41-NEXT: psubb {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pand %xmm2, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pshufb %xmm3, %xmm4
+; SSE41-NEXT: psrlw $4, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pshufb %xmm1, %xmm0
+; SSE41-NEXT: paddb %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testv16i8:
; AVX: # BB#0:
-; AVX-NEXT: vpextrb $1, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %edx
-; AVX-NEXT: movl $32, %eax
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: movl $8, %ecx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpextrb $0, %xmm0, %esi
-; AVX-NEXT: bsfl %esi, %esi
-; AVX-NEXT: cmovel %eax, %esi
-; AVX-NEXT: cmpl $32, %esi
-; AVX-NEXT: cmovel %ecx, %esi
-; AVX-NEXT: vmovd %esi, %xmm1
-; AVX-NEXT: vpinsrb $1, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $2, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $3, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $3, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $4, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $5, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $5, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $6, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $7, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $7, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $8, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $9, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $9, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $10, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $10, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $11, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $11, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $12, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $12, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $13, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $14, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $15, %xmm0, %edx
-; AVX-NEXT: bsfl %edx, %edx
-; AVX-NEXT: cmovel %eax, %edx
-; AVX-NEXT: cmpl $32, %edx
-; AVX-NEXT: cmovel %ecx, %edx
-; AVX-NEXT: vpinsrb $15, %edx, %xmm1, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsubb %xmm0, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
%out = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %in, i1 0)
ret <16 x i8> %out
}
-define <16 x i8> @testv16i8u(<16 x i8> %in) {
+define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind {
; SSE2-LABEL: testv16i8u:
; SSE2: # BB#0:
-; SSE2: pushq %rbx
-; SSE2: movaps %xmm0, -16(%rsp)
-; SSE2-NEXT: movzbl -1(%rsp), %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzbl -2(%rsp), %r11d
-; SSE2-NEXT: movzbl -3(%rsp), %eax
-; SSE2-NEXT: movzbl -4(%rsp), %r9d
-; SSE2-NEXT: movzbl -5(%rsp), %edi
-; SSE2-NEXT: movzbl -6(%rsp), %r10d
-; SSE2-NEXT: movzbl -7(%rsp), %ecx
-; SSE2-NEXT: movzbl -8(%rsp), %r8d
-; SSE2-NEXT: movzbl -9(%rsp), %edx
-; SSE2-NEXT: bsfl %edx, %edx
-; SSE2-NEXT: movd %edx, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: bsfl %edi, %edx
-; SSE2-NEXT: movd %edx, %xmm0
-; SSE2-NEXT: movzbl -10(%rsp), %edx
-; SSE2-NEXT: movzbl -11(%rsp), %esi
-; SSE2-NEXT: movzbl -12(%rsp), %edi
-; SSE2-NEXT: movzbl -13(%rsp), %ebx
-; SSE2-NEXT: bsfl %ebx, %ebx
-; SSE2-NEXT: movd %ebx, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: bsfl %esi, %eax
-; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: punpcklbw %xmm0, %xmm3 # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT: bsfl %ecx, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzbl -14(%rsp), %eax
-; SSE2-NEXT: movzbl -15(%rsp), %ecx
-; SSE2-NEXT: bsfl %ecx, %ecx
-; SSE2-NEXT: movd %ecx, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT: bsfl %r11d, %ecx
-; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: bsfl %edx, %ecx
-; SSE2-NEXT: movd %ecx, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: bsfl %r10d, %ecx
-; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: bsfl %r9d, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: bsfl %edi, %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: bsfl %r8d, %eax
-; SSE2-NEXT: movd %eax, %xmm4
-; SSE2-NEXT: movzbl -16(%rsp), %eax
-; SSE2-NEXT: bsfl %eax, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: popq %rbx
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: psubb {{.*}}(%rip), %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
+; SSE2-NEXT: psrlw $2, %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: paddb %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: paddb %xmm1, %xmm0
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: testv16i8u:
; SSE3: # BB#0:
-; SSE3: pushq %rbx
-; SSE3: movaps %xmm0, -16(%rsp)
-; SSE3-NEXT: movzbl -1(%rsp), %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: movzbl -2(%rsp), %r11d
-; SSE3-NEXT: movzbl -3(%rsp), %eax
-; SSE3-NEXT: movzbl -4(%rsp), %r9d
-; SSE3-NEXT: movzbl -5(%rsp), %edi
-; SSE3-NEXT: movzbl -6(%rsp), %r10d
-; SSE3-NEXT: movzbl -7(%rsp), %ecx
-; SSE3-NEXT: movzbl -8(%rsp), %r8d
-; SSE3-NEXT: movzbl -9(%rsp), %edx
-; SSE3-NEXT: bsfl %edx, %edx
-; SSE3-NEXT: movd %edx, %xmm1
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE3-NEXT: bsfl %edi, %edx
-; SSE3-NEXT: movd %edx, %xmm0
-; SSE3-NEXT: movzbl -10(%rsp), %edx
-; SSE3-NEXT: movzbl -11(%rsp), %esi
-; SSE3-NEXT: movzbl -12(%rsp), %edi
-; SSE3-NEXT: movzbl -13(%rsp), %ebx
-; SSE3-NEXT: bsfl %ebx, %ebx
-; SSE3-NEXT: movd %ebx, %xmm2
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: bsfl %esi, %eax
-; SSE3-NEXT: movd %eax, %xmm3
-; SSE3-NEXT: punpcklbw %xmm0, %xmm3 # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE3-NEXT: bsfl %ecx, %eax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: movzbl -14(%rsp), %eax
-; SSE3-NEXT: movzbl -15(%rsp), %ecx
-; SSE3-NEXT: bsfl %ecx, %ecx
-; SSE3-NEXT: movd %ecx, %xmm1
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE3-NEXT: bsfl %r11d, %ecx
-; SSE3-NEXT: movd %ecx, %xmm0
-; SSE3-NEXT: bsfl %edx, %ecx
-; SSE3-NEXT: movd %ecx, %xmm2
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE3-NEXT: bsfl %r10d, %ecx
-; SSE3-NEXT: movd %ecx, %xmm0
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: movd %eax, %xmm3
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE3-NEXT: bsfl %r9d, %eax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: bsfl %edi, %eax
-; SSE3-NEXT: movd %eax, %xmm2
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE3-NEXT: bsfl %r8d, %eax
-; SSE3-NEXT: movd %eax, %xmm4
-; SSE3-NEXT: movzbl -16(%rsp), %eax
-; SSE3-NEXT: bsfl %eax, %eax
-; SSE3-NEXT: movd %eax, %xmm0
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE3-NEXT: popq %rbx
+; SSE3-NEXT: pxor %xmm1, %xmm1
+; SSE3-NEXT: psubb %xmm0, %xmm1
+; SSE3-NEXT: pand %xmm0, %xmm1
+; SSE3-NEXT: psubb {{.*}}(%rip), %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE3-NEXT: psubb %xmm0, %xmm1
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; SSE3-NEXT: movdqa %xmm1, %xmm2
+; SSE3-NEXT: pand %xmm0, %xmm2
+; SSE3-NEXT: psrlw $2, %xmm1
+; SSE3-NEXT: pand %xmm0, %xmm1
+; SSE3-NEXT: paddb %xmm2, %xmm1
+; SSE3-NEXT: movdqa %xmm1, %xmm0
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: paddb %xmm1, %xmm0
+; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: testv16i8u:
; SSSE3: # BB#0:
-; SSSE3: pushq %rbx
-; SSSE3: movaps %xmm0, -16(%rsp)
-; SSSE3-NEXT: movzbl -1(%rsp), %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzbl -2(%rsp), %r11d
-; SSSE3-NEXT: movzbl -3(%rsp), %eax
-; SSSE3-NEXT: movzbl -4(%rsp), %r9d
-; SSSE3-NEXT: movzbl -5(%rsp), %edi
-; SSSE3-NEXT: movzbl -6(%rsp), %r10d
-; SSSE3-NEXT: movzbl -7(%rsp), %ecx
-; SSSE3-NEXT: movzbl -8(%rsp), %r8d
-; SSSE3-NEXT: movzbl -9(%rsp), %edx
-; SSSE3-NEXT: bsfl %edx, %edx
-; SSSE3-NEXT: movd %edx, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: bsfl %edi, %edx
-; SSSE3-NEXT: movd %edx, %xmm0
-; SSSE3-NEXT: movzbl -10(%rsp), %edx
-; SSSE3-NEXT: movzbl -11(%rsp), %esi
-; SSSE3-NEXT: movzbl -12(%rsp), %edi
-; SSSE3-NEXT: movzbl -13(%rsp), %ebx
-; SSSE3-NEXT: bsfl %ebx, %ebx
-; SSSE3-NEXT: movd %ebx, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: bsfl %esi, %eax
-; SSSE3-NEXT: movd %eax, %xmm3
-; SSSE3-NEXT: punpcklbw %xmm0, %xmm3 # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSSE3-NEXT: bsfl %ecx, %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzbl -14(%rsp), %eax
-; SSSE3-NEXT: movzbl -15(%rsp), %ecx
-; SSSE3-NEXT: bsfl %ecx, %ecx
-; SSSE3-NEXT: movd %ecx, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSSE3-NEXT: bsfl %r11d, %ecx
-; SSSE3-NEXT: movd %ecx, %xmm0
-; SSSE3-NEXT: bsfl %edx, %ecx
-; SSSE3-NEXT: movd %ecx, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT: bsfl %r10d, %ecx
-; SSSE3-NEXT: movd %ecx, %xmm0
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: bsfl %r9d, %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: bsfl %edi, %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT: bsfl %r8d, %eax
-; SSSE3-NEXT: movd %eax, %xmm4
-; SSSE3-NEXT: movzbl -16(%rsp), %eax
-; SSSE3-NEXT: bsfl %eax, %eax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT: popq %rbx
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: psubb %xmm0, %xmm1
+; SSSE3-NEXT: pand %xmm0, %xmm1
+; SSSE3-NEXT: psubb {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSSE3-NEXT: movdqa %xmm1, %xmm3
+; SSSE3-NEXT: pand %xmm2, %xmm3
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSSE3-NEXT: pshufb %xmm3, %xmm4
+; SSSE3-NEXT: psrlw $4, %xmm1
+; SSSE3-NEXT: pand %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm1, %xmm0
+; SSSE3-NEXT: paddb %xmm4, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: testv16i8u:
; SSE41: # BB#0:
-; SSE41-NEXT: pextrb $1, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pextrb $0, %xmm0, %ecx
-; SSE41-NEXT: bsfl %ecx, %ecx
-; SSE41-NEXT: movd %ecx, %xmm1
-; SSE41-NEXT: pinsrb $1, %eax, %xmm1
-; SSE41-NEXT: pextrb $2, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $2, %eax, %xmm1
-; SSE41-NEXT: pextrb $3, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $3, %eax, %xmm1
-; SSE41-NEXT: pextrb $4, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $4, %eax, %xmm1
-; SSE41-NEXT: pextrb $5, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $5, %eax, %xmm1
-; SSE41-NEXT: pextrb $6, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $6, %eax, %xmm1
-; SSE41-NEXT: pextrb $7, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $7, %eax, %xmm1
-; SSE41-NEXT: pextrb $8, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $8, %eax, %xmm1
-; SSE41-NEXT: pextrb $9, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $9, %eax, %xmm1
-; SSE41-NEXT: pextrb $10, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $10, %eax, %xmm1
-; SSE41-NEXT: pextrb $11, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $11, %eax, %xmm1
-; SSE41-NEXT: pextrb $12, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $12, %eax, %xmm1
-; SSE41-NEXT: pextrb $13, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $13, %eax, %xmm1
-; SSE41-NEXT: pextrb $14, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $14, %eax, %xmm1
-; SSE41-NEXT: pextrb $15, %xmm0, %eax
-; SSE41-NEXT: bsfl %eax, %eax
-; SSE41-NEXT: pinsrb $15, %eax, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: psubb %xmm0, %xmm1
+; SSE41-NEXT: pand %xmm0, %xmm1
+; SSE41-NEXT: psubb {{.*}}(%rip), %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: pand %xmm2, %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: pshufb %xmm3, %xmm4
+; SSE41-NEXT: psrlw $4, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pshufb %xmm1, %xmm0
+; SSE41-NEXT: paddb %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: testv16i8u:
; AVX: # BB#0:
-; AVX-NEXT: vpextrb $1, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX-NEXT: bsfl %ecx, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm1
-; AVX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $2, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $3, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $4, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $5, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $6, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $7, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $8, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $9, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $10, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $11, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $12, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $13, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $14, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrb $15, %xmm0, %eax
-; AVX-NEXT: bsfl %eax, %eax
-; AVX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpsubb %xmm0, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
+; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
%out = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %in, i1 -1)
ret <16 x i8> %out
}
-define <2 x i64> @foldv2i64() {
+define <2 x i64> @foldv2i64() nounwind {
; SSE-LABEL: foldv2i64:
; SSE: # BB#0:
; SSE-NEXT: movl $8, %eax
@@ -1682,7 +915,7 @@ define <2 x i64> @foldv2i64() {
ret <2 x i64> %out
}
-define <2 x i64> @foldv2i64u() {
+define <2 x i64> @foldv2i64u() nounwind {
; SSE-LABEL: foldv2i64u:
; SSE: # BB#0:
; SSE-NEXT: movl $8, %eax
@@ -1698,7 +931,7 @@ define <2 x i64> @foldv2i64u() {
ret <2 x i64> %out
}
-define <4 x i32> @foldv4i32() {
+define <4 x i32> @foldv4i32() nounwind {
; SSE-LABEL: foldv4i32:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
@@ -1712,7 +945,7 @@ define <4 x i32> @foldv4i32() {
ret <4 x i32> %out
}
-define <4 x i32> @foldv4i32u() {
+define <4 x i32> @foldv4i32u() nounwind {
; SSE-LABEL: foldv4i32u:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
@@ -1726,7 +959,7 @@ define <4 x i32> @foldv4i32u() {
ret <4 x i32> %out
}
-define <8 x i16> @foldv8i16() {
+define <8 x i16> @foldv8i16() nounwind {
; SSE-LABEL: foldv8i16:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
@@ -1740,7 +973,7 @@ define <8 x i16> @foldv8i16() {
ret <8 x i16> %out
}
-define <8 x i16> @foldv8i16u() {
+define <8 x i16> @foldv8i16u() nounwind {
; SSE-LABEL: foldv8i16u:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
@@ -1754,7 +987,7 @@ define <8 x i16> @foldv8i16u() {
ret <8 x i16> %out
}
-define <16 x i8> @foldv16i8() {
+define <16 x i8> @foldv16i8() nounwind {
; SSE-LABEL: foldv16i8:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
@@ -1768,7 +1001,7 @@ define <16 x i8> @foldv16i8() {
ret <16 x i8> %out
}
-define <16 x i8> @foldv16i8u() {
+define <16 x i8> @foldv16i8u() nounwind {
; SSE-LABEL: foldv16i8u:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
diff --git a/test/CodeGen/X86/vector-tzcnt-256.ll b/test/CodeGen/X86/vector-tzcnt-256.ll
index 8f744f79f85f..a9ee27cc51f0 100644
--- a/test/CodeGen/X86/vector-tzcnt-256.ll
+++ b/test/CodeGen/X86/vector-tzcnt-256.ll
@@ -1,1190 +1,525 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
-target triple = "x86_64-unknown-unknown"
-
-define <4 x i64> @testv4i64(<4 x i64> %in) {
+define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrq $1, %xmm1, %rax
-; AVX1-NEXT: bsfq %rax, %rax
-; AVX1-NEXT: movl $64, %ecx
-; AVX1-NEXT: cmoveq %rcx, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vmovq %xmm1, %rax
-; AVX1-NEXT: bsfq %rax, %rax
-; AVX1-NEXT: cmoveq %rcx, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: bsfq %rax, %rax
-; AVX1-NEXT: cmoveq %rcx, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: bsfq %rax, %rax
-; AVX1-NEXT: cmoveq %rcx, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1]
+; AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm5
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm6, %xmm1
+; AVX1-NEXT: vpaddb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpsadbw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm3
+; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm6, %xmm0
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsadbw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm1, %rax
-; AVX2-NEXT: bsfq %rax, %rax
-; AVX2-NEXT: movl $64, %ecx
-; AVX2-NEXT: cmoveq %rcx, %rax
-; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vmovq %xmm1, %rax
-; AVX2-NEXT: bsfq %rax, %rax
-; AVX2-NEXT: cmoveq %rcx, %rax
-; AVX2-NEXT: vmovq %rax, %xmm1
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: bsfq %rax, %rax
-; AVX2-NEXT: cmoveq %rcx, %rax
-; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: bsfq %rax, %rax
-; AVX2-NEXT: cmoveq %rcx, %rax
-; AVX2-NEXT: vmovq %rax, %xmm0
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0
+; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %in, i1 0)
ret <4 x i64> %out
}
-define <4 x i64> @testv4i64u(<4 x i64> %in) {
+define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; AVX1-LABEL: testv4i64u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrq $1, %xmm1, %rax
-; AVX1-NEXT: bsfq %rax, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vmovq %xmm1, %rax
-; AVX1-NEXT: bsfq %rax, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: bsfq %rax, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: bsfq %rax, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsubq %xmm0, %xmm2, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1]
+; AVX1-NEXT: vpsubq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm5
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm6, %xmm1
+; AVX1-NEXT: vpaddb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpsadbw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm3
+; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm6, %xmm0
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpsadbw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv4i64u:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrq $1, %xmm1, %rax
-; AVX2-NEXT: bsfq %rax, %rax
-; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vmovq %xmm1, %rax
-; AVX2-NEXT: bsfq %rax, %rax
-; AVX2-NEXT: vmovq %rax, %xmm1
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; AVX2-NEXT: vpextrq $1, %xmm0, %rax
-; AVX2-NEXT: bsfq %rax, %rax
-; AVX2-NEXT: vmovq %rax, %xmm2
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: bsfq %rax, %rax
-; AVX2-NEXT: vmovq %rax, %xmm0
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0
+; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %in, i1 -1)
ret <4 x i64> %out
}
-define <8 x i32> @testv8i32(<8 x i32> %in) {
+define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrd $1, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %ecx
-; AVX1-NEXT: movl $32, %eax
-; AVX1-NEXT: cmovel %eax, %ecx
-; AVX1-NEXT: vmovd %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: vmovd %edx, %xmm2
-; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
-; AVX1-NEXT: bsfl %ecx, %ecx
-; AVX1-NEXT: cmovel %eax, %ecx
-; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
-; AVX1-NEXT: bsfl %ecx, %ecx
-; AVX1-NEXT: cmovel %eax, %ecx
-; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
-; AVX1-NEXT: vpextrd $1, %xmm0, %ecx
-; AVX1-NEXT: bsfl %ecx, %ecx
-; AVX1-NEXT: cmovel %eax, %ecx
-; AVX1-NEXT: vmovd %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: vmovd %edx, %xmm2
-; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX1-NEXT: bsfl %ecx, %ecx
-; AVX1-NEXT: cmovel %eax, %ecx
-; AVX1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $3, %xmm0, %ecx
-; AVX1-NEXT: bsfl %ecx, %ecx
-; AVX1-NEXT: cmovel %eax, %ecx
-; AVX1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpsubd %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1]
+; AVX1-NEXT: vpsubd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm5
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpaddb %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm5, %xmm5
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm3
+; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm6, %xmm0
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv8i32:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrd $1, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %ecx
-; AVX2-NEXT: movl $32, %eax
-; AVX2-NEXT: cmovel %eax, %ecx
-; AVX2-NEXT: vmovd %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: vmovd %edx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
-; AVX2-NEXT: bsfl %ecx, %ecx
-; AVX2-NEXT: cmovel %eax, %ecx
-; AVX2-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
-; AVX2-NEXT: bsfl %ecx, %ecx
-; AVX2-NEXT: cmovel %eax, %ecx
-; AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm1
-; AVX2-NEXT: vpextrd $1, %xmm0, %ecx
-; AVX2-NEXT: bsfl %ecx, %ecx
-; AVX2-NEXT: cmovel %eax, %ecx
-; AVX2-NEXT: vmovd %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: vmovd %edx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX2-NEXT: bsfl %ecx, %ecx
-; AVX2-NEXT: cmovel %eax, %ecx
-; AVX2-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrd $3, %xmm0, %ecx
-; AVX2-NEXT: bsfl %ecx, %ecx
-; AVX2-NEXT: cmovel %eax, %ecx
-; AVX2-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0
+; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %in, i1 0)
ret <8 x i32> %out
}
-define <8 x i32> @testv8i32u(<8 x i32> %in) {
+define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; AVX1-LABEL: testv8i32u:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrd $1, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: bsfl %ecx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $2, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $3, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrd $1, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: bsfl %ecx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $2, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $3, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpsubd %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpsubd %xmm0, %xmm1, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1]
+; AVX1-NEXT: vpsubd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm5
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpaddb %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm5 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm5, %xmm5
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpackuswb %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vpsubd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm3
+; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm6, %xmm0
+; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm3, %xmm3
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv8i32u:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrd $1, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vmovd %xmm1, %ecx
-; AVX2-NEXT: bsfl %ecx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrd $2, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrd $3, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
-; AVX2-NEXT: vpextrd $1, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vmovd %xmm0, %ecx
-; AVX2-NEXT: bsfl %ecx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrd $2, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrd $3, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpsubd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm4, %ymm0
+; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; AVX2-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
+; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; AVX2-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %in, i1 -1)
ret <8 x i32> %out
}
-define <16 x i16> @testv16i16(<16 x i16> %in) {
+define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: bsfw %ax, %cx
-; AVX1-NEXT: movw $16, %ax
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vmovd %xmm1, %edx
-; AVX1-NEXT: bsfw %dx, %dx
-; AVX1-NEXT: cmovew %ax, %dx
-; AVX1-NEXT: vmovd %edx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vmovd %xmm0, %edx
-; AVX1-NEXT: bsfw %dx, %dx
-; AVX1-NEXT: cmovew %ax, %dx
-; AVX1-NEXT: vmovd %edx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: cmovew %ax, %cx
-; AVX1-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsubw %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4
+; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrw $1, %xmm1, %eax
-; AVX2-NEXT: bsfw %ax, %cx
-; AVX2-NEXT: movw $16, %ax
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vmovd %xmm1, %edx
-; AVX2-NEXT: bsfw %dx, %dx
-; AVX2-NEXT: cmovew %ax, %dx
-; AVX2-NEXT: vmovd %edx, %xmm2
-; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $2, %xmm1, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $3, %xmm1, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $4, %xmm1, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $5, %xmm1, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $6, %xmm1, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $7, %xmm1, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm1
-; AVX2-NEXT: vpextrw $1, %xmm0, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vmovd %xmm0, %edx
-; AVX2-NEXT: bsfw %dx, %dx
-; AVX2-NEXT: cmovew %ax, %dx
-; AVX2-NEXT: vmovd %edx, %xmm2
-; AVX2-NEXT: vpinsrw $1, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $2, %xmm0, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $2, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $3, %xmm0, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $4, %xmm0, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $4, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $5, %xmm0, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $6, %xmm0, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $7, %xmm0, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: cmovew %ax, %cx
-; AVX2-NEXT: vpinsrw $7, %ecx, %xmm2, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1
+; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %in, i1 0)
ret <16 x i16> %out
}
-define <16 x i16> @testv16i16u(<16 x i16> %in) {
+define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
; AVX1-LABEL: testv16i16u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm1, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm1, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm1, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm1, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm1, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm1, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm1, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: bsfw %cx, %cx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $2, %xmm0, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm0, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $4, %xmm0, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm0, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm0, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrw $7, %xmm0, %eax
-; AVX1-NEXT: bsfw %ax, %ax
-; AVX1-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsubw %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpsrlw $4, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vpsllw $8, %xmm2, %xmm4
+; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv16i16u:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrw $1, %xmm1, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vmovd %xmm1, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $2, %xmm1, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $3, %xmm1, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $4, %xmm1, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $5, %xmm1, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $6, %xmm1, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $7, %xmm1, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
-; AVX2-NEXT: vpextrw $1, %xmm0, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vmovd %xmm0, %ecx
-; AVX2-NEXT: bsfw %cx, %cx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $2, %xmm0, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $3, %xmm0, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $4, %xmm0, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $5, %xmm0, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $6, %xmm0, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrw $7, %xmm0, %eax
-; AVX2-NEXT: bsfw %ax, %ax
-; AVX2-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsubw %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsllw $8, %ymm0, %ymm1
+; AVX2-NEXT: vpaddb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %in, i1 -1)
ret <16 x i16> %out
}
-define <32 x i8> @testv32i8(<32 x i8> %in) {
+define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrb $1, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %edx
-; AVX1-NEXT: movl $32, %eax
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: movl $8, %ecx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpextrb $0, %xmm1, %esi
-; AVX1-NEXT: bsfl %esi, %esi
-; AVX1-NEXT: cmovel %eax, %esi
-; AVX1-NEXT: cmpl $32, %esi
-; AVX1-NEXT: cmovel %ecx, %esi
-; AVX1-NEXT: vmovd %esi, %xmm2
-; AVX1-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $2, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $3, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $4, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $5, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $6, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $7, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $8, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $9, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $9, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $10, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $10, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $11, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $12, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $13, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $13, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $14, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $15, %xmm1, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $15, %edx, %xmm2, %xmm1
-; AVX1-NEXT: vpextrb $1, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpextrb $0, %xmm0, %esi
-; AVX1-NEXT: bsfl %esi, %esi
-; AVX1-NEXT: cmovel %eax, %esi
-; AVX1-NEXT: cmpl $32, %esi
-; AVX1-NEXT: cmovel %ecx, %esi
-; AVX1-NEXT: vmovd %esi, %xmm2
-; AVX1-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $2, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $3, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $4, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $5, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $6, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $7, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $8, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $9, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $9, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $10, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $10, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $11, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $12, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $13, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $13, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $14, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $15, %xmm0, %edx
-; AVX1-NEXT: bsfl %edx, %edx
-; AVX1-NEXT: cmovel %eax, %edx
-; AVX1-NEXT: cmpl $32, %edx
-; AVX1-NEXT: cmovel %ecx, %edx
-; AVX1-NEXT: vpinsrb $15, %edx, %xmm2, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsubb %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
+; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv32i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrb $1, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %edx
-; AVX2-NEXT: movl $32, %eax
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: movl $8, %ecx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpextrb $0, %xmm1, %esi
-; AVX2-NEXT: bsfl %esi, %esi
-; AVX2-NEXT: cmovel %eax, %esi
-; AVX2-NEXT: cmpl $32, %esi
-; AVX2-NEXT: cmovel %ecx, %esi
-; AVX2-NEXT: vmovd %esi, %xmm2
-; AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $2, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $3, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $4, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $5, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $6, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $7, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $8, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $9, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $9, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $10, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $10, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $11, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $12, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $13, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $13, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $14, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $15, %xmm1, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $15, %edx, %xmm2, %xmm1
-; AVX2-NEXT: vpextrb $1, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpextrb $0, %xmm0, %esi
-; AVX2-NEXT: bsfl %esi, %esi
-; AVX2-NEXT: cmovel %eax, %esi
-; AVX2-NEXT: cmpl $32, %esi
-; AVX2-NEXT: cmovel %ecx, %esi
-; AVX2-NEXT: vmovd %esi, %xmm2
-; AVX2-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $2, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $3, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $3, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $4, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $5, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $6, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $6, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $7, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $7, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $8, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $8, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $9, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $9, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $10, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $10, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $11, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $12, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $13, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $13, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $14, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $14, %edx, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $15, %xmm0, %edx
-; AVX2-NEXT: bsfl %edx, %edx
-; AVX2-NEXT: cmovel %eax, %edx
-; AVX2-NEXT: cmpl $32, %edx
-; AVX2-NEXT: cmovel %ecx, %edx
-; AVX2-NEXT: vpinsrb $15, %edx, %xmm2, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %in, i1 0)
ret <32 x i8> %out
}
-define <32 x i8> @testv32i8u(<32 x i8> %in) {
+define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
; AVX1-LABEL: testv32i8u:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrb $1, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX1-NEXT: bsfl %ecx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $2, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $3, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $4, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $5, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $6, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $7, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $8, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $9, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $10, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $11, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $12, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $13, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $14, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $15, %xmm1, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrb $1, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX1-NEXT: bsfl %ecx, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $2, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $3, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $4, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $5, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $6, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $7, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $8, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $9, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $10, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $11, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $12, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $13, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $14, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrb $15, %xmm0, %eax
-; AVX1-NEXT: bsfl %eax, %eax
-; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsubb %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpsrlw $4, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm2
+; AVX1-NEXT: vpshufb %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpshufb %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: testv32i8u:
; AVX2: # BB#0:
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrb $1, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX2-NEXT: bsfl %ecx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $2, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $3, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $4, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $5, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $6, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $7, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $8, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $9, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $10, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $11, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $12, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $13, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $14, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $15, %xmm1, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
-; AVX2-NEXT: vpextrb $1, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX2-NEXT: bsfl %ecx, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $2, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $3, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $4, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $5, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $6, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $7, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $8, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $9, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $10, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $11, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $12, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $13, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $14, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX2-NEXT: vpextrb $15, %xmm0, %eax
-; AVX2-NEXT: bsfl %eax, %eax
-; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpsubb %ymm0, %ymm1, %ymm1
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT: vpshufb %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb %ymm0, %ymm3, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: retq
%out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %in, i1 -1)
ret <32 x i8> %out
}
-define <4 x i64> @foldv4i64() {
-; AVX-LABEL: foldv4i64:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX-NEXT: retq
+define <4 x i64> @foldv4i64() nounwind {
+; ALL-LABEL: foldv4i64:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
+; ALL-NEXT: retq
%out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
ret <4 x i64> %out
}
-define <4 x i64> @foldv4i64u() {
-; AVX-LABEL: foldv4i64u:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
-; AVX-NEXT: retq
+define <4 x i64> @foldv4i64u() nounwind {
+; ALL-LABEL: foldv4i64u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
+; ALL-NEXT: retq
%out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
ret <4 x i64> %out
}
-define <8 x i32> @foldv8i32() {
-; AVX-LABEL: foldv8i32:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX-NEXT: retq
+define <8 x i32> @foldv8i32() nounwind {
+; ALL-LABEL: foldv8i32:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
+; ALL-NEXT: retq
%out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
ret <8 x i32> %out
}
-define <8 x i32> @foldv8i32u() {
-; AVX-LABEL: foldv8i32u:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
-; AVX-NEXT: retq
+define <8 x i32> @foldv8i32u() nounwind {
+; ALL-LABEL: foldv8i32u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
+; ALL-NEXT: retq
%out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
ret <8 x i32> %out
}
-define <16 x i16> @foldv16i16() {
-; AVX-LABEL: foldv16i16:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX-NEXT: retq
+define <16 x i16> @foldv16i16() nounwind {
+; ALL-LABEL: foldv16i16:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
+; ALL-NEXT: retq
%out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
ret <16 x i16> %out
}
-define <16 x i16> @foldv16i16u() {
-; AVX-LABEL: foldv16i16u:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
-; AVX-NEXT: retq
+define <16 x i16> @foldv16i16u() nounwind {
+; ALL-LABEL: foldv16i16u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
+; ALL-NEXT: retq
%out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
ret <16 x i16> %out
}
-define <32 x i8> @foldv32i8() {
-; AVX-LABEL: foldv32i8:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX-NEXT: retq
+define <32 x i8> @foldv32i8() nounwind {
+; ALL-LABEL: foldv32i8:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
+; ALL-NEXT: retq
%out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
ret <32 x i8> %out
}
-define <32 x i8> @foldv32i8u() {
-; AVX-LABEL: foldv32i8u:
-; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
-; AVX-NEXT: retq
+define <32 x i8> @foldv32i8u() nounwind {
+; ALL-LABEL: foldv32i8u:
+; ALL: # BB#0:
+; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
+; ALL-NEXT: retq
%out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
ret <32 x i8> %out
}
diff --git a/test/CodeGen/X86/vector-tzcnt-512.ll b/test/CodeGen/X86/vector-tzcnt-512.ll
new file mode 100644
index 000000000000..9265fad0176c
--- /dev/null
+++ b/test/CodeGen/X86/vector-tzcnt-512.ll
@@ -0,0 +1,271 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
+
+define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
+; ALL-LABEL: testv8i64:
+; ALL: ## BB#0:
+; ALL-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; ALL-NEXT: vpextrq $1, %xmm1, %rax
+; ALL-NEXT: tzcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm2
+; ALL-NEXT: vmovq %xmm1, %rax
+; ALL-NEXT: tzcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm1
+; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; ALL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; ALL-NEXT: vpextrq $1, %xmm2, %rax
+; ALL-NEXT: tzcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm3
+; ALL-NEXT: vmovq %xmm2, %rax
+; ALL-NEXT: tzcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm2
+; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; ALL-NEXT: vpextrq $1, %xmm2, %rax
+; ALL-NEXT: tzcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm3
+; ALL-NEXT: vmovq %xmm2, %rax
+; ALL-NEXT: tzcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm2
+; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; ALL-NEXT: vpextrq $1, %xmm0, %rax
+; ALL-NEXT: tzcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm3
+; ALL-NEXT: vmovq %xmm0, %rax
+; ALL-NEXT: tzcntq %rax, %rax
+; ALL-NEXT: vmovq %rax, %xmm0
+; ALL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; ALL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 0)
+ ret <8 x i64> %out
+}
+
+define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
+; ALL-LABEL: testv8i64u:
+; ALL: ## BB#0:
+; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; ALL-NEXT: vpsubq %zmm0, %zmm1, %zmm1
+; ALL-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vplzcntq %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpsubq %zmm0, %zmm1, %zmm0
+; ALL-NEXT: retq
+ %out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 -1)
+ ret <8 x i64> %out
+}
+
+define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
+; ALL-LABEL: testv16i32:
+; ALL: ## BB#0:
+; ALL-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; ALL-NEXT: vpextrd $1, %xmm1, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vmovd %xmm1, %ecx
+; ALL-NEXT: tzcntl %ecx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm2
+; ALL-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; ALL-NEXT: vpextrd $2, %xmm1, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; ALL-NEXT: vpextrd $3, %xmm1, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
+; ALL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; ALL-NEXT: vpextrd $1, %xmm2, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vmovd %xmm2, %ecx
+; ALL-NEXT: tzcntl %ecx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm3
+; ALL-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $2, %xmm2, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $3, %xmm2, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
+; ALL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
+; ALL-NEXT: vpextrd $1, %xmm2, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vmovd %xmm2, %ecx
+; ALL-NEXT: tzcntl %ecx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm3
+; ALL-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $2, %xmm2, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $3, %xmm2, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2
+; ALL-NEXT: vpextrd $1, %xmm0, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vmovd %xmm0, %ecx
+; ALL-NEXT: tzcntl %ecx, %ecx
+; ALL-NEXT: vmovd %ecx, %xmm3
+; ALL-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $2, %xmm0, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
+; ALL-NEXT: vpextrd $3, %xmm0, %eax
+; ALL-NEXT: tzcntl %eax, %eax
+; ALL-NEXT: vpinsrd $3, %eax, %xmm3, %xmm0
+; ALL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: retq
+ %out = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %in, i1 0)
+ ret <16 x i32> %out
+}
+
+define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
+; ALL-LABEL: testv16i32u:
+; ALL: ## BB#0:
+; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; ALL-NEXT: vpsubd %zmm0, %zmm1, %zmm1
+; ALL-NEXT: vpandd %zmm1, %zmm0, %zmm0
+; ALL-NEXT: vplzcntd %zmm0, %zmm0
+; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
+; ALL-NEXT: vpsubd %zmm0, %zmm1, %zmm0
+; ALL-NEXT: retq
+ %out = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %in, i1 -1)
+ ret <16 x i32> %out
+}
+
+define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
+; ALL-LABEL: testv32i16:
+; ALL: ## BB#0:
+; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; ALL-NEXT: vpsubw %ymm0, %ymm2, %ymm3
+; ALL-NEXT: vpand %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; ALL-NEXT: vpsubw %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; ALL-NEXT: vpand %ymm4, %ymm0, %ymm5
+; ALL-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; ALL-NEXT: vpshufb %ymm5, %ymm6, %ymm5
+; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; ALL-NEXT: vpand %ymm4, %ymm0, %ymm0
+; ALL-NEXT: vpshufb %ymm0, %ymm6, %ymm0
+; ALL-NEXT: vpaddb %ymm5, %ymm0, %ymm0
+; ALL-NEXT: vpsllw $8, %ymm0, %ymm5
+; ALL-NEXT: vpaddb %ymm0, %ymm5, %ymm0
+; ALL-NEXT: vpsrlw $8, %ymm0, %ymm0
+; ALL-NEXT: vpsubw %ymm1, %ymm2, %ymm2
+; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; ALL-NEXT: vpsubw %ymm3, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm4, %ymm1, %ymm2
+; ALL-NEXT: vpshufb %ymm2, %ymm6, %ymm2
+; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm4, %ymm1, %ymm1
+; ALL-NEXT: vpshufb %ymm1, %ymm6, %ymm1
+; ALL-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; ALL-NEXT: vpsllw $8, %ymm1, %ymm2
+; ALL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
+; ALL-NEXT: vpsrlw $8, %ymm1, %ymm1
+; ALL-NEXT: retq
+ %out = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %in, i1 0)
+ ret <32 x i16> %out
+}
+
+define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
+; ALL-LABEL: testv32i16u:
+; ALL: ## BB#0:
+; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; ALL-NEXT: vpsubw %ymm0, %ymm2, %ymm3
+; ALL-NEXT: vpand %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; ALL-NEXT: vpsubw %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; ALL-NEXT: vpand %ymm4, %ymm0, %ymm5
+; ALL-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; ALL-NEXT: vpshufb %ymm5, %ymm6, %ymm5
+; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; ALL-NEXT: vpand %ymm4, %ymm0, %ymm0
+; ALL-NEXT: vpshufb %ymm0, %ymm6, %ymm0
+; ALL-NEXT: vpaddb %ymm5, %ymm0, %ymm0
+; ALL-NEXT: vpsllw $8, %ymm0, %ymm5
+; ALL-NEXT: vpaddb %ymm0, %ymm5, %ymm0
+; ALL-NEXT: vpsrlw $8, %ymm0, %ymm0
+; ALL-NEXT: vpsubw %ymm1, %ymm2, %ymm2
+; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; ALL-NEXT: vpsubw %ymm3, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm4, %ymm1, %ymm2
+; ALL-NEXT: vpshufb %ymm2, %ymm6, %ymm2
+; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm4, %ymm1, %ymm1
+; ALL-NEXT: vpshufb %ymm1, %ymm6, %ymm1
+; ALL-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; ALL-NEXT: vpsllw $8, %ymm1, %ymm2
+; ALL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
+; ALL-NEXT: vpsrlw $8, %ymm1, %ymm1
+; ALL-NEXT: retq
+ %out = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %in, i1 -1)
+ ret <32 x i16> %out
+}
+
+define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
+; ALL-LABEL: testv64i8:
+; ALL: ## BB#0:
+; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; ALL-NEXT: vpsubb %ymm0, %ymm2, %ymm3
+; ALL-NEXT: vpand %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; ALL-NEXT: vpsubb %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; ALL-NEXT: vpand %ymm4, %ymm0, %ymm5
+; ALL-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; ALL-NEXT: vpshufb %ymm5, %ymm6, %ymm5
+; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; ALL-NEXT: vpand %ymm4, %ymm0, %ymm0
+; ALL-NEXT: vpshufb %ymm0, %ymm6, %ymm0
+; ALL-NEXT: vpaddb %ymm5, %ymm0, %ymm0
+; ALL-NEXT: vpsubb %ymm1, %ymm2, %ymm2
+; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; ALL-NEXT: vpsubb %ymm3, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm4, %ymm1, %ymm2
+; ALL-NEXT: vpshufb %ymm2, %ymm6, %ymm2
+; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm4, %ymm1, %ymm1
+; ALL-NEXT: vpshufb %ymm1, %ymm6, %ymm1
+; ALL-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; ALL-NEXT: retq
+ %out = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %in, i1 0)
+ ret <64 x i8> %out
+}
+
+define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
+; ALL-LABEL: testv64i8u:
+; ALL: ## BB#0:
+; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; ALL-NEXT: vpsubb %ymm0, %ymm2, %ymm3
+; ALL-NEXT: vpand %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; ALL-NEXT: vpsubb %ymm3, %ymm0, %ymm0
+; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; ALL-NEXT: vpand %ymm4, %ymm0, %ymm5
+; ALL-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; ALL-NEXT: vpshufb %ymm5, %ymm6, %ymm5
+; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; ALL-NEXT: vpand %ymm4, %ymm0, %ymm0
+; ALL-NEXT: vpshufb %ymm0, %ymm6, %ymm0
+; ALL-NEXT: vpaddb %ymm5, %ymm0, %ymm0
+; ALL-NEXT: vpsubb %ymm1, %ymm2, %ymm2
+; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; ALL-NEXT: vpsubb %ymm3, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm4, %ymm1, %ymm2
+; ALL-NEXT: vpshufb %ymm2, %ymm6, %ymm2
+; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
+; ALL-NEXT: vpand %ymm4, %ymm1, %ymm1
+; ALL-NEXT: vpshufb %ymm1, %ymm6, %ymm1
+; ALL-NEXT: vpaddb %ymm2, %ymm1, %ymm1
+; ALL-NEXT: retq
+ %out = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %in, i1 -1)
+ ret <64 x i8> %out
+}
+
+declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1)
+declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
+declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1)
+declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)
diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll
index b119f5eb89f6..b8024203ab2f 100644
--- a/test/CodeGen/X86/vector-zext.ll
+++ b/test/CodeGen/X86/vector-zext.ll
@@ -1,8 +1,267 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+
+define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_16i8_to_8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_16i8_to_8i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_16i8_to_8i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: zext_16i8_to_8i16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %C = zext <8 x i8> %B to <8 x i16>
+ ret <8 x i16> %C
+}
+
+; PR17654
+define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) {
+; SSE2-LABEL: zext_16i8_to_16i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_16i8_to_16i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_16i8_to_16i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: zext_16i8_to_16i16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: zext_16i8_to_16i16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: retq
+entry:
+ %B = zext <16 x i8> %A to <16 x i16>
+ ret <16 x i16> %B
+}
+
+define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_16i8_to_4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_16i8_to_4i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_16i8_to_4i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: zext_16i8_to_4i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %C = zext <4 x i8> %B to <4 x i32>
+ ret <4 x i32> %C
+}
+
+define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_16i8_to_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_16i8_to_8i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_16i8_to_8i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: zext_16i8_to_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: zext_16i8_to_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %C = zext <8 x i8> %B to <8 x i32>
+ ret <8 x i32> %C
+}
+
+define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_16i8_to_2i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_16i8_to_2i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_16i8_to_2i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: zext_16i8_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+ %C = zext <2 x i8> %B to <2 x i64>
+ ret <2 x i64> %C
+}
+
+define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_16i8_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_16i8_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_16i8_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: psrld $16, %xmm0
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: zext_16i8_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: zext_16i8_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %C = zext <4 x i8> %B to <4 x i64>
+ ret <4 x i64> %C
+}
+
+define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_8i16_to_4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_8i16_to_4i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_8i16_to_4i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: zext_8i16_to_4i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %C = zext <4 x i16> %B to <4 x i32>
+ ret <4 x i32> %C
+}
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
@@ -10,8 +269,7 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
@@ -19,16 +277,15 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
@@ -48,35 +305,139 @@ entry:
ret <8 x i32>%B
}
+define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_8i16_to_2i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_8i16_to_2i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_8i16_to_2i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: zext_8i16_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %C = zext <2 x i16> %B to <2 x i64>
+ ret <2 x i64> %C
+}
+
+define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_8i16_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_8i16_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_8i16_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: zext_8i16_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: zext_8i16_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %C = zext <4 x i16> %B to <4 x i64>
+ ret <4 x i64> %C
+}
+
+define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_4i32_to_2i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_4i32_to_2i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_4i32_to_2i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: zext_4i32_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %C = zext <2 x i32> %B to <2 x i64>
+ ret <2 x i64> %C
+}
+
define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
-; SSE2-NEXT: pand %xmm3, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
-; SSE2-NEXT: pand %xmm3, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
-; SSSE3-NEXT: pand %xmm3, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
-; SSSE3-NEXT: pand %xmm3, %xmm1
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
-; SSE41-NEXT: pand %xmm3, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
-; SSE41-NEXT: pand %xmm3, %xmm1
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_4i32_to_4i64:
@@ -96,101 +457,190 @@ entry:
ret <4 x i64>%B
}
-define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
-; SSE2-LABEL: zext_8i8_to_8i32:
+define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) {
+; SSE2-LABEL: load_zext_2i8_to_2i64:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movzwl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: zext_8i8_to_8i32:
+; SSSE3-LABEL: load_zext_2i8_to_2i64:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
-; SSSE3-NEXT: pand %xmm1, %xmm2
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: pand %xmm0, %xmm1
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: movzwl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: zext_8i8_to_8i32:
+; SSE41-LABEL: load_zext_2i8_to_2i64:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
-; SSE41-NEXT: pand %xmm1, %xmm2
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
-; AVX1-LABEL: zext_8i8_to_8i32:
+; AVX-LABEL: load_zext_2i8_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: retq
+entry:
+ %X = load <2 x i8>, <2 x i8>* %ptr
+ %Y = zext <2 x i8> %X to <2 x i64>
+ ret <2 x i64> %Y
+}
+
+define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) {
+; SSE2-LABEL: load_zext_4i8_to_4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_zext_4i8_to_4i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_zext_4i8_to_4i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: load_zext_4i8_to_4i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT: retq
+entry:
+ %X = load <4 x i8>, <4 x i8>* %ptr
+ %Y = zext <4 x i8> %X to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) {
+; SSE2-LABEL: load_zext_4i8_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_zext_4i8_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,zero,zero,zero,zero,xmm1[12],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_zext_4i8_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_zext_4i8_to_4i64:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: zext_8i8_to_8i32:
+; AVX2-LABEL: load_zext_4i8_to_4i64:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
entry:
- %t = zext <8 x i8> %z to <8 x i32>
- ret <8 x i32> %t
+ %X = load <4 x i8>, <4 x i8>* %ptr
+ %Y = zext <4 x i8> %X to <4 x i64>
+ ret <4 x i64> %Y
}
-; PR17654
-define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
-; SSE2-LABEL: zext_16i8_to_16i16:
+define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) {
+; SSE2-LABEL: load_zext_8i8_to_8i16:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
-; SSSE3-LABEL: zext_16i8_to_16i16:
+; SSSE3-LABEL: load_zext_8i8_to_8i16:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: retq
;
-; SSE41-LABEL: zext_16i8_to_16i16:
+; SSE41-LABEL: load_zext_8i8_to_8i16:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SSE41-NEXT: retq
;
-; AVX1-LABEL: zext_16i8_to_16i16:
+; AVX-LABEL: load_zext_8i8_to_8i16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX-NEXT: retq
+entry:
+ %X = load <8 x i8>, <8 x i8>* %ptr
+ %Y = zext <8 x i8> %X to <8 x i16>
+ ret <8 x i16> %Y
+}
+
+define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) {
+; SSE2-LABEL: load_zext_8i8_to_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_zext_8i8_to_8i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[6],zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[14],zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_zext_8i8_to_8i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
-; AVX2-LABEL: zext_16i8_to_16i16:
+; AVX2-LABEL: load_zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT: retq
entry:
- %t = zext <16 x i8> %z to <16 x i16>
- ret <16 x i16> %t
+ %X = load <8 x i8>, <8 x i8>* %ptr
+ %Y = zext <8 x i8> %X to <8 x i32>
+ ret <8 x i32> %Y
}
define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
@@ -200,8 +650,7 @@ define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_16i8_to_16i16:
@@ -210,8 +659,7 @@ define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_16i8_to_16i16:
@@ -237,6 +685,112 @@ entry:
ret <16 x i16> %Y
}
+define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) {
+; SSE2-LABEL: load_zext_2i16_to_2i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_zext_2i16_to_2i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_zext_2i16_to_2i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: load_zext_2i16_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; AVX-NEXT: retq
+entry:
+ %X = load <2 x i16>, <2 x i16>* %ptr
+ %Y = zext <2 x i16> %X to <2 x i64>
+ ret <2 x i64> %Y
+}
+
+define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) {
+; SSE2-LABEL: load_zext_4i16_to_4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_zext_4i16_to_4i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_zext_4i16_to_4i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: load_zext_4i16_to_4i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX-NEXT: retq
+entry:
+ %X = load <4 x i16>, <4 x i16>* %ptr
+ %Y = zext <4 x i16> %X to <4 x i32>
+ ret <4 x i32> %Y
+}
+
+define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) {
+; SSE2-LABEL: load_zext_4i16_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0,65535,0,0,0]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_zext_4i16_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9],zero,zero,zero,zero,zero,zero,xmm1[12,13],zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_zext_4i16_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: load_zext_4i16_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: load_zext_4i16_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX2-NEXT: retq
+entry:
+ %X = load <4 x i16>, <4 x i16>* %ptr
+ %Y = zext <4 x i16> %X to <4 x i64>
+ ret <4 x i64> %Y
+}
+
define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSE2-LABEL: load_zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
@@ -244,8 +798,7 @@ define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_8i16_to_8i32:
@@ -254,8 +807,7 @@ define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_8i16_to_8i32:
@@ -278,28 +830,56 @@ define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
entry:
%X = load <8 x i16>, <8 x i16>* %ptr
%Y = zext <8 x i16> %X to <8 x i32>
- ret <8 x i32>%Y
+ ret <8 x i32> %Y
+}
+
+define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) {
+; SSE2-LABEL: load_zext_2i32_to_2i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: load_zext_2i32_to_2i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: load_zext_2i32_to_2i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: load_zext_2i32_to_2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; AVX-NEXT: retq
+entry:
+ %X = load <2 x i32>, <2 x i32>* %ptr
+ %Y = zext <2 x i32> %X to <2 x i64>
+ ret <2 x i64> %Y
}
define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
; SSE2-LABEL: load_zext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_zext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
-; SSSE3-NEXT: pand %xmm2, %xmm0
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; SSSE3-NEXT: pand %xmm2, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_zext_4i32_to_4i64:
@@ -322,7 +902,56 @@ define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
entry:
%X = load <4 x i32>, <4 x i32>* %ptr
%Y = zext <4 x i32> %X to <4 x i64>
- ret <4 x i64>%Y
+ ret <4 x i64> %Y
+}
+
+define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
+; SSE2-LABEL: zext_8i8_to_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: zext_8i8_to_8i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: zext_8i8_to_8i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: zext_8i8_to_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: zext_8i8_to_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: retq
+entry:
+ %t = zext <8 x i8> %z to <8 x i32>
+ ret <8 x i32> %t
}
define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
@@ -415,17 +1044,14 @@ entry:
define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
; SSE2-LABEL: shuf_zext_8i8_to_8i32:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: packuswb %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
-; SSE2-NEXT: pandn %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT: packuswb %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
@@ -433,25 +1059,27 @@ define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuf_zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuf_zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -465,3 +1093,346 @@ entry:
%Z = bitcast <32 x i8> %B to <8 x i32>
ret <8 x i32> %Z
}
+
+define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: psrlq $48, %xmm0
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %Z = bitcast <16 x i8> %B to <2 x i64>
+ ret <2 x i64> %Z
+}
+
+define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %Z = bitcast <32 x i8> %B to <4 x i64>
+ ret <4 x i64> %Z
+}
+
+define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8>
+ %Z = bitcast <8 x i16> %B to <2 x i64>
+ ret <2 x i64> %Z
+}
+
+define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
+; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8>
+ %Z = bitcast <16 x i16> %B to <4 x i64>
+ ret <4 x i64> %Z
+}
+
+define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE-LABEL: shuf_zext_8i16_to_4i32_offset1:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8>
+ %Z = bitcast <8 x i16> %B to <4 x i32>
+ ret <4 x i32> %Z
+}
+
+define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8>
+ %Z = bitcast <16 x i16> %B to <8 x i32>
+ ret <8 x i32> %Z
+}
+
+define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE41-NEXT: movdqa %xmm2, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16>
+ %Z = bitcast <16 x i16> %B to <8 x i32>
+ ret <8 x i32> %Z
+}
+
+define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp {
+; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT: retq
+entry:
+ %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4>
+ %Z = bitcast <4 x i32> %B to <2 x i64>
+ ret <2 x i64> %Z
+}
+
+define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0]
+; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[3],zero,zero,zero
+; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3]
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4>
+ %Z = bitcast <8 x i32> %B to <4 x i64>
+ ret <4 x i64> %Z
+}
diff --git a/test/CodeGen/X86/vector-zmov.ll b/test/CodeGen/X86/vector-zmov.ll
index 298683559054..e378a3244b4e 100644
--- a/test/CodeGen/X86/vector-zmov.ll
+++ b/test/CodeGen/X86/vector-zmov.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
diff --git a/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
new file mode 100644
index 000000000000..2ff8c3a9028f
--- /dev/null
+++ b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -o /dev/null -stop-after machine-scheduler %s | FileCheck %s --check-prefix=PRE-RA
+; RUN: llc -mtriple=x86_64-unknown-unknown -o /dev/null -stop-after prologepilog %s | FileCheck %s --check-prefix=POST-RA
+
+; This test verifies that the virtual register references in machine function's
+; liveins are cleared after register allocation.
+
+define i32 @test(i32 %a, i32 %b) {
+body:
+ %c = mul i32 %a, %b
+ ret i32 %c
+}
+
+; PRE-RA: liveins:
+; PRE-RA-NEXT: - { reg: '%edi', virtual-reg: '%0' }
+; PRE-RA-NEXT: - { reg: '%esi', virtual-reg: '%1' }
+
+; POST-RA: liveins:
+; POST-RA-NEXT: - { reg: '%edi' }
+; POST-RA-NEXT: - { reg: '%esi' }
diff --git a/test/CodeGen/X86/vmovq.ll b/test/CodeGen/X86/vmovq.ll
new file mode 100644
index 000000000000..45d350c743e2
--- /dev/null
+++ b/test/CodeGen/X86/vmovq.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX
+
+define <2 x i64> @PR25554(<2 x i64> %v0, <2 x i64> %v1) {
+; SSE-LABEL: PR25554:
+; SSE: # BB#0:
+; SSE-NEXT: movl $1, %eax
+; SSE-NEXT: movd %rax, %xmm1
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; SSE-NEXT: paddq %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: PR25554:
+; AVX: # BB#0:
+; AVX-NEXT: movl $1, %eax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %c1 = or <2 x i64> %v0, <i64 1, i64 0>
+ %c2 = add <2 x i64> %c1, <i64 0, i64 1>
+ ret <2 x i64> %c2
+}
+
diff --git a/test/CodeGen/X86/vselect-2.ll b/test/CodeGen/X86/vselect-2.ll
index fe4cfba08b8a..8e0f4a4ef447 100644
--- a/test/CodeGen/X86/vselect-2.ll
+++ b/test/CodeGen/X86/vselect-2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
diff --git a/test/CodeGen/X86/vselect-avx.ll b/test/CodeGen/X86/vselect-avx.ll
index de04a097de02..002561042688 100644
--- a/test/CodeGen/X86/vselect-avx.ll
+++ b/test/CodeGen/X86/vselect-avx.ll
@@ -62,13 +62,15 @@ bb:
; CHECK-LABEL: test3:
; Compute the mask.
-; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[MASK:%xmm[0-9]+]]
+; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[MASK:%xmm[0-9]+]]
; Do not shrink the bit of the mask.
-; CHECK-NOT: vpslld $31, [[MASK]], {{%xmm[0-9]+}}
+; CHECK-NOT: vpslld $31, [[MASK]], {{%xmm[0-9]+}}
; Use the mask in the blend.
-; CHECK-NEXT: vblendvps [[MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
-; Use the mask in the and.
-; CHECK-NEXT: vpand LCPI2_2(%rip), [[MASK]], {{%xmm[0-9]+}}
+; CHECK-NEXT: vblendvps [[MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; Shuffle mask to truncate.
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK: vpshufb %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: vpshufb %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
; CHECK: retq
define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) {
%tmp6 = srem <4 x i32> %induction30, <i32 3, i32 3, i32 3, i32 3>
diff --git a/test/CodeGen/X86/vselect-minmax.ll b/test/CodeGen/X86/vselect-minmax.ll
index 5ed687f50576..edf2a442918a 100644
--- a/test/CodeGen/X86/vselect-minmax.ll
+++ b/test/CodeGen/X86/vselect-minmax.ll
@@ -1,5578 +1,11060 @@
-; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
-; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
-; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
-; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2
-; RUN: llc -march=x86-64 -mcpu=knl < %s | FileCheck %s -check-prefix=AVX2 -check-prefix=AVX512F
-; RUN: llc -march=x86-64 -mcpu=skx < %s | FileCheck %s -check-prefix=AVX512BW -check-prefix=AVX512VL -check-prefix=AVX512F
-
-define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp slt <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F --check-prefix=AVX512BW --check-prefix=AVX512VL
+
+define <16 x i8> @test1(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test1:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test1:
-; SSE4: pminsb
-
-; AVX1-LABEL: test1:
-; AVX1: vpminsb
-
-; AVX2-LABEL: test1:
-; AVX2: vpminsb
-
-; AVX512VL-LABEL: test1:
-; AVX512VL: vpminsb
-}
-
-define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp sle <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test1:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp slt <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test2(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test2:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test2:
-; SSE4: pminsb
-
-; AVX1-LABEL: test2:
-; AVX1: vpminsb
-
-; AVX2-LABEL: test2:
-; AVX2: vpminsb
-
-; AVX512VL-LABEL: test2:
-; AVX512VL: vpminsb
-}
-
-define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp sgt <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test2:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sle <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test3(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test3:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test3:
-; SSE4: pmaxsb
-
-; AVX1-LABEL: test3:
-; AVX1: vpmaxsb
-
-; AVX2-LABEL: test3:
-; AVX2: vpmaxsb
-
-; AVX512VL-LABEL: test3:
-; AVX512VL: vpmaxsb
-}
-
-define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp sge <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test3:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sgt <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test4(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test4:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test4:
-; SSE4: pmaxsb
-
-; AVX1-LABEL: test4:
-; AVX1: vpmaxsb
-
-; AVX2-LABEL: test4:
-; AVX2: vpmaxsb
-
-; AVX512VL-LABEL: test4:
-; AVX512VL: vpmaxsb
-}
-
-define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp ult <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test5:
-; SSE2: pminub
-
-; AVX1-LABEL: test5:
-; AVX1: vpminub
-
-; AVX2-LABEL: test5:
-; AVX2: vpminub
-
-; AVX512VL-LABEL: test5:
-; AVX512VL: vpminub
-}
-
-define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp ule <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test6:
-; SSE2: pminub
-
-; AVX1-LABEL: test6:
-; AVX1: vpminub
-
-; AVX2-LABEL: test6:
-; AVX2: vpminub
-
-; AVX512VL-LABEL: test6:
-; AVX512VL: vpminub
-}
-
-define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp ugt <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test7:
-; SSE2: pmaxub
-
-; AVX1-LABEL: test7:
-; AVX1: vpmaxub
-
-; AVX2-LABEL: test7:
-; AVX2: vpmaxub
-
-; AVX512VL-LABEL: test7:
-; AVX512VL: vpmaxub
-}
-
-define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp uge <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test8:
-; SSE2: pmaxub
-
-; AVX1-LABEL: test8:
-; AVX1: vpmaxub
-
-; AVX2-LABEL: test8:
-; AVX2: vpmaxub
-
-; AVX512VL-LABEL: test8:
-; AVX512VL: vpmaxub
-}
-
-define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp slt <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test9:
-; SSE2: pminsw
-
-; AVX1-LABEL: test9:
-; AVX1: vpminsw
-
-; AVX2-LABEL: test9:
-; AVX2: vpminsw
-
-; AVX512VL-LABEL: test9:
-; AVX512VL: vpminsw
-}
-
-define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp sle <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test10:
-; SSE2: pminsw
-
-; AVX1-LABEL: test10:
-; AVX1: vpminsw
-
-; AVX2-LABEL: test10:
-; AVX2: vpminsw
-
-; AVX512VL-LABEL: test10:
-; AVX512VL: vpminsw
-}
-
-define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp sgt <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test11:
-; SSE2: pmaxsw
-
-; AVX1-LABEL: test11:
-; AVX1: vpmaxsw
-
-; AVX2-LABEL: test11:
-; AVX2: vpmaxsw
-
-; AVX512VL-LABEL: test11:
-; AVX512VL: vpmaxsw
-}
-
-define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp sge <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test12:
-; SSE2: pmaxsw
-
-; AVX1-LABEL: test12:
-; AVX1: vpmaxsw
-
-; AVX2-LABEL: test12:
-; AVX2: vpmaxsw
-
-; AVX512VL-LABEL: test12:
-; AVX512VL: vpmaxsw
-}
-
-define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp ult <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test4:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sge <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test5(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: test5:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test5:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ult <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test6(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: test6:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test6:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ule <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test7(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: test7:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test7:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ugt <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test8(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: test8:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test8:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp uge <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %sel
+}
+
+define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: test9:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test9:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp slt <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: test10:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test10:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sle <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: test11:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test11:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sgt <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: test12:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test12:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sge <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: test13:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test13:
-; SSE4: pminuw
-
-; AVX1-LABEL: test13:
-; AVX1: vpminuw
-
-; AVX2-LABEL: test13:
-; AVX2: vpminuw
-
-; AVX512VL-LABEL: test13:
-; AVX512VL: vpminuw
-}
-
-define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp ule <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test13:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ult <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test14(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: test14:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psubusw %xmm1, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpeqw %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test14:
-; SSE4: pminuw
-
-; AVX1-LABEL: test14:
-; AVX1: vpminuw
-
-; AVX2-LABEL: test14:
-; AVX2: vpminuw
-
-; AVX512VL-LABEL: test14:
-; AVX512VL: vpminuw
-}
-
-define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp ugt <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test14:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ule <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: test15:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test15:
-; SSE4: pmaxuw
-
-; AVX1-LABEL: test15:
-; AVX1: vpmaxuw
-
-; AVX2-LABEL: test15:
-; AVX2: vpmaxuw
-
-; AVX512VL-LABEL: test15:
-; AVX512VL: vpmaxuw
-}
-
-define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp uge <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test15:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ugt <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: test16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psubusw %xmm0, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpeqw %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test16:
-; SSE4: pmaxuw
-
-; AVX1-LABEL: test16:
-; AVX1: vpmaxuw
-
-; AVX2-LABEL: test16:
-; AVX2: vpmaxuw
-
-; AVX512VL-LABEL: test16:
-; AVX512VL: vpmaxuw
-}
-
-define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp slt <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp uge <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %sel
+}
+
+define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test17:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test17:
-; SSE4: pminsd
-
-; AVX1-LABEL: test17:
-; AVX1: vpminsd
-
-; AVX2-LABEL: test17:
-; AVX2: vpminsd
-
-; AVX512VL-LABEL: test17:
-; AVX512VL: vpminsd
-}
-
-define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp sle <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test17:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp slt <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test18:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test18:
-; SSE4: pminsd
-
-; AVX1-LABEL: test18:
-; AVX1: vpminsd
-
-; AVX2-LABEL: test18:
-; AVX2: vpminsd
-
-; AVX512VL-LABEL: test18:
-; AVX512VL: vpminsd
-}
-
-define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp sgt <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test18:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sle <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test19:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test19:
-; SSE4: pmaxsd
-
-; AVX1-LABEL: test19:
-; AVX1: vpmaxsd
-
-; AVX2-LABEL: test19:
-; AVX2: vpmaxsd
-
-; AVX512VL-LABEL: test19:
-; AVX512VL: vpmaxsd
-}
-
-define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp sge <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test19:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sgt <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test20:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test20:
-; SSE4: pmaxsd
-
-; AVX1-LABEL: test20:
-; AVX1: vpmaxsd
-
-; AVX2-LABEL: test20:
-; AVX2: vpmaxsd
-
-; AVX512VL-LABEL: test20:
-; AVX512VL: vpmaxsd
-}
-
-define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp ult <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test20:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sge <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test21(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test21:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test21:
-; SSE4: pminud
-
-; AVX1-LABEL: test21:
-; AVX1: vpminud
-
-; AVX2-LABEL: test21:
-; AVX2: vpminud
-
-; AVX512VL-LABEL: test21:
-; AVX512VL: vpminud
-}
-
-define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp ule <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test21:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ult <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test22(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test22:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pxor %xmm0, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test22:
-; SSE4: pminud
-
-; AVX1-LABEL: test22:
-; AVX1: vpminud
-
-; AVX2-LABEL: test22:
-; AVX2: vpminud
-
-; AVX512VL-LABEL: test22:
-; AVX512VL: vpminud
-}
-
-define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp ugt <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test22:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ule <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test23:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test23:
-; SSE4: pmaxud
-
-; AVX1-LABEL: test23:
-; AVX1: vpmaxud
-
-; AVX2-LABEL: test23:
-; AVX2: vpmaxud
-
-; AVX512VL-LABEL: test23:
-; AVX512VL: vpmaxud
-}
-
-define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp uge <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test23:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ugt <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test24(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test24:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test24:
-; SSE4: pmaxud
-
-; AVX1-LABEL: test24:
-; AVX1: vpmaxud
-
-; AVX2-LABEL: test24:
-; AVX2: vpmaxud
-
-; AVX512VL-LABEL: test24:
-; AVX512VL: vpmaxud
-}
-
-define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp slt <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test24:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp uge <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %sel
+}
+
+define <32 x i8> @test25(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: test25:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test25:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm2, %xmm0
+; SSE4-NEXT: pminsb %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test25:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test25:
-; AVX2: vpminsb
-
-; AVX512VL-LABEL: test25:
-; AVX512VL: vpminsb
-}
-
-define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp sle <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test25:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test26(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: test26:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
+; SSE2-NEXT: movdqa %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm7
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm7
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm6
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test26:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm2, %xmm0
+; SSE4-NEXT: pminsb %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test26:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test26:
-; AVX2: vpminsb
-
-; AVX512VL-LABEL: test26:
-; AVX512VL: vpminsb
-}
-
-define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp sgt <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test26:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test27(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: test27:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test27:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm2, %xmm0
+; SSE4-NEXT: pmaxsb %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test27:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test27:
-; AVX2: vpmaxsb
-
-; AVX512VL-LABEL: test27:
-; AVX512VL: vpmaxsb
-}
-
-define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp sge <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test27:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test28(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: test28:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm7
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm7
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm7
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm6
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test28:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm2, %xmm0
+; SSE4-NEXT: pmaxsb %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test28:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test28:
-; AVX2: vpmaxsb
-
-; AVX512VL-LABEL: test28:
-; AVX512VL: vpmaxsb
-}
-
-define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp ult <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test28:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test29(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: test29:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm2, %xmm0
+; SSE-NEXT: pminub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test29:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test29:
-; AVX2: vpminub
-
-; AVX512VL-LABEL: test29:
-; AVX512VL: vpminub
-}
-
-define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp ule <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test29:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test30(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: test30:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm2, %xmm0
+; SSE-NEXT: pminub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test30:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test30:
-; AVX2: vpminub
-
-; AVX512VL-LABEL: test30:
-; AVX512VL: vpminub
-}
-
-define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp ugt <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test30:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test31(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: test31:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm2, %xmm0
+; SSE-NEXT: pmaxub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test31:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test31:
-; AVX2: vpmaxub
-
-; AVX512VL-LABEL: test31:
-; AVX512VL: vpmaxub
-}
-
-define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp uge <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test31:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test32(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: test32:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm2, %xmm0
+; SSE-NEXT: pmaxub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test32:
-; AVX2: vpmaxub
-
-; AVX512VL-LABEL: test32:
-; AVX512VL: vpmaxub
-}
-
-define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp slt <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test32:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %a, <32 x i8> %b
+ ret <32 x i8> %sel
+}
+
+define <16 x i16> @test33(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: test33:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm2, %xmm0
+; SSE-NEXT: pminsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test33:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test33:
-; AVX2: vpminsw
-
-; AVX512VL-LABEL: test33:
-; AVX512VL: vpminsw
-}
-
-define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp sle <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test33:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test34(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: test34:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm2, %xmm0
+; SSE-NEXT: pminsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test34:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test34:
-; AVX2: vpminsw
-
-; AVX512VL-LABEL: test34:
-; AVX512VL: vpminsw
-}
-
-define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp sgt <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test34:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test35(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: test35:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm2, %xmm0
+; SSE-NEXT: pmaxsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test35:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test35:
-; AVX2: vpmaxsw
-
-; AVX512VL-LABEL: test35:
-; AVX512VL: vpmaxsw
-}
-
-define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp sge <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test35:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test36(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: test36:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm2, %xmm0
+; SSE-NEXT: pmaxsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test36:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test36:
-; AVX2: vpmaxsw
-
-; AVX512VL-LABEL: test36:
-; AVX512VL: vpmaxsw
-}
-
-define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp ult <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test36:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test37(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: test37:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: pcmpgtw %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtw %xmm5, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test37:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm2, %xmm0
+; SSE4-NEXT: pminuw %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test37:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test37:
-; AVX2: vpminuw
-
-; AVX512VL-LABEL: test37:
-; AVX512VL: vpminuw
-}
-
-define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp ule <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test37:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test38(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: test38:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: psubusw %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpeqw %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: psubusw %xmm2, %xmm5
+; SSE2-NEXT: pcmpeqw %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test38:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm2, %xmm0
+; SSE4-NEXT: pminuw %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test38:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test38:
-; AVX2: vpminuw
-
-; AVX512VL-LABEL: test38:
-; AVX512VL: vpminuw
-}
-
-define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp ugt <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test38:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test39(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: test39:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pcmpgtw %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtw %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test39:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm2, %xmm0
+; SSE4-NEXT: pmaxuw %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test39:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test39:
-; AVX2: vpmaxuw
-
-; AVX512VL-LABEL: test39:
-; AVX512VL: vpmaxuw
-}
-
-define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp uge <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test39:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test40(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: test40:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: psubusw %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpeqw %xmm5, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: psubusw %xmm0, %xmm6
+; SSE2-NEXT: pcmpeqw %xmm5, %xmm6
+; SSE2-NEXT: pand %xmm6, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test40:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm2, %xmm0
+; SSE4-NEXT: pmaxuw %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test40:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test40:
-; AVX2: vpmaxuw
-
-; AVX512VL-LABEL: test40:
-; AVX512VL: vpmaxuw
-}
-
-define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp slt <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test40:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %a, <16 x i16> %b
+ ret <16 x i16> %sel
+}
+
+define <8 x i32> @test41(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test41:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test41:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm2, %xmm0
+; SSE4-NEXT: pminsd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test41:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test41:
-; AVX2: vpminsd
-
-; AVX512VL-LABEL: test41:
-; AVX512VL: vpminsd
-}
-
-define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp sle <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test41:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test42(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test42:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
+; SSE2-NEXT: movdqa %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm7
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm7
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm6
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test42:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm2, %xmm0
+; SSE4-NEXT: pminsd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test42:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test42:
-; AVX2: vpminsd
-
-; AVX512VL-LABEL: test42:
-; AVX512VL: vpminsd
-}
-
-define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp sgt <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test42:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test43(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test43:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test43:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm2, %xmm0
+; SSE4-NEXT: pmaxsd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test43:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test43:
-; AVX2: vpmaxsd
-
-; AVX512VL-LABEL: test43:
-; AVX512VL: vpmaxsd
-}
-
-define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp sge <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test43:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test44(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test44:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm7
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm7
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm6
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test44:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm2, %xmm0
+; SSE4-NEXT: pmaxsd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test44:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test44:
-; AVX2: vpmaxsd
-
-; AVX512VL-LABEL: test44:
-; AVX512VL: vpmaxsd
-}
-
-define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp ult <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test44:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test45(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test45:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test45:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm2, %xmm0
+; SSE4-NEXT: pminud %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test45:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test45:
-; AVX2: vpminud
-
-; AVX512VL-LABEL: test45:
-; AVX512VL: vpminud
-}
-
-define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp ule <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test45:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test46(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test46:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm7
+; SSE2-NEXT: pxor %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm8
+; SSE2-NEXT: pxor %xmm6, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test46:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm2, %xmm0
+; SSE4-NEXT: pminud %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test46:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test46:
-; AVX2: vpminud
-
-; AVX512VL-LABEL: test46:
-; AVX512VL: vpminud
-}
-
-define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp ugt <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test46:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test47(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test47:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm4
+; SSE2-NEXT: por %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test47:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm2, %xmm0
+; SSE4-NEXT: pmaxud %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test47:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test47:
-; AVX2: vpmaxud
-
-; AVX512VL-LABEL: test47:
-; AVX512VL: vpmaxud
-}
-
-define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp uge <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test47:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test48(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test48:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: pxor %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm0, %xmm8
+; SSE2-NEXT: pxor %xmm6, %xmm8
+; SSE2-NEXT: pxor %xmm2, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test48:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm2, %xmm0
+; SSE4-NEXT: pmaxud %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test48:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test48:
-; AVX2: vpmaxud
-
-; AVX512VL-LABEL: test48:
-; AVX512VL: vpmaxud
-}
-
-define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp slt <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test48:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %sel
+}
+
+define <16 x i8> @test49(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test49:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test49:
-; SSE4: pmaxsb
-
-; AVX1-LABEL: test49:
-; AVX1: vpmaxsb
-
-; AVX2-LABEL: test49:
-; AVX2: vpmaxsb
-
-; AVX512VL-LABEL: test49:
-; AVX512VL: vpmaxsb
-}
-
-define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp sle <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test49:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp slt <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %a
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test50(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test50:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test50:
-; SSE4: pmaxsb
-
-; AVX1-LABEL: test50:
-; AVX1: vpmaxsb
-
-; AVX2-LABEL: test50:
-; AVX2: vpmaxsb
-
-; AVX512VL-LABEL: test50:
-; AVX512VL: vpmaxsb
-}
-
-define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp sgt <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test50:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sle <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %a
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test51(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test51:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test51:
-; SSE4: pminsb
-
-; AVX1-LABEL: test51:
-; AVX1: vpminsb
-
-; AVX2-LABEL: test51:
-; AVX2: vpminsb
-
-; AVX512VL-LABEL: test51:
-; AVX512VL: vpminsb
-}
-
-define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp sge <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test51:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sgt <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %a
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test52(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: test52:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test52:
-; SSE4: pminsb
-
-; AVX1-LABEL: test52:
-; AVX1: vpminsb
-
-; AVX2-LABEL: test52:
-; AVX2: vpminsb
-
-; AVX512VL-LABEL: test52:
-; AVX512VL: vpminsb
-}
-
-define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp ult <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test53:
-; SSE2: pmaxub
-
-; AVX1-LABEL: test53:
-; AVX1: vpmaxub
-
-; AVX2-LABEL: test53:
-; AVX2: vpmaxub
-
-; AVX512VL-LABEL: test53:
-; AVX512VL: vpmaxub
-}
-
-define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp ule <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test54:
-; SSE2: pmaxub
-
-; AVX1-LABEL: test54:
-; AVX1: vpmaxub
-
-; AVX2-LABEL: test54:
-; AVX2: vpmaxub
-
-; AVX512VL-LABEL: test54:
-; AVX512VL: vpmaxub
-}
-
-define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp ugt <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test55:
-; SSE2: pminub
-
-; AVX1-LABEL: test55:
-; AVX1: vpminub
-
-; AVX2-LABEL: test55:
-; AVX2: vpminub
-
-; AVX512VL-LABEL: test55:
-; AVX512VL: vpminub
-}
-
-define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <16 x i8>*
- %ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
- %cmp = icmp uge <16 x i8> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
- store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test56:
-; SSE2: pminub
-
-; AVX1-LABEL: test56:
-; AVX1: vpminub
-
-; AVX2-LABEL: test56:
-; AVX2: vpminub
-
-; AVX512VL-LABEL: test56:
-; AVX512VL: vpminub
-}
-
-define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp slt <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test57:
-; SSE2: pmaxsw
-
-; AVX1-LABEL: test57:
-; AVX1: vpmaxsw
-
-; AVX2-LABEL: test57:
-; AVX2: vpmaxsw
-
-; AVX512VL-LABEL: test57:
-; AVX512VL: vpmaxsw
-}
-
-define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp sle <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test58:
-; SSE2: pmaxsw
-
-; AVX1-LABEL: test58:
-; AVX1: vpmaxsw
-
-; AVX2-LABEL: test58:
-; AVX2: vpmaxsw
-
-; AVX512VL-LABEL: test58:
-; AVX512VL: vpmaxsw
-}
-
-define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp sgt <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test59:
-; SSE2: pminsw
-
-; AVX1-LABEL: test59:
-; AVX1: vpminsw
-
-; AVX2-LABEL: test59:
-; AVX2: vpminsw
-
-; AVX512VL-LABEL: test59:
-; AVX512VL: vpminsw
-}
-
-define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp sge <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; SSE2-LABEL: test60:
-; SSE2: pminsw
-
-; AVX1-LABEL: test60:
-; AVX1: vpminsw
-
-; AVX2-LABEL: test60:
-; AVX2: vpminsw
-
-; AVX512VL-LABEL: test60:
-; AVX512VL: vpminsw
-}
-
-define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp ult <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test52:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sge <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %a
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test53(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: test53:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test53:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ult <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %a
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test54(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: test54:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test54:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ule <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %a
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test55(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: test55:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test55:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ugt <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %a
+ ret <16 x i8> %sel
+}
+
+define <16 x i8> @test56(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: test56:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test56:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp uge <16 x i8> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i8> %b, <16 x i8> %a
+ ret <16 x i8> %sel
+}
+
+define <8 x i16> @test57(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: test57:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test57:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp slt <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %a
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test58(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: test58:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test58:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sle <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %a
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test59(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: test59:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test59:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sgt <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %a
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test60(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: test60:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test60:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sge <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %a
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test61(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: test61:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test61:
-; SSE4: pmaxuw
-
-; AVX1-LABEL: test61:
-; AVX1: vpmaxuw
-
-; AVX2-LABEL: test61:
-; AVX2: vpmaxuw
-
-; AVX512VL-LABEL: test61:
-; AVX512VL: vpmaxuw
-}
-
-define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp ule <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test61:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ult <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %a
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test62(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: test62:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: psubusw %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqw %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test62:
-; SSE4: pmaxuw
-
-; AVX1-LABEL: test62:
-; AVX1: vpmaxuw
-
-; AVX2-LABEL: test62:
-; AVX2: vpmaxuw
-
-; AVX512VL-LABEL: test62:
-; AVX512VL: vpmaxuw
-}
-
-define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp ugt <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test62:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ule <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %a
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test63(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: test63:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test63:
-; SSE4: pminuw
-
-; AVX1-LABEL: test63:
-; AVX1: vpminuw
-
-; AVX2-LABEL: test63:
-; AVX2: vpminuw
-
-; AVX512VL-LABEL: test63:
-; AVX512VL: vpminuw
-}
-
-define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <8 x i16>*
- %ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
- %cmp = icmp uge <8 x i16> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
- store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test63:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ugt <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %a
+ ret <8 x i16> %sel
+}
+
+define <8 x i16> @test64(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: test64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: psubusw %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqw %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test64:
-; SSE4: pminuw
-
-; AVX1-LABEL: test64:
-; AVX1: vpminuw
-
-; AVX2-LABEL: test64:
-; AVX2: vpminuw
-
-; AVX512VL-LABEL: test64:
-; AVX512VL: vpminuw
-}
-
-define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp slt <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp uge <8 x i16> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i16> %b, <8 x i16> %a
+ ret <8 x i16> %sel
+}
+
+define <4 x i32> @test65(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test65:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test65:
-; SSE4: pmaxsd
-
-; AVX1-LABEL: test65:
-; AVX1: vpmaxsd
-
-; AVX2-LABEL: test65:
-; AVX2: vpmaxsd
-
-; AVX512VL-LABEL: test65:
-; AVX512VL: vpmaxsd
-}
-
-define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp sle <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test65:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp slt <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %b, <4 x i32> %a
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test66(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test66:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test66:
-; SSE4: pmaxsd
-
-; AVX1-LABEL: test66:
-; AVX1: vpmaxsd
-
-; AVX2-LABEL: test66:
-; AVX2: vpmaxsd
-
-; AVX512VL-LABEL: test66:
-; AVX512VL: vpmaxsd
-}
-
-define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp sgt <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test66:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sle <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %b, <4 x i32> %a
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test67(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test67:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test67:
-; SSE4: pminsd
-
-; AVX1-LABEL: test67:
-; AVX1: vpminsd
-
-; AVX2-LABEL: test67:
-; AVX2: vpminsd
-
-; AVX512VL-LABEL: test67:
-; AVX512VL: vpminsd
-}
-
-define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp sge <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test67:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sgt <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %b, <4 x i32> %a
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test68(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test68:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test68:
-; SSE4: pminsd
-
-; AVX1-LABEL: test68:
-; AVX1: vpminsd
-
-; AVX2-LABEL: test68:
-; AVX2: vpminsd
-
-; AVX512VL-LABEL: test68:
-; AVX512VL: vpminsd
-}
-
-define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp ult <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test68:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp sge <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %b, <4 x i32> %a
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test69(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test69:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test69:
-; SSE4: pmaxud
-
-; AVX1-LABEL: test69:
-; AVX1: vpmaxud
-
-; AVX2-LABEL: test69:
-; AVX2: vpmaxud
-
-; AVX512VL-LABEL: test69:
-; AVX512VL: vpmaxud
-}
-
-define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp ule <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test69:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ult <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %b, <4 x i32> %a
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test70(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test70:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pxor %xmm0, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test70:
-; SSE4: pmaxud
-
-; AVX1-LABEL: test70:
-; AVX1: vpmaxud
-
-; AVX2-LABEL: test70:
-; AVX2: vpmaxud
-
-; AVX512VL-LABEL: test70:
-; AVX512VL: vpmaxud
-}
-
-define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp ugt <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test70:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ule <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %b, <4 x i32> %a
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test71(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test71:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test71:
-; SSE4: pminud
-
-; AVX1-LABEL: test71:
-; AVX1: vpminud
-
-; AVX2-LABEL: test71:
-; AVX2: vpminud
-
-; AVX512VL-LABEL: test71:
-; AVX512VL: vpminud
-}
-
-define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i32>*
- %ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
- %cmp = icmp uge <4 x i32> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
- store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 4
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test71:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp ugt <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %b, <4 x i32> %a
+ ret <4 x i32> %sel
+}
+
+define <4 x i32> @test72(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: test72:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
; SSE4-LABEL: test72:
-; SSE4: pminud
-
-; AVX1-LABEL: test72:
-; AVX1: vpminud
-
-; AVX2-LABEL: test72:
-; AVX2: vpminud
-
-; AVX512VL-LABEL: test72:
-; AVX512VL: vpminud
-}
-
-define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp slt <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX-LABEL: test72:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %cmp = icmp uge <4 x i32> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i32> %b, <4 x i32> %a
+ ret <4 x i32> %sel
+}
+
+define <32 x i8> @test73(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: test73:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test73:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm2, %xmm0
+; SSE4-NEXT: pmaxsb %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test73:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test73:
-; AVX2: vpmaxsb
-
-; AVX512VL-LABEL: test73:
-; AVX512VL: vpmaxsb
-}
-
-define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp sle <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test73:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %b, <32 x i8> %a
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test74(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: test74:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
+; SSE2-NEXT: movdqa %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm7
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm7
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test74:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm2, %xmm0
+; SSE4-NEXT: pmaxsb %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test74:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test74:
-; AVX2: vpmaxsb
-
-; AVX512VL-LABEL: test74:
-; AVX512VL: vpmaxsb
-}
-
-define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp sgt <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test74:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %b, <32 x i8> %a
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test75(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: test75:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtb %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test75:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm2, %xmm0
+; SSE4-NEXT: pminsb %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test75:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test75:
-; AVX2: vpminsb
-
-; AVX512VL-LABEL: test75:
-; AVX512VL: vpminsb
-}
-
-define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp sge <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test75:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %b, <32 x i8> %a
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test76(<32 x i8> %a, <32 x i8> %b) {
+; SSE2-LABEL: test76:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm7
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm7
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm2, %xmm7
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test76:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm2, %xmm0
+; SSE4-NEXT: pminsb %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test76:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test76:
-; AVX2: vpminsb
-
-; AVX512VL-LABEL: test76:
-; AVX512VL: vpminsb
-}
-
-define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp ult <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test76:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %b, <32 x i8> %a
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test77(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: test77:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm2, %xmm0
+; SSE-NEXT: pmaxub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test77:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test77:
-; AVX2: vpmaxub
-
-; AVX512VL-LABEL: test77:
-; AVX512VL: vpmaxub
-}
-
-define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp ule <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test77:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %b, <32 x i8> %a
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test78(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: test78:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm2, %xmm0
+; SSE-NEXT: pmaxub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test78:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test78:
-; AVX2: vpmaxub
-
-; AVX512VL-LABEL: test78:
-; AVX512VL: vpmaxub
-}
-
-define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp ugt <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test78:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %b, <32 x i8> %a
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test79(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: test79:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm2, %xmm0
+; SSE-NEXT: pminub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test79:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test79:
-; AVX2: vpminub
-
-; AVX512VL-LABEL: test79:
-; AVX512VL: vpminub
-}
-
-define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <32 x i8>*
- %ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
- %cmp = icmp uge <32 x i8> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
- store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test79:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %b, <32 x i8> %a
+ ret <32 x i8> %sel
+}
+
+define <32 x i8> @test80(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: test80:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm2, %xmm0
+; SSE-NEXT: pminub %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test80:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test80:
-; AVX2: vpminub
-
-; AVX512VL-LABEL: test80:
-; AVX512VL: vpminub
-}
-
-define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp slt <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test80:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminub %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <32 x i8> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i8> %b, <32 x i8> %a
+ ret <32 x i8> %sel
+}
+
+define <16 x i16> @test81(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: test81:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm2, %xmm0
+; SSE-NEXT: pmaxsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test81:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test81:
-; AVX2: vpmaxsw
-
-; AVX512VL-LABEL: test81:
-; AVX512VL: vpmaxsw
-}
-
-define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp sle <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test81:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %a
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test82(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: test82:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm2, %xmm0
+; SSE-NEXT: pmaxsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test82:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test82:
-; AVX2: vpmaxsw
-
-; AVX512VL-LABEL: test82:
-; AVX512VL: vpmaxsw
-}
-
-define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp sgt <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test82:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %a
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test83(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: test83:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm2, %xmm0
+; SSE-NEXT: pminsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test83:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test83:
-; AVX2: vpminsw
-
-; AVX512VL-LABEL: test83:
-; AVX512VL: vpminsw
-}
-
-define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp sge <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test83:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %a
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test84(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: test84:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm2, %xmm0
+; SSE-NEXT: pminsw %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test84:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test84:
-; AVX2: vpminsw
-
-; AVX512VL-LABEL: test84:
-; AVX512VL: vpminsw
-}
-
-define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp ult <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test84:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %a
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test85(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: test85:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pcmpgtw %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtw %xmm6, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm2, %xmm4
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test85:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm2, %xmm0
+; SSE4-NEXT: pmaxuw %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test85:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test85:
-; AVX2: vpmaxuw
-
-; AVX512VL-LABEL: test85:
-; AVX512VL: vpmaxuw
-}
-
-define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp ule <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test85:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %a
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test86(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: test86:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: psubusw %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpeqw %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: psubusw %xmm2, %xmm5
+; SSE2-NEXT: pcmpeqw %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test86:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm2, %xmm0
+; SSE4-NEXT: pmaxuw %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test86:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test86:
-; AVX2: vpmaxuw
-
-; AVX512VL-LABEL: test86:
-; AVX512VL: vpmaxuw
-}
-
-define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp ugt <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test86:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %a
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test87(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: test87:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pcmpgtw %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm4
+; SSE2-NEXT: pcmpgtw %xmm6, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm2, %xmm4
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test87:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm2, %xmm0
+; SSE4-NEXT: pminuw %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test87:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test87:
-; AVX2: vpminuw
-
-; AVX512VL-LABEL: test87:
-; AVX512VL: vpminuw
-}
-
-define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <16 x i16>*
- %ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
- %cmp = icmp uge <16 x i16> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
- store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test87:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %a
+ ret <16 x i16> %sel
+}
+
+define <16 x i16> @test88(<16 x i16> %a, <16 x i16> %b) {
+; SSE2-LABEL: test88:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: psubusw %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpeqw %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: psubusw %xmm0, %xmm5
+; SSE2-NEXT: pcmpeqw %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test88:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm2, %xmm0
+; SSE4-NEXT: pminuw %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test88:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test88:
-; AVX2: vpminuw
-
-; AVX512VL-LABEL: test88:
-; AVX512VL: vpminuw
-}
-
-define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp slt <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test88:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminuw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <16 x i16> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i16> %b, <16 x i16> %a
+ ret <16 x i16> %sel
+}
+
+define <8 x i32> @test89(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test89:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test89:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm2, %xmm0
+; SSE4-NEXT: pmaxsd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test89:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test89:
-; AVX2: vpmaxsd
-
-; AVX512VL-LABEL: test89:
-; AVX512VL: vpmaxsd
-}
-
-define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp sle <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test89:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %b, <8 x i32> %a
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test90(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test90:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
+; SSE2-NEXT: movdqa %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm7
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm7
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test90:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm2, %xmm0
+; SSE4-NEXT: pmaxsd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test90:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test90:
-; AVX2: vpmaxsd
-
-; AVX512VL-LABEL: test90:
-; AVX512VL: vpmaxsd
-}
-
-define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp sgt <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test90:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %b, <8 x i32> %a
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test91(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test91:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test91:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm2, %xmm0
+; SSE4-NEXT: pminsd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test91:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test91:
-; AVX2: vpminsd
-
-; AVX512VL-LABEL: test91:
-; AVX512VL: vpminsd
-}
-
-define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp sge <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test91:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %b, <8 x i32> %a
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test92(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test92:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm7
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm2, %xmm7
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm7, %xmm4
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test92:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm2, %xmm0
+; SSE4-NEXT: pminsd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test92:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test92:
-; AVX2: vpminsd
-
-; AVX512VL-LABEL: test92:
-; AVX512VL: vpminsd
-}
-
-define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp ult <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test92:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %b, <8 x i32> %a
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test93(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test93:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm2, %xmm4
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test93:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm2, %xmm0
+; SSE4-NEXT: pmaxud %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test93:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test93:
-; AVX2: vpmaxud
-
-; AVX512VL-LABEL: test93:
-; AVX512VL: vpmaxud
-}
-
-define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp ule <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test93:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %b, <8 x i32> %a
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test94(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test94:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm7
+; SSE2-NEXT: pxor %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm8
+; SSE2-NEXT: pxor %xmm6, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm2, %xmm6
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm3, %xmm7
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test94:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm2, %xmm0
+; SSE4-NEXT: pmaxud %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test94:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test94:
-; AVX2: vpmaxud
-
-; AVX512VL-LABEL: test94:
-; AVX512VL: vpmaxud
-}
-
-define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp ugt <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test94:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %b, <8 x i32> %a
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test95(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test95:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm2, %xmm4
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test95:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm2, %xmm0
+; SSE4-NEXT: pminud %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test95:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test95:
-; AVX2: vpminud
-
-; AVX512VL-LABEL: test95:
-; AVX512VL: vpminud
-}
-
-define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i32>*
- %ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
- %cmp = icmp uge <8 x i32> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
- store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test95:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %b, <8 x i32> %a
+ ret <8 x i32> %sel
+}
+
+define <8 x i32> @test96(<8 x i32> %a, <8 x i32> %b) {
+; SSE2-LABEL: test96:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: pxor %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm0, %xmm8
+; SSE2-NEXT: pxor %xmm6, %xmm8
+; SSE2-NEXT: pxor %xmm2, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
+; SSE2-NEXT: pxor %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm2, %xmm6
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: pandn %xmm3, %xmm7
+; SSE2-NEXT: pandn %xmm1, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test96:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm2, %xmm0
+; SSE4-NEXT: pminud %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test96:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: test96:
-; AVX2: vpminud
-
-; AVX512VL-LABEL: test96:
-; AVX512VL: vpminud
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test96:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <8 x i32> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i32> %b, <8 x i32> %a
+ ret <8 x i32> %sel
}
; ----------------------------
-define void @test97(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp slt <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+define <64 x i8> @test97(<64 x i8> %a, <64 x i8> %b) {
+; SSE2-LABEL: test97:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pcmpgtb %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm10
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm11, %xmm0
+; SSE2-NEXT: pand %xmm10, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm10
+; SSE2-NEXT: por %xmm10, %xmm1
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm9, %xmm2
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test97:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm4, %xmm0
+; SSE4-NEXT: pminsb %xmm5, %xmm1
+; SSE4-NEXT: pminsb %xmm6, %xmm2
+; SSE4-NEXT: pminsb %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test97:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test97:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test97:
-; AVX512BW: vpminsb {{.*}}
-}
-
-define void @test98(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp sle <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp slt <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %a, <64 x i8> %b
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test98(<64 x i8> %a, <64 x i8> %b) {
+; SSE2-LABEL: test98:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm8, %xmm12
+; SSE2-NEXT: pcmpgtb %xmm7, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm13, %xmm13
+; SSE2-NEXT: movdqa %xmm12, %xmm3
+; SSE2-NEXT: pxor %xmm13, %xmm3
+; SSE2-NEXT: movdqa %xmm9, %xmm14
+; SSE2-NEXT: pcmpgtb %xmm6, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm2
+; SSE2-NEXT: pxor %xmm13, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm15
+; SSE2-NEXT: pcmpgtb %xmm5, %xmm15
+; SSE2-NEXT: movdqa %xmm15, %xmm10
+; SSE2-NEXT: pxor %xmm13, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm11, %xmm13
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: pandn %xmm4, %xmm13
+; SSE2-NEXT: por %xmm13, %xmm11
+; SSE2-NEXT: pandn %xmm1, %xmm15
+; SSE2-NEXT: pandn %xmm5, %xmm10
+; SSE2-NEXT: por %xmm15, %xmm10
+; SSE2-NEXT: pandn %xmm9, %xmm14
+; SSE2-NEXT: pandn %xmm6, %xmm2
+; SSE2-NEXT: por %xmm14, %xmm2
+; SSE2-NEXT: pandn %xmm8, %xmm12
+; SSE2-NEXT: pandn %xmm7, %xmm3
+; SSE2-NEXT: por %xmm12, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test98:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm4, %xmm0
+; SSE4-NEXT: pminsb %xmm5, %xmm1
+; SSE4-NEXT: pminsb %xmm6, %xmm2
+; SSE4-NEXT: pminsb %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test98:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test98:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test98:
-; AVX512BW: vpminsb {{.*}}
-}
-
-define void @test99(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp sgt <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sle <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %a, <64 x i8> %b
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test99(<64 x i8> %a, <64 x i8> %b) {
+; SSE2-LABEL: test99:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pcmpgtb %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pcmpgtb %xmm6, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pcmpgtb %xmm5, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm0, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm10
+; SSE2-NEXT: por %xmm1, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm2, %xmm9
+; SSE2-NEXT: pand %xmm3, %xmm8
+; SSE2-NEXT: pandn %xmm7, %xmm3
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test99:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm4, %xmm0
+; SSE4-NEXT: pmaxsb %xmm5, %xmm1
+; SSE4-NEXT: pmaxsb %xmm6, %xmm2
+; SSE4-NEXT: pmaxsb %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test99:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test99:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test99:
-; AVX512BW: vpmaxsb {{.*}}
-}
-
-define void @test100(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp sge <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sgt <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %a, <64 x i8> %b
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test100(<64 x i8> %a, <64 x i8> %b) {
+; SSE2-LABEL: test100:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm0, %xmm10
+; SSE2-NEXT: movdqa %xmm7, %xmm12
+; SSE2-NEXT: pcmpgtb %xmm8, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: movdqa %xmm12, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm3
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pcmpgtb %xmm9, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm2
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm5, %xmm14
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: movdqa %xmm4, %xmm15
+; SSE2-NEXT: pcmpgtb %xmm10, %xmm15
+; SSE2-NEXT: pxor %xmm15, %xmm0
+; SSE2-NEXT: pandn %xmm10, %xmm15
+; SSE2-NEXT: pandn %xmm4, %xmm0
+; SSE2-NEXT: por %xmm15, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm14
+; SSE2-NEXT: pandn %xmm5, %xmm11
+; SSE2-NEXT: por %xmm14, %xmm11
+; SSE2-NEXT: pandn %xmm9, %xmm13
+; SSE2-NEXT: pandn %xmm6, %xmm2
+; SSE2-NEXT: por %xmm13, %xmm2
+; SSE2-NEXT: pandn %xmm8, %xmm12
+; SSE2-NEXT: pandn %xmm7, %xmm3
+; SSE2-NEXT: por %xmm12, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test100:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm4, %xmm0
+; SSE4-NEXT: pmaxsb %xmm5, %xmm1
+; SSE4-NEXT: pmaxsb %xmm6, %xmm2
+; SSE4-NEXT: pmaxsb %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test100:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test100:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test100:
-; AVX512BW: vpmaxsb {{.*}}
-}
-
-define void @test101(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp ult <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sge <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %a, <64 x i8> %b
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test101(<64 x i8> %a, <64 x i8> %b) {
+; SSE-LABEL: test101:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm4, %xmm0
+; SSE-NEXT: pminub %xmm5, %xmm1
+; SSE-NEXT: pminub %xmm6, %xmm2
+; SSE-NEXT: pminub %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test101:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminub %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminub %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test101:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminub %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test101:
-; AVX512BW: vpminub {{.*}}
-}
-
-define void @test102(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp ule <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ult <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %a, <64 x i8> %b
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test102(<64 x i8> %a, <64 x i8> %b) {
+; SSE-LABEL: test102:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm4, %xmm0
+; SSE-NEXT: pminub %xmm5, %xmm1
+; SSE-NEXT: pminub %xmm6, %xmm2
+; SSE-NEXT: pminub %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test102:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminub %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminub %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test102:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminub %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test102:
-; AVX512BW: vpminub {{.*}}
-}
-
-define void @test103(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp ugt <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ule <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %a, <64 x i8> %b
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test103(<64 x i8> %a, <64 x i8> %b) {
+; SSE-LABEL: test103:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm4, %xmm0
+; SSE-NEXT: pmaxub %xmm5, %xmm1
+; SSE-NEXT: pmaxub %xmm6, %xmm2
+; SSE-NEXT: pmaxub %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test103:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxub %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxub %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test103:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxub %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test103:
-; AVX512BW: vpmaxub {{.*}}
-}
-
-define void @test104(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp uge <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ugt <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %a, <64 x i8> %b
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test104(<64 x i8> %a, <64 x i8> %b) {
+; SSE-LABEL: test104:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm4, %xmm0
+; SSE-NEXT: pmaxub %xmm5, %xmm1
+; SSE-NEXT: pmaxub %xmm6, %xmm2
+; SSE-NEXT: pmaxub %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test104:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxub %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxub %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test104:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxub %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test104:
-; AVX512BW: vpmaxub {{.*}}
-}
-
-define void @test105(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp slt <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp uge <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %a, <64 x i8> %b
+ ret <64 x i8> %sel
+}
+
+define <32 x i16> @test105(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: test105:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm4, %xmm0
+; SSE-NEXT: pminsw %xmm5, %xmm1
+; SSE-NEXT: pminsw %xmm6, %xmm2
+; SSE-NEXT: pminsw %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test105:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test105:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test105:
-; AVX512BW: vpminsw {{.*}}
-}
-
-define void @test106(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp sle <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp slt <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test106(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: test106:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm4, %xmm0
+; SSE-NEXT: pminsw %xmm5, %xmm1
+; SSE-NEXT: pminsw %xmm6, %xmm2
+; SSE-NEXT: pminsw %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test106:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test106:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test106:
-; AVX512BW: vpminsw {{.*}}
-}
-
-define void @test107(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp sgt <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sle <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test107(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: test107:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm4, %xmm0
+; SSE-NEXT: pmaxsw %xmm5, %xmm1
+; SSE-NEXT: pmaxsw %xmm6, %xmm2
+; SSE-NEXT: pmaxsw %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test107:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test107:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test107:
-; AVX512BW: vpmaxsw {{.*}}
-}
-
-define void @test108(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp sge <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sgt <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test108(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: test108:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm4, %xmm0
+; SSE-NEXT: pmaxsw %xmm5, %xmm1
+; SSE-NEXT: pmaxsw %xmm6, %xmm2
+; SSE-NEXT: pmaxsw %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test108:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test108:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test108:
-; AVX512BW: vpmaxsw {{.*}}
-}
-
-define void @test109(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp ult <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sge <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test109(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: test109:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm3, %xmm9
+; SSE2-NEXT: pxor %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: pcmpgtw %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pxor %xmm10, %xmm9
+; SSE2-NEXT: pcmpgtw %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm10, %xmm12
+; SSE2-NEXT: pcmpgtw %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm10
+; SSE2-NEXT: pcmpgtw %xmm11, %xmm10
+; SSE2-NEXT: pand %xmm10, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm10
+; SSE2-NEXT: por %xmm10, %xmm0
+; SSE2-NEXT: pand %xmm12, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm12
+; SSE2-NEXT: por %xmm12, %xmm1
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm9, %xmm2
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test109:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm4, %xmm0
+; SSE4-NEXT: pminuw %xmm5, %xmm1
+; SSE4-NEXT: pminuw %xmm6, %xmm2
+; SSE4-NEXT: pminuw %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test109:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminuw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminuw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test109:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminuw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test109:
-; AVX512BW: vpminuw {{.*}}
-}
-
-define void @test110(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp ule <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ult <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test110(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: test110:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: psubusw %xmm7, %xmm3
+; SSE2-NEXT: pxor %xmm12, %xmm12
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm3
+; SSE2-NEXT: psubusw %xmm6, %xmm2
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm2
+; SSE2-NEXT: psubusw %xmm5, %xmm1
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: psubusw %xmm4, %xmm11
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm0, %xmm11
+; SSE2-NEXT: pand %xmm1, %xmm10
+; SSE2-NEXT: pandn %xmm5, %xmm1
+; SSE2-NEXT: por %xmm10, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm9
+; SSE2-NEXT: pandn %xmm6, %xmm2
+; SSE2-NEXT: por %xmm9, %xmm2
+; SSE2-NEXT: pand %xmm3, %xmm8
+; SSE2-NEXT: pandn %xmm7, %xmm3
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test110:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm4, %xmm0
+; SSE4-NEXT: pminuw %xmm5, %xmm1
+; SSE4-NEXT: pminuw %xmm6, %xmm2
+; SSE4-NEXT: pminuw %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test110:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminuw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminuw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test110:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminuw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test110:
-; AVX512BW: vpminuw {{.*}}
-}
-
-define void @test111(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp ugt <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ule <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test111(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: test111:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm7, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pxor %xmm11, %xmm8
+; SSE2-NEXT: pcmpgtw %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: pcmpgtw %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: pcmpgtw %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtw %xmm12, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm11, %xmm0
+; SSE2-NEXT: pand %xmm10, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm10
+; SSE2-NEXT: por %xmm1, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm2, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test111:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm4, %xmm0
+; SSE4-NEXT: pmaxuw %xmm5, %xmm1
+; SSE4-NEXT: pmaxuw %xmm6, %xmm2
+; SSE4-NEXT: pmaxuw %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test111:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test111:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxuw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test111:
-; AVX512BW: vpmaxuw {{.*}}
-}
-
-define void @test112(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp uge <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ugt <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test112(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: test112:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: psubusw %xmm3, %xmm8
+; SSE2-NEXT: pxor %xmm9, %xmm9
+; SSE2-NEXT: pcmpeqw %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: psubusw %xmm2, %xmm10
+; SSE2-NEXT: pcmpeqw %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm5, %xmm11
+; SSE2-NEXT: psubusw %xmm1, %xmm11
+; SSE2-NEXT: pcmpeqw %xmm9, %xmm11
+; SSE2-NEXT: movdqa %xmm4, %xmm12
+; SSE2-NEXT: psubusw %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqw %xmm9, %xmm12
+; SSE2-NEXT: pand %xmm12, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm12
+; SSE2-NEXT: por %xmm12, %xmm0
+; SSE2-NEXT: pand %xmm11, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm11
+; SSE2-NEXT: por %xmm11, %xmm1
+; SSE2-NEXT: pand %xmm10, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm10
+; SSE2-NEXT: por %xmm10, %xmm2
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test112:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm4, %xmm0
+; SSE4-NEXT: pmaxuw %xmm5, %xmm1
+; SSE4-NEXT: pmaxuw %xmm6, %xmm2
+; SSE4-NEXT: pmaxuw %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test112:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test112:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxuw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test112:
-; AVX512BW: vpmaxuw {{.*}}
-}
-
-define void @test113(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp slt <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp uge <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %a, <32 x i16> %b
+ ret <32 x i16> %sel
+}
+
+define <16 x i32> @test113(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test113:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm11, %xmm0
+; SSE2-NEXT: pand %xmm10, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm10
+; SSE2-NEXT: por %xmm10, %xmm1
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm9, %xmm2
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test113:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm4, %xmm0
+; SSE4-NEXT: pminsd %xmm5, %xmm1
+; SSE4-NEXT: pminsd %xmm6, %xmm2
+; SSE4-NEXT: pminsd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test113:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test113:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test113:
-; AVX512F: vpminsd {{.*}}
-}
-
-define void @test114(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp sle <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test114(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test114:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm8, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm7, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm13, %xmm13
+; SSE2-NEXT: movdqa %xmm12, %xmm3
+; SSE2-NEXT: pxor %xmm13, %xmm3
+; SSE2-NEXT: movdqa %xmm9, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm2
+; SSE2-NEXT: pxor %xmm13, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm15
+; SSE2-NEXT: movdqa %xmm15, %xmm10
+; SSE2-NEXT: pxor %xmm13, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm11, %xmm13
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: pandn %xmm4, %xmm13
+; SSE2-NEXT: por %xmm13, %xmm11
+; SSE2-NEXT: pandn %xmm1, %xmm15
+; SSE2-NEXT: pandn %xmm5, %xmm10
+; SSE2-NEXT: por %xmm15, %xmm10
+; SSE2-NEXT: pandn %xmm9, %xmm14
+; SSE2-NEXT: pandn %xmm6, %xmm2
+; SSE2-NEXT: por %xmm14, %xmm2
+; SSE2-NEXT: pandn %xmm8, %xmm12
+; SSE2-NEXT: pandn %xmm7, %xmm3
+; SSE2-NEXT: por %xmm12, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test114:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm4, %xmm0
+; SSE4-NEXT: pminsd %xmm5, %xmm1
+; SSE4-NEXT: pminsd %xmm6, %xmm2
+; SSE4-NEXT: pminsd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test114:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test114:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test114:
-; AVX512F: vpminsd {{.*}}
-}
-
-define void @test115(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp sgt <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test115(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test115:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pcmpgtd %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm0, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm10
+; SSE2-NEXT: por %xmm1, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm2, %xmm9
+; SSE2-NEXT: pand %xmm3, %xmm8
+; SSE2-NEXT: pandn %xmm7, %xmm3
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test115:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm4, %xmm0
+; SSE4-NEXT: pmaxsd %xmm5, %xmm1
+; SSE4-NEXT: pmaxsd %xmm6, %xmm2
+; SSE4-NEXT: pmaxsd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test115:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test115:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test115:
-; AVX512F: vpmaxsd {{.*}}
-}
-
-define void @test116(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp sge <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test116(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test116:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm0, %xmm10
+; SSE2-NEXT: movdqa %xmm7, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: movdqa %xmm12, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm3
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm2
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm5, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: movdqa %xmm4, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm15
+; SSE2-NEXT: pxor %xmm15, %xmm0
+; SSE2-NEXT: pandn %xmm10, %xmm15
+; SSE2-NEXT: pandn %xmm4, %xmm0
+; SSE2-NEXT: por %xmm15, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm14
+; SSE2-NEXT: pandn %xmm5, %xmm11
+; SSE2-NEXT: por %xmm14, %xmm11
+; SSE2-NEXT: pandn %xmm9, %xmm13
+; SSE2-NEXT: pandn %xmm6, %xmm2
+; SSE2-NEXT: por %xmm13, %xmm2
+; SSE2-NEXT: pandn %xmm8, %xmm12
+; SSE2-NEXT: pandn %xmm7, %xmm3
+; SSE2-NEXT: por %xmm12, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test116:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm4, %xmm0
+; SSE4-NEXT: pmaxsd %xmm5, %xmm1
+; SSE4-NEXT: pmaxsd %xmm6, %xmm2
+; SSE4-NEXT: pmaxsd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test116:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test116:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test116:
-; AVX512F: vpmaxsd {{.*}}
-}
-
-define void @test117(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp ult <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test117(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test117:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm9
+; SSE2-NEXT: pxor %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pxor %xmm10, %xmm9
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm10, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm10
+; SSE2-NEXT: pand %xmm10, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm10
+; SSE2-NEXT: por %xmm10, %xmm0
+; SSE2-NEXT: pand %xmm12, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm12
+; SSE2-NEXT: por %xmm12, %xmm1
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm9, %xmm2
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test117:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm4, %xmm0
+; SSE4-NEXT: pminud %xmm5, %xmm1
+; SSE4-NEXT: pminud %xmm6, %xmm2
+; SSE4-NEXT: pminud %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test117:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminud %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminud %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test117:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test117:
-; AVX512F: vpminud {{.*}}
-}
-
-define void @test118(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp ule <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test118(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test118:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm10
+; SSE2-NEXT: movdqa {{.*#+}} xmm14 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm7, %xmm0
+; SSE2-NEXT: pxor %xmm14, %xmm0
+; SSE2-NEXT: movdqa %xmm3, %xmm12
+; SSE2-NEXT: pxor %xmm14, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pxor %xmm14, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm13
+; SSE2-NEXT: pxor %xmm14, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm11
+; SSE2-NEXT: movdqa %xmm1, %xmm15
+; SSE2-NEXT: pxor %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm0
+; SSE2-NEXT: pandn %xmm10, %xmm14
+; SSE2-NEXT: pandn %xmm4, %xmm0
+; SSE2-NEXT: por %xmm14, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm15
+; SSE2-NEXT: pandn %xmm5, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm2, %xmm13
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm13, %xmm9
+; SSE2-NEXT: pandn %xmm3, %xmm12
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test118:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm4, %xmm0
+; SSE4-NEXT: pminud %xmm5, %xmm1
+; SSE4-NEXT: pminud %xmm6, %xmm2
+; SSE4-NEXT: pminud %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test118:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminud %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminud %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test118:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test118:
-; AVX512F: vpminud {{.*}}
-}
-
-define void @test119(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp ugt <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test119(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test119:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm7, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pxor %xmm11, %xmm8
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm11, %xmm0
+; SSE2-NEXT: pand %xmm10, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm10
+; SSE2-NEXT: por %xmm1, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm2, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test119:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm4, %xmm0
+; SSE4-NEXT: pmaxud %xmm5, %xmm1
+; SSE4-NEXT: pmaxud %xmm6, %xmm2
+; SSE4-NEXT: pmaxud %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test119:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxud %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxud %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test119:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test119:
-; AVX512F: vpmaxud {{.*}}
-}
-
-define void @test120(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp uge <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test120(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test120:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm10
+; SSE2-NEXT: movdqa {{.*#+}} xmm14 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm0
+; SSE2-NEXT: pxor %xmm14, %xmm0
+; SSE2-NEXT: movdqa %xmm7, %xmm12
+; SSE2-NEXT: pxor %xmm14, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pxor %xmm14, %xmm9
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pxor %xmm14, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm15
+; SSE2-NEXT: pxor %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: movdqa %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm0
+; SSE2-NEXT: pandn %xmm10, %xmm14
+; SSE2-NEXT: pandn %xmm4, %xmm0
+; SSE2-NEXT: por %xmm14, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm15
+; SSE2-NEXT: pandn %xmm5, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm2, %xmm13
+; SSE2-NEXT: pandn %xmm6, %xmm9
+; SSE2-NEXT: por %xmm13, %xmm9
+; SSE2-NEXT: pandn %xmm3, %xmm12
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test120:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm4, %xmm0
+; SSE4-NEXT: pmaxud %xmm5, %xmm1
+; SSE4-NEXT: pmaxud %xmm6, %xmm2
+; SSE4-NEXT: pmaxud %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test120:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxud %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxud %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test120:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test120:
-; AVX512F: vpmaxud {{.*}}
-}
-
-define void @test121(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp slt <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %sel
+}
+
+define <8 x i64> @test121(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test121:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pxor %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm10
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm10
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm6, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: movdqa %xmm11, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm10, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm11[1,1,3,3]
+; SSE2-NEXT: pand %xmm13, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm12[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm9, %xmm12
+; SSE2-NEXT: movdqa %xmm12, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm12[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm9, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm9[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm9, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm11, %xmm0
+; SSE2-NEXT: pand %xmm12, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm12
+; SSE2-NEXT: por %xmm12, %xmm1
+; SSE2-NEXT: pand %xmm10, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm10
+; SSE2-NEXT: por %xmm10, %xmm2
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test121:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa %xmm7, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm9
+; SSE4-NEXT: movdqa %xmm6, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm10
+; SSE4-NEXT: movdqa %xmm5, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm11
+; SSE4-NEXT: movdqa %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm8, %xmm0
+; SSE4-NEXT: blendvpd %xmm8, %xmm4
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm6
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm7
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: movapd %xmm5, %xmm1
+; SSE4-NEXT: movapd %xmm6, %xmm2
+; SSE4-NEXT: movapd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test121:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test121:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm3, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm5
+; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test121:
-; AVX512F: vpminsq {{.*}}
-}
-
-define void @test122(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp sle <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test122(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test122:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm9
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm0
+; SSE2-NEXT: pxor %xmm10, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm1, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm3, %xmm13
+; SSE2-NEXT: pxor %xmm10, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: pand %xmm15, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm13
+; SSE2-NEXT: movdqa %xmm5, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm2, %xmm14
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm15[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm15[1,1,3,3]
+; SSE2-NEXT: por %xmm14, %xmm15
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm14[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm13, %xmm10
+; SSE2-NEXT: pxor %xmm1, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm9, %xmm14
+; SSE2-NEXT: pandn %xmm4, %xmm1
+; SSE2-NEXT: por %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm2, %xmm15
+; SSE2-NEXT: pandn %xmm5, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm3, %xmm13
+; SSE2-NEXT: pandn %xmm6, %xmm10
+; SSE2-NEXT: por %xmm13, %xmm10
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn -{{[0-9]+}}(%rsp), %xmm8 # 16-byte Folded Reload
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm10, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test122:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa %xmm3, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm7, %xmm9
+; SSE4-NEXT: pcmpeqd %xmm12, %xmm12
+; SSE4-NEXT: pxor %xmm12, %xmm9
+; SSE4-NEXT: movdqa %xmm2, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm10
+; SSE4-NEXT: pxor %xmm12, %xmm10
+; SSE4-NEXT: movdqa %xmm1, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm5, %xmm11
+; SSE4-NEXT: pxor %xmm12, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm4, %xmm0
+; SSE4-NEXT: pxor %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm8, %xmm4
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm6
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm7
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: movapd %xmm5, %xmm1
+; SSE4-NEXT: movapd %xmm6, %xmm2
+; SSE4-NEXT: movapd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test122:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test122:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
+; AVX2-NEXT: vpxor %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm6
+; AVX2-NEXT: vpxor %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test122:
-; AVX512F: vpminsq {{.*}}
-}
-
-define void @test123(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp sgt <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test123(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test123:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm10
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm2, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: movdqa %xmm11, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm10, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm11[1,1,3,3]
+; SSE2-NEXT: pand %xmm13, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm12[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm5, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: movdqa %xmm1, %xmm12
+; SSE2-NEXT: pxor %xmm9, %xmm12
+; SSE2-NEXT: movdqa %xmm12, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm12[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm9, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm9[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm9, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm11, %xmm0
+; SSE2-NEXT: pand %xmm12, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm12
+; SSE2-NEXT: por %xmm12, %xmm1
+; SSE2-NEXT: pand %xmm10, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm10
+; SSE2-NEXT: por %xmm10, %xmm2
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test123:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa %xmm3, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm7, %xmm9
+; SSE4-NEXT: movdqa %xmm2, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm10
+; SSE4-NEXT: movdqa %xmm1, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm5, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm4, %xmm0
+; SSE4-NEXT: blendvpd %xmm8, %xmm4
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm6
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm7
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: movapd %xmm5, %xmm1
+; SSE4-NEXT: movapd %xmm6, %xmm2
+; SSE4-NEXT: movapd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test123:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test123:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm5
+; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test123:
-; AVX512F: vpmaxsq {{.*}}
-}
-
-define void @test124(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp sge <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test124(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test124:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm11
+; SSE2-NEXT: movdqa %xmm11, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm9
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: pxor %xmm10, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm1, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pxor %xmm10, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: pand %xmm15, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm13
+; SSE2-NEXT: movdqa %xmm2, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm14
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm15[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm15[1,1,3,3]
+; SSE2-NEXT: por %xmm14, %xmm15
+; SSE2-NEXT: movdqa %xmm9, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm14[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm13, %xmm10
+; SSE2-NEXT: pxor %xmm1, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm9, %xmm14
+; SSE2-NEXT: pandn %xmm4, %xmm1
+; SSE2-NEXT: por %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm2, %xmm15
+; SSE2-NEXT: pandn %xmm5, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm3, %xmm13
+; SSE2-NEXT: pandn %xmm6, %xmm10
+; SSE2-NEXT: por %xmm13, %xmm10
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn -{{[0-9]+}}(%rsp), %xmm8 # 16-byte Folded Reload
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm10, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test124:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa %xmm7, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm9
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm0, %xmm9
+; SSE4-NEXT: movdqa %xmm6, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm5, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm4, %xmm12
+; SSE4-NEXT: pcmpgtq %xmm8, %xmm12
+; SSE4-NEXT: pxor %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm8, %xmm4
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm6
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm7
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: movapd %xmm5, %xmm1
+; SSE4-NEXT: movapd %xmm6, %xmm2
+; SSE4-NEXT: movapd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test124:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test124:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm3, %ymm4
+; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
+; AVX2-NEXT: vpxor %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm6
+; AVX2-NEXT: vpxor %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test124:
-; AVX512F: vpmaxsq {{.*}}
-}
-
-define void @test125(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp ult <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test125(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test125:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pxor %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm10
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm10
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm6, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: movdqa %xmm11, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm10, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm11[1,1,3,3]
+; SSE2-NEXT: pand %xmm13, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm12[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm9, %xmm12
+; SSE2-NEXT: movdqa %xmm12, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm12[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm9, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm9[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm9, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm11, %xmm0
+; SSE2-NEXT: pand %xmm12, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm12
+; SSE2-NEXT: por %xmm12, %xmm1
+; SSE2-NEXT: pand %xmm10, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm10
+; SSE2-NEXT: por %xmm10, %xmm2
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test125:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm3, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm7, %xmm9
+; SSE4-NEXT: pxor %xmm0, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm10, %xmm9
+; SSE4-NEXT: movdqa %xmm2, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm6, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm11, %xmm10
+; SSE4-NEXT: movdqa %xmm1, %xmm12
+; SSE4-NEXT: pxor %xmm0, %xmm12
+; SSE4-NEXT: movdqa %xmm5, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm12, %xmm11
+; SSE4-NEXT: movdqa %xmm8, %xmm12
+; SSE4-NEXT: pxor %xmm0, %xmm12
+; SSE4-NEXT: pxor %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm8, %xmm4
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm6
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm7
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: movapd %xmm5, %xmm1
+; SSE4-NEXT: movapd %xmm6, %xmm2
+; SSE4-NEXT: movapd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test125:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test125:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm6
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm4
+; AVX2-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test125:
-; AVX512F: vpminuq {{.*}}
-}
-
-define void @test126(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp ule <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test126(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test126:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm9
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm0
+; SSE2-NEXT: pxor %xmm10, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm1, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm3, %xmm13
+; SSE2-NEXT: pxor %xmm10, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: pand %xmm15, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm13
+; SSE2-NEXT: movdqa %xmm5, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm2, %xmm14
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm15[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm15[1,1,3,3]
+; SSE2-NEXT: por %xmm14, %xmm15
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm14[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm13, %xmm10
+; SSE2-NEXT: pxor %xmm1, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm9, %xmm14
+; SSE2-NEXT: pandn %xmm4, %xmm1
+; SSE2-NEXT: por %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm2, %xmm15
+; SSE2-NEXT: pandn %xmm5, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm3, %xmm13
+; SSE2-NEXT: pandn %xmm6, %xmm10
+; SSE2-NEXT: por %xmm13, %xmm10
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn -{{[0-9]+}}(%rsp), %xmm8 # 16-byte Folded Reload
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm10, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test126:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm9
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm7, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm3, %xmm8
+; SSE4-NEXT: pxor %xmm0, %xmm8
+; SSE4-NEXT: pcmpgtq %xmm10, %xmm8
+; SSE4-NEXT: pcmpeqd %xmm12, %xmm12
+; SSE4-NEXT: pxor %xmm12, %xmm8
+; SSE4-NEXT: movdqa %xmm6, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm2, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm11, %xmm10
+; SSE4-NEXT: pxor %xmm12, %xmm10
+; SSE4-NEXT: movdqa %xmm5, %xmm13
+; SSE4-NEXT: pxor %xmm0, %xmm13
+; SSE4-NEXT: movdqa %xmm1, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm13, %xmm11
+; SSE4-NEXT: pxor %xmm12, %xmm11
+; SSE4-NEXT: movdqa %xmm4, %xmm13
+; SSE4-NEXT: pxor %xmm0, %xmm13
+; SSE4-NEXT: pxor %xmm9, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm13, %xmm0
+; SSE4-NEXT: pxor %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm9, %xmm4
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm6
+; SSE4-NEXT: movdqa %xmm8, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm7
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: movapd %xmm5, %xmm1
+; SSE4-NEXT: movapd %xmm6, %xmm2
+; SSE4-NEXT: movapd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test126:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8
+; AVX1-NEXT: vpxor %xmm8, %xmm4, %xmm4
+; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpxor %xmm8, %xmm5, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test126:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vpcmpeqd %ymm6, %ymm6, %ymm6
+; AVX2-NEXT: vpxor %ymm6, %ymm5, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm7
+; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm7, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %ymm6, %ymm4, %ymm4
+; AVX2-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test126:
-; AVX512F: vpminuq {{.*}}
-}
-
-define void @test127(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp ugt <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test127(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test127:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm10
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm2, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: movdqa %xmm11, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm10, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm11[1,1,3,3]
+; SSE2-NEXT: pand %xmm13, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm12[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm5, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: movdqa %xmm1, %xmm12
+; SSE2-NEXT: pxor %xmm9, %xmm12
+; SSE2-NEXT: movdqa %xmm12, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm12[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm9, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm9[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm9, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: por %xmm11, %xmm0
+; SSE2-NEXT: pand %xmm12, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm12
+; SSE2-NEXT: por %xmm12, %xmm1
+; SSE2-NEXT: pand %xmm10, %xmm2
+; SSE2-NEXT: pandn %xmm6, %xmm10
+; SSE2-NEXT: por %xmm10, %xmm2
+; SSE2-NEXT: pand %xmm8, %xmm3
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test127:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm7, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm3, %xmm9
+; SSE4-NEXT: pxor %xmm0, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm10, %xmm9
+; SSE4-NEXT: movdqa %xmm6, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm2, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm11, %xmm10
+; SSE4-NEXT: movdqa %xmm5, %xmm12
+; SSE4-NEXT: pxor %xmm0, %xmm12
+; SSE4-NEXT: movdqa %xmm1, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm12, %xmm11
+; SSE4-NEXT: movdqa %xmm4, %xmm12
+; SSE4-NEXT: pxor %xmm0, %xmm12
+; SSE4-NEXT: pxor %xmm8, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm8, %xmm4
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm6
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm7
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: movapd %xmm5, %xmm1
+; SSE4-NEXT: movapd %xmm6, %xmm2
+; SSE4-NEXT: movapd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test127:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test127:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm6
+; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm4
+; AVX2-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test127:
-; AVX512F: vpmaxuq {{.*}}
-}
-
-define void @test128(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp uge <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test128(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test128:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm11
+; SSE2-NEXT: movdqa %xmm11, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm9
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: pxor %xmm10, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm1, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pxor %xmm10, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: pand %xmm15, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm13
+; SSE2-NEXT: movdqa %xmm2, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm14
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm15[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm15[1,1,3,3]
+; SSE2-NEXT: por %xmm14, %xmm15
+; SSE2-NEXT: movdqa %xmm9, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm14[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm13, %xmm10
+; SSE2-NEXT: pxor %xmm1, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm9, %xmm14
+; SSE2-NEXT: pandn %xmm4, %xmm1
+; SSE2-NEXT: por %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm2, %xmm15
+; SSE2-NEXT: pandn %xmm5, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm3, %xmm13
+; SSE2-NEXT: pandn %xmm6, %xmm10
+; SSE2-NEXT: por %xmm13, %xmm10
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn -{{[0-9]+}}(%rsp), %xmm8 # 16-byte Folded Reload
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm10, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test128:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm9
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm3, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm7, %xmm8
+; SSE4-NEXT: pxor %xmm0, %xmm8
+; SSE4-NEXT: pcmpgtq %xmm10, %xmm8
+; SSE4-NEXT: pcmpeqd %xmm12, %xmm12
+; SSE4-NEXT: pxor %xmm12, %xmm8
+; SSE4-NEXT: movdqa %xmm2, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm6, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm11, %xmm10
+; SSE4-NEXT: pxor %xmm12, %xmm10
+; SSE4-NEXT: movdqa %xmm1, %xmm13
+; SSE4-NEXT: pxor %xmm0, %xmm13
+; SSE4-NEXT: movdqa %xmm5, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm13, %xmm11
+; SSE4-NEXT: pxor %xmm12, %xmm11
+; SSE4-NEXT: movdqa %xmm9, %xmm13
+; SSE4-NEXT: pxor %xmm0, %xmm13
+; SSE4-NEXT: pxor %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm13, %xmm0
+; SSE4-NEXT: pxor %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm9, %xmm4
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm6
+; SSE4-NEXT: movdqa %xmm8, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm7
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: movapd %xmm5, %xmm1
+; SSE4-NEXT: movapd %xmm6, %xmm2
+; SSE4-NEXT: movapd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test128:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8
+; AVX1-NEXT: vpxor %xmm8, %xmm4, %xmm4
+; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpxor %xmm8, %xmm5, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test128:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vpcmpeqd %ymm6, %ymm6, %ymm6
+; AVX2-NEXT: vpxor %ymm6, %ymm5, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm7
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm7, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %ymm6, %ymm4, %ymm4
+; AVX2-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendvpd %ymm5, %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test128:
-; AVX512F: vpmaxuq {{.*}}
-}
-
-define void @test129(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp slt <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %a, <8 x i64> %b
+ ret <8 x i64> %sel
+}
+
+define <64 x i8> @test129(<64 x i8> %a, <64 x i8> %b) {
+; SSE2-LABEL: test129:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm3
+; SSE2-NEXT: pcmpgtb %xmm8, %xmm3
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pcmpgtb %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm10
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pcmpgtb %xmm0, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm3, %xmm7
+; SSE2-NEXT: pandn %xmm8, %xmm3
+; SSE2-NEXT: por %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test129:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm4, %xmm0
+; SSE4-NEXT: pmaxsb %xmm5, %xmm1
+; SSE4-NEXT: pmaxsb %xmm6, %xmm2
+; SSE4-NEXT: pmaxsb %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test129:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test129:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test129:
-; AVX512BW: vpmaxsb
-}
-
-define void @test130(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp sle <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp slt <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %b, <64 x i8> %a
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test130(<64 x i8> %a, <64 x i8> %b) {
+; SSE2-LABEL: test130:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm2, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm12
+; SSE2-NEXT: pcmpgtb %xmm7, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm13, %xmm13
+; SSE2-NEXT: movdqa %xmm12, %xmm9
+; SSE2-NEXT: pxor %xmm13, %xmm9
+; SSE2-NEXT: movdqa %xmm8, %xmm14
+; SSE2-NEXT: pcmpgtb %xmm6, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm2
+; SSE2-NEXT: pxor %xmm13, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm15
+; SSE2-NEXT: pcmpgtb %xmm5, %xmm15
+; SSE2-NEXT: movdqa %xmm15, %xmm10
+; SSE2-NEXT: pxor %xmm13, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm11, %xmm13
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: pandn %xmm0, %xmm13
+; SSE2-NEXT: por %xmm13, %xmm11
+; SSE2-NEXT: pandn %xmm5, %xmm15
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm15, %xmm10
+; SSE2-NEXT: pandn %xmm6, %xmm14
+; SSE2-NEXT: pandn %xmm8, %xmm2
+; SSE2-NEXT: por %xmm14, %xmm2
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm12, %xmm9
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test130:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsb %xmm4, %xmm0
+; SSE4-NEXT: pmaxsb %xmm5, %xmm1
+; SSE4-NEXT: pmaxsb %xmm6, %xmm2
+; SSE4-NEXT: pmaxsb %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test130:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test130:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test130:
-; AVX512BW: vpmaxsb
-}
-
-define void @test131(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp sgt <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sle <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %b, <64 x i8> %a
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test131(<64 x i8> %a, <64 x i8> %b) {
+; SSE2-LABEL: test131:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pcmpgtb %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pcmpgtb %xmm6, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pcmpgtb %xmm5, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtb %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm3, %xmm7
+; SSE2-NEXT: pandn %xmm8, %xmm3
+; SSE2-NEXT: por %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test131:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm4, %xmm0
+; SSE4-NEXT: pminsb %xmm5, %xmm1
+; SSE4-NEXT: pminsb %xmm6, %xmm2
+; SSE4-NEXT: pminsb %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test131:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test131:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test131:
-; AVX512BW: vpminsb
-}
-
-define void @test132(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp sge <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sgt <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %b, <64 x i8> %a
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test132(<64 x i8> %a, <64 x i8> %b) {
+; SSE2-LABEL: test132:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm2, %xmm8
+; SSE2-NEXT: movdqa %xmm0, %xmm10
+; SSE2-NEXT: movdqa %xmm7, %xmm12
+; SSE2-NEXT: pcmpgtb %xmm3, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: movdqa %xmm12, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pcmpgtb %xmm8, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm2
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm5, %xmm14
+; SSE2-NEXT: pcmpgtb %xmm1, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: movdqa %xmm4, %xmm15
+; SSE2-NEXT: pcmpgtb %xmm10, %xmm15
+; SSE2-NEXT: pxor %xmm15, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm15
+; SSE2-NEXT: pandn %xmm10, %xmm0
+; SSE2-NEXT: por %xmm15, %xmm0
+; SSE2-NEXT: pandn %xmm5, %xmm14
+; SSE2-NEXT: pandn %xmm1, %xmm11
+; SSE2-NEXT: por %xmm14, %xmm11
+; SSE2-NEXT: pandn %xmm6, %xmm13
+; SSE2-NEXT: pandn %xmm8, %xmm2
+; SSE2-NEXT: por %xmm13, %xmm2
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm12, %xmm9
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test132:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsb %xmm4, %xmm0
+; SSE4-NEXT: pminsb %xmm5, %xmm1
+; SSE4-NEXT: pminsb %xmm6, %xmm2
+; SSE4-NEXT: pminsb %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test132:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test132:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test132:
-; AVX512BW: vpminsb
-}
-
-define void @test133(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp ult <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sge <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %b, <64 x i8> %a
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test133(<64 x i8> %a, <64 x i8> %b) {
+; SSE-LABEL: test133:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm4, %xmm0
+; SSE-NEXT: pmaxub %xmm5, %xmm1
+; SSE-NEXT: pmaxub %xmm6, %xmm2
+; SSE-NEXT: pmaxub %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test133:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxub %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxub %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test133:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxub %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test133:
-; AVX512BW: vpmaxub
-}
-
-define void @test134(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp ule <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ult <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %b, <64 x i8> %a
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test134(<64 x i8> %a, <64 x i8> %b) {
+; SSE-LABEL: test134:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxub %xmm4, %xmm0
+; SSE-NEXT: pmaxub %xmm5, %xmm1
+; SSE-NEXT: pmaxub %xmm6, %xmm2
+; SSE-NEXT: pmaxub %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test134:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxub %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxub %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test134:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxub %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test134:
-; AVX512BW: vpmaxub
-}
-
-define void @test135(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp ugt <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ule <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %b, <64 x i8> %a
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test135(<64 x i8> %a, <64 x i8> %b) {
+; SSE-LABEL: test135:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm4, %xmm0
+; SSE-NEXT: pminub %xmm5, %xmm1
+; SSE-NEXT: pminub %xmm6, %xmm2
+; SSE-NEXT: pminub %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test135:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminub %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminub %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test135:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminub %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test135:
-; AVX512BW: vpminub
-}
-
-define void @test136(i8* nocapture %a, i8* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
- %ptr.a = bitcast i8* %gep.a to <64 x i8>*
- %ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
- %cmp = icmp uge <64 x i8> %load.a, %load.b
- %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
- store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
- %index.next = add i64 %index, 32
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ugt <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %b, <64 x i8> %a
+ ret <64 x i8> %sel
+}
+
+define <64 x i8> @test136(<64 x i8> %a, <64 x i8> %b) {
+; SSE-LABEL: test136:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminub %xmm4, %xmm0
+; SSE-NEXT: pminub %xmm5, %xmm1
+; SSE-NEXT: pminub %xmm6, %xmm2
+; SSE-NEXT: pminub %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test136:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminub %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminub %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminub %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test136:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminub %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminub %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test136:
-; AVX512BW: vpminub
-}
-
-define void @test137(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp slt <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp uge <64 x i8> %a, %b
+ %sel = select <64 x i1> %cmp, <64 x i8> %b, <64 x i8> %a
+ ret <64 x i8> %sel
+}
+
+define <32 x i16> @test137(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: test137:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm4, %xmm0
+; SSE-NEXT: pmaxsw %xmm5, %xmm1
+; SSE-NEXT: pmaxsw %xmm6, %xmm2
+; SSE-NEXT: pmaxsw %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test137:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test137:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test137:
-; AVX512BW: vpmaxsw
-}
-
-define void @test138(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp sle <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp slt <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %b, <32 x i16> %a
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test138(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: test138:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pmaxsw %xmm4, %xmm0
+; SSE-NEXT: pmaxsw %xmm5, %xmm1
+; SSE-NEXT: pmaxsw %xmm6, %xmm2
+; SSE-NEXT: pmaxsw %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test138:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test138:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test138:
-; AVX512BW: vpmaxsw
-}
-
-define void @test139(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp sgt <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sle <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %b, <32 x i16> %a
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test139(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: test139:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm4, %xmm0
+; SSE-NEXT: pminsw %xmm5, %xmm1
+; SSE-NEXT: pminsw %xmm6, %xmm2
+; SSE-NEXT: pminsw %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test139:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test139:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test139:
-; AVX512BW: vpminsw
-}
-
-define void @test140(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp sge <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sgt <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %b, <32 x i16> %a
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test140(<32 x i16> %a, <32 x i16> %b) {
+; SSE-LABEL: test140:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pminsw %xmm4, %xmm0
+; SSE-NEXT: pminsw %xmm5, %xmm1
+; SSE-NEXT: pminsw %xmm6, %xmm2
+; SSE-NEXT: pminsw %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test140:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test140:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test140:
-; AVX512BW: vpminsw
-}
-
-define void @test141(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp ult <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sge <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %b, <32 x i16> %a
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test141(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: test141:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm3, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm8
+; SSE2-NEXT: pcmpgtw %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm10
+; SSE2-NEXT: pxor %xmm0, %xmm10
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: pcmpgtw %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm12
+; SSE2-NEXT: movdqa %xmm5, %xmm10
+; SSE2-NEXT: pxor %xmm0, %xmm10
+; SSE2-NEXT: pcmpgtw %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm11, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm12
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pcmpgtw %xmm12, %xmm0
+; SSE2-NEXT: pand %xmm0, %xmm4
+; SSE2-NEXT: pandn %xmm11, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test141:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm4, %xmm0
+; SSE4-NEXT: pmaxuw %xmm5, %xmm1
+; SSE4-NEXT: pmaxuw %xmm6, %xmm2
+; SSE4-NEXT: pmaxuw %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test141:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test141:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxuw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test141:
-; AVX512BW: vpmaxuw
-}
-
-define void @test142(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp ule <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ult <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %b, <32 x i16> %a
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test142(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: test142:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: psubusw %xmm7, %xmm3
+; SSE2-NEXT: pxor %xmm12, %xmm12
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm3
+; SSE2-NEXT: psubusw %xmm6, %xmm2
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm2
+; SSE2-NEXT: psubusw %xmm5, %xmm1
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: psubusw %xmm4, %xmm11
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm1, %xmm5
+; SSE2-NEXT: pandn %xmm10, %xmm1
+; SSE2-NEXT: por %xmm5, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm6
+; SSE2-NEXT: pandn %xmm9, %xmm2
+; SSE2-NEXT: por %xmm6, %xmm2
+; SSE2-NEXT: pand %xmm3, %xmm7
+; SSE2-NEXT: pandn %xmm8, %xmm3
+; SSE2-NEXT: por %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test142:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxuw %xmm4, %xmm0
+; SSE4-NEXT: pmaxuw %xmm5, %xmm1
+; SSE4-NEXT: pmaxuw %xmm6, %xmm2
+; SSE4-NEXT: pmaxuw %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test142:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxuw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxuw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test142:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxuw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test142:
-; AVX512BW: vpmaxuw
-}
-
-define void @test143(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp ugt <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ule <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %b, <32 x i16> %a
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test143(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: test143:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2-NEXT: movdqa %xmm7, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm8
+; SSE2-NEXT: pcmpgtw %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: pxor %xmm0, %xmm10
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: pcmpgtw %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm12
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pxor %xmm0, %xmm10
+; SSE2-NEXT: pcmpgtw %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm0
+; SSE2-NEXT: pcmpgtw %xmm12, %xmm0
+; SSE2-NEXT: pand %xmm0, %xmm4
+; SSE2-NEXT: pandn %xmm11, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test143:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm4, %xmm0
+; SSE4-NEXT: pminuw %xmm5, %xmm1
+; SSE4-NEXT: pminuw %xmm6, %xmm2
+; SSE4-NEXT: pminuw %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test143:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminuw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminuw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test143:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminuw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test143:
-; AVX512BW: vpminuw
-}
-
-define void @test144(i16* nocapture %a, i16* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
- %ptr.a = bitcast i16* %gep.a to <32 x i16>*
- %ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
- %cmp = icmp uge <32 x i16> %load.a, %load.b
- %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
- store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
- %index.next = add i64 %index, 16
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ugt <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %b, <32 x i16> %a
+ ret <32 x i16> %sel
+}
+
+define <32 x i16> @test144(<32 x i16> %a, <32 x i16> %b) {
+; SSE2-LABEL: test144:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: movdqa %xmm7, %xmm3
+; SSE2-NEXT: psubusw %xmm8, %xmm3
+; SSE2-NEXT: pxor %xmm12, %xmm12
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm3
+; SSE2-NEXT: movdqa %xmm6, %xmm2
+; SSE2-NEXT: psubusw %xmm9, %xmm2
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm2
+; SSE2-NEXT: movdqa %xmm5, %xmm1
+; SSE2-NEXT: psubusw %xmm10, %xmm1
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm1
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: psubusw %xmm0, %xmm11
+; SSE2-NEXT: pcmpeqw %xmm12, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm1, %xmm5
+; SSE2-NEXT: pandn %xmm10, %xmm1
+; SSE2-NEXT: por %xmm5, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm6
+; SSE2-NEXT: pandn %xmm9, %xmm2
+; SSE2-NEXT: por %xmm6, %xmm2
+; SSE2-NEXT: pand %xmm3, %xmm7
+; SSE2-NEXT: pandn %xmm8, %xmm3
+; SSE2-NEXT: por %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test144:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminuw %xmm4, %xmm0
+; SSE4-NEXT: pminuw %xmm5, %xmm1
+; SSE4-NEXT: pminuw %xmm6, %xmm2
+; SSE4-NEXT: pminuw %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test144:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminuw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminuw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminuw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test144:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminuw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512BW-LABEL: test144:
-; AVX512BW: vpminuw
-}
-
-define void @test145(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp slt <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp uge <32 x i16> %a, %b
+ %sel = select <32 x i1> %cmp, <32 x i16> %b, <32 x i16> %a
+ ret <32 x i16> %sel
+}
+
+define <16 x i32> @test145(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test145:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm3
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm3, %xmm7
+; SSE2-NEXT: pandn %xmm8, %xmm3
+; SSE2-NEXT: por %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test145:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm4, %xmm0
+; SSE4-NEXT: pmaxsd %xmm5, %xmm1
+; SSE4-NEXT: pmaxsd %xmm6, %xmm2
+; SSE4-NEXT: pmaxsd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test145:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test145:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test145:
-; AVX512F: vpmaxsd
-}
-
-define void @test146(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp sle <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %b, <16 x i32> %a
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test146(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test146:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm2, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm7, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm13, %xmm13
+; SSE2-NEXT: movdqa %xmm12, %xmm9
+; SSE2-NEXT: pxor %xmm13, %xmm9
+; SSE2-NEXT: movdqa %xmm8, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm2
+; SSE2-NEXT: pxor %xmm13, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm15
+; SSE2-NEXT: movdqa %xmm15, %xmm10
+; SSE2-NEXT: pxor %xmm13, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm11, %xmm13
+; SSE2-NEXT: pandn %xmm4, %xmm11
+; SSE2-NEXT: pandn %xmm0, %xmm13
+; SSE2-NEXT: por %xmm13, %xmm11
+; SSE2-NEXT: pandn %xmm5, %xmm15
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm15, %xmm10
+; SSE2-NEXT: pandn %xmm6, %xmm14
+; SSE2-NEXT: pandn %xmm8, %xmm2
+; SSE2-NEXT: por %xmm14, %xmm2
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm12, %xmm9
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test146:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxsd %xmm4, %xmm0
+; SSE4-NEXT: pmaxsd %xmm5, %xmm1
+; SSE4-NEXT: pmaxsd %xmm6, %xmm2
+; SSE4-NEXT: pmaxsd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test146:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxsd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxsd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxsd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test146:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxsd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test146:
-; AVX512F: vpmaxsd
-}
-
-define void @test147(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp sgt <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %b, <16 x i32> %a
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test147(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test147:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pcmpgtd %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm3, %xmm7
+; SSE2-NEXT: pandn %xmm8, %xmm3
+; SSE2-NEXT: por %xmm7, %xmm3
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test147:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm4, %xmm0
+; SSE4-NEXT: pminsd %xmm5, %xmm1
+; SSE4-NEXT: pminsd %xmm6, %xmm2
+; SSE4-NEXT: pminsd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test147:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test147:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test147:
-; AVX512F: vpminsd
-}
-
-define void @test148(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp sge <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %b, <16 x i32> %a
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test148(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test148:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm2, %xmm8
+; SSE2-NEXT: movdqa %xmm0, %xmm10
+; SSE2-NEXT: movdqa %xmm7, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: movdqa %xmm12, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm2
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm5, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: movdqa %xmm4, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm15
+; SSE2-NEXT: pxor %xmm15, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm15
+; SSE2-NEXT: pandn %xmm10, %xmm0
+; SSE2-NEXT: por %xmm15, %xmm0
+; SSE2-NEXT: pandn %xmm5, %xmm14
+; SSE2-NEXT: pandn %xmm1, %xmm11
+; SSE2-NEXT: por %xmm14, %xmm11
+; SSE2-NEXT: pandn %xmm6, %xmm13
+; SSE2-NEXT: pandn %xmm8, %xmm2
+; SSE2-NEXT: por %xmm13, %xmm2
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm12, %xmm9
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test148:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminsd %xmm4, %xmm0
+; SSE4-NEXT: pminsd %xmm5, %xmm1
+; SSE4-NEXT: pminsd %xmm6, %xmm2
+; SSE4-NEXT: pminsd %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test148:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminsd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminsd %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminsd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test148:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminsd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test148:
-; AVX512F: vpminsd
-}
-
-define void @test149(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp ult <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsd %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %b, <16 x i32> %a
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test149(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test149:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm8
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm10
+; SSE2-NEXT: pxor %xmm0, %xmm10
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm12
+; SSE2-NEXT: movdqa %xmm5, %xmm10
+; SSE2-NEXT: pxor %xmm0, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm11, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm12
+; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm0
+; SSE2-NEXT: pand %xmm0, %xmm4
+; SSE2-NEXT: pandn %xmm11, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test149:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm4, %xmm0
+; SSE4-NEXT: pmaxud %xmm5, %xmm1
+; SSE4-NEXT: pmaxud %xmm6, %xmm2
+; SSE4-NEXT: pmaxud %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test149:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxud %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxud %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test149:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test149:
-; AVX512F: vpmaxud
-}
-
-define void @test150(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp ule <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %b, <16 x i32> %a
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test150(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test150:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm10
+; SSE2-NEXT: movdqa {{.*#+}} xmm14 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm7, %xmm0
+; SSE2-NEXT: pxor %xmm14, %xmm0
+; SSE2-NEXT: movdqa %xmm3, %xmm12
+; SSE2-NEXT: pxor %xmm14, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pxor %xmm14, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm13
+; SSE2-NEXT: pxor %xmm14, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm11
+; SSE2-NEXT: movdqa %xmm1, %xmm15
+; SSE2-NEXT: pxor %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm14
+; SSE2-NEXT: pandn %xmm10, %xmm0
+; SSE2-NEXT: por %xmm14, %xmm0
+; SSE2-NEXT: pandn %xmm5, %xmm15
+; SSE2-NEXT: pandn %xmm1, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm6, %xmm13
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm13, %xmm9
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test150:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pmaxud %xmm4, %xmm0
+; SSE4-NEXT: pmaxud %xmm5, %xmm1
+; SSE4-NEXT: pmaxud %xmm6, %xmm2
+; SSE4-NEXT: pmaxud %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test150:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpmaxud %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpmaxud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpmaxud %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpmaxud %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test150:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpmaxud %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test150:
-; AVX512F: vpmaxud
-}
-
-define void @test151(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp ugt <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxud %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %b, <16 x i32> %a
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test151(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test151:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm7, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm8
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: pxor %xmm0, %xmm10
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm12
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pxor %xmm0, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm0
+; SSE2-NEXT: pand %xmm0, %xmm4
+; SSE2-NEXT: pandn %xmm11, %xmm0
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test151:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm4, %xmm0
+; SSE4-NEXT: pminud %xmm5, %xmm1
+; SSE4-NEXT: pminud %xmm6, %xmm2
+; SSE4-NEXT: pminud %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test151:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminud %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminud %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test151:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test151:
-; AVX512F: vpminud
-}
-
-define void @test152(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <16 x i32>*
- %ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
- %cmp = icmp uge <16 x i32> %load.a, %load.b
- %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
- store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %b, <16 x i32> %a
+ ret <16 x i32> %sel
+}
+
+define <16 x i32> @test152(<16 x i32> %a, <16 x i32> %b) {
+; SSE2-LABEL: test152:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm10
+; SSE2-NEXT: movdqa {{.*#+}} xmm14 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm0
+; SSE2-NEXT: pxor %xmm14, %xmm0
+; SSE2-NEXT: movdqa %xmm7, %xmm12
+; SSE2-NEXT: pxor %xmm14, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm0, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pxor %xmm14, %xmm9
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pxor %xmm14, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm9
+; SSE2-NEXT: pxor %xmm0, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm15
+; SSE2-NEXT: pxor %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: movdqa %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm0
+; SSE2-NEXT: pandn %xmm4, %xmm14
+; SSE2-NEXT: pandn %xmm10, %xmm0
+; SSE2-NEXT: por %xmm14, %xmm0
+; SSE2-NEXT: pandn %xmm5, %xmm15
+; SSE2-NEXT: pandn %xmm1, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm6, %xmm13
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm13, %xmm9
+; SSE2-NEXT: pandn %xmm7, %xmm12
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test152:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: pminud %xmm4, %xmm0
+; SSE4-NEXT: pminud %xmm5, %xmm1
+; SSE4-NEXT: pminud %xmm6, %xmm2
+; SSE4-NEXT: pminud %xmm7, %xmm3
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test152:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpminud %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpminud %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test152:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpminud %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test152:
-; AVX512F: vpminud
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminud %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <16 x i32> %a, %b
+ %sel = select <16 x i1> %cmp, <16 x i32> %b, <16 x i32> %a
+ ret <16 x i32> %sel
}
; -----------------------
-define void @test153(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp slt <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+define <8 x i64> @test153(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test153:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pxor %xmm11, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm9, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm10[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm9[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm10[1,1,3,3]
+; SSE2-NEXT: por %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm9, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm13, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm12[1,1,3,3]
+; SSE2-NEXT: por %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm12, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm10, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm12[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: pxor %xmm4, %xmm11
+; SSE2-NEXT: movdqa %xmm11, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm12, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm12, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test153:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa %xmm7, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm9
+; SSE4-NEXT: movdqa %xmm6, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm10
+; SSE4-NEXT: movdqa %xmm5, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm11
+; SSE4-NEXT: movdqa %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm8, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm8
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm5, %xmm1
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm6, %xmm2
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm7, %xmm3
+; SSE4-NEXT: movapd %xmm8, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test153:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test153:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm3, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm5
+; AVX2-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test153:
-; AVX512F: vpmaxsq
-}
-
-define void @test154(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp sle <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp slt <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test154(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test154:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm9
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm0
+; SSE2-NEXT: pxor %xmm10, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm1, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm3, %xmm13
+; SSE2-NEXT: pxor %xmm10, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: pand %xmm15, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm13
+; SSE2-NEXT: movdqa %xmm5, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm2, %xmm14
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm15[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm15[1,1,3,3]
+; SSE2-NEXT: por %xmm14, %xmm15
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm14[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm13, %xmm10
+; SSE2-NEXT: pxor %xmm1, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm4, %xmm14
+; SSE2-NEXT: pandn %xmm9, %xmm1
+; SSE2-NEXT: por %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm15
+; SSE2-NEXT: pandn %xmm2, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm6, %xmm13
+; SSE2-NEXT: pandn %xmm3, %xmm10
+; SSE2-NEXT: por %xmm13, %xmm10
+; SSE2-NEXT: pandn -{{[0-9]+}}(%rsp), %xmm12 # 16-byte Folded Reload
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm10, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test154:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa %xmm3, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm7, %xmm9
+; SSE4-NEXT: pcmpeqd %xmm12, %xmm12
+; SSE4-NEXT: pxor %xmm12, %xmm9
+; SSE4-NEXT: movdqa %xmm2, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm10
+; SSE4-NEXT: pxor %xmm12, %xmm10
+; SSE4-NEXT: movdqa %xmm1, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm5, %xmm11
+; SSE4-NEXT: pxor %xmm12, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm4, %xmm0
+; SSE4-NEXT: pxor %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm8
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm5, %xmm1
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm6, %xmm2
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm7, %xmm3
+; SSE4-NEXT: movapd %xmm8, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test154:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test154:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
+; AVX2-NEXT: vpxor %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm6
+; AVX2-NEXT: vpxor %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test154:
-; AVX512F: vpmaxsq
-}
-
-define void @test155(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp sgt <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sle <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test155(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test155:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm11, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm9, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm10[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm9[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm10[1,1,3,3]
+; SSE2-NEXT: por %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm9, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm13, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm12[1,1,3,3]
+; SSE2-NEXT: por %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm1, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm12, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm10, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm12[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: movdqa %xmm11, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm12, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm12, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test155:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa %xmm3, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm7, %xmm9
+; SSE4-NEXT: movdqa %xmm2, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm10
+; SSE4-NEXT: movdqa %xmm1, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm5, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm4, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm8
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm5, %xmm1
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm6, %xmm2
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm7, %xmm3
+; SSE4-NEXT: movapd %xmm8, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test155:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm6, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test155:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm5
+; AVX2-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test155:
-; AVX512F: vpminsq
-}
-
-define void @test156(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp sge <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sgt <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test156(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test156:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm11
+; SSE2-NEXT: movdqa %xmm11, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm9
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: pxor %xmm10, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm1, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pxor %xmm10, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: pand %xmm15, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm13
+; SSE2-NEXT: movdqa %xmm2, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm14
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm15[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm15[1,1,3,3]
+; SSE2-NEXT: por %xmm14, %xmm15
+; SSE2-NEXT: movdqa %xmm9, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm14[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm13, %xmm10
+; SSE2-NEXT: pxor %xmm1, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm4, %xmm14
+; SSE2-NEXT: pandn %xmm9, %xmm1
+; SSE2-NEXT: por %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm15
+; SSE2-NEXT: pandn %xmm2, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm6, %xmm13
+; SSE2-NEXT: pandn %xmm3, %xmm10
+; SSE2-NEXT: por %xmm13, %xmm10
+; SSE2-NEXT: pandn -{{[0-9]+}}(%rsp), %xmm12 # 16-byte Folded Reload
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm10, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test156:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa %xmm7, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm9
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm0, %xmm9
+; SSE4-NEXT: movdqa %xmm6, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm5, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm4, %xmm12
+; SSE4-NEXT: pcmpgtq %xmm8, %xmm12
+; SSE4-NEXT: pxor %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm8
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm5, %xmm1
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm6, %xmm2
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm7, %xmm3
+; SSE4-NEXT: movapd %xmm8, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test156:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm7
+; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test156:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm3, %ymm4
+; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
+; AVX2-NEXT: vpxor %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm6
+; AVX2-NEXT: vpxor %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test156:
-; AVX512F: vpminsq
-}
-
-define void @test157(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp ult <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp sge <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test157(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test157:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm8
+; SSE2-NEXT: pxor %xmm11, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm9, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm10[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm9[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm10[1,1,3,3]
+; SSE2-NEXT: por %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm2, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm9, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm13, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm12[1,1,3,3]
+; SSE2-NEXT: por %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm1, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm5, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm12, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm10, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm12[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: pxor %xmm4, %xmm11
+; SSE2-NEXT: movdqa %xmm11, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm12, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm12, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test157:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm3, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm7, %xmm9
+; SSE4-NEXT: pxor %xmm0, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm10, %xmm9
+; SSE4-NEXT: movdqa %xmm2, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm6, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm11, %xmm10
+; SSE4-NEXT: movdqa %xmm1, %xmm12
+; SSE4-NEXT: pxor %xmm0, %xmm12
+; SSE4-NEXT: movdqa %xmm5, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm12, %xmm11
+; SSE4-NEXT: movdqa %xmm8, %xmm12
+; SSE4-NEXT: pxor %xmm0, %xmm12
+; SSE4-NEXT: pxor %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm8
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm5, %xmm1
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm6, %xmm2
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm7, %xmm3
+; SSE4-NEXT: movapd %xmm8, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test157:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test157:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm6
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm4
+; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test157:
-; AVX512F: vpmaxuq
-}
-
-define void @test158(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp ule <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ult <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test158(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test158:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm9
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm7, %xmm0
+; SSE2-NEXT: pxor %xmm10, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm1, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm3, %xmm13
+; SSE2-NEXT: pxor %xmm10, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: pand %xmm15, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm13
+; SSE2-NEXT: movdqa %xmm5, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm2, %xmm14
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm15[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm15[1,1,3,3]
+; SSE2-NEXT: por %xmm14, %xmm15
+; SSE2-NEXT: movdqa %xmm4, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm9, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm14[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm13, %xmm10
+; SSE2-NEXT: pxor %xmm1, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm4, %xmm14
+; SSE2-NEXT: pandn %xmm9, %xmm1
+; SSE2-NEXT: por %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm15
+; SSE2-NEXT: pandn %xmm2, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm6, %xmm13
+; SSE2-NEXT: pandn %xmm3, %xmm10
+; SSE2-NEXT: por %xmm13, %xmm10
+; SSE2-NEXT: pandn -{{[0-9]+}}(%rsp), %xmm12 # 16-byte Folded Reload
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm10, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test158:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm7, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm3, %xmm9
+; SSE4-NEXT: pxor %xmm0, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm10, %xmm9
+; SSE4-NEXT: pcmpeqd %xmm12, %xmm12
+; SSE4-NEXT: pxor %xmm12, %xmm9
+; SSE4-NEXT: movdqa %xmm6, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm2, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm11, %xmm10
+; SSE4-NEXT: pxor %xmm12, %xmm10
+; SSE4-NEXT: movdqa %xmm5, %xmm13
+; SSE4-NEXT: pxor %xmm0, %xmm13
+; SSE4-NEXT: movdqa %xmm1, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm13, %xmm11
+; SSE4-NEXT: pxor %xmm12, %xmm11
+; SSE4-NEXT: movdqa %xmm4, %xmm13
+; SSE4-NEXT: pxor %xmm0, %xmm13
+; SSE4-NEXT: pxor %xmm8, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm13, %xmm0
+; SSE4-NEXT: pxor %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm8
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm5, %xmm1
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm6, %xmm2
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm7, %xmm3
+; SSE4-NEXT: movapd %xmm8, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test158:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8
+; AVX1-NEXT: vpxor %xmm8, %xmm4, %xmm4
+; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpxor %xmm8, %xmm5, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test158:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vpcmpeqd %ymm6, %ymm6, %ymm6
+; AVX2-NEXT: vpxor %ymm6, %ymm5, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm7
+; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm7, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %ymm6, %ymm4, %ymm4
+; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test158:
-; AVX512F: vpmaxuq
-}
-
-define void @test159(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp ugt <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ule <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test159(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test159:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm11, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm9, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm10[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm9[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm10[1,1,3,3]
+; SSE2-NEXT: por %xmm9, %xmm8
+; SSE2-NEXT: movdqa %xmm6, %xmm9
+; SSE2-NEXT: pxor %xmm11, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm12
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm9, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm13, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm12[1,1,3,3]
+; SSE2-NEXT: por %xmm10, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm10
+; SSE2-NEXT: pxor %xmm11, %xmm10
+; SSE2-NEXT: movdqa %xmm1, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: movdqa %xmm12, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm10, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm10, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm12[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm12, %xmm10
+; SSE2-NEXT: movdqa %xmm4, %xmm12
+; SSE2-NEXT: pxor %xmm11, %xmm12
+; SSE2-NEXT: pxor %xmm0, %xmm11
+; SSE2-NEXT: movdqa %xmm11, %xmm13
+; SSE2-NEXT: pcmpgtd %xmm12, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm12, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: pand %xmm14, %xmm12
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: por %xmm12, %xmm11
+; SSE2-NEXT: pand %xmm11, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm11
+; SSE2-NEXT: por %xmm4, %xmm11
+; SSE2-NEXT: pand %xmm10, %xmm5
+; SSE2-NEXT: pandn %xmm1, %xmm10
+; SSE2-NEXT: por %xmm5, %xmm10
+; SSE2-NEXT: pand %xmm9, %xmm6
+; SSE2-NEXT: pandn %xmm2, %xmm9
+; SSE2-NEXT: por %xmm6, %xmm9
+; SSE2-NEXT: pand %xmm8, %xmm7
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: por %xmm7, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm10, %xmm1
+; SSE2-NEXT: movdqa %xmm9, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test159:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm7, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm3, %xmm9
+; SSE4-NEXT: pxor %xmm0, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm10, %xmm9
+; SSE4-NEXT: movdqa %xmm6, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm2, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm11, %xmm10
+; SSE4-NEXT: movdqa %xmm5, %xmm12
+; SSE4-NEXT: pxor %xmm0, %xmm12
+; SSE4-NEXT: movdqa %xmm1, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm12, %xmm11
+; SSE4-NEXT: movdqa %xmm4, %xmm12
+; SSE4-NEXT: pxor %xmm0, %xmm12
+; SSE4-NEXT: pxor %xmm8, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm8
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm5, %xmm1
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm6, %xmm2
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm7, %xmm3
+; SSE4-NEXT: movapd %xmm8, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test159:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test159:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm6
+; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm6, %ymm4, %ymm4
+; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test159:
-; AVX512F: vpminuq
-}
-
-define void @test160(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <8 x i64>*
- %ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
- %cmp = icmp uge <8 x i64> %load.a, %load.b
- %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
- store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp ugt <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
+ ret <8 x i64> %sel
+}
+
+define <8 x i64> @test160(<8 x i64> %a, <8 x i64> %b) {
+; SSE2-LABEL: test160:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm7, %xmm11
+; SSE2-NEXT: movdqa %xmm11, -{{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movdqa %xmm3, %xmm7
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm9
+; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm10, %xmm8
+; SSE2-NEXT: movdqa %xmm11, %xmm0
+; SSE2-NEXT: pxor %xmm10, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm11
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pand %xmm12, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm12
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm12, %xmm8
+; SSE2-NEXT: pxor %xmm1, %xmm8
+; SSE2-NEXT: movdqa %xmm3, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm6, %xmm13
+; SSE2-NEXT: pxor %xmm10, %xmm13
+; SSE2-NEXT: movdqa %xmm13, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm14[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm13
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm13[1,1,3,3]
+; SSE2-NEXT: pand %xmm15, %xmm11
+; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm11, %xmm13
+; SSE2-NEXT: movdqa %xmm2, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: movdqa %xmm5, %xmm14
+; SSE2-NEXT: pxor %xmm10, %xmm14
+; SSE2-NEXT: movdqa %xmm14, %xmm15
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm15
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm15[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm14
+; SSE2-NEXT: pshufd {{.*#+}} xmm15 = xmm15[1,1,3,3]
+; SSE2-NEXT: por %xmm14, %xmm15
+; SSE2-NEXT: movdqa %xmm9, %xmm11
+; SSE2-NEXT: pxor %xmm10, %xmm11
+; SSE2-NEXT: pxor %xmm4, %xmm10
+; SSE2-NEXT: movdqa %xmm10, %xmm14
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm14
+; SSE2-NEXT: pcmpeqd %xmm11, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm14[0,0,2,2]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm0
+; SSE2-NEXT: movdqa %xmm13, %xmm10
+; SSE2-NEXT: pxor %xmm1, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm14[1,1,3,3]
+; SSE2-NEXT: por %xmm0, %xmm14
+; SSE2-NEXT: movdqa %xmm15, %xmm11
+; SSE2-NEXT: pxor %xmm1, %xmm11
+; SSE2-NEXT: pxor %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm4, %xmm14
+; SSE2-NEXT: pandn %xmm9, %xmm1
+; SSE2-NEXT: por %xmm14, %xmm1
+; SSE2-NEXT: pandn %xmm5, %xmm15
+; SSE2-NEXT: pandn %xmm2, %xmm11
+; SSE2-NEXT: por %xmm15, %xmm11
+; SSE2-NEXT: pandn %xmm6, %xmm13
+; SSE2-NEXT: pandn %xmm3, %xmm10
+; SSE2-NEXT: por %xmm13, %xmm10
+; SSE2-NEXT: pandn -{{[0-9]+}}(%rsp), %xmm12 # 16-byte Folded Reload
+; SSE2-NEXT: pandn %xmm7, %xmm8
+; SSE2-NEXT: por %xmm12, %xmm8
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm11, %xmm1
+; SSE2-NEXT: movdqa %xmm10, %xmm2
+; SSE2-NEXT: movdqa %xmm8, %xmm3
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test160:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm8
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm3, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm7, %xmm9
+; SSE4-NEXT: pxor %xmm0, %xmm9
+; SSE4-NEXT: pcmpgtq %xmm10, %xmm9
+; SSE4-NEXT: pcmpeqd %xmm12, %xmm12
+; SSE4-NEXT: pxor %xmm12, %xmm9
+; SSE4-NEXT: movdqa %xmm2, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: movdqa %xmm6, %xmm10
+; SSE4-NEXT: pxor %xmm0, %xmm10
+; SSE4-NEXT: pcmpgtq %xmm11, %xmm10
+; SSE4-NEXT: pxor %xmm12, %xmm10
+; SSE4-NEXT: movdqa %xmm1, %xmm13
+; SSE4-NEXT: pxor %xmm0, %xmm13
+; SSE4-NEXT: movdqa %xmm5, %xmm11
+; SSE4-NEXT: pxor %xmm0, %xmm11
+; SSE4-NEXT: pcmpgtq %xmm13, %xmm11
+; SSE4-NEXT: pxor %xmm12, %xmm11
+; SSE4-NEXT: movdqa %xmm8, %xmm13
+; SSE4-NEXT: pxor %xmm0, %xmm13
+; SSE4-NEXT: pxor %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm13, %xmm0
+; SSE4-NEXT: pxor %xmm12, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm8
+; SSE4-NEXT: movdqa %xmm11, %xmm0
+; SSE4-NEXT: blendvpd %xmm5, %xmm1
+; SSE4-NEXT: movdqa %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm6, %xmm2
+; SSE4-NEXT: movdqa %xmm9, %xmm0
+; SSE4-NEXT: blendvpd %xmm7, %xmm3
+; SSE4-NEXT: movapd %xmm8, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test160:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vmovaps {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8
+; AVX1-NEXT: vpxor %xmm8, %xmm4, %xmm4
+; AVX1-NEXT: vxorps %xmm5, %xmm1, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm3, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpxor %xmm8, %xmm6, %xmm6
+; AVX1-NEXT: vxorps %xmm5, %xmm0, %xmm7
+; AVX1-NEXT: vxorps %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm5
+; AVX1-NEXT: vpxor %xmm8, %xmm5, %xmm5
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: vblendvpd %ymm5, %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vblendvpd %ymm4, %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test160:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm4
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vpcmpeqd %ymm6, %ymm6, %ymm6
+; AVX2-NEXT: vpxor %ymm6, %ymm5, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm7
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm7, %ymm4, %ymm4
+; AVX2-NEXT: vpxor %ymm6, %ymm4, %ymm4
+; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vblendvpd %ymm5, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: retq
+;
; AVX512F-LABEL: test160:
-; AVX512F: vpminuq
-}
-
-define void @test161(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp slt <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test161:
-; AVX512VL: vpminsq
-}
-
-define void @test162(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp sle <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test162:
-; AVX512VL: vpminsq
-}
-
-define void @test163(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp sgt <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test163:
-; AVX512VL: vpmaxsq
-}
-
-define void @test164(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp sge <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test164:
-; AVX512VL: vpmaxsq
-}
-
-define void @test165(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp ult <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test165:
-; AVX512VL: vpminuq
-}
-
-define void @test166(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp ule <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test166:
-; AVX512VL: vpminuq
-}
-
-define void @test167(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp ugt <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test167:
-; AVX512VL: vpmaxuq
-}
-
-define void @test168(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp uge <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test168:
-; AVX512VL: vpmaxuq
-}
-
-define void @test169(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp slt <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test169:
-; AVX512VL: vpmaxsq
-}
-
-define void @test170(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp sle <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test170:
-; AVX512VL: vpmaxsq
-}
-
-define void @test171(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp sgt <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test171:
-; AVX512VL: vpminsq
-}
-
-define void @test172(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp sge <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test172:
-; AVX512VL: vpminsq
-}
-
-define void @test173(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp ult <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test173:
-; AVX512VL: vpmaxuq
-}
-
-define void @test174(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp ule <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test174:
-; AVX512VL: vpmaxuq
-}
-
-define void @test175(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp ugt <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test175:
-; AVX512VL: vpminuq
-}
-
-define void @test176(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <4 x i64>*
- %ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
- %cmp = icmp uge <4 x i64> %load.a, %load.b
- %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
- store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test176:
-; AVX512VL: vpminuq
-}
-
-define void @test177(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp slt <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test177:
-; AVX512VL: vpminsq
-}
-
-define void @test178(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp sle <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test178:
-; AVX512VL: vpminsq
-}
-
-define void @test179(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp sgt <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test179:
-; AVX512VL: vpmaxsq
-}
-
-define void @test180(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp sge <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test180:
-; AVX512VL: vpmaxsq
-}
-
-define void @test181(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp ult <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test181:
-; AVX512VL: vpminuq
-}
-
-define void @test182(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp ule <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test182:
-; AVX512VL: vpminuq
-}
-
-define void @test183(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp ugt <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test183:
-; AVX512VL: vpmaxuq
-}
-
-define void @test184(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp uge <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test184:
-; AVX512VL: vpmaxuq
-}
-
-define void @test185(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp slt <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test185:
-; AVX512VL: vpmaxsq
-}
-
-define void @test186(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp sle <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test186:
-; AVX512VL: vpmaxsq
-}
-
-define void @test187(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp sgt <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test187:
-; AVX512VL: vpminsq
-}
-
-define void @test188(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp sge <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test188:
-; AVX512VL: vpminsq
-}
-
-define void @test189(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp ult <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test189:
-; AVX512VL: vpmaxuq
-}
-
-define void @test190(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp ule <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test190:
-; AVX512VL: vpmaxuq
-}
-
-define void @test191(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp ugt <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test191:
-; AVX512VL: vpminuq
-}
-
-define void @test192(i32* nocapture %a, i32* nocapture %b) nounwind {
-vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
- %ptr.a = bitcast i32* %gep.a to <2 x i64>*
- %ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
- %cmp = icmp uge <2 x i64> %load.a, %load.b
- %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
- store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
- %index.next = add i64 %index, 8
- %loop = icmp eq i64 %index.next, 16384
- br i1 %loop, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
- ret void
-
-; AVX512VL-LABEL: test192:
-; AVX512VL: vpminuq
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT: retq
+entry:
+ %cmp = icmp uge <8 x i64> %a, %b
+ %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
+ ret <8 x i64> %sel
+}
+
+define <4 x i64> @test161(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test161:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test161:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa %xmm3, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm2, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm4, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm2
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm3
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: movapd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test161:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test161:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test161:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp slt <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test162(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test162:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm8
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test162:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa %xmm1, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm5
+; SSE4-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE4-NEXT: pxor %xmm6, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE4-NEXT: pxor %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm2
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm3
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: movapd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test162:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test162:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test162:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sle <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test163(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test163:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm0, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test163:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa %xmm1, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm2
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm3
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: movapd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test163:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test163:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test163:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sgt <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test164(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test164:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm8
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test164:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa %xmm3, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm5
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: movdqa %xmm2, %xmm6
+; SSE4-NEXT: pcmpgtq %xmm4, %xmm6
+; SSE4-NEXT: pxor %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm2
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm3
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: movapd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test164:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test164:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test164:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sge <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test165(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test165:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test165:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm1, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm3, %xmm5
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE4-NEXT: movdqa %xmm4, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: pxor %xmm2, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm2
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm3
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: movapd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test165:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test165:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test165:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ult <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test166(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test166:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm8
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test166:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm3, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm1, %xmm5
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE4-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE4-NEXT: pxor %xmm6, %xmm5
+; SSE4-NEXT: movdqa %xmm2, %xmm7
+; SSE4-NEXT: pxor %xmm0, %xmm7
+; SSE4-NEXT: pxor %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm7, %xmm0
+; SSE4-NEXT: pxor %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm2
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm3
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: movapd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test166:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test166:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test166:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ule <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test167(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test167:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm2, %xmm5
+; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm0, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm6
+; SSE2-NEXT: por %xmm6, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test167:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm3, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm1, %xmm5
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE4-NEXT: movdqa %xmm2, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: pxor %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm2
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm3
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: movapd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test167:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test167:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test167:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ugt <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test168(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test168:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: pandn %xmm2, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm1, %xmm8
+; SSE2-NEXT: pandn %xmm3, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test168:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm1, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm3, %xmm5
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE4-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE4-NEXT: pxor %xmm6, %xmm5
+; SSE4-NEXT: movdqa %xmm4, %xmm7
+; SSE4-NEXT: pxor %xmm0, %xmm7
+; SSE4-NEXT: pxor %xmm2, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm7, %xmm0
+; SSE4-NEXT: pxor %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm4, %xmm2
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm3
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: movapd %xmm3, %xmm1
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test168:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm5
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test168:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test168:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp uge <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test169(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test169:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test169:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa %xmm3, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm5
+; SSE4-NEXT: movdqa %xmm2, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm4, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm4
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm1
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test169:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test169:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test169:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp slt <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %b, <4 x i64> %a
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test170(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test170:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: pandn %xmm1, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test170:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa %xmm1, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm5
+; SSE4-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE4-NEXT: pxor %xmm6, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE4-NEXT: pxor %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm4
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm1
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test170:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test170:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test170:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sle <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %b, <4 x i64> %a
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test171(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test171:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test171:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa %xmm1, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm4
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm1
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test171:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test171:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test171:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sgt <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %b, <4 x i64> %a
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test172(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test172:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: pandn %xmm1, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test172:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa %xmm3, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm5
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: movdqa %xmm2, %xmm6
+; SSE4-NEXT: pcmpgtq %xmm4, %xmm6
+; SSE4-NEXT: pxor %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm4
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm1
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test172:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm4
+; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test172:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test172:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sge <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %b, <4 x i64> %a
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test173(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test173:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test173:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm1, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm3, %xmm5
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE4-NEXT: movdqa %xmm4, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: pxor %xmm2, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm4
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm1
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test173:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test173:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test173:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ult <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %b, <4 x i64> %a
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test174(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test174:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: pandn %xmm1, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test174:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm3, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm1, %xmm5
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE4-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE4-NEXT: pxor %xmm6, %xmm5
+; SSE4-NEXT: movdqa %xmm2, %xmm7
+; SSE4-NEXT: pxor %xmm0, %xmm7
+; SSE4-NEXT: pxor %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm7, %xmm0
+; SSE4-NEXT: pxor %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm4
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm1
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test174:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test174:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test174:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ule <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %b, <4 x i64> %a
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test175(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test175:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: movdqa %xmm6, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm4
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pxor %xmm5, %xmm6
+; SSE2-NEXT: pxor %xmm0, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm4, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test175:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm3, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm1, %xmm5
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE4-NEXT: movdqa %xmm2, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: pxor %xmm4, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm4
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm1
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test175:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test175:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test175:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ugt <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %b, <4 x i64> %a
+ ret <4 x i64> %sel
+}
+
+define <4 x i64> @test176(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: test176:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm7, %xmm5
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm8, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm4, %xmm8
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: movdqa %xmm8, %xmm9
+; SSE2-NEXT: pxor %xmm4, %xmm9
+; SSE2-NEXT: movdqa %xmm0, %xmm6
+; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: pxor %xmm2, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm10, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm2, %xmm5
+; SSE2-NEXT: pandn %xmm0, %xmm4
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: pandn %xmm3, %xmm8
+; SSE2-NEXT: pandn %xmm1, %xmm9
+; SSE2-NEXT: por %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm9, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test176:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm4
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm1, %xmm6
+; SSE4-NEXT: pxor %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm3, %xmm5
+; SSE4-NEXT: pxor %xmm0, %xmm5
+; SSE4-NEXT: pcmpgtq %xmm6, %xmm5
+; SSE4-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE4-NEXT: pxor %xmm6, %xmm5
+; SSE4-NEXT: movdqa %xmm4, %xmm7
+; SSE4-NEXT: pxor %xmm0, %xmm7
+; SSE4-NEXT: pxor %xmm2, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm7, %xmm0
+; SSE4-NEXT: pxor %xmm6, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm4
+; SSE4-NEXT: movdqa %xmm5, %xmm0
+; SSE4-NEXT: blendvpd %xmm3, %xmm1
+; SSE4-NEXT: movapd %xmm4, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test176:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm5
+; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test176:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
+; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test176:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp uge <4 x i64> %a, %b
+ %sel = select <4 x i1> %cmp, <4 x i64> %b, <4 x i64> %a
+ ret <4 x i64> %sel
+}
+
+define <2 x i64> @test177(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test177:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test177:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa %xmm1, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm1
+; SSE4-NEXT: movapd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test177:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test177:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test177:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp slt <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test178(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test178:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test178:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE4-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm1
+; SSE4-NEXT: movapd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test178:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test178:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test178:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sle <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test179(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test179:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test179:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm1
+; SSE4-NEXT: movapd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test179:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test179:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test179:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sgt <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test180(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test180:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test180:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa %xmm1, %xmm3
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm3
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm1
+; SSE4-NEXT: movapd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test180:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test180:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test180:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sge <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test181(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test181:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test181:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm2, %xmm3
+; SSE4-NEXT: pxor %xmm0, %xmm3
+; SSE4-NEXT: pxor %xmm1, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm1
+; SSE4-NEXT: movapd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test181:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test181:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test181:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ult <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test182(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test182:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test182:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm1, %xmm0
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: pxor %xmm2, %xmm3
+; SSE4-NEXT: pcmpgtq %xmm0, %xmm3
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm1
+; SSE4-NEXT: movapd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test182:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test182:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test182:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ule <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test183(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test183:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: por %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test183:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm1, %xmm3
+; SSE4-NEXT: pxor %xmm0, %xmm3
+; SSE4-NEXT: pxor %xmm2, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm1
+; SSE4-NEXT: movapd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test183:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test183:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test183:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ugt <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test184(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test184:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test184:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: pxor %xmm1, %xmm3
+; SSE4-NEXT: pcmpgtq %xmm0, %xmm3
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm2, %xmm1
+; SSE4-NEXT: movapd %xmm1, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test184:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test184:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test184:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp uge <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test185(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test185:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test185:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa %xmm1, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm2
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test185:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test185:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test185:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp slt <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %b, <2 x i64> %a
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test186(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test186:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test186:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE4-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm2
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test186:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test186:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test186:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sle <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %b, <2 x i64> %a
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test187(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test187:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test187:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm2
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test187:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test187:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test187:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sgt <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %b, <2 x i64> %a
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test188(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test188:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test188:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa %xmm1, %xmm3
+; SSE4-NEXT: pcmpgtq %xmm2, %xmm3
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm2
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test188:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test188:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test188:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminsq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp sge <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %b, <2 x i64> %a
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test189(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test189:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test189:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm2, %xmm3
+; SSE4-NEXT: pxor %xmm0, %xmm3
+; SSE4-NEXT: pxor %xmm1, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm2
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test189:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test189:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test189:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ult <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %b, <2 x i64> %a
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test190(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test190:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test190:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm1, %xmm0
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: pxor %xmm2, %xmm3
+; SSE4-NEXT: pcmpgtq %xmm0, %xmm3
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm2
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test190:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test190:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test190:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ule <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %b, <2 x i64> %a
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test191(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test191:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test191:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm1, %xmm3
+; SSE4-NEXT: pxor %xmm0, %xmm3
+; SSE4-NEXT: pxor %xmm2, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm2
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test191:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test191:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test191:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp ugt <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %b, <2 x i64> %a
+ ret <2 x i64> %sel
+}
+
+define <2 x i64> @test192(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: test192:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pandn %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: test192:
+; SSE4: # BB#0: # %entry
+; SSE4-NEXT: movdqa %xmm0, %xmm2
+; SSE4-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: pxor %xmm1, %xmm3
+; SSE4-NEXT: pcmpgtq %xmm0, %xmm3
+; SSE4-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE4-NEXT: pxor %xmm3, %xmm0
+; SSE4-NEXT: blendvpd %xmm1, %xmm2
+; SSE4-NEXT: movapd %xmm2, %xmm0
+; SSE4-NEXT: retq
+;
+; AVX1-LABEL: test192:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test192:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512BW-LABEL: test192:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpminuq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: retq
+entry:
+ %cmp = icmp uge <2 x i64> %a, %b
+ %sel = select <2 x i1> %cmp, <2 x i64> %b, <2 x i64> %a
+ ret <2 x i64> %sel
}
diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll
index 8c8092888834..359ea7eb3ee5 100644
--- a/test/CodeGen/X86/vselect.ll
+++ b/test/CodeGen/X86/vselect.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2,-sse4.1 < %s | FileCheck %s
; Verify that we don't emit packed vector shifts instructions if the
diff --git a/test/CodeGen/X86/vshift_scalar.ll b/test/CodeGen/X86/vshift_scalar.ll
index 9dd8478caaed..87eec3f9e97b 100644
--- a/test/CodeGen/X86/vshift_scalar.ll
+++ b/test/CodeGen/X86/vshift_scalar.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s
+; REQUIRES: default_triple
; Legalization test that requires scalarizing a vector.
diff --git a/test/CodeGen/X86/wide-integer-cmp.ll b/test/CodeGen/X86/wide-integer-cmp.ll
new file mode 100644
index 000000000000..c45a0541e6a7
--- /dev/null
+++ b/test/CodeGen/X86/wide-integer-cmp.ll
@@ -0,0 +1,130 @@
+; RUN: llc -mtriple=i686-linux-gnu %s -o - | FileCheck %s
+
+
+define i32 @branch_eq(i64 %a, i64 %b) {
+entry:
+ %cmp = icmp eq i64 %a, %b
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; CHECK-LABEL: branch_eq:
+; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]]
+; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]]
+; CHECK: xorl 16(%esp), [[LHSHi]]
+; CHECK: xorl 12(%esp), [[LHSLo]]
+; CHECK: orl [[LHSHi]], [[LHSLo]]
+; CHECK: jne [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @branch_slt(i64 %a, i64 %b) {
+entry:
+ %cmp = icmp slt i64 %a, %b
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; CHECK-LABEL: branch_slt:
+; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]]
+; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]]
+; CHECK: cmpl 12(%esp), [[LHSLo]]
+; CHECK: sbbl 16(%esp), [[LHSHi]]
+; CHECK: jge [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @branch_ule(i64 %a, i64 %b) {
+entry:
+ %cmp = icmp ule i64 %a, %b
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; CHECK-LABEL: branch_ule:
+; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]]
+; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]]
+; CHECK: cmpl 4(%esp), [[RHSLo]]
+; CHECK: sbbl 8(%esp), [[RHSHi]]
+; CHECK: jb [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @set_gt(i64 %a, i64 %b) {
+entry:
+ %cmp = icmp sgt i64 %a, %b
+ %res = select i1 %cmp, i32 1, i32 0
+ ret i32 %res
+
+; CHECK-LABEL: set_gt:
+; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]]
+; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]]
+; CHECK: cmpl 4(%esp), [[RHSLo]]
+; CHECK: sbbl 8(%esp), [[RHSHi]]
+; CHECK: setl %al
+; CHECK: retl
+}
+
+define i32 @test_wide(i128 %a, i128 %b) {
+entry:
+ %cmp = icmp slt i128 %a, %b
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; CHECK-LABEL: test_wide:
+; CHECK: cmpl 24(%esp)
+; CHECK: sbbl 28(%esp)
+; CHECK: sbbl 32(%esp)
+; CHECK: sbbl 36(%esp)
+; CHECK: jge [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @test_carry_false(i64 %a, i64 %b) {
+entry:
+ %x = and i64 %a, -4294967296 ;0xffffffff00000000
+ %y = and i64 %b, -4294967296
+ %cmp = icmp slt i64 %x, %y
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; The comparison of the low bits will be folded to a CARRY_FALSE node. Make
+; sure the code can handle that.
+; CHECK-LABEL: carry_false:
+; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]]
+; CHECK: cmpl 16(%esp), [[LHSHi]]
+; CHECK: jge [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 6f1bd7541231..fad1fa32559a 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -191,9 +191,7 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
; CHECK-NEXT: movd %[[CONSTANT1]], %e[[R1:[abcd]]]x
; CHECK-NEXT: movw %[[R1]]x, (%[[PTR1:.*]])
; CHECK-NEXT: movb $1, 2(%[[PTR1]])
-; CHECK-NEXT: movl (%[[PTR0]]), [[TMP1:%e[abcd]+x]]
-; CHECK-NEXT: movl [[TMP1]], [[TMP2:.*]]
-; CHECK-NEXT: pmovzxbd [[TMP2]], %[[X0:xmm[0-9]+]]
+; CHECK-NEXT: pmovzxbd (%[[PTR0]]), %[[X0:xmm[0-9]+]]
; CHECK-NEXT: movdqa %[[X0]], %[[X1:xmm[0-9]+]]
; CHECK-NEXT: psrld $1, %[[X1]]
; CHECK-NEXT: pblendw $192, %[[X0]], %[[X1]]
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 302805213d06..66ba6350c8a8 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/win-catchpad-csrs.ll b/test/CodeGen/X86/win-catchpad-csrs.ll
new file mode 100644
index 000000000000..327ee45b4326
--- /dev/null
+++ b/test/CodeGen/X86/win-catchpad-csrs.ll
@@ -0,0 +1,268 @@
+; RUN: llc -verify-machineinstrs -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i8*, i32, i32, i32, i32, i8* }
+%eh.CatchableTypeArray.1 = type { i32, [1 x %eh.CatchableType*] }
+%eh.ThrowInfo = type { i32, i8*, i8*, i8* }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+
+declare i32 @getint()
+declare void @useints(...)
+declare void @f(i32 %p)
+declare i32 @__CxxFrameHandler3(...)
+
+define i32 @try_catch_catch() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %a = call i32 @getint()
+ %b = call i32 @getint()
+ %c = call i32 @getint()
+ %d = call i32 @getint()
+ call void (...) @useints(i32 %a, i32 %b, i32 %c, i32 %d)
+ invoke void @f(i32 1)
+ to label %try.cont unwind label %catch.dispatch
+
+try.cont:
+ ret i32 0
+
+catch.dispatch:
+ %cs = catchswitch within none [label %handler1] unwind to caller
+
+handler1:
+ %h1 = catchpad within %cs [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
+ call void @f(i32 2) [ "funclet"(token %h1) ]
+ catchret from %h1 to label %try.cont
+}
+
+; X86-LABEL: _try_catch_catch:
+; X86: pushl %ebp
+; X86: movl %esp, %ebp
+; X86: pushl %ebx
+; X86: pushl %edi
+; X86: pushl %esi
+; X86: subl ${{[0-9]+}}, %esp
+; X86: calll _getint
+; X86: calll _getint
+; X86: calll _getint
+; X86: calll _getint
+; X86: calll _useints
+; X86: movl $0, -{{[0-9]+}}(%ebp)
+; X86: movl $1, (%esp)
+; X86: calll _f
+; X86: [[contbb:LBB0_[0-9]+]]: # %try.cont
+; X86: popl %esi
+; X86: popl %edi
+; X86: popl %ebx
+; X86: popl %ebp
+; X86: retl
+
+; X86: [[restorebb:LBB0_[0-9]+]]:
+; X86: addl $12, %ebp
+; X86: jmp [[contbb]]
+
+; X86: "?catch$[[catch1bb:[0-9]+]]@?0?try_catch_catch@4HA":
+; X86: LBB0_[[catch1bb]]: # %handler1{{$}}
+; X86: pushl %ebp
+; X86-NOT: pushl
+; X86: subl $16, %esp
+; X86: addl $12, %ebp
+; X86: movl $1, -{{[0-9]+}}(%ebp)
+; X86: movl $2, (%esp)
+; X86: calll _f
+; X86: movl $[[restorebb]], %eax
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+
+; X86: L__ehtable$try_catch_catch:
+; X86: $handlerMap$0$try_catch_catch:
+; X86: .long 0
+; X86: .long "??_R0H@8"
+; X86: .long 0
+; X86: .long "?catch$[[catch1bb]]@?0?try_catch_catch@4HA"
+
+; X64-LABEL: try_catch_catch:
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: pushq %rsi
+; X64: .seh_pushreg 6
+; X64: pushq %rdi
+; X64: .seh_pushreg 7
+; X64: pushq %rbx
+; X64: .seh_pushreg 3
+; X64: subq $40, %rsp
+; X64: .seh_stackalloc 40
+; X64: leaq 32(%rsp), %rbp
+; X64: .seh_setframe 5, 32
+; X64: .seh_endprologue
+; X64: movq $-2, (%rbp)
+; X64: callq getint
+; X64: callq getint
+; X64: callq getint
+; X64: callq getint
+; X64: callq useints
+; X64: movl $1, %ecx
+; X64: callq f
+; X64: [[contbb:\.LBB0_[0-9]+]]: # Block address taken
+; X64-NEXT: # %try.cont
+; X64: addq $40, %rsp
+; X64: popq %rbp
+; X64: retq
+
+; X64: "?catch$[[catch1bb:[0-9]+]]@?0?try_catch_catch@4HA":
+; X64: LBB0_[[catch1bb]]: # %handler1{{$}}
+; X64: movq %rdx, 16(%rsp)
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: pushq %rsi
+; X64: .seh_pushreg 6
+; X64: pushq %rdi
+; X64: .seh_pushreg 7
+; X64: pushq %rbx
+; X64: .seh_pushreg 3
+; X64: subq $40, %rsp
+; X64: .seh_stackalloc 40
+; X64: leaq 32(%rdx), %rbp
+; X64: .seh_endprologue
+; X64: movl $2, %ecx
+; X64: callq f
+; X64: leaq [[contbb]](%rip), %rax
+; X64: addq $40, %rsp
+; X64: popq %rbx
+; X64: popq %rdi
+; X64: popq %rsi
+; X64: popq %rbp
+; X64: retq
+
+; X64: $handlerMap$0$try_catch_catch:
+; X64: .long 0
+; X64: .long "??_R0H@8"@IMGREL
+; X64: .long 0
+; X64: .long "?catch$[[catch1bb]]@?0?try_catch_catch@4HA"@IMGREL
+; X64: .long 88
+
+define i32 @try_one_csr() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %a = call i32 @getint()
+ %b = call i32 @getint()
+ call void (...) @useints(i32 %a)
+ invoke void @f(i32 1)
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch:
+ %cs = catchswitch within none [label %handler1] unwind to caller
+
+handler1:
+ %0 = catchpad within %cs [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
+ catchret from %0 to label %try.cont
+
+try.cont:
+ ret i32 0
+}
+
+; X64-LABEL: try_one_csr:
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: pushq %rsi
+; X64: .seh_pushreg 6
+; X64-NOT: pushq
+; X64: subq $40, %rsp
+; X64: .seh_stackalloc 40
+; X64: leaq 32(%rsp), %rbp
+; X64: .seh_setframe 5, 32
+; X64: .seh_endprologue
+; X64: callq getint
+; X64: callq getint
+; X64: callq useints
+; X64: movl $1, %ecx
+; X64: callq f
+; X64: [[contbb:\.LBB1_[0-9]+]]: # Block address taken
+; X64-NEXT: # %try.cont
+; X64: addq $40, %rsp
+; X64-NOT: popq
+; X64: popq %rsi
+; X64: popq %rbp
+; X64: retq
+
+; X64: "?catch$[[catch1bb:[0-9]+]]@?0?try_one_csr@4HA":
+; X64: LBB1_[[catch1bb]]: # %handler1{{$}}
+; X64: movq %rdx, 16(%rsp)
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: pushq %rsi
+; X64: .seh_pushreg 6
+; X64: subq $40, %rsp
+; X64: .seh_stackalloc 40
+; X64: leaq 32(%rdx), %rbp
+; X64: .seh_endprologue
+; X64: leaq [[contbb]](%rip), %rax
+; X64: addq $40, %rsp
+; X64: popq %rsi
+; X64: popq %rbp
+; X64: retq
+
+; X64: $handlerMap$0$try_one_csr:
+; X64: .long 0
+; X64: .long "??_R0H@8"@IMGREL
+; X64: .long 0
+; X64: .long "?catch$[[catch1bb]]@?0?try_one_csr@4HA"@IMGREL
+; X64: .long 72
+
+define i32 @try_no_csr() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @f(i32 1)
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch:
+ %cs = catchswitch within none [label %handler1] unwind to caller
+
+handler1:
+ %cp1 = catchpad within %cs [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
+ catchret from %cp1 to label %try.cont
+
+try.cont:
+ ret i32 0
+}
+
+; X64-LABEL: try_no_csr:
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64-NOT: pushq
+; X64: subq $48, %rsp
+; X64: .seh_stackalloc 48
+; X64: leaq 48(%rsp), %rbp
+; X64: .seh_setframe 5, 48
+; X64: .seh_endprologue
+; X64: movl $1, %ecx
+; X64: callq f
+; X64: [[contbb:\.LBB2_[0-9]+]]: # Block address taken
+; X64-NEXT: # %try.cont
+; X64: addq $48, %rsp
+; X64-NOT: popq
+; X64: popq %rbp
+; X64: retq
+
+; X64: "?catch$[[catch1bb:[0-9]+]]@?0?try_no_csr@4HA":
+; X64: LBB2_[[catch1bb]]: # %handler1{{$}}
+; X64: movq %rdx, 16(%rsp)
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: subq $32, %rsp
+; X64: .seh_stackalloc 32
+; X64: leaq 48(%rdx), %rbp
+; X64: .seh_endprologue
+; X64: leaq [[contbb]](%rip), %rax
+; X64: addq $32, %rsp
+; X64: popq %rbp
+; X64: retq
+
+; X64: $handlerMap$0$try_no_csr:
+; X64: .long 0
+; X64: .long "??_R0H@8"@IMGREL
+; X64: .long 0
+; X64: .long "?catch$[[catch1bb]]@?0?try_no_csr@4HA"@IMGREL
+; X64: .long 56
diff --git a/test/CodeGen/X86/win-catchpad-nested-cxx.ll b/test/CodeGen/X86/win-catchpad-nested-cxx.ll
new file mode 100644
index 000000000000..ac4598385cd1
--- /dev/null
+++ b/test/CodeGen/X86/win-catchpad-nested-cxx.ll
@@ -0,0 +1,105 @@
+; RUN: llc -verify-machineinstrs -mtriple=i686-pc-windows-msvc < %s \
+; RUN: | FileCheck --check-prefix=CHECK --check-prefix=X86 %s
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-pc-windows-msvc < %s \
+; RUN: | FileCheck --check-prefix=CHECK --check-prefix=X64 %s
+
+; Loosely based on IR for this C++ source code:
+; void f(int p);
+; void try_in_catch() {
+; try {
+; f(1);
+; } catch (...) {
+; try {
+; f(2);
+; } catch (...) {
+; f(3);
+; }
+; }
+; }
+
+declare void @f(i32 %p)
+declare i32 @__CxxFrameHandler3(...)
+
+define i32 @try_in_catch() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @f(i32 1)
+ to label %try.cont unwind label %catch.dispatch.1
+try.cont:
+ ret i32 0
+
+catch.dispatch.1:
+ %cs1 = catchswitch within none [label %handler1] unwind to caller
+handler1:
+ %h1 = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ invoke void @f(i32 2) [ "funclet"(token %h1) ]
+ to label %catchret1 unwind label %catch.dispatch.2
+catchret1:
+ catchret from %h1 to label %try.cont
+
+catch.dispatch.2:
+ %cs2 = catchswitch within %h1 [label %handler2] unwind to caller
+handler2:
+ %h2 = catchpad within %cs2 [i8* null, i32 64, i8* null]
+ call void @f(i32 3)
+ catchret from %h2 to label %catchret1
+}
+
+; X86-LABEL: L__ehtable$try_in_catch:
+; X64-LABEL: $cppxdata$try_in_catch:
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long ($stateUnwindMap$try_in_catch)
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long ($tryMap$try_in_catch)
+; ip2state num + ptr
+; X86-NEXT: .long 0
+; X86-NEXT: .long 0
+; X64-NEXT: .long 7
+; X64-NEXT: .long ($ip2state$try_in_catch)
+; unwindhelp offset
+; X64-NEXT: .long 40
+; CHECK-NEXT: .long 0
+; EHFlags
+; CHECK-NEXT: .long 1
+
+; CHECK: $tryMap$try_in_catch:
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ($handlerMap$0$try_in_catch)
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ($handlerMap$1$try_in_catch)
+
+; CHECK: $handlerMap$0$try_in_catch:
+; CHECK-NEXT: .long 64
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long "?catch${{[0-9]+}}@?0?try_in_catch@4HA"
+; X64-NEXT: .long 56
+
+; CHECK: $handlerMap$1$try_in_catch:
+; CHECK-NEXT: .long 64
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long "?catch${{[0-9]+}}@?0?try_in_catch@4HA"
+; X64-NEXT: .long 56
+
+; X64: $ip2state$try_in_catch:
+; X64-NEXT: .long .Lfunc_begin0@IMGREL
+; X64-NEXT: .long -1
+; X64-NEXT: .long .Ltmp0@IMGREL+1
+; X64-NEXT: .long 0
+; X64-NEXT: .long .Ltmp1@IMGREL+1
+; X64-NEXT: .long -1
+; X64-NEXT: .long "?catch$2@?0?try_in_catch@4HA"@IMGREL
+; X64-NEXT: .long 1
+; X64-NEXT: .long .Ltmp2@IMGREL+1
+; X64-NEXT: .long 2
+; X64-NEXT: .long .Ltmp3@IMGREL+1
+; X64-NEXT: .long 1
+; X64-NEXT: .long "?catch$4@?0?try_in_catch@4HA"@IMGREL
+; X64-NEXT: .long 3
diff --git a/test/CodeGen/X86/win-catchpad-nested.ll b/test/CodeGen/X86/win-catchpad-nested.ll
new file mode 100644
index 000000000000..7afcd9cc1f3e
--- /dev/null
+++ b/test/CodeGen/X86/win-catchpad-nested.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=x86_64-pc-windows-coreclr < %s | FileCheck %s
+
+declare void @ProcessCLRException()
+
+declare void @f()
+
+define void @test1() personality void ()* @ProcessCLRException {
+entry:
+ invoke void @f()
+ to label %exit unwind label %catch.dispatch.1
+exit:
+ ret void
+
+catch.dispatch.1:
+ %cs1 = catchswitch within none [label %outer.catch] unwind to caller
+
+outer.catch:
+ %cp1 = catchpad within %cs1 [i32 1]
+ invoke void @f() [ "funclet"(token %cp1) ]
+ to label %outer.ret unwind label %catch.dispatch.2
+outer.ret:
+ catchret from %cp1 to label %exit
+
+catch.dispatch.2:
+ %cs2 = catchswitch within %cp1 [label %inner.catch] unwind to caller
+inner.catch:
+ %cp2 = catchpad within %cs2 [i32 2]
+ catchret from %cp2 to label %outer.ret
+}
+
+; Check the catchret targets
+; CHECK-LABEL: test1: # @test1
+; CHECK: [[Exit:^[^: ]+]]: # Block address taken
+; CHECK-NEXT: # %exit
+; CHECK: [[OuterRet:^[^: ]+]]: # Block address taken
+; CHECK-NEXT: # %outer.ret
+; CHECK-NEXT: leaq [[Exit]](%rip), %rax
+; CHECK: retq # CATCHRET
+; CHECK: {{^[^: ]+}}: # %inner.catch
+; CHECK: .seh_endprolog
+; CHECK-NEXT: leaq [[OuterRet]](%rip), %rax
+; CHECK: retq # CATCHRET
diff --git a/test/CodeGen/X86/win-catchpad-varargs.ll b/test/CodeGen/X86/win-catchpad-varargs.ll
new file mode 100644
index 000000000000..6508f3bd7d64
--- /dev/null
+++ b/test/CodeGen/X86/win-catchpad-varargs.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s --check-prefix=X86
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+declare i32 @__CxxFrameHandler3(...)
+declare void @g()
+
+define i32 @f(i32 %a, ...) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %ap = alloca i8*
+ invoke void @g()
+ to label %return unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ %ap1 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap1)
+ %argp.cur = load i8*, i8** %ap
+ %1 = bitcast i8* %argp.cur to i32*
+ %arg2 = load i32, i32* %1
+ call void @llvm.va_end(i8* %ap1)
+ catchret from %0 to label %return
+
+return: ; preds = %entry, %catch
+ %retval.0 = phi i32 [ %arg2, %catch ], [ -1, %entry ]
+ ret i32 %retval.0
+}
+
+; X64-LABEL: .seh_proc f
+; X64: pushq %rbp
+; X64: subq $64, %rsp
+; X64: leaq 64(%rsp), %rbp
+; X64: movq $-2, -8(%rbp)
+; X64: movl $-1, -20(%rbp) # 4-byte Folded Spill
+; X64: callq g
+; X64: .LBB0_1
+; X64: movl -20(%rbp), %eax # 4-byte Reload
+; X64: addq $64, %rsp
+; X64: popq %rbp
+
+; X64-LABEL: "?catch${{[0-9]}}@?0?f@4HA":
+; X64: .seh_proc "?catch${{[0-9]}}@?0?f@4HA"
+; X64: movq %rdx, 16(%rsp)
+; X64: pushq %rbp
+; X64: subq $32, %rsp
+; X64: leaq 64(%rdx), %rbp
+; arg2 is at RBP+40:
+; start at arg2
+; + 8 for arg1
+; + 8 for retaddr
+; + 8 for RBP
+; + 64 for stackalloc
+; - 64 for setframe
+; = 40
+; X64: movl 24(%rbp), %eax
+; X64: movl %eax, -20(%rbp) # 4-byte Spill
+; X64: leaq .LBB0_1(%rip), %rax
+; X64: addq $32, %rsp
+; X64: popq %rbp
+; X64: retq # CATCHRET
+
+; X86-LABEL: _f: # @f
+; X86: pushl %ebp
+; X86: movl %esp, %ebp
+; X86: pushl %ebx
+; X86: pushl %edi
+; X86: pushl %esi
+; X86: subl $24, %esp
+; X86: movl $-1, -36(%ebp)
+; X86: calll _g
+; X86: LBB0_[[retbb:[0-9]+]]:
+; X86: movl -36(%ebp), %eax
+; X86: addl $24, %esp
+; X86: popl %esi
+; X86: popl %edi
+; X86: popl %ebx
+; X86: popl %ebp
+; X86: retl
+
+; X86: LBB0_[[restorebb:[0-9]+]]: # Block address taken
+; X86: addl $12, %ebp
+; arg2 is at EBP offset 12:
+; + 4 for arg1
+; + 4 for retaddr
+; + 4 for EBP
+; X86: movl 12(%ebp), %eax
+; X86: movl %eax, -36(%ebp)
+; X86: jmp LBB0_[[retbb]]
+
+; X86-LABEL: "?catch${{[0-9]}}@?0?f@4HA":
+; X86: pushl %ebp
+; X86: addl $12, %ebp
+; Done due to mov %esp, %ebp
+; X86: leal 12(%ebp), %eax
+; X86: movl $LBB0_[[restorebb]], %eax
+; X86: popl %ebp
+; X86: retl # CATCHRET
diff --git a/test/CodeGen/X86/win-catchpad.ll b/test/CodeGen/X86/win-catchpad.ll
new file mode 100644
index 000000000000..836c53bda8e6
--- /dev/null
+++ b/test/CodeGen/X86/win-catchpad.ll
@@ -0,0 +1,353 @@
+; RUN: llc -verify-machineinstrs -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s
+
+; Loosely based on IR for this C++ source code:
+; void f(int p);
+; int main() {
+; try {
+; f(1);
+; } catch (int e) {
+; f(e);
+; } catch (...) {
+; f(3);
+; }
+; }
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i8*, i32, i32, i32, i32, i8* }
+%eh.CatchableTypeArray.1 = type { i32, [1 x %eh.CatchableType*] }
+%eh.ThrowInfo = type { i32, i8*, i8*, i8* }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+
+
+declare void @f(i32 %p, i32* %l)
+declare i1 @getbool()
+declare i32 @__CxxFrameHandler3(...)
+
+define i32 @try_catch_catch() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %e.addr = alloca i32
+ %local = alloca i32
+ invoke void @f(i32 1, i32* %local)
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs = catchswitch within none [label %handler1, label %handler2] unwind to caller
+
+handler1:
+ %h1 = catchpad within %cs [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i32* %e.addr]
+ %e = load i32, i32* %e.addr
+ call void @f(i32 %e, i32* %local) [ "funclet"(token %h1) ]
+ catchret from %h1 to label %try.cont
+
+handler2:
+ %h2 = catchpad within %cs [i8* null, i32 64, i8* null]
+ call void @f(i32 3, i32* %local) [ "funclet"(token %h2) ]
+ catchret from %h2 to label %try.cont
+
+try.cont:
+ ret i32 0
+}
+
+; X86-LABEL: _try_catch_catch:
+; X86: movl %esp, -[[sp_offset:[0-9]+]](%ebp)
+; X86: movl $0, -{{[0-9]+}}(%ebp)
+; X86: leal -[[local_offs:[0-9]+]](%ebp), %[[addr_reg:[a-z]+]]
+; X86-DAG: movl %[[addr_reg]], 4(%esp)
+; X86-DAG: movl $1, (%esp)
+; X86: calll _f
+; X86: [[contbb:LBB0_[0-9]+]]: # %try.cont
+; X86: retl
+
+; X86: [[restorebb1:LBB0_[0-9]+]]: # Block address taken
+; X86-NEXT: # %handler1
+; X86-NEXT: addl $12, %ebp
+; X86: jmp [[contbb]]
+
+; FIXME: These should be de-duplicated.
+; X86: [[restorebb2:LBB0_[0-9]+]]: # Block address taken
+; X86-NEXT: # %handler2
+; X86-NEXT: addl $12, %ebp
+; X86: jmp [[contbb]]
+
+; X86: "?catch$[[catch1bb:[0-9]+]]@?0?try_catch_catch@4HA":
+; X86: LBB0_[[catch1bb]]: # %handler1{{$}}
+; X86: pushl %ebp
+; X86: subl $8, %esp
+; X86: addl $12, %ebp
+; X86: movl %esp, -[[sp_offset]](%ebp)
+; X86-DAG: movl -32(%ebp), %[[e_reg:[a-z]+]]
+; X86-DAG: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]]
+; X86-DAG: movl $1, -{{[0-9]+}}(%ebp)
+; X86-DAG: movl %[[addr_reg]], 4(%esp)
+; X86-DAG: movl %[[e_reg]], (%esp)
+; X86: calll _f
+; X86-NEXT: movl $[[restorebb1]], %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+
+; X86: "?catch$[[catch2bb:[0-9]+]]@?0?try_catch_catch@4HA":
+; X86: LBB0_[[catch2bb]]: # %handler2{{$}}
+; X86: pushl %ebp
+; X86: subl $8, %esp
+; X86: addl $12, %ebp
+; X86: movl %esp, -[[sp_offset]](%ebp)
+; X86-DAG: leal -[[local_offs]](%ebp), %[[addr_reg:[a-z]+]]
+; X86-DAG: movl $1, -{{[0-9]+}}(%ebp)
+; X86-DAG: movl %[[addr_reg]], 4(%esp)
+; X86-DAG: movl $3, (%esp)
+; X86: calll _f
+; X86-NEXT: movl $[[restorebb2]], %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+
+; X86: L__ehtable$try_catch_catch:
+; X86: $handlerMap$0$try_catch_catch:
+; X86-NEXT: .long 0
+; X86-NEXT: .long "??_R0H@8"
+; X86-NEXT: .long -20
+; X86-NEXT: .long "?catch$[[catch1bb]]@?0?try_catch_catch@4HA"
+; X86-NEXT: .long 64
+; X86-NEXT: .long 0
+; X86-NEXT: .long 0
+; X86-NEXT: .long "?catch$[[catch2bb]]@?0?try_catch_catch@4HA"
+
+; X64-LABEL: try_catch_catch:
+; X64: Lfunc_begin0:
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: subq $48, %rsp
+; X64: .seh_stackalloc 48
+; X64: leaq 48(%rsp), %rbp
+; X64: .seh_setframe 5, 48
+; X64: .seh_endprologue
+; X64: movq $-2, -8(%rbp)
+; X64: .Ltmp0
+; X64-DAG: leaq -[[local_offs:[0-9]+]](%rbp), %rdx
+; X64-DAG: movl $1, %ecx
+; X64: callq f
+; X64: [[contbb:\.LBB0_[0-9]+]]: # Block address taken
+; X64-NEXT: # %try.cont
+; X64: addq $48, %rsp
+; X64: popq %rbp
+; X64: retq
+
+; X64: "?catch$[[catch1bb:[0-9]+]]@?0?try_catch_catch@4HA":
+; X64: LBB0_[[catch1bb]]: # %handler1{{$}}
+; X64: movq %rdx, 16(%rsp)
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: subq $32, %rsp
+; X64: .seh_stackalloc 32
+; X64: leaq 48(%rdx), %rbp
+; X64: .seh_endprologue
+; X64-DAG: leaq -[[local_offs]](%rbp), %rdx
+; X64-DAG: movl -12(%rbp), %ecx
+; X64: callq f
+; X64: leaq [[contbb]](%rip), %rax
+; X64-NEXT: addq $32, %rsp
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+
+; X64: "?catch$[[catch2bb:[0-9]+]]@?0?try_catch_catch@4HA":
+; X64: LBB0_[[catch2bb]]: # %handler2{{$}}
+; X64: movq %rdx, 16(%rsp)
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: subq $32, %rsp
+; X64: .seh_stackalloc 32
+; X64: leaq 48(%rdx), %rbp
+; X64: .seh_endprologue
+; X64-DAG: leaq -[[local_offs]](%rbp), %rdx
+; X64-DAG: movl $3, %ecx
+; X64: callq f
+; X64: leaq [[contbb]](%rip), %rax
+; X64-NEXT: addq $32, %rsp
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+
+; X64: $cppxdata$try_catch_catch:
+; X64-NEXT: .long 429065506
+; X64-NEXT: .long 2
+; X64-NEXT: .long ($stateUnwindMap$try_catch_catch)@IMGREL
+; X64-NEXT: .long 1
+; X64-NEXT: .long ($tryMap$try_catch_catch)@IMGREL
+; X64-NEXT: .long 5
+; X64-NEXT: .long ($ip2state$try_catch_catch)@IMGREL
+; X64-NEXT: .long 40
+; X64-NEXT: .long 0
+; X64-NEXT: .long 1
+
+; X64: $tryMap$try_catch_catch:
+; X64-NEXT: .long 0
+; X64-NEXT: .long 0
+; X64-NEXT: .long 1
+; X64-NEXT: .long 2
+; X64-NEXT: .long ($handlerMap$0$try_catch_catch)@IMGREL
+
+; X64: $handlerMap$0$try_catch_catch:
+; X64-NEXT: .long 0
+; X64-NEXT: .long "??_R0H@8"@IMGREL
+; X64-NEXT: .long 36
+; X64-NEXT: .long "?catch$[[catch1bb]]@?0?try_catch_catch@4HA"@IMGREL
+; X64-NEXT: .long 56
+; X64-NEXT: .long 64
+; X64-NEXT: .long 0
+; X64-NEXT: .long 0
+; X64-NEXT: .long "?catch$[[catch2bb]]@?0?try_catch_catch@4HA"@IMGREL
+; X64-NEXT: .long 56
+
+; X64: $ip2state$try_catch_catch:
+; X64-NEXT: .long .Lfunc_begin0@IMGREL
+; X64-NEXT: .long -1
+; X64-NEXT: .long .Ltmp0@IMGREL+1
+; X64-NEXT: .long 0
+; X64-NEXT: .long .Ltmp1@IMGREL+1
+; X64-NEXT: .long -1
+; X64-NEXT: .long "?catch$[[catch1bb]]@?0?try_catch_catch@4HA"@IMGREL
+; X64-NEXT: .long 1
+; X64-NEXT: .long "?catch$[[catch2bb]]@?0?try_catch_catch@4HA"@IMGREL
+; X64-NEXT: .long 1
+
+
+define i32 @branch_to_normal_dest() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @f(i32 1, i32* null)
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch:
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %cp1 = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ br label %loop
+
+loop:
+ %V = call i1 @getbool() [ "funclet"(token %cp1) ]
+ br i1 %V, label %loop, label %catch.done
+
+catch.done:
+ catchret from %cp1 to label %try.cont
+
+try.cont:
+ ret i32 0
+}
+
+; X86-LABEL: _branch_to_normal_dest:
+; X86: calll _f
+
+; X86: [[contbb:LBB1_[0-9]+]]: # %try.cont
+; X86: retl
+
+; X86: [[restorebb:LBB1_[0-9]+]]: # Block address taken
+; X86-NEXT: # %catch.done
+; X86-NEXT: addl $12, %ebp
+; X86: jmp [[contbb]]
+
+; X86: "?catch$[[catchbb:[0-9]+]]@?0?branch_to_normal_dest@4HA":
+; X86: LBB1_[[catchbb]]: # %catch{{$}}
+; X86: pushl %ebp
+; X86: subl $8, %esp
+; X86: addl $12, %ebp
+; X86: LBB1_[[loopbb:[0-9]+]]: # %loop
+; X86: movl $1, -16(%ebp)
+; X86: calll _getbool
+; X86: testb $1, %al
+; X86: jne LBB1_[[loopbb]]
+; X86: # %catch.done
+; X86-NEXT: movl $[[restorebb]], %eax
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+
+; X86: L__ehtable$branch_to_normal_dest:
+; X86: $handlerMap$0$branch_to_normal_dest:
+; X86-NEXT: .long 64
+; X86-NEXT: .long 0
+; X86-NEXT: .long 0
+; X86-NEXT: .long "?catch$[[catchbb]]@?0?branch_to_normal_dest@4HA"
+
+; X64-LABEL: branch_to_normal_dest:
+; X64: # %entry
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: subq $48, %rsp
+; X64: .seh_stackalloc 48
+; X64: leaq 48(%rsp), %rbp
+; X64: .seh_setframe 5, 48
+; X64: .seh_endprologue
+; X64: .Ltmp[[before_call:[0-9]+]]:
+; X64: callq f
+; X64: .Ltmp[[after_call:[0-9]+]]:
+; X64: [[contbb:\.LBB1_[0-9]+]]: # Block address taken
+; X64-NEXT: # %try.cont
+; X64: addq $48, %rsp
+; X64: popq %rbp
+; X64: retq
+
+; X64: "?catch$[[catchbb:[0-9]+]]@?0?branch_to_normal_dest@4HA":
+; X64: LBB1_[[catchbb]]: # %catch{{$}}
+; X64: movq %rdx, 16(%rsp)
+; X64: pushq %rbp
+; X64: .seh_pushreg 5
+; X64: subq $32, %rsp
+; X64: .seh_stackalloc 32
+; X64: leaq 48(%rdx), %rbp
+; X64: .seh_endprologue
+; X64: .LBB1_[[normal_dest_bb:[0-9]+]]: # %loop
+; X64: callq getbool
+; X64: testb $1, %al
+; X64: jne .LBB1_[[normal_dest_bb]]
+; X64: # %catch.done
+; X64: leaq [[contbb]](%rip), %rax
+; X64-NEXT: addq $32, %rsp
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+
+; X64-LABEL: $cppxdata$branch_to_normal_dest:
+; X64-NEXT: .long 429065506
+; X64-NEXT: .long 2
+; X64-NEXT: .long ($stateUnwindMap$branch_to_normal_dest)@IMGREL
+; X64-NEXT: .long 1
+; X64-NEXT: .long ($tryMap$branch_to_normal_dest)@IMGREL
+; X64-NEXT: .long 4
+; X64-NEXT: .long ($ip2state$branch_to_normal_dest)@IMGREL
+; X64-NEXT: .long 40
+; X64-NEXT: .long 0
+; X64-NEXT: .long 1
+
+; X64-LABEL: $stateUnwindMap$branch_to_normal_dest:
+; X64-NEXT: .long -1
+; X64-NEXT: .long 0
+; X64-NEXT: .long -1
+; X64-NEXT: .long 0
+
+; X64-LABEL: $tryMap$branch_to_normal_dest:
+; X64-NEXT: .long 0
+; X64-NEXT: .long 0
+; X64-NEXT: .long 1
+; X64-NEXT: .long 1
+; X64-NEXT: .long ($handlerMap$0$branch_to_normal_dest)@IMGREL
+
+; X64-LABEL: $handlerMap$0$branch_to_normal_dest:
+; X64-NEXT: .long 64
+; X64-NEXT: .long 0
+; X64-NEXT: .long 0
+; X64-NEXT: .long "?catch$[[catchbb]]@?0?branch_to_normal_dest@4HA"@IMGREL
+; X64-NEXT: .long 56
+
+; X64-LABEL: $ip2state$branch_to_normal_dest:
+; X64-NEXT: .long .Lfunc_begin1@IMGREL
+; X64-NEXT: .long -1
+; X64-NEXT: .long .Ltmp[[before_call]]@IMGREL+1
+; X64-NEXT: .long 0
+; X64-NEXT: .long .Ltmp[[after_call]]@IMGREL+1
+; X64-NEXT: .long -1
+; X64-NEXT: .long "?catch$[[catchbb]]@?0?branch_to_normal_dest@4HA"@IMGREL
+; X64-NEXT: .long 1
diff --git a/test/CodeGen/X86/win-cleanuppad.ll b/test/CodeGen/X86/win-cleanuppad.ll
new file mode 100644
index 000000000000..4b0a543a876a
--- /dev/null
+++ b/test/CodeGen/X86/win-cleanuppad.ll
@@ -0,0 +1,199 @@
+; RUN: llc -verify-machineinstrs -mtriple=i686-pc-windows-msvc < %s | FileCheck --check-prefix=X86 %s
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-pc-windows-msvc < %s | FileCheck --check-prefix=X64 %s
+
+%struct.Dtor = type { i8 }
+
+define void @simple_cleanup() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %o = alloca %struct.Dtor, align 1
+ invoke void @f(i32 1)
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ call x86_thiscallcc void @"\01??1Dtor@@QAE@XZ"(%struct.Dtor* %o) #2
+ ret void
+
+ehcleanup: ; preds = %entry
+ %0 = cleanuppad within none []
+ call x86_thiscallcc void @"\01??1Dtor@@QAE@XZ"(%struct.Dtor* %o) #2 [ "funclet"(token %0) ]
+ cleanupret from %0 unwind to caller
+}
+
+; CHECK: simple_cleanup: # @simple_cleanup
+; CHECK: pushq %rbp
+; CHECK: subq $48, %rsp
+; CHECK: leaq 48(%rsp), %rbp
+; CHECK: movq $-2, -8(%rbp)
+; CHECK: movl $1, %ecx
+; CHECK: callq f
+; CHECK: callq "??1Dtor@@QAE@XZ"
+; CHECK: nop
+; CHECK: addq $48, %rsp
+; CHECK: popq %rbp
+; CHECK: retq
+
+; CHECK: "?dtor$2@?0?simple_cleanup@4HA":
+; CHECK: callq "??1Dtor@@QAE@XZ"
+; CHECK: retq
+
+; CHECK: $cppxdata$simple_cleanup:
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ($stateUnwindMap$simple_cleanup)@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long ($ip2state$simple_cleanup)@IMGREL
+; UnwindHelp offset should match the -2 store above
+; CHECK-NEXT: .long 40
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+
+declare void @f(i32) #0
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind
+declare x86_thiscallcc void @"\01??1Dtor@@QAE@XZ"(%struct.Dtor*) #1
+
+define void @nested_cleanup() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %o1 = alloca %struct.Dtor, align 1
+ %o2 = alloca %struct.Dtor, align 1
+ invoke void @f(i32 1)
+ to label %invoke.cont unwind label %cleanup.outer
+
+invoke.cont: ; preds = %entry
+ invoke void @f(i32 2)
+ to label %invoke.cont.1 unwind label %cleanup.inner
+
+invoke.cont.1: ; preds = %invoke.cont
+ call x86_thiscallcc void @"\01??1Dtor@@QAE@XZ"(%struct.Dtor* %o2) #2
+ invoke void @f(i32 3)
+ to label %invoke.cont.2 unwind label %cleanup.outer
+
+invoke.cont.2: ; preds = %invoke.cont.1
+ call x86_thiscallcc void @"\01??1Dtor@@QAE@XZ"(%struct.Dtor* %o1) #2
+ ret void
+
+cleanup.inner: ; preds = %invoke.cont
+ %0 = cleanuppad within none []
+ call x86_thiscallcc void @"\01??1Dtor@@QAE@XZ"(%struct.Dtor* %o2) #2 [ "funclet"(token %0) ]
+ cleanupret from %0 unwind label %cleanup.outer
+
+cleanup.outer: ; preds = %invoke.cont.1, %cleanup.inner, %entry
+ %1 = cleanuppad within none []
+ call x86_thiscallcc void @"\01??1Dtor@@QAE@XZ"(%struct.Dtor* %o1) #2 [ "funclet"(token %1) ]
+ cleanupret from %1 unwind to caller
+}
+
+; X86-LABEL: _nested_cleanup:
+; X86: movl $1, (%esp)
+; X86: calll _f
+; X86: movl $2, (%esp)
+; X86: calll _f
+; X86: movl $3, (%esp)
+; X86: calll _f
+
+; X86: "?dtor$[[cleanup_inner:[0-9]+]]@?0?nested_cleanup@4HA":
+; X86: LBB1_[[cleanup_inner]]: # %cleanup.inner{{$}}
+; X86: pushl %ebp
+; X86: leal {{.*}}(%ebp), %ecx
+; X86: calll "??1Dtor@@QAE@XZ"
+; X86: popl %ebp
+; X86: retl
+
+; X86: "?dtor$[[cleanup_outer:[0-9]+]]@?0?nested_cleanup@4HA":
+; X86: LBB1_[[cleanup_outer]]: # %cleanup.outer{{$}}
+; X86: pushl %ebp
+; X86: leal {{.*}}(%ebp), %ecx
+; X86: calll "??1Dtor@@QAE@XZ"
+; X86: popl %ebp
+; X86: retl
+
+; X86: L__ehtable$nested_cleanup:
+; X86: .long 429065506
+; X86: .long 2
+; X86: .long ($stateUnwindMap$nested_cleanup)
+; X86: .long 0
+; X86: .long 0
+; X86: .long 0
+; X86: .long 0
+; X86: .long 0
+; X86: .long 1
+; X86: $stateUnwindMap$nested_cleanup:
+; X86: .long -1
+; X86: .long "?dtor$[[cleanup_outer]]@?0?nested_cleanup@4HA"
+; X86: .long 0
+; X86: .long "?dtor$[[cleanup_inner]]@?0?nested_cleanup@4HA"
+
+; X64-LABEL: nested_cleanup:
+; X64: .Lfunc_begin1:
+; X64: .Ltmp13:
+; X64: movl $1, %ecx
+; X64: callq f
+; X64: .Ltmp15:
+; X64: movl $2, %ecx
+; X64: callq f
+; X64: .Ltmp16:
+; X64: callq "??1Dtor@@QAE@XZ"
+; X64: .Ltmp17:
+; X64: movl $3, %ecx
+; X64: callq f
+; X64: .Ltmp18:
+
+; X64: "?dtor$[[cleanup_inner:[0-9]+]]@?0?nested_cleanup@4HA":
+; X64: LBB1_[[cleanup_inner]]: # %cleanup.inner{{$}}
+; X64: pushq %rbp
+; X64: leaq {{.*}}(%rbp), %rcx
+; X64: callq "??1Dtor@@QAE@XZ"
+; X64: popq %rbp
+; X64: retq
+
+; X64: .seh_handlerdata
+; X64: .text
+; X64: .seh_endproc
+
+; X64: "?dtor$[[cleanup_outer:[0-9]+]]@?0?nested_cleanup@4HA":
+; X64: LBB1_[[cleanup_outer]]: # %cleanup.outer{{$}}
+; X64: pushq %rbp
+; X64: leaq {{.*}}(%rbp), %rcx
+; X64: callq "??1Dtor@@QAE@XZ"
+; X64: popq %rbp
+; X64: retq
+
+; X64: .section .xdata,"dr"
+; X64-NEXT: .align 4
+; X64: $cppxdata$nested_cleanup:
+; X64-NEXT: .long 429065506
+; X64-NEXT: .long 2
+; X64-NEXT: .long ($stateUnwindMap$nested_cleanup)@IMGREL
+; X64-NEXT: .long 0
+; X64-NEXT: .long 0
+; X64-NEXT: .long 5
+; X64-NEXT: .long ($ip2state$nested_cleanup)@IMGREL
+; X64-NEXT: .long 56
+; X64-NEXT: .long 0
+; X64-NEXT: .long 1
+
+; X64: $stateUnwindMap$nested_cleanup:
+; X64-NEXT: .long -1
+; X64-NEXT: .long "?dtor$[[cleanup_outer]]@?0?nested_cleanup@4HA"@IMGREL
+; X64-NEXT: .long 0
+; X64-NEXT: .long "?dtor$[[cleanup_inner]]@?0?nested_cleanup@4HA"@IMGREL
+
+; X64: $ip2state$nested_cleanup:
+; X64-NEXT: .long .Lfunc_begin1@IMGREL
+; X64-NEXT: .long -1
+; X64-NEXT: .long .Ltmp13@IMGREL
+; X64-NEXT: .long 0
+; X64-NEXT: .long .Ltmp15@IMGREL
+; X64-NEXT: .long 1
+; X64-NEXT: .long .Ltmp17@IMGREL
+; X64-NEXT: .long 0
+; X64-NEXT: .long .Ltmp18@IMGREL+1
+; X64-NEXT: .long -1
+
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/X86/win-funclet-cfi.ll b/test/CodeGen/X86/win-funclet-cfi.ll
new file mode 100644
index 000000000000..2151cdc7bb4b
--- /dev/null
+++ b/test/CodeGen/X86/win-funclet-cfi.ll
@@ -0,0 +1,95 @@
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+define void @"\01?f@@YAXXZ"(i1 %B) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @g()
+ to label %unreachable unwind label %cleanupblock
+
+cleanupblock:
+ %cleanp = cleanuppad within none []
+ call void @g() [ "funclet"(token %cleanp) ]
+ cleanupret from %cleanp unwind label %catch.dispatch
+
+catch.dispatch:
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %cp = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ call void @g() [ "funclet"(token %cp) ]
+ catchret from %cp to label %try.cont
+
+try.cont:
+ ret void
+
+unreachable:
+ unreachable
+}
+
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Destructors need CFI but they shouldn't use the .seh_handler directive.
+; CHECK: "?dtor$[[cleanup:[0-9]+]]@?0??f@@YAXXZ@4HA":
+; CHECK: .seh_proc "?dtor$[[cleanup]]@?0??f@@YAXXZ@4HA"
+; CHECK-NOT: .seh_handler __CxxFrameHandler3
+; CHECK: LBB0_[[cleanup]]: # %cleanupblock{{$}}
+
+; Emit CFI for pushing RBP.
+; CHECK: movq %rdx, 16(%rsp)
+; CHECK: pushq %rbp
+; CHECK: .seh_pushreg 5
+
+; Emit CFI for allocating from the stack pointer.
+; CHECK: subq $32, %rsp
+; CHECK: .seh_stackalloc 32
+
+; CHECK: leaq 48(%rdx), %rbp
+; CHECK-NOT: .seh_setframe
+
+; Prologue is done, emit the .seh_endprologue directive.
+; CHECK: .seh_endprologue
+
+; Make sure there is a nop after a call if the call precedes the epilogue.
+; CHECK: callq g
+; CHECK-NEXT: nop
+
+; Don't emit a reference to the LSDA.
+; CHECK: .seh_handlerdata
+; CHECK-NOT: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .text
+; CHECK: .seh_endproc
+
+; CHECK: "?catch$[[catch:[0-9]+]]@?0??f@@YAXXZ@4HA":
+; CHECK: .seh_proc "?catch$[[catch]]@?0??f@@YAXXZ@4HA"
+; CHECK-NEXT: .seh_handler __CxxFrameHandler3, @unwind, @except
+; CHECK: LBB0_[[catch]]: # %catch{{$}}
+
+; Emit CFI for pushing RBP.
+; CHECK: movq %rdx, 16(%rsp)
+; CHECK: pushq %rbp
+; CHECK: .seh_pushreg 5
+
+; Emit CFI for allocating from the stack pointer.
+; CHECK: subq $32, %rsp
+; CHECK: .seh_stackalloc 32
+
+; CHECK: leaq 48(%rdx), %rbp
+; CHECK-NOT: .seh_setframe
+
+; Prologue is done, emit the .seh_endprologue directive.
+; CHECK: .seh_endprologue
+
+; Make sure there is at least one instruction after a call before the epilogue.
+; CHECK: callq g
+; CHECK-NEXT: leaq .LBB0_{{[0-9]+}}(%rip), %rax
+
+; Emit a reference to the LSDA.
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .text
+; CHECK: .seh_endproc
diff --git a/test/CodeGen/X86/win-mixed-ehpersonality.ll b/test/CodeGen/X86/win-mixed-ehpersonality.ll
new file mode 100644
index 000000000000..f7b6d0702ebe
--- /dev/null
+++ b/test/CodeGen/X86/win-mixed-ehpersonality.ll
@@ -0,0 +1,81 @@
+; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
+
+declare void @maybe_throw()
+
+@_ZTIi = external constant i8*
+@g = external global i32
+
+declare i32 @__C_specific_handler(...)
+declare i32 @__gxx_personality_seh0(...)
+declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
+
+define i32 @use_seh() personality i32 (...)* @__C_specific_handler {
+entry:
+ invoke void @maybe_throw()
+ to label %cont unwind label %lpad
+
+cont:
+ ret i32 0
+
+lpad:
+ %cs = catchswitch within none [label %catch] unwind to caller
+catch:
+ %p = catchpad within %cs [i8* bitcast (i32 (i8*, i8*)* @filt_g to i8*)]
+ catchret from %p to label %ret1
+
+ret1:
+ ret i32 1
+}
+
+define internal i32 @filt_g(i8*, i8*) {
+ %g = load i32, i32* @g
+ ret i32 %g
+}
+
+; CHECK-LABEL: use_seh:
+; CHECK: callq maybe_throw
+; CHECK: xorl %eax, %eax
+; CHECK: .LBB0_[[epilogue:[0-9]+]]
+; CHECK: retq
+; CHECK: # %catch{{$}}
+; CHECK: movl $1, %eax
+; CHECK: jmp .LBB0_[[epilogue]]
+
+; A MinGW64-ish EH style. It could happen if a binary uses both MSVC CRT and
+; mingw CRT and is linked with LTO.
+define i32 @use_gcc() personality i32 (...)* @__gxx_personality_seh0 {
+entry:
+ invoke void @maybe_throw()
+ to label %cont unwind label %lpad
+
+cont:
+ ret i32 0
+
+lpad:
+ %ehvals = landingpad { i8*, i32 }
+ cleanup
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %ehsel = extractvalue { i8*, i32 } %ehvals, 1
+ %filt_g_sel = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @filt_g to i8*))
+ %matches = icmp eq i32 %ehsel, %filt_g_sel
+ br i1 %matches, label %ret1, label %eh.resume
+
+ret1:
+ ret i32 1
+
+eh.resume:
+ resume { i8*, i32 } %ehvals
+}
+
+; CHECK-LABEL: use_gcc:
+; CHECK: callq maybe_throw
+; CHECK: xorl %eax, %eax
+;
+; CHECK: # %lpad
+; CHECK: cmpl $2, %edx
+; CHECK: jne
+;
+; CHECK: # %ret1
+; CHECK: movl $1, %eax
+;
+; CHECK: callq _Unwind_Resume
diff --git a/test/CodeGen/X86/win32-eh-states.ll b/test/CodeGen/X86/win32-eh-states.ll
index 0aae8c4d0189..2777d6644e6a 100644
--- a/test/CodeGen/X86/win32-eh-states.ll
+++ b/test/CodeGen/X86/win32-eh-states.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck %s
+; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck %s --check-prefix=X86
+; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=X64
; Based on this source:
; extern "C" void may_throw(int);
@@ -33,82 +34,174 @@ $"\01??_R0H@8" = comdat any
define void @f() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
entry:
invoke void @may_throw(i32 1)
- to label %invoke.cont unwind label %lpad
+ to label %invoke.cont unwind label %lpad.1
invoke.cont: ; preds = %entry
invoke void @may_throw(i32 2)
- to label %try.cont.9 unwind label %lpad.1
+ to label %try.cont.9 unwind label %lpad
try.cont.9: ; preds = %invoke.cont.3, %invoke.cont, %catch.7
- ; FIXME: Something about our CFG breaks TailDuplication. This empy asm blocks
- ; it so we can focus on testing the state numbering.
- call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"()
ret void
lpad: ; preds = %catch, %entry
- %0 = landingpad { i8*, i32 }
- catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
- %1 = extractvalue { i8*, i32 } %0, 0
- %2 = extractvalue { i8*, i32 } %0, 1
- br label %catch.dispatch.4
-
-lpad.1: ; preds = %invoke.cont
- %3 = landingpad { i8*, i32 }
- catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
- %4 = extractvalue { i8*, i32 } %3, 0
- %5 = extractvalue { i8*, i32 } %3, 1
- %6 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #3
- %matches = icmp eq i32 %5, %6
- br i1 %matches, label %catch, label %catch.dispatch.4
-
-catch.dispatch.4: ; preds = %lpad.1, %lpad
- %exn.slot.0 = phi i8* [ %4, %lpad.1 ], [ %1, %lpad ]
- %ehselector.slot.0 = phi i32 [ %5, %lpad.1 ], [ %2, %lpad ]
- %.pre = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #3
- %matches6 = icmp eq i32 %ehselector.slot.0, %.pre
- br i1 %matches6, label %catch.7, label %eh.resume
-
-catch.7: ; preds = %catch.dispatch.4
- tail call void @llvm.eh.begincatch(i8* %exn.slot.0, i8* null) #3
- tail call void @may_throw(i32 4)
- tail call void @llvm.eh.endcatch() #3
- br label %try.cont.9
+ %cs1 = catchswitch within none [label %catch] unwind label %lpad.1
catch: ; preds = %lpad.1
- tail call void @llvm.eh.begincatch(i8* %4, i8* null) #3
- invoke void @may_throw(i32 3)
- to label %invoke.cont.3 unwind label %lpad
+ %p1 = catchpad within %cs1 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
+ invoke void @may_throw(i32 3) [ "funclet"(token %p1) ]
+ to label %invoke.cont.3 unwind label %lpad.1
invoke.cont.3: ; preds = %catch
- tail call void @llvm.eh.endcatch() #3
- br label %try.cont.9
+ catchret from %p1 to label %try.cont.9
+
+lpad.1: ; preds = %invoke.cont
+ %cs2 = catchswitch within none [label %catch.7] unwind to caller
-eh.resume: ; preds = %catch.dispatch.4
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
- %lpad.val.12 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
- resume { i8*, i32 } %lpad.val.12
+catch.7:
+ %p2 = catchpad within %cs2 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i8* null]
+ call void @may_throw(i32 4) [ "funclet"(token %p2) ]
+ catchret from %p2 to label %try.cont.9
}
-; CHECK-LABEL: _f:
-; CHECK: movl $-1, [[state:[-0-9]+]](%ebp)
-; CHECK: movl $___ehhandler$f, {{.*}}
+; X86-LABEL: _f:
+; X86: movl $-1, [[state:[-0-9]+]](%ebp)
+; X86: movl $___ehhandler$f, {{.*}}
;
-; CHECK: movl $0, [[state]](%ebp)
-; CHECK: movl $1, (%esp)
-; CHECK: calll _may_throw
+; X86: movl $0, [[state]](%ebp)
+; X86: movl $1, (%esp)
+; X86: calll _may_throw
;
-; CHECK: movl $1, [[state]](%ebp)
-; CHECK: movl $2, (%esp)
-; CHECK: calll _may_throw
+; X86: movl $1, [[state]](%ebp)
+; X86: movl $2, (%esp)
+; X86: calll _may_throw
+;
+; X86: movl $2, [[state]](%ebp)
+; X86: movl $3, (%esp)
+; X86: calll _may_throw
+;
+; X86: movl $3, [[state]](%ebp)
+; X86: movl $4, (%esp)
+; X86: calll _may_throw
+
+
+; X64-LABEL: f:
+; X64-LABEL: $ip2state$f:
+; X64-NEXT: .long .Lfunc_begin0@IMGREL
+; X64-NEXT: .long -1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long 0
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long 1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long -1
+; X64-NEXT: .long "?catch${{.*}}@?0?f@4HA"@IMGREL
+; X64-NEXT: .long 2
+; X64-NEXT: .long "?catch${{.*}}@?0?f@4HA"@IMGREL
+; X64-NEXT: .long 3
+
+; Based on this source:
+; extern "C" void may_throw(int);
+; struct S { ~S(); };
+; void g() {
+; S x;
+; try {
+; may_throw(-1);
+; } catch (...) {
+; may_throw(0);
+; {
+; S y;
+; may_throw(1);
+; }
+; may_throw(2);
+; }
+; }
+
+%struct.S = type { i8 }
+declare void @"\01??1S@@QEAA@XZ"(%struct.S*)
+
+define void @g() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %x = alloca %struct.S, align 1
+ %y = alloca %struct.S, align 1
+ invoke void @may_throw(i32 -1)
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch] unwind label %ehcleanup5
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+ invoke void @may_throw(i32 0) [ "funclet"(token %1) ]
+ to label %invoke.cont unwind label %ehcleanup5
+
+invoke.cont: ; preds = %catch
+ invoke void @may_throw(i32 1) [ "funclet"(token %1) ]
+ to label %invoke.cont2 unwind label %ehcleanup
-; CHECK-LABEL: _f.catch:
-; CHECK: movl $4, Lf$frame_escape_{{[0-9]+.*}}
-; CHECK: movl $4, (%esp)
-; CHECK: calll _may_throw
+invoke.cont2: ; preds = %invoke.cont
+ invoke void @"\01??1S@@QEAA@XZ"(%struct.S* nonnull %y) [ "funclet"(token %1) ]
+ to label %invoke.cont3 unwind label %ehcleanup5
+
+invoke.cont3: ; preds = %invoke.cont2
+ invoke void @may_throw(i32 2) [ "funclet"(token %1) ]
+ to label %invoke.cont4 unwind label %ehcleanup5
+
+invoke.cont4: ; preds = %invoke.cont3
+ catchret from %1 to label %try.cont
+
+try.cont: ; preds = %invoke.cont4
+ call void @"\01??1S@@QEAA@XZ"(%struct.S* nonnull %x)
+ ret void
-; CHECK-LABEL: _f.catch.1:
-; CHECK: movl $3, Lf$frame_escape_{{[0-9]+.*}}
-; CHECK: movl $3, (%esp)
-; CHECK: calll _may_throw
+ehcleanup: ; preds = %invoke.cont
+ %2 = cleanuppad within %1 []
+ call void @"\01??1S@@QEAA@XZ"(%struct.S* nonnull %y) [ "funclet"(token %2) ]
+ cleanupret from %2 unwind label %ehcleanup5
-; CHECK: .safeseh ___ehhandler$f
+ehcleanup5: ; preds = %invoke.cont2, %invoke.cont3, %ehcleanup, %catch, %catch.dispatch
+ %3 = cleanuppad within none []
+ call void @"\01??1S@@QEAA@XZ"(%struct.S* nonnull %x) [ "funclet"(token %3) ]
+ cleanupret from %3 unwind to caller
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+; X86-LABEL: _g:
+; X86: movl $-1, [[state:[-0-9]+]](%ebp)
+; X86: movl $___ehhandler$g, {{.*}}
+;
+; X86: movl $1, [[state]](%ebp)
+; X86: movl $-1, (%esp)
+; X86: calll _may_throw
+;
+; X86: movl $2, [[state]](%ebp)
+; X86: movl $0, (%esp)
+; X86: calll _may_throw
+;
+; X86: movl $3, [[state]](%ebp)
+; X86: movl $1, (%esp)
+; X86: calll _may_throw
+;
+; X86: movl $2, [[state]](%ebp)
+; X86: movl $2, (%esp)
+; X86: calll _may_throw
+
+; X64-LABEL: g:
+; X64-LABEL: $ip2state$g:
+; X64-NEXT: .long .Lfunc_begin1@IMGREL
+; X64-NEXT: .long -1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long 1
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long -1
+; X64-NEXT: .long "?catch${{.*}}@?0?g@4HA"@IMGREL
+; X64-NEXT: .long 2
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long 3
+; X64-NEXT: .long .Ltmp{{.*}}@IMGREL+1
+; X64-NEXT: .long 2
+
+
+; X86: .safeseh ___ehhandler$f
+; X86: .safeseh ___ehhandler$g
diff --git a/test/CodeGen/X86/win32-eh.ll b/test/CodeGen/X86/win32-eh.ll
index 3ee4723ce5f3..73c7b486a55a 100644
--- a/test/CodeGen/X86/win32-eh.ll
+++ b/test/CodeGen/X86/win32-eh.ll
@@ -15,18 +15,14 @@ define internal i32 @catchall_filt() {
define void @use_except_handler3() personality i32 (...)* @_except_handler3 {
entry:
invoke void @may_throw_or_crash()
- to label %cont unwind label %catchall
+ to label %cont unwind label %lpad
cont:
ret void
-catchall:
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i32 ()* @catchall_filt to i8*)
- %1 = extractvalue { i8*, i32 } %0, 1
- %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @catchall_filt to i8*)) #4
- %matches = icmp eq i32 %1, %2
- br i1 %matches, label %cont, label %eh.resume
-eh.resume:
- resume { i8*, i32 } %0
+lpad:
+ %cs = catchswitch within none [label %catch] unwind to caller
+catch:
+ %p = catchpad within %cs [i8* bitcast (i32 ()* @catchall_filt to i8*)]
+ catchret from %p to label %cont
}
; CHECK-LABEL: _use_except_handler3:
@@ -47,28 +43,25 @@ eh.resume:
; CHECK: movl -28(%ebp), %[[next:[^ ,]*]]
; CHECK: movl %[[next]], %fs:0
; CHECK: retl
+; CHECK: LBB1_2: # %catch{{$}}
; CHECK: .section .xdata,"dr"
; CHECK-LABEL: L__ehtable$use_except_handler3:
; CHECK-NEXT: .long -1
; CHECK-NEXT: .long _catchall_filt
-; CHECK-NEXT: .long Ltmp{{[0-9]+}}
+; CHECK-NEXT: .long LBB1_2
define void @use_except_handler4() personality i32 (...)* @_except_handler4 {
entry:
invoke void @may_throw_or_crash()
- to label %cont unwind label %catchall
+ to label %cont unwind label %lpad
cont:
ret void
-catchall:
- %0 = landingpad { i8*, i32 }
- catch i8* bitcast (i32 ()* @catchall_filt to i8*)
- %1 = extractvalue { i8*, i32 } %0, 1
- %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @catchall_filt to i8*)) #4
- %matches = icmp eq i32 %1, %2
- br i1 %matches, label %cont, label %eh.resume
-eh.resume:
- resume { i8*, i32 } %0
+lpad:
+ %cs = catchswitch within none [label %catch] unwind to caller
+catch:
+ %p = catchpad within %cs [i8* bitcast (i32 ()* @catchall_filt to i8*)]
+ catchret from %p to label %cont
}
; CHECK-LABEL: _use_except_handler4:
@@ -89,6 +82,7 @@ eh.resume:
; CHECK: movl -28(%ebp), %[[next:[^ ,]*]]
; CHECK: movl %[[next]], %fs:0
; CHECK: retl
+; CHECK: LBB2_2: # %catch{{$}}
; CHECK: .section .xdata,"dr"
; CHECK-LABEL: L__ehtable$use_except_handler4:
@@ -98,20 +92,19 @@ eh.resume:
; CHECK-NEXT: .long 0
; CHECK-NEXT: .long -2
; CHECK-NEXT: .long _catchall_filt
-; CHECK-NEXT: .long Ltmp{{[0-9]+}}
+; CHECK-NEXT: .long LBB2_2
define void @use_CxxFrameHandler3() personality i32 (...)* @__CxxFrameHandler3 {
invoke void @may_throw_or_crash()
to label %cont unwind label %catchall
cont:
ret void
+
catchall:
- %ehvals = landingpad { i8*, i32 }
- catch i8* null
- %ehptr = extractvalue { i8*, i32 } %ehvals, 0
- call void @llvm.eh.begincatch(i8* %ehptr, i8* null)
- call void @llvm.eh.endcatch()
- br label %cont
+ %cs = catchswitch within none [label %catch] unwind to caller
+catch:
+ %p = catchpad within %cs [i8* null, i32 64, i8* null]
+ catchret from %p to label %cont
}
; CHECK-LABEL: _use_CxxFrameHandler3:
diff --git a/test/CodeGen/X86/win32-pic-jumptable.ll b/test/CodeGen/X86/win32-pic-jumptable.ll
index cabd36ae395d..3a8ef2d0b916 100644
--- a/test/CodeGen/X86/win32-pic-jumptable.ll
+++ b/test/CodeGen/X86/win32-pic-jumptable.ll
@@ -1,16 +1,20 @@
; RUN: llc < %s -relocation-model=pic | FileCheck %s
; CHECK: calll L0$pb
+; CHECK-NEXT: Ltmp{{[0-9]+}}:
+; CHECK-NEXT: .cfi_adjust_cfa_offset 4
; CHECK-NEXT: L0$pb:
; CHECK-NEXT: popl %eax
+; CHECK-NEXT: Ltmp{{[0-9]+}}:
+; CHECK-NEXT: .cfi_adjust_cfa_offset -4
; CHECK-NEXT: addl LJTI0_0(,%ecx,4), %eax
; CHECK-NEXT: jmpl *%eax
; CHECK: LJTI0_0:
+; CHECK-NEXT: .long LBB0_2-L0$pb
+; CHECK-NEXT: .long LBB0_3-L0$pb
; CHECK-NEXT: .long LBB0_4-L0$pb
; CHECK-NEXT: .long LBB0_5-L0$pb
-; CHECK-NEXT: .long LBB0_6-L0$pb
-; CHECK-NEXT: .long LBB0_7-L0$pb
target triple = "i686--windows-itanium"
diff --git a/test/CodeGen/X86/win32-seh-catchpad-realign.ll b/test/CodeGen/X86/win32-seh-catchpad-realign.ll
new file mode 100644
index 000000000000..23aeea37c117
--- /dev/null
+++ b/test/CodeGen/X86/win32-seh-catchpad-realign.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s | FileCheck %s
+
+; The aligned alloca means that we have to realign the stack, which forces the
+; use of ESI to address local variables.
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686--windows-msvc"
+
+; Function Attrs: nounwind
+define void @realigned_try() personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
+entry:
+ %x = alloca [4 x i32], align 16
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %x, i32 0, i32 0
+ invoke void @useit(i32* %arrayidx)
+ to label %__try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %__except.ret] unwind to caller
+
+__except.ret: ; preds = %catch.dispatch
+ %pad = catchpad within %cs1 [i8* bitcast (i32 ()* @"\01?filt$0@0@realigned_try@@" to i8*)]
+ catchret from %pad to label %__try.cont
+
+__try.cont: ; preds = %entry, %__except.ret
+ ret void
+}
+
+; Function Attrs: nounwind argmemonly
+
+; Function Attrs: nounwind
+define internal i32 @"\01?filt$0@0@realigned_try@@"() {
+entry:
+ ret i32 1
+}
+
+declare void @useit(i32*)
+
+declare i32 @_except_handler3(...)
+
+; CHECK-LABEL: _realigned_try:
+; Prologue
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
+; CHECK: pushl %ebx
+; CHECK: pushl %edi
+; CHECK: pushl %esi
+; CHECK: andl $-16, %esp
+; CHECK: subl $64, %esp
+; CHECK: movl %esp, %esi
+; Spill EBP
+; CHECK: movl %ebp, 12(%esi)
+; Spill ESP
+; CHECK: movl %esp, 36(%esi)
+; The state is stored at ESI+56, the end of the node is ESI+60.
+; CHECK: movl $-1, 56(%esi)
+;
+; __try
+; CHECK: calll _useit
+;
+; Epilogue
+; CHECK: LBB0_2: # %__try.cont
+; CHECK: leal -12(%ebp), %esp
+; CHECK: popl %esi
+; CHECK: popl %edi
+; CHECK: popl %ebx
+; CHECK: popl %ebp
+; CHECK: retl
+;
+; CHECK: LBB0_1: # %__except.ret
+; Restore ESP
+; CHECK: movl -24(%ebp), %esp
+; Recompute ESI by subtracting 60 from the end of the registration node.
+; CHECK: leal -60(%ebp), %esi
+; Restore EBP
+; CHECK: movl 12(%esi), %ebp
+; Rejoin normal control flow
+; CHECK: jmp LBB0_2
diff --git a/test/CodeGen/X86/win32-seh-catchpad.ll b/test/CodeGen/X86/win32-seh-catchpad.ll
new file mode 100644
index 000000000000..224e96f8b8f0
--- /dev/null
+++ b/test/CodeGen/X86/win32-seh-catchpad.ll
@@ -0,0 +1,231 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+define void @try_except() #0 personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
+entry:
+ %__exception_code = alloca i32, align 4
+ call void (...) @llvm.localescape(i32* %__exception_code)
+ invoke void @f(i32 1) #3
+ to label %invoke.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %__except.ret] unwind to caller
+
+__except.ret: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* bitcast (i32 ()* @try_except_filter_catchall to i8*)]
+ catchret from %0 to label %__except
+
+__except: ; preds = %__except.ret
+ call void @f(i32 2)
+ br label %__try.cont
+
+__try.cont: ; preds = %__except, %invoke.cont
+ call void @f(i32 3)
+ ret void
+
+invoke.cont: ; preds = %entry
+ br label %__try.cont
+}
+
+; CHECK-LABEL: _try_except:
+; Store state #0
+; CHECK: movl $0, -[[state:[0-9]+]](%ebp)
+; CHECK: movl $1, (%esp)
+; CHECK: calll _f
+; CHECK: movl $-1, -[[state]](%ebp)
+; CHECK: movl $3, (%esp)
+; CHECK: calll _f
+; CHECK: retl
+
+; __except
+; CHECK: movl $-1, -[[state]](%ebp)
+; CHECK: movl $2, (%esp)
+; CHECK: calll _f
+
+; CHECK: .section .xdata,"dr"
+; CHECK: L__ehtable$try_except:
+; CHECK: .long -1 # ToState
+; CHECK: .long _try_except_filter_catchall # Filter
+; CHECK: .long LBB0_1
+
+define internal i32 @try_except_filter_catchall() #0 {
+entry:
+ %0 = call i8* @llvm.frameaddress(i32 1)
+ %1 = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (void ()* @try_except to i8*), i8* %0)
+ %2 = call i8* @llvm.localrecover(i8* bitcast (void ()* @try_except to i8*), i8* %1, i32 0)
+ %__exception_code = bitcast i8* %2 to i32*
+ %3 = getelementptr inbounds i8, i8* %0, i32 -20
+ %4 = bitcast i8* %3 to i8**
+ %5 = load i8*, i8** %4, align 4
+ %6 = bitcast i8* %5 to { i32*, i8* }*
+ %7 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %6, i32 0, i32 0
+ %8 = load i32*, i32** %7, align 4
+ %9 = load i32, i32* %8, align 4
+ store i32 %9, i32* %__exception_code, align 4
+ ret i32 1
+}
+
+define void @nested_exceptions() #0 personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
+entry:
+ %__exception_code = alloca i32, align 4
+ call void (...) @llvm.localescape(i32* %__exception_code)
+ invoke void @crash() #3
+ to label %__try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %__except.ret] unwind label %catch.dispatch.11
+
+__except.ret: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* bitcast (i32 ()* @nested_exceptions_filter_catchall to i8*)]
+ catchret from %0 to label %__try.cont
+
+__try.cont: ; preds = %entry, %__except.ret
+ invoke void @crash() #3
+ to label %__try.cont.9 unwind label %catch.dispatch.5
+
+catch.dispatch.5: ; preds = %__try.cont
+ %cs2 = catchswitch within none [label %__except.ret.7] unwind label %catch.dispatch.11
+
+__except.ret.7: ; preds = %catch.dispatch.5
+ %1 = catchpad within %cs2 [i8* bitcast (i32 ()* @nested_exceptions_filter_catchall to i8*)]
+ catchret from %1 to label %__try.cont.9
+
+__try.cont.9: ; preds = %__try.cont, %__except.ret.7
+ invoke void @crash() #3
+ to label %__try.cont.15 unwind label %catch.dispatch.11
+
+catch.dispatch.11: ; preds = %catchendblock, %catchendblock.6, %__try.cont.9
+ %cs3 = catchswitch within none [label %__except.ret.13] unwind label %catch.dispatch.17
+
+__except.ret.13: ; preds = %catch.dispatch.11
+ %2 = catchpad within %cs3 [i8* bitcast (i32 ()* @nested_exceptions_filter_catchall to i8*)]
+ catchret from %2 to label %__try.cont.15
+
+__try.cont.15: ; preds = %__try.cont.9, %__except.ret.13
+ invoke void @crash() #3
+ to label %__try.cont.35 unwind label %catch.dispatch.17
+
+catch.dispatch.17: ; preds = %catchendblock.12, %__try.cont.15
+ %cs4 = catchswitch within none [label %__except.ret.19] unwind to caller
+
+__except.ret.19: ; preds = %catch.dispatch.17
+ %3 = catchpad within %cs4 [i8* bitcast (i32 ()* @nested_exceptions_filter_catchall to i8*)]
+ catchret from %3 to label %__except.20
+
+__except.20: ; preds = %__except.ret.19
+ invoke void @crash() #3
+ to label %__try.cont.27 unwind label %catch.dispatch.23
+
+catch.dispatch.23: ; preds = %__except.20
+ %cs5 = catchswitch within none [label %__except.ret.25] unwind to caller
+
+__except.ret.25: ; preds = %catch.dispatch.23
+ %4 = catchpad within %cs5 [i8* bitcast (i32 ()* @nested_exceptions_filter_catchall to i8*)]
+ catchret from %4 to label %__try.cont.27
+
+__try.cont.27: ; preds = %__except.20, %__except.ret.25
+ invoke void @crash() #3
+ to label %__try.cont.35 unwind label %catch.dispatch.30
+
+catch.dispatch.30: ; preds = %__try.cont.27
+ %cs6 = catchswitch within none [label %__except.ret.32] unwind to caller
+
+__except.ret.32: ; preds = %catch.dispatch.30
+ %5 = catchpad within %cs6 [i8* bitcast (i32 ()* @nested_exceptions_filter_catchall to i8*)]
+ catchret from %5 to label %__try.cont.35
+
+__try.cont.35: ; preds = %__try.cont.15, %__try.cont.27, %__except.ret.32
+ ret void
+}
+
+; This table is equivalent to the one produced by MSVC, even if it isn't in
+; quite the same order.
+
+; CHECK-LABEL: _nested_exceptions:
+; CHECK: L__ehtable$nested_exceptions:
+; CHECK: .long -1
+; CHECK: .long _nested_exceptions_filter_catchall
+; CHECK: .long LBB
+; CHECK: .long 0
+; CHECK: .long _nested_exceptions_filter_catchall
+; CHECK: .long LBB
+; CHECK: .long 1
+; CHECK: .long _nested_exceptions_filter_catchall
+; CHECK: .long LBB
+; CHECK: .long 1
+; CHECK: .long _nested_exceptions_filter_catchall
+; CHECK: .long LBB
+; CHECK: .long -1
+; CHECK: .long _nested_exceptions_filter_catchall
+; CHECK: .long LBB
+; CHECK: .long -1
+; CHECK: .long _nested_exceptions_filter_catchall
+; CHECK: .long LBB
+
+declare void @crash() #0
+
+define internal i32 @nested_exceptions_filter_catchall() #0 {
+entry:
+ %0 = call i8* @llvm.frameaddress(i32 1)
+ %1 = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (void ()* @nested_exceptions to i8*), i8* %0)
+ %2 = call i8* @llvm.localrecover(i8* bitcast (void ()* @nested_exceptions to i8*), i8* %1, i32 0)
+ %__exception_code3 = bitcast i8* %2 to i32*
+ %3 = getelementptr inbounds i8, i8* %0, i32 -20
+ %4 = bitcast i8* %3 to i8**
+ %5 = load i8*, i8** %4, align 4
+ %6 = bitcast i8* %5 to { i32*, i8* }*
+ %7 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %6, i32 0, i32 0
+ %8 = load i32*, i32** %7, align 4
+ %9 = load i32, i32* %8, align 4
+ store i32 %9, i32* %__exception_code3, align 4
+ ret i32 1
+}
+
+define void @code_in_catchpad() #0 personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
+entry:
+ invoke void @f(i32 1) #3
+ to label %__except unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %__except.ret] unwind to caller
+
+__except.ret: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* bitcast (i32 ()* @try_except_filter_catchall to i8*)]
+ call void @f(i32 2) [ "funclet"(token %0) ]
+ catchret from %0 to label %__except
+
+__except:
+ ret void
+}
+
+; CHECK-LABEL: _code_in_catchpad:
+; CHECK: # %__except.ret
+; CHECK-NEXT: movl -24(%ebp), %esp
+; CHECK-NEXT: addl $12, %ebp
+; CHECK-NEXT: movl $-1, -16(%ebp)
+; CHECK-NEXT: movl $2, (%esp)
+; CHECK-NEXT: calll _f
+
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.frameaddress(i32) #1
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.x86.seh.recoverfp(i8*, i8*) #1
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.localrecover(i8*, i8*, i32) #1
+
+declare void @f(i32) #0
+
+declare i32 @_except_handler3(...)
+
+; Function Attrs: nounwind
+declare void @llvm.localescape(...) #2
+
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+attributes #3 = { noinline }
diff --git a/test/CodeGen/X86/win32-seh-nested-finally.ll b/test/CodeGen/X86/win32-seh-nested-finally.ll
new file mode 100644
index 000000000000..c283a35d70cf
--- /dev/null
+++ b/test/CodeGen/X86/win32-seh-nested-finally.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+define void @nested_finally() #0 personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
+entry:
+ invoke void @f(i32 1) #3
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ invoke void @f(i32 2) #3
+ to label %invoke.cont.1 unwind label %ehcleanup.3
+
+invoke.cont.1: ; preds = %invoke.cont
+ call void @f(i32 3) #3
+ ret void
+
+ehcleanup: ; preds = %entry
+ %0 = cleanuppad within none []
+ invoke void @f(i32 2) #3 [ "funclet"(token %0) ]
+ to label %invoke.cont.2 unwind label %ehcleanup.3
+
+invoke.cont.2: ; preds = %ehcleanup
+ cleanupret from %0 unwind label %ehcleanup.3
+
+ehcleanup.3: ; preds = %invoke.cont.2, %ehcleanup.end, %invoke.cont
+ %1 = cleanuppad within none []
+ call void @f(i32 3) #3 [ "funclet"(token %1) ]
+ cleanupret from %1 unwind to caller
+}
+
+declare void @f(i32) #0
+
+declare i32 @_except_handler3(...)
+
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { noinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { noinline }
+
+; CHECK: _nested_finally:
+; CHECK: movl $-1, -[[state:[0-9]+]](%ebp)
+; CHECK: movl {{.*}}, %fs:0
+; CHECK: movl $1, -[[state]](%ebp)
+; CHECK: movl $1, (%esp)
+; CHECK: calll _f
+; CHECK: movl $0, -[[state]](%ebp)
+; CHECK: movl $2, (%esp)
+; CHECK: calll _f
+; CHECK: movl $-1, -[[state]](%ebp)
+; CHECK: movl $3, (%esp)
+; CHECK: calll _f
+; CHECK: retl
+
+; CHECK: LBB0_[[inner:[0-9]+]]: # %ehcleanup
+; CHECK: pushl %ebp
+; CHECK: addl $12, %ebp
+; CHECK: movl $0, -[[state]](%ebp)
+; CHECK: movl $2, (%esp)
+; CHECK: calll _f
+; CHECK: popl %ebp
+; CHECK: retl
+
+; CHECK: LBB0_[[outer:[0-9]+]]: # %ehcleanup.3
+; CHECK: pushl %ebp
+; CHECK: addl $12, %ebp
+; CHECK: movl $-1, -[[state]](%ebp)
+; CHECK: movl $3, (%esp)
+; CHECK: calll _f
+; CHECK: popl %ebp
+; CHECK: retl
+
+; CHECK: L__ehtable$nested_finally:
+; CHECK: .long -1 # ToState
+; CHECK: .long 0 # Null
+; CHECK: .long "?dtor$[[outer]]@?0?nested_finally@4HA" # FinallyFunclet
+; CHECK: .long 0 # ToState
+; CHECK: .long 0 # Null
+; CHECK: .long "?dtor$[[inner]]@?0?nested_finally@4HA" # FinallyFunclet
diff --git a/test/CodeGen/X86/win32-spill-xmm.ll b/test/CodeGen/X86/win32-spill-xmm.ll
new file mode 100644
index 000000000000..0db97cfe20f0
--- /dev/null
+++ b/test/CodeGen/X86/win32-spill-xmm.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mcpu=generic -mtriple=i686-pc-windows-msvc -mattr=+sse < %s | FileCheck %s
+
+; Check proper alignment of spilled vector
+
+; CHECK-LABEL: spill_ok
+; CHECK: subl $32, %esp
+; CHECK: movaps %xmm3, (%esp)
+; CHECK: movl $0, 16(%esp)
+; CHECK: calll _bar
+define void @spill_ok(i32, <16 x float> *) {
+entry:
+ %2 = alloca i32, i32 %0
+ %3 = load <16 x float>, <16 x float> * %1, align 64
+ tail call void @bar(<16 x float> %3, i32 0) nounwind
+ ret void
+}
+
+declare void @bar(<16 x float> %a, i32 %b)
+
+; Check that proper alignment of spilled vector does not affect vargs
+
+; CHECK-LABEL: vargs_not_affected
+; CHECK: leal 28(%ebp), %eax
+define i32 @vargs_not_affected(<4 x float> %v, i8* %f, ...) {
+entry:
+ %ap = alloca i8*, align 4
+ %0 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %0)
+ %argp.cur = load i8*, i8** %ap, align 4
+ %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4
+ store i8* %argp.next, i8** %ap, align 4
+ %1 = bitcast i8* %argp.cur to i32*
+ %2 = load i32, i32* %1, align 4
+ call void @llvm.va_end(i8* %0)
+ ret i32 %2
+}
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
diff --git a/test/CodeGen/X86/win64_frame.ll b/test/CodeGen/X86/win64_frame.ll
index 477b3144d9e7..27d78dbe5479 100644
--- a/test/CodeGen/X86/win64_frame.ll
+++ b/test/CodeGen/X86/win64_frame.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=CHECK --check-prefix=PUSHF
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+sahf | FileCheck %s --check-prefix=SAHF
define i32 @f1(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) "no-frame-pointer-elim"="true" {
; CHECK-LABEL: f1:
@@ -118,6 +119,73 @@ define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="
; CHECK: leaq 224(%rbp), %rsp
}
+define i64 @f9() {
+entry:
+ ; CHECK-LABEL: f9:
+ ; CHECK: pushq %rbp
+ ; CHECK: .seh_pushreg 5
+ ; CHECK-NEXT: movq %rsp, %rbp
+ ; CHECK: .seh_setframe 5, 0
+ ; CHECK: .seh_endprologue
+
+ %call = call i64 asm sideeffect "pushf\0A\09popq $0\0A", "=r,~{dirflag},~{fpsr},~{flags}"()
+ ; CHECK-NEXT: #APP
+ ; CHECK-NEXT: pushfq
+ ; CHECK-NEXT: popq %rax
+ ; CHECK: #NO_APP
+
+ ret i64 %call
+ ; CHECK-NEXT: popq %rbp
+ ; CHECK-NEXT: retq
+}
+
+declare i64 @dummy()
+
+define i64 @f10(i64* %foo, i64 %bar, i64 %baz) {
+ ; CHECK-LABEL: f10:
+ ; CHECK: pushq %rbp
+ ; CHECK: .seh_pushreg 5
+ ; CHECK: pushq %rsi
+ ; CHECK: .seh_pushreg 6
+ ; CHECK: pushq %rdi
+ ; CHECK: .seh_pushreg 7
+ ; CHECK: subq $32, %rsp
+ ; CHECK: .seh_stackalloc 32
+ ; CHECK: leaq 32(%rsp), %rbp
+ ; CHECK: .seh_setframe 5, 32
+ ; CHECK: .seh_endprologue
+
+ %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
+ ; PUSHF: lock cmpxchgq
+ ; PUSHF-NEXT: pushfq
+ ; PUSHF-NEXT: popq %[[REG:.*]]
+ ; SAHF: lock cmpxchgq
+ ; SAHF-NEXT: seto %al
+ ; SAHF-NEXT: lahf
+
+ %v = extractvalue { i64, i1 } %cx, 0
+ %p = extractvalue { i64, i1 } %cx, 1
+
+ %call = call i64 @dummy()
+ ; PUSHF: callq dummy
+ ; PUSHF-NEXT: pushq %[[REG]]
+ ; PUSHF-NEXT: popfq
+ ; SAHF: callq dummy
+ ; SAHF-NEXT: pushq
+ ; SAHF: addb $127, %al
+ ; SAHF-NEXT: sahf
+ ; SAHF-NEXT: popq
+
+ %sel = select i1 %p, i64 %call, i64 %bar
+ ; CHECK-NEXT: cmovneq
+
+ ret i64 %sel
+ ; CHECK-NEXT: addq $32, %rsp
+ ; CHECK-NEXT: popq %rdi
+ ; CHECK-NEXT: popq %rsi
+ ; CHECK-NEXT: popq %rbp
+}
+
declare i8* @llvm.returnaddress(i32) nounwind readnone
declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/X86/win64_sibcall.ll b/test/CodeGen/X86/win64_sibcall.ll
new file mode 100644
index 000000000000..4001f638c2ab
--- /dev/null
+++ b/test/CodeGen/X86/win64_sibcall.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr | FileCheck %s -check-prefix=WIN_X64
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=LINUX
+
+%Object = type <{ [0 x i64*]* }>
+
+define void @C1(%Object addrspace(1)* %param0) gc "coreclr" {
+entry:
+
+; WIN_X64: # BB#0:
+; WIN_X64: pushq %rax
+; LINUX: # BB#0: # %entry
+; LINUX: movq $0, -8(%rsp)
+
+ %this = alloca %Object addrspace(1)*
+ store %Object addrspace(1)* null, %Object addrspace(1)** %this
+ store %Object addrspace(1)* %param0, %Object addrspace(1)** %this
+ br label %0
+
+; <label>:0 ; preds = %entry
+ %1 = load %Object addrspace(1)*, %Object addrspace(1)** %this, align 8
+
+; WIN_X64: xorl %r8d, %r8d
+; WIN_X64: popq %rax
+; WIN_X64: rex64 jmp C2 # TAILCALL
+; LINUX: xorl %edx, %edx
+; LINUX: jmp C2 # TAILCALL
+
+ tail call void @C2(%Object addrspace(1)* %1, i32 0, %Object addrspace(1)* null)
+ ret void
+}
+
+declare void @C2(%Object addrspace(1)*, i32, %Object addrspace(1)*)
+
+; Function Attrs: nounwind
+declare void @llvm.localescape(...) #0
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/X86/win_coreclr_chkstk.ll b/test/CodeGen/X86/win_coreclr_chkstk.ll
new file mode 100644
index 000000000000..c9a5fc2b3288
--- /dev/null
+++ b/test/CodeGen/X86/win_coreclr_chkstk.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr | FileCheck %s -check-prefix=WIN_X64
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=LINUX
+
+; By default, windows CoreCLR requires an inline prologue stack expansion check
+; if more than 4096 bytes are allocated on the stack.
+
+; Prolog stack allocation >= 4096 bytes will require the probe sequence
+define i32 @main4k() nounwind {
+entry:
+; WIN_X64-LABEL:main4k:
+; WIN_X64: # BB#0:
+; WIN_X64: movl $4096, %eax
+; WIN_X64: movq %rcx, 8(%rsp)
+; WIN_X64: movq %rdx, 16(%rsp)
+; WIN_X64: xorq %rcx, %rcx
+; WIN_X64: movq %rsp, %rdx
+; WIN_X64: subq %rax, %rdx
+; WIN_X64: cmovbq %rcx, %rdx
+; WIN_X64: movq %gs:16, %rcx
+; WIN_X64: cmpq %rcx, %rdx
+; WIN_X64: jae .LBB0_3
+; WIN_X64:# BB#1:
+; WIN_X64: andq $-4096, %rdx
+; WIN_X64:.LBB0_2:
+; WIN_X64: leaq -4096(%rcx), %rcx
+; WIN_X64: movb $0, (%rcx)
+; WIN_X64: cmpq %rcx, %rdx
+; WIN_X64: jne .LBB0_2
+; WIN_X64:.LBB0_3:
+; WIN_X64: movq 8(%rsp), %rcx
+; WIN_X64: movq 16(%rsp), %rdx
+; WIN_X64: subq %rax, %rsp
+; WIN_X64: xorl %eax, %eax
+; WIN_X64: addq $4096, %rsp
+; WIN_X64: retq
+; LINUX-LABEL:main4k:
+; LINUX-NOT: movq %gs:16, %rcx
+; LINUX: retq
+ %a = alloca [4096 x i8]
+ ret i32 0
+}
+
+; Prolog stack allocation >= 4096 bytes will require the probe sequence
+; Case with frame pointer
+define i32 @main4k_frame() nounwind "no-frame-pointer-elim"="true" {
+entry:
+; WIN_X64-LABEL:main4k_frame:
+; WIN_X64: movq %rcx, 16(%rsp)
+; WIN_X64: movq %gs:16, %rcx
+; LINUX-LABEL:main4k_frame:
+; LINUX-NOT: movq %gs:16, %rcx
+; LINUX: retq
+ %a = alloca [4096 x i8]
+ ret i32 0
+}
+
+; Prolog stack allocation >= 4096 bytes will require the probe sequence
+; Case with INT args
+define i32 @main4k_intargs(i32 %x, i32 %y) nounwind {
+entry:
+; WIN_X64: movq %rcx, 8(%rsp)
+; WIN_X64: movq %gs:16, %rcx
+; LINUX-NOT: movq %gs:16, %rcx
+; LINUX: retq
+ %a = alloca [4096 x i8]
+ %t = add i32 %x, %y
+ ret i32 %t
+}
+
+; Prolog stack allocation >= 4096 bytes will require the probe sequence
+; Case with FP regs
+define i32 @main4k_fpargs(double %x, double %y) nounwind {
+entry:
+; WIN_X64: movq %rcx, 8(%rsp)
+; WIN_X64: movq %gs:16, %rcx
+; LINUX-NOT: movq %gs:16, %rcx
+; LINUX: retq
+ %a = alloca [4096 x i8]
+ ret i32 0
+}
+
+; Prolog stack allocation >= 4096 bytes will require the probe sequence
+; Case with mixed regs
+define i32 @main4k_mixargs(double %x, i32 %y) nounwind {
+entry:
+; WIN_X64: movq %gs:16, %rcx
+; LINUX-NOT: movq %gs:16, %rcx
+; LINUX: retq
+ %a = alloca [4096 x i8]
+ ret i32 %y
+}
+
+; Make sure we don't emit the probe for a smaller prolog stack allocation.
+define i32 @main128() nounwind {
+entry:
+; WIN_X64-NOT: movq %gs:16, %rcx
+; WIN_X64: retq
+; LINUX-NOT: movq %gs:16, %rcx
+; LINUX: retq
+ %a = alloca [128 x i8]
+ ret i32 0
+}
+
+; Make sure we don't emit the probe sequence if not on windows even if the
+; caller has the Win64 calling convention.
+define x86_64_win64cc i32 @main4k_win64() nounwind {
+entry:
+; WIN_X64: movq %gs:16, %rcx
+; LINUX-NOT: movq %gs:16, %rcx
+; LINUX: retq
+ %a = alloca [4096 x i8]
+ ret i32 0
+}
+
+declare i32 @bar(i8*) nounwind
+
+; Within-body inline probe expansion
+define x86_64_win64cc i32 @main4k_alloca(i64 %n) nounwind {
+entry:
+; WIN_X64: callq bar
+; WIN_X64: movq %gs:16, [[R:%r.*]]
+; WIN_X64: callq bar
+; LINUX: callq bar
+; LINUX-NOT: movq %gs:16, [[R:%r.*]]
+; LINUX: callq bar
+ %a = alloca i8, i64 1024
+ %ra = call i32 @bar(i8* %a) nounwind
+ %b = alloca i8, i64 %n
+ %rb = call i32 @bar(i8* %b) nounwind
+ %r = add i32 %ra, %rb
+ ret i32 %r
+}
+
+; Influence of stack-probe-size attribute
+; Note this is not exposed in coreclr
+define i32 @test_probe_size() "stack-probe-size"="8192" nounwind {
+; WIN_X64-NOT: movq %gs:16, %rcx
+; WIN_X64: retq
+; LINUX-NOT: movq %gs:16, %rcx
+; LINUX: retq
+ %a = alloca [4096 x i8]
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/win_eh_prepare.ll b/test/CodeGen/X86/win_eh_prepare.ll
deleted file mode 100644
index 3e3f9af05822..000000000000
--- a/test/CodeGen/X86/win_eh_prepare.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; RUN: opt -S -winehprepare -dwarfehprepare -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
-
-; FIXME: Add and test outlining here.
-
-declare void @maybe_throw()
-
-@_ZTIi = external constant i8*
-@g = external global i32
-
-declare i32 @__C_specific_handler(...)
-declare i32 @__gxx_personality_seh0(...)
-declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
-
-define i32 @use_seh() personality i32 (...)* @__C_specific_handler {
-entry:
- invoke void @maybe_throw()
- to label %cont unwind label %lpad
-
-cont:
- ret i32 0
-
-lpad:
- %ehvals = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i32 (i8*, i8*)* @filt_g to i8*)
- %ehsel = extractvalue { i8*, i32 } %ehvals, 1
- %filt_g_sel = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @filt_g to i8*))
- %matches = icmp eq i32 %ehsel, %filt_g_sel
- br i1 %matches, label %ret1, label %eh.resume
-
-ret1:
- ret i32 1
-
-eh.resume:
- resume { i8*, i32 } %ehvals
-}
-
-define internal i32 @filt_g(i8*, i8*) {
- %g = load i32, i32* @g
- ret i32 %g
-}
-
-; CHECK-LABEL: define i32 @use_seh()
-; CHECK: invoke void @maybe_throw()
-; CHECK-NEXT: to label %cont unwind label %lpad
-; CHECK: landingpad
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: catch
-; CHECK-NEXT: call i8* (...) @llvm.eh.actions({{.*}})
-
-
-; A MinGW64-ish EH style. It could happen if a binary uses both MSVC CRT and
-; mingw CRT and is linked with LTO.
-define i32 @use_gcc() personality i32 (...)* @__gxx_personality_seh0 {
-entry:
- invoke void @maybe_throw()
- to label %cont unwind label %lpad
-
-cont:
- ret i32 0
-
-lpad:
- %ehvals = landingpad { i8*, i32 }
- cleanup
- catch i8* bitcast (i8** @_ZTIi to i8*)
- %ehsel = extractvalue { i8*, i32 } %ehvals, 1
- %filt_g_sel = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @filt_g to i8*))
- %matches = icmp eq i32 %ehsel, %filt_g_sel
- br i1 %matches, label %ret1, label %eh.resume
-
-ret1:
- ret i32 1
-
-eh.resume:
- resume { i8*, i32 } %ehvals
-}
-
-; CHECK-LABEL: define i32 @use_gcc()
-; CHECK: invoke void @maybe_throw()
-; CHECK-NEXT: to label %cont unwind label %lpad
-; CHECK: eh.resume:
-; CHECK: call void @_Unwind_Resume(i8* %exn.obj)
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
deleted file mode 100644
index dfa6e3aa76bd..000000000000
--- a/test/CodeGen/X86/win_ftol2.ll
+++ /dev/null
@@ -1,166 +0,0 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=FTOL
-; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
-; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
-; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
-; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
-; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
-; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
-
-; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This
-; function has a nonstandard calling convention: the input value is expected on
-; the x87 stack instead of the callstack. The input value is popped by the
-; callee. Mingw32 uses normal cdecl compiler-rt functions.
-
-define i64 @double_ui64(double %x) nounwind {
-entry:
-; COMPILERRT: @double_ui64
-; COMPILERRT-NOT: calll __ftol2
-; FTOL: @double_ui64
-; FTOL: fldl
-; FTOL: calll __ftol2
-; FTOL-NOT: fstp
- %0 = fptoui double %x to i64
- ret i64 %0
-}
-
-define i64 @float_ui64(float %x) nounwind {
-entry:
-; COMPILERRT: @float_ui64
-; COMPILERRT-NOT: calll __ftol2
-; FTOL: @float_ui64
-; FTOL: flds
-; FTOL: calll __ftol2
-; FTOL-NOT: fstp
- %0 = fptoui float %x to i64
- ret i64 %0
-}
-
-define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
-; COMPILERRT: @double_ui64_2
-; FTOL: @double_ui64_2
-; FTOL_2: @double_ui64_2
-;; stack is empty
-; FTOL_2: fldl
-;; stack is %z
-; FTOL_2: fldl
-;; stack is %y %z
-; FTOL_2: fldl
-;; stack is %x %y %z
-; FTOL_2: fdiv %st(0), %st(1)
-;; stack is %x %1 %z
-; FTOL_2: fsubp %st(2)
-;; stack is %1 %2
-; FTOL_2: fxch
-; FTOL_2-NOT: fld
-; FTOL_2-NOT: fst
-;; stack is %2 %1
-; FTOL_2: calll __ftol2
-; FTOL_2-NOT: fxch
-; FTOL_2-NOT: fld
-; FTOL_2-NOT: fst
-; FTOL_2: calll __ftol2
-;; stack is empty
-
- %1 = fdiv double %x, %y
- %2 = fsub double %x, %z
- %3 = fptoui double %2 to i64
- %4 = fptoui double %1 to i64
- %5 = sub i64 %4, %3
- ret i64 %5
-}
-
-define i64 @double_ui64_3(double %x, double %y, double %z) nounwind {
-; COMPILERRT: @double_ui64_3
-; FTOL: @double_ui64_3
-; FTOL_2: @double_ui64_3
-;; stack is empty
-; FTOL_2: fldl
-;; stack is %z
-; FTOL_2: fldl
-;; stack is %y %z
-; FTOL_2: fldl
-;; stack is %x %y %z
-; FTOL_2: fdiv %st(0), %st(1)
-;; stack is %x %1 %z
-; FTOL_2: fsubp %st(2)
-;; stack is %1 %2
-; FTOL_2-NOT: fxch
-; FTOL_2-NOT: fld
-; FTOL_2-NOT: fst
-;; stack is %1 %2 (still)
-; FTOL_2: calll __ftol2
-; FTOL_2-NOT: fxch
-; FTOL_2-NOT: fld
-; FTOL_2-NOT: fst
-; FTOL_2: calll __ftol2
-;; stack is empty
-
- %1 = fdiv double %x, %y
- %2 = fsub double %x, %z
- %3 = fptoui double %1 to i64
- %4 = fptoui double %2 to i64
- %5 = sub i64 %4, %3
- ret i64 %5
-}
-
-define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
-; COMPILERRT: @double_ui64_4
-; FTOL: @double_ui64_4
-; FTOL_2: @double_ui64_4
-;; stack is empty
-; FTOL_2: fldl
-;; stack is %y
-; FTOL_2: fldl
-;; stack is %x %y
-; FTOL_2: fxch
-;; stack is %y %x
-; FTOL_2: calll __ftol2
-;; stack is %x
-; FTOL_2: fld %st(0)
-;; stack is %x %x
-; FTOL_2: calll __ftol2
-;; stack is %x
-
- %1 = fptoui double %y to i64
- %2 = fptoui double %x to i64
- %3 = sub i64 %2, %1
- %4 = insertvalue {double, i64} undef, double %x, 0
- %5 = insertvalue {double, i64} %4, i64 %3, 1
- ret {double, i64} %5
-}
-
-define i32 @double_ui32_5(double %X) {
-; FTOL: @double_ui32_5
-; FTOL: calll __ftol2
- %tmp.1 = fptoui double %X to i32
- ret i32 %tmp.1
-}
-
-define i64 @double_ui64_5(double %X) {
-; FTOL: @double_ui64_5
-; FTOL: calll __ftol2
- %tmp.1 = fptoui double %X to i64
- ret i64 %tmp.1
-}
-
-define double @pr23957_32(double %A) {
-; FTOL-LABEL: @pr23957_32
-; FTOL: fldl
-; FTOL-NEXT: fld %st(0)
-; FTOL-NEXT: calll __ftol2
- %B = fptoui double %A to i32
- %C = uitofp i32 %B to double
- %D = fsub double %C, %A
- ret double %D
-}
-
-define double @pr23957_64(double %A) {
-; FTOL-LABEL: @pr23957_64
-; FTOL: fldl
-; FTOL-NEXT: fld %st(0)
-; FTOL-NEXT: calll __ftol2
- %B = fptoui double %A to i64
- %C = uitofp i64 %B to double
- %D = fsub double %C, %A
- ret double %D
-}
diff --git a/test/CodeGen/X86/wineh-coreclr.ll b/test/CodeGen/X86/wineh-coreclr.ll
new file mode 100644
index 000000000000..b61876827cac
--- /dev/null
+++ b/test/CodeGen/X86/wineh-coreclr.ll
@@ -0,0 +1,267 @@
+; RUN: llc -mtriple=x86_64-pc-windows-coreclr -verify-machineinstrs < %s | FileCheck %s
+
+declare void @ProcessCLRException()
+declare void @f(i32)
+declare void @g(i8 addrspace(1)*)
+declare i8 addrspace(1)* @llvm.eh.exceptionpointer.p1i8(token)
+
+; Simplified IR for pseudo-C# like the following:
+; void test1() {
+; try {
+; f(1);
+; try {
+; f(2);
+; } catch (type1) {
+; f(3);
+; } catch (type2) {
+; f(4);
+; try {
+; f(5);
+; } fault {
+; f(6);
+; }
+; }
+; } finally {
+; f(7);
+; }
+; f(8);
+; }
+
+; CHECK-LABEL: test1: # @test1
+; CHECK-NEXT: [[L_begin:.*func_begin.*]]:
+define void @test1() personality i8* bitcast (void ()* @ProcessCLRException to i8*) {
+entry:
+; CHECK: # %entry
+; CHECK: leaq [[FPOffset:[0-9]+]](%rsp), %rbp
+; CHECK: .seh_endprologue
+; CHECK: movq %rsp, [[PSPSymOffset:[0-9]+]](%rsp)
+; CHECK: [[L_before_f1:.+]]:
+; CHECK-NEXT: movl $1, %ecx
+; CHECK-NEXT: callq f
+; CHECK-NEXT: [[L_after_f1:.+]]:
+ invoke void @f(i32 1)
+ to label %inner_try unwind label %finally.pad
+inner_try:
+; CHECK: # %inner_try
+; CHECK: [[L_before_f2:.+]]:
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq f
+; CHECK-NEXT: [[L_after_f2:.+]]:
+ invoke void @f(i32 2)
+ to label %finally.clone unwind label %catch1.pad
+catch1.pad:
+ %cs1 = catchswitch within none [label %catch1.body, label %catch2.body] unwind label %finally.pad
+catch1.body:
+ %catch1 = catchpad within %cs1 [i32 1]
+; CHECK: .seh_proc [[L_catch1:[^ ]+]]
+; CHECK: .seh_stackalloc [[FuncletFrameSize:[0-9]+]]
+; ^ all funclets use the same frame size
+; CHECK: movq [[PSPSymOffset]](%rcx), %rcx
+; ^ establisher frame pointer passed in rcx
+; CHECK: movq %rcx, [[PSPSymOffset]](%rsp)
+; CHECK: leaq [[FPOffset]](%rcx), %rbp
+; CHECK: .seh_endprologue
+; CHECK: movq %rdx, %rcx
+; ^ exception pointer passed in rdx
+; CHECK-NEXT: callq g
+ %exn1 = call i8 addrspace(1)* @llvm.eh.exceptionpointer.p1i8(token %catch1)
+ call void @g(i8 addrspace(1)* %exn1) [ "funclet"(token %catch1) ]
+; CHECK: [[L_before_f3:.+]]:
+; CHECK-NEXT: movl $3, %ecx
+; CHECK-NEXT: callq f
+; CHECK-NEXT: [[L_after_f3:.+]]:
+ invoke void @f(i32 3) [ "funclet"(token %catch1) ]
+ to label %catch1.ret unwind label %finally.pad
+catch1.ret:
+ catchret from %catch1 to label %finally.clone
+catch2.body:
+ %catch2 = catchpad within %cs1 [i32 2]
+; CHECK: .seh_proc [[L_catch2:[^ ]+]]
+; CHECK: .seh_stackalloc [[FuncletFrameSize:[0-9]+]]
+; ^ all funclets use the same frame size
+; CHECK: movq [[PSPSymOffset]](%rcx), %rcx
+; ^ establisher frame pointer passed in rcx
+; CHECK: movq %rcx, [[PSPSymOffset]](%rsp)
+; CHECK: leaq [[FPOffset]](%rcx), %rbp
+; CHECK: .seh_endprologue
+; CHECK: movq %rdx, %rcx
+; ^ exception pointer passed in rdx
+; CHECK-NEXT: callq g
+ %exn2 = call i8 addrspace(1)* @llvm.eh.exceptionpointer.p1i8(token %catch2)
+ call void @g(i8 addrspace(1)* %exn2) [ "funclet"(token %catch2) ]
+; CHECK: [[L_before_f4:.+]]:
+; CHECK-NEXT: movl $4, %ecx
+; CHECK-NEXT: callq f
+; CHECK-NEXT: [[L_after_f4:.+]]:
+ invoke void @f(i32 4) [ "funclet"(token %catch2) ]
+ to label %try_in_catch unwind label %finally.pad
+try_in_catch:
+; CHECK: # %try_in_catch
+; CHECK: [[L_before_f5:.+]]:
+; CHECK-NEXT: movl $5, %ecx
+; CHECK-NEXT: callq f
+; CHECK-NEXT: [[L_after_f5:.+]]:
+ invoke void @f(i32 5) [ "funclet"(token %catch2) ]
+ to label %catch2.ret unwind label %fault.pad
+fault.pad:
+; CHECK: .seh_proc [[L_fault:[^ ]+]]
+ %fault = cleanuppad within none [i32 undef]
+; CHECK: .seh_stackalloc [[FuncletFrameSize:[0-9]+]]
+; ^ all funclets use the same frame size
+; CHECK: movq [[PSPSymOffset]](%rcx), %rcx
+; ^ establisher frame pointer passed in rcx
+; CHECK: movq %rcx, [[PSPSymOffset]](%rsp)
+; CHECK: leaq [[FPOffset]](%rcx), %rbp
+; CHECK: .seh_endprologue
+; CHECK: [[L_before_f6:.+]]:
+; CHECK-NEXT: movl $6, %ecx
+; CHECK-NEXT: callq f
+; CHECK-NEXT: [[L_after_f6:.+]]:
+ invoke void @f(i32 6) [ "funclet"(token %fault) ]
+ to label %fault.ret unwind label %finally.pad
+fault.ret:
+ cleanupret from %fault unwind label %finally.pad
+catch2.ret:
+ catchret from %catch2 to label %finally.clone
+finally.clone:
+ call void @f(i32 7)
+ br label %tail
+finally.pad:
+; CHECK: .seh_proc [[L_finally:[^ ]+]]
+ %finally = cleanuppad within none []
+; CHECK: .seh_stackalloc [[FuncletFrameSize:[0-9]+]]
+; ^ all funclets use the same frame size
+; CHECK: movq [[PSPSymOffset]](%rcx), %rcx
+; ^ establisher frame pointer passed in rcx
+; CHECK: movq %rcx, [[PSPSymOffset]](%rsp)
+; CHECK: leaq [[FPOffset]](%rcx), %rbp
+; CHECK: .seh_endprologue
+; CHECK-NEXT: movl $7, %ecx
+; CHECK-NEXT: callq f
+ call void @f(i32 7) [ "funclet"(token %finally) ]
+ cleanupret from %finally unwind to caller
+tail:
+ call void @f(i32 8)
+ ret void
+; CHECK: [[L_end:.*func_end.*]]:
+}
+
+; FIXME: Verify that the new clauses are correct and re-enable these checks.
+
+; Now check for EH table in xdata (following standard xdata)
+; CHECKX-LABEL: .section .xdata
+; standard xdata comes here
+; CHECKX: .long 4{{$}}
+; ^ number of funclets
+; CHECKX-NEXT: .long [[L_catch1]]-[[L_begin]]
+; ^ offset from L_begin to start of 1st funclet
+; CHECKX-NEXT: .long [[L_catch2]]-[[L_begin]]
+; ^ offset from L_begin to start of 2nd funclet
+; CHECKX-NEXT: .long [[L_fault]]-[[L_begin]]
+; ^ offset from L_begin to start of 3rd funclet
+; CHECKX-NEXT: .long [[L_finally]]-[[L_begin]]
+; ^ offset from L_begin to start of 4th funclet
+; CHECKX-NEXT: .long [[L_end]]-[[L_begin]]
+; ^ offset from L_begin to end of last funclet
+; CHECKX-NEXT: .long 7
+; ^ number of EH clauses
+; Clause 1: call f(2) is guarded by catch1
+; CHECKX-NEXT: .long 0
+; ^ flags (0 => catch handler)
+; CHECKX-NEXT: .long ([[L_before_f2]]-[[L_begin]])+1
+; ^ offset of start of clause
+; CHECKX-NEXT: .long ([[L_after_f2]]-[[L_begin]])+1
+; ^ offset of end of clause
+; CHECKX-NEXT: .long [[L_catch1]]-[[L_begin]]
+; ^ offset of start of handler
+; CHECKX-NEXT: .long [[L_catch2]]-[[L_begin]]
+; ^ offset of end of handler
+; CHECKX-NEXT: .long 1
+; ^ type token of catch (from catchpad)
+; Clause 2: call f(2) is also guarded by catch2
+; CHECKX-NEXT: .long 0
+; ^ flags (0 => catch handler)
+; CHECKX-NEXT: .long ([[L_before_f2]]-[[L_begin]])+1
+; ^ offset of start of clause
+; CHECKX-NEXT: .long ([[L_after_f2]]-[[L_begin]])+1
+; ^ offset of end of clause
+; CHECKX-NEXT: .long [[L_catch2]]-[[L_begin]]
+; ^ offset of start of handler
+; CHECKX-NEXT: .long [[L_fault]]-[[L_begin]]
+; ^ offset of end of handler
+; CHECKX-NEXT: .long 2
+; ^ type token of catch (from catchpad)
+; Clause 3: calls f(1) and f(2) are guarded by finally
+; CHECKX-NEXT: .long 2
+; ^ flags (2 => finally handler)
+; CHECKX-NEXT: .long ([[L_before_f1]]-[[L_begin]])+1
+; ^ offset of start of clause
+; CHECKX-NEXT: .long ([[L_after_f2]]-[[L_begin]])+1
+; ^ offset of end of clause
+; CHECKX-NEXT: .long [[L_finally]]-[[L_begin]]
+; ^ offset of start of handler
+; CHECKX-NEXT: .long [[L_end]]-[[L_begin]]
+; ^ offset of end of handler
+; CHECKX-NEXT: .long 0
+; ^ type token slot (null for finally)
+; Clause 4: call f(3) is guarded by finally
+; This is a "duplicate" because the protected range (f(3))
+; is in funclet catch1 but the finally's immediate parent
+; is the main function, not that funclet.
+; CHECKX-NEXT: .long 10
+; ^ flags (2 => finally handler | 8 => duplicate)
+; CHECKX-NEXT: .long ([[L_before_f3]]-[[L_begin]])+1
+; ^ offset of start of clause
+; CHECKX-NEXT: .long ([[L_after_f3]]-[[L_begin]])+1
+; ^ offset of end of clause
+; CHECKX-NEXT: .long [[L_finally]]-[[L_begin]]
+; ^ offset of start of handler
+; CHECKX-NEXT: .long [[L_end]]-[[L_begin]]
+; ^ offset of end of handler
+; CHECKX-NEXT: .long 0
+; ^ type token slot (null for finally)
+; Clause 5: call f(5) is guarded by fault
+; CHECKX-NEXT: .long 4
+; ^ flags (4 => fault handler)
+; CHECKX-NEXT: .long ([[L_before_f5]]-[[L_begin]])+1
+; ^ offset of start of clause
+; CHECKX-NEXT: .long ([[L_after_f5]]-[[L_begin]])+1
+; ^ offset of end of clause
+; CHECKX-NEXT: .long [[L_fault]]-[[L_begin]]
+; ^ offset of start of handler
+; CHECKX-NEXT: .long [[L_finally]]-[[L_begin]]
+; ^ offset of end of handler
+; CHECKX-NEXT: .long 0
+; ^ type token slot (null for fault)
+; Clause 6: calls f(4) and f(5) are guarded by finally
+; This is a "duplicate" because the protected range (f(4)-f(5))
+; is in funclet catch2 but the finally's immediate parent
+; is the main function, not that funclet.
+; CHECKX-NEXT: .long 10
+; ^ flags (2 => finally handler | 8 => duplicate)
+; CHECKX-NEXT: .long ([[L_before_f4]]-[[L_begin]])+1
+; ^ offset of start of clause
+; CHECKX-NEXT: .long ([[L_after_f5]]-[[L_begin]])+1
+; ^ offset of end of clause
+; CHECKX-NEXT: .long [[L_finally]]-[[L_begin]]
+; ^ offset of start of handler
+; CHECKX-NEXT: .long [[L_end]]-[[L_begin]]
+; ^ offset of end of handler
+; CHECKX-NEXT: .long 0
+; ^ type token slot (null for finally)
+; Clause 7: call f(6) is guarded by finally
+; This is a "duplicate" because the protected range (f(3))
+; is in funclet catch1 but the finally's immediate parent
+; is the main function, not that funclet.
+; CHECKX-NEXT: .long 10
+; ^ flags (2 => finally handler | 8 => duplicate)
+; CHECKX-NEXT: .long ([[L_before_f6]]-[[L_begin]])+1
+; ^ offset of start of clause
+; CHECKX-NEXT: .long ([[L_after_f6]]-[[L_begin]])+1
+; ^ offset of end of clause
+; CHECKX-NEXT: .long [[L_finally]]-[[L_begin]]
+; ^ offset of start of handler
+; CHECKX-NEXT: .long [[L_end]]-[[L_begin]]
+; ^ offset of end of handler
+; CHECKX-NEXT: .long 0
+; ^ type token slot (null for finally)
diff --git a/test/CodeGen/X86/wineh-exceptionpointer.ll b/test/CodeGen/X86/wineh-exceptionpointer.ll
new file mode 100644
index 000000000000..f6fd4fe7c525
--- /dev/null
+++ b/test/CodeGen/X86/wineh-exceptionpointer.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=x86_64-pc-windows-coreclr < %s | FileCheck %s
+
+declare void @ProcessCLRException()
+declare i8 addrspace(1)* @llvm.eh.exceptionpointer.p1i8(token)
+declare void @f()
+declare void @g(i32 addrspace(1)*)
+
+; CHECK-LABEL: test1: # @test1
+define void @test1() personality i8* bitcast (void ()* @ProcessCLRException to i8*) {
+entry:
+ invoke void @f()
+ to label %exit unwind label %catch.pad
+catch.pad:
+ %cs1 = catchswitch within none [label %catch.body] unwind to caller
+catch.body:
+ ; CHECK: {{^[^: ]+}}: # %catch.body
+ ; CHECK: movq %rdx, %rcx
+ ; CHECK-NEXT: callq g
+ %catch = catchpad within %cs1 [i32 5]
+ %exn = call i8 addrspace(1)* @llvm.eh.exceptionpointer.p1i8(token %catch)
+ %cast_exn = bitcast i8 addrspace(1)* %exn to i32 addrspace(1)*
+ call void @g(i32 addrspace(1)* %cast_exn) [ "funclet"(token %catch) ]
+ catchret from %catch to label %exit
+exit:
+ ret void
+}
diff --git a/test/CodeGen/X86/wineh-no-ehpads.ll b/test/CodeGen/X86/wineh-no-ehpads.ll
new file mode 100644
index 000000000000..fd6798f2e088
--- /dev/null
+++ b/test/CodeGen/X86/wineh-no-ehpads.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "x86_64-pc-windows-msvc"
+
+declare void @g()
+declare i32 @__CxxFrameHandler3(...)
+
+define void @personality_no_ehpad() personality i32 (...)* @__CxxFrameHandler3 {
+ call void @g()
+ ret void
+}
+
+; CHECK-LABEL: personality_no_ehpad: # @personality_no_ehpad
+; CHECK-NOT: movq $-2,
+; CHECK: callq g
+; CHECK: nop
+; CHECK: retq
+
+; Shouldn't have any LSDA either.
+; CHECK-NOT: cppxdata
diff --git a/test/CodeGen/X86/x32-function_pointer-3.ll b/test/CodeGen/X86/x32-function_pointer-3.ll
index 5eaf85d8f931..f5687b8a9de2 100644
--- a/test/CodeGen/X86/x32-function_pointer-3.ll
+++ b/test/CodeGen/X86/x32-function_pointer-3.ll
@@ -3,7 +3,7 @@
; Test calling function pointer passed in struct
-; The fuction argument `h' in
+; The function argument `h' in
; struct foo {
; void (*f) (void);
diff --git a/test/CodeGen/X86/x32-indirectbr.ll b/test/CodeGen/X86/x32-indirectbr.ll
new file mode 100644
index 000000000000..7c83827990c7
--- /dev/null
+++ b/test/CodeGen/X86/x32-indirectbr.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-none-none-gnux32 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-none-none-gnux32 -mcpu=generic -fast-isel | FileCheck %s
+; Bug 22859
+;
+; x32 pointers are 32-bits wide. x86-64 indirect branches use the full 64-bit
+; registers. Therefore, x32 CodeGen needs to zero extend indirectbr's target to
+; 64-bit.
+
+define i8 @test1() nounwind ssp {
+entry:
+ %0 = select i1 undef, ; <i8*> [#uses=1]
+ i8* blockaddress(@test1, %bb),
+ i8* blockaddress(@test1, %bb6)
+ indirectbr i8* %0, [label %bb, label %bb6]
+bb: ; preds = %entry
+ ret i8 1
+
+bb6: ; preds = %entry
+ ret i8 2
+}
+; CHECK-LABEL: @test1
+; We are looking for a movl ???, %r32 followed by a 64-bit jmp through the
+; same register.
+; CHECK: movl {{.*}}, %{{e|r}}[[REG:.[^d]*]]{{d?}}
+; CHECK-NEXT: jmpq *%r[[REG]]
+
diff --git a/test/CodeGen/X86/x32-landingpad.ll b/test/CodeGen/X86/x32-landingpad.ll
new file mode 100644
index 000000000000..b026a31a4045
--- /dev/null
+++ b/test/CodeGen/X86/x32-landingpad.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=x86_64-none-none-gnux32 -mcpu=generic | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-none-none-gnux32 -mcpu=generic -fast-isel | FileCheck %s
+;
+; Ensures that landingpad instructions in x32 use the right Exception Pointer
+; and Exception Selector registers.
+
+declare void @foo()
+declare void @bar(i8*, i32) noreturn
+declare i32 @__gxx_personality_v0(...)
+
+define void @test1() uwtable personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ invoke void @foo() to label %done unwind label %lpad
+done:
+ ret void
+lpad:
+ %0 = landingpad { i8*, i32 } cleanup
+; The Exception Pointer is %eax; the Exception Selector, %edx.
+; CHECK: LBB{{[^%]*}} %lpad
+; CHECK-DAG: movl %eax, {{.*}}
+; CHECK-DAG: movl %edx, {{.*}}
+; CHECK: callq bar
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = extractvalue { i8*, i32 } %0, 1
+ call void @bar(i8* %1, i32 %2)
+ unreachable
+}
diff --git a/test/CodeGen/X86/x32-va_start.ll b/test/CodeGen/X86/x32-va_start.ll
new file mode 100644
index 000000000000..a48468880507
--- /dev/null
+++ b/test/CodeGen/X86/x32-va_start.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=-sse | FileCheck %s -check-prefix=CHECK -check-prefix=NOSSE
+;
+; Verifies that x32 va_start lowering is sane. To regenerate this test, use
+; cat <<EOF |
+; #include <stdarg.h>
+;
+; int foo(float a, const char* fmt, ...) {
+; va_list ap;
+; va_start(ap, fmt);
+; int value = va_arg(ap, int);
+; va_end(ap);
+; return value;
+; }
+; EOF
+; build/bin/clang -mx32 -O3 -o- -S -emit-llvm -xc -
+;
+target datalayout = "e-m:e-p:32:32-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnux32"
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+define i32 @foo(float %a, i8* nocapture readnone %fmt, ...) nounwind {
+entry:
+ %ap = alloca [1 x %struct.__va_list_tag], align 16
+ %0 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*
+ call void @llvm.lifetime.start(i64 16, i8* %0) #2
+ call void @llvm.va_start(i8* %0)
+; SSE: subl $72, %esp
+; SSE: testb %al, %al
+; SSE: je .[[NOFP:.*]]
+; SSE-DAG: movaps %xmm1
+; SSE-DAG: movaps %xmm2
+; SSE-DAG: movaps %xmm3
+; SSE-DAG: movaps %xmm4
+; SSE-DAG: movaps %xmm5
+; SSE-DAG: movaps %xmm6
+; SSE-DAG: movaps %xmm7
+; NOSSE-NOT: xmm
+; SSE: .[[NOFP]]:
+; CHECK-DAG: movq %r9
+; CHECK-DAG: movq %r8
+; CHECK-DAG: movq %rcx
+; CHECK-DAG: movq %rdx
+; CHECK-DAG: movq %rsi
+ %gp_offset_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 0
+ %gp_offset = load i32, i32* %gp_offset_p, align 16
+ %fits_in_gp = icmp ult i32 %gp_offset, 41
+ br i1 %fits_in_gp, label %vaarg.in_reg, label %vaarg.in_mem
+; CHECK: cmpl $40, [[COUNT:.*]]
+; CHECK: ja .[[IN_MEM:.*]]
+
+vaarg.in_reg: ; preds = %entry
+ %1 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 3
+ %reg_save_area = load i8*, i8** %1, align 4
+ %2 = getelementptr i8, i8* %reg_save_area, i32 %gp_offset
+ %3 = add i32 %gp_offset, 8
+ store i32 %3, i32* %gp_offset_p, align 16
+ br label %vaarg.end
+; CHECK: movl {{[^,]*}}, [[ADDR:.*]]
+; CHECK: addl [[COUNT]], [[ADDR]]
+; SSE: jmp .[[END:.*]]
+; NOSSE: movl ([[ADDR]]), %eax
+; NOSSE: retq
+; CHECK: .[[IN_MEM]]:
+vaarg.in_mem: ; preds = %entry
+ %overflow_arg_area_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i32 0, i32 0, i32 2
+ %overflow_arg_area = load i8*, i8** %overflow_arg_area_p, align 8
+ %overflow_arg_area.next = getelementptr i8, i8* %overflow_arg_area, i32 8
+ store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
+ br label %vaarg.end
+; CHECK: movl {{[^,]*}}, [[ADDR]]
+; NOSSE: movl ([[ADDR]]), %eax
+; NOSSE: retq
+; SSE: .[[END]]:
+
+vaarg.end: ; preds = %vaarg.in_mem, %vaarg.in_reg
+ %vaarg.addr.in = phi i8* [ %2, %vaarg.in_reg ], [ %overflow_arg_area, %vaarg.in_mem ]
+ %vaarg.addr = bitcast i8* %vaarg.addr.in to i32*
+ %4 = load i32, i32* %vaarg.addr, align 4
+ call void @llvm.va_end(i8* %0)
+ call void @llvm.lifetime.end(i64 16, i8* %0) #2
+ ret i32 %4
+; SSE: movl ([[ADDR]]), %eax
+; SSE: retq
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*) nounwind
+
+; Function Attrs: nounwind
+declare void @llvm.va_end(i8*) nounwind
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
diff --git a/test/CodeGen/X86/x86-32-intrcc.ll b/test/CodeGen/X86/x86-32-intrcc.ll
new file mode 100644
index 000000000000..908da3d11206
--- /dev/null
+++ b/test/CodeGen/X86/x86-32-intrcc.ll
@@ -0,0 +1,79 @@
+; RUN: llc -mtriple=i686-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=i686-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0
+
+%struct.interrupt_frame = type { i32, i32, i32, i32, i32 }
+
+@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_clobbers to i8*)], section "llvm.metadata"
+
+; Spills eax, putting original esp at +4.
+; No stack adjustment if declared with no error code
+define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {
+ ; CHECK-LABEL: test_isr_no_ecode:
+ ; CHECK: pushl %eax
+ ; CHECK: movl 12(%esp), %eax
+ ; CHECK: popl %eax
+ ; CHECK: iretl
+ ; CHECK0-LABEL: test_isr_no_ecode:
+ ; CHECK0: pushl %eax
+ ; CHECK0: leal 4(%esp), %eax
+ ; CHECK0: movl 8(%eax), %eax
+ ; CHECK0: popl %eax
+ ; CHECK0: iretl
+ %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
+ %flags = load i32, i32* %pflags, align 4
+ call void asm sideeffect "", "r"(i32 %flags)
+ ret void
+}
+
+; Spills eax and ecx, putting original esp at +8. Stack is adjusted up another 4 bytes
+; before return, popping the error code.
+define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i32 %ecode) {
+ ; CHECK-LABEL: test_isr_ecode
+ ; CHECK: pushl %ecx
+ ; CHECK: pushl %eax
+ ; CHECK: movl 8(%esp), %eax
+ ; CHECK: movl 20(%esp), %ecx
+ ; CHECK: popl %eax
+ ; CHECK: popl %ecx
+ ; CHECK: addl $4, %esp
+ ; CHECK: iretl
+ ; CHECK0-LABEL: test_isr_ecode
+ ; CHECK0: pushl %ecx
+ ; CHECK0: pushl %eax
+ ; CHECK0: movl 8(%esp), %eax
+ ; CHECK0: leal 12(%esp), %ecx
+ ; CHECK0: movl 8(%ecx), %ecx
+ ; CHECK0: popl %eax
+ ; CHECK0: popl %ecx
+ ; CHECK0: addl $4, %esp
+ ; CHECK0: iretl
+ %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
+ %flags = load i32, i32* %pflags, align 4
+ call x86_fastcallcc void asm sideeffect "", "r,r"(i32 %flags, i32 %ecode)
+ ret void
+}
+
+; All clobbered registers must be saved
+define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i32 %ecode) {
+ call void asm sideeffect "", "~{eax},~{ebx},~{ebp}"()
+ ; CHECK-LABEL: test_isr_clobbers
+ ; CHECK-SSE-NEXT: pushl %ebp
+ ; CHECK-SSE-NEXT: pushl %ebx
+ ; CHECK-SSE-NEXT; pushl %eax
+ ; CHECK-SSE-NEXT: popl %eax
+ ; CHECK-SSE-NEXT: popl %ebx
+ ; CHECK-SSE-NEXT: popl %ebp
+ ; CHECK-SSE-NEXT: addl $4, %esp
+ ; CHECK-SSE-NEXT: iretl
+ ; CHECK0-LABEL: test_isr_clobbers
+ ; CHECK0-SSE-NEXT: pushl %ebp
+ ; CHECK0-SSE-NEXT: pushl %ebx
+ ; CHECK0-SSE-NEXT; pushl %eax
+ ; CHECK0-SSE-NEXT: popl %eax
+ ; CHECK0-SSE-NEXT: popl %ebx
+ ; CHECK0-SSE-NEXT: popl %ebp
+ ; CHECK0-SSE-NEXT: addl $4, %esp
+ ; CHECK0-SSE-NEXT: iretl
+ ret void
+}
+
diff --git a/test/CodeGen/X86/x86-64-baseptr.ll b/test/CodeGen/X86/x86-64-baseptr.ll
index 7fd94fa10f6c..ad8334719b32 100644
--- a/test/CodeGen/X86/x86-64-baseptr.ll
+++ b/test/CodeGen/X86/x86-64-baseptr.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=x86_64-pc-linux -force-align-stack -stack-alignment=32 < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -force-align-stack -stack-alignment=32 < %s | FileCheck -check-prefix=X32ABI %s
+; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -stack-alignment=32 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -stackrealign -stack-alignment=32 < %s | FileCheck -check-prefix=X32ABI %s
; This should run with NaCl as well ( -mtriple=x86_64-pc-nacl ) but currently doesn't due to PR22655
; Make sure the correct register gets set up as the base pointer
diff --git a/test/CodeGen/X86/x86-64-double-precision-shift-left.ll b/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
index f2380f23b8ee..7515c46f7cee 100644
--- a/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
+++ b/test/CodeGen/X86/x86-64-double-precision-shift-left.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 | FileCheck %s
; Verify that for the architectures that are known to have poor latency
; double precision shift instructions we generate alternative sequence
; of instructions with lower latencies instead of shld instruction.
@@ -8,11 +8,9 @@
; return (a << 1) | (b >> 63);
;}
-; CHECK: lshift1:
-; CHECK: addq {{.*}},{{.*}}
-; CHECK-NEXT: shrq $63, {{.*}}
-; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
-
+; CHECK-LABEL: lshift1:
+; CHECK: shrq $63, %rsi
+; CHECK-NEXT: leaq (%rsi,%rdi,2), %rax
define i64 @lshift1(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
@@ -27,10 +25,9 @@ entry:
; return (a << 2) | (b >> 62);
;}
-; CHECK: lshift2:
-; CHECK: shlq $2, {{.*}}
-; CHECK-NEXT: shrq $62, {{.*}}
-; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
+; CHECK-LABEL: lshift2:
+; CHECK: shrq $62, %rsi
+; CHECK-NEXT: leaq (%rsi,%rdi,4), %rax
define i64 @lshift2(i64 %a, i64 %b) nounwind readnone uwtable {
entry:
diff --git a/test/CodeGen/X86/x86-64-double-precision-shift-right.ll b/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
index 5edaad89df4c..5e3f22941713 100644
--- a/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
+++ b/test/CodeGen/X86/x86-64-double-precision-shift-right.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=bdver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 | FileCheck %s
; Verify that for the architectures that are known to have poor latency
; double precision shift instructions we generate alternative sequence
; of instructions with lower latencies instead of shrd instruction.
@@ -61,10 +61,9 @@ define i64 @rshift7(i64 %a, i64 %b) nounwind readnone uwtable {
; return (a >> 63) | (b << 1);
;}
-; CHECK: rshift63:
-; CHECK: shrq $63, {{.*}}
-; CHECK-NEXT: leaq ({{.*}},{{.*}}), {{.*}}
-; CHECK-NEXT: orq {{.*}}, {{.*}}
+; CHECK-LABEL: rshift63:
+; CHECK: shrq $63, %rdi
+; CHECK-NEXT: leaq (%rdi,%rsi,2), %rax
define i64 @rshift63(i64 %a, i64 %b) nounwind readnone uwtable {
%1 = lshr i64 %a, 63
diff --git a/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll b/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
index 08d0257a0e5c..ba559aa2ff0e 100644
--- a/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
+++ b/test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
@@ -9,7 +9,7 @@
; return (a << 10) | (b >> 54);
; }
-; Function Attrs: minsize nounwind optsize readnone uwtable
+; Function Attrs: minsize nounwind readnone uwtable
define i64 @_Z8lshift10mm(i64 %a, i64 %b) #0 {
entry:
; CHECK: shldq $10
@@ -19,7 +19,7 @@ entry:
ret i64 %or
}
-attributes #0 = { minsize nounwind optsize readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { minsize nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
; clang -Os -c test2.cpp -emit-llvm -S
diff --git a/test/CodeGen/X86/x86-64-intrcc.ll b/test/CodeGen/X86/x86-64-intrcc.ll
new file mode 100644
index 000000000000..8f70b391fa10
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-intrcc.ll
@@ -0,0 +1,86 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck %s -check-prefix=CHECK0
+
+%struct.interrupt_frame = type { i64, i64, i64, i64, i64 }
+
+@llvm.used = appending global [3 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_clobbers to i8*)], section "llvm.metadata"
+
+; Spills rax, putting original esp at +8.
+; No stack adjustment if declared with no error code
+define x86_intrcc void @test_isr_no_ecode(%struct.interrupt_frame* %frame) {
+ ; CHECK-LABEL: test_isr_no_ecode:
+ ; CHECK: pushq %rax
+ ; CHECK: movq 24(%rsp), %rax
+ ; CHECK: popq %rax
+ ; CHECK: iretq
+ ; CHECK0-LABEL: test_isr_no_ecode:
+ ; CHECK0: pushq %rax
+ ; CHECK0: leaq 8(%rsp), %rax
+ ; CHECK0: movq 16(%rax), %rax
+ ; CHECK0: popq %rax
+ ; CHECK0: iretq
+ %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
+ %flags = load i64, i64* %pflags, align 4
+ call void asm sideeffect "", "r"(i64 %flags)
+ ret void
+}
+
+; Spills rax and rcx, putting original rsp at +16. Stack is adjusted up another 8 bytes
+; before return, popping the error code.
+define x86_intrcc void @test_isr_ecode(%struct.interrupt_frame* %frame, i64 %ecode) {
+ ; CHECK-LABEL: test_isr_ecode
+ ; CHECK: pushq %rax
+ ; CHECK: pushq %rcx
+ ; CHECK: movq 16(%rsp), %rax
+ ; CHECK: movq 40(%rsp), %rcx
+ ; CHECK: popq %rcx
+ ; CHECK: popq %rax
+ ; CHECK: addq $8, %rsp
+ ; CHECK: iretq
+ ; CHECK0-LABEL: test_isr_ecode
+ ; CHECK0: pushq %rax
+ ; CHECK0: pushq %rcx
+ ; CHECK0: movq 16(%rsp), %rax
+ ; CHECK0: leaq 24(%rsp), %rcx
+ ; CHECK0: movq 16(%rcx), %rcx
+ ; CHECK0: popq %rcx
+ ; CHECK0: popq %rax
+ ; CHECK0: addq $8, %rsp
+ ; CHECK0: iretq
+ %pflags = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %frame, i32 0, i32 2
+ %flags = load i64, i64* %pflags, align 4
+ call void asm sideeffect "", "r,r"(i64 %flags, i64 %ecode)
+ ret void
+}
+
+; All clobbered registers must be saved
+define x86_intrcc void @test_isr_clobbers(%struct.interrupt_frame* %frame, i64 %ecode) {
+ call void asm sideeffect "", "~{rax},~{rbx},~{rbp},~{r11},~{xmm0}"()
+ ; CHECK-LABEL: test_isr_clobbers
+ ; CHECK-SSE-NEXT: pushq %rax
+ ; CHECK-SSE-NEXT; pushq %r11
+ ; CHECK-SSE-NEXT: pushq %rbp
+ ; CHECK-SSE-NEXT: pushq %rbx
+ ; CHECK-SSE-NEXT: movaps %xmm0
+ ; CHECK-SSE-NEXT: movaps %xmm0
+ ; CHECK-SSE-NEXT: popq %rbx
+ ; CHECK-SSE-NEXT: popq %rbp
+ ; CHECK-SSE-NEXT: popq %r11
+ ; CHECK-SSE-NEXT: popq %rax
+ ; CHECK-SSE-NEXT: addq $8, %rsp
+ ; CHECK-SSE-NEXT: iretq
+ ; CHECK0-LABEL: test_isr_clobbers
+ ; CHECK0-SSE-NEXT: pushq %rax
+ ; CHECK0-SSE-NEXT; pushq %r11
+ ; CHECK0-SSE-NEXT: pushq %rbp
+ ; CHECK0-SSE-NEXT: pushq %rbx
+ ; CHECK0-SSE-NEXT: movaps %xmm0
+ ; CHECK0-SSE-NEXT: movaps %xmm0
+ ; CHECK0-SSE-NEXT: popq %rbx
+ ; CHECK0-SSE-NEXT: popq %rbp
+ ; CHECK0-SSE-NEXT: popq %r11
+ ; CHECK0-SSE-NEXT: popq %rax
+ ; CHECK0-SSE-NEXT: addq $8, %rsp
+ ; CHECK0-SSE-NEXT: iretq
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/X86/x86-64-ms_abi-vararg.ll b/test/CodeGen/X86/x86-64-ms_abi-vararg.ll
new file mode 100644
index 000000000000..e3436521a5bd
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-ms_abi-vararg.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-linux-gnu | FileCheck %s
+
+; Verify that the var arg parameters which are passed in registers are stored
+; in home stack slots allocated by the caller and that AP is correctly
+; calculated.
+define x86_64_win64cc void @average_va(i32 %count, ...) nounwind {
+entry:
+; CHECK: pushq
+; CHECK: movq %r9, 40(%rsp)
+; CHECK: movq %r8, 32(%rsp)
+; CHECK: movq %rdx, 24(%rsp)
+; CHECK: leaq 24(%rsp), %rax
+
+ %ap = alloca i8*, align 8 ; <i8**> [#uses=1]
+ %ap.0 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap.0)
+ ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_copy(i8*, i8*) nounwind
+declare void @llvm.va_end(i8*) nounwind
+
+; CHECK-LABEL: f5:
+; CHECK: pushq
+; CHECK: leaq 56(%rsp),
+define x86_64_win64cc i8** @f5(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, ...) nounwind {
+entry:
+ %ap = alloca i8*, align 8
+ %ap.0 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap.0)
+ ret i8** %ap
+}
+
+; CHECK-LABEL: f4:
+; CHECK: pushq
+; CHECK: leaq 48(%rsp),
+define x86_64_win64cc i8** @f4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
+entry:
+ %ap = alloca i8*, align 8
+ %ap.0 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap.0)
+ ret i8** %ap
+}
+
+; CHECK-LABEL: f3:
+; CHECK: pushq
+; CHECK: leaq 40(%rsp),
+define x86_64_win64cc i8** @f3(i64 %a0, i64 %a1, i64 %a2, ...) nounwind {
+entry:
+ %ap = alloca i8*, align 8
+ %ap.0 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap.0)
+ ret i8** %ap
+}
+
+; WinX86_64 uses char* for va_list. Verify that the correct amount of bytes
+; are copied using va_copy.
+
+; CHECK-LABEL: copy1:
+; CHECK: leaq 32(%rsp), [[REG_copy1:%[a-z]+]]
+; CHECK: movq [[REG_copy1]], 8(%rsp)
+; CHECK: movq [[REG_copy1]], (%rsp)
+; CHECK: ret
+define x86_64_win64cc void @copy1(i64 %a0, ...) nounwind {
+entry:
+ %ap = alloca i8*, align 8
+ %cp = alloca i8*, align 8
+ %ap.0 = bitcast i8** %ap to i8*
+ %cp.0 = bitcast i8** %cp to i8*
+ call void @llvm.va_start(i8* %ap.0)
+ call void @llvm.va_copy(i8* %cp.0, i8* %ap.0)
+ ret void
+}
+
+; CHECK-LABEL: copy4:
+; CHECK: leaq 56(%rsp), [[REG_copy4:%[a-z]+]]
+; CHECK: movq [[REG_copy4]], 8(%rsp)
+; CHECK: movq [[REG_copy4]], (%rsp)
+; CHECK: ret
+define x86_64_win64cc void @copy4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
+entry:
+ %ap = alloca i8*, align 8
+ %cp = alloca i8*, align 8
+ %ap.0 = bitcast i8** %ap to i8*
+ %cp.0 = bitcast i8** %cp to i8*
+ call void @llvm.va_start(i8* %ap.0)
+ call void @llvm.va_copy(i8* %cp.0, i8* %ap.0)
+ ret void
+}
+
+; CHECK-LABEL: arg4:
+; va_start:
+; CHECK: leaq 48(%rsp), [[REG_arg4_1:%[a-z]+]]
+; CHECK: movq [[REG_arg4_1]], (%rsp)
+; va_arg:
+; CHECK: leaq 52(%rsp), [[REG_arg4_2:%[a-z]+]]
+; CHECK: movq [[REG_arg4_2]], (%rsp)
+; CHECK: movl 48(%rsp), %eax
+; CHECK: ret
+define x86_64_win64cc i32 @arg4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
+entry:
+ %ap = alloca i8*, align 8
+ %ap.0 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap.0)
+ %tmp = va_arg i8** %ap, i32
+ ret i32 %tmp
+}
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
index 8790fa6072b3..d76cf6a17552 100644
--- a/test/CodeGen/X86/x86-64-pic-10.ll
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
; RUN: grep "callq g@PLT" %t1
-@g = weak alias i32 ()* @f
+@g = weak alias i32 (), i32 ()* @f
define void @h() {
entry:
diff --git a/test/CodeGen/X86/x86-fold-pshufb.ll b/test/CodeGen/X86/x86-fold-pshufb.ll
index c29e592bfe83..84af4f5d4b86 100644
--- a/test/CodeGen/X86/x86-fold-pshufb.ll
+++ b/test/CodeGen/X86/x86-fold-pshufb.ll
@@ -1,11 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -relocation-model=pic -march=x86-64 -mtriple=x86_64-unknown-unknown -mattr=+ssse3 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-unknown -mattr=+ssse3 < %s | FileCheck %s
; Verify that the backend correctly folds the shuffle in function 'fold_pshufb'
; into a simple load from constant pool.
define <2 x i64> @fold_pshufb() {
; CHECK-LABEL: fold_pshufb:
-; CHECK: # BB#0:
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0]
; CHECK-NEXT: retq
entry:
@@ -14,4 +16,20 @@ entry:
ret <2 x i64> %1
}
+; The pshufb from function @pr24562 was wrongly folded into its first operand
+; as a result of a late target shuffle combine on the legalized selection dag.
+;
+; Check that the pshufb is correctly folded to a zero vector.
+
+define <2 x i64> @pr24562() {
+; CHECK-LABEL: pr24562:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) #2
+ %1 = bitcast <16 x i8> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
diff --git a/test/CodeGen/X86/x86-sanitizer-shrink-wrapping.ll b/test/CodeGen/X86/x86-sanitizer-shrink-wrapping.ll
new file mode 100644
index 000000000000..4cb11bf3f5cd
--- /dev/null
+++ b/test/CodeGen/X86/x86-sanitizer-shrink-wrapping.ll
@@ -0,0 +1,40 @@
+; RUN: llc -o - < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; Even if the target supports shrink-wrapping, the prologue and epilogue
+; must not move because a crash can happen anywhere and sanitizers need
+; to be able to unwind from the PC of the crash.
+; CHECK-LABEL: sanitize:
+; CHECK: pushq
+; CHECK: incl 40
+; CHECK: popq
+; CHECK-NEXT: retq
+; CHECK: movl $40, %edi
+; CHECK-NEXT callq ___asan_report_load4
+define void @sanitize() #0 {
+entry:
+ %tmp = load i8, i8* inttoptr (i64 17592186044421 to i8*)
+ %tmp1 = icmp ne i8 %tmp, 0
+ br i1 %tmp1, label %if.then, label %else
+
+if.then:
+ %tmp3 = icmp sge i8 3, %tmp
+ br i1 %tmp3, label %else, label %end
+
+else:
+ call void @__asan_report_load4(i64 40)
+ call void asm sideeffect "", ""()
+ unreachable
+
+end:
+ %tmp6 = load i32, i32* inttoptr (i64 40 to i32*), align 8
+ %inc = add nsw i32 %tmp6, 1
+ store i32 %inc, i32* inttoptr (i64 40 to i32*), align 8
+ ret void
+}
+
+attributes #0 = { sanitize_address nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
+
+declare void @__asan_report_load4(i64)
diff --git a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
index 248a9202e997..99b27efe7f54 100644
--- a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
+++ b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
@@ -39,10 +39,10 @@ define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwin
; Also test the general purpose constant folding of int->fp.
define void @foo2(<4 x float>* noalias %result) nounwind {
; CHECK-LABEL: LCPI2_0:
-; CHECK-NEXT: .long 1082130432 ## float 4.000000e+00
-; CHECK-NEXT: .long 1084227584 ## float 5.000000e+00
-; CHECK-NEXT: .long 1086324736 ## float 6.000000e+00
-; CHECK-NEXT: .long 1088421888 ## float 7.000000e+00
+; CHECK-NEXT: .long 1082130432 ## float 4
+; CHECK-NEXT: .long 1084227584 ## float 5
+; CHECK-NEXT: .long 1086324736 ## float 6
+; CHECK-NEXT: .long 1088421888 ## float 7
; CHECK-LABEL: foo2:
; CHECK: movaps LCPI2_0(%rip), %xmm0
@@ -72,10 +72,10 @@ define <4 x float> @foo3(<4 x float> %val, <4 x float> %test) nounwind {
; Test the general purpose constant folding of uint->fp.
define void @foo4(<4 x float>* noalias %result) nounwind {
; CHECK-LABEL: LCPI4_0:
-; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
-; CHECK-NEXT: .long 1123942400 ## float 1.270000e+02
-; CHECK-NEXT: .long 1124073472 ## float 1.280000e+02
-; CHECK-NEXT: .long 1132396544 ## float 2.550000e+02
+; CHECK-NEXT: .long 1065353216 ## float 1
+; CHECK-NEXT: .long 1123942400 ## float 127
+; CHECK-NEXT: .long 1124073472 ## float 128
+; CHECK-NEXT: .long 1132396544 ## float 255
; CHECK-LABEL: foo4:
; CHECK: movaps LCPI4_0(%rip), %xmm0
diff --git a/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
new file mode 100644
index 000000000000..7c00f407b1e0
--- /dev/null
+++ b/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
@@ -0,0 +1,153 @@
+; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK
+;
+; This test checks that we do not use shrink-wrapping when
+; the function does not have any frame pointer and may unwind.
+; This is a workaround for a limitation in the emission of
+; the CFI directives, that are not correct in such case.
+; PR25614
+;
+; Note: This test cannot be merged with the shrink-wrapping tests
+; because the booleans set on the command line take precedence on
+; the target logic that disable shrink-wrapping.
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+
+; No shrink-wrapping should occur here, until the CFI information are fixed.
+; CHECK-LABEL: framelessUnwind:
+;
+; Prologue code.
+; (What we push does not matter. It should be some random sratch register.)
+; CHECK: pushq
+;
+; Compare the arguments and jump to exit.
+; After the prologue is set.
+; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
+; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
+; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Store %a in the alloca.
+; CHECK: movl [[ARG0CPY]], 4(%rsp)
+; Set the alloca address in the second argument.
+; CHECK-NEXT: leaq 4(%rsp), %rsi
+; Set the first argument to zero.
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq _doSomething
+;
+; CHECK: [[EXIT_LABEL]]:
+;
+; Without shrink-wrapping, epilogue is in the exit block.
+; Epilogue code. (What we pop does not matter.)
+; CHECK-NEXT: popq
+;
+; CHECK-NEXT: retq
+define i32 @framelessUnwind(i32 %a, i32 %b) #0 {
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+false:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+ ret i32 %tmp.0
+}
+
+declare i32 @doSomething(i32, i32*)
+
+attributes #0 = { "no-frame-pointer-elim"="false" }
+
+; Shrink-wrapping should occur here. We have a frame pointer.
+; CHECK-LABEL: frameUnwind:
+;
+; Compare the arguments and jump to exit.
+; No prologue needed.
+;
+; Compare the arguments and jump to exit.
+; After the prologue is set.
+; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
+; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
+; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+;
+; Store %a in the alloca.
+; CHECK: movl [[ARG0CPY]], -4(%rbp)
+; Set the alloca address in the second argument.
+; CHECK-NEXT: leaq -4(%rbp), %rsi
+; Set the first argument to zero.
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq _doSomething
+;
+; Epilogue code. (What we pop does not matter.)
+; CHECK: popq %rbp
+;
+; CHECK: [[EXIT_LABEL]]:
+; CHECK-NEXT: retq
+define i32 @frameUnwind(i32 %a, i32 %b) #1 {
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+false:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+ ret i32 %tmp.0
+}
+
+attributes #1 = { "no-frame-pointer-elim"="true" }
+
+; Shrink-wrapping should occur here. We do not have to unwind.
+; CHECK-LABEL: framelessnoUnwind:
+;
+; Compare the arguments and jump to exit.
+; No prologue needed.
+;
+; Compare the arguments and jump to exit.
+; After the prologue is set.
+; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
+; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
+; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; (What we push does not matter. It should be some random sratch register.)
+; CHECK: pushq
+;
+; Store %a in the alloca.
+; CHECK: movl [[ARG0CPY]], 4(%rsp)
+; Set the alloca address in the second argument.
+; CHECK-NEXT: leaq 4(%rsp), %rsi
+; Set the first argument to zero.
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq _doSomething
+;
+; Epilogue code.
+; CHECK-NEXT: addq
+;
+; CHECK: [[EXIT_LABEL]]:
+; CHECK-NEXT: retq
+define i32 @framelessnoUnwind(i32 %a, i32 %b) #2 {
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+false:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+ ret i32 %tmp.0
+}
+
+attributes #2 = { "no-frame-pointer-elim"="false" nounwind }
diff --git a/test/CodeGen/X86/x86-shrink-wrapping.ll b/test/CodeGen/X86/x86-shrink-wrapping.ll
index 8c91335d91a2..34e56919468b 100644
--- a/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -445,9 +445,9 @@ if.end: ; preds = %for.body, %if.else
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: %esi, %edi
; CHECK-NEXT: %esi, %edx
+; CHECK-NEXT: %esi, %ecx
; CHECK-NEXT: %esi, %r8d
; CHECK-NEXT: %esi, %r9d
-; CHECK-NEXT: %esi, %ecx
; CHECK-NEXT: callq _someVariadicFunc
; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: shll $3, %esi
@@ -532,7 +532,11 @@ declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rt
;
; CHECK: movl $24599, [[TMP2:%e[a-z]+]]
; CHECK-NEXT: btl [[TMP]], [[TMP2]]
-; CHECK-NEXT: jb [[CLEANUP]]
+; CHECK-NEXT: jae [[LOR_LHS_FALSE:LBB[0-9_]+]]
+;
+; CHECK: [[CLEANUP]]: ## %cleanup
+; DISABLE: popq
+; CHECK-NEXT: retq
;
; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false
; CHECK: cmpl $134, %e[[BF_LOAD2]]
@@ -551,10 +555,6 @@ declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rt
; CHECK-NEXT: je [[CLEANUP]]
;
; CHECK: movb $1, 57(%rax)
-;
-; CHECK: [[CLEANUP]]: ## %cleanup
-; DISABLE: popq
-; CHECK-NEXT: retq
define void @useLEA(%struct.rtx_def* readonly %x) {
entry:
%cmp = icmp eq %struct.rtx_def* %x, null
@@ -637,3 +637,245 @@ if.end:
declare void @abort() #0
attributes #0 = { noreturn nounwind }
+
+
+; Make sure that we handle infinite loops properly When checking that the Save
+; and Restore blocks are control flow equivalent, the loop searches for the
+; immediate (post) dominator for the (restore) save blocks. When either the Save
+; or Restore block is located in an infinite loop the only immediate (post)
+; dominator is itself. In this case, we cannot perform shrink wrapping, but we
+; should return gracefully and continue compilation.
+; The only condition for this test is the compilation finishes correctly.
+;
+; CHECK-LABEL: infiniteloop
+; CHECK: retq
+define void @infiniteloop() {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %ptr = alloca i32, i32 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+ %add = add nsw i32 %call, %sum.03
+ store i32 %add, i32* %ptr
+ br label %for.body
+
+if.end:
+ ret void
+}
+
+; Another infinite loop test this time with a body bigger than just one block.
+; CHECK-LABEL: infiniteloop2
+; CHECK: retq
+define void @infiniteloop2() {
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %ptr = alloca i32, i32 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2]
+ %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+ %add = add nsw i32 %call, %sum.03
+ store i32 %add, i32* %ptr
+ br i1 undef, label %body1, label %body2
+
+body1:
+ tail call void asm sideeffect "nop", "~{ebx}"()
+ br label %for.body
+
+body2:
+ tail call void asm sideeffect "nop", "~{ebx}"()
+ br label %for.body
+
+if.end:
+ ret void
+}
+
+; Another infinite loop test this time with two nested infinite loop.
+; CHECK-LABEL: infiniteloop3
+; CHECK: retq
+define void @infiniteloop3() {
+entry:
+ br i1 undef, label %loop2a, label %body
+
+body: ; preds = %entry
+ br i1 undef, label %loop2a, label %end
+
+loop1: ; preds = %loop2a, %loop2b
+ %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
+ %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
+ %0 = icmp eq i32* %var, null
+ %next.load = load i32*, i32** undef
+ br i1 %0, label %loop2a, label %loop2b
+
+loop2a: ; preds = %loop1, %body, %entry
+ %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ]
+ %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ]
+ br label %loop1
+
+loop2b: ; preds = %loop1
+ %gep1 = bitcast i32* %var.phi to i32*
+ %next.ptr = bitcast i32* %gep1 to i32**
+ store i32* %next.phi, i32** %next.ptr
+ br label %loop1
+
+end:
+ ret void
+}
+
+; Check that we just don't bail out on RegMask.
+; In this case, the RegMask does not touch a CSR so we are good to go!
+; CHECK-LABEL: regmask:
+;
+; Compare the arguments and jump to exit.
+; No prologue needed.
+; ENABLE: cmpl %esi, %edi
+; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; (What we push does not matter. It should be some random sratch register.)
+; CHECK: pushq
+;
+; Compare the arguments and jump to exit.
+; After the prologue is set.
+; DISABLE: cmpl %esi, %edi
+; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; CHECK: nop
+; Set the first argument to zero.
+; CHECK: xorl %edi, %edi
+; Set the second argument to addr.
+; CHECK-NEXT: movq %rdx, %rsi
+; CHECK-NEXT: callq _doSomething
+; CHECK-NEXT: popq
+; CHECK-NEXT: retq
+;
+; CHECK: [[EXIT_LABEL]]:
+; Set the first argument to 6.
+; CHECK-NEXT: movl $6, %edi
+; Set the second argument to addr.
+; CHECK-NEXT: movq %rdx, %rsi
+;
+; Without shrink-wrapping, we need to restore the stack before
+; making the tail call.
+; Epilogue code.
+; DISABLE-NEXT: popq
+;
+; CHECK-NEXT: jmp _doSomething
+define i32 @regmask(i32 %a, i32 %b, i32* %addr) {
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ ; Clobber a CSR so that we check something on the regmask
+ ; of the tail call.
+ tail call void asm sideeffect "nop", "~{ebx}"()
+ %tmp4 = call i32 @doSomething(i32 0, i32* %addr)
+ br label %end
+
+false:
+ %tmp5 = tail call i32 @doSomething(i32 6, i32* %addr)
+ br label %end
+
+end:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %tmp5, %false ]
+ ret i32 %tmp.0
+}
+
+@b = internal unnamed_addr global i1 false
+@c = internal unnamed_addr global i8 0, align 1
+@a = common global i32 0, align 4
+
+; Make sure the prologue does not clobber the EFLAGS when
+; it is live accross.
+; PR25629.
+; Note: The registers may change in the following patterns, but
+; because they imply register hierarchy (e.g., eax, al) this is
+; tricky to write robust patterns.
+;
+; CHECK-LABEL: useLEAForPrologue:
+;
+; Prologue is at the beginning of the function when shrink-wrapping
+; is disabled.
+; DISABLE: pushq
+; The stack adjustment can use SUB instr because we do not need to
+; preserve the EFLAGS at this point.
+; DISABLE-NEXT: subq $16, %rsp
+;
+; Load the value of b.
+; CHECK: movb _b(%rip), [[BOOL:%cl]]
+; Extract i1 from the loaded value.
+; CHECK-NEXT: andb $1, [[BOOL]]
+; Create the zero value for the select assignment.
+; CHECK-NEXT: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
+; CHECK-NEXT: testb [[BOOL]], [[BOOL]]
+; CHECK-NEXT: jne [[STOREC_LABEL:LBB[0-9_]+]]
+;
+; CHECK: movb $48, [[CMOVE_VAL:%al]]
+;
+; CHECK: [[STOREC_LABEL]]:
+;
+; ENABLE-NEXT: pushq
+; For the stack adjustment, we need to preserve the EFLAGS.
+; ENABLE-NEXT: leaq -16(%rsp), %rsp
+;
+; Technically, we should use CMOVE_VAL here or its subregister.
+; CHECK-NEXT: movb %al, _c(%rip)
+; testb set the EFLAGS read here.
+; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]]
+;
+; The code of the loop is not interesting.
+; [...]
+;
+; CHECK: [[VARFUNC_CALL]]:
+; Set the null parameter.
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq _varfunc
+;
+; Set the return value.
+; CHECK-NEXT: xorl %eax, %eax
+;
+; Epilogue code.
+; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: popq
+; CHECK-NEXT: retq
+define i32 @useLEAForPrologue(i32 %d, i32 %a, i8 %c) #3 {
+entry:
+ %tmp = alloca i3
+ %.b = load i1, i1* @b, align 1
+ %bool = select i1 %.b, i8 0, i8 48
+ store i8 %bool, i8* @c, align 1
+ br i1 %.b, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ tail call void asm sideeffect "nop", "~{ebx}"()
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %inc6 = phi i8 [ %c, %for.body.lr.ph ], [ %inc, %for.body ]
+ %cond5 = phi i32 [ %a, %for.body.lr.ph ], [ %conv3, %for.body ]
+ %cmp2 = icmp slt i32 %d, %cond5
+ %conv3 = zext i1 %cmp2 to i32
+ %inc = add i8 %inc6, 1
+ %cmp = icmp slt i8 %inc, 45
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ store i32 %conv3, i32* @a, align 4
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ %call = tail call i32 (i8*) @varfunc(i8* null)
+ ret i32 0
+}
+
+declare i32 @varfunc(i8* nocapture readonly)
+
+attributes #3 = { nounwind }
diff --git a/test/CodeGen/X86/x86-win64-shrink-wrapping.ll b/test/CodeGen/X86/x86-win64-shrink-wrapping.ll
new file mode 100644
index 000000000000..395de686d2e2
--- /dev/null
+++ b/test/CodeGen/X86/x86-win64-shrink-wrapping.ll
@@ -0,0 +1,126 @@
+; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "x86_64--windows-gnu"
+
+; The output of this function with or without shrink-wrapping
+; shouldn't change.
+; Indeed, the epilogue block would have been if.else, meaning
+; after the pops, we will have additional instruction (jump, mov,
+; etc.) prior to the return and this is forbidden for Win64.
+; CHECK-LABEL: loopInfoSaveOutsideLoop:
+; CHECK: push
+; CHECK: push
+; CHECK-NOT: popq
+; CHECK: popq
+; CHECK: popq
+; CHECK-NOT: popq
+; CHECK-NEXT: retq
+define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) #0 {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader: ; preds = %entry
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.preheader
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ tail call void asm "nop", "~{ebx}"()
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+; When we can sink the epilogue of the function into an existing exit block,
+; this is Ok for shrink-wrapping to kicks in.
+; CHECK-LABEL: loopInfoSaveOutsideLoop2:
+; ENABLE: testl %ecx, %ecx
+; ENABLE-NEXT: je [[ELSE_LABEL:.LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: rbx.
+; CHECK: pushq %rbp
+; CHECK: pushq %rbx
+;
+; DISABLE: testl %ecx, %ecx
+; DISABLE-NEXT: je [[ELSE_LABEL:.LBB[0-9_]+]]
+;
+; CHECK: nop
+; CHECK: xorl [[SUM:%eax]], [[SUM]]
+; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
+;
+; CHECK: [[LOOP_LABEL:.LBB[0-9_]+]]: # %for.body
+; CHECK: movl $1, [[TMP:%e[a-z]+]]
+; CHECK: addl [[TMP]], [[SUM]]
+; CHECK-NEXT: decl [[IV]]
+; CHECK-NEXT: jne [[LOOP_LABEL]]
+; Next BB.
+; CHECK: nop
+; CHECK: shll $3, [[SUM]]
+;
+; DISABLE: jmp [[EPILOG_BB:.LBB[0-9_]+]]
+;
+; ENABLE-NEXT: popq %rbx
+; ENABLE-NEXT: popq %rbp
+; ENABLE-NEXT: retq
+;
+; CHECK: [[ELSE_LABEL]]: # %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: addl %edx, %edx
+; CHECK: movl %edx, %eax
+;
+; DISABLE: [[EPILOG_BB]]: # %if.end
+; DISABLE-NEXT: popq %rbx
+;
+; CHECK: retq
+;
+define i32 @loopInfoSaveOutsideLoop2(i32 %cond, i32 %N) #0 {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader: ; preds = %entry
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.preheader
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ tail call void asm "nop", "~{ebx}"()
+ %shl = shl i32 %add, 3
+ ret i32 %shl
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ ret i32 %mul
+}
+
+attributes #0 = { uwtable }
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
index 2516116f7697..3b4c6ea12107 100644
--- a/test/CodeGen/X86/xop-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -61,15 +61,14 @@ define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float>
declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
- ; CHECK: vpcmov
+ ; CHECK: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
%res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
- ; CHECK: vpcmov
- ; CHECK: ymm
+ ; CHECK: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
%res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
ret <4 x i64> %res
}
@@ -805,6 +804,34 @@ define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
}
declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
+define <16 x i8> @test_int_x86_xop_vprotbi(<16 x i8> %a0) {
+ ; CHECK: vprotb
+ %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %a0, i8 1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vprotdi(<4 x i32> %a0) {
+ ; CHECK: vprotd
+ %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %a0, i8 -2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vprotqi(<2 x i64> %a0) {
+ ; CHECK: vprotq
+ %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 3) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vprotwi(<8 x i16> %a0) {
+ ; CHECK: vprotw
+ %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %a0, i8 -4) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone
+
define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpshab
%res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
diff --git a/test/CodeGen/X86/xop-pcmov.ll b/test/CodeGen/X86/xop-pcmov.ll
new file mode 100644
index 000000000000..77aefe993b29
--- /dev/null
+++ b/test/CodeGen/X86/xop-pcmov.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+xop | FileCheck %s
+
+define <4 x double> @pcmov_4f64(<4 x double> %a, <4 x double> %b, <4 x double> %m) {
+; CHECK-LABEL: pcmov_4f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = bitcast <4 x double> %m to <4 x i64>
+ %2 = bitcast <4 x double> %a to <4 x i64>
+ %3 = and <4 x i64> %1, %2
+ %4 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %5 = bitcast <4 x double> %b to <4 x i64>
+ %6 = and <4 x i64> %4, %5
+ %7 = or <4 x i64> %3, %6
+ %8 = bitcast <4 x i64> %7 to <4 x double>
+ ret <4 x double> %8
+}
+
+define <2 x double> @pcmov_2f64(<2 x double> %a, <2 x double> %b, <2 x double> %m) {
+; CHECK-LABEL: pcmov_2f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = bitcast <2 x double> %m to <2 x i64>
+ %2 = bitcast <2 x double> %a to <2 x i64>
+ %3 = and <2 x i64> %1, %2
+ %4 = xor <2 x i64> %1, <i64 -1, i64 -1>
+ %5 = bitcast <2 x double> %b to <2 x i64>
+ %6 = and <2 x i64> %4, %5
+ %7 = or <2 x i64> %3, %6
+ %8 = bitcast <2 x i64> %7 to <2 x double>
+ ret <2 x double> %8
+}
+
+define <8 x float> @pcmov_8f32(<8 x float> %a, <8 x float> %b, <8 x float> %m) {
+; CHECK-LABEL: pcmov_8f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = bitcast <8 x float> %m to <8 x i32>
+ %2 = bitcast <8 x float> %a to <8 x i32>
+ %3 = and <8 x i32> %1, %2
+ %4 = xor <8 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %5 = bitcast <8 x float> %b to <8 x i32>
+ %6 = and <8 x i32> %4, %5
+ %7 = or <8 x i32> %3, %6
+ %8 = bitcast <8 x i32> %7 to <8 x float>
+ ret <8 x float> %8
+}
+
+define <4 x float> @pcmov_4f32(<4 x float> %a, <4 x float> %b, <4 x float> %m) {
+; CHECK-LABEL: pcmov_4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = bitcast <4 x float> %m to <4 x i32>
+ %2 = bitcast <4 x float> %a to <4 x i32>
+ %3 = and <4 x i32> %1, %2
+ %4 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %5 = bitcast <4 x float> %b to <4 x i32>
+ %6 = and <4 x i32> %4, %5
+ %7 = or <4 x i32> %3, %6
+ %8 = bitcast <4 x i32> %7 to <4 x float>
+ ret <4 x float> %8
+}
+
+define <4 x i64> @pcmov_4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %m) {
+; CHECK-LABEL: pcmov_4i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = and <4 x i64> %a, %m
+ %2 = xor <4 x i64> %m, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %3 = and <4 x i64> %b, %2
+ %4 = or <4 x i64> %1, %3
+ ret <4 x i64> %4
+}
+
+define <2 x i64> @pcmov_2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %m) {
+; CHECK-LABEL: pcmov_2i64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = and <2 x i64> %a, %m
+ %2 = xor <2 x i64> %m, <i64 -1, i64 -1>
+ %3 = and <2 x i64> %b, %2
+ %4 = or <2 x i64> %1, %3
+ ret <2 x i64> %4
+}
+
+define <8 x i32> @pcmov_8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %m) {
+; CHECK-LABEL: pcmov_8i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = and <8 x i32> %a, %m
+ %2 = xor <8 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %3 = and <8 x i32> %b, %2
+ %4 = or <8 x i32> %1, %3
+ ret <8 x i32> %4
+}
+
+define <4 x i32> @pcmov_4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %m) {
+; CHECK-LABEL: pcmov_4i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = and <4 x i32> %a, %m
+ %2 = xor <4 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %3 = and <4 x i32> %b, %2
+ %4 = or <4 x i32> %1, %3
+ ret <4 x i32> %4
+}
+
+define <16 x i16> @pcmov_16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %m) {
+; CHECK-LABEL: pcmov_16i16:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = and <16 x i16> %a, %m
+ %2 = xor <16 x i16> %m, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %3 = and <16 x i16> %b, %2
+ %4 = or <16 x i16> %1, %3
+ ret <16 x i16> %4
+}
+
+define <8 x i16> @pcmov_8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %m) {
+; CHECK-LABEL: pcmov_8i16:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = and <8 x i16> %a, %m
+ %2 = xor <8 x i16> %m, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %3 = and <8 x i16> %b, %2
+ %4 = or <8 x i16> %1, %3
+ ret <8 x i16> %4
+}
+
+define <32 x i8> @pcmov_32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %m) {
+; CHECK-LABEL: pcmov_32i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %1 = and <32 x i8> %a, %m
+ %2 = xor <32 x i8> %m, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %3 = and <32 x i8> %b, %2
+ %4 = or <32 x i8> %1, %3
+ ret <32 x i8> %4
+}
+
+define <16 x i8> @pcmov_16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %m) {
+; CHECK-LABEL: pcmov_16i8:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %1 = and <16 x i8> %a, %m
+ %2 = xor <16 x i8> %m, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %3 = and <16 x i8> %b, %2
+ %4 = or <16 x i8> %1, %3
+ ret <16 x i8> %4
+}
diff --git a/test/CodeGen/XCore/aliases.ll b/test/CodeGen/XCore/aliases.ll
index b7ad416968f4..62427dad9b7c 100644
--- a/test/CodeGen/XCore/aliases.ll
+++ b/test/CodeGen/XCore/aliases.ll
@@ -5,9 +5,9 @@ define void @a_val() nounwind {
@b_val = constant i32 42, section ".cp.rodata"
@c_val = global i32 42
-@a = alias void ()* @a_val
-@b = alias i32* @b_val
-@c = alias i32* @c_val
+@a = alias void (), void ()* @a_val
+@b = alias i32, i32* @b_val
+@c = alias i32, i32* @c_val
; CHECK-LABEL: a_addr:
; CHECK: ldap r11, a
diff --git a/test/CodeGen/XCore/dwarf_debug.ll b/test/CodeGen/XCore/dwarf_debug.ll
index ba71dc798a04..6c8f389e8a98 100644
--- a/test/CodeGen/XCore/dwarf_debug.ll
+++ b/test/CodeGen/XCore/dwarf_debug.ll
@@ -9,7 +9,7 @@
; CHECK: .loc 1 2 0 prologue_end # test.c:2:0
; CHECK: add r0, r0, 1
; CHECK: retsp 2
-define i32 @f(i32 %a) {
+define i32 @f(i32 %a) !dbg !4 {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
@@ -23,16 +23,16 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
-!0 = !DICompileUnit(language: DW_LANG_C99, isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !1, type: !6, function: i32 (i32)* @f, variables: !2)
+!4 = distinct !DISubprogram(name: "f", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !1, type: !6, variables: !2)
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 2, arg: 1, scope: !4, file: !1, type: !8)
+!11 = !DILocalVariable(name: "a", line: 2, arg: 1, scope: !4, file: !1, type: !8)
!12 = !DILocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
deleted file mode 100644
index f8382af3abc7..000000000000
--- a/test/DebugInfo/2009-11-03-InsertExtractValue.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis | FileCheck %s
-
-!llvm.dbg.sp = !{!0}
-!llvm.dbg.cu = !{!5}
-!llvm.module.flags = !{!6}
-
-!0 = !DISubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagProtected | DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !4, scope: !1, type: !2)
-!1 = !DIFile(filename: "/foo", directory: "bar.cpp")
-!2 = !DISubroutineType(types: !3)
-!3 = !{null}
-!4 = !DIFile(filename: "/foo", directory: "bar.cpp")
-!5 = !DICompileUnit(language: DW_LANG_C99, isOptimized: true, emissionKind: 0, file: !4, enums: !{}, retainedTypes: !{})
-
-define <{i32, i32}> @f1() {
-; CHECK: !dbgx ![[NUMBER:[0-9]+]]
- %r = insertvalue <{ i32, i32 }> zeroinitializer, i32 4, 1, !dbgx !1
-; CHECK: !dbgx ![[NUMBER]]
- %e = extractvalue <{ i32, i32 }> %r, 0, !dbgx !1
- ret <{ i32, i32 }> %r
-}
-
-; CHECK: DIFlagProtected
-!6 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
deleted file mode 100644
index 32d4c0ac6c39..000000000000
--- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc %s -o /dev/null
-; Here variable bar is optimized away. Do not trip over while trying to generate debug info.
-
-
-define i32 @foo() nounwind uwtable readnone ssp {
-entry:
- ret i32 42, !dbg !15
-}
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!18}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 139632)", isOptimized: true, emissionKind: 0, file: !17, enums: !1, retainedTypes: !1, subprograms: !3, globals: !12)
-!1 = !{}
-!3 = !{!5}
-!5 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !17, scope: !6, type: !7, function: i32 ()* @foo)
-!6 = !DIFile(filename: "fb.c", directory: "/private/tmp")
-!7 = !DISubroutineType(types: !8)
-!8 = !{!9}
-!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!12 = !{!14}
-!14 = !DIGlobalVariable(name: "bar", line: 2, isLocal: true, isDefinition: true, scope: !5, file: !6, type: !9)
-!15 = !DILocation(line: 3, column: 3, scope: !16)
-!16 = distinct !DILexicalBlock(line: 1, column: 11, file: !17, scope: !5)
-!17 = !DIFile(filename: "fb.c", directory: "/private/tmp")
-!18 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
deleted file mode 100644
index 79f949b438f8..000000000000
--- a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc %s -o /dev/null
-@0 = internal constant i32 1
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!9}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 139632)", isOptimized: true, emissionKind: 0, file: !8, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3)
-!2 = !{}
-!3 = !{!5}
-!5 = !DIGlobalVariable(name: "a", line: 2, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: i32* @0)
-!6 = !DIFile(filename: "g.c", directory: "/private/tmp")
-!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DIFile(filename: "g.c", directory: "/private/tmp")
-!9 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll
deleted file mode 100644
index 228edec93c42..000000000000
--- a/test/DebugInfo/2009-11-10-CurrentFn.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc < %s -o /dev/null
-
-define void @bar(i32 %i) nounwind uwtable ssp {
-entry:
- tail call void (...) @foo() nounwind, !dbg !14
- ret void, !dbg !16
-}
-
-declare void @foo(...)
-
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!18}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 139632)", isOptimized: true, emissionKind: 0, file: !17, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
-!1 = !{}
-!3 = !{!5}
-!5 = !DISubprogram(name: "bar", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !17, scope: !6, type: !7, function: void (i32)* @bar, variables: !9)
-!6 = !DIFile(filename: "cf.c", directory: "/private/tmp")
-!7 = !DISubroutineType(types: !8)
-!8 = !{null}
-!9 = !{!11}
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 3, arg: 1, scope: !5, file: !17, type: !12)
-!12 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!13 = !DILocation(line: 3, column: 14, scope: !5)
-!14 = !DILocation(line: 4, column: 3, scope: !15)
-!15 = distinct !DILexicalBlock(line: 3, column: 17, file: !17, scope: !5)
-!16 = !DILocation(line: 5, column: 1, scope: !15)
-!17 = !DIFile(filename: "cf.c", directory: "/private/tmp")
-!18 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-01-05-DbgScope.ll b/test/DebugInfo/2010-01-05-DbgScope.ll
deleted file mode 100644
index d5baa1984227..000000000000
--- a/test/DebugInfo/2010-01-05-DbgScope.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -o /dev/null
-; PR 5942
-define i8* @foo() nounwind {
-entry:
- %0 = load i32, i32* undef, align 4, !dbg !0 ; <i32> [#uses=1]
- %1 = inttoptr i32 %0 to i8*, !dbg !0 ; <i8*> [#uses=1]
- ret i8* %1, !dbg !10
-
-}
-
-!llvm.dbg.cu = !{!3}
-!llvm.module.flags = !{!14}
-
-!0 = !DILocation(line: 571, column: 3, scope: !1)
-!1 = distinct !DILexicalBlock(line: 1, column: 1, file: !11, scope: !2)
-!2 = !DISubprogram(name: "foo", linkageName: "foo", line: 561, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3, type: !4)
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !11, enums: !12, retainedTypes: !12, subprograms: !13)
-!4 = !DISubroutineType(types: !5)
-!5 = !{!6}
-!6 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!10 = !DILocation(line: 588, column: 1, scope: !2)
-!11 = !DIFile(filename: "hashtab.c", directory: "/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty")
-!12 = !{}
-!13 = !{!2}
-!14 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-03-12-llc-crash.ll b/test/DebugInfo/2010-03-12-llc-crash.ll
deleted file mode 100644
index 60e657ce5ce6..000000000000
--- a/test/DebugInfo/2010-03-12-llc-crash.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc -O0 < %s -o /dev/null
-; llc should not crash on this invalid input.
-; PR6588
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-define void @foo() {
-entry:
- call void @llvm.dbg.declare(metadata i32* undef, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
- ret void
-}
-
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "sy", line: 890, arg: 0, scope: !1, file: !2, type: !7)
-!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 892, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !3, type: !4)
-!2 = !DIFile(filename: "qpainter.h", directory: "QtGui")
-!3 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !9, enums: !10, retainedTypes: !10)
-!4 = !DISubroutineType(types: !6)
-!5 = !DIFile(filename: "splineeditor.cpp", directory: "src")
-!6 = !{null}
-!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DIFile(filename: "qpainter.h", directory: "QtGui")
-!9 = !DIFile(filename: "splineeditor.cpp", directory: "src")
-!10 = !{i32 0}
diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll
deleted file mode 100644
index 40d96e1d443e..000000000000
--- a/test/DebugInfo/2010-03-19-DbgDeclare.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -verify -S | FileCheck %s
-
-; CHECK: DW_LANG_Mips_Assembler
-
-define void @Foo(i32 %a, i32 %b) {
-entry:
- call void @llvm.dbg.declare(metadata i32* null, metadata !1, metadata !DIExpression()), !dbg !DILocation(scope: !6)
- ret void
-}
-!llvm.dbg.cu = !{!2}
-!llvm.module.flags = !{!5}
-!2 = !DICompileUnit(language: DW_LANG_Mips_Assembler, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 1, file: !4, enums: !3, retainedTypes: !3, subprograms: !3, globals: !3, imports: !3)
-!3 = !{}
-!0 = !DILocation(line: 662302, column: 26, scope: !1)
-!1 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "foo", scope: !6)
-!4 = !DIFile(filename: "scratch.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
-!6 = !DISubprogram()
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-!5 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-03-24-MemberFn.ll b/test/DebugInfo/2010-03-24-MemberFn.ll
deleted file mode 100644
index 627364ea4f00..000000000000
--- a/test/DebugInfo/2010-03-24-MemberFn.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: %llc_dwarf -O0 < %s | grep AT_decl_file | grep 2
-; Here _ZN1S3fooEv is defined in header file identified as AT_decl_file no. 2 in debug info.
-%struct.S = type <{ i8 }>
-
-define i32 @_Z3barv() nounwind ssp {
-entry:
- %retval = alloca i32 ; <i32*> [#uses=2]
- %0 = alloca i32 ; <i32*> [#uses=2]
- %s1 = alloca %struct.S ; <%struct.S*> [#uses=1]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata %struct.S* %s1, metadata !0, metadata !DIExpression()), !dbg !16
- %1 = call i32 @_ZN1S3fooEv(%struct.S* %s1) nounwind, !dbg !17 ; <i32> [#uses=1]
- store i32 %1, i32* %0, align 4, !dbg !17
- %2 = load i32, i32* %0, align 4, !dbg !17 ; <i32> [#uses=1]
- store i32 %2, i32* %retval, align 4, !dbg !17
- br label %return, !dbg !17
-
-return: ; preds = %entry
- %retval1 = load i32, i32* %retval, !dbg !17 ; <i32> [#uses=1]
- ret i32 %retval1, !dbg !16
-}
-
-define linkonce_odr i32 @_ZN1S3fooEv(%struct.S* %this) nounwind ssp align 2 {
-entry:
- %this_addr = alloca %struct.S* ; <%struct.S**> [#uses=1]
- %retval = alloca i32 ; <i32*> [#uses=1]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata %struct.S** %this_addr, metadata !18, metadata !DIExpression()), !dbg !21
- store %struct.S* %this, %struct.S** %this_addr
- br label %return, !dbg !21
-
-return: ; preds = %entry
- %retval1 = load i32, i32* %retval, !dbg !21 ; <i32> [#uses=1]
- ret i32 %retval1, !dbg !22
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!5}
-!llvm.module.flags = !{!28}
-
-!0 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "s1", line: 3, scope: !1, file: !4, type: !9)
-!1 = distinct !DILexicalBlock(line: 3, column: 0, file: !25, scope: !2)
-!2 = distinct !DILexicalBlock(line: 3, column: 0, file: !25, scope: !3)
-!3 = !DISubprogram(name: "bar", linkageName: "_Z3barv", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 3, file: !25, scope: !4, type: !6, function: i32 ()* @_Z3barv)
-!4 = !DIFile(filename: "one.cc", directory: "/tmp/")
-!5 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !25, enums: !27, retainedTypes: !27, subprograms: !24, imports: null)
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", line: 2, size: 8, align: 8, file: !26, scope: !4, elements: !11)
-!10 = !DIFile(filename: "one.h", directory: "/tmp/")
-!11 = !{!12}
-!12 = !DISubprogram(name: "foo", linkageName: "_ZN1S3fooEv", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 3, file: !26, scope: !9, type: !13, function: i32 (%struct.S*)* @_ZN1S3fooEv)
-!13 = !DISubroutineType(types: !14)
-!14 = !{!8, !15}
-!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !25, scope: !4, baseType: !9)
-!16 = !DILocation(line: 3, scope: !1)
-!17 = !DILocation(line: 3, scope: !3)
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 3, arg: 0, scope: !12, file: !10, type: !19)
-!19 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !25, scope: !4, baseType: !20)
-!20 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !25, scope: !4, baseType: !9)
-!21 = !DILocation(line: 3, scope: !12)
-!22 = !DILocation(line: 3, scope: !23)
-!23 = distinct !DILexicalBlock(line: 3, column: 0, file: !26, scope: !12)
-!24 = !{!3, !12}
-!25 = !DIFile(filename: "one.cc", directory: "/tmp/")
-!26 = !DIFile(filename: "one.h", directory: "/tmp/")
-!27 = !{}
-!28 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
deleted file mode 100644
index 45ed4962f7b4..000000000000
--- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
+++ /dev/null
@@ -1,112 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj -o - < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-; Radar 7833483
-; Do not emit a separate out-of-line definition DIE for the function-local 'foo'
-; function (member of the function local 'A' type)
-; CHECK: DW_TAG_class_type
-; CHECK: DW_TAG_class_type
-; CHECK-NEXT: DW_AT_name {{.*}} "A"
-; Check that the subprogram inside the class definition has low_pc, only
-; attached to the definition.
-; CHECK: [[FOO_INL:0x........]]: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_low_pc
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_linkage_name {{.*}} "_ZZN1B2fnEvEN1A3fooEv"
-; And just double check that there's no out of line definition that references
-; this subprogram.
-; CHECK-NOT: DW_AT_specification {{.*}} {[[FOO_INL]]}
-
-%class.A = type { i8 }
-%class.B = type { i8 }
-
-define i32 @main() ssp {
-entry:
- %retval = alloca i32, align 4 ; <i32*> [#uses=3]
- %b = alloca %class.A, align 1 ; <%class.A*> [#uses=1]
- store i32 0, i32* %retval
- call void @llvm.dbg.declare(metadata %class.A* %b, metadata !0, metadata !DIExpression()), !dbg !14
- %call = call i32 @_ZN1B2fnEv(%class.A* %b), !dbg !15 ; <i32> [#uses=1]
- store i32 %call, i32* %retval, !dbg !15
- %0 = load i32, i32* %retval, !dbg !16 ; <i32> [#uses=1]
- ret i32 %0, !dbg !16
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-define linkonce_odr i32 @_ZN1B2fnEv(%class.A* %this) ssp align 2 {
-entry:
- %retval = alloca i32, align 4 ; <i32*> [#uses=2]
- %this.addr = alloca %class.A*, align 8 ; <%class.A**> [#uses=2]
- %a = alloca %class.A, align 1 ; <%class.A*> [#uses=1]
- %i = alloca i32, align 4 ; <i32*> [#uses=2]
- store %class.A* %this, %class.A** %this.addr
- call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !17, metadata !DIExpression()), !dbg !18
- %this1 = load %class.A*, %class.A** %this.addr ; <%class.A*> [#uses=0]
- call void @llvm.dbg.declare(metadata %class.A* %a, metadata !19, metadata !DIExpression()), !dbg !27
- call void @llvm.dbg.declare(metadata i32* %i, metadata !28, metadata !DIExpression()), !dbg !29
- %call = call i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %a), !dbg !30 ; <i32> [#uses=1]
- store i32 %call, i32* %i, !dbg !30
- %tmp = load i32, i32* %i, !dbg !31 ; <i32> [#uses=1]
- store i32 %tmp, i32* %retval, !dbg !31
- %0 = load i32, i32* %retval, !dbg !32 ; <i32> [#uses=1]
- ret i32 %0, !dbg !32
-}
-
-define internal i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %this) ssp align 2 {
-entry:
- %retval = alloca i32, align 4 ; <i32*> [#uses=2]
- %this.addr = alloca %class.A*, align 8 ; <%class.A**> [#uses=2]
- store %class.A* %this, %class.A** %this.addr
- call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !33, metadata !DIExpression()), !dbg !34
- %this1 = load %class.A*, %class.A** %this.addr ; <%class.A*> [#uses=0]
- store i32 42, i32* %retval, !dbg !35
- %0 = load i32, i32* %retval, !dbg !35 ; <i32> [#uses=1]
- ret i32 %0, !dbg !35
-}
-
-!llvm.dbg.cu = !{!4}
-!llvm.module.flags = !{!40}
-!37 = !{!2, !10, !23}
-
-!0 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 16, scope: !1, file: !3, type: !8)
-!1 = distinct !DILexicalBlock(line: 15, column: 12, file: !38, scope: !2)
-!2 = !DISubprogram(name: "main", linkageName: "main", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 15, file: !38, scope: !3, type: !5, function: i32 ()* @main)
-!3 = !DIFile(filename: "one.cc", directory: "/tmp")
-!4 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang 1.5", isOptimized: false, emissionKind: 0, file: !38, enums: !39, retainedTypes: !39, subprograms: !37, imports: null)
-!5 = !DISubroutineType(types: !6)
-!6 = !{!7}
-!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DICompositeType(tag: DW_TAG_class_type, name: "B", line: 2, size: 8, align: 8, file: !38, scope: !3, elements: !9)
-!9 = !{!10}
-!10 = !DISubprogram(name: "fn", linkageName: "_ZN1B2fnEv", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !38, scope: !8, type: !11, function: i32 (%class.A*)* @_ZN1B2fnEv)
-!11 = !DISubroutineType(types: !12)
-!12 = !{!7, !13}
-!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !38, scope: !3, baseType: !8)
-!14 = !DILocation(line: 16, column: 5, scope: !1)
-!15 = !DILocation(line: 17, column: 3, scope: !1)
-!16 = !DILocation(line: 18, column: 1, scope: !2)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 4, arg: 0, scope: !10, file: !3, type: !13)
-!18 = !DILocation(line: 4, column: 7, scope: !10)
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 9, scope: !20, file: !3, type: !21)
-!20 = distinct !DILexicalBlock(line: 4, column: 12, file: !38, scope: !10)
-!21 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 5, size: 8, align: 8, file: !38, scope: !10, elements: !22)
-!22 = !{!23}
-!23 = !DISubprogram(name: "foo", linkageName: "_ZZN1B2fnEvEN1A3fooEv", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !38, scope: !21, type: !24, function: i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv)
-!24 = !DISubroutineType(types: !25)
-!25 = !{!7, !26}
-!26 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !38, scope: !3, baseType: !21)
-!27 = !DILocation(line: 9, column: 7, scope: !20)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 10, scope: !20, file: !3, type: !7)
-!29 = !DILocation(line: 10, column: 9, scope: !20)
-!30 = !DILocation(line: 10, column: 5, scope: !20)
-!31 = !DILocation(line: 11, column: 5, scope: !20)
-!32 = !DILocation(line: 12, column: 3, scope: !10)
-!33 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 7, arg: 0, scope: !23, file: !3, type: !26)
-!34 = !DILocation(line: 7, column: 11, scope: !23)
-!35 = !DILocation(line: 7, column: 19, scope: !36)
-!36 = distinct !DILexicalBlock(line: 7, column: 17, file: !38, scope: !23)
-!38 = !DIFile(filename: "one.cc", directory: "/tmp")
-!39 = !{}
-!40 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll
deleted file mode 100644
index ed6a4fdb54e1..000000000000
--- a/test/DebugInfo/2010-04-19-FramePtr.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: %llc_dwarf -asm-verbose -O1 -o %t < %s
-; RUN: grep DW_AT_APPLE_omit_frame_ptr %t
-; RUN: %llc_dwarf -disable-fp-elim -asm-verbose -O1 -o %t < %s
-; RUN: grep -v DW_AT_APPLE_omit_frame_ptr %t
-
-
-define i32 @foo() nounwind ssp {
-entry:
- %retval = alloca i32 ; <i32*> [#uses=2]
- %0 = alloca i32 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 42, i32* %0, align 4, !dbg !0
- %1 = load i32, i32* %0, align 4, !dbg !0 ; <i32> [#uses=1]
- store i32 %1, i32* %retval, align 4, !dbg !0
- br label %return, !dbg !0
-
-return: ; preds = %entry
- %retval1 = load i32, i32* %retval, !dbg !0 ; <i32> [#uses=1]
- ret i32 %retval1, !dbg !7
-}
-
-!llvm.dbg.cu = !{!3}
-!llvm.module.flags = !{!12}
-!9 = !{!1}
-
-!0 = !DILocation(line: 2, scope: !1)
-!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !10, scope: null, type: !4, function: i32 ()* @foo)
-!2 = !DIFile(filename: "a.c", directory: "/tmp")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !10, enums: !11, retainedTypes: !11, subprograms: !9, imports: null)
-!4 = !DISubroutineType(types: !5)
-!5 = !{!6}
-!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!7 = !DILocation(line: 2, scope: !8)
-!8 = distinct !DILexicalBlock(line: 2, column: 0, file: !10, scope: !1)
-!10 = !DIFile(filename: "a.c", directory: "/tmp")
-!11 = !{}
-!12 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-05-03-DisableFramePtr.ll b/test/DebugInfo/2010-05-03-DisableFramePtr.ll
deleted file mode 100644
index 660e9dba498b..000000000000
--- a/test/DebugInfo/2010-05-03-DisableFramePtr.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llc -o /dev/null < %s
-; Radar 7937664
-%struct.AppleEvent = type opaque
-
-define void @DisposeDMNotificationUPP(void (%struct.AppleEvent*)* %userUPP) "no-frame-pointer-elim-non-leaf" nounwind ssp {
-entry:
- %userUPP_addr = alloca void (%struct.AppleEvent*)* ; <void (%struct.AppleEvent*)**> [#uses=1]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata void (%struct.AppleEvent*)** %userUPP_addr, metadata !0, metadata !DIExpression()), !dbg !13
- store void (%struct.AppleEvent*)* %userUPP, void (%struct.AppleEvent*)** %userUPP_addr
- br label %return, !dbg !14
-
-return: ; preds = %entry
- ret void, !dbg !14
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!3}
-!llvm.module.flags = !{!19}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "userUPP", line: 7, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "DisposeDMNotificationUPP", linkageName: "DisposeDMNotificationUPP", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !16, scope: null, type: !4)
-!2 = !DIFile(filename: "t.c", directory: "/Users/echeng/LLVM/radars/r7937664/")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !16, enums: !17, retainedTypes: !17, subprograms: !18)
-!4 = !DISubroutineType(types: !5)
-!5 = !{null, !6}
-!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "DMNotificationUPP", line: 6, file: !16, scope: !2, baseType: !7)
-!7 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !16, scope: !2, baseType: !8)
-!8 = !DISubroutineType(types: !9)
-!9 = !{null, !10}
-!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !16, scope: !2, baseType: !11)
-!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "AppleEvent", line: 4, file: !16, scope: !2, baseType: !12)
-!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "AEDesc", line: 1, flags: DIFlagFwdDecl, file: !16, scope: !2)
-!13 = !DILocation(line: 7, scope: !1)
-!14 = !DILocation(line: 8, scope: !15)
-!15 = distinct !DILexicalBlock(line: 7, column: 0, file: !16, scope: !1)
-!16 = !DIFile(filename: "t.c", directory: "/Users/echeng/LLVM/radars/r7937664/")
-!17 = !{}
-!18 = !{!1}
-!19 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
deleted file mode 100644
index da1e57caba14..000000000000
--- a/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-
-;RUN: llc < %s -o /dev/null
-;Radar 7937109
-
-%struct.anon = type { i64, i32, i32, i32, [1 x i32] }
-%struct.gpm_t = type { i32, i8*, [16 x i8], i32, i64, i64, i64, i64, i64, i64, i32, i16, i16, [8 x %struct.gpmr_t] }
-%struct.gpmr_t = type { [48 x i8], [48 x i8], [16 x i8], i64, i64, i64, i64, i16 }
-%struct.gpt_t = type { [8 x i8], i32, i32, i32, i32, i64, i64, i64, i64, [16 x i8], %struct.anon }
-
-@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.gpm_t*, %struct.gpt_t*)* @gpt2gpm to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define fastcc void @gpt2gpm(%struct.gpm_t* %gpm, %struct.gpt_t* %gpt) nounwind optsize ssp {
-entry:
- %data_addr.i18 = alloca i64, align 8 ; <i64*> [#uses=1]
- %data_addr.i17 = alloca i64, align 8 ; <i64*> [#uses=2]
- %data_addr.i16 = alloca i64, align 8 ; <i64*> [#uses=0]
- %data_addr.i15 = alloca i32, align 4 ; <i32*> [#uses=0]
- %data_addr.i = alloca i64, align 8 ; <i64*> [#uses=0]
- %0 = getelementptr inbounds %struct.gpm_t, %struct.gpm_t* %gpm, i32 0, i32 2, i32 0 ; <i8*> [#uses=1]
- %1 = getelementptr inbounds %struct.gpt_t, %struct.gpt_t* %gpt, i32 0, i32 9, i32 0 ; <i8*> [#uses=1]
- call void @uuid_LtoB(i8* %0, i8* %1) nounwind, !dbg !0
- %a9 = load volatile i64, i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
- %a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; <i64> [#uses=1]
- %a11 = getelementptr inbounds %struct.gpt_t, %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; <i64*> [#uses=1]
- %a12 = load i64, i64* %a11, align 4, !dbg !7 ; <i64> [#uses=1]
- call void @llvm.dbg.declare(metadata i64* %data_addr.i17, metadata !8, metadata !DIExpression()) nounwind, !dbg !14
- store i64 %a12, i64* %data_addr.i17, align 8
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !15, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !16)
- call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !19, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !16)
- call void @llvm.dbg.declare(metadata !6, metadata !23, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !24)
- call void @llvm.dbg.value(metadata i64* %data_addr.i17, i64 0, metadata !34, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !24)
- %a13 = load volatile i64, i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
- %a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; <i64> [#uses=2]
- %a15 = add i64 %a10, %a14, !dbg !7 ; <i64> [#uses=1]
- %a16 = sub i64 %a15, %a14 ; <i64> [#uses=1]
- %a17 = getelementptr inbounds %struct.gpm_t, %struct.gpm_t* %gpm, i32 0, i32 5, !dbg !7 ; <i64*> [#uses=1]
- store i64 %a16, i64* %a17, align 4, !dbg !7
- ret void, !dbg !7
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-
-declare i32 @llvm.bswap.i32(i32) nounwind readnone
-
-declare i64 @llvm.bswap.i64(i64) nounwind readnone
-
-declare void @uuid_LtoB(i8*, i8*)
-
-!llvm.dbg.cu = !{!4}
-!llvm.module.flags = !{!41}
-!0 = !DILocation(line: 808, scope: !1)
-!1 = distinct !DILexicalBlock(line: 807, column: 0, file: !39, scope: !2)
-!2 = !DISubprogram(name: "gpt2gpm", linkageName: "gpt2gpm", line: 807, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !39, scope: null, type: !5)
-!3 = !DIFile(filename: "G.c", directory: "/tmp")
-!4 = !DICompileUnit(language: DW_LANG_C89, producer: "llvm-gcc", isOptimized: true, emissionKind: 0, file: !39, enums: !18, retainedTypes: !18, subprograms: !40)
-!5 = !DISubroutineType(types: !6)
-!6 = !{null}
-!7 = !DILocation(line: 810, scope: !1)
-!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "data", line: 201, arg: 0, scope: !9, file: !10, type: !11)
-!9 = !DISubprogram(name: "_OSSwapInt64", linkageName: "_OSSwapInt64", line: 202, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !10, scope: null, type: !5)
-!10 = !DIFile(filename: "OSByteOrder.h", directory: "/usr/include/libkern/ppc")
-!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", line: 59, file: !36, scope: !3, baseType: !13)
-!12 = !DIFile(filename: "stdint.h", directory: "/usr/4.2.1/include")
-!13 = !DIBasicType(tag: DW_TAG_base_type, name: "long long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
-!14 = !DILocation(line: 202, scope: !9, inlinedAt: !7)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "base", line: 92, arg: 0, scope: !16, file: !10, type: !17)
-!16 = !DISubprogram(name: "OSReadSwapInt64", linkageName: "OSReadSwapInt64", line: 95, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !38, scope: null, type: !5)
-!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !39, scope: !3, baseType: null)
-!18 = !{}
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "byteOffset", line: 94, arg: 0, scope: !16, file: !10, type: !20)
-!20 = !DIDerivedType(tag: DW_TAG_typedef, name: "uintptr_t", line: 114, file: !37, scope: !3, baseType: !22)
-!21 = !DIFile(filename: "types.h", directory: "/usr/include/ppc")
-!22 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "u", line: 100, scope: !24, file: !10, type: !25)
-!24 = distinct !DILexicalBlock(line: 95, column: 0, file: !38, scope: !16)
-!25 = !DICompositeType(tag: DW_TAG_union_type, line: 97, size: 64, align: 64, file: !38, scope: !16, elements: !26)
-!26 = !{!27, !28}
-!27 = !DIDerivedType(tag: DW_TAG_member, name: "u64", line: 98, size: 64, align: 64, file: !38, scope: !25, baseType: !11)
-!28 = !DIDerivedType(tag: DW_TAG_member, name: "u32", line: 99, size: 64, align: 32, file: !38, scope: !25, baseType: !29)
-!29 = !DICompositeType(tag: DW_TAG_array_type, size: 64, align: 32, file: !39, scope: !3, baseType: !30, elements: !32)
-!30 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", line: 55, file: !36, scope: !3, baseType: !31)
-!31 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!32 = !{!33}
-!33 = !DISubrange(count: 2)
-!34 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "addr", line: 96, scope: !24, file: !10, type: !35)
-!35 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !39, scope: !3, baseType: !11)
-!36 = !DIFile(filename: "stdint.h", directory: "/usr/4.2.1/include")
-!37 = !DIFile(filename: "types.h", directory: "/usr/include/ppc")
-!38 = !DIFile(filename: "OSByteOrder.h", directory: "/usr/include/libkern/ppc")
-!39 = !DIFile(filename: "G.c", directory: "/tmp")
-!40 = !{!2, !9, !16}
-!41 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll
deleted file mode 100644
index 6513165ac248..000000000000
--- a/test/DebugInfo/2010-05-10-MultipleCU.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; Check that two compile units are generated
-
-; CHECK: Compile Unit:
-; CHECK: Compile Unit:
-
-define i32 @foo() nounwind readnone ssp {
-return:
- ret i32 42, !dbg !0
-}
-
-define i32 @bar() nounwind readnone ssp {
-return:
- ret i32 21, !dbg !8
-}
-
-!llvm.dbg.cu = !{!4, !12}
-!llvm.module.flags = !{!21}
-!16 = !{!2}
-!17 = !{!10}
-
-!0 = !DILocation(line: 3, scope: !1)
-!1 = distinct !DILexicalBlock(line: 2, column: 0, file: !18, scope: !2)
-!2 = !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !18, scope: !3, type: !5, function: i32 ()* @foo)
-!3 = !DIFile(filename: "a.c", directory: "/tmp/")
-!4 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19, subprograms: !16)
-!5 = !DISubroutineType(types: !6)
-!6 = !{!7}
-!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DILocation(line: 3, scope: !9)
-!9 = distinct !DILexicalBlock(line: 2, column: 0, file: !20, scope: !10)
-!10 = !DISubprogram(name: "bar", linkageName: "bar", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !20, scope: !11, type: !13, function: i32 ()* @bar)
-!11 = !DIFile(filename: "b.c", directory: "/tmp/")
-!12 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !20, enums: !19, retainedTypes: !19, subprograms: !17)
-!13 = !DISubroutineType(types: !14)
-!14 = !{!15}
-!15 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!18 = !DIFile(filename: "a.c", directory: "/tmp/")
-!19 = !{}
-!20 = !DIFile(filename: "b.c", directory: "/tmp/")
-!21 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
deleted file mode 100644
index 27a0535bc8c3..000000000000
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-; RUN: %llc_dwarf -O2 %s -o - | FileCheck %s
-; Check struct X for dead variable xyz from inlined function foo.
-
-; CHECK: section_info
-; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name
-
-
-@i = common global i32 0 ; <i32*> [#uses=2]
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-
-define i32 @bar() nounwind ssp {
-entry:
- %0 = load i32, i32* @i, align 4, !dbg !17 ; <i32> [#uses=2]
- tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !59, metadata !DIExpression()), !dbg !19
- tail call void @llvm.dbg.declare(metadata !29, metadata !60, metadata !DIExpression()), !dbg !21
- %1 = mul nsw i32 %0, %0, !dbg !22 ; <i32> [#uses=2]
- store i32 %1, i32* @i, align 4, !dbg !17
- ret i32 %1, !dbg !23
-}
-
-!llvm.dbg.cu = !{!2}
-!llvm.module.flags = !{!28}
-
-!0 = !DISubprogram(name: "foo", line: 9, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 9, file: !27, scope: !1, type: !3, variables: !24)
-!1 = !DIFile(filename: "bar.c", directory: "/tmp/")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !27, enums: !20, retainedTypes: !20, subprograms: !25, globals: !26, imports: !20)
-!3 = !DISubroutineType(types: !4)
-!4 = !{!5, !5}
-!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "bar", linkageName: "bar", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !27, scope: !1, type: !7, function: i32 ()* @bar)
-!7 = !DISubroutineType(types: !8)
-!8 = !{!5}
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
-
-!59 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
-!60 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
-
-!11 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !0)
-!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "X", line: 10, size: 64, align: 32, file: !27, scope: !0, elements: !13)
-!13 = !{!14, !15}
-!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 10, size: 32, align: 32, file: !27, scope: !12, baseType: !5)
-!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 10, size: 32, align: 32, offset: 32, file: !27, scope: !12, baseType: !5)
-!16 = !DIGlobalVariable(name: "i", line: 5, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !5, variable: i32* @i)
-!17 = !DILocation(line: 15, scope: !18)
-!18 = distinct !DILexicalBlock(line: 14, column: 0, file: !1, scope: !6)
-!19 = !DILocation(line: 9, scope: !0, inlinedAt: !17)
-!20 = !{}
-!21 = !DILocation(line: 9, scope: !11, inlinedAt: !17)
-!22 = !DILocation(line: 11, scope: !11, inlinedAt: !17)
-!23 = !DILocation(line: 16, scope: !18)
-!24 = !{!9, !10}
-!25 = !{!0, !6}
-!26 = !{!16}
-!27 = !DIFile(filename: "bar.c", directory: "/tmp/")
-!28 = !{i32 1, !"Debug Info Version", i32 3}
-!29 = !{null}
diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll
deleted file mode 100644
index fbfe9f80189d..000000000000
--- a/test/DebugInfo/2010-07-19-Crash.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc -o /dev/null < %s
-; PR7662
-; Do not add variables to !11 because it is a declaration entry.
-
-define i32 @bar() nounwind readnone ssp {
-entry:
- ret i32 42, !dbg !9
-}
-
-!llvm.dbg.cu = !{!2}
-!llvm.module.flags = !{!15}
-!llvm.dbg.sp = !{!0, !6, !11}
-!llvm.dbg.lv.foo = !{!7}
-
-!0 = !DISubprogram(name: "bar", linkageName: "bar", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3, function: i32 ()* @bar)
-!1 = !DIFile(filename: "one.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 2.8", isOptimized: true, emissionKind: 0, file: !12, enums: !14, retainedTypes: !14, subprograms: !13)
-!3 = !DISubroutineType(types: !4)
-!4 = !{!5}
-!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3)
-!7 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "one", line: 8, scope: !8, file: !1, type: !5)
-!8 = distinct !DILexicalBlock(line: 7, column: 18, file: !12, scope: !6)
-!9 = !DILocation(line: 4, column: 3, scope: !10)
-!10 = distinct !DILexicalBlock(line: 3, column: 11, file: !12, scope: !0)
-!11 = !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: true, isDefinition: false, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3)
-!12 = !DIFile(filename: "one.c", directory: "/private/tmp")
-!13 = !{!0}
-!14 = !{}
-!15 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/2010-10-01-crash.ll b/test/DebugInfo/2010-10-01-crash.ll
deleted file mode 100644
index 5c736ba85552..000000000000
--- a/test/DebugInfo/2010-10-01-crash.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc -O0 %s -o /dev/null
-
-define void @CGRectStandardize(i32* sret %agg.result, i32* byval %rect) nounwind ssp {
-entry:
- call void @llvm.dbg.declare(metadata i32* %rect, metadata !23, metadata !DIExpression()), !dbg !24
- ret void
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-
-
-!llvm.dbg.cu = !{!2}
-!llvm.module.flags = !{!27}
-!0 = !DISubprogram(name: "CGRectStandardize", linkageName: "CGRectStandardize", line: 54, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !1, scope: null, function: void (i32*, i32*)* @CGRectStandardize)
-!1 = !DIFile(filename: "GSFusedSilica.m", directory: "/Volumes/Data/Users/sabre/Desktop")
-!2 = !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 2.9 (trunk 115292)", isOptimized: true, runtimeVersion: 1, emissionKind: 0, file: !25, enums: !26, retainedTypes: !26)
-!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "CGRect", line: 49, file: !25, baseType: null)
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "rect", line: 53, arg: 0, scope: !0, file: !1, type: !5)
-!24 = !DILocation(line: 53, column: 33, scope: !0)
-!25 = !DIFile(filename: "GSFusedSilica.m", directory: "/Volumes/Data/Users/sabre/Desktop")
-!26 = !{}
-!27 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/AArch64/big-endian.ll b/test/DebugInfo/AArch64/big-endian.ll
index 45f49da63077..22b7af9df3d4 100644
--- a/test/DebugInfo/AArch64/big-endian.ll
+++ b/test/DebugInfo/AArch64/big-endian.ll
@@ -9,7 +9,7 @@ target triple = "aarch64_be--none-eabi"
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "-", directory: "/work/validation")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/AArch64/bitfields.ll b/test/DebugInfo/AArch64/bitfields.ll
index 5f0caab286b8..867b5be91ee1 100644
--- a/test/DebugInfo/AArch64/bitfields.ll
+++ b/test/DebugInfo/AArch64/bitfields.ll
@@ -54,7 +54,7 @@ target triple = "aarch64_be--linux-gnu"
!llvm.module.flags = !{!13, !14, !15}
!llvm.ident = !{!16}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "bitfields.c", directory: "/")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/AArch64/cfi-eof-prologue.ll b/test/DebugInfo/AArch64/cfi-eof-prologue.ll
index 756443a951dd..c4681ca12fbe 100644
--- a/test/DebugInfo/AArch64/cfi-eof-prologue.ll
+++ b/test/DebugInfo/AArch64/cfi-eof-prologue.ll
@@ -26,7 +26,7 @@ target triple = "aarch64-apple-ios"
@_ZTV1B = external unnamed_addr constant [4 x i8*]
; Function Attrs: nounwind
-define %struct.B* @_ZN1BC2Ev(%struct.B* %this) unnamed_addr #0 align 2 {
+define %struct.B* @_ZN1BC2Ev(%struct.B* %this) unnamed_addr #0 align 2 !dbg !28 {
entry:
tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !30, metadata !38), !dbg !39
%0 = getelementptr inbounds %struct.B, %struct.B* %this, i64 0, i32 0, !dbg !40
@@ -39,7 +39,7 @@ entry:
declare %struct.A* @_ZN1AC2Ev(%struct.A*)
; Function Attrs: nounwind
-define %struct.B* @_ZN1BC1Ev(%struct.B* %this) unnamed_addr #0 align 2 {
+define %struct.B* @_ZN1BC1Ev(%struct.B* %this) unnamed_addr #0 align 2 !dbg !32 {
entry:
tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !34, metadata !38), !dbg !44
tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !45, metadata !38) #3, !dbg !47
@@ -61,7 +61,7 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!35, !36}
!llvm.ident = !{!37}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224279) (llvm/trunk 224283)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !27, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224279) (llvm/trunk 224283)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !27, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "")
!2 = !{}
!3 = !{!4, !13}
@@ -89,13 +89,13 @@ attributes #3 = { nounwind }
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
!26 = !DISubprogram(name: "~A", line: 3, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !"_ZTS1A", type: !23, containingType: !"_ZTS1A")
!27 = !{!28, !32}
-!28 = !DISubprogram(name: "B", linkageName: "_ZN1BC2Ev", line: 9, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !5, scope: !"_ZTS1B", type: !9, function: %struct.B* (%struct.B*)* @_ZN1BC2Ev, declaration: !8, variables: !29)
+!28 = distinct !DISubprogram(name: "B", linkageName: "_ZN1BC2Ev", line: 9, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !5, scope: !"_ZTS1B", type: !9, declaration: !8, variables: !29)
!29 = !{!30}
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
+!30 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
!31 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1B")
-!32 = !DISubprogram(name: "B", linkageName: "_ZN1BC1Ev", line: 9, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !5, scope: !"_ZTS1B", type: !9, function: %struct.B* (%struct.B*)* @_ZN1BC1Ev, declaration: !8, variables: !33)
+!32 = distinct !DISubprogram(name: "B", linkageName: "_ZN1BC1Ev", line: 9, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !5, scope: !"_ZTS1B", type: !9, declaration: !8, variables: !33)
!33 = !{!34}
-!34 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !32, type: !31)
+!34 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !32, type: !31)
!35 = !{i32 2, !"Dwarf Version", i32 4}
!36 = !{i32 2, !"Debug Info Version", i32 3}
!37 = !{!"clang version 3.6.0 (trunk 224279) (llvm/trunk 224283)"}
@@ -106,7 +106,7 @@ attributes #3 = { nounwind }
!42 = !{!"vtable pointer", !43, i64 0}
!43 = !{!"Simple C/C++ TBAA"}
!44 = !DILocation(line: 0, scope: !32)
-!45 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
+!45 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
!46 = !DILocation(line: 9, scope: !32)
!47 = !DILocation(line: 0, scope: !28, inlinedAt: !46)
!48 = !DILocation(line: 9, scope: !28, inlinedAt: !46)
diff --git a/test/DebugInfo/AArch64/coalescing.ll b/test/DebugInfo/AArch64/coalescing.ll
index b1d566197126..5f69895c7363 100644
--- a/test/DebugInfo/AArch64/coalescing.ll
+++ b/test/DebugInfo/AArch64/coalescing.ll
@@ -14,7 +14,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios"
; Function Attrs: nounwind optsize
-define void @_Z5startv() #0 {
+define void @_Z5startv() #0 !dbg !4 {
entry:
%size = alloca i32, align 4
%0 = bitcast i32* %size to i8*, !dbg !15
@@ -44,22 +44,22 @@ attributes #3 = { nounwind optsize }
!llvm.module.flags = !{!12, !13}
!llvm.ident = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 223149) (llvm/trunk 223115)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 223149) (llvm/trunk 223115)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "start", linkageName: "_Z5startv", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, function: void ()* @_Z5startv, variables: !9)
+!4 = distinct !DISubprogram(name: "start", linkageName: "_Z5startv", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !9)
!5 = !DIFile(filename: "test1.c", directory: "")
!6 = !DIFile(filename: "test1.c", directory: "")
!7 = !DISubroutineType(types: !8)
!8 = !{null}
!9 = !{!10}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "size", line: 4, scope: !4, file: !6, type: !11)
+!10 = !DILocalVariable(name: "size", line: 4, scope: !4, file: !6, type: !11)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
!12 = !{i32 2, !"Dwarf Version", i32 2}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = !{!"clang version 3.6.0 (trunk 223149) (llvm/trunk 223115)"}
!15 = !DILocation(line: 5, column: 3, scope: !4)
-!16 = !DIExpression()
+!16 = !DIExpression(DW_OP_deref)
!17 = !DILocation(line: 4, column: 12, scope: !4)
!18 = !DILocation(line: 8, column: 1, scope: !4)
diff --git a/test/DebugInfo/AArch64/constant-dbgloc.ll b/test/DebugInfo/AArch64/constant-dbgloc.ll
index a71b869f5366..aae518725fa4 100644
--- a/test/DebugInfo/AArch64/constant-dbgloc.ll
+++ b/test/DebugInfo/AArch64/constant-dbgloc.ll
@@ -13,7 +13,7 @@ target triple = "aarch64--linux-gnueabihf"
; CHECK: movn
; Function Attrs: nounwind
-define i32 @main() {
+define i32 @main() !dbg !4 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -23,11 +23,11 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
!1 = !DIFile(filename: "test.c", directory: "/home/user/clang/build")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/DebugInfo/AArch64/dwarfdump.ll b/test/DebugInfo/AArch64/dwarfdump.ll
index 3e95a6fe314f..57dcdf82d69e 100644
--- a/test/DebugInfo/AArch64/dwarfdump.ll
+++ b/test/DebugInfo/AArch64/dwarfdump.ll
@@ -18,7 +18,7 @@
; CHECK-4: DW_AT_high_pc [DW_FORM_data4] (0x00000008)
; CHECK-3: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000008)
-define i32 @main() nounwind {
+define i32 @main() nounwind !dbg !3 {
ret i32 0, !dbg !8
}
@@ -27,10 +27,10 @@ attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !9, enums: !1, retainedTypes: !1, subprograms: !2, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !9, enums: !1, retainedTypes: !1, subprograms: !2, globals: !1, imports: !1)
!1 = !{}
!2 = !{!3}
-!3 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !9, scope: !4, type: !5, function: i32 ()* @main, variables: !1)
+!3 = distinct !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !9, scope: !4, type: !5, variables: !1)
!4 = !DIFile(filename: "tmp.c", directory: "/home/tim/llvm/build")
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
diff --git a/test/DebugInfo/AArch64/frameindices.ll b/test/DebugInfo/AArch64/frameindices.ll
index 029077423368..5fb03dfc8834 100644
--- a/test/DebugInfo/AArch64/frameindices.ll
+++ b/test/DebugInfo/AArch64/frameindices.ll
@@ -42,7 +42,7 @@ target triple = "aarch64-apple-ios"
@a = global i64 0, align 8
@b = global i32* null, align 8
-define void @_Z3f131A(%struct.A* nocapture readonly %p1) #0 {
+define void @_Z3f131A(%struct.A* nocapture readonly %p1) #0 !dbg !25 {
entry:
%agg.tmp = alloca %struct.A, align 8
tail call void @llvm.dbg.declare(metadata %struct.A* %p1, metadata !30, metadata !46), !dbg !47
@@ -67,7 +67,7 @@ declare void @_Z2f91A(%struct.A*) #0
; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #3
-define void @_Z3f111A(%struct.A* nocapture readonly %p1) #0 {
+define void @_Z3f111A(%struct.A* nocapture readonly %p1) #0 !dbg !31 {
entry:
%agg.tmp.i = alloca %struct.A, align 8
tail call void @llvm.dbg.declare(metadata %struct.A* %p1, metadata !33, metadata !46), !dbg !63
@@ -83,7 +83,7 @@ entry:
ret void, !dbg !73
}
-define void @_Z3f16v() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define void @_Z3f16v() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !34 {
entry:
%agg.tmp.i.i = alloca %struct.A, align 8
%d = alloca %struct.B, align 1
@@ -160,7 +160,7 @@ attributes #5 = { builtin }
!llvm.module.flags = !{!43, !44}
!llvm.ident = !{!45}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !24, globals: !40, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !24, globals: !40, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "")
!2 = !{}
!3 = !{!4, !12, !14}
@@ -185,21 +185,21 @@ attributes #5 = { builtin }
!22 = !DISubroutineType(types: !23)
!23 = !{null, !19}
!24 = !{!25, !31, !34}
-!25 = !DISubprogram(name: "f13", linkageName: "_Z3f131A", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !26, type: !27, function: void (%struct.A*)* @_Z3f131A, variables: !29)
+!25 = distinct !DISubprogram(name: "f13", linkageName: "_Z3f131A", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !26, type: !27, variables: !29)
!26 = !DIFile(filename: "test.cpp", directory: "")
!27 = !DISubroutineType(types: !28)
!28 = !{null, !"_ZTS1A"}
!29 = !{!30}
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 13, arg: 1, scope: !25, file: !26, type: !"_ZTS1A")
-!31 = !DISubprogram(name: "f11", linkageName: "_Z3f111A", line: 17, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !5, scope: !26, type: !27, function: void (%struct.A*)* @_Z3f111A, variables: !32)
+!30 = !DILocalVariable(name: "p1", line: 13, arg: 1, scope: !25, file: !26, type: !"_ZTS1A")
+!31 = distinct !DISubprogram(name: "f11", linkageName: "_Z3f111A", line: 17, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !5, scope: !26, type: !27, variables: !32)
!32 = !{!33}
-!33 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 17, arg: 1, scope: !31, file: !26, type: !"_ZTS1A")
-!34 = !DISubprogram(name: "f16", linkageName: "_Z3f16v", line: 18, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !5, scope: !26, type: !35, function: void ()* @_Z3f16v, variables: !37)
+!33 = !DILocalVariable(name: "p1", line: 17, arg: 1, scope: !31, file: !26, type: !"_ZTS1A")
+!34 = distinct !DISubprogram(name: "f16", linkageName: "_Z3f16v", line: 18, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !5, scope: !26, type: !35, variables: !37)
!35 = !DISubroutineType(types: !36)
!36 = !{null}
!37 = !{!38, !39}
-!38 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 19, scope: !34, file: !26, type: !"_ZTS1A")
-!39 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", line: 20, scope: !34, file: !26, type: !"_ZTS1B")
+!38 = !DILocalVariable(name: "c", line: 19, scope: !34, file: !26, type: !"_ZTS1A")
+!39 = !DILocalVariable(name: "d", line: 20, scope: !34, file: !26, type: !"_ZTS1B")
!40 = !{!41, !42}
!41 = !DIGlobalVariable(name: "a", line: 1, isLocal: false, isDefinition: true, scope: null, file: !26, type: !20, variable: i64* @a)
!42 = !DIGlobalVariable(name: "b", line: 7, isLocal: false, isDefinition: true, scope: null, file: !26, type: !12, variable: i32** @b)
@@ -234,7 +234,7 @@ attributes #5 = { builtin }
!71 = !DILocation(line: 15, column: 3, scope: !25, inlinedAt: !66)
!72 = !DILocation(line: 16, column: 1, scope: !25, inlinedAt: !66)
!73 = !DILocation(line: 17, column: 27, scope: !31)
-!74 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 17, arg: 1, scope: !31, file: !26, type: !"_ZTS1A")
+!74 = !DILocalVariable(name: "p1", line: 17, arg: 1, scope: !31, file: !26, type: !"_ZTS1A")
!75 = distinct !DILocation(line: 22, column: 3, scope: !34)
!76 = !DIExpression(DW_OP_bit_piece, 8, 120)
!77 = !DILocation(line: 17, column: 12, scope: !31, inlinedAt: !75)
diff --git a/test/DebugInfo/AArch64/prologue_end.ll b/test/DebugInfo/AArch64/prologue_end.ll
new file mode 100644
index 000000000000..a70112229959
--- /dev/null
+++ b/test/DebugInfo/AArch64/prologue_end.ll
@@ -0,0 +1,43 @@
+; RUN: llc -disable-fp-elim -O0 %s -mtriple aarch64-apple-darwin -o - | FileCheck %s
+
+; int func(void);
+; void prologue_end_test() {
+; func();
+; func();
+; }
+
+define void @prologue_end_test() nounwind uwtable !dbg !4 {
+ ; CHECK: prologue_end_test:
+ ; CHECK: .cfi_startproc
+ ; CHECK: stp x29, x30
+ ; CHECK: mov x29, sp
+ ; CHECK: sub sp, sp
+ ; CHECK: .loc 1 3 3 prologue_end
+ ; CHECK: bl _func
+ ; CHECK: bl _func
+entry:
+ %call = call i32 @func(), !dbg !11
+ %call1 = call i32 @func(), !dbg !12
+ ret void, !dbg !13
+}
+
+declare i32 @func()
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 242129)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "foo.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "prologue_end_test", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"PIC Level", i32 2}
+!10 = !{!"clang version 3.7.0 (trunk 242129)"}
+!11 = !DILocation(line: 3, column: 3, scope: !4)
+!12 = !DILocation(line: 4, column: 3, scope: !4)
+!13 = !DILocation(line: 5, column: 1, scope: !4)
diff --git a/test/DebugInfo/AArch64/struct_by_value.ll b/test/DebugInfo/AArch64/struct_by_value.ll
index dc9ac42e8a0f..edbef82557cb 100644
--- a/test/DebugInfo/AArch64/struct_by_value.ll
+++ b/test/DebugInfo/AArch64/struct_by_value.ll
@@ -31,7 +31,7 @@ target triple = "arm64-apple-ios3.0.0"
%struct.five = type { i32, i32, i32, i32, i32 }
; Function Attrs: nounwind ssp
-define i32 @return_five_int(%struct.five* %f) #0 {
+define i32 @return_five_int(%struct.five* %f) #0 !dbg !4 {
entry:
call void @llvm.dbg.declare(metadata %struct.five* %f, metadata !17, metadata !DIExpression(DW_OP_deref)), !dbg !18
%a = getelementptr inbounds %struct.five, %struct.five* %f, i32 0, i32 0, !dbg !19
@@ -48,11 +48,11 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!16, !20}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "LLVM version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "LLVM version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "struct_by_value.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "return_five_int", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 14, file: !1, scope: !5, type: !6, function: i32 (%struct.five*)* @return_five_int, variables: !2)
+!4 = distinct !DISubprogram(name: "return_five_int", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 14, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "struct_by_value.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
@@ -65,7 +65,7 @@ attributes #1 = { nounwind readnone }
!14 = !DIDerivedType(tag: DW_TAG_member, name: "d", line: 6, size: 32, align: 32, offset: 96, file: !1, scope: !9, baseType: !8)
!15 = !DIDerivedType(tag: DW_TAG_member, name: "e", line: 7, size: 32, align: 32, offset: 128, file: !1, scope: !9, baseType: !8)
!16 = !{i32 2, !"Dwarf Version", i32 2}
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "f", line: 13, arg: 1, scope: !4, file: !5, type: !9)
+!17 = !DILocalVariable(name: "f", line: 13, arg: 1, scope: !4, file: !5, type: !9)
!18 = !DILocation(line: 13, scope: !4)
!19 = !DILocation(line: 16, scope: !4)
!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/ARM/PR16736.ll b/test/DebugInfo/ARM/PR16736.ll
index 9d4992210a24..dd299a2a97d0 100644
--- a/test/DebugInfo/ARM/PR16736.ll
+++ b/test/DebugInfo/ARM/PR16736.ll
@@ -1,5 +1,5 @@
; RUN: llc -filetype=asm < %s | FileCheck %s
-; CHECK: @DEBUG_VALUE: h:x <- [R{{.*}}+{{.*}}]
+; CHECK: @DEBUG_VALUE: h:x <- [%R{{.*}}+{{.*}}]
; generated from:
; clang -cc1 -triple thumbv7 -S -O1 arm.cpp -g
;
@@ -13,7 +13,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:64:64-
target triple = "thumbv7-apple-ios"
; Function Attrs: nounwind
-define arm_aapcscc void @_Z1hiiiif(i32, i32, i32, i32, float %x) #0 {
+define arm_aapcscc void @_Z1hiiiif(i32, i32, i32, i32, float %x) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !12, metadata !DIExpression()), !dbg !18
tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !13, metadata !DIExpression()), !dbg !18
@@ -41,11 +41,11 @@ attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!17, !21}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 190804) (llvm/trunk 190797)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 190804) (llvm/trunk 190797)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "/<unknown>", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "h", linkageName: "_Z1hiiiif", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, function: void (i32, i32, i32, i32, float)* @_Z1hiiiif, variables: !11)
+!4 = distinct !DISubprogram(name: "h", linkageName: "_Z1hiiiif", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !11)
!5 = !DIFile(filename: "/arm.cpp", directory: "")
!6 = !DIFile(filename: "/arm.cpp", directory: "")
!7 = !DISubroutineType(types: !8)
@@ -53,11 +53,11 @@ attributes #3 = { nounwind }
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!11 = !{!12, !13, !14, !15, !16}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 3, arg: 1, scope: !4, file: !6, type: !9)
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 3, arg: 2, scope: !4, file: !6, type: !9)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 3, arg: 3, scope: !4, file: !6, type: !9)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 3, arg: 4, scope: !4, file: !6, type: !9)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 3, arg: 5, scope: !4, file: !6, type: !10)
+!12 = !DILocalVariable(name: "", line: 3, arg: 1, scope: !4, file: !6, type: !9)
+!13 = !DILocalVariable(name: "", line: 3, arg: 2, scope: !4, file: !6, type: !9)
+!14 = !DILocalVariable(name: "", line: 3, arg: 3, scope: !4, file: !6, type: !9)
+!15 = !DILocalVariable(name: "", line: 3, arg: 4, scope: !4, file: !6, type: !9)
+!16 = !DILocalVariable(name: "x", line: 3, arg: 5, scope: !4, file: !6, type: !10)
!17 = !{i32 2, !"Dwarf Version", i32 4}
!18 = !DILocation(line: 3, scope: !4)
!19 = !DILocation(line: 4, scope: !4)
diff --git a/test/DebugInfo/ARM/bitfield.ll b/test/DebugInfo/ARM/bitfield.ll
index 9b41e4e4f7bc..4c528d06df11 100644
--- a/test/DebugInfo/ARM/bitfield.ll
+++ b/test/DebugInfo/ARM/bitfield.ll
@@ -26,7 +26,7 @@ target triple = "thumbv7-apple-ios"
!llvm.module.flags = !{!11, !12, !13, !14, !15}
!llvm.ident = !{!16}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "test.i", directory: "/")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/ARM/cfi-eof-prologue.ll b/test/DebugInfo/ARM/cfi-eof-prologue.ll
index 07e4d8a717b6..7430551fd610 100644
--- a/test/DebugInfo/ARM/cfi-eof-prologue.ll
+++ b/test/DebugInfo/ARM/cfi-eof-prologue.ll
@@ -27,7 +27,7 @@ target triple = "thumbv7-apple-ios"
@_ZTV1B = external unnamed_addr constant [4 x i8*]
; Function Attrs: nounwind
-define %struct.B* @_ZN1BC2Ev(%struct.B* %this) unnamed_addr #0 align 2 {
+define %struct.B* @_ZN1BC2Ev(%struct.B* %this) unnamed_addr #0 align 2 !dbg !28 {
entry:
tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !30, metadata !40), !dbg !41
%0 = getelementptr inbounds %struct.B, %struct.B* %this, i32 0, i32 0, !dbg !42
@@ -40,7 +40,7 @@ entry:
declare %struct.A* @_ZN1AC2Ev(%struct.A*)
; Function Attrs: nounwind
-define %struct.B* @_ZN1BC1Ev(%struct.B* %this) unnamed_addr #0 align 2 {
+define %struct.B* @_ZN1BC1Ev(%struct.B* %this) unnamed_addr #0 align 2 !dbg !32 {
entry:
tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !34, metadata !40), !dbg !46
tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !47, metadata !40) #3, !dbg !49
@@ -62,7 +62,7 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!35, !36, !37, !38}
!llvm.ident = !{!39}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224279) (llvm/trunk 224283)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !27, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224279) (llvm/trunk 224283)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !27, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "")
!2 = !{}
!3 = !{!4, !13}
@@ -90,13 +90,13 @@ attributes #3 = { nounwind }
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
!26 = !DISubprogram(name: "~A", line: 3, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !"_ZTS1A", type: !23, containingType: !"_ZTS1A")
!27 = !{!28, !32}
-!28 = !DISubprogram(name: "B", linkageName: "_ZN1BC2Ev", line: 9, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !5, scope: !"_ZTS1B", type: !9, function: %struct.B* (%struct.B*)* @_ZN1BC2Ev, declaration: !8, variables: !29)
+!28 = distinct !DISubprogram(name: "B", linkageName: "_ZN1BC2Ev", line: 9, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !5, scope: !"_ZTS1B", type: !9, declaration: !8, variables: !29)
!29 = !{!30}
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
+!30 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
!31 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !"_ZTS1B")
-!32 = !DISubprogram(name: "B", linkageName: "_ZN1BC1Ev", line: 9, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !5, scope: !"_ZTS1B", type: !9, function: %struct.B* (%struct.B*)* @_ZN1BC1Ev, declaration: !8, variables: !33)
+!32 = distinct !DISubprogram(name: "B", linkageName: "_ZN1BC1Ev", line: 9, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !5, scope: !"_ZTS1B", type: !9, declaration: !8, variables: !33)
!33 = !{!34}
-!34 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !32, type: !31)
+!34 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !32, type: !31)
!35 = !{i32 2, !"Dwarf Version", i32 4}
!36 = !{i32 2, !"Debug Info Version", i32 3}
!37 = !{i32 1, !"wchar_size", i32 4}
@@ -109,7 +109,7 @@ attributes #3 = { nounwind }
!44 = !{!"vtable pointer", !45, i64 0}
!45 = !{!"Simple C/C++ TBAA"}
!46 = !DILocation(line: 0, scope: !32)
-!47 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
+!47 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
!48 = !DILocation(line: 9, scope: !32)
!49 = !DILocation(line: 0, scope: !28, inlinedAt: !48)
!50 = !DILocation(line: 9, scope: !28, inlinedAt: !48)
diff --git a/test/DebugInfo/ARM/constant-dbgloc.ll b/test/DebugInfo/ARM/constant-dbgloc.ll
index c4a5703f3e8b..5fd219af86c2 100644
--- a/test/DebugInfo/ARM/constant-dbgloc.ll
+++ b/test/DebugInfo/ARM/constant-dbgloc.ll
@@ -13,7 +13,7 @@ target triple = "armv7--linux-gnueabihf"
; CHECK: mvn
; Function Attrs: nounwind
-define i32 @main() {
+define i32 @main() !dbg !4 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -23,11 +23,11 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
!1 = !DIFile(filename: "test.c", directory: "/home/user/clang/build")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/DebugInfo/ARM/float-args.ll b/test/DebugInfo/ARM/float-args.ll
new file mode 100644
index 000000000000..9ec7db2bdf07
--- /dev/null
+++ b/test/DebugInfo/ARM/float-args.ll
@@ -0,0 +1,45 @@
+; RUN: %llc_dwarf -filetype=obj -mattr=+vfp2 -float-abi=hard < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; RUN: %llc_dwarf -filetype=obj -mattr=-vfp2 -float-abi=soft < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; REQUIRES: object-emission
+
+; Generated by clang -O1 -g from the following C source:
+; float foo(float p) {
+; return p;
+; }
+
+; When using the soft-float calling convention, we have to look through a
+; bitcast to find the register which contains the argument.
+
+; CHECK: 0x{{[0-9a-f]*}}: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_location
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--none-eabi"
+
+define float @foo(float %p) !dbg !4 {
+entry:
+ tail call void @llvm.dbg.value(metadata float %p, i64 0, metadata !9, metadata !15), !dbg !16
+ ret float %p, !dbg !18
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "p", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !DIExpression()
+!16 = !DILocation(line: 1, column: 17, scope: !4)
+!17 = !DILocation(line: 2, column: 12, scope: !4)
+!18 = !DILocation(line: 2, column: 3, scope: !4)
diff --git a/test/DebugInfo/ARM/header.ll b/test/DebugInfo/ARM/header.ll
index 6723f093a59b..bb1f141c84d6 100644
--- a/test/DebugInfo/ARM/header.ll
+++ b/test/DebugInfo/ARM/header.ll
@@ -12,17 +12,17 @@
; CHECK: .section __DWARF,__debug_str,regular,debug
-define void @f() {
+define void @f() !dbg !4 {
ret void, !dbg !9
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "foo", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "foo", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "/foo/test.c", directory: "/foo")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, function: void ()* @f, variables: !2)
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
index daf26507a036..806bbc3b1d1f 100644
--- a/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
+++ b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
@@ -17,7 +17,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
target triple = "thumbv7-apple-ios8.0.0"
; Function Attrs: nounwind optsize readnone
-define void @run(float %r) #0 {
+define void @run(float %r) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.declare(metadata float %r, metadata !11, metadata !DIExpression()), !dbg !22
%conv = fptosi float %r to i32, !dbg !23
@@ -67,25 +67,25 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!20, !33}
!llvm.ident = !{!21}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<unknown>", directory: "/Volumes/Data/radar/15464571")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "run", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !5, scope: !6, type: !7, function: void (float)* @run, variables: !10)
+!4 = distinct !DISubprogram(name: "run", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !5, scope: !6, type: !7, variables: !10)
!5 = !DIFile(filename: "test.c", directory: "/Volumes/Data/radar/15464571")
!6 = !DIFile(filename: "test.c", directory: "/Volumes/Data/radar/15464571")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!10 = !{!11, !12, !14, !18}
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "r", line: 1, arg: 1, scope: !4, file: !6, type: !9)
-!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "count", line: 3, scope: !4, file: !6, type: !13)
+!11 = !DILocalVariable(name: "r", line: 1, arg: 1, scope: !4, file: !6, type: !9)
+!12 = !DILocalVariable(name: "count", line: 3, scope: !4, file: !6, type: !13)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vla", line: 4, scope: !4, file: !6, type: !15)
+!14 = !DILocalVariable(name: "vla", line: 4, scope: !4, file: !6, type: !15)
!15 = !DICompositeType(tag: DW_TAG_array_type, align: 32, baseType: !9, elements: !16)
!16 = !{!17}
!17 = !DISubrange(count: -1)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 6, scope: !19, file: !6, type: !13)
+!18 = !DILocalVariable(name: "i", line: 6, scope: !19, file: !6, type: !13)
!19 = distinct !DILexicalBlock(line: 6, column: 0, file: !5, scope: !4)
!20 = !{i32 2, !"Dwarf Version", i32 2}
!21 = !{!"clang version 3.4 "}
diff --git a/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll b/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll
index e68c5b824163..de3bee248a1f 100644
--- a/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll
+++ b/test/DebugInfo/ARM/multiple-constant-uses-drops-dbgloc.ll
@@ -19,7 +19,7 @@ target triple = "armv7--linux-gnueabihf"
@b = common global i32 0, align 4
; Function Attrs: nounwind
-define void @proc() #0 {
+define void @proc() #0 !dbg !4 {
entry:
store i8 65, i8* @ch, align 1, !dbg !17
store i32 0, i32* @b, align 4, !dbg !18
@@ -32,11 +32,11 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
!llvm.module.flags = !{!12, !13, !14, !15}
!llvm.ident = !{!16}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
!1 = !DIFile(filename: "test.c", directory: "/home/user/clang/build")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "proc", scope: !1, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @proc, variables: !2)
+!4 = distinct !DISubprogram(name: "proc", scope: !1, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{!8, !10}
diff --git a/test/DebugInfo/ARM/prologue_end.ll b/test/DebugInfo/ARM/prologue_end.ll
new file mode 100644
index 000000000000..b663b98e0387
--- /dev/null
+++ b/test/DebugInfo/ARM/prologue_end.ll
@@ -0,0 +1,46 @@
+; RUN: llc -disable-fp-elim -O0 %s -mtriple armv7-apple-darwin -o - | FileCheck %s
+; RUN: llc -disable-fp-elim -O0 %s -mtriple thumbv7-apple-darwin -o - | FileCheck %s
+
+; int func(void);
+; void prologue_end_test() {
+; func();
+; func();
+; }
+
+define void @prologue_end_test() nounwind uwtable !dbg !4 {
+ ; CHECK: prologue_end_test:
+ ; CHECK: .cfi_startproc
+ ; CHECK: push {r7, lr}
+ ; CHECK: {{mov r7, sp|add r7, sp}}
+ ; CHECK: sub sp
+ ; CHECK: .loc 1 3 3 prologue_end
+ ; CHECK: bl {{_func|Ltmp}}
+ ; CHECK: bl {{_func|Ltmp}}
+entry:
+ %call = call i32 @func(), !dbg !13
+ %call1 = call i32 @func(), !dbg !14
+ ret void, !dbg !15
+}
+
+declare i32 @func()
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9, !10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 242129)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "foo.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "prologue_end_test", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 1, !"min_enum_size", i32 4}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 3.7.0 (trunk 242129)"}
+!13 = !DILocation(line: 3, column: 3, scope: !4)
+!14 = !DILocation(line: 4, column: 3, scope: !4)
+!15 = !DILocation(line: 5, column: 1, scope: !4)
diff --git a/test/DebugInfo/ARM/s-super-register.ll b/test/DebugInfo/ARM/s-super-register.ll
index def87695ccce..887d37e27635 100644
--- a/test/DebugInfo/ARM/s-super-register.ll
+++ b/test/DebugInfo/ARM/s-super-register.ll
@@ -9,7 +9,7 @@ target triple = "thumbv7-apple-macosx10.6.7"
; 0x9d DW_OP_bit_piece
; CHECK: Location description: 90 {{.. .. ((93 ..)|(9d .. ..)) $}}
-define void @_Z3foov() optsize ssp {
+define void @_Z3foov() optsize ssp !dbg !1 {
entry:
%call = tail call float @_Z3barv() optsize, !dbg !11
tail call void @llvm.dbg.value(metadata float %call, i64 0, metadata !5, metadata !DIExpression()), !dbg !11
@@ -40,15 +40,15 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!20}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 130845)", isOptimized: true, emissionKind: 1, file: !18, enums: !19, retainedTypes: !19, subprograms: !16, imports: null)
-!1 = !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !18, scope: !2, type: !3, function: void ()* @_Z3foov, variables: !17)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 130845)", isOptimized: true, emissionKind: 1, file: !18, enums: !19, retainedTypes: !19, subprograms: !16, imports: null)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !18, scope: !2, type: !3, variables: !17)
!2 = !DIFile(filename: "k.cc", directory: "/private/tmp")
!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 6, scope: !6, file: !2, type: !7)
+!5 = !DILocalVariable(name: "k", line: 6, scope: !6, file: !2, type: !7)
!6 = distinct !DILexicalBlock(line: 5, column: 12, file: !18, scope: !1)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
-!8 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 8, scope: !9, file: !2, type: !7)
+!8 = !DILocalVariable(name: "y", line: 8, scope: !9, file: !2, type: !7)
!9 = distinct !DILexicalBlock(line: 7, column: 25, file: !18, scope: !10)
!10 = distinct !DILexicalBlock(line: 7, column: 3, file: !18, scope: !6)
!11 = !DILocation(line: 6, column: 18, scope: !6)
diff --git a/test/DebugInfo/ARM/selectiondag-deadcode.ll b/test/DebugInfo/ARM/selectiondag-deadcode.ll
index 1b6528269f7a..fe5e87658dde 100644
--- a/test/DebugInfo/ARM/selectiondag-deadcode.ll
+++ b/test/DebugInfo/ARM/selectiondag-deadcode.ll
@@ -1,7 +1,7 @@
; RUN: llc -filetype=asm < %s | FileCheck %s
target triple = "thumbv7-apple-ios7.0.0"
%class.Matrix3.0.6.10 = type { [9 x float] }
-define arm_aapcscc void @_Z9GetMatrixv(%class.Matrix3.0.6.10* noalias nocapture sret %agg.result) #0 {
+define arm_aapcscc void @_Z9GetMatrixv(%class.Matrix3.0.6.10* noalias nocapture sret %agg.result) #0 !dbg !39 {
br i1 fcmp oeq (float fadd (float fadd (float fmul (float undef, float undef), float fmul (float undef, float undef)), float fmul (float undef, float undef)), float 0.000000e+00), label %_ZN7Vector39NormalizeEv.exit, label %1
tail call arm_aapcscc void @_ZL4Sqrtd() #3
br label %_ZN7Vector39NormalizeEv.exit
@@ -21,7 +21,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare arm_aapcscc void @_ZL4Sqrtd() #2
!4 = !DICompositeType(tag: DW_TAG_class_type, name: "Matrix3", line: 20, size: 288, align: 32, file: !5, identifier: "_ZTS7Matrix3")
!5 = !DIFile(filename: "test.ii", directory: "/Volumes/Data/radar/15094721")
-!39 = !DISubprogram(name: "GetMatrix", linkageName: "_Z9GetMatrixv", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 32, file: !5, scope: !40, type: !41, function: void (%class.Matrix3.0.6.10*)* @_Z9GetMatrixv)
+!39 = distinct !DISubprogram(name: "GetMatrix", linkageName: "_Z9GetMatrixv", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 32, file: !5, scope: !40, type: !41)
!40 = !DIFile(filename: "test.ii", directory: "/Volumes/Data/radar/15094721")
!41 = !DISubroutineType(types: null)
-!45 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "matrix", line: 35, scope: !39, file: !40, type: !4)
+!45 = !DILocalVariable(name: "matrix", line: 35, scope: !39, file: !40, type: !4)
diff --git a/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll b/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll
index 396eb4fe1946..9bd7becb35b8 100644
--- a/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll
+++ b/test/DebugInfo/ARM/single-constant-use-preserves-dbgloc.ll
@@ -16,7 +16,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv7--linux-gnueabihf"
; Function Attrs: nounwind
-define i32 @main() {
+define i32 @main() !dbg !4 {
entry:
%retval = alloca i32, align 4
%x = alloca i32, align 4
@@ -48,17 +48,17 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
!1 = !DIFile(filename: "test.c", directory: "/home/user/clang/build")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 2, !"Debug Info Version", i32 3}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", scope: !4, file: !1, line: 3, type: !7)
+!10 = !DILocalVariable(name: "x", scope: !4, file: !1, line: 3, type: !7)
!11 = !DIExpression()
!12 = !DILocation(line: 3, column: 9, scope: !4)
!13 = !DILocation(line: 4, column: 9, scope: !14)
diff --git a/test/DebugInfo/ARM/tls.ll b/test/DebugInfo/ARM/tls.ll
index e349120b6075..4cb707ad4956 100644
--- a/test/DebugInfo/ARM/tls.ll
+++ b/test/DebugInfo/ARM/tls.ll
@@ -1,5 +1,8 @@
-; RUN: llc -O0 -filetype=asm -mtriple=armv7-linux-gnuehabi < %s | FileCheck %s
-;
+; RUN: llc -O0 -filetype=asm -mtriple=armv7-linux-gnuehabi < %s \
+; RUN: | FileCheck %s --check-prefix=CHECK
+; RUN: llc -O0 -filetype=asm -mtriple=armv7-linux-gnuehabi -emulated-tls < %s \
+; RUN: | FileCheck %s --check-prefix=EMU
+
; Generated with clang with source
; __thread int x;
@@ -16,7 +19,10 @@
; The debug relocation of the address of the tls variable
; CHECK: .long x(tlsldo)
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+; TODO: Add expected output for -emulated-tls tests.
+; EMU-NOT: .long x(tlsldo)
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "tls.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/COFF/asan-module-ctor.ll b/test/DebugInfo/COFF/asan-module-ctor.ll
index 3f8b129ca9b6..f132eb425706 100644
--- a/test/DebugInfo/COFF/asan-module-ctor.ll
+++ b/test/DebugInfo/COFF/asan-module-ctor.ll
@@ -24,7 +24,7 @@ target triple = "i686-pc-win32"
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 1, void ()* @asan.module_ctor }]
; Function Attrs: nounwind sanitize_address
-define i32 @foo() #0 {
+define i32 @foo() #0 !dbg !4 {
entry:
ret i32 0, !dbg !10
}
@@ -82,14 +82,14 @@ attributes #0 = { nounwind sanitize_address "less-precise-fpmad"="false" "no-fra
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "asan.c", directory: "D:\5C")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 ()* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "asan.c", directory: "D:C")
!6 = !DISubroutineType(types: !2)
-!7 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"CodeView", i32 1}
!8 = !{i32 1, !"Debug Info Version", i32 3}
!9 = !{!"clang version 3.5.0 "}
!10 = !DILocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/COFF/asan-module-without-functions.ll b/test/DebugInfo/COFF/asan-module-without-functions.ll
index 88eec67a4bdb..fe337fb24769 100644
--- a/test/DebugInfo/COFF/asan-module-without-functions.ll
+++ b/test/DebugInfo/COFF/asan-module-without-functions.ll
@@ -45,9 +45,9 @@ define internal void @asan.module_dtor() {
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "asan.c", directory: "D:\5C")
!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"CodeView", i32 1}
!4 = !{i32 1, !"Debug Info Version", i32 3}
!5 = !{!"clang version 3.5.0 "}
diff --git a/test/DebugInfo/COFF/asm.ll b/test/DebugInfo/COFF/asm.ll
index bc2a11d066b5..f3e52df54be0 100644
--- a/test/DebugInfo/COFF/asm.ll
+++ b/test/DebugInfo/COFF/asm.ll
@@ -1,7 +1,7 @@
; RUN: llc -mcpu=core2 -mtriple=i686-pc-win32 -O0 < %s | FileCheck --check-prefix=X86 %s
-; RUN: llc -mcpu=core2 -mtriple=i686-pc-win32 -o - -O0 < %s | llvm-mc -triple=i686-pc-win32 -filetype=obj | llvm-readobj -s -sr -codeview -section-symbols | FileCheck --check-prefix=OBJ32 %s
+; RUN: llc -mcpu=core2 -mtriple=i686-pc-win32 -o - -O0 < %s | llvm-mc -triple=i686-pc-win32 -filetype=obj | llvm-readobj -s -sr -codeview | FileCheck --check-prefix=OBJ32 %s
; RUN: llc -mcpu=core2 -mtriple=x86_64-pc-win32 -O0 < %s | FileCheck --check-prefix=X64 %s
-; RUN: llc -mcpu=core2 -mtriple=x86_64-pc-win32 -o - -O0 < %s | llvm-mc -triple=x86_64-pc-win32 -filetype=obj | llvm-readobj -s -sr -codeview -section-symbols | FileCheck --check-prefix=OBJ64 %s
+; RUN: llc -mcpu=core2 -mtriple=x86_64-pc-win32 -o - -O0 < %s | llvm-mc -triple=x86_64-pc-win32 -filetype=obj | llvm-readobj -s -sr -codeview | FileCheck --check-prefix=OBJ64 %s
; This LL file was generated by running clang on the following code:
; D:\asm.c:
@@ -19,7 +19,6 @@
; X86: calll _g
; X86-NEXT: [[RETURN_STMT:.*]]:
; X86: ret
-; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_F:^L.*]]:
;
; X86-LABEL: .section .debug$S,"dr"
@@ -95,6 +94,7 @@
; OBJ32-NEXT: 0x44 IMAGE_REL_I386_SECREL _f
; OBJ32-NEXT: 0x48 IMAGE_REL_I386_SECTION _f
; OBJ32-NEXT: ]
+; OBJ32: CodeViewDebugInfo [
; OBJ32: Subsection [
; OBJ32-NEXT: Type: 0xF1
; OBJ32-NOT: ]
@@ -125,20 +125,21 @@
; OBJ32-NEXT: ColEnd: 0
; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
-; OBJ32: }
; X64-LABEL: f:
; X64-NEXT: .L{{.*}}:{{$}}
; X64-NEXT: [[START:.*]]:{{$}}
; X64: # BB
-; X64: subq $40, %rsp
+; X64: pushq %rbp
+; X64-NEXT: subq $32, %rsp
+; X64-NEXT: leaq 32(%rsp), %rbp
; X64-NEXT: [[ASM_LINE:.*]]:{{$}}
; X64: [[CALL_LINE:.*]]:{{$}}
; X64: callq g
; X64-NEXT: [[EPILOG_AND_RET:.*]]:
-; X64: addq $40, %rsp
+; X64: addq $32, %rsp
+; X64-NEXT: popq %rbp
; X64-NEXT: ret
-; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_F:.*]]:
;
; X64-LABEL: .section .debug$S,"dr"
@@ -224,22 +225,22 @@
; OBJ64: ProcStart {
; OBJ64-NEXT: DisplayName: f
; OBJ64-NEXT: Section: f
-; OBJ64-NEXT: CodeSize: 0xE
+; OBJ64-NEXT: CodeSize: 0x17
; OBJ64-NEXT: }
; OBJ64-NEXT: ProcEnd
; OBJ64-NEXT: ]
; OBJ64: FunctionLineTable [
; OBJ64-NEXT: Name: f
; OBJ64-NEXT: Flags: 0x1
-; OBJ64-NEXT: CodeSize: 0xE
+; OBJ64-NEXT: CodeSize: 0x17
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\asm.c
; OBJ64-NEXT: +0x0: 3
; FIXME: An empty __asm stmt creates an extra entry.
; See PR18679 for the details.
-; OBJ64-NEXT: +0x4: 4
-; OBJ64-NEXT: +0x4: 5
-; OBJ64-NEXT: +0x9: 6
+; OBJ64-NEXT: +0xA: 4
+; OBJ64-NEXT: +0xC: 5
+; OBJ64-NEXT: +0x11: 6
; OBJ64-NEXT: ColStart: 0
; OBJ64-NEXT: ColEnd: 0
; OBJ64-NEXT: ColStart: 0
@@ -250,10 +251,9 @@
; OBJ64-NEXT: ColEnd: 0
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
-; OBJ64: }
; Function Attrs: nounwind
-define void @f() #0 {
+define void @f() #0 !dbg !4 {
entry:
call void asm sideeffect inteldialect ".align 4", "~{dirflag},~{fpsr},~{flags}"() #2, !dbg !12
call void @g(), !dbg !13
@@ -270,16 +270,16 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<unknown>", directory: "D:\5C")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !7, function: void ()* @f, variables: !2)
+!4 = distinct !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "asm.c", directory: "D:\5C")
!6 = !DIFile(filename: "asm.c", directory: "D:C")
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"CodeView", i32 1}
!10 = !{i32 1, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5 "}
!12 = !DILocation(line: 4, scope: !4)
diff --git a/test/DebugInfo/COFF/cpp-mangling.ll b/test/DebugInfo/COFF/cpp-mangling.ll
index 137377847aae..25461baa2266 100644
--- a/test/DebugInfo/COFF/cpp-mangling.ll
+++ b/test/DebugInfo/COFF/cpp-mangling.ll
@@ -29,15 +29,15 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "D:\5C")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !6, type: !7, function: i32 (i32)* @"\01?bar@foo@@YAHH@Z", variables: !2)
+!4 = distinct !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "src.cpp", directory: "D:\5C")
!6 = !DIFile(filename: "src.cpp", directory: "D:C")
!7 = !DISubroutineType(types: !2)
-!8 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"CodeView", i32 1}
!9 = !{i32 2, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.6.0 "}
!11 = !DILocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/COFF/multifile.ll b/test/DebugInfo/COFF/multifile.ll
index 3dedacebc140..70bc0022cfb2 100644
--- a/test/DebugInfo/COFF/multifile.ll
+++ b/test/DebugInfo/COFF/multifile.ll
@@ -26,7 +26,6 @@
; X86: calll _g
; X86-NEXT: [[RETURN_STMT:.*]]:
; X86: ret
-; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_F:.*]]:
;
; X86-LABEL: .section .debug$S,"dr"
@@ -158,7 +157,6 @@
; OBJ32-NEXT: ColEnd: 0
; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
-; OBJ32: }
; X64-LABEL: f:
; X64-NEXT: .L{{.*}}:{{$}}
@@ -174,7 +172,6 @@
; X64-NEXT: [[EPILOG_AND_RET:.*]]:
; X64: addq $40, %rsp
; X64-NEXT: ret
-; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_F:.*]]:
;
; X64-LABEL: .section .debug$S,"dr"
@@ -326,10 +323,9 @@
; OBJ64-NEXT: ColEnd: 0
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
-; OBJ64: }
; Function Attrs: nounwind
-define void @f() #0 {
+define void @f() #0 !dbg !4 {
entry:
call void @g(), !dbg !12
call void @g(), !dbg !15
@@ -346,16 +342,16 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<unknown>", directory: "D:\5C")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !7, function: void ()* @f, variables: !2)
+!4 = distinct !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "input.c", directory: "D:\5C")
!6 = !DIFile(filename: "input.c", directory: "D:C")
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"CodeView", i32 1}
!10 = !{i32 1, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5 "}
!12 = !DILocation(line: 1, scope: !13)
diff --git a/test/DebugInfo/COFF/multifunction.ll b/test/DebugInfo/COFF/multifunction.ll
index bbf97dd4afc0..4c044fa9c4fa 100644
--- a/test/DebugInfo/COFF/multifunction.ll
+++ b/test/DebugInfo/COFF/multifunction.ll
@@ -28,7 +28,6 @@
; X86: calll _z
; X86-NEXT: [[X_RETURN:.*]]:
; X86: ret
-; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_X:.*]]:
;
; X86-LABEL: _y:
@@ -37,7 +36,6 @@
; X86: calll _z
; X86-NEXT: [[Y_RETURN:.*]]:
; X86: ret
-; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_Y:.*]]:
;
; X86-LABEL: _f:
@@ -50,7 +48,6 @@
; X86: calll _z
; X86-NEXT: [[F_RETURN:.*]]:
; X86: ret
-; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_F:.*]]:
;
; X86-LABEL: .section .debug$S,"dr"
@@ -310,7 +307,6 @@
; OBJ32-NEXT: ColEnd: 73
; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
-; OBJ32: }
; X64-LABEL: x:
; X64-NEXT: .L{{.*}}:
@@ -322,7 +318,6 @@
; X64-NEXT: [[X_EPILOG_AND_RET:.*]]:
; X64: addq $40, %rsp
; X64-NEXT: ret
-; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_X:.*]]:
;
; X64-LABEL: y:
@@ -335,7 +330,6 @@
; X64-NEXT: [[Y_EPILOG_AND_RET:.*]]:
; X64: addq $40, %rsp
; X64-NEXT: ret
-; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_Y:.*]]:
;
; X64-LABEL: f:
@@ -352,7 +346,6 @@
; X64-NEXT: [[F_EPILOG_AND_RET:.*]]:
; X64: addq $40, %rsp
; X64-NEXT: ret
-; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_F:.*]]:
;
; X64-LABEL: .section .debug$S,"dr"
@@ -633,10 +626,9 @@
; OBJ64-NEXT: ColEnd: 73
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
-; OBJ64: }
; Function Attrs: nounwind
-define void @x() #0 {
+define void @x() #0 !dbg !4 {
entry:
call void @z(), !dbg !14
ret void, !dbg !15
@@ -645,14 +637,14 @@ entry:
declare void @z() #1
; Function Attrs: nounwind
-define void @y() #0 {
+define void @y() #0 !dbg !9 {
entry:
call void @z(), !dbg !16
ret void, !dbg !17
}
; Function Attrs: nounwind
-define void @f() #0 {
+define void @f() #0 !dbg !10 {
entry:
call void @x(), !dbg !18
call void @y(), !dbg !19
@@ -667,18 +659,18 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
!llvm.module.flags = !{!11, !12}
!llvm.ident = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<unknown>", directory: "D:\5C")
!2 = !{}
!3 = !{!4, !9, !10}
-!4 = !DISubprogram(name: "x", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !7, function: void ()* @x, variables: !2)
+!4 = distinct !DISubprogram(name: "x", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "source.c", directory: "D:\5C")
!6 = !DIFile(filename: "source.c", directory: "D:C")
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !DISubprogram(name: "y", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !5, scope: !6, type: !7, function: void ()* @y, variables: !2)
-!10 = !DISubprogram(name: "f", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !6, type: !7, function: void ()* @f, variables: !2)
-!11 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = distinct !DISubprogram(name: "y", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !5, scope: !6, type: !7, variables: !2)
+!10 = distinct !DISubprogram(name: "f", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !6, type: !7, variables: !2)
+!11 = !{i32 2, !"CodeView", i32 1}
!12 = !{i32 1, !"Debug Info Version", i32 3}
!13 = !{!"clang version 3.5 "}
!14 = !DILocation(line: 4, column: 42, scope: !4)
diff --git a/test/DebugInfo/COFF/simple.ll b/test/DebugInfo/COFF/simple.ll
index 0d9857c7831d..2103df07f6dc 100644
--- a/test/DebugInfo/COFF/simple.ll
+++ b/test/DebugInfo/COFF/simple.ll
@@ -17,7 +17,6 @@
; X86: calll _g
; X86-NEXT: [[RETURN_STMT:.*]]:
; X86: ret
-; X86-NEXT: L{{.*}}:
; X86-NEXT: [[END_OF_F:.*]]:
;
; X86-LABEL: .section .debug$S,"dr"
@@ -113,7 +112,6 @@
; OBJ32-NEXT: ColEnd: 0
; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
-; OBJ32: }
; X64-LABEL: f:
; X64-NEXT: .L{{.*}}:{{$}}
@@ -125,7 +123,6 @@
; X64-NEXT: [[EPILOG_AND_RET:.*]]:
; X64: addq $40, %rsp
; X64-NEXT: ret
-; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[END_OF_F:.*]]:
;
; X64-LABEL: .section .debug$S,"dr"
@@ -228,10 +225,9 @@
; OBJ64-NEXT: ColEnd: 0
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
-; OBJ64: }
; Function Attrs: nounwind
-define void @f() #0 {
+define void @f() #0 !dbg !4 {
entry:
call void @g(), !dbg !12
ret void, !dbg !13
@@ -246,16 +242,16 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<unknown>", directory: "D:\5C")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !7, function: void ()* @f, variables: !2)
+!4 = distinct !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "D:\5C")
!6 = !DIFile(filename: "test.c", directory: "D:C")
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"CodeView", i32 1}
!10 = !{i32 1, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5 "}
!12 = !DILocation(line: 4, scope: !4)
diff --git a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
index 054a2e89a8fb..28b8d2859efa 100644
--- a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
+++ b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
@@ -62,15 +62,15 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.cpp", directory: "D:\5C")
!2 = !{}
!3 = !{!4, !7}
-!4 = !DISubprogram(name: "spam", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !1, scope: !5, type: !6, function: void ()* @"\01?spam@@YAXXZ", variables: !2)
+!4 = distinct !DISubprogram(name: "spam", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.cpp", directory: "D:C")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "bar", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
-!8 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = distinct !DISubprogram(name: "bar", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!8 = !{i32 2, !"CodeView", i32 1}
!9 = !{i32 1, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.5.0 "}
!11 = !DILocation(line: 8, scope: !4)
diff --git a/test/DebugInfo/2009-10-16-Phi.ll b/test/DebugInfo/Generic/2009-10-16-Phi.ll
index e14653f6fcf2..e14653f6fcf2 100644
--- a/test/DebugInfo/2009-10-16-Phi.ll
+++ b/test/DebugInfo/Generic/2009-10-16-Phi.ll
diff --git a/test/DebugInfo/Generic/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/Generic/2009-11-03-InsertExtractValue.ll
new file mode 100644
index 000000000000..c992a43c858c
--- /dev/null
+++ b/test/DebugInfo/Generic/2009-11-03-InsertExtractValue.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!5}
+!llvm.module.flags = !{!6}
+
+!0 = !DISubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagProtected | DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !4, scope: !1, type: !2)
+!1 = !DIFile(filename: "/foo", directory: "bar.cpp")
+!2 = !DISubroutineType(types: !3)
+!3 = !{null}
+!4 = !DIFile(filename: "/foo", directory: "bar.cpp")
+!5 = distinct !DICompileUnit(language: DW_LANG_C99, isOptimized: true, emissionKind: 0, file: !4, enums: !{}, retainedTypes: !{})
+
+define <{i32, i32}> @f1() {
+; CHECK: !dbgx ![[NUMBER:[0-9]+]]
+ %r = insertvalue <{ i32, i32 }> zeroinitializer, i32 4, 1, !dbgx !1
+; CHECK: !dbgx ![[NUMBER]]
+ %e = extractvalue <{ i32, i32 }> %r, 0, !dbgx !1
+ ret <{ i32, i32 }> %r
+}
+
+; CHECK: DIFlagProtected
+!6 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/Generic/2009-11-05-DeadGlobalVariable.ll
new file mode 100644
index 000000000000..40dacf8848b2
--- /dev/null
+++ b/test/DebugInfo/Generic/2009-11-05-DeadGlobalVariable.ll
@@ -0,0 +1,26 @@
+; RUN: llc %s -o /dev/null
+; Here variable bar is optimized away. Do not trip over while trying to generate debug info.
+
+
+define i32 @foo() nounwind uwtable readnone ssp !dbg !5 {
+entry:
+ ret i32 42, !dbg !15
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 139632)", isOptimized: true, emissionKind: 0, file: !17, enums: !1, retainedTypes: !1, subprograms: !3, globals: !12)
+!1 = !{}
+!3 = !{!5}
+!5 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !17, scope: !6, type: !7)
+!6 = !DIFile(filename: "fb.c", directory: "/private/tmp")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9}
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!12 = !{!14}
+!14 = !DIGlobalVariable(name: "bar", line: 2, isLocal: true, isDefinition: true, scope: !5, file: !6, type: !9)
+!15 = !DILocation(line: 3, column: 3, scope: !16)
+!16 = distinct !DILexicalBlock(line: 1, column: 11, file: !17, scope: !5)
+!17 = !DIFile(filename: "fb.c", directory: "/private/tmp")
+!18 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2009-11-06-NamelessGlobalVariable.ll b/test/DebugInfo/Generic/2009-11-06-NamelessGlobalVariable.ll
new file mode 100644
index 000000000000..a871a257cc8b
--- /dev/null
+++ b/test/DebugInfo/Generic/2009-11-06-NamelessGlobalVariable.ll
@@ -0,0 +1,14 @@
+; RUN: llc %s -o /dev/null
+@0 = internal constant i32 1
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 139632)", isOptimized: true, emissionKind: 0, file: !8, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3)
+!2 = !{}
+!3 = !{!5}
+!5 = !DIGlobalVariable(name: "a", line: 2, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: i32* @0)
+!6 = !DIFile(filename: "g.c", directory: "/private/tmp")
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DIFile(filename: "g.c", directory: "/private/tmp")
+!9 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2009-11-10-CurrentFn.ll b/test/DebugInfo/Generic/2009-11-10-CurrentFn.ll
new file mode 100644
index 000000000000..b0961135ebf0
--- /dev/null
+++ b/test/DebugInfo/Generic/2009-11-10-CurrentFn.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -o /dev/null
+
+define void @bar(i32 %i) nounwind uwtable ssp !dbg !5 {
+entry:
+ tail call void (...) @foo() nounwind, !dbg !14
+ ret void, !dbg !16
+}
+
+declare void @foo(...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 139632)", isOptimized: true, emissionKind: 0, file: !17, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
+!1 = !{}
+!3 = !{!5}
+!5 = distinct !DISubprogram(name: "bar", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !17, scope: !6, type: !7, variables: !9)
+!6 = !DIFile(filename: "cf.c", directory: "/private/tmp")
+!7 = !DISubroutineType(types: !8)
+!8 = !{null}
+!9 = !{!11}
+!11 = !DILocalVariable(name: "i", line: 3, arg: 1, scope: !5, file: !17, type: !12)
+!12 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !DILocation(line: 3, column: 14, scope: !5)
+!14 = !DILocation(line: 4, column: 3, scope: !15)
+!15 = distinct !DILexicalBlock(line: 3, column: 17, file: !17, scope: !5)
+!16 = !DILocation(line: 5, column: 1, scope: !15)
+!17 = !DIFile(filename: "cf.c", directory: "/private/tmp")
+!18 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-01-05-DbgScope.ll b/test/DebugInfo/Generic/2010-01-05-DbgScope.ll
new file mode 100644
index 000000000000..c6d7ca85847f
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-01-05-DbgScope.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -o /dev/null
+; PR 5942
+define i8* @foo() nounwind {
+entry:
+ %0 = load i32, i32* undef, align 4, !dbg !0 ; <i32> [#uses=1]
+ %1 = inttoptr i32 %0 to i8*, !dbg !0 ; <i8*> [#uses=1]
+ ret i8* %1, !dbg !10
+
+}
+
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!14}
+
+!0 = !DILocation(line: 571, column: 3, scope: !1)
+!1 = distinct !DILexicalBlock(line: 1, column: 1, file: !11, scope: !2)
+!2 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 561, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3, type: !4)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !11, enums: !12, retainedTypes: !12, subprograms: !13)
+!4 = !DISubroutineType(types: !5)
+!5 = !{!6}
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!10 = !DILocation(line: 588, column: 1, scope: !2)
+!11 = !DIFile(filename: "hashtab.c", directory: "/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty")
+!12 = !{}
+!13 = !{!2}
+!14 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-03-12-llc-crash.ll b/test/DebugInfo/Generic/2010-03-12-llc-crash.ll
new file mode 100644
index 000000000000..aaa013c803f5
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-03-12-llc-crash.ll
@@ -0,0 +1,22 @@
+; RUN: llc -O0 < %s -o /dev/null
+; llc should not crash on this optimized out debug info.
+; PR6588
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+define void @foo() {
+entry:
+ call void @llvm.dbg.declare(metadata i32* undef, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
+ ret void
+}
+
+!0 = !DILocalVariable(name: "sy", line: 890, arg: 1, scope: !1, file: !2, type: !7)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 892, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !3, type: !4)
+!2 = !DIFile(filename: "qpainter.h", directory: "QtGui")
+!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !9, enums: !10, retainedTypes: !10)
+!4 = !DISubroutineType(types: !6)
+!5 = !DIFile(filename: "splineeditor.cpp", directory: "src")
+!6 = !{null}
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DIFile(filename: "qpainter.h", directory: "QtGui")
+!9 = !DIFile(filename: "splineeditor.cpp", directory: "src")
+!10 = !{i32 0}
diff --git a/test/DebugInfo/Generic/2010-03-19-DbgDeclare.ll b/test/DebugInfo/Generic/2010-03-19-DbgDeclare.ll
new file mode 100644
index 000000000000..fe7eaebc4ed5
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-03-19-DbgDeclare.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -verify -S | FileCheck %s
+
+; CHECK: DW_LANG_Mips_Assembler
+
+define void @Foo(i32 %a, i32 %b) {
+entry:
+ call void @llvm.dbg.declare(metadata i32* null, metadata !1, metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ ret void
+}
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!5}
+!2 = distinct !DICompileUnit(language: DW_LANG_Mips_Assembler, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 1, file: !4, enums: !3, retainedTypes: !3, subprograms: !3, globals: !3, imports: !3)
+!3 = !{}
+!0 = !DILocation(line: 662302, column: 26, scope: !1)
+!1 = !DILocalVariable(name: "foo", scope: !6)
+!4 = !DIFile(filename: "scratch.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
+!6 = distinct !DISubprogram()
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+!5 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-03-24-MemberFn.ll b/test/DebugInfo/Generic/2010-03-24-MemberFn.ll
new file mode 100644
index 000000000000..5f63ce295fad
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-03-24-MemberFn.ll
@@ -0,0 +1,70 @@
+; RUN: %llc_dwarf -O0 < %s | grep AT_decl_file | grep 2
+; Here _ZN1S3fooEv is defined in header file identified as AT_decl_file no. 2 in debug info.
+%struct.S = type <{ i8 }>
+
+define i32 @_Z3barv() nounwind ssp !dbg !3 {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+ %s1 = alloca %struct.S ; <%struct.S*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata %struct.S* %s1, metadata !0, metadata !DIExpression()), !dbg !16
+ %1 = call i32 @_ZN1S3fooEv(%struct.S* %s1) nounwind, !dbg !17 ; <i32> [#uses=1]
+ store i32 %1, i32* %0, align 4, !dbg !17
+ %2 = load i32, i32* %0, align 4, !dbg !17 ; <i32> [#uses=1]
+ store i32 %2, i32* %retval, align 4, !dbg !17
+ br label %return, !dbg !17
+
+return: ; preds = %entry
+ %retval1 = load i32, i32* %retval, !dbg !17 ; <i32> [#uses=1]
+ ret i32 %retval1, !dbg !16
+}
+
+define linkonce_odr i32 @_ZN1S3fooEv(%struct.S* %this) nounwind ssp align 2 !dbg !12 {
+entry:
+ %this_addr = alloca %struct.S* ; <%struct.S**> [#uses=1]
+ %retval = alloca i32 ; <i32*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata %struct.S** %this_addr, metadata !18, metadata !DIExpression()), !dbg !21
+ store %struct.S* %this, %struct.S** %this_addr
+ br label %return, !dbg !21
+
+return: ; preds = %entry
+ %retval1 = load i32, i32* %retval, !dbg !21 ; <i32> [#uses=1]
+ ret i32 %retval1, !dbg !22
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!5}
+!llvm.module.flags = !{!28}
+
+!0 = !DILocalVariable(name: "s1", line: 3, scope: !1, file: !4, type: !9)
+!1 = distinct !DILexicalBlock(line: 3, column: 0, file: !25, scope: !2)
+!2 = distinct !DILexicalBlock(line: 3, column: 0, file: !25, scope: !3)
+!3 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 3, file: !25, scope: !4, type: !6)
+!4 = !DIFile(filename: "one.cc", directory: "/tmp/")
+!5 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !25, enums: !27, retainedTypes: !27, subprograms: !24, imports: null)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", line: 2, size: 8, align: 8, file: !26, scope: !4, elements: !11)
+!10 = !DIFile(filename: "one.h", directory: "/tmp/")
+!11 = !{!12}
+!12 = distinct !DISubprogram(name: "foo", linkageName: "_ZN1S3fooEv", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 3, file: !26, scope: !9, type: !13)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!8, !15}
+!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !25, scope: !4, baseType: !9)
+!16 = !DILocation(line: 3, scope: !1)
+!17 = !DILocation(line: 3, scope: !3)
+!18 = !DILocalVariable(name: "this", line: 3, arg: 1, scope: !12, file: !10, type: !19)
+!19 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !25, scope: !4, baseType: !20)
+!20 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !25, scope: !4, baseType: !9)
+!21 = !DILocation(line: 3, scope: !12)
+!22 = !DILocation(line: 3, scope: !23)
+!23 = distinct !DILexicalBlock(line: 3, column: 0, file: !26, scope: !12)
+!24 = !{!3, !12}
+!25 = !DIFile(filename: "one.cc", directory: "/tmp/")
+!26 = !DIFile(filename: "one.h", directory: "/tmp/")
+!27 = !{}
+!28 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/Generic/2010-04-06-NestedFnDbgInfo.ll
new file mode 100644
index 000000000000..a85ad9adcf7d
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-04-06-NestedFnDbgInfo.ll
@@ -0,0 +1,112 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj -o - < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; Radar 7833483
+; Do not emit a separate out-of-line definition DIE for the function-local 'foo'
+; function (member of the function local 'A' type)
+; CHECK: DW_TAG_class_type
+; CHECK: DW_TAG_class_type
+; CHECK-NEXT: DW_AT_name {{.*}} "A"
+; Check that the subprogram inside the class definition has low_pc, only
+; attached to the definition.
+; CHECK: [[FOO_INL:0x........]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_low_pc
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "foo"
+; And just double check that there's no out of line definition that references
+; this subprogram.
+; CHECK-NOT: DW_AT_specification {{.*}} {[[FOO_INL]]}
+
+%class.A = type { i8 }
+%class.B = type { i8 }
+
+define i32 @main() ssp !dbg !2 {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=3]
+ %b = alloca %class.A, align 1 ; <%class.A*> [#uses=1]
+ store i32 0, i32* %retval
+ call void @llvm.dbg.declare(metadata %class.A* %b, metadata !0, metadata !DIExpression()), !dbg !14
+ %call = call i32 @_ZN1B2fnEv(%class.A* %b), !dbg !15 ; <i32> [#uses=1]
+ store i32 %call, i32* %retval, !dbg !15
+ %0 = load i32, i32* %retval, !dbg !16 ; <i32> [#uses=1]
+ ret i32 %0, !dbg !16
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+define linkonce_odr i32 @_ZN1B2fnEv(%class.A* %this) ssp align 2 !dbg !10 {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=2]
+ %this.addr = alloca %class.A*, align 8 ; <%class.A**> [#uses=2]
+ %a = alloca %class.A, align 1 ; <%class.A*> [#uses=1]
+ %i = alloca i32, align 4 ; <i32*> [#uses=2]
+ store %class.A* %this, %class.A** %this.addr
+ call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !17, metadata !DIExpression()), !dbg !18
+ %this1 = load %class.A*, %class.A** %this.addr ; <%class.A*> [#uses=0]
+ call void @llvm.dbg.declare(metadata %class.A* %a, metadata !19, metadata !DIExpression()), !dbg !27
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !28, metadata !DIExpression()), !dbg !29
+ %call = call i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %a), !dbg !30 ; <i32> [#uses=1]
+ store i32 %call, i32* %i, !dbg !30
+ %tmp = load i32, i32* %i, !dbg !31 ; <i32> [#uses=1]
+ store i32 %tmp, i32* %retval, !dbg !31
+ %0 = load i32, i32* %retval, !dbg !32 ; <i32> [#uses=1]
+ ret i32 %0, !dbg !32
+}
+
+define internal i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %this) ssp align 2 !dbg !23 {
+entry:
+ %retval = alloca i32, align 4 ; <i32*> [#uses=2]
+ %this.addr = alloca %class.A*, align 8 ; <%class.A**> [#uses=2]
+ store %class.A* %this, %class.A** %this.addr
+ call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !33, metadata !DIExpression()), !dbg !34
+ %this1 = load %class.A*, %class.A** %this.addr ; <%class.A*> [#uses=0]
+ store i32 42, i32* %retval, !dbg !35
+ %0 = load i32, i32* %retval, !dbg !35 ; <i32> [#uses=1]
+ ret i32 %0, !dbg !35
+}
+
+!llvm.dbg.cu = !{!4}
+!llvm.module.flags = !{!40}
+!37 = !{!2, !10, !23}
+
+!0 = !DILocalVariable(name: "b", line: 16, scope: !1, file: !3, type: !8)
+!1 = distinct !DILexicalBlock(line: 15, column: 12, file: !38, scope: !2)
+!2 = distinct !DISubprogram(name: "main", linkageName: "main", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 15, file: !38, scope: !3, type: !5)
+!3 = !DIFile(filename: "one.cc", directory: "/tmp")
+!4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang 1.5", isOptimized: false, emissionKind: 0, file: !38, enums: !39, retainedTypes: !39, subprograms: !37, imports: null)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DICompositeType(tag: DW_TAG_class_type, name: "B", line: 2, size: 8, align: 8, file: !38, scope: !3, elements: !9)
+!9 = !{!10}
+!10 = distinct !DISubprogram(name: "fn", linkageName: "_ZN1B2fnEv", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !38, scope: !8, type: !11)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!7, !13}
+!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !38, scope: !3, baseType: !8)
+!14 = !DILocation(line: 16, column: 5, scope: !1)
+!15 = !DILocation(line: 17, column: 3, scope: !1)
+!16 = !DILocation(line: 18, column: 1, scope: !2)
+!17 = !DILocalVariable(name: "this", line: 4, arg: 1, scope: !10, file: !3, type: !13)
+!18 = !DILocation(line: 4, column: 7, scope: !10)
+!19 = !DILocalVariable(name: "a", line: 9, scope: !20, file: !3, type: !21)
+!20 = distinct !DILexicalBlock(line: 4, column: 12, file: !38, scope: !10)
+!21 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 5, size: 8, align: 8, file: !38, scope: !10, elements: !22)
+!22 = !{!23}
+!23 = distinct !DISubprogram(name: "foo", linkageName: "_ZZN1B2fnEvEN1A3fooEv", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !38, scope: !21, type: !24)
+!24 = !DISubroutineType(types: !25)
+!25 = !{!7, !26}
+!26 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !38, scope: !3, baseType: !21)
+!27 = !DILocation(line: 9, column: 7, scope: !20)
+!28 = !DILocalVariable(name: "i", line: 10, scope: !20, file: !3, type: !7)
+!29 = !DILocation(line: 10, column: 9, scope: !20)
+!30 = !DILocation(line: 10, column: 5, scope: !20)
+!31 = !DILocation(line: 11, column: 5, scope: !20)
+!32 = !DILocation(line: 12, column: 3, scope: !10)
+!33 = !DILocalVariable(name: "this", line: 7, arg: 1, scope: !23, file: !3, type: !26)
+!34 = !DILocation(line: 7, column: 11, scope: !23)
+!35 = !DILocation(line: 7, column: 19, scope: !36)
+!36 = distinct !DILexicalBlock(line: 7, column: 17, file: !38, scope: !23)
+!38 = !DIFile(filename: "one.cc", directory: "/tmp")
+!39 = !{}
+!40 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-04-19-FramePtr.ll b/test/DebugInfo/Generic/2010-04-19-FramePtr.ll
new file mode 100644
index 000000000000..3b7280028c01
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-04-19-FramePtr.ll
@@ -0,0 +1,37 @@
+; RUN: %llc_dwarf -asm-verbose -O1 -o %t < %s
+; RUN: grep DW_AT_APPLE_omit_frame_ptr %t
+; RUN: %llc_dwarf -disable-fp-elim -asm-verbose -O1 -o %t < %s
+; RUN: grep -v DW_AT_APPLE_omit_frame_ptr %t
+
+
+define i32 @foo() nounwind ssp !dbg !1 {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store i32 42, i32* %0, align 4, !dbg !0
+ %1 = load i32, i32* %0, align 4, !dbg !0 ; <i32> [#uses=1]
+ store i32 %1, i32* %retval, align 4, !dbg !0
+ br label %return, !dbg !0
+
+return: ; preds = %entry
+ %retval1 = load i32, i32* %retval, !dbg !0 ; <i32> [#uses=1]
+ ret i32 %retval1, !dbg !7
+}
+
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!12}
+!9 = !{!1}
+
+!0 = !DILocation(line: 2, scope: !1)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !10, scope: null, type: !4)
+!2 = !DIFile(filename: "a.c", directory: "/tmp")
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !10, enums: !11, retainedTypes: !11, subprograms: !9, imports: null)
+!4 = !DISubroutineType(types: !5)
+!5 = !{!6}
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !DILocation(line: 2, scope: !8)
+!8 = distinct !DILexicalBlock(line: 2, column: 0, file: !10, scope: !1)
+!10 = !DIFile(filename: "a.c", directory: "/tmp")
+!11 = !{}
+!12 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-05-03-DisableFramePtr.ll b/test/DebugInfo/Generic/2010-05-03-DisableFramePtr.ll
new file mode 100644
index 000000000000..c67ed73dac6c
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-05-03-DisableFramePtr.ll
@@ -0,0 +1,40 @@
+; RUN: llc -o /dev/null < %s
+; Radar 7937664
+%struct.AppleEvent = type opaque
+
+define void @DisposeDMNotificationUPP(void (%struct.AppleEvent*)* %userUPP) "no-frame-pointer-elim-non-leaf" nounwind ssp {
+entry:
+ %userUPP_addr = alloca void (%struct.AppleEvent*)* ; <void (%struct.AppleEvent*)**> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata void (%struct.AppleEvent*)** %userUPP_addr, metadata !0, metadata !DIExpression()), !dbg !13
+ store void (%struct.AppleEvent*)* %userUPP, void (%struct.AppleEvent*)** %userUPP_addr
+ br label %return, !dbg !14
+
+return: ; preds = %entry
+ ret void, !dbg !14
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!19}
+!0 = !DILocalVariable(name: "userUPP", line: 7, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "DisposeDMNotificationUPP", linkageName: "DisposeDMNotificationUPP", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !16, scope: null, type: !4)
+!2 = !DIFile(filename: "t.c", directory: "/Users/echeng/LLVM/radars/r7937664/")
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !16, enums: !17, retainedTypes: !17, subprograms: !18)
+!4 = !DISubroutineType(types: !5)
+!5 = !{null, !6}
+!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "DMNotificationUPP", line: 6, file: !16, scope: !2, baseType: !7)
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !16, scope: !2, baseType: !8)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !16, scope: !2, baseType: !11)
+!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "AppleEvent", line: 4, file: !16, scope: !2, baseType: !12)
+!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "AEDesc", line: 1, flags: DIFlagFwdDecl, file: !16, scope: !2)
+!13 = !DILocation(line: 7, scope: !1)
+!14 = !DILocation(line: 8, scope: !15)
+!15 = distinct !DILexicalBlock(line: 7, column: 0, file: !16, scope: !1)
+!16 = !DIFile(filename: "t.c", directory: "/Users/echeng/LLVM/radars/r7937664/")
+!17 = !{}
+!18 = !{!1}
+!19 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-05-03-OriginDIE.ll b/test/DebugInfo/Generic/2010-05-03-OriginDIE.ll
new file mode 100644
index 000000000000..9ebfb06cc5e3
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-05-03-OriginDIE.ll
@@ -0,0 +1,94 @@
+
+;RUN: llc < %s -o /dev/null
+;Radar 7937109
+
+%struct.anon = type { i64, i32, i32, i32, [1 x i32] }
+%struct.gpm_t = type { i32, i8*, [16 x i8], i32, i64, i64, i64, i64, i64, i64, i32, i16, i16, [8 x %struct.gpmr_t] }
+%struct.gpmr_t = type { [48 x i8], [48 x i8], [16 x i8], i64, i64, i64, i64, i16 }
+%struct.gpt_t = type { [8 x i8], i32, i32, i32, i32, i64, i64, i64, i64, [16 x i8], %struct.anon }
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.gpm_t*, %struct.gpt_t*)* @gpt2gpm to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define fastcc void @gpt2gpm(%struct.gpm_t* %gpm, %struct.gpt_t* %gpt) nounwind optsize ssp {
+entry:
+ %data_addr.i18 = alloca i64, align 8 ; <i64*> [#uses=1]
+ %data_addr.i17 = alloca i64, align 8 ; <i64*> [#uses=2]
+ %data_addr.i16 = alloca i64, align 8 ; <i64*> [#uses=0]
+ %data_addr.i15 = alloca i32, align 4 ; <i32*> [#uses=0]
+ %data_addr.i = alloca i64, align 8 ; <i64*> [#uses=0]
+ %0 = getelementptr inbounds %struct.gpm_t, %struct.gpm_t* %gpm, i32 0, i32 2, i32 0 ; <i8*> [#uses=1]
+ %1 = getelementptr inbounds %struct.gpt_t, %struct.gpt_t* %gpt, i32 0, i32 9, i32 0 ; <i8*> [#uses=1]
+ call void @uuid_LtoB(i8* %0, i8* %1) nounwind, !dbg !0
+ %a9 = load volatile i64, i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
+ %a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; <i64> [#uses=1]
+ %a11 = getelementptr inbounds %struct.gpt_t, %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; <i64*> [#uses=1]
+ %a12 = load i64, i64* %a11, align 4, !dbg !7 ; <i64> [#uses=1]
+ call void @llvm.dbg.declare(metadata i64* %data_addr.i17, metadata !8, metadata !DIExpression()) nounwind, !dbg !14
+ store i64 %a12, i64* %data_addr.i17, align 8
+ call void @llvm.dbg.value(metadata !6, i64 0, metadata !15, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !16)
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !19, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !16)
+ call void @llvm.dbg.declare(metadata !6, metadata !23, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !24)
+ call void @llvm.dbg.value(metadata i64* %data_addr.i17, i64 0, metadata !34, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !24)
+ %a13 = load volatile i64, i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
+ %a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; <i64> [#uses=2]
+ %a15 = add i64 %a10, %a14, !dbg !7 ; <i64> [#uses=1]
+ %a16 = sub i64 %a15, %a14 ; <i64> [#uses=1]
+ %a17 = getelementptr inbounds %struct.gpm_t, %struct.gpm_t* %gpm, i32 0, i32 5, !dbg !7 ; <i64*> [#uses=1]
+ store i64 %a16, i64* %a17, align 4, !dbg !7
+ ret void, !dbg !7
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
+declare void @uuid_LtoB(i8*, i8*)
+
+!llvm.dbg.cu = !{!4}
+!llvm.module.flags = !{!41}
+!0 = !DILocation(line: 808, scope: !1)
+!1 = distinct !DILexicalBlock(line: 807, column: 0, file: !39, scope: !2)
+!2 = distinct !DISubprogram(name: "gpt2gpm", linkageName: "gpt2gpm", line: 807, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !39, scope: null, type: !5)
+!3 = !DIFile(filename: "G.c", directory: "/tmp")
+!4 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "llvm-gcc", isOptimized: true, emissionKind: 0, file: !39, enums: !18, retainedTypes: !18, subprograms: !40)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !DILocation(line: 810, scope: !1)
+!8 = !DILocalVariable(name: "data", line: 201, arg: 1, scope: !9, file: !10, type: !11)
+!9 = distinct !DISubprogram(name: "_OSSwapInt64", linkageName: "_OSSwapInt64", line: 202, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !10, scope: null, type: !5)
+!10 = !DIFile(filename: "OSByteOrder.h", directory: "/usr/include/libkern/ppc")
+!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", line: 59, file: !36, scope: !3, baseType: !13)
+!12 = !DIFile(filename: "stdint.h", directory: "/usr/4.2.1/include")
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "long long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!14 = !DILocation(line: 202, scope: !9, inlinedAt: !7)
+!15 = !DILocalVariable(name: "base", line: 92, arg: 2, scope: !16, file: !10, type: !17)
+!16 = distinct !DISubprogram(name: "OSReadSwapInt64", linkageName: "OSReadSwapInt64", line: 95, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !38, scope: null, type: !5)
+!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !39, scope: !3, baseType: null)
+!18 = !{}
+!19 = !DILocalVariable(name: "byteOffset", line: 94, arg: 3, scope: !16, file: !10, type: !20)
+!20 = !DIDerivedType(tag: DW_TAG_typedef, name: "uintptr_t", line: 114, file: !37, scope: !3, baseType: !22)
+!21 = !DIFile(filename: "types.h", directory: "/usr/include/ppc")
+!22 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!23 = !DILocalVariable(name: "u", line: 100, scope: !24, file: !10, type: !25)
+!24 = distinct !DILexicalBlock(line: 95, column: 0, file: !38, scope: !16)
+!25 = !DICompositeType(tag: DW_TAG_union_type, line: 97, size: 64, align: 64, file: !38, scope: !16, elements: !26)
+!26 = !{!27, !28}
+!27 = !DIDerivedType(tag: DW_TAG_member, name: "u64", line: 98, size: 64, align: 64, file: !38, scope: !25, baseType: !11)
+!28 = !DIDerivedType(tag: DW_TAG_member, name: "u32", line: 99, size: 64, align: 32, file: !38, scope: !25, baseType: !29)
+!29 = !DICompositeType(tag: DW_TAG_array_type, size: 64, align: 32, file: !39, scope: !3, baseType: !30, elements: !32)
+!30 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", line: 55, file: !36, scope: !3, baseType: !31)
+!31 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!32 = !{!33}
+!33 = !DISubrange(count: 2)
+!34 = !DILocalVariable(name: "addr", line: 96, scope: !24, file: !10, type: !35)
+!35 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !39, scope: !3, baseType: !11)
+!36 = !DIFile(filename: "stdint.h", directory: "/usr/4.2.1/include")
+!37 = !DIFile(filename: "types.h", directory: "/usr/include/ppc")
+!38 = !DIFile(filename: "OSByteOrder.h", directory: "/usr/include/libkern/ppc")
+!39 = !DIFile(filename: "G.c", directory: "/tmp")
+!40 = !{!2, !9, !16}
+!41 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-05-10-MultipleCU.ll b/test/DebugInfo/Generic/2010-05-10-MultipleCU.ll
new file mode 100644
index 000000000000..486dbf0e940c
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-05-10-MultipleCU.ll
@@ -0,0 +1,44 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Check that two compile units are generated
+
+; CHECK: Compile Unit:
+; CHECK: Compile Unit:
+
+define i32 @foo() nounwind readnone ssp !dbg !2 {
+return:
+ ret i32 42, !dbg !0
+}
+
+define i32 @bar() nounwind readnone ssp !dbg !10 {
+return:
+ ret i32 21, !dbg !8
+}
+
+!llvm.dbg.cu = !{!4, !12}
+!llvm.module.flags = !{!21}
+!16 = !{!2}
+!17 = !{!10}
+
+!0 = !DILocation(line: 3, scope: !1)
+!1 = distinct !DILexicalBlock(line: 2, column: 0, file: !18, scope: !2)
+!2 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !18, scope: !3, type: !5)
+!3 = !DIFile(filename: "a.c", directory: "/tmp/")
+!4 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19, subprograms: !16)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DILocation(line: 3, scope: !9)
+!9 = distinct !DILexicalBlock(line: 2, column: 0, file: !20, scope: !10)
+!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !20, scope: !11, type: !13)
+!11 = !DIFile(filename: "b.c", directory: "/tmp/")
+!12 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !20, enums: !19, retainedTypes: !19, subprograms: !17)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!15}
+!15 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!18 = !DIFile(filename: "a.c", directory: "/tmp/")
+!19 = !{}
+!20 = !DIFile(filename: "b.c", directory: "/tmp/")
+!21 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/Generic/2010-06-29-InlinedFnLocalVar.ll
new file mode 100644
index 000000000000..f01cf6db905c
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-06-29-InlinedFnLocalVar.ll
@@ -0,0 +1,61 @@
+; RUN: %llc_dwarf -O2 %s -o - | FileCheck %s
+; Check struct X for dead variable xyz from inlined function foo.
+
+; CHECK: section_info
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name
+
+
+@i = common global i32 0 ; <i32*> [#uses=2]
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
+
+define i32 @bar() nounwind ssp !dbg !6 {
+entry:
+ %0 = load i32, i32* @i, align 4, !dbg !17 ; <i32> [#uses=2]
+ tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !59, metadata !DIExpression()), !dbg !19
+ tail call void @llvm.dbg.declare(metadata !29, metadata !60, metadata !DIExpression()), !dbg !21
+ %1 = mul nsw i32 %0, %0, !dbg !22 ; <i32> [#uses=2]
+ store i32 %1, i32* @i, align 4, !dbg !17
+ ret i32 %1, !dbg !23
+}
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!28}
+
+!0 = distinct !DISubprogram(name: "foo", line: 9, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 9, file: !27, scope: !1, type: !3, variables: !24)
+!1 = !DIFile(filename: "bar.c", directory: "/tmp/")
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !27, enums: !20, retainedTypes: !20, subprograms: !25, globals: !26, imports: !20)
+!3 = !DISubroutineType(types: !4)
+!4 = !{!5, !5}
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !27, scope: !1, type: !7)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!5}
+!9 = !DILocalVariable(name: "j", line: 9, arg: 1, scope: !0, file: !1, type: !5)
+!10 = !DILocalVariable(name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+
+!59 = !DILocalVariable(name: "j", line: 9, arg: 1, scope: !0, file: !1, type: !5)
+!60 = !DILocalVariable(name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+
+!11 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !0)
+!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "X", line: 10, size: 64, align: 32, file: !27, scope: !0, elements: !13)
+!13 = !{!14, !15}
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 10, size: 32, align: 32, file: !27, scope: !12, baseType: !5)
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 10, size: 32, align: 32, offset: 32, file: !27, scope: !12, baseType: !5)
+!16 = !DIGlobalVariable(name: "i", line: 5, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !5, variable: i32* @i)
+!17 = !DILocation(line: 15, scope: !18)
+!18 = distinct !DILexicalBlock(line: 14, column: 0, file: !1, scope: !6)
+!19 = !DILocation(line: 9, scope: !0, inlinedAt: !17)
+!20 = !{}
+!21 = !DILocation(line: 9, scope: !11, inlinedAt: !17)
+!22 = !DILocation(line: 11, scope: !11, inlinedAt: !17)
+!23 = !DILocation(line: 16, scope: !18)
+!24 = !{!9, !10}
+!25 = !{!0, !6}
+!26 = !{!16}
+!27 = !DIFile(filename: "bar.c", directory: "/tmp/")
+!28 = !{i32 1, !"Debug Info Version", i32 3}
+!29 = !{null}
diff --git a/test/DebugInfo/Generic/2010-07-19-Crash.ll b/test/DebugInfo/Generic/2010-07-19-Crash.ll
new file mode 100644
index 000000000000..9565a2f09cbc
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-07-19-Crash.ll
@@ -0,0 +1,30 @@
+; RUN: llc -o /dev/null < %s
+; PR7662
+; Do not add variables to !11 because it is a declaration entry.
+
+define i32 @bar() nounwind readnone ssp !dbg !0 {
+entry:
+ ret i32 42, !dbg !9
+}
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!15}
+!llvm.dbg.sp = !{!0, !6, !11}
+!llvm.dbg.lv.foo = !{!7}
+
+!0 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3)
+!1 = !DIFile(filename: "one.c", directory: "/private/tmp")
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang 2.8", isOptimized: true, emissionKind: 0, file: !12, enums: !14, retainedTypes: !14, subprograms: !13)
+!3 = !DISubroutineType(types: !4)
+!4 = !{!5}
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3)
+!7 = !DILocalVariable(name: "one", line: 8, scope: !8, file: !1, type: !5)
+!8 = distinct !DILexicalBlock(line: 7, column: 18, file: !12, scope: !6)
+!9 = !DILocation(line: 4, column: 3, scope: !10)
+!10 = distinct !DILexicalBlock(line: 3, column: 11, file: !12, scope: !0)
+!11 = !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: true, isDefinition: false, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3)
+!12 = !DIFile(filename: "one.c", directory: "/private/tmp")
+!13 = !{!0}
+!14 = !{}
+!15 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/2010-10-01-crash.ll b/test/DebugInfo/Generic/2010-10-01-crash.ll
new file mode 100644
index 000000000000..712c32a7f627
--- /dev/null
+++ b/test/DebugInfo/Generic/2010-10-01-crash.ll
@@ -0,0 +1,24 @@
+; RUN: llc -O0 %s -o /dev/null
+
+define void @CGRectStandardize(i32* sret %agg.result, i32* byval %rect) nounwind ssp !dbg !0 {
+entry:
+ call void @llvm.dbg.declare(metadata i32* %rect, metadata !23, metadata !DIExpression()), !dbg !24
+ ret void
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!27}
+!0 = distinct !DISubprogram(name: "CGRectStandardize", linkageName: "CGRectStandardize", line: 54, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !1, scope: null)
+!1 = !DIFile(filename: "GSFusedSilica.m", directory: "/Volumes/Data/Users/sabre/Desktop")
+!2 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 2.9 (trunk 115292)", isOptimized: true, runtimeVersion: 1, emissionKind: 0, file: !25, enums: !26, retainedTypes: !26, subprograms: !{!0})
+!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "CGRect", line: 49, file: !25, baseType: null)
+!23 = !DILocalVariable(name: "rect", line: 53, arg: 2, scope: !0, file: !1, type: !5)
+!24 = !DILocation(line: 53, column: 33, scope: !0)
+!25 = !DIFile(filename: "GSFusedSilica.m", directory: "/Volumes/Data/Users/sabre/Desktop")
+!26 = !{}
+!27 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/Inputs/gmlt.ll b/test/DebugInfo/Generic/Inputs/gmlt.ll
new file mode 100644
index 000000000000..b03a80b4deae
--- /dev/null
+++ b/test/DebugInfo/Generic/Inputs/gmlt.ll
@@ -0,0 +1,153 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s
+
+; Generated from the following source compiled with clang++ -gmlt:
+; void f1() {}
+; void __attribute__((section("__TEXT,__bar"))) f2() {}
+; void __attribute__((always_inline)) f3() { f1(); }
+; void f4() { f3(); }
+
+; Check that
+; * -gmlt includes no DW_TAG_subprograms for subprograms without inlined
+; subroutines.
+; * yet still produces DW_AT_ranges and a range list in debug_ranges that
+; describes those subprograms
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
+; CHECK-NOT: {{DW_TAG|NULL}}
+
+; Omitting the subprograms without inlined subroutines is not possible
+; currently on Darwin as dsymutil will drop the whole CU if it has no subprograms
+; (which happens with this optimization if there are no inlined subroutines).
+
+; DARWIN: DW_TAG_subprogram
+; DARWIN-NOT: DW_TAG
+; DARWIN: DW_AT_name {{.*}} "f1"
+; DARWIN-NOT: {{DW_TAG|NULL}}
+; DARWIN: DW_TAG_subprogram
+; DARWIN-NOT: DW_TAG
+; DARWIN: DW_AT_name {{.*}} "f2"
+; DARWIN-NOT: {{DW_TAG|NULL}}
+; DARWIN: DW_TAG_subprogram
+; DARWIN-NOT: DW_TAG
+; Can't check the abstract_origin value across the DARWIN/CHECK checking and
+; ordering, so don't bother - just trust me, it refers to f3 down there.
+; DARWIN: DW_AT_abstract_origin
+; DARWIN-NOT: {{DW_TAG|NULL}}
+
+
+; FIXME: Emitting separate abstract definitions is inefficient when we could
+; just attach the DW_AT_name to the inlined_subroutine directly. Except that
+; would produce many string relocations. Implement string indexing in the
+; skeleton CU to address the relocation problem, then remove abstract
+; definitions from -gmlt here.
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_name {{.*}} "f3"
+
+; FIXME: We don't really need DW_AT_inline, consumers can ignore this due to
+; the absence of high_pc/low_pc/ranges and know that they just need it for
+; retrieving the name of a concrete inlined instance
+
+; CHECK-NOT: {{DW_TAG|DW_AT|NULL}}
+
+; Check that we only provide the minimal attributes on a subprogram to save space.
+; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_low_pc
+; CHECK-NEXT: DW_AT_high_pc
+; CHECK-NEXT: DW_AT_name
+; CHECK-NOT: {{DW_TAG|DW_AT}}
+; CHECK: DW_TAG_inlined_subroutine
+
+; As mentioned above - replace DW_AT_abstract_origin with DW_AT_name to save
+; space once we have support for string indexing in non-dwo sections
+
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "f3"
+; CHECK-NEXT: DW_AT_low_pc
+; CHECK-NEXT: DW_AT_high_pc
+; CHECK-NEXT: DW_AT_call_file
+; CHECK-NEXT: DW_AT_call_line
+
+; Make sure we don't have any other subprograms here (subprograms with no
+; inlined subroutines are omitted by design to save space)
+
+; CHECK-NOT: {{DW_TAG|DW_AT}}
+; CHECK: NULL
+; CHECK-NOT: {{DW_TAG|DW_AT}}
+; CHECK: NULL
+
+
+; CHECK: .debug_ranges contents:
+
+; ... some addresses (depends on platform (such as platforms with function
+; reordering in the linker), and looks wonky on platforms with zero values
+; written in relocation places (dumper needs to be fixed to read the
+; relocations rather than interpret that as the end of a range list))
+
+; CHECK: 00000000 <End of list>
+
+
+; Check that we don't emit any pubnames or pubtypes under -gmlt
+; CHECK: .debug_pubnames contents:
+; CHECK-NOT: Offset
+
+; CHECK: .debug_pubtypes contents:
+; CHECK-NOT: Offset
+
+; CHECK: .apple{{.*}} contents:
+
+; Function Attrs: nounwind uwtable
+define void @_Z2f1v() #0 !dbg !4 {
+entry:
+ ret void, !dbg !13
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z2f2v() #0 section "__TEXT,__bar" !dbg !7 {
+entry:
+ ret void, !dbg !14
+}
+
+; Function Attrs: alwaysinline nounwind uwtable
+define void @_Z2f3v() #1 !dbg !8 {
+entry:
+ call void @_Z2f1v(), !dbg !15
+ ret void, !dbg !16
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z2f4v() #0 !dbg !9 {
+entry:
+ call void @_Z2f1v() #2, !dbg !17
+ ret void, !dbg !19
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "gmlt.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4, !7, !8, !9}
+!4 = distinct !DISubprogram(name: "f1", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "gmlt.cpp", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "f2", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "f3", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!9 = distinct !DISubprogram(name: "f4", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{!"clang version 3.6.0 "}
+!13 = !DILocation(line: 1, column: 12, scope: !4)
+!14 = !DILocation(line: 2, column: 53, scope: !7)
+!15 = !DILocation(line: 3, column: 44, scope: !8)
+!16 = !DILocation(line: 3, column: 50, scope: !8)
+!17 = !DILocation(line: 3, column: 44, scope: !8, inlinedAt: !18)
+!18 = !DILocation(line: 4, column: 13, scope: !9)
+!19 = !DILocation(line: 4, column: 19, scope: !9)
diff --git a/test/DebugInfo/Generic/PR20038.ll b/test/DebugInfo/Generic/PR20038.ll
new file mode 100644
index 000000000000..3e9145f03e85
--- /dev/null
+++ b/test/DebugInfo/Generic/PR20038.ll
@@ -0,0 +1,172 @@
+; REQUIRES: object-emission
+
+; For some reason, the output when targetting sparc is not quite as expected.
+; XFAIL: sparc
+
+; RUN: %llc_dwarf -O0 -filetype=obj -dwarf-linkage-names=Enable < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; IR generated from clang -O0 with:
+; struct C {
+; ~C();
+; };
+; extern bool b;
+; void fun4() { b && (C(), 1); }
+; __attribute__((always_inline)) C::~C() { }
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "C"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "~C"
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_linkage_name {{.*}} "_ZN1CD1Ev"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "this"
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "fun4"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} "_ZN1CD1Ev"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} "this"
+
+; FIXME: D2 is actually inlined into D1 but doesn't show up here, possibly due
+; to there being no work in D2 (calling another member function from the dtor
+; causes D2 to show up, calling a free function doesn't).
+
+; CHECK-NOT: DW_TAG
+; CHECK: NULL
+; CHECK-NOT: DW_TAG
+; CHECK: NULL
+
+%struct.C = type { i8 }
+
+@b = external global i8
+
+; Function Attrs: nounwind
+define void @_Z4fun4v() #0 !dbg !12 {
+entry:
+ %this.addr.i.i = alloca %struct.C*, align 8, !dbg !21
+ %this.addr.i = alloca %struct.C*, align 8, !dbg !22
+ %agg.tmp.ensured = alloca %struct.C, align 1
+ %cleanup.cond = alloca i1
+ %0 = load i8, i8* @b, align 1, !dbg !24
+ %tobool = trunc i8 %0 to i1, !dbg !24
+ store i1 false, i1* %cleanup.cond
+ br i1 %tobool, label %land.rhs, label %land.end, !dbg !24
+
+land.rhs: ; preds = %entry
+ store i1 true, i1* %cleanup.cond, !dbg !25
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %1 = phi i1 [ false, %entry ], [ true, %land.rhs ]
+ %cleanup.is_active = load i1, i1* %cleanup.cond, !dbg !27
+ br i1 %cleanup.is_active, label %cleanup.action, label %cleanup.done, !dbg !27
+
+cleanup.action: ; preds = %land.end
+ store %struct.C* %agg.tmp.ensured, %struct.C** %this.addr.i, align 8, !dbg !22
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !129, metadata !DIExpression()), !dbg !31
+ %this1.i = load %struct.C*, %struct.C** %this.addr.i, !dbg !22
+ store %struct.C* %this1.i, %struct.C** %this.addr.i.i, align 8, !dbg !21
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i.i, metadata !132, metadata !DIExpression()), !dbg !33
+ %this1.i.i = load %struct.C*, %struct.C** %this.addr.i.i, !dbg !21
+ br label %cleanup.done, !dbg !22
+
+cleanup.done: ; preds = %cleanup.action, %land.end
+ ret void, !dbg !34
+}
+
+; Function Attrs: alwaysinline nounwind
+define void @_ZN1CD1Ev(%struct.C* %this) unnamed_addr #1 align 2 !dbg !17 {
+entry:
+ %this.addr.i = alloca %struct.C*, align 8, !dbg !37
+ %this.addr = alloca %struct.C*, align 8
+ store %struct.C* %this, %struct.C** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !29, metadata !DIExpression()), !dbg !38
+ %this1 = load %struct.C*, %struct.C** %this.addr
+ store %struct.C* %this1, %struct.C** %this.addr.i, align 8, !dbg !37
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !232, metadata !DIExpression()), !dbg !39
+ %this1.i = load %struct.C*, %struct.C** %this.addr.i, !dbg !37
+ ret void, !dbg !37
+}
+
+; Function Attrs: alwaysinline nounwind
+define void @_ZN1CD2Ev(%struct.C* %this) unnamed_addr #1 align 2 !dbg !16 {
+entry:
+ %this.addr = alloca %struct.C*, align 8
+ store %struct.C* %this, %struct.C** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !32, metadata !DIExpression()), !dbg !40
+ %this1 = load %struct.C*, %struct.C** %this.addr
+ ret void, !dbg !41
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18, !19}
+!llvm.ident = !{!20}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !11, globals: !2, imports: !2)
+!1 = !DIFile(filename: "<stdin>", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 1, size: 8, align: 8, file: !5, elements: !6, identifier: "_ZTS1C")
+!5 = !DIFile(filename: "PR20038.cpp", directory: "/tmp/dbginfo")
+!6 = !{!7}
+!7 = !DISubprogram(name: "~C", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !"_ZTS1C", type: !8)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
+!11 = !{!12, !16, !17}
+!12 = distinct !DISubprogram(name: "fun4", linkageName: "_Z4fun4v", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !13, type: !14, variables: !2)
+!13 = !DIFile(filename: "PR20038.cpp", directory: "/tmp/dbginfo")
+!14 = !DISubroutineType(types: !15)
+!15 = !{null}
+!16 = distinct !DISubprogram(name: "~C", linkageName: "_ZN1CD2Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !5, scope: !"_ZTS1C", type: !8, declaration: !7, variables: !2)
+!17 = distinct !DISubprogram(name: "~C", linkageName: "_ZN1CD1Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !5, scope: !"_ZTS1C", type: !8, declaration: !7, variables: !2)
+!18 = !{i32 2, !"Dwarf Version", i32 4}
+!19 = !{i32 2, !"Debug Info Version", i32 3}
+!20 = !{!"clang version 3.5.0 "}
+!21 = !DILocation(line: 6, scope: !17, inlinedAt: !22)
+!22 = !DILocation(line: 5, scope: !23)
+!23 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !12)
+!24 = !DILocation(line: 5, scope: !12)
+!25 = !DILocation(line: 5, scope: !26)
+!26 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !12)
+!27 = !DILocation(line: 5, scope: !28)
+!28 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !12)
+!29 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !17, type: !30)
+!30 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1C")
+!31 = !DILocation(line: 0, scope: !17, inlinedAt: !22)
+!32 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !16, type: !30)
+!33 = !DILocation(line: 0, scope: !16, inlinedAt: !21)
+
+!129 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !17, type: !30)
+!132 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !16, type: !30)
+!232 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !16, type: !30)
+
+!34 = !DILocation(line: 5, scope: !35)
+!35 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !36)
+!36 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !12)
+!37 = !DILocation(line: 6, scope: !17)
+!38 = !DILocation(line: 0, scope: !17)
+!39 = !DILocation(line: 0, scope: !16, inlinedAt: !37)
+!40 = !DILocation(line: 0, scope: !16)
+!41 = !DILocation(line: 6, scope: !16)
diff --git a/test/DebugInfo/Generic/accel-table-hash-collisions.ll b/test/DebugInfo/Generic/accel-table-hash-collisions.ll
new file mode 100644
index 000000000000..ff9c7851826f
--- /dev/null
+++ b/test/DebugInfo/Generic/accel-table-hash-collisions.ll
@@ -0,0 +1,92 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf -dwarf-accel-tables=Enable -filetype=obj -o - < %s | llvm-dwarfdump -debug-dump=apple_names - | FileCheck %s
+
+; Generated from the following C code using
+; clang -S -emit-llvm hash-collision.c
+;
+; The names of the variables have been chosen so that they produce hash collisions.
+; There are 12 names here that are hashed to only 6 hashes (each pair of lines
+; hashes to the same value, see the CHECK lines below).
+;
+; int ForceTopDown;
+; int _ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_;
+; int _ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE;
+; int _ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv;
+; int _ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE;
+; int _ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE;
+; int k1;
+; int is;
+; int setStmt;
+; int _ZN4llvm5TwineC1Ei;
+; int _ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE;
+; int _ZN4llvm22MachineModuleInfoMachOD2Ev;
+
+; Check that we have the right amount of hashes.
+; CHECK: Bucket count = 6
+; CHECK: Hashes count = 6
+
+; Check that all the names are present in the output
+; CHECK: Hash = 0x00597841
+; CHECK: Name: {{[0-9a-f]*}} "is"
+; CHECK: Name: {{[0-9a-f]*}} "k1"
+
+; CHECK: Hash = 0xa4b42a1e
+; CHECK: Name: {{[0-9a-f]*}} "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE"
+; CHECK: Name: {{[0-9a-f]*}} "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv"
+
+; CHECK: Hash = 0xeee7c0b2
+; CHECK: Name: {{[0-9a-f]*}} "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE"
+; CHECK: Name: {{[0-9a-f]*}} "_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE"
+
+; CHECK: Hash = 0xea48ac5f
+; CHECK: Name: {{[0-9a-f]*}} "ForceTopDown"
+; CHECK: Name: {{[0-9a-f]*}} "_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_"
+
+; CHECK: Hash = 0x6b22f71f
+; CHECK: Name: {{[0-9a-f]*}} "_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE"
+; CHECK: Name: {{[0-9a-f]*}} "_ZN4llvm22MachineModuleInfoMachOD2Ev"
+
+; CHECK: Hash = 0x8c248979
+; CHECK: Name: {{[0-9a-f]*}} "setStmt"
+; CHECK: Name: {{[0-9a-f]*}} "_ZN4llvm5TwineC1Ei"
+
+
+
+@ForceTopDown = common global i32 0, align 4
+@_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_ = common global i32 0, align 4
+@_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE = common global i32 0, align 4
+@_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv = common global i32 0, align 4
+@_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE = common global i32 0, align 4
+@_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE = common global i32 0, align 4
+@k1 = common global i32 0, align 4
+@is = common global i32 0, align 4
+@setStmt = common global i32 0, align 4
+@_ZN4llvm5TwineC1Ei = common global i32 0, align 4
+@_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE = common global i32 0, align 4
+@_ZN4llvm22MachineModuleInfoMachOD2Ev = common global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18, !19}
+!llvm.ident = !{!20}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 231548) (llvm/trunk 231547)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !DIFile(filename: "hash-collisions.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16}
+!4 = !DIGlobalVariable(name: "ForceTopDown", scope: !0, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, variable: i32* @ForceTopDown)
+!5 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !DIGlobalVariable(name: "_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_)
+!7 = !DIGlobalVariable(name: "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE", scope: !0, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE)
+!8 = !DIGlobalVariable(name: "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv", scope: !0, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv)
+!9 = !DIGlobalVariable(name: "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE", scope: !0, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE)
+!10 = !DIGlobalVariable(name: "_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE", scope: !0, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE)
+!11 = !DIGlobalVariable(name: "k1", scope: !0, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, variable: i32* @k1)
+!12 = !DIGlobalVariable(name: "is", scope: !0, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, variable: i32* @is)
+!13 = !DIGlobalVariable(name: "setStmt", scope: !0, file: !1, line: 9, type: !5, isLocal: false, isDefinition: true, variable: i32* @setStmt)
+!14 = !DIGlobalVariable(name: "_ZN4llvm5TwineC1Ei", scope: !0, file: !1, line: 10, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm5TwineC1Ei)
+!15 = !DIGlobalVariable(name: "_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE", scope: !0, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE)
+!16 = !DIGlobalVariable(name: "_ZN4llvm22MachineModuleInfoMachOD2Ev", scope: !0, file: !1, line: 12, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm22MachineModuleInfoMachOD2Ev)
+!17 = !{i32 2, !"Dwarf Version", i32 2}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{i32 1, !"PIC Level", i32 2}
+!20 = !{!"clang version 3.7.0 (trunk 231548) (llvm/trunk 231547)"}
diff --git a/test/DebugInfo/Generic/array.ll b/test/DebugInfo/Generic/array.ll
new file mode 100644
index 000000000000..2f181e3b5953
--- /dev/null
+++ b/test/DebugInfo/Generic/array.ll
@@ -0,0 +1,40 @@
+; RUN: %llc_dwarf -O0 < %s | FileCheck %s
+; Do not emit AT_upper_bound for an unbounded array.
+; radar 9241695
+define i32 @main() nounwind ssp !dbg !0 {
+entry:
+ %retval = alloca i32, align 4
+ %a = alloca [0 x i32], align 4
+ store i32 0, i32* %retval
+ call void @llvm.dbg.declare(metadata [0 x i32]* %a, metadata !6, metadata !DIExpression()), !dbg !11
+ ret i32 0, !dbg !12
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!16}
+
+!0 = distinct !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 3, file: !14, scope: !1, type: !3)
+!1 = !DIFile(filename: "array.c", directory: "/private/tmp")
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129138)", isOptimized: false, emissionKind: 0, file: !14, enums: !15, retainedTypes: !15, subprograms: !13, imports: null)
+!3 = !DISubroutineType(types: !4)
+!4 = !{!5}
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !DILocalVariable(name: "a", line: 4, scope: !7, file: !1, type: !8)
+!7 = distinct !DILexicalBlock(line: 3, column: 12, file: !14, scope: !0)
+!8 = !DICompositeType(tag: DW_TAG_array_type, align: 32, file: !14, scope: !2, baseType: !5, elements: !9)
+!9 = !{!10}
+;CHECK: section_info:
+;CHECK: DW_TAG_subrange_type
+;CHECK-NEXT: DW_AT_type
+;CHECK-NOT: DW_AT_lower_bound
+;CHECK-NOT: DW_AT_upper_bound
+;CHECK-NEXT: End Of Children Mark
+!10 = !DISubrange(count: -1)
+!11 = !DILocation(line: 4, column: 7, scope: !7)
+!12 = !DILocation(line: 5, column: 3, scope: !7)
+!13 = !{!0}
+!14 = !DIFile(filename: "array.c", directory: "/private/tmp")
+!15 = !{}
+!16 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/block-asan.ll b/test/DebugInfo/Generic/block-asan.ll
new file mode 100644
index 000000000000..92119631444c
--- /dev/null
+++ b/test/DebugInfo/Generic/block-asan.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -asan %s | FileCheck %s
+
+; The IR of this testcase is generated from the following C code:
+; void bar (int);
+;
+; void foo() {
+; __block int x;
+; bar(x);
+; }
+; by compiling it with 'clang -emit-llvm -g -S' and then by manually
+; adding the sanitize_address attribute to the @foo() function (so
+; that ASAN accepts to instrument the function in the above opt run).
+
+; Check that the location of the ASAN instrumented __block variable is
+; correct.
+; CHECK: !DIExpression(DW_OP_deref, DW_OP_plus, 8, DW_OP_deref, DW_OP_plus, 24)
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 }
+
+; Function Attrs: nounwind ssp uwtable
+define void @foo() #0 !dbg !4 {
+entry:
+ %x = alloca %struct.__block_byref_x, align 8
+ call void @llvm.dbg.declare(metadata %struct.__block_byref_x* %x, metadata !12, metadata !22), !dbg !23
+ %byref.isa = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 0, !dbg !24
+ store i8* null, i8** %byref.isa, !dbg !24
+ %byref.forwarding = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 1, !dbg !24
+ store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, !dbg !24
+ %byref.flags = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 2, !dbg !24
+ store i32 0, i32* %byref.flags, !dbg !24
+ %byref.size = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 3, !dbg !24
+ store i32 32, i32* %byref.size, !dbg !24
+ %forwarding = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 1, !dbg !25
+ %0 = load %struct.__block_byref_x*, %struct.__block_byref_x** %forwarding, !dbg !25
+ %x1 = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %0, i32 0, i32 4, !dbg !25
+ %1 = load i32, i32* %x1, align 4, !dbg !25
+ call void @bar(i32 %1), !dbg !25
+ %2 = bitcast %struct.__block_byref_x* %x to i8*, !dbg !26
+ call void @_Block_object_dispose(i8* %2, i32 8) #3, !dbg !26
+ ret void, !dbg !26
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @bar(i32) #2
+
+declare void @_Block_object_dispose(i8*, i32)
+
+attributes #0 = { nounwind ssp uwtable sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 223120) (llvm/trunk 223119)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "block.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "block.c", directory: "/tmp")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 2}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{i32 1, !"PIC Level", i32 2}
+!11 = !{!"clang version 3.6.0 (trunk 223120) (llvm/trunk 223119)"}
+!12 = !DILocalVariable(name: "x", line: 4, scope: !4, file: !5, type: !13)
+!13 = !DICompositeType(tag: DW_TAG_structure_type, size: 224, flags: DIFlagBlockByrefStruct, file: !1, scope: !5, elements: !14)
+!14 = !{!15, !17, !18, !20, !21}
+!15 = !DIDerivedType(tag: DW_TAG_member, name: "__isa", size: 64, align: 64, file: !1, scope: !5, baseType: !16)
+!16 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "__forwarding", size: 64, align: 64, offset: 64, file: !1, scope: !5, baseType: !16)
+!18 = !DIDerivedType(tag: DW_TAG_member, name: "__flags", size: 32, align: 32, offset: 128, file: !1, scope: !5, baseType: !19)
+!19 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!20 = !DIDerivedType(tag: DW_TAG_member, name: "__size", size: 32, align: 32, offset: 160, file: !1, scope: !5, baseType: !19)
+!21 = !DIDerivedType(tag: DW_TAG_member, name: "x", size: 32, align: 32, offset: 192, file: !1, scope: !5, baseType: !19)
+!22 = !DIExpression(DW_OP_plus, 8, DW_OP_deref, DW_OP_plus, 24)
+!23 = !DILocation(line: 4, column: 15, scope: !4)
+!24 = !DILocation(line: 4, column: 3, scope: !4)
+!25 = !DILocation(line: 5, column: 3, scope: !4)
+!26 = !DILocation(line: 6, column: 1, scope: !4)
diff --git a/test/DebugInfo/Generic/bug_null_debuginfo.ll b/test/DebugInfo/Generic/bug_null_debuginfo.ll
new file mode 100644
index 000000000000..09e36db42b49
--- /dev/null
+++ b/test/DebugInfo/Generic/bug_null_debuginfo.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, isOptimized: false, emissionKind: 0, file: !1, globals: null)
+!1 = !DIFile(filename: "t", directory: "")
+!2 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/constant-pointers.ll b/test/DebugInfo/Generic/constant-pointers.ll
new file mode 100644
index 000000000000..af0b6b0743c0
--- /dev/null
+++ b/test/DebugInfo/Generic/constant-pointers.ll
@@ -0,0 +1,51 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Ensure that pointer constants are emitted as unsigned data. Alternatively,
+; these could be signless data (dataN).
+
+; Built with Clang from:
+; template <void *V, void (*F)(), int i>
+; void func() {}
+; template void func<nullptr, nullptr, 42>();
+
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_template_value_parameter
+; CHECK: DW_AT_name {{.*}} "V"
+; CHECK: DW_AT_const_value [DW_FORM_udata] (0)
+; CHECK: DW_TAG_template_value_parameter
+; CHECK: DW_AT_name {{.*}} "F"
+; CHECK: DW_AT_const_value [DW_FORM_udata] (0)
+
+; Function Attrs: nounwind uwtable
+define weak_odr void @_Z4funcILPv0ELPFvvE0ELi42EEvv() #0 !dbg !4 {
+entry:
+ ret void, !dbg !18
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16}
+!llvm.ident = !{!17}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "constant-pointers.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "func<nullptr, nullptr, 42>", linkageName: "_Z4funcILPv0ELPFvvE0ELi42EEvv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, templateParams: !8, variables: !2)
+!5 = !DIFile(filename: "constant-pointers.cpp", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !{!9, !11, !13}
+!9 = !DITemplateValueParameter(tag: DW_TAG_template_value_parameter, name: "V", type: !10, value: i8 0)
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
+!11 = !DITemplateValueParameter(tag: DW_TAG_template_value_parameter, name: "F", type: !12, value: i8 0)
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !6)
+!13 = !DITemplateValueParameter(tag: DW_TAG_template_value_parameter, name: "i", type: !14, value: i32 42)
+!14 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{!"clang version 3.5.0 "}
+!18 = !DILocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/Generic/constant-sdnodes-have-dbg-location.ll b/test/DebugInfo/Generic/constant-sdnodes-have-dbg-location.ll
new file mode 100644
index 000000000000..833774185182
--- /dev/null
+++ b/test/DebugInfo/Generic/constant-sdnodes-have-dbg-location.ll
@@ -0,0 +1,26 @@
+; RUN: llc -debug -dag-dump-verbose < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: t{{[0-9]+}}: i32 = Constant<-1>test.c:4:5
+
+define i32 @main() !dbg !4 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ ret i32 -1, !dbg !10
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
+!1 = !DIFile(filename: "test.c", directory: "/home/user/clang-llvm/build")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !DILocation(line: 4, column: 5, scope: !4)
diff --git a/test/DebugInfo/Generic/constantfp-sdnodes-have-dbg-location.ll b/test/DebugInfo/Generic/constantfp-sdnodes-have-dbg-location.ll
new file mode 100644
index 000000000000..6cf9178269c4
--- /dev/null
+++ b/test/DebugInfo/Generic/constantfp-sdnodes-have-dbg-location.ll
@@ -0,0 +1,24 @@
+; RUN: llc -debug -dag-dump-verbose < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: t{{[0-9]+}}: f64 = ConstantFP<1.500000e+00>test.c:3:5
+
+define double @f() !dbg !4 {
+entry:
+ ret double 1.500000e+00, !dbg !10
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
+!1 = !DIFile(filename: "test.c", directory: "/home/user/clang-llvm/build")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !DILocation(line: 3, column: 5, scope: !4)
diff --git a/test/DebugInfo/Generic/cross-cu-inlining.ll b/test/DebugInfo/Generic/cross-cu-inlining.ll
new file mode 100644
index 000000000000..d95b43400607
--- /dev/null
+++ b/test/DebugInfo/Generic/cross-cu-inlining.ll
@@ -0,0 +1,143 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj -dwarf-linkage-names=Enable < %s | llvm-dwarfdump -debug-dump=info - | FileCheck -implicit-check-not=DW_TAG %s
+; RUN: %llc_dwarf -dwarf-accel-tables=Enable -dwarf-linkage-names=Enable -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck --check-prefix=CHECK-ACCEL --check-prefix=CHECK %s
+
+; Build from source:
+; $ clang++ a.cpp b.cpp -g -c -emit-llvm
+; $ llvm-link a.bc b.bc -o ab.bc
+; $ opt -inline ab.bc -o ab-opt.bc
+; $ cat a.cpp
+; extern int i;
+; int func(int);
+; int main() {
+; return func(i);
+; }
+; $ cat b.cpp
+; int __attribute__((always_inline)) func(int x) {
+; return x * 2;
+; }
+
+; Ensure that func inlined into main is described and references the abstract
+; definition in b.cpp's CU.
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_name {{.*}} "a.cpp"
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000[[INT:.*]])
+; CHECK: 0x[[INLINED:[0-9a-f]*]]:{{.*}}DW_TAG_inlined_subroutine
+; CHECK: DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]] "_Z4funci"
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]] "x"
+
+; Check the abstract definition is in the 'b.cpp' CU and doesn't contain any
+; concrete information (address range or variable location)
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_name {{.*}} "b.cpp"
+; CHECK: 0x[[ABS_FUNC]]: DW_TAG_subprogram
+; CHECK-NOT: DW_AT_low_pc
+; CHECK: 0x[[ABS_VAR]]: DW_TAG_formal_parameter
+; CHECK-NOT: DW_AT_location
+; CHECK: DW_AT_type [DW_FORM_ref4] {{.*}} {0x[[INT]]}
+; CHECK-NOT: DW_AT_location
+
+; CHECK: 0x[[INT]]: DW_TAG_base_type
+; CHECK: DW_AT_name {{.*}} "int"
+
+; Check the concrete out of line definition references the abstract and
+; provides the address range and variable location
+; CHECK: 0x[[FUNC:[0-9a-f]*]]{{.*}}DW_TAG_subprogram
+; CHECK: DW_AT_low_pc
+; CHECK: DW_AT_abstract_origin {{.*}} {0x[[ABS_FUNC]]} "_Z4funci"
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_location
+; CHECK: DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]} "x"
+
+; Check that both the inline and the non out of line version of func are
+; correctly referenced in the accelerator table. Before r221837, the one
+; in the second compilation unit had a wrong offset
+; CHECK-ACCEL: .apple_names contents:
+; CHECK-ACCEL: Name{{.*}}"func"
+; CHECK-ACCEL-NOT: Name
+; CHECK-ACCEL: Atom[0]{{.*}}[[INLINED]]
+; CHECK-ACCEL-NOT: Name
+; CHECK-ACCEL: Atom[0]{{.*}}[[FUNC]]
+
+@i = external global i32
+
+; Function Attrs: uwtable
+define i32 @main() #0 !dbg !4 {
+entry:
+ %x.addr.i = alloca i32, align 4
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32, i32* @i, align 4, !dbg !19
+ %1 = bitcast i32* %x.addr.i to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %1)
+ store i32 %0, i32* %x.addr.i, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr.i, metadata !120, metadata !DIExpression()), !dbg !21
+ %2 = load i32, i32* %x.addr.i, align 4, !dbg !22
+ %mul.i = mul nsw i32 %2, 2, !dbg !22
+ %3 = bitcast i32* %x.addr.i to i8*, !dbg !22
+ call void @llvm.lifetime.end(i64 4, i8* %3), !dbg !22
+ ret i32 %mul.i, !dbg !19
+}
+
+; Function Attrs: alwaysinline nounwind uwtable
+define i32 @_Z4funci(i32 %x) #1 !dbg !12 {
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !20, metadata !DIExpression()), !dbg !23
+ %0 = load i32, i32* %x.addr, align 4, !dbg !24
+ %mul = mul nsw i32 %0, 2, !dbg !24
+ ret i32 %mul, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #3
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0, !9}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18, !18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !10, enums: !2, retainedTypes: !2, subprograms: !11, globals: !2, imports: !2)
+!10 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
+!11 = !{!12}
+!12 = distinct !DISubprogram(name: "func", linkageName: "_Z4funci", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !10, scope: !13, type: !14, variables: !2)
+!13 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
+!14 = !DISubroutineType(types: !15)
+!15 = !{!8, !8}
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 3}
+!18 = !{!"clang version 3.5.0 "}
+!19 = !DILocation(line: 4, scope: !4)
+!20 = !DILocalVariable(name: "x", line: 1, arg: 1, scope: !12, file: !13, type: !8)
+
+!120 = !DILocalVariable(name: "x", line: 1, arg: 1, scope: !12, file: !13, type: !8)
+
+!21 = !DILocation(line: 1, scope: !12, inlinedAt: !19)
+!22 = !DILocation(line: 2, scope: !12, inlinedAt: !19)
+!23 = !DILocation(line: 1, scope: !12)
+!24 = !DILocation(line: 2, scope: !12)
+
diff --git a/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll b/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll
new file mode 100644
index 000000000000..1713ea112d44
--- /dev/null
+++ b/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll
@@ -0,0 +1,95 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Testing that two distinct (distinct by writing them in separate files, while
+; still fulfilling C++'s ODR by having identical token sequences) functions,
+; linked under LTO, get plausible debug info (and don't crash).
+
+; Built from source:
+; $ clang++ a.cpp b.cpp -g -c -emit-llvm
+; $ llvm-link a.bc b.bc -o ab.bc
+
+; This change is intended to tickle a case where the subprogram MDNode
+; associated with the llvm::Function will differ from the subprogram
+; referenced by the DbgLocs in the function.
+
+; $ sed -ie "s/!12, !0/!0, !12/" ab.ll
+; $ cat a.cpp
+; inline int func(int i) {
+; return i * 2;
+; }
+; int (*x)(int) = &func;
+; $ cat b.cpp
+; inline int func(int i) {
+; return i * 2;
+; }
+; int (*y)(int) = &func;
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "func"
+; CHECK: DW_TAG_compile_unit
+
+; FIXME: Maybe we should drop the subprogram here - since the function was
+; emitted in one CU, due to linkonce_odr uniquing. We certainly don't emit the
+; subprogram here if the source location for this definition is the same (see
+; test/DebugInfo/cross-cu-linkonce.ll), though it's very easy to tickle that
+; into failing even without duplicating the source as has been done in this
+; case (two cpp files in different directories, including the same header that
+; contains an inline function - clang will produce distinct subprogram metadata
+; that won't deduplicate owing to the file location information containing the
+; directory of the source file even though the file name is absolute, not
+; relative)
+
+; CHECK: DW_TAG_subprogram
+
+@x = global i32 (i32)* @_Z4funci, align 8
+@y = global i32 (i32)* @_Z4funci, align 8
+
+; Function Attrs: inlinehint nounwind uwtable
+define linkonce_odr i32 @_Z4funci(i32 %i) #0 !dbg !4 {
+ %1 = alloca i32, align 4
+ store i32 %i, i32* %1, align 4
+ call void @llvm.dbg.declare(metadata i32* %1, metadata !22, metadata !DIExpression()), !dbg !23
+ %2 = load i32, i32* %1, align 4, !dbg !24
+ %3 = mul nsw i32 %2, 2, !dbg !24
+ ret i32 %3, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!12, !0}
+!llvm.module.flags = !{!19, !20}
+!llvm.ident = !{!21, !21}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
+!1 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "func", linkageName: "_Z4funci", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8, !8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DIGlobalVariable(name: "x", line: 4, isLocal: false, isDefinition: true, scope: null, file: !5, type: !11, variable: i32 (i32)** @x)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !6)
+!12 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !13, enums: !2, retainedTypes: !2, subprograms: !14, globals: !17, imports: !2)
+!13 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
+!14 = !{!15}
+!15 = distinct !DISubprogram(name: "func", linkageName: "_Z4funci", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !13, scope: !16, type: !6, variables: !2)
+!16 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
+!17 = !{!18}
+!18 = !DIGlobalVariable(name: "y", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !11, variable: i32 (i32)** @y)
+!19 = !{i32 2, !"Dwarf Version", i32 4}
+!20 = !{i32 1, !"Debug Info Version", i32 3}
+!21 = !{!"clang version 3.5.0 "}
+!22 = !DILocalVariable(name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!23 = !DILocation(line: 1, scope: !4)
+!24 = !DILocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/Generic/cross-cu-linkonce.ll b/test/DebugInfo/Generic/cross-cu-linkonce.ll
new file mode 100644
index 000000000000..983871a1d0f1
--- /dev/null
+++ b/test/DebugInfo/Generic/cross-cu-linkonce.ll
@@ -0,0 +1,73 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Built from source:
+; $ clang++ a.cpp b.cpp -g -c -emit-llvm
+; $ llvm-link a.bc b.bc -o ab.bc
+; $ cat a.cpp
+; # 1 "func.h"
+; inline int func(int i) {
+; return i * 2;
+; }
+; int (*x)(int) = &func;
+; $ cat b.cpp
+; # 1 "func.h"
+; inline int func(int i) {
+; return i * 2;
+; }
+; int (*y)(int) = &func;
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "func"
+; CHECK: DW_TAG_compile_unit
+; CHECK-NOT: DW_TAG_subprogram
+
+@x = global i32 (i32)* @_Z4funci, align 8
+@y = global i32 (i32)* @_Z4funci, align 8
+
+; Function Attrs: inlinehint nounwind uwtable
+define linkonce_odr i32 @_Z4funci(i32 %i) #0 !dbg !4 {
+ %1 = alloca i32, align 4
+ store i32 %i, i32* %1, align 4
+ call void @llvm.dbg.declare(metadata i32* %1, metadata !20, metadata !DIExpression()), !dbg !21
+ %2 = load i32, i32* %1, align 4, !dbg !22
+ %3 = mul nsw i32 %2, 2, !dbg !22
+ ret i32 %3, !dbg !22
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0, !13}
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19, !19}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !10, imports: !2)
+!1 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "func", linkageName: "_Z4funci", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !5, scope: !6, type: !7, variables: !2)
+!5 = !DIFile(filename: "func.h", directory: "/tmp/dbginfo")
+!6 = !DIFile(filename: "func.h", directory: "/tmp/dbginfo")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !{!11}
+!11 = !DIGlobalVariable(name: "x", line: 4, isLocal: false, isDefinition: true, scope: null, file: !6, type: !12, variable: i32 (i32)** @x)
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !7)
+!13 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !14, enums: !2, retainedTypes: !2, subprograms: !3, globals: !15, imports: !2)
+!14 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
+!15 = !{!16}
+!16 = !DIGlobalVariable(name: "y", line: 4, isLocal: false, isDefinition: true, scope: null, file: !6, type: !12, variable: i32 (i32)** @y)
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 1, !"Debug Info Version", i32 3}
+!19 = !{!"clang version 3.5.0 "}
+!20 = !DILocalVariable(name: "i", line: 1, arg: 1, scope: !4, file: !6, type: !9)
+!21 = !DILocation(line: 1, scope: !4)
+!22 = !DILocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/Generic/cu-range-hole.ll b/test/DebugInfo/Generic/cu-range-hole.ll
new file mode 100644
index 000000000000..12651a844ef1
--- /dev/null
+++ b/test/DebugInfo/Generic/cu-range-hole.ll
@@ -0,0 +1,74 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf -O0 -filetype=obj %s -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Check that we emit ranges for this CU since we have a function with and
+; without debug info.
+; Note: This depends upon the order of output in the .o file. Currently it's
+; in order of the output to make sure that the CU has multiple ranges since
+; there's a function in the middle. If they were together then it would have
+; a single range and no DW_AT_ranges.
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_ranges
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
+
+; Function Attrs: nounwind uwtable
+define i32 @b(i32 %c) #0 !dbg !5 {
+entry:
+ %c.addr = alloca i32, align 4
+ store i32 %c, i32* %c.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %c.addr, metadata !13, metadata !DIExpression()), !dbg !14
+ %0 = load i32, i32* %c.addr, align 4, !dbg !14
+ %add = add nsw i32 %0, 1, !dbg !14
+ ret i32 %add, !dbg !14
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @a(i32 %b) #0 {
+entry:
+ %b.addr = alloca i32, align 4
+ store i32 %b, i32* %b.addr, align 4
+ %0 = load i32, i32* %b.addr, align 4
+ %add = add nsw i32 %0, 1
+ ret i32 %add
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @d(i32 %e) #0 !dbg !10 {
+entry:
+ %e.addr = alloca i32, align 4
+ store i32 %e, i32* %e.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %e.addr, metadata !15, metadata !DIExpression()), !dbg !16
+ %0 = load i32, i32* %e.addr, align 4, !dbg !16
+ %add = add nsw i32 %0, 1, !dbg !16
+ ret i32 %add, !dbg !16
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0, !0}
+!llvm.dbg.cu = !{!1}
+!llvm.module.flags = !{!11, !12}
+
+!0 = !{!"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", isOptimized: false, emissionKind: 1, file: !2, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3, imports: !3)
+!2 = !DIFile(filename: "b.c", directory: "/usr/local/google/home/echristo")
+!3 = !{}
+!4 = !{!5, !10}
+!5 = distinct !DISubprogram(name: "b", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !2, scope: !6, type: !7, variables: !3)
+!6 = !DIFile(filename: "b.c", directory: "/usr/local/google/home/echristo")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = distinct !DISubprogram(name: "d", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !2, scope: !6, type: !7, variables: !3)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 1, !"Debug Info Version", i32 3}
+!13 = !DILocalVariable(name: "c", line: 1, arg: 1, scope: !5, file: !6, type: !9)
+!14 = !DILocation(line: 1, scope: !5)
+!15 = !DILocalVariable(name: "e", line: 3, arg: 1, scope: !10, file: !6, type: !9)
+!16 = !DILocation(line: 3, scope: !10)
diff --git a/test/DebugInfo/Generic/cu-ranges.ll b/test/DebugInfo/Generic/cu-ranges.ll
new file mode 100644
index 000000000000..ab5577984e69
--- /dev/null
+++ b/test/DebugInfo/Generic/cu-ranges.ll
@@ -0,0 +1,71 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf -O0 -filetype=obj %s -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Check that we emit ranges for this which has a non-traditional section and a normal section.
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_ranges
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_low_pc
+; CHECK: DW_AT_high_pc
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_low_pc
+; CHECK: DW_AT_high_pc
+
+; CHECK: .debug_ranges contents:
+; FIXME: When we get better dumping facilities we'll want to elaborate here.
+; CHECK: 00000000 <End of list>
+
+; Function Attrs: nounwind uwtable
+define i32 @foo(i32 %a) #0 section "__TEXT,__foo" !dbg !4 {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !13, metadata !DIExpression()), !dbg !14
+ %0 = load i32, i32* %a.addr, align 4, !dbg !15
+ %add = add nsw i32 %0, 5, !dbg !15
+ ret i32 %add, !dbg !15
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %a) #0 !dbg !9 {
+entry:
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !16, metadata !DIExpression()), !dbg !17
+ %0 = load i32, i32* %a.addr, align 4, !dbg !18
+ %add = add nsw i32 %0, 5, !dbg !18
+ ret i32 %add, !dbg !18
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "foo.c", directory: "/usr/local/google/home/echristo")
+!2 = !{}
+!3 = !{!4, !9}
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "foo.c", directory: "/usr/local/google/home/echristo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8, !8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = distinct !DISubprogram(name: "bar", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 1, !"Debug Info Version", i32 3}
+!12 = !{!"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
+!13 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!14 = !DILocation(line: 1, scope: !4)
+!15 = !DILocation(line: 2, scope: !4)
+!16 = !DILocalVariable(name: "a", line: 5, arg: 1, scope: !9, file: !5, type: !8)
+!17 = !DILocation(line: 5, scope: !9)
+!18 = !DILocation(line: 6, scope: !9)
+
diff --git a/test/DebugInfo/Generic/dbg-at-specficiation.ll b/test/DebugInfo/Generic/dbg-at-specficiation.ll
new file mode 100644
index 000000000000..7302aaecb93f
--- /dev/null
+++ b/test/DebugInfo/Generic/dbg-at-specficiation.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s | FileCheck %s
+; Radar 10147769
+; Do not unnecessarily use AT_specification DIE.
+; CHECK-NOT: AT_specification
+
+@a = common global [10 x i32] zeroinitializer, align 16
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 140253)", isOptimized: true, emissionKind: 0, file: !11, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3)
+!2 = !{}
+!3 = !{!5}
+!5 = !DIGlobalVariable(name: "a", line: 1, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: [10 x i32]* @a)
+!6 = !DIFile(filename: "x.c", directory: "/private/tmp")
+!7 = !DICompositeType(tag: DW_TAG_array_type, size: 320, align: 32, baseType: !8, elements: !9)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DISubrange(count: 10)
+!11 = !DIFile(filename: "x.c", directory: "/private/tmp")
+!12 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/dead-argument-order.ll b/test/DebugInfo/Generic/dead-argument-order.ll
new file mode 100644
index 000000000000..a6451a038cdb
--- /dev/null
+++ b/test/DebugInfo/Generic/dead-argument-order.ll
@@ -0,0 +1,81 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Built from the following source with clang -O1
+; struct S { int i; };
+; int function(struct S s, int i) { return s.i + i; }
+
+; Due to the X86_64 ABI, 's' is passed in registers and once optimized, the
+; entirety of 's' is never reconstituted, since only the int is required, and
+; thus the variable's location is unknown/dead to debug info.
+
+; Future/current work should enable us to describe partial variables, which, in
+; this case, happens to be the entire variable.
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "function"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "s"
+; CHECK-NOT: DW_TAG
+; FIXME: Even though 's' is never reconstituted into a struct, the one member
+; variable is still live and used, and so we should be able to describe 's's
+; location as the location of that int.
+; CHECK-NOT: DW_AT_location
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_location
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "i"
+
+
+%struct.S = type { i32 }
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @_Z8function1Si(i32 %s.coerce, i32 %i) #0 !dbg !9 {
+entry:
+ tail call void @llvm.dbg.declare(metadata %struct.S* undef, metadata !14, metadata !DIExpression()), !dbg !20
+ tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !15, metadata !DIExpression()), !dbg !20
+ %add = add nsw i32 %i, %s.coerce, !dbg !20
+ ret i32 %add, !dbg !20
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !8, globals: !2, imports: !2)
+!1 = !DIFile(filename: "dead-argument-order.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", line: 1, size: 32, align: 32, file: !1, elements: !5, identifier: "_ZTS1S")
+!5 = !{!6}
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "i", line: 1, size: 32, align: 32, file: !1, scope: !"_ZTS1S", baseType: !7)
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{!9}
+!9 = distinct !DISubprogram(name: "function", linkageName: "_Z8function1Si", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !10, type: !11, variables: !13)
+!10 = !DIFile(filename: "dead-argument-order.cpp", directory: "/tmp/dbginfo")
+!11 = !DISubroutineType(types: !12)
+!12 = !{!7, !4, !7}
+!13 = !{!14, !15}
+!14 = !DILocalVariable(name: "s", line: 2, arg: 1, scope: !9, file: !10, type: !"_ZTS1S")
+!15 = !DILocalVariable(name: "i", line: 2, arg: 2, scope: !9, file: !10, type: !7)
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 3}
+!18 = !{!"clang version 3.5.0 "}
+!19 = !{%struct.S* undef}
+!20 = !DILocation(line: 2, scope: !9)
+
diff --git a/test/DebugInfo/debug-info-always-inline.ll b/test/DebugInfo/Generic/debug-info-always-inline.ll
index 5eaa67522181..5eaa67522181 100644
--- a/test/DebugInfo/debug-info-always-inline.ll
+++ b/test/DebugInfo/Generic/debug-info-always-inline.ll
diff --git a/test/DebugInfo/Generic/debug-info-qualifiers.ll b/test/DebugInfo/Generic/debug-info-qualifiers.ll
new file mode 100644
index 000000000000..aa197dd9f6fa
--- /dev/null
+++ b/test/DebugInfo/Generic/debug-info-qualifiers.ll
@@ -0,0 +1,98 @@
+; REQUIRES: object-emission
+; Test (r)value qualifiers on C++11 non-static member functions.
+; Generated from tools/clang/test/CodeGenCXX/debug-info-qualifiers.cpp
+;
+; class A {
+; public:
+; void l() const &;
+; void r() const &&;
+; };
+;
+; void g() {
+; A a;
+; auto pl = &A::l;
+; auto pr = &A::r;
+; }
+;
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump - | FileCheck %s
+; CHECK: DW_TAG_subroutine_type DW_CHILDREN_yes
+; CHECK-NEXT: DW_AT_reference DW_FORM_flag_present
+; CHECK: DW_TAG_subroutine_type DW_CHILDREN_yes
+; CHECK-NEXT: DW_AT_rvalue_reference DW_FORM_flag_present
+;
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_AT_name {{.*}}"l"
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_AT_reference [DW_FORM_flag_present] (true)
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_AT_name {{.*}}"r"
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_AT_rvalue_reference [DW_FORM_flag_present] (true)
+
+%class.A = type { i8 }
+
+; Function Attrs: nounwind
+define void @_Z1gv() #0 !dbg !17 {
+ %a = alloca %class.A, align 1
+ %pl = alloca { i64, i64 }, align 8
+ %pr = alloca { i64, i64 }, align 8
+ call void @llvm.dbg.declare(metadata %class.A* %a, metadata !24, metadata !DIExpression()), !dbg !25
+ call void @llvm.dbg.declare(metadata { i64, i64 }* %pl, metadata !26, metadata !DIExpression()), !dbg !31
+ store { i64, i64 } { i64 ptrtoint (void (%class.A*)* @_ZNKR1A1lEv to i64), i64 0 }, { i64, i64 }* %pl, align 8, !dbg !31
+ call void @llvm.dbg.declare(metadata { i64, i64 }* %pr, metadata !32, metadata !DIExpression()), !dbg !35
+ store { i64, i64 } { i64 ptrtoint (void (%class.A*)* @_ZNKO1A1rEv to i64), i64 0 }, { i64, i64 }* %pr, align 8, !dbg !35
+ ret void, !dbg !36
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @_ZNKR1A1lEv(%class.A*)
+
+declare void @_ZNKO1A1rEv(%class.A*)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21, !22}
+!llvm.ident = !{!23}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !16, globals: !2, imports: !2)
+!1 = !DIFile(filename: "debug-info-qualifiers.cpp", directory: "")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 2, size: 8, align: 8, file: !5, elements: !6, identifier: "_ZTS1A")
+!5 = !DIFile(filename: "debug-info-qualifiers.cpp", directory: "")
+!6 = !{!7, !13}
+!7 = !DISubprogram(name: "l", linkageName: "_ZNKR1A1lEv", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped | DIFlagLValueReference, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !8)
+!8 = !DISubroutineType(flags: DIFlagLValueReference, types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !11)
+!11 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !"_ZTS1A")
+!13 = !DISubprogram(name: "r", linkageName: "_ZNKO1A1rEv", line: 7, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagObjectPointer | DIFlagRValueReference, isOptimized: false, scopeLine: 7, file: !5, scope: !"_ZTS1A", type: !14)
+!14 = !DISubroutineType(flags: DIFlagRValueReference, types: !9)
+!16 = !{!17}
+!17 = distinct !DISubprogram(name: "g", linkageName: "_Z1gv", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !5, scope: !18, type: !19, variables: !2)
+!18 = !DIFile(filename: "debug-info-qualifiers.cpp", directory: "")
+!19 = !DISubroutineType(types: !20)
+!20 = !{null}
+!21 = !{i32 2, !"Dwarf Version", i32 4}
+!22 = !{i32 1, !"Debug Info Version", i32 3}
+!23 = !{!"clang version 3.5 "}
+!24 = !DILocalVariable(name: "a", line: 11, scope: !17, file: !18, type: !4)
+!25 = !DILocation(line: 11, scope: !17)
+!26 = !DILocalVariable(name: "pl", line: 16, scope: !17, file: !18, type: !27)
+!27 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !28, extraData: !"_ZTS1A")
+!28 = !DISubroutineType(flags: DIFlagLValueReference, types: !29)
+!29 = !{null, !30}
+!30 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
+!31 = !DILocation(line: 16, scope: !17)
+!32 = !DILocalVariable(name: "pr", line: 21, scope: !17, file: !18, type: !33)
+!33 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !34, extraData: !"_ZTS1A")
+!34 = !DISubroutineType(flags: DIFlagRValueReference, types: !29)
+!35 = !DILocation(line: 21, scope: !17)
+!36 = !DILocation(line: 22, scope: !17)
diff --git a/test/DebugInfo/Generic/debuginfofinder-forward-declaration.ll b/test/DebugInfo/Generic/debuginfofinder-forward-declaration.ll
new file mode 100644
index 000000000000..62c151bc8d76
--- /dev/null
+++ b/test/DebugInfo/Generic/debuginfofinder-forward-declaration.ll
@@ -0,0 +1,42 @@
+; RUN: opt -analyze -module-debuginfo < %s | FileCheck %s
+
+
+; This module is generated from the following c-code:
+;
+; > union X;
+; >
+; > struct Y {
+; > union X *x;
+; > };
+; >
+; > struct Y y;
+
+
+; CHECK: Type: Y from /tmp/minimal.c:3 DW_TAG_structure_type
+; CHECK: Type: x from /tmp/minimal.c:4 DW_TAG_member
+; CHECK: Type: DW_TAG_pointer_type
+; CHECK: Type: X from /tmp/minimal.c:1 DW_TAG_structure_type
+
+
+%struct.Y = type { %struct.X* }
+%struct.X = type opaque
+
+@y = common global %struct.Y zeroinitializer, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (http://llvm.org/git/clang.git 247b30a043eb8f39ea3708e7e995089da0a6b00f) (http://llvm.org/git/llvm.git 6ecc7365a89c771fd229bdd9ffcc178684ea1aa5)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !DIFile(filename: "minimal.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIGlobalVariable(name: "y", scope: !0, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, variable: %struct.Y* @y)
+!5 = !DICompositeType(tag: DW_TAG_structure_type, name: "Y", file: !1, line: 3, size: 64, align: 64, elements: !6)
+!6 = !{!7}
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !5, file: !1, line: 4, baseType: !8, size: 64, align: 64)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
+!9 = !DICompositeType(tag: DW_TAG_structure_type, name: "X", file: !1, line: 1, flags: DIFlagFwdDecl)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{!"clang version 3.7.0 (http://llvm.org/git/clang.git 247b30a043eb8f39ea3708e7e995089da0a6b00f) (http://llvm.org/git/llvm.git 6ecc7365a89c771fd229bdd9ffcc178684ea1aa5)"}
diff --git a/test/DebugInfo/Generic/debuginfofinder-multiple-cu.ll b/test/DebugInfo/Generic/debuginfofinder-multiple-cu.ll
new file mode 100644
index 000000000000..a4d226b86a69
--- /dev/null
+++ b/test/DebugInfo/Generic/debuginfofinder-multiple-cu.ll
@@ -0,0 +1,41 @@
+; RUN: opt -analyze -module-debuginfo < %s | FileCheck %s
+
+; Produced from linking:
+; /tmp/test1.c containing f()
+; /tmp/test2.c containing g()
+
+; Verify that both compile units and both their contained functions are
+; listed by DebugInfoFinder:
+;CHECK: Compile unit: DW_LANG_C99 from /tmp/test1.c
+;CHECK: Compile unit: DW_LANG_C99 from /tmp/test2.c
+;CHECK: Subprogram: f from /tmp/test1.c:1
+;CHECK: Subprogram: g from /tmp/test2.c:1
+
+define void @f() !dbg !4 {
+ ret void, !dbg !14
+}
+
+define void @g() !dbg !11 {
+ ret void, !dbg !15
+}
+
+!llvm.dbg.cu = !{!0, !8}
+!llvm.module.flags = !{!13, !16}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (192092)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "test1.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "test1.c", directory: "/tmp")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (192092)", isOptimized: false, emissionKind: 0, file: !9, enums: !2, retainedTypes: !2, subprograms: !10, globals: !2, imports: !2)
+!9 = !DIFile(filename: "test2.c", directory: "/tmp")
+!10 = !{!11}
+!11 = distinct !DISubprogram(name: "g", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !9, scope: !12, type: !6, variables: !2)
+!12 = !DIFile(filename: "test2.c", directory: "/tmp")
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !DILocation(line: 1, scope: !4)
+!15 = !DILocation(line: 1, scope: !11)
+!16 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/def-line.ll b/test/DebugInfo/Generic/def-line.ll
new file mode 100644
index 000000000000..3ab7feaf9545
--- /dev/null
+++ b/test/DebugInfo/Generic/def-line.ll
@@ -0,0 +1,93 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Given the following source, ensure that the decl_line/file is correctly
+; emitted and omitted on definitions if it mismatches/matches the declaration
+
+; struct foo {
+; static void f1() {
+; }
+; static void f2();
+; static void f3();
+; };
+; void foo::f2() {
+; f1(); // just to ensure f1 is emitted
+; }
+; #line 1 "bar.cpp"
+; void foo::f3() {
+; }
+
+; Skip the declarations
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: {{DW_TAG|NULL|DW_AT_decl_file}}
+; CHECK: DW_AT_decl_line {{.*}}7
+; CHECK-NOT: {{DW_TAG|NULL|DW_AT_decl_file}}
+; CHECK: DW_AT_specification {{.*}}f2
+; CHECK-NOT: {{DW_TAG|NULL|DW_AT_decl_file}}
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: {{DW_TAG|NULL|DW_AT_decl_line|DW_AT_decl_file}}
+; CHECK: DW_AT_specification {{.*}}f1
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_AT_decl_file {{.*}}bar.cpp
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_AT_decl_line {{.*}}1
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_AT_specification {{.*}}f3
+
+; Function Attrs: uwtable
+define void @_ZN3foo2f2Ev() #0 align 2 !dbg !12 {
+entry:
+ call void @_ZN3foo2f1Ev(), !dbg !19
+ ret void, !dbg !20
+}
+
+; Function Attrs: nounwind uwtable
+define linkonce_odr void @_ZN3foo2f1Ev() #1 align 2 !dbg !15 {
+entry:
+ ret void, !dbg !21
+}
+
+; Function Attrs: nounwind uwtable
+define void @_ZN3foo2f3Ev() #1 align 2 !dbg !13 {
+entry:
+ ret void, !dbg !22
+}
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 249440) (llvm/trunk 249465)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !11)
+!1 = !DIFile(filename: "def-line.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: !1, line: 1, size: 8, align: 8, elements: !5, identifier: "_ZTS3foo")
+!5 = !{!6, !9, !10}
+!6 = !DISubprogram(name: "f1", linkageName: "_ZN3foo2f1Ev", scope: !"_ZTS3foo", file: !1, line: 2, type: !7, isLocal: false, isDefinition: false, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null}
+!9 = !DISubprogram(name: "f2", linkageName: "_ZN3foo2f2Ev", scope: !"_ZTS3foo", file: !1, line: 4, type: !7, isLocal: false, isDefinition: false, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false)
+!10 = !DISubprogram(name: "f3", linkageName: "_ZN3foo2f3Ev", scope: !"_ZTS3foo", file: !1, line: 5, type: !7, isLocal: false, isDefinition: false, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false)
+!11 = !{!12, !13, !15}
+!12 = distinct !DISubprogram(name: "f2", linkageName: "_ZN3foo2f2Ev", scope: !"_ZTS3foo", file: !1, line: 7, type: !7, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, declaration: !9, variables: !2)
+!13 = distinct !DISubprogram(name: "f3", linkageName: "_ZN3foo2f3Ev", scope: !"_ZTS3foo", file: !14, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, declaration: !10, variables: !2)
+!14 = !DIFile(filename: "bar.cpp", directory: "/tmp/dbginfo")
+!15 = distinct !DISubprogram(name: "f1", linkageName: "_ZN3foo2f1Ev", scope: !"_ZTS3foo", file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, declaration: !6, variables: !2)
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 3}
+!18 = !{!"clang version 3.8.0 (trunk 249440) (llvm/trunk 249465)"}
+!19 = !DILocation(line: 8, column: 3, scope: !12)
+!20 = !DILocation(line: 9, column: 1, scope: !12)
+!21 = !DILocation(line: 3, column: 3, scope: !15)
+!22 = !DILocation(line: 2, column: 1, scope: !13)
diff --git a/test/DebugInfo/Generic/discriminator.ll b/test/DebugInfo/Generic/discriminator.ll
new file mode 100644
index 000000000000..6dbd558c5025
--- /dev/null
+++ b/test/DebugInfo/Generic/discriminator.ll
@@ -0,0 +1,52 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Given the following source, ensure that the discriminator is emitted for
+; the inlined callsite.
+
+;void xyz();
+;static void __attribute__((always_inline)) bar() { xyz(); }
+;void foo() {
+; bar(); bar();
+;}
+
+;CHECK: DW_TAG_inlined_subroutine
+;CHECK-NOT: DW_AT_GNU_discriminator
+;CHECK: DW_TAG_inlined_subroutine
+;CHECK-NOT: {{DW_TAG|NULL}}
+;CHECK: DW_AT_GNU_discriminator{{.*}}0x01
+
+; Function Attrs: uwtable
+define void @_Z3foov() #0 !dbg !4 {
+ tail call void @_Z3xyzv(), !dbg !11
+ tail call void @_Z3xyzv(), !dbg !13
+ ret void, !dbg !16
+}
+
+declare void @_Z3xyzv() #1
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 252497)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "a.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !7}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = distinct !DISubprogram(name: "bar", linkageName: "_ZL3barv", scope: !1, file: !1, line: 2, type: !5, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 252497)"}
+!11 = !DILocation(line: 2, column: 52, scope: !7, inlinedAt: !12)
+!12 = distinct !DILocation(line: 4, column: 3, scope: !4)
+!13 = !DILocation(line: 2, column: 52, scope: !7, inlinedAt: !14)
+!14 = distinct !DILocation(line: 4, column: 10, scope: !15)
+!15 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 1)
+!16 = !DILocation(line: 5, column: 1, scope: !4)
diff --git a/test/DebugInfo/Generic/dwarf-public-names.ll b/test/DebugInfo/Generic/dwarf-public-names.ll
new file mode 100644
index 000000000000..b98a8e7415c7
--- /dev/null
+++ b/test/DebugInfo/Generic/dwarf-public-names.ll
@@ -0,0 +1,131 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -generate-dwarf-pub-sections=Enable -filetype=obj -o %t.o < %s
+; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck %s
+; ModuleID = 'dwarf-public-names.cpp'
+;
+; Generated from:
+;
+; struct C {
+; void member_function();
+; static int static_member_function();
+; static int static_member_variable;
+; };
+;
+; int C::static_member_variable = 0;
+;
+; void C::member_function() {
+; static_member_variable = 0;
+; }
+;
+; int C::static_member_function() {
+; return static_member_variable;
+; }
+;
+; C global_variable;
+;
+; int global_function() {
+; return -1;
+; }
+;
+; namespace ns {
+; void global_namespace_function() {
+; global_variable.member_function();
+; }
+; int global_namespace_variable = 1;
+; }
+
+; Skip the output to the header of the pubnames section.
+; CHECK: debug_pubnames
+; CHECK: version = 0x0002
+
+; Check for each name in the output.
+; CHECK-DAG: "ns"
+; CHECK-DAG: "C::static_member_function"
+; CHECK-DAG: "global_variable"
+; CHECK-DAG: "ns::global_namespace_variable"
+; CHECK-DAG: "ns::global_namespace_function"
+; CHECK-DAG: "global_function"
+; CHECK-DAG: "C::static_member_variable"
+; CHECK-DAG: "C::member_function"
+
+%struct.C = type { i8 }
+
+@_ZN1C22static_member_variableE = global i32 0, align 4
+@global_variable = global %struct.C zeroinitializer, align 1
+@_ZN2ns25global_namespace_variableE = global i32 1, align 4
+
+define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2 !dbg !3 {
+entry:
+ %this.addr = alloca %struct.C*, align 8
+ store %struct.C* %this, %struct.C** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !28, metadata !DIExpression()), !dbg !30
+ %this1 = load %struct.C*, %struct.C** %this.addr
+ store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31
+ ret void, !dbg !32
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 !dbg !18 {
+entry:
+ %0 = load i32, i32* @_ZN1C22static_member_variableE, align 4, !dbg !33
+ ret i32 %0, !dbg !33
+}
+
+define i32 @_Z15global_functionv() nounwind uwtable !dbg !19 {
+entry:
+ ret i32 -1, !dbg !34
+}
+
+define void @_ZN2ns25global_namespace_functionEv() nounwind uwtable !dbg !20 {
+entry:
+ call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !35
+ ret void, !dbg !36
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!38}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", isOptimized: false, emissionKind: 0, file: !37, enums: !1, retainedTypes: !1, subprograms: !2, globals: !24, imports: !1)
+!1 = !{}
+!2 = !{!3, !18, !19, !20}
+!3 = distinct !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !4, scope: null, type: !5, declaration: !12, variables: !1)
+!4 = !DIFile(filename: "dwarf-public-names.cpp", directory: "/usr2/kparzysz/s.hex/t")
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !8)
+!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 1, size: 8, align: 8, file: !37, elements: !9)
+!9 = !{!10, !12, !14}
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "static_member_variable", line: 4, flags: DIFlagStaticMember, file: !37, scope: !8, baseType: !11)
+!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!12 = !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !4, scope: !8, type: !5, variables: !13)
+!13 = !{} ; previously: invalid DW_TAG_base_type
+!14 = !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !4, scope: !8, type: !15, variables: !17)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!11}
+!17 = !{} ; previously: invalid DW_TAG_base_type
+!18 = distinct !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !4, scope: null, type: !15, declaration: !14, variables: !1)
+!19 = distinct !DISubprogram(name: "global_function", linkageName: "_Z15global_functionv", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 19, file: !4, scope: !4, type: !15, variables: !1)
+!20 = distinct !DISubprogram(name: "global_namespace_function", linkageName: "_ZN2ns25global_namespace_functionEv", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !4, scope: !21, type: !22, variables: !1)
+!21 = !DINamespace(name: "ns", line: 23, file: !4, scope: null)
+!22 = !DISubroutineType(types: !23)
+!23 = !{null}
+!24 = !{!25, !26, !27}
+!25 = !DIGlobalVariable(name: "static_member_variable", linkageName: "_ZN1C22static_member_variableE", line: 7, isLocal: false, isDefinition: true, scope: !8, file: !4, type: !11, variable: i32* @_ZN1C22static_member_variableE, declaration: !10)
+!26 = !DIGlobalVariable(name: "global_variable", line: 17, isLocal: false, isDefinition: true, scope: null, file: !4, type: !8, variable: %struct.C* @global_variable)
+!27 = !DIGlobalVariable(name: "global_namespace_variable", linkageName: "_ZN2ns25global_namespace_variableE", line: 27, isLocal: false, isDefinition: true, scope: !21, file: !4, type: !11, variable: i32* @_ZN2ns25global_namespace_variableE)
+!28 = !DILocalVariable(name: "this", line: 9, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !3, file: !4, type: !29)
+!29 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !8)
+!30 = !DILocation(line: 9, scope: !3)
+!31 = !DILocation(line: 10, scope: !3)
+!32 = !DILocation(line: 11, scope: !3)
+!33 = !DILocation(line: 14, scope: !18)
+!34 = !DILocation(line: 20, scope: !19)
+!35 = !DILocation(line: 25, scope: !20)
+!36 = !DILocation(line: 26, scope: !20)
+!37 = !DIFile(filename: "dwarf-public-names.cpp", directory: "/usr2/kparzysz/s.hex/t")
+!38 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/empty.ll b/test/DebugInfo/Generic/empty.ll
new file mode 100644
index 000000000000..f787039885b9
--- /dev/null
+++ b/test/DebugInfo/Generic/empty.ll
@@ -0,0 +1,31 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump - | FileCheck %s
+; RUN: %llc_dwarf -split-dwarf=Enable < %s -filetype=obj | llvm-dwarfdump - | FileCheck --check-prefix=FISSION %s
+
+; darwin has a workaround for a linker bug so it always emits one line table entry
+; XFAIL: darwin
+
+; Expect no line table entry since there are no functions and file references in this compile unit
+; CHECK: .debug_line contents:
+; CHECK: Line table prologue:
+; CHECK: total_length: 0x00000019
+; CHECK-NOT: file_names[
+
+; CHECK: .debug_pubnames contents:
+; CHECK-NOT: Offset
+
+; CHECK: .debug_pubtypes contents:
+; CHECK-NOT: Offset
+
+; Don't emit DW_AT_addr_base when there are no addresses.
+; FISSION-NOT: DW_AT_GNU_addr_base [DW_FORM_sec_offset]
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 143523)", isOptimized: true, emissionKind: 0, file: !4, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2)
+!2 = !{}
+!3 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
+!4 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
+!5 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/enum-types.ll b/test/DebugInfo/Generic/enum-types.ll
new file mode 100644
index 000000000000..e71fcbc52bfb
--- /dev/null
+++ b/test/DebugInfo/Generic/enum-types.ll
@@ -0,0 +1,78 @@
+; REQUIRES: object-emission
+;
+; RUN: %llc_dwarf -filetype=obj -O0 -dwarf-linkage-names=Enable < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Make sure we can handle enums with the same identifier but in enum types of
+; different compile units.
+; rdar://17628609
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: 0x[[ENUM:.*]]: DW_TAG_enumeration_type
+; CHECK-NEXT: DW_AT_name {{.*}} "EA"
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_Z4topA2EA"
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x{{.*}} => {0x[[ENUM]]})
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_Z4topB2EA"
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[ENUM]]
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z4topA2EA(i32 %sa) #0 !dbg !7 {
+entry:
+ %sa.addr = alloca i32, align 4
+ store i32 %sa, i32* %sa.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %sa.addr, metadata !22, metadata !DIExpression()), !dbg !23
+ ret void, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z4topB2EA(i32 %sa) #0 !dbg !17 {
+entry:
+ %sa.addr = alloca i32, align 4
+ store i32 %sa, i32* %sa.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %sa.addr, metadata !25, metadata !DIExpression()), !dbg !26
+ ret void, !dbg !27
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0, !12}
+!llvm.module.flags = !{!19, !20}
+!llvm.ident = !{!21, !21}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 214102:214133) (llvm/trunk 214102:214132)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !6, globals: !11, imports: !11)
+!1 = !DIFile(filename: "a.cpp", directory: "")
+!2 = !{!3}
+!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "EA", line: 1, size: 32, align: 32, file: !1, elements: !4, identifier: "_ZTS2EA")
+!4 = !{!5}
+!5 = !DIEnumerator(name: "EA_0", value: 0) ; [ DW_TAG_enumerator ] [EA_0 :: 0]
+!6 = !{!7}
+!7 = distinct !DISubprogram(name: "topA", linkageName: "_Z4topA2EA", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !8, type: !9, variables: !11)
+!8 = !DIFile(filename: "a.cpp", directory: "")
+!9 = !DISubroutineType(types: !10)
+!10 = !{null, !"_ZTS2EA"}
+!11 = !{}
+!12 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 214102:214133) (llvm/trunk 214102:214132)", isOptimized: false, emissionKind: 1, file: !13, enums: !14, retainedTypes: !14, subprograms: !16, globals: !11, imports: !11)
+!13 = !DIFile(filename: "b.cpp", directory: "")
+!14 = !{!15}
+!15 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "EA", line: 1, size: 32, align: 32, file: !13, elements: !4, identifier: "_ZTS2EA")
+!16 = !{!17}
+!17 = distinct !DISubprogram(name: "topB", linkageName: "_Z4topB2EA", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !13, scope: !18, type: !9, variables: !11)
+!18 = !DIFile(filename: "b.cpp", directory: "")
+!19 = !{i32 2, !"Dwarf Version", i32 2}
+!20 = !{i32 2, !"Debug Info Version", i32 3}
+!21 = !{!"clang version 3.5.0 (trunk 214102:214133) (llvm/trunk 214102:214132)"}
+!22 = !DILocalVariable(name: "sa", line: 5, arg: 1, scope: !7, file: !8, type: !"_ZTS2EA")
+!23 = !DILocation(line: 5, column: 14, scope: !7)
+!24 = !DILocation(line: 6, column: 1, scope: !7)
+!25 = !DILocalVariable(name: "sa", line: 5, arg: 1, scope: !17, file: !18, type: !"_ZTS2EA")
+!26 = !DILocation(line: 5, column: 14, scope: !17)
+!27 = !DILocation(line: 6, column: 1, scope: !17)
diff --git a/test/DebugInfo/Generic/enum.ll b/test/DebugInfo/Generic/enum.ll
new file mode 100644
index 000000000000..6d1bddc2f728
--- /dev/null
+++ b/test/DebugInfo/Generic/enum.ll
@@ -0,0 +1,80 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; IR generated from the following code compiled with clang -g:
+; enum e1 { I, J = 0xffffffffU, K = 0xf000000000000000ULL } a;
+; enum e2 { X };
+; void func() {
+; int b = X;
+; }
+
+; These values were previously being truncated to -1 and 0 respectively.
+
+; CHECK: debug_info contents
+; CHECK: DW_TAG_enumeration_type
+; CHECK-NEXT: DW_AT_name{{.*}} = "e1"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_enumerator
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_enumerator
+; CHECK-NEXT: DW_AT_name{{.*}} = "J"
+; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata] (4294967295)
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_enumerator
+; CHECK-NEXT: DW_AT_name{{.*}} = "K"
+; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata] (-1152921504606846976)
+
+; Check that we retain enums that aren't referenced by any variables, etc
+; CHECK: DW_TAG_enumeration_type
+; CHECK-NEXT: DW_AT_name{{.*}} = "e2"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_enumerator
+; CHECK-NEXT: DW_AT_name{{.*}} = "X"
+
+@a = global i64 0, align 8
+
+; Function Attrs: nounwind uwtable
+define void @_Z4funcv() #0 !dbg !13 {
+entry:
+ %b = alloca i32, align 4
+ call void @llvm.dbg.declare(metadata i32* %b, metadata !20, metadata !DIExpression()), !dbg !22
+ store i32 0, i32* %b, align 4, !dbg !22
+ ret void, !dbg !23
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!19, !24}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !11, subprograms: !12, globals: !17, imports: !11)
+!1 = !DIFile(filename: "enum.cpp", directory: "/tmp")
+!2 = !{!3, !8}
+!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "e1", line: 1, size: 64, align: 64, file: !1, elements: !4)
+!4 = !{!5, !6, !7}
+!5 = !DIEnumerator(name: "I", value: 0) ; [ DW_TAG_enumerator ] [I :: 0]
+!6 = !DIEnumerator(name: "J", value: 4294967295) ; [ DW_TAG_enumerator ] [J :: 4294967295]
+!7 = !DIEnumerator(name: "K", value: -1152921504606846976) ; [ DW_TAG_enumerator ] [K :: 17293822569102704640]
+!8 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "e2", line: 2, size: 32, align: 32, file: !1, elements: !9)
+!9 = !{!10}
+!10 = !DIEnumerator(name: "X", value: 0) ; [ DW_TAG_enumerator ] [X :: 0]
+!11 = !{}
+!12 = !{!13}
+!13 = distinct !DISubprogram(name: "func", linkageName: "_Z4funcv", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !14, type: !15, variables: !11)
+!14 = !DIFile(filename: "enum.cpp", directory: "/tmp")
+!15 = !DISubroutineType(types: !16)
+!16 = !{null}
+!17 = !{!18}
+!18 = !DIGlobalVariable(name: "a", line: 1, isLocal: false, isDefinition: true, scope: null, file: !14, type: !3, variable: i64* @a)
+!19 = !{i32 2, !"Dwarf Version", i32 3}
+!20 = !DILocalVariable(name: "b", line: 4, scope: !13, file: !14, type: !21)
+!21 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!22 = !DILocation(line: 4, scope: !13)
+!23 = !DILocation(line: 5, scope: !13)
+!24 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/global.ll b/test/DebugInfo/Generic/global.ll
new file mode 100644
index 000000000000..6cac9a2de05e
--- /dev/null
+++ b/test/DebugInfo/Generic/global.ll
@@ -0,0 +1,42 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Also test that the null streamer doesn't crash with debug info.
+; RUN: %llc_dwarf -O0 -filetype=null < %s
+
+; generated from the following source compiled to bitcode with clang -g -O1
+; static int i;
+; int main() {
+; (void)&i;
+; }
+
+; CHECK: debug_info contents
+; CHECK: DW_TAG_variable
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @main() #0 !dbg !4 {
+entry:
+ ret i32 0, !dbg !12
+}
+
+attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
+!1 = !DIFile(filename: "global.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "global.cpp", directory: "/tmp")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DIGlobalVariable(name: "i", linkageName: "_ZL1i", line: 1, isLocal: true, isDefinition: true, scope: null, file: !5, type: !8)
+!11 = !{i32 2, !"Dwarf Version", i32 3}
+!12 = !DILocation(line: 4, scope: !4)
+!13 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/gmlt.test b/test/DebugInfo/Generic/gmlt.test
index 0514dbfc461a..0514dbfc461a 100644
--- a/test/DebugInfo/gmlt.test
+++ b/test/DebugInfo/Generic/gmlt.test
diff --git a/test/DebugInfo/Generic/gvn.ll b/test/DebugInfo/Generic/gvn.ll
new file mode 100644
index 000000000000..f9fb7ab5520f
--- /dev/null
+++ b/test/DebugInfo/Generic/gvn.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+;
+; Produced at -O2 from:
+; int a, b;
+; void f1(int *p1) {
+; if (b)
+; a = 1;
+; if (a && *p1)
+; f4();
+; }
+; int f2(int);
+; void f3(void) {
+; a = f2(1);
+; f1(&a);
+; }
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @f3() #0 !dbg !12 {
+entry:
+ ; Verify that the call still has a debug location after GVN.
+ ; CHECK: %call = tail call i32 @f2(i32 1) #{{[0-9]}}, !dbg
+ %call = tail call i32 @f2(i32 1) #3, !dbg !36
+ store i32 %call, i32* @a, align 4, !dbg !36, !tbaa !25
+ tail call void @llvm.dbg.value(metadata i32* @a, i64 0, metadata !11, metadata !21) #3, !dbg !39
+ %0 = load i32, i32* @b, align 4, !dbg !39, !tbaa !25
+ %tobool.i = icmp eq i32 %0, 0, !dbg !39
+ br i1 %tobool.i, label %if.end.i, label %land.lhs.true.i.thread, !dbg !40
+
+land.lhs.true.i.thread: ; preds = %entry
+ store i32 1, i32* @a, align 4, !dbg !41, !tbaa !25
+ br label %if.then.3.i, !dbg !42
+
+if.end.i: ; preds = %entry
+ ; This instruction has no debug location -- in this
+ ; particular case it was removed by a bug in SimplifyCFG.
+ %.pr = load i32, i32* @a, align 4
+
+ ; GVN is supposed to replace the load of %.pr with a direct reference to %call.
+ ; CHECK: %tobool2.i = icmp eq i32 %call, 0, !dbg
+ %tobool2.i = icmp eq i32 %.pr, 0, !dbg !43
+ br i1 %tobool2.i, label %f1.exit, label %if.then.3.i, !dbg !43
+
+if.then.3.i: ; preds = %if.end.i, %land.lhs.true.i.thread
+ %call.i = tail call i32 bitcast (i32 (...)* @f4 to i32 ()*)() #3, !dbg !44
+ br label %f1.exit, !dbg !44
+
+f1.exit: ; preds = %if.end.i, %if.then.3.i
+ ret void, !dbg !45
+}
+
+declare i32 @f2(i32)
+declare i32 @f4(...)
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18, !19}
+!llvm.ident = !{!20}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 245562) (llvm/trunk 245569)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !15)
+!1 = !DIFile(filename: "test.c", directory: "/")
+!2 = !{}
+!3 = !{!4, !12}
+!4 = distinct !DISubprogram(name: "f1", scope: !1, file: !1, line: 2, type: !6, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !10)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null, !8}
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !{!11}
+!11 = !DILocalVariable(name: "p1", arg: 1, scope: !4, file: !1, line: 2, type: !8)
+!12 = distinct !DISubprogram(name: "f3", scope: !1, file: !1, line: 9, type: !13, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
+!13 = !DISubroutineType(types: !14)
+!14 = !{null}
+!15 = !{!16, !17}
+!16 = !DIGlobalVariable(name: "a", scope: !0, file: !1, line: 1, type: !9, isLocal: false, isDefinition: true, variable: i32* @a)
+!17 = !DIGlobalVariable(name: "b", scope: !0, file: !1, line: 1, type: !9, isLocal: false, isDefinition: true, variable: i32* @b)
+!18 = !{i32 2, !"Dwarf Version", i32 2}
+!19 = !{i32 2, !"Debug Info Version", i32 3}
+!20 = !{!"clang version 3.8.0 (trunk 245562) (llvm/trunk 245569)"}
+!21 = !DIExpression()
+!22 = !DILocation(line: 2, scope: !4)
+!23 = !DILocation(line: 3, scope: !24)
+!24 = distinct !DILexicalBlock(scope: !4, file: !1, line: 3)
+!25 = !{!26, !26, i64 0}
+!26 = !{!"int", !27, i64 0}
+!27 = !{!"omnipotent char", !28, i64 0}
+!28 = !{!"Simple C/C++ TBAA"}
+!29 = !DILocation(line: 3, scope: !4)
+!30 = !DILocation(line: 4, scope: !24)
+!31 = !DILocation(line: 5, scope: !32)
+!32 = distinct !DILexicalBlock(scope: !4, file: !1, line: 5)
+!33 = !DILocation(line: 5, scope: !4)
+!34 = !DILocation(line: 6, scope: !32)
+!35 = !DILocation(line: 7, scope: !4)
+!36 = !DILocation(line: 5, scope: !32, inlinedAt: !37)
+!37 = distinct !DILocation(line: 11, scope: !12)
+!38 = !DILocation(line: 10, scope: !12)
+!39 = !DILocation(line: 2, scope: !4, inlinedAt: !37)
+!40 = !DILocation(line: 3, scope: !24, inlinedAt: !37)
+!41 = !DILocation(line: 3, scope: !4, inlinedAt: !37)
+!42 = !DILocation(line: 4, scope: !24, inlinedAt: !37)
+!43 = !DILocation(line: 5, scope: !4, inlinedAt: !37)
+!44 = !DILocation(line: 6, scope: !32, inlinedAt: !37)
+!45 = !DILocation(line: 12, scope: !12)
diff --git a/test/DebugInfo/Generic/incorrect-variable-debugloc.ll b/test/DebugInfo/Generic/incorrect-variable-debugloc.ll
new file mode 100644
index 000000000000..cf713ee3b970
--- /dev/null
+++ b/test/DebugInfo/Generic/incorrect-variable-debugloc.ll
@@ -0,0 +1,391 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O2 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; This is a test case that's as reduced as I can get it, though I haven't fully
+; understood the mechanisms by which this bug occurs, so perhaps there's further
+; simplification to be had (it's certainly a bit non-obvious what's going on). I
+; hesitate to hand-craft or otherwise simplify the IR compared to what Clang
+; generates as this is a particular tickling of optimizations and debug location
+; propagation I want a realistic example of.
+
+; Generated with clang-tot -cc1 -g -O2 -w -std=c++11 -fsanitize=address,use-after-return -fcxx-exceptions -fexceptions -x c++ incorrect-variable-debug-loc.cpp -emit-llvm
+
+; struct A {
+; int m_fn1();
+; };
+;
+; struct B {
+; void __attribute__((always_inline)) m_fn2() { i = 0; }
+; int i;
+; };
+;
+; struct C {
+; void m_fn3();
+; int j;
+; B b;
+; };
+;
+; int fn1() {
+; C A;
+; A.b.m_fn2();
+; A.m_fn3();
+; }
+; void C::m_fn3() {
+; A().m_fn1();
+; b.m_fn2();
+; }
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}} "C"
+; CHECK: [[M_FN3_DECL:.*]]: DW_TAG_subprogram
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_AT_name {{.*}} "m_fn3"
+
+; CHECK: DW_AT_specification {{.*}} {[[M_FN3_DECL]]}
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "this"
+
+%struct.C = type { i32, %struct.B }
+%struct.B = type { i32 }
+%struct.A = type { i8 }
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 1, void ()* @asan.module_ctor }]
+@__asan_option_detect_stack_use_after_return = external global i32
+@__asan_gen_ = private unnamed_addr constant [11 x i8] c"1 32 8 1 A\00", align 1
+@__asan_gen_1 = private unnamed_addr constant [13 x i8] c"1 32 1 3 tmp\00", align 1
+
+; Function Attrs: noreturn sanitize_address
+define i32 @_Z3fn1v() #0 !dbg !22 {
+entry:
+ %MyAlloca = alloca [64 x i8], align 32, !dbg !39
+ %0 = ptrtoint [64 x i8]* %MyAlloca to i64, !dbg !39
+ %1 = load i32, i32* @__asan_option_detect_stack_use_after_return, !dbg !39
+ %2 = icmp ne i32 %1, 0, !dbg !39
+ br i1 %2, label %3, label %5
+
+; <label>:3 ; preds = %entry
+ %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0), !dbg !39
+ br label %5
+
+; <label>:5 ; preds = %entry, %3
+ %6 = phi i64 [ %0, %entry ], [ %4, %3 ], !dbg !39
+ %7 = add i64 %6, 32, !dbg !39
+ %8 = inttoptr i64 %7 to %struct.C*, !dbg !39
+ %9 = inttoptr i64 %6 to i64*, !dbg !39
+ store i64 1102416563, i64* %9, !dbg !39
+ %10 = add i64 %6, 8, !dbg !39
+ %11 = inttoptr i64 %10 to i64*, !dbg !39
+ store i64 ptrtoint ([11 x i8]* @__asan_gen_ to i64), i64* %11, !dbg !39
+ %12 = add i64 %6, 16, !dbg !39
+ %13 = inttoptr i64 %12 to i64*, !dbg !39
+ store i64 ptrtoint (i32 ()* @_Z3fn1v to i64), i64* %13, !dbg !39
+ %14 = lshr i64 %6, 3, !dbg !39
+ %15 = add i64 %14, 2147450880, !dbg !39
+ %16 = add i64 %15, 0, !dbg !39
+ %17 = inttoptr i64 %16 to i64*, !dbg !39
+ store i64 -868083117767659023, i64* %17, !dbg !39
+ %i.i = getelementptr inbounds %struct.C, %struct.C* %8, i64 0, i32 1, i32 0, !dbg !39
+ %18 = ptrtoint i32* %i.i to i64, !dbg !39
+ %19 = lshr i64 %18, 3, !dbg !39
+ %20 = add i64 %19, 2147450880, !dbg !39
+ %21 = inttoptr i64 %20 to i8*, !dbg !39
+ %22 = load i8, i8* %21, !dbg !39
+ %23 = icmp ne i8 %22, 0, !dbg !39
+ br i1 %23, label %24, label %30, !dbg !39
+
+; <label>:24 ; preds = %5
+ %25 = and i64 %18, 7, !dbg !39
+ %26 = add i64 %25, 3, !dbg !39
+ %27 = trunc i64 %26 to i8, !dbg !39
+ %28 = icmp sge i8 %27, %22, !dbg !39
+ br i1 %28, label %29, label %30
+
+; <label>:29 ; preds = %24
+ call void @__asan_report_store4(i64 %18), !dbg !39
+ call void asm sideeffect "", ""()
+ unreachable
+
+; <label>:30 ; preds = %24, %5
+ store i32 0, i32* %i.i, align 4, !dbg !39, !tbaa !41
+ tail call void @llvm.dbg.value(metadata %struct.C* %8, i64 0, metadata !27, metadata !DIExpression()), !dbg !46
+ call void @_ZN1C5m_fn3Ev(%struct.C* %8), !dbg !47
+ unreachable, !dbg !47
+}
+
+; Function Attrs: sanitize_address
+define void @_ZN1C5m_fn3Ev(%struct.C* nocapture %this) #1 align 2 !dbg !28 {
+entry:
+ %MyAlloca = alloca [64 x i8], align 32, !dbg !48
+ %0 = ptrtoint [64 x i8]* %MyAlloca to i64, !dbg !48
+ %1 = load i32, i32* @__asan_option_detect_stack_use_after_return, !dbg !48
+ %2 = icmp ne i32 %1, 0, !dbg !48
+ br i1 %2, label %3, label %5
+
+; <label>:3 ; preds = %entry
+ %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0), !dbg !48
+ br label %5
+
+; <label>:5 ; preds = %entry, %3
+ %6 = phi i64 [ %0, %entry ], [ %4, %3 ], !dbg !48
+ %7 = add i64 %6, 32, !dbg !48
+ %8 = inttoptr i64 %7 to %struct.A*, !dbg !48
+ %9 = inttoptr i64 %6 to i64*, !dbg !48
+ store i64 1102416563, i64* %9, !dbg !48
+ %10 = add i64 %6, 8, !dbg !48
+ %11 = inttoptr i64 %10 to i64*, !dbg !48
+ store i64 ptrtoint ([13 x i8]* @__asan_gen_1 to i64), i64* %11, !dbg !48
+ %12 = add i64 %6, 16, !dbg !48
+ %13 = inttoptr i64 %12 to i64*, !dbg !48
+ store i64 ptrtoint (void (%struct.C*)* @_ZN1C5m_fn3Ev to i64), i64* %13, !dbg !48
+ %14 = lshr i64 %6, 3, !dbg !48
+ %15 = add i64 %14, 2147450880, !dbg !48
+ %16 = add i64 %15, 0, !dbg !48
+ %17 = inttoptr i64 %16 to i64*, !dbg !48
+ store i64 -868083113472691727, i64* %17, !dbg !48
+ tail call void @llvm.dbg.value(metadata %struct.C* %this, i64 0, metadata !30, metadata !DIExpression()), !dbg !48
+ %call = call i32 @_ZN1A5m_fn1Ev(%struct.A* %8), !dbg !49
+ %i.i = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 1, i32 0, !dbg !50
+ %18 = ptrtoint i32* %i.i to i64, !dbg !50
+ %19 = lshr i64 %18, 3, !dbg !50
+ %20 = add i64 %19, 2147450880, !dbg !50
+ %21 = inttoptr i64 %20 to i8*, !dbg !50
+ %22 = load i8, i8* %21, !dbg !50
+ %23 = icmp ne i8 %22, 0, !dbg !50
+ br i1 %23, label %24, label %30, !dbg !50
+
+; <label>:24 ; preds = %5
+ %25 = and i64 %18, 7, !dbg !50
+ %26 = add i64 %25, 3, !dbg !50
+ %27 = trunc i64 %26 to i8, !dbg !50
+ %28 = icmp sge i8 %27, %22, !dbg !50
+ br i1 %28, label %29, label %30
+
+; <label>:29 ; preds = %24
+ call void @__asan_report_store4(i64 %18), !dbg !50
+ call void asm sideeffect "", ""()
+ unreachable
+
+; <label>:30 ; preds = %24, %5
+ store i32 0, i32* %i.i, align 4, !dbg !50, !tbaa !41
+ store i64 1172321806, i64* %9, !dbg !52
+ %31 = icmp ne i64 %6, %0, !dbg !52
+ br i1 %31, label %32, label %39, !dbg !52
+
+; <label>:32 ; preds = %30
+ %33 = add i64 %15, 0, !dbg !52
+ %34 = inttoptr i64 %33 to i64*, !dbg !52
+ store i64 -723401728380766731, i64* %34, !dbg !52
+ %35 = add i64 %6, 56, !dbg !52
+ %36 = inttoptr i64 %35 to i64*, !dbg !52
+ %37 = load i64, i64* %36, !dbg !52
+ %38 = inttoptr i64 %37 to i8*, !dbg !52
+ store i8 0, i8* %38, !dbg !52
+ br label %42, !dbg !52
+
+; <label>:39 ; preds = %30
+ %40 = add i64 %15, 0, !dbg !52
+ %41 = inttoptr i64 %40 to i64*, !dbg !52
+ store i64 0, i64* %41, !dbg !52
+ br label %42, !dbg !52
+
+; <label>:42 ; preds = %39, %32
+ ret void, !dbg !52
+}
+
+declare i32 @_ZN1A5m_fn1Ev(%struct.A*) #2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
+
+define internal void @asan.module_ctor() {
+ tail call void @__asan_init_v3()
+ ret void
+}
+
+declare void @__asan_init_v3()
+
+declare void @__asan_report_load1(i64)
+
+declare void @__asan_load1(i64)
+
+declare void @__asan_report_load2(i64)
+
+declare void @__asan_load2(i64)
+
+declare void @__asan_report_load4(i64)
+
+declare void @__asan_load4(i64)
+
+declare void @__asan_report_load8(i64)
+
+declare void @__asan_load8(i64)
+
+declare void @__asan_report_load16(i64)
+
+declare void @__asan_load16(i64)
+
+declare void @__asan_report_store1(i64)
+
+declare void @__asan_store1(i64)
+
+declare void @__asan_report_store2(i64)
+
+declare void @__asan_store2(i64)
+
+declare void @__asan_report_store4(i64)
+
+declare void @__asan_store4(i64)
+
+declare void @__asan_report_store8(i64)
+
+declare void @__asan_store8(i64)
+
+declare void @__asan_report_store16(i64)
+
+declare void @__asan_store16(i64)
+
+declare void @__asan_report_load_n(i64, i64)
+
+declare void @__asan_report_store_n(i64, i64)
+
+declare void @__asan_loadN(i64, i64)
+
+declare void @__asan_storeN(i64, i64)
+
+declare i8* @__asan_memmove(i8*, i8*, i64)
+
+declare i8* @__asan_memcpy(i8*, i8*, i64)
+
+declare i8* @__asan_memset(i8*, i32, i64)
+
+declare void @__asan_handle_no_return()
+
+declare void @__sanitizer_cov()
+
+declare void @__sanitizer_ptr_cmp(i64, i64)
+
+declare void @__sanitizer_ptr_sub(i64, i64)
+
+declare i64 @__asan_stack_malloc_0(i64, i64)
+
+declare void @__asan_stack_free_0(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_1(i64, i64)
+
+declare void @__asan_stack_free_1(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_2(i64, i64)
+
+declare void @__asan_stack_free_2(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_3(i64, i64)
+
+declare void @__asan_stack_free_3(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_4(i64, i64)
+
+declare void @__asan_stack_free_4(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_5(i64, i64)
+
+declare void @__asan_stack_free_5(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_6(i64, i64)
+
+declare void @__asan_stack_free_6(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_7(i64, i64)
+
+declare void @__asan_stack_free_7(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_8(i64, i64)
+
+declare void @__asan_stack_free_8(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_9(i64, i64)
+
+declare void @__asan_stack_free_9(i64, i64, i64)
+
+declare i64 @__asan_stack_malloc_10(i64, i64)
+
+declare void @__asan_stack_free_10(i64, i64, i64)
+
+declare void @__asan_poison_stack_memory(i64, i64)
+
+declare void @__asan_unpoison_stack_memory(i64, i64)
+
+declare void @__asan_before_dynamic_init(i64)
+
+declare void @__asan_after_dynamic_init()
+
+declare void @__asan_register_globals(i64, i64)
+
+declare void @__asan_unregister_globals(i64, i64)
+
+declare void @__sanitizer_cov_module_init(i64)
+
+attributes #0 = { noreturn sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!36, !37}
+!llvm.ident = !{!38}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !21, globals: !2, imports: !2)
+!1 = !DIFile(filename: "<stdin>", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4, !14}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 10, size: 64, align: 32, file: !5, elements: !6, identifier: "_ZTS1C")
+!5 = !DIFile(filename: "incorrect-variable-debug-loc.cpp", directory: "/tmp/dbginfo")
+!6 = !{!7, !9, !10}
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "j", line: 12, size: 32, align: 32, file: !5, scope: !"_ZTS1C", baseType: !8)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 13, size: 32, align: 32, offset: 32, file: !5, scope: !"_ZTS1C", baseType: !"_ZTS1B")
+!10 = !DISubprogram(name: "m_fn3", linkageName: "_ZN1C5m_fn3Ev", line: 11, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !5, scope: !"_ZTS1C", type: !11)
+!11 = !DISubroutineType(types: !12)
+!12 = !{null, !13}
+!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
+!14 = !DICompositeType(tag: DW_TAG_structure_type, name: "B", line: 5, size: 32, align: 32, file: !5, elements: !15, identifier: "_ZTS1B")
+!15 = !{!16, !17}
+!16 = !DIDerivedType(tag: DW_TAG_member, name: "i", line: 7, size: 32, align: 32, file: !5, scope: !"_ZTS1B", baseType: !8)
+!17 = !DISubprogram(name: "m_fn2", linkageName: "_ZN1B5m_fn2Ev", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !5, scope: !"_ZTS1B", type: !18)
+!18 = !DISubroutineType(types: !19)
+!19 = !{null, !20}
+!20 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1B")
+!21 = !{!22, !28, !32}
+!22 = distinct !DISubprogram(name: "fn1", linkageName: "_Z3fn1v", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !5, scope: !23, type: !24, variables: !26)
+!23 = !DIFile(filename: "incorrect-variable-debug-loc.cpp", directory: "/tmp/dbginfo")
+!24 = !DISubroutineType(types: !25)
+!25 = !{!8}
+!26 = !{!27}
+!27 = !DILocalVariable(name: "A", line: 17, scope: !22, file: !23, type: !"_ZTS1C")
+!28 = distinct !DISubprogram(name: "m_fn3", linkageName: "_ZN1C5m_fn3Ev", line: 21, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 21, file: !5, scope: !"_ZTS1C", type: !11, declaration: !10, variables: !29)
+!29 = !{!30}
+!30 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
+!31 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1C")
+!32 = distinct !DISubprogram(name: "m_fn2", linkageName: "_ZN1B5m_fn2Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !5, scope: !"_ZTS1B", type: !18, declaration: !17, variables: !33)
+!33 = !{!34}
+!34 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !32, type: !35)
+!35 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1B")
+!36 = !{i32 2, !"Dwarf Version", i32 4}
+!37 = !{i32 2, !"Debug Info Version", i32 3}
+!38 = !{!"clang version 3.5.0 "}
+!39 = !DILocation(line: 6, scope: !32, inlinedAt: !40)
+!40 = !DILocation(line: 18, scope: !22)
+!41 = !{!42, !43, i64 0}
+!42 = !{!"_ZTS1B", !43, i64 0}
+!43 = !{!"int", !44, i64 0}
+!44 = !{!"omnipotent char", !45, i64 0}
+!45 = !{!"Simple C/C++ TBAA"}
+!46 = !DILocation(line: 17, scope: !22)
+!47 = !DILocation(line: 19, scope: !22)
+!48 = !DILocation(line: 0, scope: !28)
+!49 = !DILocation(line: 22, scope: !28)
+!50 = !DILocation(line: 6, scope: !32, inlinedAt: !51)
+!51 = !DILocation(line: 23, scope: !28)
+!52 = !DILocation(line: 24, scope: !28)
diff --git a/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll b/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll
new file mode 100644
index 000000000000..33af0baeb658
--- /dev/null
+++ b/test/DebugInfo/Generic/incorrect-variable-debugloc1.ll
@@ -0,0 +1,77 @@
+; REQUIRES: object-emission
+; This test is failing for powerpc64, because a location list for the
+; variable 'c' is not generated at all. Temporary marking this test as XFAIL
+; for powerpc, until PR21881 is fixed.
+; XFAIL: powerpc64
+
+; RUN: %llc_dwarf -O2 -dwarf-version 2 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF23
+; RUN: %llc_dwarf -O2 -dwarf-version 3 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF23
+; RUN: %llc_dwarf -O2 -dwarf-version 4 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF4
+
+; This is a test for PR21176.
+; DW_OP_const <const> doesn't describe a constant value, but a value at a constant address.
+; The proper way to describe a constant value is DW_OP_constu <const>, DW_OP_stack_value.
+
+; Generated with clang -S -emit-llvm -g -O2 test.cpp
+
+; extern int func();
+;
+; int main()
+; {
+; volatile int c = 13;
+; c = func();
+; return c;
+; }
+
+; DWARF23: Location description: 10 0d {{$}}
+; DWARF4: Location description: 10 0d 9f
+
+; Function Attrs: uwtable
+define i32 @main() #0 !dbg !4 {
+entry:
+ %c = alloca i32, align 4
+ tail call void @llvm.dbg.value(metadata i32 13, i64 0, metadata !10, metadata !16), !dbg !17
+ store volatile i32 13, i32* %c, align 4, !dbg !18
+ %call = tail call i32 @_Z4funcv(), !dbg !19
+ tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !10, metadata !16), !dbg !17
+ store volatile i32 %call, i32* %c, align 4, !dbg !19
+ tail call void @llvm.dbg.value(metadata i32* %c, i64 0, metadata !10, metadata !16), !dbg !17
+ %c.0.c.0. = load volatile i32, i32* %c, align 4, !dbg !20
+ ret i32 %c.0.c.0., !dbg !20
+}
+
+declare i32 @_Z4funcv() #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 223522)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "test.cpp", directory: "/home/kromanova/ngh/ToT_latest/llvm/test/DebugInfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !9)
+!5 = !DIFile(filename: "test.cpp", directory: "/home/kromanova/ngh/ToT_latest/llvm/test/DebugInfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DILocalVariable(name: "c", line: 5, scope: !4, file: !5, type: !11)
+!11 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !8)
+!12 = !{i32 2, !"Dwarf Version", i32 2}
+!13 = !{i32 2, !"Debug Info Version", i32 3}
+!14 = !{!"clang version 3.6.0 (trunk 223522)"}
+!15 = !{i32 13}
+!16 = !DIExpression()
+!17 = !DILocation(line: 5, column: 16, scope: !4)
+!18 = !DILocation(line: 5, column: 3, scope: !4)
+!19 = !DILocation(line: 6, column: 7, scope: !4)
+!20 = !DILocation(line: 7, column: 3, scope: !4)
+
diff --git a/test/DebugInfo/Generic/inheritance.ll b/test/DebugInfo/Generic/inheritance.ll
new file mode 100644
index 000000000000..802c4f195d47
--- /dev/null
+++ b/test/DebugInfo/Generic/inheritance.ll
@@ -0,0 +1,154 @@
+; RUN: llc %s -o /dev/null
+; PR 2613.
+
+%struct.__class_type_info_pseudo = type { %struct.__type_info_pseudo }
+%struct.__type_info_pseudo = type { i8*, i8* }
+%struct.test1 = type { i32 (...)** }
+
+@_ZTV5test1 = weak_odr constant [4 x i32 (...)*] [i32 (...)* null, i32 (...)* bitcast (%struct.__class_type_info_pseudo* @_ZTI5test1 to i32 (...)*), i32 (...)* bitcast (void (%struct.test1*)* @_ZN5test1D1Ev to i32 (...)*), i32 (...)* bitcast (void (%struct.test1*)* @_ZN5test1D0Ev to i32 (...)*)], align 32 ; <[4 x i32 (...)*]*> [#uses=1]
+@_ZTI5test1 = weak_odr constant %struct.__class_type_info_pseudo { %struct.__type_info_pseudo { i8* inttoptr (i64 add (i64 ptrtoint ([0 x i32 (...)*]* @_ZTVN10__cxxabiv117__class_type_infoE to i64), i64 16) to i8*), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @_ZTS5test1, i64 0, i64 0) } }, align 16 ; <%struct.__class_type_info_pseudo*> [#uses=1]
+@_ZTVN10__cxxabiv117__class_type_infoE = external constant [0 x i32 (...)*] ; <[0 x i32 (...)*]*> [#uses=1]
+@_ZTS5test1 = weak_odr constant [7 x i8] c"5test1\00" ; <[7 x i8]*> [#uses=2]
+
+define i32 @main() nounwind ssp {
+entry:
+ %retval = alloca i32 ; <i32*> [#uses=2]
+ %0 = alloca i32 ; <i32*> [#uses=2]
+ %tst = alloca %struct.test1 ; <%struct.test1*> [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata %struct.test1* %tst, metadata !0, metadata !DIExpression()), !dbg !21
+ call void @_ZN5test1C1Ev(%struct.test1* %tst) nounwind, !dbg !22
+ store i32 0, i32* %0, align 4, !dbg !23
+ %1 = load i32, i32* %0, align 4, !dbg !23 ; <i32> [#uses=1]
+ store i32 %1, i32* %retval, align 4, !dbg !23
+ br label %return, !dbg !23
+
+return: ; preds = %entry
+ %retval1 = load i32, i32* %retval, !dbg !23 ; <i32> [#uses=1]
+ ret i32 %retval1, !dbg !23
+}
+
+define linkonce_odr void @_ZN5test1C1Ev(%struct.test1* %this) nounwind ssp align 2 {
+entry:
+ %this_addr = alloca %struct.test1* ; <%struct.test1**> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !24, metadata !DIExpression()), !dbg !28
+ store %struct.test1* %this, %struct.test1** %this_addr
+ %0 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !28 ; <%struct.test1*> [#uses=1]
+ %1 = getelementptr inbounds %struct.test1, %struct.test1* %0, i32 0, i32 0, !dbg !28 ; <i32 (...)***> [#uses=1]
+ store i32 (...)** getelementptr inbounds ([4 x i32 (...)*], [4 x i32 (...)*]* @_ZTV5test1, i64 0, i64 2), i32 (...)*** %1, align 8, !dbg !28
+ br label %return, !dbg !28
+
+return: ; preds = %entry
+ ret void, !dbg !29
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+define linkonce_odr void @_ZN5test1D1Ev(%struct.test1* %this) nounwind ssp align 2 {
+entry:
+ %this_addr = alloca %struct.test1* ; <%struct.test1**> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !32, metadata !DIExpression()), !dbg !34
+ store %struct.test1* %this, %struct.test1** %this_addr
+ %0 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !35 ; <%struct.test1*> [#uses=1]
+ %1 = getelementptr inbounds %struct.test1, %struct.test1* %0, i32 0, i32 0, !dbg !35 ; <i32 (...)***> [#uses=1]
+ store i32 (...)** getelementptr inbounds ([4 x i32 (...)*], [4 x i32 (...)*]* @_ZTV5test1, i64 0, i64 2), i32 (...)*** %1, align 8, !dbg !35
+ br label %bb, !dbg !37
+
+bb: ; preds = %entry
+ %2 = trunc i32 0 to i8, !dbg !37 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %2, 0, !dbg !37 ; <i1> [#uses=1]
+ br i1 %toBool, label %bb1, label %bb2, !dbg !37
+
+bb1: ; preds = %bb
+ %3 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !37 ; <%struct.test1*> [#uses=1]
+ %4 = bitcast %struct.test1* %3 to i8*, !dbg !37 ; <i8*> [#uses=1]
+ call void @_ZdlPv(i8* %4) nounwind, !dbg !37
+ br label %bb2, !dbg !37
+
+bb2: ; preds = %bb1, %bb
+ br label %return, !dbg !37
+
+return: ; preds = %bb2
+ ret void, !dbg !37
+}
+
+define linkonce_odr void @_ZN5test1D0Ev(%struct.test1* %this) nounwind ssp align 2 {
+entry:
+ %this_addr = alloca %struct.test1* ; <%struct.test1**> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !38, metadata !DIExpression()), !dbg !40
+ store %struct.test1* %this, %struct.test1** %this_addr
+ %0 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !41 ; <%struct.test1*> [#uses=1]
+ %1 = getelementptr inbounds %struct.test1, %struct.test1* %0, i32 0, i32 0, !dbg !41 ; <i32 (...)***> [#uses=1]
+ store i32 (...)** getelementptr inbounds ([4 x i32 (...)*], [4 x i32 (...)*]* @_ZTV5test1, i64 0, i64 2), i32 (...)*** %1, align 8, !dbg !41
+ br label %bb, !dbg !43
+
+bb: ; preds = %entry
+ %2 = trunc i32 1 to i8, !dbg !43 ; <i8> [#uses=1]
+ %toBool = icmp ne i8 %2, 0, !dbg !43 ; <i1> [#uses=1]
+ br i1 %toBool, label %bb1, label %bb2, !dbg !43
+
+bb1: ; preds = %bb
+ %3 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !43 ; <%struct.test1*> [#uses=1]
+ %4 = bitcast %struct.test1* %3 to i8*, !dbg !43 ; <i8*> [#uses=1]
+ call void @_ZdlPv(i8* %4) nounwind, !dbg !43
+ br label %bb2, !dbg !43
+
+bb2: ; preds = %bb1, %bb
+ br label %return, !dbg !43
+
+return: ; preds = %bb2
+ ret void, !dbg !43
+}
+
+declare void @_ZdlPv(i8*) nounwind
+
+!0 = !DILocalVariable(name: "tst", line: 13, scope: !1, file: !4, type: !8)
+!1 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !2)
+!2 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !3)
+!3 = distinct !DISubprogram(name: "main", linkageName: "main", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !4, type: !5)
+!4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !44, enums: !45, retainedTypes: !45)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "test1", line: 1, size: 64, align: 64, file: !44, scope: !4, elements: !9, vtableHolder: !8)
+!9 = !{!10, !14, !18}
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "_vptr$test1", line: 1, size: 64, align: 64, file: !44, scope: !8, baseType: !11)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !4, baseType: !12)
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "__vtbl_ptr_type", scope: !4, baseType: !5)
+!13 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !46, enums: !45, retainedTypes: !45)
+!14 = !DISubprogram(name: "test1", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate, isOptimized: false, scope: !8, type: !15)
+!15 = !DISubroutineType(types: !16)
+!16 = !{null, !17}
+!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !4, baseType: !8)
+!18 = !DISubprogram(name: "~test1", line: 4, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, isOptimized: false, scope: !8, type: !19, containingType: !8)
+!19 = !DISubroutineType(types: !20)
+!20 = !{null, !17, !7}
+!21 = !DILocation(line: 11, scope: !1)
+!22 = !DILocation(line: 13, scope: !1)
+!23 = !DILocation(line: 14, scope: !1)
+!24 = !DILocalVariable(name: "this", line: 13, arg: 1, scope: !25, file: !4, type: !26)
+!25 = distinct !DISubprogram(name: "test1", linkageName: "_ZN5test1C1Ev", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !4, type: !15)
+!26 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !4, baseType: !27)
+!27 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !4, baseType: !8)
+!28 = !DILocation(line: 1, scope: !25)
+!29 = !DILocation(line: 1, scope: !30)
+!30 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !31)
+!31 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !25)
+!32 = !DILocalVariable(name: "this", line: 4, arg: 1, scope: !33, file: !4, type: !26)
+!33 = distinct !DISubprogram(name: "~test1", linkageName: "_ZN5test1D1Ev", line: 4, isLocal: false, isDefinition: true, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, isOptimized: false, scope: !8, type: !15, containingType: !8)
+!34 = !DILocation(line: 4, scope: !33)
+!35 = !DILocation(line: 5, scope: !36)
+!36 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !33)
+!37 = !DILocation(line: 6, scope: !36)
+!38 = !DILocalVariable(name: "this", line: 4, arg: 1, scope: !39, file: !4, type: !26)
+!39 = distinct !DISubprogram(name: "~test1", linkageName: "_ZN5test1D0Ev", line: 4, isLocal: false, isDefinition: true, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, isOptimized: false, scope: !8, type: !15, containingType: !8)
+!40 = !DILocation(line: 4, scope: !39)
+!41 = !DILocation(line: 5, scope: !42)
+!42 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !39)
+!43 = !DILocation(line: 6, scope: !42)
+!44 = !DIFile(filename: "inheritance.cpp", directory: "/tmp/")
+!45 = !{i32 0}
+!46 = !DIFile(filename: "<built-in>", directory: "/tmp/")
diff --git a/test/DebugInfo/Generic/inline-debug-info-multiret.ll b/test/DebugInfo/Generic/inline-debug-info-multiret.ll
new file mode 100644
index 000000000000..5be261ac3741
--- /dev/null
+++ b/test/DebugInfo/Generic/inline-debug-info-multiret.ll
@@ -0,0 +1,156 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+;
+; A hand-edited version of inline-debug-info.ll to test inlining of a
+; function with multiple returns.
+;
+; Make sure the branch instructions created during inlining has a debug location,
+; so the range of the inlined function is correct.
+; CHECK: br label %_Z4testi.exit, !dbg ![[MD:[0-9]+]]
+; CHECK: br label %_Z4testi.exit, !dbg ![[MD]]
+; CHECK: br label %invoke.cont, !dbg ![[MD]]
+; The branch instruction has the source location of line 9 and its inlined location
+; has the source location of line 14.
+; CHECK: ![[INL:[0-9]+]] = distinct !DILocation(line: 14, scope: {{.*}})
+; CHECK: ![[MD]] = !DILocation(line: 9, scope: {{.*}}, inlinedAt: ![[INL]])
+
+; ModuleID = 'test.cpp'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin12.0.0"
+
+@_ZTIi = external constant i8*
+@global_var = external global i32
+
+; copy of above function with multiple returns
+define i32 @_Z4testi(i32 %k) !dbg !4 {
+entry:
+ %retval = alloca i32, align 4
+ %k.addr = alloca i32, align 4
+ %k2 = alloca i32, align 4
+ store i32 %k, i32* %k.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %k.addr, metadata !13, metadata !DIExpression()), !dbg !14
+ call void @llvm.dbg.declare(metadata i32* %k2, metadata !15, metadata !DIExpression()), !dbg !16
+ %0 = load i32, i32* %k.addr, align 4, !dbg !16
+ %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
+ store i32 %call, i32* %k2, align 4, !dbg !16
+ %1 = load i32, i32* %k2, align 4, !dbg !17
+ %cmp = icmp sgt i32 %1, 100, !dbg !17
+ br i1 %cmp, label %if.then, label %if.end, !dbg !17
+
+if.then: ; preds = %entry
+ %2 = load i32, i32* %k2, align 4, !dbg !18
+ store i32 %2, i32* %retval, !dbg !18
+ br label %return, !dbg !18
+
+if.end: ; preds = %entry
+ store i32 0, i32* %retval, !dbg !19
+ %3 = load i32, i32* %retval, !dbg !20 ; hand-edited
+ ret i32 %3, !dbg !20 ; hand-edited
+
+return: ; preds = %if.end, %if.then
+ %4 = load i32, i32* %retval, !dbg !20
+ ret i32 %4, !dbg !20
+}
+
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare i32 @_Z8test_exti(i32)
+
+define i32 @_Z5test2v() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !10 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %e = alloca i32, align 4
+ %0 = load i32, i32* @global_var, align 4, !dbg !21
+ %call = invoke i32 @_Z4testi(i32 %0)
+ to label %invoke.cont unwind label %lpad, !dbg !21
+
+invoke.cont: ; preds = %entry
+ br label %try.cont, !dbg !23
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 }
+ catch i8* bitcast (i8** @_ZTIi to i8*), !dbg !21
+ %2 = extractvalue { i8*, i32 } %1, 0, !dbg !21
+ store i8* %2, i8** %exn.slot, !dbg !21
+ %3 = extractvalue { i8*, i32 } %1, 1, !dbg !21
+ store i32 %3, i32* %ehselector.slot, !dbg !21
+ br label %catch.dispatch, !dbg !21
+
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot, !dbg !23
+ %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2, !dbg !23
+ %matches = icmp eq i32 %sel, %4, !dbg !23
+ br i1 %matches, label %catch, label %eh.resume, !dbg !23
+
+catch: ; preds = %catch.dispatch
+ call void @llvm.dbg.declare(metadata i32* %e, metadata !24, metadata !DIExpression()), !dbg !25
+ %exn = load i8*, i8** %exn.slot, !dbg !23
+ %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
+ %6 = bitcast i8* %5 to i32*, !dbg !23
+ %7 = load i32, i32* %6, align 4, !dbg !23
+ store i32 %7, i32* %e, align 4, !dbg !23
+ store i32 0, i32* @global_var, align 4, !dbg !26
+ call void @__cxa_end_catch() #2, !dbg !28
+ br label %try.cont, !dbg !28
+
+try.cont: ; preds = %catch, %invoke.cont
+ store i32 1, i32* @global_var, align 4, !dbg !29
+ ret i32 0, !dbg !30
+
+eh.resume: ; preds = %catch.dispatch
+ %exn1 = load i8*, i8** %exn.slot, !dbg !23
+ %sel2 = load i32, i32* %ehselector.slot, !dbg !23
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn1, 0, !dbg !23
+ %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel2, 1, !dbg !23
+ resume { i8*, i32 } %lpad.val3, !dbg !23
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #1
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!31}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "<unknown>", directory: "")
+!2 = !{}
+!3 = !{!4, !10}
+!4 = distinct !DISubprogram(name: "test", linkageName: "_Z4testi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !6, type: !7, variables: !2)
+!5 = !DIFile(filename: "test.cpp", directory: "")
+!6 = !DIFile(filename: "test.cpp", directory: "")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = distinct !DISubprogram(name: "test2", linkageName: "_Z5test2v", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !6, type: !11, variables: !2)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!9}
+!13 = !DILocalVariable(name: "k", line: 4, arg: 1, scope: !4, file: !6, type: !9)
+!14 = !DILocation(line: 4, scope: !4)
+!15 = !DILocalVariable(name: "k2", line: 5, scope: !4, file: !6, type: !9)
+!16 = !DILocation(line: 5, scope: !4)
+!17 = !DILocation(line: 6, scope: !4)
+!18 = !DILocation(line: 7, scope: !4)
+!19 = !DILocation(line: 8, scope: !4)
+!20 = !DILocation(line: 9, scope: !4)
+!21 = !DILocation(line: 14, scope: !22)
+!22 = distinct !DILexicalBlock(line: 13, column: 0, file: !5, scope: !10)
+!23 = !DILocation(line: 15, scope: !22)
+!24 = !DILocalVariable(name: "e", line: 16, scope: !10, file: !6, type: !9)
+!25 = !DILocation(line: 16, scope: !10)
+!26 = !DILocation(line: 17, scope: !27)
+!27 = distinct !DILexicalBlock(line: 16, column: 0, file: !5, scope: !10)
+!28 = !DILocation(line: 18, scope: !27)
+!29 = !DILocation(line: 19, scope: !10)
+!30 = !DILocation(line: 20, scope: !10)
+!31 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/inline-debug-info.ll b/test/DebugInfo/Generic/inline-debug-info.ll
new file mode 100644
index 000000000000..a5d55a4f98a1
--- /dev/null
+++ b/test/DebugInfo/Generic/inline-debug-info.ll
@@ -0,0 +1,174 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+
+; Created from source
+;
+;
+; 1 // test.cpp
+; 2 extern int global_var;
+; 3 extern int test_ext(int k);
+; 4 int test (int k) {
+; 5 int k2 = test_ext(k);
+; 6 if (k2 > 100)
+; 7 return k2;
+; 8 return 0;
+; 9 }
+; 10
+; 11 int test2() {
+; 12 try
+; 13 {
+; 14 test(global_var);
+; 15 }
+; 16 catch (int e) {
+; 17 global_var = 0;
+; 18 }
+; 19 global_var = 1;
+; 20 return 0;
+; 21 }
+
+; CHECK: _Z4testi.exit:
+; Make sure the branch instruction created during inlining has a debug location,
+; so the range of the inlined function is correct.
+; CHECK: br label %invoke.cont, !dbg [[MD:![0-9]+]]
+; The branch instruction has the source location of line 9 and its inlined location
+; has the source location of line 14.
+; CHECK: [[INL:![0-9]*]] = distinct !DILocation(line: 14, scope: {{.*}})
+; CHECK: [[MD]] = !DILocation(line: 9, scope: {{.*}}, inlinedAt: [[INL]])
+
+; ModuleID = 'test.cpp'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin12.0.0"
+
+@_ZTIi = external constant i8*
+@global_var = external global i32
+
+define i32 @_Z4testi(i32 %k) !dbg !4 {
+entry:
+ %retval = alloca i32, align 4
+ %k.addr = alloca i32, align 4
+ %k2 = alloca i32, align 4
+ store i32 %k, i32* %k.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %k.addr, metadata !13, metadata !DIExpression()), !dbg !14
+ call void @llvm.dbg.declare(metadata i32* %k2, metadata !15, metadata !DIExpression()), !dbg !16
+ %0 = load i32, i32* %k.addr, align 4, !dbg !16
+ %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
+ store i32 %call, i32* %k2, align 4, !dbg !16
+ %1 = load i32, i32* %k2, align 4, !dbg !17
+ %cmp = icmp sgt i32 %1, 100, !dbg !17
+ br i1 %cmp, label %if.then, label %if.end, !dbg !17
+
+if.then: ; preds = %entry
+ %2 = load i32, i32* %k2, align 4, !dbg !18
+ store i32 %2, i32* %retval, !dbg !18
+ br label %return, !dbg !18
+
+if.end: ; preds = %entry
+ store i32 0, i32* %retval, !dbg !19
+ br label %return, !dbg !19
+
+return: ; preds = %if.end, %if.then
+ %3 = load i32, i32* %retval, !dbg !20
+ ret i32 %3, !dbg !20
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare i32 @_Z8test_exti(i32)
+
+define i32 @_Z5test2v() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !10 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %e = alloca i32, align 4
+ %0 = load i32, i32* @global_var, align 4, !dbg !21
+ %call = invoke i32 @_Z4testi(i32 %0)
+ to label %invoke.cont unwind label %lpad, !dbg !21
+
+invoke.cont: ; preds = %entry
+ br label %try.cont, !dbg !23
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 }
+ catch i8* bitcast (i8** @_ZTIi to i8*), !dbg !21
+ %2 = extractvalue { i8*, i32 } %1, 0, !dbg !21
+ store i8* %2, i8** %exn.slot, !dbg !21
+ %3 = extractvalue { i8*, i32 } %1, 1, !dbg !21
+ store i32 %3, i32* %ehselector.slot, !dbg !21
+ br label %catch.dispatch, !dbg !21
+
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot, !dbg !23
+ %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2, !dbg !23
+ %matches = icmp eq i32 %sel, %4, !dbg !23
+ br i1 %matches, label %catch, label %eh.resume, !dbg !23
+
+catch: ; preds = %catch.dispatch
+ call void @llvm.dbg.declare(metadata i32* %e, metadata !24, metadata !DIExpression()), !dbg !25
+ %exn = load i8*, i8** %exn.slot, !dbg !23
+ %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
+ %6 = bitcast i8* %5 to i32*, !dbg !23
+ %7 = load i32, i32* %6, align 4, !dbg !23
+ store i32 %7, i32* %e, align 4, !dbg !23
+ store i32 0, i32* @global_var, align 4, !dbg !26
+ call void @__cxa_end_catch() #2, !dbg !28
+ br label %try.cont, !dbg !28
+
+try.cont: ; preds = %catch, %invoke.cont
+ store i32 1, i32* @global_var, align 4, !dbg !29
+ ret i32 0, !dbg !30
+
+eh.resume: ; preds = %catch.dispatch
+ %exn1 = load i8*, i8** %exn.slot, !dbg !23
+ %sel2 = load i32, i32* %ehselector.slot, !dbg !23
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn1, 0, !dbg !23
+ %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel2, 1, !dbg !23
+ resume { i8*, i32 } %lpad.val3, !dbg !23
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #1
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!31}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "<unknown>", directory: "")
+!2 = !{}
+!3 = !{!4, !10}
+!4 = distinct !DISubprogram(name: "test", linkageName: "_Z4testi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !6, type: !7, variables: !2)
+!5 = !DIFile(filename: "test.cpp", directory: "")
+!6 = !DIFile(filename: "test.cpp", directory: "")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9}
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = distinct !DISubprogram(name: "test2", linkageName: "_Z5test2v", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !6, type: !11, variables: !2)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!9}
+!13 = !DILocalVariable(name: "k", line: 4, arg: 1, scope: !4, file: !6, type: !9)
+!14 = !DILocation(line: 4, scope: !4)
+!15 = !DILocalVariable(name: "k2", line: 5, scope: !4, file: !6, type: !9)
+!16 = !DILocation(line: 5, scope: !4)
+!17 = !DILocation(line: 6, scope: !4)
+!18 = !DILocation(line: 7, scope: !4)
+!19 = !DILocation(line: 8, scope: !4)
+!20 = !DILocation(line: 9, scope: !4)
+!21 = !DILocation(line: 14, scope: !22)
+!22 = distinct !DILexicalBlock(line: 13, column: 0, file: !5, scope: !10)
+!23 = !DILocation(line: 15, scope: !22)
+!24 = !DILocalVariable(name: "e", line: 16, scope: !10, file: !6, type: !9)
+!25 = !DILocation(line: 16, scope: !10)
+!26 = !DILocation(line: 17, scope: !27)
+!27 = distinct !DILexicalBlock(line: 16, column: 0, file: !5, scope: !10)
+!28 = !DILocation(line: 18, scope: !27)
+!29 = !DILocation(line: 19, scope: !10)
+!30 = !DILocation(line: 20, scope: !10)
+!31 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/inline-no-debug-info.ll b/test/DebugInfo/Generic/inline-no-debug-info.ll
new file mode 100644
index 000000000000..443ba9da6b59
--- /dev/null
+++ b/test/DebugInfo/Generic/inline-no-debug-info.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; This was generated from the following source:
+; int a, b;
+; __attribute__((__always_inline__)) static void callee2() { b = 2; }
+; __attribute__((__nodebug__)) void callee() { a = 1; callee2(); }
+; void caller() { callee(); }
+; by running
+; clang -S test.c -emit-llvm -O1 -gline-tables-only -fno-strict-aliasing
+
+; CHECK-LABEL: @caller(
+
+; This instruction did not have a !dbg metadata in the callee.
+; CHECK: store i32 1, {{.*}}, !dbg [[A:!.*]]
+
+; This instruction came from callee with a !dbg metadata.
+; CHECK: store i32 2, {{.*}}, !dbg [[B:!.*]]
+
+; The remaining instruction from the caller.
+; CHECK: ret void, !dbg [[A]]
+
+; Debug location of the code in caller() and of the inlined code that did not
+; have any debug location before.
+; CHECK-DAG: [[A]] = !DILocation(line: 4, scope: !{{[0-9]+}})
+
+; Debug location of the inlined code.
+; CHECK-DAG: [[B]] = !DILocation(line: 2, scope: !{{[0-9]+}}, inlinedAt: [[A_INL:![0-9]*]])
+; CHECK-DAG: [[A_INL]] = distinct !DILocation(line: 4, scope: !{{[0-9]+}})
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define void @callee() #0 {
+entry:
+ store i32 1, i32* @a, align 4
+ store i32 2, i32* @b, align 4, !dbg !11
+ ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @caller() #0 !dbg !4 {
+entry:
+ tail call void @callee(), !dbg !12
+ ret void, !dbg !12
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (210174)", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "test.c", directory: "/code/llvm/build0")
+!2 = !{}
+!3 = !{!4, !7}
+!4 = distinct !DISubprogram(name: "caller", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "test.c", directory: "/code/llvm/build0")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "callee2", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5.0 (210174)"}
+!11 = !DILocation(line: 2, scope: !7)
+!12 = !DILocation(line: 4, scope: !4)
diff --git a/test/DebugInfo/Generic/inline-scopes.ll b/test/DebugInfo/Generic/inline-scopes.ll
new file mode 100644
index 000000000000..432c58b79e14
--- /dev/null
+++ b/test/DebugInfo/Generic/inline-scopes.ll
@@ -0,0 +1,130 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; bool f();
+; inline __attribute__((always_inline)) int f1() {
+; if (bool b = f())
+; return 1;
+; return 2;
+; }
+;
+; inline __attribute__((always_inline)) int f2() {
+; # 2 "y.cc"
+; if (bool b = f())
+; return 3;
+; return 4;
+; }
+;
+; int main() {
+; f1();
+; f2();
+; }
+
+; Ensure that lexical_blocks within inlined_subroutines are preserved/emitted.
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_lexical_block
+; CHECK-NOT: DW_TAG
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_variable
+; Ensure that file changes don't interfere with creating inlined subroutines.
+; (see the line directive inside 'f2' in thesource)
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin
+
+; Function Attrs: uwtable
+define i32 @main() #0 !dbg !4 {
+entry:
+ %retval.i2 = alloca i32, align 4
+ %b.i3 = alloca i8, align 1
+ %retval.i = alloca i32, align 4
+ %b.i = alloca i8, align 1
+ call void @llvm.dbg.declare(metadata i8* %b.i, metadata !16, metadata !DIExpression()), !dbg !19
+ %call.i = call zeroext i1 @_Z1fv(), !dbg !19
+ %frombool.i = zext i1 %call.i to i8, !dbg !19
+ store i8 %frombool.i, i8* %b.i, align 1, !dbg !19
+ %0 = load i8, i8* %b.i, align 1, !dbg !19
+ %tobool.i = trunc i8 %0 to i1, !dbg !19
+ br i1 %tobool.i, label %if.then.i, label %if.end.i, !dbg !19
+
+if.then.i: ; preds = %entry
+ store i32 1, i32* %retval.i, !dbg !21
+ br label %_Z2f1v.exit, !dbg !21
+
+if.end.i: ; preds = %entry
+ store i32 2, i32* %retval.i, !dbg !22
+ br label %_Z2f1v.exit, !dbg !22
+
+_Z2f1v.exit: ; preds = %if.then.i, %if.end.i
+ %1 = load i32, i32* %retval.i, !dbg !23
+ call void @llvm.dbg.declare(metadata i8* %b.i3, metadata !24, metadata !DIExpression()), !dbg !27
+ %call.i4 = call zeroext i1 @_Z1fv(), !dbg !27
+ %frombool.i5 = zext i1 %call.i4 to i8, !dbg !27
+ store i8 %frombool.i5, i8* %b.i3, align 1, !dbg !27
+ %2 = load i8, i8* %b.i3, align 1, !dbg !27
+ %tobool.i6 = trunc i8 %2 to i1, !dbg !27
+ br i1 %tobool.i6, label %if.then.i7, label %if.end.i8, !dbg !27
+
+if.then.i7: ; preds = %_Z2f1v.exit
+ store i32 3, i32* %retval.i2, !dbg !29
+ br label %_Z2f2v.exit, !dbg !29
+
+if.end.i8: ; preds = %_Z2f1v.exit
+ store i32 4, i32* %retval.i2, !dbg !30
+ br label %_Z2f2v.exit, !dbg !30
+
+_Z2f2v.exit: ; preds = %if.then.i7, %if.end.i8
+ %3 = load i32, i32* %retval.i2, !dbg !31
+ ret i32 0, !dbg !32
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare zeroext i1 @_Z1fv() #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "inline-scopes.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4, !10, !12}
+!4 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !5, scope: !6, type: !7, variables: !2)
+!5 = !DIFile(filename: "y.cc", directory: "/tmp/dbginfo")
+!6 = !DIFile(filename: "y.cc", directory: "/tmp/dbginfo")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9}
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !11, type: !7, variables: !2)
+!11 = !DIFile(filename: "inline-scopes.cpp", directory: "/tmp/dbginfo")
+!12 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !11, type: !7, variables: !2)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 1, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.5.0 "}
+!16 = !DILocalVariable(name: "b", line: 3, scope: !17, file: !11, type: !18)
+!17 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !12)
+!18 = !DIBasicType(tag: DW_TAG_base_type, name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
+!19 = !DILocation(line: 3, scope: !17, inlinedAt: !20)
+!20 = !DILocation(line: 8, scope: !4)
+!21 = !DILocation(line: 4, scope: !17, inlinedAt: !20)
+!22 = !DILocation(line: 5, scope: !12, inlinedAt: !20)
+!23 = !DILocation(line: 6, scope: !12, inlinedAt: !20)
+!24 = !DILocalVariable(name: "b", line: 2, scope: !25, file: !6, type: !18)
+!25 = distinct !DILexicalBlock(line: 2, column: 0, file: !5, scope: !26)
+!26 = !DILexicalBlockFile(discriminator: 0, file: !5, scope: !10)
+!27 = !DILocation(line: 2, scope: !25, inlinedAt: !28)
+!28 = !DILocation(line: 9, scope: !4)
+!29 = !DILocation(line: 3, scope: !25, inlinedAt: !28)
+!30 = !DILocation(line: 4, scope: !26, inlinedAt: !28)
+!31 = !DILocation(line: 5, scope: !26, inlinedAt: !28)
+!32 = !DILocation(line: 10, scope: !4)
diff --git a/test/DebugInfo/Generic/inlined-arguments.ll b/test/DebugInfo/Generic/inlined-arguments.ll
new file mode 100644
index 000000000000..af4820845a03
--- /dev/null
+++ b/test/DebugInfo/Generic/inlined-arguments.ll
@@ -0,0 +1,79 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; IR generated from clang -O -g with the following source
+;
+; void f1(int x, int y);
+; void f3(int line);
+; void f2() {
+; f1(1, 2);
+; }
+; void f1(int x, int y) {
+; f3(y);
+; }
+
+; CHECK: DW_AT_name{{.*}}"f1"
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}"x"
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}"y"
+
+; Function Attrs: uwtable
+define void @_Z2f2v() #0 !dbg !4 {
+ tail call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !16, metadata !DIExpression()), !dbg !18
+ tail call void @llvm.dbg.value(metadata i32 2, i64 0, metadata !20, metadata !DIExpression()), !dbg !18
+ tail call void @_Z2f3i(i32 2), !dbg !21
+ ret void, !dbg !22
+}
+
+; Function Attrs: uwtable
+define void @_Z2f1ii(i32 %x, i32 %y) #0 !dbg !8 {
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !13, metadata !DIExpression()), !dbg !23
+ tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !14, metadata !DIExpression()), !dbg !23
+ tail call void @_Z2f3i(i32 %y), !dbg !24
+ ret void, !dbg !25
+}
+
+declare void @_Z2f3i(i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!26}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "exp.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "exp.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1ii", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !9, variables: !12)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null, !11, !11}
+!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!12 = !{!13, !14}
+!13 = !DILocalVariable(name: "x", line: 6, arg: 1, scope: !8, file: !5, type: !11)
+!14 = !DILocalVariable(name: "y", line: 6, arg: 2, scope: !8, file: !5, type: !11)
+!15 = !{i32 undef}
+!16 = !DILocalVariable(name: "x", line: 6, arg: 1, scope: !8, file: !5, type: !11)
+!17 = !DILocation(line: 4, scope: !4)
+!18 = !DILocation(line: 6, scope: !8, inlinedAt: !17)
+!19 = !{i32 2}
+!20 = !DILocalVariable(name: "y", line: 6, arg: 2, scope: !8, file: !5, type: !11)
+!21 = !DILocation(line: 7, scope: !8, inlinedAt: !17)
+!22 = !DILocation(line: 5, scope: !4)
+!23 = !DILocation(line: 6, scope: !8)
+!24 = !DILocation(line: 7, scope: !8)
+!25 = !DILocation(line: 8, scope: !8)
+!26 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/inlined-vars.ll b/test/DebugInfo/Generic/inlined-vars.ll
new file mode 100644
index 000000000000..a294380bb5c8
--- /dev/null
+++ b/test/DebugInfo/Generic/inlined-vars.ll
@@ -0,0 +1,56 @@
+; RUN: %llc_dwarf -O0 < %s | FileCheck %s -check-prefix ARGUMENT
+; RUN: %llc_dwarf -O0 < %s | FileCheck %s -check-prefix VARIABLE
+; PR 13202
+
+define i32 @main() uwtable !dbg !5 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !18, metadata !DIExpression()), !dbg !21
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !DIExpression()), !dbg !23
+ tail call void @smth(i32 0), !dbg !24
+ tail call void @smth(i32 0), !dbg !25
+ ret i32 0, !dbg !19
+}
+
+declare void @smth(i32)
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!27}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 159419)", isOptimized: true, emissionKind: 0, file: !26, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !{i32 0}
+!2 = !{}
+!3 = !{!5, !10}
+!5 = distinct !DISubprogram(name: "main", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !26, scope: !6, type: !7, variables: !2)
+!6 = !DIFile(filename: "inline-bug.cc", directory: "/tmp/dbginfo/pr13202")
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9}
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = distinct !DISubprogram(name: "f", linkageName: "_ZL1fi", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !26, scope: !6, type: !11, variables: !13)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!9, !9}
+!13 = !{!15, !16}
+!15 = !DILocalVariable(name: "argument", line: 3, arg: 1, scope: !10, file: !6, type: !9)
+
+; Two DW_TAG_formal_parameter: one abstract and one inlined.
+; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
+; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
+; ARGUMENT-NOT: {{.*Abbrev.*DW_TAG_formal_parameter}}
+
+!16 = !DILocalVariable(name: "local", line: 4, scope: !10, file: !6, type: !9)
+
+; Two DW_TAG_variable: one abstract and one inlined.
+; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
+; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
+; VARIABLE-NOT: {{.*Abbrev.*DW_TAG_variable}}
+
+!18 = !DILocalVariable(name: "argument", line: 3, arg: 1, scope: !10, file: !6, type: !9)
+!19 = !DILocation(line: 11, column: 10, scope: !5)
+!21 = !DILocation(line: 3, column: 25, scope: !10, inlinedAt: !19)
+!22 = !DILocalVariable(name: "local", line: 4, scope: !10, file: !6, type: !9)
+!23 = !DILocation(line: 4, column: 16, scope: !10, inlinedAt: !19)
+!24 = !DILocation(line: 5, column: 3, scope: !10, inlinedAt: !19)
+!25 = !DILocation(line: 6, column: 3, scope: !10, inlinedAt: !19)
+!26 = !DIFile(filename: "inline-bug.cc", directory: "/tmp/dbginfo/pr13202")
+!27 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/lit.local.cfg b/test/DebugInfo/Generic/lit.local.cfg
new file mode 100644
index 000000000000..f22d4aabd730
--- /dev/null
+++ b/test/DebugInfo/Generic/lit.local.cfg
@@ -0,0 +1,3 @@
+if not config.target_triple:
+ config.unsupported = True
+
diff --git a/test/DebugInfo/Generic/location-verifier.ll b/test/DebugInfo/Generic/location-verifier.ll
new file mode 100644
index 000000000000..aa725a2369a4
--- /dev/null
+++ b/test/DebugInfo/Generic/location-verifier.ll
@@ -0,0 +1,33 @@
+; RUN: not llvm-as -disable-output -verify-debug-info < %s 2>&1 | FileCheck %s
+; ModuleID = 'test.c'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @foo() #0 !dbg !4 {
+entry:
+ ret i32 42, !dbg !13
+}
+
+attributes #0 = { nounwind ssp uwtable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "test.c", directory: "")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"PIC Level", i32 2}
+!12 = !{!"clang version 3.7.0 "}
+; An old-style DILocation should not pass verify.
+; CHECK: invalid !dbg metadata attachment
+!13 = !{i32 2, i32 2, !4, null}
diff --git a/test/DebugInfo/Generic/lto-comp-dir.ll b/test/DebugInfo/Generic/lto-comp-dir.ll
new file mode 100644
index 000000000000..8d5da504a82e
--- /dev/null
+++ b/test/DebugInfo/Generic/lto-comp-dir.ll
@@ -0,0 +1,84 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=line - | FileCheck %s
+; RUN: %llc_dwarf < %s -filetype=asm | FileCheck --check-prefix=ASM %s
+
+; If multiple line tables are emitted, one per CU, those line tables can
+; unambiguously rely on the comp_dir of their owning CU and use directory '0'
+; to refer to it.
+
+; CHECK: .debug_line contents:
+; CHECK-NEXT: Line table prologue:
+; CHECK-NOT: include_directories
+; CHECK: file_names[ 1] 0 {{.*}} a.cpp
+; CHECK-NOT: file_names
+
+; CHECK: Line table prologue:
+; CHECK-NOT: include_directories
+; CHECK: file_names[ 1] 0 {{.*}} b.cpp
+; CHECK-NOT: file_names
+
+; However, if a single line table is emitted and shared between CUs, the
+; comp_dir is ambiguous and relying on it would lead to different path
+; interpretations depending on which CU lead to the table - so ensure that
+; full paths are always emitted in this case, never comp_dir relative.
+
+; ASM: .file 1 "/tmp/dbginfo/a{{[/\\]+}}a.cpp"
+; ASM: .file 2 "/tmp/dbginfo/b{{[/\\]+}}b.cpp"
+
+; Generated from the following source compiled to bitcode from within their
+; respective directories (with debug info) and linked together with llvm-link
+
+; a/a.cpp
+; void func() {
+; }
+
+; b/b.cpp
+; void func();
+; int main() {
+; func();
+; }
+
+; Function Attrs: nounwind uwtable
+define void @_Z4funcv() #0 !dbg !4 {
+entry:
+ ret void, !dbg !19
+}
+
+; Function Attrs: uwtable
+define i32 @main() #1 !dbg !11 {
+entry:
+ call void @_Z4funcv(), !dbg !20
+ ret i32 0, !dbg !21
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0, !8}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18, !18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo/a")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "func", linkageName: "_Z4funcv", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo/a")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !9, enums: !2, retainedTypes: !2, subprograms: !10, globals: !2, imports: !2)
+!9 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo/b")
+!10 = !{!11}
+!11 = distinct !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !9, scope: !12, type: !13, variables: !2)
+!12 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo/b")
+!13 = !DISubroutineType(types: !14)
+!14 = !{!15}
+!15 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 1, !"Debug Info Version", i32 3}
+!18 = !{!"clang version 3.5.0 "}
+!19 = !DILocation(line: 2, scope: !4)
+!20 = !DILocation(line: 3, scope: !11)
+!21 = !DILocation(line: 4, scope: !11)
+
diff --git a/test/DebugInfo/Generic/member-order.ll b/test/DebugInfo/Generic/member-order.ll
new file mode 100644
index 000000000000..55ada4f829b7
--- /dev/null
+++ b/test/DebugInfo/Generic/member-order.ll
@@ -0,0 +1,66 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; generated by clang from:
+; struct foo {
+; void f1();
+; void f2();
+; };
+;
+; void foo::f1() {
+; }
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}} "foo"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: NULL
+; CHECK: DW_AT_name {{.*}} "f1"
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: NULL
+; CHECK: DW_AT_name {{.*}} "f2"
+
+
+%struct.foo = type { i8 }
+
+; Function Attrs: nounwind uwtable
+define void @_ZN3foo2f1Ev(%struct.foo* %this) #0 align 2 !dbg !14 {
+entry:
+ %this.addr = alloca %struct.foo*, align 8
+ store %struct.foo* %this, %struct.foo** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !16, metadata !DIExpression()), !dbg !18
+ %this1 = load %struct.foo*, %struct.foo** %this.addr
+ ret void, !dbg !19
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !20}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !13, globals: !2, imports: !2)
+!1 = !DIFile(filename: "member-order.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 1, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS3foo")
+!5 = !{!6, !11}
+!6 = !DISubprogram(name: "f1", linkageName: "_ZN3foo2f1Ev", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !4, type: !7)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null, !9}
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS3foo")
+!10 = !{i32 786468}
+!11 = !DISubprogram(name: "f2", linkageName: "_ZN3foo2f2Ev", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !4, type: !7)
+!12 = !{i32 786468}
+!13 = !{!14}
+!14 = distinct !DISubprogram(name: "f1", linkageName: "_ZN3foo2f1Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: null, type: !7, declaration: !6, variables: !2)
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !14, type: !17)
+!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS3foo")
+!18 = !DILocation(line: 0, scope: !14)
+!19 = !DILocation(line: 7, scope: !14)
+!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/member-pointers.ll b/test/DebugInfo/Generic/member-pointers.ll
new file mode 100644
index 000000000000..1570c07ddb22
--- /dev/null
+++ b/test/DebugInfo/Generic/member-pointers.ll
@@ -0,0 +1,40 @@
+; REQUIRES: object-emission
+; XFAIL: hexagon
+
+; RUN: %llc_dwarf -filetype=obj -O0 < %s > %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; CHECK: DW_TAG_ptr_to_member_type
+; CHECK: DW_TAG_ptr_to_member_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE:0x[0-9a-f]+]]})
+; CHECK: [[TYPE]]: DW_TAG_subroutine_type
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_type
+; CHECK-NEXT: DW_AT_artificial [DW_FORM_flag
+; IR generated from clang -g with the following source:
+; struct S {
+; };
+;
+; int S::*x = 0;
+; void (S::*y)(int) = 0;
+
+@x = global i64 -1, align 8
+@y = global { i64, i64 } zeroinitializer, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !15, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!1 = !{}
+!3 = !{!5, !10}
+!5 = !DIGlobalVariable(name: "x", line: 4, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: i64* @x)
+!6 = !DIFile(filename: "simple.cpp", directory: "/home/blaikie/Development/scratch")
+!7 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !8, extraData: !9)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", line: 1, size: 8, align: 8, file: !15, elements: !1)
+!10 = !DIGlobalVariable(name: "y", line: 5, isLocal: false, isDefinition: true, scope: null, file: !6, type: !11, variable: { i64, i64 }* @y)
+!11 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !12, extraData: !9)
+!12 = !DISubroutineType(types: !13)
+!13 = !{null, !14, !8}
+!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !9)
+!15 = !DIFile(filename: "simple.cpp", directory: "/home/blaikie/Development/scratch")
+!16 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/missing-abstract-variable.ll b/test/DebugInfo/Generic/missing-abstract-variable.ll
new file mode 100644
index 000000000000..ee4f1666d2b7
--- /dev/null
+++ b/test/DebugInfo/Generic/missing-abstract-variable.ll
@@ -0,0 +1,182 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; The formal parameter 'b' for Function 'x' when inlined within 'a' is lost on
+; mips and powerpc64 (and on x86_64 at at least -O2). Presumably this is a
+; SelectionDAG issue (do mips/powerpc64 use FastISel?).
+; XFAIL: mips, powerpc64, s390x, sparc
+
+; Build from the following source with clang -O2.
+
+; The important details are that 'x's abstract definition is first built during
+; the definition of 'b', where the parameter to 'x' is constant and so 'x's 's'
+; variable is optimized away. No abstract definition DIE for 's' is constructed.
+; Then, during 'a' emission, the abstract DbgVariable for 's' is created, but
+; the abstract DIE isn't (since the abstract definition for 'b' is already
+; built). This results in 's' inlined in 'a' being emitted with its name, line,
+; file there, rather than referencing an abstract definition.
+
+; extern int t;
+;
+; void f(int);
+;
+; inline void x(bool b) {
+; if (b) {
+; int s = t;
+; f(s);
+; }
+; f(0);
+; }
+;
+; void b() {
+; x(false);
+; }
+;
+; void a(bool u) {
+; x(u);
+; }
+
+; CHECK: [[X_DECL:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "x"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "b"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_lexical_block
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "s"
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "b"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {[[X_DECL]]}
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} "b"
+; Notice 'x's local variable 's' is missing. Not necessarily a bug here,
+; since it's been optimized entirely away and it should be described in
+; abstract subprogram.
+; CHECK-NOT: DW_TAG
+; CHECK: NULL
+; CHECK-NOT: DW_TAG
+; CHECK: NULL
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "a"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {[[X_DECL]]}
+; CHECK-NOT: {{DW_TAG|NULL}}
+; FIXME: This formal parameter goes missing at least at -O2 (& on
+; mips/powerpc), maybe before that. Perhaps SelectionDAG is to blame (and
+; fastisel succeeds).
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} "b"
+
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_lexical_block
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} "s"
+
+@t = external global i32
+
+; Function Attrs: uwtable
+define void @_Z1bv() #0 !dbg !4 {
+entry:
+ tail call void @llvm.dbg.value(metadata i1 false, i64 0, metadata !25, metadata !DIExpression()), !dbg !27
+ tail call void @_Z1fi(i32 0), !dbg !28
+ ret void, !dbg !29
+}
+
+; Function Attrs: uwtable
+define void @_Z1ab(i1 zeroext %u) #0 !dbg !8 {
+entry:
+ tail call void @llvm.dbg.value(metadata i1 %u, i64 0, metadata !13, metadata !DIExpression()), !dbg !30
+ tail call void @llvm.dbg.value(metadata i1 %u, i64 0, metadata !31, metadata !DIExpression()), !dbg !33
+ br i1 %u, label %if.then.i, label %_Z1xb.exit, !dbg !34
+
+if.then.i: ; preds = %entry
+ %0 = load i32, i32* @t, align 4, !dbg !35, !tbaa !36
+ tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !40, metadata !DIExpression()), !dbg !35
+ tail call void @_Z1fi(i32 %0), !dbg !41
+ br label %_Z1xb.exit, !dbg !42
+
+_Z1xb.exit: ; preds = %entry, %if.then.i
+ tail call void @_Z1fi(i32 0), !dbg !43
+ ret void, !dbg !44
+}
+
+declare void @_Z1fi(i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21, !22}
+!llvm.ident = !{!23}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "missing-abstract-variables.cc", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4, !8, !14}
+!4 = distinct !DISubprogram(name: "b", linkageName: "_Z1bv", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "missing-abstract-variables.cc", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = distinct !DISubprogram(name: "a", linkageName: "_Z1ab", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !1, scope: !5, type: !9, variables: !12)
+!9 = !DISubroutineType(types: !10)
+!10 = !{null, !11}
+!11 = !DIBasicType(tag: DW_TAG_base_type, name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
+!12 = !{!13}
+!13 = !DILocalVariable(name: "u", line: 17, arg: 1, scope: !8, file: !5, type: !11)
+!14 = distinct !DISubprogram(name: "x", linkageName: "_Z1xb", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !9, variables: !15)
+!15 = !{!16, !17}
+!16 = !DILocalVariable(name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11)
+!17 = !DILocalVariable(name: "s", line: 7, scope: !18, file: !5, type: !20)
+!18 = distinct !DILexicalBlock(line: 6, column: 0, file: !1, scope: !19)
+!19 = distinct !DILexicalBlock(line: 6, column: 0, file: !1, scope: !14)
+!20 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!21 = !{i32 2, !"Dwarf Version", i32 4}
+!22 = !{i32 2, !"Debug Info Version", i32 3}
+!23 = !{!"clang version 3.5.0 "}
+!24 = !{i1 false}
+!25 = !DILocalVariable(name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11)
+!26 = !DILocation(line: 14, scope: !4)
+!27 = !DILocation(line: 5, scope: !14, inlinedAt: !26)
+!28 = !DILocation(line: 10, scope: !14, inlinedAt: !26)
+!29 = !DILocation(line: 15, scope: !4)
+!30 = !DILocation(line: 17, scope: !8)
+!31 = !DILocalVariable(name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11)
+!32 = !DILocation(line: 18, scope: !8)
+!33 = !DILocation(line: 5, scope: !14, inlinedAt: !32)
+!34 = !DILocation(line: 6, scope: !19, inlinedAt: !32)
+!35 = !DILocation(line: 7, scope: !18, inlinedAt: !32)
+!36 = !{!37, !37, i64 0}
+!37 = !{!"int", !38, i64 0}
+!38 = !{!"omnipotent char", !39, i64 0}
+!39 = !{!"Simple C/C++ TBAA"}
+!40 = !DILocalVariable(name: "s", line: 7, scope: !18, file: !5, type: !20)
+!41 = !DILocation(line: 8, scope: !18, inlinedAt: !32)
+!42 = !DILocation(line: 9, scope: !18, inlinedAt: !32)
+!43 = !DILocation(line: 10, scope: !14, inlinedAt: !32)
+!44 = !DILocation(line: 19, scope: !8)
diff --git a/test/DebugInfo/Generic/multiline.ll b/test/DebugInfo/Generic/multiline.ll
new file mode 100644
index 000000000000..7740bb6918a2
--- /dev/null
+++ b/test/DebugInfo/Generic/multiline.ll
@@ -0,0 +1,82 @@
+; RUN: llc -filetype=asm -asm-verbose=0 -O0 < %s | FileCheck %s
+; RUN: llc -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=line - | FileCheck %s --check-prefix=INT
+; XFAIL: hexagon
+
+; Check that the assembly output properly handles is_stmt changes. And since
+; we're testing anyway, check the integrated assembler too.
+
+; Generated with clang from multiline.c:
+; void f1();
+; void f2() {
+; f1(); f1(); f1();
+; f1(); f1(); f1();
+; }
+
+
+; CHECK: .loc 1 2 0{{$}}
+; CHECK-NOT: .loc{{ }}
+; CHECK: .loc 1 3 3 prologue_end{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc 1 3 9 is_stmt 0{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc 1 3 15{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc 1 4 3 is_stmt 1{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc 1 4 9 is_stmt 0{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc 1 4 15{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc 1 5 1 is_stmt 1{{$}}
+
+; INT: {{^}}Address
+; INT: -----
+; INT-NEXT: 2 0 1 0 0 is_stmt{{$}}
+; INT-NEXT: 3 3 1 0 0 is_stmt prologue_end{{$}}
+; INT-NEXT: 3 9 1 0 0 {{$}}
+; INT-NEXT: 3 15 1 0 0 {{$}}
+; INT-NEXT: 4 3 1 0 0 is_stmt{{$}}
+; INT-NEXT: 4 9 1 0 0 {{$}}
+; INT-NEXT: 4 15 1 0 0 {{$}}
+; INT-NEXT: 5 1 1 0 0 is_stmt{{$}}
+
+
+; Function Attrs: nounwind uwtable
+define void @f2() #0 !dbg !4 {
+entry:
+ call void (...) @f1(), !dbg !11
+ call void (...) @f1(), !dbg !12
+ call void (...) @f1(), !dbg !13
+ call void (...) @f1(), !dbg !14
+ call void (...) @f1(), !dbg !15
+ call void (...) @f1(), !dbg !16
+ ret void, !dbg !17
+}
+
+declare void @f1(...) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 225000) (llvm/trunk 224999)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "multiline.c", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "f2", line: 2, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "multiline.c", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.6.0 (trunk 225000) (llvm/trunk 224999)"}
+!11 = !DILocation(line: 3, column: 3, scope: !4)
+!12 = !DILocation(line: 3, column: 9, scope: !4)
+!13 = !DILocation(line: 3, column: 15, scope: !4)
+!14 = !DILocation(line: 4, column: 3, scope: !4)
+!15 = !DILocation(line: 4, column: 9, scope: !4)
+!16 = !DILocation(line: 4, column: 15, scope: !4)
+!17 = !DILocation(line: 5, column: 1, scope: !4)
diff --git a/test/DebugInfo/Generic/namespace.ll b/test/DebugInfo/Generic/namespace.ll
new file mode 100644
index 000000000000..e446806249f9
--- /dev/null
+++ b/test/DebugInfo/Generic/namespace.ll
@@ -0,0 +1,365 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj -dwarf-linkage-names=Enable < %s | llvm-dwarfdump - | FileCheck %s
+; CHECK: debug_info contents
+; CHECK: [[NS1:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name{{.*}} = "A"
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1:".*debug-info-namespace.cpp"]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(5)
+; CHECK-NOT: NULL
+; CHECK: [[NS2:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name{{.*}} = "B"
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2:".*foo.cpp"]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(1)
+; CHECK-NOT: NULL
+; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable
+; CHECK-NEXT: DW_AT_name{{.*}}= "i"
+; CHECK: [[VAR_FWD:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable
+; CHECK-NEXT: DW_AT_name{{.*}}= "var_fwd"
+; CHECK-NOT: NULL
+; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name{{.*}}= "foo"
+; CHECK-NEXT: DW_AT_declaration
+; CHECK-NOT: NULL
+; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name{{.*}}= "bar"
+; CHECK: [[FUNC1:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "f1"
+; CHECK: [[BAZ:0x[0-9a-f]*]]:{{.*}}DW_TAG_typedef
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "baz"
+; CHECK: [[VAR_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "var_decl"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_declaration
+; CHECK: [[FUNC_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "func_decl"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_declaration
+; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "func_fwd"
+; CHECK-NOT: DW_AT_declaration
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "f1"
+; CHECK: NULL
+
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_module
+; This is a bug, it should be in F2 but it inherits the file from its
+; enclosing scope
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(15)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
+; CHECK: NULL
+; CHECK-NOT: NULL
+
+; CHECK: DW_TAG_imported_module
+; Same bug as above, this should be F2, not F1
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(18)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
+; CHECK-NOT: NULL
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_MIPS_linkage_name
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "func"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_module
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(26)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(27)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FOO]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(28)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[BAR]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(29)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC1]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(30)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[I]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(31)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[BAZ]]})
+; CHECK-NOT: NULL
+; CHECK: [[X:0x[0-9a-f]*]]:{{ *}}DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(32)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
+; CHECK-NEXT: DW_AT_name{{.*}}"X"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(33)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[X]]})
+; CHECK-NEXT: DW_AT_name{{.*}}"Y"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(34)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[VAR_DECL]]})
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(35)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC_DECL]]})
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(36)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[VAR_FWD]]})
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(37)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC_FWD]]})
+
+; CHECK: DW_TAG_lexical_block
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_module
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(23)
+; CHECK-NEXT: DW_AT_import{{.*}}=>
+; CHECK: NULL
+; CHECK: NULL
+; CHECK: NULL
+
+; IR generated from clang/test/CodeGenCXX/debug-info-namespace.cpp, file paths
+; changed to protect the guilty. The C++ source code is:
+; // RUN...
+; // RUN...
+; // RUN...
+;
+; namespace A {
+; #line 1 "foo.cpp"
+; namespace B {
+; extern int i;
+; int f1() { return 0; }
+; void f1(int) { }
+; struct foo;
+; struct bar { };
+; typedef bar baz;
+; extern int var_decl;
+; void func_decl(void);
+; extern int var_fwd;
+; void func_fwd(void);
+; }
+; }
+; namespace A {
+; using namespace B;
+; }
+;
+; using namespace A;
+; namespace E = A;
+; int B::i = f1();
+; int func(bool b) {
+; if (b) {
+; using namespace A::B;
+; return i;
+; }
+; using namespace A;
+; using B::foo;
+; using B::bar;
+; using B::f1;
+; using B::i;
+; using B::baz;
+; namespace X = A;
+; namespace Y = X;
+; using B::var_decl;
+; using B::func_decl;
+; using B::var_fwd;
+; using B::func_fwd;
+; return i + X::B::i + Y::B::i;
+; }
+;
+; namespace A {
+; using B::i;
+; namespace B {
+; int var_fwd = i;
+; }
+; }
+; void B::func_fwd() {}
+
+@_ZN1A1B1iE = global i32 0, align 4
+@_ZN1A1B7var_fwdE = global i32 0, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_debug_info_namespace.cpp, i8* null }]
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @_ZN1A1B2f1Ev() #0 !dbg !10 {
+entry:
+ ret i32 0, !dbg !60
+}
+
+; Function Attrs: nounwind ssp uwtable
+define void @_ZN1A1B2f1Ei(i32) #0 !dbg !14 {
+entry:
+ %.addr = alloca i32, align 4
+ store i32 %0, i32* %.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %.addr, metadata !61, metadata !62), !dbg !63
+ ret void, !dbg !64
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" !dbg !17 {
+entry:
+ %call = call i32 @_ZN1A1B2f1Ev(), !dbg !65
+ store i32 %call, i32* @_ZN1A1B1iE, align 4, !dbg !65
+ ret void, !dbg !65
+}
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @_Z4funcb(i1 zeroext %b) #0 !dbg !21 {
+entry:
+ %retval = alloca i32, align 4
+ %b.addr = alloca i8, align 1
+ %frombool = zext i1 %b to i8
+ store i8 %frombool, i8* %b.addr, align 1
+ call void @llvm.dbg.declare(metadata i8* %b.addr, metadata !66, metadata !62), !dbg !67
+ %0 = load i8, i8* %b.addr, align 1, !dbg !68
+ %tobool = trunc i8 %0 to i1, !dbg !68
+ br i1 %tobool, label %if.then, label %if.end, !dbg !68
+
+if.then: ; preds = %entry
+ %1 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !69
+ store i32 %1, i32* %retval, !dbg !69
+ br label %return, !dbg !69
+
+if.end: ; preds = %entry
+ %2 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !70
+ %3 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !70
+ %add = add nsw i32 %2, %3, !dbg !70
+ %4 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !70
+ %add1 = add nsw i32 %add, %4, !dbg !70
+ store i32 %add1, i32* %retval, !dbg !70
+ br label %return, !dbg !70
+
+return: ; preds = %if.end, %if.then
+ %5 = load i32, i32* %retval, !dbg !71
+ ret i32 %5, !dbg !71
+}
+
+define internal void @__cxx_global_var_init1() section "__TEXT,__StaticInit,regular,pure_instructions" !dbg !25 {
+entry:
+ %0 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !72
+ store i32 %0, i32* @_ZN1A1B7var_fwdE, align 4, !dbg !72
+ ret void, !dbg !72
+}
+
+; Function Attrs: nounwind ssp uwtable
+define void @_ZN1A1B8func_fwdEv() #0 !dbg !26 {
+entry:
+ ret void, !dbg !73
+}
+
+define internal void @_GLOBAL__sub_I_debug_info_namespace.cpp() section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+ call void @__cxx_global_var_init(), !dbg !74
+ call void @__cxx_global_var_init1(), !dbg !74
+ ret void, !dbg !74
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!57, !58}
+!llvm.ident = !{!59}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !30, imports: !33)
+!1 = !DIFile(filename: "debug-info-namespace.cpp", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 5, flags: DIFlagFwdDecl, file: !5, scope: !6, identifier: "_ZTSN1A1B3fooE")
+!5 = !DIFile(filename: "foo.cpp", directory: "/tmp")
+!6 = !DINamespace(name: "B", line: 1, file: !5, scope: !7)
+!7 = !DINamespace(name: "A", line: 5, file: !1, scope: null)
+!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "bar", line: 6, size: 8, align: 8, file: !5, scope: !6, elements: !2, identifier: "_ZTSN1A1B3barE")
+!9 = !{!10, !14, !17, !21, !25, !26, !27}
+!10 = distinct !DISubprogram(name: "f1", linkageName: "_ZN1A1B2f1Ev", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !11, variables: !2)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!13}
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!14 = distinct !DISubprogram(name: "f1", linkageName: "_ZN1A1B2f1Ei", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !6, type: !15, variables: !2)
+!15 = !DISubroutineType(types: !16)
+!16 = !{null, !13}
+!17 = distinct !DISubprogram(name: "__cxx_global_var_init", line: 20, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !5, scope: !18, type: !19, variables: !2)
+!18 = !DIFile(filename: "foo.cpp", directory: "/tmp")
+!19 = !DISubroutineType(types: !20)
+!20 = !{null}
+!21 = distinct !DISubprogram(name: "func", linkageName: "_Z4funcb", line: 21, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 21, file: !5, scope: !18, type: !22, variables: !2)
+!22 = !DISubroutineType(types: !23)
+!23 = !{!13, !24}
+!24 = !DIBasicType(tag: DW_TAG_base_type, name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
+!25 = distinct !DISubprogram(name: "__cxx_global_var_init1", line: 44, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 44, file: !5, scope: !18, type: !19, variables: !2)
+!26 = distinct !DISubprogram(name: "func_fwd", linkageName: "_ZN1A1B8func_fwdEv", line: 47, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 47, file: !5, scope: !6, type: !19, variables: !2)
+!27 = distinct !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_debug_info_namespace.cpp", isLocal: true, isDefinition: true, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !28, type: !29, variables: !2)
+!28 = !DIFile(filename: "debug-info-namespace.cpp", directory: "/tmp")
+!29 = !DISubroutineType(types: !2)
+!30 = !{!31, !32}
+!31 = !DIGlobalVariable(name: "i", linkageName: "_ZN1A1B1iE", line: 20, isLocal: false, isDefinition: true, scope: !6, file: !18, type: !13, variable: i32* @_ZN1A1B1iE)
+!32 = !DIGlobalVariable(name: "var_fwd", linkageName: "_ZN1A1B7var_fwdE", line: 44, isLocal: false, isDefinition: true, scope: !6, file: !18, type: !13, variable: i32* @_ZN1A1B7var_fwdE)
+!33 = !{!34, !35, !36, !37, !40, !41, !42, !43, !44, !45, !47, !48, !49, !51, !54, !55, !56}
+!34 = !DIImportedEntity(tag: DW_TAG_imported_module, line: 15, scope: !7, entity: !6)
+!35 = !DIImportedEntity(tag: DW_TAG_imported_module, line: 18, scope: !0, entity: !7)
+!36 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 19, name: "E", scope: !0, entity: !7)
+!37 = !DIImportedEntity(tag: DW_TAG_imported_module, line: 23, scope: !38, entity: !6)
+!38 = distinct !DILexicalBlock(line: 22, column: 10, file: !5, scope: !39)
+!39 = distinct !DILexicalBlock(line: 22, column: 7, file: !5, scope: !21)
+!40 = !DIImportedEntity(tag: DW_TAG_imported_module, line: 26, scope: !21, entity: !7)
+!41 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 27, scope: !21, entity: !"_ZTSN1A1B3fooE")
+!42 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 28, scope: !21, entity: !"_ZTSN1A1B3barE")
+!43 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 29, scope: !21, entity: !14)
+!44 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 30, scope: !21, entity: !31)
+!45 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 31, scope: !21, entity: !46)
+!46 = !DIDerivedType(tag: DW_TAG_typedef, name: "baz", line: 7, file: !5, scope: !6, baseType: !"_ZTSN1A1B3barE")
+!47 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 32, name: "X", scope: !21, entity: !7)
+!48 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 33, name: "Y", scope: !21, entity: !47)
+!49 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 34, scope: !21, entity: !50)
+!50 = !DIGlobalVariable(name: "var_decl", linkageName: "_ZN1A1B8var_declE", line: 8, isLocal: false, isDefinition: false, scope: !6, file: !18, type: !13)
+!51 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 35, scope: !21, entity: !52)
+!52 = !DISubprogram(name: "func_decl", linkageName: "_ZN1A1B9func_declEv", line: 9, isLocal: false, isDefinition: false, flags: DIFlagPrototyped, isOptimized: false, file: !5, scope: !6, type: !19, variables: !53)
+!53 = !{} ; previously: invalid DW_TAG_base_type
+!54 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 36, scope: !21, entity: !32)
+!55 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 37, scope: !21, entity: !26)
+!56 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 42, scope: !7, entity: !31)
+!57 = !{i32 2, !"Dwarf Version", i32 2}
+!58 = !{i32 2, !"Debug Info Version", i32 3}
+!59 = !{!"clang version 3.6.0 "}
+!60 = !DILocation(line: 3, column: 12, scope: !10)
+!61 = !DILocalVariable(name: "", line: 4, arg: 1, scope: !14, file: !18, type: !13)
+!62 = !DIExpression()
+!63 = !DILocation(line: 4, column: 12, scope: !14)
+!64 = !DILocation(line: 4, column: 16, scope: !14)
+!65 = !DILocation(line: 20, column: 12, scope: !17)
+!66 = !DILocalVariable(name: "b", line: 21, arg: 1, scope: !21, file: !18, type: !24)
+!67 = !DILocation(line: 21, column: 15, scope: !21)
+!68 = !DILocation(line: 22, column: 7, scope: !21)
+!69 = !DILocation(line: 24, column: 5, scope: !38)
+!70 = !DILocation(line: 38, column: 3, scope: !21)
+!71 = !DILocation(line: 39, column: 1, scope: !21)
+!72 = !DILocation(line: 44, column: 15, scope: !25)
+!73 = !DILocation(line: 47, column: 21, scope: !26)
+!74 = !DILocation(line: 0, scope: !75)
+!75 = !DILexicalBlockFile(discriminator: 0, file: !5, scope: !27)
diff --git a/test/DebugInfo/Generic/namespace_function_definition.ll b/test/DebugInfo/Generic/namespace_function_definition.ll
new file mode 100644
index 000000000000..58a144fb1189
--- /dev/null
+++ b/test/DebugInfo/Generic/namespace_function_definition.ll
@@ -0,0 +1,44 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj -dwarf-linkage-names=Enable < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Generated from clang with the following source:
+; namespace ns {
+; void func() {
+; }
+; }
+
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name {{.*}} "ns"
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_low_pc
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_linkage_name {{.*}} "_ZN2ns4funcEv"
+; CHECK: NULL
+; CHECK: NULL
+
+; Function Attrs: nounwind uwtable
+define void @_ZN2ns4funcEv() #0 !dbg !4 {
+entry:
+ ret void, !dbg !11
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "namespace_function_definition.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "func", linkageName: "_ZN2ns4funcEv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DINamespace(name: "ns", line: 1, file: !1, scope: null)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5.0 "}
+!11 = !DILocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/Generic/namespace_inline_function_definition.ll b/test/DebugInfo/Generic/namespace_inline_function_definition.ll
new file mode 100644
index 000000000000..cca5e19db445
--- /dev/null
+++ b/test/DebugInfo/Generic/namespace_inline_function_definition.ll
@@ -0,0 +1,95 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj -dwarf-linkage-names=Enable < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Generate from clang with the following source. Note that the definition of
+; the inline function follows its use to workaround another bug that should be
+; fixed soon.
+; namespace ns {
+; int func(int i);
+; }
+; extern int x;
+; int main() { return ns::func(x); }
+; int __attribute__((always_inline)) ns::func(int i) { return i * 2; }
+
+; CHECK: DW_TAG_namespace
+; CHECK-NEXT: DW_AT_name {{.*}} "ns"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_linkage_name {{.*}} "_ZN2ns4funcEi"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_formal_parameter
+; CHECK: NULL
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} "_ZN2ns4funcEi"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_abstract_origin {{.*}} "i"
+; CHECK: NULL
+; CHECK: NULL
+; CHECK: NULL
+
+@x = external global i32
+
+; Function Attrs: uwtable
+define i32 @main() #0 !dbg !4 {
+entry:
+ %i.addr.i = alloca i32, align 4
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32, i32* @x, align 4, !dbg !16
+ store i32 %0, i32* %i.addr.i, align 4
+ call void @llvm.dbg.declare(metadata i32* %i.addr.i, metadata !117, metadata !DIExpression()), !dbg !18
+ %1 = load i32, i32* %i.addr.i, align 4, !dbg !18
+ %mul.i = mul nsw i32 %1, 2, !dbg !18
+ ret i32 %mul.i, !dbg !16
+}
+
+; Function Attrs: alwaysinline nounwind uwtable
+define i32 @_ZN2ns4funcEi(i32 %i) #1 !dbg !9 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !17, metadata !DIExpression()), !dbg !19
+ %0 = load i32, i32* %i.addr, align 4, !dbg !19
+ %mul = mul nsw i32 %0, 2, !dbg !19
+ ret i32 %mul, !dbg !19
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "namespace_inline_function_definition.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4, !9}
+!4 = distinct !DISubprogram(name: "main", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "namespace_inline_function_definition.cpp", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = distinct !DISubprogram(name: "func", linkageName: "_ZN2ns4funcEi", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !10, type: !11, variables: !2)
+!10 = !DINamespace(name: "ns", line: 1, file: !1, scope: null)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!8, !8}
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.5.0 "}
+!16 = !DILocation(line: 5, scope: !4)
+!17 = !DILocalVariable(name: "i", line: 6, arg: 1, scope: !9, file: !5, type: !8)
+
+!117 = !DILocalVariable(name: "i", line: 6, arg: 1, scope: !9, file: !5, type: !8)
+
+!18 = !DILocation(line: 6, scope: !9, inlinedAt: !16)
+!19 = !DILocation(line: 6, scope: !9)
diff --git a/test/DebugInfo/Generic/nodebug.ll b/test/DebugInfo/Generic/nodebug.ll
new file mode 100644
index 000000000000..6f20aecaaf5e
--- /dev/null
+++ b/test/DebugInfo/Generic/nodebug.ll
@@ -0,0 +1,51 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Test that a nodebug function (a function not appearing in the debug info IR
+; metadata subprogram list) with DebugLocs on its IR doesn't cause crashes/does
+; the right thing.
+
+; Build with clang from the following:
+; extern int i;
+; inline __attribute__((always_inline)) void f1() {
+; i = 3;
+; }
+;
+; __attribute__((nodebug)) void f2() {
+; f1();
+; }
+
+; Check that there's only one DW_TAG_subprogram, nothing for the 'f2' function.
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "f1"
+; CHECK-NOT: DW_TAG_subprogram
+
+@i = external global i32
+
+; Function Attrs: uwtable
+define void @_Z2f2v() #0 {
+entry:
+ store i32 3, i32* @i, align 4, !dbg !11
+ ret void
+}
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "nodebug.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "nodebug.cpp", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5.0 "}
+!11 = !DILocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/Generic/piece-verifier.ll b/test/DebugInfo/Generic/piece-verifier.ll
new file mode 100644
index 000000000000..e1f5c24a21dc
--- /dev/null
+++ b/test/DebugInfo/Generic/piece-verifier.ll
@@ -0,0 +1,56 @@
+; RUN: not llvm-as -disable-output < %s 2>&1 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @foo(i64 %s.coerce0, i32 %s.coerce1) #0 !dbg !4 {
+entry:
+ call void @llvm.dbg.value(metadata i64 %s.coerce0, i64 0, metadata !20, metadata !24), !dbg !21
+ call void @llvm.dbg.value(metadata i32 %s.coerce1, i64 0, metadata !22, metadata !27), !dbg !21
+ ret i32 %s.coerce1, !dbg !23
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "pieces.c", directory: "")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !15)
+!5 = !DIFile(filename: "pieces.c", directory: "")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8, !9}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_typedef, name: "S", line: 1, file: !1, baseType: !10)
+!10 = !DICompositeType(tag: DW_TAG_structure_type, line: 1, size: 128, align: 64, file: !1, elements: !11)
+!11 = !{!12, !14}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 1, size: 64, align: 64, file: !1, scope: !10, baseType: !13)
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 1, size: 32, align: 32, offset: 64, file: !1, scope: !10, baseType: !8)
+!15 = !{!16}
+!16 = !DILocalVariable(name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 1, !"Debug Info Version", i32 3}
+!19 = !{!"clang version 3.5 "}
+!20 = !DILocalVariable(name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
+!21 = !DILocation(line: 3, scope: !4)
+!22 = !DILocalVariable(name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
+!23 = !DILocation(line: 4, scope: !4)
+!24 = !DIExpression(DW_OP_deref, DW_OP_bit_piece, 0, 64)
+!25 = !{}
+; This expression has elements after DW_OP_bit_piece.
+; CHECK: invalid expression
+; CHECK-NEXT: !DIExpression({{[0-9]+}}, 64, 32, {{[0-9]+}})
+; CHECK-NOT: invalid expression
+!27 = !DIExpression(DW_OP_bit_piece, 64, 32, DW_OP_deref)
diff --git a/test/DebugInfo/Generic/ptrsize.ll b/test/DebugInfo/Generic/ptrsize.ll
new file mode 100755
index 000000000000..dfdcb5c0e79f
--- /dev/null
+++ b/test/DebugInfo/Generic/ptrsize.ll
@@ -0,0 +1,47 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Check that pointers and references get emitted without size information in
+; DWARF, even if they are so specified in the IR
+
+; CHECK: 0x[[O1:[0-9a-f]+]]: DW_TAG_pointer_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
+; CHECK-NOT: DW_AT_byte_size
+; CHECK: 0x[[O2:[0-9a-f]+]]: DW_TAG_
+
+; CHECK: 0x[[O3:[0-9a-f]+]]: DW_TAG_reference_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
+; CHECK-NOT: DW_AT_byte_size
+
+define i32 @foo() !dbg !4 {
+entry:
+ ret i32 0, !dbg !13
+}
+
+define i32 @bar() !dbg !5 {
+entry:
+ ret i32 0, !dbg !16
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: 0, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "dwarf-test.c", directory: "test")
+!2 = !{}
+!3 = !{!4, !5}
+!4 = distinct !DISubprogram(name: "foo", scope: !0, file: !1, line: 6, type: !6, isLocal: false, isDefinition: true, scopeLine: 6, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = distinct !DISubprogram(name: "bar", scope: !0, file: !1, line: 6, type: !15, isLocal: false, isDefinition: true, scopeLine: 6, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!9}
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, scope: !0, baseType: !8, size: 64, align: 64)
+!10 = !DIDerivedType(tag: DW_TAG_reference_type, scope: !0, baseType: !8, size: 64, align: 64)
+!11 = !{i32 2, !"Dwarf Version", i32 3}
+!12 = !{i32 1, !"Debug Info Version", i32 3}
+!13 = !DILocation(line: 7, scope: !4)
+!14 = !{!10}
+!15 = !DISubroutineType(types: !14)
+!16 = !DILocation(line: 7, scope: !5)
diff --git a/test/DebugInfo/Generic/recursive_inlining.ll b/test/DebugInfo/Generic/recursive_inlining.ll
new file mode 100644
index 000000000000..02c147e7aa68
--- /dev/null
+++ b/test/DebugInfo/Generic/recursive_inlining.ll
@@ -0,0 +1,275 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; This isn't a very pretty test case - I imagine there might be other ways to
+; tickle the optimizers into producing the desired code, but I haven't found
+; them.
+
+; The issue is when a function is inlined into itself, the inlined argument
+; accidentally overwrote the concrete argument and was lost.
+
+; IR generated from the following source compiled with clang -g:
+; void fn1(void *);
+; void fn2(int, int, int, int);
+; void fn3();
+; void fn8();
+; struct C {
+; int b;
+; void m_fn2() {
+; fn8();
+; if (b) fn2(0, 0, 0, 0);
+; fn3();
+; }
+; };
+; C *x;
+; inline void fn7() {}
+; void fn6() {
+; fn8();
+; x->m_fn2();
+; fn7();
+; }
+; void fn3() { fn6(); }
+; void fn4() { x->m_fn2(); }
+; void fn5() { x->m_fn2(); }
+
+; The definition of C and declaration of C::m_fn2
+; CHECK: DW_TAG_structure_type
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_member
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[M_FN2_DECL:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "m_fn2"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+
+; The abstract definition of C::m_fn2
+; CHECK: [[M_FN2_ABS_DEF:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_specification {{.*}} {[[M_FN2_DECL]]}
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_inline
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[M_FN2_THIS_ABS_DEF:.*]]: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "this"
+
+; Skip some other functions
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
+
+; The concrete definition of C::m_fn2
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {[[M_FN2_ABS_DEF]]}
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {[[M_FN2_THIS_ABS_DEF]]}
+; CHECK-NOT: {{DW_TAG|NULL}}
+; Inlined fn3:
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NOT: {{DW_TAG|NULL}}
+; Inlined fn6:
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NOT: {{DW_TAG|NULL}}
+; Inlined C::m_fn2:
+; CHECK: DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {[[M_FN2_ABS_DEF]]}
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_abstract_origin {{.*}} {[[M_FN2_THIS_ABS_DEF]]}
+
+
+
+%struct.C = type { i32 }
+
+@x = global %struct.C* null, align 8
+
+; Function Attrs: nounwind
+define void @_Z3fn6v() #0 !dbg !14 {
+entry:
+ tail call void @_Z3fn8v() #3, !dbg !31
+ %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !32, !tbaa !33
+ tail call void @llvm.dbg.value(metadata %struct.C* %0, i64 0, metadata !37, metadata !DIExpression()) #3, !dbg !38
+ tail call void @_Z3fn8v() #3, !dbg !39
+ %b.i = getelementptr inbounds %struct.C, %struct.C* %0, i64 0, i32 0, !dbg !40
+ %1 = load i32, i32* %b.i, align 4, !dbg !40, !tbaa !42
+ %tobool.i = icmp eq i32 %1, 0, !dbg !40
+ br i1 %tobool.i, label %_ZN1C5m_fn2Ev.exit, label %if.then.i, !dbg !40
+
+if.then.i: ; preds = %entry
+ tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !45
+ br label %_ZN1C5m_fn2Ev.exit, !dbg !45
+
+_ZN1C5m_fn2Ev.exit: ; preds = %entry, %if.then.i
+ tail call void @_Z3fn3v() #3, !dbg !47
+ ret void, !dbg !48
+}
+
+declare void @_Z3fn8v() #1
+
+; Function Attrs: nounwind
+define linkonce_odr void @_ZN1C5m_fn2Ev(%struct.C* nocapture readonly %this) #0 align 2 !dbg !22 {
+entry:
+ tail call void @llvm.dbg.value(metadata %struct.C* %this, i64 0, metadata !24, metadata !DIExpression()), !dbg !49
+ tail call void @_Z3fn8v() #3, !dbg !50
+ %b = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0, !dbg !51
+ %0 = load i32, i32* %b, align 4, !dbg !51, !tbaa !42
+ %tobool = icmp eq i32 %0, 0, !dbg !51
+ br i1 %tobool, label %if.end, label %if.then, !dbg !51
+
+if.then: ; preds = %entry
+ tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !52
+ br label %if.end, !dbg !52
+
+if.end: ; preds = %entry, %if.then
+ tail call void @_Z3fn8v() #3, !dbg !53
+ %1 = load %struct.C*, %struct.C** @x, align 8, !dbg !56, !tbaa !33
+ tail call void @llvm.dbg.value(metadata %struct.C* %1, i64 0, metadata !57, metadata !DIExpression()) #3, !dbg !58
+ tail call void @_Z3fn8v() #3, !dbg !59
+ %b.i.i = getelementptr inbounds %struct.C, %struct.C* %1, i64 0, i32 0, !dbg !60
+ %2 = load i32, i32* %b.i.i, align 4, !dbg !60, !tbaa !42
+ %tobool.i.i = icmp eq i32 %2, 0, !dbg !60
+ br i1 %tobool.i.i, label %_Z3fn6v.exit, label %if.then.i.i, !dbg !60
+
+if.then.i.i: ; preds = %if.end
+ tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !61
+ br label %_Z3fn6v.exit, !dbg !61
+
+_Z3fn6v.exit: ; preds = %if.end, %if.then.i.i
+ tail call void @_Z3fn3v() #3, !dbg !62
+ ret void, !dbg !63
+}
+
+; Function Attrs: nounwind
+define void @_Z3fn3v() #0 !dbg !18 {
+entry:
+ br label %tailrecurse
+
+tailrecurse: ; preds = %tailrecurse.backedge, %entry
+ tail call void @_Z3fn8v() #3, !dbg !64
+ %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !66, !tbaa !33
+ tail call void @llvm.dbg.value(metadata %struct.C* %0, i64 0, metadata !67, metadata !DIExpression()) #3, !dbg !68
+ tail call void @_Z3fn8v() #3, !dbg !69
+ %b.i.i = getelementptr inbounds %struct.C, %struct.C* %0, i64 0, i32 0, !dbg !70
+ %1 = load i32, i32* %b.i.i, align 4, !dbg !70, !tbaa !42
+ %tobool.i.i = icmp eq i32 %1, 0, !dbg !70
+ br i1 %tobool.i.i, label %tailrecurse.backedge, label %if.then.i.i, !dbg !70
+
+tailrecurse.backedge: ; preds = %tailrecurse, %if.then.i.i
+ br label %tailrecurse
+
+if.then.i.i: ; preds = %tailrecurse
+ tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !71
+ br label %tailrecurse.backedge, !dbg !71
+}
+
+; Function Attrs: nounwind
+define void @_Z3fn4v() #0 !dbg !19 {
+entry:
+ %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !72, !tbaa !33
+ tail call void @_ZN1C5m_fn2Ev(%struct.C* %0), !dbg !72
+ ret void, !dbg !72
+}
+
+; Function Attrs: nounwind
+define void @_Z3fn5v() #0 !dbg !20 {
+entry:
+ %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !73, !tbaa !33
+ tail call void @_ZN1C5m_fn2Ev(%struct.C* %0), !dbg !73
+ ret void, !dbg !73
+}
+
+declare void @_Z3fn2iiii(i32, i32, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!28, !29}
+!llvm.ident = !{!30}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !13, globals: !26, imports: !2)
+!1 = !DIFile(filename: "<stdin>", directory: "/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 5, size: 32, align: 32, file: !5, elements: !6, identifier: "_ZTS1C")
+!5 = !DIFile(filename: "recursive_inlining.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce")
+!6 = !{!7, !9}
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 6, size: 32, align: 32, file: !5, scope: !"_ZTS1C", baseType: !8)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DISubprogram(name: "m_fn2", linkageName: "_ZN1C5m_fn2Ev", line: 7, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !5, scope: !"_ZTS1C", type: !10)
+!10 = !DISubroutineType(types: !11)
+!11 = !{null, !12}
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
+!13 = !{!14, !18, !19, !20, !21, !22}
+!14 = distinct !DISubprogram(name: "fn6", linkageName: "_Z3fn6v", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 15, file: !5, scope: !15, type: !16, variables: !2)
+!15 = !DIFile(filename: "recursive_inlining.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce")
+!16 = !DISubroutineType(types: !17)
+!17 = !{null}
+!18 = distinct !DISubprogram(name: "fn3", linkageName: "_Z3fn3v", line: 20, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 20, file: !5, scope: !15, type: !16, variables: !2)
+!19 = distinct !DISubprogram(name: "fn4", linkageName: "_Z3fn4v", line: 21, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 21, file: !5, scope: !15, type: !16, variables: !2)
+!20 = distinct !DISubprogram(name: "fn5", linkageName: "_Z3fn5v", line: 22, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 22, file: !5, scope: !15, type: !16, variables: !2)
+!21 = distinct !DISubprogram(name: "fn7", linkageName: "_Z3fn7v", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !5, scope: !15, type: !16, variables: !2)
+!22 = distinct !DISubprogram(name: "m_fn2", linkageName: "_ZN1C5m_fn2Ev", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !5, scope: !"_ZTS1C", type: !10, declaration: !9, variables: !23)
+!23 = !{!24}
+!24 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
+!25 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1C")
+!26 = !{!27}
+!27 = !DIGlobalVariable(name: "x", line: 13, isLocal: false, isDefinition: true, scope: null, file: !15, type: !25, variable: %struct.C** @x)
+!28 = !{i32 2, !"Dwarf Version", i32 4}
+!29 = !{i32 2, !"Debug Info Version", i32 3}
+!30 = !{!"clang version 3.6.0 "}
+!31 = !DILocation(line: 16, scope: !14)
+!32 = !DILocation(line: 17, scope: !14)
+!33 = !{!34, !34, i64 0}
+!34 = !{!"any pointer", !35, i64 0}
+!35 = !{!"omnipotent char", !36, i64 0}
+!36 = !{!"Simple C/C++ TBAA"}
+!37 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
+!38 = !DILocation(line: 0, scope: !22, inlinedAt: !32)
+!39 = !DILocation(line: 8, scope: !22, inlinedAt: !32)
+!40 = !DILocation(line: 9, scope: !41, inlinedAt: !32)
+!41 = distinct !DILexicalBlock(line: 9, column: 0, file: !5, scope: !22)
+!42 = !{!43, !44, i64 0}
+!43 = !{!"_ZTS1C", !44, i64 0}
+!44 = !{!"int", !35, i64 0}
+!45 = !DILocation(line: 9, scope: !46, inlinedAt: !32)
+!46 = distinct !DILexicalBlock(line: 9, column: 0, file: !5, scope: !41)
+!47 = !DILocation(line: 10, scope: !22, inlinedAt: !32)
+!48 = !DILocation(line: 19, scope: !14)
+!49 = !DILocation(line: 0, scope: !22)
+!50 = !DILocation(line: 8, scope: !22)
+!51 = !DILocation(line: 9, scope: !41)
+!52 = !DILocation(line: 9, scope: !46)
+!53 = !DILocation(line: 16, scope: !14, inlinedAt: !54)
+!54 = !DILocation(line: 20, scope: !18, inlinedAt: !55)
+!55 = !DILocation(line: 10, scope: !22)
+!56 = !DILocation(line: 17, scope: !14, inlinedAt: !54)
+!57 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
+!58 = !DILocation(line: 0, scope: !22, inlinedAt: !56)
+!59 = !DILocation(line: 8, scope: !22, inlinedAt: !56)
+!60 = !DILocation(line: 9, scope: !41, inlinedAt: !56)
+!61 = !DILocation(line: 9, scope: !46, inlinedAt: !56)
+!62 = !DILocation(line: 10, scope: !22, inlinedAt: !56)
+!63 = !DILocation(line: 11, scope: !22)
+!64 = !DILocation(line: 16, scope: !14, inlinedAt: !65)
+!65 = !DILocation(line: 20, scope: !18)
+!66 = !DILocation(line: 17, scope: !14, inlinedAt: !65)
+!67 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
+!68 = !DILocation(line: 0, scope: !22, inlinedAt: !66)
+!69 = !DILocation(line: 8, scope: !22, inlinedAt: !66)
+!70 = !DILocation(line: 9, scope: !41, inlinedAt: !66)
+!71 = !DILocation(line: 9, scope: !46, inlinedAt: !66)
+!72 = !DILocation(line: 21, scope: !19)
+!73 = !DILocation(line: 22, scope: !20)
diff --git a/test/DebugInfo/Generic/restrict.ll b/test/DebugInfo/Generic/restrict.ll
new file mode 100644
index 000000000000..b0536acd03cc
--- /dev/null
+++ b/test/DebugInfo/Generic/restrict.ll
@@ -0,0 +1,53 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -dwarf-version=2 -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=V2 %s
+; RUN: %llc_dwarf -dwarf-version=3 -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=V3 %s
+
+; CHECK: DW_AT_name {{.*}} "dst"
+; V2: DW_AT_type {{.*}} {[[PTR:0x.*]]}
+; V3: DW_AT_type {{.*}} {[[RESTRICT:0x.*]]}
+; V3: [[RESTRICT]]: {{.*}}DW_TAG_restrict_type
+; V3-NEXT: DW_AT_type {{.*}} {[[PTR:0x.*]]}
+; CHECK: [[PTR]]: {{.*}}DW_TAG_pointer_type
+; CHECK-NOT: DW_AT_type
+
+; Generated with clang from:
+; void foo(void* __restrict__ dst) {
+; }
+
+
+; Function Attrs: nounwind uwtable
+define void @_Z3fooPv(i8* noalias %dst) #0 !dbg !4 {
+entry:
+ %dst.addr = alloca i8*, align 8
+ store i8* %dst, i8** %dst.addr, align 8
+ call void @llvm.dbg.declare(metadata i8** %dst.addr, metadata !13, metadata !DIExpression()), !dbg !14
+ ret void, !dbg !15
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "restrict.c", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooPv", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "restrict.c", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null, !8}
+!8 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !9)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 1, !"Debug Info Version", i32 3}
+!12 = !{!"clang version 3.5.0 "}
+!13 = !DILocalVariable(name: "dst", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!14 = !DILocation(line: 1, scope: !4)
+!15 = !DILocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/Generic/skeletoncu.ll b/test/DebugInfo/Generic/skeletoncu.ll
new file mode 100644
index 000000000000..ca040e302564
--- /dev/null
+++ b/test/DebugInfo/Generic/skeletoncu.ll
@@ -0,0 +1,16 @@
+; RUN: %llc_dwarf %s -filetype=obj -o %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; XFAIL: hexagon
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_GNU_dwo_id {{.*}}abcd
+; CHECK: DW_AT_GNU_dwo_name {{.*}}"my.dwo"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM", isOptimized: false, runtimeVersion: 2, splitDebugFilename: "my.dwo", emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2, dwoId: 43981)
+!1 = !DIFile(filename: "<stdin>", directory: "/")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+
diff --git a/test/DebugInfo/Generic/sugared-constants.ll b/test/DebugInfo/Generic/sugared-constants.ll
new file mode 100644
index 000000000000..5e32b794afde
--- /dev/null
+++ b/test/DebugInfo/Generic/sugared-constants.ll
@@ -0,0 +1,82 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; Use correct signedness when emitting constants of derived (sugared) types.
+
+; Test compiled to IR from clang with -O1 and the following source:
+
+; void func(int);
+; void func(unsigned);
+; void func(char16_t);
+; int main() {
+; const int i = 42;
+; func(i);
+; const unsigned j = 117;
+; func(j);
+; char16_t c = 7;
+; func(c);
+; }
+
+; CHECK: DW_AT_const_value [DW_FORM_sdata] (42)
+; CHECK: DW_AT_const_value [DW_FORM_udata] (117)
+; CHECK: DW_AT_const_value [DW_FORM_udata] (7)
+
+; Function Attrs: uwtable
+define i32 @main() #0 !dbg !4 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 42, i64 0, metadata !10, metadata !DIExpression()), !dbg !21
+ tail call void @_Z4funci(i32 42), !dbg !22
+ tail call void @llvm.dbg.value(metadata i32 117, i64 0, metadata !12, metadata !DIExpression()), !dbg !24
+ tail call void @_Z4funcj(i32 117), !dbg !25
+ tail call void @llvm.dbg.value(metadata i16 7, i64 0, metadata !15, metadata !DIExpression()), !dbg !27
+ tail call void @_Z4funcDs(i16 zeroext 7), !dbg !28
+ ret i32 0, !dbg !29
+}
+
+declare void @_Z4funci(i32) #1
+
+declare void @_Z4funcj(i32) #1
+
+declare void @_Z4funcDs(i16 zeroext) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "const.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !9)
+!5 = !DIFile(filename: "const.cpp", directory: "/tmp/dbginfo")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10, !12, !15}
+!10 = !DILocalVariable(name: "i", line: 5, scope: !4, file: !5, type: !11)
+!11 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !8)
+!12 = !DILocalVariable(name: "j", line: 7, scope: !4, file: !5, type: !13)
+!13 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !14)
+!14 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!15 = !DILocalVariable(name: "c", line: 9, scope: !4, file: !5, type: !16)
+!16 = !DIBasicType(tag: DW_TAG_base_type, name: "char16_t", size: 16, align: 16, encoding: 16)
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 1, !"Debug Info Version", i32 3}
+!19 = !{!"clang version 3.5.0 "}
+!20 = !{i32 42}
+!21 = !DILocation(line: 5, scope: !4)
+!22 = !DILocation(line: 6, scope: !4)
+!23 = !{i32 117}
+!24 = !DILocation(line: 7, scope: !4)
+!25 = !DILocation(line: 8, scope: !4)
+!26 = !{i16 7}
+!27 = !DILocation(line: 9, scope: !4)
+!28 = !DILocation(line: 10, scope: !4)
+!29 = !DILocation(line: 11, scope: !4)
diff --git a/test/DebugInfo/Generic/template-recursive-void.ll b/test/DebugInfo/Generic/template-recursive-void.ll
new file mode 100644
index 000000000000..645f1795c76c
--- /dev/null
+++ b/test/DebugInfo/Generic/template-recursive-void.ll
@@ -0,0 +1,61 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; This was pulled from clang's debug-info-template-recursive.cpp test.
+; class base { };
+
+; template <class T> class foo : public base {
+; void operator=(const foo r) { }
+; };
+
+; class bar : public foo<void> { };
+; bar filters;
+
+; CHECK: DW_TAG_template_type_parameter [{{.*}}]
+; CHECK-NEXT: DW_AT_name{{.*}}"T"
+; CHECK-NOT: DW_AT_type
+; CHECK: NULL
+
+%class.bar = type { i8 }
+
+@filters = global %class.bar zeroinitializer, align 1
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!36, !37}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 187958) (llvm/trunk 187964)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !DIFile(filename: "debug-info-template-recursive.cpp", directory: "/usr/local/google/home/echristo/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIGlobalVariable(name: "filters", line: 10, isLocal: false, isDefinition: true, scope: null, file: !5, type: !6, variable: %class.bar* @filters)
+!5 = !DIFile(filename: "debug-info-template-recursive.cpp", directory: "/usr/local/google/home/echristo/tmp")
+!6 = !DICompositeType(tag: DW_TAG_class_type, name: "bar", line: 9, size: 8, align: 8, file: !1, elements: !7)
+!7 = !{!8, !31}
+!8 = !DIDerivedType(tag: DW_TAG_inheritance, scope: !6, baseType: !9)
+!9 = !DICompositeType(tag: DW_TAG_class_type, name: "foo<void>", line: 5, size: 8, align: 8, file: !1, elements: !10, templateParams: !29)
+!10 = !{!11, !19, !25}
+!11 = !DIDerivedType(tag: DW_TAG_inheritance, scope: !9, baseType: !12)
+!12 = !DICompositeType(tag: DW_TAG_class_type, name: "base", line: 3, size: 8, align: 8, file: !1, elements: !13)
+!13 = !{!14}
+!14 = !DISubprogram(name: "base", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !12, type: !15)
+!15 = !DISubroutineType(types: !16)
+!16 = !{null, !17}
+!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !12)
+!19 = !DISubprogram(name: "operator=", linkageName: "_ZN3fooIvEaSES0_", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate | DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !9, type: !20)
+!20 = !DISubroutineType(types: !21)
+!21 = !{null, !22, !23}
+!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !9)
+!23 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
+!25 = !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !9, type: !26)
+!26 = !DISubroutineType(types: !27)
+!27 = !{null, !22}
+!29 = !{!30}
+!30 = !DITemplateTypeParameter(name: "T", type: null)
+!31 = !DISubprogram(name: "bar", line: 9, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !1, scope: !6, type: !32)
+!32 = !DISubroutineType(types: !33)
+!33 = !{null, !34}
+!34 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !6)
+!36 = !{i32 2, !"Dwarf Version", i32 3}
+!37 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/tu-composite.ll b/test/DebugInfo/Generic/tu-composite.ll
new file mode 100644
index 000000000000..77f99d2f8976
--- /dev/null
+++ b/test/DebugInfo/Generic/tu-composite.ll
@@ -0,0 +1,184 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -filetype=obj -O0 < %s > %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; CHECK: [[TYPE:.*]]: DW_TAG_structure_type
+; Make sure we correctly handle containing type of a struct being a type identifier.
+; CHECK-NEXT: DW_AT_containing_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]})
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "C")
+
+; Make sure we correctly handle context of a subprogram being a type identifier.
+; CHECK: [[SP:.*]]: DW_TAG_subprogram
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "foo")
+; Make sure we correctly handle containing type of a subprogram being a type identifier.
+; CHECK: DW_AT_containing_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]})
+; CHECK: DW_TAG_formal_parameter
+; CHECK: NULL
+; CHECK: NULL
+
+; CHECK: [[TYPE2:.*]]: DW_TAG_structure_type
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "bar")
+; CHECK: DW_TAG_structure_type
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "D")
+; CHECK: DW_TAG_member
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "a")
+; Make sure we correctly handle context of a struct being a type identifier.
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "Nested")
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "Nested2")
+; CHECK-NEXT: DW_AT_declaration [DW_FORM_flag] (0x01)
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "virt<bar>")
+; Make sure we correctly handle type of a template_type being a type identifier.
+; CHECK: DW_TAG_template_type_parameter
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
+; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "T")
+; Make sure we correctly handle derived-from of a typedef being a type identifier.
+; CHECK: DW_TAG_typedef
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "baz2")
+; Make sure we correctly handle derived-from of a pointer type being a type identifier.
+; CHECK: DW_TAG_pointer_type
+; CHECK: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]})
+; CHECK: DW_TAG_typedef
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
+; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "baz")
+; Make sure we correctly handle derived-from of an array type being a type identifier.
+; CHECK: DW_TAG_array_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
+; IR generated from clang -g with the following source:
+; struct C {
+; virtual void foo();
+; };
+; void C::foo() {
+; }
+;
+; struct bar { };
+; typedef bar baz;
+; struct D {
+; typedef bar baz2;
+; static int a;
+; struct Nested { };
+; struct Nested2 { };
+; template <typename T>
+; struct virt {
+; T* values;
+; };
+; };
+; void test() {
+; baz B;
+; bar A[3];
+; D::baz2 B2;
+; D::Nested e;
+; D::Nested2 *p;
+; D::virt<bar> t;
+; }
+
+%struct.C = type { i32 (...)** }
+%struct.bar = type { i8 }
+%"struct.D::Nested" = type { i8 }
+%"struct.D::Nested2" = type { i8 }
+%"struct.D::virt" = type { %struct.bar* }
+
+@_ZTV1C = unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1C to i8*), i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)]
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS1C = constant [3 x i8] c"1C\00"
+@_ZTI1C = unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1C, i32 0, i32 0) }
+
+; Function Attrs: nounwind ssp uwtable
+define void @_ZN1C3fooEv(%struct.C* %this) unnamed_addr #0 align 2 !dbg !31 {
+entry:
+ %this.addr = alloca %struct.C*, align 8
+ store %struct.C* %this, %struct.C** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !36, metadata !DIExpression()), !dbg !38
+ %this1 = load %struct.C*, %struct.C** %this.addr
+ ret void, !dbg !39
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z4testv() #0 !dbg !32 {
+entry:
+ %B = alloca %struct.bar, align 1
+ %A = alloca [3 x %struct.bar], align 1
+ %B2 = alloca %struct.bar, align 1
+ %e = alloca %"struct.D::Nested", align 1
+ %p = alloca %"struct.D::Nested2"*, align 8
+ %t = alloca %"struct.D::virt", align 8
+ call void @llvm.dbg.declare(metadata %struct.bar* %B, metadata !40, metadata !DIExpression()), !dbg !42
+ call void @llvm.dbg.declare(metadata [3 x %struct.bar]* %A, metadata !43, metadata !DIExpression()), !dbg !47
+ call void @llvm.dbg.declare(metadata %struct.bar* %B2, metadata !48, metadata !DIExpression()), !dbg !50
+ call void @llvm.dbg.declare(metadata %"struct.D::Nested"* %e, metadata !51, metadata !DIExpression()), !dbg !52
+ call void @llvm.dbg.declare(metadata %"struct.D::Nested2"** %p, metadata !53, metadata !DIExpression()), !dbg !55
+ call void @llvm.dbg.declare(metadata %"struct.D::virt"* %t, metadata !56, metadata !DIExpression()), !dbg !57
+ ret void, !dbg !58
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35, !59}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !30, globals: !2, imports: !2)
+!1 = !DIFile(filename: "tmp.cpp", directory: ".")
+!2 = !{}
+!3 = !{!4, !18, !19, !22, !23, !24}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 1, size: 64, align: 64, file: !1, elements: !5, vtableHolder: !"_ZTS1C", identifier: "_ZTS1C")
+!5 = !{!6, !13}
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "_vptr$C", size: 64, flags: DIFlagArtificial, file: !1, scope: !7, baseType: !8)
+!7 = !DIFile(filename: "tmp.cpp", directory: ".")
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, baseType: !9)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "__vtbl_ptr_type", size: 64, baseType: !10)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!12}
+!12 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !DISubprogram(name: "foo", linkageName: "_ZN1C3fooEv", line: 2, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS1C", type: !14, containingType: !"_ZTS1C")
+!14 = !DISubroutineType(types: !15)
+!15 = !{null, !16}
+!16 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
+!18 = !DICompositeType(tag: DW_TAG_structure_type, name: "bar", line: 7, size: 8, align: 8, file: !1, elements: !2, identifier: "_ZTS3bar")
+!19 = !DICompositeType(tag: DW_TAG_structure_type, name: "D", line: 9, size: 8, align: 8, file: !1, elements: !20, identifier: "_ZTS1D")
+!20 = !{!21}
+!21 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 11, flags: DIFlagStaticMember, file: !1, scope: !"_ZTS1D", baseType: !12)
+!22 = !DICompositeType(tag: DW_TAG_structure_type, name: "Nested", line: 12, size: 8, align: 8, file: !1, scope: !"_ZTS1D", elements: !2, identifier: "_ZTSN1D6NestedE")
+!23 = !DICompositeType(tag: DW_TAG_structure_type, name: "Nested2", line: 13, flags: DIFlagFwdDecl, file: !1, scope: !"_ZTS1D", identifier: "_ZTSN1D7Nested2E")
+!24 = !DICompositeType(tag: DW_TAG_structure_type, name: "virt<bar>", line: 15, size: 64, align: 64, file: !1, scope: !"_ZTS1D", elements: !25, templateParams: !28, identifier: "_ZTSN1D4virtI3barEE")
+!25 = !{!26}
+!26 = !DIDerivedType(tag: DW_TAG_member, name: "values", line: 16, size: 64, align: 64, file: !1, scope: !"_ZTSN1D4virtI3barEE", baseType: !27)
+!27 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS3bar")
+!28 = !{!29}
+!29 = !DITemplateTypeParameter(name: "T", type: !"_ZTS3bar")
+!30 = !{!31, !32}
+!31 = distinct !DISubprogram(name: "foo", linkageName: "_ZN1C3fooEv", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: null, type: !14, declaration: !13, variables: !2)
+!32 = distinct !DISubprogram(name: "test", linkageName: "_Z4testv", line: 20, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !1, scope: !7, type: !33, variables: !2)
+!33 = !DISubroutineType(types: !34)
+!34 = !{null}
+!35 = !{i32 2, !"Dwarf Version", i32 2}
+!36 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !31, type: !37)
+!37 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1C")
+!38 = !DILocation(line: 0, scope: !31)
+!39 = !DILocation(line: 5, scope: !31)
+!40 = !DILocalVariable(name: "B", line: 21, scope: !32, file: !7, type: !41)
+!41 = !DIDerivedType(tag: DW_TAG_typedef, name: "baz", line: 8, file: !1, baseType: !"_ZTS3bar")
+!42 = !DILocation(line: 21, scope: !32)
+!43 = !DILocalVariable(name: "A", line: 22, scope: !32, file: !7, type: !44)
+!44 = !DICompositeType(tag: DW_TAG_array_type, size: 24, align: 8, baseType: !"_ZTS3bar", elements: !45)
+!45 = !{!46}
+!46 = !DISubrange(count: 3)
+!47 = !DILocation(line: 22, scope: !32)
+!48 = !DILocalVariable(name: "B2", line: 23, scope: !32, file: !7, type: !49)
+!49 = !DIDerivedType(tag: DW_TAG_typedef, name: "baz2", line: 10, file: !1, scope: !"_ZTS1D", baseType: !"_ZTS3bar")
+!50 = !DILocation(line: 23, scope: !32)
+!51 = !DILocalVariable(name: "e", line: 24, scope: !32, file: !7, type: !22)
+!52 = !DILocation(line: 24, scope: !32)
+!53 = !DILocalVariable(name: "p", line: 25, scope: !32, file: !7, type: !54)
+!54 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTSN1D7Nested2E")
+!55 = !DILocation(line: 25, scope: !32)
+!56 = !DILocalVariable(name: "t", line: 26, scope: !32, file: !7, type: !24)
+!57 = !DILocation(line: 26, scope: !32)
+!58 = !DILocation(line: 27, scope: !32)
+!59 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/tu-member-pointer.ll b/test/DebugInfo/Generic/tu-member-pointer.ll
new file mode 100644
index 000000000000..8b1eb3bb6d14
--- /dev/null
+++ b/test/DebugInfo/Generic/tu-member-pointer.ll
@@ -0,0 +1,30 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -filetype=obj -O0 < %s > %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; CHECK: DW_TAG_ptr_to_member_type
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE:0x[0-9a-f]+]]})
+; CHECK: [[TYPE]]: DW_TAG_base_type
+; IR generated from clang -g with the following source:
+; struct Foo {
+; int e;
+; };
+; int Foo:*x = 0;
+
+@x = global i64 -1, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !5, imports: !2)
+!1 = !DIFile(filename: "foo.cpp", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", line: 1, flags: DIFlagFwdDecl, file: !1, identifier: "_ZTS3Foo")
+!5 = !{!6}
+!6 = !DIGlobalVariable(name: "x", line: 4, isLocal: false, isDefinition: true, scope: null, file: !7, type: !8, variable: i64* @x)
+!7 = !DIFile(filename: "foo.cpp", directory: ".")
+!8 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !9, extraData: !"_ZTS3Foo")
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !{i32 2, !"Dwarf Version", i32 2}
+!11 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/two-cus-from-same-file.ll b/test/DebugInfo/Generic/two-cus-from-same-file.ll
new file mode 100644
index 000000000000..65d376c814d9
--- /dev/null
+++ b/test/DebugInfo/Generic/two-cus-from-same-file.ll
@@ -0,0 +1,73 @@
+; For http://llvm.org/bugs/show_bug.cgi?id=12942
+; There are two CUs coming from /tmp/foo.c in this module. Make sure it doesn't
+; blow llc up and produces something reasonable.
+;
+
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf %s -o %t -filetype=obj -O0
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+
+; ModuleID = 'test.bc'
+
+@str = private unnamed_addr constant [4 x i8] c"FOO\00"
+@str1 = private unnamed_addr constant [6 x i8] c"Main!\00"
+
+define void @foo() nounwind !dbg !5 {
+entry:
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str, i32 0, i32 0)), !dbg !23
+ ret void, !dbg !25
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind !dbg !12 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !21, metadata !DIExpression()), !dbg !26
+ tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !22, metadata !DIExpression()), !dbg !27
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @str1, i32 0, i32 0)), !dbg !28
+ tail call void @foo() nounwind, !dbg !30
+ ret i32 0, !dbg !31
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0, !9}
+!llvm.module.flags = !{!33}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 156513)", isOptimized: true, emissionKind: 1, file: !32, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!1 = !{}
+!3 = !{!5}
+!5 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !32, scope: !6, type: !7, variables: !1)
+!6 = !DIFile(filename: "foo.c", directory: "/tmp")
+!7 = !DISubroutineType(types: !8)
+!8 = !{null}
+!9 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 156513)", isOptimized: true, emissionKind: 1, file: !32, enums: !1, retainedTypes: !1, subprograms: !10, globals: !1, imports: !1)
+!10 = !{!12}
+!12 = distinct !DISubprogram(name: "main", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !32, scope: !6, type: !13, variables: !19)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!15, !15, !16}
+!15 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!16 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !17)
+!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !18)
+!18 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!19 = !{!21, !22}
+!21 = !DILocalVariable(name: "argc", line: 11, arg: 1, scope: !12, file: !6, type: !15)
+!22 = !DILocalVariable(name: "argv", line: 11, arg: 2, scope: !12, file: !6, type: !16)
+!23 = !DILocation(line: 6, column: 3, scope: !24)
+!24 = distinct !DILexicalBlock(line: 5, column: 16, file: !32, scope: !5)
+!25 = !DILocation(line: 7, column: 1, scope: !24)
+!26 = !DILocation(line: 11, column: 14, scope: !12)
+!27 = !DILocation(line: 11, column: 26, scope: !12)
+!28 = !DILocation(line: 12, column: 3, scope: !29)
+!29 = distinct !DILexicalBlock(line: 11, column: 34, file: !32, scope: !12)
+!30 = !DILocation(line: 13, column: 3, scope: !29)
+!31 = !DILocation(line: 14, column: 3, scope: !29)
+!32 = !DIFile(filename: "foo.c", directory: "/tmp")
+
+; This test is simple to be cross platform (many targets don't yet have
+; sufficiently good DWARF emission and/or dumping)
+; CHECK: {{DW_TAG_compile_unit}}
+; CHECK: {{foo\.c}}
+
+!33 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Generic/typedef.ll b/test/DebugInfo/Generic/typedef.ll
new file mode 100644
index 000000000000..3cf4dffe9372
--- /dev/null
+++ b/test/DebugInfo/Generic/typedef.ll
@@ -0,0 +1,32 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; From source:
+; typedef void x;
+; x *y;
+
+; Check that a typedef with no DW_AT_type is produced. The absence of a type is used to imply the 'void' type.
+
+; CHECK: DW_TAG_typedef
+; CHECK-NOT: DW_AT_type
+; CHECK: {{DW_TAG|NULL}}
+
+@y = global i8* null, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !DIFile(filename: "typedef.cpp", directory: "/tmp/dbginfo")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIGlobalVariable(name: "y", line: 2, isLocal: false, isDefinition: true, scope: null, file: !5, type: !6, variable: i8** @y)
+!5 = !DIFile(filename: "typedef.cpp", directory: "/tmp/dbginfo")
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !7)
+!7 = !DIDerivedType(tag: DW_TAG_typedef, name: "x", line: 1, file: !1, baseType: null)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5.0 "}
+
diff --git a/test/DebugInfo/Generic/unconditional-branch.ll b/test/DebugInfo/Generic/unconditional-branch.ll
new file mode 100644
index 000000000000..9325e1b27ada
--- /dev/null
+++ b/test/DebugInfo/Generic/unconditional-branch.ll
@@ -0,0 +1,65 @@
+; REQUIRES: object-emission
+; PR 19261
+
+; RUN: %llc_dwarf -fast-isel=false -O0 -filetype=obj %s -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: {{0x[0-9a-f]+}} 1 0 1 0 0 is_stmt
+; CHECK: {{0x[0-9a-f]+}} 2 0 1 0 0 is_stmt
+; CHECK: {{0x[0-9a-f]+}} 4 0 1 0 0 is_stmt
+
+; IR generated from clang -O0 -g with the following source:
+;void foo(int i){
+; switch(i){
+; default:
+; break;
+; }
+; return;
+;}
+
+; Function Attrs: nounwind
+define void @foo(i32 %i) #0 !dbg !4 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !12, metadata !DIExpression()), !dbg !13
+ %0 = load i32, i32* %i.addr, align 4, !dbg !14
+ switch i32 %0, label %sw.default [
+ ], !dbg !14
+
+sw.epilog: ; preds = %sw.default
+ ret void, !dbg !17
+
+sw.default: ; preds = %entry
+ br label %sw.epilog, !dbg !15
+
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (204712)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "test.c", directory: "D:\5Cwork\5CEPRs\5C396363")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "test.c", directory: "D:CworkCEPRsC396363")
+!6 = !DISubroutineType(types: !7)
+!7 = !{null, !8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.5.0 (204712)"}
+!12 = !DILocalVariable(name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!13 = !DILocation(line: 1, scope: !4)
+!14 = !DILocation(line: 2, scope: !4)
+!15 = !DILocation(line: 4, scope: !16)
+!16 = distinct !DILexicalBlock(line: 2, column: 0, file: !1, scope: !4)
+!17 = !DILocation(line: 6, scope: !4)
diff --git a/test/DebugInfo/Generic/varargs.ll b/test/DebugInfo/Generic/varargs.ll
new file mode 100644
index 000000000000..8567bf715145
--- /dev/null
+++ b/test/DebugInfo/Generic/varargs.ll
@@ -0,0 +1,101 @@
+; RUN: %llc_dwarf -O0 -filetype=obj -o %t.o %s
+; RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
+; REQUIRES: object-emission
+;
+; Test debug info for variadic function arguments.
+; Created from tools/clang/tests/CodeGenCXX/debug-info-varargs.cpp
+;
+; The ... parameter of variadic should be emitted as
+; DW_TAG_unspecified_parameters.
+;
+; Normal variadic function.
+; void b(int c, ...);
+;
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "a"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_unspecified_parameters
+;
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "b"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_unspecified_parameters
+;
+; Variadic C++ member function.
+; struct A { void a(int c, ...); }
+;
+; Variadic function pointer.
+; void (*fptr)(int, ...);
+;
+; CHECK: DW_TAG_subroutine_type
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_unspecified_parameters
+;
+; ModuleID = 'llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp'
+
+%struct.A = type { i8 }
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z1biz(i32 %c, ...) #0 !dbg !14 {
+ %1 = alloca i32, align 4
+ %a = alloca %struct.A, align 1
+ %fptr = alloca void (i32, ...)*, align 8
+ store i32 %c, i32* %1, align 4
+ call void @llvm.dbg.declare(metadata i32* %1, metadata !21, metadata !DIExpression()), !dbg !22
+ call void @llvm.dbg.declare(metadata %struct.A* %a, metadata !23, metadata !DIExpression()), !dbg !24
+ call void @llvm.dbg.declare(metadata void (i32, ...)** %fptr, metadata !25, metadata !DIExpression()), !dbg !27
+ store void (i32, ...)* @_Z1biz, void (i32, ...)** %fptr, align 8, !dbg !27
+ ret void, !dbg !28
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!18, !19}
+!llvm.ident = !{!20}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !13, globals: !2, imports: !2)
+!1 = !DIFile(filename: "llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp", directory: "radar/13690847")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "A", line: 3, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS1A")
+!5 = !{!6}
+!6 = !DISubprogram(name: "a", linkageName: "_ZN1A1aEiz", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !7)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null, !9, !10, null}
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
+!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !{!14}
+!14 = distinct !DISubprogram(name: "b", linkageName: "_Z1biz", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !1, scope: !15, type: !16, variables: !2)
+!15 = !DIFile(filename: "llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp", directory: "radar/13690847")
+!16 = !DISubroutineType(types: !17)
+!17 = !{null, !10, null}
+!18 = !{i32 2, !"Dwarf Version", i32 2}
+!19 = !{i32 1, !"Debug Info Version", i32 3}
+!20 = !{!"clang version 3.5 "}
+!21 = !DILocalVariable(name: "c", line: 13, arg: 1, scope: !14, file: !15, type: !10)
+!22 = !DILocation(line: 13, scope: !14)
+!23 = !DILocalVariable(name: "a", line: 16, scope: !14, file: !15, type: !4)
+!24 = !DILocation(line: 16, scope: !14)
+!25 = !DILocalVariable(name: "fptr", line: 18, scope: !14, file: !15, type: !26)
+!26 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !16)
+!27 = !DILocation(line: 18, scope: !14)
+!28 = !DILocation(line: 22, scope: !14)
diff --git a/test/DebugInfo/Generic/version.ll b/test/DebugInfo/Generic/version.ll
new file mode 100644
index 000000000000..936e08872233
--- /dev/null
+++ b/test/DebugInfo/Generic/version.ll
@@ -0,0 +1,32 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Make sure we are generating DWARF version 3 when module flag says so.
+; CHECK: Compile Unit: length = {{.*}} version = 0x0003
+
+define i32 @main() #0 !dbg !4 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ ret i32 0, !dbg !10
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 185475)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "CodeGen/dwarf-version.c", directory: "test")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "CodeGen/dwarf-version.c", directory: "test")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{i32 2, !"Dwarf Version", i32 3}
+!10 = !DILocation(line: 7, scope: !4)
+!11 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/Inputs/dwarfdump-dwp.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-dwp.x86_64.o
new file mode 100644
index 000000000000..da059f73b9ff
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-dwp.x86_64.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-macho-relocs.macho.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-macho-relocs.macho.x86_64.o
new file mode 100644
index 000000000000..a5f8476df5e7
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-macho-relocs.macho.x86_64.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-macro-cmd.h b/test/DebugInfo/Inputs/dwarfdump-macro-cmd.h
new file mode 100644
index 000000000000..66e6b00a2bbf
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-macro-cmd.h
@@ -0,0 +1 @@
+#define M4 Value4
diff --git a/test/DebugInfo/Inputs/dwarfdump-macro.cc b/test/DebugInfo/Inputs/dwarfdump-macro.cc
new file mode 100644
index 000000000000..5abcb255c6c7
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-macro.cc
@@ -0,0 +1,11 @@
+#define M1 Value1
+#include "dwarfdump-macro.h"
+#define M2(x, y) ((x)+(y)* Value2)
+
+// Built with GCC
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-macro.cc /tmp/dbginfo
+// $ cp dwarfdump-macro.h /tmp/dbginfo
+// $ cp dwarfdump-macro-cmd.h /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ g++ -c -g3 -O0 -DM3=Value3 -include dwarfdump-macro-cmd.h dwarfdump-macro.cc -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-macro.h b/test/DebugInfo/Inputs/dwarfdump-macro.h
new file mode 100644
index 000000000000..98f0206cb43f
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-macro.h
@@ -0,0 +1,5 @@
+
+
+
+#undef M1
+#define M1 NewValue1
diff --git a/test/DebugInfo/Inputs/dwarfdump-macro.o b/test/DebugInfo/Inputs/dwarfdump-macro.o
new file mode 100644
index 000000000000..5f1cb5e2707f
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-macro.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/dwarfdump-test.cc b/test/DebugInfo/Inputs/dwarfdump-test.cc
index 14295d3cffa0..815c33dd3ac3 100644
--- a/test/DebugInfo/Inputs/dwarfdump-test.cc
+++ b/test/DebugInfo/Inputs/dwarfdump-test.cc
@@ -27,3 +27,5 @@ int main() {
// $ cp <output> output2.dwz
// $ dwz -m output.dwz -r output1.dwz output2.dwz
// $ rm output2.dwz
+
+// The mach-o version was generated using clang-3.6.2.
diff --git a/test/DebugInfo/Inputs/dwarfdump-test.macho-i386.o b/test/DebugInfo/Inputs/dwarfdump-test.macho-i386.o
new file mode 100644
index 000000000000..b53c8a6959e3
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test.macho-i386.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/fat-test.o b/test/DebugInfo/Inputs/fat-test.o
new file mode 100644
index 000000000000..8159cc749772
--- /dev/null
+++ b/test/DebugInfo/Inputs/fat-test.o
Binary files differ
diff --git a/test/DebugInfo/Inputs/gmlt.ll b/test/DebugInfo/Inputs/gmlt.ll
index 1436abedfab4..b03a80b4deae 100644
--- a/test/DebugInfo/Inputs/gmlt.ll
+++ b/test/DebugInfo/Inputs/gmlt.ll
@@ -98,26 +98,26 @@
; CHECK: .apple{{.*}} contents:
; Function Attrs: nounwind uwtable
-define void @_Z2f1v() #0 {
+define void @_Z2f1v() #0 !dbg !4 {
entry:
ret void, !dbg !13
}
; Function Attrs: nounwind uwtable
-define void @_Z2f2v() #0 section "__TEXT,__bar" {
+define void @_Z2f2v() #0 section "__TEXT,__bar" !dbg !7 {
entry:
ret void, !dbg !14
}
; Function Attrs: alwaysinline nounwind uwtable
-define void @_Z2f3v() #1 {
+define void @_Z2f3v() #1 !dbg !8 {
entry:
call void @_Z2f1v(), !dbg !15
ret void, !dbg !16
}
; Function Attrs: nounwind uwtable
-define void @_Z2f4v() #0 {
+define void @_Z2f4v() #0 !dbg !9 {
entry:
call void @_Z2f1v() #2, !dbg !17
ret void, !dbg !19
@@ -131,16 +131,16 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "gmlt.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !7, !8, !9}
-!4 = !DISubprogram(name: "f1", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @_Z2f1v, variables: !2)
+!4 = distinct !DISubprogram(name: "f1", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "gmlt.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "f2", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: void ()* @_Z2f2v, variables: !2)
-!8 = !DISubprogram(name: "f3", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: void ()* @_Z2f3v, variables: !2)
-!9 = !DISubprogram(name: "f4", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, function: void ()* @_Z2f4v, variables: !2)
+!7 = distinct !DISubprogram(name: "f2", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "f3", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!9 = distinct !DISubprogram(name: "f4", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/Inputs/line.ll b/test/DebugInfo/Inputs/line.ll
index d56c7e01a17f..5bdd3b9eb346 100644
--- a/test/DebugInfo/Inputs/line.ll
+++ b/test/DebugInfo/Inputs/line.ll
@@ -12,7 +12,7 @@
; CHECK: cmp
; Function Attrs: nounwind uwtable
-define i32 @_Z1fii(i32 %a, i32 %b) #0 {
+define i32 @_Z1fii(i32 %a, i32 %b) #0 !dbg !4 {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca i32, align 4
@@ -39,11 +39,11 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227472) (llvm/trunk 227476)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227472) (llvm/trunk 227476)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "line.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32, i32)* @_Z1fii, variables: !2)
+!4 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "line.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/DebugInfo/MIR/X86/lit.local.cfg b/test/DebugInfo/MIR/X86/lit.local.cfg
new file mode 100644
index 000000000000..c8625f4d9d24
--- /dev/null
+++ b/test/DebugInfo/MIR/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+ config.unsupported = True
diff --git a/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir b/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir
new file mode 100644
index 000000000000..84be910aaf74
--- /dev/null
+++ b/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir
@@ -0,0 +1,299 @@
+# RUN: llc -run-pass=livedebugvalues -march=x86-64 -o /dev/null %s | FileCheck %s
+
+# Test the extension of debug ranges from 3 predecessors.
+# Generated from the source file LiveDebugValues-3preds.c:
+# #include <stdio.h>
+# int add(int x, int y, int z, int a) {
+# int i;
+# for (i = 0; i < x * y; i++) {
+# if (i < x) {
+# a = a * x;
+# break;
+# }
+# if (i < y) {
+# a = a * y;
+# break;
+# }
+# if (i < z) {
+# a = a * z;
+# break;
+# }
+# }
+# return a;
+# }
+# with clang -g -O1 -c -emit-llvm LiveDebugValues-3preds.c -S -o live-debug-values-3preds.ll
+# then llc -stop-after stackmap-liveness live-debug-values-3preds.ll -o /dev/null > live-debug-values-3preds.mir
+
+# DBG_VALUE for variables "x", "y" and "z" are extended into BB#9 from its
+# predecessors BB#0, BB#2 and BB#8.
+# CHECK: bb.9.for.end:
+# CHECK: DBG_VALUE debug-use %edx, debug-use _, !11, !17, debug-location !21
+# CHECK-NEXT: DBG_VALUE debug-use %esi, debug-use _, !10, !17, debug-location !19
+# CHECK-NEXT: DBG_VALUE debug-use %edi, debug-use _, !9, !17, debug-location !18
+
+
+--- |
+ ; ModuleID = 'live-debug-values-3preds.ll'
+ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+ target triple = "x86_64-unknown-linux-gnu"
+
+ ; Function Attrs: norecurse nounwind readnone uwtable
+ define i32 @add(i32 %x, i32 %y, i32 %z, i32 %a) #0 !dbg !4 {
+ entry:
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !9, metadata !17), !dbg !18
+ tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !10, metadata !17), !dbg !19
+ tail call void @llvm.dbg.value(metadata i32 %z, i64 0, metadata !11, metadata !17), !dbg !21
+ tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !12, metadata !17), !dbg !23
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !13, metadata !17), !dbg !25
+ %mul = mul nsw i32 %y, %x, !dbg !26
+ %cmp.24 = icmp sgt i32 %mul, 0, !dbg !30
+ br i1 %cmp.24, label %for.body.preheader, label %for.end, !dbg !31
+
+ for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !32
+
+ for.cond: ; preds = %if.end.6
+ %cmp = icmp slt i32 %inc, %mul, !dbg !30
+ br i1 %cmp, label %for.body, label %for.end, !dbg !31
+
+ for.body: ; preds = %for.cond, %for.body.preheader
+ %i.025 = phi i32 [ %inc, %for.cond ], [ 0, %for.body.preheader ]
+ %0 = icmp sgt i32 %x, 0
+ br i1 %0, label %if.then, label %if.end, !dbg !35
+
+ if.then: ; preds = %for.body
+ %mul2 = mul nsw i32 %a, %x, !dbg !36
+ tail call void @llvm.dbg.value(metadata i32 %mul2, i64 0, metadata !12, metadata !17), !dbg !23
+ br label %for.end, !dbg !38
+
+ if.end: ; preds = %for.body
+ %1 = icmp sgt i32 %y, 0
+ br i1 %1, label %if.then.4, label %if.end.6, !dbg !39
+
+ if.then.4: ; preds = %if.end
+ %mul5 = mul nsw i32 %a, %y, !dbg !40
+ tail call void @llvm.dbg.value(metadata i32 %mul5, i64 0, metadata !12, metadata !17), !dbg !23
+ br label %for.end, !dbg !43
+
+ if.end.6: ; preds = %if.end
+ %2 = icmp sgt i32 %z, 0
+ %inc = add nuw nsw i32 %i.025, 1, !dbg !44
+ tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !13, metadata !17), !dbg !25
+ br i1 %2, label %if.then.8, label %for.cond, !dbg !45
+
+ if.then.8: ; preds = %if.end.6
+ %mul9 = mul nsw i32 %a, %z, !dbg !46
+ tail call void @llvm.dbg.value(metadata i32 %mul9, i64 0, metadata !12, metadata !17), !dbg !23
+ br label %for.end, !dbg !49
+
+ for.end: ; preds = %for.cond, %if.then.8, %if.then.4, %if.then, %entry
+ %a.addr.0 = phi i32 [ %mul2, %if.then ], [ %mul5, %if.then.4 ], [ %mul9, %if.then.8 ], [ %a, %entry ], [ %a, %for.cond ]
+ ret i32 %a.addr.0, !dbg !50
+ }
+
+ ; Function Attrs: nounwind readnone
+ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+ attributes #0 = { norecurse nounwind readnone uwtable }
+ attributes #1 = { nounwind readnone }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!14, !15}
+ !llvm.ident = !{!16}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 253049) ", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+ !1 = !DIFile(filename: "LiveDebugValues-3preds.c", directory: "/home/vt/julia/test/tvvikram")
+ !2 = !{}
+ !3 = !{!4}
+ !4 = distinct !DISubprogram(name: "add", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+ !5 = !DISubroutineType(types: !6)
+ !6 = !{!7, !7, !7, !7, !7}
+ !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !8 = !{!9, !10, !11, !12, !13}
+ !9 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+ !10 = !DILocalVariable(name: "y", arg: 2, scope: !4, file: !1, line: 1, type: !7)
+ !11 = !DILocalVariable(name: "z", arg: 3, scope: !4, file: !1, line: 1, type: !7)
+ !12 = !DILocalVariable(name: "a", arg: 4, scope: !4, file: !1, line: 1, type: !7)
+ !13 = !DILocalVariable(name: "i", scope: !4, file: !1, line: 2, type: !7)
+ !14 = !{i32 2, !"Dwarf Version", i32 4}
+ !15 = !{i32 2, !"Debug Info Version", i32 3}
+ !16 = !{!"clang version 3.8.0 (trunk 253049) "}
+ !17 = !DIExpression()
+ !18 = !DILocation(line: 1, column: 13, scope: !4)
+ !19 = !DILocation(line: 1, column: 20, scope: !20)
+ !20 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 1)
+ !21 = !DILocation(line: 1, column: 27, scope: !22)
+ !22 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 2)
+ !23 = !DILocation(line: 1, column: 34, scope: !24)
+ !24 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 3)
+ !25 = !DILocation(line: 2, column: 7, scope: !20)
+ !26 = !DILocation(line: 3, column: 21, scope: !27)
+ !27 = !DILexicalBlockFile(scope: !28, file: !1, discriminator: 1)
+ !28 = distinct !DILexicalBlock(scope: !29, file: !1, line: 3, column: 3)
+ !29 = distinct !DILexicalBlock(scope: !4, file: !1, line: 3, column: 3)
+ !30 = !DILocation(line: 3, column: 17, scope: !27)
+ !31 = !DILocation(line: 3, column: 3, scope: !27)
+ !32 = !DILocation(line: 4, column: 11, scope: !33)
+ !33 = distinct !DILexicalBlock(scope: !34, file: !1, line: 4, column: 9)
+ !34 = distinct !DILexicalBlock(scope: !28, file: !1, line: 3, column: 31)
+ !35 = !DILocation(line: 4, column: 9, scope: !34)
+ !36 = !DILocation(line: 5, column: 13, scope: !37)
+ !37 = distinct !DILexicalBlock(scope: !33, file: !1, line: 4, column: 16)
+ !38 = !DILocation(line: 6, column: 7, scope: !37)
+ !39 = !DILocation(line: 8, column: 9, scope: !34)
+ !40 = !DILocation(line: 9, column: 13, scope: !41)
+ !41 = distinct !DILexicalBlock(scope: !42, file: !1, line: 8, column: 16)
+ !42 = distinct !DILexicalBlock(scope: !34, file: !1, line: 8, column: 9)
+ !43 = !DILocation(line: 10, column: 7, scope: !41)
+ !44 = !DILocation(line: 3, column: 27, scope: !28)
+ !45 = !DILocation(line: 12, column: 9, scope: !34)
+ !46 = !DILocation(line: 13, column: 13, scope: !47)
+ !47 = distinct !DILexicalBlock(scope: !48, file: !1, line: 12, column: 16)
+ !48 = distinct !DILexicalBlock(scope: !34, file: !1, line: 12, column: 9)
+ !49 = !DILocation(line: 14, column: 7, scope: !47)
+ !50 = !DILocation(line: 17, column: 3, scope: !4)
+
+...
+---
+name: add
+alignment: 4
+exposesReturnsTwice: false
+hasInlineAsm: false
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: false
+liveins:
+ - { reg: '%edi' }
+ - { reg: '%esi' }
+ - { reg: '%edx' }
+ - { reg: '%ecx' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0.entry:
+ successors: %bb.1.for.body.preheader(20), %bb.9.for.end(12)
+ liveins: %ecx, %edi, %edx, %esi
+
+ DBG_VALUE debug-use %edi, debug-use _, !9, !17, debug-location !18
+ DBG_VALUE debug-use %esi, debug-use _, !10, !17, debug-location !19
+ DBG_VALUE debug-use %edx, debug-use _, !11, !17, debug-location !21
+ DBG_VALUE debug-use %ecx, debug-use _, !12, !17, debug-location !23
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ %r8d = MOV32rr %esi, debug-location !26
+ %r8d = IMUL32rr killed %r8d, %edi, implicit-def dead %eflags, debug-location !26
+ TEST32rr %r8d, %r8d, implicit-def %eflags, debug-location !31
+ JLE_1 %bb.9.for.end, implicit %eflags
+
+ bb.1.for.body.preheader:
+ successors: %bb.3.for.body(0)
+ liveins: %ecx, %edi, %edx, %esi, %r8d
+
+ DBG_VALUE debug-use %edi, debug-use _, !9, !17, debug-location !18
+ DBG_VALUE debug-use %esi, debug-use _, !10, !17, debug-location !19
+ DBG_VALUE debug-use %edx, debug-use _, !11, !17, debug-location !21
+ DBG_VALUE debug-use %ecx, debug-use _, !12, !17, debug-location !23
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags
+
+ bb.3.for.body (align 4):
+ successors: %bb.4.if.then(4), %bb.5.if.end(124)
+ liveins: %eax, %ecx, %edi, %edx, %esi, %r8d
+
+ DBG_VALUE debug-use %edi, debug-use _, !9, !17, debug-location !18
+ DBG_VALUE debug-use %esi, debug-use _, !10, !17, debug-location !19
+ DBG_VALUE debug-use %edx, debug-use _, !11, !17, debug-location !21
+ DBG_VALUE debug-use %ecx, debug-use _, !12, !17, debug-location !23
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ TEST32rr %edi, %edi, implicit-def %eflags, debug-location !35
+ JG_1 %bb.4.if.then, implicit %eflags
+
+ bb.5.if.end:
+ successors: %bb.6.if.then.4(4), %bb.7.if.end.6(124)
+ liveins: %eax, %ecx, %edi, %edx, %esi, %r8d
+
+ DBG_VALUE debug-use %edi, debug-use _, !9, !17, debug-location !18
+ DBG_VALUE debug-use %esi, debug-use _, !10, !17, debug-location !19
+ DBG_VALUE debug-use %edx, debug-use _, !11, !17, debug-location !21
+ DBG_VALUE debug-use %ecx, debug-use _, !12, !17, debug-location !23
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ TEST32rr %esi, %esi, implicit-def %eflags, debug-location !39
+ JG_1 %bb.6.if.then.4, implicit %eflags
+
+ bb.7.if.end.6:
+ successors: %bb.8.if.then.8(4), %bb.2.for.cond(124)
+ liveins: %eax, %ecx, %edi, %edx, %esi, %r8d
+
+ DBG_VALUE debug-use %edi, debug-use _, !9, !17, debug-location !18
+ DBG_VALUE debug-use %esi, debug-use _, !10, !17, debug-location !19
+ DBG_VALUE debug-use %edx, debug-use _, !11, !17, debug-location !21
+ DBG_VALUE debug-use %ecx, debug-use _, !12, !17, debug-location !23
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ TEST32rr %edx, %edx, implicit-def %eflags, debug-location !45
+ JG_1 %bb.8.if.then.8, implicit %eflags
+
+ bb.2.for.cond:
+ successors: %bb.3.for.body(124), %bb.9.for.end(4)
+ liveins: %eax, %ecx, %edi, %edx, %esi, %r8d
+
+ DBG_VALUE debug-use %edi, debug-use _, !9, !17, debug-location !18
+ DBG_VALUE debug-use %esi, debug-use _, !10, !17, debug-location !19
+ DBG_VALUE debug-use %edx, debug-use _, !11, !17, debug-location !21
+ DBG_VALUE debug-use %ecx, debug-use _, !12, !17, debug-location !23
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ %eax = INC32r killed %eax, implicit-def dead %eflags, debug-location !44
+ DBG_VALUE debug-use %eax, debug-use _, !13, !17, debug-location !25
+ CMP32rr %eax, %r8d, implicit-def %eflags, debug-location !31
+ JL_1 %bb.3.for.body, implicit %eflags
+ JMP_1 %bb.9.for.end
+
+ bb.4.if.then:
+ liveins: %ecx, %edi
+
+ DBG_VALUE debug-use %edi, debug-use _, !9, !17, debug-location !18
+ DBG_VALUE debug-use %ecx, debug-use _, !12, !17, debug-location !23
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ %ecx = IMUL32rr killed %ecx, killed %edi, implicit-def dead %eflags, debug-location !36
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ %eax = MOV32rr killed %ecx, debug-location !50
+ RETQ %eax, debug-location !50
+
+ bb.6.if.then.4:
+ liveins: %ecx, %esi
+
+ DBG_VALUE debug-use %esi, debug-use _, !10, !17, debug-location !19
+ DBG_VALUE debug-use %ecx, debug-use _, !12, !17, debug-location !23
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ %ecx = IMUL32rr killed %ecx, killed %esi, implicit-def dead %eflags, debug-location !40
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ %eax = MOV32rr killed %ecx, debug-location !50
+ RETQ %eax, debug-location !50
+
+ bb.8.if.then.8:
+ successors: %bb.9.for.end(0)
+ liveins: %ecx, %edx
+
+ DBG_VALUE debug-use %edx, debug-use _, !11, !17, debug-location !21
+ DBG_VALUE debug-use %ecx, debug-use _, !12, !17, debug-location !23
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ %ecx = IMUL32rr killed %ecx, killed %edx, implicit-def dead %eflags, debug-location !46
+
+ bb.9.for.end:
+ liveins: %ecx
+
+ DBG_VALUE 0, 0, !13, !17, debug-location !25
+ %eax = MOV32rr killed %ecx, debug-location !50
+ RETQ %eax, debug-location !50
+
+...
diff --git a/test/DebugInfo/MIR/X86/live-debug-values.mir b/test/DebugInfo/MIR/X86/live-debug-values.mir
new file mode 100644
index 000000000000..0af408a635f6
--- /dev/null
+++ b/test/DebugInfo/MIR/X86/live-debug-values.mir
@@ -0,0 +1,260 @@
+# RUN: llc -run-pass=livedebugvalues -march=x86-64 -o /dev/null %s | FileCheck %s
+
+# Test the extension of debug ranges from predecessors.
+# Generated from the source file LiveDebugValues.c:
+# #include <stdio.h>
+# int m;
+# extern int inc(int n);
+# extern int change(int n);
+# extern int modify(int n);
+# int main(int argc, char **argv) {
+# int n;
+# if (argc != 2)
+# n = 2;
+# else
+# n = atoi(argv[1]);
+# n = change(n);
+# if (n > 10) {
+# m = modify(n);
+# m = m + n; // var `m' doesn't has a dbg.value
+# }
+# else
+# m = inc(n); // var `m' doesn't has a dbg.value
+# printf("m(main): %d\n", m);
+# return 0;
+# }
+# with clang -g -O3 -c -emit-llvm LiveDebugValues.c -S -o live-debug-values.ll
+# then llc -stop-after stackmap-liveness live-debug-values.ll -o /dev/null > live-debug-values.mir
+# This case will also produce multiple locations but only the debug range
+# extension is tested here. This test case is tested with DWARF information under
+# llvm/test/DebugInfo/live-debug-values.ll and present here for testing under
+# MIR->MIR serialization.
+
+# DBG_VALUE for variable "n" is extended into BB#5 from its predecessors BB#3
+# and BB#4.
+# CHECK: bb.5.if.end.7:
+# CHECK: DBG_VALUE debug-use %rsi, debug-use _, !13, !20, debug-location !22
+# CHECK-NEXT: DBG_VALUE debug-use %ebx, debug-use _, !14, !20, debug-location !33
+
+
+--- |
+ ; ModuleID = 'live-debug-values.ll'
+ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+ target triple = "x86_64-unknown-linux-gnu"
+
+ @m = common global i32 0, align 4
+ @.str = private unnamed_addr constant [13 x i8] c"m(main): %d\0A\00", align 1
+
+ ; Function Attrs: nounwind uwtable
+ define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 !dbg !4 {
+ entry:
+ tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !12, metadata !20), !dbg !21
+ tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !13, metadata !20), !dbg !22
+ %cmp = icmp eq i32 %argc, 2, !dbg !24
+ br i1 %cmp, label %if.else, label %if.end, !dbg !26
+
+ if.else: ; preds = %entry
+ %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1, !dbg !27
+ %0 = load i8*, i8** %arrayidx, align 8, !dbg !27, !tbaa !28
+ %call = tail call i32 (i8*, ...) bitcast (i32 (...)* @atoi to i32 (i8*, ...)*)(i8* %0) #4, !dbg !32
+ tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !14, metadata !20), !dbg !33
+ br label %if.end
+
+ if.end: ; preds = %if.else, %entry
+ %n.0 = phi i32 [ %call, %if.else ], [ 2, %entry ]
+ %call1 = tail call i32 @change(i32 %n.0) #4, !dbg !34
+ tail call void @llvm.dbg.value(metadata i32 %call1, i64 0, metadata !14, metadata !20), !dbg !33
+ %cmp2 = icmp sgt i32 %call1, 10, !dbg !35
+ br i1 %cmp2, label %if.then.3, label %if.else.5, !dbg !37
+
+ if.then.3: ; preds = %if.end
+ %call4 = tail call i32 @modify(i32 %call1) #4, !dbg !38
+ %add = add nsw i32 %call4, %call1, !dbg !40
+ br label %if.end.7, !dbg !41
+
+ if.else.5: ; preds = %if.end
+ %call6 = tail call i32 @inc(i32 %call1) #4, !dbg !42
+ br label %if.end.7
+
+ if.end.7: ; preds = %if.else.5, %if.then.3
+ %storemerge = phi i32 [ %call6, %if.else.5 ], [ %add, %if.then.3 ]
+ store i32 %storemerge, i32* @m, align 4, !dbg !43, !tbaa !44
+ %call8 = tail call i32 (i8*, ...) @printf(i8* nonnull getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %storemerge) #4, !dbg !46
+ ret i32 0, !dbg !47
+ }
+
+ declare i32 @atoi(...) #1
+
+ declare i32 @change(i32) #1
+
+ declare i32 @modify(i32) #1
+
+ declare i32 @inc(i32) #1
+
+ ; Function Attrs: nounwind
+ declare i32 @printf(i8* nocapture readonly, ...) #2
+
+ ; Function Attrs: nounwind readnone
+ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
+
+ attributes #0 = { nounwind uwtable }
+ attributes #1 = { nounwind }
+ attributes #2 = { nounwind }
+ attributes #3 = { nounwind readnone }
+ attributes #4 = { nounwind }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!17, !18}
+ !llvm.ident = !{!19}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 253049) ", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !15)
+ !1 = !DIFile(filename: "LiveDebugValues.c", directory: "/home/vt/julia/test/tvvikram")
+ !2 = !{}
+ !3 = !{!4}
+ !4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
+ !5 = !DISubroutineType(types: !6)
+ !6 = !{!7, !7, !8}
+ !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
+ !9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 64)
+ !10 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+ !11 = !{!12, !13, !14}
+ !12 = !DILocalVariable(name: "argc", arg: 1, scope: !4, file: !1, line: 6, type: !7)
+ !13 = !DILocalVariable(name: "argv", arg: 2, scope: !4, file: !1, line: 6, type: !8)
+ !14 = !DILocalVariable(name: "n", scope: !4, file: !1, line: 7, type: !7)
+ !15 = !{!16}
+ !16 = !DIGlobalVariable(name: "m", scope: !0, file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, variable: i32* @m)
+ !17 = !{i32 2, !"Dwarf Version", i32 4}
+ !18 = !{i32 2, !"Debug Info Version", i32 3}
+ !19 = !{!"clang version 3.8.0 (trunk 253049)"}
+ !20 = !DIExpression()
+ !21 = !DILocation(line: 6, column: 14, scope: !4)
+ !22 = !DILocation(line: 6, column: 27, scope: !23)
+ !23 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 1)
+ !24 = !DILocation(line: 8, column: 12, scope: !25)
+ !25 = distinct !DILexicalBlock(scope: !4, file: !1, line: 8, column: 7)
+ !26 = !DILocation(line: 8, column: 7, scope: !4)
+ !27 = !DILocation(line: 11, column: 14, scope: !25)
+ !28 = !{!29, !29, i64 0}
+ !29 = !{!"any pointer", !30, i64 0}
+ !30 = !{!"omnipotent char", !31, i64 0}
+ !31 = !{!"Simple C/C++ TBAA"}
+ !32 = !DILocation(line: 11, column: 9, scope: !25)
+ !33 = !DILocation(line: 7, column: 7, scope: !23)
+ !34 = !DILocation(line: 12, column: 7, scope: !4)
+ !35 = !DILocation(line: 13, column: 9, scope: !36)
+ !36 = distinct !DILexicalBlock(scope: !4, file: !1, line: 13, column: 7)
+ !37 = !DILocation(line: 13, column: 7, scope: !4)
+ !38 = !DILocation(line: 14, column: 9, scope: !39)
+ !39 = distinct !DILexicalBlock(scope: !36, file: !1, line: 13, column: 15)
+ !40 = !DILocation(line: 15, column: 11, scope: !39)
+ !41 = !DILocation(line: 16, column: 3, scope: !39)
+ !42 = !DILocation(line: 18, column: 9, scope: !36)
+ !43 = !DILocation(line: 15, column: 7, scope: !39)
+ !44 = !{!45, !45, i64 0}
+ !45 = !{!"int", !30, i64 0}
+ !46 = !DILocation(line: 19, column: 3, scope: !4)
+ !47 = !DILocation(line: 20, column: 3, scope: !4)
+
+...
+---
+name: main
+alignment: 4
+exposesReturnsTwice: false
+hasInlineAsm: false
+isSSA: false
+tracksRegLiveness: true
+tracksSubRegLiveness: false
+liveins:
+ - { reg: '%edi' }
+ - { reg: '%rsi' }
+calleeSavedRegisters: [ '%bh', '%bl', '%bp', '%bpl', '%bx', '%ebp', '%ebx',
+ '%rbp', '%rbx', '%r12', '%r13', '%r14', '%r15',
+ '%r12b', '%r13b', '%r14b', '%r15b', '%r12d', '%r13d',
+ '%r14d', '%r15d', '%r12w', '%r13w', '%r14w', '%r15w' ]
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 8
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+fixedStack:
+ - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%rbx' }
+body: |
+ bb.0.entry:
+ successors: %bb.1.if.else(16), %bb.2.if.end(16)
+ liveins: %edi, %rsi, %rbx
+
+ frame-setup PUSH64r killed %rbx, implicit-def %rsp, implicit %rsp
+ CFI_INSTRUCTION .cfi_def_cfa_offset 16
+ CFI_INSTRUCTION .cfi_offset %rbx, -16
+ DBG_VALUE debug-use %edi, debug-use _, !12, !20, debug-location !21
+ DBG_VALUE debug-use %rsi, debug-use _, !13, !20, debug-location !22
+ %eax = MOV32rr %edi
+ DBG_VALUE debug-use %eax, debug-use _, !12, !20, debug-location !21
+ %edi = MOV32ri 2
+ CMP32ri8 killed %eax, 2, implicit-def %eflags, debug-location !26
+ JNE_1 %bb.2.if.end, implicit %eflags
+
+ bb.1.if.else:
+ successors: %bb.2.if.end(0)
+ liveins: %rsi
+
+ DBG_VALUE debug-use %rsi, debug-use _, !13, !20, debug-location !22
+ %rdi = MOV64rm killed %rsi, 1, _, 8, _, debug-location !27 :: (load 8 from %ir.arrayidx, !tbaa !28)
+ dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al, debug-location !32
+ CALL64pcrel32 @atoi, csr_64, implicit %rsp, implicit %rdi, implicit %al, implicit-def %rsp, implicit-def %eax, debug-location !32
+ %edi = MOV32rr %eax, debug-location !32
+ DBG_VALUE debug-use %edi, debug-use _, !14, !20, debug-location !33
+
+ bb.2.if.end:
+ successors: %bb.3.if.then.3(16), %bb.4.if.else.5(16)
+ liveins: %edi
+
+ CALL64pcrel32 @change, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax, debug-location !34
+ %ebx = MOV32rr %eax, debug-location !34
+ DBG_VALUE debug-use %ebx, debug-use _, !14, !20, debug-location !33
+ CMP32ri8 %ebx, 11, implicit-def %eflags, debug-location !37
+ JL_1 %bb.4.if.else.5, implicit killed %eflags, debug-location !37
+
+ bb.3.if.then.3:
+ successors: %bb.5.if.end.7(0)
+ liveins: %ebx
+
+ DBG_VALUE debug-use %ebx, debug-use _, !14, !20, debug-location !33
+ %edi = MOV32rr %ebx, debug-location !38
+ CALL64pcrel32 @modify, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax, debug-location !38
+ %ecx = MOV32rr %eax, debug-location !38
+ %ecx = ADD32rr killed %ecx, killed %ebx, implicit-def dead %eflags, debug-location !40
+ JMP_1 %bb.5.if.end.7
+
+ bb.4.if.else.5:
+ successors: %bb.5.if.end.7(0)
+ liveins: %ebx
+
+ DBG_VALUE debug-use %ebx, debug-use _, !14, !20, debug-location !33
+ %edi = MOV32rr killed %ebx, debug-location !42
+ CALL64pcrel32 @inc, csr_64, implicit %rsp, implicit %edi, implicit-def %rsp, implicit-def %eax, debug-location !42
+ %ecx = MOV32rr %eax, debug-location !42
+
+ bb.5.if.end.7:
+ liveins: %ecx
+
+ MOV32mr %rip, 1, _, @m, _, %ecx, debug-location !43 :: (store 4 into @m, !tbaa !44)
+ dead undef %edi = MOV32ri64 @.str, implicit-def %rdi, debug-location !46
+ dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al, debug-location !47
+ %esi = MOV32rr killed %ecx, debug-location !46
+ CALL64pcrel32 @printf, csr_64, implicit %rsp, implicit %rdi, implicit %esi, implicit %al, implicit-def %rsp, implicit-def dead %eax, debug-location !46
+ %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, debug-location !47
+ %rbx = POP64r implicit-def %rsp, implicit %rsp, debug-location !47
+ RETQ %eax, debug-location !47
+
+...
diff --git a/test/DebugInfo/MIR/lit.local.cfg b/test/DebugInfo/MIR/lit.local.cfg
new file mode 100644
index 000000000000..e69aa5765356
--- /dev/null
+++ b/test/DebugInfo/MIR/lit.local.cfg
@@ -0,0 +1,2 @@
+config.suffixes = ['.mir']
+
diff --git a/test/DebugInfo/Mips/InlinedFnLocalVar.ll b/test/DebugInfo/Mips/InlinedFnLocalVar.ll
index 2badeba2f579..fe661522da7c 100644
--- a/test/DebugInfo/Mips/InlinedFnLocalVar.ll
+++ b/test/DebugInfo/Mips/InlinedFnLocalVar.ll
@@ -12,7 +12,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-define i32 @bar() nounwind ssp {
+define i32 @bar() nounwind ssp !dbg !6 {
entry:
%0 = load i32, i32* @i, align 4, !dbg !17 ; <i32> [#uses=2]
tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !59, metadata !DIExpression()), !dbg !19
@@ -25,20 +25,20 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!28}
-!0 = !DISubprogram(name: "foo", line: 9, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 9, file: !27, scope: !1, type: !3, variables: !24)
+!0 = distinct !DISubprogram(name: "foo", line: 9, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 9, file: !27, scope: !1, type: !3, variables: !24)
!1 = !DIFile(filename: "bar.c", directory: "/tmp/")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !27, enums: !20, retainedTypes: !20, subprograms: !25, globals: !26, imports: !20)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !27, enums: !20, retainedTypes: !20, subprograms: !25, globals: !26, imports: !20)
!3 = !DISubroutineType(types: !4)
!4 = !{!5, !5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "bar", linkageName: "bar", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !27, scope: !1, type: !7, function: i32 ()* @bar)
+!6 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !27, scope: !1, type: !7)
!7 = !DISubroutineType(types: !8)
!8 = !{!5}
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+!9 = !DILocalVariable(name: "j", line: 9, arg: 1, scope: !0, file: !1, type: !5)
+!10 = !DILocalVariable(name: "xyz", line: 10, scope: !11, file: !1, type: !12)
-!59 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
-!60 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+!59 = !DILocalVariable(name: "j", line: 9, arg: 1, scope: !0, file: !1, type: !5)
+!60 = !DILocalVariable(name: "xyz", line: 10, scope: !11, file: !1, type: !12)
!11 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !0)
!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "X", line: 10, size: 64, align: 32, file: !27, scope: !0, elements: !13)
diff --git a/test/DebugInfo/Mips/delay-slot.ll b/test/DebugInfo/Mips/delay-slot.ll
index df01775a12e6..e4cb9eac7cef 100644
--- a/test/DebugInfo/Mips/delay-slot.ll
+++ b/test/DebugInfo/Mips/delay-slot.ll
@@ -16,8 +16,8 @@
; FIXME: The next address probably ought to be 0x0000000000000004 but there's
; a constant initialization before the prologue's end.
; CHECK: 0x0000000000000008 2 0 1 0 0 is_stmt prologue_end
-; CHECK: 0x0000000000000028 3 0 1 0 0 is_stmt
-; CHECK: 0x0000000000000038 4 0 1 0 0 is_stmt
+; CHECK: 0x000000000000002c 3 0 1 0 0 is_stmt
+; CHECK: 0x000000000000003c 4 0 1 0 0 is_stmt
; CHECK: 0x0000000000000048 5 0 1 0 0 is_stmt
; CHECK: 0x0000000000000058 5 0 1 0 0 is_stmt end_sequence
@@ -26,7 +26,7 @@ target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
target triple = "mips--linux-gnu"
; Function Attrs: nounwind
-define i32 @foo(i32 %x) #0 {
+define i32 @foo(i32 %x) #0 !dbg !4 {
entry:
call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !12, metadata !DIExpression()), !dbg !13
%tobool = icmp ne i32 %x, 0, !dbg !14
@@ -56,11 +56,11 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
@@ -68,7 +68,7 @@ attributes #1 = { nounwind readnone }
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5.0"}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!12 = !DILocalVariable(name: "x", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!13 = !DILocation(line: 1, scope: !4)
!14 = !DILocation(line: 2, scope: !15)
!15 = distinct !DILexicalBlock(line: 2, column: 0, file: !1, scope: !4)
diff --git a/test/DebugInfo/Mips/dsr-fixed-objects.ll b/test/DebugInfo/Mips/dsr-fixed-objects.ll
new file mode 100644
index 000000000000..ee98272859a7
--- /dev/null
+++ b/test/DebugInfo/Mips/dsr-fixed-objects.ll
@@ -0,0 +1,156 @@
+; RUN: llc -march=mips -mcpu=mips32r2 -O1 -filetype=obj <%s | \
+; RUN: llvm-dwarfdump -debug-dump=all - | FileCheck %s -check-prefix=F0
+; RUN: llc -march=mips -mcpu=mips32r2 -O1 -filetype=obj <%s | \
+; RUN: llvm-dwarfdump -debug-dump=all - | FileCheck %s -check-prefix=F1
+
+; void foo(int *);
+;
+; int f0(int a, int b, int c, int d, int e) {
+; int x = a + b + c + d + e;
+; foo(&x);
+; return x;
+; }
+;
+; int f1(int a, int b, int c, int d, int e) {
+; int x __attribute__((aligned(16))) = a + b + c + d + e;
+; foo(&x);
+; return x;
+; }
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+declare void @foo(i32*)
+
+; F0: DW_AT_location [DW_FORM_sec_offset] (0x00000014)
+; F0: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000006b] = "x")
+;
+; x -> DW_OP_reg1(51)
+; F0: 0x00000014: Beginning address offset: 0x0000000000000028
+; F0: Ending address offset: 0x0000000000000030
+; F0: Location description: 51
+
+define i32 @f0(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e) !dbg !4 {
+entry:
+ %x = alloca i32, align 4
+ tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !9, metadata !DIExpression()), !dbg !27
+ tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !10, metadata !DIExpression()), !dbg !28
+ tail call void @llvm.dbg.value(metadata i32 %c, i64 0, metadata !11, metadata !DIExpression()), !dbg !29
+ tail call void @llvm.dbg.value(metadata i32 %d, i64 0, metadata !12, metadata !DIExpression()), !dbg !30
+ tail call void @llvm.dbg.value(metadata i32 %e, i64 0, metadata !13, metadata !DIExpression()), !dbg !31
+ %0 = bitcast i32* %x to i8*, !dbg !32
+ call void @llvm.lifetime.start(i64 4, i8* %0) #4, !dbg !32
+ %add = add nsw i32 %b, %a, !dbg !33
+ %add1 = add nsw i32 %add, %c, !dbg !34
+ %add2 = add nsw i32 %add1, %d, !dbg !35
+ %add3 = add nsw i32 %add2, %e, !dbg !36
+ tail call void @llvm.dbg.value(metadata i32 %add3, i64 0, metadata !14, metadata !DIExpression()), !dbg !37
+ store i32 %add3, i32* %x, align 4, !dbg !37, !tbaa !38
+ tail call void @llvm.dbg.value(metadata i32* %x, i64 0, metadata !14, metadata !26), !dbg !37
+ call void @foo(i32* nonnull %x) #4, !dbg !42
+ call void @llvm.dbg.value(metadata i32* %x, i64 0, metadata !14, metadata !26), !dbg !37
+ %1 = load i32, i32* %x, align 4, !dbg !43, !tbaa !38
+ call void @llvm.lifetime.end(i64 4, i8* %0) #4, !dbg !44
+ ret i32 %1, !dbg !45
+}
+
+
+; F1: DW_AT_location [DW_FORM_sec_offset] (0x00000033)
+; F1: DW_AT_name [DW_FORM_strp] ( .debug_str[0x0000006b] = "x")
+
+; x -> DW_OP_reg1(51)
+; F1: 0x00000033: Beginning address offset: 0x0000000000000080
+; F1: Ending address offset: 0x0000000000000088
+; F1: Location description: 51
+
+define i32 @f1(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e) !dbg !15 {
+entry:
+ %x = alloca i32, align 16
+ tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !17, metadata !DIExpression()), !dbg !46
+ tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !18, metadata !DIExpression()), !dbg !47
+ tail call void @llvm.dbg.value(metadata i32 %c, i64 0, metadata !19, metadata !DIExpression()), !dbg !48
+ tail call void @llvm.dbg.value(metadata i32 %d, i64 0, metadata !20, metadata !DIExpression()), !dbg !49
+ tail call void @llvm.dbg.value(metadata i32 %e, i64 0, metadata !21, metadata !DIExpression()), !dbg !50
+ %0 = bitcast i32* %x to i8*, !dbg !51
+ call void @llvm.lifetime.start(i64 4, i8* %0) #4, !dbg !51
+ %add = add nsw i32 %b, %a, !dbg !52
+ %add1 = add nsw i32 %add, %c, !dbg !53
+ %add2 = add nsw i32 %add1, %d, !dbg !54
+ %add3 = add nsw i32 %add2, %e, !dbg !55
+ tail call void @llvm.dbg.value(metadata i32 %add3, i64 0, metadata !22, metadata !DIExpression()), !dbg !56
+ store i32 %add3, i32* %x, align 16, !dbg !56, !tbaa !38
+ tail call void @llvm.dbg.value(metadata i32* %x, i64 0, metadata !22, metadata !26), !dbg !56
+ call void @foo(i32* nonnull %x) #4, !dbg !57
+ call void @llvm.dbg.value(metadata i32* %x, i64 0, metadata !22, metadata !26), !dbg !56
+ %1 = load i32, i32* %x, align 16, !dbg !58, !tbaa !38
+ call void @llvm.lifetime.end(i64 4, i8* %0) #4, !dbg !59
+ ret i32 %1, !dbg !60
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!23, !24}
+!llvm.ident = !{!25}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251783) (llvm/trunk 251781)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.c", directory: "/home/vk/repos/tmp/dwarf")
+!2 = !{}
+!3 = !{!4, !15}
+!4 = distinct !DISubprogram(name: "f0", scope: !1, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7, !7, !7, !7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{!9, !10, !11, !12, !13, !14}
+!9 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 4, type: !7)
+!10 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 4, type: !7)
+!11 = !DILocalVariable(name: "c", arg: 3, scope: !4, file: !1, line: 4, type: !7)
+!12 = !DILocalVariable(name: "d", arg: 4, scope: !4, file: !1, line: 4, type: !7)
+!13 = !DILocalVariable(name: "e", arg: 5, scope: !4, file: !1, line: 4, type: !7)
+!14 = !DILocalVariable(name: "x", scope: !4, file: !1, line: 5, type: !7)
+!15 = distinct !DISubprogram(name: "f1", scope: !1, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: true, variables: !16)
+!16 = !{!17, !18, !19, !20, !21, !22}
+!17 = !DILocalVariable(name: "a", arg: 1, scope: !15, file: !1, line: 11, type: !7)
+!18 = !DILocalVariable(name: "b", arg: 2, scope: !15, file: !1, line: 11, type: !7)
+!19 = !DILocalVariable(name: "c", arg: 3, scope: !15, file: !1, line: 11, type: !7)
+!20 = !DILocalVariable(name: "d", arg: 4, scope: !15, file: !1, line: 11, type: !7)
+!21 = !DILocalVariable(name: "e", arg: 5, scope: !15, file: !1, line: 11, type: !7)
+!22 = !DILocalVariable(name: "x", scope: !15, file: !1, line: 12, type: !7)
+!23 = !{i32 2, !"Dwarf Version", i32 4}
+!24 = !{i32 2, !"Debug Info Version", i32 3}
+!25 = !{!"clang version 3.8.0 (trunk 251783) (llvm/trunk 251781)"}
+!26 = !DIExpression(DW_OP_deref)
+!27 = !DILocation(line: 4, column: 12, scope: !4)
+!28 = !DILocation(line: 4, column: 19, scope: !4)
+!29 = !DILocation(line: 4, column: 26, scope: !4)
+!30 = !DILocation(line: 4, column: 33, scope: !4)
+!31 = !DILocation(line: 4, column: 40, scope: !4)
+!32 = !DILocation(line: 5, column: 3, scope: !4)
+!33 = !DILocation(line: 5, column: 13, scope: !4)
+!34 = !DILocation(line: 5, column: 17, scope: !4)
+!35 = !DILocation(line: 5, column: 21, scope: !4)
+!36 = !DILocation(line: 5, column: 25, scope: !4)
+!37 = !DILocation(line: 5, column: 7, scope: !4)
+!38 = !{!39, !39, i64 0}
+!39 = !{!"int", !40, i64 0}
+!40 = !{!"omnipotent char", !41, i64 0}
+!41 = !{!"Simple C/C++ TBAA"}
+!42 = !DILocation(line: 6, column: 3, scope: !4)
+!43 = !DILocation(line: 7, column: 10, scope: !4)
+!44 = !DILocation(line: 8, column: 1, scope: !4)
+!45 = !DILocation(line: 7, column: 3, scope: !4)
+!46 = !DILocation(line: 11, column: 12, scope: !15)
+!47 = !DILocation(line: 11, column: 19, scope: !15)
+!48 = !DILocation(line: 11, column: 26, scope: !15)
+!49 = !DILocation(line: 11, column: 33, scope: !15)
+!50 = !DILocation(line: 11, column: 40, scope: !15)
+!51 = !DILocation(line: 12, column: 3, scope: !15)
+!52 = !DILocation(line: 12, column: 42, scope: !15)
+!53 = !DILocation(line: 12, column: 46, scope: !15)
+!54 = !DILocation(line: 12, column: 50, scope: !15)
+!55 = !DILocation(line: 12, column: 54, scope: !15)
+!56 = !DILocation(line: 12, column: 7, scope: !15)
+!57 = !DILocation(line: 13, column: 3, scope: !15)
+!58 = !DILocation(line: 14, column: 10, scope: !15)
+!59 = !DILocation(line: 15, column: 1, scope: !15)
+!60 = !DILocation(line: 14, column: 3, scope: !15)
diff --git a/test/DebugInfo/Mips/dsr-non-fixed-objects.ll b/test/DebugInfo/Mips/dsr-non-fixed-objects.ll
new file mode 100644
index 000000000000..7bd68318ca51
--- /dev/null
+++ b/test/DebugInfo/Mips/dsr-non-fixed-objects.ll
@@ -0,0 +1,125 @@
+; RUN: llc -march=mips -mcpu=mips32r2 -O0 -filetype=obj <%s | \
+; RUN: llvm-dwarfdump -debug-dump=all - | FileCheck %s -check-prefix=F2
+; RUN: llc -march=mips -mcpu=mips32r2 -O0 -filetype=obj <%s | \
+; RUN: llvm-dwarfdump -debug-dump=all - | FileCheck %s -check-prefix=F3
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+declare void @foo(i32*)
+
+; void foo(int *);
+;
+; int f2(int a, int b) {
+; int c __attribute__((aligned(16))) = a + b;
+; foo(&c);
+; return c;
+; }
+;
+; int *f3(int a, int b) {
+; int c __attribute__((aligned(16))) = a + b;
+; int *w = alloca(c);
+; foo(&c);
+; return w;
+; }
+
+; c -> DW_OP_breg29(r29): 16
+; F2: DW_AT_location [DW_FORM_exprloc] (<0x2> 8d 10 )
+; F2: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000065] = "c")
+
+; Function Attrs: nounwind
+define i32 @f2(i32 signext %a, i32 signext %b) !dbg !4 {
+entry:
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca i32, align 4
+ %c = alloca i32, align 16
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !15, metadata !16), !dbg !17
+ store i32 %b, i32* %b.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !18, metadata !16), !dbg !19
+ call void @llvm.dbg.declare(metadata i32* %c, metadata !20, metadata !16), !dbg !21
+ %0 = load i32, i32* %a.addr, align 4, !dbg !22
+ %1 = load i32, i32* %b.addr, align 4, !dbg !23
+ %add = add nsw i32 %0, %1, !dbg !24
+ store i32 %add, i32* %c, align 16, !dbg !21
+ call void @foo(i32* %c), !dbg !25
+ %2 = load i32, i32* %c, align 16, !dbg !26
+ ret i32 %2, !dbg !27
+}
+
+; c -> DW_OP_breg23(r23): 16
+; F3: DW_AT_location [DW_FORM_exprloc] (<0x2> 87 10 )
+; F3: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000065] = "c")
+
+define i32* @f3(i32 signext %a, i32 signext %b) !dbg !8 {
+entry:
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca i32, align 4
+ %c = alloca i32, align 16
+ %w = alloca i32*, align 4
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !28, metadata !16), !dbg !29
+ store i32 %b, i32* %b.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !30, metadata !16), !dbg !31
+ call void @llvm.dbg.declare(metadata i32* %c, metadata !32, metadata !16), !dbg !33
+ %0 = load i32, i32* %a.addr, align 4, !dbg !34
+ %1 = load i32, i32* %b.addr, align 4, !dbg !35
+ %add = add nsw i32 %0, %1, !dbg !36
+ store i32 %add, i32* %c, align 16, !dbg !33
+ call void @llvm.dbg.declare(metadata i32** %w, metadata !37, metadata !16), !dbg !38
+ %2 = load i32, i32* %c, align 16, !dbg !39
+ %3 = alloca i8, i32 %2, !dbg !40
+ %4 = bitcast i8* %3 to i32*, !dbg !40
+ store i32* %4, i32** %w, align 4, !dbg !38
+ call void @foo(i32* %c), !dbg !41
+ %5 = load i32*, i32** %w, align 4, !dbg !42
+ ret i32* %5, !dbg !43
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251783) (llvm/trunk 251781)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.c", directory: "/home/vk/repos/tmp/dwarf")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = distinct !DISubprogram(name: "f2", scope: !1, file: !1, line: 20, type: !5, isLocal: false, isDefinition: true, scopeLine: 20, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = distinct !DISubprogram(name: "f3", scope: !1, file: !1, line: 27, type: !9, isLocal: false, isDefinition: true, scopeLine: 27, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11, !7, !7}
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 32, align: 32)
+!12 = !{i32 2, !"Dwarf Version", i32 4}
+!13 = !{i32 2, !"Debug Info Version", i32 3}
+!14 = !{!"clang version 3.8.0 (trunk 251783) (llvm/trunk 251781)"}
+!15 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 20, type: !7)
+!16 = !DIExpression()
+!17 = !DILocation(line: 20, column: 12, scope: !4)
+!18 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 20, type: !7)
+!19 = !DILocation(line: 20, column: 19, scope: !4)
+!20 = !DILocalVariable(name: "c", scope: !4, file: !1, line: 21, type: !7)
+!21 = !DILocation(line: 21, column: 7, scope: !4)
+!22 = !DILocation(line: 21, column: 40, scope: !4)
+!23 = !DILocation(line: 21, column: 44, scope: !4)
+!24 = !DILocation(line: 21, column: 42, scope: !4)
+!25 = !DILocation(line: 22, column: 3, scope: !4)
+!26 = !DILocation(line: 23, column: 10, scope: !4)
+!27 = !DILocation(line: 23, column: 3, scope: !4)
+!28 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 27, type: !7)
+!29 = !DILocation(line: 27, column: 13, scope: !8)
+!30 = !DILocalVariable(name: "b", arg: 2, scope: !8, file: !1, line: 27, type: !7)
+!31 = !DILocation(line: 27, column: 20, scope: !8)
+!32 = !DILocalVariable(name: "c", scope: !8, file: !1, line: 28, type: !7)
+!33 = !DILocation(line: 28, column: 7, scope: !8)
+!34 = !DILocation(line: 28, column: 40, scope: !8)
+!35 = !DILocation(line: 28, column: 44, scope: !8)
+!36 = !DILocation(line: 28, column: 42, scope: !8)
+!37 = !DILocalVariable(name: "w", scope: !8, file: !1, line: 29, type: !11)
+!38 = !DILocation(line: 29, column: 8, scope: !8)
+!39 = !DILocation(line: 29, column: 19, scope: !8)
+!40 = !DILocation(line: 29, column: 12, scope: !8)
+!41 = !DILocation(line: 30, column: 3, scope: !8)
+!42 = !DILocation(line: 31, column: 10, scope: !8)
+!43 = !DILocation(line: 31, column: 3, scope: !8)
diff --git a/test/DebugInfo/Mips/fn-call-line.ll b/test/DebugInfo/Mips/fn-call-line.ll
index 18ed3dfa43ad..a3130ce8c6eb 100644
--- a/test/DebugInfo/Mips/fn-call-line.ll
+++ b/test/DebugInfo/Mips/fn-call-line.ll
@@ -52,7 +52,7 @@
; Function Attrs: nounwind uwtable
-define void @f2() #0 {
+define void @f2() #0 !dbg !4 {
entry:
call void (...) @f1(), !dbg !11
call void (...) @f1(), !dbg !12
@@ -68,11 +68,11 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 226641)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 226641)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "fn-call-line.c", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f2", line: 2, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: void ()* @f2, variables: !2)
+!4 = distinct !DISubprogram(name: "f2", line: 2, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "fn-call-line.c", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
diff --git a/test/DebugInfo/Mips/prologue_end.ll b/test/DebugInfo/Mips/prologue_end.ll
new file mode 100644
index 000000000000..d93836d84983
--- /dev/null
+++ b/test/DebugInfo/Mips/prologue_end.ll
@@ -0,0 +1,70 @@
+; RUN: llc -O0 -mtriple mips-unknown-linux-gnu -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC
+; RUN: llc -O0 -mtriple mips-unknown-linux-gnu -relocation-model=static -disable-fp-elim < %s | FileCheck %s -check-prefix=STATIC-FP
+; RUN: llc -O0 -mtriple mips-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC
+; RUN: llc -O0 -mtriple mips-unknown-linux-gnu -relocation-model=pic -disable-fp-elim < %s | FileCheck %s -check-prefix=PIC-FP
+
+; Generated using clang -O0 -emit-llvm -S -target mipsel-unknown-linux -g test.c -o test.ll
+; test.c:
+;
+; void hello_world(void) {
+; printf("Hello, World!\n");
+; }
+
+@.str = private unnamed_addr constant [15 x i8] c"Hello, World!\0A\00", align 1
+
+define void @hello_world() #0 !dbg !4 {
+entry:
+; STATIC: addiu $sp, $sp, -{{[0-9]+}}
+; STATIC: sw $ra, {{[0-9]+}}($sp)
+; STATIC: .loc 1 2 3 prologue_end
+; STATIC: lui $[[R0:[0-9]+]], %hi($.str)
+
+; STATIC-FP: addiu $sp, $sp, -{{[0-9]+}}
+; STATIC-FP: sw $ra, {{[0-9]+}}($sp)
+; STATIC-FP: sw $fp, {{[0-9]+}}($sp)
+; STATIC-FP: move $fp, $sp
+; STATIC-FP: .loc 1 2 3 prologue_end
+; STATIC-FP: lui $[[R0:[0-9]+]], %hi($.str)
+
+; PIC: lui $[[R0:[0-9]+]], %hi(_gp_disp)
+; PIC: addiu $[[R0]], $[[R0]], %lo(_gp_disp)
+; PIC: addiu $sp, $sp, -{{[0-9]+}}
+; PIC: sw $ra, {{[0-9]+}}($sp)
+; PIC: addu $[[R1:[0-9]+]], $[[R0]], $25
+; PIC: .loc 1 2 3 prologue_end
+; PIC: lw $[[R2:[0-9]+]], %got($.str)($[[R1]])
+
+; PIC-FP: lui $[[R0:[0-9]+]], %hi(_gp_disp)
+; PIC-FP: addiu $[[R0]], $[[R0]], %lo(_gp_disp)
+; PIC-FP: addiu $sp, $sp, -{{[0-9]+}}
+; PIC-FP: sw $ra, {{[0-9]+}}($sp)
+; PIC-FP: sw $fp, {{[0-9]+}}($sp)
+; PIC-FP: move $fp, $sp
+; PIC-FP: addu $[[R1:[0-9]+]], $[[R0]], $25
+; PIC-FP: .loc 1 2 3 prologue_end
+; PIC-FP: lw $[[R2:[0-9]+]], %got($.str)($[[R1]])
+
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i32 0, i32 0)), !dbg !10
+ ret void, !dbg !11
+}
+
+declare i32 @printf(i8*, ...)
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "hello_world", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0"}
+!10 = !DILocation(line: 2, column: 3, scope: !4)
+!11 = !DILocation(line: 3, column: 1, scope: !4)
diff --git a/test/DebugInfo/PDB/lit.local.cfg b/test/DebugInfo/PDB/DIA/lit.local.cfg
index 28a895f51148..28a895f51148 100644
--- a/test/DebugInfo/PDB/lit.local.cfg
+++ b/test/DebugInfo/PDB/DIA/lit.local.cfg
diff --git a/test/DebugInfo/PDB/DIA/pdbdump-flags.test b/test/DebugInfo/PDB/DIA/pdbdump-flags.test
new file mode 100644
index 000000000000..c2fffcb50620
--- /dev/null
+++ b/test/DebugInfo/PDB/DIA/pdbdump-flags.test
@@ -0,0 +1,40 @@
+; RUN: llvm-pdbdump %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=NO_ARGS
+; RUN: llvm-pdbdump -types %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=TYPES
+; RUN: llvm-pdbdump -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=COMPILANDS
+; RUN: llvm-pdbdump -types -compilands %p/../Inputs/empty.pdb | FileCheck %s -check-prefix=MULTIPLE
+
+; Check that neither symbols nor compilands are dumped when neither argument specified.
+; NO_ARGS: empty.pdb
+; NO_ARGS: Guid: {0B355641-86A0-A249-896F-9988FAE52FF0}
+; NO_ARGS: Attributes: HasPrivateSymbols
+; NO_ARGS-NOT: ---TYPES---
+; NO_ARGS-NOT: ---COMPILANDS---
+; NO_ARGS-NOT: ---GLOBALS---
+; NO_ARGS-NOT: ---SYMBOLS---
+
+; Check that only types are dumped when only -types is specified.
+; TYPES: empty.pdb
+; TYPES: Guid: {0B355641-86A0-A249-896F-9988FAE52FF0}
+; TYPES: Attributes: HasPrivateSymbols
+; TYPES: ---TYPES---
+; TYPES-NOT: ---COMPILANDS---
+; TYPES-NOT: ---GLOBALS---
+; TYPES-NOT: ---SYMBOLS---
+
+; Check that only compilands are dumped when only -compilands is specified.
+; COMPILANDS: empty.pdb
+; COMPILANDS: Guid: {0B355641-86A0-A249-896F-9988FAE52FF0}
+; COMPILANDS: Attributes: HasPrivateSymbols
+; COMPILANDS: ---COMPILANDS---
+; COMPILANDS-NOT: ---TYPES---
+; COMPILANDS-NOT: ---GLOBALS---
+; COMPILANDS-NOT: ---SYMBOLS---
+
+; Check that types and compilands are dumped when both arguments are specified.
+; MULTIPLE: empty.pdb
+; MULTIPLE: Guid: {0B355641-86A0-A249-896F-9988FAE52FF0}
+; MULTIPLE: Attributes: HasPrivateSymbols
+; MULTIPLE: ---COMPILANDS---
+; MULTIPLE: ---TYPES---
+; MULTIPLE-NOT: ---GLOBALS---
+; MULTIPLE-NOT: ---SYMBOLS---
diff --git a/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test b/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test
new file mode 100644
index 000000000000..ea5bb13d2ff6
--- /dev/null
+++ b/test/DebugInfo/PDB/DIA/pdbdump-symbol-format.test
@@ -0,0 +1,53 @@
+; RUN: llvm-pdbdump -symbols %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT %s
+; RUN: llvm-pdbdump -types %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=TYPES_FORMAT %s
+; RUN: llvm-pdbdump -globals %p/../Inputs/symbolformat.pdb | FileCheck --check-prefix=GLOBALS %s
+
+; The format is func [0x<rva_start>+<prologue_length> - 0x<rva_end>-<epilogue_length>]
+; SYM_FORMAT: ---SYMBOLS---
+; SYM_FORMAT: symbolformat-fpo.obj
+; SYM_FORMAT-DAG: func [{{.*}}] (FPO) unsigned __cdecl fpo_func(unsigned n)
+; SYM_FORMAT: symbolformat.obj
+; SYM_FORMAT-DAG: func [{{.*}}] (EBP) int __cdecl _purecall()
+; SYM_FORMAT-DAG: func [{{.*}}] (EBP) int __cdecl main(int argc, char** argv)
+; SYM_FORMAT-DAG: func [{{.*}}] (EBP) void A::A()
+; SYM_FORMAT-DAG: func [{{.*}}] (EBP) void B::B()
+; SYM_FORMAT-DAG: thunk [{{.*}}] (Pcode) B::`vcall'{0}'
+; SYM_FORMAT-DAG: func [{{.*}}] (EBP) virtual void B::PureFunc()
+; SYM_FORMAT-DAG: func [{{.*}}] (EBP) void A::RegularFunc()
+; SYM_FORMAT-DAG: func [{{.*}}] (EBP) virtual void A::VirtualFunc()
+
+; TYPES_FORMAT: ---TYPES---
+; TYPES_FORMAT: Enums
+; TYPES_FORMAT-DAG: enum TestEnum
+; TYPES_FORMAT-DAG: enum TestEnumClass
+; TYPES_FORMAT: Typedefs
+; TYPES_FORMAT-DAG: typedef int IntType
+; TYPES_FORMAT-DAG: typedef class A ClassAType
+; TYPES_FORMAT: Classes
+; TYPES_FORMAT: struct A {
+; TYPES_FORMAT: public:
+; TYPES_FORMAT: virtual void PureFunc() = 0
+; TYPES_FORMAT: virtual void VirtualFunc()
+; TYPES_FORMAT: void RegularFunc()
+; TYPES_FORMAT: }
+; TYPES_FORMAT-DAG: struct MemberTest {
+; TYPES_FORMAT: data +0x00 MemberTest::NestedEnum m_nested_enum
+; TYPES_FORMAT: data +0x04 int m_typedef
+; TYPES_FORMAT: data +0x08 bool m_bool
+; TYPES_FORMAT: data +0x09 char m_char
+; TYPES_FORMAT: data +0x0a wchar_t m_wchar_t
+; TYPES_FORMAT: data +0x0c int m_int
+; TYPES_FORMAT: data +0x10 unsigned m_unsigned
+; TYPES_FORMAT: data +0x14 long m_long
+; TYPES_FORMAT: data +0x18 unsigned long m_unsigned_long
+; TYPES_FORMAT: data +0x20 __int64 m_int64
+; TYPES_FORMAT: data +0x28 unsigned __int64 m_unsigned_int64
+; TYPES_FORMAT: data +0x30 float m_float
+; TYPES_FORMAT: data +0x38 double m_double
+; TYPES_FORMAT: data +0x40 void (__cdecl *m_pfn_2_args)(int, double)
+; TYPES_FORMAT: }
+
+; GLOBALS: ---GLOBALS---
+; GLOBALS-DAG: func [{{.*}}] (FPO) unsigned __cdecl fpo_func(unsigned n)
+; GLOBALS-DAG: data [{{.*}}] static void* g_global_pointer
+; GLOBALS-DAG: data [{{.*}}] static int g_global_int
diff --git a/test/DebugInfo/PDB/pdbdump-flags.test b/test/DebugInfo/PDB/pdbdump-flags.test
deleted file mode 100644
index badbf0784b5e..000000000000
--- a/test/DebugInfo/PDB/pdbdump-flags.test
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llvm-pdbdump %p/Inputs/empty.pdb | FileCheck %s -check-prefix=NO_ARGS
-; RUN: llvm-pdbdump -types %p/Inputs/empty.pdb | FileCheck %s -check-prefix=TYPES
-; RUN: llvm-pdbdump -compilands %p/Inputs/empty.pdb | FileCheck %s -check-prefix=COMPILANDS
-; RUN: llvm-pdbdump -types -compilands %p/Inputs/empty.pdb | FileCheck %s -check-prefix=MULTIPLE
-
-; Check that neither symbols nor compilands are dumped when neither argument specified.
-; NO_ARGS: empty.pdb
-; NO_ARGS: Guid: {0B355641-86A0-A249-896F-9988FAE52FF0}
-; NO_ARGS: Attributes: HasPrivateSymbols
-; NO_ARGS-NOT: ---TYPES---
-; NO_ARGS-NOT: ---COMPILANDS---
-; NO_ARGS-NOT: ---GLOBALS---
-; NO_ARGS-NOT: ---SYMBOLS---
-
-; Check that only types are dumped when only -types is specified.
-; TYPES: empty.pdb
-; TYPES: Guid: {0B355641-86A0-A249-896F-9988FAE52FF0}
-; TYPES: Attributes: HasPrivateSymbols
-; TYPES: ---TYPES---
-; TYPES-NOT: ---COMPILANDS---
-; TYPES-NOT: ---GLOBALS---
-; TYPES-NOT: ---SYMBOLS---
-
-; Check that only compilands are dumped when only -compilands is specified.
-; COMPILANDS: empty.pdb
-; COMPILANDS: Guid: {0B355641-86A0-A249-896F-9988FAE52FF0}
-; COMPILANDS: Attributes: HasPrivateSymbols
-; COMPILANDS: ---COMPILANDS---
-; COMPILANDS-NOT: ---TYPES---
-; COMPILANDS-NOT: ---GLOBALS---
-; COMPILANDS-NOT: ---SYMBOLS---
-
-; Check that types and compilands are dumped when both arguments are specified.
-; MULTIPLE: empty.pdb
-; MULTIPLE: Guid: {0B355641-86A0-A249-896F-9988FAE52FF0}
-; MULTIPLE: Attributes: HasPrivateSymbols
-; MULTIPLE: ---COMPILANDS---
-; MULTIPLE: ---TYPES---
-; MULTIPLE-NOT: ---GLOBALS---
-; MULTIPLE-NOT: ---SYMBOLS---
diff --git a/test/DebugInfo/PDB/pdbdump-headers.test b/test/DebugInfo/PDB/pdbdump-headers.test
new file mode 100644
index 000000000000..5c68cf40e3ef
--- /dev/null
+++ b/test/DebugInfo/PDB/pdbdump-headers.test
@@ -0,0 +1,12 @@
+; RUN: llvm-pdbdump --dump-headers %p/Inputs/empty.pdb | FileCheck %s
+
+; CHECK: BlockSize: 4096
+; CHECK-NEXT: Unknown0: 2
+; CHECK-NEXT: NumBlocks: 25
+; CHECK-NEXT: NumDirectoryBytes: 136
+; CHECK-NEXT: Unknown1: 0
+; CHECK-NEXT: BlockMapAddr: 24
+; CHECK-NEXT: NumDirectoryBlocks: 1
+; CHECK-NEXT: BlockMapOffset: 98304
+; CHECK-NEXT: DirectoryBlocks: [23]
+; CHECK-NEXT: NumStreams: 17
diff --git a/test/DebugInfo/PDB/pdbdump-symbol-format.test b/test/DebugInfo/PDB/pdbdump-symbol-format.test
deleted file mode 100644
index 6dd15fee4c59..000000000000
--- a/test/DebugInfo/PDB/pdbdump-symbol-format.test
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llvm-pdbdump -symbols %p/Inputs/symbolformat.pdb | FileCheck --check-prefix=SYM_FORMAT %s
-; RUN: llvm-pdbdump -types %p/Inputs/symbolformat.pdb | FileCheck --check-prefix=TYPES_FORMAT %s
-; RUN: llvm-pdbdump -globals %p/Inputs/symbolformat.pdb | FileCheck --check-prefix=GLOBALS %s
-
-; The format is func [0x<rva_start>+<prologue_length> - 0x<rva_end>-<epilogue_length>]
-; SYM_FORMAT: ---SYMBOLS---
-; SYM_FORMAT: symbolformat-fpo.obj
-; SYM_FORMAT-DAG: func [{{.*}}] (FPO) unsigned __cdecl fpo_func(unsigned n)
-; SYM_FORMAT: symbolformat.obj
-; SYM_FORMAT-DAG: func [{{.*}}] (EBP) int __cdecl _purecall()
-; SYM_FORMAT-DAG: func [{{.*}}] (EBP) int __cdecl main(int argc, char** argv)
-; SYM_FORMAT-DAG: func [{{.*}}] (EBP) void A::A()
-; SYM_FORMAT-DAG: func [{{.*}}] (EBP) void B::B()
-; SYM_FORMAT-DAG: thunk [{{.*}}] (Pcode) B::`vcall'{0}'
-; SYM_FORMAT-DAG: func [{{.*}}] (EBP) virtual void B::PureFunc()
-; SYM_FORMAT-DAG: func [{{.*}}] (EBP) void A::RegularFunc()
-; SYM_FORMAT-DAG: func [{{.*}}] (EBP) virtual void A::VirtualFunc()
-
-; TYPES_FORMAT: ---TYPES---
-; TYPES_FORMAT: Enums
-; TYPES_FORMAT-DAG: enum TestEnum
-; TYPES_FORMAT-DAG: enum TestEnumClass
-; TYPES_FORMAT: Typedefs
-; TYPES_FORMAT-DAG: typedef int IntType
-; TYPES_FORMAT-DAG: typedef class A ClassAType
-; TYPES_FORMAT: Classes
-; TYPES_FORMAT: struct A {
-; TYPES_FORMAT: public:
-; TYPES_FORMAT: virtual void PureFunc() = 0
-; TYPES_FORMAT: virtual void VirtualFunc()
-; TYPES_FORMAT: void RegularFunc()
-; TYPES_FORMAT: }
-; TYPES_FORMAT-DAG: struct MemberTest {
-; TYPES_FORMAT: data +0x00 MemberTest::NestedEnum m_nested_enum
-; TYPES_FORMAT: data +0x04 int m_typedef
-; TYPES_FORMAT: data +0x08 bool m_bool
-; TYPES_FORMAT: data +0x09 char m_char
-; TYPES_FORMAT: data +0x0a wchar_t m_wchar_t
-; TYPES_FORMAT: data +0x0c int m_int
-; TYPES_FORMAT: data +0x10 unsigned m_unsigned
-; TYPES_FORMAT: data +0x14 long m_long
-; TYPES_FORMAT: data +0x18 unsigned long m_unsigned_long
-; TYPES_FORMAT: data +0x20 __int64 m_int64
-; TYPES_FORMAT: data +0x28 unsigned __int64 m_unsigned_int64
-; TYPES_FORMAT: data +0x30 float m_float
-; TYPES_FORMAT: data +0x38 double m_double
-; TYPES_FORMAT: data +0x40 void (__cdecl *m_pfn_2_args)(int, double)
-; TYPES_FORMAT: }
-
-; GLOBALS: ---GLOBALS---
-; GLOBALS-DAG: func [{{.*}}] (FPO) unsigned __cdecl fpo_func(unsigned n)
-; GLOBALS-DAG: data [{{.*}}] static void* g_global_pointer
-; GLOBALS-DAG: data [{{.*}}] static int g_global_int
diff --git a/test/DebugInfo/PR20038.ll b/test/DebugInfo/PR20038.ll
deleted file mode 100644
index 8691895f41ac..000000000000
--- a/test/DebugInfo/PR20038.ll
+++ /dev/null
@@ -1,169 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; IR generated from clang -O0 with:
-; struct C {
-; ~C();
-; };
-; extern bool b;
-; void fun4() { b && (C(), 1); }
-; __attribute__((always_inline)) C::~C() { }
-
-; CHECK: DW_TAG_structure_type
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "C"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "~C"
-
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_linkage_name {{.*}} "_ZN1CD1Ev"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "this"
-
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "fun4"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} "_ZN1CD1Ev"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} "this"
-
-; FIXME: D2 is actually inlined into D1 but doesn't show up here, possibly due
-; to there being no work in D2 (calling another member function from the dtor
-; causes D2 to show up, calling a free function doesn't).
-
-; CHECK-NOT: DW_TAG
-; CHECK: NULL
-; CHECK-NOT: DW_TAG
-; CHECK: NULL
-
-%struct.C = type { i8 }
-
-@b = external global i8
-
-; Function Attrs: nounwind
-define void @_Z4fun4v() #0 {
-entry:
- %this.addr.i.i = alloca %struct.C*, align 8, !dbg !21
- %this.addr.i = alloca %struct.C*, align 8, !dbg !22
- %agg.tmp.ensured = alloca %struct.C, align 1
- %cleanup.cond = alloca i1
- %0 = load i8, i8* @b, align 1, !dbg !24
- %tobool = trunc i8 %0 to i1, !dbg !24
- store i1 false, i1* %cleanup.cond
- br i1 %tobool, label %land.rhs, label %land.end, !dbg !24
-
-land.rhs: ; preds = %entry
- store i1 true, i1* %cleanup.cond, !dbg !25
- br label %land.end
-
-land.end: ; preds = %land.rhs, %entry
- %1 = phi i1 [ false, %entry ], [ true, %land.rhs ]
- %cleanup.is_active = load i1, i1* %cleanup.cond, !dbg !27
- br i1 %cleanup.is_active, label %cleanup.action, label %cleanup.done, !dbg !27
-
-cleanup.action: ; preds = %land.end
- store %struct.C* %agg.tmp.ensured, %struct.C** %this.addr.i, align 8, !dbg !22
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !129, metadata !DIExpression()), !dbg !31
- %this1.i = load %struct.C*, %struct.C** %this.addr.i, !dbg !22
- store %struct.C* %this1.i, %struct.C** %this.addr.i.i, align 8, !dbg !21
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i.i, metadata !132, metadata !DIExpression()), !dbg !33
- %this1.i.i = load %struct.C*, %struct.C** %this.addr.i.i, !dbg !21
- br label %cleanup.done, !dbg !22
-
-cleanup.done: ; preds = %cleanup.action, %land.end
- ret void, !dbg !34
-}
-
-; Function Attrs: alwaysinline nounwind
-define void @_ZN1CD1Ev(%struct.C* %this) unnamed_addr #1 align 2 {
-entry:
- %this.addr.i = alloca %struct.C*, align 8, !dbg !37
- %this.addr = alloca %struct.C*, align 8
- store %struct.C* %this, %struct.C** %this.addr, align 8
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !29, metadata !DIExpression()), !dbg !38
- %this1 = load %struct.C*, %struct.C** %this.addr
- store %struct.C* %this1, %struct.C** %this.addr.i, align 8, !dbg !37
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !232, metadata !DIExpression()), !dbg !39
- %this1.i = load %struct.C*, %struct.C** %this.addr.i, !dbg !37
- ret void, !dbg !37
-}
-
-; Function Attrs: alwaysinline nounwind
-define void @_ZN1CD2Ev(%struct.C* %this) unnamed_addr #1 align 2 {
-entry:
- %this.addr = alloca %struct.C*, align 8
- store %struct.C* %this, %struct.C** %this.addr, align 8
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !32, metadata !DIExpression()), !dbg !40
- %this1 = load %struct.C*, %struct.C** %this.addr
- ret void, !dbg !41
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!18, !19}
-!llvm.ident = !{!20}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !11, globals: !2, imports: !2)
-!1 = !DIFile(filename: "<stdin>", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 1, size: 8, align: 8, file: !5, elements: !6, identifier: "_ZTS1C")
-!5 = !DIFile(filename: "PR20038.cpp", directory: "/tmp/dbginfo")
-!6 = !{!7}
-!7 = !DISubprogram(name: "~C", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !"_ZTS1C", type: !8)
-!8 = !DISubroutineType(types: !9)
-!9 = !{null, !10}
-!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
-!11 = !{!12, !16, !17}
-!12 = !DISubprogram(name: "fun4", linkageName: "_Z4fun4v", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !13, type: !14, function: void ()* @_Z4fun4v, variables: !2)
-!13 = !DIFile(filename: "PR20038.cpp", directory: "/tmp/dbginfo")
-!14 = !DISubroutineType(types: !15)
-!15 = !{null}
-!16 = !DISubprogram(name: "~C", linkageName: "_ZN1CD2Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !5, scope: !"_ZTS1C", type: !8, function: void (%struct.C*)* @_ZN1CD2Ev, declaration: !7, variables: !2)
-!17 = !DISubprogram(name: "~C", linkageName: "_ZN1CD1Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !5, scope: !"_ZTS1C", type: !8, function: void (%struct.C*)* @_ZN1CD1Ev, declaration: !7, variables: !2)
-!18 = !{i32 2, !"Dwarf Version", i32 4}
-!19 = !{i32 2, !"Debug Info Version", i32 3}
-!20 = !{!"clang version 3.5.0 "}
-!21 = !DILocation(line: 6, scope: !17, inlinedAt: !22)
-!22 = !DILocation(line: 5, scope: !23)
-!23 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !12)
-!24 = !DILocation(line: 5, scope: !12)
-!25 = !DILocation(line: 5, scope: !26)
-!26 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !12)
-!27 = !DILocation(line: 5, scope: !28)
-!28 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !12)
-!29 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !17, type: !30)
-!30 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1C")
-!31 = !DILocation(line: 0, scope: !17, inlinedAt: !22)
-!32 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !16, type: !30)
-!33 = !DILocation(line: 0, scope: !16, inlinedAt: !21)
-
-!129 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !17, type: !30)
-!132 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !16, type: !30)
-!232 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !16, type: !30)
-
-!34 = !DILocation(line: 5, scope: !35)
-!35 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !36)
-!36 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !12)
-!37 = !DILocation(line: 6, scope: !17)
-!38 = !DILocation(line: 0, scope: !17)
-!39 = !DILocation(line: 0, scope: !16, inlinedAt: !37)
-!40 = !DILocation(line: 0, scope: !16)
-!41 = !DILocation(line: 6, scope: !16)
diff --git a/test/DebugInfo/PowerPC/tls-fission.ll b/test/DebugInfo/PowerPC/tls-fission.ll
index 25bc9959e4d1..e8c6a13f754e 100644
--- a/test/DebugInfo/PowerPC/tls-fission.ll
+++ b/test/DebugInfo/PowerPC/tls-fission.ll
@@ -22,7 +22,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, splitDebugFilename: "tls.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, splitDebugFilename: "tls.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "tls.cpp", directory: "/tmp")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/PowerPC/tls.ll b/test/DebugInfo/PowerPC/tls.ll
index 8ba350ac60d3..7e6597c743b9 100644
--- a/test/DebugInfo/PowerPC/tls.ll
+++ b/test/DebugInfo/PowerPC/tls.ll
@@ -17,7 +17,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "tls.cpp", directory: "/tmp")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/Sparc/gnu-window-save.ll b/test/DebugInfo/Sparc/gnu-window-save.ll
index c2e0364307fa..d94cc7505f3a 100644
--- a/test/DebugInfo/Sparc/gnu-window-save.ll
+++ b/test/DebugInfo/Sparc/gnu-window-save.ll
@@ -38,7 +38,7 @@
@.str = private unnamed_addr constant [14 x i8] c"hello, world\0A\00", align 1
; Function Attrs: nounwind
-define signext i32 @main() #0 {
+define signext i32 @main() #0 !dbg !4 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -55,11 +55,11 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (http://llvm.org/git/clang.git 6a0714fee07fb7c4e32d3972b4fe2ce2f5678cf4) (llvm/ 672e88e934757f76d5c5e5258be41e7615094844)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (http://llvm.org/git/clang.git 6a0714fee07fb7c4e32d3972b4fe2ce2f5678cf4) (llvm/ 672e88e934757f76d5c5e5258be41e7615094844)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "hello.c", directory: "/home/venkatra/work/benchmarks/test/hello")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "hello.c", directory: "/home/venkatra/work/benchmarks/test/hello")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/DebugInfo/Sparc/prologue_end.ll b/test/DebugInfo/Sparc/prologue_end.ll
new file mode 100644
index 000000000000..43b1140620e7
--- /dev/null
+++ b/test/DebugInfo/Sparc/prologue_end.ll
@@ -0,0 +1,41 @@
+; RUN: llc -disable-fp-elim -O0 %s -mtriple sparc -o - | FileCheck %s
+
+; int func(void);
+; void prologue_end_test() {
+; func();
+; func();
+; }
+
+define void @prologue_end_test() nounwind uwtable !dbg !4 {
+ ; CHECK: prologue_end_test:
+ ; CHECK: .cfi_startproc
+ ; CHECK: save %sp
+ ; CHECK: .loc 1 3 3 prologue_end
+ ; CHECK: call func
+ ; CHECK: call func
+entry:
+ %call = call i32 @func(), !dbg !11
+ %call1 = call i32 @func(), !dbg !12
+ ret void, !dbg !13
+}
+
+declare i32 @func()
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 242129)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "foo.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "prologue_end_test", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"PIC Level", i32 2}
+!10 = !{!"clang version 3.7.0 (trunk 242129)"}
+!11 = !DILocation(line: 3, column: 3, scope: !4)
+!12 = !DILocation(line: 4, column: 3, scope: !4)
+!13 = !DILocation(line: 5, column: 1, scope: !4)
diff --git a/test/DebugInfo/SystemZ/prologue_end.ll b/test/DebugInfo/SystemZ/prologue_end.ll
new file mode 100644
index 000000000000..a62a8a75521e
--- /dev/null
+++ b/test/DebugInfo/SystemZ/prologue_end.ll
@@ -0,0 +1,42 @@
+; RUN: llc -disable-fp-elim -O0 %s -mtriple s390x-linux-gnu -o - | FileCheck %s
+
+; int func(void);
+; void prologue_end_test() {
+; func();
+; func();
+; }
+
+define void @prologue_end_test() nounwind uwtable !dbg !4 {
+ ; CHECK: prologue_end_test:
+ ; CHECK: .cfi_startproc
+ ; CHECK: aghi
+ ; CHECK: lgr
+ ; CHECK: .loc 1 3 3 prologue_end
+ ; CHECK: brasl {{.*}}, func
+ ; CHECK: brasl {{.*}}, func
+entry:
+ %call = call i32 @func(), !dbg !11
+ %call1 = call i32 @func(), !dbg !12
+ ret void, !dbg !13
+}
+
+declare i32 @func()
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 242129)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "foo.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "prologue_end_test", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"PIC Level", i32 2}
+!10 = !{!"clang version 3.7.0 (trunk 242129)"}
+!11 = !DILocation(line: 3, column: 3, scope: !4)
+!12 = !DILocation(line: 4, column: 3, scope: !4)
+!13 = !DILocation(line: 5, column: 1, scope: !4)
diff --git a/test/DebugInfo/SystemZ/variable-loc.ll b/test/DebugInfo/SystemZ/variable-loc.ll
index 5f4fe258b976..6ace1b614248 100644
--- a/test/DebugInfo/SystemZ/variable-loc.ll
+++ b/test/DebugInfo/SystemZ/variable-loc.ll
@@ -29,7 +29,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare i32 @sum_array(i32*, i32) nounwind
-define i32 @main() nounwind {
+define i32 @main() nounwind !dbg !14 {
entry:
%retval = alloca i32, align 4
%main_arr = alloca [100 x i32], align 4
@@ -52,27 +52,27 @@ declare i32 @printf(i8*, ...)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!30}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 ", isOptimized: false, emissionKind: 0, file: !29, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 ", isOptimized: false, emissionKind: 0, file: !29, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5, !11, !14}
-!5 = !DISubprogram(name: "populate_array", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !29, scope: !6, type: !7, function: void (i32*, i32)* @populate_array, variables: !1)
+!5 = distinct !DISubprogram(name: "populate_array", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !29, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "simple.c", directory: "/home/timnor01/a64-trunk/build")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9, !10}
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!11 = !DISubprogram(name: "sum_array", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !29, scope: !6, type: !12, function: i32 (i32*, i32)* @sum_array, variables: !1)
+!11 = distinct !DISubprogram(name: "sum_array", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !29, scope: !6, type: !12, variables: !1)
!12 = !DISubroutineType(types: !13)
!13 = !{!10, !9, !10}
-!14 = !DISubprogram(name: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 18, file: !29, scope: !6, type: !15, function: i32 ()* @main, variables: !1)
+!14 = distinct !DISubprogram(name: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 18, file: !29, scope: !6, type: !15, variables: !1)
!15 = !DISubroutineType(types: !16)
!16 = !{!10}
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "main_arr", line: 19, scope: !18, file: !6, type: !19)
+!17 = !DILocalVariable(name: "main_arr", line: 19, scope: !18, file: !6, type: !19)
!18 = distinct !DILexicalBlock(line: 18, column: 16, file: !29, scope: !14)
!19 = !DICompositeType(tag: DW_TAG_array_type, size: 3200, align: 32, baseType: !10, elements: !{!20})
!20 = !DISubrange(count: 99)
!22 = !DILocation(line: 19, column: 7, scope: !18)
-!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "val", line: 20, scope: !18, file: !6, type: !10)
+!23 = !DILocalVariable(name: "val", line: 20, scope: !18, file: !6, type: !10)
!24 = !DILocation(line: 20, column: 7, scope: !18)
!25 = !DILocation(line: 22, column: 3, scope: !18)
!26 = !DILocation(line: 23, column: 9, scope: !18)
diff --git a/test/DebugInfo/X86/2010-04-13-PubType.ll b/test/DebugInfo/X86/2010-04-13-PubType.ll
index 85c499d59630..4688f959fbdb 100644
--- a/test/DebugInfo/X86/2010-04-13-PubType.ll
+++ b/test/DebugInfo/X86/2010-04-13-PubType.ll
@@ -5,7 +5,7 @@
%struct.X = type opaque
%struct.Y = type { i32 }
-define i32 @foo(%struct.X* %x, %struct.Y* %y) nounwind ssp {
+define i32 @foo(%struct.X* %x, %struct.Y* %y) nounwind ssp !dbg !1 {
entry:
%x_addr = alloca %struct.X* ; <%struct.X**> [#uses=1]
%y_addr = alloca %struct.Y* ; <%struct.Y**> [#uses=1]
@@ -31,10 +31,10 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!20}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 7, arg: 0, scope: !1, file: !2, type: !7)
-!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !18, scope: !2, type: !4, function: i32 (%struct.X*, %struct.Y*)* @foo)
+!0 = !DILocalVariable(name: "x", line: 7, arg: 1, scope: !1, file: !2, type: !7)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !18, scope: !2, type: !4)
!2 = !DIFile(filename: "a.c", directory: "/tmp/")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19, subprograms: !17, imports: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19, subprograms: !17, imports: null)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !7, !9}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
@@ -45,7 +45,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!11 = !{!12}
!12 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 5, size: 32, align: 32, file: !18, scope: !10, baseType: !6)
!13 = !DILocation(line: 7, scope: !1)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 7, arg: 0, scope: !1, file: !2, type: !9)
+!14 = !DILocalVariable(name: "y", line: 7, arg: 2, scope: !1, file: !2, type: !9)
!15 = !DILocation(line: 7, scope: !16)
!16 = distinct !DILexicalBlock(line: 7, column: 0, file: !18, scope: !1)
!17 = !{!1}
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index 1252f2cb32e3..aaf72311cb46 100644
--- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -5,7 +5,7 @@
@GLB = common global i32 0, align 4
-define i32 @f() nounwind {
+define i32 @f() nounwind !dbg !5 {
%LOC = alloca i32, align 4
call void @llvm.dbg.declare(metadata i32* %LOC, metadata !15, metadata !DIExpression()), !dbg !17
%1 = load i32, i32* @GLB, align 4, !dbg !18
@@ -19,17 +19,17 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!21}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk)", isOptimized: false, emissionKind: 0, file: !20, enums: !1, retainedTypes: !1, subprograms: !3, globals: !12, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk)", isOptimized: false, emissionKind: 0, file: !20, enums: !1, retainedTypes: !1, subprograms: !3, globals: !12, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !6, scope: !6, type: !7, function: i32 ()* @f)
+!5 = distinct !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !6, scope: !6, type: !7)
!6 = !DIFile(filename: "test.c", directory: "/work/llvm/vanilla/test/DebugInfo")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!12 = !{!14}
!14 = !DIGlobalVariable(name: "GLB", line: 1, isLocal: false, isDefinition: true, scope: null, file: !6, type: !9, variable: i32* @GLB)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "LOC", line: 4, scope: !16, file: !6, type: !9)
+!15 = !DILocalVariable(name: "LOC", line: 4, scope: !16, file: !6, type: !9)
!16 = distinct !DILexicalBlock(line: 3, column: 9, file: !20, scope: !5)
!17 = !DILocation(line: 4, column: 9, scope: !16)
!18 = !DILocation(line: 4, column: 23, scope: !16)
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
index fc3f69b3584c..2319c0b70881 100644
--- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -7,7 +7,7 @@
%struct.bar = type { %struct.baz, %struct.baz* }
%struct.baz = type { i32 }
-define i32 @main(i32 %argc, i8** %argv) uwtable ssp {
+define i32 @main(i32 %argc, i8** %argv) uwtable ssp !dbg !29 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
@@ -25,7 +25,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define linkonce_odr void @_ZN3barC1Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 {
+define linkonce_odr void @_ZN3barC1Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 !dbg !37 {
entry:
%this.addr = alloca %struct.bar*, align 8
%x.addr = alloca i32, align 4
@@ -39,7 +39,7 @@ entry:
ret void, !dbg !62
}
-define linkonce_odr void @_ZN3barC2Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 {
+define linkonce_odr void @_ZN3barC2Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 !dbg !40 {
entry:
%this.addr = alloca %struct.bar*, align 8
%x.addr = alloca i32, align 4
@@ -57,7 +57,7 @@ entry:
ret void, !dbg !68
}
-define linkonce_odr void @_ZN3bazC1Ei(%struct.baz* %this, i32 %a) unnamed_addr uwtable ssp align 2 {
+define linkonce_odr void @_ZN3bazC1Ei(%struct.baz* %this, i32 %a) unnamed_addr uwtable ssp align 2 !dbg !43 {
entry:
%this.addr = alloca %struct.baz*, align 8
%a.addr = alloca i32, align 4
@@ -71,7 +71,7 @@ entry:
ret void, !dbg !74
}
-define linkonce_odr void @_ZN3bazC2Ei(%struct.baz* %this, i32 %a) unnamed_addr nounwind uwtable ssp align 2 {
+define linkonce_odr void @_ZN3bazC2Ei(%struct.baz* %this, i32 %a) unnamed_addr nounwind uwtable ssp align 2 !dbg !46 {
entry:
%this.addr = alloca %struct.baz*, align 8
%a.addr = alloca i32, align 4
@@ -89,7 +89,7 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!83}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 (trunk 146596)", isOptimized: false, emissionKind: 0, file: !82, enums: !1, retainedTypes: !3, subprograms: !27, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 (trunk 146596)", isOptimized: false, emissionKind: 0, file: !82, enums: !1, retainedTypes: !3, subprograms: !27, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5, !9}
!5 = !DICompositeType(tag: DW_TAG_class_type, name: "bar", line: 9, size: 128, align: 64, file: !82, elements: !7)
@@ -111,7 +111,7 @@ entry:
!23 = !{null, !24, !12}
!24 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !5)
!27 = !{!29, !37, !40, !43, !46}
-!29 = !DISubprogram(name: "main", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: !6, type: !30, function: i32 (i32, i8**)* @main)
+!29 = distinct !DISubprogram(name: "main", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: !6, type: !30)
!30 = !DISubroutineType(types: !31)
!31 = !{!12, !12, !32}
!32 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !33)
@@ -119,45 +119,45 @@ entry:
!34 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!35 = !{!36}
!36 = !{} ; previously: invalid DW_TAG_base_type
-!37 = !DISubprogram(name: "bar", linkageName: "_ZN3barC1Ei", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: null, type: !22, function: void (%struct.bar*, i32)* @_ZN3barC1Ei, declaration: !21)
+!37 = distinct !DISubprogram(name: "bar", linkageName: "_ZN3barC1Ei", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: null, type: !22, declaration: !21)
!38 = !{!39}
!39 = !{} ; previously: invalid DW_TAG_base_type
-!40 = !DISubprogram(name: "bar", linkageName: "_ZN3barC2Ei", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: null, type: !22, function: void (%struct.bar*, i32)* @_ZN3barC2Ei, declaration: !21)
+!40 = distinct !DISubprogram(name: "bar", linkageName: "_ZN3barC2Ei", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: null, type: !22, declaration: !21)
!41 = !{!42}
!42 = !{} ; previously: invalid DW_TAG_base_type
-!43 = !DISubprogram(name: "baz", linkageName: "_ZN3bazC1Ei", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: null, type: !14, function: void (%struct.baz*, i32)* @_ZN3bazC1Ei, declaration: !13)
+!43 = distinct !DISubprogram(name: "baz", linkageName: "_ZN3bazC1Ei", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: null, type: !14, declaration: !13)
!44 = !{!45}
!45 = !{} ; previously: invalid DW_TAG_base_type
-!46 = !DISubprogram(name: "baz", linkageName: "_ZN3bazC2Ei", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: null, type: !14, function: void (%struct.baz*, i32)* @_ZN3bazC2Ei, declaration: !13)
-!49 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 16, arg: 1, scope: !29, file: !6, type: !12)
+!46 = distinct !DISubprogram(name: "baz", linkageName: "_ZN3bazC2Ei", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !82, scope: null, type: !14, declaration: !13)
+!49 = !DILocalVariable(name: "argc", line: 16, arg: 1, scope: !29, file: !6, type: !12)
!50 = !DILocation(line: 16, column: 14, scope: !29)
-!51 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 16, arg: 2, scope: !29, file: !6, type: !32)
+!51 = !DILocalVariable(name: "argv", line: 16, arg: 2, scope: !29, file: !6, type: !32)
!52 = !DILocation(line: 16, column: 27, scope: !29)
-!53 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "myBar", line: 18, scope: !54, file: !6, type: !5)
+!53 = !DILocalVariable(name: "myBar", line: 18, scope: !54, file: !6, type: !5)
!54 = distinct !DILexicalBlock(line: 17, column: 1, file: !82, scope: !29)
!55 = !DILocation(line: 18, column: 9, scope: !54)
!56 = !DILocation(line: 18, column: 17, scope: !54)
!57 = !DILocation(line: 19, column: 5, scope: !54)
-!58 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 13, arg: 1, flags: DIFlagArtificial, scope: !37, file: !6, type: !24)
+!58 = !DILocalVariable(name: "this", line: 13, arg: 1, flags: DIFlagArtificial, scope: !37, file: !6, type: !24)
!59 = !DILocation(line: 13, column: 5, scope: !37)
-!60 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 13, arg: 2, scope: !37, file: !6, type: !12)
+!60 = !DILocalVariable(name: "x", line: 13, arg: 2, scope: !37, file: !6, type: !12)
!61 = !DILocation(line: 13, column: 13, scope: !37)
!62 = !DILocation(line: 13, column: 34, scope: !37)
-!63 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 13, arg: 1, flags: DIFlagArtificial, scope: !40, file: !6, type: !24)
+!63 = !DILocalVariable(name: "this", line: 13, arg: 1, flags: DIFlagArtificial, scope: !40, file: !6, type: !24)
!64 = !DILocation(line: 13, column: 5, scope: !40)
-!65 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 13, arg: 2, scope: !40, file: !6, type: !12)
+!65 = !DILocalVariable(name: "x", line: 13, arg: 2, scope: !40, file: !6, type: !12)
!66 = !DILocation(line: 13, column: 13, scope: !40)
!67 = !DILocation(line: 13, column: 33, scope: !40)
!68 = !DILocation(line: 13, column: 34, scope: !69)
!69 = distinct !DILexicalBlock(line: 13, column: 33, file: !82, scope: !40)
-!70 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 6, arg: 1, flags: DIFlagArtificial, scope: !43, file: !6, type: !16)
+!70 = !DILocalVariable(name: "this", line: 6, arg: 1, flags: DIFlagArtificial, scope: !43, file: !6, type: !16)
!71 = !DILocation(line: 6, column: 5, scope: !43)
-!72 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 6, arg: 2, scope: !43, file: !6, type: !12)
+!72 = !DILocalVariable(name: "a", line: 6, arg: 2, scope: !43, file: !6, type: !12)
!73 = !DILocation(line: 6, column: 13, scope: !43)
!74 = !DILocation(line: 6, column: 24, scope: !43)
-!75 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 6, arg: 1, flags: DIFlagArtificial, scope: !46, file: !6, type: !16)
+!75 = !DILocalVariable(name: "this", line: 6, arg: 1, flags: DIFlagArtificial, scope: !46, file: !6, type: !16)
!76 = !DILocation(line: 6, column: 5, scope: !46)
-!77 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 6, arg: 2, scope: !46, file: !6, type: !12)
+!77 = !DILocalVariable(name: "a", line: 6, arg: 2, scope: !46, file: !6, type: !12)
!78 = !DILocation(line: 6, column: 13, scope: !46)
!79 = !DILocation(line: 6, column: 23, scope: !46)
!80 = !DILocation(line: 6, column: 24, scope: !81)
diff --git a/test/DebugInfo/X86/DIModuleContext.ll b/test/DebugInfo/X86/DIModuleContext.ll
new file mode 100644
index 000000000000..413b45c4ab48
--- /dev/null
+++ b/test/DebugInfo/X86/DIModuleContext.ll
@@ -0,0 +1,30 @@
+target triple = "x86_64-apple-macosx"
+; RUN: %llc_dwarf %s -o - -filetype=obj \
+; RUN: | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; CHECK: DW_TAG_module
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_structure_type
+
+; Hand-crafted based on
+; struct s;
+; struct s *s;
+
+%struct.s = type opaque
+
+@i = common global %struct.s* null, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, globals: !3, imports: !11)
+!1 = !DIFile(filename: "test.c", directory: "/")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIGlobalVariable(name: "s", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, variable: %struct.s** @i)
+!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64, align: 64)
+!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "s", scope: !9, file: !1, line: 1, flags: DIFlagFwdDecl)
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !DIModule(scope: null, name: "Module", configMacros: "", includePath: ".", isysroot: "/")
+!10 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !0, entity: !9, line: 11)
+!11 = !{!10}
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
index bfad4fe261db..d879f6732e76 100644
--- a/test/DebugInfo/X86/DW_AT_byte_size.ll
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -10,7 +10,7 @@
%struct.A = type { i32 }
-define i32 @_Z3fooP1A(%struct.A* %a) nounwind uwtable ssp {
+define i32 @_Z3fooP1A(%struct.A* %a) nounwind uwtable ssp !dbg !5 {
entry:
%a.addr = alloca %struct.A*, align 8
store %struct.A* %a, %struct.A** %a.addr, align 8
@@ -26,10 +26,10 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!21}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 (trunk 150996)", isOptimized: false, emissionKind: 0, file: !20, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 (trunk 150996)", isOptimized: false, emissionKind: 0, file: !20, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "foo", linkageName: "_Z3fooP1A", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !20, scope: !6, type: !7, function: i32 (%struct.A*)* @_Z3fooP1A)
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooP1A", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !20, scope: !6, type: !7)
!6 = !DIFile(filename: "foo.cpp", directory: "/Users/echristo")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !10}
@@ -38,7 +38,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!11 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 1, size: 32, align: 32, file: !20, elements: !12)
!12 = !{!13}
!13 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 1, size: 32, align: 32, file: !20, scope: !11, baseType: !9)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !5, file: !6, type: !10)
+!16 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !5, file: !6, type: !10)
!17 = !DILocation(line: 3, column: 13, scope: !5)
!18 = !DILocation(line: 4, column: 3, scope: !19)
!19 = distinct !DILexicalBlock(line: 3, column: 16, file: !20, scope: !5)
diff --git a/test/DebugInfo/X86/DW_AT_linkage_name.ll b/test/DebugInfo/X86/DW_AT_linkage_name.ll
index 7f18b5037d5a..a3f3da2298e3 100644
--- a/test/DebugInfo/X86/DW_AT_linkage_name.ll
+++ b/test/DebugInfo/X86/DW_AT_linkage_name.ll
@@ -34,7 +34,7 @@ target triple = "x86_64-apple-macosx10.9.0"
%struct.A = type { i8 }
; Function Attrs: nounwind ssp uwtable
-define void @_ZN1AD2Ev(%struct.A* %this) unnamed_addr #0 align 2 {
+define void @_ZN1AD2Ev(%struct.A* %this) unnamed_addr #0 align 2 !dbg !17 {
entry:
%this.addr = alloca %struct.A*, align 8
store %struct.A* %this, %struct.A** %this.addr, align 8
@@ -47,7 +47,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind ssp uwtable
-define void @_ZN1AD1Ev(%struct.A* %this) unnamed_addr #0 align 2 {
+define void @_ZN1AD1Ev(%struct.A* %this) unnamed_addr #0 align 2 !dbg !18 {
entry:
%this.addr = alloca %struct.A*, align 8
store %struct.A* %this, %struct.A** %this.addr, align 8
@@ -58,7 +58,7 @@ entry:
}
; Function Attrs: ssp uwtable
-define void @_Z3foov() #2 {
+define void @_Z3foov() #2 !dbg !19 {
entry:
%a = alloca %struct.A, align 1
call void @llvm.dbg.declare(metadata %struct.A* %a, metadata !34, metadata !DIExpression()), !dbg !35
@@ -77,7 +77,7 @@ attributes #2 = { ssp uwtable }
!llvm.module.flags = !{!23, !24}
!llvm.ident = !{!25}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !16, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !16, globals: !2, imports: !2)
!1 = !DIFile(filename: "linkage-name.cpp", directory: "")
!2 = !{}
!3 = !{!4}
@@ -92,23 +92,23 @@ attributes #2 = { ssp uwtable }
!13 = !DISubroutineType(types: !14)
!14 = !{null, !9}
!16 = !{!17, !18, !19}
-!17 = !DISubprogram(name: "~A", linkageName: "_ZN1AD2Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !13, function: void (%struct.A*)* @_ZN1AD2Ev, declaration: !12, variables: !2)
-!18 = !DISubprogram(name: "~A", linkageName: "_ZN1AD1Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !13, function: void (%struct.A*)* @_ZN1AD1Ev, declaration: !12, variables: !2)
-!19 = !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !20, type: !21, function: void ()* @_Z3foov, variables: !2)
+!17 = distinct !DISubprogram(name: "~A", linkageName: "_ZN1AD2Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !13, declaration: !12, variables: !2)
+!18 = distinct !DISubprogram(name: "~A", linkageName: "_ZN1AD1Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !13, declaration: !12, variables: !2)
+!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !20, type: !21, variables: !2)
!20 = !DIFile(filename: "linkage-name.cpp", directory: "")
!21 = !DISubroutineType(types: !22)
!22 = !{null}
!23 = !{i32 2, !"Dwarf Version", i32 2}
!24 = !{i32 1, !"Debug Info Version", i32 3}
!25 = !{!"clang version 3.5.0 "}
-!26 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !17, type: !27)
+!26 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !17, type: !27)
!27 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1A")
!28 = !DILocation(line: 0, scope: !17)
!29 = !DILocation(line: 8, scope: !17)
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !18, type: !27)
+!30 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !18, type: !27)
!31 = !DILocation(line: 0, scope: !18)
!32 = !DILocation(line: 6, scope: !18)
!33 = !DILocation(line: 8, scope: !18)
-!34 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 11, scope: !19, file: !20, type: !"_ZTS1A")
+!34 = !DILocalVariable(name: "a", line: 11, scope: !19, file: !20, type: !"_ZTS1A")
!35 = !DILocation(line: 11, scope: !19)
!36 = !DILocation(line: 12, scope: !19)
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
index 96d1e5099574..593611d76bc4 100644
--- a/test/DebugInfo/X86/DW_AT_location-reference.ll
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -60,7 +60,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
@a = external global i32
-define void @f() nounwind {
+define void @f() nounwind !dbg !0 {
entry:
%call = tail call i32 @g(i32 0, i32 0) nounwind, !dbg !8
store i32 %call, i32* @a, align 4, !dbg !8
@@ -102,12 +102,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!24}
-!0 = !DISubprogram(name: "f", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !23, scope: !1, type: !3, function: void ()* @f, variables: !22)
+!0 = distinct !DISubprogram(name: "f", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !23, scope: !1, type: !3, variables: !22)
!1 = !DIFile(filename: "simple.c", directory: "/home/rengol01/temp/tests/dwarf/relocation")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk)", isOptimized: true, emissionKind: 1, file: !23, enums: !{}, retainedTypes: !{}, subprograms: !21, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk)", isOptimized: true, emissionKind: 1, file: !23, enums: !{}, retainedTypes: !{}, subprograms: !21, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 5, scope: !6, file: !1, type: !7)
+!5 = !DILocalVariable(name: "x", line: 5, scope: !6, file: !1, type: !7)
!6 = distinct !DILexicalBlock(line: 4, column: 14, file: !23, scope: !0)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !DILocation(line: 6, column: 3, scope: !6)
diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll
index 86d67cae6b0d..4e6c7c83d1f4 100644
--- a/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -12,7 +12,7 @@
%class.A = type { i32 }
-define i32 @_Z3fooi(i32) nounwind uwtable ssp {
+define i32 @_Z3fooi(i32) nounwind uwtable ssp !dbg !5 {
entry:
%.addr = alloca i32, align 4
%a = alloca %class.A, align 4
@@ -27,7 +27,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define linkonce_odr void @_ZN1AC1Ev(%class.A* %this) unnamed_addr nounwind uwtable ssp align 2 {
+define linkonce_odr void @_ZN1AC1Ev(%class.A* %this) unnamed_addr nounwind uwtable ssp align 2 !dbg !10 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -37,7 +37,7 @@ entry:
ret void, !dbg !29
}
-define linkonce_odr void @_ZN1AC2Ev(%class.A* %this) unnamed_addr nounwind uwtable ssp align 2 {
+define linkonce_odr void @_ZN1AC2Ev(%class.A* %this) unnamed_addr nounwind uwtable ssp align 2 !dbg !20 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -51,15 +51,15 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!38}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 163586) (llvm/trunk 163570)", isOptimized: false, emissionKind: 0, file: !37, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 163586) (llvm/trunk 163570)", isOptimized: false, emissionKind: 0, file: !37, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5, !10, !20}
-!5 = !DISubprogram(name: "foo", linkageName: "_Z3fooi", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !6, scope: !6, type: !7, function: i32 (i32)* @_Z3fooi, variables: !1)
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !6, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "bar.cpp", directory: "/Users/echristo/debug-tests")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "A", linkageName: "_ZN1AC1Ev", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !11, function: void (%class.A*)* @_ZN1AC1Ev, declaration: !17, variables: !1)
+!10 = distinct !DISubprogram(name: "A", linkageName: "_ZN1AC1Ev", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !11, declaration: !17, variables: !1)
!11 = !DISubroutineType(types: !12)
!12 = !{null, !13}
!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !14)
@@ -67,22 +67,22 @@ entry:
!15 = !{!16, !17}
!16 = !DIDerivedType(tag: DW_TAG_member, name: "m_a", line: 4, size: 32, align: 32, file: !37, scope: !14, baseType: !9)
!17 = !DISubprogram(name: "A", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: !14, type: !11)
-!20 = !DISubprogram(name: "A", linkageName: "_ZN1AC2Ev", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !11, function: void (%class.A*)* @_ZN1AC2Ev, declaration: !17, variables: !1)
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 8, scope: !22, file: !6, type: !14)
+!20 = distinct !DISubprogram(name: "A", linkageName: "_ZN1AC2Ev", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !11, declaration: !17, variables: !1)
+!21 = !DILocalVariable(name: "a", line: 8, scope: !22, file: !6, type: !14)
!22 = distinct !DILexicalBlock(line: 7, column: 11, file: !6, scope: !5)
!23 = !DILocation(line: 8, column: 5, scope: !22)
!24 = !DILocation(line: 8, column: 6, scope: !22)
!25 = !DILocation(line: 9, column: 3, scope: !22)
-!26 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 3, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !10, file: !6, type: !27)
+!26 = !DILocalVariable(name: "this", line: 3, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !10, file: !6, type: !27)
!27 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !14)
!28 = !DILocation(line: 3, column: 3, scope: !10)
!29 = !DILocation(line: 3, column: 18, scope: !10)
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 3, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !20, file: !6, type: !27)
+!30 = !DILocalVariable(name: "this", line: 3, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !20, file: !6, type: !27)
!31 = !DILocation(line: 3, column: 3, scope: !20)
!32 = !DILocation(line: 3, column: 9, scope: !33)
!33 = distinct !DILexicalBlock(line: 3, column: 7, file: !6, scope: !20)
!34 = !DILocation(line: 3, column: 18, scope: !33)
!35 = !DILocation(line: 7, scope: !5)
-!36 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 7, arg: 1, scope: !5, file: !6, type: !9)
+!36 = !DILocalVariable(name: "", line: 7, arg: 1, scope: !5, file: !6, type: !9)
!37 = !DIFile(filename: "bar.cpp", directory: "/Users/echristo/debug-tests")
!38 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index 993e623c7511..2b8345b66b71 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -12,7 +12,7 @@
@_ZZN3foo3barEvE1x = constant i32 0, align 4
-define void @_ZN3foo3barEv() {
+define void @_ZN3foo3barEv() !dbg !5 {
entry:
ret void, !dbg !25
}
@@ -20,10 +20,10 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!28}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 ()", isOptimized: false, emissionKind: 0, file: !27, enums: !1, retainedTypes: !1, subprograms: !3, globals: !18, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 ()", isOptimized: false, emissionKind: 0, file: !27, enums: !1, retainedTypes: !1, subprograms: !3, globals: !18, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !6, scope: null, type: !7, function: void ()* @_ZN3foo3barEv, declaration: !11)
+!5 = distinct !DISubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !6, scope: null, type: !7, declaration: !11)
!6 = !DIFile(filename: "nsNativeAppSupportBase.ii", directory: "/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9}
diff --git a/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll b/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
index c8e71a26b8fe..988a2b7daf4f 100644
--- a/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
+++ b/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
@@ -18,7 +18,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "i686-pc-win32"
; Function Attrs: nounwind
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !4 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -30,11 +30,11 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "C:\5CProjects")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "C:CProjects")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
index 8237c4086d31..8681f33e7e28 100644
--- a/test/DebugInfo/X86/DW_TAG_friend.ll
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -18,7 +18,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!29}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 (trunk 153413) (llvm/trunk 153428)", isOptimized: false, emissionKind: 0, file: !28, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 (trunk 153413) (llvm/trunk 153428)", isOptimized: false, emissionKind: 0, file: !28, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5, !17}
!5 = !DIGlobalVariable(name: "a", line: 10, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: %class.A* @a)
diff --git a/test/DebugInfo/X86/InlinedFnLocalVar.ll b/test/DebugInfo/X86/InlinedFnLocalVar.ll
index 74de8b6e6a92..a262fc8e88ca 100644
--- a/test/DebugInfo/X86/InlinedFnLocalVar.ll
+++ b/test/DebugInfo/X86/InlinedFnLocalVar.ll
@@ -12,7 +12,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-define i32 @bar() nounwind ssp {
+define i32 @bar() nounwind ssp !dbg !6 {
entry:
%0 = load i32, i32* @i, align 4, !dbg !17 ; <i32> [#uses=2]
tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !109, metadata !DIExpression()), !dbg !19
@@ -25,20 +25,20 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!28}
-!0 = !DISubprogram(name: "foo", line: 9, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 9, file: !27, scope: !1, type: !3, variables: !24)
+!0 = distinct !DISubprogram(name: "foo", line: 9, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 9, file: !27, scope: !1, type: !3, variables: !24)
!1 = !DIFile(filename: "bar.c", directory: "/tmp/")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !27, enums: !20, retainedTypes: !20, subprograms: !25, globals: !26, imports: !20)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !27, enums: !20, retainedTypes: !20, subprograms: !25, globals: !26, imports: !20)
!3 = !DISubroutineType(types: !4)
!4 = !{!5, !5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "bar", linkageName: "bar", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !27, scope: !1, type: !7, function: i32 ()* @bar)
+!6 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !27, scope: !1, type: !7)
!7 = !DISubroutineType(types: !8)
!8 = !{!5}
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+!9 = !DILocalVariable(name: "j", line: 9, arg: 1, scope: !0, file: !1, type: !5)
+!10 = !DILocalVariable(name: "xyz", line: 10, scope: !11, file: !1, type: !12)
-!109 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 9, arg: 0, scope: !0, file: !1, type: !5)
-!110 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 10, scope: !11, file: !1, type: !12)
+!109 = !DILocalVariable(name: "j", line: 9, arg: 1, scope: !0, file: !1, type: !5)
+!110 = !DILocalVariable(name: "xyz", line: 10, scope: !11, file: !1, type: !12)
!11 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !0)
!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "X", line: 10, size: 64, align: 32, file: !27, scope: !0, elements: !13)
diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll
index 560125d6f40d..02a51f346ee7 100644
--- a/test/DebugInfo/X86/aligned_stack_var.ll
+++ b/test/DebugInfo/X86/aligned_stack_var.ll
@@ -15,7 +15,7 @@
; CHECK-NOT: {{DW_AT_location.*DW_FORM_block1.*0x.*91}}
; CHECK: NULL
-define void @_Z3runv() nounwind uwtable {
+define void @_Z3runv() nounwind uwtable !dbg !5 {
entry:
%x = alloca i32, align 32
call void @llvm.dbg.declare(metadata i32* %x, metadata !9, metadata !DIExpression()), !dbg !12
@@ -27,14 +27,14 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", isOptimized: false, emissionKind: 0, file: !14, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", isOptimized: false, emissionKind: 0, file: !14, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "run", linkageName: "_Z3runv", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !14, scope: !6, type: !7, function: void ()* @_Z3runv, variables: !1)
+!5 = distinct !DISubprogram(name: "run", linkageName: "_Z3runv", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !14, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "test.cc", directory: "/home/samsonov/debuginfo")
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 2, scope: !10, file: !6, type: !11)
+!9 = !DILocalVariable(name: "x", line: 2, scope: !10, file: !6, type: !11)
!10 = distinct !DILexicalBlock(line: 1, column: 12, file: !14, scope: !5)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!12 = !DILocation(line: 2, column: 7, scope: !10)
diff --git a/test/DebugInfo/X86/arange-and-stub.ll b/test/DebugInfo/X86/arange-and-stub.ll
index bbc6de5e573c..39727753b8f9 100644
--- a/test/DebugInfo/X86/arange-and-stub.ll
+++ b/test/DebugInfo/X86/arange-and-stub.ll
@@ -1,10 +1,10 @@
; RUN: llc -generate-arange-section -relocation-model=pic < %s | FileCheck %s
-; CHECK: .section .data.rel,"aw",@progbits
+; CHECK: .data
; CHECK-NOT: .section
; CHECK: .L_ZTId.DW.stub:
-; CHECK: .section .data.rel,"aw",@progbits
+; CHECK: .data
; CHECK-NEXT: .Lsec_end0:
target triple = "x86_64-linux-gnu"
@@ -12,11 +12,11 @@ target triple = "x86_64-linux-gnu"
@_ZTId = external constant i8*
@zed = global [1 x void ()*] [void ()* @bar]
-define void @foo() {
+define void @foo() !dbg !4 {
ret void
}
-define void @bar() personality i8* bitcast (void ()* @foo to i8*) {
+define void @bar() personality i8* bitcast (void ()* @foo to i8*) !dbg !9 {
invoke void @foo()
to label %invoke.cont unwind label %lpad
@@ -32,16 +32,16 @@ lpad: ; preds = %0
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!17, !18}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 234308) (llvm/trunk 234310)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !10, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 234308) (llvm/trunk 234310)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !10, imports: !2)
!1 = !DIFile(filename: "/Users/espindola/llvm/<stdin>", directory: "/Users/espindola/llvm/build")
!2 = !{}
!3 = !{!4, !9}
-!4 = !DISubprogram(name: "foo", linkageName: "foo", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!5 = !DIFile(filename: "/Users/espindola/llvm/test.cpp", directory: "/Users/espindola/llvm/build")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DISubprogram(name: "bar_d", linkageName: "bar", scope: !5, file: !5, line: 3, type: !6, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @bar, variables: !2)
+!9 = distinct !DISubprogram(name: "bar_d", linkageName: "bar", scope: !5, file: !5, line: 3, type: !6, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!10 = !{!11}
!11 = !DIGlobalVariable(name: "zed", scope: !0, file: !5, line: 6, type: !12, isLocal: false, isDefinition: true, variable: [1 x void ()*]* @zed)
!12 = !DICompositeType(tag: DW_TAG_array_type, baseType: !13, size: 64, align: 64, elements: !15)
diff --git a/test/DebugInfo/X86/arange.ll b/test/DebugInfo/X86/arange.ll
index 0b0c2a0b3ee7..f88cdb3a90ad 100644
--- a/test/DebugInfo/X86/arange.ll
+++ b/test/DebugInfo/X86/arange.ll
@@ -29,7 +29,7 @@
!llvm.module.flags = !{!12, !13}
!llvm.ident = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !9, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !9, imports: !2)
!1 = !DIFile(filename: "simple.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/X86/arguments.ll b/test/DebugInfo/X86/arguments.ll
index 0af119b241fc..8b3902765bbd 100644
--- a/test/DebugInfo/X86/arguments.ll
+++ b/test/DebugInfo/X86/arguments.ll
@@ -29,7 +29,7 @@
%struct.foo = type { i32 }
; Function Attrs: nounwind uwtable
-define void @_Z4func3fooS_(%struct.foo* %f, %struct.foo* %g) #0 {
+define void @_Z4func3fooS_(%struct.foo* %f, %struct.foo* %g) #0 !dbg !4 {
entry:
call void @llvm.dbg.declare(metadata %struct.foo* %f, metadata !19, metadata !DIExpression()), !dbg !20
call void @llvm.dbg.declare(metadata %struct.foo* %g, metadata !21, metadata !DIExpression()), !dbg !20
@@ -49,11 +49,11 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!24}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "scratch.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "func", linkageName: "_Z4func3fooS_", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, function: void (%struct.foo*, %struct.foo*)* @_Z4func3fooS_, variables: !2)
+!4 = distinct !DISubprogram(name: "func", linkageName: "_Z4func3fooS_", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "scratch.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8, !8}
@@ -67,9 +67,9 @@ attributes #1 = { nounwind readnone }
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !8)
!16 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !17)
!17 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !8)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "f", line: 6, arg: 1, scope: !4, file: !5, type: !8)
+!19 = !DILocalVariable(name: "f", line: 6, arg: 1, scope: !4, file: !5, type: !8)
!20 = !DILocation(line: 6, scope: !4)
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "g", line: 6, arg: 2, scope: !4, file: !5, type: !8)
+!21 = !DILocalVariable(name: "g", line: 6, arg: 2, scope: !4, file: !5, type: !8)
!22 = !DILocation(line: 7, scope: !4)
!23 = !DILocation(line: 8, scope: !4)
!24 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/array.ll b/test/DebugInfo/X86/array.ll
index 8da2079a0530..4ffe0a710b85 100644
--- a/test/DebugInfo/X86/array.ll
+++ b/test/DebugInfo/X86/array.ll
@@ -16,32 +16,34 @@
; Test that we only emit register-indirect locations for the array array.
; rdar://problem/14874886
;
-; CHECK: ##DEBUG_VALUE: main:array <- [R{{.*}}+0]
-; CHECK-NOT: ##DEBUG_VALUE: main:array <- R{{.*}}
+; CHECK: ##DEBUG_VALUE: main:array <- [%R{{.*}}+0]
+; CHECK: ##DEBUG_VALUE: main:array <- [%R{{.*}}+0]
+; CHECK: ##DEBUG_VALUE: main:array <- [%R{{.*}}+0]
+; CHECK-NOT: ##DEBUG_VALUE: main:array <- %R{{.*}}
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@main.array = private unnamed_addr constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 16
; Function Attrs: nounwind ssp uwtable
-define void @f(i32* nocapture %p) #0 {
+define void @f(i32* nocapture %p) #0 !dbg !4 {
tail call void @llvm.dbg.value(metadata i32* %p, i64 0, metadata !11, metadata !DIExpression()), !dbg !28
store i32 42, i32* %p, align 4, !dbg !29, !tbaa !30
ret void, !dbg !34
}
; Function Attrs: nounwind ssp uwtable
-define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 !dbg !12 {
%array = alloca [4 x i32], align 16
tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !19, metadata !DIExpression()), !dbg !35
tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !20, metadata !DIExpression()), !dbg !35
- tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !DIExpression()), !dbg !36
+ tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !DIExpression(DW_OP_deref)), !dbg !36
%1 = bitcast [4 x i32]* %array to i8*, !dbg !36
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([4 x i32]* @main.array to i8*), i64 16, i32 16, i1 false), !dbg !36
- tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !DIExpression()), !dbg !36
+ tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !DIExpression(DW_OP_deref)), !dbg !36
%2 = getelementptr inbounds [4 x i32], [4 x i32]* %array, i64 0, i64 0, !dbg !37
call void @f(i32* %2), !dbg !37
- tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !DIExpression()), !dbg !36
+ tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !DIExpression(DW_OP_deref)), !dbg !36
%3 = load i32, i32* %2, align 16, !dbg !38, !tbaa !30
ret i32 %3, !dbg !38
}
@@ -60,28 +62,28 @@ attributes #2 = { nounwind readnone }
!llvm.module.flags = !{!25, !26}
!llvm.ident = !{!27}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "array.c", directory: "")
!2 = !{}
!3 = !{!4, !12}
-!4 = !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*)* @f, variables: !10)
+!4 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !10)
!5 = !DIFile(filename: "array.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !{!11}
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p", line: 1, arg: 1, scope: !4, file: !5, type: !8)
-!12 = !DISubprogram(name: "main", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !13, function: i32 (i32, i8**)* @main, variables: !18)
+!11 = !DILocalVariable(name: "p", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!12 = distinct !DISubprogram(name: "main", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !13, variables: !18)
!13 = !DISubroutineType(types: !14)
!14 = !{!9, !9, !15}
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !16)
!16 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !17)
!17 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!18 = !{!19, !20, !21}
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 5, arg: 1, scope: !12, file: !5, type: !9)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 5, arg: 2, scope: !12, file: !5, type: !15)
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "array", line: 6, scope: !12, file: !5, type: !22)
+!19 = !DILocalVariable(name: "argc", line: 5, arg: 1, scope: !12, file: !5, type: !9)
+!20 = !DILocalVariable(name: "argv", line: 5, arg: 2, scope: !12, file: !5, type: !15)
+!21 = !DILocalVariable(name: "array", line: 6, scope: !12, file: !5, type: !22)
!22 = !DICompositeType(tag: DW_TAG_array_type, size: 128, align: 32, baseType: !9, elements: !23)
!23 = !{!24}
!24 = !DISubrange(count: 4)
diff --git a/test/DebugInfo/X86/array2.ll b/test/DebugInfo/X86/array2.ll
index e08c52640b3f..f456aae2e250 100644
--- a/test/DebugInfo/X86/array2.ll
+++ b/test/DebugInfo/X86/array2.ll
@@ -17,7 +17,7 @@
;
; CHECK: define i32 @main
; CHECK: call void @llvm.dbg.value(metadata i32 42, i64 0, metadata ![[ARRAY:[0-9]+]], metadata ![[EXPR:[0-9]+]])
-; CHECK: ![[ARRAY]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "array",{{.*}} line: 6
+; CHECK: ![[ARRAY]] = !DILocalVariable(name: "array",{{.*}} line: 6
; CHECK: ![[EXPR]] = !DIExpression(DW_OP_bit_piece, 0, 32)
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -25,7 +25,7 @@ target triple = "x86_64-apple-macosx10.9.0"
@main.array = private unnamed_addr constant [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 16
; Function Attrs: nounwind ssp uwtable
-define void @f(i32* %p) #0 {
+define void @f(i32* %p) #0 !dbg !4 {
entry:
%p.addr = alloca i32*, align 8
store i32* %p, i32** %p.addr, align 8
@@ -40,7 +40,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind ssp uwtable
-define i32 @main(i32 %argc, i8** %argv) #0 {
+define i32 @main(i32 %argc, i8** %argv) #0 !dbg !10 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
@@ -72,17 +72,17 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!16, !17}
!llvm.ident = !{!18}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "array.c", directory: "")
!2 = !{}
!3 = !{!4, !10}
-!4 = !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*)* @f, variables: !2)
+!4 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "array.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "main", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !11, function: i32 (i32, i8**)* @main, variables: !2)
+!10 = distinct !DISubprogram(name: "main", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !11, variables: !2)
!11 = !DISubroutineType(types: !12)
!12 = !{!9, !9, !13}
!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !14)
@@ -91,14 +91,14 @@ attributes #2 = { nounwind }
!16 = !{i32 2, !"Dwarf Version", i32 2}
!17 = !{i32 1, !"Debug Info Version", i32 3}
!18 = !{!"clang version 3.5.0 "}
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!19 = !DILocalVariable(name: "p", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!20 = !DILocation(line: 1, scope: !4)
!21 = !DILocation(line: 2, scope: !4)
!22 = !DILocation(line: 3, scope: !4)
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 5, arg: 1, scope: !10, file: !5, type: !9)
+!23 = !DILocalVariable(name: "argc", line: 5, arg: 1, scope: !10, file: !5, type: !9)
!24 = !DILocation(line: 5, scope: !10)
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 5, arg: 2, scope: !10, file: !5, type: !13)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "array", line: 6, scope: !10, file: !5, type: !27)
+!25 = !DILocalVariable(name: "argv", line: 5, arg: 2, scope: !10, file: !5, type: !13)
+!26 = !DILocalVariable(name: "array", line: 6, scope: !10, file: !5, type: !27)
!27 = !DICompositeType(tag: DW_TAG_array_type, size: 128, align: 32, baseType: !9, elements: !28)
!28 = !{!29}
!29 = !DISubrange(count: 4)
diff --git a/test/DebugInfo/X86/bbjoin.ll b/test/DebugInfo/X86/bbjoin.ll
new file mode 100644
index 000000000000..8061a8d2ce9a
--- /dev/null
+++ b/test/DebugInfo/X86/bbjoin.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.9.0 %s -stop-after=livedebugvars \
+; RUN: -o %t.s | FileCheck %s
+; Generated from:
+; void g(int *);
+; int f() {
+; int x = 23;
+; g(&x);
+; if (x == 42)
+; ++x;
+; return x; // check that x is not a constant here.
+; }
+; CHECK: ![[X:.*]] = !DILocalVariable(name: "x",
+; CHECK: bb.0.entry:
+; CHECK: DBG_VALUE 23, 0, ![[X]],
+; CHECK: DBG_VALUE debug-use %rdi, debug-use _, ![[X]]
+; CHECK: bb.1.if.then:
+; CHECK: DBG_VALUE debug-use %rdi, debug-use _, ![[X]],
+; CHECK: bb.2.if.end:
+; CHECK-NOT: DBG_VALUE 23, 0, ![[X]],
+; CHECK: RETQ %eax
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @f() #0 !dbg !4 {
+entry:
+ %x = alloca i32, align 4
+ %0 = bitcast i32* %x to i8*, !dbg !14
+ call void @llvm.lifetime.start(i64 4, i8* %0) #4, !dbg !14
+ tail call void @llvm.dbg.value(metadata i32 23, i64 0, metadata !9, metadata !15), !dbg !16
+ store i32 23, i32* %x, align 4, !dbg !16, !tbaa !17
+ tail call void @llvm.dbg.value(metadata i32* %x, i64 0, metadata !9, metadata !15), !dbg !16
+ call void @g(i32* nonnull %x) #4, !dbg !21
+ call void @llvm.dbg.value(metadata i32* %x, i64 0, metadata !9, metadata !15), !dbg !16
+ %1 = load i32, i32* %x, align 4, !dbg !22, !tbaa !17
+ %cmp = icmp eq i32 %1, 42, !dbg !24
+ br i1 %cmp, label %if.then, label %if.end, !dbg !25
+
+if.then: ; preds = %entry
+ call void @llvm.dbg.value(metadata i32 43, i64 0, metadata !9, metadata !15), !dbg !16
+ store i32 43, i32* %x, align 4, !dbg !26, !tbaa !17
+ br label %if.end, !dbg !26
+
+if.end: ; preds = %if.then, %entry
+ %2 = phi i32 [ 43, %if.then ], [ %1, %entry ], !dbg !27
+ call void @llvm.dbg.value(metadata i32* %x, i64 0, metadata !9, metadata !15), !dbg !16
+ call void @llvm.lifetime.end(i64 4, i8* %0) #4, !dbg !28
+ ret i32 %2, !dbg !29
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @g(i32*) #4
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { argmemonly nounwind }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11, !12}
+!llvm.ident = !{!13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 255890) (llvm/trunk 255919)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "constant.c", directory: "")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "x", scope: !4, file: !1, line: 3, type: !7)
+!10 = !{i32 2, !"Dwarf Version", i32 2}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{i32 1, !"PIC Level", i32 2}
+!13 = !{!"clang version 3.8.0 (trunk 255890) (llvm/trunk 255919)"}
+!14 = !DILocation(line: 3, column: 3, scope: !4)
+!15 = !DIExpression()
+!16 = !DILocation(line: 3, column: 7, scope: !4)
+!17 = !{!18, !18, i64 0}
+!18 = !{!"int", !19, i64 0}
+!19 = !{!"omnipotent char", !20, i64 0}
+!20 = !{!"Simple C/C++ TBAA"}
+!21 = !DILocation(line: 4, column: 3, scope: !4)
+!22 = !DILocation(line: 5, column: 7, scope: !23)
+!23 = distinct !DILexicalBlock(scope: !4, file: !1, line: 5, column: 7)
+!24 = !DILocation(line: 5, column: 9, scope: !23)
+!25 = !DILocation(line: 5, column: 7, scope: !4)
+!26 = !DILocation(line: 6, column: 5, scope: !23)
+!27 = !DILocation(line: 7, column: 10, scope: !4)
+!28 = !DILocation(line: 8, column: 1, scope: !4)
+!29 = !DILocation(line: 7, column: 3, scope: !4)
diff --git a/test/DebugInfo/X86/bitfields.ll b/test/DebugInfo/X86/bitfields.ll
index e895fd67e03d..e0fa4f6d9980 100644
--- a/test/DebugInfo/X86/bitfields.ll
+++ b/test/DebugInfo/X86/bitfields.ll
@@ -54,7 +54,7 @@ target triple = "x86_64-apple-macosx"
!llvm.module.flags = !{!13, !14, !15}
!llvm.ident = !{!16}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240548) (llvm/trunk 240554)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "bitfields.c", directory: "/")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index fe026d656bb6..640973ed2a68 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -27,7 +27,7 @@ target triple = "x86_64-apple-darwin"
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: ssp uwtable
-define internal void @__foo_block_invoke(i8* %.block_descriptor) #2 {
+define internal void @__foo_block_invoke(i8* %.block_descriptor) #2 !dbg !8 {
entry:
%.block_descriptor.addr = alloca i8*, align 8
%block.addr = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>*, align 8
@@ -62,14 +62,14 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!16, !17, !18, !19, !20, !21, !22}
!llvm.ident = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.6.0 (trunk 223471)", isOptimized: false, runtimeVersion: 2, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.6.0 (trunk 223471)", isOptimized: false, runtimeVersion: 2, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.m", directory: "")
!2 = !{}
!3 = !{!8}
!5 = !DIFile(filename: "foo.m", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
-!8 = !DISubprogram(name: "__foo_block_invoke", line: 2, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !9, function: void (i8*)* @__foo_block_invoke, variables: !2)
+!8 = distinct !DISubprogram(name: "__foo_block_invoke", line: 2, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !9, variables: !2)
!9 = !DISubroutineType(types: !10)
!10 = !{null, !11}
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
@@ -102,7 +102,7 @@ attributes #3 = { nounwind }
!41 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
!42 = !DIDerivedType(tag: DW_TAG_member, name: "Size", size: 64, align: 64, offset: 64, file: !1, scope: !5, baseType: !41)
!43 = !DIExpression()
-!47 = !DILocalVariable(tag: DW_TAG_arg_variable, name: ".block_descriptor", line: 2, arg: 1, flags: DIFlagArtificial, scope: !8, file: !5, type: !48)
+!47 = !DILocalVariable(name: ".block_descriptor", line: 2, arg: 1, flags: DIFlagArtificial, scope: !8, file: !5, type: !48)
!48 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, baseType: !49)
!49 = !DICompositeType(tag: DW_TAG_structure_type, name: "__block_literal_1", line: 2, size: 320, align: 64, file: !1, scope: !5, elements: !50)
!50 = !{!51, !52, !53, !54, !56, !65}
@@ -123,7 +123,7 @@ attributes #3 = { nounwind }
!65 = !DIDerivedType(tag: DW_TAG_member, name: "block", line: 2, size: 64, align: 64, offset: 256, flags: DIFlagPublic, file: !1, scope: !5, baseType: !25)
!66 = !DILocation(line: 2, column: 20, scope: !8)
!67 = !DILocation(line: 2, column: 21, scope: !8)
-!68 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "block", line: 2, scope: !8, file: !5, type: !25)
+!68 = !DILocalVariable(name: "block", line: 2, scope: !8, file: !5, type: !25)
!69 = !DIExpression(DW_OP_deref, DW_OP_plus, 32)
!70 = !DILocation(line: 2, column: 9, scope: !8)
!71 = !DILocation(line: 2, column: 23, scope: !72)
diff --git a/test/DebugInfo/X86/byvalstruct.ll b/test/DebugInfo/X86/byvalstruct.ll
index dedbfb0200b7..422789ae77b5 100644
--- a/test/DebugInfo/X86/byvalstruct.ll
+++ b/test/DebugInfo/X86/byvalstruct.ll
@@ -58,7 +58,7 @@ target triple = "x86_64-apple-macosx10.8.0"
@llvm.used = appending global [5 x i8*] [i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([23 x i8], [23 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_Bitmap" to i8*), i8* bitcast ([1 x i8*]* @"\01L_OBJC_LABEL_CLASS_$" to i8*)], section "llvm.metadata"
; Function Attrs: ssp uwtable
-define internal i8* @"\01-[Bitmap initWithCopy:andInfo:andLength:]"(%0* %self, i8* %_cmd, %0* %otherBitmap, %struct.ImageInfo* byval align 8 %info, i64 %length) #0 {
+define internal i8* @"\01-[Bitmap initWithCopy:andInfo:andLength:]"(%0* %self, i8* %_cmd, %0* %otherBitmap, %struct.ImageInfo* byval align 8 %info, i64 %length) #0 !dbg !7 {
entry:
%retval = alloca i8*, align 8
%self.addr = alloca %0*, align 8
@@ -87,14 +87,14 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!24, !25, !26, !27, !38}
-!0 = !DICompileUnit(language: DW_LANG_ObjC_plus_plus, producer: "clang version 3.4 ", isOptimized: false, runtimeVersion: 2, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !6, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC_plus_plus, producer: "clang version 3.4 ", isOptimized: false, runtimeVersion: 2, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !6, globals: !2, imports: !2)
!1 = !DIFile(filename: "t.mm", directory: "")
!2 = !{}
!3 = !{!4}
!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "Bitmap", line: 8, size: 8, align: 8, flags: DIFlagObjcClassComplete, runtimeLang: DW_LANG_ObjC_plus_plus, file: !1, scope: !5, elements: !2)
!5 = !DIFile(filename: "t.mm", directory: "")
!6 = !{!7}
-!7 = !DISubprogram(name: "-[Bitmap initWithCopy:andInfo:andLength:]", line: 9, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !1, scope: !5, type: !8, function: i8* (%0*, i8*, %0*, %struct.ImageInfo*, i64)* @"\01-[Bitmap initWithCopy:andInfo:andLength:]", variables: !2)
+!7 = distinct !DISubprogram(name: "-[Bitmap initWithCopy:andInfo:andLength:]", line: 9, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !1, scope: !5, type: !8, variables: !2)
!8 = !DISubroutineType(types: !9)
!9 = !{!4, !10, !11, !14, !15, !19}
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !4)
@@ -115,14 +115,14 @@ attributes #1 = { nounwind readnone }
!25 = !{i32 1, !"Objective-C Image Info Version", i32 0}
!26 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
!27 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
-!28 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "self", line: 9, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !7, file: !5, type: !14)
+!28 = !DILocalVariable(name: "self", line: 9, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !7, file: !5, type: !14)
!29 = !DILocation(line: 9, scope: !7)
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "_cmd", line: 9, arg: 2, flags: DIFlagArtificial, scope: !7, file: !5, type: !31)
+!30 = !DILocalVariable(name: "_cmd", line: 9, arg: 2, flags: DIFlagArtificial, scope: !7, file: !5, type: !31)
!31 = !DIDerivedType(tag: DW_TAG_typedef, name: "SEL", line: 9, file: !1, baseType: !12)
-!32 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "otherBitmap", line: 9, arg: 3, scope: !7, file: !5, type: !14)
-!33 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "info", line: 10, arg: 4, scope: !7, file: !5, type: !15)
+!32 = !DILocalVariable(name: "otherBitmap", line: 9, arg: 3, scope: !7, file: !5, type: !14)
+!33 = !DILocalVariable(name: "info", line: 10, arg: 4, scope: !7, file: !5, type: !15)
!34 = !DILocation(line: 10, scope: !7)
-!35 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "length", line: 11, arg: 5, scope: !7, file: !5, type: !19)
+!35 = !DILocalVariable(name: "length", line: 11, arg: 5, scope: !7, file: !5, type: !19)
!36 = !DILocation(line: 11, scope: !7)
!37 = !DILocation(line: 13, scope: !7)
!38 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/c-type-units.ll b/test/DebugInfo/X86/c-type-units.ll
index 6756bd45f88c..d8321380c535 100644
--- a/test/DebugInfo/X86/c-type-units.ll
+++ b/test/DebugInfo/X86/c-type-units.ll
@@ -17,7 +17,7 @@
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "simple.c", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/X86/coff_debug_info_type.ll b/test/DebugInfo/X86/coff_debug_info_type.ll
index fd86c88baa15..cede74d5dc6a 100644
--- a/test/DebugInfo/X86/coff_debug_info_type.ll
+++ b/test/DebugInfo/X86/coff_debug_info_type.ll
@@ -5,7 +5,9 @@
; CHECK: .section .apple_names
; CHECK: .section .apple_types
-; RUN: llc -mtriple=i686-pc-win32 -filetype=asm -O0 < %s | FileCheck -check-prefix=WIN32 %s
+; RUN: sed -e 's/"Dwarf Version"/"CodeView"/' %s \
+; RUN: | llc -mtriple=i686-pc-win32 -filetype=asm -O0 \
+; RUN: | FileCheck -check-prefix=WIN32 %s
; WIN32: .section .debug$S,"dr"
; RUN: llc -mtriple=i686-pc-win32 -filetype=null -O0 < %s
@@ -17,7 +19,7 @@
; return 0;
; }
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !4 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -29,11 +31,11 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "C:\5CProjects")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "C:CProjects")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/DebugInfo/X86/coff_relative_names.ll b/test/DebugInfo/X86/coff_relative_names.ll
index 6b7864ab3979..7ca4b04a871f 100644
--- a/test/DebugInfo/X86/coff_relative_names.ll
+++ b/test/DebugInfo/X86/coff_relative_names.ll
@@ -11,7 +11,7 @@
; }
; Function Attrs: nounwind
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !4 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -23,11 +23,11 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "C:\5CProjects")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "C:CProjects")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index c1e1f750e2cc..d34c1ae37e3f 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -18,12 +18,14 @@
; CHECK: DW_AT_name {{.*}} "~nsAutoRefCnt"
; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_decl_line {{.*}}18
; CHECK-NEXT: DW_AT_{{.*}}linkage_name {{.*}}D2
; CHECK-NEXT: DW_AT_specification {{.*}} "~nsAutoRefCnt"
; CHECK-NEXT: DW_AT_inline
; CHECK-NOT: DW_AT
; CHECK: DW_TAG
; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_decl_line {{.*}}18
; CHECK-NEXT: DW_AT_{{.*}}linkage_name {{.*}}D1
; CHECK-NEXT: DW_AT_specification {{.*}} "~nsAutoRefCnt"
; CHECK-NEXT: DW_AT_inline
@@ -58,14 +60,14 @@
; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "_ZN17nsAutoRefCntD2Ev"
-define i32 @_ZN17nsAutoRefCnt7ReleaseEv() {
+define i32 @_ZN17nsAutoRefCnt7ReleaseEv() !dbg !5 {
entry:
store i32 1, i32* null, align 4, !dbg !50
tail call void @_Z8moz_freePv(i8* null) nounwind, !dbg !54
ret i32 0
}
-define void @_ZN17nsAutoRefCntD1Ev() {
+define void @_ZN17nsAutoRefCntD1Ev() !dbg !23 {
entry:
tail call void @_Z8moz_freePv(i8* null) nounwind, !dbg !57
ret void
@@ -76,10 +78,10 @@ declare void @_Z8moz_freePv(i8*)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!60}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 ()", isOptimized: true, emissionKind: 0, file: !59, enums: !1, retainedTypes: !1, subprograms: !3, globals: !47, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 ()", isOptimized: true, emissionKind: 0, file: !59, enums: !1, retainedTypes: !1, subprograms: !3, globals: !47, imports: !1)
!1 = !{}
!3 = !{!5, !23, !27, !31}
-!5 = !DISubprogram(name: "Release", linkageName: "_ZN17nsAutoRefCnt7ReleaseEv", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !6, scope: null, type: !7, function: i32 ()* @_ZN17nsAutoRefCnt7ReleaseEv , declaration: !12, variables: !20)
+!5 = distinct !DISubprogram(name: "Release", linkageName: "_ZN17nsAutoRefCnt7ReleaseEv", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !6, scope: null, type: !7 , declaration: !12, variables: !20)
!6 = !DIFile(filename: "nsAutoRefCnt.ii", directory: "/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !10}
@@ -94,14 +96,14 @@ declare void @_Z8moz_freePv(i8*)
!17 = !{null, !10}
!18 = !{}
!20 = !{!22}
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 14, arg: 1, flags: DIFlagArtificial, scope: !5, file: !6, type: !10)
-!23 = !DISubprogram(name: "~nsAutoRefCnt", linkageName: "_ZN17nsAutoRefCntD1Ev", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !6, scope: null, type: !16, function: void ()* @_ZN17nsAutoRefCntD1Ev, declaration: !15, variables: !24)
+!22 = !DILocalVariable(name: "this", line: 14, arg: 1, flags: DIFlagArtificial, scope: !5, file: !6, type: !10)
+!23 = distinct !DISubprogram(name: "~nsAutoRefCnt", linkageName: "_ZN17nsAutoRefCntD1Ev", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !6, scope: null, type: !16, declaration: !15, variables: !24)
!24 = !{!26}
-!26 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 18, arg: 1, flags: DIFlagArtificial, scope: !23, file: !6, type: !10)
-!27 = !DISubprogram(name: "~nsAutoRefCnt", linkageName: "_ZN17nsAutoRefCntD2Ev", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !6, scope: null, type: !16, function: i32 ()* null, declaration: !15, variables: !28)
+!26 = !DILocalVariable(name: "this", line: 18, arg: 1, flags: DIFlagArtificial, scope: !23, file: !6, type: !10)
+!27 = distinct !DISubprogram(name: "~nsAutoRefCnt", linkageName: "_ZN17nsAutoRefCntD2Ev", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !6, scope: null, type: !16, declaration: !15, variables: !28)
!28 = !{!30}
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 18, arg: 1, flags: DIFlagArtificial, scope: !27, file: !6, type: !10)
-!31 = !DISubprogram(name: "operator=", linkageName: "_ZN12nsAutoRefCntaSEi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !6, scope: null, type: !32, declaration: !36, variables: !43)
+!30 = !DILocalVariable(name: "this", line: 18, arg: 1, flags: DIFlagArtificial, scope: !27, file: !6, type: !10)
+!31 = distinct !DISubprogram(name: "operator=", linkageName: "_ZN12nsAutoRefCntaSEi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !6, scope: null, type: !32, declaration: !36, variables: !43)
!32 = !DISubroutineType(types: !33)
!33 = !{!9, !34, !9}
!34 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !35)
@@ -114,8 +116,8 @@ declare void @_Z8moz_freePv(i8*)
!41 = !DISubroutineType(types: !42)
!42 = !{null, !34}
!43 = !{!45, !46}
-!45 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 4, arg: 1, flags: DIFlagArtificial, scope: !31, file: !6, type: !34)
-!46 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "aValue", line: 4, arg: 2, scope: !31, file: !6, type: !9)
+!45 = !DILocalVariable(name: "this", line: 4, arg: 1, flags: DIFlagArtificial, scope: !31, file: !6, type: !34)
+!46 = !DILocalVariable(name: "aValue", line: 4, arg: 2, scope: !31, file: !6, type: !9)
!47 = !{!49}
!49 = !DIGlobalVariable(name: "mRefCnt", line: 9, isLocal: false, isDefinition: true, scope: null, file: !6, type: !37, variable: i32* null)
!50 = !DILocation(line: 5, column: 5, scope: !51, inlinedAt: !52)
diff --git a/test/DebugInfo/X86/constant-aggregate.ll b/test/DebugInfo/X86/constant-aggregate.ll
index daec3b4e7d66..35bf9a516807 100644
--- a/test/DebugInfo/X86/constant-aggregate.ll
+++ b/test/DebugInfo/X86/constant-aggregate.ll
@@ -40,7 +40,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; Function Attrs: nounwind readnone ssp uwtable
-define i32 @_Z3foo1S(i32 %s.coerce) #0 {
+define i32 @_Z3foo1S(i32 %s.coerce) #0 !dbg !12 {
entry:
tail call void @llvm.dbg.value(metadata i32 %s.coerce, i64 0, metadata !18, metadata !37), !dbg !38
tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !18, metadata !37), !dbg !38
@@ -48,7 +48,7 @@ entry:
}
; Function Attrs: nounwind readnone ssp uwtable
-define i32 @_Z3foo1C(i32 %c.coerce) #0 {
+define i32 @_Z3foo1C(i32 %c.coerce) #0 !dbg !19 {
entry:
tail call void @llvm.dbg.value(metadata i32 %c.coerce, i64 0, metadata !23, metadata !37), !dbg !40
tail call void @llvm.dbg.value(metadata i32 2, i64 0, metadata !23, metadata !37), !dbg !40
@@ -56,7 +56,7 @@ entry:
}
; Function Attrs: nounwind readnone ssp uwtable
-define i32 @_Z3barv() #0 {
+define i32 @_Z3barv() #0 !dbg !24 {
entry:
tail call void @llvm.dbg.value(metadata i32 3, i64 0, metadata !28, metadata !37), !dbg !42
ret i32 3, !dbg !43
@@ -72,7 +72,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!33, !34, !35}
!llvm.ident = !{!36}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 225364) (llvm/trunk 225366)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !11, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 225364) (llvm/trunk 225366)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !11, globals: !2, imports: !2)
!1 = !DIFile(filename: "sroasplit-4.cpp", directory: "")
!2 = !{}
!3 = !{!4, !8}
@@ -84,23 +84,23 @@ attributes #1 = { nounwind readnone }
!9 = !{!10}
!10 = !DIDerivedType(tag: DW_TAG_member, name: "i", line: 8, size: 32, align: 32, flags: DIFlagPublic, file: !1, scope: !"_ZTS1C", baseType: !7)
!11 = !{!12, !19, !24}
-!12 = !DISubprogram(name: "foo", linkageName: "_Z3foo1S", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !13, type: !14, function: i32 (i32)* @_Z3foo1S, variables: !17)
+!12 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foo1S", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !13, type: !14, variables: !17)
!13 = !DIFile(filename: "sroasplit-4.cpp", directory: "")
!14 = !DISubroutineType(types: !15)
!15 = !{!7, !16}
!16 = !DIDerivedType(tag: DW_TAG_typedef, name: "S", line: 1, file: !1, baseType: !"_ZTS1S")
!17 = !{!18}
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 3, arg: 1, scope: !12, file: !13, type: !16)
-!19 = !DISubprogram(name: "foo", linkageName: "_Z3foo1C", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !1, scope: !13, type: !20, function: i32 (i32)* @_Z3foo1C, variables: !22)
+!18 = !DILocalVariable(name: "s", line: 3, arg: 1, scope: !12, file: !13, type: !16)
+!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foo1C", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !1, scope: !13, type: !20, variables: !22)
!20 = !DISubroutineType(types: !21)
!21 = !{!7, !"_ZTS1C"}
!22 = !{!23}
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 10, arg: 1, scope: !19, file: !13, type: !"_ZTS1C")
-!24 = !DISubprogram(name: "bar", linkageName: "_Z3barv", line: 15, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 15, file: !1, scope: !13, type: !25, function: i32 ()* @_Z3barv, variables: !27)
+!23 = !DILocalVariable(name: "c", line: 10, arg: 1, scope: !19, file: !13, type: !"_ZTS1C")
+!24 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", line: 15, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 15, file: !1, scope: !13, type: !25, variables: !27)
!25 = !DISubroutineType(types: !26)
!26 = !{!7}
!27 = !{!28}
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 16, scope: !24, file: !13, type: !29)
+!28 = !DILocalVariable(name: "a", line: 16, scope: !24, file: !13, type: !29)
!29 = !DICompositeType(tag: DW_TAG_array_type, size: 32, align: 32, baseType: !30, elements: !31)
!30 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!31 = !{!32}
diff --git a/test/DebugInfo/X86/cu-ranges-odr.ll b/test/DebugInfo/X86/cu-ranges-odr.ll
index 623e2fdd47a5..67da59a0e4bb 100644
--- a/test/DebugInfo/X86/cu-ranges-odr.ll
+++ b/test/DebugInfo/X86/cu-ranges-odr.ll
@@ -23,14 +23,14 @@
@a = global %class.A zeroinitializer, align 4
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
-define internal void @__cxx_global_var_init() section ".text.startup" {
+define internal void @__cxx_global_var_init() section ".text.startup" !dbg !14 {
entry:
call void @_ZN1AC2Ei(%class.A* @a, i32 0), !dbg !26
ret void, !dbg !26
}
; Function Attrs: nounwind uwtable
-define linkonce_odr void @_ZN1AC2Ei(%class.A* %this, i32 %i) unnamed_addr #0 align 2 {
+define linkonce_odr void @_ZN1AC2Ei(%class.A* %this, i32 %i) unnamed_addr #0 align 2 !dbg !18 {
entry:
%this.addr = alloca %class.A*, align 8
%i.addr = alloca i32, align 4
@@ -48,7 +48,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-define internal void @_GLOBAL__I_a() section ".text.startup" {
+define internal void @_GLOBAL__I_a() section ".text.startup" !dbg !19 {
entry:
call void @__cxx_global_var_init(), !dbg !32
ret void, !dbg !32
@@ -61,7 +61,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!23, !24}
!llvm.ident = !{!25}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 (trunk 199923) (llvm/trunk 199940)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !13, globals: !21, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 (trunk 199923) (llvm/trunk 199940)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !13, globals: !21, imports: !2)
!1 = !DIFile(filename: "baz.cpp", directory: "/usr/local/google/home/echristo/tmp")
!2 = !{}
!3 = !{!4}
@@ -74,12 +74,12 @@ attributes #1 = { nounwind readnone }
!10 = !{null, !11, !7}
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
!13 = !{!14, !18, !19}
-!14 = !DISubprogram(name: "__cxx_global_var_init", line: 8, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !15, type: !16, function: void ()* @__cxx_global_var_init, variables: !2)
+!14 = distinct !DISubprogram(name: "__cxx_global_var_init", line: 8, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !15, type: !16, variables: !2)
!15 = !DIFile(filename: "baz.cpp", directory: "/usr/local/google/home/echristo/tmp")
!16 = !DISubroutineType(types: !17)
!17 = !{null}
-!18 = !DISubprogram(name: "A", linkageName: "_ZN1AC2Ei", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !"_ZTS1A", type: !9, function: void (%class.A*, i32)* @_ZN1AC2Ei, declaration: !8, variables: !2)
-!19 = !DISubprogram(name: "", linkageName: "_GLOBAL__I_a", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, scopeLine: 3, file: !1, scope: !15, type: !20, function: void ()* @_GLOBAL__I_a, variables: !2)
+!18 = distinct !DISubprogram(name: "A", linkageName: "_ZN1AC2Ei", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !"_ZTS1A", type: !9, declaration: !8, variables: !2)
+!19 = distinct !DISubprogram(name: "", linkageName: "_GLOBAL__I_a", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, scopeLine: 3, file: !1, scope: !15, type: !20, variables: !2)
!20 = !DISubroutineType(types: !2)
!21 = !{!22}
!22 = !DIGlobalVariable(name: "a", line: 8, isLocal: false, isDefinition: true, scope: null, file: !15, type: !4, variable: %class.A* @a)
@@ -87,9 +87,9 @@ attributes #1 = { nounwind readnone }
!24 = !{i32 1, !"Debug Info Version", i32 3}
!25 = !{!"clang version 3.5 (trunk 199923) (llvm/trunk 199940)"}
!26 = !DILocation(line: 8, scope: !14)
-!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !18, type: !28)
+!27 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !18, type: !28)
!28 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1A")
!29 = !DILocation(line: 0, scope: !18)
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 3, arg: 2, scope: !18, file: !15, type: !7)
+!30 = !DILocalVariable(name: "i", line: 3, arg: 2, scope: !18, file: !15, type: !7)
!31 = !DILocation(line: 3, scope: !18)
!32 = !DILocation(line: 3, scope: !19)
diff --git a/test/DebugInfo/X86/cu-ranges.ll b/test/DebugInfo/X86/cu-ranges.ll
index 5611a31cd5fb..543684596d9e 100644
--- a/test/DebugInfo/X86/cu-ranges.ll
+++ b/test/DebugInfo/X86/cu-ranges.ll
@@ -25,7 +25,7 @@
; NO-FUNCTION-SECTIONS-NOT: DW_AT_ranges
; Function Attrs: nounwind uwtable
-define i32 @foo(i32 %a) #0 {
+define i32 @foo(i32 %a) #0 !dbg !4 {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
@@ -39,7 +39,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind uwtable
-define i32 @bar(i32 %b) #0 {
+define i32 @bar(i32 %b) #0 !dbg !9 {
entry:
%b.addr = alloca i32, align 4
store i32 %b, i32* %b.addr, align 4
@@ -56,20 +56,20 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "z.c", directory: "/usr/local/google/home/echristo")
!2 = !{}
!3 = !{!4, !9}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "z.c", directory: "/usr/local/google/home/echristo")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 (i32)* @bar, variables: !2)
+!9 = distinct !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 1, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!13 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!14 = !DILocation(line: 1, scope: !4)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 2, arg: 1, scope: !9, file: !5, type: !8)
+!15 = !DILocalVariable(name: "b", line: 2, arg: 1, scope: !9, file: !5, type: !8)
!16 = !DILocation(line: 2, scope: !9)
diff --git a/test/DebugInfo/X86/data_member_location.ll b/test/DebugInfo/X86/data_member_location.ll
index c17c45bcc696..5b33096864bb 100644
--- a/test/DebugInfo/X86/data_member_location.ll
+++ b/test/DebugInfo/X86/data_member_location.ll
@@ -34,7 +34,7 @@
!llvm.module.flags = !{!13, !15}
!llvm.ident = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !10, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !10, imports: !2)
!1 = !DIFile(filename: "data_member_location.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/X86/dbg-at-specficiation.ll b/test/DebugInfo/X86/dbg-at-specficiation.ll
deleted file mode 100644
index ccb5ab142cda..000000000000
--- a/test/DebugInfo/X86/dbg-at-specficiation.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; Radar 10147769
-; Do not unnecessarily use AT_specification DIE.
-; CHECK-NOT: AT_specification
-
-@a = common global [10 x i32] zeroinitializer, align 16
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!12}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 140253)", isOptimized: true, emissionKind: 0, file: !11, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3)
-!2 = !{}
-!3 = !{!5}
-!5 = !DIGlobalVariable(name: "a", line: 1, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: [10 x i32]* @a)
-!6 = !DIFile(filename: "x.c", directory: "/private/tmp")
-!7 = !DICompositeType(tag: DW_TAG_array_type, size: 320, align: 32, baseType: !8, elements: !9)
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !{!10}
-!10 = !DISubrange(count: 10)
-!11 = !DIFile(filename: "x.c", directory: "/private/tmp")
-!12 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/dbg-byval-parameter.ll b/test/DebugInfo/X86/dbg-byval-parameter.ll
index e07109e08f93..c80f8d90ca89 100644
--- a/test/DebugInfo/X86/dbg-byval-parameter.ll
+++ b/test/DebugInfo/X86/dbg-byval-parameter.ll
@@ -4,7 +4,7 @@
%struct.Pt = type { double, double }
%struct.Rect = type { %struct.Pt, %struct.Pt }
-define double @foo(%struct.Rect* byval %my_r0) nounwind ssp {
+define double @foo(%struct.Rect* byval %my_r0) nounwind ssp !dbg !1 {
entry:
%retval = alloca double ; <double*> [#uses=2]
%0 = alloca double ; <double*> [#uses=2]
@@ -28,10 +28,10 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!21}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "my_r0", line: 11, arg: 0, scope: !1, file: !2, type: !7)
-!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !19, scope: !2, type: !4, function: double (%struct.Rect*)* @foo)
+!0 = !DILocalVariable(name: "my_r0", line: 11, arg: 1, scope: !1, file: !2, type: !7)
+!1 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !19, scope: !2, type: !4)
!2 = !DIFile(filename: "b2.c", directory: "/tmp/")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !19, enums: !20, retainedTypes: !20, subprograms: !18, imports: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !19, enums: !20, retainedTypes: !20, subprograms: !18, imports: null)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !7}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
diff --git a/test/DebugInfo/X86/dbg-const-int.ll b/test/DebugInfo/X86/dbg-const-int.ll
index 8482abf373eb..7e90a14c21a2 100644
--- a/test/DebugInfo/X86/dbg-const-int.ll
+++ b/test/DebugInfo/X86/dbg-const-int.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx10.6.7"
; CHECK-NOT: DW_TAG
; CHECK: DW_AT_name {{.*}} "i"
-define i32 @foo() nounwind uwtable readnone optsize ssp {
+define i32 @foo() nounwind uwtable readnone optsize ssp !dbg !1 {
entry:
tail call void @llvm.dbg.value(metadata i32 42, i64 0, metadata !6, metadata !DIExpression()), !dbg !9
ret i32 42, !dbg !10
@@ -21,13 +21,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 132191)", isOptimized: true, emissionKind: 0, file: !13, enums: !14, retainedTypes: !14, subprograms: !11, imports: null)
-!1 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !13, scope: !2, type: !3, function: i32 ()* @foo, variables: !12)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 132191)", isOptimized: true, emissionKind: 0, file: !13, enums: !14, retainedTypes: !14, subprograms: !11, imports: null)
+!1 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !13, scope: !2, type: !3, variables: !12)
!2 = !DIFile(filename: "a.c", directory: "/private/tmp")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2, scope: !7, file: !2, type: !5)
+!6 = !DILocalVariable(name: "i", line: 2, scope: !7, file: !2, type: !5)
!7 = distinct !DILexicalBlock(line: 1, column: 11, file: !13, scope: !1)
!8 = !{i32 42}
!9 = !DILocation(line: 2, column: 12, scope: !7)
diff --git a/test/DebugInfo/X86/dbg-const.ll b/test/DebugInfo/X86/dbg-const.ll
index 27a0eabfe493..6d710c512fd7 100644
--- a/test/DebugInfo/X86/dbg-const.ll
+++ b/test/DebugInfo/X86/dbg-const.ll
@@ -15,7 +15,7 @@ target triple = "x86_64-apple-darwin10.0.0"
;CHECK: ## DW_OP_consts
;CHECK-NEXT: .byte 42
-define i32 @foobar() nounwind readonly noinline ssp {
+define i32 @foobar() nounwind readonly noinline ssp !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata i32 42, i64 0, metadata !6, metadata !DIExpression()), !dbg !9
%call = tail call i32 @bar(), !dbg !11
@@ -31,13 +31,13 @@ declare i32 @bar() nounwind readnone
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!17}
-!0 = !DISubprogram(name: "foobar", linkageName: "foobar", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !15, scope: !1, type: !3, function: i32 ()* @foobar, variables: !14)
+!0 = distinct !DISubprogram(name: "foobar", linkageName: "foobar", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !15, scope: !1, type: !3, variables: !14)
!1 = !DIFile(filename: "mu.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 114183)", isOptimized: true, emissionKind: 1, file: !15, enums: !16, retainedTypes: !16, subprograms: !13, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 114183)", isOptimized: true, emissionKind: 1, file: !15, enums: !16, retainedTypes: !16, subprograms: !13, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 15, scope: !7, file: !1, type: !5)
+!6 = !DILocalVariable(name: "j", line: 15, scope: !7, file: !1, type: !5)
!7 = distinct !DILexicalBlock(line: 12, column: 52, file: !15, scope: !0)
!8 = !{i32 42}
!9 = !DILocation(line: 15, column: 12, scope: !7)
diff --git a/test/DebugInfo/X86/dbg-declare-arg.ll b/test/DebugInfo/X86/dbg-declare-arg.ll
index 85ff4b920ad1..c35bc8c5f629 100644
--- a/test/DebugInfo/X86/dbg-declare-arg.ll
+++ b/test/DebugInfo/X86/dbg-declare-arg.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-apple-macosx10.6.7"
%class.A = type { i32, i32, i32, i32 }
-define void @_Z3fooi(%class.A* sret %agg.result, i32 %i) ssp {
+define void @_Z3fooi(%class.A* sret %agg.result, i32 %i) ssp !dbg !19 {
entry:
%i.addr = alloca i32, align 4
%j = alloca i32, align 4
@@ -48,7 +48,7 @@ nrvo.skipdtor: ; preds = %nrvo.unused, %if.en
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define linkonce_odr void @_ZN1AD1Ev(%class.A* %this) unnamed_addr ssp align 2 {
+define linkonce_odr void @_ZN1AD1Ev(%class.A* %this) unnamed_addr ssp align 2 !dbg !22 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -58,7 +58,7 @@ entry:
ret void, !dbg !45
}
-define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr nounwind ssp align 2 {
+define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr nounwind ssp align 2 !dbg !25 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -74,7 +74,7 @@ entry:
!0 = !DISubprogram(name: "~A", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !51, scope: !1, type: !11)
!1 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 2, size: 128, align: 32, file: !51, scope: !2, elements: !4)
-!2 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 130127)", isOptimized: false, emissionKind: 1, file: !51, enums: !{}, retainedTypes: !{}, subprograms: !50)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 130127)", isOptimized: false, emissionKind: 1, file: !51, enums: !{}, retainedTypes: !{}, subprograms: !50)
!3 = !DIFile(filename: "a.cc", directory: "/private/tmp")
!4 = !{!5, !7, !8, !9, !0, !10, !14}
!5 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 2, size: 32, align: 32, file: !51, scope: !3, baseType: !6)
@@ -91,16 +91,16 @@ entry:
!16 = !{null, !13, !17}
!17 = !DIDerivedType(tag: DW_TAG_reference_type, scope: !2, baseType: !18)
!18 = !DIDerivedType(tag: DW_TAG_const_type, file: !3, baseType: !1)
-!19 = !DISubprogram(name: "foo", linkageName: "_Z3fooi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !51, scope: !3, type: !20, function: void (%class.A*, i32)* @_Z3fooi)
+!19 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !51, scope: !3, type: !20)
!20 = !DISubroutineType(types: !21)
!21 = !{!1}
-!22 = !DISubprogram(name: "~A", linkageName: "_ZN1AD1Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !51, scope: !3, type: !23, function: void (%class.A*)* @_ZN1AD1Ev)
+!22 = distinct !DISubprogram(name: "~A", linkageName: "_ZN1AD1Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !51, scope: !3, type: !23)
!23 = !DISubroutineType(types: !24)
!24 = !{null}
-!25 = !DISubprogram(name: "~A", linkageName: "_ZN1AD2Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !51, scope: !3, type: !23, function: void (%class.A*)* @_ZN1AD2Ev)
-!26 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 4, arg: 1, scope: !19, file: !3, type: !6)
+!25 = distinct !DISubprogram(name: "~A", linkageName: "_ZN1AD2Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !51, scope: !3, type: !23)
+!26 = !DILocalVariable(name: "i", line: 4, arg: 1, scope: !19, file: !3, type: !6)
!27 = !DILocation(line: 4, column: 11, scope: !19)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 5, scope: !29, file: !3, type: !6)
+!28 = !DILocalVariable(name: "j", line: 5, scope: !29, file: !3, type: !6)
!29 = distinct !DILexicalBlock(line: 4, column: 14, file: !51, scope: !19)
!30 = !DILocation(line: 5, column: 7, scope: !29)
!31 = !DILocation(line: 5, column: 12, scope: !29)
@@ -109,16 +109,16 @@ entry:
!34 = distinct !DILexicalBlock(line: 6, column: 16, file: !51, scope: !29)
!35 = !DILocation(line: 8, column: 3, scope: !34)
!36 = !DILocation(line: 9, column: 9, scope: !29)
-!37 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "my_a", line: 9, scope: !29, file: !3, type: !38)
+!37 = !DILocalVariable(name: "my_a", line: 9, scope: !29, file: !3, type: !38)
!38 = !DIDerivedType(tag: DW_TAG_reference_type, file: !3, baseType: !1)
!39 = !DILocation(line: 9, column: 5, scope: !29)
!40 = !DILocation(line: 10, column: 3, scope: !29)
!41 = !DILocation(line: 11, column: 3, scope: !29)
!42 = !DILocation(line: 12, column: 1, scope: !29)
-!43 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 2, arg: 1, flags: DIFlagArtificial, scope: !22, file: !3, type: !13)
+!43 = !DILocalVariable(name: "this", line: 2, arg: 1, flags: DIFlagArtificial, scope: !22, file: !3, type: !13)
!44 = !DILocation(line: 2, column: 47, scope: !22)
!45 = !DILocation(line: 2, column: 61, scope: !22)
-!46 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 2, arg: 1, flags: DIFlagArtificial, scope: !25, file: !3, type: !13)
+!46 = !DILocalVariable(name: "this", line: 2, arg: 1, flags: DIFlagArtificial, scope: !25, file: !3, type: !13)
!47 = !DILocation(line: 2, column: 47, scope: !25)
!48 = !DILocation(line: 2, column: 54, scope: !49)
!49 = distinct !DILexicalBlock(line: 2, column: 52, file: !51, scope: !25)
diff --git a/test/DebugInfo/X86/dbg-declare.ll b/test/DebugInfo/X86/dbg-declare.ll
index a1420a0ba2ef..1d6cfe859596 100644
--- a/test/DebugInfo/X86/dbg-declare.ll
+++ b/test/DebugInfo/X86/dbg-declare.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -O0 -mtriple x86_64-apple-darwin
; <rdar://problem/11134152>
-define i32 @foo(i32* %x) nounwind uwtable ssp {
+define i32 @foo(i32* %x) nounwind uwtable ssp !dbg !5 {
entry:
%x.addr = alloca i32*, align 8
%saved_stack = alloca i8*
@@ -30,21 +30,21 @@ declare void @llvm.stackrestore(i8*) nounwind
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!27}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 153698)", isOptimized: false, emissionKind: 0, file: !26, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 153698)", isOptimized: false, emissionKind: 0, file: !26, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "foo", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !26, scope: !0, type: !7, function: i32 (i32*)* @foo)
+!5 = distinct !DISubprogram(name: "foo", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !26, scope: !0, type: !7)
!6 = !DIFile(filename: "20020104-2.c", directory: "/Volumes/Sandbox/llvm")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !10}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !11)
!11 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 5, arg: 1, scope: !5, file: !6, type: !10)
+!14 = !DILocalVariable(name: "x", line: 5, arg: 1, scope: !5, file: !6, type: !10)
!15 = !DILocation(line: 5, column: 21, scope: !5)
!16 = !DILocation(line: 7, column: 13, scope: !17)
!17 = distinct !DILexicalBlock(line: 6, column: 1, file: !26, scope: !5)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 7, scope: !17, file: !6, type: !19)
+!18 = !DILocalVariable(name: "a", line: 7, scope: !17, file: !6, type: !19)
!19 = !DICompositeType(tag: DW_TAG_array_type, align: 8, baseType: !20, elements: !21)
!20 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!21 = !{!22}
diff --git a/test/DebugInfo/X86/dbg-file-name.ll b/test/DebugInfo/X86/dbg-file-name.ll
index cc18c0804db4..60954b9746ba 100644
--- a/test/DebugInfo/X86/dbg-file-name.ll
+++ b/test/DebugInfo/X86/dbg-file-name.ll
@@ -5,7 +5,7 @@
declare i32 @printf(i8*, ...) nounwind
-define i32 @main() nounwind {
+define i32 @main() nounwind !dbg !6 {
ret i32 0
}
@@ -13,9 +13,9 @@ define i32 @main() nounwind {
!llvm.module.flags = !{!12}
!1 = !DIFile(filename: "simple.c", directory: "/Users/manav/one/two")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "LLVM build 00", isOptimized: true, emissionKind: 0, file: !10, enums: !11, retainedTypes: !11, subprograms: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "LLVM build 00", isOptimized: true, emissionKind: 0, file: !10, enums: !11, retainedTypes: !11, subprograms: !9)
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "main", linkageName: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !10, scope: !1, type: !7, function: i32 ()* @main)
+!6 = distinct !DISubprogram(name: "main", linkageName: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !10, scope: !1, type: !7)
!7 = !DISubroutineType(types: !8)
!8 = !{!5}
!9 = !{!6}
diff --git a/test/DebugInfo/X86/dbg-i128-const.ll b/test/DebugInfo/X86/dbg-i128-const.ll
index de804faabf4a..80ea1769b60e 100644
--- a/test/DebugInfo/X86/dbg-i128-const.ll
+++ b/test/DebugInfo/X86/dbg-i128-const.ll
@@ -4,7 +4,7 @@
; CHECK: DW_AT_const_value
; CHECK-NEXT: 42
-define i128 @__foo(i128 %a, i128 %b) nounwind {
+define i128 @__foo(i128 %a, i128 %b) nounwind !dbg !3 {
entry:
tail call void @llvm.dbg.value(metadata i128 42 , i64 0, metadata !1, metadata !DIExpression()), !dbg !11
%add = add i128 %a, %b, !dbg !11
@@ -17,11 +17,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!16}
!0 = !{i128 42 }
-!1 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "MAX", line: 29, scope: !2, file: !4, type: !8)
+!1 = !DILocalVariable(name: "MAX", line: 29, scope: !2, file: !4, type: !8)
!2 = distinct !DILexicalBlock(line: 26, column: 0, file: !13, scope: !3)
-!3 = !DISubprogram(name: "__foo", linkageName: "__foo", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 26, file: !13, scope: !4, type: !6, function: i128 (i128, i128)* @__foo)
+!3 = distinct !DISubprogram(name: "__foo", linkageName: "__foo", line: 26, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 26, file: !13, scope: !4, type: !6)
!4 = !DIFile(filename: "foo.c", directory: "/tmp")
-!5 = !DICompileUnit(language: DW_LANG_C89, producer: "clang", isOptimized: true, emissionKind: 0, file: !13, enums: !15, retainedTypes: !15, subprograms: !12, imports: null)
+!5 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "clang", isOptimized: true, emissionKind: 0, file: !13, enums: !15, retainedTypes: !15, subprograms: !12, imports: null)
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8, !8}
!8 = !DIDerivedType(tag: DW_TAG_typedef, name: "ti_int", line: 78, file: !14, scope: !4, baseType: !10)
diff --git a/test/DebugInfo/X86/dbg-merge-loc-entry.ll b/test/DebugInfo/X86/dbg-merge-loc-entry.ll
index 14265139b0fd..fc5c1bbd529d 100644
--- a/test/DebugInfo/X86/dbg-merge-loc-entry.ll
+++ b/test/DebugInfo/X86/dbg-merge-loc-entry.ll
@@ -12,7 +12,7 @@ target triple = "x86_64-apple-darwin8"
@__clz_tab = external constant [256 x i8]
-define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone {
+define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone !dbg !9 {
entry:
tail call void @llvm.dbg.value(metadata i128 %u, i64 0, metadata !14, metadata !DIExpression()), !dbg !15
tail call void @llvm.dbg.value(metadata i64 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !21
@@ -40,24 +40,24 @@ declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!32}
-!0 = !DISubprogram(name: "__udivmodti4", line: 879, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 879, file: !29, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "__udivmodti4", line: 879, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 879, file: !29, scope: !1, type: !3)
!1 = !DIFile(filename: "foobar.c", directory: "/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !29, enums: !31, retainedTypes: !31, subprograms: !28, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !29, enums: !31, retainedTypes: !31, subprograms: !28, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5, !5, !5, !8}
!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "UTItype", line: 166, file: !30, scope: !6, baseType: !7)
!6 = !DIFile(filename: "foobar.h", directory: "/tmp")
!7 = !DIBasicType(tag: DW_TAG_base_type, size: 128, align: 128, encoding: DW_ATE_unsigned)
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !29, scope: !1, baseType: !5)
-!9 = !DISubprogram(name: "__divti3", linkageName: "__divti3", line: 1094, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1094, file: !29, scope: !1, type: !10, function: i128 (i128, i128)* @__divti3)
+!9 = distinct !DISubprogram(name: "__divti3", linkageName: "__divti3", line: 1094, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1094, file: !29, scope: !1, type: !10)
!10 = !DISubroutineType(types: !11)
!11 = !{!12, !12, !12}
!12 = !DIDerivedType(tag: DW_TAG_typedef, name: "TItype", line: 160, file: !30, scope: !6, baseType: !13)
!13 = !DIBasicType(tag: DW_TAG_base_type, size: 128, align: 128, encoding: DW_ATE_signed)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "u", line: 1093, arg: 0, scope: !9, file: !1, type: !12)
+!14 = !DILocalVariable(name: "u", line: 1093, arg: 1, scope: !9, file: !1, type: !12)
!15 = !DILocation(line: 1093, scope: !9)
!16 = !{i64 0}
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 1095, scope: !18, file: !1, type: !19)
+!17 = !DILocalVariable(name: "c", line: 1095, scope: !18, file: !1, type: !19)
!18 = distinct !DILexicalBlock(line: 1094, column: 0, file: !29, scope: !9)
!19 = !DIDerivedType(tag: DW_TAG_typedef, name: "word_type", line: 424, file: !30, scope: !6, baseType: !20)
!20 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
diff --git a/test/DebugInfo/X86/dbg-prolog-end.ll b/test/DebugInfo/X86/dbg-prolog-end.ll
index 870b09120bcf..c3802b9bfa5a 100644
--- a/test/DebugInfo/X86/dbg-prolog-end.ll
+++ b/test/DebugInfo/X86/dbg-prolog-end.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.6.7"
;CHECK: .loc 1 2 11 prologue_end
-define i32 @foo(i32 %i) nounwind ssp {
+define i32 @foo(i32 %i) nounwind ssp !dbg !1 {
entry:
%i.addr = alloca i32, align 4
%j = alloca i32, align 4
@@ -24,7 +24,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define i32 @main() nounwind ssp {
+define i32 @main() nounwind ssp !dbg !6 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -36,16 +36,16 @@ entry:
!llvm.module.flags = !{!21}
!18 = !{!1, !6}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131100)", isOptimized: false, emissionKind: 0, file: !19, enums: !20, retainedTypes: !20, subprograms: !18, imports: null)
-!1 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !19, scope: !2, type: !3, function: i32 (i32)* @foo)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131100)", isOptimized: false, emissionKind: 0, file: !19, enums: !20, retainedTypes: !20, subprograms: !18, imports: null)
+!1 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !19, scope: !2, type: !3)
!2 = !DIFile(filename: "/tmp/a.c", directory: "/private/tmp")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !19, scope: !2, type: !3, function: i32 ()* @main)
-!7 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 1, arg: 1, scope: !1, file: !2, type: !5)
+!6 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !19, scope: !2, type: !3)
+!7 = !DILocalVariable(name: "i", line: 1, arg: 1, scope: !1, file: !2, type: !5)
!8 = !DILocation(line: 1, column: 13, scope: !1)
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 2, scope: !10, file: !2, type: !5)
+!9 = !DILocalVariable(name: "j", line: 2, scope: !10, file: !2, type: !5)
!10 = distinct !DILexicalBlock(line: 1, column: 16, file: !19, scope: !1)
!11 = !DILocation(line: 2, column: 6, scope: !10)
!12 = !DILocation(line: 2, column: 11, scope: !10)
diff --git a/test/DebugInfo/X86/dbg-subrange.ll b/test/DebugInfo/X86/dbg-subrange.ll
index e3bd07355cc3..5239daea800b 100644
--- a/test/DebugInfo/X86/dbg-subrange.ll
+++ b/test/DebugInfo/X86/dbg-subrange.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.7.2"
@s = common global [4294967296 x i8] zeroinitializer, align 16
; CHECK: .quad 4294967296 ## DW_AT_count
-define void @bar() nounwind uwtable ssp {
+define void @bar() nounwind uwtable ssp !dbg !5 {
entry:
store i8 97, i8* getelementptr inbounds ([4294967296 x i8], [4294967296 x i8]* @s, i32 0, i64 0), align 1, !dbg !18
ret void, !dbg !20
@@ -15,10 +15,10 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 144833)", isOptimized: false, emissionKind: 0, file: !21, enums: !1, retainedTypes: !1, subprograms: !3, globals: !11, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 144833)", isOptimized: false, emissionKind: 0, file: !21, enums: !1, retainedTypes: !1, subprograms: !3, globals: !11, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !21, scope: !6, type: !7, function: void ()* @bar)
+!5 = distinct !DISubprogram(name: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !21, scope: !6, type: !7)
!6 = !DIFile(filename: "small.c", directory: "/private/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{null}
diff --git a/test/DebugInfo/X86/dbg-value-const-byref.ll b/test/DebugInfo/X86/dbg-value-const-byref.ll
index a54ef0590e41..2b4c1dc1a937 100644
--- a/test/DebugInfo/X86/dbg-value-const-byref.ll
+++ b/test/DebugInfo/X86/dbg-value-const-byref.ll
@@ -47,7 +47,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; Function Attrs: nounwind ssp uwtable
-define i32 @foo() #0 {
+define i32 @foo() #0 !dbg !4 {
entry:
%i = alloca i32, align 4
call void @llvm.dbg.value(metadata i32 3, i64 0, metadata !10, metadata !DIExpression()), !dbg !15
@@ -56,7 +56,7 @@ entry:
%call1 = call i32 (...) @f1() #3, !dbg !19
call void @llvm.dbg.value(metadata i32 %call1, i64 0, metadata !10, metadata !DIExpression()), !dbg !19
store i32 %call1, i32* %i, align 4, !dbg !19, !tbaa !20
- call void @llvm.dbg.value(metadata i32* %i, i64 0, metadata !10, metadata !DIExpression()), !dbg !24
+ call void @llvm.dbg.value(metadata i32* %i, i64 0, metadata !10, metadata !DIExpression(DW_OP_deref)), !dbg !24
call void @f2(i32* %i) #3, !dbg !24
ret i32 0, !dbg !25
}
@@ -78,17 +78,17 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!11, !12}
!llvm.ident = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "dbg-value-const-byref.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !6, function: i32 ()* @foo, variables: !9)
+!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !9)
!5 = !DIFile(filename: "dbg-value-const-byref.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 6, scope: !4, file: !5, type: !8)
+!10 = !DILocalVariable(name: "i", line: 6, scope: !4, file: !5, type: !8)
!11 = !{i32 2, !"Dwarf Version", i32 2}
!12 = !{i32 1, !"Debug Info Version", i32 3}
!13 = !{!"clang version 3.5.0 "}
diff --git a/test/DebugInfo/X86/dbg-value-dag-combine.ll b/test/DebugInfo/X86/dbg-value-dag-combine.ll
index 4331c6591369..6243be8aa4a6 100644
--- a/test/DebugInfo/X86/dbg-value-dag-combine.ll
+++ b/test/DebugInfo/X86/dbg-value-dag-combine.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin10.0.0"
declare <4 x i32> @__amdil_get_global_id_int()
declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
-define void @__OpenCL_test_kernel(i32 addrspace(1)* %ip) nounwind {
+define void @__OpenCL_test_kernel(i32 addrspace(1)* %ip) nounwind !dbg !0 {
entry:
call void @llvm.dbg.value(metadata i32 addrspace(1)* %ip, i64 0, metadata !7, metadata !DIExpression()), !dbg !8
%0 = call <4 x i32> @__amdil_get_global_id_int() nounwind
@@ -15,7 +15,7 @@ entry:
call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !13, metadata !DIExpression()), !dbg !14
%tmp2 = load i32, i32 addrspace(1)* %ip, align 4, !dbg !15
%tmp3 = add i32 0, %tmp2, !dbg !15
-; CHECK: ##DEBUG_VALUE: idx <- E{{..$}}
+; CHECK: ##DEBUG_VALUE: idx <- %E{{..$}}
call void @llvm.dbg.value(metadata i32 %tmp3, i64 0, metadata !13, metadata !DIExpression()), !dbg !15
%arrayidx = getelementptr i32, i32 addrspace(1)* %ip, i32 %1, !dbg !16
store i32 %tmp3, i32 addrspace(1)* %arrayidx, align 4, !dbg !16
@@ -24,20 +24,20 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!20}
-!0 = !DISubprogram(name: "__OpenCL_test_kernel", linkageName: "__OpenCL_test_kernel", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !19, scope: !1, type: !3, function: void (i32 addrspace(1)*)* @__OpenCL_test_kernel)
+!0 = distinct !DISubprogram(name: "__OpenCL_test_kernel", linkageName: "__OpenCL_test_kernel", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !19, scope: !1, type: !3)
!1 = !DIFile(filename: "OCL6368.tmp.cl", directory: "E:CUsersCmvillmow.AMDCAppDataCLocalCTemp")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "clc", isOptimized: false, emissionKind: 1, file: !19, enums: !12, retainedTypes: !12, subprograms: !18, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "clc", isOptimized: false, emissionKind: 1, file: !19, enums: !12, retainedTypes: !12, subprograms: !18, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{null, !5}
!5 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !6)
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!7 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ip", line: 1, arg: 0, scope: !0, file: !1, type: !5)
+!7 = !DILocalVariable(name: "ip", line: 1, arg: 1, scope: !0, file: !1, type: !5)
!8 = !DILocation(line: 1, column: 42, scope: !0)
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "gid", line: 3, scope: !10, file: !1, type: !6)
+!9 = !DILocalVariable(name: "gid", line: 3, scope: !10, file: !1, type: !6)
!10 = distinct !DILexicalBlock(line: 2, column: 1, file: !19, scope: !0)
!11 = !DILocation(line: 3, column: 41, scope: !10)
!12 = !{}
-!13 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "idx", line: 4, scope: !10, file: !1, type: !6)
+!13 = !DILocalVariable(name: "idx", line: 4, scope: !10, file: !1, type: !6)
!14 = !DILocation(line: 4, column: 20, scope: !10)
!15 = !DILocation(line: 5, column: 15, scope: !10)
!16 = !DILocation(line: 6, column: 18, scope: !10)
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
index 3fa5cc5a98ab..7ec21f8cf7cb 100644
--- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -43,7 +43,7 @@
@p = common global %struct.S1 zeroinitializer, align 8
-define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp {
+define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata %struct.S1* %sp, i64 0, metadata !9, metadata !DIExpression()), !dbg !20
tail call void @llvm.dbg.value(metadata i32 %nums, i64 0, metadata !18, metadata !DIExpression()), !dbg !21
@@ -59,7 +59,7 @@ entry:
declare float* @bar(i32) optsize
-define void @foobar() nounwind optsize ssp {
+define void @foobar() nounwind optsize ssp !dbg !6 {
entry:
tail call void @llvm.dbg.value(metadata %struct.S1* @p, i64 0, metadata !9, metadata !DIExpression()) nounwind, !dbg !31
tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !18, metadata !DIExpression()) nounwind, !dbg !35
@@ -74,16 +74,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!43}
-!0 = !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !1, scope: !1, type: !3, function: i32 (%struct.S1*, i32)* @foo, variables: !41)
+!0 = distinct !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !1, scope: !1, type: !3, variables: !41)
!1 = !DIFile(filename: "nm2.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125693)", isOptimized: true, emissionKind: 1, file: !42, enums: !{}, retainedTypes: !{}, subprograms: !39, globals: !40, imports: !44)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125693)", isOptimized: true, emissionKind: 1, file: !42, enums: !{}, retainedTypes: !{}, subprograms: !39, globals: !40, imports: !44)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "foobar", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: !1, type: !7, function: void ()* @foobar)
+!6 = distinct !DISubprogram(name: "foobar", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: !1, type: !7)
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "sp", line: 7, arg: 1, scope: !0, file: !1, type: !10)
+!9 = !DILocalVariable(name: "sp", line: 7, arg: 1, scope: !0, file: !1, type: !10)
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !11)
!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "S1", line: 4, file: !42, scope: !2, baseType: !12)
!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "S1", line: 1, size: 128, align: 64, file: !42, scope: !2, elements: !13)
@@ -92,7 +92,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !16)
!16 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!17 = !DIDerivedType(tag: DW_TAG_member, name: "nums", line: 3, size: 32, align: 32, offset: 64, file: !42, scope: !1, baseType: !5)
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "nums", line: 7, arg: 2, scope: !0, file: !1, type: !5)
+!18 = !DILocalVariable(name: "nums", line: 7, arg: 2, scope: !0, file: !1, type: !5)
!19 = !DIGlobalVariable(name: "p", line: 14, isLocal: false, isDefinition: true, scope: !2, file: !1, type: !11, variable: %struct.S1* @p)
!20 = !DILocation(line: 7, column: 13, scope: !0)
!21 = !DILocation(line: 7, column: 21, scope: !0)
diff --git a/test/DebugInfo/X86/dbg-value-isel.ll b/test/DebugInfo/X86/dbg-value-isel.ll
index 04e4531d1d72..ae76beb81409 100644
--- a/test/DebugInfo/X86/dbg-value-isel.ll
+++ b/test/DebugInfo/X86/dbg-value-isel.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-apple-darwin10.0.0"
@lvgv = internal constant [0 x i8*] zeroinitializer
@llvm.global.annotations = appending global [1 x %0] [%0 { i8* bitcast (void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel to i8*), i8* addrspacecast ([1 x i8] addrspace(2)* @sgv to i8*), i8* addrspacecast ([1 x i8] addrspace(2)* @fgv to i8*), i8* bitcast ([0 x i8*]* @lvgv to i8*), i32 0 }], section "llvm.metadata"
-define void @__OpenCL_nbt02_kernel(i32 addrspace(1)* %ip) nounwind {
+define void @__OpenCL_nbt02_kernel(i32 addrspace(1)* %ip) nounwind !dbg !0 {
entry:
call void @llvm.dbg.value(metadata i32 addrspace(1)* %ip, i64 0, metadata !8, metadata !DIExpression()), !dbg !9
%0 = call <4 x i32> @__amdil_get_local_id_int() nounwind
@@ -81,22 +81,22 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!22}
-!0 = !DISubprogram(name: "__OpenCL_nbt02_kernel", linkageName: "__OpenCL_nbt02_kernel", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !20, scope: !1, type: !3, function: void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel)
+!0 = distinct !DISubprogram(name: "__OpenCL_nbt02_kernel", linkageName: "__OpenCL_nbt02_kernel", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !20, scope: !1, type: !3)
!1 = !DIFile(filename: "OCLlLwTXZ.cl", directory: "/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "clc", isOptimized: false, emissionKind: 1, file: !20, enums: !21, retainedTypes: !21, subprograms: !19, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "clc", isOptimized: false, emissionKind: 1, file: !20, enums: !21, retainedTypes: !21, subprograms: !19, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{null, !5}
!5 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !6)
!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint", file: !20, scope: !2, baseType: !7)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ip", line: 1, arg: 0, scope: !0, file: !1, type: !5)
+!8 = !DILocalVariable(name: "ip", line: 1, arg: 1, scope: !0, file: !1, type: !5)
!9 = !DILocation(line: 1, column: 32, scope: !0)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "tid", line: 3, scope: !11, file: !1, type: !6)
+!10 = !DILocalVariable(name: "tid", line: 3, scope: !11, file: !1, type: !6)
!11 = distinct !DILexicalBlock(line: 2, column: 1, file: !1, scope: !0)
!12 = !DILocation(line: 5, column: 24, scope: !11)
-!13 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "gid", line: 3, scope: !11, file: !1, type: !6)
+!13 = !DILocalVariable(name: "gid", line: 3, scope: !11, file: !1, type: !6)
!14 = !DILocation(line: 6, column: 25, scope: !11)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "lsz", line: 3, scope: !11, file: !1, type: !6)
+!15 = !DILocalVariable(name: "lsz", line: 3, scope: !11, file: !1, type: !6)
!16 = !DILocation(line: 7, column: 26, scope: !11)
!17 = !DILocation(line: 9, column: 24, scope: !11)
!18 = !DILocation(line: 10, column: 1, scope: !0)
diff --git a/test/DebugInfo/X86/dbg-value-location.ll b/test/DebugInfo/X86/dbg-value-location.ll
index 219fa7a5ff34..df9b4085bf37 100644
--- a/test/DebugInfo/X86/dbg-value-location.ll
+++ b/test/DebugInfo/X86/dbg-value-location.ll
@@ -16,7 +16,7 @@ target triple = "x86_64-apple-darwin10.0.0"
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp {
+define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp !dbg !0 {
entry:
call void @llvm.dbg.value(metadata i32 %dev, i64 0, metadata !12, metadata !DIExpression()), !dbg !13
%tmp.i = load i32, i32* @dfm, align 4, !dbg !14
@@ -50,23 +50,23 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!29}
-!0 = !DISubprogram(name: "foo", line: 19510, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 19510, file: !26, scope: !1, type: !3, function: i32 (i32, i64, i8*, i32)* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 19510, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 19510, file: !26, scope: !1, type: !3)
!1 = !DIFile(filename: "/tmp/f.c", directory: "/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 124753)", isOptimized: true, emissionKind: 0, file: !27, enums: !28, retainedTypes: !28, subprograms: !24, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 124753)", isOptimized: true, emissionKind: 0, file: !27, enums: !28, retainedTypes: !28, subprograms: !24, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "bar3", line: 14827, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !26, scope: !1, type: !3, function: i32 (i32)* @bar3)
-!7 = !DISubprogram(name: "bar2", line: 15397, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !26, scope: !1, type: !3, function: i32 (i32)* @bar2)
-!8 = !DISubprogram(name: "bar", line: 12382, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !26, scope: !1, type: !9, function: i32 (i32, i32*)* @bar)
+!6 = distinct !DISubprogram(name: "bar3", line: 14827, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !26, scope: !1, type: !3)
+!7 = distinct !DISubprogram(name: "bar2", line: 15397, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !26, scope: !1, type: !3)
+!8 = distinct !DISubprogram(name: "bar", line: 12382, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !26, scope: !1, type: !9)
!9 = !DISubroutineType(types: !10)
!10 = !{!11}
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "var", line: 19509, arg: 0, scope: !0, file: !1, type: !5)
+!12 = !DILocalVariable(name: "var", line: 19509, arg: 1, scope: !0, file: !1, type: !5)
!13 = !DILocation(line: 19509, column: 20, scope: !0)
!14 = !DILocation(line: 18091, column: 2, scope: !15, inlinedAt: !17)
!15 = distinct !DILexicalBlock(line: 18086, column: 1, file: !26, scope: !16)
-!16 = !DISubprogram(name: "foo_bar", line: 18086, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !26, scope: !1, type: !3)
+!16 = distinct !DISubprogram(name: "foo_bar", line: 18086, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !26, scope: !1, type: !3)
!17 = !DILocation(line: 19514, column: 2, scope: !18)
!18 = distinct !DILexicalBlock(line: 19510, column: 1, file: !26, scope: !0)
!22 = !DILocation(line: 18094, column: 2, scope: !15, inlinedAt: !17)
diff --git a/test/DebugInfo/X86/dbg-value-range.ll b/test/DebugInfo/X86/dbg-value-range.ll
index a8fa9073fa77..2bb5e021036f 100644
--- a/test/DebugInfo/X86/dbg-value-range.ll
+++ b/test/DebugInfo/X86/dbg-value-range.ll
@@ -2,7 +2,7 @@
%struct.a = type { i32 }
-define i32 @bar(%struct.a* nocapture %b) nounwind ssp {
+define i32 @bar(%struct.a* nocapture %b) nounwind ssp !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata %struct.a* %b, i64 0, metadata !6, metadata !DIExpression()), !dbg !13
%tmp1 = getelementptr inbounds %struct.a, %struct.a* %b, i64 0, i32 0, !dbg !14
@@ -20,18 +20,18 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!24}
-!0 = !DISubprogram(name: "bar", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !22, scope: !1, type: !3, function: i32 (%struct.a*)* @bar, variables: !21)
+!0 = distinct !DISubprogram(name: "bar", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !22, scope: !1, type: !3, variables: !21)
!1 = !DIFile(filename: "bar.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 122997)", isOptimized: true, emissionKind: 1, file: !22, enums: !23, retainedTypes: !23, subprograms: !20, imports: null)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 122997)", isOptimized: true, emissionKind: 1, file: !22, enums: !23, retainedTypes: !23, subprograms: !20, imports: null)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 0, scope: !0, file: !1, type: !7)
+!6 = !DILocalVariable(name: "b", line: 5, arg: 1, scope: !0, file: !1, type: !7)
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !8)
!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "a", line: 1, size: 32, align: 32, file: !22, scope: !2, elements: !9)
!9 = !{!10}
!10 = !DIDerivedType(tag: DW_TAG_member, name: "c", line: 2, size: 32, align: 32, file: !22, scope: !1, baseType: !5)
-!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 6, scope: !12, file: !1, type: !5)
+!11 = !DILocalVariable(name: "x", line: 6, scope: !12, file: !1, type: !5)
!12 = distinct !DILexicalBlock(line: 5, column: 22, file: !22, scope: !0)
!13 = !DILocation(line: 5, column: 19, scope: !0)
!14 = !DILocation(line: 6, column: 14, scope: !12)
diff --git a/test/DebugInfo/X86/dbg-value-terminator.ll b/test/DebugInfo/X86/dbg-value-terminator.ll
index 5f50b13aa024..8b8bdec18ab1 100644
--- a/test/DebugInfo/X86/dbg-value-terminator.ll
+++ b/test/DebugInfo/X86/dbg-value-terminator.ll
@@ -9,7 +9,7 @@
; CHECK: ##DEBUG_VALUE: foo:i
%a = type { i32, i32 }
-define hidden fastcc %a* @test() #1 {
+define hidden fastcc %a* @test() #1 !dbg !1 {
entry:
%0 = icmp eq %a* undef, null, !dbg !12
br i1 %0, label %"14", label %return, !dbg !12
@@ -113,17 +113,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version", isOptimized: true, emissionKind: 1, file: !20, enums: !21, retainedTypes: !21, subprograms: !18, imports: null)
-!1 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !20, scope: !2, type: !3, function: %a* ()* @test, variables: !19)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version", isOptimized: true, emissionKind: 1, file: !20, enums: !21, retainedTypes: !21, subprograms: !18, imports: null)
+!1 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !20, scope: !2, type: !3, variables: !19)
!2 = !DIFile(filename: "a.c", directory: "/private/tmp")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 2, arg: 1, scope: !1, file: !2, type: !5)
-!7 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 2, arg: 2, scope: !1, file: !2, type: !8)
+!6 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !1, file: !2, type: !5)
+!7 = !DILocalVariable(name: "c", line: 2, arg: 2, scope: !1, file: !2, type: !8)
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !0, baseType: !9)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 3, scope: !11, file: !2, type: !9)
+!10 = !DILocalVariable(name: "a", line: 3, scope: !11, file: !2, type: !9)
!11 = distinct !DILexicalBlock(line: 2, column: 25, file: !20, scope: !1)
!12 = !DILocation(line: 2, column: 13, scope: !1)
!18 = !{!1}
diff --git a/test/DebugInfo/X86/dbg_value_direct.ll b/test/DebugInfo/X86/dbg_value_direct.ll
index 6446dfe491a6..b7f1ec8ccaa3 100644
--- a/test/DebugInfo/X86/dbg_value_direct.ll
+++ b/test/DebugInfo/X86/dbg_value_direct.ll
@@ -23,7 +23,7 @@ target triple = "x86_64-unknown-linux-gnu"
@__asan_gen_ = private unnamed_addr constant [16 x i8] c"1 32 4 5 .addr \00", align 1
; Function Attrs: sanitize_address uwtable
-define void @_Z4funci(%struct.A* noalias sret %agg.result, i32) #0 "stack-protector-buffer-size"="1" {
+define void @_Z4funci(%struct.A* noalias sret %agg.result, i32) #0 "stack-protector-buffer-size"="1" !dbg !4 {
entry:
%MyAlloca = alloca [96 x i8], align 32
%1 = ptrtoint [96 x i8]* %MyAlloca to i64
@@ -147,11 +147,11 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22, !27}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "crash.cpp", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "func", linkageName: "_Z4funci", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, function: void (%struct.A*, i32)* @_Z4funci, variables: !2)
+!4 = distinct !DISubprogram(name: "func", linkageName: "_Z4funci", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "crash.cpp", directory: "/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !21}
@@ -168,8 +168,8 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!19 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !8)
!21 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!22 = !{i32 2, !"Dwarf Version", i32 3}
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 6, arg: 1, scope: !4, file: !5, type: !21)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 7, scope: !4, file: !5, type: !8)
+!23 = !DILocalVariable(name: "", line: 6, arg: 1, scope: !4, file: !5, type: !21)
+!24 = !DILocalVariable(name: "a", line: 7, scope: !4, file: !5, type: !8)
!25 = !DILocation(line: 7, scope: !4)
!26 = !DILocation(line: 8, scope: !4)
!27 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/debug-dead-local-var.ll b/test/DebugInfo/X86/debug-dead-local-var.ll
index d56d17d06e61..9a2872aeb0a8 100644
--- a/test/DebugInfo/X86/debug-dead-local-var.ll
+++ b/test/DebugInfo/X86/debug-dead-local-var.ll
@@ -16,7 +16,7 @@
; CHECK: DW_TAG_structure_type
; Function Attrs: nounwind readnone uwtable
-define i32 @bar() #0 {
+define i32 @bar() #0 !dbg !4 {
entry:
ret i32 1, !dbg !21
}
@@ -27,20 +27,20 @@ attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-fra
!llvm.module.flags = !{!18, !19}
!llvm.ident = !{!20}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "debug-dead-local-var.c", directory: "/usr/local/google/home/echristo")
!2 = !{}
!3 = !{!4, !9}
-!4 = !DISubprogram(name: "bar", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 11, file: !1, scope: !5, type: !6, function: i32 ()* @bar, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 11, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "debug-dead-local-var.c", directory: "/usr/local/google/home/echristo")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DISubprogram(name: "foo", line: 6, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !10, variables: !12)
+!9 = distinct !DISubprogram(name: "foo", line: 6, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !10, variables: !12)
!10 = !DISubroutineType(types: !11)
!11 = !{null}
!12 = !{!13}
-!13 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xyz", line: 8, scope: !9, file: !5, type: !14)
+!13 = !DILocalVariable(name: "xyz", line: 8, scope: !9, file: !5, type: !14)
!14 = !DICompositeType(tag: DW_TAG_structure_type, name: "X", line: 8, size: 64, align: 32, file: !1, scope: !9, elements: !15)
!15 = !{!16, !17}
!16 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 8, size: 32, align: 32, file: !1, scope: !14, baseType: !8)
diff --git a/test/DebugInfo/X86/debug-info-access.ll b/test/DebugInfo/X86/debug-info-access.ll
index 21da7b2db1bb..c9a50395c716 100644
--- a/test/DebugInfo/X86/debug-info-access.ll
+++ b/test/DebugInfo/X86/debug-info-access.ll
@@ -96,7 +96,7 @@ target triple = "x86_64-apple-macosx10.10.0"
@u = global %union.U zeroinitializer, align 4
; Function Attrs: nounwind ssp uwtable
-define void @_Z4freev() #0 {
+define void @_Z4freev() #0 !dbg !30 {
ret void, !dbg !41
}
@@ -106,7 +106,7 @@ attributes #0 = { nounwind ssp uwtable }
!llvm.module.flags = !{!38, !39}
!llvm.ident = !{!40}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !29, globals: !34, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !29, globals: !34, imports: !2)
!1 = !DIFile(filename: "/llvm/tools/clang/test/CodeGenCXX/debug-info-access.cpp", directory: "")
!2 = !{}
!3 = !{!4, !12, !22}
@@ -136,7 +136,7 @@ attributes #0 = { nounwind ssp uwtable }
!27 = !{null, !28}
!28 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1U")
!29 = !{!30}
-!30 = !DISubprogram(name: "free", linkageName: "_Z4freev", line: 35, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 35, file: !1, scope: !31, type: !32, function: void ()* @_Z4freev, variables: !2)
+!30 = distinct !DISubprogram(name: "free", linkageName: "_Z4freev", line: 35, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 35, file: !1, scope: !31, type: !32, variables: !2)
!31 = !DIFile(filename: "/llvm/tools/clang/test/CodeGenCXX/debug-info-access.cpp", directory: "")
!32 = !DISubroutineType(types: !33)
!33 = !{null}
diff --git a/test/DebugInfo/X86/debug-info-block-captured-self.ll b/test/DebugInfo/X86/debug-info-block-captured-self.ll
index e79b093d031a..1c3dc24b3240 100644
--- a/test/DebugInfo/X86/debug-info-block-captured-self.ll
+++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll
@@ -64,14 +64,14 @@
%0 = type opaque
%struct.__block_descriptor = type { i64, i64 }
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-define internal void @"__24-[Main initWithContext:]_block_invoke"(i8* %.block_descriptor, i8* %obj) #0 {
+define internal void @"__24-[Main initWithContext:]_block_invoke"(i8* %.block_descriptor, i8* %obj) #0 !dbg !38 {
%block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !84
%block.captured-self = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i32 0, i32 5, !dbg !84
call void @llvm.dbg.declare(metadata <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, metadata !86, metadata !110), !dbg !87
ret void, !dbg !87
}
-define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_descriptor, i8* %object) #0 {
+define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_descriptor, i8* %object) #0 !dbg !42 {
%block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
%block.captured-self = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i32 0, i32 5, !dbg !103
call void @llvm.dbg.declare(metadata <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, metadata !105, metadata !109), !dbg !106
@@ -80,7 +80,7 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!108}
-!0 = !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: false, runtimeVersion: 2, emissionKind: 0, file: !107, enums: !2, retainedTypes: !4, subprograms: !23, globals: !15, imports: !15)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: false, runtimeVersion: 2, emissionKind: 0, file: !107, enums: !2, retainedTypes: !4, subprograms: !23, globals: !15, imports: !15)
!1 = !DIFile(filename: "llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", directory: "")
!2 = !{!3}
!3 = !DICompositeType(tag: DW_TAG_enumeration_type, line: 20, size: 32, align: 32, file: !107, elements: !4)
@@ -95,16 +95,16 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_
!32 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, baseType: !33)
!33 = !DICompositeType(tag: DW_TAG_structure_type, name: "objc_class", flags: DIFlagFwdDecl, file: !107)
!34 = !DICompositeType(tag: DW_TAG_structure_type, name: "Main", line: 23, flags: DIFlagArtificial | DIFlagObjectPointer, runtimeLang: DW_LANG_ObjC, file: !107)
-!38 = !DISubprogram(name: "__24-[Main initWithContext:]_block_invoke", line: 33, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 33, file: !1, scope: !1, type: !39, function: void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke", variables: !15)
+!38 = distinct !DISubprogram(name: "__24-[Main initWithContext:]_block_invoke", line: 33, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 33, file: !1, scope: !1, type: !39, variables: !15)
!39 = !DISubroutineType(types: !40)
!40 = !{null, !41, !27}
!41 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
-!42 = !DISubprogram(name: "__24-[Main initWithContext:]_block_invoke_2", line: 35, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 35, file: !1, scope: !1, type: !39, function: void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", variables: !15)
+!42 = distinct !DISubprogram(name: "__24-[Main initWithContext:]_block_invoke_2", line: 35, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 35, file: !1, scope: !1, type: !39, variables: !15)
!84 = !DILocation(line: 33, scope: !38)
-!86 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "self", line: 41, scope: !38, file: !1, type: !34)
+!86 = !DILocalVariable(name: "self", line: 41, scope: !38, file: !1, type: !34)
!87 = !DILocation(line: 41, scope: !38)
!103 = !DILocation(line: 35, scope: !42)
-!105 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "self", line: 40, scope: !42, file: !1, type: !34)
+!105 = !DILocalVariable(name: "self", line: 40, scope: !42, file: !1, type: !34)
!106 = !DILocation(line: 40, scope: !42)
!107 = !DIFile(filename: "llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", directory: "")
!108 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll
index a9aadfdb7313..8a929491f28d 100644
--- a/test/DebugInfo/X86/debug-info-blocks.ll
+++ b/test/DebugInfo/X86/debug-info-blocks.ll
@@ -95,7 +95,7 @@ target triple = "x86_64-apple-darwin"
@"\01L_OBJC_LABEL_CLASS_$" = internal global [1 x i8*] [i8* bitcast (%struct._class_t* @"OBJC_CLASS_$_A" to i8*)], section "__DATA, __objc_classlist, regular, no_dead_strip", align 8
@llvm.used = appending global [14 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_SUP_REFS_$_" to i8*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @"\01L_OBJC_METH_VAR_NAME_2", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([8 x i8], [8 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_A" to i8*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01L_OBJC_METH_VAR_TYPE_4", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_A" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5" to i8*), i8* bitcast ([1 x i8*]* @"\01L_OBJC_LABEL_CLASS_$" to i8*)], section "llvm.metadata"
-define internal i8* @"\01-[A init]"(%0* %self, i8* %_cmd) #0 {
+define internal i8* @"\01-[A init]"(%0* %self, i8* %_cmd) #0 !dbg !13 {
%1 = alloca %0*, align 8
%2 = alloca i8*, align 8
%3 = alloca %struct._objc_super
@@ -147,7 +147,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare i8* @objc_msgSendSuper2(%struct._objc_super*, i8*, ...)
-define internal void @run(void ()* %block) #0 {
+define internal void @run(void ()* %block) #0 !dbg !39 {
%1 = alloca void ()*, align 8
store void ()* %block, void ()** %1, align 8
call void @llvm.dbg.declare(metadata void ()** %1, metadata !72, metadata !DIExpression()), !dbg !73
@@ -161,7 +161,7 @@ define internal void @run(void ()* %block) #0 {
ret void, !dbg !75
}
-define internal void @"__9-[A init]_block_invoke"(i8* %.block_descriptor) #0 {
+define internal void @"__9-[A init]_block_invoke"(i8* %.block_descriptor) #0 !dbg !27 {
%1 = alloca i8*, align 8
%2 = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, align 8
%d = alloca %1*, align 8
@@ -206,7 +206,7 @@ declare i8* @objc_msgSend_fixup(i8*, %struct._message_ref_t*, ...)
declare i8* @objc_msgSend(i8*, i8*, ...) #2
-define internal void @__copy_helper_block_(i8*, i8*) {
+define internal void @__copy_helper_block_(i8*, i8*) !dbg !31 {
%3 = alloca i8*, align 8
%4 = alloca i8*, align 8
store i8* %0, i8** %3, align 8
@@ -228,7 +228,7 @@ define internal void @__copy_helper_block_(i8*, i8*) {
declare void @_Block_object_assign(i8*, i8*, i32)
-define internal void @__destroy_helper_block_(i8*) {
+define internal void @__destroy_helper_block_(i8*) !dbg !35 {
%2 = alloca i8*, align 8
store i8* %0, i8** %2, align 8
call void @llvm.dbg.declare(metadata i8** %2, metadata !105, metadata !DIExpression()), !dbg !106
@@ -243,7 +243,7 @@ define internal void @__destroy_helper_block_(i8*) {
declare void @_Block_object_dispose(i8*, i32)
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !36 {
%1 = alloca i32, align 4
%a = alloca %0*, align 8
store i32 0, i32* %1
@@ -270,7 +270,7 @@ attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!56, !57, !58, !59, !110}
-!0 = !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: false, runtimeVersion: 2, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: false, runtimeVersion: 2, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !2, imports: !2)
!1 = !DIFile(filename: "llvm/tools/clang/test/CodeGenObjC/<unknown>", directory: "llvm/_build.ninja.Debug")
!2 = !{}
!3 = !{!4}
@@ -283,7 +283,7 @@ attributes #3 = { nounwind }
!10 = !DIDerivedType(tag: DW_TAG_member, name: "ivar", line: 35, size: 32, align: 32, file: !5, scope: !6, baseType: !11)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!12 = !{!13, !27, !31, !35, !36, !39}
-!13 = !DISubprogram(name: "-[A init]", line: 46, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 46, file: !5, scope: !6, type: !14, function: i8* (%0*, i8*)* @"\01-[A init]", variables: !2)
+!13 = distinct !DISubprogram(name: "-[A init]", line: 46, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 46, file: !5, scope: !6, type: !14, variables: !2)
!14 = !DISubroutineType(types: !15)
!15 = !{!16, !23, !24}
!16 = !DIDerivedType(tag: DW_TAG_typedef, name: "id", line: 46, file: !5, baseType: !17)
@@ -297,19 +297,19 @@ attributes #3 = { nounwind }
!24 = !DIDerivedType(tag: DW_TAG_typedef, name: "SEL", line: 46, flags: DIFlagArtificial, file: !5, baseType: !25)
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !26)
!26 = !DICompositeType(tag: DW_TAG_structure_type, name: "objc_selector", flags: DIFlagFwdDecl, file: !1)
-!27 = !DISubprogram(name: "__9-[A init]_block_invoke", line: 49, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 49, file: !5, scope: !6, type: !28, function: void (i8*)* @"__9-[A init]_block_invoke", variables: !2)
+!27 = distinct !DISubprogram(name: "__9-[A init]_block_invoke", line: 49, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 49, file: !5, scope: !6, type: !28, variables: !2)
!28 = !DISubroutineType(types: !29)
!29 = !{null, !30}
!30 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
-!31 = !DISubprogram(name: "__copy_helper_block_", line: 52, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 52, file: !1, scope: !32, type: !33, function: void (i8*, i8*)* @__copy_helper_block_, variables: !2)
+!31 = distinct !DISubprogram(name: "__copy_helper_block_", line: 52, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 52, file: !1, scope: !32, type: !33, variables: !2)
!32 = !DIFile(filename: "llvm/tools/clang/test/CodeGenObjC/<unknown>", directory: "llvm/_build.ninja.Debug")
!33 = !DISubroutineType(types: !34)
!34 = !{null, !30, !30}
-!35 = !DISubprogram(name: "__destroy_helper_block_", line: 52, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 52, file: !1, scope: !32, type: !28, function: void (i8*)* @__destroy_helper_block_, variables: !2)
-!36 = !DISubprogram(name: "main", line: 59, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 60, file: !5, scope: !6, type: !37, function: i32 ()* @main, variables: !2)
+!35 = distinct !DISubprogram(name: "__destroy_helper_block_", line: 52, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 52, file: !1, scope: !32, type: !28, variables: !2)
+!36 = distinct !DISubprogram(name: "main", line: 59, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 60, file: !5, scope: !6, type: !37, variables: !2)
!37 = !DISubroutineType(types: !38)
!38 = !{!11}
-!39 = !DISubprogram(name: "run", line: 39, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 40, file: !5, scope: !6, type: !40, function: void (void ()*)* @run, variables: !2)
+!39 = distinct !DISubprogram(name: "run", line: 39, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 40, file: !5, scope: !6, type: !40, variables: !2)
!40 = !DISubroutineType(types: !41)
!41 = !{null, !42}
!42 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, baseType: !43)
@@ -330,10 +330,10 @@ attributes #3 = { nounwind }
!57 = !{i32 1, !"Objective-C Image Info Version", i32 0}
!58 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
!59 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
-!60 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "self", line: 46, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !13, file: !32, type: !61)
+!60 = !DILocalVariable(name: "self", line: 46, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !13, file: !32, type: !61)
!61 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !4)
!62 = !DILocation(line: 46, scope: !13)
-!63 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "_cmd", line: 46, arg: 2, flags: DIFlagArtificial, scope: !13, file: !32, type: !64)
+!63 = !DILocalVariable(name: "_cmd", line: 46, arg: 2, flags: DIFlagArtificial, scope: !13, file: !32, type: !64)
!64 = !DIDerivedType(tag: DW_TAG_typedef, name: "SEL", line: 46, file: !5, baseType: !25)
!65 = !DILocation(line: 48, scope: !66)
!66 = distinct !DILexicalBlock(line: 47, column: 0, file: !5, scope: !13)
@@ -342,11 +342,11 @@ attributes #3 = { nounwind }
!69 = distinct !DILexicalBlock(line: 48, column: 0, file: !5, scope: !66)
!70 = !DILocation(line: 53, scope: !69)
!71 = !DILocation(line: 54, scope: !66)
-!72 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "block", line: 39, arg: 1, scope: !39, file: !6, type: !42)
+!72 = !DILocalVariable(name: "block", line: 39, arg: 1, scope: !39, file: !6, type: !42)
!73 = !DILocation(line: 39, scope: !39)
!74 = !DILocation(line: 41, scope: !39)
!75 = !DILocation(line: 42, scope: !39)
-!76 = !DILocalVariable(tag: DW_TAG_arg_variable, name: ".block_descriptor", line: 49, arg: 1, flags: DIFlagArtificial, scope: !27, file: !6, type: !77)
+!76 = !DILocalVariable(name: ".block_descriptor", line: 49, arg: 1, flags: DIFlagArtificial, scope: !27, file: !6, type: !77)
!77 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, baseType: !78)
!78 = !DICompositeType(tag: DW_TAG_structure_type, name: "__block_literal_1", line: 49, size: 320, align: 64, file: !5, scope: !6, elements: !79)
!79 = !{!80, !81, !82, !83, !84, !87}
@@ -359,9 +359,9 @@ attributes #3 = { nounwind }
!86 = !DICompositeType(tag: DW_TAG_structure_type, name: "__block_descriptor_withcopydispose", line: 49, flags: DIFlagFwdDecl, file: !1)
!87 = !DIDerivedType(tag: DW_TAG_member, name: "self", line: 49, size: 64, align: 64, offset: 256, file: !5, scope: !6, baseType: !61)
!88 = !DILocation(line: 49, scope: !27)
-!89 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "self", line: 52, scope: !27, file: !32, type: !23)
+!89 = !DILocalVariable(name: "self", line: 52, scope: !27, file: !32, type: !23)
!90 = !DILocation(line: 52, scope: !27)
-!91 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", line: 50, scope: !92, file: !6, type: !93)
+!91 = !DILocalVariable(name: "d", line: 50, scope: !92, file: !6, type: !93)
!92 = distinct !DILexicalBlock(line: 49, column: 0, file: !5, scope: !27)
!93 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !94)
!94 = !DICompositeType(tag: DW_TAG_structure_type, name: "NSMutableDictionary", line: 30, align: 8, runtimeLang: DW_LANG_ObjC, file: !5, scope: !6, elements: !95)
@@ -372,12 +372,12 @@ attributes #3 = { nounwind }
!99 = !DIDerivedType(tag: DW_TAG_inheritance, scope: !97, baseType: !9)
!100 = !DILocation(line: 50, scope: !92)
!101 = !DILocation(line: 51, scope: !92)
-!102 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 52, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !31, file: !32, type: !30)
+!102 = !DILocalVariable(name: "", line: 52, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !31, file: !32, type: !30)
!103 = !DILocation(line: 52, scope: !31)
-!104 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 52, arg: 2, flags: DIFlagArtificial, scope: !31, file: !32, type: !30)
-!105 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 52, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, file: !32, type: !30)
+!104 = !DILocalVariable(name: "", line: 52, arg: 2, flags: DIFlagArtificial, scope: !31, file: !32, type: !30)
+!105 = !DILocalVariable(name: "", line: 52, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, file: !32, type: !30)
!106 = !DILocation(line: 52, scope: !35)
-!107 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 61, scope: !36, file: !6, type: !61)
+!107 = !DILocalVariable(name: "a", line: 61, scope: !36, file: !6, type: !61)
!108 = !DILocation(line: 61, scope: !36)
!109 = !DILocation(line: 62, scope: !36)
!110 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/debug-info-packed-struct.ll b/test/DebugInfo/X86/debug-info-packed-struct.ll
index 6829c2d13740..50b68a714ea8 100644
--- a/test/DebugInfo/X86/debug-info-packed-struct.ll
+++ b/test/DebugInfo/X86/debug-info-packed-struct.ll
@@ -148,7 +148,7 @@ target triple = "x86_64-apple-darwin"
!llvm.module.flags = !{!45, !46}
!llvm.ident = !{!47}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240791) (llvm/trunk 240790)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 240791) (llvm/trunk 240790)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "/llvm/tools/clang/test/CodeGen/<stdin>", directory: "/llvm/_build.ninja.release")
!2 = !{}
!3 = !{!4, !18, !25, !35}
diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll
index 1dc85b954876..a42279b0a4af 100644
--- a/test/DebugInfo/X86/debug-info-static-member.ll
+++ b/test/DebugInfo/X86/debug-info-static-member.ll
@@ -42,7 +42,7 @@
@_ZN1C1bE = global i32 2, align 4
@_ZN1C1cE = global i32 1, align 4
-define i32 @main() nounwind uwtable {
+define i32 @main() nounwind uwtable !dbg !5 {
entry:
%retval = alloca i32, align 4
%instance_C = alloca %class.C, align 4
@@ -59,10 +59,10 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!34}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 171914)", isOptimized: false, emissionKind: 0, file: !33, enums: !1, retainedTypes: !1, subprograms: !3, globals: !10, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 171914)", isOptimized: false, emissionKind: 0, file: !33, enums: !1, retainedTypes: !1, subprograms: !3, globals: !10, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !33, scope: !6, type: !7, function: i32 ()* @main, variables: !1)
+!5 = distinct !DISubprogram(name: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !33, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "/usr/local/google/home/blaikie/Development/llvm/src/tools/clang/test/CodeGenCXX/debug-info-static-member.cpp", directory: "/home/blaikie/local/Development/llvm/build/clang/x86-64/Debug/llvm")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
@@ -85,7 +85,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!26 = !DIDerivedType(tag: DW_TAG_member, name: "d", line: 11, size: 32, align: 32, flags: DIFlagPublic, file: !33, scope: !13, baseType: !9)
!27 = !DIGlobalVariable(name: "b", linkageName: "_ZN1C1bE", line: 15, isLocal: false, isDefinition: true, scope: null, file: !6, type: !9, variable: i32* @_ZN1C1bE, declaration: !19)
!28 = !DIGlobalVariable(name: "c", linkageName: "_ZN1C1cE", line: 16, isLocal: false, isDefinition: true, scope: null, file: !6, type: !9, variable: i32* @_ZN1C1cE, declaration: !23)
-!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "instance_C", line: 20, scope: !5, file: !6, type: !13)
+!29 = !DILocalVariable(name: "instance_C", line: 20, scope: !5, file: !6, type: !13)
!30 = !DILocation(line: 20, scope: !5)
!31 = !DILocation(line: 21, scope: !5)
!32 = !DILocation(line: 22, scope: !5)
diff --git a/test/DebugInfo/X86/debug-loc-asan.ll b/test/DebugInfo/X86/debug-loc-asan.ll
index 68bef4904800..2e0135574496 100644
--- a/test/DebugInfo/X86/debug-loc-asan.ll
+++ b/test/DebugInfo/X86/debug-loc-asan.ll
@@ -7,15 +7,14 @@
; int bar(int y) {
; return y + 2;
; }
-; with "clang++ -S -emit-llvm -fsanitize=address -O0 -g test.cc"
+; with "clang++ -S -emit-llvm -mllvm -asan-skip-promotable-allocas=0 -fsanitize=address -O0 -g test.cc"
-; First, argument variable "y" resides in %rdi:
-; CHECK: DEBUG_VALUE: bar:y <- RDI
-
-; Then its address is stored in a location on a stack:
+; The address of the (potentially now malloc'ed) alloca ends up
+; in RDI, after which it is spilled to the stack. We record the
+; spill OFFSET on the stack for checking the debug info below.
+; CHECK: #DEBUG_VALUE: bar:y <- [%RDI+0]
; CHECK: movq %rdi, [[OFFSET:[0-9]+]](%rsp)
; CHECK-NEXT: [[START_LABEL:.Ltmp[0-9]+]]
-; CHECK-NEXT: DEBUG_VALUE: bar:y <- [RSP+[[OFFSET]]]
; This location should be valid until the end of the function.
; CHECK: .Ldebug_loc{{[0-9]+}}:
@@ -42,7 +41,7 @@ target triple = "x86_64-unknown-linux-gnu"
@__asan_gen_ = private unnamed_addr constant [16 x i8] c"1 32 4 6 y.addr\00", align 1
; Function Attrs: nounwind sanitize_address uwtable
-define i32 @_Z3bari(i32 %y) #0 {
+define i32 @_Z3bari(i32 %y) #0 !dbg !4 {
entry:
%MyAlloca = alloca [64 x i8], align 32
%0 = ptrtoint [64 x i8]* %MyAlloca to i64
@@ -165,11 +164,11 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (209308)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (209308)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.cc", directory: "/llvm_cmake_gcc")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "bar", linkageName: "_Z3bari", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @_Z3bari, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3bari", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.cc", directory: "/llvm_cmake_gcc")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
@@ -177,6 +176,6 @@ attributes #1 = { nounwind readnone }
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5.0 (209308)"}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!12 = !DILocalVariable(name: "y", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!13 = !DILocation(line: 2, scope: !4)
!14 = !DIExpression(DW_OP_deref)
diff --git a/test/DebugInfo/X86/debug-loc-empty-entries.ll b/test/DebugInfo/X86/debug-loc-empty-entries.ll
index 3b997fd35e06..776bdbddfb02 100644
--- a/test/DebugInfo/X86/debug-loc-empty-entries.ll
+++ b/test/DebugInfo/X86/debug-loc-empty-entries.ll
@@ -24,7 +24,7 @@
;; }
; Function Attrs: noreturn nounwind readnone
-define void @_Z3fn1v() #0 {
+define void @_Z3fn1v() #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata float 1.000000e+00, i64 0, metadata !9, metadata !14), !dbg !15
br label %for.cond, !dbg !16
@@ -44,16 +44,16 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!11, !12}
!llvm.ident = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 238517) (llvm/trunk 238524)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 238517) (llvm/trunk 238524)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "/Users/dexonsmith/data/llvm/bootstrap/play/delta2/testcase")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "fn1", linkageName: "_Z3fn1v", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, function: void ()* @_Z3fn1v, variables: !8)
+!4 = distinct !DISubprogram(name: "fn1", linkageName: "_Z3fn1v", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
!5 = !DIFile(filename: "t.cpp", directory: "/Users/dexonsmith/data/llvm/bootstrap/play/delta2/testcase")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
!8 = !{!9}
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", scope: !4, file: !5, line: 2, type: !10)
+!9 = !DILocalVariable(name: "a", scope: !4, file: !5, line: 2, type: !10)
!10 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!11 = !{i32 2, !"Dwarf Version", i32 4}
!12 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/debug-loc-offset.ll b/test/DebugInfo/X86/debug-loc-offset.ll
index cf2fcc8e2229..d6302a1e4247 100644
--- a/test/DebugInfo/X86/debug-loc-offset.ll
+++ b/test/DebugInfo/X86/debug-loc-offset.ll
@@ -60,7 +60,7 @@
%struct.A = type { i32 (...)**, i32 }
; Function Attrs: nounwind
-define i32 @_Z3bari(i32 %b) #0 {
+define i32 @_Z3bari(i32 %b) #0 !dbg !4 {
entry:
%b.addr = alloca i32, align 4
store i32 %b, i32* %b.addr, align 4
@@ -73,7 +73,7 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-define void @_Z3baz1A(%struct.A* %a) #2 {
+define void @_Z3baz1A(%struct.A* %a) #2 !dbg !14 {
entry:
%z = alloca i32, align 4
call void @llvm.dbg.declare(metadata %struct.A* %a, metadata !24, metadata !DIExpression(DW_OP_deref)), !dbg !25
@@ -116,33 +116,33 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!18, !19}
!llvm.ident = !{!20, !20}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (210479)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (210479)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "debug-loc-offset1.cc", directory: "/llvm_cmake_gcc")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "bar", linkageName: "_Z3bari", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @_Z3bari, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3bari", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "debug-loc-offset1.cc", directory: "/llvm_cmake_gcc")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (210479)", isOptimized: false, emissionKind: 1, file: !10, enums: !2, retainedTypes: !11, subprograms: !13, globals: !2, imports: !2)
+!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (210479)", isOptimized: false, emissionKind: 1, file: !10, enums: !2, retainedTypes: !11, subprograms: !13, globals: !2, imports: !2)
!10 = !DIFile(filename: "debug-loc-offset2.cc", directory: "/llvm_cmake_gcc")
!11 = !{!12}
!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "A", line: 1, flags: DIFlagFwdDecl, file: !10, identifier: "_ZTS1A")
!13 = !{!14}
-!14 = !DISubprogram(name: "baz", linkageName: "_Z3baz1A", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !10, scope: !15, type: !16, function: void (%struct.A*)* @_Z3baz1A, variables: !2)
+!14 = distinct !DISubprogram(name: "baz", linkageName: "_Z3baz1A", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !10, scope: !15, type: !16, variables: !2)
!15 = !DIFile(filename: "debug-loc-offset2.cc", directory: "/llvm_cmake_gcc")
!16 = !DISubroutineType(types: !17)
!17 = !{null, !12}
!18 = !{i32 2, !"Dwarf Version", i32 4}
!19 = !{i32 2, !"Debug Info Version", i32 3}
!20 = !{!"clang version 3.5.0 (210479)"}
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!21 = !DILocalVariable(name: "b", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!22 = !DILocation(line: 1, scope: !4)
!23 = !DILocation(line: 2, scope: !4)
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 6, arg: 1, scope: !14, file: !15, type: !"_ZTS1A")
+!24 = !DILocalVariable(name: "a", line: 6, arg: 1, scope: !14, file: !15, type: !"_ZTS1A")
!25 = !DILocation(line: 6, scope: !14)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "z", line: 7, scope: !14, file: !15, type: !8)
+!26 = !DILocalVariable(name: "z", line: 7, scope: !14, file: !15, type: !8)
!27 = !DILocation(line: 7, scope: !14)
!28 = !DILocation(line: 8, scope: !29)
!29 = distinct !DILexicalBlock(line: 8, column: 0, file: !10, scope: !14)
diff --git a/test/DebugInfo/X86/debug-ranges-offset.ll b/test/DebugInfo/X86/debug-ranges-offset.ll
index 19c3bcf37483..520f3c45e18c 100644
--- a/test/DebugInfo/X86/debug-ranges-offset.ll
+++ b/test/DebugInfo/X86/debug-ranges-offset.ll
@@ -19,7 +19,7 @@
@_end = external hidden global i32
; Function Attrs: sanitize_memory uwtable
-define void @_Z1fv() #0 {
+define void @_Z1fv() #0 !dbg !4 {
entry:
%p = alloca i32*, align 8
%0 = ptrtoint i32** %p to i64, !dbg !19
@@ -82,7 +82,7 @@ if.end: ; preds = %16, %if.then
declare i8* @_Znwm(i64) #1
; Function Attrs: sanitize_memory uwtable
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !13 {
entry:
%p.i = alloca i32*, align 8
%0 = ptrtoint i32** %p.i to i64, !dbg !30
@@ -202,20 +202,20 @@ attributes #4 = { builtin }
!llvm.module.flags = !{!16, !17}
!llvm.ident = !{!18}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 207243) (llvm/trunk 207259)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 207243) (llvm/trunk 207259)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.cpp", directory: "/usr/local/google/home/echristo/tmp")
!2 = !{}
!3 = !{!4, !13}
-!4 = !DISubprogram(name: "f", linkageName: "_Z1fv", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, function: void ()* @_Z1fv, variables: !8)
+!4 = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !8)
!5 = !DIFile(filename: "foo.cpp", directory: "/usr/local/google/home/echristo/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
!8 = !{!9}
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "p", line: 4, scope: !4, file: !5, type: !10)
+!9 = !DILocalVariable(name: "p", line: 4, scope: !4, file: !5, type: !10)
!10 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !11)
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !12)
!12 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!13 = !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !1, scope: !5, type: !14, function: i32 ()* @main, variables: !2)
+!13 = distinct !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 9, file: !1, scope: !5, type: !14, variables: !2)
!14 = !DISubroutineType(types: !15)
!15 = !{!12}
!16 = !{i32 2, !"Dwarf Version", i32 4}
@@ -234,7 +234,7 @@ attributes #4 = { builtin }
!29 = !DILocation(line: 7, scope: !4)
!30 = !DILocation(line: 4, scope: !4, inlinedAt: !31)
!31 = !DILocation(line: 10, scope: !13)
-!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "p", line: 4, scope: !4, file: !5, type: !10)
+!32 = !DILocalVariable(name: "p", line: 4, scope: !4, file: !5, type: !10)
!33 = !DILocation(line: 5, scope: !21, inlinedAt: !31)
!34 = !DILocation(line: 6, scope: !21, inlinedAt: !31)
!35 = !DILocation(line: 7, scope: !4, inlinedAt: !31)
diff --git a/test/DebugInfo/X86/debug_frame.ll b/test/DebugInfo/X86/debug_frame.ll
index 48ce554f0119..563406ccaf90 100644
--- a/test/DebugInfo/X86/debug_frame.ll
+++ b/test/DebugInfo/X86/debug_frame.ll
@@ -4,7 +4,7 @@
; CHECK: .cfi_sections .debug_frame
-define void @f() nounwind {
+define void @f() nounwind !dbg !0 {
entry:
ret void
}
@@ -13,9 +13,9 @@ entry:
!llvm.module.flags = !{!7}
!5 = !{!0}
-!0 = !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !6, scope: !1, type: !3, function: void ()* @f)
+!0 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !6, scope: !1, type: !3)
!1 = !DIFile(filename: "/home/espindola/llvm/test.c", directory: "/home/espindola/llvm/build")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !6, enums: !{}, retainedTypes: !{}, subprograms: !5)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !6, enums: !{}, retainedTypes: !{}, subprograms: !5)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!6 = !DIFile(filename: "/home/espindola/llvm/test.c", directory: "/home/espindola/llvm/build")
diff --git a/test/DebugInfo/X86/debugger-tune.ll b/test/DebugInfo/X86/debugger-tune.ll
new file mode 100644
index 000000000000..b685612d4a47
--- /dev/null
+++ b/test/DebugInfo/X86/debugger-tune.ll
@@ -0,0 +1,44 @@
+; Verify target-based defaults for "debugger tuning," and the ability to
+; override defaults.
+; We use existence of the debug_pubnames section to distinguish the GDB case,
+; and the apple_names section to distinguish the LLDB case. SCE has neither.
+
+; Verify defaults for various targets.
+; RUN: llc -mtriple=x86_64-scei-ps4 -filetype=obj < %s | llvm-readobj -sections - | FileCheck --check-prefix=SCE %s
+; RUN: llc -mtriple=x86_64-apple-darwin12 -filetype=obj < %s | llvm-readobj -sections - | FileCheck --check-prefix=LLDB %s
+; RUN: llc -mtriple=x86_64-pc-freebsd -filetype=obj < %s | llvm-readobj -sections - | FileCheck --check-prefix=LLDB %s
+; RUN: llc -mtriple=x86_64-pc-linux -filetype=obj < %s | llvm-readobj -sections - | FileCheck --check-prefix=GDB %s
+
+; We can override defaults.
+; RUN: llc -mtriple=x86_64-scei-ps4 -filetype=obj -debugger-tune=gdb < %s | llvm-readobj -sections - | FileCheck --check-prefix=GDB %s
+; RUN: llc -mtriple=x86_64-pc-linux -filetype=obj -debugger-tune=lldb < %s | llvm-readobj -sections - | FileCheck --check-prefix=LLDB %s
+; RUN: llc -mtriple=x86_64-apple-darwin12 -filetype=obj -debugger-tune=sce < %s | llvm-readobj -sections - | FileCheck --check-prefix=SCE %s
+
+
+; GDB-NOT: apple_names
+; GDB: debug_pubnames
+; GDB-NOT: apple_names
+
+; LLDB-NOT: debug_pubnames
+; LLDB: apple_names
+; LLDB-NOT: debug_pubnames
+
+; SCE-NOT: debug_pubnames
+; SCE-NOT: apple_names
+
+
+@globalvar = global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 238808)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !DIFile(filename: "debugger-tune.cpp", directory: "/home/probinson/projects/scratch")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIGlobalVariable(name: "globalvar", scope: !0, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, variable: i32* @globalvar)
+!5 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{!"clang version 3.7.0 (trunk 238808)"}
diff --git a/test/DebugInfo/X86/decl-derived-member.ll b/test/DebugInfo/X86/decl-derived-member.ll
index 3b833580e9ca..5751f243521e 100644
--- a/test/DebugInfo/X86/decl-derived-member.ll
+++ b/test/DebugInfo/X86/decl-derived-member.ll
@@ -33,7 +33,7 @@ $_ZN4baseC2Ev = comdat any
@_ZTV4base = external unnamed_addr constant [4 x i8*]
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_decl_derived_member.cpp, i8* null }]
-define internal void @__cxx_global_var_init() section ".text.startup" {
+define internal void @__cxx_global_var_init() section ".text.startup" !dbg !10 {
entry:
call void @_ZN3fooC2Ev(%struct.foo* @f) #2, !dbg !33
%0 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.foo*)* @_ZN3fooD2Ev to void (i8*)*), i8* bitcast (%struct.foo* @f to i8*), i8* @__dso_handle) #2, !dbg !33
@@ -41,7 +41,7 @@ entry:
}
; Function Attrs: inlinehint nounwind uwtable
-define linkonce_odr void @_ZN3fooC2Ev(%struct.foo* %this) unnamed_addr #0 comdat align 2 {
+define linkonce_odr void @_ZN3fooC2Ev(%struct.foo* %this) unnamed_addr #0 comdat align 2 !dbg !14 {
entry:
%this.addr = alloca %struct.foo*, align 8
store %struct.foo* %this, %struct.foo** %this.addr, align 8
@@ -53,7 +53,7 @@ entry:
}
; Function Attrs: inlinehint uwtable
-define linkonce_odr void @_ZN3fooD2Ev(%struct.foo* %this) unnamed_addr #1 comdat align 2 {
+define linkonce_odr void @_ZN3fooD2Ev(%struct.foo* %this) unnamed_addr #1 comdat align 2 !dbg !24 {
entry:
%this.addr = alloca %struct.foo*, align 8
store %struct.foo* %this, %struct.foo** %this.addr, align 8
@@ -71,7 +71,7 @@ declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #2
declare void @llvm.dbg.declare(metadata, metadata, metadata) #3
; Function Attrs: inlinehint nounwind uwtable
-define linkonce_odr void @_ZN4baseC2Ev(%struct.base* %this) unnamed_addr #0 comdat align 2 {
+define linkonce_odr void @_ZN4baseC2Ev(%struct.base* %this) unnamed_addr #0 comdat align 2 !dbg !19 {
entry:
%this.addr = alloca %struct.base*, align 8
store %struct.base* %this, %struct.base** %this.addr, align 8
@@ -100,7 +100,7 @@ attributes #4 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!30, !31}
!llvm.ident = !{!32}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227104) (llvm/trunk 227103)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !28, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227104) (llvm/trunk 227103)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !28, imports: !2)
!1 = !DIFile(filename: "decl-derived-member.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !8}
@@ -110,23 +110,23 @@ attributes #4 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!7 = !DIDerivedType(tag: DW_TAG_typedef, name: "base_type", line: 4, file: !1, baseType: !"_ZTS4base")
!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "base", line: 1, flags: DIFlagFwdDecl, file: !1, identifier: "_ZTS4base")
!9 = !{!10, !14, !19, !24, !26}
-!10 = !DISubprogram(name: "__cxx_global_var_init", line: 8, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !11, type: !12, function: void ()* @__cxx_global_var_init, variables: !2)
+!10 = distinct !DISubprogram(name: "__cxx_global_var_init", line: 8, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !11, type: !12, variables: !2)
!11 = !DIFile(filename: "decl-derived-member.cpp", directory: "/tmp/dbginfo")
!12 = !DISubroutineType(types: !13)
!13 = !{null}
-!14 = !DISubprogram(name: "foo", linkageName: "_ZN3fooC2Ev", line: 5, isLocal: false, isDefinition: true, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !"_ZTS3foo", type: !15, function: void (%struct.foo*)* @_ZN3fooC2Ev, declaration: !18, variables: !2)
+!14 = distinct !DISubprogram(name: "foo", linkageName: "_ZN3fooC2Ev", line: 5, isLocal: false, isDefinition: true, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !"_ZTS3foo", type: !15, declaration: !18, variables: !2)
!15 = !DISubroutineType(types: !16)
!16 = !{null, !17}
!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS3foo")
!18 = !DISubprogram(name: "foo", isLocal: false, isDefinition: false, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scope: !"_ZTS3foo", type: !15)
-!19 = !DISubprogram(name: "base", linkageName: "_ZN4baseC2Ev", line: 1, isLocal: false, isDefinition: true, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !"_ZTS4base", type: !20, function: void (%struct.base*)* @_ZN4baseC2Ev, declaration: !23, variables: !2)
+!19 = distinct !DISubprogram(name: "base", linkageName: "_ZN4baseC2Ev", line: 1, isLocal: false, isDefinition: true, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !"_ZTS4base", type: !20, declaration: !23, variables: !2)
!20 = !DISubroutineType(types: !21)
!21 = !{null, !22}
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS4base")
!23 = !DISubprogram(name: "base", isLocal: false, isDefinition: false, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scope: !"_ZTS4base", type: !20)
-!24 = !DISubprogram(name: "~foo", linkageName: "_ZN3fooD2Ev", line: 5, isLocal: false, isDefinition: true, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !"_ZTS3foo", type: !15, function: void (%struct.foo*)* @_ZN3fooD2Ev, declaration: !25, variables: !2)
+!24 = distinct !DISubprogram(name: "~foo", linkageName: "_ZN3fooD2Ev", line: 5, isLocal: false, isDefinition: true, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !"_ZTS3foo", type: !15, declaration: !25, variables: !2)
!25 = !DISubprogram(name: "~foo", isLocal: false, isDefinition: false, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scope: !"_ZTS3foo", type: !15)
-!26 = !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_decl_derived_member.cpp", isLocal: true, isDefinition: true, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !11, type: !27, function: void ()* @_GLOBAL__sub_I_decl_derived_member.cpp, variables: !2)
+!26 = distinct !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_decl_derived_member.cpp", isLocal: true, isDefinition: true, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !11, type: !27, variables: !2)
!27 = !DISubroutineType(types: !2)
!28 = !{!29}
!29 = !DIGlobalVariable(name: "f", line: 8, isLocal: false, isDefinition: true, scope: null, file: !11, type: !"_ZTS3foo", variable: %struct.foo* @f)
@@ -134,17 +134,17 @@ attributes #4 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!31 = !{i32 2, !"Debug Info Version", i32 3}
!32 = !{!"clang version 3.7.0 (trunk 227104) (llvm/trunk 227103)"}
!33 = !DILocation(line: 8, column: 5, scope: !10)
-!34 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !14, type: !35)
+!34 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !14, type: !35)
!35 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS3foo")
!36 = !DIExpression()
!37 = !DILocation(line: 0, scope: !14)
!38 = !DILocation(line: 5, column: 8, scope: !14)
-!39 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !24, type: !35)
+!39 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !24, type: !35)
!40 = !DILocation(line: 0, scope: !24)
!41 = !DILocation(line: 5, column: 8, scope: !42)
!42 = distinct !DILexicalBlock(line: 5, column: 8, file: !1, scope: !24)
!43 = !DILocation(line: 5, column: 8, scope: !24)
-!44 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !19, type: !45)
+!44 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !19, type: !45)
!45 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS4base")
!46 = !DILocation(line: 0, scope: !19)
!47 = !DILocation(line: 1, column: 8, scope: !19)
diff --git a/test/DebugInfo/X86/deleted-bit-piece.ll b/test/DebugInfo/X86/deleted-bit-piece.ll
index 63f3be5ea41e..b069cf8e399c 100644
--- a/test/DebugInfo/X86/deleted-bit-piece.ll
+++ b/test/DebugInfo/X86/deleted-bit-piece.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
; CHECK: __Z3foov:
; CHECK: retq
-define void @_Z3foov() {
+define void @_Z3foov() !dbg !12 {
entry:
br i1 undef, label %exit, label %bb
@@ -28,7 +28,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!0 = !{i32 2, !"Dwarf Version", i32 2}
!1 = !{i32 2, !"Debug Info Version", i32 3}
-!2 = !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !4, retainedTypes: !5, subprograms: !11, globals: !4, imports: !4)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !4, retainedTypes: !5, subprograms: !11, globals: !4, imports: !4)
!3 = !DIFile(filename: "foo.cpp", directory: "/path/to/dir")
!4 = !{}
!5 = !{!6}
@@ -38,9 +38,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !"_ZT5Class", baseType: !9, size: 32, align: 32)
!11 = !{!12}
-!12 = !DISubprogram(name: "foo", scope: null, file: !3, type: !13, isLocal: false, isDefinition: true, isOptimized: false, function: void ()* @_Z3foov)
+!12 = distinct !DISubprogram(name: "foo", scope: null, file: !3, type: !13, isLocal: false, isDefinition: true, isOptimized: false)
!13 = !DISubroutineType(types: !14)
!14 = !{null}
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "v", scope: !12, type: !"_ZT5Class")
+!15 = !DILocalVariable(name: "v", scope: !12, type: !"_ZT5Class")
!16 = !DIExpression(DW_OP_bit_piece, 32, 32)
!17 = !DILocation(line: 2755, column: 9, scope: !12)
diff --git a/test/DebugInfo/X86/discriminator.ll b/test/DebugInfo/X86/discriminator.ll
index da5acc753c48..e9d8fa58c60d 100644
--- a/test/DebugInfo/X86/discriminator.ll
+++ b/test/DebugInfo/X86/discriminator.ll
@@ -11,7 +11,7 @@
; Manually generated debug nodes !14 and !15 to incorporate an
; arbitrary discriminator with value 42.
-define i32 @foo(i32 %i) #0 {
+define i32 @foo(i32 %i) #0 !dbg !4 {
entry:
%retval = alloca i32, align 4
%i.addr = alloca i32, align 4
@@ -41,11 +41,11 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "discriminator.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "discriminator.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/DebugInfo/X86/dw_op_minus.ll b/test/DebugInfo/X86/dw_op_minus.ll
new file mode 100644
index 000000000000..1c486e06c513
--- /dev/null
+++ b/test/DebugInfo/X86/dw_op_minus.ll
@@ -0,0 +1,84 @@
+; Test dwarf codegen of DW_OP_minus.
+; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+; This was built by compiling the following source with SafeStack and
+; simplifying the result a little.
+; extern "C" {
+; void Capture(int *);
+; void f() {
+; int buf[100];
+; Capture(buf);
+; }
+; }
+; The interesting part is !DIExpression(DW_OP_deref, DW_OP_minus, 400)
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__safestack_unsafe_stack_ptr = external thread_local(initialexec) global i8*
+
+define void @f() !dbg !4 {
+entry:
+ %unsafe_stack_ptr = load i8*, i8** @__safestack_unsafe_stack_ptr
+ %unsafe_stack_static_top = getelementptr i8, i8* %unsafe_stack_ptr, i32 -400
+ store i8* %unsafe_stack_static_top, i8** @__safestack_unsafe_stack_ptr
+ %0 = getelementptr i8, i8* %unsafe_stack_ptr, i32 -400
+ %buf = bitcast i8* %0 to [100 x i32]*
+ %1 = bitcast [100 x i32]* %buf to i8*, !dbg !16
+ call void @llvm.dbg.declare(metadata i8* %unsafe_stack_ptr, metadata !8, metadata !17), !dbg !18
+ %arraydecay = getelementptr inbounds [100 x i32], [100 x i32]* %buf, i64 0, i64 0, !dbg !19
+ call void @Capture(i32* %arraydecay), !dbg !20
+ store i8* %unsafe_stack_ptr, i8** @__safestack_unsafe_stack_ptr, !dbg !21
+ ret void, !dbg !21
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+
+declare void @Capture(i32*)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "1.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, variables: !7)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{!8}
+!8 = !DILocalVariable(name: "buf", scope: !4, file: !1, line: 5, type: !9)
+!9 = !DICompositeType(tag: DW_TAG_array_type, baseType: !10, size: 3200, align: 32, elements: !11)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !{!12}
+!12 = !DISubrange(count: 100)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)"}
+!16 = !DILocation(line: 5, column: 3, scope: !4)
+!17 = !DIExpression(DW_OP_deref, DW_OP_minus, 400)
+!18 = !DILocation(line: 5, column: 7, scope: !4)
+!19 = !DILocation(line: 6, column: 11, scope: !4)
+!20 = !DILocation(line: 6, column: 3, scope: !4)
+!21 = !DILocation(line: 7, column: 1, scope: !4)
+
+; RCX - 400
+; CHECK: .short 6 # Loc expr size
+; CHECK-NEXT: .byte 114 # DW_OP_breg2
+; CHECK-NEXT: .byte 0 # 0
+; CHECK-NEXT: .byte 16 # DW_OP_constu
+; CHECK-NEXT: .byte 144 # 400
+; CHECK-NEXT: .byte 3 # DW_OP_minus
+; CHECK-NEXT: .byte 28
+
+; RCX is clobbered in call @Capture, but there is a spilled copy.
+; *(RSP + 8) - 400
+; CHECK: .short 7 # Loc expr size
+; CHECK-NEXT: .byte 119 # DW_OP_breg7
+; CHECK-NEXT: .byte 8 # 8
+; CHECK-NEXT: .byte 6 # DW_OP_deref
+; CHECK-NEXT: .byte 16 # DW_OP_constu
+; CHECK-NEXT: .byte 144 # 400
+; CHECK-NEXT: .byte 3 # DW_OP_minus
+; CHECK-NEXT: .byte 28
diff --git a/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll b/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
index aae155414bd8..be0d89433e7c 100644
--- a/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
+++ b/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
@@ -26,21 +26,21 @@ target triple = "x86_64-unknown-linux-gnu"
@global = global i32 2, align 4
; Function Attrs: nounwind readnone uwtable
-define i32 @_Z3fooi(i32 %bar) #0 {
+define i32 @_Z3fooi(i32 %bar) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata i32 %bar, i64 0, metadata !10, metadata !DIExpression()), !dbg !20
ret i32 %bar, !dbg !20
}
; Function Attrs: nounwind readnone uwtable
-define i32 @_Z4foo2i(i32 %bar2) #0 {
+define i32 @_Z4foo2i(i32 %bar2) #0 !dbg !11 {
entry:
tail call void @llvm.dbg.value(metadata i32 %bar2, i64 0, metadata !13, metadata !DIExpression()), !dbg !21
ret i32 %bar2, !dbg !21
}
; Function Attrs: nounwind readonly uwtable
-define i32 @main() #1 {
+define i32 @main() #1 !dbg !14 {
entry:
%call = tail call i32 @_Z3fooi(i32 2), !dbg !22
%call1 = tail call i32 @_Z4foo2i(i32 1), !dbg !22
@@ -60,21 +60,21 @@ attributes #2 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!19, !26}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (191881)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !17, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (191881)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !17, imports: !2)
!1 = !DIFile(filename: "tmp/debug_ranges/a.cc", directory: "/")
!2 = !{}
!3 = !{!4, !11, !14}
-!4 = !DISubprogram(name: "foo", linkageName: "_Z3fooi", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 (i32)* @_Z3fooi, variables: !9)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !9)
!5 = !DIFile(filename: "tmp/debug_ranges/a.cc", directory: "/")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10}
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "bar", line: 2, arg: 1, scope: !4, file: !5, type: !8)
-!11 = !DISubprogram(name: "foo2", linkageName: "_Z4foo2i", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, function: i32 (i32)* @_Z4foo2i, variables: !12)
+!10 = !DILocalVariable(name: "bar", line: 2, arg: 1, scope: !4, file: !5, type: !8)
+!11 = distinct !DISubprogram(name: "foo2", linkageName: "_Z4foo2i", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !12)
!12 = !{!13}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "bar2", line: 3, arg: 1, scope: !11, file: !5, type: !8)
-!14 = !DISubprogram(name: "main", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !15, function: i32 ()* @main, variables: !2)
+!13 = !DILocalVariable(name: "bar2", line: 3, arg: 1, scope: !11, file: !5, type: !8)
+!14 = distinct !DISubprogram(name: "main", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !15, variables: !2)
!15 = !DISubroutineType(types: !16)
!16 = !{!8}
!17 = !{!18}
diff --git a/test/DebugInfo/X86/dwarf-aranges.ll b/test/DebugInfo/X86/dwarf-aranges.ll
index 9924697247b3..cb2e87881286 100644
--- a/test/DebugInfo/X86/dwarf-aranges.ll
+++ b/test/DebugInfo/X86/dwarf-aranges.ll
@@ -48,7 +48,7 @@ target triple = "x86_64-unknown-linux-gnu"
@some_other = global i32 5, section "strange+section", align 4
@some_bss = common global i32 0, align 4
-define void @some_code() {
+define void @some_code() !dbg !4 {
entry:
%0 = load i32, i32* @some_data, align 4, !dbg !14
%1 = load i32, i32* @some_other, align 4, !dbg !14
@@ -62,11 +62,11 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!13, !16}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !8, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !8, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "/home/kayamon")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "some_code", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, function: void ()* @some_code, variables: !2)
+!4 = distinct !DISubprogram(name: "some_code", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "/home/kayamon")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
diff --git a/test/DebugInfo/X86/dwarf-linkage-names.ll b/test/DebugInfo/X86/dwarf-linkage-names.ll
new file mode 100644
index 000000000000..65cf1914dd40
--- /dev/null
+++ b/test/DebugInfo/X86/dwarf-linkage-names.ll
@@ -0,0 +1,71 @@
+; DWARF linkage name attributes are optional; verify they are missing for
+; PS4 triple or when tuning for SCE.
+
+; RUN: llc -O0 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s -check-prefix LINKAGE1
+; RUN: llc -O0 -mtriple=x86_64-unknown-unknown < %s | FileCheck %s -check-prefix LINKAGE2
+; RUN: llc -O0 -mtriple=x86_64-scei-ps4 < %s | FileCheck %s -check-prefix NOLINKAGE
+; RUN: llc -O0 -mtriple=x86_64-unknown-unknown -debugger-tune=sce < %s | FileCheck %s -check-prefix NOLINKAGE
+
+; $ clang++ -emit-llvm -S -g dwarf-linkage-names.cpp
+; namespace test {
+; int global_var;
+; int bar() { return global_var; }
+;};
+
+; With linkage names, we get an attribute for the declaration (first) entry
+; for the global variable, and one for the function.
+
+; This assumes the variable will appear before the function.
+; LINKAGE1: .section .debug_info
+; LINKAGE1: DW_TAG_variable
+; LINKAGE1-NOT: DW_TAG
+; LINKAGE1: {{DW_AT_(MIPS_)?linkage_name}}
+; LINKAGE1: DW_TAG_subprogram
+; LINKAGE1-NOT: DW_TAG
+; LINKAGE1: {{DW_AT_(MIPS_)?linkage_name}}
+; LINKAGE1: .section
+
+; Also verify we see the mangled names. We do this as a separate pass to
+; avoid depending on the order of .debug_info and .debug_str sections.
+
+; LINKAGE2-DAG: .asciz "_ZN4test10global_varE"
+; LINKAGE2-DAG: .asciz "_ZN4test3barEv"
+
+; Without linkage names, verify there aren't any linkage-name attributes,
+; and no mangled names.
+
+; NOLINKAGE-NOT: {{DW_AT_(MIPS_)?linkage_name}}
+; NOLINKAGE-NOT: .asciz "_ZN4test10global_varE"
+; NOLINKAGE-NOT: .asciz "_ZN4test3barEv"
+
+@_ZN4test10global_varE = global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define i32 @_ZN4test3barEv() #0 !dbg !4 {
+entry:
+ %0 = load i32, i32* @_ZN4test10global_varE, align 4, !dbg !14
+ ret i32 %0, !dbg !15
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+!llvm.ident = !{!13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 244662)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !9)
+!1 = !DIFile(filename: "dwarf-linkage-names.cpp", directory: "/home/probinson/projects/scratch")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "_ZN4test3barEv", scope: !5, file: !1, line: 3, type: !6, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DINamespace(name: "test", scope: null, file: !1, line: 1)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DIGlobalVariable(name: "global_var", linkageName: "_ZN4test10global_varE", scope: !5, file: !1, line: 2, type: !8, isLocal: false, isDefinition: true, variable: i32* @_ZN4test10global_varE)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{!"clang version 3.8.0 (trunk 244662)"}
+!14 = !DILocation(line: 3, column: 21, scope: !4)
+!15 = !DILocation(line: 3, column: 14, scope: !4)
diff --git a/test/DebugInfo/X86/dwarf-public-names.ll b/test/DebugInfo/X86/dwarf-public-names.ll
index c72da38832ee..d850899ce85a 100644
--- a/test/DebugInfo/X86/dwarf-public-names.ll
+++ b/test/DebugInfo/X86/dwarf-public-names.ll
@@ -43,7 +43,7 @@
; Skip the output to the header of the pubnames section.
; LINUX: debug_pubnames
-; LINUX-NEXT: unit_size = 0x00000128
+; LINUX-NEXT: unit_size = 0x0000012a
; Check for each name in the output.
; LINUX-DAG: "ns"
@@ -61,7 +61,7 @@
@global_variable = global %struct.C zeroinitializer, align 1
@_ZN2ns25global_namespace_variableE = global i32 1, align 4
-define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2 {
+define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2 !dbg !3 {
entry:
%this.addr = alloca %struct.C*, align 8
store %struct.C* %this, %struct.C** %this.addr, align 8
@@ -73,18 +73,18 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 {
+define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 !dbg !18 {
entry:
%0 = load i32, i32* @_ZN1C22static_member_variableE, align 4, !dbg !33
ret i32 %0, !dbg !33
}
-define i32 @_Z15global_functionv() nounwind uwtable {
+define i32 @_Z15global_functionv() nounwind uwtable !dbg !19 {
entry:
ret i32 -1, !dbg !34
}
-define void @_ZN2ns25global_namespace_functionEv() nounwind uwtable {
+define void @_ZN2ns25global_namespace_functionEv() nounwind uwtable !dbg !20 {
entry:
call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !35
ret void, !dbg !36
@@ -96,10 +96,10 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!38}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", isOptimized: false, emissionKind: 0, file: !37, enums: !1, retainedTypes: !1, subprograms: !2, globals: !24, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", isOptimized: false, emissionKind: 0, file: !37, enums: !1, retainedTypes: !1, subprograms: !2, globals: !24, imports: !1)
!1 = !{}
!2 = !{!3, !18, !19, !20}
-!3 = !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !4, scope: null, type: !5, function: void (%struct.C*)* @_ZN1C15member_functionEv, declaration: !12, variables: !1)
+!3 = distinct !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !4, scope: null, type: !5, declaration: !12, variables: !1)
!4 = !DIFile(filename: "dwarf-public-names.cpp", directory: "/usr2/kparzysz/s.hex/t")
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7}
@@ -114,9 +114,9 @@ attributes #1 = { nounwind readnone }
!15 = !DISubroutineType(types: !16)
!16 = !{!11}
!17 = !{} ; previously: invalid DW_TAG_base_type
-!18 = !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !4, scope: null, type: !15, function: i32 ()* @_ZN1C22static_member_functionEv, declaration: !14, variables: !1)
-!19 = !DISubprogram(name: "global_function", linkageName: "_Z15global_functionv", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 19, file: !4, scope: !4, type: !15, function: i32 ()* @_Z15global_functionv, variables: !1)
-!20 = !DISubprogram(name: "global_namespace_function", linkageName: "_ZN2ns25global_namespace_functionEv", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !4, scope: !21, type: !22, function: void ()* @_ZN2ns25global_namespace_functionEv, variables: !1)
+!18 = distinct !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !4, scope: null, type: !15, declaration: !14, variables: !1)
+!19 = distinct !DISubprogram(name: "global_function", linkageName: "_Z15global_functionv", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 19, file: !4, scope: !4, type: !15, variables: !1)
+!20 = distinct !DISubprogram(name: "global_namespace_function", linkageName: "_ZN2ns25global_namespace_functionEv", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !4, scope: !21, type: !22, variables: !1)
!21 = !DINamespace(name: "ns", line: 23, file: !4, scope: null)
!22 = !DISubroutineType(types: !23)
!23 = !{null}
@@ -124,7 +124,7 @@ attributes #1 = { nounwind readnone }
!25 = !DIGlobalVariable(name: "static_member_variable", linkageName: "_ZN1C22static_member_variableE", line: 7, isLocal: false, isDefinition: true, scope: !8, file: !4, type: !11, variable: i32* @_ZN1C22static_member_variableE, declaration: !10)
!26 = !DIGlobalVariable(name: "global_variable", line: 17, isLocal: false, isDefinition: true, scope: null, file: !4, type: !8, variable: %struct.C* @global_variable)
!27 = !DIGlobalVariable(name: "global_namespace_variable", linkageName: "_ZN2ns25global_namespace_variableE", line: 27, isLocal: false, isDefinition: true, scope: !21, file: !4, type: !11, variable: i32* @_ZN2ns25global_namespace_variableE)
-!28 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 9, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !3, file: !4, type: !29)
+!28 = !DILocalVariable(name: "this", line: 9, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !3, file: !4, type: !29)
!29 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !8)
!30 = !DILocation(line: 9, scope: !3)
!31 = !DILocation(line: 10, scope: !3)
diff --git a/test/DebugInfo/X86/dwarf-pubnames-split.ll b/test/DebugInfo/X86/dwarf-pubnames-split.ll
index f17f7848f046..a8e4cc6e433a 100644
--- a/test/DebugInfo/X86/dwarf-pubnames-split.ll
+++ b/test/DebugInfo/X86/dwarf-pubnames-split.ll
@@ -12,7 +12,7 @@
; CHECK-NEXT: .long .Lcu_begin0 # Offset of Compilation Unit Info
; Function Attrs: nounwind uwtable
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !4 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -24,11 +24,11 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 189287) (llvm/trunk 189296)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 189287) (llvm/trunk 189296)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.c", directory: "/usr/local/google/home/echristo/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "foo.c", directory: "/usr/local/google/home/echristo/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/DebugInfo/X86/earlydup-crash.ll b/test/DebugInfo/X86/earlydup-crash.ll
index 738b7194488f..73626597d23b 100644
--- a/test/DebugInfo/X86/earlydup-crash.ll
+++ b/test/DebugInfo/X86/earlydup-crash.ll
@@ -6,7 +6,7 @@
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-define internal i8* @framework_construct_pathname(i8* %fname, %struct.cpp_dir* %dir) nounwind ssp {
+define internal i8* @framework_construct_pathname(i8* %fname, %struct.cpp_dir* %dir) nounwind ssp !dbg !2 {
entry:
br i1 undef, label %bb33, label %bb
@@ -44,11 +44,11 @@ declare void @foobar(i32)
!llvm.dbg.cu = !{!4}
!llvm.module.flags = !{!47}
-!0 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "frname_len", line: 517, scope: !1, file: !3, type: !38)
+!0 = !DILocalVariable(name: "frname_len", line: 517, scope: !1, file: !3, type: !38)
!1 = distinct !DILexicalBlock(line: 515, column: 0, file: !44, scope: !2)
-!2 = !DISubprogram(name: "framework_construct_pathname", line: 515, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !44, scope: null, type: !5, function: i8* (i8*, %struct.cpp_dir*)* @framework_construct_pathname)
+!2 = distinct !DISubprogram(name: "framework_construct_pathname", line: 515, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !44, scope: null, type: !5)
!3 = !DIFile(filename: "darwin-c.c", directory: "/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config")
-!4 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !44, enums: !46, retainedTypes: !46, subprograms: !45)
+!4 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !44, enums: !46, retainedTypes: !46, subprograms: !45)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !9, !11}
!7 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !44, scope: !3, baseType: !8)
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index e4565c27cf72..2c8571927627 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -17,10 +17,10 @@
%class.D = type { i32, i32, i32, i32 }
-@_ZN1DC1Ev = alias void (%class.D*)* @_ZN1DC2Ev
-@_ZN1DC1ERKS_ = alias void (%class.D*, %class.D*)* @_ZN1DC2ERKS_
+@_ZN1DC1Ev = alias void (%class.D*), void (%class.D*)* @_ZN1DC2Ev
+@_ZN1DC1ERKS_ = alias void (%class.D*, %class.D*), void (%class.D*, %class.D*)* @_ZN1DC2ERKS_
-define void @_ZN1DC2Ev(%class.D* nocapture %this) unnamed_addr nounwind uwtable align 2 {
+define void @_ZN1DC2Ev(%class.D* nocapture %this) unnamed_addr nounwind uwtable align 2 !dbg !5 {
entry:
tail call void @llvm.dbg.value(metadata %class.D* %this, i64 0, metadata !29, metadata !DIExpression()), !dbg !36
%c1 = getelementptr inbounds %class.D, %class.D* %this, i64 0, i32 0, !dbg !37
@@ -34,7 +34,7 @@ entry:
ret void, !dbg !45
}
-define void @_ZN1DC2ERKS_(%class.D* nocapture %this, %class.D* nocapture %d) unnamed_addr nounwind uwtable align 2 {
+define void @_ZN1DC2ERKS_(%class.D* nocapture %this, %class.D* nocapture %d) unnamed_addr nounwind uwtable align 2 !dbg !31 {
entry:
tail call void @llvm.dbg.value(metadata %class.D* %this, i64 0, metadata !34, metadata !DIExpression()), !dbg !46
tail call void @llvm.dbg.value(metadata %class.D* %d, i64 0, metadata !35, metadata !DIExpression()), !dbg !46
@@ -62,10 +62,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!54}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 167506) (llvm/trunk 167505)", isOptimized: true, emissionKind: 0, file: !53, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 167506) (llvm/trunk 167505)", isOptimized: true, emissionKind: 0, file: !53, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5, !31}
-!5 = !DISubprogram(name: "D", linkageName: "_ZN1DC2Ev", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !6, scope: null, type: !7, function: void (%class.D*)* @_ZN1DC2Ev, declaration: !17, variables: !27)
+!5 = distinct !DISubprogram(name: "D", linkageName: "_ZN1DC2Ev", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !6, scope: null, type: !7, declaration: !17, variables: !27)
!6 = !DIFile(filename: "foo.cpp", directory: "/usr/local/google/home/echristo")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9}
@@ -84,12 +84,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!23 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !24)
!24 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !10)
!27 = !{!29}
-!29 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 12, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !5, file: !6, type: !30)
+!29 = !DILocalVariable(name: "this", line: 12, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !5, file: !6, type: !30)
!30 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
-!31 = !DISubprogram(name: "D", linkageName: "_ZN1DC2ERKS_", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 19, file: !6, scope: null, type: !21, function: void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, declaration: !20, variables: !32)
+!31 = distinct !DISubprogram(name: "D", linkageName: "_ZN1DC2ERKS_", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 19, file: !6, scope: null, type: !21, declaration: !20, variables: !32)
!32 = !{!34, !35}
-!34 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 19, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !31, file: !6, type: !30)
-!35 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "d", line: 19, arg: 2, scope: !31, file: !6, type: !23)
+!34 = !DILocalVariable(name: "this", line: 19, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !31, file: !6, type: !30)
+!35 = !DILocalVariable(name: "d", line: 19, arg: 2, scope: !31, file: !6, type: !23)
!36 = !DILocation(line: 12, scope: !5)
!37 = !DILocation(line: 13, scope: !38)
!38 = distinct !DILexicalBlock(line: 12, column: 0, file: !6, scope: !5)
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
index 1250b9c64412..550a231f7699 100644
--- a/test/DebugInfo/X86/empty-and-one-elem-array.ll
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -5,7 +5,7 @@
%struct.foo = type { i32, [1 x i32] }
%struct.bar = type { i32, [0 x i32] }
-define i32 @func() nounwind uwtable ssp {
+define i32 @func() nounwind uwtable ssp !dbg !5 {
entry:
%my_foo = alloca %struct.foo, align 4
%my_bar = alloca %struct.bar, align 4
@@ -63,15 +63,15 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 169136)", isOptimized: false, emissionKind: 0, file: !32, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 169136)", isOptimized: false, emissionKind: 0, file: !32, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "func", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !6, scope: !6, type: !7, function: i32 ()* @func, variables: !1)
+!5 = distinct !DISubprogram(name: "func", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !6, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "test.c", directory: "/Volumes/Sandbox/llvm")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "my_foo", line: 12, scope: !11, file: !6, type: !12)
+!10 = !DILocalVariable(name: "my_foo", line: 12, scope: !11, file: !6, type: !12)
!11 = distinct !DILexicalBlock(line: 11, column: 0, file: !6, scope: !5)
!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 1, size: 64, align: 32, file: !32, elements: !13)
!13 = !{!14, !15}
@@ -81,7 +81,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!17 = !{!18}
!18 = !DISubrange(count: 1)
!19 = !DILocation(line: 12, scope: !11)
-!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "my_bar", line: 13, scope: !11, file: !6, type: !21)
+!20 = !DILocalVariable(name: "my_bar", line: 13, scope: !11, file: !6, type: !21)
!21 = !DICompositeType(tag: DW_TAG_structure_type, name: "bar", line: 6, size: 32, align: 32, file: !32, elements: !22)
!22 = !{!23, !24}
!23 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 7, size: 32, align: 32, file: !32, scope: !21, baseType: !9)
diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll
index ce460a783c05..bad080cb7254 100644
--- a/test/DebugInfo/X86/empty-array.ll
+++ b/test/DebugInfo/X86/empty-array.ll
@@ -27,7 +27,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!21}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169136)", isOptimized: false, emissionKind: 0, file: !20, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169136)", isOptimized: false, emissionKind: 0, file: !20, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !DIGlobalVariable(name: "a", line: 1, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: %class.A* @a)
diff --git a/test/DebugInfo/X86/empty.ll b/test/DebugInfo/X86/empty.ll
index 1f48a2f21246..695e9ca6ed3f 100644
--- a/test/DebugInfo/X86/empty.ll
+++ b/test/DebugInfo/X86/empty.ll
@@ -19,7 +19,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!5}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 143523)", isOptimized: true, emissionKind: 0, file: !4, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 143523)", isOptimized: true, emissionKind: 0, file: !4, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2)
!2 = !{}
!3 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
!4 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
index 9e3584615df2..efb85aad73b6 100644
--- a/test/DebugInfo/X86/ending-run.ll
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -8,7 +8,7 @@
; CHECK: 0x0000000000000000 7 0 1 0
; CHECK: 0x0000000000000004 8 18 1 0 0 is_stmt prologue_end
-define i32 @callee(i32 %x) nounwind uwtable ssp {
+define i32 @callee(i32 %x) nounwind uwtable ssp !dbg !5 {
entry:
%x.addr = alloca i32, align 4
%y = alloca i32, align 4
@@ -29,17 +29,17 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!20}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 153921) (llvm/trunk 153916)", isOptimized: false, emissionKind: 0, file: !19, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 153921) (llvm/trunk 153916)", isOptimized: false, emissionKind: 0, file: !19, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "callee", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !19, scope: !6, type: !7, function: i32 (i32)* @callee)
+!5 = distinct !DISubprogram(name: "callee", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !19, scope: !6, type: !7)
!6 = !DIFile(filename: "ending-run.c", directory: "/Users/echristo/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 5, arg: 1, scope: !5, file: !6, type: !9)
+!12 = !DILocalVariable(name: "x", line: 5, arg: 1, scope: !5, file: !6, type: !9)
!13 = !DILocation(line: 5, column: 5, scope: !5)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 8, scope: !15, file: !6, type: !9)
+!14 = !DILocalVariable(name: "y", line: 8, scope: !15, file: !6, type: !9)
!15 = distinct !DILexicalBlock(line: 7, column: 1, file: !19, scope: !5)
!16 = !DILocation(line: 8, column: 9, scope: !15)
!17 = !DILocation(line: 8, column: 18, scope: !15)
diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll
index 57b2aa404cbb..fda0b4943a9d 100644
--- a/test/DebugInfo/X86/enum-class.ll
+++ b/test/DebugInfo/X86/enum-class.ll
@@ -8,7 +8,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 157269) (llvm/trunk 157264)", isOptimized: false, emissionKind: 0, file: !22, enums: !1, retainedTypes: !15, subprograms: !15, globals: !17, imports: !15)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 157269) (llvm/trunk 157264)", isOptimized: false, emissionKind: 0, file: !22, enums: !1, retainedTypes: !15, subprograms: !15, globals: !17, imports: !15)
!1 = !{!3, !8, !12}
!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "A", line: 1, size: 32, align: 32, file: !4, baseType: !5, elements: !6)
!4 = !DIFile(filename: "foo.cpp", directory: "/Users/echristo/tmp")
diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll
index f5deeb7d6712..ec862d10a704 100644
--- a/test/DebugInfo/X86/enum-fwd-decl.ll
+++ b/test/DebugInfo/X86/enum-fwd-decl.ll
@@ -6,7 +6,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 165274) (llvm/trunk 165272)", isOptimized: false, emissionKind: 0, file: !8, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 165274) (llvm/trunk 165272)", isOptimized: false, emissionKind: 0, file: !8, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !DIGlobalVariable(name: "e", line: 2, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: i16* @e)
diff --git a/test/DebugInfo/X86/externaltyperef.ll b/test/DebugInfo/X86/externaltyperef.ll
new file mode 100644
index 000000000000..c344d5f068c3
--- /dev/null
+++ b/test/DebugInfo/X86/externaltyperef.ll
@@ -0,0 +1,51 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; Manually derived by externalizing the composite types from:
+;
+; namespace N { class B; }
+; using N::B;
+; class A;
+; A *a;
+;
+; Test the direct use of an external type.
+; CHECK: DW_TAG_variable
+; CHECK: DW_AT_type [DW_FORM_ref4] {{.*}}{[[PTR:.*]]}
+; CHECK: [[PTR]]: DW_TAG_pointer_type
+; CHECK: DW_AT_type [DW_FORM_ref4] {{.*}}{[[A:.*]]}
+; CHECK: [[A]]: DW_TAG_class_type
+; CHECK: DW_AT_declaration [DW_FORM_flag] (0x01)
+; CHECK: DW_AT_signature [DW_FORM_ref_sig8] (0x4e834ea939695c24)
+; CHECK: [[B:.*]]: DW_TAG_class_type
+; CHECK: DW_AT_declaration [DW_FORM_flag] (0x01)
+; CHECK: DW_AT_signature [DW_FORM_ref_sig8] (0x942e51c7addda5f7)
+; CHECK: DW_TAG_imported_declaration
+; CHECK: DW_AT_import [DW_FORM_ref4] {{.*}}[[B]]
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+%class.A = type opaque
+
+@a = global %class.A* null, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14, !15}
+!llvm.ident = !{!16}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 242039) (llvm/trunk 242046)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, globals: !5, imports: !11)
+!1 = !DIFile(filename: "test.cpp", directory: "/")
+!2 = !{}
+!3 = !{!4, !9}
+!4 = !DICompositeType(tag: DW_TAG_class_type, name: "A", file: !1, flags: DIFlagExternalTypeRef, identifier: "_ZTS1A")
+!5 = !{!6}
+!6 = !DIGlobalVariable(name: "a", scope: !0, file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, variable: %class.A** @a)
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !"_ZTS1A", size: 64, align: 64)
+!8 = !DICompositeType(tag: DW_TAG_class_type, name: "B", file: !1, flags: DIFlagExternalTypeRef, identifier: "_ZTS1B")
+!9 = !DICompositeType(tag: DW_TAG_class_type, name: "A", file: !1, flags: DIFlagExternalTypeRef, identifier: "_ZTSN1N1BE")
+!10 = !DINamespace(name: "N", scope: null, file: !1, line: 1)
+!11 = !{!12}
+!12 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !0, entity: !"_ZTSN1N1BE", line: 4)
+!13 = !{i32 2, !"Dwarf Version", i32 2}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{i32 1, !"PIC Level", i32 2}
+!16 = !{!"clang version 3.7.0 (trunk 242039) (llvm/trunk 242046)"}
diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll
index 493f84283e2e..d52333ce6d97 100644
--- a/test/DebugInfo/X86/fission-cu.ll
+++ b/test/DebugInfo/X86/fission-cu.ll
@@ -8,7 +8,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 169021) (llvm/trunk 169020)", isOptimized: false, splitDebugFilename: "baz.dwo", emissionKind: 0, file: !8, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 169021) (llvm/trunk 169020)", isOptimized: false, splitDebugFilename: "baz.dwo", emissionKind: 0, file: !8, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !DIGlobalVariable(name: "a", line: 1, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: i32* @a)
diff --git a/test/DebugInfo/X86/fission-hash.ll b/test/DebugInfo/X86/fission-hash.ll
index 74764416d748..84568a92e154 100644
--- a/test/DebugInfo/X86/fission-hash.ll
+++ b/test/DebugInfo/X86/fission-hash.ll
@@ -9,7 +9,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 188230) (llvm/trunk 188234)", isOptimized: false, splitDebugFilename: "foo.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 188230) (llvm/trunk 188234)", isOptimized: false, splitDebugFilename: "foo.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.c", directory: "/usr/local/google/home/echristo/tmp")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 3}
diff --git a/test/DebugInfo/X86/fission-inline.ll b/test/DebugInfo/X86/fission-inline.ll
index 53e782756ca8..af7dd95e5b84 100644
--- a/test/DebugInfo/X86/fission-inline.ll
+++ b/test/DebugInfo/X86/fission-inline.ll
@@ -75,7 +75,7 @@
; RELOCS-NOT: RELOCATION RECORDS FOR [.rela.debug_ranges]
; Function Attrs: uwtable
-define void @_ZN3foo2f3Ez(...) #0 align 2 {
+define void @_ZN3foo2f3Ez(...) #0 align 2 !dbg !10 {
entry:
call void @_Z2f1v(), !dbg !26
call void @_Z2f1v(), !dbg !25
@@ -92,7 +92,7 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!22, !23}
!llvm.ident = !{!24}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, splitDebugFilename: "fission-inline.dwo", emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !2, imports: !18)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, splitDebugFilename: "fission-inline.dwo", emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !2, imports: !18)
!1 = !DIFile(filename: "fission-inline.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
@@ -102,8 +102,8 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!7 = !DISubroutineType(types: !8)
!8 = !{null, null}
!9 = !{!10, !11}
-!10 = !DISubprogram(name: "f3", linkageName: "_ZN3foo2f3Ez", line: 15, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 15, file: !1, scope: !"_ZTS3foo", type: !7, function: void (...)* @_ZN3foo2f3Ez, declaration: !6, variables: !2)
-!11 = !DISubprogram(name: "f2<int>", linkageName: "_ZN3foo2f2IiEEvv", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !"_ZTS3foo", type: !12, templateParams: !14, declaration: !17, variables: !2)
+!10 = distinct !DISubprogram(name: "f3", linkageName: "_ZN3foo2f3Ez", line: 15, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 15, file: !1, scope: !"_ZTS3foo", type: !7, declaration: !6, variables: !2)
+!11 = distinct !DISubprogram(name: "f2<int>", linkageName: "_ZN3foo2f2IiEEvv", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !"_ZTS3foo", type: !12, templateParams: !14, declaration: !17, variables: !2)
!12 = !DISubroutineType(types: !13)
!13 = !{null}
!14 = !{!15}
diff --git a/test/DebugInfo/X86/fission-ranges.ll b/test/DebugInfo/X86/fission-ranges.ll
index e5eb25ce90c7..9c9fd7d6e6fe 100644
--- a/test/DebugInfo/X86/fission-ranges.ll
+++ b/test/DebugInfo/X86/fission-ranges.ll
@@ -29,16 +29,16 @@
; CHECK-NEXT: Location description: 11 00
; CHECK-NEXT: {{^$}}
; CHECK-NEXT: Beginning address index: 3
-; CHECK-NEXT: Length: 21
+; CHECK-NEXT: Length: 25
; CHECK-NEXT: Location description: 50 93 04
; CHECK: [[E]]: Beginning address index: 4
-; CHECK-NEXT: Length: 19
+; CHECK-NEXT: Length: 23
; CHECK-NEXT: Location description: 50 93 04
; CHECK: [[B]]: Beginning address index: 5
-; CHECK-NEXT: Length: 17
+; CHECK-NEXT: Length: 21
; CHECK-NEXT: Location description: 50 93 04
; CHECK: [[D]]: Beginning address index: 6
-; CHECK-NEXT: Length: 17
+; CHECK-NEXT: Length: 21
; CHECK-NEXT: Location description: 50 93 04
; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo)
@@ -82,14 +82,14 @@
@c = external global i32
; Function Attrs: nounwind uwtable
-define void @bar() #0 {
+define void @bar() #0 !dbg !4 {
entry:
tail call fastcc void @foo(), !dbg !27
ret void, !dbg !28
}
; Function Attrs: nounwind uwtable
-define internal fastcc void @foo() #0 {
+define internal fastcc void @foo() #0 !dbg !8 {
entry:
tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !13, metadata !DIExpression()), !dbg !30
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !14, metadata !DIExpression()), !dbg !31
@@ -153,26 +153,26 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!26, !43}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 191700) (llvm/trunk 191710)", isOptimized: true, splitDebugFilename: "small.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 191700) (llvm/trunk 191710)", isOptimized: true, splitDebugFilename: "small.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "small.c", directory: "/usr/local/google/home/echristo/tmp")
!2 = !{}
!3 = !{!4, !8}
-!4 = !DISubprogram(name: "bar", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 19, file: !1, scope: !5, type: !6, function: void ()* @bar, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 19, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "small.c", directory: "/usr/local/google/home/echristo/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
-!8 = !DISubprogram(name: "foo", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !9, function: void ()* @foo, variables: !12)
+!8 = distinct !DISubprogram(name: "foo", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !9, variables: !12)
!9 = !DISubroutineType(types: !10)
!10 = !{null, !11}
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!12 = !{!13, !14, !15, !16, !18, !19}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p", line: 2, arg: 1, scope: !8, file: !5, type: !11)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 4, scope: !8, file: !5, type: !11)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 4, scope: !8, file: !5, type: !11)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", line: 5, scope: !8, file: !5, type: !17)
+!13 = !DILocalVariable(name: "p", line: 2, arg: 1, scope: !8, file: !5, type: !11)
+!14 = !DILocalVariable(name: "a", line: 4, scope: !8, file: !5, type: !11)
+!15 = !DILocalVariable(name: "b", line: 4, scope: !8, file: !5, type: !11)
+!16 = !DILocalVariable(name: "d", line: 5, scope: !8, file: !5, type: !17)
!17 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "e", line: 5, scope: !8, file: !5, type: !17)
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "w", line: 12, scope: !20, file: !5, type: !25)
+!18 = !DILocalVariable(name: "e", line: 5, scope: !8, file: !5, type: !17)
+!19 = !DILocalVariable(name: "w", line: 12, scope: !20, file: !5, type: !25)
!20 = distinct !DILexicalBlock(line: 11, column: 0, file: !1, scope: !21)
!21 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !22)
!22 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !23)
diff --git a/test/DebugInfo/X86/float_const.ll b/test/DebugInfo/X86/float_const.ll
index 6e99ffcc1f21..cd94f74dcbe3 100644
--- a/test/DebugInfo/X86/float_const.ll
+++ b/test/DebugInfo/X86/float_const.ll
@@ -8,7 +8,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; Function Attrs: nounwind optsize readnone uwtable
-define void @foo() #0 {
+define void @foo() #0 !dbg !7 {
entry:
tail call void @llvm.dbg.declare(metadata float* undef, metadata !13, metadata !19), !dbg !20
tail call void @llvm.dbg.value(metadata i32 1078523331, i64 0, metadata !13, metadata !19), !dbg !20
@@ -31,20 +31,20 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!15, !16, !17}
!llvm.ident = !{!18}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 227686)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !6, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 227686)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !6, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.c", directory: "")
!2 = !{}
!3 = !{!4}
!4 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !5)
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!6 = !{!7}
-!7 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: true, scopeLine: 1, file: !8, scope: !9, type: !10, function: void ()* @foo, variables: !12)
+!7 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: true, scopeLine: 1, file: !8, scope: !9, type: !10, variables: !12)
!8 = !DIFile(filename: "foo.c", directory: "")
!9 = !DIFile(filename: "foo.c", directory: "")
!10 = !DISubroutineType(types: !11)
!11 = !{null}
!12 = !{!13}
-!13 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 2, scope: !7, file: !9, type: !14)
+!13 = !DILocalVariable(name: "a", line: 2, scope: !7, file: !9, type: !14)
!14 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!15 = !{i32 2, !"Dwarf Version", i32 2}
!16 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/formal_parameter.ll b/test/DebugInfo/X86/formal_parameter.ll
index e8a8b5f1803b..89be50bbd4a3 100644
--- a/test/DebugInfo/X86/formal_parameter.ll
+++ b/test/DebugInfo/X86/formal_parameter.ll
@@ -24,7 +24,7 @@ target triple = "x86_64-apple-macosx10.9.0"
; CHECK-NOT: DW_AT_name {{.*}}map
; Function Attrs: nounwind ssp uwtable
-define void @foo(i32 %map) #0 {
+define void @foo(i32 %map) #0 !dbg !4 {
entry:
%map.addr = alloca i32, align 4
store i32 %map, i32* %map.addr, align 4, !tbaa !15
@@ -59,17 +59,17 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!11, !12}
!llvm.ident = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "formal_parameter.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, function: void (i32)* @foo, variables: !9)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !9)
!5 = !DIFile(filename: "formal_parameter.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10}
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "map", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!10 = !DILocalVariable(name: "map", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!11 = !{i32 2, !"Dwarf Version", i32 2}
!12 = !{i32 1, !"Debug Info Version", i32 3}
!13 = !{!"clang version 3.5.0 "}
diff --git a/test/DebugInfo/X86/frame-register.ll b/test/DebugInfo/X86/frame-register.ll
index bcf65a1a3853..e7f48597b348 100644
--- a/test/DebugInfo/X86/frame-register.ll
+++ b/test/DebugInfo/X86/frame-register.ll
@@ -14,7 +14,7 @@ declare i32 @foo(i32 %i) #0
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind uwtable
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !9 {
entry:
%retval = alloca i32, align 4
%i = alloca i32, align 4
@@ -33,27 +33,27 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!12, !13}
!llvm.ident = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "x.c", directory: "")
!2 = !{}
!3 = !{!4, !9}
-!4 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "x.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DISubprogram(name: "main", line: 8, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 9, file: !1, scope: !5, type: !10, function: i32 ()* @main, variables: !2)
+!9 = distinct !DISubprogram(name: "main", line: 8, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 9, file: !1, scope: !5, type: !10, variables: !2)
!10 = !DISubroutineType(types: !11)
!11 = !{!8}
!12 = !{i32 2, !"Dwarf Version", i32 4}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = !{!"clang version 3.7.0"}
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 2, arg: 1, scope: !4, file: !5, type: !8)
+!15 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !4, file: !5, type: !8)
!16 = !DIExpression()
!17 = !DILocation(line: 2, column: 10, scope: !4)
!18 = !DILocation(line: 4, column: 10, scope: !4)
!19 = !DILocation(line: 4, column: 3, scope: !4)
-!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 10, scope: !9, file: !5, type: !8)
+!20 = !DILocalVariable(name: "i", line: 10, scope: !9, file: !5, type: !8)
!21 = !DILocation(line: 10, column: 7, scope: !9)
!22 = !DILocation(line: 11, column: 15, scope: !9)
!23 = !DILocation(line: 11, column: 10, scope: !9)
diff --git a/test/DebugInfo/X86/generate-odr-hash.ll b/test/DebugInfo/X86/generate-odr-hash.ll
index 7ac9a69181f3..51f33e2730ab 100644
--- a/test/DebugInfo/X86/generate-odr-hash.ll
+++ b/test/DebugInfo/X86/generate-odr-hash.ll
@@ -180,7 +180,7 @@
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
; Function Attrs: nounwind uwtable
-define void @_Z3foov() #0 {
+define void @_Z3foov() #0 !dbg !22 {
entry:
%b = alloca %struct.baz, align 1
call void @llvm.dbg.declare(metadata %struct.baz* %b, metadata !46, metadata !DIExpression()), !dbg !48
@@ -190,14 +190,14 @@ entry:
; Function Attrs: nounwind readnone
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-define internal void @__cxx_global_var_init() section ".text.startup" {
+define internal void @__cxx_global_var_init() section ".text.startup" !dbg !26 {
entry:
call void @_ZN12_GLOBAL__N_16walrusC2Ev(%"struct.<anonymous namespace>::walrus"* @w), !dbg !50
ret void, !dbg !50
}
; Function Attrs: nounwind uwtable
-define internal void @_ZN12_GLOBAL__N_16walrusC2Ev(%"struct.<anonymous namespace>::walrus"* %this) unnamed_addr #0 align 2 {
+define internal void @_ZN12_GLOBAL__N_16walrusC2Ev(%"struct.<anonymous namespace>::walrus"* %this) unnamed_addr #0 align 2 !dbg !27 {
entry:
%this.addr = alloca %"struct.<anonymous namespace>::walrus"*, align 8
store %"struct.<anonymous namespace>::walrus"* %this, %"struct.<anonymous namespace>::walrus"** %this.addr, align 8
@@ -206,7 +206,7 @@ entry:
ret void, !dbg !54
}
-define internal void @_GLOBAL__I_a() section ".text.startup" {
+define internal void @_GLOBAL__I_a() section ".text.startup" !dbg !36 {
entry:
call void @__cxx_global_var_init(), !dbg !55
ret void, !dbg !55
@@ -219,7 +219,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!43, !44}
!llvm.ident = !{!45}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, splitDebugFilename: "bar.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !21, globals: !38, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, splitDebugFilename: "bar.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !21, globals: !38, imports: !2)
!1 = !DIFile(filename: "bar.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !6, !14, !17}
@@ -241,12 +241,12 @@ attributes #1 = { nounwind readnone }
!19 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 33, size: 32, align: 32, file: !1, scope: !"_ZTSN6wombatUt_E", baseType: !12)
!20 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 34, size: 32, align: 32, offset: 32, file: !1, scope: !"_ZTSN6wombatUt_E", baseType: !12)
!21 = !{!22, !26, !27, !36}
-!22 = !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !23, type: !24, function: void ()* @_Z3foov, variables: !2)
+!22 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !23, type: !24, variables: !2)
!23 = !DIFile(filename: "bar.cpp", directory: "/tmp/dbginfo")
!24 = !DISubroutineType(types: !25)
!25 = !{null}
-!26 = !DISubprogram(name: "__cxx_global_var_init", line: 29, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 29, file: !1, scope: !23, type: !24, function: void ()* @__cxx_global_var_init, variables: !2)
-!27 = !DISubprogram(name: "walrus", linkageName: "_ZN12_GLOBAL__N_16walrusC2Ev", line: 25, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 25, file: !1, scope: !28, type: !32, function: void (%"struct.<anonymous namespace>::walrus"*)* @_ZN12_GLOBAL__N_16walrusC2Ev, declaration: !31, variables: !2)
+!26 = distinct !DISubprogram(name: "__cxx_global_var_init", line: 29, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 29, file: !1, scope: !23, type: !24, variables: !2)
+!27 = distinct !DISubprogram(name: "walrus", linkageName: "_ZN12_GLOBAL__N_16walrusC2Ev", line: 25, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 25, file: !1, scope: !28, type: !32, declaration: !31, variables: !2)
!28 = !DICompositeType(tag: DW_TAG_structure_type, name: "walrus", line: 24, size: 8, align: 8, file: !1, scope: !29, elements: !30)
!29 = !DINamespace(line: 23, file: !1, scope: null)
!30 = !{!31}
@@ -254,7 +254,7 @@ attributes #1 = { nounwind readnone }
!32 = !DISubroutineType(types: !33)
!33 = !{null, !34}
!34 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !28)
-!36 = !DISubprogram(name: "", linkageName: "_GLOBAL__I_a", line: 25, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, scopeLine: 25, file: !1, scope: !23, type: !37, function: void ()* @_GLOBAL__I_a, variables: !2)
+!36 = distinct !DISubprogram(name: "", linkageName: "_GLOBAL__I_a", line: 25, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, scopeLine: 25, file: !1, scope: !23, type: !37, variables: !2)
!37 = !DISubroutineType(types: !2)
!38 = !{!39, !40, !41, !42}
!39 = !DIGlobalVariable(name: "b", line: 3, isLocal: false, isDefinition: true, scope: null, file: !23, type: !4, variable: %struct.bar* @b)
@@ -264,12 +264,12 @@ attributes #1 = { nounwind readnone }
!43 = !{i32 2, !"Dwarf Version", i32 4}
!44 = !{i32 1, !"Debug Info Version", i32 3}
!45 = !{!"clang version 3.5 "}
-!46 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 7, scope: !22, file: !23, type: !47)
+!46 = !DILocalVariable(name: "b", line: 7, scope: !22, file: !23, type: !47)
!47 = !DICompositeType(tag: DW_TAG_structure_type, name: "baz", line: 6, size: 8, align: 8, file: !1, scope: !22, elements: !2)
!48 = !DILocation(line: 7, scope: !22)
!49 = !DILocation(line: 8, scope: !22)
!50 = !DILocation(line: 29, scope: !26)
-!51 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !27, type: !52)
+!51 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !27, type: !52)
!52 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !28)
!53 = !DILocation(line: 0, scope: !27)
!54 = !DILocation(line: 25, scope: !27)
diff --git a/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll b/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
index ef249e8fab9c..20bb1b3f145f 100644
--- a/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
+++ b/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
@@ -29,7 +29,7 @@
; when run with r221709 reverted, then it really doesn't test anything anymore.
; Function Attrs: nounwind ssp uwtable
-define i32 @foo(i32 %a) #0 {
+define i32 @foo(i32 %a) #0 !dbg !8 {
entry:
call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !17), !dbg !18
%conv = trunc i32 %a to i16, !dbg !19
@@ -68,7 +68,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!13, !14}
!llvm.ident = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !7, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !7, globals: !2, imports: !2)
!1 = !DIFile(filename: "ghost-sdnode-dbgvalues.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
@@ -76,7 +76,7 @@ attributes #1 = { nounwind readnone }
!5 = !DIFile(filename: "/usr/include/sys/_types/_int16_t.h", directory: "/tmp")
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "short", size: 16, align: 16, encoding: DW_ATE_signed)
!7 = !{!8}
-!8 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !9, type: !10, function: i32 (i32)* @foo, variables: !2)
+!8 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !9, type: !10, variables: !2)
!9 = !DIFile(filename: "ghost-sdnode-dbgvalues.c", directory: "/tmp")
!10 = !DISubroutineType(types: !11)
!11 = !{!12, !12}
@@ -84,22 +84,22 @@ attributes #1 = { nounwind readnone }
!13 = !{i32 2, !"Dwarf Version", i32 2}
!14 = !{i32 2, !"Debug Info Version", i32 3}
!15 = !{!"clang version 3.6.0 "}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !8, file: !9, type: !12)
+!16 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !8, file: !9, type: !12)
!17 = !DIExpression()
!18 = !DILocation(line: 3, column: 13, scope: !8)
!19 = !DILocation(line: 4, column: 5, scope: !8)
-!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 4, scope: !8, file: !9, type: !12)
+!20 = !DILocalVariable(name: "b", line: 4, scope: !8, file: !9, type: !12)
!21 = !DILocation(line: 4, column: 9, scope: !8)
!22 = !DILocation(line: 5, column: 5, scope: !8)
-!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 5, scope: !8, file: !9, type: !12)
+!23 = !DILocalVariable(name: "c", line: 5, scope: !8, file: !9, type: !12)
!24 = !DILocation(line: 5, column: 9, scope: !8)
!25 = !DILocation(line: 6, column: 5, scope: !8)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", line: 6, scope: !8, file: !9, type: !12)
+!26 = !DILocalVariable(name: "d", line: 6, scope: !8, file: !9, type: !12)
!27 = !DILocation(line: 6, column: 9, scope: !8)
!28 = !DILocation(line: 7, column: 5, scope: !8)
-!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "e", line: 7, scope: !8, file: !9, type: !12)
+!29 = !DILocalVariable(name: "e", line: 7, scope: !8, file: !9, type: !12)
!30 = !DILocation(line: 7, column: 9, scope: !8)
!31 = !DILocation(line: 8, column: 5, scope: !8)
-!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f", line: 8, scope: !8, file: !9, type: !12)
+!32 = !DILocalVariable(name: "f", line: 8, scope: !8, file: !9, type: !12)
!33 = !DILocation(line: 8, column: 9, scope: !8)
!34 = !DILocation(line: 9, column: 5, scope: !8)
diff --git a/test/DebugInfo/X86/gnu-public-names-empty.ll b/test/DebugInfo/X86/gnu-public-names-empty.ll
index d87d8ef19008..4bd33df10dce 100644
--- a/test/DebugInfo/X86/gnu-public-names-empty.ll
+++ b/test/DebugInfo/X86/gnu-public-names-empty.ll
@@ -12,7 +12,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 191846) (llvm/trunk 191866)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 191846) (llvm/trunk 191866)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.c", directory: "/usr/local/google/home/echristo/tmp")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll
index 2022dfaa687c..584c879c064a 100644
--- a/test/DebugInfo/X86/gnu-public-names.ll
+++ b/test/DebugInfo/X86/gnu-public-names.ll
@@ -240,7 +240,7 @@
@_ZN5outer12_GLOBAL__N_11cE = internal global i32 0, align 4
; Function Attrs: nounwind uwtable
-define void @_ZN1C15member_functionEv(%struct.C* %this) #0 align 2 {
+define void @_ZN1C15member_functionEv(%struct.C* %this) #0 align 2 !dbg !20 {
entry:
%this.addr = alloca %struct.C*, align 8
store %struct.C* %this, %struct.C** %this.addr, align 8
@@ -254,33 +254,33 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind uwtable
-define i32 @_ZN1C22static_member_functionEv() #0 align 2 {
+define i32 @_ZN1C22static_member_functionEv() #0 align 2 !dbg !21 {
entry:
%0 = load i32, i32* @_ZN1C22static_member_variableE, align 4, !dbg !57
ret i32 %0, !dbg !57
}
; Function Attrs: nounwind uwtable
-define i32 @_Z15global_functionv() #0 {
+define i32 @_Z15global_functionv() #0 !dbg !22 {
entry:
ret i32 -1, !dbg !58
}
; Function Attrs: nounwind uwtable
-define void @_ZN2ns25global_namespace_functionEv() #0 {
+define void @_ZN2ns25global_namespace_functionEv() #0 !dbg !23 {
entry:
call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !59
ret void, !dbg !60
}
; Function Attrs: nounwind uwtable
-define i32* @_Z2f3v() #0 {
+define i32* @_Z2f3v() #0 !dbg !26 {
entry:
ret i32* @_ZZ2f3vE1z, !dbg !61
}
; Function Attrs: nounwind uwtable
-define i32 @_Z2f7v() #0 {
+define i32 @_Z2f7v() #0 !dbg !30 {
entry:
%0 = load i32, i32* @_ZN12_GLOBAL__N_11iE, align 4, !dbg !62
%call = call i32* @_Z2f3v(), !dbg !62
@@ -300,7 +300,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!48, !49}
!llvm.ident = !{!50}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 234897) (llvm/trunk 234911)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !19, globals: !31, imports: !44)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.7.0 (trunk 234897) (llvm/trunk 234911)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !19, globals: !31, imports: !44)
!1 = !DIFile(filename: "gnu-public-names.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !15}
@@ -320,17 +320,17 @@ attributes #1 = { nounwind readnone }
!17 = !{!18}
!18 = !DIDerivedType(tag: DW_TAG_member, name: "A", scope: !"_ZTSN2ns1DE", file: !1, line: 30, baseType: !7, size: 32, align: 32)
!19 = !{!20, !21, !22, !23, !26, !30}
-!20 = !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", scope: !"_ZTS1C", file: !1, line: 9, type: !9, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, function: void (%struct.C*)* @_ZN1C15member_functionEv, declaration: !8, variables: !2)
-!21 = !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", scope: !"_ZTS1C", file: !1, line: 13, type: !13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, function: i32 ()* @_ZN1C22static_member_functionEv, declaration: !12, variables: !2)
-!22 = !DISubprogram(name: "global_function", linkageName: "_Z15global_functionv", scope: !1, file: !1, line: 19, type: !13, isLocal: false, isDefinition: true, scopeLine: 19, flags: DIFlagPrototyped, isOptimized: false, function: i32 ()* @_Z15global_functionv, variables: !2)
-!23 = !DISubprogram(name: "global_namespace_function", linkageName: "_ZN2ns25global_namespace_functionEv", scope: !16, file: !1, line: 24, type: !24, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @_ZN2ns25global_namespace_functionEv, variables: !2)
+!20 = distinct !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", scope: !"_ZTS1C", file: !1, line: 9, type: !9, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, declaration: !8, variables: !2)
+!21 = distinct !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", scope: !"_ZTS1C", file: !1, line: 13, type: !13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, declaration: !12, variables: !2)
+!22 = distinct !DISubprogram(name: "global_function", linkageName: "_Z15global_functionv", scope: !1, file: !1, line: 19, type: !13, isLocal: false, isDefinition: true, scopeLine: 19, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!23 = distinct !DISubprogram(name: "global_namespace_function", linkageName: "_ZN2ns25global_namespace_functionEv", scope: !16, file: !1, line: 24, type: !24, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!24 = !DISubroutineType(types: !25)
!25 = !{null}
-!26 = !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 40, type: !27, isLocal: false, isDefinition: true, scopeLine: 40, flags: DIFlagPrototyped, isOptimized: false, function: i32* ()* @_Z2f3v, variables: !2)
+!26 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 40, type: !27, isLocal: false, isDefinition: true, scopeLine: 40, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!27 = !DISubroutineType(types: !28)
!28 = !{!29}
!29 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, align: 64)
-!30 = !DISubprogram(name: "f7", linkageName: "_Z2f7v", scope: !1, file: !1, line: 57, type: !13, isLocal: false, isDefinition: true, scopeLine: 57, flags: DIFlagPrototyped, isOptimized: false, function: i32 ()* @_Z2f7v, variables: !2)
+!30 = distinct !DISubprogram(name: "f7", linkageName: "_Z2f7v", scope: !1, file: !1, line: 57, type: !13, isLocal: false, isDefinition: true, scopeLine: 57, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!31 = !{!32, !33, !34, !35, !36, !37, !39, !41}
!32 = !DIGlobalVariable(name: "static_member_variable", linkageName: "_ZN1C22static_member_variableE", scope: !0, file: !1, line: 7, type: !7, isLocal: false, isDefinition: true, variable: i32* @_ZN1C22static_member_variableE, declaration: !6)
!33 = !DIGlobalVariable(name: "global_variable", scope: !0, file: !1, line: 17, type: !"_ZTS1C", isLocal: false, isDefinition: true, variable: %struct.C* @global_variable)
@@ -351,7 +351,7 @@ attributes #1 = { nounwind readnone }
!48 = !{i32 2, !"Dwarf Version", i32 4}
!49 = !{i32 2, !"Debug Info Version", i32 3}
!50 = !{!"clang version 3.7.0 (trunk 234897) (llvm/trunk 234911)"}
-!51 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, scope: !20, type: !52, flags: DIFlagArtificial | DIFlagObjectPointer)
+!51 = !DILocalVariable(name: "this", arg: 1, scope: !20, type: !52, flags: DIFlagArtificial | DIFlagObjectPointer)
!52 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !"_ZTS1C", size: 64, align: 64)
!53 = !DIExpression()
!54 = !DILocation(line: 0, scope: !20)
diff --git a/test/DebugInfo/X86/header.ll b/test/DebugInfo/X86/header.ll
index 8575c4a672a2..1b210d0b90a2 100644
--- a/test/DebugInfo/X86/header.ll
+++ b/test/DebugInfo/X86/header.ll
@@ -11,17 +11,17 @@
; CHECK: .section .debug_str
-define void @f() {
+define void @f() !dbg !4 {
ret void, !dbg !9
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "foo", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "foo", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "/foo/test.c", directory: "/foo")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, function: void ()* @f, variables: !2)
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/DebugInfo/X86/inline-member-function.ll b/test/DebugInfo/X86/inline-member-function.ll
index 73cd1947f4f4..712a20c3a4a8 100644
--- a/test/DebugInfo/X86/inline-member-function.ll
+++ b/test/DebugInfo/X86/inline-member-function.ll
@@ -37,7 +37,7 @@
@i = global i32 0, align 4
; Function Attrs: uwtable
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !13 {
entry:
%this.addr.i = alloca %struct.foo*, align 8
%x.addr.i = alloca i32, align 4
@@ -65,7 +65,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!20, !21}
!llvm.ident = !{!22}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !18, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !18, imports: !2)
!1 = !DIFile(filename: "inline.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
@@ -77,19 +77,19 @@ attributes #1 = { nounwind readnone }
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS3foo")
!12 = !{!13, !17}
-!13 = !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !14, type: !15, function: i32 ()* @main, variables: !2)
+!13 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !14, type: !15, variables: !2)
!14 = !DIFile(filename: "inline.cpp", directory: "/tmp/dbginfo")
!15 = !DISubroutineType(types: !16)
!16 = !{!9}
-!17 = !DISubprogram(name: "func", linkageName: "_ZN3foo4funcEi", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS3foo", type: !7, declaration: !6, variables: !2)
+!17 = distinct !DISubprogram(name: "func", linkageName: "_ZN3foo4funcEi", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS3foo", type: !7, declaration: !6, variables: !2)
!18 = !{!19}
!19 = !DIGlobalVariable(name: "i", line: 5, isLocal: false, isDefinition: true, scope: null, file: !14, type: !9, variable: i32* @i)
!20 = !{i32 2, !"Dwarf Version", i32 4}
!21 = !{i32 1, !"Debug Info Version", i32 3}
!22 = !{!"clang version 3.5.0 "}
!23 = !DILocation(line: 8, scope: !13)
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !17, type: !25)
+!24 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !17, type: !25)
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS3foo")
!26 = !DILocation(line: 0, scope: !17, inlinedAt: !23)
-!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 2, arg: 2, scope: !17, file: !14, type: !9)
+!27 = !DILocalVariable(name: "x", line: 2, arg: 2, scope: !17, file: !14, type: !9)
!28 = !DILocation(line: 2, scope: !17, inlinedAt: !23)
diff --git a/test/DebugInfo/X86/inline-seldag-test.ll b/test/DebugInfo/X86/inline-seldag-test.ll
index c8e75cf3fcb3..62b946611f4f 100644
--- a/test/DebugInfo/X86/inline-seldag-test.ll
+++ b/test/DebugInfo/X86/inline-seldag-test.ll
@@ -23,7 +23,7 @@
; ASM: testl
; Function Attrs: nounwind uwtable
-define void @func() #0 {
+define void @func() #0 !dbg !4 {
entry:
%y.addr.i = alloca i32, align 4
%x = alloca i32, align 4
@@ -48,26 +48,26 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!12, !13}
!llvm.ident = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "inline-seldag-test.c", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !8}
-!4 = !DISubprogram(name: "func", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, function: void ()* @func, variables: !2)
+!4 = distinct !DISubprogram(name: "func", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "inline-seldag-test.c", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
-!8 = !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !9, variables: !2)
+!8 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !9, variables: !2)
!9 = !DISubroutineType(types: !10)
!10 = !{!11, !11}
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!12 = !{i32 2, !"Dwarf Version", i32 4}
!13 = !{i32 1, !"Debug Info Version", i32 3}
!14 = !{!"clang version 3.5.0 "}
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 5, scope: !4, file: !5, type: !16)
+!15 = !DILocalVariable(name: "x", line: 5, scope: !4, file: !5, type: !16)
!16 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !11)
!17 = !DILocation(line: 5, scope: !4)
!18 = !DILocation(line: 6, column: 7, scope: !4)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 1, arg: 1, scope: !8, file: !5, type: !11)
+!19 = !DILocalVariable(name: "y", line: 1, arg: 1, scope: !8, file: !5, type: !11)
!20 = !DILocation(line: 1, scope: !8, inlinedAt: !18)
!21 = !DILocation(line: 2, scope: !8, inlinedAt: !18)
!22 = !DILocation(line: 7, scope: !4)
diff --git a/test/DebugInfo/X86/inlined-formal-parameter.ll b/test/DebugInfo/X86/inlined-formal-parameter.ll
index 3cc4e5a65a66..49c1747d7d6d 100644
--- a/test/DebugInfo/X86/inlined-formal-parameter.ll
+++ b/test/DebugInfo/X86/inlined-formal-parameter.ll
@@ -25,7 +25,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin"
; Function Attrs: nounwind ssp uwtable
-define void @foo() #0 {
+define void @foo() #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !12, metadata !17) #3, !dbg !18
tail call void @sink() #3, !dbg !20
@@ -48,19 +48,19 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!13, !14, !15}
!llvm.ident = !{!16}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 235110) (llvm/trunk 235108)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 235110) (llvm/trunk 235108)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "t.c", directory: "/path/to/dir")
!2 = !{}
!3 = !{!4, !7}
-!4 = !DISubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, function: void ()* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
-!7 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !8, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
+!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !8, isLocal: true, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
!8 = !DISubroutineType(types: !9)
!9 = !{null, !10}
!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !{!12}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", arg: 1, scope: !7, file: !1, line: 2, type: !10)
+!12 = !DILocalVariable(name: "a", arg: 1, scope: !7, file: !1, line: 2, type: !10)
!13 = !{i32 2, !"Dwarf Version", i32 2}
!14 = !{i32 2, !"Debug Info Version", i32 3}
!15 = !{i32 1, !"PIC Level", i32 2}
diff --git a/test/DebugInfo/X86/inlined-indirect-value.ll b/test/DebugInfo/X86/inlined-indirect-value.ll
index 7f95691fbb8f..d203650d05c5 100644
--- a/test/DebugInfo/X86/inlined-indirect-value.ll
+++ b/test/DebugInfo/X86/inlined-indirect-value.ll
@@ -22,7 +22,7 @@ target triple = "x86_64-unknown-linux-gnu"
@x = common global i32 0, align 4
@y = common global i32 0, align 4
-define i32 @main() {
+define i32 @main() !dbg !4 {
; CHECK: .loc 1 {{[89]}}
; CHECK-NOT: .loc
; CHECK: movl $1
@@ -49,15 +49,15 @@ select.end: ; preds = %entry, %select.mid
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!13, !14}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
!1 = !DIFile(filename: "inline-break.c", directory: "/build/dir")
!2 = !{}
!3 = !{!4, !8}
-!4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, isOptimized: true, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, isOptimized: true, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DISubprogram(name: "f1", scope: !1, file: !1, line: 3, type: !5, isLocal: true, isDefinition: true, scopeLine: 3, isOptimized: true, variables: !2)
+!8 = distinct !DISubprogram(name: "f1", scope: !1, file: !1, line: 3, type: !5, isLocal: true, isDefinition: true, scopeLine: 3, isOptimized: true, variables: !2)
!9 = !{!10, !12}
!10 = !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 1, type: !11, isLocal: false, isDefinition: true, variable: i32* @x)
!11 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7)
diff --git a/test/DebugInfo/X86/instcombine-instrinsics.ll b/test/DebugInfo/X86/instcombine-instrinsics.ll
index 8241e6498f96..f4f7e1403d32 100644
--- a/test/DebugInfo/X86/instcombine-instrinsics.ll
+++ b/test/DebugInfo/X86/instcombine-instrinsics.ll
@@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.9.0"
%struct.i14 = type { i64 }
; Function Attrs: nounwind ssp uwtable
-define void @init() #0 {
+define void @init() #0 !dbg !4 {
%p = alloca %struct.i14*, align 8
call void @llvm.dbg.declare(metadata %struct.i14** %p, metadata !11, metadata !DIExpression()), !dbg !18
store %struct.i14* null, %struct.i14** %p, align 8, !dbg !18
@@ -54,18 +54,18 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "instcombine_intrinsics.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "init", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, function: void ()* @init, variables: !2)
+!4 = distinct !DISubprogram(name: "init", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "instcombine_intrinsics.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
!8 = !{i32 2, !"Dwarf Version", i32 2}
!9 = !{i32 1, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.5.0 "}
-!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "p", line: 8, scope: !4, file: !5, type: !12)
+!11 = !DILocalVariable(name: "p", line: 8, scope: !4, file: !5, type: !12)
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !13)
!13 = !DIDerivedType(tag: DW_TAG_typedef, name: "i14", line: 3, file: !1, baseType: !14)
!14 = !DICompositeType(tag: DW_TAG_structure_type, line: 1, size: 64, align: 64, file: !1, elements: !15)
diff --git a/test/DebugInfo/X86/lexical_block.ll b/test/DebugInfo/X86/lexical_block.ll
index 2619f12f69ed..5fdfcdfe269b 100644
--- a/test/DebugInfo/X86/lexical_block.ll
+++ b/test/DebugInfo/X86/lexical_block.ll
@@ -22,7 +22,7 @@
; }
; Function Attrs: nounwind uwtable
-define void @_Z1bv() #0 {
+define void @_Z1bv() #0 !dbg !4 {
entry:
%i = alloca i32, align 4
call void @llvm.dbg.declare(metadata i32* %i, metadata !11, metadata !DIExpression()), !dbg !14
@@ -48,18 +48,18 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "lexical_block.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "b", linkageName: "_Z1bv", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @_Z1bv, variables: !2)
+!4 = distinct !DISubprogram(name: "b", linkageName: "_Z1bv", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "lexical_block.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 1, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.5.0 "}
-!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2, scope: !12, file: !5, type: !13)
+!11 = !DILocalVariable(name: "i", line: 2, scope: !12, file: !5, type: !13)
!12 = distinct !DILexicalBlock(line: 2, column: 0, file: !1, scope: !4)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !DILocation(line: 2, scope: !12)
diff --git a/test/DebugInfo/X86/line-info.ll b/test/DebugInfo/X86/line-info.ll
index a3d957f8bc48..bdd91db9eecb 100644
--- a/test/DebugInfo/X86/line-info.ll
+++ b/test/DebugInfo/X86/line-info.ll
@@ -14,7 +14,7 @@
; int main() {
; }
-define i32 @foo(i32 %x) #0 {
+define i32 @foo(i32 %x) #0 !dbg !4 {
entry:
%x.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
@@ -27,7 +27,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !10 {
entry:
ret i32 0, !dbg !17
}
@@ -38,21 +38,21 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!19}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "list0.c", directory: "/usr/local/google/home/blaikie/dev/scratch")
!2 = !{}
!3 = !{!4, !10}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !5, scope: !6, type: !7, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "./list0.h", directory: "/usr/local/google/home/blaikie/dev/scratch")
!6 = !DIFile(filename: "./list0.h", directory: "/usr/local/google/home/blaikie/dev/scratch")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !1, scope: !11, type: !12, function: i32 ()* @main, variables: !2)
+!10 = distinct !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !1, scope: !11, type: !12, variables: !2)
!11 = !DIFile(filename: "list0.c", directory: "/usr/local/google/home/blaikie/dev/scratch")
!12 = !DISubroutineType(types: !13)
!13 = !{!9}
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 1, scope: !4, file: !6, type: !9)
+!14 = !DILocalVariable(name: "x", line: 1, arg: 1, scope: !4, file: !6, type: !9)
!15 = !DILocation(line: 1, scope: !4)
!16 = !DILocation(line: 2, scope: !4)
!17 = !DILocation(line: 3, scope: !18)
diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll
index 330a7f9e53a1..4408d5132c46 100644
--- a/test/DebugInfo/X86/linkage-name.ll
+++ b/test/DebugInfo/X86/linkage-name.ll
@@ -9,7 +9,7 @@
@a = global %class.A zeroinitializer, align 1
-define i32 @_ZN1A1aEi(%class.A* %this, i32 %b) nounwind uwtable ssp align 2 {
+define i32 @_ZN1A1aEi(%class.A* %this, i32 %b) nounwind uwtable ssp align 2 !dbg !5 {
entry:
%this.addr = alloca %class.A*, align 8
%b.addr = alloca i32, align 4
@@ -27,10 +27,10 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!29}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 (trunk 152691) (llvm/trunk 152692)", isOptimized: false, emissionKind: 0, file: !28, enums: !1, retainedTypes: !1, subprograms: !3, globals: !18, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.1 (trunk 152691) (llvm/trunk 152692)", isOptimized: false, emissionKind: 0, file: !28, enums: !1, retainedTypes: !1, subprograms: !3, globals: !18, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "a", linkageName: "_ZN1A1aEi", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !6, scope: null, type: !7, function: i32 (%class.A*, i32)* @_ZN1A1aEi, declaration: !13)
+!5 = distinct !DISubprogram(name: "a", linkageName: "_ZN1A1aEi", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !6, scope: null, type: !7, declaration: !13)
!6 = !DIFile(filename: "foo.cpp", directory: "/Users/echristo")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !10, !9}
@@ -41,10 +41,10 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!13 = !DISubprogram(name: "a", linkageName: "_ZN1A1aEi", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate | DIFlagPrototyped, isOptimized: false, file: !6, scope: !11, type: !7)
!18 = !{!20}
!20 = !DIGlobalVariable(name: "a", line: 9, isLocal: false, isDefinition: true, scope: null, file: !6, type: !11, variable: %class.A* @a)
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 5, arg: 1, flags: DIFlagArtificial, scope: !5, file: !6, type: !22)
+!21 = !DILocalVariable(name: "this", line: 5, arg: 1, flags: DIFlagArtificial, scope: !5, file: !6, type: !22)
!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !11)
!23 = !DILocation(line: 5, column: 8, scope: !5)
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 2, scope: !5, file: !6, type: !9)
+!24 = !DILocalVariable(name: "b", line: 5, arg: 2, scope: !5, file: !6, type: !9)
!25 = !DILocation(line: 5, column: 14, scope: !5)
!26 = !DILocation(line: 6, column: 4, scope: !27)
!27 = distinct !DILexicalBlock(line: 5, column: 17, file: !6, scope: !5)
diff --git a/test/DebugInfo/X86/live-debug-values.ll b/test/DebugInfo/X86/live-debug-values.ll
new file mode 100644
index 000000000000..615d498041bb
--- /dev/null
+++ b/test/DebugInfo/X86/live-debug-values.ll
@@ -0,0 +1,152 @@
+; RUN: llc -filetype=asm %s -o - | FileCheck %s
+
+; Test the extension of debug ranges from predecessors.
+; Generated from the source file LiveDebugValues.c:
+; #include <stdio.h>
+; int m;
+; extern int inc(int n);
+; extern int change(int n);
+; extern int modify(int n);
+; int main(int argc, char **argv) {
+; int n;
+; if (argc != 2)
+; n = 2;
+; else
+; n = atoi(argv[1]);
+; n = change(n);
+; if (n > 10) {
+; m = modify(n);
+; m = m + n; // var `m' doesn't has a dbg.value
+; }
+; else
+; m = inc(n); // var `m' doesn't has a dbg.value
+; printf("m(main): %d\n", m);
+; return 0;
+; }
+; with clang -g -O3 -emit-llvm -c LiveDebugValues.c -S -o live-debug-values.ll
+; This case will also produce multiple locations but only the debug range
+; extension is tested here.
+
+; DBG_VALUE for variable "n" is extended into BB#5 from its predecessors BB#3
+; and BB#4.
+; CHECK: .LBB0_5:
+; CHECK-NEXT: #DEBUG_VALUE: main:n <- %EBX
+; CHECK-NEXT: #DEBUG_VALUE: main:argv <- %RSI
+
+; ModuleID = 'LiveDebugValues.c'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@m = common global i32 0, align 4
+@.str = private unnamed_addr constant [13 x i8] c"m(main): %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 !dbg !4 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !12, metadata !20), !dbg !21
+ tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !13, metadata !20), !dbg !22
+ %cmp = icmp eq i32 %argc, 2, !dbg !24
+ br i1 %cmp, label %if.else, label %if.end, !dbg !26
+
+if.else: ; preds = %entry
+ %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1, !dbg !27
+ %0 = load i8*, i8** %arrayidx, align 8, !dbg !27, !tbaa !28
+ %call = tail call i32 (i8*, ...) bitcast (i32 (...)* @atoi to i32 (i8*, ...)*)(i8* %0) #4, !dbg !32
+ tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !14, metadata !20), !dbg !33
+ br label %if.end
+
+if.end: ; preds = %entry, %if.else
+ %n.0 = phi i32 [ %call, %if.else ], [ 2, %entry ]
+ %call1 = tail call i32 @change(i32 %n.0) #4, !dbg !34
+ tail call void @llvm.dbg.value(metadata i32 %call1, i64 0, metadata !14, metadata !20), !dbg !33
+ %cmp2 = icmp sgt i32 %call1, 10, !dbg !35
+ br i1 %cmp2, label %if.then.3, label %if.else.5, !dbg !37
+
+if.then.3: ; preds = %if.end
+ %call4 = tail call i32 @modify(i32 %call1) #4, !dbg !38
+ %add = add nsw i32 %call4, %call1, !dbg !40
+ br label %if.end.7, !dbg !41
+
+if.else.5: ; preds = %if.end
+ %call6 = tail call i32 @inc(i32 %call1) #4, !dbg !42
+ br label %if.end.7
+
+if.end.7: ; preds = %if.else.5, %if.then.3
+ %storemerge = phi i32 [ %call6, %if.else.5 ], [ %add, %if.then.3 ]
+ store i32 %storemerge, i32* @m, align 4, !dbg !43, !tbaa !44
+ %call8 = tail call i32 (i8*, ...) @printf(i8* nonnull getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %storemerge) #4, !dbg !46
+ ret i32 0, !dbg !47
+}
+
+declare i32 @atoi(...) #1
+
+declare i32 @change(i32) #1
+
+declare i32 @modify(i32) #1
+
+declare i32 @inc(i32) #1
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 253049) ", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !15)
+!1 = !DIFile(filename: "LiveDebugValues.c", directory: "/home/vt/julia/test/tvvikram")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7, !8}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 64)
+!10 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!11 = !{!12, !13, !14}
+!12 = !DILocalVariable(name: "argc", arg: 1, scope: !4, file: !1, line: 6, type: !7)
+!13 = !DILocalVariable(name: "argv", arg: 2, scope: !4, file: !1, line: 6, type: !8)
+!14 = !DILocalVariable(name: "n", scope: !4, file: !1, line: 7, type: !7)
+!15 = !{!16}
+!16 = !DIGlobalVariable(name: "m", scope: !0, file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, variable: i32* @m)
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{!"clang version 3.8.0 (trunk 253049) "}
+!20 = !DIExpression()
+!21 = !DILocation(line: 6, column: 14, scope: !4)
+!22 = !DILocation(line: 6, column: 27, scope: !23)
+!23 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 1)
+!24 = !DILocation(line: 8, column: 12, scope: !25)
+!25 = distinct !DILexicalBlock(scope: !4, file: !1, line: 8, column: 7)
+!26 = !DILocation(line: 8, column: 7, scope: !4)
+!27 = !DILocation(line: 11, column: 14, scope: !25)
+!28 = !{!29, !29, i64 0}
+!29 = !{!"any pointer", !30, i64 0}
+!30 = !{!"omnipotent char", !31, i64 0}
+!31 = !{!"Simple C/C++ TBAA"}
+!32 = !DILocation(line: 11, column: 9, scope: !25)
+!33 = !DILocation(line: 7, column: 7, scope: !23)
+!34 = !DILocation(line: 12, column: 7, scope: !4)
+!35 = !DILocation(line: 13, column: 9, scope: !36)
+!36 = distinct !DILexicalBlock(scope: !4, file: !1, line: 13, column: 7)
+!37 = !DILocation(line: 13, column: 7, scope: !4)
+!38 = !DILocation(line: 14, column: 9, scope: !39)
+!39 = distinct !DILexicalBlock(scope: !36, file: !1, line: 13, column: 15)
+!40 = !DILocation(line: 15, column: 11, scope: !39)
+!41 = !DILocation(line: 16, column: 3, scope: !39)
+!42 = !DILocation(line: 18, column: 9, scope: !36)
+!43 = !DILocation(line: 15, column: 7, scope: !39)
+!44 = !{!45, !45, i64 0}
+!45 = !{!"int", !30, i64 0}
+!46 = !DILocation(line: 19, column: 3, scope: !4)
+!47 = !DILocation(line: 20, column: 3, scope: !4)
diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll
index 727a7bb3dec1..f98da6f5ae28 100644
--- a/test/DebugInfo/X86/low-pc-cu.ll
+++ b/test/DebugInfo/X86/low-pc-cu.ll
@@ -21,7 +21,7 @@
; CHECK-V4: DW_AT_high_pc [DW_FORM_data4]
; Function Attrs: nounwind uwtable
-define void @z() #0 {
+define void @z() #0 !dbg !4 {
entry:
ret void, !dbg !11
}
@@ -32,11 +32,11 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "z.c", directory: "/usr/local/google/home/echristo")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "z", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @z, variables: !2)
+!4 = distinct !DISubprogram(name: "z", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "z.c", directory: "/usr/local/google/home/echristo")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
diff --git a/test/DebugInfo/X86/memberfnptr.ll b/test/DebugInfo/X86/memberfnptr.ll
index a53264d2c856..9b1c57dd7442 100644
--- a/test/DebugInfo/X86/memberfnptr.ll
+++ b/test/DebugInfo/X86/memberfnptr.ll
@@ -24,7 +24,7 @@ declare void @_ZN1A3fooEv(%struct.A*)
!llvm.module.flags = !{!14, !15, !16}
!llvm.ident = !{!17}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !10, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !10, imports: !2)
!1 = !DIFile(filename: "memberfnptr.cpp", directory: "")
!2 = !{}
!3 = !{!4}
diff --git a/test/DebugInfo/X86/mi-print.ll b/test/DebugInfo/X86/mi-print.ll
index b5d7b0ef067f..086e88deffb0 100644
--- a/test/DebugInfo/X86/mi-print.ll
+++ b/test/DebugInfo/X86/mi-print.ll
@@ -14,7 +14,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; Function Attrs: nounwind readnone ssp uwtable
-define i32 @bar(i32 %x) #0 {
+define i32 @bar(i32 %x) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !9, metadata !17), !dbg !18
tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !19, metadata !17), !dbg !21
@@ -31,26 +31,26 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!13, !14, !15}
!llvm.ident = !{!16}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 233919) (llvm/trunk 233920)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 233919) (llvm/trunk 233920)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "t.c", directory: "/Users/dexonsmith/data/llvm/debug-info/test/DebugInfo/X86")
!2 = !{}
!3 = !{!4, !10}
-!4 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, function: i32 (i32)* @bar, variables: !8)
+!4 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !{!9}
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: !4, file: !1, line: 2, type: !7)
-!10 = !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: true, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
+!9 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !1, line: 2, type: !7)
+!10 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: true, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
!11 = !{!12}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: !10, file: !1, line: 1, type: !7)
+!12 = !DILocalVariable(name: "x", arg: 1, scope: !10, file: !1, line: 1, type: !7)
!13 = !{i32 2, !"Dwarf Version", i32 2}
!14 = !{i32 2, !"Debug Info Version", i32 3}
!15 = !{i32 1, !"PIC Level", i32 2}
!16 = !{!"clang version 3.7.0 (trunk 233919) (llvm/trunk 233920)"}
!17 = !DIExpression()
!18 = !DILocation(line: 2, column: 13, scope: !4)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: !10, file: !1, line: 1, type: !7)
+!19 = !DILocalVariable(name: "x", arg: 1, scope: !10, file: !1, line: 1, type: !7)
!20 = distinct !DILocation(line: 2, column: 25, scope: !4)
!21 = !DILocation(line: 1, column: 20, scope: !10, inlinedAt: !20)
!22 = !DILocation(line: 2, column: 18, scope: !4)
diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll
index c660e1f72a9f..e85e241c589f 100644
--- a/test/DebugInfo/X86/misched-dbg-value.ll
+++ b/test/DebugInfo/X86/misched-dbg-value.ll
@@ -46,7 +46,7 @@
@PtrGlb = common global %struct.Record* null, align 8
@PtrGlbNext = common global %struct.Record* null, align 8
-define void @Proc8(i32* nocapture %Array1Par, [51 x i32]* nocapture %Array2Par, i32 %IntParI1, i32 %IntParI2) nounwind optsize {
+define void @Proc8(i32* nocapture %Array1Par, [51 x i32]* nocapture %Array2Par, i32 %IntParI1, i32 %IntParI2) nounwind optsize !dbg !12 {
entry:
tail call void @llvm.dbg.value(metadata i32* %Array1Par, i64 0, metadata !23, metadata !DIExpression()), !dbg !64
tail call void @llvm.dbg.value(metadata [51 x i32]* %Array2Par, i64 0, metadata !24, metadata !DIExpression()), !dbg !65
@@ -103,7 +103,7 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!83}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 175015)", isOptimized: true, emissionKind: 1, file: !82, enums: !1, retainedTypes: !10, subprograms: !11, globals: !29, imports: !10)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 175015)", isOptimized: true, emissionKind: 1, file: !82, enums: !1, retainedTypes: !10, subprograms: !11, globals: !29, imports: !10)
!1 = !{!2}
!2 = !DICompositeType(tag: DW_TAG_enumeration_type, line: 128, size: 32, align: 32, file: !82, elements: !4)
!3 = !DIFile(filename: "dry.c", directory: "/Users/manmanren/test-Nov/rdar_13183203/test2")
@@ -115,7 +115,7 @@ attributes #1 = { nounwind readnone }
!9 = !DIEnumerator(name: "Ident5", value: 10003) ; [ DW_TAG_enumerator ] [Ident5 :: 10003]
!10 = !{}
!11 = !{!12}
-!12 = !DISubprogram(name: "Proc8", line: 180, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 185, file: !82, scope: !3, type: !13, function: void (i32*, [51 x i32]*, i32, i32)* @Proc8, variables: !22)
+!12 = distinct !DISubprogram(name: "Proc8", line: 180, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 185, file: !82, scope: !3, type: !13, variables: !22)
!13 = !DISubroutineType(types: !14)
!14 = !{null, !15, !17, !21, !21}
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !16)
@@ -126,12 +126,12 @@ attributes #1 = { nounwind readnone }
!20 = !DISubrange(count: 51)
!21 = !DIDerivedType(tag: DW_TAG_typedef, name: "OneToFifty", line: 132, file: !82, baseType: !16)
!22 = !{!23, !24, !25, !26, !27, !28}
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "Array1Par", line: 181, arg: 1, scope: !12, file: !3, type: !15)
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "Array2Par", line: 182, arg: 2, scope: !12, file: !3, type: !17)
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "IntParI1", line: 183, arg: 3, scope: !12, file: !3, type: !21)
-!26 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "IntParI2", line: 184, arg: 4, scope: !12, file: !3, type: !21)
-!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "IntLoc", line: 186, scope: !12, file: !3, type: !21)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "IntIndex", line: 187, scope: !12, file: !3, type: !21)
+!23 = !DILocalVariable(name: "Array1Par", line: 181, arg: 1, scope: !12, file: !3, type: !15)
+!24 = !DILocalVariable(name: "Array2Par", line: 182, arg: 2, scope: !12, file: !3, type: !17)
+!25 = !DILocalVariable(name: "IntParI1", line: 183, arg: 3, scope: !12, file: !3, type: !21)
+!26 = !DILocalVariable(name: "IntParI2", line: 184, arg: 4, scope: !12, file: !3, type: !21)
+!27 = !DILocalVariable(name: "IntLoc", line: 186, scope: !12, file: !3, type: !21)
+!28 = !DILocalVariable(name: "IntIndex", line: 187, scope: !12, file: !3, type: !21)
!29 = !{!30, !35, !36, !38, !39, !40, !42, !46, !63}
!30 = !DIGlobalVariable(name: "Version", line: 111, isLocal: false, isDefinition: true, scope: null, file: !3, type: !31, variable: [4 x i8]* @Version)
!31 = !DICompositeType(tag: DW_TAG_array_type, size: 32, align: 8, baseType: !32, elements: !33)
diff --git a/test/DebugInfo/X86/missing-file-line.ll b/test/DebugInfo/X86/missing-file-line.ll
index 455868cb1736..19064579af36 100644
--- a/test/DebugInfo/X86/missing-file-line.ll
+++ b/test/DebugInfo/X86/missing-file-line.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.S = type { %struct.S* }
-define void @f() {
+define void @f() !dbg !4 {
%x = alloca %struct.S, align 8
; CHECK: DW_TAG_typedef
; CHECK-NOT: DW_AT_decl_file
@@ -36,17 +36,17 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "file.c", directory: "/dir")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, isOptimized: false, function: void ()* @f, variables: !2)
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{!"clang"}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", scope: !4, file: !1, line: 8, type: !11)
+!10 = !DILocalVariable(name: "x", scope: !4, file: !1, line: 8, type: !11)
!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "SS", baseType: !12)
!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", size: 64, align: 64, elements: !13)
!13 = !{!14}
diff --git a/test/DebugInfo/X86/multiple-aranges.ll b/test/DebugInfo/X86/multiple-aranges.ll
index 571454bd8d02..17a475d59345 100644
--- a/test/DebugInfo/X86/multiple-aranges.ll
+++ b/test/DebugInfo/X86/multiple-aranges.ll
@@ -44,14 +44,14 @@ target triple = "x86_64-unknown-linux-gnu"
!llvm.dbg.cu = !{!0, !7}
!llvm.module.flags = !{!12, !13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
!1 = !DIFile(filename: "test1.c", directory: "/home/kayamon")
!2 = !{}
!3 = !{!4}
!4 = !DIGlobalVariable(name: "kittens", line: 1, isLocal: false, isDefinition: true, scope: null, file: !5, type: !6, variable: i32* @kittens)
!5 = !DIFile(filename: "test1.c", directory: "/home/kayamon")
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!7 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !8, enums: !2, retainedTypes: !2, subprograms: !2, globals: !9, imports: !2)
+!7 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !8, enums: !2, retainedTypes: !2, subprograms: !2, globals: !9, imports: !2)
!8 = !DIFile(filename: "test2.c", directory: "/home/kayamon")
!9 = !{!10}
!10 = !DIGlobalVariable(name: "rainbows", line: 1, isLocal: false, isDefinition: true, scope: null, file: !11, type: !6, variable: i32* @rainbows)
diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll
index 01e3af20d2d4..97db71952905 100644
--- a/test/DebugInfo/X86/multiple-at-const-val.ll
+++ b/test/DebugInfo/X86/multiple-at-const-val.ll
@@ -19,7 +19,7 @@
@_ZSt4cout = external global %"class.std::basic_ostream"
@.str = private unnamed_addr constant [6 x i8] c"c is \00", align 1
-define i32 @main() {
+define i32 @main() !dbg !960 {
entry:
%call1.i = tail call %"class.std::basic_ostream"* @test(%"class.std::basic_ostream"* @_ZSt4cout, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i64 5)
ret i32 0
@@ -32,7 +32,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!1803}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 174207)", isOptimized: true, emissionKind: 0, file: !1802, enums: !1, retainedTypes: !955, subprograms: !956, globals: !1786, imports: !955)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 174207)", isOptimized: true, emissionKind: 0, file: !1802, enums: !1, retainedTypes: !955, subprograms: !956, globals: !1786, imports: !955)
!1 = !{!26}
!4 = !DINamespace(name: "std", line: 48, scope: !5)
!5 = !DIFile(filename: "os_base.h", directory: "/privite/tmp")
@@ -54,7 +54,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!79 = !DIDerivedType(tag: DW_TAG_typedef, name: "ostate", line: 327, file: !1801, scope: !49, baseType: !26)
!955 = !{}
!956 = !{!960}
-!960 = !DISubprogram(name: "main", line: 73, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 73, file: !1802, scope: null, type: !54, function: i32 ()* @main, variables: !955)
+!960 = distinct !DISubprogram(name: "main", line: 73, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 73, file: !1802, scope: null, type: !54, variables: !955)
!961 = !DIFile(filename: "student2.cpp", directory: "/privite/tmp")
!1786 = !{!1800}
!1800 = !DIGlobalVariable(name: "badbit", linkageName: "badbit", line: 331, isLocal: true, isDefinition: true, scope: !5, file: !5, type: !78, variable: i32 1, declaration: !77)
diff --git a/test/DebugInfo/X86/nodebug_with_debug_loc.ll b/test/DebugInfo/X86/nodebug_with_debug_loc.ll
index 8153b3352d4e..4ccf22bdc5aa 100644
--- a/test/DebugInfo/X86/nodebug_with_debug_loc.ll
+++ b/test/DebugInfo/X86/nodebug_with_debug_loc.ll
@@ -97,7 +97,7 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!23, !24}
!llvm.ident = !{!25}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !10, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !10, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
@@ -108,23 +108,23 @@ attributes #3 = { nounwind }
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !9)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
!10 = !{!11, !17}
-!11 = !DISubprogram(name: "f", linkageName: "_Z1fv", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !5, scope: !12, type: !13, variables: !15)
+!11 = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !5, scope: !12, type: !13, variables: !15)
!12 = !DIFile(filename: "repro.cpp", directory: "/tmp/dbginfo")
!13 = !DISubroutineType(types: !14)
!14 = !{null}
!15 = !{!16}
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "str2", line: 15, scope: !11, file: !12, type: !"_ZTS6string")
-!17 = !DISubprogram(name: "s2", linkageName: "_Z2s2P6string", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !12, type: !18, variables: !21)
+!16 = !DILocalVariable(name: "str2", line: 15, scope: !11, file: !12, type: !"_ZTS6string")
+!17 = distinct !DISubprogram(name: "s2", linkageName: "_Z2s2P6string", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !12, type: !18, variables: !21)
!18 = !DISubroutineType(types: !19)
!19 = !{null, !20}
!20 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !"_ZTS6string")
!21 = !{!22}
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "lhs", line: 13, arg: 1, scope: !17, file: !12, type: !20)
+!22 = !DILocalVariable(name: "lhs", line: 13, arg: 1, scope: !17, file: !12, type: !20)
!23 = !{i32 2, !"Dwarf Version", i32 4}
!24 = !{i32 2, !"Debug Info Version", i32 3}
!25 = !{!"clang version 3.5.0 "}
!26 = !DILocation(line: 15, scope: !11)
-!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "lhs", line: 13, arg: 1, scope: !17, file: !12, type: !20)
+!27 = !DILocalVariable(name: "lhs", line: 13, arg: 1, scope: !17, file: !12, type: !20)
!28 = !DILocation(line: 16, scope: !11)
!29 = !DILocation(line: 13, scope: !17, inlinedAt: !28)
!30 = !DILocation(line: 17, scope: !11)
diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll
index 20406cfa0f16..fc82fa560a40 100644
--- a/test/DebugInfo/X86/nondefault-subrange-array.ll
+++ b/test/DebugInfo/X86/nondefault-subrange-array.ll
@@ -30,7 +30,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!21}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169136)", isOptimized: false, emissionKind: 0, file: !20, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169136)", isOptimized: false, emissionKind: 0, file: !20, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !DIGlobalVariable(name: "a", line: 1, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: %class.A* @a)
diff --git a/test/DebugInfo/X86/nophysreg.ll b/test/DebugInfo/X86/nophysreg.ll
index f4478efb67f8..ddf014b43b77 100644
--- a/test/DebugInfo/X86/nophysreg.ll
+++ b/test/DebugInfo/X86/nophysreg.ll
@@ -53,7 +53,7 @@ target triple = "x86_64-apple-macosx10.10.0"
%struct.A = type { i32*, i32 }
; Function Attrs: alwaysinline ssp uwtable
-define void @_Z2f21A(i32* %p5.coerce0, i32 %p5.coerce1) #0 {
+define void @_Z2f21A(i32* %p5.coerce0, i32 %p5.coerce1) #0 !dbg !11 {
entry:
tail call void @llvm.dbg.value(metadata i32* %p5.coerce0, i64 0, metadata !16, metadata !33), !dbg !34
tail call void @llvm.dbg.value(metadata i32 %p5.coerce1, i64 0, metadata !16, metadata !35), !dbg !34
@@ -68,7 +68,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare void @_Z2f1Pii(i32*, i32) #2
; Function Attrs: ssp uwtable
-define void @_Z1fv() #3 {
+define void @_Z1fv() #3 !dbg !17 {
entry:
%x = alloca i32, align 4
%ref.tmp = alloca i32, align 4
@@ -136,7 +136,7 @@ attributes #3 = { ssp uwtable }
!llvm.module.flags = !{!29, !30, !31}
!llvm.ident = !{!32}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227088) (llvm/trunk 227091)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !10, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227088) (llvm/trunk 227091)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !10, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.cpp", directory: "")
!2 = !{}
!3 = !{!4}
@@ -147,24 +147,24 @@ attributes #3 = { ssp uwtable }
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !DIDerivedType(tag: DW_TAG_member, name: "m2", line: 3, size: 32, align: 32, offset: 64, file: !1, scope: !"_ZTS1A", baseType: !8)
!10 = !{!11, !17}
-!11 = !DISubprogram(name: "f2", linkageName: "_Z2f21A", line: 7, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !1, scope: !12, type: !13, function: void (i32*, i32)* @_Z2f21A, variables: !15)
+!11 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f21A", line: 7, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !1, scope: !12, type: !13, variables: !15)
!12 = !DIFile(filename: "test.cpp", directory: "")
!13 = !DISubroutineType(types: !14)
!14 = !{null, !"_ZTS1A"}
!15 = !{!16}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p5", line: 7, arg: 1, scope: !11, file: !12, type: !"_ZTS1A")
-!17 = !DISubprogram(name: "f", linkageName: "_Z1fv", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !1, scope: !12, type: !18, function: void ()* @_Z1fv, variables: !20)
+!16 = !DILocalVariable(name: "p5", line: 7, arg: 1, scope: !11, file: !12, type: !"_ZTS1A")
+!17 = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !1, scope: !12, type: !18, variables: !20)
!18 = !DISubroutineType(types: !19)
!19 = !{null}
!20 = !{!21, !23, !26, !27, !28}
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 14, scope: !22, file: !12, type: !8)
+!21 = !DILocalVariable(name: "x", line: 14, scope: !22, file: !12, type: !8)
!22 = distinct !DILexicalBlock(line: 13, column: 18, file: !1, scope: !17)
-!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 17, scope: !24, file: !12, type: !25)
+!23 = !DILocalVariable(name: "y", line: 17, scope: !24, file: !12, type: !25)
!24 = distinct !DILexicalBlock(line: 16, column: 20, file: !1, scope: !22)
!25 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 19, scope: !24, file: !12, type: !25)
-!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "I", line: 21, scope: !24, file: !12, type: !25)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "g", line: 24, scope: !24, file: !12, type: !"_ZTS1A")
+!26 = !DILocalVariable(name: "j", line: 19, scope: !24, file: !12, type: !25)
+!27 = !DILocalVariable(name: "I", line: 21, scope: !24, file: !12, type: !25)
+!28 = !DILocalVariable(name: "g", line: 24, scope: !24, file: !12, type: !"_ZTS1A")
!29 = !{i32 2, !"Dwarf Version", i32 2}
!30 = !{i32 2, !"Debug Info Version", i32 3}
!31 = !{i32 1, !"PIC Level", i32 2}
@@ -196,7 +196,7 @@ attributes #3 = { ssp uwtable }
!57 = !DILocation(line: 23, column: 15, scope: !24)
!58 = !DILocation(line: 23, column: 7, scope: !24)
!59 = !DILocation(line: 24, column: 9, scope: !24)
-!60 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p5", line: 7, arg: 1, scope: !11, file: !12, type: !"_ZTS1A")
+!60 = !DILocalVariable(name: "p5", line: 7, arg: 1, scope: !11, file: !12, type: !"_ZTS1A")
!61 = distinct !DILocation(line: 26, column: 7, scope: !24)
!62 = !DILocation(line: 7, column: 42, scope: !11, inlinedAt: !61)
!63 = !DILocation(line: 7, column: 48, scope: !11, inlinedAt: !61)
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
index 5b0aca5f92f5..30024dc414b5 100644
--- a/test/DebugInfo/X86/objc-fwd-decl.ll
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -12,7 +12,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10, !11, !12, !14}
-!0 = !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.1 (trunk 152054 trunk 152094)", isOptimized: false, runtimeVersion: 2, emissionKind: 0, file: !13, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.1 (trunk 152054 trunk 152094)", isOptimized: false, runtimeVersion: 2, emissionKind: 0, file: !13, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !DIGlobalVariable(name: "a", line: 3, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: %0** @a)
diff --git a/test/DebugInfo/X86/objc-property-void.ll b/test/DebugInfo/X86/objc-property-void.ll
index 0737fe2d2cb4..190d5fe00e7f 100644
--- a/test/DebugInfo/X86/objc-property-void.ll
+++ b/test/DebugInfo/X86/objc-property-void.ll
@@ -51,7 +51,7 @@ target triple = "x86_64-apple-macosx10.9.0"
@llvm.used = appending global [8 x i8*] [i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([8 x i8], [8 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_Foo" to i8*), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_PROP_NAME_ATTR_", i32 0, i32 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"\01L_OBJC_PROP_NAME_ATTR_1", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._prop_t] }* @"\01l_OBJC_$_PROP_LIST_Foo" to i8*), i8* bitcast ([1 x i8*]* @"\01L_OBJC_LABEL_CLASS_$" to i8*)], section "llvm.metadata"
; Function Attrs: ssp uwtable
-define internal void @"\01-[Foo foo]"(%0* %self, i8* %_cmd) #0 {
+define internal void @"\01-[Foo foo]"(%0* %self, i8* %_cmd) #0 !dbg !10 {
entry:
%self.addr = alloca %0*, align 8
%_cmd.addr = alloca i8*, align 8
@@ -72,7 +72,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!17, !18, !19, !20, !21, !22}
!llvm.ident = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_ObjC, isOptimized: false, runtimeVersion: 2, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, isOptimized: false, runtimeVersion: 2, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !2, imports: !2)
!1 = !DIFile(filename: "-", directory: "")
!2 = !{}
!3 = !{!4}
@@ -82,7 +82,7 @@ attributes #1 = { nounwind readnone }
!7 = !{!8}
!8 = !DIObjCProperty(name: "foo", line: 2, attributes: 2117, file: !6)
!9 = !{!10}
-!10 = !DISubprogram(name: "-[Foo foo]", line: 5, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !6, type: !11, function: void (%0*, i8*)* @"\01-[Foo foo]", variables: !2)
+!10 = distinct !DISubprogram(name: "-[Foo foo]", line: 5, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !5, scope: !6, type: !11, variables: !2)
!11 = !DISubroutineType(types: !12)
!12 = !{null, !13, !14}
!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !4)
@@ -96,9 +96,9 @@ attributes #1 = { nounwind readnone }
!21 = !{i32 2, !"Dwarf Version", i32 2}
!22 = !{i32 1, !"Debug Info Version", i32 3}
!23 = !{!""}
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "self", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !10, type: !25)
+!24 = !DILocalVariable(name: "self", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !10, type: !25)
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !4)
!26 = !DILocation(line: 0, scope: !10)
-!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "_cmd", arg: 2, flags: DIFlagArtificial, scope: !10, type: !28)
+!27 = !DILocalVariable(name: "_cmd", arg: 2, flags: DIFlagArtificial, scope: !10, type: !28)
!28 = !DIDerivedType(tag: DW_TAG_typedef, name: "SEL", line: 5, file: !5, baseType: !15)
!29 = !DILocation(line: 5, scope: !10)
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
index 86f629e9c152..535c7390a54c 100644
--- a/test/DebugInfo/X86/op_deref.ll
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -20,13 +20,13 @@
; right now, so we check the asm output:
; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK
; vla should have a register-indirect address at one point.
-; ASM-CHECK: DEBUG_VALUE: vla <- RCX
+; ASM-CHECK: DEBUG_VALUE: vla <- [%RCX+0]
; ASM-CHECK: DW_OP_breg2
; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT
-; PRETTY-PRINT: DIExpression(DW_OP_deref, DW_OP_deref)
+; PRETTY-PRINT: DIExpression(DW_OP_deref)
-define void @testVLAwithSize(i32 %s) nounwind uwtable ssp {
+define void @testVLAwithSize(i32 %s) nounwind uwtable ssp !dbg !5 {
entry:
%s.addr = alloca i32, align 4
%saved_stack = alloca i8*
@@ -80,24 +80,24 @@ declare void @llvm.stackrestore(i8*) nounwind
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!29}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 156005) (llvm/trunk 156000)", isOptimized: false, emissionKind: 1, file: !28, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 156005) (llvm/trunk 156000)", isOptimized: false, emissionKind: 1, file: !28, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "testVLAwithSize", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !28, scope: !6, type: !7, function: void (i32)* @testVLAwithSize, variables: !1)
+!5 = distinct !DISubprogram(name: "testVLAwithSize", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !28, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "bar.c", directory: "/Users/echristo/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 1, arg: 1, scope: !5, file: !6, type: !9)
+!10 = !DILocalVariable(name: "s", line: 1, arg: 1, scope: !5, file: !6, type: !9)
!11 = !DILocation(line: 1, column: 26, scope: !5)
!12 = !DILocation(line: 3, column: 13, scope: !13)
!13 = distinct !DILexicalBlock(line: 2, column: 1, file: !28, scope: !5)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vla", line: 3, scope: !13, file: !6, type: !15)
+!14 = !DILocalVariable(name: "vla", line: 3, scope: !13, file: !6, type: !15)
!15 = !DICompositeType(tag: DW_TAG_array_type, align: 32, baseType: !9, elements: !16)
!16 = !{!17}
!17 = !DISubrange(count: -1)
!18 = !DILocation(line: 3, column: 7, scope: !13)
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 4, scope: !13, file: !6, type: !9)
+!19 = !DILocalVariable(name: "i", line: 4, scope: !13, file: !6, type: !9)
!20 = !DILocation(line: 4, column: 7, scope: !13)
!21 = !DILocation(line: 5, column: 8, scope: !22)
!22 = distinct !DILexicalBlock(line: 5, column: 3, file: !28, scope: !13)
@@ -108,4 +108,4 @@ declare void @llvm.stackrestore(i8*) nounwind
!27 = !DILocation(line: 8, column: 1, scope: !13)
!28 = !DIFile(filename: "bar.c", directory: "/Users/echristo/tmp")
!29 = !{i32 1, !"Debug Info Version", i32 3}
-!30 = !DIExpression(DW_OP_deref, DW_OP_deref)
+!30 = !DIExpression(DW_OP_deref)
diff --git a/test/DebugInfo/X86/parameters.ll b/test/DebugInfo/X86/parameters.ll
index 044f51e6e6b9..1d71efc0719d 100644
--- a/test/DebugInfo/X86/parameters.ll
+++ b/test/DebugInfo/X86/parameters.ll
@@ -40,7 +40,7 @@
%"struct.pr14763::foo" = type { i8 }
; Function Attrs: uwtable
-define void @_ZN7pr147634funcENS_3fooE(%"struct.pr14763::foo"* noalias sret %agg.result, %"struct.pr14763::foo"* %f) #0 {
+define void @_ZN7pr147634funcENS_3fooE(%"struct.pr14763::foo"* noalias sret %agg.result, %"struct.pr14763::foo"* %f) #0 !dbg !4 {
entry:
call void @llvm.dbg.declare(metadata %"struct.pr14763::foo"* %f, metadata !22, metadata !DIExpression(DW_OP_deref)), !dbg !24
call void @_ZN7pr147633fooC1ERKS0_(%"struct.pr14763::foo"* %agg.result, %"struct.pr14763::foo"* %f), !dbg !25
@@ -53,7 +53,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare void @_ZN7pr147633fooC1ERKS0_(%"struct.pr14763::foo"*, %"struct.pr14763::foo"*) #2
; Function Attrs: uwtable
-define void @_ZN7pr147635func2EbNS_3fooE(i1 zeroext %b, %"struct.pr14763::foo"* %g) #0 {
+define void @_ZN7pr147635func2EbNS_3fooE(i1 zeroext %b, %"struct.pr14763::foo"* %g) #0 !dbg !17 {
entry:
%b.addr = alloca i8, align 1
%frombool = zext i1 %b to i8
@@ -82,11 +82,11 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!21, !33}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "pass.cpp", directory: "/tmp")
!2 = !{}
!3 = !{!4, !17}
-!4 = !DISubprogram(name: "func", linkageName: "_ZN7pr147634funcENS_3fooE", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, function: void (%"struct.pr14763::foo"*, %"struct.pr14763::foo"*)* @_ZN7pr147634funcENS_3fooE, variables: !2)
+!4 = distinct !DISubprogram(name: "func", linkageName: "_ZN7pr147634funcENS_3fooE", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DINamespace(name: "pr14763", line: 1, file: !1, scope: null)
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
@@ -98,18 +98,18 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !8)
!14 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !15)
!15 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !8)
-!17 = !DISubprogram(name: "func2", linkageName: "_ZN7pr147635func2EbNS_3fooE", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 12, file: !1, scope: !5, type: !18, function: void (i1, %"struct.pr14763::foo"*)* @_ZN7pr147635func2EbNS_3fooE, variables: !2)
+!17 = distinct !DISubprogram(name: "func2", linkageName: "_ZN7pr147635func2EbNS_3fooE", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 12, file: !1, scope: !5, type: !18, variables: !2)
!18 = !DISubroutineType(types: !19)
!19 = !{null, !20, !8}
!20 = !DIBasicType(tag: DW_TAG_base_type, name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
!21 = !{i32 2, !"Dwarf Version", i32 3}
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "f", line: 6, arg: 1, scope: !4, file: !23, type: !8)
+!22 = !DILocalVariable(name: "f", line: 6, arg: 1, scope: !4, file: !23, type: !8)
!23 = !DIFile(filename: "pass.cpp", directory: "/tmp")
!24 = !DILocation(line: 6, scope: !4)
!25 = !DILocation(line: 7, scope: !4)
-!26 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 12, arg: 1, scope: !17, file: !23, type: !20)
+!26 = !DILocalVariable(name: "b", line: 12, arg: 1, scope: !17, file: !23, type: !20)
!27 = !DILocation(line: 12, scope: !17)
-!28 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "g", line: 12, arg: 2, scope: !17, file: !23, type: !8)
+!28 = !DILocalVariable(name: "g", line: 12, arg: 2, scope: !17, file: !23, type: !8)
!29 = !DILocation(line: 13, scope: !30)
!30 = distinct !DILexicalBlock(line: 13, column: 0, file: !1, scope: !17)
!31 = !DILocation(line: 14, scope: !30)
diff --git a/test/DebugInfo/X86/pieces-1.ll b/test/DebugInfo/X86/pieces-1.ll
index f96e045f8285..9d4f5265b3c2 100644
--- a/test/DebugInfo/X86/pieces-1.ll
+++ b/test/DebugInfo/X86/pieces-1.ll
@@ -30,7 +30,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; Function Attrs: nounwind ssp uwtable
-define i32 @foo(i64 %s.coerce0, i32 %s.coerce1) #0 {
+define i32 @foo(i64 %s.coerce0, i32 %s.coerce1) #0 !dbg !4 {
entry:
call void @llvm.dbg.value(metadata i64 %s.coerce0, i64 0, metadata !20, metadata !24), !dbg !21
call void @llvm.dbg.value(metadata i32 %s.coerce1, i64 0, metadata !22, metadata !27), !dbg !21
@@ -50,11 +50,11 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!17, !18}
!llvm.ident = !{!19}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "pieces.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, function: i32 (i64, i32)* @foo, variables: !15)
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !15)
!5 = !DIFile(filename: "pieces.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
@@ -66,13 +66,13 @@ attributes #1 = { nounwind readnone }
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
!14 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 1, size: 32, align: 32, offset: 64, file: !1, scope: !10, baseType: !8)
!15 = !{!16}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
+!16 = !DILocalVariable(name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
!17 = !{i32 2, !"Dwarf Version", i32 4}
!18 = !{i32 1, !"Debug Info Version", i32 3}
!19 = !{!"clang version 3.5 "}
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
+!20 = !DILocalVariable(name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
!21 = !DILocation(line: 3, scope: !4)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
+!22 = !DILocalVariable(name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
!23 = !DILocation(line: 4, scope: !4)
!24 = !DIExpression(DW_OP_bit_piece, 0, 64)
!25 = !{}
diff --git a/test/DebugInfo/X86/pieces-2.ll b/test/DebugInfo/X86/pieces-2.ll
index 7c39d323c3e3..8a43f452d32b 100644
--- a/test/DebugInfo/X86/pieces-2.ll
+++ b/test/DebugInfo/X86/pieces-2.ll
@@ -29,7 +29,7 @@ target triple = "x86_64-apple-macosx10.9.0"
%struct.Inner = type { i32, i64 }
; Function Attrs: nounwind ssp uwtable
-define i32 @foo(%struct.Outer* byval align 8 %outer) #0 {
+define i32 @foo(%struct.Outer* byval align 8 %outer) #0 !dbg !4 {
entry:
call void @llvm.dbg.declare(metadata %struct.Outer* %outer, metadata !25, metadata !DIExpression()), !dbg !26
%i1.sroa.0.0..sroa_idx = getelementptr inbounds %struct.Outer, %struct.Outer* %outer, i64 0, i32 0, i64 1, i32 0, !dbg !27
@@ -57,11 +57,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!22, !23}
!llvm.ident = !{!24}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "sroasplit-1.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: i32 (%struct.Outer*)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "sroasplit-1.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
@@ -82,10 +82,10 @@ attributes #2 = { nounwind }
!22 = !{i32 2, !"Dwarf Version", i32 2}
!23 = !{i32 1, !"Debug Info Version", i32 3}
!24 = !{!"clang version 3.5.0 "}
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
+!25 = !DILocalVariable(name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
!26 = !DILocation(line: 10, scope: !4)
!27 = !DILocation(line: 11, scope: !4)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i1", line: 11, scope: !4, file: !5, type: !14)
+!28 = !DILocalVariable(name: "i1", line: 11, scope: !4, file: !5, type: !14)
!29 = !DIExpression(DW_OP_bit_piece, 0, 32)
!31 = !{i32 3, i32 0, i32 12}
!32 = !DILocation(line: 12, scope: !4)
diff --git a/test/DebugInfo/X86/pieces-3.ll b/test/DebugInfo/X86/pieces-3.ll
index 97ff9aadfcfc..7a93e393b25f 100644
--- a/test/DebugInfo/X86/pieces-3.ll
+++ b/test/DebugInfo/X86/pieces-3.ll
@@ -26,7 +26,7 @@
; CHECK: .debug_loc
; CHECK: [[LOC]]:
; CHECK: Beginning address offset: 0x0000000000000000
-; CHECK: Ending address offset: 0x0000000000000004
+; CHECK: Ending address offset: 0x0000000000000008
; rdi, piece 0x00000008, piece 0x00000004, rsi, piece 0x00000004
; CHECK: Location description: 55 93 08 93 04 54 93 04
;
@@ -35,7 +35,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; Function Attrs: nounwind ssp uwtable
-define i32 @foo(i64 %outer.coerce0, i64 %outer.coerce1) #0 {
+define i32 @foo(i64 %outer.coerce0, i64 %outer.coerce1) #0 !dbg !4 {
call void @llvm.dbg.value(metadata i64 %outer.coerce0, i64 0, metadata !24, metadata !25), !dbg !26
call void @llvm.dbg.declare(metadata !{null}, metadata !27, metadata !28), !dbg !26
call void @llvm.dbg.value(metadata i64 %outer.coerce1, i64 0, metadata !29, metadata !30), !dbg !26
@@ -67,11 +67,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!21, !22}
!llvm.ident = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "sroasplit-2.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: i32 (i64, i64)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "sroasplit-2.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
@@ -91,16 +91,16 @@ attributes #2 = { nounwind }
!21 = !{i32 2, !"Dwarf Version", i32 2}
!22 = !{i32 1, !"Debug Info Version", i32 3}
!23 = !{!"clang version 3.5.0 "}
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
+!24 = !DILocalVariable(name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
!25 = !DIExpression(DW_OP_bit_piece, 0, 64)
!26 = !DILocation(line: 10, scope: !4)
-!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
+!27 = !DILocalVariable(name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
!28 = !DIExpression(DW_OP_bit_piece, 64, 64)
-!29 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
+!29 = !DILocalVariable(name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
!30 = !DIExpression(DW_OP_bit_piece, 96, 32)
-!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
+!31 = !DILocalVariable(name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
!32 = !DIExpression(DW_OP_bit_piece, 64, 32)
!33 = !DILocation(line: 11, scope: !4)
-!34 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i1", line: 11, scope: !4, file: !5, type: !14)
+!34 = !DILocalVariable(name: "i1", line: 11, scope: !4, file: !5, type: !14)
!35 = !DIExpression(DW_OP_bit_piece, 0, 32)
!36 = !DILocation(line: 12, scope: !4)
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
index f5abdfe89251..cc43c7604c08 100644
--- a/test/DebugInfo/X86/pointer-type-size.ll
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -11,7 +11,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 147882)", isOptimized: false, emissionKind: 0, file: !13, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 147882)", isOptimized: false, emissionKind: 0, file: !13, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !DIGlobalVariable(name: "crass", line: 1, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: %struct.crass* @crass)
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index b68609dd7184..c6124687b2c4 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -14,7 +14,7 @@
%struct.foo = type { i8 }
-define void @_Z3zedP3foo(%struct.foo* %x) uwtable {
+define void @_Z3zedP3foo(%struct.foo* %x) uwtable !dbg !5 {
entry:
%x.addr = alloca %struct.foo*, align 8
store %struct.foo* %x, %struct.foo** %x.addr, align 8
@@ -26,7 +26,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define linkonce_odr void @_ZN3foo3barEv(%struct.foo* %this) nounwind uwtable align 2 {
+define linkonce_odr void @_ZN3foo3barEv(%struct.foo* %this) nounwind uwtable align 2 !dbg !20 {
entry:
%this.addr = alloca %struct.foo*, align 8
store %struct.foo* %this, %struct.foo** %this.addr, align 8
@@ -38,10 +38,10 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 ()", isOptimized: false, emissionKind: 0, file: !32, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 ()", isOptimized: false, emissionKind: 0, file: !32, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5, !20}
-!5 = !DISubprogram(name: "zed", linkageName: "_Z3zedP3foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !6, scope: !6, type: !7, function: void (%struct.foo*)* @_Z3zedP3foo)
+!5 = distinct !DISubprogram(name: "zed", linkageName: "_Z3zedP3foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !6, scope: !6, type: !7)
!6 = !DIFile(filename: "/home/espindola/llvm/test.cc", directory: "/home/espindola/tmpfs/build")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9}
@@ -52,13 +52,13 @@ entry:
!13 = !DISubroutineType(types: !14)
!14 = !{null, !15}
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, baseType: !10)
-!20 = !DISubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !6, scope: null, type: !13, function: void (%struct.foo*)* @_ZN3foo3barEv, declaration: !12)
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 4, arg: 1, scope: !5, file: !6, type: !9)
+!20 = distinct !DISubprogram(name: "bar", linkageName: "_ZN3foo3barEv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !6, scope: null, type: !13, declaration: !12)
+!23 = !DILocalVariable(name: "x", line: 4, arg: 1, scope: !5, file: !6, type: !9)
!24 = !DILocation(line: 4, column: 15, scope: !5)
!25 = !DILocation(line: 4, column: 20, scope: !26)
!26 = distinct !DILexicalBlock(line: 4, column: 18, file: !6, scope: !5)
!27 = !DILocation(line: 4, column: 30, scope: !26)
-!28 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 2, arg: 1, flags: DIFlagArtificial, scope: !20, file: !6, type: !15)
+!28 = !DILocalVariable(name: "this", line: 2, arg: 1, flags: DIFlagArtificial, scope: !20, file: !6, type: !15)
!29 = !DILocation(line: 2, column: 8, scope: !20)
!30 = !DILocation(line: 2, column: 15, scope: !31)
!31 = distinct !DILexicalBlock(line: 2, column: 14, file: !6, scope: !20)
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
index 98b9ce488603..89f599838977 100644
--- a/test/DebugInfo/X86/pr12831.ll
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -9,10 +9,10 @@ target triple = "x86_64-unknown-linux-gnu"
%class.anon = type { i8 }
%class.anon.0 = type { i8 }
-@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_" = internal alias void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_"
-@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_0EET_" = internal alias void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_"
+@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_" = internal alias void (%class.function*), void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_"
+@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_0EET_" = internal alias void (%class.function*), void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_"
-define void @_ZN17BPLFunctionWriter9writeExprEv(%class.BPLFunctionWriter* %this) nounwind uwtable align 2 {
+define void @_ZN17BPLFunctionWriter9writeExprEv(%class.BPLFunctionWriter* %this) nounwind uwtable align 2 !dbg !5 {
entry:
%this.addr = alloca %class.BPLFunctionWriter*, align 8
%agg.tmp = alloca %class.function, align 1
@@ -78,10 +78,10 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!162}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 ", isOptimized: false, emissionKind: 0, file: !161, enums: !1, retainedTypes: !1, subprograms: !3, globals: !128)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 ", isOptimized: false, emissionKind: 0, file: !161, enums: !1, retainedTypes: !1, subprograms: !3, globals: !128)
!1 = !{}
!3 = !{!5, !106, !107, !126, !127}
-!5 = !DISubprogram(name: "writeExpr", linkageName: "_ZN17BPLFunctionWriter9writeExprEv", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 19, file: !6, scope: null, type: !7, function: void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, declaration: !103, variables: !1)
+!5 = distinct !DISubprogram(name: "writeExpr", linkageName: "_ZN17BPLFunctionWriter9writeExprEv", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 19, file: !6, scope: null, type: !7, declaration: !103, variables: !1)
!6 = !DIFile(filename: "BPLFunctionWriter2.ii", directory: "/home/peter/crashdelta")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9}
@@ -156,8 +156,8 @@ entry:
!99 = !DISubroutineType(types: !100)
!100 = !{null}
!103 = !DISubprogram(name: "writeExpr", linkageName: "_ZN17BPLFunctionWriter9writeExprEv", line: 17, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate | DIFlagPrototyped, isOptimized: false, scopeLine: 17, file: !6, scope: !10, type: !7)
-!106 = !DISubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", linkageName: "_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", line: 8, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: null, type: !59, function: void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", templateParams: !82, declaration: !58, variables: !1)
-!107 = !DISubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !108, function: void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", templateParams: !111, declaration: !113, variables: !1)
+!106 = distinct !DISubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", linkageName: "_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", line: 8, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: null, type: !59, templateParams: !82, declaration: !58, variables: !1)
+!107 = distinct !DISubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !108, templateParams: !111, declaration: !113, variables: !1)
!108 = !DISubroutineType(types: !109)
!109 = !{null, !110}
!110 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !61)
@@ -172,32 +172,32 @@ entry:
!119 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !26)
!120 = !{!121}
!121 = !DITemplateTypeParameter(name: "_Tp", type: !26)
-!126 = !DISubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", linkageName: "_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", line: 8, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: null, type: !23, function: void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", templateParams: !47, declaration: !22, variables: !1)
-!127 = !DISubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !117, function: void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", templateParams: !120, declaration: !116, variables: !1)
+!126 = distinct !DISubprogram(name: "function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", linkageName: "_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", line: 8, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !6, scope: null, type: !23, templateParams: !47, declaration: !22, variables: !1)
+!127 = distinct !DISubprogram(name: "_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", linkageName: "_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: null, type: !117, templateParams: !120, declaration: !116, variables: !1)
!128 = !{!130}
!130 = !DIGlobalVariable(name: "__stored_locally", linkageName: "__stored_locally", line: 2, isLocal: true, isDefinition: true, scope: !114, file: !6, type: !131, variable: i1 1)
!131 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !132)
!132 = !DIBasicType(tag: DW_TAG_base_type, name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
-!133 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 19, arg: 1, flags: DIFlagArtificial, scope: !5, file: !6, type: !134)
+!133 = !DILocalVariable(name: "this", line: 19, arg: 1, flags: DIFlagArtificial, scope: !5, file: !6, type: !134)
!134 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
!135 = !DILocation(line: 19, column: 39, scope: !5)
!136 = !DILocation(line: 20, column: 17, scope: !137)
!137 = distinct !DILexicalBlock(line: 19, column: 51, file: !6, scope: !5)
!138 = !DILocation(line: 23, column: 17, scope: !137)
!139 = !DILocation(line: 26, column: 15, scope: !137)
-!140 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 8, arg: 1, flags: DIFlagArtificial, scope: !106, file: !6, type: !141)
+!140 = !DILocalVariable(name: "this", line: 8, arg: 1, flags: DIFlagArtificial, scope: !106, file: !6, type: !141)
!141 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !20)
!142 = !DILocation(line: 8, column: 45, scope: !106)
-!143 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "__f", line: 8, arg: 2, scope: !106, file: !6, type: !61)
+!143 = !DILocalVariable(name: "__f", line: 8, arg: 2, scope: !106, file: !6, type: !61)
!144 = !DILocation(line: 8, column: 63, scope: !106)
!145 = !DILocation(line: 9, column: 9, scope: !146)
!146 = distinct !DILexicalBlock(line: 8, column: 81, file: !6, scope: !106)
!147 = !DILocation(line: 10, column: 13, scope: !146)
!148 = !DILocation(line: 4, column: 5, scope: !149)
!149 = distinct !DILexicalBlock(line: 3, column: 105, file: !6, scope: !107)
-!150 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 8, arg: 1, flags: DIFlagArtificial, scope: !126, file: !6, type: !141)
+!150 = !DILocalVariable(name: "this", line: 8, arg: 1, flags: DIFlagArtificial, scope: !126, file: !6, type: !141)
!151 = !DILocation(line: 8, column: 45, scope: !126)
-!152 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "__f", line: 8, arg: 2, scope: !126, file: !6, type: !26)
+!152 = !DILocalVariable(name: "__f", line: 8, arg: 2, scope: !126, file: !6, type: !26)
!153 = !DILocation(line: 8, column: 63, scope: !126)
!154 = !DILocation(line: 9, column: 9, scope: !155)
!155 = distinct !DILexicalBlock(line: 8, column: 81, file: !6, scope: !126)
diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll
index df31a7994469..a369b4259583 100644
--- a/test/DebugInfo/X86/pr13303.ll
+++ b/test/DebugInfo/X86/pr13303.ll
@@ -5,7 +5,7 @@
; Check that the prologue ends with is_stmt here.
; CHECK: 0x0000000000000000 {{.*}} is_stmt
-define i32 @main() nounwind uwtable {
+define i32 @main() nounwind uwtable !dbg !5 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -15,10 +15,10 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 160143)", isOptimized: false, emissionKind: 0, file: !12, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 160143)", isOptimized: false, emissionKind: 0, file: !12, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !12, scope: !6, type: !7, function: i32 ()* @main, variables: !1)
+!5 = distinct !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !12, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "PR13303.c", directory: "/home/probinson")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
diff --git a/test/DebugInfo/X86/pr19307.ll b/test/DebugInfo/X86/pr19307.ll
index e04131fe322c..39a800704b19 100644
--- a/test/DebugInfo/X86/pr19307.ll
+++ b/test/DebugInfo/X86/pr19307.ll
@@ -35,7 +35,7 @@ target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [7 x i8] c"items=\00", align 1
; Function Attrs: uwtable
-define void @_Z11parse_rangeRyS_Ss(i64* %offset, i64* %limit, %"class.std::basic_string"* %range) #0 {
+define void @_Z11parse_rangeRyS_Ss(i64* %offset, i64* %limit, %"class.std::basic_string"* %range) #0 !dbg !13 {
entry:
%offset.addr = alloca i64*, align 8
%limit.addr = alloca i64*, align 8
@@ -84,7 +84,7 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!42, !43}
!llvm.ident = !{!44}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (209308)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !2, imports: !21)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (209308)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !2, imports: !21)
!1 = !DIFile(filename: "pr19307.cc", directory: "/llvm_cmake_gcc")
!2 = !{}
!3 = !{!4, !6, !8}
@@ -97,7 +97,7 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!10 = !DINamespace(name: "std", line: 153, file: !11, scope: null)
!11 = !DIFile(filename: "/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/x86_64-linux-gnu/bits/c++config.h", directory: "/llvm_cmake_gcc")
!12 = !{!13}
-!13 = !DISubprogram(name: "parse_range", linkageName: "_Z11parse_rangeRyS_Ss", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !14, type: !15, function: void (i64*, i64*, %"class.std::basic_string"*)* @_Z11parse_rangeRyS_Ss, variables: !2)
+!13 = distinct !DISubprogram(name: "parse_range", linkageName: "_Z11parse_rangeRyS_Ss", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !14, type: !15, variables: !2)
!14 = !DIFile(filename: "pr19307.cc", directory: "/llvm_cmake_gcc")
!15 = !DISubroutineType(types: !16)
!16 = !{null, !17, !17, !19}
@@ -129,10 +129,10 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!42 = !{i32 2, !"Dwarf Version", i32 4}
!43 = !{i32 2, !"Debug Info Version", i32 3}
!44 = !{!"clang version 3.5.0 (209308)"}
-!45 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "offset", line: 3, arg: 1, scope: !13, file: !14, type: !17)
+!45 = !DILocalVariable(name: "offset", line: 3, arg: 1, scope: !13, file: !14, type: !17)
!46 = !DILocation(line: 3, scope: !13)
-!47 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "limit", line: 3, arg: 2, scope: !13, file: !14, type: !17)
-!48 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "range", line: 4, arg: 3, scope: !13, file: !14, type: !19)
+!47 = !DILocalVariable(name: "limit", line: 3, arg: 2, scope: !13, file: !14, type: !17)
+!48 = !DILocalVariable(name: "range", line: 4, arg: 3, scope: !13, file: !14, type: !19)
!49 = !DILocation(line: 4, scope: !13)
!50 = !DILocation(line: 5, scope: !51)
!51 = distinct !DILexicalBlock(line: 5, column: 0, file: !1, scope: !13)
diff --git a/test/DebugInfo/X86/processes-relocations.ll b/test/DebugInfo/X86/processes-relocations.ll
index bb49e6bef784..e138eaaae6d8 100644
--- a/test/DebugInfo/X86/processes-relocations.ll
+++ b/test/DebugInfo/X86/processes-relocations.ll
@@ -13,7 +13,7 @@
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
-!0 = !DICompileUnit(file: !1, language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2, emissionKind: 1)
+!0 = distinct !DICompileUnit(file: !1, language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2, emissionKind: 1)
!1 = !DIFile(filename: "empty.c", directory: "/a")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/DebugInfo/X86/prologue-stack.ll b/test/DebugInfo/X86/prologue-stack.ll
index cd362e380951..b3447d344e14 100644
--- a/test/DebugInfo/X86/prologue-stack.ll
+++ b/test/DebugInfo/X86/prologue-stack.ll
@@ -6,7 +6,7 @@
; return 0;
; }
-define i32 @isel_line_test2() nounwind uwtable {
+define i32 @isel_line_test2() nounwind uwtable !dbg !5 {
; The stack adjustment should be part of the prologue.
; CHECK: isel_line_test2:
; CHECK: {{subq|leaq}} {{.*}}, %rsp
@@ -21,10 +21,10 @@ declare i32 @callme(i32)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 164980) (llvm/trunk 164979)", isOptimized: false, emissionKind: 0, file: !13, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 164980) (llvm/trunk 164979)", isOptimized: false, emissionKind: 0, file: !13, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "isel_line_test2", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !13, scope: !6, type: !7, function: i32 ()* @isel_line_test2, variables: !1)
+!5 = distinct !DISubprogram(name: "isel_line_test2", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !13, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "bar.c", directory: "/usr/local/google/home/echristo/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
diff --git a/test/DebugInfo/X86/recursive_inlining.ll b/test/DebugInfo/X86/recursive_inlining.ll
deleted file mode 100644
index 7825646c7c8d..000000000000
--- a/test/DebugInfo/X86/recursive_inlining.ll
+++ /dev/null
@@ -1,275 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; This isn't a very pretty test case - I imagine there might be other ways to
-; tickle the optimizers into producing the desired code, but I haven't found
-; them.
-
-; The issue is when a function is inlined into itself, the inlined argument
-; accidentally overwrote the concrete argument and was lost.
-
-; IR generated from the following source compiled with clang -g:
-; void fn1(void *);
-; void fn2(int, int, int, int);
-; void fn3();
-; void fn8();
-; struct C {
-; int b;
-; void m_fn2() {
-; fn8();
-; if (b) fn2(0, 0, 0, 0);
-; fn3();
-; }
-; };
-; C *x;
-; inline void fn7() {}
-; void fn6() {
-; fn8();
-; x->m_fn2();
-; fn7();
-; }
-; void fn3() { fn6(); }
-; void fn4() { x->m_fn2(); }
-; void fn5() { x->m_fn2(); }
-
-; The definition of C and declaration of C::m_fn2
-; CHECK: DW_TAG_structure_type
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_member
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "m_fn2"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[M_FN2_THIS_DECL:.*]]: DW_TAG_formal_parameter
-
-; The abstract definition of C::m_fn2
-; CHECK: [[M_FN2_ABS_DEF:.*]]: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_specification {{.*}} "_ZN1C5m_fn2Ev"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_inline
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[M_FN2_THIS_ABS_DEF:.*]]: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "this"
-
-; Skip some other functions
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_TAG_subprogram
-
-; The concrete definition of C::m_fn2
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} {[[M_FN2_ABS_DEF]]} "_ZN1C5m_fn2Ev"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} {[[M_FN2_THIS_ABS_DEF]]}
-; CHECK-NOT: {{DW_TAG|NULL}}
-; Inlined fn3:
-; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NOT: {{DW_TAG|NULL}}
-; Inlined fn6:
-; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NOT: {{DW_TAG|NULL}}
-; Inlined C::m_fn2:
-; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} {[[M_FN2_ABS_DEF]]} "_ZN1C5m_fn2Ev"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} {[[M_FN2_THIS_ABS_DEF]]}
-
-
-
-%struct.C = type { i32 }
-
-@x = global %struct.C* null, align 8
-
-; Function Attrs: nounwind
-define void @_Z3fn6v() #0 {
-entry:
- tail call void @_Z3fn8v() #3, !dbg !31
- %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !32, !tbaa !33
- tail call void @llvm.dbg.value(metadata %struct.C* %0, i64 0, metadata !37, metadata !DIExpression()) #3, !dbg !38
- tail call void @_Z3fn8v() #3, !dbg !39
- %b.i = getelementptr inbounds %struct.C, %struct.C* %0, i64 0, i32 0, !dbg !40
- %1 = load i32, i32* %b.i, align 4, !dbg !40, !tbaa !42
- %tobool.i = icmp eq i32 %1, 0, !dbg !40
- br i1 %tobool.i, label %_ZN1C5m_fn2Ev.exit, label %if.then.i, !dbg !40
-
-if.then.i: ; preds = %entry
- tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !45
- br label %_ZN1C5m_fn2Ev.exit, !dbg !45
-
-_ZN1C5m_fn2Ev.exit: ; preds = %entry, %if.then.i
- tail call void @_Z3fn3v() #3, !dbg !47
- ret void, !dbg !48
-}
-
-declare void @_Z3fn8v() #1
-
-; Function Attrs: nounwind
-define linkonce_odr void @_ZN1C5m_fn2Ev(%struct.C* nocapture readonly %this) #0 align 2 {
-entry:
- tail call void @llvm.dbg.value(metadata %struct.C* %this, i64 0, metadata !24, metadata !DIExpression()), !dbg !49
- tail call void @_Z3fn8v() #3, !dbg !50
- %b = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0, !dbg !51
- %0 = load i32, i32* %b, align 4, !dbg !51, !tbaa !42
- %tobool = icmp eq i32 %0, 0, !dbg !51
- br i1 %tobool, label %if.end, label %if.then, !dbg !51
-
-if.then: ; preds = %entry
- tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !52
- br label %if.end, !dbg !52
-
-if.end: ; preds = %entry, %if.then
- tail call void @_Z3fn8v() #3, !dbg !53
- %1 = load %struct.C*, %struct.C** @x, align 8, !dbg !56, !tbaa !33
- tail call void @llvm.dbg.value(metadata %struct.C* %1, i64 0, metadata !57, metadata !DIExpression()) #3, !dbg !58
- tail call void @_Z3fn8v() #3, !dbg !59
- %b.i.i = getelementptr inbounds %struct.C, %struct.C* %1, i64 0, i32 0, !dbg !60
- %2 = load i32, i32* %b.i.i, align 4, !dbg !60, !tbaa !42
- %tobool.i.i = icmp eq i32 %2, 0, !dbg !60
- br i1 %tobool.i.i, label %_Z3fn6v.exit, label %if.then.i.i, !dbg !60
-
-if.then.i.i: ; preds = %if.end
- tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !61
- br label %_Z3fn6v.exit, !dbg !61
-
-_Z3fn6v.exit: ; preds = %if.end, %if.then.i.i
- tail call void @_Z3fn3v() #3, !dbg !62
- ret void, !dbg !63
-}
-
-; Function Attrs: nounwind
-define void @_Z3fn3v() #0 {
-entry:
- br label %tailrecurse
-
-tailrecurse: ; preds = %tailrecurse.backedge, %entry
- tail call void @_Z3fn8v() #3, !dbg !64
- %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !66, !tbaa !33
- tail call void @llvm.dbg.value(metadata %struct.C* %0, i64 0, metadata !67, metadata !DIExpression()) #3, !dbg !68
- tail call void @_Z3fn8v() #3, !dbg !69
- %b.i.i = getelementptr inbounds %struct.C, %struct.C* %0, i64 0, i32 0, !dbg !70
- %1 = load i32, i32* %b.i.i, align 4, !dbg !70, !tbaa !42
- %tobool.i.i = icmp eq i32 %1, 0, !dbg !70
- br i1 %tobool.i.i, label %tailrecurse.backedge, label %if.then.i.i, !dbg !70
-
-tailrecurse.backedge: ; preds = %tailrecurse, %if.then.i.i
- br label %tailrecurse
-
-if.then.i.i: ; preds = %tailrecurse
- tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !71
- br label %tailrecurse.backedge, !dbg !71
-}
-
-; Function Attrs: nounwind
-define void @_Z3fn4v() #0 {
-entry:
- %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !72, !tbaa !33
- tail call void @_ZN1C5m_fn2Ev(%struct.C* %0), !dbg !72
- ret void, !dbg !72
-}
-
-; Function Attrs: nounwind
-define void @_Z3fn5v() #0 {
-entry:
- %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !73, !tbaa !33
- tail call void @_ZN1C5m_fn2Ev(%struct.C* %0), !dbg !73
- ret void, !dbg !73
-}
-
-declare void @_Z3fn2iiii(i32, i32, i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!28, !29}
-!llvm.ident = !{!30}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !13, globals: !26, imports: !2)
-!1 = !DIFile(filename: "<stdin>", directory: "/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce")
-!2 = !{}
-!3 = !{!4}
-!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 5, size: 32, align: 32, file: !5, elements: !6, identifier: "_ZTS1C")
-!5 = !DIFile(filename: "recursive_inlining.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce")
-!6 = !{!7, !9}
-!7 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 6, size: 32, align: 32, file: !5, scope: !"_ZTS1C", baseType: !8)
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DISubprogram(name: "m_fn2", linkageName: "_ZN1C5m_fn2Ev", line: 7, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !5, scope: !"_ZTS1C", type: !10)
-!10 = !DISubroutineType(types: !11)
-!11 = !{null, !12}
-!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
-!13 = !{!14, !18, !19, !20, !21, !22}
-!14 = !DISubprogram(name: "fn6", linkageName: "_Z3fn6v", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 15, file: !5, scope: !15, type: !16, function: void ()* @_Z3fn6v, variables: !2)
-!15 = !DIFile(filename: "recursive_inlining.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce")
-!16 = !DISubroutineType(types: !17)
-!17 = !{null}
-!18 = !DISubprogram(name: "fn3", linkageName: "_Z3fn3v", line: 20, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 20, file: !5, scope: !15, type: !16, function: void ()* @_Z3fn3v, variables: !2)
-!19 = !DISubprogram(name: "fn4", linkageName: "_Z3fn4v", line: 21, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 21, file: !5, scope: !15, type: !16, function: void ()* @_Z3fn4v, variables: !2)
-!20 = !DISubprogram(name: "fn5", linkageName: "_Z3fn5v", line: 22, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 22, file: !5, scope: !15, type: !16, function: void ()* @_Z3fn5v, variables: !2)
-!21 = !DISubprogram(name: "fn7", linkageName: "_Z3fn7v", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !5, scope: !15, type: !16, variables: !2)
-!22 = !DISubprogram(name: "m_fn2", linkageName: "_ZN1C5m_fn2Ev", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !5, scope: !"_ZTS1C", type: !10, function: void (%struct.C*)* @_ZN1C5m_fn2Ev, declaration: !9, variables: !23)
-!23 = !{!24}
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
-!25 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1C")
-!26 = !{!27}
-!27 = !DIGlobalVariable(name: "x", line: 13, isLocal: false, isDefinition: true, scope: null, file: !15, type: !25, variable: %struct.C** @x)
-!28 = !{i32 2, !"Dwarf Version", i32 4}
-!29 = !{i32 2, !"Debug Info Version", i32 3}
-!30 = !{!"clang version 3.6.0 "}
-!31 = !DILocation(line: 16, scope: !14)
-!32 = !DILocation(line: 17, scope: !14)
-!33 = !{!34, !34, i64 0}
-!34 = !{!"any pointer", !35, i64 0}
-!35 = !{!"omnipotent char", !36, i64 0}
-!36 = !{!"Simple C/C++ TBAA"}
-!37 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
-!38 = !DILocation(line: 0, scope: !22, inlinedAt: !32)
-!39 = !DILocation(line: 8, scope: !22, inlinedAt: !32)
-!40 = !DILocation(line: 9, scope: !41, inlinedAt: !32)
-!41 = distinct !DILexicalBlock(line: 9, column: 0, file: !5, scope: !22)
-!42 = !{!43, !44, i64 0}
-!43 = !{!"_ZTS1C", !44, i64 0}
-!44 = !{!"int", !35, i64 0}
-!45 = !DILocation(line: 9, scope: !46, inlinedAt: !32)
-!46 = distinct !DILexicalBlock(line: 9, column: 0, file: !5, scope: !41)
-!47 = !DILocation(line: 10, scope: !22, inlinedAt: !32)
-!48 = !DILocation(line: 19, scope: !14)
-!49 = !DILocation(line: 0, scope: !22)
-!50 = !DILocation(line: 8, scope: !22)
-!51 = !DILocation(line: 9, scope: !41)
-!52 = !DILocation(line: 9, scope: !46)
-!53 = !DILocation(line: 16, scope: !14, inlinedAt: !54)
-!54 = !DILocation(line: 20, scope: !18, inlinedAt: !55)
-!55 = !DILocation(line: 10, scope: !22)
-!56 = !DILocation(line: 17, scope: !14, inlinedAt: !54)
-!57 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
-!58 = !DILocation(line: 0, scope: !22, inlinedAt: !56)
-!59 = !DILocation(line: 8, scope: !22, inlinedAt: !56)
-!60 = !DILocation(line: 9, scope: !41, inlinedAt: !56)
-!61 = !DILocation(line: 9, scope: !46, inlinedAt: !56)
-!62 = !DILocation(line: 10, scope: !22, inlinedAt: !56)
-!63 = !DILocation(line: 11, scope: !22)
-!64 = !DILocation(line: 16, scope: !14, inlinedAt: !65)
-!65 = !DILocation(line: 20, scope: !18)
-!66 = !DILocation(line: 17, scope: !14, inlinedAt: !65)
-!67 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !22, type: !25)
-!68 = !DILocation(line: 0, scope: !22, inlinedAt: !66)
-!69 = !DILocation(line: 8, scope: !22, inlinedAt: !66)
-!70 = !DILocation(line: 9, scope: !41, inlinedAt: !66)
-!71 = !DILocation(line: 9, scope: !46, inlinedAt: !66)
-!72 = !DILocation(line: 21, scope: !19)
-!73 = !DILocation(line: 22, scope: !20)
diff --git a/test/DebugInfo/X86/ref_addr_relocation.ll b/test/DebugInfo/X86/ref_addr_relocation.ll
index 488f98ef8e1f..fd074a3a2d91 100644
--- a/test/DebugInfo/X86/ref_addr_relocation.ll
+++ b/test/DebugInfo/X86/ref_addr_relocation.ll
@@ -58,7 +58,7 @@
!llvm.dbg.cu = !{!0, !9}
!llvm.module.flags = !{!14, !15}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 191799)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !6, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 191799)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !6, imports: !2)
!1 = !DIFile(filename: "tu1.cpp", directory: "/Users/manmanren/test-Nov/type_unique_air/ref_addr")
!2 = !{}
!3 = !{!4}
@@ -67,7 +67,7 @@
!6 = !{!7}
!7 = !DIGlobalVariable(name: "f", line: 2, isLocal: false, isDefinition: true, scope: null, file: !8, type: !4, variable: %struct.foo* @f)
!8 = !DIFile(filename: "tu1.cpp", directory: "/Users/manmanren/test-Nov/type_unique_air/ref_addr")
-!9 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 191799)", isOptimized: false, emissionKind: 0, file: !10, enums: !2, retainedTypes: !3, subprograms: !2, globals: !11, imports: !2)
+!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 191799)", isOptimized: false, emissionKind: 0, file: !10, enums: !2, retainedTypes: !3, subprograms: !2, globals: !11, imports: !2)
!10 = !DIFile(filename: "tu2.cpp", directory: "/Users/manmanren/test-Nov/type_unique_air/ref_addr")
!11 = !{!12}
!12 = !DIGlobalVariable(name: "g", line: 2, isLocal: false, isDefinition: true, scope: null, file: !13, type: !4, variable: %struct.foo* @g)
diff --git a/test/DebugInfo/X86/reference-argument.ll b/test/DebugInfo/X86/reference-argument.ll
index 401553ec9711..9433b8785311 100644
--- a/test/DebugInfo/X86/reference-argument.ll
+++ b/test/DebugInfo/X86/reference-argument.ll
@@ -1,10 +1,18 @@
-; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=asm %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx10.9.0 -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
; ModuleID = 'aggregate-indirect-arg.cpp'
; extracted from debuginfo-tests/aggregate-indirect-arg.cpp
-; v should not be a pointer.
-; CHECK: ##DEBUG_VALUE: foo:v <- RSI
-; rdar://problem/13658587
+; v should be a pointer.
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_specification {{.*}} "_ZN1A3fooE4SVal"
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_name {{.*}} "this"
+; CHECK-NOT: DW_TAG_subprogram
+; CHECK: DW_TAG_formal_parameter
+; rsi+0
+; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x02> 74 00{{ *}})
+; CHECK-NEXT: DW_AT_name {{.*}} "v"
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -14,14 +22,15 @@ target triple = "x86_64-apple-macosx10.9.0"
declare void @_Z3barR4SVal(%class.SVal* %v)
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
declare i32 @main()
; Function Attrs: nounwind ssp uwtable
-define linkonce_odr void @_ZN1A3fooE4SVal(%class.A* %this, %class.SVal* %v) nounwind ssp uwtable align 2 {
+define linkonce_odr void @_ZN1A3fooE4SVal(%class.A* %this, %class.SVal* %v) nounwind ssp uwtable align 2 !dbg !35 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !59, metadata !DIExpression()), !dbg !61
- call void @llvm.dbg.declare(metadata %class.SVal* %v, metadata !62, metadata !DIExpression(DW_OP_deref)), !dbg !61
+ call void @llvm.dbg.value(metadata %class.SVal* %v, i64 0, metadata !62, metadata !DIExpression(DW_OP_deref)), !dbg !61
%this1 = load %class.A*, %class.A** %this.addr
call void @_Z3barR4SVal(%class.SVal* %v), !dbg !61
ret void, !dbg !61
@@ -32,11 +41,11 @@ declare void @_ZN4SValD2Ev(%class.SVal* %this)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!47, !68}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "aggregate-indirect-arg.cpp", directory: "")
!2 = !{}
!3 = !{!4, !29, !33, !34, !35}
-!4 = !DISubprogram(name: "bar", linkageName: "_Z3barR4SVal", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 19, file: !1, scope: !5, type: !6, function: void (%class.SVal*)* @_Z3barR4SVal, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barR4SVal", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 19, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "aggregate-indirect-arg.cpp", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
@@ -58,13 +67,13 @@ declare void @_ZN4SValD2Ev(%class.SVal* %this)
!25 = !{null, !19, !26}
!26 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !27)
!27 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
-!29 = !DISubprogram(name: "main", line: 25, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 25, file: !1, scope: !5, type: !30, function: i32 ()* @main, variables: !2)
+!29 = distinct !DISubprogram(name: "main", line: 25, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 25, file: !1, scope: !5, type: !30, variables: !2)
!30 = !DISubroutineType(types: !31)
!31 = !{!32}
!32 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!33 = !DISubprogram(name: "~SVal", linkageName: "_ZN4SValD1Ev", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 14, file: !1, scope: null, type: !17, function: void (%class.SVal*)* @_ZN4SValD1Ev, declaration: !16, variables: !2)
-!34 = !DISubprogram(name: "~SVal", linkageName: "_ZN4SValD2Ev", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 14, file: !1, scope: null, type: !17, function: void (%class.SVal*)* @_ZN4SValD2Ev, declaration: !16, variables: !2)
-!35 = !DISubprogram(name: "foo", linkageName: "_ZN1A3fooE4SVal", line: 22, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 22, file: !1, scope: null, type: !36, function: void (%class.A*, %class.SVal*)* @_ZN1A3fooE4SVal, declaration: !41, variables: !2)
+!33 = distinct !DISubprogram(name: "~SVal", linkageName: "_ZN4SValD1Ev", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 14, file: !1, scope: null, type: !17, declaration: !16, variables: !2)
+!34 = distinct !DISubprogram(name: "~SVal", linkageName: "_ZN4SValD2Ev", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 14, file: !1, scope: null, type: !17, declaration: !16, variables: !2)
+!35 = distinct !DISubprogram(name: "foo", linkageName: "_ZN1A3fooE4SVal", line: 22, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 22, file: !1, scope: null, type: !36, declaration: !41, variables: !2)
!36 = !DISubroutineType(types: !37)
!37 = !{null, !38, !9}
!38 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !39)
@@ -75,24 +84,24 @@ declare void @_ZN4SValD2Ev(%class.SVal* %this)
!44 = !DISubroutineType(types: !45)
!45 = !{null, !38}
!47 = !{i32 2, !"Dwarf Version", i32 3}
-!48 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "v", line: 19, arg: 1, scope: !4, file: !5, type: !8)
+!48 = !DILocalVariable(name: "v", line: 19, arg: 1, scope: !4, file: !5, type: !8)
!49 = !DILocation(line: 19, scope: !4)
-!50 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "v", line: 26, scope: !29, file: !5, type: !9)
+!50 = !DILocalVariable(name: "v", line: 26, scope: !29, file: !5, type: !9)
!51 = !DILocation(line: 26, scope: !29)
!52 = !DILocation(line: 27, scope: !29)
!53 = !DILocation(line: 28, scope: !29)
-!54 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 29, scope: !29, file: !5, type: !39)
+!54 = !DILocalVariable(name: "a", line: 29, scope: !29, file: !5, type: !39)
!55 = !DILocation(line: 29, scope: !29)
!56 = !DILocation(line: 30, scope: !29)
!57 = !DILocation(line: 31, scope: !29)
!58 = !DILocation(line: 32, scope: !29)
-!59 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 22, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, file: !5, type: !60)
+!59 = !DILocalVariable(name: "this", line: 22, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, file: !5, type: !60)
!60 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !39)
!61 = !DILocation(line: 22, scope: !35)
-!62 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "v", line: 22, arg: 2, scope: !35, file: !5, type: !9)
-!63 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 14, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !33, file: !5, type: !64)
+!62 = !DILocalVariable(name: "v", line: 22, arg: 2, scope: !35, file: !5, type: !9)
+!63 = !DILocalVariable(name: "this", line: 14, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !33, file: !5, type: !64)
!64 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
!65 = !DILocation(line: 14, scope: !33)
-!66 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 14, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !34, file: !5, type: !64)
+!66 = !DILocalVariable(name: "this", line: 14, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !34, file: !5, type: !64)
!67 = !DILocation(line: 14, scope: !34)
!68 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll
index 4e4da67beb08..c72a3aa2c04d 100644
--- a/test/DebugInfo/X86/rvalue-ref.ll
+++ b/test/DebugInfo/X86/rvalue-ref.ll
@@ -5,7 +5,7 @@
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
-define void @_Z3fooOi(i32* %i) uwtable ssp {
+define void @_Z3fooOi(i32* %i) uwtable ssp !dbg !5 {
entry:
%i.addr = alloca i32*, align 8
store i32* %i, i32** %i.addr, align 8
@@ -23,16 +23,16 @@ declare i32 @printf(i8*, ...)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!17}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 157054) (llvm/trunk 157060)", isOptimized: false, emissionKind: 0, file: !16, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 157054) (llvm/trunk 157060)", isOptimized: false, emissionKind: 0, file: !16, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "foo", linkageName: "_Z3fooOi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !16, scope: !6, type: !7, function: void (i32*)* @_Z3fooOi, variables: !1)
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooOi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !16, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "foo.cpp", directory: "/Users/echristo/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9}
!9 = !DIDerivedType(tag: DW_TAG_rvalue_reference_type, baseType: !10)
!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 4, arg: 1, scope: !5, file: !6, type: !9)
+!11 = !DILocalVariable(name: "i", line: 4, arg: 1, scope: !5, file: !6, type: !9)
!12 = !DILocation(line: 4, column: 17, scope: !5)
!13 = !DILocation(line: 6, column: 3, scope: !14)
!14 = distinct !DILexicalBlock(line: 5, column: 1, file: !16, scope: !5)
diff --git a/test/DebugInfo/X86/safestack-byval.ll b/test/DebugInfo/X86/safestack-byval.ll
new file mode 100644
index 000000000000..f1f6b6c1d911
--- /dev/null
+++ b/test/DebugInfo/X86/safestack-byval.ll
@@ -0,0 +1,91 @@
+; Test dwarf codegen for DILocalVariable of a byval function argument that
+; points to neither an argument nor an alloca. This kind of IR is generated by
+; SafeStack for unsafe byval arguments.
+; RUN: llc -mtriple=x86_64-unknown-unknown -stop-after expand-isel-pseudos %s -o /dev/null | FileCheck %s
+
+; This was built by compiling the following source with SafeStack and
+; simplifying the result a little.
+; struct S {
+; int a[100];
+; };
+;
+; int f(S zzz, unsigned long len) {
+; return zzz.a[len];
+; }
+
+; CHECK: ![[ZZZ:.*]] = !DILocalVariable(name: "zzz",
+; CHECK: ![[ZZZ_EXPR:.*]] = !DIExpression(DW_OP_deref, DW_OP_minus, 400)
+; CHECK: DBG_VALUE {{.*}} ![[ZZZ]], ![[ZZZ_EXPR]]
+
+%struct.S = type { [100 x i32] }
+
+@__safestack_unsafe_stack_ptr = external thread_local(initialexec) global i8*
+
+; Function Attrs: norecurse nounwind readonly safestack uwtable
+define i32 @_Z1f1Sm(%struct.S* byval nocapture readonly align 8 %zzz, i64 %len) #0 !dbg !12 {
+entry:
+ %unsafe_stack_ptr = load i8*, i8** @__safestack_unsafe_stack_ptr, !dbg !22
+ %unsafe_stack_static_top = getelementptr i8, i8* %unsafe_stack_ptr, i32 -400, !dbg !22
+ store i8* %unsafe_stack_static_top, i8** @__safestack_unsafe_stack_ptr, !dbg !22
+; !17 describes "zzz"
+ call void @llvm.dbg.declare(metadata i8* %unsafe_stack_ptr, metadata !17, metadata !23), !dbg !22
+ %0 = getelementptr i8, i8* %unsafe_stack_ptr, i32 -400, !dbg !22
+ %zzz.unsafe-byval = bitcast i8* %0 to %struct.S*, !dbg !22
+ %1 = bitcast %struct.S* %zzz to i8*, !dbg !24
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 400, i32 8, i1 false), !dbg !24
+ tail call void @llvm.dbg.value(metadata i64 %len, i64 0, metadata !18, metadata !25), !dbg !24
+ %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz.unsafe-byval, i64 0, i32 0, i64 %len, !dbg !26
+ %2 = load i32, i32* %arrayidx, align 4, !dbg !26, !tbaa !27
+ store i8* %unsafe_stack_ptr, i8** @__safestack_unsafe_stack_ptr, !dbg !31
+ ret i32 %2, !dbg !31
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
+
+attributes #0 = { norecurse nounwind readonly safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { argmemonly nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!19, !20}
+!llvm.ident = !{!21}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 254107) (llvm/trunk 254109)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !11)
+!1 = !DIFile(filename: "../llvm/1.cc", directory: "/tmp/build")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !1, line: 4, size: 3200, align: 32, elements: !5, identifier: "_ZTS1S")
+!5 = !{!6}
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !"_ZTS1S", file: !1, line: 5, baseType: !7, size: 3200, align: 32)
+!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 3200, align: 32, elements: !9)
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DISubrange(count: 100)
+!11 = !{!12}
+!12 = distinct !DISubprogram(name: "f", linkageName: "_Z1f1Sm", scope: !1, file: !1, line: 8, type: !13, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, variables: !16)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!8, !"_ZTS1S", !15}
+!15 = !DIBasicType(name: "long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!16 = !{!17, !18}
+!17 = !DILocalVariable(name: "zzz", arg: 1, scope: !12, file: !1, line: 8, type: !"_ZTS1S")
+!18 = !DILocalVariable(name: "len", arg: 2, scope: !12, file: !1, line: 8, type: !15)
+!19 = !{i32 2, !"Dwarf Version", i32 4}
+!20 = !{i32 2, !"Debug Info Version", i32 3}
+!21 = !{!"clang version 3.8.0 (trunk 254107) (llvm/trunk 254109)"}
+!22 = !DILocation(line: 8, column: 9, scope: !12)
+!23 = !DIExpression(DW_OP_deref, DW_OP_minus, 400)
+!24 = !DILocation(line: 8, column: 28, scope: !12)
+!25 = !DIExpression()
+!26 = !DILocation(line: 9, column: 10, scope: !12)
+!27 = !{!28, !28, i64 0}
+!28 = !{!"int", !29, i64 0}
+!29 = !{!"omnipotent char", !30, i64 0}
+!30 = !{!"Simple C/C++ TBAA"}
+!31 = !DILocation(line: 9, column: 3, scope: !12)
diff --git a/test/DebugInfo/X86/sret.ll b/test/DebugInfo/X86/sret.ll
index ef8f2e6d65e1..45af28217419 100644
--- a/test/DebugInfo/X86/sret.ll
+++ b/test/DebugInfo/X86/sret.ll
@@ -14,11 +14,11 @@
@_ZTS1A = linkonce_odr constant [3 x i8] c"1A\00"
@_ZTI1A = linkonce_odr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) }
-@_ZN1AC1Ei = alias void (%class.A*, i32)* @_ZN1AC2Ei
-@_ZN1AC1ERKS_ = alias void (%class.A*, %class.A*)* @_ZN1AC2ERKS_
+@_ZN1AC1Ei = alias void (%class.A*, i32), void (%class.A*, i32)* @_ZN1AC2Ei
+@_ZN1AC1ERKS_ = alias void (%class.A*, %class.A*), void (%class.A*, %class.A*)* @_ZN1AC2ERKS_
; Function Attrs: nounwind uwtable
-define void @_ZN1AC2Ei(%class.A* %this, i32 %i) unnamed_addr #0 align 2 {
+define void @_ZN1AC2Ei(%class.A* %this, i32 %i) unnamed_addr #0 align 2 !dbg !49 {
entry:
%this.addr = alloca %class.A*, align 8
%i.addr = alloca i32, align 4
@@ -39,7 +39,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind uwtable
-define void @_ZN1AC2ERKS_(%class.A* %this, %class.A* %rhs) unnamed_addr #0 align 2 {
+define void @_ZN1AC2ERKS_(%class.A* %this, %class.A* %rhs) unnamed_addr #0 align 2 !dbg !50 {
entry:
%this.addr = alloca %class.A*, align 8
%rhs.addr = alloca %class.A*, align 8
@@ -59,7 +59,7 @@ entry:
}
; Function Attrs: nounwind uwtable
-define %class.A* @_ZN1AaSERKS_(%class.A* %this, %class.A* %rhs) #0 align 2 {
+define %class.A* @_ZN1AaSERKS_(%class.A* %this, %class.A* %rhs) #0 align 2 !dbg !51 {
entry:
%this.addr = alloca %class.A*, align 8
%rhs.addr = alloca %class.A*, align 8
@@ -77,7 +77,7 @@ entry:
}
; Function Attrs: nounwind uwtable
-define i32 @_ZN1A7get_intEv(%class.A* %this) #0 align 2 {
+define i32 @_ZN1A7get_intEv(%class.A* %this) #0 align 2 !dbg !52 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -89,7 +89,7 @@ entry:
}
; Function Attrs: uwtable
-define void @_ZN1B9AInstanceEv(%class.A* noalias sret %agg.result, %class.B* %this) #2 align 2 {
+define void @_ZN1B9AInstanceEv(%class.A* noalias sret %agg.result, %class.B* %this) #2 align 2 !dbg !53 {
entry:
%this.addr = alloca %class.B*, align 8
%nrvo = alloca i1
@@ -114,7 +114,7 @@ nrvo.skipdtor: ; preds = %nrvo.unused, %entry
}
; Function Attrs: nounwind uwtable
-define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr #0 align 2 {
+define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr #0 align 2 !dbg !63 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -124,7 +124,7 @@ entry:
}
; Function Attrs: uwtable
-define i32 @main(i32 %argc, i8** %argv) #2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define i32 @main(i32 %argc, i8** %argv) #2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !54 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
@@ -189,7 +189,7 @@ terminate.lpad: ; preds = %lpad
}
; Function Attrs: nounwind uwtable
-define linkonce_odr void @_ZN1BC2Ev(%class.B* %this) unnamed_addr #0 align 2 {
+define linkonce_odr void @_ZN1BC2Ev(%class.B* %this) unnamed_addr #0 align 2 !dbg !62 {
entry:
%this.addr = alloca %class.B*, align 8
store %class.B* %this, %class.B** %this.addr, align 8
@@ -212,7 +212,7 @@ declare i8* @__cxa_begin_catch(i8*)
declare void @_ZSt9terminatev()
; Function Attrs: uwtable
-define linkonce_odr void @_ZN1AD0Ev(%class.A* %this) unnamed_addr #2 align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+define linkonce_odr void @_ZN1AD0Ev(%class.A* %this) unnamed_addr #2 align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !61 {
entry:
%this.addr = alloca %class.A*, align 8
%exn.slot = alloca i8*
@@ -263,7 +263,7 @@ attributes #7 = { builtin nounwind }
!llvm.module.flags = !{!64, !65}
!llvm.ident = !{!66}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 203283) (llvm/trunk 203307)", isOptimized: false, splitDebugFilename: "sret.dwo", emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !48, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 203283) (llvm/trunk 203307)", isOptimized: false, splitDebugFilename: "sret.dwo", emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !48, globals: !2, imports: !2)
!1 = !DIFile(filename: "sret.cpp", directory: "/usr/local/google/home/echristo/tmp")
!2 = !{}
!3 = !{!4, !37}
@@ -305,51 +305,51 @@ attributes #7 = { builtin nounwind }
!45 = !DISubroutineType(types: !46)
!46 = !{!4, !42}
!48 = !{!49, !50, !51, !52, !53, !54, !61, !62, !63}
-!49 = !DISubprogram(name: "A", linkageName: "_ZN1AC2Ei", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 18, file: !1, scope: !"_ZTS1A", type: !15, function: void (%class.A*, i32)* @_ZN1AC2Ei, declaration: !14, variables: !2)
-!50 = !DISubprogram(name: "A", linkageName: "_ZN1AC2ERKS_", line: 21, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !1, scope: !"_ZTS1A", type: !20, function: void (%class.A*, %class.A*)* @_ZN1AC2ERKS_, declaration: !19, variables: !2)
-!51 = !DISubprogram(name: "operator=", linkageName: "_ZN1AaSERKS_", line: 27, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 28, file: !1, scope: !"_ZTS1A", type: !26, function: %class.A* (%class.A*, %class.A*)* @_ZN1AaSERKS_, declaration: !25, variables: !2)
-!52 = !DISubprogram(name: "get_int", linkageName: "_ZN1A7get_intEv", line: 33, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 34, file: !1, scope: !"_ZTS1A", type: !34, function: i32 (%class.A*)* @_ZN1A7get_intEv, declaration: !33, variables: !2)
-!53 = !DISubprogram(name: "AInstance", linkageName: "_ZN1B9AInstanceEv", line: 47, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 48, file: !1, scope: !"_ZTS1B", type: !45, function: void (%class.A*, %class.B*)* @_ZN1B9AInstanceEv, declaration: !44, variables: !2)
-!54 = !DISubprogram(name: "main", line: 53, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 54, file: !1, scope: !7, type: !55, function: i32 (i32, i8**)* @main, variables: !2)
+!49 = distinct !DISubprogram(name: "A", linkageName: "_ZN1AC2Ei", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 18, file: !1, scope: !"_ZTS1A", type: !15, declaration: !14, variables: !2)
+!50 = distinct !DISubprogram(name: "A", linkageName: "_ZN1AC2ERKS_", line: 21, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 23, file: !1, scope: !"_ZTS1A", type: !20, declaration: !19, variables: !2)
+!51 = distinct !DISubprogram(name: "operator=", linkageName: "_ZN1AaSERKS_", line: 27, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 28, file: !1, scope: !"_ZTS1A", type: !26, declaration: !25, variables: !2)
+!52 = distinct !DISubprogram(name: "get_int", linkageName: "_ZN1A7get_intEv", line: 33, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 34, file: !1, scope: !"_ZTS1A", type: !34, declaration: !33, variables: !2)
+!53 = distinct !DISubprogram(name: "AInstance", linkageName: "_ZN1B9AInstanceEv", line: 47, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 48, file: !1, scope: !"_ZTS1B", type: !45, declaration: !44, variables: !2)
+!54 = distinct !DISubprogram(name: "main", line: 53, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 54, file: !1, scope: !7, type: !55, variables: !2)
!55 = !DISubroutineType(types: !56)
!56 = !{!12, !12, !57}
!57 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !58)
!58 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !59)
!59 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !60)
!60 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!61 = !DISubprogram(name: "~A", linkageName: "_ZN1AD0Ev", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !"_ZTS1A", type: !30, function: void (%class.A*)* @_ZN1AD0Ev, declaration: !29, variables: !2)
-!62 = !DISubprogram(name: "B", linkageName: "_ZN1BC2Ev", line: 41, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 41, file: !1, scope: !"_ZTS1B", type: !40, function: void (%class.B*)* @_ZN1BC2Ev, declaration: !39, variables: !2)
-!63 = !DISubprogram(name: "~A", linkageName: "_ZN1AD2Ev", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !"_ZTS1A", type: !30, function: void (%class.A*)* @_ZN1AD2Ev, declaration: !29, variables: !2)
+!61 = distinct !DISubprogram(name: "~A", linkageName: "_ZN1AD0Ev", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !"_ZTS1A", type: !30, declaration: !29, variables: !2)
+!62 = distinct !DISubprogram(name: "B", linkageName: "_ZN1BC2Ev", line: 41, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 41, file: !1, scope: !"_ZTS1B", type: !40, declaration: !39, variables: !2)
+!63 = distinct !DISubprogram(name: "~A", linkageName: "_ZN1AD2Ev", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !"_ZTS1A", type: !30, declaration: !29, variables: !2)
!64 = !{i32 2, !"Dwarf Version", i32 4}
!65 = !{i32 1, !"Debug Info Version", i32 3}
!66 = !{!"clang version 3.5.0 (trunk 203283) (llvm/trunk 203307)"}
-!67 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !49, type: !68)
+!67 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !49, type: !68)
!68 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1A")
!69 = !DILocation(line: 0, scope: !49)
-!70 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 16, arg: 2, scope: !49, file: !7, type: !12)
+!70 = !DILocalVariable(name: "i", line: 16, arg: 2, scope: !49, file: !7, type: !12)
!71 = !DILocation(line: 16, scope: !49)
!72 = !DILocation(line: 18, scope: !49)
!73 = !DILocation(line: 19, scope: !49)
-!74 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !50, type: !68)
+!74 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !50, type: !68)
!75 = !DILocation(line: 0, scope: !50)
-!76 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "rhs", line: 21, arg: 2, scope: !50, file: !7, type: !22)
+!76 = !DILocalVariable(name: "rhs", line: 21, arg: 2, scope: !50, file: !7, type: !22)
!77 = !DILocation(line: 21, scope: !50)
!78 = !DILocation(line: 23, scope: !50)
!79 = !DILocation(line: 24, scope: !50)
-!80 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !51, type: !68)
+!80 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !51, type: !68)
!81 = !DILocation(line: 0, scope: !51)
-!82 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "rhs", line: 27, arg: 2, scope: !51, file: !7, type: !22)
+!82 = !DILocalVariable(name: "rhs", line: 27, arg: 2, scope: !51, file: !7, type: !22)
!83 = !DILocation(line: 27, scope: !51)
!84 = !DILocation(line: 29, scope: !51)
!85 = !DILocation(line: 30, scope: !51)
-!86 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !52, type: !68)
+!86 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !52, type: !68)
!87 = !DILocation(line: 0, scope: !52)
!88 = !DILocation(line: 35, scope: !52)
-!89 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !53, type: !90)
+!89 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !53, type: !90)
!90 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1B")
!91 = !DILocation(line: 0, scope: !53)
!92 = !DILocation(line: 49, scope: !53)
-!93 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 49, scope: !53, file: !7, type: !4)
+!93 = !DILocalVariable(name: "a", line: 49, scope: !53, file: !7, type: !4)
!94 = !DILocation(line: 50, scope: !53)
!95 = !DILocation(line: 51, scope: !53)
!96 = !DILocation(line: 51, scope: !97)
@@ -357,19 +357,19 @@ attributes #7 = { builtin nounwind }
!98 = !DILocation(line: 51, scope: !99)
!99 = distinct !DILexicalBlock(line: 51, column: 0, file: !1, scope: !100)
!100 = distinct !DILexicalBlock(line: 51, column: 0, file: !1, scope: !53)
-!101 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !63, type: !68)
+!101 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !63, type: !68)
!102 = !DILocation(line: 0, scope: !63)
!103 = !DILocation(line: 8, scope: !63)
-!104 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 53, arg: 1, scope: !54, file: !7, type: !12)
+!104 = !DILocalVariable(name: "argc", line: 53, arg: 1, scope: !54, file: !7, type: !12)
!105 = !DILocation(line: 53, scope: !54)
-!106 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 53, arg: 2, scope: !54, file: !7, type: !57)
-!107 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 55, scope: !54, file: !7, type: !37)
+!106 = !DILocalVariable(name: "argv", line: 53, arg: 2, scope: !54, file: !7, type: !57)
+!107 = !DILocalVariable(name: "b", line: 55, scope: !54, file: !7, type: !37)
!108 = !DILocation(line: 55, scope: !54)
-!109 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "return_val", line: 56, scope: !54, file: !7, type: !12)
+!109 = !DILocalVariable(name: "return_val", line: 56, scope: !54, file: !7, type: !12)
!110 = !DILocation(line: 56, scope: !54)
!111 = !DILocation(line: 56, scope: !112)
!112 = distinct !DILexicalBlock(line: 56, column: 0, file: !1, scope: !54)
-!113 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 58, scope: !54, file: !7, type: !4)
+!113 = !DILocalVariable(name: "a", line: 58, scope: !54, file: !7, type: !4)
!114 = !DILocation(line: 58, scope: !54)
!115 = !DILocation(line: 59, scope: !54)
!116 = !DILocation(line: 60, scope: !54)
@@ -379,10 +379,10 @@ attributes #7 = { builtin nounwind }
!120 = distinct !DILexicalBlock(line: 60, column: 0, file: !1, scope: !54)
!121 = !DILocation(line: 60, scope: !122)
!122 = distinct !DILexicalBlock(line: 60, column: 0, file: !1, scope: !54)
-!123 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !62, type: !90)
+!123 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !62, type: !90)
!124 = !DILocation(line: 0, scope: !62)
!125 = !DILocation(line: 41, scope: !62)
-!126 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !61, type: !68)
+!126 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !61, type: !68)
!127 = !DILocation(line: 0, scope: !61)
!128 = !DILocation(line: 8, scope: !61)
!129 = !DILocation(line: 8, scope: !130)
diff --git a/test/DebugInfo/X86/sroasplit-1.ll b/test/DebugInfo/X86/sroasplit-1.ll
index 2179aa380953..11895a7f083e 100644
--- a/test/DebugInfo/X86/sroasplit-1.ll
+++ b/test/DebugInfo/X86/sroasplit-1.ll
@@ -25,7 +25,7 @@
; CHECK: call void @llvm.dbg.value(metadata i32 %[[A:.*]], i64 0, metadata ![[VAR]], metadata ![[PIECE2:[0-9]+]])
; CHECK: ret i32 %[[A]]
; Read Var and Piece:
-; CHECK: ![[VAR]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i1",{{.*}} line: 11,
+; CHECK: ![[VAR]] = !DILocalVariable(name: "i1",{{.*}} line: 11,
; CHECK: ![[PIECE1]] = !DIExpression(DW_OP_bit_piece, 32, 96)
; CHECK: ![[PIECE2]] = !DIExpression(DW_OP_bit_piece, 0, 32)
@@ -36,7 +36,7 @@ target triple = "x86_64-apple-macosx10.9.0"
%struct.Inner = type { i32, i64 }
; Function Attrs: nounwind ssp uwtable
-define i32 @foo(%struct.Outer* byval align 8 %outer) #0 {
+define i32 @foo(%struct.Outer* byval align 8 %outer) #0 !dbg !4 {
entry:
%i1 = alloca %struct.Inner, align 8
call void @llvm.dbg.declare(metadata %struct.Outer* %outer, metadata !25, metadata !2), !dbg !26
@@ -65,11 +65,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!22, !23}
!llvm.ident = !{!24}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !3, globals: !{}, imports: !{})
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !3, globals: !{}, imports: !{})
!1 = !DIFile(filename: "sroasplit-1.c", directory: "")
!2 = !DIExpression()
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: i32 (%struct.Outer*)* @foo, variables: !{})
+!4 = distinct !DISubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !{})
!5 = !DIFile(filename: "sroasplit-1.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
@@ -90,8 +90,8 @@ attributes #2 = { nounwind }
!22 = !{i32 2, !"Dwarf Version", i32 2}
!23 = !{i32 1, !"Debug Info Version", i32 3}
!24 = !{!"clang version 3.5.0 "}
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
+!25 = !DILocalVariable(name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
!26 = !DILocation(line: 10, scope: !4)
-!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i1", line: 11, scope: !4, file: !5, type: !14)
+!27 = !DILocalVariable(name: "i1", line: 11, scope: !4, file: !5, type: !14)
!28 = !DILocation(line: 11, scope: !4)
!29 = !DILocation(line: 12, scope: !4)
diff --git a/test/DebugInfo/X86/sroasplit-2.ll b/test/DebugInfo/X86/sroasplit-2.ll
index b209fafd4df1..bea1b26df285 100644
--- a/test/DebugInfo/X86/sroasplit-2.ll
+++ b/test/DebugInfo/X86/sroasplit-2.ll
@@ -23,10 +23,10 @@
; CHECK: call void @llvm.dbg.value(metadata i64 %outer.coerce0, i64 0, metadata ![[O:[0-9]+]], metadata ![[PIECE1:[0-9]+]]),
; CHECK: call void @llvm.dbg.value(metadata i64 %outer.coerce1, i64 0, metadata ![[O]], metadata ![[PIECE2:[0-9]+]]),
; CHECK: call void @llvm.dbg.value({{.*}}, i64 0, metadata ![[I1:[0-9]+]], metadata ![[PIECE3:[0-9]+]]),
-; CHECK-DAG: ![[O]] = !DILocalVariable(tag: DW_TAG_arg_variable, name: "outer",{{.*}} line: 10
+; CHECK-DAG: ![[O]] = !DILocalVariable(name: "outer",{{.*}} line: 10
; CHECK-DAG: ![[PIECE1]] = !DIExpression(DW_OP_bit_piece, 0, 64)
; CHECK-DAG: ![[PIECE2]] = !DIExpression(DW_OP_bit_piece, 64, 64)
-; CHECK-DAG: ![[I1]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i1",{{.*}} line: 11
+; CHECK-DAG: ![[I1]] = !DILocalVariable(name: "i1",{{.*}} line: 11
; CHECK-DAG: ![[PIECE3]] = !DIExpression(DW_OP_bit_piece, 0, 32)
; ModuleID = 'sroasplit-2.c'
@@ -37,7 +37,7 @@ target triple = "x86_64-apple-macosx10.9.0"
%struct.Inner = type { i32, i32 }
; Function Attrs: nounwind ssp uwtable
-define i32 @foo(i64 %outer.coerce0, i64 %outer.coerce1) #0 {
+define i32 @foo(i64 %outer.coerce0, i64 %outer.coerce1) #0 !dbg !4 {
%outer = alloca %struct.Outer, align 8
%i1 = alloca %struct.Inner, align 4
%1 = bitcast %struct.Outer* %outer to { i64, i64 }*
@@ -71,11 +71,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!21, !22}
!llvm.ident = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !3, globals: !{}, imports: !{})
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !3, globals: !{}, imports: !{})
!1 = !DIFile(filename: "sroasplit-2.c", directory: "")
!2 = !DIExpression()
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: i32 (i64, i64)* @foo, variables: !{})
+!4 = distinct !DISubprogram(name: "foo", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !{})
!5 = !DIFile(filename: "sroasplit-2.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
@@ -95,8 +95,8 @@ attributes #2 = { nounwind }
!21 = !{i32 2, !"Dwarf Version", i32 2}
!22 = !{i32 1, !"Debug Info Version", i32 3}
!23 = !{!"clang version 3.5.0 "}
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
+!24 = !DILocalVariable(name: "outer", line: 10, arg: 1, scope: !4, file: !5, type: !9)
!25 = !DILocation(line: 10, scope: !4)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i1", line: 11, scope: !4, file: !5, type: !14)
+!26 = !DILocalVariable(name: "i1", line: 11, scope: !4, file: !5, type: !14)
!27 = !DILocation(line: 11, scope: !4)
!28 = !DILocation(line: 12, scope: !4)
diff --git a/test/DebugInfo/X86/sroasplit-3.ll b/test/DebugInfo/X86/sroasplit-3.ll
index 50b701f781d8..7a39beb137d8 100644
--- a/test/DebugInfo/X86/sroasplit-3.ll
+++ b/test/DebugInfo/X86/sroasplit-3.ll
@@ -4,7 +4,7 @@
; not partitioned into multiple allocas.
;
; CHECK: call void @llvm.dbg.value(metadata float %s.coerce, i64 0, metadata ![[VAR:[0-9]+]], metadata ![[EXPR:[0-9]+]])
-; CHECK: ![[VAR]] = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s",{{.*}} line: 3,
+; CHECK: ![[VAR]] = !DILocalVariable(name: "s",{{.*}} line: 3,
; CHECK: ![[EXPR]] = !DIExpression(
; CHECK-NOT: DW_OP_bit_piece
@@ -20,7 +20,7 @@ target triple = "x86_64-apple-macosx10.10.0"
%struct.S = type { float }
; Function Attrs: nounwind ssp uwtable
-define float @foo(float %s.coerce) #0 {
+define float @foo(float %s.coerce) #0 !dbg !4 {
entry:
%s = alloca %struct.S, align 4
%coerce.dive = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0
@@ -41,11 +41,11 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!12, !13, !14}
!llvm.ident = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "/Volumes/Data/llvm/_build.ninja.debug")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: float (float)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "/Volumes/Data/llvm/_build.ninja.debug")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
@@ -57,7 +57,7 @@ attributes #1 = { nounwind readnone }
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = !{i32 1, !"PIC Level", i32 2}
!15 = !{!"clang version 3.6.0 "}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
+!16 = !DILocalVariable(name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
!17 = !DIExpression()
!18 = !DILocation(line: 3, column: 20, scope: !4)
!19 = !DILocation(line: 4, column: 2, scope: !4)
diff --git a/test/DebugInfo/X86/sroasplit-4.ll b/test/DebugInfo/X86/sroasplit-4.ll
index e9fcdf8eac92..2abf6e1ec9b4 100644
--- a/test/DebugInfo/X86/sroasplit-4.ll
+++ b/test/DebugInfo/X86/sroasplit-4.ll
@@ -45,7 +45,7 @@ target triple = "x86_64-apple-darwin"
@t = external global i64
; Function Attrs: nounwind
-define i32 @_Z4testv() #0 {
+define i32 @_Z4testv() #0 !dbg !17 {
entry:
%retval = alloca i32, align 4
%y = alloca %struct.p, align 8
@@ -109,7 +109,7 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!21, !22}
!llvm.ident = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !16, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !16, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "")
!2 = !{}
!3 = !{!4, !10}
@@ -126,7 +126,7 @@ attributes #3 = { nounwind }
!14 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 10, size: 128, align: 64, offset: 64, file: !5, scope: !"_ZTS1r", baseType: !"_ZTS1p")
!15 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 11, size: 128, align: 64, offset: 192, file: !5, scope: !"_ZTS1r", baseType: !"_ZTS1p")
!16 = !{!17}
-!17 = !DISubprogram(name: "test", linkageName: "_Z4testv", line: 18, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 18, file: !5, scope: !18, type: !19, function: i32 ()* @_Z4testv, variables: !2)
+!17 = distinct !DISubprogram(name: "test", linkageName: "_Z4testv", line: 18, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 18, file: !5, scope: !18, type: !19, variables: !2)
!18 = !DIFile(filename: "pr22393.cc", directory: "")
!19 = !DISubroutineType(types: !20)
!20 = !{!13}
@@ -137,10 +137,10 @@ attributes #3 = { nounwind }
!25 = distinct !DILexicalBlock(line: 19, column: 0, file: !5, scope: !17)
!26 = !DILocation(line: 19, scope: !17)
!27 = !DILocation(line: 20, scope: !25)
-!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 21, scope: !17, file: !18, type: !"_ZTS1p")
+!28 = !DILocalVariable(name: "y", line: 21, scope: !17, file: !18, type: !"_ZTS1p")
!29 = !DIExpression()
!30 = !DILocation(line: 21, scope: !17)
-!31 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "r", line: 22, scope: !17, file: !18, type: !"_ZTS1r")
+!31 = !DILocalVariable(name: "r", line: 22, scope: !17, file: !18, type: !"_ZTS1r")
!32 = !DILocation(line: 22, scope: !17)
!33 = !DILocation(line: 23, scope: !17)
!34 = !DILocation(line: 24, scope: !17)
diff --git a/test/DebugInfo/X86/sroasplit-5.ll b/test/DebugInfo/X86/sroasplit-5.ll
index ce6b4544c56b..059cb19e92e2 100644
--- a/test/DebugInfo/X86/sroasplit-5.ll
+++ b/test/DebugInfo/X86/sroasplit-5.ll
@@ -27,7 +27,7 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.prog_src_register = type { i32, i24 }
; Function Attrs: nounwind
-define i64 @src_reg_for_float() #0 {
+define i64 @src_reg_for_float() #0 !dbg !4 {
entry:
%retval = alloca %struct.prog_src_register, align 4
%a = alloca %struct.prog_src_register, align 4
@@ -66,11 +66,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!13, !14}
!llvm.ident = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "src_reg_for_float", line: 7, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 7, file: !5, scope: !6, type: !7, function: i64 ()* @src_reg_for_float, variables: !2)
+!4 = distinct !DISubprogram(name: "src_reg_for_float", line: 7, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 7, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "pr22495.c", directory: "")
!6 = !DIFile(filename: "pr22495.c", directory: "")
!7 = !DISubroutineType(types: !8)
@@ -82,10 +82,10 @@ attributes #2 = { nounwind }
!13 = !{i32 2, !"Dwarf Version", i32 4}
!14 = !{i32 2, !"Debug Info Version", i32 3}
!15 = !{!"clang version 3.7.0 "}
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 8, scope: !4, file: !6, type: !9)
+!16 = !DILocalVariable(name: "a", line: 8, scope: !4, file: !6, type: !9)
!17 = !DIExpression()
!18 = !DILocation(line: 8, scope: !4)
!19 = !DILocation(line: 9, scope: !4)
-!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "local", line: 10, scope: !4, file: !6, type: !12)
+!20 = !DILocalVariable(name: "local", line: 10, scope: !4, file: !6, type: !12)
!21 = !DILocation(line: 10, scope: !4)
!22 = !DILocation(line: 11, scope: !4)
diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
index 86a1647d6fa4..a366853ad205 100644
--- a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
+++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
@@ -58,7 +58,7 @@
; ASM-NOT: Lcu_begin
; ASM: Lset[[LT:[0-9]+]] = Lline_table_start0-Lsection_line ## DW_AT_stmt_list
; ASM-NEXT: .long Lset[[LT]]
-define i32 @test(i32 %a) nounwind uwtable ssp {
+define i32 @test(i32 %a) nounwind uwtable ssp !dbg !5 {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
@@ -70,7 +70,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-define i32 @fn(i32 %a) nounwind uwtable ssp {
+define i32 @fn(i32 %a) nounwind uwtable ssp !dbg !13 {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
@@ -81,23 +81,23 @@ entry:
!llvm.dbg.cu = !{!0, !10}
!llvm.module.flags = !{!25}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3", isOptimized: false, emissionKind: 1, file: !23, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3", isOptimized: false, emissionKind: 1, file: !23, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "test", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !23, scope: !6, type: !7, function: i32 (i32)* @test, variables: !1)
+!5 = distinct !DISubprogram(name: "test", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !23, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "simple.c", directory: "/private/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 172862)", isOptimized: false, emissionKind: 1, file: !24, enums: !1, retainedTypes: !1, subprograms: !11, globals: !1, imports: !1)
+!10 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 172862)", isOptimized: false, emissionKind: 1, file: !24, enums: !1, retainedTypes: !1, subprograms: !11, globals: !1, imports: !1)
!11 = !{!13}
-!13 = !DISubprogram(name: "fn", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !24, scope: !14, type: !7, function: i32 (i32)* @fn, variables: !1)
+!13 = distinct !DISubprogram(name: "fn", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !24, scope: !14, type: !7, variables: !1)
!14 = !DIFile(filename: "simple2.c", directory: "/private/tmp")
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 2, arg: 1, scope: !5, file: !6, type: !9)
+!15 = !DILocalVariable(name: "a", line: 2, arg: 1, scope: !5, file: !6, type: !9)
!16 = !DILocation(line: 2, scope: !5)
!17 = !DILocation(line: 4, scope: !18)
!18 = distinct !DILexicalBlock(line: 3, column: 0, file: !23, scope: !5)
-!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !13, file: !14, type: !9)
+!19 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !13, file: !14, type: !9)
!20 = !DILocation(line: 1, scope: !13)
!21 = !DILocation(line: 2, scope: !22)
!22 = distinct !DILexicalBlock(line: 1, column: 0, file: !24, scope: !13)
diff --git a/test/DebugInfo/X86/stmt-list.ll b/test/DebugInfo/X86/stmt-list.ll
index 1f5189e362a4..393e1b50ea8a 100644
--- a/test/DebugInfo/X86/stmt-list.ll
+++ b/test/DebugInfo/X86/stmt-list.ll
@@ -5,7 +5,7 @@
; CHECK: .section .debug_line,"",@progbits
; CHECK-NEXT: .Lline_table_start0:
-define void @f() {
+define void @f() !dbg !0 {
entry:
ret void
}
@@ -14,9 +14,9 @@ entry:
!llvm.module.flags = !{!7}
!5 = !{!0}
-!0 = !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !6, scope: !1, type: !3, function: void ()* @f)
+!0 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !6, scope: !1, type: !3)
!1 = !DIFile(filename: "test2.c", directory: "/home/espindola/llvm")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !6, enums: !{}, retainedTypes: !{}, subprograms: !5)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !6, enums: !{}, retainedTypes: !{}, subprograms: !5)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!6 = !DIFile(filename: "test2.c", directory: "/home/espindola/llvm")
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
index 62b2b1bb55c0..9030ae02e534 100644
--- a/test/DebugInfo/X86/stringpool.ll
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -6,7 +6,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 143009)", isOptimized: true, emissionKind: 0, file: !8, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 143009)", isOptimized: true, emissionKind: 0, file: !8, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !DIGlobalVariable(name: "yyyy", line: 1, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: i32* @yyyy)
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
index 2c133b9b66ee..2236cd087d54 100644
--- a/test/DebugInfo/X86/struct-loc.ll
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -14,7 +14,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 152837) (llvm/trunk 152845)", isOptimized: false, emissionKind: 0, file: !11, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 152837) (llvm/trunk 152845)", isOptimized: false, emissionKind: 0, file: !11, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !DIGlobalVariable(name: "f", line: 5, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: %struct.foo* @f)
diff --git a/test/DebugInfo/X86/subrange-type.ll b/test/DebugInfo/X86/subrange-type.ll
index d83cca6c8776..7a81bac56d0d 100644
--- a/test/DebugInfo/X86/subrange-type.ll
+++ b/test/DebugInfo/X86/subrange-type.ll
@@ -7,7 +7,7 @@
; CHECK: [[SUBTYPE]]: DW_TAG_base_type
; CHECK-NEXT: DW_AT_name
-define i32 @main() nounwind uwtable {
+define i32 @main() nounwind uwtable !dbg !5 {
entry:
%retval = alloca i32, align 4
%i = alloca [2 x i32], align 4
@@ -21,15 +21,15 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 171472) (llvm/trunk 171487)", isOptimized: false, emissionKind: 0, file: !17, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 171472) (llvm/trunk 171487)", isOptimized: false, emissionKind: 0, file: !17, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: !6, type: !7, function: i32 ()* @main, variables: !1)
+!5 = distinct !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !6, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "foo.c", directory: "/usr/local/google/home/echristo/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 4, scope: !11, file: !6, type: !12)
+!10 = !DILocalVariable(name: "i", line: 4, scope: !11, file: !6, type: !12)
!11 = distinct !DILexicalBlock(line: 3, column: 0, file: !6, scope: !5)
!12 = !DICompositeType(tag: DW_TAG_array_type, size: 64, align: 32, baseType: !9, elements: !13)
!13 = !{!14}
diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll
index b850ce800e37..3b95532ff52c 100644
--- a/test/DebugInfo/X86/subreg.ll
+++ b/test/DebugInfo/X86/subreg.ll
@@ -7,7 +7,7 @@
; CHECK-NEXT: .byte 147 # DW_OP_piece
; CHECK-NEXT: .byte 2 # 2
-define i16 @f(i16 signext %zzz) nounwind {
+define i16 @f(i16 signext %zzz) nounwind !dbg !1 {
entry:
call void @llvm.dbg.value(metadata i16 %zzz, i64 0, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
%conv = sext i16 %zzz to i32, !dbg !7
@@ -21,10 +21,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!11}
!9 = !{!1}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "zzz", line: 3, arg: 1, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !10, scope: !2, type: !4, function: i16 (i16)* @f)
+!0 = !DILocalVariable(name: "zzz", line: 3, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !10, scope: !2, type: !4)
!2 = !DIFile(filename: "/home/espindola/llvm/test.c", directory: "/home/espindola/tmpfs/build")
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: false, emissionKind: 1, file: !10, enums: !{}, retainedTypes: !{}, subprograms: !9, imports: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: false, emissionKind: 1, file: !10, enums: !{}, retainedTypes: !{}, subprograms: !9, imports: null)
!4 = !DISubroutineType(types: !5)
!5 = !{null}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "short", size: 16, align: 16, encoding: DW_ATE_signed)
diff --git a/test/DebugInfo/X86/subregisters.ll b/test/DebugInfo/X86/subregisters.ll
index a4722aa8cd58..9dcb09f0869d 100644
--- a/test/DebugInfo/X86/subregisters.ll
+++ b/test/DebugInfo/X86/subregisters.ll
@@ -38,7 +38,7 @@ target triple = "x86_64-apple-macosx10.9.0"
@main.myBar = private unnamed_addr constant %struct.bar { i32 3, i32 4 }, align 4
; Function Attrs: noinline nounwind ssp uwtable
-define void @doSomething(%struct.bar* nocapture readonly %b) #0 {
+define void @doSomething(%struct.bar* nocapture readonly %b) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata %struct.bar* %b, i64 0, metadata !15, metadata !DIExpression()), !dbg !25
%a1 = getelementptr inbounds %struct.bar, %struct.bar* %b, i64 0, i32 0, !dbg !26
@@ -55,7 +55,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare i32 @printf(i8* nocapture readonly, ...) #2
; Function Attrs: nounwind ssp uwtable
-define i32 @main() #3 {
+define i32 @main() #3 !dbg !17 {
entry:
%myBar = alloca i64, align 8, !dbg !34
%tmpcast = bitcast i64* %myBar to %struct.bar*, !dbg !34
@@ -78,11 +78,11 @@ attributes #4 = { nounwind }
!llvm.module.flags = !{!22, !23}
!llvm.ident = !{!24}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "subregisters.c", directory: "")
!2 = !{}
!3 = !{!4, !17}
-!4 = !DISubprogram(name: "doSomething", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !1, scope: !5, type: !6, function: void (%struct.bar*)* @doSomething, variables: !14)
+!4 = distinct !DISubprogram(name: "doSomething", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !1, scope: !5, type: !6, variables: !14)
!5 = !DIFile(filename: "subregisters.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
@@ -93,13 +93,13 @@ attributes #4 = { nounwind }
!12 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!13 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 5, size: 32, align: 32, offset: 32, file: !1, scope: !9, baseType: !12)
!14 = !{!15, !16}
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 10, arg: 1, scope: !4, file: !5, type: !8)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 12, scope: !4, file: !5, type: !12)
-!17 = !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 17, file: !1, scope: !5, type: !18, function: i32 ()* @main, variables: !20)
+!15 = !DILocalVariable(name: "b", line: 10, arg: 1, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(name: "a", line: 12, scope: !4, file: !5, type: !12)
+!17 = distinct !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 17, file: !1, scope: !5, type: !18, variables: !20)
!18 = !DISubroutineType(types: !19)
!19 = !{!12}
!20 = !{!21}
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "myBar", line: 18, scope: !17, file: !5, type: !9)
+!21 = !DILocalVariable(name: "myBar", line: 18, scope: !17, file: !5, type: !9)
!22 = !{i32 2, !"Dwarf Version", i32 2}
!23 = !{i32 1, !"Debug Info Version", i32 3}
!24 = !{!"clang version 3.5 "}
diff --git a/test/DebugInfo/X86/template.ll b/test/DebugInfo/X86/template.ll
index f846b29f5768..d7ce08437ca6 100644
--- a/test/DebugInfo/X86/template.ll
+++ b/test/DebugInfo/X86/template.ll
@@ -64,7 +64,7 @@
@n = global %"struct.y_impl<int>::nested" zeroinitializer, align 1
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_template.cpp, i8* null }]
-define internal void @__cxx_global_var_init() section ".text.startup" {
+define internal void @__cxx_global_var_init() section ".text.startup" !dbg !10 {
entry:
%call = call i32 @_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv(), !dbg !36
store i32 %call, i32* @glbl, align 4, !dbg !36
@@ -72,7 +72,7 @@ entry:
}
; Function Attrs: nounwind uwtable
-define linkonce_odr i32 @_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv() #0 {
+define linkonce_odr i32 @_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv() #0 !dbg !14 {
entry:
ret i32 3, !dbg !37
}
@@ -89,7 +89,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!llvm.module.flags = !{!33, !34}
!llvm.ident = !{!35}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224394) (llvm/trunk 224384)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !30, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224394) (llvm/trunk 224384)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !30, imports: !2)
!1 = !DIFile(filename: "template.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !8}
@@ -99,11 +99,11 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "nested", line: 2, size: 8, align: 8, file: !1, scope: !"_ZTS6y_implIiE", elements: !2, identifier: "_ZTSN6y_implIiE6nestedE")
!9 = !{!10, !14, !28}
-!10 = !DISubprogram(name: "__cxx_global_var_init", line: 3, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !11, type: !12, function: void ()* @__cxx_global_var_init, variables: !2)
+!10 = distinct !DISubprogram(name: "__cxx_global_var_init", line: 3, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !11, type: !12, variables: !2)
!11 = !DIFile(filename: "template.cpp", directory: "/tmp/dbginfo")
!12 = !DISubroutineType(types: !13)
!13 = !{null}
-!14 = !DISubprogram(name: "func<3, &glbl, y_impl, nullptr, 1, 2>", linkageName: "_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !11, type: !15, function: i32 ()* @_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv, templateParams: !17, variables: !2)
+!14 = distinct !DISubprogram(name: "func<3, &glbl, y_impl, nullptr, 1, 2>", linkageName: "_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !11, type: !15, templateParams: !17, variables: !2)
!15 = !DISubroutineType(types: !16)
!16 = !{!7}
!17 = !{!18, !19, !21, !22, !24}
@@ -117,7 +117,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!25 = !{!26, !27}
!26 = !DITemplateValueParameter(tag: DW_TAG_template_value_parameter, type: !7, value: i32 1)
!27 = !DITemplateValueParameter(tag: DW_TAG_template_value_parameter, type: !7, value: i32 2)
-!28 = !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_template.cpp", isLocal: true, isDefinition: true, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !11, type: !29, function: void ()* @_GLOBAL__sub_I_template.cpp, variables: !2)
+!28 = distinct !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_template.cpp", isLocal: true, isDefinition: true, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !11, type: !29, variables: !2)
!29 = !DISubroutineType(types: !2)
!30 = !{!31, !32}
!31 = !DIGlobalVariable(name: "glbl", line: 3, isLocal: false, isDefinition: true, scope: null, file: !11, type: !7, variable: i32* @glbl)
diff --git a/test/DebugInfo/X86/tls.ll b/test/DebugInfo/X86/tls.ll
index 55930f0bacf2..754ed25debc6 100644
--- a/test/DebugInfo/X86/tls.ll
+++ b/test/DebugInfo/X86/tls.ll
@@ -1,20 +1,30 @@
; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-linux-gnu \
-; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=GNUOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=GNUOP %s
; RUN: llc %s -o - -filetype=asm -O0 -mtriple=i386-linux-gnu \
-; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=SINGLE-32 --check-prefix=GNUOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-32 --check-prefix=GNUOP %s
; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-linux-gnu -split-dwarf=Enable \
-; RUN: | FileCheck --check-prefix=FISSION --check-prefix=GNUOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=FISSION --check-prefix=GNUOP %s
; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-scei-ps4 \
-; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=STDOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=STDOP %s
; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-apple-darwin \
-; RUN: | FileCheck --check-prefix=DARWIN --check-prefix=STDOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=DARWIN --check-prefix=STDOP %s
; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-freebsd \
-; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=GNUOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=STDOP %s
+
+; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-linux-gnu -emulated-tls \
+; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=EMUSINGLE-64 \
+; RUN: --check-prefix=EMUGNUOP --check-prefix=EMU %s
+
+; RUN: llc %s -o - -filetype=asm -O0 -mtriple=i386-linux-gnu -emulated-tls \
+; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=EMUSINGLE-32 \
+; RUN: --check-prefix=EMUGNUOP --check-prefix=EMU %s
+
+; TODO: Add expected output for -emulated-tls tests.
; FIXME: add relocation and DWARF expression support to llvm-dwarfdump & use
; that here instead of raw assembly printing
@@ -90,7 +100,7 @@
@glbl = global i32 0, align 4
; Function Attrs: nounwind uwtable
-define weak_odr i32 @_Z4funcIXadL_Z4glblEEEiv() #0 {
+define weak_odr i32 @_Z4funcIXadL_Z4glblEEEiv() #0 !dbg !4 {
entry:
ret i32 0, !dbg !18
}
@@ -101,11 +111,11 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!llvm.module.flags = !{!15, !16}
!llvm.ident = !{!17}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, splitDebugFilename: "-.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !12, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, splitDebugFilename: "-.dwo", emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !12, imports: !2)
!1 = !DIFile(filename: "tls.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "func<&glbl>", linkageName: "_Z4funcIXadL_Z4glblEEEiv", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: i32 ()* @_Z4funcIXadL_Z4glblEEEiv, templateParams: !9, variables: !2)
+!4 = distinct !DISubprogram(name: "func<&glbl>", linkageName: "_Z4funcIXadL_Z4glblEEEiv", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, templateParams: !9, variables: !2)
!5 = !DIFile(filename: "tls.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/DebugInfo/X86/type_units_with_addresses.ll b/test/DebugInfo/X86/type_units_with_addresses.ll
index 2a8d664e89b8..10a41972d9f2 100644
--- a/test/DebugInfo/X86/type_units_with_addresses.ll
+++ b/test/DebugInfo/X86/type_units_with_addresses.ll
@@ -112,7 +112,7 @@
!llvm.module.flags = !{!34, !35}
!llvm.ident = !{!36}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, splitDebugFilename: "tu.dwo", emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !27, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, splitDebugFilename: "tu.dwo", emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !27, imports: !2)
!1 = !DIFile(filename: "tu.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !9, !12, !13, !17, !18, !19, !23, !24}
diff --git a/test/DebugInfo/X86/union-const.ll b/test/DebugInfo/X86/union-const.ll
index e051c6297b70..1d3969aff52d 100644
--- a/test/DebugInfo/X86/union-const.ll
+++ b/test/DebugInfo/X86/union-const.ll
@@ -20,7 +20,7 @@ target triple = "x86_64-apple-macosx10.10.0"
%struct.anon = type { i32 }
; Function Attrs: nounwind readnone ssp uwtable
-define i32 @mfi_aen_setup() #0 {
+define i32 @mfi_aen_setup() #0 !dbg !4 {
entry:
tail call void @llvm.dbg.declare(metadata %union.mfi_evt* undef, metadata !16, metadata !21), !dbg !22
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !16, metadata !21), !dbg !22
@@ -40,11 +40,11 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!17, !18, !19}
!llvm.ident = !{!20}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 226915) (llvm/trunk 226905)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 226915) (llvm/trunk 226905)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "union.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "mfi_aen_setup", line: 5, isLocal: false, isDefinition: true, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !6, function: i32 ()* @mfi_aen_setup, variables: !15)
+!4 = distinct !DISubprogram(name: "mfi_aen_setup", line: 5, isLocal: false, isDefinition: true, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !15)
!5 = !DIFile(filename: "union.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
@@ -56,7 +56,7 @@ attributes #1 = { nounwind readnone }
!13 = !DIDerivedType(tag: DW_TAG_member, name: "reserved", line: 3, size: 32, align: 32, file: !1, scope: !11, baseType: !14)
!14 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!15 = !{!16}
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 6, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(name: "a", line: 6, scope: !4, file: !5, type: !8)
!17 = !{i32 2, !"Dwarf Version", i32 2}
!18 = !{i32 2, !"Debug Info Version", i32 3}
!19 = !{i32 1, !"PIC Level", i32 2}
diff --git a/test/DebugInfo/X86/union-template.ll b/test/DebugInfo/X86/union-template.ll
index 172dcdcbb9db..1f91f2a129fe 100644
--- a/test/DebugInfo/X86/union-template.ll
+++ b/test/DebugInfo/X86/union-template.ll
@@ -11,7 +11,7 @@
@_ZN7PR156371fE = global %"union.PR15637::Value" zeroinitializer, align 4
-define void @_ZN7PR156371gEf(float %value) #0 {
+define void @_ZN7PR156371gEf(float %value) #0 !dbg !4 {
entry:
%value.addr = alloca float, align 4
%tempValue = alloca %"union.PR15637::Value", align 4
@@ -29,11 +29,11 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!28}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 178499) (llvm/trunk 178472)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 178499) (llvm/trunk 178472)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
!1 = !DIFile(filename: "foo.cc", directory: "/usr/local/google/home/echristo/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "g", linkageName: "_ZN7PR156371gEf", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: void (float)* @_ZN7PR156371gEf, variables: !2)
+!4 = distinct !DISubprogram(name: "g", linkageName: "_ZN7PR156371gEf", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DINamespace(name: "PR15637", line: 1, file: !1, scope: null)
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
@@ -51,9 +51,9 @@ attributes #1 = { nounwind readnone }
!19 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !12)
!21 = !{!22}
!22 = !DITemplateTypeParameter(name: "T", type: !8)
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 3, arg: 1, scope: !4, file: !11, type: !8)
+!23 = !DILocalVariable(name: "value", line: 3, arg: 1, scope: !4, file: !11, type: !8)
!24 = !DILocation(line: 3, scope: !4)
-!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "tempValue", line: 4, scope: !4, file: !11, type: !12)
+!25 = !DILocalVariable(name: "tempValue", line: 4, scope: !4, file: !11, type: !12)
!26 = !DILocation(line: 4, scope: !4)
!27 = !DILocation(line: 5, scope: !4)
!28 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/X86/vector.ll b/test/DebugInfo/X86/vector.ll
index f95ad956dea6..3e2092bd806f 100644
--- a/test/DebugInfo/X86/vector.ll
+++ b/test/DebugInfo/X86/vector.ll
@@ -12,7 +12,7 @@
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 171825) (llvm/trunk 171822)", isOptimized: false, emissionKind: 0, file: !12, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 171825) (llvm/trunk 171822)", isOptimized: false, emissionKind: 0, file: !12, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
!1 = !{}
!3 = !{!5}
!5 = !DIGlobalVariable(name: "a", line: 3, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: <4 x i32>* @a)
diff --git a/test/DebugInfo/X86/vla.ll b/test/DebugInfo/X86/vla.ll
index dfcc5f1cce2a..5a25f021894e 100644
--- a/test/DebugInfo/X86/vla.ll
+++ b/test/DebugInfo/X86/vla.ll
@@ -1,6 +1,6 @@
; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=asm %s -o - | FileCheck %s
; Ensure that we generate an indirect location for the variable length array a.
-; CHECK: ##DEBUG_VALUE: vla:a <- RDX
+; CHECK: ##DEBUG_VALUE: vla:a <- [%RDX+0]
; CHECK: DW_OP_breg1
; rdar://problem/13658587
;
@@ -21,7 +21,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.8.0"
; Function Attrs: nounwind ssp uwtable
-define i32 @vla(i32 %n) nounwind ssp uwtable {
+define i32 @vla(i32 %n) nounwind ssp uwtable !dbg !4 {
entry:
%n.addr = alloca i32, align 4
%saved_stack = alloca i8*
@@ -57,7 +57,7 @@ declare i8* @llvm.stacksave() nounwind
declare void @llvm.stackrestore(i8*) nounwind
; Function Attrs: nounwind ssp uwtable
-define i32 @main(i32 %argc, i8** %argv) nounwind ssp uwtable {
+define i32 @main(i32 %argc, i8** %argv) nounwind ssp uwtable !dbg !9 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
@@ -75,33 +75,33 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!29}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "vla.c", directory: "")
!2 = !{}
!3 = !{!4, !9}
-!4 = !DISubprogram(name: "vla", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @vla, variables: !2)
+!4 = distinct !DISubprogram(name: "vla", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "vla.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !10, function: i32 (i32, i8**)* @main, variables: !2)
+!9 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !10, variables: !2)
!10 = !DISubroutineType(types: !11)
!11 = !{!8, !8, !12}
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !13)
!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !14)
!14 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "n", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!15 = !DILocalVariable(name: "n", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!16 = !DILocation(line: 1, scope: !4)
!17 = !DILocation(line: 2, scope: !4)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 2, scope: !4, file: !5, type: !19)
+!18 = !DILocalVariable(name: "a", line: 2, scope: !4, file: !5, type: !19)
!19 = !DICompositeType(tag: DW_TAG_array_type, align: 32, baseType: !8, elements: !20)
!20 = !{!21}
!21 = !DISubrange(count: -1)
!22 = !DILocation(line: 3, scope: !4)
!23 = !DILocation(line: 4, scope: !4)
!24 = !DILocation(line: 5, scope: !4)
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 7, arg: 1, scope: !9, file: !5, type: !8)
+!25 = !DILocalVariable(name: "argc", line: 7, arg: 1, scope: !9, file: !5, type: !8)
!26 = !DILocation(line: 7, scope: !9)
-!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 7, arg: 2, scope: !9, file: !5, type: !12)
+!27 = !DILocalVariable(name: "argv", line: 7, arg: 2, scope: !9, file: !5, type: !12)
!28 = !DILocation(line: 8, scope: !9)
!29 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/accel-table-hash-collisions.ll b/test/DebugInfo/accel-table-hash-collisions.ll
deleted file mode 100644
index 5e109a7de5ed..000000000000
--- a/test/DebugInfo/accel-table-hash-collisions.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; REQUIRES: object-emission
-; RUN: %llc_dwarf -dwarf-accel-tables=Enable -filetype=obj -o - < %s | llvm-dwarfdump -debug-dump=apple_names - | FileCheck %s
-
-; Generated from the following C code using
-; clang -S -emit-llvm hash-collision.c
-;
-; The names of the variables have been chosen so that they produce hash collisions.
-; There are 12 names here that are hashed to only 6 hashes (each pair of lines
-; hashes to the same value, see the CHECK lines below).
-;
-; int ForceTopDown;
-; int _ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_;
-; int _ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE;
-; int _ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv;
-; int _ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE;
-; int _ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE;
-; int k1;
-; int is;
-; int setStmt;
-; int _ZN4llvm5TwineC1Ei;
-; int _ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE;
-; int _ZN4llvm22MachineModuleInfoMachOD2Ev;
-
-; Check that we have the right amount of hashes.
-; CHECK: Bucket count = 6
-; CHECK: Hashes count = 6
-
-; Check that all the names are present in the output
-; CHECK: Hash = 0x00597841
-; CHECK: Name: {{[0-9a-f]*}} "is"
-; CHECK: Name: {{[0-9a-f]*}} "k1"
-
-; CHECK: Hash = 0xa4b42a1e
-; CHECK: Name: {{[0-9a-f]*}} "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE"
-; CHECK: Name: {{[0-9a-f]*}} "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv"
-
-; CHECK: Hash = 0xeee7c0b2
-; CHECK: Name: {{[0-9a-f]*}} "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE"
-; CHECK: Name: {{[0-9a-f]*}} "_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE"
-
-; CHECK: Hash = 0xea48ac5f
-; CHECK: Name: {{[0-9a-f]*}} "ForceTopDown"
-; CHECK: Name: {{[0-9a-f]*}} "_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_"
-
-; CHECK: Hash = 0x6b22f71f
-; CHECK: Name: {{[0-9a-f]*}} "_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE"
-; CHECK: Name: {{[0-9a-f]*}} "_ZN4llvm22MachineModuleInfoMachOD2Ev"
-
-; CHECK: Hash = 0x8c248979
-; CHECK: Name: {{[0-9a-f]*}} "setStmt"
-; CHECK: Name: {{[0-9a-f]*}} "_ZN4llvm5TwineC1Ei"
-
-
-
-@ForceTopDown = common global i32 0, align 4
-@_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_ = common global i32 0, align 4
-@_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE = common global i32 0, align 4
-@_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv = common global i32 0, align 4
-@_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE = common global i32 0, align 4
-@_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE = common global i32 0, align 4
-@k1 = common global i32 0, align 4
-@is = common global i32 0, align 4
-@setStmt = common global i32 0, align 4
-@_ZN4llvm5TwineC1Ei = common global i32 0, align 4
-@_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE = common global i32 0, align 4
-@_ZN4llvm22MachineModuleInfoMachOD2Ev = common global i32 0, align 4
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!17, !18, !19}
-!llvm.ident = !{!20}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 231548) (llvm/trunk 231547)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
-!1 = !DIFile(filename: "hash-collisions.c", directory: "/tmp")
-!2 = !{}
-!3 = !{!4, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16}
-!4 = !DIGlobalVariable(name: "ForceTopDown", scope: !0, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, variable: i32* @ForceTopDown)
-!5 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DIGlobalVariable(name: "_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_)
-!7 = !DIGlobalVariable(name: "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE", scope: !0, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE)
-!8 = !DIGlobalVariable(name: "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv", scope: !0, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv)
-!9 = !DIGlobalVariable(name: "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE", scope: !0, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE)
-!10 = !DIGlobalVariable(name: "_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE", scope: !0, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE)
-!11 = !DIGlobalVariable(name: "k1", scope: !0, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, variable: i32* @k1)
-!12 = !DIGlobalVariable(name: "is", scope: !0, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, variable: i32* @is)
-!13 = !DIGlobalVariable(name: "setStmt", scope: !0, file: !1, line: 9, type: !5, isLocal: false, isDefinition: true, variable: i32* @setStmt)
-!14 = !DIGlobalVariable(name: "_ZN4llvm5TwineC1Ei", scope: !0, file: !1, line: 10, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm5TwineC1Ei)
-!15 = !DIGlobalVariable(name: "_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE", scope: !0, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE)
-!16 = !DIGlobalVariable(name: "_ZN4llvm22MachineModuleInfoMachOD2Ev", scope: !0, file: !1, line: 12, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm22MachineModuleInfoMachOD2Ev)
-!17 = !{i32 2, !"Dwarf Version", i32 2}
-!18 = !{i32 2, !"Debug Info Version", i32 3}
-!19 = !{i32 1, !"PIC Level", i32 2}
-!20 = !{!"clang version 3.7.0 (trunk 231548) (llvm/trunk 231547)"}
diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll
deleted file mode 100644
index f6c79f77c6f2..000000000000
--- a/test/DebugInfo/array.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: %llc_dwarf -O0 < %s | FileCheck %s
-; Do not emit AT_upper_bound for an unbounded array.
-; radar 9241695
-define i32 @main() nounwind ssp {
-entry:
- %retval = alloca i32, align 4
- %a = alloca [0 x i32], align 4
- store i32 0, i32* %retval
- call void @llvm.dbg.declare(metadata [0 x i32]* %a, metadata !6, metadata !DIExpression()), !dbg !11
- ret i32 0, !dbg !12
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!2}
-!llvm.module.flags = !{!16}
-
-!0 = !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 3, file: !14, scope: !1, type: !3, function: i32 ()* @main)
-!1 = !DIFile(filename: "array.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129138)", isOptimized: false, emissionKind: 0, file: !14, enums: !15, retainedTypes: !15, subprograms: !13, imports: null)
-!3 = !DISubroutineType(types: !4)
-!4 = !{!5}
-!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 4, scope: !7, file: !1, type: !8)
-!7 = distinct !DILexicalBlock(line: 3, column: 12, file: !14, scope: !0)
-!8 = !DICompositeType(tag: DW_TAG_array_type, align: 32, file: !14, scope: !2, baseType: !5, elements: !9)
-!9 = !{!10}
-;CHECK: section_info:
-;CHECK: DW_TAG_subrange_type
-;CHECK-NEXT: DW_AT_type
-;CHECK-NOT: DW_AT_lower_bound
-;CHECK-NOT: DW_AT_upper_bound
-;CHECK-NEXT: End Of Children Mark
-!10 = !DISubrange(count: -1)
-!11 = !DILocation(line: 4, column: 7, scope: !7)
-!12 = !DILocation(line: 5, column: 3, scope: !7)
-!13 = !{!0}
-!14 = !DIFile(filename: "array.c", directory: "/private/tmp")
-!15 = !{}
-!16 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/block-asan.ll b/test/DebugInfo/block-asan.ll
deleted file mode 100644
index 393816b34857..000000000000
--- a/test/DebugInfo/block-asan.ll
+++ /dev/null
@@ -1,87 +0,0 @@
-; RUN: opt -S -asan %s | FileCheck %s
-
-; The IR of this testcase is generated from the following C code:
-; void bar (int);
-;
-; void foo() {
-; __block int x;
-; bar(x);
-; }
-; by compiling it with 'clang -emit-llvm -g -S' and then by manually
-; adding the sanitize_address attribute to the @foo() function (so
-; that ASAN accepts to instrument the function in the above opt run).
-
-; Check that the location of the ASAN instrumented __block variable is
-; correct.
-; CHECK: !DIExpression(DW_OP_deref, DW_OP_plus, 8, DW_OP_deref, DW_OP_plus, 24)
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 }
-
-; Function Attrs: nounwind ssp uwtable
-define void @foo() #0 {
-entry:
- %x = alloca %struct.__block_byref_x, align 8
- call void @llvm.dbg.declare(metadata %struct.__block_byref_x* %x, metadata !12, metadata !22), !dbg !23
- %byref.isa = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 0, !dbg !24
- store i8* null, i8** %byref.isa, !dbg !24
- %byref.forwarding = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 1, !dbg !24
- store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, !dbg !24
- %byref.flags = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 2, !dbg !24
- store i32 0, i32* %byref.flags, !dbg !24
- %byref.size = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 3, !dbg !24
- store i32 32, i32* %byref.size, !dbg !24
- %forwarding = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 1, !dbg !25
- %0 = load %struct.__block_byref_x*, %struct.__block_byref_x** %forwarding, !dbg !25
- %x1 = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %0, i32 0, i32 4, !dbg !25
- %1 = load i32, i32* %x1, align 4, !dbg !25
- call void @bar(i32 %1), !dbg !25
- %2 = bitcast %struct.__block_byref_x* %x to i8*, !dbg !26
- call void @_Block_object_dispose(i8* %2, i32 8) #3, !dbg !26
- ret void, !dbg !26
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-declare void @bar(i32) #2
-
-declare void @_Block_object_dispose(i8*, i32)
-
-attributes #0 = { nounwind ssp uwtable sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9, !10}
-!llvm.ident = !{!11}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 223120) (llvm/trunk 223119)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "block.c", directory: "/tmp")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: void ()* @foo, variables: !2)
-!5 = !DIFile(filename: "block.c", directory: "/tmp")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null}
-!8 = !{i32 2, !"Dwarf Version", i32 2}
-!9 = !{i32 2, !"Debug Info Version", i32 3}
-!10 = !{i32 1, !"PIC Level", i32 2}
-!11 = !{!"clang version 3.6.0 (trunk 223120) (llvm/trunk 223119)"}
-!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 4, scope: !4, file: !5, type: !13)
-!13 = !DICompositeType(tag: DW_TAG_structure_type, size: 224, flags: DIFlagBlockByrefStruct, file: !1, scope: !5, elements: !14)
-!14 = !{!15, !17, !18, !20, !21}
-!15 = !DIDerivedType(tag: DW_TAG_member, name: "__isa", size: 64, align: 64, file: !1, scope: !5, baseType: !16)
-!16 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
-!17 = !DIDerivedType(tag: DW_TAG_member, name: "__forwarding", size: 64, align: 64, offset: 64, file: !1, scope: !5, baseType: !16)
-!18 = !DIDerivedType(tag: DW_TAG_member, name: "__flags", size: 32, align: 32, offset: 128, file: !1, scope: !5, baseType: !19)
-!19 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!20 = !DIDerivedType(tag: DW_TAG_member, name: "__size", size: 32, align: 32, offset: 160, file: !1, scope: !5, baseType: !19)
-!21 = !DIDerivedType(tag: DW_TAG_member, name: "x", size: 32, align: 32, offset: 192, file: !1, scope: !5, baseType: !19)
-!22 = !DIExpression(DW_OP_plus, 8, DW_OP_deref, DW_OP_plus, 24)
-!23 = !DILocation(line: 4, column: 15, scope: !4)
-!24 = !DILocation(line: 4, column: 3, scope: !4)
-!25 = !DILocation(line: 5, column: 3, scope: !4)
-!26 = !DILocation(line: 6, column: 1, scope: !4)
diff --git a/test/DebugInfo/bug_null_debuginfo.ll b/test/DebugInfo/bug_null_debuginfo.ll
deleted file mode 100644
index 5aaa3834a5f1..000000000000
--- a/test/DebugInfo/bug_null_debuginfo.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!2}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, isOptimized: false, emissionKind: 0, file: !1, globals: null)
-!1 = !DIFile(filename: "t", directory: "")
-!2 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/constant-pointers.ll b/test/DebugInfo/constant-pointers.ll
deleted file mode 100644
index 8e0a242f1bad..000000000000
--- a/test/DebugInfo/constant-pointers.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; Ensure that pointer constants are emitted as unsigned data. Alternatively,
-; these could be signless data (dataN).
-
-; Built with Clang from:
-; template <void *V, void (*F)(), int i>
-; void func() {}
-; template void func<nullptr, nullptr, 42>();
-
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_TAG_template_value_parameter
-; CHECK: DW_AT_name {{.*}} "V"
-; CHECK: DW_AT_const_value [DW_FORM_udata] (0)
-; CHECK: DW_TAG_template_value_parameter
-; CHECK: DW_AT_name {{.*}} "F"
-; CHECK: DW_AT_const_value [DW_FORM_udata] (0)
-
-; Function Attrs: nounwind uwtable
-define weak_odr void @_Z4funcILPv0ELPFvvE0ELi42EEvv() #0 {
-entry:
- ret void, !dbg !18
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!15, !16}
-!llvm.ident = !{!17}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "constant-pointers.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "func<nullptr, nullptr, 42>", linkageName: "_Z4funcILPv0ELPFvvE0ELi42EEvv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: void ()* @_Z4funcILPv0ELPFvvE0ELi42EEvv, templateParams: !8, variables: !2)
-!5 = !DIFile(filename: "constant-pointers.cpp", directory: "/tmp/dbginfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null}
-!8 = !{!9, !11, !13}
-!9 = !DITemplateValueParameter(tag: DW_TAG_template_value_parameter, name: "V", type: !10, value: i8 0)
-!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
-!11 = !DITemplateValueParameter(tag: DW_TAG_template_value_parameter, name: "F", type: !12, value: i8 0)
-!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !6)
-!13 = !DITemplateValueParameter(tag: DW_TAG_template_value_parameter, name: "i", type: !14, value: i32 42)
-!14 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!15 = !{i32 2, !"Dwarf Version", i32 4}
-!16 = !{i32 2, !"Debug Info Version", i32 3}
-!17 = !{!"clang version 3.5.0 "}
-!18 = !DILocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/constant-sdnodes-have-dbg-location.ll b/test/DebugInfo/constant-sdnodes-have-dbg-location.ll
deleted file mode 100644
index 1f502a2a262a..000000000000
--- a/test/DebugInfo/constant-sdnodes-have-dbg-location.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc -debug < %s 2>&1 | FileCheck %s
-; REQUIRES: asserts
-
-; CHECK: 0x{{[0-9,a-f]+}}: i32 = Constant<-1>test.c:4:5
-
-define i32 @main() {
-entry:
- %retval = alloca i32, align 4
- store i32 0, i32* %retval
- ret i32 -1, !dbg !10
-}
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
-!1 = !DIFile(filename: "test.c", directory: "/home/user/clang-llvm/build")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 ()* @main, variables: !2)
-!5 = !DISubroutineType(types: !6)
-!6 = !{!7}
-!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 2, !"Debug Info Version", i32 3}
-!10 = !DILocation(line: 4, column: 5, scope: !4)
diff --git a/test/DebugInfo/constantfp-sdnodes-have-dbg-location.ll b/test/DebugInfo/constantfp-sdnodes-have-dbg-location.ll
deleted file mode 100644
index 2f7ecd8b061e..000000000000
--- a/test/DebugInfo/constantfp-sdnodes-have-dbg-location.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc -debug < %s 2>&1 | FileCheck %s
-; REQUIRES: asserts
-
-; CHECK: 0x{{[0-9,a-f]+}}: f64 = ConstantFP<1.500000e+00>test.c:3:5
-
-define double @f() {
-entry:
- ret double 1.500000e+00, !dbg !10
-}
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, subprograms: !3)
-!1 = !DIFile(filename: "test.c", directory: "/home/user/clang-llvm/build")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, function: double ()* @f, variables: !2)
-!5 = !DISubroutineType(types: !6)
-!6 = !{!7}
-!7 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float)
-!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 2, !"Debug Info Version", i32 3}
-!10 = !DILocation(line: 3, column: 5, scope: !4)
diff --git a/test/DebugInfo/cross-cu-inlining.ll b/test/DebugInfo/cross-cu-inlining.ll
deleted file mode 100644
index 436bdda6473b..000000000000
--- a/test/DebugInfo/cross-cu-inlining.ll
+++ /dev/null
@@ -1,143 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck -implicit-check-not=DW_TAG %s
-; RUN: %llc_dwarf -dwarf-accel-tables=Enable -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck --check-prefix=CHECK-ACCEL --check-prefix=CHECK %s
-
-; Build from source:
-; $ clang++ a.cpp b.cpp -g -c -emit-llvm
-; $ llvm-link a.bc b.bc -o ab.bc
-; $ opt -inline ab.bc -o ab-opt.bc
-; $ cat a.cpp
-; extern int i;
-; int func(int);
-; int main() {
-; return func(i);
-; }
-; $ cat b.cpp
-; int __attribute__((always_inline)) func(int x) {
-; return x * 2;
-; }
-
-; Ensure that func inlined into main is described and references the abstract
-; definition in b.cpp's CU.
-
-; CHECK: DW_TAG_compile_unit
-; CHECK: DW_AT_name {{.*}} "a.cpp"
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_AT_type [DW_FORM_ref_addr] (0x00000000[[INT:.*]])
-; CHECK: 0x[[INLINED:[0-9a-f]*]]:{{.*}}DW_TAG_inlined_subroutine
-; CHECK: DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]] "_Z4funci"
-; CHECK: DW_TAG_formal_parameter
-; CHECK: DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]] "x"
-
-; Check the abstract definition is in the 'b.cpp' CU and doesn't contain any
-; concrete information (address range or variable location)
-; CHECK: DW_TAG_compile_unit
-; CHECK: DW_AT_name {{.*}} "b.cpp"
-; CHECK: 0x[[ABS_FUNC]]: DW_TAG_subprogram
-; CHECK-NOT: DW_AT_low_pc
-; CHECK: 0x[[ABS_VAR]]: DW_TAG_formal_parameter
-; CHECK-NOT: DW_AT_location
-; CHECK: DW_AT_type [DW_FORM_ref4] {{.*}} {0x[[INT]]}
-; CHECK-NOT: DW_AT_location
-
-; CHECK: 0x[[INT]]: DW_TAG_base_type
-; CHECK: DW_AT_name {{.*}} "int"
-
-; Check the concrete out of line definition references the abstract and
-; provides the address range and variable location
-; CHECK: 0x[[FUNC:[0-9a-f]*]]{{.*}}DW_TAG_subprogram
-; CHECK: DW_AT_low_pc
-; CHECK: DW_AT_abstract_origin {{.*}} {0x[[ABS_FUNC]]} "_Z4funci"
-; CHECK: DW_TAG_formal_parameter
-; CHECK: DW_AT_location
-; CHECK: DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]} "x"
-
-; Check that both the inline and the non out of line version of func are
-; correctly referenced in the accelerator table. Before r221837, the one
-; in the second compilation unit had a wrong offset
-; CHECK-ACCEL: .apple_names contents:
-; CHECK-ACCEL: Name{{.*}}"func"
-; CHECK-ACCEL-NOT: Name
-; CHECK-ACCEL: Atom[0]{{.*}}[[INLINED]]
-; CHECK-ACCEL-NOT: Name
-; CHECK-ACCEL: Atom[0]{{.*}}[[FUNC]]
-
-@i = external global i32
-
-; Function Attrs: uwtable
-define i32 @main() #0 {
-entry:
- %x.addr.i = alloca i32, align 4
- %retval = alloca i32, align 4
- store i32 0, i32* %retval
- %0 = load i32, i32* @i, align 4, !dbg !19
- %1 = bitcast i32* %x.addr.i to i8*
- call void @llvm.lifetime.start(i64 4, i8* %1)
- store i32 %0, i32* %x.addr.i, align 4
- call void @llvm.dbg.declare(metadata i32* %x.addr.i, metadata !120, metadata !DIExpression()), !dbg !21
- %2 = load i32, i32* %x.addr.i, align 4, !dbg !22
- %mul.i = mul nsw i32 %2, 2, !dbg !22
- %3 = bitcast i32* %x.addr.i to i8*, !dbg !22
- call void @llvm.lifetime.end(i64 4, i8* %3), !dbg !22
- ret i32 %mul.i, !dbg !19
-}
-
-; Function Attrs: alwaysinline nounwind uwtable
-define i32 @_Z4funci(i32 %x) #1 {
-entry:
- %x.addr = alloca i32, align 4
- store i32 %x, i32* %x.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !20, metadata !DIExpression()), !dbg !23
- %0 = load i32, i32* %x.addr, align 4, !dbg !24
- %mul = mul nsw i32 %0, 2, !dbg !24
- ret i32 %mul, !dbg !24
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture) #3
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.end(i64, i8* nocapture) #3
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-
-!llvm.dbg.cu = !{!0, !9}
-!llvm.module.flags = !{!16, !17}
-!llvm.ident = !{!18, !18}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
-!5 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !10, enums: !2, retainedTypes: !2, subprograms: !11, globals: !2, imports: !2)
-!10 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
-!11 = !{!12}
-!12 = !DISubprogram(name: "func", linkageName: "_Z4funci", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !10, scope: !13, type: !14, function: i32 (i32)* @_Z4funci, variables: !2)
-!13 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
-!14 = !DISubroutineType(types: !15)
-!15 = !{!8, !8}
-!16 = !{i32 2, !"Dwarf Version", i32 4}
-!17 = !{i32 2, !"Debug Info Version", i32 3}
-!18 = !{!"clang version 3.5.0 "}
-!19 = !DILocation(line: 4, scope: !4)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 1, scope: !12, file: !13, type: !8)
-
-!120 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 1, scope: !12, file: !13, type: !8)
-
-!21 = !DILocation(line: 1, scope: !12, inlinedAt: !19)
-!22 = !DILocation(line: 2, scope: !12, inlinedAt: !19)
-!23 = !DILocation(line: 1, scope: !12)
-!24 = !DILocation(line: 2, scope: !12)
-
diff --git a/test/DebugInfo/cross-cu-linkonce-distinct.ll b/test/DebugInfo/cross-cu-linkonce-distinct.ll
deleted file mode 100644
index b8ce57e70352..000000000000
--- a/test/DebugInfo/cross-cu-linkonce-distinct.ll
+++ /dev/null
@@ -1,95 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; Testing that two distinct (distinct by writing them in separate files, while
-; still fulfilling C++'s ODR by having identical token sequences) functions,
-; linked under LTO, get plausible debug info (and don't crash).
-
-; Built from source:
-; $ clang++ a.cpp b.cpp -g -c -emit-llvm
-; $ llvm-link a.bc b.bc -o ab.bc
-
-; This change is intended to tickle a case where the subprogram MDNode
-; associated with the llvm::Function will differ from the subprogram
-; referenced by the DbgLocs in the function.
-
-; $ sed -ie "s/!12, !0/!0, !12/" ab.ll
-; $ cat a.cpp
-; inline int func(int i) {
-; return i * 2;
-; }
-; int (*x)(int) = &func;
-; $ cat b.cpp
-; inline int func(int i) {
-; return i * 2;
-; }
-; int (*y)(int) = &func;
-
-; CHECK: DW_TAG_compile_unit
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "func"
-; CHECK: DW_TAG_compile_unit
-
-; FIXME: Maybe we should drop the subprogram here - since the function was
-; emitted in one CU, due to linkonce_odr uniquing. We certainly don't emit the
-; subprogram here if the source location for this definition is the same (see
-; test/DebugInfo/cross-cu-linkonce.ll), though it's very easy to tickle that
-; into failing even without duplicating the source as has been done in this
-; case (two cpp files in different directories, including the same header that
-; contains an inline function - clang will produce distinct subprogram metadata
-; that won't deduplicate owing to the file location information containing the
-; directory of the source file even though the file name is absolute, not
-; relative)
-
-; CHECK: DW_TAG_subprogram
-
-@x = global i32 (i32)* @_Z4funci, align 8
-@y = global i32 (i32)* @_Z4funci, align 8
-
-; Function Attrs: inlinehint nounwind uwtable
-define linkonce_odr i32 @_Z4funci(i32 %i) #0 {
- %1 = alloca i32, align 4
- store i32 %i, i32* %1, align 4
- call void @llvm.dbg.declare(metadata i32* %1, metadata !22, metadata !DIExpression()), !dbg !23
- %2 = load i32, i32* %1, align 4, !dbg !24
- %3 = mul nsw i32 %2, 2, !dbg !24
- ret i32 %3, !dbg !24
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!12, !0}
-!llvm.module.flags = !{!19, !20}
-!llvm.ident = !{!21, !21}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
-!1 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "func", linkageName: "_Z4funci", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @_Z4funci, variables: !2)
-!5 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8, !8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !{!10}
-!10 = !DIGlobalVariable(name: "x", line: 4, isLocal: false, isDefinition: true, scope: null, file: !5, type: !11, variable: i32 (i32)** @x)
-!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !6)
-!12 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !13, enums: !2, retainedTypes: !2, subprograms: !14, globals: !17, imports: !2)
-!13 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
-!14 = !{!15}
-!15 = !DISubprogram(name: "func", linkageName: "_Z4funci", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !13, scope: !16, type: !6, function: i32 (i32)* @_Z4funci, variables: !2)
-!16 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
-!17 = !{!18}
-!18 = !DIGlobalVariable(name: "y", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !11, variable: i32 (i32)** @y)
-!19 = !{i32 2, !"Dwarf Version", i32 4}
-!20 = !{i32 1, !"Debug Info Version", i32 3}
-!21 = !{!"clang version 3.5.0 "}
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !8)
-!23 = !DILocation(line: 1, scope: !4)
-!24 = !DILocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/cross-cu-linkonce.ll b/test/DebugInfo/cross-cu-linkonce.ll
deleted file mode 100644
index 3638825fc9c7..000000000000
--- a/test/DebugInfo/cross-cu-linkonce.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; Built from source:
-; $ clang++ a.cpp b.cpp -g -c -emit-llvm
-; $ llvm-link a.bc b.bc -o ab.bc
-; $ cat a.cpp
-; # 1 "func.h"
-; inline int func(int i) {
-; return i * 2;
-; }
-; int (*x)(int) = &func;
-; $ cat b.cpp
-; # 1 "func.h"
-; inline int func(int i) {
-; return i * 2;
-; }
-; int (*y)(int) = &func;
-
-; CHECK: DW_TAG_compile_unit
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "func"
-; CHECK: DW_TAG_compile_unit
-; CHECK-NOT: DW_TAG_subprogram
-
-@x = global i32 (i32)* @_Z4funci, align 8
-@y = global i32 (i32)* @_Z4funci, align 8
-
-; Function Attrs: inlinehint nounwind uwtable
-define linkonce_odr i32 @_Z4funci(i32 %i) #0 {
- %1 = alloca i32, align 4
- store i32 %i, i32* %1, align 4
- call void @llvm.dbg.declare(metadata i32* %1, metadata !20, metadata !DIExpression()), !dbg !21
- %2 = load i32, i32* %1, align 4, !dbg !22
- %3 = mul nsw i32 %2, 2, !dbg !22
- ret i32 %3, !dbg !22
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0, !13}
-!llvm.module.flags = !{!17, !18}
-!llvm.ident = !{!19, !19}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !10, imports: !2)
-!1 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "func", linkageName: "_Z4funci", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !5, scope: !6, type: !7, function: i32 (i32)* @_Z4funci, variables: !2)
-!5 = !DIFile(filename: "func.h", directory: "/tmp/dbginfo")
-!6 = !DIFile(filename: "func.h", directory: "/tmp/dbginfo")
-!7 = !DISubroutineType(types: !8)
-!8 = !{!9, !9}
-!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !{!11}
-!11 = !DIGlobalVariable(name: "x", line: 4, isLocal: false, isDefinition: true, scope: null, file: !6, type: !12, variable: i32 (i32)** @x)
-!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !7)
-!13 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !14, enums: !2, retainedTypes: !2, subprograms: !3, globals: !15, imports: !2)
-!14 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo")
-!15 = !{!16}
-!16 = !DIGlobalVariable(name: "y", line: 4, isLocal: false, isDefinition: true, scope: null, file: !6, type: !12, variable: i32 (i32)** @y)
-!17 = !{i32 2, !"Dwarf Version", i32 4}
-!18 = !{i32 1, !"Debug Info Version", i32 3}
-!19 = !{!"clang version 3.5.0 "}
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 1, arg: 1, scope: !4, file: !6, type: !9)
-!21 = !DILocation(line: 1, scope: !4)
-!22 = !DILocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/cu-range-hole.ll b/test/DebugInfo/cu-range-hole.ll
deleted file mode 100644
index 19a305483fb5..000000000000
--- a/test/DebugInfo/cu-range-hole.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; REQUIRES: object-emission
-; RUN: %llc_dwarf -O0 -filetype=obj %s -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-
-; Check that we emit ranges for this CU since we have a function with and
-; without debug info.
-; Note: This depends upon the order of output in the .o file. Currently it's
-; in order of the output to make sure that the CU has multiple ranges since
-; there's a function in the middle. If they were together then it would have
-; a single range and no DW_AT_ranges.
-; CHECK: DW_TAG_compile_unit
-; CHECK: DW_AT_ranges
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_TAG_subprogram
-
-; Function Attrs: nounwind uwtable
-define i32 @b(i32 %c) #0 {
-entry:
- %c.addr = alloca i32, align 4
- store i32 %c, i32* %c.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %c.addr, metadata !13, metadata !DIExpression()), !dbg !14
- %0 = load i32, i32* %c.addr, align 4, !dbg !14
- %add = add nsw i32 %0, 1, !dbg !14
- ret i32 %add, !dbg !14
-}
-
-; Function Attrs: nounwind uwtable
-define i32 @a(i32 %b) #0 {
-entry:
- %b.addr = alloca i32, align 4
- store i32 %b, i32* %b.addr, align 4
- %0 = load i32, i32* %b.addr, align 4
- %add = add nsw i32 %0, 1
- ret i32 %add
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-; Function Attrs: nounwind uwtable
-define i32 @d(i32 %e) #0 {
-entry:
- %e.addr = alloca i32, align 4
- store i32 %e, i32* %e.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %e.addr, metadata !15, metadata !DIExpression()), !dbg !16
- %0 = load i32, i32* %e.addr, align 4, !dbg !16
- %add = add nsw i32 %0, 1, !dbg !16
- ret i32 %add, !dbg !16
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0, !0}
-!llvm.dbg.cu = !{!1}
-!llvm.module.flags = !{!11, !12}
-
-!0 = !{!"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
-!1 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", isOptimized: false, emissionKind: 1, file: !2, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3, imports: !3)
-!2 = !DIFile(filename: "b.c", directory: "/usr/local/google/home/echristo")
-!3 = !{}
-!4 = !{!5, !10}
-!5 = !DISubprogram(name: "b", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !2, scope: !6, type: !7, function: i32 (i32)* @b, variables: !3)
-!6 = !DIFile(filename: "b.c", directory: "/usr/local/google/home/echristo")
-!7 = !DISubroutineType(types: !8)
-!8 = !{!9, !9}
-!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "d", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !2, scope: !6, type: !7, function: i32 (i32)* @d, variables: !3)
-!11 = !{i32 2, !"Dwarf Version", i32 4}
-!12 = !{i32 1, !"Debug Info Version", i32 3}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 1, arg: 1, scope: !5, file: !6, type: !9)
-!14 = !DILocation(line: 1, scope: !5)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "e", line: 3, arg: 1, scope: !10, file: !6, type: !9)
-!16 = !DILocation(line: 3, scope: !10)
diff --git a/test/DebugInfo/cu-ranges.ll b/test/DebugInfo/cu-ranges.ll
deleted file mode 100644
index 3cec5c88f385..000000000000
--- a/test/DebugInfo/cu-ranges.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; REQUIRES: object-emission
-; RUN: %llc_dwarf -O0 -filetype=obj %s -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-
-; Check that we emit ranges for this which has a non-traditional section and a normal section.
-
-; CHECK: DW_TAG_compile_unit
-; CHECK: DW_AT_ranges
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_AT_low_pc
-; CHECK: DW_AT_high_pc
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_AT_low_pc
-; CHECK: DW_AT_high_pc
-
-; CHECK: .debug_ranges contents:
-; FIXME: When we get better dumping facilities we'll want to elaborate here.
-; CHECK: 00000000 <End of list>
-
-; Function Attrs: nounwind uwtable
-define i32 @foo(i32 %a) #0 section "__TEXT,__foo" {
-entry:
- %a.addr = alloca i32, align 4
- store i32 %a, i32* %a.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !13, metadata !DIExpression()), !dbg !14
- %0 = load i32, i32* %a.addr, align 4, !dbg !15
- %add = add nsw i32 %0, 5, !dbg !15
- ret i32 %add, !dbg !15
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-; Function Attrs: nounwind uwtable
-define i32 @bar(i32 %a) #0 {
-entry:
- %a.addr = alloca i32, align 4
- store i32 %a, i32* %a.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !16, metadata !DIExpression()), !dbg !17
- %0 = load i32, i32* %a.addr, align 4, !dbg !18
- %add = add nsw i32 %0, 5, !dbg !18
- ret i32 %add, !dbg !18
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!10, !11}
-!llvm.ident = !{!12}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "foo.c", directory: "/usr/local/google/home/echristo")
-!2 = !{}
-!3 = !{!4, !9}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
-!5 = !DIFile(filename: "foo.c", directory: "/usr/local/google/home/echristo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8, !8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DISubprogram(name: "bar", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: i32 (i32)* @bar, variables: !2)
-!10 = !{i32 2, !"Dwarf Version", i32 4}
-!11 = !{i32 1, !"Debug Info Version", i32 3}
-!12 = !{!"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
-!14 = !DILocation(line: 1, scope: !4)
-!15 = !DILocation(line: 2, scope: !4)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 5, arg: 1, scope: !9, file: !5, type: !8)
-!17 = !DILocation(line: 5, scope: !9)
-!18 = !DILocation(line: 6, scope: !9)
-
diff --git a/test/DebugInfo/dead-argument-order.ll b/test/DebugInfo/dead-argument-order.ll
deleted file mode 100644
index 2dd556d096d6..000000000000
--- a/test/DebugInfo/dead-argument-order.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; Built from the following source with clang -O1
-; struct S { int i; };
-; int function(struct S s, int i) { return s.i + i; }
-
-; Due to the X86_64 ABI, 's' is passed in registers and once optimized, the
-; entirety of 's' is never reconstituted, since only the int is required, and
-; thus the variable's location is unknown/dead to debug info.
-
-; Future/current work should enable us to describe partial variables, which, in
-; this case, happens to be the entire variable.
-
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "function"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "s"
-; CHECK-NOT: DW_TAG
-; FIXME: Even though 's' is never reconstituted into a struct, the one member
-; variable is still live and used, and so we should be able to describe 's's
-; location as the location of that int.
-; CHECK-NOT: DW_AT_location
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_location
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "i"
-
-
-%struct.S = type { i32 }
-
-; Function Attrs: nounwind readnone uwtable
-define i32 @_Z8function1Si(i32 %s.coerce, i32 %i) #0 {
-entry:
- tail call void @llvm.dbg.declare(metadata %struct.S* undef, metadata !14, metadata !DIExpression()), !dbg !20
- tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !15, metadata !DIExpression()), !dbg !20
- %add = add nsw i32 %i, %s.coerce, !dbg !20
- ret i32 %add, !dbg !20
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
-
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!16, !17}
-!llvm.ident = !{!18}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !8, globals: !2, imports: !2)
-!1 = !DIFile(filename: "dead-argument-order.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", line: 1, size: 32, align: 32, file: !1, elements: !5, identifier: "_ZTS1S")
-!5 = !{!6}
-!6 = !DIDerivedType(tag: DW_TAG_member, name: "i", line: 1, size: 32, align: 32, file: !1, scope: !"_ZTS1S", baseType: !7)
-!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !{!9}
-!9 = !DISubprogram(name: "function", linkageName: "_Z8function1Si", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !10, type: !11, function: i32 (i32, i32)* @_Z8function1Si, variables: !13)
-!10 = !DIFile(filename: "dead-argument-order.cpp", directory: "/tmp/dbginfo")
-!11 = !DISubroutineType(types: !12)
-!12 = !{!7, !4, !7}
-!13 = !{!14, !15}
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 2, arg: 1, scope: !9, file: !10, type: !"_ZTS1S")
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 2, arg: 2, scope: !9, file: !10, type: !7)
-!16 = !{i32 2, !"Dwarf Version", i32 4}
-!17 = !{i32 2, !"Debug Info Version", i32 3}
-!18 = !{!"clang version 3.5.0 "}
-!19 = !{%struct.S* undef}
-!20 = !DILocation(line: 2, scope: !9)
-
diff --git a/test/DebugInfo/debug-info-qualifiers.ll b/test/DebugInfo/debug-info-qualifiers.ll
deleted file mode 100644
index 1f8f6c419932..000000000000
--- a/test/DebugInfo/debug-info-qualifiers.ll
+++ /dev/null
@@ -1,98 +0,0 @@
-; REQUIRES: object-emission
-; Test (r)value qualifiers on C++11 non-static member functions.
-; Generated from tools/clang/test/CodeGenCXX/debug-info-qualifiers.cpp
-;
-; class A {
-; public:
-; void l() const &;
-; void r() const &&;
-; };
-;
-; void g() {
-; A a;
-; auto pl = &A::l;
-; auto pr = &A::r;
-; }
-;
-; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump - | FileCheck %s
-; CHECK: DW_TAG_subroutine_type DW_CHILDREN_yes
-; CHECK-NEXT: DW_AT_reference DW_FORM_flag_present
-; CHECK: DW_TAG_subroutine_type DW_CHILDREN_yes
-; CHECK-NEXT: DW_AT_rvalue_reference DW_FORM_flag_present
-;
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG_subprogram
-; CHECK: DW_AT_name {{.*}}"l"
-; CHECK-NOT: DW_TAG_subprogram
-; CHECK: DW_AT_reference [DW_FORM_flag_present] (true)
-
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG_subprogram
-; CHECK: DW_AT_name {{.*}}"r"
-; CHECK-NOT: DW_TAG_subprogram
-; CHECK: DW_AT_rvalue_reference [DW_FORM_flag_present] (true)
-
-%class.A = type { i8 }
-
-; Function Attrs: nounwind
-define void @_Z1gv() #0 {
- %a = alloca %class.A, align 1
- %pl = alloca { i64, i64 }, align 8
- %pr = alloca { i64, i64 }, align 8
- call void @llvm.dbg.declare(metadata %class.A* %a, metadata !24, metadata !DIExpression()), !dbg !25
- call void @llvm.dbg.declare(metadata { i64, i64 }* %pl, metadata !26, metadata !DIExpression()), !dbg !31
- store { i64, i64 } { i64 ptrtoint (void (%class.A*)* @_ZNKR1A1lEv to i64), i64 0 }, { i64, i64 }* %pl, align 8, !dbg !31
- call void @llvm.dbg.declare(metadata { i64, i64 }* %pr, metadata !32, metadata !DIExpression()), !dbg !35
- store { i64, i64 } { i64 ptrtoint (void (%class.A*)* @_ZNKO1A1rEv to i64), i64 0 }, { i64, i64 }* %pr, align 8, !dbg !35
- ret void, !dbg !36
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-declare void @_ZNKR1A1lEv(%class.A*)
-
-declare void @_ZNKO1A1rEv(%class.A*)
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!21, !22}
-!llvm.ident = !{!23}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !16, globals: !2, imports: !2)
-!1 = !DIFile(filename: "debug-info-qualifiers.cpp", directory: "")
-!2 = !{}
-!3 = !{!4}
-!4 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 2, size: 8, align: 8, file: !5, elements: !6, identifier: "_ZTS1A")
-!5 = !DIFile(filename: "debug-info-qualifiers.cpp", directory: "")
-!6 = !{!7, !13}
-!7 = !DISubprogram(name: "l", linkageName: "_ZNKR1A1lEv", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped | DIFlagLValueReference, isOptimized: false, scopeLine: 5, file: !5, scope: !"_ZTS1A", type: !8)
-!8 = !DISubroutineType(flags: DIFlagLValueReference, types: !9)
-!9 = !{null, !10}
-!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !11)
-!11 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !"_ZTS1A")
-!13 = !DISubprogram(name: "r", linkageName: "_ZNKO1A1rEv", line: 7, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagObjectPointer | DIFlagRValueReference, isOptimized: false, scopeLine: 7, file: !5, scope: !"_ZTS1A", type: !14)
-!14 = !DISubroutineType(flags: DIFlagRValueReference, types: !9)
-!16 = !{!17}
-!17 = !DISubprogram(name: "g", linkageName: "_Z1gv", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !5, scope: !18, type: !19, function: void ()* @_Z1gv, variables: !2)
-!18 = !DIFile(filename: "debug-info-qualifiers.cpp", directory: "")
-!19 = !DISubroutineType(types: !20)
-!20 = !{null}
-!21 = !{i32 2, !"Dwarf Version", i32 4}
-!22 = !{i32 1, !"Debug Info Version", i32 3}
-!23 = !{!"clang version 3.5 "}
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 11, scope: !17, file: !18, type: !4)
-!25 = !DILocation(line: 11, scope: !17)
-!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "pl", line: 16, scope: !17, file: !18, type: !27)
-!27 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !28, extraData: !"_ZTS1A")
-!28 = !DISubroutineType(flags: DIFlagLValueReference, types: !29)
-!29 = !{null, !30}
-!30 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
-!31 = !DILocation(line: 16, scope: !17)
-!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "pr", line: 21, scope: !17, file: !18, type: !33)
-!33 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !34, extraData: !"_ZTS1A")
-!34 = !DISubroutineType(flags: DIFlagRValueReference, types: !29)
-!35 = !DILocation(line: 21, scope: !17)
-!36 = !DILocation(line: 22, scope: !17)
diff --git a/test/DebugInfo/debuginfofinder-forward-declaration.ll b/test/DebugInfo/debuginfofinder-forward-declaration.ll
deleted file mode 100644
index db143260e02b..000000000000
--- a/test/DebugInfo/debuginfofinder-forward-declaration.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: opt -analyze -module-debuginfo < %s | FileCheck %s
-
-
-; This module is generated from the following c-code:
-;
-; > union X;
-; >
-; > struct Y {
-; > union X *x;
-; > };
-; >
-; > struct Y y;
-
-
-; CHECK: Type: Y from /tmp/minimal.c:3 DW_TAG_structure_type
-; CHECK: Type: x from /tmp/minimal.c:4 DW_TAG_member
-; CHECK: Type: DW_TAG_pointer_type
-; CHECK: Type: X from /tmp/minimal.c:1 DW_TAG_structure_type
-
-
-%struct.Y = type { %struct.X* }
-%struct.X = type opaque
-
-@y = common global %struct.Y zeroinitializer, align 8
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!10, !11}
-!llvm.ident = !{!12}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (http://llvm.org/git/clang.git 247b30a043eb8f39ea3708e7e995089da0a6b00f) (http://llvm.org/git/llvm.git 6ecc7365a89c771fd229bdd9ffcc178684ea1aa5)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
-!1 = !DIFile(filename: "minimal.c", directory: "/tmp")
-!2 = !{}
-!3 = !{!4}
-!4 = !DIGlobalVariable(name: "y", scope: !0, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, variable: %struct.Y* @y)
-!5 = !DICompositeType(tag: DW_TAG_structure_type, name: "Y", file: !1, line: 3, size: 64, align: 64, elements: !6)
-!6 = !{!7}
-!7 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !5, file: !1, line: 4, baseType: !8, size: 64, align: 64)
-!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64)
-!9 = !DICompositeType(tag: DW_TAG_structure_type, name: "X", file: !1, line: 1, flags: DIFlagFwdDecl)
-!10 = !{i32 2, !"Dwarf Version", i32 4}
-!11 = !{i32 2, !"Debug Info Version", i32 3}
-!12 = !{!"clang version 3.7.0 (http://llvm.org/git/clang.git 247b30a043eb8f39ea3708e7e995089da0a6b00f) (http://llvm.org/git/llvm.git 6ecc7365a89c771fd229bdd9ffcc178684ea1aa5)"}
diff --git a/test/DebugInfo/debuginfofinder-multiple-cu.ll b/test/DebugInfo/debuginfofinder-multiple-cu.ll
deleted file mode 100644
index 4088dd54f53f..000000000000
--- a/test/DebugInfo/debuginfofinder-multiple-cu.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: opt -analyze -module-debuginfo < %s | FileCheck %s
-
-; Produced from linking:
-; /tmp/test1.c containing f()
-; /tmp/test2.c containing g()
-
-; Verify that both compile units and both their contained functions are
-; listed by DebugInfoFinder:
-;CHECK: Compile unit: DW_LANG_C99 from /tmp/test1.c
-;CHECK: Compile unit: DW_LANG_C99 from /tmp/test2.c
-;CHECK: Subprogram: f from /tmp/test1.c:1
-;CHECK: Subprogram: g from /tmp/test2.c:1
-
-define void @f() {
- ret void, !dbg !14
-}
-
-define void @g() {
- ret void, !dbg !15
-}
-
-!llvm.dbg.cu = !{!0, !8}
-!llvm.module.flags = !{!13, !16}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (192092)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "test1.c", directory: "/tmp")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @f, variables: !2)
-!5 = !DIFile(filename: "test1.c", directory: "/tmp")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null}
-!8 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (192092)", isOptimized: false, emissionKind: 0, file: !9, enums: !2, retainedTypes: !2, subprograms: !10, globals: !2, imports: !2)
-!9 = !DIFile(filename: "test2.c", directory: "/tmp")
-!10 = !{!11}
-!11 = !DISubprogram(name: "g", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !9, scope: !12, type: !6, function: void ()* @g, variables: !2)
-!12 = !DIFile(filename: "test2.c", directory: "/tmp")
-!13 = !{i32 2, !"Dwarf Version", i32 4}
-!14 = !DILocation(line: 1, scope: !4)
-!15 = !DILocation(line: 1, scope: !11)
-!16 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/debugmacinfo.test b/test/DebugInfo/debugmacinfo.test
new file mode 100644
index 000000000000..3f95169a7a4e
--- /dev/null
+++ b/test/DebugInfo/debugmacinfo.test
@@ -0,0 +1,27 @@
+RUN: llvm-dwarfdump -debug-dump=macro %p/Inputs/dwarfdump-macro.o \
+RUN: | FileCheck %s -check-prefix TEST_MACINFO
+RUN: llvm-dwarfdump -debug-dump=line %p/Inputs/dwarfdump-macro.o \
+RUN: | FileCheck %s -check-prefix TEST_LINE
+
+
+; This test verifies that llvm-dwarfdump tools know how to read .debug_macinfo
+; section. It also checks that the file numbers fits with those in the
+; .debug_line section.
+TEST_MACINFO: .debug_macinfo contents:
+TEST_MACINFO: DW_MACINFO_define - lineno: 0 macro: M3 Value3
+TEST_MACINFO: DW_MACINFO_start_file - lineno: 0 filenum: 1
+TEST_MACINFO: DW_MACINFO_start_file - lineno: 0 filenum: 2
+TEST_MACINFO: DW_MACINFO_define - lineno: 1 macro: M4 Value4
+TEST_MACINFO: DW_MACINFO_end_file
+TEST_MACINFO: DW_MACINFO_define - lineno: 1 macro: M1 Value1
+TEST_MACINFO: DW_MACINFO_start_file - lineno: 2 filenum: 3
+TEST_MACINFO: DW_MACINFO_undef - lineno: 4 macro: M1
+TEST_MACINFO: DW_MACINFO_define - lineno: 5 macro: M1 NewValue1
+TEST_MACINFO: DW_MACINFO_end_file
+TEST_MACINFO: DW_MACINFO_define - lineno: 3 macro: M2(x,y) ((x)+(y)* Value2)
+TEST_MACINFO: DW_MACINFO_end_file
+
+TEST_LINE: .debug_line contents:
+TEST_LINE: file_names[ 1] 0 0x00000000 0x00000000 dwarfdump-macro.cc
+TEST_LINE: file_names[ 2] 1 0x00000000 0x00000000 dwarfdump-macro-cmd.h
+TEST_LINE: file_names[ 3] 0 0x00000000 0x00000000 dwarfdump-macro.h
diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll
deleted file mode 100644
index bd340578bfb0..000000000000
--- a/test/DebugInfo/dwarf-public-names.ll
+++ /dev/null
@@ -1,131 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -generate-dwarf-pub-sections=Enable -filetype=obj -o %t.o < %s
-; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck %s
-; ModuleID = 'dwarf-public-names.cpp'
-;
-; Generated from:
-;
-; struct C {
-; void member_function();
-; static int static_member_function();
-; static int static_member_variable;
-; };
-;
-; int C::static_member_variable = 0;
-;
-; void C::member_function() {
-; static_member_variable = 0;
-; }
-;
-; int C::static_member_function() {
-; return static_member_variable;
-; }
-;
-; C global_variable;
-;
-; int global_function() {
-; return -1;
-; }
-;
-; namespace ns {
-; void global_namespace_function() {
-; global_variable.member_function();
-; }
-; int global_namespace_variable = 1;
-; }
-
-; Skip the output to the header of the pubnames section.
-; CHECK: debug_pubnames
-; CHECK: version = 0x0002
-
-; Check for each name in the output.
-; CHECK-DAG: "ns"
-; CHECK-DAG: "C::static_member_function"
-; CHECK-DAG: "global_variable"
-; CHECK-DAG: "ns::global_namespace_variable"
-; CHECK-DAG: "ns::global_namespace_function"
-; CHECK-DAG: "global_function"
-; CHECK-DAG: "C::static_member_variable"
-; CHECK-DAG: "C::member_function"
-
-%struct.C = type { i8 }
-
-@_ZN1C22static_member_variableE = global i32 0, align 4
-@global_variable = global %struct.C zeroinitializer, align 1
-@_ZN2ns25global_namespace_variableE = global i32 1, align 4
-
-define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2 {
-entry:
- %this.addr = alloca %struct.C*, align 8
- store %struct.C* %this, %struct.C** %this.addr, align 8
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !28, metadata !DIExpression()), !dbg !30
- %this1 = load %struct.C*, %struct.C** %this.addr
- store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31
- ret void, !dbg !32
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 {
-entry:
- %0 = load i32, i32* @_ZN1C22static_member_variableE, align 4, !dbg !33
- ret i32 %0, !dbg !33
-}
-
-define i32 @_Z15global_functionv() nounwind uwtable {
-entry:
- ret i32 -1, !dbg !34
-}
-
-define void @_ZN2ns25global_namespace_functionEv() nounwind uwtable {
-entry:
- call void @_ZN1C15member_functionEv(%struct.C* @global_variable), !dbg !35
- ret void, !dbg !36
-}
-
-attributes #0 = { nounwind uwtable }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!38}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", isOptimized: false, emissionKind: 0, file: !37, enums: !1, retainedTypes: !1, subprograms: !2, globals: !24, imports: !1)
-!1 = !{}
-!2 = !{!3, !18, !19, !20}
-!3 = !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !4, scope: null, type: !5, function: void (%struct.C*)* @_ZN1C15member_functionEv, declaration: !12, variables: !1)
-!4 = !DIFile(filename: "dwarf-public-names.cpp", directory: "/usr2/kparzysz/s.hex/t")
-!5 = !DISubroutineType(types: !6)
-!6 = !{null, !7}
-!7 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !8)
-!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 1, size: 8, align: 8, file: !37, elements: !9)
-!9 = !{!10, !12, !14}
-!10 = !DIDerivedType(tag: DW_TAG_member, name: "static_member_variable", line: 4, flags: DIFlagStaticMember, file: !37, scope: !8, baseType: !11)
-!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!12 = !DISubprogram(name: "member_function", linkageName: "_ZN1C15member_functionEv", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !4, scope: !8, type: !5, variables: !13)
-!13 = !{} ; previously: invalid DW_TAG_base_type
-!14 = !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !4, scope: !8, type: !15, variables: !17)
-!15 = !DISubroutineType(types: !16)
-!16 = !{!11}
-!17 = !{} ; previously: invalid DW_TAG_base_type
-!18 = !DISubprogram(name: "static_member_function", linkageName: "_ZN1C22static_member_functionEv", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !4, scope: null, type: !15, function: i32 ()* @_ZN1C22static_member_functionEv, declaration: !14, variables: !1)
-!19 = !DISubprogram(name: "global_function", linkageName: "_Z15global_functionv", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 19, file: !4, scope: !4, type: !15, function: i32 ()* @_Z15global_functionv, variables: !1)
-!20 = !DISubprogram(name: "global_namespace_function", linkageName: "_ZN2ns25global_namespace_functionEv", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !4, scope: !21, type: !22, function: void ()* @_ZN2ns25global_namespace_functionEv, variables: !1)
-!21 = !DINamespace(name: "ns", line: 23, file: !4, scope: null)
-!22 = !DISubroutineType(types: !23)
-!23 = !{null}
-!24 = !{!25, !26, !27}
-!25 = !DIGlobalVariable(name: "static_member_variable", linkageName: "_ZN1C22static_member_variableE", line: 7, isLocal: false, isDefinition: true, scope: !8, file: !4, type: !11, variable: i32* @_ZN1C22static_member_variableE, declaration: !10)
-!26 = !DIGlobalVariable(name: "global_variable", line: 17, isLocal: false, isDefinition: true, scope: null, file: !4, type: !8, variable: %struct.C* @global_variable)
-!27 = !DIGlobalVariable(name: "global_namespace_variable", linkageName: "_ZN2ns25global_namespace_variableE", line: 27, isLocal: false, isDefinition: true, scope: !21, file: !4, type: !11, variable: i32* @_ZN2ns25global_namespace_variableE)
-!28 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 9, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !3, file: !4, type: !29)
-!29 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !8)
-!30 = !DILocation(line: 9, scope: !3)
-!31 = !DILocation(line: 10, scope: !3)
-!32 = !DILocation(line: 11, scope: !3)
-!33 = !DILocation(line: 14, scope: !18)
-!34 = !DILocation(line: 20, scope: !19)
-!35 = !DILocation(line: 25, scope: !20)
-!36 = !DILocation(line: 26, scope: !20)
-!37 = !DIFile(filename: "dwarf-public-names.cpp", directory: "/usr2/kparzysz/s.hex/t")
-!38 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/dwarfdump-accel.test b/test/DebugInfo/dwarfdump-accel.test
index c5c3b0154c11..7c1838829562 100644
--- a/test/DebugInfo/dwarfdump-accel.test
+++ b/test/DebugInfo/dwarfdump-accel.test
@@ -57,7 +57,7 @@ CHECK-NOT: Magic
Check ObjC specific accelerators.
CHECK: .apple_objc contents:
CHECK: Name{{.*}}"TestInterface"
-CHECK-NOT Name
+CHECK-NOT: Name
CHECK: {Atom[0]: [[READONLY]]}
CHECK: {Atom[0]: [[ASSIGN]]}
CHECK: {Atom[0]: [[SETASSIGN]]}
diff --git a/test/DebugInfo/dwarfdump-dump-flags.test b/test/DebugInfo/dwarfdump-dump-flags.test
index 92b2d50f393b..4c10bede6f83 100644
--- a/test/DebugInfo/dwarfdump-dump-flags.test
+++ b/test/DebugInfo/dwarfdump-dump-flags.test
@@ -1,6 +1,9 @@
; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 -debug-dump=all | FileCheck %s -check-prefix DUMP_ALL
; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 -debug-dump=info | FileCheck %s -check-prefix DUMP_INFO
; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.elf-x86-64 -debug-dump=ranges | FileCheck %s -check-prefix DUMP_RANGES
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.macho-i386.o -debug-dump=all | FileCheck %s -check-prefix DUMP_ALL
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.macho-i386.o -debug-dump=info | FileCheck %s -check-prefix DUMP_INFO
+; RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test.macho-i386.o -debug-dump=ranges | FileCheck %s -check-prefix DUMP_RANGES
; DUMP_ALL: .debug_info
; DUMP_ALL: .debug_ranges
diff --git a/test/DebugInfo/dwarfdump-dwp.test b/test/DebugInfo/dwarfdump-dwp.test
new file mode 100644
index 000000000000..8aef636d4d9a
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-dwp.test
@@ -0,0 +1,53 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-dwp.x86_64.o | FileCheck %s
+
+; Testing the following simple dwp file:
+; a.cpp:
+; struct foo { };
+; foo a;
+; b.cpp:
+; struct bar { };
+; bar b() {
+; }
+
+; CHECK-LABEL: .debug_info.dwo contents:
+; CHECK: Compile Unit
+
+; Verify that the second CU uses the index for its abbrev offset
+; CHECK: Compile Unit
+; CHECK-SAME: abbr_offset = 0x0043
+; CHECK: DW_TAG_compile_unit
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}} "b.cpp"
+
+; Verify that abbreviations are decoded using the abbrev offset in the index
+; CHECK: DW_TAG_structure_type
+; CHECK: DW_TAG_subprogram
+
+; CHECK-LABEL: .debug_types.dwo contents:
+; CHECK: Type Unit
+; CHECK: DW_TAG_type_unit
+; CHECK: DW_AT_stmt_list {{.*}}(0x00000000)
+; CHECK: DW_TAG_structure_type
+; CHECK: DW_AT_decl_file {{.*}} ("a.cpp")
+; CHECK: Type Unit
+; CHECK: DW_TAG_type_unit
+; CHECK: DW_AT_stmt_list {{.*}}(0x00000000)
+; CHECK: DW_TAG_structure_type
+; CHECK: DW_AT_decl_file {{.*}} ("b.cpp")
+
+; CHECK: .debug_cu_index contents:
+; CHECK-NEXT: version = 2 slots = 16
+; CHECK: Index Signature INFO ABBREV LINE STR_OFFSETS
+; CHECK-NEXT: ----- ------------------ ------------------------ ------------------------ ------------------------ ------------------------
+; CHECK-NEXT: 3 0xfef104c25502f092 [0x0000002d, 0x0000005f) [0x00000043, 0x0000008e) [0x0000001a, 0x00000034) [0x00000010, 0x00000024)
+; CHECK-NEXT: 9 0x03c30756e2d45008 [0x00000000, 0x0000002d) [0x00000000, 0x00000043) [0x00000000, 0x0000001a) [0x00000000, 0x00000010)
+
+; CHECK: .debug_tu_index contents:
+; CHECK-NEXT: version = 2 slots = 16
+; CHECK: Index Signature TYPES ABBREV LINE STR_OFFSETS
+; CHECK-NEXT: ----- ------------------ ------------------------ ------------------------ ------------------------ ------------------------
+; CHECK-NEXT: 9 0x1d02f3be30cc5688 [0x00000024, 0x00000048) [0x00000043, 0x0000008e) [0x0000001a, 0x00000034) [0x00000010, 0x00000024)
+; CHECK-NEXT: 13 0x3875c0e21cda63fc [0x00000000, 0x00000024) [0x00000000, 0x00000043) [0x00000000, 0x0000001a) [0x00000000, 0x00000010)
+
+; TODO: use the index section offset info to correctly dump strings in debug info
+; TODO: use the index section offset info to correctly dump file names in debug info
diff --git a/test/DebugInfo/dwarfdump-macho-relocs.test b/test/DebugInfo/dwarfdump-macho-relocs.test
new file mode 100644
index 000000000000..95798a841caf
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-macho-relocs.test
@@ -0,0 +1,27 @@
+// RUN: llvm-dwarfdump -debug-dump=info %p/Inputs/dwarfdump-macho-relocs.macho.x86_64.o | FileCheck %s
+
+// The dumped file has 2 functions in different sections of the __TEXT segment.
+// Check that the addresses are are dumped correctly
+
+// Compiled with: clang -x c -g -c -o dwarfdump-macho-relocs.macho.x86_64.o dwarfdump-macho-relocs.test
+
+__attribute__((section("__TEXT,__blah")))
+int foo() {
+ return 42;
+}
+
+// CHECK: DW_TAG_subprogram
+// CHECK-NEXT: DW_AT_low_pc{{.*}}0x0000000000000020
+// CHECK-NEXT: DW_AT_high_pc{{.*}}0x000000000000002b
+// CHECK-NEXT: DW_AT_frame_base
+// CHECK-NEXT: DW_AT_name{{.*}}"foo"
+
+int main() {
+ return foo();
+}
+
+// CHECK: DW_TAG_subprogram
+// CHECK-NEXT: DW_AT_low_pc{{.*}}0x0000000000000000
+// CHECK-NEXT: DW_AT_high_pc{{.*}}0x000000000000001a
+// CHECK-NEXT: DW_AT_frame_base
+// CHECK-NEXT: DW_AT_name{{.*}}"main"
diff --git a/test/DebugInfo/dwarfdump-macho-universal.test b/test/DebugInfo/dwarfdump-macho-universal.test
new file mode 100644
index 000000000000..0b4777317d7a
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-macho-universal.test
@@ -0,0 +1,17 @@
+Reuse a test input from llvm-dsymutil, it's perfect for what we want to exercise
+RUN: llvm-dwarfdump %S/Inputs/fat-test.o -debug-dump=info | FileCheck %s
+
+CHECK: fat-test.o (x86_64): file format Mach-O 64-bit x86-64
+CHECK: DW_TAG_variable
+CHECK-NOT: {{NULL|DW_TAG}}
+CHECK: DW_AT_name {{.*}} "x86_64_var"
+
+CHECK: fat-test.o (i386): file format Mach-O 32-bit i386
+CHECK: DW_TAG_variable
+CHECK-NOT: {{NULL|DW_TAG}}
+CHECK: DW_AT_name {{.*}} "i386_var"
+
+CHECK: fat-test.o (x86_64h): file format Mach-O 64-bit x86-64
+CHECK: DW_TAG_variable
+CHECK-NOT: {{NULL|DW_TAG}}
+CHECK: DW_AT_name {{.*}} "x86_64h_var"
diff --git a/test/DebugInfo/dwo.ll b/test/DebugInfo/dwo.ll
new file mode 100644
index 000000000000..fd9aa16484c9
--- /dev/null
+++ b/test/DebugInfo/dwo.ll
@@ -0,0 +1,15 @@
+; RUN: %llc_dwarf %s -filetype=obj -o %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_GNU_dwo_id {{.*}}abcd
+; CHECK-NOT: DW_AT_GNU_dwo_name
+; REQUIRES: default_triple
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM", isOptimized: false, runtimeVersion: 2, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2, dwoId: 43981)
+!1 = !DIFile(filename: "<stdin>", directory: "/")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/empty.ll b/test/DebugInfo/empty.ll
deleted file mode 100644
index ad26b500b354..000000000000
--- a/test/DebugInfo/empty.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump - | FileCheck %s
-; RUN: %llc_dwarf -split-dwarf=Enable < %s -filetype=obj | llvm-dwarfdump - | FileCheck --check-prefix=FISSION %s
-
-; darwin has a workaround for a linker bug so it always emits one line table entry
-; XFAIL: darwin
-
-; Expect no line table entry since there are no functions and file references in this compile unit
-; CHECK: .debug_line contents:
-; CHECK: Line table prologue:
-; CHECK: total_length: 0x00000019
-; CHECK-NOT: file_names[
-
-; CHECK: .debug_pubnames contents:
-; CHECK-NOT: Offset
-
-; CHECK: .debug_pubtypes contents:
-; CHECK-NOT: Offset
-
-; Don't emit DW_AT_addr_base when there are no addresses.
-; FISSION-NOT: DW_AT_GNU_addr_base [DW_FORM_sec_offset]
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!5}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 143523)", isOptimized: true, emissionKind: 0, file: !4, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2)
-!2 = !{}
-!3 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
-!4 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
-!5 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/enum-types.ll b/test/DebugInfo/enum-types.ll
deleted file mode 100644
index 7b50f5e9087c..000000000000
--- a/test/DebugInfo/enum-types.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; REQUIRES: object-emission
-;
-; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; Make sure we can handle enums with the same identifier but in enum types of
-; different compile units.
-; rdar://17628609
-
-; CHECK: DW_TAG_compile_unit
-; CHECK: 0x[[ENUM:.*]]: DW_TAG_enumeration_type
-; CHECK-NEXT: DW_AT_name {{.*}} "EA"
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_Z4topA2EA"
-; CHECK: DW_TAG_formal_parameter
-; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x{{.*}} => {0x[[ENUM]]})
-
-; CHECK: DW_TAG_compile_unit
-; CHECK: DW_TAG_subprogram
-; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_Z4topB2EA"
-; CHECK: DW_TAG_formal_parameter
-; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[ENUM]]
-
-; Function Attrs: nounwind ssp uwtable
-define void @_Z4topA2EA(i32 %sa) #0 {
-entry:
- %sa.addr = alloca i32, align 4
- store i32 %sa, i32* %sa.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %sa.addr, metadata !22, metadata !DIExpression()), !dbg !23
- ret void, !dbg !24
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-; Function Attrs: nounwind ssp uwtable
-define void @_Z4topB2EA(i32 %sa) #0 {
-entry:
- %sa.addr = alloca i32, align 4
- store i32 %sa, i32* %sa.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %sa.addr, metadata !25, metadata !DIExpression()), !dbg !26
- ret void, !dbg !27
-}
-
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0, !12}
-!llvm.module.flags = !{!19, !20}
-!llvm.ident = !{!21, !21}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 214102:214133) (llvm/trunk 214102:214132)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !6, globals: !11, imports: !11)
-!1 = !DIFile(filename: "a.cpp", directory: "")
-!2 = !{!3}
-!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "EA", line: 1, size: 32, align: 32, file: !1, elements: !4, identifier: "_ZTS2EA")
-!4 = !{!5}
-!5 = !DIEnumerator(name: "EA_0", value: 0) ; [ DW_TAG_enumerator ] [EA_0 :: 0]
-!6 = !{!7}
-!7 = !DISubprogram(name: "topA", linkageName: "_Z4topA2EA", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !8, type: !9, function: void (i32)* @_Z4topA2EA, variables: !11)
-!8 = !DIFile(filename: "a.cpp", directory: "")
-!9 = !DISubroutineType(types: !10)
-!10 = !{null, !"_ZTS2EA"}
-!11 = !{}
-!12 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 214102:214133) (llvm/trunk 214102:214132)", isOptimized: false, emissionKind: 1, file: !13, enums: !14, retainedTypes: !14, subprograms: !16, globals: !11, imports: !11)
-!13 = !DIFile(filename: "b.cpp", directory: "")
-!14 = !{!15}
-!15 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "EA", line: 1, size: 32, align: 32, file: !13, elements: !4, identifier: "_ZTS2EA")
-!16 = !{!17}
-!17 = !DISubprogram(name: "topB", linkageName: "_Z4topB2EA", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !13, scope: !18, type: !9, function: void (i32)* @_Z4topB2EA, variables: !11)
-!18 = !DIFile(filename: "b.cpp", directory: "")
-!19 = !{i32 2, !"Dwarf Version", i32 2}
-!20 = !{i32 2, !"Debug Info Version", i32 3}
-!21 = !{!"clang version 3.5.0 (trunk 214102:214133) (llvm/trunk 214102:214132)"}
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "sa", line: 5, arg: 1, scope: !7, file: !8, type: !"_ZTS2EA")
-!23 = !DILocation(line: 5, column: 14, scope: !7)
-!24 = !DILocation(line: 6, column: 1, scope: !7)
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "sa", line: 5, arg: 1, scope: !17, file: !18, type: !"_ZTS2EA")
-!26 = !DILocation(line: 5, column: 14, scope: !17)
-!27 = !DILocation(line: 6, column: 1, scope: !17)
diff --git a/test/DebugInfo/enum.ll b/test/DebugInfo/enum.ll
deleted file mode 100644
index fd07a92ae41b..000000000000
--- a/test/DebugInfo/enum.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-
-; IR generated from the following code compiled with clang -g:
-; enum e1 { I, J = 0xffffffffU, K = 0xf000000000000000ULL } a;
-; enum e2 { X };
-; void func() {
-; int b = X;
-; }
-
-; These values were previously being truncated to -1 and 0 respectively.
-
-; CHECK: debug_info contents
-; CHECK: DW_TAG_enumeration_type
-; CHECK-NEXT: DW_AT_name{{.*}} = "e1"
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_enumerator
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_enumerator
-; CHECK-NEXT: DW_AT_name{{.*}} = "J"
-; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata] (4294967295)
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_enumerator
-; CHECK-NEXT: DW_AT_name{{.*}} = "K"
-; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata] (-1152921504606846976)
-
-; Check that we retain enums that aren't referenced by any variables, etc
-; CHECK: DW_TAG_enumeration_type
-; CHECK-NEXT: DW_AT_name{{.*}} = "e2"
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_enumerator
-; CHECK-NEXT: DW_AT_name{{.*}} = "X"
-
-@a = global i64 0, align 8
-
-; Function Attrs: nounwind uwtable
-define void @_Z4funcv() #0 {
-entry:
- %b = alloca i32, align 4
- call void @llvm.dbg.declare(metadata i32* %b, metadata !20, metadata !DIExpression()), !dbg !22
- store i32 0, i32* %b, align 4, !dbg !22
- ret void, !dbg !23
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!19, !24}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !11, subprograms: !12, globals: !17, imports: !11)
-!1 = !DIFile(filename: "enum.cpp", directory: "/tmp")
-!2 = !{!3, !8}
-!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "e1", line: 1, size: 64, align: 64, file: !1, elements: !4)
-!4 = !{!5, !6, !7}
-!5 = !DIEnumerator(name: "I", value: 0) ; [ DW_TAG_enumerator ] [I :: 0]
-!6 = !DIEnumerator(name: "J", value: 4294967295) ; [ DW_TAG_enumerator ] [J :: 4294967295]
-!7 = !DIEnumerator(name: "K", value: -1152921504606846976) ; [ DW_TAG_enumerator ] [K :: 17293822569102704640]
-!8 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "e2", line: 2, size: 32, align: 32, file: !1, elements: !9)
-!9 = !{!10}
-!10 = !DIEnumerator(name: "X", value: 0) ; [ DW_TAG_enumerator ] [X :: 0]
-!11 = !{}
-!12 = !{!13}
-!13 = !DISubprogram(name: "func", linkageName: "_Z4funcv", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !14, type: !15, function: void ()* @_Z4funcv, variables: !11)
-!14 = !DIFile(filename: "enum.cpp", directory: "/tmp")
-!15 = !DISubroutineType(types: !16)
-!16 = !{null}
-!17 = !{!18}
-!18 = !DIGlobalVariable(name: "a", line: 1, isLocal: false, isDefinition: true, scope: null, file: !14, type: !3, variable: i64* @a)
-!19 = !{i32 2, !"Dwarf Version", i32 3}
-!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 4, scope: !13, file: !14, type: !21)
-!21 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!22 = !DILocation(line: 4, scope: !13)
-!23 = !DILocation(line: 5, scope: !13)
-!24 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll
deleted file mode 100644
index cf2d26b36acd..000000000000
--- a/test/DebugInfo/global.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-
-; Also test that the null streamer doesn't crash with debug info.
-; RUN: %llc_dwarf -O0 -filetype=null < %s
-
-; generated from the following source compiled to bitcode with clang -g -O1
-; static int i;
-; int main() {
-; (void)&i;
-; }
-
-; CHECK: debug_info contents
-; CHECK: DW_TAG_variable
-
-; Function Attrs: nounwind readnone uwtable
-define i32 @main() #0 {
-entry:
- ret i32 0, !dbg !12
-}
-
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!11, !13}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
-!1 = !DIFile(filename: "global.cpp", directory: "/tmp")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
-!5 = !DIFile(filename: "global.cpp", directory: "/tmp")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !{!10}
-!10 = !DIGlobalVariable(name: "i", linkageName: "_ZL1i", line: 1, isLocal: true, isDefinition: true, scope: null, file: !5, type: !8)
-!11 = !{i32 2, !"Dwarf Version", i32 3}
-!12 = !DILocation(line: 4, scope: !4)
-!13 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/gvn.ll b/test/DebugInfo/gvn.ll
deleted file mode 100644
index 3ca3663bd831..000000000000
--- a/test/DebugInfo/gvn.ll
+++ /dev/null
@@ -1,135 +0,0 @@
-; RUN: opt < %s -O2 -gvn -S | FileCheck %s
-;
-; Produced at -O2 from:
-; struct context {
-; int cur_pid
-; };
-; int a, b, c, f, d;
-; int pid_for_task(int);
-; sample(struct context *p1)
-; {
-; if (c)
-; b = a;
-; if (a && p1->cur_pid)
-; sample_internal();
-; }
-; callback() {
-; f = pid_for_task(d);
-; sample(&f);
-; }
-
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios"
-
-%struct.context = type { i32 }
-
-@c = common global i32 0, align 4
-@a = common global i32 0, align 4
-@b = common global i32 0, align 4
-@d = common global i32 0, align 4
-@f = common global i32 0, align 4
-
-; Function Attrs: nounwind
-declare i32 @sample_internal(...)
-
-; Function Attrs: nounwind
-define i32 @callback() #0 {
-entry:
- %0 = load i32, i32* @d, align 4, !dbg !37
-
- ; Verify that the call still has a debug location after GVN.
- ; CHECK: %call = tail call i32 @pid_for_task(i32 %0) #{{[0-9]}}, !dbg
- %call = tail call i32 @pid_for_task(i32 %0) #3, !dbg !37
-
- store i32 %call, i32* @f, align 4, !dbg !37
- tail call void @llvm.dbg.value(metadata %struct.context* bitcast (i32* @f to %struct.context*), i64 0, metadata !25, metadata !26) #3, !dbg !38
- %1 = load i32, i32* @c, align 4, !dbg !40
- %tobool.i = icmp eq i32 %1, 0, !dbg !40
- %.pr.i = load i32, i32* @a, align 4, !dbg !41
- br i1 %tobool.i, label %if.end.i, label %if.then.i, !dbg !42
-
-if.then.i: ; preds = %entry
- store i32 %.pr.i, i32* @b, align 4, !dbg !43
- br label %if.end.i, !dbg !43
-
-if.end.i: ; preds = %if.then.i, %entry
- %tobool1.i = icmp eq i32 %.pr.i, 0, !dbg !41
-
- ; This instruction has no debug location -- in this
- ; particular case it was removed by a bug in SimplifyCFG.
- %2 = load i32, i32* @f, align 4
-
- ; GVN is supposed to replace the load of @f with a direct reference to %call.
- ; CHECK: %tobool2.i = icmp eq i32 %call, 0, !dbg
- %tobool2.i = icmp eq i32 %2, 0, !dbg !41
-
- %or.cond = or i1 %tobool1.i, %tobool2.i, !dbg !41
- br i1 %or.cond, label %sample.exit, label %if.then.3.i, !dbg !41
-
-if.then.3.i: ; preds = %if.end.i
- %call.i = tail call i32 bitcast (i32 (...)* @sample_internal to i32 ()*)() #3, !dbg !44
- br label %sample.exit, !dbg !44
-
-sample.exit: ; preds = %if.end.i, %if.then.3.i
- ret i32 undef, !dbg !45
-}
-
-declare i32 @pid_for_task(i32) #1
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
-
-attributes #0 = { nounwind }
-attributes #2 = { nounwind readnone }
-attributes #3 = { nounwind }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!22, !23}
-!llvm.ident = !{!24}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 244473) (llvm/trunk 244644)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !16)
-!1 = !DIFile(filename: "test.c", directory: "/")
-!2 = !{}
-!3 = !{!4, !13}
-!4 = !DISubprogram(name: "sample", scope: !5, file: !5, line: 6, type: !6, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
-!5 = !DIFile(filename: "test.i", directory: "/")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8, !9}
-!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 64)
-!10 = !DICompositeType(tag: DW_TAG_structure_type, name: "context", file: !5, line: 1, size: 32, align: 32, elements: !11)
-!11 = !{!12}
-!12 = !DIDerivedType(tag: DW_TAG_member, name: "cur_pid", scope: !10, file: !5, line: 2, baseType: !8, size: 32, align: 32)
-!13 = !DISubprogram(name: "callback", scope: !5, file: !5, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, isOptimized: false, function: i32 ()* @callback, variables: !2)
-!14 = !DISubroutineType(types: !15)
-!15 = !{!8}
-!16 = !{!17, !18, !19, !20, !21}
-!17 = !DIGlobalVariable(name: "a", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @a)
-!18 = !DIGlobalVariable(name: "b", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @b)
-!19 = !DIGlobalVariable(name: "c", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @c)
-!20 = !DIGlobalVariable(name: "f", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @f)
-!21 = !DIGlobalVariable(name: "d", scope: !0, file: !5, line: 4, type: !8, isLocal: false, isDefinition: true, variable: i32* @d)
-!22 = !{i32 2, !"Dwarf Version", i32 2}
-!23 = !{i32 2, !"Debug Info Version", i32 3}
-!24 = !{!"clang version 3.8.0 (trunk 244473) (llvm/trunk 244644)"}
-!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "p1", arg: 1, scope: !4, file: !5, line: 6, type: !9)
-!26 = !DIExpression()
-!27 = !DILocation(line: 6, scope: !4)
-!28 = !DILocation(line: 8, scope: !29)
-!29 = distinct !DILexicalBlock(scope: !4, file: !5, line: 8)
-!30 = !DILocation(line: 10, scope: !31)
-!31 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10)
-!32 = !DILocation(line: 8, scope: !4)
-!33 = !DILocation(line: 9, scope: !29)
-!34 = !DILocation(line: 10, scope: !4)
-!35 = !DILocation(line: 11, scope: !31)
-!36 = !DILocation(line: 12, scope: !4)
-!37 = !DILocation(line: 14, scope: !13)
-!38 = !DILocation(line: 6, scope: !4, inlinedAt: !39)
-!39 = distinct !DILocation(line: 15, scope: !13)
-!40 = !DILocation(line: 8, scope: !29, inlinedAt: !39)
-!41 = !DILocation(line: 10, scope: !31, inlinedAt: !39)
-!42 = !DILocation(line: 8, scope: !4, inlinedAt: !39)
-!43 = !DILocation(line: 9, scope: !29, inlinedAt: !39)
-!44 = !DILocation(line: 11, scope: !31, inlinedAt: !39)
-!45 = !DILocation(line: 16, scope: !13)
diff --git a/test/DebugInfo/incorrect-variable-debugloc.ll b/test/DebugInfo/incorrect-variable-debugloc.ll
deleted file mode 100644
index 930f8c92cf65..000000000000
--- a/test/DebugInfo/incorrect-variable-debugloc.ll
+++ /dev/null
@@ -1,391 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O2 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; This is a test case that's as reduced as I can get it, though I haven't fully
-; understood the mechanisms by which this bug occurs, so perhaps there's further
-; simplification to be had (it's certainly a bit non-obvious what's going on). I
-; hesitate to hand-craft or otherwise simplify the IR compared to what Clang
-; generates as this is a particular tickling of optimizations and debug location
-; propagation I want a realistic example of.
-
-; Generated with clang-tot -cc1 -g -O2 -w -std=c++11 -fsanitize=address,use-after-return -fcxx-exceptions -fexceptions -x c++ incorrect-variable-debug-loc.cpp -emit-llvm
-
-; struct A {
-; int m_fn1();
-; };
-;
-; struct B {
-; void __attribute__((always_inline)) m_fn2() { i = 0; }
-; int i;
-; };
-;
-; struct C {
-; void m_fn3();
-; int j;
-; B b;
-; };
-;
-; int fn1() {
-; C A;
-; A.b.m_fn2();
-; A.m_fn3();
-; }
-; void C::m_fn3() {
-; A().m_fn1();
-; b.m_fn2();
-; }
-
-; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name {{.*}} "C"
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "m_fn3"
-
-; CHECK: DW_AT_specification {{.*}} "_ZN1C5m_fn3Ev"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "this"
-
-%struct.C = type { i32, %struct.B }
-%struct.B = type { i32 }
-%struct.A = type { i8 }
-
-@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 1, void ()* @asan.module_ctor }]
-@__asan_option_detect_stack_use_after_return = external global i32
-@__asan_gen_ = private unnamed_addr constant [11 x i8] c"1 32 8 1 A\00", align 1
-@__asan_gen_1 = private unnamed_addr constant [13 x i8] c"1 32 1 3 tmp\00", align 1
-
-; Function Attrs: noreturn sanitize_address
-define i32 @_Z3fn1v() #0 {
-entry:
- %MyAlloca = alloca [64 x i8], align 32, !dbg !39
- %0 = ptrtoint [64 x i8]* %MyAlloca to i64, !dbg !39
- %1 = load i32, i32* @__asan_option_detect_stack_use_after_return, !dbg !39
- %2 = icmp ne i32 %1, 0, !dbg !39
- br i1 %2, label %3, label %5
-
-; <label>:3 ; preds = %entry
- %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0), !dbg !39
- br label %5
-
-; <label>:5 ; preds = %entry, %3
- %6 = phi i64 [ %0, %entry ], [ %4, %3 ], !dbg !39
- %7 = add i64 %6, 32, !dbg !39
- %8 = inttoptr i64 %7 to %struct.C*, !dbg !39
- %9 = inttoptr i64 %6 to i64*, !dbg !39
- store i64 1102416563, i64* %9, !dbg !39
- %10 = add i64 %6, 8, !dbg !39
- %11 = inttoptr i64 %10 to i64*, !dbg !39
- store i64 ptrtoint ([11 x i8]* @__asan_gen_ to i64), i64* %11, !dbg !39
- %12 = add i64 %6, 16, !dbg !39
- %13 = inttoptr i64 %12 to i64*, !dbg !39
- store i64 ptrtoint (i32 ()* @_Z3fn1v to i64), i64* %13, !dbg !39
- %14 = lshr i64 %6, 3, !dbg !39
- %15 = add i64 %14, 2147450880, !dbg !39
- %16 = add i64 %15, 0, !dbg !39
- %17 = inttoptr i64 %16 to i64*, !dbg !39
- store i64 -868083117767659023, i64* %17, !dbg !39
- %i.i = getelementptr inbounds %struct.C, %struct.C* %8, i64 0, i32 1, i32 0, !dbg !39
- %18 = ptrtoint i32* %i.i to i64, !dbg !39
- %19 = lshr i64 %18, 3, !dbg !39
- %20 = add i64 %19, 2147450880, !dbg !39
- %21 = inttoptr i64 %20 to i8*, !dbg !39
- %22 = load i8, i8* %21, !dbg !39
- %23 = icmp ne i8 %22, 0, !dbg !39
- br i1 %23, label %24, label %30, !dbg !39
-
-; <label>:24 ; preds = %5
- %25 = and i64 %18, 7, !dbg !39
- %26 = add i64 %25, 3, !dbg !39
- %27 = trunc i64 %26 to i8, !dbg !39
- %28 = icmp sge i8 %27, %22, !dbg !39
- br i1 %28, label %29, label %30
-
-; <label>:29 ; preds = %24
- call void @__asan_report_store4(i64 %18), !dbg !39
- call void asm sideeffect "", ""()
- unreachable
-
-; <label>:30 ; preds = %24, %5
- store i32 0, i32* %i.i, align 4, !dbg !39, !tbaa !41
- tail call void @llvm.dbg.value(metadata %struct.C* %8, i64 0, metadata !27, metadata !DIExpression()), !dbg !46
- call void @_ZN1C5m_fn3Ev(%struct.C* %8), !dbg !47
- unreachable, !dbg !47
-}
-
-; Function Attrs: sanitize_address
-define void @_ZN1C5m_fn3Ev(%struct.C* nocapture %this) #1 align 2 {
-entry:
- %MyAlloca = alloca [64 x i8], align 32, !dbg !48
- %0 = ptrtoint [64 x i8]* %MyAlloca to i64, !dbg !48
- %1 = load i32, i32* @__asan_option_detect_stack_use_after_return, !dbg !48
- %2 = icmp ne i32 %1, 0, !dbg !48
- br i1 %2, label %3, label %5
-
-; <label>:3 ; preds = %entry
- %4 = call i64 @__asan_stack_malloc_0(i64 64, i64 %0), !dbg !48
- br label %5
-
-; <label>:5 ; preds = %entry, %3
- %6 = phi i64 [ %0, %entry ], [ %4, %3 ], !dbg !48
- %7 = add i64 %6, 32, !dbg !48
- %8 = inttoptr i64 %7 to %struct.A*, !dbg !48
- %9 = inttoptr i64 %6 to i64*, !dbg !48
- store i64 1102416563, i64* %9, !dbg !48
- %10 = add i64 %6, 8, !dbg !48
- %11 = inttoptr i64 %10 to i64*, !dbg !48
- store i64 ptrtoint ([13 x i8]* @__asan_gen_1 to i64), i64* %11, !dbg !48
- %12 = add i64 %6, 16, !dbg !48
- %13 = inttoptr i64 %12 to i64*, !dbg !48
- store i64 ptrtoint (void (%struct.C*)* @_ZN1C5m_fn3Ev to i64), i64* %13, !dbg !48
- %14 = lshr i64 %6, 3, !dbg !48
- %15 = add i64 %14, 2147450880, !dbg !48
- %16 = add i64 %15, 0, !dbg !48
- %17 = inttoptr i64 %16 to i64*, !dbg !48
- store i64 -868083113472691727, i64* %17, !dbg !48
- tail call void @llvm.dbg.value(metadata %struct.C* %this, i64 0, metadata !30, metadata !DIExpression()), !dbg !48
- %call = call i32 @_ZN1A5m_fn1Ev(%struct.A* %8), !dbg !49
- %i.i = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 1, i32 0, !dbg !50
- %18 = ptrtoint i32* %i.i to i64, !dbg !50
- %19 = lshr i64 %18, 3, !dbg !50
- %20 = add i64 %19, 2147450880, !dbg !50
- %21 = inttoptr i64 %20 to i8*, !dbg !50
- %22 = load i8, i8* %21, !dbg !50
- %23 = icmp ne i8 %22, 0, !dbg !50
- br i1 %23, label %24, label %30, !dbg !50
-
-; <label>:24 ; preds = %5
- %25 = and i64 %18, 7, !dbg !50
- %26 = add i64 %25, 3, !dbg !50
- %27 = trunc i64 %26 to i8, !dbg !50
- %28 = icmp sge i8 %27, %22, !dbg !50
- br i1 %28, label %29, label %30
-
-; <label>:29 ; preds = %24
- call void @__asan_report_store4(i64 %18), !dbg !50
- call void asm sideeffect "", ""()
- unreachable
-
-; <label>:30 ; preds = %24, %5
- store i32 0, i32* %i.i, align 4, !dbg !50, !tbaa !41
- store i64 1172321806, i64* %9, !dbg !52
- %31 = icmp ne i64 %6, %0, !dbg !52
- br i1 %31, label %32, label %39, !dbg !52
-
-; <label>:32 ; preds = %30
- %33 = add i64 %15, 0, !dbg !52
- %34 = inttoptr i64 %33 to i64*, !dbg !52
- store i64 -723401728380766731, i64* %34, !dbg !52
- %35 = add i64 %6, 56, !dbg !52
- %36 = inttoptr i64 %35 to i64*, !dbg !52
- %37 = load i64, i64* %36, !dbg !52
- %38 = inttoptr i64 %37 to i8*, !dbg !52
- store i8 0, i8* %38, !dbg !52
- br label %42, !dbg !52
-
-; <label>:39 ; preds = %30
- %40 = add i64 %15, 0, !dbg !52
- %41 = inttoptr i64 %40 to i64*, !dbg !52
- store i64 0, i64* %41, !dbg !52
- br label %42, !dbg !52
-
-; <label>:42 ; preds = %39, %32
- ret void, !dbg !52
-}
-
-declare i32 @_ZN1A5m_fn1Ev(%struct.A*) #2
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
-
-define internal void @asan.module_ctor() {
- tail call void @__asan_init_v3()
- ret void
-}
-
-declare void @__asan_init_v3()
-
-declare void @__asan_report_load1(i64)
-
-declare void @__asan_load1(i64)
-
-declare void @__asan_report_load2(i64)
-
-declare void @__asan_load2(i64)
-
-declare void @__asan_report_load4(i64)
-
-declare void @__asan_load4(i64)
-
-declare void @__asan_report_load8(i64)
-
-declare void @__asan_load8(i64)
-
-declare void @__asan_report_load16(i64)
-
-declare void @__asan_load16(i64)
-
-declare void @__asan_report_store1(i64)
-
-declare void @__asan_store1(i64)
-
-declare void @__asan_report_store2(i64)
-
-declare void @__asan_store2(i64)
-
-declare void @__asan_report_store4(i64)
-
-declare void @__asan_store4(i64)
-
-declare void @__asan_report_store8(i64)
-
-declare void @__asan_store8(i64)
-
-declare void @__asan_report_store16(i64)
-
-declare void @__asan_store16(i64)
-
-declare void @__asan_report_load_n(i64, i64)
-
-declare void @__asan_report_store_n(i64, i64)
-
-declare void @__asan_loadN(i64, i64)
-
-declare void @__asan_storeN(i64, i64)
-
-declare i8* @__asan_memmove(i8*, i8*, i64)
-
-declare i8* @__asan_memcpy(i8*, i8*, i64)
-
-declare i8* @__asan_memset(i8*, i32, i64)
-
-declare void @__asan_handle_no_return()
-
-declare void @__sanitizer_cov()
-
-declare void @__sanitizer_ptr_cmp(i64, i64)
-
-declare void @__sanitizer_ptr_sub(i64, i64)
-
-declare i64 @__asan_stack_malloc_0(i64, i64)
-
-declare void @__asan_stack_free_0(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_1(i64, i64)
-
-declare void @__asan_stack_free_1(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_2(i64, i64)
-
-declare void @__asan_stack_free_2(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_3(i64, i64)
-
-declare void @__asan_stack_free_3(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_4(i64, i64)
-
-declare void @__asan_stack_free_4(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_5(i64, i64)
-
-declare void @__asan_stack_free_5(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_6(i64, i64)
-
-declare void @__asan_stack_free_6(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_7(i64, i64)
-
-declare void @__asan_stack_free_7(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_8(i64, i64)
-
-declare void @__asan_stack_free_8(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_9(i64, i64)
-
-declare void @__asan_stack_free_9(i64, i64, i64)
-
-declare i64 @__asan_stack_malloc_10(i64, i64)
-
-declare void @__asan_stack_free_10(i64, i64, i64)
-
-declare void @__asan_poison_stack_memory(i64, i64)
-
-declare void @__asan_unpoison_stack_memory(i64, i64)
-
-declare void @__asan_before_dynamic_init(i64)
-
-declare void @__asan_after_dynamic_init()
-
-declare void @__asan_register_globals(i64, i64)
-
-declare void @__asan_unregister_globals(i64, i64)
-
-declare void @__sanitizer_cov_module_init(i64)
-
-attributes #0 = { noreturn sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!36, !37}
-!llvm.ident = !{!38}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !21, globals: !2, imports: !2)
-!1 = !DIFile(filename: "<stdin>", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4, !14}
-!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 10, size: 64, align: 32, file: !5, elements: !6, identifier: "_ZTS1C")
-!5 = !DIFile(filename: "incorrect-variable-debug-loc.cpp", directory: "/tmp/dbginfo")
-!6 = !{!7, !9, !10}
-!7 = !DIDerivedType(tag: DW_TAG_member, name: "j", line: 12, size: 32, align: 32, file: !5, scope: !"_ZTS1C", baseType: !8)
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 13, size: 32, align: 32, offset: 32, file: !5, scope: !"_ZTS1C", baseType: !"_ZTS1B")
-!10 = !DISubprogram(name: "m_fn3", linkageName: "_ZN1C5m_fn3Ev", line: 11, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !5, scope: !"_ZTS1C", type: !11)
-!11 = !DISubroutineType(types: !12)
-!12 = !{null, !13}
-!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
-!14 = !DICompositeType(tag: DW_TAG_structure_type, name: "B", line: 5, size: 32, align: 32, file: !5, elements: !15, identifier: "_ZTS1B")
-!15 = !{!16, !17}
-!16 = !DIDerivedType(tag: DW_TAG_member, name: "i", line: 7, size: 32, align: 32, file: !5, scope: !"_ZTS1B", baseType: !8)
-!17 = !DISubprogram(name: "m_fn2", linkageName: "_ZN1B5m_fn2Ev", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !5, scope: !"_ZTS1B", type: !18)
-!18 = !DISubroutineType(types: !19)
-!19 = !{null, !20}
-!20 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1B")
-!21 = !{!22, !28, !32}
-!22 = !DISubprogram(name: "fn1", linkageName: "_Z3fn1v", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !5, scope: !23, type: !24, function: i32 ()* @_Z3fn1v, variables: !26)
-!23 = !DIFile(filename: "incorrect-variable-debug-loc.cpp", directory: "/tmp/dbginfo")
-!24 = !DISubroutineType(types: !25)
-!25 = !{!8}
-!26 = !{!27}
-!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "A", line: 17, scope: !22, file: !23, type: !"_ZTS1C")
-!28 = !DISubprogram(name: "m_fn3", linkageName: "_ZN1C5m_fn3Ev", line: 21, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 21, file: !5, scope: !"_ZTS1C", type: !11, function: void (%struct.C*)* @_ZN1C5m_fn3Ev, declaration: !10, variables: !29)
-!29 = !{!30}
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !31)
-!31 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1C")
-!32 = !DISubprogram(name: "m_fn2", linkageName: "_ZN1B5m_fn2Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !5, scope: !"_ZTS1B", type: !18, declaration: !17, variables: !33)
-!33 = !{!34}
-!34 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !32, type: !35)
-!35 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1B")
-!36 = !{i32 2, !"Dwarf Version", i32 4}
-!37 = !{i32 2, !"Debug Info Version", i32 3}
-!38 = !{!"clang version 3.5.0 "}
-!39 = !DILocation(line: 6, scope: !32, inlinedAt: !40)
-!40 = !DILocation(line: 18, scope: !22)
-!41 = !{!42, !43, i64 0}
-!42 = !{!"_ZTS1B", !43, i64 0}
-!43 = !{!"int", !44, i64 0}
-!44 = !{!"omnipotent char", !45, i64 0}
-!45 = !{!"Simple C/C++ TBAA"}
-!46 = !DILocation(line: 17, scope: !22)
-!47 = !DILocation(line: 19, scope: !22)
-!48 = !DILocation(line: 0, scope: !28)
-!49 = !DILocation(line: 22, scope: !28)
-!50 = !DILocation(line: 6, scope: !32, inlinedAt: !51)
-!51 = !DILocation(line: 23, scope: !28)
-!52 = !DILocation(line: 24, scope: !28)
diff --git a/test/DebugInfo/incorrect-variable-debugloc1.ll b/test/DebugInfo/incorrect-variable-debugloc1.ll
deleted file mode 100644
index 3ece94ac8884..000000000000
--- a/test/DebugInfo/incorrect-variable-debugloc1.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; REQUIRES: object-emission
-; This test is failing for powerpc64, because a location list for the
-; variable 'c' is not generated at all. Temporary marking this test as XFAIL
-; for powerpc, until PR21881 is fixed.
-; XFAIL: powerpc64
-
-; RUN: %llc_dwarf -O2 -dwarf-version 2 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF23
-; RUN: %llc_dwarf -O2 -dwarf-version 3 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF23
-; RUN: %llc_dwarf -O2 -dwarf-version 4 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF4
-
-; This is a test for PR21176.
-; DW_OP_const <const> doesn't describe a constant value, but a value at a constant address.
-; The proper way to describe a constant value is DW_OP_constu <const>, DW_OP_stack_value.
-
-; Generated with clang -S -emit-llvm -g -O2 test.cpp
-
-; extern int func();
-;
-; int main()
-; {
-; volatile int c = 13;
-; c = func();
-; return c;
-; }
-
-; DWARF23: Location description: 10 0d {{$}}
-; DWARF4: Location description: 10 0d 9f
-
-; Function Attrs: uwtable
-define i32 @main() #0 {
-entry:
- %c = alloca i32, align 4
- tail call void @llvm.dbg.value(metadata i32 13, i64 0, metadata !10, metadata !16), !dbg !17
- store volatile i32 13, i32* %c, align 4, !dbg !18
- %call = tail call i32 @_Z4funcv(), !dbg !19
- tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !10, metadata !16), !dbg !17
- store volatile i32 %call, i32* %c, align 4, !dbg !19
- tail call void @llvm.dbg.value(metadata i32* %c, i64 0, metadata !10, metadata !16), !dbg !17
- %c.0.c.0. = load volatile i32, i32* %c, align 4, !dbg !20
- ret i32 %c.0.c.0., !dbg !20
-}
-
-declare i32 @_Z4funcv() #1
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!12, !13}
-!llvm.ident = !{!14}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 223522)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "test.cpp", directory: "/home/kromanova/ngh/ToT_latest/llvm/test/DebugInfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !9)
-!5 = !DIFile(filename: "test.cpp", directory: "/home/kromanova/ngh/ToT_latest/llvm/test/DebugInfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !{!10}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 5, scope: !4, file: !5, type: !11)
-!11 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !8)
-!12 = !{i32 2, !"Dwarf Version", i32 2}
-!13 = !{i32 2, !"Debug Info Version", i32 3}
-!14 = !{!"clang version 3.6.0 (trunk 223522)"}
-!15 = !{i32 13}
-!16 = !DIExpression()
-!17 = !DILocation(line: 5, column: 16, scope: !4)
-!18 = !DILocation(line: 5, column: 3, scope: !4)
-!19 = !DILocation(line: 6, column: 7, scope: !4)
-!20 = !DILocation(line: 7, column: 3, scope: !4)
-
diff --git a/test/DebugInfo/inheritance.ll b/test/DebugInfo/inheritance.ll
deleted file mode 100644
index 8a29d2e576a0..000000000000
--- a/test/DebugInfo/inheritance.ll
+++ /dev/null
@@ -1,154 +0,0 @@
-; RUN: llc %s -o /dev/null
-; PR 2613.
-
-%struct.__class_type_info_pseudo = type { %struct.__type_info_pseudo }
-%struct.__type_info_pseudo = type { i8*, i8* }
-%struct.test1 = type { i32 (...)** }
-
-@_ZTV5test1 = weak_odr constant [4 x i32 (...)*] [i32 (...)* null, i32 (...)* bitcast (%struct.__class_type_info_pseudo* @_ZTI5test1 to i32 (...)*), i32 (...)* bitcast (void (%struct.test1*)* @_ZN5test1D1Ev to i32 (...)*), i32 (...)* bitcast (void (%struct.test1*)* @_ZN5test1D0Ev to i32 (...)*)], align 32 ; <[4 x i32 (...)*]*> [#uses=1]
-@_ZTI5test1 = weak_odr constant %struct.__class_type_info_pseudo { %struct.__type_info_pseudo { i8* inttoptr (i64 add (i64 ptrtoint ([0 x i32 (...)*]* @_ZTVN10__cxxabiv117__class_type_infoE to i64), i64 16) to i8*), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @_ZTS5test1, i64 0, i64 0) } }, align 16 ; <%struct.__class_type_info_pseudo*> [#uses=1]
-@_ZTVN10__cxxabiv117__class_type_infoE = external constant [0 x i32 (...)*] ; <[0 x i32 (...)*]*> [#uses=1]
-@_ZTS5test1 = weak_odr constant [7 x i8] c"5test1\00" ; <[7 x i8]*> [#uses=2]
-
-define i32 @main() nounwind ssp {
-entry:
- %retval = alloca i32 ; <i32*> [#uses=2]
- %0 = alloca i32 ; <i32*> [#uses=2]
- %tst = alloca %struct.test1 ; <%struct.test1*> [#uses=1]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata %struct.test1* %tst, metadata !0, metadata !DIExpression()), !dbg !21
- call void @_ZN5test1C1Ev(%struct.test1* %tst) nounwind, !dbg !22
- store i32 0, i32* %0, align 4, !dbg !23
- %1 = load i32, i32* %0, align 4, !dbg !23 ; <i32> [#uses=1]
- store i32 %1, i32* %retval, align 4, !dbg !23
- br label %return, !dbg !23
-
-return: ; preds = %entry
- %retval1 = load i32, i32* %retval, !dbg !23 ; <i32> [#uses=1]
- ret i32 %retval1, !dbg !23
-}
-
-define linkonce_odr void @_ZN5test1C1Ev(%struct.test1* %this) nounwind ssp align 2 {
-entry:
- %this_addr = alloca %struct.test1* ; <%struct.test1**> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !24, metadata !DIExpression()), !dbg !28
- store %struct.test1* %this, %struct.test1** %this_addr
- %0 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !28 ; <%struct.test1*> [#uses=1]
- %1 = getelementptr inbounds %struct.test1, %struct.test1* %0, i32 0, i32 0, !dbg !28 ; <i32 (...)***> [#uses=1]
- store i32 (...)** getelementptr inbounds ([4 x i32 (...)*], [4 x i32 (...)*]* @_ZTV5test1, i64 0, i64 2), i32 (...)*** %1, align 8, !dbg !28
- br label %return, !dbg !28
-
-return: ; preds = %entry
- ret void, !dbg !29
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-define linkonce_odr void @_ZN5test1D1Ev(%struct.test1* %this) nounwind ssp align 2 {
-entry:
- %this_addr = alloca %struct.test1* ; <%struct.test1**> [#uses=3]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !32, metadata !DIExpression()), !dbg !34
- store %struct.test1* %this, %struct.test1** %this_addr
- %0 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !35 ; <%struct.test1*> [#uses=1]
- %1 = getelementptr inbounds %struct.test1, %struct.test1* %0, i32 0, i32 0, !dbg !35 ; <i32 (...)***> [#uses=1]
- store i32 (...)** getelementptr inbounds ([4 x i32 (...)*], [4 x i32 (...)*]* @_ZTV5test1, i64 0, i64 2), i32 (...)*** %1, align 8, !dbg !35
- br label %bb, !dbg !37
-
-bb: ; preds = %entry
- %2 = trunc i32 0 to i8, !dbg !37 ; <i8> [#uses=1]
- %toBool = icmp ne i8 %2, 0, !dbg !37 ; <i1> [#uses=1]
- br i1 %toBool, label %bb1, label %bb2, !dbg !37
-
-bb1: ; preds = %bb
- %3 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !37 ; <%struct.test1*> [#uses=1]
- %4 = bitcast %struct.test1* %3 to i8*, !dbg !37 ; <i8*> [#uses=1]
- call void @_ZdlPv(i8* %4) nounwind, !dbg !37
- br label %bb2, !dbg !37
-
-bb2: ; preds = %bb1, %bb
- br label %return, !dbg !37
-
-return: ; preds = %bb2
- ret void, !dbg !37
-}
-
-define linkonce_odr void @_ZN5test1D0Ev(%struct.test1* %this) nounwind ssp align 2 {
-entry:
- %this_addr = alloca %struct.test1* ; <%struct.test1**> [#uses=3]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !38, metadata !DIExpression()), !dbg !40
- store %struct.test1* %this, %struct.test1** %this_addr
- %0 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !41 ; <%struct.test1*> [#uses=1]
- %1 = getelementptr inbounds %struct.test1, %struct.test1* %0, i32 0, i32 0, !dbg !41 ; <i32 (...)***> [#uses=1]
- store i32 (...)** getelementptr inbounds ([4 x i32 (...)*], [4 x i32 (...)*]* @_ZTV5test1, i64 0, i64 2), i32 (...)*** %1, align 8, !dbg !41
- br label %bb, !dbg !43
-
-bb: ; preds = %entry
- %2 = trunc i32 1 to i8, !dbg !43 ; <i8> [#uses=1]
- %toBool = icmp ne i8 %2, 0, !dbg !43 ; <i1> [#uses=1]
- br i1 %toBool, label %bb1, label %bb2, !dbg !43
-
-bb1: ; preds = %bb
- %3 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !43 ; <%struct.test1*> [#uses=1]
- %4 = bitcast %struct.test1* %3 to i8*, !dbg !43 ; <i8*> [#uses=1]
- call void @_ZdlPv(i8* %4) nounwind, !dbg !43
- br label %bb2, !dbg !43
-
-bb2: ; preds = %bb1, %bb
- br label %return, !dbg !43
-
-return: ; preds = %bb2
- ret void, !dbg !43
-}
-
-declare void @_ZdlPv(i8*) nounwind
-
-!0 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "tst", line: 13, scope: !1, file: !4, type: !8)
-!1 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !2)
-!2 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !3)
-!3 = !DISubprogram(name: "main", linkageName: "main", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !4, type: !5)
-!4 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !44, enums: !45, retainedTypes: !45)
-!5 = !DISubroutineType(types: !6)
-!6 = !{!7}
-!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "test1", line: 1, size: 64, align: 64, file: !44, scope: !4, elements: !9, vtableHolder: !8)
-!9 = !{!10, !14, !18}
-!10 = !DIDerivedType(tag: DW_TAG_member, name: "_vptr$test1", line: 1, size: 64, align: 64, file: !44, scope: !8, baseType: !11)
-!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !4, baseType: !12)
-!12 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "__vtbl_ptr_type", scope: !4, baseType: !5)
-!13 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !46, enums: !45, retainedTypes: !45)
-!14 = !DISubprogram(name: "test1", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate, isOptimized: false, scope: !8, type: !15)
-!15 = !DISubroutineType(types: !16)
-!16 = !{null, !17}
-!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !4, baseType: !8)
-!18 = !DISubprogram(name: "~test1", line: 4, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, isOptimized: false, scope: !8, type: !19, containingType: !8)
-!19 = !DISubroutineType(types: !20)
-!20 = !{null, !17, !7}
-!21 = !DILocation(line: 11, scope: !1)
-!22 = !DILocation(line: 13, scope: !1)
-!23 = !DILocation(line: 14, scope: !1)
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 13, arg: 0, scope: !25, file: !4, type: !26)
-!25 = !DISubprogram(name: "test1", linkageName: "_ZN5test1C1Ev", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !4, type: !15)
-!26 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !4, baseType: !27)
-!27 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !4, baseType: !8)
-!28 = !DILocation(line: 1, scope: !25)
-!29 = !DILocation(line: 1, scope: !30)
-!30 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !31)
-!31 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !25)
-!32 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 4, arg: 0, scope: !33, file: !4, type: !26)
-!33 = !DISubprogram(name: "~test1", linkageName: "_ZN5test1D1Ev", line: 4, isLocal: false, isDefinition: true, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, isOptimized: false, scope: !8, type: !15, containingType: !8)
-!34 = !DILocation(line: 4, scope: !33)
-!35 = !DILocation(line: 5, scope: !36)
-!36 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !33)
-!37 = !DILocation(line: 6, scope: !36)
-!38 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 4, arg: 0, scope: !39, file: !4, type: !26)
-!39 = !DISubprogram(name: "~test1", linkageName: "_ZN5test1D0Ev", line: 4, isLocal: false, isDefinition: true, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, isOptimized: false, scope: !8, type: !15, containingType: !8)
-!40 = !DILocation(line: 4, scope: !39)
-!41 = !DILocation(line: 5, scope: !42)
-!42 = distinct !DILexicalBlock(line: 0, column: 0, file: !44, scope: !39)
-!43 = !DILocation(line: 6, scope: !42)
-!44 = !DIFile(filename: "inheritance.cpp", directory: "/tmp/")
-!45 = !{i32 0}
-!46 = !DIFile(filename: "<built-in>", directory: "/tmp/")
diff --git a/test/DebugInfo/inline-debug-info-multiret.ll b/test/DebugInfo/inline-debug-info-multiret.ll
deleted file mode 100644
index d86e6abbd80b..000000000000
--- a/test/DebugInfo/inline-debug-info-multiret.ll
+++ /dev/null
@@ -1,156 +0,0 @@
-; RUN: opt -inline -S < %s | FileCheck %s
-;
-; A hand-edited version of inline-debug-info.ll to test inlining of a
-; function with multiple returns.
-;
-; Make sure the branch instructions created during inlining has a debug location,
-; so the range of the inlined function is correct.
-; CHECK: br label %_Z4testi.exit, !dbg ![[MD:[0-9]+]]
-; CHECK: br label %_Z4testi.exit, !dbg ![[MD]]
-; CHECK: br label %invoke.cont, !dbg ![[MD]]
-; The branch instruction has the source location of line 9 and its inlined location
-; has the source location of line 14.
-; CHECK: ![[INL:[0-9]+]] = distinct !DILocation(line: 14, scope: {{.*}})
-; CHECK: ![[MD]] = !DILocation(line: 9, scope: {{.*}}, inlinedAt: ![[INL]])
-
-; ModuleID = 'test.cpp'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-darwin12.0.0"
-
-@_ZTIi = external constant i8*
-@global_var = external global i32
-
-; copy of above function with multiple returns
-define i32 @_Z4testi(i32 %k) {
-entry:
- %retval = alloca i32, align 4
- %k.addr = alloca i32, align 4
- %k2 = alloca i32, align 4
- store i32 %k, i32* %k.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %k.addr, metadata !13, metadata !DIExpression()), !dbg !14
- call void @llvm.dbg.declare(metadata i32* %k2, metadata !15, metadata !DIExpression()), !dbg !16
- %0 = load i32, i32* %k.addr, align 4, !dbg !16
- %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
- store i32 %call, i32* %k2, align 4, !dbg !16
- %1 = load i32, i32* %k2, align 4, !dbg !17
- %cmp = icmp sgt i32 %1, 100, !dbg !17
- br i1 %cmp, label %if.then, label %if.end, !dbg !17
-
-if.then: ; preds = %entry
- %2 = load i32, i32* %k2, align 4, !dbg !18
- store i32 %2, i32* %retval, !dbg !18
- br label %return, !dbg !18
-
-if.end: ; preds = %entry
- store i32 0, i32* %retval, !dbg !19
- %3 = load i32, i32* %retval, !dbg !20 ; hand-edited
- ret i32 %3, !dbg !20 ; hand-edited
-
-return: ; preds = %if.end, %if.then
- %4 = load i32, i32* %retval, !dbg !20
- ret i32 %4, !dbg !20
-}
-
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-declare i32 @_Z8test_exti(i32)
-
-define i32 @_Z5test2v() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %e = alloca i32, align 4
- %0 = load i32, i32* @global_var, align 4, !dbg !21
- %call = invoke i32 @_Z4testi(i32 %0)
- to label %invoke.cont unwind label %lpad, !dbg !21
-
-invoke.cont: ; preds = %entry
- br label %try.cont, !dbg !23
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIi to i8*), !dbg !21
- %2 = extractvalue { i8*, i32 } %1, 0, !dbg !21
- store i8* %2, i8** %exn.slot, !dbg !21
- %3 = extractvalue { i8*, i32 } %1, 1, !dbg !21
- store i32 %3, i32* %ehselector.slot, !dbg !21
- br label %catch.dispatch, !dbg !21
-
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot, !dbg !23
- %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2, !dbg !23
- %matches = icmp eq i32 %sel, %4, !dbg !23
- br i1 %matches, label %catch, label %eh.resume, !dbg !23
-
-catch: ; preds = %catch.dispatch
- call void @llvm.dbg.declare(metadata i32* %e, metadata !24, metadata !DIExpression()), !dbg !25
- %exn = load i8*, i8** %exn.slot, !dbg !23
- %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
- %6 = bitcast i8* %5 to i32*, !dbg !23
- %7 = load i32, i32* %6, align 4, !dbg !23
- store i32 %7, i32* %e, align 4, !dbg !23
- store i32 0, i32* @global_var, align 4, !dbg !26
- call void @__cxa_end_catch() #2, !dbg !28
- br label %try.cont, !dbg !28
-
-try.cont: ; preds = %catch, %invoke.cont
- store i32 1, i32* @global_var, align 4, !dbg !29
- ret i32 0, !dbg !30
-
-eh.resume: ; preds = %catch.dispatch
- %exn1 = load i8*, i8** %exn.slot, !dbg !23
- %sel2 = load i32, i32* %ehselector.slot, !dbg !23
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn1, 0, !dbg !23
- %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel2, 1, !dbg !23
- resume { i8*, i32 } %lpad.val3, !dbg !23
-}
-
-declare i32 @__gxx_personality_v0(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #1
-
-declare i8* @__cxa_begin_catch(i8*)
-
-declare void @__cxa_end_catch()
-
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!31}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "<unknown>", directory: "")
-!2 = !{}
-!3 = !{!4, !10}
-!4 = !DISubprogram(name: "test", linkageName: "_Z4testi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !6, type: !7, function: i32 (i32)* @_Z4testi, variables: !2)
-!5 = !DIFile(filename: "test.cpp", directory: "")
-!6 = !DIFile(filename: "test.cpp", directory: "")
-!7 = !DISubroutineType(types: !8)
-!8 = !{!9, !9}
-!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "test2", linkageName: "_Z5test2v", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !6, type: !11, function: i32 ()* @_Z5test2v, variables: !2)
-!11 = !DISubroutineType(types: !12)
-!12 = !{!9}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "k", line: 4, arg: 1, scope: !4, file: !6, type: !9)
-!14 = !DILocation(line: 4, scope: !4)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k2", line: 5, scope: !4, file: !6, type: !9)
-!16 = !DILocation(line: 5, scope: !4)
-!17 = !DILocation(line: 6, scope: !4)
-!18 = !DILocation(line: 7, scope: !4)
-!19 = !DILocation(line: 8, scope: !4)
-!20 = !DILocation(line: 9, scope: !4)
-!21 = !DILocation(line: 14, scope: !22)
-!22 = distinct !DILexicalBlock(line: 13, column: 0, file: !5, scope: !10)
-!23 = !DILocation(line: 15, scope: !22)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "e", line: 16, scope: !10, file: !6, type: !9)
-!25 = !DILocation(line: 16, scope: !10)
-!26 = !DILocation(line: 17, scope: !27)
-!27 = distinct !DILexicalBlock(line: 16, column: 0, file: !5, scope: !10)
-!28 = !DILocation(line: 18, scope: !27)
-!29 = !DILocation(line: 19, scope: !10)
-!30 = !DILocation(line: 20, scope: !10)
-!31 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/inline-debug-info.ll b/test/DebugInfo/inline-debug-info.ll
deleted file mode 100644
index 908093ca2552..000000000000
--- a/test/DebugInfo/inline-debug-info.ll
+++ /dev/null
@@ -1,174 +0,0 @@
-; RUN: opt -inline -S < %s | FileCheck %s
-
-; Created from source
-;
-;
-; 1 // test.cpp
-; 2 extern int global_var;
-; 3 extern int test_ext(int k);
-; 4 int test (int k) {
-; 5 int k2 = test_ext(k);
-; 6 if (k2 > 100)
-; 7 return k2;
-; 8 return 0;
-; 9 }
-; 10
-; 11 int test2() {
-; 12 try
-; 13 {
-; 14 test(global_var);
-; 15 }
-; 16 catch (int e) {
-; 17 global_var = 0;
-; 18 }
-; 19 global_var = 1;
-; 20 return 0;
-; 21 }
-
-; CHECK: _Z4testi.exit:
-; Make sure the branch instruction created during inlining has a debug location,
-; so the range of the inlined function is correct.
-; CHECK: br label %invoke.cont, !dbg [[MD:![0-9]+]]
-; The branch instruction has the source location of line 9 and its inlined location
-; has the source location of line 14.
-; CHECK: [[INL:![0-9]*]] = distinct !DILocation(line: 14, scope: {{.*}})
-; CHECK: [[MD]] = !DILocation(line: 9, scope: {{.*}}, inlinedAt: [[INL]])
-
-; ModuleID = 'test.cpp'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-darwin12.0.0"
-
-@_ZTIi = external constant i8*
-@global_var = external global i32
-
-define i32 @_Z4testi(i32 %k) {
-entry:
- %retval = alloca i32, align 4
- %k.addr = alloca i32, align 4
- %k2 = alloca i32, align 4
- store i32 %k, i32* %k.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %k.addr, metadata !13, metadata !DIExpression()), !dbg !14
- call void @llvm.dbg.declare(metadata i32* %k2, metadata !15, metadata !DIExpression()), !dbg !16
- %0 = load i32, i32* %k.addr, align 4, !dbg !16
- %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
- store i32 %call, i32* %k2, align 4, !dbg !16
- %1 = load i32, i32* %k2, align 4, !dbg !17
- %cmp = icmp sgt i32 %1, 100, !dbg !17
- br i1 %cmp, label %if.then, label %if.end, !dbg !17
-
-if.then: ; preds = %entry
- %2 = load i32, i32* %k2, align 4, !dbg !18
- store i32 %2, i32* %retval, !dbg !18
- br label %return, !dbg !18
-
-if.end: ; preds = %entry
- store i32 0, i32* %retval, !dbg !19
- br label %return, !dbg !19
-
-return: ; preds = %if.end, %if.then
- %3 = load i32, i32* %retval, !dbg !20
- ret i32 %3, !dbg !20
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-declare i32 @_Z8test_exti(i32)
-
-define i32 @_Z5test2v() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-entry:
- %exn.slot = alloca i8*
- %ehselector.slot = alloca i32
- %e = alloca i32, align 4
- %0 = load i32, i32* @global_var, align 4, !dbg !21
- %call = invoke i32 @_Z4testi(i32 %0)
- to label %invoke.cont unwind label %lpad, !dbg !21
-
-invoke.cont: ; preds = %entry
- br label %try.cont, !dbg !23
-
-lpad: ; preds = %entry
- %1 = landingpad { i8*, i32 }
- catch i8* bitcast (i8** @_ZTIi to i8*), !dbg !21
- %2 = extractvalue { i8*, i32 } %1, 0, !dbg !21
- store i8* %2, i8** %exn.slot, !dbg !21
- %3 = extractvalue { i8*, i32 } %1, 1, !dbg !21
- store i32 %3, i32* %ehselector.slot, !dbg !21
- br label %catch.dispatch, !dbg !21
-
-catch.dispatch: ; preds = %lpad
- %sel = load i32, i32* %ehselector.slot, !dbg !23
- %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2, !dbg !23
- %matches = icmp eq i32 %sel, %4, !dbg !23
- br i1 %matches, label %catch, label %eh.resume, !dbg !23
-
-catch: ; preds = %catch.dispatch
- call void @llvm.dbg.declare(metadata i32* %e, metadata !24, metadata !DIExpression()), !dbg !25
- %exn = load i8*, i8** %exn.slot, !dbg !23
- %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
- %6 = bitcast i8* %5 to i32*, !dbg !23
- %7 = load i32, i32* %6, align 4, !dbg !23
- store i32 %7, i32* %e, align 4, !dbg !23
- store i32 0, i32* @global_var, align 4, !dbg !26
- call void @__cxa_end_catch() #2, !dbg !28
- br label %try.cont, !dbg !28
-
-try.cont: ; preds = %catch, %invoke.cont
- store i32 1, i32* @global_var, align 4, !dbg !29
- ret i32 0, !dbg !30
-
-eh.resume: ; preds = %catch.dispatch
- %exn1 = load i8*, i8** %exn.slot, !dbg !23
- %sel2 = load i32, i32* %ehselector.slot, !dbg !23
- %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn1, 0, !dbg !23
- %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel2, 1, !dbg !23
- resume { i8*, i32 } %lpad.val3, !dbg !23
-}
-
-declare i32 @__gxx_personality_v0(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #1
-
-declare i8* @__cxa_begin_catch(i8*)
-
-declare void @__cxa_end_catch()
-
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!31}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "<unknown>", directory: "")
-!2 = !{}
-!3 = !{!4, !10}
-!4 = !DISubprogram(name: "test", linkageName: "_Z4testi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !6, type: !7, function: i32 (i32)* @_Z4testi, variables: !2)
-!5 = !DIFile(filename: "test.cpp", directory: "")
-!6 = !DIFile(filename: "test.cpp", directory: "")
-!7 = !DISubroutineType(types: !8)
-!8 = !{!9, !9}
-!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "test2", linkageName: "_Z5test2v", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !6, type: !11, function: i32 ()* @_Z5test2v, variables: !2)
-!11 = !DISubroutineType(types: !12)
-!12 = !{!9}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "k", line: 4, arg: 1, scope: !4, file: !6, type: !9)
-!14 = !DILocation(line: 4, scope: !4)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k2", line: 5, scope: !4, file: !6, type: !9)
-!16 = !DILocation(line: 5, scope: !4)
-!17 = !DILocation(line: 6, scope: !4)
-!18 = !DILocation(line: 7, scope: !4)
-!19 = !DILocation(line: 8, scope: !4)
-!20 = !DILocation(line: 9, scope: !4)
-!21 = !DILocation(line: 14, scope: !22)
-!22 = distinct !DILexicalBlock(line: 13, column: 0, file: !5, scope: !10)
-!23 = !DILocation(line: 15, scope: !22)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "e", line: 16, scope: !10, file: !6, type: !9)
-!25 = !DILocation(line: 16, scope: !10)
-!26 = !DILocation(line: 17, scope: !27)
-!27 = distinct !DILexicalBlock(line: 16, column: 0, file: !5, scope: !10)
-!28 = !DILocation(line: 18, scope: !27)
-!29 = !DILocation(line: 19, scope: !10)
-!30 = !DILocation(line: 20, scope: !10)
-!31 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/inline-no-debug-info.ll b/test/DebugInfo/inline-no-debug-info.ll
deleted file mode 100644
index 9ecd58031eb1..000000000000
--- a/test/DebugInfo/inline-no-debug-info.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: opt < %s -inline -S | FileCheck %s
-
-; This was generated from the following source:
-; int a, b;
-; __attribute__((__always_inline__)) static void callee2() { b = 2; }
-; __attribute__((__nodebug__)) void callee() { a = 1; callee2(); }
-; void caller() { callee(); }
-; by running
-; clang -S test.c -emit-llvm -O1 -gline-tables-only -fno-strict-aliasing
-
-; CHECK-LABEL: @caller(
-
-; This instruction did not have a !dbg metadata in the callee.
-; CHECK: store i32 1, {{.*}}, !dbg [[A:!.*]]
-
-; This instruction came from callee with a !dbg metadata.
-; CHECK: store i32 2, {{.*}}, !dbg [[B:!.*]]
-
-; The remaining instruction from the caller.
-; CHECK: ret void, !dbg [[A]]
-
-; Debug location of the code in caller() and of the inlined code that did not
-; have any debug location before.
-; CHECK-DAG: [[A]] = !DILocation(line: 4, scope: !{{[0-9]+}})
-
-; Debug location of the inlined code.
-; CHECK-DAG: [[B]] = !DILocation(line: 2, scope: !{{[0-9]+}}, inlinedAt: [[A_INL:![0-9]*]])
-; CHECK-DAG: [[A_INL]] = distinct !DILocation(line: 4, scope: !{{[0-9]+}})
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@a = common global i32 0, align 4
-@b = common global i32 0, align 4
-
-; Function Attrs: nounwind uwtable
-define void @callee() #0 {
-entry:
- store i32 1, i32* @a, align 4
- store i32 2, i32* @b, align 4, !dbg !11
- ret void
-}
-
-; Function Attrs: nounwind uwtable
-define void @caller() #0 {
-entry:
- tail call void @callee(), !dbg !12
- ret void, !dbg !12
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-!llvm.ident = !{!10}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (210174)", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "test.c", directory: "/code/llvm/build0")
-!2 = !{}
-!3 = !{!4, !7}
-!4 = !DISubprogram(name: "caller", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, function: void ()* @caller, variables: !2)
-!5 = !DIFile(filename: "test.c", directory: "/code/llvm/build0")
-!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "callee2", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
-!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 2, !"Debug Info Version", i32 3}
-!10 = !{!"clang version 3.5.0 (210174)"}
-!11 = !DILocation(line: 2, scope: !7)
-!12 = !DILocation(line: 4, scope: !4)
diff --git a/test/DebugInfo/inline-scopes.ll b/test/DebugInfo/inline-scopes.ll
deleted file mode 100644
index 45324f6a4b96..000000000000
--- a/test/DebugInfo/inline-scopes.ll
+++ /dev/null
@@ -1,130 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; bool f();
-; inline __attribute__((always_inline)) int f1() {
-; if (bool b = f())
-; return 1;
-; return 2;
-; }
-;
-; inline __attribute__((always_inline)) int f2() {
-; # 2 "y.cc"
-; if (bool b = f())
-; return 3;
-; return 4;
-; }
-;
-; int main() {
-; f1();
-; f2();
-; }
-
-; Ensure that lexical_blocks within inlined_subroutines are preserved/emitted.
-; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NOT: DW_TAG
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_lexical_block
-; CHECK-NOT: DW_TAG
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_variable
-; Ensure that file changes don't interfere with creating inlined subroutines.
-; (see the line directive inside 'f2' in thesource)
-; CHECK: DW_TAG_inlined_subroutine
-; CHECK: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin
-
-; Function Attrs: uwtable
-define i32 @main() #0 {
-entry:
- %retval.i2 = alloca i32, align 4
- %b.i3 = alloca i8, align 1
- %retval.i = alloca i32, align 4
- %b.i = alloca i8, align 1
- call void @llvm.dbg.declare(metadata i8* %b.i, metadata !16, metadata !DIExpression()), !dbg !19
- %call.i = call zeroext i1 @_Z1fv(), !dbg !19
- %frombool.i = zext i1 %call.i to i8, !dbg !19
- store i8 %frombool.i, i8* %b.i, align 1, !dbg !19
- %0 = load i8, i8* %b.i, align 1, !dbg !19
- %tobool.i = trunc i8 %0 to i1, !dbg !19
- br i1 %tobool.i, label %if.then.i, label %if.end.i, !dbg !19
-
-if.then.i: ; preds = %entry
- store i32 1, i32* %retval.i, !dbg !21
- br label %_Z2f1v.exit, !dbg !21
-
-if.end.i: ; preds = %entry
- store i32 2, i32* %retval.i, !dbg !22
- br label %_Z2f1v.exit, !dbg !22
-
-_Z2f1v.exit: ; preds = %if.then.i, %if.end.i
- %1 = load i32, i32* %retval.i, !dbg !23
- call void @llvm.dbg.declare(metadata i8* %b.i3, metadata !24, metadata !DIExpression()), !dbg !27
- %call.i4 = call zeroext i1 @_Z1fv(), !dbg !27
- %frombool.i5 = zext i1 %call.i4 to i8, !dbg !27
- store i8 %frombool.i5, i8* %b.i3, align 1, !dbg !27
- %2 = load i8, i8* %b.i3, align 1, !dbg !27
- %tobool.i6 = trunc i8 %2 to i1, !dbg !27
- br i1 %tobool.i6, label %if.then.i7, label %if.end.i8, !dbg !27
-
-if.then.i7: ; preds = %_Z2f1v.exit
- store i32 3, i32* %retval.i2, !dbg !29
- br label %_Z2f2v.exit, !dbg !29
-
-if.end.i8: ; preds = %_Z2f1v.exit
- store i32 4, i32* %retval.i2, !dbg !30
- br label %_Z2f2v.exit, !dbg !30
-
-_Z2f2v.exit: ; preds = %if.then.i7, %if.end.i8
- %3 = load i32, i32* %retval.i2, !dbg !31
- ret i32 0, !dbg !32
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-declare zeroext i1 @_Z1fv() #2
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!13, !14}
-!llvm.ident = !{!15}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "inline-scopes.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4, !10, !12}
-!4 = !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !5, scope: !6, type: !7, function: i32 ()* @main, variables: !2)
-!5 = !DIFile(filename: "y.cc", directory: "/tmp/dbginfo")
-!6 = !DIFile(filename: "y.cc", directory: "/tmp/dbginfo")
-!7 = !DISubroutineType(types: !8)
-!8 = !{!9}
-!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !1, scope: !11, type: !7, variables: !2)
-!11 = !DIFile(filename: "inline-scopes.cpp", directory: "/tmp/dbginfo")
-!12 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !11, type: !7, variables: !2)
-!13 = !{i32 2, !"Dwarf Version", i32 4}
-!14 = !{i32 1, !"Debug Info Version", i32 3}
-!15 = !{!"clang version 3.5.0 "}
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 3, scope: !17, file: !11, type: !18)
-!17 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !12)
-!18 = !DIBasicType(tag: DW_TAG_base_type, name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
-!19 = !DILocation(line: 3, scope: !17, inlinedAt: !20)
-!20 = !DILocation(line: 8, scope: !4)
-!21 = !DILocation(line: 4, scope: !17, inlinedAt: !20)
-!22 = !DILocation(line: 5, scope: !12, inlinedAt: !20)
-!23 = !DILocation(line: 6, scope: !12, inlinedAt: !20)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 2, scope: !25, file: !6, type: !18)
-!25 = distinct !DILexicalBlock(line: 2, column: 0, file: !5, scope: !26)
-!26 = !DILexicalBlockFile(discriminator: 0, file: !5, scope: !10)
-!27 = !DILocation(line: 2, scope: !25, inlinedAt: !28)
-!28 = !DILocation(line: 9, scope: !4)
-!29 = !DILocation(line: 3, scope: !25, inlinedAt: !28)
-!30 = !DILocation(line: 4, scope: !26, inlinedAt: !28)
-!31 = !DILocation(line: 5, scope: !26, inlinedAt: !28)
-!32 = !DILocation(line: 10, scope: !4)
diff --git a/test/DebugInfo/inlined-arguments.ll b/test/DebugInfo/inlined-arguments.ll
deleted file mode 100644
index 912ebb1321ee..000000000000
--- a/test/DebugInfo/inlined-arguments.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -filetype=obj < %s > %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-
-; IR generated from clang -O -g with the following source
-;
-; void f1(int x, int y);
-; void f3(int line);
-; void f2() {
-; f1(1, 2);
-; }
-; void f1(int x, int y) {
-; f3(y);
-; }
-
-; CHECK: DW_AT_name{{.*}}"f1"
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}"x"
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}"y"
-
-; Function Attrs: uwtable
-define void @_Z2f2v() #0 {
- tail call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !16, metadata !DIExpression()), !dbg !18
- tail call void @llvm.dbg.value(metadata i32 2, i64 0, metadata !20, metadata !DIExpression()), !dbg !18
- tail call void @_Z2f3i(i32 2), !dbg !21
- ret void, !dbg !22
-}
-
-; Function Attrs: uwtable
-define void @_Z2f1ii(i32 %x, i32 %y) #0 {
- tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !13, metadata !DIExpression()), !dbg !23
- tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !14, metadata !DIExpression()), !dbg !23
- tail call void @_Z2f3i(i32 %y), !dbg !24
- ret void, !dbg !25
-}
-
-declare void @_Z2f3i(i32) #1
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!26}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "exp.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
-!2 = !{}
-!3 = !{!4, !8}
-!4 = !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, function: void ()* @_Z2f2v, variables: !2)
-!5 = !DIFile(filename: "exp.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null}
-!8 = !DISubprogram(name: "f1", linkageName: "_Z2f1ii", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !9, function: void (i32, i32)* @_Z2f1ii, variables: !12)
-!9 = !DISubroutineType(types: !10)
-!10 = !{null, !11, !11}
-!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!12 = !{!13, !14}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 6, arg: 1, scope: !8, file: !5, type: !11)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 6, arg: 2, scope: !8, file: !5, type: !11)
-!15 = !{i32 undef}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 6, arg: 1, scope: !8, file: !5, type: !11)
-!17 = !DILocation(line: 4, scope: !4)
-!18 = !DILocation(line: 6, scope: !8, inlinedAt: !17)
-!19 = !{i32 2}
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 6, arg: 2, scope: !8, file: !5, type: !11)
-!21 = !DILocation(line: 7, scope: !8, inlinedAt: !17)
-!22 = !DILocation(line: 5, scope: !4)
-!23 = !DILocation(line: 6, scope: !8)
-!24 = !DILocation(line: 7, scope: !8)
-!25 = !DILocation(line: 8, scope: !8)
-!26 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
deleted file mode 100644
index 8c7823748864..000000000000
--- a/test/DebugInfo/inlined-vars.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: %llc_dwarf -O0 < %s | FileCheck %s -check-prefix ARGUMENT
-; RUN: %llc_dwarf -O0 < %s | FileCheck %s -check-prefix VARIABLE
-; PR 13202
-
-define i32 @main() uwtable {
-entry:
- tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !18, metadata !DIExpression()), !dbg !21
- tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !DIExpression()), !dbg !23
- tail call void @smth(i32 0), !dbg !24
- tail call void @smth(i32 0), !dbg !25
- ret i32 0, !dbg !19
-}
-
-declare void @smth(i32)
-
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!27}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.2 (trunk 159419)", isOptimized: true, emissionKind: 0, file: !26, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !{i32 0}
-!2 = !{}
-!3 = !{!5, !10}
-!5 = !DISubprogram(name: "main", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !26, scope: !6, type: !7, function: i32 ()* @main, variables: !2)
-!6 = !DIFile(filename: "inline-bug.cc", directory: "/tmp/dbginfo/pr13202")
-!7 = !DISubroutineType(types: !8)
-!8 = !{!9}
-!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "f", linkageName: "_ZL1fi", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !26, scope: !6, type: !11, variables: !13)
-!11 = !DISubroutineType(types: !12)
-!12 = !{!9, !9}
-!13 = !{!15, !16}
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argument", line: 3, arg: 1, scope: !10, file: !6, type: !9)
-
-; Two DW_TAG_formal_parameter: one abstract and one inlined.
-; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
-; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
-; ARGUMENT-NOT: {{.*Abbrev.*DW_TAG_formal_parameter}}
-
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "local", line: 4, scope: !10, file: !6, type: !9)
-
-; Two DW_TAG_variable: one abstract and one inlined.
-; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
-; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
-; VARIABLE-NOT: {{.*Abbrev.*DW_TAG_variable}}
-
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argument", line: 3, arg: 1, scope: !10, file: !6, type: !9)
-!19 = !DILocation(line: 11, column: 10, scope: !5)
-!21 = !DILocation(line: 3, column: 25, scope: !10, inlinedAt: !19)
-!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "local", line: 4, scope: !10, file: !6, type: !9)
-!23 = !DILocation(line: 4, column: 16, scope: !10, inlinedAt: !19)
-!24 = !DILocation(line: 5, column: 3, scope: !10, inlinedAt: !19)
-!25 = !DILocation(line: 6, column: 3, scope: !10, inlinedAt: !19)
-!26 = !DIFile(filename: "inline-bug.cc", directory: "/tmp/dbginfo/pr13202")
-!27 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test
index 8a2aaaab3a3c..6b3c0494cdd5 100644
--- a/test/DebugInfo/llvm-symbolizer.test
+++ b/test/DebugInfo/llvm-symbolizer.test
@@ -118,7 +118,9 @@ CHECK-NEXT: main
CHECK-NEXT: /tmp{{[/\\]}}cross-cu-inlining.c:11:0
RUN: echo "unexisting-file 0x1234" > %t.input2
-RUN: llvm-symbolizer < %t.input2
+RUN: llvm-symbolizer < %t.input2 2>&1 | FileCheck %s --check-prefix=MISSING-FILE
+
+MISSING-FILE: LLVMSymbolizer: error reading file: {{[Nn]}}o such file or directory
RUN: echo "%p/Inputs/macho-universal 0x1f84" > %t.input3
RUN: llvm-symbolizer < %t.input3 | FileCheck %s --check-prefix=UNKNOWN-ARCH
@@ -154,7 +156,7 @@ RUN: | FileCheck %s --check-prefix=STRIPPED
STRIPPED: global_func
RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" > %t.input7
-RUN: llvm-symbolizer --functions=short --use-symbol-table=false --demangle=false < %t.input7 \
+RUN: llvm-symbolizer --functions=short --demangle=false < %t.input7 \
RUN: | FileCheck %s --check-prefix=SHORT_FUNCTION_NAME
SHORT_FUNCTION_NAME-NOT: _Z1cv
diff --git a/test/DebugInfo/location-verifier.ll b/test/DebugInfo/location-verifier.ll
deleted file mode 100644
index f44a37d7fca5..000000000000
--- a/test/DebugInfo/location-verifier.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: not llvm-as -disable-output -verify-debug-info < %s 2>&1 | FileCheck %s
-; ModuleID = 'test.c'
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.10.0"
-
-; Function Attrs: nounwind ssp uwtable
-define i32 @foo() #0 {
-entry:
- ret i32 42, !dbg !13
-}
-
-attributes #0 = { nounwind ssp uwtable }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!9, !10, !11}
-!llvm.ident = !{!12}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "test.c", directory: "")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 ()* @foo, variables: !2)
-!5 = !DIFile(filename: "test.c", directory: "")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !{i32 2, !"Dwarf Version", i32 2}
-!10 = !{i32 2, !"Debug Info Version", i32 3}
-!11 = !{i32 1, !"PIC Level", i32 2}
-!12 = !{!"clang version 3.7.0 "}
-; An old-style DILocation should not pass verify.
-; CHECK: invalid !dbg metadata attachment
-!13 = !{i32 2, i32 2, !4, null}
diff --git a/test/DebugInfo/lto-comp-dir.ll b/test/DebugInfo/lto-comp-dir.ll
deleted file mode 100644
index 4bafb7bdd2e7..000000000000
--- a/test/DebugInfo/lto-comp-dir.ll
+++ /dev/null
@@ -1,84 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=line - | FileCheck %s
-; RUN: %llc_dwarf < %s -filetype=asm | FileCheck --check-prefix=ASM %s
-
-; If multiple line tables are emitted, one per CU, those line tables can
-; unambiguously rely on the comp_dir of their owning CU and use directory '0'
-; to refer to it.
-
-; CHECK: .debug_line contents:
-; CHECK-NEXT: Line table prologue:
-; CHECK-NOT: include_directories
-; CHECK: file_names[ 1] 0 {{.*}} a.cpp
-; CHECK-NOT: file_names
-
-; CHECK: Line table prologue:
-; CHECK-NOT: include_directories
-; CHECK: file_names[ 1] 0 {{.*}} b.cpp
-; CHECK-NOT: file_names
-
-; However, if a single line table is emitted and shared between CUs, the
-; comp_dir is ambiguous and relying on it would lead to different path
-; interpretations depending on which CU lead to the table - so ensure that
-; full paths are always emitted in this case, never comp_dir relative.
-
-; ASM: .file 1 "/tmp/dbginfo/a{{[/\\]+}}a.cpp"
-; ASM: .file 2 "/tmp/dbginfo/b{{[/\\]+}}b.cpp"
-
-; Generated from the following source compiled to bitcode from within their
-; respective directories (with debug info) and linked together with llvm-link
-
-; a/a.cpp
-; void func() {
-; }
-
-; b/b.cpp
-; void func();
-; int main() {
-; func();
-; }
-
-; Function Attrs: nounwind uwtable
-define void @_Z4funcv() #0 {
-entry:
- ret void, !dbg !19
-}
-
-; Function Attrs: uwtable
-define i32 @main() #1 {
-entry:
- call void @_Z4funcv(), !dbg !20
- ret i32 0, !dbg !21
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0, !8}
-!llvm.module.flags = !{!16, !17}
-!llvm.ident = !{!18, !18}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo/a")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "func", linkageName: "_Z4funcv", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @_Z4funcv, variables: !2)
-!5 = !DIFile(filename: "a.cpp", directory: "/tmp/dbginfo/a")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null}
-!8 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !9, enums: !2, retainedTypes: !2, subprograms: !10, globals: !2, imports: !2)
-!9 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo/b")
-!10 = !{!11}
-!11 = !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !9, scope: !12, type: !13, function: i32 ()* @main, variables: !2)
-!12 = !DIFile(filename: "b.cpp", directory: "/tmp/dbginfo/b")
-!13 = !DISubroutineType(types: !14)
-!14 = !{!15}
-!15 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!16 = !{i32 2, !"Dwarf Version", i32 4}
-!17 = !{i32 1, !"Debug Info Version", i32 3}
-!18 = !{!"clang version 3.5.0 "}
-!19 = !DILocation(line: 2, scope: !4)
-!20 = !DILocation(line: 3, scope: !11)
-!21 = !DILocation(line: 4, scope: !11)
-
diff --git a/test/DebugInfo/member-order.ll b/test/DebugInfo/member-order.ll
deleted file mode 100644
index f07e4f85cded..000000000000
--- a/test/DebugInfo/member-order.ll
+++ /dev/null
@@ -1,66 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; generated by clang from:
-; struct foo {
-; void f1();
-; void f2();
-; };
-;
-; void foo::f1() {
-; }
-
-; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name {{.*}} "foo"
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: NULL
-; CHECK: DW_AT_name {{.*}} "f1"
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: NULL
-; CHECK: DW_AT_name {{.*}} "f2"
-
-
-%struct.foo = type { i8 }
-
-; Function Attrs: nounwind uwtable
-define void @_ZN3foo2f1Ev(%struct.foo* %this) #0 align 2 {
-entry:
- %this.addr = alloca %struct.foo*, align 8
- store %struct.foo* %this, %struct.foo** %this.addr, align 8
- call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !16, metadata !DIExpression()), !dbg !18
- %this1 = load %struct.foo*, %struct.foo** %this.addr
- ret void, !dbg !19
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!15, !20}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !13, globals: !2, imports: !2)
-!1 = !DIFile(filename: "member-order.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 1, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS3foo")
-!5 = !{!6, !11}
-!6 = !DISubprogram(name: "f1", linkageName: "_ZN3foo2f1Ev", line: 2, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !4, type: !7)
-!7 = !DISubroutineType(types: !8)
-!8 = !{null, !9}
-!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS3foo")
-!10 = !{i32 786468}
-!11 = !DISubprogram(name: "f2", linkageName: "_ZN3foo2f2Ev", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !4, type: !7)
-!12 = !{i32 786468}
-!13 = !{!14}
-!14 = !DISubprogram(name: "f1", linkageName: "_ZN3foo2f1Ev", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: null, type: !7, function: void (%struct.foo*)* @_ZN3foo2f1Ev, declaration: !6, variables: !2)
-!15 = !{i32 2, !"Dwarf Version", i32 4}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !14, type: !17)
-!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS3foo")
-!18 = !DILocation(line: 0, scope: !14)
-!19 = !DILocation(line: 7, scope: !14)
-!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/member-pointers.ll b/test/DebugInfo/member-pointers.ll
deleted file mode 100644
index 0cf9ead84218..000000000000
--- a/test/DebugInfo/member-pointers.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; REQUIRES: object-emission
-; XFAIL: hexagon
-
-; RUN: %llc_dwarf -filetype=obj -O0 < %s > %t
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
-; CHECK: DW_TAG_ptr_to_member_type
-; CHECK: DW_TAG_ptr_to_member_type
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE:0x[0-9a-f]+]]})
-; CHECK: [[TYPE]]: DW_TAG_subroutine_type
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NEXT: DW_AT_type
-; CHECK-NEXT: DW_AT_artificial [DW_FORM_flag
-; IR generated from clang -g with the following source:
-; struct S {
-; };
-;
-; int S::*x = 0;
-; void (S::*y)(int) = 0;
-
-@x = global i64 -1, align 8
-@y = global { i64, i64 } zeroinitializer, align 8
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!16}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 ", isOptimized: false, emissionKind: 0, file: !15, enums: !1, retainedTypes: !1, subprograms: !1, globals: !3, imports: !1)
-!1 = !{}
-!3 = !{!5, !10}
-!5 = !DIGlobalVariable(name: "x", line: 4, isLocal: false, isDefinition: true, scope: null, file: !6, type: !7, variable: i64* @x)
-!6 = !DIFile(filename: "simple.cpp", directory: "/home/blaikie/Development/scratch")
-!7 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !8, extraData: !9)
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", line: 1, size: 8, align: 8, file: !15, elements: !1)
-!10 = !DIGlobalVariable(name: "y", line: 5, isLocal: false, isDefinition: true, scope: null, file: !6, type: !11, variable: { i64, i64 }* @y)
-!11 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !12, extraData: !9)
-!12 = !DISubroutineType(types: !13)
-!13 = !{null, !14, !8}
-!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !9)
-!15 = !DIFile(filename: "simple.cpp", directory: "/home/blaikie/Development/scratch")
-!16 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/missing-abstract-variable.ll b/test/DebugInfo/missing-abstract-variable.ll
deleted file mode 100644
index 8a576c2bfd34..000000000000
--- a/test/DebugInfo/missing-abstract-variable.ll
+++ /dev/null
@@ -1,182 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; The formal parameter 'b' for Function 'x' when inlined within 'a' is lost on
-; mips and powerpc64 (and on x86_64 at at least -O2). Presumably this is a
-; SelectionDAG issue (do mips/powerpc64 use FastISel?).
-; XFAIL: mips, powerpc64, s390x
-
-; Build from the following source with clang -O2.
-
-; The important details are that 'x's abstract definition is first built during
-; the definition of 'b', where the parameter to 'x' is constant and so 'x's 's'
-; variable is optimized away. No abstract definition DIE for 's' is constructed.
-; Then, during 'a' emission, the abstract DbgVariable for 's' is created, but
-; the abstract DIE isn't (since the abstract definition for 'b' is already
-; built). This results in 's' inlined in 'a' being emitted with its name, line,
-; file there, rather than referencing an abstract definition.
-
-; extern int t;
-;
-; void f(int);
-;
-; inline void x(bool b) {
-; if (b) {
-; int s = t;
-; f(s);
-; }
-; f(0);
-; }
-;
-; void b() {
-; x(false);
-; }
-;
-; void a(bool u) {
-; x(u);
-; }
-
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "x"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "b"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_lexical_block
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "s"
-
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "b"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} "_Z1xb"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} "b"
-; Notice 'x's local variable 's' is missing. Not necessarily a bug here,
-; since it's been optimized entirely away and it should be described in
-; abstract subprogram.
-; CHECK-NOT: DW_TAG
-; CHECK: NULL
-; CHECK-NOT: DW_TAG
-; CHECK: NULL
-
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "a"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} "_Z1xb"
-; CHECK-NOT: {{DW_TAG|NULL}}
-; FIXME: This formal parameter goes missing at least at -O2 (& on
-; mips/powerpc), maybe before that. Perhaps SelectionDAG is to blame (and
-; fastisel succeeds).
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} "b"
-
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_lexical_block
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} "s"
-
-@t = external global i32
-
-; Function Attrs: uwtable
-define void @_Z1bv() #0 {
-entry:
- tail call void @llvm.dbg.value(metadata i1 false, i64 0, metadata !25, metadata !DIExpression()), !dbg !27
- tail call void @_Z1fi(i32 0), !dbg !28
- ret void, !dbg !29
-}
-
-; Function Attrs: uwtable
-define void @_Z1ab(i1 zeroext %u) #0 {
-entry:
- tail call void @llvm.dbg.value(metadata i1 %u, i64 0, metadata !13, metadata !DIExpression()), !dbg !30
- tail call void @llvm.dbg.value(metadata i1 %u, i64 0, metadata !31, metadata !DIExpression()), !dbg !33
- br i1 %u, label %if.then.i, label %_Z1xb.exit, !dbg !34
-
-if.then.i: ; preds = %entry
- %0 = load i32, i32* @t, align 4, !dbg !35, !tbaa !36
- tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !40, metadata !DIExpression()), !dbg !35
- tail call void @_Z1fi(i32 %0), !dbg !41
- br label %_Z1xb.exit, !dbg !42
-
-_Z1xb.exit: ; preds = %entry, %if.then.i
- tail call void @_Z1fi(i32 0), !dbg !43
- ret void, !dbg !44
-}
-
-declare void @_Z1fi(i32) #1
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!21, !22}
-!llvm.ident = !{!23}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "missing-abstract-variables.cc", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4, !8, !14}
-!4 = !DISubprogram(name: "b", linkageName: "_Z1bv", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !1, scope: !5, type: !6, function: void ()* @_Z1bv, variables: !2)
-!5 = !DIFile(filename: "missing-abstract-variables.cc", directory: "/tmp/dbginfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null}
-!8 = !DISubprogram(name: "a", linkageName: "_Z1ab", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !1, scope: !5, type: !9, function: void (i1)* @_Z1ab, variables: !12)
-!9 = !DISubroutineType(types: !10)
-!10 = !{null, !11}
-!11 = !DIBasicType(tag: DW_TAG_base_type, name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
-!12 = !{!13}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "u", line: 17, arg: 1, scope: !8, file: !5, type: !11)
-!14 = !DISubprogram(name: "x", linkageName: "_Z1xb", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !9, variables: !15)
-!15 = !{!16, !17}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "s", line: 7, scope: !18, file: !5, type: !20)
-!18 = distinct !DILexicalBlock(line: 6, column: 0, file: !1, scope: !19)
-!19 = distinct !DILexicalBlock(line: 6, column: 0, file: !1, scope: !14)
-!20 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!21 = !{i32 2, !"Dwarf Version", i32 4}
-!22 = !{i32 2, !"Debug Info Version", i32 3}
-!23 = !{!"clang version 3.5.0 "}
-!24 = !{i1 false}
-!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11)
-!26 = !DILocation(line: 14, scope: !4)
-!27 = !DILocation(line: 5, scope: !14, inlinedAt: !26)
-!28 = !DILocation(line: 10, scope: !14, inlinedAt: !26)
-!29 = !DILocation(line: 15, scope: !4)
-!30 = !DILocation(line: 17, scope: !8)
-!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 1, scope: !14, file: !5, type: !11)
-!32 = !DILocation(line: 18, scope: !8)
-!33 = !DILocation(line: 5, scope: !14, inlinedAt: !32)
-!34 = !DILocation(line: 6, scope: !19, inlinedAt: !32)
-!35 = !DILocation(line: 7, scope: !18, inlinedAt: !32)
-!36 = !{!37, !37, i64 0}
-!37 = !{!"int", !38, i64 0}
-!38 = !{!"omnipotent char", !39, i64 0}
-!39 = !{!"Simple C/C++ TBAA"}
-!40 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "s", line: 7, scope: !18, file: !5, type: !20)
-!41 = !DILocation(line: 8, scope: !18, inlinedAt: !32)
-!42 = !DILocation(line: 9, scope: !18, inlinedAt: !32)
-!43 = !DILocation(line: 10, scope: !14, inlinedAt: !32)
-!44 = !DILocation(line: 19, scope: !8)
diff --git a/test/DebugInfo/multiline.ll b/test/DebugInfo/multiline.ll
deleted file mode 100644
index e6b43239fda3..000000000000
--- a/test/DebugInfo/multiline.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; RUN: llc -filetype=asm -asm-verbose=0 -O0 < %s | FileCheck %s
-; RUN: llc -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=line - | FileCheck %s --check-prefix=INT
-; XFAIL: hexagon
-
-; Check that the assembly output properly handles is_stmt changes. And since
-; we're testing anyway, check the integrated assembler too.
-
-; Generated with clang from multiline.c:
-; void f1();
-; void f2() {
-; f1(); f1(); f1();
-; f1(); f1(); f1();
-; }
-
-
-; CHECK: .loc 1 2 0{{$}}
-; CHECK-NOT: .loc{{ }}
-; CHECK: .loc 1 3 3 prologue_end{{$}}
-; CHECK-NOT: .loc
-; CHECK: .loc 1 3 9 is_stmt 0{{$}}
-; CHECK-NOT: .loc
-; CHECK: .loc 1 3 15{{$}}
-; CHECK-NOT: .loc
-; CHECK: .loc 1 4 3 is_stmt 1{{$}}
-; CHECK-NOT: .loc
-; CHECK: .loc 1 4 9 is_stmt 0{{$}}
-; CHECK-NOT: .loc
-; CHECK: .loc 1 4 15{{$}}
-; CHECK-NOT: .loc
-; CHECK: .loc 1 5 1 is_stmt 1{{$}}
-
-; INT: {{^}}Address
-; INT: -----
-; INT-NEXT: 2 0 1 0 0 is_stmt{{$}}
-; INT-NEXT: 3 3 1 0 0 is_stmt prologue_end{{$}}
-; INT-NEXT: 3 9 1 0 0 {{$}}
-; INT-NEXT: 3 15 1 0 0 {{$}}
-; INT-NEXT: 4 3 1 0 0 is_stmt{{$}}
-; INT-NEXT: 4 9 1 0 0 {{$}}
-; INT-NEXT: 4 15 1 0 0 {{$}}
-; INT-NEXT: 5 1 1 0 0 is_stmt{{$}}
-
-
-; Function Attrs: nounwind uwtable
-define void @f2() #0 {
-entry:
- call void (...) @f1(), !dbg !11
- call void (...) @f1(), !dbg !12
- call void (...) @f1(), !dbg !13
- call void (...) @f1(), !dbg !14
- call void (...) @f1(), !dbg !15
- call void (...) @f1(), !dbg !16
- ret void, !dbg !17
-}
-
-declare void @f1(...) #1
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-!llvm.ident = !{!10}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 225000) (llvm/trunk 224999)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "multiline.c", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "f2", line: 2, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: void ()* @f2, variables: !2)
-!5 = !DIFile(filename: "multiline.c", directory: "/tmp/dbginfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null}
-!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 2, !"Debug Info Version", i32 3}
-!10 = !{!"clang version 3.6.0 (trunk 225000) (llvm/trunk 224999)"}
-!11 = !DILocation(line: 3, column: 3, scope: !4)
-!12 = !DILocation(line: 3, column: 9, scope: !4)
-!13 = !DILocation(line: 3, column: 15, scope: !4)
-!14 = !DILocation(line: 4, column: 3, scope: !4)
-!15 = !DILocation(line: 4, column: 9, scope: !4)
-!16 = !DILocation(line: 4, column: 15, scope: !4)
-!17 = !DILocation(line: 5, column: 1, scope: !4)
diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll
deleted file mode 100644
index 85ef7356205c..000000000000
--- a/test/DebugInfo/namespace.ll
+++ /dev/null
@@ -1,366 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-; CHECK: debug_info contents
-; CHECK: [[NS1:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
-; CHECK-NEXT: DW_AT_name{{.*}} = "A"
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1:".*debug-info-namespace.cpp"]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(5)
-; CHECK-NOT: NULL
-; CHECK: [[NS2:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
-; CHECK-NEXT: DW_AT_name{{.*}} = "B"
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2:".*foo.cpp"]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(1)
-; CHECK-NOT: NULL
-; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable
-; CHECK-NEXT: DW_AT_name{{.*}}= "i"
-; CHECK: [[VAR_FWD:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable
-; CHECK-NEXT: DW_AT_name{{.*}}= "var_fwd"
-; CHECK-NOT: NULL
-; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name{{.*}}= "foo"
-; CHECK-NEXT: DW_AT_declaration
-; CHECK-NOT: NULL
-; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name{{.*}}= "bar"
-; CHECK: [[FUNC1:.*]]: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_MIPS_linkage_name
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}= "f1"
-; CHECK: [[BAZ:0x[0-9a-f]*]]:{{.*}}DW_TAG_typedef
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}= "baz"
-; CHECK: [[VAR_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}= "var_decl"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_declaration
-; CHECK: [[FUNC_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}= "func_decl"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_declaration
-; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}= "func_fwd"
-; CHECK-NOT: DW_AT_declaration
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_MIPS_linkage_name
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}= "f1"
-; CHECK: NULL
-
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_module
-; This is a bug, it should be in F2 but it inherits the file from its
-; enclosing scope
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(15)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
-; CHECK: NULL
-; CHECK-NOT: NULL
-
-; CHECK: DW_TAG_imported_module
-; Same bug as above, this should be F2, not F1
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(18)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
-; CHECK-NOT: NULL
-
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_MIPS_linkage_name
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name{{.*}}= "func"
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_module
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(26)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(27)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FOO]]})
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(28)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[BAR]]})
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(29)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC1]]})
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(30)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[I]]})
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(31)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[BAZ]]})
-; CHECK-NOT: NULL
-; CHECK: [[X:0x[0-9a-f]*]]:{{ *}}DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(32)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
-; CHECK-NEXT: DW_AT_name{{.*}}"X"
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(33)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[X]]})
-; CHECK-NEXT: DW_AT_name{{.*}}"Y"
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(34)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[VAR_DECL]]})
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(35)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC_DECL]]})
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(36)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[VAR_FWD]]})
-; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(37)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC_FWD]]})
-
-; CHECK: DW_TAG_lexical_block
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_imported_module
-; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(23)
-; CHECK-NEXT: DW_AT_import{{.*}}=>
-; CHECK: NULL
-; CHECK: NULL
-; CHECK: NULL
-
-; IR generated from clang/test/CodeGenCXX/debug-info-namespace.cpp, file paths
-; changed to protect the guilty. The C++ source code is:
-; // RUN...
-; // RUN...
-; // RUN...
-;
-; namespace A {
-; #line 1 "foo.cpp"
-; namespace B {
-; extern int i;
-; int f1() { return 0; }
-; void f1(int) { }
-; struct foo;
-; struct bar { };
-; typedef bar baz;
-; extern int var_decl;
-; void func_decl(void);
-; extern int var_fwd;
-; void func_fwd(void);
-; }
-; }
-; namespace A {
-; using namespace B;
-; }
-;
-; using namespace A;
-; namespace E = A;
-; int B::i = f1();
-; int func(bool b) {
-; if (b) {
-; using namespace A::B;
-; return i;
-; }
-; using namespace A;
-; using B::foo;
-; using B::bar;
-; using B::f1;
-; using B::i;
-; using B::baz;
-; namespace X = A;
-; namespace Y = X;
-; using B::var_decl;
-; using B::func_decl;
-; using B::var_fwd;
-; using B::func_fwd;
-; return i + X::B::i + Y::B::i;
-; }
-;
-; namespace A {
-; using B::i;
-; namespace B {
-; int var_fwd = i;
-; }
-; }
-; void B::func_fwd() {}
-
-@_ZN1A1B1iE = global i32 0, align 4
-@_ZN1A1B7var_fwdE = global i32 0, align 4
-@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_debug_info_namespace.cpp, i8* null }]
-
-; Function Attrs: nounwind ssp uwtable
-define i32 @_ZN1A1B2f1Ev() #0 {
-entry:
- ret i32 0, !dbg !60
-}
-
-; Function Attrs: nounwind ssp uwtable
-define void @_ZN1A1B2f1Ei(i32) #0 {
-entry:
- %.addr = alloca i32, align 4
- store i32 %0, i32* %.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %.addr, metadata !61, metadata !62), !dbg !63
- ret void, !dbg !64
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
-entry:
- %call = call i32 @_ZN1A1B2f1Ev(), !dbg !65
- store i32 %call, i32* @_ZN1A1B1iE, align 4, !dbg !65
- ret void, !dbg !65
-}
-
-; Function Attrs: nounwind ssp uwtable
-define i32 @_Z4funcb(i1 zeroext %b) #0 {
-entry:
- %retval = alloca i32, align 4
- %b.addr = alloca i8, align 1
- %frombool = zext i1 %b to i8
- store i8 %frombool, i8* %b.addr, align 1
- call void @llvm.dbg.declare(metadata i8* %b.addr, metadata !66, metadata !62), !dbg !67
- %0 = load i8, i8* %b.addr, align 1, !dbg !68
- %tobool = trunc i8 %0 to i1, !dbg !68
- br i1 %tobool, label %if.then, label %if.end, !dbg !68
-
-if.then: ; preds = %entry
- %1 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !69
- store i32 %1, i32* %retval, !dbg !69
- br label %return, !dbg !69
-
-if.end: ; preds = %entry
- %2 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !70
- %3 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !70
- %add = add nsw i32 %2, %3, !dbg !70
- %4 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !70
- %add1 = add nsw i32 %add, %4, !dbg !70
- store i32 %add1, i32* %retval, !dbg !70
- br label %return, !dbg !70
-
-return: ; preds = %if.end, %if.then
- %5 = load i32, i32* %retval, !dbg !71
- ret i32 %5, !dbg !71
-}
-
-define internal void @__cxx_global_var_init1() section "__TEXT,__StaticInit,regular,pure_instructions" {
-entry:
- %0 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !72
- store i32 %0, i32* @_ZN1A1B7var_fwdE, align 4, !dbg !72
- ret void, !dbg !72
-}
-
-; Function Attrs: nounwind ssp uwtable
-define void @_ZN1A1B8func_fwdEv() #0 {
-entry:
- ret void, !dbg !73
-}
-
-define internal void @_GLOBAL__sub_I_debug_info_namespace.cpp() section "__TEXT,__StaticInit,regular,pure_instructions" {
-entry:
- call void @__cxx_global_var_init(), !dbg !74
- call void @__cxx_global_var_init1(), !dbg !74
- ret void, !dbg !74
-}
-
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!57, !58}
-!llvm.ident = !{!59}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !30, imports: !33)
-!1 = !DIFile(filename: "debug-info-namespace.cpp", directory: "/tmp")
-!2 = !{}
-!3 = !{!4, !8}
-!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 5, flags: DIFlagFwdDecl, file: !5, scope: !6, identifier: "_ZTSN1A1B3fooE")
-!5 = !DIFile(filename: "foo.cpp", directory: "/tmp")
-!6 = !DINamespace(name: "B", line: 1, file: !5, scope: !7)
-!7 = !DINamespace(name: "A", line: 5, file: !1, scope: null)
-!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "bar", line: 6, size: 8, align: 8, file: !5, scope: !6, elements: !2, identifier: "_ZTSN1A1B3barE")
-!9 = !{!10, !14, !17, !21, !25, !26, !27}
-!10 = !DISubprogram(name: "f1", linkageName: "_ZN1A1B2f1Ev", line: 3, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !5, scope: !6, type: !11, function: i32 ()* @_ZN1A1B2f1Ev, variables: !2)
-!11 = !DISubroutineType(types: !12)
-!12 = !{!13}
-!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!14 = !DISubprogram(name: "f1", linkageName: "_ZN1A1B2f1Ei", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !5, scope: !6, type: !15, function: void (i32)* @_ZN1A1B2f1Ei, variables: !2)
-!15 = !DISubroutineType(types: !16)
-!16 = !{null, !13}
-!17 = !DISubprogram(name: "__cxx_global_var_init", line: 20, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !5, scope: !18, type: !19, function: void ()* @__cxx_global_var_init, variables: !2)
-!18 = !DIFile(filename: "foo.cpp", directory: "/tmp")
-!19 = !DISubroutineType(types: !20)
-!20 = !{null}
-!21 = !DISubprogram(name: "func", linkageName: "_Z4funcb", line: 21, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 21, file: !5, scope: !18, type: !22, function: i32 (i1)* @_Z4funcb, variables: !2)
-!22 = !DISubroutineType(types: !23)
-!23 = !{!13, !24}
-!24 = !DIBasicType(tag: DW_TAG_base_type, name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
-!25 = !DISubprogram(name: "__cxx_global_var_init1", line: 44, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 44, file: !5, scope: !18, type: !19, function: void ()* @__cxx_global_var_init1, variables: !2)
-!26 = !DISubprogram(name: "func_fwd", linkageName: "_ZN1A1B8func_fwdEv", line: 47, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 47, file: !5, scope: !6, type: !19, function: void ()* @_ZN1A1B8func_fwdEv, variables: !2)
-!27 = !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_debug_info_namespace.cpp", isLocal: true, isDefinition: true, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !28, type: !29, function: void ()* @_GLOBAL__sub_I_debug_info_namespace.cpp, variables: !2)
-!28 = !DIFile(filename: "debug-info-namespace.cpp", directory: "/tmp")
-!29 = !DISubroutineType(types: !2)
-!30 = !{!31, !32}
-!31 = !DIGlobalVariable(name: "i", linkageName: "_ZN1A1B1iE", line: 20, isLocal: false, isDefinition: true, scope: !6, file: !18, type: !13, variable: i32* @_ZN1A1B1iE)
-!32 = !DIGlobalVariable(name: "var_fwd", linkageName: "_ZN1A1B7var_fwdE", line: 44, isLocal: false, isDefinition: true, scope: !6, file: !18, type: !13, variable: i32* @_ZN1A1B7var_fwdE)
-!33 = !{!34, !35, !36, !37, !40, !41, !42, !43, !44, !45, !47, !48, !49, !51, !54, !55, !56}
-!34 = !DIImportedEntity(tag: DW_TAG_imported_module, line: 15, scope: !7, entity: !6)
-!35 = !DIImportedEntity(tag: DW_TAG_imported_module, line: 18, scope: !0, entity: !7)
-!36 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 19, name: "E", scope: !0, entity: !7)
-!37 = !DIImportedEntity(tag: DW_TAG_imported_module, line: 23, scope: !38, entity: !6)
-!38 = distinct !DILexicalBlock(line: 22, column: 10, file: !5, scope: !39)
-!39 = distinct !DILexicalBlock(line: 22, column: 7, file: !5, scope: !21)
-!40 = !DIImportedEntity(tag: DW_TAG_imported_module, line: 26, scope: !21, entity: !7)
-!41 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 27, scope: !21, entity: !"_ZTSN1A1B3fooE")
-!42 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 28, scope: !21, entity: !"_ZTSN1A1B3barE")
-!43 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 29, scope: !21, entity: !14)
-!44 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 30, scope: !21, entity: !31)
-!45 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 31, scope: !21, entity: !46)
-!46 = !DIDerivedType(tag: DW_TAG_typedef, name: "baz", line: 7, file: !5, scope: !6, baseType: !"_ZTSN1A1B3barE")
-!47 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 32, name: "X", scope: !21, entity: !7)
-!48 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 33, name: "Y", scope: !21, entity: !47)
-!49 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 34, scope: !21, entity: !50)
-!50 = !DIGlobalVariable(name: "var_decl", linkageName: "_ZN1A1B8var_declE", line: 8, isLocal: false, isDefinition: false, scope: !6, file: !18, type: !13)
-!51 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 35, scope: !21, entity: !52)
-!52 = !DISubprogram(name: "func_decl", linkageName: "_ZN1A1B9func_declEv", line: 9, isLocal: false, isDefinition: false, flags: DIFlagPrototyped, isOptimized: false, file: !5, scope: !6, type: !19, variables: !53)
-!53 = !{} ; previously: invalid DW_TAG_base_type
-!54 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 36, scope: !21, entity: !32)
-!55 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 37, scope: !21, entity: !26)
-!56 = !DIImportedEntity(tag: DW_TAG_imported_declaration, line: 42, scope: !7, entity: !31)
-!57 = !{i32 2, !"Dwarf Version", i32 2}
-!58 = !{i32 2, !"Debug Info Version", i32 3}
-!59 = !{!"clang version 3.6.0 "}
-!60 = !DILocation(line: 3, column: 12, scope: !10)
-!61 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 4, arg: 1, scope: !14, file: !18, type: !13)
-!62 = !DIExpression()
-!63 = !DILocation(line: 4, column: 12, scope: !14)
-!64 = !DILocation(line: 4, column: 16, scope: !14)
-!65 = !DILocation(line: 20, column: 12, scope: !17)
-!66 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 21, arg: 1, scope: !21, file: !18, type: !24)
-!67 = !DILocation(line: 21, column: 15, scope: !21)
-!68 = !DILocation(line: 22, column: 7, scope: !21)
-!69 = !DILocation(line: 24, column: 5, scope: !38)
-!70 = !DILocation(line: 38, column: 3, scope: !21)
-!71 = !DILocation(line: 39, column: 1, scope: !21)
-!72 = !DILocation(line: 44, column: 15, scope: !25)
-!73 = !DILocation(line: 47, column: 21, scope: !26)
-!74 = !DILocation(line: 0, scope: !75)
-!75 = !DILexicalBlockFile(discriminator: 0, file: !5, scope: !27)
diff --git a/test/DebugInfo/namespace_function_definition.ll b/test/DebugInfo/namespace_function_definition.ll
deleted file mode 100644
index 44340635484a..000000000000
--- a/test/DebugInfo/namespace_function_definition.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; Generated from clang with the following source:
-; namespace ns {
-; void func() {
-; }
-; }
-
-; CHECK: DW_TAG_namespace
-; CHECK-NEXT: DW_AT_name {{.*}} "ns"
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_low_pc
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_linkage_name {{.*}} "_ZN2ns4funcEv"
-; CHECK: NULL
-; CHECK: NULL
-
-; Function Attrs: nounwind uwtable
-define void @_ZN2ns4funcEv() #0 {
-entry:
- ret void, !dbg !11
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-!llvm.ident = !{!10}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "namespace_function_definition.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "func", linkageName: "_ZN2ns4funcEv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: void ()* @_ZN2ns4funcEv, variables: !2)
-!5 = !DINamespace(name: "ns", line: 1, file: !1, scope: null)
-!6 = !DISubroutineType(types: !7)
-!7 = !{null}
-!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 1, !"Debug Info Version", i32 3}
-!10 = !{!"clang version 3.5.0 "}
-!11 = !DILocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/namespace_inline_function_definition.ll b/test/DebugInfo/namespace_inline_function_definition.ll
deleted file mode 100644
index c14152065b72..000000000000
--- a/test/DebugInfo/namespace_inline_function_definition.ll
+++ /dev/null
@@ -1,95 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; Generate from clang with the following source. Note that the definition of
-; the inline function follows its use to workaround another bug that should be
-; fixed soon.
-; namespace ns {
-; int func(int i);
-; }
-; extern int x;
-; int main() { return ns::func(x); }
-; int __attribute__((always_inline)) ns::func(int i) { return i * 2; }
-
-; CHECK: DW_TAG_namespace
-; CHECK-NEXT: DW_AT_name {{.*}} "ns"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_linkage_name {{.*}} "_ZN2ns4funcEi"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_formal_parameter
-; CHECK: NULL
-; CHECK-NOT: NULL
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} "_ZN2ns4funcEi"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_formal_parameter
-; CHECK: DW_AT_abstract_origin {{.*}} "i"
-; CHECK: NULL
-; CHECK: NULL
-; CHECK: NULL
-
-@x = external global i32
-
-; Function Attrs: uwtable
-define i32 @main() #0 {
-entry:
- %i.addr.i = alloca i32, align 4
- %retval = alloca i32, align 4
- store i32 0, i32* %retval
- %0 = load i32, i32* @x, align 4, !dbg !16
- store i32 %0, i32* %i.addr.i, align 4
- call void @llvm.dbg.declare(metadata i32* %i.addr.i, metadata !117, metadata !DIExpression()), !dbg !18
- %1 = load i32, i32* %i.addr.i, align 4, !dbg !18
- %mul.i = mul nsw i32 %1, 2, !dbg !18
- ret i32 %mul.i, !dbg !16
-}
-
-; Function Attrs: alwaysinline nounwind uwtable
-define i32 @_ZN2ns4funcEi(i32 %i) #1 {
-entry:
- %i.addr = alloca i32, align 4
- store i32 %i, i32* %i.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !17, metadata !DIExpression()), !dbg !19
- %0 = load i32, i32* %i.addr, align 4, !dbg !19
- %mul = mul nsw i32 %0, 2, !dbg !19
- ret i32 %mul, !dbg !19
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!13, !14}
-!llvm.ident = !{!15}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "namespace_inline_function_definition.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4, !9}
-!4 = !DISubprogram(name: "main", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
-!5 = !DIFile(filename: "namespace_inline_function_definition.cpp", directory: "/tmp/dbginfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DISubprogram(name: "func", linkageName: "_ZN2ns4funcEi", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !10, type: !11, function: i32 (i32)* @_ZN2ns4funcEi, variables: !2)
-!10 = !DINamespace(name: "ns", line: 1, file: !1, scope: null)
-!11 = !DISubroutineType(types: !12)
-!12 = !{!8, !8}
-!13 = !{i32 2, !"Dwarf Version", i32 4}
-!14 = !{i32 2, !"Debug Info Version", i32 3}
-!15 = !{!"clang version 3.5.0 "}
-!16 = !DILocation(line: 5, scope: !4)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 6, arg: 1, scope: !9, file: !5, type: !8)
-
-!117 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 6, arg: 1, scope: !9, file: !5, type: !8)
-
-!18 = !DILocation(line: 6, scope: !9, inlinedAt: !16)
-!19 = !DILocation(line: 6, scope: !9)
diff --git a/test/DebugInfo/nodebug.ll b/test/DebugInfo/nodebug.ll
deleted file mode 100644
index 3ef3119a0c77..000000000000
--- a/test/DebugInfo/nodebug.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; Test that a nodebug function (a function not appearing in the debug info IR
-; metadata subprogram list) with DebugLocs on its IR doesn't cause crashes/does
-; the right thing.
-
-; Build with clang from the following:
-; extern int i;
-; inline __attribute__((always_inline)) void f1() {
-; i = 3;
-; }
-;
-; __attribute__((nodebug)) void f2() {
-; f1();
-; }
-
-; Check that there's only one DW_TAG_subprogram, nothing for the 'f2' function.
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "f1"
-; CHECK-NOT: DW_TAG_subprogram
-
-@i = external global i32
-
-; Function Attrs: uwtable
-define void @_Z2f2v() #0 {
-entry:
- store i32 3, i32* @i, align 4, !dbg !11
- ret void
-}
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-!llvm.ident = !{!10}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "nodebug.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
-!5 = !DIFile(filename: "nodebug.cpp", directory: "/tmp/dbginfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null}
-!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 2, !"Debug Info Version", i32 3}
-!10 = !{!"clang version 3.5.0 "}
-!11 = !DILocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/piece-verifier.ll b/test/DebugInfo/piece-verifier.ll
deleted file mode 100644
index 9d7efd6a396f..000000000000
--- a/test/DebugInfo/piece-verifier.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: not llvm-as -disable-output < %s 2>&1 | FileCheck %s
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.9.0"
-
-; Function Attrs: nounwind ssp uwtable
-define i32 @foo(i64 %s.coerce0, i32 %s.coerce1) #0 {
-entry:
- call void @llvm.dbg.value(metadata i64 %s.coerce0, i64 0, metadata !20, metadata !24), !dbg !21
- call void @llvm.dbg.value(metadata i32 %s.coerce1, i64 0, metadata !22, metadata !27), !dbg !21
- ret i32 %s.coerce1, !dbg !23
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
-
-attributes #0 = { nounwind ssp uwtable "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!17, !18}
-!llvm.ident = !{!19}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "pieces.c", directory: "")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !1, scope: !5, type: !6, function: i32 (i64, i32)* @foo, variables: !15)
-!5 = !DIFile(filename: "pieces.c", directory: "")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8, !9}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DIDerivedType(tag: DW_TAG_typedef, name: "S", line: 1, file: !1, baseType: !10)
-!10 = !DICompositeType(tag: DW_TAG_structure_type, line: 1, size: 128, align: 64, file: !1, elements: !11)
-!11 = !{!12, !14}
-!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 1, size: 64, align: 64, file: !1, scope: !10, baseType: !13)
-!13 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
-!14 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 1, size: 32, align: 32, offset: 64, file: !1, scope: !10, baseType: !8)
-!15 = !{!16}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
-!17 = !{i32 2, !"Dwarf Version", i32 4}
-!18 = !{i32 1, !"Debug Info Version", i32 3}
-!19 = !{!"clang version 3.5 "}
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
-!21 = !DILocation(line: 3, scope: !4)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", line: 3, arg: 1, scope: !4, file: !5, type: !9)
-!23 = !DILocation(line: 4, scope: !4)
-!24 = !DIExpression(DW_OP_deref, DW_OP_bit_piece, 0, 64)
-!25 = !{}
-; This expression has elements after DW_OP_bit_piece.
-; CHECK: invalid expression
-; CHECK-NEXT: !DIExpression({{[0-9]+}}, 64, 32, {{[0-9]+}})
-; CHECK-NOT: invalid expression
-!27 = !DIExpression(DW_OP_bit_piece, 64, 32, DW_OP_deref)
diff --git a/test/DebugInfo/restrict.ll b/test/DebugInfo/restrict.ll
deleted file mode 100644
index 71d94f60c965..000000000000
--- a/test/DebugInfo/restrict.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -dwarf-version=2 -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=V2 %s
-; RUN: %llc_dwarf -dwarf-version=3 -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=V3 %s
-
-; CHECK: DW_AT_name {{.*}} "dst"
-; V2: DW_AT_type {{.*}} {[[PTR:0x.*]]}
-; V3: DW_AT_type {{.*}} {[[RESTRICT:0x.*]]}
-; V3: [[RESTRICT]]: {{.*}}DW_TAG_restrict_type
-; V3-NEXT: DW_AT_type {{.*}} {[[PTR:0x.*]]}
-; CHECK: [[PTR]]: {{.*}}DW_TAG_pointer_type
-; CHECK-NOT: DW_AT_type
-
-; Generated with clang from:
-; void foo(void* __restrict__ dst) {
-; }
-
-
-; Function Attrs: nounwind uwtable
-define void @_Z3fooPv(i8* noalias %dst) #0 {
-entry:
- %dst.addr = alloca i8*, align 8
- store i8* %dst, i8** %dst.addr, align 8
- call void @llvm.dbg.declare(metadata i8** %dst.addr, metadata !13, metadata !DIExpression()), !dbg !14
- ret void, !dbg !15
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!10, !11}
-!llvm.ident = !{!12}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "restrict.c", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "foo", linkageName: "_Z3fooPv", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i8*)* @_Z3fooPv, variables: !2)
-!5 = !DIFile(filename: "restrict.c", directory: "/tmp/dbginfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null, !8}
-!8 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !9)
-!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
-!10 = !{i32 2, !"Dwarf Version", i32 4}
-!11 = !{i32 1, !"Debug Info Version", i32 3}
-!12 = !{!"clang version 3.5.0 "}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "dst", line: 1, arg: 1, scope: !4, file: !5, type: !8)
-!14 = !DILocation(line: 1, scope: !4)
-!15 = !DILocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/skeletoncu.ll b/test/DebugInfo/skeletoncu.ll
new file mode 100644
index 000000000000..d70e9333336a
--- /dev/null
+++ b/test/DebugInfo/skeletoncu.ll
@@ -0,0 +1,17 @@
+; RUN: %llc_dwarf %s -filetype=obj -o %t
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; XFAIL: hexagon
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_GNU_dwo_id {{.*}}abcd
+; CHECK: DW_AT_GNU_dwo_name {{.*}}"my.dwo"
+; REQUIRES: default_triple
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM", isOptimized: false, runtimeVersion: 2, splitDebugFilename: "my.dwo", emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2, dwoId: 43981)
+!1 = !DIFile(filename: "<stdin>", directory: "/")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+
diff --git a/test/DebugInfo/sugared-constants.ll b/test/DebugInfo/sugared-constants.ll
deleted file mode 100644
index 421fe1dcd85c..000000000000
--- a/test/DebugInfo/sugared-constants.ll
+++ /dev/null
@@ -1,82 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-; Use correct signedness when emitting constants of derived (sugared) types.
-
-; Test compiled to IR from clang with -O1 and the following source:
-
-; void func(int);
-; void func(unsigned);
-; void func(char16_t);
-; int main() {
-; const int i = 42;
-; func(i);
-; const unsigned j = 117;
-; func(j);
-; char16_t c = 7;
-; func(c);
-; }
-
-; CHECK: DW_AT_const_value [DW_FORM_sdata] (42)
-; CHECK: DW_AT_const_value [DW_FORM_udata] (117)
-; CHECK: DW_AT_const_value [DW_FORM_udata] (7)
-
-; Function Attrs: uwtable
-define i32 @main() #0 {
-entry:
- tail call void @llvm.dbg.value(metadata i32 42, i64 0, metadata !10, metadata !DIExpression()), !dbg !21
- tail call void @_Z4funci(i32 42), !dbg !22
- tail call void @llvm.dbg.value(metadata i32 117, i64 0, metadata !12, metadata !DIExpression()), !dbg !24
- tail call void @_Z4funcj(i32 117), !dbg !25
- tail call void @llvm.dbg.value(metadata i16 7, i64 0, metadata !15, metadata !DIExpression()), !dbg !27
- tail call void @_Z4funcDs(i16 zeroext 7), !dbg !28
- ret i32 0, !dbg !29
-}
-
-declare void @_Z4funci(i32) #1
-
-declare void @_Z4funcj(i32) #1
-
-declare void @_Z4funcDs(i16 zeroext) #1
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!17, !18}
-!llvm.ident = !{!19}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "const.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !9)
-!5 = !DIFile(filename: "const.cpp", directory: "/tmp/dbginfo")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !{!10, !12, !15}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 5, scope: !4, file: !5, type: !11)
-!11 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !8)
-!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 7, scope: !4, file: !5, type: !13)
-!13 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !14)
-!14 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 9, scope: !4, file: !5, type: !16)
-!16 = !DIBasicType(tag: DW_TAG_base_type, name: "char16_t", size: 16, align: 16, encoding: 16)
-!17 = !{i32 2, !"Dwarf Version", i32 4}
-!18 = !{i32 1, !"Debug Info Version", i32 3}
-!19 = !{!"clang version 3.5.0 "}
-!20 = !{i32 42}
-!21 = !DILocation(line: 5, scope: !4)
-!22 = !DILocation(line: 6, scope: !4)
-!23 = !{i32 117}
-!24 = !DILocation(line: 7, scope: !4)
-!25 = !DILocation(line: 8, scope: !4)
-!26 = !{i16 7}
-!27 = !DILocation(line: 9, scope: !4)
-!28 = !DILocation(line: 10, scope: !4)
-!29 = !DILocation(line: 11, scope: !4)
diff --git a/test/DebugInfo/template-recursive-void.ll b/test/DebugInfo/template-recursive-void.ll
deleted file mode 100644
index 30eaee3f75d3..000000000000
--- a/test/DebugInfo/template-recursive-void.ll
+++ /dev/null
@@ -1,61 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-
-; This was pulled from clang's debug-info-template-recursive.cpp test.
-; class base { };
-
-; template <class T> class foo : public base {
-; void operator=(const foo r) { }
-; };
-
-; class bar : public foo<void> { };
-; bar filters;
-
-; CHECK: DW_TAG_template_type_parameter [{{.*}}]
-; CHECK-NEXT: DW_AT_name{{.*}}"T"
-; CHECK-NOT: DW_AT_type
-; CHECK: NULL
-
-%class.bar = type { i8 }
-
-@filters = global %class.bar zeroinitializer, align 1
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!36, !37}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 187958) (llvm/trunk 187964)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
-!1 = !DIFile(filename: "debug-info-template-recursive.cpp", directory: "/usr/local/google/home/echristo/tmp")
-!2 = !{}
-!3 = !{!4}
-!4 = !DIGlobalVariable(name: "filters", line: 10, isLocal: false, isDefinition: true, scope: null, file: !5, type: !6, variable: %class.bar* @filters)
-!5 = !DIFile(filename: "debug-info-template-recursive.cpp", directory: "/usr/local/google/home/echristo/tmp")
-!6 = !DICompositeType(tag: DW_TAG_class_type, name: "bar", line: 9, size: 8, align: 8, file: !1, elements: !7)
-!7 = !{!8, !31}
-!8 = !DIDerivedType(tag: DW_TAG_inheritance, scope: !6, baseType: !9)
-!9 = !DICompositeType(tag: DW_TAG_class_type, name: "foo<void>", line: 5, size: 8, align: 8, file: !1, elements: !10, templateParams: !29)
-!10 = !{!11, !19, !25}
-!11 = !DIDerivedType(tag: DW_TAG_inheritance, scope: !9, baseType: !12)
-!12 = !DICompositeType(tag: DW_TAG_class_type, name: "base", line: 3, size: 8, align: 8, file: !1, elements: !13)
-!13 = !{!14}
-!14 = !DISubprogram(name: "base", line: 3, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !12, type: !15)
-!15 = !DISubroutineType(types: !16)
-!16 = !{null, !17}
-!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !12)
-!19 = !DISubprogram(name: "operator=", linkageName: "_ZN3fooIvEaSES0_", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrivate | DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !9, type: !20)
-!20 = !DISubroutineType(types: !21)
-!21 = !{null, !22, !23}
-!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !9)
-!23 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !9)
-!25 = !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !9, type: !26)
-!26 = !DISubroutineType(types: !27)
-!27 = !{null, !22}
-!29 = !{!30}
-!30 = !DITemplateTypeParameter(name: "T", type: null)
-!31 = !DISubprogram(name: "bar", line: 9, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !1, scope: !6, type: !32)
-!32 = !DISubroutineType(types: !33)
-!33 = !{null, !34}
-!34 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !6)
-!36 = !{i32 2, !"Dwarf Version", i32 3}
-!37 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/tu-composite.ll b/test/DebugInfo/tu-composite.ll
deleted file mode 100644
index 47fb8bc31482..000000000000
--- a/test/DebugInfo/tu-composite.ll
+++ /dev/null
@@ -1,184 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -filetype=obj -O0 < %s > %t
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
-; CHECK: [[TYPE:.*]]: DW_TAG_structure_type
-; Make sure we correctly handle containing type of a struct being a type identifier.
-; CHECK-NEXT: DW_AT_containing_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]})
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "C")
-
-; Make sure we correctly handle context of a subprogram being a type identifier.
-; CHECK: [[SP:.*]]: DW_TAG_subprogram
-; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "foo")
-; Make sure we correctly handle containing type of a subprogram being a type identifier.
-; CHECK: DW_AT_containing_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]})
-; CHECK: DW_TAG_formal_parameter
-; CHECK: NULL
-; CHECK: NULL
-
-; CHECK: [[TYPE2:.*]]: DW_TAG_structure_type
-; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "bar")
-; CHECK: DW_TAG_structure_type
-; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "D")
-; CHECK: DW_TAG_member
-; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "a")
-; Make sure we correctly handle context of a struct being a type identifier.
-; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "Nested")
-; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "Nested2")
-; CHECK-NEXT: DW_AT_declaration [DW_FORM_flag] (0x01)
-; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "virt<bar>")
-; Make sure we correctly handle type of a template_type being a type identifier.
-; CHECK: DW_TAG_template_type_parameter
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
-; CHECK-NEXT: DW_AT_name [DW_FORM_strp] {{.*}}= "T")
-; Make sure we correctly handle derived-from of a typedef being a type identifier.
-; CHECK: DW_TAG_typedef
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
-; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "baz2")
-; Make sure we correctly handle derived-from of a pointer type being a type identifier.
-; CHECK: DW_TAG_pointer_type
-; CHECK: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]})
-; CHECK: DW_TAG_typedef
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
-; CHECK: DW_AT_name [DW_FORM_strp] {{.*}}= "baz")
-; Make sure we correctly handle derived-from of an array type being a type identifier.
-; CHECK: DW_TAG_array_type
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE2]]})
-; IR generated from clang -g with the following source:
-; struct C {
-; virtual void foo();
-; };
-; void C::foo() {
-; }
-;
-; struct bar { };
-; typedef bar baz;
-; struct D {
-; typedef bar baz2;
-; static int a;
-; struct Nested { };
-; struct Nested2 { };
-; template <typename T>
-; struct virt {
-; T* values;
-; };
-; };
-; void test() {
-; baz B;
-; bar A[3];
-; D::baz2 B2;
-; D::Nested e;
-; D::Nested2 *p;
-; D::virt<bar> t;
-; }
-
-%struct.C = type { i32 (...)** }
-%struct.bar = type { i8 }
-%"struct.D::Nested" = type { i8 }
-%"struct.D::Nested2" = type { i8 }
-%"struct.D::virt" = type { %struct.bar* }
-
-@_ZTV1C = unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1C to i8*), i8* bitcast (void (%struct.C*)* @_ZN1C3fooEv to i8*)]
-@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
-@_ZTS1C = constant [3 x i8] c"1C\00"
-@_ZTI1C = unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1C, i32 0, i32 0) }
-
-; Function Attrs: nounwind ssp uwtable
-define void @_ZN1C3fooEv(%struct.C* %this) unnamed_addr #0 align 2 {
-entry:
- %this.addr = alloca %struct.C*, align 8
- store %struct.C* %this, %struct.C** %this.addr, align 8
- call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !36, metadata !DIExpression()), !dbg !38
- %this1 = load %struct.C*, %struct.C** %this.addr
- ret void, !dbg !39
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-; Function Attrs: nounwind ssp uwtable
-define void @_Z4testv() #0 {
-entry:
- %B = alloca %struct.bar, align 1
- %A = alloca [3 x %struct.bar], align 1
- %B2 = alloca %struct.bar, align 1
- %e = alloca %"struct.D::Nested", align 1
- %p = alloca %"struct.D::Nested2"*, align 8
- %t = alloca %"struct.D::virt", align 8
- call void @llvm.dbg.declare(metadata %struct.bar* %B, metadata !40, metadata !DIExpression()), !dbg !42
- call void @llvm.dbg.declare(metadata [3 x %struct.bar]* %A, metadata !43, metadata !DIExpression()), !dbg !47
- call void @llvm.dbg.declare(metadata %struct.bar* %B2, metadata !48, metadata !DIExpression()), !dbg !50
- call void @llvm.dbg.declare(metadata %"struct.D::Nested"* %e, metadata !51, metadata !DIExpression()), !dbg !52
- call void @llvm.dbg.declare(metadata %"struct.D::Nested2"** %p, metadata !53, metadata !DIExpression()), !dbg !55
- call void @llvm.dbg.declare(metadata %"struct.D::virt"* %t, metadata !56, metadata !DIExpression()), !dbg !57
- ret void, !dbg !58
-}
-
-attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!35, !59}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !30, globals: !2, imports: !2)
-!1 = !DIFile(filename: "tmp.cpp", directory: ".")
-!2 = !{}
-!3 = !{!4, !18, !19, !22, !23, !24}
-!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 1, size: 64, align: 64, file: !1, elements: !5, vtableHolder: !"_ZTS1C", identifier: "_ZTS1C")
-!5 = !{!6, !13}
-!6 = !DIDerivedType(tag: DW_TAG_member, name: "_vptr$C", size: 64, flags: DIFlagArtificial, file: !1, scope: !7, baseType: !8)
-!7 = !DIFile(filename: "tmp.cpp", directory: ".")
-!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, baseType: !9)
-!9 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "__vtbl_ptr_type", size: 64, baseType: !10)
-!10 = !DISubroutineType(types: !11)
-!11 = !{!12}
-!12 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!13 = !DISubprogram(name: "foo", linkageName: "_ZN1C3fooEv", line: 2, isLocal: false, isDefinition: false, virtuality: DW_VIRTUALITY_virtual, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !"_ZTS1C", type: !14, containingType: !"_ZTS1C")
-!14 = !DISubroutineType(types: !15)
-!15 = !{null, !16}
-!16 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1C")
-!18 = !DICompositeType(tag: DW_TAG_structure_type, name: "bar", line: 7, size: 8, align: 8, file: !1, elements: !2, identifier: "_ZTS3bar")
-!19 = !DICompositeType(tag: DW_TAG_structure_type, name: "D", line: 9, size: 8, align: 8, file: !1, elements: !20, identifier: "_ZTS1D")
-!20 = !{!21}
-!21 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 11, flags: DIFlagStaticMember, file: !1, scope: !"_ZTS1D", baseType: !12)
-!22 = !DICompositeType(tag: DW_TAG_structure_type, name: "Nested", line: 12, size: 8, align: 8, file: !1, scope: !"_ZTS1D", elements: !2, identifier: "_ZTSN1D6NestedE")
-!23 = !DICompositeType(tag: DW_TAG_structure_type, name: "Nested2", line: 13, flags: DIFlagFwdDecl, file: !1, scope: !"_ZTS1D", identifier: "_ZTSN1D7Nested2E")
-!24 = !DICompositeType(tag: DW_TAG_structure_type, name: "virt<bar>", line: 15, size: 64, align: 64, file: !1, scope: !"_ZTS1D", elements: !25, templateParams: !28, identifier: "_ZTSN1D4virtI3barEE")
-!25 = !{!26}
-!26 = !DIDerivedType(tag: DW_TAG_member, name: "values", line: 16, size: 64, align: 64, file: !1, scope: !"_ZTSN1D4virtI3barEE", baseType: !27)
-!27 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS3bar")
-!28 = !{!29}
-!29 = !DITemplateTypeParameter(name: "T", type: !"_ZTS3bar")
-!30 = !{!31, !32}
-!31 = !DISubprogram(name: "foo", linkageName: "_ZN1C3fooEv", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: null, type: !14, function: void (%struct.C*)* @_ZN1C3fooEv, declaration: !13, variables: !2)
-!32 = !DISubprogram(name: "test", linkageName: "_Z4testv", line: 20, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 20, file: !1, scope: !7, type: !33, function: void ()* @_Z4testv, variables: !2)
-!33 = !DISubroutineType(types: !34)
-!34 = !{null}
-!35 = !{i32 2, !"Dwarf Version", i32 2}
-!36 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !31, type: !37)
-!37 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1C")
-!38 = !DILocation(line: 0, scope: !31)
-!39 = !DILocation(line: 5, scope: !31)
-!40 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "B", line: 21, scope: !32, file: !7, type: !41)
-!41 = !DIDerivedType(tag: DW_TAG_typedef, name: "baz", line: 8, file: !1, baseType: !"_ZTS3bar")
-!42 = !DILocation(line: 21, scope: !32)
-!43 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "A", line: 22, scope: !32, file: !7, type: !44)
-!44 = !DICompositeType(tag: DW_TAG_array_type, size: 24, align: 8, baseType: !"_ZTS3bar", elements: !45)
-!45 = !{!46}
-!46 = !DISubrange(count: 3)
-!47 = !DILocation(line: 22, scope: !32)
-!48 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "B2", line: 23, scope: !32, file: !7, type: !49)
-!49 = !DIDerivedType(tag: DW_TAG_typedef, name: "baz2", line: 10, file: !1, scope: !"_ZTS1D", baseType: !"_ZTS3bar")
-!50 = !DILocation(line: 23, scope: !32)
-!51 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "e", line: 24, scope: !32, file: !7, type: !22)
-!52 = !DILocation(line: 24, scope: !32)
-!53 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "p", line: 25, scope: !32, file: !7, type: !54)
-!54 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTSN1D7Nested2E")
-!55 = !DILocation(line: 25, scope: !32)
-!56 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "t", line: 26, scope: !32, file: !7, type: !24)
-!57 = !DILocation(line: 26, scope: !32)
-!58 = !DILocation(line: 27, scope: !32)
-!59 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/tu-member-pointer.ll b/test/DebugInfo/tu-member-pointer.ll
deleted file mode 100644
index a46c41313cf5..000000000000
--- a/test/DebugInfo/tu-member-pointer.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -filetype=obj -O0 < %s > %t
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
-; CHECK: DW_TAG_ptr_to_member_type
-; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE:0x[0-9a-f]+]]})
-; CHECK: [[TYPE]]: DW_TAG_base_type
-; IR generated from clang -g with the following source:
-; struct Foo {
-; int e;
-; };
-; int Foo:*x = 0;
-
-@x = global i64 -1, align 8
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!10, !11}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !2, globals: !5, imports: !2)
-!1 = !DIFile(filename: "foo.cpp", directory: ".")
-!2 = !{}
-!3 = !{!4}
-!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", line: 1, flags: DIFlagFwdDecl, file: !1, identifier: "_ZTS3Foo")
-!5 = !{!6}
-!6 = !DIGlobalVariable(name: "x", line: 4, isLocal: false, isDefinition: true, scope: null, file: !7, type: !8, variable: i64* @x)
-!7 = !DIFile(filename: "foo.cpp", directory: ".")
-!8 = !DIDerivedType(tag: DW_TAG_ptr_to_member_type, baseType: !9, extraData: !"_ZTS3Foo")
-!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !{i32 2, !"Dwarf Version", i32 2}
-!11 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll
deleted file mode 100644
index 9dbd64a3fb92..000000000000
--- a/test/DebugInfo/two-cus-from-same-file.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; For http://llvm.org/bugs/show_bug.cgi?id=12942
-; There are two CUs coming from /tmp/foo.c in this module. Make sure it doesn't
-; blow llc up and produces something reasonable.
-;
-
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf %s -o %t -filetype=obj -O0
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
-
-; ModuleID = 'test.bc'
-
-@str = private unnamed_addr constant [4 x i8] c"FOO\00"
-@str1 = private unnamed_addr constant [6 x i8] c"Main!\00"
-
-define void @foo() nounwind {
-entry:
- %puts = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str, i32 0, i32 0)), !dbg !23
- ret void, !dbg !25
-}
-
-declare i32 @puts(i8* nocapture) nounwind
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
-entry:
- tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !21, metadata !DIExpression()), !dbg !26
- tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !22, metadata !DIExpression()), !dbg !27
- %puts = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @str1, i32 0, i32 0)), !dbg !28
- tail call void @foo() nounwind, !dbg !30
- ret i32 0, !dbg !31
-}
-
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0, !9}
-!llvm.module.flags = !{!33}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 156513)", isOptimized: true, emissionKind: 1, file: !32, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
-!1 = !{}
-!3 = !{!5}
-!5 = !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !32, scope: !6, type: !7, function: void ()* @foo, variables: !1)
-!6 = !DIFile(filename: "foo.c", directory: "/tmp")
-!7 = !DISubroutineType(types: !8)
-!8 = !{null}
-!9 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.2 (trunk 156513)", isOptimized: true, emissionKind: 1, file: !32, enums: !1, retainedTypes: !1, subprograms: !10, globals: !1, imports: !1)
-!10 = !{!12}
-!12 = !DISubprogram(name: "main", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !32, scope: !6, type: !13, function: i32 (i32, i8**)* @main, variables: !19)
-!13 = !DISubroutineType(types: !14)
-!14 = !{!15, !15, !16}
-!15 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!16 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !17)
-!17 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !18)
-!18 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!19 = !{!21, !22}
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 11, arg: 1, scope: !12, file: !6, type: !15)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 11, arg: 2, scope: !12, file: !6, type: !16)
-!23 = !DILocation(line: 6, column: 3, scope: !24)
-!24 = distinct !DILexicalBlock(line: 5, column: 16, file: !32, scope: !5)
-!25 = !DILocation(line: 7, column: 1, scope: !24)
-!26 = !DILocation(line: 11, column: 14, scope: !12)
-!27 = !DILocation(line: 11, column: 26, scope: !12)
-!28 = !DILocation(line: 12, column: 3, scope: !29)
-!29 = distinct !DILexicalBlock(line: 11, column: 34, file: !32, scope: !12)
-!30 = !DILocation(line: 13, column: 3, scope: !29)
-!31 = !DILocation(line: 14, column: 3, scope: !29)
-!32 = !DIFile(filename: "foo.c", directory: "/tmp")
-
-; This test is simple to be cross platform (many targets don't yet have
-; sufficiently good DWARF emission and/or dumping)
-; CHECK: {{DW_TAG_compile_unit}}
-; CHECK: {{foo\.c}}
-
-!33 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/DebugInfo/typedef.ll b/test/DebugInfo/typedef.ll
deleted file mode 100644
index 591995e9b256..000000000000
--- a/test/DebugInfo/typedef.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
-
-; From source:
-; typedef void x;
-; x *y;
-
-; Check that a typedef with no DW_AT_type is produced. The absence of a type is used to imply the 'void' type.
-
-; CHECK: DW_TAG_typedef
-; CHECK-NOT: DW_AT_type
-; CHECK: {{DW_TAG|NULL}}
-
-@y = global i8* null, align 8
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-!llvm.ident = !{!10}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
-!1 = !DIFile(filename: "typedef.cpp", directory: "/tmp/dbginfo")
-!2 = !{}
-!3 = !{!4}
-!4 = !DIGlobalVariable(name: "y", line: 2, isLocal: false, isDefinition: true, scope: null, file: !5, type: !6, variable: i8** @y)
-!5 = !DIFile(filename: "typedef.cpp", directory: "/tmp/dbginfo")
-!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !7)
-!7 = !DIDerivedType(tag: DW_TAG_typedef, name: "x", line: 1, file: !1, baseType: null)
-!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 1, !"Debug Info Version", i32 3}
-!10 = !{!"clang version 3.5.0 "}
-
diff --git a/test/DebugInfo/unconditional-branch.ll b/test/DebugInfo/unconditional-branch.ll
deleted file mode 100644
index 220a0e3dbe30..000000000000
--- a/test/DebugInfo/unconditional-branch.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; REQUIRES: object-emission
-; PR 19261
-
-; RUN: %llc_dwarf -fast-isel=false -O0 -filetype=obj %s -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-
-; CHECK: {{0x[0-9a-f]+}} 1 0 1 0 0 is_stmt
-; CHECK: {{0x[0-9a-f]+}} 2 0 1 0 0 is_stmt
-; CHECK: {{0x[0-9a-f]+}} 4 0 1 0 0 is_stmt
-
-; IR generated from clang -O0 -g with the following source:
-;void foo(int i){
-; switch(i){
-; default:
-; break;
-; }
-; return;
-;}
-
-; Function Attrs: nounwind
-define void @foo(i32 %i) #0 {
-entry:
- %i.addr = alloca i32, align 4
- store i32 %i, i32* %i.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !12, metadata !DIExpression()), !dbg !13
- %0 = load i32, i32* %i.addr, align 4, !dbg !14
- switch i32 %0, label %sw.default [
- ], !dbg !14
-
-sw.epilog: ; preds = %sw.default
- ret void, !dbg !17
-
-sw.default: ; preds = %entry
- br label %sw.epilog, !dbg !15
-
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!9, !10}
-!llvm.ident = !{!11}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (204712)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "test.c", directory: "D:\5Cwork\5CEPRs\5C396363")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32)* @foo, variables: !2)
-!5 = !DIFile(filename: "test.c", directory: "D:CworkCEPRsC396363")
-!6 = !DISubroutineType(types: !7)
-!7 = !{null, !8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !{i32 2, !"Dwarf Version", i32 4}
-!10 = !{i32 1, !"Debug Info Version", i32 3}
-!11 = !{!"clang version 3.5.0 (204712)"}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !8)
-!13 = !DILocation(line: 1, scope: !4)
-!14 = !DILocation(line: 2, scope: !4)
-!15 = !DILocation(line: 4, scope: !16)
-!16 = distinct !DILexicalBlock(line: 2, column: 0, file: !1, scope: !4)
-!17 = !DILocation(line: 6, scope: !4)
diff --git a/test/DebugInfo/varargs.ll b/test/DebugInfo/varargs.ll
deleted file mode 100644
index 93dbfa1e0ace..000000000000
--- a/test/DebugInfo/varargs.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; RUN: %llc_dwarf -O0 -filetype=obj -o %t.o %s
-; RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
-; REQUIRES: object-emission
-;
-; Test debug info for variadic function arguments.
-; Created from tools/clang/tests/CodeGenCXX/debug-info-varargs.cpp
-;
-; The ... parameter of variadic should be emitted as
-; DW_TAG_unspecified_parameters.
-;
-; Normal variadic function.
-; void b(int c, ...);
-;
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "a"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_unspecified_parameters
-;
-; CHECK: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "b"
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_unspecified_parameters
-;
-; Variadic C++ member function.
-; struct A { void a(int c, ...); }
-;
-; Variadic function pointer.
-; void (*fptr)(int, ...);
-;
-; CHECK: DW_TAG_subroutine_type
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_formal_parameter
-; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_unspecified_parameters
-;
-; ModuleID = 'llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp'
-
-%struct.A = type { i8 }
-
-; Function Attrs: nounwind ssp uwtable
-define void @_Z1biz(i32 %c, ...) #0 {
- %1 = alloca i32, align 4
- %a = alloca %struct.A, align 1
- %fptr = alloca void (i32, ...)*, align 8
- store i32 %c, i32* %1, align 4
- call void @llvm.dbg.declare(metadata i32* %1, metadata !21, metadata !DIExpression()), !dbg !22
- call void @llvm.dbg.declare(metadata %struct.A* %a, metadata !23, metadata !DIExpression()), !dbg !24
- call void @llvm.dbg.declare(metadata void (i32, ...)** %fptr, metadata !25, metadata !DIExpression()), !dbg !27
- store void (i32, ...)* @_Z1biz, void (i32, ...)** %fptr, align 8, !dbg !27
- ret void, !dbg !28
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
-
-attributes #0 = { nounwind ssp uwtable }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!18, !19}
-!llvm.ident = !{!20}
-
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !13, globals: !2, imports: !2)
-!1 = !DIFile(filename: "llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp", directory: "radar/13690847")
-!2 = !{}
-!3 = !{!4}
-!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "A", line: 3, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS1A")
-!5 = !{!6}
-!6 = !DISubprogram(name: "a", linkageName: "_ZN1A1aEiz", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !7)
-!7 = !DISubroutineType(types: !8)
-!8 = !{null, !9, !10, null}
-!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
-!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!13 = !{!14}
-!14 = !DISubprogram(name: "b", linkageName: "_Z1biz", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !1, scope: !15, type: !16, function: void (i32, ...)* @_Z1biz, variables: !2)
-!15 = !DIFile(filename: "llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp", directory: "radar/13690847")
-!16 = !DISubroutineType(types: !17)
-!17 = !{null, !10, null}
-!18 = !{i32 2, !"Dwarf Version", i32 2}
-!19 = !{i32 1, !"Debug Info Version", i32 3}
-!20 = !{!"clang version 3.5 "}
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 13, arg: 1, scope: !14, file: !15, type: !10)
-!22 = !DILocation(line: 13, scope: !14)
-!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 16, scope: !14, file: !15, type: !4)
-!24 = !DILocation(line: 16, scope: !14)
-!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "fptr", line: 18, scope: !14, file: !15, type: !26)
-!26 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !16)
-!27 = !DILocation(line: 18, scope: !14)
-!28 = !DILocation(line: 22, scope: !14)
diff --git a/test/DebugInfo/version.ll b/test/DebugInfo/version.ll
deleted file mode 100644
index f18a3e2871e1..000000000000
--- a/test/DebugInfo/version.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; REQUIRES: object-emission
-
-; RUN: %llc_dwarf -O0 -filetype=obj < %s > %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-
-; Make sure we are generating DWARF version 3 when module flag says so.
-; CHECK: Compile Unit: length = {{.*}} version = 0x0003
-
-define i32 @main() #0 {
-entry:
- %retval = alloca i32, align 4
- store i32 0, i32* %retval
- ret i32 0, !dbg !10
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!9, !11}
-
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 185475)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "CodeGen/dwarf-version.c", directory: "test")
-!2 = !{}
-!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 6, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
-!5 = !DIFile(filename: "CodeGen/dwarf-version.c", directory: "test")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !{i32 2, !"Dwarf Version", i32 3}
-!10 = !DILocation(line: 7, scope: !4)
-!11 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Examples/Kaleidoscope/Chapter3.test b/test/Examples/Kaleidoscope/Chapter3.test
new file mode 100644
index 000000000000..b9c8ba6fad8a
--- /dev/null
+++ b/test/Examples/Kaleidoscope/Chapter3.test
@@ -0,0 +1,17 @@
+# RUN: Kaleidoscope-Ch3 < %s 2>&1 | FileCheck %s
+
+# Test basic parsing and IR generation.
+def foo(x) x + 1;
+foo(1);
+
+# CHECK: define double @foo(double %x) {
+# CHECK-NEXT: entry:
+# CHECK-NEXT: %addtmp = fadd double %x, 1.000000e+00
+# CHECK-NEXT: ret double %addtmp
+# CHECK-NEXT: }
+
+# CHECK: define double @__anon_expr() {
+# CHECK-NEXT: entry:
+# CHECK-NEXT: %calltmp = call double @foo(double 1.000000e+00)
+# CHECK-NEXT: ret double %calltmp
+# CHECK-NEXT: }
diff --git a/test/Examples/Kaleidoscope/Chapter4.test b/test/Examples/Kaleidoscope/Chapter4.test
new file mode 100644
index 000000000000..5fd0e42c9a6d
--- /dev/null
+++ b/test/Examples/Kaleidoscope/Chapter4.test
@@ -0,0 +1,17 @@
+# RUN: Kaleidoscope-Ch4 < %s 2>&1 | FileCheck %s
+
+# Test basic definition, binding, and execution.
+def foo(x) x + 1;
+def bar(x) foo(2 * x);
+bar(2);
+# CHECK: Evaluated to 5.000000
+
+# Test redefinition.
+def foo(x) x + 2;
+foo(2);
+# CHECK: Evaluated to 4.000000
+
+# Verify that 'bar' still calls the original 'foo'.
+bar(2);
+# CHECK: Evaluated to 5.000000
+
diff --git a/test/Examples/Kaleidoscope/Chapter5.test b/test/Examples/Kaleidoscope/Chapter5.test
new file mode 100644
index 000000000000..1ad902378edb
--- /dev/null
+++ b/test/Examples/Kaleidoscope/Chapter5.test
@@ -0,0 +1,19 @@
+# RUN: Kaleidoscope-Ch5 < %s 2>&1 | FileCheck %s
+
+# Test 'if' expression.
+def foo(x) if x < 10 then 0 else 1;
+foo(9);
+foo(11);
+# CHECK: Evaluated to 0.000000
+# CHECK: Evaluated to 1.000000
+
+# Test 'for' expression.
+extern printd(x);
+for i = 1, i < 5, 1.0 in
+ printd(i);
+# CHECK: 1.0
+# CHECK: 2.0
+# CHECK: 3.0
+# CHECK: 4.0
+# CHECK: 5.0
+# CHECK: Evaluated to 0.000000 \ No newline at end of file
diff --git a/test/Examples/Kaleidoscope/Chapter6.test b/test/Examples/Kaleidoscope/Chapter6.test
new file mode 100644
index 000000000000..cbdd01f52683
--- /dev/null
+++ b/test/Examples/Kaleidoscope/Chapter6.test
@@ -0,0 +1,15 @@
+# RUN: Kaleidoscope-Ch6 < %s 2>&1 | FileCheck %s
+
+# Test unary operator definition.
+def unary-(x) 0 - x;
+1 + (-1);
+# CHECK: Evaluated to 0.000000
+
+# Test binary operator definition.
+def binary> 10 (lhs rhs) rhs < lhs;
+def foo(x) if x > 10 then 0 else 1;
+foo(9);
+foo(11);
+# CHECK: Evaluated to 1.000000
+# CHECK: Evaluated to 0.000000
+
diff --git a/test/Examples/Kaleidoscope/Chapter7.test b/test/Examples/Kaleidoscope/Chapter7.test
new file mode 100644
index 000000000000..4843ca703aed
--- /dev/null
+++ b/test/Examples/Kaleidoscope/Chapter7.test
@@ -0,0 +1,15 @@
+# RUN: Kaleidoscope-Ch7 < %s 2>&1 | FileCheck %s
+
+# Sequence operator and iterative fibonacci function to test user defined vars.
+def binary : 1 (x y) y;
+
+def fibi(x)
+ var a = 1, b = 1, c in
+ (for i = 3, i < x in
+ c = a + b :
+ a = b :
+ b = c) :
+ b;
+
+fibi(10);
+# CHECK: Evaluated to 55.000000
diff --git a/test/Examples/lit.local.cfg b/test/Examples/lit.local.cfg
new file mode 100644
index 000000000000..462e3dc5d11d
--- /dev/null
+++ b/test/Examples/lit.local.cfg
@@ -0,0 +1 @@
+config.unsupported = True
diff --git a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
index cd4834b3f124..27c13e75e938 100644
--- a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
+++ b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -relocation-model=pic -code-model=large %s
-; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, aarch64, arm, asan, msan
+; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, aarch64, arm
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/eh-sm-pic.ll b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
index 24d8b2ceb4f6..6233faa8725c 100644
--- a/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -relocation-model=pic -code-model=small %s
-; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, darwin, aarch64, arm, asan, msan
+; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, darwin, aarch64, arm
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/eh.ll b/test/ExecutionEngine/MCJIT/eh.ll
index b301b64767c1..ed057e14512f 100644
--- a/test/ExecutionEngine/MCJIT/eh.ll
+++ b/test/ExecutionEngine/MCJIT/eh.ll
@@ -1,5 +1,5 @@
; RUN: %lli %s
-; XFAIL: arm, cygwin, win32, mingw, asan, msan
+; XFAIL: arm, cygwin, win32, mingw
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll
index 50ed321a0d62..5c15ba4f15a8 100644
--- a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll
+++ b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -extra-module=%p/Inputs/multi-module-eh-b.ll %s
-; XFAIL: arm, cygwin, win32, mingw, asan, msan
+; XFAIL: arm, cygwin, win32, mingw
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/OrcLazy/global_aliases.ll b/test/ExecutionEngine/OrcLazy/global_aliases.ll
new file mode 100644
index 000000000000..61fde4bebf8b
--- /dev/null
+++ b/test/ExecutionEngine/OrcLazy/global_aliases.ll
@@ -0,0 +1,21 @@
+; RUN: lli -jit-kind=orc-lazy %s
+;
+; Test handling of global aliases for function and variables.
+
+@x = global i32 42, align 4
+@y = alias i32, i32* @x
+
+define i32 @foo() {
+entry:
+ %0 = load i32, i32* @y, align 4
+ ret i32 %0
+}
+
+@bar = alias i32(), i32()* @foo
+
+define i32 @main(i32 %argc, i8** %argv) {
+entry:
+ %0 = call i32() @bar()
+ %1 = sub i32 %0, 42
+ ret i32 %1
+}
diff --git a/test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll b/test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll
index 936d7eebe09d..3c05a7105092 100644
--- a/test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -relocation-model=pic -code-model=large %s
-; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, aarch64, arm, asan, msan
+; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, aarch64, arm
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll
index 02279226dd56..8d663973cfca 100644
--- a/test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -relocation-model=pic -code-model=small %s
-; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, darwin, aarch64, arm, asan, msan
+; XFAIL: cygwin, win32, mingw, mips-, mipsel-, i686, i386, darwin, aarch64, arm
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/OrcMCJIT/eh.ll b/test/ExecutionEngine/OrcMCJIT/eh.ll
index 8a1b4d8f6dfa..6b7ee69255d8 100644
--- a/test/ExecutionEngine/OrcMCJIT/eh.ll
+++ b/test/ExecutionEngine/OrcMCJIT/eh.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit %s
-; XFAIL: arm, cygwin, win32, mingw, asan, msan
+; XFAIL: arm, cygwin, win32, mingw
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/OrcMCJIT/multi-module-eh-a.ll b/test/ExecutionEngine/OrcMCJIT/multi-module-eh-a.ll
index ccde9aefe8e2..d3f9dd1a2335 100644
--- a/test/ExecutionEngine/OrcMCJIT/multi-module-eh-a.ll
+++ b/test/ExecutionEngine/OrcMCJIT/multi-module-eh-a.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/multi-module-eh-b.ll %s
-; XFAIL: arm, cygwin, win32, mingw, asan, msan
+; XFAIL: arm, cygwin, win32, mingw
declare i8* @__cxa_allocate_exception(i64)
declare void @__cxa_throw(i8*, i8*, i8*)
declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s
index 04d269e2aebf..0387b932f1c5 100644
--- a/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s
+++ b/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s
@@ -55,6 +55,18 @@ ldr2:
ldr x0, [x0, _ptr@GOTPAGEOFF]
ret
+# rtdyld-check: decode_operand(add1, 2) = (tgt+8)[11:2] << 2
+ .globl _test_explicit_addend_reloc
+ .align 4
+_test_explicit_addend_reloc:
+add1:
+ add x0, x0, tgt@PAGEOFF+8
+
+ .align 3
+tgt:
+ .long 0
+ .long 0
+ .long 7
# Test ARM64_RELOC_UNSIGNED relocation. The absolute 64-bit address of the
# function should be stored at the 8-byte memory location.
diff --git a/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s
index 7ff3a8975769..1f5ec6735976 100644
--- a/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s
+++ b/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s
@@ -40,6 +40,13 @@ insn4:
.comm baz, 4, 2
.comm foo, 4, 2
+ .section __DATA,__data
+ .globl _a
+ .align 2
+# rtdyld-check: *{4}bar_ofs = bar + 4
+bar_ofs:
+ .long bar + 4
+
# Check that the symbol pointer section entries are fixed up properly:
# rtdyld-check: *{4}foo$non_lazy_ptr = foo
.section __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
diff --git a/test/ExecutionEngine/RuntimeDyld/Mips/ELF_Mips64r2N64_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_Mips64r2N64_PIC_relocations.s
index 1d8d293a26a4..294ea3e43668 100644
--- a/test/ExecutionEngine/RuntimeDyld/Mips/ELF_Mips64r2N64_PIC_relocations.s
+++ b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_Mips64r2N64_PIC_relocations.s
@@ -2,6 +2,10 @@
# RUN: llc -mtriple=mips64el-unknown-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_ExternalFunction_Mips64N64.o %S/Inputs/ExternalFunction.ll
# RUN: llvm-rtdyld -triple=mips64el-unknown-linux -verify -map-section test_ELF_Mips64N64.o,.text=0x1000 -map-section test_ELF_ExternalFunction_Mips64N64.o,.text=0x10000 -check=%s %/T/test_ELF_Mips64N64.o %T/test_ELF_ExternalFunction_Mips64N64.o
+# RUN: llvm-mc -triple=mips64-unknown-linux -relocation-model=pic -code-model=small -filetype=obj -o %T/test_ELF_Mips64N64.o %s
+# RUN: llc -mtriple=mips64-unknown-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_ExternalFunction_Mips64N64.o %S/Inputs/ExternalFunction.ll
+# RUN: llvm-rtdyld -triple=mips64-unknown-linux -verify -map-section test_ELF_Mips64N64.o,.text=0x1000 -map-section test_ELF_ExternalFunction_Mips64N64.o,.text=0x10000 -check=%s %/T/test_ELF_Mips64N64.o %T/test_ELF_ExternalFunction_Mips64N64.o
+
.data
# Test R_MIPS_PC32 relocation.
# rtdyld-check: *{4}(R_MIPS_PC32) = (foo - R_MIPS_PC32)[31:0]
@@ -39,13 +43,15 @@ bar:
sd $4, 8($fp)
# Test R_MIPS_26 relocation.
-# rtdyld-check: decode_operand(insn1, 0)[25:0] = foo
+# rtdyld-check: decode_operand(insn1, 0)[27:0] = foo[27:0]
insn1:
+ .option pic0
jal foo
+ .option pic2
nop
# Test R_MIPS_PC16 relocation.
-# rtdyld-check: decode_operand(insn2, 1)[15:0] = foo - insn2
+# rtdyld-check: decode_operand(insn2, 1)[17:0] = (foo - insn2)[17:0]
insn2:
bal foo
nop
diff --git a/test/ExecutionEngine/RuntimeDyld/Mips/ELF_N64R6_relocations.s b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_N64R6_relocations.s
new file mode 100644
index 000000000000..2d5d09a0ab34
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_N64R6_relocations.s
@@ -0,0 +1,54 @@
+# RUN: llvm-mc -triple=mips64el-unknown-linux -mcpu=mips64r6 -relocation-model=pic -code-model=small -filetype=obj -o %T/test_ELF_N64R6.o %s
+# RUN: llc -mtriple=mips64el-unknown-linux -mcpu=mips64r6 -relocation-model=pic -filetype=obj -o %T/test_ELF_ExternalFunction_N64R6.o %S/Inputs/ExternalFunction.ll
+# RUN: llvm-rtdyld -triple=mips64el-unknown-linux -mcpu=mips64r6 -verify -map-section test_ELF_N64R6.o,.text=0x1000 -map-section test_ELF_ExternalFunction_N64R6.o,.text=0x10000 -check=%s %/T/test_ELF_N64R6.o %T/test_ELF_ExternalFunction_N64R6.o
+
+# RUN: llvm-mc -triple=mips64-unknown-linux -mcpu=mips64r6 -relocation-model=pic -code-model=small -filetype=obj -o %T/test_ELF_N64R6.o %s
+# RUN: llc -mtriple=mips64-unknown-linux -mcpu=mips64r6 -relocation-model=pic -filetype=obj -o %T/test_ELF_ExternalFunction_N64R6.o %S/Inputs/ExternalFunction.ll
+# RUN: llvm-rtdyld -triple=mips64-unknown-linux -mcpu=mips64r6 -verify -map-section test_ELF_N64R6.o,.text=0x1000 -map-section test_ELF_ExternalFunction_N64R6.o,.text=0x10000 -check=%s %/T/test_ELF_N64R6.o %T/test_ELF_ExternalFunction_N64R6.o
+
+ .text
+ .abicalls
+ .nan 2008
+ .text
+ .set nomicromips
+ .set nomips16
+ .set noreorder
+ .set nomacro
+ .set noat
+
+ .align 3
+ .globl bar
+ .type bar,@function
+
+bar:
+# Test R_MIPS_PC18_S3 relocation.
+# rtdyld-check: decode_operand(R_MIPS_PC18_S3, 1)[20:0] = (foo - R_MIPS_PC18_S3)[20:0]
+R_MIPS_PC18_S3:
+ ldpc $6,foo
+
+# Test R_MIPS_PC19_S2 relocation.
+# rtdyld-check: decode_operand(R_MIPS_PC19_S2, 1)[20:0] = (foo - R_MIPS_PC19_S2)[20:0]
+R_MIPS_PC19_S2:
+ lwpc $6,foo
+
+# Test R_MIPS_PC21_S2 relocation.
+# rtdyld-check: decode_operand(R_MIPS_PC21_S2, 1)[22:0] = (foo - next_pc(R_MIPS_PC21_S2))[22:0]
+R_MIPS_PC21_S2:
+ bnezc $5,foo
+
+# Test R_MIPS_PC26_S2 relocation.
+# rtdyld-check: decode_operand(R_MIPS_PC26_S2, 0)[27:0] = (foo - next_pc(R_MIPS_PC26_S2))[27:0]
+R_MIPS_PC26_S2:
+ balc foo
+
+# Test R_MIPS_PCHI16 relocation.
+# rtdyld-check: decode_operand(R_MIPS_PCHI16, 1)[15:0] = (foo - R_MIPS_PCHI16 + 0x8000)[31:16]
+R_MIPS_PCHI16:
+ aluipc $5, %pcrel_hi(foo)
+
+# Test R_MIPS_PCLO16 relocation.
+# rtdyld-check: decode_operand(R_MIPS_PCLO16, 2)[15:0] = (foo - R_MIPS_PCLO16)[15:0]
+R_MIPS_PCLO16:
+ addiu $5, $5, %pcrel_lo(foo)
+
+ .size bar, .-bar
diff --git a/test/ExecutionEngine/RuntimeDyld/Mips/ELF_O32_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_O32_PIC_relocations.s
index 6c47262246ab..e4b51002e65f 100644
--- a/test/ExecutionEngine/RuntimeDyld/Mips/ELF_O32_PIC_relocations.s
+++ b/test/ExecutionEngine/RuntimeDyld/Mips/ELF_O32_PIC_relocations.s
@@ -1,10 +1,10 @@
# RUN: llvm-mc -triple=mipsel-unknown-linux -relocation-model=pic -code-model=small -filetype=obj -o %T/test_ELF_O32.o %s
# RUN: llc -mtriple=mipsel-unknown-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_ExternalFunction_O32.o %S/Inputs/ExternalFunction.ll
-# RUN: llvm-rtdyld -triple=mipsel-unknown-linux -verify -map-section test_ELF_O32.o,.text=0x1000 -map-section test_ELF_ExternalFunction_O32.o,.text=0x10000 -check=%s %T/test_ELF_O32.o %T/test_ELF_ExternalFunction_O32.o
+# RUN: llvm-rtdyld -triple=mipsel-unknown-linux -verify -map-section test_ELF_O32.o,"<common symbols>"=0x7FF8 -map-section test_ELF_O32.o,.text=0x1000 -map-section test_ELF_ExternalFunction_O32.o,.text=0x10000 -check=%s %T/test_ELF_O32.o %T/test_ELF_ExternalFunction_O32.o
# RUN: llvm-mc -triple=mips-unknown-linux -relocation-model=pic -code-model=small -filetype=obj -o %T/test_ELF_O32.o %s
# RUN: llc -mtriple=mips-unknown-linux -relocation-model=pic -filetype=obj -o %/T/test_ELF_ExternalFunction_O32.o %S/Inputs/ExternalFunction.ll
-# RUN: llvm-rtdyld -triple=mips-unknown-linux -verify -map-section test_ELF_O32.o,.text=0x1000 -map-section test_ELF_ExternalFunction_O32.o,.text=0x10000 -check=%s %T/test_ELF_O32.o %T/test_ELF_ExternalFunction_O32.o
+# RUN: llvm-rtdyld -triple=mips-unknown-linux -verify -map-section test_ELF_O32.o,"<common symbols>"=0x7FF8 -map-section test_ELF_O32.o,.text=0x1000 -map-section test_ELF_ExternalFunction_O32.o,.text=0x10000 -check=%s %T/test_ELF_O32.o %T/test_ELF_ExternalFunction_O32.o
.data
# rtdyld-check: *{4}R_MIPS_32 = foo[31:0]
@@ -52,4 +52,13 @@ R_MIPS_HI16:
R_MIPS_LO16:
lui $1, %lo(foo)
+# rtdyld-check: decode_operand(R_MIPS_HI16_ADDEND, 1)[15:0] = (var+0x8008)[31:16]
+R_MIPS_HI16_ADDEND:
+ lui $2, %hi(var+8)
+
+# rtdyld-check: decode_operand(R_MIPS_LO16_ADDEND, 2)[15:0] = (var+0x8)[15:0]
+R_MIPS_LO16_ADDEND:
+ lb $2, %lo(var+8)($2)
+
.size bar, .-bar
+ .comm var,9,1
diff --git a/test/ExecutionEngine/RuntimeDyld/PowerPC/lit.local.cfg b/test/ExecutionEngine/RuntimeDyld/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..5d33887ff0a4
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/PowerPC/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'PowerPC' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/ExecutionEngine/RuntimeDyld/PowerPC/ppc32_elf_rel_addr16.s b/test/ExecutionEngine/RuntimeDyld/PowerPC/ppc32_elf_rel_addr16.s
new file mode 100644
index 000000000000..ef40259c7b01
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/PowerPC/ppc32_elf_rel_addr16.s
@@ -0,0 +1,47 @@
+# RUN: llvm-mc -triple=powerpc-unknown-linux-gnu -filetype=obj -o %T/ppc32_elf_rel_addr16.o %s
+# RUN: llvm-rtdyld -triple=powerpc-unknown-linux-gnu -verify -check=%s %T/ppc32_elf_rel_addr16.o
+ .text
+ .file "ppc32_elf_rel_addr16.ll"
+ .globl lookup
+ .align 2
+ .type lookup,@function
+lookup: # @lookup
+.Lfunc_begin0:
+# BB#0:
+ stw 31, -4(1)
+ stwu 1, -16(1)
+insn_hi:
+# Check the higher 16-bits of the symbol's absolute address
+# rtdyld-check: decode_operand(insn_hi, 1) = elements[31:16]
+ lis 4, elements@ha
+ slwi 3, 3, 2
+ mr 31, 1
+insn_lo:
+# Check the lower 16-bits of the symbol's absolute address
+# rtdyld-check: decode_operand(insn_lo, 2) = elements[15:0]
+ la 4, elements@l(4)
+ lwzx 3, 4, 3
+ addi 1, 1, 16
+ lwz 31, -4(1)
+ blr
+.Lfunc_end0:
+ .size lookup, .Lfunc_end0-.Lfunc_begin0
+
+ .type elements,@object # @elements
+ .data
+ .globl elements
+ .align 2
+elements:
+ .long 14 # 0xe
+ .long 4 # 0x4
+ .long 1 # 0x1
+ .long 3 # 0x3
+ .long 13 # 0xd
+ .long 0 # 0x0
+ .long 32 # 0x20
+ .long 334 # 0x14e
+ .size elements, 32
+
+
+ .ident "clang version 3.7.0 "
+ .section ".note.GNU-stack","",@progbits
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s b/test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s
new file mode 100644
index 000000000000..0a96f7025011
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s
@@ -0,0 +1,66 @@
+// RUN: llvm-mc -triple i686-windows -filetype obj -o %t.obj %s
+// RUN: llvm-rtdyld -triple i686-windows -dummy-extern _OutputDebugStringA@4=0xfffffffe -dummy-extern _ExitProcess@4=0xffffffff -verify -check=%s %t.obj
+
+ .text
+
+ .def _main
+ .scl 2
+ .type 32
+ .endef
+ .global _main
+_main:
+rel1:
+ call _function // IMAGE_REL_I386_REL32
+# rtdyld-check: decode_operand(rel1, 0) = (_function-_main-4-1)
+ xorl %eax, %eax
+ retl
+
+ .def _function
+ .scl 2
+ .type 32
+ .endef
+_function:
+rel2:
+ pushl string
+rel3:
+ calll *__imp__OutputDebugStringA // IMAGE_REL_I386_DIR32
+# rtdyld-check: decode_operand(rel3, 3) = __imp__OutputDebugStringA
+ addl $4, %esp
+ pushl $0
+rel4:
+ calll *__imp__ExitProcess // IMAGE_REL_I386_DIR32
+# rtdyld-check: decode_operand(rel4, 3) = __imp__ExitProcess
+ addl $4, %esp
+ retl
+
+ .data
+
+ .global __imp__OutputDebugStringA
+ .align 4
+__imp__OutputDebugStringA:
+ .long "_OutputDebugStringA@4" // IMAGE_REL_I386_DIR32
+# rtdyld-check: *{4}__imp__OutputDebugStringA = 0xfffffffe
+
+ .global __imp__ExitProcess
+ .align 4
+__imp__ExitProcess:
+ .long "_ExitProcess@4" // IMAGE_REL_I386_DIR32
+# rtdyld-check: *{4}__imp__ExitProcess = 0xffffffff
+
+ .global string
+ .align 1
+string:
+ .asciz "Hello World!\n"
+
+ .global relocations
+relocations:
+rel5:
+ .long _function@imgrel // IMAGE_REL_I386_DIR32NB
+# rtdyld-check: *{4}rel5 = _function - section_addr(COFF_i386.s.tmp.obj, .text)
+rel6:
+# rtdyld-check: *{2}rel6 = 1
+ .secidx __imp__OutputDebugStringA // IMAGE_REL_I386_SECTION
+rel7:
+# rtdyld-check: *{4}rel7 = relocations - section_addr(COFF_i386.s.tmp.obj, .data)
+ .secrel32 relocations // IMAGE_REL_I386_SECREL
+
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/COFF_x86_64 b/test/ExecutionEngine/RuntimeDyld/X86/COFF_x86_64
deleted file mode 100644
index 7029cf437c5e..000000000000
--- a/test/ExecutionEngine/RuntimeDyld/X86/COFF_x86_64
+++ /dev/null
@@ -1,31 +0,0 @@
-# RUN: llvm-mc -triple=x86_64-pc-win32 -filetype=obj -o %T/COFF_x86_64.o %s
-# RUN: llvm-rtdyld -triple=x86_64-pc-win32 -verify -check=%s %/T/COFF_x86_64.o
- .text
- .def F;
- .scl 2;
- .type 32;
- .endef
- .globl __real400921f9f01b866e
- .section .rdata,"dr",discard,__real400921f9f01b866e
- .align 8
-__real400921f9f01b866e:
- .quad 4614256650576692846 # double 3.1415899999999999
- .text
- .globl F
- .global inst1
- .align 16, 0x90
-F: # @F
-.Ltmp0:
-.seh_proc F
-# BB#0: # %entry
-.Ltmp1:
- .seh_endprologue
-# rtdyld-check: decode_operand(inst1, 4) = __real400921f9f01b866e - next_pc(inst1)
-inst1:
- movsd __real400921f9f01b866e(%rip), %xmm0 # xmm0 = mem[0],zero
- retq
-.Leh_func_end0:
-.Ltmp2:
- .seh_endproc
-
-
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/COFF_x86_64.s b/test/ExecutionEngine/RuntimeDyld/X86/COFF_x86_64.s
new file mode 100644
index 000000000000..a865bdbfc4c4
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/COFF_x86_64.s
@@ -0,0 +1,40 @@
+# RUN: llvm-mc -triple=x86_64-pc-win32 -filetype=obj -o %T/COFF_x86_64.o %s
+# RUN: llvm-rtdyld -triple=x86_64-pc-win32 -verify -check=%s %/T/COFF_x86_64.o
+ .text
+ .def F;
+ .scl 2;
+ .type 32;
+ .endef
+ .globl __real400921f9f01b866e
+ .section .rdata,"dr",discard,__real400921f9f01b866e
+ .align 8
+__real400921f9f01b866e:
+ .quad 4614256650576692846 # double 3.1415899999999999
+ .text
+ .globl F
+ .global inst1
+ .align 16, 0x90
+F: # @F
+.Ltmp0:
+.seh_proc F
+# BB#0: # %entry
+.Ltmp1:
+ .seh_endprologue
+# rtdyld-check: decode_operand(inst1, 4) = __real400921f9f01b866e - next_pc(inst1)
+inst1:
+ movsd __real400921f9f01b866e(%rip), %xmm0 # xmm0 = mem[0],zero
+ retq
+.Leh_func_end0:
+.Ltmp2:
+ .seh_endproc
+
+ .data
+ .globl x # @x
+# rtdyld-check: *{8}x = F
+x:
+ .quad F
+
+# Make sure the JIT doesn't bail out on BSS sections.
+ .bss
+bss_check:
+ .fill 8, 1, 0
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_FILE.s b/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_FILE.s
new file mode 100644
index 000000000000..50cc65079bd0
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_FILE.s
@@ -0,0 +1,14 @@
+# RUN: llvm-mc -triple=x86_64-pc-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_STT_FILE_FILE_x86-64.o %p/Inputs/ELF_STT_FILE_FILE.s
+# RUN: llvm-mc -triple=x86_64-pc-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_STT_FILE_GLOBAL_x86-64.o %p/Inputs/ELF_STT_FILE_GLOBAL.s
+# RUN: llvm-mc -triple=x86_64-pc-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_STT_FILE_x86-64.o %s
+# RUN: llvm-rtdyld -triple=x86_64-pc-linux -verify %T/test_ELF_STT_FILE_GLOBAL_x86-64.o %T/test_ELF_STT_FILE_FILE_x86-64.o %T/test_ELF_STT_FILE_x86-64.o
+
+# Test that RTDyldELF ignores STT_FILE symbols, and in particular does
+# crash if we are relocating against a symbol that happens to have the
+# same name as an STT_FILE symbol.
+
+_main:
+ movq foo.c@GOTPCREL(%rip), %rax
+ movq bar.c@GOTPCREL(%rip), %rax
+ movq $0, %rax
+ retq
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PC8_relocations.s b/test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PC8_relocations.s
new file mode 100644
index 000000000000..7df9225359fd
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PC8_relocations.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc -triple=x86_64-pc-linux -relocation-model=pic -filetype=obj -o %T/test_ELF_x86-64_PC8.o %s
+# RUN: llvm-rtdyld -triple=x86_64-pc-linux -verify -map-section test_ELF_x86-64_PC8.o,.text.bar=0x10000 -map-section test_ELF_x86-64_PC8.o,.text.baz=0x10040 %T/test_ELF_x86-64_PC8.o
+# RUN: llvm-rtdyld -triple=x86_64-pc-linux -verify -map-section test_ELF_x86-64_PC8.o,.text.baz=0x10000 -map-section test_ELF_x86-64_PC8.o,.text.bar=0x10040 %T/test_ELF_x86-64_PC8.o
+
+# Test that R_X86_64_PC8 relocation works.
+
+ .section .text.bar,"ax"
+ .align 16, 0x90
+ .type bar,@function
+bar:
+ retq
+.Ltmp1:
+ .size bar, .Ltmp1-bar
+
+ .section .text.baz,"ax"
+ .align 16, 0x90
+ .type baz,@function
+baz:
+ movq %rdi, %rcx
+ jrcxz bar
+ retq
+.Ltmp2:
+ .size baz, .Ltmp2-baz
+
+
+ .section ".note.GNU-stack","",@progbits
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/ELF_x86_64_StubBuf.s b/test/ExecutionEngine/RuntimeDyld/X86/ELF_x86_64_StubBuf.s
new file mode 100644
index 000000000000..8bcba2c4bac7
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/ELF_x86_64_StubBuf.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc -triple=x86_64-apple-macosx10.10.0 -filetype=obj -o %T/test_ELF_x86_64_StubBuf.o %s
+# RUN: llvm-rtdyld -print-alloc-requests -triple=x86_64-pc-linux -dummy-extern _g=196608 -verify %T/test_ELF_x86_64_StubBuf.o
+
+# Compiled from Inputs/ELF/ELF_x86_64_StubBuf.ll
+
+# CHECK: allocateCodeSection(Size = 42, Alignment = 16, SectionName = __text)
+
+ .section __TEXT,__text,regular,pure_instructions
+ .macosx_version_min 10, 10
+ .globl _f
+ .align 4, 0x90
+_f: ## @f
+ .cfi_startproc
+## BB#0: ## %entry
+ pushq %rax
+Ltmp0:
+ .cfi_def_cfa_offset 16
+ callq _g
+ callq _g
+ callq _g
+ popq %rax
+ retq
+ .cfi_endproc
+
+
+.subsections_via_symbols
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_FILE.s b/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_FILE.s
new file mode 100644
index 000000000000..7280c51b5867
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_FILE.s
@@ -0,0 +1,3 @@
+.file "foo.c"
+.global bar.c
+bar.c:
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_GLOBAL.s b/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_GLOBAL.s
new file mode 100644
index 000000000000..a7e5342746e2
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_GLOBAL.s
@@ -0,0 +1,2 @@
+.global foo.c
+foo.c:
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_x86_64_StubBuf.ll b/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_x86_64_StubBuf.ll
new file mode 100644
index 000000000000..625b487b8af3
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_x86_64_StubBuf.ll
@@ -0,0 +1,12 @@
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+declare void @g()
+
+define void @f() {
+ entry:
+ call void @g()
+ call void @g()
+ call void @g()
+ ret void
+}
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
index 2ef8cc439df1..c1762ab0be21 100644
--- a/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
+++ b/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
@@ -57,4 +57,22 @@ z1:
z2:
.quad ds2
+# Test absolute symbols.
+# rtdyld-check: abssym = 0xdeadbeef
+ .globl abssym
+abssym = 0xdeadbeef
+
+ # Test subtractor relocations.
+# rtdyld-check: *{8}z3 = z4 - z5 + 4
+z3:
+ .quad z4 - z5 + 4
+
+ .section __DATA,_tmp1
+z4:
+ .byte 1
+
+ .section __DATA,_tmp2
+z5:
+ .byte 1
+
.subsections_via_symbols
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
index f6673df3c358..88e3e9b23883 100644
--- a/test/ExecutionEngine/lit.local.cfg
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -1,4 +1,4 @@
-if config.root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']:
+if config.root.host_arch in ['Sparc', 'PowerPC', 'AArch64', 'SystemZ']:
config.unsupported = True
# CMake and autoconf diverge in naming or host_arch
@@ -12,6 +12,9 @@ if 'aarch64' in config.root.target_triple \
if 'hexagon' in config.root.target_triple:
config.unsupported = True
+if 'sparc' in config.root.target_triple:
+ config.unsupported = True
+
# ExecutionEngine tests are not expected to pass in a cross-compilation setup.
if 'native' not in config.available_features:
config.unsupported = True
diff --git a/test/Feature/OperandBundles/adce.ll b/test/Feature/OperandBundles/adce.ll
new file mode 100644
index 000000000000..a729ba710689
--- /dev/null
+++ b/test/Feature/OperandBundles/adce.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -adce < %s | FileCheck %s
+
+; While it is normally okay to DCE out calls to @readonly_function and
+; @readnone_function, we cannot do that if they're carrying operand
+; bundles since the presence of unknown operand bundles implies
+; arbitrary memory effects.
+
+declare void @readonly_function() readonly nounwind
+declare void @readnone_function() readnone nounwind
+
+define void @test0() {
+; CHECK-LABEL: @test0(
+ entry:
+ call void @readonly_function() [ "tag"() ]
+; CHECK: call void @readonly_function
+ ret void
+}
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+ entry:
+ call void @readnone_function() [ "tag"() ]
+; CHECK: call void @readnone_function
+ ret void
+}
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+ entry:
+; CHECK-NOT: @readonly_function(
+ call void @readonly_function() readonly [ "tag"() ]
+ ret void
+}
+
+define void @test3() {
+; CHECK-LABEL: @test3(
+ entry:
+; CHECK-NOT: @readnone_function(
+ call void @readnone_function() readnone [ "tag"() ]
+ ret void
+}
+
+define void @test4() {
+; CHECK-LABEL: @test4(
+ entry:
+; CHECK-NOT: @readonly_function()
+ call void @readonly_function() [ "deopt"() ]
+ ret void
+}
diff --git a/test/Feature/OperandBundles/basic-aa-argmemonly.ll b/test/Feature/OperandBundles/basic-aa-argmemonly.ll
new file mode 100644
index 000000000000..aa9445886060
--- /dev/null
+++ b/test/Feature/OperandBundles/basic-aa-argmemonly.ll
@@ -0,0 +1,51 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+
+declare void @argmemonly_function(i32 *) argmemonly
+
+define i32 @test0(i32* %P, i32* noalias %P2) {
+; CHECK-LABEL: @test0(
+ %v1 = load i32, i32* %P
+; CHECK: %v1 = load i32, i32* %P
+ call void @argmemonly_function(i32* %P2) [ "tag"() ]
+; CHECK: call void @argmemonly_function(
+ %v2 = load i32, i32* %P
+; CHECK: %v2 = load i32, i32* %P
+ %diff = sub i32 %v1, %v2
+; CHECK: %diff = sub i32 %v1, %v2
+ ret i32 %diff
+; CHECK: ret i32 %diff
+}
+
+define i32 @test1(i32* %P, i32* noalias %P2) {
+; CHECK-LABEL: @test1(
+ %v1 = load i32, i32* %P
+ call void @argmemonly_function(i32* %P2) argmemonly [ "tag"() ]
+; CHECK: call void @argmemonly_function(
+ %v2 = load i32, i32* %P
+ %diff = sub i32 %v1, %v2
+ ret i32 %diff
+; CHECK: ret i32 0
+}
+
+define i32 @test2(i32* %P, i32* noalias %P2) {
+; Note: in this test we //can// GVN %v1 and %v2 into one value in theory. Calls
+; with deopt operand bundles are not argmemonly because they *read* the entire
+; heap, but they don't write to any location in the heap if the callee does not
+; deoptimize the caller. This fact, combined with the fact that
+; @argmemonly_function is, well, an argmemonly function, can be used to conclude
+; that %P is not written to at the callsite. However LLVM currently cannot
+; describe the "does not write to non-args, and reads the entire heap" effect on
+; a callsite.
+
+; CHECK-LABEL: @test2(
+ %v1 = load i32, i32* %P
+; CHECK: %v1 = load i32, i32* %P
+ call void @argmemonly_function(i32* %P2) [ "deopt"() ]
+; CHECK: call void @argmemonly_function(
+ %v2 = load i32, i32* %P
+; CHECK: %v2 = load i32, i32* %P
+ %diff = sub i32 %v1, %v2
+; CHECK: %diff = sub i32 %v1, %v2
+ ret i32 %diff
+; CHECK: ret i32 %diff
+}
diff --git a/test/Feature/OperandBundles/dse.ll b/test/Feature/OperandBundles/dse.ll
new file mode 100644
index 000000000000..9ddf7f02e384
--- /dev/null
+++ b/test/Feature/OperandBundles/dse.ll
@@ -0,0 +1,62 @@
+; RUN: opt -S -dse < %s | FileCheck %s
+
+declare void @f()
+declare noalias i8* @malloc(i32) nounwind
+
+define void @test_0() {
+; CHECK-LABEL: @test_0(
+ %m = call i8* @malloc(i32 24)
+ tail call void @f() [ "unknown"(i8* %m) ]
+; CHECK: store i8 -19, i8* %m
+ store i8 -19, i8* %m
+ ret void
+}
+
+define i8* @test_1() {
+; CHECK-LABEL: @test_1(
+ %m = call i8* @malloc(i32 24)
+ tail call void @f() [ "unknown"(i8* %m) ]
+ store i8 -19, i8* %m
+ tail call void @f()
+ store i8 101, i8* %m
+
+; CHECK: tail call void @f() [ "unknown"(i8* %m) ]
+; CHECK: store i8 -19, i8* %m
+; CHECK: tail call void @f()
+; CHECK: store i8 101, i8* %m
+
+ ret i8* %m
+}
+
+define void @test_2() {
+; Since the deopt operand bundle does not escape %m (see caveat below), it is
+; legal to elide the final store that location.
+
+; CHECK-LABEL: @test_2(
+ %m = call i8* @malloc(i32 24)
+ tail call void @f() [ "deopt"(i8* %m) ]
+ store i8 -19, i8* %m
+ ret void
+
+; CHECK: tail call void @f() [ "deopt"(i8* %m) ]
+; CHECK-NEXT ret void
+}
+
+define i8* @test_3() {
+; Since the deopt operand bundle does not escape %m (see caveat below), @f
+; cannot observe the stores to %m
+
+; CHECK-LABEL: @test_3(
+ %m = call i8* @malloc(i32 24)
+ tail call void @f() [ "deopt"(i8* %m) ]
+ store i8 -19, i8* %m
+ tail call void @f()
+ store i8 101, i8* %m
+ ret i8* %m
+}
+
+
+; Caveat: technically, %m can only escape if the calling function is deoptimized
+; at the call site (i.e. the call returns to the "deopt" continuation). Since
+; the calling function body will be invalidated in that case, the calling
+; function can be optimized under the assumption that %m does not escape.
diff --git a/test/Feature/OperandBundles/early-cse.ll b/test/Feature/OperandBundles/early-cse.ll
new file mode 100644
index 000000000000..fc201479d8ce
--- /dev/null
+++ b/test/Feature/OperandBundles/early-cse.ll
@@ -0,0 +1,89 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+
+; While it is normally okay to do memory optimizations over calls to
+; @readonly_function and @readnone_function, we cannot do that if
+; they're carrying unknown operand bundles since the presence of
+; unknown operand bundles implies arbitrary memory effects.
+
+declare void @readonly_function() readonly nounwind
+declare void @readnone_function() readnone nounwind
+
+define i32 @test0(i32* %x) {
+; CHECK-LABEL: @test0(
+ entry:
+ store i32 100, i32* %x
+; CHECK: store i32 100, i32* %x
+ call void @readonly_function() [ "tag"() ]
+; CHECK: call void @readonly_function()
+
+ %v = load i32, i32* %x
+; CHECK: %v = load i32, i32* %x
+; CHECK: ret i32 %v
+ ret i32 %v
+}
+
+define i32 @test1(i32* %x) {
+; CHECK: @test1(
+ entry:
+ store i32 100, i32* %x
+; CHECK: store i32 100, i32* %x
+ call void @readonly_function() readonly [ "tag"() ]
+; CHECK-NOT: call void @readonly_function
+ %v = load i32, i32* %x
+ ret i32 %v
+; CHECK: ret i32 100
+}
+
+define i32 @test3(i32* %x) {
+; CHECK-LABEL: @test3(
+ entry:
+ store i32 100, i32* %x
+; CHECK: store i32 100, i32* %x
+ call void @readonly_function()
+; CHECK-NOT: call void @readonly_function
+ %v = load i32, i32* %x
+ ret i32 %v
+; CHECK: ret i32 100
+}
+
+define void @test4(i32* %x) {
+; CHECK-LABEL: @test4(
+ entry:
+ store i32 100, i32* %x
+; CHECK: store i32 100, i32* %x
+ call void @readnone_function() [ "tag"() ]
+; CHECK: call void @readnone_function
+ store i32 200, i32* %x
+; CHECK: store i32 200, i32* %x
+ ret void
+}
+
+define void @test5(i32* %x) {
+; CHECK-LABEL: @test5(
+ entry:
+ store i32 100, i32* %x
+; CHECK-NOT: store i32 100, i32* %x
+; CHECK-NOT: call void @readnone_function
+ call void @readnone_function() readnone [ "tag"() ]
+ store i32 200, i32* %x
+; CHECK: store i32 200, i32* %x
+ ret void
+}
+
+define void @test6(i32* %x) {
+; The "deopt" operand bundle does not make the call to
+; @readonly_function read-write; and so the nounwind readonly call can
+; be deleted.
+
+; CHECK-LABEL: @test6(
+ entry:
+
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 200, i32* %x
+; CHECK-NEXT: ret void
+
+ store i32 100, i32* %x
+ call void @readonly_function() [ "deopt"() ]
+ store i32 200, i32* %x
+ ret void
+}
diff --git a/test/Feature/OperandBundles/function-attrs.ll b/test/Feature/OperandBundles/function-attrs.ll
new file mode 100644
index 000000000000..808f396fed8b
--- /dev/null
+++ b/test/Feature/OperandBundles/function-attrs.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -functionattrs < %s | FileCheck %s
+
+declare void @f_readonly() readonly
+declare void @f_readnone() readnone
+
+define void @test_0(i32* %x) {
+; FunctionAttrs must not infer readonly / readnone for %x
+
+; CHECK-LABEL: define void @test_0(i32* %x) {
+ entry:
+ ; CHECK: call void @f_readonly() [ "foo"(i32* %x) ]
+ call void @f_readonly() [ "foo"(i32* %x) ]
+ ret void
+}
+
+define void @test_1(i32* %x) {
+; FunctionAttrs must not infer readonly / readnone for %x
+
+; CHECK-LABEL: define void @test_1(i32* %x) {
+ entry:
+ ; CHECK: call void @f_readnone() [ "foo"(i32* %x) ]
+ call void @f_readnone() [ "foo"(i32* %x) ]
+ ret void
+}
+
+define void @test_2(i32* %x) {
+; The "deopt" operand bundle does not capture or write to %x.
+
+; CHECK-LABEL: define void @test_2(i32* nocapture readonly %x)
+ entry:
+ call void @f_readonly() [ "deopt"(i32* %x) ]
+ ret void
+}
diff --git a/test/Feature/OperandBundles/inliner-conservative.ll b/test/Feature/OperandBundles/inliner-conservative.ll
new file mode 100644
index 000000000000..d9f09f71d905
--- /dev/null
+++ b/test/Feature/OperandBundles/inliner-conservative.ll
@@ -0,0 +1,17 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+
+; Check that the inliner does not inline through arbitrary unknown
+; operand bundles.
+
+define i32 @callee() {
+ entry:
+ ret i32 2
+}
+
+define i32 @caller() {
+; CHECK: @caller(
+ entry:
+; CHECK: call i32 @callee() [ "unknown"() ]
+ %x = call i32 @callee() [ "unknown"() ]
+ ret i32 %x
+}
diff --git a/test/Feature/OperandBundles/merge-func.ll b/test/Feature/OperandBundles/merge-func.ll
new file mode 100644
index 000000000000..1fa6eb093084
--- /dev/null
+++ b/test/Feature/OperandBundles/merge-func.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; Minor note: functions need to be at least three instructions long
+; to be considered by -mergefunc.
+
+declare i32 @foo(...)
+
+define i32 @f() {
+; CHECK-LABEL: @f(
+ entry:
+ %v0 = call i32 (...) @foo(i32 10) [ "foo"(i32 20) ]
+ %v1 = call i32 (...) @foo(i32 10) [ "foo"(i32 20) ]
+ %v2 = call i32 (...) @foo(i32 10) [ "foo"(i32 20) ]
+
+; CHECK: %v0 = call i32 (...) @foo(i32 10) [ "foo"(i32 20) ]
+; CHECK: %v1 = call i32 (...) @foo(i32 10) [ "foo"(i32 20) ]
+; CHECK: %v2 = call i32 (...) @foo(i32 10) [ "foo"(i32 20) ]
+
+ ret i32 %v2
+}
+
+define i32 @g() {
+; CHECK-LABEL: @g(
+ entry:
+ %v0 = call i32 (...) @foo() [ "foo"(i32 10, i32 20) ]
+ %v1 = call i32 (...) @foo() [ "foo"(i32 10, i32 20) ]
+ %v2 = call i32 (...) @foo() [ "foo"(i32 10, i32 20) ]
+
+; CHECK: %v0 = call i32 (...) @foo() [ "foo"(i32 10, i32 20) ]
+; CHECK: %v1 = call i32 (...) @foo() [ "foo"(i32 10, i32 20) ]
+; CHECK: %v2 = call i32 (...) @foo() [ "foo"(i32 10, i32 20) ]
+
+ ret i32 %v2
+}
+
+define i32 @f.invoke() personality i8 3 {
+; CHECK-LABEL: @f.invoke(
+ entry:
+; CHECK: %v0 = invoke i32 (...) @foo(i32 10) [ "foo"(i32 20) ]
+ %v0 = invoke i32 (...) @foo(i32 10) [ "foo"(i32 20) ]
+ to label %normal unwind label %exception
+
+ normal:
+ ret i32 %v0
+
+ exception:
+ %cleanup = landingpad i8 cleanup
+ ret i32 0
+}
+
+define i32 @g.invoke() personality i8 3 {
+; CHECK-LABEL: @g.invoke(
+ entry:
+; CHECK: %v0 = invoke i32 (...) @foo() [ "foo"(i32 10, i32 20) ]
+ %v0 = invoke i32 (...) @foo() [ "foo"(i32 10, i32 20) ]
+ to label %normal unwind label %exception
+
+ normal:
+ ret i32 %v0
+
+ exception:
+ %cleanup = landingpad i8 cleanup
+ ret i32 0
+}
diff --git a/test/Feature/OperandBundles/special-state.ll b/test/Feature/OperandBundles/special-state.ll
new file mode 100644
index 000000000000..56e337cc16b3
--- /dev/null
+++ b/test/Feature/OperandBundles/special-state.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+
+; This test isn't directly related to EarlyCSE or varargs. It is just
+; using these as a vehicle for testing the correctness of
+; haveSameSpecialState around operand bundles.
+
+declare i32 @foo(...)
+
+define i32 @f() {
+; CHECK-LABEL: @f(
+ entry:
+; CHECK: %v0 = call i32 (...) @foo(
+; CHECK: %v1 = call i32 (...) @foo(
+; CHECK: %v = add i32 %v0, %v1
+; CHECK: ret i32 %v
+
+ %v0 = call i32 (...) @foo(i32 10) readonly [ "foo"(i32 20) ]
+ %v1 = call i32 (...) @foo() readonly [ "foo"(i32 10, i32 20) ]
+ %v = add i32 %v0, %v1
+ ret i32 %v
+}
diff --git a/test/Feature/alias2.ll b/test/Feature/alias2.ll
index e461854e7514..ae5bc11d71a9 100644
--- a/test/Feature/alias2.ll
+++ b/test/Feature/alias2.ll
@@ -9,20 +9,20 @@
@v3 = global [2 x i16] zeroinitializer
; CHECK: @v3 = global [2 x i16] zeroinitializer
-@a1 = alias bitcast (i32* @v1 to i16*)
-; CHECK: @a1 = alias bitcast (i32* @v1 to i16*)
+@a1 = alias i16, bitcast (i32* @v1 to i16*)
+; CHECK: @a1 = alias i16, bitcast (i32* @v1 to i16*)
-@a2 = alias bitcast([1 x i32]* @v2 to i32*)
-; CHECK: @a2 = alias getelementptr inbounds ([1 x i32], [1 x i32]* @v2, i32 0, i32 0)
+@a2 = alias i32, bitcast([1 x i32]* @v2 to i32*)
+; CHECK: @a2 = alias i32, getelementptr inbounds ([1 x i32], [1 x i32]* @v2, i32 0, i32 0)
-@a3 = alias addrspacecast (i32* @v1 to i32 addrspace(2)*)
-; CHECK: @a3 = alias addrspacecast (i32* @v1 to i32 addrspace(2)*)
+@a3 = alias i32, addrspacecast (i32* @v1 to i32 addrspace(2)*)
+; CHECK: @a3 = alias i32, addrspacecast (i32* @v1 to i32 addrspace(2)*)
-@a4 = alias bitcast (i32* @v1 to i16*)
-; CHECK: @a4 = alias bitcast (i32* @v1 to i16*)
+@a4 = alias i16, bitcast (i32* @v1 to i16*)
+; CHECK: @a4 = alias i16, bitcast (i32* @v1 to i16*)
-@a5 = thread_local(localdynamic) alias i32* @v1
-; CHECK: @a5 = thread_local(localdynamic) alias i32* @v1
+@a5 = thread_local(localdynamic) alias i32, i32* @v1
+; CHECK: @a5 = thread_local(localdynamic) alias i32, i32* @v1
-@a6 = alias getelementptr ([2 x i16], [2 x i16]* @v3, i32 1, i32 1)
-; CHECK: @a6 = alias getelementptr ([2 x i16], [2 x i16]* @v3, i32 1, i32 1)
+@a6 = alias i16, getelementptr ([2 x i16], [2 x i16]* @v3, i32 1, i32 1)
+; CHECK: @a6 = alias i16, getelementptr ([2 x i16], [2 x i16]* @v3, i32 1, i32 1)
diff --git a/test/Feature/aliases.ll b/test/Feature/aliases.ll
index b3b9ceba527b..a8c95e9fd0c3 100644
--- a/test/Feature/aliases.ll
+++ b/test/Feature/aliases.ll
@@ -5,28 +5,28 @@
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @foo1 to i8*)], section "llvm.metadata"
@bar = global i32 0
-@foo1 = alias i32* @bar
-@foo2 = alias i32* @bar
-@foo3 = alias i32* @foo2
-@foo4 = unnamed_addr alias i32* @foo2
+@foo1 = alias i32, i32* @bar
+@foo2 = alias i32, i32* @bar
+@foo3 = alias i32, i32* @foo2
+@foo4 = unnamed_addr alias i32, i32* @foo2
; Make sure the verifier does not complain about references to a global
; declaration from an initializer.
@decl = external global i32
@ptr = global i32* @decl
-@ptr_a = alias i32** @ptr
+@ptr_a = alias i32*, i32** @ptr
%FunTy = type i32()
define i32 @foo_f() {
ret i32 0
}
-@bar_f = weak_odr alias %FunTy* @foo_f
-@bar_ff = alias i32()* @bar_f
+@bar_f = weak_odr alias %FunTy, %FunTy* @foo_f
+@bar_ff = alias i32(), i32()* @bar_f
-@bar_i = internal alias i32* @bar
+@bar_i = internal alias i32, i32* @bar
-@A = alias bitcast (i32* @bar to i64*)
+@A = alias i64, bitcast (i32* @bar to i64*)
define i32 @test() {
entry:
diff --git a/test/Feature/callingconventions.ll b/test/Feature/callingconventions.ll
index 9aafb36c7573..ac4c5090a51d 100644
--- a/test/Feature/callingconventions.ll
+++ b/test/Feature/callingconventions.ll
@@ -59,4 +59,11 @@ define void @ghc_caller() {
ret void
}
+declare hhvm_ccc void @hhvm_c_callee()
+
+define hhvmcc void @hhvm_caller() {
+ call hhvm_ccc void @hhvm_c_callee()
+ ret void
+}
+
declare i32 @__gxx_personality_v0(...)
diff --git a/test/Feature/comdat.ll b/test/Feature/comdat.ll
index c2a9d6396293..b0286c06ea0f 100644
--- a/test/Feature/comdat.ll
+++ b/test/Feature/comdat.ll
@@ -9,8 +9,8 @@ $f2 = comdat any
@v = global i32 0, comdat($f)
; CHECK: @v = global i32 0, comdat($f)
-@a = alias i32* @v
-; CHECK: @a = alias i32* @v{{$}}
+@a = alias i32, i32* @v
+; CHECK: @a = alias i32, i32* @v{{$}}
define void @f() comdat($f) {
ret void
diff --git a/test/Feature/exception.ll b/test/Feature/exception.ll
index 7568ecfa5f75..2634692f4252 100644
--- a/test/Feature/exception.ll
+++ b/test/Feature/exception.ll
@@ -25,3 +25,112 @@ lpad: ; preds = %entry
declare void @_Z3quxv() optsize
declare i32 @__gxx_personality_v0(...)
+
+define void @cleanupret0() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ invoke void @_Z3quxv() optsize
+ to label %exit unwind label %pad
+pad:
+ %cp = cleanuppad within none [i7 4]
+ cleanupret from %cp unwind to caller
+exit:
+ ret void
+}
+
+; forward ref by name
+define void @cleanupret1() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ invoke void @_Z3quxv() optsize
+ to label %exit unwind label %pad
+cleanup:
+ cleanupret from %cp unwind label %pad
+pad:
+ %cp = cleanuppad within none []
+ br label %cleanup
+exit:
+ ret void
+}
+
+; forward ref by ID
+define void @cleanupret2() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ invoke void @_Z3quxv() optsize
+ to label %exit unwind label %pad
+cleanup:
+ cleanupret from %0 unwind label %pad
+pad:
+ %0 = cleanuppad within none []
+ br label %cleanup
+exit:
+ ret void
+}
+
+define void @catchret0() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ invoke void @_Z3quxv() optsize
+ to label %exit unwind label %pad
+pad:
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+catch:
+ %cp = catchpad within %cs1 [i7 4]
+ catchret from %cp to label %exit
+exit:
+ ret void
+}
+
+; forward ref by name
+define void @catchret1() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ invoke void @_Z3quxv() optsize
+ to label %exit unwind label %pad
+catchret:
+ catchret from %cp to label %exit
+pad:
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+catch:
+ %cp = catchpad within %cs1 [i7 4]
+ br label %catchret
+exit:
+ ret void
+}
+
+; forward ref by ID
+define void @catchret2() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ invoke void @_Z3quxv() optsize
+ to label %exit unwind label %pad
+catchret:
+ catchret from %0 to label %exit
+pad:
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+catch:
+ %0 = catchpad within %cs1 [i7 4]
+ br label %catchret
+exit:
+ ret void
+}
+
+define i8 @catchpad() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ invoke void @_Z3quxv() optsize
+ to label %exit unwind label %bb2
+bb2:
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+catch:
+ catchpad within %cs1 [i7 4]
+ br label %exit
+exit:
+ ret i8 0
+}
+
+define void @cleanuppad() personality i32 (...)* @__gxx_personality_v0 {
+entry:
+ br label %try.cont
+
+try.cont:
+ invoke void @_Z3quxv() optsize
+ to label %try.cont unwind label %bb
+bb:
+ cleanuppad within none [i7 4]
+ ret void
+}
diff --git a/test/Feature/optnone-llc.ll b/test/Feature/optnone-llc.ll
index b848b19d528b..94f61efea4aa 100644
--- a/test/Feature/optnone-llc.ll
+++ b/test/Feature/optnone-llc.ll
@@ -3,11 +3,13 @@
; RUN: llc -O2 -debug %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LLC-Ox
; RUN: llc -O3 -debug %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LLC-Ox
; RUN: llc -misched-postra -debug %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LLC-MORE
+; RUN: llc -O1 -debug-only=isel %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc -O1 -debug-only=isel -fast-isel=false %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=NOFAST
-; REQUIRES: asserts
+; REQUIRES: asserts, default_triple
; This test verifies that we don't run Machine Function optimizations
-; on optnone functions.
+; on optnone functions, and that we can turn off FastISel.
; Function Attrs: noinline optnone
define i32 @_Z3fooi(i32 %x) #0 {
@@ -52,3 +54,7 @@ attributes #0 = { optnone noinline }
; Alternate post-RA scheduler.
; LLC-MORE: Skipping pass 'PostRA Machine Instruction Scheduler'
+
+; Selectively disable FastISel for optnone functions.
+; FAST: FastISel is enabled
+; NOFAST: FastISel is disabled
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index 2490510fc761..9c4d416a1eff 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -171,7 +171,7 @@ define void @memintr_test(i8* %a, i8* %b) nounwind uwtable sanitize_address {
; CHECK: ret void
; CHECK: define internal void @asan.module_ctor()
-; CHECK: call void @__asan_init_v5()
+; CHECK: call void @__asan_init()
; PROF
; CHECK: ![[PROF]] = !{!"branch_weights", i32 1, i32 100000}
diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll
index 19d627857abe..0834d642df8f 100644
--- a/test/Instrumentation/AddressSanitizer/debug_info.ll
+++ b/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -6,7 +6,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-define i32 @_Z3zzzi(i32 %p) nounwind uwtable sanitize_address {
+define i32 @_Z3zzzi(i32 %p) nounwind uwtable sanitize_address !dbg !5 {
entry:
%p.addr = alloca i32, align 4
%r = alloca i32, align 4
@@ -33,23 +33,23 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!17}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169314)", isOptimized: true, emissionKind: 0, file: !16, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169314)", isOptimized: true, emissionKind: 0, file: !16, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
!1 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "zzz", linkageName: "_Z3zzzi", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !16, scope: !6, type: !7, function: i32 (i32)* @_Z3zzzi, variables: !1)
+!5 = distinct !DISubprogram(name: "zzz", linkageName: "_Z3zzzi", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !16, scope: !6, type: !7, variables: !1)
!6 = !DIFile(filename: "a.cc", directory: "/usr/local/google/llvm_cmake_clang/tmp/debuginfo")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p", line: 1, arg: 1, scope: !5, file: !6, type: !9)
+!10 = !DILocalVariable(name: "p", line: 1, arg: 1, scope: !5, file: !6, type: !9)
!11 = !DILocation(line: 1, scope: !5)
-!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "r", line: 2, scope: !13, file: !6, type: !9)
+!12 = !DILocalVariable(name: "r", line: 2, scope: !13, file: !6, type: !9)
; Verify that debug descriptors for argument and local variable will be replaced
; with descriptors that end with OpDeref (encoded as 2).
-; CHECK: ![[ARG_ID]] = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p", arg: 1,{{.*}} line: 1
+; CHECK: ![[ARG_ID]] = !DILocalVariable(name: "p", arg: 1,{{.*}} line: 1
; CHECK: ![[OPDEREF]] = !DIExpression(DW_OP_deref)
-; CHECK: ![[VAR_ID]] = !DILocalVariable(tag: DW_TAG_auto_variable, name: "r",{{.*}} line: 2
+; CHECK: ![[VAR_ID]] = !DILocalVariable(name: "r",{{.*}} line: 2
; Verify that there are no more variable descriptors.
; CHECK-NOT: !DILocalVariable(tag: DW_TAG_arg_variable
; CHECK-NOT: !DILocalVariable(tag: DW_TAG_auto_variable
diff --git a/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll b/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
new file mode 100644
index 000000000000..39a41700fdff
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
@@ -0,0 +1,40 @@
+; This test checks that non-instrumented allocas stay in the first basic block.
+; Only first-basic-block allocas are considered stack slots, and moving them
+; breaks debug info.
+
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-instrument-allocas=1 -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define i32 @foo() sanitize_address {
+entry:
+ ; Won't be instrumented because of asan-skip-promotable-allocas.
+ %non_instrumented1 = alloca i32, align 4
+
+ ; Regular alloca, will get instrumented (forced by the ptrtoint below).
+ %instrumented = alloca i32, align 4
+
+ ; Won't be instrumented because of asan-skip-promotable-allocas.
+ %non_instrumented2 = alloca i32, align 4
+
+ br label %bb0
+
+bb0:
+ ; Won't be instrumented because of asan-skip-promotable-allocas.
+ %non_instrumented3 = alloca i32, align 4
+
+ %ptr = ptrtoint i32* %instrumented to i32
+ br label %bb1
+
+bb1:
+ ret i32 %ptr
+}
+
+; CHECK: entry:
+; CHECK: %non_instrumented1 = alloca i32, align 4
+; CHECK: %non_instrumented2 = alloca i32, align 4
+; CHECK: load i32, i32* @__asan_option_detect_stack_use_after_return
+; CHECK: bb0:
+; CHECK: %non_instrumented3 = alloca i32, align 4
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-cstring.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-cstring.ll
deleted file mode 100644
index f096ac1828f2..000000000000
--- a/test/Instrumentation/AddressSanitizer/do-not-instrument-cstring.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s
-
-target datalayout = "e"
-target triple = "x86_64-apple-darwin10.0.0"
-
-@foo = private global [19 x i8] c"scannerWithString:\00", section "__TEXT,__objc_methname,cstring_literals"
-
-; CHECK: @foo = private global [19 x i8] c"scannerWithString:\00", section "__TEXT,__objc_methname,cstring_literals" \ No newline at end of file
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll
new file mode 100644
index 000000000000..854f5cb851ea
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll
@@ -0,0 +1,19 @@
+; This test checks that we are not instrumenting unnecessary globals
+; (llvm.metadata and other llvm internal globals).
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@foo_noinst = private global [19 x i8] c"scannerWithString:\00", section "__TEXT,__objc_methname,cstring_literals"
+
+; CHECK: @foo_noinst = private global [19 x i8] c"scannerWithString:\00", section "__TEXT,__objc_methname,cstring_literals"
+
+@.str_noinst = private unnamed_addr constant [4 x i8] c"aaa\00", section "llvm.metadata"
+@.str_noinst_prof = private unnamed_addr constant [4 x i8] c"aaa\00", section "__DATA,__llvm_covmap"
+@.str_inst = private unnamed_addr constant [4 x i8] c"aaa\00"
+
+; CHECK-NOT: {{asan_gen.*str_noinst}}
+; CHECK-NOT: {{asan_gen.*str_noinst_prof}}
+; CHECK: {{asan_gen.*str_inst}}
+; CHECK: @asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll
new file mode 100644
index 000000000000..fc0e676ec139
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll
@@ -0,0 +1,35 @@
+; This test checks that we are not instrumenting unnecessary globals
+; (llvm.metadata, init_array sections, and other llvm internal globals).
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define internal void @_ZL3foov() #0 {
+entry:
+ ret void
+}
+
+@__call_foo = global void ()* @_ZL3foov, section ".preinit_array", align 8
+@__call_foo_2 = global void ()* @_ZL3foov, section ".init_array", align 8
+@__call_foo_3 = global void ()* @_ZL3foov, section ".fini_array", align 8
+
+; CHECK-NOT: asan_gen{{.*}}__call_foo
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ ret i32 0
+}
+
+@.str_noinst = private unnamed_addr constant [4 x i8] c"aaa\00", section "llvm.metadata"
+@.str_noinst_prof = private unnamed_addr constant [4 x i8] c"aaa\00", section "__llvm_prf_data"
+@.str_inst = private unnamed_addr constant [4 x i8] c"aaa\00"
+
+; CHECK-NOT: {{asan_gen.*str_noinst}}
+; CHECK-NOT: {{asan_gen.*str_noinst_prof}}
+; CHECK: {{asan_gen.*str_inst}}
+; CHECK: @asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll
deleted file mode 100644
index f67155a29c2a..000000000000
--- a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata-darwin.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; This test checks that we are not instrumenting globals in llvm.metadata
-; and other llvm internal globals.
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.10.0"
-
-@.str_noinst = private unnamed_addr constant [4 x i8] c"aaa\00", section "llvm.metadata"
-@.str_noinst_prof = private unnamed_addr constant [4 x i8] c"aaa\00", section "__DATA,__llvm_covmap"
-@.str_inst = private unnamed_addr constant [4 x i8] c"aaa\00"
-
-; CHECK-NOT: {{asan_gen.*str_noinst}}
-; CHECK-NOT: {{asan_gen.*str_noinst_prof}}
-; CHECK: {{asan_gen.*str_inst}}
-; CHECK: @asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
deleted file mode 100644
index 93eca5bfd824..000000000000
--- a/test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; This test checks that we are not instrumenting globals in llvm.metadata
-; and other llvm internal globals.
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@.str_noinst = private unnamed_addr constant [4 x i8] c"aaa\00", section "llvm.metadata"
-@.str_noinst_prof = private unnamed_addr constant [4 x i8] c"aaa\00", section "__llvm_prf_data"
-@.str_inst = private unnamed_addr constant [4 x i8] c"aaa\00"
-
-; CHECK-NOT: {{asan_gen.*str_noinst}}
-; CHECK-NOT: {{asan_gen.*str_noinst_prof}}
-; CHECK: {{asan_gen.*str_inst}}
-; CHECK: @asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll b/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll
index fcc166e966e1..24141ee2190c 100644
--- a/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll
+++ b/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll
@@ -6,9 +6,9 @@ target triple = "i686-pc-windows-msvc"
; no action should be taken for these globals
$global_noinst = comdat largest
@aliasee = private unnamed_addr constant [2 x i8] [i8 1, i8 2], comdat($global_noinst)
-@global_noinst = unnamed_addr alias [2 x i8]* @aliasee
+@global_noinst = unnamed_addr alias [2 x i8], [2 x i8]* @aliasee
; CHECK-NOT: {{asan_gen.*global_noinst}}
-; CHECK-DAG: @global_noinst = unnamed_addr alias [2 x i8]* @aliasee
+; CHECK-DAG: @global_noinst = unnamed_addr alias [2 x i8], [2 x i8]* @aliasee
@global_inst = private constant [2 x i8] [i8 1, i8 2]
; CHECK-DAG: {{asan_gen.*global_inst}}
; CHECK: @asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll b/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
index ceaf0e6fcfb6..f6354b1ee59d 100644
--- a/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
@@ -7,8 +7,10 @@ target triple = "x86_64-unknown-linux-gnu"
define void @foo(i32 %len) sanitize_address {
entry:
+; CHECK-ALLOCA-LABEL: define void @foo
; CHECK-ALLOCA: __asan_alloca_poison
; CHECK-ALLOCA: __asan_allocas_unpoison
+; CHECK-ALLOCA: ret void
%0 = alloca i32, align 4
%1 = alloca i8*
store volatile i32 %len, i32* %0, align 4
@@ -19,3 +21,17 @@ entry:
ret void
}
+; Test that dynamic alloca is not used for inalloca variables.
+define void @has_inalloca() uwtable sanitize_address {
+; CHECK-ALLOCA-LABEL: define void @has_inalloca
+; CHECK-ALLOCA-NOT: __asan_alloca_poison
+; CHECK-ALLOCA-NOT: __asan_alloca_unpoison
+; CHECK-ALLOCA: ret void
+entry:
+ %t = alloca inalloca i32
+ store i32 42, i32* %t
+ call void @pass_inalloca(i32* inalloca %t)
+ ret void
+}
+
+declare void @pass_inalloca(i32* inalloca)
diff --git a/test/Instrumentation/AddressSanitizer/keep_going.ll b/test/Instrumentation/AddressSanitizer/keep_going.ll
new file mode 100644
index 000000000000..4bb59e74e8f1
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/keep_going.ll
@@ -0,0 +1,14 @@
+; Test asan internal compiler flags:
+; -asan-recover=1
+
+; RUN: opt < %s -asan -asan-recover -asan-module -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32* %p) sanitize_address {
+; CHECK: __asan_report_load4_noabort
+; CHECK-NOT: unreachable
+ %1 = load i32, i32* %p, align 4
+ ret i32 %1
+}
+
diff --git a/test/Instrumentation/AddressSanitizer/localescape.ll b/test/Instrumentation/AddressSanitizer/localescape.ll
new file mode 100644
index 000000000000..d9daa8c96b13
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/localescape.ll
@@ -0,0 +1,86 @@
+; RUN: opt < %s -asan -asan-module -asan-use-after-return -asan-stack-dynamic-alloca -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -asan-stack-dynamic-alloca=0 -S | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+declare i32 @llvm.eh.typeid.for(i8*) #2
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
+declare i8* @llvm.localrecover(i8*, i8*, i32)
+declare void @llvm.localescape(...) #1
+
+declare i32 @_except_handler3(...)
+declare void @may_throw(i32* %r)
+
+define i32 @main() sanitize_address personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
+entry:
+ %r = alloca i32, align 4
+ %__exception_code = alloca i32, align 4
+ call void (...) @llvm.localescape(i32* nonnull %__exception_code)
+ %0 = bitcast i32* %r to i8*
+ store i32 0, i32* %r, align 4
+ invoke void @may_throw(i32* nonnull %r) #4
+ to label %__try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 }
+ catch i8* bitcast (i32 ()* @"\01?filt$0@0@main@@" to i8*)
+ %2 = extractvalue { i8*, i32 } %1, 1
+ %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @"\01?filt$0@0@main@@" to i8*)) #1
+ %matches = icmp eq i32 %2, %3
+ br i1 %matches, label %__except, label %eh.resume
+
+__except: ; preds = %lpad
+ store i32 1, i32* %r, align 4
+ br label %__try.cont
+
+__try.cont: ; preds = %entry, %__except
+ %4 = load i32, i32* %r, align 4
+ ret i32 %4
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } %1
+}
+
+; Check that the alloca remains static and the localescape call remains in the
+; entry block.
+
+; CHECK-LABEL: define i32 @main()
+; CHECK-NOT: br {{.*}}label
+; CHECK: %__exception_code = alloca i32, align 4
+; CHECK-NOT: br {{.*}}label
+; CHECK: call void (...) @llvm.localescape(i32* nonnull %__exception_code)
+
+; Function Attrs: nounwind
+define internal i32 @"\01?filt$0@0@main@@"() #1 {
+entry:
+ %0 = tail call i8* @llvm.frameaddress(i32 1)
+ %1 = tail call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @main to i8*), i8* %0)
+ %2 = tail call i8* @llvm.localrecover(i8* bitcast (i32 ()* @main to i8*), i8* %1, i32 0)
+ %__exception_code = bitcast i8* %2 to i32*
+ %3 = getelementptr inbounds i8, i8* %0, i32 -20
+ %4 = bitcast i8* %3 to { i32*, i8* }**
+ %5 = load { i32*, i8* }*, { i32*, i8* }** %4, align 4
+ %6 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %5, i32 0, i32 0
+ %7 = load i32*, i32** %6, align 4
+ %8 = load i32, i32* %7, align 4
+ store i32 %8, i32* %__exception_code, align 4
+ ret i32 1
+}
+
+; CHECK-LABEL: define internal i32 @"\01?filt$0@0@main@@"()
+; CHECK: tail call i8* @llvm.localrecover(i8* bitcast (i32 ()* @main to i8*), i8* {{.*}}, i32 0)
+
+define void @ScaleFilterCols_SSSE3(i8* %dst_ptr, i8* %src_ptr, i32 %dst_width, i32 %x, i32 %dx) sanitize_address {
+entry:
+ %dst_width.addr = alloca i32, align 4
+ store i32 %dst_width, i32* %dst_width.addr, align 4
+ %0 = call { i8*, i8*, i32, i32, i32 } asm sideeffect "", "=r,=r,={ax},=r,=r,=*rm,rm,rm,0,1,2,3,4,5,~{memory},~{cc},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{dirflag},~{fpsr},~{flags}"(i32* nonnull %dst_width.addr, i32 %x, i32 %dx, i8* %dst_ptr, i8* %src_ptr, i32 0, i32 0, i32 0, i32 %dst_width)
+ ret void
+}
+
+define void @ScaleColsUp2_SSE2() sanitize_address {
+entry:
+ ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll b/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll
index 9c058742dbf5..a8292204d27e 100644
--- a/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll
+++ b/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll
@@ -44,3 +44,34 @@ entry:
call void asm sideeffect "mov %%rbx, %%rcx", "~{dirflag},~{fpsr},~{flags}"() nounwind
ret void
}
+
+; Test that dynamic alloca is not used when setjmp is present.
+%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t }
+%struct.__sigset_t = type { [16 x i64] }
+@_ZL3buf = internal global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16
+
+define void @Func3() uwtable sanitize_address {
+; CHECK-LABEL: define void @Func3
+; CHECK-NOT: __asan_option_detect_stack_use_after_return
+; CHECK-NOT: __asan_stack_malloc
+; CHECK: call void @__asan_handle_no_return
+; CHECK: call void @longjmp
+; CHECK: ret void
+entry:
+ %a = alloca i32, align 4
+ %call = call i32 @_setjmp(%struct.__jmp_buf_tag* getelementptr inbounds ([1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* @_ZL3buf, i32 0, i32 0)) nounwind returns_twice
+ %cmp = icmp eq i32 0, %call
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ call void @longjmp(%struct.__jmp_buf_tag* getelementptr inbounds ([1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* @_ZL3buf, i32 0, i32 0), i32 1) noreturn nounwind
+ unreachable
+
+if.end: ; preds = %entry
+ call void @_Z10escape_ptrPi(i32* %a)
+ ret void
+}
+
+declare i32 @_setjmp(%struct.__jmp_buf_tag*) nounwind returns_twice
+declare void @longjmp(%struct.__jmp_buf_tag*, i32) noreturn nounwind
+declare void @_Z10escape_ptrPi(i32*)
diff --git a/test/Instrumentation/AddressSanitizer/twice.ll b/test/Instrumentation/AddressSanitizer/twice.ll
new file mode 100644
index 000000000000..9f7826f73952
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/twice.ll
@@ -0,0 +1,8 @@
+; Check that the address sanitizer pass can be reused
+; RUN: opt < %s -S -run-twice -asan
+
+define void @foo(i64* %b) nounwind uwtable sanitize_address {
+ entry:
+ store i64 0, i64* %b, align 1
+ ret void
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/abilist.ll b/test/Instrumentation/DataFlowSanitizer/abilist.ll
index 90103f8b9804..8b30875a03fa 100644
--- a/test/Instrumentation/DataFlowSanitizer/abilist.ll
+++ b/test/Instrumentation/DataFlowSanitizer/abilist.ll
@@ -17,7 +17,7 @@ define i32 @functional(i32 %a, i32 %b) {
; CHECK: %[[CALL:.*]] = call { i32 (i32, i32)*, i16 } @"dfs$g"(i32 %0, i16 0)
; CHECK: %[[XVAL:.*]] = extractvalue { i32 (i32, i32)*, i16 } %[[CALL]], 0
; CHECK: ret {{.*}} %[[XVAL]]
-@discardg = alias i32 (i32, i32)* (i32)* @g
+@discardg = alias i32 (i32, i32)* (i32), i32 (i32, i32)* (i32)* @g
declare void @custom1(i32 %a, i32 %b)
@@ -83,7 +83,7 @@ define i32 (i32, i32)* @g(i32) {
; CHECK: %[[IVAL0:.*]] = insertvalue { i32, i16 } undef, i32 %[[CALL]], 0
; CHECK: %[[IVAL1:.*]] = insertvalue { i32, i16 } %[[IVAL0]], i16 0, 1
; CHECK: ret { i32, i16 } %[[IVAL1]]
-@adiscard = alias i32 (i32, i32)* @discard
+@adiscard = alias i32 (i32, i32), i32 (i32, i32)* @discard
; CHECK: declare void @__dfsw_custom1(i32, i32, i16, i16)
; CHECK: declare i32 @__dfsw_custom2(i32, i32, i16, i16, i16*)
diff --git a/test/Instrumentation/DataFlowSanitizer/debug.ll b/test/Instrumentation/DataFlowSanitizer/debug.ll
index c18f5920fa3f..8fac157afd4e 100644
--- a/test/Instrumentation/DataFlowSanitizer/debug.ll
+++ b/test/Instrumentation/DataFlowSanitizer/debug.ll
@@ -1,6 +1,7 @@
; RUN: opt < %s -dfsan -dfsan-abilist=%S/Inputs/debuglist.txt -S | FileCheck %s
-; CHECK: !DISubprogram(name: "main",{{.*}} function: i32 ()* @main{{[,)]}}
+; CHECK: define i32 @main() {{.*}} !dbg [[SP:![0-9]+]]
+; CHECK: [[SP]] = distinct !DISubprogram(name: "main"
; Generated from a simple source file compiled with clang -g:
; int main() {
@@ -10,7 +11,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
-define i32 @main() #0 {
+define i32 @main() #0 !dbg !4 {
entry:
ret i32 0, !dbg !12
}
@@ -21,11 +22,11 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "debug.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!4 = distinct !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "debug.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/Instrumentation/DataFlowSanitizer/external_mask.ll b/test/Instrumentation/DataFlowSanitizer/external_mask.ll
new file mode 100644
index 000000000000..db4c14ed9580
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/external_mask.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -dfsan -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @test(i32 %a, i32* nocapture readonly %b) #0 {
+; CHECK: @"dfs$test"
+; CHECK: %[[RV:.*]] load{{.*}}__dfsan_shadow_ptr_mask
+; CHECK: ptrtoint i32* {{.*}} to i64
+; CHECK: and {{.*}}%[[RV:.*]]
+; CHECK: mul i64
+ %1 = load i32, i32* %b, align 4
+ %2 = add nsw i32 %1, %a
+ ret i32 %2
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
index b14de5f9e513..a6cafd35e0b4 100644
--- a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
+++ b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
@@ -7,10 +7,10 @@ target triple = "x86_64-unknown-linux-gnu"
module asm ".symver f1,f@@version1"
; CHECK: @"dfs$f2" = alias {{.*}} @"dfs$f1"
-@f2 = alias void ()* @f1
+@f2 = alias void (), void ()* @f1
; CHECK: @"dfs$g2" = alias {{.*}} @"dfs$g1"
-@g2 = alias bitcast (void (i8*)* @g1 to void (i16*)*)
+@g2 = alias void (i16*), bitcast (void (i8*)* @g1 to void (i16*)*)
; CHECK: define void @"dfs$f1"
define void @f1() {
diff --git a/test/Instrumentation/InstrProfiling/PR23499.ll b/test/Instrumentation/InstrProfiling/PR23499.ll
index 5aae735120be..b10aeade2d09 100644
--- a/test/Instrumentation/InstrProfiling/PR23499.ll
+++ b/test/Instrumentation/InstrProfiling/PR23499.ll
@@ -1,21 +1,28 @@
-;; Check that data associated with linkonce odr functions are placed in
-;; the same comdat section as their associated function.
+;; Check that PGO instrumented variables associated with linkonce odr
+;; functions are placed in the same comdat section.
+
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-pc-win32-coff -instrprof -S | FileCheck %s --check-prefix=COFF
$_Z3barIvEvv = comdat any
-@__llvm_profile_name__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", align 1
+@__profn__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", align 1
+
+; CHECK: @__profn__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", section "{{.*}}__llvm_prf_names", comdat($__profv__Z3barIvEvv), align 1
+; CHECK: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat($__profv__Z3barIvEvv), align 8
+; CHECK: @__profd__Z3barIvEvv = linkonce_odr hidden global { i32, i32, i64, i8*, i64*, i8*, i8*, [1 x i16] } { i32 11, i32 1, i64 0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__Z3barIvEvv, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8* null, i8* null, [1 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data", comdat($__profv__Z3barIvEvv), align 8
+
+; COFF: @__profn__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", section "{{.*}}__llvm_prf_names", comdat, align 1
+; COFF: @__profc__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat($__profn__Z3barIvEvv), align 8
+; COFF: @__profd__Z3barIvEvv = linkonce_odr hidden global { i32, i32, i64, i8*, i64*, i8*, i8*, [1 x i16] } { i32 11, i32 1, i64 0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__Z3barIvEvv, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__Z3barIvEvv, i32 0, i32 0), i8* null, i8* null, [1 x i16] zeroinitializer }, section "{{.*}}__llvm_prf_data", comdat($__profn__Z3barIvEvv), align 8
-; CHECK: @__llvm_profile_name__Z3barIvEvv = linkonce_odr hidden constant [11 x i8] c"_Z3barIvEvv", section "{{.*}}__llvm_prf_names", comdat($_Z3barIvEvv), align 1
-; CHECK: @__llvm_profile_counters__Z3barIvEvv = linkonce_odr hidden global [1 x i64] zeroinitializer, section "{{.*}}__llvm_prf_cnts", comdat($_Z3barIvEvv), align 8
-; CHECK: @__llvm_profile_data__Z3barIvEvv = linkonce_odr hidden constant { i32, i32, i64, i8*, i64* } { i32 11, i32 1, i64 0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__llvm_profile_name__Z3barIvEvv, i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__llvm_profile_counters__Z3barIvEvv, i32 0, i32 0) }, section "{{.*}}__llvm_prf_data", comdat($_Z3barIvEvv), align 8
declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #1
define linkonce_odr void @_Z3barIvEvv() comdat {
entry:
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__llvm_profile_name__Z3barIvEvv, i32 0, i32 0), i64 0, i32 1, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__Z3barIvEvv, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
diff --git a/test/Instrumentation/InstrProfiling/linkage.ll b/test/Instrumentation/InstrProfiling/linkage.ll
index fcca7754fa8f..110838b1c8cb 100644
--- a/test/Instrumentation/InstrProfiling/linkage.ll
+++ b/test/Instrumentation/InstrProfiling/linkage.ll
@@ -1,46 +1,49 @@
;; Check that runtime symbols get appropriate linkage.
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s --check-prefix=OTHER --check-prefix=COMMON
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s --check-prefix=LINUX --check-prefix=COMMON
-@__llvm_profile_name_foo = hidden constant [3 x i8] c"foo"
-@__llvm_profile_name_foo_weak = weak hidden constant [8 x i8] c"foo_weak"
-@"__llvm_profile_name_linkage.ll:foo_internal" = internal constant [23 x i8] c"linkage.ll:foo_internal"
-@__llvm_profile_name_foo_inline = linkonce_odr hidden constant [10 x i8] c"foo_inline"
+@__profn_foo = hidden constant [3 x i8] c"foo"
+@__profn_foo_weak = weak hidden constant [8 x i8] c"foo_weak"
+@"__profn_linkage.ll:foo_internal" = internal constant [23 x i8] c"linkage.ll:foo_internal"
+@__profn_foo_inline = linkonce_odr hidden constant [10 x i8] c"foo_inline"
-; CHECK: @__llvm_profile_counters_foo = hidden global
-; CHECK: @__llvm_profile_data_foo = hidden constant
+; COMMON: @__profc_foo = hidden global
+; COMMON: @__profd_foo = hidden global
define void @foo() {
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
-; CHECK: @__llvm_profile_counters_foo_weak = weak hidden global
-; CHECK: @__llvm_profile_data_foo_weak = weak hidden constant
+; COMMON: @__profc_foo_weak = weak hidden global
+; COMMON: @__profd_foo_weak = weak hidden global
define weak void @foo_weak() {
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @__llvm_profile_name_foo_weak, i32 0, i32 0), i64 0, i32 1, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @__profn_foo_weak, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
-; CHECK: @"__llvm_profile_counters_linkage.ll:foo_internal" = internal global
-; CHECK: @"__llvm_profile_data_linkage.ll:foo_internal" = internal constant
+; COMMON: @"__profc_linkage.ll:foo_internal" = internal global
+; COMMON: @"__profd_linkage.ll:foo_internal" = internal global
define internal void @foo_internal() {
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @"__llvm_profile_name_linkage.ll:foo_internal", i32 0, i32 0), i64 0, i32 1, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @"__profn_linkage.ll:foo_internal", i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
-; CHECK: @__llvm_profile_counters_foo_inline = linkonce_odr hidden global
-; CHECK: @__llvm_profile_data_foo_inline = linkonce_odr hidden constant
+; COMMON: @__profc_foo_inline = linkonce_odr hidden global
+; COMMON: @__profd_foo_inline = linkonce_odr hidden global
define linkonce_odr void @foo_inline() {
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__llvm_profile_name_foo_inline, i32 0, i32 0), i64 0, i32 1, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @__profn_foo_inline, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
-; CHECK: @__llvm_profile_runtime = external global i32
+; OTHER: @__llvm_profile_runtime = external global i32
+; LINUX-NOT: @__llvm_profile_runtime = external global i32
-; CHECK: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} {
-; CHECK: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime
-; CHECK: ret i32 %[[REG]]
-; CHECK: }
+; OTHER: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} {
+; OTHER: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime
+; OTHER: ret i32 %[[REG]]
+; OTHER: }
+; LINUX-NOT: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} {
+; LINUX-NOT: %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime
diff --git a/test/Instrumentation/InstrProfiling/no-counters.ll b/test/Instrumentation/InstrProfiling/no-counters.ll
index 0716b0d8c6f2..402fb65bc750 100644
--- a/test/Instrumentation/InstrProfiling/no-counters.ll
+++ b/test/Instrumentation/InstrProfiling/no-counters.ll
@@ -1,8 +1,8 @@
;; No instrumentation should be emitted if there are no counter increments.
; RUN: opt < %s -instrprof -S | FileCheck %s
-; CHECK-NOT: @__llvm_profile_counters
-; CHECK-NOT: @__llvm_profile_data
+; CHECK-NOT: @__profc
+; CHECK-NOT: @__profd
; CHECK-NOT: @__llvm_profile_runtime
define void @foo() {
diff --git a/test/Instrumentation/InstrProfiling/noruntime.ll b/test/Instrumentation/InstrProfiling/noruntime.ll
index f0619c8686ff..17636cecdc22 100644
--- a/test/Instrumentation/InstrProfiling/noruntime.ll
+++ b/test/Instrumentation/InstrProfiling/noruntime.ll
@@ -6,10 +6,10 @@
@__llvm_profile_runtime = global i32 0, align 4
-@__llvm_profile_name_foo = hidden constant [3 x i8] c"foo"
+@__profn_foo = hidden constant [3 x i8] c"foo"
define void @foo() {
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
diff --git a/test/Instrumentation/InstrProfiling/platform.ll b/test/Instrumentation/InstrProfiling/platform.ll
index 5f16cba91d91..4307349c8206 100644
--- a/test/Instrumentation/InstrProfiling/platform.ll
+++ b/test/Instrumentation/InstrProfiling/platform.ll
@@ -1,19 +1,24 @@
;; Checks for platform specific section names and initialization code.
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s -check-prefix=MACHO
-; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s -check-prefix=ELF
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s -check-prefix=LINUX
+; RUN: opt < %s -mtriple=x86_64-unknown-freebsd -instrprof -S | FileCheck %s -check-prefix=FREEBSD
+; RUN: opt < %s -mtriple=x86_64-pc-solaris -instrprof -S | FileCheck %s -check-prefix=SOLARIS
-@__llvm_profile_name_foo = hidden constant [3 x i8] c"foo"
-; MACHO: @__llvm_profile_name_foo = hidden constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
-; ELF: @__llvm_profile_name_foo = hidden constant [3 x i8] c"foo", section "__llvm_prf_names", align 1
+@__profn_foo = hidden constant [3 x i8] c"foo"
+; MACHO: @__profn_foo = hidden constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
+; ELF: @__profn_foo = hidden constant [3 x i8] c"foo", section "__llvm_prf_names", align 1
-; MACHO: @__llvm_profile_counters_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
-; ELF: @__llvm_profile_counters_foo = hidden global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
+; MACHO: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
+; ELF: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
+
+; MACHO: @__profd_foo = hidden {{.*}}, section "__DATA,__llvm_prf_data", align 8
+; LINUX: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
+; FREEBSD: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
+; SOLARIS: @__profd_foo = hidden {{.*}}, section "__llvm_prf_data", align 8
-; MACHO: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
-; ELF: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__llvm_prf_data", align 8
define void @foo() {
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
@@ -23,7 +28,11 @@ declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
;; symbols by their sections.
; MACHO-NOT: define internal void @__llvm_profile_register_functions
-; ELF: define internal void @__llvm_profile_register_functions
+; LINUX-NOT: define internal void @__llvm_profile_register_functions
+; FREEBSD-NOT: define internal void @__llvm_profile_register_functions
+; SOLARIS: define internal void @__llvm_profile_register_functions
; MACHO-NOT: define internal void @__llvm_profile_init
-; ELF: define internal void @__llvm_profile_init
+; LINUX-NOT: define internal void @__llvm_profile_init
+; FREEBSD-NOT: define internal void @__llvm_profile_init
+; SOLARIS: define internal void @__llvm_profile_init
diff --git a/test/Instrumentation/InstrProfiling/profiling.ll b/test/Instrumentation/InstrProfiling/profiling.ll
index 52a6eadbff09..5f2a1bc6aae3 100644
--- a/test/Instrumentation/InstrProfiling/profiling.ll
+++ b/test/Instrumentation/InstrProfiling/profiling.ll
@@ -2,37 +2,37 @@
target triple = "x86_64-apple-macosx10.10.0"
-@__llvm_profile_name_foo = hidden constant [3 x i8] c"foo"
-; CHECK: @__llvm_profile_name_foo = hidden constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
-@__llvm_profile_name_bar = hidden constant [4 x i8] c"bar\00"
-; CHECK: @__llvm_profile_name_bar = hidden constant [4 x i8] c"bar\00", section "__DATA,__llvm_prf_names", align 1
-@baz_prof_name = hidden constant [3 x i8] c"baz"
-; CHECK: @baz_prof_name = hidden constant [3 x i8] c"baz", section "__DATA,__llvm_prf_names", align 1
+@__profn_foo = hidden constant [3 x i8] c"foo"
+; CHECK: @__profn_foo = hidden constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
+@__profn_bar = hidden constant [4 x i8] c"bar\00"
+; CHECK: @__profn_bar = hidden constant [4 x i8] c"bar\00", section "__DATA,__llvm_prf_names", align 1
+@__profn_baz = hidden constant [3 x i8] c"baz"
+; CHECK: @__profn_baz = hidden constant [3 x i8] c"baz", section "__DATA,__llvm_prf_names", align 1
-; CHECK: @__llvm_profile_counters_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
-; CHECK: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
+; CHECK: @__profc_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
+; CHECK: @__profd_foo = hidden {{.*}}, section "__DATA,__llvm_prf_data", align 8
define void @foo() {
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
-; CHECK: @__llvm_profile_counters_bar = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
-; CHECK: @__llvm_profile_data_bar = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
+; CHECK: @__profc_bar = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
+; CHECK: @__profd_bar = hidden {{.*}}, section "__DATA,__llvm_prf_data", align 8
define void @bar() {
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__llvm_profile_name_bar, i32 0, i32 0), i64 0, i32 1, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_bar, i32 0, i32 0), i64 0, i32 1, i32 0)
ret void
}
-; CHECK: @__llvm_profile_counters_baz = hidden global [3 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
-; CHECK: @__llvm_profile_data_baz = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
+; CHECK: @__profc_baz = hidden global [3 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
+; CHECK: @__profd_baz = hidden {{.*}}, section "__DATA,__llvm_prf_data", align 8
define void @baz() {
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 0)
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 1)
- call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 2)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_baz, i32 0, i32 0), i64 0, i32 3, i32 0)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_baz, i32 0, i32 0), i64 0, i32 3, i32 1)
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_baz, i32 0, i32 0), i64 0, i32 3, i32 2)
ret void
}
declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
; CHECK: @__llvm_profile_runtime = external global i32
-; CHECK: @llvm.used = appending global {{.*}} @__llvm_profile_data_foo {{.*}} @__llvm_profile_data_bar {{.*}} @__llvm_profile_data_baz {{.*}} section "llvm.metadata"
+; CHECK: @llvm.used = appending global {{.*}} @__profd_foo {{.*}} @__profd_bar {{.*}} @__profd_baz {{.*}} section "llvm.metadata"
diff --git a/test/Instrumentation/MemorySanitizer/AArch64/vararg.ll b/test/Instrumentation/MemorySanitizer/AArch64/vararg.ll
new file mode 100644
index 000000000000..d58fbac56215
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/AArch64/vararg.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -msan -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+%struct.__va_list = type { i8*, i8*, i8*, i32, i32 }
+
+define i32 @foo(i32 %guard, ...) {
+ %vl = alloca %struct.__va_list, align 8
+ %1 = bitcast %struct.__va_list* %vl to i8*
+ call void @llvm.lifetime.start(i64 32, i8* %1)
+ call void @llvm.va_start(i8* %1)
+ call void @llvm.va_end(i8* %1)
+ call void @llvm.lifetime.end(i64 32, i8* %1)
+ ret i32 0
+}
+
+; First check if the variadic shadow values are saved in stack with correct
+; size (192 is total of general purpose registers size, 56, rounded to 16
+; plus total of floating-point registers size, 128).
+
+; CHECK-LABEL: @foo
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
+; CHECK: [[B:%.*]] = add i64 192, [[A]]
+; CHECK: alloca {{.*}} [[B]]
+
+; We expect three memcpy operations: one for the general purpose registers,
+; one for floating-point/SIMD ones, and one for thre remaining arguments.
+
+; Propagate the GR shadow values on for the va_list::__gp_top, adjust the
+; offset in the __msan_va_arg_tls based on va_list:__gp_off, and finally
+; issue the memcpy.
+; CHECK: [[GRP:%.*]] = getelementptr inbounds i8, i8* {{%.*}}, i64 {{%.*}}
+; CHECK: [[GRSIZE:%.*]] = sub i64 56, {{%.*}}
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{%.*}}, i8* [[GRP]], i64 [[GRSIZE]], i32 8, i1 false)
+
+; Propagate the VR shadow values on for the va_list::__vr_top, adjust the
+; offset in the __msan_va_arg_tls based on va_list:__vr_off, and finally
+; issue the memcpy.
+; CHECK: [[VRP:%.*]] = getelementptr inbounds i8, i8* {{%.*}}, i64 {{%.*}}
+; CHECK: [[VRSIZE:%.*]] = sub i64 128, {{%.*}}
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{%.*}}, i8* [[VRP]], i64 [[VRSIZE]], i32 8, i1 false)
+
+; Copy the remaining shadow values on the va_list::__stack position (it is
+; on the constant offset of 192 from __msan_va_arg_tls).
+; CHECK: [[STACK:%.*]] = getelementptr inbounds i8, i8* {{%.*}}, i32 192
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{%.*}}, i8* [[STACK]], i64 {{%.*}}, i32 16, i1 false)
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.va_start(i8*) #2
+declare void @llvm.va_end(i8*) #2
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+define i32 @bar() {
+ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i32 2, double 3.000000e+00,
+ double 4.000000e+00, i32 5, i32 6,
+ double 7.000000e+00, i32 8, i32 9, i32 10, i32 11)
+ ret i32 %1
+}
+
+; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
+; array. General purpose registers are saved at positions from 0 to 56, Floating
+; point and SIMD are saved from 64 to 192, and the remaining from 192.
+; CHECK-LABEL: @bar
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 8
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 64
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 80
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 16
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 24
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 96
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 32
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 40
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 48
+; CHECK: store {{.*}} @__msan_va_arg_tls {{.*}} 192
+; CHECK: store {{.*}} 8, {{.*}} @__msan_va_arg_overflow_size_tls
diff --git a/test/Instrumentation/MemorySanitizer/atomics.ll b/test/Instrumentation/MemorySanitizer/atomics.ll
index e896eaebdd3d..8033ed1e2d46 100644
--- a/test/Instrumentation/MemorySanitizer/atomics.ll
+++ b/test/Instrumentation/MemorySanitizer/atomics.ll
@@ -13,7 +13,7 @@ entry:
ret i32 %0
}
-; CHECK: @AtomicRmwXchg
+; CHECK-LABEL: @AtomicRmwXchg
; CHECK: store i32 0,
; CHECK: atomicrmw xchg {{.*}} seq_cst
; CHECK: store i32 0, {{.*}} @__msan_retval_tls
@@ -28,7 +28,7 @@ entry:
ret i32 %0
}
-; CHECK: @AtomicRmwMax
+; CHECK-LABEL: @AtomicRmwMax
; CHECK: store i32 0,
; CHECK: atomicrmw max {{.*}} seq_cst
; CHECK: store i32 0, {{.*}} @__msan_retval_tls
@@ -44,7 +44,7 @@ entry:
ret i32 %0
}
-; CHECK: @Cmpxchg
+; CHECK-LABEL: @Cmpxchg
; CHECK: store { i32, i1 } zeroinitializer,
; CHECK: icmp
; CHECK: br
@@ -63,7 +63,7 @@ entry:
ret i32 %0
}
-; CHECK: @CmpxchgMonotonic
+; CHECK-LABEL: @CmpxchgMonotonic
; CHECK: store { i32, i1 } zeroinitializer,
; CHECK: icmp
; CHECK: br
@@ -81,7 +81,7 @@ entry:
ret i32 %0
}
-; CHECK: @AtomicLoad
+; CHECK-LABEL: @AtomicLoad
; CHECK: load atomic i32, i32* {{.*}} seq_cst, align 16
; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32, i32* {{.*}}, align 16
; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
@@ -96,7 +96,7 @@ entry:
ret i32 %0
}
-; CHECK: @AtomicLoadAcquire
+; CHECK-LABEL: @AtomicLoadAcquire
; CHECK: load atomic i32, i32* {{.*}} acquire, align 16
; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32, i32* {{.*}}, align 16
; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
@@ -111,7 +111,7 @@ entry:
ret i32 %0
}
-; CHECK: @AtomicLoadMonotonic
+; CHECK-LABEL: @AtomicLoadMonotonic
; CHECK: load atomic i32, i32* {{.*}} acquire, align 16
; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32, i32* {{.*}}, align 16
; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
@@ -126,7 +126,7 @@ entry:
ret i32 %0
}
-; CHECK: @AtomicLoadUnordered
+; CHECK-LABEL: @AtomicLoadUnordered
; CHECK: load atomic i32, i32* {{.*}} acquire, align 16
; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32, i32* {{.*}}, align 16
; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
@@ -141,7 +141,7 @@ entry:
ret void
}
-; CHECK: @AtomicStore
+; CHECK-LABEL: @AtomicStore
; CHECK-NOT: @__msan_param_tls
; CHECK: store i32 0, i32* {{.*}}, align 16
; CHECK: store atomic i32 %x, i32* %p seq_cst, align 16
@@ -156,7 +156,7 @@ entry:
ret void
}
-; CHECK: @AtomicStoreRelease
+; CHECK-LABEL: @AtomicStoreRelease
; CHECK-NOT: @__msan_param_tls
; CHECK: store i32 0, i32* {{.*}}, align 16
; CHECK: store atomic i32 %x, i32* %p release, align 16
@@ -171,7 +171,7 @@ entry:
ret void
}
-; CHECK: @AtomicStoreMonotonic
+; CHECK-LABEL: @AtomicStoreMonotonic
; CHECK-NOT: @__msan_param_tls
; CHECK: store i32 0, i32* {{.*}}, align 16
; CHECK: store atomic i32 %x, i32* %p release, align 16
@@ -186,7 +186,7 @@ entry:
ret void
}
-; CHECK: @AtomicStoreUnordered
+; CHECK-LABEL: @AtomicStoreUnordered
; CHECK-NOT: @__msan_param_tls
; CHECK: store i32 0, i32* {{.*}}, align 16
; CHECK: store atomic i32 %x, i32* %p release, align 16
diff --git a/test/Instrumentation/MemorySanitizer/check_access_address.ll b/test/Instrumentation/MemorySanitizer/check_access_address.ll
index 5e1a3f4442f2..723d6f0cd344 100644
--- a/test/Instrumentation/MemorySanitizer/check_access_address.ll
+++ b/test/Instrumentation/MemorySanitizer/check_access_address.ll
@@ -12,7 +12,7 @@ entry:
ret <2 x i64> %x
}
-; CHECK: @ByValArgumentShadowLargeAlignment
+; CHECK-LABEL: @ByValArgumentShadowLargeAlignment
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 16, i32 8, i1 false)
; CHECK: ret <2 x i64>
@@ -23,6 +23,6 @@ entry:
ret i16 %x
}
-; CHECK: @ByValArgumentShadowSmallAlignment
+; CHECK-LABEL: @ByValArgumentShadowSmallAlignment
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.*}}, i8* {{.*}}, i64 2, i32 2, i1 false)
; CHECK: ret i16
diff --git a/test/Instrumentation/MemorySanitizer/msan_basic.ll b/test/Instrumentation/MemorySanitizer/msan_basic.ll
index 8b8e29709599..cacc9b749dd2 100644
--- a/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -28,7 +28,7 @@ entry:
ret void
}
-; CHECK: @Store
+; CHECK-LABEL: @Store
; CHECK: load {{.*}} @__msan_param_tls
; CHECK-ORIGINS: load {{.*}} @__msan_param_origin_tls
; CHECK: store
@@ -52,7 +52,7 @@ entry:
ret void
}
-; CHECK: @AlignedStore
+; CHECK-LABEL: @AlignedStore
; CHECK: load {{.*}} @__msan_param_tls
; CHECK-ORIGINS: load {{.*}} @__msan_param_origin_tls
; CHECK: store {{.*}} align 32
@@ -83,7 +83,7 @@ if.end: ; preds = %entry, %if.then
declare void @foo(...)
-; CHECK: @LoadAndCmp
+; CHECK-LABEL: @LoadAndCmp
; CHECK: = load
; CHECK: = load
; CHECK: call void @__msan_warning_noreturn()
@@ -97,7 +97,7 @@ entry:
ret i32 123
}
-; CHECK: @ReturnInt
+; CHECK-LABEL: @ReturnInt
; CHECK: store i32 0,{{.*}}__msan_retval_tls
; CHECK: ret i32
@@ -109,7 +109,7 @@ entry:
ret void
}
-; CHECK: @CopyRetVal
+; CHECK-LABEL: @CopyRetVal
; CHECK: load{{.*}}__msan_retval_tls
; CHECK: store
; CHECK: store
@@ -136,7 +136,7 @@ entry:
ret void
}
-; CHECK: @FuncWithPhi
+; CHECK-LABEL: @FuncWithPhi
; CHECK: = phi
; CHECK-NEXT: = phi
; CHECK: store
@@ -152,7 +152,7 @@ entry:
ret void
}
-; CHECK: @ShlConst
+; CHECK-LABEL: @ShlConst
; CHECK: = load
; CHECK: = load
; CHECK: shl
@@ -170,7 +170,7 @@ entry:
ret void
}
-; CHECK: @ShlNonConst
+; CHECK-LABEL: @ShlNonConst
; CHECK: = load
; CHECK: = load
; CHECK: = sext i1
@@ -187,7 +187,7 @@ entry:
ret void
}
-; CHECK: @SExt
+; CHECK-LABEL: @SExt
; CHECK: = load
; CHECK: = load
; CHECK: = sext
@@ -206,7 +206,7 @@ entry:
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
-; CHECK: @MemSet
+; CHECK-LABEL: @MemSet
; CHECK: call i8* @__msan_memset
; CHECK: ret void
@@ -220,7 +220,7 @@ entry:
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-; CHECK: @MemCpy
+; CHECK-LABEL: @MemCpy
; CHECK: call i8* @__msan_memcpy
; CHECK: ret void
@@ -234,7 +234,7 @@ entry:
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-; CHECK: @MemMove
+; CHECK-LABEL: @MemMove
; CHECK: call i8* @__msan_memmove
; CHECK: ret void
@@ -247,7 +247,7 @@ entry:
ret i32 %cond
}
-; CHECK: @Select
+; CHECK-LABEL: @Select
; CHECK: select i1
; CHECK-DAG: or i32
; CHECK-DAG: xor i32
@@ -271,7 +271,7 @@ entry:
ret <8 x i16> %cond
}
-; CHECK: @SelectVector
+; CHECK-LABEL: @SelectVector
; CHECK: select <8 x i1>
; CHECK-DAG: or <8 x i16>
; CHECK-DAG: xor <8 x i16>
@@ -295,7 +295,7 @@ entry:
ret <8 x i16> %cond
}
-; CHECK: @SelectVector2
+; CHECK-LABEL: @SelectVector2
; CHECK: select i1
; CHECK-DAG: or <8 x i16>
; CHECK-DAG: xor <8 x i16>
@@ -313,7 +313,7 @@ entry:
ret { i64, i64 } %c
}
-; CHECK: @SelectStruct
+; CHECK-LABEL: @SelectStruct
; CHECK: select i1 {{.*}}, { i64, i64 }
; CHECK-NEXT: select i1 {{.*}}, { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 }
; CHECK-ORIGINS: select i1
@@ -328,7 +328,7 @@ entry:
ret { i64*, double } %c
}
-; CHECK: @SelectStruct2
+; CHECK-LABEL: @SelectStruct2
; CHECK: select i1 {{.*}}, { i64, i64 }
; CHECK-NEXT: select i1 {{.*}}, { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 }
; CHECK-ORIGINS: select i1
@@ -343,7 +343,7 @@ entry:
ret i8* %0
}
-; CHECK: @IntToPtr
+; CHECK-LABEL: @IntToPtr
; CHECK: load i64, i64*{{.*}}__msan_param_tls
; CHECK-ORIGINS-NEXT: load i32, i32*{{.*}}__msan_param_origin_tls
; CHECK-NEXT: inttoptr
@@ -357,7 +357,7 @@ entry:
ret i8* %0
}
-; CHECK: @IntToPtr_ZExt
+; CHECK-LABEL: @IntToPtr_ZExt
; CHECK: load i16, i16*{{.*}}__msan_param_tls
; CHECK: zext
; CHECK-NEXT: inttoptr
@@ -374,7 +374,7 @@ entry:
ret i32 %div
}
-; CHECK: @Div
+; CHECK-LABEL: @Div
; CHECK: icmp
; CHECK: call void @__msan_warning
; CHECK-NOT: icmp
@@ -385,48 +385,99 @@ entry:
; Check that we propagate shadow for x<0, x>=0, etc (i.e. sign bit tests)
-define zeroext i1 @ICmpSLT(i32 %x) nounwind uwtable readnone sanitize_memory {
+define zeroext i1 @ICmpSLTZero(i32 %x) nounwind uwtable readnone sanitize_memory {
%1 = icmp slt i32 %x, 0
ret i1 %1
}
-; CHECK: @ICmpSLT
+; CHECK-LABEL: @ICmpSLTZero
; CHECK: icmp slt
; CHECK-NOT: call void @__msan_warning
; CHECK: icmp slt
; CHECK-NOT: call void @__msan_warning
; CHECK: ret i1
-define zeroext i1 @ICmpSGE(i32 %x) nounwind uwtable readnone sanitize_memory {
+define zeroext i1 @ICmpSGEZero(i32 %x) nounwind uwtable readnone sanitize_memory {
%1 = icmp sge i32 %x, 0
ret i1 %1
}
-; CHECK: @ICmpSGE
+; CHECK-LABEL: @ICmpSGEZero
; CHECK: icmp slt
; CHECK-NOT: call void @__msan_warning
; CHECK: icmp sge
; CHECK-NOT: call void @__msan_warning
; CHECK: ret i1
-define zeroext i1 @ICmpSGT(i32 %x) nounwind uwtable readnone sanitize_memory {
+define zeroext i1 @ICmpSGTZero(i32 %x) nounwind uwtable readnone sanitize_memory {
%1 = icmp sgt i32 0, %x
ret i1 %1
}
-; CHECK: @ICmpSGT
+; CHECK-LABEL: @ICmpSGTZero
; CHECK: icmp slt
; CHECK-NOT: call void @__msan_warning
; CHECK: icmp sgt
; CHECK-NOT: call void @__msan_warning
; CHECK: ret i1
-define zeroext i1 @ICmpSLE(i32 %x) nounwind uwtable readnone sanitize_memory {
+define zeroext i1 @ICmpSLEZero(i32 %x) nounwind uwtable readnone sanitize_memory {
%1 = icmp sle i32 0, %x
ret i1 %1
}
-; CHECK: @ICmpSLE
+; CHECK-LABEL: @ICmpSLEZero
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp sle
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+
+; Check that we propagate shadow for x<=-1, x>-1, etc (i.e. sign bit tests)
+
+define zeroext i1 @ICmpSLTAllOnes(i32 %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp slt i32 -1, %x
+ ret i1 %1
+}
+
+; CHECK-LABEL: @ICmpSLTAllOnes
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+define zeroext i1 @ICmpSGEAllOnes(i32 %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp sge i32 -1, %x
+ ret i1 %1
+}
+
+; CHECK-LABEL: @ICmpSGEAllOnes
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp sge
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+define zeroext i1 @ICmpSGTAllOnes(i32 %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp sgt i32 %x, -1
+ ret i1 %1
+}
+
+; CHECK-LABEL: @ICmpSGTAllOnes
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp sgt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+define zeroext i1 @ICmpSLEAllOnes(i32 %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp sle i32 %x, -1
+ ret i1 %1
+}
+
+; CHECK-LABEL: @ICmpSLEAllOnes
; CHECK: icmp slt
; CHECK-NOT: call void @__msan_warning
; CHECK: icmp sle
@@ -437,18 +488,33 @@ define zeroext i1 @ICmpSLE(i32 %x) nounwind uwtable readnone sanitize_memory {
; Check that we propagate shadow for x<0, x>=0, etc (i.e. sign bit tests)
; of the vector arguments.
-define <2 x i1> @ICmpSLT_vector(<2 x i32*> %x) nounwind uwtable readnone sanitize_memory {
+define <2 x i1> @ICmpSLT_vector_Zero(<2 x i32*> %x) nounwind uwtable readnone sanitize_memory {
%1 = icmp slt <2 x i32*> %x, zeroinitializer
ret <2 x i1> %1
}
-; CHECK: @ICmpSLT_vector
+; CHECK-LABEL: @ICmpSLT_vector_Zero
; CHECK: icmp slt <2 x i64>
; CHECK-NOT: call void @__msan_warning
; CHECK: icmp slt <2 x i32*>
; CHECK-NOT: call void @__msan_warning
; CHECK: ret <2 x i1>
+; Check that we propagate shadow for x<=-1, x>0, etc (i.e. sign bit tests)
+; of the vector arguments.
+
+define <2 x i1> @ICmpSLT_vector_AllOnes(<2 x i32> %x) nounwind uwtable readnone sanitize_memory {
+ %1 = icmp slt <2 x i32> <i32 -1, i32 -1>, %x
+ ret <2 x i1> %1
+}
+
+; CHECK-LABEL: @ICmpSLT_vector_AllOnes
+; CHECK: icmp slt <2 x i32>
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp slt <2 x i32>
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret <2 x i1>
+
; Check that we propagate shadow for unsigned relational comparisons with
; constants
@@ -459,7 +525,7 @@ entry:
ret i1 %cmp
}
-; CHECK: @ICmpUGTConst
+; CHECK-LABEL: @ICmpUGTConst
; CHECK: icmp ugt i32
; CHECK-NOT: call void @__msan_warning
; CHECK: icmp ugt i32
@@ -478,7 +544,7 @@ define i32 @ShadowLoadAlignmentLarge() nounwind uwtable sanitize_memory {
ret i32 %1
}
-; CHECK: @ShadowLoadAlignmentLarge
+; CHECK-LABEL: @ShadowLoadAlignmentLarge
; CHECK: load volatile i32, i32* {{.*}} align 64
; CHECK: load i32, i32* {{.*}} align 64
; CHECK: ret i32
@@ -489,7 +555,7 @@ define i32 @ShadowLoadAlignmentSmall() nounwind uwtable sanitize_memory {
ret i32 %1
}
-; CHECK: @ShadowLoadAlignmentSmall
+; CHECK-LABEL: @ShadowLoadAlignmentSmall
; CHECK: load volatile i32, i32* {{.*}} align 2
; CHECK: load i32, i32* {{.*}} align 2
; CHECK-ORIGINS: load i32, i32* {{.*}} align 4
@@ -505,7 +571,7 @@ define i32 @ExtractElement(<4 x i32> %vec, i32 %idx) sanitize_memory {
ret i32 %x
}
-; CHECK: @ExtractElement
+; CHECK-LABEL: @ExtractElement
; CHECK: extractelement
; CHECK: call void @__msan_warning
; CHECK: extractelement
@@ -516,7 +582,7 @@ define <4 x i32> @InsertElement(<4 x i32> %vec, i32 %idx, i32 %x) sanitize_memor
ret <4 x i32> %vec1
}
-; CHECK: @InsertElement
+; CHECK-LABEL: @InsertElement
; CHECK: insertelement
; CHECK: call void @__msan_warning
; CHECK: insertelement
@@ -528,7 +594,7 @@ define <4 x i32> @ShuffleVector(<4 x i32> %vec, <4 x i32> %vec1) sanitize_memory
ret <4 x i32> %vec2
}
-; CHECK: @ShuffleVector
+; CHECK-LABEL: @ShuffleVector
; CHECK: shufflevector
; CHECK-NOT: call void @__msan_warning
; CHECK: shufflevector
@@ -543,7 +609,7 @@ define i32 @BSwap(i32 %x) nounwind uwtable readnone sanitize_memory {
declare i32 @llvm.bswap.i32(i32) nounwind readnone
-; CHECK: @BSwap
+; CHECK-LABEL: @BSwap
; CHECK-NOT: call void @__msan_warning
; CHECK: @llvm.bswap.i32
; CHECK-NOT: call void @__msan_warning
@@ -561,7 +627,7 @@ define void @StoreIntrinsic(i8* %p, <4 x float> %x) nounwind uwtable sanitize_me
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
-; CHECK: @StoreIntrinsic
+; CHECK-LABEL: @StoreIntrinsic
; CHECK-NOT: br
; CHECK-NOT: = or
; CHECK: store <4 x i32> {{.*}} align 1
@@ -578,7 +644,7 @@ define <16 x i8> @LoadIntrinsic(i8* %p) nounwind uwtable sanitize_memory {
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p) nounwind
-; CHECK: @LoadIntrinsic
+; CHECK-LABEL: @LoadIntrinsic
; CHECK: load <16 x i8>, <16 x i8>* {{.*}} align 1
; CHECK-ORIGINS: [[ORIGIN:%[01-9a-z]+]] = load i32, i32* {{.*}}
; CHECK-NOT: br
@@ -600,7 +666,7 @@ define <8 x i16> @Paddsw128(<8 x i16> %a, <8 x i16> %b) nounwind uwtable sanitiz
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) nounwind
-; CHECK: @Paddsw128
+; CHECK-LABEL: @Paddsw128
; CHECK-NEXT: load <8 x i16>, <8 x i16>* {{.*}} @__msan_param_tls
; CHECK-ORIGINS: load i32, i32* {{.*}} @__msan_param_origin_tls
; CHECK-NEXT: load <8 x i16>, <8 x i16>* {{.*}} @__msan_param_tls
@@ -623,7 +689,7 @@ define <8 x i8*> @VectorOfPointers(<8 x i8*>* %p) nounwind uwtable sanitize_memo
ret <8 x i8*> %x
}
-; CHECK: @VectorOfPointers
+; CHECK-LABEL: @VectorOfPointers
; CHECK: load <8 x i8*>, <8 x i8*>*
; CHECK: load <8 x i64>, <8 x i64>*
; CHECK: store <8 x i64> {{.*}} @__msan_retval_tls
@@ -638,7 +704,7 @@ define void @VACopy(i8* %p1, i8* %p2) nounwind uwtable sanitize_memory {
ret void
}
-; CHECK: @VACopy
+; CHECK-LABEL: @VACopy
; CHECK: call void @llvm.memset.p0i8.i64({{.*}}, i8 0, i64 24, i32 8, i1 false)
; CHECK: ret void
@@ -661,7 +727,7 @@ entry:
ret void
}
-; CHECK: @VAStart
+; CHECK-LABEL: @VAStart
; CHECK: call void @llvm.va_start
; CHECK-NOT: @__msan_va_arg_tls
; CHECK-NOT: @__msan_va_arg_overflow_size_tls
@@ -677,7 +743,7 @@ entry:
ret void
}
-; CHECK: @VolatileStore
+; CHECK-LABEL: @VolatileStore
; CHECK-NOT: @__msan_warning
; CHECK: ret void
@@ -700,7 +766,7 @@ if.end: ; preds = %entry, %if.then
declare void @bar()
-; CHECK: @NoSanitizeMemory
+; CHECK-LABEL: @NoSanitizeMemory
; CHECK-NOT: @__msan_warning
; CHECK: store i32 0, {{.*}} @__msan_retval_tls
; CHECK-NOT: @__msan_warning
@@ -719,7 +785,7 @@ entry:
declare i32 @NoSanitizeMemoryAllocaHelper(i32* %p)
-; CHECK: @NoSanitizeMemoryAlloca
+; CHECK-LABEL: @NoSanitizeMemoryAlloca
; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 4, i32 4, i1 false)
; CHECK: call i32 @NoSanitizeMemoryAllocaHelper(i32*
; CHECK: ret i32
@@ -736,7 +802,7 @@ entry:
declare i32 @NoSanitizeMemoryUndefHelper(i32 %x)
-; CHECK: @NoSanitizeMemoryAlloca
+; CHECK-LABEL: @NoSanitizeMemoryUndef
; CHECK: store i32 0, i32* {{.*}} @__msan_param_tls
; CHECK: call i32 @NoSanitizeMemoryUndefHelper(i32 undef)
; CHECK: ret i32
@@ -790,7 +856,7 @@ entry:
ret <2 x i64> %b
}
-; CHECK: @ArgumentShadowAlignment
+; CHECK-LABEL: @ArgumentShadowAlignment
; CHECK: load <2 x i64>, <2 x i64>* {{.*}} @__msan_param_tls {{.*}}, align 8
; CHECK: store <2 x i64> {{.*}} @__msan_retval_tls {{.*}}, align 8
; CHECK: ret <2 x i64>
@@ -847,7 +913,7 @@ entry:
; "undef" and the first 2 structs go to general purpose registers;
; the third struct goes to the overflow area byval
-; CHECK: @VAArgStruct
+; CHECK-LABEL: @VAArgStruct
; undef
; CHECK: store i32 -1, i32* {{.*}}@__msan_va_arg_tls {{.*}}, align 8
; first struct through general purpose registers
@@ -864,6 +930,7 @@ entry:
; CHECK: call void (i32, ...) @VAArgStructFn
; CHECK: ret void
+
declare i32 @InnerTailCall(i32 %a)
define void @MismatchedReturnTypeTailCall(i32 %a) sanitize_memory {
@@ -878,5 +945,41 @@ define void @MismatchedReturnTypeTailCall(i32 %a) sanitize_memory {
; CHECK: tail call i32 @InnerTailCall
; CHECK: ret void
+
+declare i32 @MustTailCall(i32 %a)
+
+define i32 @CallMustTailCall(i32 %a) sanitize_memory {
+ %b = musttail call i32 @MustTailCall(i32 %a)
+ ret i32 %b
+}
+
+; For "musttail" calls we can not insert any shadow manipulating code between
+; call and the return instruction. And we don't need to, because everything is
+; taken care of in the callee.
+
+; CHECK-LABEL: define i32 @CallMustTailCall
+; CHECK: musttail call i32 @MustTailCall
+; No instrumentation between call and ret.
+; CHECK-NEXT: ret i32
+
+declare i32* @MismatchingMustTailCall(i32 %a)
+
+define i8* @MismatchingCallMustTailCall(i32 %a) sanitize_memory {
+ %b = musttail call i32* @MismatchingMustTailCall(i32 %a)
+ %c = bitcast i32* %b to i8*
+ ret i8* %c
+}
+
+; For "musttail" calls we can not insert any shadow manipulating code between
+; call and the return instruction. And we don't need to, because everything is
+; taken care of in the callee.
+
+; CHECK-LABEL: define i8* @MismatchingCallMustTailCall
+; CHECK: musttail call i32* @MismatchingMustTailCall
+; No instrumentation between call and ret.
+; CHECK-NEXT: bitcast i32* {{.*}} to i8*
+; CHECK-NEXT: ret i8*
+
+
; CHECK-LABEL: define internal void @msan.module_ctor
; CHECK: call void @__msan_init()
diff --git a/test/Instrumentation/MemorySanitizer/mul_by_constant.ll b/test/Instrumentation/MemorySanitizer/mul_by_constant.ll
index e068f69ae4ba..7736d94717fe 100644
--- a/test/Instrumentation/MemorySanitizer/mul_by_constant.ll
+++ b/test/Instrumentation/MemorySanitizer/mul_by_constant.ll
@@ -92,3 +92,26 @@ entry:
; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
; CHECK: [[B:%.*]] = mul <4 x i32> [[A]], <i32 1024, i32 0, i32 16, i32 16>
; CHECK: store <4 x i32> [[B]], <4 x i32>* {{.*}} @__msan_retval_tls
+
+
+; The constant in multiplication does not have to be a literal integer constant.
+@X = linkonce_odr global i8* null
+define i64 @MulNonIntegerConst(i64 %a) sanitize_memory {
+ %mul = mul i64 %a, ptrtoint (i8** @X to i64)
+ ret i64 %mul
+}
+
+; CHECK-LABEL: @MulNonIntegerConst(
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
+; CHECK: [[B:%.*]] = mul i64 [[A]], 1
+; CHECK: store i64 [[B]], {{.*}}@__msan_retval_tls
+
+define <2 x i64> @MulNonIntegerVectorConst(<2 x i64> %a) sanitize_memory {
+ %mul = mul <2 x i64> %a, <i64 3072, i64 ptrtoint (i8** @X to i64)>
+ ret <2 x i64> %mul
+}
+
+; CHECK-LABEL: @MulNonIntegerVectorConst(
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_param_tls
+; CHECK: [[B:%.*]] = mul <2 x i64> [[A]], <i64 1024, i64 1>
+; CHECK: store <2 x i64> [[B]], {{.*}}@__msan_retval_tls
diff --git a/test/Instrumentation/MemorySanitizer/origin-alignment.ll b/test/Instrumentation/MemorySanitizer/origin-alignment.ll
index ce0dbfc71909..abd8dd0e533f 100644
--- a/test/Instrumentation/MemorySanitizer/origin-alignment.ll
+++ b/test/Instrumentation/MemorySanitizer/origin-alignment.ll
@@ -24,7 +24,7 @@ entry:
; CHECK-ORIGINS1: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS2: [[ORIGIN0:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS2: [[ORIGIN:%[01-9a-z]+]] = call i32 @__msan_chain_origin(i32 [[ORIGIN0]])
-; CHECK: store i32 [[ORIGIN]], i32* inttoptr (i64 add (i64 and (i64 ptrtoint {{.*}} to i32*), align 8
+; CHECK: store i32 [[ORIGIN]], {{.*}}, align 8
; CHECK: ret void
@@ -39,7 +39,7 @@ entry:
; CHECK-ORIGINS1: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS2: [[ORIGIN0:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS2: [[ORIGIN:%[01-9a-z]+]] = call i32 @__msan_chain_origin(i32 [[ORIGIN0]])
-; CHECK: store i32 [[ORIGIN]], i32* inttoptr (i64 add (i64 and (i64 ptrtoint {{.*}} to i32*), align 4
+; CHECK: store i32 [[ORIGIN]], {{.*}}, align 4
; CHECK: ret void
@@ -54,7 +54,7 @@ entry:
; CHECK-ORIGINS1: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS2: [[ORIGIN0:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS2: [[ORIGIN:%[01-9a-z]+]] = call i32 @__msan_chain_origin(i32 [[ORIGIN0]])
-; CHECK: store i32 [[ORIGIN]], i32* inttoptr (i64 and (i64 add (i64 and (i64 ptrtoint {{.*}} i64 -4) to i32*), align 4
+; CHECK: store i32 [[ORIGIN]], {{.*}}, align 4
; CHECK: ret void
@@ -69,5 +69,6 @@ entry:
; CHECK-ORIGINS1: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS2: [[ORIGIN0:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
; CHECK-ORIGINS2: [[ORIGIN:%[01-9a-z]+]] = call i32 @__msan_chain_origin(i32 [[ORIGIN0]])
-; CHECK: store i32 [[ORIGIN]], i32* inttoptr (i64 and (i64 add (i64 and (i64 ptrtoint {{.*}} i64 -4) to i32*), align 4
+; CHECK: store i32 [[ORIGIN]], {{.*}}, align 4
+
; CHECK: ret void
diff --git a/test/Instrumentation/MemorySanitizer/return_from_main.ll b/test/Instrumentation/MemorySanitizer/return_from_main.ll
index 81dc88834db1..82e2d13bc09b 100644
--- a/test/Instrumentation/MemorySanitizer/return_from_main.ll
+++ b/test/Instrumentation/MemorySanitizer/return_from_main.ll
@@ -10,7 +10,7 @@ entry:
declare i32 @f() sanitize_memory
-; CHECK: @main
+; CHECK-LABEL: @main
; CHECK: call i32 @f()
; CHECK: store i32 0, {{.*}} @__msan_retval_tls
; CHECK: br i1
diff --git a/test/Instrumentation/MemorySanitizer/store-origin.ll b/test/Instrumentation/MemorySanitizer/store-origin.ll
index cdfe280bc8cb..42bfac20822f 100644
--- a/test/Instrumentation/MemorySanitizer/store-origin.ll
+++ b/test/Instrumentation/MemorySanitizer/store-origin.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
; Check that debug info for origin propagation code is set correctly.
; Function Attrs: nounwind
-define void @Store(i32* nocapture %p, i32 %x) #0 {
+define void @Store(i32* nocapture %p, i32 %x) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata i32* %p, i64 0, metadata !11, metadata !DIExpression()), !dbg !16
tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !12, metadata !DIExpression()), !dbg !16
@@ -27,19 +27,19 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!13, !14}
!llvm.ident = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (204220)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 (204220)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "../2.cc", directory: "/tmp/build0")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "Store", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @Store, variables: !10)
+!4 = distinct !DISubprogram(name: "Store", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !10)
!5 = !DIFile(filename: "../2.cc", directory: "/tmp/build0")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8, !9}
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !{!11, !12}
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p", line: 1, arg: 1, scope: !4, file: !5, type: !8)
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 2, scope: !4, file: !5, type: !9)
+!11 = !DILocalVariable(name: "p", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!12 = !DILocalVariable(name: "x", line: 1, arg: 2, scope: !4, file: !5, type: !9)
!13 = !{i32 2, !"Dwarf Version", i32 4}
!14 = !{i32 1, !"Debug Info Version", i32 3}
!15 = !{!"clang version 3.5.0 (204220)"}
@@ -52,7 +52,7 @@ attributes #1 = { nounwind readnone }
!22 = !DILocation(line: 3, scope: !4)
-; CHECK: @Store
+; CHECK-LABEL: @Store
; CHECK: load {{.*}} @__msan_param_tls
; CHECK: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
; CHECK: store {{.*}}!dbg ![[DBG:[01-9]+]]
diff --git a/test/Instrumentation/MemorySanitizer/unreachable.ll b/test/Instrumentation/MemorySanitizer/unreachable.ll
index e9a79ce0d0e1..ac5aea9a0771 100644
--- a/test/Instrumentation/MemorySanitizer/unreachable.ll
+++ b/test/Instrumentation/MemorySanitizer/unreachable.ll
@@ -18,7 +18,7 @@ exit:
ret i32 %z
}
-; CHECK: @Func
+; CHECK-LABEL: @Func
; CHECK: store i32 0, {{.*}} @__msan_retval_tls
; CHECK: ret i32 42
@@ -34,6 +34,6 @@ xxx:
br label %zzz
}
-; CHECK: @UnreachableLoop
+; CHECK-LABEL: @UnreachableLoop
; CHECK: store i32 0, {{.*}} @__msan_retval_tls
; CHECK: ret i32 0
diff --git a/test/Instrumentation/MemorySanitizer/vector_cvt.ll b/test/Instrumentation/MemorySanitizer/vector_cvt.ll
index a7d5f2115015..55e91c74a316 100644
--- a/test/Instrumentation/MemorySanitizer/vector_cvt.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_cvt.ll
@@ -15,7 +15,7 @@ entry:
ret i32 %0
}
-; CHECK: @test_cvtsd2si
+; CHECK-LABEL: @test_cvtsd2si
; CHECK: [[S:%[_01-9a-z]+]] = extractelement <2 x i64> {{.*}}, i32 0
; CHECK: icmp ne {{.*}}[[S]], 0
; CHECK: br
@@ -33,7 +33,7 @@ entry:
ret <2 x double> %0
}
-; CHECK: @test_cvtsi2sd
+; CHECK-LABEL: @test_cvtsi2sd
; CHECK: [[Sa:%[_01-9a-z]+]] = load i32, i32* {{.*}} @__msan_param_tls
; CHECK: [[Sout0:%[_01-9a-z]+]] = insertelement <2 x i64> <i64 -1, i64 -1>, i64 {{.*}}, i32 1
; Clear low half of result shadow
@@ -54,7 +54,7 @@ entry:
ret x86_mmx %0
}
-; CHECK: @test_cvtps2pi
+; CHECK-LABEL: @test_cvtps2pi
; CHECK: extractelement <4 x i32> {{.*}}, i32 0
; CHECK: extractelement <4 x i32> {{.*}}, i32 1
; CHECK: [[S:%[_01-9a-z]+]] = or i32
diff --git a/test/Instrumentation/MemorySanitizer/vector_shift.ll b/test/Instrumentation/MemorySanitizer/vector_shift.ll
index 91e4bd53c6a9..d7b47f011b1d 100644
--- a/test/Instrumentation/MemorySanitizer/vector_shift.ll
+++ b/test/Instrumentation/MemorySanitizer/vector_shift.ll
@@ -25,7 +25,7 @@ entry:
ret i64 %6
}
-; CHECK: @test_mmx
+; CHECK-LABEL: @test_mmx
; CHECK: = icmp ne i64 {{.*}}, 0
; CHECK: [[C:%.*]] = sext i1 {{.*}} to i64
; CHECK: [[A:%.*]] = call x86_mmx @llvm.x86.mmx.psll.d(
@@ -41,7 +41,7 @@ entry:
ret <8 x i16> %0
}
-; CHECK: @test_sse2_scalar
+; CHECK-LABEL: @test_sse2_scalar
; CHECK: = icmp ne i32 {{.*}}, 0
; CHECK: = sext i1 {{.*}} to i128
; CHECK: = bitcast i128 {{.*}} to <8 x i16>
@@ -57,7 +57,7 @@ entry:
ret <8 x i16> %0
}
-; CHECK: @test_sse2
+; CHECK-LABEL: @test_sse2
; CHECK: = bitcast <8 x i16> {{.*}} to i128
; CHECK: = trunc i128 {{.*}} to i64
; CHECK: = icmp ne i64 {{.*}}, 0
@@ -77,7 +77,7 @@ entry:
ret <4 x i32> %0
}
-; CHECK: @test_avx2
+; CHECK-LABEL: @test_avx2
; CHECK: = icmp ne <4 x i32> {{.*}}, zeroinitializer
; CHECK: = sext <4 x i1> {{.*}} to <4 x i32>
; CHECK: = call <4 x i32> @llvm.x86.avx2.psllv.d(
@@ -91,7 +91,7 @@ entry:
ret <8 x i32> %0
}
-; CHECK: @test_avx2_256
+; CHECK-LABEL: @test_avx2_256
; CHECK: = icmp ne <8 x i32> {{.*}}, zeroinitializer
; CHECK: = sext <8 x i1> {{.*}} to <8 x i32>
; CHECK: = call <8 x i32> @llvm.x86.avx2.psllv.d.256(
diff --git a/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll b/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
index 45b4c8cb0a61..93d0f881625f 100644
--- a/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
+++ b/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
@@ -25,7 +25,7 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.A = type { i32 }
; Function Attrs: nounwind readonly uwtable
-define i32 @_ZN1A1fEv(%struct.A* nocapture readonly %this) #0 align 2 {
+define i32 @_ZN1A1fEv(%struct.A* nocapture readonly %this) #0 align 2 !dbg !13 {
entry:
tail call void @llvm.dbg.value(metadata %struct.A* %this, i64 0, metadata !15, metadata !DIExpression()), !dbg !20
%x = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0, !dbg !21
@@ -43,7 +43,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!17, !18}
!llvm.ident = !{!19}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (210251)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (210251)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !2, imports: !2)
!1 = !DIFile(filename: "../1.cc", directory: "/code/llvm/build0")
!2 = !{}
!3 = !{!4}
@@ -56,9 +56,9 @@ attributes #1 = { nounwind readnone }
!10 = !{!7, !11}
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
!12 = !{!13}
-!13 = !DISubprogram(name: "f", linkageName: "_ZN1A1fEv", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !9, function: i32 (%struct.A*)* @_ZN1A1fEv, declaration: !8, variables: !14)
+!13 = distinct !DISubprogram(name: "f", linkageName: "_ZN1A1fEv", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !"_ZTS1A", type: !9, declaration: !8, variables: !14)
!14 = !{!15}
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !13, type: !16)
+!15 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !13, type: !16)
!16 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1A")
!17 = !{i32 2, !"Dwarf Version", i32 4}
!18 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Instrumentation/SanitizerCoverage/coverage.ll b/test/Instrumentation/SanitizerCoverage/coverage.ll
index 659c03040f2f..71fdbbb5ada7 100644
--- a/test/Instrumentation/SanitizerCoverage/coverage.ll
+++ b/test/Instrumentation/SanitizerCoverage/coverage.ll
@@ -29,8 +29,8 @@ entry:
}
; CHECK0-NOT: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor }
-; CHECK1: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor }
-; CHECK2: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor }
+; CHECK1: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor, i8* null }
+; CHECK2: @llvm.global_ctors = {{.*}}{ i32 2, void ()* @sancov.module_ctor, i8* null }
; CHECK0-NOT: call void @__sanitizer_cov(
; CHECK0-NOT: call void @__sanitizer_cov_module_init(
diff --git a/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll b/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll
index f9b8e712688c..8c330ea5bddf 100644
--- a/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll
+++ b/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll
@@ -25,7 +25,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK: [[B]] = !DILocation(line: 3, column: 5, scope: !{{.*}})
; CHECK: [[C]] = !DILocation(line: 4, column: 1, scope: !{{.*}})
-define void @_Z3fooPi(i32* %a) #0 {
+define void @_Z3fooPi(i32* %a) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !11, metadata !DIExpression()), !dbg !15
%tobool = icmp eq i32* %a, null, !dbg !16
@@ -49,18 +49,18 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!12, !13}
!llvm.ident = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (217079)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (217079)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "if.cc", directory: "FOO")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", linkageName: "_Z3fooPi", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*)* @_Z3fooPi, variables: !10)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooPi", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !10)
!5 = !DIFile(filename: "if.cc", directory: "FOO")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!10 = !{!11}
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!11 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!12 = !{i32 2, !"Dwarf Version", i32 4}
!13 = !{i32 2, !"Debug Info Version", i32 3}
!14 = !{!"clang version 3.6.0 (217079)"}
diff --git a/test/Instrumentation/SanitizerCoverage/seh.ll b/test/Instrumentation/SanitizerCoverage/seh.ll
new file mode 100644
index 000000000000..ce18334ed207
--- /dev/null
+++ b/test/Instrumentation/SanitizerCoverage/seh.ll
@@ -0,0 +1,86 @@
+; RUN: opt < %s -sancov -sanitizer-coverage-level=0 -S | FileCheck %s
+; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -S | FileCheck %s
+; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -S | FileCheck %s
+; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-block-threshold=0 -S | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+declare i32 @llvm.eh.typeid.for(i8*) #2
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
+declare i8* @llvm.localrecover(i8*, i8*, i32)
+declare void @llvm.localescape(...) #1
+
+declare i32 @_except_handler3(...)
+declare void @may_throw(i32* %r)
+
+define i32 @main() sanitize_address personality i8* bitcast (i32 (...)* @_except_handler3 to i8*) {
+entry:
+ %r = alloca i32, align 4
+ %__exception_code = alloca i32, align 4
+ call void (...) @llvm.localescape(i32* nonnull %__exception_code)
+ %0 = bitcast i32* %r to i8*
+ store i32 0, i32* %r, align 4
+ invoke void @may_throw(i32* nonnull %r) #4
+ to label %__try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 }
+ catch i8* bitcast (i32 ()* @"\01?filt$0@0@main@@" to i8*)
+ %2 = extractvalue { i8*, i32 } %1, 1
+ %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @"\01?filt$0@0@main@@" to i8*)) #1
+ %matches = icmp eq i32 %2, %3
+ br i1 %matches, label %__except, label %eh.resume
+
+__except: ; preds = %lpad
+ store i32 1, i32* %r, align 4
+ br label %__try.cont
+
+__try.cont: ; preds = %entry, %__except
+ %4 = load i32, i32* %r, align 4
+ ret i32 %4
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } %1
+}
+
+; Check that we don't do any instrumentation.
+
+; CHECK-LABEL: define i32 @main()
+; CHECK-NOT: load atomic i32, i32* {{.*}} monotonic, align 4, !nosanitize
+; CHECK-NOT: call void @__sanitizer_cov
+; CHECK: ret i32
+
+; Function Attrs: nounwind
+define internal i32 @"\01?filt$0@0@main@@"() #1 {
+entry:
+ %0 = tail call i8* @llvm.frameaddress(i32 1)
+ %1 = tail call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @main to i8*), i8* %0)
+ %2 = tail call i8* @llvm.localrecover(i8* bitcast (i32 ()* @main to i8*), i8* %1, i32 0)
+ %__exception_code = bitcast i8* %2 to i32*
+ %3 = getelementptr inbounds i8, i8* %0, i32 -20
+ %4 = bitcast i8* %3 to { i32*, i8* }**
+ %5 = load { i32*, i8* }*, { i32*, i8* }** %4, align 4
+ %6 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %5, i32 0, i32 0
+ %7 = load i32*, i32** %6, align 4
+ %8 = load i32, i32* %7, align 4
+ store i32 %8, i32* %__exception_code, align 4
+ ret i32 1
+}
+
+; CHECK-LABEL: define internal i32 @"\01?filt$0@0@main@@"()
+; CHECK: tail call i8* @llvm.localrecover(i8* bitcast (i32 ()* @main to i8*), i8* {{.*}}, i32 0)
+
+define void @ScaleFilterCols_SSSE3(i8* %dst_ptr, i8* %src_ptr, i32 %dst_width, i32 %x, i32 %dx) sanitize_address {
+entry:
+ %dst_width.addr = alloca i32, align 4
+ store i32 %dst_width, i32* %dst_width.addr, align 4
+ %0 = call { i8*, i8*, i32, i32, i32 } asm sideeffect "", "=r,=r,={ax},=r,=r,=*rm,rm,rm,0,1,2,3,4,5,~{memory},~{cc},~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{dirflag},~{fpsr},~{flags}"(i32* nonnull %dst_width.addr, i32 %x, i32 %dx, i8* %dst_ptr, i8* %src_ptr, i32 0, i32 0, i32 0, i32 %dst_width)
+ ret void
+}
+
+define void @ScaleColsUp2_SSE2() sanitize_address {
+entry:
+ ret void
+}
diff --git a/test/Instrumentation/SanitizerCoverage/switch-tracing.ll b/test/Instrumentation/SanitizerCoverage/switch-tracing.ll
new file mode 100644
index 000000000000..aac56dbdeb78
--- /dev/null
+++ b/test/Instrumentation/SanitizerCoverage/switch-tracing.ll
@@ -0,0 +1,56 @@
+; Test -sanitizer-coverage-experimental-trace-compares=1 (instrumenting a switch)
+; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-experimental-trace-compares=1 -S | FileCheck %s --check-prefix=CHECK
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+declare void @_Z3bari(i32)
+define void @foo(i32 %x) {
+entry:
+; CHECK: __sancov_gen_cov_switch_values = internal global [5 x i64] [i64 3, i64 32, i64 1, i64 101, i64 1001]
+; CHECK: [[TMP:%[0-9]*]] = zext i32 %x to i64
+; CHECK-NEXT: call void @__sanitizer_cov_trace_switch(i64 [[TMP]], i64* getelementptr inbounds ([5 x i64], [5 x i64]* @__sancov_gen_cov_switch_values, i32 0, i32 0))
+ switch i32 %x, label %sw.epilog [
+ i32 1, label %sw.bb
+ i32 101, label %sw.bb.1
+ i32 1001, label %sw.bb.2
+ ]
+
+sw.bb: ; preds = %entry
+ tail call void @_Z3bari(i32 4)
+ br label %sw.epilog
+
+sw.bb.1: ; preds = %entry
+ tail call void @_Z3bari(i32 5)
+ br label %sw.epilog
+
+sw.bb.2: ; preds = %entry
+ tail call void @_Z3bari(i32 6)
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %sw.bb.2, %sw.bb.1, %sw.bb
+ ret void
+}
+
+define void @fooi72(i72 %x) {
+entry:
+ switch i72 %x, label %sw.epilog [
+ i72 1, label %sw.bb
+ i72 101, label %sw.bb.1
+ i72 1001, label %sw.bb.2
+ ]
+
+sw.bb: ; preds = %entry
+ tail call void @_Z3bari(i32 4)
+ br label %sw.epilog
+
+sw.bb.1: ; preds = %entry
+ tail call void @_Z3bari(i32 5)
+ br label %sw.epilog
+
+sw.bb.2: ; preds = %entry
+ tail call void @_Z3bari(i32 6)
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %sw.bb.2, %sw.bb.1, %sw.bb
+ ret void
+}
diff --git a/test/Instrumentation/ThreadSanitizer/atomic.ll b/test/Instrumentation/ThreadSanitizer/atomic.ll
index db01bab8fe53..a10ca6c91a89 100644
--- a/test/Instrumentation/ThreadSanitizer/atomic.ll
+++ b/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -1994,5 +1994,5 @@ entry:
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "atomic.cpp", directory: "/tmp")
-!6 = !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
+!6 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
!7 = !DILocation(line: 100, column: 1, scope: !6)
diff --git a/test/JitListener/multiple.ll b/test/JitListener/multiple.ll
index 2ba06ef1f0db..1f69ddae4f53 100644
--- a/test/JitListener/multiple.ll
+++ b/test/JitListener/multiple.ll
@@ -50,7 +50,7 @@
; ModuleID = 'multiple.c'
; Function Attrs: nounwind uwtable
-define i32 @foo(i32 %a) #0 {
+define i32 @foo(i32 %a) #0 !dbg !4 {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
@@ -63,7 +63,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind uwtable
-define i32 @bar(i32 %a) #0 {
+define i32 @bar(i32 %a) #0 !dbg !9 {
entry:
%retval = alloca i32, align 4
%a.addr = alloca i32, align 4
@@ -89,7 +89,7 @@ return: ; preds = %if.end, %if.then
}
; Function Attrs: nounwind uwtable
-define i32 @fubar(i32 %a) #0 {
+define i32 @fubar(i32 %a) #0 !dbg !10 {
entry:
%retval = alloca i32, align 4
%a.addr = alloca i32, align 4
@@ -125,27 +125,27 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!11, !12, !13}
!llvm.ident = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "multiple.c", directory: "F:\5Cusers\5Cakaylor\5Cllvm-s\5Cllvm\5Ctest\5CJitListener")
!2 = !{}
!3 = !{!4, !9, !10}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "multiple.c", directory: "F:CusersCakaylorCllvm-sCllvmCtestCJitListener")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DISubprogram(name: "bar", line: 5, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: i32 (i32)* @bar, variables: !2)
-!10 = !DISubprogram(name: "fubar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 12, file: !1, scope: !5, type: !6, function: i32 (i32)* @fubar, variables: !2)
+!9 = distinct !DISubprogram(name: "bar", line: 5, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
+!10 = distinct !DISubprogram(name: "fubar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 12, file: !1, scope: !5, type: !6, variables: !2)
!11 = !{i32 2, !"Dwarf Version", i32 4}
!12 = !{i32 2, !"Debug Info Version", i32 3}
!13 = !{i32 1, !"PIC Level", i32 2}
!14 = !{!"clang version 3.6.0 (trunk)"}
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!15 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!16 = !DIExpression()
!17 = !DILocation(line: 1, column: 13, scope: !4)
!18 = !DILocation(line: 2, column: 10, scope: !4)
!19 = !DILocation(line: 2, column: 3, scope: !4)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 5, arg: 1, scope: !9, file: !5, type: !8)
+!20 = !DILocalVariable(name: "a", line: 5, arg: 1, scope: !9, file: !5, type: !8)
!21 = !DILocation(line: 5, column: 13, scope: !9)
!22 = !DILocation(line: 6, column: 7, scope: !23)
!23 = distinct !DILexicalBlock(line: 6, column: 7, file: !1, scope: !9)
@@ -156,7 +156,7 @@ attributes #1 = { nounwind readnone }
!28 = !DILocation(line: 9, column: 10, scope: !9)
!29 = !DILocation(line: 9, column: 3, scope: !9)
!30 = !DILocation(line: 10, column: 1, scope: !9)
-!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 12, arg: 1, scope: !10, file: !5, type: !8)
+!31 = !DILocalVariable(name: "a", line: 12, arg: 1, scope: !10, file: !5, type: !8)
!32 = !DILocation(line: 12, column: 15, scope: !10)
!33 = !DILocation(line: 13, column: 11, scope: !10)
!34 = !DILocation(line: 13, column: 3, scope: !10)
diff --git a/test/JitListener/simple.ll b/test/JitListener/simple.ll
index d98eef54e4ac..bfa11b7e533e 100644
--- a/test/JitListener/simple.ll
+++ b/test/JitListener/simple.ll
@@ -16,7 +16,7 @@
; ModuleID = 'simple.c'
; Function Attrs: nounwind uwtable
-define i32 @foo(i32 %a) #0 {
+define i32 @foo(i32 %a) #0 !dbg !4 {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
@@ -35,11 +35,11 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "simple.c", directory: "F:\5Cusers\5Cakaylor\5Cllvm-s\5Cllvm\5Ctest\5CJitListener")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "simple.c", directory: "F:CusersCakaylorCllvm-sCllvmCtestCJitListener")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
@@ -47,7 +47,7 @@ attributes #1 = { nounwind readnone }
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.6.0 (trunk)"}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!12 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!13 = !DIExpression()
!14 = !DILocation(line: 1, column: 13, scope: !4)
!15 = !DILocation(line: 2, column: 10, scope: !4)
diff --git a/test/LTO/X86/Inputs/invalid.ll.bc b/test/LTO/X86/Inputs/invalid.ll.bc
index a85c3644b3ab..60c7afffbc23 100644
--- a/test/LTO/X86/Inputs/invalid.ll.bc
+++ b/test/LTO/X86/Inputs/invalid.ll.bc
Binary files differ
diff --git a/test/LTO/X86/bcsection.ll b/test/LTO/X86/bcsection.ll
index e65ade623536..bcd6bc328488 100644
--- a/test/LTO/X86/bcsection.ll
+++ b/test/LTO/X86/bcsection.ll
@@ -15,6 +15,8 @@
; RUN: llvm-lto -exported-symbol=main -exported-symbol=_main -o %T/bcsection.macho.o %T/bcsection.macho.bco
; RUN: llvm-nm %T/bcsection.macho.o | FileCheck %s
+; REQUIRES: default_triple
+
; CHECK: main
define i32 @main() {
ret i32 0
diff --git a/test/LTO/X86/current-section.ll b/test/LTO/X86/current-section.ll
index f79b378318df..49eee49ae623 100644
--- a/test/LTO/X86/current-section.ll
+++ b/test/LTO/X86/current-section.ll
@@ -1,4 +1,5 @@
; RUN: llvm-as < %s >%t1
; RUN: llvm-lto -o %t2 %t1
+; REQUIRES: default_triple
module asm ".align 4"
diff --git a/test/LTO/X86/diagnostic-handler-noexit.ll b/test/LTO/X86/diagnostic-handler-noexit.ll
index be768c900f14..597419ff64f5 100644
--- a/test/LTO/X86/diagnostic-handler-noexit.ll
+++ b/test/LTO/X86/diagnostic-handler-noexit.ll
@@ -4,10 +4,10 @@
; RUN: llvm-as <%s >%t1
; RUN: llvm-as <%s >%t2
-; RUN: not llvm-lto -o /dev/null %t1 %t2 2>&1 | FileCheck %s
+; RUN: not llvm-lto -use-diagnostic-handler -o /dev/null %t1 %t2 2>&1 | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
-; CHECK: Linking globals named 'goodboy': symbol multiply defined!
+; CHECK: llvm-lto: error: Linking globals named 'goodboy': symbol multiply defined!
; CHECK: llvm-lto{{.*}}: error adding file
@goodboy = global i32 3203383023, align 4 ; 0xbeefbeef
diff --git a/test/LTO/X86/diagnostic-handler-remarks.ll b/test/LTO/X86/diagnostic-handler-remarks.ll
index 4da9101117ec..a368a677b2fe 100644
--- a/test/LTO/X86/diagnostic-handler-remarks.ll
+++ b/test/LTO/X86/diagnostic-handler-remarks.ll
@@ -9,7 +9,7 @@
; RUN: llvm-lto -pass-remarks=inline -use-diagnostic-handler \
; RUN: -exported-symbol _main -o %t.o %t.bc 2>&1 | \
-; RUN: FileCheck %s -allow-empty -check-prefix=REMARKS
+; RUN: FileCheck %s -allow-empty -check-prefix=REMARKS_DH
; RUN: llvm-nm %t.o | FileCheck %s -check-prefix NM
; Confirm that -pass-remarks are not printed by default.
@@ -24,14 +24,19 @@
; RUN: llvm-nm %t.o | FileCheck %s -check-prefix NM
; REMARKS: remark:
+; REMARKS_DH: llvm-lto: remark:
; CHECK-NOT: remark:
+; CHECK-NOT: llvm-lto:
; NM-NOT: foo
; NM: main
target triple = "x86_64-apple-darwin"
+declare i32 @bar()
+
define i32 @foo() {
- ret i32 7
+ %a = call i32 @bar()
+ ret i32 %a
}
define i32 @main() {
diff --git a/test/LTO/X86/disable-verify.ll b/test/LTO/X86/disable-verify.ll
new file mode 100644
index 000000000000..5d2508a96c07
--- /dev/null
+++ b/test/LTO/X86/disable-verify.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s >%t.bc
+; RUN: llvm-lto -debug-pass=Arguments -exported-symbol=_f -o /dev/null %t.bc 2>&1 -disable-verify | FileCheck %s
+; RUN: llvm-lto -debug-pass=Arguments -exported-symbol=_f -o /dev/null %t.bc 2>&1 | FileCheck %s -check-prefix=VERIFY
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; -disable-verify should disable verification from the optimization pipeline.
+; CHECK: Pass Arguments: -verify -internalize
+; CHECK-NOT: -verify
+
+; VERIFY: Pass Arguments: -verify -internalize
+; VERIFY: Pass Arguments: {{.*}} -verify {{.*}} -verify
+
+define void @f() {
+entry:
+ ret void
+}
diff --git a/test/LTO/X86/invalid.ll b/test/LTO/X86/invalid.ll
index 5b6996d4ad35..b9d0bca091d0 100644
--- a/test/LTO/X86/invalid.ll
+++ b/test/LTO/X86/invalid.ll
@@ -1,4 +1,4 @@
; RUN: not llvm-lto %S/Inputs/invalid.ll.bc 2>&1 | FileCheck %s
-; CHECK: llvm-lto{{.*}}: error loading file '{{.*}}/Inputs/invalid.ll.bc': Unknown attribute kind (48)
+; CHECK: llvm-lto{{.*}}: error loading file '{{.*}}/Inputs/invalid.ll.bc': Unknown attribute kind (52)
diff --git a/test/LTO/X86/list-symbols.ll b/test/LTO/X86/list-symbols.ll
index 41b7d00deecc..1d98b390b18f 100644
--- a/test/LTO/X86/list-symbols.ll
+++ b/test/LTO/X86/list-symbols.ll
@@ -1,6 +1,7 @@
; RUN: llvm-as -o %T/1.bc %s
; RUN: llvm-as -o %T/2.bc %S/Inputs/list-symbols.ll
; RUN: llvm-lto -list-symbols-only %T/1.bc %T/2.bc | FileCheck %s
+; REQUIRES: default_triple
; CHECK-LABEL: 1.bc:
; CHECK-DAG: foo
diff --git a/test/LTO/X86/llvm-lto-output.ll b/test/LTO/X86/llvm-lto-output.ll
new file mode 100644
index 000000000000..56a9a5284b1e
--- /dev/null
+++ b/test/LTO/X86/llvm-lto-output.ll
@@ -0,0 +1,21 @@
+; Test the various output formats of the llvm-lto utility
+;
+; RUN: llvm-as < %s > %t1
+;
+; RUN: llvm-lto -exported-symbol=main -save-merged-module -filetype=asm -o %t2 %t1
+; RUN: llvm-dis -o - %t2.merged.bc | FileCheck %s
+; CHECK: @main()
+
+; RUN: FileCheck --check-prefix=ASM %s < %t2
+; RUN: llvm-lto -exported-symbol=main -filetype=obj -o %t2 %t1
+; RUN: llvm-objdump -d %t2 | FileCheck --check-prefix=ASM %s
+; ASM: main:
+;
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() {
+entry:
+ ret i32 23
+}
+
diff --git a/test/LTO/X86/parallel.ll b/test/LTO/X86/parallel.ll
new file mode 100644
index 000000000000..9a8494c127f1
--- /dev/null
+++ b/test/LTO/X86/parallel.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as -o %t.bc %s
+; RUN: llvm-lto -exported-symbol=foo -exported-symbol=bar -j2 -o %t.o %t.bc
+; RUN: llvm-nm %t.o.0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-nm %t.o.1 | FileCheck --check-prefix=CHECK1 %s
+
+; FIXME: Investigate test failures on these architecures.
+; UNSUPPORTED: mips, mipsel, aarch64, powerpc64
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK0-NOT: bar
+; CHECK0: T foo
+; CHECK0-NOT: bar
+define void @foo() {
+ call void @bar()
+ ret void
+}
+
+; CHECK1-NOT: foo
+; CHECK1: T bar
+; CHECK1-NOT: foo
+define void @bar() {
+ call void @foo()
+ ret void
+}
diff --git a/test/LibDriver/thin.test b/test/LibDriver/thin.test
new file mode 100644
index 000000000000..4ab9d61952bf
--- /dev/null
+++ b/test/LibDriver/thin.test
@@ -0,0 +1,9 @@
+RUN: echo foo > %t
+
+RUN: llvm-lib -out:%t.a %t
+RUN: FileCheck --check-prefix=FAT %s < %t.a
+FAT: !<arch>
+
+RUN: llvm-lib -out:%t.thin.a -llvmlibthin %t
+RUN: FileCheck --check-prefix=THIN %s < %t.thin.a
+THIN: !<thin>
diff --git a/test/Linker/2003-01-30-LinkerRename.ll b/test/Linker/2003-01-30-LinkerRename.ll
index 1e25d3eef0c6..1844e936baf2 100644
--- a/test/Linker/2003-01-30-LinkerRename.ll
+++ b/test/Linker/2003-01-30-LinkerRename.ll
@@ -2,9 +2,9 @@
; RUN: llvm-as %s -o %t.2.bc
; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
-; CHECK: @bar = global i32 ()* @foo2
+; CHECK: @bar = global i32 ()* @foo.2
-; CHECK: define internal i32 @foo2() {
+; CHECK: define internal i32 @foo.2() {
; CHECK-NEXT: ret i32 7
; CHECK-NEXT: }
diff --git a/test/Linker/2003-04-23-LinkOnceLost.ll b/test/Linker/2003-04-23-LinkOnceLost.ll
index e4528906e024..e2b600c877c0 100644
--- a/test/Linker/2003-04-23-LinkOnceLost.ll
+++ b/test/Linker/2003-04-23-LinkOnceLost.ll
@@ -1,11 +1,10 @@
-; This fails because the linker renames the non-opaque type not the opaque
-; one...
-
-; RUN: echo " define linkonce void @foo() { ret void } " | \
-; RUN: llvm-as -o %t.2.bc
-; RUN: llvm-as %s -o %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; RUN: echo " define linkonce void @foo() { ret void } " > %t.ll
+; RUN: llvm-link %s %t.ll -S | FileCheck %s
; CHECK: linkonce{{.*}}foo
declare void @foo()
+define void @use_foo() {
+ call void @foo()
+ ret void
+}
diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll
index 0261fe3a9208..f511be1bf22f 100644
--- a/test/Linker/2003-05-31-LinkerRename.ll
+++ b/test/Linker/2003-05-31-LinkerRename.ll
@@ -1,20 +1,18 @@
-; RUN: llvm-as %S/Inputs/2003-05-31-LinkerRename.ll -o %t.1.bc
-; RUN: llvm-as %s -o %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; RUN: llvm-link %S/Inputs/2003-05-31-LinkerRename.ll %s -S | FileCheck %s
-; CHECK: @bar = global i32 ()* @foo2
+; CHECK: @bar = global i32 ()* @foo.2
-; CHECK: define internal i32 @foo2() {
+; CHECK: define internal i32 @foo.2() {
; CHECK-NEXT: ret i32 7
; CHECK-NEXT: }
-; CHECK: declare i32 @foo()
-
; CHECK: define i32 @test() {
; CHECK-NEXT: %X = call i32 @foo()
; CHECK-NEXT: ret i32 %X
; CHECK-NEXT: }
+; CHECK: declare i32 @foo()
+
declare i32 @foo()
define i32 @test() {
diff --git a/test/Linker/2008-03-05-AliasReference.ll b/test/Linker/2008-03-05-AliasReference.ll
index 078479424b02..1423049c8172 100644
--- a/test/Linker/2008-03-05-AliasReference.ll
+++ b/test/Linker/2008-03-05-AliasReference.ll
@@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-unknown-linux-gnu"
@foo = weak global i32 0 ; <i32*> [#uses=1]
-@bar = weak alias i32* @foo ; <i32*> [#uses=1]
+@bar = weak alias i32, i32* @foo ; <i32*> [#uses=1]
define i32 @baz() nounwind {
entry:
diff --git a/test/Linker/2008-07-06-AliasFnDecl.ll b/test/Linker/2008-07-06-AliasFnDecl.ll
index 8e8c8454d941..555899a31acb 100644
--- a/test/Linker/2008-07-06-AliasFnDecl.ll
+++ b/test/Linker/2008-07-06-AliasFnDecl.ll
@@ -3,7 +3,7 @@
; RUN: llvm-as %p/2008-07-06-AliasFnDecl2.ll -o %t2.bc
; RUN: llvm-link %t1.bc %t2.bc -o %t3.bc
-@b = alias void ()* @a
+@b = alias void (), void ()* @a
define void @a() nounwind {
entry:
diff --git a/test/Linker/2008-07-06-AliasWeakDest.ll b/test/Linker/2008-07-06-AliasWeakDest.ll
index e631175444c0..8db492363aa4 100644
--- a/test/Linker/2008-07-06-AliasWeakDest.ll
+++ b/test/Linker/2008-07-06-AliasWeakDest.ll
@@ -7,9 +7,9 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
-@sched_clock = alias i64 ()* @native_sched_clock
+@sched_clock = alias i64 (), i64 ()* @native_sched_clock
-@foo = alias i32* @realfoo
+@foo = alias i32, i32* @realfoo
@realfoo = global i32 0
define i64 @native_sched_clock() nounwind {
diff --git a/test/Linker/2009-09-03-mdnode.ll b/test/Linker/2009-09-03-mdnode.ll
index ec444d316b78..77c6b2d93faa 100644
--- a/test/Linker/2009-09-03-mdnode.ll
+++ b/test/Linker/2009-09-03-mdnode.ll
@@ -26,6 +26,6 @@ declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
declare void @llvm.dbg.region.end(metadata) nounwind readnone
-!0 = !DISubprogram(name: "main", linkageName: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1)
-!1 = !DICompileUnit(language: DW_LANG_C99, producer: "ellcc 0.1.0", isOptimized: true, emissionKind: 0, file: !2)
+!0 = distinct !DISubprogram(name: "main", linkageName: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1)
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "ellcc 0.1.0", isOptimized: true, emissionKind: 0, file: !2)
!2 = !DIFile(filename: "a.c", directory: "/home/rich/ellcc/test/source")
diff --git a/test/Linker/2009-09-03-mdnode2.ll b/test/Linker/2009-09-03-mdnode2.ll
index d9d52680b7b0..69b8d6595eb3 100644
--- a/test/Linker/2009-09-03-mdnode2.ll
+++ b/test/Linker/2009-09-03-mdnode2.ll
@@ -21,6 +21,6 @@ declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
declare void @llvm.dbg.region.end(metadata) nounwind readnone
-!0 = !DISubprogram(name: "f", linkageName: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1)
-!1 = !DICompileUnit(language: DW_LANG_C99, producer: "ellcc 0.1.0", isOptimized: true, emissionKind: 0, file: !2)
+!0 = distinct !DISubprogram(name: "f", linkageName: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1)
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "ellcc 0.1.0", isOptimized: true, emissionKind: 0, file: !2)
!2 = !DIFile(filename: "b.c", directory: "/home/rich/ellcc/test/source")
diff --git a/test/Linker/2011-08-04-DebugLoc.ll b/test/Linker/2011-08-04-DebugLoc.ll
index 710ddf7774ff..71e89a5072e5 100644
--- a/test/Linker/2011-08-04-DebugLoc.ll
+++ b/test/Linker/2011-08-04-DebugLoc.ll
@@ -9,7 +9,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"
-define i32 @foo() nounwind ssp {
+define i32 @foo() nounwind ssp !dbg !1 {
ret i32 42, !dbg !6
}
@@ -17,8 +17,8 @@ define i32 @foo() nounwind ssp {
!llvm.module.flags = !{!11}
!llvm.dbg.sp = !{!1}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !9, retainedTypes: !9, subprograms: !10)
-!1 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !2, type: !3, function: i32 ()* @foo)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !9, retainedTypes: !9, subprograms: !10)
+!1 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !2, type: !3)
!2 = !DIFile(filename: "a.c", directory: "/private/tmp")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
diff --git a/test/Linker/2011-08-04-DebugLoc2.ll b/test/Linker/2011-08-04-DebugLoc2.ll
index bc524dae91de..c5f77ac69cda 100644
--- a/test/Linker/2011-08-04-DebugLoc2.ll
+++ b/test/Linker/2011-08-04-DebugLoc2.ll
@@ -6,7 +6,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"
-define i32 @bar() nounwind ssp {
+define i32 @bar() nounwind ssp !dbg !1 {
ret i32 21, !dbg !6
}
@@ -14,8 +14,8 @@ define i32 @bar() nounwind ssp {
!llvm.module.flags = !{!11}
!llvm.dbg.sp = !{!1}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !9, retainedTypes: !9, subprograms: !10)
-!1 = !DISubprogram(name: "bar", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !2, type: !3, function: i32 ()* @bar)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !9, retainedTypes: !9, subprograms: !10)
+!1 = distinct !DISubprogram(name: "bar", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !2, type: !3)
!2 = !DIFile(filename: "b.c", directory: "/private/tmp")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
diff --git a/test/Linker/2011-08-04-Metadata.ll b/test/Linker/2011-08-04-Metadata.ll
index fc1af754abd4..a685142ab715 100644
--- a/test/Linker/2011-08-04-Metadata.ll
+++ b/test/Linker/2011-08-04-Metadata.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-apple-macosx10.7.0"
@x = internal global i32 0, align 4
-define void @foo() nounwind uwtable ssp {
+define void @foo() nounwind uwtable ssp !dbg !1 {
entry:
store i32 1, i32* @x, align 4, !dbg !7
ret void, !dbg !7
@@ -24,8 +24,8 @@ entry:
!llvm.dbg.sp = !{!1}
!llvm.dbg.gv = !{!5}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !9, enums: !{}, retainedTypes: !{}, subprograms: !10)
-!1 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !2, type: !3, function: void ()* @foo)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !9, enums: !{}, retainedTypes: !{}, subprograms: !10)
+!1 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !2, type: !3)
!2 = !DIFile(filename: "/tmp/one.c", directory: "/Volumes/Lalgate/Slate/D")
!3 = !DISubroutineType(types: !4)
!4 = !{null}
diff --git a/test/Linker/2011-08-04-Metadata2.ll b/test/Linker/2011-08-04-Metadata2.ll
index 5eb231b2e364..07baebff4f4e 100644
--- a/test/Linker/2011-08-04-Metadata2.ll
+++ b/test/Linker/2011-08-04-Metadata2.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.7.0"
@x = internal global i32 0, align 4
-define void @bar() nounwind uwtable ssp {
+define void @bar() nounwind uwtable ssp !dbg !1 {
entry:
store i32 1, i32* @x, align 4, !dbg !7
ret void, !dbg !7
@@ -19,8 +19,8 @@ entry:
!llvm.dbg.sp = !{!1}
!llvm.dbg.gv = !{!5}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !9, enums: !{}, retainedTypes: !{}, subprograms: !10)
-!1 = !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !2, type: !3, function: void ()* @bar)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 ()", isOptimized: true, emissionKind: 0, file: !9, enums: !{}, retainedTypes: !{}, subprograms: !10)
+!1 = distinct !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !2, type: !3)
!2 = !DIFile(filename: "/tmp/two.c", directory: "/Volumes/Lalgate/Slate/D")
!3 = !DISubroutineType(types: !4)
!4 = !{null}
diff --git a/test/Linker/2011-08-18-unique-class-type.ll b/test/Linker/2011-08-18-unique-class-type.ll
index 1692a90bf7e6..e466bceb0cd3 100644
--- a/test/Linker/2011-08-18-unique-class-type.ll
+++ b/test/Linker/2011-08-18-unique-class-type.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.7.0"
%"class.N1::A" = type { i8 }
-define void @_Z3fooN2N11AE() nounwind uwtable ssp {
+define void @_Z3fooN2N11AE() nounwind uwtable ssp !dbg !5 {
entry:
%mya = alloca %"class.N1::A", align 1
call void @llvm.dbg.declare(metadata %"class.N1::A"* %mya, metadata !9, metadata !DIExpression()), !dbg !13
@@ -20,15 +20,15 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !16, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !16, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!1 = !{!2}
!2 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "foo", linkageName: "_Z3fooN2N11AE", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !16, scope: !6, type: !7, function: void ()* @_Z3fooN2N11AE)
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooN2N11AE", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !16, scope: !6, type: !7)
!6 = !DIFile(filename: "n1.c", directory: "/private/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "mya", line: 4, arg: 1, scope: !5, file: !6, type: !10)
+!9 = !DILocalVariable(name: "mya", line: 4, arg: 1, scope: !5, file: !6, type: !10)
!10 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 3, size: 8, align: 8, file: !17, scope: !11, elements: !2)
!11 = !DINamespace(name: "N1", line: 2, file: !17, scope: null)
!12 = !DIFile(filename: "./n.h", directory: "/private/tmp")
diff --git a/test/Linker/2011-08-18-unique-class-type2.ll b/test/Linker/2011-08-18-unique-class-type2.ll
index 1befb4deacc9..8821dd37fadf 100644
--- a/test/Linker/2011-08-18-unique-class-type2.ll
+++ b/test/Linker/2011-08-18-unique-class-type2.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.7.0"
%"class.N1::A" = type { i8 }
-define void @_Z3barN2N11AE() nounwind uwtable ssp {
+define void @_Z3barN2N11AE() nounwind uwtable ssp !dbg !5 {
entry:
%youra = alloca %"class.N1::A", align 1
call void @llvm.dbg.declare(metadata %"class.N1::A"* %youra, metadata !9, metadata !DIExpression()), !dbg !13
@@ -18,15 +18,15 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !16, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !16, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!1 = !{!2}
!2 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "bar", linkageName: "_Z3barN2N11AE", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scope: !6, type: !7, function: void ()* @_Z3barN2N11AE)
+!5 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barN2N11AE", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scope: !6, type: !7)
!6 = !DIFile(filename: "n2.c", directory: "/private/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "youra", line: 4, arg: 1, scope: !5, file: !6, type: !10)
+!9 = !DILocalVariable(name: "youra", line: 4, arg: 1, scope: !5, file: !6, type: !10)
!10 = !DICompositeType(tag: DW_TAG_class_type, name: "A", line: 3, size: 8, align: 8, file: !17, scope: !11, elements: !2)
!11 = !DINamespace(name: "N1", line: 2, file: !17, scope: null)
!12 = !DIFile(filename: "./n.h", directory: "/private/tmp")
diff --git a/test/Linker/2011-08-18-unique-debug-type.ll b/test/Linker/2011-08-18-unique-debug-type.ll
index b1da9354f8ce..11a1c4ecb731 100644
--- a/test/Linker/2011-08-18-unique-debug-type.ll
+++ b/test/Linker/2011-08-18-unique-debug-type.ll
@@ -4,7 +4,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"
-define i32 @foo() nounwind uwtable ssp {
+define i32 @foo() nounwind uwtable ssp !dbg !5 {
entry:
ret i32 1, !dbg !10
}
@@ -12,11 +12,11 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !12, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !12, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!1 = !{!2}
!2 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: !6, type: !7, function: i32 ()* @foo)
+!5 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: !6, type: !7)
!6 = !DIFile(filename: "one.c", directory: "/private/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
diff --git a/test/Linker/2011-08-18-unique-debug-type2.ll b/test/Linker/2011-08-18-unique-debug-type2.ll
index 40958dabf880..5b68ba0bf295 100644
--- a/test/Linker/2011-08-18-unique-debug-type2.ll
+++ b/test/Linker/2011-08-18-unique-debug-type2.ll
@@ -4,7 +4,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"
-define i32 @bar() nounwind uwtable ssp {
+define i32 @bar() nounwind uwtable ssp !dbg !5 {
entry:
ret i32 2, !dbg !10
}
@@ -12,11 +12,11 @@ entry:
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !12, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 137954)", isOptimized: true, emissionKind: 0, file: !12, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!1 = !{!2}
!2 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "bar", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: !6, type: !7, function: i32 ()* @bar)
+!5 = distinct !DISubprogram(name: "bar", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: !6, type: !7)
!6 = !DIFile(filename: "two.c", directory: "/private/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
diff --git a/test/Linker/ConstantGlobals.ll b/test/Linker/ConstantGlobals.ll
index 49f86a51bd7f..58c0d711a07e 100644
--- a/test/Linker/ConstantGlobals.ll
+++ b/test/Linker/ConstantGlobals.ll
@@ -6,3 +6,7 @@
; CHECK-DAG: @Y = external global [1 x i32]
@Y = external global [1 x i32]
+
+define [1 x i32]* @use-Y() {
+ ret [1 x i32] *@Y
+}
diff --git a/test/Linker/DbgDeclare.ll b/test/Linker/DbgDeclare.ll
index a6ffa915c071..23c00a32dbc0 100644
--- a/test/Linker/DbgDeclare.ll
+++ b/test/Linker/DbgDeclare.ll
@@ -14,7 +14,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
-define i32 @main(i32 %argc, i8** %argv) uwtable ssp {
+define i32 @main(i32 %argc, i8** %argv) uwtable ssp !dbg !5 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
@@ -37,10 +37,10 @@ declare void @test(i32, i8**)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!21}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 173515)", isOptimized: true, emissionKind: 0, file: !20, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 173515)", isOptimized: true, emissionKind: 0, file: !20, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!2 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !20, scope: null, type: !7, function: i32 (i32, i8**)* @main, variables: !2)
+!5 = distinct !DISubprogram(name: "main", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !20, scope: null, type: !7, variables: !2)
!6 = !DIFile(filename: "main.cpp", directory: "/private/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9, !10}
@@ -49,9 +49,9 @@ declare void @test(i32, i8**)
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !12)
!12 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !13)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 3, arg: 1, scope: !5, file: !6, type: !9)
+!14 = !DILocalVariable(name: "argc", line: 3, arg: 1, scope: !5, file: !6, type: !9)
!15 = !DILocation(line: 3, scope: !5)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 3, arg: 2, scope: !5, file: !6, type: !10)
+!16 = !DILocalVariable(name: "argv", line: 3, arg: 2, scope: !5, file: !6, type: !10)
!17 = !DILocation(line: 5, scope: !18)
!18 = distinct !DILexicalBlock(line: 4, column: 0, file: !20, scope: !5)
!19 = !DILocation(line: 6, scope: !18)
diff --git a/test/Linker/DbgDeclare2.ll b/test/Linker/DbgDeclare2.ll
index b716833ecc66..2335f126d8e4 100644
--- a/test/Linker/DbgDeclare2.ll
+++ b/test/Linker/DbgDeclare2.ll
@@ -5,7 +5,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
-define void @test(i32 %argc, i8** %argv) uwtable ssp {
+define void @test(i32 %argc, i8** %argv) uwtable ssp !dbg !5 {
entry:
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
@@ -50,10 +50,10 @@ declare i32 @puts(i8*)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!27}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 173515)", isOptimized: true, emissionKind: 0, file: !25, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 173515)", isOptimized: true, emissionKind: 0, file: !25, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!2 = !{}
!3 = !{!5}
-!5 = !DISubprogram(name: "print_args", linkageName: "test", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !26, scope: null, type: !7, function: void (i32, i8**)* @test, variables: !2)
+!5 = distinct !DISubprogram(name: "print_args", linkageName: "test", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !26, scope: null, type: !7, variables: !2)
!6 = !DIFile(filename: "test.cpp", directory: "/private/tmp")
!7 = !DISubroutineType(types: !8)
!8 = !{null, !9, !10}
@@ -62,10 +62,10 @@ declare i32 @puts(i8*)
!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !12)
!12 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !13)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 4, arg: 1, scope: !5, file: !6, type: !9)
+!14 = !DILocalVariable(name: "argc", line: 4, arg: 1, scope: !5, file: !6, type: !9)
!15 = !DILocation(line: 4, scope: !5)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 4, arg: 2, scope: !5, file: !6, type: !10)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 6, scope: !18, file: !6, type: !9)
+!16 = !DILocalVariable(name: "argv", line: 4, arg: 2, scope: !5, file: !6, type: !10)
+!17 = !DILocalVariable(name: "i", line: 6, scope: !18, file: !6, type: !9)
!18 = distinct !DILexicalBlock(line: 6, column: 0, file: !26, scope: !19)
!19 = distinct !DILexicalBlock(line: 5, column: 0, file: !26, scope: !5)
!20 = !DILocation(line: 6, scope: !18)
diff --git a/test/Linker/Inputs/PR8300.b.ll b/test/Linker/Inputs/PR8300.b.ll
index 9e538f5d286a..ce17f5f0dbe1 100644
--- a/test/Linker/Inputs/PR8300.b.ll
+++ b/test/Linker/Inputs/PR8300.b.ll
@@ -1,7 +1,7 @@
%foo = type { [8 x i8] }
%bar = type { [9 x i8] }
-@zed = alias bitcast (void (%bar*)* @xyz to void (%foo*)*)
+@zed = alias void (%foo*), bitcast (void (%bar*)* @xyz to void (%foo*)*)
define void @xyz(%bar* %this) {
entry:
diff --git a/test/Linker/Inputs/alias.ll b/test/Linker/Inputs/alias.ll
index f379476e7654..d4a734f58251 100644
--- a/test/Linker/Inputs/alias.ll
+++ b/test/Linker/Inputs/alias.ll
@@ -1,3 +1,3 @@
@zed = global i32 42
-@foo = alias i32* @zed
-@foo2 = alias bitcast (i32* @zed to i16*)
+@foo = alias i32, i32* @zed
+@foo2 = alias i16, bitcast (i32* @zed to i16*)
diff --git a/test/Linker/Inputs/available_externally_over_decl.ll b/test/Linker/Inputs/available_externally_over_decl.ll
new file mode 100644
index 000000000000..6bd0a939957b
--- /dev/null
+++ b/test/Linker/Inputs/available_externally_over_decl.ll
@@ -0,0 +1,10 @@
+@h = global void ()* @f
+@h2 = global void ()* @g
+
+define available_externally void @f() {
+ ret void
+}
+
+define available_externally void @g() {
+ ret void
+}
diff --git a/test/Linker/Inputs/comdat11.ll b/test/Linker/Inputs/comdat11.ll
new file mode 100644
index 000000000000..5b7f74cf0b24
--- /dev/null
+++ b/test/Linker/Inputs/comdat11.ll
@@ -0,0 +1,9 @@
+$foo = comdat any
+@foo = global i8 1, comdat
+define void @zed() {
+ call void @bar()
+ ret void
+}
+define void @bar() comdat($foo) {
+ ret void
+}
diff --git a/test/Linker/Inputs/comdat13.ll b/test/Linker/Inputs/comdat13.ll
new file mode 100644
index 000000000000..85515210ed7e
--- /dev/null
+++ b/test/Linker/Inputs/comdat13.ll
@@ -0,0 +1,9 @@
+$foo = comdat any
+@foo = internal global i8 1, comdat
+define i8* @zed() {
+ call void @bax()
+ ret i8* @foo
+}
+define internal void @bax() comdat($foo) {
+ ret void
+}
diff --git a/test/Linker/Inputs/comdat14.ll b/test/Linker/Inputs/comdat14.ll
new file mode 100644
index 000000000000..5e79fbcdacc1
--- /dev/null
+++ b/test/Linker/Inputs/comdat14.ll
@@ -0,0 +1,12 @@
+$c = comdat any
+
+@v2 = weak dllexport global i32 0, comdat ($c)
+define i32* @f2() {
+ ret i32* @v2
+}
+
+@v3 = weak alias i32, i32* @v2
+define i32* @f3() {
+ ret i32* @v3
+}
+
diff --git a/test/Linker/Inputs/comdat15.ll b/test/Linker/Inputs/comdat15.ll
new file mode 100644
index 000000000000..5d2d41bba6aa
--- /dev/null
+++ b/test/Linker/Inputs/comdat15.ll
@@ -0,0 +1,6 @@
+$a1 = comdat any
+@baz = private global i32 42, comdat($a1)
+@a1 = internal alias i32, i32* @baz
+define i32* @abc() {
+ ret i32* @a1
+}
diff --git a/test/Linker/Inputs/comdat5.ll b/test/Linker/Inputs/comdat5.ll
index 19739eb1b5da..236faaee0ae0 100644
--- a/test/Linker/Inputs/comdat5.ll
+++ b/test/Linker/Inputs/comdat5.ll
@@ -4,6 +4,6 @@ $foo = comdat largest
@zed = external constant i8
@some_name = private unnamed_addr constant [2 x i8*] [i8* @zed, i8* bitcast (void ()* @bar to i8*)], comdat($foo)
-@foo = alias getelementptr([2 x i8*], [2 x i8*]* @some_name, i32 0, i32 1)
+@foo = alias i8*, getelementptr([2 x i8*], [2 x i8*]* @some_name, i32 0, i32 1)
declare void @bar() unnamed_addr
diff --git a/test/Linker/Inputs/comdat8.ll b/test/Linker/Inputs/comdat8.ll
index a2833b05cf13..fbf099cb9aef 100644
--- a/test/Linker/Inputs/comdat8.ll
+++ b/test/Linker/Inputs/comdat8.ll
@@ -1,4 +1,4 @@
$c1 = comdat largest
@some_name = private unnamed_addr constant i32 42, comdat($c1)
-@c1 = alias i32* @some_name
+@c1 = alias i32, i32* @some_name
diff --git a/test/Linker/Inputs/ctors2.ll b/test/Linker/Inputs/ctors2.ll
new file mode 100644
index 000000000000..e2fe5ff429c0
--- /dev/null
+++ b/test/Linker/Inputs/ctors2.ll
@@ -0,0 +1,6 @@
+$foo = comdat any
+@foo = global i8 1, comdat
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @bar, i8* @foo }]
+define void @bar() comdat($foo) {
+ ret void
+}
diff --git a/test/Linker/Inputs/ctors3.ll b/test/Linker/Inputs/ctors3.ll
new file mode 100644
index 000000000000..449ccbd90faf
--- /dev/null
+++ b/test/Linker/Inputs/ctors3.ll
@@ -0,0 +1,7 @@
+$foo = comdat any
+%t = type { i8 }
+@foo = global %t zeroinitializer, comdat
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @bar, i8* getelementptr (%t, %t* @foo, i32 0, i32 0) }]
+define internal void @bar() comdat($foo) {
+ ret void
+}
diff --git a/test/Linker/Inputs/funcimport.ll b/test/Linker/Inputs/funcimport.ll
new file mode 100644
index 000000000000..25cfcfc2a657
--- /dev/null
+++ b/test/Linker/Inputs/funcimport.ll
@@ -0,0 +1,28 @@
+define i32 @main() #0 {
+entry:
+ call void (...) @weakalias()
+ call void (...) @analias()
+ %call = call i32 (...) @referencestatics()
+ %call1 = call i32 (...) @referenceglobals()
+ %call2 = call i32 (...) @referencecommon()
+ call void (...) @setfuncptr()
+ call void (...) @callfuncptr()
+ call void (...) @callweakfunc()
+ ret i32 0
+}
+
+declare void @weakalias(...) #1
+
+declare void @analias(...) #1
+
+declare i32 @referencestatics(...) #1
+
+declare i32 @referenceglobals(...) #1
+
+declare i32 @referencecommon(...) #1
+
+declare void @setfuncptr(...) #1
+
+declare void @callfuncptr(...) #1
+
+declare void @callweakfunc(...) #1
diff --git a/test/Linker/Inputs/funcimport_appending_global.ll b/test/Linker/Inputs/funcimport_appending_global.ll
new file mode 100644
index 000000000000..413b890b02ad
--- /dev/null
+++ b/test/Linker/Inputs/funcimport_appending_global.ll
@@ -0,0 +1,6 @@
+@v = weak global i8 1
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* @v}]
+
+define void @foo() {
+ ret void
+}
diff --git a/test/Linker/Inputs/internalize-lazy.ll b/test/Linker/Inputs/internalize-lazy.ll
new file mode 100644
index 000000000000..43f9a7ab7455
--- /dev/null
+++ b/test/Linker/Inputs/internalize-lazy.ll
@@ -0,0 +1,8 @@
+define linkonce_odr void @g() {
+ ret void
+}
+
+define void @f() {
+ call void @g()
+ ret void
+}
diff --git a/test/Linker/Inputs/linkage.c.ll b/test/Linker/Inputs/linkage.c.ll
new file mode 100644
index 000000000000..6aedf5ab111b
--- /dev/null
+++ b/test/Linker/Inputs/linkage.c.ll
@@ -0,0 +1,4 @@
+@X = global i32 5
+@U = global i32 6
+define i32 @foo() { ret i32 7 }
+define i32 @unused() { ret i32 8 }
diff --git a/test/Linker/Inputs/mdlocation.ll b/test/Linker/Inputs/mdlocation.ll
index 22473db46097..9c2f65d0a59a 100644
--- a/test/Linker/Inputs/mdlocation.ll
+++ b/test/Linker/Inputs/mdlocation.ll
@@ -1,13 +1,9 @@
-!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
+!named = !{!0, !1, !2, !3, !4, !5}
-!0 = !DISubprogram() ; Use this as a scope.
+!0 = distinct !DISubprogram() ; Use this as a scope.
!1 = !DILocation(line: 3, column: 7, scope: !0)
!2 = !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !1)
!3 = !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !2)
-!4 = distinct !DISubprogram() ; Test actual remapping.
-!5 = !DILocation(line: 3, column: 7, scope: !4)
-!6 = !DILocation(line: 3, column: 7, scope: !4, inlinedAt: !5)
-!7 = !DILocation(line: 3, column: 7, scope: !4, inlinedAt: !6)
; Test distinct nodes.
-!8 = distinct !DILocation(line: 3, column: 7, scope: !0)
-!9 = distinct !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !8)
+!4 = distinct !DILocation(line: 3, column: 7, scope: !0)
+!5 = distinct !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !4)
diff --git a/test/Linker/Inputs/only-needed-debug-metadata.ll b/test/Linker/Inputs/only-needed-debug-metadata.ll
new file mode 100644
index 000000000000..ec7f02f4d194
--- /dev/null
+++ b/test/Linker/Inputs/only-needed-debug-metadata.ll
@@ -0,0 +1,27 @@
+@X = external global i32
+
+declare i32 @foo()
+
+define void @bar() !dbg !4 {
+ load i32, i32* @X, !dbg !10
+ call i32 @foo(), !dbg !11
+ ret void, !dbg !12
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "linkused.b.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, isOptimized: true, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"}
+!10 = !DILocation(line: 6, column: 7, scope: !4)
+!11 = !DILocation(line: 6, column: 3, scope: !4)
+!12 = !DILocation(line: 7, column: 1, scope: !4)
diff --git a/test/Linker/Inputs/only-needed-named-metadata.ll b/test/Linker/Inputs/only-needed-named-metadata.ll
new file mode 100644
index 000000000000..fa7bc2e3cc87
--- /dev/null
+++ b/test/Linker/Inputs/only-needed-named-metadata.ll
@@ -0,0 +1,9 @@
+@X = external global i32
+
+declare i32 @foo()
+
+define void @bar() {
+ load i32, i32* @X
+ call i32 @foo()
+ ret void
+}
diff --git a/test/Linker/Inputs/opaque.ll b/test/Linker/Inputs/opaque.ll
index f164abd586d1..a5f27cba418e 100644
--- a/test/Linker/Inputs/opaque.ll
+++ b/test/Linker/Inputs/opaque.ll
@@ -11,3 +11,11 @@ define void @f1() {
getelementptr %A, %A* null, i32 0
ret void
}
+
+define %A* @use_g2() {
+ ret %A* @g2
+}
+
+define %B* @use_g3() {
+ ret %B* @g3
+}
diff --git a/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll b/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll
index efa8ec9c67b0..a817cf071078 100644
--- a/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll
+++ b/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll
@@ -1,6 +1,6 @@
%struct.Class = type { i8 }
-define weak_odr i32 @_ZN5ClassIiE3fooEv(%struct.Class* %this) align 2 {
+define weak_odr i32 @_ZN5ClassIiE3fooEv(%struct.Class* %this) align 2 !dbg !4 {
entry:
%this.addr = alloca %struct.Class*, align 8
store %struct.Class* %this, %struct.Class** %this.addr, align 8
@@ -12,11 +12,11 @@ entry:
!llvm.module.flags = !{!8, !9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224193) (llvm/trunk 224197)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224193) (llvm/trunk 224197)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "t2.cpp", directory: "/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d2")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !6, type: !7, function: i32 (%struct.Class*)* @_ZN5ClassIiE3fooEv, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "../t.h", directory: "/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d2")
!6 = !DIFile(filename: "../t.h", directory: "/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d2")
!7 = !DISubroutineType(types: !2)
diff --git a/test/Linker/Inputs/subprogram-linkonce-weak-odr.ll b/test/Linker/Inputs/subprogram-linkonce-weak-odr.ll
deleted file mode 100644
index 71c080173a3d..000000000000
--- a/test/Linker/Inputs/subprogram-linkonce-weak-odr.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-define weak_odr i32 @foo(i32 %a, i32 %b) {
-entry:
- %sum = add i32 %a, %b, !dbg !DILocation(line: 2, scope: !3)
- ret i32 %sum, !dbg !DILocation(line: 3, scope: !3)
-}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 2, !"Debug Info Version", i32 3}
-
-!llvm.dbg.cu = !{!1}
-!1 = !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !{!3}, emissionKind: 1)
-!2 = !DIFile(filename: "foo.c", directory: "/path/to/dir")
-!3 = !DISubprogram(file: !4, scope: !4, line: 1, name: "foo", function: i32 (i32, i32)* @foo, type: !5)
-!4 = !DIFile(filename: "foo.h", directory: "/path/to/dir")
-!5 = !DISubroutineType(types: !{})
diff --git a/test/Linker/Inputs/subprogram-linkonce-weak.ll b/test/Linker/Inputs/subprogram-linkonce-weak.ll
index f5399e244a9c..5e6627d90c97 100644
--- a/test/Linker/Inputs/subprogram-linkonce-weak.ll
+++ b/test/Linker/Inputs/subprogram-linkonce-weak.ll
@@ -1,4 +1,4 @@
-define weak i32 @foo(i32 %a, i32 %b) {
+define weak i32 @foo(i32 %a, i32 %b) !dbg !3 {
entry:
%sum = call i32 @fastadd(i32 %a, i32 %b), !dbg !DILocation(line: 52, scope: !3)
ret i32 %sum, !dbg !DILocation(line: 53, scope: !3)
@@ -10,7 +10,7 @@ declare i32 @fastadd(i32, i32)
!0 = !{i32 2, !"Debug Info Version", i32 3}
!llvm.dbg.cu = !{!1}
-!1 = !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !{!3}, emissionKind: 1)
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !{!3}, emissionKind: 1)
!2 = !DIFile(filename: "foo.c", directory: "/path/to/dir")
-!3 = !DISubprogram(file: !2, scope: !2, line: 51, name: "foo", function: i32 (i32, i32)* @foo, type: !4)
+!3 = distinct !DISubprogram(file: !2, scope: !2, line: 51, name: "foo", type: !4)
!4 = !DISubroutineType(types: !{})
diff --git a/test/Linker/Inputs/testlink.ll b/test/Linker/Inputs/testlink.ll
index 263d9e77d1ab..22a66399be09 100644
--- a/test/Linker/Inputs/testlink.ll
+++ b/test/Linker/Inputs/testlink.ll
@@ -53,4 +53,6 @@ define internal void @testIntern() {
ret void
}
-declare void @VecSizeCrash1(%VecSize)
+define void @VecSizeCrash1(%VecSize) {
+ ret void
+}
diff --git a/test/Linker/Inputs/thinlto_funcimport_debug.ll b/test/Linker/Inputs/thinlto_funcimport_debug.ll
new file mode 100644
index 000000000000..846a5ea001d5
--- /dev/null
+++ b/test/Linker/Inputs/thinlto_funcimport_debug.ll
@@ -0,0 +1,38 @@
+; ModuleID = 'dbg_main.o'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !dbg !4 {
+entry:
+ %call = tail call i32 @func1(i32 10) #2, !dbg !11
+ %call1 = tail call i32 @func2(i32 10) #2, !dbg !12
+ ret i32 0, !dbg !13
+}
+
+declare i32 @func1(i32) #1
+
+declare i32 @func2(i32) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "dbg_main.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"}
+!11 = !DILocation(line: 4, column: 3, scope: !4)
+!12 = !DILocation(line: 5, column: 3, scope: !4)
+!13 = !DILocation(line: 6, column: 1, scope: !4)
diff --git a/test/Linker/Inputs/type-unique-alias.ll b/test/Linker/Inputs/type-unique-alias.ll
index 3ee162ccfcfe..5a3dc7d37011 100644
--- a/test/Linker/Inputs/type-unique-alias.ll
+++ b/test/Linker/Inputs/type-unique-alias.ll
@@ -1,4 +1,4 @@
%u = type { i8 }
@g2 = global %u zeroinitializer
-@a = weak alias %u* @g2
+@a = weak alias %u, %u* @g2
diff --git a/test/Linker/Inputs/type-unique-dst-types2.ll b/test/Linker/Inputs/type-unique-dst-types2.ll
index b565c6d73649..7770ea3cca07 100644
--- a/test/Linker/Inputs/type-unique-dst-types2.ll
+++ b/test/Linker/Inputs/type-unique-dst-types2.ll
@@ -1,3 +1,7 @@
%A.11 = type { %B }
%B = type { i8 }
@g1 = external global %A.11
+
+define %A.11* @use_g1() {
+ ret %A.11* @g1
+}
diff --git a/test/Linker/Inputs/type-unique-dst-types3.ll b/test/Linker/Inputs/type-unique-dst-types3.ll
index c5794ad839a2..8a5ac2694791 100644
--- a/test/Linker/Inputs/type-unique-dst-types3.ll
+++ b/test/Linker/Inputs/type-unique-dst-types3.ll
@@ -1,2 +1,6 @@
%A.11 = type opaque
@g2 = external global %A.11
+
+define %A.11* @use_g2() {
+ ret %A.11* @g2
+}
diff --git a/test/Linker/Inputs/type-unique-inheritance-a.ll b/test/Linker/Inputs/type-unique-inheritance-a.ll
index 75cc7e1c838a..0a1c107580bb 100644
--- a/test/Linker/Inputs/type-unique-inheritance-a.ll
+++ b/test/Linker/Inputs/type-unique-inheritance-a.ll
@@ -47,7 +47,7 @@
%class.Base = type { i32 }
; Function Attrs: nounwind ssp uwtable
-define void @_Z1fi(i32 %a) #0 {
+define void @_Z1fi(i32 %a) #0 !dbg !15 {
entry:
%a.addr = alloca i32, align 4
%t = alloca %class.A, align 4
@@ -66,7 +66,7 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!19, !25}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.cpp", directory: "/Users/mren/c_testing/type_unique_air/inher")
!2 = !{}
!3 = !{!4, !8}
@@ -81,14 +81,14 @@ attributes #1 = { nounwind readnone }
!12 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!13 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 4, size: 32, align: 32, offset: 32, flags: DIFlagPrivate, file: !5, scope: !"_ZTS1A", baseType: !12)
!14 = !{!15}
-!15 = !DISubprogram(name: "f", linkageName: "_Z1fi", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !16, type: !17, function: void (i32)* @_Z1fi, variables: !2)
+!15 = distinct !DISubprogram(name: "f", linkageName: "_Z1fi", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !16, type: !17, variables: !2)
!16 = !DIFile(filename: "foo.cpp", directory: "/Users/mren/c_testing/type_unique_air/inher")
!17 = !DISubroutineType(types: !18)
!18 = !{null, !12}
!19 = !{i32 2, !"Dwarf Version", i32 2}
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 5, arg: 1, scope: !15, file: !16, type: !12)
+!20 = !DILocalVariable(name: "a", line: 5, arg: 1, scope: !15, file: !16, type: !12)
!21 = !DILocation(line: 5, scope: !15)
-!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "t", line: 6, scope: !15, file: !16, type: !4)
+!22 = !DILocalVariable(name: "t", line: 6, scope: !15, file: !16, type: !4)
!23 = !DILocation(line: 6, scope: !15)
!24 = !DILocation(line: 7, scope: !15)
!25 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Linker/Inputs/type-unique-inheritance-b.ll b/test/Linker/Inputs/type-unique-inheritance-b.ll
index dd89b8dd42df..e87b96b9c791 100644
--- a/test/Linker/Inputs/type-unique-inheritance-b.ll
+++ b/test/Linker/Inputs/type-unique-inheritance-b.ll
@@ -5,7 +5,7 @@
%class.Base = type { i32 }
; Function Attrs: nounwind ssp uwtable
-define void @_Z1gi(i32 %a) #0 {
+define void @_Z1gi(i32 %a) #0 !dbg !20 {
entry:
%a.addr = alloca i32, align 4
%t = alloca %class.B, align 8
@@ -19,7 +19,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: ssp uwtable
-define i32 @main() #2 {
+define i32 @main() #2 !dbg !24 {
entry:
%retval = alloca i32, align 4
%a = alloca %class.A, align 4
@@ -40,7 +40,7 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!27, !38}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !19, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !19, globals: !2, imports: !2)
!1 = !DIFile(filename: "bar.cpp", directory: "/Users/mren/c_testing/type_unique_air/inher")
!2 = !{}
!3 = !{!4, !11, !15}
@@ -60,20 +60,20 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!17 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 4, size: 32, align: 32, flags: DIFlagPrivate, file: !5, scope: !"_ZTS4Base", baseType: !8)
!18 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 4, size: 32, align: 32, offset: 32, flags: DIFlagPrivate, file: !12, scope: !"_ZTS1A", baseType: !8)
!19 = !{!20, !24}
-!20 = !DISubprogram(name: "g", linkageName: "_Z1gi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !21, type: !22, function: void (i32)* @_Z1gi, variables: !2)
+!20 = distinct !DISubprogram(name: "g", linkageName: "_Z1gi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !21, type: !22, variables: !2)
!21 = !DIFile(filename: "bar.cpp", directory: "/Users/mren/c_testing/type_unique_air/inher")
!22 = !DISubroutineType(types: !23)
!23 = !{null, !8}
-!24 = !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !1, scope: !21, type: !25, function: i32 ()* @main, variables: !2)
+!24 = distinct !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 9, file: !1, scope: !21, type: !25, variables: !2)
!25 = !DISubroutineType(types: !26)
!26 = !{!8}
!27 = !{i32 2, !"Dwarf Version", i32 2}
-!28 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 4, arg: 1, scope: !20, file: !21, type: !8)
+!28 = !DILocalVariable(name: "a", line: 4, arg: 1, scope: !20, file: !21, type: !8)
!29 = !DILocation(line: 4, scope: !20)
-!30 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "t", line: 5, scope: !20, file: !21, type: !4)
+!30 = !DILocalVariable(name: "t", line: 5, scope: !20, file: !21, type: !4)
!31 = !DILocation(line: 5, scope: !20)
!32 = !DILocation(line: 6, scope: !20)
-!33 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 10, scope: !24, file: !21, type: !11)
+!33 = !DILocalVariable(name: "a", line: 10, scope: !24, file: !21, type: !11)
!34 = !DILocation(line: 10, scope: !24)
!35 = !DILocation(line: 11, scope: !24)
!36 = !DILocation(line: 12, scope: !24)
diff --git a/test/Linker/Inputs/type-unique-simple2-a.ll b/test/Linker/Inputs/type-unique-simple2-a.ll
index c07157bf87b6..2a52e89b6fd3 100644
--- a/test/Linker/Inputs/type-unique-simple2-a.ll
+++ b/test/Linker/Inputs/type-unique-simple2-a.ll
@@ -44,7 +44,7 @@
%struct.Base = type { i32, %struct.Base* }
; Function Attrs: nounwind ssp uwtable
-define void @_Z1fi(i32 %a) #0 {
+define void @_Z1fi(i32 %a) #0 !dbg !12 {
entry:
%a.addr = alloca i32, align 4
%t = alloca %struct.Base, align 8
@@ -63,7 +63,7 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!16, !22}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !11, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !11, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.cpp", directory: ".")
!2 = !{}
!3 = !{!4}
@@ -75,14 +75,14 @@ attributes #1 = { nounwind readnone }
!9 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 3, size: 64, align: 64, offset: 64, file: !5, scope: !"_ZTS4Base", baseType: !10)
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS4Base")
!11 = !{!12}
-!12 = !DISubprogram(name: "f", linkageName: "_Z1fi", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !13, type: !14, function: void (i32)* @_Z1fi, variables: !2)
+!12 = distinct !DISubprogram(name: "f", linkageName: "_Z1fi", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !13, type: !14, variables: !2)
!13 = !DIFile(filename: "foo.cpp", directory: ".")
!14 = !DISubroutineType(types: !15)
!15 = !{null, !8}
!16 = !{i32 2, !"Dwarf Version", i32 2}
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !12, file: !13, type: !8)
+!17 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !12, file: !13, type: !8)
!18 = !DILocation(line: 3, scope: !12)
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "t", line: 4, scope: !12, file: !13, type: !4)
+!19 = !DILocalVariable(name: "t", line: 4, scope: !12, file: !13, type: !4)
!20 = !DILocation(line: 4, scope: !12)
!21 = !DILocation(line: 5, scope: !12)
!22 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Linker/Inputs/type-unique-simple2-b.ll b/test/Linker/Inputs/type-unique-simple2-b.ll
index 817f88704de5..7e1c6aabd9a5 100644
--- a/test/Linker/Inputs/type-unique-simple2-b.ll
+++ b/test/Linker/Inputs/type-unique-simple2-b.ll
@@ -3,7 +3,7 @@
%struct.Base = type { i32, %struct.Base* }
; Function Attrs: nounwind ssp uwtable
-define void @_Z1gi(i32 %a) #0 {
+define void @_Z1gi(i32 %a) #0 !dbg !12 {
entry:
%a.addr = alloca i32, align 4
%t = alloca %struct.Base, align 8
@@ -17,7 +17,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: ssp uwtable
-define i32 @main() #2 {
+define i32 @main() #2 !dbg !16 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -36,7 +36,7 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!19, !28}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !11, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !11, globals: !2, imports: !2)
!1 = !DIFile(filename: "bar.cpp", directory: ".")
!2 = !{}
!3 = !{!4}
@@ -48,17 +48,17 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!9 = !DIDerivedType(tag: DW_TAG_member, name: "b", line: 3, size: 64, align: 64, offset: 64, file: !5, scope: !"_ZTS4Base", baseType: !10)
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS4Base")
!11 = !{!12, !16}
-!12 = !DISubprogram(name: "g", linkageName: "_Z1gi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !13, type: !14, function: void (i32)* @_Z1gi, variables: !2)
+!12 = distinct !DISubprogram(name: "g", linkageName: "_Z1gi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !13, type: !14, variables: !2)
!13 = !DIFile(filename: "bar.cpp", directory: ".")
!14 = !DISubroutineType(types: !15)
!15 = !{null, !8}
-!16 = !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !13, type: !17, function: i32 ()* @main, variables: !2)
+!16 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !13, type: !17, variables: !2)
!17 = !DISubroutineType(types: !18)
!18 = !{!8}
!19 = !{i32 2, !"Dwarf Version", i32 2}
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 4, arg: 1, scope: !12, file: !13, type: !8)
+!20 = !DILocalVariable(name: "a", line: 4, arg: 1, scope: !12, file: !13, type: !8)
!21 = !DILocation(line: 4, scope: !12)
-!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "t", line: 5, scope: !12, file: !13, type: !4)
+!22 = !DILocalVariable(name: "t", line: 5, scope: !12, file: !13, type: !4)
!23 = !DILocation(line: 5, scope: !12)
!24 = !DILocation(line: 6, scope: !12)
!25 = !DILocation(line: 8, scope: !16)
diff --git a/test/Linker/Inputs/visibility.ll b/test/Linker/Inputs/visibility.ll
index 2cd112ed37f2..286bee5d783b 100644
--- a/test/Linker/Inputs/visibility.ll
+++ b/test/Linker/Inputs/visibility.ll
@@ -7,9 +7,9 @@ $c1 = comdat any
@v4 = hidden global i32 1, comdat($c1)
; Aliases
-@a1 = weak hidden alias i32* @v1
-@a2 = weak protected alias i32* @v2
-@a3 = weak hidden alias i32* @v3
+@a1 = weak hidden alias i32, i32* @v1
+@a2 = weak protected alias i32, i32* @v2
+@a3 = weak hidden alias i32, i32* @v3
; Functions
define weak hidden void @f1() {
diff --git a/test/Linker/alias.ll b/test/Linker/alias.ll
index bce51ad9836f..ae9da70174e8 100644
--- a/test/Linker/alias.ll
+++ b/test/Linker/alias.ll
@@ -1,16 +1,37 @@
-; RUN: llvm-link %s %S/Inputs/alias.ll -S -o - | FileCheck %s
-; RUN: llvm-link %S/Inputs/alias.ll %s -S -o - | FileCheck %s
+; RUN: llvm-link %s %S/Inputs/alias.ll -S -o - | FileCheck --check-prefix=C1 %s
+; RUN: llvm-link %S/Inputs/alias.ll %s -S -o - | FileCheck --check-prefix=C2 %s
+
+; FIXME:
+; The C1 direction is incorrect.
+; When moving an alias to an existing module and we want to discard the aliasee
+; (the C2 case), the IRMover knows to copy the aliasee as internal.
+; When moving a replacement to an aliasee to a module that has an alias (C1),
+; a replace all uses with blindly changes the alias.
+; The C1 case doesn't happen when using a system linker with a plugin because
+; the linker does full symbol resolution first.
+; Given that this is a problem only with llvm-link and its 1 module at a time
+; linking, it should probably learn to changes the aliases in the destination
+; before using the IRMover.
@foo = weak global i32 0
-; CHECK-DAG: @foo = alias i32* @zed
+; C1-DAG: @foo = alias i32, i32* @zed
+; C2-DAG: @foo = alias i32, i32* @zed
+
+@bar = alias i32, i32* @foo
+; C1-DAG: @bar = alias i32, i32* @foo
-@bar = alias i32* @foo
-; CHECK-DAG: @bar = alias i32* @foo
+; C2-DAG: @foo.1 = internal global i32 0
+; C2-DAG: @bar = alias i32, i32* @foo.1
@foo2 = weak global i32 0
-; CHECK-DAG: @foo2 = alias bitcast (i32* @zed to i16*)
+; C1-DAG: @foo2 = alias i16, bitcast (i32* @zed to i16*)
+; C2-DAG: @foo2 = alias i16, bitcast (i32* @zed to i16*)
+
+@bar2 = alias i32, i32* @foo2
+; C1-DAG: @bar2 = alias i32, bitcast (i16* @foo2 to i32*)
-@bar2 = alias i32* @foo2
-; CHECK-DAG: @bar2 = alias bitcast (i16* @foo2 to i32*)
+; C2-DAG: @foo2.2 = internal global i32 0
+; C2-DAG: @bar2 = alias i32, i32* @foo2.2
-; CHECK-DAG: @zed = global i32 42
+; C1-DAG: @zed = global i32 42
+; C2-DAG: @zed = global i32 42
diff --git a/test/Linker/available_externally_over_decl.ll b/test/Linker/available_externally_over_decl.ll
new file mode 100644
index 000000000000..0104967ef544
--- /dev/null
+++ b/test/Linker/available_externally_over_decl.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-link -S %s %p/Inputs/available_externally_over_decl.ll | FileCheck %s
+
+declare void @f()
+
+define available_externally void @g() {
+ ret void
+}
+
+define void ()* @main() {
+ call void @g()
+ ret void ()* @f
+}
+
+; CHECK-DAG: define available_externally void @g() {
+; CHECK-DAG: define available_externally void @f() {
diff --git a/test/Linker/comdat11.ll b/test/Linker/comdat11.ll
new file mode 100644
index 000000000000..dbade4104fe3
--- /dev/null
+++ b/test/Linker/comdat11.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-link -S %s %p/Inputs/comdat11.ll -o - | FileCheck %s
+
+$foo = comdat any
+@foo = global i8 0, comdat
+
+; CHECK: @foo = global i8 0, comdat
+
+; CHECK: define void @zed() {
+; CHECK: call void @bar()
+; CHECK: ret void
+; CHECK: }
+
+; CHECK: declare void @bar()
diff --git a/test/Linker/comdat12.ll b/test/Linker/comdat12.ll
new file mode 100644
index 000000000000..d06e222b63ac
--- /dev/null
+++ b/test/Linker/comdat12.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-link %s -S -o - | FileCheck %s
+
+$foo = comdat largest
+define internal void @foo() comdat($foo) {
+ ret void
+}
+
+; CHECK-NOT: foo
diff --git a/test/Linker/comdat13.ll b/test/Linker/comdat13.ll
new file mode 100644
index 000000000000..d1e382a2f278
--- /dev/null
+++ b/test/Linker/comdat13.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-link -S %s %p/Inputs/comdat13.ll -o - | FileCheck %s
+
+; In Inputs/comdat13.ll a function not in the $foo comdat (zed) references an
+; internal function in the comdat $foo.
+; The IR would be ilegal on ELF ("relocation refers to discarded section"),
+; but COFF linkers seem to just duplicate the comdat.
+
+$foo = comdat any
+@foo = internal global i8 0, comdat
+define i8* @bar() {
+ ret i8* @foo
+}
+
+; CHECK: $foo = comdat any
+
+; CHECK: @foo = internal global i8 0, comdat
+; CHECK: @foo.1 = internal global i8 1, comdat($foo)
+
+; CHECK: define i8* @bar() {
+; CHECK-NEXT: ret i8* @foo
+; CHECK-NEXT: }
+
+; CHECK: define i8* @zed() {
+; CHECK-NEXT: call void @bax()
+; CHECK-NEXT: ret i8* @foo.1
+; CHECK-NEXT: }
+
+; CHECK: define internal void @bax() comdat($foo) {
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
diff --git a/test/Linker/comdat14.ll b/test/Linker/comdat14.ll
new file mode 100644
index 000000000000..793f8573a1f5
--- /dev/null
+++ b/test/Linker/comdat14.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-link -S %s %p/Inputs/comdat14.ll -o - | FileCheck %s
+
+$c = comdat any
+
+@v = global i32 0, comdat ($c)
+
+; CHECK: @v = global i32 0, comdat($c)
+; CHECK: @v2 = extern_weak dllexport global i32
+; CHECK: @v3 = extern_weak global i32
diff --git a/test/Linker/comdat15.ll b/test/Linker/comdat15.ll
new file mode 100644
index 000000000000..cf900263105a
--- /dev/null
+++ b/test/Linker/comdat15.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-link -S %s %p/Inputs/comdat15.ll -o - | FileCheck %s
+
+$a1 = comdat any
+@bar = global i32 0, comdat($a1)
+
+; CHECK: @bar = global i32 0, comdat($a1)
+; CHECK: @baz = private global i32 42, comdat($a1)
+; CHECK: @a1 = internal alias i32, i32* @baz
+
diff --git a/test/Linker/comdat6.ll b/test/Linker/comdat6.ll
index 15be2fe58036..3dcc9e3b29d7 100644
--- a/test/Linker/comdat6.ll
+++ b/test/Linker/comdat6.ll
@@ -5,6 +5,6 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
$foo = comdat largest
@foo = linkonce_odr unnamed_addr constant [1 x i8*] [i8* bitcast (void ()* @bar to i8*)], comdat($foo)
-; CHECK: @foo = alias getelementptr inbounds ([2 x i8*], [2 x i8*]* @some_name, i32 0, i32 1)
+; CHECK: @foo = alias i8*, getelementptr inbounds ([2 x i8*], [2 x i8*]* @some_name, i32 0, i32 1)
declare void @bar() unnamed_addr
diff --git a/test/Linker/comdat8.ll b/test/Linker/comdat8.ll
index 5ca352a85099..490f8053c550 100644
--- a/test/Linker/comdat8.ll
+++ b/test/Linker/comdat8.ll
@@ -3,6 +3,6 @@
$c1 = comdat largest
@some_name = private unnamed_addr constant i32 42, comdat($c1)
-@c1 = alias i8* inttoptr (i32 ptrtoint (i32* @some_name to i32) to i8*)
+@c1 = alias i8, inttoptr (i32 ptrtoint (i32* @some_name to i32) to i8*)
; CHECK: COMDAT key involves incomputable alias size.
diff --git a/test/Linker/comdat9.ll b/test/Linker/comdat9.ll
index f155a6e35626..4f6f2cfb845d 100644
--- a/test/Linker/comdat9.ll
+++ b/test/Linker/comdat9.ll
@@ -1,19 +1,22 @@
; RUN: llvm-link %s -S -o - | FileCheck %s
$c = comdat any
-@a = alias void ()* @f
+@a = alias void (), void ()* @f
define internal void @f() comdat($c) {
ret void
}
; CHECK-DAG: $c = comdat any
-; CHECK-DAG: @a = alias void ()* @f
+; CHECK-DAG: @a = alias void (), void ()* @f
; CHECK-DAG: define internal void @f() comdat($c)
$f2 = comdat largest
define internal void @f2() comdat($f2) {
ret void
}
+define void @f3() comdat($f2) {
+ ret void
+}
; CHECK-DAG: $f2 = comdat largest
; CHECK-DAG: define internal void @f2() comdat {
diff --git a/test/Linker/comdat_group.ll b/test/Linker/comdat_group.ll
new file mode 100644
index 000000000000..486a6ffb9b1d
--- /dev/null
+++ b/test/Linker/comdat_group.ll
@@ -0,0 +1,18 @@
+; Ensure complete comdat group is materialized
+; RUN: llvm-link %s -S | FileCheck %s
+; CHECK: $linkoncecomdat = comdat any
+; CHECK: @linkoncecomdat = linkonce global i32 2
+; CHECK: @linkoncecomdat_unref_var = linkonce global i32 2, comdat($linkoncecomdat)
+; CHECK: define linkonce void @linkoncecomdat_unref_func() comdat($linkoncecomdat)
+
+$linkoncecomdat = comdat any
+@linkoncecomdat = linkonce global i32 2, comdat($linkoncecomdat)
+@linkoncecomdat_unref_var = linkonce global i32 2, comdat($linkoncecomdat)
+define linkonce void @linkoncecomdat_unref_func() comdat($linkoncecomdat) {
+ ret void
+}
+; Reference one member of comdat so that comdat is generated.
+define void @ref_linkoncecomdat() {
+ load i32, i32* @linkoncecomdat, align 4
+ ret void
+}
diff --git a/test/Linker/constructor-comdat.ll b/test/Linker/constructor-comdat.ll
index dfc899208aa3..e62990157a96 100644
--- a/test/Linker/constructor-comdat.ll
+++ b/test/Linker/constructor-comdat.ll
@@ -4,8 +4,8 @@
$_ZN3fooIiEC5Ev = comdat any
; CHECK: $_ZN3fooIiEC5Ev = comdat any
-@_ZN3fooIiEC1Ev = weak_odr alias void ()* @_ZN3fooIiEC2Ev
-; CHECK: @_ZN3fooIiEC1Ev = weak_odr alias void ()* @_ZN3fooIiEC2Ev
+@_ZN3fooIiEC1Ev = weak_odr alias void (), void ()* @_ZN3fooIiEC2Ev
+; CHECK: @_ZN3fooIiEC1Ev = weak_odr alias void (), void ()* @_ZN3fooIiEC2Ev
; CHECK: define weak_odr void @_ZN3fooIiEC2Ev() comdat($_ZN3fooIiEC5Ev) {
define weak_odr void @_ZN3fooIiEC2Ev() comdat($_ZN3fooIiEC5Ev) {
diff --git a/test/Linker/ctors.ll b/test/Linker/ctors.ll
index 67bf45637180..37dba23d4c91 100644
--- a/test/Linker/ctors.ll
+++ b/test/Linker/ctors.ll
@@ -3,6 +3,9 @@
; RUN: llvm-link %p/Inputs/ctors.ll %s -S -o - | \
; RUN: FileCheck --check-prefix=ALL --check-prefix=CHECK2 %s
+; Test the bitcode writer too. It used to crash.
+; RUN: llvm-link %s %p/Inputs/ctors.ll -o %t.bc
+
@v = weak global i8 0
; CHECK1: @v = weak global i8 0
; CHECK2: @v = weak global i8 1
diff --git a/test/Linker/ctors2.ll b/test/Linker/ctors2.ll
new file mode 100644
index 000000000000..9b7a70eb7cd1
--- /dev/null
+++ b/test/Linker/ctors2.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-link -S %s %p/Inputs/ctors2.ll -o - | FileCheck %s
+
+$foo = comdat any
+@foo = global i8 0, comdat
+
+; CHECK: @foo = global i8 0, comdat
+; CHECK: @llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
diff --git a/test/Linker/ctors3.ll b/test/Linker/ctors3.ll
new file mode 100644
index 000000000000..e62b92dca0b4
--- /dev/null
+++ b/test/Linker/ctors3.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-link -S %s %p/Inputs/ctors3.ll -o - | FileCheck %s
+
+$foo = comdat any
+%t = type { i8 }
+@foo = global %t zeroinitializer, comdat
+
+; CHECK: @foo = global %t zeroinitializer, comdat
+; CHECK: @llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
diff --git a/test/Linker/ctors4.ll b/test/Linker/ctors4.ll
new file mode 100644
index 000000000000..c4500841f174
--- /dev/null
+++ b/test/Linker/ctors4.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-link -S %s -o - | FileCheck %s
+
+define void @f() {
+ ret void
+}
+
+; We lazy link @v, which causes llvm.global_ctors to have the corresponding
+; entry.
+@v = linkonce global i8 42
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }]
+
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }]
+
diff --git a/test/Linker/ctors5.ll b/test/Linker/ctors5.ll
new file mode 100644
index 000000000000..99124061bb32
--- /dev/null
+++ b/test/Linker/ctors5.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-link -S %s | FileCheck %s
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }]
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* null }]
+
+define void @f() {
+ ret void
+}
diff --git a/test/Linker/debug-info-version-a.ll b/test/Linker/debug-info-version-a.ll
index 8cc85b167f71..43f374ff5b00 100644
--- a/test/Linker/debug-info-version-a.ll
+++ b/test/Linker/debug-info-version-a.ll
@@ -11,6 +11,6 @@
!llvm.dbg.cu = !{!1}
!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !2, enums: !3, retainedTypes: !3, subprograms: !3)
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !2, enums: !3, retainedTypes: !3, subprograms: !3)
!2 = !DIFile(filename: "a.c", directory: "")
!3 = !{}
diff --git a/test/Linker/debug-info-version-b.ll b/test/Linker/debug-info-version-b.ll
index b2452f437023..e3ef814e3a5a 100644
--- a/test/Linker/debug-info-version-b.ll
+++ b/test/Linker/debug-info-version-b.ll
@@ -5,6 +5,6 @@
!llvm.dbg.cu = !{!1}
!0 = !{i32 2, !"Debug Info Version", i32 42}
-!1 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: false, file: !"I AM UNEXPECTED!")
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: false, file: !"I AM UNEXPECTED!")
!2 = !{!"b.c", !""}
!3 = !{}
diff --git a/test/Linker/distinct.ll b/test/Linker/distinct.ll
index c8e5c89eb095..d88d8ae16d9a 100644
--- a/test/Linker/distinct.ll
+++ b/test/Linker/distinct.ll
@@ -6,6 +6,8 @@
; CHECK: @global = linkonce global i32 0
@global = linkonce global i32 0
+; Add an external reference to @global so that it gets linked in.
+@alias = alias i32, i32* @global
; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !0, !1, !2, !9, !10, !11, !12, !13, !14}
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
diff --git a/test/Linker/drop-debug.ll b/test/Linker/drop-debug.ll
index 9c1072a75de2..06689872e12c 100644
--- a/test/Linker/drop-debug.ll
+++ b/test/Linker/drop-debug.ll
@@ -3,4 +3,4 @@
;; drop-debug.bc was created from "void f(void) {}" with clang 3.5 and
; -gline-tables-only, so it contains old debug info.
-; CHECK: warning: ignoring debug info with an invalid version (1) in {{.*}}/Inputs/drop-debug.bc
+; CHECK: WARNING: ignoring debug info with an invalid version (1) in {{.*}}/Inputs/drop-debug.bc
diff --git a/test/Linker/funcimport.ll b/test/Linker/funcimport.ll
new file mode 100644
index 000000000000..38deafd3e3f1
--- /dev/null
+++ b/test/Linker/funcimport.ll
@@ -0,0 +1,195 @@
+; First ensure that the ThinLTO handling in llvm-link and llvm-lto handles
+; bitcode without function summary sections gracefully.
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-as %p/Inputs/funcimport.ll -o %t2.bc
+; RUN: llvm-link %t.bc -functionindex=%t.bc -S
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: llvm-as -function-summary %s -o %t.bc
+; RUN: llvm-as -function-summary %p/Inputs/funcimport.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Ensure statics are promoted/renamed correctly from this file (all but
+; constant variable need promotion).
+; RUN: llvm-link %t.bc -functionindex=%t3.thinlto.bc -S | FileCheck %s --check-prefix=EXPORTSTATIC
+; EXPORTSTATIC-DAG: @staticvar.llvm.1 = hidden global
+; EXPORTSTATIC-DAG: @staticconstvar = internal unnamed_addr constant
+; EXPORTSTATIC-DAG: @P.llvm.1 = hidden global void ()* null
+; EXPORTSTATIC-DAG: define hidden i32 @staticfunc.llvm.1
+; EXPORTSTATIC-DAG: define hidden void @staticfunc2.llvm.1
+
+; Ensure that both weak alias to an imported function and strong alias to a
+; non-imported function are correctly turned into declarations.
+; Also ensures that alias to a linkonce function is turned into a declaration
+; and that the associated linkonce function is not in the output, as it is
+; lazily linked and never referenced/materialized.
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=globalfunc1:%t.bc -S | FileCheck %s --check-prefix=IMPORTGLOB1
+; IMPORTGLOB1-DAG: define available_externally void @globalfunc1
+; IMPORTGLOB1-DAG: declare void @weakalias
+; IMPORTGLOB1-DAG: declare void @analias
+; IMPORTGLOB1-NOT: @linkoncealias
+; IMPORTGLOB1-NOT: @linkoncefunc
+; IMPORTGLOB1-NOT: declare void @globalfunc2
+
+; Ensure that weak alias to a non-imported function is correctly
+; turned into a declaration, but that strong alias to an imported function
+; is imported as alias.
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=globalfunc2:%t.bc -S | FileCheck %s --check-prefix=IMPORTGLOB2
+; IMPORTGLOB2-DAG: declare void @analias
+; IMPORTGLOB2-DAG: define available_externally void @globalfunc2
+; IMPORTGLOB2-DAG: declare void @weakalias
+; IMPORTGLOB2-NOT: declare void @globalfunc1
+
+; Ensure that strong alias imported in second pass of importing ends up
+; as an alias.
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=globalfunc1:%t.bc -import=globalfunc2:%t.bc -S | FileCheck %s --check-prefix=IMPORTGLOB3
+; IMPORTGLOB3-DAG: declare void @analias
+; IMPORTGLOB3-DAG: define available_externally void @globalfunc1
+; IMPORTGLOB3-DAG: define available_externally void @globalfunc2
+; IMPORTGLOB3-DAG: declare void @weakalias
+
+; Ensure that strong alias imported in first pass of importing ends up
+; as an alias, and that seeing the alias definition during a second inlining
+; pass is handled correctly.
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=globalfunc2:%t.bc -import=globalfunc1:%t.bc -S | FileCheck %s --check-prefix=IMPORTGLOB4
+; IMPORTGLOB4-DAG: declare void @analias
+; IMPORTGLOB4-DAG: define available_externally void @globalfunc2
+; IMPORTGLOB4-DAG: define available_externally void @globalfunc1
+; IMPORTGLOB4-DAG: declare void @weakalias
+
+; An alias to an imported function is imported as alias if the function is not
+; available_externally.
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=linkoncefunc:%t.bc -S | FileCheck %s --check-prefix=IMPORTGLOB5
+; IMPORTGLOB5-DAG: linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc to void (...)*)
+; IMPORTGLOB5-DAG: define linkonce_odr void @linkoncefunc()
+
+; Ensure that imported static variable and function references are correctly
+; promoted and renamed (including static constant variable).
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=referencestatics:%t.bc -S | FileCheck %s --check-prefix=IMPORTSTATIC
+; IMPORTSTATIC-DAG: @staticvar.llvm.1 = available_externally hidden global
+; IMPORTSTATIC-DAG: @staticconstvar.llvm.1 = internal unnamed_addr constant
+; IMPORTSTATIC-DAG: define available_externally i32 @referencestatics
+; IMPORTSTATIC-DAG: %call = call i32 @staticfunc.llvm.1
+; IMPORTSTATIC-DAG: %0 = load i32, i32* @staticvar.llvm.1
+; IMPORTSTATIC-DAG: declare hidden i32 @staticfunc.llvm.1
+
+; Ensure that imported global (external) function and variable references
+; are handled correctly (including referenced variable imported as
+; available_externally definition)
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=referenceglobals:%t.bc -S | FileCheck %s --check-prefix=IMPORTGLOBALS
+; IMPORTGLOBALS-DAG: @globalvar = available_externally global
+; IMPORTGLOBALS-DAG: declare void @globalfunc1()
+; IMPORTGLOBALS-DAG: define available_externally i32 @referenceglobals
+
+; Ensure that common variable correctly imported as common defition.
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=referencecommon:%t.bc -S | FileCheck %s --check-prefix=IMPORTCOMMON
+; IMPORTCOMMON-DAG: @commonvar = common global
+; IMPORTCOMMON-DAG: define available_externally i32 @referencecommon
+
+; Ensure that imported static function pointer correctly promoted and renamed.
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=callfuncptr:%t.bc -S | FileCheck %s --check-prefix=IMPORTFUNCPTR
+; IMPORTFUNCPTR-DAG: @P.llvm.1 = available_externally hidden global void ()* null
+; IMPORTFUNCPTR-DAG: define available_externally void @callfuncptr
+; IMPORTFUNCPTR-DAG: %0 = load void ()*, void ()** @P.llvm.1
+
+; Ensure that imported weak function reference/definition handled properly.
+; Imported weak_any definition should be skipped with warning, and imported
+; reference should turned into an external_weak declaration.
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=callweakfunc:%t.bc -import=weakfunc:%t.bc -S 2>&1 | FileCheck %s --check-prefix=IMPORTWEAKFUNC
+; IMPORTWEAKFUNC-DAG: Ignoring import request for weak-any function weakfunc
+; IMPORTWEAKFUNC-DAG: declare extern_weak void @weakfunc
+; IMPORTWEAKFUNC-DAG: define available_externally void @callweakfunc
+; IMPORTWEAKFUNC-NOT: @weakvar = extern_weak global i32, align 4
+
+@globalvar = global i32 1, align 4
+@staticvar = internal global i32 1, align 4
+@staticconstvar = internal unnamed_addr constant [2 x i32] [i32 10, i32 20], align 4
+@commonvar = common global i32 0, align 4
+@P = internal global void ()* null, align 8
+
+@weakalias = weak alias void (...), bitcast (void ()* @globalfunc1 to void (...)*)
+@analias = alias void (...), bitcast (void ()* @globalfunc2 to void (...)*)
+@linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc to void (...)*)
+
+define void @globalfunc1() #0 {
+entry:
+ ret void
+}
+
+define void @globalfunc2() #0 {
+entry:
+ ret void
+}
+
+define linkonce_odr void @linkoncefunc() #0 {
+entry:
+ ret void
+}
+
+define i32 @referencestatics(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %call = call i32 @staticfunc()
+ %0 = load i32, i32* @staticvar, align 4
+ %add = add nsw i32 %call, %0
+ %1 = load i32, i32* %i.addr, align 4
+ %idxprom = sext i32 %1 to i64
+ %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* @staticconstvar, i64 0, i64 %idxprom
+ %2 = load i32, i32* %arrayidx, align 4
+ %add1 = add nsw i32 %add, %2
+ ret i32 %add1
+}
+
+define i32 @referenceglobals(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ call void @globalfunc1()
+ %0 = load i32, i32* @globalvar, align 4
+ ret i32 %0
+}
+
+define i32 @referencecommon(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* @commonvar, align 4
+ ret i32 %0
+}
+
+define void @setfuncptr() #0 {
+entry:
+ store void ()* @staticfunc2, void ()** @P, align 8
+ ret void
+}
+
+define void @callfuncptr() #0 {
+entry:
+ %0 = load void ()*, void ()** @P, align 8
+ call void %0()
+ ret void
+}
+
+@weakvar = weak global i32 1, align 4
+define weak void @weakfunc() #0 {
+entry:
+ ret void
+}
+
+define void @callweakfunc() #0 {
+entry:
+ call void @weakfunc()
+ ret void
+}
+
+define internal i32 @staticfunc() #0 {
+entry:
+ ret i32 1
+}
+
+define internal void @staticfunc2() #0 {
+entry:
+ ret void
+}
diff --git a/test/Linker/funcimport_appending_global.ll b/test/Linker/funcimport_appending_global.ll
new file mode 100644
index 000000000000..190d31ee8c7f
--- /dev/null
+++ b/test/Linker/funcimport_appending_global.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as -function-summary %s -o %t.bc
+; RUN: llvm-as -function-summary %p/Inputs/funcimport_appending_global.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Do the import now
+; RUN: llvm-link %t.bc -functionindex=%t3.thinlto.bc -import=foo:%t2.bc -S | FileCheck %s
+
+; Ensure that global constructor (appending linkage) is not imported
+; CHECK-NOT: @llvm.global_ctors = {{.*}}@foo
+
+declare void @f()
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* null}]
+
+define i32 @main() {
+entry:
+ call void @foo()
+ ret i32 0
+}
+
+declare void @foo()
diff --git a/test/Linker/global_ctors.ll b/test/Linker/global_ctors.ll
index 49df81a00759..cc28471df59d 100644
--- a/test/Linker/global_ctors.ll
+++ b/test/Linker/global_ctors.ll
@@ -1,6 +1,5 @@
-; RUN: llvm-as %s -o %t.new.bc
-; RUN: llvm-link %t.new.bc %S/Inputs/old_global_ctors.3.4.bc | llvm-dis | FileCheck %s
-; RUN: llvm-link %S/Inputs/old_global_ctors.3.4.bc %t.new.bc | llvm-dis | FileCheck %s
+; RUN: llvm-link -S %s %S/Inputs/old_global_ctors.3.4.bc | FileCheck %s
+; RUN: llvm-link -S %S/Inputs/old_global_ctors.3.4.bc %s | FileCheck %s
; old_global_ctors.3.4.bc contains the following LLVM IL, assembled into
; bitcode by llvm-as from 3.4. It uses a two element @llvm.global_ctors array.
diff --git a/test/Linker/internalize-lazy.ll b/test/Linker/internalize-lazy.ll
new file mode 100644
index 000000000000..480335927b51
--- /dev/null
+++ b/test/Linker/internalize-lazy.ll
@@ -0,0 +1,4 @@
+; RUN: llvm-link -S -internalize %s %p/Inputs/internalize-lazy.ll | FileCheck %s
+
+; CHECK: define internal void @f
+; CHECK: define internal void @g
diff --git a/test/Linker/link-flags.ll b/test/Linker/link-flags.ll
new file mode 100644
index 000000000000..c901b699575a
--- /dev/null
+++ b/test/Linker/link-flags.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-link -S %S/Inputs/linkage.b.ll %S/Inputs/linkage.c.ll | FileCheck %s -check-prefix=B -check-prefix=C -check-prefix=CU
+; RUN: llvm-link -S -only-needed %S/Inputs/linkage.b.ll %S/Inputs/linkage.c.ll | FileCheck %s -check-prefix=B -check-prefix=C -check-prefix=CN
+; RUN: llvm-link -S -internalize %S/Inputs/linkage.b.ll %S/Inputs/linkage.c.ll | FileCheck %s -check-prefix=B -check-prefix=CI
+; RUN: llvm-link -S -internalize -only-needed %S/Inputs/linkage.b.ll %S/Inputs/linkage.c.ll | FileCheck %s -check-prefix=B -check-prefix=CN
+
+C-LABEL: @X = global i32 5
+CI-LABEL: @X = internal global i32 5
+CU-LABEL:@U = global i32 6
+CI-LABEL:@U = internal global i32 6
+CN-NOT:@U
+
+B-LABEL: define void @bar() {
+
+C-LABEL: define i32 @foo()
+CI-LABEL: define internal i32 @foo()
+
+CU-LABEL:define i32 @unused() {
+CI-LABEL:define internal i32 @unused() {
+CN-NOT:@unused()
diff --git a/test/Linker/mdlocation.ll b/test/Linker/mdlocation.ll
index 9acc6701599b..b42058bac60e 100644
--- a/test/Linker/mdlocation.ll
+++ b/test/Linker/mdlocation.ll
@@ -2,33 +2,25 @@
; Test that DILocations are remapped properly.
-; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !0, !1, !2, !3, !10, !11, !12, !13, !14, !15}
-!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
+; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11}
+!named = !{!0, !1, !2, !3, !4, !5}
-; CHECK: !0 = !DISubprogram(
+; CHECK: !0 = distinct !DISubprogram(
; CHECK-NEXT: !1 = !DILocation(line: 3, column: 7, scope: !0)
; CHECK-NEXT: !2 = !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !1)
; CHECK-NEXT: !3 = !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !2)
-; CHECK-NEXT: !4 = distinct !DISubprogram(
-; CHECK-NEXT: !5 = !DILocation(line: 3, column: 7, scope: !4)
-; CHECK-NEXT: !6 = !DILocation(line: 3, column: 7, scope: !4, inlinedAt: !5)
-; CHECK-NEXT: !7 = !DILocation(line: 3, column: 7, scope: !4, inlinedAt: !6)
-; CHECK-NEXT: !8 = distinct !DILocation(line: 3, column: 7, scope: !0)
-; CHECK-NEXT: !9 = distinct !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !8)
-; CHECK-NEXT: !10 = distinct !DISubprogram(
-; CHECK-NEXT: !11 = !DILocation(line: 3, column: 7, scope: !10)
-; CHECK-NEXT: !12 = !DILocation(line: 3, column: 7, scope: !10, inlinedAt: !11)
-; CHECK-NEXT: !13 = !DILocation(line: 3, column: 7, scope: !10, inlinedAt: !12)
-; CHECK-NEXT: !14 = distinct !DILocation(line: 3, column: 7, scope: !0)
-; CHECK-NEXT: !15 = distinct !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !14)
-!0 = !DISubprogram() ; Use this as a scope.
+; CHECK-NEXT: !4 = distinct !DILocation(line: 3, column: 7, scope: !0)
+; CHECK-NEXT: !5 = distinct !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !4)
+; CHECK-NEXT: !6 = distinct !DISubprogram(
+; CHECK-NEXT: !7 = !DILocation(line: 3, column: 7, scope: !6)
+; CHECK-NEXT: !8 = !DILocation(line: 3, column: 7, scope: !6, inlinedAt: !7)
+; CHECK-NEXT: !9 = !DILocation(line: 3, column: 7, scope: !6, inlinedAt: !8)
+; CHECK-NEXT: !10 = distinct !DILocation(line: 3, column: 7, scope: !6)
+; CHECK-NEXT: !11 = distinct !DILocation(line: 3, column: 7, scope: !6, inlinedAt: !10)
+!0 = distinct !DISubprogram() ; Use this as a scope.
!1 = !DILocation(line: 3, column: 7, scope: !0)
!2 = !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !1)
!3 = !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !2)
-!4 = distinct !DISubprogram() ; Test actual remapping.
-!5 = !DILocation(line: 3, column: 7, scope: !4)
-!6 = !DILocation(line: 3, column: 7, scope: !4, inlinedAt: !5)
-!7 = !DILocation(line: 3, column: 7, scope: !4, inlinedAt: !6)
; Test distinct nodes.
-!8 = distinct !DILocation(line: 3, column: 7, scope: !0)
-!9 = distinct !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !8)
+!4 = distinct !DILocation(line: 3, column: 7, scope: !0)
+!5 = distinct !DILocation(line: 3, column: 7, scope: !0, inlinedAt: !4)
diff --git a/test/Linker/only-needed-debug-metadata.ll b/test/Linker/only-needed-debug-metadata.ll
new file mode 100644
index 000000000000..f327fe03bf48
--- /dev/null
+++ b/test/Linker/only-needed-debug-metadata.ll
@@ -0,0 +1,49 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-as %p/Inputs/only-needed-debug-metadata.ll -o %t2.bc
+
+; Without -only-needed, we need to link in both DISubprogram.
+; RUN: llvm-link -S %t2.bc %t.bc | FileCheck %s
+; CHECK: distinct !DISubprogram(name: "foo"
+; CHECK: distinct !DISubprogram(name: "unused"
+
+; With -only-needed, we only need to link in foo's DISubprogram.
+; RUN: llvm-link -S -only-needed %t2.bc %t.bc | FileCheck %s -check-prefix=ONLYNEEDED
+; ONLYNEEDED: distinct !DISubprogram(name: "foo"
+; ONLYNEEDED-NOT: distinct !DISubprogram(name: "unused"
+
+@X = global i32 5
+@U = global i32 6
+@U_linkonce = linkonce_odr hidden global i32 6
+define i32 @foo() !dbg !4 {
+ ret i32 7, !dbg !20
+}
+define i32 @unused() !dbg !10 {
+ ret i32 8, !dbg !21
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !13)
+!1 = !DIFile(filename: "linkused2.c", directory: "/usr/local/google/home/tejohnson/llvm/tmp")
+!2 = !{}
+!3 = !{!4, !10}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "x", arg: 1, scope: !4, file: !1, line: 4, type: !7)
+!10 = distinct !DISubprogram(name: "unused", scope: !1, file: !1, line: 8, type: !11, isLocal: false, isDefinition: true, scopeLine: 8, isOptimized: true, variables: !2)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!7}
+!13 = !{!14, !15}
+!14 = !DIGlobalVariable(name: "X", scope: !0, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, variable: i32* @X)
+!15 = !DIGlobalVariable(name: "U", scope: !0, file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, variable: i32* @U)
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 3}
+!18 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"}
+!19 = !DIExpression()
+!20 = !DILocation(line: 4, column: 13, scope: !4)
+!21 = !DILocation(line: 9, column: 3, scope: !10)
diff --git a/test/Linker/only-needed-named-metadata.ll b/test/Linker/only-needed-named-metadata.ll
new file mode 100644
index 000000000000..d14b525fa023
--- /dev/null
+++ b/test/Linker/only-needed-named-metadata.ll
@@ -0,0 +1,65 @@
+; Without -only-needed we should lazy link linkonce globals, and the
+; metadata reference should not cause them to be linked.
+; RUN: llvm-link -S %S/Inputs/only-needed-named-metadata.ll %S/only-needed-named-metadata.ll | FileCheck %s
+; CHECK-NOT:@U_linkonce
+; CHECK-NOT:@unused_linkonce()
+
+; With -only-needed the metadata references should not cause any of the
+; otherwise unreferenced globals to be linked. This also ensures that the
+; metadata references don't provoke the module linker to create declarations,
+; which are illegal for aliases and globals in comdats.
+; Note that doing -only-needed with the comdat shown below leads to a only
+; part of the comdat group being linked, which is not technically correct.
+; RUN: llvm-link -S -only-needed %S/Inputs/only-needed-named-metadata.ll %S/only-needed-named-metadata.ll | FileCheck %s -check-prefix=ONLYNEEDED
+; RUN: llvm-link -S -internalize -only-needed %S/Inputs/only-needed-named-metadata.ll %S/only-needed-named-metadata.ll | FileCheck %s -check-prefix=ONLYNEEDED
+; ONLYNEEDED-NOT:@U
+; ONLYNEEDED-NOT:@U_linkonce
+; ONLYNEEDED-NOT:@unused()
+; ONLYNEEDED-NOT:@unused_linkonce()
+; ONLYNEEDED-NOT:@linkoncealias
+; ONLYNEEDED-NOT:@linkoncefunc2()
+; ONLYNEEDED-NOT:@weakalias
+; ONLYNEEDED-NOT:@globalfunc1()
+; ONLYNEEDED-NOT:@analias
+; ONLYNEEDED-NOT:@globalfunc2()
+
+; Test -only-needed link with the modules preserved instead of freeing to
+; catch any cross-module references to metadata, which the bitcode writer
+; will assert on.
+; RUN: llvm-link -preserve-modules -o %t3.bc -only-needed %S/Inputs/only-needed-named-metadata.ll %S/only-needed-named-metadata.ll
+
+@X = global i32 5
+@U = global i32 6
+@U_linkonce = linkonce_odr hidden global i32 6
+define i32 @foo() { ret i32 7 }
+define i32 @unused() { ret i32 8 }
+define linkonce_odr hidden i32 @unused_linkonce() { ret i32 8 }
+@linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc2 to void (...)*)
+
+@weakalias = weak alias void (...), bitcast (void ()* @globalfunc1 to void (...)*)
+@analias = alias void (...), bitcast (void ()* @globalfunc2 to void (...)*)
+
+define void @globalfunc1() #0 {
+entry:
+ ret void
+}
+
+define void @globalfunc2() #0 {
+entry:
+ ret void
+}
+
+$linkoncefunc2 = comdat any
+define linkonce_odr void @linkoncefunc2() #0 comdat {
+entry:
+ ret void
+}
+
+!llvm.named = !{!0, !1, !2, !3, !4, !5, !6}
+!0 = !{i32 ()* @unused}
+!1 = !{i32* @U}
+!2 = !{i32 ()* @unused_linkonce}
+!3 = !{i32* @U_linkonce}
+!4 = !{void (...)* @weakalias}
+!5 = !{void (...)* @analias}
+!6 = !{void (...)* @linkoncealias}
diff --git a/test/Linker/opaque.ll b/test/Linker/opaque.ll
index 4f3f398f8f1b..6fd1ae90d4f4 100644
--- a/test/Linker/opaque.ll
+++ b/test/Linker/opaque.ll
@@ -19,3 +19,7 @@
%C = type { %A }
@g1 = external global %B
+
+define %B* @use_g1() {
+ ret %B* @g1
+}
diff --git a/test/Linker/override-with-internal-linkage.ll b/test/Linker/override-with-internal-linkage.ll
index d3a794799322..59bd214c204f 100644
--- a/test/Linker/override-with-internal-linkage.ll
+++ b/test/Linker/override-with-internal-linkage.ll
@@ -3,14 +3,14 @@
; CHECK-LABEL: define i32 @main(
; CHECK-NEXT: entry:
-; CHECK-NEXT: call i32 @foo2(
+; CHECK-NEXT: call i32 @foo.2(
define i32 @main(i32 %argc, i8** %argv) {
entry:
%a = call i32 @foo(i32 2)
ret i32 %a
}
-; CHECK-LABEL: define internal i32 @foo2(
+; CHECK-LABEL: define internal i32 @foo.2(
; CHECK-NEXT: entry:
; CHECK-NEXT: %add = add nsw i32 %i, %i
; CHECK-NEXT: ret i32 %add
diff --git a/test/Linker/pr21494.ll b/test/Linker/pr21494.ll
index 8a17233b0eb6..40b57cad3cf5 100644
--- a/test/Linker/pr21494.ll
+++ b/test/Linker/pr21494.ll
@@ -6,10 +6,10 @@
@g2 = linkonce_odr global i8 0
; CHECK-NOT: @g2
-@a1 = private alias i8* @g1
+@a1 = private alias i8, i8* @g1
; CHECK-NOT: @a1
-@a2 = linkonce_odr alias i8* @g2
+@a2 = linkonce_odr alias i8, i8* @g2
; CHECK-NOT: @a2
define private void @f1() {
diff --git a/test/Linker/prologuedata.ll b/test/Linker/prologuedata.ll
index 70204fdaacdd..3015d50867e9 100644
--- a/test/Linker/prologuedata.ll
+++ b/test/Linker/prologuedata.ll
@@ -1,21 +1,21 @@
; RUN: llvm-link %s -S -o - | FileCheck %s
@g1 = global void()* @f2
-; CHECK: @g1 = global void ()* @f2
+; CHECK-DAG: @g1 = global void ()* @f2
@p1 = global i8 42
-; CHECK: @p1 = global i8 42
+; CHECK-DAG: @p1 = global i8 42
@p2 = internal global i8 43
-; CHECK: @p2 = internal global i8 43
+; CHECK-DAG: @p2 = internal global i8 43
define void @f1() prologue i8* @p1 {
ret void
}
-; CHECK: define void @f1() prologue i8* @p1 {
+; CHECK-DAG: define void @f1() prologue i8* @p1 {
define internal void @f2() prologue i8* @p2 {
ret void
}
-; CHECK: define internal void @f2() prologue i8* @p2 {
+; CHECK-DAG: define internal void @f2() prologue i8* @p2 {
diff --git a/test/Linker/replaced-function-matches-first-subprogram.ll b/test/Linker/replaced-function-matches-first-subprogram.ll
index fc7e653786e6..30b8be992bb5 100644
--- a/test/Linker/replaced-function-matches-first-subprogram.ll
+++ b/test/Linker/replaced-function-matches-first-subprogram.ll
@@ -15,17 +15,18 @@
%struct.Class = type { i8 }
-define i32 @_Z3foov() {
+; CHECK: define i32 @_Z3foov(){{.*}} !dbg ![[SP1:[0-9]+]]
+define i32 @_Z3foov() !dbg !4 {
entry:
%tmp = alloca %struct.Class, align 1
%call = call i32 @_ZN5ClassIiE3fooEv(%struct.Class* %tmp), !dbg !14
ret i32 %call, !dbg !14
}
-; CHECK: define weak_odr i32 @_ZN5ClassIiE3fooEv(%struct.Class* %this){{.*}}{
+; CHECK: define weak_odr i32 @_ZN5ClassIiE3fooEv(%struct.Class* %this){{.*}} !dbg ![[SP2:[0-9]+]] {
; CHECK-NOT: }
; CHECK: !dbg ![[LOC:[0-9]+]]
-define linkonce_odr i32 @_ZN5ClassIiE3fooEv(%struct.Class* %this) align 2 {
+define linkonce_odr i32 @_ZN5ClassIiE3fooEv(%struct.Class* %this) align 2 !dbg !7 {
entry:
%this.addr = alloca %struct.Class*, align 8
store %struct.Class* %this, %struct.Class** %this.addr, align 8
@@ -39,30 +40,28 @@ entry:
!llvm.ident = !{!13}
; Extract out the list of subprograms from each compile unit.
-; CHECK-DAG: ![[CU1]] = !DICompileUnit({{.*}} subprograms: ![[SPs1:[0-9]+]]
-; CHECK-DAG: ![[CU2]] = !DICompileUnit({{.*}} subprograms: ![[SPs2:[0-9]+]]
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224193) (llvm/trunk 224197)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+; CHECK-DAG: ![[CU1]] = distinct !DICompileUnit({{.*}} subprograms: ![[SPs1:[0-9]+]]
+; CHECK-DAG: ![[CU2]] = distinct !DICompileUnit({{.*}} subprograms: ![[SPs2:[0-9]+]]
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (trunk 224193) (llvm/trunk 224197)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "t1.cpp", directory: "/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d1")
!2 = !{}
; Extract out each compile unit's single subprogram. The replaced subprogram's
; function should drop to null in the first compile unit.
-; CHECK-DAG: ![[SPs1]] = !{![[SP1:[0-9]+]], ![[SP2r:[0-9]+]]}
-; CHECK-DAG: ![[SPs2]] = !{![[SP2:[0-9]+]]}
+; CHECK-DAG: ![[SPs1]] = !{![[SP1]], ![[SP2r:[0-9]+]]}
+; CHECK-DAG: ![[SPs2]] = !{![[SP2]]}
!3 = !{!4, !7}
-!4 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 ()* @_Z3foov, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "t1.cpp", directory: "/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d1")
!6 = !DISubroutineType(types: !2)
-; Extract out the file from the replaced subprogram. Confirm that each
-; subprogram is pointing at the correct function.
-; CHECK-DAG: ![[SP1]] = !DISubprogram({{.*}} function: i32 ()* @_Z3foov
-; CHECK-DAG: ![[SP2]] = !DISubprogram({{.*}} file: ![[FILE:[0-9]+]],{{.*}} function: i32 (%struct.Class*)* @_ZN5ClassIiE3fooEv
+; Extract out the file from the replaced subprogram.
+; CHECK-DAG: ![[SP2]] = distinct !DISubprogram({{.*}} file: ![[FILE:[0-9]+]],
; We can't use CHECK-NOT/CHECK-SAME with a CHECK-DAG, so rely on field order to
; prove that there's no function: here.
; CHECK-DAG: ![[SP2r]] = {{.*}}!DISubprogram({{.*}} isOptimized: false, variables:
-!7 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !8, scope: !9, type: !6, function: i32 (%struct.Class*)* @_ZN5ClassIiE3fooEv, variables: !2)
+!7 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !8, scope: !9, type: !6, variables: !2)
; The new subprogram should be pointing at the new directory.
; CHECK-DAG: ![[FILE]] = !DIFile(filename: "../t.h", directory: "/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d2")
diff --git a/test/Linker/subprogram-linkonce-weak-odr.ll b/test/Linker/subprogram-linkonce-weak-odr.ll
deleted file mode 100644
index c1fa0a5adeb5..000000000000
--- a/test/Linker/subprogram-linkonce-weak-odr.ll
+++ /dev/null
@@ -1,177 +0,0 @@
-; RUN: llvm-link %s %S/Inputs/subprogram-linkonce-weak-odr.ll -S -o %t1
-; RUN: FileCheck %s -check-prefix=LW -check-prefix=CHECK <%t1
-; RUN: llvm-link %S/Inputs/subprogram-linkonce-weak-odr.ll %s -S -o %t2
-; RUN: FileCheck %s -check-prefix=WL -check-prefix=CHECK <%t2
-
-; This testcase tests the following flow:
-; - File A defines a linkonce_odr version of @foo which has inlined into @bar.
-; - File B defines a weak_odr version of @foo (identical definition).
-; - Linkage rules state File B version of @foo wins.
-; - Debug info for the subprograms of @foo match exactly. Without
-; intervention, the same subprogram would show up in both compile units, and
-; it would get associated with the compile unit where it was linkonce.
-; - @bar has inlined debug info related to the linkonce_odr @foo.
-;
-; This checks a corner case for the fix for PR22792, where subprograms match
-; exactly. It's a companion for subprogram-linkonce-weak.ll.
-
-; The LW prefix means linkonce (this file) first, then weak (the other file).
-; The WL prefix means weak (the other file) first, then linkonce (this file).
-
-; We'll see @bar before @foo if this file is first.
-; LW-LABEL: define i32 @bar(
-; LW: %sum = add i32 %a, %b, !dbg ![[FOOINBAR:[0-9]+]]
-; LW: ret i32 %sum, !dbg ![[BARRET:[0-9]+]]
-; LW-LABEL: define weak_odr i32 @foo(
-; LW: %sum = add i32 %a, %b, !dbg ![[FOOADD:[0-9]+]]
-; LW: ret i32 %sum, !dbg ![[FOORET:[0-9]+]]
-
-; We'll see @foo before @bar if this file is second.
-; WL-LABEL: define weak_odr i32 @foo(
-; WL: %sum = add i32 %a, %b, !dbg ![[FOOADD:[0-9]+]]
-; WL: ret i32 %sum, !dbg ![[FOORET:[0-9]+]]
-; WL-LABEL: define i32 @bar(
-; WL: %sum = add i32 %a, %b, !dbg ![[FOOINBAR:[0-9]+]]
-; WL: ret i32 %sum, !dbg ![[BARRET:[0-9]+]]
-
-define i32 @bar(i32 %a, i32 %b) {
-entry:
- %sum = add i32 %a, %b, !dbg !DILocation(line: 2, scope: !4,
- inlinedAt: !DILocation(line: 12, scope: !3))
- ret i32 %sum, !dbg !DILocation(line: 13, scope: !3)
-}
-
-define linkonce_odr i32 @foo(i32 %a, i32 %b) {
-entry:
- %sum = add i32 %a, %b, !dbg !DILocation(line: 2, scope: !4)
- ret i32 %sum, !dbg !DILocation(line: 3, scope: !4)
-}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 2, !"Debug Info Version", i32 3}
-
-; CHECK-LABEL: !llvm.dbg.cu =
-; LW-SAME: !{![[LCU:[0-9]+]], ![[WCU:[0-9]+]]}
-; WL-SAME: !{![[WCU:[0-9]+]], ![[LCU:[0-9]+]]}
-!llvm.dbg.cu = !{!1}
-
-; LW: ![[LCU]] = !DICompileUnit({{.*}} subprograms: ![[LSPs:[0-9]+]]
-; LW: ![[LSPs]] = !{![[BARSP:[0-9]+]], ![[FOOSP:[0-9]+]]}
-; LW: ![[BARSP]] = !DISubprogram(name: "bar",
-; LW-SAME: function: i32 (i32, i32)* @bar
-; LW: ![[FOOSP]] = {{.*}}!DISubprogram(name: "foo",
-; LW-NOT: function:
-; LW-SAME: ){{$}}
-; LW: ![[WCU]] = !DICompileUnit({{.*}} subprograms: ![[WSPs:[0-9]+]]
-; LW: ![[WSPs]] = !{![[WEAKFOOSP:[0-9]+]]}
-; LW: ![[WEAKFOOSP]] = !DISubprogram(name: "foo",
-; LW-SAME: function: i32 (i32, i32)* @foo
-; LW: ![[FOOINBAR]] = !DILocation(line: 2, scope: ![[FOOSP]], inlinedAt: ![[BARIA:[0-9]+]])
-; LW: ![[BARIA]] = !DILocation(line: 12, scope: ![[BARSP]])
-; LW: ![[BARRET]] = !DILocation(line: 13, scope: ![[BARSP]])
-; LW: ![[FOOADD]] = !DILocation(line: 2, scope: ![[WEAKFOOSP]])
-; LW: ![[FOORET]] = !DILocation(line: 3, scope: ![[WEAKFOOSP]])
-
-; Same as above, but reordered.
-; WL: ![[WCU]] = !DICompileUnit({{.*}} subprograms: ![[WSPs:[0-9]+]]
-; WL: ![[WSPs]] = !{![[WEAKFOOSP:[0-9]+]]}
-; WL: ![[WEAKFOOSP]] = !DISubprogram(name: "foo",
-; WL-SAME: function: i32 (i32, i32)* @foo
-; WL: ![[LCU]] = !DICompileUnit({{.*}} subprograms: ![[LSPs:[0-9]+]]
-; Note: for symmetry, LSPs would have a different copy of the subprogram.
-; WL: ![[LSPs]] = !{![[BARSP:[0-9]+]], ![[WEAKFOOSP:[0-9]+]]}
-; WL: ![[BARSP]] = !DISubprogram(name: "bar",
-; WL-SAME: function: i32 (i32, i32)* @bar
-; WL: ![[FOOADD]] = !DILocation(line: 2, scope: ![[WEAKFOOSP]])
-; WL: ![[FOORET]] = !DILocation(line: 3, scope: ![[WEAKFOOSP]])
-; WL: ![[FOOINBAR]] = !DILocation(line: 2, scope: ![[WEAKFOOSP]], inlinedAt: ![[BARIA:[0-9]+]])
-; WL: ![[BARIA]] = !DILocation(line: 12, scope: ![[BARSP]])
-; WL: ![[BARRET]] = !DILocation(line: 13, scope: ![[BARSP]])
-
-!1 = !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !{!3, !4}, emissionKind: 1)
-!2 = !DIFile(filename: "bar.c", directory: "/path/to/dir")
-!3 = !DISubprogram(file: !2, scope: !2, line: 11, name: "bar", function: i32 (i32, i32)* @bar, type: !6)
-!4 = !DISubprogram(file: !5, scope: !5, line: 1, name: "foo", function: i32 (i32, i32)* @foo, type: !6)
-!5 = !DIFile(filename: "foo.h", directory: "/path/to/dir")
-!6 = !DISubroutineType(types: !{})
-
-; Crasher for llc.
-; REQUIRES: object-emission
-; RUN: %llc_dwarf -filetype=obj -O0 %t1 -o %t1.o
-; RUN: llvm-dwarfdump %t1.o -debug-dump=all | FileCheck %s -check-prefix=DWLW -check-prefix=DW
-; RUN: %llc_dwarf -filetype=obj -O0 %t2 -o %t2.o
-; RUN: llvm-dwarfdump %t2.o -debug-dump=all | FileCheck %s -check-prefix=DWWL -check-prefix=DW
-; Check that the debug info puts the subprogram (with PCs) in the correct
-; compile unit.
-
-; DW-LABEL: .debug_info contents:
-; DWLW: DW_TAG_compile_unit
-; DWLW: DW_AT_name {{.*}}"bar.c"
-; Note: If we stop emitting foo here, the comment below for DWWL (and the
-; check) should be copied up here.
-; DWLW: DW_TAG_subprogram
-; DWLW-NOT: DW_AT_low_pc
-; DWLW-NOT: DW_AT_high_pc
-; DWLW: DW_AT_name {{.*}}foo
-; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}foo.h"
-; DWLW: DW_AT_decl_line {{.*}}(1)
-; DWLW: DW_TAG_subprogram
-; DWLW: DW_AT_low_pc
-; DWLW: DW_AT_high_pc
-; DWLW: DW_AT_name {{.*}}bar
-; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}bar.c"
-; DWLW: DW_AT_decl_line {{.*}}(11)
-; DWLW: DW_TAG_inlined_subroutine
-; DWLW: DW_AT_abstract_origin
-; DWLW: DW_TAG_compile_unit
-; DWLW: DW_AT_name {{.*}}"foo.c"
-; DWLW: DW_TAG_subprogram
-; DWLW: DW_AT_low_pc
-; DWLW: DW_AT_high_pc
-; DWLW: DW_AT_name {{.*}}foo
-; DWLW: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}foo.h"
-; DWLW: DW_AT_decl_line {{.*}}(1)
-
-; The DWARF output is already symmetric (just reordered).
-; DWWL: DW_TAG_compile_unit
-; DWWL: DW_AT_name {{.*}}"foo.c"
-; DWWL: DW_TAG_subprogram
-; DWWL: DW_AT_low_pc
-; DWWL: DW_AT_high_pc
-; DWWL: DW_AT_name {{.*}}foo
-; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}foo.h"
-; DWWL: DW_AT_decl_line {{.*}}(1)
-; DWWL: DW_TAG_compile_unit
-; DWWL: DW_AT_name {{.*}}"bar.c"
-; Note: for symmetry, foo would also show up in this compile unit
-; (alternatively, it wouldn't show up in the DWLW case). If we start emitting
-; foo here, this should be updated by checking that we don't emit low_pc and
-; high_pc for it.
-; DWWL-NOT: DW_AT_name {{.*}}foo
-; DWWL: DW_TAG_subprogram
-; DWWL-NOT: DW_AT_name {{.*}}foo
-; DWWL: DW_AT_low_pc
-; DWWL: DW_AT_high_pc
-; DWWL-NOT: DW_AT_name {{.*}}foo
-; DWWL: DW_AT_name {{.*}}bar
-; DWWL: DW_AT_decl_file {{.*}}"/path/to/dir{{/|\\}}bar.c"
-; DWWL: DW_AT_decl_line {{.*}}(11)
-; DWWL: DW_TAG_inlined_subroutine
-; DWWL: DW_AT_abstract_origin
-
-; DW-LABEL: .debug_line contents:
-; Check that we have the right things in the line table as well.
-
-; DWLW-LABEL: file_names[{{ *}}1]{{.*}} bar.c
-; DWLW-LABEL: file_names[{{ *}}2]{{.*}} foo.h
-; DWLW: 2 0 2 0 0 is_stmt prologue_end
-; DWLW-LABEL: file_names[{{ *}}1]{{.*}} foo.h
-; DWLW: 2 0 1 0 0 is_stmt prologue_end
-; DWLW-NOT: prologue_end
-
-; DWWL-LABEL: file_names[{{ *}}1]{{.*}} foo.h
-; DWWL: 2 0 1 0 0 is_stmt prologue_end
-; DWWL-LABEL: file_names[{{ *}}1]{{.*}} bar.c
-; DWWL-LABEL: file_names[{{ *}}2]{{.*}} foo.h
-; DWWL: 2 0 2 0 0 is_stmt prologue_end
-; DWWL-NOT: prologue_end
diff --git a/test/Linker/subprogram-linkonce-weak.ll b/test/Linker/subprogram-linkonce-weak.ll
index 598cea7afeb0..e8bc67765bb8 100644
--- a/test/Linker/subprogram-linkonce-weak.ll
+++ b/test/Linker/subprogram-linkonce-weak.ll
@@ -2,6 +2,7 @@
; RUN: FileCheck %s -check-prefix=LW -check-prefix=CHECK <%t1
; RUN: llvm-link %S/Inputs/subprogram-linkonce-weak.ll %s -S -o %t2
; RUN: FileCheck %s -check-prefix=WL -check-prefix=CHECK <%t2
+; REQUIRES: default_triple
; This testcase tests the following flow:
; - File A defines a linkonce version of @foo which has inlined into @bar.
@@ -16,29 +17,29 @@
; The WL prefix means weak (the other file) first, then linkonce (this file).
; We'll see @bar before @foo if this file is first.
-; LW-LABEL: define i32 @bar(
+; LW: define i32 @bar({{.*}} !dbg ![[BARSP:[0-9]+]]
; LW: %sum = add i32 %a, %b, !dbg ![[FOOINBAR:[0-9]+]]
; LW: ret i32 %sum, !dbg ![[BARRET:[0-9]+]]
-; LW-LABEL: define weak i32 @foo(
+; LW: define weak i32 @foo({{.*}} !dbg ![[WEAKFOOSP:[0-9]+]]
; LW: %sum = call i32 @fastadd(i32 %a, i32 %b), !dbg ![[FOOCALL:[0-9]+]]
; LW: ret i32 %sum, !dbg ![[FOORET:[0-9]+]]
; We'll see @foo before @bar if this file is second.
-; WL-LABEL: define weak i32 @foo(
+; WL: define weak i32 @foo({{.*}} !dbg ![[WEAKFOOSP:[0-9]+]]
; WL: %sum = call i32 @fastadd(i32 %a, i32 %b), !dbg ![[FOOCALL:[0-9]+]]
; WL: ret i32 %sum, !dbg ![[FOORET:[0-9]+]]
-; WL-LABEL: define i32 @bar(
+; WL: define i32 @bar({{.*}} !dbg ![[BARSP:[0-9]+]]
; WL: %sum = add i32 %a, %b, !dbg ![[FOOINBAR:[0-9]+]]
; WL: ret i32 %sum, !dbg ![[BARRET:[0-9]+]]
-define i32 @bar(i32 %a, i32 %b) {
+define i32 @bar(i32 %a, i32 %b) !dbg !3 {
entry:
%sum = add i32 %a, %b, !dbg !DILocation(line: 2, scope: !4,
inlinedAt: !DILocation(line: 12, scope: !3))
ret i32 %sum, !dbg !DILocation(line: 13, scope: !3)
}
-define linkonce i32 @foo(i32 %a, i32 %b) {
+define linkonce i32 @foo(i32 %a, i32 %b) !dbg !4 {
entry:
%sum = add i32 %a, %b, !dbg !DILocation(line: 2, scope: !4)
ret i32 %sum, !dbg !DILocation(line: 3, scope: !4)
@@ -52,17 +53,13 @@ entry:
; WL-SAME: !{![[WCU:[0-9]+]], ![[LCU:[0-9]+]]}
!llvm.dbg.cu = !{!1}
-; LW: ![[LCU]] = !DICompileUnit({{.*}} subprograms: ![[LSPs:[0-9]+]]
-; LW: ![[LSPs]] = !{![[BARSP:[0-9]+]], ![[FOOSP:[0-9]+]]}
-; LW: ![[BARSP]] = !DISubprogram(name: "bar",
-; LW-SAME: function: i32 (i32, i32)* @bar
-; LW: ![[FOOSP]] = {{.*}}!DISubprogram(name: "foo",
-; LW-NOT: function:
-; LW-SAME: ){{$}}
-; LW: ![[WCU]] = !DICompileUnit({{.*}} subprograms: ![[WSPs:[0-9]+]]
-; LW: ![[WSPs]] = !{![[WEAKFOOSP:[0-9]+]]}
-; LW: ![[WEAKFOOSP]] = !DISubprogram(name: "foo",
-; LW-SAME: function: i32 (i32, i32)* @foo
+; LW: ![[LCU]] = distinct !DICompileUnit({{.*}} subprograms: ![[LSPs:[0-9]+]]
+; LW: ![[LSPs]] = !{![[BARSP]], ![[FOOSP:[0-9]+]]}
+; LW: ![[BARSP]] = distinct !DISubprogram(name: "bar",
+; LW: ![[FOOSP]] = distinct !DISubprogram(name: "foo",
+; LW: ![[WCU]] = distinct !DICompileUnit({{.*}} subprograms: ![[WSPs:[0-9]+]]
+; LW: ![[WSPs]] = !{![[WEAKFOOSP]]}
+; LW: ![[WEAKFOOSP]] = distinct !DISubprogram(name: "foo",
; LW: ![[FOOINBAR]] = !DILocation(line: 2, scope: ![[FOOSP]], inlinedAt: ![[BARIA:[0-9]+]])
; LW: ![[BARIA]] = !DILocation(line: 12, scope: ![[BARSP]])
; LW: ![[BARRET]] = !DILocation(line: 13, scope: ![[BARSP]])
@@ -70,27 +67,23 @@ entry:
; LW: ![[FOORET]] = !DILocation(line: 53, scope: ![[WEAKFOOSP]])
; Same as above, but reordered.
-; WL: ![[WCU]] = !DICompileUnit({{.*}} subprograms: ![[WSPs:[0-9]+]]
-; WL: ![[WSPs]] = !{![[WEAKFOOSP:[0-9]+]]}
-; WL: ![[WEAKFOOSP]] = !DISubprogram(name: "foo",
-; WL-SAME: function: i32 (i32, i32)* @foo
-; WL: ![[LCU]] = !DICompileUnit({{.*}} subprograms: ![[LSPs:[0-9]+]]
+; WL: ![[WCU]] = distinct !DICompileUnit({{.*}} subprograms: ![[WSPs:[0-9]+]]
+; WL: ![[WSPs]] = !{![[WEAKFOOSP]]}
+; WL: ![[WEAKFOOSP]] = distinct !DISubprogram(name: "foo",
+; WL: ![[LCU]] = distinct !DICompileUnit({{.*}} subprograms: ![[LSPs:[0-9]+]]
; WL: ![[LSPs]] = !{![[BARSP:[0-9]+]], ![[FOOSP:[0-9]+]]}
-; WL: ![[BARSP]] = !DISubprogram(name: "bar",
-; WL-SAME: function: i32 (i32, i32)* @bar
-; WL: ![[FOOSP]] = {{.*}}!DISubprogram(name: "foo",
-; Note, for symmetry, this should be "NOT: function:" and "SAME: ){{$}}".
-; WL-SAME: function: i32 (i32, i32)* @foo
+; WL: ![[BARSP]] = distinct !DISubprogram(name: "bar",
+; WL: ![[FOOSP]] = distinct !DISubprogram(name: "foo",
; WL: ![[FOOCALL]] = !DILocation(line: 52, scope: ![[WEAKFOOSP]])
; WL: ![[FOORET]] = !DILocation(line: 53, scope: ![[WEAKFOOSP]])
; WL: ![[FOOINBAR]] = !DILocation(line: 2, scope: ![[FOOSP]], inlinedAt: ![[BARIA:[0-9]+]])
; WL: ![[BARIA]] = !DILocation(line: 12, scope: ![[BARSP]])
; WL: ![[BARRET]] = !DILocation(line: 13, scope: ![[BARSP]])
-!1 = !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !{!3, !4}, emissionKind: 1)
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !{!3, !4}, emissionKind: 1)
!2 = !DIFile(filename: "bar.c", directory: "/path/to/dir")
-!3 = !DISubprogram(file: !2, scope: !2, line: 11, name: "bar", function: i32 (i32, i32)* @bar, type: !5)
-!4 = !DISubprogram(file: !2, scope: !2, line: 1, name: "foo", function: i32 (i32, i32)* @foo, type: !5)
+!3 = distinct !DISubprogram(file: !2, scope: !2, line: 11, name: "bar", type: !5)
+!4 = distinct !DISubprogram(file: !2, scope: !2, line: 1, name: "foo", type: !5)
!5 = !DISubroutineType(types: !{})
; Crasher for llc.
diff --git a/test/Linker/testlink.ll b/test/Linker/testlink.ll
index 2e9447ddfd37..6a316a3bf846 100644
--- a/test/Linker/testlink.ll
+++ b/test/Linker/testlink.ll
@@ -32,6 +32,11 @@
; CHECK-DAG: @0 = external global i32
@0 = external global i32
+
+define i32* @use0() {
+ ret i32* @0
+}
+
; CHECK-DAG: @Inte = global i32 1
@Inte = global i32 1
@@ -43,7 +48,7 @@
; This should get renamed since there is a definition that is non-internal in
; the other module.
-; CHECK-DAG: @Intern2{{[0-9]+}} = internal constant i32 792
+; CHECK-DAG: @Intern2.{{[0-9]+}} = internal constant i32 792
@Intern2 = internal constant i32 792
@UseIntern2 = global i32* @Intern2
@@ -101,4 +106,6 @@ define void @testIntern() {
ret void
}
-declare void @VecSizeCrash(%VecSize)
+define void @VecSizeCrash(%VecSize) {
+ ret void
+}
diff --git a/test/Linker/thinlto_funcimport_debug.ll b/test/Linker/thinlto_funcimport_debug.ll
new file mode 100644
index 000000000000..02f43b24c17a
--- /dev/null
+++ b/test/Linker/thinlto_funcimport_debug.ll
@@ -0,0 +1,80 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: llvm-as -function-summary %s -o %t.bc
+; RUN: llvm-as -function-summary %p/Inputs/thinlto_funcimport_debug.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; If we import func1 and not func2 we should only link DISubprogram for func1
+; RUN: llvm-link %t2.bc -functionindex=%t3.thinlto.bc -import=func1:%t.bc -S | FileCheck %s
+
+; CHECK: declare i32 @func2
+; CHECK: define available_externally i32 @func1
+
+; Extract out the list of subprograms from each compile unit and ensure
+; that neither contains null.
+; CHECK: !{{[0-9]+}} = distinct !DICompileUnit({{.*}} subprograms: ![[SPs1:[0-9]+]]
+; CHECK-NOT: ![[SPs1]] = !{{{.*}}null{{.*}}}
+; CHECK: !{{[0-9]+}} = distinct !DICompileUnit({{.*}} subprograms: ![[SPs2:[0-9]+]]
+; CHECK-NOT: ![[SPs2]] = !{{{.*}}null{{.*}}}
+
+; CHECK: distinct !DISubprogram(name: "func1"
+; CHECK-NOT: distinct !DISubprogram(name: "func2"
+
+
+; ModuleID = 'dbg.o'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @func1(i32 %n) #0 !dbg !4 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !9, metadata !17), !dbg !18
+ tail call void @llvm.dbg.value(metadata i32 5, i64 0, metadata !10, metadata !17), !dbg !19
+ %cmp = icmp sgt i32 %n, 10, !dbg !20
+ %. = select i1 %cmp, i32 10, i32 5, !dbg !22
+ tail call void @llvm.dbg.value(metadata i32 %., i64 0, metadata !10, metadata !17), !dbg !19
+ ret i32 %., !dbg !23
+}
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @func2(i32 %n) #0 !dbg !11 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !13, metadata !17), !dbg !24
+ ret i32 %n, !dbg !25
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!14, !15}
+!llvm.ident = !{!16}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "dbg.c", directory: ".")
+!2 = !{}
+!3 = !{!4, !11}
+!4 = distinct !DISubprogram(name: "func1", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{!9, !10}
+!9 = !DILocalVariable(name: "n", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!10 = !DILocalVariable(name: "x", scope: !4, file: !1, line: 2, type: !7)
+!11 = distinct !DISubprogram(name: "func2", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, variables: !12)
+!12 = !{!13}
+!13 = !DILocalVariable(name: "n", arg: 1, scope: !11, file: !1, line: 8, type: !7)
+!14 = !{i32 2, !"Dwarf Version", i32 4}
+!15 = !{i32 2, !"Debug Info Version", i32 3}
+!16 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"}
+!17 = !DIExpression()
+!18 = !DILocation(line: 1, column: 15, scope: !4)
+!19 = !DILocation(line: 2, column: 7, scope: !4)
+!20 = !DILocation(line: 3, column: 9, scope: !21)
+!21 = distinct !DILexicalBlock(scope: !4, file: !1, line: 3, column: 7)
+!22 = !DILocation(line: 3, column: 7, scope: !4)
+!23 = !DILocation(line: 5, column: 3, scope: !4)
+!24 = !DILocation(line: 8, column: 15, scope: !11)
+!25 = !DILocation(line: 9, column: 3, scope: !11)
diff --git a/test/Linker/type-unique-alias.ll b/test/Linker/type-unique-alias.ll
index e43450fbbeb3..89e08dd593d1 100644
--- a/test/Linker/type-unique-alias.ll
+++ b/test/Linker/type-unique-alias.ll
@@ -3,8 +3,8 @@
%t = type { i8 }
@g = global %t zeroinitializer
-@a = weak alias %t* @g
+@a = weak alias %t, %t* @g
; CHECK: @g = global %t zeroinitializer
; CHECK: @g2 = global %t zeroinitializer
-; CHECK: @a = weak alias %t* @g
+; CHECK: @a = weak alias %t, %t* @g
diff --git a/test/Linker/type-unique-dst-types.ll b/test/Linker/type-unique-dst-types.ll
index 30aecbb970cb..1adad49de91d 100644
--- a/test/Linker/type-unique-dst-types.ll
+++ b/test/Linker/type-unique-dst-types.ll
@@ -17,3 +17,7 @@
%A = type { %B }
%B = type { i8 }
@g3 = external global %A
+
+define %A* @use_g3() {
+ ret %A* @g3
+}
diff --git a/test/Linker/type-unique-odr-a.ll b/test/Linker/type-unique-odr-a.ll
index fa6b6bb4f221..d7d978c2d3d7 100644
--- a/test/Linker/type-unique-odr-a.ll
+++ b/test/Linker/type-unique-odr-a.ll
@@ -1,6 +1,6 @@
-; REQUIRES: object-emission
+; REQUIRES: default_triple, object-emission
;
-; RUN: llvm-link %s %p/type-unique-odr-b.ll -S -o - | %llc_dwarf -filetype=obj -O0 | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; RUN: llvm-link %s %p/type-unique-odr-b.ll -S -o - | %llc_dwarf -dwarf-linkage-names=Enable -filetype=obj -O0 | llvm-dwarfdump -debug-dump=info - | FileCheck %s
;
; Test ODR-based type uniquing for C++ class members.
; rdar://problem/15851313.
@@ -49,14 +49,14 @@
%class.A = type { i32 }
; Function Attrs: nounwind
-define void @_Z3bazv() #0 {
+define void @_Z3bazv() #0 !dbg !15 {
entry:
call void @_ZL3barv(), !dbg !23
ret void, !dbg !23
}
; Function Attrs: nounwind
-define internal void @_ZL3barv() #0 {
+define internal void @_ZL3barv() #0 !dbg !19 {
entry:
%a = alloca %class.A, align 4
call void @llvm.dbg.declare(metadata %class.A* %a, metadata !24, metadata !DIExpression()), !dbg !25
@@ -73,7 +73,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!20, !21}
!llvm.ident = !{!22}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
!1 = !DIFile(filename: "<unknown>", directory: "")
!2 = !{}
!3 = !{!4}
@@ -87,15 +87,15 @@ attributes #1 = { nounwind readnone }
!11 = !{null, !12}
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
!14 = !{!15, !19}
-!15 = !DISubprogram(name: "baz", linkageName: "_Z3bazv", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !16, type: !17, function: void ()* @_Z3bazv, variables: !2)
+!15 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !16, type: !17, variables: !2)
!16 = !DIFile(filename: "type-unique-odr-a.cpp", directory: "")
!17 = !DISubroutineType(types: !18)
!18 = !{null}
-!19 = !DISubprogram(name: "bar", linkageName: "_ZL3barv", line: 7, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !5, scope: !16, type: !17, function: void ()* @_ZL3barv, variables: !2)
+!19 = distinct !DISubprogram(name: "bar", linkageName: "_ZL3barv", line: 7, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !5, scope: !16, type: !17, variables: !2)
!20 = !{i32 2, !"Dwarf Version", i32 4}
!21 = !{i32 1, !"Debug Info Version", i32 3}
!22 = !{!"clang version 3.5.0 "}
!23 = !DILocation(line: 11, scope: !15)
-!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 8, scope: !19, file: !16, type: !"_ZTS1A")
+!24 = !DILocalVariable(name: "a", line: 8, scope: !19, file: !16, type: !"_ZTS1A")
!25 = !DILocation(line: 8, scope: !19)
!26 = !DILocation(line: 9, scope: !19)
diff --git a/test/Linker/type-unique-odr-b.ll b/test/Linker/type-unique-odr-b.ll
index f57c21da824b..714bb314c908 100644
--- a/test/Linker/type-unique-odr-b.ll
+++ b/test/Linker/type-unique-odr-b.ll
@@ -22,7 +22,7 @@
%class.A = type { i32 }
; Function Attrs: nounwind
-define void @_ZN1A6getFooEv(%class.A* %this) #0 align 2 {
+define void @_ZN1A6getFooEv(%class.A* %this) #0 align 2 !dbg !15 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -35,14 +35,14 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind
-define void @_Z1fv() #0 {
+define void @_Z1fv() #0 !dbg !16 {
entry:
call void @_ZL3barv(), !dbg !28
ret void, !dbg !28
}
; Function Attrs: nounwind
-define internal void @_ZL3barv() #0 {
+define internal void @_ZL3barv() #0 !dbg !20 {
entry:
ret void, !dbg !29
}
@@ -54,7 +54,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!21, !22}
!llvm.ident = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
!1 = !DIFile(filename: "<unknown>", directory: "")
!2 = !{}
!3 = !{!4}
@@ -68,16 +68,16 @@ attributes #1 = { nounwind readnone }
!11 = !{null, !12}
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS1A")
!14 = !{!15, !16, !20}
-!15 = !DISubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !5, scope: !"_ZTS1A", type: !10, function: void (%class.A*)* @_ZN1A6getFooEv, declaration: !9, variables: !2)
-!16 = !DISubprogram(name: "f", linkageName: "_Z1fv", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !17, type: !18, function: void ()* @_Z1fv, variables: !2)
+!15 = distinct !DISubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 8, file: !5, scope: !"_ZTS1A", type: !10, declaration: !9, variables: !2)
+!16 = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !5, scope: !17, type: !18, variables: !2)
!17 = !DIFile(filename: "type-unique-odr-b.cpp", directory: "")
!18 = !DISubroutineType(types: !19)
!19 = !{null}
-!20 = !DISubprogram(name: "bar", linkageName: "_ZL3barv", line: 10, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !5, scope: !17, type: !18, function: void ()* @_ZL3barv, variables: !2)
+!20 = distinct !DISubprogram(name: "bar", linkageName: "_ZL3barv", line: 10, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !5, scope: !17, type: !18, variables: !2)
!21 = !{i32 2, !"Dwarf Version", i32 4}
!22 = !{i32 1, !"Debug Info Version", i32 3}
!23 = !{!"clang version 3.5.0 "}
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !15, type: !25)
+!24 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !15, type: !25)
!25 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1A")
!26 = !DILocation(line: 0, scope: !15)
!27 = !DILocation(line: 8, scope: !15)
diff --git a/test/Linker/type-unique-simple-a.ll b/test/Linker/type-unique-simple-a.ll
index 1ab43d657e15..24a830922ef5 100644
--- a/test/Linker/type-unique-simple-a.ll
+++ b/test/Linker/type-unique-simple-a.ll
@@ -1,4 +1,4 @@
-; REQUIRES: object-emission
+; REQUIRES: default_triple, object-emission
; RUN: llvm-link %s %p/type-unique-simple-b.ll -S -o %t
; RUN: cat %t | FileCheck %s -check-prefix=LINK
@@ -49,7 +49,7 @@
%struct.Base = type { i32 }
; Function Attrs: nounwind ssp uwtable
-define void @_Z1fi(i32 %a) #0 {
+define void @_Z1fi(i32 %a) #0 !dbg !10 {
entry:
%a.addr = alloca i32, align 4
%t = alloca %struct.Base, align 4
@@ -68,7 +68,7 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!14, !20}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.cpp", directory: "/Users/mren/c_testing/type_unique_air/simple")
!2 = !{}
!3 = !{!4}
@@ -78,14 +78,14 @@ attributes #1 = { nounwind readnone }
!7 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 2, size: 32, align: 32, file: !5, scope: !"_ZTS4Base", baseType: !8)
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10}
-!10 = !DISubprogram(name: "f", linkageName: "_Z1fi", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !11, type: !12, function: void (i32)* @_Z1fi, variables: !2)
+!10 = distinct !DISubprogram(name: "f", linkageName: "_Z1fi", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !11, type: !12, variables: !2)
!11 = !DIFile(filename: "foo.cpp", directory: "/Users/mren/c_testing/type_unique_air/simple")
!12 = !DISubroutineType(types: !13)
!13 = !{null, !8}
!14 = !{i32 2, !"Dwarf Version", i32 2}
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !10, file: !11, type: !8)
+!15 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !10, file: !11, type: !8)
!16 = !DILocation(line: 3, scope: !10)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "t", line: 4, scope: !10, file: !11, type: !4)
+!17 = !DILocalVariable(name: "t", line: 4, scope: !10, file: !11, type: !4)
!18 = !DILocation(line: 4, scope: !10)
!19 = !DILocation(line: 5, scope: !10)
!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Linker/type-unique-simple-b.ll b/test/Linker/type-unique-simple-b.ll
index c12b91845299..b2228185c9b5 100644
--- a/test/Linker/type-unique-simple-b.ll
+++ b/test/Linker/type-unique-simple-b.ll
@@ -5,7 +5,7 @@
%struct.Base = type { i32 }
; Function Attrs: nounwind ssp uwtable
-define void @_Z1gi(i32 %a) #0 {
+define void @_Z1gi(i32 %a) #0 !dbg !10 {
entry:
%a.addr = alloca i32, align 4
%t = alloca %struct.Base, align 4
@@ -19,7 +19,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: ssp uwtable
-define i32 @main() #2 {
+define i32 @main() #2 !dbg !14 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
@@ -38,7 +38,7 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!17, !26}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !9, globals: !2, imports: !2)
!1 = !DIFile(filename: "bar.cpp", directory: "/Users/mren/c_testing/type_unique_air/simple")
!2 = !{}
!3 = !{!4}
@@ -48,17 +48,17 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!7 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 2, size: 32, align: 32, file: !5, scope: !"_ZTS4Base", baseType: !8)
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10, !14}
-!10 = !DISubprogram(name: "g", linkageName: "_Z1gi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !11, type: !12, function: void (i32)* @_Z1gi, variables: !2)
+!10 = distinct !DISubprogram(name: "g", linkageName: "_Z1gi", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !11, type: !12, variables: !2)
!11 = !DIFile(filename: "bar.cpp", directory: "/Users/mren/c_testing/type_unique_air/simple")
!12 = !DISubroutineType(types: !13)
!13 = !{null, !8}
-!14 = !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !11, type: !15, function: i32 ()* @main, variables: !2)
+!14 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !11, type: !15, variables: !2)
!15 = !DISubroutineType(types: !16)
!16 = !{!8}
!17 = !{i32 2, !"Dwarf Version", i32 2}
-!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 4, arg: 1, scope: !10, file: !11, type: !8)
+!18 = !DILocalVariable(name: "a", line: 4, arg: 1, scope: !10, file: !11, type: !8)
!19 = !DILocation(line: 4, scope: !10)
-!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "t", line: 5, scope: !10, file: !11, type: !4)
+!20 = !DILocalVariable(name: "t", line: 5, scope: !10, file: !11, type: !4)
!21 = !DILocation(line: 5, scope: !10)
!22 = !DILocation(line: 6, scope: !10)
!23 = !DILocation(line: 8, scope: !14)
diff --git a/test/Linker/type-unique-simple2-a.ll b/test/Linker/type-unique-simple2-a.ll
index f9170ab5f7fa..3779753a64ea 100644
--- a/test/Linker/type-unique-simple2-a.ll
+++ b/test/Linker/type-unique-simple2-a.ll
@@ -1,4 +1,4 @@
-; REQUIRES: object-emission
+; REQUIRES: default_triple, object-emission
;
; RUN: llvm-link %s %p/type-unique-simple2-b.ll -S -o - | %llc_dwarf -filetype=obj -O0 | llvm-dwarfdump -debug-dump=info - | FileCheck %s
;
@@ -18,7 +18,8 @@
; return A().getFoo();
; }
;
-; CHECK: _ZN1A6setFooEv
+; CHECK: DW_AT_name {{.*}} "setFoo"
+; CHECK-NOT: DW_TAG
; CHECK: DW_AT_accessibility [DW_FORM_data1] (DW_ACCESS_public)
; CHECK-NOT: DW_AT_accessibility
; CHECK: DW_TAG
@@ -30,7 +31,7 @@
@_ZTV1A = external unnamed_addr constant [4 x i8*]
; Function Attrs: nounwind
-define i32 @_Z3barv() #0 {
+define i32 @_Z3barv() #0 !dbg !27 {
entry:
%tmp = alloca %class.A, align 8
%0 = bitcast %class.A* %tmp to i8*, !dbg !38
@@ -44,7 +45,7 @@ entry:
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
; Function Attrs: inlinehint nounwind
-define linkonce_odr void @_ZN1AC1Ev(%class.A* %this) unnamed_addr #2 align 2 {
+define linkonce_odr void @_ZN1AC1Ev(%class.A* %this) unnamed_addr #2 align 2 !dbg !31 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -60,7 +61,7 @@ declare i32 @_ZN1A6getFooEv(%class.A*)
declare void @llvm.dbg.declare(metadata, metadata, metadata) #4
; Function Attrs: inlinehint nounwind
-define linkonce_odr void @_ZN1AC2Ev(%class.A* %this) unnamed_addr #2 align 2 {
+define linkonce_odr void @_ZN1AC2Ev(%class.A* %this) unnamed_addr #2 align 2 !dbg !34 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -80,7 +81,7 @@ attributes #4 = { nounwind readnone }
!llvm.module.flags = !{!35, !36}
!llvm.ident = !{!37}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !26, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !26, globals: !2, imports: !2)
!1 = !DIFile(filename: "<unknown>", directory: "")
!2 = !{}
!3 = !{!4}
@@ -105,22 +106,22 @@ attributes #4 = { nounwind readnone }
!23 = !DIDerivedType(tag: DW_TAG_typedef, name: "foo_t", line: 1, file: !24, baseType: !13)
!24 = !DIFile(filename: "a.cpp", directory: "")
!26 = !{!27, !31, !34}
-!27 = !DISubprogram(name: "bar", linkageName: "_Z3barv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !24, scope: !28, type: !29, function: i32 ()* @_Z3barv, variables: !2)
+!27 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !24, scope: !28, type: !29, variables: !2)
!28 = !DIFile(filename: "a.cpp", directory: "")
!29 = !DISubroutineType(types: !30)
!30 = !{!23}
-!31 = !DISubprogram(name: "A", linkageName: "_ZN1AC1Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !"_ZTS1A", type: !15, function: void (%class.A*)* @_ZN1AC1Ev, declaration: !32, variables: !2)
+!31 = distinct !DISubprogram(name: "A", linkageName: "_ZN1AC1Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !"_ZTS1A", type: !15, declaration: !32, variables: !2)
!32 = !DISubprogram(name: "A", isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scope: !"_ZTS1A", type: !15)
-!34 = !DISubprogram(name: "A", linkageName: "_ZN1AC2Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !"_ZTS1A", type: !15, function: void (%class.A*)* @_ZN1AC2Ev, declaration: !32, variables: !2)
+!34 = distinct !DISubprogram(name: "A", linkageName: "_ZN1AC2Ev", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !"_ZTS1A", type: !15, declaration: !32, variables: !2)
!35 = !{i32 2, !"Dwarf Version", i32 2}
!36 = !{i32 1, !"Debug Info Version", i32 3}
!37 = !{!"clang version 3.5 "}
!38 = !DILocation(line: 3, scope: !27)
-!39 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !31, type: !40)
+!39 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !31, type: !40)
!40 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1A")
!41 = !DILocation(line: 0, scope: !31)
!42 = !DILocation(line: 2, scope: !43)
!43 = !DILexicalBlockFile(discriminator: 0, file: !5, scope: !31)
-!44 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !34, type: !40)
+!44 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !34, type: !40)
!45 = !DILocation(line: 0, scope: !34)
!46 = !DILocation(line: 2, scope: !34)
diff --git a/test/Linker/type-unique-simple2-b.ll b/test/Linker/type-unique-simple2-b.ll
index 5539fb4adb35..e69ab63e6d24 100644
--- a/test/Linker/type-unique-simple2-b.ll
+++ b/test/Linker/type-unique-simple2-b.ll
@@ -18,7 +18,7 @@
@_ZTI1A = unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) }
; Function Attrs: nounwind
-define void @_ZN1A6setFooEv(%class.A* %this) unnamed_addr #0 align 2 {
+define void @_ZN1A6setFooEv(%class.A* %this) unnamed_addr #0 align 2 !dbg !26 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -31,7 +31,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind
-define i32 @_ZN1A6getFooEv(%class.A* %this) unnamed_addr #0 align 2 {
+define i32 @_ZN1A6getFooEv(%class.A* %this) unnamed_addr #0 align 2 !dbg !28 {
entry:
%this.addr = alloca %class.A*, align 8
store %class.A* %this, %class.A** %this.addr, align 8
@@ -47,7 +47,7 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!29, !30}
!llvm.ident = !{!31}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !25, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !3, subprograms: !25, globals: !2, imports: !2)
!1 = !DIFile(filename: "<unknown>", directory: "")
!2 = !{}
!3 = !{!4}
@@ -71,16 +71,16 @@ attributes #1 = { nounwind readnone }
!22 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !23)
!23 = !DIDerivedType(tag: DW_TAG_typedef, name: "foo_t", line: 1, file: !5, baseType: !13)
!25 = !{!26, !28}
-!26 = !DISubprogram(name: "setFoo", linkageName: "_ZN1A6setFooEv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !27, scope: !"_ZTS1A", type: !15, function: void (%class.A*)* @_ZN1A6setFooEv, declaration: !14, variables: !2)
+!26 = distinct !DISubprogram(name: "setFoo", linkageName: "_ZN1A6setFooEv", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !27, scope: !"_ZTS1A", type: !15, declaration: !14, variables: !2)
!27 = !DIFile(filename: "b.cpp", directory: "")
-!28 = !DISubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !27, scope: !"_ZTS1A", type: !20, function: i32 (%class.A*)* @_ZN1A6getFooEv, declaration: !19, variables: !2)
+!28 = distinct !DISubprogram(name: "getFoo", linkageName: "_ZN1A6getFooEv", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPublic | DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !27, scope: !"_ZTS1A", type: !20, declaration: !19, variables: !2)
!29 = !{i32 2, !"Dwarf Version", i32 2}
!30 = !{i32 1, !"Debug Info Version", i32 3}
!31 = !{!"clang version 3.5 "}
-!32 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !26, type: !33)
+!32 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !26, type: !33)
!33 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1A")
!34 = !DILocation(line: 0, scope: !26)
!35 = !DILocation(line: 2, scope: !26)
-!36 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !33)
+!36 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !28, type: !33)
!37 = !DILocation(line: 0, scope: !28)
!38 = !DILocation(line: 4, scope: !28)
diff --git a/test/Linker/type-unique-simple2.ll b/test/Linker/type-unique-simple2.ll
index 8a56e2e80c67..7c31cdf5975e 100644
--- a/test/Linker/type-unique-simple2.ll
+++ b/test/Linker/type-unique-simple2.ll
@@ -1,4 +1,5 @@
; REQUIRES: object-emission
+; REQUIRES: default_triple
; RUN: llvm-link %S/Inputs/type-unique-simple2-a.ll %S/Inputs/type-unique-simple2-b.ll -S -o %t
; RUN: cat %t | FileCheck %S/Inputs/type-unique-simple2-a.ll -check-prefix=LINK
diff --git a/test/Linker/type-unique-src-type.ll b/test/Linker/type-unique-src-type.ll
index 110ecc87e1b1..ab7322892e07 100644
--- a/test/Linker/type-unique-src-type.ll
+++ b/test/Linker/type-unique-src-type.ll
@@ -10,7 +10,7 @@
; CHECK-NEXT: %B = type { %A }
; CHECK-NEXT: %A = type { i8 }
-; CHECK: @g1 = external global %C.0
+; CHECK: @g1 = global %C.0 zeroinitializer
; CHECK: getelementptr %C.0, %C.0* null, i64 0, i32 0, i32 0
%A = type { i8 }
@@ -21,4 +21,4 @@ define void @f1() {
getelementptr %C, %C* null, i64 0, i32 0, i32 0
ret void
}
-@g1 = external global %C.0
+@g1 = global %C.0 zeroinitializer
diff --git a/test/Linker/type-unique-type-array-a.ll b/test/Linker/type-unique-type-array-a.ll
index 98c8d657743d..0a79f20f40c6 100644
--- a/test/Linker/type-unique-type-array-a.ll
+++ b/test/Linker/type-unique-type-array-a.ll
@@ -1,4 +1,4 @@
-; REQUIRES: object-emission
+; REQUIRES: default_triple, object-emission
;
; RUN: llvm-link %s %p/type-unique-type-array-b.ll -S -o - | %llc_dwarf -filetype=obj -O0 | llvm-dwarfdump -debug-dump=info - | FileCheck %s
;
@@ -23,7 +23,7 @@
; CHECK: DW_TAG_class_type
; CHECK-NEXT: DW_AT_name {{.*}} "A"
; CHECK: DW_TAG_subprogram
-; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN1A5testAE2SA"
+; CHECK: DW_AT_name {{.*}} "testA"
; CHECK: DW_TAG_formal_parameter
; CHECK: DW_TAG_formal_parameter
; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{.*}} => {0x[[STRUCT:.*]]})
@@ -34,7 +34,7 @@
; CHECK: DW_TAG_class_type
; CHECK-NEXT: DW_AT_name {{.*}} "B"
; CHECK: DW_TAG_subprogram
-; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN1B5testBE2SA"
+; CHECK: DW_AT_name {{.*}} "testB"
; CHECK: DW_TAG_formal_parameter
; CHECK: DW_TAG_formal_parameter
; CHECK-NEXT: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[STRUCT]]
@@ -43,7 +43,7 @@
%struct.SA = type { i32 }
; Function Attrs: ssp uwtable
-define void @_Z4topAP1A2SA(%class.A* %a, i32 %sa.coerce) #0 {
+define void @_Z4topAP1A2SA(%class.A* %a, i32 %sa.coerce) #0 !dbg !15 {
entry:
%sa = alloca %struct.SA, align 4
%a.addr = alloca %class.A*, align 8
@@ -67,7 +67,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind ssp uwtable
-define linkonce_odr void @_ZN1A5testAE2SA(%class.A* %this, i32 %a.coerce) #2 align 2 {
+define linkonce_odr void @_ZN1A5testAE2SA(%class.A* %this, i32 %a.coerce) #2 align 2 !dbg !20 {
entry:
%a = alloca %struct.SA, align 4
%this.addr = alloca %class.A*, align 8
@@ -92,7 +92,7 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!21, !22}
!llvm.ident = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
!1 = !DIFile(filename: "a.cpp", directory: "/Users/manmanren/test-Nov/type_unique/rdar_di_array")
!2 = !{}
!3 = !{!4, !10}
@@ -107,23 +107,23 @@ attributes #3 = { nounwind }
!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 2, size: 32, align: 32, file: !1, scope: !"_ZTS2SA", baseType: !13)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !{!15, !20}
-!15 = !DISubprogram(name: "topA", linkageName: "_Z4topAP1A2SA", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !1, scope: !16, type: !17, function: void (%class.A*, i32)* @_Z4topAP1A2SA, variables: !2)
+!15 = distinct !DISubprogram(name: "topA", linkageName: "_Z4topAP1A2SA", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !1, scope: !16, type: !17, variables: !2)
!16 = !DIFile(filename: "a.cpp", directory: "/Users/manmanren/test-Nov/type_unique/rdar_di_array")
!17 = !DISubroutineType(types: !18)
!18 = !{null, !19, !"_ZTS2SA"}
!19 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1A")
-!20 = !DISubprogram(name: "testA", linkageName: "_ZN1A5testAE2SA", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !"_ZTS1A", type: !7, function: void (%class.A*, i32)* @_ZN1A5testAE2SA, declaration: !6, variables: !2)
+!20 = distinct !DISubprogram(name: "testA", linkageName: "_ZN1A5testAE2SA", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !"_ZTS1A", type: !7, declaration: !6, variables: !2)
!21 = !{i32 2, !"Dwarf Version", i32 2}
!22 = !{i32 2, !"Debug Info Version", i32 3}
!23 = !{!"clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)"}
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 11, arg: 1, scope: !15, file: !16, type: !19)
+!24 = !DILocalVariable(name: "a", line: 11, arg: 1, scope: !15, file: !16, type: !19)
!25 = !DILocation(line: 11, column: 14, scope: !15)
-!26 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "sa", line: 11, arg: 2, scope: !15, file: !16, type: !"_ZTS2SA")
+!26 = !DILocalVariable(name: "sa", line: 11, arg: 2, scope: !15, file: !16, type: !"_ZTS2SA")
!27 = !DILocation(line: 11, column: 20, scope: !15)
!28 = !DILocation(line: 12, column: 3, scope: !15)
!29 = !DILocation(line: 13, column: 1, scope: !15)
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !20, type: !19)
+!30 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !20, type: !19)
!31 = !DILocation(line: 0, scope: !20)
-!32 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 7, arg: 2, scope: !20, file: !16, type: !"_ZTS2SA")
+!32 = !DILocalVariable(name: "a", line: 7, arg: 2, scope: !20, file: !16, type: !"_ZTS2SA")
!33 = !DILocation(line: 7, column: 17, scope: !20)
!34 = !DILocation(line: 8, column: 3, scope: !20)
diff --git a/test/Linker/type-unique-type-array-b.ll b/test/Linker/type-unique-type-array-b.ll
index 14ce59b3c127..b7796daf9b13 100644
--- a/test/Linker/type-unique-type-array-b.ll
+++ b/test/Linker/type-unique-type-array-b.ll
@@ -22,7 +22,7 @@
%struct.SA = type { i32 }
; Function Attrs: ssp uwtable
-define void @_Z4topBP1B2SA(%class.B* %b, i32 %sa.coerce) #0 {
+define void @_Z4topBP1B2SA(%class.B* %b, i32 %sa.coerce) #0 !dbg !15 {
entry:
%sa = alloca %struct.SA, align 4
%b.addr = alloca %class.B*, align 8
@@ -46,7 +46,7 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; Function Attrs: nounwind ssp uwtable
-define linkonce_odr void @_ZN1B5testBE2SA(%class.B* %this, i32 %sa.coerce) #2 align 2 {
+define linkonce_odr void @_ZN1B5testBE2SA(%class.B* %this, i32 %sa.coerce) #2 align 2 !dbg !20 {
entry:
%sa = alloca %struct.SA, align 4
%this.addr = alloca %class.B*, align 8
@@ -71,7 +71,7 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!21, !22}
!llvm.ident = !{!23}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
!1 = !DIFile(filename: "b.cpp", directory: "/Users/manmanren/test-Nov/type_unique/rdar_di_array")
!2 = !{}
!3 = !{!4, !10}
@@ -86,23 +86,23 @@ attributes #3 = { nounwind }
!12 = !DIDerivedType(tag: DW_TAG_member, name: "a", line: 2, size: 32, align: 32, file: !1, scope: !"_ZTS2SA", baseType: !13)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !{!15, !20}
-!15 = !DISubprogram(name: "topB", linkageName: "_Z4topBP1B2SA", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !1, scope: !16, type: !17, function: void (%class.B*, i32)* @_Z4topBP1B2SA, variables: !2)
+!15 = distinct !DISubprogram(name: "topB", linkageName: "_Z4topBP1B2SA", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 11, file: !1, scope: !16, type: !17, variables: !2)
!16 = !DIFile(filename: "b.cpp", directory: "/Users/manmanren/test-Nov/type_unique/rdar_di_array")
!17 = !DISubroutineType(types: !18)
!18 = !{null, !19, !"_ZTS2SA"}
!19 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS1B")
-!20 = !DISubprogram(name: "testB", linkageName: "_ZN1B5testBE2SA", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !"_ZTS1B", type: !7, function: void (%class.B*, i32)* @_ZN1B5testBE2SA, declaration: !6, variables: !2)
+!20 = distinct !DISubprogram(name: "testB", linkageName: "_ZN1B5testBE2SA", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !"_ZTS1B", type: !7, declaration: !6, variables: !2)
!21 = !{i32 2, !"Dwarf Version", i32 2}
!22 = !{i32 2, !"Debug Info Version", i32 3}
!23 = !{!"clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)"}
-!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 11, arg: 1, scope: !15, file: !16, type: !19)
+!24 = !DILocalVariable(name: "b", line: 11, arg: 1, scope: !15, file: !16, type: !19)
!25 = !DILocation(line: 11, column: 14, scope: !15)
-!26 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "sa", line: 11, arg: 2, scope: !15, file: !16, type: !"_ZTS2SA")
+!26 = !DILocalVariable(name: "sa", line: 11, arg: 2, scope: !15, file: !16, type: !"_ZTS2SA")
!27 = !DILocation(line: 11, column: 20, scope: !15)
!28 = !DILocation(line: 12, column: 3, scope: !15)
!29 = !DILocation(line: 13, column: 1, scope: !15)
-!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !20, type: !19)
+!30 = !DILocalVariable(name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !20, type: !19)
!31 = !DILocation(line: 0, scope: !20)
-!32 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "sa", line: 7, arg: 2, scope: !20, file: !16, type: !"_ZTS2SA")
+!32 = !DILocalVariable(name: "sa", line: 7, arg: 2, scope: !20, file: !16, type: !"_ZTS2SA")
!33 = !DILocation(line: 7, column: 17, scope: !20)
!34 = !DILocation(line: 8, column: 3, scope: !20)
diff --git a/test/Linker/uniqued-distinct-cycles.ll b/test/Linker/uniqued-distinct-cycles.ll
new file mode 100644
index 000000000000..05cc80d73042
--- /dev/null
+++ b/test/Linker/uniqued-distinct-cycles.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-link -o - %s | llvm-dis | FileCheck %s
+
+; CHECK: !named = !{!0, !2}
+!named = !{!0, !2}
+
+; CHECK: !0 = !{!1}
+; CHECK-NEXT: !1 = distinct !{!0}
+!0 = !{!1}
+!1 = distinct !{!0}
+
+; CHECK-NEXT: !2 = distinct !{!3}
+; CHECK-NEXT: !3 = !{!2}
+!2 = distinct !{!3}
+!3 = !{!2}
diff --git a/test/Linker/unnamed-addr1-a.ll b/test/Linker/unnamed-addr1-a.ll
index 794ae987797c..a96f089a99c0 100644
--- a/test/Linker/unnamed-addr1-a.ll
+++ b/test/Linker/unnamed-addr1-a.ll
@@ -15,6 +15,11 @@ define weak void @func-b() unnamed_addr { ret void }
@global-c = common unnamed_addr global i32 0
; CHECK-DAG: @global-c = common unnamed_addr global i32 0
@global-d = external global i32
+
+define i32* @use-global-d() {
+ ret i32* @global-d
+}
+
; CHECK-DAG: @global-d = global i32 42
@global-e = external unnamed_addr global i32
; CHECK-DAG: @global-e = unnamed_addr global i32 42
@@ -22,11 +27,16 @@ define weak void @func-b() unnamed_addr { ret void }
; CHECK-DAG: @global-f = global i32 42
@alias-a = weak global i32 42
-; CHECK-DAG: @alias-a = alias i32* @global-f
+; CHECK-DAG: @alias-a = alias i32, i32* @global-f
@alias-b = weak unnamed_addr global i32 42
-; CHECK-DAG: @alias-b = unnamed_addr alias i32* @global-f
+; CHECK-DAG: @alias-b = unnamed_addr alias i32, i32* @global-f
declare void @func-c()
+define void @use-func-c() {
+ call void @func-c()
+ ret void
+}
+
; CHECK-DAG: define weak void @func-c() {
define weak void @func-d() { ret void }
; CHECK-DAG: define weak void @func-d() {
@@ -44,9 +54,9 @@ define weak void @func-e() unnamed_addr { ret void }
; CHECK-DAG: @global-j = global i32 42
@alias-c = weak global i32 42
-; CHECK-DAG: @alias-c = alias i32* @global-f
+; CHECK-DAG: @alias-c = alias i32, i32* @global-f
@alias-d = weak unnamed_addr global i32 42
-; CHECK-DAG: @alias-d = alias i32* @global-f
+; CHECK-DAG: @alias-d = alias i32, i32* @global-f
declare void @func-g()
diff --git a/test/Linker/unnamed-addr1-b.ll b/test/Linker/unnamed-addr1-b.ll
index d0f54f2259b4..94334d6da73f 100644
--- a/test/Linker/unnamed-addr1-b.ll
+++ b/test/Linker/unnamed-addr1-b.ll
@@ -6,8 +6,8 @@
@global-e = unnamed_addr global i32 42
@global-f = unnamed_addr global i32 42
-@alias-a = unnamed_addr alias i32* @global-f
-@alias-b = unnamed_addr alias i32* @global-f
+@alias-a = unnamed_addr alias i32, i32* @global-f
+@alias-b = unnamed_addr alias i32, i32* @global-f
define weak void @func-c() unnamed_addr { ret void }
define weak void @func-d() unnamed_addr { ret void }
@@ -18,8 +18,8 @@ define weak void @func-e() unnamed_addr { ret void }
@global-i = global i32 42
@global-j = global i32 42
-@alias-c = alias i32* @global-f
-@alias-d = alias i32* @global-f
+@alias-c = alias i32, i32* @global-f
+@alias-d = alias i32, i32* @global-f
define weak void @func-g() { ret void }
define weak void @func-h() { ret void }
diff --git a/test/Linker/visibility.ll b/test/Linker/visibility.ll
index 4938d7af56d6..4252aee61920 100644
--- a/test/Linker/visibility.ll
+++ b/test/Linker/visibility.ll
@@ -21,14 +21,14 @@ $c1 = comdat any
@v4 = global i32 1, comdat($c1)
; Aliases
-; CHECK: @a1 = hidden alias i32* @v1
-@a1 = alias i32* @v1
+; CHECK: @a1 = hidden alias i32, i32* @v1
+@a1 = alias i32, i32* @v1
-; CHECK: @a2 = protected alias i32* @v2
-@a2 = alias i32* @v2
+; CHECK: @a2 = protected alias i32, i32* @v2
+@a2 = alias i32, i32* @v2
-; CHECK: @a3 = hidden alias i32* @v3
-@a3 = protected alias i32* @v3
+; CHECK: @a3 = hidden alias i32, i32* @v3
+@a3 = protected alias i32, i32* @v3
; Functions
diff --git a/test/Linker/weakextern.ll b/test/Linker/weakextern.ll
index 8d479a0d39b3..814550a907bb 100644
--- a/test/Linker/weakextern.ll
+++ b/test/Linker/weakextern.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s > %t.bc
-; RUN: llvm-as < %p/testlink.ll > %t2.bc
-; RUN: llvm-link %t.bc %t.bc %t2.bc -o %t1.bc
-; RUN: llvm-dis < %t1.bc | FileCheck %s
+; RUN: llvm-link %s %s %p/testlink.ll -S | FileCheck %s
; CHECK: kallsyms_names = extern_weak
; CHECK: Inte = global i32
; CHECK: MyVar = external global i32
-@kallsyms_names = extern_weak global [0 x i8] ; <[0 x i8]*> [#uses=0]
-@MyVar = extern_weak global i32 ; <i32*> [#uses=0]
-@Inte = extern_weak global i32 ; <i32*> [#uses=0]
+@kallsyms_names = extern_weak global [0 x i8]
+@MyVar = extern_weak global i32
+@Inte = extern_weak global i32
+define weak [0 x i8]* @use_kallsyms_names() {
+ ret [0 x i8]* @kallsyms_names
+}
diff --git a/test/MC/AArch64/arm64-advsimd.s b/test/MC/AArch64/arm64-advsimd.s
index c627de708d31..294f09082916 100644
--- a/test/MC/AArch64/arm64-advsimd.s
+++ b/test/MC/AArch64/arm64-advsimd.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 -show-encoding < %s | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto,fullfp16 -output-asm-variant=1 -show-encoding < %s | FileCheck %s
foo:
@@ -440,6 +440,106 @@ foo:
; CHECK: urshl.8b v0, v0, v0 ; encoding: [0x00,0x54,0x20,0x2e]
; CHECK: ushl.8b v0, v0, v0 ; encoding: [0x00,0x44,0x20,0x2e]
+ fabd.4h v0, v0, v0
+ facge.4h v0, v0, v0
+ facgt.4h v0, v0, v0
+ faddp.4h v0, v0, v0
+ fadd.4h v0, v0, v0
+ fcmeq.4h v0, v0, v0
+ fcmge.4h v0, v0, v0
+ fcmgt.4h v0, v0, v0
+ fdiv.4h v0, v0, v0
+ fmaxnmp.4h v0, v0, v0
+ fmaxnm.4h v0, v0, v0
+ fmaxp.4h v0, v0, v0
+ fmax.4h v0, v0, v0
+ fminnmp.4h v0, v0, v0
+ fminnm.4h v0, v0, v0
+ fminp.4h v0, v0, v0
+ fmin.4h v0, v0, v0
+ fmla.4h v0, v0, v0
+ fmls.4h v0, v0, v0
+ fmulx.4h v0, v0, v0
+ fmul.4h v0, v0, v0
+ frecps.4h v0, v0, v0
+ frsqrts.4h v0, v0, v0
+ fsub.4h v0, v0, v0
+
+; CHECK: fabd.4h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x2e]
+; CHECK: facge.4h v0, v0, v0 ; encoding: [0x00,0x2c,0x40,0x2e]
+; CHECK: facgt.4h v0, v0, v0 ; encoding: [0x00,0x2c,0xc0,0x2e]
+; CHECK: faddp.4h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x2e]
+; CHECK: fadd.4h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x0e]
+; CHECK: fcmeq.4h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x0e]
+; CHECK: fcmge.4h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x2e]
+; CHECK: fcmgt.4h v0, v0, v0 ; encoding: [0x00,0x24,0xc0,0x2e]
+; CHECK: fdiv.4h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x2e]
+; CHECK: fmaxnmp.4h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x2e]
+; CHECK: fmaxnm.4h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x0e]
+; CHECK: fmaxp.4h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x2e]
+; CHECK: fmax.4h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x0e]
+; CHECK: fminnmp.4h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x2e]
+; CHECK: fminnm.4h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x0e]
+; CHECK: fminp.4h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x2e]
+; CHECK: fmin.4h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x0e]
+; CHECK: fmla.4h v0, v0, v0 ; encoding: [0x00,0x0c,0x40,0x0e]
+; CHECK: fmls.4h v0, v0, v0 ; encoding: [0x00,0x0c,0xc0,0x0e]
+; CHECK: fmulx.4h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x0e]
+; CHECK: fmul.4h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x2e]
+; CHECK: frecps.4h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x0e]
+; CHECK: frsqrts.4h v0, v0, v0 ; encoding: [0x00,0x3c,0xc0,0x0e]
+; CHECK: fsub.4h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x0e]
+
+ fabd.8h v0, v0, v0
+ facge.8h v0, v0, v0
+ facgt.8h v0, v0, v0
+ faddp.8h v0, v0, v0
+ fadd.8h v0, v0, v0
+ fcmeq.8h v0, v0, v0
+ fcmge.8h v0, v0, v0
+ fcmgt.8h v0, v0, v0
+ fdiv.8h v0, v0, v0
+ fmaxnmp.8h v0, v0, v0
+ fmaxnm.8h v0, v0, v0
+ fmaxp.8h v0, v0, v0
+ fmax.8h v0, v0, v0
+ fminnmp.8h v0, v0, v0
+ fminnm.8h v0, v0, v0
+ fminp.8h v0, v0, v0
+ fmin.8h v0, v0, v0
+ fmla.8h v0, v0, v0
+ fmls.8h v0, v0, v0
+ fmulx.8h v0, v0, v0
+ fmul.8h v0, v0, v0
+ frecps.8h v0, v0, v0
+ frsqrts.8h v0, v0, v0
+ fsub.8h v0, v0, v0
+
+; CHECK: fabd.8h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x6e]
+; CHECK: facge.8h v0, v0, v0 ; encoding: [0x00,0x2c,0x40,0x6e]
+; CHECK: facgt.8h v0, v0, v0 ; encoding: [0x00,0x2c,0xc0,0x6e]
+; CHECK: faddp.8h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x6e]
+; CHECK: fadd.8h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x4e]
+; CHECK: fcmeq.8h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x4e]
+; CHECK: fcmge.8h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x6e]
+; CHECK: fcmgt.8h v0, v0, v0 ; encoding: [0x00,0x24,0xc0,0x6e]
+; CHECK: fdiv.8h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x6e]
+; CHECK: fmaxnmp.8h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x6e]
+; CHECK: fmaxnm.8h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x4e]
+; CHECK: fmaxp.8h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x6e]
+; CHECK: fmax.8h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x4e]
+; CHECK: fminnmp.8h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x6e]
+; CHECK: fminnm.8h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x4e]
+; CHECK: fminp.8h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x6e]
+; CHECK: fmin.8h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x4e]
+; CHECK: fmla.8h v0, v0, v0 ; encoding: [0x00,0x0c,0x40,0x4e]
+; CHECK: fmls.8h v0, v0, v0 ; encoding: [0x00,0x0c,0xc0,0x4e]
+; CHECK: fmulx.8h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x4e]
+; CHECK: fmul.8h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x6e]
+; CHECK: frecps.8h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x4e]
+; CHECK: frsqrts.8h v0, v0, v0 ; encoding: [0x00,0x3c,0xc0,0x4e]
+; CHECK: fsub.8h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x4e]
+
bif.8b v0, v0, v0
bit.8b v0, v0, v0
bsl.8b v0, v0, v0
@@ -568,6 +668,57 @@ foo:
; CHECK: shll2.4s v1, v2, #16 ; encoding: [0x41,0x38,0x61,0x6e]
; CHECK: shll2.2d v1, v2, #32 ; encoding: [0x41,0x38,0xa1,0x6e]
+ fabs.4h v0, v0
+ fneg.4h v0, v0
+ frecpe.4h v0, v0
+ frinta.4h v0, v0
+ frintx.4h v0, v0
+ frinti.4h v0, v0
+ frintm.4h v0, v0
+ frintn.4h v0, v0
+ frintp.4h v0, v0
+ frintz.4h v0, v0
+ frsqrte.4h v0, v0
+ fsqrt.4h v0, v0
+
+; CHECK: fabs.4h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x0e]
+; CHECK: fneg.4h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x2e]
+; CHECK: frecpe.4h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x0e]
+; CHECK: frinta.4h v0, v0 ; encoding: [0x00,0x88,0x79,0x2e]
+; CHECK: frintx.4h v0, v0 ; encoding: [0x00,0x98,0x79,0x2e]
+; CHECK: frinti.4h v0, v0 ; encoding: [0x00,0x98,0xf9,0x2e]
+; CHECK: frintm.4h v0, v0 ; encoding: [0x00,0x98,0x79,0x0e]
+; CHECK: frintn.4h v0, v0 ; encoding: [0x00,0x88,0x79,0x0e]
+; CHECK: frintp.4h v0, v0 ; encoding: [0x00,0x88,0xf9,0x0e]
+; CHECK: frintz.4h v0, v0 ; encoding: [0x00,0x98,0xf9,0x0e]
+; CHECK: frsqrte.4h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x2e]
+; CHECK: fsqrt.4h v0, v0 ; encoding: [0x00,0xf8,0xf9,0x2e]
+
+ fabs.8h v0, v0
+ fneg.8h v0, v0
+ frecpe.8h v0, v0
+ frinta.8h v0, v0
+ frintx.8h v0, v0
+ frinti.8h v0, v0
+ frintm.8h v0, v0
+ frintn.8h v0, v0
+ frintp.8h v0, v0
+ frintz.8h v0, v0
+ frsqrte.8h v0, v0
+ fsqrt.8h v0, v0
+
+; CHECK: fabs.8h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x4e]
+; CHECK: fneg.8h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x6e]
+; CHECK: frecpe.8h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x4e]
+; CHECK: frinta.8h v0, v0 ; encoding: [0x00,0x88,0x79,0x6e]
+; CHECK: frintx.8h v0, v0 ; encoding: [0x00,0x98,0x79,0x6e]
+; CHECK: frinti.8h v0, v0 ; encoding: [0x00,0x98,0xf9,0x6e]
+; CHECK: frintm.8h v0, v0 ; encoding: [0x00,0x98,0x79,0x4e]
+; CHECK: frintn.8h v0, v0 ; encoding: [0x00,0x88,0x79,0x4e]
+; CHECK: frintp.8h v0, v0 ; encoding: [0x00,0x88,0xf9,0x4e]
+; CHECK: frintz.8h v0, v0 ; encoding: [0x00,0x98,0xf9,0x4e]
+; CHECK: frsqrte.8h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x6e]
+; CHECK: fsqrt.8h v0, v0 ; encoding: [0x00,0xf8,0xf9,0x6e]
cmeq.8b v0, v0, #0
cmeq.16b v0, v0, #0
diff --git a/test/MC/AArch64/arm64-diags.s b/test/MC/AArch64/arm64-diags.s
index f8138bde3a4f..3510193a71ff 100644
--- a/test/MC/AArch64/arm64-diags.s
+++ b/test/MC/AArch64/arm64-diags.s
@@ -426,3 +426,66 @@ tlbi vale2
; CHECK-ERRORS: error: specified tlbi op requires a register
tlbi vale3
; CHECK-ERRORS: error: specified tlbi op requires a register
+
+
+; Check that we give the proper "too few operands" diagnostic even when
+; using short-form NEON.
+
+ add.16b v0, v1, v2, v3
+ add.8b v0, v1
+ sub.8h v0, v1
+ fadd.4s v0
+ fmul.2s
+
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: add.16b v0, v1, v2, v3
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: too few operands for instruction
+; CHECK-ERRORS: add.8b v0, v1
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: too few operands for instruction
+; CHECK-ERRORS: sub.8h v0, v1
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: too few operands for instruction
+; CHECK-ERRORS: fadd.4s v0
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: too few operands for instruction
+; CHECK-ERRORS: fmul.2s
+; CHECK-ERRORS: ^
+
+; Also for 2-operand instructions.
+
+ frsqrte.4s v0, v1, v2
+ frsqrte.2s v0
+ frecpe.2d
+
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: frsqrte.4s v0, v1, v2
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: too few operands for instruction
+; CHECK-ERRORS: frsqrte.2s v0
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: too few operands for instruction
+; CHECK-ERRORS: frecpe.2d
+; CHECK-ERRORS: ^
+
+; And check that we do the same for non-NEON instructions.
+
+ b.ne
+ b.eq 0, 0
+
+; CHECK-ERRORS: error: too few operands for instruction
+; CHECK-ERRORS: b.ne
+; CHECK-ERRORS: ^
+; CHECK-ERRORS: error: invalid operand for instruction
+; CHECK-ERRORS: b.eq 0, 0
+; CHECK-ERRORS: ^
+
+; Check that we give the proper "too few operands" diagnostic instead of
+; asserting.
+
+ ldr
+
+; CHECK-ERRORS: error: too few operands for instruction
+; CHECK-ERRORS: ldr
+; CHECK-ERRORS: ^
diff --git a/test/MC/AArch64/arm64-fp-encoding.s b/test/MC/AArch64/arm64-fp-encoding.s
index 684d9883e37f..8187e4a6fcfd 100644
--- a/test/MC/AArch64/arm64-fp-encoding.s
+++ b/test/MC/AArch64/arm64-fp-encoding.s
@@ -1,99 +1,165 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon -show-encoding -output-asm-variant=1 < %s | FileCheck %s
+; RUN: not llvm-mc -triple arm64-apple-darwin -mattr=neon -show-encoding -output-asm-variant=1 < %s 2>%t | FileCheck %s
+; RUN: FileCheck %s < %t --check-prefix=NO-FP16
+; RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon,v8.2a,fullfp16 -show-encoding -output-asm-variant=1 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
foo:
;-----------------------------------------------------------------------------
; Floating-point arithmetic
;-----------------------------------------------------------------------------
+ fabs h1, h2
fabs s1, s2
fabs d1, d2
+; FP16: fabs h1, h2 ; encoding: [0x41,0xc0,0xe0,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fabs h1, h2
; CHECK: fabs s1, s2 ; encoding: [0x41,0xc0,0x20,0x1e]
; CHECK: fabs d1, d2 ; encoding: [0x41,0xc0,0x60,0x1e]
+ fadd h1, h2, h3
fadd s1, s2, s3
fadd d1, d2, d3
+; FP16: fadd h1, h2, h3 ; encoding: [0x41,0x28,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fadd h1, h2, h3
; CHECK: fadd s1, s2, s3 ; encoding: [0x41,0x28,0x23,0x1e]
; CHECK: fadd d1, d2, d3 ; encoding: [0x41,0x28,0x63,0x1e]
+ fdiv h1, h2, h3
fdiv s1, s2, s3
fdiv d1, d2, d3
+; FP16: fdiv h1, h2, h3 ; encoding: [0x41,0x18,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fdiv h1, h2, h3
; CHECK: fdiv s1, s2, s3 ; encoding: [0x41,0x18,0x23,0x1e]
; CHECK: fdiv d1, d2, d3 ; encoding: [0x41,0x18,0x63,0x1e]
+ fmadd h1, h2, h3, h4
fmadd s1, s2, s3, s4
fmadd d1, d2, d3, d4
+; FP16: fmadd h1, h2, h3, h4 ; encoding: [0x41,0x10,0xc3,0x1f]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmadd h1, h2, h3, h4
; CHECK: fmadd s1, s2, s3, s4 ; encoding: [0x41,0x10,0x03,0x1f]
; CHECK: fmadd d1, d2, d3, d4 ; encoding: [0x41,0x10,0x43,0x1f]
+ fmax h1, h2, h3
fmax s1, s2, s3
fmax d1, d2, d3
+ fmaxnm h1, h2, h3
fmaxnm s1, s2, s3
fmaxnm d1, d2, d3
+; FP16: fmax h1, h2, h3 ; encoding: [0x41,0x48,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmax h1, h2, h3
; CHECK: fmax s1, s2, s3 ; encoding: [0x41,0x48,0x23,0x1e]
; CHECK: fmax d1, d2, d3 ; encoding: [0x41,0x48,0x63,0x1e]
+; FP16: fmaxnm h1, h2, h3 ; encoding: [0x41,0x68,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmaxnm h1, h2, h3
; CHECK: fmaxnm s1, s2, s3 ; encoding: [0x41,0x68,0x23,0x1e]
; CHECK: fmaxnm d1, d2, d3 ; encoding: [0x41,0x68,0x63,0x1e]
+ fmin h1, h2, h3
fmin s1, s2, s3
fmin d1, d2, d3
+ fminnm h1, h2, h3
fminnm s1, s2, s3
fminnm d1, d2, d3
+; FP16: fmin h1, h2, h3 ; encoding: [0x41,0x58,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmin h1, h2, h3
; CHECK: fmin s1, s2, s3 ; encoding: [0x41,0x58,0x23,0x1e]
; CHECK: fmin d1, d2, d3 ; encoding: [0x41,0x58,0x63,0x1e]
+; FP16: fminnm h1, h2, h3 ; encoding: [0x41,0x78,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fminnm h1, h2, h3
; CHECK: fminnm s1, s2, s3 ; encoding: [0x41,0x78,0x23,0x1e]
; CHECK: fminnm d1, d2, d3 ; encoding: [0x41,0x78,0x63,0x1e]
+ fmsub h1, h2, h3, h4
fmsub s1, s2, s3, s4
fmsub d1, d2, d3, d4
+; FP16: fmsub h1, h2, h3, h4 ; encoding: [0x41,0x90,0xc3,0x1f]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmsub h1, h2, h3, h4
; CHECK: fmsub s1, s2, s3, s4 ; encoding: [0x41,0x90,0x03,0x1f]
; CHECK: fmsub d1, d2, d3, d4 ; encoding: [0x41,0x90,0x43,0x1f]
+ fmul h1, h2, h3
fmul s1, s2, s3
fmul d1, d2, d3
+; FP16: fmul h1, h2, h3 ; encoding: [0x41,0x08,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmul h1, h2, h3
; CHECK: fmul s1, s2, s3 ; encoding: [0x41,0x08,0x23,0x1e]
; CHECK: fmul d1, d2, d3 ; encoding: [0x41,0x08,0x63,0x1e]
+ fneg h1, h2
fneg s1, s2
fneg d1, d2
+; FP16: fneg h1, h2 ; encoding: [0x41,0x40,0xe1,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fneg h1, h2
; CHECK: fneg s1, s2 ; encoding: [0x41,0x40,0x21,0x1e]
; CHECK: fneg d1, d2 ; encoding: [0x41,0x40,0x61,0x1e]
+ fnmadd h1, h2, h3, h4
fnmadd s1, s2, s3, s4
fnmadd d1, d2, d3, d4
+; FP16: fnmadd h1, h2, h3, h4 ; encoding: [0x41,0x10,0xe3,0x1f]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fnmadd h1, h2, h3, h4
; CHECK: fnmadd s1, s2, s3, s4 ; encoding: [0x41,0x10,0x23,0x1f]
; CHECK: fnmadd d1, d2, d3, d4 ; encoding: [0x41,0x10,0x63,0x1f]
+ fnmsub h1, h2, h3, h4
fnmsub s1, s2, s3, s4
fnmsub d1, d2, d3, d4
+; FP16: fnmsub h1, h2, h3, h4 ; encoding: [0x41,0x90,0xe3,0x1f]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fnmsub h1, h2, h3, h4
; CHECK: fnmsub s1, s2, s3, s4 ; encoding: [0x41,0x90,0x23,0x1f]
; CHECK: fnmsub d1, d2, d3, d4 ; encoding: [0x41,0x90,0x63,0x1f]
+ fnmul h1, h2, h3
fnmul s1, s2, s3
fnmul d1, d2, d3
+; FP16: fnmul h1, h2, h3 ; encoding: [0x41,0x88,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fnmul h1, h2, h3
; CHECK: fnmul s1, s2, s3 ; encoding: [0x41,0x88,0x23,0x1e]
; CHECK: fnmul d1, d2, d3 ; encoding: [0x41,0x88,0x63,0x1e]
+ fsqrt h1, h2
fsqrt s1, s2
fsqrt d1, d2
+; FP16: fsqrt h1, h2 ; encoding: [0x41,0xc0,0xe1,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fsqrt h1, h2
; CHECK: fsqrt s1, s2 ; encoding: [0x41,0xc0,0x21,0x1e]
; CHECK: fsqrt d1, d2 ; encoding: [0x41,0xc0,0x61,0x1e]
+ fsub h1, h2, h3
fsub s1, s2, s3
fsub d1, d2, d3
+; FP16: fsub h1, h2, h3 ; encoding: [0x41,0x38,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fsub h1, h2, h3
; CHECK: fsub s1, s2, s3 ; encoding: [0x41,0x38,0x23,0x1e]
; CHECK: fsub d1, d2, d3 ; encoding: [0x41,0x38,0x63,0x1e]
@@ -101,31 +167,55 @@ foo:
; Floating-point comparison
;-----------------------------------------------------------------------------
+ fccmp h1, h2, #0, eq
fccmp s1, s2, #0, eq
fccmp d1, d2, #0, eq
+ fccmpe h1, h2, #0, eq
fccmpe s1, s2, #0, eq
fccmpe d1, d2, #0, eq
+; FP16: fccmp h1, h2, #0, eq ; encoding: [0x20,0x04,0xe2,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fccmp h1, h2, #0, eq
; CHECK: fccmp s1, s2, #0, eq ; encoding: [0x20,0x04,0x22,0x1e]
; CHECK: fccmp d1, d2, #0, eq ; encoding: [0x20,0x04,0x62,0x1e]
+; FP16: fccmpe h1, h2, #0, eq ; encoding: [0x30,0x04,0xe2,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fccmpe h1, h2, #0, eq
; CHECK: fccmpe s1, s2, #0, eq ; encoding: [0x30,0x04,0x22,0x1e]
; CHECK: fccmpe d1, d2, #0, eq ; encoding: [0x30,0x04,0x62,0x1e]
+ fcmp h1, h2
fcmp s1, s2
fcmp d1, d2
+ fcmp h1, #0.0
fcmp s1, #0.0
fcmp d1, #0.0
+ fcmpe h1, h2
fcmpe s1, s2
fcmpe d1, d2
+ fcmpe h1, #0.0
fcmpe s1, #0.0
fcmpe d1, #0.0
+; FP16: fcmp h1, h2 ; encoding: [0x20,0x20,0xe2,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcmp h1, h2
; CHECK: fcmp s1, s2 ; encoding: [0x20,0x20,0x22,0x1e]
; CHECK: fcmp d1, d2 ; encoding: [0x20,0x20,0x62,0x1e]
+; FP16: fcmp h1, #0.0 ; encoding: [0x28,0x20,0xe0,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcmp h1, #0.0
; CHECK: fcmp s1, #0.0 ; encoding: [0x28,0x20,0x20,0x1e]
; CHECK: fcmp d1, #0.0 ; encoding: [0x28,0x20,0x60,0x1e]
+; FP16: fcmpe h1, h2 ; encoding: [0x30,0x20,0xe2,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcmpe h1, h2
; CHECK: fcmpe s1, s2 ; encoding: [0x30,0x20,0x22,0x1e]
; CHECK: fcmpe d1, d2 ; encoding: [0x30,0x20,0x62,0x1e]
+; FP16: fcmpe h1, #0.0 ; encoding: [0x38,0x20,0xe0,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcmpe h1, #0.0
; CHECK: fcmpe s1, #0.0 ; encoding: [0x38,0x20,0x20,0x1e]
; CHECK: fcmpe d1, #0.0 ; encoding: [0x38,0x20,0x60,0x1e]
@@ -133,9 +223,13 @@ foo:
; Floating-point conditional select
;-----------------------------------------------------------------------------
+ fcsel h1, h2, h3, eq
fcsel s1, s2, s3, eq
fcsel d1, d2, d3, eq
+; FP16: fcsel h1, h2, h3, eq ; encoding: [0x41,0x0c,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcsel h1, h2, h3, eq
; CHECK: fcsel s1, s2, s3, eq ; encoding: [0x41,0x0c,0x23,0x1e]
; CHECK: fcsel d1, d2, d3, eq ; encoding: [0x41,0x0c,0x63,0x1e]
@@ -161,168 +255,314 @@ foo:
fcvtas x1, d2
fcvtas w1, s2
fcvtas x1, s2
-
-; CHECK: fcvtas w1, d2 ; encoding: [0x41,0x00,0x64,0x1e]
-; CHECK: fcvtas x1, d2 ; encoding: [0x41,0x00,0x64,0x9e]
-; CHECK: fcvtas w1, s2 ; encoding: [0x41,0x00,0x24,0x1e]
-; CHECK: fcvtas x1, s2 ; encoding: [0x41,0x00,0x24,0x9e]
-
+ fcvtas w1, h2
+ fcvtas x1, h2
+
+; CHECK: fcvtas w1, d2 ; encoding: [0x41,0x00,0x64,0x1e]
+; CHECK: fcvtas x1, d2 ; encoding: [0x41,0x00,0x64,0x9e]
+; CHECK: fcvtas w1, s2 ; encoding: [0x41,0x00,0x24,0x1e]
+; CHECK: fcvtas x1, s2 ; encoding: [0x41,0x00,0x24,0x9e]
+; FP16: fcvtas w1, h2 ; encoding: [0x41,0x00,0xe4,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtas w1, h2
+; FP16: fcvtas x1, h2 ; encoding: [0x41,0x00,0xe4,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtas x1, h2
+
+ fcvtau w1, h2
fcvtau w1, s2
fcvtau w1, d2
+ fcvtau x1, h2
fcvtau x1, s2
fcvtau x1, d2
-; CHECK: fcvtau w1, s2 ; encoding: [0x41,0x00,0x25,0x1e]
-; CHECK: fcvtau w1, d2 ; encoding: [0x41,0x00,0x65,0x1e]
-; CHECK: fcvtau x1, s2 ; encoding: [0x41,0x00,0x25,0x9e]
-; CHECK: fcvtau x1, d2 ; encoding: [0x41,0x00,0x65,0x9e]
-
+; FP16: fcvtau w1, h2 ; encoding: [0x41,0x00,0xe5,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtau w1, h2
+; CHECK: fcvtau w1, s2 ; encoding: [0x41,0x00,0x25,0x1e]
+; CHECK: fcvtau w1, d2 ; encoding: [0x41,0x00,0x65,0x1e]
+; FP16: fcvtau x1, h2 ; encoding: [0x41,0x00,0xe5,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtau x1, h2
+; CHECK: fcvtau x1, s2 ; encoding: [0x41,0x00,0x25,0x9e]
+; CHECK: fcvtau x1, d2 ; encoding: [0x41,0x00,0x65,0x9e]
+
+ fcvtms w1, h2
fcvtms w1, s2
fcvtms w1, d2
+ fcvtms x1, h2
fcvtms x1, s2
fcvtms x1, d2
-; CHECK: fcvtms w1, s2 ; encoding: [0x41,0x00,0x30,0x1e]
-; CHECK: fcvtms w1, d2 ; encoding: [0x41,0x00,0x70,0x1e]
-; CHECK: fcvtms x1, s2 ; encoding: [0x41,0x00,0x30,0x9e]
-; CHECK: fcvtms x1, d2 ; encoding: [0x41,0x00,0x70,0x9e]
-
+; FP16: fcvtms w1, h2 ; encoding: [0x41,0x00,0xf0,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtms w1, h2
+; CHECK: fcvtms w1, s2 ; encoding: [0x41,0x00,0x30,0x1e]
+; CHECK: fcvtms w1, d2 ; encoding: [0x41,0x00,0x70,0x1e]
+; FP16: fcvtms x1, h2 ; encoding: [0x41,0x00,0xf0,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtms x1, h2
+; CHECK: fcvtms x1, s2 ; encoding: [0x41,0x00,0x30,0x9e]
+; CHECK: fcvtms x1, d2 ; encoding: [0x41,0x00,0x70,0x9e]
+
+ fcvtmu w1, h2
fcvtmu w1, s2
fcvtmu w1, d2
+ fcvtmu x1, h2
fcvtmu x1, s2
fcvtmu x1, d2
-; CHECK: fcvtmu w1, s2 ; encoding: [0x41,0x00,0x31,0x1e]
-; CHECK: fcvtmu w1, d2 ; encoding: [0x41,0x00,0x71,0x1e]
-; CHECK: fcvtmu x1, s2 ; encoding: [0x41,0x00,0x31,0x9e]
-; CHECK: fcvtmu x1, d2 ; encoding: [0x41,0x00,0x71,0x9e]
-
+; FP16: fcvtmu w1, h2 ; encoding: [0x41,0x00,0xf1,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtmu w1, h2
+; CHECK: fcvtmu w1, s2 ; encoding: [0x41,0x00,0x31,0x1e]
+; CHECK: fcvtmu w1, d2 ; encoding: [0x41,0x00,0x71,0x1e]
+; FP16: fcvtmu x1, h2 ; encoding: [0x41,0x00,0xf1,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtmu x1, h2
+; CHECK: fcvtmu x1, s2 ; encoding: [0x41,0x00,0x31,0x9e]
+; CHECK: fcvtmu x1, d2 ; encoding: [0x41,0x00,0x71,0x9e]
+
+ fcvtns w1, h2
fcvtns w1, s2
fcvtns w1, d2
+ fcvtns x1, h2
fcvtns x1, s2
fcvtns x1, d2
-; CHECK: fcvtns w1, s2 ; encoding: [0x41,0x00,0x20,0x1e]
-; CHECK: fcvtns w1, d2 ; encoding: [0x41,0x00,0x60,0x1e]
-; CHECK: fcvtns x1, s2 ; encoding: [0x41,0x00,0x20,0x9e]
-; CHECK: fcvtns x1, d2 ; encoding: [0x41,0x00,0x60,0x9e]
-
+; FP16: fcvtns w1, h2 ; encoding: [0x41,0x00,0xe0,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtns w1, h2
+; CHECK: fcvtns w1, s2 ; encoding: [0x41,0x00,0x20,0x1e]
+; CHECK: fcvtns w1, d2 ; encoding: [0x41,0x00,0x60,0x1e]
+; FP16: fcvtns x1, h2 ; encoding: [0x41,0x00,0xe0,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtns x1, h2
+; CHECK: fcvtns x1, s2 ; encoding: [0x41,0x00,0x20,0x9e]
+; CHECK: fcvtns x1, d2 ; encoding: [0x41,0x00,0x60,0x9e]
+
+ fcvtnu w1, h2
fcvtnu w1, s2
fcvtnu w1, d2
+ fcvtnu x1, h2
fcvtnu x1, s2
fcvtnu x1, d2
-; CHECK: fcvtnu w1, s2 ; encoding: [0x41,0x00,0x21,0x1e]
-; CHECK: fcvtnu w1, d2 ; encoding: [0x41,0x00,0x61,0x1e]
-; CHECK: fcvtnu x1, s2 ; encoding: [0x41,0x00,0x21,0x9e]
-; CHECK: fcvtnu x1, d2 ; encoding: [0x41,0x00,0x61,0x9e]
-
+; FP16: fcvtnu w1, h2 ; encoding: [0x41,0x00,0xe1,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtnu w1, h2
+; CHECK: fcvtnu w1, s2 ; encoding: [0x41,0x00,0x21,0x1e]
+; CHECK: fcvtnu w1, d2 ; encoding: [0x41,0x00,0x61,0x1e]
+; FP16: fcvtnu x1, h2 ; encoding: [0x41,0x00,0xe1,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtnu x1, h2
+; CHECK: fcvtnu x1, s2 ; encoding: [0x41,0x00,0x21,0x9e]
+; CHECK: fcvtnu x1, d2 ; encoding: [0x41,0x00,0x61,0x9e]
+
+ fcvtps w1, h2
fcvtps w1, s2
fcvtps w1, d2
+ fcvtps x1, h2
fcvtps x1, s2
fcvtps x1, d2
-; CHECK: fcvtps w1, s2 ; encoding: [0x41,0x00,0x28,0x1e]
-; CHECK: fcvtps w1, d2 ; encoding: [0x41,0x00,0x68,0x1e]
-; CHECK: fcvtps x1, s2 ; encoding: [0x41,0x00,0x28,0x9e]
-; CHECK: fcvtps x1, d2 ; encoding: [0x41,0x00,0x68,0x9e]
-
+; FP16: fcvtps w1, h2 ; encoding: [0x41,0x00,0xe8,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtps w1, h2
+; CHECK: fcvtps w1, s2 ; encoding: [0x41,0x00,0x28,0x1e]
+; CHECK: fcvtps w1, d2 ; encoding: [0x41,0x00,0x68,0x1e]
+; FP16: fcvtps x1, h2 ; encoding: [0x41,0x00,0xe8,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtps x1, h2
+; CHECK: fcvtps x1, s2 ; encoding: [0x41,0x00,0x28,0x9e]
+; CHECK: fcvtps x1, d2 ; encoding: [0x41,0x00,0x68,0x9e]
+
+ fcvtpu w1, h2
fcvtpu w1, s2
fcvtpu w1, d2
+ fcvtpu x1, h2
fcvtpu x1, s2
fcvtpu x1, d2
-; CHECK: fcvtpu w1, s2 ; encoding: [0x41,0x00,0x29,0x1e]
-; CHECK: fcvtpu w1, d2 ; encoding: [0x41,0x00,0x69,0x1e]
-; CHECK: fcvtpu x1, s2 ; encoding: [0x41,0x00,0x29,0x9e]
-; CHECK: fcvtpu x1, d2 ; encoding: [0x41,0x00,0x69,0x9e]
-
+; FP16: fcvtpu w1, h2 ; encoding: [0x41,0x00,0xe9,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtpu w1, h2
+; CHECK: fcvtpu w1, s2 ; encoding: [0x41,0x00,0x29,0x1e]
+; CHECK: fcvtpu w1, d2 ; encoding: [0x41,0x00,0x69,0x1e]
+; FP16: fcvtpu x1, h2 ; encoding: [0x41,0x00,0xe9,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtpu x1, h2
+; CHECK: fcvtpu x1, s2 ; encoding: [0x41,0x00,0x29,0x9e]
+; CHECK: fcvtpu x1, d2 ; encoding: [0x41,0x00,0x69,0x9e]
+
+ fcvtzs w1, h2
+ fcvtzs w1, h2, #1
fcvtzs w1, s2
fcvtzs w1, s2, #1
fcvtzs w1, d2
fcvtzs w1, d2, #1
+ fcvtzs x1, h2
+ fcvtzs x1, h2, #1
fcvtzs x1, s2
fcvtzs x1, s2, #1
fcvtzs x1, d2
fcvtzs x1, d2, #1
-; CHECK: fcvtzs w1, s2 ; encoding: [0x41,0x00,0x38,0x1e]
-; CHECK: fcvtzs w1, s2, #1 ; encoding: [0x41,0xfc,0x18,0x1e]
-; CHECK: fcvtzs w1, d2 ; encoding: [0x41,0x00,0x78,0x1e]
-; CHECK: fcvtzs w1, d2, #1 ; encoding: [0x41,0xfc,0x58,0x1e]
-; CHECK: fcvtzs x1, s2 ; encoding: [0x41,0x00,0x38,0x9e]
-; CHECK: fcvtzs x1, s2, #1 ; encoding: [0x41,0xfc,0x18,0x9e]
-; CHECK: fcvtzs x1, d2 ; encoding: [0x41,0x00,0x78,0x9e]
-; CHECK: fcvtzs x1, d2, #1 ; encoding: [0x41,0xfc,0x58,0x9e]
-
+; FP16: fcvtzs w1, h2 ; encoding: [0x41,0x00,0xf8,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtzs w1, h2
+; FP16: fcvtzs w1, h2, #1 ; encoding: [0x41,0xfc,0xd8,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtzs w1, h2, #1
+; CHECK: fcvtzs w1, s2 ; encoding: [0x41,0x00,0x38,0x1e]
+; CHECK: fcvtzs w1, s2, #1 ; encoding: [0x41,0xfc,0x18,0x1e]
+; CHECK: fcvtzs w1, d2 ; encoding: [0x41,0x00,0x78,0x1e]
+; CHECK: fcvtzs w1, d2, #1 ; encoding: [0x41,0xfc,0x58,0x1e]
+; FP16: fcvtzs x1, h2 ; encoding: [0x41,0x00,0xf8,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtzs x1, h2
+; FP16: fcvtzs x1, h2, #1 ; encoding: [0x41,0xfc,0xd8,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtzs x1, h2, #1
+; CHECK: fcvtzs x1, s2 ; encoding: [0x41,0x00,0x38,0x9e]
+; CHECK: fcvtzs x1, s2, #1 ; encoding: [0x41,0xfc,0x18,0x9e]
+; CHECK: fcvtzs x1, d2 ; encoding: [0x41,0x00,0x78,0x9e]
+; CHECK: fcvtzs x1, d2, #1 ; encoding: [0x41,0xfc,0x58,0x9e]
+
+ fcvtzu w1, h2
+ fcvtzu w1, h2, #1
fcvtzu w1, s2
fcvtzu w1, s2, #1
fcvtzu w1, d2
fcvtzu w1, d2, #1
+ fcvtzu x1, h2
+ fcvtzu x1, h2, #1
fcvtzu x1, s2
fcvtzu x1, s2, #1
fcvtzu x1, d2
fcvtzu x1, d2, #1
-; CHECK: fcvtzu w1, s2 ; encoding: [0x41,0x00,0x39,0x1e]
-; CHECK: fcvtzu w1, s2, #1 ; encoding: [0x41,0xfc,0x19,0x1e]
-; CHECK: fcvtzu w1, d2 ; encoding: [0x41,0x00,0x79,0x1e]
-; CHECK: fcvtzu w1, d2, #1 ; encoding: [0x41,0xfc,0x59,0x1e]
-; CHECK: fcvtzu x1, s2 ; encoding: [0x41,0x00,0x39,0x9e]
-; CHECK: fcvtzu x1, s2, #1 ; encoding: [0x41,0xfc,0x19,0x9e]
-; CHECK: fcvtzu x1, d2 ; encoding: [0x41,0x00,0x79,0x9e]
-; CHECK: fcvtzu x1, d2, #1 ; encoding: [0x41,0xfc,0x59,0x9e]
-
+; FP16: fcvtzu w1, h2 ; encoding: [0x41,0x00,0xf9,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtzu w1, h2
+; FP16: fcvtzu w1, h2, #1 ; encoding: [0x41,0xfc,0xd9,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtzu w1, h2, #1
+; CHECK: fcvtzu w1, s2 ; encoding: [0x41,0x00,0x39,0x1e]
+; CHECK: fcvtzu w1, s2, #1 ; encoding: [0x41,0xfc,0x19,0x1e]
+; CHECK: fcvtzu w1, d2 ; encoding: [0x41,0x00,0x79,0x1e]
+; CHECK: fcvtzu w1, d2, #1 ; encoding: [0x41,0xfc,0x59,0x1e]
+; FP16: fcvtzu x1, h2 ; encoding: [0x41,0x00,0xf9,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtzu x1, h2
+; FP16: fcvtzu x1, h2, #1 ; encoding: [0x41,0xfc,0xd9,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fcvtzu x1, h2, #1
+; CHECK: fcvtzu x1, s2 ; encoding: [0x41,0x00,0x39,0x9e]
+; CHECK: fcvtzu x1, s2, #1 ; encoding: [0x41,0xfc,0x19,0x9e]
+; CHECK: fcvtzu x1, d2 ; encoding: [0x41,0x00,0x79,0x9e]
+; CHECK: fcvtzu x1, d2, #1 ; encoding: [0x41,0xfc,0x59,0x9e]
+
+ scvtf h1, w2
+ scvtf h1, w2, #1
scvtf s1, w2
scvtf s1, w2, #1
scvtf d1, w2
scvtf d1, w2, #1
+ scvtf h1, x2
+ scvtf h1, x2, #1
scvtf s1, x2
scvtf s1, x2, #1
scvtf d1, x2
scvtf d1, x2, #1
-; CHECK: scvtf s1, w2 ; encoding: [0x41,0x00,0x22,0x1e]
-; CHECK: scvtf s1, w2, #1 ; encoding: [0x41,0xfc,0x02,0x1e]
-; CHECK: scvtf d1, w2 ; encoding: [0x41,0x00,0x62,0x1e]
-; CHECK: scvtf d1, w2, #1 ; encoding: [0x41,0xfc,0x42,0x1e]
-; CHECK: scvtf s1, x2 ; encoding: [0x41,0x00,0x22,0x9e]
-; CHECK: scvtf s1, x2, #1 ; encoding: [0x41,0xfc,0x02,0x9e]
-; CHECK: scvtf d1, x2 ; encoding: [0x41,0x00,0x62,0x9e]
-; CHECK: scvtf d1, x2, #1 ; encoding: [0x41,0xfc,0x42,0x9e]
-
+; FP16: scvtf h1, w2 ; encoding: [0x41,0x00,0xe2,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: scvtf h1, w2
+; FP16: scvtf h1, w2, #1 ; encoding: [0x41,0xfc,0xc2,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: scvtf h1, w2, #1
+; CHECK: scvtf s1, w2 ; encoding: [0x41,0x00,0x22,0x1e]
+; CHECK: scvtf s1, w2, #1 ; encoding: [0x41,0xfc,0x02,0x1e]
+; CHECK: scvtf d1, w2 ; encoding: [0x41,0x00,0x62,0x1e]
+; CHECK: scvtf d1, w2, #1 ; encoding: [0x41,0xfc,0x42,0x1e]
+; FP16: scvtf h1, x2 ; encoding: [0x41,0x00,0xe2,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: scvtf h1, x2
+; FP16: scvtf h1, x2, #1 ; encoding: [0x41,0xfc,0xc2,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: scvtf h1, x2, #1
+; CHECK: scvtf s1, x2 ; encoding: [0x41,0x00,0x22,0x9e]
+; CHECK: scvtf s1, x2, #1 ; encoding: [0x41,0xfc,0x02,0x9e]
+; CHECK: scvtf d1, x2 ; encoding: [0x41,0x00,0x62,0x9e]
+; CHECK: scvtf d1, x2, #1 ; encoding: [0x41,0xfc,0x42,0x9e]
+
+ ucvtf h1, w2
+ ucvtf h1, w2, #1
ucvtf s1, w2
ucvtf s1, w2, #1
ucvtf d1, w2
ucvtf d1, w2, #1
+ ucvtf h1, x2
+ ucvtf h1, x2, #1
ucvtf s1, x2
ucvtf s1, x2, #1
ucvtf d1, x2
ucvtf d1, x2, #1
-; CHECK: ucvtf s1, w2 ; encoding: [0x41,0x00,0x23,0x1e]
-; CHECK: ucvtf s1, w2, #1 ; encoding: [0x41,0xfc,0x03,0x1e]
-; CHECK: ucvtf d1, w2 ; encoding: [0x41,0x00,0x63,0x1e]
-; CHECK: ucvtf d1, w2, #1 ; encoding: [0x41,0xfc,0x43,0x1e]
-; CHECK: ucvtf s1, x2 ; encoding: [0x41,0x00,0x23,0x9e]
-; CHECK: ucvtf s1, x2, #1 ; encoding: [0x41,0xfc,0x03,0x9e]
-; CHECK: ucvtf d1, x2 ; encoding: [0x41,0x00,0x63,0x9e]
-; CHECK: ucvtf d1, x2, #1 ; encoding: [0x41,0xfc,0x43,0x9e]
+; FP16: ucvtf h1, w2 ; encoding: [0x41,0x00,0xe3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: ucvtf h1, w2
+; FP16: ucvtf h1, w2, #1 ; encoding: [0x41,0xfc,0xc3,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: ucvtf h1, w2, #1
+; CHECK: ucvtf s1, w2 ; encoding: [0x41,0x00,0x23,0x1e]
+; CHECK: ucvtf s1, w2, #1 ; encoding: [0x41,0xfc,0x03,0x1e]
+; CHECK: ucvtf d1, w2 ; encoding: [0x41,0x00,0x63,0x1e]
+; CHECK: ucvtf d1, w2, #1 ; encoding: [0x41,0xfc,0x43,0x1e]
+; FP16: ucvtf h1, x2 ; encoding: [0x41,0x00,0xe3,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: ucvtf h1, x2
+; FP16: ucvtf h1, x2, #1 ; encoding: [0x41,0xfc,0xc3,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: ucvtf h1, x2, #1
+; CHECK: ucvtf s1, x2 ; encoding: [0x41,0x00,0x23,0x9e]
+; CHECK: ucvtf s1, x2, #1 ; encoding: [0x41,0xfc,0x03,0x9e]
+; CHECK: ucvtf d1, x2 ; encoding: [0x41,0x00,0x63,0x9e]
+; CHECK: ucvtf d1, x2, #1 ; encoding: [0x41,0xfc,0x43,0x9e]
;-----------------------------------------------------------------------------
; Floating-point move
;-----------------------------------------------------------------------------
+ fmov h1, w2
+ fmov w1, h2
+ fmov h1, x2
+ fmov x1, h2
fmov s1, w2
fmov w1, s2
fmov d1, x2
fmov x1, d2
+; FP16: fmov h1, w2 ; encoding: [0x41,0x00,0xe7,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmov h1, w2
+; FP16: fmov w1, h2 ; encoding: [0x41,0x00,0xe6,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmov w1, h2
+; FP16: fmov h1, x2 ; encoding: [0x41,0x00,0xe7,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmov h1, x2
+; FP16: fmov x1, h2 ; encoding: [0x41,0x00,0xe6,0x9e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmov x1, h2
; CHECK: fmov s1, w2 ; encoding: [0x41,0x00,0x27,0x1e]
; CHECK: fmov w1, s2 ; encoding: [0x41,0x00,0x26,0x1e]
; CHECK: fmov d1, x2 ; encoding: [0x41,0x00,0x67,0x9e]
; CHECK: fmov x1, d2 ; encoding: [0x41,0x00,0x66,0x9e]
+ fmov h1, #0.125
+ fmov h1, #0x40
fmov s1, #0.125
fmov s1, #0x40
fmov d1, #0.125
@@ -330,9 +570,16 @@ foo:
fmov d1, #-4.843750e-01
fmov d1, #4.843750e-01
fmov d3, #3
+ fmov h2, #0.0
fmov s2, #0.0
fmov d2, #0.0
+; FP16: fmov h1, #0.12500000 ; encoding: [0x01,0x10,0xe8,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmov h1, #0.125
+; FP16: fmov h1, #0.12500000 ; encoding: [0x01,0x10,0xe8,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmov h1, #0x40
; CHECK: fmov s1, #0.12500000 ; encoding: [0x01,0x10,0x28,0x1e]
; CHECK: fmov s1, #0.12500000 ; encoding: [0x01,0x10,0x28,0x1e]
; CHECK: fmov d1, #0.12500000 ; encoding: [0x01,0x10,0x68,0x1e]
@@ -340,12 +587,19 @@ foo:
; CHECK: fmov d1, #-0.48437500 ; encoding: [0x01,0xf0,0x7b,0x1e]
; CHECK: fmov d1, #0.48437500 ; encoding: [0x01,0xf0,0x6b,0x1e]
; CHECK: fmov d3, #3.00000000 ; encoding: [0x03,0x10,0x61,0x1e]
+; FP16: fmov h2, wzr ; encoding: [0xe2,0x03,0xe7,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmov h2, #0.0
; CHECK: fmov s2, wzr ; encoding: [0xe2,0x03,0x27,0x1e]
; CHECK: fmov d2, xzr ; encoding: [0xe2,0x03,0x67,0x9e]
+ fmov h1, h2
fmov s1, s2
fmov d1, d2
+; FP16: fmov h1, h2 ; encoding: [0x41,0x40,0xe0,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: fmov h1, h2
; CHECK: fmov s1, s2 ; encoding: [0x41,0x40,0x20,0x1e]
; CHECK: fmov d1, d2 ; encoding: [0x41,0x40,0x60,0x1e]
@@ -355,63 +609,91 @@ foo:
fmov v1.d[1], x1
fmov.d v8[1], x6
-; CHECK: fmov.d x2, v5[1] ; encoding: [0xa2,0x00,0xae,0x9e]
-; CHECK: fmov.d x9, v7[1] ; encoding: [0xe9,0x00,0xae,0x9e]
-; CHECK: fmov.d v1[1], x1 ; encoding: [0x21,0x00,0xaf,0x9e]
-; CHECK: fmov.d v8[1], x6 ; encoding: [0xc8,0x00,0xaf,0x9e]
+; CHECK: fmov.d x2, v5[1] ; encoding: [0xa2,0x00,0xae,0x9e]
+; CHECK: fmov.d x9, v7[1] ; encoding: [0xe9,0x00,0xae,0x9e]
+; CHECK: fmov.d v1[1], x1 ; encoding: [0x21,0x00,0xaf,0x9e]
+; CHECK: fmov.d v8[1], x6 ; encoding: [0xc8,0x00,0xaf,0x9e]
;-----------------------------------------------------------------------------
; Floating-point round to integral
;-----------------------------------------------------------------------------
+ frinta h1, h2
frinta s1, s2
frinta d1, d2
+; FP16: frinta h1, h2 ; encoding: [0x41,0x40,0xe6,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: frinta h1, h2
; CHECK: frinta s1, s2 ; encoding: [0x41,0x40,0x26,0x1e]
; CHECK: frinta d1, d2 ; encoding: [0x41,0x40,0x66,0x1e]
+ frinti h1, h2
frinti s1, s2
frinti d1, d2
+; FP16: frinti h1, h2 ; encoding: [0x41,0xc0,0xe7,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: frinti h1, h2
; CHECK: frinti s1, s2 ; encoding: [0x41,0xc0,0x27,0x1e]
; CHECK: frinti d1, d2 ; encoding: [0x41,0xc0,0x67,0x1e]
+ frintm h1, h2
frintm s1, s2
frintm d1, d2
+; FP16: frintm h1, h2 ; encoding: [0x41,0x40,0xe5,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: frintm h1, h2
; CHECK: frintm s1, s2 ; encoding: [0x41,0x40,0x25,0x1e]
; CHECK: frintm d1, d2 ; encoding: [0x41,0x40,0x65,0x1e]
+ frintn h1, h2
frintn s1, s2
frintn d1, d2
+; FP16: frintn h1, h2 ; encoding: [0x41,0x40,0xe4,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: frintn h1, h2
; CHECK: frintn s1, s2 ; encoding: [0x41,0x40,0x24,0x1e]
; CHECK: frintn d1, d2 ; encoding: [0x41,0x40,0x64,0x1e]
+ frintp h1, h2
frintp s1, s2
frintp d1, d2
+; FP16: frintp h1, h2 ; encoding: [0x41,0xc0,0xe4,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: frintp h1, h2
; CHECK: frintp s1, s2 ; encoding: [0x41,0xc0,0x24,0x1e]
; CHECK: frintp d1, d2 ; encoding: [0x41,0xc0,0x64,0x1e]
+ frintx h1, h2
frintx s1, s2
frintx d1, d2
+; FP16: frintx h1, h2 ; encoding: [0x41,0x40,0xe7,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: frintx h1, h2
; CHECK: frintx s1, s2 ; encoding: [0x41,0x40,0x27,0x1e]
; CHECK: frintx d1, d2 ; encoding: [0x41,0x40,0x67,0x1e]
+ frintz h1, h2
frintz s1, s2
frintz d1, d2
+; FP16: frintz h1, h2 ; encoding: [0x41,0xc0,0xe5,0x1e]
+; NO-FP16: error: instruction requires:
+; NO-FP16-NEXT: frintz h1, h2
; CHECK: frintz s1, s2 ; encoding: [0x41,0xc0,0x25,0x1e]
; CHECK: frintz d1, d2 ; encoding: [0x41,0xc0,0x65,0x1e]
cmhs d0, d0, d0
cmtst d0, d0, d0
-; CHECK: cmhs d0, d0, d0 ; encoding: [0x00,0x3c,0xe0,0x7e]
-; CHECK: cmtst d0, d0, d0 ; encoding: [0x00,0x8c,0xe0,0x5e]
+; CHECK: cmhs d0, d0, d0 ; encoding: [0x00,0x3c,0xe0,0x7e]
+; CHECK: cmtst d0, d0, d0 ; encoding: [0x00,0x8c,0xe0,0x5e]
diff --git a/test/MC/AArch64/arm64-leaf-compact-unwind.s b/test/MC/AArch64/arm64-leaf-compact-unwind.s
index a0703f6360db..2ff7fe82e9be 100644
--- a/test/MC/AArch64/arm64-leaf-compact-unwind.s
+++ b/test/MC/AArch64/arm64-leaf-compact-unwind.s
@@ -22,6 +22,7 @@
// CHECK-NEXT: ]
// CHECK-NEXT: Reserved1:
// CHECK-NEXT: Reserved2:
+// CHECK-NEXT: Reserved3:
// CHECK-NEXT: Relocations [
// CHECK-NEXT: Relocation {
// CHECK-NEXT: Offset: 0x60
diff --git a/test/MC/AArch64/arm64-small-data-fixups.s b/test/MC/AArch64/arm64-small-data-fixups.s
index 3fe7c75c011d..6debe0b8fb04 100644
--- a/test/MC/AArch64/arm64-small-data-fixups.s
+++ b/test/MC/AArch64/arm64-small-data-fixups.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o - %s | macho-dump | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o - %s | llvm-readobj -r | FileCheck %s
foo:
.long 0
@@ -9,16 +9,15 @@ baz:
.byte foo - bar
.short foo - bar
-; CHECK: # Relocation 0
-; CHECK: (('word-0', 0x9),
-; CHECK: ('word-1', 0x1a000002)),
-; CHECK: # Relocation 1
-; CHECK: (('word-0', 0x9),
-; CHECK: ('word-1', 0xa000001)),
-; CHECK: # Relocation 2
-; CHECK: (('word-0', 0x8),
-; CHECK: ('word-1', 0x18000002)),
-; CHECK: # Relocation 3
-; CHECK: (('word-0', 0x8),
-; CHECK: ('word-1', 0x8000001)),
-
+; CHECK: File: <stdin>
+; CHECK: Format: Mach-O arm64
+; CHECK: Arch: aarch64
+; CHECK: AddressSize: 64bit
+; CHECK: Relocations [
+; CHECK: Section __text {
+; CHECK: 0x9 0 1 1 ARM64_RELOC_SUBTRACTOR 0 bar
+; CHECK: 0x9 0 1 1 ARM64_RELOC_UNSIGNED 0 foo
+; CHECK: 0x8 0 0 1 ARM64_RELOC_SUBTRACTOR 0 bar
+; CHECK: 0x8 0 0 1 ARM64_RELOC_UNSIGNED 0 foo
+; CHECK: }
+; CHECK: ]
diff --git a/test/MC/AArch64/armv8.1a-pan.s b/test/MC/AArch64/armv8.1a-pan.s
index 2068c81d939f..c283cb818e25 100644
--- a/test/MC/AArch64/armv8.1a-pan.s
+++ b/test/MC/AArch64/armv8.1a-pan.s
@@ -13,16 +13,16 @@
// CHECK: mrs x13, PAN // encoding: [0x6d,0x42,0x38,0xd5]
msr pan, #-1
- msr pan, #20
+ msr pan, #2
msr pan, w0
mrs w0, pan
-// CHECK-ERROR: error: immediate must be an integer in range [0, 15].
+// CHECK-ERROR: error: immediate must be an integer in range [0, 1].
// CHECK-ERROR: msr pan, #-1
// CHECK-ERROR: ^
-// CHECK-ERROR: error: immediate must be an integer in range [0, 15].
-// CHECK-ERROR: msr pan, #20
+// CHECK-ERROR: error: immediate must be an integer in range [0, 1].
+// CHECK-ERROR: msr pan, #2
// CHECK-ERROR: ^
-// CHECK-ERROR: error: immediate must be an integer in range [0, 15].
+// CHECK-ERROR: error: immediate must be an integer in range [0, 1].
// CHECK-ERROR: msr pan, w0
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
diff --git a/test/MC/AArch64/armv8.1a-rdma.s b/test/MC/AArch64/armv8.1a-rdma.s
index 1de2a0fb15dd..36158428d6c4 100644
--- a/test/MC/AArch64/armv8.1a-rdma.s
+++ b/test/MC/AArch64/armv8.1a-rdma.s
@@ -26,27 +26,9 @@
sqrdmlsh v0.8s, v1.8s, v2.8s
sqrdmlah v0.2s, v1.4h, v2.8h
sqrdmlsh v0.4s, v1.8h, v2.2s
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
-// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
-// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
-// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
-// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
-// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid vector kind qualifier
-// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
-// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
diff --git a/test/MC/AArch64/armv8.2a-at.s b/test/MC/AArch64/armv8.2a-at.s
new file mode 100644
index 000000000000..741f6922a9e4
--- /dev/null
+++ b/test/MC/AArch64/armv8.2a-at.s
@@ -0,0 +1,9 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a < %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+ at s1e1rp, x1
+ at s1e1wp, x2
+// CHECK: at s1e1rp, x1 // encoding: [0x01,0x79,0x08,0xd5]
+// CHECK: at s1e1wp, x2 // encoding: [0x22,0x79,0x08,0xd5]
+// ERROR: error: AT S1E1RP requires ARMv8.2a
+// ERROR: error: AT S1E1WP requires ARMv8.2a
diff --git a/test/MC/AArch64/armv8.2a-mmfr2.s b/test/MC/AArch64/armv8.2a-mmfr2.s
new file mode 100644
index 000000000000..5a9b1f1f42a0
--- /dev/null
+++ b/test/MC/AArch64/armv8.2a-mmfr2.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a < %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+ mrs x3, id_aa64mmfr2_el1
+// CHECK: mrs x3, ID_AA64MMFR2_EL1 // encoding: [0x43,0x07,0x38,0xd5]
+// ERROR: error: expected readable system register
diff --git a/test/MC/AArch64/armv8.2a-persistent-memory.s b/test/MC/AArch64/armv8.2a-persistent-memory.s
new file mode 100644
index 000000000000..8a7600ee904d
--- /dev/null
+++ b/test/MC/AArch64/armv8.2a-persistent-memory.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-v8.2a < %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+ dc cvap, x7
+// CHECK: dc cvap, x7 // encoding: [0x27,0x7c,0x0b,0xd5]
+// ERROR: error: DC CVAP requires ARMv8.2a
diff --git a/test/MC/AArch64/armv8.2a-statistical-profiling.s b/test/MC/AArch64/armv8.2a-statistical-profiling.s
new file mode 100644
index 000000000000..5cb109318786
--- /dev/null
+++ b/test/MC/AArch64/armv8.2a-statistical-profiling.s
@@ -0,0 +1,87 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+spe < %s | FileCheck %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s 2>&1 | FileCheck --check-prefix=NO_SPE %s
+
+ psb csync
+// CHECK: psb csync // encoding: [0x3f,0x22,0x03,0xd5]
+// NO_SPE: invalid operand for instruction
+
+ msr pmblimitr_el1, x0
+ msr pmbptr_el1, x0
+ msr pmbsr_el1, x0
+ msr pmbidr_el1, x0
+ msr pmscr_el2, x0
+ msr pmscr_el12, x0
+ msr pmscr_el1, x0
+ msr pmsicr_el1, x0
+ msr pmsirr_el1, x0
+ msr pmsfcr_el1, x0
+ msr pmsevfr_el1, x0
+ msr pmslatfr_el1, x0
+ msr pmsidr_el1, x0
+// CHECK: msr PMBLIMITR_EL1, x0 // encoding: [0x00,0x9a,0x18,0xd5]
+// CHECK: msr PMBPTR_EL1, x0 // encoding: [0x20,0x9a,0x18,0xd5]
+// CHECK: msr PMBSR_EL1, x0 // encoding: [0x60,0x9a,0x18,0xd5]
+// CHECK: msr PMBIDR_EL1, x0 // encoding: [0xe0,0x9a,0x18,0xd5]
+// CHECK: msr PMSCR_EL2, x0 // encoding: [0x00,0x99,0x1c,0xd5]
+// CHECK: msr PMSCR_EL12, x0 // encoding: [0x00,0x99,0x1d,0xd5]
+// CHECK: msr PMSCR_EL1, x0 // encoding: [0x00,0x99,0x18,0xd5]
+// CHECK: msr PMSICR_EL1, x0 // encoding: [0x40,0x99,0x18,0xd5]
+// CHECK: msr PMSIRR_EL1, x0 // encoding: [0x60,0x99,0x18,0xd5]
+// CHECK: msr PMSFCR_EL1, x0 // encoding: [0x80,0x99,0x18,0xd5]
+// CHECK: msr PMSEVFR_EL1, x0 // encoding: [0xa0,0x99,0x18,0xd5]
+// CHECK: msr PMSLATFR_EL1, x0 // encoding: [0xc0,0x99,0x18,0xd5]
+// CHECK: msr PMSIDR_EL1, x0 // encoding: [0xe0,0x99,0x18,0xd5]
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+// NO_SPE: error: expected writable system register or pstate
+
+mrs x0, pmblimitr_el1
+ mrs x0, pmbptr_el1
+ mrs x0, pmbsr_el1
+ mrs x0, pmbidr_el1
+ mrs x0, pmscr_el2
+ mrs x0, pmscr_el12
+ mrs x0, pmscr_el1
+ mrs x0, pmsicr_el1
+ mrs x0, pmsirr_el1
+ mrs x0, pmsfcr_el1
+ mrs x0, pmsevfr_el1
+ mrs x0, pmslatfr_el1
+ mrs x0, pmsidr_el1
+
+// CHECK: mrs x0, PMBLIMITR_EL1 // encoding: [0x00,0x9a,0x38,0xd5]
+// CHECK: mrs x0, PMBPTR_EL1 // encoding: [0x20,0x9a,0x38,0xd5]
+// CHECK: mrs x0, PMBSR_EL1 // encoding: [0x60,0x9a,0x38,0xd5]
+// CHECK: mrs x0, PMBIDR_EL1 // encoding: [0xe0,0x9a,0x38,0xd5]
+// CHECK: mrs x0, PMSCR_EL2 // encoding: [0x00,0x99,0x3c,0xd5]
+// CHECK: mrs x0, PMSCR_EL12 // encoding: [0x00,0x99,0x3d,0xd5]
+// CHECK: mrs x0, PMSCR_EL1 // encoding: [0x00,0x99,0x38,0xd5]
+// CHECK: mrs x0, PMSICR_EL1 // encoding: [0x40,0x99,0x38,0xd5]
+// CHECK: mrs x0, PMSIRR_EL1 // encoding: [0x60,0x99,0x38,0xd5]
+// CHECK: mrs x0, PMSFCR_EL1 // encoding: [0x80,0x99,0x38,0xd5]
+// CHECK: mrs x0, PMSEVFR_EL1 // encoding: [0xa0,0x99,0x38,0xd5]
+// CHECK: mrs x0, PMSLATFR_EL1 // encoding: [0xc0,0x99,0x38,0xd5]
+// CHECK: mrs x0, PMSIDR_EL1 // encoding: [0xe0,0x99,0x38,0xd5]
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
+// NO_SPE: error: expected readable system register
diff --git a/test/MC/AArch64/armv8.2a-uao.s b/test/MC/AArch64/armv8.2a-uao.s
new file mode 100644
index 000000000000..ec5e96261a82
--- /dev/null
+++ b/test/MC/AArch64/armv8.2a-uao.s
@@ -0,0 +1,17 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.2a < %s 2> %t | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR %s < %t
+
+ msr uao, #0
+ msr uao, #1
+// CHECK: msr UAO, #0 // encoding: [0x7f,0x40,0x00,0xd5]
+// CHECK: msr UAO, #1 // encoding: [0x7f,0x41,0x00,0xd5]
+
+ msr uao, #2
+// CHECK-ERROR: error: immediate must be an integer in range [0, 1].
+// CHECK-ERROR: msr uao, #2
+// CHECK-ERROR: ^
+
+ msr uao, x1
+ mrs x2, uao
+// CHECK: msr UAO, x1 // encoding: [0x81,0x42,0x18,0xd5]
+// CHECK: mrs x2, UAO // encoding: [0x82,0x42,0x38,0xd5]
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
index 0c2bc689663c..b9c7b16cb06e 100644
--- a/test/MC/AArch64/basic-a64-diagnostics.s
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -1603,7 +1603,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: fcsel q3, q20, q9, pl
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: error: instruction requires: fullfp16
// CHECK-ERROR-NEXT: fcsel h9, h10, h11, mi
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
@@ -1652,7 +1652,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR-NEXT: fmadd b3, b4, b5, b6
// CHECK-ERROR-NEXT: ^
-// CHECK-ERROR-NEXT: error: invalid operand for instruction
+// CHECK-ERROR-NEXT: error: instruction requires: fullfp16
// CHECK-ERROR-NEXT: fmsub h1, h2, h3, h4
// CHECK-ERROR-NEXT: ^
// CHECK-ERROR-NEXT: error: invalid operand for instruction
diff --git a/test/MC/AArch64/elf_osabi_flags.s b/test/MC/AArch64/elf_osabi_flags.s
index 68cb385fc991..42c56f043f2b 100644
--- a/test/MC/AArch64/elf_osabi_flags.s
+++ b/test/MC/AArch64/elf_osabi_flags.s
@@ -1,5 +1,6 @@
-# RUN: llvm-mc -filetype=obj -triple aarch64 %s -o -| llvm-readobj -h | FileCheck --check-prefix=AARCH64-OSABI %s
-# AARCH64-OSABI: OS/ABI: SystemV (0x0)
+# RUN: llvm-mc -filetype=obj -triple aarch64 %s -o -| llvm-readobj -h | FileCheck %s
+# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu %s -o -| llvm-readobj -h | FileCheck %s
+# CHECK: OS/ABI: SystemV (0x0)
-# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu %s -o -| llvm-readobj -h | FileCheck --check-prefix=AARCH64-LINUX-OSABI %s
-# AARCH64-LINUX-OSABI: OS/ABI: GNU/Linux (0x3)
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-freebsd %s -o -| llvm-readobj -h | FileCheck --check-prefix=AARCH64-FREEBSD-OSABI %s
+# AARCH64-FREEBSD-OSABI: OS/ABI: FreeBSD (0x9)
diff --git a/test/MC/AArch64/error-location-ldr-pseudo.s b/test/MC/AArch64/error-location-ldr-pseudo.s
new file mode 100644
index 000000000000..951373dda61d
--- /dev/null
+++ b/test/MC/AArch64/error-location-ldr-pseudo.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple aarch64--none-eabi -filetype obj < %s -o /dev/null 2>&1 | FileCheck %s
+
+ .text
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: expected relocatable expression
+ ldr x0, =(-undef)
diff --git a/test/MC/AArch64/error-location.s b/test/MC/AArch64/error-location.s
new file mode 100644
index 000000000000..02504368f004
--- /dev/null
+++ b/test/MC/AArch64/error-location.s
@@ -0,0 +1,49 @@
+// RUN: not llvm-mc -triple aarch64--none-eabi -filetype obj < %s -o /dev/null 2>&1 | FileCheck %s
+
+// Note: These errors are not always emitted in the order in which the relevant
+// source appears, this file is carefully ordered so that that is the case.
+
+ .text
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: symbol 'undef' can not be undefined in a subtraction expression
+ .word (0-undef)
+
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: expected relocatable expression
+ .word -undef
+
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: No relocation available to represent this relative expression
+ adr x0, #a-undef
+
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a difference across sections
+ .word x_a - y_a
+
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a subtraction with a weak symbol
+ .word a - w
+
+// CHECK: <unknown>:0: error: expression could not be evaluated
+ .set v1, -undef
+
+ .comm common, 4
+// CHECK: <unknown>:0: error: Common symbol 'common' cannot be used in assignment expr
+ .set v3, common
+
+// CHECK: <unknown>:0: error: Undefined temporary symbol
+ .word 5f
+
+// CHECK: <unknown>:0: error: symbol 'undef' could not be evaluated in a subtraction expression
+ .set v2, a-undef
+
+
+
+w:
+ .word 0
+ .weak w
+
+
+ .section sec_x
+x_a:
+ .word 0
+
+
+ .section sec_y
+y_a:
+ .word 0
diff --git a/test/MC/AArch64/fullfp16-diagnostics.s b/test/MC/AArch64/fullfp16-diagnostics.s
new file mode 100644
index 000000000000..06035dbf7028
--- /dev/null
+++ b/test/MC/AArch64/fullfp16-diagnostics.s
@@ -0,0 +1,82 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t
+// RUN: FileCheck < %t %s
+
+ fmla v0.4h, v1.4h, v16.h[3]
+ fmla v2.8h, v3.8h, v17.h[6]
+
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmla v0.4h, v1.4h, v16.h[3]
+// CHECK-NEXT: ^
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmla v2.8h, v3.8h, v17.h[6]
+// CHECK-NEXT: ^
+
+ fmls v0.4h, v1.4h, v16.h[3]
+ fmls v2.8h, v3.8h, v17.h[6]
+
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmls v0.4h, v1.4h, v16.h[3]
+// CHECK-NEXT: ^
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmls v2.8h, v3.8h, v17.h[6]
+// CHECK-NEXT: ^
+
+ fmul v0.4h, v1.4h, v16.h[3]
+ fmul v2.8h, v3.8h, v17.h[6]
+
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmul v0.4h, v1.4h, v16.h[3]
+// CHECK-NEXT: ^
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmul v2.8h, v3.8h, v17.h[6]
+// CHECK-NEXT: ^
+
+ fmulx v0.4h, v1.4h, v16.h[3]
+ fmulx v2.8h, v3.8h, v17.h[6]
+
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmulx v0.4h, v1.4h, v16.h[3]
+// CHECK-NEXT: ^
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmulx v2.8h, v3.8h, v17.h[6]
+// CHECK-NEXT: ^
+
+ fmla h0, h1, v16.h[3]
+ fmla h2, h3, v17.h[6]
+
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmla h0, h1, v16.h[3]
+// CHECK-NEXT: ^
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmla h2, h3, v17.h[6]
+// CHECK-NEXT: ^
+
+ fmls h0, h1, v16.h[3]
+ fmls h2, h3, v17.h[6]
+
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmls h0, h1, v16.h[3]
+// CHECK-NEXT: ^
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmls h2, h3, v17.h[6]
+// CHECK-NEXT: ^
+
+ fmul h0, h1, v16.h[3]
+ fmul h2, h3, v17.h[6]
+
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmul h0, h1, v16.h[3]
+// CHECK-NEXT: ^
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmul h2, h3, v17.h[6]
+// CHECK-NEXT: ^
+
+ fmulx h0, h1, v16.h[3]
+ fmulx h2, h3, v17.h[6]
+
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmulx h0, h1, v16.h[3]
+// CHECK-NEXT: ^
+// CHECK: error: invalid operand for instruction
+// CHECK-NEXT: fmulx h2, h3, v17.h[6]
+// CHECK-NEXT: ^
diff --git a/test/MC/AArch64/fullfp16-neon-neg.s b/test/MC/AArch64/fullfp16-neon-neg.s
new file mode 100644
index 000000000000..0913ecb7e9ab
--- /dev/null
+++ b/test/MC/AArch64/fullfp16-neon-neg.s
@@ -0,0 +1,382 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+neon,-fullfp16 -show-encoding < %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple=aarch64 -mattr=-neon,+fullfp16 -show-encoding < %s 2>&1 | FileCheck %s
+
+
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fabs.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fneg.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frecpe.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frinta.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintx.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frinti.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintm.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintn.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintp.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintz.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frsqrte.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fsqrt.4h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fabs.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fneg.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frecpe.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frinta.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintx.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frinti.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintm.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintn.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintp.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintz.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frsqrte.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fsqrt.8h v0, v0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmla v0.4h, v1.4h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmla v3.8h, v8.8h, v2.h[1]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmls v0.4h, v1.4h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmls v3.8h, v8.8h, v2.h[1]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmul v0.4h, v1.4h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmul v0.8h, v1.8h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmulx v0.4h, v1.4h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmulx v0.8h, v1.8h, v2.h[2]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fabd v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmaxnmv h0, v1.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fminnmv h0, v1.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmaxv h0, v1.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fminv h0, v1.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ faddp v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ faddp v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fadd v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fadd v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fsub v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fsub v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmeq v0.4h, v31.4h, v16.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmeq v4.8h, v7.8h, v15.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmge v3.4h, v8.4h, v12.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmge v31.8h, v29.8h, v28.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmle v3.4h, v12.4h, v8.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmle v31.8h, v28.8h, v29.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmgt v0.4h, v31.4h, v16.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmgt v4.8h, v7.8h, v15.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmlt v0.4h, v16.4h, v31.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmlt v4.8h, v15.8h, v7.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmeq v0.4h, v31.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmeq v4.8h, v7.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmeq v0.4h, v31.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmeq v4.8h, v7.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmge v3.4h, v8.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmge v31.8h, v29.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmge v3.4h, v8.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmge v31.8h, v29.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmgt v0.4h, v31.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmgt v4.8h, v7.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmgt v0.4h, v31.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmgt v4.8h, v7.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmle v3.4h, v20.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmle v1.8h, v8.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmle v3.4h, v20.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmle v1.8h, v8.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmlt v16.4h, v2.4h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmlt v15.8h, v4.8h, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmlt v16.4h, v2.4h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmlt v15.8h, v4.8h, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ facge v0.4h, v31.4h, v16.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ facge v4.8h, v7.8h, v15.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ facle v0.4h, v16.4h, v31.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ facle v4.8h, v15.8h, v7.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ facgt v3.4h, v8.4h, v12.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ facgt v31.8h, v29.8h, v28.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ faclt v3.4h, v12.4h, v8.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ faclt v31.8h, v28.8h, v29.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frsqrts v0.4h, v31.4h, v16.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frsqrts v4.8h, v7.8h, v15.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frecps v3.4h, v8.4h, v12.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frecps v31.8h, v29.8h, v28.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmaxp v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmaxp v31.8h, v15.8h, v16.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fminp v10.4h, v15.4h, v22.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fminp v3.8h, v5.8h, v6.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmaxnmp v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmaxnmp v31.8h, v15.8h, v16.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fminnmp v10.4h, v15.4h, v22.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fminnmp v3.8h, v5.8h, v6.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmax v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmax v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmin v10.4h, v15.4h, v22.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmin v10.8h, v15.8h, v22.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmaxnm v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmaxnm v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fminnm v10.4h, v15.4h, v22.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fminnm v10.8h, v15.8h, v22.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmla v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmla v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmls v0.4h, v1.4h, v2.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmls v0.8h, v1.8h, v2.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fabd h29, h24, h20
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmla h0, h1, v1.h[5]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmls h2, h3, v4.h[5]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmul h0, h1, v1.h[5]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmulx h6, h2, v8.h[5]
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtzs h21, h12, #1
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtzu h21, h12, #1
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtas h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtau h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtms h22, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtmu h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtns h22, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtnu h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtps h22, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtpu h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtzs h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtzu h12, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmeq h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmeq h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmeq h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmge h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmge h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmge h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmgt h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmgt h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmgt h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmle h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmle h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmlt h10, h11, #0.0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcmlt h10, h11, #0
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ facge h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ facgt h10, h11, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fmulx h20, h22, h15
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frecps h21, h16, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frsqrts h21, h5, h12
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frecpe h19, h14
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frecpx h18, h10
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frsqrte h22, h13
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ faddp h18, v3.2h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fabs v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fabs v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fneg v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fneg v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintn v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintn v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frinta v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frinta v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintp v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintp v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintm v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintm v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintx v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintx v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintz v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frintz v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frinti v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frinti v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtns v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtns v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtnu v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtnu v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtps v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtps v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtpu v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtpu v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtms v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtms v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtmu v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtmu v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtzs v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtzs v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtzu v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtzu v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtas v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtas v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtau v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fcvtau v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frecpe v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frecpe v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frsqrte v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ frsqrte v6.8h, v8.8h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fsqrt v4.4h, v0.4h
+// CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
+ fsqrt v6.8h, v8.8h
+
+// CHECK-NOT: :[[@LINE+1]]:{{[0-9]+}}: error: instruction requires:
diff --git a/test/MC/AArch64/ldr-pseudo-diagnostics.s b/test/MC/AArch64/ldr-pseudo-diagnostics.s
index e32c51679528..e83ca93f6d2d 100644
--- a/test/MC/AArch64/ldr-pseudo-diagnostics.s
+++ b/test/MC/AArch64/ldr-pseudo-diagnostics.s
@@ -12,3 +12,21 @@ f2:
// CHECK-ERROR: error: Immediate too large for register
// CHECK-ERROR: ldr w0, =-0x80000001
// CHECK-ERROR: ^
+
+f3:
+ ldr foo, =1
+// CHECK-ERROR: error: Only valid when first operand is register
+// CHECK-ERROR: ldr foo, =1
+// CHECK-ERROR: ^
+
+f4:
+ add r0, r0, =1
+// CHECK-ERROR: error: unexpected token in operand
+// CHECK-ERROR: add r0, r0, =1
+// CHECK-ERROR: ^
+
+f5:
+ ldr x0, =())
+// CHECK-ERROR: error: unknown token in expression
+// CHECK-ERROR: ldr x0, =())
+// CHECK-ERROR: ^
diff --git a/test/MC/AArch64/neon-2velem.s b/test/MC/AArch64/neon-2velem.s
index 04841d0164f2..ed55ad0b1363 100644
--- a/test/MC/AArch64/neon-2velem.s
+++ b/test/MC/AArch64/neon-2velem.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -46,6 +46,8 @@
// CHECK: mls v0.8h, v1.8h, v2.h[7] // encoding: [0x20,0x48,0x72,0x6f]
// CHECK: mls v0.8h, v1.8h, v14.h[6] // encoding: [0x20,0x48,0x6e,0x6f]
+ fmla v0.4h, v1.4h, v2.h[2]
+ fmla v3.8h, v8.8h, v2.h[1]
fmla v0.2s, v1.2s, v2.s[2]
fmla v0.2s, v1.2s, v22.s[2]
fmla v3.4s, v8.4s, v2.s[1]
@@ -53,6 +55,8 @@
fmla v0.2d, v1.2d, v2.d[1]
fmla v0.2d, v1.2d, v22.d[1]
+// CHECK: fmla v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x10,0x22,0x0f]
+// CHECK: fmla v3.8h, v8.8h, v2.h[1] // encoding: [0x03,0x11,0x12,0x4f]
// CHECK: fmla v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x18,0x82,0x0f]
// CHECK: fmla v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x18,0x96,0x0f]
// CHECK: fmla v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x11,0xa2,0x4f]
@@ -60,6 +64,8 @@
// CHECK: fmla v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x18,0xc2,0x4f]
// CHECK: fmla v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x18,0xd6,0x4f]
+ fmls v0.4h, v1.4h, v2.h[2]
+ fmls v3.8h, v8.8h, v2.h[1]
fmls v0.2s, v1.2s, v2.s[2]
fmls v0.2s, v1.2s, v22.s[2]
fmls v3.4s, v8.4s, v2.s[1]
@@ -67,6 +73,8 @@
fmls v0.2d, v1.2d, v2.d[1]
fmls v0.2d, v1.2d, v22.d[1]
+// CHECK: fmls v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x50,0x22,0x0f]
+// CHECK: fmls v3.8h, v8.8h, v2.h[1] // encoding: [0x03,0x51,0x12,0x4f]
// CHECK: fmls v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x58,0x82,0x0f]
// CHECK: fmls v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x58,0x96,0x0f]
// CHECK: fmls v3.4s, v8.4s, v2.s[1] // encoding: [0x03,0x51,0xa2,0x4f]
@@ -172,6 +180,8 @@
// CHECK: mul v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x88,0x82,0x4f]
// CHECK: mul v0.4s, v1.4s, v22.s[2] // encoding: [0x20,0x88,0x96,0x4f]
+ fmul v0.4h, v1.4h, v2.h[2]
+ fmul v0.8h, v1.8h, v2.h[2]
fmul v0.2s, v1.2s, v2.s[2]
fmul v0.2s, v1.2s, v22.s[2]
fmul v0.4s, v1.4s, v2.s[2]
@@ -179,6 +189,8 @@
fmul v0.2d, v1.2d, v2.d[1]
fmul v0.2d, v1.2d, v22.d[1]
+// CHECK: fmul v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x90,0x22,0x0f]
+// CHECK: fmul v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0x90,0x22,0x4f]
// CHECK: fmul v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x98,0x82,0x0f]
// CHECK: fmul v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x98,0x96,0x0f]
// CHECK: fmul v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x98,0x82,0x4f]
@@ -186,6 +198,8 @@
// CHECK: fmul v0.2d, v1.2d, v2.d[1] // encoding: [0x20,0x98,0xc2,0x4f]
// CHECK: fmul v0.2d, v1.2d, v22.d[1] // encoding: [0x20,0x98,0xd6,0x4f]
+ fmulx v0.4h, v1.4h, v2.h[2]
+ fmulx v0.8h, v1.8h, v2.h[2]
fmulx v0.2s, v1.2s, v2.s[2]
fmulx v0.2s, v1.2s, v22.s[2]
fmulx v0.4s, v1.4s, v2.s[2]
@@ -193,6 +207,8 @@
fmulx v0.2d, v1.2d, v2.d[1]
fmulx v0.2d, v1.2d, v22.d[1]
+// CHECK: fmulx v0.4h, v1.4h, v2.h[2] // encoding: [0x20,0x90,0x22,0x2f]
+// CHECK: fmulx v0.8h, v1.8h, v2.h[2] // encoding: [0x20,0x90,0x22,0x6f]
// CHECK: fmulx v0.2s, v1.2s, v2.s[2] // encoding: [0x20,0x98,0x82,0x2f]
// CHECK: fmulx v0.2s, v1.2s, v22.s[2] // encoding: [0x20,0x98,0x96,0x2f]
// CHECK: fmulx v0.4s, v1.4s, v2.s[2] // encoding: [0x20,0x98,0x82,0x6f]
diff --git a/test/MC/AArch64/neon-aba-abd.s b/test/MC/AArch64/neon-aba-abd.s
index 178eb26f64c2..b3a90bb14895 100644
--- a/test/MC/AArch64/neon-aba-abd.s
+++ b/test/MC/AArch64/neon-aba-abd.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -68,10 +68,12 @@
//----------------------------------------------------------------------
// Vector Absolute Difference (Floating Point)
//----------------------------------------------------------------------
+ fabd v0.4h, v1.4h, v2.4h
fabd v0.2s, v1.2s, v2.2s
fabd v31.4s, v15.4s, v16.4s
fabd v7.2d, v8.2d, v25.2d
+// CHECK: fabd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0xc2,0x2e]
// CHECK: fabd v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0xa2,0x2e]
// CHECK: fabd v31.4s, v15.4s, v16.4s // encoding: [0xff,0xd5,0xb0,0x6e]
// CHECK: fabd v7.2d, v8.2d, v25.2d // encoding: [0x07,0xd5,0xf9,0x6e]
diff --git a/test/MC/AArch64/neon-across.s b/test/MC/AArch64/neon-across.s
index 60b766d8c881..74edc519a475 100644
--- a/test/MC/AArch64/neon-across.s
+++ b/test/MC/AArch64/neon-across.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -90,11 +90,27 @@
// CHECK: addv h0, v1.8h // encoding: [0x20,0xb8,0x71,0x4e]
// CHECK: addv s0, v1.4s // encoding: [0x20,0xb8,0xb1,0x4e]
+ fmaxnmv h0, v1.4h
+ fminnmv h0, v1.4h
+ fmaxv h0, v1.4h
+ fminv h0, v1.4h
+ fmaxnmv h0, v1.8h
+ fminnmv h0, v1.8h
+ fmaxv h0, v1.8h
+ fminv h0, v1.8h
fmaxnmv s0, v1.4s
fminnmv s0, v1.4s
fmaxv s0, v1.4s
fminv s0, v1.4s
+// CHECK: fmaxnmv h0, v1.4h // encoding: [0x20,0xc8,0x30,0x0e]
+// CHECK: fminnmv h0, v1.4h // encoding: [0x20,0xc8,0xb0,0x0e]
+// CHECK: fmaxv h0, v1.4h // encoding: [0x20,0xf8,0x30,0x0e]
+// CHECK: fminv h0, v1.4h // encoding: [0x20,0xf8,0xb0,0x0e]
+// CHECK: fmaxnmv h0, v1.8h // encoding: [0x20,0xc8,0x30,0x4e]
+// CHECK: fminnmv h0, v1.8h // encoding: [0x20,0xc8,0xb0,0x4e]
+// CHECK: fmaxv h0, v1.8h // encoding: [0x20,0xf8,0x30,0x4e]
+// CHECK: fminv h0, v1.8h // encoding: [0x20,0xf8,0xb0,0x4e]
// CHECK: fmaxnmv s0, v1.4s // encoding: [0x20,0xc8,0x30,0x6e]
// CHECK: fminnmv s0, v1.4s // encoding: [0x20,0xc8,0xb0,0x6e]
// CHECK: fmaxv s0, v1.4s // encoding: [0x20,0xf8,0x30,0x6e]
diff --git a/test/MC/AArch64/neon-add-pairwise.s b/test/MC/AArch64/neon-add-pairwise.s
index df9938b07e52..3d77c6e2790f 100644
--- a/test/MC/AArch64/neon-add-pairwise.s
+++ b/test/MC/AArch64/neon-add-pairwise.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -25,10 +25,14 @@
//------------------------------------------------------------------------------
// Vector Add Pairwise (Floating Point
//------------------------------------------------------------------------------
+ faddp v0.4h, v1.4h, v2.4h
+ faddp v0.8h, v1.8h, v2.8h
faddp v0.2s, v1.2s, v2.2s
faddp v0.4s, v1.4s, v2.4s
faddp v0.2d, v1.2d, v2.2d
+// CHECK: faddp v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0x42,0x2e]
+// CHECK: faddp v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0x42,0x6e]
// CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e]
// CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e]
// CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e]
diff --git a/test/MC/AArch64/neon-add-sub-instructions.s b/test/MC/AArch64/neon-add-sub-instructions.s
index 68f169b3dd90..0d8416537022 100644
--- a/test/MC/AArch64/neon-add-sub-instructions.s
+++ b/test/MC/AArch64/neon-add-sub-instructions.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -44,10 +44,14 @@
//------------------------------------------------------------------------------
// Vector Floating-Point Add
//------------------------------------------------------------------------------
+ fadd v0.4h, v1.4h, v2.4h
+ fadd v0.8h, v1.8h, v2.8h
fadd v0.2s, v1.2s, v2.2s
fadd v0.4s, v1.4s, v2.4s
fadd v0.2d, v1.2d, v2.2d
+// CHECK: fadd v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0x42,0x0e]
+// CHECK: fadd v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0x42,0x4e]
// CHECK: fadd v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x0e]
// CHECK: fadd v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x4e]
// CHECK: fadd v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x4e]
@@ -56,10 +60,14 @@
//------------------------------------------------------------------------------
// Vector Floating-Point Sub
//------------------------------------------------------------------------------
+ fsub v0.4h, v1.4h, v2.4h
+ fsub v0.8h, v1.8h, v2.8h
fsub v0.2s, v1.2s, v2.2s
fsub v0.4s, v1.4s, v2.4s
fsub v0.2d, v1.2d, v2.2d
+// CHECK: fsub v0.4h, v1.4h, v2.4h // encoding: [0x20,0x14,0xc2,0x0e]
+// CHECK; fsub v0.8h, v1.8h, v2.8h // encoding: [0x20,0x14,0xc2,0x4e]
// CHECK: fsub v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0xa2,0x0e]
// CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e]
// CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e]
diff --git a/test/MC/AArch64/neon-compare-instructions.s b/test/MC/AArch64/neon-compare-instructions.s
index 19cfaf1f4d36..ffa88e50e0ce 100644
--- a/test/MC/AArch64/neon-compare-instructions.s
+++ b/test/MC/AArch64/neon-compare-instructions.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -194,10 +194,14 @@
// Vector Compare Mask Equal (Floating Point)
//----------------------------------------------------------------------
+ fcmeq v0.4h, v31.4h, v16.4h
+ fcmeq v4.8h, v7.8h, v15.8h
fcmeq v0.2s, v31.2s, v16.2s
fcmeq v4.4s, v7.4s, v15.4s
fcmeq v29.2d, v2.2d, v5.2d
+// CHECK: fcmeq v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x27,0x50,0x0e]
+// CHECK: fcmeq v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x24,0x4f,0x4e]
// CHECK: fcmeq v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0x30,0x0e]
// CHECK: fcmeq v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0x2f,0x4e]
// CHECK: fcmeq v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0x65,0x4e]
@@ -208,6 +212,10 @@
// FCMLE is alias for FCMGE with operands reversed.
//----------------------------------------------------------------------
+ fcmge v3.4h, v8.4h, v12.4h
+ fcmge v31.8h, v29.8h, v28.8h
+ fcmle v3.4h, v12.4h, v8.4h
+ fcmle v31.8h, v28.8h, v29.8h
fcmge v31.4s, v29.4s, v28.4s
fcmge v3.2s, v8.2s, v12.2s
fcmge v17.2d, v15.2d, v13.2d
@@ -215,6 +223,10 @@
fcmle v3.2s, v12.2s, v8.2s
fcmle v17.2d, v13.2d, v15.2d
+// CHECK: fcmge v3.4h, v8.4h, v12.4h // encoding: [0x03,0x25,0x4c,0x2e]
+// CHECK: fcmge v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x27,0x5c,0x6e]
+// CHECK: fcmge v3.4h, v8.4h, v12.4h // encoding: [0x03,0x25,0x4c,0x2e]
+// CHECK: fcmge v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x27,0x5c,0x6e]
// CHECK: fcmge v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xe7,0x3c,0x6e]
// CHECK: fcmge v3.2s, v8.2s, v12.2s // encoding: [0x03,0xe5,0x2c,0x2e]
// CHECK: fcmge v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xe5,0x6d,0x6e]
@@ -228,6 +240,10 @@
// FCMLT is alias for FCMGT with operands reversed.
//----------------------------------------------------------------------
+ fcmgt v0.4h, v31.4h, v16.4h
+ fcmgt v4.8h, v7.8h, v15.8h
+ fcmlt v0.4h, v16.4h, v31.4h
+ fcmlt v4.8h, v15.8h, v7.8h
fcmgt v0.2s, v31.2s, v16.2s
fcmgt v4.4s, v7.4s, v15.4s
fcmgt v29.2d, v2.2d, v5.2d
@@ -235,6 +251,10 @@
fcmlt v4.4s, v15.4s, v7.4s
fcmlt v29.2d, v5.2d, v2.2d
+// CHECK: fcmgt v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x27,0xd0,0x2e]
+// CHECK: fcmgt v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x24,0xcf,0x6e]
+// CHECK: fcmgt v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x27,0xd0,0x2e]
+// CHECK: fcmgt v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x24,0xcf,0x6e]
// CHECK: fcmgt v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xe7,0xb0,0x2e]
// CHECK: fcmgt v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xe4,0xaf,0x6e]
// CHECK: fcmgt v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xe4,0xe5,0x6e]
@@ -343,16 +363,24 @@
//----------------------------------------------------------------------
// Vector Compare Mask Equal to Zero (Floating Point)
//----------------------------------------------------------------------
+ fcmeq v0.4h, v31.4h, #0.0
+ fcmeq v4.8h, v7.8h, #0.0
fcmeq v0.2s, v31.2s, #0.0
fcmeq v4.4s, v7.4s, #0.0
fcmeq v29.2d, v2.2d, #0.0
+ fcmeq v0.4h, v31.4h, #0
+ fcmeq v4.8h, v7.8h, #0
fcmeq v0.2s, v31.2s, #0
fcmeq v4.4s, v7.4s, #0
fcmeq v29.2d, v2.2d, #0
+// CHECK: fcmeq v0.4h, v31.4h, #0.0 // encoding: [0xe0,0xdb,0xf8,0x0e]
+// CHECK: fcmeq v4.8h, v7.8h, #0.0 // encoding: [0xe4,0xd8,0xf8,0x4e]
// CHECK: fcmeq v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xdb,0xa0,0x0e]
// CHECK: fcmeq v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xd8,0xa0,0x4e]
// CHECK: fcmeq v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xd8,0xe0,0x4e]
+// CHECK: fcmeq v0.4h, v31.4h, #0.0 // encoding: [0xe0,0xdb,0xf8,0x0e]
+// CHECK: fcmeq v4.8h, v7.8h, #0.0 // encoding: [0xe4,0xd8,0xf8,0x4e]
// CHECK: fcmeq v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xdb,0xa0,0x0e]
// CHECK: fcmeq v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xd8,0xa0,0x4e]
// CHECK: fcmeq v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xd8,0xe0,0x4e]
@@ -360,16 +388,24 @@
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
//----------------------------------------------------------------------
+ fcmge v3.4h, v8.4h, #0.0
+ fcmge v31.8h, v29.8h, #0.0
fcmge v31.4s, v29.4s, #0.0
fcmge v3.2s, v8.2s, #0.0
fcmge v17.2d, v15.2d, #0.0
+ fcmge v3.4h, v8.4h, #0
+ fcmge v31.8h, v29.8h, #0
fcmge v31.4s, v29.4s, #0
fcmge v3.2s, v8.2s, #0
fcmge v17.2d, v15.2d, #0
+// CHECK: fcmge v3.4h, v8.4h, #0.0 // encoding: [0x03,0xc9,0xf8,0x2e]
+// CHECK: fcmge v31.8h, v29.8h, #0.0 // encoding: [0xbf,0xcb,0xf8,0x6e]
// CHECK: fcmge v31.4s, v29.4s, #0.0 // encoding: [0xbf,0xcb,0xa0,0x6e]
// CHECK: fcmge v3.2s, v8.2s, #0.0 // encoding: [0x03,0xc9,0xa0,0x2e]
// CHECK: fcmge v17.2d, v15.2d, #0.0 // encoding: [0xf1,0xc9,0xe0,0x6e]
+// CHECK: fcmge v3.4h, v8.4h, #0.0 // encoding: [0x03,0xc9,0xf8,0x2e]
+// CHECK: fcmge v31.8h, v29.8h, #0.0 // encoding: [0xbf,0xcb,0xf8,0x6e]
// CHECK: fcmge v31.4s, v29.4s, #0.0 // encoding: [0xbf,0xcb,0xa0,0x6e]
// CHECK: fcmge v3.2s, v8.2s, #0.0 // encoding: [0x03,0xc9,0xa0,0x2e]
// CHECK: fcmge v17.2d, v15.2d, #0.0 // encoding: [0xf1,0xc9,0xe0,0x6e]
@@ -377,16 +413,24 @@
//----------------------------------------------------------------------
// Vector Compare Mask Greater Than Zero (Floating Point)
//----------------------------------------------------------------------
+ fcmgt v0.4h, v31.4h, #0.0
+ fcmgt v4.8h, v7.8h, #0.0
fcmgt v0.2s, v31.2s, #0.0
fcmgt v4.4s, v7.4s, #0.0
fcmgt v29.2d, v2.2d, #0.0
+ fcmgt v0.4h, v31.4h, #0
+ fcmgt v4.8h, v7.8h, #0
fcmgt v0.2s, v31.2s, #0
fcmgt v4.4s, v7.4s, #0
fcmgt v29.2d, v2.2d, #0
+// CHECK: fcmgt v0.4h, v31.4h, #0.0 // encoding: [0xe0,0xcb,0xf8,0x0e]
+// CHECK: fcmgt v4.8h, v7.8h, #0.0 // encoding: [0xe4,0xc8,0xf8,0x4e]
// CHECK: fcmgt v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xcb,0xa0,0x0e]
// CHECK: fcmgt v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xc8,0xa0,0x4e]
// CHECK: fcmgt v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xc8,0xe0,0x4e]
+// CHECK: fcmgt v0.4h, v31.4h, #0.0 // encoding: [0xe0,0xcb,0xf8,0x0e]
+// CHECK: fcmgt v4.8h, v7.8h, #0.0 // encoding: [0xe4,0xc8,0xf8,0x4e]
// CHECK: fcmgt v0.2s, v31.2s, #0.0 // encoding: [0xe0,0xcb,0xa0,0x0e]
// CHECK: fcmgt v4.4s, v7.4s, #0.0 // encoding: [0xe4,0xc8,0xa0,0x4e]
// CHECK: fcmgt v29.2d, v2.2d, #0.0 // encoding: [0x5d,0xc8,0xe0,0x4e]
@@ -394,16 +438,24 @@
//----------------------------------------------------------------------
// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
//----------------------------------------------------------------------
+ fcmle v3.4h, v20.4h, #0.0
+ fcmle v1.8h, v8.8h, #0.0
fcmle v1.4s, v8.4s, #0.0
fcmle v3.2s, v20.2s, #0.0
fcmle v7.2d, v13.2d, #0.0
+ fcmle v3.4h, v20.4h, #0
+ fcmle v1.8h, v8.8h, #0
fcmle v1.4s, v8.4s, #0
fcmle v3.2s, v20.2s, #0
fcmle v7.2d, v13.2d, #0
+// CHECK: fcmle v3.4h, v20.4h, #0.0 // encoding: [0x83,0xda,0xf8,0x2e]
+// CHECK: fcmle v1.8h, v8.8h, #0.0 // encoding: [0x01,0xd9,0xf8,0x6e]
// CHECK: fcmle v1.4s, v8.4s, #0.0 // encoding: [0x01,0xd9,0xa0,0x6e]
// CHECK: fcmle v3.2s, v20.2s, #0.0 // encoding: [0x83,0xda,0xa0,0x2e]
// CHECK: fcmle v7.2d, v13.2d, #0.0 // encoding: [0xa7,0xd9,0xe0,0x6e]
+// CHECK: fcmle v3.4h, v20.4h, #0.0 // encoding: [0x83,0xda,0xf8,0x2e]
+// CHECK: fcmle v1.8h, v8.8h, #0.0 // encoding: [0x01,0xd9,0xf8,0x6e]
// CHECK: fcmle v1.4s, v8.4s, #0.0 // encoding: [0x01,0xd9,0xa0,0x6e]
// CHECK: fcmle v3.2s, v20.2s, #0.0 // encoding: [0x83,0xda,0xa0,0x2e]
// CHECK: fcmle v7.2d, v13.2d, #0.0 // encoding: [0xa7,0xd9,0xe0,0x6e]
@@ -411,16 +463,24 @@
//----------------------------------------------------------------------
// Vector Compare Mask Less Than Zero (Floating Point)
//----------------------------------------------------------------------
+ fcmlt v16.4h, v2.4h, #0.0
+ fcmlt v15.8h, v4.8h, #0.0
fcmlt v16.2s, v2.2s, #0.0
fcmlt v15.4s, v4.4s, #0.0
fcmlt v5.2d, v29.2d, #0.0
+ fcmlt v16.4h, v2.4h, #0
+ fcmlt v15.8h, v4.8h, #0
fcmlt v16.2s, v2.2s, #0
fcmlt v15.4s, v4.4s, #0
fcmlt v5.2d, v29.2d, #0
+// CHECK: fcmlt v16.4h, v2.4h, #0.0 // encoding: [0x50,0xe8,0xf8,0x0e]
+// CHECK: fcmlt v15.8h, v4.8h, #0.0 // encoding: [0x8f,0xe8,0xf8,0x4e]
// CHECK: fcmlt v16.2s, v2.2s, #0.0 // encoding: [0x50,0xe8,0xa0,0x0e]
// CHECK: fcmlt v15.4s, v4.4s, #0.0 // encoding: [0x8f,0xe8,0xa0,0x4e]
// CHECK: fcmlt v5.2d, v29.2d, #0.0 // encoding: [0xa5,0xeb,0xe0,0x4e]
+// CHECK: fcmlt v16.4h, v2.4h, #0.0 // encoding: [0x50,0xe8,0xf8,0x0e]
+// CHECK: fcmlt v15.8h, v4.8h, #0.0 // encoding: [0x8f,0xe8,0xf8,0x4e]
// CHECK: fcmlt v16.2s, v2.2s, #0.0 // encoding: [0x50,0xe8,0xa0,0x0e]
// CHECK: fcmlt v15.4s, v4.4s, #0.0 // encoding: [0x8f,0xe8,0xa0,0x4e]
// CHECK: fcmlt v5.2d, v29.2d, #0.0 // encoding: [0xa5,0xeb,0xe0,0x4e]
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index fa1f3caf5ad3..6ded6e40bfb9 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -341,7 +341,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fabd v0.2s, v1.4s, v2.2d
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fabd v0.4h, v1.4h, v2.4h
// CHECK-ERROR: ^
//----------------------------------------------------------------------
@@ -385,7 +385,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frecps v0.4s, v1.2d, v2.4s
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frecps v0.8h, v1.8h, v2.8h
// CHECK-ERROR: ^
@@ -400,7 +400,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frsqrts v0.2d, v1.2d, v2.2s
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frsqrts v0.4h, v1.4h, v2.4h
// CHECK-ERROR: ^
@@ -417,7 +417,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: facge v0.2d, v1.2s, v2.2d
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: facge v0.4h, v1.4h, v2.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -435,7 +435,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: facgt v0.2d, v1.2d, v2.4s
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: facgt v0.8h, v1.8h, v2.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -1092,7 +1092,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmin v0.4s, v1.4s, v2.2d
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmin v0.8h, v1.8h, v2.8h
// CHECK-ERROR: ^
@@ -1177,7 +1177,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminp v0.4s, v1.4s, v2.2d
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fminp v0.8h, v1.8h, v2.8h
// CHECK-ERROR: ^
@@ -1283,7 +1283,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmulx v21.2s, v5.2s, v13.2d
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmulx v1.4h, v25.4h, v3.4h
// CHECK-ERROR: ^
@@ -3023,10 +3023,10 @@
fmla v0.2d, v1.2d, v2.d[2]
fmla v0.2d, v1.2d, v22.d[2]
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmla v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmla v0.8h, v1.8h, v2.h[2]
// CHECK-ERROR: ^
// CHECK-ERROR: vector lane must be an integer in range
@@ -3057,10 +3057,10 @@
fmls v0.2d, v1.2d, v2.d[2]
fmls v0.2d, v1.2d, v22.d[2]
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmls v0.4h, v1.4h, v2.h[2]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmls v0.8h, v1.8h, v2.h[2]
// CHECK-ERROR: ^
// CHECK-ERROR: vector lane must be an integer in range
@@ -3428,7 +3428,7 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: mul v0.2d, v1.2d, v2.d[1]
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmul v0.4h, v1.4h, v2.h[4]
// CHECK-ERROR: ^
// CHECK-ERROR: vector lane must be an integer in range
@@ -3458,7 +3458,7 @@
fmulx v0.2d, v1.2d, v2.d[2]
fmulx v0.2d, v1.2d, v22.d[2]
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmulx v0.4h, v1.4h, v2.h[4]
// CHECK-ERROR: ^
// CHECK-ERROR: vector lane must be an integer in range
@@ -3837,16 +3837,16 @@
fmaxv h0, v1.8h
fminv h0, v1.8h
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmaxnmv h0, v1.8h
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fminnmv h0, v1.8h
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fmaxv h0, v1.8h
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fminv h0, v1.8h
// CHECK-ERROR: ^
@@ -4351,35 +4351,35 @@
smov x20, v9.d[0]
// CHECK-ERROR: vector lane must be an integer in range
-// CHECK-ERROR smov w1, v0.b[16]
-// CHECK-ERROR ^
-// CHECK-ERROR: vector lane must be an integer in range
-// CHECK-ERROR smov w14, v6.h[8]
-// CHECK-ERROR ^
-// CHECK-ERROR: vector lane must be an integer in range
-// CHECK-ERROR smov x1, v0.b[16]
-// CHECK-ERROR ^
-// CHECK-ERROR: vector lane must be an integer in range
-// CHECK-ERROR smov x14, v6.h[8]
-// CHECK-ERROR ^
-// CHECK-ERROR: vector lane must be an integer in range
-// CHECK-ERROR smov x20, v9.s[5]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR smov w1, v0.d[0]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR smov w14, v6.d[1]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR smov x1, v0.d[0]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR smov x14, v6.d[1]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR smov x20, v9.d[0]
-// CHECK-ERROR ^
+// CHECK-ERROR: smov w1, v0.b[16]
+// CHECK-ERROR: ^
+// CHECK-ERROR: vector lane must be an integer in range
+// CHECK-ERROR: smov w14, v6.h[8]
+// CHECK-ERROR: ^
+// CHECK-ERROR: vector lane must be an integer in range
+// CHECK-ERROR: smov x1, v0.b[16]
+// CHECK-ERROR: ^
+// CHECK-ERROR: vector lane must be an integer in range
+// CHECK-ERROR: smov x14, v6.h[8]
+// CHECK-ERROR: ^
+// CHECK-ERROR: vector lane must be an integer in range
+// CHECK-ERROR: smov x20, v9.s[5]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smov w1, v0.d[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smov w14, v6.d[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smov x1, v0.d[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smov x14, v6.d[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: smov x20, v9.d[0]
+// CHECK-ERROR: ^
umov w1, v0.b[16]
umov w14, v6.h[8]
@@ -4390,44 +4390,44 @@
umov d7, v18.d[1]
// CHECK-ERROR: vector lane must be an integer in range
-// CHECK-ERROR umov w1, v0.b[16]
-// CHECK-ERROR ^
+// CHECK-ERROR: umov w1, v0.b[16]
+// CHECK-ERROR: ^
// CHECK-ERROR: vector lane must be an integer in range
-// CHECK-ERROR umov w14, v6.h[8]
-// CHECK-ERROR ^
+// CHECK-ERROR: umov w14, v6.h[8]
+// CHECK-ERROR: ^
// CHECK-ERROR: vector lane must be an integer in range
-// CHECK-ERROR umov w20, v9.s[5]
-// CHECK-ERROR ^
+// CHECK-ERROR: umov w20, v9.s[5]
+// CHECK-ERROR: ^
// CHECK-ERROR: vector lane must be an integer in range
-// CHECK-ERROR umov x7, v18.d[3]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR umov w1, v0.d[0]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR umov s20, v9.s[2]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR umov d7, v18.d[1]
-// CHECK-ERROR ^
+// CHECK-ERROR: umov x7, v18.d[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umov w1, v0.d[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umov s20, v9.s[2]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: umov d7, v18.d[1]
+// CHECK-ERROR: ^
Ins v1.h[2], v3.b[6]
Ins v6.h[7], v7.s[2]
Ins v15.d[0], v22.s[2]
Ins v0.d[0], v4.b[1]
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR Ins v1.h[2], v3.b[6]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR Ins v6.h[7], v7.s[2]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR Ins v15.d[0], v22.s[2]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR Ins v0.d[0], v4.b[1]
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: Ins v1.h[2], v3.b[6]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: Ins v6.h[7], v7.s[2]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: Ins v15.d[0], v22.s[2]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: Ins v0.d[0], v4.b[1]
+// CHECK-ERROR: ^
dup v1.8h, v2.b[2]
dup v11.4s, v7.h[7]
@@ -4437,27 +4437,27 @@
dup v17.4s, v20.d[0]
dup v5.2d, v1.b[1]
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v1.8h, v2.b[2]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v11.4s, v7.h[7]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v17.2d, v20.s[0]
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v1.16b, v2.h[2]
-// CHECK-ERROR ^
-// CHECK-ERROR invalid operand for instruction
-// CHECK-ERROR dup v11.8h, v7.s[3]
-// CHECK-ERROR ^
-// CHECK-ERROR invalid operand for instruction
-// CHECK-ERROR dup v17.4s, v20.d[0]
-// CHECK-ERROR ^
-// CHECK-ERROR invalid operand for instruction
-// CHECK-ERROR dup v5.2d, v1.b[1]
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v1.8h, v2.b[2]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v11.4s, v7.h[7]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v17.2d, v20.s[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v1.16b, v2.h[2]
+// CHECK-ERROR: ^
+// CHECK-ERROR: invalid operand for instruction
+// CHECK-ERROR: dup v11.8h, v7.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: invalid operand for instruction
+// CHECK-ERROR: dup v17.4s, v20.d[0]
+// CHECK-ERROR: ^
+// CHECK-ERROR: invalid operand for instruction
+// CHECK-ERROR: dup v5.2d, v1.b[1]
+// CHECK-ERROR: ^
dup v1.8b, b1
dup v11.4h, h14
@@ -4467,27 +4467,27 @@
dup v17.4d, w28
dup v5.2d, w0
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v1.8b, b1
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v11.4h, h14
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v17.2s, s30
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v1.16b, d2
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v11.8s, w16
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v17.4d, w28
-// CHECK-ERROR ^
-// CHECK-ERROR error: invalid operand for instruction
-// CHECK-ERROR dup v5.2d, w0
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v1.8b, b1
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v11.4h, h14
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v17.2s, s30
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v1.16b, d2
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v11.8s, w16
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v17.4d, w28
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: dup v5.2d, w0
+// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Compare Bitwise Equal
@@ -5594,13 +5594,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fabs v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fabs v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fabs v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fabs v13.4h, v21.4h
// CHECK-ERROR: ^
@@ -5616,13 +5616,13 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fneg v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fneg v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fneg v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fneg v13.4h, v21.4h
// CHECK-ERROR: ^
@@ -5978,205 +5978,205 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintn v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintn v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintn v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintn v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frinta v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frinta v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frinta v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frinta v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintp v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintp v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintp v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintp v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintm v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintm v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintm v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintm v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintx v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintx v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintx v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintx v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintz v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintz v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frintz v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frintz v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frinti v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frinti v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frinti v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frinti v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtns v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtns v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtns v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtns v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtnu v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtnu v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtnu v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtnu v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtps v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtps v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtps v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtps v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtpu v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtpu v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtpu v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtpu v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtms v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtms v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtms v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtms v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtmu v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtmu v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtmu v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtmu v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzs v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtzs v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzs v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtzs v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzu v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtzu v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtzu v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtzu v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtas v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtas v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtas v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtas v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtau v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtau v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fcvtau v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fcvtau v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
@@ -6212,61 +6212,61 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: scvtf v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: scvtf v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: scvtf v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: scvtf v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ucvtf v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: ucvtf v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: ucvtf v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: ucvtf v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frecpe v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frecpe v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frecpe v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frecpe v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frsqrte v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frsqrte v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: frsqrte v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: frsqrte v13.4h, v21.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fsqrt v0.16b, v31.16b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fsqrt v2.8h, v4.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fsqrt v1.8b, v9.8b
// CHECK-ERROR: ^
-// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: error: instruction requires: fullfp16
// CHECK-ERROR: fsqrt v13.4h, v21.4h
// CHECK-ERROR: ^
@@ -6466,30 +6466,30 @@
uzp1 v0.2d, v1.1d, v2.1d
uzp1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4289:22: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4290:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4291:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4292:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4293:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4294:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4295:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4296:17: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
uzp2 v0.16b, v1.8b, v2.8b
uzp2 v0.8b, v1.4b, v2.4b
@@ -6500,30 +6500,30 @@
uzp2 v0.2d, v1.1d, v2.1d
uzp2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4298:22: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4299:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4300:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4301:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4302:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4303:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4304:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4305:17: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
zip1 v0.16b, v1.8b, v2.8b
zip1 v0.8b, v1.4b, v2.4b
@@ -6534,30 +6534,30 @@
zip1 v0.2d, v1.1d, v2.1d
zip1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4307:22: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4308:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4309:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4310:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4311:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4312:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4313:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4314:17: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
zip2 v0.16b, v1.8b, v2.8b
zip2 v0.8b, v1.4b, v2.4b
@@ -6568,30 +6568,30 @@
zip2 v0.2d, v1.1d, v2.1d
zip2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4316:22: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4317:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4318:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4319:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4320:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4321:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4322:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4323:17: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
trn1 v0.16b, v1.8b, v2.8b
trn1 v0.8b, v1.4b, v2.4b
@@ -6602,30 +6602,30 @@
trn1 v0.2d, v1.1d, v2.1d
trn1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4325:22: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4326:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4327:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4328:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4329:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4330:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4331:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4332:17: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
trn2 v0.16b, v1.8b, v2.8b
trn2 v0.8b, v1.4b, v2.4b
@@ -6636,30 +6636,30 @@
trn2 v0.2d, v1.1d, v2.1d
trn2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4334:22: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4335:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4336:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4337:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4338:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4339:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4340:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Permutation with 3 vectors
@@ -6674,30 +6674,30 @@
uzp1 v0.2d, v1.1d, v2.1d
uzp1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4289:22: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4290:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4291:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4292:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4293:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4294:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4295:21: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4296:17: error: invalid operand for instruction
-// CHECK-ERROR uzp1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
uzp2 v0.16b, v1.8b, v2.8b
uzp2 v0.8b, v1.4b, v2.4b
@@ -6708,30 +6708,30 @@
uzp2 v0.2d, v1.1d, v2.1d
uzp2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4298:22: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4299:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4300:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4301:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4302:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4303:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4304:21: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4305:17: error: invalid operand for instruction
-// CHECK-ERROR uzp2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: uzp2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
zip1 v0.16b, v1.8b, v2.8b
zip1 v0.8b, v1.4b, v2.4b
@@ -6742,30 +6742,30 @@
zip1 v0.2d, v1.1d, v2.1d
zip1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4307:22: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4308:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4309:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4310:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4311:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4312:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4313:21: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4314:17: error: invalid operand for instruction
-// CHECK-ERROR zip1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
zip2 v0.16b, v1.8b, v2.8b
zip2 v0.8b, v1.4b, v2.4b
@@ -6776,30 +6776,30 @@
zip2 v0.2d, v1.1d, v2.1d
zip2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4316:22: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4317:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4318:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4319:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4320:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4321:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4322:21: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4323:17: error: invalid operand for instruction
-// CHECK-ERROR zip2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: zip2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
trn1 v0.16b, v1.8b, v2.8b
trn1 v0.8b, v1.4b, v2.4b
@@ -6810,30 +6810,30 @@
trn1 v0.2d, v1.1d, v2.1d
trn1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4325:22: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4326:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4327:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4328:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4329:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4330:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4331:21: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4332:17: error: invalid operand for instruction
-// CHECK-ERROR trn1 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn1 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
trn2 v0.16b, v1.8b, v2.8b
trn2 v0.8b, v1.4b, v2.4b
@@ -6844,30 +6844,30 @@
trn2 v0.2d, v1.1d, v2.1d
trn2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR <stdin>:4334:22: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.16b, v1.8b, v2.8b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4335:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.8b, v1.4b, v2.4b
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4336:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.8h, v1.4h, v2.4h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4337:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.4h, v1.2h, v2.2h
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4338:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.4s, v1.2s, v2.2s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4339:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.2s, v1.1s, v2.1s
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4340:21: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.2d, v1.1d, v2.1d
-// CHECK-ERROR ^
-// CHECK-ERROR <stdin>:4341:17: error: invalid operand for instruction
-// CHECK-ERROR trn2 v0.1d, v1.1d, v2.1d
-// CHECK-ERROR ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.16b, v1.8b, v2.8b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.8b, v1.4b, v2.4b
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.8h, v1.4h, v2.4h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.4h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.4s, v1.2s, v2.2s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.2s, v1.1s, v2.1s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.2d, v1.1d, v2.1d
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: trn2 v0.1d, v1.1d, v2.1d
+// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Floating Point multiply (scalar, by element)
diff --git a/test/MC/AArch64/neon-facge-facgt.s b/test/MC/AArch64/neon-facge-facgt.s
index 212eda2f2092..9c10caa0f7c2 100644
--- a/test/MC/AArch64/neon-facge-facgt.s
+++ b/test/MC/AArch64/neon-facge-facgt.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -6,16 +6,24 @@
// Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
// FACLE is alias for FACGE with operands reversed
//----------------------------------------------------------------------
+ facge v0.4h, v31.4h, v16.4h
+ facge v4.8h, v7.8h, v15.8h
facge v0.2s, v31.2s, v16.2s
facge v4.4s, v7.4s, v15.4s
facge v29.2d, v2.2d, v5.2d
+ facle v0.4h, v16.4h, v31.4h
+ facle v4.8h, v15.8h, v7.8h
facle v0.2s, v16.2s, v31.2s
facle v4.4s, v15.4s, v7.4s
facle v29.2d, v5.2d, v2.2d
+// CHECK: facge v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x2f,0x50,0x2e]
+// CHECK: facge v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x2c,0x4f,0x6e]
// CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
// CHECK: facge v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xec,0x2f,0x6e]
// CHECK: facge v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xec,0x65,0x6e]
+// CHECK: facge v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x2f,0x50,0x2e]
+// CHECK: facge v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x2c,0x4f,0x6e]
// CHECK: facge v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xef,0x30,0x2e]
// CHECK: facge v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xec,0x2f,0x6e]
// CHECK: facge v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xec,0x65,0x6e]
@@ -24,16 +32,24 @@
// Vector Absolute Compare Mask Less Than (Floating Point)
// FACLT is alias for FACGT with operands reversed
//----------------------------------------------------------------------
+ facgt v3.4h, v8.4h, v12.4h
+ facgt v31.8h, v29.8h, v28.8h
facgt v31.4s, v29.4s, v28.4s
facgt v3.2s, v8.2s, v12.2s
facgt v17.2d, v15.2d, v13.2d
+ faclt v3.4h, v12.4h, v8.4h
+ faclt v31.8h, v28.8h, v29.8h
faclt v31.4s, v28.4s, v29.4s
faclt v3.2s, v12.2s, v8.2s
faclt v17.2d, v13.2d, v15.2d
+// CHECK: facgt v3.4h, v8.4h, v12.4h // encoding: [0x03,0x2d,0xcc,0x2e]
+// CHECK: facgt v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x2f,0xdc,0x6e]
// CHECK: facgt v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xef,0xbc,0x6e]
// CHECK: facgt v3.2s, v8.2s, v12.2s // encoding: [0x03,0xed,0xac,0x2e]
// CHECK: facgt v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xed,0xed,0x6e]
+// CHECK: facgt v3.4h, v8.4h, v12.4h // encoding: [0x03,0x2d,0xcc,0x2e]
+// CHECK: facgt v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x2f,0xdc,0x6e]
// CHECK: facgt v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xef,0xbc,0x6e]
// CHECK: facgt v3.2s, v8.2s, v12.2s // encoding: [0x03,0xed,0xac,0x2e]
// CHECK: facgt v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xed,0xed,0x6e]
diff --git a/test/MC/AArch64/neon-frsqrt-frecp.s b/test/MC/AArch64/neon-frsqrt-frecp.s
index 79fe5da5e76f..67a1340ecc32 100644
--- a/test/MC/AArch64/neon-frsqrt-frecp.s
+++ b/test/MC/AArch64/neon-frsqrt-frecp.s
@@ -1,14 +1,18 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Vector Reciprocal Square Root Step (Floating Point)
//----------------------------------------------------------------------
+ frsqrts v0.4h, v31.4h, v16.4h
+ frsqrts v4.8h, v7.8h, v15.8h
frsqrts v0.2s, v31.2s, v16.2s
frsqrts v4.4s, v7.4s, v15.4s
frsqrts v29.2d, v2.2d, v5.2d
+// CHECK: frsqrts v0.4h, v31.4h, v16.4h // encoding: [0xe0,0x3f,0xd0,0x0e]
+// CHECK: frsqrts v4.8h, v7.8h, v15.8h // encoding: [0xe4,0x3c,0xcf,0x4e]
// CHECK: frsqrts v0.2s, v31.2s, v16.2s // encoding: [0xe0,0xff,0xb0,0x0e]
// CHECK: frsqrts v4.4s, v7.4s, v15.4s // encoding: [0xe4,0xfc,0xaf,0x4e]
// CHECK: frsqrts v29.2d, v2.2d, v5.2d // encoding: [0x5d,0xfc,0xe5,0x4e]
@@ -16,10 +20,14 @@
//----------------------------------------------------------------------
// Vector Reciprocal Step (Floating Point)
//----------------------------------------------------------------------
+ frecps v3.4h, v8.4h, v12.4h
+ frecps v31.8h, v29.8h, v28.8h
frecps v31.4s, v29.4s, v28.4s
frecps v3.2s, v8.2s, v12.2s
frecps v17.2d, v15.2d, v13.2d
+// CHECK: frecps v3.4h, v8.4h, v12.4h // encoding: [0x03,0x3d,0x4c,0x0e]
+// CHECK: frecps v31.8h, v29.8h, v28.8h // encoding: [0xbf,0x3f,0x5c,0x4e]
// CHECK: frecps v31.4s, v29.4s, v28.4s // encoding: [0xbf,0xff,0x3c,0x4e]
// CHECK: frecps v3.2s, v8.2s, v12.2s // encoding: [0x03,0xfd,0x2c,0x0e]
// CHECK: frecps v17.2d, v15.2d, v13.2d // encoding: [0xf1,0xfd,0x6d,0x4e]
diff --git a/test/MC/AArch64/neon-max-min-pairwise.s b/test/MC/AArch64/neon-max-min-pairwise.s
index 8d2dadb1997f..27cf4c8d830a 100644
--- a/test/MC/AArch64/neon-max-min-pairwise.s
+++ b/test/MC/AArch64/neon-max-min-pairwise.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -67,10 +67,14 @@
//----------------------------------------------------------------------
// Vector Maximum Pairwise (Floating Point)
//----------------------------------------------------------------------
+ fmaxp v0.4h, v1.4h, v2.4h
+ fmaxp v31.8h, v15.8h, v16.8h
fmaxp v0.2s, v1.2s, v2.2s
fmaxp v31.4s, v15.4s, v16.4s
fmaxp v7.2d, v8.2d, v25.2d
+// CHECK: fmaxp v0.4h, v1.4h, v2.4h // encoding: [0x20,0x34,0x42,0x2e]
+// CHECK: fmaxp v31.8h, v15.8h, v16.8h // encoding: [0xff,0x35,0x50,0x6e]
// CHECK: fmaxp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xf4,0x22,0x2e]
// CHECK: fmaxp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x6e]
// CHECK: fmaxp v7.2d, v8.2d, v25.2d // encoding: [0x07,0xf5,0x79,0x6e]
@@ -78,10 +82,14 @@
//----------------------------------------------------------------------
// Vector Minimum Pairwise (Floating Point)
//----------------------------------------------------------------------
+ fminp v10.4h, v15.4h, v22.4h
+ fminp v3.8h, v5.8h, v6.8h
fminp v10.2s, v15.2s, v22.2s
fminp v3.4s, v5.4s, v6.4s
fminp v17.2d, v13.2d, v2.2d
+// CHECK: fminp v10.4h, v15.4h, v22.4h // encoding: [0xea,0x35,0xd6,0x2e]
+// CHECK: fminp v3.8h, v5.8h, v6.8h // encoding: [0xa3,0x34,0xc6,0x6e]
// CHECK: fminp v10.2s, v15.2s, v22.2s // encoding: [0xea,0xf5,0xb6,0x2e]
// CHECK: fminp v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xf4,0xa6,0x6e]
// CHECK: fminp v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xf5,0xe2,0x6e]
@@ -89,10 +97,14 @@
//----------------------------------------------------------------------
// Vector maxNum Pairwise (Floating Point)
//----------------------------------------------------------------------
+ fmaxnmp v0.4h, v1.4h, v2.4h
+ fmaxnmp v31.8h, v15.8h, v16.8h
fmaxnmp v0.2s, v1.2s, v2.2s
fmaxnmp v31.4s, v15.4s, v16.4s
fmaxnmp v7.2d, v8.2d, v25.2d
+// CHECK: fmaxnmp v0.4h, v1.4h, v2.4h // encoding: [0x20,0x04,0x42,0x2e]
+// CHECK: fmaxnmp v31.8h, v15.8h, v16.8h // encoding: [0xff,0x05,0x50,0x6e]
// CHECK: fmaxnmp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xc4,0x22,0x2e]
// CHECK: fmaxnmp v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x6e]
// CHECK: fmaxnmp v7.2d, v8.2d, v25.2d // encoding: [0x07,0xc5,0x79,0x6e]
@@ -100,10 +112,14 @@
//----------------------------------------------------------------------
// Vector minNum Pairwise (Floating Point)
//----------------------------------------------------------------------
+ fminnmp v10.4h, v15.4h, v22.4h
+ fminnmp v3.8h, v5.8h, v6.8h
fminnmp v10.2s, v15.2s, v22.2s
fminnmp v3.4s, v5.4s, v6.4s
fminnmp v17.2d, v13.2d, v2.2d
+// CHECK: fminnmp v10.4h, v15.4h, v22.4h // encoding: [0xea,0x05,0xd6,0x2e]
+// CHECK: fminnmp v3.8h, v5.8h, v6.8h // encoding: [0xa3,0x04,0xc6,0x6e]
// CHECK: fminnmp v10.2s, v15.2s, v22.2s // encoding: [0xea,0xc5,0xb6,0x2e]
// CHECK: fminnmp v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xc4,0xa6,0x6e]
// CHECK: fminnmp v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xc5,0xe2,0x6e]
diff --git a/test/MC/AArch64/neon-max-min.s b/test/MC/AArch64/neon-max-min.s
index 6d1efde5077f..c4bd74d98882 100644
--- a/test/MC/AArch64/neon-max-min.s
+++ b/test/MC/AArch64/neon-max-min.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -67,10 +67,14 @@
//----------------------------------------------------------------------
// Vector Maximum (Floating Point)
//----------------------------------------------------------------------
+ fmax v0.4h, v1.4h, v2.4h
+ fmax v0.8h, v1.8h, v2.8h
fmax v0.2s, v1.2s, v2.2s
fmax v31.4s, v15.4s, v16.4s
fmax v7.2d, v8.2d, v25.2d
+// CHECK: fmax v0.4h, v1.4h, v2.4h // encoding: [0x20,0x34,0x42,0x0e]
+// CHECK: fmax v0.8h, v1.8h, v2.8h // encoding: [0x20,0x34,0x42,0x4e]
// CHECK: fmax v0.2s, v1.2s, v2.2s // encoding: [0x20,0xf4,0x22,0x0e]
// CHECK: fmax v31.4s, v15.4s, v16.4s // encoding: [0xff,0xf5,0x30,0x4e]
// CHECK: fmax v7.2d, v8.2d, v25.2d // encoding: [0x07,0xf5,0x79,0x4e]
@@ -78,10 +82,14 @@
//----------------------------------------------------------------------
// Vector Minimum (Floating Point)
//----------------------------------------------------------------------
+ fmin v10.4h, v15.4h, v22.4h
+ fmin v10.8h, v15.8h, v22.8h
fmin v10.2s, v15.2s, v22.2s
fmin v3.4s, v5.4s, v6.4s
fmin v17.2d, v13.2d, v2.2d
+// CHECK: fmin v10.4h, v15.4h, v22.4h // encoding: [0xea,0x35,0xd6,0x0e]
+// CHECK: fmin v10.8h, v15.8h, v22.8h // encoding: [0xea,0x35,0xd6,0x4e]
// CHECK: fmin v10.2s, v15.2s, v22.2s // encoding: [0xea,0xf5,0xb6,0x0e]
// CHECK: fmin v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xf4,0xa6,0x4e]
// CHECK: fmin v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xf5,0xe2,0x4e]
@@ -89,10 +97,14 @@
//----------------------------------------------------------------------
// Vector maxNum (Floating Point)
//----------------------------------------------------------------------
+ fmaxnm v0.4h, v1.4h, v2.4h
+ fmaxnm v0.8h, v1.8h, v2.8h
fmaxnm v0.2s, v1.2s, v2.2s
fmaxnm v31.4s, v15.4s, v16.4s
fmaxnm v7.2d, v8.2d, v25.2d
+// CHECK: fmaxnm v0.4h, v1.4h, v2.4h // encoding: [0x20,0x04,0x42,0x0e]
+// CHECK: fmaxnm v0.8h, v1.8h, v2.8h // encoding: [0x20,0x04,0x42,0x4e]
// CHECK: fmaxnm v0.2s, v1.2s, v2.2s // encoding: [0x20,0xc4,0x22,0x0e]
// CHECK: fmaxnm v31.4s, v15.4s, v16.4s // encoding: [0xff,0xc5,0x30,0x4e]
// CHECK: fmaxnm v7.2d, v8.2d, v25.2d // encoding: [0x07,0xc5,0x79,0x4e]
@@ -100,10 +112,14 @@
//----------------------------------------------------------------------
// Vector minNum (Floating Point)
//----------------------------------------------------------------------
+ fminnm v10.4h, v15.4h, v22.4h
+ fminnm v10.8h, v15.8h, v22.8h
fminnm v10.2s, v15.2s, v22.2s
fminnm v3.4s, v5.4s, v6.4s
fminnm v17.2d, v13.2d, v2.2d
+// CHECK: fminnm v10.4h, v15.4h, v22.4h // encoding: [0xea,0x05,0xd6,0x0e]
+// CHECK: fminnm v10.8h, v15.8h, v22.8h // encoding: [0xea,0x05,0xd6,0x4e]
// CHECK: fminnm v10.2s, v15.2s, v22.2s // encoding: [0xea,0xc5,0xb6,0x0e]
// CHECK: fminnm v3.4s, v5.4s, v6.4s // encoding: [0xa3,0xc4,0xa6,0x4e]
// CHECK: fminnm v17.2d, v13.2d, v2.2d // encoding: [0xb1,0xc5,0xe2,0x4e]
diff --git a/test/MC/AArch64/neon-mla-mls-instructions.s b/test/MC/AArch64/neon-mla-mls-instructions.s
index 3072e6f1200d..a510fc8c7b91 100644
--- a/test/MC/AArch64/neon-mla-mls-instructions.s
+++ b/test/MC/AArch64/neon-mla-mls-instructions.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -40,10 +40,14 @@
//----------------------------------------------------------------------
// Vector Floating-Point Multiply-accumulate
//----------------------------------------------------------------------
+ fmla v0.4h, v1.4h, v2.4h
+ fmla v0.8h, v1.8h, v2.8h
fmla v0.2s, v1.2s, v2.2s
fmla v0.4s, v1.4s, v2.4s
fmla v0.2d, v1.2d, v2.2d
+// CHECK: fmla v0.4h, v1.4h, v2.4h // encoding: [0x20,0x0c,0x42,0x0e]
+// CHECK: fmla v0.8h, v1.8h, v2.8h // encoding: [0x20,0x0c,0x42,0x4e]
// CHECK: fmla v0.2s, v1.2s, v2.2s // encoding: [0x20,0xcc,0x22,0x0e]
// CHECK: fmla v0.4s, v1.4s, v2.4s // encoding: [0x20,0xcc,0x22,0x4e]
// CHECK: fmla v0.2d, v1.2d, v2.2d // encoding: [0x20,0xcc,0x62,0x4e]
@@ -51,10 +55,14 @@
//----------------------------------------------------------------------
// Vector Floating-Point Multiply-subtract
//----------------------------------------------------------------------
+ fmls v0.4h, v1.4h, v2.4h
+ fmls v0.8h, v1.8h, v2.8h
fmls v0.2s, v1.2s, v2.2s
fmls v0.4s, v1.4s, v2.4s
fmls v0.2d, v1.2d, v2.2d
+// CHECK: fmls v0.4h, v1.4h, v2.4h // encoding: [0x20,0x0c,0xc2,0x0e]
+// CHECK: fmls v0.8h, v1.8h, v2.8h // encoding: [0x20,0x0c,0xc2,0x4e]
// CHECK: fmls v0.2s, v1.2s, v2.2s // encoding: [0x20,0xcc,0xa2,0x0e]
// CHECK: fmls v0.4s, v1.4s, v2.4s // encoding: [0x20,0xcc,0xa2,0x4e]
// CHECK: fmls v0.2d, v1.2d, v2.2d // encoding: [0x20,0xcc,0xe2,0x4e]
diff --git a/test/MC/AArch64/neon-scalar-abs.s b/test/MC/AArch64/neon-scalar-abs.s
index d08756c0c10c..71130617848f 100644
--- a/test/MC/AArch64/neon-scalar-abs.s
+++ b/test/MC/AArch64/neon-scalar-abs.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -14,9 +14,11 @@
// Scalar Floating-point Absolute Difference
//----------------------------------------------------------------------
+ fabd h29, h24, h20
fabd s29, s24, s20
fabd d29, d24, d20
+// CHECK: fabd h29, h24, h20 // encoding: [0x1d,0x17,0xd4,0x7e]
// CHECK: fabd s29, s24, s20 // encoding: [0x1d,0xd7,0xb4,0x7e]
// CHECK: fabd d29, d24, d20 // encoding: [0x1d,0xd7,0xf4,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-by-elem-mla.s b/test/MC/AArch64/neon-scalar-by-elem-mla.s
index fec9d12d8b8d..394fda673e20 100644
--- a/test/MC/AArch64/neon-scalar-by-elem-mla.s
+++ b/test/MC/AArch64/neon-scalar-by-elem-mla.s
@@ -1,8 +1,9 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Floating Point fused multiply-add (scalar, by element)
//------------------------------------------------------------------------------
+ fmla h0, h1, v1.h[5]
fmla s0, s1, v1.s[0]
fmla s30, s11, v1.s[1]
fmla s4, s5, v7.s[2]
@@ -10,6 +11,7 @@
fmla d0, d1, v1.d[0]
fmla d30, d11, v1.d[1]
+// CHECK: fmla h0, h1, v1.h[5] // encoding: [0x20,0x18,0x11,0x5f]
// CHECK: fmla s0, s1, v1.s[0] // encoding: [0x20,0x10,0x81,0x5f]
// CHECK: fmla s30, s11, v1.s[1] // encoding: [0x7e,0x11,0xa1,0x5f]
// CHECK: fmla s4, s5, v7.s[2] // encoding: [0xa4,0x18,0x87,0x5f]
@@ -21,6 +23,7 @@
// Floating Point fused multiply-subtract (scalar, by element)
//------------------------------------------------------------------------------
+ fmls h2, h3, v4.h[5]
fmls s2, s3, v4.s[0]
fmls s29, s10, v28.s[1]
fmls s5, s12, v23.s[2]
@@ -28,6 +31,7 @@
fmls d0, d1, v1.d[0]
fmls d30, d11, v1.d[1]
+// CHECK: fmls h2, h3, v4.h[5] // encoding: [0x62,0x58,0x14,0x5f]
// CHECK: fmls s2, s3, v4.s[0] // encoding: [0x62,0x50,0x84,0x5f]
// CHECK: fmls s29, s10, v28.s[1] // encoding: [0x5d,0x51,0xbc,0x5f]
// CHECK: fmls s5, s12, v23.s[2] // encoding: [0x85,0x59,0x97,0x5f]
diff --git a/test/MC/AArch64/neon-scalar-by-elem-mul.s b/test/MC/AArch64/neon-scalar-by-elem-mul.s
index 8b8a3f57a9ca..0d832742a389 100644
--- a/test/MC/AArch64/neon-scalar-by-elem-mul.s
+++ b/test/MC/AArch64/neon-scalar-by-elem-mul.s
@@ -1,8 +1,9 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Floating Point multiply (scalar, by element)
//------------------------------------------------------------------------------
+ fmul h0, h1, v1.h[5]
fmul s0, s1, v1.s[0]
fmul s30, s11, v1.s[1]
fmul s4, s5, v7.s[2]
@@ -10,6 +11,7 @@
fmul d0, d1, v1.d[0]
fmul d30, d11, v1.d[1]
+// CHECK: fmul h0, h1, v1.h[5] // encoding: [0x20,0x98,0x11,0x5f]
// CHECK: fmul s0, s1, v1.s[0] // encoding: [0x20,0x90,0x81,0x5f]
// CHECK: fmul s30, s11, v1.s[1] // encoding: [0x7e,0x91,0xa1,0x5f]
// CHECK: fmul s4, s5, v7.s[2] // encoding: [0xa4,0x98,0x87,0x5f]
@@ -21,6 +23,7 @@
//------------------------------------------------------------------------------
// Floating Point multiply extended (scalar, by element)
//------------------------------------------------------------------------------
+ fmulx h6, h2, v8.h[5]
fmulx s6, s2, v8.s[0]
fmulx s7, s3, v13.s[1]
fmulx s9, s7, v9.s[2]
@@ -28,6 +31,7 @@
fmulx d15, d9, v7.d[0]
fmulx d13, d12, v11.d[1]
+// CHECK: fmulx h6, h2, v8.h[5] // encoding: [0x46,0x98,0x18,0x7f]
// CHECK: fmulx s6, s2, v8.s[0] // encoding: [0x46,0x90,0x88,0x7f]
// CHECK: fmulx s7, s3, v13.s[1] // encoding: [0x67,0x90,0xad,0x7f]
// CHECK: fmulx s9, s7, v9.s[2] // encoding: [0xe9,0x98,0x89,0x7f]
diff --git a/test/MC/AArch64/neon-scalar-cvt.s b/test/MC/AArch64/neon-scalar-cvt.s
index 97416daf0801..3cbf6bae6758 100644
--- a/test/MC/AArch64/neon-scalar-cvt.s
+++ b/test/MC/AArch64/neon-scalar-cvt.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -6,9 +6,11 @@
// Scalar Signed Integer Convert To Floating-point
//----------------------------------------------------------------------
+ scvtf h23, h14
scvtf s22, s13
scvtf d21, d12
+// CHECK: scvtf h23, h14 // encoding: [0xd7,0xd9,0x79,0x5e]
// CHECK: scvtf s22, s13 // encoding: [0xb6,0xd9,0x21,0x5e]
// CHECK: scvtf d21, d12 // encoding: [0x95,0xd9,0x61,0x5e]
@@ -16,9 +18,11 @@
// Scalar Unsigned Integer Convert To Floating-point
//----------------------------------------------------------------------
+ ucvtf h20, h12
ucvtf s22, s13
ucvtf d21, d14
+// CHECK: ucvtf h20, h12 // encoding: [0x94,0xd9,0x79,0x7e]
// CHECK: ucvtf s22, s13 // encoding: [0xb6,0xd9,0x21,0x7e]
// CHECK: ucvtf d21, d14 // encoding: [0xd5,0xd9,0x61,0x7e]
@@ -26,9 +30,11 @@
// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
//----------------------------------------------------------------------
+ scvtf h22, h13, #16
scvtf s22, s13, #32
scvtf d21, d12, #64
+// CHECK: scvtf h22, h13, #16 // encoding: [0xb6,0xe5,0x10,0x5f]
// CHECK: scvtf s22, s13, #32 // encoding: [0xb6,0xe5,0x20,0x5f]
// CHECK: scvtf d21, d12, #64 // encoding: [0x95,0xe5,0x40,0x5f]
@@ -36,9 +42,11 @@
// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
//----------------------------------------------------------------------
+ ucvtf h22, h13, #16
ucvtf s22, s13, #32
ucvtf d21, d14, #64
+// CHECK: ucvtf h22, h13, #16 // encoding: [0xb6,0xe5,0x10,0x7f]
// CHECK: ucvtf s22, s13, #32 // encoding: [0xb6,0xe5,0x20,0x7f]
// CHECK: ucvtf d21, d14, #64 // encoding: [0xd5,0xe5,0x40,0x7f]
@@ -46,9 +54,11 @@
// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
//----------------------------------------------------------------------
+ fcvtzs h21, h12, #1
fcvtzs s21, s12, #1
fcvtzs d21, d12, #1
+// CHECK: fcvtzs h21, h12, #1 // encoding: [0x95,0xfd,0x1f,0x5f]
// CHECK: fcvtzs s21, s12, #1 // encoding: [0x95,0xfd,0x3f,0x5f]
// CHECK: fcvtzs d21, d12, #1 // encoding: [0x95,0xfd,0x7f,0x5f]
@@ -56,9 +66,11 @@
// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
//----------------------------------------------------------------------
+ fcvtzu h21, h12, #1
fcvtzu s21, s12, #1
fcvtzu d21, d12, #1
+// CHECK: fcvtzu h21, h12, #1 // encoding: [0x95,0xfd,0x1f,0x7f]
// CHECK: fcvtzu s21, s12, #1 // encoding: [0x95,0xfd,0x3f,0x7f]
// CHECK: fcvtzu d21, d12, #1 // encoding: [0x95,0xfd,0x7f,0x7f]
@@ -76,9 +88,11 @@
// With Ties To Away
//----------------------------------------------------------------------
+ fcvtas h12, h13
fcvtas s12, s13
fcvtas d21, d14
+// CHECK: fcvtas h12, h13 // encoding: [0xac,0xc9,0x79,0x5e]
// CHECK: fcvtas s12, s13 // encoding: [0xac,0xc9,0x21,0x5e]
// CHECK: fcvtas d21, d14 // encoding: [0xd5,0xc9,0x61,0x5e]
@@ -87,9 +101,11 @@
// Nearest With Ties To Away
//----------------------------------------------------------------------
+ fcvtau h12, h13
fcvtau s12, s13
fcvtau d21, d14
+// CHECK: fcvtau h12, h13 // encoding: [0xac,0xc9,0x79,0x7e]
// CHECK: fcvtau s12, s13 // encoding: [0xac,0xc9,0x21,0x7e]
// CHECK: fcvtau d21, d14 // encoding: [0xd5,0xc9,0x61,0x7e]
@@ -98,9 +114,11 @@
// Minus Infinity
//----------------------------------------------------------------------
+ fcvtms h22, h13
fcvtms s22, s13
fcvtms d21, d14
+// CHECK: fcvtms h22, h13 // encoding: [0xb6,0xb9,0x79,0x5e]
// CHECK: fcvtms s22, s13 // encoding: [0xb6,0xb9,0x21,0x5e]
// CHECK: fcvtms d21, d14 // encoding: [0xd5,0xb9,0x61,0x5e]
@@ -109,9 +127,11 @@
// Minus Infinity
//----------------------------------------------------------------------
+ fcvtmu h12, h13
fcvtmu s12, s13
fcvtmu d21, d14
+// CHECK: fcvtmu h12, h13 // encoding: [0xac,0xb9,0x79,0x7e]
// CHECK: fcvtmu s12, s13 // encoding: [0xac,0xb9,0x21,0x7e]
// CHECK: fcvtmu d21, d14 // encoding: [0xd5,0xb9,0x61,0x7e]
@@ -120,9 +140,11 @@
// With Ties To Even
//----------------------------------------------------------------------
+ fcvtns h22, h13
fcvtns s22, s13
fcvtns d21, d14
+// CHECK: fcvtns h22, h13 // encoding: [0xb6,0xa9,0x79,0x5e]
// CHECK: fcvtns s22, s13 // encoding: [0xb6,0xa9,0x21,0x5e]
// CHECK: fcvtns d21, d14 // encoding: [0xd5,0xa9,0x61,0x5e]
@@ -131,9 +153,11 @@
// Nearest With Ties To Even
//----------------------------------------------------------------------
+ fcvtnu h12, h13
fcvtnu s12, s13
fcvtnu d21, d14
+// CHECK: fcvtnu h12, h13 // encoding: [0xac,0xa9,0x79,0x7e]
// CHECK: fcvtnu s12, s13 // encoding: [0xac,0xa9,0x21,0x7e]
// CHECK: fcvtnu d21, d14 // encoding: [0xd5,0xa9,0x61,0x7e]
@@ -142,9 +166,11 @@
// Positive Infinity
//----------------------------------------------------------------------
+ fcvtps h22, h13
fcvtps s22, s13
fcvtps d21, d14
+// CHECK: fcvtps h22, h13 // encoding: [0xb6,0xa9,0xf9,0x5e]
// CHECK: fcvtps s22, s13 // encoding: [0xb6,0xa9,0xa1,0x5e]
// CHECK: fcvtps d21, d14 // encoding: [0xd5,0xa9,0xe1,0x5e]
@@ -153,9 +179,11 @@
// Positive Infinity
//----------------------------------------------------------------------
+ fcvtpu h12, h13
fcvtpu s12, s13
fcvtpu d21, d14
+// CHECK: fcvtpu h12, h13 // encoding: [0xac,0xa9,0xf9,0x7e]
// CHECK: fcvtpu s12, s13 // encoding: [0xac,0xa9,0xa1,0x7e]
// CHECK: fcvtpu d21, d14 // encoding: [0xd5,0xa9,0xe1,0x7e]
@@ -163,9 +191,11 @@
// Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
//----------------------------------------------------------------------
+ fcvtzs h12, h13
fcvtzs s12, s13
fcvtzs d21, d14
+// CHECK: fcvtzs h12, h13 // encoding: [0xac,0xb9,0xf9,0x5e]
// CHECK: fcvtzs s12, s13 // encoding: [0xac,0xb9,0xa1,0x5e]
// CHECK: fcvtzs d21, d14 // encoding: [0xd5,0xb9,0xe1,0x5e]
@@ -174,8 +204,10 @@
// Zero
//----------------------------------------------------------------------
+ fcvtzu h12, h13
fcvtzu s12, s13
fcvtzu d21, d14
+// CHECK: fcvtzu h12, h13 // encoding: [0xac,0xb9,0xf9,0x7e]
// CHECK: fcvtzu s12, s13 // encoding: [0xac,0xb9,0xa1,0x7e]
// CHECK: fcvtzu d21, d14 // encoding: [0xd5,0xb9,0xe1,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-fp-compare.s b/test/MC/AArch64/neon-scalar-fp-compare.s
index b798b3410670..0b91d945a719 100644
--- a/test/MC/AArch64/neon-scalar-fp-compare.s
+++ b/test/MC/AArch64/neon-scalar-fp-compare.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -6,9 +6,11 @@
// Scalar Floating-point Compare Mask Equal
//----------------------------------------------------------------------
+ fcmeq h10, h11, h12
fcmeq s10, s11, s12
fcmeq d20, d21, d22
+// CHECK: fcmeq h10, h11, h12 // encoding: [0x6a,0x25,0x4c,0x5e]
// CHECK: fcmeq s10, s11, s12 // encoding: [0x6a,0xe5,0x2c,0x5e]
// CHECK: fcmeq d20, d21, d22 // encoding: [0xb4,0xe6,0x76,0x5e]
@@ -16,13 +18,17 @@
// Scalar Floating-point Compare Mask Equal To Zero
//----------------------------------------------------------------------
+ fcmeq h10, h11, #0.0
fcmeq s10, s11, #0.0
fcmeq d20, d21, #0.0
+ fcmeq h10, h11, #0
fcmeq s10, s11, #0
fcmeq d20, d21, #0x0
+// CHECK: fcmeq h10, h11, #0.0 // encoding: [0x6a,0xd9,0xf8,0x5e]
// CHECK: fcmeq s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x5e]
// CHECK: fcmeq d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x5e]
+// CHECK: fcmeq h10, h11, #0.0 // encoding: [0x6a,0xd9,0xf8,0x5e]
// CHECK: fcmeq s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x5e]
// CHECK: fcmeq d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x5e]
@@ -30,9 +36,11 @@
// Scalar Floating-point Compare Mask Greater Than Or Equal
//----------------------------------------------------------------------
+ fcmge h10, h11, h12
fcmge s10, s11, s12
fcmge d20, d21, d22
+// CHECK: fcmge h10, h11, h12 // encoding: [0x6a,0x25,0x4c,0x7e]
// CHECK: fcmge s10, s11, s12 // encoding: [0x6a,0xe5,0x2c,0x7e]
// CHECK: fcmge d20, d21, d22 // encoding: [0xb4,0xe6,0x76,0x7e]
@@ -40,13 +48,17 @@
// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
//----------------------------------------------------------------------
+ fcmge h10, h11, #0.0
fcmge s10, s11, #0.0
fcmge d20, d21, #0.0
+ fcmge h10, h11, #0
fcmge s10, s11, #0
fcmge d20, d21, #0x0
+// CHECK: fcmge h10, h11, #0.0 // encoding: [0x6a,0xc9,0xf8,0x7e]
// CHECK: fcmge s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x7e]
// CHECK: fcmge d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x7e]
+// CHECK: fcmge h10, h11, #0.0 // encoding: [0x6a,0xc9,0xf8,0x7e]
// CHECK: fcmge s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x7e]
// CHECK: fcmge d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x7e]
@@ -54,9 +66,11 @@
// Scalar Floating-point Compare Mask Greather Than
//----------------------------------------------------------------------
+ fcmgt h10, h11, h12
fcmgt s10, s11, s12
fcmgt d20, d21, d22
+// CHECK: fcmgt h10, h11, h12 // encoding: [0x6a,0x25,0xcc,0x7e]
// CHECK: fcmgt s10, s11, s12 // encoding: [0x6a,0xe5,0xac,0x7e]
// CHECK: fcmgt d20, d21, d22 // encoding: [0xb4,0xe6,0xf6,0x7e]
@@ -64,13 +78,17 @@
// Scalar Floating-point Compare Mask Greather Than Zero
//----------------------------------------------------------------------
+ fcmgt h10, h11, #0.0
fcmgt s10, s11, #0.0
fcmgt d20, d21, #0.0
+ fcmgt h10, h11, #0
fcmgt s10, s11, #0
fcmgt d20, d21, #0x0
+// CHECK: fcmgt h10, h11, #0.0 // encoding: [0x6a,0xc9,0xf8,0x5e]
// CHECK: fcmgt s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x5e]
// CHECK: fcmgt d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x5e]
+// CHECK: fcmgt h10, h11, #0.0 // encoding: [0x6a,0xc9,0xf8,0x5e]
// CHECK: fcmgt s10, s11, #0.0 // encoding: [0x6a,0xc9,0xa0,0x5e]
// CHECK: fcmgt d20, d21, #0.0 // encoding: [0xb4,0xca,0xe0,0x5e]
@@ -78,13 +96,17 @@
// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
//----------------------------------------------------------------------
+ fcmle h10, h11, #0.0
fcmle s10, s11, #0.0
fcmle d20, d21, #0.0
+ fcmle h10, h11, #0
fcmle s10, s11, #0
fcmle d20, d21, #0x0
+// CHECK: fcmle h10, h11, #0.0 // encoding: [0x6a,0xd9,0xf8,0x7e]
// CHECK: fcmle s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x7e]
// CHECK: fcmle d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x7e]
+// CHECK: fcmle h10, h11, #0.0 // encoding: [0x6a,0xd9,0xf8,0x7e]
// CHECK: fcmle s10, s11, #0.0 // encoding: [0x6a,0xd9,0xa0,0x7e]
// CHECK: fcmle d20, d21, #0.0 // encoding: [0xb4,0xda,0xe0,0x7e]
@@ -92,13 +114,17 @@
// Scalar Floating-point Compare Mask Less Than
//----------------------------------------------------------------------
+ fcmlt h10, h11, #0.0
fcmlt s10, s11, #0.0
fcmlt d20, d21, #0.0
+ fcmlt h10, h11, #0
fcmlt s10, s11, #0
fcmlt d20, d21, #0x0
+// CHECK: fcmlt h10, h11, #0.0 // encoding: [0x6a,0xe9,0xf8,0x5e]
// CHECK: fcmlt s10, s11, #0.0 // encoding: [0x6a,0xe9,0xa0,0x5e]
// CHECK: fcmlt d20, d21, #0.0 // encoding: [0xb4,0xea,0xe0,0x5e]
+// CHECK: fcmlt h10, h11, #0.0 // encoding: [0x6a,0xe9,0xf8,0x5e]
// CHECK: fcmlt s10, s11, #0.0 // encoding: [0x6a,0xe9,0xa0,0x5e]
// CHECK: fcmlt d20, d21, #0.0 // encoding: [0xb4,0xea,0xe0,0x5e]
@@ -106,9 +132,11 @@
// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
//----------------------------------------------------------------------
+ facge h10, h11, h12
facge s10, s11, s12
facge d20, d21, d22
+// CHECK: facge h10, h11, h12 // encoding: [0x6a,0x2d,0x4c,0x7e]
// CHECK: facge s10, s11, s12 // encoding: [0x6a,0xed,0x2c,0x7e]
// CHECK: facge d20, d21, d22 // encoding: [0xb4,0xee,0x76,0x7e]
@@ -116,8 +144,10 @@
// Scalar Floating-point Absolute Compare Mask Greater Than
//----------------------------------------------------------------------
+ facgt h10, h11, h12
facgt s10, s11, s12
facgt d20, d21, d22
+// CHECK: facgt h10, h11, h12 // encoding: [0x6a,0x2d,0xcc,0x7e]
// CHECK: facgt s10, s11, s12 // encoding: [0x6a,0xed,0xac,0x7e]
// CHECK: facgt d20, d21, d22 // encoding: [0xb4,0xee,0xf6,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s
index e33bdad91a94..323fad206c4d 100644
--- a/test/MC/AArch64/neon-scalar-mul.s
+++ b/test/MC/AArch64/neon-scalar-mul.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -26,9 +26,11 @@
// Floating-point Multiply Extended
//----------------------------------------------------------------------
+ fmulx h20, h22, h15
fmulx s20, s22, s15
fmulx d23, d11, d1
+// CHECK: fmulx h20, h22, h15 // encoding: [0xd4,0x1e,0x4f,0x5e]
// CHECK: fmulx s20, s22, s15 // encoding: [0xd4,0xde,0x2f,0x5e]
// CHECK: fmulx d23, d11, d1 // encoding: [0x77,0xdd,0x61,0x5e]
diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s
index 7a886f3b4a73..923c3549d6f0 100644
--- a/test/MC/AArch64/neon-scalar-recip.s
+++ b/test/MC/AArch64/neon-scalar-recip.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -6,9 +6,11 @@
// Floating-point Reciprocal Step
//----------------------------------------------------------------------
+ frecps h21, h16, h13
frecps s21, s16, s13
frecps d22, d30, d21
+// CHECK: frecps h21, h16, h13 // encoding: [0x15,0x3e,0x4d,0x5e]
// CHECK: frecps s21, s16, s13 // encoding: [0x15,0xfe,0x2d,0x5e]
// CHECK: frecps d22, d30, d21 // encoding: [0xd6,0xff,0x75,0x5e]
@@ -16,9 +18,11 @@
// Floating-point Reciprocal Square Root Step
//----------------------------------------------------------------------
+ frsqrts h21, h5, h12
frsqrts s21, s5, s12
frsqrts d8, d22, d18
+// CHECK: frsqrts h21, h5, h12 // encoding: [0xb5,0x3c,0xcc,0x5e]
// CHECK: frsqrts s21, s5, s12 // encoding: [0xb5,0xfc,0xac,0x5e]
// CHECK: frsqrts d8, d22, d18 // encoding: [0xc8,0xfe,0xf2,0x5e]
@@ -26,9 +30,11 @@
// Scalar Floating-point Reciprocal Estimate
//----------------------------------------------------------------------
+ frecpe h19, h14
frecpe s19, s14
frecpe d13, d13
+// CHECK: frecpe h19, h14 // encoding: [0xd3,0xd9,0xf9,0x5e]
// CHECK: frecpe s19, s14 // encoding: [0xd3,0xd9,0xa1,0x5e]
// CHECK: frecpe d13, d13 // encoding: [0xad,0xd9,0xe1,0x5e]
@@ -36,9 +42,11 @@
// Scalar Floating-point Reciprocal Exponent
//----------------------------------------------------------------------
+ frecpx h18, h10
frecpx s18, s10
frecpx d16, d19
+// CHECK: frecpx h18, h10 // encoding: [0x52,0xf9,0xf9,0x5e]
// CHECK: frecpx s18, s10 // encoding: [0x52,0xf9,0xa1,0x5e]
// CHECK: frecpx d16, d19 // encoding: [0x70,0xfa,0xe1,0x5e]
@@ -46,8 +54,10 @@
// Scalar Floating-point Reciprocal Square Root Estimate
//----------------------------------------------------------------------
+ frsqrte h22, h13
frsqrte s22, s13
frsqrte d21, d12
+// CHECK: frsqrte h22, h13 // encoding: [0xb6,0xd9,0xf9,0x7e]
// CHECK: frsqrte s22, s13 // encoding: [0xb6,0xd9,0xa1,0x7e]
// CHECK: frsqrte d21, d12 // encoding: [0x95,0xd9,0xe1,0x7e]
diff --git a/test/MC/AArch64/neon-scalar-reduce-pairwise.s b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
index 403a940ec2f2..dae61d0f0f32 100644
--- a/test/MC/AArch64/neon-scalar-reduce-pairwise.s
+++ b/test/MC/AArch64/neon-scalar-reduce-pairwise.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Integer)
@@ -10,7 +10,12 @@
//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Floating Point)
//----------------------------------------------------------------------
+ faddp h18, v3.2h
+ faddp h18, v3.2H
+ faddp s19, v2.2s
faddp d20, v1.2d
+// CHECK: faddp h18, v3.2h // encoding: [0x72,0xd8,0x30,0x5e]
+// CHECK: faddp s19, v2.2s // encoding: [0x53,0xd8,0x30,0x7e]
// CHECK: faddp d20, v1.2d // encoding: [0x34,0xd8,0x70,0x7e]
diff --git a/test/MC/AArch64/neon-simd-misc.s b/test/MC/AArch64/neon-simd-misc.s
index 6d1aafdd7725..32dd48629cd8 100644
--- a/test/MC/AArch64/neon-simd-misc.s
+++ b/test/MC/AArch64/neon-simd-misc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=arm64 -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=arm64 -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -298,10 +298,14 @@
// Floating-point absolute
//------------------------------------------------------------------------------
+ fabs v4.4h, v0.4h
+ fabs v6.8h, v8.8h
fabs v6.4s, v8.4s
fabs v6.2d, v8.2d
fabs v4.2s, v0.2s
+// CHECK: fabs v4.4h, v0.4h // encoding: [0x04,0xf8,0xf8,0x0e]
+// CHECK: fabs v6.8h, v8.8h // encoding: [0x06,0xf9,0xf8,0x4e]
// CHECK: fabs v6.4s, v8.4s // encoding: [0x06,0xf9,0xa0,0x4e]
// CHECK: fabs v6.2d, v8.2d // encoding: [0x06,0xf9,0xe0,0x4e]
// CHECK: fabs v4.2s, v0.2s // encoding: [0x04,0xf8,0xa0,0x0e]
@@ -310,10 +314,14 @@
// Floating-point negate
//------------------------------------------------------------------------------
+ fneg v4.4h, v0.4h
+ fneg v6.8h, v8.8h
fneg v6.4s, v8.4s
fneg v6.2d, v8.2d
fneg v4.2s, v0.2s
+// CHECK: fneg v4.4h, v0.4h // encoding: [0x04,0xf8,0xf8,0x2e]
+// CHECK: fneg v6.8h, v8.8h // encoding: [0x06,0xf9,0xf8,0x6e]
// CHECK: fneg v6.4s, v8.4s // encoding: [0x06,0xf9,0xa0,0x6e]
// CHECK: fneg v6.2d, v8.2d // encoding: [0x06,0xf9,0xe0,0x6e]
// CHECK: fneg v4.2s, v0.2s // encoding: [0x04,0xf8,0xa0,0x2e]
@@ -450,58 +458,86 @@
// Floating-point round to integral
//------------------------------------------------------------------------------
+ frintn v4.4h, v0.4h
+ frintn v6.8h, v8.8h
frintn v6.4s, v8.4s
frintn v6.2d, v8.2d
frintn v4.2s, v0.2s
+// CHECK: frintn v4.4h, v0.4h // encoding: [0x04,0x88,0x79,0x0e]
+// CHECK: frintn v6.8h, v8.8h // encoding: [0x06,0x89,0x79,0x4e]
// CHECK: frintn v6.4s, v8.4s // encoding: [0x06,0x89,0x21,0x4e]
// CHECK: frintn v6.2d, v8.2d // encoding: [0x06,0x89,0x61,0x4e]
// CHECK: frintn v4.2s, v0.2s // encoding: [0x04,0x88,0x21,0x0e]
+ frinta v4.4h, v0.4h
+ frinta v6.8h, v8.8h
frinta v6.4s, v8.4s
frinta v6.2d, v8.2d
frinta v4.2s, v0.2s
+// CHECK: frinta v4.4h, v0.4h // encoding: [0x04,0x88,0x79,0x2e]
+// CHECK: frinta v6.8h, v8.8h // encoding: [0x06,0x89,0x79,0x6e]
// CHECK: frinta v6.4s, v8.4s // encoding: [0x06,0x89,0x21,0x6e]
// CHECK: frinta v6.2d, v8.2d // encoding: [0x06,0x89,0x61,0x6e]
// CHECK: frinta v4.2s, v0.2s // encoding: [0x04,0x88,0x21,0x2e]
+ frintp v4.4h, v0.4h
+ frintp v6.8h, v8.8h
frintp v6.4s, v8.4s
frintp v6.2d, v8.2d
frintp v4.2s, v0.2s
+// CHECK: frintp v4.4h, v0.4h // encoding: [0x04,0x88,0xf9,0x0e]
+// CHECK: frintp v6.8h, v8.8h // encoding: [0x06,0x89,0xf9,0x4e]
// CHECK: frintp v6.4s, v8.4s // encoding: [0x06,0x89,0xa1,0x4e]
// CHECK: frintp v6.2d, v8.2d // encoding: [0x06,0x89,0xe1,0x4e]
// CHECK: frintp v4.2s, v0.2s // encoding: [0x04,0x88,0xa1,0x0e]
+ frintm v4.4h, v0.4h
+ frintm v6.8h, v8.8h
frintm v6.4s, v8.4s
frintm v6.2d, v8.2d
frintm v4.2s, v0.2s
+// CHECK: frintm v4.4h, v0.4h // encoding: [0x04,0x98,0x79,0x0e]
+// CHECK: frintm v6.8h, v8.8h // encoding: [0x06,0x99,0x79,0x4e]
// CHECK: frintm v6.4s, v8.4s // encoding: [0x06,0x99,0x21,0x4e]
// CHECK: frintm v6.2d, v8.2d // encoding: [0x06,0x99,0x61,0x4e]
// CHECK: frintm v4.2s, v0.2s // encoding: [0x04,0x98,0x21,0x0e]
+ frintx v4.4h, v0.4h
+ frintx v6.8h, v8.8h
frintx v6.4s, v8.4s
frintx v6.2d, v8.2d
frintx v4.2s, v0.2s
+// CHECK: frintx v4.4h, v0.4h // encoding: [0x04,0x98,0x79,0x2e]
+// CHECK: frintx v6.8h, v8.8h // encoding: [0x06,0x99,0x79,0x6e]
// CHECK: frintx v6.4s, v8.4s // encoding: [0x06,0x99,0x21,0x6e]
// CHECK: frintx v6.2d, v8.2d // encoding: [0x06,0x99,0x61,0x6e]
// CHECK: frintx v4.2s, v0.2s // encoding: [0x04,0x98,0x21,0x2e]
+ frintz v4.4h, v0.4h
+ frintz v6.8h, v8.8h
frintz v6.4s, v8.4s
frintz v6.2d, v8.2d
frintz v4.2s, v0.2s
+// CHECK: frintz v4.4h, v0.4h // encoding: [0x04,0x98,0xf9,0x0e]
+// CHECK: frintz v6.8h, v8.8h // encoding: [0x06,0x99,0xf9,0x4e]
// CHECK: frintz v6.4s, v8.4s // encoding: [0x06,0x99,0xa1,0x4e]
// CHECK: frintz v6.2d, v8.2d // encoding: [0x06,0x99,0xe1,0x4e]
// CHECK: frintz v4.2s, v0.2s // encoding: [0x04,0x98,0xa1,0x0e]
+ frinti v4.4h, v0.4h
+ frinti v6.8h, v8.8h
frinti v6.4s, v8.4s
frinti v6.2d, v8.2d
frinti v4.2s, v0.2s
+// CHECK: frinti v4.4h, v0.4h // encoding: [0x04,0x98,0xf9,0x2e]
+// CHECK: frinti v6.8h, v8.8h // encoding: [0x06,0x99,0xf9,0x6e]
// CHECK: frinti v6.4s, v8.4s // encoding: [0x06,0x99,0xa1,0x6e]
// CHECK: frinti v6.2d, v8.2d // encoding: [0x06,0x99,0xe1,0x6e]
// CHECK: frinti v4.2s, v0.2s // encoding: [0x04,0x98,0xa1,0x2e]
@@ -510,83 +546,123 @@
// Floating-point convert to integer
//------------------------------------------------------------------------------
+ fcvtns v4.4h, v0.4h
+ fcvtns v6.8h, v8.8h
fcvtns v6.4s, v8.4s
fcvtns v6.2d, v8.2d
fcvtns v4.2s, v0.2s
+// CHECK: fcvtns v4.4h, v0.4h // encoding: [0x04,0xa8,0x79,0x0e]
+// CHECK: fcvtns v6.8h, v8.8h // encoding: [0x06,0xa9,0x79,0x4e]
// CHECK: fcvtns v6.4s, v8.4s // encoding: [0x06,0xa9,0x21,0x4e]
// CHECK: fcvtns v6.2d, v8.2d // encoding: [0x06,0xa9,0x61,0x4e]
// CHECK: fcvtns v4.2s, v0.2s // encoding: [0x04,0xa8,0x21,0x0e]
+ fcvtnu v4.4h, v0.4h
+ fcvtnu v6.8h, v8.8h
fcvtnu v6.4s, v8.4s
fcvtnu v6.2d, v8.2d
fcvtnu v4.2s, v0.2s
+// CHECK: fcvtnu v4.4h, v0.4h // encoding: [0x04,0xa8,0x79,0x2e]
+// CHECK: fcvtnu v6.8h, v8.8h // encoding: [0x06,0xa9,0x79,0x6e]
// CHECK: fcvtnu v6.4s, v8.4s // encoding: [0x06,0xa9,0x21,0x6e]
// CHECK: fcvtnu v6.2d, v8.2d // encoding: [0x06,0xa9,0x61,0x6e]
// CHECK: fcvtnu v4.2s, v0.2s // encoding: [0x04,0xa8,0x21,0x2e]
+ fcvtps v4.4h, v0.4h
+ fcvtps v6.8h, v8.8h
fcvtps v6.4s, v8.4s
fcvtps v6.2d, v8.2d
fcvtps v4.2s, v0.2s
+// CHECK: fcvtps v4.4h, v0.4h // encoding: [0x04,0xa8,0xf9,0x0e]
+// CHECK: fcvtps v6.8h, v8.8h // encoding: [0x06,0xa9,0xf9,0x4e]
// CHECK: fcvtps v6.4s, v8.4s // encoding: [0x06,0xa9,0xa1,0x4e]
// CHECK: fcvtps v6.2d, v8.2d // encoding: [0x06,0xa9,0xe1,0x4e]
// CHECK: fcvtps v4.2s, v0.2s // encoding: [0x04,0xa8,0xa1,0x0e]
+ fcvtpu v4.4h, v0.4h
+ fcvtpu v6.8h, v8.8h
fcvtpu v6.4s, v8.4s
fcvtpu v6.2d, v8.2d
fcvtpu v4.2s, v0.2s
+// CHECK: fcvtpu v4.4h, v0.4h // encoding: [0x04,0xa8,0xf9,0x2e]
+// CHECK: fcvtpu v6.8h, v8.8h // encoding: [0x06,0xa9,0xf9,0x6e]
// CHECK: fcvtpu v6.4s, v8.4s // encoding: [0x06,0xa9,0xa1,0x6e]
// CHECK: fcvtpu v6.2d, v8.2d // encoding: [0x06,0xa9,0xe1,0x6e]
// CHECK: fcvtpu v4.2s, v0.2s // encoding: [0x04,0xa8,0xa1,0x2e]
+ fcvtms v4.4h, v0.4h
+ fcvtms v6.8h, v8.8h
fcvtms v6.4s, v8.4s
fcvtms v6.2d, v8.2d
fcvtms v4.2s, v0.2s
+// CHECK: fcvtms v4.4h, v0.4h // encoding: [0x04,0xb8,0x79,0x0e]
+// CHECK: fcvtms v6.8h, v8.8h // encoding: [0x06,0xb9,0x79,0x4e]
// CHECK: fcvtms v6.4s, v8.4s // encoding: [0x06,0xb9,0x21,0x4e]
// CHECK: fcvtms v6.2d, v8.2d // encoding: [0x06,0xb9,0x61,0x4e]
// CHECK: fcvtms v4.2s, v0.2s // encoding: [0x04,0xb8,0x21,0x0e]
+ fcvtmu v4.4h, v0.4h
+ fcvtmu v6.8h, v8.8h
fcvtmu v6.4s, v8.4s
fcvtmu v6.2d, v8.2d
fcvtmu v4.2s, v0.2s
+// CHECK: fcvtmu v4.4h, v0.4h // encoding: [0x04,0xb8,0x79,0x2e]
+// CHECK: fcvtmu v6.8h, v8.8h // encoding: [0x06,0xb9,0x79,0x6e]
// CHECK: fcvtmu v6.4s, v8.4s // encoding: [0x06,0xb9,0x21,0x6e]
// CHECK: fcvtmu v6.2d, v8.2d // encoding: [0x06,0xb9,0x61,0x6e]
// CHECK: fcvtmu v4.2s, v0.2s // encoding: [0x04,0xb8,0x21,0x2e]
+ fcvtzs v4.4h, v0.4h
+ fcvtzs v6.8h, v8.8h
fcvtzs v6.4s, v8.4s
fcvtzs v6.2d, v8.2d
fcvtzs v4.2s, v0.2s
+// CHECK: fcvtzs v4.4h, v0.4h // encoding: [0x04,0xb8,0xf9,0x0e]
+// CHECK: fcvtzs v6.8h, v8.8h // encoding: [0x06,0xb9,0xf9,0x4e]
// CHECK: fcvtzs v6.4s, v8.4s // encoding: [0x06,0xb9,0xa1,0x4e]
// CHECK: fcvtzs v6.2d, v8.2d // encoding: [0x06,0xb9,0xe1,0x4e]
// CHECK: fcvtzs v4.2s, v0.2s // encoding: [0x04,0xb8,0xa1,0x0e]
+ fcvtzu v4.4h, v0.4h
+ fcvtzu v6.8h, v8.8h
fcvtzu v6.4s, v8.4s
fcvtzu v6.2d, v8.2d
fcvtzu v4.2s, v0.2s
+// CHECK: fcvtzu v4.4h, v0.4h // encoding: [0x04,0xb8,0xf9,0x2e]
+// CHECK: fcvtzu v6.8h, v8.8h // encoding: [0x06,0xb9,0xf9,0x6e]
// CHECK: fcvtzu v6.4s, v8.4s // encoding: [0x06,0xb9,0xa1,0x6e]
// CHECK: fcvtzu v6.2d, v8.2d // encoding: [0x06,0xb9,0xe1,0x6e]
// CHECK: fcvtzu v4.2s, v0.2s // encoding: [0x04,0xb8,0xa1,0x2e]
+ fcvtas v4.4h, v0.4h
+ fcvtas v6.8h, v8.8h
fcvtas v6.4s, v8.4s
fcvtas v6.2d, v8.2d
fcvtas v4.2s, v0.2s
+// CHECK: fcvtas v4.4h, v0.4h // encoding: [0x04,0xc8,0x79,0x0e]
+// CHECK: fcvtas v6.8h, v8.8h // encoding: [0x06,0xc9,0x79,0x4e]
// CHECK: fcvtas v6.4s, v8.4s // encoding: [0x06,0xc9,0x21,0x4e]
// CHECK: fcvtas v6.2d, v8.2d // encoding: [0x06,0xc9,0x61,0x4e]
// CHECK: fcvtas v4.2s, v0.2s // encoding: [0x04,0xc8,0x21,0x0e]
+ fcvtau v4.4h, v0.4h
+ fcvtau v6.8h, v8.8h
fcvtau v6.4s, v8.4s
fcvtau v6.2d, v8.2d
fcvtau v4.2s, v0.2s
+// CHECK: fcvtau v4.4h, v0.4h // encoding: [0x04,0xc8,0x79,0x2e]
+// CHECK: fcvtau v6.8h, v8.8h // encoding: [0x06,0xc9,0x79,0x6e]
// CHECK: fcvtau v6.4s, v8.4s // encoding: [0x06,0xc9,0x21,0x6e]
// CHECK: fcvtau v6.2d, v8.2d // encoding: [0x06,0xc9,0x61,0x6e]
// CHECK: fcvtau v4.2s, v0.2s // encoding: [0x04,0xc8,0x21,0x2e]
@@ -603,42 +679,62 @@
// CHECK: ursqrte v6.4s, v8.4s // encoding: [0x06,0xc9,0xa1,0x6e]
// CHECK: ursqrte v4.2s, v0.2s // encoding: [0x04,0xc8,0xa1,0x2e]
+ scvtf v4.4h, v0.4h
+ scvtf v6.8h, v8.8h
scvtf v6.4s, v8.4s
scvtf v6.2d, v8.2d
scvtf v4.2s, v0.2s
+// CHECK: scvtf v4.4h, v0.4h // encoding: [0x04,0xd8,0x79,0x0e]
+// CHECK: scvtf v6.8h, v8.8h // encoding: [0x06,0xd9,0x79,0x4e]
// CHECK: scvtf v6.4s, v8.4s // encoding: [0x06,0xd9,0x21,0x4e]
// CHECK: scvtf v6.2d, v8.2d // encoding: [0x06,0xd9,0x61,0x4e]
// CHECK: scvtf v4.2s, v0.2s // encoding: [0x04,0xd8,0x21,0x0e]
+ ucvtf v4.4h, v0.4h
+ ucvtf v6.8h, v8.8h
ucvtf v6.4s, v8.4s
ucvtf v6.2d, v8.2d
ucvtf v4.2s, v0.2s
+// CHECK: ucvtf v4.4h, v0.4h // encoding: [0x04,0xd8,0x79,0x2e]
+// CHECK: ucvtf v6.8h, v8.8h // encoding: [0x06,0xd9,0x79,0x6e]
// CHECK: ucvtf v6.4s, v8.4s // encoding: [0x06,0xd9,0x21,0x6e]
// CHECK: ucvtf v6.2d, v8.2d // encoding: [0x06,0xd9,0x61,0x6e]
// CHECK: ucvtf v4.2s, v0.2s // encoding: [0x04,0xd8,0x21,0x2e]
+ frecpe v4.4h, v0.4h
+ frecpe v6.8h, v8.8h
frecpe v6.4s, v8.4s
frecpe v6.2d, v8.2d
frecpe v4.2s, v0.2s
+// CHECK: frecpe v4.4h, v0.4h // encoding: [0x04,0xd8,0xf9,0x0e]
+// CHECK: frecpe v6.8h, v8.8h // encoding: [0x06,0xd9,0xf9,0x4e]
// CHECK: frecpe v6.4s, v8.4s // encoding: [0x06,0xd9,0xa1,0x4e]
// CHECK: frecpe v6.2d, v8.2d // encoding: [0x06,0xd9,0xe1,0x4e]
// CHECK: frecpe v4.2s, v0.2s // encoding: [0x04,0xd8,0xa1,0x0e]
+ frsqrte v4.4h, v0.4h
+ frsqrte v6.8h, v8.8h
frsqrte v6.4s, v8.4s
frsqrte v6.2d, v8.2d
frsqrte v4.2s, v0.2s
+// CHECK: frsqrte v4.4h, v0.4h // encoding: [0x04,0xd8,0xf9,0x2e]
+// CHECK: frsqrte v6.8h, v8.8h // encoding: [0x06,0xd9,0xf9,0x6e]
// CHECK: frsqrte v6.4s, v8.4s // encoding: [0x06,0xd9,0xa1,0x6e]
// CHECK: frsqrte v6.2d, v8.2d // encoding: [0x06,0xd9,0xe1,0x6e]
// CHECK: frsqrte v4.2s, v0.2s // encoding: [0x04,0xd8,0xa1,0x2e]
+ fsqrt v4.4h, v0.4h
+ fsqrt v6.8h, v8.8h
fsqrt v6.4s, v8.4s
fsqrt v6.2d, v8.2d
fsqrt v4.2s, v0.2s
+// CHECK: fsqrt v4.4h, v0.4h // encoding: [0x04,0xf8,0xf9,0x2e]
+// CHECK: fsqrt v6.8h, v8.8h // encoding: [0x06,0xf9,0xf9,0x6e]
// CHECK: fsqrt v6.4s, v8.4s // encoding: [0x06,0xf9,0xa1,0x6e]
// CHECK: fsqrt v6.2d, v8.2d // encoding: [0x06,0xf9,0xe1,0x6e]
// CHECK: fsqrt v4.2s, v0.2s // encoding: [0x04,0xf8,0xa1,0x2e]
diff --git a/test/MC/AArch64/neon-simd-shift.s b/test/MC/AArch64/neon-simd-shift.s
index a16432324efc..4638c535a6a7 100644
--- a/test/MC/AArch64/neon-simd-shift.s
+++ b/test/MC/AArch64/neon-simd-shift.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
@@ -400,16 +400,24 @@
//------------------------------------------------------------------------------
// Fixed-point convert to floating-point
//------------------------------------------------------------------------------
+ scvtf v0.4h, v1.4h, #3
+ scvtf v0.8h, v1.8h, #3
scvtf v0.2s, v1.2s, #3
scvtf v0.4s, v1.4s, #3
scvtf v0.2d, v1.2d, #3
+ ucvtf v0.4h, v1.4h, #3
+ ucvtf v0.8h, v1.8h, #3
ucvtf v0.2s, v1.2s, #3
ucvtf v0.4s, v1.4s, #3
ucvtf v0.2d, v1.2d, #3
+// CHECK: scvtf v0.4h, v1.4h, #3 // encoding: [0x20,0xe4,0x1d,0x0f]
+// CHECK: scvtf v0.8h, v1.8h, #3 // encoding: [0x20,0xe4,0x1d,0x4f]
// CHECK: scvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x0f]
// CHECK: scvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x4f]
// CHECK: scvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x4f]
+// CHECK: ucvtf v0.4h, v1.4h, #3 // encoding: [0x20,0xe4,0x1d,0x2f]
+// CHECK: ucvtf v0.8h, v1.8h, #3 // encoding: [0x20,0xe4,0x1d,0x6f]
// CHECK: ucvtf v0.2s, v1.2s, #3 // encoding: [0x20,0xe4,0x3d,0x2f]
// CHECK: ucvtf v0.4s, v1.4s, #3 // encoding: [0x20,0xe4,0x3d,0x6f]
// CHECK: ucvtf v0.2d, v1.2d, #3 // encoding: [0x20,0xe4,0x7d,0x6f]
@@ -417,17 +425,25 @@
//------------------------------------------------------------------------------
// Floating-point convert to fixed-point
//------------------------------------------------------------------------------
+ fcvtzs v0.4h, v1.4h, #3
+ fcvtzs v0.8h, v1.8h, #3
fcvtzs v0.2s, v1.2s, #3
fcvtzs v0.4s, v1.4s, #3
fcvtzs v0.2d, v1.2d, #3
+ fcvtzu v0.4h, v1.4h, #3
+ fcvtzu v0.8h, v1.8h, #3
fcvtzu v0.2s, v1.2s, #3
fcvtzu v0.4s, v1.4s, #3
fcvtzu v0.2d, v1.2d, #3
+// CHECK: fcvtzs v0.4h, v1.4h, #3 // encoding: [0x20,0xfc,0x1d,0x0f]
+// CHECK: fcvtzs v0.8h, v1.8h, #3 // encoding: [0x20,0xfc,0x1d,0x4f]
// CHECK: fcvtzs v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x0f]
// CHECK: fcvtzs v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x4f]
// CHECK: fcvtzs v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x4f]
+// CHECK: fcvtzu v0.4h, v1.4h, #3 // encoding: [0x20,0xfc,0x1d,0x2f]
+// CHECK: fcvtzu v0.8h, v1.8h, #3 // encoding: [0x20,0xfc,0x1d,0x6f]
// CHECK: fcvtzu v0.2s, v1.2s, #3 // encoding: [0x20,0xfc,0x3d,0x2f]
// CHECK: fcvtzu v0.4s, v1.4s, #3 // encoding: [0x20,0xfc,0x3d,0x6f]
// CHECK: fcvtzu v0.2d, v1.2d, #3 // encoding: [0x20,0xfc,0x7d,0x6f]
diff --git a/test/MC/AArch64/noneon-diagnostics.s b/test/MC/AArch64/noneon-diagnostics.s
index 60a5fd208af9..309df3697df8 100644
--- a/test/MC/AArch64/noneon-diagnostics.s
+++ b/test/MC/AArch64/noneon-diagnostics.s
@@ -27,3 +27,18 @@
// CHECK-ERROR-NEXT: error: instruction requires: neon
// CHECK-ERROR-NEXT: fmls v9.2s, v9.2s, v0.2s
// CHECK-ERROR-NEXT: ^
+
+
+ fmls.4s v3, v12, v17
+ fmls.2d v1, v30, v20
+ fmls.2s v9, v9, v0
+
+// CHECK-ERROR: error: instruction requires: neon
+// CHECK-ERROR-NEXT: fmls.4s v3, v12, v17
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: instruction requires: neon
+// CHECK-ERROR-NEXT: fmls.2d v1, v30, v20
+// CHECK-ERROR-NEXT: ^
+// CHECK-ERROR-NEXT: error: instruction requires: neon
+// CHECK-ERROR-NEXT: fmls.2s v9, v9, v0
+// CHECK-ERROR-NEXT: ^
diff --git a/test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s b/test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s
new file mode 100644
index 000000000000..aa0a1ab86460
--- /dev/null
+++ b/test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s
@@ -0,0 +1,7 @@
+// XFAIL: *
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s
+
+; When assembled, this emits a different encoding value than codegen for the intrinsic
+
+buffer_wbinvl1_vol
+// VI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00]
diff --git a/test/MC/AMDGPU/flat-scratch.s b/test/MC/AMDGPU/flat-scratch.s
new file mode 100644
index 000000000000..0664c80378db
--- /dev/null
+++ b/test/MC/AMDGPU/flat-scratch.s
@@ -0,0 +1,33 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s 2>&1 | FileCheck -check-prefix=SI -check-prefix=GCN %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s | FileCheck -check-prefix=CI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=VI %s
+
+// Add a different RUN line for the failing checks, because when stderr and stdout are mixed the
+// order things are printed is not deterministic.
+// RUN: not llvm-mc -arch=amdgcn -mcpu=hawaii -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck -check-prefix=GCN %s
+
+s_mov_b64 flat_scratch, -1
+// SI: error: invalid operand for instruction
+// CI: s_mov_b64 flat_scratch, -1 ; encoding: [0xc1,0x04,0xe8,0xbe]
+// VI: s_mov_b64 flat_scratch, -1 ; encoding: [0xc1,0x01,0xe6,0xbe]
+
+s_mov_b32 flat_scratch_lo, -1
+// SI: error: invalid operand for instruction
+// CI: s_mov_b32 flat_scratch_lo, -1 ; encoding: [0xc1,0x03,0xe8,0xbe]
+// VI: s_mov_b32 flat_scratch_lo, -1 ; encoding: [0xc1,0x00,0xe6,0xbe]
+
+s_mov_b32 flat_scratch_hi, -1
+// SI: error: invalid operand for instruction
+// CI: s_mov_b32 flat_scratch_hi, -1 ; encoding: [0xc1,0x03,0xe9,0xbe]
+// VI: s_mov_b32 flat_scratch_hi, -1 ; encoding: [0xc1,0x00,0xe7,0xbe]
+
+
+s_mov_b64 flat_scratch_lo, -1
+// GCN: error: invalid operand for instruction
+
+s_mov_b64 flat_scratch_hi, -1
+// GCN: error: invalid operand for instruction
+
+s_mov_b32 flat_scratch, -1
+// GCN: error: invalid operand for instruction
diff --git a/test/MC/AMDGPU/flat.s b/test/MC/AMDGPU/flat.s
index adad29a5595b..20e6c042733c 100644
--- a/test/MC/AMDGPU/flat.s
+++ b/test/MC/AMDGPU/flat.s
@@ -1,143 +1,162 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck %s --check-prefix=CIVI --check-prefix=CI
-// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=CIVI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=CIVI --check-prefix=VI
-// FIXME: These instructions give an 'invalid operand' error on SI and should
-// instead be reporting an 'instruction not supported' error.
+// FIXME: For missing instruction the error message is:
+// error: too few operands for instruction
+// It should be:
+// error: instruction not supported on this GPU
+//
-// XUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=NOVI
-// XUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI
-// XUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOVI
+// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSI
//===----------------------------------------------------------------------===//
// Operands
//===----------------------------------------------------------------------===//
flat_load_dword v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01]
flat_load_dword v1, v[3:4] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x51,0xdc,0x03,0x00,0x00,0x01]
flat_load_dword v1, v[3:4] glc slc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x00,0x01]
flat_load_dword v1, v[3:4] glc tfe
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x51,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] glc slc tfe
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] glc tfe slc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] slc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] slc ; encoding: [0x00,0x00,0x32,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] slc ; encoding: [0x00,0x00,0x32,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_dword v1, v[3:4] slc ; encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x00,0x01]
flat_load_dword v1, v[3:4] slc glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x00,0x01]
flat_load_dword v1, v[3:4] slc tfe
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x32,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x32,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] slc glc tfe
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] slc tfe glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] tfe
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] tfe ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] tfe ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] tfe ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] tfe glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x51,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] tfe slc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x32,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x32,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] tfe glc slc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01]
flat_load_dword v1, v[3:4] tfe slc glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01]
+// VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01]
flat_store_dword v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00]
flat_store_dword v1, v[3:4] glc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x00,0x00]
flat_store_dword v1, v[3:4] glc slc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00]
flat_store_dword v1, v[3:4] glc tfe
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] glc slc tfe
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] glc tfe slc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] slc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] slc ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x00,0x00]
flat_store_dword v1, v[3:4] slc glc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00]
flat_store_dword v1, v[3:4] slc tfe
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] slc glc tfe
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] slc tfe glc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] tfe
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] tfe ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] tfe glc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] tfe slc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] tfe glc slc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
flat_store_dword v1, v[3:4] tfe slc glc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00]
// FIXME: For atomic instructions, glc must be placed immediately following
@@ -151,327 +170,406 @@ flat_store_dword v1, v[3:4] tfe slc glc
// flat_atomic_add v1, v[3:4], v5 tfe slc glc
flat_atomic_add v1 v[3:4], v5 glc slc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_add v1 v[3:4], v5 glc tfe
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add v1, v[3:4], v5 glc tfe ; encoding: [0x00,0x00,0xc9,0xdc,0x03,0x05,0x80,0x01]
+// NOSI: error:
+// CI: flat_atomic_add v1, v[3:4], v5 glc tfe ; encoding: [0x00,0x00,0xc9,0xdc,0x03,0x05,0x80,0x01]
+// VI: flat_atomic_add v1, v[3:4], v5 glc tfe ; encoding: [0x00,0x00,0x09,0xdd,0x03,0x05,0x80,0x01]
flat_atomic_add v1 v[3:4], v5 glc slc tfe
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add v1, v[3:4], v5 glc slc tfe ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x80,0x01]
+// NOSI: error:
+// CI: flat_atomic_add v1, v[3:4], v5 glc slc tfe ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x80,0x01]
+// VI: flat_atomic_add v1, v[3:4], v5 glc slc tfe ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x80,0x01]
flat_atomic_add v1 v[3:4], v5 glc tfe slc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add v1, v[3:4], v5 glc slc tfe ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x80,0x01]
+// NOSI: error:
+// CI: flat_atomic_add v1, v[3:4], v5 glc slc tfe ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x80,0x01]
+// VI: flat_atomic_add v1, v[3:4], v5 glc slc tfe ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x80,0x01]
flat_atomic_add v[3:4], v5 slc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0xca,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0xca,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_add v[3:4], v5 slc tfe
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add v[3:4], v5 slc tfe ; encoding: [0x00,0x00,0xca,0xdc,0x03,0x05,0x80,0x00]
+// NOSI: error:
+// CI: flat_atomic_add v[3:4], v5 slc tfe ; encoding: [0x00,0x00,0xca,0xdc,0x03,0x05,0x80,0x00]
+// VI: flat_atomic_add v[3:4], v5 slc tfe ; encoding: [0x00,0x00,0x0a,0xdd,0x03,0x05,0x80,0x00]
flat_atomic_add v[3:4], v5 tfe
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add v[3:4], v5 tfe ; encoding: [0x00,0x00,0xc8,0xdc,0x03,0x05,0x80,0x00]
+// NOSI: error:
+// CI: flat_atomic_add v[3:4], v5 tfe ; encoding: [0x00,0x00,0xc8,0xdc,0x03,0x05,0x80,0x00]
+// VI: flat_atomic_add v[3:4], v5 tfe ; encoding: [0x00,0x00,0x08,0xdd,0x03,0x05,0x80,0x00]
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
flat_load_ubyte v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_ubyte v1, v[3:4] ; encoding: [0x00,0x00,0x20,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_ubyte v1, v[3:4] ; encoding: [0x00,0x00,0x20,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_ubyte v1, v[3:4] ; encoding: [0x00,0x00,0x40,0xdc,0x03,0x00,0x00,0x01]
flat_load_sbyte v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_sbyte v1, v[3:4] ; encoding: [0x00,0x00,0x24,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_sbyte v1, v[3:4] ; encoding: [0x00,0x00,0x24,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_sbyte v1, v[3:4] ; encoding: [0x00,0x00,0x44,0xdc,0x03,0x00,0x00,0x01]
flat_load_ushort v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_ushort v1, v[3:4] ; encoding: [0x00,0x00,0x28,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_ushort v1, v[3:4] ; encoding: [0x00,0x00,0x28,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_ushort v1, v[3:4] ; encoding: [0x00,0x00,0x48,0xdc,0x03,0x00,0x00,0x01]
flat_load_sshort v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_sshort v1, v[3:4] ; encoding: [0x00,0x00,0x2c,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_sshort v1, v[3:4] ; encoding: [0x00,0x00,0x2c,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_sshort v1, v[3:4] ; encoding: [0x00,0x00,0x4c,0xdc,0x03,0x00,0x00,0x01]
flat_load_dword v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_dword v1, v[3:4] ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x00,0x01]
flat_load_dwordx2 v[1:2], v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x34,0xdc,0x03,0x00,0x00,0x01]
+// NOSI: error:
+// CI: flat_load_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x34,0xdc,0x03,0x00,0x00,0x01]
+// VI: flat_load_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x54,0xdc,0x03,0x00,0x00,0x01]
flat_load_dwordx4 v[5:8], v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x38,0xdc,0x03,0x00,0x00,0x05]
+// NOSI: error:
+// CI: flat_load_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x38,0xdc,0x03,0x00,0x00,0x05]
+// VI: flat_load_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x5c,0xdc,0x03,0x00,0x00,0x05]
flat_load_dwordx3 v[5:7], v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x3c,0xdc,0x03,0x00,0x00,0x05]
+// NOSI: error:
+// CI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x3c,0xdc,0x03,0x00,0x00,0x05]
+// VI: flat_load_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x58,0xdc,0x03,0x00,0x00,0x05]
flat_store_byte v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_byte v1, v[3:4] ; encoding: [0x00,0x00,0x60,0xdc,0x03,0x01,0x00,0x00]
flat_store_short v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_short v1, v[3:4] ; encoding: [0x00,0x00,0x68,0xdc,0x03,0x01,0x00,0x00]
flat_store_dword v1, v[3:4]
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dword v1, v[3:4] ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00]
flat_store_dwordx2 v[1:2], v[3:4]
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CIVI: flat_store_dwordx2 v[1:2], v[3:4] ; encoding: [0x00,0x00,0x74,0xdc,0x03,0x01,0x00,0x00]
flat_store_dwordx4 v[5:8], v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_store_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_store_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_store_dwordx4 v[5:8], v[3:4] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00]
flat_store_dwordx3 v[5:7], v[3:4]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_store_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_store_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x7c,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_store_dwordx3 v[5:7], v[3:4] ; encoding: [0x00,0x00,0x78,0xdc,0x03,0x05,0x00,0x00]
flat_atomic_swap v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_swap v[3:4], v5 ; encoding: [0x00,0x00,0xc0,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_swap v[3:4], v5 ; encoding: [0x00,0x00,0xc0,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_swap v[3:4], v5 ; encoding: [0x00,0x00,0x00,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_swap v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_swap v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xc1,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_swap v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xc1,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_swap v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x01,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_cmpswap v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_cmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0xc4,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_cmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0xc4,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_cmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0x04,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_cmpswap v1, v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_cmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xc5,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_cmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xc5,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_cmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x05,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_add v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add v[3:4], v5 ; encoding: [0x00,0x00,0xc8,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_add v[3:4], v5 ; encoding: [0x00,0x00,0xc8,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_add v[3:4], v5 ; encoding: [0x00,0x00,0x08,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_add v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xc9,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_add v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xc9,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_add v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x09,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_sub v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_sub v[3:4], v5 ; encoding: [0x00,0x00,0xcc,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_sub v[3:4], v5 ; encoding: [0x00,0x00,0xcc,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_sub v[3:4], v5 ; encoding: [0x00,0x00,0x0c,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_sub v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_sub v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xcd,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_sub v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xcd,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_sub v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x0d,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_smin v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_smin v[3:4], v5 ; encoding: [0x00,0x00,0xd4,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_smin v[3:4], v5 ; encoding: [0x00,0x00,0xd4,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_smin v[3:4], v5 ; encoding: [0x00,0x00,0x10,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_smin v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_smin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xd5,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_smin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xd5,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_smin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x11,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_umin v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_umin v[3:4], v5 ; encoding: [0x00,0x00,0xd8,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_umin v[3:4], v5 ; encoding: [0x00,0x00,0xd8,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_umin v[3:4], v5 ; encoding: [0x00,0x00,0x14,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_umin v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_umin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xd9,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_umin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xd9,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_umin v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x15,0xdd,0x03,0x05,0x00,0x01]
-flat_atomic_smax v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_smax v[3:4], v5 ; encoding: [0x00,0x00,0xdc,0xdc,0x03,0x05,0x00,0x00]
+flat_atomic_smax v[3:4], v5,
+// NOSI: error:
+// CI: flat_atomic_smax v[3:4], v5 ; encoding: [0x00,0x00,0xdc,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_smax v[3:4], v5 ; encoding: [0x00,0x00,0x18,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_smax v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_smax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xdd,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_smax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xdd,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_smax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x19,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_umax v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_umax v[3:4], v5 ; encoding: [0x00,0x00,0xe0,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_umax v[3:4], v5 ; encoding: [0x00,0x00,0xe0,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_umax v[3:4], v5 ; encoding: [0x00,0x00,0x1c,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_umax v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_umax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe1,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_umax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe1,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_umax v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x1d,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_and v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_and v[3:4], v5 ; encoding: [0x00,0x00,0xe4,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_and v[3:4], v5 ; encoding: [0x00,0x00,0xe4,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_and v[3:4], v5 ; encoding: [0x00,0x00,0x20,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_and v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_and v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe5,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_and v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe5,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_and v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x21,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_or v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_or v[3:4], v5 ; encoding: [0x00,0x00,0xe8,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_or v[3:4], v5 ; encoding: [0x00,0x00,0xe8,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_or v[3:4], v5 ; encoding: [0x00,0x00,0x24,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_or v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_or v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe9,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_or v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xe9,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_or v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x25,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_xor v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_xor v[3:4], v5 ; encoding: [0x00,0x00,0xec,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_xor v[3:4], v5 ; encoding: [0x00,0x00,0xec,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_xor v[3:4], v5 ; encoding: [0x00,0x00,0x28,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_xor v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_xor v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xed,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_xor v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xed,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_xor v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x29,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_inc v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_inc v[3:4], v5 ; encoding: [0x00,0x00,0xf0,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_inc v[3:4], v5 ; encoding: [0x00,0x00,0xf0,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_inc v[3:4], v5 ; encoding: [0x00,0x00,0x2c,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_inc v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_inc v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xf1,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_inc v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xf1,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_inc v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x2d,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_dec v[3:4], v5
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_dec v[3:4], v5 ; encoding: [0x00,0x00,0xf4,0xdc,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_dec v[3:4], v5 ; encoding: [0x00,0x00,0xf4,0xdc,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_dec v[3:4], v5 ; encoding: [0x00,0x00,0x30,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_dec v1, v[3:4], v5 glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_dec v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xf5,0xdc,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_dec v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0xf5,0xdc,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_dec v1, v[3:4], v5 glc ; encoding: [0x00,0x00,0x31,0xdd,0x03,0x05,0x00,0x01]
+
+flat_atomic_fcmpswap v[3:4], v[5:6]
+// NOSI: error:
+// CI: flat_atomic_fcmpswap v[3:4], v[5:6] ; encoding: [0x00,0x00,0xf8,0xdc,0x03,0x05,0x00,0x00]
+// NOVI: error:
+
+flat_atomic_fcmpswap v1, v[3:4], v[5:6] glc
+// NOSI: error:
+// CI: flat_atomic_fcmpswap v1, v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xf9,0xdc,0x03,0x05,0x00,0x01]
+// NOVI: error:
flat_atomic_swap_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_swap_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x40,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_swap_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x40,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_swap_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x80,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x41,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x41,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_swap_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x81,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_cmpswap_x2 v[3:4], v[5:8]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_cmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x44,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_cmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x44,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_cmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x84,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: [0x00,0x00,0x45,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: [0x00,0x00,0x45,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_cmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: [0x00,0x00,0x85,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_add_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x48,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_add_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x48,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_add_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x88,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x49,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x49,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_add_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x89,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_sub_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_sub_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x4c,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_sub_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x4c,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_sub_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x8c,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x4d,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x4d,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_sub_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x8d,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_smin_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_smin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x54,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_smin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x54,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_smin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x90,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x55,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x55,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_smin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x91,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_umin_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_umin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x58,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_umin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x58,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_umin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x94,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x59,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x59,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_umin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x95,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_smax_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_smax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x5c,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_smax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x5c,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_smax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x98,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x5d,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x5d,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_smax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x99,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_umax_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_umax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x60,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_umax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x60,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_umax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x9c,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x61,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x61,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_umax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x9d,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_and_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_and_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x64,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_and_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x64,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_and_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xa0,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x65,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x65,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_and_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xa1,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_or_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_or_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x68,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_or_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x68,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_or_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xa4,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x69,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x69,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_or_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xa5,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_xor_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_xor_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x6c,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_xor_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x6c,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_xor_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xa8,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x6d,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x6d,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_xor_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xa9,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_inc_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_inc_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x70,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_inc_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x70,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_inc_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xac,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x71,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x71,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_inc_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xad,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_dec_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_dec_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x74,0xdd,0x03,0x05,0x00,0x00]
+// NOSI: error:
+// CI: flat_atomic_dec_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x74,0xdd,0x03,0x05,0x00,0x00]
+// VI: flat_atomic_dec_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0xb0,0xdd,0x03,0x05,0x00,0x00]
flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
-// CIVI: flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x75,0xdd,0x03,0x05,0x00,0x01]
+// NOSI: error:
+// CI: flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x75,0xdd,0x03,0x05,0x00,0x01]
+// VI: flat_atomic_dec_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0xb1,0xdd,0x03,0x05,0x00,0x01]
flat_atomic_fcmpswap_x2 v[3:4], v[5:8]
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CI: flat_atomic_fcmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x00,0x78,0xdd,0x03,0x05,0x00,0x00]
-// NOVI: error: instruction not supported on this GPU
+// NOVI: error:
flat_atomic_fcmpswap_x2 v[1:2], v[3:4], v[5:8] glc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CI: flat_atomic_fcmpswap_x2 v[1:2], v[3:4], v[5:8] glc ; encoding: [0x00,0x00,0x79,0xdd,0x03,0x05,0x00,0x01]
-// NOVI: error: instruction not supported on this GPU
+// NOVI: error:
flat_atomic_fmin_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CI: flat_atomic_fmin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x7c,0xdd,0x03,0x05,0x00,0x00]
-// NOVI: error: instruction not supported on this GPU
+// NOVI: error:
flat_atomic_fmin_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CI: flat_atomic_fmin_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x7d,0xdd,0x03,0x05,0x00,0x01]
-// NOVI: error: instruction not supported on this GPU
+// NOVI: error:
flat_atomic_fmax_x2 v[3:4], v[5:6]
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CI: flat_atomic_fmax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x00,0x80,0xdd,0x03,0x05,0x00,0x00]
-// NOVI: error: instruction not supported on this GPU
+// NOVI: error:
flat_atomic_fmax_x2 v[1:2], v[3:4], v[5:6] glc
-// NOSI: error: instruction not supported on this GPU
+// NOSI: error:
// CI: flat_atomic_fmax_x2 v[1:2], v[3:4], v[5:6] glc ; encoding: [0x00,0x00,0x81,0xdd,0x03,0x05,0x00,0x01]
-// NOVI: error: instruction not supported on this GPU
+// NOVI: error:
diff --git a/test/MC/AMDGPU/hsa-text.s b/test/MC/AMDGPU/hsa-text.s
new file mode 100644
index 000000000000..1d2f1f1619e1
--- /dev/null
+++ b/test/MC/AMDGPU/hsa-text.s
@@ -0,0 +1,34 @@
+// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM
+// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | llvm-readobj -s -sd | FileCheck %s --check-prefix=ELF
+
+// For compatibility reasons we treat convert .text sections to .hsatext
+
+// ELF: Section {
+
+// We want to avoid emitting an empty .text section.
+// ELF-NOT: Name: .text
+
+// ELF: Name: .hsatext
+// ELF: Type: SHT_PROGBITS (0x1)
+// ELF: Flags [ (0xC00007)
+// ELF: SHF_ALLOC (0x2)
+// ELF: SHF_AMDGPU_HSA_AGENT (0x800000)
+// ELF: SHF_AMDGPU_HSA_CODE (0x400000)
+// ELF: SHF_EXECINSTR (0x4)
+// ELF: SHF_WRITE (0x1)
+// ELF: Size: 260
+// ELF: }
+
+.hsa_code_object_version 1,0
+// ASM: .hsa_code_object_version 1,0
+
+.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+
+.text
+// ASM: .hsatext
+
+.amd_kernel_code_t
+.end_amd_kernel_code_t
+
+s_endpgm
diff --git a/test/MC/AMDGPU/hsa.s b/test/MC/AMDGPU/hsa.s
index 7dfea0fe787e..bfdcfb432923 100644
--- a/test/MC/AMDGPU/hsa.s
+++ b/test/MC/AMDGPU/hsa.s
@@ -1,5 +1,15 @@
// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | FileCheck %s --check-prefix=ASM
-// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | llvm-readobj -s -sd | FileCheck %s --check-prefix=ELF
+// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF
+
+// ELF: Section {
+// ELF: Name: .hsatext
+// ELF: Type: SHT_PROGBITS (0x1)
+// ELF: Flags [ (0xC00007)
+// ELF: SHF_ALLOC (0x2)
+// ELF: SHF_AMDGPU_HSA_AGENT (0x800000)
+// ELF: SHF_AMDGPU_HSA_CODE (0x400000)
+// ELF: SHF_EXECINSTR (0x4)
+// ELF: SHF_WRITE (0x1)
// ELF: SHT_NOTE
// ELF: 0000: 04000000 08000000 01000000 414D4400
@@ -8,13 +18,30 @@
// ELF: 0030: 00000000 00000000 414D4400 414D4447
// ELF: 0040: 50550000
+// ELF: Symbol {
+// ELF: Name: amd_kernel_code_t_minimal
+// ELF: Type: AMDGPU_HSA_KERNEL (0xA)
+// ELF: Section: .hsatext
+// ELF: }
+// ELF: Symbol {
+// ELF: Name: amd_kernel_code_t_test_all
+// ELF: Type: AMDGPU_HSA_KERNEL (0xA)
+// ELF: Section: .hsatext
+// ELF: }
+
+
.hsa_code_object_version 1,0
// ASM: .hsa_code_object_version 1,0
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
-.text
+.amdgpu_hsa_kernel amd_kernel_code_t_test_all
+.amdgpu_hsa_kernel amd_kernel_code_t_minimal
+
+.hsatext
+// ASM: .hsatext
+
amd_kernel_code_t_test_all:
; Test all amd_kernel_code_t members with non-default values.
.amd_kernel_code_t
diff --git a/test/MC/AMDGPU/mubuf.s b/test/MC/AMDGPU/mubuf.s
index 78d365abef13..18cca7022699 100644
--- a/test/MC/AMDGPU/mubuf.s
+++ b/test/MC/AMDGPU/mubuf.s
@@ -1,5 +1,9 @@
-// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
-// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=SICI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=SICI %s
+
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOCI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=NOVI %s
//===----------------------------------------------------------------------===//
// Test for different operand combinations
@@ -10,343 +14,359 @@
//===----------------------------------------------------------------------===//
buffer_load_dword v1, s[4:7], s1
-// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
buffer_load_dword v1, s[4:7], s1 offset:4
-// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
buffer_load_dword v1, s[4:7], s1 offset:4 glc
-// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01]
buffer_load_dword v1, s[4:7], s1 offset:4 slc
-// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01]
+// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01]
buffer_load_dword v1, s[4:7], s1 offset:4 tfe
-// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01]
buffer_load_dword v1, s[4:7], s1 tfe glc
-// CHECK: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01]
buffer_load_dword v1, s[4:7], s1 offset:4 glc tfe slc
-// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
buffer_load_dword v1, s[4:7], s1 glc tfe slc offset:4
-// CHECK: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// load - vgpr offset
//===----------------------------------------------------------------------===//
buffer_load_dword v1, v2, s[4:7], s1 offen
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v2, s[4:7], s1 offen offset:4
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x41,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x41,0x01]
buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x30,0xe0,0x02,0x01,0x81,0x01]
buffer_load_dword v1, v2, s[4:7], s1 offen tfe glc
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x30,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x30,0xe0,0x02,0x01,0x81,0x01]
buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
buffer_load_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x30,0xe0,0x02,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// load - vgpr index
//===----------------------------------------------------------------------===//
buffer_load_dword v1, v2, s[4:7], s1 idxen
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x41,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x41,0x01]
buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x30,0xe0,0x02,0x01,0x81,0x01]
buffer_load_dword v1, v2, s[4:7], s1 idxen tfe glc
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x30,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x30,0xe0,0x02,0x01,0x81,0x01]
buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
buffer_load_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
-// CHECK: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x30,0xe0,0x02,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// load - vgpr index and offset
//===----------------------------------------------------------------------===//
buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x41,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x41,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x30,0xe0,0x02,0x01,0x81,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x30,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x30,0xe0,0x02,0x01,0x81,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x30,0xe0,0x02,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// load - addr64
//===----------------------------------------------------------------------===//
buffer_load_dword v1, v[2:3], s[4:7], s1 addr64
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0x01,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x41,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x41,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x30,0xe0,0x02,0x01,0x81,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x30,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x30,0xe0,0x02,0x01,0x81,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
-// CHECK: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_load_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x30,0xe0,0x02,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// store - immediate offset only
//===----------------------------------------------------------------------===//
buffer_store_dword v1, s[4:7], s1
-// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
buffer_store_dword v1, s[4:7], s1 offset:4
-// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
buffer_store_dword v1, s[4:7], s1 offset:4 glc
-// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
buffer_store_dword v1, s[4:7], s1 offset:4 slc
-// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01]
+// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01]
buffer_store_dword v1, s[4:7], s1 offset:4 tfe
-// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
buffer_store_dword v1, s[4:7], s1 tfe glc
-// CHECK: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
buffer_store_dword v1, s[4:7], s1 offset:4 glc tfe slc
-// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
buffer_store_dword v1, s[4:7], s1 glc tfe slc offset:4
-// CHECK: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// store - vgpr offset
//===----------------------------------------------------------------------===//
buffer_store_dword v1, v2, s[4:7], s1 offen
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v2, s[4:7], s1 offen offset:4
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x41,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 slc ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x41,0x01]
buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 tfe ; encoding: [0x04,0x10,0x70,0xe0,0x02,0x01,0x81,0x01]
buffer_store_dword v1, v2, s[4:7], s1 offen tfe glc
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x70,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe ; encoding: [0x00,0x50,0x70,0xe0,0x02,0x01,0x81,0x01]
buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc tfe slc
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
buffer_store_dword v1, v2, s[4:7], s1 offen glc tfe slc offset:4
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 offen offset:4 glc slc tfe ; encoding: [0x04,0x50,0x70,0xe0,0x02,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// store - vgpr index
//===----------------------------------------------------------------------===//
buffer_store_dword v1, v2, s[4:7], s1 idxen
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen ; encoding: [0x00,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x41,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 slc ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x41,0x01]
buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 tfe ; encoding: [0x04,0x20,0x70,0xe0,0x02,0x01,0x81,0x01]
buffer_store_dword v1, v2, s[4:7], s1 idxen tfe glc
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x70,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe ; encoding: [0x00,0x60,0x70,0xe0,0x02,0x01,0x81,0x01]
buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc tfe slc
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
buffer_store_dword v1, v2, s[4:7], s1 idxen glc tfe slc offset:4
-// CHECK: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, v2, s[4:7], s1 idxen offset:4 glc slc tfe ; encoding: [0x04,0x60,0x70,0xe0,0x02,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// store - vgpr index and offset
//===----------------------------------------------------------------------===//
buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen ; encoding: [0x00,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x41,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 slc ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x41,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 tfe ; encoding: [0x04,0x30,0x70,0xe0,0x02,0x01,0x81,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen tfe glc
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x70,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe ; encoding: [0x00,0x70,0x70,0xe0,0x02,0x01,0x81,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc tfe slc
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen glc tfe slc offset:4
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x70,0xe0,0x02,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// store - addr64
//===----------------------------------------------------------------------===//
buffer_store_dword v1, v[2:3], s[4:7], s1 addr64
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 ; encoding: [0x00,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0x01,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x41,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 slc ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x41,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 tfe ; encoding: [0x04,0x80,0x70,0xe0,0x02,0x01,0x81,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 tfe glc
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x70,0xe0,0x02,0x01,0x81,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe ; encoding: [0x00,0xc0,0x70,0xe0,0x02,0x01,0x81,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc tfe slc
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 glc tfe slc offset:4
-// CHECK: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
+// SICI: buffer_store_dword v1, v[2:3], s[4:7], s1 addr64 offset:4 glc slc tfe ; encoding: [0x04,0xc0,0x70,0xe0,0x02,0x01,0xc1,0x01]
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
buffer_load_format_x v1, s[4:7], s1
-// CHECK: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
buffer_load_format_xy v[1:2], s[4:7], s1
-// CHECK: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
buffer_load_format_xyz v[1:3], s[4:7], s1
-// CHECK: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
buffer_load_format_xyzw v[1:4], s[4:7], s1
-// CHECK: buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
buffer_store_format_x v1, s[4:7], s1
-// CHECK: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
buffer_store_format_xy v[1:2], s[4:7], s1
-// CHECK: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
buffer_store_format_xyz v[1:3], s[4:7], s1
-// CHECK: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
buffer_store_format_xyzw v[1:4], s[4:7], s1
-// CHECK: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
buffer_load_ubyte v1, s[4:7], s1
-// CHECK: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
buffer_load_sbyte v1, s[4:7], s1
-// CHECK: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
buffer_load_ushort v1, s[4:7], s1
-// CHECK: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
buffer_load_sshort v1, s[4:7], s1
-// CHECK: buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
buffer_load_dword v1, s[4:7], s1
-// CHECK: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
buffer_load_dwordx2 v[1:2], s[4:7], s1
-// CHECK: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
buffer_load_dwordx4 v[1:4], s[4:7], s1
-// CHECK: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
buffer_store_byte v1, s[4:7], s1
-// CHECK: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
buffer_store_short v1, s[4:7], s1
-// CHECK: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
buffer_store_dword v1 s[4:7], s1
-// CHECK: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
buffer_store_dwordx2 v[1:2], s[4:7], s1
-// CHECK: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
buffer_store_dwordx4 v[1:4], s[4:7], s1
-// CHECK: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
+// SICI: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
+
+//===----------------------------------------------------------------------===//
+// Cache invalidation
+//===----------------------------------------------------------------------===//
+
+buffer_wbinvl1
+// SICI: buffer_wbinvl1 ; encoding: [0x00,0x00,0xc4,0xe1,0x00,0x00,0x00,0x00]
+
+buffer_wbinvl1_sc
+// SI: buffer_wbinvl1_sc ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00]
+// NOCI: error: instruction not supported on this GPU
+// NOVI: error: instruction not supported on this GPU
+
+buffer_wbinvl1_vol
+// CI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00]
+// NOSI: error: instruction not supported on this GPU
// TODO: Atomics
diff --git a/test/MC/AMDGPU/out-of-range-registers.s b/test/MC/AMDGPU/out-of-range-registers.s
new file mode 100644
index 000000000000..947c64d3e642
--- /dev/null
+++ b/test/MC/AMDGPU/out-of-range-registers.s
@@ -0,0 +1,62 @@
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck %s
+
+s_add_i32 s104, s0, s1
+// CHECK: error: invalid operand for instruction
+
+s_add_i32 s105, s0, s1
+// CHECK: error: invalid operand for instruction
+
+v_add_i32 v256, v0, v1
+// CHECK: error: invalid operand for instruction
+
+v_add_i32 v257, v0, v1
+// CHECK: error: invalid operand for instruction
+
+s_mov_b64 s[0:17], -1
+// CHECK: error: invalid operand for instruction
+
+s_mov_b64 s[103:104], -1
+// CHECK: error: invalid operand for instruction
+
+s_mov_b64 s[104:105], -1
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx4 s[102:105], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx4 s[104:108], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx4 s[108:112], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx4 s[1:4], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx4 s[1:4], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx8 s[104:111], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx8 s[100:107], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx8 s[108:115], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx16 s[92:107], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx16 s[96:111], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx16 s[100:115], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx16 s[104:119], s[2:3], s4
+// CHECK: error: invalid operand for instruction
+
+s_load_dwordx16 s[108:123], s[2:3], s4
+// CHECK: error: invalid operand for instruction
diff --git a/test/MC/AMDGPU/smem.s b/test/MC/AMDGPU/smem.s
new file mode 100644
index 000000000000..8fa964ca8d1e
--- /dev/null
+++ b/test/MC/AMDGPU/smem.s
@@ -0,0 +1,11 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=NOSI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefix=NOSI %s
+
+s_dcache_wb
+; VI: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00]
+; NOSI: error: instruction not supported on this GPU
+
+s_dcache_wb_vol
+; VI: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00]
+; NOSI: error: instruction not supported on this GPU
diff --git a/test/MC/AMDGPU/smrd-err.s b/test/MC/AMDGPU/smrd-err.s
new file mode 100644
index 000000000000..a607e91756da
--- /dev/null
+++ b/test/MC/AMDGPU/smrd-err.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=tahiti %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+s_load_dwordx4 s[100:103], s[2:3], s4
+// VI: error: invalid operand for instruction
+// SI: s_load_dwordx4 s[100:103], s[2:3], s4
+
+
+s_load_dwordx8 s[96:103], s[2:3], s4
+// VI: error: invalid operand for instruction
+// SI: s_load_dwordx8 s[96:103], s[2:3], s4
+
+s_load_dwordx16 s[88:103], s[2:3], s4
+// VI: error: invalid operand for instruction
+// SI: s_load_dwordx16 s[88:103], s[2:3], s4
diff --git a/test/MC/AMDGPU/smrd.s b/test/MC/AMDGPU/smrd.s
index b67abf7e6890..56841914c6f0 100644
--- a/test/MC/AMDGPU/smrd.s
+++ b/test/MC/AMDGPU/smrd.s
@@ -1,32 +1,69 @@
-// RUN: llvm-mc -arch=amdgcn -show-encoding %s | FileCheck %s
-// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s
+// RUN: not llvm-mc -arch=amdgcn -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=SI %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s | FileCheck --check-prefix=GCN --check-prefix=CI %s
+
+// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s --check-prefix=NOSI
+// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s --check-prefix=NOSI
+//===----------------------------------------------------------------------===//
+// Offset Handling
+//===----------------------------------------------------------------------===//
+
+s_load_dword s1, s[2:3], 0xfc
+// GCN: s_load_dword s1, s[2:3], 0xfc ; encoding: [0xfc,0x83,0x00,0xc0]
+
+s_load_dword s1, s[2:3], 0xff
+// GCN: s_load_dword s1, s[2:3], 0xff ; encoding: [0xff,0x83,0x00,0xc0]
+
+s_load_dword s1, s[2:3], 0x100
+// NOSI: error: instruction not supported on this GPU
+// CI: s_load_dword s1, s[2:3], 0x100 ; encoding: [0xff,0x82,0x00,0xc0,0x00,0x01,0x00,0x00]
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
s_load_dword s1, s[2:3], 1
-// CHECK: s_load_dword s1, s[2:3], 0x1 ; encoding: [0x01,0x83,0x00,0xc0]
+// GCN: s_load_dword s1, s[2:3], 0x1 ; encoding: [0x01,0x83,0x00,0xc0]
s_load_dword s1, s[2:3], s4
-// CHECK: s_load_dword s1, s[2:3], s4 ; encoding: [0x04,0x82,0x00,0xc0]
+// GCN: s_load_dword s1, s[2:3], s4 ; encoding: [0x04,0x82,0x00,0xc0]
s_load_dwordx2 s[2:3], s[2:3], 1
-// CHECK: s_load_dwordx2 s[2:3], s[2:3], 0x1 ; encoding: [0x01,0x03,0x41,0xc0]
+// GCN: s_load_dwordx2 s[2:3], s[2:3], 0x1 ; encoding: [0x01,0x03,0x41,0xc0]
s_load_dwordx2 s[2:3], s[2:3], s4
-// CHECK: s_load_dwordx2 s[2:3], s[2:3], s4 ; encoding: [0x04,0x02,0x41,0xc0]
+// GCN: s_load_dwordx2 s[2:3], s[2:3], s4 ; encoding: [0x04,0x02,0x41,0xc0]
s_load_dwordx4 s[4:7], s[2:3], 1
-// CHECK: s_load_dwordx4 s[4:7], s[2:3], 0x1 ; encoding: [0x01,0x03,0x82,0xc0]
+// GCN: s_load_dwordx4 s[4:7], s[2:3], 0x1 ; encoding: [0x01,0x03,0x82,0xc0]
s_load_dwordx4 s[4:7], s[2:3], s4
-// CHECK: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
+// GCN: s_load_dwordx4 s[4:7], s[2:3], s4 ; encoding: [0x04,0x02,0x82,0xc0]
+
+s_load_dwordx4 s[100:103], s[2:3], s4
+// GCN: s_load_dwordx4 s[100:103], s[2:3], s4 ; encoding: [0x04,0x02,0xb2,0xc0]
s_load_dwordx8 s[8:15], s[2:3], 1
-// CHECK: s_load_dwordx8 s[8:15], s[2:3], 0x1 ; encoding: [0x01,0x03,0xc4,0xc0]
+// GCN: s_load_dwordx8 s[8:15], s[2:3], 0x1 ; encoding: [0x01,0x03,0xc4,0xc0]
s_load_dwordx8 s[8:15], s[2:3], s4
-// CHECK: s_load_dwordx8 s[8:15], s[2:3], s4 ; encoding: [0x04,0x02,0xc4,0xc0]
+// GCN: s_load_dwordx8 s[8:15], s[2:3], s4 ; encoding: [0x04,0x02,0xc4,0xc0]
+
+s_load_dwordx8 s[96:103], s[2:3], s4
+// GCN: s_load_dwordx8 s[96:103], s[2:3], s4 ; encoding: [0x04,0x02,0xf0,0xc0]
s_load_dwordx16 s[16:31], s[2:3], 1
-// CHECK: s_load_dwordx16 s[16:31], s[2:3], 0x1 ; encoding: [0x01,0x03,0x08,0xc1]
+// GCN: s_load_dwordx16 s[16:31], s[2:3], 0x1 ; encoding: [0x01,0x03,0x08,0xc1]
s_load_dwordx16 s[16:31], s[2:3], s4
-// CHECK: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1]
+// GCN: s_load_dwordx16 s[16:31], s[2:3], s4 ; encoding: [0x04,0x02,0x08,0xc1]
+
+s_load_dwordx16 s[88:103], s[2:3], s4
+// GCN: s_load_dwordx16 s[88:103], s[2:3], s4 ; encoding: [0x04,0x02,0x2c,0xc1]
+
+s_dcache_inv
+// GCN: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7]
+
+s_dcache_inv_vol
+// CI: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7]
+// NOSI: error: instruction not supported on this GPU
diff --git a/test/MC/AMDGPU/sop1-err.s b/test/MC/AMDGPU/sop1-err.s
index f892356b623d..ee1d383c4469 100644
--- a/test/MC/AMDGPU/sop1-err.s
+++ b/test/MC/AMDGPU/sop1-err.s
@@ -1,37 +1,61 @@
-// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s
-// RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -arch=amdgcn %s 2>&1 | FileCheck -check-prefix=GCN %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tahiti %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SI %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s
s_mov_b32 v1, s2
-// CHECK: error: invalid operand for instruction
+// GCN: error: invalid operand for instruction
s_mov_b32 s1, v0
-// CHECK: error: invalid operand for instruction
+// GCN: error: invalid operand for instruction
s_mov_b32 s[1:2], s0
-// CHECK: error: invalid operand for instruction
+// GCN: error: invalid operand for instruction
s_mov_b32 s0, s[1:2]
-// CHECK: error: invalid operand for instruction
+// GCN: error: invalid operand for instruction
s_mov_b32 s220, s0
-// CHECK: error: invalid operand for instruction
+// GCN: error: invalid operand for instruction
s_mov_b32 s0, s220
-// CHECK: error: invalid operand for instruction
+// GCN: error: invalid operand for instruction
s_mov_b64 s1, s[0:1]
-// CHECK: error: invalid operand for instruction
+// GCN: error: invalid operand for instruction
s_mov_b64 s[0:1], s1
-// CHECK: error: invalid operand for instruction
+// GCN: error: invalid operand for instruction
// Immediate greater than 32-bits
s_mov_b32 s1, 0xfffffffff
-// CHECK: error: invalid immediate: only 32-bit values are legal
+// GCN: error: invalid immediate: only 32-bit values are legal
// Immediate greater than 32-bits
s_mov_b64 s[0:1], 0xfffffffff
-// CHECK: error: invalid immediate: only 32-bit values are legal
+// GCN: error: invalid immediate: only 32-bit values are legal
-// Out of range register
+s_mov_b64 s[0:1], 0xfffffffff
+// GCN: error: invalid immediate: only 32-bit values are legal
+
+s_mov_b64 s[0:1], 0xfffffffff
+// GCN: error: invalid immediate: only 32-bit values are legal
+
+s_mov_b64 s[0:1], 0x0000000200000000
+// GCN: error: invalid immediate: only 32-bit values are legal
+
+// FIXME: This shoudl probably say failed to parse.
s_mov_b32 s
+// GCN: error: invalid operand for instruction
+// Out of range register
+
+s_mov_b32 s102, 1
+// VI: error: invalid operand for instruction
+// SI-NOT: error
+
+s_mov_b32 s103, 1
+// VI: error: invalid operand for instruction
+// SI-NOT: error
+
+s_mov_b64 s[102:103], -1
+// VI: error: invalid operand for instruction
+// SI-NOT: error
diff --git a/test/MC/AMDGPU/sop1.s b/test/MC/AMDGPU/sop1.s
index 92ca73f25004..5f63f9930747 100644
--- a/test/MC/AMDGPU/sop1.s
+++ b/test/MC/AMDGPU/sop1.s
@@ -10,12 +10,29 @@ s_mov_b32 s1, 1
s_mov_b32 s1, 100
// CHECK: s_mov_b32 s1, 0x64 ; encoding: [0xff,0x03,0x81,0xbe,0x64,0x00,0x00,0x00]
+// Literal constant sign bit
+s_mov_b32 s1, 0x80000000
+// CHECK: s_mov_b32 s1, 0x80000000 ; encoding: [0xff,0x03,0x81,0xbe,0x00,0x00,0x00,0x80]
+
+// Negative 32-bit constant
+s_mov_b32 s0, 0xfe5163ab
+// CHECK: s_mov_b32 s0, 0xfe5163ab ; encoding: [0xff,0x03,0x80,0xbe,0xab,0x63,0x51,0xfe]
+
s_mov_b64 s[2:3], s[4:5]
// CHECK: s_mov_b64 s[2:3], s[4:5] ; encoding: [0x04,0x04,0x82,0xbe]
s_mov_b64 s[2:3], 0xffffffffffffffff
// CHECK: s_mov_b64 s[2:3], -1 ; encoding: [0xc1,0x04,0x82,0xbe]
+s_mov_b64 s[2:3], 0xffffffff
+// CHECK: s_mov_b64 s[2:3], 0xffffffff ; encoding: [0xff,0x04,0x82,0xbe,0xff,0xff,0xff,0xff]
+
+s_mov_b64 s[0:1], 0x80000000
+// CHECK: s_mov_b64 s[0:1], 0x80000000 ; encoding: [0xff,0x04,0x80,0xbe,0x00,0x00,0x00,0x80]
+
+s_mov_b64 s[102:103], -1
+// CHECK: s_mov_b64 s[102:103], -1 ; encoding: [0xc1,0x04,0xe6,0xbe]
+
s_cmov_b32 s1, 200
// CHECK: s_cmov_b32 s1, 0xc8 ; encoding: [0xff,0x05,0x81,0xbe,0xc8,0x00,0x00,0x00]
diff --git a/test/MC/AMDGPU/sop2.s b/test/MC/AMDGPU/sop2.s
index 9a7a1c01064b..1fdc47aa616c 100644
--- a/test/MC/AMDGPU/sop2.s
+++ b/test/MC/AMDGPU/sop2.s
@@ -129,3 +129,6 @@ s_cbranch_g_fork s[4:5], s[6:7]
// CHECK: s_absdiff_i32 s2, s4, s6 ; encoding: [0x04,0x06,0x02,0x96]
s_absdiff_i32 s2, s4, s6
+
+// CHECK: s_add_u32 s101, s102, s103 ; encoding: [0x66,0x67,0x65,0x80]
+s_add_u32 s101, s102, s103
diff --git a/test/MC/AMDGPU/vop1.s b/test/MC/AMDGPU/vop1.s
index d0b00fcd1897..22a4f91afefa 100644
--- a/test/MC/AMDGPU/vop1.s
+++ b/test/MC/AMDGPU/vop1.s
@@ -8,6 +8,25 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire -show-encoding %s 2>&1 | FileCheck %s --check-prefix=NOSICI
// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s -check-prefix=NOVI
+// Force 32-bit encoding
+
+// GCN: v_mov_b32_e32 v1, v2 ; encoding: [0x02,0x03,0x02,0x7e]
+v_mov_b32_e32 v1, v2
+
+// Force 32-bit encoding for special instructions
+// FIXME: We should be printing _e32 suffixes for these:
+
+// GCN: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
+v_nop_e32
+
+// SICI: v_clrexcp ; encoding: [0x00,0x82,0x00,0x7e]
+// VI: v_clrexcp ; encoding: [0x00,0x6a,0x00,0x7e]
+v_clrexcp_e32
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
// GCN: v_nop ; encoding: [0x00,0x00,0x00,0x7e]
v_nop
diff --git a/test/MC/AMDGPU/vop2-err.s b/test/MC/AMDGPU/vop2-err.s
index a1131000a909..8d282f9bf7ed 100644
--- a/test/MC/AMDGPU/vop2-err.s
+++ b/test/MC/AMDGPU/vop2-err.s
@@ -32,4 +32,31 @@ v_mul_i32_i24_e64 v1, 100, v3
v_mul_i32_i24_e64 v1, v2, 100
// CHECK: error: invalid operand for instruction
+v_add_i32_e32 v1, s[0:1], v2, v3
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, s[2:3]
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, s[0:1], v2, v3, s[2:3]
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, -1
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, 123
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, vcc, v2, v3, s0
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e32 v1, -1, v2, v3, s0
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32_e64 v1, s[0:1], v2, v3, 123
+// CHECK: error: invalid operand for instruction
+
+v_addc_u32 v1, s[0:1], v2, v3, 123
+// CHECK: error: invalid operand for instruction
+
// TODO: Constant bus restrictions
diff --git a/test/MC/AMDGPU/vop2.s b/test/MC/AMDGPU/vop2.s
index a1f3b8d89365..2b8249152b7b 100644
--- a/test/MC/AMDGPU/vop2.s
+++ b/test/MC/AMDGPU/vop2.s
@@ -251,41 +251,110 @@ v_mbcnt_lo_u32_b32 v1, v2, v3
// VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
v_mbcnt_hi_u32_b32 v1, v2, v3
-// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
-v_add_i32 v1, v2, v3
+// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_add_i32 v1, vcc, v2, v3
-// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
-v_add_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32 v1, s[0:1], v2, v3
-// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
-v_sub_i32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32_e64 v1, s[0:1], v2, v3
-// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
-v_sub_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32_e64 v1, vcc, v2, v3
-// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
-v_subrev_i32 v1, v2, v3
+// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_add_u32 v1, vcc, v2, v3
-// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
-v_subrev_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_u32 v1, s[0:1], v2, v3
-// SICI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
-// VI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
-v_addc_u32 v1, v2, v3
+// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_sub_i32 v1, vcc, v2, v3
-// SICI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
-// VI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
-v_subb_u32 v1, v2, v3
+// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+v_sub_i32 v1, s[0:1], v2, v3
-// SICI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
-// VI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
-v_subbrev_u32 v1, v2, v3
+// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_sub_u32 v1, vcc, v2, v3
+
+// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+v_sub_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_subrev_i32 v1, vcc, v2, v3
+
+// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+v_subrev_i32 v1, s[0:1], v2, v3
+
+// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_subrev_u32 v1, vcc, v2, v3
+
+// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+v_subrev_u32 v1, s[0:1], v2, v3
+
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
+// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32 v1, vcc, v2, v3, vcc
+
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x50]
+// VI: v_addc_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32_e32 v1, vcc, v2, v3, vcc
+
+
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0xaa,0x01]
+v_addc_u32 v1, s[0:1], v2, v3, vcc
+
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
+v_addc_u32 v1, s[0:1], v2, v3, s[2:3]
+
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x0a,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3] ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x0a,0x00]
+v_addc_u32_e64 v1, s[0:1], v2, v3, s[2:3]
+
+// SI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x06,0x03]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3, -1 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x06,0x03]
+v_addc_u32_e64 v1, s[0:1], v2, v3, -1
+
+// SI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0x06,0x03]
+// VI: v_addc_u32_e64 v1, vcc, v2, v3, -1 ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0x06,0x03]
+v_addc_u32_e64 v1, vcc, v2, v3, -1
+
+// SI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x50,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_addc_u32_e64 v1, vcc, v2, v3, vcc ; encoding: [0x01,0x6a,0x1c,0xd1,0x02,0x07,0xaa,0x01]
+v_addc_u32_e64 v1, vcc, v2, v3, vcc
+
+// SI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x52]
+// VI: v_subb_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3a]
+v_subb_u32 v1, vcc, v2, v3, vcc
+
+// SI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_subb_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0xaa,0x01]
+v_subb_u32 v1, s[0:1], v2, v3, vcc
+
+// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x54]
+// VI: v_subbrev_u32_e32 v1, vcc, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x3c]
+v_subbrev_u32 v1, vcc, v2, v3, vcc
+
+// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0xaa,0x01]
+// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3, vcc ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0xaa,0x01]
+v_subbrev_u32 v1, s[0:1], v2, v3, vcc
// SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
// VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]
diff --git a/test/MC/AMDGPU/vop3-vop1-nosrc.s b/test/MC/AMDGPU/vop3-vop1-nosrc.s
new file mode 100644
index 000000000000..ce1a1a7f3380
--- /dev/null
+++ b/test/MC/AMDGPU/vop3-vop1-nosrc.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s | FileCheck %s --check-prefix=SICI
+// RUN: llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s | FileCheck %s --check-prefix=VI
+// XFAIL: *
+
+// FIXME: We should be printing _e64 suffixes for these.
+// FIXME: When this is fixed delete this file and fix test case in vop3.s
+
+v_nop_e64
+// SICI: v_nop_e64 ; encoding: [0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0x00]
+// VI: v_nop_e64 ; encoding: [0x00,0x00,0x40,0xd1,0x00,0x00,0x00,0x00]
+
+v_clrexcp_e64
+// SICI: v_clrexcp_e64 ; encoding: [0x00,0x00,0x82,0xd3,0x00,0x00,0x00,0x00]
+// VI: v_clrexcp_e64 ; encoding: [0x00,0x00,0x75,0xd1,0x00,0x00,0x00,0x00]
diff --git a/test/MC/AMDGPU/vop3.s b/test/MC/AMDGPU/vop3.s
index 63914675a869..712b18e37aab 100644
--- a/test/MC/AMDGPU/vop3.s
+++ b/test/MC/AMDGPU/vop3.s
@@ -14,6 +14,11 @@ v_cmp_lt_f32_e64 s[2:3], v4, -v6
// SICI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x02,0xd0,0x04,0x0d,0x02,0x40]
// VI: v_cmp_lt_f32_e64 s[2:3], v4, -v6 ; encoding: [0x02,0x00,0x41,0xd0,0x04,0x0d,0x02,0x40]
+// Test forcing e64 with vcc dst
+
+v_cmp_lt_f32_e64 vcc, v4, v6
+// SICI: v_cmp_lt_f32_e64 vcc, v4, v6 ; encoding: [0x6a,0x00,0x02,0xd0,0x04,0x0d,0x02,0x00]
+// VI: v_cmp_lt_f32_e64 vcc, v4, v6 ; encoding: [0x6a,0x00,0x41,0xd0,0x04,0x0d,0x02,0x00]
//
// Modifier tests:
@@ -87,12 +92,49 @@ v_cmp_ge_f32 s[2:3], v4, v6
// SICI: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x0c,0xd0,0x04,0x0d,0x02,0x00]
// VI: v_cmp_ge_f32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x46,0xd0,0x04,0x0d,0x02,0x00]
-// TODO: Finish VOPC
+// TODO: Add tests for the rest of v_cmp_*_f32
+// TODO: Add tests for v_cmpx_*_f32
+
+v_cmp_f_f64 s[2:3], v[4:5], v[6:7]
+// SICI: v_cmp_f_f64_e64 s[2:3], v[4:5], v[6:7] ; encoding: [0x02,0x00,0x40,0xd0,0x04,0x0d,0x02,0x00]
+// VI: v_cmp_f_f64_e64 s[2:3], v[4:5], v[6:7] ; encoding: [0x02,0x00,0x60,0xd0,0x04,0x0d,0x02,0x00]
+
+// TODO: Add tests for the rest of v_cmp_*_f64
+// TODO: Add tests for the rest of the floating-point comparision instructions.
+
+v_cmp_f_i32 s[2:3], v4, v6
+// SICI: v_cmp_f_i32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0x00,0xd1,0x04,0x0d,0x02,0x00]
+// VI: v_cmp_f_i32_e64 s[2:3], v4, v6 ; encoding: [0x02,0x00,0xc0,0xd0,0x04,0x0d,0x02,0x00]
+
+// TODO: Add test for the rest of v_cmp_*_i32
+
+v_cmp_f_i64 s[2:3], v[4:5], v[6:7]
+// SICI: v_cmp_f_i64_e64 s[2:3], v[4:5], v[6:7] ; encoding: [0x02,0x00,0x40,0xd1,0x04,0x0d,0x02,0x00]
+// VI: v_cmp_f_i64_e64 s[2:3], v[4:5], v[6:7] ; encoding: [0x02,0x00,0xe0,0xd0,0x04,0x0d,0x02,0x00]
+
+// TODO: Add tests for the rest of the instructions.
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
+// Test forced e64 encoding with e32 operands
+
+v_mov_b32_e64 v1, v2
+// SICI: v_mov_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x02,0xd3,0x02,0x01,0x00,0x00]
+// VI: v_mov_b32_e64 v1, v2 ; encoding: [0x01,0x00,0x41,0xd1,0x02,0x01,0x00,0x00]
+
+// Force e64 encoding for special instructions.
+// FIXME, we should be printing the _e64 suffix for v_nop and v_clrexcp.
+
+v_nop_e64
+// SICI: v_nop ; encoding: [0x00,0x00,0x00,0xd3,0x00,0x00,0x00,0x00]
+// VI: v_nop ; encoding: [0x00,0x00,0x40,0xd1,0x00,0x00,0x00,0x00]
+
+v_clrexcp_e64
+// SICI: v_clrexcp ; encoding: [0x00,0x00,0x82,0xd3,0x00,0x00,0x00,0x00]
+// VI: v_clrexcp ; encoding: [0x00,0x00,0x75,0xd1,0x00,0x00,0x00,0x00]
+
//
// Modifier tests:
//
diff --git a/test/MC/AMDGPU/vopc-errs.s b/test/MC/AMDGPU/vopc-errs.s
new file mode 100644
index 000000000000..06c6752a8441
--- /dev/null
+++ b/test/MC/AMDGPU/vopc-errs.s
@@ -0,0 +1,8 @@
+// RUN: not llvm-mc -arch=amdgcn -show-encoding %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=SI -show-encoding %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=tonga -show-encoding %s 2>&1 | FileCheck %s
+
+// Force 32-bit encoding with non-vcc result
+
+v_cmp_lt_f32_e32 s[0:1], v2, v4
+// CHECK: 18: error: invalid operand for instruction
diff --git a/test/MC/AMDGPU/vopc.s b/test/MC/AMDGPU/vopc.s
index 2d8547c5f953..0692a0e72ce0 100644
--- a/test/MC/AMDGPU/vopc.s
+++ b/test/MC/AMDGPU/vopc.s
@@ -44,5 +44,24 @@ v_cmp_lt_f32 vcc, v2, v4
// SICI: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x02,0x7c]
// VI: v_cmp_lt_f32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x82,0x7c]
-// TODO: Add tests for the rest of the instructions.
+// TODO: Add tests for the rest of v_cmp_*_f32
+// TODO: Add tests for v_cmpx_*_f32
+
+v_cmp_f_f64 vcc, v[2:3], v[4:5]
+// SICI: v_cmp_f_f64_e32 vcc, v[2:3], v[4:5] ; encoding: [0x02,0x09,0x40,0x7c]
+// VI: v_cmp_f_f64_e32 vcc, v[2:3], v[4:5] ; encoding: [0x02,0x09,0xc0,0x7c]
+
+// TODO: Add tests for the rest of v_cmp_*_f64
+// TODO: Add tests for the rest of the floating-point comparision instructions.
+
+v_cmp_f_i32 vcc, v2, v4
+// SICI: v_cmp_f_i32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x00,0x7d]
+// VI: v_cmp_f_i32_e32 vcc, v2, v4 ; encoding: [0x02,0x09,0x80,0x7d]
+// TODO: Add test for the rest of v_cmp_*_i32
+
+v_cmp_f_i64 vcc, v[2:3], v[4:5]
+// SICI: v_cmp_f_i64_e32 vcc, v[2:3], v[4:5] ; encoding: [0x02,0x09,0x40,0x7d]
+// VI: v_cmp_f_i64_e32 vcc, v[2:3], v[4:5] ; encoding: [0x02,0x09,0xc0,0x7d]
+
+// TODO: Add tests for the rest of the instructions.
diff --git a/test/MC/ARM/Windows/invalid-relocation.s b/test/MC/ARM/Windows/invalid-relocation.s
index 4f4c59839a6a..c3e74e97634b 100644
--- a/test/MC/ARM/Windows/invalid-relocation.s
+++ b/test/MC/ARM/Windows/invalid-relocation.s
@@ -1,4 +1,4 @@
-# RUN: not llvm-mc -triple thumbv7-windows -filetype obj -o /dev/null 2>&1 %s \
+# RUN: not llvm-mc -triple thumbv7-windows -incremental-linker-compatible -filetype obj -o /dev/null 2>&1 %s \
# RUN: | FileCheck %s
.def invalid_relocation
diff --git a/test/MC/ARM/arm-elf-relocation-diagnostics.s b/test/MC/ARM/arm-elf-relocation-diagnostics.s
index 5fe903f71619..6875d03b303e 100644
--- a/test/MC/ARM/arm-elf-relocation-diagnostics.s
+++ b/test/MC/ARM/arm-elf-relocation-diagnostics.s
@@ -8,20 +8,19 @@
@ CHECK: .byte target(sbrel)
@ CHECK: ^
-@ TODO: enable these negative test cases
-@ .hword target(sbrel)
-@ @ CHECK-SBREL-HWORD: error: relocated expression must be 32-bit
-@ @ CHECK-SBREL-HWORD: .hword target(sbrel)
-@ @ CHECK-SBREL-HWORD: ^
-@
-@ .short target(sbrel)
-@ @ CHECK-SBREL-SHORT: error: relocated expression must be 32-bit
-@ @ CHECK-SBREL-SHORT: .short target(sbrel)
-@ @ CHECK-SBREL-SHORT: ^
-@
-@ .quad target(sbrel)
-@ @ CHECK-SBREL-SHORT: error: relocated expression must be 32-bit
-@ @ CHECK-SBREL-SHORT: .quad target(sbrel)
-@ @ CHECK-SBREL-SHORT: ^
+ .hword target(sbrel)
+@ CHECK: error: relocated expression must be 32-bit
+@ CHECK: .hword target(sbrel)
+@ CHECK: ^
+
+ .short target(sbrel)
+@ CHECK: error: relocated expression must be 32-bit
+@ CHECK: .short target(sbrel)
+@ CHECK: ^
+
+ .quad target(sbrel)
+@ CHECK: error: relocated expression must be 32-bit
+@ CHECK: .quad target(sbrel)
+@ CHECK: ^
diff --git a/test/MC/ARM/arm-thumb-trustzone.s b/test/MC/ARM/arm-thumb-trustzone.s
index 7755a3c8e69b..4fec4b7e982c 100644
--- a/test/MC/ARM/arm-thumb-trustzone.s
+++ b/test/MC/ARM/arm-thumb-trustzone.s
@@ -1,5 +1,6 @@
@ RUN: not llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+@ RUN: not llvm-mc -triple=thumbv6kz -mcpu=arm1176jzf-s -show-encoding < %s | FileCheck %s -check-prefix=NOTZ
.syntax unified
.globl _func
diff --git a/test/MC/ARM/arm-trustzone.s b/test/MC/ARM/arm-trustzone.s
index 72bac48e84e4..5ab27b58dfe1 100644
--- a/test/MC/ARM/arm-trustzone.s
+++ b/test/MC/ARM/arm-trustzone.s
@@ -1,5 +1,6 @@
@ RUN: not llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
@ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+@ RUN: llvm-mc -triple=armv6kz -mcpu=arm1176jz-s -show-encoding < %s | FileCheck %s -check-prefix=TZ
.syntax unified
.globl _func
@@ -14,7 +15,7 @@ _func:
@------------------------------------------------------------------------------
@ SMC
@------------------------------------------------------------------------------
- smc #0xf
+ smi #0xf @ SMI is old (ARMv6KZ) name for SMC
smceq #0
@ NOTZ-NOT: smc #15
diff --git a/test/MC/ARM/arm11-hint-instr.s b/test/MC/ARM/arm11-hint-instr.s
index 6f5a374e417c..2c0fef4adf73 100644
--- a/test/MC/ARM/arm11-hint-instr.s
+++ b/test/MC/ARM/arm11-hint-instr.s
@@ -5,21 +5,24 @@
@ RUN: | FileCheck --check-prefix=CHECK-ARM %s
@ RUN: llvm-mc -triple=armv6t2 -show-encoding < %s \
@ RUN: | FileCheck --check-prefix=CHECK-ARM %s
-@ RUN: llvm-mc -triple=thumb -mcpu=arm1156t2-s -show-encoding < %s \
-@ RUN: | FileCheck --check-prefix=CHECK-THUMB %s
-@ RUN: llvm-mc -triple=armv6m -show-encoding < %s \
-@ RUN: | FileCheck --check-prefix=CHECK-V6M %s
+@ RUN: not llvm-mc -triple=thumb -mcpu=arm1156t2-s -show-encoding < %s > %t3 2> %t4
+@ RUN: FileCheck --check-prefix=CHECK-THUMB %s < %t3
+@ RUN: FileCheck --check-prefix=CHECK-ERROR-THUMB %s < %t4
+@ RUN: not llvm-mc -triple=armv6m -show-encoding < %s > %t5 2> %t6
+@ RUN: FileCheck --check-prefix=CHECK-V6M %s < %t5
+@ RUN: FileCheck --check-prefix=CHECK-ERROR-V6M %s < %t6
.syntax unified
@------------------------------------------------------------------------------
-@ YIELD/WFE/WFI/SEV - are not supported pre v6K
+@ YIELD/WFE/WFI/SEV/CLREX - are not supported pre v6K
@------------------------------------------------------------------------------
nop
yield
wfe
wfi
sev
+ clrex
@------------------------------------------------------------------------------
@@ -37,6 +40,9 @@
@ CHECK-ERROR-V6: error: instruction requires: armv6k
@ CHECK-ERROR-V6: sev
@ CHECK-ERROR-V6: ^
+@ CHECK-ERROR-V6: error: instruction requires: armv6k
+@ CHECK-ERROR-V6: clrex
+@ CHECK-ERROR-V6: ^
@------------------------------------------------------------------------------
@ v6K using ARM encoding
@@ -49,6 +55,7 @@
@ CHECK-ARM: wfe @ encoding: [0x02,0xf0,0x20,0xe3]
@ CHECK-ARM: wfi @ encoding: [0x03,0xf0,0x20,0xe3]
@ CHECK-ARM: sev @ encoding: [0x04,0xf0,0x20,0xe3]
+@ CHECK-ARM: clrex @ encoding: [0x1f,0xf0,0x7f,0xf5]
@------------------------------------------------------------------------------
@ v6T2 using THUMB encoding (thumb triple)
@@ -58,6 +65,9 @@
@ CHECK-THUMB: wfe @ encoding: [0x20,0xbf]
@ CHECK-THUMB: wfi @ encoding: [0x30,0xbf]
@ CHECK-THUMB: sev @ encoding: [0x40,0xbf]
+@ CHECK-ERROR-THUMB: error: instruction requires: armv7
+@ CHECK-ERROR-THUMB: clrex
+@ CHECK-ERROR-THUMB: ^
@------------------------------------------------------------------------------
@ v6M using THUMB encoding
@@ -67,3 +77,6 @@
@ CHECK-V6M: wfe @ encoding: [0x20,0xbf]
@ CHECK-V6M: wfi @ encoding: [0x30,0xbf]
@ CHECK-V6M: sev @ encoding: [0x40,0xbf]
+@ CHECK-ERROR-V6M: error: instruction requires: armv7
+@ CHECK-ERROR-V6M: clrex
+@ CHECK-ERROR-V6M: ^
diff --git a/test/MC/ARM/basic-arm-instructions-v8.1a.s b/test/MC/ARM/basic-arm-instructions-v8.1a.s
index 005f27bb3983..9b764c18448a 100644
--- a/test/MC/ARM/basic-arm-instructions-v8.1a.s
+++ b/test/MC/ARM/basic-arm-instructions-v8.1a.s
@@ -37,7 +37,7 @@
//CHECK-V8: vqrdmlsh.f32 q3, q4, q5
//CHECK-V8: ^
//CHECK-V8: error: invalid operand for instruction
-//CHECK-V8 vqrdmlsh.f64 d3, d5, d5
+//CHECK-V8: vqrdmlsh.f64 d3, d5, d5
//CHECK-V8: ^
vqrdmlah.s16 d0, d1, d2
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index a1f13b76dda3..99a3cfa7b29e 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -349,6 +349,8 @@ Lforward:
and r6, r7, r8, ror r2
and r10, r1, r6, rrx
and r2, r3, #0x7fffffff
+ and sp, sp, #0x7fffffff
+ and pc, pc, #0x7fffffff
@ destination register is optional
and r1, #0xf
@@ -397,6 +399,8 @@ Lforward:
@ CHECK: and r6, r7, r8, ror r2 @ encoding: [0x78,0x62,0x07,0xe0]
@ CHECK: and r10, r1, r6, rrx @ encoding: [0x66,0xa0,0x01,0xe0]
@ CHECK: bic r2, r3, #-2147483648 @ encoding: [0x02,0x21,0xc3,0xe3]
+@ CHECK: bic sp, sp, #-2147483648 @ encoding: [0x02,0xd1,0xcd,0xe3]
+@ CHECK: bic pc, pc, #-2147483648 @ encoding: [0x02,0xf1,0xcf,0xe3]
@ CHECK: and r1, r1, #15 @ encoding: [0x0f,0x10,0x01,0xe2]
@ CHECK: and r1, r1, #15 @ encoding: [0x0f,0x10,0x01,0xe2]
@@ -502,6 +506,10 @@ Lforward:
bic r6, r7, r8, asr r2
bic r6, r7, r8, ror r2
bic r10, r1, r6, rrx
+ bic r2, r3, #0x7fffffff
+ bic sp, sp, #0x7fffffff
+ bic pc, pc, #0x7fffffff
+
@ destination register is optional
bic r1, #0xf
@@ -548,6 +556,9 @@ Lforward:
@ CHECK: bic r6, r7, r8, asr r2 @ encoding: [0x58,0x62,0xc7,0xe1]
@ CHECK: bic r6, r7, r8, ror r2 @ encoding: [0x78,0x62,0xc7,0xe1]
@ CHECK: bic r10, r1, r6, rrx @ encoding: [0x66,0xa0,0xc1,0xe1]
+@ CHECK: and r2, r3, #-2147483648 @ encoding: [0x02,0x21,0x03,0xe2]
+@ CHECK: and sp, sp, #-2147483648 @ encoding: [0x02,0xd1,0x0d,0xe2]
+@ CHECK: and pc, pc, #-2147483648 @ encoding: [0x02,0xf1,0x0f,0xe2]
@ CHECK: bic r1, r1, #15 @ encoding: [0x0f,0x10,0xc1,0xe3]
diff --git a/test/MC/ARM/basic-thumb2-instructions-v8.s b/test/MC/ARM/basic-thumb2-instructions-v8.s
index a7882aead01f..46bc1b91ffa5 100644
--- a/test/MC/ARM/basic-thumb2-instructions-v8.s
+++ b/test/MC/ARM/basic-thumb2-instructions-v8.s
@@ -3,7 +3,7 @@
@ RUN: llvm-mc -triple thumbv8 -show-encoding < %s | FileCheck %s --check-prefix=CHECK-V8
@ RUN: not llvm-mc -triple thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7
-@ HLT
+@ HLT (in ARMv8 only)
hlt #0
hlt #63
@ CHECK-V8: hlt #0 @ encoding: [0x80,0xba]
@@ -19,12 +19,23 @@
@ CHECK-V8: hlt #24 @ encoding: [0x98,0xba]
@ CHECK-V7: error: instruction requires: armv8
-@ Can accept AL condition code
+@ Can accept AL condition code (in ARMv8 only)
hltal #24
@ CHECK-V8: hlt #24 @ encoding: [0x98,0xba]
@ CHECK-V7: error: instruction requires: armv8
-@ DCPS{1,2,3}
+@ Can accept SP as rGPR (in ARMv8 only)
+ sbc.w r6, r3, sp, asr #16
+ and.w r6, r3, sp, asr #16
+ and sp, r0, #0
+@ CHECK-V8: sbc.w r6, r3, sp, asr #16 @ encoding: [0x63,0xeb,0x2d,0x46]
+@ CHECK-V8: and.w r6, r3, sp, asr #16 @ encoding: [0x03,0xea,0x2d,0x46]
+@ CHECK-V8: and sp, r0, #0 @ encoding: [0x00,0xf0,0x00,0x0d]
+@ CHECK-V7: error: instruction variant requires ARMv8 or later
+@ CHECK-V7: error: instruction variant requires ARMv8 or later
+@ CHECK-V7: error: invalid operand for instruction
+
+@ DCPS{1,2,3} (in ARMv8 only)
dcps1
dcps2
dcps3
@@ -36,7 +47,7 @@
@ CHECK-V7: error: instruction requires: armv8
@------------------------------------------------------------------------------
-@ DMB (v8 barriers)
+@ DMB (ARMv8-only barriers)
@------------------------------------------------------------------------------
dmb ishld
dmb oshld
@@ -53,7 +64,7 @@
@ CHECK-V7: error: invalid operand for instruction
@------------------------------------------------------------------------------
-@ DSB (v8 barriers)
+@ DSB (ARMv8-only barriers)
@------------------------------------------------------------------------------
dsb ishld
dsb oshld
@@ -70,7 +81,7 @@
@ CHECK-V7: error: invalid operand for instruction
@------------------------------------------------------------------------------
-@ SEVL
+@ SEVL (in ARMv8 only)
@------------------------------------------------------------------------------
sevl
sevl.w
diff --git a/test/MC/ARM/big-endian-thumb2-fixup.s b/test/MC/ARM/big-endian-thumb2-fixup.s
index 4fd5276fce6e..0aaa26a209fe 100644
--- a/test/MC/ARM/big-endian-thumb2-fixup.s
+++ b/test/MC/ARM/big-endian-thumb2-fixup.s
@@ -35,14 +35,14 @@ cond_label:
@ARM::fixup_t2_ldst_precel_12
.section s_ldst_precel_12,"ax",%progbits
- ldr r0, ldst_precel_12_label
+ ldr.w r0, ldst_precel_12_label
nop
nop
ldst_precel_12_label:
@ARM::fixup_t2_adr_pcrel_12
.section s_adr_pcrel_12,"ax",%progbits
- adr r0, adr_pcrel_12_label
+ adr.w r0, adr_pcrel_12_label
nop
nop
adr_pcrel_12_label:
diff --git a/test/MC/ARM/coff-debugging-secrel.ll b/test/MC/ARM/coff-debugging-secrel.ll
index 1b8b7310171e..a950ba2b6896 100644
--- a/test/MC/ARM/coff-debugging-secrel.ll
+++ b/test/MC/ARM/coff-debugging-secrel.ll
@@ -1,14 +1,15 @@
; RUN: llc -mtriple thumbv7--windows-itanium -filetype obj -o - %s \
; RUN: | llvm-readobj -r - | FileCheck %s -check-prefix CHECK-ITANIUM
-; RUN: llc -mtriple thumbv7--windows-msvc -filetype obj -o - %s \
+; RUN: sed -e 's/"Dwarf Version"/"CodeView"/' %s \
+; RUN: | llc -mtriple thumbv7--windows-msvc -filetype obj -o - \
; RUN: | llvm-readobj -r - | FileCheck %s -check-prefix CHECK-MSVC
; ModuleID = '/Users/compnerd/work/llvm/test/MC/ARM/reduced.c'
target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7--windows-itanium"
-define arm_aapcs_vfpcc void @function() {
+define arm_aapcs_vfpcc void @function() !dbg !1 {
entry:
ret void, !dbg !0
}
@@ -17,13 +18,13 @@ entry:
!llvm.module.flags = !{!9, !10}
!0 = !DILocation(line: 1, scope: !1)
-!1 = !DISubprogram(name: "function", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !2, scope: !3, type: !4, function: void ()* @function, variables: !6)
+!1 = distinct !DISubprogram(name: "function", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !2, scope: !3, type: !4, variables: !6)
!2 = !DIFile(filename: "/Users/compnerd/work/llvm/test/MC/ARM/reduced.c", directory: "/Users/compnerd/work/llvm")
!3 = !DIFile(filename: "/Users/compnerd/work/llvm/test/MC/ARM/reduced.c", directory: "/Users/compnerd/work/llvm")
!4 = !DISubroutineType(types: !5)
!5 = !{null}
!6 = !{}
-!7 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0", isOptimized: false, emissionKind: 1, file: !2, enums: !6, retainedTypes: !6, subprograms: !8, globals: !6, imports: !6)
+!7 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0", isOptimized: false, emissionKind: 1, file: !2, enums: !6, retainedTypes: !6, subprograms: !8, globals: !6, imports: !6)
!8 = !{!1}
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/MC/ARM/data-in-code.ll b/test/MC/ARM/data-in-code.ll
index c4910ff20e61..10657a3fed39 100644
--- a/test/MC/ARM/data-in-code.ll
+++ b/test/MC/ARM/data-in-code.ll
@@ -53,13 +53,6 @@ exit:
;; ARM: Symbol {
;; ARM: Name: $d
-;; ARM-NEXT: Value: 0
-;; ARM-NEXT: Size: 0
-;; ARM-NEXT: Binding: Local
-;; ARM-NEXT: Type: None
-
-;; ARM: Symbol {
-;; ARM: Name: $d
;; ARM-NEXT: Value: 0x{{[0-9A-F]+}}
;; ARM-NEXT: Size: 0
;; ARM-NEXT: Binding: Local
@@ -77,10 +70,17 @@ exit:
;; ARM-NEXT: Section: .ARM.exidx
;; ARM-NEXT: }
+;; ARM: Symbol {
+;; ARM: Name: $d
+;; ARM-NEXT: Value: 0
+;; ARM-NEXT: Size: 0
+;; ARM-NEXT: Binding: Local
+;; ARM-NEXT: Type: None
+
;; ARM-NOT: ${{[atd]}}
;; TMB: Symbol {
-;; TMB: Name: $d.2
+;; TMB: Name: $d.1
;; TMB-NEXT: Value: 0x{{[0-9A-F]+}}
;; TMB-NEXT: Size: 0
;; TMB-NEXT: Binding: Local
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
index 6f66dc3b4d0a..a1dd95f7d7fc 100644
--- a/test/MC/ARM/diagnostics.s
+++ b/test/MC/ARM/diagnostics.s
@@ -1,7 +1,7 @@
@ RUN: not llvm-mc -triple=armv7-apple-darwin < %s 2> %t
-@ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+@ RUN: FileCheck --check-prefix=CHECK-ERRORS --check-prefix=CHECK-ERRORS-V7 < %t %s
@ RUN: not llvm-mc -triple=armv8 < %s 2> %t
-@ RUN: FileCheck --check-prefix=CHECK-ERRORS-V8 < %t %s
+@ RUN: FileCheck --check-prefix=CHECK-ERRORS --check-prefix=CHECK-ERRORS-V8 < %t %s
@ Check for various assembly diagnostic messages on invalid input.
@@ -98,22 +98,22 @@
@ Out of range immediates for v8 HLT instruction.
hlt #65536
hlt #-1
-@CHECK-ERRORS-V8: error: invalid operand for instruction
-@CHECK-ERRORS-V8: hlt #65536
-@CHECK-ERRORS-V8: ^
-@CHECK-ERRORS-V8: error: invalid operand for instruction
-@CHECK-ERRORS-V8: hlt #-1
-@CHECK-ERRORS-V8: ^
+@CHECK-ERRORS: error: invalid operand for instruction
+@CHECK-ERRORS: hlt #65536
+@CHECK-ERRORS: ^
+@CHECK-ERRORS: error: invalid operand for instruction
+@CHECK-ERRORS: hlt #-1
+@CHECK-ERRORS: ^
@ Illegal condition code for v8 HLT instruction.
hlteq #2
hltlt #23
-@CHECK-ERRORS-V8: error: instruction 'hlt' is not predicable, but condition code specified
-@CHECK-ERRORS-V8: hlteq #2
-@CHECK-ERRORS-V8: ^
-@CHECK-ERRORS-V8: error: instruction 'hlt' is not predicable, but condition code specified
-@CHECK-ERRORS-V8: hltlt #23
-@CHECK-ERRORS-V8: ^
+@CHECK-ERRORS: error: instruction 'hlt' is not predicable, but condition code specified
+@CHECK-ERRORS: hlteq #2
+@CHECK-ERRORS: ^
+@CHECK-ERRORS: error: instruction 'hlt' is not predicable, but condition code specified
+@CHECK-ERRORS: hltlt #23
+@CHECK-ERRORS: ^
@ Out of range 4 and 3 bit immediates on CDP[2]
@@ -149,7 +149,8 @@
@ CHECK-ERRORS: error: invalid operand for instruction
@ CHECK-ERRORS: error: invalid operand for instruction
@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
-@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS-V7: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS-V8: error: invalid operand for instruction
@ p10 and p11 are reserved for NEON
mcr p10, #2, r5, c1, c1, #4
@@ -183,7 +184,8 @@
@ CHECK-ERRORS: error: invalid operand for instruction
@ CHECK-ERRORS: error: invalid operand for instruction
@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
-@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS-V7: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS-V8: error: invalid operand for instruction
@ Shifter operand validation for PKH instructions.
pkhbt r2, r2, r3, lsl #-1
@@ -394,12 +396,14 @@
ldc2 p2, c8, [r1], { 256 }
ldc2 p2, c8, [r1], { -1 }
-@ CHECK-ERRORS: error: coprocessor option must be an immediate in range [0, 255]
-@ CHECK-ERRORS: ldc2 p2, c8, [r1], { 256 }
-@ CHECK-ERRORS: ^
-@ CHECK-ERRORS: error: coprocessor option must be an immediate in range [0, 255]
-@ CHECK-ERRORS: ldc2 p2, c8, [r1], { -1 }
-@ CHECK-ERRORS: ^
+@ CHECK-ERRORS-V7: error: coprocessor option must be an immediate in range [0, 255]
+@ CHECK-ERRORS-V7: ldc2 p2, c8, [r1], { 256 }
+@ CHECK-ERRORS-V7: ^
+@ CHECK-ERRORS-V8: error: register expected
+@ CHECK-ERRORS-V7: error: coprocessor option must be an immediate in range [0, 255]
+@ CHECK-ERRORS-V7: ldc2 p2, c8, [r1], { -1 }
+@ CHECK-ERRORS-V7: ^
+@ CHECK-ERRORS-V8: error: register expected
@ Bad CPS instruction format.
cps f,#1
@@ -470,14 +474,14 @@
vrintn.f32 s8, s9
vrintp.f64.f64 d10, d11
vrintm.f64 d12, d13
-@ CHECK-ERRORS: error: instruction requires: FPARMv8
-@ CHECK-ERRORS: error: instruction requires: FPARMv8
-@ CHECK-ERRORS: error: instruction requires: FPARMv8
-@ CHECK-ERRORS: error: instruction requires: FPARMv8
-@ CHECK-ERRORS: error: instruction requires: FPARMv8
-@ CHECK-ERRORS: error: instruction requires: FPARMv8
-@ CHECK-ERRORS: error: instruction requires: FPARMv8
-@ CHECK-ERRORS: error: instruction requires: FPARMv8
+@ CHECK-ERRORS-V7: error: instruction requires: FPARMv8
+@ CHECK-ERRORS-V7: error: instruction requires: FPARMv8
+@ CHECK-ERRORS-V7: error: instruction requires: FPARMv8
+@ CHECK-ERRORS-V7: error: instruction requires: FPARMv8
+@ CHECK-ERRORS-V7: error: instruction requires: FPARMv8
+@ CHECK-ERRORS-V7: error: instruction requires: FPARMv8
+@ CHECK-ERRORS-V7: error: instruction requires: FPARMv8
+@ CHECK-ERRORS-V7: error: instruction requires: FPARMv8
stm sp!, {r0, pc}^
ldm sp!, {r0}^
diff --git a/test/MC/ARM/directive-arch-armv6j.s b/test/MC/ARM/directive-arch-armv6j.s
deleted file mode 100644
index e27beef1ebaf..000000000000
--- a/test/MC/ARM/directive-arch-armv6j.s
+++ /dev/null
@@ -1,34 +0,0 @@
-@ Test the .arch directive for armv6j
-
-@ This test case will check the default .ARM.attributes value for the
-@ armv6j architecture.
-
-@ RUN: llvm-mc -triple arm-eabi -filetype asm %s \
-@ RUN: | FileCheck %s -check-prefix CHECK-ASM
-@ RUN: llvm-mc -triple arm-eabi -filetype obj %s \
-@ RUN: | llvm-readobj -arm-attributes | FileCheck %s -check-prefix CHECK-ATTR
-
- .syntax unified
- .arch armv6j
-
-@ CHECK-ASM: .arch armv6j
-
-@ CHECK-ATTR: FileAttributes {
-@ CHECK-ATTR: Attribute {
-@ CHECK-ATTR: TagName: CPU_name
-@ CHECK-ATTR: Value: 6J
-@ CHECK-ATTR: }
-@ CHECK-ATTR: Attribute {
-@ CHECK-ATTR: TagName: CPU_arch
-@ CHECK-ATTR: Description: ARM v6
-@ CHECK-ATTR: }
-@ CHECK-ATTR: Attribute {
-@ CHECK-ATTR: TagName: ARM_ISA_use
-@ CHECK-ATTR: Description: Permitted
-@ CHECK-ATTR: }
-@ CHECK-ATTR: Attribute {
-@ CHECK-ATTR: TagName: THUMB_ISA_use
-@ CHECK-ATTR: Description: Thumb-1
-@ CHECK-ATTR: }
-@ CHECK-ATTR: }
-
diff --git a/test/MC/ARM/directive-arch-armv6z.s b/test/MC/ARM/directive-arch-armv6z.s
index 78a9ab1d5de7..efb8f8bfe9d3 100644
--- a/test/MC/ARM/directive-arch-armv6z.s
+++ b/test/MC/ARM/directive-arch-armv6z.s
@@ -11,12 +11,12 @@
.syntax unified
.arch armv6z
-@ CHECK-ASM: .arch armv6z
+@ CHECK-ASM: .arch armv6kz
@ CHECK-ATTR: FileAttributes {
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: CPU_name
-@ CHECK-ATTR: Value: 6Z
+@ CHECK-ATTR: Value: 6KZ
@ CHECK-ATTR: }
@ CHECK-ATTR: Attribute {
@ CHECK-ATTR: TagName: CPU_arch
diff --git a/test/MC/ARM/directive-arch-armv6zk.s b/test/MC/ARM/directive-arch-armv6zk.s
deleted file mode 100644
index 48d9cc1a2bf1..000000000000
--- a/test/MC/ARM/directive-arch-armv6zk.s
+++ /dev/null
@@ -1,38 +0,0 @@
-@ Test the .arch directive for armv6zk
-
-@ This test case will check the default .ARM.attributes value for the
-@ armv6zk architecture.
-
-@ RUN: llvm-mc -triple arm-eabi -filetype asm %s \
-@ RUN: | FileCheck %s -check-prefix CHECK-ASM
-@ RUN: llvm-mc -triple arm-eabi -filetype obj %s \
-@ RUN: | llvm-readobj -arm-attributes | FileCheck %s -check-prefix CHECK-ATTR
-
- .syntax unified
- .arch armv6zk
-
-@ CHECK-ASM: .arch armv6zk
-
-@ CHECK-ATTR: FileAttributes {
-@ CHECK-ATTR: Attribute {
-@ CHECK-ATTR: TagName: CPU_name
-@ CHECK-ATTR: Value: 6ZK
-@ CHECK-ATTR: }
-@ CHECK-ATTR: Attribute {
-@ CHECK-ATTR: TagName: CPU_arch
-@ CHECK-ATTR: Description: ARM v6KZ
-@ CHECK-ATTR: }
-@ CHECK-ATTR: Attribute {
-@ CHECK-ATTR: TagName: ARM_ISA_use
-@ CHECK-ATTR: Description: Permitted
-@ CHECK-ATTR: }
-@ CHECK-ATTR: Attribute {
-@ CHECK-ATTR: TagName: THUMB_ISA_use
-@ CHECK-ATTR: Description: Thumb-1
-@ CHECK-ATTR: }
-@ CHECK-ATTR: Attribute {
-@ CHECK-ATTR: TagName: Virtualization_use
-@ CHECK-ATTR: Description: TrustZone
-@ CHECK-ATTR: }
-@ CHECK-ATTR: }
-
diff --git a/test/MC/ARM/directive-arch-armv8.2-a.s b/test/MC/ARM/directive-arch-armv8.2-a.s
new file mode 100644
index 000000000000..c9f4469fb0ae
--- /dev/null
+++ b/test/MC/ARM/directive-arch-armv8.2-a.s
@@ -0,0 +1,46 @@
+@ Test the .arch directive for armv8.2-a
+
+@ This test case will check the default .ARM.attributes value for the
+@ armv8-a architecture.
+
+@ RUN: llvm-mc -triple arm-eabi -filetype asm %s \
+@ RUN: | FileCheck %s -check-prefix CHECK-ASM
+@ RUN: llvm-mc -triple arm-eabi -filetype obj %s \
+@ RUN: | llvm-readobj -arm-attributes | FileCheck %s -check-prefix CHECK-ATTR
+
+ .syntax unified
+ .arch armv8.2-a
+
+@ CHECK-ASM: .arch armv8.2-a
+
+@ CHECK-ATTR: FileAttributes {
+@ CHECK-ATTR: Attribute {
+@ CHECK-ATTR: TagName: CPU_name
+@ CHECK-ATTR: Value: 8.2-A
+@ CHECK-ATTR: }
+@ CHECK-ATTR: Attribute {
+@ CHECK-ATTR: TagName: CPU_arch
+@ CHECK-ATTR: Description: ARM v8
+@ CHECK-ATTR: }
+@ CHECK-ATTR: Attribute {
+@ CHECK-ATTR: TagName: CPU_arch_profile
+@ CHECK-ATTR: Description: Application
+@ CHECK-ATTR: }
+@ CHECK-ATTR: Attribute {
+@ CHECK-ATTR: TagName: ARM_ISA_use
+@ CHECK-ATTR: Description: Permitted
+@ CHECK-ATTR: }
+@ CHECK-ATTR: Attribute {
+@ CHECK-ATTR: TagName: THUMB_ISA_use
+@ CHECK-ATTR: Description: Thumb-2
+@ CHECK-ATTR: }
+@ CHECK-ATTR: Attribute {
+@ CHECK-ATTR: TagName: MPextension_use
+@ CHECK-ATTR: Description: Permitted
+@ CHECK-ATTR: }
+@ CHECK-ATTR: Attribute {
+@ CHECK-ATTR: TagName: Virtualization_use
+@ CHECK-ATTR: Description: TrustZone + Virtualization Extensions
+@ CHECK-ATTR: }
+@ CHECK-ATTR: }
+
diff --git a/test/MC/ARM/directive-arch-semantic-action.s b/test/MC/ARM/directive-arch-semantic-action.s
index b9c65d8e49c8..2d64026e041d 100644
--- a/test/MC/ARM/directive-arch-semantic-action.s
+++ b/test/MC/ARM/directive-arch-semantic-action.s
@@ -1,6 +1,6 @@
@ RUN: not llvm-mc -triple arm-gnueabi-linux -filetype asm %s 2>&1 | FileCheck %s
- .arch armv6
+ .arch armv6
dsb
@ CHECK: error: instruction requires: data-barriers
@@ -9,4 +9,4 @@
@ CHECK-NOT: error: instruction requires: data-barriers
.arch invalid_architecture_name
-@ CHECK: error: Unknown arch name
+@ CHECK: error: Unknown arch name
diff --git a/test/MC/ARM/directive-arch_extension-sec.s b/test/MC/ARM/directive-arch_extension-sec.s
index 55ead8506ab1..645da0f75d31 100644
--- a/test/MC/ARM/directive-arch_extension-sec.s
+++ b/test/MC/ARM/directive-arch_extension-sec.s
@@ -1,11 +1,13 @@
@ RUN: not llvm-mc -triple armv6-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN: | FileCheck %s -check-prefix CHECK-ARMv6 -check-prefix CHECK-V6
+@ RUN: | FileCheck %s -check-prefix CHECK-V6
+@ RUN: not llvm-mc -triple armv6k-eabi -filetype asm -o /dev/null 2>&1 %s \
+@ RUN: | FileCheck %s -check-prefix CHECK-V7
@ RUN: not llvm-mc -triple armv7-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN: | FileCheck %s -check-prefix CHECK-ARMv7 -check-prefix CHECK-V7
+@ RUN: | FileCheck %s -check-prefix CHECK-V7
@ RUN: not llvm-mc -triple thumbv6-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN: | FileCheck %s -check-prefix CHECK-THUMBv6 -check-prefix CHECK-V6
+@ RUN: | FileCheck %s -check-prefix CHECK-V6
@ RUN: not llvm-mc -triple thumbv7-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN: | FileCheck %s -check-prefix CHECK-THUMBv7 -check-prefix CHECK-V7
+@ RUN: | FileCheck %s -check-prefix CHECK-V7
.syntax unified
@@ -13,6 +15,7 @@
@ CHECK-V6: error: architectural extension 'sec' is not allowed for the current base architecture
@ CHECK-V6-NEXT: .arch_extension sec
@ CHECK-V6-NEXT: ^
+@ CHECK-V7-NOT: error: architectural extension 'sec' is not allowed for the current base architecture
.type sec,%function
sec:
@@ -23,9 +26,11 @@ sec:
@ CHECK-V6: error: architectural extension 'sec' is not allowed for the current base architecture
@ CHECK-V6-NEXT: .arch_extension nosec
@ CHECK-V6-NEXT: ^
+@ CHECK-V7-NOT: error: architectural extension 'sec' is not allowed for the current base architecture
.type nosec,%function
nosec:
smc #0
@ CHECK-V7: error: instruction requires: TrustZone
+@ CHECK-V7-NOT: error: instruction requires: TrustZone
diff --git a/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s b/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s
index 5bf8fbd57fa5..d23c9a93de33 100644
--- a/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s
+++ b/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s
@@ -25,7 +25,7 @@ b:
// DWARF: .debug_info contents:
// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1]
-// CHECK-NOT-DWARF: DW_TAG_
+// DWARF-NOT: DW_TAG_
// DWARF: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
// DWARF: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000004)
diff --git a/test/MC/ARM/dwarf-asm-multiple-sections.s b/test/MC/ARM/dwarf-asm-multiple-sections.s
index 0eb8bab81620..49550559e956 100644
--- a/test/MC/ARM/dwarf-asm-multiple-sections.s
+++ b/test/MC/ARM/dwarf-asm-multiple-sections.s
@@ -1,6 +1,8 @@
// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp
-// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s
+// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF -check-prefix DWARF4 %s
// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s
+// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 3 -fdebug-compilation-dir=/tmp
+// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF -check-prefix DWARF3 %s
// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 2 2>&1 | FileCheck -check-prefix VERSION %s
// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 1 2>&1 | FileCheck -check-prefix DWARF1 %s
// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 5 2>&1 | FileCheck -check-prefix DWARF5 %s
@@ -15,8 +17,10 @@ b:
// DWARF: .debug_abbrev contents:
// DWARF: Abbrev table for offset: 0x00000000
// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes
-// DWARF: DW_AT_stmt_list DW_FORM_data4
-// DWARF: DW_AT_ranges DW_FORM_data4
+// DWARF3: DW_AT_stmt_list DW_FORM_data4
+// DWARF4: DW_AT_stmt_list DW_FORM_sec_offset
+// DWARF3: DW_AT_ranges DW_FORM_data4
+// DWARF4: DW_AT_ranges DW_FORM_sec_offset
// DWARF: DW_AT_name DW_FORM_string
// DWARF: DW_AT_comp_dir DW_FORM_string
// DWARF: DW_AT_producer DW_FORM_string
@@ -24,8 +28,9 @@ b:
// DWARF: .debug_info contents:
// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1]
-// CHECK-NOT-DWARF: DW_TAG_
-// DWARF: DW_AT_ranges [DW_FORM_data4] (0x00000000
+// DWARF-NOT: DW_TAG_
+// DWARF3: DW_AT_ranges [DW_FORM_data4] (0x00000000
+// DWARF4: DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_label [2] *
// DWARF-NEXT: DW_AT_name [DW_FORM_string] ("a")
@@ -41,10 +46,10 @@ b:
// DWARF: .debug_line contents:
-// DWARF: 0x0000000000000000 9 0 1 0 0 is_stmt
-// DWARF-NEXT: 0x0000000000000004 9 0 1 0 0 is_stmt end_sequence
-// DWARF-NEXT: 0x0000000000000000 13 0 1 0 0 is_stmt
-// DWARF-NEXT: 0x0000000000000004 13 0 1 0 0 is_stmt end_sequence
+// DWARF: 0x0000000000000000 11 0 1 0 0 is_stmt
+// DWARF-NEXT: 0x0000000000000004 11 0 1 0 0 is_stmt end_sequence
+// DWARF-NEXT: 0x0000000000000000 15 0 1 0 0 is_stmt
+// DWARF-NEXT: 0x0000000000000004 15 0 1 0 0 is_stmt end_sequence
// DWARF: .debug_ranges contents:
diff --git a/test/MC/ARM/dwarf-asm-nonstandard-section.s b/test/MC/ARM/dwarf-asm-nonstandard-section.s
index 497a39ad1162..39065a4d05f1 100644
--- a/test/MC/ARM/dwarf-asm-nonstandard-section.s
+++ b/test/MC/ARM/dwarf-asm-nonstandard-section.s
@@ -9,7 +9,7 @@ b:
// DWARF: .debug_abbrev contents:
// DWARF: Abbrev table for offset: 0x00000000
// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes
-// DWARF: DW_AT_stmt_list DW_FORM_data4
+// DWARF: DW_AT_stmt_list DW_FORM_sec_offset
// DWARF: DW_AT_low_pc DW_FORM_addr
// DWARF: DW_AT_high_pc DW_FORM_addr
// DWARF: DW_AT_name DW_FORM_string
diff --git a/test/MC/ARM/dwarf-asm-single-section.s b/test/MC/ARM/dwarf-asm-single-section.s
index c57e6498a38a..808236f65b3f 100644
--- a/test/MC/ARM/dwarf-asm-single-section.s
+++ b/test/MC/ARM/dwarf-asm-single-section.s
@@ -10,7 +10,7 @@ a:
// DWARF: .debug_abbrev contents:
// DWARF: Abbrev table for offset: 0x00000000
// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes
-// DWARF: DW_AT_stmt_list DW_FORM_data4
+// DWARF: DW_AT_stmt_list DW_FORM_sec_offset
// DWARF: DW_AT_low_pc DW_FORM_addr
// DWARF: DW_AT_high_pc DW_FORM_addr
// DWARF: DW_AT_name DW_FORM_string
@@ -20,7 +20,7 @@ a:
// DWARF: .debug_info contents:
// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1]
-// CHECK-NOT-DWARF: DW_TAG_
+// DWARF-NOT: DW_TAG_
// DWARF: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
// DWARF: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000004)
diff --git a/test/MC/ARM/eh-compact-pr0.s b/test/MC/ARM/eh-compact-pr0.s
index 9c0581a722e8..66fd4a4efeda 100644
--- a/test/MC/ARM/eh-compact-pr0.s
+++ b/test/MC/ARM/eh-compact-pr0.s
@@ -68,8 +68,8 @@ func2:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.TEST1
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .TEST1 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .TEST1 0x0
@ RELOC: ]
@ RELOC: }
@@ -105,7 +105,7 @@ func2:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.TEST2
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .TEST2 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .TEST2 0x0
@ RELOC: ]
@ RELOC: }
diff --git a/test/MC/ARM/eh-compact-pr1.s b/test/MC/ARM/eh-compact-pr1.s
index 17d32f834e3e..9f40593cf437 100644
--- a/test/MC/ARM/eh-compact-pr1.s
+++ b/test/MC/ARM/eh-compact-pr1.s
@@ -68,7 +68,7 @@ func1:
@ will keep __aeabi_unwind_cpp_pr1.
@-------------------------------------------------------------------------------
@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .TEST1 0x0
@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
+@ CHECK: 0x0 R_ARM_PREL31 .TEST1 0x0
@ CHECK: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
@ CHECK: ]
diff --git a/test/MC/ARM/eh-directive-handlerdata.s b/test/MC/ARM/eh-directive-handlerdata.s
index 980a5f056760..c4352e5dee58 100644
--- a/test/MC/ARM/eh-directive-handlerdata.s
+++ b/test/MC/ARM/eh-directive-handlerdata.s
@@ -48,8 +48,8 @@ func1:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.TEST1
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .TEST1 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .TEST1 0x0
@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.TEST1 0x0
@ RELOC: ]
@ RELOC: }
@@ -108,8 +108,8 @@ func2:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.TEST2
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .TEST2 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .TEST2 0x0
@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.TEST2 0x0
@ RELOC: ]
@ RELOC: }
diff --git a/test/MC/ARM/eh-directive-personalityindex.s b/test/MC/ARM/eh-directive-personalityindex.s
index 6db942503c6d..5d537bb04d37 100644
--- a/test/MC/ARM/eh-directive-personalityindex.s
+++ b/test/MC/ARM/eh-directive-personalityindex.s
@@ -28,8 +28,8 @@ pr0:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.pr0
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .pr0 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .pr0 0x0
@ RELOC: ]
@ RELOC: }
@@ -57,8 +57,8 @@ pr0_nontrivial:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.pr0.nontrivial
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .pr0.nontrivial 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .pr0.nontrivial 0x0
@ RELOC: ]
@ RELOC: }
@@ -90,8 +90,8 @@ pr1:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.pr1
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .pr1 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .pr1 0x0
@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.pr1 0x0
@ RELOC: ]
@ RELOC: }
@@ -127,8 +127,8 @@ pr1_nontrivial:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.pr1.nontrivial
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .pr1.nontrivial 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr1 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .pr1.nontrivial 0x0
@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.pr1.nontrivial 0x0
@ RELOC: ]
@ RELOC: }
@@ -161,8 +161,8 @@ pr2:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.pr2
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .pr2 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr2 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .pr2 0x0
@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.pr2 0x0
@ RELOC: ]
@ RELOC: }
@@ -196,8 +196,8 @@ pr2_nontrivial:
@ RELOC: Section {
@ RELOC: Name: .rel.ARM.exidx.pr2.nontrivial
@ RELOC: Relocations [
-@ RELOC: 0x0 R_ARM_PREL31 .pr2.nontrivial 0x0
@ RELOC: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr2 0x0
+@ RELOC: 0x0 R_ARM_PREL31 .pr2.nontrivial 0x0
@ RELOC: 0x4 R_ARM_PREL31 .ARM.extab.pr2.nontrivial 0x0
@ RELOC: ]
@ RELOC: }
diff --git a/test/MC/ARM/eh-directive-section-comdat.s b/test/MC/ARM/eh-directive-section-comdat.s
index 9c7160ea5e74..8b7f32eaece9 100644
--- a/test/MC/ARM/eh-directive-section-comdat.s
+++ b/test/MC/ARM/eh-directive-section-comdat.s
@@ -53,8 +53,8 @@ func1:
@ These are the section indexes of .TEST1, .ARM.extab.TEST1, .ARM.exidx.TEST1,
@ .rel.ARM.extab.TEST1, and .rel.ARM.exidx.TEST1.
@-------------------------------------------------------------------------------
-@ CHECK-NEXT: 0000: 01000000 06000000 07000000 08000000
-@ CHECK-NEXT: 0010: 09000000 0A000000
+@ CHECK-NEXT: 0000: 01000000 04000000 05000000 06000000
+@ CHECK-NEXT: 0010: 07000000 08000000
@ CHECK-NEXT: )
@ CHECK: }
@@ -63,7 +63,7 @@ func1:
@ Check the .TEST1 section
@-------------------------------------------------------------------------------
@ CHECK: Section {
-@ CHECK: Index: 6
+@ CHECK: Index: 4
@ CHECK-NEXT: Name: .TEST1
@ CHECK: Type: SHT_PROGBITS (0x1)
@-------------------------------------------------------------------------------
@@ -81,7 +81,7 @@ func1:
@ Check the .ARM.extab.TEST1 section
@-------------------------------------------------------------------------------
@ CHECK: Section {
-@ CHECK: Index: 7
+@ CHECK: Index: 5
@ CHECK-NEXT: Name: .ARM.extab.TEST1
@ CHECK: Type: SHT_PROGBITS (0x1)
@-------------------------------------------------------------------------------
@@ -94,7 +94,7 @@ func1:
@ CHECK: }
@ CHECK: Section {
-@ CHECK: Index: 8
+@ CHECK: Index: 6
@ CHECK-NEXT: Name: .rel.ARM.extab.TEST1
@ CHECK: }
@@ -102,7 +102,7 @@ func1:
@ Check the .ARM.exidx.TEST1 section
@-------------------------------------------------------------------------------
@ CHECK: Section {
-@ CHECK: Index: 9
+@ CHECK: Index: 7
@ CHECK-NEXT: Name: .ARM.exidx.TEST1
@ CHECK: Type: SHT_ARM_EXIDX (0x70000001)
@-------------------------------------------------------------------------------
@@ -113,12 +113,12 @@ func1:
@ CHECK: SHF_GROUP (0x200)
@ CHECK: SHF_LINK_ORDER (0x80)
@ CHECK: ]
-@ CHECK: Link: 6
+@ CHECK: Link: 4
@ CHECK: }
@ CHECK: Section {
-@ CHECK: Index: 10
+@ CHECK: Index: 8
@ CHECK-NEXT: Name: .rel.ARM.exidx.TEST1
@ CHECK: }
diff --git a/test/MC/ARM/eh-directive-section-multiple-func.s b/test/MC/ARM/eh-directive-section-multiple-func.s
index e5307cf9190c..53c498ab87be 100644
--- a/test/MC/ARM/eh-directive-section-multiple-func.s
+++ b/test/MC/ARM/eh-directive-section-multiple-func.s
@@ -54,7 +54,7 @@ func2:
@ Check the .TEST1 section. There should be two "bx lr" instructions.
@-------------------------------------------------------------------------------
@ CHECK: Section {
-@ CHECK: Index: 5
+@ CHECK: Index: 3
@ CHECK-NEXT: Name: .TEST1
@ CHECK: SectionData (
@ CHECK: 0000: 1EFF2FE1 1EFF2FE1 |../.../.|
@@ -87,7 +87,7 @@ func2:
@-------------------------------------------------------------------------------
@ CHECK: Section {
@ CHECK: Name: .ARM.exidx.TEST1
-@ CHECK: Link: 5
+@ CHECK: Link: 3
@-------------------------------------------------------------------------------
@ The first word should be the offset to .TEST1.
@ The second word should be the offset to .ARM.extab.TEST1
diff --git a/test/MC/ARM/eh-directive-section.s b/test/MC/ARM/eh-directive-section.s
index e36d9a99aaf5..bd41fbe5eeea 100644
--- a/test/MC/ARM/eh-directive-section.s
+++ b/test/MC/ARM/eh-directive-section.s
@@ -50,7 +50,7 @@ func2:
@-------------------------------------------------------------------------------
@ CHECK: Sections [
@ CHECK: Section {
-@ CHECK: Index: 5
+@ CHECK: Index: 3
@ CHECK-NEXT: Name: .TEST1
@ CHECK: SectionData (
@ CHECK: 0000: 1EFF2FE1 |../.|
@@ -84,7 +84,7 @@ func2:
@-------------------------------------------------------------------------------
@ This section should linked with .TEST1 section.
@-------------------------------------------------------------------------------
-@ CHECK: Link: 5
+@ CHECK: Link: 3
@-------------------------------------------------------------------------------
@ The first word should be relocated to the code address in .TEST1 section.
@@ -109,7 +109,7 @@ func2:
@ Check the TEST2 section (without the dot in the beginning)
@-------------------------------------------------------------------------------
@ CHECK: Section {
-@ CHECK: Index: 10
+@ CHECK: Index: 8
@ CHECK-NEXT: Name: TEST2
@ CHECK: SectionData (
@ CHECK: 0000: 1EFF2FE1 |../.|
@@ -143,7 +143,7 @@ func2:
@-------------------------------------------------------------------------------
@ This section should linked with TEST2 section.
@-------------------------------------------------------------------------------
-@ CHECK: Link: 10
+@ CHECK: Link: 8
@-------------------------------------------------------------------------------
@ The first word should be relocated to the code address in TEST2 section.
diff --git a/test/MC/ARM/eh-directive-text-section.s b/test/MC/ARM/eh-directive-text-section.s
index 32696d5a1dad..10ccdd54e501 100644
--- a/test/MC/ARM/eh-directive-text-section.s
+++ b/test/MC/ARM/eh-directive-text-section.s
@@ -77,6 +77,6 @@ func1:
@ add an relocation to __aeabi_unwind_cpp_pr0.
@-------------------------------------------------------------------------------
@ CHECK: Relocations [
-@ CHECK: 0x0 R_ARM_PREL31 .text 0x0
@ CHECK: 0x0 R_ARM_NONE __aeabi_unwind_cpp_pr0 0x0
+@ CHECK: 0x0 R_ARM_PREL31 .text 0x0
@ CHECK: ]
diff --git a/test/MC/ARM/eh-link.s b/test/MC/ARM/eh-link.s
index e14fb0638b91..19fe14db5f30 100644
--- a/test/MC/ARM/eh-link.s
+++ b/test/MC/ARM/eh-link.s
@@ -8,7 +8,7 @@
@ name first we could use a FileCheck variable.
@ CHECK: Section {
-@ CHECK: Index: 6
+@ CHECK: Index: 4
@ CHECK-NEXT: Name: .text
@ CHECK-NEXT: Type: SHT_PROGBITS
@ CHECK-NEXT: Flags [
@@ -25,7 +25,7 @@
@ CHECK-NEXT: EntrySize: 0
@ CHECK-NEXT: }
@ CHECK-NEXT: Section {
-@ CHECK-NEXT: Index: 7
+@ CHECK-NEXT: Index: 5
@ CHECK-NEXT: Name: .ARM.exidx
@ CHECK-NEXT: Type: SHT_ARM_EXIDX
@ CHECK-NEXT: Flags [
@@ -36,14 +36,14 @@
@ CHECK-NEXT: Address: 0x0
@ CHECK-NEXT: Offset:
@ CHECK-NEXT: Size: 8
-@ CHECK-NEXT: Link: 6
+@ CHECK-NEXT: Link: 4
@ CHECK-NEXT: Info: 0
@ CHECK-NEXT: AddressAlignment: 4
@ CHECK-NEXT: EntrySize: 0
@ CHECK-NEXT: }
@ CHECK: Section {
-@ CHECK: Index: 10
+@ CHECK: Index: 8
@ CHECK-NEXT: Name: .text
@ CHECK-NEXT: Type: SHT_PROGBITS
@ CHECK-NEXT: Flags [
@@ -60,7 +60,7 @@
@ CHECK-NEXT: EntrySize: 0
@ CHECK-NEXT: }
@ CHECK-NEXT: Section {
-@ CHECK-NEXT: Index: 11
+@ CHECK-NEXT: Index: 9
@ CHECK-NEXT: Name: .ARM.exidx
@ CHECK-NEXT: Type: SHT_ARM_EXIDX
@ CHECK-NEXT: Flags [
@@ -71,7 +71,7 @@
@ CHECK-NEXT: Address: 0x0
@ CHECK-NEXT: Offset:
@ CHECK-NEXT: Size: 8
-@ CHECK-NEXT: Link: 10
+@ CHECK-NEXT: Link: 8
@ CHECK-NEXT: Info: 0
@ CHECK-NEXT: AddressAlignment: 4
@ CHECK-NEXT: EntrySize: 0
diff --git a/test/MC/ARM/error-location-ldr-pseudo.s b/test/MC/ARM/error-location-ldr-pseudo.s
new file mode 100644
index 000000000000..b5cdcad72597
--- /dev/null
+++ b/test/MC/ARM/error-location-ldr-pseudo.s
@@ -0,0 +1,5 @@
+@ RUN: not llvm-mc -triple armv7a--none-eabi -filetype obj < %s -o /dev/null 2>&1 | FileCheck %s
+
+ .text
+@ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: expected relocatable expression
+ ldr r0, =(-undef)
diff --git a/test/MC/ARM/error-location.s b/test/MC/ARM/error-location.s
new file mode 100644
index 000000000000..112acf318ed3
--- /dev/null
+++ b/test/MC/ARM/error-location.s
@@ -0,0 +1,49 @@
+@ RUN: not llvm-mc -triple armv7a--none-eabi -filetype obj < %s -o /dev/null 2>&1 | FileCheck %s
+
+@ Note: These errors are not always emitted in the order in which the relevant
+@ source appears, this file is carefully ordered so that that is the case.
+
+ .text
+@ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: symbol 'undef' can not be undefined in a subtraction expression
+ .word (0-undef)
+
+@ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: expected relocatable expression
+ .word -undef
+
+@ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: No relocation available to represent this relative expression
+ adr r0, #a-undef
+
+@ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a difference across sections
+ .word x_a - y_a
+
+@ CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: Cannot represent a subtraction with a weak symbol
+ .word a - w
+
+@ CHECK: <unknown>:0: error: expression could not be evaluated
+ .set v1, -undef
+
+ .comm common, 4
+@ CHECK: <unknown>:0: error: Common symbol 'common' cannot be used in assignment expr
+ .set v3, common
+
+@ CHECK: <unknown>:0: error: Undefined temporary symbol
+ .word 5f
+
+@ CHECK: <unknown>:0: error: symbol 'undef' could not be evaluated in a subtraction expression
+ .set v2, a-undef
+
+
+
+w:
+ .word 0
+ .weak w
+
+
+ .section sec_x
+x_a:
+ .word 0
+
+
+ .section sec_y
+y_a:
+ .word 0
diff --git a/test/MC/ARM/fullfp16-neon-neg.s b/test/MC/ARM/fullfp16-neon-neg.s
new file mode 100644
index 000000000000..1928163db74b
--- /dev/null
+++ b/test/MC/ARM/fullfp16-neon-neg.s
@@ -0,0 +1,289 @@
+@ RUN: not llvm-mc -triple armv8a-none-eabi -mattr=-fullfp16,+neon -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple armv8a-none-eabi -mattr=+fullfp16,-neon -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple thumbv8a-none-eabi -mattr=-fullfp16,+neon -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple thumbv8a-none-eabi -mattr=+fullfp16,-neon -show-encoding < %s 2>&1 | FileCheck %s
+
+ vadd.f16 d0, d1, d2
+ vadd.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vsub.f16 d0, d1, d2
+ vsub.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vmul.f16 d0, d1, d2
+ vmul.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vmul.f16 d1, d2, d3[2]
+ vmul.f16 q4, q5, d6[3]
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vmla.f16 d0, d1, d2
+ vmla.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vmla.f16 d5, d6, d7[2]
+ vmla.f16 q5, q6, d7[3]
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vmls.f16 d0, d1, d2
+ vmls.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vmls.f16 d5, d6, d7[2]
+ vmls.f16 q5, q6, d7[3]
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vfma.f16 d0, d1, d2
+ vfma.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vfms.f16 d0, d1, d2
+ vfms.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vceq.f16 d2, d3, d4
+ vceq.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vceq.f16 d2, d3, #0
+ vceq.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcge.f16 d2, d3, d4
+ vcge.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcge.f16 d2, d3, #0
+ vcge.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcgt.f16 d2, d3, d4
+ vcgt.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcgt.f16 d2, d3, #0
+ vcgt.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcle.f16 d2, d3, d4
+ vcle.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcle.f16 d2, d3, #0
+ vcle.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vclt.f16 d2, d3, d4
+ vclt.f16 q2, q3, q4
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vclt.f16 d2, d3, #0
+ vclt.f16 q2, q3, #0
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vacge.f16 d0, d1, d2
+ vacge.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vacgt.f16 d0, d1, d2
+ vacgt.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vacle.f16 d0, d1, d2
+ vacle.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vaclt.f16 d0, d1, d2
+ vaclt.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vabd.f16 d0, d1, d2
+ vabd.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vabs.f16 d0, d1
+ vabs.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vmax.f16 d0, d1, d2
+ vmax.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vmin.f16 d0, d1, d2
+ vmin.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vmaxnm.f16 d0, d1, d2
+ vmaxnm.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vminnm.f16 d0, d1, d2
+ vminnm.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vpadd.f16 d0, d1, d2
+@ CHECK: error: instruction requires:
+
+ vpmax.f16 d0, d1, d2
+@ CHECK: error: instruction requires:
+
+ vpmin.f16 d0, d1, d2
+@ CHECK: error: instruction requires:
+
+ vrecpe.f16 d0, d1
+ vrecpe.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vrecps.f16 d0, d1, d2
+ vrecps.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vrsqrte.f16 d0, d1
+ vrsqrte.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vrsqrts.f16 d0, d1, d2
+ vrsqrts.f16 q0, q1, q2
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vneg.f16 d0, d1
+ vneg.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcvt.s16.f16 d0, d1
+ vcvt.u16.f16 d0, d1
+ vcvt.f16.s16 d0, d1
+ vcvt.f16.u16 d0, d1
+ vcvt.s16.f16 q0, q1
+ vcvt.u16.f16 q0, q1
+ vcvt.f16.s16 q0, q1
+ vcvt.f16.u16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcvta.s16.f16 d0, d1
+ vcvta.s16.f16 q0, q1
+ vcvta.u16.f16 d0, d1
+ vcvta.u16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcvtm.s16.f16 d0, d1
+ vcvtm.s16.f16 q0, q1
+ vcvtm.u16.f16 d0, d1
+ vcvtm.u16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcvtn.s16.f16 d0, d1
+ vcvtn.s16.f16 q0, q1
+ vcvtn.u16.f16 d0, d1
+ vcvtn.u16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vcvtp.s16.f16 d0, d1
+ vcvtp.s16.f16 q0, q1
+ vcvtp.u16.f16 d0, d1
+ vcvtp.u16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+
+ vcvt.s16.f16 d0, d1, #1
+ vcvt.u16.f16 d0, d1, #2
+ vcvt.f16.s16 d0, d1, #3
+ vcvt.f16.u16 d0, d1, #4
+ vcvt.s16.f16 q0, q1, #5
+ vcvt.u16.f16 q0, q1, #6
+ vcvt.f16.s16 q0, q1, #7
+ vcvt.f16.u16 q0, q1, #8
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vrinta.f16.f16 d0, d1
+ vrinta.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vrintm.f16.f16 d0, d1
+ vrintm.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vrintn.f16.f16 d0, d1
+ vrintn.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vrintp.f16.f16 d0, d1
+ vrintp.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vrintx.f16.f16 d0, d1
+ vrintx.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
+
+ vrintz.f16.f16 d0, d1
+ vrintz.f16.f16 q0, q1
+@ CHECK: error: instruction requires:
+@ CHECK: error: instruction requires:
diff --git a/test/MC/ARM/fullfp16-neon.s b/test/MC/ARM/fullfp16-neon.s
new file mode 100644
index 000000000000..32a33720c13b
--- /dev/null
+++ b/test/MC/ARM/fullfp16-neon.s
@@ -0,0 +1,404 @@
+@ RUN: llvm-mc -triple armv8a-none-eabi -mattr=+fullfp16,+neon -show-encoding < %s | FileCheck %s --check-prefix=ARM
+@ RUN: llvm-mc -triple thumbv8a-none-eabi -mattr=+fullfp16,+neon -show-encoding < %s | FileCheck %s --check-prefix=THUMB
+
+ vadd.f16 d0, d1, d2
+ vadd.f16 q0, q1, q2
+@ ARM: vadd.f16 d0, d1, d2 @ encoding: [0x02,0x0d,0x11,0xf2]
+@ ARM: vadd.f16 q0, q1, q2 @ encoding: [0x44,0x0d,0x12,0xf2]
+@ THUMB: vadd.f16 d0, d1, d2 @ encoding: [0x11,0xef,0x02,0x0d]
+@ THUMB: vadd.f16 q0, q1, q2 @ encoding: [0x12,0xef,0x44,0x0d]
+
+ vsub.f16 d0, d1, d2
+ vsub.f16 q0, q1, q2
+@ ARM: vsub.f16 d0, d1, d2 @ encoding: [0x02,0x0d,0x31,0xf2]
+@ ARM: vsub.f16 q0, q1, q2 @ encoding: [0x44,0x0d,0x32,0xf2]
+@ THUMB: vsub.f16 d0, d1, d2 @ encoding: [0x31,0xef,0x02,0x0d]
+@ THUMB: vsub.f16 q0, q1, q2 @ encoding: [0x32,0xef,0x44,0x0d]
+
+ vmul.f16 d0, d1, d2
+ vmul.f16 q0, q1, q2
+@ ARM: vmul.f16 d0, d1, d2 @ encoding: [0x12,0x0d,0x11,0xf3]
+@ ARM: vmul.f16 q0, q1, q2 @ encoding: [0x54,0x0d,0x12,0xf3]
+@ THUMB: vmul.f16 d0, d1, d2 @ encoding: [0x11,0xff,0x12,0x0d]
+@ THUMB: vmul.f16 q0, q1, q2 @ encoding: [0x12,0xff,0x54,0x0d]
+
+ vmul.f16 d1, d2, d3[2]
+ vmul.f16 q4, q5, d6[3]
+@ ARM: vmul.f16 d1, d2, d3[2] @ encoding: [0x63,0x19,0x92,0xf2]
+@ ARM: vmul.f16 q4, q5, d6[3] @ encoding: [0x6e,0x89,0x9a,0xf3]
+@ THUMB: vmul.f16 d1, d2, d3[2] @ encoding: [0x92,0xef,0x63,0x19]
+@ THUMB: vmul.f16 q4, q5, d6[3] @ encoding: [0x9a,0xff,0x6e,0x89]
+
+ vmla.f16 d0, d1, d2
+ vmla.f16 q0, q1, q2
+@ ARM: vmla.f16 d0, d1, d2 @ encoding: [0x12,0x0d,0x11,0xf2]
+@ ARM: vmla.f16 q0, q1, q2 @ encoding: [0x54,0x0d,0x12,0xf2]
+@ THUMB: vmla.f16 d0, d1, d2 @ encoding: [0x11,0xef,0x12,0x0d]
+@ THUMB: vmla.f16 q0, q1, q2 @ encoding: [0x12,0xef,0x54,0x0d]
+
+ vmla.f16 d5, d6, d7[2]
+ vmla.f16 q5, q6, d7[3]
+@ ARM: vmla.f16 d5, d6, d7[2] @ encoding: [0x67,0x51,0x96,0xf2]
+@ ARM: vmla.f16 q5, q6, d7[3] @ encoding: [0x6f,0xa1,0x9c,0xf3]
+@ THUMB: vmla.f16 d5, d6, d7[2] @ encoding: [0x96,0xef,0x67,0x51]
+@ THUMB: vmla.f16 q5, q6, d7[3] @ encoding: [0x9c,0xff,0x6f,0xa1]
+
+ vmls.f16 d0, d1, d2
+ vmls.f16 q0, q1, q2
+@ ARM: vmls.f16 d0, d1, d2 @ encoding: [0x12,0x0d,0x31,0xf2]
+@ ARM: vmls.f16 q0, q1, q2 @ encoding: [0x54,0x0d,0x32,0xf2]
+@ THUMB: vmls.f16 d0, d1, d2 @ encoding: [0x31,0xef,0x12,0x0d]
+@ THUMB: vmls.f16 q0, q1, q2 @ encoding: [0x32,0xef,0x54,0x0d]
+
+ vmls.f16 d5, d6, d7[2]
+ vmls.f16 q5, q6, d7[3]
+@ ARM: vmls.f16 d5, d6, d7[2] @ encoding: [0x67,0x55,0x96,0xf2]
+@ ARM: vmls.f16 q5, q6, d7[3] @ encoding: [0x6f,0xa5,0x9c,0xf3]
+@ THUMB: vmls.f16 d5, d6, d7[2] @ encoding: [0x96,0xef,0x67,0x55]
+@ THUMB: vmls.f16 q5, q6, d7[3] @ encoding: [0x9c,0xff,0x6f,0xa5]
+
+ vfma.f16 d0, d1, d2
+ vfma.f16 q0, q1, q2
+@ ARM: vfma.f16 d0, d1, d2 @ encoding: [0x12,0x0c,0x11,0xf2]
+@ ARM: vfma.f16 q0, q1, q2 @ encoding: [0x54,0x0c,0x12,0xf2]
+@ THUMB: vfma.f16 d0, d1, d2 @ encoding: [0x11,0xef,0x12,0x0c]
+@ THUMB: vfma.f16 q0, q1, q2 @ encoding: [0x12,0xef,0x54,0x0c]
+
+ vfms.f16 d0, d1, d2
+ vfms.f16 q0, q1, q2
+@ ARM: vfms.f16 d0, d1, d2 @ encoding: [0x12,0x0c,0x31,0xf2]
+@ ARM: vfms.f16 q0, q1, q2 @ encoding: [0x54,0x0c,0x32,0xf2]
+@ THUMB: vfms.f16 d0, d1, d2 @ encoding: [0x31,0xef,0x12,0x0c]
+@ THUMB: vfms.f16 q0, q1, q2 @ encoding: [0x32,0xef,0x54,0x0c]
+
+ vceq.f16 d2, d3, d4
+ vceq.f16 q2, q3, q4
+@ ARM: vceq.f16 d2, d3, d4 @ encoding: [0x04,0x2e,0x13,0xf2]
+@ ARM: vceq.f16 q2, q3, q4 @ encoding: [0x48,0x4e,0x16,0xf2]
+@ THUMB: vceq.f16 d2, d3, d4 @ encoding: [0x13,0xef,0x04,0x2e]
+@ THUMB: vceq.f16 q2, q3, q4 @ encoding: [0x16,0xef,0x48,0x4e]
+
+ vceq.f16 d2, d3, #0
+ vceq.f16 q2, q3, #0
+@ ARM: vceq.f16 d2, d3, #0 @ encoding: [0x03,0x25,0xb5,0xf3]
+@ ARM: vceq.f16 q2, q3, #0 @ encoding: [0x46,0x45,0xb5,0xf3]
+@ THUMB: vceq.f16 d2, d3, #0 @ encoding: [0xb5,0xff,0x03,0x25]
+@ THUMB: vceq.f16 q2, q3, #0 @ encoding: [0xb5,0xff,0x46,0x45]
+
+ vcge.f16 d2, d3, d4
+ vcge.f16 q2, q3, q4
+@ ARM: vcge.f16 d2, d3, d4 @ encoding: [0x04,0x2e,0x13,0xf3]
+@ ARM: vcge.f16 q2, q3, q4 @ encoding: [0x48,0x4e,0x16,0xf3]
+@ THUMB: vcge.f16 d2, d3, d4 @ encoding: [0x13,0xff,0x04,0x2e]
+@ THUMB: vcge.f16 q2, q3, q4 @ encoding: [0x16,0xff,0x48,0x4e]
+
+ vcge.f16 d2, d3, #0
+ vcge.f16 q2, q3, #0
+@ ARM: vcge.f16 d2, d3, #0 @ encoding: [0x83,0x24,0xb5,0xf3]
+@ ARM: vcge.f16 q2, q3, #0 @ encoding: [0xc6,0x44,0xb5,0xf3]
+@ THUMB: vcge.f16 d2, d3, #0 @ encoding: [0xb5,0xff,0x83,0x24]
+@ THUMB: vcge.f16 q2, q3, #0 @ encoding: [0xb5,0xff,0xc6,0x44]
+
+ vcgt.f16 d2, d3, d4
+ vcgt.f16 q2, q3, q4
+@ ARM: vcgt.f16 d2, d3, d4 @ encoding: [0x04,0x2e,0x33,0xf3]
+@ ARM: vcgt.f16 q2, q3, q4 @ encoding: [0x48,0x4e,0x36,0xf3]
+@ THUMB: vcgt.f16 d2, d3, d4 @ encoding: [0x33,0xff,0x04,0x2e]
+@ THUMB: vcgt.f16 q2, q3, q4 @ encoding: [0x36,0xff,0x48,0x4e]
+
+ vcgt.f16 d2, d3, #0
+ vcgt.f16 q2, q3, #0
+@ ARM: vcgt.f16 d2, d3, #0 @ encoding: [0x03,0x24,0xb5,0xf3]
+@ ARM: vcgt.f16 q2, q3, #0 @ encoding: [0x46,0x44,0xb5,0xf3]
+@ THUMB: vcgt.f16 d2, d3, #0 @ encoding: [0xb5,0xff,0x03,0x24]
+@ THUMB: vcgt.f16 q2, q3, #0 @ encoding: [0xb5,0xff,0x46,0x44]
+
+ vcle.f16 d2, d3, d4
+ vcle.f16 q2, q3, q4
+@ ARM: vcge.f16 d2, d4, d3 @ encoding: [0x03,0x2e,0x14,0xf3]
+@ ARM: vcge.f16 q2, q4, q3 @ encoding: [0x46,0x4e,0x18,0xf3]
+@ THUMB: vcge.f16 d2, d4, d3 @ encoding: [0x14,0xff,0x03,0x2e]
+@ THUMB: vcge.f16 q2, q4, q3 @ encoding: [0x18,0xff,0x46,0x4e]
+
+ vcle.f16 d2, d3, #0
+ vcle.f16 q2, q3, #0
+@ ARM: vcle.f16 d2, d3, #0 @ encoding: [0x83,0x25,0xb5,0xf3]
+@ ARM: vcle.f16 q2, q3, #0 @ encoding: [0xc6,0x45,0xb5,0xf3]
+@ THUMB: vcle.f16 d2, d3, #0 @ encoding: [0xb5,0xff,0x83,0x25]
+@ THUMB: vcle.f16 q2, q3, #0 @ encoding: [0xb5,0xff,0xc6,0x45]
+
+ vclt.f16 d2, d3, d4
+ vclt.f16 q2, q3, q4
+@ ARM: vcgt.f16 d2, d4, d3 @ encoding: [0x03,0x2e,0x34,0xf3]
+@ ARM: vcgt.f16 q2, q4, q3 @ encoding: [0x46,0x4e,0x38,0xf3]
+@ THUMB: vcgt.f16 d2, d4, d3 @ encoding: [0x34,0xff,0x03,0x2e]
+@ THUMB: vcgt.f16 q2, q4, q3 @ encoding: [0x38,0xff,0x46,0x4e]
+
+ vclt.f16 d2, d3, #0
+ vclt.f16 q2, q3, #0
+@ ARM: vclt.f16 d2, d3, #0 @ encoding: [0x03,0x26,0xb5,0xf3]
+@ ARM: vclt.f16 q2, q3, #0 @ encoding: [0x46,0x46,0xb5,0xf3]
+@ THUMB: vclt.f16 d2, d3, #0 @ encoding: [0xb5,0xff,0x03,0x26]
+@ THUMB: vclt.f16 q2, q3, #0 @ encoding: [0xb5,0xff,0x46,0x46]
+
+ vacge.f16 d0, d1, d2
+ vacge.f16 q0, q1, q2
+@ ARM: vacge.f16 d0, d1, d2 @ encoding: [0x12,0x0e,0x11,0xf3]
+@ ARM: vacge.f16 q0, q1, q2 @ encoding: [0x54,0x0e,0x12,0xf3]
+@ THUMB: vacge.f16 d0, d1, d2 @ encoding: [0x11,0xff,0x12,0x0e]
+@ THUMB: vacge.f16 q0, q1, q2 @ encoding: [0x12,0xff,0x54,0x0e]
+
+ vacgt.f16 d0, d1, d2
+ vacgt.f16 q0, q1, q2
+@ ARM: vacgt.f16 d0, d1, d2 @ encoding: [0x12,0x0e,0x31,0xf3]
+@ ARM: vacgt.f16 q0, q1, q2 @ encoding: [0x54,0x0e,0x32,0xf3]
+@ THUMB: vacgt.f16 d0, d1, d2 @ encoding: [0x31,0xff,0x12,0x0e]
+@ THUMB: vacgt.f16 q0, q1, q2 @ encoding: [0x32,0xff,0x54,0x0e]
+
+ vacle.f16 d0, d1, d2
+ vacle.f16 q0, q1, q2
+@ ARM: vacge.f16 d0, d2, d1 @ encoding: [0x11,0x0e,0x12,0xf3]
+@ ARM: vacge.f16 q0, q2, q1 @ encoding: [0x52,0x0e,0x14,0xf3]
+@ THUMB: vacge.f16 d0, d2, d1 @ encoding: [0x12,0xff,0x11,0x0e]
+@ THUMB: vacge.f16 q0, q2, q1 @ encoding: [0x14,0xff,0x52,0x0e]
+
+ vaclt.f16 d0, d1, d2
+ vaclt.f16 q0, q1, q2
+@ ARM: vacgt.f16 d0, d2, d1 @ encoding: [0x11,0x0e,0x32,0xf3]
+@ ARM: vacgt.f16 q0, q2, q1 @ encoding: [0x52,0x0e,0x34,0xf3]
+@ THUMB: vacgt.f16 d0, d2, d1 @ encoding: [0x32,0xff,0x11,0x0e]
+@ THUMB: vacgt.f16 q0, q2, q1 @ encoding: [0x34,0xff,0x52,0x0e]
+
+ vabd.f16 d0, d1, d2
+ vabd.f16 q0, q1, q2
+@ ARM: vabd.f16 d0, d1, d2 @ encoding: [0x02,0x0d,0x31,0xf3]
+@ ARM: vabd.f16 q0, q1, q2 @ encoding: [0x44,0x0d,0x32,0xf3]
+@ THUMB: vabd.f16 d0, d1, d2 @ encoding: [0x31,0xff,0x02,0x0d]
+@ THUMB: vabd.f16 q0, q1, q2 @ encoding: [0x32,0xff,0x44,0x0d]
+
+ vabs.f16 d0, d1
+ vabs.f16 q0, q1
+@ ARM: vabs.f16 d0, d1 @ encoding: [0x01,0x07,0xb5,0xf3]
+@ ARM: vabs.f16 q0, q1 @ encoding: [0x42,0x07,0xb5,0xf3]
+@ THUMB: vabs.f16 d0, d1 @ encoding: [0xb5,0xff,0x01,0x07]
+@ THUMB: vabs.f16 q0, q1 @ encoding: [0xb5,0xff,0x42,0x07]
+
+ vmax.f16 d0, d1, d2
+ vmax.f16 q0, q1, q2
+@ ARM: vmax.f16 d0, d1, d2 @ encoding: [0x02,0x0f,0x11,0xf2]
+@ ARM: vmax.f16 q0, q1, q2 @ encoding: [0x44,0x0f,0x12,0xf2]
+@ THUMB: vmax.f16 d0, d1, d2 @ encoding: [0x11,0xef,0x02,0x0f]
+@ THUMB: vmax.f16 q0, q1, q2 @ encoding: [0x12,0xef,0x44,0x0f]
+
+ vmin.f16 d0, d1, d2
+ vmin.f16 q0, q1, q2
+@ ARM: vmin.f16 d0, d1, d2 @ encoding: [0x02,0x0f,0x31,0xf2]
+@ ARM: vmin.f16 q0, q1, q2 @ encoding: [0x44,0x0f,0x32,0xf2]
+@ THUMB: vmin.f16 d0, d1, d2 @ encoding: [0x31,0xef,0x02,0x0f]
+@ THUMB: vmin.f16 q0, q1, q2 @ encoding: [0x32,0xef,0x44,0x0f]
+
+ vmaxnm.f16 d0, d1, d2
+ vmaxnm.f16 q0, q1, q2
+@ ARM: vmaxnm.f16 d0, d1, d2 @ encoding: [0x12,0x0f,0x11,0xf3]
+@ ARM: vmaxnm.f16 q0, q1, q2 @ encoding: [0x54,0x0f,0x12,0xf3]
+@ THUMB: vmaxnm.f16 d0, d1, d2 @ encoding: [0x11,0xff,0x12,0x0f]
+@ THUMB: vmaxnm.f16 q0, q1, q2 @ encoding: [0x12,0xff,0x54,0x0f]
+
+ vminnm.f16 d0, d1, d2
+ vminnm.f16 q0, q1, q2
+@ ARM: vminnm.f16 d0, d1, d2 @ encoding: [0x12,0x0f,0x31,0xf3]
+@ ARM: vminnm.f16 q0, q1, q2 @ encoding: [0x54,0x0f,0x32,0xf3]
+@ THUMB: vminnm.f16 d0, d1, d2 @ encoding: [0x31,0xff,0x12,0x0f]
+@ THUMB: vminnm.f16 q0, q1, q2 @ encoding: [0x32,0xff,0x54,0x0f]
+
+ vpadd.f16 d0, d1, d2
+@ ARM: vpadd.f16 d0, d1, d2 @ encoding: [0x02,0x0d,0x11,0xf3]
+@ THUMB: vpadd.f16 d0, d1, d2 @ encoding: [0x11,0xff,0x02,0x0d]
+
+ vpmax.f16 d0, d1, d2
+@ ARM: vpmax.f16 d0, d1, d2 @ encoding: [0x02,0x0f,0x11,0xf3]
+@ THUMB: vpmax.f16 d0, d1, d2 @ encoding: [0x11,0xff,0x02,0x0f]
+
+ vpmin.f16 d0, d1, d2
+@ ARM: vpmin.f16 d0, d1, d2 @ encoding: [0x02,0x0f,0x31,0xf3]
+@ THUMB: vpmin.f16 d0, d1, d2 @ encoding: [0x31,0xff,0x02,0x0f]
+
+ vrecpe.f16 d0, d1
+ vrecpe.f16 q0, q1
+@ ARM: vrecpe.f16 d0, d1 @ encoding: [0x01,0x05,0xb7,0xf3]
+@ ARM: vrecpe.f16 q0, q1 @ encoding: [0x42,0x05,0xb7,0xf3]
+@ THUMB: vrecpe.f16 d0, d1 @ encoding: [0xb7,0xff,0x01,0x05]
+@ THUMB: vrecpe.f16 q0, q1 @ encoding: [0xb7,0xff,0x42,0x05]
+
+ vrecps.f16 d0, d1, d2
+ vrecps.f16 q0, q1, q2
+@ ARM: vrecps.f16 d0, d1, d2 @ encoding: [0x12,0x0f,0x11,0xf2]
+@ ARM: vrecps.f16 q0, q1, q2 @ encoding: [0x54,0x0f,0x12,0xf2]
+@ THUMB: vrecps.f16 d0, d1, d2 @ encoding: [0x11,0xef,0x12,0x0f]
+@ THUMB: vrecps.f16 q0, q1, q2 @ encoding: [0x12,0xef,0x54,0x0f]
+
+ vrsqrte.f16 d0, d1
+ vrsqrte.f16 q0, q1
+@ ARM: vrsqrte.f16 d0, d1 @ encoding: [0x81,0x05,0xb7,0xf3]
+@ ARM: vrsqrte.f16 q0, q1 @ encoding: [0xc2,0x05,0xb7,0xf3]
+@ THUMB: vrsqrte.f16 d0, d1 @ encoding: [0xb7,0xff,0x81,0x05]
+@ THUMB: vrsqrte.f16 q0, q1 @ encoding: [0xb7,0xff,0xc2,0x05]
+
+ vrsqrts.f16 d0, d1, d2
+ vrsqrts.f16 q0, q1, q2
+@ ARM: vrsqrts.f16 d0, d1, d2 @ encoding: [0x12,0x0f,0x31,0xf2]
+@ ARM: vrsqrts.f16 q0, q1, q2 @ encoding: [0x54,0x0f,0x32,0xf2]
+@ THUMB: vrsqrts.f16 d0, d1, d2 @ encoding: [0x31,0xef,0x12,0x0f]
+@ THUMB: vrsqrts.f16 q0, q1, q2 @ encoding: [0x32,0xef,0x54,0x0f]
+
+ vneg.f16 d0, d1
+ vneg.f16 q0, q1
+@ ARM: vneg.f16 d0, d1 @ encoding: [0x81,0x07,0xb5,0xf3]
+@ ARM: vneg.f16 q0, q1 @ encoding: [0xc2,0x07,0xb5,0xf3]
+@ THUMB: vneg.f16 d0, d1 @ encoding: [0xb5,0xff,0x81,0x07]
+@ THUMB: vneg.f16 q0, q1 @ encoding: [0xb5,0xff,0xc2,0x07]
+
+ vcvt.s16.f16 d0, d1
+ vcvt.u16.f16 d0, d1
+ vcvt.f16.s16 d0, d1
+ vcvt.f16.u16 d0, d1
+ vcvt.s16.f16 q0, q1
+ vcvt.u16.f16 q0, q1
+ vcvt.f16.s16 q0, q1
+ vcvt.f16.u16 q0, q1
+@ ARM: vcvt.s16.f16 d0, d1 @ encoding: [0x01,0x07,0xb7,0xf3]
+@ ARM: vcvt.u16.f16 d0, d1 @ encoding: [0x81,0x07,0xb7,0xf3]
+@ ARM: vcvt.f16.s16 d0, d1 @ encoding: [0x01,0x06,0xb7,0xf3]
+@ ARM: vcvt.f16.u16 d0, d1 @ encoding: [0x81,0x06,0xb7,0xf3]
+@ ARM: vcvt.s16.f16 q0, q1 @ encoding: [0x42,0x07,0xb7,0xf3]
+@ ARM: vcvt.u16.f16 q0, q1 @ encoding: [0xc2,0x07,0xb7,0xf3]
+@ ARM: vcvt.f16.s16 q0, q1 @ encoding: [0x42,0x06,0xb7,0xf3]
+@ ARM: vcvt.f16.u16 q0, q1 @ encoding: [0xc2,0x06,0xb7,0xf3]
+@ THUMB: vcvt.s16.f16 d0, d1 @ encoding: [0xb7,0xff,0x01,0x07]
+@ THUMB: vcvt.u16.f16 d0, d1 @ encoding: [0xb7,0xff,0x81,0x07]
+@ THUMB: vcvt.f16.s16 d0, d1 @ encoding: [0xb7,0xff,0x01,0x06]
+@ THUMB: vcvt.f16.u16 d0, d1 @ encoding: [0xb7,0xff,0x81,0x06]
+@ THUMB: vcvt.s16.f16 q0, q1 @ encoding: [0xb7,0xff,0x42,0x07]
+@ THUMB: vcvt.u16.f16 q0, q1 @ encoding: [0xb7,0xff,0xc2,0x07]
+@ THUMB: vcvt.f16.s16 q0, q1 @ encoding: [0xb7,0xff,0x42,0x06]
+@ THUMB: vcvt.f16.u16 q0, q1 @ encoding: [0xb7,0xff,0xc2,0x06]
+
+ vcvta.s16.f16 d0, d1
+ vcvta.s16.f16 q0, q1
+ vcvta.u16.f16 d0, d1
+ vcvta.u16.f16 q0, q1
+@ ARM: vcvta.s16.f16 d0, d1 @ encoding: [0x01,0x00,0xb7,0xf3]
+@ ARM: vcvta.s16.f16 q0, q1 @ encoding: [0x42,0x00,0xb7,0xf3]
+@ ARM: vcvta.u16.f16 d0, d1 @ encoding: [0x81,0x00,0xb7,0xf3]
+@ ARM: vcvta.u16.f16 q0, q1 @ encoding: [0xc2,0x00,0xb7,0xf3]
+@ THUMB: vcvta.s16.f16 d0, d1 @ encoding: [0xb7,0xff,0x01,0x00]
+@ THUMB: vcvta.s16.f16 q0, q1 @ encoding: [0xb7,0xff,0x42,0x00]
+@ THUMB: vcvta.u16.f16 d0, d1 @ encoding: [0xb7,0xff,0x81,0x00]
+@ THUMB: vcvta.u16.f16 q0, q1 @ encoding: [0xb7,0xff,0xc2,0x00]
+
+ vcvtm.s16.f16 d0, d1
+ vcvtm.s16.f16 q0, q1
+ vcvtm.u16.f16 d0, d1
+ vcvtm.u16.f16 q0, q1
+@ ARM: vcvtm.s16.f16 d0, d1 @ encoding: [0x01,0x03,0xb7,0xf3]
+@ ARM: vcvtm.s16.f16 q0, q1 @ encoding: [0x42,0x03,0xb7,0xf3]
+@ ARM: vcvtm.u16.f16 d0, d1 @ encoding: [0x81,0x03,0xb7,0xf3]
+@ ARM: vcvtm.u16.f16 q0, q1 @ encoding: [0xc2,0x03,0xb7,0xf3]
+@ THUMB: vcvtm.s16.f16 d0, d1 @ encoding: [0xb7,0xff,0x01,0x03]
+@ THUMB: vcvtm.s16.f16 q0, q1 @ encoding: [0xb7,0xff,0x42,0x03]
+@ THUMB: vcvtm.u16.f16 d0, d1 @ encoding: [0xb7,0xff,0x81,0x03]
+@ THUMB: vcvtm.u16.f16 q0, q1 @ encoding: [0xb7,0xff,0xc2,0x03]
+
+ vcvtn.s16.f16 d0, d1
+ vcvtn.s16.f16 q0, q1
+ vcvtn.u16.f16 d0, d1
+ vcvtn.u16.f16 q0, q1
+@ ARM: vcvtn.s16.f16 d0, d1 @ encoding: [0x01,0x01,0xb7,0xf3]
+@ ARM: vcvtn.s16.f16 q0, q1 @ encoding: [0x42,0x01,0xb7,0xf3]
+@ ARM: vcvtn.u16.f16 d0, d1 @ encoding: [0x81,0x01,0xb7,0xf3]
+@ ARM: vcvtn.u16.f16 q0, q1 @ encoding: [0xc2,0x01,0xb7,0xf3]
+@ THUMB: vcvtn.s16.f16 d0, d1 @ encoding: [0xb7,0xff,0x01,0x01]
+@ THUMB: vcvtn.s16.f16 q0, q1 @ encoding: [0xb7,0xff,0x42,0x01]
+@ THUMB: vcvtn.u16.f16 d0, d1 @ encoding: [0xb7,0xff,0x81,0x01]
+@ THUMB: vcvtn.u16.f16 q0, q1 @ encoding: [0xb7,0xff,0xc2,0x01]
+
+ vcvtp.s16.f16 d0, d1
+ vcvtp.s16.f16 q0, q1
+ vcvtp.u16.f16 d0, d1
+ vcvtp.u16.f16 q0, q1
+@ ARM: vcvtp.s16.f16 d0, d1 @ encoding: [0x01,0x02,0xb7,0xf3]
+@ ARM: vcvtp.s16.f16 q0, q1 @ encoding: [0x42,0x02,0xb7,0xf3]
+@ ARM: vcvtp.u16.f16 d0, d1 @ encoding: [0x81,0x02,0xb7,0xf3]
+@ ARM: vcvtp.u16.f16 q0, q1 @ encoding: [0xc2,0x02,0xb7,0xf3]
+@ THUMB: vcvtp.s16.f16 d0, d1 @ encoding: [0xb7,0xff,0x01,0x02]
+@ THUMB: vcvtp.s16.f16 q0, q1 @ encoding: [0xb7,0xff,0x42,0x02]
+@ THUMB: vcvtp.u16.f16 d0, d1 @ encoding: [0xb7,0xff,0x81,0x02]
+@ THUMB: vcvtp.u16.f16 q0, q1 @ encoding: [0xb7,0xff,0xc2,0x02]
+
+
+ vcvt.s16.f16 d0, d1, #1
+ vcvt.u16.f16 d0, d1, #2
+ vcvt.f16.s16 d0, d1, #3
+ vcvt.f16.u16 d0, d1, #4
+ vcvt.s16.f16 q0, q1, #5
+ vcvt.u16.f16 q0, q1, #6
+ vcvt.f16.s16 q0, q1, #7
+ vcvt.f16.u16 q0, q1, #8
+@ ARM: vcvt.s16.f16 d0, d1, #1 @ encoding: [0x11,0x0d,0xbf,0xf2]
+@ ARM: vcvt.u16.f16 d0, d1, #2 @ encoding: [0x11,0x0d,0xbe,0xf3]
+@ ARM: vcvt.f16.s16 d0, d1, #3 @ encoding: [0x11,0x0c,0xbd,0xf2]
+@ ARM: vcvt.f16.u16 d0, d1, #4 @ encoding: [0x11,0x0c,0xbc,0xf3]
+@ ARM: vcvt.s16.f16 q0, q1, #5 @ encoding: [0x52,0x0d,0xbb,0xf2]
+@ ARM: vcvt.u16.f16 q0, q1, #6 @ encoding: [0x52,0x0d,0xba,0xf3]
+@ ARM: vcvt.f16.s16 q0, q1, #7 @ encoding: [0x52,0x0c,0xb9,0xf2]
+@ ARM: vcvt.f16.u16 q0, q1, #8 @ encoding: [0x52,0x0c,0xb8,0xf3]
+@ THUMB: vcvt.s16.f16 d0, d1, #1 @ encoding: [0xbf,0xef,0x11,0x0d]
+@ THUMB: vcvt.u16.f16 d0, d1, #2 @ encoding: [0xbe,0xff,0x11,0x0d]
+@ THUMB: vcvt.f16.s16 d0, d1, #3 @ encoding: [0xbd,0xef,0x11,0x0c]
+@ THUMB: vcvt.f16.u16 d0, d1, #4 @ encoding: [0xbc,0xff,0x11,0x0c]
+@ THUMB: vcvt.s16.f16 q0, q1, #5 @ encoding: [0xbb,0xef,0x52,0x0d]
+@ THUMB: vcvt.u16.f16 q0, q1, #6 @ encoding: [0xba,0xff,0x52,0x0d]
+@ THUMB: vcvt.f16.s16 q0, q1, #7 @ encoding: [0xb9,0xef,0x52,0x0c]
+@ THUMB: vcvt.f16.u16 q0, q1, #8 @ encoding: [0xb8,0xff,0x52,0x0c]
+
+ vrinta.f16.f16 d0, d1
+ vrinta.f16.f16 q0, q1
+@ ARM: vrinta.f16 d0, d1 @ encoding: [0x01,0x05,0xb6,0xf3]
+@ ARM: vrinta.f16 q0, q1 @ encoding: [0x42,0x05,0xb6,0xf3]
+@ THUMB: vrinta.f16 d0, d1 @ encoding: [0xb6,0xff,0x01,0x05]
+@ THUMB: vrinta.f16 q0, q1 @ encoding: [0xb6,0xff,0x42,0x05]
+
+ vrintm.f16.f16 d0, d1
+ vrintm.f16.f16 q0, q1
+@ ARM: vrintm.f16 d0, d1 @ encoding: [0x81,0x06,0xb6,0xf3]
+@ ARM: vrintm.f16 q0, q1 @ encoding: [0xc2,0x06,0xb6,0xf3]
+@ THUMB: vrintm.f16 d0, d1 @ encoding: [0xb6,0xff,0x81,0x06]
+@ THUMB: vrintm.f16 q0, q1 @ encoding: [0xb6,0xff,0xc2,0x06]
+
+ vrintn.f16.f16 d0, d1
+ vrintn.f16.f16 q0, q1
+@ ARM: vrintn.f16 d0, d1 @ encoding: [0x01,0x04,0xb6,0xf3]
+@ ARM: vrintn.f16 q0, q1 @ encoding: [0x42,0x04,0xb6,0xf3]
+@ THUMB: vrintn.f16 d0, d1 @ encoding: [0xb6,0xff,0x01,0x04]
+@ THUMB: vrintn.f16 q0, q1 @ encoding: [0xb6,0xff,0x42,0x04]
+
+ vrintp.f16.f16 d0, d1
+ vrintp.f16.f16 q0, q1
+@ ARM: vrintp.f16 d0, d1 @ encoding: [0x81,0x07,0xb6,0xf3]
+@ ARM: vrintp.f16 q0, q1 @ encoding: [0xc2,0x07,0xb6,0xf3]
+@ THUMB: vrintp.f16 d0, d1 @ encoding: [0xb6,0xff,0x81,0x07]
+@ THUMB: vrintp.f16 q0, q1 @ encoding: [0xb6,0xff,0xc2,0x07]
+
+ vrintx.f16.f16 d0, d1
+ vrintx.f16.f16 q0, q1
+@ ARM: vrintx.f16 d0, d1 @ encoding: [0x81,0x04,0xb6,0xf3]
+@ ARM: vrintx.f16 q0, q1 @ encoding: [0xc2,0x04,0xb6,0xf3]
+@ THUMB: vrintx.f16 d0, d1 @ encoding: [0xb6,0xff,0x81,0x04]
+@ THUMB: vrintx.f16 q0, q1 @ encoding: [0xb6,0xff,0xc2,0x04]
+
+ vrintz.f16.f16 d0, d1
+ vrintz.f16.f16 q0, q1
+@ ARM: vrintz.f16 d0, d1 @ encoding: [0x81,0x05,0xb6,0xf3]
+@ ARM: vrintz.f16 q0, q1 @ encoding: [0xc2,0x05,0xb6,0xf3]
+@ THUMB: vrintz.f16 d0, d1 @ encoding: [0xb6,0xff,0x81,0x05]
+@ THUMB: vrintz.f16 q0, q1 @ encoding: [0xb6,0xff,0xc2,0x05]
diff --git a/test/MC/ARM/neon-vcvt-fp16.s b/test/MC/ARM/neon-vcvt-fp16.s
new file mode 100644
index 000000000000..a23be061c0fa
--- /dev/null
+++ b/test/MC/ARM/neon-vcvt-fp16.s
@@ -0,0 +1,18 @@
+@ RUN: llvm-mc -mcpu=cortex-r7 -triple arm -show-encoding < %s 2>&1| \
+@ RUN: FileCheck %s --check-prefix=CHECK-FP16
+@ RUN: not llvm-mc -mcpu=cortex-r5 -triple arm -show-encoding < %s 2>&1 | \
+@ RUN: FileCheck %s --check-prefix=CHECK-NOFP16
+
+@ CHECK-FP16: vcvtt.f32.f16 s7, s1 @ encoding: [0xe0,0x3a,0xf2,0xee]
+@ CHECK-NOFP16: instruction requires: half-float conversions
+ vcvtt.f32.f16 s7, s1
+@ CHECK-FP16: vcvtt.f16.f32 s1, s7 @ encoding: [0xe3,0x0a,0xf3,0xee]
+@ CHECK-NOFP16: instruction requires: half-float conversions
+ vcvtt.f16.f32 s1, s7
+
+@ CHECK-FP16: vcvtb.f32.f16 s7, s1 @ encoding: [0x60,0x3a,0xf2,0xee]
+@ CHECK-NOFP16: instruction requires: half-float conversions
+ vcvtb.f32.f16 s7, s1
+@ CHECK-FP16: vcvtb.f16.f32 s1, s7 @ encoding: [0x63,0x0a,0xf3,0xee]
+@ CHECK-NOFP16: instruction requires: half-float conversions
+ vcvtb.f16.f32 s1, s7
diff --git a/test/MC/ARM/thumb-branches.s b/test/MC/ARM/thumb-branches.s
new file mode 100644
index 000000000000..b4cdfa12a556
--- /dev/null
+++ b/test/MC/ARM/thumb-branches.s
@@ -0,0 +1,25 @@
+@ RUN: llvm-mc < %s -triple thumbv5-linux-gnueabi -filetype=obj -o - \
+@ RUN: | llvm-readobj -r | FileCheck %s
+
+
+ bl end
+ .space 0x3fffff
+end:
+
+ bl end2
+ .space 0x3fffff
+ .global end2
+end2:
+
+ bl end3
+ .space 0x400000
+ .global end3
+end3:
+
+ bl end4
+ .space 0x400000
+end4:
+
+@ CHECK: 0x400003 R_ARM_THM_CALL end2 0x0
+@ CHECK: 0x800006 R_ARM_THM_CALL end3 0x0
+@ CHECK: 0xC0000A R_ARM_THM_CALL end4 0x0
diff --git a/test/MC/ARM/thumb-shift-encoding.s b/test/MC/ARM/thumb-shift-encoding.s
index 54284132b653..ad35aff45055 100644
--- a/test/MC/ARM/thumb-shift-encoding.s
+++ b/test/MC/ARM/thumb-shift-encoding.s
@@ -6,40 +6,40 @@
sbc.w r12, lr, r0
sbc.w r1, r8, r9, lsr #32
- sbc.w r2, r7, pc, lsr #16
+ sbc.w r2, r7, r10, lsr #16
sbc.w r3, r6, r10, lsl #0
sbc.w r4, r5, lr, lsl #16
sbc.w r5, r4, r11, asr #32
- sbc.w r6, r3, sp, asr #16
+ sbc.w r6, r3, r12, asr #16
sbc.w r7, r2, r12, rrx
sbc.w r8, r1, r0, ror #16
@ CHECK: sbc.w r12, lr, r0 @ encoding: [0x6e,0xeb,0x00,0x0c]
@ CHECK: sbc.w r1, r8, r9, lsr #32 @ encoding: [0x68,0xeb,0x19,0x01]
-@ CHECK: sbc.w r2, r7, pc, lsr #16 @ encoding: [0x67,0xeb,0x1f,0x42]
+@ CHECK: sbc.w r2, r7, r10, lsr #16 @ encoding: [0x67,0xeb,0x1a,0x42]
@ CHECK: sbc.w r3, r6, r10 @ encoding: [0x66,0xeb,0x0a,0x03]
@ CHECK: sbc.w r4, r5, lr, lsl #16 @ encoding: [0x65,0xeb,0x0e,0x44]
@ CHECK: sbc.w r5, r4, r11, asr #32 @ encoding: [0x64,0xeb,0x2b,0x05]
-@ CHECK: sbc.w r6, r3, sp, asr #16 @ encoding: [0x63,0xeb,0x2d,0x46]
+@ CHECK: sbc.w r6, r3, r12, asr #16 @ encoding: [0x63,0xeb,0x2c,0x46]
@ CHECK: sbc.w r7, r2, r12, rrx @ encoding: [0x62,0xeb,0x3c,0x07]
@ CHECK: sbc.w r8, r1, r0, ror #16 @ encoding: [0x61,0xeb,0x30,0x48]
and.w r12, lr, r0
and.w r1, r8, r9, lsr #32
- and.w r2, r7, pc, lsr #16
+ and.w r2, r7, r10, lsr #16
and.w r3, r6, r10, lsl #0
and.w r4, r5, lr, lsl #16
and.w r5, r4, r11, asr #32
- and.w r6, r3, sp, asr #16
+ and.w r6, r3, r12, asr #16
and.w r7, r2, r12, rrx
and.w r8, r1, r0, ror #16
@ CHECK: and.w r12, lr, r0 @ encoding: [0x0e,0xea,0x00,0x0c]
@ CHECK: and.w r1, r8, r9, lsr #32 @ encoding: [0x08,0xea,0x19,0x01]
-@ CHECK: and.w r2, r7, pc, lsr #16 @ encoding: [0x07,0xea,0x1f,0x42]
+@ CHECK: and.w r2, r7, r10, lsr #16 @ encoding: [0x07,0xea,0x1a,0x42]
@ CHECK: and.w r3, r6, r10 @ encoding: [0x06,0xea,0x0a,0x03]
@ CHECK: and.w r4, r5, lr, lsl #16 @ encoding: [0x05,0xea,0x0e,0x44]
@ CHECK: and.w r5, r4, r11, asr #32 @ encoding: [0x04,0xea,0x2b,0x05]
-@ CHECK: and.w r6, r3, sp, asr #16 @ encoding: [0x03,0xea,0x2d,0x46]
+@ CHECK: and.w r6, r3, r12, asr #16 @ encoding: [0x03,0xea,0x2c,0x46]
@ CHECK: and.w r7, r2, r12, rrx @ encoding: [0x02,0xea,0x3c,0x07]
@ CHECK: and.w r8, r1, r0, ror #16 @ encoding: [0x01,0xea,0x30,0x48]
diff --git a/test/MC/ARM/thumb1-relax.s b/test/MC/ARM/thumb1-relax.s
new file mode 100644
index 000000000000..ba261aa26356
--- /dev/null
+++ b/test/MC/ARM/thumb1-relax.s
@@ -0,0 +1,35 @@
+@ RUN: not llvm-mc -triple thumbv6m-none-macho -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple thumbv6m-none-eabi -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s
+
+Lhere:
+@ CHECK: out of range pc-relative fixup value
+ ldr r0, Lhere
+
+@ CHECK: out of range pc-relative fixup value
+ b Lfar2
+
+@ CHECK: out of range pc-relative fixup value
+ bne Lfar1
+
+@ CHECK: out of range pc-relative fixup value
+ ldr r0, Lfar2
+
+@ CHECK: misaligned pc-relative fixup value
+ adr r0, Lmisaligned
+
+@ CHECK: misaligned pc-relative fixup value
+ ldr r0, Lmisaligned
+
+ .balign 4
+ .short 0
+Lmisaligned:
+ .word 42
+
+ .space 256
+Lfar1:
+ .word 42
+
+ .space 2050
+Lfar2:
+ .word 42
+
diff --git a/test/MC/ARM/thumb2-diagnostics.s b/test/MC/ARM/thumb2-diagnostics.s
index 8fd161c2cc53..96978899faa2 100644
--- a/test/MC/ARM/thumb2-diagnostics.s
+++ b/test/MC/ARM/thumb2-diagnostics.s
@@ -1,5 +1,8 @@
@ RUN: not llvm-mc -triple=thumbv7-apple-darwin < %s 2> %t
-@ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+@ RUN: FileCheck --check-prefix=CHECK-ERRORS --check-prefix=CHECK-ERRORS-V7 < %t %s
+
+@ RUN: not llvm-mc -triple=thumbv8-apple-darwin < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK-ERRORS --check-prefix=CHECK-ERRORS-V8 < %t %s
@ Ill-formed IT block instructions.
itet eq
@@ -41,7 +44,8 @@
@ CHECK-ERRORS: error: invalid operand for instruction
@ CHECK-ERRORS: error: invalid operand for instruction
@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
-@ CHECK-ERRORS: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS-V7: error: immediate operand must be in the range [0,15]
+@ CHECK-ERRORS-V8: error: invalid operand for instruction
isb #-1
isb #16
@@ -87,7 +91,14 @@ foo2:
@ CHECK-ERRORS: error: invalid operand for instruction
@ CHECK-ERRORS: error: invalid operand for instruction
-ssat r0, #1, r0, asr #32
-usat r0, #1, r0, asr #32
+ ssat r0, #1, r0, asr #32
+ usat r0, #1, r0, asr #32
@ CHECK-ERRORS: error: 'asr #32' shift amount not allowed in Thumb mode
@ CHECK-ERRORS: error: 'asr #32' shift amount not allowed in Thumb mode
+
+ @ PC is not valid as shifted-rGPR
+ sbc.w r2, r7, pc, lsr #16
+ and.w r2, r7, pc, lsr #16
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+
diff --git a/test/MC/ARM/v7k-dsp.s b/test/MC/ARM/v7k-dsp.s
new file mode 100644
index 000000000000..cf5101746389
--- /dev/null
+++ b/test/MC/ARM/v7k-dsp.s
@@ -0,0 +1,4 @@
+@ RUN: llvm-mc -triple thumbv7k-apple-watchos2.0 %s | FileCheck %s
+
+@ CHECK: usad8 r2, r1, r4
+ usad8 r2, r1, r4
diff --git a/test/MC/AsmParser/dot-symbol-non-absolute.s b/test/MC/AsmParser/dot-symbol-non-absolute.s
index 7342365fe1b3..7cc97f9ed2bd 100644
--- a/test/MC/AsmParser/dot-symbol-non-absolute.s
+++ b/test/MC/AsmParser/dot-symbol-non-absolute.s
@@ -4,6 +4,6 @@
.extern foo
-# CHECK: error: expected absolute expression
+# CHECK: : expected absolute expression
. = foo + 10
.byte 1
diff --git a/test/MC/AsmParser/expr-shr.s b/test/MC/AsmParser/expr-shr.s
index fc117b64e95a..792bef050d31 100644
--- a/test/MC/AsmParser/expr-shr.s
+++ b/test/MC/AsmParser/expr-shr.s
@@ -1,13 +1,12 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown-elf %s | FileCheck %s --check-prefix=CHECK
+// RUN: llvm-mc -triple x86_64-unknown-darwin %s | FileCheck %s --check-prefix=CHECK
// RUN: llvm-mc -triple x86_64-pc-windows-msvc %s | FileCheck %s --check-prefix=MSVC
-// RUN: llvm-mc -triple x86_64-unknown-darwin %s | FileCheck %s --check-prefix=DARWIN
.data
// CHECK: .quad 3
-// Both COFF and Darwin still use AShr.
+// MSVC does AShr.
// MSVC: .quad -1
-// DARWIN: .quad -1
.quad (~0 >> 62)
diff --git a/test/MC/AsmParser/exprs-invalid.s b/test/MC/AsmParser/exprs-invalid.s
index 88b2a0a486bc..d2f29248967c 100644
--- a/test/MC/AsmParser/exprs-invalid.s
+++ b/test/MC/AsmParser/exprs-invalid.s
@@ -12,3 +12,6 @@
// CHECK-ERRORS: error: literal value out of range for directive
.long 4e71cf69 // double floating point constant due to missing "0x"
+
+// CHECK-ERRORS: error: literal value out of range for directive
+.word 0xfffffffff
diff --git a/test/MC/AsmParser/exprs.s b/test/MC/AsmParser/exprs.s
index c5fc9b594a0c..1c3e284bcf7e 100644
--- a/test/MC/AsmParser/exprs.s
+++ b/test/MC/AsmParser/exprs.s
@@ -34,7 +34,7 @@ k:
check_expr 1 | 2, 3
check_expr 1 << 1, 2
check_expr 2 >> 1, 1
- check_expr (~0 >> 1), -1
+ check_expr (~0 >> 62), 3
check_expr 3 - 2, 1
check_expr 1 ^ 3, 2
check_expr 1 && 2, 1
diff --git a/test/MC/AsmParser/macros-darwin-vararg.s b/test/MC/AsmParser/macros-darwin-vararg.s
index 4aa2f4c1d9b6..1c3ff69180f0 100644
--- a/test/MC/AsmParser/macros-darwin-vararg.s
+++ b/test/MC/AsmParser/macros-darwin-vararg.s
@@ -54,7 +54,7 @@ abc zed0, zed1, zed2
ifcc4 %eax, %ecx ## test
ifcc4 %ecx, %eax ## test
-// CHECK-NOT movl
+// CHECK-NOT: movl
// CHECK: subl $1, %esp
.set cc,0
ifcc movl, %esp, %ebp
diff --git a/test/MC/AsmParser/reassign.s b/test/MC/AsmParser/reassign.s
new file mode 100644
index 000000000000..817836b2a2f0
--- /dev/null
+++ b/test/MC/AsmParser/reassign.s
@@ -0,0 +1,12 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+ .text
+bar:
+
+ .data
+.globl foo
+.set foo, bar
+.globl foo
+.set foo, bar
+
+// CHECK-NOT: invalid reassignment of non-absolute variable
diff --git a/test/MC/AsmParser/undefined-local-symbol.s b/test/MC/AsmParser/undefined-local-symbol.s
new file mode 100644
index 000000000000..280fc5559804
--- /dev/null
+++ b/test/MC/AsmParser/undefined-local-symbol.s
@@ -0,0 +1,8 @@
+# RUN: not llvm-mc -triple i386-apple-darwin -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s
+
+# NOTE: apple-darwin portion of the triple is to enforce the convention choice
+# of what an assembler local symbol looks like (i.e., 'L' prefix.)
+
+# CHECK: error: assembler local symbol 'Lbar' not defined
+foo:
+ jmp Lbar
diff --git a/test/MC/AsmParser/vararg.s b/test/MC/AsmParser/vararg.s
index e3236b072d12..dae81dfb78ce 100644
--- a/test/MC/AsmParser/vararg.s
+++ b/test/MC/AsmParser/vararg.s
@@ -44,7 +44,7 @@
ifcc4 %eax %ecx ## test
ifcc4 %ecx, %eax ## test
-// CHECK-NOT movl
+// CHECK-NOT: movl
// CHECK: subl $1, %esp
.set cc,0
ifcc movl %esp, %ebp
diff --git a/test/MC/COFF/ARM/directive-type-diagnostics.s b/test/MC/COFF/ARM/directive-type-diagnostics.s
deleted file mode 100644
index f8a52cd43e42..000000000000
--- a/test/MC/COFF/ARM/directive-type-diagnostics.s
+++ /dev/null
@@ -1,10 +0,0 @@
-// RUN: not llvm-mc -triple arm-coff -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
-// RUN: not llvm-mc -triple armeb-coff -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
-// RUN: not llvm-mc -triple thumb-coff -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
-// RUN: not llvm-mc -triple thumbeb-coff -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
-
- .type symbol 32
-// CHECK: error: expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '%<type>' or "<type>"
-// CHECK: .type symbol 32
-// CHECK: ^
-
diff --git a/test/MC/COFF/alias.s b/test/MC/COFF/alias.s
index 2293d43c5750..369bbe8d8444 100644
--- a/test/MC/COFF/alias.s
+++ b/test/MC/COFF/alias.s
@@ -21,9 +21,9 @@ weak_aliased_to_external = external2
.long weak_aliased_to_external
// CHECK: Relocations [
-// CHECK: 0x0 IMAGE_REL_I386_DIR32 local1
+// CHECK: 0x0 IMAGE_REL_I386_DIR32 external_aliased_to_local
// CHECK: 0x4 IMAGE_REL_I386_DIR32 external1
-// CHECK: 0x8 IMAGE_REL_I386_DIR32 local2
+// CHECK: 0x8 IMAGE_REL_I386_DIR32 global_aliased_to_local
// CHECK: 0xC IMAGE_REL_I386_DIR32 external2
// CHECK: ]
// CHECK: Symbols [
diff --git a/test/MC/COFF/bad-expr.s b/test/MC/COFF/bad-expr.s
index ecbdd415c3a6..9a212d988cc3 100644
--- a/test/MC/COFF/bad-expr.s
+++ b/test/MC/COFF/bad-expr.s
@@ -1,7 +1,9 @@
// RUN: not llvm-mc -filetype=obj -triple i386-pc-win32 %s 2>&1 | FileCheck %s
// CHECK: symbol '__ImageBase' can not be undefined in a subtraction expression
+// CHECK: symbol '__ImageBase' can not be undefined in a subtraction expression
.data
_x:
.long _x-__ImageBase
+ .long __ImageBase-_x
diff --git a/test/MC/COFF/basic-coff-64.s b/test/MC/COFF/basic-coff-64.s
index 62e4eb92f855..1fa9280e0ca6 100644
--- a/test/MC/COFF/basic-coff-64.s
+++ b/test/MC/COFF/basic-coff-64.s
@@ -93,7 +93,7 @@ _main: # @main
// CHECK: Length: [[TextSize]]
// CHECK: RelocationCount: 2
// CHECK: LineNumberCount: 0
-// CHECK: Checksum: 0x0
+// CHECK: Checksum: 0x8E1B6D20
// CHECK: Number: [[TextNum]]
// CHECK: Selection: 0x0
// CHECK: }
@@ -110,7 +110,7 @@ _main: # @main
// CHECK: Length: [[DataSize]]
// CHECK: RelocationCount: 0
// CHECK: LineNumberCount: 0
-// CHECK: Checksum: 0x0
+// CHECK: Checksum: 0x2B95CA92
// CHECK: Number: [[DataNum]]
// CHECK: Selection: 0x0
// CHECK: }
diff --git a/test/MC/COFF/basic-coff.s b/test/MC/COFF/basic-coff.s
index 549825aacea8..6aa247bdd29d 100644
--- a/test/MC/COFF/basic-coff.s
+++ b/test/MC/COFF/basic-coff.s
@@ -93,7 +93,7 @@ L_.str: # @.str
// CHECK: Length: 21
// CHECK: RelocationCount: 2
// CHECK: LineNumberCount: 0
-// CHECK: Checksum: 0x0
+// CHECK: Checksum: 0xC6461CBE
// CHECK: Number: 1
// CHECK: Selection: 0x0
// CHECK: }
@@ -110,7 +110,7 @@ L_.str: # @.str
// CHECK: Length: 12
// CHECK: RelocationCount: 0
// CHECK: LineNumberCount: 0
-// CHECK: Checksum: 0x0
+// CHECK: Checksum: 0x2B95CA92
// CHECK: Number: 2
// CHECK: Selection: 0x0
// CHECK: }
diff --git a/test/MC/COFF/invalid-def.s b/test/MC/COFF/invalid-def.s
index 42821c22cf71..f5a71f4d7cfd 100644
--- a/test/MC/COFF/invalid-def.s
+++ b/test/MC/COFF/invalid-def.s
@@ -1,5 +1,8 @@
-# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s 2>&1 | FileCheck %s
+# CHECK: error: starting a new symbol definition without completing the previous one
+# CHECK: error: starting a new symbol definition without completing the previous one
.def first
.def second
+ .def third
diff --git a/test/MC/COFF/invalid-endef.s b/test/MC/COFF/invalid-endef.s
index c6fd8f596268..34c7a090d6e9 100644
--- a/test/MC/COFF/invalid-endef.s
+++ b/test/MC/COFF/invalid-endef.s
@@ -1,4 +1,7 @@
-# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s 2>&1 | FileCheck %s
+# CHECK: ending symbol definition without starting one
+# CHECK: ending symbol definition without starting one
+ .endef
.endef
diff --git a/test/MC/COFF/invalid-scl-range.s b/test/MC/COFF/invalid-scl-range.s
index 57225059821e..326cc1ef9862 100644
--- a/test/MC/COFF/invalid-scl-range.s
+++ b/test/MC/COFF/invalid-scl-range.s
@@ -1,6 +1,9 @@
# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
.def storage_class_range
+# CHECK: storage class value '1337' out of range
+# CHECK: storage class value '9001' out of range
.scl 1337
+ .scl 9001
.endef
diff --git a/test/MC/COFF/invalid-scl.s b/test/MC/COFF/invalid-scl.s
index 8565a5afe0e9..be2f43aee5fa 100644
--- a/test/MC/COFF/invalid-scl.s
+++ b/test/MC/COFF/invalid-scl.s
@@ -1,4 +1,7 @@
-# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s 2>&1 | FileCheck %s
+# CHECK: storage class specified outside of symbol definition
+# CHECK: storage class specified outside of symbol definition
+ .scl 1337
.scl 1337
diff --git a/test/MC/COFF/invalid-type.s b/test/MC/COFF/invalid-type.s
index a1e131e99e55..69493092d6c1 100644
--- a/test/MC/COFF/invalid-type.s
+++ b/test/MC/COFF/invalid-type.s
@@ -1,4 +1,7 @@
-# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s
+# RUN: not llvm-mc -triple i686-windows -filetype obj -o /dev/null %s 2>&1 | FileCheck %s
+# CHECK: symbol type specified outside of a symbol definition
+# CHECK: symbol type specified outside of a symbol definition
.type 65536
+ .type 65537
diff --git a/test/MC/COFF/label-undefined.s b/test/MC/COFF/label-undefined.s
new file mode 100644
index 000000000000..ad9a194897c3
--- /dev/null
+++ b/test/MC/COFF/label-undefined.s
@@ -0,0 +1,6 @@
+// RUN: not llvm-mc -filetype=obj -triple i386-pc-win32 %s 2>&1 | FileCheck %s
+// CHECK: assembler label 'Lundefined' can not be undefined
+// CHECK: assembler label 'Lundefined2' can not be undefined
+ .text
+ movl Lundefined, %eax
+ movl Lundefined2, %eax
diff --git a/test/MC/COFF/secidx-diagnostic.s b/test/MC/COFF/secidx-diagnostic.s
index 3e496c3fd45c..aacf7f862803 100644
--- a/test/MC/COFF/secidx-diagnostic.s
+++ b/test/MC/COFF/secidx-diagnostic.s
@@ -2,7 +2,9 @@
// RUN: FileCheck %s < %t
// CHECK: symbol 'bar' can not be undefined
+// CHECK: symbol 'baz' can not be undefined
.data
foo:
.secidx bar
+ .secidx baz
diff --git a/test/MC/COFF/simple-fixups.s b/test/MC/COFF/simple-fixups.s
index cb5d7642ee6b..9d9600842729 100644
--- a/test/MC/COFF/simple-fixups.s
+++ b/test/MC/COFF/simple-fixups.s
@@ -2,8 +2,8 @@
// references to functions. Failing to do so might cause pointer-to-function
// equality to fail if /INCREMENTAL links are used.
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s | FileCheck %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s | FileCheck %s
+// RUN: llvm-mc -filetype=obj -incremental-linker-compatible -triple i686-pc-win32 %s | llvm-readobj -s | FileCheck %s
+// RUN: llvm-mc -filetype=obj -incremental-linker-compatible -triple x86_64-pc-win32 %s | llvm-readobj -s | FileCheck %s
.def _foo;
.scl 2;
diff --git a/test/MC/COFF/stdin.s b/test/MC/COFF/stdin.s
new file mode 100644
index 000000000000..9a22da5488e6
--- /dev/null
+++ b/test/MC/COFF/stdin.s
@@ -0,0 +1,3 @@
+// REQUIRES: shell
+// RUN: ( echo "test"; llvm-mc -filetype=obj -triple i686-pc-win32 %s ) > %t
+
diff --git a/test/MC/COFF/symbol-fragment-offset-64.s b/test/MC/COFF/symbol-fragment-offset-64.s
index 05b46bbb7e79..03077ce94291 100644
--- a/test/MC/COFF/symbol-fragment-offset-64.s
+++ b/test/MC/COFF/symbol-fragment-offset-64.s
@@ -114,7 +114,7 @@ _main: # @main
// CHECK: Length: 48
// CHECK: RelocationCount: 6
// CHECK: LineNumberCount: 0
-// CHECK: Checksum: 0x0
+// CHECK: Checksum: 0x7BD396E3
// CHECK: Number: 1
// CHECK: Selection: 0x0
// CHECK: }
@@ -131,7 +131,7 @@ _main: # @main
// CHECK: Length: 35
// CHECK: RelocationCount: 0
// CHECK: LineNumberCount: 0
-// CHECK: Checksum: 0x0
+// CHECK: Checksum: 0xB0A4C21
// CHECK: Number: 2
// CHECK: Selection: 0x0
// CHECK: }
diff --git a/test/MC/COFF/symbol-fragment-offset.s b/test/MC/COFF/symbol-fragment-offset.s
index cc5040a99cc1..c592fa4c0e7b 100644
--- a/test/MC/COFF/symbol-fragment-offset.s
+++ b/test/MC/COFF/symbol-fragment-offset.s
@@ -114,7 +114,7 @@ L_.str2:
// CHECK: Length: 45
// CHECK: RelocationCount: 6
// CHECK: LineNumberCount: 0
-// CHECK: Checksum: 0x0
+// CHECK: Checksum: 0xDED1DC2
// CHECK: Number: 1
// CHECK: Selection: 0x0
// CHECK: }
@@ -131,7 +131,7 @@ L_.str2:
// CHECK: Length: 35
// CHECK: RelocationCount: 0
// CHECK: LineNumberCount: 0
-// CHECK: Checksum: 0x0
+// CHECK: Checksum: 0xB0A4C21
// CHECK: Number: 2
// CHECK: Selection: 0x0
// CHECK: }
diff --git a/test/MC/COFF/temporary-alias.s b/test/MC/COFF/temporary-alias.s
new file mode 100644
index 000000000000..be4297024afc
--- /dev/null
+++ b/test/MC/COFF/temporary-alias.s
@@ -0,0 +1,21 @@
+// RUN: llvm-mc -triple=i686-pc-windows -filetype=obj -o %t %s
+// RUN: llvm-objdump -d -r %t | FileCheck %s
+
+.globl _main
+_main:
+// CHECK: 00 00 00 00
+// CHECK-NEXT: 00000002: IMAGE_REL_I386_DIR32 .rdata
+movb L_alias1(%eax), %al
+// CHECK: 01 00 00 00
+// CHECK-NEXT: 00000008: IMAGE_REL_I386_DIR32 .rdata
+movb L_alias2(%eax), %al
+retl
+
+.section .rdata,"dr"
+L_sym1:
+.ascii "\001"
+L_sym2:
+.ascii "\002"
+
+L_alias1 = L_sym1
+L_alias2 = L_sym2
diff --git a/test/MC/COFF/timestamp.s b/test/MC/COFF/timestamp.s
index 18736a2b2d68..a2761575789d 100644
--- a/test/MC/COFF/timestamp.s
+++ b/test/MC/COFF/timestamp.s
@@ -1,4 +1,6 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | llvm-readobj -h | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 -incremental-linker-compatible %s -o - | llvm-readobj -h | FileCheck %s
+// REQUIRES: timestamps
// CHECK: ImageFileHeader {
-// CHECK: TimeDateStamp: {{.*}} (0x0)
+// CHECK: TimeDateStamp:
+// CHECK-NOT: 1970-01-01 00:00:00 (0x0)
diff --git a/test/MC/Disassembler/AArch64/arm64-scalar-fp.txt b/test/MC/Disassembler/AArch64/arm64-scalar-fp.txt
index f139700164ca..c536721f3466 100644
--- a/test/MC/Disassembler/AArch64/arm64-scalar-fp.txt
+++ b/test/MC/Disassembler/AArch64/arm64-scalar-fp.txt
@@ -1,98 +1,131 @@
# RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon --disassemble -output-asm-variant=1 < %s | FileCheck %s
+# RUN: llvm-mc -triple arm64-apple-darwin -mattr=neon,v8.2a,fullfp16 --disassemble -output-asm-variant=1 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
#-----------------------------------------------------------------------------
# Floating-point arithmetic
#-----------------------------------------------------------------------------
+0x41 0xc0 0xe0 0x1e
0x41 0xc0 0x20 0x1e
0x41 0xc0 0x60 0x1e
+# FP16: fabs h1, h2
# CHECK: fabs s1, s2
# CHECK: fabs d1, d2
+0x41 0x28 0xe3 0x1e
0x41 0x28 0x23 0x1e
0x41 0x28 0x63 0x1e
+# FP16: fadd h1, h2, h3
# CHECK: fadd s1, s2, s3
# CHECK: fadd d1, d2, d3
+0x41 0x18 0xe3 0x1e
0x41 0x18 0x23 0x1e
0x41 0x18 0x63 0x1e
+# FP16: fdiv h1, h2, h3
# CHECK: fdiv s1, s2, s3
# CHECK: fdiv d1, d2, d3
+0x41 0x10 0xc3 0x1f
0x41 0x10 0x03 0x1f
0x41 0x10 0x43 0x1f
+# FP16: fmadd h1, h2, h3, h4
# CHECK: fmadd s1, s2, s3, s4
# CHECK: fmadd d1, d2, d3, d4
+0x41 0x48 0xe3 0x1e
0x41 0x48 0x23 0x1e
0x41 0x48 0x63 0x1e
+0x41 0x68 0xe3 0x1e
0x41 0x68 0x23 0x1e
0x41 0x68 0x63 0x1e
+# FP16: fmax h1, h2, h3
# CHECK: fmax s1, s2, s3
# CHECK: fmax d1, d2, d3
+# FP16: fmaxnm h1, h2, h3
# CHECK: fmaxnm s1, s2, s3
# CHECK: fmaxnm d1, d2, d3
+0x41 0x58 0xe3 0x1e
0x41 0x58 0x23 0x1e
0x41 0x58 0x63 0x1e
+0x41 0x78 0xe3 0x1e
0x41 0x78 0x23 0x1e
0x41 0x78 0x63 0x1e
+# FP16: fmin h1, h2, h3
# CHECK: fmin s1, s2, s3
# CHECK: fmin d1, d2, d3
+# FP16: fminnm h1, h2, h3
# CHECK: fminnm s1, s2, s3
# CHECK: fminnm d1, d2, d3
+0x41 0x90 0xc3 0x1f
0x41 0x90 0x03 0x1f
0x41 0x90 0x43 0x1f
+# FP16: fmsub h1, h2, h3, h4
# CHECK: fmsub s1, s2, s3, s4
# CHECK: fmsub d1, d2, d3, d4
+0x41 0x08 0xe3 0x1e
0x41 0x08 0x23 0x1e
0x41 0x08 0x63 0x1e
+# FP16: fmul h1, h2, h3
# CHECK: fmul s1, s2, s3
# CHECK: fmul d1, d2, d3
+0x41 0x40 0xe1 0x1e
0x41 0x40 0x21 0x1e
0x41 0x40 0x61 0x1e
+# FP16: fneg h1, h2
# CHECK: fneg s1, s2
# CHECK: fneg d1, d2
+0x41 0x10 0xe3 0x1f
0x41 0x10 0x23 0x1f
0x41 0x10 0x63 0x1f
+# FP16: fnmadd h1, h2, h3, h4
# CHECK: fnmadd s1, s2, s3, s4
# CHECK: fnmadd d1, d2, d3, d4
+0x41 0x90 0xe3 0x1f
0x41 0x90 0x23 0x1f
0x41 0x90 0x63 0x1f
+# FP16: fnmsub h1, h2, h3, h4
# CHECK: fnmsub s1, s2, s3, s4
# CHECK: fnmsub d1, d2, d3, d4
+0x41 0x88 0xe3 0x1e
0x41 0x88 0x23 0x1e
0x41 0x88 0x63 0x1e
+# FP16: fnmul h1, h2, h3
# CHECK: fnmul s1, s2, s3
# CHECK: fnmul d1, d2, d3
+0x41 0xc0 0xe1 0x1e
0x41 0xc0 0x21 0x1e
0x41 0xc0 0x61 0x1e
+# FP16: fsqrt h1, h2
# CHECK: fsqrt s1, s2
# CHECK: fsqrt d1, d2
+0x41 0x38 0xe3 0x1e
0x41 0x38 0x23 0x1e
0x41 0x38 0x63 0x1e
+# FP16: fsub h1, h2, h3
# CHECK: fsub s1, s2, s3
# CHECK: fsub d1, d2, d3
@@ -100,31 +133,43 @@
# Floating-point comparison
#-----------------------------------------------------------------------------
+0x20 0x04 0xe2 0x1e
0x20 0x04 0x22 0x1e
0x20 0x04 0x62 0x1e
+0x30 0x04 0xe2 0x1e
0x30 0x04 0x22 0x1e
0x30 0x04 0x62 0x1e
+# FP16: fccmp h1, h2, #0, eq
# CHECK: fccmp s1, s2, #0, eq
# CHECK: fccmp d1, d2, #0, eq
+# FP16: fccmpe h1, h2, #0, eq
# CHECK: fccmpe s1, s2, #0, eq
# CHECK: fccmpe d1, d2, #0, eq
+0x20 0x20 0xe2 0x1e
0x20 0x20 0x22 0x1e
0x20 0x20 0x62 0x1e
+0x28 0x20 0xe0 0x1e
0x28 0x20 0x20 0x1e
0x28 0x20 0x60 0x1e
+0x30 0x20 0xe2 0x1e
0x30 0x20 0x22 0x1e
0x30 0x20 0x62 0x1e
+0x38 0x20 0xe0 0x1e
0x38 0x20 0x20 0x1e
0x38 0x20 0x60 0x1e
+# FP16: fcmp h1, h2
# CHECK: fcmp s1, s2
# CHECK: fcmp d1, d2
+# FP16: fcmp h1, #0.0
# CHECK: fcmp s1, #0.0
# CHECK: fcmp d1, #0.0
+# FP16: fcmpe h1, h2
# CHECK: fcmpe s1, s2
# CHECK: fcmpe d1, d2
+# FP16: fcmpe h1, #0.0
# CHECK: fcmpe s1, #0.0
# CHECK: fcmpe d1, #0.0
@@ -132,9 +177,11 @@
# Floating-point conditional select
#-----------------------------------------------------------------------------
+0x41 0x0c 0xe3 0x1e
0x41 0x0c 0x23 0x1e
0x41 0x0c 0x63 0x1e
+# FP16: fcsel h1, h2, h3, eq
# CHECK: fcsel s1, s2, s3, eq
# CHECK: fcsel d1, d2, d3, eq
@@ -169,29 +216,37 @@
# Floating-point move
#-----------------------------------------------------------------------------
+0x41 0x00 0xe7 0x1e
+0x41 0x00 0xe6 0x1e
0x41 0x00 0x27 0x1e
0x41 0x00 0x26 0x1e
0x41 0x00 0x67 0x9e
0x41 0x00 0x66 0x9e
+# FP16: fmov h1, w2
+# FP16: fmov w1, h2
# CHECK: fmov s1, w2
# CHECK: fmov w1, s2
# CHECK: fmov d1, x2
# CHECK: fmov x1, d2
+0x01 0x10 0xe8 0x1e
0x01 0x10 0x28 0x1e
0x01 0x10 0x68 0x1e
0x01 0xf0 0x7b 0x1e
0x01 0xf0 0x6b 0x1e
+# FP16: fmov h1, #0.12500000
# CHECK: fmov s1, #0.12500000
# CHECK: fmov d1, #0.12500000
# CHECK: fmov d1, #-0.48437500
# CHECK: fmov d1, #0.48437500
+0x41 0x40 0xe0 0x1e
0x41 0x40 0x20 0x1e
0x41 0x40 0x60 0x1e
+# FP16: fmov h1, h2
# CHECK: fmov s1, s2
# CHECK: fmov d1, d2
@@ -199,45 +254,59 @@
# Floating-point round to integral
#-----------------------------------------------------------------------------
+0x41 0x40 0xe6 0x1e
0x41 0x40 0x26 0x1e
0x41 0x40 0x66 0x1e
+# FP16: frinta h1, h2
# CHECK: frinta s1, s2
# CHECK: frinta d1, d2
+0x41 0xc0 0xe7 0x1e
0x41 0xc0 0x27 0x1e
0x41 0xc0 0x67 0x1e
+# FP16: frinti h1, h2
# CHECK: frinti s1, s2
# CHECK: frinti d1, d2
+0x41 0x40 0xe5 0x1e
0x41 0x40 0x25 0x1e
0x41 0x40 0x65 0x1e
+# FP16: frintm h1, h2
# CHECK: frintm s1, s2
# CHECK: frintm d1, d2
+0x41 0x40 0xe4 0x1e
0x41 0x40 0x24 0x1e
0x41 0x40 0x64 0x1e
+# FP16: frintn h1, h2
# CHECK: frintn s1, s2
# CHECK: frintn d1, d2
+0x41 0xc0 0xe4 0x1e
0x41 0xc0 0x24 0x1e
0x41 0xc0 0x64 0x1e
+# FP16: frintp h1, h2
# CHECK: frintp s1, s2
# CHECK: frintp d1, d2
+0x41 0x40 0xe7 0x1e
0x41 0x40 0x27 0x1e
0x41 0x40 0x67 0x1e
+# FP16: frintx h1, h2
# CHECK: frintx s1, s2
# CHECK: frintx d1, d2
+0x41 0xc0 0xe5 0x1e
0x41 0xc0 0x25 0x1e
0x41 0xc0 0x65 0x1e
+# FP16: frintz h1, h2
# CHECK: frintz s1, s2
# CHECK: frintz d1, d2
diff --git a/test/MC/Disassembler/AArch64/armv8.1a-pan.txt b/test/MC/Disassembler/AArch64/armv8.1a-pan.txt
index 2af5c2aa21ef..22dc5fd58948 100644
--- a/test/MC/Disassembler/AArch64/armv8.1a-pan.txt
+++ b/test/MC/Disassembler/AArch64/armv8.1a-pan.txt
@@ -2,9 +2,11 @@
0x9f,0x40,0x00,0xd5
0x9f,0x41,0x00,0xd5
+0x9f,0x42,0x00,0xd5
0x65,0x42,0x18,0xd5
0x6d,0x42,0x38,0xd5
# CHECK: msr PAN, #0
# CHECK: msr PAN, #1
+# CHECK-NOT: msr PAN, #2
# CHECK: msr PAN, x5
# CHECK: mrs x13, PAN
diff --git a/test/MC/Disassembler/AArch64/armv8.2a-at.txt b/test/MC/Disassembler/AArch64/armv8.2a-at.txt
new file mode 100644
index 000000000000..81841f2c1302
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.2a-at.txt
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=-v8.2a --disassemble < %s | FileCheck %s --check-prefix=NO_V82
+
+[0x01,0x79,0x08,0xd5]
+[0x22,0x79,0x08,0xd5]
+# CHECK: at s1e1rp, x1
+# CHECK: at s1e1wp, x2
+# NO_V82: sys #0, c7, c9, #0, x1
+# NO_V82: sys #0, c7, c9, #1, x2
diff --git a/test/MC/Disassembler/AArch64/armv8.2a-mmfr2.txt b/test/MC/Disassembler/AArch64/armv8.2a-mmfr2.txt
new file mode 100644
index 000000000000..071412672b16
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.2a-mmfr2.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a --disassemble < %s | FileCheck %s
+
+[0x43,0x07,0x38,0xd5]
+# CHECK: mrs x3, ID_AA64MMFR2_EL1
diff --git a/test/MC/Disassembler/AArch64/armv8.2a-persistent-memory.txt b/test/MC/Disassembler/AArch64/armv8.2a-persistent-memory.txt
new file mode 100644
index 000000000000..58f1f81d83e2
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.2a-persistent-memory.txt
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu --disassemble < %s | FileCheck --check-prefix=NO_V82 %s
+
+[0x27,0x7c,0x0b,0xd5]
+# CHECK: dc cvap, x7
+# NO_V82: sys #3, c7, c12, #1, x7
diff --git a/test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt b/test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt
new file mode 100644
index 000000000000..217424cc46e0
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt
@@ -0,0 +1,87 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+spe --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu --disassemble < %s | FileCheck --check-prefix=NO_SPE %s
+
+[0x3f,0x22,0x03,0xd5]
+# CHECK: psb csync
+# NO_SPE: hint #0x11
+
+[0x00,0x9a,0x18,0xd5]
+[0x20,0x9a,0x18,0xd5]
+[0x60,0x9a,0x18,0xd5]
+[0xe0,0x9a,0x18,0xd5]
+[0x00,0x99,0x1c,0xd5]
+[0x00,0x99,0x1d,0xd5]
+[0x00,0x99,0x18,0xd5]
+[0x40,0x99,0x18,0xd5]
+[0x60,0x99,0x18,0xd5]
+[0x80,0x99,0x18,0xd5]
+[0xa0,0x99,0x18,0xd5]
+[0xc0,0x99,0x18,0xd5]
+[0xe0,0x99,0x18,0xd5]
+# CHECK: msr PMBLIMITR_EL1, x0
+# NO_SPE: msr S3_0_C9_C10_0, x0
+# CHECK: msr PMBPTR_EL1, x0
+# NO_SPE: msr S3_0_C9_C10_1, x0
+# CHECK: msr PMBSR_EL1, x0
+# NO_SPE: msr S3_0_C9_C10_3, x0
+# CHECK: msr PMBIDR_EL1, x0
+# NO_SPE: msr S3_0_C9_C10_7, x0
+# CHECK: msr PMSCR_EL2, x0
+# NO_SPE: msr S3_4_C9_C9_0, x0
+# CHECK: msr PMSCR_EL12, x0
+# NO_SPE: msr S3_5_C9_C9_0, x0
+# CHECK: msr PMSCR_EL1, x0
+# NO_SPE: msr S3_0_C9_C9_0, x0
+# CHECK: msr PMSICR_EL1, x0
+# NO_SPE: msr S3_0_C9_C9_2, x0
+# CHECK: msr PMSIRR_EL1, x0
+# NO_SPE: msr S3_0_C9_C9_3, x0
+# CHECK: msr PMSFCR_EL1, x0
+# NO_SPE: msr S3_0_C9_C9_4, x0
+# CHECK: msr PMSEVFR_EL1, x0
+# NO_SPE: msr S3_0_C9_C9_5, x0
+# CHECK: msr PMSLATFR_EL1, x0
+# NO_SPE: msr S3_0_C9_C9_6, x0
+# CHECK: msr PMSIDR_EL1, x0
+# NO_SPE: msr S3_0_C9_C9_7, x0
+
+[0x00,0x9a,0x38,0xd5]
+[0x20,0x9a,0x38,0xd5]
+[0x60,0x9a,0x38,0xd5]
+[0xe0,0x9a,0x38,0xd5]
+[0x00,0x99,0x3c,0xd5]
+[0x00,0x99,0x3d,0xd5]
+[0x00,0x99,0x38,0xd5]
+[0x40,0x99,0x38,0xd5]
+[0x60,0x99,0x38,0xd5]
+[0x80,0x99,0x38,0xd5]
+[0xa0,0x99,0x38,0xd5]
+[0xc0,0x99,0x38,0xd5]
+[0xe0,0x99,0x38,0xd5]
+
+# CHECK: mrs x0, PMBLIMITR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C10_0
+# CHECK: mrs x0, PMBPTR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C10_1
+# CHECK: mrs x0, PMBSR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C10_3
+# CHECK: mrs x0, PMBIDR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C10_7
+# CHECK: mrs x0, PMSCR_EL2
+# NO_SPE: mrs x0, S3_4_C9_C9_0
+# CHECK: mrs x0, PMSCR_EL12
+# NO_SPE: mrs x0, S3_5_C9_C9_0
+# CHECK: mrs x0, PMSCR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C9_0
+# CHECK: mrs x0, PMSICR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C9_2
+# CHECK: mrs x0, PMSIRR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C9_3
+# CHECK: mrs x0, PMSFCR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C9_4
+# CHECK: mrs x0, PMSEVFR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C9_5
+# CHECK: mrs x0, PMSLATFR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C9_6
+# CHECK: mrs x0, PMSIDR_EL1
+# NO_SPE: mrs x0, S3_0_C9_C9_7
diff --git a/test/MC/Disassembler/AArch64/armv8.2a-uao.txt b/test/MC/Disassembler/AArch64/armv8.2a-uao.txt
new file mode 100644
index 000000000000..b8300f4c7264
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/armv8.2a-uao.txt
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.2a --disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64-none-linux-gnu --disassemble < %s 2>&1 | FileCheck --check-prefix=NO_V82A %s
+
+[0x7f,0x40,0x00,0xd5]
+[0x7f,0x41,0x00,0xd5]
+[0x7f,0x42,0x00,0xd5]
+# CHECK: msr UAO, #0
+# CHECK: msr UAO, #1
+# CHECK: msr S0_0_C4_C2_3, xzr
+# NO_V82A: msr S0_0_C4_C0_3, xzr
+# NO_V82A: msr S0_0_C4_C1_3, xzr
+# NO_V82A: msr S0_0_C4_C2_3, xzr
+
+[0x81,0x42,0x18,0xd5]
+[0x82,0x42,0x38,0xd5]
+# CHECK: msr UAO, x1
+# CHECK: mrs x2, UAO
+# NO_V82A: msr S3_0_C4_C2_4, x1
+# NO_V82A: mrs x2, S3_0_C4_C2_4
diff --git a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
index 615d9ba19ca8..185f0c1124a6 100644
--- a/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
+++ b/test/MC/Disassembler/AArch64/basic-a64-instructions.txt
@@ -1,5 +1,6 @@
# RUN: llvm-mc -triple=aarch64 -mattr=+fp-armv8 -disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple=arm64 -mattr=+fp-armv8 -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=arm64 -mattr=+fp-armv8,+fullfp16 -disassemble < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
#------------------------------------------------------------------------------
# Add/sub (immediate)
@@ -1516,6 +1517,20 @@
# Floating-point <-> fixed-point conversion
#------------------------------------------------------------------------------
+# FP16: fcvtzs w3, h5, #1
+# FP16: fcvtzs wzr, h20, #13
+# FP16: fcvtzs w19, h0, #32
+0xa3 0xfc 0xd8 0x1e
+0x9f 0xce 0xd8 0x1e
+0x13 0x80 0xd8 0x1e
+
+# FP16: fcvtzs x3, h5, #1
+# FP16: fcvtzs x12, h30, #45
+# FP16: fcvtzs x19, h0, #64
+0xa3 0xfc 0xd8 0x9e
+0xcc 0x4f 0xd8 0x9e
+0x13 0x00 0xd8 0x9e
+
# CHECK: fcvtzs w3, s5, #1
# CHECK: fcvtzs wzr, s20, #13
# CHECK: fcvtzs w19, s0, #32
@@ -1544,6 +1559,20 @@
0xcc 0x4f 0x58 0x9e
0x13 0x00 0x58 0x9e
+# FP16: fcvtzu w3, h5, #1
+# FP16: fcvtzu wzr, h20, #13
+# FP16: fcvtzu w19, h0, #32
+0xa3 0xfc 0xd9 0x1e
+0x9f 0xce 0xd9 0x1e
+0x13 0x80 0xd9 0x1e
+
+# FP16: fcvtzu x3, h5, #1
+# FP16: fcvtzu x12, h30, #45
+# FP16: fcvtzu x19, h0, #64
+0xa3 0xfc 0xd9 0x9e
+0xcc 0x4f 0xd9 0x9e
+0x13 0x00 0xd9 0x9e
+
# CHECK: fcvtzu w3, s5, #1
# CHECK: fcvtzu wzr, s20, #13
# CHECK: fcvtzu w19, s0, #32
@@ -1572,6 +1601,20 @@
0xcc 0x4f 0x59 0x9e
0x13 0x00 0x59 0x9e
+# FP16: scvtf h23, w19, #1
+# FP16: scvtf h31, wzr, #20
+# FP16: scvtf h14, w0, #32
+0x77 0xfe 0xc2 0x1e
+0xff 0xb3 0xc2 0x1e
+0x0e 0x80 0xc2 0x1e
+
+# FP16: scvtf h23, x19, #1
+# FP16: scvtf h31, xzr, #20
+# FP16: scvtf h14, x0, #64
+0x77 0xfe 0xc2 0x9e
+0xff 0xb3 0xc2 0x9e
+0x0e 0x00 0xc2 0x9e
+
# CHECK: scvtf s23, w19, #1
# CHECK: scvtf s31, wzr, #20
# CHECK: scvtf s14, w0, #32
@@ -1600,6 +1643,20 @@
0xff 0xb3 0x42 0x9e
0x0e 0x00 0x42 0x9e
+# FP16: ucvtf h23, w19, #1
+# FP16: ucvtf h31, wzr, #20
+# FP16: ucvtf h14, w0, #32
+0x77 0xfe 0xc3 0x1e
+0xff 0xb3 0xc3 0x1e
+0x0e 0x80 0xc3 0x1e
+
+# FP16: ucvtf h23, x19, #1
+# FP16: ucvtf h31, xzr, #20
+# FP16: ucvtf h14, x0, #64
+0x77 0xfe 0xc3 0x9e
+0xff 0xb3 0xc3 0x9e
+0x0e 0x00 0xc3 0x9e
+
# CHECK: ucvtf s23, w19, #1
# CHECK: ucvtf s31, wzr, #20
# CHECK: ucvtf s14, w0, #32
@@ -1631,6 +1688,61 @@
#------------------------------------------------------------------------------
# Floating-point <-> integer conversion
#------------------------------------------------------------------------------
+
+# FP16: fcvtns w3, h31
+# FP16: fcvtns xzr, h12
+# FP16: fcvtnu wzr, h12
+# FP16: fcvtnu x0, h0
+0xe3 0x3 0xe0 0x1e
+0x9f 0x1 0xe0 0x9e
+0x9f 0x1 0xe1 0x1e
+0x0 0x0 0xe1 0x9e
+
+# FP16: fcvtps wzr, h9
+# FP16: fcvtps x12, h20
+# FP16: fcvtpu w30, h23
+# FP16: fcvtpu x29, h3
+0x3f 0x1 0xe8 0x1e
+0x8c 0x2 0xe8 0x9e
+0xfe 0x2 0xe9 0x1e
+0x7d 0x0 0xe9 0x9e
+
+# FP16: fcvtms w2, h3
+# FP16: fcvtms x4, h5
+# FP16: fcvtmu w6, h7
+# FP16: fcvtmu x8, h9
+0x62 0x0 0xf0 0x1e
+0xa4 0x0 0xf0 0x9e
+0xe6 0x0 0xf1 0x1e
+0x28 0x1 0xf1 0x9e
+
+# FP16: fcvtzs w10, h11
+# FP16: fcvtzs x12, h13
+# FP16: fcvtzu w14, h15
+# FP16: fcvtzu x15, h16
+0x6a 0x1 0xf8 0x1e
+0xac 0x1 0xf8 0x9e
+0xee 0x1 0xf9 0x1e
+0xf 0x2 0xf9 0x9e
+
+# FP16: scvtf h17, w18
+# FP16: scvtf h19, x20
+# FP16: ucvtf h21, w22
+# FP16: scvtf h23, x24
+0x51 0x2 0xe2 0x1e
+0x93 0x2 0xe2 0x9e
+0xd5 0x2 0xe3 0x1e
+0x17 0x3 0xe2 0x9e
+
+# FP16: fcvtas w25, h26
+# FP16: fcvtas x27, h28
+# FP16: fcvtau w29, h30
+# FP16: fcvtau xzr, h0
+0x59 0x3 0xe4 0x1e
+0x9b 0x3 0xe4 0x9e
+0xdd 0x3 0xe5 0x1e
+0x1f 0x0 0xe5 0x9e
+
# CHECK: fcvtns w3, s31
# CHECK: fcvtns xzr, s12
# CHECK: fcvtnu wzr, s12
@@ -4172,12 +4284,16 @@
# CHECK: mrs x12, {{s3_7_c15_c1_5|S3_7_C15_C1_5}}
# CHECK: mrs x13, {{s3_2_c11_c15_7|S3_2_C11_C15_7}}
+# CHECK: mrs xzr, {{s0_0_c4_c0_0|S0_0_C4_C0_0}}
# CHECK: msr {{s3_0_c15_c0_0|S3_0_C15_C0_0}}, x12
# CHECK: msr {{s3_7_c11_c13_7|S3_7_C11_C13_7}}, x5
+# CHECK: msr {{s0_0_c4_c0_0|S0_0_C4_C0_0}}, xzr
0xac 0xf1 0x3f 0xd5
0xed 0xbf 0x3a 0xd5
+0x1f 0x40 0x20 0xd5
0x0c 0xf0 0x18 0xd5
0xe5 0xbd 0x1f 0xd5
+0x1f 0x40 0x00 0xd5
#------------------------------------------------------------------------------
# Test and branch (immediate)
diff --git a/test/MC/Disassembler/AArch64/fullfp16-neg.txt b/test/MC/Disassembler/AArch64/fullfp16-neg.txt
new file mode 100644
index 000000000000..4feb20c10939
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/fullfp16-neg.txt
@@ -0,0 +1,145 @@
+# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=+neon,-fullfp16 < %s 2>&1 | FileCheck %s
+# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=-neon,-fullfp16 < %s 2>&1 | FileCheck %s
+
+[0x41,0xc0,0xe0,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x28,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x18,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x10,0xc3,0x1f]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x48,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x68,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x58,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x78,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x90,0xc3,0x1f]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x08,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x40,0xe1,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x10,0xe3,0x1f]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x90,0xe3,0x1f]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x88,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xc0,0xe1,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x38,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x04,0xe2,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x30,0x04,0xe2,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x20,0xe2,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x28,0x20,0xe0,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x30,0x20,0xe2,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x38,0x20,0xe0,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x0c,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe4,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe4,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe5,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe5,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xf0,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xf0,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xf1,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xf1,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe0,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe0,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe1,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe1,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe8,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe8,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe9,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe9,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xf8,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xfc,0xd8,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xf8,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xfc,0xd8,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xf9,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xfc,0xd9,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xf9,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xfc,0xd9,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe2,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xfc,0xc2,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe2,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xfc,0xc2,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xfc,0xc3,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe3,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xfc,0xc3,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe7,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe6,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe7,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x00,0xe6,0x9e]
+# CHECK: warning: invalid instruction encoding
+[0x01,0x10,0xe8,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x01,0x10,0xe8,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0xe2,0x03,0xe7,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x40,0xe0,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x40,0xe6,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xc0,0xe7,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x40,0xe5,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x40,0xe4,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xc0,0xe4,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0x40,0xe7,0x1e]
+# CHECK: warning: invalid instruction encoding
+[0x41,0xc0,0xe5,0x1e]
+# CHECK: warning: invalid instruction encoding
+
+# CHECK-NOT: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt b/test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt
new file mode 100644
index 000000000000..8b7e1c878002
--- /dev/null
+++ b/test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt
@@ -0,0 +1,382 @@
+# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=+neon,-fullfp16 < %s 2>&1 | FileCheck %s
+# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=-neon,-fullfp16 < %s 2>&1 | FileCheck %s
+# RUN: not llvm-mc -disassemble -triple=aarch64 -mattr=-neon,+fullfp16 < %s 2>&1 | FileCheck %s
+
+[0x00,0xf8,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xd8,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xd8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xd8,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x88,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0x98,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xd8,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x00,0xf8,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x10,0x22,0x0f]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x11,0x12,0x4f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x50,0x22,0x0f]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x51,0x12,0x4f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x90,0x22,0x0f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x90,0x22,0x4f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x90,0x22,0x2f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x90,0x22,0x6f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0xc2,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0xc8,0x30,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0xc8,0xb0,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0xf8,0x30,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0xf8,0xb0,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0x42,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0x42,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0x42,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0x42,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0xc2,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x14,0xc2,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x27,0x50,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x24,0x4f,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x25,0x4c,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x27,0x5c,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x25,0x4c,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x27,0x5c,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x27,0xd0,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x24,0xcf,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x27,0xd0,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x24,0xcf,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0xdb,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0xd8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0xdb,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0xd8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0xc9,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0xcb,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0xc9,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0xcb,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0xcb,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0xc8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0xcb,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0xc8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x83,0xda,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x01,0xd9,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x83,0xda,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x01,0xd9,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x50,0xe8,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x8f,0xe8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x50,0xe8,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x8f,0xe8,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x2f,0x50,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x2c,0x4f,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x2f,0x50,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x2c,0x4f,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x2d,0xcc,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x2f,0xdc,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x2d,0xcc,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x2f,0xdc,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xe0,0x3f,0xd0,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xe4,0x3c,0xcf,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x03,0x3d,0x4c,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xbf,0x3f,0x5c,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x34,0x42,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xff,0x35,0x50,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x35,0xd6,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xa3,0x34,0xc6,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x04,0x42,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xff,0x05,0x50,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x05,0xd6,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0xa3,0x04,0xc6,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x34,0x42,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x34,0x42,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x35,0xd6,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x35,0xd6,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x04,0x42,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x04,0x42,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x05,0xd6,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0xea,0x05,0xd6,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0c,0x42,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0c,0x42,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0c,0xc2,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0c,0xc2,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x1d,0x17,0xd4,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x18,0x11,0x5f]
+# CHECK: warning: invalid instruction encoding
+[0x62,0x58,0x14,0x5f]
+# CHECK: warning: invalid instruction encoding
+[0x20,0x98,0x11,0x5f]
+# CHECK: warning: invalid instruction encoding
+[0x46,0x98,0x18,0x7f]
+# CHECK: warning: invalid instruction encoding
+[0x95,0xfd,0x1f,0x5f]
+# CHECK: warning: invalid instruction encoding
+[0x95,0xfd,0x1f,0x7f]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xc9,0x79,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xc9,0x79,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xb9,0x79,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xb9,0x79,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xa9,0x79,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xa9,0x79,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xa9,0xf9,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xa9,0xf9,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xb9,0xf9,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xac,0xb9,0xf9,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x25,0x4c,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xd9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xd9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x25,0x4c,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xc9,0xf8,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xc9,0xf8,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x25,0xcc,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xc9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xc9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xd9,0xf8,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xd9,0xf8,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xe9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0xe9,0xf8,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x2d,0x4c,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x6a,0x2d,0xcc,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0xd4,0x1e,0x4f,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x15,0x3e,0x4d,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xb5,0x3c,0xcc,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xd3,0xd9,0xf9,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x52,0xf9,0xf9,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xd9,0xf9,0x7e]
+# CHECK: warning: invalid instruction encoding
+[0x72,0xd8,0x30,0x5e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xf8,0xf8,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xf9,0xf8,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xf8,0xf8,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xf9,0xf8,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x88,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x89,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x88,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x89,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x88,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x89,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x98,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x99,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x98,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x99,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x98,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x99,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0x98,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0x99,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xa8,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xa9,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xa8,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xa9,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xa8,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xa9,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xa8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xa9,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xb8,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xb9,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xb8,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xb9,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xb8,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xb9,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xb8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xb9,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xc8,0x79,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xc9,0x79,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xc8,0x79,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xc9,0x79,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xd8,0xf9,0x0e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xd9,0xf9,0x4e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xd8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xd9,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+[0x04,0xf8,0xf9,0x2e]
+# CHECK: warning: invalid instruction encoding
+[0x06,0xf9,0xf9,0x6e]
+# CHECK: warning: invalid instruction encoding
+
+# CHECK-NOT: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/fullfp16-neon-arm-neg.txt b/test/MC/Disassembler/ARM/fullfp16-neon-arm-neg.txt
new file mode 100644
index 000000000000..9c8be171d287
--- /dev/null
+++ b/test/MC/Disassembler/ARM/fullfp16-neon-arm-neg.txt
@@ -0,0 +1,274 @@
+# RUN: not llvm-mc -disassemble -triple armv8a-none-eabi -mattr=-fullfp16,+neon -show-encoding < %s 2>&1 | FileCheck %s
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0d,0x11,0xf2]
+[0x44,0x0d,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0d,0x31,0xf2]
+[0x44,0x0d,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0d,0x11,0xf3]
+[0x54,0x0d,0x12,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x63,0x19,0x92,0xf2]
+[0x6e,0x89,0x9a,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0d,0x11,0xf2]
+[0x54,0x0d,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x67,0x51,0x96,0xf2]
+[0x6f,0xa1,0x9c,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0d,0x31,0xf2]
+[0x54,0x0d,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x67,0x55,0x96,0xf2]
+[0x6f,0xa5,0x9c,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0c,0x11,0xf2]
+[0x54,0x0c,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0c,0x31,0xf2]
+[0x54,0x0c,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x04,0x2e,0x13,0xf2]
+[0x48,0x4e,0x16,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x03,0x25,0xb5,0xf3]
+[0x46,0x45,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x04,0x2e,0x13,0xf3]
+[0x48,0x4e,0x16,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x83,0x24,0xb5,0xf3]
+[0xc6,0x44,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x04,0x2e,0x33,0xf3]
+[0x48,0x4e,0x36,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x03,0x24,0xb5,0xf3]
+[0x46,0x44,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x83,0x25,0xb5,0xf3]
+[0xc6,0x45,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x03,0x26,0xb5,0xf3]
+[0x46,0x46,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0e,0x11,0xf3]
+[0x54,0x0e,0x12,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0e,0x31,0xf3]
+[0x54,0x0e,0x32,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0d,0x31,0xf3]
+[0x44,0x0d,0x32,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x07,0xb5,0xf3]
+[0x42,0x07,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0f,0x11,0xf2]
+[0x44,0x0f,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0f,0x31,0xf2]
+[0x44,0x0f,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0f,0x11,0xf3]
+[0x54,0x0f,0x12,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0f,0x31,0xf3]
+[0x54,0x0f,0x32,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0d,0x11,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0f,0x11,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+[0x02,0x0f,0x31,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x05,0xb7,0xf3]
+[0x42,0x05,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0f,0x11,0xf2]
+[0x54,0x0f,0x12,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x05,0xb7,0xf3]
+[0xc2,0x05,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x12,0x0f,0x31,0xf2]
+[0x54,0x0f,0x32,0xf2]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x07,0xb5,0xf3]
+[0xc2,0x07,0xb5,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x07,0xb7,0xf3]
+[0x81,0x07,0xb7,0xf3]
+[0x01,0x06,0xb7,0xf3]
+[0x81,0x06,0xb7,0xf3]
+[0x42,0x07,0xb7,0xf3]
+[0xc2,0x07,0xb7,0xf3]
+[0x42,0x06,0xb7,0xf3]
+[0xc2,0x06,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x00,0xb7,0xf3]
+[0x42,0x00,0xb7,0xf3]
+[0x81,0x00,0xb7,0xf3]
+[0xc2,0x00,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x03,0xb7,0xf3]
+[0x42,0x03,0xb7,0xf3]
+[0x81,0x03,0xb7,0xf3]
+[0xc2,0x03,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x01,0xb7,0xf3]
+[0x42,0x01,0xb7,0xf3]
+[0x81,0x01,0xb7,0xf3]
+[0xc2,0x01,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x02,0xb7,0xf3]
+[0x42,0x02,0xb7,0xf3]
+[0x81,0x02,0xb7,0xf3]
+[0xc2,0x02,0xb7,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0x0d,0xbf,0xf2]
+[0x11,0x0d,0xbe,0xf3]
+[0x11,0x0c,0xbd,0xf2]
+[0x11,0x0c,0xbc,0xf3]
+[0x52,0x0d,0xbb,0xf2]
+[0x52,0x0d,0xba,0xf3]
+[0x52,0x0c,0xb9,0xf2]
+[0x52,0x0c,0xb8,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x05,0xb6,0xf3]
+[0x42,0x05,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x06,0xb6,0xf3]
+[0xc2,0x06,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x01,0x04,0xb6,0xf3]
+[0x42,0x04,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x07,0xb6,0xf3]
+[0xc2,0x07,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x04,0xb6,0xf3]
+[0xc2,0x04,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x81,0x05,0xb6,0xf3]
+[0xc2,0x05,0xb6,0xf3]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x20,0x0d,0xc7,0xf2]
+[0x20,0x0f,0xc7,0xf2]
+[0x70,0x0f,0xc7,0xf2]
+
+# CHECK-NOT: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/fullfp16-neon-arm.txt b/test/MC/Disassembler/ARM/fullfp16-neon-arm.txt
new file mode 100644
index 000000000000..232bd641f059
--- /dev/null
+++ b/test/MC/Disassembler/ARM/fullfp16-neon-arm.txt
@@ -0,0 +1,309 @@
+# RUN: not llvm-mc -disassemble -triple armv8a-none-eabi -mattr=+fullfp16,+neon -show-encoding < %s 2>%t | FileCheck %s
+# RUN FileCheck %s < %t --check-prefix=STDERR
+
+# CHECK: vadd.f16 d0, d1, d2
+# CHECK: vadd.f16 q0, q1, q2
+[0x02,0x0d,0x11,0xf2]
+[0x44,0x0d,0x12,0xf2]
+
+# CHECK: vsub.f16 d0, d1, d2
+# CHECK: vsub.f16 q0, q1, q2
+[0x02,0x0d,0x31,0xf2]
+[0x44,0x0d,0x32,0xf2]
+
+# CHECK: vmul.f16 d0, d1, d2
+# CHECK: vmul.f16 q0, q1, q2
+[0x12,0x0d,0x11,0xf3]
+[0x54,0x0d,0x12,0xf3]
+
+# CHECK: vmul.f16 d1, d2, d3[2]
+# CHECK: vmul.f16 q4, q5, d6[3]
+[0x63,0x19,0x92,0xf2]
+[0x6e,0x89,0x9a,0xf3]
+
+# CHECK: vmla.f16 d0, d1, d2
+# CHECK: vmla.f16 q0, q1, q2
+[0x12,0x0d,0x11,0xf2]
+[0x54,0x0d,0x12,0xf2]
+
+# CHECK: vmla.f16 d5, d6, d7[2]
+# CHECK: vmla.f16 q5, q6, d7[3]
+[0x67,0x51,0x96,0xf2]
+[0x6f,0xa1,0x9c,0xf3]
+
+# CHECK: vmls.f16 d0, d1, d2
+# CHECK: vmls.f16 q0, q1, q2
+[0x12,0x0d,0x31,0xf2]
+[0x54,0x0d,0x32,0xf2]
+
+# CHECK: vmls.f16 d5, d6, d7[2]
+# CHECK: vmls.f16 q5, q6, d7[3]
+[0x67,0x55,0x96,0xf2]
+[0x6f,0xa5,0x9c,0xf3]
+
+# CHECK: vfma.f16 d0, d1, d2
+# CHECK: vfma.f16 q0, q1, q2
+[0x12,0x0c,0x11,0xf2]
+[0x54,0x0c,0x12,0xf2]
+
+# CHECK: vfms.f16 d0, d1, d2
+# CHECK: vfms.f16 q0, q1, q2
+[0x12,0x0c,0x31,0xf2]
+[0x54,0x0c,0x32,0xf2]
+
+# CHECK: vceq.f16 d2, d3, d4
+# CHECK: vceq.f16 q2, q3, q4
+[0x04,0x2e,0x13,0xf2]
+[0x48,0x4e,0x16,0xf2]
+
+# CHECK: vceq.f16 d2, d3, #0
+# CHECK: vceq.f16 q2, q3, #0
+[0x03,0x25,0xb5,0xf3]
+[0x46,0x45,0xb5,0xf3]
+
+# CHECK: vcge.f16 d2, d3, d4
+# CHECK: vcge.f16 q2, q3, q4
+[0x04,0x2e,0x13,0xf3]
+[0x48,0x4e,0x16,0xf3]
+
+# CHECK: vcge.f16 d2, d3, #0
+# CHECK: vcge.f16 q2, q3, #0
+[0x83,0x24,0xb5,0xf3]
+[0xc6,0x44,0xb5,0xf3]
+
+# CHECK: vcgt.f16 d2, d3, d4
+# CHECK: vcgt.f16 q2, q3, q4
+[0x04,0x2e,0x33,0xf3]
+[0x48,0x4e,0x36,0xf3]
+
+# CHECK: vcgt.f16 d2, d3, #0
+# CHECK: vcgt.f16 q2, q3, #0
+[0x03,0x24,0xb5,0xf3]
+[0x46,0x44,0xb5,0xf3]
+
+# CHECK: vcle.f16 d2, d3, #0
+# CHECK: vcle.f16 q2, q3, #0
+[0x83,0x25,0xb5,0xf3]
+[0xc6,0x45,0xb5,0xf3]
+
+# CHECK: vclt.f16 d2, d3, #0
+# CHECK: vclt.f16 q2, q3, #0
+[0x03,0x26,0xb5,0xf3]
+[0x46,0x46,0xb5,0xf3]
+
+# CHECK: vacge.f16 d0, d1, d2
+# CHECK: vacge.f16 q0, q1, q2
+[0x12,0x0e,0x11,0xf3]
+[0x54,0x0e,0x12,0xf3]
+
+# CHECK: vacgt.f16 d0, d1, d2
+# CHECK: vacgt.f16 q0, q1, q2
+[0x12,0x0e,0x31,0xf3]
+[0x54,0x0e,0x32,0xf3]
+
+# CHECK: vabd.f16 d0, d1, d2
+# CHECK: vabd.f16 q0, q1, q2
+[0x02,0x0d,0x31,0xf3]
+[0x44,0x0d,0x32,0xf3]
+
+# CHECK: vabs.f16 d0, d1
+# CHECK: vabs.f16 q0, q1
+[0x01,0x07,0xb5,0xf3]
+[0x42,0x07,0xb5,0xf3]
+
+# CHECK: vmax.f16 d0, d1, d2
+# CHECK: vmax.f16 q0, q1, q2
+[0x02,0x0f,0x11,0xf2]
+[0x44,0x0f,0x12,0xf2]
+
+# CHECK: vmin.f16 d0, d1, d2
+# CHECK: vmin.f16 q0, q1, q2
+[0x02,0x0f,0x31,0xf2]
+[0x44,0x0f,0x32,0xf2]
+
+# CHECK: vmaxnm.f16 d0, d1, d2
+# CHECK: vmaxnm.f16 q0, q1, q2
+[0x12,0x0f,0x11,0xf3]
+[0x54,0x0f,0x12,0xf3]
+
+# CHECK: vminnm.f16 d0, d1, d2
+# CHECK: vminnm.f16 q0, q1, q2
+[0x12,0x0f,0x31,0xf3]
+[0x54,0x0f,0x32,0xf3]
+
+# CHECK: vpadd.f16 d0, d1, d2
+[0x02,0x0d,0x11,0xf3]
+
+# CHECK: vpmax.f16 d0, d1, d2
+[0x02,0x0f,0x11,0xf3]
+
+# CHECK: vpmin.f16 d0, d1, d2
+[0x02,0x0f,0x31,0xf3]
+
+# CHECK: vrecpe.f16 d0, d1
+# CHECK: vrecpe.f16 q0, q1
+[0x01,0x05,0xb7,0xf3]
+[0x42,0x05,0xb7,0xf3]
+
+# CHECK: vrecps.f16 d0, d1, d2
+# CHECK: vrecps.f16 q0, q1, q2
+[0x12,0x0f,0x11,0xf2]
+[0x54,0x0f,0x12,0xf2]
+
+# CHECK: vrsqrte.f16 d0, d1
+# CHECK: vrsqrte.f16 q0, q1
+[0x81,0x05,0xb7,0xf3]
+[0xc2,0x05,0xb7,0xf3]
+
+# CHECK: vrsqrts.f16 d0, d1, d2
+# CHECK: vrsqrts.f16 q0, q1, q2
+[0x12,0x0f,0x31,0xf2]
+[0x54,0x0f,0x32,0xf2]
+
+# CHECK: vneg.f16 d0, d1
+# CHECK: vneg.f16 q0, q1
+[0x81,0x07,0xb5,0xf3]
+[0xc2,0x07,0xb5,0xf3]
+
+# CHECK: vcvt.s16.f16 d0, d1
+# CHECK: vcvt.u16.f16 d0, d1
+# CHECK: vcvt.f16.s16 d0, d1
+# CHECK: vcvt.f16.u16 d0, d1
+# CHECK: vcvt.s16.f16 q0, q1
+# CHECK: vcvt.u16.f16 q0, q1
+# CHECK: vcvt.f16.s16 q0, q1
+# CHECK: vcvt.f16.u16 q0, q1
+[0x01,0x07,0xb7,0xf3]
+[0x81,0x07,0xb7,0xf3]
+[0x01,0x06,0xb7,0xf3]
+[0x81,0x06,0xb7,0xf3]
+[0x42,0x07,0xb7,0xf3]
+[0xc2,0x07,0xb7,0xf3]
+[0x42,0x06,0xb7,0xf3]
+[0xc2,0x06,0xb7,0xf3]
+
+# CHECK: vcvta.s16.f16 d0, d1
+# CHECK: vcvta.s16.f16 q0, q1
+# CHECK: vcvta.u16.f16 d0, d1
+# CHECK: vcvta.u16.f16 q0, q1
+[0x01,0x00,0xb7,0xf3]
+[0x42,0x00,0xb7,0xf3]
+[0x81,0x00,0xb7,0xf3]
+[0xc2,0x00,0xb7,0xf3]
+
+# CHECK: vcvtm.s16.f16 d0, d1
+# CHECK: vcvtm.s16.f16 q0, q1
+# CHECK: vcvtm.u16.f16 d0, d1
+# CHECK: vcvtm.u16.f16 q0, q1
+[0x01,0x03,0xb7,0xf3]
+[0x42,0x03,0xb7,0xf3]
+[0x81,0x03,0xb7,0xf3]
+[0xc2,0x03,0xb7,0xf3]
+
+# CHECK: vcvtn.s16.f16 d0, d1
+# CHECK: vcvtn.s16.f16 q0, q1
+# CHECK: vcvtn.u16.f16 d0, d1
+# CHECK: vcvtn.u16.f16 q0, q1
+[0x01,0x01,0xb7,0xf3]
+[0x42,0x01,0xb7,0xf3]
+[0x81,0x01,0xb7,0xf3]
+[0xc2,0x01,0xb7,0xf3]
+
+# CHECK: vcvtp.s16.f16 d0, d1
+# CHECK: vcvtp.s16.f16 q0, q1
+# CHECK: vcvtp.u16.f16 d0, d1
+# CHECK: vcvtp.u16.f16 q0, q1
+[0x01,0x02,0xb7,0xf3]
+[0x42,0x02,0xb7,0xf3]
+[0x81,0x02,0xb7,0xf3]
+[0xc2,0x02,0xb7,0xf3]
+
+# CHECK: vcvt.s16.f16 d0, d1, #1
+# CHECK: vcvt.u16.f16 d0, d1, #2
+# CHECK: vcvt.f16.s16 d0, d1, #3
+# CHECK: vcvt.f16.u16 d0, d1, #4
+# CHECK: vcvt.s16.f16 q0, q1, #5
+# CHECK: vcvt.u16.f16 q0, q1, #6
+# CHECK: vcvt.f16.s16 q0, q1, #7
+# CHECK: vcvt.f16.u16 q0, q1, #8
+[0x11,0x0d,0xbf,0xf2]
+[0x11,0x0d,0xbe,0xf3]
+[0x11,0x0c,0xbd,0xf2]
+[0x11,0x0c,0xbc,0xf3]
+[0x52,0x0d,0xbb,0xf2]
+[0x52,0x0d,0xba,0xf3]
+[0x52,0x0c,0xb9,0xf2]
+[0x52,0x0c,0xb8,0xf3]
+
+# CHECK: vrinta.f16 d0, d1
+# CHECK: vrinta.f16 q0, q1
+[0x01,0x05,0xb6,0xf3]
+[0x42,0x05,0xb6,0xf3]
+
+# CHECK: vrintm.f16 d0, d1
+# CHECK: vrintm.f16 q0, q1
+[0x81,0x06,0xb6,0xf3]
+[0xc2,0x06,0xb6,0xf3]
+
+# CHECK: vrintn.f16 d0, d1
+# CHECK: vrintn.f16 q0, q1
+[0x01,0x04,0xb6,0xf3]
+[0x42,0x04,0xb6,0xf3]
+
+# CHECK: vrintp.f16 d0, d1
+# CHECK: vrintp.f16 q0, q1
+[0x81,0x07,0xb6,0xf3]
+[0xc2,0x07,0xb6,0xf3]
+
+# CHECK: vrintx.f16 d0, d1
+# CHECK: vrintx.f16 q0, q1
+[0x81,0x04,0xb6,0xf3]
+[0xc2,0x04,0xb6,0xf3]
+
+# CHECK: vrintz.f16 d0, d1
+# CHECK: vrintz.f16 q0, q1
+[0x81,0x05,0xb6,0xf3]
+[0xc2,0x05,0xb6,0xf3]
+
+# Existing VMOV(immediate, Advanced SIMD) instructions within the encoding
+# space of the new FP16 VCVT(between floating - point and fixed - point,
+# Advanced SIMD):
+# 4 -- Q
+# 2 -- VMOV op
+# 1 -- VCVT op
+# 2 -- VCVT FP size
+[0x10,0x0c,0xc7,0xf2]
+[0x10,0x0d,0xc7,0xf2]
+[0x10,0x0e,0xc7,0xf2]
+[0x10,0x0f,0xc7,0xf2]
+[0x20,0x0c,0xc7,0xf2]
+[0x20,0x0d,0xc7,0xf2]
+[0x20,0x0e,0xc7,0xf2]
+[0x20,0x0f,0xc7,0xf2]
+[0x50,0x0c,0xc7,0xf2]
+[0x50,0x0d,0xc7,0xf2]
+[0x50,0x0e,0xc7,0xf2]
+[0x50,0x0f,0xc7,0xf2]
+[0x70,0x0c,0xc7,0xf2]
+[0x70,0x0d,0xc7,0xf2]
+[0x70,0x0e,0xc7,0xf2]
+[0x70,0x0f,0xc7,0xf2]
+# CHECK: vmov.i32 d16, #0x70ff
+# CHECK: vmov.i32 d16, #0x70ffff
+# CHECK: vmov.i8 d16, #0x70
+# CHECK: vmov.f32 d16, #1.000000e+00
+# CHECK: vmull.s8 q8, d7, d16
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0x20,0x0d,0xc7,0xf2]
+# CHECK: vmull.p8 q8, d7, d16
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0x20,0x0f,0xc7,0xf2]
+# CHECK: vmov.i32 q8, #0x70ff
+# CHECK: vmov.i32 q8, #0x70ffff
+# CHECK: vmov.i8 q8, #0x70
+# CHECK: vmov.f32 q8, #1.000000e+00
+# CHECK: vmvn.i32 q8, #0x70ff
+# CHECK: vmvn.i32 q8, #0x70ffff
+# CHECK: vmov.i64 q8, #0xffffff0000000
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0x70,0x0f,0xc7,0xf2]
diff --git a/test/MC/Disassembler/ARM/fullfp16-neon-thumb-neg.txt b/test/MC/Disassembler/ARM/fullfp16-neon-thumb-neg.txt
new file mode 100644
index 000000000000..f7561bb4096e
--- /dev/null
+++ b/test/MC/Disassembler/ARM/fullfp16-neon-thumb-neg.txt
@@ -0,0 +1,274 @@
+# RUN: not llvm-mc -disassemble -triple thumbv8a-none-eabi -mattr=-fullfp16,+neon,+thumb-mode -show-encoding < %s 2>&1 | FileCheck %s
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x02,0x0d]
+[0x12,0xef,0x44,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x02,0x0d]
+[0x32,0xef,0x44,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x12,0x0d]
+[0x12,0xff,0x54,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x92,0xef,0x63,0x19]
+[0x9a,0xff,0x6e,0x89]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x12,0x0d]
+[0x12,0xef,0x54,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x96,0xef,0x67,0x51]
+[0x9c,0xff,0x6f,0xa1]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x12,0x0d]
+[0x32,0xef,0x54,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x96,0xef,0x67,0x55]
+[0x9c,0xff,0x6f,0xa5]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x12,0x0c]
+[0x12,0xef,0x54,0x0c]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x12,0x0c]
+[0x32,0xef,0x54,0x0c]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x13,0xef,0x04,0x2e]
+[0x16,0xef,0x48,0x4e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x03,0x25]
+[0xb5,0xff,0x46,0x45]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x13,0xff,0x04,0x2e]
+[0x16,0xff,0x48,0x4e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x83,0x24]
+[0xb5,0xff,0xc6,0x44]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x33,0xff,0x04,0x2e]
+[0x36,0xff,0x48,0x4e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x03,0x24]
+[0xb5,0xff,0x46,0x44]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x83,0x25]
+[0xb5,0xff,0xc6,0x45]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x03,0x26]
+[0xb5,0xff,0x46,0x46]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x12,0x0e]
+[0x12,0xff,0x54,0x0e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xff,0x12,0x0e]
+[0x32,0xff,0x54,0x0e]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xff,0x02,0x0d]
+[0x32,0xff,0x44,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x01,0x07]
+[0xb5,0xff,0x42,0x07]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x02,0x0f]
+[0x12,0xef,0x44,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x02,0x0f]
+[0x32,0xef,0x44,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x12,0x0f]
+[0x12,0xff,0x54,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xff,0x12,0x0f]
+[0x32,0xff,0x54,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x02,0x0d]
+
+# CHECK: warning: invalid instruction encoding
+[0x11,0xff,0x02,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+[0x31,0xff,0x02,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x05]
+[0xb7,0xff,0x42,0x05]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x11,0xef,0x12,0x0f]
+[0x12,0xef,0x54,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x81,0x05]
+[0xb7,0xff,0xc2,0x05]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0x31,0xef,0x12,0x0f]
+[0x32,0xef,0x54,0x0f]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb5,0xff,0x81,0x07]
+[0xb5,0xff,0xc2,0x07]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x07]
+[0xb7,0xff,0x81,0x07]
+[0xb7,0xff,0x01,0x06]
+[0xb7,0xff,0x81,0x06]
+[0xb7,0xff,0x42,0x07]
+[0xb7,0xff,0xc2,0x07]
+[0xb7,0xff,0x42,0x06]
+[0xb7,0xff,0xc2,0x06]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x00]
+[0xb7,0xff,0x42,0x00]
+[0xb7,0xff,0x81,0x00]
+[0xb7,0xff,0xc2,0x00]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x03]
+[0xb7,0xff,0x42,0x03]
+[0xb7,0xff,0x81,0x03]
+[0xb7,0xff,0xc2,0x03]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x01]
+[0xb7,0xff,0x42,0x01]
+[0xb7,0xff,0x81,0x01]
+[0xb7,0xff,0xc2,0x01]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb7,0xff,0x01,0x02]
+[0xb7,0xff,0x42,0x02]
+[0xb7,0xff,0x81,0x02]
+[0xb7,0xff,0xc2,0x02]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xbf,0xef,0x11,0x0d]
+[0xbe,0xff,0x11,0x0d]
+[0xbd,0xef,0x11,0x0c]
+[0xbc,0xff,0x11,0x0c]
+[0xbb,0xef,0x52,0x0d]
+[0xba,0xff,0x52,0x0d]
+[0xb9,0xef,0x52,0x0c]
+[0xb8,0xff,0x52,0x0c]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x01,0x05]
+[0xb6,0xff,0x42,0x05]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x81,0x06]
+[0xb6,0xff,0xc2,0x06]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x01,0x04]
+[0xb6,0xff,0x42,0x04]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x81,0x07]
+[0xb6,0xff,0xc2,0x07]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x81,0x04]
+[0xb6,0xff,0xc2,0x04]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xb6,0xff,0x81,0x05]
+[0xb6,0xff,0xc2,0x05]
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+# CHECK: warning: invalid instruction encoding
+[0xc7,0xef,0x20,0x0d]
+[0xc7,0xef,0x20,0x0f]
+[0xc7,0xef,0x70,0x0f]
+
+# CHECK-NOT: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/fullfp16-neon-thumb.txt b/test/MC/Disassembler/ARM/fullfp16-neon-thumb.txt
new file mode 100644
index 000000000000..a776232b7147
--- /dev/null
+++ b/test/MC/Disassembler/ARM/fullfp16-neon-thumb.txt
@@ -0,0 +1,309 @@
+# RUN: not llvm-mc -disassemble -triple thumbv8a-none-eabi -mattr=+fullfp16,+neon,+thumb-mode -show-encoding < %s 2>%t | FileCheck %s
+# RUN FileCheck %s < %t --check-prefix=STDERR
+
+# CHECK: vadd.f16 d0, d1, d2
+# CHECK: vadd.f16 q0, q1, q2
+[0x11,0xef,0x02,0x0d]
+[0x12,0xef,0x44,0x0d]
+
+# CHECK: vsub.f16 d0, d1, d2
+# CHECK: vsub.f16 q0, q1, q2
+[0x31,0xef,0x02,0x0d]
+[0x32,0xef,0x44,0x0d]
+
+# CHECK: vmul.f16 d0, d1, d2
+# CHECK: vmul.f16 q0, q1, q2
+[0x11,0xff,0x12,0x0d]
+[0x12,0xff,0x54,0x0d]
+
+# CHECK: vmul.f16 d1, d2, d3[2]
+# CHECK: vmul.f16 q4, q5, d6[3]
+[0x92,0xef,0x63,0x19]
+[0x9a,0xff,0x6e,0x89]
+
+# CHECK: vmla.f16 d0, d1, d2
+# CHECK: vmla.f16 q0, q1, q2
+[0x11,0xef,0x12,0x0d]
+[0x12,0xef,0x54,0x0d]
+
+# CHECK: vmla.f16 d5, d6, d7[2]
+# CHECK: vmla.f16 q5, q6, d7[3]
+[0x96,0xef,0x67,0x51]
+[0x9c,0xff,0x6f,0xa1]
+
+# CHECK: vmls.f16 d0, d1, d2
+# CHECK: vmls.f16 q0, q1, q2
+[0x31,0xef,0x12,0x0d]
+[0x32,0xef,0x54,0x0d]
+
+# CHECK: vmls.f16 d5, d6, d7[2]
+# CHECK: vmls.f16 q5, q6, d7[3]
+[0x96,0xef,0x67,0x55]
+[0x9c,0xff,0x6f,0xa5]
+
+# CHECK: vfma.f16 d0, d1, d2
+# CHECK: vfma.f16 q0, q1, q2
+[0x11,0xef,0x12,0x0c]
+[0x12,0xef,0x54,0x0c]
+
+# CHECK: vfms.f16 d0, d1, d2
+# CHECK: vfms.f16 q0, q1, q2
+[0x31,0xef,0x12,0x0c]
+[0x32,0xef,0x54,0x0c]
+
+# CHECK: vceq.f16 d2, d3, d4
+# CHECK: vceq.f16 q2, q3, q4
+[0x13,0xef,0x04,0x2e]
+[0x16,0xef,0x48,0x4e]
+
+# CHECK: vceq.f16 d2, d3, #0
+# CHECK: vceq.f16 q2, q3, #0
+[0xb5,0xff,0x03,0x25]
+[0xb5,0xff,0x46,0x45]
+
+# CHECK: vcge.f16 d2, d3, d4
+# CHECK: vcge.f16 q2, q3, q4
+[0x13,0xff,0x04,0x2e]
+[0x16,0xff,0x48,0x4e]
+
+# CHECK: vcge.f16 d2, d3, #0
+# CHECK: vcge.f16 q2, q3, #0
+[0xb5,0xff,0x83,0x24]
+[0xb5,0xff,0xc6,0x44]
+
+# CHECK: vcgt.f16 d2, d3, d4
+# CHECK: vcgt.f16 q2, q3, q4
+[0x33,0xff,0x04,0x2e]
+[0x36,0xff,0x48,0x4e]
+
+# CHECK: vcgt.f16 d2, d3, #0
+# CHECK: vcgt.f16 q2, q3, #0
+[0xb5,0xff,0x03,0x24]
+[0xb5,0xff,0x46,0x44]
+
+# CHECK: vcle.f16 d2, d3, #0
+# CHECK: vcle.f16 q2, q3, #0
+[0xb5,0xff,0x83,0x25]
+[0xb5,0xff,0xc6,0x45]
+
+# CHECK: vclt.f16 d2, d3, #0
+# CHECK: vclt.f16 q2, q3, #0
+[0xb5,0xff,0x03,0x26]
+[0xb5,0xff,0x46,0x46]
+
+# CHECK: vacge.f16 d0, d1, d2
+# CHECK: vacge.f16 q0, q1, q2
+[0x11,0xff,0x12,0x0e]
+[0x12,0xff,0x54,0x0e]
+
+# CHECK: vacgt.f16 d0, d1, d2
+# CHECK: vacgt.f16 q0, q1, q2
+[0x31,0xff,0x12,0x0e]
+[0x32,0xff,0x54,0x0e]
+
+# CHECK: vabd.f16 d0, d1, d2
+# CHECK: vabd.f16 q0, q1, q2
+[0x31,0xff,0x02,0x0d]
+[0x32,0xff,0x44,0x0d]
+
+# CHECK: vabs.f16 d0, d1
+# CHECK: vabs.f16 q0, q1
+[0xb5,0xff,0x01,0x07]
+[0xb5,0xff,0x42,0x07]
+
+# CHECK: vmax.f16 d0, d1, d2
+# CHECK: vmax.f16 q0, q1, q2
+[0x11,0xef,0x02,0x0f]
+[0x12,0xef,0x44,0x0f]
+
+# CHECK: vmin.f16 d0, d1, d2
+# CHECK: vmin.f16 q0, q1, q2
+[0x31,0xef,0x02,0x0f]
+[0x32,0xef,0x44,0x0f]
+
+# CHECK: vmaxnm.f16 d0, d1, d2
+# CHECK: vmaxnm.f16 q0, q1, q2
+[0x11,0xff,0x12,0x0f]
+[0x12,0xff,0x54,0x0f]
+
+# CHECK: vminnm.f16 d0, d1, d2
+# CHECK: vminnm.f16 q0, q1, q2
+[0x31,0xff,0x12,0x0f]
+[0x32,0xff,0x54,0x0f]
+
+# CHECK: vpadd.f16 d0, d1, d2
+[0x11,0xff,0x02,0x0d]
+
+# CHECK: vpmax.f16 d0, d1, d2
+[0x11,0xff,0x02,0x0f]
+
+# CHECK: vpmin.f16 d0, d1, d2
+[0x31,0xff,0x02,0x0f]
+
+# CHECK: vrecpe.f16 d0, d1
+# CHECK: vrecpe.f16 q0, q1
+[0xb7,0xff,0x01,0x05]
+[0xb7,0xff,0x42,0x05]
+
+# CHECK: vrecps.f16 d0, d1, d2
+# CHECK: vrecps.f16 q0, q1, q2
+[0x11,0xef,0x12,0x0f]
+[0x12,0xef,0x54,0x0f]
+
+# CHECK: vrsqrte.f16 d0, d1
+# CHECK: vrsqrte.f16 q0, q1
+[0xb7,0xff,0x81,0x05]
+[0xb7,0xff,0xc2,0x05]
+
+# CHECK: vrsqrts.f16 d0, d1, d2
+# CHECK: vrsqrts.f16 q0, q1, q2
+[0x31,0xef,0x12,0x0f]
+[0x32,0xef,0x54,0x0f]
+
+# CHECK: vneg.f16 d0, d1
+# CHECK: vneg.f16 q0, q1
+[0xb5,0xff,0x81,0x07]
+[0xb5,0xff,0xc2,0x07]
+
+# CHECK: vcvt.s16.f16 d0, d1
+# CHECK: vcvt.u16.f16 d0, d1
+# CHECK: vcvt.f16.s16 d0, d1
+# CHECK: vcvt.f16.u16 d0, d1
+# CHECK: vcvt.s16.f16 q0, q1
+# CHECK: vcvt.u16.f16 q0, q1
+# CHECK: vcvt.f16.s16 q0, q1
+# CHECK: vcvt.f16.u16 q0, q1
+[0xb7,0xff,0x01,0x07]
+[0xb7,0xff,0x81,0x07]
+[0xb7,0xff,0x01,0x06]
+[0xb7,0xff,0x81,0x06]
+[0xb7,0xff,0x42,0x07]
+[0xb7,0xff,0xc2,0x07]
+[0xb7,0xff,0x42,0x06]
+[0xb7,0xff,0xc2,0x06]
+
+# CHECK: vcvta.s16.f16 d0, d1
+# CHECK: vcvta.s16.f16 q0, q1
+# CHECK: vcvta.u16.f16 d0, d1
+# CHECK: vcvta.u16.f16 q0, q1
+[0xb7,0xff,0x01,0x00]
+[0xb7,0xff,0x42,0x00]
+[0xb7,0xff,0x81,0x00]
+[0xb7,0xff,0xc2,0x00]
+
+# CHECK: vcvtm.s16.f16 d0, d1
+# CHECK: vcvtm.s16.f16 q0, q1
+# CHECK: vcvtm.u16.f16 d0, d1
+# CHECK: vcvtm.u16.f16 q0, q1
+[0xb7,0xff,0x01,0x03]
+[0xb7,0xff,0x42,0x03]
+[0xb7,0xff,0x81,0x03]
+[0xb7,0xff,0xc2,0x03]
+
+# CHECK: vcvtn.s16.f16 d0, d1
+# CHECK: vcvtn.s16.f16 q0, q1
+# CHECK: vcvtn.u16.f16 d0, d1
+# CHECK: vcvtn.u16.f16 q0, q1
+[0xb7,0xff,0x01,0x01]
+[0xb7,0xff,0x42,0x01]
+[0xb7,0xff,0x81,0x01]
+[0xb7,0xff,0xc2,0x01]
+
+# CHECK: vcvtp.s16.f16 d0, d1
+# CHECK: vcvtp.s16.f16 q0, q1
+# CHECK: vcvtp.u16.f16 d0, d1
+# CHECK: vcvtp.u16.f16 q0, q1
+[0xb7,0xff,0x01,0x02]
+[0xb7,0xff,0x42,0x02]
+[0xb7,0xff,0x81,0x02]
+[0xb7,0xff,0xc2,0x02]
+
+# CHECK: vcvt.s16.f16 d0, d1, #1
+# CHECK: vcvt.u16.f16 d0, d1, #2
+# CHECK: vcvt.f16.s16 d0, d1, #3
+# CHECK: vcvt.f16.u16 d0, d1, #4
+# CHECK: vcvt.s16.f16 q0, q1, #5
+# CHECK: vcvt.u16.f16 q0, q1, #6
+# CHECK: vcvt.f16.s16 q0, q1, #7
+# CHECK: vcvt.f16.u16 q0, q1, #8
+[0xbf,0xef,0x11,0x0d]
+[0xbe,0xff,0x11,0x0d]
+[0xbd,0xef,0x11,0x0c]
+[0xbc,0xff,0x11,0x0c]
+[0xbb,0xef,0x52,0x0d]
+[0xba,0xff,0x52,0x0d]
+[0xb9,0xef,0x52,0x0c]
+[0xb8,0xff,0x52,0x0c]
+
+# CHECK: vrinta.f16 d0, d1
+# CHECK: vrinta.f16 q0, q1
+[0xb6,0xff,0x01,0x05]
+[0xb6,0xff,0x42,0x05]
+
+# CHECK: vrintm.f16 d0, d1
+# CHECK: vrintm.f16 q0, q1
+[0xb6,0xff,0x81,0x06]
+[0xb6,0xff,0xc2,0x06]
+
+# CHECK: vrintn.f16 d0, d1
+# CHECK: vrintn.f16 q0, q1
+[0xb6,0xff,0x01,0x04]
+[0xb6,0xff,0x42,0x04]
+
+# CHECK: vrintp.f16 d0, d1
+# CHECK: vrintp.f16 q0, q1
+[0xb6,0xff,0x81,0x07]
+[0xb6,0xff,0xc2,0x07]
+
+# CHECK: vrintx.f16 d0, d1
+# CHECK: vrintx.f16 q0, q1
+[0xb6,0xff,0x81,0x04]
+[0xb6,0xff,0xc2,0x04]
+
+# CHECK: vrintz.f16 d0, d1
+# CHECK: vrintz.f16 q0, q1
+[0xb6,0xff,0x81,0x05]
+[0xb6,0xff,0xc2,0x05]
+
+# Existing VMOV(immediate, Advanced SIMD) instructions within the encoding
+# space of the new FP16 VCVT(between floating - point and fixed - point,
+# Advanced SIMD):
+# 1 -- VCVT op
+# 2 -- VCVT FP size
+# 4 -- Q
+# 2 -- VMOV op
+[0xc7,0xef,0x10,0x0c]
+[0xc7,0xef,0x10,0x0d]
+[0xc7,0xef,0x10,0x0e]
+[0xc7,0xef,0x10,0x0f]
+[0xc7,0xef,0x20,0x0c]
+[0xc7,0xef,0x20,0x0d]
+[0xc7,0xef,0x20,0x0e]
+[0xc7,0xef,0x20,0x0f]
+[0xc7,0xef,0x50,0x0c]
+[0xc7,0xef,0x50,0x0d]
+[0xc7,0xef,0x50,0x0e]
+[0xc7,0xef,0x50,0x0f]
+[0xc7,0xef,0x70,0x0c]
+[0xc7,0xef,0x70,0x0d]
+[0xc7,0xef,0x70,0x0e]
+[0xc7,0xef,0x70,0x0f]
+# CHECK: vmov.i32 d16, #0x70ff
+# CHECK: vmov.i32 d16, #0x70ffff
+# CHECK: vmov.i8 d16, #0x70
+# CHECK: vmov.f32 d16, #1.000000e+00
+# CHECK: vmull.s8 q8, d7, d16
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0x20,0x0d,0xc7,0xf2]
+# CHECK: vmull.p8 q8, d7, d16
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0x20,0x0f,0xc7,0xf2]
+# CHECK: vmov.i32 q8, #0x70ff
+# CHECK: vmov.i32 q8, #0x70ffff
+# CHECK: vmov.i8 q8, #0x70
+# CHECK: vmov.f32 q8, #1.000000e+00
+# CHECK: vmvn.i32 q8, #0x70ff
+# CHECK: vmvn.i32 q8, #0x70ffff
+# CHECK: vmov.i64 q8, #0xffffff0000000
+# STDERR: warning: invalid instruction encoding
+# STDERR-NEXT: [0x70,0x0f,0xc7,0xf2]
diff --git a/test/MC/Disassembler/ARM/invalid-thumbv7.txt b/test/MC/Disassembler/ARM/invalid-thumbv7.txt
index 5257633e579f..512fc5d237e1 100644
--- a/test/MC/Disassembler/ARM/invalid-thumbv7.txt
+++ b/test/MC/Disassembler/ARM/invalid-thumbv7.txt
@@ -1,4 +1,5 @@
-# RUN: not llvm-mc -disassemble %s -mcpu cortex-a8 -triple thumbv7 2>&1 | FileCheck %s
+# RUN: not llvm-mc -disassemble %s -mcpu cortex-a8 -triple thumbv7 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V7
+# RUN: not llvm-mc -disassemble %s -mcpu cortex-a53 -triple thumbv8 2>&1 | FileCheck %s
# This file is checking Thumbv7 encodings which are globally invalid, usually due
# to the constraints of the instructions not being met. For example invalid
@@ -359,10 +360,22 @@
# 32-bit Thumb STM instructions cannot have a writeback register which appears
# in the list.
-[0xa1,0xe8,0x07,0x04]
+[0xa1 0xe8 0x07 0x04]
# CHECK: warning: potentially undefined instruction encoding
-# CHECK-NEXT: [0xa1,0xe8,0x07,0x04]
+# CHECK-NEXT: [0xa1 0xe8 0x07 0x04]
-[0x21,0xe9,0x07,0x04]
+[0x21 0xe9 0x07 0x04]
# CHECK: warning: potentially undefined instruction encoding
-# CHECK-NEXT: [0x21,0xe9,0x07,0x04]
+# CHECK-NEXT: [0x21 0xe9 0x07 0x04]
+
+#------------------------------------------------------------------------------
+# SP is invalid as rGPR before ARMv8
+#------------------------------------------------------------------------------
+
+[0x00 0xf0 0x00 0x0d]
+# CHECK-V7: warning: potentially undefined instruction encoding
+# CHECK-V7-NEXT: [0x00 0xf0 0x00 0x0d]
+
+[0x63 0xeb 0x2d 0x46]
+# CHECK-V7: warning: potentially undefined instruction encoding
+# CHECK-V7-NEXT: [0x63 0xeb 0x2d 0x46]
diff --git a/test/MC/Disassembler/ARM/thumb-v8.txt b/test/MC/Disassembler/ARM/thumb-v8.txt
index eb5ffea7d667..1bcf654a4795 100644
--- a/test/MC/Disassembler/ARM/thumb-v8.txt
+++ b/test/MC/Disassembler/ARM/thumb-v8.txt
@@ -1,4 +1,5 @@
-# RUN: llvm-mc -disassemble -triple thumbv8 -mattr=+db -show-encoding < %s | FileCheck %s
+# RUN: llvm-mc -disassemble -triple thumbv8 -mattr=+db -show-encoding 2>%t < %s | FileCheck %s
+# RUN: FileCheck -allow-empty -check-prefix=STDERR < %t %s
0x80 0xba
# CHECK: hlt #0
@@ -26,3 +27,10 @@
# CHECK: dmb oshld
# CHECK: dmb nshld
# CHECK: dmb ld
+
+[0x00 0xf0 0x00 0x0d]
+[0x63 0xeb 0x2d 0x46]
+# CHECK: and sp, r0, #0
+# CHECK: sbc.w r6, r3, sp, asr #16
+
+# STDERR-NOT: warning
diff --git a/test/MC/Disassembler/Hexagon/invalid_packet.txt b/test/MC/Disassembler/Hexagon/invalid_packet.txt
new file mode 100644
index 000000000000..8ce0ca90dc5c
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/invalid_packet.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple=hexagon -disassemble < %s 2>&1 | FileCheck %s
+
+#CHECK: warning: invalid instruction encoding
+0x00 0x40 0x20 0x6c 0x00 0xc0 0x00 0x7f \ No newline at end of file
diff --git a/test/MC/Disassembler/Hexagon/j.txt b/test/MC/Disassembler/Hexagon/j.txt
index 0c2cc7a4cf34..661670e2a614 100644
--- a/test/MC/Disassembler/Hexagon/j.txt
+++ b/test/MC/Disassembler/Hexagon/j.txt
@@ -11,149 +11,149 @@
# Compare and jump
0x00 0xc0 0x89 0x11
-# CHECK: p0 = cmp.eq(r9,#-1); if (p0.new) jump:nt
+# CHECK: p0 = cmp.eq(r17,#-1); if (p0.new) jump:nt
0x00 0xc1 0x89 0x11
-# CHECK: p0 = cmp.gt(r9,#-1); if (p0.new) jump:nt
+# CHECK: p0 = cmp.gt(r17,#-1); if (p0.new) jump:nt
0x00 0xc3 0x89 0x11
-# CHECK: p0 = tstbit(r9, #0); if (p0.new) jump:nt
+# CHECK: p0 = tstbit(r17, #0); if (p0.new) jump:nt
0x00 0xe0 0x89 0x11
-# CHECK: p0 = cmp.eq(r9,#-1); if (p0.new) jump:t
+# CHECK: p0 = cmp.eq(r17,#-1); if (p0.new) jump:t
0x00 0xe1 0x89 0x11
-# CHECK: p0 = cmp.gt(r9,#-1); if (p0.new) jump:t
+# CHECK: p0 = cmp.gt(r17,#-1); if (p0.new) jump:t
0x00 0xe3 0x89 0x11
-# CHECK: p0 = tstbit(r9, #0); if (p0.new) jump:t
+# CHECK: p0 = tstbit(r17, #0); if (p0.new) jump:t
0x00 0xc0 0xc9 0x11
-# CHECK: p0 = cmp.eq(r9,#-1); if (!p0.new) jump:nt
+# CHECK: p0 = cmp.eq(r17,#-1); if (!p0.new) jump:nt
0x00 0xc1 0xc9 0x11
-# CHECK: p0 = cmp.gt(r9,#-1); if (!p0.new) jump:nt
+# CHECK: p0 = cmp.gt(r17,#-1); if (!p0.new) jump:nt
0x00 0xc3 0xc9 0x11
-# CHECK: p0 = tstbit(r9, #0); if (!p0.new) jump:nt
+# CHECK: p0 = tstbit(r17, #0); if (!p0.new) jump:nt
0x00 0xe0 0xc9 0x11
-# CHECK: p0 = cmp.eq(r9,#-1); if (!p0.new) jump:t
+# CHECK: p0 = cmp.eq(r17,#-1); if (!p0.new) jump:t
0x00 0xe1 0xc9 0x11
-# CHECK: p0 = cmp.gt(r9,#-1); if (!p0.new) jump:t
+# CHECK: p0 = cmp.gt(r17,#-1); if (!p0.new) jump:t
0x00 0xe3 0xc9 0x11
-# CHECK: p0 = tstbit(r9, #0); if (!p0.new) jump:t
+# CHECK: p0 = tstbit(r17, #0); if (!p0.new) jump:t
0x00 0xd5 0x09 0x10
-# CHECK: p0 = cmp.eq(r9, #21); if (p0.new) jump:nt
+# CHECK: p0 = cmp.eq(r17, #21); if (p0.new) jump:nt
0x00 0xf5 0x09 0x10
-# CHECK: p0 = cmp.eq(r9, #21); if (p0.new) jump:t
+# CHECK: p0 = cmp.eq(r17, #21); if (p0.new) jump:t
0x00 0xd5 0x49 0x10
-# CHECK: p0 = cmp.eq(r9, #21); if (!p0.new) jump:nt
+# CHECK: p0 = cmp.eq(r17, #21); if (!p0.new) jump:nt
0x00 0xf5 0x49 0x10
-# CHECK: p0 = cmp.eq(r9, #21); if (!p0.new) jump:t
+# CHECK: p0 = cmp.eq(r17, #21); if (!p0.new) jump:t
0x00 0xd5 0x89 0x10
-# CHECK: p0 = cmp.gt(r9, #21); if (p0.new) jump:nt
+# CHECK: p0 = cmp.gt(r17, #21); if (p0.new) jump:nt
0x00 0xf5 0x89 0x10
-# CHECK: p0 = cmp.gt(r9, #21); if (p0.new) jump:t
+# CHECK: p0 = cmp.gt(r17, #21); if (p0.new) jump:t
0x00 0xd5 0xc9 0x10
-# CHECK: p0 = cmp.gt(r9, #21); if (!p0.new) jump:nt
+# CHECK: p0 = cmp.gt(r17, #21); if (!p0.new) jump:nt
0x00 0xf5 0xc9 0x10
-# CHECK: p0 = cmp.gt(r9, #21); if (!p0.new) jump:t
+# CHECK: p0 = cmp.gt(r17, #21); if (!p0.new) jump:t
0x00 0xd5 0x09 0x11
-# CHECK: p0 = cmp.gtu(r9, #21); if (p0.new) jump:nt
+# CHECK: p0 = cmp.gtu(r17, #21); if (p0.new) jump:nt
0x00 0xf5 0x09 0x11
-# CHECK: p0 = cmp.gtu(r9, #21); if (p0.new) jump:t
+# CHECK: p0 = cmp.gtu(r17, #21); if (p0.new) jump:t
0x00 0xd5 0x49 0x11
-# CHECK: p0 = cmp.gtu(r9, #21); if (!p0.new) jump:nt
+# CHECK: p0 = cmp.gtu(r17, #21); if (!p0.new) jump:nt
0x00 0xf5 0x49 0x11
-# CHECK: p0 = cmp.gtu(r9, #21); if (!p0.new) jump:t
+# CHECK: p0 = cmp.gtu(r17, #21); if (!p0.new) jump:t
0x00 0xc0 0x89 0x13
-# CHECK: p1 = cmp.eq(r9,#-1); if (p1.new) jump:nt
+# CHECK: p1 = cmp.eq(r17,#-1); if (p1.new) jump:nt
0x00 0xc1 0x89 0x13
-# CHECK: p1 = cmp.gt(r9,#-1); if (p1.new) jump:nt
+# CHECK: p1 = cmp.gt(r17,#-1); if (p1.new) jump:nt
0x00 0xc3 0x89 0x13
-# CHECK: p1 = tstbit(r9, #0); if (p1.new) jump:nt
+# CHECK: p1 = tstbit(r17, #0); if (p1.new) jump:nt
0x00 0xe0 0x89 0x13
-# CHECK: p1 = cmp.eq(r9,#-1); if (p1.new) jump:t
+# CHECK: p1 = cmp.eq(r17,#-1); if (p1.new) jump:t
0x00 0xe1 0x89 0x13
-# CHECK: p1 = cmp.gt(r9,#-1); if (p1.new) jump:t
+# CHECK: p1 = cmp.gt(r17,#-1); if (p1.new) jump:t
0x00 0xe3 0x89 0x13
-# CHECK: p1 = tstbit(r9, #0); if (p1.new) jump:t
+# CHECK: p1 = tstbit(r17, #0); if (p1.new) jump:t
0x00 0xc0 0xc9 0x13
-# CHECK: p1 = cmp.eq(r9,#-1); if (!p1.new) jump:nt
+# CHECK: p1 = cmp.eq(r17,#-1); if (!p1.new) jump:nt
0x00 0xc1 0xc9 0x13
-# CHECK: p1 = cmp.gt(r9,#-1); if (!p1.new) jump:nt
+# CHECK: p1 = cmp.gt(r17,#-1); if (!p1.new) jump:nt
0x00 0xc3 0xc9 0x13
-# CHECK: p1 = tstbit(r9, #0); if (!p1.new) jump:nt
+# CHECK: p1 = tstbit(r17, #0); if (!p1.new) jump:nt
0x00 0xe0 0xc9 0x13
-# CHECK: p1 = cmp.eq(r9,#-1); if (!p1.new) jump:t
+# CHECK: p1 = cmp.eq(r17,#-1); if (!p1.new) jump:t
0x00 0xe1 0xc9 0x13
-# CHECK: p1 = cmp.gt(r9,#-1); if (!p1.new) jump:t
+# CHECK: p1 = cmp.gt(r17,#-1); if (!p1.new) jump:t
0x00 0xe3 0xc9 0x13
-# CHECK: p1 = tstbit(r9, #0); if (!p1.new) jump:t
+# CHECK: p1 = tstbit(r17, #0); if (!p1.new) jump:t
0x00 0xd5 0x09 0x12
-# CHECK: p1 = cmp.eq(r9, #21); if (p1.new) jump:nt
+# CHECK: p1 = cmp.eq(r17, #21); if (p1.new) jump:nt
0x00 0xf5 0x09 0x12
-# CHECK: p1 = cmp.eq(r9, #21); if (p1.new) jump:t
+# CHECK: p1 = cmp.eq(r17, #21); if (p1.new) jump:t
0x00 0xd5 0x49 0x12
-# CHECK: p1 = cmp.eq(r9, #21); if (!p1.new) jump:nt
+# CHECK: p1 = cmp.eq(r17, #21); if (!p1.new) jump:nt
0x00 0xf5 0x49 0x12
-# CHECK: p1 = cmp.eq(r9, #21); if (!p1.new) jump:t
+# CHECK: p1 = cmp.eq(r17, #21); if (!p1.new) jump:t
0x00 0xd5 0x89 0x12
-# CHECK: p1 = cmp.gt(r9, #21); if (p1.new) jump:nt
+# CHECK: p1 = cmp.gt(r17, #21); if (p1.new) jump:nt
0x00 0xf5 0x89 0x12
-# CHECK: p1 = cmp.gt(r9, #21); if (p1.new) jump:t
+# CHECK: p1 = cmp.gt(r17, #21); if (p1.new) jump:t
0x00 0xd5 0xc9 0x12
-# CHECK: p1 = cmp.gt(r9, #21); if (!p1.new) jump:nt
+# CHECK: p1 = cmp.gt(r17, #21); if (!p1.new) jump:nt
0x00 0xf5 0xc9 0x12
-# CHECK: p1 = cmp.gt(r9, #21); if (!p1.new) jump:t
+# CHECK: p1 = cmp.gt(r17, #21); if (!p1.new) jump:t
0x00 0xd5 0x09 0x13
-# CHECK: p1 = cmp.gtu(r9, #21); if (p1.new) jump:nt
+# CHECK: p1 = cmp.gtu(r17, #21); if (p1.new) jump:nt
0x00 0xf5 0x09 0x13
-# CHECK: p1 = cmp.gtu(r9, #21); if (p1.new) jump:t
+# CHECK: p1 = cmp.gtu(r17, #21); if (p1.new) jump:t
0x00 0xd5 0x49 0x13
-# CHECK: p1 = cmp.gtu(r9, #21); if (!p1.new) jump:nt
+# CHECK: p1 = cmp.gtu(r17, #21); if (!p1.new) jump:nt
0x00 0xf5 0x49 0x13
-# CHECK: p1 = cmp.gtu(r9, #21); if (!p1.new) jump:t
+# CHECK: p1 = cmp.gtu(r17, #21); if (!p1.new) jump:t
0x00 0xcd 0x09 0x14
-# CHECK: p0 = cmp.eq(r9, r13); if (p0.new) jump:nt
+# CHECK: p0 = cmp.eq(r17, r21); if (p0.new) jump:nt
0x00 0xdd 0x09 0x14
-# CHECK: p1 = cmp.eq(r9, r13); if (p1.new) jump:nt
+# CHECK: p1 = cmp.eq(r17, r21); if (p1.new) jump:nt
0x00 0xed 0x09 0x14
-# CHECK: p0 = cmp.eq(r9, r13); if (p0.new) jump:t
+# CHECK: p0 = cmp.eq(r17, r21); if (p0.new) jump:t
0x00 0xfd 0x09 0x14
-# CHECK: p1 = cmp.eq(r9, r13); if (p1.new) jump:t
+# CHECK: p1 = cmp.eq(r17, r21); if (p1.new) jump:t
0x00 0xcd 0x49 0x14
-# CHECK: p0 = cmp.eq(r9, r13); if (!p0.new) jump:nt
+# CHECK: p0 = cmp.eq(r17, r21); if (!p0.new) jump:nt
0x00 0xdd 0x49 0x14
-# CHECK: p1 = cmp.eq(r9, r13); if (!p1.new) jump:nt
+# CHECK: p1 = cmp.eq(r17, r21); if (!p1.new) jump:nt
0x00 0xed 0x49 0x14
-# CHECK: p0 = cmp.eq(r9, r13); if (!p0.new) jump:t
+# CHECK: p0 = cmp.eq(r17, r21); if (!p0.new) jump:t
0x00 0xfd 0x49 0x14
-# CHECK: p1 = cmp.eq(r9, r13); if (!p1.new) jump:t
+# CHECK: p1 = cmp.eq(r17, r21); if (!p1.new) jump:t
0x00 0xcd 0x89 0x14
-# CHECK: p0 = cmp.gt(r9, r13); if (p0.new) jump:nt
+# CHECK: p0 = cmp.gt(r17, r21); if (p0.new) jump:nt
0x00 0xdd 0x89 0x14
-# CHECK: p1 = cmp.gt(r9, r13); if (p1.new) jump:nt
+# CHECK: p1 = cmp.gt(r17, r21); if (p1.new) jump:nt
0x00 0xed 0x89 0x14
-# CHECK: p0 = cmp.gt(r9, r13); if (p0.new) jump:t
+# CHECK: p0 = cmp.gt(r17, r21); if (p0.new) jump:t
0x00 0xfd 0x89 0x14
-# CHECK: p1 = cmp.gt(r9, r13); if (p1.new) jump:t
+# CHECK: p1 = cmp.gt(r17, r21); if (p1.new) jump:t
0x00 0xcd 0xc9 0x14
-# CHECK: p0 = cmp.gt(r9, r13); if (!p0.new) jump:nt
+# CHECK: p0 = cmp.gt(r17, r21); if (!p0.new) jump:nt
0x00 0xdd 0xc9 0x14
-# CHECK: p1 = cmp.gt(r9, r13); if (!p1.new) jump:nt
+# CHECK: p1 = cmp.gt(r17, r21); if (!p1.new) jump:nt
0x00 0xed 0xc9 0x14
-# CHECK: p0 = cmp.gt(r9, r13); if (!p0.new) jump:t
+# CHECK: p0 = cmp.gt(r17, r21); if (!p0.new) jump:t
0x00 0xfd 0xc9 0x14
-# CHECK: p1 = cmp.gt(r9, r13); if (!p1.new) jump:t
+# CHECK: p1 = cmp.gt(r17, r21); if (!p1.new) jump:t
0x00 0xcd 0x09 0x15
-# CHECK: p0 = cmp.gtu(r9, r13); if (p0.new) jump:nt
+# CHECK: p0 = cmp.gtu(r17, r21); if (p0.new) jump:nt
0x00 0xdd 0x09 0x15
-# CHECK: p1 = cmp.gtu(r9, r13); if (p1.new) jump:nt
+# CHECK: p1 = cmp.gtu(r17, r21); if (p1.new) jump:nt
0x00 0xed 0x09 0x15
-# CHECK: p0 = cmp.gtu(r9, r13); if (p0.new) jump:t
+# CHECK: p0 = cmp.gtu(r17, r21); if (p0.new) jump:t
0x00 0xfd 0x09 0x15
-# CHECK: p1 = cmp.gtu(r9, r13); if (p1.new) jump:t
+# CHECK: p1 = cmp.gtu(r17, r21); if (p1.new) jump:t
0x00 0xcd 0x49 0x15
-# CHECK: p0 = cmp.gtu(r9, r13); if (!p0.new) jump:nt
+# CHECK: p0 = cmp.gtu(r17, r21); if (!p0.new) jump:nt
0x00 0xdd 0x49 0x15
-# CHECK: p1 = cmp.gtu(r9, r13); if (!p1.new) jump:nt
+# CHECK: p1 = cmp.gtu(r17, r21); if (!p1.new) jump:nt
0x00 0xed 0x49 0x15
-# CHECK: p0 = cmp.gtu(r9, r13); if (!p0.new) jump:t
+# CHECK: p0 = cmp.gtu(r17, r21); if (!p0.new) jump:t
0x00 0xfd 0x49 0x15
-# CHECK: p1 = cmp.gtu(r9, r13); if (!p1.new) jump:t
+# CHECK: p1 = cmp.gtu(r17, r21); if (!p1.new) jump:t
# Jump to address
0x22 0xc0 0x00 0x58
@@ -197,6 +197,6 @@
# Transfer and jump
0x00 0xd5 0x09 0x16
-# CHECK: r9 = #21 ; jump
+# CHECK: r17 = #21 ; jump
0x00 0xc9 0x0d 0x17
-# CHECK: r9 = r13 ; jump
+# CHECK: r17 = r21 ; jump
diff --git a/test/MC/Disassembler/Hexagon/ld.txt b/test/MC/Disassembler/Hexagon/ld.txt
index 15c23b644886..56e00fd94f56 100644
--- a/test/MC/Disassembler/Hexagon/ld.txt
+++ b/test/MC/Disassembler/Hexagon/ld.txt
@@ -4,12 +4,18 @@
# Load doubleword
0x90 0xff 0xd5 0x3a
# CHECK: r17:16 = memd(r21 + r31<<#3)
-0x10 0xc5 0xc0 0x49
-# CHECK: r17:16 = memd(##320)
+0xb0 0xc2 0xc0 0x49
+# CHECK: r17:16 = memd(#168)
+0x02 0x40 0x00 0x00 0x10 0xc5 0xc0 0x49
+# CHECK: r17:16 = memd(##168)
+0xd0 0xc0 0xd5 0x91
+# CHECK: r17:16 = memd(r21 + #48)
0xb0 0xe0 0xd5 0x99
# CHECK: r17:16 = memd(r21 ++ #40:circ(m1))
0x10 0xe2 0xd5 0x99
# CHECK: r17:16 = memd(r21 ++ I:circ(m1))
+0x00 0x40 0x00 0x00 0x70 0xd7 0xd5 0x9b
+# CHECK: r17:16 = memd(r21 = ##31)
0xb0 0xc0 0xd5 0x9b
# CHECK: r17:16 = memd(r21++#40)
0x10 0xe0 0xd5 0x9d
@@ -53,6 +59,8 @@
0x91 0xff 0x15 0x3a
# CHECK: r17 = memb(r21 + r31<<#3)
0xb1 0xc2 0x00 0x49
+# CHECK: r17 = memb(#21)
+0x00 0x40 0x00 0x00 0xb1 0xc2 0x00 0x49
# CHECK: r17 = memb(##21)
0xf1 0xc3 0x15 0x91
# CHECK: r17 = memb(r21 + #31)
@@ -60,6 +68,8 @@
# CHECK: r17 = memb(r21 ++ #5:circ(m1))
0x11 0xe2 0x15 0x99
# CHECK: r17 = memb(r21 ++ I:circ(m1))
+0x00 0x40 0x00 0x00 0x71 0xd7 0x15 0x9b
+# CHECK: r17 = memb(r21 = ##31)
0xb1 0xc0 0x15 0x9b
# CHECK: r17 = memb(r21++#5)
0x11 0xe0 0x15 0x9d
@@ -99,17 +109,37 @@
# CHECK: p3 = r5
# CHECK-NEXT: if (!p3.new) r17 = memb(r21++#5)
+# Load byte into shifted vector
+0xf0 0xc3 0x95 0x90
+# CHECK: r17:16 = memb_fifo(r21 + #31)
+0xb0 0xe0 0x95 0x98
+# CHECK: r17:16 = memb_fifo(r21 ++ #5:circ(m1))
+0x10 0xe2 0x95 0x98
+# CHECK: r17:16 = memb_fifo(r21 ++ I:circ(m1))
+
+# Load half into shifted vector
+0xf0 0xc3 0x55 0x90
+# CHECK: r17:16 = memh_fifo(r21 + #62)
+0xb0 0xe0 0x55 0x98
+# CHECK: r17:16 = memh_fifo(r21 ++ #10:circ(m1))
+0x10 0xe2 0x55 0x98
+# CHECK: r17:16 = memh_fifo(r21 ++ I:circ(m1))
+
# Load halfword
0x91 0xff 0x55 0x3a
# CHECK: r17 = memh(r21 + r31<<#3)
-0x51 0xc5 0x40 0x49
-# CHECK: r17 = memh(##84)
+0xb1 0xc2 0x40 0x49
+# CHECK: r17 = memh(#42)
+0x00 0x40 0x00 0x00 0x51 0xc5 0x40 0x49
+# CHECK: r17 = memh(##42)
0xf1 0xc3 0x55 0x91
# CHECK: r17 = memh(r21 + #62)
0xb1 0xe0 0x55 0x99
# CHECK: r17 = memh(r21 ++ #10:circ(m1))
0x11 0xe2 0x55 0x99
# CHECK: r17 = memh(r21 ++ I:circ(m1))
+0x00 0x40 0x00 0x00 0x71 0xd7 0x55 0x9b
+# CHECK: r17 = memh(r21 = ##31)
0xb1 0xc0 0x55 0x9b
# CHECK: r17 = memh(r21++#10)
0x11 0xe0 0x55 0x9d
@@ -138,11 +168,23 @@
0x03 0x40 0x45 0x85 0xb1 0xfe 0x55 0x9b
# CHECK: p3 = r5
# CHECK-NEXT: if (!p3.new) r17 = memh(r21++#10)
+0xf1 0xdb 0x55 0x41
+# CHECK: if (p3) r17 = memh(r21 + #62)
+0xf1 0xdb 0x55 0x45
+# CHECK: if (!p3) r17 = memh(r21 + #62)
+0x03 0x40 0x45 0x85 0xf1 0xdb 0x55 0x43
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memh(r21 + #62)
+0x03 0x40 0x45 0x85 0xf1 0xdb 0x55 0x47
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memh(r21 + #62)
# Load unsigned byte
0x91 0xff 0x35 0x3a
# CHECK: r17 = memub(r21 + r31<<#3)
0xb1 0xc2 0x20 0x49
+# CHECK: r17 = memub(#21)
+0x00 0x40 0x00 0x00 0xb1 0xc2 0x20 0x49
# CHECK: r17 = memub(##21)
0xf1 0xc3 0x35 0x91
# CHECK: r17 = memub(r21 + #31)
@@ -150,6 +192,8 @@
# CHECK: r17 = memub(r21 ++ #5:circ(m1))
0x11 0xe2 0x35 0x99
# CHECK: r17 = memub(r21 ++ I:circ(m1))
+0x00 0x40 0x00 0x00 0x71 0xd7 0x35 0x9b
+# CHECK: r17 = memub(r21 = ##31)
0xb1 0xc0 0x35 0x9b
# CHECK: r17 = memub(r21++#5)
0x11 0xe0 0x35 0x9d
@@ -192,14 +236,18 @@
# Load unsigned halfword
0x91 0xff 0x75 0x3a
# CHECK: r17 = memuh(r21 + r31<<#3)
-0x51 0xc5 0x60 0x49
-# CHECK: r17 = memuh(##84)
+0xb1 0xc2 0x60 0x49
+# CHECK: r17 = memuh(#42)
+0x00 0x40 0x00 0x00 0x51 0xc5 0x60 0x49
+# CHECK: r17 = memuh(##42)
0xb1 0xc2 0x75 0x91
# CHECK: r17 = memuh(r21 + #42)
0xb1 0xe0 0x75 0x99
# CHECK: r17 = memuh(r21 ++ #10:circ(m1))
0x11 0xe2 0x75 0x99
# CHECK: r17 = memuh(r21 ++ I:circ(m1))
+0x00 0x40 0x00 0x00 0x71 0xd7 0x75 0x9b
+# CHECK: r17 = memuh(r21 = ##31)
0xb1 0xc0 0x75 0x9b
# CHECK: r17 = memuh(r21++#10)
0x11 0xe0 0x75 0x9d
@@ -242,14 +290,18 @@
# Load word
0x91 0xff 0x95 0x3a
# CHECK: r17 = memw(r21 + r31<<#3)
-0x91 0xc2 0x80 0x49
-# CHECK: r17 = memw(##80)
+0xb1 0xc2 0x80 0x49
+# CHECK: r17 = memw(#84)
+0x01 0x40 0x00 0x00 0x91 0xc2 0x80 0x49
+# CHECK: r17 = memw(##84)
0xb1 0xc2 0x95 0x91
# CHECK: r17 = memw(r21 + #84)
0xb1 0xe0 0x95 0x99
# CHECK: r17 = memw(r21 ++ #20:circ(m1))
0x11 0xe2 0x95 0x99
# CHECK: r17 = memw(r21 ++ I:circ(m1))
+0x00 0x40 0x00 0x00 0x71 0xd7 0x95 0x9b
+# CHECK: r17 = memw(r21 = ##31)
0xb1 0xc0 0x95 0x9b
# CHECK: r17 = memw(r21++#20)
0x11 0xe0 0x95 0x9d
@@ -338,14 +390,36 @@
# CHECK: r17:16 = memubh(r21 ++ #20:circ(m1))
0x10 0xe2 0xb5 0x98
# CHECK: r17:16 = memubh(r21 ++ I:circ(m1))
+0x00 0x40 0x00 0x00 0x71 0xd7 0x35 0x9a
+# CHECK: r17 = membh(r21 = ##31)
0xb1 0xc0 0x35 0x9a
# CHECK: r17 = membh(r21++#10)
+0x00 0x40 0x00 0x00 0x71 0xd7 0x75 0x9a
+# CHECK: r17 = memubh(r21 = ##31)
0xb1 0xc0 0x75 0x9a
# CHECK: r17 = memubh(r21++#10)
+0x00 0x40 0x00 0x00 0x70 0xd7 0xb5 0x9a
+# CHECK: r17:16 = memubh(r21 = ##31)
0xb0 0xc0 0xb5 0x9a
# CHECK: r17:16 = memubh(r21++#20)
+0x00 0x40 0x00 0x00 0x70 0xd7 0xf5 0x9a
+# CHECK: r17:16 = membh(r21 = ##31)
0xb0 0xc0 0xf5 0x9a
# CHECK: r17:16 = membh(r21++#20)
+0x00 0x40 0x00 0x00 0xf1 0xf7 0x35 0x9c
+# CHECK: r17 = membh(r21<<#3 + ##31)
+0x11 0xe0 0x35 0x9c
+# CHECK: r17 = membh(r21++m1)
+0x00 0x40 0x00 0x00 0xf1 0xf7 0x75 0x9c
+# CHECK: r17 = memubh(r21<<#3 + ##31)
+0x11 0xe0 0x75 0x9c
+# CHECK: r17 = memubh(r21++m1)
+0x00 0x40 0x00 0x00 0xf0 0xf7 0xf5 0x9c
+# CHECK: r17:16 = membh(r21<<#3 + ##31)
+0x10 0xe0 0xf5 0x9c
+# CHECK: r17:16 = membh(r21++m1)
+0x00 0x40 0x00 0x00 0xf0 0xf7 0xb5 0x9c
+# CHECK: r17:16 = memubh(r21<<#3 + ##31)
0x11 0xe0 0x35 0x9c
# CHECK: r17 = membh(r21++m1)
0x11 0xe0 0x75 0x9c
diff --git a/test/MC/Disassembler/Hexagon/lit.local.cfg b/test/MC/Disassembler/Hexagon/lit.local.cfg
index 6500d4dd7d5a..ba72ff632d4e 100644
--- a/test/MC/Disassembler/Hexagon/lit.local.cfg
+++ b/test/MC/Disassembler/Hexagon/lit.local.cfg
@@ -1,3 +1,3 @@
-if not 'Hexagon' in config.root.targets:
- config.unsupported = True
-
+if not 'Hexagon' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/MC/Disassembler/Hexagon/nv_j.txt b/test/MC/Disassembler/Hexagon/nv_j.txt
index a6773c3f3c56..2135b5a039f6 100644
--- a/test/MC/Disassembler/Hexagon/nv_j.txt
+++ b/test/MC/Disassembler/Hexagon/nv_j.txt
@@ -4,133 +4,133 @@
# Jump to address conditioned on new register value
0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x20
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.eq(r2.new, r21)) jump:nt
+# CHECK-NEXT: if (cmp.eq(r17.new, r21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x20
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.eq(r2.new, r21)) jump:t
+# CHECK-NEXT: if (cmp.eq(r17.new, r21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x20
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.eq(r2.new, r21)) jump:nt
+# CHECK-NEXT: if (!cmp.eq(r17.new, r21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x20
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.eq(r2.new, r21)) jump:t
+# CHECK-NEXT: if (!cmp.eq(r17.new, r21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x82 0x20
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gt(r2.new, r21)) jump:nt
+# CHECK-NEXT: if (cmp.gt(r17.new, r21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x82 0x20
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gt(r2.new, r21)) jump:t
+# CHECK-NEXT: if (cmp.gt(r17.new, r21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0xc2 0x20
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gt(r2.new, r21)) jump:nt
+# CHECK-NEXT: if (!cmp.gt(r17.new, r21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0xc2 0x20
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gt(r2.new, r21)) jump:t
+# CHECK-NEXT: if (!cmp.gt(r17.new, r21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x21
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gtu(r2.new, r21)) jump:nt
+# CHECK-NEXT: if (cmp.gtu(r17.new, r21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x21
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gtu(r2.new, r21)) jump:t
+# CHECK-NEXT: if (cmp.gtu(r17.new, r21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x21
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gtu(r2.new, r21)) jump:nt
+# CHECK-NEXT: if (!cmp.gtu(r17.new, r21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x21
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gtu(r2.new, r21)) jump:t
+# CHECK-NEXT: if (!cmp.gtu(r17.new, r21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x82 0x21
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gt(r21, r2.new)) jump:nt
+# CHECK-NEXT: if (cmp.gt(r21, r17.new)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x82 0x21
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gt(r21, r2.new)) jump:t
+# CHECK-NEXT: if (cmp.gt(r21, r17.new)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0xc2 0x21
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gt(r21, r2.new)) jump:nt
+# CHECK-NEXT: if (!cmp.gt(r21, r17.new)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0xc2 0x21
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gt(r21, r2.new)) jump:t
+# CHECK-NEXT: if (!cmp.gt(r21, r17.new)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x22
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gtu(r21, r2.new)) jump:nt
+# CHECK-NEXT: if (cmp.gtu(r21, r17.new)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x22
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gtu(r21, r2.new)) jump:t
+# CHECK-NEXT: if (cmp.gtu(r21, r17.new)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x22
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gtu(r21, r2.new)) jump:nt
+# CHECK-NEXT: if (!cmp.gtu(r21, r17.new)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x22
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gtu(r21, r2.new)) jump:t
+# CHECK-NEXT: if (!cmp.gtu(r21, r17.new)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x24
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.eq(r2.new, #21)) jump:nt
+# CHECK-NEXT: if (cmp.eq(r17.new, #21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x24
# CHECK: r17 = r17
-# CHECK-NETX: if (cmp.eq(r2.new, #21)) jump:t
+# CHECK-NETX: if (cmp.eq(r17.new, #21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x24
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.eq(r2.new, #21)) jump:nt
+# CHECK-NEXT: if (!cmp.eq(r17.new, #21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x24
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.eq(r2.new, #21)) jump:t
+# CHECK-NEXT: if (!cmp.eq(r17.new, #21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x82 0x24
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gt(r2.new, #21)) jump:nt
+# CHECK-NEXT: if (cmp.gt(r17.new, #21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x82 0x24
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gt(r2.new, #21)) jump:t
+# CHECK-NEXT: if (cmp.gt(r17.new, #21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0xc2 0x24
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gt(r2.new, #21)) jump:nt
+# CHECK-NEXT: if (!cmp.gt(r17.new, #21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0xc2 0x24
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gt(r2.new, #21)) jump:t
+# CHECK-NEXT: if (!cmp.gt(r17.new, #21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x25
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gtu(r2.new, #21)) jump:nt
+# CHECK-NEXT: if (cmp.gtu(r17.new, #21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x25
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gtu(r2.new, #21)) jump:t
+# CHECK-NEXT: if (cmp.gtu(r17.new, #21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x25
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gtu(r2.new, #21)) jump:nt
+# CHECK-NEXT: if (!cmp.gtu(r17.new, #21)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x25
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gtu(r2.new, #21)) jump:t
+# CHECK-NEXT: if (!cmp.gtu(r17.new, #21)) jump:t
0x11 0x40 0x71 0x70 0x92 0xc0 0x82 0x25
# CHECK: r17 = r17
-# CHECK-NEXT: if (tstbit(r2.new, #0)) jump:nt
+# CHECK-NEXT: if (tstbit(r17.new, #0)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xe0 0x82 0x25
# CHECK: r17 = r17
-# CHECK-NEXT: if (tstbit(r2.new, #0)) jump:t
+# CHECK-NEXT: if (tstbit(r17.new, #0)) jump:t
0x11 0x40 0x71 0x70 0x92 0xc0 0xc2 0x25
# CHECK: r17 = r17
-# CHECK-NEXT: if (!tstbit(r2.new, #0)) jump:nt
+# CHECK-NEXT: if (!tstbit(r17.new, #0)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xe0 0xc2 0x25
# CHECK: r17 = r17
-# CHECK-NEXT: if (!tstbit(r2.new, #0)) jump:t
+# CHECK-NEXT: if (!tstbit(r17.new, #0)) jump:t
0x11 0x40 0x71 0x70 0x92 0xc0 0x02 0x26
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.eq(r2.new, #-1)) jump:nt
+# CHECK-NEXT: if (cmp.eq(r17.new, #-1)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xe0 0x02 0x26
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.eq(r2.new, #-1)) jump:t
+# CHECK-NEXT: if (cmp.eq(r17.new, #-1)) jump:t
0x11 0x40 0x71 0x70 0x92 0xc0 0x42 0x26
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.eq(r2.new, #-1)) jump:nt
+# CHECK-NEXT: if (!cmp.eq(r17.new, #-1)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xe0 0x42 0x26
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.eq(r2.new, #-1)) jump:t
+# CHECK-NEXT: if (!cmp.eq(r17.new, #-1)) jump:t
0x11 0x40 0x71 0x70 0x92 0xc0 0x82 0x26
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gt(r2.new, #-1)) jump:nt
+# CHECK-NEXT: if (cmp.gt(r17.new, #-1)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xe0 0x82 0x26
# CHECK: r17 = r17
-# CHECK-NEXT: if (cmp.gt(r2.new, #-1)) jump:t
+# CHECK-NEXT: if (cmp.gt(r17.new, #-1)) jump:t
0x11 0x40 0x71 0x70 0x92 0xc0 0xc2 0x26
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gt(r2.new, #-1)) jump:nt
+# CHECK-NEXT: if (!cmp.gt(r17.new, #-1)) jump:nt
0x11 0x40 0x71 0x70 0x92 0xe0 0xc2 0x26
# CHECK: r17 = r17
-# CHECK-NEXT: if (!cmp.gt(r2.new, #-1)) jump:t
+# CHECK-NEXT: if (!cmp.gt(r17.new, #-1)) jump:t
diff --git a/test/MC/Disassembler/Hexagon/nv_st.txt b/test/MC/Disassembler/Hexagon/nv_st.txt
index ef49455b80b1..3a767f33b36a 100644
--- a/test/MC/Disassembler/Hexagon/nv_st.txt
+++ b/test/MC/Disassembler/Hexagon/nv_st.txt
@@ -4,200 +4,209 @@
# Store new-value byte
0x1f 0x40 0x7f 0x70 0x82 0xf5 0xb1 0x3b
# CHECK: r31 = r31
-# CHECK-NEXT: memb(r17 + r21<<#3) = r2.new
+# CHECK-NEXT: memb(r17 + r21<<#3) = r31.new
+0x1f 0x40 0x7f 0x70 0x11 0xc2 0xa0 0x48
+# CHECK: r31 = r31
+# CHECK-NEXT: memb(#17) = r31.new
0x1f 0x40 0x7f 0x70 0x15 0xc2 0xb1 0xa1
# CHECK: r31 = r31
-# CHECK-NEXT: memb(r17+#21) = r2.new
+# CHECK-NEXT: memb(r17+#21) = r31.new
0x1f 0x40 0x7f 0x70 0x02 0xe2 0xb1 0xa9
# CHECK: r31 = r31
-# CHECK-NEXT: memb(r17 ++ I:circ(m1)) = r2.new
+# CHECK-NEXT: memb(r17 ++ I:circ(m1)) = r31.new
0x1f 0x40 0x7f 0x70 0x28 0xe2 0xb1 0xa9
# CHECK: r31 = r31
-# CHECK-NEXT: memb(r17 ++ #5:circ(m1)) = r2.new
+# CHECK-NEXT: memb(r17 ++ #5:circ(m1)) = r31.new
0x1f 0x40 0x7f 0x70 0x28 0xc2 0xb1 0xab
# CHECK: r31 = r31
-# CHECK-NEXT: memb(r17++#5) = r2.new
+# CHECK-NEXT: memb(r17++#5) = r31.new
0x1f 0x40 0x7f 0x70 0x00 0xe2 0xb1 0xad
# CHECK: r31 = r31
-# CHECK-NEXT: memb(r17++m1) = r2.new
+# CHECK-NEXT: memb(r17++m1) = r31.new
0x1f 0x40 0x7f 0x70 0x00 0xe2 0xb1 0xaf
# CHECK: r31 = r31
-# CHECK-NEXT: memb(r17 ++ m1:brev) = r2.new
+# CHECK-NEXT: memb(r17 ++ m1:brev) = r31.new
# Store new-value byte conditionally
0x1f 0x40 0x7f 0x70 0xe2 0xf5 0xb1 0x34
# CHECK: r31 = r31
-# CHECK-NEXT: if (p3) memb(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (p3) memb(r17+r21<<#3) = r31.new
0x1f 0x40 0x7f 0x70 0xe2 0xf5 0xb1 0x35
# CHECK: r31 = r31
-# CHECK-NEXT: if (!p3) memb(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (!p3) memb(r17+r21<<#3) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xe2 0xf5 0xb1 0x36
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (p3.new) memb(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (p3.new) memb(r17+r21<<#3) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xe2 0xf5 0xb1 0x37
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (!p3.new) memb(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (!p3.new) memb(r17+r21<<#3) = r31.new
0x1f 0x40 0x7f 0x70 0xab 0xc2 0xb1 0x40
# CHECK: r31 = r31
-# CHECK-NEXT: if (p3) memb(r17+#21) = r2.new
+# CHECK-NEXT: if (p3) memb(r17+#21) = r31.new
0x1f 0x40 0x7f 0x70 0xab 0xc2 0xb1 0x44
# CHECK: r31 = r31
-# CHECK-NEXT: if (!p3) memb(r17+#21) = r2.new
+# CHECK-NEXT: if (!p3) memb(r17+#21) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xc2 0xb1 0x42
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (p3.new) memb(r17+#21) = r2.new
+# CHECK-NEXT: if (p3.new) memb(r17+#21) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xc2 0xb1 0x46
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (!p3.new) memb(r17+#21) = r2.new
+# CHECK-NEXT: if (!p3.new) memb(r17+#21) = r31.new
0x1f 0x40 0x7f 0x70 0x2b 0xe2 0xb1 0xab
# CHECK: r31 = r31
-# CHECK-NEXT: if (p3) memb(r17++#5) = r2.new
+# CHECK-NEXT: if (p3) memb(r17++#5) = r31.new
0x1f 0x40 0x7f 0x70 0x2f 0xe2 0xb1 0xab
# CHECK: r31 = r31
-# CHECK-NEXT: if (!p3) memb(r17++#5) = r2.new
+# CHECK-NEXT: if (!p3) memb(r17++#5) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xe2 0xb1 0xab
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (p3.new) memb(r17++#5) = r2.new
+# CHECK-NEXT: if (p3.new) memb(r17++#5) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xaf 0xe2 0xb1 0xab
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (!p3.new) memb(r17++#5) = r2.new
+# CHECK-NEXT: if (!p3.new) memb(r17++#5) = r31.new
# Store new-value halfword
0x1f 0x40 0x7f 0x70 0x8a 0xf5 0xb1 0x3b
# CHECK: r31 = r31
-# CHECK-NEXT: memh(r17 + r21<<#3) = r2.new
+# CHECK-NEXT: memh(r17 + r21<<#3) = r31.new
+0x1f 0x40 0x7f 0x70 0x15 0xca 0xa0 0x48
+# CHECK: r31 = r31
+# CHECK-NEXT: memh(#42) = r31.new
0x1f 0x40 0x7f 0x70 0x15 0xca 0xb1 0xa1
# CHECK: r31 = r31
-# CHECK-NEXT: memh(r17+#42) = r2.new
+# CHECK-NEXT: memh(r17+#42) = r31.new
0x1f 0x40 0x7f 0x70 0x02 0xea 0xb1 0xa9
# CHECK: r31 = r31
-# CHECK-NEXT: memh(r17 ++ I:circ(m1)) = r2.new
+# CHECK-NEXT: memh(r17 ++ I:circ(m1)) = r31.new
0x1f 0x40 0x7f 0x70 0x28 0xea 0xb1 0xa9
# CHECK: r31 = r31
-# CHECK-NEXT: memh(r17 ++ #10:circ(m1)) = r2.new
+# CHECK-NEXT: memh(r17 ++ #10:circ(m1)) = r31.new
0x1f 0x40 0x7f 0x70 0x28 0xca 0xb1 0xab
# CHECK: r31 = r31
-# CHECK-NEXT: memh(r17++#10) = r2.new
+# CHECK-NEXT: memh(r17++#10) = r31.new
0x1f 0x40 0x7f 0x70 0x00 0xea 0xb1 0xad
# CHECK: r31 = r31
-# CHECK-NEXT: memh(r17++m1) = r2.new
+# CHECK-NEXT: memh(r17++m1) = r31.new
0x1f 0x40 0x7f 0x70 0x00 0xea 0xb1 0xaf
# CHECK: r31 = r31
-# CHECK-NEXT: memh(r17 ++ m1:brev) = r2.new
+# CHECK-NEXT: memh(r17 ++ m1:brev) = r31.new
# Store new-value halfword conditionally
0x1f 0x40 0x7f 0x70 0xea 0xf5 0xb1 0x34
# CHECK: r31 = r31
-# CHECK-NEXT: if (p3) memh(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (p3) memh(r17+r21<<#3) = r31.new
0x1f 0x40 0x7f 0x70 0xea 0xf5 0xb1 0x35
# CHECK: r31 = r31
-# CHECK-NEXT: if (!p3) memh(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (!p3) memh(r17+r21<<#3) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xea 0xf5 0xb1 0x36
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (p3.new) memh(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (p3.new) memh(r17+r21<<#3) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xea 0xf5 0xb1 0x37
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (!p3.new) memh(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (!p3.new) memh(r17+r21<<#3) = r31.new
0x1f 0x40 0x7f 0x70 0xab 0xca 0xb1 0x40
# CHECK: r31 = r31
-# CHECK-NEXT: if (p3) memh(r17+#42) = r2.new
+# CHECK-NEXT: if (p3) memh(r17+#42) = r31.new
0x1f 0x40 0x7f 0x70 0xab 0xca 0xb1 0x44
# CHECK: r31 = r31
-# CHECK-NEXT: if (!p3) memh(r17+#42) = r2.new
+# CHECK-NEXT: if (!p3) memh(r17+#42) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xca 0xb1 0x42
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (p3.new) memh(r17+#42) = r2.new
+# CHECK-NEXT: if (p3.new) memh(r17+#42) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xca 0xb1 0x46
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (!p3.new) memh(r17+#42) = r2.new
+# CHECK-NEXT: if (!p3.new) memh(r17+#42) = r31.new
0x1f 0x40 0x7f 0x70 0x2b 0xea 0xb1 0xab
# CHECK: r31 = r31
-# CHECK-NEXT: if (p3) memh(r17++#10) = r2.new
+# CHECK-NEXT: if (p3) memh(r17++#10) = r31.new
0x1f 0x40 0x7f 0x70 0x2f 0xea 0xb1 0xab
# CHECK: r31 = r31
-# CHECK-NEXT: if (!p3) memh(r17++#10) = r2.new
+# CHECK-NEXT: if (!p3) memh(r17++#10) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xea 0xb1 0xab
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (p3.new) memh(r17++#10) = r2.new
+# CHECK-NEXT: if (p3.new) memh(r17++#10) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xaf 0xea 0xb1 0xab
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (!p3.new) memh(r17++#10) = r2.new
+# CHECK-NEXT: if (!p3.new) memh(r17++#10) = r31.new
# Store new-value word
0x1f 0x40 0x7f 0x70 0x92 0xf5 0xb1 0x3b
# CHECK: r31 = r31
-# CHECK-NEXT: memw(r17 + r21<<#3) = r2.new
-0x1f 0x40 0x7f 0x70 0x15 0xd2 0xb1 0xa1
+# CHECK-NEXT: memw(r17 + r21<<#3) = r31.new
+0x1f 0x40 0x7f 0x70 0x15 0xd2 0xa0 0x48
# CHECK: r31 = r31
-# CHECK-NEXT: memw(r17+#84) = r2.new
-0x1f 0x40 0x7f 0x70 0x28 0xf2 0xb1 0xa9
+# CHECK-NEXT: memw(#84) = r31.new
+0x1f 0x40 0x7f 0x70 0x15 0xd2 0xb1 0xa1
# CHECK: r31 = r31
-# CHECK-NEXT: memw(r17 ++ #20:circ(m1)) = r2.new
+# CHECK-NEXT: memw(r17+#84) = r31.new
0x1f 0x40 0x7f 0x70 0x02 0xf2 0xb1 0xa9
# CHECK: r31 = r31
-# CHECK-NEXT: memw(r17 ++ I:circ(m1)) = r2.new
+# CHECK-NEXT: memw(r17 ++ I:circ(m1)) = r31.new
+0x1f 0x40 0x7f 0x70 0x28 0xf2 0xb1 0xa9
+# CHECK: r31 = r31
+# CHECK-NEXT: memw(r17 ++ #20:circ(m1)) = r31.new
0x1f 0x40 0x7f 0x70 0x28 0xd2 0xb1 0xab
# CHECK: r31 = r31
-# CHECK-NEXT: memw(r17++#20) = r2.new
+# CHECK-NEXT: memw(r17++#20) = r31.new
0x1f 0x40 0x7f 0x70 0x00 0xf2 0xb1 0xad
# CHECK: r31 = r31
-# CHECK-NEXT: memw(r17++m1) = r2.new
+# CHECK-NEXT: memw(r17++m1) = r31.new
0x1f 0x40 0x7f 0x70 0x00 0xf2 0xb1 0xaf
# CHECK: r31 = r31
-# CHECK-NEXT: memw(r17 ++ m1:brev) = r2.new
+# CHECK-NEXT: memw(r17 ++ m1:brev) = r31.new
# Store new-value word conditionally
0x1f 0x40 0x7f 0x70 0xf2 0xf5 0xb1 0x34
# CHECK: r31 = r31
-# CHECK-NEXT: if (p3) memw(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (p3) memw(r17+r21<<#3) = r31.new
0x1f 0x40 0x7f 0x70 0xf2 0xf5 0xb1 0x35
# CHECK: r31 = r31
-# CHECK-NEXT: if (!p3) memw(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (!p3) memw(r17+r21<<#3) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xf2 0xf5 0xb1 0x36
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (p3.new) memw(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (p3.new) memw(r17+r21<<#3) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xf2 0xf5 0xb1 0x37
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (!p3.new) memw(r17+r21<<#3) = r2.new
+# CHECK-NEXT: if (!p3.new) memw(r17+r21<<#3) = r31.new
0x1f 0x40 0x7f 0x70 0xab 0xd2 0xb1 0x40
# CHECK: r31 = r31
-# CHECK-NEXT: if (p3) memw(r17+#84) = r2.new
+# CHECK-NEXT: if (p3) memw(r17+#84) = r31.new
0x1f 0x40 0x7f 0x70 0xab 0xd2 0xb1 0x44
# CHECK: r31 = r31
-# CHECK-NEXT: if (!p3) memw(r17+#84) = r2.new
+# CHECK-NEXT: if (!p3) memw(r17+#84) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xd2 0xb1 0x42
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (p3.new) memw(r17+#84) = r2.new
+# CHECK-NEXT: if (p3.new) memw(r17+#84) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xd2 0xb1 0x46
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (!p3.new) memw(r17+#84) = r2.new
+# CHECK-NEXT: if (!p3.new) memw(r17+#84) = r31.new
0x1f 0x40 0x7f 0x70 0x2b 0xf2 0xb1 0xab
# CHECK: r31 = r31
-# CHECK-NEXT: if (p3) memw(r17++#20) = r2.new
+# CHECK-NEXT: if (p3) memw(r17++#20) = r31.new
0x1f 0x40 0x7f 0x70 0x2f 0xf2 0xb1 0xab
# CHECK: r31 = r31
-# CHECK-NEXT: if (!p3) memw(r17++#20) = r2.new
+# CHECK-NEXT: if (!p3) memw(r17++#20) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xf2 0xb1 0xab
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (p3.new) memw(r17++#20) = r2.new
+# CHECK-NEXT: if (p3.new) memw(r17++#20) = r31.new
0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xaf 0xf2 0xb1 0xab
# CHECK: p3 = r5
# CHECK-NEXT: r31 = r31
-# CHECK-NEXT: if (!p3.new) memw(r17++#20) = r2.new
+# CHECK-NEXT: if (!p3.new) memw(r17++#20) = r31.new
diff --git a/test/MC/Disassembler/Hexagon/st.txt b/test/MC/Disassembler/Hexagon/st.txt
index 3b809d3465a8..6d9074a05ef7 100644
--- a/test/MC/Disassembler/Hexagon/st.txt
+++ b/test/MC/Disassembler/Hexagon/st.txt
@@ -5,7 +5,9 @@
0x9e 0xf5 0xd1 0x3b
# CHECK: memd(r17 + r21<<#3) = r31:30
0x28 0xd4 0xc0 0x48
-# CHECK: memd(##320) = r21:20
+# CHECK: memd(#320) = r21:20
+0x02 0x40 0x00 0x00 0x28 0xd4 0xc0 0x48
+# CHECK: memd(##168) = r21:20
0x15 0xd4 0xd1 0xa1
# CHECK: memd(r17+#168) = r21:20
0x02 0xf4 0xd1 0xa9
@@ -14,6 +16,8 @@
# CHECK: memd(r17 ++ #40:circ(m1)) = r21:20
0x28 0xd4 0xd1 0xab
# CHECK: memd(r17++#40) = r21:20
+0x00 0x40 0x00 0x00 0xd5 0xfe 0xd1 0xad
+# CHECK: memd(r17<<#3 + ##21) = r31:30
0x00 0xf4 0xd1 0xad
# CHECK: memd(r17++m1) = r21:20
0x00 0xf4 0xd1 0xaf
@@ -50,6 +54,16 @@
0x03 0x40 0x45 0x85 0xaf 0xf4 0xd1 0xab
# CHECK: p3 = r5
# CHECK-NEXT: if (!p3.new) memd(r17++#40) = r21:20
+0x02 0x40 0x00 0x00 0xc3 0xd4 0xc2 0xaf
+# CHECK: if (p3) memd(##168) = r21:20
+0x02 0x40 0x00 0x00 0xc7 0xd4 0xc2 0xaf
+# CHECK: if (!p3) memd(##168) = r21:20
+0x03 0x40 0x45 0x85 0x02 0x40 0x00 0x00 0xc3 0xf4 0xc2 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memd(##168) = r21:20
+0x03 0x40 0x45 0x85 0x02 0x40 0x00 0x00 0xc7 0xf4 0xc2 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memd(##168) = r21:20
# Store byte
0x9f 0xf5 0x11 0x3b
@@ -57,6 +71,8 @@
0x9f 0xca 0x11 0x3c
# CHECK: memb(r17+#21)=#31
0x15 0xd5 0x00 0x48
+# CHECK: memb(#21) = r21
+0x00 0x40 0x00 0x00 0x15 0xd5 0x00 0x48
# CHECK: memb(##21) = r21
0x15 0xd5 0x11 0xa1
# CHECK: memb(r17+#21) = r21
@@ -66,6 +82,8 @@
# CHECK: memb(r17 ++ #5:circ(m1)) = r21
0x28 0xd5 0x11 0xab
# CHECK: memb(r17++#5) = r21
+0x00 0x40 0x00 0x00 0xd5 0xff 0x11 0xad
+# CHECK: memb(r17<<#3 + ##21) = r31
0x00 0xf5 0x11 0xad
# CHECK: memb(r17++m1) = r21
0x00 0xf5 0x11 0xaf
@@ -112,6 +130,16 @@
0x03 0x40 0x45 0x85 0xaf 0xf5 0x11 0xab
# CHECK: p3 = r5
# CHECK-NEXT: if (!p3.new) memb(r17++#5) = r21
+0x00 0x40 0x00 0x00 0xab 0xd5 0x01 0xaf
+# CHECK: if (p3) memb(##21) = r21
+0x00 0x40 0x00 0x00 0xaf 0xd5 0x01 0xaf
+# CHECK: if (!p3) memb(##21) = r21
+0x03 0x40 0x45 0x85 0x00 0x40 0x00 0x00 0xab 0xf5 0x01 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memb(##21) = r21
+0x03 0x40 0x45 0x85 0x00 0x40 0x00 0x00 0xaf 0xf5 0x01 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memb(##21) = r21
# Store halfword
0x9f 0xf5 0x51 0x3b
@@ -120,10 +148,14 @@
# CHECK: memh(r17 + r21<<#3) = r31.h
0x95 0xcf 0x31 0x3c
# CHECK: memh(r17+#62)=#21
+0x00 0x40 0x00 0x00 0x2a 0xd5 0x40 0x48
+# CHECK: memh(##42) = r21
+0x00 0x40 0x00 0x00 0x2a 0xd5 0x60 0x48
+# CHECK: memh(##42) = r21.h
0x2a 0xd5 0x40 0x48
-# CHECK: memh(##84) = r21
+# CHECK: memh(#84) = r21
0x2a 0xd5 0x60 0x48
-# CHECK: memh(##84) = r21.h
+# CHECK: memh(#84) = r21.h
0x15 0xdf 0x51 0xa1
# CHECK: memh(r17+#42) = r31
0x15 0xdf 0x71 0xa1
@@ -138,8 +170,12 @@
# CHECK: memh(r17 ++ #10:circ(m1)) = r21.h
0x28 0xd5 0x51 0xab
# CHECK: memh(r17++#10) = r21
+0x00 0x40 0x00 0x00 0xd5 0xff 0x51 0xad
+# CHECK: memh(r17<<#3 + ##21) = r31
0x28 0xd5 0x71 0xab
# CHECK: memh(r17++#10) = r21.h
+0x00 0x40 0x00 0x00 0xd5 0xff 0x71 0xad
+# CHECK: memh(r17<<#3 + ##21) = r31.h
0x00 0xf5 0x51 0xad
# CHECK: memh(r17++m1) = r21
0x00 0xf5 0x71 0xad
@@ -220,22 +256,48 @@
0x03 0x40 0x45 0x85 0xaf 0xf5 0x71 0xab
# CHECK: p3 = r5
# CHECK-NEXT: if (!p3.new) memh(r17++#10) = r21.h
+0x00 0x40 0x00 0x00 0xd3 0xd5 0x42 0xaf
+# CHECK: if (p3) memh(##42) = r21
+0x00 0x40 0x00 0x00 0xd3 0xd5 0x62 0xaf
+# CHECK: if (p3) memh(##42) = r21.h
+0x00 0x40 0x00 0x00 0xd7 0xd5 0x42 0xaf
+# CHECK: if (!p3) memh(##42) = r21
+0x00 0x40 0x00 0x00 0xd7 0xd5 0x62 0xaf
+# CHECK: if (!p3) memh(##42) = r21.h
+0x03 0x40 0x45 0x85 0x00 0x40 0x00 0x00 0xd3 0xf5 0x42 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memh(##42) = r21
+0x03 0x40 0x45 0x85 0x00 0x40 0x00 0x00 0xd3 0xf5 0x62 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memh(##42) = r21.h
+0x03 0x40 0x45 0x85 0x00 0x40 0x00 0x00 0xd7 0xf5 0x42 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memh(##42) = r21
+0x03 0x40 0x45 0x85 0x00 0x40 0x00 0x00 0xd7 0xf5 0x62 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memh(##42) = r21.h
# Store word
0x9f 0xf5 0x91 0x3b
# CHECK: memw(r17 + r21<<#3) = r31
0x9f 0xca 0x51 0x3c
+# CHECK: memw(r17{{ *}}+{{ *}}#84)=#31
+0x15 0xdf 0x80 0x48
+# CHECK: memw(#84) = r31
+0x01 0x40 0x00 0x00 0x14 0xd5 0x80 0x48
+# CHECK: memw(##84) = r21
+0x9f 0xca 0x51 0x3c
# CHECK: memw(r17+#84)=#31
0x15 0xdf 0x91 0xa1
# CHECK: memw(r17+#84) = r31
-0x14 0xd5 0x80 0x48
-# CHECK: memw(##80) = r21
0x02 0xf5 0x91 0xa9
# CHECK: memw(r17 ++ I:circ(m1)) = r21
0x28 0xf5 0x91 0xa9
# CHECK: memw(r17 ++ #20:circ(m1)) = r21
0x28 0xd5 0x91 0xab
# CHECK: memw(r17++#20) = r21
+0x00 0x40 0x00 0x00 0xd5 0xff 0x91 0xad
+# CHECK: memw(r17<<#3 + ##21) = r31
0x00 0xf5 0x91 0xad
# CHECK: memw(r17++m1) = r21
0x00 0xf5 0x91 0xaf
@@ -282,7 +344,17 @@
0x03 0x40 0x45 0x85 0xab 0xf5 0x91 0xab
# CHECK: p3 = r5
# CHECK-NEXT: if (p3.new) memw(r17++#20) = r21
+0x01 0x40 0x00 0x00 0xa3 0xd5 0x81 0xaf
+# CHECK: if (p3) memw(##84) = r21
+0x01 0x40 0x00 0x00 0xa7 0xd5 0x81 0xaf
+# CHECK: if (!p3) memw(##84) = r21
+0x03 0x40 0x45 0x85 0x01 0x40 0x00 0x00 0xa3 0xf5 0x81 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memw(##84) = r21
+0x03 0x40 0x45 0x85 0x01 0x40 0x00 0x00 0xa7 0xf5 0x81 0xaf
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memw(##84) = r21
# Allocate stack frame
0x1f 0xc0 0x9d 0xa0
-# CHECK: allocframe(#248) \ No newline at end of file
+# CHECK: allocframe(#248)
diff --git a/test/MC/Disassembler/Hexagon/too_many_instructions.txt b/test/MC/Disassembler/Hexagon/too_many_instructions.txt
new file mode 100644
index 000000000000..2aaa22d37f91
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/too_many_instructions.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple=hexagon -disassemble < %s 2>&1 | FileCheck %s
+
+#CHECK: warning: invalid instruction encoding
+0x00 0x40 0x00 0x7f 0x00 0x40 0x00 0x7f 0x00 0x40 0x00 0x7f 0x00 0x40 0x00 0x7f 0x00 0xc0 0x00 0x7f \ No newline at end of file
diff --git a/test/MC/Disassembler/Hexagon/too_many_loop_ends.txt b/test/MC/Disassembler/Hexagon/too_many_loop_ends.txt
new file mode 100644
index 000000000000..55a6f0b3692e
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/too_many_loop_ends.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -triple=hexagon -disassemble < %s 2>&1 | FileCheck %s
+
+#CHECK: warning: invalid instruction encoding
+0x00 0x80 0x00 0x7f 0x00 0x80 0x00 0x7f 0x00 0x80 0x00 0x7f 0x00 0xc0 0x00 0x7f \ No newline at end of file
diff --git a/test/MC/Disassembler/Hexagon/unextendable.txt b/test/MC/Disassembler/Hexagon/unextendable.txt
new file mode 100644
index 000000000000..377c123177c6
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/unextendable.txt
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple=hexagon -disassemble < %s 2>&1 | FileCheck %s
+
+#Invalid immediate extend duplex load/load
+#CHECK: warning: invalid instruction encoding
+0xfe 0x40 0x00 0x00 0x11 0x00 0x00 0x00
+
+#Invalid immediate extend barrier
+#CHECK: warning: invalid instruction encoding
+0xfe 0x40 0x00 0x00 0x00 0xc0 0x00 0xa8 \ No newline at end of file
diff --git a/test/MC/Disassembler/Mips/dsp/valid-el.txt b/test/MC/Disassembler/Mips/dsp/valid-el.txt
new file mode 100644
index 000000000000..56f363b62a21
--- /dev/null
+++ b/test/MC/Disassembler/Mips/dsp/valid-el.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -triple=mipsel-unknown-linux -mattr=+dsp -disassemble < %s | FileCheck %s
+
+0x10 0xa8 0x60 0x00 # CHECK: mfhi $21, $ac3
+0x12 0xa8 0x60 0x00 # CHECK: mflo $21, $ac3
+0x11 0x18 0xa0 0x02 # CHECK: mthi $21, $ac3
+0x13 0x18 0xa0 0x02 # CHECK: mtlo $21, $ac3
+0x8a 0x51 0x54 0x7f # CHECK: lbux $10, $20($26)
+0x0a 0x59 0x75 0x7f # CHECK: lhx $11, $21($27)
+0x0a 0x60 0x96 0x7f # CHECK: lwx $12, $22($gp)
+0xb8 0x0e 0x30 0x7c # CHECK: shilo $ac1, 3
+0xf8 0x14 0xa0 0x7c # CHECK: wrdsp $5, 2
+0xf8 0xfc 0xa0 0x7c # CHECK: wrdsp $5
diff --git a/test/MC/Disassembler/Mips/dsp/valid.txt b/test/MC/Disassembler/Mips/dsp/valid.txt
new file mode 100644
index 000000000000..e6ca900dde55
--- /dev/null
+++ b/test/MC/Disassembler/Mips/dsp/valid.txt
@@ -0,0 +1,125 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mattr=dsp | FileCheck %s
+
+ 0x7c 0x02 0x0a 0x52 # CHECK: absq_s.ph $1, $2
+ 0x7c 0x06 0x2c 0x52 # CHECK: absq_s.w $5, $6
+ 0x7d 0x09 0x3a 0x90 # CHECK: addq.ph $7, $8, $9
+ 0x7d 0x6c 0x53 0x90 # CHECK: addq_s.ph $10, $11, $12
+ 0x7d 0xcf 0x6d 0x90 # CHECK: addq_s.w $13, $14, $15
+ 0x7f 0xbe 0xe4 0x10 # CHECK: addsc $gp, $sp, $fp
+ 0x7c 0xe8 0x30 0x10 # CHECK: addu.qb $6, $7, $8
+ 0x7d 0x4b 0x49 0x10 # CHECK: addu_s.qb $9, $10, $11
+ 0x7d 0xae 0x64 0x50 # CHECK: addwc $12, $13, $14
+ 0x7c 0x1a 0xce 0xd2 # CHECK: bitrev $25, $26
+ 0x04 0x1c 0x14 0x9b # CHECK: bposge32 21104
+ 0x7f 0x7c 0x02 0x11 # CHECK: cmp.eq.ph $27, $gp
+ 0x7f 0xbe 0x02 0x51 # CHECK: cmp.lt.ph $sp, $fp
+ 0x7f 0xe1 0x02 0x91 # CHECK: cmp.le.ph $ra, $1
+ 0x7d 0x8d 0x59 0x11 # CHECK: cmpgu.eq.qb $11, $12, $13
+ 0x7d 0xf0 0x71 0x51 # CHECK: cmpgu.lt.qb $14, $15, $16
+ 0x7e 0x53 0x89 0x91 # CHECK: cmpgu.le.qb $17, $18, $19
+ 0x7e 0x95 0x00 0x11 # CHECK: cmpu.eq.qb $20, $21
+ 0x7e 0xd7 0x00 0x51 # CHECK: cmpu.lt.qb $22, $23
+ 0x7f 0x19 0x00 0x91 # CHECK: cmpu.le.qb $24, $25
+ 0x7c 0x22 0x09 0x30 # CHECK: dpaq_s.w.ph $ac1, $1, $2
+ 0x7c 0x64 0x13 0x30 # CHECK: dpaq_sa.l.w $ac2, $3, $4
+ 0x7d 0x2a 0x08 0xf0 # CHECK: dpau.h.qbl $ac1, $9, $10
+ 0x7d 0x6c 0x09 0xf0 # CHECK: dpau.h.qbr $ac1, $11, $12
+ 0x7e 0x32 0x01 0x70 # CHECK: dpsq_s.w.ph $ac0, $17, $18
+ 0x7e 0x74 0x0b 0x70 # CHECK: dpsq_sa.l.w $ac1, $19, $20
+ 0x7c 0xa6 0x02 0xf0 # CHECK: dpsu.h.qbl $ac0, $5, $6
+ 0x7c 0xe8 0x0b 0xf0 # CHECK: dpsu.h.qbr $ac1, $7, $8
+ 0x7f 0xe1 0x00 0xb8 # CHECK: extp $1, $ac0, 31
+ 0x7c 0x02 0x0a 0xb8 # CHECK: extpdp $2, $ac1, 0
+ 0x7c 0x83 0x12 0xf8 # CHECK: extpdpv $3, $ac2, $4
+ 0x7c 0xc5 0x18 0xf8 # CHECK: extpv $5, $ac3, $6
+ 0x7f 0xe7 0x00 0x38 # CHECK: extr.w $7, $ac0, 31
+ 0x7d 0xe8 0x09 0x38 # CHECK: extr_r.w $8, $ac1, 15
+ 0x7c 0xe9 0x11 0xb8 # CHECK: extr_rs.w $9, $ac2, 7
+ 0x7c 0x6a 0x1b 0xb8 # CHECK: extr_s.h $10, $ac3, 3
+ 0x7d 0x8b 0x00 0x78 # CHECK: extrv.w $11, $ac0, $12
+ 0x7d 0xcd 0x09 0x78 # CHECK: extrv_r.w $13, $ac1, $14
+ 0x7e 0x0f 0x11 0xf8 # CHECK: extrv_rs.w $15, $ac2, $16
+ 0x7e 0x51 0x1b 0xf8 # CHECK: extrv_s.h $17, $ac3, $18
+ 0x7e 0x93 0x00 0x0c # CHECK: insv $19, $20
+ 0x7f 0x54 0x51 0x8a # CHECK: lbux $10, $20($26)
+ 0x7f 0x75 0x59 0x0a # CHECK: lhx $11, $21($27)
+ 0x7f 0x96 0x60 0x0a # CHECK: lwx $12, $22($gp)
+ 0x70 0xc7 0x08 0x00 # CHECK: madd $ac1, $6, $7
+ 0x71 0x09 0x08 0x01 # CHECK: maddu $ac1, $8, $9
+ 0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
+ 0x71 0x09 0x00 0x01 # CHECK: maddu $8, $9
+ 0x7c 0x64 0x15 0x30 # CHECK: maq_s.w.phl $ac2, $3, $4
+ 0x7c 0xa6 0x1c 0x30 # CHECK: maq_sa.w.phl $ac3, $5, $6
+ 0x7c 0xe8 0x05 0xb0 # CHECK: maq_s.w.phr $ac0, $7, $8
+ 0x7d 0x2a 0x0c 0xb0 # CHECK: maq_sa.w.phr $ac1, $9, $10
+ 0x00 0x20 0x70 0x10 # CHECK: mfhi $14, $ac1
+ 0x00 0x20 0x78 0x12 # CHECK: mflo $15, $ac1
+ 0x00 0x00 0x70 0x10 # CHECK: mfhi $14
+ 0x00 0x00 0x78 0x12 # CHECK: mflo $15
+ 0x7d 0x8d 0x5c 0x90 # CHECK: modsub $11, $12, $13
+ 0x71 0x4b 0x18 0x04 # CHECK: msub $ac3, $10, $11
+ 0x71 0x8d 0x10 0x05 # CHECK: msubu $ac2, $12, $13
+ 0x71 0x4b 0x00 0x04 # CHECK: msub $10, $11
+ 0x71 0x8d 0x00 0x05 # CHECK: msubu $12, $13
+ 0x02 0x00 0x18 0x11 # CHECK: mthi $16, $ac3
+ 0x02 0x00 0x00 0x11 # CHECK: mthi $16
+ 0x7d 0xc0 0x17 0xf8 # CHECK: mthlip $14, $ac2
+ 0x02 0x20 0x10 0x13 # CHECK: mtlo $17, $ac2
+ 0x02 0x20 0x00 0x13 # CHECK: mtlo $17
+ 0x7e 0xd7 0xaf 0x10 # CHECK: muleq_s.w.phl $21, $22, $23
+ 0x7f 0x3a 0xc7 0x50 # CHECK: muleq_s.w.phr $24, $25, $26
+ 0x7f 0x9d 0xd9 0x90 # CHECK: muleu_s.ph.qbl $27, $gp, $sp
+ 0x7f 0xe1 0xf1 0xd0 # CHECK: muleu_s.ph.qbr $fp, $ra, $1
+ 0x7c 0x64 0x17 0xd0 # CHECK: mulq_rs.ph $2, $3, $4
+ 0x7e 0x11 0x01 0xb0 # CHECK: mulsaq_s.w.ph $ac0, $16, $17
+ 0x00 0x43 0x18 0x18 # CHECK: mult $ac3, $2, $3
+ 0x00 0x85 0x10 0x19 # CHECK: multu $ac2, $4, $5
+ 0x00 0x43 0x00 0x18 # CHECK: mult $2, $3
+ 0x00 0x85 0x00 0x19 # CHECK: multu $4, $5
+ 0x7e 0x74 0x93 0x91 # CHECK: packrl.ph $18, $19, $20
+ 0x7d 0xe3 0x3a 0xd1 # CHECK: pick.ph $7, $15, $3
+ 0x7c 0x88 0x10 0xd1 # CHECK: pick.qb $2, $4, $8
+ 0x7c 0x15 0xa3 0x12 # CHECK: preceq.w.phl $20, $21
+ 0x7c 0x16 0xab 0x52 # CHECK: preceq.w.phr $21, $22
+ 0x7c 0x17 0xb1 0x12 # CHECK: precequ.ph.qbl $22, $23
+ 0x7c 0x19 0xc1 0x92 # CHECK: precequ.ph.qbla $24, $25
+ 0x7c 0x18 0xb9 0x52 # CHECK: precequ.ph.qbr $23, $24
+ 0x7c 0x1a 0xc9 0xd2 # CHECK: precequ.ph.qbra $25, $26
+ 0x7c 0x1b 0xd7 0x12 # CHECK: preceu.ph.qbl $26, $27
+ 0x7c 0x1d 0xe7 0x92 # CHECK: preceu.ph.qbla $gp, $sp
+ 0x7c 0x1c 0xdf 0x52 # CHECK: preceu.ph.qbr $27, $gp
+ 0x7c 0x1e 0xef 0xd2 # CHECK: preceu.ph.qbra $sp, $fp
+ 0x7e 0x53 0x8d 0x11 # CHECK: precrq.ph.w $17, $18, $19
+ 0x7e 0x32 0x83 0x11 # CHECK: precrq.qb.ph $16, $17, $18
+ 0x7e 0x95 0x9b 0xd1 # CHECK: precrqu_s.qb.ph $19, $20, $21
+ 0x7e 0x74 0x95 0x51 # CHECK: precrq_rs.ph.w $18, $19, $20
+ 0x7c 0x40 0x0d 0x10 # CHECK: raddu.w.qb $1, $2
+ 0x7d 0x00 0x2c 0xb8 # CHECK: rddsp $5, 256
+ 0x7c 0x0c 0x12 0x92 # CHECK: repl.ph $2, 12
+ 0x7c 0x55 0x08 0x92 # CHECK: repl.qb $1, 85
+ 0x7c 0x02 0x0a 0xd2 # CHECK: replv.ph $1, $2
+ 0x7c 0x02 0x08 0xd2 # CHECK: replv.qb $1, $2
+ 0x7d 0x00 0x0e 0xb8 # CHECK: shilo $ac1, 16
+ 0x7c 0x40 0x0e 0xf8 # CHECK: shilov $ac1, $2
+ 0x7c 0x62 0x0a 0x13 # CHECK: shll.ph $1, $2, 3
+ 0x7c 0x62 0x0b 0x13 # CHECK: shll_s.ph $1, $2, 3
+ 0x7c 0x62 0x08 0x13 # CHECK: shll.qb $1, $2, 3
+ 0x7c 0x62 0x0a 0x93 # CHECK: shllv.ph $1, $2, $3
+ 0x7c 0x62 0x0b 0x93 # CHECK: shllv_s.ph $1, $2, $3
+ 0x7c 0x62 0x08 0x93 # CHECK: shllv.qb $1, $2, $3
+ 0x7c 0x62 0x0d 0x93 # CHECK: shllv_s.w $1, $2, $3
+ 0x7c 0x62 0x0d 0x13 # CHECK: shll_s.w $1, $2, 3
+ 0x7c 0x22 0x2a 0x53 # CHECK: shra.ph $5, $2, 1
+ 0x7c 0x22 0x2b 0x53 # CHECK: shra_r.ph $5, $2, 1
+ 0x7c 0x62 0x0a 0xd3 # CHECK: shrav.ph $1, $2, $3
+ 0x7c 0x62 0x0b 0xd3 # CHECK: shrav_r.ph $1, $2, $3
+ 0x7c 0x62 0x0d 0xd3 # CHECK: shrav_r.w $1, $2, $3
+ 0x7c 0x22 0x0d 0x53 # CHECK: shra_r.w $1, $2, 1
+ 0x7c 0x42 0x08 0x53 # CHECK: shrl.qb $1, $2, 2
+ 0x7c 0x62 0x08 0xd3 # CHECK: shrlv.qb $1, $2, $3
+ 0x7c 0x43 0x0a 0xd0 # CHECK: subq.ph $1, $2, $3
+ 0x7c 0x43 0x0b 0xd0 # CHECK: subq_s.ph $1, $2, $3
+ 0x7c 0x43 0x0d 0xd0 # CHECK: subq_s.w $1, $2, $3
+ 0x7c 0x43 0x08 0x50 # CHECK: subu.qb $1, $2, $3
+ 0x7c 0x43 0x09 0x50 # CHECK: subu_s.qb $1, $2, $3
+ 0x7c 0x20 0x04 0xf8 # CHECK: wrdsp $1, 0
diff --git a/test/MC/Disassembler/Mips/dspr2/valid.txt b/test/MC/Disassembler/Mips/dspr2/valid.txt
new file mode 100644
index 000000000000..b1b5a332dc56
--- /dev/null
+++ b/test/MC/Disassembler/Mips/dspr2/valid.txt
@@ -0,0 +1,173 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mattr=dspr2 | FileCheck %s
+
+ 0x7c 0x02 0x0a 0x52 # CHECK: absq_s.ph $1, $2
+ 0x7c 0x04 0x18 0x52 # CHECK: absq_s.qb $3, $4
+ 0x7c 0x06 0x2c 0x52 # CHECK: absq_s.w $5, $6
+ 0x7d 0x09 0x3a 0x90 # CHECK: addq.ph $7, $8, $9
+ 0x7d 0x6c 0x53 0x90 # CHECK: addq_s.ph $10, $11, $12
+ 0x7d 0xcf 0x6d 0x90 # CHECK: addq_s.w $13, $14, $15
+ 0x7e 0x32 0x82 0x18 # CHECK: addqh.ph $16, $17, $18
+ 0x7e 0x95 0x9a 0x98 # CHECK: addqh_r.ph $19, $20, $21
+ 0x7e 0xf8 0xb4 0x18 # CHECK: addqh.w $22, $23, $24
+ 0x7f 0x5b 0xcc 0x98 # CHECK: addqh_r.w $25, $26, $27
+ 0x7f 0xbe 0xe4 0x10 # CHECK: addsc $gp, $sp, $fp
+ 0x7c 0x22 0xfa 0x10 # CHECK: addu.ph $ra, $1, $2
+ 0x7c 0x85 0x1b 0x10 # CHECK: addu_s.ph $3, $4, $5
+ 0x7c 0xe8 0x30 0x10 # CHECK: addu.qb $6, $7, $8
+ 0x7d 0x4b 0x49 0x10 # CHECK: addu_s.qb $9, $10, $11
+ 0x7d 0xae 0x64 0x50 # CHECK: addwc $12, $13, $14
+ 0x7e 0x11 0x78 0x18 # CHECK: adduh.qb $15, $16, $17
+ 0x7e 0x74 0x90 0x98 # CHECK: adduh_r.qb $18, $19, $20
+ 0x7e 0xd5 0x00 0x31 # CHECK: append $21, $22, 0
+ 0x7f 0x17 0x1c 0x31 # CHECK: balign $23, $24, 3
+ 0x7c 0x1a 0xce 0xd2 # CHECK: bitrev $25, $26
+ 0x04 0x1c 0x14 0x9b # CHECK: bposge32 21104
+ 0x7f 0x7c 0x02 0x11 # CHECK: cmp.eq.ph $27, $gp
+ 0x7f 0xbe 0x02 0x51 # CHECK: cmp.lt.ph $sp, $fp
+ 0x7f 0xe1 0x02 0x91 # CHECK: cmp.le.ph $ra, $1
+ 0x7c 0x64 0x16 0x11 # CHECK: cmpgdu.eq.qb $2, $3, $4
+ 0x7c 0xc7 0x2e 0x51 # CHECK: cmpgdu.lt.qb $5, $6, $7
+ 0x7d 0x2a 0x46 0x91 # CHECK: cmpgdu.le.qb $8, $9, $10
+ 0x7d 0x8d 0x59 0x11 # CHECK: cmpgu.eq.qb $11, $12, $13
+ 0x7d 0xf0 0x71 0x51 # CHECK: cmpgu.lt.qb $14, $15, $16
+ 0x7e 0x53 0x89 0x91 # CHECK: cmpgu.le.qb $17, $18, $19
+ 0x7e 0x95 0x00 0x11 # CHECK: cmpu.eq.qb $20, $21
+ 0x7e 0xd7 0x00 0x51 # CHECK: cmpu.lt.qb $22, $23
+ 0x7f 0x19 0x00 0x91 # CHECK: cmpu.le.qb $24, $25
+ 0x7f 0x5b 0x00 0x30 # CHECK: dpa.w.ph $ac0, $26, $27
+ 0x7c 0x22 0x09 0x30 # CHECK: dpaq_s.w.ph $ac1, $1, $2
+ 0x7c 0x64 0x13 0x30 # CHECK: dpaq_sa.l.w $ac2, $3, $4
+ 0x7c 0xa6 0x1e 0x30 # CHECK: dpaqx_s.w.ph $ac3, $5, $6
+ 0x7c 0xe8 0x06 0xb0 # CHECK: dpaqx_sa.w.ph $ac0, $7, $8
+ 0x7d 0x2a 0x08 0xf0 # CHECK: dpau.h.qbl $ac1, $9, $10
+ 0x7d 0x6c 0x09 0xf0 # CHECK: dpau.h.qbr $ac1, $11, $12
+ 0x7d 0xae 0x12 0x30 # CHECK: dpax.w.ph $ac2, $13, $14
+ 0x7d 0xf0 0x18 0x70 # CHECK: dps.w.ph $ac3, $15, $16
+ 0x7e 0x32 0x01 0x70 # CHECK: dpsq_s.w.ph $ac0, $17, $18
+ 0x7e 0x74 0x0b 0x70 # CHECK: dpsq_sa.l.w $ac1, $19, $20
+ 0x7c 0x22 0x16 0x70 # CHECK: dpsqx_s.w.ph $ac2, $1, $2
+ 0x7c 0x64 0x1e 0xf0 # CHECK: dpsqx_sa.w.ph $ac3, $3, $4
+ 0x7c 0xa6 0x02 0xf0 # CHECK: dpsu.h.qbl $ac0, $5, $6
+ 0x7c 0xe8 0x0b 0xf0 # CHECK: dpsu.h.qbr $ac1, $7, $8
+ 0x7d 0x2a 0x12 0x70 # CHECK: dpsx.w.ph $ac2, $9, $10
+ 0x7f 0xe1 0x00 0xb8 # CHECK: extp $1, $ac0, 31
+ 0x7c 0x02 0x0a 0xb8 # CHECK: extpdp $2, $ac1, 0
+ 0x7c 0x83 0x12 0xf8 # CHECK: extpdpv $3, $ac2, $4
+ 0x7c 0xc5 0x18 0xf8 # CHECK: extpv $5, $ac3, $6
+ 0x7f 0xe7 0x00 0x38 # CHECK: extr.w $7, $ac0, 31
+ 0x7d 0xe8 0x09 0x38 # CHECK: extr_r.w $8, $ac1, 15
+ 0x7c 0xe9 0x11 0xb8 # CHECK: extr_rs.w $9, $ac2, 7
+ 0x7c 0x6a 0x1b 0xb8 # CHECK: extr_s.h $10, $ac3, 3
+ 0x7d 0x8b 0x00 0x78 # CHECK: extrv.w $11, $ac0, $12
+ 0x7d 0xcd 0x09 0x78 # CHECK: extrv_r.w $13, $ac1, $14
+ 0x7e 0x0f 0x11 0xf8 # CHECK: extrv_rs.w $15, $ac2, $16
+ 0x7e 0x51 0x1b 0xf8 # CHECK: extrv_s.h $17, $ac3, $18
+ 0x7e 0x93 0x00 0x0c # CHECK: insv $19, $20
+ 0x7f 0x54 0x51 0x8a # CHECK: lbux $10, $20($26)
+ 0x7f 0x75 0x59 0x0a # CHECK: lhx $11, $21($27)
+ 0x7f 0x96 0x60 0x0a # CHECK: lwx $12, $22($gp)
+ 0x70 0xc7 0x08 0x00 # CHECK: madd $ac1, $6, $7
+ 0x71 0x09 0x08 0x01 # CHECK: maddu $ac1, $8, $9
+ 0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
+ 0x71 0x09 0x00 0x01 # CHECK: maddu $8, $9
+ 0x7c 0x64 0x15 0x30 # CHECK: maq_s.w.phl $ac2, $3, $4
+ 0x7c 0xa6 0x1c 0x30 # CHECK: maq_sa.w.phl $ac3, $5, $6
+ 0x7c 0xe8 0x05 0xb0 # CHECK: maq_s.w.phr $ac0, $7, $8
+ 0x7d 0x2a 0x0c 0xb0 # CHECK: maq_sa.w.phr $ac1, $9, $10
+ 0x00 0x20 0x70 0x10 # CHECK: mfhi $14, $ac1
+ 0x00 0x20 0x78 0x12 # CHECK: mflo $15, $ac1
+ 0x00 0x00 0x70 0x10 # CHECK: mfhi $14
+ 0x00 0x00 0x78 0x12 # CHECK: mflo $15
+ 0x7d 0x8d 0x5c 0x90 # CHECK: modsub $11, $12, $13
+ 0x71 0x4b 0x18 0x04 # CHECK: msub $ac3, $10, $11
+ 0x71 0x8d 0x10 0x05 # CHECK: msubu $ac2, $12, $13
+ 0x71 0x4b 0x00 0x04 # CHECK: msub $10, $11
+ 0x71 0x8d 0x00 0x05 # CHECK: msubu $12, $13
+ 0x02 0x00 0x18 0x11 # CHECK: mthi $16, $ac3
+ 0x02 0x00 0x00 0x11 # CHECK: mthi $16
+ 0x7d 0xc0 0x17 0xf8 # CHECK: mthlip $14, $ac2
+ 0x02 0x20 0x10 0x13 # CHECK: mtlo $17, $ac2
+ 0x02 0x20 0x00 0x13 # CHECK: mtlo $17
+ 0x7e 0x11 0x7b 0x18 # CHECK: mul.ph $15, $16, $17
+ 0x7e 0x74 0x93 0x98 # CHECK: mul_s.ph $18, $19, $20
+ 0x7e 0xd7 0xaf 0x10 # CHECK: muleq_s.w.phl $21, $22, $23
+ 0x7f 0x3a 0xc7 0x50 # CHECK: muleq_s.w.phr $24, $25, $26
+ 0x7f 0x9d 0xd9 0x90 # CHECK: muleu_s.ph.qbl $27, $gp, $sp
+ 0x7f 0xe1 0xf1 0xd0 # CHECK: muleu_s.ph.qbr $fp, $ra, $1
+ 0x7c 0x64 0x17 0xd0 # CHECK: mulq_rs.ph $2, $3, $4
+ 0x7c 0xc7 0x2d 0xd8 # CHECK: mulq_rs.w $5, $6, $7
+ 0x7d 0x2a 0x47 0x90 # CHECK: mulq_s.ph $8, $9, $10
+ 0x7d 0x8d 0x5d 0x98 # CHECK: mulq_s.w $11, $12, $13
+ 0x7d 0xcf 0x18 0xb0 # CHECK: mulsa.w.ph $ac3, $14, $15
+ 0x7e 0x11 0x01 0xb0 # CHECK: mulsaq_s.w.ph $ac0, $16, $17
+ 0x00 0x43 0x18 0x18 # CHECK: mult $ac3, $2, $3
+ 0x00 0x85 0x10 0x19 # CHECK: multu $ac2, $4, $5
+ 0x00 0x43 0x00 0x18 # CHECK: mult $2, $3
+ 0x00 0x85 0x00 0x19 # CHECK: multu $4, $5
+ 0x7e 0x74 0x93 0x91 # CHECK: packrl.ph $18, $19, $20
+ 0x7d 0xe3 0x3a 0xd1 # CHECK: pick.ph $7, $15, $3
+ 0x7c 0x88 0x10 0xd1 # CHECK: pick.qb $2, $4, $8
+ 0x7c 0x15 0xa3 0x12 # CHECK: preceq.w.phl $20, $21
+ 0x7c 0x16 0xab 0x52 # CHECK: preceq.w.phr $21, $22
+ 0x7c 0x17 0xb1 0x12 # CHECK: precequ.ph.qbl $22, $23
+ 0x7c 0x19 0xc1 0x92 # CHECK: precequ.ph.qbla $24, $25
+ 0x7c 0x18 0xb9 0x52 # CHECK: precequ.ph.qbr $23, $24
+ 0x7c 0x1a 0xc9 0xd2 # CHECK: precequ.ph.qbra $25, $26
+ 0x7c 0x1b 0xd7 0x12 # CHECK: preceu.ph.qbl $26, $27
+ 0x7c 0x1d 0xe7 0x92 # CHECK: preceu.ph.qbla $gp, $sp
+ 0x7c 0x1c 0xdf 0x52 # CHECK: preceu.ph.qbr $27, $gp
+ 0x7c 0x1e 0xef 0xd2 # CHECK: preceu.ph.qbra $sp, $fp
+ 0x7f 0x19 0xbb 0x51 # CHECK: precr.qb.ph $23, $24, $25
+ 0x7f 0x38 0x07 0x91 # CHECK: precr_sra.ph.w $24, $25, 0
+ 0x7f 0x38 0xff 0x91 # CHECK: precr_sra.ph.w $24, $25, 31
+ 0x7f 0x59 0x07 0xd1 # CHECK: precr_sra_r.ph.w $25, $26, 0
+ 0x7f 0x59 0xff 0xd1 # CHECK: precr_sra_r.ph.w $25, $26, 31
+ 0x7e 0x53 0x8d 0x11 # CHECK: precrq.ph.w $17, $18, $19
+ 0x7e 0x32 0x83 0x11 # CHECK: precrq.qb.ph $16, $17, $18
+ 0x7e 0x95 0x9b 0xd1 # CHECK: precrqu_s.qb.ph $19, $20, $21
+ 0x7e 0x74 0x95 0x51 # CHECK: precrq_rs.ph.w $18, $19, $20
+ 0x7c 0x41 0x18 0x71 # CHECK: prepend $1, $2, 3
+ 0x7c 0x40 0x0d 0x10 # CHECK: raddu.w.qb $1, $2
+ 0x7d 0x00 0x2c 0xb8 # CHECK: rddsp $5, 256
+ 0x7c 0x0c 0x12 0x92 # CHECK: repl.ph $2, 12
+ 0x7c 0x55 0x08 0x92 # CHECK: repl.qb $1, 85
+ 0x7c 0x02 0x0a 0xd2 # CHECK: replv.ph $1, $2
+ 0x7c 0x02 0x08 0xd2 # CHECK: replv.qb $1, $2
+ 0x7d 0x00 0x0e 0xb8 # CHECK: shilo $ac1, 16
+ 0x7c 0x40 0x0e 0xf8 # CHECK: shilov $ac1, $2
+ 0x7c 0x62 0x0a 0x13 # CHECK: shll.ph $1, $2, 3
+ 0x7c 0x62 0x0b 0x13 # CHECK: shll_s.ph $1, $2, 3
+ 0x7c 0x62 0x08 0x13 # CHECK: shll.qb $1, $2, 3
+ 0x7c 0x62 0x0a 0x93 # CHECK: shllv.ph $1, $2, $3
+ 0x7c 0x62 0x0b 0x93 # CHECK: shllv_s.ph $1, $2, $3
+ 0x7c 0x62 0x08 0x93 # CHECK: shllv.qb $1, $2, $3
+ 0x7c 0x62 0x0d 0x93 # CHECK: shllv_s.w $1, $2, $3
+ 0x7c 0x62 0x0d 0x13 # CHECK: shll_s.w $1, $2, 3
+ 0x7c 0x50 0x11 0x13 # CHECK: shra.qb $2, $16, 2
+ 0x7c 0x50 0x11 0x53 # CHECK: shra_r.qb $2, $16, 2
+ 0x7c 0x22 0x2a 0x53 # CHECK: shra.ph $5, $2, 1
+ 0x7c 0x22 0x2b 0x53 # CHECK: shra_r.ph $5, $2, 1
+ 0x7c 0x62 0x0a 0xd3 # CHECK: shrav.ph $1, $2, $3
+ 0x7c 0x62 0x0b 0xd3 # CHECK: shrav_r.ph $1, $2, $3
+ 0x7c 0x62 0x09 0x93 # CHECK: shrav.qb $1, $2, $3
+ 0x7c 0x62 0x09 0xd3 # CHECK: shrav_r.qb $1, $2, $3
+ 0x7c 0x62 0x0d 0xd3 # CHECK: shrav_r.w $1, $2, $3
+ 0x7c 0x22 0x0d 0x53 # CHECK: shra_r.w $1, $2, 1
+ 0x7c 0x42 0x0e 0x53 # CHECK: shrl.ph $1, $2, 2
+ 0x7c 0x42 0x08 0x53 # CHECK: shrl.qb $1, $2, 2
+ 0x7c 0x62 0x0e 0xd3 # CHECK: shrlv.ph $1, $2, $3
+ 0x7c 0x62 0x08 0xd3 # CHECK: shrlv.qb $1, $2, $3
+ 0x7c 0x43 0x0a 0xd0 # CHECK: subq.ph $1, $2, $3
+ 0x7c 0x43 0x0b 0xd0 # CHECK: subq_s.ph $1, $2, $3
+ 0x7c 0x43 0x0d 0xd0 # CHECK: subq_s.w $1, $2, $3
+ 0x7c 0x43 0x0a 0x58 # CHECK: subqh.ph $1, $2, $3
+ 0x7c 0x43 0x0a 0xd8 # CHECK: subqh_r.ph $1, $2, $3
+ 0x7c 0x43 0x0c 0x58 # CHECK: subqh.w $1, $2, $3
+ 0x7c 0x43 0x0c 0xd8 # CHECK: subqh_r.w $1, $2, $3
+ 0x7c 0x49 0x32 0x50 # CHECK: subu.ph $6, $2, $9
+ 0x7c 0x64 0x13 0x50 # CHECK: subu_s.ph $2, $3, $4
+ 0x7c 0x43 0x08 0x50 # CHECK: subu.qb $1, $2, $3
+ 0x7c 0x43 0x09 0x50 # CHECK: subu_s.qb $1, $2, $3
+ 0x7c 0x43 0x08 0x58 # CHECK: subuh.qb $1, $2, $3
+ 0x7c 0x43 0x08 0xd8 # CHECK: subuh_r.qb $1, $2, $3
+ 0x7c 0x20 0x04 0xf8 # CHECK: wrdsp $1, 0
diff --git a/test/MC/Disassembler/Mips/eva/valid_R6-eva.txt b/test/MC/Disassembler/Mips/eva/valid_R6-eva.txt
new file mode 100644
index 000000000000..b9525ba1206f
--- /dev/null
+++ b/test/MC/Disassembler/Mips/eva/valid_R6-eva.txt
@@ -0,0 +1,38 @@
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips32r6 -mattr=eva | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r6 -mattr=eva | FileCheck %s
+# CHECK: .text
+0x7c 0xff 0x7f 0x9b # CHECK: cachee 31, 255($7)
+0x7c 0x80 0x80 0x1b # CHECK: cachee 0, -256($4)
+0x7c 0x85 0xba 0x1b # CHECK: cachee 5, -140($4)
+0x7f 0x2a 0x80 0x2c # CHECK: lbe $10, -256($25)
+0x7d 0xed 0x7f 0xac # CHECK: lbe $13, 255($15)
+0x7d 0xcb 0x49 0x2c # CHECK: lbe $11, 146($14)
+0x7c 0x6d 0x80 0x28 # CHECK: lbue $13, -256($3)
+0x7c 0x4d 0x7f 0xa8 # CHECK: lbue $13, 255($2)
+0x7c 0x6d 0xa1 0x28 # CHECK: lbue $13, -190($3)
+0x7e 0xad 0x80 0x2d # CHECK: lhe $13, -256($21)
+0x7e 0x0c 0x7f 0xad # CHECK: lhe $12, 255($16)
+0x7e 0x0d 0x28 0xad # CHECK: lhe $13, 81($16)
+0x7c 0x72 0x80 0x29 # CHECK: lhue $18, -256($3)
+0x7c 0x72 0x7f 0xa9 # CHECK: lhue $18, 255($3)
+0x7c 0x56 0xac 0x29 # CHECK: lhue $22, -168($2)
+0x7e 0xa2 0x80 0x2e # CHECK: lle $2, -256($21)
+0x7e 0x63 0x7f 0xae # CHECK: lle $3, 255($19)
+0x7e 0xc3 0xdc 0xae # CHECK: lle $3, -71($22)
+0x7c 0x4e 0x80 0x23 # CHECK: prefe 14, -256($2)
+0x7c 0x6b 0x7f 0xa3 # CHECK: prefe 11, 255($3)
+0x7c 0x6e 0xed 0xa3 # CHECK: prefe 14, -37($3)
+0x7d 0x71 0x7f 0x9c # CHECK: sbe $17, 255($11)
+0x7d 0x51 0x80 0x1c # CHECK: sbe $17, -256($10)
+0x7d 0xd3 0x00 0x1c # CHECK: sbe $19, 0($14)
+0x7e 0x49 0x7f 0x9e # CHECK: sce $9, 255($18)
+0x7e 0xac 0x80 0x1e # CHECK: sce $12, -256($21)
+0x7e 0xed 0xf0 0x9e # CHECK: sce $13, -31($23)
+0x7d 0xee 0x7f 0x9d # CHECK: she $14, 255($15)
+0x7d 0xee 0x80 0x1d # CHECK: she $14, -256($15)
+0x7d 0x69 0x75 0x9d # CHECK: she $9, 235($11)
+0x7f 0xbf 0x7f 0x9f # CHECK: swe $ra, 255($sp)
+0x7f 0xbf 0x80 0x1f # CHECK: swe $ra, -256($sp)
+0x7f 0xbf 0xe5 0x9f # CHECK: swe $ra, -53($sp)
+0x42 0x00 0x00 0x03 # CHECK: tlbinv
+0x42 0x00 0x00 0x04 # CHECK: tlbinvf
diff --git a/test/MC/Disassembler/Mips/eva/valid_preR6-eva.txt b/test/MC/Disassembler/Mips/eva/valid_preR6-eva.txt
new file mode 100644
index 000000000000..f364433c1c4e
--- /dev/null
+++ b/test/MC/Disassembler/Mips/eva/valid_preR6-eva.txt
@@ -0,0 +1,54 @@
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips32r2 -mattr=eva | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips32r3 -mattr=eva | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips32r5 -mattr=eva | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r2 -mattr=eva | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r3 -mattr=eva | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r5 -mattr=eva | FileCheck %s
+# CHECK: .text
+0x7c 0xff 0x7f 0x9b # CHECK: cachee 31, 255($7)
+0x7c 0x80 0x80 0x1b # CHECK: cachee 0, -256($4)
+0x7c 0x85 0xba 0x1b # CHECK: cachee 5, -140($4)
+0x7f 0x2a 0x80 0x2c # CHECK: lbe $10, -256($25)
+0x7d 0xed 0x7f 0xac # CHECK: lbe $13, 255($15)
+0x7d 0xcb 0x49 0x2c # CHECK: lbe $11, 146($14)
+0x7c 0x6d 0x80 0x28 # CHECK: lbue $13, -256($3)
+0x7c 0x4d 0x7f 0xa8 # CHECK: lbue $13, 255($2)
+0x7c 0x6d 0xa1 0x28 # CHECK: lbue $13, -190($3)
+0x7e 0xad 0x80 0x2d # CHECK: lhe $13, -256($21)
+0x7e 0x0c 0x7f 0xad # CHECK: lhe $12, 255($16)
+0x7e 0x0d 0x28 0xad # CHECK: lhe $13, 81($16)
+0x7c 0x72 0x80 0x29 # CHECK: lhue $18, -256($3)
+0x7c 0x72 0x7f 0xa9 # CHECK: lhue $18, 255($3)
+0x7c 0x56 0xac 0x29 # CHECK: lhue $22, -168($2)
+0x7e 0xa2 0x80 0x2e # CHECK: lle $2, -256($21)
+0x7e 0x63 0x7f 0xae # CHECK: lle $3, 255($19)
+0x7e 0xc3 0xdc 0xae # CHECK: lle $3, -71($22)
+0x7d 0xf6 0x7f 0x99 # CHECK: lwle $22, 255($15)
+0x7d 0x57 0x80 0x19 # CHECK: lwle $23, -256($10)
+0x7d 0xb7 0xa8 0x19 # CHECK: lwle $23, -176($13)
+0x7f 0x80 0x7f 0x9a # CHECK: lwre $zero, 255($gp)
+0x7f 0x80 0x80 0x1a # CHECK: lwre $zero, -256($gp)
+0x7f 0x80 0xa8 0x1a # CHECK: lwre $zero, -176($gp)
+0x7c 0x4e 0x80 0x23 # CHECK: prefe 14, -256($2)
+0x7c 0x6b 0x7f 0xa3 # CHECK: prefe 11, 255($3)
+0x7c 0x6e 0xed 0xa3 # CHECK: prefe 14, -37($3)
+0x7d 0x71 0x7f 0x9c # CHECK: sbe $17, 255($11)
+0x7d 0x51 0x80 0x1c # CHECK: sbe $17, -256($10)
+0x7d 0xd3 0x00 0x1c # CHECK: sbe $19, 0($14)
+0x7e 0x49 0x7f 0x9e # CHECK: sce $9, 255($18)
+0x7e 0xac 0x80 0x1e # CHECK: sce $12, -256($21)
+0x7e 0xed 0xf0 0x9e # CHECK: sce $13, -31($23)
+0x7d 0xee 0x7f 0x9d # CHECK: she $14, 255($15)
+0x7d 0xee 0x80 0x1d # CHECK: she $14, -256($15)
+0x7d 0x69 0x75 0x9d # CHECK: she $9, 235($11)
+0x7f 0xbf 0x7f 0x9f # CHECK: swe $ra, 255($sp)
+0x7f 0xbf 0x80 0x1f # CHECK: swe $ra, -256($sp)
+0x7f 0xbf 0xe5 0x9f # CHECK: swe $ra, -53($sp)
+0x7e 0x29 0x7f 0xa1 # CHECK: swle $9, 255($17)
+0x7e 0x6a 0x80 0x21 # CHECK: swle $10, -256($19)
+0x7e 0xa8 0x41 0xa1 # CHECK: swle $8, 131($21)
+0x7d 0xb4 0x7f 0xa2 # CHECK: swre $20, 255($13)
+0x7d 0xb4 0x80 0x22 # CHECK: swre $20, -256($13)
+0x7d 0xd2 0x2b 0x22 # CHECK: swre $18, 86($14)
+0x42 0x00 0x00 0x03 # CHECK: tlbinv
+0x42 0x00 0x00 0x04 # CHECK: tlbinvf
diff --git a/test/MC/Disassembler/Mips/micromips-dsp/valid.txt b/test/MC/Disassembler/Mips/micromips-dsp/valid.txt
new file mode 100644
index 000000000000..fdd404c43492
--- /dev/null
+++ b/test/MC/Disassembler/Mips/micromips-dsp/valid.txt
@@ -0,0 +1,103 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r6 -mattr=micromips -mattr=+dsp | FileCheck %s
+
+0x00 0x64 0x11 0x3c # CHECK: absq_s.ph $3, $4
+0x00 0x64 0x21 0x3c # CHECK: absq_s.w $3, $4
+0x00 0xa4 0x18 0xcd # CHECK: addu.qb $3, $4, $5
+0x00 0xa4 0x1c 0xcd # CHECK: addu_s.qb $3, $4, $5
+0x00 0xa4 0x1b 0x85 # CHECK: addsc $3, $4, $5
+0x00 0xa4 0x1b 0xc5 # CHECK: addwc $3, $4, $5
+0x00 0xa4 0x18 0x0d # CHECK: addq.ph $3, $4, $5
+0x00 0xa4 0x1c 0x0d # CHECK: addq_s.ph $3, $4, $5
+0x00 0xa4 0x1b 0x05 # CHECK: addq_s.w $3, $4, $5
+0x00 0x65 0x42 0xbc # CHECK: dpaq_s.w.ph $ac1, $5, $3
+0x00 0x64 0x92 0xbc # CHECK: dpaq_sa.l.w $ac2, $4, $3
+0x00 0x83 0x60 0xbc # CHECK: dpau.h.qbl $ac1, $3, $4
+0x02 0xb4 0xb0 0xbc # CHECK: dpau.h.qbr $ac2, $20, $21
+0x00 0x06 0x66 0x7c # CHECK: extp $zero, $ac1, 6
+0x00 0x42 0x76 0x7c # CHECK: extpdp $2, $ac1, 2
+0x00 0x88 0xb8 0xbc # CHECK: extpdpv $4, $ac2, $8
+0x01 0xe7 0xe8 0xbc # CHECK: extpv $15, $ac3, $7
+0x03 0x7f 0xce 0x7c # CHECK: extr.w $27, $ac3, 31
+0x01 0x98 0x1e 0x7c # CHECK: extr_r.w $12, $ac0, 24
+0x03 0x69 0xee 0x7c # CHECK: extr_rs.w $27, $ac3, 9
+0x00 0x61 0xbe 0x7c # CHECK: extr_s.h $3, $ac2, 1
+0x00 0xa6 0x0e 0xbc # CHECK: extrv.w $5, $ac0, $6
+0x01 0x43 0x1e 0xbc # CHECK: extrv_r.w $10, $ac0, $3
+0x01 0xf4 0x6e 0xbc # CHECK: extrv_rs.w $15, $ac1, $20
+0x01 0x10 0xbe 0xbc # CHECK: extrv_s.h $8, $ac2, $16
+0x00 0x64 0x41 0x3c # CHECK: insv $3, $4
+0x00 0xe6 0x4a 0xbc # CHECK: madd $ac1, $6, $7
+0x01 0x28 0x1a 0xbc # CHECK: maddu $ac0, $8, $9
+0x01 0x6a 0xea 0xbc # CHECK: msub $ac3, $10, $11
+0x01 0xac 0xba 0xbc # CHECK: msubu $ac2, $12, $13
+0x00 0x62 0xcc 0xbc # CHECK: mult $ac3, $2, $3
+0x00 0xa4 0x9c 0xbc # CHECK: multu $ac2, $4, $5
+0x00 0xa4 0x19 0xad # CHECK: packrl.ph $3, $4, $5
+0x00 0xa4 0x1a 0x2d # CHECK: pick.ph $3, $4, $5
+0x00 0xa4 0x19 0xed # CHECK: pick.qb $3, $4, $5
+0x00 0x22 0x51 0x3c # CHECK: preceq.w.phl $1, $2
+0x00 0x64 0x61 0x3c # CHECK: preceq.w.phr $3, $4
+0x00 0xa6 0x71 0x3c # CHECK: precequ.ph.qbl $5, $6
+0x00 0xe8 0x73 0x3c # CHECK: precequ.ph.qbla $7, $8
+0x01 0x2a 0x91 0x3c # CHECK: precequ.ph.qbr $9, $10
+0x01 0x6c 0x93 0x3c # CHECK: precequ.ph.qbra $11, $12
+0x01 0xae 0xb1 0x3c # CHECK: preceu.ph.qbl $13, $14
+0x01 0xf0 0xb3 0x3c # CHECK: preceu.ph.qbla $15, $16
+0x02 0x32 0xd1 0x3c # CHECK: preceu.ph.qbr $17, $18
+0x02 0x74 0xd3 0x3c # CHECK: preceu.ph.qbra $19, $20
+0x01 0x49 0x40 0xed # CHECK: precrq.ph.w $8, $9, $10
+0x01 0xac 0x58 0xad # CHECK: precrq.qb.ph $11, $12, $13
+0x02 0x0f 0x71 0x6d # CHECK: precrqu_s.qb.ph $14, $15, $16
+0x02 0x72 0x89 0x2d # CHECK: precrq_rs.ph.w $17, $18, $19
+0x00 0x03 0x40 0x1d # CHECK: shilo $ac1, 3
+0x00 0x05 0x52 0x7c # CHECK: shilov $ac1, $5
+0x00 0x64 0x53 0xb5 # CHECK: shll.ph $3, $4, 5
+0x00 0x64 0x5b 0xb5 # CHECK: shll_s.ph $3, $4, 5
+0x00 0x64 0xa8 0x7c # CHECK: shll.qb $3, $4, 5
+0x00 0x85 0x18 0x0e # CHECK: shllv.ph $3, $4, $5
+0x00 0x85 0x1c 0x0e # CHECK: shllv_s.ph $3, $4, $5
+0x00 0x85 0x1b 0x95 # CHECK: shllv.qb $3, $4, $5
+0x00 0x85 0x1b 0xd5 # CHECK: shllv_s.w $3, $4, $5
+0x00 0x64 0x2b 0xf5 # CHECK: shll_s.w $3, $4, 5
+0x00 0x64 0x53 0x35 # CHECK: shra.ph $3, $4, 5
+0x00 0x64 0x57 0x35 # CHECK: shra_r.ph $3, $4, 5
+0x00 0x85 0x19 0x8d # CHECK: shrav.ph $3, $4, $5
+0x00 0x85 0x1d 0x8d # CHECK: shrav_r.ph $3, $4, $5
+0x00 0x85 0x1a 0xd5 # CHECK: shrav_r.w $3, $4, $5
+0x00 0x64 0x2a 0xf5 # CHECK: shra_r.w $3, $4, 5
+0x00 0x64 0xb8 0x7c # CHECK: shrl.qb $3, $4, 5
+0x00 0x85 0x1b 0x55 # CHECK: shrlv.qb $3, $4, $5
+0x00 0xa4 0x1a 0x0d # CHECK: subq.ph $3, $4, $5
+0x00 0xa4 0x1e 0x0d # CHECK: subq_s.ph $3, $4, $5
+0x00 0xa4 0x1b 0x45 # CHECK: subq_s.w $3, $4, $5
+0x00 0xa4 0x1a 0xcd # CHECK: subu.qb $3, $4, $5
+0x00 0xa4 0x1e 0xcd # CHECK: subu_s.qb $3, $4, $5
+0x00 0xc4 0x46 0xbc # CHECK: dpsq_s.w.ph $ac1, $4, $6
+0x00 0xc4 0x56 0xbc # CHECK: dpsq_sa.l.w $ac1, $4, $6
+0x00 0xc4 0x64 0xbc # CHECK: dpsu.h.qbl $ac1, $4, $6
+0x00 0xc4 0x74 0xbc # CHECK: dpsu.h.qbr $ac1, $4, $6
+0x00 0x62 0x08 0x25 # CHECK: muleq_s.w.phl $1, $2, $3
+0x00 0x62 0x08 0x65 # CHECK: muleq_s.w.phr $1, $2, $3
+0x00 0x62 0x08 0x95 # CHECK: muleu_s.ph.qbl $1, $2, $3
+0x00 0x62 0x08 0xd5 # CHECK: muleu_s.ph.qbr $1, $2, $3
+0x00,0x62,0x09,0x15 # CHECK: mulq_rs.ph $1, $2, $3
+0x00 0x43 0x0a 0x25 # CHECK: lbux $1, $2($3)
+0x00 0x43 0x09 0x65 # CHECK: lhx $1, $2($3)
+0x00 0x43 0x09 0xa5 # CHECK: lwx $1, $2($3)
+0x00 0x62 0x5a 0x7c # CHECK: maq_s.w.phl $ac1, $2, $3
+0x00 0x62 0x7a 0x7c # CHECK: maq_sa.w.phl $ac1, $2, $3
+0x00 0x62 0x4a 0x7c # CHECK: maq_s.w.phr $ac1, $2, $3
+0x00 0x62 0x6a 0x7c # CHECK: maq_sa.w.phr $ac1, $2, $3
+0x00 0x02 0x40 0x7c # CHECK: mfhi $2, $ac1
+0x00 0x01 0x50 0x7c # CHECK: mflo $1, $ac1
+0x00 0x01 0x60 0x7c # CHECK: mthi $1, $ac1
+0x00 0x01 0x70 0x7c # CHECK: mtlo $1, $ac1
+0x00 0x22 0xf1 0x3c # CHECK: raddu.w.qb $1, $2
+0x00 0x20 0x86 0x7c # CHECK: rddsp $1, 2
+0x02 0x00 0x08 0x3d # CHECK: repl.ph $1, 512
+0x00 0x30 0x05 0xfc # CHECK: repl.qb $1, 128
+0x00 0x22 0x03 0x3c # CHECK: replv.ph $1, $2
+0x00 0x22 0x13 0x3c # CHECK: replv.qb $1, $2
+0x00 0x01 0x82 0x7c # CHECK: mthlip $1, $ac2
+0x00 0xa7 0xd6 0x7c # CHECK: wrdsp $5
+0x00 0xa0 0x96 0x7c # CHECK: wrdsp $5, 2
diff --git a/test/MC/Disassembler/Mips/micromips-dspr2/valid.txt b/test/MC/Disassembler/Mips/micromips-dspr2/valid.txt
new file mode 100644
index 000000000000..1d9fc9e80d73
--- /dev/null
+++ b/test/MC/Disassembler/Mips/micromips-dspr2/valid.txt
@@ -0,0 +1,125 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r6 -mattr=micromips -mattr=+dspr2 | FileCheck %s
+
+0x00 0x64 0x11 0x3c # CHECK: absq_s.ph $3, $4
+0x00 0x64 0x01 0x3c # CHECK: absq_s.qb $3, $4
+0x00 0x64 0x21 0x3c # CHECK: absq_s.w $3, $4
+0x00 0xa4 0x19 0x0d # CHECK: addu.ph $3, $4, $5
+0x00 0xa4 0x18 0xcd # CHECK: addu.qb $3, $4, $5
+0x00 0xa4 0x1d 0x0d # CHECK: addu_s.ph $3, $4, $5
+0x00 0xa4 0x1c 0xcd # CHECK: addu_s.qb $3, $4, $5
+0x00 0xa4 0x19 0x4d # CHECK: adduh.qb $3, $4, $5
+0x00 0xa4 0x1d 0x4d # CHECK: adduh_r.qb $3, $4, $5
+0x00 0xa4 0x1b 0x85 # CHECK: addsc $3, $4, $5
+0x00 0xa4 0x1b 0xc5 # CHECK: addwc $3, $4, $5
+0x00 0xa4 0x18 0x0d # CHECK: addq.ph $3, $4, $5
+0x00 0xa4 0x1c 0x0d # CHECK: addq_s.ph $3, $4, $5
+0x00 0xa4 0x1b 0x05 # CHECK: addq_s.w $3, $4, $5
+0x00 0xa4 0x18 0x4d # CHECK: addqh.ph $3, $4, $5
+0x00 0xa4 0x18 0x8d # CHECK: addqh.w $3, $4, $5
+0x00 0xa4 0x1c 0x4d # CHECK: addqh_r.ph $3, $4, $5
+0x00 0xa4 0x1c 0x8d # CHECK: addqh_r.w $3, $4, $5
+0x00 0x43 0x00 0xbc # CHECK: dpa.w.ph $ac0, $3, $2
+0x00 0x65 0x42 0xbc # CHECK: dpaq_s.w.ph $ac1, $5, $3
+0x00 0x64 0x92 0xbc # CHECK: dpaq_sa.l.w $ac2, $4, $3
+0x00 0xec 0xe2 0xbc # CHECK: dpaqx_s.w.ph $ac3, $12, $7
+0x00 0xc5 0x32 0xbc # CHECK: dpaqx_sa.w.ph $ac0, $5, $6
+0x00 0x83 0x60 0xbc # CHECK: dpau.h.qbl $ac1, $3, $4
+0x02 0xb4 0xb0 0xbc # CHECK: dpau.h.qbr $ac2, $20, $21
+0x00 0x22 0xd0 0xbc # CHECK: dpax.w.ph $ac3, $2, $1
+0x00 0x06 0x66 0x7c # CHECK: extp $zero, $ac1, 6
+0x00 0x42 0x76 0x7c # CHECK: extpdp $2, $ac1, 2
+0x00 0x88 0xb8 0xbc # CHECK: extpdpv $4, $ac2, $8
+0x01 0xe7 0xe8 0xbc # CHECK: extpv $15, $ac3, $7
+0x03 0x7f 0xce 0x7c # CHECK: extr.w $27, $ac3, 31
+0x01 0x98 0x1e 0x7c # CHECK: extr_r.w $12, $ac0, 24
+0x03 0x69 0xee 0x7c # CHECK: extr_rs.w $27, $ac3, 9
+0x00 0x61 0xbe 0x7c # CHECK: extr_s.h $3, $ac2, 1
+0x00 0xa6 0x0e 0xbc # CHECK: extrv.w $5, $ac0, $6
+0x01 0x43 0x1e 0xbc # CHECK: extrv_r.w $10, $ac0, $3
+0x01 0xf4 0x6e 0xbc # CHECK: extrv_rs.w $15, $ac1, $20
+0x01 0x10 0xbe 0xbc # CHECK: extrv_s.h $8, $ac2, $16
+0x00 0x64 0x41 0x3c # CHECK: insv $3, $4
+0x00 0xe6 0x4a 0xbc # CHECK: madd $ac1, $6, $7
+0x01 0x28 0x1a 0xbc # CHECK: maddu $ac0, $8, $9
+0x01 0x6a 0xea 0xbc # CHECK: msub $ac3, $10, $11
+0x01 0xac 0xba 0xbc # CHECK: msubu $ac2, $12, $13
+0x00 0x62 0xcc 0xbc # CHECK: mult $ac3, $2, $3
+0x00 0xa4 0x9c 0xbc # CHECK: multu $ac2, $4, $5
+0x00 0xa4 0x19 0xad # CHECK: packrl.ph $3, $4, $5
+0x00 0xa4 0x1a 0x2d # CHECK: pick.ph $3, $4, $5
+0x00 0xa4 0x19 0xed # CHECK: pick.qb $3, $4, $5
+0x00 0x22 0x51 0x3c # CHECK: preceq.w.phl $1, $2
+0x00 0x64 0x61 0x3c # CHECK: preceq.w.phr $3, $4
+0x00 0xa6 0x71 0x3c # CHECK: precequ.ph.qbl $5, $6
+0x00 0xe8 0x73 0x3c # CHECK: precequ.ph.qbla $7, $8
+0x01 0x2a 0x91 0x3c # CHECK: precequ.ph.qbr $9, $10
+0x01 0x6c 0x93 0x3c # CHECK: precequ.ph.qbra $11, $12
+0x01 0xae 0xb1 0x3c # CHECK: preceu.ph.qbl $13, $14
+0x01 0xf0 0xb3 0x3c # CHECK: preceu.ph.qbla $15, $16
+0x02 0x32 0xd1 0x3c # CHECK: preceu.ph.qbr $17, $18
+0x02 0x74 0xd3 0x3c # CHECK: preceu.ph.qbra $19, $20
+0x00 0x62 0x08 0x6d # CHECK: precr.qb.ph $1, $2, $3
+0x00 0x85 0x0b 0xcd # CHECK: precr_sra.ph.w $4, $5, 1
+0x00 0xc7 0x17 0xcd # CHECK: precr_sra_r.ph.w $6, $7, 2
+0x01 0x49 0x40 0xed # CHECK: precrq.ph.w $8, $9, $10
+0x01 0xac 0x58 0xad # CHECK: precrq.qb.ph $11, $12, $13
+0x02 0x0f 0x71 0x6d # CHECK: precrqu_s.qb.ph $14, $15, $16
+0x02 0x72 0x89 0x2d # CHECK: precrq_rs.ph.w $17, $18, $19
+0x00 0x03 0x40 0x1d # CHECK: shilo $ac1, 3
+0x00 0x05 0x52 0x7c # CHECK: shilov $ac1, $5
+0x00 0x64 0x53 0xb5 # CHECK: shll.ph $3, $4, 5
+0x00 0x64 0x5b 0xb5 # CHECK: shll_s.ph $3, $4, 5
+0x00 0x64 0xa8 0x7c # CHECK: shll.qb $3, $4, 5
+0x00 0x85 0x18 0x0e # CHECK: shllv.ph $3, $4, $5
+0x00 0x85 0x1c 0x0e # CHECK: shllv_s.ph $3, $4, $5
+0x00 0x85 0x1b 0x95 # CHECK: shllv.qb $3, $4, $5
+0x00 0x85 0x1b 0xd5 # CHECK: shllv_s.w $3, $4, $5
+0x00 0x64 0x2b 0xf5 # CHECK: shll_s.w $3, $4, 5
+0x00 0x64 0x53 0x35 # CHECK: shra.ph $3, $4, 5
+0x00 0x64 0xa1 0xfc # CHECK: shra.qb $3, $4, 5
+0x00 0x64 0x57 0x35 # CHECK: shra_r.ph $3, $4, 5
+0x00 0x64 0xb1 0xfc # CHECK: shra_r.qb $3, $4, 5
+0x00 0x85 0x19 0x8d # CHECK: shrav.ph $3, $4, $5
+0x00 0x85 0x19 0xcd # CHECK: shrav.qb $3, $4, $5
+0x00 0x85 0x1d 0x8d # CHECK: shrav_r.ph $3, $4, $5
+0x00 0x85 0x1d 0xcd # CHECK: shrav_r.qb $3, $4, $5
+0x00 0x85 0x1a 0xd5 # CHECK: shrav_r.w $3, $4, $5
+0x00 0x64 0x2a 0xf5 # CHECK: shra_r.w $3, $4, 5
+0x00 0x64 0x53 0xfc # CHECK: shrl.ph $3, $4, 5
+0x00 0x64 0xb8 0x7c # CHECK: shrl.qb $3, $4, 5
+0x00 0x85 0x1b 0x15 # CHECK: shrlv.ph $3, $4, $5
+0x00 0x85 0x1b 0x55 # CHECK: shrlv.qb $3, $4, $5
+0x00 0xa4 0x1a 0x0d # CHECK: subq.ph $3, $4, $5
+0x00 0xa4 0x1e 0x0d # CHECK: subq_s.ph $3, $4, $5
+0x00 0xa4 0x1b 0x45 # CHECK: subq_s.w $3, $4, $5
+0x00 0xa4 0x1a 0x4d # CHECK: subqh.ph $3, $4, $5
+0x00 0xa4 0x1e 0x4d # CHECK: subqh_r.ph $3, $4, $5
+0x00 0xa4 0x1a 0x8d # CHECK: subqh.w $3, $4, $5
+0x00 0xa4 0x1e 0x8d # CHECK: subqh_r.w $3, $4, $5
+0x00 0xa4 0x1b 0x0d # CHECK: subu.ph $3, $4, $5
+0x00 0xa4 0x1f 0x0d # CHECK: subu_s.ph $3, $4, $5
+0x00 0xa4 0x1a 0xcd # CHECK: subu.qb $3, $4, $5
+0x00 0xa4 0x1e 0xcd # CHECK: subu_s.qb $3, $4, $5
+0x00 0xa4 0x1b 0x4d # CHECK: subuh.qb $3, $4, $5
+0x00 0xa4 0x1f 0x4d # CHECK: subuh_r.qb $3, $4, $5
+0x00 0xc4 0x46 0xbc # CHECK: dpsq_s.w.ph $ac1, $4, $6
+0x00 0xc4 0x56 0xbc # CHECK: dpsq_sa.l.w $ac1, $4, $6
+0x00 0xc4 0x64 0xbc # CHECK: dpsu.h.qbl $ac1, $4, $6
+0x00 0xc4 0x74 0xbc # CHECK: dpsu.h.qbr $ac1, $4, $6
+0x00 0xc4 0x44 0xbc # CHECK: dps.w.ph $ac1, $4, $6
+0x00 0xc4 0x66 0xbc # CHECK: dpsqx_s.w.ph $ac1, $4, $6
+0x00 0xc4 0x76 0xbc # CHECK: dpsqx_sa.w.ph $ac1, $4, $6
+0x00 0xc4 0x54 0xbc # CHECK: dpsx.w.ph $ac1, $4, $6
+0x00 0x62 0x08 0x2d # CHECK: mul.ph $1, $2, $3
+0x00 0x62 0x0c 0x2d # CHECK: mul_s.ph $1, $2, $3
+0x00 0x62 0x09 0x95 # CHECK: mulq_rs.w $1, $2, $3
+0x00 0x62 0x09 0x55 # CHECK: mulq_s.ph $1, $2, $3
+0x00 0x62 0x09 0xd5 # CHECK: mulq_s.w $1, $2, $3
+0x00 0x62 0x08 0x25 # CHECK: muleq_s.w.phl $1, $2, $3
+0x00 0x62 0x08 0x65 # CHECK: muleq_s.w.phr $1, $2, $3
+0x00 0x62 0x08 0x95 # CHECK: muleu_s.ph.qbl $1, $2, $3
+0x00 0x62 0x08 0xd5 # CHECK: muleu_s.ph.qbr $1, $2, $3
+0x00,0x62,0x09,0x15 # CHECK: mulq_rs.ph $1, $2, $3
+0x00 0x22 0x1a 0x55 # CHECK: prepend $1, $2, 3
+0x00 0xa7 0xd6 0x7c # CHECK: wrdsp $5
+0x00 0xa0 0x96 0x7c # CHECK: wrdsp $5, 2
diff --git a/test/MC/Disassembler/Mips/micromips.txt b/test/MC/Disassembler/Mips/micromips.txt
deleted file mode 100644
index 5809ac28113c..000000000000
--- a/test/MC/Disassembler/Mips/micromips.txt
+++ /dev/null
@@ -1,342 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mattr=micromips \
-# RUN: | FileCheck %s
-
-0x00 0xe6 0x49 0x10 # CHECK: add $9, $6, $7
-
-0x11 0x26 0x45 0x67 # CHECK: addi $9, $6, 17767
-
-0x31 0x26 0xc5 0x67 # CHECK: addiu $9, $6, -15001
-
-0x11 0x26 0x45 0x67 # CHECK: addi $9, $6, 17767
-
-0x31 0x26 0xc5 0x67 # CHECK: addiu $9, $6, -15001
-
-0x4f 0xf9 # CHECK: addiusp -16
-
-0x4f 0xff # CHECK: addiusp -1028
-
-0x4f 0xfd # CHECK: addiusp -1032
-
-0x4c 0x01 # CHECK: addiusp 1024
-
-0x4c 0x03 # CHECK: addiusp 1028
-
-0x00 0xe6 0x49 0x50 # CHECK: addu $9, $6, $7
-
-0x00 0xe6 0x49 0x90 # CHECK: sub $9, $6, $7
-
-0x00 0xa3 0x21 0xd0 # CHECK: subu $4, $3, $5
-
-0x00 0xe0 0x31 0x90 # CHECK: sub $6, $zero, $7
-
-0x00 0xe0 0x31 0xd0 # CHECK: subu $6, $zero, $7
-
-0x00 0x08 0x39 0x50 # CHECK: addu $7, $8, $zero
-
-0x00 0xa3 0x1b 0x50 # CHECK: slt $3, $3, $5
-
-0x90 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-
-0x90 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
-
-0xb0 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
-
-0x00 0xa3 0x1b 0x90 # CHECK: sltu $3, $3, $5
-
-0x41 0xa9 0x45 0x67 # CHECK: lui $9, 17767
-
-0x00 0xe6 0x4a 0x50 # CHECK: and $9, $6, $7
-
-0xd1 0x26 0x45 0x67 # CHECK: andi $9, $6, 17767
-
-0xd1 0x26 0x45 0x67 # CHECK: andi $9, $6, 17767
-
-0x2c 0x29 # CHECK: andi16 $16, $2, 31
-
-0x00 0xa4 0x1a 0x90 # CHECK: or $3, $4, $5
-
-0x51 0x26 0x45 0x67 # CHECK: ori $9, $6, 17767
-
-0x00 0xa3 0x1b 0x10 # CHECK: xor $3, $3, $5
-
-0x71 0x26 0x45 0x67 # CHECK: xori $9, $6, 17767
-
-0x71 0x26 0x45 0x67 # CHECK: xori $9, $6, 17767
-
-0x00 0xe6 0x4a 0xd0 # CHECK: nor $9, $6, $7
-
-0x00 0x08 0x3a 0xd0 # CHECK: not $7, $8
-
-0x00 0xe6 0x4a 0x10 # CHECK: mul $9, $6, $7
-
-0x00 0xe9 0x8b 0x3c # CHECK: mult $9, $7
-
-0x00 0xe9 0x9b 0x3c # CHECK: multu $9, $7
-
-0x00 0xe9 0xab 0x3c # CHECK-EB: div $zero, $9, $7
-
-0x00 0xe9 0xbb 0x3c # CHECK-EB: divu $zero, $9, $7
-
-0x00 0x83 0x38 0x00 # CHECK: sll $4, $3, 7
-
-0x00 0x65 0x10 0x10 # CHECK: sllv $2, $3, $5
-
-0x00 0x83 0x38 0x80 # CHECK: sra $4, $3, 7
-
-0x00 0x65 0x10 0x90 # CHECK: srav $2, $3, $5
-
-0x00 0x83 0x38 0x40 # CHECK: srl $4, $3, 7
-
-0x00 0x65 0x10 0x50 # CHECK: srlv $2, $3, $5
-
-0x01 0x26 0x38 0xc0 # CHECK: rotr $9, $6, 7
-
-0x00 0xc7 0x48 0xd0 # CHECK: rotrv $9, $6, $7
-
-0x1c 0xa4 0x00 0x08 # CHECK: lb $5, 8($4)
-
-0x14 0xc4 0x00 0x08 # CHECK: lbu $6, 8($4)
-
-0x3c 0x44 0x00 0x08 # CHECK: lh $2, 8($4)
-
-0x34 0x82 0x00 0x08 # CHECK: lhu $4, 8($2)
-
-0xfc 0xc5 0x00 0x04 # CHECK: lw $6, 4($5)
-
-0xfc 0xdd 0x00 0x7b # CHECK: lw $6, 123($sp)
-
-0x18 0xa4 0x00 0x08 # CHECK: sb $5, 8($4)
-
-0x38 0x44 0x00 0x08 # CHECK: sh $2, 8($4)
-
-0xf8 0xa6 0x00 0x04 # CHECK: sw $5, 4($6)
-
-0xf8 0xbd 0x00 0x7b # CHECK: sw $5, 123($sp)
-
-0x60 0x44 0xe0 0x08 # CHECK: lwu $2, 8($4)
-
-0x60 0x85 0x00 0x10 # CHECK: lwl $4, 16($5)
-
-0x60 0x85 0x10 0x10 # CHECK: lwr $4, 16($5)
-
-0x60 0x85 0x80 0x10 # CHECK: swl $4, 16($5)
-
-0x60 0x85 0x90 0x10 # CHECK: swr $4, 16($5)
-
-0x00 0xe6 0x48 0x58 # CHECK: movz $9, $6, $7
-
-0x00 0xe6 0x48 0x18 # CHECK: movn $9, $6, $7
-
-0x55 0x26 0x09 0x7b # CHECK: movt $9, $6, $fcc0
-
-0x55 0x26 0x01 0x7b # CHECK: movf $9, $6, $fcc0
-
-0x00 0x06 0x2d 0x7c # CHECK: mthi $6
-
-0x00 0x06 0x0d 0x7c # CHECK: mfhi $6
-
-0x00 0x06 0x3d 0x7c # CHECK: mtlo $6
-
-0x00 0x06 0x1d 0x7c # CHECK: mflo $6
-
-0x00 0xa4 0xcb 0x3c # CHECK: madd $4, $5
-
-0x00 0xa4 0xdb 0x3c # CHECK: maddu $4, $5
-
-0x00 0xa4 0xeb 0x3c # CHECK: msub $4, $5
-
-0x00 0xa4 0xfb 0x3c # CHECK: msubu $4, $5
-
-0x01 0x26 0x5b 0x3c # CHECK: clz $9, $6
-
-0x01 0x26 0x4b 0x3c # CHECK: clo $9, $6
-
-0x01 0x26 0x2b 0x3c # CHECK: seb $9, $6
-
-0x01 0x26 0x3b 0x3c # CHECK: seh $9, $6
-
-0x01 0x26 0x7b 0x3c # CHECK: wsbh $9, $6
-
-0x01 0x26 0x30 0xec # CHECK: ext $9, $6, 3, 7
-
-0x01 0x26 0x48 0xcc # CHECK: ins $9, $6, 3, 7
-
-0xd4 0x00 0x02 0x98 # CHECK: j 1328
-
-0xf4 0x00 0x02 0x98 # CHECK: jal 1328
-
-0x03 0xe6 0x0f 0x3c # CHECK: jalr $ra, $6
-
-0x00 0x07 0x0f 0x3c # CHECK: jr $7
-
-0x47 0x05 # CHECK: jraddiusp 20
-
-0x94 0xc9 0x02 0x9a # CHECK: beq $9, $6, 1332
-
-0x40 0x46 0x02 0x9a # CHECK: bgez $6, 1332
-
-0x40 0x66 0x02 0x9a # CHECK: bgezal $6, 1332
-
-0x40 0x26 0x02 0x9a # CHECK: bltzal $6, 1332
-
-0x40 0xc6 0x02 0x9a # CHECK: bgtz $6, 1332
-
-0x40 0x86 0x02 0x9a # CHECK: blez $6, 1332
-
-0xb4 0xc9 0x02 0x9a # CHECK: bne $9, $6, 1332
-
-0x40 0x06 0x02 0x9a # CHECK: bltz $6, 1332
-
-0x01 0x28 0x00 0x3c # CHECK: teq $8, $9, 0
-
-0x01 0x28 0x02 0x3c # CHECK: tge $8, $9, 0
-
-0x01 0x28 0x04 0x3c # CHECK: tgeu $8, $9, 0
-
-0x01 0x28 0x08 0x3c # CHECK: tlt $8, $9, 0
-
-0x01 0x28 0x0a 0x3c # CHECK: tltu $8, $9, 0
-
-0x01 0x28 0x0c 0x3c # CHECK: tne $8, $9, 0
-
-0x41,0xc9,0x45,0x67 # CHECK: teqi $9, 17767
-
-0x41 0x29 0x45 0x67 # CHECK: tgei $9, 17767
-
-0x41 0x69 0x45 0x67 # CHECK: tgeiu $9, 17767
-
-0x41 0x09 0x45 0x67 # CHECK: tlti $9, 17767
-
-0x41 0x49 0x45 0x67 # CHECK: tltiu $9, 17767
-
-0x41 0x89 0x45 0x67 # CHECK: tnei $9, 17767
-
-0x20 0x25 0x60 0x08 # CHECK: cache 1, 8($5)
-
-0x60 0x25 0x20 0x08 # CHECK: pref 1, 8($5)
-
-0x00 0x00 0x08 0x00 # CHECK: ssnop
-
-0x00 0x00 0x18 0x00 # CHECK: ehb
-
-0x00 0x00 0x28 0x00 # CHECK: pause
-
-0x60 0x44 0x30 0x08 # CHECK: ll $2, 8($4)
-
-0x60 0x44 0xb0 0x08 # CHECK: sc $2, 8($4)
-
-0x00 0x64 0x11 0x18 # CHECK: lwxs $2, $3($4)
-
-0x42 0x66 0x02 0x9a # CHECK: bgezals $6, 1332
-
-0x42 0x26 0x02 0x9a # CHECK: bltzals $6, 1332
-
-0x40 0xe9 0x02 0x9a # CHECK: beqzc $9, 1332
-
-0x40 0xa9 0x02 0x9a # CHECK: bnezc $9, 1332
-
-0x74 0x00 0x02 0x98 # CHECK: jals 1328
-
-0x03 0xe6 0x4f 0x3c # CHECK: jalrs $ra, $6
-
-0x20 0x44 0x50 0x08 # CHECK: lwm32 $16, $17, 8($4)
-
-0x20 0x44 0xd0 0x08 # CHECK: swm32 $16, $17, 8($4)
-
-0x22 0x04 0x90 0x08 # CHECK: swp $16, 8($4)
-
-0x22 0x04 0x10 0x08 # CHECK: lwp $16, 8($4)
-
-0x00 0x00 0x00 0x00 # CHECK: nop
-
-0x79 0x00 0x00 0x05 # CHECK: addiupc $2, 20
-
-0x7b 0xbf 0xff 0xff # CHECK: addiupc $7, 16777212
-
-0x7b 0xc0 0x00 0x00 # CHECK: addiupc $7, -16777216
-
-0x07 0x42 # CHECK: addu16 $6, $17, $4
-
-0x06 0xb1 # CHECK: subu16 $5, $16, $3
-
-0x44 0x82 # CHECK: and16 $16, $2
-
-0x44 0x0b # CHECK: not16 $17, $3
-
-0x44 0xc4 # CHECK: or16 $16, $4
-
-0x44 0x4d # CHECK: xor16 $17, $5
-
-0x25 0x8a # CHECK: sll16 $3, $16, 5
-
-0x26 0x1d # CHECK: srl16 $4, $17, 6
-
-0x09 0x94 # CHECK: lbu16 $3, 4($17)
-
-0x09 0x8f # CHECK: lbu16 $3, -1($16)
-
-0x29 0x82 # CHECK: lhu16 $3, 4($16)
-
-0x6a 0x12 # CHECK: lw16 $4, 8($17)
-
-0x89 0x84 # CHECK: sb16 $3, 4($16)
-
-0xaa 0x14 # CHECK: sh16 $4, 8($17)
-
-0xea 0x11 # CHECK: sw16 $4, 4($17)
-
-0xe8 0x11 # CHECK: sw16 $zero, 4($17)
-
-0x46 0x09 # CHECK: mfhi $9
-
-0x46 0x49 # CHECK: mflo $9
-
-0x0f 0x21 # CHECK: move $25, $1
-
-0x45 0xa9 # CHECK: jrc $9
-
-0x45 0xc9 # CHECK: jalr $9
-
-0x45 0xe9 # CHECK: jalrs16 $9
-
-0x45 0x89 # CHECK: jr16 $9
-
-0xed 0xff # CHECK: li16 $3, -1
-
-0xed 0xfe # CHECK: li16 $3, 126
-
-0x6f 0x83 # CHECK: addiur1sp $7, 4
-
-0x6f 0x7e # CHECK: addiur2 $6, $7, -1
-
-0x6f 0x76 # CHECK: addiur2 $6, $7, 12
-
-0x4c 0xfc # CHECK: addius5 $7, -2
-
-0x0c 0x00 # CHECK: nop
-
-0x48 0x68 # CHECK: lw $3, 32($sp)
-
-0xc8 0x9f # CHECK: sw $4, 124($sp)
-
-0x8f 0x0a # CHECK: beqz16 $6, 20
-
-0xaf 0x0a # CHECK: bnez16 $6, 20
-
-0xcc 0x42 # CHECK: b16 132
-
-0x65 0x88 # CHECK: lw $3, 32($gp)
-
-0x45 0x12 # CHECK: lwm16 $16, $17, $ra, 8($sp)
-
-0x45 0x52 # CHECK: swm16 $16, $17, $ra, 8($sp)
-
-0x46 0x88 # CHECK: break16 8
-
-0x46 0xce # CHECK: sdbbp16 14
-
-0x84 0x34 # CHECK: movep $5, $6, $2, $3
-
-0x00 0x00 0x57 0x7c # CHECK: ei
-
-0x00 0x0a 0x57 0x7c # CHECK: ei $10
diff --git a/test/MC/Disassembler/Mips/micromips32r3/invalid.txt b/test/MC/Disassembler/Mips/micromips32r3/invalid.txt
new file mode 100644
index 000000000000..fcaa6169e669
--- /dev/null
+++ b/test/MC/Disassembler/Mips/micromips32r3/invalid.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mattr=micromips \
+# RUN: 2>&1 | FileCheck %s
+
+0x21 0xe2 0x5c 0x71 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/micromips32r3/valid-el.txt b/test/MC/Disassembler/Mips/micromips32r3/valid-el.txt
new file mode 100644
index 000000000000..d6c7de4e3a53
--- /dev/null
+++ b/test/MC/Disassembler/Mips/micromips32r3/valid-el.txt
@@ -0,0 +1,191 @@
+# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mattr=micromips \
+# RUN: | FileCheck %s
+
+0xf9 0x4f # CHECK: addiusp -16
+0xff 0x4f # CHECK: addiusp -1028
+0xfd 0x4f # CHECK: addiusp -1032
+0x01 0x4c # CHECK: addiusp 1024
+0x03 0x4c # CHECK: addiusp 1028
+0x29 0x2c # CHECK: andi16 $16, $2, 31
+0x05 0x47 # CHECK: jraddiusp 20
+0x42 0x07 # CHECK: addu16 $6, $17, $4
+0xb1 0x06 # CHECK: subu16 $5, $16, $3
+0x82 0x44 # CHECK: and16 $16, $2
+0x0b 0x44 # CHECK: not16 $17, $3
+0xc4 0x44 # CHECK: or16 $16, $4
+0x4d 0x44 # CHECK: xor16 $17, $5
+0x8a 0x25 # CHECK: sll16 $3, $16, 5
+0x1d 0x26 # CHECK: srl16 $4, $17, 6
+0x94 0x09 # CHECK: lbu16 $3, 4($17)
+0x8f 0x09 # CHECK: lbu16 $3, -1($16)
+0x82 0x29 # CHECK: lhu16 $3, 4($16)
+0x12 0x6a # CHECK: lw16 $4, 8($17)
+0x84 0x89 # CHECK: sb16 $3, 4($16)
+0x14 0xaa # CHECK: sh16 $4, 8($17)
+0x11 0xea # CHECK: sw16 $4, 4($17)
+0x11 0xe8 # CHECK: sw16 $zero, 4($17)
+0x09 0x46 # CHECK: mfhi $9
+0x49 0x46 # CHECK: mflo $9
+0x21 0x0f # CHECK: move $25, $1
+0xa9 0x45 # CHECK: jrc $9
+0xc9 0x45 # CHECK: jalr $9
+0xe9 0x45 # CHECK: jalrs16 $9
+0x89 0x45 # CHECK: jr16 $9
+0xff 0xed # CHECK: li16 $3, -1
+0xfe 0xed # CHECK: li16 $3, 126
+0x83 0x6f # CHECK: addiur1sp $7, 4
+0x7e 0x6f # CHECK: addiur2 $6, $7, -1
+0x76 0x6f # CHECK: addiur2 $6, $7, 12
+0xfc 0x4c # CHECK: addius5 $7, -2
+0x00 0x0c # CHECK: nop
+0x68 0x48 # CHECK: lw $3, 32($sp)
+0x9f 0xc8 # CHECK: sw $4, 124($sp)
+0x0a 0x8f # CHECK: beqz16 $6, 20
+0x0a 0xaf # CHECK: bnez16 $6, 20
+0x42 0xcc # CHECK: b16 132
+0x88 0x65 # CHECK: lw $3, 32($gp)
+0x12 0x45 # CHECK: lwm16 $16, $17, $ra, 8($sp)
+0x52 0x45 # CHECK: swm16 $16, $17, $ra, 8($sp)
+0x88 0x46 # CHECK: break16 8
+0xce 0x46 # CHECK: sdbbp16 14
+0x34 0x84 # CHECK: movep $5, $6, $2, $3
+0xe6 0x00 0x10 0x49 # CHECK: add $9, $6, $7
+0x26 0x11 0x67 0x45 # CHECK: addi $9, $6, 17767
+0x26 0x31 0x67 0xc5 # CHECK: addiu $9, $6, -15001
+0x26 0x11 0x67 0x45 # CHECK: addi $9, $6, 17767
+0x26 0x31 0x67 0xc5 # CHECK: addiu $9, $6, -15001
+0xe6 0x00 0x50 0x49 # CHECK: addu $9, $6, $7
+0xe6 0x00 0x90 0x49 # CHECK: sub $9, $6, $7
+0xa3 0x00 0xd0 0x21 # CHECK: subu $4, $3, $5
+0xe0 0x00 0x90 0x31 # CHECK: sub $6, $zero, $7
+0xe0 0x00 0xd0 0x31 # CHECK: subu $6, $zero, $7
+0x08 0x00 0x50 0x39 # CHECK: addu $7, $8, $zero
+0xa3 0x00 0x50 0x1b # CHECK: slt $3, $3, $5
+0x63 0x90 0x67 0x00 # CHECK: slti $3, $3, 103
+0x63 0x90 0x67 0x00 # CHECK: slti $3, $3, 103
+0x63 0xb0 0x67 0x00 # CHECK: sltiu $3, $3, 103
+0xa3 0x00 0x90 0x1b # CHECK: sltu $3, $3, $5
+0xa9 0x41 0x67 0x45 # CHECK: lui $9, 17767
+0xe6 0x00 0x50 0x4a # CHECK: and $9, $6, $7
+0x26 0xd1 0x67 0x45 # CHECK: andi $9, $6, 17767
+0x26 0xd1 0x67 0x45 # CHECK: andi $9, $6, 17767
+0xa4 0x00 0x90 0x1a # CHECK: or $3, $4, $5
+0x26 0x51 0x67 0x45 # CHECK: ori $9, $6, 17767
+0xa3 0x00 0x10 0x1b # CHECK: xor $3, $3, $5
+0x26 0x71 0x67 0x45 # CHECK: xori $9, $6, 17767
+0x26 0x71 0x67 0x45 # CHECK: xori $9, $6, 17767
+0xe6 0x00 0xd0 0x4a # CHECK: nor $9, $6, $7
+0x08 0x00 0xd0 0x3a # CHECK: not $7, $8
+0xe6 0x00 0x10 0x4a # CHECK: mul $9, $6, $7
+0xe9 0x00 0x3c 0x8b # CHECK: mult $9, $7
+0xe9 0x00 0x3c 0x9b # CHECK: multu $9, $7
+0xe9 0x00 0x3c 0xab # CHECK: div $zero, $9, $7
+0xe9 0x00 0x3c 0xbb # CHECK: divu $zero, $9, $7
+0x83 0x00 0x00 0x38 # CHECK: sll $4, $3, 7
+0x65 0x00 0x10 0x10 # CHECK: sllv $2, $3, $5
+0x83 0x00 0x80 0x38 # CHECK: sra $4, $3, 7
+0x65 0x00 0x90 0x10 # CHECK: srav $2, $3, $5
+0x83 0x00 0x40 0x38 # CHECK: srl $4, $3, 7
+0x65 0x00 0x50 0x10 # CHECK: srlv $2, $3, $5
+0x26 0x01 0xc0 0x38 # CHECK: rotr $9, $6, 7
+0xc7 0x00 0xd0 0x48 # CHECK: rotrv $9, $6, $7
+0xa4 0x1c 0x08 0x00 # CHECK: lb $5, 8($4)
+0xc4 0x14 0x08 0x00 # CHECK: lbu $6, 8($4)
+0x44 0x3c 0x08 0x00 # CHECK: lh $2, 8($4)
+0x82 0x34 0x08 0x00 # CHECK: lhu $4, 8($2)
+0xc5 0xfc 0x04 0x00 # CHECK: lw $6, 4($5)
+0xdd 0xfc 0x7b 0x00 # CHECK: lw $6, 123($sp)
+0xa4 0x18 0x08 0x00 # CHECK: sb $5, 8($4)
+0x44 0x38 0x08 0x00 # CHECK: sh $2, 8($4)
+0xa6 0xf8 0x04 0x00 # CHECK: sw $5, 4($6)
+0xbd 0xf8 0x7b 0x00 # CHECK: sw $5, 123($sp)
+0x44 0x60 0x08 0xe0 # CHECK: lwu $2, 8($4)
+0x85 0x60 0x10 0x00 # CHECK: lwl $4, 16($5)
+0x85 0x60 0x10 0x10 # CHECK: lwr $4, 16($5)
+0x85 0x60 0x10 0x80 # CHECK: swl $4, 16($5)
+0x85 0x60 0x10 0x90 # CHECK: swr $4, 16($5)
+0xe6 0x00 0x58 0x48 # CHECK: movz $9, $6, $7
+0xe6 0x00 0x18 0x48 # CHECK: movn $9, $6, $7
+0x26 0x55 0x7b 0x09 # CHECK: movt $9, $6, $fcc0
+0x26 0x55 0x7b 0x01 # CHECK: movf $9, $6, $fcc0
+0x06 0x00 0x7c 0x2d # CHECK: mthi $6
+0x06 0x00 0x7c 0x0d # CHECK: mfhi $6
+0x06 0x00 0x7c 0x3d # CHECK: mtlo $6
+0x06 0x00 0x7c 0x1d # CHECK: mflo $6
+0xa4 0x00 0x3c 0xcb # CHECK: madd $4, $5
+0xa4 0x00 0x3c 0xdb # CHECK: maddu $4, $5
+0xa4 0x00 0x3c 0xeb # CHECK: msub $4, $5
+0xa4 0x00 0x3c 0xfb # CHECK: msubu $4, $5
+0x26 0x01 0x3c 0x5b # CHECK: clz $9, $6
+0x26 0x01 0x3c 0x4b # CHECK: clo $9, $6
+0x26 0x01 0x3c 0x2b # CHECK: seb $9, $6
+0x26 0x01 0x3c 0x3b # CHECK: seh $9, $6
+0x26 0x01 0x3c 0x7b # CHECK: wsbh $9, $6
+0x26 0x01 0xec 0x30 # CHECK: ext $9, $6, 3, 7
+0x26 0x01 0xcc 0x48 # CHECK: ins $9, $6, 3, 7
+0x00 0xd4 0x98 0x02 # CHECK: j 1328
+0x00 0xf4 0x98 0x02 # CHECK: jal 1328
+0xe6 0x03 0x3c 0x0f # CHECK: jalr $ra, $6
+0x07 0x00 0x3c 0x0f # CHECK: jr $7
+0xc9 0x94 0x9a 0x02 # CHECK: beq $9, $6, 1332
+0x46 0x40 0x9a 0x02 # CHECK: bgez $6, 1332
+0x66 0x40 0x9a 0x02 # CHECK: bgezal $6, 1332
+0x26 0x40 0x9a 0x02 # CHECK: bltzal $6, 1332
+0xc6 0x40 0x9a 0x02 # CHECK: bgtz $6, 1332
+0x86 0x40 0x9a 0x02 # CHECK: blez $6, 1332
+0xc9 0xb4 0x9a 0x02 # CHECK: bne $9, $6, 1332
+0x06 0x40 0x9a 0x02 # CHECK: bltz $6, 1332
+0x28 0x01 0x3c 0x00 # CHECK: teq $8, $9
+0x28 0x01 0x3c 0x02 # CHECK: tge $8, $9
+0x28 0x01 0x3c 0x04 # CHECK: tgeu $8, $9
+0x28 0x01 0x3c 0x08 # CHECK: tlt $8, $9
+0x28 0x01 0x3c 0x0a # CHECK: tltu $8, $9
+0x28 0x01 0x3c 0x0c # CHECK: tne $8, $9
+0xc9 0x41 0x67 0x45 # CHECK: teqi $9, 17767
+0x29 0x41 0x67 0x45 # CHECK: tgei $9, 17767
+0x69 0x41 0x67 0x45 # CHECK: tgeiu $9, 17767
+0x09 0x41 0x67 0x45 # CHECK: tlti $9, 17767
+0x49 0x41 0x67 0x45 # CHECK: tltiu $9, 17767
+0x89 0x41 0x67 0x45 # CHECK: tnei $9, 17767
+0x25 0x20 0x08 0x60 # CHECK: cache 1, 8($5)
+0x25 0x60 0x08 0x20 # CHECK: pref 1, 8($5)
+0x00 0x00 0x00 0x08 # CHECK: ssnop
+0x00 0x00 0x00 0x18 # CHECK: ehb
+0x00 0x00 0x00 0x28 # CHECK: pause
+0x44 0x60 0x08 0x30 # CHECK: ll $2, 8($4)
+0x44 0x60 0x08 0xb0 # CHECK: sc $2, 8($4)
+0x64 0x00 0x18 0x11 # CHECK: lwxs $2, $3($4)
+0x66 0x42 0x9a 0x02 # CHECK: bgezals $6, 1332
+0x26 0x42 0x9a 0x02 # CHECK: bltzals $6, 1332
+0xe9 0x40 0x9a 0x02 # CHECK: beqzc $9, 1332
+0xa9 0x40 0x9a 0x02 # CHECK: bnezc $9, 1332
+0x00 0x74 0x98 0x02 # CHECK: jals 1328
+0xe6 0x03 0x3c 0x4f # CHECK: jalrs $ra, $6
+0x44 0x20 0x08 0x50 # CHECK: lwm32 $16, $17, 8($4)
+0x3b 0x21 0x84 0x59 # CHECK: lwm32 $16, $17, $18, $19, $20, $21, $22, $23, $fp, -1660($27)
+0x44 0x20 0x08 0xd0 # CHECK: swm32 $16, $17, 8($4)
+0x04 0x22 0x08 0x90 # CHECK: swp $16, 8($4)
+0x04 0x22 0x08 0x10 # CHECK: lwp $16, 8($4)
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x79 0x05 0x00 # CHECK: addiupc $2, 20
+0xbf 0x7b 0xff 0xff # CHECK: addiupc $7, 16777212
+0xc0 0x7b 0x00 0x00 # CHECK: addiupc $7, -16777216
+0x00 0x00 0x7c 0x57 # CHECK: ei
+0x0a 0x00 0x7c 0x57 # CHECK: ei $10
+0x25 0x60 0x08 0xa6 # CHECK: cachee 1, 8($5)
+0x25 0x60 0x08 0xa4 # CHECK: prefe 1, 8($5)
+0x65 0x54 0xa0 0x09 # CHECK: prefx 1, $3($5)
+0x82 0x60 0x08 0x62 # CHECK: lhue $4, 8($2)
+0x82 0x60 0x08 0x68 # CHECK: lbe $4, 8($2)
+0x82 0x60 0x08 0x60 # CHECK: lbue $4, 8($2)
+0x82 0x60 0x08 0x6a # CHECK: lhe $4, 8($2)
+0x82 0x60 0x08 0x6e # CHECK: lwe $4, 8($2)
+0xa4 0x60 0x08 0xa8 # CHECK: sbe $5, 8($4)
+0xa4 0x60 0x08 0xaa # CHECK: she $5, 8($4)
+0xa4 0x60 0x08 0xae # CHECK: swe $5, 8($4)
+0x03 0x63 0x05 0xa2 # CHECK: swre $24, 5($3)
+0x03 0x63 0x05 0xa0 # CHECK: swle $24, 5($3)
+0x03 0x63 0x05 0x66 # CHECK: lwre $24, 5($3)
+0x04 0x63 0x02 0x64 # CHECK: lwle $24, 2($4)
+0x44 0x60 0x08 0x6c # CHECK: lle $2, 8($4)
+0x44 0x60 0x08 0xac # CHECK: sce $2, 8($4)
diff --git a/test/MC/Disassembler/Mips/micromips32r3/valid.txt b/test/MC/Disassembler/Mips/micromips32r3/valid.txt
new file mode 100644
index 000000000000..030afb36723d
--- /dev/null
+++ b/test/MC/Disassembler/Mips/micromips32r3/valid.txt
@@ -0,0 +1,191 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mattr=micromips \
+# RUN: | FileCheck %s
+
+0x4f 0xf9 # CHECK: addiusp -16
+0x4f 0xff # CHECK: addiusp -1028
+0x4f 0xfd # CHECK: addiusp -1032
+0x4c 0x01 # CHECK: addiusp 1024
+0x4c 0x03 # CHECK: addiusp 1028
+0x2c 0x29 # CHECK: andi16 $16, $2, 31
+0x47 0x05 # CHECK: jraddiusp 20
+0x07 0x42 # CHECK: addu16 $6, $17, $4
+0x06 0xb1 # CHECK: subu16 $5, $16, $3
+0x44 0x82 # CHECK: and16 $16, $2
+0x44 0x0b # CHECK: not16 $17, $3
+0x44 0xc4 # CHECK: or16 $16, $4
+0x44 0x4d # CHECK: xor16 $17, $5
+0x25 0x8a # CHECK: sll16 $3, $16, 5
+0x26 0x1d # CHECK: srl16 $4, $17, 6
+0x09 0x94 # CHECK: lbu16 $3, 4($17)
+0x09 0x8f # CHECK: lbu16 $3, -1($16)
+0x29 0x82 # CHECK: lhu16 $3, 4($16)
+0x6a 0x12 # CHECK: lw16 $4, 8($17)
+0x89 0x84 # CHECK: sb16 $3, 4($16)
+0xaa 0x14 # CHECK: sh16 $4, 8($17)
+0xea 0x11 # CHECK: sw16 $4, 4($17)
+0xe8 0x11 # CHECK: sw16 $zero, 4($17)
+0x46 0x09 # CHECK: mfhi $9
+0x46 0x49 # CHECK: mflo $9
+0x0f 0x21 # CHECK: move $25, $1
+0x45 0xa9 # CHECK: jrc $9
+0x45 0xc9 # CHECK: jalr $9
+0x45 0xe9 # CHECK: jalrs16 $9
+0x45 0x89 # CHECK: jr16 $9
+0xed 0xff # CHECK: li16 $3, -1
+0xed 0xfe # CHECK: li16 $3, 126
+0x6f 0x83 # CHECK: addiur1sp $7, 4
+0x6f 0x7e # CHECK: addiur2 $6, $7, -1
+0x6f 0x76 # CHECK: addiur2 $6, $7, 12
+0x4c 0xfc # CHECK: addius5 $7, -2
+0x0c 0x00 # CHECK: nop
+0x48 0x68 # CHECK: lw $3, 32($sp)
+0xc8 0x9f # CHECK: sw $4, 124($sp)
+0x8f 0x0a # CHECK: beqz16 $6, 20
+0xaf 0x0a # CHECK: bnez16 $6, 20
+0xcc 0x42 # CHECK: b16 132
+0x65 0x88 # CHECK: lw $3, 32($gp)
+0x45 0x12 # CHECK: lwm16 $16, $17, $ra, 8($sp)
+0x45 0x52 # CHECK: swm16 $16, $17, $ra, 8($sp)
+0x46 0x88 # CHECK: break16 8
+0x46 0xce # CHECK: sdbbp16 14
+0x84 0x34 # CHECK: movep $5, $6, $2, $3
+0x00 0xe6 0x49 0x10 # CHECK: add $9, $6, $7
+0x11 0x26 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x31 0x26 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x11 0x26 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x31 0x26 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x00 0xe6 0x49 0x50 # CHECK: addu $9, $6, $7
+0x00 0xe6 0x49 0x90 # CHECK: sub $9, $6, $7
+0x00 0xa3 0x21 0xd0 # CHECK: subu $4, $3, $5
+0x00 0xe0 0x31 0x90 # CHECK: sub $6, $zero, $7
+0x00 0xe0 0x31 0xd0 # CHECK: subu $6, $zero, $7
+0x00 0x08 0x39 0x50 # CHECK: addu $7, $8, $zero
+0x00 0xa3 0x1b 0x50 # CHECK: slt $3, $3, $5
+0x90 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x90 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0xb0 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x00 0xa3 0x1b 0x90 # CHECK: sltu $3, $3, $5
+0x41 0xa9 0x45 0x67 # CHECK: lui $9, 17767
+0x00 0xe6 0x4a 0x50 # CHECK: and $9, $6, $7
+0xd1 0x26 0x45 0x67 # CHECK: andi $9, $6, 17767
+0xd1 0x26 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x00 0xa4 0x1a 0x90 # CHECK: or $3, $4, $5
+0x51 0x26 0x45 0x67 # CHECK: ori $9, $6, 17767
+0x00 0xa3 0x1b 0x10 # CHECK: xor $3, $3, $5
+0x71 0x26 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x71 0x26 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x00 0xe6 0x4a 0xd0 # CHECK: nor $9, $6, $7
+0x00 0x08 0x3a 0xd0 # CHECK: not $7, $8
+0x00 0xe6 0x4a 0x10 # CHECK: mul $9, $6, $7
+0x00 0xe9 0x8b 0x3c # CHECK: mult $9, $7
+0x00 0xe9 0x9b 0x3c # CHECK: multu $9, $7
+0x00 0xe9 0xab 0x3c # CHECK: div $zero, $9, $7
+0x00 0xe9 0xbb 0x3c # CHECK: divu $zero, $9, $7
+0x00 0x83 0x38 0x00 # CHECK: sll $4, $3, 7
+0x00 0x65 0x10 0x10 # CHECK: sllv $2, $3, $5
+0x00 0x83 0x38 0x80 # CHECK: sra $4, $3, 7
+0x00 0x65 0x10 0x90 # CHECK: srav $2, $3, $5
+0x00 0x83 0x38 0x40 # CHECK: srl $4, $3, 7
+0x00 0x65 0x10 0x50 # CHECK: srlv $2, $3, $5
+0x01 0x26 0x38 0xc0 # CHECK: rotr $9, $6, 7
+0x00 0xc7 0x48 0xd0 # CHECK: rotrv $9, $6, $7
+0x1c 0xa4 0x00 0x08 # CHECK: lb $5, 8($4)
+0x14 0xc4 0x00 0x08 # CHECK: lbu $6, 8($4)
+0x3c 0x44 0x00 0x08 # CHECK: lh $2, 8($4)
+0x34 0x82 0x00 0x08 # CHECK: lhu $4, 8($2)
+0xfc 0xc5 0x00 0x04 # CHECK: lw $6, 4($5)
+0xfc 0xdd 0x00 0x7b # CHECK: lw $6, 123($sp)
+0x18 0xa4 0x00 0x08 # CHECK: sb $5, 8($4)
+0x38 0x44 0x00 0x08 # CHECK: sh $2, 8($4)
+0xf8 0xa6 0x00 0x04 # CHECK: sw $5, 4($6)
+0xf8 0xbd 0x00 0x7b # CHECK: sw $5, 123($sp)
+0x60 0x44 0xe0 0x08 # CHECK: lwu $2, 8($4)
+0x60 0x85 0x00 0x10 # CHECK: lwl $4, 16($5)
+0x60 0x85 0x10 0x10 # CHECK: lwr $4, 16($5)
+0x60 0x85 0x80 0x10 # CHECK: swl $4, 16($5)
+0x60 0x85 0x90 0x10 # CHECK: swr $4, 16($5)
+0x00 0xe6 0x48 0x58 # CHECK: movz $9, $6, $7
+0x00 0xe6 0x48 0x18 # CHECK: movn $9, $6, $7
+0x55 0x26 0x09 0x7b # CHECK: movt $9, $6, $fcc0
+0x55 0x26 0x01 0x7b # CHECK: movf $9, $6, $fcc0
+0x00 0x06 0x2d 0x7c # CHECK: mthi $6
+0x00 0x06 0x0d 0x7c # CHECK: mfhi $6
+0x00 0x06 0x3d 0x7c # CHECK: mtlo $6
+0x00 0x06 0x1d 0x7c # CHECK: mflo $6
+0x00 0xa4 0xcb 0x3c # CHECK: madd $4, $5
+0x00 0xa4 0xdb 0x3c # CHECK: maddu $4, $5
+0x00 0xa4 0xeb 0x3c # CHECK: msub $4, $5
+0x00 0xa4 0xfb 0x3c # CHECK: msubu $4, $5
+0x01 0x26 0x5b 0x3c # CHECK: clz $9, $6
+0x01 0x26 0x4b 0x3c # CHECK: clo $9, $6
+0x01 0x26 0x2b 0x3c # CHECK: seb $9, $6
+0x01 0x26 0x3b 0x3c # CHECK: seh $9, $6
+0x01 0x26 0x7b 0x3c # CHECK: wsbh $9, $6
+0x01 0x26 0x30 0xec # CHECK: ext $9, $6, 3, 7
+0x01 0x26 0x48 0xcc # CHECK: ins $9, $6, 3, 7
+0xd4 0x00 0x02 0x98 # CHECK: j 1328
+0xf4 0x00 0x02 0x98 # CHECK: jal 1328
+0x03 0xe6 0x0f 0x3c # CHECK: jalr $ra, $6
+0x00 0x07 0x0f 0x3c # CHECK: jr $7
+0x94 0xc9 0x02 0x9a # CHECK: beq $9, $6, 1332
+0x40 0x46 0x02 0x9a # CHECK: bgez $6, 1332
+0x40 0x66 0x02 0x9a # CHECK: bgezal $6, 1332
+0x40 0x26 0x02 0x9a # CHECK: bltzal $6, 1332
+0x40 0xc6 0x02 0x9a # CHECK: bgtz $6, 1332
+0x40 0x86 0x02 0x9a # CHECK: blez $6, 1332
+0xb4 0xc9 0x02 0x9a # CHECK: bne $9, $6, 1332
+0x40 0x06 0x02 0x9a # CHECK: bltz $6, 1332
+0x01 0x28 0x00 0x3c # CHECK: teq $8, $9
+0x01 0x28 0x02 0x3c # CHECK: tge $8, $9
+0x01 0x28 0x04 0x3c # CHECK: tgeu $8, $9
+0x01 0x28 0x08 0x3c # CHECK: tlt $8, $9
+0x01 0x28 0x0a 0x3c # CHECK: tltu $8, $9
+0x01 0x28 0x0c 0x3c # CHECK: tne $8, $9
+0x41 0xc9 0x45 0x67 # CHECK: teqi $9, 17767
+0x41 0x29 0x45 0x67 # CHECK: tgei $9, 17767
+0x41 0x69 0x45 0x67 # CHECK: tgeiu $9, 17767
+0x41 0x09 0x45 0x67 # CHECK: tlti $9, 17767
+0x41 0x49 0x45 0x67 # CHECK: tltiu $9, 17767
+0x41 0x89 0x45 0x67 # CHECK: tnei $9, 17767
+0x20 0x25 0x60 0x08 # CHECK: cache 1, 8($5)
+0x60 0x25 0x20 0x08 # CHECK: pref 1, 8($5)
+0x00 0x00 0x08 0x00 # CHECK: ssnop
+0x00 0x00 0x18 0x00 # CHECK: ehb
+0x00 0x00 0x28 0x00 # CHECK: pause
+0x60 0x44 0x30 0x08 # CHECK: ll $2, 8($4)
+0x60 0x44 0xb0 0x08 # CHECK: sc $2, 8($4)
+0x00 0x64 0x11 0x18 # CHECK: lwxs $2, $3($4)
+0x42 0x66 0x02 0x9a # CHECK: bgezals $6, 1332
+0x42 0x26 0x02 0x9a # CHECK: bltzals $6, 1332
+0x40 0xe9 0x02 0x9a # CHECK: beqzc $9, 1332
+0x40 0xa9 0x02 0x9a # CHECK: bnezc $9, 1332
+0x74 0x00 0x02 0x98 # CHECK: jals 1328
+0x03 0xe6 0x4f 0x3c # CHECK: jalrs $ra, $6
+0x20 0x44 0x50 0x08 # CHECK: lwm32 $16, $17, 8($4)
+0x21 0x3b 0x59 0x84 # CHECK: lwm32 $16, $17, $18, $19, $20, $21, $22, $23, $fp, -1660($27)
+0x20 0x44 0xd0 0x08 # CHECK: swm32 $16, $17, 8($4)
+0x22 0x04 0x90 0x08 # CHECK: swp $16, 8($4)
+0x22 0x04 0x10 0x08 # CHECK: lwp $16, 8($4)
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x79 0x00 0x00 0x05 # CHECK: addiupc $2, 20
+0x7b 0xbf 0xff 0xff # CHECK: addiupc $7, 16777212
+0x7b 0xc0 0x00 0x00 # CHECK: addiupc $7, -16777216
+0x00 0x00 0x57 0x7c # CHECK: ei
+0x00 0x0a 0x57 0x7c # CHECK: ei $10
+0x60 0x25 0xa6 0x08 # CHECK: cachee 1, 8($5)
+0x60 0x25 0xa4 0x08 # CHECK: prefe 1, 8($5)
+0x54 0x65 0x09 0xa0 # CHECK: prefx 1, $3($5)
+0x60 0x82 0x62 0x08 # CHECK: lhue $4, 8($2)
+0x60 0x82 0x68 0x08 # CHECK: lbe $4, 8($2)
+0x60 0x82 0x60 0x08 # CHECK: lbue $4, 8($2)
+0x60 0x82 0x6a 0x08 # CHECK: lhe $4, 8($2)
+0x60 0x82 0x6e 0x08 # CHECK: lwe $4, 8($2)
+0x60 0xa4 0xa8 0x08 # CHECK: sbe $5, 8($4)
+0x60 0xa4 0xaa 0x08 # CHECK: she $5, 8($4)
+0x60 0xa4 0xae 0x08 # CHECK: swe $5, 8($4)
+0x63 0x03 0xa2 0x05 # CHECK: swre $24, 5($3)
+0x63 0x03 0xa0 0x05 # CHECK: swle $24, 5($3)
+0x63 0x03 0x66 0x05 # CHECK: lwre $24, 5($3)
+0x63 0x04 0x64 0x02 # CHECK: lwle $24, 2($4)
+0x60 0x44 0x6c 0x08 # CHECK: lle $2, 8($4)
+0x60 0x44 0xac 0x08 # CHECK: sce $2, 8($4)
diff --git a/test/MC/Disassembler/Mips/micromips32r6.txt b/test/MC/Disassembler/Mips/micromips32r6.txt
deleted file mode 100644
index a2691ee6bc3a..000000000000
--- a/test/MC/Disassembler/Mips/micromips32r6.txt
+++ /dev/null
@@ -1,114 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r6 -mattr=micromips | FileCheck %s
-
-0x00 0xa4 0x19 0x10 # CHECK: add $3, $4, $5
-
-0x30 0x64 0x04 0xd2 # CHECK: addiu $3, $4, 1234
-
-0x00 0xa4 0x19 0x50 # CHECK: addu $3, $4, $5
-
-0x78 0x80 0x00 0x19 # CHECK: addiupc $4, 100
-
-0x78 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
-
-0x78 0x7e 0xff 0xff # CHECK: auipc $3, -1
-
-0x00 0x43 0x24 0x1f # CHECK: align $4, $2, $3, 2
-
-0x00 0xa4 0x1a 0x50 # CHECK: and $3, $4, $5
-
-0xd0 0x64 0x04 0xd2 # CHECK: andi $3, $4, 1234
-
-0x10 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23
-
-0x74 0x40 0x02 0x9a # CHECK: beqzalc $2, 1332
-
-0x7c 0x40 0x02 0x9a # CHECK: bnezalc $2, 1332
-
-0xc0 0x42 0x02 0x9a # CHECK: bgezalc $2, 1332
-
-0xe0 0x40 0x02 0x9a # CHECK: bgtzalc $2, 1332
-
-0xe0 0x42 0x02 0x9a # CHECK: bltzalc $2, 1332
-
-0xc0 0x40 0x02 0x9a # CHECK: blezalc $2, 1332
-
-0xb4 0x37 0x96 0xb8 # CHECK: balc 14572256
-
-0x94 0x37 0x96 0xb8 # CHECK: bc 14572256
-
-0x00 0x44 0x0b 0x3c # CHECK: bitswap $4, $2
-
-0x00 0x00 0x00 0x07 # CHECK: break
-
-0x00 0x07 0x00 0x07 # CHECK: break 7
-
-0x00 0x07 0x01 0x47 # CHECK: break 7, 5
-
-0x20 0x25 0x60 0x08 # CHECK: cache 1, 8($5)
-
-0x01 0x65 0x4b 0x3c # CHECK: clo $11, $5
-
-0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
-
-0x00 0xa4 0x19 0x18 # CHECK: div $3, $4, $5
-
-0x00 0xa4 0x19 0x98 # CHECK: divu $3, $4, $5
-
-0x00 0x00 0x18 0x00 # CHECK: ehb
-
-0x00 0x00 0x57 0x7c # CHECK: ei
-
-0x00 0x0a 0x57 0x7c # CHECK: ei $10
-
-0x00 0x00 0xf3 0x7c # CHECK: eret
-
-0x00 0x01 0xf3 0x7c # CHECK: eretnc
-
-0x80 0x05 0x01 0x00 # CHECK: jialc $5, 256
-
-0xa0 0x05 0x01 0x00 # CHECK: jic $5, 256
-
-0x78 0x48 0x00 0x43 # CHECK: lwpc $2, 268
-
-0x00 0x43 0x26 0x0f # CHECK: lsa $2, $3, $4, 3
-
-0x00 0xa4 0x19 0x58 # CHECK: mod $3, $4, $5
-
-0x00 0xa4 0x19 0xd8 # CHECK: modu $3, $4, $5
-
-0x00 0xa4 0x18 0x18 # CHECK: mul $3, $4, $5
-
-0x00 0xa4 0x18 0x58 # CHECK: muh $3, $4, $5
-
-0x00 0xa4 0x18 0x98 # CHECK: mulu $3, $4, $5
-
-0x00 0xa4,0x18,0xd8 # CHECK: muhu $3, $4, $5
-
-0x00 0x00 0x00 0x00 # CHECK: nop
-
-0x00 0xa4 0x1a 0xd0 # CHECK: nor $3, $4, $5
-
-0x00,0xa4,0x1a,0x90 # CHECK: or $3, $4, $5
-
-0x50 0x64 0x04 0xd2 # CHECK: ori $3, $4, 1234
-
-0x60 0x25 0x20 0x08 # CHECK: pref 1, 8($5)
-
-0x00 0x83 0x11 0x40 # CHECK: seleqz $2, $3, $4
-
-0x00 0x83 0x11 0x80 # CHECK: selnez $2, $3, $4
-
-0x00 0x83 0x38 0x00 # CHECK: sll $4, $3, 7
-
-0x00 0xa4 0x19 0x90 # CHECK: sub $3, $4, $5
-
-0x00 0xa4 0x19 0xd0 # CHECK: subu $3, $4, $5
-
-0x00 0xa4 0x1b 0x10 # CHECK: xor $3, $4, $5
-
-0x70 0x64 0x04 0xd2 # CHECK: xori $3, $4, 1234
-
-0x00 0x64 0x2b 0x3c # CHECK: seb $3, $4
-
-0x00 0x64 0x3b 0x3c # CHECK: seh $3, $4
-
diff --git a/test/MC/Disassembler/Mips/micromips32r6/valid.txt b/test/MC/Disassembler/Mips/micromips32r6/valid.txt
new file mode 100644
index 000000000000..b8e69f833a03
--- /dev/null
+++ b/test/MC/Disassembler/Mips/micromips32r6/valid.txt
@@ -0,0 +1,258 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r6 -mattr=micromips | FileCheck %s
+
+0x6f 0x83 # CHECK: addiur1sp $7, 4
+0x6f 0x7e # CHECK: addiur2 $6, $7, -1
+0x6f 0x76 # CHECK: addiur2 $6, $7, 12
+0x4c 0xfc # CHECK: addius5 $7, -2
+0x4f 0xff # CHECK: addiusp -1028
+0x4f 0xfd # CHECK: addiusp -1032
+0x4c 0x01 # CHECK: addiusp 1024
+0x4c 0x03 # CHECK: addiusp 1028
+0x4f 0xf9 # CHECK: addiusp -16
+0xcc 0x42 # CHECK: bc16 132
+0x8f 0x0a # CHECK: beqzc16 $6, 20
+0xaf 0x0a # CHECK: bnezc16 $6, 20
+0x45 0x2b # CHECK: jalr $9
+0x45 0x23 # CHECK: jrc16 $9
+0x44 0xb3 # CHECK: jrcaddiusp 20
+0x65 0x88 # CHECK: lw $3, 32($gp)
+0x48 0x66 # CHECK: lw $3, 24($sp)
+0x6a 0x12 # CHECK: lw16 $4, 8($17)
+0x29 0x82 # CHECK: lhu16 $3, 4($16)
+0x09 0x94 # CHECK: lbu16 $3, 4($17)
+0x09 0x9f # CHECK: lbu16 $3, -1($17)
+0x04 0xcc # CHECK: addu16 $6, $17, $4
+0x44 0x21 # CHECK: and16 $16, $2
+0x2e 0x56 # CHECK: andi16 $4, $5, 8
+0x46 0x70 # CHECK: not16 $4, $7
+0x45 0xf9 # CHECK: or16 $3, $7
+0x25 0xe0 # CHECK: sll16 $3, $6, 8
+0x25 0xe1 # CHECK: srl16 $3, $6, 8
+0x00 0xa4 0x19 0x10 # CHECK: add $3, $4, $5
+0x30 0x64 0x04 0xd2 # CHECK: addiu $3, $4, 1234
+0x00 0xa4 0x19 0x50 # CHECK: addu $3, $4, $5
+0x78 0x80 0x00 0x19 # CHECK: addiupc $4, 100
+0x78 0x7f 0x00 0x38 # CHECK: aluipc $3, 56
+0x78 0x7e 0xff 0xff # CHECK: auipc $3, -1
+0x00 0x43 0x24 0x1f # CHECK: align $4, $2, $3, 2
+0x00 0xa4 0x1a 0x50 # CHECK: and $3, $4, $5
+0xd0 0x64 0x04 0xd2 # CHECK: andi $3, $4, 1234
+0x10 0x62 0xff 0xe9 # CHECK: aui $3, $2, -23
+0x74 0x40 0x02 0x9a # CHECK: beqzalc $2, 1332
+0x7c 0x40 0x02 0x9a # CHECK: bnezalc $2, 1332
+0xc0 0x42 0x02 0x9a # CHECK: bgezalc $2, 1332
+0xe0 0x40 0x02 0x9a # CHECK: bgtzalc $2, 1332
+0xe0 0x42 0x02 0x9a # CHECK: bltzalc $2, 1332
+0xc0 0x40 0x02 0x9a # CHECK: blezalc $2, 1332
+0xb4 0x37 0x96 0xb8 # CHECK: balc 7286128
+0x94 0x37 0x96 0xb8 # CHECK: bc 7286128
+0x00 0x44 0x0b 0x3c # CHECK: bitswap $4, $2
+0x00 0x00 0x00 0x07 # CHECK: break
+0x00 0x07 0x00 0x07 # CHECK: break 7
+0x00 0x07 0x01 0x47 # CHECK: break 7, 5
+0x20 0x25 0x60 0x08 # CHECK: cache 1, 8($5)
+0x01 0x65 0x4b 0x3c # CHECK: clo $11, $5
+0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
+0x00 0x00 0xe3 0x7c # CHECK: deret
+0x00 0xa4 0x19 0x18 # CHECK: div $3, $4, $5
+0x00 0xa4 0x19 0x98 # CHECK: divu $3, $4, $5
+0x00 0x00 0x18 0x00 # CHECK: ehb
+0x00 0x00 0x57 0x7c # CHECK: ei
+0x00 0x0a 0x57 0x7c # CHECK: ei $10
+0x00 0x00 0xf3 0x7c # CHECK: eret
+0x00 0x01 0xf3 0x7c # CHECK: eretnc
+0x80 0x05 0x01 0x00 # CHECK: jialc $5, 256
+0xa0 0x05 0x01 0x00 # CHECK: jic $5, 256
+0x78 0x48 0x00 0x43 # CHECK: lwpc $2, 268
+0x00 0x43 0x26 0x0f # CHECK: lsa $2, $3, $4, 4
+0x00 0xa4 0x19 0x58 # CHECK: mod $3, $4, $5
+0x00 0xa4 0x19 0xd8 # CHECK: modu $3, $4, $5
+0x00 0xa4 0x18 0x18 # CHECK: mul $3, $4, $5
+0x00 0xa4 0x18 0x58 # CHECK: muh $3, $4, $5
+0x00 0xa4 0x18 0x98 # CHECK: mulu $3, $4, $5
+0x00 0xa4 0x18 0xd8 # CHECK: muhu $3, $4, $5
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0xa4 0x1a 0xd0 # CHECK: nor $3, $4, $5
+0x00 0xa4 0x1a 0x90 # CHECK: or $3, $4, $5
+0x50 0x64 0x04 0xd2 # CHECK: ori $3, $4, 1234
+0x60 0x25 0x20 0x08 # CHECK: pref 1, 8($5)
+0x00 0x83 0x11 0x40 # CHECK: seleqz $2, $3, $4
+0x00 0x83 0x11 0x80 # CHECK: selnez $2, $3, $4
+0x00 0x83 0x38 0x00 # CHECK: sll $4, $3, 7
+0x00 0xa4 0x19 0x90 # CHECK: sub $3, $4, $5
+0x00 0xa4 0x19 0xd0 # CHECK: subu $3, $4, $5
+0x00 0x64 0xf1 0x7c # CHECK: wrpgpr $3, $4
+0x00 0x64 0x7b 0x3c # CHECK: wsbh $3, $4
+0x00 0xa4 0x1b 0x10 # CHECK: xor $3, $4, $5
+0x70 0x64 0x04 0xd2 # CHECK: xori $3, $4, 1234
+0x18 0x85 0x00 0x06 # CHECK: sb $4, 6($5)
+0x60 0x85 0xa8 0x06 # CHECK: sbe $4, 6($5)
+0x60 0x85 0xac 0x06 # CHECK: sce $4, 6($5)
+0x38 0x85 0x00 0x06 # CHECK: sh $4, 6($5)
+0x60 0x85 0xaa 0x06 # CHECK: she $4, 6($5)
+0x60 0x85 0x6c 0x06 # CHECK: lle $4, 6($5)
+0x60 0x85 0x6e 0x06 # CHECK: lwe $4, 6($5)
+0xfc 0x85 0x00 0x06 # CHECK: lw $4, 6($5)
+0x10 0xc0 0x45 0x67 # CHECK: lui $6, 17767
+0x00 0x64 0x2b 0x3c # CHECK: seb $3, $4
+0x00 0x64 0x3b 0x3c # CHECK: seh $3, $4
+0xf8 0xa6 0x00 0x04 # CHECK: sw $5, 4($6)
+0x60 0xa4 0xae 0x08 # CHECK: swe $5, 8($4)
+0x54 0xa4 0x18 0x30 # CHECK: add.s $f3, $f4, $f5
+0x54 0xc4 0x11 0x30 # CHECK: add.d $f2, $f4, $f6
+0x54 0xa4 0x18 0x70 # CHECK: sub.s $f3, $f4, $f5
+0x54 0xc4 0x11 0x70 # CHECK: sub.d $f2, $f4, $f6
+0x54 0xa4 0x18 0xb0 # CHECK: mul.s $f3, $f4, $f5
+0x54 0xc4 0x11 0xb0 # CHECK: mul.d $f2, $f4, $f6
+0x54 0xa4 0x18 0xf0 # CHECK: div.s $f3, $f4, $f5
+0x54 0xc4 0x11 0xf0 # CHECK: div.d $f2, $f4, $f6
+0x54 0xa4 0x19 0xb8 # CHECK: maddf.s $f3, $f4, $f5
+0x54 0xa4 0x1b 0xb8 # CHECK: maddf.d $f3, $f4, $f5
+0x54 0xa4 0x19 0xf8 # CHECK: msubf.s $f3, $f4, $f5
+0x54 0xa4 0x1b 0xf8 # CHECK: msubf.d $f3, $f4, $f5
+0x54 0xc7 0x00 0x7b # CHECK: mov.s $f6, $f7
+0x54 0x86 0x20 0x7b # CHECK: mov.d $f4, $f6
+0x54 0xc7 0x0b 0x7b # CHECK: neg.s $f6, $f7
+0x54 0x86 0x2b 0x7b # CHECK: neg.d $f4, $f6
+0x54 0x64 0x28 0x0b # CHECK: max.s $f5, $f4, $f3
+0x54 0x64 0x2a 0x0b # CHECK: max.d $f5, $f4, $f3
+0x54 0x64 0x28 0x2b # CHECK: maxa.s $f5, $f4, $f3
+0x54 0x64 0x2a 0x2b # CHECK: maxa.d $f5, $f4, $f3
+0x54 0x64 0x28 0x03 # CHECK: min.s $f5, $f4, $f3
+0x54 0x64 0x2a 0x03 # CHECK: min.d $f5, $f4, $f3
+0x54 0x64 0x28 0x23 # CHECK: mina.s $f5, $f4, $f3
+0x54 0x64 0x2a 0x23 # CHECK: mina.d $f5, $f4, $f3
+0x54 0x83 0x10 0x05 # CHECK: cmp.af.s $f2, $f3, $f4
+0x54 0x83 0x10 0x45 # CHECK: cmp.un.s $f2, $f3, $f4
+0x54 0x83 0x10 0x85 # CHECK: cmp.eq.s $f2, $f3, $f4
+0x54 0x83 0x10 0xc5 # CHECK: cmp.ueq.s $f2, $f3, $f4
+0x54 0x83 0x11 0x05 # CHECK: cmp.lt.s $f2, $f3, $f4
+0x54 0x83 0x11 0x45 # CHECK: cmp.ult.s $f2, $f3, $f4
+0x54 0x83 0x11 0x85 # CHECK: cmp.le.s $f2, $f3, $f4
+0x54 0x83 0x11 0xc5 # CHECK: cmp.ule.s $f2, $f3, $f4
+0x54 0x83 0x12 0x05 # CHECK: cmp.saf.s $f2, $f3, $f4
+0x54 0x83 0x12 0x45 # CHECK: cmp.sun.s $f2, $f3, $f4
+0x54 0x83 0x12 0x85 # CHECK: cmp.seq.s $f2, $f3, $f4
+0x54 0x83 0x12 0xc5 # CHECK: cmp.sueq.s $f2, $f3, $f4
+0x54 0x83 0x13 0x05 # CHECK: cmp.slt.s $f2, $f3, $f4
+0x54 0x83 0x13 0x45 # CHECK: cmp.sult.s $f2, $f3, $f4
+0x54 0x83 0x13 0x85 # CHECK: cmp.sle.s $f2, $f3, $f4
+0x54 0x83 0x13 0xc5 # CHECK: cmp.sule.s $f2, $f3, $f4
+0x54 0x83 0x10 0x15 # CHECK: cmp.af.d $f2, $f3, $f4
+0x54 0x83 0x10 0x55 # CHECK: cmp.un.d $f2, $f3, $f4
+0x54 0x83 0x10 0x95 # CHECK: cmp.eq.d $f2, $f3, $f4
+0x54 0x83 0x10 0xd5 # CHECK: cmp.ueq.d $f2, $f3, $f4
+0x54 0x83 0x11 0x15 # CHECK: cmp.lt.d $f2, $f3, $f4
+0x54 0x83 0x11 0x55 # CHECK: cmp.ult.d $f2, $f3, $f4
+0x54 0x83 0x11 0x95 # CHECK: cmp.le.d $f2, $f3, $f4
+0x54 0x83 0x11 0xd5 # CHECK: cmp.ule.d $f2, $f3, $f4
+0x54 0x83 0x12 0x15 # CHECK: cmp.saf.d $f2, $f3, $f4
+0x54 0x83 0x12 0x55 # CHECK: cmp.sun.d $f2, $f3, $f4
+0x54 0x83 0x12 0x95 # CHECK: cmp.seq.d $f2, $f3, $f4
+0x54 0x83 0x12 0xd5 # CHECK: cmp.sueq.d $f2, $f3, $f4
+0x54 0x83 0x13 0x15 # CHECK: cmp.slt.d $f2, $f3, $f4
+0x54 0x83 0x13 0x55 # CHECK: cmp.sult.d $f2, $f3, $f4
+0x54 0x83 0x13 0x95 # CHECK: cmp.sle.d $f2, $f3, $f4
+0x54 0x83 0x13 0xd5 # CHECK: cmp.sule.d $f2, $f3, $f4
+0x54 0x64 0x01 0x3b # CHECK: cvt.l.s $f3, $f4
+0x54 0x64 0x41 0x3b # CHECK: cvt.l.d $f3, $f4
+0x54 0x64 0x09 0x3b # CHECK: cvt.w.s $f3, $f4
+0x54 0x64 0x49 0x3b # CHECK: cvt.w.d $f3, $f4
+0x54 0x44 0x13 0x7b # CHECK: cvt.d.s $f2, $f4
+0x54 0x44 0x33 0x7b # CHECK: cvt.d.w $f2, $f4
+0x54 0x44 0x53 0x7b # CHECK: cvt.d.l $f2, $f4
+0x54 0x44 0x1b 0x7b # CHECK: cvt.s.d $f2, $f4
+0x54 0x64 0x3b 0x7b # CHECK: cvt.s.w $f3, $f4
+0x54 0x64 0x5b 0x7b # CHECK: cvt.s.l $f3, $f4
+0x54 0x65 0x03 0x7b # CHECK: abs.s $f3, $f5
+0x54 0x44 0x23 0x7b # CHECK: abs.d $f2, $f4
+0x54 0x65 0x03 0x3b # CHECK: floor.l.s $f3, $f5
+0x54 0x44 0x43 0x3b # CHECK: floor.l.d $f2, $f4
+0x54 0x65 0x0b 0x3b # CHECK: floor.w.s $f3, $f5
+0x54 0x44 0x4b 0x3b # CHECK: floor.w.d $f2, $f4
+0x54 0x65 0x13 0x3b # CHECK: ceil.l.s $f3, $f5
+0x54 0x44 0x53 0x3b # CHECK: ceil.l.d $f2, $f4
+0x54 0x65 0x1b 0x3b # CHECK: ceil.w.s $f3, $f5
+0x54 0x44 0x5b 0x3b # CHECK: ceil.w.d $f2, $f4
+0x54 0x65 0x23 0x3b # CHECK: trunc.l.s $f3, $f5
+0x54 0x44 0x63 0x3b # CHECK: trunc.l.d $f2, $f4
+0x54 0x65 0x2b 0x3b # CHECK: trunc.w.s $f3, $f5
+0x54 0x44 0x6b 0x3b # CHECK: trunc.w.d $f2, $f4
+0x54 0x65 0x0a 0x3b # CHECK: sqrt.s $f3, $f5
+0x54 0x44 0x4a 0x3b # CHECK: sqrt.d $f2, $f4
+0x54 0x65 0x02 0x3b # CHECK: rsqrt.s $f3, $f5
+0x54 0x44 0x42 0x3b # CHECK: rsqrt.d $f2, $f4
+0x60 0x25 0xa6 0x08 # CHECK: cachee 1, 8($5)
+0x60 0x25 0xa4 0x08 # CHECK: prefe 1, 8($5)
+0x01 0x28 0x00 0x3c # CHECK: teq $8, $9
+0x00 0xe5 0xf0 0x3c # CHECK: teq $5, $7, 15
+0x01 0x47 0x02 0x3c # CHECK: tge $7, $10
+0x02 0x67 0xf2 0x3c # CHECK: tge $7, $19, 15
+0x03 0x96 0x04 0x3c # CHECK: tgeu $22, $gp
+0x01 0xd4 0xf4 0x3c # CHECK: tgeu $20, $14, 15
+0x01 0xaf 0x08 0x3c # CHECK: tlt $15, $13
+0x02 0x62 0xf8 0x3c # CHECK: tlt $2, $19, 15
+0x02 0x0b 0x0a 0x3c # CHECK: tltu $11, $16
+0x03 0xb0 0xfa 0x3c # CHECK: tltu $16, $sp, 15
+0x02 0x26 0x0c 0x3c # CHECK: tne $6, $17
+0x01 0x07 0xfc 0x3c # CHECK: tne $7, $8, 15
+0x65 0x88 # CHECK: lw $3, 32($gp)
+0x48 0x66 # CHECK: lw $3, 24($sp)
+0x6a 0x12 # CHECK: lw16 $4, 8($17)
+0x29 0x82 # CHECK: lhu16 $3, 4($16)
+0x09 0x94 # CHECK: lbu16 $3, 4($17)
+0x09 0x9f # CHECK: lbu16 $3, -1($17)
+0x04 0xcc # CHECK: addu16 $6, $17, $4
+0x44 0x21 # CHECK: and16 $16, $2
+0x2e 0x56 # CHECK: andi16 $4, $5, 8
+0x46 0x70 # CHECK: not16 $4, $7
+0x45 0xf9 # CHECK: or16 $3, $7
+0x25 0xe0 # CHECK: sll16 $3, $6, 8
+0x25 0xe1 # CHECK: srl16 $3, $6, 8
+0x46 0x1B # CHECK: break16 8
+0xed 0xff # CHECK: li16 $3, -1
+0x0c 0x65 # CHECK: move16 $3, $5
+0x46 0x3b # CHECK: sdbbp16 8
+0x04 0x3b # CHECK: subu16 $5, $16, $3
+0x44 0xd8 # CHECK: xor16 $17, $5
+0x1c 0x85 0x00 0x08 # CHECK: lb $4, 8($5)
+0x14 0x85 0x00 0x08 # CHECK: lbu $4, 8($5)
+0x60 0x85 0x68 0x08 # CHECK: lbe $4, 8($5)
+0x60 0x85 0x60 0x08 # CHECK: lbue $4, 8($5)
+0x00 0x00 0x28 0x00 # CHECK: pause
+0x00 0xbd 0x11 0xc0 # CHECK: rdhwr $5, $29, 2
+0x00 0xbd 0x01 0xc0 # CHECK: rdhwr $5, $29
+0x00 0x00 0x93 0x7c # CHECK: wait
+0x00 0x11 0x93 0x7c # CHECK: wait 17
+0x00 0x00 0x08 0x00 # CHECK: ssnop
+0x00 0x00 0x6b 0x7c # CHECK: sync
+0x00 0x11 0x6b 0x7c # CHECK: sync 17
+0x41 0x85 0x00 0x08 # CHECK: synci 8($5)
+0x00 0x69 0xe1 0x7c # CHECK: rdpgpr $3, $9
+0x00 0x00 0xdb 0x7c # CHECK: sdbbp
+0x00 0x22 0xdb 0x7c # CHECK: sdbbp 34
+0x45 0x22 # CHECK: lwm16 $16, $17, $ra, 8($sp)
+0x89 0x84 # CHECK: sb16 $3, 4($16)
+0xaa 0x14 # CHECK: sh16 $4, 8($17)
+0xc8 0x9f # CHECK: sw $4, 124($sp)
+0xea 0x11 # CHECK: sw16 $4, 4($17)
+0xe8 0x11 # CHECK: sw16 $zero, 4($17)
+0x45 0x2a # CHECK: swm16 $16, $17, $ra, 8($sp)
+0x54 0x44 0x12 0x3b # CHECK: recip.s $f2, $f4
+0x54 0x44 0x52 0x3b # CHECK: recip.d $f2, $f4
+0x54 0x82 0x00 0x20 # CHECK: rint.s $f2, $f4
+0x54 0x82 0x02 0x20 # CHECK: rint.d $f2, $f4
+0x54 0x44 0x33 0x3b # CHECK: round.l.s $f2, $f4
+0x54 0x44 0x73 0x3b # CHECK: round.l.d $f2, $f4
+0x54 0x44 0x3b 0x3b # CHECK: round.w.s $f2, $f4
+0x54 0x44 0x7b 0x3b # CHECK: round.w.d $f2, $f4
+0x54 0x41 0x08 0xb8 # CHECK: sel.s $f1, $f1, $f2
+0x54 0x82 0x02 0xb8 # CHECK: sel.d $f0, $f2, $f4
+0x54 0x62 0x08 0x38 # CHECK: seleqz.s $f1, $f2, $f3
+0x55 0x04 0x12 0x38 # CHECK: seleqz.d $f2, $f4, $f8
+0x54 0x62 0x08 0x78 # CHECK: selnez.s $f1, $f2, $f3
+0x55 0x04 0x12 0x78 # CHECK: selnez.d $f2, $f4, $f8
+0x54 0x62 0x00 0x60 # CHECK: class.s $f2, $f3
+0x54 0x82 0x02 0x60 # CHECK: class.d $f2, $f4
+0x00 0x00 0x47 0x7c # CHECK: di
+0x00 0x0f 0x47 0x7c # CHECK: di $15
diff --git a/test/MC/Disassembler/Mips/micromips64r6/valid.txt b/test/MC/Disassembler/Mips/micromips64r6/valid.txt
new file mode 100644
index 000000000000..4f2325fe7ce1
--- /dev/null
+++ b/test/MC/Disassembler/Mips/micromips64r6/valid.txt
@@ -0,0 +1,171 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips64r6 -mattr=micromips | FileCheck %s
+
+0x6f 0x83 # CHECK: addiur1sp $7, 4
+0x6f 0x7e # CHECK: addiur2 $6, $7, -1
+0x6f 0x76 # CHECK: addiur2 $6, $7, 12
+0x4c 0xfc # CHECK: addius5 $7, -2
+0x4f 0xff # CHECK: addiusp -1028
+0x4f 0xfd # CHECK: addiusp -1032
+0x4c 0x01 # CHECK: addiusp 1024
+0x4c 0x03 # CHECK: addiusp 1028
+0x4f 0xf9 # CHECK: addiusp -16
+0xcc 0x42 # CHECK: bc16 132
+0x8f 0x0a # CHECK: beqzc16 $6, 20
+0xaf 0x0a # CHECK: bnezc16 $6, 20
+0x65 0x88 # CHECK: lw $3, 32($gp)
+0x48 0x66 # CHECK: lw $3, 24($sp)
+0x6a 0x12 # CHECK: lw16 $4, 8($17)
+0x29 0x82 # CHECK: lhu16 $3, 4($16)
+0x09 0x94 # CHECK: lbu16 $3, 4($17)
+0x09 0x9f # CHECK: lbu16 $3, -1($17)
+0x45 0x2b # CHECK: jalr $9
+0x45 0x23 # CHECK: jrc16 $9
+0x44 0xb3 # CHECK: jrcaddiusp 20
+0xf0 0x64 0x00 0x05 # CHECK: daui $3, $4, 5
+0x42 0x23 0x00 0x04 # CHECK: dahi $3, 4
+0x42 0x03 0x00 0x04 # CHECK: dati $3, 4
+0x59 0x26 0x30 0xec # CHECK: dext $9, $6, 3, 7
+0x59 0x26 0x30 0xe4 # CHECK: dextm $9, $6, 3, 7
+0x59 0x26 0x30 0xd4 # CHECK: dextu $9, $6, 3, 7
+0x58 0x43 0x25 0x1c # CHECK: dalign $4, $2, $3, 5
+0x58 0x64 0x29 0x18 # CHECK: ddiv $3, $4, $5
+0x58 0x64 0x29 0x58 # CHECK: dmod $3, $4, $5
+0x58 0x64 0x29 0x98 # CHECK: ddivu $3, $4, $5
+0x58 0x64 0x29 0xd8 # CHECK: dmodu $3, $4, $5
+0x54 0xa4 0x18 0x30 # CHECK: add.s $f3, $f4, $f5
+0x54 0xc4 0x11 0x30 # CHECK: add.d $f2, $f4, $f6
+0x54 0xa4 0x18 0x70 # CHECK: sub.s $f3, $f4, $f5
+0x54 0xc4 0x11 0x70 # CHECK: sub.d $f2, $f4, $f6
+0x54 0xa4 0x18 0xb0 # CHECK: mul.s $f3, $f4, $f5
+0x54 0xc4 0x11 0xb0 # CHECK: mul.d $f2, $f4, $f6
+0x54 0xa4 0x18 0xf0 # CHECK: div.s $f3, $f4, $f5
+0x54 0xc4 0x11 0xf0 # CHECK: div.d $f2, $f4, $f6
+0x54 0xa4 0x19 0xb8 # CHECK: maddf.s $f3, $f4, $f5
+0x54 0xa4 0x1b 0xb8 # CHECK: maddf.d $f3, $f4, $f5
+0x54 0xa4 0x19 0xf8 # CHECK: msubf.s $f3, $f4, $f5
+0x54 0xa4 0x1b 0xf8 # CHECK: msubf.d $f3, $f4, $f5
+0x54 0xc7 0x00 0x7b # CHECK: mov.s $f6, $f7
+0x54 0x86 0x20 0x7b # CHECK: mov.d $f4, $f6
+0x54 0xc7 0x0b 0x7b # CHECK: neg.s $f6, $f7
+0x54 0x86 0x2b 0x7b # CHECK: neg.d $f4, $f6
+0x54 0x64 0x28 0x0b # CHECK: max.s $f5, $f4, $f3
+0x54 0x64 0x2a 0x0b # CHECK: max.d $f5, $f4, $f3
+0x54 0x64 0x28 0x2b # CHECK: maxa.s $f5, $f4, $f3
+0x54 0x64 0x2a 0x2b # CHECK: maxa.d $f5, $f4, $f3
+0x54 0x64 0x28 0x03 # CHECK: min.s $f5, $f4, $f3
+0x54 0x64 0x2a 0x03 # CHECK: min.d $f5, $f4, $f3
+0x54 0x64 0x28 0x23 # CHECK: mina.s $f5, $f4, $f3
+0x54 0x64 0x2a 0x23 # CHECK: mina.d $f5, $f4, $f3
+0x54 0x83 0x10 0x05 # CHECK: cmp.af.s $f2, $f3, $f4
+0x54 0x83 0x10 0x45 # CHECK: cmp.un.s $f2, $f3, $f4
+0x54 0x83 0x10 0x85 # CHECK: cmp.eq.s $f2, $f3, $f4
+0x54 0x83 0x10 0xc5 # CHECK: cmp.ueq.s $f2, $f3, $f4
+0x54 0x83 0x11 0x05 # CHECK: cmp.lt.s $f2, $f3, $f4
+0x54 0x83 0x11 0x45 # CHECK: cmp.ult.s $f2, $f3, $f4
+0x54 0x83 0x11 0x85 # CHECK: cmp.le.s $f2, $f3, $f4
+0x54 0x83 0x11 0xc5 # CHECK: cmp.ule.s $f2, $f3, $f4
+0x54 0x83 0x12 0x05 # CHECK: cmp.saf.s $f2, $f3, $f4
+0x54 0x83 0x12 0x45 # CHECK: cmp.sun.s $f2, $f3, $f4
+0x54 0x83 0x12 0x85 # CHECK: cmp.seq.s $f2, $f3, $f4
+0x54 0x83 0x12 0xc5 # CHECK: cmp.sueq.s $f2, $f3, $f4
+0x54 0x83 0x13 0x05 # CHECK: cmp.slt.s $f2, $f3, $f4
+0x54 0x83 0x13 0x45 # CHECK: cmp.sult.s $f2, $f3, $f4
+0x54 0x83 0x13 0x85 # CHECK: cmp.sle.s $f2, $f3, $f4
+0x54 0x83 0x13 0xc5 # CHECK: cmp.sule.s $f2, $f3, $f4
+0x54 0x83 0x10 0x15 # CHECK: cmp.af.d $f2, $f3, $f4
+0x54 0x83 0x10 0x55 # CHECK: cmp.un.d $f2, $f3, $f4
+0x54 0x83 0x10 0x95 # CHECK: cmp.eq.d $f2, $f3, $f4
+0x54 0x83 0x10 0xd5 # CHECK: cmp.ueq.d $f2, $f3, $f4
+0x54 0x83 0x11 0x15 # CHECK: cmp.lt.d $f2, $f3, $f4
+0x54 0x83 0x11 0x55 # CHECK: cmp.ult.d $f2, $f3, $f4
+0x54 0x83 0x11 0x95 # CHECK: cmp.le.d $f2, $f3, $f4
+0x54 0x83 0x11 0xd5 # CHECK: cmp.ule.d $f2, $f3, $f4
+0x54 0x83 0x12 0x15 # CHECK: cmp.saf.d $f2, $f3, $f4
+0x54 0x83 0x12 0x55 # CHECK: cmp.sun.d $f2, $f3, $f4
+0x54 0x83 0x12 0x95 # CHECK: cmp.seq.d $f2, $f3, $f4
+0x54 0x83 0x12 0xd5 # CHECK: cmp.sueq.d $f2, $f3, $f4
+0x54 0x83 0x13 0x15 # CHECK: cmp.slt.d $f2, $f3, $f4
+0x54 0x83 0x13 0x55 # CHECK: cmp.sult.d $f2, $f3, $f4
+0x54 0x83 0x13 0x95 # CHECK: cmp.sle.d $f2, $f3, $f4
+0x54 0x83 0x13 0xd5 # CHECK: cmp.sule.d $f2, $f3, $f4
+0x54 0x64 0x01 0x3b # CHECK: cvt.l.s $f3, $f4
+0x54 0x64 0x41 0x3b # CHECK: cvt.l.d $f3, $f4
+0x54 0x64 0x09 0x3b # CHECK: cvt.w.s $f3, $f4
+0x54 0x64 0x49 0x3b # CHECK: cvt.w.d $f3, $f4
+0x54 0x44 0x13 0x7b # CHECK: cvt.d.s $f2, $f4
+0x54 0x44 0x33 0x7b # CHECK: cvt.d.w $f2, $f4
+0x54 0x44 0x53 0x7b # CHECK: cvt.d.l $f2, $f4
+0x54 0x44 0x1b 0x7b # CHECK: cvt.s.d $f2, $f4
+0x54 0x64 0x3b 0x7b # CHECK: cvt.s.w $f3, $f4
+0x54 0x64 0x5b 0x7b # CHECK: cvt.s.l $f3, $f4
+0x54 0x65 0x03 0x7b # CHECK: abs.s $f3, $f5
+0x54 0x44 0x23 0x7b # CHECK: abs.d $f2, $f4
+0x54 0x65 0x03 0x3b # CHECK: floor.l.s $f3, $f5
+0x54 0x44 0x43 0x3b # CHECK: floor.l.d $f2, $f4
+0x54 0x65 0x0b 0x3b # CHECK: floor.w.s $f3, $f5
+0x54 0x44 0x4b 0x3b # CHECK: floor.w.d $f2, $f4
+0x54 0x65 0x13 0x3b # CHECK: ceil.l.s $f3, $f5
+0x54 0x44 0x53 0x3b # CHECK: ceil.l.d $f2, $f4
+0x54 0x65 0x1b 0x3b # CHECK: ceil.w.s $f3, $f5
+0x54 0x44 0x5b 0x3b # CHECK: ceil.w.d $f2, $f4
+0x54 0x65 0x23 0x3b # CHECK: trunc.l.s $f3, $f5
+0x54 0x44 0x63 0x3b # CHECK: trunc.l.d $f2, $f4
+0x54 0x65 0x2b 0x3b # CHECK: trunc.w.s $f3, $f5
+0x54 0x44 0x6b 0x3b # CHECK: trunc.w.d $f2, $f4
+0x54 0x65 0x0a 0x3b # CHECK: sqrt.s $f3, $f5
+0x54 0x44 0x4a 0x3b # CHECK: sqrt.d $f2, $f4
+0x54 0x65 0x02 0x3b # CHECK: rsqrt.s $f3, $f5
+0x54 0x44 0x42 0x3b # CHECK: rsqrt.d $f2, $f4
+0x01 0x28 0x00 0x3c # CHECK: teq $8, $9
+0x00 0xe5 0xf0 0x3c # CHECK: teq $5, $7, 15
+0x01 0x47 0x02 0x3c # CHECK: tge $7, $10
+0x02 0x67 0xf2 0x3c # CHECK: tge $7, $19, 15
+0x03 0x96 0x04 0x3c # CHECK: tgeu $22, $gp
+0x01 0xd4 0xf4 0x3c # CHECK: tgeu $20, $14, 15
+0x01 0xaf 0x08 0x3c # CHECK: tlt $15, $13
+0x02 0x62 0xf8 0x3c # CHECK: tlt $2, $19, 15
+0x02 0x0b 0x0a 0x3c # CHECK: tltu $11, $16
+0x03 0xb0 0xfa 0x3c # CHECK: tltu $16, $sp, 15
+0x02 0x26 0x0c 0x3c # CHECK: tne $6, $17
+0x01 0x07 0xfc 0x3c # CHECK: tne $7, $8, 15
+0x60 0x25 0xa6 0x08 # CHECK: cachee 1, 8($5)
+0x00 0x64 0xf1 0x7c # CHECK: wrpgpr $3, $4
+0x00 0x64 0x7b 0x3c # CHECK: wsbh $3, $4
+0x65 0x88 # CHECK: lw $3, 32($gp)
+0x48 0x66 # CHECK: lw $3, 24($sp)
+0x6a 0x12 # CHECK: lw16 $4, 8($17)
+0x29 0x82 # CHECK: lhu16 $3, 4($16)
+0x09 0x94 # CHECK: lbu16 $3, 4($17)
+0x09 0x9f # CHECK: lbu16 $3, -1($17)
+0x46 0x1B # CHECK: break16 8
+0xed 0xff # CHECK: li16 $3, -1
+0x0c 0x65 # CHECK: move16 $3, $5
+0x46 0x3b # CHECK: sdbbp16 8
+0x04 0x3b # CHECK: subu16 $5, $16, $3
+0x44 0xd8 # CHECK: xor16 $17, $5
+0x45 0x22 # CHECK: lwm16 $16, $17, $ra, 8($sp)
+0x89 0x84 # CHECK: sb16 $3, 4($16)
+0xaa 0x14 # CHECK: sh16 $4, 8($17)
+0xc8 0x9f # CHECK: sw $4, 124($sp)
+0xea 0x11 # CHECK: sw16 $4, 4($17)
+0xe8 0x11 # CHECK: sw16 $zero, 4($17)
+0x45 0x2a # CHECK: swm16 $16, $17, $ra, 8($sp)
+0x54 0x44 0x12 0x3b # CHECK: recip.s $f2, $f4
+0x54 0x44 0x52 0x3b # CHECK: recip.d $f2, $f4
+0x54 0x82 0x00 0x20 # CHECK: rint.s $f2, $f4
+0x54 0x82 0x02 0x20 # CHECK: rint.d $f2, $f4
+0x54 0x44 0x33 0x3b # CHECK: round.l.s $f2, $f4
+0x54 0x44 0x73 0x3b # CHECK: round.l.d $f2, $f4
+0x54 0x44 0x3b 0x3b # CHECK: round.w.s $f2, $f4
+0x54 0x44 0x7b 0x3b # CHECK: round.w.d $f2, $f4
+0x54 0x41 0x08 0xb8 # CHECK: sel.s $f1, $f1, $f2
+0x54 0x82 0x02 0xb8 # CHECK: sel.d $f0, $f2, $f4
+0x54 0x62 0x08 0x38 # CHECK: seleqz.s $f1, $f2, $f3
+0x55 0x04 0x12 0x38 # CHECK: seleqz.d $f2, $f4, $f8
+0x54 0x62 0x08 0x78 # CHECK: selnez.s $f1, $f2, $f3
+0x55 0x04 0x12 0x78 # CHECK: selnez.d $f2, $f4, $f8
+0x54 0x62 0x00 0x60 # CHECK: class.s $f2, $f3
+0x54 0x82 0x02 0x60 # CHECK: class.d $f2, $f4
+0x00 0x00 0xe3 0x7c # CHECK: deret
+0x00 0x00 0x47 0x7c # CHECK: di
+0x00 0x0f 0x47 0x7c # CHECK: di $15
diff --git a/test/MC/Disassembler/Mips/micromips_le.txt b/test/MC/Disassembler/Mips/micromips_le.txt
deleted file mode 100644
index 3058bd061066..000000000000
--- a/test/MC/Disassembler/Mips/micromips_le.txt
+++ /dev/null
@@ -1,342 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mattr=micromips \
-# RUN: | FileCheck %s
-
-0xe6 0x00 0x10 0x49 # CHECK: add $9, $6, $7
-
-0x26 0x11 0x67 0x45 # CHECK: addi $9, $6, 17767
-
-0x26 0x31 0x67 0xc5 # CHECK: addiu $9, $6, -15001
-
-0x26 0x11 0x67 0x45 # CHECK: addi $9, $6, 17767
-
-0x26 0x31 0x67 0xc5 # CHECK: addiu $9, $6, -15001
-
-0xf9 0x4f # CHECK: addiusp -16
-
-0xff 0x4f # CHECK: addiusp -1028
-
-0xfd 0x4f # CHECK: addiusp -1032
-
-0x01 0x4c # CHECK: addiusp 1024
-
-0x03 0x4c # CHECK: addiusp 1028
-
-0xe6 0x00 0x50 0x49 # CHECK: addu $9, $6, $7
-
-0x29 0x2c # CHECK: andi16 $16, $2, 31
-
-0xe6 0x00 0x90 0x49 # CHECK: sub $9, $6, $7
-
-0xa3 0x00 0xd0 0x21 # CHECK: subu $4, $3, $5
-
-0xe0 0x00 0x90 0x31 # CHECK: sub $6, $zero, $7
-
-0xe0 0x00 0xd0 0x31 # CHECK: subu $6, $zero, $7
-
-0x08 0x00 0x50 0x39 # CHECK: addu $7, $8, $zero
-
-0xa3 0x00 0x50 0x1b # CHECK: slt $3, $3, $5
-
-0x63 0x90 0x67 0x00 # CHECK: slti $3, $3, 103
-
-0x63 0x90 0x67 0x00 # CHECK: slti $3, $3, 103
-
-0x63 0xb0 0x67 0x00 # CHECK: sltiu $3, $3, 103
-
-0xa3 0x00 0x90 0x1b # CHECK: sltu $3, $3, $5
-
-0xa9 0x41 0x67 0x45 # CHECK: lui $9, 17767
-
-0xe6 0x00 0x50 0x4a # CHECK: and $9, $6, $7
-
-0x26 0xd1 0x67 0x45 # CHECK: andi $9, $6, 17767
-
-0x26 0xd1 0x67 0x45 # CHECK: andi $9, $6, 17767
-
-0xa4 0x00 0x90 0x1a # CHECK: or $3, $4, $5
-
-0x26 0x51 0x67 0x45 # CHECK: ori $9, $6, 17767
-
-0xa3 0x00 0x10 0x1b # CHECK: xor $3, $3, $5
-
-0x26 0x71 0x67 0x45 # CHECK: xori $9, $6, 17767
-
-0x26 0x71 0x67 0x45 # CHECK: xori $9, $6, 17767
-
-0xe6 0x00 0xd0 0x4a # CHECK: nor $9, $6, $7
-
-0x08 0x00 0xd0 0x3a # CHECK: not $7, $8
-
-0xe6 0x00 0x10 0x4a # CHECK: mul $9, $6, $7
-
-0xe9 0x00 0x3c 0x8b # CHECK: mult $9, $7
-
-0xe9 0x00 0x3c 0x9b # CHECK: multu $9, $7
-
-0xe9 0x00 0x3c 0xab # CHECK: div $zero, $9, $7
-
-0xe9 0x00 0x3c 0xbb # CHECK: divu $zero, $9, $7
-
-0x83 0x00 0x00 0x38 # CHECK: sll $4, $3, 7
-
-0x65 0x00 0x10 0x10 # CHECK: sllv $2, $3, $5
-
-0x83 0x00 0x80 0x38 # CHECK: sra $4, $3, 7
-
-0x65 0x00 0x90 0x10 # CHECK: srav $2, $3, $5
-
-0x83 0x00 0x40 0x38 # CHECK: srl $4, $3, 7
-
-0x65 0x00 0x50 0x10 # CHECK: srlv $2, $3, $5
-
-0x26 0x01 0xc0 0x38 # CHECK: rotr $9, $6, 7
-
-0xc7 0x00 0xd0 0x48 # CHECK: rotrv $9, $6, $7
-
-0xa4 0x1c 0x08 0x00 # CHECK: lb $5, 8($4)
-
-0xc4 0x14 0x08 0x00 # CHECK: lbu $6, 8($4)
-
-0x44 0x3c 0x08 0x00 # CHECK: lh $2, 8($4)
-
-0x82 0x34 0x08 0x00 # CHECK: lhu $4, 8($2)
-
-0xc5 0xfc 0x04 0x00 # CHECK: lw $6, 4($5)
-
-0xdd 0xfc 0x7b 0x00 # CHECK: lw $6, 123($sp)
-
-0xa4 0x18 0x08 0x00 # CHECK: sb $5, 8($4)
-
-0x44 0x38 0x08 0x00 # CHECK: sh $2, 8($4)
-
-0xa6 0xf8 0x04 0x00 # CHECK: sw $5, 4($6)
-
-0xbd 0xf8 0x7b 0x00 # CHECK: sw $5, 123($sp)
-
-0x44 0x60 0x08 0xe0 # CHECK: lwu $2, 8($4)
-
-0x85 0x60 0x10 0x00 # CHECK: lwl $4, 16($5)
-
-0x85 0x60 0x10 0x10 # CHECK: lwr $4, 16($5)
-
-0x85 0x60 0x10 0x80 # CHECK: swl $4, 16($5)
-
-0x85 0x60 0x10 0x90 # CHECK: swr $4, 16($5)
-
-0xe6 0x00 0x58 0x48 # CHECK: movz $9, $6, $7
-
-0xe6 0x00 0x18 0x48 # CHECK: movn $9, $6, $7
-
-0x26 0x55 0x7b 0x09 # CHECK: movt $9, $6, $fcc0
-
-0x26 0x55 0x7b 0x01 # CHECK: movf $9, $6, $fcc0
-
-0x06 0x00 0x7c 0x2d # CHECK: mthi $6
-
-0x06 0x00 0x7c 0x0d # CHECK: mfhi $6
-
-0x06 0x00 0x7c 0x3d # CHECK: mtlo $6
-
-0x06 0x00 0x7c 0x1d # CHECK: mflo $6
-
-0xa4 0x00 0x3c 0xcb # CHECK: madd $4, $5
-
-0xa4 0x00 0x3c 0xdb # CHECK: maddu $4, $5
-
-0xa4 0x00 0x3c 0xeb # CHECK: msub $4, $5
-
-0xa4 0x00 0x3c 0xfb # CHECK: msubu $4, $5
-
-0x26 0x01 0x3c 0x5b # CHECK: clz $9, $6
-
-0x26 0x01 0x3c 0x4b # CHECK: clo $9, $6
-
-0x26 0x01 0x3c 0x2b # CHECK: seb $9, $6
-
-0x26 0x01 0x3c 0x3b # CHECK: seh $9, $6
-
-0x26 0x01 0x3c 0x7b # CHECK: wsbh $9, $6
-
-0x26 0x01 0xec 0x30 # CHECK: ext $9, $6, 3, 7
-
-0x26 0x01 0xcc 0x48 # CHECK: ins $9, $6, 3, 7
-
-0x00 0xd4 0x98 0x02 # CHECK: j 1328
-
-0x00 0xf4 0x98 0x02 # CHECK: jal 1328
-
-0xe6 0x03 0x3c 0x0f # CHECK: jalr $ra, $6
-
-0x07 0x00 0x3c 0x0f # CHECK: jr $7
-
-0x05 0x47 # CHECK: jraddiusp 20
-
-0xc9 0x94 0x9a 0x02 # CHECK: beq $9, $6, 1332
-
-0x46 0x40 0x9a 0x02 # CHECK: bgez $6, 1332
-
-0x66 0x40 0x9a 0x02 # CHECK: bgezal $6, 1332
-
-0x26 0x40 0x9a 0x02 # CHECK: bltzal $6, 1332
-
-0xc6 0x40 0x9a 0x02 # CHECK: bgtz $6, 1332
-
-0x86 0x40 0x9a 0x02 # CHECK: blez $6, 1332
-
-0xc9 0xb4 0x9a 0x02 # CHECK: bne $9, $6, 1332
-
-0x06 0x40 0x9a 0x02 # CHECK: bltz $6, 1332
-
-0x28 0x01 0x3c 0x00 # CHECK: teq $8, $9, 0
-
-0x28 0x01 0x3c 0x02 # CHECK: tge $8, $9, 0
-
-0x28 0x01 0x3c 0x04 # CHECK: tgeu $8, $9, 0
-
-0x28 0x01 0x3c 0x08 # CHECK: tlt $8, $9, 0
-
-0x28 0x01 0x3c 0x0a # CHECK: tltu $8, $9, 0
-
-0x28 0x01 0x3c 0x0c # CHECK: tne $8, $9, 0
-
-0xc9 0x41 0x67 0x45 # CHECK: teqi $9, 17767
-
-0x29 0x41 0x67 0x45 # CHECK: tgei $9, 17767
-
-0x69 0x41 0x67 0x45 # CHECK: tgeiu $9, 17767
-
-0x09 0x41 0x67 0x45 # CHECK: tlti $9, 17767
-
-0x49 0x41 0x67 0x45 # CHECK: tltiu $9, 17767
-
-0x89 0x41 0x67 0x45 # CHECK: tnei $9, 17767
-
-0x25 0x20 0x08 0x60 # CHECK: cache 1, 8($5)
-
-0x25 0x60 0x08 0x20 # CHECK: pref 1, 8($5)
-
-0x00 0x00 0x00 0x08 # CHECK: ssnop
-
-0x00 0x00 0x00 0x18 # CHECK: ehb
-
-0x00 0x00 0x00 0x28 # CHECK: pause
-
-0x44 0x60 0x08 0x30 # CHECK: ll $2, 8($4)
-
-0x44 0x60 0x08 0xb0 # CHECK: sc $2, 8($4)
-
-0x64 0x00 0x18 0x11 # CHECK: lwxs $2, $3($4)
-
-0x66 0x42 0x9a 0x02 # CHECK: bgezals $6, 1332
-
-0x26 0x42 0x9a 0x02 # CHECK: bltzals $6, 1332
-
-0xe9 0x40 0x9a 0x02 # CHECK: beqzc $9, 1332
-
-0xa9 0x40 0x9a 0x02 # CHECK: bnezc $9, 1332
-
-0x00 0x74 0x98 0x02 # CHECK: jals 1328
-
-0xe6 0x03 0x3c 0x4f # CHECK: jalrs $ra, $6
-
-0x44 0x20 0x08 0x50 # CHECK: lwm32 $16, $17, 8($4)
-
-0x44 0x20 0x08 0xd0 # CHECK: swm32 $16, $17, 8($4)
-
-0x04 0x22 0x08 0x90 # CHECK: swp $16, 8($4)
-
-0x04 0x22 0x08 0x10 # CHECK: lwp $16, 8($4)
-
-0x00 0x00 0x00 0x00 # CHECK: nop
-
-0x00 0x79 0x05 0x00 # CHECK: addiupc $2, 20
-
-0xbf 0x7b 0xff 0xff # CHECK: addiupc $7, 16777212
-
-0xc0 0x7b 0x00 0x00 # CHECK: addiupc $7, -16777216
-
-0x42 0x07 # CHECK: addu16 $6, $17, $4
-
-0xb1 0x06 # CHECK: subu16 $5, $16, $3
-
-0x82 0x44 # CHECK: and16 $16, $2
-
-0x0b 0x44 # CHECK: not16 $17, $3
-
-0xc4 0x44 # CHECK: or16 $16, $4
-
-0x4d 0x44 # CHECK: xor16 $17, $5
-
-0x8a 0x25 # CHECK: sll16 $3, $16, 5
-
-0x1d 0x26 # CHECK: srl16 $4, $17, 6
-
-0x94 0x09 # CHECK: lbu16 $3, 4($17)
-
-0x8f 0x09 # CHECK: lbu16 $3, -1($16)
-
-0x82 0x29 # CHECK: lhu16 $3, 4($16)
-
-0x12 0x6a # CHECK: lw16 $4, 8($17)
-
-0x84 0x89 # CHECK: sb16 $3, 4($16)
-
-0x14 0xaa # CHECK: sh16 $4, 8($17)
-
-0x11 0xea # CHECK: sw16 $4, 4($17)
-
-0x11 0xe8 # CHECK: sw16 $zero, 4($17)
-
-0x09 0x46 # CHECK: mfhi $9
-
-0x49 0x46 # CHECK: mflo $9
-
-0x21 0x0f # CHECK: move $25, $1
-
-0xa9 0x45 # CHECK: jrc $9
-
-0xc9 0x45 # CHECK: jalr $9
-
-0xe9 0x45 # CHECK: jalrs16 $9
-
-0x89 0x45 # CHECK: jr16 $9
-
-0xff 0xed # CHECK: li16 $3, -1
-
-0xfe 0xed # CHECK: li16 $3, 126
-
-0x83 0x6f # CHECK: addiur1sp $7, 4
-
-0x7e 0x6f # CHECK: addiur2 $6, $7, -1
-
-0x76 0x6f # CHECK: addiur2 $6, $7, 12
-
-0xfc 0x4c # CHECK: addius5 $7, -2
-
-0x00 0x0c # CHECK: nop
-
-0x68 0x48 # CHECK: lw $3, 32($sp)
-
-0x9f 0xc8 # CHECK: sw $4, 124($sp)
-
-0x0a 0x8f # CHECK: beqz16 $6, 20
-
-0x0a 0xaf # CHECK: bnez16 $6, 20
-
-0x42 0xcc # CHECK: b16 132
-
-0x88 0x65 # CHECK: lw $3, 32($gp)
-
-0x12 0x45 # CHECK: lwm16 $16, $17, $ra, 8($sp)
-
-0x52 0x45 # CHECK: swm16 $16, $17, $ra, 8($sp)
-
-0x88 0x46 # CHECK: break16 8
-
-0xce 0x46 # CHECK: sdbbp16 14
-
-0x34 0x84 # CHECK: movep $5, $6, $2, $3
-
-0x00 0x00 0x7c 0x57 # CHECK: ei
-
-0x0a 0x00 0x7c 0x57 # CHECK: ei $10
diff --git a/test/MC/Disassembler/Mips/mips-dsp.txt b/test/MC/Disassembler/Mips/mips-dsp.txt
deleted file mode 100644
index 3f60ae1f8e65..000000000000
--- a/test/MC/Disassembler/Mips/mips-dsp.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-# RUN: llvm-mc -triple=mipsel-unknown-linux -mattr=+dsp -disassemble < %s | FileCheck %s
-
-# CHECK: mfhi $21, $ac3
-0x10 0xa8 0x60 0x00
-
-# CHECK: mflo $21, $ac3
-0x12 0xa8 0x60 0x00
-
-# CHECK: mthi $21, $ac3
-0x11 0x18 0xa0 0x02
-
-# CHECK: mtlo $21, $ac3
-0x13 0x18 0xa0 0x02
-
-# CHECK: lbux $10, $20($26)
-0x8a 0x51 0x54 0x7f
-
-# CHECK: lhx $11, $21($27)
-0x0a 0x59 0x75 0x7f
-
-# CHECK: lwx $12, $22($gp)
-0x0a 0x60 0x96 0x7f
diff --git a/test/MC/Disassembler/Mips/mips1/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips1/invalid-xfail.txt
new file mode 100644
index 000000000000..c53091363965
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips1/invalid-xfail.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips1 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips1/invalid.txt b/test/MC/Disassembler/Mips/mips1/invalid.txt
new file mode 100644
index 000000000000..a8fc4228f269
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips1/invalid.txt
@@ -0,0 +1,45 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips1 2>&1 | FileCheck %s
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+0x00 0x11 0x00 0x0f # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x00 0x30 0xc0 0x42 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x00 0xab 0x09 0x4a # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x02 0x80 0x44 0xf0 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x02 0xc5 0x40 0x01 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x03 0x21 0x22 0xd5 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x03 0xa0 0x08 0x13 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x04 0x1c 0x63 0xee # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x40 0x3c 0x00 0x5d # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x42 0x02 0x00 0x27 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x42 0x1d 0x60 0x25 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x43 0xa2 0x00 0x18 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x44 0x20 0x86 0x06 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x44 0xe6 0xd0 0x04 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x03 0x80 0x00 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0xe9 0xce 0x01 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x00 0x03 0x7a # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x00 0x1a 0xa5 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x00 0x78 0x4f # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x00 0xe5 0xe1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x00 0xe6 0xcc # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x20 0x75 0x46 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x21 0xaa 0x00 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x21 0xaa 0x11 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x30 0x14 0xc1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x46 0x3c 0xe8 0x3b # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x4c 0x00 0x3b 0x00 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x4d 0x09 0x92 0x01 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x4d 0x20 0x03 0x21 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x4d 0x20 0x03 0x8d # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x56 0x28 0x40 0x0d # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x5e 0x03 0xc8 0x13 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x73 0x11 0x00 0x01 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x75 0x68 0x90 0xf3 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x7d 0x00 0xa0 0x71 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0xbe 0x03 0x46 0x40 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0xd8 0x07 0x44 0x00 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0xde 0xe3 0x06 0xef # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0xe0 0x46 0x3b 0x29 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0xf7 0x06 0xdc 0xef # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips1/valid-mips1-el.txt b/test/MC/Disassembler/Mips/mips1/valid-mips1-el.txt
index dba949acd66a..869d909621a1 100644
--- a/test/MC/Disassembler/Mips/mips1/valid-mips1-el.txt
+++ b/test/MC/Disassembler/Mips/mips1/valid-mips1-el.txt
@@ -58,6 +58,8 @@
0x12 0x88 0x00 0x00 # CHECK: mflo $17
0x06 0x75 0x20 0x46 # CHECK: mov.d $f20, $f14
0x86 0xd8 0x00 0x46 # CHECK: mov.s $f2, $f27
+0x25 0xf0 0x80 0x00 # CHECK: move $fp, $4
+0x25 0xc8 0xc0 0x00 # CHECK: move $25, $6
0x21 0xf0 0x80 0x00 # CHECK: move $fp, $4
0x21 0xc8 0xc0 0x00 # CHECK: move $25, $6
0x00 0x48 0x9e 0x44 # CHECK: mtc1 $fp, $f9
diff --git a/test/MC/Disassembler/Mips/mips1/valid-mips1.txt b/test/MC/Disassembler/Mips/mips1/valid-mips1.txt
index 59e702e17e1c..a34327aaa766 100644
--- a/test/MC/Disassembler/Mips/mips1/valid-mips1.txt
+++ b/test/MC/Disassembler/Mips/mips1/valid-mips1.txt
@@ -16,11 +16,13 @@
0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
@@ -43,6 +45,9 @@
0x04 0x11 0x14 0x9b # CHECK: bal 21104
0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
+0x08 0x00 0x00 0x01 # CHECK: j 4
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
diff --git a/test/MC/Disassembler/Mips/mips1/valid-xfail.txt b/test/MC/Disassembler/Mips/mips1/valid-xfail.txt
index 759097cc3751..615bf51addfa 100644
--- a/test/MC/Disassembler/Mips/mips1/valid-xfail.txt
+++ b/test/MC/Disassembler/Mips/mips1/valid-xfail.txt
@@ -1,4 +1,12 @@
# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips1 | FileCheck %s
# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips2/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips2/invalid-xfail.txt
new file mode 100644
index 000000000000..b3fd1c49edc4
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips2/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips2 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips2/valid-mips2-el.txt b/test/MC/Disassembler/Mips/mips2/valid-mips2-el.txt
index 806040937b77..5bdf8f17eb68 100644
--- a/test/MC/Disassembler/Mips/mips2/valid-mips2-el.txt
+++ b/test/MC/Disassembler/Mips/mips2/valid-mips2-el.txt
@@ -74,6 +74,8 @@
0x86 0xd8 0x00 0x46 # CHECK: mov.s $f2, $f27
0x21 0xf0 0x80 0x00 # CHECK: move $fp, $4
0x21 0xc8 0xc0 0x00 # CHECK: move $25, $6
+0x25 0xf0 0x80 0x00 # CHECK: move $fp, $4
+0x25 0xc8 0xc0 0x00 # CHECK: move $25, $6
0x00 0x48 0x9e 0x44 # CHECK: mtc1 $fp, $f9
0x11 0x00 0x20 0x02 # CHECK: mthi $17
0x13 0x00 0xa0 0x03 # CHECK: mtlo $sp
diff --git a/test/MC/Disassembler/Mips/mips2/valid-mips2.txt b/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
index 268bb2900903..570f8ddb47c3 100644
--- a/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
+++ b/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
@@ -1,9 +1,15 @@
# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips2 | FileCheck %s
# CHECK: .text
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
0x00 0x00 0x00 0xc0 # CHECK: ehb
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
0x00 0x00 0x88 0x12 # CHECK: mflo $17
0x00 0x00 0x98 0x10 # CHECK: mfhi $19
0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
@@ -14,11 +20,15 @@
0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
@@ -26,6 +36,8 @@
0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
@@ -38,6 +50,7 @@
0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
@@ -53,9 +66,11 @@
0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
0x04 0x11 0x14 0x9b # CHECK: bal 21104
0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
@@ -65,6 +80,9 @@
0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
@@ -78,7 +96,9 @@
0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
0x42 0x00 0x00 0x01 # CHECK: tlbr
0x42 0x00 0x00 0x02 # CHECK: tlbwi
0x42 0x00 0x00 0x06 # CHECK: tlbwr
diff --git a/test/MC/Disassembler/Mips/mips2/valid-xfail.txt b/test/MC/Disassembler/Mips/mips2/valid-xfail.txt
new file mode 100644
index 000000000000..5497515d0090
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips2/valid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips2 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips3/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips3/invalid-xfail.txt
new file mode 100644
index 000000000000..cfb7d801e911
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips3/invalid-xfail.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips3 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0xce 0x00 0x00 0x00 # pref 0, 0($16) # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips3/valid-mips3-el.txt b/test/MC/Disassembler/Mips/mips3/valid-mips3-el.txt
index 98ce16bb7429..6ffe7748e950 100644
--- a/test/MC/Disassembler/Mips/mips3/valid-mips3-el.txt
+++ b/test/MC/Disassembler/Mips/mips3/valid-mips3-el.txt
@@ -118,6 +118,8 @@
0x86 0xd8 0x00 0x46 # CHECK: mov.s $f2, $f27
0x21 0xf0 0x80 0x00 # CHECK: move $fp, $4
0x21 0xc8 0xc0 0x00 # CHECK: move $25, $6
+0x25 0xf0 0x80 0x00 # CHECK: move $fp, $4
+0x25 0xc8 0xc0 0x00 # CHECK: move $25, $6
0x00 0x48 0x9e 0x44 # CHECK: mtc1 $fp, $f9
0x11 0x00 0x20 0x02 # CHECK: mthi $17
0x13 0x00 0xa0 0x03 # CHECK: mtlo $sp
diff --git a/test/MC/Disassembler/Mips/mips3/valid-mips3.txt b/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
index 2a38b19092f0..db83b50aa01a 100644
--- a/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
+++ b/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
@@ -1,12 +1,19 @@
# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips3 | FileCheck %s
# CHECK: .text
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
0x00 0x00 0x88 0x12 # CHECK: mflo $17
0x00 0x00 0x98 0x10 # CHECK: mfhi $19
0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
@@ -21,6 +28,8 @@
0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
@@ -30,11 +39,14 @@
0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x2d # CHECK: move $fp, $4
0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
@@ -44,6 +56,9 @@
0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
+0x00 0xc0 0xc8 0x2d # CHECK: move $25, $6
0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
@@ -59,6 +74,7 @@
0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
@@ -81,9 +97,11 @@
0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
0x04 0x11 0x14 0x9b # CHECK: bal 21104
0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
@@ -93,6 +111,9 @@
0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
@@ -106,7 +127,10 @@
0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
+0x3a 0x00 0x27 0x12 # CHECK: xori $zero, $16, 10002
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
0x42 0x00 0x00 0x01 # CHECK: tlbr
0x42 0x00 0x00 0x02 # CHECK: tlbwi
0x42 0x00 0x00 0x06 # CHECK: tlbwr
@@ -195,6 +219,7 @@
0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
0xbc 0xa1 0x00 0x08 # CHECK: cache 1, 8($5)
+0xbf 0x00 0xe2 0x1c # CHECK: cache 0, -7652($24)
0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
diff --git a/test/MC/Disassembler/Mips/mips3/valid-xfail.txt b/test/MC/Disassembler/Mips/mips3/valid-xfail.txt
new file mode 100644
index 000000000000..cd685deda2e5
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips3/valid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips3 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips32/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips32/invalid-xfail.txt
new file mode 100644
index 000000000000..77d2a152d439
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips32 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt b/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt
index f2299732a80e..0801ad7fe61b 100644
--- a/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt
+++ b/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt
@@ -7,7 +7,9 @@
0x67 0x45 0xc9 0x20 # CHECK: addi $9, $6, 17767
0x67 0xc5 0xc9 0x24 # CHECK: addiu $9, $6, -15001
0x21 0x48 0xc7 0x00 # CHECK: addu $9, $6, $7
+0x21 0xf0 0x80 0x00 # CHECK: move $fp, $4
0x24 0x48 0xc7 0x00 # CHECK: and $9, $6, $7
+0x25 0xf0 0x80 0x00 # CHECK: move $fp, $4
0x67 0x45 0xc9 0x30 # CHECK: andi $9, $6, 17767
0x4c 0x01 0x00 0x10 # CHECK: b 1332
0x4c 0x01 0x00 0x45 # CHECK: bc1f 1332
@@ -87,11 +89,20 @@
0x00 0x00 0xc7 0x70 # CHECK: madd $6, $7
0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
+0x04 0x80 0x08 0x40 # CHECK: mfc0 $8, $16, 4
0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
0x10 0x28 0x00 0x00 # CHECK: mfhi $5
0x12 0x28 0x00 0x00 # CHECK: mflo $5
0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
+0x25,0x38,0x00,0x01 # CHECK: move $7, $8
+0x25,0x18,0x40,0x00 # CHECK: move $3, $2
+0x01,0x18,0x5c,0x00 # CHECK: movf $3, $2, $fcc7
+0x11,0x11,0x3c,0x46 # CHECK: movf.d $f4, $f2, $fcc7
+0x11,0x11,0x1c,0x46 # CHECK: movf.s $f4, $f2, $fcc7
+0x01,0x18,0x5d,0x00 # CHECK: movt $3, $2, $fcc7
+0x11,0x11,0x3d,0x46 # CHECK: movt.d $f4, $f2, $fcc7
+0x11,0x11,0x1d,0x46 # CHECK: movt.s $f4, $f2, $fcc7
0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
0x05 0x00 0xc7 0x70 # CHECK: msubu $6, $7
0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
diff --git a/test/MC/Disassembler/Mips/mips32/valid-mips32.txt b/test/MC/Disassembler/Mips/mips32/valid-mips32.txt
index 09f1e56fff43..f71b2a16fcfc 100644
--- a/test/MC/Disassembler/Mips/mips32/valid-mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32/valid-mips32.txt
@@ -1,11 +1,39 @@
# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux | FileCheck %s
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
+0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x28 0x10 # CHECK: mfhi $5
0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
0x00 0x5c 0x18 0x01 # CHECK: movf $3, $2, $fcc7
0x00 0x5d 0x18 0x01 # CHECK: movt $3, $2, $fcc7
0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
@@ -15,23 +43,80 @@
0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe0 0x00 0x08 # CHECK: jr $7
0x00 0xe0 0x00 0x11 # CHECK: mthi $7
0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe0 0xf8 0x0b # CHECK: movn $ra, $7, $zero
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
+0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x10 0x00 0x01 0x4c # CHECK: b 1332
0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
@@ -39,24 +124,55 @@
0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
+0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
+0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
+0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x42 0x00 0x00 0x20 # CHECK: wait
0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
+0x45 0x00 0x00 0x01 # CHECK: bc1f 8
0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
+0x45 0x01 0x00 0x01 # CHECK: bc1t 8
0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
+0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
@@ -67,6 +183,17 @@
0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xc5 0x53 # CHECK: movn.s $f21, $f24, $zero
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
+0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
@@ -85,10 +212,17 @@
0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
0x46 0x1c 0x11 0x11 # CHECK: movf.s $f4, $f2, $fcc7
0x46 0x1d 0x11 0x11 # CHECK: movt.s $f4, $f2, $fcc7
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
@@ -98,6 +232,14 @@
0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
+0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
@@ -117,15 +259,29 @@
0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3a 0xb1 0x13 # CHECK: movn.d $f4, $f22, $26
0x46 0x3c 0x11 0x11 # CHECK: movf.d $f4, $f2, $fcc7
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
0x46 0x3d 0x11 0x11 # CHECK: movt.d $f4, $f2, $fcc7
0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
+0x70 0xc7 0x7d 0x3f # CHECK: sdbbp 204276
0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
0x74 0x00 0x01 0x4c # CHECK: jalx 1328
@@ -134,25 +290,49 @@
# CHECK: rdhwr $5, $29
# CHECK: .set pop
0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xbc 0x80 0xb7 0xd2 # CHECK: cache 0, -18478($4)
0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xcf 0x00 0x00 0x00 # CHECK: pref 0, 0($24)
0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
+0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
diff --git a/test/MC/Disassembler/Mips/mips32/valid-xfail.txt b/test/MC/Disassembler/Mips/mips32/valid-xfail.txt
new file mode 100644
index 000000000000..9a72e0583964
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32/valid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips32 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips32_le.txt b/test/MC/Disassembler/Mips/mips32_le.txt
deleted file mode 100644
index c019c41bd120..000000000000
--- a/test/MC/Disassembler/Mips/mips32_le.txt
+++ /dev/null
@@ -1,450 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux | FileCheck %s
-# CHECK: abs.d $f12, $f14
-0x05 0x73 0x20 0x46
-
-# CHECK: abs.s $f6, $f7
-0x85 0x39 0x00 0x46
-
-# CHECK: add $9, $6, $7
-0x20 0x48 0xc7 0x00
-
-# CHECK: add.d $f8, $f12, $f14
-0x00 0x62 0x2e 0x46
-
-# CHECK: add.s $f9, $f6, $f7
-0x40 0x32 0x07 0x46
-
-# CHECK: addi $9, $6, 17767
-0x67 0x45 0xc9 0x20
-
-# CHECK: addiu $9, $6, -15001
-0x67 0xc5 0xc9 0x24
-
-# CHECK: addu $9, $6, $7
-0x21 0x48 0xc7 0x00
-
-# CHECK: and $9, $6, $7
-0x24 0x48 0xc7 0x00
-
-# CHECK: andi $9, $6, 17767
-0x67 0x45 0xc9 0x30
-
-# CHECK: b 1332
-0x4c 0x01 0x00 0x10
-
-# CHECK: bc1f 1332
-0x4c 0x01 0x00 0x45
-
-# CHECK: bc1f $fcc7, 1332
-0x4c 0x01 0x1c 0x45
-
-# CHECK: bc1t 1332
-0x4c 0x01 0x01 0x45
-
-# CHECK: bc1t $fcc7, 1332
-0x4c 0x01 0x1d 0x45
-
-# CHECK: beq $9, $6, 1332
-0x4c 0x01 0x26 0x11
-
-# CHECK: bgez $6, 1332
-0x4c 0x01 0xc1 0x04
-
-# CHECK: bgezal $6, 1332
-0x4c 0x01 0xd1 0x04
-
-# CHECK: bgtz $6, 1332
-0x4c 0x01 0xc0 0x1c
-
-# CHECK: blez $6, 1332
-0x4c 0x01 0xc0 0x18
-
-# CHECK: bne $9, $6, 1332
-0x4c 0x01 0x26 0x15
-
-# CHECK: c.eq.d $f12, $f14
-0x32 0x60 0x2e 0x46
-
-# CHECK: c.eq.s $f6, $f7
-0x32 0x30 0x07 0x46
-
-# CHECK: c.f.d $f12, $f14
-0x30 0x60 0x2e 0x46
-
-# CHECK: c.f.s $f6, $f7
-0x30 0x30 0x07 0x46
-
-# CHECK: c.le.d $f12, $f14
-0x3e 0x60 0x2e 0x46
-
-# CHECK: c.le.s $f6, $f7
-0x3e 0x30 0x07 0x46
-
-# CHECK: c.lt.d $f12, $f14
-0x3c 0x60 0x2e 0x46
-
-# CHECK: c.lt.s $f6, $f7
-0x3c 0x30 0x07 0x46
-
-# CHECK: c.nge.d $f12, $f14
-0x3d 0x60 0x2e 0x46
-
-# CHECK: c.nge.s $f6, $f7
-0x3d 0x30 0x07 0x46
-
-# CHECK: c.ngl.d $f12, $f14
-0x3b 0x60 0x2e 0x46
-
-# CHECK: c.ngl.s $f6, $f7
-0x3b 0x30 0x07 0x46
-
-# CHECK: c.ngle.d $f12, $f14
-0x39 0x60 0x2e 0x46
-
-# CHECK: c.ngle.s $f6, $f7
-0x39 0x30 0x07 0x46
-
-# CHECK: c.ngt.d $f12, $f14
-0x3f 0x60 0x2e 0x46
-
-# CHECK: c.ngt.s $f6, $f7
-0x3f 0x30 0x07 0x46
-
-# CHECK: c.ole.d $f12, $f14
-0x36 0x60 0x2e 0x46
-
-# CHECK: c.ole.s $f6, $f7
-0x36 0x30 0x07 0x46
-
-# CHECK: c.olt.d $f12, $f14
-0x34 0x60 0x2e 0x46
-
-# CHECK: c.olt.s $f6, $f7
-0x34 0x30 0x07 0x46
-
-# CHECK: c.seq.d $f12, $f14
-0x3a 0x60 0x2e 0x46
-
-# CHECK: c.seq.s $f6, $f7
-0x3a 0x30 0x07 0x46
-
-# CHECK: c.sf.d $f12, $f14
-0x38 0x60 0x2e 0x46
-
-# CHECK: c.sf.s $f6, $f7
-0x38 0x30 0x07 0x46
-
-# CHECK: c.ueq.d $f12, $f14
-0x33 0x60 0x2e 0x46
-
-# CHECK: c.ueq.s $f28, $f18
-0x33 0xe0 0x12 0x46
-
-# CHECK: c.ule.d $f12, $f14
-0x37 0x60 0x2e 0x46
-
-# CHECK: c.ule.s $f6, $f7
-0x37 0x30 0x07 0x46
-
-# CHECK: c.ult.d $f12, $f14
-0x35 0x60 0x2e 0x46
-
-# CHECK: c.ult.s $f6, $f7
-0x35 0x30 0x07 0x46
-
-# CHECK: c.un.d $f12, $f14
-0x31 0x60 0x2e 0x46
-
-# CHECK: c.un.s $f6, $f7
-0x31 0x30 0x07 0x46
-
-# CHECK: ceil.w.d $f12, $f14
-0x0e 0x73 0x20 0x46
-
-# CHECK: ceil.w.s $f6, $f7
-0x8e 0x39 0x00 0x46
-
-# CHECK: cfc1 $6, $7
-0x00 0x38 0x46 0x44
-
-# CHECK: clo $6, $7
-0x21 0x30 0xe6 0x70
-
-# CHECK: clz $6, $7
-0x20 0x30 0xe6 0x70
-
-# CHECK: ctc1 $6, $7
-0x00 0x38 0xc6 0x44
-
-# CHECK: cvt.d.s $f6, $f7
-0xa1 0x39 0x00 0x46
-
-# CHECK: cvt.d.w $f12, $f14
-0x21 0x73 0x80 0x46
-
-# CHECK: cvt.s.d $f12, $f14
-0x20 0x73 0x20 0x46
-
-# CHECK: cvt.s.w $f6, $f7
-0xa0 0x39 0x80 0x46
-
-# CHECK: cvt.w.d $f12, $f14
-0x24 0x73 0x20 0x46
-
-# CHECK: cvt.w.s $f6, $f7
-0xa4 0x39 0x00 0x46
-
-# CHECK: floor.w.d $f12, $f14
-0x0f 0x73 0x20 0x46
-
-# CHECK: floor.w.s $f6, $f7
-0x8f 0x39 0x00 0x46
-
-# CHECK: j 1328
-0x4c 0x01 0x00 0x08
-
-# CHECK: jal 1328
-0x4c 0x01 0x00 0x0c
-
-# CHECK: jalx 1328
-0x4c 0x01 0x00 0x74
-
-# CHECK: jalr $7
-0x09 0xf8 0xe0 0x00
-
-# CHECK: jr $7
-0x08 0x00 0xe0 0x00
-
-# CHECK: lb $4, 9158($5)
-0xc6 0x23 0xa4 0x80
-
-# CHECK: lbu $4, 6($5)
-0x06 0x00 0xa4 0x90
-
-# CHECK: ldc1 $f9, 9158($7)
-0xc6 0x23 0xe9 0xd4
-
-# CHECK: lh $4, 12($5)
-0x0c 0x00 0xa4 0x84
-
-# CHECK: lh $4, 12($5)
-0x0c 0x00 0xa4 0x84
-
-# CHECK: ll $9, 9158($7)
-0xc6 0x23 0xe9 0xc0
-
-# CHECK: lui $6, 17767
-0x67 0x45 0x06 0x3c
-
-# CHECK: lw $4, 24($5)
-0x18 0x00 0xa4 0x8c
-
-# CHECK: lwc1 $f9, 9158($7)
-0xc6 0x23 0xe9 0xc4
-
-# CHECK: lwl $2, 3($4)
-0x03 0x00 0x82 0x88
-
-# CHECK: lwr $3, 16($5)
-0x10 0x00 0xa3 0x98
-
-# CHECK: madd $6, $7
-0x00 0x00 0xc7 0x70
-
-# CHECK: maddu $6, $7
-0x01 0x00 0xc7 0x70
-
-# CHECK: mfc0 $8, $16, 4
-0x04 0x80 0x08 0x40
-
-# CHECK: mfc1 $6, $f7
-0x00 0x38 0x06 0x44
-
-# CHECK: mfhi $5
-0x10 0x28 0x00 0x00
-
-# CHECK: mflo $5
-0x12 0x28 0x00 0x00
-
-# CHECK: mov.d $f6, $f8
-0x86 0x41 0x20 0x46
-
-# CHECK: mov.s $f6, $f7
-0x86 0x39 0x00 0x46
-
-# CHECK: move $7, $8
-0x21,0x38,0x00,0x01
-
-# CHECK: move $3, $2
-0x25,0x18,0x40,0x00
-
-# CHECK: movf $3, $2, $fcc7
-0x01,0x18,0x5c,0x00
-
-# CHECK: movf.d $f4, $f2, $fcc7
-0x11,0x11,0x3c,0x46
-
-# CHECK: movf.s $f4, $f2, $fcc7
-0x11,0x11,0x1c,0x46
-
-# CHECK: movt $3, $2, $fcc7
-0x01,0x18,0x5d,0x00
-
-# CHECK: movt.d $f4, $f2, $fcc7
-0x11,0x11,0x3d,0x46
-
-# CHECK: movt.s $f4, $f2, $fcc7
-0x11,0x11,0x1d,0x46
-
-# CHECK: msub $6, $7
-0x04 0x00 0xc7 0x70
-
-# CHECK: msubu $6, $7
-0x05 0x00 0xc7 0x70
-
-# CHECK: mtc0 $9, $15, 1
-0x01 0x78 0x89 0x40
-
-# CHECK: mtc1 $6, $f7
-0x00 0x38 0x86 0x44
-
-# CHECK: mthi $7
-0x11 0x00 0xe0 0x00
-
-# CHECK: mtlo $7
-0x13 0x00 0xe0 0x00
-
-# CHECK: mul.d $f8, $f12, $f14
-0x02 0x62 0x2e 0x46
-
-# CHECK: mul.s $f9, $f6, $f7
-0x42 0x32 0x07 0x46
-
-# CHECK: mul $9, $6, $7
-0x02 0x48 0xc7 0x70
-
-# CHECK: mult $3, $5
-0x18 0x00 0x65 0x00
-
-# CHECK: multu $3, $5
-0x19 0x00 0x65 0x00
-
-# CHECK: neg.d $f12, $f14
-0x07 0x73 0x20 0x46
-
-# CHECK: neg.s $f6, $f7
-0x87 0x39 0x00 0x46
-
-# CHECK: nop
-0x00 0x00 0x00 0x00
-
-# CHECK: nor $9, $6, $7
-0x27 0x48 0xc7 0x00
-
-# CHECK: or $3, $3, $5
-0x25 0x18 0x65 0x00
-
-# CHECK: ori $9, $6, 17767
-0x67 0x45 0xc9 0x34
-
-# CHECK: round.w.d $f12, $f14
-0x0c 0x73 0x20 0x46
-
-# CHECK: round.w.s $f6, $f7
-0x8c 0x39 0x00 0x46
-
-# CHECK: sb $4, 9158($5)
-0xc6 0x23 0xa4 0xa0
-
-# CHECK: sb $4, 6($5)
-0x06 0x00 0xa4 0xa0
-
-# CHECK: sc $9, 9158($7)
-0xc6 0x23 0xe9 0xe0
-
-# CHECK: sdc1 $f9, 9158($7)
-0xc6 0x23 0xe9 0xf4
-
-# CHECK: sh $4, 9158($5)
-0xc6 0x23 0xa4 0xa4
-
-# CHECK: sll $4, $3, 7
-0xc0 0x21 0x03 0x00
-
-# CHECK: sllv $2, $3, $5
-0x04 0x10 0xa3 0x00
-
-# CHECK: slt $3, $3, $5
-0x2a 0x18 0x65 0x00
-
-# CHECK: slti $3, $3, 103
-0x67 0x00 0x63 0x28
-
-# CHECK: sltiu $3, $3, 103
-0x67 0x00 0x63 0x2c
-
-# CHECK: sltu $3, $3, $5
-0x2b 0x18 0x65 0x00
-
-# CHECK: sqrt.d $f12, $f14
-0x04 0x73 0x20 0x46
-
-# CHECK: sqrt.s $f6, $f7
-0x84 0x39 0x00 0x46
-
-# CHECK: sra $4, $3, 7
-0xc3 0x21 0x03 0x00
-
-# CHECK: srav $2, $3, $5
-0x07 0x10 0xa3 0x00
-
-# CHECK: srl $4, $3, 7
-0xc2 0x21 0x03 0x00
-
-# CHECK: srlv $2, $3, $5
-0x06 0x10 0xa3 0x00
-
-# CHECK: sub.d $f8, $f12, $f14
-0x01 0x62 0x2e 0x46
-
-# CHECK: sub.s $f9, $f6, $f7
-0x41 0x32 0x07 0x46
-
-# CHECK: sub $9, $6, $7
-0x22 0x48 0xc7 0x00
-
-# CHECK: subu $4, $3, $5
-0x23 0x20 0x65 0x00
-
-# CHECK: sw $4, 24($5)
-0x18 0x00 0xa4 0xac
-
-# CHECK: swc1 $f9, 9158($7)
-0xc6 0x23 0xe9 0xe4
-
-# CHECK: swl $4, 16($5)
-0x10 0x00 0xa4 0xa8
-
-# CHECK: swr $6, 16($7)
-0x10 0x00 0xe6 0xb8
-
-# CHECK: sync 7
-0xcf 0x01 0x00 0x00
-
-# CHECK: trunc.w.d $f12, $f14
-0x0d 0x73 0x20 0x46
-
-# CHECK: trunc.w.s $f6, $f7
-0x8d 0x39 0x00 0x46
-
-# CHECK: xor $3, $3, $5
-0x26 0x18 0x65 0x00
-
-# CHECK: xori $9, $6, 17767
-0x67 0x45 0xc9 0x38
-
-# CHECK: .set push
-# CHECK: .set mips32r2
-# CHECK: rdhwr $5, $29
-# CHECK: .set pop
-0x3b 0xe8 0x05 0x7c
diff --git a/test/MC/Disassembler/Mips/mips32r2/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips32r2/invalid-xfail.txt
new file mode 100644
index 000000000000..82503c05b718
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r2/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips32r2 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-el.txt b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-el.txt
index c487b6d0e960..82a883557e87 100644
--- a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-el.txt
+++ b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-el.txt
@@ -102,6 +102,7 @@
0x60 0x98 0xf9 0x4f # CHECK: madd.s $f1, $f31, $f19, $f25
0x01 0x00 0xc7 0x70 # CHECK: maddu $6, $7
0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
+0x04 0x80 0x08 0x40 # CHECK: mfc0 $8, $16, 4
0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
0x10 0x28 0x00 0x00 # CHECK: mfhi $5
0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
@@ -172,3 +173,4 @@
0xa0 0x30 0x07 0x7c # CHECK: wsbh $6, $7
0x26 0x18 0x65 0x00 # CHECK: xor $3, $3, $5
0x67 0x45 0xc9 0x38 # CHECK: xori $9, $6, 17767
+0x4c 0x1d 0x7f 0x06 # CHECK: synci 7500($19)
diff --git a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt
index d01384752907..acce76bcfddd 100644
--- a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt
+++ b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt
@@ -3,13 +3,43 @@
# an effect on the disassembler behaviour.
# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips32r2 | FileCheck %s
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
+0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x28 0x10 # CHECK: mfhi $5
0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x5c 0x18 0x01 # CHECK: movf $3, $2, $fcc7
+0x00 0x5d 0x18 0x01 # CHECK: movt $3, $2, $fcc7
0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
@@ -17,25 +47,82 @@
0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe0 0x00 0x08 # CHECK: jr $7
0x00 0xe0 0x00 0x11 # CHECK: mthi $7
0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe0 0xf8 0x0b # CHECK: movn $ra, $7, $zero
0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
+0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
0x07 0xdf 0xe8 0x07 # CHECK: synci -6137($fp)
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x10 0x00 0x01 0x4c # CHECK: b 1332
0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
@@ -43,12 +130,28 @@
0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
+0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
+0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
+0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
@@ -57,16 +160,31 @@
0x41 0x60 0x60 0x20 # CHECK: ei
0x41 0x6e 0x60 0x20 # CHECK: ei $14
0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x42 0x00 0x00 0x20 # CHECK: wait
0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
+0x45 0x00 0x00 0x01 # CHECK: bc1f 8
0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
+0x45 0x01 0x00 0x01 # CHECK: bc1t 8
0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
+0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
@@ -78,6 +196,17 @@
0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xc5 0x53 # CHECK: movn.s $f21, $f24, $zero
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
+0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
@@ -96,8 +225,17 @@
0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x1c 0x11 0x11 # CHECK: movf.s $f4, $f2, $fcc7
+0x46 0x1d 0x11 0x11 # CHECK: movt.s $f4, $f2, $fcc7
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
@@ -108,6 +246,14 @@
0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
+0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
@@ -127,8 +273,19 @@
0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3a 0xb1 0x13 # CHECK: movn.d $f4, $f22, $26
+0x46 0x3c 0x11 0x11 # CHECK: movf.d $f4, $f2, $fcc7
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x3d 0x11 0x11 # CHECK: movt.d $f4, $f2, $fcc7
0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
0x4c 0x52 0xf2 0xa9 # CHECK: msub.d $f10, $f2, $f30, $f18
0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
@@ -143,34 +300,72 @@
0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
+0x70 0xc7 0x7d 0x3f # CHECK: sdbbp 204276
0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x05 0xe8 0x3b # CHECK: .set push
+ # CHECK: .set mips32r2
+ # CHECK: rdhwr $5, $29
+ # CHECK: .set pop
0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
+0x7d 0x07 0x30 0x00 # CHECK: ext $7, $8, 0, 7
0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
+0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xbc 0x80 0xb7 0xd2 # CHECK: cache 0, -18478($4)
0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
+0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
+0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xcf 0x00 0x00 0x00 # CHECK: pref 0, 0($24)
0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
+0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
+0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
diff --git a/test/MC/Disassembler/Mips/mips32r2/valid-xfail.txt b/test/MC/Disassembler/Mips/mips32r2/valid-xfail.txt
new file mode 100644
index 000000000000..c10faca4d32c
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r2/valid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips32r2 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips32r2_le.txt b/test/MC/Disassembler/Mips/mips32r2_le.txt
deleted file mode 100644
index faaed7cb3453..000000000000
--- a/test/MC/Disassembler/Mips/mips32r2_le.txt
+++ /dev/null
@@ -1,459 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r2 | FileCheck %s
-# CHECK: abs.d $f12, $f14
-0x05 0x73 0x20 0x46
-
-# CHECK: abs.s $f6, $f7
-0x85 0x39 0x00 0x46
-
-# CHECK: add $9, $6, $7
-0x20 0x48 0xc7 0x00
-
-# CHECK: add.d $f8, $f12, $f14
-0x00 0x62 0x2e 0x46
-
-# CHECK: add.s $f9, $f6, $f7
-0x40 0x32 0x07 0x46
-
-# CHECK: addi $9, $6, 17767
-0x67 0x45 0xc9 0x20
-
-# CHECK: addiu $9, $6, -15001
-0x67 0xc5 0xc9 0x24
-
-# CHECK: addu $9, $6, $7
-0x21 0x48 0xc7 0x00
-
-# CHECK: and $9, $6, $7
-0x24 0x48 0xc7 0x00
-
-# CHECK: andi $9, $6, 17767
-0x67 0x45 0xc9 0x30
-
-# CHECK: b 1332
-0x4c 0x01 0x00 0x10
-
-# CHECK: bc1f 1332
-0x4c 0x01 0x00 0x45
-
-# CHECK: bc1f $fcc7, 1332
-0x4c 0x01 0x1c 0x45
-
-# CHECK: bc1t 1332
-0x4c 0x01 0x01 0x45
-
-# CHECK: bc1t $fcc7, 1332
-0x4c 0x01 0x1d 0x45
-
-# CHECK: beq $9, $6, 1332
-0x4c 0x01 0x26 0x11
-
-# CHECK: bgez $6, 1332
-0x4c 0x01 0xc1 0x04
-
-# CHECK: bgezal $6, 1332
-0x4c 0x01 0xd1 0x04
-
-# CHECK: bgtz $6, 1332
-0x4c 0x01 0xc0 0x1c
-
-# CHECK: blez $6, 1332
-0x4c 0x01 0xc0 0x18
-
-# CHECK: bne $9, $6, 1332
-0x4c 0x01 0x26 0x15
-
-# CHECK: c.eq.d $f12, $f14
-0x32 0x60 0x2e 0x46
-
-# CHECK: c.eq.s $f6, $f7
-0x32 0x30 0x07 0x46
-
-# CHECK: c.f.d $f12, $f14
-0x30 0x60 0x2e 0x46
-
-# CHECK: c.f.s $f6, $f7
-0x30 0x30 0x07 0x46
-
-# CHECK: c.le.d $f12, $f14
-0x3e 0x60 0x2e 0x46
-
-# CHECK: c.le.s $f6, $f7
-0x3e 0x30 0x07 0x46
-
-# CHECK: c.lt.d $f12, $f14
-0x3c 0x60 0x2e 0x46
-
-# CHECK: c.lt.s $f6, $f7
-0x3c 0x30 0x07 0x46
-
-# CHECK: c.nge.d $f12, $f14
-0x3d 0x60 0x2e 0x46
-
-# CHECK: c.nge.s $f6, $f7
-0x3d 0x30 0x07 0x46
-
-# CHECK: c.ngl.d $f12, $f14
-0x3b 0x60 0x2e 0x46
-
-# CHECK: c.ngl.s $f6, $f7
-0x3b 0x30 0x07 0x46
-
-# CHECK: c.ngle.d $f12, $f14
-0x39 0x60 0x2e 0x46
-
-# CHECK: c.ngle.s $f6, $f7
-0x39 0x30 0x07 0x46
-
-# CHECK: c.ngt.d $f12, $f14
-0x3f 0x60 0x2e 0x46
-
-# CHECK: c.ngt.s $f6, $f7
-0x3f 0x30 0x07 0x46
-
-# CHECK: c.ole.d $f12, $f14
-0x36 0x60 0x2e 0x46
-
-# CHECK: c.ole.s $f6, $f7
-0x36 0x30 0x07 0x46
-
-# CHECK: c.olt.d $f12, $f14
-0x34 0x60 0x2e 0x46
-
-# CHECK: c.olt.s $f6, $f7
-0x34 0x30 0x07 0x46
-
-# CHECK: c.seq.d $f12, $f14
-0x3a 0x60 0x2e 0x46
-
-# CHECK: c.seq.s $f6, $f7
-0x3a 0x30 0x07 0x46
-
-# CHECK: c.sf.d $f12, $f14
-0x38 0x60 0x2e 0x46
-
-# CHECK: c.sf.s $f6, $f7
-0x38 0x30 0x07 0x46
-
-# CHECK: c.ueq.d $f12, $f14
-0x33 0x60 0x2e 0x46
-
-# CHECK: c.ueq.s $f28, $f18
-0x33 0xe0 0x12 0x46
-
-# CHECK: c.ule.d $f12, $f14
-0x37 0x60 0x2e 0x46
-
-# CHECK: c.ule.s $f6, $f7
-0x37 0x30 0x07 0x46
-
-# CHECK: c.ult.d $f12, $f14
-0x35 0x60 0x2e 0x46
-
-# CHECK: c.ult.s $f6, $f7
-0x35 0x30 0x07 0x46
-
-# CHECK: c.un.d $f12, $f14
-0x31 0x60 0x2e 0x46
-
-# CHECK: c.un.s $f6, $f7
-0x31 0x30 0x07 0x46
-
-# CHECK: ceil.w.d $f12, $f14
-0x0e 0x73 0x20 0x46
-
-# CHECK: ceil.w.s $f6, $f7
-0x8e 0x39 0x00 0x46
-
-# CHECK: cfc1 $6, $7
-0x00 0x38 0x46 0x44
-
-# CHECK: clo $6, $7
-0x21 0x30 0xe6 0x70
-
-# CHECK: clz $6, $7
-0x20 0x30 0xe6 0x70
-
-# CHECK: ctc1 $6, $7
-0x00 0x38 0xc6 0x44
-
-# CHECK: cvt.d.s $f6, $f7
-0xa1 0x39 0x00 0x46
-
-# CHECK: cvt.d.w $f12, $f14
-0x21 0x73 0x80 0x46
-
-# CHECK: cvt.l.d $f12, $f14
-0x25 0x73 0x20 0x46
-
-# CHECK: cvt.l.s $f6, $f7
-0xa5 0x39 0x00 0x46
-
-# CHECK: cvt.s.d $f12, $f14
-0x20 0x73 0x20 0x46
-
-# CHECK: cvt.s.w $f6, $f7
-0xa0 0x39 0x80 0x46
-
-# CHECK: cvt.w.d $f12, $f14
-0x24 0x73 0x20 0x46
-
-# CHECK: cvt.w.s $f6, $f7
-0xa4 0x39 0x00 0x46
-
-# CHECK: floor.w.d $f12, $f14
-0x0f 0x73 0x20 0x46
-
-# CHECK: floor.w.s $f6, $f7
-0x8f 0x39 0x00 0x46
-
-# CHECK: ins $19, $9, 6, 7
-0x84 0x61 0x33 0x7d
-
-# CHECK: j 1328
-0x4c 0x01 0x00 0x08
-
-# CHECK: jal 1328
-0x4c 0x01 0x00 0x0c
-
-# CHECK: jalx 1328
-0x4c 0x01 0x00 0x74
-
-# CHECK: jalr $7
-0x09 0xf8 0xe0 0x00
-
-# CHECK: jr $7
-0x08 0x00 0xe0 0x00
-
-# CHECK: lb $4, 9158($5)
-0xc6 0x23 0xa4 0x80
-
-# CHECK: lbu $4, 6($5)
-0x06 0x00 0xa4 0x90
-
-# CHECK: ldc1 $f9, 9158($7)
-0xc6 0x23 0xe9 0xd4
-
-# CHECK: lh $4, 12($5)
-0x0c 0x00 0xa4 0x84
-
-# CHECK: lh $4, 12($5)
-0x0c 0x00 0xa4 0x84
-
-# CHECK: ll $9, 9158($7)
-0xc6 0x23 0xe9 0xc0
-
-# CHECK: lui $6, 17767
-0x67 0x45 0x06 0x3c
-
-# CHECK: luxc1 $f0, $6($5)
-0x05 0x00 0xa6 0x4c
-
-# CHECK: lw $4, 24($5)
-0x18 0x00 0xa4 0x8c
-
-# CHECK: lwc1 $f9, 9158($7)
-0xc6 0x23 0xe9 0xc4
-
-# CHECK: lwl $2, 3($4)
-0x03 0x00 0x82 0x88
-
-# CHECK: lwr $3, 16($5)
-0x10 0x00 0xa3 0x98
-
-# CHECK: lwxc1 $f20, $12($14)
-0x00 0x05 0xcc 0x4d
-
-# CHECK: madd $6, $7
-0x00 0x00 0xc7 0x70
-
-# CHECK: maddu $6, $7
-0x01 0x00 0xc7 0x70
-
-# CHECK: mfc0 $8, $16, 4
-0x04 0x80 0x08 0x40
-
-# CHECK: mfc1 $6, $f7
-0x00 0x38 0x06 0x44
-
-# CHECK: mfhi $5
-0x10 0x28 0x00 0x00
-
-# CHECK: mflo $5
-0x12 0x28 0x00 0x00
-
-# CHECK: mov.d $f6, $f8
-0x86 0x41 0x20 0x46
-
-# CHECK: mov.s $f6, $f7
-0x86 0x39 0x00 0x46
-
-# CHECK: msub $6, $7
-0x04 0x00 0xc7 0x70
-
-# CHECK: msubu $6, $7
-0x05 0x00 0xc7 0x70
-
-# CHECK: mtc0 $9, $15, 1
-0x01 0x78 0x89 0x40
-
-# CHECK: mtc1 $6, $f7
-0x00 0x38 0x86 0x44
-
-# CHECK: mthi $7
-0x11 0x00 0xe0 0x00
-
-# CHECK: mtlo $7
-0x13 0x00 0xe0 0x00
-
-# CHECK: mul.d $f8, $f12, $f14
-0x02 0x62 0x2e 0x46
-
-# CHECK: mul.s $f9, $f6, $f7
-0x42 0x32 0x07 0x46
-
-# CHECK: mul $9, $6, $7
-0x02 0x48 0xc7 0x70
-
-# CHECK: mult $3, $5
-0x18 0x00 0x65 0x00
-
-# CHECK: multu $3, $5
-0x19 0x00 0x65 0x00
-
-# CHECK: neg.d $f12, $f14
-0x07 0x73 0x20 0x46
-
-# CHECK: neg.s $f6, $f7
-0x87 0x39 0x00 0x46
-
-# CHECK: nop
-0x00 0x00 0x00 0x00
-
-# CHECK: nor $9, $6, $7
-0x27 0x48 0xc7 0x00
-
-# CHECK: or $3, $3, $5
-0x25 0x18 0x65 0x00
-
-# CHECK: ori $9, $6, 17767
-0x67 0x45 0xc9 0x34
-
-# CHECK: rotr $9, $6, 7
-0xc2 0x49 0x26 0x00
-
-# CHECK: rotrv $9, $6, $7
-0x46 0x48 0xe6 0x00
-
-# CHECK: round.w.d $f12, $f14
-0x0c 0x73 0x20 0x46
-
-# CHECK: round.w.s $f6, $f7
-0x8c 0x39 0x00 0x46
-
-# CHECK: sb $4, 9158($5)
-0xc6 0x23 0xa4 0xa0
-
-# CHECK: sb $4, 6($5)
-0x06 0x00 0xa4 0xa0
-
-# CHECK: sc $9, 9158($7)
-0xc6 0x23 0xe9 0xe0
-
-# CHECK: sdc1 $f9, 9158($7)
-0xc6 0x23 0xe9 0xf4
-
-# CHECK: seb $6, $7
-0x20 0x34 0x07 0x7c
-
-# CHECK: seh $6, $7
-0x20 0x36 0x07 0x7c
-
-# CHECK: sh $4, 9158($5)
-0xc6 0x23 0xa4 0xa4
-
-# CHECK: sll $4, $3, 7
-0xc0 0x21 0x03 0x00
-
-# CHECK: sllv $2, $3, $5
-0x04 0x10 0xa3 0x00
-
-# CHECK: slt $3, $3, $5
-0x2a 0x18 0x65 0x00
-
-# CHECK: slti $3, $3, 103
-0x67 0x00 0x63 0x28
-
-# CHECK: sltiu $3, $3, 103
-0x67 0x00 0x63 0x2c
-
-# CHECK: sltu $3, $3, $5
-0x2b 0x18 0x65 0x00
-
-# CHECK: sqrt.d $f12, $f14
-0x04 0x73 0x20 0x46
-
-# CHECK: sqrt.s $f6, $f7
-0x84 0x39 0x00 0x46
-
-# CHECK: sra $4, $3, 7
-0xc3 0x21 0x03 0x00
-
-# CHECK: srav $2, $3, $5
-0x07 0x10 0xa3 0x00
-
-# CHECK: srl $4, $3, 7
-0xc2 0x21 0x03 0x00
-
-# CHECK: srlv $2, $3, $5
-0x06 0x10 0xa3 0x00
-
-# CHECK: sub.d $f8, $f12, $f14
-0x01 0x62 0x2e 0x46
-
-# CHECK: sub.s $f9, $f6, $f7
-0x41 0x32 0x07 0x46
-
-# CHECK: sub $9, $6, $7
-0x22 0x48 0xc7 0x00
-
-# CHECK: subu $4, $3, $5
-0x23 0x20 0x65 0x00
-
-# CHECK: suxc1 $f4, $24($5)
-0x0d 0x20 0xb8 0x4c
-
-# CHECK: sw $4, 24($5)
-0x18 0x00 0xa4 0xac
-
-# CHECK: swc1 $f9, 9158($7)
-0xc6 0x23 0xe9 0xe4
-
-# CHECK: swl $4, 16($5)
-0x10 0x00 0xa4 0xa8
-
-# CHECK: swr $6, 16($7)
-0x10 0x00 0xe6 0xb8
-
-# CHECK: swxc1 $f26, $18($22)
-0x08 0xd0 0xd2 0x4e
-
-# CHECK: sync 7
-0xcf 0x01 0x00 0x00
-
-# CHECK: trunc.w.d $f12, $f14
-0x0d 0x73 0x20 0x46
-
-# CHECK: trunc.w.s $f6, $f7
-0x8d 0x39 0x00 0x46
-
-# CHECK: wsbh $6, $7
-0xa0 0x30 0x07 0x7c
-
-# CHECK: xor $3, $3, $5
-0x26 0x18 0x65 0x00
-
-# CHECK: xori $9, $6, 17767
-0x67 0x45 0xc9 0x38
-
-# CHECK: synci 7500($19)
-0x4c 0x1d 0x7f 0x06
diff --git a/test/MC/Disassembler/Mips/mips32r3/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips32r3/invalid-xfail.txt
new file mode 100644
index 000000000000..0d259902d68d
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r3/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips32r3 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3.txt b/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3.txt
index cf9e98675d25..18dbd9ea7a40 100644
--- a/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3.txt
+++ b/test/MC/Disassembler/Mips/mips32r3/valid-mips32r3.txt
@@ -1,12 +1,42 @@
# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r3 | FileCheck %s
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
+0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x28 0x10 # CHECK: mfhi $5
0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x5c 0x18 0x01 # CHECK: movf $3, $2, $fcc7
+0x00 0x5d 0x18 0x01 # CHECK: movt $3, $2, $fcc7
0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
@@ -14,24 +44,82 @@
0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe0 0x00 0x08 # CHECK: jr $7
0x00 0xe0 0x00 0x11 # CHECK: mthi $7
0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe0 0xf8 0x0b # CHECK: movn $ra, $7, $zero
0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
+0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xdf 0xe8 0x07 # CHECK: synci -6137($fp)
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x10 0x00 0x01 0x4c # CHECK: b 1332
0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
@@ -39,29 +127,61 @@
0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
+0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
+0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
+0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
0x41 0x60 0x60 0x00 # CHECK: di
0x41 0x60 0x60 0x20 # CHECK: ei
0x41 0x6e 0x60 0x20 # CHECK: ei $14
0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x42 0x00 0x00 0x20 # CHECK: wait
0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
+0x45 0x00 0x00 0x01 # CHECK: bc1f 8
0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
+0x45 0x01 0x00 0x01 # CHECK: bc1t 8
0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
+0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
@@ -73,6 +193,17 @@
0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xc5 0x53 # CHECK: movn.s $f21, $f24, $zero
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
+0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
@@ -91,8 +222,17 @@
0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x1c 0x11 0x11 # CHECK: movf.s $f4, $f2, $fcc7
+0x46 0x1d 0x11 0x11 # CHECK: movt.s $f4, $f2, $fcc7
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
@@ -103,6 +243,14 @@
0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
+0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
@@ -122,8 +270,19 @@
0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3a 0xb1 0x13 # CHECK: movn.d $f4, $f22, $26
+0x46 0x3c 0x11 0x11 # CHECK: movf.d $f4, $f2, $fcc7
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x3d 0x11 0x11 # CHECK: movt.d $f4, $f2, $fcc7
0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
0x4c 0x52 0xf2 0xa9 # CHECK: msub.d $f10, $f2, $f30, $f18
0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
@@ -138,34 +297,72 @@
0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
+0x70 0xc7 0x7d 0x3f # CHECK: sdbbp 204276
0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x05 0xe8 0x3b # CHECK: .set push
+ # CHECK: .set mips32r2
+ # CHECK: rdhwr $5, $29
+ # CHECK: .set pop
0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
+0x7d 0x07 0x30 0x00 # CHECK: ext $7, $8, 0, 7
0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
+0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xbc 0x80 0xb7 0xd2 # CHECK: cache 0, -18478($4)
0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
+0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
+0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xcf 0x00 0x00 0x00 # CHECK: pref 0, 0($24)
0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
+0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
+0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
diff --git a/test/MC/Disassembler/Mips/mips32r3/valid-xfail.txt b/test/MC/Disassembler/Mips/mips32r3/valid-xfail.txt
new file mode 100644
index 000000000000..03836adaa31f
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r3/valid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips32r3 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips32r5/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips32r5/invalid-xfail.txt
new file mode 100644
index 000000000000..871e45f63f4c
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r5/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips32r5 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5.txt b/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5.txt
index 282f3a2b0544..8b553cfab2cc 100644
--- a/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5.txt
+++ b/test/MC/Disassembler/Mips/mips32r5/valid-mips32r5.txt
@@ -1,12 +1,42 @@
# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r5 | FileCheck %s
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
+0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x01 0xcf # CHECK: sync 7
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x28 0x10 # CHECK: mfhi $5
0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
0x00 0x03 0x21 0xc2 # CHECK: srl $4, $3, 7
0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x5c 0x18 0x01 # CHECK: movf $3, $2, $fcc7
+0x00 0x5d 0x18 0x01 # CHECK: movt $3, $2, $fcc7
0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
@@ -14,24 +44,82 @@
0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe0 0x00 0x08 # CHECK: jr $7
0x00 0xe0 0x00 0x11 # CHECK: mthi $7
0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe0 0xf8 0x0b # CHECK: movn $ra, $7, $zero
0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
+0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xdf 0xe8 0x07 # CHECK: synci -6137($fp)
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x10 0x00 0x01 0x4c # CHECK: b 1332
0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
@@ -39,29 +127,61 @@
0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
+0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
+0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
+0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
0x41 0x60 0x60 0x00 # CHECK: di
0x41 0x60 0x60 0x20 # CHECK: ei
0x41 0x6e 0x60 0x20 # CHECK: ei $14
0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x42 0x00 0x00 0x20 # CHECK: wait
0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
+0x45 0x00 0x00 0x01 # CHECK: bc1f 8
0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
+0x45 0x01 0x00 0x01 # CHECK: bc1t 8
0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
+0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
0x46 0x00 0x39 0x85 # CHECK: abs.s $f6, $f7
0x46 0x00 0x39 0x86 # CHECK: mov.s $f6, $f7
@@ -73,6 +193,17 @@
0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xc5 0x53 # CHECK: movn.s $f21, $f24, $zero
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
+0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
@@ -88,11 +219,21 @@
0x46 0x07 0x30 0x3d # CHECK: c.nge.s $f6, $f7
0x46 0x07 0x30 0x3e # CHECK: c.le.s $f6, $f7
0x46 0x07 0x30 0x3f # CHECK: c.ngt.s $f6, $f7
+0x42 0x00 0x00 0x58 # CHECK: eretnc
0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x1c 0x11 0x11 # CHECK: movf.s $f4, $f2, $fcc7
+0x46 0x1d 0x11 0x11 # CHECK: movt.s $f4, $f2, $fcc7
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
@@ -103,6 +244,14 @@
0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
+0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
@@ -122,8 +271,19 @@
0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3a 0xb1 0x13 # CHECK: movn.d $f4, $f22, $26
+0x46 0x3c 0x11 0x11 # CHECK: movf.d $f4, $f2, $fcc7
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x3d 0x11 0x11 # CHECK: movt.d $f4, $f2, $fcc7
0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
0x4c 0x52 0xf2 0xa9 # CHECK: msub.d $f10, $f2, $f30, $f18
0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
@@ -138,34 +298,72 @@
0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
0x70 0xc7 0x00 0x01 # CHECK: maddu $6, $7
0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
+0x70 0xc7 0x7d 0x3f # CHECK: sdbbp 204276
0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x05 0xe8 0x3b # CHECK: .set push
+ # CHECK: .set mips32r2
+ # CHECK: rdhwr $5, $29
+ # CHECK: .set pop
0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
+0x7d 0x07 0x30 0x00 # CHECK: ext $7, $8, 0, 7
0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
+0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xbc 0x80 0xb7 0xd2 # CHECK: cache 0, -18478($4)
0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
+0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
+0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xcf 0x00 0x00 0x00 # CHECK: pref 0, 0($24)
0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
+0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
+0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
diff --git a/test/MC/Disassembler/Mips/mips32r5/valid-xfail.txt b/test/MC/Disassembler/Mips/mips32r5/valid-xfail.txt
new file mode 100644
index 000000000000..c99ec7159872
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r5/valid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips32r5 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt b/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt
index 94dc3a2645d9..4164988b2b8f 100644
--- a/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt
+++ b/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt
@@ -80,7 +80,7 @@
0x9b 0x10 0x64 0x00 # CHECK: divu $2, $3, $4
0x20 0x60 0x6e 0x41 # CHECK: ei $14
0x20 0x60 0x60 0x41 # CHECK: ei
-0xc5 0x10 0x64 0x00 # CHECK: lsa $2, $3, $4, 3
+0xc5 0x10 0x64 0x00 # CHECK: lsa $2, $3, $4, 4
0x43 0x00 0x48 0xec # CHECK: lwpc $2, 268
0x43 0x00 0x50 0xec # CHECK: lwupc $2, 268
0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1
diff --git a/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt b/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt
index e1721b934835..6af02fc61b86 100644
--- a/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt
+++ b/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt
@@ -12,7 +12,7 @@
0x00 0x64 0x10 0x99 # CHECK: mulu $2, $3, $4
0x00 0x64 0x10 0x9a # CHECK: div $2, $3, $4
0x00 0x64 0x10 0x9b # CHECK: divu $2, $3, $4
-0x00 0x64 0x10 0xc5 # CHECK: lsa $2, $3, $4, 3
+0x00 0x64 0x10 0xc5 # CHECK: lsa $2, $3, $4, 4
0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
@@ -126,6 +126,7 @@
0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4
+0x42 0x00 0x00 0x58 # CHECK: eretnc
# FIXME: The encode/decode functions are not inverses of each other.
# The immediate should be 8 but the disassembler currently emits 12
0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0, 12
diff --git a/test/MC/Disassembler/Mips/mips4/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips4/invalid-xfail.txt
new file mode 100644
index 000000000000..6bef06228c10
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips4/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips4 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips4/valid-mips4-el.txt b/test/MC/Disassembler/Mips/mips4/valid-mips4-el.txt
index 0c9e2f1b742f..1d1044d32055 100644
--- a/test/MC/Disassembler/Mips/mips4/valid-mips4-el.txt
+++ b/test/MC/Disassembler/Mips/mips4/valid-mips4-el.txt
@@ -126,6 +126,8 @@
0xd1 0x2d 0x18 0x46 # CHECK: movf.s $f23, $f5, $fcc6
0x21 0xf0 0x80 0x00 # CHECK: move $fp, $4
0x21 0xc8 0xc0 0x00 # CHECK: move $25, $6
+0x25 0xf0 0x80 0x00 # CHECK: move $fp, $4
+0x25 0xc8 0xc0 0x00 # CHECK: move $25, $6
0x0b 0x18 0x30 0x02 # CHECK: movn $3, $17, $16
0xd3 0xae 0x3a 0x46 # CHECK: movn.d $f27, $f21, $26
0x13 0x03 0x17 0x46 # CHECK: movn.s $f12, $f0, $23
diff --git a/test/MC/Disassembler/Mips/mips4/valid-mips4.txt b/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
index 207f4087791d..5e8253dfef2d 100644
--- a/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
+++ b/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
@@ -1,12 +1,19 @@
# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips4 | FileCheck %s
# CHECK: .text
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
0x00 0x00 0x88 0x12 # CHECK: mflo $17
0x00 0x00 0x98 0x10 # CHECK: mfhi $19
0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
@@ -21,6 +28,8 @@
0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
@@ -30,11 +39,14 @@
0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x2d # CHECK: move $fp, $4
0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
@@ -44,6 +56,9 @@
0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
+0x00 0xc0 0xc8 0x2d # CHECK: move $25, $6
0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
@@ -60,6 +75,7 @@
0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
0x02 0x30 0x18 0x0b # CHECK: movn $3, $17, $16
0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
@@ -85,9 +101,11 @@
0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
0x04 0x11 0x14 0x9b # CHECK: bal 21104
0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
@@ -97,6 +115,9 @@
0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
@@ -110,7 +131,10 @@
0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
+0x3a 0x00 0x27 0x12 # CHECK: xori $zero, $16, 10002
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
0x42 0x00 0x00 0x01 # CHECK: tlbr
0x42 0x00 0x00 0x02 # CHECK: tlbwi
0x42 0x00 0x00 0x06 # CHECK: tlbwr
@@ -184,7 +208,10 @@
0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
+0x4c 0x20 0x01 0x01 # CHECK: ldxc1 $f4, $zero($1)
+0x4c 0x21 0x00 0x28 # CHECK: msub.s $f0, $f1, $f0, $f1
0x4d 0xca 0x58 0x09 # CHECK: sdxc1 $f11, $10($14)
+0x4e 0x20 0x3e 0xb9 # CHECK: nmsub.d $f26, $f17, $f7, $f0
0x4f 0x4c 0x98 0x08 # CHECK: swxc1 $f19, $12($26)
0x4f 0xd1 0x03 0x00 # CHECK: lwxc1 $f12, $17($fp)
0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
@@ -214,9 +241,11 @@
0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
0xbc 0xa1 0x00 0x08 # CHECK: cache 1, 8($5)
+0xbf 0x00 0xe2 0x1c # CHECK: cache 0, -7652($24)
0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
+0xcc 0x20 0x00 0x00 # CHECK: pref 0, 0($1)
0xcc 0xa1 0x00 0x08 # CHECK: pref 1, 8($5)
0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
diff --git a/test/MC/Disassembler/Mips/mips4/valid-xfail.txt b/test/MC/Disassembler/Mips/mips4/valid-xfail.txt
new file mode 100644
index 000000000000..159e7111d39a
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips4/valid-xfail.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips4 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0x27 0xc7 0xf2 # CHECK: beql $1, $7, -57400
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0x53 0x2a 0x02 0x1e # CHECK: beql $25, $10, 2168
+0x53 0x80 0x21 0x73 # CHECK: beql $gp, $zero, 34252
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips64/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips64/invalid-xfail.txt
new file mode 100644
index 000000000000..bbc7ff43d468
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips64/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips64 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt b/test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt
index 2d52216fddaa..1cd7b0bcebab 100644
--- a/test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt
+++ b/test/MC/Disassembler/Mips/mips64/valid-mips64-el.txt
@@ -148,6 +148,8 @@
0x00 0x38 0x06 0x44 # CHECK: mfc1 $6, $f7
0x10 0x28 0x00 0x00 # CHECK: mfhi $5
0x12 0x28 0x00 0x00 # CHECK: mflo $5
+0x25 0x78 0xe0 0x03 # CEHCK: move $15, $ra
+0x2d 0x78 0xe0 0x03 # CEHCK: move $15, $ra
0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
@@ -218,3 +220,31 @@
0x04 0x00 0x43 0xcc # CHECK: pref 3, 4($2)
0xc6 0x23 0xe9 0xe8 # CHECK: swc2 $9, 9158($7)
0xca 0x23 0xc8 0xc8 # CHECK: lwc2 $8, 9162($6)
+0xcd 0x7c 0x4b 0x67 # CHECK: daddiu $11, $26, 31949
+0x2d 0xd0 0x2b 0x00 # CHECK: daddu $26, $1, $11
+0x1e 0x00 0x56 0x03 # CHECK: ddiv $zero, $26, $22
+0x1f 0x00 0x38 0x01 # CHECK: ddivu $zero, $9, $24
+0x00 0x70 0x22 0x44 # CHECK: dmfc1 $2, $f14
+0x00 0x28 0xb7 0x44 # CHECK: dmtc1 $23, $f5
+0x1c 0x00 0x7a 0x01 # CHECK: dmult $11, $26
+0x1d 0x00 0xed 0x02 # CHECK: dmultu $23, $13
+0x78 0x1c 0x18 0x00 # CHECK: dsll $3, $24, 17
+0x14 0xe0 0x1b 0x03 # CHECK: dsllv $gp, $27, $24
+0xbb 0x0f 0x01 0x00 # CHECK: dsra $1, $1, 30
+0x17 0x08 0xc1 0x03 # CHECK: dsrav $1, $1, $fp
+0x3a 0x56 0x1c 0x00 # CHECK: dsrl $10, $gp, 24
+0x16 0xe0 0xea 0x02 # CHECK: dsrlv $gp, $10, $23
+0x2f 0xe0 0x78 0x03 # CHECK: dsubu $gp, $27, $24
+0xcd 0xc4 0x3b 0x8c # CHECK: lw $27, -15155($1)
+0x01 0x00 0x01 0x3c # CHECK: lui $1, 1
+0x2e 0xf9 0x63 0x9c # CHECK: lwu $3, -1746($3)
+0x01 0x00 0x1f 0x3c # CHECK: lui $ra, 1
+0xc9 0xc4 0x3a 0xac # CHECK: sw $26, -15159($1)
+0x76 0x0f 0x1a 0xdc # CHECK: ld $26, 3958($zero)
+0x67 0x45 0x06 0xfc # CHECK: sd $6, 17767($zero)
+0x05 0x00 0xa6 0x4c # CHECK: luxc1 $f0, $6($5)
+0x00 0x05 0xcc 0x4d # CHECK: lwxc1 $f20, $12($14)
+0x0d 0x20 0xb8 0x4c # CHECK: suxc1 $f4, $24($5)
+0x08 0xd0 0xd2 0x4e # CHECK: swxc1 $f26, $18($22)
+0x81 0x00 0x42 0x4d # CHECK: ldxc1 $f2, $2($10)
+0x09 0x40 0x24 0x4f # CHECK: sdxc1 $f8, $4($25)
diff --git a/test/MC/Disassembler/Mips/mips64/valid-mips64.txt b/test/MC/Disassembler/Mips/mips64/valid-mips64.txt
index 6cbf5d3206b0..2ba1ecdf5b8e 100644
--- a/test/MC/Disassembler/Mips/mips64/valid-mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64/valid-mips64.txt
@@ -1,12 +1,31 @@
# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
+0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x01 0xcf # CHECK: sync 7
0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x28 0x10 # CHECK: mfhi $5
0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
0x00 0x01 0x0f 0xbb # CHECK: dsra $1, $1, 30
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
@@ -14,18 +33,29 @@
0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
0x00 0x18 0x1c 0x78 # CHECK: dsll $3, $24, 17
0x00 0x1c 0x56 0x3a # CHECK: dsrl $10, $gp, 24
0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
0x00 0x2b 0xd0 0x2d # CHECK: daddu $26, $1, $11
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x5c 0x18 0x01 # CHECK: movf $3, $2, $fcc7
+0x00 0x5d 0x18 0x01 # CHECK: movt $3, $2, $fcc7
0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
@@ -33,43 +63,102 @@
0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe0 0x00 0x08 # CHECK: jr $7
0x00 0xe0 0x00 0x11 # CHECK: mthi $7
0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe0 0xf8 0x0b # CHECK: movn $ra, $7, $zero
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
0x01 0x38 0x00 0x1f # CHECK: ddivu $zero, $9, $24
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
0x01 0x7a 0x00 0x1c # CHECK: dmult $11, $26
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
+0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
0x02 0xea 0xe0 0x16 # CHECK: dsrlv $gp, $10, $23
0x02 0xed 0x00 0x1d # CHECK: dmultu $23, $13
0x03 0x1b 0xe0 0x14 # CHECK: dsllv $gp, $27, $24
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
0x03 0x56 0x00 0x1e # CHECK: ddiv $zero, $26, $22
0x03 0x78 0xe0 0x2f # CHECK: dsubu $gp, $27, $24
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
0x03 0xc1 0x08 0x17 # CHECK: dsrav $1, $1, $fp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
+0x03 0xe0 0x78 0x25 # CHECK: move $15, $ra
+0x03 0xe0 0x78 0x2d # CHECK: move $15, $ra
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x10 0x00 0x01 0x4c # CHECK: b 1332
0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
@@ -77,12 +166,28 @@
0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
+0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
+0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
+0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
0x3c 0x01 0x00 0x01 # CHECK: lui $1, 1
0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
0x3c 0x1f 0x00 0x01 # CHECK: lui $ra, 1
@@ -91,18 +196,33 @@
0x40 0x38 0x50 0x00 # CHECK: dmfc0 $24, $10, 0
0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
0x40 0xa4 0x50 0x00 # CHECK: dmtc0 $4, $10, 0
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x42 0x00 0x00 0x20 # CHECK: wait
0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
0x44 0x22 0x70 0x00 # CHECK: dmfc1 $2, $f14
0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
0x44 0xb7 0x28 0x00 # CHECK: dmtc1 $23, $f5
0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
+0x45 0x00 0x00 0x01 # CHECK: bc1f 8
0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
+0x45 0x01 0x00 0x01 # CHECK: bc1t 8
0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
+0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
@@ -115,8 +235,20 @@
0x46 0x00 0x39 0x8f # CHECK: floor.w.s $f6, $f7
0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
+0x46 0x00 0x46 0xa5 # CHECK: cvt.l.s $f26, $f8
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xc5 0x53 # CHECK: movn.s $f21, $f24, $zero
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
+0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
@@ -135,11 +267,20 @@
0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x1c 0x11 0x11 # CHECK: movf.s $f4, $f2, $fcc7
+0x46 0x1d 0x11 0x11 # CHECK: movt.s $f4, $f2, $fcc7
0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
@@ -149,7 +290,15 @@
0x46 0x20 0x73 0x0f # CHECK: floor.w.d $f12, $f14
0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
+0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
@@ -169,8 +318,27 @@
0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3a 0xb1 0x13 # CHECK: movn.d $f4, $f22, $26
+0x46 0x3c 0x11 0x11 # CHECK: movf.d $f4, $f2, $fcc7
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x3d 0x11 0x11 # CHECK: movt.d $f4, $f2, $fcc7
0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x48 0x20 0x50 0x00 # CHECK: dmfc2 $zero, $10, 0
+0x48 0xa4 0x50 0x00 # CHECK: dmtc2 $4, $10, 0
+0x4d 0x0c 0xe0 0x21 # CHECK: madd.d $f0, $f8, $f28, $f12
+0x4d 0xbb 0x0d 0xe0 # CHECK: madd.s $f23, $f13, $f1, $f27
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
@@ -206,6 +374,7 @@
0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
+0x70 0xc7 0x7d 0x3f # CHECK: sdbbp 204276
0x70 0xd2 0x90 0x25 # CHECK: dclo $18, $6
0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
@@ -216,39 +385,63 @@
# CHECK: rdhwr $5, $29
# CHECK: .set pop
0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
0x8c 0x3b 0xc4 0xcd # CHECK: lw $27, -15155($1)
0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
0x9c 0x63 0xf9 0x2e # CHECK: lwu $3, -1746($3)
0x9c 0x73 0xa1 0xea # CHECK: lwu $19, -24086($3)
0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
0xac 0x3a 0xc4 0xc9 # CHECK: sw $26, -15159($1)
0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xbc 0x80 0xb7 0xd2 # CHECK: cache 0, -18478($4)
0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xcf 0x00 0x00 0x00 # CHECK: pref 0, 0($24)
0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
0xd9 0x03 0x23 0xca # CHECK: ldc2 $3, 9162($8)
0xdc 0x1a 0x0f 0x76 # CHECK: ld $26, 3958($zero)
0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
0xf8 0xe9 0x23 0xc6 # CHECK: sdc2 $9, 9158($7)
+0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
0xfc 0x06 0x45 0x67 # CHECK: sd $6, 17767($zero)
0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips64/valid-xfail.txt b/test/MC/Disassembler/Mips/mips64/valid-xfail.txt
new file mode 100644
index 000000000000..4cc762e87492
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips64/valid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips64 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips64_le.txt b/test/MC/Disassembler/Mips/mips64_le.txt
deleted file mode 100644
index 0d3d2faf1312..000000000000
--- a/test/MC/Disassembler/Mips/mips64_le.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux | FileCheck %s
-# CHECK: daddiu $11, $26, 31949
-0xcd 0x7c 0x4b 0x67
-
-# CHECK: daddu $26, $1, $11
-0x2d 0xd0 0x2b 0x00
-
-# CHECK: ddiv $zero, $26, $22
-0x1e 0x00 0x56 0x03
-
-# CHECK: ddivu $zero, $9, $24
-0x1f 0x00 0x38 0x01
-
-# CHECK: dmfc1 $2, $f14
-0x00 0x70 0x22 0x44
-
-# CHECK: dmtc1 $23, $f5
-0x00 0x28 0xb7 0x44
-
-# CHECK: dmult $11, $26
-0x1c 0x00 0x7a 0x01
-
-# CHECK: dmultu $23, $13
-0x1d 0x00 0xed 0x02
-
-# CHECK: dsll $3, $24, 17
-0x78 0x1c 0x18 0x00
-
-# CHECK: dsllv $gp, $27, $24
-0x14 0xe0 0x1b 0x03
-
-# CHECK: dsra $1, $1, 30
-0xbb 0x0f 0x01 0x00
-
-# CHECK: dsrav $1, $1, $fp
-0x17 0x08 0xc1 0x03
-
-# CHECK: dsrl $10, $gp, 24
-0x3a 0x56 0x1c 0x00
-
-# CHECK: dsrlv $gp, $10, $23
-0x16 0xe0 0xea 0x02
-
-# CHECK: dsubu $gp, $27, $24
-0x2f 0xe0 0x78 0x03
-
-# CHECK: lw $27, -15155($1)
-0xcd 0xc4 0x3b 0x8c
-
-# CHECK: lui $1, 1
-0x01 0x00 0x01 0x3c
-
-# CHECK: lwu $3, -1746($3)
-0x2e 0xf9 0x63 0x9c
-
-# CHECK: lui $ra, 1
-0x01 0x00 0x1f 0x3c
-
-# CHECK: sw $26, -15159($1)
-0xc9 0xc4 0x3a 0xac
-
-# CHECK: ld $26, 3958($zero)
-0x76 0x0f 0x1a 0xdc
-
-# CHECK: sd $6, 17767($zero)
-0x67 0x45 0x06 0xfc
-
-# CHECK: luxc1 $f0, $6($5)
-0x05 0x00 0xa6 0x4c
-
-# CHECK: lwxc1 $f20, $12($14)
-0x00 0x05 0xcc 0x4d
-
-# CHECK: suxc1 $f4, $24($5)
-0x0d 0x20 0xb8 0x4c
-
-# CHECK: swxc1 $f26, $18($22)
-0x08 0xd0 0xd2 0x4e
-
-# CHECK: ldxc1 $f2, $2($10)
-0x81 0x00 0x42 0x4d
-
-# CHECK: sdxc1 $f8, $4($25)
-0x09 0x40 0x24 0x4f
diff --git a/test/MC/Disassembler/Mips/mips64r2/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips64r2/invalid-xfail.txt
new file mode 100644
index 000000000000..9a320be22b40
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips64r2/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips64r2 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt b/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt
index 2c6859f27faa..3f2b7615eb31 100644
--- a/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt
+++ b/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2-el.txt
@@ -168,6 +168,8 @@
0x10 0x28 0x00 0x00 # CHECK: mfhi $5
0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
0x12 0x28 0x00 0x00 # CHECK: mflo $5
+0x25 0x78 0xe0 0x03 # CHECK: move $15, $ra
+0x2d 0x78 0xe0 0x03 # CHECK: move $15, $ra
0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
@@ -239,3 +241,33 @@
0xa0 0x30 0x07 0x7c # CHECK: wsbh $6, $7
0x26 0x18 0x65 0x00 # CHECK: xor $3, $3, $5
0x67 0x45 0xc9 0x38 # CHECK: xori $9, $6, 17767
+0xcd 0x7c 0x4b 0x67 # CHECK: daddiu $11, $26, 31949
+0x2d 0xd0 0x2b 0x00 # CHECK: daddu $26, $1, $11
+0x1e 0x00 0x56 0x03 # CHECK: ddiv $zero, $26, $22
+0x1f 0x00 0x38 0x01 # CHECK: ddivu $zero, $9, $24
+0x00 0x70 0x22 0x44 # CHECK: dmfc1 $2, $f14
+0x00 0x28 0xb7 0x44 # CHECK: dmtc1 $23, $f5
+0x1c 0x00 0x7a 0x01 # CHECK: dmult $11, $26
+0x1d 0x00 0xed 0x02 # CHECK: dmultu $23, $13
+0x78 0x1c 0x18 0x00 # CHECK: dsll $3, $24, 17
+0x14 0xe0 0x1b 0x03 # CHECK: dsllv $gp, $27, $24
+0xbb 0x0f 0x01 0x00 # CHECK: dsra $1, $1, 30
+0x17 0x08 0xc1 0x03 # CHECK: dsrav $1, $1, $fp
+0x3a 0x56 0x1c 0x00 # CHECK: dsrl $10, $gp, 24
+0x16 0xe0 0xea 0x02 # CHECK: dsrlv $gp, $10, $23
+0x2f 0xe0 0x78 0x03 # CHECK: dsubu $gp, $27, $24
+0xcd 0xc4 0x3b 0x8c # CHECK: lw $27, -15155($1)
+0x01 0x00 0x01 0x3c # CHECK: lui $1, 1
+0x2e 0xf9 0x63 0x9c # CHECK: lwu $3, -1746($3)
+0x01 0x00 0x1f 0x3c # CHECK: lui $ra, 1
+0xc9 0xc4 0x3a 0xac # CHECK: sw $26, -15159($1)
+0x76 0x0f 0x1a 0xdc # CHECK: ld $26, 3958($zero)
+0x67 0x45 0x06 0xfc # CHECK: sd $6, 17767($zero)
+0x25 0x48 0x09 0x73 # CHECK: dclo $9, $24
+0x24 0xd0 0x3a 0x71 # CHECK: dclz $26, $9
+0x43 0xf7 0x87 0x7f # CHECK: dext $7, $gp, 29, 31
+0xc7 0x7b 0x94 0x7f # CHECK: dins $20, $gp, 15, 1
+0xa4 0x38 0x1c 0x7c # CHECK: dsbh $7, $gp
+0x64 0x19 0x0e 0x7c # CHECK: dshd $3, $14
+0xba 0xa1 0x3b 0x00 # CHECK: drotr $20, $27, 6
+0x56 0xc0 0xb7 0x00 # CHECK: drotrv $24, $23, $5
diff --git a/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt b/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt
index 0c6e10ee37ce..ac9d2b808622 100644
--- a/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt
+++ b/test/MC/Disassembler/Mips/mips64r2/valid-mips64r2.txt
@@ -4,13 +4,32 @@
# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips64r2 | FileCheck %s
# CHECK: .text
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
+0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x01 0xcf # CHECK: sync 7
0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x28 0x10 # CHECK: mfhi $5
0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
0x00 0x01 0x0f 0xbb # CHECK: dsra $1, $1, 30
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
@@ -18,11 +37,17 @@
0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
0x00 0x18 0x1c 0x78 # CHECK: dsll $3, $24, 17
0x00 0x1c 0x56 0x3a # CHECK: dsrl $10, $gp, 24
0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
@@ -33,9 +58,14 @@
0x00 0x2b 0xd0 0x2d # CHECK: daddu $26, $1, $11
0x00 0x2e 0x0b 0xfa # CHECK: drotr $1, $14, 15
0x00 0x2e 0x0b 0xfe # CHECK: drotr32 $1, $14, 15
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
0x00 0x3b 0xa1 0xba # CHECK: drotr $20, $27, 6
0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x5c 0x18 0x01 # CHECK: movf $3, $2, $fcc7
+0x00 0x5d 0x18 0x01 # CHECK: movt $3, $2, $fcc7
0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
@@ -43,46 +73,106 @@
0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
0x00 0xb7 0xc0 0x56 # CHECK: drotrv $24, $23, $5
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe0 0x00 0x08 # CHECK: jr $7
0x00 0xe0 0x00 0x11 # CHECK: mthi $7
0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe0 0xf8 0x0b # CHECK: movn $ra, $7, $zero
0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
0x01 0x38 0x00 0x1f # CHECK: ddivu $zero, $9, $24
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
0x01 0x7a 0x00 0x1c # CHECK: dmult $11, $26
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
0x01 0xee 0x08 0x56 # CHECK: drotrv $1, $14, $15
0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
+0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
0x02 0xea 0xe0 0x16 # CHECK: dsrlv $gp, $10, $23
0x02 0xed 0x00 0x1d # CHECK: dmultu $23, $13
0x03 0x1b 0xe0 0x14 # CHECK: dsllv $gp, $27, $24
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
0x03 0x56 0x00 0x1e # CHECK: ddiv $zero, $26, $22
0x03 0x78 0xe0 0x2f # CHECK: dsubu $gp, $27, $24
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
0x03 0xc1 0x08 0x17 # CHECK: dsrav $1, $1, $fp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
+0x03 0xe0 0x78 0x25 # CHECK: move $15, $ra
+0x03 0xe0 0x78 0x2d # CHECK: move $15, $ra
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x7f 0x47 0xc0 # CHECK: synci 18368($3)
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x10 0x00 0x01 0x4c # CHECK: b 1332
0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
@@ -90,16 +180,33 @@
0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
+0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
+0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
+0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
0x3c 0x01 0x00 0x01 # CHECK: lui $1, 1
0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
0x3c 0x1f 0x00 0x01 # CHECK: lui $ra, 1
0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
0x40 0x38 0x50 0x00 # CHECK: dmfc0 $24, $10, 0
0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
0x40 0xa4 0x50 0x00 # CHECK: dmtc0 $4, $10, 0
@@ -107,20 +214,35 @@
0x41 0x60 0x60 0x20 # CHECK: ei
0x41 0x6e 0x60 0x20 # CHECK: ei $14
0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x42 0x00 0x00 0x20 # CHECK: wait
0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
0x44 0x22 0x70 0x00 # CHECK: dmfc1 $2, $f14
0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
0x44 0xb7 0x28 0x00 # CHECK: dmtc1 $23, $f5
0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
+0x45 0x00 0x00 0x01 # CHECK: bc1f 8
0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
+0x45 0x01 0x00 0x01 # CHECK: bc1t 8
0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
+0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
@@ -134,8 +256,20 @@
0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x00 0x46 0xa5 # CHECK: cvt.l.s $f26, $f8
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xc5 0x53 # CHECK: movn.s $f21, $f24, $zero
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
+0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
@@ -154,11 +288,20 @@
0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x1c 0x11 0x11 # CHECK: movf.s $f4, $f2, $fcc7
+0x46 0x1d 0x11 0x11 # CHECK: movt.s $f4, $f2, $fcc7
0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
@@ -169,7 +312,15 @@
0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
+0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
@@ -189,19 +340,44 @@
0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3a 0xb1 0x13 # CHECK: movn.d $f4, $f22, $26
+0x46 0x3c 0x11 0x11 # CHECK: movf.d $f4, $f2, $fcc7
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x3d 0x11 0x11 # CHECK: movt.d $f4, $f2, $fcc7
0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x48 0x20 0x50 0x00 # CHECK: dmfc2 $zero, $10, 0
+0x48 0xa4 0x50 0x00 # CHECK: dmtc2 $4, $10, 0
+0x4d 0x0c 0xe0 0x21 # CHECK: madd.d $f0, $f8, $f28, $f12
+0x4d 0xbb 0x0d 0xe0 # CHECK: madd.s $f23, $f13, $f1, $f27
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0x42 0x00 0x81 # CHECK: ldxc1 $f2, $2($10)
+0x4d 0xbb 0x60 0x0d # CHECK: suxc1 $f12, $27($13)
+0x4d 0xca 0x58 0x09 # CHECK: sdxc1 $f11, $10($14)
0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
+0x4e 0xb6 0x04 0xc5 # CHECK: luxc1 $f19, $22($21)
0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x4f 0x4c 0x98 0x08 # CHECK: swxc1 $f19, $12($26)
+0x4f 0xd1 0x03 0x00 # CHECK: lwxc1 $f12, $17($fp)
0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
@@ -216,6 +392,7 @@
0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
0x67 0x4b 0x7c 0xcd # CHECK: daddiu $11, $26, 31949
+0x67 0xbd 0xff 0xe0 # CHECK: daddiu $sp, $sp, -32
0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
@@ -223,6 +400,7 @@
0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
+0x70 0xc7 0x7d 0x3f # CHECK: sdbbp 204276
0x70 0xd2 0x90 0x25 # CHECK: dclo $18, $6
0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
@@ -230,6 +408,10 @@
0x73 0x09 0x48 0x25 # CHECK: dclo $9, $24
0x73 0x30 0x80 0x24 # CHECK: dclz $16, $25
0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x05 0xe8 0x3b # CHECK: .set push
+ # CHECK: .set mips32r2
+ # CHECK: rdhwr $5, $29
+ # CHECK: .set pop
0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
@@ -241,33 +423,63 @@
0x7f 0x87 0xf7 0x43 # CHECK: dext $7, $gp, 29, 31
0x7f 0x94 0x7b 0xc7 # CHECK: dins $20, $gp, 15, 1
0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
0x8c 0x3b 0xc4 0xcd # CHECK: lw $27, -15155($1)
0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
0x9c 0x63 0xf9 0x2e # CHECK: lwu $3, -1746($3)
0x9c 0x73 0xa1 0xea # CHECK: lwu $19, -24086($3)
0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
0xac 0x3a 0xc4 0xc9 # CHECK: sw $26, -15159($1)
0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
+0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xbc 0x80 0xb7 0xd2 # CHECK: cache 0, -18478($4)
0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
+0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
+0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xcf 0x00 0x00 0x00 # CHECK: pref 0, 0($24)
0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
+0xd9 0x03 0x23 0xca # CHECK: ldc2 $3, 9162($8)
0xdc 0x1a 0x0f 0x76 # CHECK: ld $26, 3958($zero)
0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
+0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
+0xf8 0xe9 0x23 0xc6 # CHECK: sdc2 $9, 9158($7)
+0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
0xfc 0x06 0x45 0x67 # CHECK: sd $6, 17767($zero)
0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips64r2/valid-xfail.txt b/test/MC/Disassembler/Mips/mips64r2/valid-xfail.txt
new file mode 100644
index 000000000000..84d2c4728c7c
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips64r2/valid-xfail.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips64r2 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0x7c 0x48 0xc7 0x00 # CHECK: ext $8, $2, 28, 25
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips64r2_le.txt b/test/MC/Disassembler/Mips/mips64r2_le.txt
deleted file mode 100644
index 82e4d6ae1ce0..000000000000
--- a/test/MC/Disassembler/Mips/mips64r2_le.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: daddiu $11, $26, 31949
-0xcd 0x7c 0x4b 0x67
-
-# CHECK: daddu $26, $1, $11
-0x2d 0xd0 0x2b 0x00
-
-# CHECK: ddiv $zero, $26, $22
-0x1e 0x00 0x56 0x03
-
-# CHECK: ddivu $zero, $9, $24
-0x1f 0x00 0x38 0x01
-
-# CHECK: dmfc1 $2, $f14
-0x00 0x70 0x22 0x44
-
-# CHECK: dmtc1 $23, $f5
-0x00 0x28 0xb7 0x44
-
-# CHECK: dmult $11, $26
-0x1c 0x00 0x7a 0x01
-
-# CHECK: dmultu $23, $13
-0x1d 0x00 0xed 0x02
-
-# CHECK: dsll $3, $24, 17
-0x78 0x1c 0x18 0x00
-
-# CHECK: dsllv $gp, $27, $24
-0x14 0xe0 0x1b 0x03
-
-# CHECK: dsra $1, $1, 30
-0xbb 0x0f 0x01 0x00
-
-# CHECK: dsrav $1, $1, $fp
-0x17 0x08 0xc1 0x03
-
-# CHECK: dsrl $10, $gp, 24
-0x3a 0x56 0x1c 0x00
-
-# CHECK: dsrlv $gp, $10, $23
-0x16 0xe0 0xea 0x02
-
-# CHECK: dsubu $gp, $27, $24
-0x2f 0xe0 0x78 0x03
-
-# CHECK: lw $27, -15155($1)
-0xcd 0xc4 0x3b 0x8c
-
-# CHECK: lui $1, 1
-0x01 0x00 0x01 0x3c
-
-# CHECK: lwu $3, -1746($3)
-0x2e 0xf9 0x63 0x9c
-
-# CHECK: lui $ra, 1
-0x01 0x00 0x1f 0x3c
-
-# CHECK: sw $26, -15159($1)
-0xc9 0xc4 0x3a 0xac
-
-# CHECK: ld $26, 3958($zero)
-0x76 0x0f 0x1a 0xdc
-
-# CHECK: sd $6, 17767($zero)
-0x67 0x45 0x06 0xfc
-
-# CHECK: dclo $9, $24
-0x25 0x48 0x09 0x73
-
-# CHECK: dclz $26, $9
-0x24 0xd0 0x3a 0x71
-
-# CHECK: dext $7, $gp, 29, 31
-0x43 0xf7 0x87 0x7f
-
-# CHECK: dins $20, $gp, 15, 1
-0xc7 0x7b 0x94 0x7f
-
-# CHECK: dsbh $7, $gp
-0xa4 0x38 0x1c 0x7c
-
-# CHECK: dshd $3, $14
-0x64 0x19 0x0e 0x7c
-
-# CHECK: drotr $20, $27, 6
-0xba 0xa1 0x3b 0x00
-
-# CHECK: drotrv $24, $23, $5
-0x56 0xc0 0xb7 0x00
diff --git a/test/MC/Disassembler/Mips/mips64r3/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips64r3/invalid-xfail.txt
new file mode 100644
index 000000000000..d53830876ca3
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips64r3/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips64r3 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3-el.txt b/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3-el.txt
index 88e9c262a0ee..7d59ef6d3e03 100644
--- a/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3-el.txt
+++ b/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3-el.txt
@@ -165,6 +165,8 @@
0x10 0x28 0x00 0x00 # CHECK: mfhi $5
0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
0x12 0x28 0x00 0x00 # CHECK: mflo $5
+0x25 0x78 0xe0 0x03 # CEHCK: move $15, $ra
+0x2d 0x78 0xe0 0x03 # CEHCK: move $15, $ra
0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
diff --git a/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3.txt b/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3.txt
index 82405f357bf4..31b9f66bce12 100644
--- a/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3.txt
+++ b/test/MC/Disassembler/Mips/mips64r3/valid-mips64r3.txt
@@ -1,12 +1,32 @@
# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r3 | FileCheck %s
# CHECK: .text
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
+0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x01 0xcf # CHECK: sync 7
0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x28 0x10 # CHECK: mfhi $5
0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x01 0x0f 0xbb # CHECK: dsra $1, $1, 30
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
@@ -14,20 +34,35 @@
0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
+0x00 0x18 0x1c 0x78 # CHECK: dsll $3, $24, 17
+0x00 0x1c 0x56 0x3a # CHECK: dsrl $10, $gp, 24
0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
0x00 0x21 0x0b 0xfa # CHECK: drotr $1, $1, 15
0x00 0x21 0x0b 0xfe # CHECK: drotr32 $1, $1, 15
0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x2b 0xd0 0x2d # CHECK: daddu $26, $1, $11
0x00 0x2e 0x0b 0xfa # CHECK: drotr $1, $14, 15
0x00 0x2e 0x0b 0xfe # CHECK: drotr32 $1, $14, 15
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
+0x00 0x3b 0xa1 0xba # CHECK: drotr $20, $27, 6
0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x5c 0x18 0x01 # CHECK: movf $3, $2, $fcc7
+0x00 0x5d 0x18 0x01 # CHECK: movt $3, $2, $fcc7
0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
@@ -35,37 +70,106 @@
0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xb7 0xc0 0x56 # CHECK: drotrv $24, $23, $5
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe0 0x00 0x08 # CHECK: jr $7
0x00 0xe0 0x00 0x11 # CHECK: mthi $7
0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe0 0xf8 0x0b # CHECK: movn $ra, $7, $zero
0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x38 0x00 0x1f # CHECK: ddivu $zero, $9, $24
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0x7a 0x00 0x1c # CHECK: dmult $11, $26
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
0x01 0xee 0x08 0x56 # CHECK: drotrv $1, $14, $15
0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
+0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
+0x02 0xea 0xe0 0x16 # CHECK: dsrlv $gp, $10, $23
+0x02 0xed 0x00 0x1d # CHECK: dmultu $23, $13
+0x03 0x1b 0xe0 0x14 # CHECK: dsllv $gp, $27, $24
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
+0x03 0x56 0x00 0x1e # CHECK: ddiv $zero, $26, $22
+0x03 0x78 0xe0 0x2f # CHECK: dsubu $gp, $27, $24
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xc1 0x08 0x17 # CHECK: dsrav $1, $1, $fp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
+0x03 0xe0 0x78 0x25 # CHECK: move $15, $ra
+0x03 0xe0 0x78 0x2d # CHECK: move $15, $ra
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x7f 0x47 0xc0 # CHECK: synci 18368($3)
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x10 0x00 0x01 0x4c # CHECK: b 1332
0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
@@ -73,14 +177,34 @@
0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
+0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
+0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
+0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3a 0x00 0x3a 0x21 # CHECK: xori $zero, $16, 14881
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
+0x3c 0x01 0x00 0x01 # CHECK: lui $1, 1
0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x3c 0x1f 0x00 0x01 # CHECK: lui $ra, 1
0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
0x40 0x38 0x50 0x00 # CHECK: dmfc0 $24, $10, 0
0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
0x40 0xa4 0x50 0x00 # CHECK: dmtc0 $4, $10, 0
@@ -88,18 +212,35 @@
0x41 0x60 0x60 0x20 # CHECK: ei
0x41 0x6e 0x60 0x20 # CHECK: ei $14
0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x42 0x00 0x00 0x20 # CHECK: wait
0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
+0x44 0x22 0x70 0x00 # CHECK: dmfc1 $2, $f14
0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
+0x44 0xb7 0x28 0x00 # CHECK: dmtc1 $23, $f5
0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
+0x45 0x00 0x00 0x01 # CHECK: bc1f 8
0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
+0x45 0x01 0x00 0x01 # CHECK: bc1t 8
0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
+0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
@@ -113,8 +254,20 @@
0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x00 0x46 0xa5 # CHECK: cvt.l.s $f26, $f8
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xc5 0x53 # CHECK: movn.s $f21, $f24, $zero
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
+0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
@@ -133,11 +286,20 @@
0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x1c 0x11 0x11 # CHECK: movf.s $f4, $f2, $fcc7
+0x46 0x1d 0x11 0x11 # CHECK: movt.s $f4, $f2, $fcc7
0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
@@ -148,7 +310,15 @@
0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
+0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
@@ -168,19 +338,44 @@
0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3a 0xb1 0x13 # CHECK: movn.d $f4, $f22, $26
+0x46 0x3c 0x11 0x11 # CHECK: movf.d $f4, $f2, $fcc7
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x3d 0x11 0x11 # CHECK: movt.d $f4, $f2, $fcc7
0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x48 0x20 0x50 0x00 # CHECK: dmfc2 $zero, $10, 0
+0x48 0xa4 0x50 0x00 # CHECK: dmtc2 $4, $10, 0
+0x4d 0x0c 0xe0 0x21 # CHECK: madd.d $f0, $f8, $f28, $f12
+0x4d 0xbb 0x0d 0xe0 # CHECK: madd.s $f23, $f13, $f1, $f27
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0x42 0x00 0x81 # CHECK: ldxc1 $f2, $2($10)
+0x4d 0xbb 0x60 0x0d # CHECK: suxc1 $f12, $27($13)
+0x4d 0xca 0x58 0x09 # CHECK: sdxc1 $f11, $10($14)
0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
+0x4e 0xb6 0x04 0xc5 # CHECK: luxc1 $f19, $22($21)
0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x4f 0x4c 0x98 0x08 # CHECK: swxc1 $f19, $12($26)
+0x4f 0xd1 0x03 0x00 # CHECK: lwxc1 $f12, $17($fp)
0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
@@ -194,6 +389,8 @@
0x65 0xce 0x11 0xea # CHECK: daddiu $14, $14, 4586
0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
+0x67 0x4b 0x7c 0xcd # CHECK: daddiu $11, $26, 31949
+0x67 0xbd 0xff 0xe0 # CHECK: daddiu $sp, $sp, -32
0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
@@ -201,40 +398,86 @@
0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
+0x70 0xc7 0x7d 0x3f # CHECK: sdbbp 204276
0x70 0xd2 0x90 0x25 # CHECK: dclo $18, $6
0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
+0x71 0x3a 0xd0 0x24 # CHECK: dclz $26, $9
+0x73 0x09 0x48 0x25 # CHECK: dclo $9, $24
0x73 0x30 0x80 0x24 # CHECK: dclz $16, $25
0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x05 0xe8 0x3b # CHECK: .set push
+ # CHECK: .set mips32r2
+ # CHECK: rdhwr $5, $29
+ # CHECK: .set pop
0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
0x7c 0x0e 0x18 0xa4 # CHECK: dsbh $3, $14
+0x7c 0x0e 0x19 0x64 # CHECK: dshd $3, $14
+0x7c 0x1c 0x38 0xa4 # CHECK: dsbh $7, $gp
0x7c 0x1d 0x11 0x64 # CHECK: dshd $2, $sp
0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
+0x7f 0x87 0xf7 0x43 # CHECK: dext $7, $gp, 29, 31
+0x7f 0x94 0x7b 0xc7 # CHECK: dins $20, $gp, 15, 1
0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
+0x8c 0x3b 0xc4 0xcd # CHECK: lw $27, -15155($1)
0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
+0x9c 0x63 0xf9 0x2e # CHECK: lwu $3, -1746($3)
0x9c 0x73 0xa1 0xea # CHECK: lwu $19, -24086($3)
0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
+0xac 0x3a 0xc4 0xc9 # CHECK: sw $26, -15159($1)
0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
+0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xbc 0x80 0xb7 0xd2 # CHECK: cache 0, -18478($4)
0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
+0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
+0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xcf 0x00 0x00 0x00 # CHECK: pref 0, 0($24)
0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
+0xd9 0x03 0x23 0xca # CHECK: ldc2 $3, 9162($8)
+0xdc 0x1a 0x0f 0x76 # CHECK: ld $26, 3958($zero)
0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
+0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
+0xf8 0xe9 0x23 0xc6 # CHECK: sdc2 $9, 9158($7)
+0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
+0xfc 0x06 0x45 0x67 # CHECK: sd $6, 17767($zero)
0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips64r3/valid-xfail.txt b/test/MC/Disassembler/Mips/mips64r3/valid-xfail.txt
new file mode 100644
index 000000000000..4b85968a5370
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips64r3/valid-xfail.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips64r3 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0x7c 0x48 0xc7 0x00 # CHECK: ext $8, $2, 28, 25
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips64r5/invalid-xfail.txt b/test/MC/Disassembler/Mips/mips64r5/invalid-xfail.txt
new file mode 100644
index 000000000000..3ca183b2d31e
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips64r5/invalid-xfail.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips64r5 | FileCheck %s
+# XFAIL: *
+
+# Start with a valid instruction. Otherwise llvm-mc gives up immediately.
+0x00 0x00 0x00 0x00
+
+# CHECK: .text
+0x45 0x06 0x00 0x82 # bc1fl $fcc1, 520 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x07 0xd8 0x01 # bc1tl $fcc1, -40956 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x08 0x14 0x02 # bc1f $fcc2, 20488 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x45 0x09 0x01 0x01 # bc1t $fcc2, 1028 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x00 0x00 0x01 # mfc2 $zero, $0, 1 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
+0x48 0x86 0x00 0x04 # mtc2 $6, $0, 4 # CHECK: :[[@LINE]]:1: warning: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5-el.txt b/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5-el.txt
index bd709d228794..ee6ad1c71945 100644
--- a/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5-el.txt
+++ b/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5-el.txt
@@ -165,6 +165,8 @@
0x10 0x28 0x00 0x00 # CHECK: mfhi $5
0x00 0xc0 0x7e 0x44 # CHECK: mfhc1 $fp, $f24
0x12 0x28 0x00 0x00 # CHECK: mflo $5
+0x25 0x78 0xe0 0x03 # CEHCK: move $15, $ra
+0x2d 0x78 0xe0 0x03 # CEHCK: move $15, $ra
0x86 0x41 0x20 0x46 # CHECK: mov.d $f6, $f8
0x86 0x39 0x00 0x46 # CHECK: mov.s $f6, $f7
0x04 0x00 0xc7 0x70 # CHECK: msub $6, $7
diff --git a/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5.txt b/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5.txt
index 1b30144acab4..1fa0e629b375 100644
--- a/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5.txt
+++ b/test/MC/Disassembler/Mips/mips64r5/valid-mips64r5.txt
@@ -1,12 +1,32 @@
# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r5 | FileCheck %s
# CHECK: .text
0x00 0x00 0x00 0x00 # CHECK: nop
+0x00 0x00 0x00 0x09 # CHECK: jr $zero
+0x00 0x00 0x00 0x0c # CHECK: syscall
+0x00 0x00 0x00 0x0d # CHECK: break
+0x00 0x00 0x00 0x0f # CHECK: sync
+0x00 0x00 0x00 0x20 # CHECK: add $zero, $zero, $zero
+0x00 0x00 0x00 0x40 # CHECK: ssnop
+0x00 0x00 0x00 0x80 # CHECK: sll $zero, $zero, 2
+0x00 0x00 0x00 0xc0 # CHECK: ehb
0x00 0x00 0x01 0xcf # CHECK: sync 7
0x00 0x00 0x04 0xb8 # CHECK: dsll $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
0x00 0x00 0x04 0xbc # CHECK: dsll32 $zero, $zero, 18
+0x00 0x00 0x28 0x09 # CHECK: jalr $5, $zero
0x00 0x00 0x28 0x10 # CHECK: mfhi $5
0x00 0x00 0x28 0x12 # CHECK: mflo $5
+0x00 0x00 0x3c 0x80 # CHECK: sll $7, $zero, 18
+0x00 0x00 0x72 0x0d # CHECK: break 0, 456
+0x00 0x00 0x88 0x12 # CHECK: mflo $17
+0x00 0x00 0x98 0x10 # CHECK: mfhi $19
+0x00 0x00 0xe8 0x10 # CHECK: mfhi $sp
+0x00 0x01 0x0f 0xbb # CHECK: dsra $1, $1, 30
+0x00 0x02 0x10 0x23 # CHECK: negu $2, $2
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x02 0x11 0xc2 # CHECK: srl $2, $2, 7
+0x00 0x03 0x00 0x34 # CHECK: teq $zero, $3
+0x00 0x03 0x10 0x23 # CHECK: negu $2, $3
0x00 0x03 0x10 0x2e # CHECK: dneg $2, $3
0x00 0x03 0x10 0x2f # CHECK: dnegu $2, $3
0x00 0x03 0x21 0xc0 # CHECK: sll $4, $3, 7
@@ -14,20 +34,35 @@
0x00 0x03 0x21 0xc3 # CHECK: sra $4, $3, 7
0x00 0x06 0x9d 0xfa # CHECK: dsrl $19, $6, 23
0x00 0x06 0x9d 0xfe # CHECK: dsrl32 $19, $6, 23
+0x00 0x07 0x38 0x27 # CHECK: nor $7, $zero, $7
+0x00 0x07 0x3c 0x80 # CHECK: sll $7, $7, 18
+0x00 0x08 0xe8 0x22 # CHECK: neg $sp, $8
+0x00 0x10 0x00 0x80 # CHECK: sll $zero, $16, 2
+0x00 0x11 0x8b 0xc3 # CHECK: sra $17, $17, 15
0x00 0x12 0xe2 0xbb # CHECK: dsra $gp, $18, 10
0x00 0x12 0xe2 0xbf # CHECK: dsra32 $gp, $18, 10
0x00 0x13 0x9d 0xfa # CHECK: dsrl $19, $19, 23
0x00 0x13 0x9d 0xfe # CHECK: dsrl32 $19, $19, 23
0x00 0x14 0x04 0xb8 # CHECK: dsll $zero, $20, 18
+0x00 0x17 0x8b 0xc3 # CHECK: sra $17, $23, 15
+0x00 0x18 0x1c 0x78 # CHECK: dsll $3, $24, 17
+0x00 0x1c 0x56 0x3a # CHECK: dsrl $10, $gp, 24
0x00 0x1c 0xe2 0xbb # CHECK: dsra $gp, $gp, 10
0x00 0x1c 0xe2 0xbf # CHECK: dsra32 $gp, $gp, 10
0x00 0x21 0x0b 0xfa # CHECK: drotr $1, $1, 15
0x00 0x21 0x0b 0xfe # CHECK: drotr32 $1, $1, 15
0x00 0x26 0x49 0xc2 # CHECK: rotr $9, $6, 7
+0x00 0x2b 0xd0 0x2d # CHECK: daddu $26, $1, $11
0x00 0x2e 0x0b 0xfa # CHECK: drotr $1, $14, 15
0x00 0x2e 0x0b 0xfe # CHECK: drotr32 $1, $14, 15
+0x00 0x3a 0x3a 0xcc # CHECK: syscall 59627
+0x00 0x3b 0xa1 0xba # CHECK: drotr $20, $27, 6
0x00 0x3f 0x98 0x2c # CHECK: dadd $19, $1, $ra
0x00 0x3f 0x98 0x2d # CHECK: daddu $19, $1, $ra
+0x00 0x4c 0xb8 0x24 # CHECK: and $23, $2, $12
+0x00 0x53 0x21 0x72 # CHECK: tlt $2, $19, 133
+0x00 0x5c 0x18 0x01 # CHECK: movf $3, $2, $fcc7
+0x00 0x5d 0x18 0x01 # CHECK: movt $3, $2, $fcc7
0x00 0x65 0x00 0x18 # CHECK: mult $3, $5
0x00 0x65 0x00 0x19 # CHECK: multu $3, $5
0x00 0x65 0x18 0x25 # CHECK: or $3, $3, $5
@@ -35,37 +70,106 @@
0x00 0x65 0x18 0x2a # CHECK: slt $3, $3, $5
0x00 0x65 0x18 0x2b # CHECK: sltu $3, $3, $5
0x00 0x65 0x20 0x23 # CHECK: subu $4, $3, $5
+0x00 0x80 0xf0 0x21 # CHECK: move $fp, $4
+0x00 0x80 0xf0 0x25 # CHECK: move $fp, $4
0x00 0x80 0xfc 0x09 # CHECK: jalr.hb $4
+0x00 0x86 0x48 0x21 # CHECK: addu $9, $4, $6
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x94 0xc8 0x06 # CHECK: srlv $25, $20, $4
+0x00 0x9e 0x90 0x26 # CHECK: xor $18, $4, $fp
0x00 0xa0 0x24 0x09 # CHECK: jalr.hb $4, $5
0x00 0xa3 0x10 0x04 # CHECK: sllv $2, $3, $5
0x00 0xa3 0x10 0x06 # CHECK: srlv $2, $3, $5
0x00 0xa3 0x10 0x07 # CHECK: srav $2, $3, $5
0x00 0xa6 0x00 0x1d # CHECK: dmultu $5, $6
+0x00 0xa7 0x9b 0x34 # CHECK: teq $5, $7, 620
+0x00 0xb3 0x55 0x30 # CHECK: tge $5, $19, 340
+0x00 0xb7 0xc0 0x56 # CHECK: drotrv $24, $23, $5
+0x00 0xc0 0xc8 0x21 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x25 # CHECK: move $25, $6
+0x00 0xc0 0xc8 0x27 # CHECK: not $25, $6
0x00 0xba 0x28 0x2f # CHECK: dsubu $5, $5, $26
0x00 0xc7 0x48 0x20 # CHECK: add $9, $6, $7
0x00 0xc7 0x48 0x21 # CHECK: addu $9, $6, $7
0x00 0xc7 0x48 0x22 # CHECK: sub $9, $6, $7
0x00 0xc7 0x48 0x24 # CHECK: and $9, $6, $7
0x00 0xc7 0x48 0x27 # CHECK: nor $9, $6, $7
+0x00 0xd1 0x00 0x36 # CHECK: tne $6, $17
0x00 0xe0 0x00 0x08 # CHECK: jr $7
0x00 0xe0 0x00 0x11 # CHECK: mthi $7
0x00 0xe0 0x00 0x13 # CHECK: mtlo $7
0x00 0xe0 0xf8 0x09 # CHECK: jalr $7
+0x00 0xe0 0xf8 0x0b # CHECK: movn $ra, $7, $zero
0x00 0xe6 0x48 0x46 # CHECK: rotrv $9, $6, $7
+0x00 0xe8 0xdd 0x76 # CHECK: tne $7, $8, 885
+0x00 0xea 0x00 0x30 # CHECK: tge $7, $10
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x20 0x38 0x04 # CHECK: sllv $7, $zero, $9
+0x01 0x32 0x00 0x19 # CHECK: multu $9, $18
+0x01 0x38 0x00 0x1f # CHECK: ddivu $zero, $9, $24
+0x01 0x70 0x00 0x33 # CHECK: tltu $11, $16
+0x01 0x7a 0x00 0x1c # CHECK: dmult $11, $26
+0x01 0x7b 0xb8 0x2a # CHECK: slt $23, $11, $27
0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
0x01 0x94 0x00 0x14 # CHECK: dsllv $zero, $20, $12
+0x01 0xed 0x00 0x32 # CHECK: tlt $15, $13
0x01 0xee 0x08 0x56 # CHECK: drotrv $1, $14, $15
0x02 0x11 0x00 0x1f # CHECK: ddivu $zero, $16, $17
+0x02 0x1d 0x60 0x25 # CHECK: or $12, $16, $sp
+0x02 0x1d 0xfe 0x33 # CHECK: tltu $16, $sp, 1016
+0x02 0x20 0x00 0x11 # CHECK: mthi $17
+0x02 0x27 0x00 0x0d # CHECK: break 551
+0x02 0x45 0xb8 0x20 # CHECK: add $23, $18, $5
+0x02 0x6c 0xb0 0x22 # CHECK: sub $22, $19, $12
0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
0x02 0x72 0xe0 0x17 # CHECK: dsrav $gp, $18, $19
0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
0x02 0x86 0x98 0x16 # CHECK: dsrlv $19, $6, $20
+0x02 0x8e 0x5e 0xf1 # CHECK: tgeu $20, $14, 379
+0x02 0xab 0xa0 0x2b # CHECK: sltu $20, $21, $11
0x02 0xc8 0x38 0x2e # CHECK: dsub $7, $22, $8
+0x02 0xd6 0xe8 0x23 # CHECK: subu $sp, $22, $22
+0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
0x02 0xe9 0x00 0x1c # CHECK: dmult $23, $9
+0x02 0xea 0xe0 0x16 # CHECK: dsrlv $gp, $10, $23
+0x02 0xed 0x00 0x1d # CHECK: dmultu $23, $13
+0x03 0x1b 0xe0 0x14 # CHECK: dsllv $gp, $27, $24
+0x03 0x20 0x00 0x13 # CHECK: mtlo $25
+0x03 0x2b 0x00 0x1a # CHECK: div $zero, $25, $11
+0x03 0x2f 0x00 0x1b # CHECK: divu $zero, $25, $15
0x03 0x53 0x00 0x1e # CHECK: ddiv $zero, $26, $19
+0x03 0x56 0x00 0x1e # CHECK: ddiv $zero, $26, $22
+0x03 0x78 0xe0 0x2f # CHECK: dsubu $gp, $27, $24
+0x03 0x9a 0x00 0x19 # CHECK: multu $gp, $26
+0x03 0xa0 0x00 0x13 # CHECK: mtlo $sp
+0x03 0xa2 0x00 0x18 # CHECK: mult $sp, $2
+0x03 0xb4 0x00 0x18 # CHECK: mult $sp, $20
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xb7 0x88 0x07 # CHECK: srav $17, $23, $sp
+0x03 0xc1 0x08 0x17 # CHECK: dsrav $1, $1, $fp
+0x03 0xcd 0x23 0xcd # CHECK: break 973, 143
+0x03 0xe0 0x78 0x25 # CHECK: move $15, $ra
+0x03 0xe0 0x78 0x2d # CHECK: move $15, $ra
+0x04 0x11 0x14 0x9b # CHECK: bal 21104
+0x04 0x7f 0x47 0xc0 # CHECK: synci 18368($3)
+0x04 0x83 0xf9 0x4d # CHECK: bgezl $4, -6856
0x04 0xc1 0x01 0x4c # CHECK: bgez $6, 1332
+0x04 0xd0 0x14 0x9b # CHECK: bltzal $6, 21104
0x04 0xd1 0x01 0x4c # CHECK: bgezal $6, 1332
+0x04 0xd1 0x14 0x9b # CHECK: bgezal $6, 21104
+0x04 0xd2 0x00 0x7a # CHECK: bltzall $6, 492
+0x05 0x8e 0x8c 0x31 # CHECK: tnei $12, 35889
+0x05 0x93 0x07 0x1f # CHECK: bgezall $12, 7296
+0x05 0xca 0xad 0xbd # CHECK: tlti $14, 44477
+0x06 0x22 0xf6 0x45 # CHECK: bltzl $17, -9960
+0x06 0x28 0x13 0xa1 # CHECK: tgei $17, 5025
+0x06 0xac 0xbb 0xa0 # CHECK: teqi $21, 48032
+0x07 0xa9 0x90 0x33 # CHECK: tgeiu $sp, 36915
+0x07 0xeb 0xec 0x2c # CHECK: tltiu $ra, 60460
+0x08 0x00 0x00 0x01 # CHECK: j 4
0x08 0x00 0x01 0x4c # CHECK: j 1328
+0x09 0x33 0x00 0x2a # CHECK: j 80478376
+0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x10 0x00 0x01 0x4c # CHECK: b 1332
0x11 0x26 0x01 0x4c # CHECK: beq $9, $6, 1332
@@ -73,14 +177,33 @@
0x18 0xc0 0x01 0x4c # CHECK: blez $6, 1332
0x1c 0xc0 0x01 0x4c # CHECK: bgtz $6, 1332
0x20 0xc9 0x45 0x67 # CHECK: addi $9, $6, 17767
+0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
+0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
+0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
+0x21 0xc9 0x3b 0x48 # CHECK: addi $9, $14, 15176
+0x22 0x36 0x0c 0x36 # CHECK: addi $22, $17, 3126
+0x23 0x18 0xe3 0xe7 # CHECK: addi $24, $24, -7193
+0x24 0x00 0x8b 0x3f # CHECK: addiu $zero, $zero, -29889
+0x24 0x01 0x8b 0xb3 # CHECK: addiu $1, $zero, -29773
0x24 0xc9 0xc5 0x67 # CHECK: addiu $9, $6, -15001
+0x25 0x29 0x00 0x0a # CHECK: addiu $9, $9, 10
0x28 0x63 0x00 0x67 # CHECK: slti $3, $3, 103
+0x29 0x51 0x25 0x11 # CHECK: slti $17, $10, 9489
0x2c 0x63 0x00 0x67 # CHECK: sltiu $3, $3, 103
+0x2f 0x38 0xc3 0x55 # CHECK: sltiu $24, $25, -15531
+0x2f 0x39 0xc3 0x55 # CHECK: sltiu $25, $25, -15531
+0x30 0x42 0x00 0x04 # CHECK: andi $2, $2, 4
0x30 0xc9 0x45 0x67 # CHECK: andi $9, $6, 17767
+0x34 0x42 0x00 0x00 # CHECK: ori $2, $2, 0
+0x34 0x42 0x00 0x04 # CHECK: ori $2, $2, 4
0x34 0xc9 0x45 0x67 # CHECK: ori $9, $6, 17767
0x38 0xc9 0x45 0x67 # CHECK: xori $9, $6, 17767
+0x3c 0x00 0x00 0x80 # CHECK: lui $zero, 128
+0x3c 0x01 0x00 0x01 # CHECK: lui $1, 1
0x3c 0x06 0x45 0x67 # CHECK: lui $6, 17767
+0x3c 0x1f 0x00 0x01 # CHECK: lui $ra, 1
0x40 0x08 0x78 0x01 # CHECK: mfc0 $8, $15, 1
+0x40 0x08 0x80 0x04 # CHECK: mfc0 $8, $16, 4
0x40 0x38 0x50 0x00 # CHECK: dmfc0 $24, $10, 0
0x40 0x89 0x78 0x01 # CHECK: mtc0 $9, $15, 1
0x40 0xa4 0x50 0x00 # CHECK: dmtc0 $4, $10, 0
@@ -88,18 +211,35 @@
0x41 0x60 0x60 0x20 # CHECK: ei
0x41 0x6e 0x60 0x20 # CHECK: ei $14
0x41 0x7e 0x60 0x00 # CHECK: di $fp
+0x42 0x00 0x00 0x01 # CHECK: tlbr
+0x42 0x00 0x00 0x02 # CHECK: tlbwi
+0x42 0x00 0x00 0x06 # CHECK: tlbwr
+0x42 0x00 0x00 0x08 # CHECK: tlbp
+0x42 0x00 0x00 0x18 # CHECK: eret
+0x42 0x00 0x00 0x20 # CHECK: wait
0x44 0x06 0x38 0x00 # CHECK: mfc1 $6, $f7
+0x44 0x07 0xd8 0x00 # CHECK: mfc1 $7, $f27
+0x44 0x22 0x70 0x00 # CHECK: dmfc1 $2, $f14
0x44 0x2c 0x68 0x00 # CHECK: dmfc1 $12, $f13
0x44 0x46 0x38 0x00 # CHECK: cfc1 $6, $7
+0x44 0x51 0xa8 0x00 # CHECK: cfc1 $17, $21
0x44 0x7e 0xc0 0x00 # CHECK: mfhc1 $fp, $f24
0x44 0x86 0x38 0x00 # CHECK: mtc1 $6, $f7
+0x44 0x9e 0x48 0x00 # CHECK: mtc1 $fp, $f9
0x44 0xb0 0x70 0x00 # CHECK: dmtc1 $16, $f14
+0x44 0xb7 0x28 0x00 # CHECK: dmtc1 $23, $f5
0x44 0xc6 0x38 0x00 # CHECK: ctc1 $6, $7
+0x44 0xc6 0xd0 0x00 # CHECK: ctc1 $6, $26
0x44 0xe0 0x80 0x00 # CHECK: mthc1 $zero, $f16
+0x45 0x00 0x00 0x01 # CHECK: bc1f 8
0x45 0x00 0x01 0x4c # CHECK: bc1f 1332
+0x45 0x01 0x00 0x01 # CHECK: bc1t 8
0x45 0x01 0x01 0x4c # CHECK: bc1t 1332
+0x45 0x02 0x00 0x0c # CHECK: bc1fl 52
+0x45 0x03 0xf7 0xf4 # CHECK: bc1tl -8236
0x45 0x1c 0x01 0x4c # CHECK: bc1f $fcc7, 1332
0x45 0x1d 0x01 0x4c # CHECK: bc1t $fcc7, 1332
+0x46 0x00 0x08 0x04 # CHECK: sqrt.s $f0, $f1
0x46 0x00 0x2b 0x0b # CHECK: floor.l.s $f12, $f5
0x46 0x00 0x2e 0x48 # CHECK: round.l.s $f25, $f5
0x46 0x00 0x39 0x84 # CHECK: sqrt.s $f6, $f7
@@ -113,8 +253,20 @@
0x46 0x00 0x39 0xa1 # CHECK: cvt.d.s $f6, $f7
0x46 0x00 0x39 0xa4 # CHECK: cvt.w.s $f6, $f7
0x46 0x00 0x39 0xa5 # CHECK: cvt.l.s $f6, $f7
+0x46 0x00 0x46 0xa5 # CHECK: cvt.l.s $f26, $f8
+0x46 0x00 0x4a 0x0f # CHECK: floor.w.s $f8, $f9
0x46 0x00 0x6c 0x8a # CHECK: ceil.l.s $f18, $f13
+0x46 0x00 0x78 0x47 # CHECK: neg.s $f1, $f15
+0x46 0x00 0x82 0x45 # CHECK: abs.s $f9, $f16
+0x46 0x00 0xa1 0x8e # CHECK: ceil.w.s $f6, $f20
+0x46 0x00 0xc5 0x24 # CHECK: cvt.w.s $f20, $f24
+0x46 0x00 0xc5 0x53 # CHECK: movn.s $f21, $f24, $zero
+0x46 0x00 0xd8 0x86 # CHECK: mov.s $f2, $f27
+0x46 0x00 0xe5 0xa1 # CHECK: cvt.d.s $f22, $f28
+0x46 0x00 0xe6 0xcc # CHECK: round.w.s $f27, $f28
+0x46 0x00 0xf7 0x0d # CHECK: trunc.w.s $f28, $f30
0x46 0x00 0xff 0x09 # CHECK: trunc.l.s $f28, $f31
+0x46 0x02 0x57 0x82 # CHECK: mul.s $f30, $f10, $f2
0x46 0x07 0x30 0x30 # CHECK: c.f.s $f6, $f7
0x46 0x07 0x30 0x31 # CHECK: c.un.s $f6, $f7
0x46 0x07 0x30 0x32 # CHECK: c.eq.s $f6, $f7
@@ -133,11 +285,20 @@
0x46 0x07 0x32 0x40 # CHECK: add.s $f9, $f6, $f7
0x46 0x07 0x32 0x41 # CHECK: sub.s $f9, $f6, $f7
0x46 0x07 0x32 0x42 # CHECK: mul.s $f9, $f6, $f7
+0x46 0x0f 0x29 0x03 # CHECK: div.s $f4, $f5, $f15
0x46 0x12 0xe0 0x33 # CHECK: c.ueq.s $f28, $f18
+0x46 0x16 0x70 0x38 # CHECK: c.sf.s $f14, $f22
+0x46 0x16 0xb5 0xc1 # CHECK: sub.s $f23, $f22, $f22
+0x46 0x18 0xaa 0x00 # CHECK: add.s $f8, $f21, $f24
+0x46 0x1c 0x11 0x11 # CHECK: movf.s $f4, $f2, $fcc7
+0x46 0x1d 0x11 0x11 # CHECK: movt.s $f4, $f2, $fcc7
0x46 0x20 0x0b 0x08 # CHECK: round.l.d $f12, $f1
0x46 0x20 0x18 0x4a # CHECK: ceil.l.d $f1, $f3
+0x46 0x20 0x21 0x8c # CHECK: round.w.d $f6, $f4
0x46 0x20 0x3e 0x8b # CHECK: floor.l.d $f26, $f7
0x46 0x20 0x41 0x86 # CHECK: mov.d $f6, $f8
+0x46 0x20 0x46 0xa0 # CHECK: cvt.s.d $f26, $f8
+0x46 0x20 0x53 0x8f # CHECK: floor.w.d $f14, $f10
0x46 0x20 0x73 0x04 # CHECK: sqrt.d $f12, $f14
0x46 0x20 0x73 0x05 # CHECK: abs.d $f12, $f14
0x46 0x20 0x73 0x07 # CHECK: neg.d $f12, $f14
@@ -148,7 +309,15 @@
0x46 0x20 0x73 0x20 # CHECK: cvt.s.d $f12, $f14
0x46 0x20 0x73 0x24 # CHECK: cvt.w.d $f12, $f14
0x46 0x20 0x73 0x25 # CHECK: cvt.l.d $f12, $f14
+0x46 0x20 0x75 0x06 # CHECK: mov.d $f20, $f14
+0x46 0x20 0x75 0x24 # CHECK: cvt.w.d $f20, $f14
+0x46 0x20 0x75 0x8d # CHECK: trunc.w.d $f22, $f14
+0x46 0x20 0x96 0x87 # CHECK: neg.d $f26, $f18
+0x46 0x20 0xb4 0x04 # CHECK: sqrt.d $f16, $f22
0x46 0x20 0xbd 0xc9 # CHECK: trunc.l.d $f23, $f23
+0x46 0x20 0xc1 0x85 # CHECK: abs.d $f6, $f24
+0x46 0x20 0xc2 0xce # CHECK: ceil.w.d $f11, $f24
+0x46 0x20 0xf0 0x38 # CHECK: c.sf.d $f30, $f0
0x46 0x2e 0x60 0x30 # CHECK: c.f.d $f12, $f14
0x46 0x2e 0x60 0x31 # CHECK: c.un.d $f12, $f14
0x46 0x2e 0x60 0x32 # CHECK: c.eq.d $f12, $f14
@@ -165,22 +334,48 @@
0x46 0x2e 0x60 0x3d # CHECK: c.nge.d $f12, $f14
0x46 0x2e 0x60 0x3e # CHECK: c.le.d $f12, $f14
0x46 0x2e 0x60 0x3f # CHECK: c.ngt.d $f12, $f14
+0x42 0x00 0x00 0x58 # CHECK: eretnc
0x46 0x2e 0x62 0x00 # CHECK: add.d $f8, $f12, $f14
0x46 0x2e 0x62 0x01 # CHECK: sub.d $f8, $f12, $f14
0x46 0x2e 0x62 0x02 # CHECK: mul.d $f8, $f12, $f14
+0x46 0x30 0x00 0x39 # CHECK: c.ngle.d $f0, $f16
+0x46 0x30 0x14 0x81 # CHECK: sub.d $f18, $f2, $f16
+0x46 0x30 0xa5 0x02 # CHECK: mul.d $f20, $f20, $f16
+0x46 0x3a 0xa7 0x03 # CHECK: div.d $f28, $f20, $f26
+0x46 0x3a 0xb1 0x13 # CHECK: movn.d $f4, $f22, $26
+0x46 0x3c 0x11 0x11 # CHECK: movf.d $f4, $f2, $fcc7
+0x46 0x3c 0x30 0x00 # CHECK: add.d $f0, $f6, $f28
+0x46 0x3c 0xe0 0x3b # CHECK: c.ngl.d $f28, $f28
+0x46 0x3d 0x11 0x11 # CHECK: movt.d $f4, $f2, $fcc7
0x46 0x80 0x39 0xa0 # CHECK: cvt.s.w $f6, $f7
+0x46 0x80 0x5e 0xa1 # CHECK: cvt.d.w $f26, $f11
0x46 0x80 0x73 0x21 # CHECK: cvt.d.w $f12, $f14
+0x46 0x80 0x7d 0xa0 # CHECK: cvt.s.w $f22, $f15
+0x48 0x20 0x50 0x00 # CHECK: dmfc2 $zero, $10, 0
+0x48 0xa4 0x50 0x00 # CHECK: dmtc2 $4, $10, 0
+0x4d 0x0c 0xe0 0x21 # CHECK: madd.d $f0, $f8, $f28, $f12
+0x4d 0xbb 0x0d 0xe0 # CHECK: madd.s $f23, $f13, $f1, $f27
+0x51 0xd3 0x0c 0x40 # CHECK: beql $14, $19, 12548
+0x57 0x94 0x04 0xfc # CHECK: bnel $gp, $20, 5108
+0x58 0xc0 0x02 0xe7 # CHECK: blezl $6, 2976
+0x5d 0x40 0xfc 0x59 # CHECK: bgtzl $10, -3736
0x46 0xa0 0x81 0x21 # CHECK: cvt.d.l $f4, $f16
0x46 0xa0 0xf3 0xe0 # CHECK: cvt.s.l $f15, $f30
0x4c 0xa6 0x00 0x05 # CHECK: luxc1 $f0, $6($5)
0x4c 0xac 0xc8 0x30 # CHECK: nmadd.s $f0, $f5, $f25, $f12
0x4c 0xb8 0x20 0x0d # CHECK: suxc1 $f4, $24($5)
+0x4d 0x42 0x00 0x81 # CHECK: ldxc1 $f2, $2($10)
+0x4d 0xbb 0x60 0x0d # CHECK: suxc1 $f12, $27($13)
+0x4d 0xca 0x58 0x09 # CHECK: sdxc1 $f11, $10($14)
0x4d 0xcc 0x05 0x00 # CHECK: lwxc1 $f20, $12($14)
0x4d 0xf7 0x02 0x01 # CHECK: ldxc1 $f8, $23($15)
0x4e 0x70 0x53 0x28 # CHECK: msub.s $f12, $f19, $f10, $f16
+0x4e 0xb6 0x04 0xc5 # CHECK: luxc1 $f19, $22($21)
0x4e 0xd2 0xd0 0x08 # CHECK: swxc1 $f26, $18($22)
0x4f 0x04 0x98 0x78 # CHECK: nmsub.s $f1, $f24, $f19, $f4
0x4f 0x24 0x40 0x09 # CHECK: sdxc1 $f8, $4($25)
+0x4f 0x4c 0x98 0x08 # CHECK: swxc1 $f19, $12($26)
+0x4f 0xd1 0x03 0x00 # CHECK: lwxc1 $f12, $17($fp)
0x4f 0xf9 0x98 0x60 # CHECK: madd.s $f1, $f31, $f19, $f25
0x62 0x9d 0x6c 0x39 # CHECK: daddi $sp, $20, 27705
0x62 0x9d 0x93 0xc7 # CHECK: daddi $sp, $20, -27705
@@ -194,6 +389,8 @@
0x65 0xce 0x11 0xea # CHECK: daddiu $14, $14, 4586
0x66 0x73 0x69 0x3f # CHECK: daddiu $19, $19, 26943
0x66 0xda 0xee 0x16 # CHECK: daddiu $26, $22, -4586
+0x67 0x4b 0x7c 0xcd # CHECK: daddiu $11, $26, 31949
+0x67 0xbd 0xff 0xe0 # CHECK: daddiu $sp, $sp, -32
0x6b 0x18 0xef 0xb9 # CHECK: ldl $24, -4167($24)
0x6e 0x8e 0x89 0x6a # CHECK: ldr $14, -30358($20)
0x70 0xc7 0x00 0x00 # CHECK: madd $6, $7
@@ -201,40 +398,87 @@
0x70 0xc7 0x00 0x04 # CHECK: msub $6, $7
0x70 0xc7 0x00 0x05 # CHECK: msubu $6, $7
0x70 0xc7 0x48 0x02 # CHECK: mul $9, $6, $7
+0x70 0xc7 0x7d 0x3f # CHECK: sdbbp 204276
0x70 0xd2 0x90 0x25 # CHECK: dclo $18, $6
0x70 0xe6 0x30 0x20 # CHECK: clz $6, $7
0x70 0xe6 0x30 0x21 # CHECK: clo $6, $7
+0x71 0x3a 0xd0 0x24 # CHECK: dclz $26, $9
+0x73 0x09 0x48 0x25 # CHECK: dclo $9, $24
0x73 0x30 0x80 0x24 # CHECK: dclz $16, $25
0x74 0x00 0x01 0x4c # CHECK: jalx 1328
+0x7c 0x05 0xe8 0x3b # CHECK: .set push
+ # CHECK: .set mips32r2
+ # CHECK: rdhwr $5, $29
+ # CHECK: .set pop
0x7c 0x07 0x30 0xa0 # CHECK: wsbh $6, $7
0x7c 0x07 0x34 0x20 # CHECK: seb $6, $7
0x7c 0x07 0x36 0x20 # CHECK: seh $6, $7
0x7c 0x0e 0x18 0xa4 # CHECK: dsbh $3, $14
+0x7c 0x0e 0x19 0x64 # CHECK: dshd $3, $14
+0x7c 0x1c 0x38 0xa4 # CHECK: dsbh $7, $gp
0x7c 0x1d 0x11 0x64 # CHECK: dshd $2, $sp
0x7d 0x33 0x61 0x84 # CHECK: ins $19, $9, 6, 7
+0x7f 0x87 0xf7 0x43 # CHECK: dext $7, $gp, 29, 31
+0x7f 0x94 0x7b 0xc7 # CHECK: dins $20, $gp, 15, 1
0x80 0xa4 0x23 0xc6 # CHECK: lb $4, 9158($5)
+0x81 0x58 0xc7 0x4d # CHECK: lb $24, -14515($10)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
0x84 0xa4 0x00 0x0c # CHECK: lh $4, 12($5)
+0x86 0xab 0xde 0x94 # CHECK: lh $11, -8556($21)
0x88 0x82 0x00 0x03 # CHECK: lwl $2, 3($4)
+0x89 0xf4 0xef 0x79 # CHECK: lwl $20, -4231($15)
+0x8c 0x3b 0xc4 0xcd # CHECK: lw $27, -15155($1)
0x8c 0xa4 0x00 0x18 # CHECK: lw $4, 24($5)
+0x8c 0xa8 0x16 0x2a # CHECK: lw $8, 5674($5)
+0x90 0x68 0x75 0xf3 # CHECK: lbu $8, 30195($3)
0x90 0xa4 0x00 0x06 # CHECK: lbu $4, 6($5)
+0x94 0x53 0xa6 0xbd # CHECK: lhu $19, -22851($2)
0x98 0xa3 0x00 0x10 # CHECK: lwr $3, 16($5)
+0x9b 0x80 0xb5 0x35 # CHECK: lwr $zero, -19147($gp)
+0x9c 0x63 0xf9 0x2e # CHECK: lwu $3, -1746($3)
0x9c 0x73 0xa1 0xea # CHECK: lwu $19, -24086($3)
0xa0 0xa4 0x00 0x06 # CHECK: sb $4, 6($5)
0xa0 0xa4 0x23 0xc6 # CHECK: sb $4, 9158($5)
+0xa1 0xd6 0xb2 0x6f # CHECK: sb $22, -19857($14)
0xa4 0xa4 0x23 0xc6 # CHECK: sh $4, 9158($5)
+0xa5 0xee 0xe5 0xd0 # CHECK: sh $14, -6704($15)
0xa8 0xa4 0x00 0x10 # CHECK: swl $4, 16($5)
+0xaa 0x6f 0x35 0x7e # CHECK: swl $15, 13694($19)
+0xac 0x3a 0xc4 0xc9 # CHECK: sw $26, -15159($1)
0xac 0xa4 0x00 0x18 # CHECK: sw $4, 24($5)
+0xaf 0xbf 0xd8 0x50 # CHECK: sw $ra, -10160($sp)
0xb3 0xc7 0xae 0x1f # CHECK: sdl $7, -20961($fp)
0xb5 0x8b 0xb0 0x39 # CHECK: sdr $11, -20423($12)
0xb8 0xe6 0x00 0x10 # CHECK: swr $6, 16($7)
+0xb9 0xd1 0x98 0x22 # CHECK: swr $17, -26590($14)
+0xbc 0x61 0x00 0x02 # CHECK: cache 1, 2($3)
+0xbc 0x80 0xb7 0xd2 # CHECK: cache 0, -18478($4)
0xc0 0xe9 0x23 0xc6 # CHECK: ll $9, 9158($7)
+0xc2 0x42 0xe3 0x67 # CHECK: ll $2, -7321($18)
0xc4 0xe9 0x23 0xc6 # CHECK: lwc1 $f9, 9158($7)
+0xc7 0x50 0x27 0xf1 # CHECK: lwc1 $f16, 10225($26)
+0xc8 0xc8 0x23 0xca # CHECK: lwc2 $8, 9162($6)
+0xc8 0xd2 0xfc 0xb7 # CHECK: lwc2 $18, -841($6)
+0xcc 0x43 0x00 0x04 # CHECK: pref 3, 4($2)
+0xcf 0x00 0x00 0x00 # CHECK: pref 0, 0($24)
0xd3 0xe0 0xc6 0x70 # CHECK: lld $zero, -14736($ra)
0xd4 0xe9 0x23 0xc6 # CHECK: ldc1 $f9, 9158($7)
+0xd6 0x0a 0x40 0x07 # CHECK: ldc1 $f10, 16391($16)
+0xd8 0x07 0x34 0x20 # CHECK: ldc2 $7, 13344($zero)
+0xd8 0x28 0xad 0x43 # CHECK: ldc2 $8, -21181($1)
+0xd9 0x03 0x23 0xca # CHECK: ldc2 $3, 9162($8)
+0xdc 0x1a 0x0f 0x76 # CHECK: ld $26, 3958($zero)
0xde 0x3d 0x90 0x1b # CHECK: ld $sp, -28645($17)
0xe0 0xe9 0x23 0xc6 # CHECK: sc $9, 9158($7)
+0xe2 0x6f 0x49 0xd8 # CHECK: sc $15, 18904($19)
0xe4 0xe9 0x23 0xc6 # CHECK: swc1 $f9, 9158($7)
+0xe7 0x06 0xde 0xef # CHECK: swc1 $f6, -8465($24)
+0xe8 0xe9 0x23 0xc6 # CHECK: swc2 $9, 9158($7)
+0xea 0x19 0x61 0x30 # CHECK: swc2 $25, 24880($16)
0xf3 0xaf 0xdf 0xcd # CHECK: scd $15, -8243($sp)
0xf4 0xe9 0x23 0xc6 # CHECK: sdc1 $f9, 9158($7)
+0xf5 0xbe 0x77 0x6e # CHECK: sdc1 $f30, 30574($13)
+0xf8 0xe9 0x23 0xc6 # CHECK: sdc2 $9, 9158($7)
+0xfa 0x54 0x5a 0x75 # CHECK: sdc2 $20, 23157($18)
+0xfc 0x06 0x45 0x67 # CHECK: sd $6, 17767($zero)
0xfd 0x4c 0x16 0xcb # CHECK: sd $12, 5835($10)
diff --git a/test/MC/Disassembler/Mips/mips64r5/valid-xfail.txt b/test/MC/Disassembler/Mips/mips64r5/valid-xfail.txt
new file mode 100644
index 000000000000..e1fc42c94380
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips64r5/valid-xfail.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips64r5 | FileCheck %s
+# XFAIL: *
+0x10 0x00 0x00 0x02 # CHECK: b 8
+0x10 0x00 0x00 0x05 # CHECK: b 20
+0x10 0x00 0x28 0x09 # CHECK: b 40996
+0x10 0x04 0x14 0xe1 # CHECK: beq $zero, $4, 21380
+0x11 0x00 0x00 0xc3 # CHECK: beqz $8, 780
+0x12 0x88 0x00 0x16 # CHECK: beq $20, $8, 88
+0x15 0x00 0x88 0x13 # CHECK: bnez $8, -122804
+0x15 0x8a 0x9f 0x89 # CHECK: bne $12, $10, -98780
+0x50 0xc7 0x07 0xf2 # CHECK: beql $6, $7, 8136
+0x7c 0x48 0xc7 0x00 # CHECK: ext $8, $2, 28, 25
+0xc2 0x44 0xe3 0x67 # CHECK: lwc0 $4, -7321($18)
+0xe2 0x64 0x49 0xd8 # CHECK: swc0 $4, 18904($19)
diff --git a/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt b/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt
index 157e33593e37..08f5e04ab4fa 100644
--- a/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt
+++ b/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt
@@ -91,7 +91,7 @@
0x00 0x60 0x7e 0x41 # CHECK: di $fp
0x9a 0x10 0x64 0x00 # CHECK: div $2, $3, $4
0x9b 0x10 0x64 0x00 # CHECK: divu $2, $3, $4
-0xd5 0x10 0x64 0x00 # CHECK: dlsa $2, $3, $4, 3
+0xd5 0x10 0x64 0x00 # CHECK: dlsa $2, $3, $4, 4
0x00 0x50 0x38 0x40 # CHECK: dmfc0 $24, $10, 0
0xde 0x10 0x64 0x00 # CHECK: dmod $2, $3, $4
0xdf 0x10 0x64 0x00 # CHECK: dmodu $2, $3, $4
@@ -111,7 +111,7 @@
0x48 0x3c 0x58 0xec # CHECK: ldpc $2, 123456
0xb6 0xb3 0x42 0x7e # CHECK: ll $2, -153($18)
0x37 0x38 0xe0 0x7f # CHECK: lld $zero, 112($ra)
-0xc5 0x10 0x64 0x00 # CHECK: lsa $2, $3, $4, 3
+0xc5 0x10 0x64 0x00 # CHECK: lsa $2, $3, $4, 4
0xb7 0x34 0x52 0x49 # CHECK: lwc2 $18, -841($6)
0x43 0x00 0x48 0xec # CHECK: lwpc $2, 268
0x43 0x00 0x50 0xec # CHECK: lwupc $2, 268
@@ -128,6 +128,8 @@
0x1e 0x10 0x04 0x46 # CHECK: mina.s $f0, $f2, $f4
0xda 0x10 0x64 0x00 # CHECK: mod $2, $3, $4
0xdb 0x10 0x64 0x00 # CHECK: modu $2, $3, $4
+0x25 0x78 0xe0 0x03 # CHECK: move $15, $ra
+0x2d 0x78 0xe0 0x03 # CHECK: move $15, $ra
0x01 0x78 0x89 0x40 # CHECK: mtc0 $9, $15, 1
0x99 0x18 0x24 0x46 # CHECK: msubf.d $f2, $f3, $f4
0x99 0x18 0x04 0x46 # CHECK: msubf.s $f2, $f3, $f4
diff --git a/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt b/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt
index 45379d9c8988..7fa27a7c5420 100644
--- a/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt
+++ b/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt
@@ -18,8 +18,8 @@
0x00 0x64 0x10 0x9d # CHECK: dmulu $2, $3, $4
0x00 0x64 0x10 0x9e # CHECK: ddiv $2, $3, $4
0x00 0x64 0x10 0x9f # CHECK: ddivu $2, $3, $4
-0x00 0x64 0x10 0xc5 # CHECK: lsa $2, $3, $4, 3
-0x00 0x64 0x10 0xd5 # CHECK: dlsa $2, $3, $4, 3
+0x00 0x64 0x10 0xc5 # CHECK: lsa $2, $3, $4, 4
+0x00 0x64 0x10 0xd5 # CHECK: dlsa $2, $3, $4, 4
0x00 0x64 0x10 0xd8 # CHECK: muh $2, $3, $4
0x00 0x64 0x10 0xd9 # CHECK: muhu $2, $3, $4
0x00 0x64 0x10 0xda # CHECK: mod $2, $3, $4
@@ -45,6 +45,8 @@
0x02 0xdc 0x00 0x31 # CHECK: tgeu $22, $gp
0x03 0x20 0x80 0x52 # CHECK: dclz $16, $25
0x03 0x80 0xe8 0x50 # CHECK: clz $sp, $gp
+0x03 0xe0 0x78 0x25 # CHECK: move $15, $ra
+0x03 0xe0 0x78 0x2d # CHECK: move $15, $ra
0x04 0x11 0x14 0x9b # CHECK: bal 21104
0x04 0x66 0x56 0x78 # CHECK: dahi $3, 22136
0x04 0x7e 0xab 0xcd # CHECK: dati $3, -21555
@@ -143,6 +145,7 @@
0x46 0xa4 0x18 0x8d # CHECK: cmp.sult.d $f2, $f3, $f4
0x46 0xa4 0x18 0x8e # CHECK: cmp.sle.d $f2, $f3, $f4
0x46 0xa4 0x18 0x8f # CHECK: cmp.sule.d $f2, $f3, $f4
+0x42 0x00 0x00 0x58 # CHECK: eretnc
# FIXME: The encode/decode functions are not inverses of each other.
# The immediate should be 8 but the disassembler currently emits 12
0x49 0x20 0x00 0x02 # CHECK: bc2eqz $0, 12
diff --git a/test/MC/Disassembler/Mips/msa/test_elm.txt b/test/MC/Disassembler/Mips/msa/test_elm.txt
index 832587b23412..e7322858b8c9 100644
--- a/test/MC/Disassembler/Mips/msa/test_elm.txt
+++ b/test/MC/Disassembler/Mips/msa/test_elm.txt
@@ -5,7 +5,6 @@
0x78 0xb1 0x2d 0x99 # CHECK: copy_s.w $22, $w5[1]
0x78 0xc4 0xa5 0x99 # CHECK: copy_u.b $22, $w20[4]
0x78 0xe0 0x25 0x19 # CHECK: copy_u.h $20, $w4[0]
-0x78 0xf2 0x6f 0x99 # CHECK: copy_u.w $fp, $w13[2]
0x78 0x04 0xe8 0x19 # CHECK: sldi.b $w0, $w29[4]
0x78 0x20 0x8a 0x19 # CHECK: sldi.h $w8, $w17[0]
0x78 0x32 0xdd 0x19 # CHECK: sldi.w $w20, $w27[2]
diff --git a/test/MC/Disassembler/Mips/msa/test_elm_msa64.txt b/test/MC/Disassembler/Mips/msa/test_elm_msa64.txt
index 70c831ac2743..bd4e64faa0c2 100644
--- a/test/MC/Disassembler/Mips/msa/test_elm_msa64.txt
+++ b/test/MC/Disassembler/Mips/msa/test_elm_msa64.txt
@@ -1,6 +1,3 @@
# RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mcpu=mips64r2 -mattr=+msa | FileCheck %s
-# CHECK: copy_s.d $19, $w31[0]
-0x78 0xb8 0xfc 0xd9
-# CHECK: copy_u.d $18, $w29[1]
-0x78 0xf9 0xec 0x99
+0x78 0xb8 0xfc 0xd9 # CHECK: copy_s.d $19, $w31[0]
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
index f235c242fbce..74023340d500 100644
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
@@ -508,10 +508,10 @@
# CHECK: extsh. 2, 3
0x7c 0x62 0x07 0x35
-# CHECK: cntlz 2, 3
+# CHECK: cntlzw 2, 3
0x7c 0x62 0x00 0x34
-# CHECK: cntlz. 2, 3
+# CHECK: cntlzw. 2, 3
0x7c 0x62 0x00 0x35
# CHECK: popcntw 2, 3
diff --git a/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt b/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
index f154e00ff51c..9ddc286d8aaa 100644
--- a/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64le-encoding.txt
@@ -508,10 +508,10 @@
# CHECK: extsh. 2, 3
0x35 0x07 0x62 0x7c
-# CHECK: cntlz 2, 3
+# CHECK: cntlzw 2, 3
0x34 0x00 0x62 0x7c
-# CHECK: cntlz. 2, 3
+# CHECK: cntlzw. 2, 3
0x35 0x00 0x62 0x7c
# CHECK: popcntw 2, 3
diff --git a/test/MC/Disassembler/PowerPC/vsx.txt b/test/MC/Disassembler/PowerPC/vsx.txt
index 6f4ba6f6b9ac..37fd17b015ab 100644
--- a/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/test/MC/Disassembler/PowerPC/vsx.txt
@@ -57,6 +57,9 @@
# CHECK: xscvdpsp 7, 27
0xf0 0xe0 0xdc 0x24
+# CHECK: xscvdpspn 7, 27
+0xf0 0xe0 0xdc 0x2c
+
# CHECK: xscvdpsxds 7, 27
0xf0 0xe0 0xdd 0x60
@@ -72,9 +75,18 @@
# CHECK: xscvspdp 7, 27
0xf0 0xe0 0xdd 0x24
+# CHECK: xscvspdpn 7, 27
+0xf0 0xe0 0xdd 0x2c
+
+# CHECK: xscvsxdsp 7, 27
+0xf0 0xe0 0xdc 0xe0
+
# CHECK: xscvsxddp 7, 27
0xf0 0xe0 0xdd 0xe0
+# CHECK: xscvuxdsp 7, 27
+0xf0 0xe0 0xdc 0xa0
+
# CHECK: xscvuxddp 7, 27
0xf0 0xe0 0xdd 0xa0
diff --git a/test/MC/Disassembler/Sparc/sparc-mem.txt b/test/MC/Disassembler/Sparc/sparc-mem.txt
index 5f8886ef8b76..04a0365cc7c1 100644
--- a/test/MC/Disassembler/Sparc/sparc-mem.txt
+++ b/test/MC/Disassembler/Sparc/sparc-mem.txt
@@ -221,3 +221,27 @@
# CHECK: swapa [%g1] 131, %o2
0xd4 0xf8 0x50 0x60
+
+# CHECK: ldd [%i0+%l6], %o2
+0xd4 0x1e 0x00 0x16
+
+# CHECK: ldd [%i0+32], %o2
+0xd4 0x1e 0x20 0x20
+
+# CHECK: ldd [%g1], %o2
+0xd4 0x18 0x60 0x00
+
+# CHECK: ldd [%g1], %o2
+0xd4 0x18 0x40 0x00
+
+# CHECK: std %o2, [%i0+%l6]
+0xd4 0x3e 0x00 0x16
+
+# CHECK: std %o2, [%i0+32]
+0xd4 0x3e 0x20 0x20
+
+# CHECK: std %o2, [%g1]
+0xd4 0x38 0x60 0x00
+
+# CHECK: std %o2, [%g1]
+0xd4 0x38 0x40 0x00
diff --git a/test/MC/Disassembler/Sparc/sparc-v9.txt b/test/MC/Disassembler/Sparc/sparc-v9.txt
new file mode 100644
index 000000000000..b8ca01ce04ee
--- /dev/null
+++ b/test/MC/Disassembler/Sparc/sparc-v9.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --disassemble %s -triple=sparcv9-unknown-linux | FileCheck %s
+
+# CHECK: popc %g1, %g2
+0x85 0x70 0x00 0x01
diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt
index 9d3f2b08743a..17c3a45b3e41 100644
--- a/test/MC/Disassembler/SystemZ/insns.txt
+++ b/test/MC/Disassembler/SystemZ/insns.txt
@@ -7537,7 +7537,79 @@
# CHECK: stcy %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x72
-# CHECK: stcy %r0, -1
+# CHECK: stck 0
+0xb2 0x05 0x00 0x00
+
+# CHECK: stck 0(%r1)
+0xb2 0x05 0x10 0x00
+
+#CHECK: stck 0(%r15)
+0xb2 0x05 0xf0 0x00
+
+#CHECK: stck 4095
+0xb2 0x05 0x0f 0xff
+
+#CHECK: stck 4095(%r1)
+0xb2 0x05 0x1f 0xff
+
+#CHECK: stck 4095(%r15)
+0xb2 0x05 0xff 0xff
+
+# CHECK: stckf 0
+0xb2 0x7c 0x00 0x00
+
+# CHECK: stckf 0(%r1)
+0xb2 0x7c 0x10 0x00
+
+#CHECK: stckf 0(%r15)
+0xb2 0x7c 0xf0 0x00
+
+#CHECK: stckf 4095
+0xb2 0x7c 0x0f 0xff
+
+#CHECK: stckf 4095(%r1)
+0xb2 0x7c 0x1f 0xff
+
+#CHECK: stckf 4095(%r15)
+0xb2 0x7c 0xff 0xff
+
+# CHECK: stcke 0
+0xb2 0x78 0x00 0x00
+
+# CHECK: stcke 0(%r1)
+0xb2 0x78 0x10 0x00
+
+#CHECK: stcke 0(%r15)
+0xb2 0x78 0xf0 0x00
+
+#CHECK: stcke 4095
+0xb2 0x78 0x0f 0xff
+
+#CHECK: stcke 4095(%r1)
+0xb2 0x78 0x1f 0xff
+
+#CHECK: stcke 4095(%r15)
+0xb2 0x78 0xff 0xff
+
+# CHECK: stfle 0
+0xb2 0xb0 0x00 0x00
+
+# CHECK: stfle 0(%r1)
+0xb2 0xb0 0x10 0x00
+
+#CHECK: stfle 0(%r15)
+0xb2 0xb0 0xf0 0x00
+
+#CHECK: stfle 4095
+0xb2 0xb0 0x0f 0xff
+
+#CHECK: stfle 4095(%r1)
+0xb2 0xb0 0x1f 0xff
+
+#CHECK: stfle 4095(%r15)
+0xb2 0xb0 0xff 0xff
+
+# CHECK: stcy %r0, -1
0xe3 0x00 0x0f 0xff 0xff 0x72
# CHECK: stcy %r0, 0
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index 065b2a57c844..13e36df002a4 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -302,6 +302,117 @@
# CHECK: movq %rax, 1515870810
0x67, 0x48 0xa3 0x5a 0x5a 0x5a 0x5a
+# CHECK: callq -32769
+0x66 0xe8 0xff 0x7f 0xff 0xff
+
+# CHECK: callq -32769
+0x66 0x66 0x48 0xe8 0xff 0x7f 0xff 0xff
+
+# CHECK: jmp -32769
+0xe9 0xff 0x7f 0xff 0xff
+
+# CHECK: jmp -32769
+0x66 0xe9 0xff 0x7f 0xff 0xff
+
+# CHECK: jmp -32769
+0x66 0x66 0x48 0xe9 0xff 0x7f 0xff 0xff
+
+# CHECK: jb -32769
+0x0f 0x82 0xff 0x7f 0xff 0xff
+
+# CHECK: jb -32769
+0x66 0x0f 0x82 0xff 0x7f 0xff 0xff
+
+# CHECK: jae -32769
+0x0f 0x83 0xff 0x7f 0xff 0xff
+
+# CHECK: jae -32769
+0x66 0x0f 0x83 0xff 0x7f 0xff 0xff
+
+# CHECK: je -32769
+0x0f 0x84 0xff 0x7f 0xff 0xff
+
+# CHECK: je -32769
+0x66 0x0f 0x84 0xff 0x7f 0xff 0xff
+
+# CHECK: jne -32769
+0x0f 0x85 0xff 0x7f 0xff 0xff
+
+# CHECK: jne -32769
+0x66 0x0f 0x85 0xff 0x7f 0xff 0xff
+
+# CHECK: jbe -32769
+0x0f 0x86 0xff 0x7f 0xff 0xff
+
+# CHECK: jbe -32769
+0x66 0x0f 0x86 0xff 0x7f 0xff 0xff
+
+# CHECK: ja -32769
+0x0f 0x87 0xff 0x7f 0xff 0xff
+
+# CHECK: ja -32769
+0x66 0x0f 0x87 0xff 0x7f 0xff 0xff
+
+# CHECK: js -32769
+0x0f 0x88 0xff 0x7f 0xff 0xff
+
+# CHECK: js -32769
+0x66 0x0f 0x88 0xff 0x7f 0xff 0xff
+
+# CHECK: jns -32769
+0x0f 0x89 0xff 0x7f 0xff 0xff
+
+# CHECK: jns -32769
+0x66 0x0f 0x89 0xff 0x7f 0xff 0xff
+
+# CHECK: jp -32769
+0x0f 0x8a 0xff 0x7f 0xff 0xff
+
+# CHECK: jp -32769
+0x66 0x0f 0x8a 0xff 0x7f 0xff 0xff
+
+# CHECK: jnp -32769
+0x0f 0x8b 0xff 0x7f 0xff 0xff
+
+# CHECK: jnp -32769
+0x66 0x0f 0x8b 0xff 0x7f 0xff 0xff
+
+# CHECK: jl -32769
+0x0f 0x8c 0xff 0x7f 0xff 0xff
+
+# CHECK: jl -32769
+0x66 0x0f 0x8c 0xff 0x7f 0xff 0xff
+
+# CHECK: jge -32769
+0x0f 0x8d 0xff 0x7f 0xff 0xff
+
+# CHECK: jge -32769
+0x66 0x0f 0x8d 0xff 0x7f 0xff 0xff
+
+# CHECK: jle -32769
+0x0f 0x8e 0xff 0x7f 0xff 0xff
+
+# CHECK: jle -32769
+0x66 0x0f 0x8e 0xff 0x7f 0xff 0xff
+
+# CHECK: jg -32769
+0x0f 0x8f 0xff 0x7f 0xff 0xff
+
+# CHECK: jg -32769
+0x66 0x0f 0x8f 0xff 0x7f 0xff 0xff
+
+# CHECK: lcallw *-32769(%rip)
+0x66 0xff 0x1d 0xff 0x7f 0xff 0xff
+
+# CHECK: ljmpw *-32769(%rip)
+0x66 0xff 0x2d 0xff 0x7f 0xff 0xff
+
+# CHECK: psubsb (%rdx), %mm3
+0x0f 0xe8 0x1a
+
+# CHECK: psubsb (%rdx), %xmm3
+0x66 0x0f 0xe8 0x1a
+
# CHECK: addq 255(%rip), %rbx
0x49, 0x03, 0x1d, 0xff, 0x00, 0x00, 0x00
diff --git a/test/MC/ELF/ARM/directive-type-diagnostics.s b/test/MC/ELF/ARM/directive-type-diagnostics.s
new file mode 100644
index 000000000000..b166ffd06aab
--- /dev/null
+++ b/test/MC/ELF/ARM/directive-type-diagnostics.s
@@ -0,0 +1,10 @@
+// RUN: not llvm-mc -triple arm-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple armeb-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple thumb-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple thumbeb-elf -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+
+ .type symbol 32
+// CHECK: error: expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '%<type>' or "<type>"
+// CHECK: .type symbol 32
+// CHECK: ^
+
diff --git a/test/MC/ELF/align-zero.s b/test/MC/ELF/align-zero.s
new file mode 100644
index 000000000000..d8087d127d26
--- /dev/null
+++ b/test/MC/ELF/align-zero.s
@@ -0,0 +1,4 @@
+// Test that an alignment of zero is accepted.
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o -
+
+ .align 0
diff --git a/test/MC/ELF/align.s b/test/MC/ELF/align.s
index 43f5b548d9ee..7e78298540f5 100644
--- a/test/MC/ELF/align.s
+++ b/test/MC/ELF/align.s
@@ -1,22 +1,22 @@
// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
// Test that the alignment of rodata doesn't force a alignment of the
-// previous section (.bss)
+// previous section (.text)
nop
.section .rodata,"a",@progbits
.align 8
// CHECK: Section {
-// CHECK: Name: .bss
-// CHECK-NEXT: Type: SHT_NOBITS
+// CHECK: Name: .text
+// CHECK-NEXT: Type:
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
-// CHECK-NEXT: SHF_WRITE
+// CHECK-NEXT: SHF_EXECINSTR
// CHECK-NEXT: ]
-// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x44
-// CHECK-NEXT: Size: 0
+// CHECK-NEXT: Address:
+// CHECK-NEXT: Offset:
+// CHECK-NEXT: Size:
// CHECK-NEXT: Link: 0
// CHECK-NEXT: Info: 0
// CHECK-NEXT: AddressAlignment: 4
diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s
index 398ad54fe75b..7177ccb36501 100644
--- a/test/MC/ELF/cfi-adjust-cfa-offset.s
+++ b/test/MC/ELF/cfi-adjust-cfa-offset.s
@@ -24,7 +24,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s
index 133979201273..6a60e52acbac 100644
--- a/test/MC/ELF/cfi-advance-loc2.s
+++ b/test/MC/ELF/cfi-advance-loc2.s
@@ -12,7 +12,7 @@ f:
// CHECK: Section {
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s
index 3e4ca57a5161..c4cc6d53e020 100644
--- a/test/MC/ELF/cfi-def-cfa-offset.s
+++ b/test/MC/ELF/cfi-def-cfa-offset.s
@@ -13,7 +13,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s
index 53174cb2f552..708f6b1496e2 100644
--- a/test/MC/ELF/cfi-def-cfa-register.s
+++ b/test/MC/ELF/cfi-def-cfa-register.s
@@ -9,7 +9,7 @@ f:
// CHECK: Section {
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s
index 9706c4da097b..25931b77ec61 100644
--- a/test/MC/ELF/cfi-def-cfa.s
+++ b/test/MC/ELF/cfi-def-cfa.s
@@ -9,7 +9,7 @@ f:
// CHECK: Section {
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s
index e93d5f7be5f0..fb019be1252f 100644
--- a/test/MC/ELF/cfi-escape.s
+++ b/test/MC/ELF/cfi-escape.s
@@ -10,7 +10,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-large-model.s b/test/MC/ELF/cfi-large-model.s
index f4a20c26267c..2fb63d183712 100644
--- a/test/MC/ELF/cfi-large-model.s
+++ b/test/MC/ELF/cfi-large-model.s
@@ -4,7 +4,7 @@
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s
index 9038def03a64..ea9d0f49915c 100644
--- a/test/MC/ELF/cfi-offset.s
+++ b/test/MC/ELF/cfi-offset.s
@@ -9,7 +9,7 @@ f:
// CHECK: Section {
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s
index be60e2ea2e81..f7c021d06b8b 100644
--- a/test/MC/ELF/cfi-register.s
+++ b/test/MC/ELF/cfi-register.s
@@ -10,7 +10,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s
index 892cda165ba6..89ca4ff148a6 100644
--- a/test/MC/ELF/cfi-rel-offset.s
+++ b/test/MC/ELF/cfi-rel-offset.s
@@ -17,7 +17,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s
index 805b0b290bc8..e4d6b58c748f 100644
--- a/test/MC/ELF/cfi-rel-offset2.s
+++ b/test/MC/ELF/cfi-rel-offset2.s
@@ -9,7 +9,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s
index ae112f59ebef..c438ad3d0f3e 100644
--- a/test/MC/ELF/cfi-remember.s
+++ b/test/MC/ELF/cfi-remember.s
@@ -12,7 +12,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s
index 2bc87cf691a1..eb968fecbc4c 100644
--- a/test/MC/ELF/cfi-restore.s
+++ b/test/MC/ELF/cfi-restore.s
@@ -10,7 +10,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s
index 7df4cbbb6dd1..7d744ab59638 100644
--- a/test/MC/ELF/cfi-same-value.s
+++ b/test/MC/ELF/cfi-same-value.s
@@ -10,7 +10,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s
index 023311962189..334cdb497b9f 100644
--- a/test/MC/ELF/cfi-signal-frame.s
+++ b/test/MC/ELF/cfi-signal-frame.s
@@ -11,7 +11,7 @@ g:
// CHECK: Section {
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s
index 9a7012310ade..50c482df79e5 100644
--- a/test/MC/ELF/cfi-undefined.s
+++ b/test/MC/ELF/cfi-undefined.s
@@ -10,7 +10,7 @@ f:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll
index 9ea7be4a42b5..6bb9f8ba8fe7 100644
--- a/test/MC/ELF/cfi-version.ll
+++ b/test/MC/ELF/cfi-version.ll
@@ -1,12 +1,14 @@
; RUN: %llc_dwarf %s -o - -dwarf-version 2 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF2
; RUN: %llc_dwarf %s -o - -dwarf-version 3 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF3
; RUN: %llc_dwarf %s -o - -dwarf-version 4 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF4
+; RUN: %llc_dwarf %s -o - -dwarf-version 5 -filetype=obj | llvm-dwarfdump - | FileCheck %s --check-prefix=DWARF4
; .debug_frame is not emitted for targeting Windows x64.
; REQUIRES: debug_frame
+; REQUIRES: default_triple
; Function Attrs: nounwind
-define i32 @foo() #0 {
+define i32 @foo() #0 !dbg !4 {
entry:
%call = call i32 bitcast (i32 (...)* @bar to i32 ()*)(), !dbg !12
%add = add nsw i32 %call, 1, !dbg !12
@@ -22,11 +24,11 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 ()* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/MC/ELF/cfi-window-save.s b/test/MC/ELF/cfi-window-save.s
index fb38ecd85608..01d7ef433781 100644
--- a/test/MC/ELF/cfi-window-save.s
+++ b/test/MC/ELF/cfi-window-save.s
@@ -12,7 +12,7 @@ f:
// CHECK: Section {
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s
index 1e5c5e7e5680..495d52c234d7 100644
--- a/test/MC/ELF/cfi-zero-addr-delta.s
+++ b/test/MC/ELF/cfi-zero-addr-delta.s
@@ -16,7 +16,7 @@ f:
// CHECK: Section {
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s
index 10587e030521..1e9a39d2bcfd 100644
--- a/test/MC/ELF/cfi.s
+++ b/test/MC/ELF/cfi.s
@@ -220,7 +220,7 @@ f37:
// CHECK: Section {
// CHECK: Index:
// CHECK: Name: .eh_frame
-// CHECK-NEXT: Type: SHT_PROGBITS
+// CHECK-NEXT: Type: SHT_X86_64_UNWIND
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/comdat-dup-group-name.s b/test/MC/ELF/comdat-dup-group-name.s
index e52f3dcc04d1..e11cba62b402 100644
--- a/test/MC/ELF/comdat-dup-group-name.s
+++ b/test/MC/ELF/comdat-dup-group-name.s
@@ -2,27 +2,27 @@
// Test that we produce two foo sections, each in separate groups
-// CHECK: Index: 5
+// CHECK: Index: 3
// CHECK-NEXT: Name: .group
-// CHECK: Index: 6
+// CHECK: Index: 4
// CHECK-NEXT: Name: .foo
-// CHECK: Index: 7
+// CHECK: Index: 5
// CHECK-NEXT: Name: .group
-// CHECK: Index: 8
+// CHECK: Index: 6
// CHECK-NEXT: Name: .foo
// CHECK: Symbols [
// CHECK: Name: f1
// CHECK-NOT: }
-// CHECK: Section: .group (0x5)
+// CHECK: Section: .group (0x3)
// CHECK: Name: f2
// CHECK-NOT: }
-// CHECK: Section: .group (0x7)
+// CHECK: Section: .group (0x5)
.section .foo,"axG",@progbits,f1,comdat
nop
diff --git a/test/MC/ELF/comdat-reloc.s b/test/MC/ELF/comdat-reloc.s
index 1ea3d1e57efa..bc126f67fee0 100644
--- a/test/MC/ELF/comdat-reloc.s
+++ b/test/MC/ELF/comdat-reloc.s
@@ -16,14 +16,14 @@ world:
// CHECK: Name: .group
// CHECK-NOT: SectionData
// CHECK: SectionData
-// CHECK-NEXT: 0000: 01000000 07000000 08000000
+// CHECK-NEXT: 0000: 01000000 05000000 06000000
-// CHECK: Index: 7
+// CHECK: Index: 5
// CHECK-NEXT: Name: .text.world
// CHECK-NOT: Section {
// CHECK: SHF_GROUP
-// CHECK: Index: 8
+// CHECK: Index: 6
// CHECK-NEXT: Name: .rela.text.world
// CHECK-NOT: Section {
// CHECK: SHF_GROUP
diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s
index 18da17e6118c..5e6fc64bac41 100644
--- a/test/MC/ELF/comdat.s
+++ b/test/MC/ELF/comdat.s
@@ -3,7 +3,7 @@
// Test that we produce the group sections and that they are before the members
// CHECK: Section {
-// CHECK: Index: 5
+// CHECK: Index: 3
// CHECK-NEXT: Name: .group
// CHECK-NEXT: Type: SHT_GROUP
// CHECK-NEXT: Flags [
@@ -16,11 +16,11 @@
// CHECK-NEXT: AddressAlignment: 4
// CHECK-NEXT: EntrySize: 4
// CHECK-NEXT: SectionData (
-// CHECK-NEXT: 0000: 01000000 06000000 07000000
+// CHECK-NEXT: 0000: 01000000 04000000 05000000
// CHECK-NEXT: )
// CHECK-NEXT: }
// CHECK: Section {
-// CHECK: Index: 8
+// CHECK: Index: 6
// CHECK-NEXT: Name: .group
// CHECK-NEXT: Type: SHT_GROUP
// CHECK-NEXT: Flags [
@@ -33,11 +33,11 @@
// CHECK-NEXT: AddressAlignment: 4
// CHECK-NEXT: EntrySize: 4
// CHECK-NEXT: SectionData (
-// CHECK-NEXT: 0000: 01000000 09000000
+// CHECK-NEXT: 0000: 01000000 07000000
// CHECK-NEXT: )
// CHECK-NEXT: }
// CHECK: Section {
-// CHECK: Index: 10
+// CHECK: Index: 8
// CHECK-NEXT: Name: .group
// CHECK-NEXT: Type: SHT_GROUP
// CHECK-NEXT: Flags [
@@ -50,7 +50,7 @@
// CHECK-NEXT: AddressAlignment: 4
// CHECK-NEXT: EntrySize: 4
// CHECK-NEXT: SectionData (
-// CHECK-NEXT: 0000: 01000000 0B000000 0C000000
+// CHECK-NEXT: 0000: 01000000 09000000 0A000000
// CHECK-NEXT: )
// CHECK-NEXT: }
@@ -72,7 +72,7 @@
// CHECK-NEXT: Binding: Local
// CHECK-NEXT: Type: None
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .group (0x8)
+// CHECK-NEXT: Section: .group
// CHECK-NEXT: }
// CHECK: Symbol {
diff --git a/test/MC/ELF/common-error1.s b/test/MC/ELF/common-error1.s
index a413885b0165..5ea01cddb563 100644
--- a/test/MC/ELF/common-error1.s
+++ b/test/MC/ELF/common-error1.s
@@ -3,4 +3,4 @@
.comm C,4,4
.set A,C
-// CHECK: Common symbol C cannot be used in assignment expr
+// CHECK: Common symbol 'C' cannot be used in assignment expr
diff --git a/test/MC/ELF/common-error2.s b/test/MC/ELF/common-error2.s
index d666feedee6a..93d35bc80d90 100644
--- a/test/MC/ELF/common-error2.s
+++ b/test/MC/ELF/common-error2.s
@@ -3,4 +3,4 @@
.set A,C
.comm C,4,4
-// CHECK: Common symbol C cannot be used in assignment expr
+// CHECK: Common symbol 'C' cannot be used in assignment expr
diff --git a/test/MC/ELF/common2.s b/test/MC/ELF/common2.s
index 26c32a7c8408..bf9f22f90e93 100644
--- a/test/MC/ELF/common2.s
+++ b/test/MC/ELF/common2.s
@@ -1,7 +1,8 @@
// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
-// Test that the common symbols are placed at the end of .bss. In this example
-// it causes .bss to have size 9 instead of 8.
+// Test local common construction.
+// Unlike gas, common symbols are created when found, not at the end of .bss.
+// In this example it causes .bss to have size 8 instead of 9.
.local vimvardict
.comm vimvardict,1,8
@@ -16,7 +17,7 @@
// CHECK: ]
// CHECK-NEXT: Address:
// CHECK-NEXT: Offset:
-// CHECK-NEXT: Size: 9
+// CHECK-NEXT: Size: 8
// CHECK-NEXT: Link:
// CHECK-NEXT: Info:
// CHECK-NEXT: AddressAlignment:
diff --git a/test/MC/ELF/debug-loc.s b/test/MC/ELF/debug-loc.s
index ea8eb3ec70cd..4f1487284231 100644
--- a/test/MC/ELF/debug-loc.s
+++ b/test/MC/ELF/debug-loc.s
@@ -14,7 +14,7 @@
// CHECK-NEXT: Flags [
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x44
+// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 61
// CHECK-NEXT: Link: 0
// CHECK-NEXT: Info: 0
diff --git a/test/MC/ELF/div-by-zero.s b/test/MC/ELF/div-by-zero.s
new file mode 100644
index 000000000000..8c7f77346551
--- /dev/null
+++ b/test/MC/ELF/div-by-zero.s
@@ -0,0 +1,6 @@
+// Check that llvm-mc doesn't crash on division by zero.
+// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s 2> %t
+// RUN: FileCheck -input-file %t %s
+
+// CHECK: expected relocatable expression
+.int 1/0
diff --git a/test/MC/ELF/dot-symbol-assignment.s b/test/MC/ELF/dot-symbol-assignment.s
index 00fe205082be..284bef0f3372 100644
--- a/test/MC/ELF/dot-symbol-assignment.s
+++ b/test/MC/ELF/dot-symbol-assignment.s
@@ -11,6 +11,9 @@ two:
three:
.quad 0xdddddddddddddddd
+ .align 4
+ . = three + 9
+
// CHECK: Section {
// CHECK: Name: .text
// CHECK-NEXT: Type:
@@ -18,5 +21,5 @@ three:
// CHECK: SectionData (
// CHECK-NEXT: 0000: FFFFFFFF FFFFFFFF 00000000 00000000
// CHECK-NEXT: 0010: 00000000 00000000 EEEEEEEE EEEEEEEE
-// CHECK-NEXT: 0020: DDDDDDDD DDDDDDDD
+// CHECK-NEXT: 0020: DDDDDDDD DDDDDDDD 00 |
// CHECK-NEXT: )
diff --git a/test/MC/ELF/empty-twice.ll b/test/MC/ELF/empty-twice.ll
new file mode 100644
index 000000000000..c24bd629c416
--- /dev/null
+++ b/test/MC/ELF/empty-twice.ll
@@ -0,0 +1,6 @@
+; Check that there is no persistent state in the ELF emitter that crashes us
+; when we try to reuse the pass manager
+; RUN: llc -compile-twice -filetype=obj %s -o -
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
diff --git a/test/MC/ELF/empty.s b/test/MC/ELF/empty.s
index 7b686fef3a23..ea88803a31d3 100644
--- a/test/MC/ELF/empty.s
+++ b/test/MC/ELF/empty.s
@@ -10,8 +10,8 @@
// DARWIN-NEXT: Arch: x86_64
// WINDOWS-NEXT: Arch: x86_64
-// Test that like gnu as we create text, data and bss by default. Also test
-// that symtab and strtab are listed.
+// Test that we create text by default. Also test that symtab and strtab are
+// listed.
// CHECK: Section {
// CHECK: Name: .strtab
@@ -20,7 +20,7 @@
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
// CHECK-NEXT: Offset:
-// CHECK-NEXT: Size: 34
+// CHECK-NEXT: Size: 23
// CHECK-NEXT: Link: 0
// CHECK-NEXT: Info: 0
// CHECK-NEXT: AddressAlignment: 1
@@ -42,36 +42,6 @@
// CHECK-NEXT: EntrySize: 0
// CHECK-NEXT: }
// CHECK: Section {
-// CHECK: Name: .data
-// CHECK-NEXT: Type: SHT_PROGBITS
-// CHECK-NEXT: Flags [
-// CHECK-NEXT: SHF_ALLOC
-// CHECK-NEXT: SHF_WRITE
-// CHECK-NEXT: ]
-// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x40
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Link: 0
-// CHECK-NEXT: Info: 0
-// CHECK-NEXT: AddressAlignment: 4
-// CHECK-NEXT: EntrySize: 0
-// CHECK-NEXT: }
-// CHECK: Section {
-// CHECK: Name: .bss
-// CHECK-NEXT: Type: SHT_NOBITS
-// CHECK-NEXT: Flags [
-// CHECK-NEXT: SHF_ALLOC
-// CHECK-NEXT: SHF_WRITE
-// CHECK-NEXT: ]
-// CHECK-NEXT: Address: 0x0
-// CHECK-NEXT: Offset: 0x40
-// CHECK-NEXT: Size: 0
-// CHECK-NEXT: Link: 0
-// CHECK-NEXT: Info: 0
-// CHECK-NEXT: AddressAlignment: 4
-// CHECK-NEXT: EntrySize: 0
-// CHECK-NEXT: }
-// CHECK: Section {
// CHECK: Name: .symtab
// CHECK-NEXT: Type: SHT_SYMTAB
// CHECK-NEXT: Flags [
diff --git a/test/MC/ELF/many-sections-2.s b/test/MC/ELF/many-sections-2.s
index 0077552ef313..2c3e4b87bb89 100644
--- a/test/MC/ELF/many-sections-2.s
+++ b/test/MC/ELF/many-sections-2.s
@@ -114,6 +114,9 @@
gen_sections16384 b\x
.endm
+ .section foo
+ .section bar
+
gen_sections32768 a
gen_sections16384 b
gen_sections8192 c
diff --git a/test/MC/ELF/many-sections-3.s b/test/MC/ELF/many-sections-3.s
index 02d30a60523b..ec198480916c 100644
--- a/test/MC/ELF/many-sections-3.s
+++ b/test/MC/ELF/many-sections-3.s
@@ -102,6 +102,8 @@ gen_sections8 l
gen_sections4 m
.section foo
+ .section foo2
+ .section foo3
.section bar, "a"
a:
diff --git a/test/MC/ELF/many-sections.s b/test/MC/ELF/many-sections.s
index 2db6abb9321b..b1348f3b7c36 100644
--- a/test/MC/ELF/many-sections.s
+++ b/test/MC/ELF/many-sections.s
@@ -103,7 +103,6 @@ gen_sections64 i
gen_sections32 j
gen_sections16 k
gen_sections8 l
- .section foo
- .section bar
+gen_sections4 m
.section zed
.long zed
diff --git a/test/MC/ELF/popsection.s b/test/MC/ELF/popsection.s
index 19f55688a1b2..ace6fac5e4ce 100644
--- a/test/MC/ELF/popsection.s
+++ b/test/MC/ELF/popsection.s
@@ -6,8 +6,8 @@
.popsection
// CHECK: Section {
-// CHECK: Index: 5
-// CHECK-NEXT: Name: foo
+// CHECK: Index:
+// CHECK: Name: foo
// CHECK-NEXT: Type: SHT_PROGBITS
// CHECK-NEXT: Flags [ (0x0)
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/relax-arith.s b/test/MC/ELF/relax-arith.s
index 15e44ebff7ef..56b7c399c441 100644
--- a/test/MC/ELF/relax-arith.s
+++ b/test/MC/ELF/relax-arith.s
@@ -123,3 +123,35 @@ bar:
.section push,"x"
pushw $foo
push $foo
+
+// CHECK: Disassembly of section adc:
+// CHECK-NEXT: adc:
+// CHECK-NEXT: 0: 66 81 d3 00 00 adcw $0, %bx
+// CHECK-NEXT: 5: 66 81 14 25 00 00 00 00 00 00 adcw $0, 0
+// CHECK-NEXT: f: 81 d3 00 00 00 00 adcl $0, %ebx
+// CHECK-NEXT: 15: 81 14 25 00 00 00 00 00 00 00 00 adcl $0, 0
+// CHECK-NEXT: 20: 48 81 d3 00 00 00 00 adcq $0, %rbx
+// CHECK-NEXT: 27: 48 81 14 25 00 00 00 00 00 00 00 00 adcq $0, 0
+ .section adc,"x"
+ adc $foo, %bx
+ adcw $foo, bar
+ adc $foo, %ebx
+ adcl $foo, bar
+ adc $foo, %rbx
+ adcq $foo, bar
+
+// CHECK: Disassembly of section sbb:
+// CHECK-NEXT: sbb:
+// CHECK-NEXT: 0: 66 81 db 00 00 sbbw $0, %bx
+// CHECK-NEXT: 5: 66 81 1c 25 00 00 00 00 00 00 sbbw $0, 0
+// CHECK-NEXT: f: 81 db 00 00 00 00 sbbl $0, %ebx
+// CHECK-NEXT: 15: 81 1c 25 00 00 00 00 00 00 00 00 sbbl $0, 0
+// CHECK-NEXT: 20: 48 81 db 00 00 00 00 sbbq $0, %rbx
+// CHECK-NEXT: 27: 48 81 1c 25 00 00 00 00 00 00 00 00 sbbq $0, 0
+ .section sbb,"x"
+ sbb $foo, %bx
+ sbbw $foo, bar
+ sbb $foo, %ebx
+ sbbl $foo, bar
+ sbb $foo, %rbx
+ sbbq $foo, bar
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index 6b7e02f03ea0..19efe0976129 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -1,8 +1,11 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s --check-prefix=CHECK --check-prefix=I386
+// RUN: llvm-mc -filetype=obj -triple i386-pc-elfiamcu %s -o - | llvm-readobj -r | FileCheck %s --check-prefix=CHECK --check-prefix=IAMCU
// Test that we produce the correct relocation types and that the relocations
// correctly point to the section or the symbol.
+// IAMCU: Format: ELF32-iamcu
+// I386: Format: ELF32-i386
// CHECK: Relocations [
// CHECK-NEXT: Section {{.*}} .rel.text {
// CHECK-NEXT: 0x2 R_386_GOTOFF .Lfoo 0x0
diff --git a/test/MC/ELF/relocation-pc.s b/test/MC/ELF/relocation-pc.s
index ca7addf3a620..85e97f059584 100644
--- a/test/MC/ELF/relocation-pc.s
+++ b/test/MC/ELF/relocation-pc.s
@@ -2,8 +2,9 @@
// Test that we produce the correct relocation.
- loope 0 # R_X86_64_PC8
- jmp -256 # R_X86_64_PC32
+ loope 0 # R_X86_64_PC8
+ jmp -256 # R_X86_64_PC32
+ .word 0x42 - . # R_X86_64_PC16
// CHECK: Section {
// CHECK: Index:
@@ -13,7 +14,7 @@
// CHECK-NEXT: ]
// CHECK-NEXT: Address: 0x0
// CHECK-NEXT: Offset:
-// CHECK-NEXT: Size: 48
+// CHECK-NEXT: Size:
// CHECK-NEXT: Link:
// CHECK-NEXT: Info:
// CHECK-NEXT: AddressAlignment: 8
@@ -21,5 +22,6 @@
// CHECK-NEXT: Relocations [
// CHECK-NEXT: 0x1 R_X86_64_PC8 - 0xFFFFFFFFFFFFFFFF
// CHECK-NEXT: 0x3 R_X86_64_PC32 - 0xFFFFFFFFFFFFFEFC
+// CHECK-NEXT: 0x7 R_X86_64_PC16 - 0x42
// CHECK-NEXT: ]
// CHECK-NEXT: }
diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s
index 34f1a4038131..0fec76792818 100644
--- a/test/MC/ELF/relocation.s
+++ b/test/MC/ELF/relocation.s
@@ -55,6 +55,11 @@ bar:
.quad pr23272_2 - pr23272
.quad pr23272_3 - pr23272
+ .global pr24486
+pr24486:
+ pr24486_alias = pr24486
+ .long pr24486_alias
+
.code16
call pr23771
@@ -94,6 +99,7 @@ bar:
// CHECK-NEXT: 0xD4 R_X86_64_SIZE32 blah 0xFFFFFFFFFFFFFFE0
// CHECK-NEXT: 0xD8 R_X86_64_GOTPCREL foo 0x0
// CHECK-NEXT: 0xDC R_X86_64_PLT32 foo 0x0
-// CHECK-NEXT: 0xF1 R_X86_64_PC16 pr23771 0xFFFFFFFFFFFFFFFE
+// CHECK-NEXT: 0xF0 R_X86_64_32 .text 0xF0
+// CHECK-NEXT: 0xF5 R_X86_64_PC16 pr23771 0xFFFFFFFFFFFFFFFE
// CHECK-NEXT: ]
// CHECK-NEXT: }
diff --git a/test/MC/ELF/section-sym.s b/test/MC/ELF/section-sym.s
index 4a9484d9b779..9e660526b7ad 100644
--- a/test/MC/ELF/section-sym.s
+++ b/test/MC/ELF/section-sym.s
@@ -9,8 +9,8 @@
// The first seciton foo has index 6
// CHECK: Section {
-// CHECK: Index: 6
-// CHECK-NEXT: Name: foo (28)
+// CHECK: Index: 4
+// CHECK-NEXT: Name: foo
// CHECK-NEXT: Type: SHT_PROGBITS (0x1)
// CHECK-NEXT: Flags [ (0x202)
// CHECK-NEXT: SHF_ALLOC (0x2)
@@ -25,8 +25,8 @@
// CHECK-NEXT: EntrySize: 0
// CHECK-NEXT: }
// CHECK: Section {
-// CHECK: Index: 8
-// CHECK-NEXT: Name: foo (28)
+// CHECK: Index: 6
+// CHECK-NEXT: Name: foo
// CHECK-NEXT: Type: SHT_PROGBITS (0x1)
// CHECK-NEXT: Flags [ (0x200)
// CHECK-NEXT: SHF_GROUP (0x200)
@@ -64,22 +64,22 @@
// CHECK-NEXT: Section: Undefined (0x0)
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: f1 (57)
+// CHECK-NEXT: Name: f1
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local (0x0)
// CHECK-NEXT: Type: None (0x0)
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .group (0x5)
+// CHECK-NEXT: Section: .group
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
-// CHECK-NEXT: Name: f2 (54)
+// CHECK-NEXT: Name: f2
// CHECK-NEXT: Value: 0x0
// CHECK-NEXT: Size: 0
// CHECK-NEXT: Binding: Local (0x0)
// CHECK-NEXT: Type: None (0x0)
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: .group (0x7)
+// CHECK-NEXT: Section: .group
// CHECK-NEXT: }
// CHECK-NEXT: Symbol {
// CHECK-NEXT: Name: (0)
@@ -88,6 +88,6 @@
// CHECK-NEXT: Binding: Local (0x0)
// CHECK-NEXT: Type: Section (0x3)
// CHECK-NEXT: Other: 0
-// CHECK-NEXT: Section: foo (0x6)
+// CHECK-NEXT: Section: foo (0x4)
// CHECK-NEXT: }
// CHECK-NEXT: ]
diff --git a/test/MC/ELF/section-unique.s b/test/MC/ELF/section-unique.s
index bd15148f6d83..b56666130fdf 100644
--- a/test/MC/ELF/section-unique.s
+++ b/test/MC/ELF/section-unique.s
@@ -26,7 +26,7 @@ g:
// OBJ: Binding: Global
// OBJ: Type: None
// OBJ: Other: 0
-// OBJ: Section: .text (0x5)
+// OBJ: Section: .text (0x3)
// OBJ: }
// OBJ: Symbol {
// OBJ: Name: g
@@ -35,5 +35,5 @@ g:
// OBJ: Binding: Global
// OBJ: Type: None
// OBJ: Other: 0
-// OBJ: Section: .text (0x6)
+// OBJ: Section: .text (0x4)
// OBJ: }
diff --git a/test/MC/ELF/section.s b/test/MC/ELF/section.s
index ab3fe0387400..008c4605552a 100644
--- a/test/MC/ELF/section.s
+++ b/test/MC/ELF/section.s
@@ -123,7 +123,7 @@ bar:
.section .excluded,"e",@progbits
// CHECK: Section {
-// CHECK: Name: .excluded (92)
+// CHECK: Name: .excluded
// CHECK-NEXT: Type: SHT_PROGBITS (0x1)
// CHECK-NEXT: Flags [ (0x80000000)
// CHECK-NEXT: SHF_EXCLUDE (0x80000000)
diff --git a/test/MC/ELF/sleb.s b/test/MC/ELF/sleb.s
index 5cba5829a12b..280b42d8cac8 100644
--- a/test/MC/ELF/sleb.s
+++ b/test/MC/ELF/sleb.s
@@ -1,7 +1,7 @@
// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_32 %s
// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_64 %s
-// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | macho-dump --dump-section-data | FileCheck -check-prefix=MACHO_32 %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | macho-dump --dump-section-data | FileCheck -check-prefix=MACHO_64 %s
+// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=MACHO_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=MACHO_64 %s
.text
foo:
@@ -27,7 +27,11 @@ foo:
// ELF_64: SectionData (
// ELF_64: 0000: 00017F3F 40C000BF 7FFF3F80 4081C000
// ELF_64: )
-// MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// MACHO_32: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
-// MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// MACHO_64: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
+// MACHO_32: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// MACHO_32: SectionData (
+// MACHO_32: 0000: 00017F3F 40C000BF 7FFF3F80 4081C000 |...?@.....?.@...|
+// MACHO_32: )
+// MACHO_64: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// MACHO_64: SectionData (
+// MACHO_64: 0000: 00017F3F 40C000BF 7FFF3F80 4081C000 |...?@.....?.@...|
+// MACHO_64: )
diff --git a/test/MC/ELF/strtab-suffix-opt.s b/test/MC/ELF/strtab-suffix-opt.s
index 96d15005c618..69aa0933c996 100644
--- a/test/MC/ELF/strtab-suffix-opt.s
+++ b/test/MC/ELF/strtab-suffix-opt.s
@@ -16,6 +16,6 @@ foobar:
.Ltmp3:
.size foobar, .Ltmp3-foobar
-// CHECK: Name: bar (19)
-// CHECK: Name: foo (23)
-// CHECK: Name: foobar (16)
+// CHECK: Name: bar (14)
+// CHECK: Name: foo (18)
+// CHECK: Name: foobar (11)
diff --git a/test/MC/ELF/uleb.s b/test/MC/ELF/uleb.s
index 5d203a93f022..ffa84e9021dd 100644
--- a/test/MC/ELF/uleb.s
+++ b/test/MC/ELF/uleb.s
@@ -1,7 +1,7 @@
// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_32 %s
// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_64 %s
-// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | macho-dump --dump-section-data | FileCheck -check-prefix=MACHO_32 %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | macho-dump --dump-section-data | FileCheck -check-prefix=MACHO_64 %s
+// RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=MACHO_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=MACHO_64 %s
.text
foo:
@@ -21,7 +21,11 @@ foo:
// ELF_64: SectionData (
// ELF_64: 0000: 00017F80 01FF7F80 8001172A
// ELF_64: )
-// MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// MACHO_32: ('_section_data', '00017f80 01ff7f80 8001172a')
-// MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// MACHO_64: ('_section_data', '00017f80 01ff7f80 8001172a')
+// MACHO_32: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// MACHO_32: SectionData (
+// MACHO_32: 0000: 00017F80 01FF7F80 8001172A |...........*|
+// MACHO_32: )
+// MACHO_64: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// MACHO_64: SectionData (
+// MACHO_64: 0000: 00017F80 01FF7F80 8001172A |...........*|
+// MACHO_64: )
diff --git a/test/MC/Hexagon/asmMap.s b/test/MC/Hexagon/asmMap.s
new file mode 100644
index 000000000000..81bb8f31f02c
--- /dev/null
+++ b/test/MC/Hexagon/asmMap.s
@@ -0,0 +1,608 @@
+#RUN: llvm-mc -triple=hexagon -filetype=obj %s | llvm-objdump -d - | FileCheck %s
+
+# Make sure that the assembler mapped instructions are being handled correctly.
+
+#CHECK: 3c56c000 { memw(r22{{ *}}+{{ *}}#0)=#0
+memw(r22)=#0
+
+#CHECK: 3c23e05f { memh(r3{{ *}}+{{ *}}#0)=#-33
+memh(r3)=#-33
+
+#CHECK: 3c07c012 { memb(r7{{ *}}+{{ *}}#0)=#18
+memb(r7)=#18
+
+#CHECK: 4101c008 { if (p0) r8 = memb(r1{{ *}}+{{ *}}#0)
+if (p0) r8=memb(r1)
+
+#CHECK: 4519d817 { if (!p3) r23 = memb(r25{{ *}}+{{ *}}#0)
+if (!p3) r23=memb(r25)
+
+#CHECK: 412dc002 { if (p0) r2 = memub(r13{{ *}}+{{ *}}#0)
+if (p0) r2=memub(r13)
+
+#CHECK: 453cc01a { if (!p0) r26 = memub(r28{{ *}}+{{ *}}#0)
+if (!p0) r26=memub(r28)
+
+#CHECK: 416bc818 { if (p1) r24 = memuh(r11{{ *}}+{{ *}}#0)
+if (p1) r24=memuh(r11)
+
+#CHECK: 457fc012 { if (!p0) r18 = memuh(r31{{ *}}+{{ *}}#0)
+if (!p0) r18=memuh(r31)
+
+#CHECK: 455dc014 { if (!p0) r20 = memh(r29{{ *}}+{{ *}}#0)
+if (!p0) r20=memh(r29)
+
+#CHECK: 415dc01d { if (p0) r29 = memh(r29{{ *}}+{{ *}}#0)
+if (p0) r29=memh(r29)
+
+#CHECK: 4583c01d { if (!p0) r29 = memw(r3{{ *}}+{{ *}}#0)
+if (!p0) r29=memw(r3)
+
+#CHECK: 419bd01e { if (p2) r30 = memw(r27{{ *}}+{{ *}}#0)
+if (p2) r30=memw(r27)
+
+#CHECK: 90e2c018 { r25:24 = membh(r2{{ *}}+{{ *}}#0)
+r25:24=membh(r2)
+
+#CHECK: 902bc006 { r6 = membh(r11{{ *}}+{{ *}}#0)
+r6=membh(r11)
+
+#CHECK: 90a2c01c { r29:28 = memubh(r2{{ *}}+{{ *}}#0)
+r29:28=memubh(r2)
+
+#CHECK: 906ec00d { r13 = memubh(r14{{ *}}+{{ *}}#0)
+r13=memubh(r14)
+
+#CHECK: 91dac00c { r13:12 = memd(r26{{ *}}+{{ *}}#0)
+r13:12=memd(r26)
+
+#CHECK: 919bc004 { r4 = memw(r27{{ *}}+{{ *}}#0)
+r4=memw(r27)
+
+#CHECK: 914cc005 { r5 = memh(r12{{ *}}+{{ *}}#0)
+r5=memh(r12)
+
+#CHECK: 9176c010 { r16 = memuh(r22{{ *}}+{{ *}}#0)
+r16=memuh(r22)
+
+#CHECK: 910bc017 { r23 = memb(r11{{ *}}+{{ *}}#0)
+r23=memb(r11)
+
+#CHECK: 912bc01b { r27 = memub(r11{{ *}}+{{ *}}#0)
+r27=memub(r11)
+
+#CHECK: 404ede01 { if (p1) memh(r14{{ *}}+{{ *}}#0) = r30
+if (p1) memh(r14)=r30
+
+#CHECK: 4449d900 { if (!p0) memh(r9{{ *}}+{{ *}}#0) = r25
+if (!p0) memh(r9)=r25
+
+#CHECK: 400ecd00 { if (p0) memb(r14{{ *}}+{{ *}}#0) = r13
+if (p0) memb(r14)=r13
+
+#CHECK: 440bcc01 { if (!p1) memb(r11{{ *}}+{{ *}}#0) = r12
+if (!p1) memb(r11)=r12
+
+#CHECK: 41d0d804 { if (p3) r5:4 = memd(r16{{ *}}+{{ *}}#0)
+if (p3) r5:4=memd(r16)
+
+#CHECK: 45d9c00c { if (!p0) r13:12 = memd(r25{{ *}}+{{ *}}#0)
+if (!p0) r13:12=memd(r25)
+
+#CHECK: 385ee06d { if (p3) memw(r30{{ *}}+{{ *}}#0)=#-19
+if (p3) memw(r30)=#-19
+
+#CHECK: 38c6c053 { if (!p2) memw(r6{{ *}}+{{ *}}#0)=#19
+if (!p2) memw(r6)=#19
+
+#CHECK: 381fc034 { if (p1) memb(r31{{ *}}+{{ *}}#0)=#20
+if (p1) memb(r31)=#20
+
+#CHECK: 389dc010 { if (!p0) memb(r29{{ *}}+{{ *}}#0)=#16
+if (!p0) memb(r29)=#16
+
+#CHECK: 3833e019 { if (p0) memh(r19{{ *}}+{{ *}}#0)=#-7
+if (p0) memh(r19)=#-7
+
+#CHECK: 38b7c013 { if (!p0) memh(r23{{ *}}+{{ *}}#0)=#19
+if (!p0) memh(r23)=#19
+
+#CHECK: 4488d401 { if (!p1) memw(r8{{ *}}+{{ *}}#0) = r20
+if (!p1) memw(r8)=r20
+
+#CHECK: 409ddc02 { if (p2) memw(r29{{ *}}+{{ *}}#0) = r28
+if (p2) memw(r29)=r28
+
+#CHECK: 446fc301 { if (!p1) memh(r15{{ *}}+{{ *}}#0) = r3.h
+if (!p1) memh(r15)=r3.h
+
+#CHECK: 406dc201 { if (p1) memh(r13{{ *}}+{{ *}}#0) = r2.h
+if (p1) memh(r13)=r2.h
+
+#CHECK: 40d9c601 { if (p1) memd(r25{{ *}}+{{ *}}#0) = r7:6
+if (p1) memd(r25)=r7:6
+
+#CHECK: 44dad803 { if (!p3) memd(r26{{ *}}+{{ *}}#0) = r25:24
+if (!p3) memd(r26)=r25:24
+
+#CHECK: 3e21c011 { memh(r1{{ *}}+{{ *}}#0) {{ *}}+={{ *}} r17
+memh(r1)+=r17
+
+#CHECK: 3e4fc019 { memw(r15{{ *}}+{{ *}}#0) {{ *}}+={{ *}} r25
+memw(r15)+=r25
+
+#CHECK: 3e5dc022 { memw(r29{{ *}}+{{ *}}#0) {{ *}}-={{ *}} r2
+memw(r29)-=r2
+
+#CHECK: 3e04c004 { memb(r4{{ *}}+{{ *}}#0) {{ *}}+={{ *}} r4
+memb(r4)+=r4
+
+#CHECK: 3f53c016 { memw(r19{{ *}}+{{ *}}#0){{ *}}{{ *}}+={{ *}}{{ *}}#22
+memw(r19)+=#22
+
+#CHECK: 3f24c01e { memh(r4{{ *}}+{{ *}}#0){{ *}}{{ *}}+={{ *}}{{ *}}#30
+memh(r4)+=#30
+
+#CHECK: 3e27c02d { memh(r7{{ *}}+{{ *}}#0) {{ *}}-={{ *}} r13
+memh(r7)-=r13
+
+#CHECK: 3e1ec032 { memb(r30{{ *}}+{{ *}}#0) {{ *}}-={{ *}} r18
+memb(r30)-=r18
+
+#CHECK: 3e49c05b { memw(r9{{ *}}+{{ *}}#0) &= r27
+memw(r9)&=r27
+
+#CHECK: 3e2dc040 { memh(r13{{ *}}+{{ *}}#0) &= r0
+memh(r13)&=r0
+
+#CHECK: 3e05c046 { memb(r5{{ *}}+{{ *}}#0) &= r6
+memb(r5)&=r6
+
+#CHECK: 3e45c06a { memw(r5{{ *}}+{{ *}}#0) |= r10
+memw(r5)|=r10
+
+#CHECK: 3e21c07e { memh(r1{{ *}}+{{ *}}#0) |= r30
+memh(r1)|=r30
+
+#CHECK: 3e09c06f { memb(r9{{ *}}+{{ *}}#0) |= r15
+memb(r9)|=r15
+
+#CHECK: a157d100 { memh(r23{{ *}}+{{ *}}#0) = r17
+memh(r23)=r17
+
+#CHECK: a10fd400 { memb(r15{{ *}}+{{ *}}#0) = r20
+memb(r15)=r20
+
+#CHECK: 9082c014 { r21:20 = memb_fifo(r2{{ *}}+{{ *}}#0)
+r21:20=memb_fifo(r2)
+
+#CHECK: 9056c01c { r29:28 = memh_fifo(r22{{ *}}+{{ *}}#0)
+r29:28=memh_fifo(r22)
+
+#CHECK: a1d8ca00 { memd(r24{{ *}}+{{ *}}#0) = r11:10
+memd(r24)=r11:10
+
+#CHECK: a19ed900 { memw(r30{{ *}}+{{ *}}#0) = r25
+memw(r30)=r25
+
+#CHECK: a169ce00 { memh(r9{{ *}}+{{ *}}#0) = r14.h
+memh(r9)=r14.h
+
+#CHECK: 3f07c06b { memb(r7{{ *}}+{{ *}}#0) = setbit(#11)
+memb(r7)=setbit(#11)
+
+#CHECK: 3f34c07b { memh(r20{{ *}}+{{ *}}#0) = setbit(#27)
+memh(r20)=setbit(#27)
+
+#CHECK: 3f1cc032 { memb(r28{{ *}}+{{ *}}#0){{ *}}-={{ *}}#18
+memb(r28)-=#18
+
+#CHECK: 3f29c02a { memh(r9{{ *}}+{{ *}}#0){{ *}}-={{ *}}#10
+memh(r9)-=#10
+
+#CHECK: 3f4cc026 { memw(r12{{ *}}+{{ *}}#0){{ *}}-={{ *}}#6
+memw(r12)-=#6
+
+#CHECK: 3f00c00c { memb(r0{{ *}}+{{ *}}#0){{ *}}+={{ *}}#12
+memb(r0)+=#12
+
+#CHECK: 3f50c07a { memw(r16{{ *}}+{{ *}}#0) = setbit(#26)
+memw(r16)=setbit(#26)
+
+#CHECK: 3f1fc05d { memb(r31{{ *}}+{{ *}}#0) = clrbit(#29)
+memb(r31)=clrbit(#29)
+
+#CHECK: 3f20c05e { memh(r0{{ *}}+{{ *}}#0) = clrbit(#30)
+memh(r0)=clrbit(#30)
+
+#CHECK: 3f42c059 { memw(r2{{ *}}+{{ *}}#0) = clrbit(#25)
+memw(r2)=clrbit(#25)
+
+#CHECK: 39cfe072 if (!p3.new) memw(r15{{ *}}+{{ *}}#0)=#-14
+{
+ p3=cmp.eq(r5,##-1997506977)
+ if (!p3.new) memw(r15)=#-14
+}
+
+#CHECK: 3959e06b if (p3.new) memw(r25{{ *}}+{{ *}}#0)=#-21
+{
+ p3=cmp.eq(r0,##1863618461)
+ if (p3.new) memw(r25)=#-21
+}
+
+#CHECK: 4312c801 if (p1.new) r1 = memb(r18{{ *}}+{{ *}}#0)
+{
+ if (p1.new) r1=memb(r18)
+ p1=cmp.eq(r23,##-1105571618)
+}
+
+#CHECK: 4718d803 if (!p3.new) r3 = memb(r24{{ *}}+{{ *}}#0)
+{
+ if (!p3.new) r3=memb(r24)
+ p3=cmp.eq(r3,##-210870878)
+}
+
+#CHECK: 4326c81b if (p1.new) r27 = memub(r6{{ *}}+{{ *}}#0)
+{
+ if (p1.new) r27=memub(r6)
+ p1=cmp.eq(r29,##-188410493)
+}
+
+#CHECK: 473ad00d if (!p2.new) r13 = memub(r26{{ *}}+{{ *}}#0)
+{
+ p2=cmp.eq(r30,##-1823852150)
+ if (!p2.new) r13=memub(r26)
+}
+
+#CHECK: 4785d80e if (!p3.new) r14 = memw(r5{{ *}}+{{ *}}#0)
+{
+ if (!p3.new) r14=memw(r5)
+ p3=cmp.eq(r31,##-228524711)
+}
+
+#CHECK: 438cc81a if (p1.new) r26 = memw(r12{{ *}}+{{ *}}#0)
+{
+ if (p1.new) r26=memw(r12)
+ p1=cmp.eq(r11,##-485232313)
+}
+
+#CHECK: 477dc019 if (!p0.new) r25 = memuh(r29{{ *}}+{{ *}}#0)
+{
+ p0=cmp.eq(r23,##127565957)
+ if (!p0.new) r25=memuh(r29)
+}
+
+#CHECK: 4377c807 if (p1.new) r7 = memuh(r23{{ *}}+{{ *}}#0)
+{
+ p1=cmp.eq(r30,##-222020054)
+ if (p1.new) r7=memuh(r23)
+}
+
+#CHECK: 4754c81c if (!p1.new) r28 = memh(r20{{ *}}+{{ *}}#0)
+{
+ p1=cmp.eq(r18,##1159699785)
+ if (!p1.new) r28=memh(r20)
+}
+
+#CHECK: 435ec01b if (p0.new) r27 = memh(r30{{ *}}+{{ *}}#0)
+{
+ p0=cmp.eq(r7,##-1114567705)
+ if (p0.new) r27=memh(r30)
+}
+
+#CHECK: 420dd100 if (p0.new) memb(r13{{ *}}+{{ *}}#0) = r17
+{
+ p0=cmp.eq(r21,##-1458796638)
+ if (p0.new) memb(r13)=r17
+}
+
+#CHECK: 4601d602 if (!p2.new) memb(r1{{ *}}+{{ *}}#0) = r22
+{
+ p2=cmp.eq(r20,##-824022439)
+ if (!p2.new) memb(r1)=r22
+}
+
+#CHECK: 43dcd808 if (p3.new) r9:8 = memd(r28{{ *}}+{{ *}}#0)
+{
+ p3=cmp.eq(r13,##56660744)
+ if (p3.new) r9:8=memd(r28)
+}
+
+#CHECK: 47d8c80e if (!p1.new) r15:14 = memd(r24{{ *}}+{{ *}}#0)
+{
+ if (!p1.new) r15:14=memd(r24)
+ p1=cmp.eq(r15,##1536716489)
+}
+
+#CHECK: 3918e045 if (p2.new) memb(r24{{ *}}+{{ *}}#0)=#-27
+{
+ if (p2.new) memb(r24)=#-27
+ p2=cmp.eq(r21,##1741091811)
+}
+
+#CHECK: 398fe04d if (!p2.new) memb(r15{{ *}}+{{ *}}#0)=#-19
+{
+ if (!p2.new) memb(r15)=#-19
+ p2=cmp.eq(r15,##779870261)
+}
+
+#CHECK: 3931c04b if (p2.new) memh(r17{{ *}}+{{ *}}#0)=#11
+{
+ if (p2.new) memh(r17)=#11
+ p2=cmp.eq(r13,##-1171145798)
+}
+
+#CHECK: 39aee056 if (!p2.new) memh(r14{{ *}}+{{ *}}#0)=#-10
+{
+ p2=cmp.eq(r23,##-633976762)
+ if (!p2.new) memh(r14)=#-10
+}
+
+#CHECK: 4692df01 if (!p1.new) memw(r18{{ *}}+{{ *}}#0) = r31
+{
+ if (!p1.new) memw(r18)=r31
+ p1=cmp.eq(r11,##-319375732)
+}
+
+#CHECK: 428dc402 if (p2.new) memw(r13{{ *}}+{{ *}}#0) = r4
+{
+ if (p2.new) memw(r13)=r4
+ p2=cmp.eq(r18,##1895120239)
+}
+
+#CHECK: 4670c300 if (!p0.new) memh(r16{{ *}}+{{ *}}#0) = r3.h
+{
+ p0=cmp.eq(r25,##1348715015)
+ if (!p0.new) memh(r16)=r3.h
+}
+
+#CHECK: 426ddf02 if (p2.new) memh(r13{{ *}}+{{ *}}#0) = r31.h
+{
+ p2=cmp.eq(r25,##1085560657)
+ if (p2.new) memh(r13)=r31.h
+}
+
+#CHECK: 464bcb01 if (!p1.new) memh(r11{{ *}}+{{ *}}#0) = r11
+{
+ p1=cmp.eq(r10,##1491455911)
+ if (!p1.new) memh(r11)=r11
+}
+
+#CHECK: 4248d200 if (p0.new) memh(r8{{ *}}+{{ *}}#0) = r18
+{
+ p0=cmp.eq(r3,##687581160)
+ if (p0.new) memh(r8)=r18
+}
+
+#CHECK: 42deca00 if (p0.new) memd(r30{{ *}}+{{ *}}#0) = r11:10
+{
+ if (p0.new) memd(r30)=r11:10
+ p0=cmp.eq(r28,##562796189)
+}
+
+#CHECK: 46d5cc03 if (!p3.new) memd(r21{{ *}}+{{ *}}#0) = r13:12
+{
+ if (!p3.new) memd(r21)=r13:12
+ p3=cmp.eq(r6,##-969273288)
+}
+
+#CHECK: 42bad201 if (p1.new) memw(r26{{ *}}+{{ *}}#0) = r22.new
+{
+ if (p1.new) memw(r26)=r22.new
+ p1=cmp.eq(r0,##-1110065473)
+ r22=add(r28,r9)
+}
+
+#CHECK: 46b9d201 if (!p1.new) memw(r25{{ *}}+{{ *}}#0) = r26.new
+{
+ p1=cmp.eq(r11,##-753121346)
+ r26=add(r19,r7)
+ if (!p1.new) memw(r25)=r26.new
+}
+
+#CHECK: 40aad200 if (p0) memw(r10{{ *}}+{{ *}}#0) = r6.new
+{
+ r6=add(r30,r0)
+ if (p0) memw(r10)=r6.new
+}
+
+#CHECK: 44a6d202 if (!p2) memw(r6{{ *}}+{{ *}}#0) = r4.new
+{
+ if (!p2) memw(r6)=r4.new
+ r4=add(r0,r3)
+}
+
+#CHECK: 40b9c200 if (p0) memb(r25{{ *}}+{{ *}}#0) = r29.new
+{
+ if (p0) memb(r25)=r29.new
+ r29=add(r27,r30)
+}
+
+#CHECK: 44bec203 if (!p3) memb(r30{{ *}}+{{ *}}#0) = r8.new
+{
+ if (!p3) memb(r30)=r8.new
+ r8=add(r24,r4)
+}
+
+#CHECK: 46aecc01 if (!p1.new) memh(r14{{ *}}+{{ *}}#0) = r13.new
+{
+ if (!p1.new) memh(r14)=r13.new
+ r13=add(r21,r2)
+ p1=cmp.eq(r3,##-1529345886)
+}
+
+#CHECK: 42bcca02 if (p2.new) memh(r28{{ *}}+{{ *}}#0) = r18.new
+{
+ p2=cmp.eq(r15,##2048545649)
+ if (p2.new) memh(r28)=r18.new
+ r18=add(r9,r3)
+}
+
+#CHECK: 46aac200 if (!p0.new) memb(r10{{ *}}+{{ *}}#0) = r30.new
+{
+ p0=cmp.eq(r21,##-1160401822)
+ r30=add(r9,r22)
+ if (!p0.new) memb(r10)=r30.new
+}
+
+#CHECK: 42b8c202 if (p2.new) memb(r24{{ *}}+{{ *}}#0) = r11.new
+{
+ if (p2.new) memb(r24)=r11.new
+ p2=cmp.eq(r30,##1267977346)
+ r11=add(r8,r18)
+}
+
+#CHECK: 44a3ca00 if (!p0) memh(r3{{ *}}+{{ *}}#0) = r28.new
+{
+ r28=add(r16,r11)
+ if (!p0) memh(r3)=r28.new
+}
+
+#CHECK: 40abca03 if (p3) memh(r11{{ *}}+{{ *}}#0) = r24.new
+{
+ if (p3) memh(r11)=r24.new
+ r24=add(r18,r19)
+}
+
+#CHECK: a1abd200 memw(r11{{ *}}+{{ *}}#0) = r5.new
+{
+ memw(r11)=r5.new
+ r5=add(r0,r10)
+}
+
+#CHECK: a1a2ca00 memh(r2{{ *}}+{{ *}}#0) = r18.new
+{
+ r18=add(r27,r18)
+ memh(r2)=r18.new
+}
+
+#CHECK: a1bac200 memb(r26{{ *}}+{{ *}}#0) = r15.new
+{
+ r15=add(r22,r17)
+ memb(r26)=r15.new
+}
+
+#CHECK: d328ce1c { r29:28{{ *}}={{ *}}vsubub(r15:14, r9:8)
+r29:28=vsubb(r15:14,r9:8)
+
+#CHECK: 8c5ed60c { r12{{ *}}={{ *}}asr(r30, #22):rnd
+r12=asrrnd(r30,#23)
+
+#CHECK: ed1ec109 { r9{{ *}}={{ *}}mpyi(r30, r1)
+r9=mpyui(r30,r1)
+
+#CHECK: e010d787 { r7{{ *}}={{ *}}+{{ *}}mpyi(r16, #188)
+r7=mpyi(r16,#188)
+
+#CHECK: d206eea2 { p2{{ *}}={{ *}}boundscheck(r7:6, r15:14):raw:hi
+p2=boundscheck(r7,r15:14)
+
+#CHECK: f27ac102 { p2{{ *}}={{ *}}cmp.gtu(r26, r1)
+p2=cmp.ltu(r1,r26)
+
+#CHECK: f240df00 { p0{{ *}}={{ *}}cmp.gt(r0, r31)
+p0=cmp.lt(r31,r0)
+
+#CHECK: 7586cc01 { p1{{ *}}={{ *}}cmp.gtu(r6, #96)
+p1=cmp.geu(r6,#97)
+
+#CHECK: 755dc9a2 { p2{{ *}}={{ *}}cmp.gt(r29, #77)
+p2=cmp.ge(r29,#78)
+
+#CHECK: d310d60a { r11:10{{ *}}={{ *}}vaddub(r17:16, r23:22)
+r11:10=vaddb(r17:16,r23:22)
+
+#CHECK: 8753d1e6 { r6{{ *}}={{ *}}tableidxh(r19, #7, #17):raw
+r6=tableidxh(r19,#7,#18)
+
+#CHECK: 8786d277 { r23{{ *}}={{ *}}tableidxw(r6, #3, #18):raw
+r23=tableidxw(r6,#3,#20)
+
+#CHECK: 7c4dfff8 { r25:24{{ *}}={{ *}}combine(#-1, #-101)
+r25:24=#-101
+
+#CHECK: 8866c09a { r26{{ *}}={{ *}}vasrhub(r7:6, #0):raw
+r26=vasrhub(r7:6,#1):rnd:sat
+
+#CHECK: 7654c016 { r22{{ *}}={{ *}}sub(#0, r20)
+r22=neg(r20)
+
+#CHECK: 802cc808 { r9:8{{ *}}={{ *}}vasrh(r13:12, #8):raw
+r9:8=vasrh(r13:12,#9):rnd
+
+#CHECK: 7614dfe5 { r5{{ *}}={{ *}}{{zxtb\(r20\)|and\(r20, *#255\)}}
+r5=zxtb(r20)
+
+#CHECK: 00ab68e2 immext(#179976320)
+#CHECK: 7500c500 p0{{ *}}={{ *}}cmp.eq(r0, ##179976360)
+{
+ if (p0.new) r11=r26
+ p0=cmp.eq(r0,##179976360)
+}
+
+#CHECK: 74f9c00f { if (!p3) r15{{ *}}={{ *}}r25
+if (!p3) r15=r25
+
+#CHECK: 7425c005 { if (p1) r5{{ *}}={{ *}}r5
+if (p1) r5=r5
+
+#CHECK: e9badae2 { r2{{ *}}={{ *}}vrcmpys(r27:26, r27:26):<<1:rnd:sat:raw:lo
+r2=vrcmpys(r27:26,r26):<<1:rnd:sat
+
+#CHECK: fd13f20e if (p0.new) r15:14{{ *}}={{ *}}{{r19:18|combine\(r19, *r18\)}}
+{
+ p0=cmp.eq(r26,##1766934387)
+ if (p0.new) r15:14=r19:18
+}
+
+#CHECK: fd07c6c2 { if (!p2) r3:2{{ *}}={{ *}}{{r7:6|combine\(r7, *r6\)}}
+if (!p2) r3:2=r7:6
+
+#CHECK: fd0dcc7e { if (p3) r31:30{{ *}}={{ *}}{{r13:12|combine\(r13, *r12\)}}
+if (p3) r31:30=r13:12
+
+#CHECK: 748ae015 if (!p0.new) r21{{ *}}={{ *}}r10
+{
+ p0=cmp.eq(r23,##805633208)
+ if (!p0.new) r21=r10
+}
+
+#CHECK: d36ec6c8 { r9:8{{ *}}={{ *}}add(r15:14, r7:6):raw:lo
+r9:8=add(r14,r7:6)
+
+#CHECK: 01e65477 immext(#509943232)
+#CHECK: 7516c3a3 p3{{ *}}={{ *}}cmp.eq(r22, ##509943261)
+{
+ if (!p3.new) r9:8=r25:24
+ p3=cmp.eq(r22,##509943261)
+}
+
+#CHECK: 87e0d5e5 { r5{{ *}}={{ *}}tableidxd(r0, #15, #21):raw
+r5=tableidxd(r0,#15,#24)
+
+#CHECK: 8701db65 { r5{{ *}}={{ *}}tableidxb(r1, #3, #27):raw
+r5=tableidxb(r1,#3,#27)
+
+#CHECK: 767affe3 { r3{{ *}}={{ *}}sub(#-1, r26)
+r3=not(r26)
+
+#CHECK: f51ddc06 { r7:6{{ *}}={{ *}}{{r29:28|combine\(r29, *r28\)}}
+r7:6=r29:28
+
+#CHECK: 9406c000 { dcfetch(r6 + #0)
+dcfetch(r6)
+
+#CHECK: 6b20c001 { p1{{ *}}={{ *}}or(p0, p0)
+p1=p0
+
+#CHECK: eafcdc82 { r3:2 += vrcmpys(r29:28, r29:28):<<1:sat:raw:lo
+r3:2+=vrcmpys(r29:28,r28):<<1:sat
+
+#CHECK: e8ead092 { r19:18{{ *}}={{ *}}vrcmpys(r11:10, r17:16):<<1:sat:raw:lo
+r19:18=vrcmpys(r11:10,r16):<<1:sat
+
+#CHECK: 9082c014 { r21:20{{ *}}={{ *}}memb_fifo(r2{{ *}}+{{ *}}#0)
+r21:20=memb_fifo(r2)
+
+#CHECK: 9056c01c { r29:28{{ *}}={{ *}}memh_fifo(r22{{ *}}+{{ *}}#0)
+r29:28=memh_fifo(r22) \ No newline at end of file
diff --git a/test/MC/Hexagon/capitalizedEndloop.s b/test/MC/Hexagon/capitalizedEndloop.s
new file mode 100644
index 000000000000..d20ff34de6fe
--- /dev/null
+++ b/test/MC/Hexagon/capitalizedEndloop.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj %s | llvm-objdump -d -r - | FileCheck %s
+#
+
+# Verify that capitaizled endloops work
+
+ { R0 = mpyi(R0,R0) } : endloop0
+ { R0 = mpyi(R0,R0) } : ENDLOOP0
+ { R0 = mpyi(R0,R0) }:endloop0
+
+ { R0 = mpyi(R0,R0) } : endloop1
+ { R0 = mpyi(R0,R0) } : ENDLOOP1
+ { R0 = mpyi(R0,R0) }:endloop1
+
+ { R0 = mpyi(R0,R0) } : endloop0 : endloop1
+ { R0 = mpyi(R0,R0) } : ENDLOOP0 : ENDLOOP1
+ { R0 = mpyi(R0,R0) }:endloop0:endloop1
+
+# CHECK: r0 = mpyi(r0, r0)
+# CHECK: :endloop0
+# CHECK: :endloop0
+# CHECK: :endloop0
+# CHECK: :endloop1
+# CHECK: :endloop1
+# CHECK: :endloop1
+# CHECK: :endloop0 :endloop1
+# CHECK: :endloop0 :endloop1
+# CHECK: :endloop0 :endloop1
+
+
diff --git a/test/MC/Hexagon/dcfetch.s b/test/MC/Hexagon/dcfetch.s
new file mode 100644
index 000000000000..bf4349d95c37
--- /dev/null
+++ b/test/MC/Hexagon/dcfetch.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -mno-pairing %s -o %t; llvm-objdump -d %t | FileCheck %s
+
+# Check that DCFETCH is correctly shuffled.
+
+ { dcfetch(r2 + #0); r1 = memw(r2) }
+# CHECK: 9402c000
+
+# Bug 17424: This should be a legal packet
+{
+ P3 = SP1LOOP0(#8,R18)
+ R7:6 = MEMUBH(R4++#4)
+ R13:12 = VALIGNB(R11:10,R9:8,P2)
+ DCFETCH(R5+#(8+0))
+}
+# CHECK-NOT: error:
diff --git a/test/MC/Hexagon/empty_asm.s b/test/MC/Hexagon/empty_asm.s
new file mode 100644
index 000000000000..10b30ff558ed
--- /dev/null
+++ b/test/MC/Hexagon/empty_asm.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple=hexagon -filetype=asm %s -o - | FileCheck %s
+
+# Verify empty packets aren't printed
+barrier
+{}
+barrier
+# CHECK: {
+# CHECK-NEXT: barrier
+# CHECK-NEXT: }
+# CHECK-NOT: }
+# CHECK: {
+# CHECK-NEXT: barrier
+# CHECK-NEXT: }
+
+
diff --git a/test/MC/Hexagon/endloop.s b/test/MC/Hexagon/endloop.s
new file mode 100644
index 000000000000..303f84fb14ff
--- /dev/null
+++ b/test/MC/Hexagon/endloop.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple=hexagon -filetype=asm %s 2>%t; FileCheck %s <%t
+
+# Check that a branch in an end-loop packet is caught.
+
+1:
+{
+ r0 = #1
+ p0 = cmp.eq (r1, r2)
+ if (p0) jump 1b
+}:endloop0
+
+2:
+{
+ r0 = #1
+ p0 = cmp.eq (r1, r2)
+ if (p0) jump 2b
+}:endloop1
+
+# CHECK: rror: packet marked with `:endloop{{.}}' cannot contain instructions that modify register
diff --git a/test/MC/Hexagon/got.s b/test/MC/Hexagon/got.s
new file mode 100644
index 000000000000..85409ee4a900
--- /dev/null
+++ b/test/MC/Hexagon/got.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -arch=hexagon -filetype=obj %s | llvm-objdump -r - | FileCheck %s
+#
+
+# make sure the fixups emitted match what is
+# expected.
+.Lgot:
+ r0 = memw (r1 + ##foo@GOT)
+
+# CHECK: R_HEX_GOT_32_6_X foo
+# CHECK: R_HEX_GOT_11_X foo
+
diff --git a/test/MC/Hexagon/inst_and64.ll b/test/MC/Hexagon/inst_and64.ll
index 0b8307463261..856f5c9ceea8 100644
--- a/test/MC/Hexagon/inst_and64.ll
+++ b/test/MC/Hexagon/inst_and64.ll
@@ -1,4 +1,4 @@
-;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj -disable-hsdr %s -o - \
;; RUN: | llvm-objdump -s - | FileCheck %s
define i64 @foo (i64 %a, i64 %b)
diff --git a/test/MC/Hexagon/inst_or64.ll b/test/MC/Hexagon/inst_or64.ll
index ea104300da3e..f73b8279343b 100644
--- a/test/MC/Hexagon/inst_or64.ll
+++ b/test/MC/Hexagon/inst_or64.ll
@@ -1,4 +1,4 @@
-;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj -disable-hsdr %s -o - \
;; RUN: | llvm-objdump -s - | FileCheck %s
define i64 @foo (i64 %a, i64 %b)
diff --git a/test/MC/Hexagon/inst_xor64.ll b/test/MC/Hexagon/inst_xor64.ll
index 7f77c4614cf0..c13ef6bea6ca 100644
--- a/test/MC/Hexagon/inst_xor64.ll
+++ b/test/MC/Hexagon/inst_xor64.ll
@@ -1,4 +1,4 @@
-;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: llc -mtriple=hexagon-unknown-elf -disable-hsdr -filetype=obj %s -o - \
;; RUN: | llvm-objdump -s - | FileCheck %s
define i64 @foo (i64 %a, i64 %b)
diff --git a/test/MC/Hexagon/instructions/alu32_alu.s b/test/MC/Hexagon/instructions/alu32_alu.s
new file mode 100644
index 000000000000..4b3256be0733
--- /dev/null
+++ b/test/MC/Hexagon/instructions/alu32_alu.s
@@ -0,0 +1,84 @@
+# RUN: llvm-mc -triple hexagon -filetype=obj %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.1.1 ALU32/ALU
+
+# Add
+# CHECK: f1 c3 15 b0
+r17 = add(r21, #31)
+# CHECK: 11 df 15 f3
+r17 = add(r21, r31)
+# CHECK: 11 df 55 f6
+r17 = add(r21, r31):sat
+
+# And
+# CHECK: f1 c3 15 76
+r17 = and(r21, #31)
+# CHECK: f1 c3 95 76
+r17 = or(r21, #31)
+# CHECK: 11 df 15 f1
+r17 = and(r21, r31)
+# CHECK: 11 df 35 f1
+r17 = or(r21, r31)
+# CHECK: 11 df 75 f1
+r17 = xor(r21, r31)
+# CHECK: 11 d5 9f f1
+r17 = and(r21, ~r31)
+# CHECK: 11 d5 bf f1
+r17 = or(r21, ~r31)
+
+# Nop
+# CHECK: 00 c0 00 7f
+nop
+
+# Subtract
+# CHECK: b1 c2 5f 76
+r17 = sub(#21, r31)
+# CHECK: 11 df 35 f3
+r17 = sub(r31, r21)
+# CHECK: 11 df d5 f6
+r17 = sub(r31, r21):sat
+
+# Sign extend
+# CHECK: 11 c0 bf 70
+r17 = sxtb(r31)
+
+# Transfer immediate
+# CHECK: 15 c0 31 72
+r17.h = #21
+# CHECK: 15 c0 31 71
+r17.l = #21
+# CHECK: f1 ff 5f 78
+r17 = #32767
+# CHECK: f1 ff df 78
+r17 = #-1
+
+# Transfer register
+# CHECK: 11 c0 75 70
+r17 = r21
+
+# Vector add halfwords
+# CHECK: 11 df 15 f6
+r17 = vaddh(r21, r31)
+# CHECK: 11 df 35 f6
+r17 = vaddh(r21, r31):sat
+# CHECK: 11 df 75 f6
+r17 = vadduh(r21, r31):sat
+
+# Vector average halfwords
+# CHECK: 11 df 15 f7
+r17 = vavgh(r21, r31)
+# CHECK: 11 df 35 f7
+r17 = vavgh(r21, r31):rnd
+# CHECK: 11 df 75 f7
+r17 = vnavgh(r31, r21)
+
+# Vector subtract halfwords
+# CHECK: 11 df 95 f6
+r17 = vsubh(r31, r21)
+# CHECK: 11 df b5 f6
+r17 = vsubh(r31, r21):sat
+# CHECK: 11 df f5 f6
+r17 = vsubuh(r31, r21):sat
+
+# Zero extend
+# CHECK: 11 c0 d5 70
+r17 = zxth(r21)
diff --git a/test/MC/Hexagon/instructions/alu32_perm.s b/test/MC/Hexagon/instructions/alu32_perm.s
new file mode 100644
index 000000000000..8410cb9128a3
--- /dev/null
+++ b/test/MC/Hexagon/instructions/alu32_perm.s
@@ -0,0 +1,40 @@
+# RUN: llvm-mc -triple hexagon -filetype=obj %s -o - | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.1.2 ALU32/PERM
+
+# Combine words in to doublewords
+# CHECK: 11 df 95 f3
+r17 = combine(r31.h, r21.h)
+# CHECK: 11 df b5 f3
+r17 = combine(r31.h, r21.l)
+# CHECK: 11 df d5 f3
+r17 = combine(r31.l, r21.h)
+# CHECK: 11 df f5 f3
+r17 = combine(r31.l, r21.l)
+# CHECK: b0 e2 0f 7c
+r17:16 = combine(#21, #31)
+# CHECK: b0 e2 3f 73
+r17:16 = combine(#21, r31)
+# CHECK: f0 e3 15 73
+r17:16 = combine(r21, #31)
+# CHECK: 10 df 15 f5
+r17:16 = combine(r21, r31)
+
+# Mux
+# CHECK: f1 c3 75 73
+r17 = mux(p3, r21, #31)
+# CHECK: b1 c2 ff 73
+r17 = mux(p3, #21, r31)
+# CHECK: b1 e2 8f 7b
+r17 = mux(p3, #21, #31)
+# CHECK: 71 df 15 f4
+r17 = mux(p3, r21, r31)
+
+# Shift word by 16
+# CHECK: 11 c0 15 70
+r17 = aslh(r21)
+# CHECK: 11 c0 35 70
+r17 = asrh(r21)
+
+# Pack high and low halfwords
+# CHECK: 10 df 95 f5
+r17:16 = packhl(r21, r31)
diff --git a/test/MC/Hexagon/instructions/alu32_pred.s b/test/MC/Hexagon/instructions/alu32_pred.s
new file mode 100644
index 000000000000..e5fded0a3691
--- /dev/null
+++ b/test/MC/Hexagon/instructions/alu32_pred.s
@@ -0,0 +1,222 @@
+# RUN: llvm-mc -triple hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.1.3 ALU32/PRED
+
+# Conditional add
+# CHECK: f1 c3 75 74
+if (p3) r17 = add(r21, #31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 e3 75 74
+{ p3 = r5
+ if (p3.new) r17 = add(r21, #31) }
+# CHECK: f1 c3 f5 74
+if (!p3) r17 = add(r21, #31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 e3 f5 74
+{ p3 = r5
+ if (!p3.new) r17 = add(r21, #31) }
+# CHECK: 71 df 15 fb
+if (p3) r17 = add(r21, r31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 71 ff 15 fb
+{ p3 = r5
+ if (p3.new) r17 = add(r21, r31) }
+# CHECK: f1 df 15 fb
+if (!p3) r17 = add(r21, r31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 15 fb
+{ p3 = r5
+ if (!p3.new) r17 = add(r21, r31) }
+
+# Conditional shift halfword
+# CHECK: 11 e3 15 70
+if (p3) r17 = aslh(r21)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 e7 15 70
+{ p3 = r5
+ if (p3.new) r17 = aslh(r21) }
+# CHECK: 11 eb 15 70
+if (!p3) r17 = aslh(r21)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 ef 15 70
+{ p3 = r5
+ if (!p3.new) r17 = aslh(r21) }
+# CHECK: 11 e3 35 70
+if (p3) r17 = asrh(r21)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 e7 35 70
+{ p3 = r5
+ if (p3.new) r17 = asrh(r21) }
+# CHECK: 11 eb 35 70
+if (!p3) r17 = asrh(r21)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 ef 35 70
+{ p3 = r5
+ if (!p3.new) r17 = asrh(r21) }
+
+# Conditional combine
+# CHECK: 70 df 15 fd
+if (p3) r17:16 = combine(r21, r31)
+# CHECK: f0 df 15 fd
+if (!p3) r17:16 = combine(r21, r31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 70 ff 15 fd
+{ p3 = r5
+ if (p3.new) r17:16 = combine(r21, r31) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f0 ff 15 fd
+{ p3 = r5
+ if (!p3.new) r17:16 = combine(r21, r31) }
+
+# Conditional logical operations
+# CHECK: 71 df 15 f9
+if (p3) r17 = and(r21, r31)
+# CHECK: f1 df 15 f9
+if (!p3) r17 = and(r21, r31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 71 ff 15 f9
+{ p3 = r5
+ if (p3.new) r17 = and(r21, r31) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 15 f9
+{ p3 = r5
+ if (!p3.new) r17 = and(r21, r31) }
+# CHECK: 71 df 35 f9
+if (p3) r17 = or(r21, r31)
+# CHECK: f1 df 35 f9
+if (!p3) r17 = or(r21, r31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 71 ff 35 f9
+{ p3 = r5
+ if (p3.new) r17 = or(r21, r31) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 35 f9
+{ p3 = r5
+ if (!p3.new) r17 = or(r21, r31) }
+# CHECK: 71 df 75 f9
+if (p3) r17 = xor(r21, r31)
+# CHECK: f1 df 75 f9
+if (!p3) r17 = xor(r21, r31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 71 ff 75 f9
+{ p3 = r5
+ if (p3.new) r17 = xor(r21, r31) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 75 f9
+{ p3 = r5
+ if (!p3.new) r17 = xor(r21, r31) }
+
+# Conditional subtract
+# CHECK: 71 df 35 fb
+if (p3) r17 = sub(r31, r21)
+# CHECK: f1 df 35 fb
+if (!p3) r17 = sub(r31, r21)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 71 ff 35 fb
+{ p3 = r5
+ if (p3.new) r17 = sub(r31, r21) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 35 fb
+{ p3 = r5
+ if (!p3.new) r17 = sub(r31, r21) }
+
+# Conditional sign extend
+# CHECK: 11 e3 b5 70
+if (p3) r17 = sxtb(r21)
+# CHECK: 11 eb b5 70
+if (!p3) r17 = sxtb(r21)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 e7 b5 70
+{ p3 = r5
+ if (p3.new) r17 = sxtb(r21) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 ef b5 70
+{ p3 = r5
+ if (!p3.new) r17 = sxtb(r21) }
+# CHECK: 11 e3 f5 70
+if (p3) r17 = sxth(r21)
+# CHECK: 11 eb f5 70
+if (!p3) r17 = sxth(r21)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 e7 f5 70
+{ p3 = r5
+ if (p3.new) r17 = sxth(r21) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 ef f5 70
+{ p3 = r5
+ if (!p3.new) r17 = sxth(r21) }
+
+# Conditional transfer
+# CHECK: b1 c2 60 7e
+if (p3) r17 = #21
+# CHECK: b1 c2 e0 7e
+if (!p3) r17 = #21
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 e2 60 7e
+{ p3 = r5
+ if (p3.new) r17 = #21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 e2 e0 7e
+{ p3 = r5
+ if (!p3.new) r17 = #21 }
+
+# Conditional zero extend
+# CHECK: 11 e3 95 70
+if (p3) r17 = zxtb(r21)
+# CHECK: 11 eb 95 70
+if (!p3) r17 = zxtb(r21)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 e7 95 70
+{ p3 = r5
+ if (p3.new) r17 = zxtb(r21) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 ef 95 70
+{ p3 = r5
+ if (!p3.new) r17 = zxtb(r21) }
+# CHECK: 11 e3 d5 70
+if (p3) r17 = zxth(r21)
+# CHECK: 11 eb d5 70
+if (!p3) r17 = zxth(r21)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 e7 d5 70
+{ p3 = r5
+ if (p3.new) r17 = zxth(r21) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 11 ef d5 70
+{ p3 = r5
+ if (!p3.new) r17 = zxth(r21) }
+
+# Compare
+# CHECK: e3 c3 15 75
+p3 = cmp.eq(r21, #31)
+# CHECK: f3 c3 15 75
+p3 = !cmp.eq(r21, #31)
+# CHECK: e3 c3 55 75
+p3 = cmp.gt(r21, #31)
+# CHECK: f3 c3 55 75
+p3 = !cmp.gt(r21, #31)
+# CHECK: e3 c3 95 75
+p3 = cmp.gtu(r21, #31)
+# CHECK: f3 c3 95 75
+p3 = !cmp.gtu(r21, #31)
+# CHECK: 03 df 15 f2
+p3 = cmp.eq(r21, r31)
+# CHECK: 13 df 15 f2
+p3 = !cmp.eq(r21, r31)
+# CHECK: 03 df 55 f2
+p3 = cmp.gt(r21, r31)
+# CHECK: 13 df 55 f2
+p3 = !cmp.gt(r21, r31)
+# CHECK: 03 df 75 f2
+p3 = cmp.gtu(r21, r31)
+# CHECK: 13 df 75 f2
+p3 = !cmp.gtu(r21, r31)
+
+# Compare to general register
+# CHECK: f1 e3 55 73
+r17 = cmp.eq(r21, #31)
+# CHECK: f1 e3 75 73
+r17 = !cmp.eq(r21, #31)
+# CHECK: 11 df 55 f3
+r17 = cmp.eq(r21, r31)
+# CHECK: 11 df 75 f3
+r17 = !cmp.eq(r21, r31)
diff --git a/test/MC/Hexagon/instructions/cr.s b/test/MC/Hexagon/instructions/cr.s
new file mode 100644
index 000000000000..4cc21551865b
--- /dev/null
+++ b/test/MC/Hexagon/instructions/cr.s
@@ -0,0 +1,78 @@
+# RUN: llvm-mc --triple hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.2 CR
+
+# Corner detection acceleration
+# CHECK: 93 e1 12 6b
+p3 = !fastcorner9(p2, p1)
+# CHECK: 91 e3 02 6b
+p1 = fastcorner9(p2, p3)
+
+# Logical reductions on predicates
+# CHECK: 01 c0 82 6b
+p1 = any8(p2)
+# CHECK: 01 c0 a2 6b
+p1 = all8(p2)
+
+# Looping instructions
+# CHECK: 00 c0 15 60
+loop0(0, r21)
+# CHECK: 00 c0 35 60
+loop1(0, r21)
+# CHECK: 60 c0 00 69
+loop0(0, #12)
+# CHECK: 60 c0 20 69
+loop1(0, #12)
+
+# Add to PC
+# CHECK: 91 ca 49 6a
+r17 = add(pc, #21)
+
+# Pipelined loop instructions
+# CHECK: 00 c0 b5 60
+p3 = sp1loop0(0, r21)
+# CHECK: 00 c0 d5 60
+p3 = sp2loop0(0, r21)
+# CHECK: 00 c0 f5 60
+p3 = sp3loop0(0, r21)
+# CHECK: a1 c0 a0 69
+p3 = sp1loop0(0, #21)
+# CHECK: a1 c0 c0 69
+p3 = sp2loop0(0, #21)
+# CHECK: a1 c0 e0 69
+p3 = sp3loop0(0, #21)
+
+# Logical operations on predicates
+# CHECK: 01 c3 02 6b
+p1 = and(p3, p2)
+# CHECK: c1 c3 12 6b
+p1 = and(p2, and(p3, p3))
+# CHECK: 01 c3 22 6b
+p1 = or(p3, p2)
+# CHECK: c1 c3 32 6b
+p1 = and(p2, or(p3, p3))
+# CHECK: 01 c3 42 6b
+p1 = xor(p2, p3)
+# CHECK: c1 c3 52 6b
+p1 = or(p2, and(p3, p3))
+# CHECK: 01 c2 63 6b
+p1 = and(p2, !p3)
+# CHECK: c1 c3 72 6b
+p1 = or(p2, or(p3, p3))
+# CHECK: c1 c3 92 6b
+p1 = and(p2, and(p3, !p3))
+# CHECK: c1 c3 b2 6b
+p1 = and(p2, or(p3, !p3))
+# CHECK: 01 c0 c2 6b
+p1 = not(p2)
+# CHECK: c1 c3 d2 6b
+p1 = or(p2, and(p3, !p3))
+# CHECK: 01 c2 e3 6b
+p1 = or(p2, !p3)
+# CHECK: c1 c3 f2 6b
+p1 = or(p2, or(p3, !p3))
+
+# User control register transfer
+# CHECK: 0d c0 35 62
+cs1 = r21
+# CHECK: 11 c0 0d 6a
+r17 = cs1
diff --git a/test/MC/Hexagon/instructions/j.s b/test/MC/Hexagon/instructions/j.s
new file mode 100644
index 000000000000..0a9003b3d7b3
--- /dev/null
+++ b/test/MC/Hexagon/instructions/j.s
@@ -0,0 +1,206 @@
+# RUN: llvm-mc -triple hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.4 J
+
+# Call subroutine
+# CHECK: 00 c0 00 5a
+call 0
+# CHECK: 00 c3 00 5d
+if (p3) call 0
+# CHECK: 00 c3 20 5d
+if (!p3) call 0
+
+# Compare and jump
+# CHECK: 00 c0 89 11
+{ p0 = cmp.eq(r17,#-1); if (p0.new) jump:nt 0 }
+# CHECK: 00 c1 89 11
+{ p0 = cmp.gt(r17,#-1); if (p0.new) jump:nt 0 }
+# CHECK: 00 c3 89 11
+{ p0 = tstbit(r17, #0); if (p0.new) jump:nt 0 }
+# CHECK: 00 e0 89 11
+{ p0 = cmp.eq(r17,#-1); if (p0.new) jump:t 0 }
+# CHECK: 00 e1 89 11
+{ p0 = cmp.gt(r17,#-1); if (p0.new) jump:t 0 }
+# CHECK: 00 e3 89 11
+{ p0 = tstbit(r17, #0); if (p0.new) jump:t 0 }
+# CHECK: 00 c0 c9 11
+{ p0 = cmp.eq(r17,#-1); if (!p0.new) jump:nt 0 }
+# CHECK: 00 c1 c9 11
+{ p0 = cmp.gt(r17,#-1); if (!p0.new) jump:nt 0 }
+# CHECK: 00 c3 c9 11
+{ p0 = tstbit(r17, #0); if (!p0.new) jump:nt 0 }
+# CHECK: 00 e0 c9 11
+{ p0 = cmp.eq(r17,#-1); if (!p0.new) jump:t 0 }
+# CHECK: 00 e1 c9 11
+{ p0 = cmp.gt(r17,#-1); if (!p0.new) jump:t 0 }
+# CHECK: 00 e3 c9 11
+{ p0 = tstbit(r17, #0); if (!p0.new) jump:t 0 }
+# CHECK: 00 d5 09 10
+{ p0 = cmp.eq(r17, #21); if (p0.new) jump:nt 0 }
+# CHECK: 00 f5 09 10
+{ p0 = cmp.eq(r17, #21); if (p0.new) jump:t 0 }
+# CHECK: 00 d5 49 10
+{ p0 = cmp.eq(r17, #21); if (!p0.new) jump:nt 0 }
+# CHECK: 00 f5 49 10
+{ p0 = cmp.eq(r17, #21); if (!p0.new) jump:t 0 }
+# CHECK: 00 d5 89 10
+{ p0 = cmp.gt(r17, #21); if (p0.new) jump:nt 0 }
+# CHECK: 00 f5 89 10
+{ p0 = cmp.gt(r17, #21); if (p0.new) jump:t 0 }
+# CHECK: 00 d5 c9 10
+{ p0 = cmp.gt(r17, #21); if (!p0.new) jump:nt 0 }
+# CHECK: 00 f5 c9 10
+{ p0 = cmp.gt(r17, #21); if (!p0.new) jump:t 0 }
+# CHECK: 00 d5 09 11
+{ p0 = cmp.gtu(r17, #21); if (p0.new) jump:nt 0 }
+# CHECK: 00 f5 09 11
+{ p0 = cmp.gtu(r17, #21); if (p0.new) jump:t 0 }
+# CHECK: 00 d5 49 11
+{ p0 = cmp.gtu(r17, #21); if (!p0.new) jump:nt 0 }
+# CHECK: 00 f5 49 11
+{ p0 = cmp.gtu(r17, #21); if (!p0.new) jump:t 0 }
+# CHECK: 00 c0 89 13
+{ p1 = cmp.eq(r17,#-1); if (p1.new) jump:nt 0 }
+# CHECK: 00 c1 89 13
+{ p1 = cmp.gt(r17,#-1); if (p1.new) jump:nt 0 }
+# CHECK: 00 c3 89 13
+{ p1 = tstbit(r17, #0); if (p1.new) jump:nt 0 }
+# CHECK: 00 e0 89 13
+{ p1 = cmp.eq(r17,#-1); if (p1.new) jump:t 0 }
+# CHECK: 00 e1 89 13
+{ p1 = cmp.gt(r17,#-1); if (p1.new) jump:t 0 }
+# CHECK: 00 e3 89 13
+{ p1 = tstbit(r17, #0); if (p1.new) jump:t 0 }
+# CHECK: 00 c0 c9 13
+{ p1 = cmp.eq(r17,#-1); if (!p1.new) jump:nt 0 }
+# CHECK: 00 c1 c9 13
+{ p1 = cmp.gt(r17,#-1); if (!p1.new) jump:nt 0 }
+# CHECK: 00 c3 c9 13
+{ p1 = tstbit(r17, #0); if (!p1.new) jump:nt 0 }
+# CHECK: 00 e0 c9 13
+{ p1 = cmp.eq(r17,#-1); if (!p1.new) jump:t 0 }
+# CHECK: 00 e1 c9 13
+{ p1 = cmp.gt(r17,#-1); if (!p1.new) jump:t 0 }
+# CHECK: 00 e3 c9 13
+{ p1 = tstbit(r17, #0); if (!p1.new) jump:t 0 }
+# CHECK: 00 d5 09 12
+{ p1 = cmp.eq(r17, #21); if (p1.new) jump:nt 0 }
+# CHECK: 00 f5 09 12
+{ p1 = cmp.eq(r17, #21); if (p1.new) jump:t 0 }
+# CHECK: 00 d5 49 12
+{ p1 = cmp.eq(r17, #21); if (!p1.new) jump:nt 0 }
+# CHECK: 00 f5 49 12
+{ p1 = cmp.eq(r17, #21); if (!p1.new) jump:t 0 }
+# CHECK: 00 d5 89 12
+{ p1 = cmp.gt(r17, #21); if (p1.new) jump:nt 0 }
+# CHECK: 00 f5 89 12
+{ p1 = cmp.gt(r17, #21); if (p1.new) jump:t 0 }
+# CHECK: 00 d5 c9 12
+{ p1 = cmp.gt(r17, #21); if (!p1.new) jump:nt 0 }
+# CHECK: 00 f5 c9 12
+{ p1 = cmp.gt(r17, #21); if (!p1.new) jump:t 0 }
+# CHECK: 00 d5 09 13
+{ p1 = cmp.gtu(r17, #21); if (p1.new) jump:nt 0 }
+# CHECK: 00 f5 09 13
+{ p1 = cmp.gtu(r17, #21); if (p1.new) jump:t 0 }
+# CHECK: 00 d5 49 13
+{ p1 = cmp.gtu(r17, #21); if (!p1.new) jump:nt 0 }
+# CHECK: 00 f5 49 13
+{ p1 = cmp.gtu(r17, #21); if (!p1.new) jump:t 0 }
+# CHECK: 00 cd 09 14
+{ p0 = cmp.eq(r17, r21); if (p0.new) jump:nt 0 }
+# CHECK: 00 dd 09 14
+{ p1 = cmp.eq(r17, r21); if (p1.new) jump:nt 0 }
+# CHECK: 00 ed 09 14
+{ p0 = cmp.eq(r17, r21); if (p0.new) jump:t 0 }
+# CHECK: 00 fd 09 14
+{ p1 = cmp.eq(r17, r21); if (p1.new) jump:t 0 }
+# CHECK: 00 cd 49 14
+{ p0 = cmp.eq(r17, r21); if (!p0.new) jump:nt 0 }
+# CHECK: 00 dd 49 14
+{ p1 = cmp.eq(r17, r21); if (!p1.new) jump:nt 0 }
+# CHECK: 00 ed 49 14
+{ p0 = cmp.eq(r17, r21); if (!p0.new) jump:t 0 }
+# CHECK: 00 fd 49 14
+{ p1 = cmp.eq(r17, r21); if (!p1.new) jump:t 0 }
+# CHECK: 00 cd 89 14
+{ p0 = cmp.gt(r17, r21); if (p0.new) jump:nt 0 }
+# CHECK: 00 dd 89 14
+{ p1 = cmp.gt(r17, r21); if (p1.new) jump:nt 0 }
+# CHECK: 00 ed 89 14
+{ p0 = cmp.gt(r17, r21); if (p0.new) jump:t 0 }
+# CHECK: 00 fd 89 14
+{ p1 = cmp.gt(r17, r21); if (p1.new) jump:t 0 }
+# CHECK: 00 cd c9 14
+{ p0 = cmp.gt(r17, r21); if (!p0.new) jump:nt 0 }
+# CHECK: 00 dd c9 14
+{ p1 = cmp.gt(r17, r21); if (!p1.new) jump:nt 0 }
+# CHECK: 00 ed c9 14
+{ p0 = cmp.gt(r17, r21); if (!p0.new) jump:t 0 }
+# CHECK: 00 fd c9 14
+{ p1 = cmp.gt(r17, r21); if (!p1.new) jump:t 0 }
+# CHECK: 00 cd 09 15
+{ p0 = cmp.gtu(r17, r21); if (p0.new) jump:nt 0 }
+# CHECK: 00 dd 09 15
+{ p1 = cmp.gtu(r17, r21); if (p1.new) jump:nt 0 }
+# CHECK: 00 ed 09 15
+{ p0 = cmp.gtu(r17, r21); if (p0.new) jump:t 0 }
+# CHECK: 00 fd 09 15
+{ p1 = cmp.gtu(r17, r21); if (p1.new) jump:t 0 }
+# CHECK: 00 cd 49 15
+{ p0 = cmp.gtu(r17, r21); if (!p0.new) jump:nt 0 }
+# CHECK: 00 dd 49 15
+{ p1 = cmp.gtu(r17, r21); if (!p1.new) jump:nt 0 }
+# CHECK: 00 ed 49 15
+{ p0 = cmp.gtu(r17, r21); if (!p0.new) jump:t 0 }
+# CHECK: 00 fd 49 15
+{ p1 = cmp.gtu(r17, r21); if (!p1.new) jump:t 0 }
+
+# Jump to address
+# CHECK: 00 c0 00 58
+jump 0
+# CHECK: 00 c3 00 5c
+if (p3) jump 0
+# CHECK: 00 c3 20 5c
+if (!p3) jump 0
+
+# Jump to address conditioned on new predicate
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 cb 00 5c
+{ p3 = r5
+ if (p3.new) jump:nt 0 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 db 00 5c
+{ p3 = r5
+ if (p3.new) jump:t 0 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 cb 20 5c
+{ p3 = r5
+ if (!p3.new) jump:nt 0 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 db 20 5c
+{ p3 = r5
+ if (!p3.new) jump:t 0 }
+
+# Jump to address conditioned on register value
+# CHECK: 00 c0 11 61
+if (r17!=#0) jump:nt 0
+# CHECK: 00 d0 11 61
+if (r17!=#0) jump:t 0
+# CHECK: 00 c0 51 61
+if (r17>=#0) jump:nt 0
+# CHECK: 00 d0 51 61
+if (r17>=#0) jump:t 0
+# CHECK: 00 c0 91 61
+if (r17==#0) jump:nt 0
+# CHECK: 00 d0 91 61
+if (r17==#0) jump:t 0
+# CHECK: 00 c0 d1 61
+if (r17<=#0) jump:nt 0
+# CHECK: 00 d0 d1 61
+if (r17<=#0) jump:t 0
+
+# Transfer and jump
+# CHECK: 00 d5 09 16
+{ r17 = #21 ; jump 0}
+# CHECK: 00 c9 0d 17
+{ r17 = r21 ; jump 0 }
diff --git a/test/MC/Hexagon/instructions/jr.s b/test/MC/Hexagon/instructions/jr.s
new file mode 100644
index 000000000000..f4f32450f34e
--- /dev/null
+++ b/test/MC/Hexagon/instructions/jr.s
@@ -0,0 +1,38 @@
+# RUN: llvm-mc -triple hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.3 JR
+
+# Call subroutine from register
+# CHECK: 00 c0 b5 50
+callr r21
+# CHECK: 00 c1 15 51
+if (p1) callr r21
+# CHECK: 00 c3 35 51
+if (!p3) callr r21
+
+# Hint an indirect jump address
+# CHECK: 00 c0 b5 52
+hintjr(r21)
+
+# Jump to address from register
+# CHECK: 00 c0 95 52
+jumpr r21
+# CHECK: 00 c1 55 53
+if (p1) jumpr r21
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 cb 55 53
+{ p3 = r5
+ if (p3.new) jumpr:nt r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 db 55 53
+{ p3 = r5
+ if (p3.new) jumpr:t r21 }
+# CHECK: 00 c3 75 53
+if (!p3) jumpr r21
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 cb 75 53
+{ p3 = r5
+ if (!p3.new) jumpr:nt r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 db 75 53
+{ p3 = r5
+ if (!p3.new) jumpr:t r21 }
diff --git a/test/MC/Hexagon/instructions/ld.s b/test/MC/Hexagon/instructions/ld.s
new file mode 100644
index 000000000000..2695999aa85f
--- /dev/null
+++ b/test/MC/Hexagon/instructions/ld.s
@@ -0,0 +1,493 @@
+# RUN: llvm-mc -triple hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.5 LD
+
+# CHECK: 02 40 00 00
+# CHECK-NEXT: 10 c5 c0 49
+r17:16 = memd(##168)
+# CHECK: d0 c0 d5 91
+r17:16 = memd(r21 + #48)
+# CHECK: b0 e0 d5 99
+r17:16 = memd(r21 ++ #40:circ(m1))
+# CHECK: 10 e2 d5 99
+r17:16 = memd(r21 ++ I:circ(m1))
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 70 d7 d5 9b
+r17:16 = memd(r21 = ##31)
+# CHECK: b0 c0 d5 9b
+r17:16 = memd(r21++#40)
+# CHECK: 10 e0 d5 9d
+r17:16 = memd(r21++m1)
+# CHECK: 10 e0 d5 9f
+r17:16 = memd(r21 ++ m1:brev)
+
+# Load doubleword conditionally
+# CHECK: f0 ff d5 30
+if (p3) r17:16 = memd(r21+r31<<#3)
+# CHECK: f0 ff d5 31
+if (!p3) r17:16 = memd(r21+r31<<#3)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f0 ff d5 32
+{ p3 = r5
+ if (p3.new) r17:16 = memd(r21+r31<<#3) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f0 ff d5 33
+{ p3 = r5
+ if (!p3.new) r17:16 = memd(r21+r31<<#3) }
+# CHECK: 70 d8 d5 41
+if (p3) r17:16 = memd(r21 + #24)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 70 d8 d5 43
+{ p3 = r5
+ if (p3.new) r17:16 = memd(r21 + #24) }
+# CHECK: 70 d8 d5 45
+if (!p3) r17:16 = memd(r21 + #24)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 70 d8 d5 47
+{ p3 = r5
+ if (!p3.new) r17:16 = memd(r21 + #24) }
+# CHECK: b0 e6 d5 9b
+if (p3) r17:16 = memd(r21++#40)
+# CHECK: b0 ee d5 9b
+if (!p3) r17:16 = memd(r21++#40)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b0 f6 d5 9b
+{ p3 = r5
+ if (p3.new) r17:16 = memd(r21++#40) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b0 fe d5 9b
+{ p3 = r5
+ if (!p3.new) r17:16 = memd(r21++#40) }
+
+# Load byte
+# CHECK: 91 ff 15 3a
+r17 = memb(r21 + r31<<#3)
+# CHECK: b1 c2 00 49
+r17 = memb(#21)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: b1 c2 00 49
+r17 = memb(##21)
+# CHECK: f1 c3 15 91
+r17 = memb(r21 + #31)
+# CHECK: b1 e0 15 99
+r17 = memb(r21 ++ #5:circ(m1))
+# CHECK: 11 e2 15 99
+r17 = memb(r21 ++ I:circ(m1))
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 71 d7 15 9b
+r17 = memb(r21 = ##31)
+# CHECK: b1 c0 15 9b
+r17 = memb(r21++#5)
+# CHECK: 11 e0 15 9d
+r17 = memb(r21++m1)
+# CHECK: 11 e0 15 9f
+r17 = memb(r21 ++ m1:brev)
+
+# Load byte conditionally
+# CHECK: f1 ff 15 30
+if (p3) r17 = memb(r21+r31<<#3)
+# CHECK: f1 ff 15 31
+if (!p3) r17 = memb(r21+r31<<#3)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 15 32
+{ p3 = r5
+ if (p3.new) r17 = memb(r21+r31<<#3) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 15 33
+{ p3 = r5
+ if (!p3.new) r17 = memb(r21+r31<<#3) }
+# CHECK: 91 dd 15 41
+if (p3) r17 = memb(r21 + #44)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 91 dd 15 43
+{ p3 = r5
+ if (p3.new) r17 = memb(r21 + #44) }
+# CHECK: 91 dd 15 45
+if (!p3) r17 = memb(r21 + #44)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 91 dd 15 47
+{ p3 = r5
+ if (!p3.new) r17 = memb(r21 + #44) }
+# CHECK: b1 e6 15 9b
+if (p3) r17 = memb(r21++#5)
+# CHECK: b1 ee 15 9b
+if (!p3) r17 = memb(r21++#5)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 f6 15 9b
+{ p3 = r5
+ if (p3.new) r17 = memb(r21++#5) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 fe 15 9b
+{ p3 = r5
+ if (!p3.new) r17 = memb(r21++#5) }
+
+# Load byte into shifted vector
+# CHECK: f0 c3 95 90
+r17:16 = memb_fifo(r21 + #31)
+# CHECK: b0 e0 95 98
+r17:16 = memb_fifo(r21 ++ #5:circ(m1))
+# CHECK: 10 e2 95 98
+r17:16 = memb_fifo(r21 ++ I:circ(m1))
+
+# Load half into shifted vector
+# CHECK: f0 c3 55 90
+r17:16 = memh_fifo(r21 + #62)
+# CHECK: b0 e0 55 98
+r17:16 = memh_fifo(r21 ++ #10:circ(m1))
+# CHECK: 10 e2 55 98
+r17:16 = memh_fifo(r21 ++ I:circ(m1))
+
+# Load halfword
+# CHECK: 91 ff 55 3a
+r17 = memh(r21 + r31<<#3)
+# CHECK: b1 c2 40 49
+r17 = memh(#42)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 51 c5 40 49
+r17 = memh(##42)
+# CHECK: f1 c3 55 91
+r17 = memh(r21 + #62)
+# CHECK: b1 e0 55 99
+r17 = memh(r21 ++ #10:circ(m1))
+# CHECK: 11 e2 55 99
+r17 = memh(r21 ++ I:circ(m1))
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 71 d7 55 9b
+r17 = memh(r21 = ##31)
+# CHECK: b1 c0 55 9b
+r17 = memh(r21++#10)
+# CHECK: 11 e0 55 9d
+r17 = memh(r21++m1)
+# CHECK: 11 e0 55 9f
+r17 = memh(r21 ++ m1:brev)
+
+# Load halfword conditionally
+# CHECK: f1 ff 55 30
+if (p3) r17 = memh(r21+r31<<#3)
+# CHECK: f1 ff 55 31
+if (!p3) r17 = memh(r21+r31<<#3)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 55 32
+{ p3 = r5
+ if (p3.new) r17 = memh(r21+r31<<#3) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 55 33
+{ p3 = r5
+ if (!p3.new) r17 = memh(r21+r31<<#3) }
+# CHECK: b1 e6 55 9b
+if (p3) r17 = memh(r21++#10)
+# CHECK: b1 ee 55 9b
+if (!p3) r17 = memh(r21++#10)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 f6 55 9b
+{ p3 = r5
+ if (p3.new) r17 = memh(r21++#10) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 fe 55 9b
+{ p3 = r5
+ if (!p3.new) r17 = memh(r21++#10) }
+# CHECK: f1 db 55 41
+if (p3) r17 = memh(r21 + #62)
+# CHECK: f1 db 55 45
+if (!p3) r17 = memh(r21 + #62)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 db 55 43
+{ p3 = r5
+ if (p3.new) r17 = memh(r21 + #62) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 db 55 47
+{ p3 = r5
+ if (!p3.new) r17 = memh(r21 + #62) }
+
+# Load unsigned byte
+# CHECK: 91 ff 35 3a
+r17 = memub(r21 + r31<<#3)
+# CHECK: b1 c2 20 49
+r17 = memub(#21)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: b1 c2 20 49
+r17 = memub(##21)
+# CHECK: f1 c3 35 91
+r17 = memub(r21 + #31)
+# CHECK: b1 e0 35 99
+r17 = memub(r21 ++ #5:circ(m1))
+# CHECK: 11 e2 35 99
+r17 = memub(r21 ++ I:circ(m1))
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 71 d7 35 9b
+r17 = memub(r21 = ##31)
+# CHECK: b1 c0 35 9b
+r17 = memub(r21++#5)
+# CHECK: 11 e0 35 9d
+r17 = memub(r21++m1)
+# CHECK: 11 e0 35 9f
+r17 = memub(r21 ++ m1:brev)
+
+# Load unsigned byte conditionally
+# CHECK: f1 ff 35 30
+if (p3) r17 = memub(r21+r31<<#3)
+# CHECK: f1 ff 35 31
+if (!p3) r17 = memub(r21+r31<<#3)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 35 32
+{ p3 = r5
+ if (p3.new) r17 = memub(r21+r31<<#3) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 35 33
+{ p3 = r5
+ if (!p3.new) r17 = memub(r21+r31<<#3) }
+# CHECK: f1 db 35 41
+if (p3) r17 = memub(r21 + #31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 db 35 43
+{ p3 = r5
+ if (p3.new) r17 = memub(r21 + #31) }
+# CHECK: f1 db 35 45
+if (!p3) r17 = memub(r21 + #31)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 db 35 47
+{ p3 = r5
+ if (!p3.new) r17 = memub(r21 + #31) }
+# CHECK: b1 e6 35 9b
+if (p3) r17 = memub(r21++#5)
+# CHECK: b1 ee 35 9b
+if (!p3) r17 = memub(r21++#5)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 f6 35 9b
+{ p3 = r5
+ if (p3.new) r17 = memub(r21++#5) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 fe 35 9b
+{ p3 = r5
+ if (!p3.new) r17 = memub(r21++#5) }
+
+# Load unsigned halfword
+# CHECK: 91 ff 75 3a
+r17 = memuh(r21 + r31<<#3)
+# CHECK: b1 c2 60 49
+r17 = memuh(#42)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 51 c5 60 49
+r17 = memuh(##42)
+# CHECK: b1 c2 75 91
+r17 = memuh(r21 + #42)
+# CHECK: b1 e0 75 99
+r17 = memuh(r21 ++ #10:circ(m1))
+# CHECK: 11 e2 75 99
+r17 = memuh(r21 ++ I:circ(m1))
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 71 d7 75 9b
+r17 = memuh(r21 = ##31)
+# CHECK: b1 c0 75 9b
+r17 = memuh(r21++#10)
+# CHECK: 11 e0 75 9d
+r17 = memuh(r21++m1)
+# CHECK: 11 e0 75 9f
+r17 = memuh(r21 ++ m1:brev)
+
+# Load unsigned halfword conditionally
+# CHECK: f1 ff 75 30
+if (p3) r17 = memuh(r21+r31<<#3)
+# CHECK: f1 ff 75 31
+if (!p3) r17 = memuh(r21+r31<<#3)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 75 32
+{ p3 = r5
+ if (p3.new) r17 = memuh(r21+r31<<#3) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 75 33
+{ p3 = r5
+ if (!p3.new) r17 = memuh(r21+r31<<#3) }
+# CHECK: b1 da 75 41
+if (p3) r17 = memuh(r21 + #42)
+# CHECK: b1 da 75 45
+if (!p3) r17 = memuh(r21 + #42)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 da 75 43
+{ p3 = r5
+ if (p3.new) r17 = memuh(r21 + #42) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 da 75 47
+{ p3 = r5
+ if (!p3.new) r17 = memuh(r21 + #42) }
+# CHECK: b1 e6 75 9b
+if (p3) r17 = memuh(r21++#10)
+# CHECK: b1 ee 75 9b
+if (!p3) r17 = memuh(r21++#10)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 f6 75 9b
+{ p3 = r5
+ if (p3.new) r17 = memuh(r21++#10) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 fe 75 9b
+{ p3 = r5
+ if (!p3.new) r17 = memuh(r21++#10) }
+
+# Load word
+# CHECK: 91 ff 95 3a
+r17 = memw(r21 + r31<<#3)
+# CHECK: b1 c2 80 49
+r17 = memw(#84)
+# CHECK: 01 40 00 00
+# CHECK-NEXT: 91 c2 80 49
+r17 = memw(##84)
+# CHECK: b1 c2 95 91
+r17 = memw(r21 + #84)
+# CHECK: b1 e0 95 99
+r17 = memw(r21 ++ #20:circ(m1))
+# CHECK: 11 e2 95 99
+r17 = memw(r21 ++ I:circ(m1))
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 71 d7 95 9b
+r17 = memw(r21 = ##31)
+# CHECK: b1 c0 95 9b
+r17 = memw(r21++#20)
+# CHECK: 11 e0 95 9d
+r17 = memw(r21++m1)
+# CHECK: 11 e0 95 9f
+r17 = memw(r21 ++ m1:brev)
+
+# Load word conditionally
+# CHECK: f1 ff 95 30
+if (p3) r17 = memw(r21+r31<<#3)
+# CHECK: f1 ff 95 31
+if (!p3) r17 = memw(r21+r31<<#3)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 95 32
+{ p3 = r5
+ if (p3.new) r17 = memw(r21+r31<<#3) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f1 ff 95 33
+{ p3 = r5
+ if (!p3.new) r17 = memw(r21+r31<<#3) }
+# CHECK: b1 da 95 41
+if (p3) r17 = memw(r21 + #84)
+# CHECK: b1 da 95 45
+if (!p3) r17 = memw(r21 + #84)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 da 95 43
+{ p3 = r5
+ if (p3.new) r17 = memw(r21 + #84) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 da 95 47
+{ p3 = r5
+ if (!p3.new) r17 = memw(r21 + #84) }
+# CHECK: b1 e6 95 9b
+if (p3) r17 = memw(r21++#20)
+# CHECK: b1 ee 95 9b
+if (!p3) r17 = memw(r21++#20)
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 f6 95 9b
+{ p3 = r5
+ if (p3.new) r17 = memw(r21++#20) }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: b1 fe 95 9b
+{ p3 = r5
+ if (!p3.new) r17 = memw(r21++#20) }
+
+# Deallocate stack frame
+# CHECK: 1e c0 1e 90
+deallocframe
+
+# Deallocate stack frame and return
+# CHECK: 1e c0 1e 96
+dealloc_return
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1e cb 1e 96
+{ p3 = r5
+ if (p3.new) dealloc_return:nt }
+# CHECK: 1e d3 1e 96
+if (p3) dealloc_return
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1e db 1e 96
+{ p3 = r5
+ if (p3.new) dealloc_return:t }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1e eb 1e 96
+{ p3 = r5
+ if (!p3.new) dealloc_return:nt }
+# CHECK: 1e f3 1e 96
+if (!p3) dealloc_return
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1e fb 1e 96
+{ p3 = r5
+ if (!p3.new) dealloc_return:t }
+
+# Load and unpack bytes to halfwords
+# CHECK: f1 c3 35 90
+r17 = membh(r21 + #62)
+# CHECK: f1 c3 75 90
+r17 = memubh(r21 + #62)
+# CHECK: f0 c3 b5 90
+r17:16 = memubh(r21 + #124)
+# CHECK: f0 c3 f5 90
+r17:16 = membh(r21 + #124)
+# CHECK: b1 e0 35 98
+r17 = membh(r21 ++ #10:circ(m1))
+# CHECK: 11 e2 35 98
+r17 = membh(r21 ++ I:circ(m1))
+# CHECK: b1 e0 75 98
+r17 = memubh(r21 ++ #10:circ(m1))
+# CHECK: 11 e2 75 98
+r17 = memubh(r21 ++ I:circ(m1))
+# CHECK: b0 e0 f5 98
+r17:16 = membh(r21 ++ #20:circ(m1))
+# CHECK: 10 e2 f5 98
+r17:16 = membh(r21 ++ I:circ(m1))
+# CHECK: b0 e0 b5 98
+r17:16 = memubh(r21 ++ #20:circ(m1))
+# CHECK: 10 e2 b5 98
+r17:16 = memubh(r21 ++ I:circ(m1))
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 71 d7 35 9a
+r17 = membh(r21 = ##31)
+# CHECK: b1 c0 35 9a
+r17 = membh(r21++#10)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 71 d7 75 9a
+r17 = memubh(r21 = ##31)
+# CHECK: b1 c0 75 9a
+r17 = memubh(r21++#10)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 70 d7 b5 9a
+r17:16 = memubh(r21 = ##31)
+# CHECK: b0 c0 b5 9a
+r17:16 = memubh(r21++#20)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 70 d7 f5 9a
+r17:16 = membh(r21 = ##31)
+# CHECK: b0 c0 f5 9a
+r17:16 = membh(r21++#20)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: f1 f7 35 9c
+r17 = membh(r21<<#3 + ##31)
+# CHECK: 11 e0 35 9c
+r17 = membh(r21++m1)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: f1 f7 75 9c
+r17 = memubh(r21<<#3 + ##31)
+# CHECK: 11 e0 75 9c
+r17 = memubh(r21++m1)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: f0 f7 f5 9c
+r17:16 = membh(r21<<#3 + ##31)
+# CHECK: 10 e0 f5 9c
+r17:16 = membh(r21++m1)
+# CHECK: 00 40 00 00
+# CHECK-NEXT: f0 f7 b5 9c
+r17:16 = memubh(r21<<#3 + ##31)
+# CHECK: 11 e0 35 9c
+r17 = membh(r21++m1)
+# CHECK: 11 e0 75 9c
+r17 = memubh(r21++m1)
+# CHECK: 10 e0 f5 9c
+r17:16 = membh(r21++m1)
+# CHECK: 10 e0 b5 9c
+r17:16 = memubh(r21++m1)
+# CHECK: 11 e0 35 9e
+r17 = membh(r21 ++ m1:brev)
+# CHECK: 11 e0 75 9e
+r17 = memubh(r21 ++ m1:brev)
+# CHECK: 10 e0 b5 9e
+r17:16 = memubh(r21 ++ m1:brev)
+# CHECK: 10 e0 f5 9e
+r17:16 = membh(r21 ++ m1:brev)
diff --git a/test/MC/Hexagon/instructions/memop.s b/test/MC/Hexagon/instructions/memop.s
new file mode 100644
index 000000000000..1aac69056b17
--- /dev/null
+++ b/test/MC/Hexagon/instructions/memop.s
@@ -0,0 +1,56 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.6 MEMOP
+
+# Operation on memory byte
+# CHECK: 95 d9 11 3e
+memb(r17+#51) += r21
+# CHECK: b5 d9 11 3e
+memb(r17+#51) -= r21
+# CHECK: d5 d9 11 3e
+memb(r17+#51) &= r21
+# CHECK: f5 d9 11 3e
+memb(r17+#51) |= r21
+# CHECK: 95 d9 11 3f
+memb(r17+#51) += #21
+# CHECK: b5 d9 11 3f
+memb(r17+#51) -= #21
+# CHECK: d5 d9 11 3f
+memb(r17+#51) = clrbit(#21)
+# CHECK: f5 d9 11 3f
+memb(r17+#51) = setbit(#21)
+
+# Operation on memory halfword
+# CHECK: 95 d9 31 3e
+memh(r17+#102) += r21
+# CHECK: b5 d9 31 3e
+memh(r17+#102) -= r21
+# CHECK: d5 d9 31 3e
+memh(r17+#102) &= r21
+# CHECK: f5 d9 31 3e
+memh(r17+#102) |= r21
+# CHECK: 95 d9 31 3f
+memh(r17+#102) += #21
+# CHECK: b5 d9 31 3f
+memh(r17+#102) -= #21
+# CHECK: d5 d9 31 3f
+memh(r17+#102) = clrbit(#21)
+# CHECK: f5 d9 31 3f
+memh(r17+#102) = setbit(#21)
+
+# Operation on memory word
+# CHECK: 95 d9 51 3e
+memw(r17+#204) += r21
+# CHECK: b5 d9 51 3e
+memw(r17+#204) -= r21
+# CHECK: d5 d9 51 3e
+memw(r17+#204) &= r21
+# CHECK: f5 d9 51 3e
+memw(r17+#204) |= r21
+# CHECK: 95 d9 51 3f
+memw(r17+#204) += #21
+# CHECK: b5 d9 51 3f
+memw(r17+#204) -= #21
+# CHECK: d5 d9 51 3f
+memw(r17+#204) = clrbit(#21)
+# CHECK: f5 d9 51 3f
+memw(r17+#204) = setbit(#21)
diff --git a/test/MC/Hexagon/instructions/nv_j.s b/test/MC/Hexagon/instructions/nv_j.s
new file mode 100644
index 000000000000..5bc75c5a964d
--- /dev/null
+++ b/test/MC/Hexagon/instructions/nv_j.s
@@ -0,0 +1,180 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.7.1 NV/J
+
+# Jump to address conditioned on new register value
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 02 20
+{ r17 = r17
+ if (cmp.eq(r17.new, r21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 02 20
+{ r17 = r17
+ if (cmp.eq(r17.new, r21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 42 20
+{ r17 = r17
+ if (!cmp.eq(r17.new, r21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 42 20
+{ r17 = r17
+ if (!cmp.eq(r17.new, r21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 82 20
+{ r17 = r17
+ if (cmp.gt(r17.new, r21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 82 20
+{ r17 = r17
+ if (cmp.gt(r17.new, r21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 c2 20
+{ r17 = r17
+ if (!cmp.gt(r17.new, r21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 c2 20
+{ r17 = r17
+ if (!cmp.gt(r17.new, r21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 02 21
+{ r17 = r17
+ if (cmp.gtu(r17.new, r21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 02 21
+{ r17 = r17
+ if (cmp.gtu(r17.new, r21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 42 21
+{ r17 = r17
+ if (!cmp.gtu(r17.new, r21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 42 21
+{ r17 = r17
+ if (!cmp.gtu(r17.new, r21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 82 21
+{ r17 = r17
+ if (cmp.gt(r21, r17.new)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 82 21
+{ r17 = r17
+ if (cmp.gt(r21, r17.new)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 c2 21
+{ r17 = r17
+ if (!cmp.gt(r21, r17.new)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 c2 21
+{ r17 = r17
+ if (!cmp.gt(r21, r17.new)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 02 22
+{ r17 = r17
+ if (cmp.gtu(r21, r17.new)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 02 22
+{ r17 = r17
+ if (cmp.gtu(r21, r17.new)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 42 22
+{ r17 = r17
+ if (!cmp.gtu(r21, r17.new)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 42 22
+{ r17 = r17
+ if (!cmp.gtu(r21, r17.new)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 02 24
+{ r17 = r17
+ if (cmp.eq(r17.new, #21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 02 24
+{ r17 = r17
+ if (cmp.eq(r17.new, #21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 42 24
+{ r17 = r17
+ if (!cmp.eq(r17.new, #21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 42 24
+{ r17 = r17
+ if (!cmp.eq(r17.new, #21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 82 24
+{ r17 = r17
+ if (cmp.gt(r17.new, #21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 82 24
+{ r17 = r17
+ if (cmp.gt(r17.new, #21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 c2 24
+{ r17 = r17
+ if (!cmp.gt(r17.new, #21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 c2 24
+{ r17 = r17
+ if (!cmp.gt(r17.new, #21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 02 25
+{ r17 = r17
+ if (cmp.gtu(r17.new, #21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 02 25
+{ r17 = r17
+ if (cmp.gtu(r17.new, #21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 d5 42 25
+{ r17 = r17
+ if (!cmp.gtu(r17.new, #21)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 f5 42 25
+{ r17 = r17
+ if (!cmp.gtu(r17.new, #21)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 c0 82 25
+{ r17 = r17
+ if (tstbit(r17.new, #0)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 e0 82 25
+{ r17 = r17
+ if (tstbit(r17.new, #0)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 c0 c2 25
+{ r17 = r17
+ if (!tstbit(r17.new, #0)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 e0 c2 25
+{ r17 = r17
+ if (!tstbit(r17.new, #0)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 c0 02 26
+{ r17 = r17
+ if (cmp.eq(r17.new, #-1)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 e0 02 26
+{ r17 = r17
+ if (cmp.eq(r17.new, #-1)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 c0 42 26
+{ r17 = r17
+ if (!cmp.eq(r17.new, #-1)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 e0 42 26
+{ r17 = r17
+ if (!cmp.eq(r17.new, #-1)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 c0 82 26
+{ r17 = r17
+ if (cmp.gt(r17.new, #-1)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 e0 82 26
+{ r17 = r17
+ if (cmp.gt(r17.new, #-1)) jump:t 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 c0 c2 26
+{ r17 = r17
+ if (!cmp.gt(r17.new, #-1)) jump:nt 0x0 }
+# CHECK: 11 40 71 70
+# CHECK-NEXT: 00 e0 c2 26
+{ r17 = r17
+ if (!cmp.gt(r17.new, #-1)) jump:t 0x0 }
diff --git a/test/MC/Hexagon/instructions/nv_st.s b/test/MC/Hexagon/instructions/nv_st.s
new file mode 100644
index 000000000000..4ff490024a82
--- /dev/null
+++ b/test/MC/Hexagon/instructions/nv_st.s
@@ -0,0 +1,290 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.7.2 NV/ST
+
+# Store new-value byte
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 82 f5 b1 3b
+{ r31 = r31
+ memb(r17 + r21<<#3) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 11 c2 a0 48
+{ r31 = r31
+ memb(#17) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 15 c2 b1 a1
+{ r31 = r31
+ memb(r17+#21) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 02 e2 b1 a9
+{ r31 = r31
+ memb(r17 ++ I:circ(m1)) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 28 e2 b1 a9
+{ r31 = r31
+ memb(r17 ++ #5:circ(m1)) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 28 c2 b1 ab
+{ r31 = r31
+ memb(r17++#5) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 00 e2 b1 ad
+{ r31 = r31
+ memb(r17++m1) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 00 e2 b1 af
+{ r31 = r31
+ memb(r17 ++ m1:brev) = r31.new }
+
+# Store new-value byte conditionally
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: e2 f5 b1 34
+{ r31 = r31
+ if (p3) memb(r17+r21<<#3) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: e2 f5 b1 35
+{ r31 = r31
+ if (!p3) memb(r17+r21<<#3) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: e2 f5 b1 36
+{ p3 = r5
+ r31 = r31
+ if (p3.new) memb(r17+r21<<#3) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: e2 f5 b1 37
+{ p3 = r5
+ r31 = r31
+ if (!p3.new) memb(r17+r21<<#3) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: ab c2 b1 40
+{ r31 = r31
+ if (p3) memb(r17+#21) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: ab c2 b1 44
+{ r31 = r31
+ if (!p3) memb(r17+#21) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ab c2 b1 42
+{ p3 = r5
+ r31 = r31
+ if (p3.new) memb(r17+#21) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ab c2 b1 46
+{ p3 = r5
+ r31 = r31
+ if (!p3.new) memb(r17+#21) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 2b e2 b1 ab
+{ r31 = r31
+ if (p3) memb(r17++#5) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 2f e2 b1 ab
+{ r31 = r31
+ if (!p3) memb(r17++#5) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ab e2 b1 ab
+{ p3 = r5
+ r31 = r31
+ if (p3.new) memb(r17++#5) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: af e2 b1 ab
+{ p3 = r5
+ r31 = r31
+ if (!p3.new) memb(r17++#5) = r31.new }
+
+# Store new-value halfword
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 8a f5 b1 3b
+{ r31 = r31
+ memh(r17 + r21<<#3) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 15 ca a0 48
+{ r31 = r31
+ memh(#42) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 15 ca b1 a1
+{ r31 = r31
+ memh(r17+#42) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 02 ea b1 a9
+{ r31 = r31
+ memh(r17 ++ I:circ(m1)) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 28 ea b1 a9
+{ r31 = r31
+ memh(r17 ++ #10:circ(m1)) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 28 ca b1 ab
+{ r31 = r31
+ memh(r17++#10) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 00 ea b1 ad
+{ r31 = r31
+ memh(r17++m1) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 00 ea b1 af
+{ r31 = r31
+ memh(r17 ++ m1:brev) = r31.new }
+
+# Store new-value halfword conditionally
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: ea f5 b1 34
+{ r31 = r31
+ if (p3) memh(r17+r21<<#3) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: ea f5 b1 35
+{ r31 = r31
+ if (!p3) memh(r17+r21<<#3) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ea f5 b1 36
+{ p3 = r5
+ r31 = r31
+ if (p3.new) memh(r17+r21<<#3) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ea f5 b1 37
+{ p3 = r5
+ r31 = r31
+ if (!p3.new) memh(r17+r21<<#3) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: ab ca b1 40
+{ r31 = r31
+ if (p3) memh(r17+#42) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: ab ca b1 44
+{ r31 = r31
+ if (!p3) memh(r17+#42) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ab ca b1 42
+{ p3 = r5
+ r31 = r31
+ if (p3.new) memh(r17+#42) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ab ca b1 46
+{ p3 = r5
+ r31 = r31
+ if (!p3.new) memh(r17+#42) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 2b ea b1 ab
+{ r31 = r31
+ if (p3) memh(r17++#10) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 2f ea b1 ab
+{ r31 = r31
+ if (!p3) memh(r17++#10) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ab ea b1 ab
+{ p3 = r5
+ r31 = r31
+ if (p3.new) memh(r17++#10) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: af ea b1 ab
+{ p3 = r5
+ r31 = r31
+ if (!p3.new) memh(r17++#10) = r31.new }
+
+# Store new-value word
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 92 f5 b1 3b
+{ r31 = r31
+ memw(r17 + r21<<#3) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 15 d2 a0 48
+{ r31 = r31
+ memw(#84) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 15 d2 b1 a1
+{ r31 = r31
+ memw(r17+#84) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 02 f2 b1 a9
+{ r31 = r31
+ memw(r17 ++ I:circ(m1)) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 28 f2 b1 a9
+{ r31 = r31
+ memw(r17 ++ #20:circ(m1)) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 28 d2 b1 ab
+{ r31 = r31
+ memw(r17++#20) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 00 f2 b1 ad
+{ r31 = r31
+ memw(r17++m1) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 00 f2 b1 af
+{ r31 = r31
+ memw(r17 ++ m1:brev) = r31.new }
+
+# Store new-value word conditionally
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: f2 f5 b1 34
+{ r31 = r31
+ if (p3) memw(r17+r21<<#3) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: f2 f5 b1 35
+{ r31 = r31
+ if (!p3) memw(r17+r21<<#3) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: f2 f5 b1 36
+{ p3 = r5
+ r31 = r31
+ if (p3.new) memw(r17+r21<<#3) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: f2 f5 b1 37
+{ p3 = r5
+ r31 = r31
+ if (!p3.new) memw(r17+r21<<#3) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: ab d2 b1 40
+{ r31 = r31
+ if (p3) memw(r17+#84) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: ab d2 b1 44
+{ r31 = r31
+ if (!p3) memw(r17+#84) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ab d2 b1 42
+{ p3 = r5
+ r31 = r31
+ if (p3.new) memw(r17+#84) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ab d2 b1 46
+{ p3 = r5
+ r31 = r31
+ if (!p3.new) memw(r17+#84) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 2b f2 b1 ab
+{ r31 = r31
+ if (p3) memw(r17++#20) = r31.new }
+# CHECK: 1f 40 7f 70
+# CHECK-NEXT: 2f f2 b1 ab
+{ r31 = r31
+ if (!p3) memw(r17++#20) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: ab f2 b1 ab
+{ p3 = r5
+ r31 = r31
+ if (p3.new) memw(r17++#20) = r31.new }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 1f 40 7f 70
+# CHECK-NEXT: af f2 b1 ab
+{ p3 = r5
+ r31 = r31
+ if (!p3.new) memw(r17++#20) = r31.new }
diff --git a/test/MC/Hexagon/instructions/st.s b/test/MC/Hexagon/instructions/st.s
new file mode 100644
index 000000000000..3b5e8ee18100
--- /dev/null
+++ b/test/MC/Hexagon/instructions/st.s
@@ -0,0 +1,434 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.8 ST
+
+# Store doubleword
+# CHECK: 9e f5 d1 3b
+memd(r17 + r21<<#3) = r31:30
+# CHECK: 28 d4 c0 48
+memd(#320) = r21:20
+# CHECK: 02 40 00 00
+# CHECK-NEXT: 28 d4 c0 48
+memd(##168) = r21:20
+memd(r17+#168) = r21:20
+# CHECK: 02 f4 d1 a9
+memd(r17 ++ I:circ(m1)) = r21:20
+# CHECK: 28 f4 d1 a9
+memd(r17 ++ #40:circ(m1)) = r21:20
+# CHECK: 28 d4 d1 ab
+memd(r17++#40) = r21:20
+# CHECK: 00 40 00 00
+# CHECK-NEXT: d5 fe d1 ad
+memd(r17<<#3 + ##21) = r31:30
+memd(r17++m1) = r21:20
+# CHECK: 00 f4 d1 af
+memd(r17 ++ m1:brev) = r21:20
+
+# Store doubleword conditionally
+# CHECK: fe f5 d1 34
+if (p3) memd(r17+r21<<#3) = r31:30
+# CHECK: fe f5 d1 35
+if (!p3) memd(r17+r21<<#3) = r31:30
+# CHECK: 03 40 45 85
+# CHECK-NEXT: fe f5 d1 36
+{ p3 = r5
+ if (p3.new) memd(r17+r21<<#3) = r31:30 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: fe f5 d1 37
+{ p3 = r5
+ if (!p3.new) memd(r17+r21<<#3) = r31:30 }
+# CHECK: ab de d1 40
+if (p3) memd(r17+#168) = r31:30
+# CHECK: ab de d1 44
+if (!p3) memd(r17+#168) = r31:30
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab de d1 42
+{ p3 = r5
+ if (p3.new) memd(r17+#168) = r31:30 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab de d1 46
+{ p3 = r5
+ if (!p3.new) memd(r17+#168) = r31:30 }
+# CHECK: 2b f4 d1 ab
+if (p3) memd(r17++#40) = r21:20
+# CHECK: 2f f4 d1 ab
+if (!p3) memd(r17++#40) = r21:20
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab f4 d1 ab
+{ p3 = r5
+ if (p3.new) memd(r17++#40) = r21:20 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: af f4 d1 ab
+{ p3 = r5
+ if (!p3.new) memd(r17++#40) = r21:20 }
+# CHECK: 02 40 00 00
+# CHECK-NEXT: c3 d4 c2 af
+if (p3) memd(##168) = r21:20
+# CHECK: 02 40 00 00
+# CHECK-NEXT: c7 d4 c2 af
+if (!p3) memd(##168) = r21:20
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 02 40 00 00
+# CHECK-NEXT: c3 f4 c2 af
+{ p3 = r5
+ if (p3.new) memd(##168) = r21:20 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 02 40 00 00
+# CHECK-NEXT: c7 f4 c2 af
+{ p3 = r5
+ if (!p3.new) memd(##168) = r21:20 }
+
+# Store byte
+# CHECK: 9f f5 11 3b
+memb(r17 + r21<<#3) = r31
+# CHECK: 9f ca 11 3c
+memb(r17+#21)=#31
+# CHECK: 15 d5 00 48
+memb(#21) = r21
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 15 d5 00 48
+memb(##21) = r21
+# CHECK: 15 d5 11 a1
+memb(r17+#21) = r21
+# CHECK: 02 f5 11 a9
+memb(r17 ++ I:circ(m1)) = r21
+# CHECK: 28 f5 11 a9
+memb(r17 ++ #5:circ(m1)) = r21
+# CHECK: 28 d5 11 ab
+memb(r17++#5) = r21
+# CHECK: 00 40 00 00
+# CHECK-NEXT: d5 ff 11 ad
+memb(r17<<#3 + ##21) = r31
+# CHECK: 00 f5 11 ad
+memb(r17++m1) = r21
+# CHECK: 00 f5 11 af
+memb(r17 ++ m1:brev) = r21
+
+# Store byte conditionally
+# CHECK: ff f5 11 34
+if (p3) memb(r17+r21<<#3) = r31
+# CHECK: ff f5 11 35
+if (!p3) memb(r17+r21<<#3) = r31
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff f5 11 36
+{ p3 = r5
+ if (p3.new) memb(r17+r21<<#3) = r31 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff f5 11 37
+{ p3 = r5
+ if (!p3.new) memb(r17+r21<<#3) = r31 }
+# CHECK: ff ca 11 38
+if (p3) memb(r17+#21)=#31
+# CHECK: ff ca 91 38
+if (!p3) memb(r17+#21)=#31
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff ca 11 39
+{ p3 = r5
+ if (p3.new) memb(r17+#21)=#31 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff ca 91 39
+{ p3 = r5
+ if (!p3.new) memb(r17+#21)=#31 }
+# CHECK: ab df 11 40
+if (p3) memb(r17+#21) = r31
+# CHECK: ab df 11 44
+if (!p3) memb(r17+#21) = r31
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab df 11 42
+{ p3 = r5
+ if (p3.new) memb(r17+#21) = r31 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab df 11 46
+{ p3 = r5
+ if (!p3.new) memb(r17+#21) = r31 }
+# CHECK: 2b f5 11 ab
+if (p3) memb(r17++#5) = r21
+# CHECK: 2f f5 11 ab
+if (!p3) memb(r17++#5) = r21
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab f5 11 ab
+{ p3 = r5
+ if (p3.new) memb(r17++#5) = r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: af f5 11 ab
+{ p3 = r5
+ if (!p3.new) memb(r17++#5) = r21 }
+# CHECK: 00 40 00 00
+# CHECK-NEXT: ab d5 01 af
+if (p3) memb(##21) = r21
+# CHECK: 00 40 00 00
+# CHECK-NEXT: af d5 01 af
+if (!p3) memb(##21) = r21
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 40 00 00
+# CHECK-NEXT: ab f5 01 af
+{ p3 = r5
+ if (p3.new) memb(##21) = r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 40 00 00
+# CHECK-NEXT: af f5 01 af
+{ p3 = r5
+ if (!p3.new) memb(##21) = r21 }
+
+# Store halfword
+# CHECK: 9f f5 51 3b
+memh(r17 + r21<<#3) = r31
+# CHECK: 9f f5 71 3b
+memh(r17 + r21<<#3) = r31.h
+# CHECK: 95 cf 31 3c
+memh(r17+#62)=#21
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 2a d5 40 48
+memh(##42) = r21
+# CHECK: 00 40 00 00
+# CHECK-NEXT: 2a d5 60 48
+memh(##42) = r21.h
+# CHECK: 2a d5 40 48
+memh(#84) = r21
+# CHECK: 2a d5 60 48
+memh(#84) = r21.h
+# CHECK: 15 df 51 a1
+memh(r17+#42) = r31
+# CHECK: 15 df 71 a1
+memh(r17+#42) = r31.h
+# CHECK: 02 f5 51 a9
+memh(r17 ++ I:circ(m1)) = r21
+# CHECK: 28 f5 51 a9
+memh(r17 ++ #10:circ(m1)) = r21
+# CHECK: 02 f5 71 a9
+memh(r17 ++ I:circ(m1)) = r21.h
+# CHECK: 28 f5 71 a9
+memh(r17 ++ #10:circ(m1)) = r21.h
+# CHECK: 28 d5 51 ab
+memh(r17++#10) = r21
+# CHECK: 00 40 00 00
+# CHECK-NEXT: d5 ff 51 ad
+memh(r17<<#3 + ##21) = r31
+# CHECK: 28 d5 71 ab
+memh(r17++#10) = r21.h
+# CHECK: 00 40 00 00
+# CHECK-NEXT: d5 ff 71 ad
+memh(r17<<#3 + ##21) = r31.h
+# CHECK: 00 f5 51 ad
+memh(r17++m1) = r21
+# CHECK: 00 f5 71 ad
+memh(r17++m1) = r21.h
+# CHECK: 00 f5 51 af
+memh(r17 ++ m1:brev) = r21
+# CHECK: 00 f5 71 af
+memh(r17 ++ m1:brev) = r21.h
+
+# Store halfword conditionally
+# CHECK: ff f5 51 34
+if (p3) memh(r17+r21<<#3) = r31
+# CHECK: ff f5 71 34
+if (p3) memh(r17+r21<<#3) = r31.h
+# CHECK: ff f5 51 35
+if (!p3) memh(r17+r21<<#3) = r31
+# CHECK: ff f5 71 35
+if (!p3) memh(r17+r21<<#3) = r31.h
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff f5 51 36
+{ p3 = r5
+ if (p3.new) memh(r17+r21<<#3) = r31 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff f5 71 36
+{ p3 = r5
+ if (p3.new) memh(r17+r21<<#3) = r31.h }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff f5 51 37
+{ p3 = r5
+ if (!p3.new) memh(r17+r21<<#3) = r31 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff f5 71 37
+{ p3 = r5
+ if (!p3.new) memh(r17+r21<<#3) = r31.h }
+# CHECK: f5 cf 31 38
+if (p3) memh(r17+#62)=#21
+# CHECK: f5 cf b1 38
+if (!p3) memh(r17+#62)=#21
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f5 cf 31 39
+{ p3 = r5
+ if (p3.new) memh(r17+#62)=#21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: f5 cf b1 39
+{ p3 = r5
+ if (!p3.new) memh(r17+#62)=#21 }
+# CHECK: fb d5 51 40
+if (p3) memh(r17+#62) = r21
+# CHECK: fb d5 71 40
+if (p3) memh(r17+#62) = r21.h
+# CHECK: fb d5 51 44
+if (!p3) memh(r17+#62) = r21
+# CHECK: fb d5 71 44
+if (!p3) memh(r17+#62) = r21.h
+# CHECK: 03 40 45 85
+# CHECK-NEXT: fb d5 51 42
+{ p3 = r5
+ if (p3.new) memh(r17+#62) = r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: fb d5 71 42
+{ p3 = r5
+ if (p3.new) memh(r17+#62) = r21.h }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: fb d5 51 46
+{ p3 = r5
+ if (!p3.new) memh(r17+#62) = r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: fb d5 71 46
+{ p3 = r5
+ if (!p3.new) memh(r17+#62) = r21.h }
+# CHECK: 2b f5 51 ab
+if (p3) memh(r17++#10) = r21
+# CHECK: 2f f5 51 ab
+if (!p3) memh(r17++#10) = r21
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab f5 51 ab
+{ p3 = r5
+ if (p3.new) memh(r17++#10) = r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: af f5 51 ab
+{ p3 = r5
+ if (!p3.new) memh(r17++#10) = r21 }
+# CHECK: 2b f5 71 ab
+if (p3) memh(r17++#10) = r21.h
+# CHECK: 2f f5 71 ab
+if (!p3) memh(r17++#10) = r21.h
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab f5 71 ab
+{ p3 = r5
+ if (p3.new) memh(r17++#10) = r21.h }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: af f5 71 ab
+{ p3 = r5
+ if (!p3.new) memh(r17++#10) = r21.h }
+# CHECK: 00 40 00 00
+# CHECK-NEXT: d3 d5 42 af
+if (p3) memh(##42) = r21
+# CHECK: 00 40 00 00
+# CHECK-NEXT: d3 d5 62 af
+if (p3) memh(##42) = r21.h
+# CHECK: 00 40 00 00
+# CHECK-NEXT: d7 d5 42 af
+if (!p3) memh(##42) = r21
+# CHECK: 00 40 00 00
+# CHECK-NEXT: d7 d5 62 af
+if (!p3) memh(##42) = r21.h
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 40 00 00
+# CHECK-NEXT: d3 f5 42 af
+{ p3 = r5
+ if (p3.new) memh(##42) = r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 40 00 00
+# CHECK-NEXT: d3 f5 62 af
+{ p3 = r5
+ if (p3.new) memh(##42) = r21.h }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 40 00 00
+# CHECK-NEXT: d7 f5 42 af
+{ p3 = r5
+ if (!p3.new) memh(##42) = r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 00 40 00 00
+# CHECK-NEXT: d7 f5 62 af
+{ p3 = r5
+ if (!p3.new) memh(##42) = r21.h }
+
+# Store word
+# CHECK: 9f f5 91 3b
+memw(r17 + r21<<#3) = r31
+# CHECK: 9f ca 51 3c
+memw(r17+#84)=#31
+# CHECK: 15 df 80 48
+memw(#84) = r31
+# CHECK: 01 40 00 00
+# CHECK-NEXT: 14 d5 80 48
+memw(##84) = r21
+# CHECK: 9f ca 51 3c
+memw(r17+#84)=#31
+# CHECK: 15 df 91 a1
+memw(r17+#84) = r31
+# CHECK: 02 f5 91 a9
+memw(r17 ++ I:circ(m1)) = r21
+# CHECK: 28 f5 91 a9
+memw(r17 ++ #20:circ(m1)) = r21
+# CHECK: 28 d5 91 ab
+memw(r17++#20) = r21
+# CHECK: 00 40 00 00
+# CHECK-NEXT: d5 ff 91 ad
+memw(r17<<#3 + ##21) = r31
+# CHECK: 00 f5 91 ad
+memw(r17++m1) = r21
+# CHECK: 00 f5 91 af
+memw(r17 ++ m1:brev) = r21
+
+# Store word conditionally
+# CHECK: ff f5 91 34
+if (p3) memw(r17+r21<<#3) = r31
+# CHECK: ff f5 91 35
+if (!p3) memw(r17+r21<<#3) = r31
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff f5 91 36
+{ p3 = r5
+ if (p3.new) memw(r17+r21<<#3) = r31 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff f5 91 37
+{ p3 = r5
+ if (!p3.new) memw(r17+r21<<#3) = r31 }
+# CHECK: ff ca 51 38
+if (p3) memw(r17+#84)=#31
+# CHECK: ff ca d1 38
+if (!p3) memw(r17+#84)=#31
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff ca 51 39
+{ p3 = r5
+ if (p3.new) memw(r17+#84)=#31 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ff ca d1 39
+{ p3 = r5
+ if (!p3.new) memw(r17+#84)=#31 }
+# CHECK: ab df 91 40
+if (p3) memw(r17+#84) = r31
+# CHECK: ab df 91 44
+if (!p3) memw(r17+#84) = r31
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab df 91 42
+{ p3 = r5
+ if (p3.new) memw(r17+#84) = r31 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab df 91 46
+{ p3 = r5
+ if (!p3.new) memw(r17+#84) = r31 }
+# CHECK: 2b f5 91 ab
+if (p3) memw(r17++#20) = r21
+# CHECK: 2f f5 91 ab
+if (!p3) memw(r17++#20) = r21
+# CHECK: 03 40 45 85
+# CHECK-NEXT: af f5 91 ab
+{ p3 = r5
+ if (!p3.new) memw(r17++#20) = r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: ab f5 91 ab
+{ p3 = r5
+ if (p3.new) memw(r17++#20) = r21 }
+# CHECK: 01 40 00 00
+# CHECK-NEXT: a3 d5 81 af
+if (p3) memw(##84) = r21
+# CHECK: 01 40 00 00
+# CHECK-NEXT: a7 d5 81 af
+if (!p3) memw(##84) = r21
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 01 40 00 00
+# CHECK-NEXT: a3 f5 81 af
+{ p3 = r5
+ if (p3.new) memw(##84) = r21 }
+# CHECK: 03 40 45 85
+# CHECK-NEXT: 01 40 00 00
+# CHECK-NEXT: a7 f5 81 af
+{ p3 = r5
+ if (!p3.new) memw(##84) = r21 }
+
+# Allocate stack frame
+# CHECK: 1f c0 9d a0
+allocframe(#248)
diff --git a/test/MC/Hexagon/instructions/system_user.s b/test/MC/Hexagon/instructions/system_user.s
new file mode 100644
index 000000000000..d52f8b41182a
--- /dev/null
+++ b/test/MC/Hexagon/instructions/system_user.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.9.1 SYSTEM/USER
+
+# Load locked
+# CHECK: 11 c0 15 92
+r17 = memw_locked(r21)
+# CHECK: 10 d0 15 92
+r17:16 = memd_locked(r21)
+
+# Store conditional
+# CHECK: 03 d5 b1 a0
+memw_locked(r17, p3) = r21
+# CHECK: 03 d4 f1 a0
+memd_locked(r17, p3) = r21:20
+
+# Memory barrier
+# CHECK: 00 c0 00 a8
+barrier
+
+# Data cache prefetch
+# CHECK: 15 c0 11 94
+dcfetch(r17 + #168)
+
+# Send value to ETM trace
+# CHECK: 00 c0 51 62
+trace(r17)
diff --git a/test/MC/Hexagon/instructions/xtype_alu.s b/test/MC/Hexagon/instructions/xtype_alu.s
new file mode 100644
index 000000000000..b68175ed1e23
--- /dev/null
+++ b/test/MC/Hexagon/instructions/xtype_alu.s
@@ -0,0 +1,395 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.10.1 XTYPE/ALU
+
+# Absolute value doubleword
+# CHECK: d0 c0 94 80
+r17:16 = abs(r21:20)
+# CHECK: 91 c0 95 8c
+r17 = abs(r21)
+# CHECK: b1 c0 95 8c
+r17 = abs(r21):sat
+
+# Add and accumulate
+# CHECK: ff d1 35 db
+r17 = add(r21, add(r31, #23))
+# CHECK: ff d1 b5 db
+r17 = add(r21, sub(#23, r31))
+# CHECK: f1 c2 15 e2
+r17 += add(r21, #23)
+# CHECK: f1 c2 95 e2
+r17 -= add(r21, #23)
+# CHECK: 31 df 15 ef
+r17 += add(r21, r31)
+# CHECK: 31 df 95 ef
+r17 -= add(r21, r31)
+
+# Add doublewords
+# CHECK: f0 de 14 d3
+r17:16 = add(r21:20, r31:30)
+# CHECK: b0 de 74 d3
+r17:16 = add(r21:20, r31:30):sat
+# CHECK: d0 de 74 d3
+r17:16 = add(r21:20, r31:30):raw:lo
+# CHECK: f0 de 74 d3
+r17:16 = add(r21:20, r31:30):raw:hi
+
+# Add halfword
+# CHECK: 11 d5 1f d5
+r17 = add(r21.l, r31.l)
+# CHECK: 51 d5 1f d5
+r17 = add(r21.l, r31.h)
+# CHECK: 91 d5 1f d5
+r17 = add(r21.l, r31.l):sat
+# CHECK: d1 d5 1f d5
+r17 = add(r21.l, r31.h):sat
+# CHECK: 11 d5 5f d5
+r17 = add(r21.l, r31.l):<<16
+# CHECK: 31 d5 5f d5
+r17 = add(r21.l, r31.h):<<16
+# CHECK: 51 d5 5f d5
+r17 = add(r21.h, r31.l):<<16
+# CHECK: 71 d5 5f d5
+r17 = add(r21.h, r31.h):<<16
+# CHECK: 91 d5 5f d5
+r17 = add(r21.l, r31.l):sat:<<16
+# CHECK: b1 d5 5f d5
+r17 = add(r21.l, r31.h):sat:<<16
+# CHECK: d1 d5 5f d5
+r17 = add(r21.h, r31.l):sat:<<16
+# CHECK: f1 d5 5f d5
+r17 = add(r21.h, r31.h):sat:<<16
+
+# Add or subtract doublewords with carry
+# CHECK: 70 de d4 c2
+r17:16 = add(r21:20, r31:30, p3):carry
+# CHECK: 70 de f4 c2
+r17:16 = sub(r21:20, r31:30, p3):carry
+
+# Logical doublewords
+# CHECK: 90 c0 94 80
+r17:16 = not(r21:20)
+# CHECK: 10 de f4 d3
+r17:16 = and(r21:20, r31:30)
+# CHECK: 30 d4 fe d3
+r17:16 = and(r21:20, ~r31:30)
+# CHECK: 50 de f4 d3
+r17:16 = or(r21:20, r31:30)
+# CHECK: 70 d4 fe d3
+r17:16 = or(r21:20, ~r31:30)
+# CHECK: 90 de f4 d3
+r17:16 = xor(r21:20, r31:30)
+
+# Logical-logical doublewords
+# CHECK: 10 de 94 ca
+r17:16 ^= xor(r21:20, r31:30)
+
+# Logical-logical words
+# CHECK: f1 c3 15 da
+r17 |= and(r21, #31)
+# CHECK: f5 c3 51 da
+r17 = or(r21, and(r17, #31))
+# CHECK: f1 c3 95 da
+r17 |= or(r21, #31)
+# CHECK: 11 df 35 ef
+r17 |= and(r21, ~r31)
+# CHECK: 31 df 35 ef
+r17 &= and(r21, ~r31)
+# CHECK: 51 df 35 ef
+r17 ^= and(r21, ~r31)
+# CHECK: 11 df 55 ef
+r17 &= and(r21, r31)
+# CHECK: 31 df 55 ef
+r17 &= or(r21, r31)
+# CHECK: 51 df 55 ef
+r17 &= xor(r21, r31)
+# CHECK: 71 df 55 ef
+r17 |= and(r21, r31)
+# CHECK: 71 df 95 ef
+r17 ^= xor(r21, r31)
+# CHECK: 11 df d5 ef
+r17 |= or(r21, r31)
+# CHECK: 31 df d5 ef
+r17 |= xor(r21, r31)
+# CHECK: 51 df d5 ef
+r17 ^= and(r21, r31)
+# CHECK: 71 df d5 ef
+r17 ^= or(r21, r31)
+
+# Maximum words
+# CHECK: 11 df d5 d5
+r17 = max(r21, r31)
+# CHECK: 91 df d5 d5
+r17 = maxu(r21, r31)
+
+# Maximum doublewords
+# CHECK: 90 de d4 d3
+r17:16 = max(r21:20, r31:30)
+# CHECK: b0 de d4 d3
+r17:16 = maxu(r21:20, r31:30)
+
+# Minimum words
+# CHECK: 11 d5 bf d5
+r17 = min(r21, r31)
+# CHECK: 91 d5 bf d5
+r17 = minu(r21, r31)
+
+# Minimum doublewords
+# CHECK: d0 d4 be d3
+r17:16 = min(r21:20, r31:30)
+# CHECK: f0 d4 be d3
+r17:16 = minu(r21:20, r31:30)
+
+# Module wrap
+# CHECK: f1 df f5 d3
+r17 = modwrap(r21, r31)
+
+# Negate
+# CHECK: b0 c0 94 80
+r17:16 = neg(r21:20)
+# CHECK: d1 c0 95 8c
+r17 = neg(r21):sat
+
+# Round
+# CHECK: 31 c0 d4 88
+r17 = round(r21:20):sat
+# CHECK: 11 df f5 8c
+r17 = cround(r21, #31)
+# CHECK: 91 df f5 8c
+r17 = round(r21, #31)
+# CHECK: d1 df f5 8c
+r17 = round(r21, #31):sat
+# CHECK: 11 df d5 c6
+r17 = cround(r21, r31)
+# CHECK: 91 df d5 c6
+r17 = round(r21, r31)
+# CHECK: d1 df d5 c6
+r17 = round(r21, r31):sat
+
+# Subtract doublewords
+# CHECK: f0 d4 3e d3
+r17:16 = sub(r21:20, r31:30)
+
+# Subtract and accumulate words
+# CHECK: 71 d5 1f ef
+r17 += sub(r21, r31)
+
+# Subtract halfword
+# CHECK: 11 d5 3f d5
+r17 = sub(r21.l, r31.l)
+# CHECK: 51 d5 3f d5
+r17 = sub(r21.l, r31.h)
+# CHECK: 91 d5 3f d5
+r17 = sub(r21.l, r31.l):sat
+# CHECK: d1 d5 3f d5
+r17 = sub(r21.l, r31.h):sat
+# CHECK: 11 d5 7f d5
+r17 = sub(r21.l, r31.l):<<16
+# CHECK: 31 d5 7f d5
+r17 = sub(r21.l, r31.h):<<16
+# CHECK: 51 d5 7f d5
+r17 = sub(r21.h, r31.l):<<16
+# CHECK: 71 d5 7f d5
+r17 = sub(r21.h, r31.h):<<16
+# CHECK: 91 d5 7f d5
+r17 = sub(r21.l, r31.l):sat:<<16
+# CHECK: b1 d5 7f d5
+r17 = sub(r21.l, r31.h):sat:<<16
+# CHECK: d1 d5 7f d5
+r17 = sub(r21.h, r31.l):sat:<<16
+# CHECK: f1 d5 7f d5
+r17 = sub(r21.h, r31.h):sat:<<16
+
+# Sign extend word to doubleword
+# CHECK: 10 c0 55 84
+r17:16 = sxtw(r21)
+
+# Vector absolute value halfwords
+# CHECK: 90 c0 54 80
+r17:16 = vabsh(r21:20)
+# CHECK: b0 c0 54 80
+r17:16 = vabsh(r21:20):sat
+
+# Vector absolute value words
+# CHECK: d0 c0 54 80
+r17:16 = vabsw(r21:20)
+# CHECK: f0 c0 54 80
+r17:16 = vabsw(r21:20):sat
+
+# Vector absolute difference halfwords
+# CHECK: 10 d4 7e e8
+r17:16 = vabsdiffh(r21:20, r31:30)
+
+# Vector absolute difference words
+# CHECK: 10 d4 3e e8
+r17:16 = vabsdiffw(r21:20, r31:30)
+
+# Vector add halfwords
+# CHECK: 50 de 14 d3
+r17:16 = vaddh(r21:20, r31:30)
+# CHECK: 70 de 14 d3
+r17:16 = vaddh(r21:20, r31:30):sat
+# CHECK: 90 de 14 d3
+r17:16 = vadduh(r21:20, r31:30):sat
+
+# Vector add halfwords with saturate and pack to unsigned bytes
+# CHECK: 31 de 54 c1
+r17 = vaddhub(r21:20, r31:30):sat
+
+# Vector reduce add unsigned bytes
+# CHECK: 30 de 54 e8
+r17:16 = vraddub(r21:20, r31:30)
+# CHECK: 30 de 54 ea
+r17:16 += vraddub(r21:20, r31:30)
+
+# Vector reduce add halfwords
+# CHECK: 31 de 14 e9
+r17 = vradduh(r21:20, r31:30)
+# CHECK: f1 de 34 e9
+r17 = vraddh(r21:20, r31:30)
+
+# Vector add bytes
+# CHECK: 10 de 14 d3
+r17:16 = vaddub(r21:20, r31:30)
+# CHECK: 30 de 14 d3
+r17:16 = vaddub(r21:20, r31:30):sat
+
+# Vector add words
+# CHECK: b0 de 14 d3
+r17:16 = vaddw(r21:20, r31:30)
+# CHECK: d0 de 14 d3
+r17:16 = vaddw(r21:20, r31:30):sat
+
+# Vector average halfwords
+# CHECK: 50 de 54 d3
+r17:16 = vavgh(r21:20, r31:30)
+# CHECK: 70 de 54 d3
+r17:16 = vavgh(r21:20, r31:30):rnd
+# CHECK: 90 de 54 d3
+r17:16 = vavgh(r21:20, r31:30):crnd
+# CHECK: b0 de 54 d3
+r17:16 = vavguh(r21:20, r31:30)
+# CHECK: d0 de 54 d3
+r17:16 = vavguh(r21:20, r31:30):rnd
+# CHECK: 10 d4 9e d3
+r17:16 = vnavgh(r21:20, r31:30)
+# CHECK: 30 d4 9e d3
+r17:16 = vnavgh(r21:20, r31:30):rnd:sat
+# CHECK: 50 d4 9e d3
+r17:16 = vnavgh(r21:20, r31:30):crnd:sat
+
+# Vector average unsigned bytes
+# CHECK: 10 de 54 d3
+r17:16 = vavgub(r21:20, r31:30)
+# CHECK: 30 de 54 d3
+r17:16 = vavgub(r21:20, r31:30):rnd
+
+# Vector average words
+# CHECK: 10 de 74 d3
+r17:16 = vavgw(r21:20, r31:30)
+# CHECK: 30 de 74 d3
+r17:16 = vavgw(r21:20, r31:30):rnd
+# CHECK: 50 de 74 d3
+r17:16 = vavgw(r21:20, r31:30):crnd
+# CHECK: 70 de 74 d3
+r17:16 = vavguw(r21:20, r31:30)
+# CHECK: 90 de 74 d3
+r17:16 = vavguw(r21:20, r31:30):rnd
+# CHECK: 70 d4 9e d3
+r17:16 = vnavgw(r21:20, r31:30)
+# CHECK: 90 d4 9e d3
+r17:16 = vnavgw(r21:20, r31:30):rnd:sat
+# CHECK: d0 d4 9e d3
+r17:16 = vnavgw(r21:20, r31:30):crnd:sat
+
+# Vector conditional negate
+# CHECK: 50 df d4 c3
+r17:16 = vcnegh(r21:20, r31)
+
+# CHECK: f0 ff 34 cb
+r17:16 += vrcnegh(r21:20, r31)
+
+# Vector maximum bytes
+# CHECK: 10 d4 de d3
+r17:16 = vmaxub(r21:20, r31:30)
+# CHECK: d0 d4 de d3
+r17:16 = vmaxb(r21:20, r31:30)
+
+# Vector maximum halfwords
+# CHECK: 30 d4 de d3
+r17:16 = vmaxh(r21:20, r31:30)
+# CHECK: 50 d4 de d3
+r17:16 = vmaxuh(r21:20, r31:30)
+
+# Vector reduce maximum halfwords
+# CHECK: 3f d0 34 cb
+r17:16 = vrmaxh(r21:20, r31)
+# CHECK: 3f f0 34 cb
+r17:16 = vrmaxuh(r21:20, r31)
+
+# Vector reduce maximum words
+# CHECK: 5f d0 34 cb
+r17:16 = vrmaxw(r21:20, r31)
+# CHECK: 5f f0 34 cb
+r17:16 = vrmaxuw(r21:20, r31)
+
+# Vector maximum words
+# CHECK: b0 d4 be d3
+r17:16 = vmaxuw(r21:20, r31:30)
+# CHECK: 70 d4 de d3
+r17:16 = vmaxw(r21:20, r31:30)
+
+# Vector minimum bytes
+# CHECK: 10 d4 be d3
+r17:16 = vminub(r21:20, r31:30)
+# CHECK: f0 d4 de d3
+r17:16 = vminb(r21:20, r31:30)
+
+# Vector minimum halfwords
+# CHECK: 30 d4 be d3
+r17:16 = vminh(r21:20, r31:30)
+# CHECK: 50 d4 be d3
+r17:16 = vminuh(r21:20, r31:30)
+
+# Vector reduce minimum halfwords
+# CHECK: bf d0 34 cb
+r17:16 = vrminh(r21:20, r31)
+# CHECK: bf f0 34 cb
+r17:16 = vrminuh(r21:20, r31)
+
+# Vector reduce minimum words
+# CHECK: df d0 34 cb
+r17:16 = vrminw(r21:20, r31)
+# CHECK: df f0 34 cb
+r17:16 = vrminuw(r21:20, r31)
+
+# Vector minimum words
+# CHECK: 70 d4 be d3
+r17:16 = vminw(r21:20, r31:30)
+# CHECK: 90 d4 be d3
+r17:16 = vminuw(r21:20, r31:30)
+
+# Vector sum of absolute differences unsigned bytes
+# CHECK: 50 de 54 e8
+r17:16 = vrsadub(r21:20, r31:30)
+# CHECK: 50 de 54 ea
+r17:16 += vrsadub(r21:20, r31:30)
+
+# Vector subtract halfwords
+# CHECK: 50 d4 3e d3
+r17:16 = vsubh(r21:20, r31:30)
+# CHECK: 70 d4 3e d3
+r17:16 = vsubh(r21:20, r31:30):sat
+# CHECK: 90 d4 3e d3
+r17:16 = vsubuh(r21:20, r31:30):sat
+
+# Vector subtract bytes
+# CHECK: 10 d4 3e d3
+r17:16 = vsubub(r21:20, r31:30)
+# CHECK: 30 d4 3e d3
+r17:16 = vsubub(r21:20, r31:30):sat
+
+# Vector subtract words
+# CHECK: b0 d4 3e d3
+r17:16 = vsubw(r21:20, r31:30)
+# CHECK: d0 d4 3e d3
+r17:16 = vsubw(r21:20, r31:30):sat
diff --git a/test/MC/Hexagon/instructions/xtype_bit.s b/test/MC/Hexagon/instructions/xtype_bit.s
new file mode 100644
index 000000000000..6d2fa401b819
--- /dev/null
+++ b/test/MC/Hexagon/instructions/xtype_bit.s
@@ -0,0 +1,118 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.10.2 XTYPE/BIT
+
+# Count leading
+# CHECK: 11 c0 54 88
+r17 = clb(r21:20)
+# CHECK: 51 c0 54 88
+r17 = cl0(r21:20)
+# CHECK: 91 c0 54 88
+r17 = cl1(r21:20)
+# CHECK: 11 c0 74 88
+r17 = normamt(r21:20)
+# CHECK: 51 d7 74 88
+r17 = add(clb(r21:20), #23)
+# CHECK: 11 d7 35 8c
+r17 = add(clb(r21), #23)
+# CHECK: 91 c0 15 8c
+r17 = clb(r21)
+# CHECK: b1 c0 15 8c
+r17 = cl0(r21)
+# CHECK: d1 c0 15 8c
+r17 = cl1(r21)
+# CHECK: f1 c0 15 8c
+r17 = normamt(r21)
+
+# Count population
+# CHECK: 71 c0 74 88
+r17 = popcount(r21:20)
+
+# Count trailing
+# CHECK: 51 c0 f4 88
+r17 = ct0(r21:20)
+# CHECK: 91 c0 f4 88
+r17 = ct1(r21:20)
+# CHECK: 91 c0 55 8c
+r17 = ct0(r21)
+# CHECK: b1 c0 55 8c
+r17 = ct1(r21)
+
+# Extract bitfield
+# CHECK: f0 df 54 81
+r17:16 = extractu(r21:20, #31, #23)
+# CHECK: f0 df 54 8a
+r17:16 = extract(r21:20, #31, #23)
+# CHECK: f1 df 55 8d
+r17 = extractu(r21, #31, #23)
+# CHECK: f1 df d5 8d
+r17 = extract(r21, #31, #23)
+# CHECK: 10 de 14 c1
+r17:16 = extractu(r21:20, r31:30)
+# CHECK: 90 de d4 c1
+r17:16 = extract(r21:20, r31:30)
+# CHECK: 11 de 15 c9
+r17 = extractu(r21, r31:30)
+# CHECK: 51 de 15 c9
+r17 = extract(r21, r31:30)
+
+# Insert bitfield
+# CHECK: f0 df 54 83
+r17:16 = insert(r21:20, #31, #23)
+# CHECK: f1 df 55 8f
+r17 = insert(r21, #31, #23)
+# CHECK: 11 de 15 c8
+r17 = insert(r21, r31:30)
+# CHECK: 10 de 14 ca
+r17:16 = insert(r21:20, r31:30)
+
+# Interleave/deinterleave
+# CHECK: 90 c0 d4 80
+r17:16 = deinterleave(r21:20)
+# CHECK: b0 c0 d4 80
+r17:16 = interleave(r21:20)
+
+# Linear feedback-shift iteration
+# CHECK: d0 de 94 c1
+r17:16 = lfs(r21:20, r31:30)
+
+# Masked parity
+# CHECK: 11 de 14 d0
+r17 = parity(r21:20, r31:30)
+# CHECK: 11 df f5 d5
+r17 = parity(r21, r31)
+
+# Bit reverse
+# CHECK: d0 c0 d4 80
+r17:16 = brev(r21:20)
+# CHECK: d1 c0 55 8c
+r17 = brev(r21)
+
+# Set/clear/toggle bit
+# CHECK: 11 df d5 8c
+r17 = setbit(r21, #31)
+# CHECK: 31 df d5 8c
+r17 = clrbit(r21, #31)
+# CHECK: 51 df d5 8c
+r17 = togglebit(r21, #31)
+# CHECK: 11 df 95 c6
+r17 = setbit(r21, r31)
+# CHECK: 51 df 95 c6
+r17 = clrbit(r21, r31)
+# CHECK: 91 df 95 c6
+r17 = togglebit(r21, r31)
+
+# Split bitfield
+# CHECK: 90 df d5 88
+r17:16 = bitsplit(r21, #31)
+# CHECK: 10 df 35 d4
+r17:16 = bitsplit(r21, r31)
+
+# Table index
+# CHECK: f1 cd 15 87
+r17 = tableidxb(r21, #7, #13):raw
+# CHECK: f1 cd 55 87
+r17 = tableidxh(r21, #7, #13):raw
+# CHECK: f1 cd 95 87
+r17 = tableidxw(r21, #7, #13):raw
+# CHECK: f1 cd d5 87
+r17 = tableidxd(r21, #7, #13):raw
diff --git a/test/MC/Hexagon/instructions/xtype_complex.s b/test/MC/Hexagon/instructions/xtype_complex.s
new file mode 100644
index 000000000000..901c29c80d93
--- /dev/null
+++ b/test/MC/Hexagon/instructions/xtype_complex.s
@@ -0,0 +1,128 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.10.3 XTYPE/COMPLEX
+
+# Complex add/sub halfwords
+# CHECK: 90 de 54 c1
+r17:16 = vxaddsubh(r21:20, r31:30):sat
+# CHECK: d0 de 54 c1
+r17:16 = vxsubaddh(r21:20, r31:30):sat
+# CHECK: 10 de d4 c1
+r17:16 = vxaddsubh(r21:20, r31:30):rnd:>>1:sat
+# CHECK: 50 de d4 c1
+r17:16 = vxsubaddh(r21:20, r31:30):rnd:>>1:sat
+
+# Complex add/sub words
+# CHECK: 10 de 54 c1
+r17:16 = vxaddsubw(r21:20, r31:30):sat
+# CHECK: 50 de 54 c1
+r17:16 = vxsubaddw(r21:20, r31:30):sat
+
+# Complex multiply
+# CHECK: d0 df 15 e5
+r17:16 = cmpy(r21, r31):sat
+# CHECK: d0 df 95 e5
+r17:16 = cmpy(r21, r31):<<1:sat
+# CHECK: d0 df 55 e5
+r17:16 = cmpy(r21, r31*):sat
+# CHECK: d0 df d5 e5
+r17:16 = cmpy(r21, r31*):<<1:sat
+# CHECK: d0 df 15 e7
+r17:16 += cmpy(r21, r31):sat
+# CHECK: d0 df 95 e7
+r17:16 += cmpy(r21, r31):<<1:sat
+# CHECK: f0 df 15 e7
+r17:16 -= cmpy(r21, r31):sat
+# CHECK: f0 df 95 e7
+r17:16 -= cmpy(r21, r31):<<1:sat
+# CHECK: d0 df 55 e7
+r17:16 += cmpy(r21, r31*):sat
+# CHECK: d0 df d5 e7
+r17:16 += cmpy(r21, r31*):<<1:sat
+# CHECK: f0 df 55 e7
+r17:16 -= cmpy(r21, r31*):sat
+# CHECK: f0 df d5 e7
+r17:16 -= cmpy(r21, r31*):<<1:sat
+
+# Complex multiply real or imaginary
+# CHECK: 30 df 15 e5
+r17:16 = cmpyi(r21, r31)
+# CHECK: 50 df 15 e5
+r17:16 = cmpyr(r21, r31)
+# CHECK: 30 df 15 e7
+r17:16 += cmpyi(r21, r31)
+# CHECK: 50 df 15 e7
+r17:16 += cmpyr(r21, r31)
+
+# Complex multiply with round and pack
+# CHECK: d1 df 35 ed
+r17 = cmpy(r21, r31):rnd:sat
+# CHECK: d1 df b5 ed
+r17 = cmpy(r21, r31):<<1:rnd:sat
+# CHECK: d1 df 75 ed
+r17 = cmpy(r21, r31*):rnd:sat
+# CHECK: d1 df f5 ed
+r17 = cmpy(r21, r31*):<<1:rnd:sat
+
+# Complex multiply 32x16
+# CHECK: 91 df 14 c5
+r17 = cmpyiwh(r21:20, r31):<<1:rnd:sat
+# CHECK: b1 df 14 c5
+r17 = cmpyiwh(r21:20, r31*):<<1:rnd:sat
+# CHECK: d1 df 14 c5
+r17 = cmpyrwh(r21:20, r31):<<1:rnd:sat
+# CHECK: f1 df 14 c5
+r17 = cmpyrwh(r21:20, r31*):<<1:rnd:sat
+
+# Vector complex multiply real or imaginary
+# CHECK: d0 de 34 e8
+r17:16 = vcmpyr(r21:20, r31:30):sat
+# CHECK: d0 de b4 e8
+r17:16 = vcmpyr(r21:20, r31:30):<<1:sat
+# CHECK: d0 de 54 e8
+r17:16 = vcmpyi(r21:20, r31:30):sat
+# CHECK: d0 de d4 e8
+r17:16 = vcmpyi(r21:20, r31:30):<<1:sat
+# CHECK: 90 de 34 ea
+r17:16 += vcmpyr(r21:20, r31:30):sat
+# CHECK: 90 de 54 ea
+r17:16 += vcmpyi(r21:20, r31:30):sat
+
+# Vector complex conjugate
+# CHECK: f0 c0 94 80
+r17:16 = vconj(r21:20):sat
+
+# Vector complex rotate
+# CHECK: 10 df d4 c3
+r17:16 = vcrotate(r21:20, r31)
+
+# Vector reduce complex multiply real or imaginary
+# CHECK: 10 de 14 e8
+r17:16 = vrcmpyi(r21:20, r31:30)
+# CHECK: 30 de 14 e8
+r17:16 = vrcmpyr(r21:20, r31:30)
+# CHECK: 10 de 54 e8
+r17:16 = vrcmpyi(r21:20, r31:30*)
+# CHECK: 30 de 74 e8
+r17:16 = vrcmpyr(r21:20, r31:30*)
+
+# Vector reduce complex multiply by scalar
+# CHECK: 90 de b4 e8
+r17:16 = vrcmpys(r21:20, r31:30):<<1:sat:raw:hi
+# CHECK: 90 de f4 e8
+r17:16 = vrcmpys(r21:20, r31:30):<<1:sat:raw:lo
+# CHECK: 90 de b4 ea
+r17:16 += vrcmpys(r21:20, r31:30):<<1:sat:raw:hi
+# CHECK: 90 de f4 ea
+r17:16 += vrcmpys(r21:20, r31:30):<<1:sat:raw:lo
+
+# Vector reduce complex multiply by scalar with round and pack
+# CHECK: d1 de b4 e9
+r17 = vrcmpys(r21:20, r31:30):<<1:rnd:sat:raw:hi
+# CHECK: f1 de b4 e9
+r17 = vrcmpys(r21:20, r31:30):<<1:rnd:sat:raw:lo
+
+# Vector reduce complex rotate
+# CHECK: f0 ff d4 c3
+r17:16 = vrcrotate(r21:20, r31, #3)
+# CHECK: 30 ff b4 cb
+r17:16 += vrcrotate(r21:20, r31, #3)
diff --git a/test/MC/Hexagon/instructions/xtype_fp.s b/test/MC/Hexagon/instructions/xtype_fp.s
new file mode 100644
index 000000000000..184098ec6d0f
--- /dev/null
+++ b/test/MC/Hexagon/instructions/xtype_fp.s
@@ -0,0 +1,146 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.10.4 XTYPE/FP
+
+# Floating point addition
+# CHECK: 11 df 15 eb
+r17 = sfadd(r21, r31)
+
+# Classify floating-point value
+# CHECK: 03 d5 f1 85
+p3 = sfclass(r17, #21)
+# CHECK: b3 c2 90 dc
+p3 = dfclass(r17:16, #21)
+
+# Compare floating-point value
+# CHECK: 03 d5 f1 c7
+p3 = sfcmp.ge(r17, r21)
+# CHECK: 23 d5 f1 c7
+p3 = sfcmp.uo(r17, r21)
+# CHECK: 63 d5 f1 c7
+p3 = sfcmp.eq(r17, r21)
+# CHECK: 83 d5 f1 c7
+p3 = sfcmp.gt(r17, r21)
+# CHECK: 03 d4 f0 d2
+p3 = dfcmp.eq(r17:16, r21:20)
+# CHECK: 23 d4 f0 d2
+p3 = dfcmp.gt(r17:16, r21:20)
+# CHECK: 43 d4 f0 d2
+p3 = dfcmp.ge(r17:16, r21:20)
+# CHECK: 63 d4 f0 d2
+p3 = dfcmp.uo(r17:16, r21:20)
+
+# Convert floating-point value to other format
+# CHECK: 10 c0 95 84
+r17:16 = convert_sf2df(r21)
+# CHECK: 31 c0 14 88
+r17 = convert_df2sf(r21:20)
+
+# Convert integer to floating-point value
+# CHECK: 50 c0 f4 80
+r17:16 = convert_ud2df(r21:20)
+# CHECK: 70 c0 f4 80
+r17:16 = convert_d2df(r21:20)
+# CHECK: 30 c0 95 84
+r17:16 = convert_uw2df(r21)
+# CHECK: 50 c0 95 84
+r17:16 = convert_w2df(r21)
+# CHECK: 31 c0 34 88
+r17 = convert_ud2sf(r21:20)
+# CHECK: 31 c0 54 88
+r17 = convert_d2sf(r21:20)
+# CHECK: 11 c0 35 8b
+r17 = convert_uw2sf(r21)
+# CHECK: 11 c0 55 8b
+r17 = convert_w2sf(r21)
+
+# Convert floating-point value to integer
+# CHECK: 10 c0 f4 80
+r17:16 = convert_df2d(r21:20)
+# CHECK: 30 c0 f4 80
+r17:16 = convert_df2ud(r21:20)
+# CHECK: d0 c0 f4 80
+r17:16 = convert_df2d(r21:20):chop
+# CHECK: f0 c0 f4 80
+r17:16 = convert_df2ud(r21:20):chop
+# CHECK: 70 c0 95 84
+r17:16 = convert_sf2ud(r21)
+# CHECK: 90 c0 95 84
+r17:16 = convert_sf2d(r21)
+# CHECK: b0 c0 95 84
+r17:16 = convert_sf2ud(r21):chop
+# CHECK: d0 c0 95 84
+r17:16 = convert_sf2d(r21):chop
+# CHECK: 31 c0 74 88
+r17 = convert_df2uw(r21:20)
+# CHECK: 31 c0 94 88
+r17 = convert_df2w(r21:20)
+# CHECK: 31 c0 b4 88
+r17 = convert_df2uw(r21:20):chop
+# CHECK: 31 c0 f4 88
+r17 = convert_df2w(r21:20):chop
+# CHECK: 11 c0 75 8b
+r17 = convert_sf2uw(r21)
+# CHECK: 31 c0 75 8b
+r17 = convert_sf2uw(r21):chop
+# CHECK: 11 c0 95 8b
+r17 = convert_sf2w(r21)
+# CHECK: 31 c0 95 8b
+r17 = convert_sf2w(r21):chop
+
+# Floating point extreme value assistance
+# CHECK: 11 c0 b5 8b
+r17 = sffixupr(r21)
+# CHECK: 11 df d5 eb
+r17 = sffixupn(r21, r31)
+# CHECK: 31 df d5 eb
+r17 = sffixupd(r21, r31)
+
+# Floating point fused multiply-add
+# CHECK: 91 df 15 ef
+r17 += sfmpy(r21, r31)
+# CHECK: b1 df 15 ef
+r17 -= sfmpy(r21, r31)
+
+# Floating point fused multiply-add with scaling
+# CHECK: f1 df 75 ef
+r17 += sfmpy(r21, r31, p3):scale
+
+# Floating point reciprocal square root approximation
+# CHECK: 71 c0 f5 8b
+r17, p3 = sfinvsqrta(r21)
+
+# Floating point fused multiply-add for library routines
+# CHECK: d1 df 15 ef
+r17 += sfmpy(r21, r31):lib
+# CHECK: f1 df 15 ef
+r17 -= sfmpy(r21, r31):lib
+
+# Create floating-point constant
+# CHECK: b1 c2 00 d6
+r17 = sfmake(#21):pos
+# CHECK: b1 c2 40 d6
+r17 = sfmake(#21):neg
+# CHECK: b0 c2 00 d9
+r17:16 = dfmake(#21):pos
+# CHECK: b0 c2 40 d9
+r17:16 = dfmake(#21):neg
+
+# Floating point maximum
+# CHECK: 11 df 95 eb
+r17 = sfmax(r21, r31)
+
+# Floating point minimum
+# CHECK: 31 df 95 eb
+r17 = sfmin(r21, r31)
+
+# Floating point multiply
+# CHECK: 11 df 55 eb
+r17 = sfmpy(r21, r31)
+
+# Floating point reciprocal approximation
+# CHECK: f1 df f5 eb
+r17, p3 = sfrecipa(r21, r31)
+
+# Floating point subtraction
+# CHECK: 31 df 15 eb
+r17 = sfsub(r21, r31)
diff --git a/test/MC/Hexagon/instructions/xtype_mpy.s b/test/MC/Hexagon/instructions/xtype_mpy.s
new file mode 100644
index 000000000000..4b9efd4cabc9
--- /dev/null
+++ b/test/MC/Hexagon/instructions/xtype_mpy.s
@@ -0,0 +1,400 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.10.5 XTYPE/MPY
+
+# Multiply and use lower result
+# CHECK: b1 df 35 d7
+r17 = add(#21, mpyi(r21, r31))
+# CHECK: bf d1 35 d8
+r17 = add(#21, mpyi(r21, #31))
+# CHECK: b5 d1 3f df
+r17 = add(r21, mpyi(#84, r31))
+# CHECK: f5 f1 b5 df
+r17 = add(r21, mpyi(r21, #31))
+# CHECK: 15 d1 1f e3
+r17 = add(r21, mpyi(r17, r31))
+# CHECK: f1 c3 15 e0
+r17 =+ mpyi(r21, #31)
+# CHECK: f1 c3 95 e0
+r17 =- mpyi(r21, #31)
+# CHECK: f1 c3 15 e1
+r17 += mpyi(r21, #31)
+# CHECK: f1 c3 95 e1
+r17 -= mpyi(r21, #31)
+# CHECK: 11 df 15 ed
+r17 = mpyi(r21, r31)
+# CHECK: 11 df 15 ef
+r17 += mpyi(r21, r31)
+
+# Vector multiply word by signed half (32x16)
+# CHECK: b0 de 14 e8
+r17:16 = vmpyweh(r21:20, r31:30):sat
+# CHECK: b0 de 94 e8
+r17:16 = vmpyweh(r21:20, r31:30):<<1:sat
+# CHECK: f0 de 14 e8
+r17:16 = vmpywoh(r21:20, r31:30):sat
+# CHECK: f0 de 94 e8
+r17:16 = vmpywoh(r21:20, r31:30):<<1:sat
+# CHECK: b0 de 34 e8
+r17:16 = vmpyweh(r21:20, r31:30):rnd:sat
+# CHECK: b0 de b4 e8
+r17:16 = vmpyweh(r21:20, r31:30):<<1:rnd:sat
+# CHECK: f0 de 34 e8
+r17:16 = vmpywoh(r21:20, r31:30):rnd:sat
+# CHECK: f0 de b4 e8
+r17:16 = vmpywoh(r21:20, r31:30):<<1:rnd:sat
+# CHECK: b0 de 14 ea
+r17:16 += vmpyweh(r21:20, r31:30):sat
+# CHECK: b0 de 94 ea
+r17:16 += vmpyweh(r21:20, r31:30):<<1:sat
+# CHECK: f0 de 14 ea
+r17:16 += vmpywoh(r21:20, r31:30):sat
+# CHECK: f0 de 94 ea
+r17:16 += vmpywoh(r21:20, r31:30):<<1:sat
+# CHECK: b0 de 34 ea
+r17:16 += vmpyweh(r21:20, r31:30):rnd:sat
+# CHECK: b0 de b4 ea
+r17:16 += vmpyweh(r21:20, r31:30):<<1:rnd:sat
+# CHECK: f0 de 34 ea
+r17:16 += vmpywoh(r21:20, r31:30):rnd:sat
+# CHECK: f0 de b4 ea
+r17:16 += vmpywoh(r21:20, r31:30):<<1:rnd:sat
+
+# Vector multiply word by unsigned half (32x16)
+# CHECK: b0 de 54 e8
+r17:16 = vmpyweuh(r21:20, r31:30):sat
+# CHECK: b0 de d4 e8
+r17:16 = vmpyweuh(r21:20, r31:30):<<1:sat
+# CHECK: f0 de 54 e8
+r17:16 = vmpywouh(r21:20, r31:30):sat
+# CHECK: f0 de d4 e8
+r17:16 = vmpywouh(r21:20, r31:30):<<1:sat
+# CHECK: b0 de 74 e8
+r17:16 = vmpyweuh(r21:20, r31:30):rnd:sat
+# CHECK: b0 de f4 e8
+r17:16 = vmpyweuh(r21:20, r31:30):<<1:rnd:sat
+# CHECK: f0 de 74 e8
+r17:16 = vmpywouh(r21:20, r31:30):rnd:sat
+# CHECK: f0 de f4 e8
+r17:16 = vmpywouh(r21:20, r31:30):<<1:rnd:sat
+# CHECK: b0 de 54 ea
+r17:16 += vmpyweuh(r21:20, r31:30):sat
+# CHECK: b0 de d4 ea
+r17:16 += vmpyweuh(r21:20, r31:30):<<1:sat
+# CHECK: f0 de 54 ea
+r17:16 += vmpywouh(r21:20, r31:30):sat
+# CHECK: f0 de d4 ea
+r17:16 += vmpywouh(r21:20, r31:30):<<1:sat
+# CHECK: b0 de 74 ea
+r17:16 += vmpyweuh(r21:20, r31:30):rnd:sat
+# CHECK: b0 de f4 ea
+r17:16 += vmpyweuh(r21:20, r31:30):<<1:rnd:sat
+# CHECK: f0 de 74 ea
+r17:16 += vmpywouh(r21:20, r31:30):rnd:sat
+# CHECK: f0 de f4 ea
+r17:16 += vmpywouh(r21:20, r31:30):<<1:rnd:sat
+
+# Multiply signed halfwords
+# CHECK: 10 df 95 e4
+r17:16 = mpy(r21.l, r31.l):<<1
+# CHECK: 30 df 95 e4
+r17:16 = mpy(r21.l, r31.h):<<1
+# CHECK: 50 df 95 e4
+r17:16 = mpy(r21.h, r31.l):<<1
+# CHECK: 70 df 95 e4
+r17:16 = mpy(r21.h, r31.h):<<1
+# CHECK: 10 df b5 e4
+r17:16 = mpy(r21.l, r31.l):<<1:rnd
+# CHECK: 30 df b5 e4
+r17:16 = mpy(r21.l, r31.h):<<1:rnd
+# CHECK: 50 df b5 e4
+r17:16 = mpy(r21.h, r31.l):<<1:rnd
+# CHECK: 70 df b5 e4
+r17:16 = mpy(r21.h, r31.h):<<1:rnd
+# CHECK: 10 df 95 e6
+r17:16 += mpy(r21.l, r31.l):<<1
+# CHECK: 30 df 95 e6
+r17:16 += mpy(r21.l, r31.h):<<1
+# CHECK: 50 df 95 e6
+r17:16 += mpy(r21.h, r31.l):<<1
+# CHECK: 70 df 95 e6
+r17:16 += mpy(r21.h, r31.h):<<1
+# CHECK: 10 df b5 e6
+r17:16 -= mpy(r21.l, r31.l):<<1
+# CHECK: 30 df b5 e6
+r17:16 -= mpy(r21.l, r31.h):<<1
+# CHECK: 50 df b5 e6
+r17:16 -= mpy(r21.h, r31.l):<<1
+# CHECK: 70 df b5 e6
+r17:16 -= mpy(r21.h, r31.h):<<1
+# CHECK: 11 df 95 ec
+r17 = mpy(r21.l, r31.l):<<1
+# CHECK: 31 df 95 ec
+r17 = mpy(r21.l, r31.h):<<1
+# CHECK: 51 df 95 ec
+r17 = mpy(r21.h, r31.l):<<1
+# CHECK: 71 df 95 ec
+r17 = mpy(r21.h, r31.h):<<1
+# CHECK: 91 df 95 ec
+r17 = mpy(r21.l, r31.l):<<1:sat
+# CHECK: b1 df 95 ec
+r17 = mpy(r21.l, r31.h):<<1:sat
+# CHECK: d1 df 95 ec
+r17 = mpy(r21.h, r31.l):<<1:sat
+# CHECK: f1 df 95 ec
+r17 = mpy(r21.h, r31.h):<<1:sat
+# CHECK: 11 df b5 ec
+r17 = mpy(r21.l, r31.l):<<1:rnd
+# CHECK: 31 df b5 ec
+r17 = mpy(r21.l, r31.h):<<1:rnd
+# CHECK: 51 df b5 ec
+r17 = mpy(r21.h, r31.l):<<1:rnd
+# CHECK: 71 df b5 ec
+r17 = mpy(r21.h, r31.h):<<1:rnd
+# CHECK: 91 df b5 ec
+r17 = mpy(r21.l, r31.l):<<1:rnd:sat
+# CHECK: b1 df b5 ec
+r17 = mpy(r21.l, r31.h):<<1:rnd:sat
+# CHECK: d1 df b5 ec
+r17 = mpy(r21.h, r31.l):<<1:rnd:sat
+# CHECK: f1 df b5 ec
+r17 = mpy(r21.h, r31.h):<<1:rnd:sat
+# CHECK: 11 df 95 ee
+r17 += mpy(r21.l, r31.l):<<1
+# CHECK: 31 df 95 ee
+r17 += mpy(r21.l, r31.h):<<1
+# CHECK: 51 df 95 ee
+r17 += mpy(r21.h, r31.l):<<1
+# CHECK: 71 df 95 ee
+r17 += mpy(r21.h, r31.h):<<1
+# CHECK: 91 df 95 ee
+r17 += mpy(r21.l, r31.l):<<1:sat
+# CHECK: b1 df 95 ee
+r17 += mpy(r21.l, r31.h):<<1:sat
+# CHECK: d1 df 95 ee
+r17 += mpy(r21.h, r31.l):<<1:sat
+# CHECK: f1 df 95 ee
+r17 += mpy(r21.h, r31.h):<<1:sat
+# CHECK: 11 df b5 ee
+r17 -= mpy(r21.l, r31.l):<<1
+# CHECK: 31 df b5 ee
+r17 -= mpy(r21.l, r31.h):<<1
+# CHECK: 51 df b5 ee
+r17 -= mpy(r21.h, r31.l):<<1
+# CHECK: 71 df b5 ee
+r17 -= mpy(r21.h, r31.h):<<1
+# CHECK: 91 df b5 ee
+r17 -= mpy(r21.l, r31.l):<<1:sat
+# CHECK: b1 df b5 ee
+r17 -= mpy(r21.l, r31.h):<<1:sat
+# CHECK: d1 df b5 ee
+r17 -= mpy(r21.h, r31.l):<<1:sat
+# CHECK: f1 df b5 ee
+r17 -= mpy(r21.h, r31.h):<<1:sat
+
+# Multiply unsigned halfwords
+# CHECK: 10 df d5 e4
+r17:16 = mpyu(r21.l, r31.l):<<1
+# CHECK: 30 df d5 e4
+r17:16 = mpyu(r21.l, r31.h):<<1
+# CHECK: 50 df d5 e4
+r17:16 = mpyu(r21.h, r31.l):<<1
+# CHECK: 70 df d5 e4
+r17:16 = mpyu(r21.h, r31.h):<<1
+# CHECK: 10 df d5 e6
+r17:16 += mpyu(r21.l, r31.l):<<1
+# CHECK: 30 df d5 e6
+r17:16 += mpyu(r21.l, r31.h):<<1
+# CHECK: 50 df d5 e6
+r17:16 += mpyu(r21.h, r31.l):<<1
+# CHECK: 70 df d5 e6
+r17:16 += mpyu(r21.h, r31.h):<<1
+# CHECK: 10 df f5 e6
+r17:16 -= mpyu(r21.l, r31.l):<<1
+# CHECK: 30 df f5 e6
+r17:16 -= mpyu(r21.l, r31.h):<<1
+# CHECK: 50 df f5 e6
+r17:16 -= mpyu(r21.h, r31.l):<<1
+# CHECK: 70 df f5 e6
+r17:16 -= mpyu(r21.h, r31.h):<<1
+# CHECK: 11 df d5 ec
+r17 = mpyu(r21.l, r31.l):<<1
+# CHECK: 31 df d5 ec
+r17 = mpyu(r21.l, r31.h):<<1
+# CHECK: 51 df d5 ec
+r17 = mpyu(r21.h, r31.l):<<1
+# CHECK: 71 df d5 ec
+r17 = mpyu(r21.h, r31.h):<<1
+# CHECK: 11 df d5 ee
+r17 += mpyu(r21.l, r31.l):<<1
+# CHECK: 31 df d5 ee
+r17 += mpyu(r21.l, r31.h):<<1
+# CHECK: 51 df d5 ee
+r17 += mpyu(r21.h, r31.l):<<1
+# CHECK: 71 df d5 ee
+r17 += mpyu(r21.h, r31.h):<<1
+# CHECK: 11 df f5 ee
+r17 -= mpyu(r21.l, r31.l):<<1
+# CHECK: 31 df f5 ee
+r17 -= mpyu(r21.l, r31.h):<<1
+# CHECK: 51 df f5 ee
+r17 -= mpyu(r21.h, r31.l):<<1
+# CHECK: 71 df f5 ee
+r17 -= mpyu(r21.h, r31.h):<<1
+
+# Polynomial multiply words
+# CHECK: f0 df 55 e5
+r17:16 = pmpyw(r21, r31)
+# CHECK: f0 df 35 e7
+r17:16 ^= pmpyw(r21, r31)
+
+# Vector reduce multiply word by signed half (32x16)
+# CHECK: 50 de 34 e8
+r17:16 = vrmpywoh(r21:20, r31:30)
+# CHECK: 50 de b4 e8
+r17:16 = vrmpywoh(r21:20, r31:30):<<1
+# CHECK: 90 de 54 e8
+r17:16 = vrmpyweh(r21:20, r31:30)
+# CHECK: 90 de d4 e8
+r17:16 = vrmpyweh(r21:20, r31:30):<<1
+# CHECK: d0 de 74 ea
+r17:16 += vrmpywoh(r21:20, r31:30)
+# CHECK: d0 de f4 ea
+r17:16 += vrmpywoh(r21:20, r31:30):<<1
+# CHECK: d0 de 34 ea
+r17:16 += vrmpyweh(r21:20, r31:30)
+# CHECK: d0 de b4 ea
+r17:16 += vrmpyweh(r21:20, r31:30):<<1
+
+# Multiply and use upper result
+# CHECK: 31 df 15 ed
+r17 = mpy(r21, r31)
+# CHECK: 31 df 35 ed
+r17 = mpy(r21, r31):rnd
+# CHECK: 31 df 55 ed
+r17 = mpyu(r21, r31)
+# CHECK: 31 df 75 ed
+r17 = mpysu(r21, r31)
+# CHECK: 11 df b5 ed
+r17 = mpy(r21, r31.h):<<1:sat
+# CHECK: 31 df b5 ed
+r17 = mpy(r21, r31.l):<<1:sat
+# CHECK: 91 df b5 ed
+r17 = mpy(r21, r31.h):<<1:rnd:sat
+# CHECK: 11 df f5 ed
+r17 = mpy(r21, r31):<<1:sat
+# CHECK: 91 df f5 ed
+r17 = mpy(r21, r31.l):<<1:rnd:sat
+# CHECK: 51 df b5 ed
+r17 = mpy(r21, r31):<<1
+# CHECK: 11 df 75 ef
+r17 += mpy(r21, r31):<<1:sat
+# CHECK: 31 df 75 ef
+r17 -= mpy(r21, r31):<<1:sat
+
+# Multiply and use full result
+# CHECK: 10 df 15 e5
+r17:16 = mpy(r21, r31)
+# CHECK: 10 df 55 e5
+r17:16 = mpyu(r21, r31)
+# CHECK: 10 df 15 e7
+r17:16 += mpy(r21, r31)
+# CHECK: 10 df 35 e7
+r17:16 -= mpy(r21, r31)
+# CHECK: 10 df 55 e7
+r17:16 += mpyu(r21, r31)
+# CHECK: 10 df 75 e7
+r17:16 -= mpyu(r21, r31)
+
+# Vector dual multiply
+# CHECK: 90 de 14 e8
+r17:16 = vdmpy(r21:20, r31:30):sat
+# CHECK: 90 de 94 e8
+r17:16 = vdmpy(r21:20, r31:30):<<1:sat
+# CHECK: 90 de 14 ea
+r17:16 += vdmpy(r21:20, r31:30):sat
+# CHECK: 90 de 94 ea
+r17:16 += vdmpy(r21:20, r31:30):<<1:sat
+
+# Vector dual multiply with round and pack
+# CHECK: 11 de 14 e9
+r17 = vdmpy(r21:20, r31:30):rnd:sat
+# CHECK: 11 de 94 e9
+r17 = vdmpy(r21:20, r31:30):<<1:rnd:sat
+
+# Vector reduce multiply bytes
+# CHECK: 30 de 94 e8
+r17:16 = vrmpybu(r21:20, r31:30)
+# CHECK: 30 de d4 e8
+r17:16 = vrmpybsu(r21:20, r31:30)
+# CHECK: 30 de 94 ea
+r17:16 += vrmpybu(r21:20, r31:30)
+# CHECK: 30 de d4 ea
+r17:16 += vrmpybsu(r21:20, r31:30)
+
+# Vector dual multiply signed by unsigned bytes
+# CHECK: 30 de b4 e8
+r17:16 = vdmpybsu(r21:20, r31:30):sat
+# CHECK: 30 de 34 ea
+r17:16 += vdmpybsu(r21:20, r31:30):sat
+
+# Vector multiply even haldwords
+# CHECK: d0 de 14 e8
+r17:16 = vmpyeh(r21:20, r31:30):sat
+# CHECK: d0 de 94 e8
+r17:16 = vmpyeh(r21:20, r31:30):<<1:sat
+# CHECK: 50 de 34 ea
+r17:16 += vmpyeh(r21:20, r31:30)
+# CHECK: d0 de 14 ea
+r17:16 += vmpyeh(r21:20, r31:30):sat
+# CHECK: d0 de 94 ea
+r17:16 += vmpyeh(r21:20, r31:30):<<1:sat
+
+# Vector multiply halfwords
+# CHECK: b0 df 15 e5
+r17:16 = vmpyh(r21, r31):sat
+# CHECK: b0 df 95 e5
+r17:16 = vmpyh(r21, r31):<<1:sat
+# CHECK: 30 df 35 e7
+r17:16 += vmpyh(r21, r31)
+# CHECK: b0 df 15 e7
+r17:16 += vmpyh(r21, r31):sat
+# CHECK: b0 df 95 e7
+r17:16 += vmpyh(r21, r31):<<1:sat
+
+# Vector multiply halfwords with round and pack
+# CHECK: f1 df 35 ed
+r17 = vmpyh(r21, r31):rnd:sat
+# CHECK: f1 df b5 ed
+r17 = vmpyh(r21, r31):<<1:rnd:sat
+
+# Vector multiply halfwords signed by unsigned
+# CHECK: f0 df 15 e5
+r17:16 = vmpyhsu(r21, r31):sat
+# CHECK: f0 df 95 e5
+r17:16 = vmpyhsu(r21, r31):<<1:sat
+# CHECK: b0 df 75 e7
+r17:16 += vmpyhsu(r21, r31):sat
+# CHECK: b0 df f5 e7
+r17:16 += vmpyhsu(r21, r31):<<1:sat
+
+# Vector reduce multiply halfwords
+# CHECK: 50 de 14 e8
+r17:16 = vrmpyh(r21:20, r31:30)
+# CHECK: 50 de 14 ea
+r17:16 += vrmpyh(r21:20, r31:30)
+
+# Vector multiply bytes
+# CHECK: 30 df 55 e5
+r17:16 = vmpybsu(r21, r31)
+# CHECK: 30 df 95 e5
+r17:16 = vmpybu(r21, r31)
+# CHECK: 30 df 95 e7
+r17:16 += vmpybu(r21, r31)
+# CHECK: 30 df d5 e7
+r17:16 += vmpybsu(r21, r31)
+
+# Vector polynomial multiply halfwords
+# CHECK: f0 df d5 e5
+r17:16 = vpmpyh(r21, r31)
+# CHECK: f0 df b5 e7
+r17:16 ^= vpmpyh(r21, r31)
diff --git a/test/MC/Hexagon/instructions/xtype_perm.s b/test/MC/Hexagon/instructions/xtype_perm.s
new file mode 100644
index 000000000000..d8033ec80177
--- /dev/null
+++ b/test/MC/Hexagon/instructions/xtype_perm.s
@@ -0,0 +1,104 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.10.6 XTYPE/PERM
+
+# CABAC decode bin
+# CHECK: d0 de d4 c1
+r17:16 = decbin(r21:20, r31:30)
+
+# Saturate
+# CHECK: 11 c0 d4 88
+r17 = sat(r21:20)
+# CHECK: 91 c0 d5 8c
+r17 = sath(r21)
+# CHECK: b1 c0 d5 8c
+r17 = satuh(r21)
+# CHECK: d1 c0 d5 8c
+r17 = satub(r21)
+# CHECK: f1 c0 d5 8c
+r17 = satb(r21)
+
+# Swizzle bytes
+# CHECK: f1 c0 95 8c
+r17 = swiz(r21)
+
+# Vector align
+# CHECK: 70 d4 1e c2
+r17:16 = valignb(r21:20, r31:30, p3)
+# CHECK: 70 de 94 c2
+r17:16 = vspliceb(r21:20, r31:30, p3)
+
+# Vector round and pack
+# CHECK: 91 c0 94 88
+r17 = vrndwh(r21:20)
+# CHECK: d1 c0 94 88
+r17 = vrndwh(r21:20):sat
+
+# Vector saturate and pack
+# CHECK: 11 c0 14 88
+r17 = vsathub(r21:20)
+# CHECK: 51 c0 14 88
+r17 = vsatwh(r21:20)
+# CHECK: 91 c0 14 88
+r17 = vsatwuh(r21:20)
+# CHECK: d1 c0 14 88
+r17 = vsathb(r21:20)
+# CHECK: 11 c0 95 8c
+r17 = vsathb(r21)
+# CHECK: 51 c0 95 8c
+r17 = vsathub(r21)
+
+# Vector saturate without pack
+# CHECK: 90 c0 14 80
+r17:16 = vsathub(r21:20)
+# CHECK: b0 c0 14 80
+r17:16 = vsatwuh(r21:20)
+# CHECK: d0 c0 14 80
+r17:16 = vsatwh(r21:20)
+# CHECK: f0 c0 14 80
+r17:16 = vsathb(r21:20)
+
+# Vector shuffle
+# CHECK: 50 de 14 c1
+r17:16 = shuffeb(r21:20, r31:30)
+# CHECK: 90 d4 1e c1
+r17:16 = shuffob(r21:20, r31:30)
+# CHECK: d0 de 14 c1
+r17:16 = shuffeh(r21:20, r31:30)
+# CHECK: 10 d4 9e c1
+r17:16 = shuffoh(r21:20, r31:30)
+
+# Vector splat bytes
+# CHECK: f1 c0 55 8c
+r17 = vsplatb(r21)
+
+# Vector splat halfwords
+# CHECK: 50 c0 55 84
+r17:16 = vsplath(r21)
+
+# Vector splice
+# CHECK: 70 de 94 c0
+r17:16 = vspliceb(r21:20, r31:30, #3)
+# CHECK: 70 de 94 c2
+r17:16 = vspliceb(r21:20, r31:30, p3)
+
+# Vector sign extend
+# CHECK: 10 c0 15 84
+r17:16 = vsxtbh(r21)
+# CHECK: 90 c0 15 84
+r17:16 = vsxthw(r21)
+
+# Vector truncate
+# CHECK: 11 c0 94 88
+r17 = vtrunohb(r21:20)
+# CHECK: 51 c0 94 88
+r17 = vtrunehb(r21:20)
+# CHECK: 50 de 94 c1
+r17:16 = vtrunewh(r21:20, r31:30)
+# CHECK: 90 de 94 c1
+r17:16 = vtrunowh(r21:20, r31:30)
+
+# Vector zero extend
+# CHECK: 50 c0 15 84
+r17:16 = vzxtbh(r21)
+# CHECK: d0 c0 15 84
+r17:16 = vzxthw(r21)
diff --git a/test/MC/Hexagon/instructions/xtype_pred.s b/test/MC/Hexagon/instructions/xtype_pred.s
new file mode 100644
index 000000000000..769de0f6e027
--- /dev/null
+++ b/test/MC/Hexagon/instructions/xtype_pred.s
@@ -0,0 +1,136 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.10.7 XTYPE/PRED
+
+# Bounds check
+# CHECK: 83 f4 10 d2
+p3 = boundscheck(r17:16, r21:20):raw:lo
+# CHECK: a3 f4 10 d2
+p3 = boundscheck(r17:16, r21:20):raw:hi
+
+# Compare byte
+# CHECK: 43 d5 d1 c7
+p3 = cmpb.gt(r17, r21)
+# CHECK: c3 d5 d1 c7
+p3 = cmpb.eq(r17, r21)
+# CHECK: e3 d5 d1 c7
+p3 = cmpb.gtu(r17, r21)
+# CHECK: a3 c2 11 dd
+p3 = cmpb.eq(r17, #21)
+# CHECK: a3 c2 31 dd
+p3 = cmpb.gt(r17, #21)
+# CHECK: a3 c2 51 dd
+p3 = cmpb.gtu(r17, #21)
+
+# Compare half
+# CHECK: 63 d5 d1 c7
+p3 = cmph.eq(r17, r21)
+# CHECK: 83 d5 d1 c7
+p3 = cmph.gt(r17, r21)
+# CHECK: a3 d5 d1 c7
+p3 = cmph.gtu(r17, r21)
+# CHECK: ab c2 11 dd
+p3 = cmph.eq(r17, #21)
+# CHECK: ab c2 31 dd
+p3 = cmph.gt(r17, #21)
+# CHECK: ab c2 51 dd
+p3 = cmph.gtu(r17, #21)
+
+# Compare doublewords
+# CHECK: 03 de 94 d2
+p3 = cmp.eq(r21:20, r31:30)
+# CHECK: 43 de 94 d2
+p3 = cmp.gt(r21:20, r31:30)
+# CHECK: 83 de 94 d2
+p3 = cmp.gtu(r21:20, r31:30)
+
+# Compare bitmask
+# CHECK: 03 d5 91 85
+p3 = bitsclr(r17, #21)
+# CHECK: 03 d5 b1 85
+p3 = !bitsclr(r17, #21)
+# CHECK: 03 d5 51 c7
+p3 = bitsset(r17, r21)
+# CHECK: 03 d5 71 c7
+p3 = !bitsset(r17, r21)
+# CHECK: 03 d5 91 c7
+p3 = bitsclr(r17, r21)
+# CHECK: 03 d5 b1 c7
+p3 = !bitsclr(r17, r21)
+
+# mask generate from predicate
+# CHECK: 10 c3 00 86
+r17:16 = mask(p3)
+
+# Check for TLB match
+# CHECK: 63 f5 10 d2
+p3 = tlbmatch(r17:16, r21)
+
+# Predicate Transfer
+# CHECK: 03 c0 45 85
+p3 = r5
+# CHECK: 05 c0 43 89
+r5 = p3
+
+# Test bit
+# CHECK: 03 d5 11 85
+p3 = tstbit(r17, #21)
+# CHECK: 03 d5 31 85
+p3 = !tstbit(r17, #21)
+# CHECK: 03 d5 11 c7
+p3 = tstbit(r17, r21)
+# CHECK: 03 d5 31 c7
+p3 = !tstbit(r17, r21)
+
+# Vector compare halfwords
+# CHECK: 63 de 14 d2
+p3 = vcmph.eq(r21:20, r31:30)
+# CHECK: 83 de 14 d2
+p3 = vcmph.gt(r21:20, r31:30)
+# CHECK: a3 de 14 d2
+p3 = vcmph.gtu(r21:20, r31:30)
+# CHECK: eb c3 14 dc
+p3 = vcmph.eq(r21:20, #31)
+# CHECK: eb c3 34 dc
+p3 = vcmph.gt(r21:20, #31)
+# CHECK: eb c3 54 dc
+p3 = vcmph.gtu(r21:20, #31)
+
+# Vector compare bytes for any match
+# CHECK: 03 fe 14 d2
+p3 = any8(vcmpb.eq(r21:20, r31:30))
+
+# Vector compare bytes
+# CHECK: 63 de 14 d2
+p3 = vcmph.eq(r21:20, r31:30)
+# CHECK: 83 de 14 d2
+p3 = vcmph.gt(r21:20, r31:30)
+# CHECK: a3 de 14 d2
+p3 = vcmph.gtu(r21:20, r31:30)
+# CHECK: eb c3 14 dc
+p3 = vcmph.eq(r21:20, #31)
+# CHECK: eb c3 34 dc
+p3 = vcmph.gt(r21:20, #31)
+# CHECK: eb c3 54 dc
+p3 = vcmph.gtu(r21:20, #31)
+
+# Vector compare words
+# CHECK: 03 de 14 d2
+p3 = vcmpw.eq(r21:20, r31:30)
+# CHECK: 23 de 14 d2
+p3 = vcmpw.gt(r21:20, r31:30)
+# CHECK: 43 de 14 d2
+p3 = vcmpw.gtu(r21:20, r31:30)
+# CHECK: f3 c3 14 dc
+p3 = vcmpw.eq(r21:20, #31)
+# CHECK: f3 c3 34 dc
+p3 = vcmpw.gt(r21:20, #31)
+# CHECK: f3 c3 54 dc
+p3 = vcmpw.gtu(r21:20, #31)
+
+# Viterbi pack even and odd predicate bits
+# CHECK: 11 c2 03 89
+r17 = vitpack(p3, p2)
+
+# Vector mux
+# CHECK: 70 de 14 d1
+r17:16 = vmux(p3, r21:20, r31:30)
diff --git a/test/MC/Hexagon/instructions/xtype_shift.s b/test/MC/Hexagon/instructions/xtype_shift.s
new file mode 100644
index 000000000000..bbe327b62bad
--- /dev/null
+++ b/test/MC/Hexagon/instructions/xtype_shift.s
@@ -0,0 +1,260 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj -o - %s | llvm-objdump -d - | FileCheck %s
+# Hexagon Programmer's Reference Manual 11.10.8 XTYPE/SHIFT
+
+# Shift by immediate
+# CHECK: 10 df 14 80
+r17:16 = asr(r21:20, #31)
+# CHECK: 30 df 14 80
+r17:16 = lsr(r21:20, #31)
+# CHECK: 50 df 14 80
+r17:16 = asl(r21:20, #31)
+# CHECK: 11 df 15 8c
+r17 = asr(r21, #31)
+# CHECK: 31 df 15 8c
+r17 = lsr(r21, #31)
+# CHECK: 51 df 15 8c
+r17 = asl(r21, #31)
+
+# Shift by immediate and accumulate
+# CHECK: 10 df 14 82
+r17:16 -= asr(r21:20, #31)
+# CHECK: 30 df 14 82
+r17:16 -= lsr(r21:20, #31)
+# CHECK: 50 df 14 82
+r17:16 -= asl(r21:20, #31)
+# CHECK: 90 df 14 82
+r17:16 += asr(r21:20, #31)
+# CHECK: b0 df 14 82
+r17:16 += lsr(r21:20, #31)
+# CHECK: d0 df 14 82
+r17:16 += asl(r21:20, #31)
+# CHECK: 11 df 15 8e
+r17 -= asr(r21, #31)
+# CHECK: 31 df 15 8e
+r17 -= lsr(r21, #31)
+# CHECK: 51 df 15 8e
+r17 -= asl(r21, #31)
+# CHECK: 91 df 15 8e
+r17 += asr(r21, #31)
+# CHECK: b1 df 15 8e
+r17 += lsr(r21, #31)
+# CHECK: d1 df 15 8e
+r17 += asl(r21, #31)
+# CHECK: 4c f7 11 de
+r17 = add(#21, asl(r17, #23))
+# CHECK: 4e f7 11 de
+r17 = sub(#21, asl(r17, #23))
+# CHECK: 5c f7 11 de
+r17 = add(#21, lsr(r17, #23))
+# CHECK: 5e f7 11 de
+r17 = sub(#21, lsr(r17, #23))
+
+# Shift by immediate and add
+# CHECK: f1 d5 1f c4
+r17 = addasl(r21, r31, #7)
+
+# Shift by immediate and logical
+# CHECK: 10 df 54 82
+r17:16 &= asr(r21:20, #31)
+# CHECK: 30 df 54 82
+r17:16 &= lsr(r21:20, #31)
+# CHECK: 50 df 54 82
+r17:16 &= asl(r21:20, #31)
+# CHECK: 90 df 54 82
+r17:16 |= asr(r21:20, #31)
+# CHECK: b0 df 54 82
+r17:16 |= lsr(r21:20, #31)
+# CHECK: d0 df 54 82
+r17:16 |= asl(r21:20, #31)
+# CHECK: 30 df 94 82
+r17:16 ^= lsr(r21:20, #31)
+# CHECK: 50 df 94 82
+r17:16 ^= asl(r21:20, #31)
+# CHECK: 11 df 55 8e
+r17 &= asr(r21, #31)
+# CHECK: 31 df 55 8e
+r17 &= lsr(r21, #31)
+# CHECK: 51 df 55 8e
+r17 &= asl(r21, #31)
+# CHECK: 91 df 55 8e
+r17 |= asr(r21, #31)
+# CHECK: b1 df 55 8e
+r17 |= lsr(r21, #31)
+# CHECK: d1 df 55 8e
+r17 |= asl(r21, #31)
+# CHECK: 31 df 95 8e
+r17 ^= lsr(r21, #31)
+# CHECK: 51 df 95 8e
+r17 ^= asl(r21, #31)
+# CHECK: 48 ff 11 de
+r17 = and(#21, asl(r17, #31))
+# CHECK: 4a ff 11 de
+r17 = or(#21, asl(r17, #31))
+# CHECK: 58 ff 11 de
+r17 = and(#21, lsr(r17, #31))
+# CHECK: 5a ff 11 de
+r17 = or(#21, lsr(r17, #31))
+
+# Shift right by immediate with rounding
+# CHECK: f0 df d4 80
+r17:16 = asr(r21:20, #31):rnd
+# CHECK: 11 df 55 8c
+r17 = asr(r21, #31):rnd
+
+# Shift left by immediate with saturation
+# CHECK: 51 df 55 8c
+r17 = asl(r21, #31):sat
+
+# Shift by register
+# CHECK: 10 df 94 c3
+r17:16 = asr(r21:20, r31)
+# CHECK: 50 df 94 c3
+r17:16 = lsr(r21:20, r31)
+# CHECK: 90 df 94 c3
+r17:16 = asl(r21:20, r31)
+# CHECK: d0 df 94 c3
+r17:16 = lsl(r21:20, r31)
+# CHECK: 11 df 55 c6
+r17 = asr(r21, r31)
+# CHECK: 51 df 55 c6
+r17 = lsr(r21, r31)
+# CHECK: 91 df 55 c6
+r17 = asl(r21, r31)
+# CHECK: d1 df 55 c6
+r17 = lsl(r21, r31)
+# CHECK: f1 df 8a c6
+r17 = lsl(#21, r31)
+
+# Shift by register and accumulate
+# CHECK: 10 df 94 cb
+r17:16 -= asr(r21:20, r31)
+# CHECK: 50 df 94 cb
+r17:16 -= lsr(r21:20, r31)
+# CHECK: 90 df 94 cb
+r17:16 -= asl(r21:20, r31)
+# CHECK: d0 df 94 cb
+r17:16 -= lsl(r21:20, r31)
+# CHECK: 10 df d4 cb
+r17:16 += asr(r21:20, r31)
+# CHECK: 50 df d4 cb
+r17:16 += lsr(r21:20, r31)
+# CHECK: 90 df d4 cb
+r17:16 += asl(r21:20, r31)
+# CHECK: d0 df d4 cb
+r17:16 += lsl(r21:20, r31)
+# CHECK: 11 df 95 cc
+r17 -= asr(r21, r31)
+# CHECK: 51 df 95 cc
+r17 -= lsr(r21, r31)
+# CHECK: 91 df 95 cc
+r17 -= asl(r21, r31)
+# CHECK: d1 df 95 cc
+r17 -= lsl(r21, r31)
+# CHECK: 11 df d5 cc
+r17 += asr(r21, r31)
+# CHECK: 51 df d5 cc
+r17 += lsr(r21, r31)
+# CHECK: 91 df d5 cc
+r17 += asl(r21, r31)
+# CHECK: d1 df d5 cc
+r17 += lsl(r21, r31)
+
+# Shift by register and logical
+# CHECK: 10 df 14 cb
+r17:16 |= asr(r21:20, r31)
+# CHECK: 50 df 14 cb
+r17:16 |= lsr(r21:20, r31)
+# CHECK: 90 df 14 cb
+r17:16 |= asl(r21:20, r31)
+# CHECK: d0 df 14 cb
+r17:16 |= lsl(r21:20, r31)
+# CHECK: 10 df 54 cb
+r17:16 &= asr(r21:20, r31)
+# CHECK: 50 df 54 cb
+r17:16 &= lsr(r21:20, r31)
+# CHECK: 90 df 54 cb
+r17:16 &= asl(r21:20, r31)
+# CHECK: d0 df 54 cb
+r17:16 &= lsl(r21:20, r31)
+# CHECK: 10 df 74 cb
+r17:16 ^= asr(r21:20, r31)
+# CHECK: 50 df 74 cb
+r17:16 ^= lsr(r21:20, r31)
+# CHECK: 90 df 74 cb
+r17:16 ^= asl(r21:20, r31)
+# CHECK: d0 df 74 cb
+r17:16 ^= lsl(r21:20, r31)
+# CHECK: 11 df 15 cc
+r17 |= asr(r21, r31)
+# CHECK: 51 df 15 cc
+r17 |= lsr(r21, r31)
+# CHECK: 91 df 15 cc
+r17 |= asl(r21, r31)
+# CHECK: d1 df 15 cc
+r17 |= lsl(r21, r31)
+# CHECK: 11 df 55 cc
+r17 &= asr(r21, r31)
+# CHECK: 51 df 55 cc
+r17 &= lsr(r21, r31)
+# CHECK: 91 df 55 cc
+r17 &= asl(r21, r31)
+# CHECK: d1 df 55 cc
+r17 &= lsl(r21, r31)
+
+# Shift by register with saturation
+# CHECK: 11 df 15 c6
+r17 = asr(r21, r31):sat
+# CHECK: 91 df 15 c6
+r17 = asl(r21, r31):sat
+
+# Vector shift halfwords by immediate
+# CHECK: 10 c5 94 80
+r17:16 = vasrh(r21:20, #5)
+# CHECK: 30 c5 94 80
+r17:16 = vlsrh(r21:20, #5)
+# CHECK: 50 c5 94 80
+r17:16 = vaslh(r21:20, #5)
+
+# Vector arithmetic shift halfwords with round
+# CHECK: 10 c5 34 80
+r17:16 = vasrh(r21:20, #5):raw
+
+# Vector arithmetic shift halfwords with saturate and pack
+# CHECK: 91 c5 74 88
+r17 = vasrhub(r21:20, #5):raw
+# CHECK: b1 c5 74 88
+r17 = vasrhub(r21:20, #5):sat
+
+# Vector shift halfwords by register
+# CHECK: 10 df 54 c3
+r17:16 = vasrh(r21:20, r31)
+# CHECK: 50 df 54 c3
+r17:16 = vlsrh(r21:20, r31)
+# CHECK: 90 df 54 c3
+r17:16 = vaslh(r21:20, r31)
+# CHECK: d0 df 54 c3
+r17:16 = vlslh(r21:20, r31)
+
+# Vector shift words by immediate
+# CHECK: 10 df 54 80
+r17:16 = vasrw(r21:20, #31)
+# CHECK: 30 df 54 80
+r17:16 = vlsrw(r21:20, #31)
+# CHECK: 50 df 54 80
+r17:16 = vaslw(r21:20, #31)
+
+# Vector shift words by register
+# CHECK: 10 df 14 c3
+r17:16 = vasrw(r21:20, r31)
+# CHECK: 50 df 14 c3
+r17:16 = vlsrw(r21:20, r31)
+# CHECK: 90 df 14 c3
+r17:16 = vaslw(r21:20, r31)
+# CHECK: d0 df 14 c3
+r17:16 = vlslw(r21:20, r31)
+
+# Vector shift words with truncate and pack
+# CHECK: 51 df d4 88
+r17 = vasrw(r21:20, #31)
+# CHECK: 51 df 14 c5
+r17 = vasrw(r21:20, r31)
diff --git a/test/MC/Hexagon/jumpdoublepound.s b/test/MC/Hexagon/jumpdoublepound.s
new file mode 100644
index 000000000000..6b829360a906
--- /dev/null
+++ b/test/MC/Hexagon/jumpdoublepound.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj %s -o - | llvm-objdump -d - | FileCheck %s
+
+# Verify that jump encodes correctly
+
+
+mylabel:
+# CHECK: if (p0) jump
+if (p0) jump ##mylabel
+
+# CHECK: if (cmp.gtu(r5.new, r4)) jump:t
+{ r5 = r4
+ if (cmp.gtu(r5.new, r4)) jump:t ##mylabel }
+
diff --git a/test/MC/Hexagon/labels.s b/test/MC/Hexagon/labels.s
new file mode 100644
index 000000000000..d52ae004b07d
--- /dev/null
+++ b/test/MC/Hexagon/labels.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc -triple=hexagon -filetype=asm -o - %s | FileCheck %s
+
+# CHECK: a:
+a:
+
+# CHECK: r1:
+r1:
+
+# CHECK: r3:
+# CHECK: nop
+r3:nop
+
+# CHECK: r5:4 = combine(r5, r4)
+r5:4 = r5:4
+
+# CHECK: r0 = r1
+# CHECK: p0 = tstbit(r0, #10)
+# CHECK: if (!p0) jump
+1:r0=r1; p0=tstbit(r0, #10); if !p0 jump 1b;
+
+# CHECK: nop
+# CHECK: r1 = add(r1, #4)
+# CHECK: r5 = memw(r1 + #0)
+# CHECK: endloop0
+b: { r5 = memw(r1)
+ r1 = add(r1, #4) } : endloop0 \ No newline at end of file
diff --git a/test/MC/Hexagon/new-value-check.s b/test/MC/Hexagon/new-value-check.s
new file mode 100644
index 000000000000..e46360a7bb3d
--- /dev/null
+++ b/test/MC/Hexagon/new-value-check.s
@@ -0,0 +1,72 @@
+# RUN: llvm-mc -triple=hexagon < %s 2>%t ; \
+# RUN: FileCheck %s < %t --check-prefix=CHECK-STRICT
+# RUN: llvm-mc -triple=hexagon -relax-nv-checks < %s 2>%t ; \
+# RUN: FileCheck %s < %t --check-prefix=CHECK-RELAXED
+
+# CHECK-STRICT: :12:1: error: register `R0' used with `.new' but not validly modified in the same packet
+# CHECK-RELAXED: :12:1: error: register `R0' used with `.new' but not validly modified in the same packet
+{
+ # invalid: r0 definition predicated on the opposite condition
+ if (p3) r0 = add(r1, r2)
+ if (!p3) memb(r20) = r0.new
+}
+
+# CHECK-STRICT: :20:1: error: register `R0' used with `.new' but not validly modified in the same packet
+# CHECK-RELAXED: :20:1: error: register `R0' used with `.new' but not validly modified in the same packet
+{
+ # invalid: new-value compare-and-jump cannot use floating point value
+ r0 = sfadd(r1, r2)
+ if (cmp.eq(r0.new, #0)) jump:nt .
+}
+
+# CHECK-STRICT: :29:1: error: register `R0' used with `.new' but not validly modified in the same packet
+# CHECK-RELAXED: :29:1: error: register `R0' used with `.new' but not validly modified in the same packet
+{
+ # invalid: definition of r0 should be unconditional (not explicitly docu-
+ # mented)
+ if (p0) r0 = r1
+ if (cmp.eq(r0.new, #0)) jump:nt .
+}
+
+
+# No errors from this point on with the relaxed checks.
+# CHECK-RELAXED-NOT: error
+
+# CHECK-STRICT: :41:1: error: register `R0' used with `.new' but not validly modified in the same packet
+{
+ # valid (relaxed): p2 and p3 cannot be proven to violate the new-value
+ # requirements
+ if (p3) r0 = add(r1, r2)
+ if (p2) memb(r20) = r0.new
+}
+
+# CHECK-STRICT: :48:1: error: register `R0' used with `.new' but not validly modified in the same packet
+{
+ # valid (relaxed): p3 could be always true
+ if (p3) r0 = add(r1, r2)
+ memb(r20) = r0.new
+}
+
+
+# No errors from this point on with the strict checks.
+# CHECK-RELAXED-NOT: error
+
+{
+ # valid: r0 defined unconditionally
+ r0 = add(r1, r2)
+ if (p2) memb(r20) = r0.new
+}
+
+{
+ # valid: r0 definition and use identically predicated
+ if (p3) r0 = add(r1, r2)
+ if (p3) memb(r20) = r0.new
+}
+
+{
+ # valid: r0 defined regardless of p0
+ if (p0) r0 = #0
+ if (!p0) r0 = #1
+ if (p0) memb(r20) = r0.new
+}
+
diff --git a/test/MC/Hexagon/out_of_range.s b/test/MC/Hexagon/out_of_range.s
new file mode 100644
index 000000000000..2a98ef287f6b
--- /dev/null
+++ b/test/MC/Hexagon/out_of_range.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple=hexagon -filetype=asm %s 2> %t; FileCheck %s < %t
+
+r1:0=##0xFFFFFF7000001000
+# CHECK: rror: value -144(0xffffffffffffff70) out of range: -128-127
+
+p0 = cmpb.eq(r0, #-257)
+# CHECK: rror: invalid operand for instruction
+
+p0 = cmpb.eq(r0, #256)
+# CHECK: rror: invalid operand for instruction
diff --git a/test/MC/Hexagon/pcrel.s b/test/MC/Hexagon/pcrel.s
new file mode 100644
index 000000000000..368fea5c2b30
--- /dev/null
+++ b/test/MC/Hexagon/pcrel.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -arch=hexagon -filetype=obj %s | llvm-objdump -r - | FileCheck %s
+#
+
+# make sure the fixups emitted match what is
+# expected.
+.Lpc:
+ r0 = add (pc, ##foo@PCREL)
+
+# CHECK: R_HEX_B32_PCREL_X
+# CHECK: R_HEX_6_PCREL_X
+
diff --git a/test/MC/Hexagon/relaxed_newvalue.s b/test/MC/Hexagon/relaxed_newvalue.s
new file mode 100644
index 000000000000..65fbd312e0ac
--- /dev/null
+++ b/test/MC/Hexagon/relaxed_newvalue.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj %s | llvm-objdump -d - | FileCheck %s
+# Make sure relaxation doesn't hinder newvalue calculation
+
+#CHECK: r18 = add(r2, #-6)
+#CHECK-NEXT: immext(#0)
+#CHECK-NEXT: if (!cmp.gt(r18.new, #1)) jump:t
+{
+ r18 = add(r2, #-6)
+ if (!cmp.gt(r18.new, #1)) jump:t .unknown
+}
diff --git a/test/MC/Hexagon/test.s b/test/MC/Hexagon/test.s
new file mode 100644
index 000000000000..73b6d0a96c71
--- /dev/null
+++ b/test/MC/Hexagon/test.s
@@ -0,0 +1,4 @@
+#RUN: llvm-mc -filetype=obj -triple=hexagon -mcpu=hexagonv60 %s
+
+{ vmem (r0 + #0) = v0
+ r0 = memw(r0) } \ No newline at end of file
diff --git a/test/MC/Hexagon/two_ext.s b/test/MC/Hexagon/two_ext.s
new file mode 100644
index 000000000000..c55bcc8cd9f5
--- /dev/null
+++ b/test/MC/Hexagon/two_ext.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -triple=hexagon -filetype=obj %s | llvm-objdump -d - | FileCheck %s
+
+# verify two extenders generated during relaxation
+{
+ if (p1) call foo_a
+ if (!p1) call foo_b
+}
+# CHECK: 00004000 { immext(#0)
+# CHECK: 5d004100 if (p1) call 0x0
+# CHECK: 00004000 immext(#0)
+# CHECK: 5d20c100 if (!p1) call 0x0 }
+
diff --git a/test/MC/Hexagon/v60-alu.s b/test/MC/Hexagon/v60-alu.s
new file mode 100644
index 000000000000..1583c3da2cb7
--- /dev/null
+++ b/test/MC/Hexagon/v60-alu.s
@@ -0,0 +1,312 @@
+#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \
+#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \
+#RUN: FileCheck %s
+
+#CHECK: 1ce2cbd7 { v23.w = vavg(v11.w,{{ *}}v2.w):rnd }
+v23.w=vavg(v11.w,v2.w):rnd
+
+#CHECK: 1cf4d323 { v3.h = vnavg(v19.h,{{ *}}v20.h) }
+v3.h=vnavg(v19.h,v20.h)
+
+#CHECK: 1cffce9a { v26.uh = vavg(v14.uh,{{ *}}v31.uh):rnd }
+v26.uh=vavg(v14.uh,v31.uh):rnd
+
+#CHECK: 1ce5cba1 { v1.h = vavg(v11.h,{{ *}}v5.h):rnd }
+v1.h=vavg(v11.h,v5.h):rnd
+
+#CHECK: 1cc0d012 { v18.ub = vabsdiff(v16.ub,{{ *}}v0.ub) }
+v18.ub=vabsdiff(v16.ub,v0.ub)
+
+#CHECK: 1cc2de29 { v9.uh = vabsdiff(v30.h,{{ *}}v2.h) }
+v9.uh=vabsdiff(v30.h,v2.h)
+
+#CHECK: 1ce9ca06 { v6.b = vnavg(v10.ub,{{ *}}v9.ub) }
+v6.b=vnavg(v10.ub,v9.ub)
+
+#CHECK: 1caacf90 { v17:16.w = vadd(v15.h,{{ *}}v10.h) }
+v17:16.w=vadd(v15.h,v10.h)
+
+#CHECK: 1cb4cabe { v31:30.h = vsub(v10.ub,{{ *}}v20.ub) }
+v31:30.h=vsub(v10.ub,v20.ub)
+
+#CHECK: 1cb8cada { v27:26.w = vsub(v10.uh,{{ *}}v24.uh) }
+v27:26.w=vsub(v10.uh,v24.uh)
+
+#CHECK: 1cbcdbe8 { v9:8.w = vsub(v27.h,{{ *}}v28.h) }
+v9:8.w=vsub(v27.h,v28.h)
+
+#CHECK: 1caeca00 { v1:0.h = vsub(v11:10.h,{{ *}}v15:14.h):sat }
+v1:0.h=vsub(v11:10.h,v15:14.h):sat
+
+#CHECK: 1ca8c43e { v31:30.w = vsub(v5:4.w,{{ *}}v9:8.w):sat }
+v31:30.w=vsub(v5:4.w,v9:8.w):sat
+
+#CHECK: 1cbad95c { v29:28.h = vadd(v25.ub,{{ *}}v26.ub) }
+v29:28.h=vadd(v25.ub,v26.ub)
+
+#CHECK: 1ca1dc64 { v5:4.w = vadd(v28.uh,{{ *}}v1.uh) }
+v5:4.w=vadd(v28.uh,v1.uh)
+
+#CHECK: 1c79c350 { v16.h = vsub(v3.h,{{ *}}v25.h):sat }
+v16.h=vsub(v3.h,v25.h):sat
+
+#CHECK: 1c7fd364 { v4.w = vsub(v19.w,{{ *}}v31.w):sat }
+v4.w=vsub(v19.w,v31.w):sat
+
+#CHECK: 1c67d816 { v22.ub = vsub(v24.ub,{{ *}}v7.ub):sat }
+v22.ub=vsub(v24.ub,v7.ub):sat
+
+#CHECK: 1c7ddc2f { v15.uh = vsub(v28.uh,{{ *}}v29.uh):sat }
+v15.uh=vsub(v28.uh,v29.uh):sat
+
+#CHECK: 1c5cc6d7 { v23.h = vsub(v6.h,{{ *}}v28.h) }
+v23.h=vsub(v6.h,v28.h)
+
+#CHECK: 1c54cae4 { v4.w = vsub(v10.w,{{ *}}v20.w) }
+v4.w=vsub(v10.w,v20.w)
+
+#CHECK: 1c4dc78b { v11.w = vadd(v7.w,{{ *}}v13.w):sat }
+v11.w=vadd(v7.w,v13.w):sat
+
+#CHECK: 1c48c7a4 { v4.b = vsub(v7.b,{{ *}}v8.b) }
+v4.b=vsub(v7.b,v8.b)
+
+#CHECK: 1cdec3b0 { v16.uh = vavg(v3.uh,{{ *}}v30.uh) }
+v16.uh=vavg(v3.uh,v30.uh)
+
+#CHECK: 1c76dc98 { v25:24.b = vadd(v29:28.b,{{ *}}v23:22.b) }
+v25:24.b=vadd(v29:28.b,v23:22.b)
+
+#CHECK: 1c7ad4a6 { v7:6.h = vadd(v21:20.h,{{ *}}v27:26.h) }
+v7:6.h=vadd(v21:20.h,v27:26.h)
+
+#CHECK: 1cc7c564 { v4.uw = vabsdiff(v5.w,{{ *}}v7.w) }
+v4.uw=vabsdiff(v5.w,v7.w)
+
+#CHECK: 1cd2cdc1 { v1.h = vavg(v13.h,{{ *}}v18.h) }
+v1.h=vavg(v13.h,v18.h)
+
+#CHECK: 1cd5d246 { v6.uh = vabsdiff(v18.uh,{{ *}}v21.uh) }
+v6.uh=vabsdiff(v18.uh,v21.uh)
+
+#CHECK: 1cdcd987 { v7.ub = vavg(v25.ub,{{ *}}v28.ub) }
+v7.ub=vavg(v25.ub,v28.ub)
+
+#CHECK: 1c92c6e4 { v5:4.uh = vsub(v7:6.uh,{{ *}}v19:18.uh):sat }
+v5:4.uh=vsub(v7:6.uh,v19:18.uh):sat
+
+#CHECK: 1c86dace { v15:14.ub = vsub(v27:26.ub,{{ *}}v7:6.ub):sat }
+v15:14.ub=vsub(v27:26.ub,v7:6.ub):sat
+
+#CHECK: 1cffc07c { v28.ub = vavg(v0.ub,{{ *}}v31.ub):rnd }
+v28.ub=vavg(v0.ub,v31.ub):rnd
+
+#CHECK: 1cf8d851 { v17.w = vnavg(v24.w,{{ *}}v24.w) }
+v17.w=vnavg(v24.w,v24.w)
+
+#CHECK: 1c70d2e6 { v7:6.ub = vadd(v19:18.ub,{{ *}}v17:16.ub):sat }
+v7:6.ub=vadd(v19:18.ub,v17:16.ub):sat
+
+#CHECK: 1c72dec6 { v7:6.w = vadd(v31:30.w,{{ *}}v19:18.w) }
+v7:6.w=vadd(v31:30.w,v19:18.w)
+
+#CHECK: 1c92d23e { v31:30.h = vadd(v19:18.h,{{ *}}v19:18.h):sat }
+v31:30.h=vadd(v19:18.h,v19:18.h):sat
+
+#CHECK: 1c94de1e { v31:30.uh = vadd(v31:30.uh,{{ *}}v21:20.uh):sat }
+v31:30.uh=vadd(v31:30.uh,v21:20.uh):sat
+
+#CHECK: 1c9ec07c { v29:28.b = vsub(v1:0.b,{{ *}}v31:30.b) }
+v29:28.b=vsub(v1:0.b,v31:30.b)
+
+#CHECK: 1c88da56 { v23:22.w = vadd(v27:26.w,{{ *}}v9:8.w):sat }
+v23:22.w=vadd(v27:26.w,v9:8.w):sat
+
+#CHECK: 1c9acab8 { v25:24.w = vsub(v11:10.w,{{ *}}v27:26.w) }
+v25:24.w=vsub(v11:10.w,v27:26.w)
+
+#CHECK: 1c82d282 { v3:2.h = vsub(v19:18.h,{{ *}}v3:2.h) }
+v3:2.h=vsub(v19:18.h,v3:2.h)
+
+#CHECK: 1c2bd9a6 { v6 = vand(v25,{{ *}}v11) }
+v6=vand(v25,v11)
+
+#CHECK: 1c43c22d { v13.ub = vadd(v2.ub,{{ *}}v3.ub):sat }
+v13.ub=vadd(v2.ub,v3.ub):sat
+
+#CHECK: 1c59d707 { v7.w = vadd(v23.w,{{ *}}v25.w) }
+v7.w=vadd(v23.w,v25.w)
+
+#CHECK: 1c3fc9e1 { v1 = vxor(v9,{{ *}}v31) }
+v1=vxor(v9,v31)
+
+#CHECK: 1c2acbdf { v31 = vor(v11,{{ *}}v10) }
+v31=vor(v11,v10)
+
+#CHECK: 1cdaccf6 { v22.w = vavg(v12.w,{{ *}}v26.w) }
+v22.w=vavg(v12.w,v26.w)
+
+#CHECK: 1c5ac767 { v7.h = vadd(v7.h,{{ *}}v26.h):sat }
+v7.h=vadd(v7.h,v26.h):sat
+
+#CHECK: 1c40d956 { v22.uh = vadd(v25.uh,{{ *}}v0.uh):sat }
+v22.uh=vadd(v25.uh,v0.uh):sat
+
+#CHECK: 1fbbd611 { v17.w = vasr(v22.w{{ *}},{{ *}}v27.w) }
+v17.w=vasr(v22.w,v27.w)
+
+#CHECK: 1fbad835 { v21.w = vlsr(v24.w{{ *}},{{ *}}v26.w) }
+v21.w=vlsr(v24.w,v26.w)
+
+#CHECK: 1f79cedc { v28.b = vround(v14.h{{ *}},{{ *}}v25.h):sat }
+v28.b=vround(v14.h,v25.h):sat
+
+#CHECK: 1f69c4e0 { v0.ub = vround(v4.h{{ *}},{{ *}}v9.h):sat }
+v0.ub=vround(v4.h,v9.h):sat
+
+#CHECK: 1f72c485 { v5.h = vround(v4.w{{ *}},{{ *}}v18.w):sat }
+v5.h=vround(v4.w,v18.w):sat
+
+#CHECK: 1f6bc8b1 { v17.uh = vround(v8.w{{ *}},{{ *}}v11.w):sat }
+v17.uh=vround(v8.w,v11.w):sat
+
+#CHECK: 1f71c25b { v27.ub = vsat(v2.h{{ *}},{{ *}}v17.h) }
+v27.ub=vsat(v2.h,v17.h)
+
+#CHECK: 1f66c560 { v0.h = vsat(v5.w{{ *}},{{ *}}v6.w) }
+v0.h=vsat(v5.w,v6.w)
+
+#CHECK: 1fb3d148 { v8.h = vlsr(v17.h{{ *}},{{ *}}v19.h) }
+v8.h=vlsr(v17.h,v19.h)
+
+#CHECK: 1fbec56e { v14.h = vasr(v5.h{{ *}},{{ *}}v30.h) }
+v14.h=vasr(v5.h,v30.h)
+
+#CHECK: 1fb2d2a2 { v2.h = vasl(v18.h{{ *}},{{ *}}v18.h) }
+v2.h=vasl(v18.h,v18.h)
+
+#CHECK: 1faccc95 { v21.w = vasl(v12.w{{ *}},{{ *}}v12.w) }
+v21.w=vasl(v12.w,v12.w)
+
+#CHECK: 1fb9c1e2 { v2.h = vadd(v1.h{{ *}},{{ *}}v25.h) }
+v2.h=vadd(v1.h,v25.h)
+
+#CHECK: 1fbbd5df { v31.b = vadd(v21.b{{ *}},{{ *}}v27.b) }
+v31.b=vadd(v21.b,v27.b)
+
+#CHECK: 1f25c578 { v24 = vrdelta(v5{{ *}},{{ *}}v5) }
+v24=vrdelta(v5,v5)
+
+#CHECK: 1f22c62a { v10 = vdelta(v6{{ *}},{{ *}}v2) }
+v10=vdelta(v6,v2)
+
+#CHECK: 1f20d102 { v2.w = vmax(v17.w{{ *}},{{ *}}v0.w) }
+v2.w=vmax(v17.w,v0.w)
+
+#CHECK: 1f1ed6fc { v28.h = vmax(v22.h{{ *}},{{ *}}v30.h) }
+v28.h=vmax(v22.h,v30.h)
+
+#CHECK: 1f0cc8d8 { v24.uh = vmax(v8.uh{{ *}},{{ *}}v12.uh) }
+v24.uh=vmax(v8.uh,v12.uh)
+
+#CHECK: 1f00c1b0 { v16.ub = vmax(v1.ub{{ *}},{{ *}}v0.ub) }
+v16.ub=vmax(v1.ub,v0.ub)
+
+#CHECK: 1f12d08e { v14.w = vmin(v16.w{{ *}},{{ *}}v18.w) }
+v14.w=vmin(v16.w,v18.w)
+
+#CHECK: 1f1ad466 { v6.h = vmin(v20.h{{ *}},{{ *}}v26.h) }
+v6.h=vmin(v20.h,v26.h)
+
+#CHECK: 1f13df5d { v29.uh = vmin(v31.uh{{ *}},{{ *}}v19.uh) }
+v29.uh=vmin(v31.uh,v19.uh)
+
+#CHECK: 1f09c226 { v6.ub = vmin(v2.ub{{ *}},{{ *}}v9.ub) }
+v6.ub=vmin(v2.ub,v9.ub)
+
+#CHECK: 1f41d34f { v15.b = vshuffo(v19.b{{ *}},{{ *}}v1.b) }
+v15.b=vshuffo(v19.b,v1.b)
+
+#CHECK: 1f5fc72e { v14.b = vshuffe(v7.b{{ *}},{{ *}}v31.b) }
+v14.b=vshuffe(v7.b,v31.b)
+
+#CHECK: 1f34d0f7 { v23.b = vdeale(v16.b{{ *}},{{ *}}v20.b) }
+v23.b=vdeale(v16.b,v20.b)
+
+#CHECK: 1f4bd6c4 { v5:4.b = vshuffoe(v22.b{{ *}},{{ *}}v11.b) }
+v5:4.b=vshuffoe(v22.b,v11.b)
+
+#CHECK: 1f5dcea2 { v3:2.h = vshuffoe(v14.h{{ *}},{{ *}}v29.h) }
+v3:2.h=vshuffoe(v14.h,v29.h)
+
+#CHECK: 1f4fd186 { v6.h = vshuffo(v17.h{{ *}},{{ *}}v15.h) }
+v6.h=vshuffo(v17.h,v15.h)
+
+#CHECK: 1f5bda79 { v25.h = vshuffe(v26.h{{ *}},{{ *}}v27.h) }
+v25.h=vshuffe(v26.h,v27.h)
+
+#CHECK: 1f41d1f2 { v19:18 = vcombine(v17{{ *}},{{ *}}v1) }
+v19:18=vcombine(v17,v1)
+
+#CHECK: 1e82f432 { if (!q2) v18.b -= v20.b }
+if (!q2) v18.b-=v20.b
+
+#CHECK: 1ec2fd13 { if (q3) v19.w -= v29.w }
+if (q3) v19.w-=v29.w
+
+#CHECK: 1e81fef9 { if (q2) v25.h -= v30.h }
+if (q2) v25.h-=v30.h
+
+#CHECK: 1e81e2d3 { if (q2) v19.b -= v2.b }
+if (q2) v19.b-=v2.b
+
+#CHECK: 1e41ecad { if (!q1) v13.w += v12.w }
+if (!q1) v13.w+=v12.w
+
+#CHECK: 1e41e789 { if (!q1) v9.h += v7.h }
+if (!q1) v9.h+=v7.h
+
+#CHECK: 1e81e967 { if (!q2) v7.b += v9.b }
+if (!q2) v7.b+=v9.b
+
+#CHECK: 1e41f04f { if (q1) v15.w += v16.w }
+if (q1) v15.w+=v16.w
+
+#CHECK: 1e01e838 { if (q0) v24.h += v8.h }
+if (q0) v24.h+=v8.h
+
+#CHECK: 1ec1f112 { if (q3) v18.b += v17.b }
+if (q3) v18.b+=v17.b
+
+#CHECK: 1e42f67b { if (!q1) v27.w -= v22.w }
+if (!q1) v27.w-=v22.w
+
+#CHECK: 1e82ea5b { if (!q2) v27.h -= v10.h }
+if (!q2) v27.h-=v10.h
+
+#CHECK: 1e00c586 { v6 = vnot(v5) }
+v6=vnot(v5)
+
+#CHECK: 1e00df70 { v16.w = vabs(v31.w):sat }
+v16.w=vabs(v31.w):sat
+
+#CHECK: 1e00d45f { v31.w = vabs(v20.w) }
+v31.w=vabs(v20.w)
+
+#CHECK: 1e00db2f { v15.h = vabs(v27.h):sat }
+v15.h=vabs(v27.h):sat
+
+#CHECK: 1e00d001 { v1.h = vabs(v16.h) }
+v1.h=vabs(v16.h)
+
+#CHECK: 1e02c832 { v19:18.uh = vzxt(v8.ub) }
+v19:18.uh=vzxt(v8.ub)
+
+#CHECK: 1e02c98a { v11:10.w = vsxt(v9.h) }
+v11:10.w=vsxt(v9.h)
+
+#CHECK: 1e02cf76 { v23:22.h = vsxt(v15.b) }
+v23:22.h=vsxt(v15.b)
+
+#CHECK: 1e02c258 { v25:24.uw = vzxt(v2.uh) }
+v25:24.uw=vzxt(v2.uh)
diff --git a/test/MC/Hexagon/v60-permute.s b/test/MC/Hexagon/v60-permute.s
new file mode 100644
index 000000000000..b3544bd0a57b
--- /dev/null
+++ b/test/MC/Hexagon/v60-permute.s
@@ -0,0 +1,51 @@
+#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \
+#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \
+#RUN: FileCheck %s
+
+#CHECK: 1fd2d5cf { v15.b = vpack(v21.h{{ *}},{{ *}}v18.h):sat }
+v15.b=vpack(v21.h,v18.h):sat
+
+#CHECK: 1fd7d7a2 { v2.ub = vpack(v23.h{{ *}},{{ *}}v23.h):sat }
+v2.ub=vpack(v23.h,v23.h):sat
+
+#CHECK: 1fc7d464 { v4.h = vpacke(v20.w{{ *}},{{ *}}v7.w) }
+v4.h=vpacke(v20.w,v7.w)
+
+#CHECK: 1fc2c75b { v27.b = vpacke(v7.h{{ *}},{{ *}}v2.h) }
+v27.b=vpacke(v7.h,v2.h)
+
+#CHECK: 1fc9c5ed { v13.uh = vpack(v5.w{{ *}},{{ *}}v9.w):sat }
+v13.uh=vpack(v5.w,v9.w):sat
+
+#CHECK: 1ff1d81f { v31.h = vpack(v24.w{{ *}},{{ *}}v17.w):sat }
+v31.h=vpack(v24.w,v17.w):sat
+
+#CHECK: 1fe6c435 { v21.b = vpacko(v4.h{{ *}},{{ *}}v6.h) }
+v21.b=vpacko(v4.h,v6.h)
+
+#CHECK: 1febc140 { v0.h = vpacko(v1.w{{ *}},{{ *}}v11.w) }
+v0.h=vpacko(v1.w,v11.w)
+
+#CHECK: 1e01d256 { v23:22.h = vunpack(v18.b) }
+v23:22.h=vunpack(v18.b)
+
+#CHECK: 1e01cc38 { v25:24.uw = vunpack(v12.uh) }
+v25:24.uw=vunpack(v12.uh)
+
+#CHECK: 1e01c61e { v31:30.uh = vunpack(v6.ub) }
+v31:30.uh=vunpack(v6.ub)
+
+#CHECK: 1e01d778 { v25:24.w = vunpack(v23.h) }
+v25:24.w=vunpack(v23.h)
+
+#CHECK: 1e00c0e0 { v0.b = vdeal(v0.b) }
+v0.b=vdeal(v0.b)
+
+#CHECK: 1e00d5c9 { v9.h = vdeal(v21.h) }
+v9.h=vdeal(v21.h)
+
+#CHECK: 1e02cb1c { v28.b = vshuff(v11.b) }
+v28.b=vshuff(v11.b)
+
+#CHECK: 1e01d8fe { v30.h = vshuff(v24.h) }
+v30.h=vshuff(v24.h)
diff --git a/test/MC/Hexagon/v60-shift.s b/test/MC/Hexagon/v60-shift.s
new file mode 100644
index 000000000000..3d0c334debb9
--- /dev/null
+++ b/test/MC/Hexagon/v60-shift.s
@@ -0,0 +1,39 @@
+#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \
+#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \
+#RUN: FileCheck %s
+
+#CHECK: 198fd829 { v9.uw = vlsr(v24.uw,{{ *}}r15) }
+v9.uw=vlsr(v24.uw,r15)
+
+#CHECK: 1999d645 { v5.uh = vlsr(v22.uh,{{ *}}r25) }
+v5.uh=vlsr(v22.uh,r25)
+
+#CHECK: 198cc303 { v3.h = vasl(v3.h,{{ *}}r12) }
+v3.h=vasl(v3.h,r12)
+
+#CHECK: 1965d7ac { v12.w = vasr(v23.w,{{ *}}r5) }
+v12.w=vasr(v23.w,r5)
+
+#CHECK: 197dddc3 { v3.h = vasr(v29.h,{{ *}}r29) }
+v3.h=vasr(v29.h,r29)
+
+#CHECK: 197adde8 { v8.w = vasl(v29.w,{{ *}}r26) }
+v8.w=vasl(v29.w,r26)
+
+#CHECK: 1977cc26 { v6 = vror(v12,{{ *}}r23) }
+v6=vror(v12,r23)
+
+#CHECK: 1e02cfad { v13.uw = vcl0(v15.uw) }
+v13.uw=vcl0(v15.uw)
+
+#CHECK: 1e02defb { v27.uh = vcl0(v30.uh) }
+v27.uh=vcl0(v30.uh)
+
+#CHECK: 1e03de90 { v16.w = vnormamt(v30.w) }
+v16.w=vnormamt(v30.w)
+
+#CHECK: 1e03d4a3 { v3.h = vnormamt(v20.h) }
+v3.h=vnormamt(v20.h)
+
+#CHECK: 1e02c2d8 { v24.h = vpopcount(v2.h) }
+v24.h=vpopcount(v2.h)
diff --git a/test/MC/Hexagon/v60-vcmp.s b/test/MC/Hexagon/v60-vcmp.s
new file mode 100644
index 000000000000..c7f4e128be63
--- /dev/null
+++ b/test/MC/Hexagon/v60-vcmp.s
@@ -0,0 +1,84 @@
+#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \
+#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \
+#RUN: FileCheck %s
+
+#CHECK: 1c81f142 { q2 |= vcmp.eq(v17.b{{ *}},{{ *}}v1.b) }
+q2|=vcmp.eq(v17.b,v1.b)
+
+#CHECK: 1c84fb2a { q2 &= vcmp.gt(v27.uw{{ *}},{{ *}}v4.uw) }
+q2&=vcmp.gt(v27.uw,v4.uw)
+
+#CHECK: 1c8cf826 { q2 &= vcmp.gt(v24.uh{{ *}},{{ *}}v12.uh) }
+q2&=vcmp.gt(v24.uh,v12.uh)
+
+#CHECK: 1c80e720 { q0 &= vcmp.gt(v7.ub{{ *}},{{ *}}v0.ub) }
+q0&=vcmp.gt(v7.ub,v0.ub)
+
+#CHECK: 1c9aed1a { q2 &= vcmp.gt(v13.w{{ *}},{{ *}}v26.w) }
+q2&=vcmp.gt(v13.w,v26.w)
+
+#CHECK: 1c8de516 { q2 &= vcmp.gt(v5.h{{ *}},{{ *}}v13.h) }
+q2&=vcmp.gt(v5.h,v13.h)
+
+#CHECK: 1c8dfc11 { q1 &= vcmp.gt(v28.b{{ *}},{{ *}}v13.b) }
+q1&=vcmp.gt(v28.b,v13.b)
+
+#CHECK: 1c94fa0b { q3 &= vcmp.eq(v26.w{{ *}},{{ *}}v20.w) }
+q3&=vcmp.eq(v26.w,v20.w)
+
+#CHECK: 1c83e206 { q2 &= vcmp.eq(v2.h{{ *}},{{ *}}v3.h) }
+q2&=vcmp.eq(v2.h,v3.h)
+
+#CHECK: 1c85e900 { q0 &= vcmp.eq(v9.b{{ *}},{{ *}}v5.b) }
+q0&=vcmp.eq(v9.b,v5.b)
+
+#CHECK: 1c9cfca8 { q0 ^= vcmp.gt(v28.uw{{ *}},{{ *}}v28.uw) }
+q0^=vcmp.gt(v28.uw,v28.uw)
+
+#CHECK: 1c81faa0 { q0 ^= vcmp.gt(v26.ub{{ *}},{{ *}}v1.ub) }
+q0^=vcmp.gt(v26.ub,v1.ub)
+
+#CHECK: 1c96f0a4 { q0 ^= vcmp.gt(v16.uh{{ *}},{{ *}}v22.uh) }
+q0^=vcmp.gt(v16.uh,v22.uh)
+
+#CHECK: 1c9bf795 { q1 ^= vcmp.gt(v23.h{{ *}},{{ *}}v27.h) }
+q1^=vcmp.gt(v23.h,v27.h)
+
+#CHECK: 1c9de698 { q0 ^= vcmp.gt(v6.w{{ *}},{{ *}}v29.w) }
+q0^=vcmp.gt(v6.w,v29.w)
+
+#CHECK: 1c82ef8a { q2 ^= vcmp.eq(v15.w{{ *}},{{ *}}v2.w) }
+q2^=vcmp.eq(v15.w,v2.w)
+
+#CHECK: 1c99e891 { q1 ^= vcmp.gt(v8.b{{ *}},{{ *}}v25.b) }
+q1^=vcmp.gt(v8.b,v25.b)
+
+#CHECK: 1c8afe55 { q1 |= vcmp.gt(v30.h{{ *}},{{ *}}v10.h) }
+q1|=vcmp.gt(v30.h,v10.h)
+
+#CHECK: 1c92ef50 { q0 |= vcmp.gt(v15.b{{ *}},{{ *}}v18.b) }
+q0|=vcmp.gt(v15.b,v18.b)
+
+#CHECK: 1c9ffb4b { q3 |= vcmp.eq(v27.w{{ *}},{{ *}}v31.w) }
+q3|=vcmp.eq(v27.w,v31.w)
+
+#CHECK: 1c87e944 { q0 |= vcmp.eq(v9.h{{ *}},{{ *}}v7.h) }
+q0|=vcmp.eq(v9.h,v7.h)
+
+#CHECK: 1c8ee768 { q0 |= vcmp.gt(v7.uw{{ *}},{{ *}}v14.uw) }
+q0|=vcmp.gt(v7.uw,v14.uw)
+
+#CHECK: 1c92e265 { q1 |= vcmp.gt(v2.uh{{ *}},{{ *}}v18.uh) }
+q1|=vcmp.gt(v2.uh,v18.uh)
+
+#CHECK: 1c80f062 { q2 |= vcmp.gt(v16.ub{{ *}},{{ *}}v0.ub) }
+q2|=vcmp.gt(v16.ub,v0.ub)
+
+#CHECK: 1c91f75a { q2 |= vcmp.gt(v23.w{{ *}},{{ *}}v17.w) }
+q2|=vcmp.gt(v23.w,v17.w)
+
+#CHECK: 1c86fe84 { q0 ^= vcmp.eq(v30.h{{ *}},{{ *}}v6.h) }
+q0^=vcmp.eq(v30.h,v6.h)
+
+#CHECK: 1c86ec82 { q2 ^= vcmp.eq(v12.b{{ *}},{{ *}}v6.b) }
+q2^=vcmp.eq(v12.b,v6.b)
diff --git a/test/MC/Hexagon/v60-vmem.s b/test/MC/Hexagon/v60-vmem.s
new file mode 100644
index 000000000000..fe202251ec4b
--- /dev/null
+++ b/test/MC/Hexagon/v60-vmem.s
@@ -0,0 +1,424 @@
+#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \
+#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \
+#RUN: FileCheck %s
+
+#CHECK: 292cc11b { vmem(r12++#1) = v27 }
+{
+ vmem(r12++#1)=v27
+}
+
+#CHECK: 294dc319 { v25 = vmem(r13++#3):nt }
+{
+ v25=vmem(r13++#3):nt
+}
+
+#CHECK: 2904c1fb { v27 = vmemu(r4++#1) }
+{
+ v27=vmemu(r4++#1)
+}
+
+#CHECK: 291dc01f { v31 = vmem(r29++#0) }
+{
+ v31=vmem(r29++#0)
+}
+
+#CHECK: 293ec0ff { vmemu(r30++#0) = v31 }
+{
+ vmemu(r30++#0)=v31
+}
+
+#CHECK: 296ec411 { vmem(r14++#-4):nt = v17 }
+{
+ vmem(r14++#-4):nt=v17
+}
+
+#CHECK: 29fec62f { if (!p0) vmem(r30++#-2):nt = v15 }
+{
+ if (!p0) vmem(r30++#-2):nt=v15
+}
+
+#CHECK: 29f9c914 { if (p1) vmem(r25++#1):nt = v20 }
+{
+ if (p1) vmem(r25++#1):nt=v20
+}
+
+#CHECK: 2984de30 { if (!q3) vmem(r4++#-2) = v16 }
+{
+ if (!q3) vmem(r4++#-2)=v16
+}
+
+#CHECK: 2992dd1f { if (q3) vmem(r18++#-3) = v31 }
+{
+ if (q3) vmem(r18++#-3)=v31
+}
+
+#CHECK: 29c9c425 { if (!q0) vmem(r9++#-4):nt = v5 }
+{
+ if (!q0) vmem(r9++#-4):nt=v5
+}
+
+#CHECK: 29d1cf11 { if (q1) vmem(r17++#-1):nt = v17 }
+{
+ if (q1) vmem(r17++#-1):nt=v17
+}
+
+#CHECK: 29a7c328 { if (!p0) vmem(r7++#3) = v8 }
+{
+ if (!p0) vmem(r7++#3)=v8
+}
+
+#CHECK: 29b6cc1d { if (p1) vmem(r22++#-4) = v29 }
+{
+ if (p1) vmem(r22++#-4)=v29
+}
+
+#CHECK: 29abc5fe { if (!p0) vmemu(r11++#-3) = v30 }
+{
+ if (!p0) vmemu(r11++#-3)=v30
+}
+
+#CHECK: 29b8d5c4 { if (p2) vmemu(r24++#-3) = v4 }
+{
+ if (p2) vmemu(r24++#-3)=v4
+}
+
+#CHECK: 2860e407 { vmem(r0+#-4):nt = v7 }
+{
+ vmem(r0+#-4):nt=v7
+}
+
+#CHECK: 2830e2e7 { vmemu(r16+#-6) = v7 }
+{
+ vmemu(r16+#-6)=v7
+}
+
+#CHECK: 2839c316 { vmem(r25+#3) = v22 }
+{
+ vmem(r25+#3)=v22
+}
+#CHECK: 284be316 { v22 = vmem(r11+#-5):nt }
+{
+ v22=vmem(r11+#-5):nt
+}
+
+#CHECK: 280ec1e6 { v6 = vmemu(r14+#1) }
+{
+ v6=vmemu(r14+#1)
+}
+
+#CHECK: 280ae50c { v12 = vmem(r10+#-3) }
+{
+ v12=vmem(r10+#-3)
+}
+
+#CHECK: 2b62e005 { vmem(r2++m1):nt = v5 }
+{
+ vmem(r2++m1):nt=v5
+}
+
+#CHECK: 2b28e0f2 { vmemu(r8++m1) = v18 }
+{
+ vmemu(r8++m1)=v18
+}
+
+#CHECK: 2b42e019 { v25 = vmem(r2++m1):nt }
+{
+ v25=vmem(r2++m1):nt
+}
+
+#CHECK: 2b2ce009 { vmem(r12++m1) = v9 }
+{
+ vmem(r12++m1)=v9
+}
+
+#CHECK: 2b03c005 { v5 = vmem(r3++m0) }
+{
+ v5=vmem(r3++m0)
+}
+
+
+#CHECK: 2b0ec0f5 { v21 = vmemu(r14++m0) }
+{
+ v21=vmemu(r14++m0)
+}
+
+#CHECK: 2be8c022 { if (!p0) vmem(r8++m0):nt = v2 }
+{
+ if (!p0) vmem(r8++m0):nt=v2
+}
+
+#CHECK: 2bebd813 { if (p3) vmem(r11++m0):nt = v19 }
+{
+ if (p3) vmem(r11++m0):nt=v19
+}
+
+#CHECK: 2ba5e0e7 { if (!p0) vmemu(r5++m1) = v7 }
+{
+ if (!p0) vmemu(r5++m1)=v7
+}
+
+#CHECK: 2ba4f0dd { if (p2) vmemu(r4++m1) = v29 }
+{
+ if (p2) vmemu(r4++m1)=v29
+}
+
+#CHECK: 2ba4e828 { if (!p1) vmem(r4++m1) = v8 }
+{
+ if (!p1) vmem(r4++m1)=v8
+}
+
+#CHECK: 2bbae803 { if (p1) vmem(r26++m1) = v3 }
+{
+ if (p1) vmem(r26++m1)=v3
+}
+
+#CHECK: 2bc9c027 { if (!q0) vmem(r9++m0):nt = v7 }
+{
+ if (!q0) vmem(r9++m0):nt=v7
+}
+
+#CHECK: 2bcfc001 { if (q0) vmem(r15++m0):nt = v1 }
+{
+ if (q0) vmem(r15++m0):nt=v1
+}
+
+#CHECK: 2b97f031 { if (!q2) vmem(r23++m1) = v17 }
+{
+ if (!q2) vmem(r23++m1)=v17
+}
+
+#CHECK: 2b8ad809 { if (q3) vmem(r10++m0) = v9 }
+{
+ if (q3) vmem(r10++m0)=v9
+}
+
+#CHECK: 28c7f438 { if (!q2) vmem(r7+#-4):nt = v24 }
+{
+ if (!q2) vmem(r7+#-4):nt=v24
+}
+
+#CHECK: 28d1eb15 { if (q1) vmem(r17+#-5):nt = v21 }
+{
+ if (q1) vmem(r17+#-5):nt=v21
+}
+
+#CHECK: 289cfe2b { if (!q3) vmem(r28+#-2) = v11 }
+{
+ if (!q3) vmem(r28+#-2)=v11
+}
+
+#CHECK: 288eef0f { if (q1) vmem(r14+#-1) = v15 }
+{
+ if (q1) vmem(r14+#-1)=v15
+}
+
+#CHECK: 28a2d1e1 { if (!p2) vmemu(r2+#1) = v1 }
+{
+ if (!p2) vmemu(r2+#1)=v1
+}
+
+#CHECK: 28bcf4db { if (p2) vmemu(r28+#-4) = v27 }
+{
+ if (p2) vmemu(r28+#-4)=v27
+}
+
+#CHECK: 28b2c925 { if (!p1) vmem(r18+#1) = v5 }
+{
+ if (!p1) vmem(r18+#1)=v5
+}
+
+#CHECK: 28afe41a { if (p0) vmem(r15+#-4) = v26 }
+{
+ if (p0) vmem(r15+#-4)=v26
+}
+
+#CHECK: 28f7fd3a { if (!p3) vmem(r23+#-3):nt = v26 }
+{
+ if (!p3) vmem(r23+#-3):nt=v26
+}
+
+#CHECK: 28f5fd10 { if (p3) vmem(r21+#-3):nt = v16 }
+{
+ if (p3) vmem(r21+#-3):nt=v16
+}
+
+#CHECK: 2945c440 v0.tmp = vmem(r5++#-4):nt }
+{
+ v0.tmp=vmem(r5++#-4):nt
+ v26=v0
+}
+
+#CHECK: 2942c338 v24.cur = vmem(r2++#3):nt }
+{
+ v24.cur=vmem(r2++#3):nt
+ v6=v24
+}
+
+#CHECK: 2908c157 v23.tmp = vmem(r8++#1) }
+{
+ v25=v23
+ v23.tmp=vmem(r8++#1)
+}
+
+#CHECK: 2903c72d v13.cur = vmem(r3++#-1) }
+{
+ v13.cur=vmem(r3++#-1)
+ v21=v13
+}
+
+#CHECK: 2855c743 v3.tmp = vmem(r21+#7):nt }
+{
+ v3.tmp=vmem(r21+#7):nt
+ v21=v3
+}
+
+#CHECK: 2856e025 v5.cur = vmem(r22+#-8):nt }
+{
+ v5.cur=vmem(r22+#-8):nt
+ v29=v5
+}
+
+#CHECK: 2802c555 v21.tmp = vmem(r2+#5) }
+{
+ v31=v21
+ v21.tmp=vmem(r2+#5)
+}
+
+#CHECK: 2814e12a v10.cur = vmem(r20+#-7) }
+{
+ v9=v10
+ v10.cur=vmem(r20+#-7)
+}
+
+
+#CHECK: 2b52c02c v12.cur = vmem(r18++m0):nt }
+{
+ v12.cur=vmem(r18++m0):nt
+ v25=v12
+}
+
+#CHECK: 2b4ae043 v3.tmp = vmem(r10++m1):nt }
+{
+ v25=v3
+ v3.tmp=vmem(r10++m1):nt
+}
+
+#CHECK: 2b06c025 v5.cur = vmem(r6++m0) }
+{
+ v5.cur=vmem(r6++m0)
+ v10=v5
+}
+
+#CHECK: 2b17e048 v8.tmp = vmem(r23++m1) }
+{
+ v8.tmp=vmem(r23++m1)
+ v28=v8
+}
+
+#CHECK: 282ee422 vmem(r14+#-4) = v14.new }
+{
+ v14 = v14
+ vmem(r14+#-4)=v14.new
+}
+
+#CHECK: 2866e222 vmem(r6+#-6):nt = v16.new }
+{
+ v16 = v8
+ vmem(r6+#-6):nt=v16.new
+}
+
+#CHECK: 28b1cd42 if(p1) vmem(r17+#5) = v17.new }
+{
+ v17 = v25
+ if(p1)vmem(r17+#5)=v17.new
+}
+
+#CHECK: 28bbeb6a if(!p1) vmem(r27+#-5) = v17.new }
+{
+ v17 = v15
+ if(!p1)vmem(r27+#-5)=v17.new
+}
+
+#CHECK: 28e4d252 if(p2) vmem(r4+#2):nt = v24.new }
+{
+ v24 = v10
+ if(p2)vmem(r4+#2):nt=v24.new
+}
+
+#CHECK: 28f8d17a if(!p2) vmem(r24+#1):nt = v4.new }
+{
+ v4 = v8
+ if(!p2)vmem(r24+#1):nt=v4.new
+}
+
+#CHECK: 2924c322 vmem(r4++#3) = v4.new }
+{
+ v4 = v3
+ vmem(r4++#3)=v4.new
+}
+
+#CHECK: 2961c122 vmem(r1++#1):nt = v7.new }
+{
+ v7 = v8
+ vmem(r1++#1):nt=v7.new
+}
+
+#CHECK: 29a6d042 if(p2) vmem(r6++#0) = v11.new }
+{
+ v11 = v13
+ if(p2)vmem(r6++#0)=v11.new
+}
+
+#CHECK: 29a2cb6a if(!p1) vmem(r2++#3) = v25.new }
+{
+ v25 = v17
+ if(!p1)vmem(r2++#3)=v25.new
+}
+
+#CHECK: 29f5c952 if(p1) vmem(r21++#1):nt = v14.new }
+{
+ v14 = v13
+ if(p1)vmem(r21++#1):nt=v14.new
+}
+
+#CHECK: 29f7cd7a if(!p1) vmem(r23++#-3):nt = v1.new }
+{
+ v1 = v0
+ if(!p1)vmem(r23++#-3):nt=v1.new
+}
+
+#CHECK: 2b3ec022 vmem(r30++m0) = v10.new }
+{
+ v10 = v23
+ vmem(r30++m0)=v10.new
+}
+
+#CHECK: 2b6fc022 vmem(r15++m0):nt = v19.new }
+{
+ v19 = v20
+ vmem(r15++m0):nt=v19.new
+}
+
+#CHECK: 2bb7f042 if(p2) vmem(r23++m1) = v6.new }
+{
+ v6 = v30
+ if(p2)vmem(r23++m1)=v6.new
+}
+
+#CHECK: 2ba2f06a if(!p2) vmem(r2++m1) = v12.new }
+{
+ v12 = v9
+ if(!p2)vmem(r2++m1)=v12.new
+}
+
+#CHECK: 2be7e852 if(p1) vmem(r7++m1):nt = v3.new }
+{
+ v3 = v13
+ if(p1)vmem(r7++m1):nt=v3.new
+}
+
+#CHECK: 2bfdd07a if(!p2) vmem(r29++m0):nt = v29.new }
+{
+ v29 = v9
+ if(!p2)vmem(r29++m0):nt=v29.new
+}
diff --git a/test/MC/Hexagon/v60-vmpy-acc.s b/test/MC/Hexagon/v60-vmpy-acc.s
new file mode 100644
index 000000000000..c39a9252b563
--- /dev/null
+++ b/test/MC/Hexagon/v60-vmpy-acc.s
@@ -0,0 +1,123 @@
+#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \
+#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \
+#RUN: FileCheck %s
+
+#CHECK: 1936ee37 { v23.w += vdmpy(v15:14.h,r22.uh,#1):sat }
+v23.w += vdmpy(v15:14.h,r22.uh,#1):sat
+
+#CHECK: 193bf90f { v15.w += vdmpy(v25.h,r27.uh):sat }
+v15.w += vdmpy(v25.h,r27.uh):sat
+
+#CHECK: 1902fcf0 { v17:16.h += vdmpy(v29:28.ub,r2.b) }
+v17:16.h += vdmpy(v29:28.ub,r2.b)
+
+#CHECK: 190cffd1 { v17.h += vdmpy(v31.ub,r12.b) }
+v17.h += vdmpy(v31.ub,r12.b)
+
+#CHECK: 1900f5ac { v12.w += vrmpy(v21.ub,r0.b) }
+v12.w += vrmpy(v21.ub,r0.b)
+
+#CHECK: 1905fb86 { v6.uw += vrmpy(v27.ub,r5.ub) }
+v6.uw += vrmpy(v27.ub,r5.ub)
+
+#CHECK: 191de570 { v16.w += vdmpy(v5.h,r29.b) }
+v16.w += vdmpy(v5.h,r29.b)
+
+#CHECK: 191de846 { v7:6.w += vtmpy(v9:8.h,r29.b) }
+v7:6.w += vtmpy(v9:8.h,r29.b)
+
+#CHECK: 190bfa22 { v3:2.h += vtmpy(v27:26.ub,r11.b) }
+v3:2.h += vtmpy(v27:26.ub,r11.b)
+
+#CHECK: 1915e408 { v9:8.h += vtmpy(v5:4.b,r21.b) }
+v9:8.h += vtmpy(v5:4.b,r21.b)
+
+#CHECK: 1987f71e { v31:30.uh += vmpy(v23.ub,r7.ub) }
+v31:30.uh += vmpy(v23.ub,r7.ub)
+
+#CHECK: 1969ff47 { v7.w += vasl(v31.w,r9) }
+v7.w += vasl(v31.w,r9)
+
+#CHECK: 196de3b0 { v16.w += vasr(v3.w,r13) }
+v16.w += vasr(v3.w,r13)
+
+#CHECK: 1977fe0a { v11:10.uw += vdsad(v31:30.uh,r23.uh) }
+v11:10.uw += vdsad(v31:30.uh,r23.uh)
+
+#CHECK: 196eee36 { v22.h += vmpyi(v14.h,r14.b) }
+v22.h += vmpyi(v14.h,r14.b)
+
+#CHECK: 1931faac { v13:12.h += vmpy(v26.ub,r17.b) }
+v13:12.h += vmpy(v26.ub,r17.b)
+
+#CHECK: 193cfc94 { v21:20.w += vdmpy(v29:28.h,r28.b) }
+v21:20.w += vdmpy(v29:28.h,r28.b)
+
+#CHECK: 1934fc62 { v2.w += vdmpy(v28.h,r20.h):sat }
+v2.w += vdmpy(v28.h,r20.h):sat
+
+#CHECK: 1925fe5f { v31.w += vdmpy(v31:30.h,r5.h):sat }
+v31.w += vdmpy(v31:30.h,r5.h):sat
+
+#CHECK: 194efe36 { v23:22.uw += vmpy(v30.uh,r14.uh) }
+v23:22.uw += vmpy(v30.uh,r14.uh)
+
+#CHECK: 1948e306 { v7:6.w += vmpy(v3.h,r8.h):sat }
+v7:6.w += vmpy(v3.h,r8.h):sat
+
+#CHECK: 192af2f8 { v25:24.w += vmpa(v19:18.h,r10.b) }
+v25:24.w += vmpa(v19:18.h,r10.b)
+
+#CHECK: 1926e4da { v27:26.h += vmpa(v5:4.ub,r6.b) }
+v27:26.h += vmpa(v5:4.ub,r6.b)
+
+#CHECK: 194ff078 { v24.w += vmpyi(v16.w,r15.h) }
+v24.w += vmpyi(v16.w,r15.h)
+
+#CHECK: 1946e247 { v7.w += vmpyi(v2.w,r6.b) }
+v7.w += vmpyi(v2.w,r6.b)
+
+#CHECK: 1c3fead5 { v21.w += vmpyo(v10.w,v31.h):<<1:sat:shift }
+v21.w += vmpyo(v10.w,v31.h):<<1:sat:shift
+
+#CHECK: 1c30e1fa { v26.w += vmpyo(v1.w,v16.h):<<1:rnd:sat:shift }
+v26.w += vmpyo(v1.w,v16.h):<<1:rnd:sat:shift
+
+#CHECK: 1c34f690 { v16.h += vmpyi(v22.h,v20.h) }
+v16.h += vmpyi(v22.h,v20.h)
+
+#CHECK: 1c34f4b5 { v21.w += vmpyie(v20.w,v20.uh) }
+v21.w += vmpyie(v20.w,v20.uh)
+
+#CHECK: 1c54f804 { v4.w += vmpyie(v24.w,v20.h) }
+v4.w += vmpyie(v24.w,v20.h)
+
+#CHECK: 1c1ff6f4 { v21:20.w += vmpy(v22.h,v31.h) }
+v21:20.w += vmpy(v22.h,v31.h)
+
+#CHECK: 1c31f026 { v7:6.w += vmpy(v16.h,v17.uh) }
+v7:6.w += vmpy(v16.h,v17.uh)
+
+#CHECK: 1c12fb98 { v25:24.h += vmpy(v27.b,v18.b) }
+v25:24.h += vmpy(v27.b,v18.b)
+
+#CHECK: 1c17fcc0 { v1:0.h += vmpy(v28.ub,v23.b) }
+v1:0.h += vmpy(v28.ub,v23.b)
+
+#CHECK: 1c16f26f { v15.w += vdmpy(v18.h,v22.h):sat }
+v15.w += vdmpy(v18.h,v22.h):sat
+
+#CHECK: 1c0bea3a { v26.w += vrmpy(v10.b,v11.b) }
+v26.w += vrmpy(v10.b,v11.b)
+
+#CHECK: 1c15eb47 { v7.w += vrmpy(v11.ub,v21.b) }
+v7.w += vrmpy(v11.ub,v21.b)
+
+#CHECK: 1c26e40e { v15:14.uw += vmpy(v4.uh,v6.uh) }
+v15:14.uw += vmpy(v4.uh,v6.uh)
+
+#CHECK: 1c0df9a8 { v9:8.uh += vmpy(v25.ub,v13.ub) }
+v9:8.uh += vmpy(v25.ub,v13.ub)
+
+#CHECK: 1c0afc15 { v21.uw += vrmpy(v28.ub,v10.ub) }
+v21.uw += vrmpy(v28.ub,v10.ub)
diff --git a/test/MC/Hexagon/v60-vmpy1.s b/test/MC/Hexagon/v60-vmpy1.s
new file mode 100644
index 000000000000..1f36a5e95ddb
--- /dev/null
+++ b/test/MC/Hexagon/v60-vmpy1.s
@@ -0,0 +1,138 @@
+#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \
+#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \
+#RUN: FileCheck %s
+
+#CHECK: 1939c223 { v3.w = vdmpy(v3:2.h,{{ *}}r25.uh,{{ *}}#1):sat }
+v3.w=vdmpy(v3:2.h,r25.uh,#1):sat
+
+#CHECK: 1936de0d { v13.w = vdmpy(v30.h,{{ *}}r22.uh):sat }
+v13.w=vdmpy(v30.h,r22.uh):sat
+
+#CHECK: 1919ccea { v11:10.h = vdmpy(v13:12.ub,{{ *}}r25.b) }
+v11:10.h=vdmpy(v13:12.ub,r25.b)
+
+#CHECK: 1918ced6 { v22.h = vdmpy(v14.ub,{{ *}}r24.b) }
+v22.h=vdmpy(v14.ub,r24.b)
+
+#CHECK: 1911deba { v27:26.uw = vdsad(v31:30.uh,{{ *}}r17.uh) }
+v27:26.uw=vdsad(v31:30.uh,r17.uh)
+
+#CHECK: 1908da97 { v23.w = vrmpy(v26.ub,{{ *}}r8.b) }
+v23.w=vrmpy(v26.ub,r8.b)
+
+#CHECK: 1915c974 { v20.uw = vrmpy(v9.ub,{{ *}}r21.ub) }
+v20.uw=vrmpy(v9.ub,r21.ub)
+
+#CHECK: 190dd446 { v6.w = vdmpy(v20.h,{{ *}}r13.b) }
+v6.w=vdmpy(v20.h,r13.b)
+
+#CHECK: 190ec030 { v17:16.h = vtmpy(v1:0.ub,{{ *}}r14.b) }
+v17:16.h=vtmpy(v1:0.ub,r14.b)
+
+#CHECK: 1918de1c { v29:28.h = vtmpy(v31:30.b,{{ *}}r24.b) }
+v29:28.h=vtmpy(v31:30.b,r24.b)
+
+#CHECK: 198dddf1 { v17.w = vmpyi(v29.w,{{ *}}r13.h) }
+v17.w=vmpyi(v29.w,r13.h)
+
+#CHECK: 19bccb13 { v19.w = vmpyi(v11.w,{{ *}}r28.b) }
+v19.w=vmpyi(v11.w,r28.b)
+
+#CHECK: 19c8cb0a { v11:10.uh = vmpy(v11.ub,{{ *}}r8.ub) }
+v11:10.uh=vmpy(v11.ub,r8.ub)
+
+#CHECK: 1973d012 { v18.h = vmpyi(v16.h,{{ *}}r19.b) }
+v18.h=vmpyi(v16.h,r19.b)
+
+#CHECK: 1922d1aa { v11:10.h = vmpy(v17.ub,{{ *}}r2.b) }
+v11:10.h=vmpy(v17.ub,r2.b)
+
+#CHECK: 1936ce9c { v29:28.w = vdmpy(v15:14.h,{{ *}}r22.b) }
+v29:28.w=vdmpy(v15:14.h,r22.b)
+
+#CHECK: 1925d86b { v11.w = vdmpy(v25:24.h,{{ *}}r5.h):sat }
+v11.w=vdmpy(v25:24.h,r5.h):sat
+
+#CHECK: 1925c255 { v21.w = vdmpy(v2.h,{{ *}}r5.h):sat }
+v21.w=vdmpy(v2.h,r5.h):sat
+
+#CHECK: 1941d424 { v4.h = vmpy(v20.h,{{ *}}r1.h):<<1:sat }
+v4.h=vmpy(v20.h,r1.h):<<1:sat
+
+#CHECK: 1943cf0a { v11:10.w = vmpy(v15.h,{{ *}}r3.h) }
+v11:10.w=vmpy(v15.h,r3.h)
+
+#CHECK: 193ec2f0 { v17:16.w = vmpa(v3:2.h,{{ *}}r30.b) }
+v17:16.w=vmpa(v3:2.h,r30.b)
+
+#CHECK: 193ddcde { v31:30.h = vmpa(v29:28.ub,{{ *}}r29.b) }
+v31:30.h=vmpa(v29:28.ub,r29.b)
+
+#CHECK: 1946de76 { v23:22.uw = vmpy(v30.uh,{{ *}}r6.uh) }
+v23:22.uw=vmpy(v30.uh,r6.uh)
+
+#CHECK: 1945c945 { v5.h = vmpy(v9.h,{{ *}}r5.h):<<1:rnd:sat }
+v5.h=vmpy(v9.h,r5.h):<<1:rnd:sat
+
+#CHECK: 19b0c280 { v1:0.w = vtmpy(v3:2.h,{{ *}}r16.b) }
+v1:0.w=vtmpy(v3:2.h,r16.b)
+
+#CHECK: 1c34d937 { v23.h = vmpy(v25.h,{{ *}}v20.h):<<1:rnd:sat }
+v23.h=vmpy(v25.h,v20.h):<<1:rnd:sat
+
+#CHECK: 1c36c90a { v11:10.uw = vmpy(v9.uh,{{ *}}v22.uh) }
+v11:10.uw=vmpy(v9.uh,v22.uh)
+
+#CHECK: 1c09c3ec { v13:12.w = vmpy(v3.h,{{ *}}v9.h) }
+v13:12.w=vmpy(v3.h,v9.h)
+
+#CHECK: 1c0dd1d8 { v25:24.h = vmpy(v17.ub,{{ *}}v13.b) }
+v25:24.h=vmpy(v17.ub,v13.b)
+
+#CHECK: 1c0dc0a4 { v5:4.uh = vmpy(v0.ub,{{ *}}v13.ub) }
+v5:4.uh=vmpy(v0.ub,v13.ub)
+
+#CHECK: 1c14df84 { v5:4.h = vmpy(v31.b,{{ *}}v20.b) }
+v5:4.h=vmpy(v31.b,v20.b)
+
+#CHECK: 1c16d77c { v28.w = vdmpy(v23.h,{{ *}}v22.h):sat }
+v28.w=vdmpy(v23.h,v22.h):sat
+
+#CHECK: 1c08d84f { v15.w = vrmpy(v24.ub,{{ *}}v8.b) }
+v15.w=vrmpy(v24.ub,v8.b)
+
+#CHECK: 1c06da29 { v9.w = vrmpy(v26.b,{{ *}}v6.b) }
+v9.w=vrmpy(v26.b,v6.b)
+
+#CHECK: 1c1ac805 { v5.uw = vrmpy(v8.ub,{{ *}}v26.ub) }
+v5.uw=vrmpy(v8.ub,v26.ub)
+
+#CHECK: 1c39d089 { v9.h = vmpyi(v16.h,{{ *}}v25.h) }
+v9.h=vmpyi(v16.h,v25.h)
+
+#CHECK: 1c3ecc64 { v5:4.h = vmpa(v13:12.ub,{{ *}}v31:30.b) }
+v5:4.h=vmpa(v13:12.ub,v31:30.b)
+
+#CHECK: 1c21ce54 { v21:20.w = vmpy(v14.h,{{ *}}v1.uh) }
+v21:20.w=vmpy(v14.h,v1.uh)
+
+#CHECK: 1cf2c6f0 { v17:16.h = vmpa(v7:6.ub,{{ *}}v19:18.ub) }
+v17:16.h=vmpa(v7:6.ub,v19:18.ub)
+
+#CHECK: 1fcdc82b { v11.w = vmpyio(v8.w{{ *}},{{ *}}v13.h) }
+v11.w=vmpyio(v8.w,v13.h)
+
+#CHECK: 1fdeda10 { v16.w = vmpyie(v26.w{{ *}},{{ *}}v30.uh) }
+v16.w=vmpyie(v26.w,v30.uh)
+
+#CHECK: 1ff2c2a6 { v6.w = vmpye(v2.w{{ *}},{{ *}}v18.uh) }
+v6.w=vmpye(v2.w,v18.uh)
+
+#CHECK: 1ff7cbfa { v26.w = vmpyo(v11.w{{ *}},{{ *}}v23.h):<<1:sat }
+v26.w=vmpyo(v11.w,v23.h):<<1:sat
+
+#CHECK: 1f5cd411 { v17.w = vmpyo(v20.w{{ *}},{{ *}}v28.h):<<1:rnd:sat }
+v17.w=vmpyo(v20.w,v28.h):<<1:rnd:sat
+
+#CHECK: 1f71cf1d { v29.w = vmpyieo(v15.h{{ *}},{{ *}}v17.h) }
+v29.w=vmpyieo(v15.h,v17.h)
diff --git a/test/MC/Hexagon/v60lookup.s b/test/MC/Hexagon/v60lookup.s
new file mode 100644
index 000000000000..b92a2d3c6eb1
--- /dev/null
+++ b/test/MC/Hexagon/v60lookup.s
@@ -0,0 +1,14 @@
+#RUN: llvm-mc -triple=hexagon -mcpu=hexagonv60 -filetype=obj %s | \
+#RUN: llvm-objdump -triple=hexagon -mcpu=hexagonv60 -d - | \
+#RUN: FileCheck %s
+
+ V31.b = vlut32(V29.b, V15.b, R1)
+# CHECK: 1b79fd3f { v31.b = vlut32(v29.b,v15.b,r1) }
+ V31.b |= vlut32(V29.b, V15.b, R2)
+# CHECK: 1b7afdbf { v31.b |= vlut32(v29.b,v15.b,r2) }
+ V31:30.h = vlut16(V29.b, V15.h, R3)
+# CHECK: 1b7bfdde { v31:30.h = vlut16(v29.b,v15.h,r3) }
+ v31:30.h |= vlut16(v2.b, v9.h, r4)
+# CHECK: 1b4ce2fe { v31:30.h |= vlut16(v2.b,v9.h,r4) }
+ v31.w = vinsert(r4)
+# CHECK: 19a4e03f { v31.w = vinsert(r4) }
diff --git a/test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s b/test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s
index d98c257c8586..a10aef3a5f8c 100644
--- a/test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s
+++ b/test/MC/MachO/AArch64/darwin-ARM64-local-label-diff.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o - < %s | macho-dump -dump-section-data | FileCheck %s
+; RUN: llvm-mc -triple arm64-apple-darwin -filetype=obj -o - < %s | llvm-readobj -s -sd | FileCheck %s
; rdar://13028719
.globl context_save0
@@ -18,4 +18,11 @@ Lcontext_save1_size: .quad (Lcontext_save1_end - Lcontext_save1)
Llockup_release:
.quad 0
-; CHECK: ('_section_data', '05000000 00000000 05000000 00000000 10000000 00000000 1f2003d5 1f2003d5 1f2003d5 1f2003d5 1f2003d5 1f2003d5 1f2003d5 1f2003d5 1f2003d5 1f2003d5 00000000 00000000 00000000 00000000 10000000 00000000 00000000 00000000')
+; CHECK: SectionData (
+; CHECK: 0000: 05000000 00000000 05000000 00000000 |................|
+; CHECK: 0010: 10000000 00000000 1F2003D5 1F2003D5 |......... ... ..|
+; CHECK: 0020: 1F2003D5 1F2003D5 1F2003D5 1F2003D5 |. ... ... ... ..|
+; CHECK: 0030: 1F2003D5 1F2003D5 1F2003D5 1F2003D5 |. ... ... ... ..|
+; CHECK: 0040: 00000000 00000000 00000000 00000000 |................|
+; CHECK: 0050: 10000000 00000000 00000000 00000000 |................|
+; CHECK: )
diff --git a/test/MC/MachO/AArch64/reloc-errors.s b/test/MC/MachO/AArch64/reloc-errors.s
new file mode 100644
index 000000000000..c29416d62cf3
--- /dev/null
+++ b/test/MC/MachO/AArch64/reloc-errors.s
@@ -0,0 +1,10 @@
+; RUN: not llvm-mc -triple aarch64-none-macho %s -filetype=obj -o - 2>&1 | FileCheck %s
+
+; CHECK: error: conditional branch requires assembler-local label. 'external' is external.
+ b.eq external
+
+; CHECK: error: Invalid relocation on conditional branch
+ tbz w0, #4, external
+
+; CHECK: error: unknown AArch64 fixup kind!
+ adr x0, external
diff --git a/test/MC/MachO/ARM/bad-darwin-ARM-reloc.s b/test/MC/MachO/ARM/bad-darwin-ARM-reloc.s
index 7ad91df3ce0d..ae4bc225dc5d 100644
--- a/test/MC/MachO/ARM/bad-darwin-ARM-reloc.s
+++ b/test/MC/MachO/ARM/bad-darwin-ARM-reloc.s
@@ -7,3 +7,9 @@
L___fcommon:
.word 0
@ CHECK-ERROR: unsupported relocation on symbol
+
+c:
+ .word a - b
+@ CHECK-ERROR: symbol 'a' can not be undefined in a subtraction expression
+ .word c - b
+@ CHECK-ERROR: symbol 'b' can not be undefined in a subtraction expression
diff --git a/test/MC/MachO/ARM/compact-unwind-armv7k.s b/test/MC/MachO/ARM/compact-unwind-armv7k.s
new file mode 100644
index 000000000000..6e8a855cafca
--- /dev/null
+++ b/test/MC/MachO/ARM/compact-unwind-armv7k.s
@@ -0,0 +1,124 @@
+@ RUN: llvm-mc -triple=thumbv7k-apple-watchos2.0.0 -filetype=obj -o %t < %s && llvm-objdump -unwind-info %t | FileCheck %s
+
+@ CHECK: Contents of __compact_unwind section:
+
+ .syntax unified
+ .align 2
+ .code 16
+
+@ CHECK-LABEL: start: {{.*}} _test_r4_r5_r6
+@ CHECK: compact encoding: 0x01000007
+ .thumb_func _test_r4_r5_r6
+_test_r4_r5_r6:
+ .cfi_startproc
+ push {r4, r5, r6, r7, lr}
+ add r7, sp, #12
+ sub sp, #16
+ .cfi_def_cfa r7, 8
+ .cfi_offset lr, -4
+ .cfi_offset r7, -8
+ .cfi_offset r6, -12
+ .cfi_offset r5, -16
+ .cfi_offset r4, -20
+ .cfi_endproc
+
+
+@ CHECK-LABEL: start: {{.*}} _test_r4_r5_r10_r11
+@ CHECK: compact encoding: 0x01000063
+ .thumb_func _test_r4_r5_r10_r11
+_test_r4_r5_r10_r11:
+ .cfi_startproc
+ push {r4, r5, r7, lr}
+ add r7, sp, #8
+ .cfi_def_cfa r7, 8
+ .cfi_offset lr, -4
+ .cfi_offset r7, -8
+ .cfi_offset r5, -12
+ .cfi_offset r4, -16
+ push.w {r10, r11}
+ .cfi_offset r11, -20
+ .cfi_offset r10, -24
+ .cfi_endproc
+
+
+@ CHECK-LABEL: start: {{.*}} _test_d8
+@ CHECK: compact encoding: 0x02000000
+ .thumb_func _test_d8
+_test_d8:
+ .cfi_startproc
+ push {r7, lr}
+ mov r7, sp
+ .cfi_def_cfa r7, 8
+ .cfi_offset lr, -4
+ .cfi_offset r7, -8
+ vpush {d8}
+ .cfi_offset d8, -16
+ .cfi_endproc
+
+
+@ CHECK-LABEL: start: {{.*}} _test_d8_d10_d12_d14
+@ CHECK: compact encoding: 0x02000300
+ .thumb_func _test_d8_d10_d12_d14
+_test_d8_d10_d12_d14:
+ .cfi_startproc
+ push {r7, lr}
+ mov r7, sp
+ .cfi_def_cfa r7, 8
+ .cfi_offset lr, -4
+ .cfi_offset r7, -8
+ vpush {d14}
+ vpush {d12}
+ vpush {d10}
+ vpush {d8}
+ .cfi_offset d14, -16
+ .cfi_offset d12, -24
+ .cfi_offset d10, -32
+ .cfi_offset d8, -40
+ .cfi_endproc
+
+@ CHECK-LABEL: start: {{.*}} _test_varargs
+@ CHECK: compact encoding: 0x01c00001
+ .thumb_func _test_varargs
+_test_varargs:
+ .cfi_startproc
+ sub sp, #12
+ push {r4, r7, lr}
+ add r7, sp, #4
+ .cfi_def_cfa r7, 20
+ .cfi_offset lr, -16
+ .cfi_offset r7, -20
+ .cfi_offset r4, -24
+ add.w r9, r7, #8
+ mov r4, r0
+ stm.w r9, {r1, r2, r3}
+ .cfi_endproc
+
+@ CHECK-LABEL: start: {{.*}} _test_missing_lr
+@ CHECK: compact encoding: 0x04000000
+ .thumb_func _test_missing_lr
+_test_missing_lr:
+ .cfi_startproc
+ push {r7}
+ .cfi_def_cfa r7, 4
+ .cfi_offset r7, -4
+ pop {r7}
+ bx lr
+ .cfi_endproc
+
+@ CHECK-LABEL: start: {{.*}} _test_swapped_offsets
+@ CHECK: compact encoding: 0x04000000
+ .thumb_func _test_swapped_offsets
+_test_swapped_offsets:
+ .cfi_startproc
+ push {r7, lr}
+ push {r10}
+ push {r4}
+ .cfi_def_cfa r7, 8
+ .cfi_offset lr, -4
+ .cfi_offset r7, -8
+ .cfi_offset r10, -12
+ .cfi_offset r4, -16
+ pop {r4}
+ pop {r10}
+ pop {r7, pc}
+ .cfi_endproc
diff --git a/test/MC/MachO/ARM/darwin-ARM-reloc.s b/test/MC/MachO/ARM/darwin-ARM-reloc.s
index 374f8804a52c..9843ec301545 100644
--- a/test/MC/MachO/ARM/darwin-ARM-reloc.s
+++ b/test/MC/MachO/ARM/darwin-ARM-reloc.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols < %t.obj > %t.dump
@ RUN: FileCheck < %t.dump %s
.syntax unified
@@ -21,153 +21,166 @@ Lsc0_0:
.subsections_via_symbols
-@ CHECK: ('cputype', 12)
-@ CHECK: ('cpusubtype', 9)
-@ CHECK: ('filetype', 1)
-@ CHECK: ('num_load_commands', 3)
-@ CHECK: ('load_commands_size', 364)
-@ CHECK: ('flag', 8192)
-@ CHECK: ('load_commands', [
-@ CHECK: # Load Command 0
-@ CHECK: (('command', 1)
-@ CHECK: ('size', 260)
-@ CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('vm_addr', 0)
-@ CHECK: ('vm_size', 16)
-@ CHECK: ('file_offset', 392)
-@ CHECK: ('file_size', 16)
-@ CHECK: ('maxprot', 7)
-@ CHECK: ('initprot', 7)
-@ CHECK: ('num_sections', 3)
-@ CHECK: ('flags', 0)
-@ CHECK: ('sections', [
-@ CHECK: # Section 0
-@ CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('address', 0)
-@ CHECK: ('size', 8)
-@ CHECK: ('offset', 392)
-@ CHECK: ('alignment', 0)
-@ CHECK: ('reloc_offset', 408)
-@ CHECK: ('num_reloc', 2)
-@ CHECK: ('flags', 0x80000400)
-@ CHECK: ('reserved1', 0)
-@ CHECK: ('reserved2', 0)
-@ CHECK: ),
-@ CHECK: ('_relocations', [
-@ CHECK: # Relocation 0
-@ CHECK: (('word-0', 0x4),
-@ CHECK: ('word-1', 0x55000001)),
-@ CHECK: # Relocation 1
-@ CHECK: (('word-0', 0x0),
-@ CHECK: ('word-1', 0x5d000003)),
-@ CHECK: ])
-@ CHECK: ('_section_data', 'feffffeb fdffffeb')
-@ CHECK: # Section 1
-@ CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('address', 8)
-@ CHECK: ('size', 4)
-@ CHECK: ('offset', 400)
-@ CHECK: ('alignment', 0)
-@ CHECK: ('reloc_offset', 424)
-@ CHECK: ('num_reloc', 2)
-@ CHECK: ('flags', 0x0)
-@ CHECK: ('reserved1', 0)
-@ CHECK: ('reserved2', 0)
-@ CHECK: ),
-@ CHECK: ('_relocations', [
-@ CHECK: # Relocation 0
-@ CHECK: (('word-0', 0xa2000000),
-@ CHECK: ('word-1', 0xc)),
-@ CHECK: # Relocation 1
-@ CHECK: (('word-0', 0xa1000000),
-@ CHECK: ('word-1', 0x8)),
-@ CHECK: ])
-@ CHECK: ('_section_data', '04000000')
-@ CHECK: # Section 2
-@ CHECK: (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('address', 12)
-@ CHECK: ('size', 4)
-@ CHECK: ('offset', 404)
-@ CHECK: ('alignment', 0)
-@ CHECK: ('reloc_offset', 0)
-@ CHECK: ('num_reloc', 0)
-@ CHECK: ('flags', 0x2)
-@ CHECK: ('reserved1', 0)
-@ CHECK: ('reserved2', 0)
-@ CHECK: ),
-@ CHECK: ('_relocations', [
-@ CHECK: ])
-@ CHECK: ('_section_data', '00000000')
-@ CHECK: ])
-@ CHECK: ),
-@ CHECK: # Load Command 1
-@ CHECK: (('command', 2)
-@ CHECK: ('size', 24)
-@ CHECK: ('symoff', 440)
-@ CHECK: ('nsyms', 4)
-@ CHECK: ('stroff', 488)
-@ CHECK: ('strsize', 24)
-@ CHECK: ('_string_data', '\x00_printf\x00_f1\x00_f0\x00_d0\x00\x00\x00\x00')
-@ CHECK: ('_symbols', [
-@ CHECK: # Symbol 0
-@ CHECK: (('n_strx', 13)
-@ CHECK: ('n_type', 0xe)
-@ CHECK: ('n_sect', 1)
-@ CHECK: ('n_desc', 0)
-@ CHECK: ('n_value', 0)
-@ CHECK: ('_string', '_f0')
-@ CHECK: ),
-@ CHECK: # Symbol 1
-@ CHECK: (('n_strx', 9)
-@ CHECK: ('n_type', 0xe)
-@ CHECK: ('n_sect', 1)
-@ CHECK: ('n_desc', 0)
-@ CHECK: ('n_value', 4)
-@ CHECK: ('_string', '_f1')
-@ CHECK: ),
-@ CHECK: # Symbol 2
-@ CHECK: (('n_strx', 17)
-@ CHECK: ('n_type', 0xe)
-@ CHECK: ('n_sect', 2)
-@ CHECK: ('n_desc', 0)
-@ CHECK: ('n_value', 8)
-@ CHECK: ('_string', '_d0')
-@ CHECK: ),
-@ CHECK: # Symbol 3
-@ CHECK: (('n_strx', 1)
-@ CHECK: ('n_type', 0x1)
-@ CHECK: ('n_sect', 0)
-@ CHECK: ('n_desc', 0)
-@ CHECK: ('n_value', 0)
-@ CHECK: ('_string', '_printf')
-@ CHECK: ),
-@ CHECK: ])
-@ CHECK: ),
-@ CHECK: # Load Command 2
-@ CHECK: (('command', 11)
-@ CHECK: ('size', 80)
-@ CHECK: ('ilocalsym', 0)
-@ CHECK: ('nlocalsym', 3)
-@ CHECK: ('iextdefsym', 3)
-@ CHECK: ('nextdefsym', 0)
-@ CHECK: ('iundefsym', 3)
-@ CHECK: ('nundefsym', 1)
-@ CHECK: ('tocoff', 0)
-@ CHECK: ('ntoc', 0)
-@ CHECK: ('modtaboff', 0)
-@ CHECK: ('nmodtab', 0)
-@ CHECK: ('extrefsymoff', 0)
-@ CHECK: ('nextrefsyms', 0)
-@ CHECK: ('indirectsymoff', 0)
-@ CHECK: ('nindirectsyms', 0)
-@ CHECK: ('extreloff', 0)
-@ CHECK: ('nextrel', 0)
-@ CHECK: ('locreloff', 0)
-@ CHECK: ('nlocrel', 0)
-@ CHECK: ('_indirect_symbols', [
-@ CHECK: ])
-@ CHECK: ),
-@ CHECK: ])
+@ CHECK: File: <stdin>
+@ CHECK: Format: Mach-O arm
+@ CHECK: Arch: arm
+@ CHECK: AddressSize: 32bit
+@ CHECK: MachHeader {
+@ CHECK: Magic: Magic (0xFEEDFACE)
+@ CHECK: CpuType: Arm (0xC)
+@ CHECK: CpuSubType: CPU_SUBTYPE_ARM_V7 (0x9)
+@ CHECK: FileType: Relocatable (0x1)
+@ CHECK: NumOfLoadCommands: 4
+@ CHECK: SizeOfLoadCommands: 380
+@ CHECK: Flags [ (0x2000)
+@ CHECK: MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+@ CHECK: ]
+@ CHECK: }
+@ CHECK: Sections [
+@ CHECK: Section {
+@ CHECK: Index: 0
+@ CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+@ CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+@ CHECK: Address: 0x0
+@ CHECK: Size: 0x8
+@ CHECK: Offset: 408
+@ CHECK: Alignment: 0
+@ CHECK: RelocationOffset: 0x1A8
+@ CHECK: RelocationCount: 2
+@ CHECK: Type: 0x0
+@ CHECK: Attributes [ (0x800004)
+@ CHECK: PureInstructions (0x800000)
+@ CHECK: SomeInstructions (0x4)
+@ CHECK: ]
+@ CHECK: Reserved1: 0x0
+@ CHECK: Reserved2: 0x0
+@ CHECK: SectionData (
+@ CHECK: 0000: FEFFFFEB FDFFFFEB |........|
+@ CHECK: )
+@ CHECK: }
+@ CHECK: Section {
+@ CHECK: Index: 1
+@ CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+@ CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+@ CHECK: Address: 0x8
+@ CHECK: Size: 0x4
+@ CHECK: Offset: 416
+@ CHECK: Alignment: 0
+@ CHECK: RelocationOffset: 0x1B8
+@ CHECK: RelocationCount: 2
+@ CHECK: Type: 0x0
+@ CHECK: Attributes [ (0x0)
+@ CHECK: ]
+@ CHECK: Reserved1: 0x0
+@ CHECK: Reserved2: 0x0
+@ CHECK: SectionData (
+@ CHECK: 0000: 04000000 |....|
+@ CHECK: )
+@ CHECK: }
+@ CHECK: Section {
+@ CHECK: Index: 2
+@ CHECK: Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+@ CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+@ CHECK: Address: 0xC
+@ CHECK: Size: 0x4
+@ CHECK: Offset: 420
+@ CHECK: Alignment: 0
+@ CHECK: RelocationOffset: 0x0
+@ CHECK: RelocationCount: 0
+@ CHECK: Type: ExtReloc (0x2)
+@ CHECK: Attributes [ (0x0)
+@ CHECK: ]
+@ CHECK: Reserved1: 0x0
+@ CHECK: Reserved2: 0x0
+@ CHECK: SectionData (
+@ CHECK: 0000: 00000000 |....|
+@ CHECK: )
+@ CHECK: }
+@ CHECK: ]
+@ CHECK: Relocations [
+@ CHECK: Section __text {
+@ CHECK: 0x4 1 2 0 ARM_RELOC_BR24 0 __text
+@ CHECK: 0x0 1 2 1 ARM_RELOC_BR24 0 _printf
+@ CHECK: }
+@ CHECK: Section __data {
+@ CHECK: 0x0 0 2 n/a ARM_RELOC_SECTDIFF 1 0xC
+@ CHECK: 0x0 0 2 n/a ARM_RELOC_PAIR 1 0x8
+@ CHECK: }
+@ CHECK: ]
+@ CHECK: Symbols [
+@ CHECK: Symbol {
+@ CHECK: Name: _f0 (13)
+@ CHECK: Type: Section (0xE)
+@ CHECK: Section: __text (0x1)
+@ CHECK: RefType: UndefinedNonLazy (0x0)
+@ CHECK: Flags [ (0x0)
+@ CHECK: ]
+@ CHECK: Value: 0x0
+@ CHECK: }
+@ CHECK: Symbol {
+@ CHECK: Name: _f1 (9)
+@ CHECK: Type: Section (0xE)
+@ CHECK: Section: __text (0x1)
+@ CHECK: RefType: UndefinedNonLazy (0x0)
+@ CHECK: Flags [ (0x0)
+@ CHECK: ]
+@ CHECK: Value: 0x4
+@ CHECK: }
+@ CHECK: Symbol {
+@ CHECK: Name: _d0 (17)
+@ CHECK: Type: Section (0xE)
+@ CHECK: Section: __data (0x2)
+@ CHECK: RefType: UndefinedNonLazy (0x0)
+@ CHECK: Flags [ (0x0)
+@ CHECK: ]
+@ CHECK: Value: 0x8
+@ CHECK: }
+@ CHECK: Symbol {
+@ CHECK: Name: _printf (1)
+@ CHECK: Extern
+@ CHECK: Type: Undef (0x0)
+@ CHECK: Section: (0x0)
+@ CHECK: RefType: UndefinedNonLazy (0x0)
+@ CHECK: Flags [ (0x0)
+@ CHECK: ]
+@ CHECK: Value: 0x0
+@ CHECK: }
+@ CHECK: ]
+@ CHECK: Indirect Symbols {
+@ CHECK: Number: 0
+@ CHECK: Symbols [
+@ CHECK: ]
+@ CHECK: }
+@ CHECK: Segment {
+@ CHECK: Cmd: LC_SEGMENT
+@ CHECK: Name:
+@ CHECK: Size: 260
+@ CHECK: vmaddr: 0x0
+@ CHECK: vmsize: 0x10
+@ CHECK: fileoff: 408
+@ CHECK: filesize: 16
+@ CHECK: maxprot: rwx
+@ CHECK: initprot: rwx
+@ CHECK: nsects: 3
+@ CHECK: flags: 0x0
+@ CHECK: }
+@ CHECK: Dysymtab {
+@ CHECK: ilocalsym: 0
+@ CHECK: nlocalsym: 3
+@ CHECK: iextdefsym: 3
+@ CHECK: nextdefsym: 0
+@ CHECK: iundefsym: 3
+@ CHECK: nundefsym: 1
+@ CHECK: tocoff: 0
+@ CHECK: ntoc: 0
+@ CHECK: modtaboff: 0
+@ CHECK: nmodtab: 0
+@ CHECK: extrefsymoff: 0
+@ CHECK: nextrefsyms: 0
+@ CHECK: indirectsymoff: 0
+@ CHECK: nindirectsyms: 0
+@ CHECK: extreloff: 0
+@ CHECK: nextrel: 0
+@ CHECK: locreloff: 0
+@ CHECK: nlocrel: 0
+@ CHECK: }
diff --git a/test/MC/MachO/ARM/darwin-Thumb-reloc.s b/test/MC/MachO/ARM/darwin-Thumb-reloc.s
index 567573d9ef19..7c85e0d3fe8e 100644
--- a/test/MC/MachO/ARM/darwin-Thumb-reloc.s
+++ b/test/MC/MachO/ARM/darwin-Thumb-reloc.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols < %t.obj > %t.dump
@ RUN: FileCheck < %t.dump %s
.syntax unified
@@ -22,118 +22,127 @@ L_.str:
.subsections_via_symbols
-@ CHECK: ('cputype', 12)
-@ CHECK: ('cpusubtype', 9)
-@ CHECK: ('filetype', 1)
-@ CHECK: ('num_load_commands', 3)
-@ CHECK: ('load_commands_size', 296)
-@ CHECK: ('flag', 8192)
-@ CHECK: ('load_commands', [
-@ CHECK: # Load Command 0
-@ CHECK: (('command', 1)
-@ CHECK: ('size', 192)
-@ CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('vm_addr', 0)
-@ CHECK: ('vm_size', 11)
-@ CHECK: ('file_offset', 324)
-@ CHECK: ('file_size', 11)
-@ CHECK: ('maxprot', 7)
-@ CHECK: ('initprot', 7)
-@ CHECK: ('num_sections', 2)
-@ CHECK: ('flags', 0)
-@ CHECK: ('sections', [
-@ CHECK: # Section 0
-@ CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('address', 0)
-@ CHECK: ('size', 8)
-@ CHECK: ('offset', 324)
-@ CHECK: ('alignment', 2)
-@ CHECK: ('reloc_offset', 336)
-@ CHECK: ('num_reloc', 3)
-@ CHECK: ('flags', 0x80000400)
-@ CHECK: ('reserved1', 0)
-@ CHECK: ('reserved2', 0)
-@ CHECK: ),
-@ CHECK: ('_relocations', [
-@ CHECK: # Relocation 0
-@ CHECK: (('word-0', 0xa2000004),
-@ CHECK: ('word-1', 0x8)),
-@ CHECK: # Relocation 1
-@ CHECK: (('word-0', 0xa1000000),
-@ CHECK: ('word-1', 0x0)),
-@ CHECK: # Relocation 2
-@ CHECK: (('word-0', 0x0),
-@ CHECK: ('word-1', 0x6d000001)),
-@ CHECK: ])
-@ CHECK-FIXME: ('_section_data', 'fff7feef 04000000')
-@ CHECK: # Section 1
-@ CHECK: (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('address', 8)
-@ CHECK: ('size', 3)
-@ CHECK: ('offset', 332)
-@ CHECK: ('alignment', 2)
-@ CHECK: ('reloc_offset', 0)
-@ CHECK: ('num_reloc', 0)
-@ CHECK: ('flags', 0x2)
-@ CHECK: ('reserved1', 0)
-@ CHECK: ('reserved2', 0)
-@ CHECK: ),
-@ CHECK: ('_relocations', [
-@ CHECK: ])
-@ CHECK: ('_section_data', '733000')
-@ CHECK: ])
-@ CHECK: ),
-@ CHECK: # Load Command 1
-@ CHECK: (('command', 2)
-@ CHECK: ('size', 24)
-@ CHECK: ('symoff', 360)
-@ CHECK: ('nsyms', 2)
-@ CHECK: ('stroff', 384)
-@ CHECK: ('strsize', 16)
-@ CHECK: ('_string_data', '\x00_main\x00_printf\x00\x00')
-@ CHECK: ('_symbols', [
-@ CHECK: # Symbol 0
-@ CHECK: (('n_strx', 1)
-@ CHECK: ('n_type', 0xf)
-@ CHECK: ('n_sect', 1)
-@ CHECK: ('n_desc', 8)
-@ CHECK: ('n_value', 0)
-@ CHECK: ('_string', '_main')
-@ CHECK: ),
-@ CHECK: # Symbol 1
-@ CHECK: (('n_strx', 7)
-@ CHECK: ('n_type', 0x1)
-@ CHECK: ('n_sect', 0)
-@ CHECK: ('n_desc', 0)
-@ CHECK: ('n_value', 0)
-@ CHECK: ('_string', '_printf')
-@ CHECK: ),
-@ CHECK: ])
-@ CHECK: ),
-@ CHECK: # Load Command 2
-@ CHECK: (('command', 11)
-@ CHECK: ('size', 80)
-@ CHECK: ('ilocalsym', 0)
-@ CHECK: ('nlocalsym', 0)
-@ CHECK: ('iextdefsym', 0)
-@ CHECK: ('nextdefsym', 1)
-@ CHECK: ('iundefsym', 1)
-@ CHECK: ('nundefsym', 1)
-@ CHECK: ('tocoff', 0)
-@ CHECK: ('ntoc', 0)
-@ CHECK: ('modtaboff', 0)
-@ CHECK: ('nmodtab', 0)
-@ CHECK: ('extrefsymoff', 0)
-@ CHECK: ('nextrefsyms', 0)
-@ CHECK: ('indirectsymoff', 0)
-@ CHECK: ('nindirectsyms', 0)
-@ CHECK: ('extreloff', 0)
-@ CHECK: ('nextrel', 0)
-@ CHECK: ('locreloff', 0)
-@ CHECK: ('nlocrel', 0)
-@ CHECK: ('_indirect_symbols', [
-@ CHECK: ])
-@ CHECK: ),
-@ CHECK: ])
+@ CHECK: File: <stdin>
+@ CHECK: Format: Mach-O arm
+@ CHECK: Arch: arm
+@ CHECK: AddressSize: 32bit
+@ CHECK: MachHeader {
+@ CHECK: Magic: Magic (0xFEEDFACE)
+@ CHECK: CpuType: Arm (0xC)
+@ CHECK: CpuSubType: CPU_SUBTYPE_ARM_V7 (0x9)
+@ CHECK: FileType: Relocatable (0x1)
+@ CHECK: NumOfLoadCommands: 4
+@ CHECK: SizeOfLoadCommands: 312
+@ CHECK: Flags [ (0x2000)
+@ CHECK: MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+@ CHECK: ]
+@ CHECK: }
+@ CHECK: Sections [
+@ CHECK: Section {
+@ CHECK: Index: 0
+@ CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+@ CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+@ CHECK: Address: 0x0
+@ CHECK: Size: 0x8
+@ CHECK: Offset: 340
+@ CHECK: Alignment: 2
+@ CHECK: RelocationOffset: 0x160
+@ CHECK: RelocationCount: 3
+@ CHECK: Type: 0x0
+@ CHECK: Attributes [ (0x800004)
+@ CHECK: PureInstructions (0x800000)
+@ CHECK: SomeInstructions (0x4)
+@ CHECK: ]
+@ CHECK: Reserved1: 0x0
+@ CHECK: Reserved2: 0x0
+@ CHECK: SectionData (
+@ CHECK: 0000: FFF7FEEF 04000000 |........|
+@ CHECK: )
+@ CHECK: }
+@ CHECK: Section {
+@ CHECK: Index: 1
+@ CHECK: Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+@ CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+@ CHECK: Address: 0x8
+@ CHECK: Size: 0x3
+@ CHECK: Offset: 348
+@ CHECK: Alignment: 2
+@ CHECK: RelocationOffset: 0x0
+@ CHECK: RelocationCount: 0
+@ CHECK: Type: ExtReloc (0x2)
+@ CHECK: Attributes [ (0x0)
+@ CHECK: ]
+@ CHECK: Reserved1: 0x0
+@ CHECK: Reserved2: 0x0
+@ CHECK: SectionData (
+@ CHECK: 0000: 733000 |s0.|
+@ CHECK: )
+@ CHECK: }
+@ CHECK: ]
+@ CHECK: Relocations [
+@ CHECK: Section __text {
+@ CHECK: 0x4 0 2 n/a ARM_RELOC_SECTDIFF 1 0x8
+@ CHECK: 0x0 0 2 n/a ARM_RELOC_PAIR 1 0x0
+@ CHECK: 0x0 1 2 1 ARM_THUMB_RELOC_BR22 0 _printf
+@ CHECK: }
+@ CHECK: ]
+@ CHECK: Symbols [
+@ CHECK: Symbol {
+@ CHECK: Name: _main (1)
+@ CHECK: Extern
+@ CHECK: Type: Section (0xE)
+@ CHECK: Section: __text (0x1)
+@ CHECK: RefType: 0x8
+@ CHECK: Flags [ (0x0)
+@ CHECK: ]
+@ CHECK: Value: 0x0
+@ CHECK: }
+@ CHECK: Symbol {
+@ CHECK: Name: _printf (7)
+@ CHECK: Extern
+@ CHECK: Type: Undef (0x0)
+@ CHECK: Section: (0x0)
+@ CHECK: RefType: UndefinedNonLazy (0x0)
+@ CHECK: Flags [ (0x0)
+@ CHECK: ]
+@ CHECK: Value: 0x0
+@ CHECK: }
+@ CHECK: ]
+@ CHECK: Indirect Symbols {
+@ CHECK: Number: 0
+@ CHECK: Symbols [
+@ CHECK: ]
+@ CHECK: }
+@ CHECK: Segment {
+@ CHECK: Cmd: LC_SEGMENT
+@ CHECK: Name:
+@ CHECK: Size: 192
+@ CHECK: vmaddr: 0x0
+@ CHECK: vmsize: 0xB
+@ CHECK: fileoff: 340
+@ CHECK: filesize: 11
+@ CHECK: maxprot: rwx
+@ CHECK: initprot: rwx
+@ CHECK: nsects: 2
+@ CHECK: flags: 0x0
+@ CHECK: }
+@ CHECK: Dysymtab {
+@ CHECK: ilocalsym: 0
+@ CHECK: nlocalsym: 0
+@ CHECK: iextdefsym: 0
+@ CHECK: nextdefsym: 1
+@ CHECK: iundefsym: 1
+@ CHECK: nundefsym: 1
+@ CHECK: tocoff: 0
+@ CHECK: ntoc: 0
+@ CHECK: modtaboff: 0
+@ CHECK: nmodtab: 0
+@ CHECK: extrefsymoff: 0
+@ CHECK: nextrefsyms: 0
+@ CHECK: indirectsymoff: 0
+@ CHECK: nindirectsyms: 0
+@ CHECK: extreloff: 0
+@ CHECK: nextrel: 0
+@ CHECK: locreloff: 0
+@ CHECK: nlocrel: 0
+@ CHECK: }
diff --git a/test/MC/MachO/ARM/data-in-code.s b/test/MC/MachO/ARM/data-in-code.s
index bbcb9aabde5c..a7be7e74d495 100644
--- a/test/MC/MachO/ARM/data-in-code.s
+++ b/test/MC/MachO/ARM/data-in-code.s
@@ -1,23 +1,6 @@
-@ RUN: llvm-mc -triple armv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+@ RUN: llvm-mc -triple armv7-apple-darwin10 -filetype=obj -o - < %s | llvm-readobj --macho-data-in-code | FileCheck %s
.text
_foo:
-@ CHECK: # DICE 0
-@ CHECK: ('offset', 0)
-@ CHECK: ('length', 4)
-@ CHECK: ('kind', 1)
-@ CHECK: # DICE 1
-@ CHECK: ('offset', 4)
-@ CHECK: ('length', 4)
-@ CHECK: ('kind', 4)
-@ CHECK: # DICE 2
-@ CHECK: ('offset', 8)
-@ CHECK: ('length', 2)
-@ CHECK: ('kind', 3)
-@ CHECK: # DICE 3
-@ CHECK: ('offset', 10)
-@ CHECK: ('length', 1)
-@ CHECK: ('kind', 2)
-
.data_region
.long 10
.end_data_region
@@ -31,3 +14,37 @@ _foo:
.byte 3
.end_data_region
+@ CHECK: File: <stdin>
+@ CHECK: Format: Mach-O arm
+@ CHECK: Arch: arm
+@ CHECK: AddressSize: 32bit
+@ CHECK: DataInCode {
+@ CHECK: Data offset: 300
+@ CHECK: Data size: 32
+@ CHECK: Data entries [
+@ CHECK: Entry {
+@ CHECK: Index: 0
+@ CHECK: Offset: 0
+@ CHECK: Length: 4
+@ CHECK: Kind: 1
+@ CHECK: }
+@ CHECK: Entry {
+@ CHECK: Index: 1
+@ CHECK: Offset: 4
+@ CHECK: Length: 4
+@ CHECK: Kind: 4
+@ CHECK: }
+@ CHECK: Entry {
+@ CHECK: Index: 2
+@ CHECK: Offset: 8
+@ CHECK: Length: 2
+@ CHECK: Kind: 3
+@ CHECK: }
+@ CHECK: Entry {
+@ CHECK: Index: 3
+@ CHECK: Offset: 10
+@ CHECK: Length: 1
+@ CHECK: Kind: 2
+@ CHECK: }
+@ CHECK: ]
+@ CHECK: }
diff --git a/test/MC/MachO/ARM/empty-function-nop.ll b/test/MC/MachO/ARM/empty-function-nop.ll
index ef86ebc2a267..0bc439497e2c 100644
--- a/test/MC/MachO/ARM/empty-function-nop.ll
+++ b/test/MC/MachO/ARM/empty-function-nop.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -filetype=obj -mtriple=thumbv6-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-T1 %s
-; RUN: llc < %s -filetype=obj -mtriple=thumbv7-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-T2 %s
-; RUN: llc < %s -filetype=obj -mtriple=armv6-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-ARM %s
-; RUN: llc < %s -filetype=obj -mtriple=armv7-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-ARMV7 %s
+; RUN: llc < %s -filetype=obj -mtriple=thumbv6-apple-darwin -o - | llvm-readobj -s -sd | FileCheck -check-prefix=CHECK-T1 %s
+; RUN: llc < %s -filetype=obj -mtriple=thumbv7-apple-darwin -o - | llvm-readobj -s -sd | FileCheck -check-prefix=CHECK-T2 %s
+; RUN: llc < %s -filetype=obj -mtriple=armv6-apple-darwin -o - | llvm-readobj -s -sd | FileCheck -check-prefix=CHECK-ARM %s
+; RUN: llc < %s -filetype=obj -mtriple=armv7-apple-darwin -o - | llvm-readobj -s -sd | FileCheck -check-prefix=CHECK-ARMV7 %s
; Empty functions need a NOP in them for MachO to prevent DWARF FDEs from
; getting all mucked up. See lib/CodeGen/AsmPrinter/AsmPrinter.cpp for
@@ -9,7 +9,15 @@
define internal fastcc void @empty_function() {
unreachable
}
-; CHECK-T1: ('_section_data', 'c046')
-; CHECK-T2: ('_section_data', '00bf')
-; CHECK-ARM: ('_section_data', '0000a0e1')
-; CHECK-ARMV7: ('_section_data', '00f020e3')
+; CHECK-T1: SectionData (
+; CHECK-T1: 0000: C046 |.F|
+; CHECK-T1: )
+; CHECK-T2: SectionData (
+; CHECK-T2: 0000: 00BF |..|
+; CHECK-T2: )
+; CHECK-ARM: SectionData (
+; CHECK-ARM: 0000: 0000A0E1 |....|
+; CHECK-ARM: )
+; CHECK-ARMV7: SectionData (
+; CHECK-ARMV7: 0000: 00F020E3 |.. .|
+; CHECK-ARMV7: )
diff --git a/test/MC/MachO/ARM/ios-version-min-load-command.s b/test/MC/MachO/ARM/ios-version-min-load-command.s
index 9f63c9bd27c7..0fa29da0b4c6 100644
--- a/test/MC/MachO/ARM/ios-version-min-load-command.s
+++ b/test/MC/MachO/ARM/ios-version-min-load-command.s
@@ -1,10 +1,16 @@
-// RUN: llvm-mc -triple armv7-apple-ios %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple armv7-apple-ios %s -filetype=obj -o - | llvm-readobj --macho-version-min | FileCheck %s
// Test the formation of the version-min load command in the MachO.
// use a nonsense but well formed version.
.ios_version_min 99,8,7
-// CHECK: (('command', 37)
-// CHECK: ('size', 16)
-// CHECK: ('version, 6490119)
-// CHECK: ('sdk, 0)
-// CHECK: ),
+
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O arm
+// CHECK: Arch: arm
+// CHECK: AddressSize: 32bit
+// CHECK: MinVersion {
+// CHECK: Cmd: LC_VERSION_MIN_IPHONEOS
+// CHECK: Size: 16
+// CHECK: Version: 99.8.7
+// CHECK: SDK: n/a
+// CHECK: }
diff --git a/test/MC/MachO/ARM/long-call-branch-island-relocation.s b/test/MC/MachO/ARM/long-call-branch-island-relocation.s
index 8ee7da54b541..c4d153fc2b68 100644
--- a/test/MC/MachO/ARM/long-call-branch-island-relocation.s
+++ b/test/MC/MachO/ARM/long-call-branch-island-relocation.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o %t.o
-@ RUN: macho-dump --dump-section-data < %t.o | FileCheck %s
+@ RUN: llvm-readobj -relocations -expand-relocs < %t.o | FileCheck %s
@ rdar://12359919
@@ -36,8 +36,18 @@ _foo:
pop {r7, pc}
-@ CHECK: ('_relocations', [
-@ CHECK: # Relocation 0
-@ CHECK: (('word-0', 0x4),
-@ CHECK: ('word-1', 0x6d000002)),
-@ CHECK: ])
+@ CHECK: File: <stdin>
+@ CHECK: Format: Mach-O arm
+@ CHECK: Arch: arm
+@ CHECK: AddressSize: 32bit
+@ CHECK: Relocations [
+@ CHECK: Section __text {
+@ CHECK: Relocation {
+@ CHECK: Offset: 0x4
+@ CHECK: PCRel: 1
+@ CHECK: Length: 2
+@ CHECK: Type: ARM_THUMB_RELOC_BR22 (6)
+@ CHECK: Symbol: _foo (2)
+@ CHECK: }
+@ CHECK: }
+@ CHECK: ]
diff --git a/test/MC/MachO/ARM/no-subsections-reloc.s b/test/MC/MachO/ARM/no-subsections-reloc.s
index 7701c59c6805..e367a3cfa731 100644
--- a/test/MC/MachO/ARM/no-subsections-reloc.s
+++ b/test/MC/MachO/ARM/no-subsections-reloc.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: llvm-readobj -s -sd < %t.obj > %t.dump
@ RUN: FileCheck < %t.dump %s
@ When not using subsections-via-symbols, references to non-local symbols
@@ -14,5 +14,7 @@ _foo:
bar:
.long 0
-@ CHECK: 'num_reloc', 0
-@ CHECK: '_section_data', 'dff80030 00000000'
+@ CHECK: RelocationCount: 0
+@ CHECK: SectionData (
+@ CHECK: 0000: DFF80030 00000000 |...0....|
+@ CHECK: )
diff --git a/test/MC/MachO/ARM/nop-armv4-padding.s b/test/MC/MachO/ARM/nop-armv4-padding.s
index 8e03d17a70c9..baab3d7491c8 100644
--- a/test/MC/MachO/ARM/nop-armv4-padding.s
+++ b/test/MC/MachO/ARM/nop-armv4-padding.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -triple armv4-apple-darwin %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: llvm-readobj -s -sd < %t.obj > %t.dump
@ RUN: FileCheck %s < %t.dump
x:
@@ -7,4 +7,7 @@ x:
.align 4
add r0, r1, r2
-@ CHECK: ('_section_data', '020081e0 0000a0e1 0000a0e1 0000a0e1 020081e0')
+@ CHECK: SectionData (
+@ CHECK: 0000: 020081E0 0000A0E1 0000A0E1 0000A0E1 |................|
+@ CHECK: 0010: 020081E0 |....|
+@ CHECK: )
diff --git a/test/MC/MachO/ARM/nop-armv6t2-padding.s b/test/MC/MachO/ARM/nop-armv6t2-padding.s
index c38ad2d7c57c..a1a21f5f456e 100644
--- a/test/MC/MachO/ARM/nop-armv6t2-padding.s
+++ b/test/MC/MachO/ARM/nop-armv6t2-padding.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -triple armv6t2-apple-darwin %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: llvm-readobj -s -sd < %t.obj > %t.dump
@ RUN: FileCheck %s < %t.dump
x:
@@ -7,4 +7,7 @@ x:
.align 4
add r0, r1, r2
-@ CHECK: ('_section_data', '020081e0 00f020e3 00f020e3 00f020e3 020081e0')
+@ CHECK: SectionData (
+@ CHECK: 0000: 020081E0 00F020E3 00F020E3 00F020E3 |...... ... ... .|
+@ CHECK: 0010: 020081E0 |....|
+@ CHECK: )
diff --git a/test/MC/MachO/ARM/nop-thumb-padding.s b/test/MC/MachO/ARM/nop-thumb-padding.s
index 1e173f1a42d9..26db12140def 100644
--- a/test/MC/MachO/ARM/nop-thumb-padding.s
+++ b/test/MC/MachO/ARM/nop-thumb-padding.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -triple armv6-apple-darwin %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: llvm-readobj -s -sd < %t.obj > %t.dump
@ RUN: FileCheck %s < %t.dump
.thumb_func x
@@ -9,4 +9,7 @@ x:
.align 4
adds r0, r1, r2
-@ CHECK: ('_section_data', '8818c046 c046c046 c046c046 c046c046 8818')
+@ CHECK: SectionData (
+@ CHECK: 0000: 8818C046 C046C046 C046C046 C046C046 |...F.F.F.F.F.F.F|
+@ CHECK: 0010: 8818 |..|
+@ CHECK: )
diff --git a/test/MC/MachO/ARM/nop-thumb2-padding.s b/test/MC/MachO/ARM/nop-thumb2-padding.s
index a8aa3a1168ef..a986ff17f2aa 100644
--- a/test/MC/MachO/ARM/nop-thumb2-padding.s
+++ b/test/MC/MachO/ARM/nop-thumb2-padding.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -triple armv7-apple-darwin %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: llvm-readobj -s -sd < %t.obj > %t.dump
@ RUN: FileCheck %s < %t.dump
.thumb_func x
@@ -9,4 +9,7 @@ x:
.align 4
adds r0, r1, r2
-@ CHECK: ('_section_data', '881800bf 00bf00bf 00bf00bf 00bf00bf 8818')
+@ CHECK: SectionData (
+@ CHECK: 0000: 881800BF 00BF00BF 00BF00BF 00BF00BF |................|
+@ CHECK: 0010: 8818 |..|
+@ CHECK: )
diff --git a/test/MC/MachO/ARM/relax-thumb-ldr-literal.s b/test/MC/MachO/ARM/relax-thumb-ldr-literal.s
index 8d26f6d2e2d7..f6f2233e6f97 100644
--- a/test/MC/MachO/ARM/relax-thumb-ldr-literal.s
+++ b/test/MC/MachO/ARM/relax-thumb-ldr-literal.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: llvm-readobj -s -sd < %t.obj > %t.dump
@ RUN: FileCheck < %t.dump %s
.syntax unified
@@ -9,5 +9,14 @@
_foo:
ldr r2, (_foo - 4)
-@ CHECK: ('num_reloc', 0)
-@ CHECK: ('_section_data', '5ff80820')
+@ CHECK: RelocationCount: 0
+@ CHECK: Type: 0x0
+@ CHECK: Attributes [ (0x800004)
+@ CHECK: PureInstructions (0x800000)
+@ CHECK: SomeInstructions (0x4)
+@ CHECK: ]
+@ CHECK: Reserved1: 0x0
+@ CHECK: Reserved2: 0x0
+@ CHECK: SectionData (
+@ CHECK: 0000: 5FF80820 |_.. |
+@ CHECK: )
diff --git a/test/MC/MachO/ARM/relax-thumb2-branches.s b/test/MC/MachO/ARM/relax-thumb2-branches.s
index 7916d424078c..fbac5fd4260c 100644
--- a/test/MC/MachO/ARM/relax-thumb2-branches.s
+++ b/test/MC/MachO/ARM/relax-thumb2-branches.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding %s -filetype=obj -o - | llvm-readobj -s -sd | FileCheck %s
ble Lfoo @ wide encoding
@@ -10,5 +10,38 @@ Lfoo:
.space 256
Lbaz:
-@ CHECK: '_section_data', '40f38180
-@ CHECK: 000000bf 7fdd
+@ CHECK: SectionData (
+@ CHECK: 0000: 40F38180 00000000 00000000 00000000 |@...............|
+@ CHECK: 0010: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0020: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0030: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0040: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0050: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0060: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0070: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0080: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0090: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 00A0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 00B0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 00C0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 00D0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 00E0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 00F0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0100: 00000000 000000BF 7FDD0000 00000000 |................|
+@ CHECK: 0110: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0120: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0130: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0140: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0150: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0160: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0170: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0180: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0190: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 01A0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 01B0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 01C0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 01D0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 01E0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 01F0: 00000000 00000000 00000000 00000000 |................|
+@ CHECK: 0200: 00000000 00000000 0000 |..........|
+@ CHECK: )
diff --git a/test/MC/MachO/ARM/thumb-bl-jbits.s b/test/MC/MachO/ARM/thumb-bl-jbits.s
index 9657968db5e4..2657fd9d99d7 100644
--- a/test/MC/MachO/ARM/thumb-bl-jbits.s
+++ b/test/MC/MachO/ARM/thumb-bl-jbits.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -triple=thumbv7-apple-darwin -filetype=obj -o - < %s | macho-dump --dump-section-data | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -filetype=obj -o - < %s | llvm-readobj -s -sd | FileCheck %s
.thumb
.thumb_func t
t: nop
@@ -11,9 +11,17 @@ t: nop
.thumb_func b
b:
bl t
-# CHECK: '_section_data', 'c3f7fcf5'
# We are checking that the branch and link instruction which is:
# bl #-4441096
# has it displacement encoded correctly with respect to the J1 and J2 bits when
# the branch is assembled with a label not a displacement.
# rdar://10149689
+
+# CHECK: Section {
+# CHECK: Index: 2
+# CHECK: Name: __branch (5F 5F 62 72 61 6E 63 68 00 00 00 00 00 00 00 00)
+# CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+# CHECK: SectionData (
+# CHECK: 0000: C3F7FCF5 |....|
+# CHECK: )
+# CHECK: }
diff --git a/test/MC/MachO/ARM/thumb2-function-relative-load.s b/test/MC/MachO/ARM/thumb2-function-relative-load.s
index 622007dc1657..1a91675fae58 100644
--- a/test/MC/MachO/ARM/thumb2-function-relative-load.s
+++ b/test/MC/MachO/ARM/thumb2-function-relative-load.s
@@ -1,5 +1,5 @@
@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: llvm-readobj -s -sd < %t.obj > %t.dump
@ RUN: FileCheck < %t.dump %s
.syntax unified
.text
@@ -10,4 +10,6 @@ _foo:
.subsections_via_symbols
-@ CHECK: ('_section_data', '5ff808e0')
+@ CHECK: SectionData (
+@ CHECK: 0000: 5FF808E0 |_...|
+@ CHECK: )
diff --git a/test/MC/MachO/ARM/thumb2-movt-fixup.s b/test/MC/MachO/ARM/thumb2-movt-fixup.s
index ddd95b54791e..5cfb3f4c9186 100644
--- a/test/MC/MachO/ARM/thumb2-movt-fixup.s
+++ b/test/MC/MachO/ARM/thumb2-movt-fixup.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | llvm-readobj -relocations -expand-relocs | FileCheck %s
_fred:
movt r3, :upper16:(_wilma-(LPC0_0+4))
@@ -7,11 +7,25 @@ LPC0_0:
_wilma:
.long 0
-@ CHECK: ('_relocations', [
-@ CHECK: # Relocation 0
-@ CHECK: (('word-0', 0xb9000000),
-@ CHECK: ('word-1', 0x4)),
-@ CHECK: # Relocation 1
-@ CHECK: (('word-0', 0xb100fffc),
-@ CHECK: ('word-1', 0x4)),
-
+@ CHECK: File: <stdin>
+@ CHECK: Format: Mach-O arm
+@ CHECK: Arch: arm
+@ CHECK: AddressSize: 32bit
+@ CHECK: Relocations [
+@ CHECK: Section __text {
+@ CHECK: Relocation {
+@ CHECK: Offset: 0x0
+@ CHECK: PCRel: 0
+@ CHECK: Length: 3
+@ CHECK: Type: ARM_RELOC_HALF_SECTDIFF (9)
+@ CHECK: Value: 0x4
+@ CHECK: }
+@ CHECK: Relocation {
+@ CHECK: Offset: 0xFFFC
+@ CHECK: PCRel: 0
+@ CHECK: Length: 3
+@ CHECK: Type: ARM_RELOC_PAIR (1)
+@ CHECK: Value: 0x4
+@ CHECK: }
+@ CHECK: }
+@ CHECK: ]
diff --git a/test/MC/MachO/ARM/thumb2-movw-fixup.s b/test/MC/MachO/ARM/thumb2-movw-fixup.s
index 57973a874467..9c21d9b5b50e 100644
--- a/test/MC/MachO/ARM/thumb2-movw-fixup.s
+++ b/test/MC/MachO/ARM/thumb2-movw-fixup.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | llvm-readobj -relocations -expand-relocs | FileCheck %s
@ rdar://10038370
@@ -17,28 +17,66 @@
L1: .long 0
L2: .long 0
-@ CHECK: ('_relocations', [
-@ CHECK: # Relocation 0
-@ CHECK: (('word-0', 0xc),
-@ CHECK: ('word-1', 0x86000002)),
-@ CHECK: # Relocation 1
-@ CHECK: (('word-0', 0x1184),
-@ CHECK: ('word-1', 0x16ffffff)),
-@ CHECK: # Relocation 2
-@ CHECK: (('word-0', 0x8),
-@ CHECK: ('word-1', 0x84000002)),
-@ CHECK: # Relocation 3
-@ CHECK: (('word-0', 0x1),
-@ CHECK: ('word-1', 0x14ffffff)),
-@ CHECK: # Relocation 4
-@ CHECK: (('word-0', 0x4),
-@ CHECK: ('word-1', 0x86000002)),
-@ CHECK: # Relocation 5
-@ CHECK: (('word-0', 0x1180),
-@ CHECK: ('word-1', 0x16ffffff)),
-@ CHECK: # Relocation 6
-@ CHECK: (('word-0', 0x0),
-@ CHECK: ('word-1', 0x84000002)),
-@ CHECK: # Relocation 7
-@ CHECK: (('word-0', 0x1),
-@ CHECK: ('word-1', 0x14ffffff)),
+@ CHECK: Format: Mach-O arm
+@ CHECK: Arch: arm
+@ CHECK: AddressSize: 32bit
+@ CHECK: Relocations [
+@ CHECK: Section __text {
+@ CHECK: Relocation {
+@ CHECK: Offset: 0xC
+@ CHECK: PCRel: 0
+@ CHECK: Length: 3
+@ CHECK: Type: ARM_RELOC_HALF (8)
+@ CHECK: Section: __data (2)
+@ CHECK: }
+@ CHECK: Relocation {
+@ CHECK: Offset: 0x1184
+@ CHECK: PCRel: 0
+@ CHECK: Length: 3
+@ CHECK: Type: ARM_RELOC_PAIR (1)
+@ CHECK: Section: - (16777215)
+@ CHECK: }
+@ CHECK: Relocation {
+@ CHECK: Offset: 0x8
+@ CHECK: PCRel: 0
+@ CHECK: Length: 2
+@ CHECK: Type: ARM_RELOC_HALF (8)
+@ CHECK: Section: __data (2)
+@ CHECK: }
+@ CHECK: Relocation {
+@ CHECK: Offset: 0x1
+@ CHECK: PCRel: 0
+@ CHECK: Length: 2
+@ CHECK: Type: ARM_RELOC_PAIR (1)
+@ CHECK: Section: - (16777215)
+@ CHECK: }
+@ CHECK: Relocation {
+@ CHECK: Offset: 0x4
+@ CHECK: PCRel: 0
+@ CHECK: Length: 3
+@ CHECK: Type: ARM_RELOC_HALF (8)
+@ CHECK: Section: __data (2)
+@ CHECK: }
+@ CHECK: Relocation {
+@ CHECK: Offset: 0x1180
+@ CHECK: PCRel: 0
+@ CHECK: Length: 3
+@ CHECK: Type: ARM_RELOC_PAIR (1)
+@ CHECK: Section: - (16777215)
+@ CHECK: }
+@ CHECK: Relocation {
+@ CHECK: Offset: 0x0
+@ CHECK: PCRel: 0
+@ CHECK: Length: 2
+@ CHECK: Type: ARM_RELOC_HALF (8)
+@ CHECK: Section: __data (2)
+@ CHECK: }
+@ CHECK: Relocation {
+@ CHECK: Offset: 0x1
+@ CHECK: PCRel: 0
+@ CHECK: Length: 2
+@ CHECK: Type: ARM_RELOC_PAIR (1)
+@ CHECK: Section: - (16777215)
+@ CHECK: }
+@ CHECK: }
+@ CHECK: ]
diff --git a/test/MC/MachO/ARM/tvos-version-min-load-command.s b/test/MC/MachO/ARM/tvos-version-min-load-command.s
new file mode 100644
index 000000000000..3c9b237d6950
--- /dev/null
+++ b/test/MC/MachO/ARM/tvos-version-min-load-command.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple armv7-apple-tvos %s -filetype=obj -o - | llvm-readobj --macho-version-min | FileCheck %s
+
+
+// Test the formation of the version-min load command in the MachO.
+// use a nonsense but well formed version.
+.tvos_version_min 99,8,7
+
+// CHECK: MinVersion {
+// CHECK-NEXT: Cmd: LC_VERSION_MIN_TVOS
+// CHECK-NEXT: Size: 16
+// CHECK-NEXT: Version: 99.8.7
+// CHECK-NEXT: SDK: n/a
+// CHECK-NEXT: }
diff --git a/test/MC/MachO/ARM/version-min-diagnostics.s b/test/MC/MachO/ARM/version-min-diagnostics.s
index 15d44d31661a..76c3268dafbf 100644
--- a/test/MC/MachO/ARM/version-min-diagnostics.s
+++ b/test/MC/MachO/ARM/version-min-diagnostics.s
@@ -15,6 +15,16 @@
.macosx_version_min 10,-1,1
.macosx_version_min 0,1,1
.macosx_version_min 70000,1
+.tvos_version_min 99,2,257
+.tvos_version_min 50,256,1
+.tvos_version_min 10,-1,1
+.tvos_version_min 0,1,1
+.tvos_version_min 70000,1
+.watchos_version_min 99,2,257
+.watchos_version_min 50,256,1
+.watchos_version_min 10,-1,1
+.watchos_version_min 0,1,1
+.watchos_version_min 70000,1
// CHECK: error: invalid OS update number
@@ -47,3 +57,33 @@
// CHECK: error: invalid OS major version number
// CHECK: .macosx_version_min 70000,1
// CHECK: ^
+// CHECK: error: invalid OS update number
+// CHECK: .tvos_version_min 99,2,257
+// CHECK: ^
+// CHECK: error: invalid OS minor version number
+// CHECK: .tvos_version_min 50,256,1
+// CHECK: ^
+// CHECK: error: invalid OS minor version number
+// CHECK: .tvos_version_min 10,-1,1
+// CHECK: ^
+// CHECK: error: invalid OS major version number
+// CHECK: .tvos_version_min 0,1,1
+// CHECK: ^
+// CHECK: error: invalid OS major version number
+// CHECK: .tvos_version_min 70000,1
+// CHECK: ^
+// CHECK: error: invalid OS update number
+// CHECK: .watchos_version_min 99,2,257
+// CHECK: ^
+// CHECK: error: invalid OS minor version number
+// CHECK: .watchos_version_min 50,256,1
+// CHECK: ^
+// CHECK: error: invalid OS minor version number
+// CHECK: .watchos_version_min 10,-1,1
+// CHECK: ^
+// CHECK: error: invalid OS major version number
+// CHECK: .watchos_version_min 0,1,1
+// CHECK: ^
+// CHECK: error: invalid OS major version number
+// CHECK: .watchos_version_min 70000,1
+// CHECK: ^
diff --git a/test/MC/MachO/ARM/version-min-diagnostics2.s b/test/MC/MachO/ARM/version-min-diagnostics2.s
new file mode 100644
index 000000000000..0689cd41f704
--- /dev/null
+++ b/test/MC/MachO/ARM/version-min-diagnostics2.s
@@ -0,0 +1,34 @@
+// RUN: llvm-mc -triple i386-apple-ios %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=IOS
+// RUN: llvm-mc -triple i386-apple-watchos %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=WATCHOS
+// RUN: llvm-mc -triple i386-apple-tvos %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=TVOS
+// RUN: llvm-mc -triple i386-apple-macosx %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=MACOSX
+
+.ios_version_min 1,2,3
+// WATCHOS: version-min-diagnostics2.s:[[@LINE-1]]:1: warning: .ios_version_min should only be used for ios targets
+// TVOS: version-min-diagnostics2.s:[[@LINE-2]]:1: warning: .ios_version_min should only be used for ios targets
+// MACOSX: version-min-diagnostics2.s:[[@LINE-3]]:1: warning: .ios_version_min should only be used for ios targets
+// IOS-NOT: warning: .ios_version_min should only be used for ios targets
+
+.macosx_version_min 4,5,6
+// WATCHOS: version-min-diagnostics2.s:[[@LINE-1]]:1: warning: .macosx_version_min should only be used for macosx targets
+// TVOS: version-min-diagnostics2.s:[[@LINE-2]]:1: warning: .macosx_version_min should only be used for macosx targets
+// IOS: version-min-diagnostics2.s:[[@LINE-3]]:1: warning: .macosx_version_min should only be used for macosx targets
+// MACOSX-NOT: warning: .macosx_version_min should only be used for macosx targets
+// CHECK: version-min-diagnostics2.s:[[@LINE-5]]:1: warning: overriding previous version_min directive
+// CHECK: version-min-diagnostics2.s:[[@LINE-12]]:1: note: previous definition is here
+
+.tvos_version_min 7,8,9
+// WATCHOS: version-min-diagnostics2.s:[[@LINE-1]]:1: warning: .tvos_version_min should only be used for tvos targets
+// MACOSX: version-min-diagnostics2.s:[[@LINE-2]]:1: warning: .tvos_version_min should only be used for tvos targets
+// IOS: version-min-diagnostics2.s:[[@LINE-3]]:1: warning: .tvos_version_min should only be used for tvos targets
+// TVOS-NOT: warning: .tvos_version_min should only be used for tvos targets
+// CHECK: version-min-diagnostics2.s:[[@LINE-5]]:1: warning: overriding previous version_min directive
+// CHECK: version-min-diagnostics2.s:[[@LINE-14]]:1: note: previous definition is here
+
+.watchos_version_min 10,11,12
+// MACOSX: version-min-diagnostics2.s:[[@LINE-1]]:1: warning: .watchos_version_min should only be used for watchos targets
+// IOS: version-min-diagnostics2.s:[[@LINE-2]]:1: warning: .watchos_version_min should only be used for watchos targets
+// TVOS-NOT: warning: .tvos_version_min should only be used for tvos targets
+// WATCHOS-NOT: warning: .watchos_version_min should only be used for watchos targets
+// CHECK: version-min-diagnostics2.s:[[@LINE-5]]:1: warning: overriding previous version_min directive
+// CHECK: version-min-diagnostics2.s:[[@LINE-14]]:1: note: previous definition is here
diff --git a/test/MC/MachO/ARM/version-min.s b/test/MC/MachO/ARM/version-min.s
index 0a40338ed5e0..d4840db7b6bf 100644
--- a/test/MC/MachO/ARM/version-min.s
+++ b/test/MC/MachO/ARM/version-min.s
@@ -19,3 +19,19 @@
// CHECK: .macosx_version_min 10, 2
// CHECK: .macosx_version_min 10, 8, 1
// CHECK: .macosx_version_min 2, 0
+
+.tvos_version_min 5,2,0
+.tvos_version_min 3,2,1
+.tvos_version_min 5,0
+
+// CHECK: .tvos_version_min 5, 2
+// CHECK: .tvos_version_min 3, 2, 1
+// CHECK: .tvos_version_min 5, 0
+
+.watchos_version_min 5,2,0
+.watchos_version_min 3,2,1
+.watchos_version_min 5,0
+
+// CHECK: .watchos_version_min 5, 2
+// CHECK: .watchos_version_min 3, 2, 1
+// CHECK: .watchos_version_min 5, 0
diff --git a/test/MC/MachO/ARM/watchos-version-min-load-command.s b/test/MC/MachO/ARM/watchos-version-min-load-command.s
new file mode 100644
index 000000000000..3df38fb6a179
--- /dev/null
+++ b/test/MC/MachO/ARM/watchos-version-min-load-command.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple armv7k-apple-watchos %s -filetype=obj -o - | llvm-readobj --macho-version-min | FileCheck %s
+
+
+// Test the formation of the version-min load command in the MachO.
+// use a nonsense but well formed version.
+.watchos_version_min 99,8,7
+
+// CHECK: MinVersion {
+// CHECK-NEXT: Cmd: LC_VERSION_MIN_WATCHOS
+// CHECK-NEXT: Size: 16
+// CHECK-NEXT: Version: 99.8.7
+// CHECK-NEXT: SDK: n/a
+// CHECK-NEXT: }
diff --git a/test/MC/MachO/PowerPC/coal-sections-powerpc.s b/test/MC/MachO/PowerPC/coal-sections-powerpc.s
new file mode 100644
index 000000000000..1c3dc37d5bd7
--- /dev/null
+++ b/test/MC/MachO/PowerPC/coal-sections-powerpc.s
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -triple powerpc-apple-darwin8 -arch=ppc32 -filetype=obj %s -o - | llvm-readobj -sections | FileCheck %s
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 0
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 1
+// CHECK-NEXT: Name: __textcoal_nt (
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 2
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 3
+// CHECK-NEXT: Name: __const_coal (
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 4
+// CHECK-NEXT: Name: __datacoal_nt (
+
+ .section __TEXT,__text,regular,pure_instructions
+ .machine ppc
+ .section __TEXT,__textcoal_nt,coalesced,pure_instructions
+ .section __TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
+ .section __TEXT,__text,regular,pure_instructions
+ .section __TEXT,__textcoal_nt,coalesced,pure_instructions
+ .globl _foo
+ .weak_definition _foo
+ .align 4
+_foo:
+ blr
+
+.subsections_via_symbols
+ .section __TEXT,__const_coal,coalesced
+ .globl _a ; @a
+ .weak_definition _a
+ .align 4
+_a:
+ .long 1 ; 0x1
+
+ .section __DATA,__datacoal_nt,coalesced
+ .globl _b ; @b
+ .weak_definition _b
+ .align 2
+_b:
+ .long 5 ; 0x5
diff --git a/test/MC/MachO/PowerPC/lit.local.cfg b/test/MC/MachO/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..091332439b18
--- /dev/null
+++ b/test/MC/MachO/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'PowerPC' in config.root.targets:
+ config.unsupported = True
diff --git a/test/MC/MachO/absolute.s b/test/MC/MachO/absolute.s
index 0b22afb1b4d0..36a0ae5ec5bb 100644
--- a/test/MC/MachO/absolute.s
+++ b/test/MC/MachO/absolute.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
_bar:
nop
@@ -17,142 +17,159 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
.globl foo_set2_global;
.set foo_set2_global, (_foo - _bar + 0xffff0000)
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 256)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 152)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 2)
-// CHECK: ('file_offset', 288)
-// CHECK: ('file_size', 2)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 1)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 2)
-// CHECK: ('offset', 288)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 292)
-// CHECK: ('nsyms', 8)
-// CHECK: ('stroff', 420)
-// CHECK: ('strsize', 84)
-// CHECK: ('_string_data', '\x00foo_equals\x00_bar\x00_foo\x00foo_set2_global\x00foo_set1_global\x00foo_set2\x00foo_equals2\x00foo_set1\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 12)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_bar')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 17)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 1)
-// CHECK: ('_string', '_foo')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 75)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 4294901761)
-// CHECK: ('_string', 'foo_set1')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 54)
-// CHECK: ('n_type', 0x2)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 4294901761)
-// CHECK: ('_string', 'foo_set2')
-// CHECK: ),
-// CHECK: # Symbol 4
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 4294901761)
-// CHECK: ('_string', 'foo_equals')
-// CHECK: ),
-// CHECK: # Symbol 5
-// CHECK: (('n_strx', 63)
-// CHECK: ('n_type', 0x2)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 4294901761)
-// CHECK: ('_string', 'foo_equals2')
-// CHECK: ),
-// CHECK: # Symbol 6
-// CHECK: (('n_strx', 38)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 4294901761)
-// CHECK: ('_string', 'foo_set1_global')
-// CHECK: ),
-// CHECK: # Symbol 7
-// CHECK: (('n_strx', 22)
-// CHECK: ('n_type', 0x3)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 4294901761)
-// CHECK: ('_string', 'foo_set2_global')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 6)
-// CHECK: ('iextdefsym', 6)
-// CHECK: ('nextdefsym', 2)
-// CHECK: ('iundefsym', 8)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 272
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x2
+// CHECK: Offset: 304
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _bar (12)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _foo (17)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x1
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: foo_set1 (75)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0xFFFF0001
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: foo_set2 (54)
+// CHECK: Type: Abs (0x2)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0xFFFF0001
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: foo_equals (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0xFFFF0001
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: foo_equals2 (63)
+// CHECK: Type: Abs (0x2)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0xFFFF0001
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: foo_set1_global (38)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0xFFFF0001
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: foo_set2_global (22)
+// CHECK: Extern
+// CHECK: Type: Abs (0x2)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0xFFFF0001
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 152
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x2
+// CHECK: fileoff: 304
+// CHECK: filesize: 2
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 1
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 6
+// CHECK: iextdefsym: 6
+// CHECK: nextdefsym: 2
+// CHECK: iundefsym: 8
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/absolutize.s b/test/MC/MachO/absolutize.s
index 8947c0f65e53..1f5ed32ff72d 100644
--- a/test/MC/MachO/absolutize.s
+++ b/test/MC/MachO/absolutize.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
_text_a:
xorl %eax,%eax
@@ -47,143 +47,151 @@ Ldata_expr_2 = Ldata_d - Ldata_c
.long _data_a + Ldata_expr_0
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 296)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 192)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 87)
-// CHECK: ('file_offset', 324)
-// CHECK: ('file_size', 87)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 43)
-// CHECK: ('offset', 324)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 412)
-// CHECK: ('num_reloc', 3)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0xa0000027),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0xa4000009),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x2)),
-// CHECK: ])
-// CHECK: ('_section_data', '31c031c0 31c031c0 b8feffff ffb8feff ffffb802 000000b8 02000000 b8020000 00b80200 0000b8fe ffffff')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 43)
-// CHECK: ('size', 44)
-// CHECK: ('offset', 367)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 436)
-// CHECK: ('num_reloc', 3)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0xa0000028),
-// CHECK: ('word-1', 0x2b)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0xa4000010),
-// CHECK: ('word-1', 0x2b)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x2f)),
-// CHECK: ])
-// CHECK: ('_section_data', '00000000 00000000 00000000 00000000 fcffffff fcffffff 04000000 04000000 04000000 04000000 27000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 460)
-// CHECK: ('nsyms', 4)
-// CHECK: ('stroff', 508)
-// CHECK: ('strsize', 36)
-// CHECK: ('_string_data', '\x00_text_b\x00_data_b\x00_text_a\x00_data_a\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 17)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_text_a')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 2)
-// CHECK: ('_string', '_text_b')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 25)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 43)
-// CHECK: ('_string', '_data_a')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 9)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 47)
-// CHECK: ('_string', '_data_b')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 4)
-// CHECK: ('iextdefsym', 4)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 4)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 312
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x2B
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x1AC
+// CHECK: RelocationCount: 3
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 31C031C0 31C031C0 B8FEFFFF FFB8FEFF |1.1.1.1.........|
+// CHECK: 0010: FFFFB802 000000B8 02000000 B8020000 |................|
+// CHECK: 0020: 00B80200 0000B8FE FFFFFF |...........|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x2B
+// CHECK: Size: 0x2C
+// CHECK: Offset: 383
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x1C4
+// CHECK: RelocationCount: 3
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0010: FCFFFFFF FCFFFFFF 04000000 04000000 |................|
+// CHECK: 0020: 04000000 04000000 27000000 |........'...|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __text {
+// CHECK: 0x27 0 2 n/a GENERIC_RELOC_VANILLA 1 0x0
+// CHECK: 0x9 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x0
+// CHECK: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x2
+// CHECK: }
+// CHECK: Section __data {
+// CHECK: 0x28 0 2 n/a GENERIC_RELOC_VANILLA 1 0x2B
+// CHECK: 0x10 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x2B
+// CHECK: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x2F
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _text_a (17)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _text_b (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x2
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _data_a (25)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x2B
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _data_b (9)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x2F
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 192
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x57
+// CHECK: fileoff: 340
+// CHECK: filesize: 87
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 4
+// CHECK: iextdefsym: 4
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 4
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s b/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s
index 1ccebc5124c3..36b5f9579242 100644
--- a/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s
+++ b/test/MC/MachO/bad-darwin-x86_64-diff-relocs.s
@@ -2,4 +2,12 @@
// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
.quad _foo - _bar
-// CHECK-ERROR: unsupported relocation with subtraction expression
+// CHECK-ERROR: error: unsupported relocation with subtraction expression
+
+_Y:
+.long (_Y+4)-_b
+// CHECK-ERROR: error: unsupported relocation with subtraction expression, symbol '_b' can not be undefined in a subtraction expression
+
+_Z:
+.long (_a+4)-_Z
+// CHECK-ERROR: error: unsupported relocation with subtraction expression, symbol '_a' can not be undefined in a subtraction expression
diff --git a/test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s b/test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s
deleted file mode 100644
index 518ae6423dbc..000000000000
--- a/test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s
+++ /dev/null
@@ -1,6 +0,0 @@
-// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t
-// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
-
-_Z:
-.long (_Z+4)-_b
-// CHECK-ERROR: error: unsupported relocation with subtraction expression, symbol '_b' can not be undefined in a subtraction expression
diff --git a/test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s b/test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s
deleted file mode 100644
index 3aefd87c557c..000000000000
--- a/test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s
+++ /dev/null
@@ -1,6 +0,0 @@
-// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t
-// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
-
-_Z:
-.long (_a+4)-_Z
-// CHECK-ERROR: error: unsupported relocation with subtraction expression, symbol '_a' can not be undefined in a subtraction expression
diff --git a/test/MC/MachO/coal-sections-x86_64.s b/test/MC/MachO/coal-sections-x86_64.s
new file mode 100644
index 000000000000..5ecdc578dbb6
--- /dev/null
+++ b/test/MC/MachO/coal-sections-x86_64.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin -filetype=obj %s -o - 2>%t.err | llvm-readobj -sections | FileCheck %s
+// RUN: FileCheck --check-prefix=WARNING < %t.err %s
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 0
+// CHECK-NEXT: Name: __text (
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 1
+// CHECK-NEXT: Name: __textcoal_nt (
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 2
+// CHECK-NEXT: Name: __const_coal (
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 3
+// CHECK-NEXT: Name: __datacoal_nt (
+
+// WARNING: warning: section "__textcoal_nt" is deprecated
+// WARNING: note: change section name to "__text"
+// WARNING: warning: section "__const_coal" is deprecated
+// WARNING: note: change section name to "__const"
+// WARNING: warning: section "__datacoal_nt" is deprecated
+// WARNING: note: change section name to "__data"
+
+ .section __TEXT,__textcoal_nt,coalesced,pure_instructions
+ .globl _foo
+ .weak_definition _foo
+ .align 4, 0x90
+_foo:
+ retq
+
+ .section __TEXT,__const_coal,coalesced
+ .globl _a ## @a
+ .weak_definition _a
+ .align 4
+_a:
+ .long 1 ## 0x1
+
+ .section __DATA,__datacoal_nt,coalesced
+ .globl _b ## @b
+ .weak_definition _b
+ .align 2
+_b:
+ .long 5 ## 0x5
+
+.subsections_via_symbols
diff --git a/test/MC/MachO/comm-1.s b/test/MC/MachO/comm-1.s
index cb240f98e14f..1b64e94c2c3f 100644
--- a/test/MC/MachO/comm-1.s
+++ b/test/MC/MachO/comm-1.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.comm sym_comm_B, 2
.comm sym_comm_A, 4
@@ -7,108 +7,119 @@
.no_dead_strip sym_comm_C
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 228)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 124)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 0)
-// CHECK: ('file_offset', 256)
-// CHECK: ('file_size', 0)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 1)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 256)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 256)
-// CHECK: ('nsyms', 4)
-// CHECK: ('stroff', 304)
-// CHECK: ('strsize', 48)
-// CHECK: ('_string_data', '\x00sym_comm_D\x00sym_comm_C\x00sym_comm_B\x00sym_comm_A\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 34)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 4)
-// CHECK: ('_string', 'sym_comm_A')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 23)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 2)
-// CHECK: ('_string', 'sym_comm_B')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 12)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 544)
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', 'sym_comm_C')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 768)
-// CHECK: ('n_value', 2)
-// CHECK: ('_string', 'sym_comm_D')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 0)
-// CHECK: ('iextdefsym', 0)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 0)
-// CHECK: ('nundefsym', 4)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 244
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 272
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: sym_comm_A (34)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x4
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_comm_B (23)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x2
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_comm_C (12)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x220)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_comm_D (1)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x300)
+// CHECK: ]
+// CHECK: Value: 0x2
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 124
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x0
+// CHECK: fileoff: 272
+// CHECK: filesize: 0
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 1
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 0
+// CHECK: iextdefsym: 0
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 0
+// CHECK: nundefsym: 4
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/cstexpr-gotpcrel-64.ll b/test/MC/MachO/cstexpr-gotpcrel-64.ll
index bafddcb3db69..41abeb0179cc 100644
--- a/test/MC/MachO/cstexpr-gotpcrel-64.ll
+++ b/test/MC/MachO/cstexpr-gotpcrel-64.ll
@@ -1,6 +1,7 @@
; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t
; RUN: FileCheck %s -check-prefix=X86 < %t
; RUN: FileCheck %s -check-prefix=X86-GOT-EQUIV < %t
+; RUN: FileCheck %s -check-prefix=X86-NOGOT-EQUIV < %t
; GOT equivalent globals references can be replaced by the GOT entry of the
; final symbol instead.
@@ -86,10 +87,15 @@ define i32** @t1() {
}
; Do not crash when a pattern cannot be matched as a GOT equivalent
-
+define void @foo() {
+; X86-NOGOT-EQUIV-LABEL: _foo:
+; X86-NOGOT-EQUIV: leaq _b(%rip), %rax
+ store i8** @b, i8*** null
+ ret void
+}
@a = external global i8
@b = internal unnamed_addr constant i8* @a
-; X86-LABEL: _c:
-; X86: .quad _b
+; X86-NOGOT-EQUIV-LABEL: _c:
+; X86-NOGOT-EQUIV: .quad _b
@c = global i8** @b
diff --git a/test/MC/MachO/darwin-complex-difference.s b/test/MC/MachO/darwin-complex-difference.s
index f31d3ade33bc..84940ba68f89 100644
--- a/test/MC/MachO/darwin-complex-difference.s
+++ b/test/MC/MachO/darwin-complex-difference.s
@@ -1,5 +1,5 @@
// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o %t.o
-// RUN: macho-dump --dump-section-data < %t.o > %t.dump
+// RUN: llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols < %t.o > %t.dump
// RUN: FileCheck < %t.dump %s
_a:
@@ -15,115 +15,117 @@ _c:
_d:
.long 0
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 256)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 152)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 24)
-// CHECK: ('file_offset', 288)
-// CHECK: ('file_size', 24)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 1)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 24)
-// CHECK: ('offset', 288)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 312)
-// CHECK: ('num_reloc', 4)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0xc),
-// CHECK: ('word-1', 0x5c000002)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0xc),
-// CHECK: ('word-1', 0xc000001)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0x8),
-// CHECK: ('word-1', 0x5c000002)),
-// CHECK: # Relocation 3
-// CHECK: (('word-0', 0x8),
-// CHECK: ('word-1', 0xc000001)),
-// CHECK: ])
-// CHECK: ('_section_data', '01000000 02000000 04000000 04000000 00000000 00000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 344)
-// CHECK: ('nsyms', 3)
-// CHECK: ('stroff', 392)
-// CHECK: ('strsize', 12)
-// CHECK: ('_string_data', '\x00_d\x00_c\x00_a\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 7)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_a')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 4)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 16)
-// CHECK: ('_string', '_c')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 20)
-// CHECK: ('_string', '_d')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 3)
-// CHECK: ('iextdefsym', 3)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 3)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 272
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x18
+// CHECK: Offset: 304
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x148
+// CHECK: RelocationCount: 4
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 01000000 02000000 04000000 04000000 |................|
+// CHECK: 0010: 00000000 00000000 |........|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __text {
+// CHECK: 0xC 0 2 1 X86_64_RELOC_SUBTRACTOR 0 _d
+// CHECK: 0xC 0 2 1 X86_64_RELOC_UNSIGNED 0 _c
+// CHECK: 0x8 0 2 1 X86_64_RELOC_SUBTRACTOR 0 _d
+// CHECK: 0x8 0 2 1 X86_64_RELOC_UNSIGNED 0 _c
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _a (7)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _c (4)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x10
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _d (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x14
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 152
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x18
+// CHECK: fileoff: 304
+// CHECK: filesize: 24
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 1
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 3
+// CHECK: iextdefsym: 3
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 3
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/darwin-version-min-load-command.s b/test/MC/MachO/darwin-version-min-load-command.s
new file mode 100644
index 000000000000..17f3784d6326
--- /dev/null
+++ b/test/MC/MachO/darwin-version-min-load-command.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -triple x86_64-apple-macosx10.10.0 %s -filetype=obj -o - | llvm-objdump -macho -private-headers - | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-ios8.0.0 %s -filetype=obj -o - | llvm-objdump -macho -private-headers - | FileCheck %s --check-prefix=CHECK-IOS
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | llvm-objdump -macho -private-headers - | FileCheck %s --check-prefix=CHECK-DARWIN
+
+// Test version-min load command should be inferred from triple and should always be generated on Darwin
+// CHECK: Load command
+// CHECK: cmd LC_VERSION_MIN_MACOSX
+// CHECK: cmdsize 16
+// CHECK: version 10.10
+
+// CHECK-IOS: Load command
+// CHECK-IOS: cmd LC_VERSION_MIN_IPHONEOS
+// CHECK-IOS: cmdsize 16
+// CHECK-IOS: version 8.0
+
+// CHECK-DARWIN-NOT: LC_VERSION_MIN
+
+
+// RUN: llvm-mc -triple x86_64-apple-watchos1.0.0 %s -filetype=obj -o - | llvm-objdump -macho -private-headers - | FileCheck %s --check-prefix=CHECK-WATCHOS
+// RUN: llvm-mc -triple x86_64-apple-tvos8.0.0 %s -filetype=obj -o - | llvm-objdump -macho -private-headers - | FileCheck %s --check-prefix=CHECK-TVOS
+// CHECK-WATCHOS: Load command
+// CHECK-WATCHOS: cmd LC_VERSION_MIN_WATCHOS
+// CHECK-WATCHOS-NEXT: cmdsize 16
+// CHECK-WATCHOS-NEXT: version 1.0
+
+// CHECK-TVOS: cmd LC_VERSION_MIN_TVOS
+// CHECK-TVOS-NEXT: cmdsize 16
+// CHECK-TVOS-NEXT: version 8.0
diff --git a/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s b/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s
index 49cfa418162c..7a7919786d41 100644
--- a/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s
+++ b/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
// Test case for rdar://10743265
@@ -17,11 +17,9 @@ _base = .
_start_ap_2:
cli
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0x0),
-// CHECK: ('word-1', 0x5c000000)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0x0),
-// CHECK: ('word-1', 0xc000001)),
-// CHECK: ])
+// CHECK: Relocations [
+// CHECK: Section __text {
+// CHECK: 0x0 0 2 1 X86_64_RELOC_SUBTRACTOR 0 _base
+// CHECK: 0x0 0 2 1 X86_64_RELOC_UNSIGNED 0 _start_ap_2
+// CHECK: }
+// CHECK: ]
diff --git a/test/MC/MachO/darwin-x86_64-diff-relocs.s b/test/MC/MachO/darwin-x86_64-diff-relocs.s
index eb28cf1af158..9d69a493dd83 100644
--- a/test/MC/MachO/darwin-x86_64-diff-relocs.s
+++ b/test/MC/MachO/darwin-x86_64-diff-relocs.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.text
@@ -117,213 +117,201 @@ _g3:
L3:
xorl %eax,%eax
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 336)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 232)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 236)
-// CHECK: ('file_offset', 368)
-// CHECK: ('file_size', 236)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 94)
-// CHECK: ('offset', 368)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 604)
-// CHECK: ('num_reloc', 12)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-
// FIXME: Unfortunately, we do not get these relocations in exactly the same
// order as Darwin 'as'. It turns out that 'as' *usually* ends up emitting
// them in reverse address order, but sometimes it allocates some
// additional relocations late so these end up precede the other entries. I
// haven't figured out the exact criteria for this yet.
-
-// CHECK: (('word-0', 0x56),
-// CHECK: ('word-1', 0x1d000004)),
-// CHECK: (('word-0', 0x50),
-// CHECK: ('word-1', 0x1d000004)),
-// CHECK: (('word-0', 0x4a),
-// CHECK: ('word-1', 0x1d000003)),
-// CHECK: (('word-0', 0x44),
-// CHECK: ('word-1', 0x1d000003)),
-// CHECK: (('word-0', 0x3e),
-// CHECK: ('word-1', 0x1d000002)),
-// CHECK: (('word-0', 0x38),
-// CHECK: ('word-1', 0x1d000002)),
-// CHECK: (('word-0', 0x20),
-// CHECK: ('word-1', 0x2d000004)),
-// CHECK: (('word-0', 0x1b),
-// CHECK: ('word-1', 0x2d000004)),
-// CHECK: (('word-0', 0x16),
-// CHECK: ('word-1', 0x2d000003)),
-// CHECK: (('word-0', 0x11),
-// CHECK: ('word-1', 0x2d000003)),
-// CHECK: (('word-0', 0xc),
-// CHECK: ('word-1', 0x2d000002)),
-// CHECK: (('word-0', 0x5),
-// CHECK: ('word-1', 0x2d000000)),
-// CHECK: ])
-// CHECK: # Section 1
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 94)
-// CHECK: ('size', 142)
-// CHECK: ('offset', 462)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 700)
-// CHECK: ('num_reloc', 16)
-// CHECK: ('flags', 0x400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0x7a),
-// CHECK: ('word-1', 0x5e000001)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0x7a),
-// CHECK: ('word-1', 0xe000002)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0x72),
-// CHECK: ('word-1', 0x5e000001)),
-// CHECK: # Relocation 3
-// CHECK: (('word-0', 0x72),
-// CHECK: ('word-1', 0xe000002)),
-// CHECK: # Relocation 4
-// CHECK: (('word-0', 0x62),
-// CHECK: ('word-1', 0xe000002)),
-// CHECK: # Relocation 5
-// CHECK: (('word-0', 0x5a),
-// CHECK: ('word-1', 0xe000002)),
-// CHECK: # Relocation 6
-// CHECK: (('word-0', 0x52),
-// CHECK: ('word-1', 0xe000001)),
-// CHECK: # Relocation 7
-// CHECK: (('word-0', 0x4a),
-// CHECK: ('word-1', 0xe000001)),
-// CHECK: # Relocation 8
-// CHECK: (('word-0', 0x3a),
-// CHECK: ('word-1', 0x5e000003)),
-// CHECK: # Relocation 9
-// CHECK: (('word-0', 0x3a),
-// CHECK: ('word-1', 0xe000004)),
-// CHECK: # Relocation 10
-// CHECK: (('word-0', 0x32),
-// CHECK: ('word-1', 0x5e000003)),
-// CHECK: # Relocation 11
-// CHECK: (('word-0', 0x32),
-// CHECK: ('word-1', 0xe000004)),
-// CHECK: # Relocation 12
-// CHECK: (('word-0', 0x22),
-// CHECK: ('word-1', 0xe000004)),
-// CHECK: # Relocation 13
-// CHECK: (('word-0', 0x1a),
-// CHECK: ('word-1', 0xe000004)),
-// CHECK: # Relocation 14
-// CHECK: (('word-0', 0x12),
-// CHECK: ('word-1', 0xe000003)),
-// CHECK: # Relocation 15
-// CHECK: (('word-0', 0xa),
-// CHECK: ('word-1', 0xe000003)),
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 828)
-// CHECK: ('nsyms', 5)
-// CHECK: ('stroff', 908)
-// CHECK: ('strsize', 24)
-// CHECK: ('_string_data', '\x00_foo\x00_g3\x00_g2\x00_g1\x00_g0\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_foo')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 18)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 2)
-// CHECK: ('_string', '_g0')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 14)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 90)
-// CHECK: ('_string', '_g1')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 10)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 94)
-// CHECK: ('_string', '_g2')
-// CHECK: ),
-// CHECK: # Symbol 4
-// CHECK: (('n_strx', 6)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 232)
-// CHECK: ('_string', '_g3')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 5)
-// CHECK: ('iextdefsym', 5)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 5)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 352
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x5E
+// CHECK: Offset: 384
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x26C
+// CHECK: RelocationCount: 12
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 31C031C0 E9040000 00EBF9E9 00000000 |1.1.............|
+// CHECK: 0010: E9000000 00E90200 0000E900 000000E9 |................|
+// CHECK: 0020: 02000000 89050400 00008905 D2FFFFFF |................|
+// CHECK: 0030: 8905CEFF FFFF8905 00000000 89050200 |................|
+// CHECK: 0040: 00008905 00000000 89050200 00008905 |................|
+// CHECK: 0050: 00000000 89050200 000031C0 31C0 |..........1.1.|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x5E
+// CHECK: Size: 0x8E
+// CHECK: Offset: 478
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x2CC
+// CHECK: RelocationCount: 16
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x4)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 31C00400 00000000 00000000 00000000 |1...............|
+// CHECK: 0010: 00000200 00000000 00000000 00000000 |................|
+// CHECK: 0020: 00000200 00000000 00000200 00000000 |................|
+// CHECK: 0030: 00000000 00000000 00000200 00000000 |................|
+// CHECK: 0040: 00000200 00000000 00000000 00000000 |................|
+// CHECK: 0050: 00000200 00000000 00000000 00000000 |................|
+// CHECK: 0060: 00000200 00000000 00000200 00000000 |................|
+// CHECK: 0070: 00000000 00000000 00000200 00000000 |................|
+// CHECK: 0080: 00000200 00000000 000031C0 31C0 |..........1.1.|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __text {
+// CHECK: 0x56 1 2 1 X86_64_RELOC_SIGNED 0 _g3
+// CHECK: 0x50 1 2 1 X86_64_RELOC_SIGNED 0 _g3
+// CHECK: 0x4A 1 2 1 X86_64_RELOC_SIGNED 0 _g2
+// CHECK: 0x44 1 2 1 X86_64_RELOC_SIGNED 0 _g2
+// CHECK: 0x3E 1 2 1 X86_64_RELOC_SIGNED 0 _g1
+// CHECK: 0x38 1 2 1 X86_64_RELOC_SIGNED 0 _g1
+// CHECK: 0x20 1 2 1 X86_64_RELOC_BRANCH 0 _g3
+// CHECK: 0x1B 1 2 1 X86_64_RELOC_BRANCH 0 _g3
+// CHECK: 0x16 1 2 1 X86_64_RELOC_BRANCH 0 _g2
+// CHECK: 0x11 1 2 1 X86_64_RELOC_BRANCH 0 _g2
+// CHECK: 0xC 1 2 1 X86_64_RELOC_BRANCH 0 _g1
+// CHECK: 0x5 1 2 1 X86_64_RELOC_BRANCH 0 _foo
+// CHECK: }
+// CHECK: Section __data {
+// CHECK: 0x7A 0 3 1 X86_64_RELOC_SUBTRACTOR 0 _g0
+// CHECK: 0x7A 0 3 1 X86_64_RELOC_UNSIGNED 0 _g1
+// CHECK: 0x72 0 3 1 X86_64_RELOC_SUBTRACTOR 0 _g0
+// CHECK: 0x72 0 3 1 X86_64_RELOC_UNSIGNED 0 _g1
+// CHECK: 0x62 0 3 1 X86_64_RELOC_UNSIGNED 0 _g1
+// CHECK: 0x5A 0 3 1 X86_64_RELOC_UNSIGNED 0 _g1
+// CHECK: 0x52 0 3 1 X86_64_RELOC_UNSIGNED 0 _g0
+// CHECK: 0x4A 0 3 1 X86_64_RELOC_UNSIGNED 0 _g0
+// CHECK: 0x3A 0 3 1 X86_64_RELOC_SUBTRACTOR 0 _g2
+// CHECK: 0x3A 0 3 1 X86_64_RELOC_UNSIGNED 0 _g3
+// CHECK: 0x32 0 3 1 X86_64_RELOC_SUBTRACTOR 0 _g2
+// CHECK: 0x32 0 3 1 X86_64_RELOC_UNSIGNED 0 _g3
+// CHECK: 0x22 0 3 1 X86_64_RELOC_UNSIGNED 0 _g3
+// CHECK: 0x1A 0 3 1 X86_64_RELOC_UNSIGNED 0 _g3
+// CHECK: 0x12 0 3 1 X86_64_RELOC_UNSIGNED 0 _g2
+// CHECK: 0xA 0 3 1 X86_64_RELOC_UNSIGNED 0 _g2
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _foo (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _g0 (18)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x2
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _g1 (14)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x5A
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _g2 (10)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x5E
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _g3 (6)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0xE8
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 232
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0xEC
+// CHECK: fileoff: 384
+// CHECK: filesize: 236
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 5
+// CHECK: iextdefsym: 5
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 5
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/darwin-x86_64-nobase-relocs.s b/test/MC/MachO/darwin-x86_64-nobase-relocs.s
index a90b3e4d0962..857c3541d652 100644
--- a/test/MC/MachO/darwin-x86_64-nobase-relocs.s
+++ b/test/MC/MachO/darwin-x86_64-nobase-relocs.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -n -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -n -triple x86_64-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -macho-segment | FileCheck %s
// Test case for rdar://10062261
@@ -15,44 +15,58 @@ Lbar:
mov $1, %eax
ret
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
-// CHECK: ('load_commands_size', 152)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 152)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 17)
-// CHECK: ('file_offset', 184)
-// CHECK: ('file_size', 17)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 1)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 17)
-// CHECK: ('offset', 184)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '90eb080f 1f40000f 1f4000b8 01000000 c3')
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 2
+// CHECK: SizeOfLoadCommands: 168
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x11
+// CHECK: Offset: 200
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 90EB080F 1F40000F 1F4000B8 01000000 |.....@...@......|
+// CHECK: 0010: C3 |.|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 152
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x11
+// CHECK: fileoff: 200
+// CHECK: filesize: 17
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 1
+// CHECK: flags: 0x0
+// CHECK: }
diff --git a/test/MC/MachO/darwin-x86_64-reloc-offsets.s b/test/MC/MachO/darwin-x86_64-reloc-offsets.s
index f748064b2bf9..e7f0c5baf8df 100644
--- a/test/MC/MachO/darwin-x86_64-reloc-offsets.s
+++ b/test/MC/MachO/darwin-x86_64-reloc-offsets.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.data
@@ -114,230 +114,178 @@ L1:
movl %eax, L1 + 3(%rip)
movl %eax, L1 + 4(%rip)
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 336)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 232)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 358)
-// CHECK: ('file_offset', 368)
-// CHECK: ('file_size', 358)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 318)
-// CHECK: ('offset', 368)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 728)
-// CHECK: ('num_reloc', 42)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0x13a),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0x134),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0x12e),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 3
-// CHECK: (('word-0', 0x128),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 4
-// CHECK: (('word-0', 0x122),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 5
-// CHECK: (('word-0', 0x11c),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 6
-// CHECK: (('word-0', 0x116),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 7
-// CHECK: (('word-0', 0x10c),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 8
-// CHECK: (('word-0', 0x102),
-// CHECK: ('word-1', 0x6d000000)),
-// CHECK: # Relocation 9
-// CHECK: (('word-0', 0xf8),
-// CHECK: ('word-1', 0x7d000000)),
-// CHECK: # Relocation 10
-// CHECK: (('word-0', 0xee),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 11
-// CHECK: (('word-0', 0xe4),
-// CHECK: ('word-1', 0x8d000000)),
-// CHECK: # Relocation 12
-// CHECK: (('word-0', 0xdd),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 13
-// CHECK: (('word-0', 0xd6),
-// CHECK: ('word-1', 0x6d000000)),
-// CHECK: # Relocation 14
-// CHECK: (('word-0', 0xd0),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 15
-// CHECK: (('word-0', 0xca),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 16
-// CHECK: (('word-0', 0xc4),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 17
-// CHECK: (('word-0', 0xbe),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 18
-// CHECK: (('word-0', 0xb8),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 19
-// CHECK: (('word-0', 0xb2),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 20
-// CHECK: (('word-0', 0xac),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 21
-// CHECK: (('word-0', 0xa2),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 22
-// CHECK: (('word-0', 0x98),
-// CHECK: ('word-1', 0x65000002)),
-// CHECK: # Relocation 23
-// CHECK: (('word-0', 0x8e),
-// CHECK: ('word-1', 0x75000002)),
-// CHECK: # Relocation 24
-// CHECK: (('word-0', 0x84),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 25
-// CHECK: (('word-0', 0x7a),
-// CHECK: ('word-1', 0x85000002)),
-// CHECK: # Relocation 26
-// CHECK: (('word-0', 0x73),
-// CHECK: ('word-1', 0x15000002)),
-// CHECK: # Relocation 27
-// CHECK: (('word-0', 0x6c),
-// CHECK: ('word-1', 0x65000002)),
-// CHECK: # Relocation 28
-// CHECK: (('word-0', 0x66),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 29
-// CHECK: (('word-0', 0x60),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 30
-// CHECK: (('word-0', 0x5a),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 31
-// CHECK: (('word-0', 0x54),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 32
-// CHECK: (('word-0', 0x4e),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 33
-// CHECK: (('word-0', 0x48),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 34
-// CHECK: (('word-0', 0x42),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 35
-// CHECK: (('word-0', 0x38),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 36
-// CHECK: (('word-0', 0x2e),
-// CHECK: ('word-1', 0x6d000000)),
-// CHECK: # Relocation 37
-// CHECK: (('word-0', 0x24),
-// CHECK: ('word-1', 0x7d000000)),
-// CHECK: # Relocation 38
-// CHECK: (('word-0', 0x1a),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 39
-// CHECK: (('word-0', 0x10),
-// CHECK: ('word-1', 0x8d000000)),
-// CHECK: # Relocation 40
-// CHECK: (('word-0', 0x9),
-// CHECK: ('word-1', 0x1d000000)),
-// CHECK: # Relocation 41
-// CHECK: (('word-0', 0x2),
-// CHECK: ('word-1', 0x6d000000)),
-// CHECK: ])
-// CHECK: ('_section_data', 'c605ffff ffff12c6 05000000 0012c705 fcffffff 78563412 c705fdff ffff7856 3412c705 feffffff 78563412 c705ffff ffff7856 3412c705 00000000 78563412 88050000 00008805 01000000 89050000 00008905 01000000 89050200 00008905 03000000 89050400 0000c605 dd000000 12c605d7 00000012 c705cc00 00007856 3412c705 c3000000 78563412 c705ba00 00007856 3412c705 b1000000 78563412 c705a800 00007856 34128805 9e000000 88059900 00008905 92000000 89058d00 00008905 88000000 89058300 00008905 7e000000 c6050300 000012c6 05040000 0012c705 00000000 78563412 c7050100 00007856 3412c705 02000000 78563412 c7050300 00007856 3412c705 04000000 78563412 88050400 00008805 05000000 89050400 00008905 05000000 89050600 00008905 07000000 89050800 0000')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 318)
-// CHECK: ('size', 40)
-// CHECK: ('offset', 686)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 1064)
-// CHECK: ('nsyms', 1)
-// CHECK: ('stroff', 1080)
-// CHECK: ('strsize', 4)
-// CHECK: ('_string_data', '\x00_d\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 350)
-// CHECK: ('_string', '_d')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 1)
-// CHECK: ('iextdefsym', 1)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 1)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 352
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x13E
+// CHECK: Offset: 384
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x2E8
+// CHECK: RelocationCount: 42
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: C605FFFF FFFF12C6 05000000 0012C705 |................|
+// CHECK: 0010: FCFFFFFF 78563412 C705FDFF FFFF7856 |....xV4.......xV|
+// CHECK: 0020: 3412C705 FEFFFFFF 78563412 C705FFFF |4.......xV4.....|
+// CHECK: 0030: FFFF7856 3412C705 00000000 78563412 |..xV4.......xV4.|
+// CHECK: 0040: 88050000 00008805 01000000 89050000 |................|
+// CHECK: 0050: 00008905 01000000 89050200 00008905 |................|
+// CHECK: 0060: 03000000 89050400 0000C605 DD000000 |................|
+// CHECK: 0070: 12C605D7 00000012 C705CC00 00007856 |..............xV|
+// CHECK: 0080: 3412C705 C3000000 78563412 C705BA00 |4.......xV4.....|
+// CHECK: 0090: 00007856 3412C705 B1000000 78563412 |..xV4.......xV4.|
+// CHECK: 00A0: C705A800 00007856 34128805 9E000000 |......xV4.......|
+// CHECK: 00B0: 88059900 00008905 92000000 89058D00 |................|
+// CHECK: 00C0: 00008905 88000000 89058300 00008905 |................|
+// CHECK: 00D0: 7E000000 C6050300 000012C6 05040000 |~...............|
+// CHECK: 00E0: 0012C705 00000000 78563412 C7050100 |........xV4.....|
+// CHECK: 00F0: 00007856 3412C705 02000000 78563412 |..xV4.......xV4.|
+// CHECK: 0100: C7050300 00007856 3412C705 04000000 |......xV4.......|
+// CHECK: 0110: 78563412 88050400 00008805 05000000 |xV4.............|
+// CHECK: 0120: 89050400 00008905 05000000 89050600 |................|
+// CHECK: 0130: 00008905 07000000 89050800 0000 |..............|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x13E
+// CHECK: Size: 0x28
+// CHECK: Offset: 702
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0010: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0020: 00000000 00000000 |........|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __text {
+// CHECK: 0x13A 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x134 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x12E 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x128 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x122 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x11C 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x116 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x10C 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x102 1 2 1 X86_64_RELOC_SIGNED_1 0 _d
+// CHECK: 0xF8 1 2 1 X86_64_RELOC_SIGNED_2 0 _d
+// CHECK: 0xEE 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0xE4 1 2 1 X86_64_RELOC_SIGNED_4 0 _d
+// CHECK: 0xDD 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0xD6 1 2 1 X86_64_RELOC_SIGNED_1 0 _d
+// CHECK: 0xD0 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0xCA 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0xC4 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0xBE 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0xB8 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0xB2 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0xAC 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0xA2 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0x98 1 2 0 X86_64_RELOC_SIGNED_1 0 __data
+// CHECK: 0x8E 1 2 0 X86_64_RELOC_SIGNED_2 0 __data
+// CHECK: 0x84 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0x7A 1 2 0 X86_64_RELOC_SIGNED_4 0 __data
+// CHECK: 0x73 1 2 0 X86_64_RELOC_SIGNED 0 __data
+// CHECK: 0x6C 1 2 0 X86_64_RELOC_SIGNED_1 0 __data
+// CHECK: 0x66 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x60 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x5A 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x54 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x4E 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x48 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x42 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x38 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x2E 1 2 1 X86_64_RELOC_SIGNED_1 0 _d
+// CHECK: 0x24 1 2 1 X86_64_RELOC_SIGNED_2 0 _d
+// CHECK: 0x1A 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x10 1 2 1 X86_64_RELOC_SIGNED_4 0 _d
+// CHECK: 0x9 1 2 1 X86_64_RELOC_SIGNED 0 _d
+// CHECK: 0x2 1 2 1 X86_64_RELOC_SIGNED_1 0 _d
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _d (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x15E
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 232
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x166
+// CHECK: fileoff: 384
+// CHECK: filesize: 358
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 1
+// CHECK: iextdefsym: 1
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 1
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/data.s b/test/MC/MachO/data.s
index 0ff2854801ac..90679a989f7b 100644
--- a/test/MC/MachO/data.s
+++ b/test/MC/MachO/data.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -macho-segment | FileCheck %s
.data
.ascii "hello"
@@ -14,54 +14,67 @@
.short 0 // 50
.p2alignw 3, 0xABCD, 5 // 50
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
-// CHECK: ('load_commands_size', 192)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 192)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 50)
-// CHECK: ('file_offset', 220)
-// CHECK: ('file_size', 50)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 220)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 1
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 50)
-// CHECK: ('offset', 220)
-// CHECK: ('alignment', 3)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
-
// FIXME: Dump contents, so we can check those too.
+
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 2
+// CHECK: SizeOfLoadCommands: 208
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 236
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x32
+// CHECK: Offset: 236
+// CHECK: Alignment: 3
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 192
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x32
+// CHECK: fileoff: 236
+// CHECK: filesize: 50
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
diff --git a/test/MC/MachO/debug_frame.s b/test/MC/MachO/debug_frame.s
index 247347d252a7..d185127f4b17 100644
--- a/test/MC/MachO/debug_frame.s
+++ b/test/MC/MachO/debug_frame.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin %s -filetype=obj -o - | llvm-readobj -s -sd -r | FileCheck %s
// Make sure MC can handle file level .cfi_startproc and .cfi_endproc that creates
// an empty frame.
@@ -26,23 +26,33 @@ Leh_func_end0:
.cfi_sections .debug_frame
Ltext_end:
-// CHECK: (('section_name', '__debug_frame\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 8)
-// CHECK-NEXT: ('size', 52)
-// CHECK-NEXT: ('offset', 332)
-// CHECK-NEXT: ('alignment', 2)
-// CHECK-NEXT: ('reloc_offset', 384)
-// CHECK-NEXT: ('num_reloc', 2)
-// CHECK-NEXT: ('flags', 0x2000000)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: # Relocation 0
-// CHECK-NEXT: (('word-0', 0x2c),
-// CHECK-NEXT: ('word-1', 0x4000001)),
-// CHECK-NEXT: # Relocation 1
-// CHECK-NEXT: (('word-0', 0x1c),
-// CHECK-NEXT: ('word-1', 0x4000001)),
-// CHECK-NEXT: ])
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __debug_frame (5F 5F 64 65 62 75 67 5F 66 72 61 6D 65 00 00 00)
+// CHECK: Segment: __DWARF (5F 5F 44 57 41 52 46 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x8
+// CHECK: Size: 0x34
+// CHECK: Offset: 332
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x180
+// CHECK: RelocationCount: 2
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x20000)
+// CHECK: Debug (0x20000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 10000000 FFFFFFFF 04000400 017C080C |.............|..|
+// CHECK: 0010: 04048801 0C000000 00000000 00000000 |................|
+// CHECK: 0020: 00000000 0C000000 00000000 00000000 |................|
+// CHECK: 0030: 06000000 |....|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __debug_frame {
+// CHECK: 0x2C 0 2 0 GENERIC_RELOC_VANILLA 0 __text
+// CHECK: 0x1C 0 2 0 GENERIC_RELOC_VANILLA 0 __text
+// CHECK: }
+// CHECK: ]
diff --git a/test/MC/MachO/diff-with-two-sections.s b/test/MC/MachO/diff-with-two-sections.s
index b5e09885f318..15784afad558 100644
--- a/test/MC/MachO/diff-with-two-sections.s
+++ b/test/MC/MachO/diff-with-two-sections.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -macho-segment -macho-version-min | FileCheck %s
.section __TEXT,__text,regular,pure_instructions
Leh_func_begin0:
@@ -7,58 +7,81 @@ Ltmp3:
Ltmp4 = Leh_func_begin0-Ltmp3
.long Ltmp4
-// CHECK: ('cputype', 7)
-// CHECK-NEXT: ('cpusubtype', 3)
-// CHECK-NEXT: ('filetype', 1)
-// CHECK-NEXT: ('num_load_commands', 1)
-// CHECK-NEXT: ('load_commands_size', 192)
-// CHECK-NEXT: ('flag', 0)
-// CHECK-NEXT: ('load_commands', [
-// CHECK-NEXT: # Load Command 0
-// CHECK-NEXT: (('command', 1)
-// CHECK-NEXT: ('size', 192)
-// CHECK-NEXT: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('vm_addr', 0)
-// CHECK-NEXT: ('vm_size', 4)
-// CHECK-NEXT: ('file_offset', 220)
-// CHECK-NEXT: ('file_size', 4)
-// CHECK-NEXT: ('maxprot', 7)
-// CHECK-NEXT: ('initprot', 7)
-// CHECK-NEXT: ('num_sections', 2)
-// CHECK-NEXT: ('flags', 0)
-// CHECK-NEXT: ('sections', [
-// CHECK-NEXT: # Section 0
-// CHECK-NEXT: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 0)
-// CHECK-NEXT: ('size', 0)
-// CHECK-NEXT: ('offset', 220)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 0)
-// CHECK-NEXT: ('num_reloc', 0)
-// CHECK-NEXT: ('flags', 0x80000000)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', '')
-// CHECK-NEXT: # Section 1
-// CHECK-NEXT: (('section_name', '__eh_frame\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 0)
-// CHECK-NEXT: ('size', 4)
-// CHECK-NEXT: ('offset', 220)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 0)
-// CHECK-NEXT: ('num_reloc', 0)
-// CHECK-NEXT: ('flags', 0x6800000b)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', '00000000')
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT: ])
+// CHECK: File: <stdin>
+// CHECK-NEXT: Format: Mach-O 32-bit i386
+// CHECK-NEXT: Arch: i386
+// CHECK-NEXT: AddressSize: 32bit
+// CHECK-NEXT: MachHeader {
+// CHECK-NEXT: Magic: Magic (0xFEEDFACE)
+// CHECK-NEXT: CpuType: X86 (0x7)
+// CHECK-NEXT: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK-NEXT: FileType: Relocatable (0x1)
+// CHECK-NEXT: NumOfLoadCommands: 2
+// CHECK-NEXT: SizeOfLoadCommands: 208
+// CHECK-NEXT: Flags [ (0x0)
+// CHECK-NEXT: ]
+// CHECK-NEXT: }
+// CHECK-NEXT: Sections [
+// CHECK-NEXT: Section {
+// CHECK-NEXT: Index: 0
+// CHECK-NEXT: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT: Address: 0x0
+// CHECK-NEXT: Size: 0x0
+// CHECK-NEXT: Offset: 236
+// CHECK-NEXT: Alignment: 0
+// CHECK-NEXT: RelocationOffset: 0x0
+// CHECK-NEXT: RelocationCount: 0
+// CHECK-NEXT: Type: 0x0
+// CHECK-NEXT: Attributes [ (0x800000)
+// CHECK-NEXT: PureInstructions (0x800000)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Reserved1: 0x0
+// CHECK-NEXT: Reserved2: 0x0
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT: )
+// CHECK-NEXT: }
+// CHECK-NEXT: Section {
+// CHECK-NEXT: Index: 1
+// CHECK-NEXT: Name: __eh_frame (5F 5F 65 68 5F 66 72 61 6D 65 00 00 00 00 00 00)
+// CHECK-NEXT: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT: Address: 0x0
+// CHECK-NEXT: Size: 0x4
+// CHECK-NEXT: Offset: 236
+// CHECK-NEXT: Alignment: 0
+// CHECK-NEXT: RelocationOffset: 0x0
+// CHECK-NEXT: RelocationCount: 0
+// CHECK-NEXT: Type: 0xB
+// CHECK-NEXT: Attributes [ (0x680000)
+// CHECK-NEXT: LiveSupport (0x80000)
+// CHECK-NEXT: NoTOC (0x400000)
+// CHECK-NEXT: StripStaticSyms (0x200000)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Reserved1: 0x0
+// CHECK-NEXT: Reserved2: 0x0
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT: 0000: 00000000 |....|
+// CHECK-NEXT: )
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: Relocations [
+// CHECK-NEXT: ]
+// CHECK-NEXT: Segment {
+// CHECK-NEXT: Cmd: LC_SEGMENT
+// CHECK-NEXT: Name:
+// CHECK-NEXT: Size: 192
+// CHECK-NEXT: vmaddr: 0x0
+// CHECK-NEXT: vmsize: 0x4
+// CHECK-NEXT: fileoff: 236
+// CHECK-NEXT: filesize: 4
+// CHECK-NEXT: maxprot: rwx
+// CHECK-NEXT: initprot: rwx
+// CHECK-NEXT: nsects: 2
+// CHECK-NEXT: flags: 0x0
+// CHECK-NEXT: }
+// CHECK-NEXT: MinVersion {
+// CHECK-NEXT: Cmd: LC_VERSION_MIN_MACOSX
+// CHECK-NEXT: Size: 16
+// CHECK-NEXT: Version: 9.0
+// CHECK-NEXT: SDK: n/a
+// CHECK-NEXT: }
diff --git a/test/MC/MachO/direction_labels.s b/test/MC/MachO/direction_labels.s
index e224ed3a1473..345a7521d7a6 100644
--- a/test/MC/MachO/direction_labels.s
+++ b/test/MC/MachO/direction_labels.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
direction_labels:
10: nop
@@ -11,85 +11,88 @@ direction_labels:
11: nop
ret
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 228)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 124)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 13)
-// CHECK: ('file_offset', 256)
-// CHECK: ('file_size', 13)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 1)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 13)
-// CHECK: ('offset', 256)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '90ebfd90 75009075 fdeb0090 c3')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 272)
-// CHECK: ('nsyms', 1)
-// CHECK: ('stroff', 284)
-// CHECK: ('strsize', 20)
-// CHECK: ('_string_data', '\x00direction_labels\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'direction_labels')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 1)
-// CHECK: ('iextdefsym', 1)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 1)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 244
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0xD
+// CHECK: Offset: 272
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: direction_labels (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 124
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0xD
+// CHECK: fileoff: 272
+// CHECK: filesize: 13
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 1
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 1
+// CHECK: iextdefsym: 1
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 1
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/empty-twice.ll b/test/MC/MachO/empty-twice.ll
new file mode 100644
index 000000000000..6914c73a58d1
--- /dev/null
+++ b/test/MC/MachO/empty-twice.ll
@@ -0,0 +1,12 @@
+; Check that there is no persistent state in the MachO emitter that crashes
+; us when reusing the pass manager.
+; RUN: llc -mtriple=x86_64-apple-darwin -compile-twice -filetype=obj %s -o -
+
+; Force the creation of a DWARF section
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM", isOptimized: true)
+!1 = !DIFile(filename: "<stdin>", directory: "/")
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/MC/MachO/file.s b/test/MC/MachO/file.s
index a7d6c20b885f..108e3bbf173f 100644
--- a/test/MC/MachO/file.s
+++ b/test/MC/MachO/file.s
@@ -9,7 +9,7 @@
// CHECK-NEXT: Segment: __DWARF
// CHECK-NEXT: Address: 0x1
// CHECK-NEXT: Size: 0x28
-// CHECK-NEXT: Offset: 221
+// CHECK-NEXT: Offset: 237
// CHECK-NEXT: Alignment: 0
// CHECK-NEXT: RelocationOffset: 0x0
// CHECK-NEXT: RelocationCount: 0
diff --git a/test/MC/MachO/gen-dwarf.s b/test/MC/MachO/gen-dwarf.s
index ad0a562aaf70..22a8e93799d6 100644
--- a/test/MC/MachO/gen-dwarf.s
+++ b/test/MC/MachO/gen-dwarf.s
@@ -17,7 +17,7 @@ _x: .long 1
// CHECK: .debug_abbrev contents:
// CHECK: Abbrev table for offset: 0x00000000
// CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_yes
-// CHECK: DW_AT_stmt_list DW_FORM_data4
+// CHECK: DW_AT_stmt_list DW_FORM_sec_offset
// CHECK: DW_AT_low_pc DW_FORM_addr
// CHECK: DW_AT_high_pc DW_FORM_addr
// CHECK: DW_AT_name DW_FORM_string
@@ -39,7 +39,7 @@ _x: .long 1
// We don't check the leading addresses these are at.
// CHECK: DW_TAG_compile_unit [1] *
-// CHECK: DW_AT_stmt_list [DW_FORM_data4] (0x00000000)
+// CHECK: DW_AT_stmt_list [DW_FORM_sec_offset] (0x00000000)
// CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
// CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000008)
// We don't check the file name as it is a temp directory
diff --git a/test/MC/MachO/i386-large-relocations.s b/test/MC/MachO/i386-large-relocations.s
index e5a1cfb2c5ef..e8805d37e4a1 100644
--- a/test/MC/MachO/i386-large-relocations.s
+++ b/test/MC/MachO/i386-large-relocations.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin10 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
.space 0x1ed280
.section __DATA,__const
@@ -20,17 +20,11 @@ _foo:
// so the assembler falls back to non-scattered relocations.
// rdar://12358909
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0x5181034),
-// CHECK: ('word-1', 0x4000003)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0x518102c),
-// CHECK: ('word-1', 0x4000003)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0x5181028),
-// CHECK: ('word-1', 0x4000003)),
-// CHECK: # Relocation 3
-// CHECK: (('word-0', 0x5181020),
-// CHECK: ('word-1', 0x4000003)),
-// CHECK: ])
+// CHECK: Relocations [
+// CHECK: Section __const {
+// CHECK: 0x5181034 0 2 0 GENERIC_RELOC_VANILLA 0 __bss
+// CHECK: 0x518102C 0 2 0 GENERIC_RELOC_VANILLA 0 __bss
+// CHECK: 0x5181028 0 2 0 GENERIC_RELOC_VANILLA 0 __bss
+// CHECK: 0x5181020 0 2 0 GENERIC_RELOC_VANILLA 0 __bss
+// CHECK: }
+// CHECK: ]
diff --git a/test/MC/MachO/indirect-symbols.s b/test/MC/MachO/indirect-symbols.s
index 079576833cfe..4ab384821d9e 100644
--- a/test/MC/MachO/indirect-symbols.s
+++ b/test/MC/MachO/indirect-symbols.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
_b:
_c = 0
@@ -20,169 +20,200 @@ _e:
.indirect_symbol _f
.long 0
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 364)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 260)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 27)
-// CHECK: ('file_offset', 392)
-// CHECK: ('file_size', 27)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 3)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 392)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__jump_table\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__IMPORT\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 15)
-// CHECK: ('offset', 392)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x84000008)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 5)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', 'f4f4f4f4 f4f4f4f4 f4f4f4f4 f4f4f4')
-// CHECK: # Section 2
-// CHECK: (('section_name', '__pointers\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__IMPORT\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 15)
-// CHECK: ('size', 12)
-// CHECK: ('offset', 407)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x6)
-// CHECK: ('reserved1', 3)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '00000000 00000000 00000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 444)
-// CHECK: ('nsyms', 6)
-// CHECK: ('stroff', 516)
-// CHECK: ('strsize', 20)
-// CHECK: ('_string_data', '\x00_f\x00_e\x00_d\x00_c\x00_b\x00_a\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 13)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_b')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 10)
-// CHECK: ('n_type', 0x2)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_c')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 4)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_e')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0x2)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_f')
-// CHECK: ),
-// CHECK: # Symbol 4
-// CHECK: (('n_strx', 16)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 1)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_a')
-// CHECK: ),
-// CHECK: # Symbol 5
-// CHECK: (('n_strx', 7)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_d')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 4)
-// CHECK: ('iextdefsym', 4)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 4)
-// CHECK: ('nundefsym', 2)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 420)
-// CHECK: ('nindirectsyms', 6)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: # Indirect Symbol 0
-// CHECK: (('symbol_index', 0x4),),
-// CHECK: # Indirect Symbol 1
-// CHECK: (('symbol_index', 0x0),),
-// CHECK: # Indirect Symbol 2
-// CHECK: (('symbol_index', 0x1),),
-// CHECK: # Indirect Symbol 3
-// CHECK: (('symbol_index', 0x5),),
-// CHECK: # Indirect Symbol 4
-// CHECK: (('symbol_index', 0x80000000),),
-// CHECK: # Indirect Symbol 5
-// CHECK: (('symbol_index', 0xc0000000),),
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 380
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 408
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __jump_table (5F 5F 6A 75 6D 70 5F 74 61 62 6C 65 00 00 00 00)
+// CHECK: Segment: __IMPORT (5F 5F 49 4D 50 4F 52 54 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0xF
+// CHECK: Offset: 408
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x8
+// CHECK: Attributes [ (0x840000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SelfModifyingCode (0x40000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x5
+// CHECK: SectionData (
+// CHECK: 0000: F4F4F4F4 F4F4F4F4 F4F4F4F4 F4F4F4 |...............|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 2
+// CHECK: Name: __pointers (5F 5F 70 6F 69 6E 74 65 72 73 00 00 00 00 00 00)
+// CHECK: Segment: __IMPORT (5F 5F 49 4D 50 4F 52 54 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0xF
+// CHECK: Size: 0xC
+// CHECK: Offset: 423
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x6
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x3
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00000000 00000000 00000000 |............|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _b (13)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _c (10)
+// CHECK: Type: Abs (0x2)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _e (4)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _f (1)
+// CHECK: Type: Abs (0x2)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _a (16)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: ReferenceFlagUndefinedLazy (0x1)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _d (7)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 6
+// CHECK: Symbols [
+// CHECK: Entry {
+// CHECK: Entry Index: 0
+// CHECK: Symbol Index: 0x4
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 1
+// CHECK: Symbol Index: 0x0
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 2
+// CHECK: Symbol Index: 0x1
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 3
+// CHECK: Symbol Index: 0x5
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 4
+// CHECK: Symbol Index: 0x80000000
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 5
+// CHECK: Symbol Index: 0xC0000000
+// CHECK: }
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 260
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x1B
+// CHECK: fileoff: 408
+// CHECK: filesize: 27
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 3
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 4
+// CHECK: iextdefsym: 4
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 4
+// CHECK: nundefsym: 2
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 436
+// CHECK: nindirectsyms: 6
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/jcc.s b/test/MC/MachO/jcc.s
index 2288a20fa273..caff25714a0c 100644
--- a/test/MC/MachO/jcc.s
+++ b/test/MC/MachO/jcc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -macho-segment | FileCheck %s
ja 1f
1: nop
@@ -65,42 +65,60 @@
jz 1f
1: nop
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
-// CHECK: ('load_commands_size', 124)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 124)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 96)
-// CHECK: ('file_offset', 152)
-// CHECK: ('file_size', 96)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 1)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 96)
-// CHECK: ('offset', 152)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '77009073 00907200 90760090 720090e3 0090e300 90740090 7f00907d 00907c00 907e0090 76009072 00907300 90770090 73009075 00907e00 907c0090 7d00907f 00907100 907b0090 79009075 00907000 907a0090 7a00907b 00907800 90740090')
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 2
+// CHECK: SizeOfLoadCommands: 140
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x60
+// CHECK: Offset: 168
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 77009073 00907200 90760090 720090E3 |w..s..r..v..r...|
+// CHECK: 0010: 0090E300 90740090 7F00907D 00907C00 |.....t.....}..|.|
+// CHECK: 0020: 907E0090 76009072 00907300 90770090 |.~..v..r..s..w..|
+// CHECK: 0030: 73009075 00907E00 907C0090 7D00907F |s..u..~..|..}...|
+// CHECK: 0040: 00907100 907B0090 79009075 00907000 |..q..{..y..u..p.|
+// CHECK: 0050: 907A0090 7A00907B 00907800 90740090 |.z..z..{..x..t..|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 124
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x60
+// CHECK: fileoff: 168
+// CHECK: filesize: 96
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 1
+// CHECK: flags: 0x0
+// CHECK: }
diff --git a/test/MC/MachO/lcomm-attributes.s b/test/MC/MachO/lcomm-attributes.s
index 6e49e8016d1c..5f902ebac4f6 100644
--- a/test/MC/MachO/lcomm-attributes.s
+++ b/test/MC/MachO/lcomm-attributes.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
// Note, this test intentionally mismatches Darwin 'as', which loses the
// following global marker.
@@ -14,123 +14,134 @@
.zerofill __DATA, __bss, sym_zfill_ext_B, 4
.globl sym_zfill_ext_B
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 296)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 192)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 16)
-// CHECK: ('file_offset', 324)
-// CHECK: ('file_size', 0)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 324)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 1
-// CHECK: (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 16)
-// CHECK: ('offset', 0)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x1)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 324)
-// CHECK: ('nsyms', 4)
-// CHECK: ('stroff', 372)
-// CHECK: ('strsize', 68)
-// CHECK: ('_string_data', '\x00sym_lcomm_ext_B\x00sym_zfill_ext_B\x00sym_lcomm_ext_A\x00sym_zfill_ext_A\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 33)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lcomm_ext_A')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 4)
-// CHECK: ('_string', 'sym_lcomm_ext_B')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 49)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', 'sym_zfill_ext_A')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 17)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 12)
-// CHECK: ('_string', 'sym_zfill_ext_B')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 0)
-// CHECK: ('iextdefsym', 0)
-// CHECK: ('nextdefsym', 4)
-// CHECK: ('iundefsym', 4)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 312
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __bss (5F 5F 62 73 73 00 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x10
+// CHECK: Offset: 0
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: LocReloc (0x1)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: sym_lcomm_ext_A (33)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lcomm_ext_B (1)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x4
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_zfill_ext_A (49)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_zfill_ext_B (17)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0xC
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 192
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x10
+// CHECK: fileoff: 340
+// CHECK: filesize: 0
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 0
+// CHECK: iextdefsym: 0
+// CHECK: nextdefsym: 4
+// CHECK: iundefsym: 4
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/linker-option-2.s b/test/MC/MachO/linker-option-2.s
index bb5966be2734..415d02e536b3 100644
--- a/test/MC/MachO/linker-option-2.s
+++ b/test/MC/MachO/linker-option-2.s
@@ -1,25 +1,18 @@
-// RUN: llvm-mc -n -triple x86_64-apple-darwin10 %s -filetype=obj | macho-dump | FileCheck %s
-
-// CHECK: ('load_commands_size', 104)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 1
-// CHECK: (('command', 45)
-// CHECK: ('size', 16)
-// CHECK: ('count', 1)
-// CHECK: ('_strings', [
-// CHECK: "a",
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 45)
-// CHECK: ('size', 16)
-// CHECK: ('count', 2)
-// CHECK: ('_strings', [
-// CHECK: "a",
-// CHECK: "b",
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// RUN: llvm-mc -n -triple x86_64-apple-darwin10 %s -filetype=obj | llvm-readobj -macho-linker-options | FileCheck %s
.linker_option "a"
.linker_option "a", "b"
+
+// CHECK: Linker Options {
+// CHECK: Size: 16
+// CHECK: Strings [
+// CHECK: Value: a
+// CHECK: ]
+// CHECK: }
+// CHECK: Linker Options {
+// CHECK: Size: 16
+// CHECK: Strings [
+// CHECK: Value: a
+// CHECK: Value: b
+// CHECK: ]
+// CHECK: }
diff --git a/test/MC/MachO/linker-options.ll b/test/MC/MachO/linker-options.ll
index 2cda835c100c..09ebd0f91567 100644
--- a/test/MC/MachO/linker-options.ll
+++ b/test/MC/MachO/linker-options.ll
@@ -4,35 +4,28 @@
; CHECK-ASM: .linker_option "-lz"
; CHECK-ASM-NEXT: .linker_option "-framework", "Cocoa"
-; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | macho-dump > %t
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-readobj -macho-linker-options > %t
; RUN: FileCheck --check-prefix=CHECK-OBJ < %t %s
-; CHECK-OBJ: ('load_commands', [
-; CHECK-OBJ: # Load Command 1
-; CHECK-OBJ: (('command', 45)
-; CHECK-OBJ: ('size', 16)
-; CHECK-OBJ: ('count', 1)
-; CHECK-OBJ: ('_strings', [
-; CHECK-OBJ: "-lz",
-; CHECK-OBJ: ])
-; CHECK-OBJ: ),
-; CHECK-OBJ: # Load Command 2
-; CHECK-OBJ: (('command', 45)
-; CHECK-OBJ: ('size', 32)
-; CHECK-OBJ: ('count', 2)
-; CHECK-OBJ: ('_strings', [
-; CHECK-OBJ: "-framework",
-; CHECK-OBJ: "Cocoa",
-; CHECK-OBJ: ])
-; CHECK-OBJ: # Load Command 3
-; CHECK-OBJ: (('command', 45)
-; CHECK-OBJ: ('size', 24)
-; CHECK-OBJ: ('count', 1)
-; CHECK-OBJ: ('_strings', [
-; CHECK-OBJ: "-lmath",
-; CHECK-OBJ: ])
-; CHECK-OBJ: ),
-; CHECK-OBJ: ])
+; CHECK-OBJ: Linker Options {
+; CHECK-OBJ: Size: 16
+; CHECK-OBJ: Strings [
+; CHECK-OBJ: Value: -lz
+; CHECK-OBJ: ]
+; CHECK-OBJ: }
+; CHECK-OBJ: Linker Options {
+; CHECK-OBJ: Size: 32
+; CHECK-OBJ: Strings [
+; CHECK-OBJ: Value: -framework
+; CHECK-OBJ: Value: Cocoa
+; CHECK-OBJ: ]
+; CHECK-OBJ: }
+; CHECK-OBJ: Linker Options {
+; CHECK-OBJ: Size: 24
+; CHECK-OBJ: Strings [
+; CHECK-OBJ: Value: -lmath
+; CHECK-OBJ: ]
+; CHECK-OBJ: }
!0 = !{i32 6, !"Linker Options", !{!{!"-lz"}, !{!"-framework", !"Cocoa"}, !{!"-lmath"}}}
diff --git a/test/MC/MachO/loc.s b/test/MC/MachO/loc.s
index 6e7faa3bf9aa..c1a2edd60909 100644
--- a/test/MC/MachO/loc.s
+++ b/test/MC/MachO/loc.s
@@ -1,25 +1,35 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -r -s -sd | FileCheck %s
.file 1 "foo"
.loc 1 64 0
nop
-// CHECK: # Section 1
-// CHECK-NEXT: (('section_name', '__debug_line\x00\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 1)
-// CHECK-NEXT: ('size', 51)
-// CHECK-NEXT: ('offset', 221)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 272)
-// CHECK-NEXT: ('num_reloc', 1)
-// CHECK-NEXT: ('flags', 0x2000000)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: # Relocation 0
-// CHECK-NEXT: (('word-0', 0x27),
-// CHECK-NEXT: ('word-1', 0x4000001)),
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', '2f000000 02001a00 00000101 fb0e0d00 01010101 00000001 00000100 666f6f00 00000000 00050200 00000003 3f010201 000101')
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __debug_line (5F 5F 64 65 62 75 67 5F 6C 69 6E 65 00 00 00 00)
+// CHECK: Segment: __DWARF (5F 5F 44 57 41 52 46 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x1
+// CHECK: Size: 0x33
+// CHECK: Offset: 237
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x120
+// CHECK: RelocationCount: 1
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x20000)
+// CHECK: Debug (0x20000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 2F000000 02001A00 00000101 FB0E0D00 |/...............|
+// CHECK: 0010: 01010101 00000001 00000100 666F6F00 |............foo.|
+// CHECK: 0020: 00000000 00050200 00000003 3F010201 |............?...|
+// CHECK: 0030: 000101 |...|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __debug_line {
+// CHECK: 0x27 0 2 0 GENERIC_RELOC_VANILLA 0 __text
+// CHECK: }
+// CHECK: ]
diff --git a/test/MC/MachO/osx-version-min-load-command.s b/test/MC/MachO/osx-version-min-load-command.s
index cb62565cef9a..2218d556bce5 100644
--- a/test/MC/MachO/osx-version-min-load-command.s
+++ b/test/MC/MachO/osx-version-min-load-command.s
@@ -1,10 +1,16 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | llvm-readobj --macho-version-min | FileCheck %s
// Test the formation of the version-min load command in the MachO.
// use a nonsense but well formed version.
.macosx_version_min 25,3,1
-// CHECK: (('command', 36)
-// CHECK: ('size', 16)
-// CHECK: ('version, 1639169)
-// CHECK: ('sdk, 0)
-// CHECK: ),
+
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MinVersion {
+// CHECK: Cmd: LC_VERSION_MIN_MACOSX
+// CHECK: Size: 16
+// CHECK: Version: 25.3.1
+// CHECK: SDK: n/a
+// CHECK: }
diff --git a/test/MC/MachO/pcrel-to-other-section.s b/test/MC/MachO/pcrel-to-other-section.s
index 22a7822d9576..cf46b0dcb0f7 100644
--- a/test/MC/MachO/pcrel-to-other-section.s
+++ b/test/MC/MachO/pcrel-to-other-section.s
@@ -1,107 +1,119 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
nop
.section __TEXT,__StaticInit,regular,pure_instructions
calll foo
-// CHECK: ('cputype', 7)
-// CHECK-NEXT: ('cpusubtype', 3)
-// CHECK-NEXT: ('filetype', 1)
-// CHECK-NEXT: ('num_load_commands', 3)
-// CHECK-NEXT: ('load_commands_size', 296)
-// CHECK-NEXT: ('flag', 0)
-// CHECK-NEXT: ('load_commands', [
-// CHECK-NEXT: # Load Command 0
-// CHECK-NEXT: (('command', 1)
-// CHECK-NEXT: ('size', 192)
-// CHECK-NEXT: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('vm_addr', 0)
-// CHECK-NEXT: ('vm_size', 6)
-// CHECK-NEXT: ('file_offset', 324)
-// CHECK-NEXT: ('file_size', 6)
-// CHECK-NEXT: ('maxprot', 7)
-// CHECK-NEXT: ('initprot', 7)
-// CHECK-NEXT: ('num_sections', 2)
-// CHECK-NEXT: ('flags', 0)
-// CHECK-NEXT: ('sections', [
-// CHECK-NEXT: # Section 0
-// CHECK-NEXT: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 0)
-// CHECK-NEXT: ('size', 1)
-// CHECK-NEXT: ('offset', 324)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 0)
-// CHECK-NEXT: ('num_reloc', 0)
-// CHECK-NEXT: ('flags', 0x80000400)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', '90')
-// CHECK-NEXT: # Section 1
-// CHECK-NEXT: (('section_name', '__StaticInit\x00\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 1)
-// CHECK-NEXT: ('size', 5)
-// CHECK-NEXT: ('offset', 325)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 332)
-// CHECK-NEXT: ('num_reloc', 1)
-// CHECK-NEXT: ('flags', 0x80000400)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: # Relocation 0
-// CHECK-NEXT: (('word-0', 0x1),
-// CHECK-NEXT: ('word-1', 0xd000000)),
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', 'e8faffff ff')
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Load Command 1
-// CHECK-NEXT: (('command', 2)
-// CHECK-NEXT: ('size', 24)
-// CHECK-NEXT: ('symoff', 340)
-// CHECK-NEXT: ('nsyms', 1)
-// CHECK-NEXT: ('stroff', 352)
-// CHECK-NEXT: ('strsize', 8)
-// CHECK-NEXT: ('_string_data', '\x00foo\x00\x00\x00\x00')
-// CHECK-NEXT: ('_symbols', [
-// CHECK-NEXT: # Symbol 0
-// CHECK-NEXT: (('n_strx', 1)
-// CHECK-NEXT: ('n_type', 0x1)
-// CHECK-NEXT: ('n_sect', 0)
-// CHECK-NEXT: ('n_desc', 0)
-// CHECK-NEXT: ('n_value', 0)
-// CHECK-NEXT: ('_string', 'foo')
-// CHECK-NEXT: ),
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Load Command 2
-// CHECK-NEXT: (('command', 11)
-// CHECK-NEXT: ('size', 80)
-// CHECK-NEXT: ('ilocalsym', 0)
-// CHECK-NEXT: ('nlocalsym', 0)
-// CHECK-NEXT: ('iextdefsym', 0)
-// CHECK-NEXT: ('nextdefsym', 0)
-// CHECK-NEXT: ('iundefsym', 0)
-// CHECK-NEXT: ('nundefsym', 1)
-// CHECK-NEXT: ('tocoff', 0)
-// CHECK-NEXT: ('ntoc', 0)
-// CHECK-NEXT: ('modtaboff', 0)
-// CHECK-NEXT: ('nmodtab', 0)
-// CHECK-NEXT: ('extrefsymoff', 0)
-// CHECK-NEXT: ('nextrefsyms', 0)
-// CHECK-NEXT: ('indirectsymoff', 0)
-// CHECK-NEXT: ('nindirectsyms', 0)
-// CHECK-NEXT: ('extreloff', 0)
-// CHECK-NEXT: ('nextrel', 0)
-// CHECK-NEXT: ('locreloff', 0)
-// CHECK-NEXT: ('nlocrel', 0)
-// CHECK-NEXT: ('_indirect_symbols', [
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 312
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x1
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 90 |.|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __StaticInit (5F 5F 53 74 61 74 69 63 49 6E 69 74 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x1
+// CHECK: Size: 0x5
+// CHECK: Offset: 341
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x15C
+// CHECK: RelocationCount: 1
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: E8FAFFFF FF |.....|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __StaticInit {
+// CHECK: 0x1 1 2 1 GENERIC_RELOC_VANILLA 0 foo
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: foo (1)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 192
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x6
+// CHECK: fileoff: 340
+// CHECK: filesize: 6
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 0
+// CHECK: iextdefsym: 0
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 0
+// CHECK: nundefsym: 1
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/relax-jumps.s b/test/MC/MachO/relax-jumps.s
index 65a51e92b37c..ab68eb1d6c22 100644
--- a/test/MC/MachO/relax-jumps.s
+++ b/test/MC/MachO/relax-jumps.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -s -sd | FileCheck %s
// FIXME: This is a horrible way of checking the output, we need an llvm-mc
// based 'otool'. Use:
@@ -10,10 +10,6 @@
// diff $f.{as,mc}.dump)
// to examine the results in a more sensible fashion.
-// CHECK: ('_section_data', '90
-// CHECK: 0f8432ff ffff0f82 e6000000 0f8726ff ffff0f8f da000000 0f881aff ffff0f83 ce000000 0f890eff ffff90
-// CHECK: 9031c0')
-
L1:
.space 200, 0x90
@@ -29,3 +25,12 @@ L1:
L2:
xorl %eax, %eax
+
+// CHECK: SectionData (
+// CHECK: 00C0: 90909090 90909090 0F8432FF FFFF0F82 |..........2.....|
+// CHECK: 00D0: E6000000 0F8726FF FFFF0F8F DA000000 |......&.........|
+// CHECK: 00E0: 0F881AFF FFFF0F83 CE000000 0F890EFF |................|
+// CHECK: 00F0: FFFF9090 90909090 90909090 90909090 |................|
+// CHECK: 01A0: 90909090 90909090 90909090 90909090 |................|
+// CHECK: 01B0: 90909090 90909090 909031C0 |..........1.|
+// CHECK: )
diff --git a/test/MC/MachO/relax-recompute-align.s b/test/MC/MachO/relax-recompute-align.s
index 249402502f71..1369bcdf5aee 100644
--- a/test/MC/MachO/relax-recompute-align.s
+++ b/test/MC/MachO/relax-recompute-align.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -s | FileCheck %s
// FIXME: This is a horrible way of checking the output, we need an llvm-mc
// based 'otool'.
@@ -8,20 +8,6 @@
// recomputed -- otherwise the second jump will appear to be out-of-range for a
// 1-byte jump.
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 306)
-// CHECK: ('offset', 324)
-// CHECK: ('alignment', 4)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-
L0:
.space 0x8a, 0x90
jmp L0
@@ -35,3 +21,22 @@ L1:
L2:
.zerofill __DATA,__bss,_sym,4,2
+
+// CHECK: Section {
+// CHECK-NEXT: Index: 0
+// CHECK-NEXT: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT: Address: 0x0
+// CHECK-NEXT: Size: 0x132
+// CHECK-NEXT: Offset: 340
+// CHECK-NEXT: Alignment: 4
+// CHECK-NEXT: RelocationOffset: 0x0
+// CHECK-NEXT: RelocationCount: 0
+// CHECK-NEXT: Type: 0x0
+// CHECK-NEXT: Attributes [ (0x800004)
+// CHECK-NEXT: PureInstructions (0x800000)
+// CHECK-NEXT: SomeInstructions (0x4)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Reserved1: 0x0
+// CHECK-NEXT: Reserved2: 0x0
+// CHECK-NEXT: }
diff --git a/test/MC/MachO/reloc-diff.s b/test/MC/MachO/reloc-diff.s
index a63a413f24b6..f9d58d97e529 100644
--- a/test/MC/MachO/reloc-diff.s
+++ b/test/MC/MachO/reloc-diff.s
@@ -1,30 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
-
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0xa4000010),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0xa4000008),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 3
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 4
-// CHECK: (('word-0', 0xa4000004),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 5
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 6
-// CHECK: (('word-0', 0xa2000000),
-// CHECK: ('word-1', 0x0)),
-// CHECK: # Relocation 7
-// CHECK: (('word-0', 0xa1000000),
-// CHECK: ('word-1', 0x0)),
-// CHECK-NEXT: ])
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
_local_def:
.globl _external_def
@@ -41,3 +15,16 @@ Ltemp:
.long _local_def - Ltemp
.long _external_def - Ltemp
+
+// CHECK: Relocations [
+// CHECK-NEXT: Section __data {
+// CHECK-NEXT: 0x10 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x0
+// CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x0
+// CHECK-NEXT: 0x8 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x0
+// CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x0
+// CHECK-NEXT: 0x4 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x0
+// CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x0
+// CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_SECTDIFF 1 0x0
+// CHECK-NEXT: 0x0 0 2 n/a GENERIC_RELOC_PAIR 1 0x0
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/reloc-pcrel-offset.s b/test/MC/MachO/reloc-pcrel-offset.s
index e113e9616cc0..d4d6ddf48ddf 100644
--- a/test/MC/MachO/reloc-pcrel-offset.s
+++ b/test/MC/MachO/reloc-pcrel-offset.s
@@ -1,10 +1,4 @@
-// RUN: llvm-mc -n -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
-
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0x1),
-// CHECK: ('word-1', 0x5000002)),
-// CHECK-NEXT: ])
-// CHECK: ('_section_data', 'e8fbffff ff')
+// RUN: llvm-mc -n -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -r -s -sd | FileCheck %s
.data
.long 0
@@ -15,3 +9,28 @@ _b:
call _a
.subsections_via_symbols
+
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x4
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00000000 |....|
+// CHECK: )
+// CHECK: }
+// CHECK: Relocations [
+// CHECK: Section __text {
+// CHECK: 0x1 1 2 0 GENERIC_RELOC_VANILLA 0 __text
+// CHECK: }
+// CHECK: ]
diff --git a/test/MC/MachO/reloc-pcrel.s b/test/MC/MachO/reloc-pcrel.s
index 11334150368a..88f8828e6874 100644
--- a/test/MC/MachO/reloc-pcrel.s
+++ b/test/MC/MachO/reloc-pcrel.s
@@ -1,39 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
-
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0xe4000045),
-// CHECK: ('word-1', 0x4)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0xe1000000),
-// CHECK: ('word-1', 0x6)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0x40),
-// CHECK: ('word-1', 0xd000003)),
-// CHECK: # Relocation 3
-// CHECK: (('word-0', 0x3b),
-// CHECK: ('word-1', 0xd000003)),
-// CHECK: # Relocation 4
-// CHECK: (('word-0', 0x36),
-// CHECK: ('word-1', 0xd000003)),
-// CHECK: # Relocation 5
-// CHECK: (('word-0', 0xe0000031),
-// CHECK: ('word-1', 0x4)),
-// CHECK: # Relocation 6
-// CHECK: (('word-0', 0xe000002c),
-// CHECK: ('word-1', 0x4)),
-// CHECK: # Relocation 7
-// CHECK: (('word-0', 0x27),
-// CHECK: ('word-1', 0x5000001)),
-// CHECK: # Relocation 8
-// CHECK: (('word-0', 0xe0000022),
-// CHECK: ('word-1', 0x2)),
-// CHECK: # Relocation 9
-// CHECK: (('word-0', 0xe000001d),
-// CHECK: ('word-1', 0x2)),
-// CHECK: # Relocation 10
-// CHECK: (('word-0', 0x18),
-// CHECK: ('word-1', 0x5000001)),
-// CHECK-NEXT: ])
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
xorl %eax,%eax
@@ -63,3 +28,19 @@ L1:
call _b - L0
.subsections_via_symbols
+
+// CHECK: Relocations [
+// CHECK-NEXT: Section __text {
+// CHECK-NEXT: 0x45 1 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 0x4
+// CHECK-NEXT: 0x0 1 2 n/a GENERIC_RELOC_PAIR 1 0x6
+// CHECK-NEXT: 0x40 1 2 1 GENERIC_RELOC_VANILLA 0 _c
+// CHECK-NEXT: 0x3B 1 2 1 GENERIC_RELOC_VANILLA 0 _c
+// CHECK-NEXT: 0x36 1 2 1 GENERIC_RELOC_VANILLA 0 _c
+// CHECK-NEXT: 0x31 1 2 n/a GENERIC_RELOC_VANILLA 1 0x4
+// CHECK-NEXT: 0x2C 1 2 n/a GENERIC_RELOC_VANILLA 1 0x4
+// CHECK-NEXT: 0x27 1 2 0 GENERIC_RELOC_VANILLA 0 __text
+// CHECK-NEXT: 0x22 1 2 n/a GENERIC_RELOC_VANILLA 1 0x2
+// CHECK-NEXT: 0x1D 1 2 n/a GENERIC_RELOC_VANILLA 1 0x2
+// CHECK-NEXT: 0x18 1 2 0 GENERIC_RELOC_VANILLA 0 __text
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/section-align-1.s b/test/MC/MachO/section-align-1.s
index 360c0a842313..db0716585504 100644
--- a/test/MC/MachO/section-align-1.s
+++ b/test/MC/MachO/section-align-1.s
@@ -1,87 +1,89 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -t -file-headers -s -macho-indirect-symbols -macho-dysymtab -macho-segment | FileCheck %s
name:
.byte 0
// Check that symbol table is aligned to 4 bytes.
-
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 228)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 124)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 1)
-// CHECK: ('file_offset', 256)
-// CHECK: ('file_size', 1)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 1)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 1)
-// CHECK: ('offset', 256)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 260)
-// CHECK: ('nsyms', 1)
-// CHECK: ('stroff', 272)
-// CHECK: ('strsize', 8)
-// CHECK: ('_string_data', '\x00name\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'name')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 1)
-// CHECK: ('iextdefsym', 1)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 1)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 244
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x1
+// CHECK: Offset: 272
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: name (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 124
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x1
+// CHECK: fileoff: 272
+// CHECK: filesize: 1
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 1
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 1
+// CHECK: iextdefsym: 1
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 1
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/section-align-2.s b/test/MC/MachO/section-align-2.s
index 086fc4a4f15a..4a2099a4b0cb 100644
--- a/test/MC/MachO/section-align-2.s
+++ b/test/MC/MachO/section-align-2.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.byte 0
@@ -14,124 +14,137 @@ bar:
.const
baz:
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 364)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 260)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 13)
-// CHECK: ('file_offset', 392)
-// CHECK: ('file_size', 13)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 3)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 1)
-// CHECK: ('offset', 392)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 1
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 4)
-// CHECK: ('size', 9)
-// CHECK: ('offset', 396)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 2
-// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 13)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 405)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 408)
-// CHECK: ('nsyms', 3)
-// CHECK: ('stroff', 444)
-// CHECK: ('strsize', 16)
-// CHECK: ('_string_data', '\x00baz\x00bar\x00foo\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 9)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 4)
-// CHECK: ('_string', 'foo')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 5)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 12)
-// CHECK: ('_string', 'bar')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 3)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 13)
-// CHECK: ('_string', 'baz')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 3)
-// CHECK: ('iextdefsym', 3)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 3)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 380
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x1
+// CHECK: Offset: 408
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x4
+// CHECK: Size: 0x9
+// CHECK: Offset: 412
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 2
+// CHECK: Name: __const (5F 5F 63 6F 6E 73 74 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0xD
+// CHECK: Size: 0x0
+// CHECK: Offset: 421
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: foo (9)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x4
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: bar (5)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0xC
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: baz (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __const (0x3)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0xD
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 260
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0xD
+// CHECK: fileoff: 408
+// CHECK: filesize: 13
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 3
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 3
+// CHECK: iextdefsym: 3
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 3
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/section-attributes.s b/test/MC/MachO/section-attributes.s
index b21ef38ac561..0c2913e4aca4 100644
--- a/test/MC/MachO/section-attributes.s
+++ b/test/MC/MachO/section-attributes.s
@@ -1,7 +1,10 @@
// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o %t
-// RUN: macho-dump %t | FileCheck %s
+// RUN: llvm-readobj -s -sd %t | FileCheck %s
-// CHECK: # Section 1
-// CHECK: ('flags', 0x0)
.section __TEXT,__objc_opt_ro
.long 0
+
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
diff --git a/test/MC/MachO/section-flags.s b/test/MC/MachO/section-flags.s
index 8ac1bbff7551..da5c4c133176 100644
--- a/test/MC/MachO/section-flags.s
+++ b/test/MC/MachO/section-flags.s
@@ -1,14 +1,49 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
-//
-// CHECK: # Section 0
-// CHECK: 'section_name', '__text
-// CHECK: 'flags', 0x80000000
-// CHECK: # Section 1
-// CHECK: 'section_name', '__data
-// CHECK: 'flags', 0x400
-
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -s -sd | FileCheck %s
+
.text
.data
f0:
movl $0, %eax
+
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x5
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x4)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: B8000000 00 |.....|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
diff --git a/test/MC/MachO/string-table.s b/test/MC/MachO/string-table.s
index 3a935eee0500..0902a3477b55 100644
--- a/test/MC/MachO/string-table.s
+++ b/test/MC/MachO/string-table.s
@@ -1,100 +1,107 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
movl $a, b
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 228)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 124)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 10)
-// CHECK: ('file_offset', 256)
-// CHECK: ('file_size', 10)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 1)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 10)
-// CHECK: ('offset', 256)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 268)
-// CHECK: ('num_reloc', 2)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0x6),
-// CHECK: ('word-1', 0xc000000)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0x2),
-// CHECK: ('word-1', 0xc000001)),
-// CHECK: ])
-// CHECK: ('_section_data', 'c7050000 00000000 0000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 284)
-// CHECK: ('nsyms', 2)
-// CHECK: ('stroff', 308)
-// CHECK: ('strsize', 8)
-// CHECK: ('_string_data', '\x00b\x00a\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 3)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'a')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'b')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 0)
-// CHECK: ('iextdefsym', 0)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 0)
-// CHECK: ('nundefsym', 2)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 244
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0xA
+// CHECK: Offset: 272
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x11C
+// CHECK: RelocationCount: 2
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: C7050000 00000000 0000 |..........|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __text {
+// CHECK: 0x6 0 2 1 GENERIC_RELOC_VANILLA 0 a
+// CHECK: 0x2 0 2 1 GENERIC_RELOC_VANILLA 0 b
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: a (3)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: b (1)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 124
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0xA
+// CHECK: fileoff: 272
+// CHECK: filesize: 10
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 1
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 0
+// CHECK: iextdefsym: 0
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 0
+// CHECK: nundefsym: 2
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/symbol-diff.s b/test/MC/MachO/symbol-diff.s
index dae32878b8e2..2c3b52f85a85 100644
--- a/test/MC/MachO/symbol-diff.s
+++ b/test/MC/MachO/symbol-diff.s
@@ -1,122 +1,131 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
_g:
LFB2:
.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
_g.eh:
.quad LFB2-.
-// CHECK: ('cputype', 16777223)
-// CHECK-NEXT: ('cpusubtype', 3)
-// CHECK-NEXT: ('filetype', 1)
-// CHECK-NEXT: ('num_load_commands', 3)
-// CHECK-NEXT: ('load_commands_size', 336)
-// CHECK-NEXT: ('flag', 0)
-// CHECK-NEXT: ('reserved', 0)
-// CHECK-NEXT: ('load_commands', [
-// CHECK-NEXT: # Load Command 0
-// CHECK-NEXT: (('command', 25)
-// CHECK-NEXT: ('size', 232)
-// CHECK-NEXT: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('vm_addr', 0)
-// CHECK-NEXT: ('vm_size', 8)
-// CHECK-NEXT: ('file_offset', 368)
-// CHECK-NEXT: ('file_size', 8)
-// CHECK-NEXT: ('maxprot', 7)
-// CHECK-NEXT: ('initprot', 7)
-// CHECK-NEXT: ('num_sections', 2)
-// CHECK-NEXT: ('flags', 0)
-// CHECK-NEXT: ('sections', [
-// CHECK-NEXT: # Section 0
-// CHECK-NEXT: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 0)
-// CHECK-NEXT: ('size', 0)
-// CHECK-NEXT: ('offset', 368)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 0)
-// CHECK-NEXT: ('num_reloc', 0)
-// CHECK-NEXT: ('flags', 0x80000000)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ('reserved3', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', '')
-// CHECK-NEXT: # Section 1
-// CHECK-NEXT: (('section_name', '__eh_frame\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 0)
-// CHECK-NEXT: ('size', 8)
-// CHECK-NEXT: ('offset', 368)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 376)
-// CHECK-NEXT: ('num_reloc', 2)
-// CHECK-NEXT: ('flags', 0x6800000b)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ('reserved3', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: # Relocation 0
-// CHECK-NEXT: (('word-0', 0x0),
-// CHECK-NEXT: ('word-1', 0x5e000001)),
-// CHECK-NEXT: # Relocation 1
-// CHECK-NEXT: (('word-0', 0x0),
-// CHECK-NEXT: ('word-1', 0xe000000)),
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', '00000000 00000000')
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Load Command 1
-// CHECK-NEXT: (('command', 2)
-// CHECK-NEXT: ('size', 24)
-// CHECK-NEXT: ('symoff', 392)
-// CHECK-NEXT: ('nsyms', 2)
-// CHECK-NEXT: ('stroff', 424)
-// CHECK-NEXT: ('strsize', 12)
-// CHECK-NEXT: ('_string_data', '\x00_g.eh\x00_g\x00\x00\x00')
-// CHECK-NEXT: ('_symbols', [
-// CHECK-NEXT: # Symbol 0
-// CHECK-NEXT: (('n_strx', 7)
-// CHECK-NEXT: ('n_type', 0xe)
-// CHECK-NEXT: ('n_sect', 1)
-// CHECK-NEXT: ('n_desc', 0)
-// CHECK-NEXT: ('n_value', 0)
-// CHECK-NEXT: ('_string', '_g')
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 1
-// CHECK-NEXT: (('n_strx', 1)
-// CHECK-NEXT: ('n_type', 0xe)
-// CHECK-NEXT: ('n_sect', 2)
-// CHECK-NEXT: ('n_desc', 0)
-// CHECK-NEXT: ('n_value', 0)
-// CHECK-NEXT: ('_string', '_g.eh')
-// CHECK-NEXT: ),
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Load Command 2
-// CHECK-NEXT: (('command', 11)
-// CHECK-NEXT: ('size', 80)
-// CHECK-NEXT: ('ilocalsym', 0)
-// CHECK-NEXT: ('nlocalsym', 2)
-// CHECK-NEXT: ('iextdefsym', 2)
-// CHECK-NEXT: ('nextdefsym', 0)
-// CHECK-NEXT: ('iundefsym', 2)
-// CHECK-NEXT: ('nundefsym', 0)
-// CHECK-NEXT: ('tocoff', 0)
-// CHECK-NEXT: ('ntoc', 0)
-// CHECK-NEXT: ('modtaboff', 0)
-// CHECK-NEXT: ('nmodtab', 0)
-// CHECK-NEXT: ('extrefsymoff', 0)
-// CHECK-NEXT: ('nextrefsyms', 0)
-// CHECK-NEXT: ('indirectsymoff', 0)
-// CHECK-NEXT: ('nindirectsyms', 0)
-// CHECK-NEXT: ('extreloff', 0)
-// CHECK-NEXT: ('nextrel', 0)
-// CHECK-NEXT: ('locreloff', 0)
-// CHECK-NEXT: ('nlocrel', 0)
-// CHECK-NEXT: ('_indirect_symbols', [
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT:])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 352
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 384
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __eh_frame (5F 5F 65 68 5F 66 72 61 6D 65 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x8
+// CHECK: Offset: 384
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x188
+// CHECK: RelocationCount: 2
+// CHECK: Type: 0xB
+// CHECK: Attributes [ (0x680000)
+// CHECK: LiveSupport (0x80000)
+// CHECK: NoTOC (0x400000)
+// CHECK: StripStaticSyms (0x200000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00000000 00000000 |........|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __eh_frame {
+// CHECK: 0x0 0 3 1 X86_64_RELOC_SUBTRACTOR 0 _g.eh
+// CHECK: 0x0 0 3 1 X86_64_RELOC_UNSIGNED 0 _g
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _g (7)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _g.eh (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __eh_frame (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 232
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x8
+// CHECK: fileoff: 384
+// CHECK: filesize: 8
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 2
+// CHECK: iextdefsym: 2
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 2
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/symbol-flags.s b/test/MC/MachO/symbol-flags.s
index 561d88a14e73..8f001e5c612e 100644
--- a/test/MC/MachO/symbol-flags.s
+++ b/test/MC/MachO/symbol-flags.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.reference sym_ref_A
.reference sym_ref_def_A
@@ -59,283 +59,349 @@ sym_symbol_resolver_A:
.desc sym_desc_flags,0x47
sym_desc_flags:
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 296)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 192)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 0)
-// CHECK: ('file_offset', 324)
-// CHECK: ('file_size', 0)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 324)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 1
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 324)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 324)
-// CHECK: ('nsyms', 24)
-// CHECK: ('stroff', 612)
-// CHECK: ('strsize', 388)
-// CHECK: ('_string_data', '\x00sym_desc_flags\x00sym_private_ext_E\x00sym_lazy_ref_E\x00sym_ref_def_E\x00sym_private_ext_D\x00sym_lazy_ref_D\x00sym_ref_def_D\x00sym_private_ext_C\x00sym_lazy_ref_C\x00sym_weak_def_C\x00sym_ref_def_C\x00sym_private_ext_B\x00sym_lazy_ref_B\x00sym_weak_def_B\x00sym_weak_ref_def_B\x00sym_private_ext_A\x00sym_symbol_resolver_A\x00sym_no_dead_strip_A\x00sym_lazy_ref_A\x00sym_ref_A\x00sym_weak_ref_A\x00sym_weak_def_A\x00sym_ref_def_A\x00sym_weak_ref_def_A\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 354)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_ref_def_A')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 158)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_ref_def_C')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 368)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 64)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_weak_ref_def_A')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 220)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_weak_ref_def_B')
-// CHECK: ),
-// CHECK: # Symbol 4
-// CHECK: (('n_strx', 190)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lazy_ref_B')
-// CHECK: ),
-// CHECK: # Symbol 5
-// CHECK: (('n_strx', 128)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lazy_ref_C')
-// CHECK: ),
-// CHECK: # Symbol 6
-// CHECK: (('n_strx', 257)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 256)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_symbol_resolver_A')
-// CHECK: ),
-// CHECK: # Symbol 7
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 64)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_desc_flags')
-// CHECK: ),
-// CHECK: # Symbol 8
-// CHECK: (('n_strx', 172)
-// CHECK: ('n_type', 0x1f)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_private_ext_B')
-// CHECK: ),
-// CHECK: # Symbol 9
-// CHECK: (('n_strx', 110)
-// CHECK: ('n_type', 0x1f)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_private_ext_C')
-// CHECK: ),
-// CHECK: # Symbol 10
-// CHECK: (('n_strx', 339)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 128)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_weak_def_A')
-// CHECK: ),
-// CHECK: # Symbol 11
-// CHECK: (('n_strx', 205)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 128)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_weak_def_B')
-// CHECK: ),
-// CHECK: # Symbol 12
-// CHECK: (('n_strx', 143)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 128)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_weak_def_C')
-// CHECK: ),
-// CHECK: # Symbol 13
-// CHECK: (('n_strx', 299)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 33)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lazy_ref_A')
-// CHECK: ),
-// CHECK: # Symbol 14
-// CHECK: (('n_strx', 81)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lazy_ref_D')
-// CHECK: ),
-// CHECK: # Symbol 15
-// CHECK: (('n_strx', 34)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 33)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lazy_ref_E')
-// CHECK: ),
-// CHECK: # Symbol 16
-// CHECK: (('n_strx', 279)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_no_dead_strip_A')
-// CHECK: ),
-// CHECK: # Symbol 17
-// CHECK: (('n_strx', 239)
-// CHECK: ('n_type', 0x11)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_private_ext_A')
-// CHECK: ),
-// CHECK: # Symbol 18
-// CHECK: (('n_strx', 63)
-// CHECK: ('n_type', 0x11)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_private_ext_D')
-// CHECK: ),
-// CHECK: # Symbol 19
-// CHECK: (('n_strx', 16)
-// CHECK: ('n_type', 0x11)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_private_ext_E')
-// CHECK: ),
-// CHECK: # Symbol 20
-// CHECK: (('n_strx', 314)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 4660)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_ref_A')
-// CHECK: ),
-// CHECK: # Symbol 21
-// CHECK: (('n_strx', 96)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_ref_def_D')
-// CHECK: ),
-// CHECK: # Symbol 22
-// CHECK: (('n_strx', 49)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 32)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_ref_def_E')
-// CHECK: ),
-// CHECK: # Symbol 23
-// CHECK: (('n_strx', 324)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 64)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_weak_ref_A')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 8)
-// CHECK: ('iextdefsym', 8)
-// CHECK: ('nextdefsym', 5)
-// CHECK: ('iundefsym', 13)
-// CHECK: ('nundefsym', 11)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 312
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: sym_ref_def_A (354)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_ref_def_C (158)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_weak_ref_def_A (368)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x40)
+// CHECK: WeakRef (0x40)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_weak_ref_def_B (220)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lazy_ref_B (190)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lazy_ref_C (128)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_symbol_resolver_A (257)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x100)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_desc_flags (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x40)
+// CHECK: WeakRef (0x40)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_private_ext_B (172)
+// CHECK: PrivateExtern
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_private_ext_C (110)
+// CHECK: PrivateExtern
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_weak_def_A (339)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x80)
+// CHECK: WeakDef (0x80)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_weak_def_B (205)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x80)
+// CHECK: WeakDef (0x80)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_weak_def_C (143)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x80)
+// CHECK: WeakDef (0x80)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lazy_ref_A (299)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: ReferenceFlagUndefinedLazy (0x1)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lazy_ref_D (81)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lazy_ref_E (34)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: ReferenceFlagUndefinedLazy (0x1)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_no_dead_strip_A (279)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_private_ext_A (239)
+// CHECK: PrivateExtern
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_private_ext_D (63)
+// CHECK: PrivateExtern
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_private_ext_E (16)
+// CHECK: PrivateExtern
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_ref_A (314)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: ReferenceFlagPrivateUndefinedNonLazy (0x4)
+// CHECK: Flags [ (0x1230)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ReferencedDynamically (0x10)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_ref_def_D (96)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_ref_def_E (49)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x20)
+// CHECK: NoDeadStrip (0x20)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_weak_ref_A (324)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x40)
+// CHECK: WeakRef (0x40)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 192
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x0
+// CHECK: fileoff: 340
+// CHECK: filesize: 0
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 8
+// CHECK: iextdefsym: 8
+// CHECK: nextdefsym: 5
+// CHECK: iundefsym: 13
+// CHECK: nundefsym: 11
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/symbol-indirect.s b/test/MC/MachO/symbol-indirect.s
index 1cdeed1f4ef6..c0012b703553 100644
--- a/test/MC/MachO/symbol-indirect.s
+++ b/test/MC/MachO/symbol-indirect.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
// FIXME: We are missing a lot of diagnostics on this kind of stuff which the
// assembler has.
@@ -69,200 +69,248 @@ sym_nlp_G:
.indirect_symbol sym_nlp_G
.long 0
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 364)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 260)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 40)
-// CHECK: ('file_offset', 392)
-// CHECK: ('file_size', 40)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 3)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 392)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 1
-// CHECK: (('section_name', '__la_symbol_ptr\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 20)
-// CHECK: ('offset', 392)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x7)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 2
-// CHECK: (('section_name', '__nl_symbol_ptr\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 20)
-// CHECK: ('size', 20)
-// CHECK: ('offset', 412)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x6)
- // FIXME: Enable this when fixed!
-// CHECX: ('reserved1', 5)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 472)
-// CHECK: ('nsyms', 10)
-// CHECK: ('stroff', 592)
-// CHECK: ('strsize', 104)
-// CHECK: ('_string_data', '\x00sym_lsp_G\x00sym_nlp_G\x00sym_lsp_E\x00sym_nlp_E\x00sym_lsp_C\x00sym_nlp_C\x00sym_lsp_B\x00sym_nlp_B\x00sym_lsp_A\x00sym_nlp_A\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 41)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', 'sym_lsp_C')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 51)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 3)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 28)
-// CHECK: ('_string', 'sym_nlp_C')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 16)
-// CHECK: ('_string', 'sym_lsp_G')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 11)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 3)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 36)
-// CHECK: ('_string', 'sym_nlp_G')
-// CHECK: ),
-// CHECK: # Symbol 4
-// CHECK: (('n_strx', 81)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lsp_A')
-// CHECK: ),
-// CHECK: # Symbol 5
-// CHECK: (('n_strx', 61)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 1)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lsp_B')
-// CHECK: ),
-// CHECK: # Symbol 6
-// CHECK: (('n_strx', 21)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 1)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lsp_E')
-// CHECK: ),
-// CHECK: # Symbol 7
-// CHECK: (('n_strx', 91)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_nlp_A')
-// CHECK: ),
-// CHECK: # Symbol 8
-// CHECK: (('n_strx', 71)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_nlp_B')
-// CHECK: ),
-// CHECK: # Symbol 9
-// CHECK: (('n_strx', 31)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_nlp_E')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 2)
-// CHECK: ('iextdefsym', 2)
-// CHECK: ('nextdefsym', 2)
-// CHECK: ('iundefsym', 4)
-// CHECK: ('nundefsym', 6)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 432)
-// CHECK: ('nindirectsyms', 10)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: # Indirect Symbol 0
-// CHECK: (('symbol_index', 0x5),),
-// CHECK: # Indirect Symbol 1
-// CHECK: (('symbol_index', 0x4),),
-// CHECK: # Indirect Symbol 2
-// CHECK: (('symbol_index', 0x0),),
-// CHECK: # Indirect Symbol 3
-// CHECK: (('symbol_index', 0x6),),
-// CHECK: # Indirect Symbol 4
-// CHECK: (('symbol_index', 0x2),),
-// CHECK: # Indirect Symbol 5
-// CHECK: (('symbol_index', 0x8),),
-// CHECK: # Indirect Symbol 6
-// CHECK: (('symbol_index', 0x7),),
-// CHECK: # Indirect Symbol 7
-// CHECK: (('symbol_index', 0x80000000),),
-// CHECK: # Indirect Symbol 8
-// CHECK: (('symbol_index', 0x9),),
-// CHECK: # Indirect Symbol 9
-// CHECK: (('symbol_index', 0x3),),
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 380
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 408
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x14
+// CHECK: Offset: 408
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x7
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 2
+// CHECK: Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x14
+// CHECK: Size: 0x14
+// CHECK: Offset: 428
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x6
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x5
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: sym_lsp_C (41)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __la_symbol_ptr (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_nlp_C (51)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __nl_symbol_ptr (0x3)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x1C
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lsp_G (1)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __la_symbol_ptr (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x10
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_nlp_G (11)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __nl_symbol_ptr (0x3)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x24
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lsp_A (81)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lsp_B (61)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: ReferenceFlagUndefinedLazy (0x1)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lsp_E (21)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: ReferenceFlagUndefinedLazy (0x1)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_nlp_A (91)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_nlp_B (71)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_nlp_E (31)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 10
+// CHECK: Symbols [
+// CHECK: Entry {
+// CHECK: Entry Index: 0
+// CHECK: Symbol Index: 0x5
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 1
+// CHECK: Symbol Index: 0x4
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 2
+// CHECK: Symbol Index: 0x0
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 3
+// CHECK: Symbol Index: 0x6
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 4
+// CHECK: Symbol Index: 0x2
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 5
+// CHECK: Symbol Index: 0x8
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 6
+// CHECK: Symbol Index: 0x7
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 7
+// CHECK: Symbol Index: 0x80000000
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 8
+// CHECK: Symbol Index: 0x9
+// CHECK: }
+// CHECK: Entry {
+// CHECK: Entry Index: 9
+// CHECK: Symbol Index: 0x3
+// CHECK: }
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 260
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x28
+// CHECK: fileoff: 408
+// CHECK: filesize: 40
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 3
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 2
+// CHECK: iextdefsym: 2
+// CHECK: nextdefsym: 2
+// CHECK: iundefsym: 4
+// CHECK: nundefsym: 6
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 448
+// CHECK: nindirectsyms: 10
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/symbols-1.s b/test/MC/MachO/symbols-1.s
index 8b663dc71762..bec31bb73459 100644
--- a/test/MC/MachO/symbols-1.s
+++ b/test/MC/MachO/symbols-1.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck -check-prefix CHECK-X86_32 %s
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump | FileCheck -check-prefix CHECK-X86_64 %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck -check-prefix CHECK-X86_32 %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck -check-prefix CHECK-X86_64 %s
sym_local_B:
.globl sym_globl_def_B
@@ -17,294 +17,330 @@ sym_globl_def_C:
Lsym_asm_temp:
.long 0
-// CHECK-X86_32: ('cputype', 7)
-// CHECK-X86_32: ('cpusubtype', 3)
-// CHECK-X86_32: ('filetype', 1)
-// CHECK-X86_32: ('num_load_commands', 3)
-// CHECK-X86_32: ('load_commands_size', 228)
-// CHECK-X86_32: ('flag', 0)
-// CHECK-X86_32: ('load_commands', [
-// CHECK-X86_32: # Load Command 0
-// CHECK-X86_32: (('command', 1)
-// CHECK-X86_32: ('size', 124)
-// CHECK-X86_32: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_32: ('vm_addr', 0)
-// CHECK-X86_32: ('vm_size', 4)
-// CHECK-X86_32: ('file_offset', 256)
-// CHECK-X86_32: ('file_size', 4)
-// CHECK-X86_32: ('maxprot', 7)
-// CHECK-X86_32: ('initprot', 7)
-// CHECK-X86_32: ('num_sections', 1)
-// CHECK-X86_32: ('flags', 0)
-// CHECK-X86_32: ('sections', [
-// CHECK-X86_32: # Section 0
-// CHECK-X86_32: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_32: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_32: ('address', 0)
-// CHECK-X86_32: ('size', 4)
-// CHECK-X86_32: ('offset', 256)
-// CHECK-X86_32: ('alignment', 0)
-// CHECK-X86_32: ('reloc_offset', 0)
-// CHECK-X86_32: ('num_reloc', 0)
-// CHECK-X86_32: ('flags', 0x80000000)
-// CHECK-X86_32: ('reserved1', 0)
-// CHECK-X86_32: ('reserved2', 0)
-// CHECK-X86_32: ),
-// CHECK-X86_32: ])
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Load Command 1
-// CHECK-X86_32: (('command', 2)
-// CHECK-X86_32: ('size', 24)
-// CHECK-X86_32: ('symoff', 260)
-// CHECK-X86_32: ('nsyms', 9)
-// CHECK-X86_32: ('stroff', 368)
-// CHECK-X86_32: ('strsize', 140)
-// CHECK-X86_32: ('_string_data', '\x00sym_local_C\x00sym_globl_undef_C\x00sym_globl_def_C\x00sym_local_B\x00sym_globl_undef_B\x00sym_globl_def_B\x00sym_local_A\x00sym_globl_undef_A\x00sym_globl_def_A\x00\x00')
-// CHECK-X86_32: ('_symbols', [
-// CHECK-X86_32: # Symbol 0
-// CHECK-X86_32: (('n_strx', 47)
-// CHECK-X86_32: ('n_type', 0xe)
-// CHECK-X86_32: ('n_sect', 1)
-// CHECK-X86_32: ('n_desc', 0)
-// CHECK-X86_32: ('n_value', 0)
-// CHECK-X86_32: ('_string', 'sym_local_B')
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Symbol 1
-// CHECK-X86_32: (('n_strx', 93)
-// CHECK-X86_32: ('n_type', 0xe)
-// CHECK-X86_32: ('n_sect', 1)
-// CHECK-X86_32: ('n_desc', 0)
-// CHECK-X86_32: ('n_value', 0)
-// CHECK-X86_32: ('_string', 'sym_local_A')
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Symbol 2
-// CHECK-X86_32: (('n_strx', 1)
-// CHECK-X86_32: ('n_type', 0xe)
-// CHECK-X86_32: ('n_sect', 1)
-// CHECK-X86_32: ('n_desc', 0)
-// CHECK-X86_32: ('n_value', 0)
-// CHECK-X86_32: ('_string', 'sym_local_C')
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Symbol 3
-// CHECK-X86_32: (('n_strx', 123)
-// CHECK-X86_32: ('n_type', 0xf)
-// CHECK-X86_32: ('n_sect', 1)
-// CHECK-X86_32: ('n_desc', 0)
-// CHECK-X86_32: ('n_value', 0)
-// CHECK-X86_32: ('_string', 'sym_globl_def_A')
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Symbol 4
-// CHECK-X86_32: (('n_strx', 77)
-// CHECK-X86_32: ('n_type', 0xf)
-// CHECK-X86_32: ('n_sect', 1)
-// CHECK-X86_32: ('n_desc', 0)
-// CHECK-X86_32: ('n_value', 0)
-// CHECK-X86_32: ('_string', 'sym_globl_def_B')
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Symbol 5
-// CHECK-X86_32: (('n_strx', 31)
-// CHECK-X86_32: ('n_type', 0xf)
-// CHECK-X86_32: ('n_sect', 1)
-// CHECK-X86_32: ('n_desc', 0)
-// CHECK-X86_32: ('n_value', 0)
-// CHECK-X86_32: ('_string', 'sym_globl_def_C')
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Symbol 6
-// CHECK-X86_32: (('n_strx', 105)
-// CHECK-X86_32: ('n_type', 0x1)
-// CHECK-X86_32: ('n_sect', 0)
-// CHECK-X86_32: ('n_desc', 0)
-// CHECK-X86_32: ('n_value', 0)
-// CHECK-X86_32: ('_string', 'sym_globl_undef_A')
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Symbol 7
-// CHECK-X86_32: (('n_strx', 59)
-// CHECK-X86_32: ('n_type', 0x1)
-// CHECK-X86_32: ('n_sect', 0)
-// CHECK-X86_32: ('n_desc', 0)
-// CHECK-X86_32: ('n_value', 0)
-// CHECK-X86_32: ('_string', 'sym_globl_undef_B')
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Symbol 8
-// CHECK-X86_32: (('n_strx', 13)
-// CHECK-X86_32: ('n_type', 0x1)
-// CHECK-X86_32: ('n_sect', 0)
-// CHECK-X86_32: ('n_desc', 0)
-// CHECK-X86_32: ('n_value', 0)
-// CHECK-X86_32: ('_string', 'sym_globl_undef_C')
-// CHECK-X86_32: ),
-// CHECK-X86_32: ])
-// CHECK-X86_32: ),
-// CHECK-X86_32: # Load Command 2
-// CHECK-X86_32: (('command', 11)
-// CHECK-X86_32: ('size', 80)
-// CHECK-X86_32: ('ilocalsym', 0)
-// CHECK-X86_32: ('nlocalsym', 3)
-// CHECK-X86_32: ('iextdefsym', 3)
-// CHECK-X86_32: ('nextdefsym', 3)
-// CHECK-X86_32: ('iundefsym', 6)
-// CHECK-X86_32: ('nundefsym', 3)
-// CHECK-X86_32: ('tocoff', 0)
-// CHECK-X86_32: ('ntoc', 0)
-// CHECK-X86_32: ('modtaboff', 0)
-// CHECK-X86_32: ('nmodtab', 0)
-// CHECK-X86_32: ('extrefsymoff', 0)
-// CHECK-X86_32: ('nextrefsyms', 0)
-// CHECK-X86_32: ('indirectsymoff', 0)
-// CHECK-X86_32: ('nindirectsyms', 0)
-// CHECK-X86_32: ('extreloff', 0)
-// CHECK-X86_32: ('nextrel', 0)
-// CHECK-X86_32: ('locreloff', 0)
-// CHECK-X86_32: ('nlocrel', 0)
-// CHECK-X86_32: ('_indirect_symbols', [
-// CHECK-X86_32: ])
-// CHECK-X86_32: ),
-// CHECK-X86_32: ])
+// CHECK-X86_32: File: <stdin>
+// CHECK-X86_32: Format: Mach-O 32-bit i386
+// CHECK-X86_32: Arch: i386
+// CHECK-X86_32: AddressSize: 32bit
+// CHECK-X86_32: MachHeader {
+// CHECK-X86_32: Magic: Magic (0xFEEDFACE)
+// CHECK-X86_32: CpuType: X86 (0x7)
+// CHECK-X86_32: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK-X86_32: FileType: Relocatable (0x1)
+// CHECK-X86_32: NumOfLoadCommands: 4
+// CHECK-X86_32: SizeOfLoadCommands: 244
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: }
+// CHECK-X86_32: Sections [
+// CHECK-X86_32: Section {
+// CHECK-X86_32: Index: 0
+// CHECK-X86_32: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK-X86_32: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK-X86_32: Address: 0x0
+// CHECK-X86_32: Size: 0x4
+// CHECK-X86_32: Offset: 272
+// CHECK-X86_32: Alignment: 0
+// CHECK-X86_32: RelocationOffset: 0x0
+// CHECK-X86_32: RelocationCount: 0
+// CHECK-X86_32: Type: 0x0
+// CHECK-X86_32: Attributes [ (0x800000)
+// CHECK-X86_32: PureInstructions (0x800000)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Reserved1: 0x0
+// CHECK-X86_32: Reserved2: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: ]
+// CHECK-X86_32: Relocations [
+// CHECK-X86_32: ]
+// CHECK-X86_32: Symbols [
+// CHECK-X86_32: Symbol {
+// CHECK-X86_32: Name: sym_local_B (47)
+// CHECK-X86_32: Type: Section (0xE)
+// CHECK-X86_32: Section: __text (0x1)
+// CHECK-X86_32: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Value: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: Symbol {
+// CHECK-X86_32: Name: sym_local_A (93)
+// CHECK-X86_32: Type: Section (0xE)
+// CHECK-X86_32: Section: __text (0x1)
+// CHECK-X86_32: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Value: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: Symbol {
+// CHECK-X86_32: Name: sym_local_C (1)
+// CHECK-X86_32: Type: Section (0xE)
+// CHECK-X86_32: Section: __text (0x1)
+// CHECK-X86_32: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Value: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: Symbol {
+// CHECK-X86_32: Name: sym_globl_def_A (123)
+// CHECK-X86_32: Extern
+// CHECK-X86_32: Type: Section (0xE)
+// CHECK-X86_32: Section: __text (0x1)
+// CHECK-X86_32: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Value: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: Symbol {
+// CHECK-X86_32: Name: sym_globl_def_B (77)
+// CHECK-X86_32: Extern
+// CHECK-X86_32: Type: Section (0xE)
+// CHECK-X86_32: Section: __text (0x1)
+// CHECK-X86_32: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Value: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: Symbol {
+// CHECK-X86_32: Name: sym_globl_def_C (31)
+// CHECK-X86_32: Extern
+// CHECK-X86_32: Type: Section (0xE)
+// CHECK-X86_32: Section: __text (0x1)
+// CHECK-X86_32: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Value: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: Symbol {
+// CHECK-X86_32: Name: sym_globl_undef_A (105)
+// CHECK-X86_32: Extern
+// CHECK-X86_32: Type: Undef (0x0)
+// CHECK-X86_32: Section: (0x0)
+// CHECK-X86_32: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Value: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: Symbol {
+// CHECK-X86_32: Name: sym_globl_undef_B (59)
+// CHECK-X86_32: Extern
+// CHECK-X86_32: Type: Undef (0x0)
+// CHECK-X86_32: Section: (0x0)
+// CHECK-X86_32: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Value: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: Symbol {
+// CHECK-X86_32: Name: sym_globl_undef_C (13)
+// CHECK-X86_32: Extern
+// CHECK-X86_32: Type: Undef (0x0)
+// CHECK-X86_32: Section: (0x0)
+// CHECK-X86_32: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_32: Flags [ (0x0)
+// CHECK-X86_32: ]
+// CHECK-X86_32: Value: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: ]
+// CHECK-X86_32: Indirect Symbols {
+// CHECK-X86_32: Number: 0
+// CHECK-X86_32: Symbols [
+// CHECK-X86_32: ]
+// CHECK-X86_32: }
+// CHECK-X86_32: Segment {
+// CHECK-X86_32: Cmd: LC_SEGMENT
+// CHECK-X86_32: Name:
+// CHECK-X86_32: Size: 124
+// CHECK-X86_32: vmaddr: 0x0
+// CHECK-X86_32: vmsize: 0x4
+// CHECK-X86_32: fileoff: 272
+// CHECK-X86_32: filesize: 4
+// CHECK-X86_32: maxprot: rwx
+// CHECK-X86_32: initprot: rwx
+// CHECK-X86_32: nsects: 1
+// CHECK-X86_32: flags: 0x0
+// CHECK-X86_32: }
+// CHECK-X86_32: Dysymtab {
+// CHECK-X86_32: ilocalsym: 0
+// CHECK-X86_32: nlocalsym: 3
+// CHECK-X86_32: iextdefsym: 3
+// CHECK-X86_32: nextdefsym: 3
+// CHECK-X86_32: iundefsym: 6
+// CHECK-X86_32: nundefsym: 3
+// CHECK-X86_32: tocoff: 0
+// CHECK-X86_32: ntoc: 0
+// CHECK-X86_32: modtaboff: 0
+// CHECK-X86_32: nmodtab: 0
+// CHECK-X86_32: extrefsymoff: 0
+// CHECK-X86_32: nextrefsyms: 0
+// CHECK-X86_32: indirectsymoff: 0
+// CHECK-X86_32: nindirectsyms: 0
+// CHECK-X86_32: extreloff: 0
+// CHECK-X86_32: nextrel: 0
+// CHECK-X86_32: locreloff: 0
+// CHECK-X86_32: nlocrel: 0
+// CHECK-X86_32: }
-// CHECK-X86_64: ('cputype', 16777223)
-// CHECK-X86_64: ('cpusubtype', 3)
-// CHECK-X86_64: ('filetype', 1)
-// CHECK-X86_64: ('num_load_commands', 3)
-// CHECK-X86_64: ('load_commands_size', 256)
-// CHECK-X86_64: ('flag', 0)
-// CHECK-X86_64: ('reserved', 0)
-// CHECK-X86_64: ('load_commands', [
-// CHECK-X86_64: # Load Command 0
-// CHECK-X86_64: (('command', 25)
-// CHECK-X86_64: ('size', 152)
-// CHECK-X86_64: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_64: ('vm_addr', 0)
-// CHECK-X86_64: ('vm_size', 4)
-// CHECK-X86_64: ('file_offset', 288)
-// CHECK-X86_64: ('file_size', 4)
-// CHECK-X86_64: ('maxprot', 7)
-// CHECK-X86_64: ('initprot', 7)
-// CHECK-X86_64: ('num_sections', 1)
-// CHECK-X86_64: ('flags', 0)
-// CHECK-X86_64: ('sections', [
-// CHECK-X86_64: # Section 0
-// CHECK-X86_64: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_64: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_64: ('address', 0)
-// CHECK-X86_64: ('size', 4)
-// CHECK-X86_64: ('offset', 288)
-// CHECK-X86_64: ('alignment', 0)
-// CHECK-X86_64: ('reloc_offset', 0)
-// CHECK-X86_64: ('num_reloc', 0)
-// CHECK-X86_64: ('flags', 0x80000000)
-// CHECK-X86_64: ('reserved1', 0)
-// CHECK-X86_64: ('reserved2', 0)
-// CHECK-X86_64: ('reserved3', 0)
-// CHECK-X86_64: ),
-// CHECK-X86_64: ('_relocations', [
-// CHECK-X86_64: ])
-// CHECK-X86_64: ])
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Load Command 1
-// CHECK-X86_64: (('command', 2)
-// CHECK-X86_64: ('size', 24)
-// CHECK-X86_64: ('symoff', 292)
-// CHECK-X86_64: ('nsyms', 9)
-// CHECK-X86_64: ('stroff', 436)
-// CHECK-X86_64: ('strsize', 140)
-// CHECK-X86_64: ('_string_data', '\x00sym_local_C\x00sym_globl_undef_C\x00sym_globl_def_C\x00sym_local_B\x00sym_globl_undef_B\x00sym_globl_def_B\x00sym_local_A\x00sym_globl_undef_A\x00sym_globl_def_A\x00\x00')
-// CHECK-X86_64: ('_symbols', [
-// CHECK-X86_64: # Symbol 0
-// CHECK-X86_64: (('n_strx', 47)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 1)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'sym_local_B')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 1
-// CHECK-X86_64: (('n_strx', 93)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 1)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'sym_local_A')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 2
-// CHECK-X86_64: (('n_strx', 1)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 1)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'sym_local_C')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 3
-// CHECK-X86_64: (('n_strx', 123)
-// CHECK-X86_64: ('n_type', 0xf)
-// CHECK-X86_64: ('n_sect', 1)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'sym_globl_def_A')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 4
-// CHECK-X86_64: (('n_strx', 77)
-// CHECK-X86_64: ('n_type', 0xf)
-// CHECK-X86_64: ('n_sect', 1)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'sym_globl_def_B')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 5
-// CHECK-X86_64: (('n_strx', 31)
-// CHECK-X86_64: ('n_type', 0xf)
-// CHECK-X86_64: ('n_sect', 1)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'sym_globl_def_C')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 6
-// CHECK-X86_64: (('n_strx', 105)
-// CHECK-X86_64: ('n_type', 0x1)
-// CHECK-X86_64: ('n_sect', 0)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'sym_globl_undef_A')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 7
-// CHECK-X86_64: (('n_strx', 59)
-// CHECK-X86_64: ('n_type', 0x1)
-// CHECK-X86_64: ('n_sect', 0)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'sym_globl_undef_B')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 8
-// CHECK-X86_64: (('n_strx', 13)
-// CHECK-X86_64: ('n_type', 0x1)
-// CHECK-X86_64: ('n_sect', 0)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'sym_globl_undef_C')
-// CHECK-X86_64: ),
-// CHECK-X86_64: ])
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Load Command 2
-// CHECK-X86_64: (('command', 11)
-// CHECK-X86_64: ('size', 80)
-// CHECK-X86_64: ('ilocalsym', 0)
-// CHECK-X86_64: ('nlocalsym', 3)
-// CHECK-X86_64: ('iextdefsym', 3)
-// CHECK-X86_64: ('nextdefsym', 3)
-// CHECK-X86_64: ('iundefsym', 6)
-// CHECK-X86_64: ('nundefsym', 3)
-// CHECK-X86_64: ('tocoff', 0)
-// CHECK-X86_64: ('ntoc', 0)
-// CHECK-X86_64: ('modtaboff', 0)
-// CHECK-X86_64: ('nmodtab', 0)
-// CHECK-X86_64: ('extrefsymoff', 0)
-// CHECK-X86_64: ('nextrefsyms', 0)
-// CHECK-X86_64: ('indirectsymoff', 0)
-// CHECK-X86_64: ('nindirectsyms', 0)
-// CHECK-X86_64: ('extreloff', 0)
-// CHECK-X86_64: ('nextrel', 0)
-// CHECK-X86_64: ('locreloff', 0)
-// CHECK-X86_64: ('nlocrel', 0)
-// CHECK-X86_64: ('_indirect_symbols', [
-// CHECK-X86_64: ])
-// CHECK-X86_64: ),
-// CHECK-X86_64: ])
+// CHECK-X86_64: File: <stdin>
+// CHECK-X86_64: Format: Mach-O 64-bit x86-64
+// CHECK-X86_64: Arch: x86_64
+// CHECK-X86_64: AddressSize: 64bit
+// CHECK-X86_64: MachHeader {
+// CHECK-X86_64: Magic: Magic64 (0xFEEDFACF)
+// CHECK-X86_64: CpuType: X86-64 (0x1000007)
+// CHECK-X86_64: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK-X86_64: FileType: Relocatable (0x1)
+// CHECK-X86_64: NumOfLoadCommands: 4
+// CHECK-X86_64: SizeOfLoadCommands: 272
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Reserved: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Sections [
+// CHECK-X86_64: Section {
+// CHECK-X86_64: Index: 0
+// CHECK-X86_64: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK-X86_64: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK-X86_64: Address: 0x0
+// CHECK-X86_64: Size: 0x4
+// CHECK-X86_64: Offset: 304
+// CHECK-X86_64: Alignment: 0
+// CHECK-X86_64: RelocationOffset: 0x0
+// CHECK-X86_64: RelocationCount: 0
+// CHECK-X86_64: Type: 0x0
+// CHECK-X86_64: Attributes [ (0x800000)
+// CHECK-X86_64: PureInstructions (0x800000)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Reserved1: 0x0
+// CHECK-X86_64: Reserved2: 0x0
+// CHECK-X86_64: Reserved3: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: ]
+// CHECK-X86_64: Relocations [
+// CHECK-X86_64: ]
+// CHECK-X86_64: Symbols [
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: sym_local_B (47)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __text (0x1)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: sym_local_A (93)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __text (0x1)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: sym_local_C (1)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __text (0x1)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: sym_globl_def_A (123)
+// CHECK-X86_64: Extern
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __text (0x1)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: sym_globl_def_B (77)
+// CHECK-X86_64: Extern
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __text (0x1)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: sym_globl_def_C (31)
+// CHECK-X86_64: Extern
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __text (0x1)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: sym_globl_undef_A (105)
+// CHECK-X86_64: Extern
+// CHECK-X86_64: Type: Undef (0x0)
+// CHECK-X86_64: Section: (0x0)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: sym_globl_undef_B (59)
+// CHECK-X86_64: Extern
+// CHECK-X86_64: Type: Undef (0x0)
+// CHECK-X86_64: Section: (0x0)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: sym_globl_undef_C (13)
+// CHECK-X86_64: Extern
+// CHECK-X86_64: Type: Undef (0x0)
+// CHECK-X86_64: Section: (0x0)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: ]
+// CHECK-X86_64: Indirect Symbols {
+// CHECK-X86_64: Number: 0
+// CHECK-X86_64: Symbols [
+// CHECK-X86_64: ]
+// CHECK-X86_64: }
+// CHECK-X86_64: Segment {
+// CHECK-X86_64: Cmd: LC_SEGMENT_64
+// CHECK-X86_64: Name:
+// CHECK-X86_64: Size: 152
+// CHECK-X86_64: vmaddr: 0x0
+// CHECK-X86_64: vmsize: 0x4
+// CHECK-X86_64: fileoff: 304
+// CHECK-X86_64: filesize: 4
+// CHECK-X86_64: maxprot: rwx
+// CHECK-X86_64: initprot: rwx
+// CHECK-X86_64: nsects: 1
+// CHECK-X86_64: flags: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Dysymtab {
+// CHECK-X86_64: ilocalsym: 0
+// CHECK-X86_64: nlocalsym: 3
+// CHECK-X86_64: iextdefsym: 3
+// CHECK-X86_64: nextdefsym: 3
+// CHECK-X86_64: iundefsym: 6
+// CHECK-X86_64: nundefsym: 3
+// CHECK-X86_64: tocoff: 0
+// CHECK-X86_64: ntoc: 0
+// CHECK-X86_64: modtaboff: 0
+// CHECK-X86_64: nmodtab: 0
+// CHECK-X86_64: extrefsymoff: 0
+// CHECK-X86_64: nextrefsyms: 0
+// CHECK-X86_64: indirectsymoff: 0
+// CHECK-X86_64: nindirectsyms: 0
+// CHECK-X86_64: extreloff: 0
+// CHECK-X86_64: nextrel: 0
+// CHECK-X86_64: locreloff: 0
+// CHECK-X86_64: nlocrel: 0
+// CHECK-X86_64: }
diff --git a/test/MC/MachO/tbss.s b/test/MC/MachO/tbss.s
index 1c23aa548d76..a25772909268 100644
--- a/test/MC/MachO/tbss.s
+++ b/test/MC/MachO/tbss.s
@@ -1,114 +1,122 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.tbss _a$tlv$init, 4
.tbss _b$tlv$init, 4, 3
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 336)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 232)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 12)
-// CHECK: ('file_offset', 368)
-// CHECK: ('file_size', 0)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 368)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__thread_bss\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 12)
-// CHECK: ('offset', 0)
-// CHECK: ('alignment', 3)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x12)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', 'cffaedfe 07000001 03000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 368)
-// CHECK: ('nsyms', 2)
-// CHECK: ('stroff', 400)
-// CHECK: ('strsize', 28)
-// CHECK: ('_string_data', '\x00_b$tlv$init\x00_a$tlv$init\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 13)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_a$tlv$init')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', '_b$tlv$init')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 2)
-// CHECK: ('iextdefsym', 2)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 2)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 3
+// CHECK: SizeOfLoadCommands: 336
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 368
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __thread_bss (5F 5F 74 68 72 65 61 64 5F 62 73 73 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0xC
+// CHECK: Offset: 0
+// CHECK: Alignment: 3
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x12
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: CFFAEDFE 07000001 03000000 |............|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _a$tlv$init (13)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _b$tlv$init (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 232
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0xC
+// CHECK: fileoff: 368
+// CHECK: filesize: 0
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 2
+// CHECK: iextdefsym: 2
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 2
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/tdata.s b/test/MC/MachO/tdata.s
index 4829ca73a519..855ce54c0abe 100644
--- a/test/MC/MachO/tdata.s
+++ b/test/MC/MachO/tdata.s
@@ -1,106 +1,113 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.tdata
_a$tlv$init:
.long 4
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 336)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 232)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 4)
-// CHECK: ('file_offset', 368)
-// CHECK: ('file_size', 4)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 368)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__thread_data\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 4)
-// CHECK: ('offset', 368)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x11)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '04000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 372)
-// CHECK: ('nsyms', 1)
-// CHECK: ('stroff', 388)
-// CHECK: ('strsize', 16)
-// CHECK: ('_string_data', '\x00_a$tlv$init\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_a$tlv$init')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 1)
-// CHECK: ('iextdefsym', 1)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 1)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 3
+// CHECK: SizeOfLoadCommands: 336
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 368
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __thread_data (5F 5F 74 68 72 65 61 64 5F 64 61 74 61 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x4
+// CHECK: Offset: 368
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x11
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 04000000 |....|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _a$tlv$init (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 232
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x4
+// CHECK: fileoff: 368
+// CHECK: filesize: 4
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 1
+// CHECK: iextdefsym: 1
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 1
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/temp-labels.s b/test/MC/MachO/temp-labels.s
index ac0f6203aef1..12dbae5531a6 100644
--- a/test/MC/MachO/temp-labels.s
+++ b/test/MC/MachO/temp-labels.s
@@ -1,33 +1,27 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -save-temp-labels -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -save-temp-labels -o - | llvm-readobj -t | FileCheck %s
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 296)
-// CHECK: ('nsyms', 2)
-// CHECK: ('stroff', 328)
-// CHECK: ('strsize', 8)
-// CHECK: ('_string_data', '\x00_f0\x00L0\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_f0')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 5)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 4)
-// CHECK: ('_string', 'L0')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
_f0:
.long 0
L0:
.long 0
+
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _f0 (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: L0 (5)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x4
+// CHECK: }
+// CHECK: ]
diff --git a/test/MC/MachO/thread_init_func.s b/test/MC/MachO/thread_init_func.s
index d3ead83fd255..912d7824140f 100644
--- a/test/MC/MachO/thread_init_func.s
+++ b/test/MC/MachO/thread_init_func.s
@@ -1,63 +1,77 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -macho-segment | FileCheck %s
.thread_init_func
.quad 0
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
-// CHECK: ('load_commands_size', 232)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 232)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 8)
-// CHECK: ('file_offset', 264)
-// CHECK: ('file_size', 8)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 264)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__thread_init\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 8)
-// CHECK: ('offset', 264)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x15)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '00000000 00000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 1
+// CHECK: SizeOfLoadCommands: 232
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 264
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __thread_init (5F 5F 74 68 72 65 61 64 5F 69 6E 69 74 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x8
+// CHECK: Offset: 264
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x15
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00000000 00000000 |........|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 232
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x8
+// CHECK: fileoff: 264
+// CHECK: filesize: 8
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
diff --git a/test/MC/MachO/tls.s b/test/MC/MachO/tls.s
index 33e23a9c4276..438c7f04c8ed 100644
--- a/test/MC/MachO/tls.s
+++ b/test/MC/MachO/tls.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.section __TEXT,__text,regular,pure_instructions
.section __DATA,__thread_data,thread_local_regular
@@ -45,226 +45,246 @@ _b:
.subsections_via_symbols
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 496)
-// CHECK: ('flag', 8192)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 392)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 112)
-// CHECK: ('file_offset', 528)
-// CHECK: ('file_size', 104)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 4)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 528)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__thread_data\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 8)
-// CHECK: ('offset', 528)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x11)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '04000000 05000000')
-// CHECK: # Section 2
-// CHECK: (('section_name', '__thread_vars\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 8)
-// CHECK: ('size', 96)
-// CHECK: ('offset', 536)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 632)
-// CHECK: ('num_reloc', 8)
-// CHECK: ('flags', 0x13)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0x58),
-// CHECK: ('word-1', 0xe000001)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0x48),
-// CHECK: ('word-1', 0xe000008)),
-// CHECK: # Relocation 2
-// CHECK: (('word-0', 0x40),
-// CHECK: ('word-1', 0xe000000)),
-// CHECK: # Relocation 3
-// CHECK: (('word-0', 0x30),
-// CHECK: ('word-1', 0xe000008)),
-// CHECK: # Relocation 4
-// CHECK: (('word-0', 0x28),
-// CHECK: ('word-1', 0xe000007)),
-// CHECK: # Relocation 5
-// CHECK: (('word-0', 0x18),
-// CHECK: ('word-1', 0xe000008)),
-// CHECK: # Relocation 6
-// CHECK: (('word-0', 0x10),
-// CHECK: ('word-1', 0xe000005)),
-// CHECK: # Relocation 7
-// CHECK: (('word-0', 0x0),
-// CHECK: ('word-1', 0xe000008)),
-// CHECK: ])
-// CHECK: ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000')
-// CHECK: # Section 3
-// CHECK: (('section_name', '__thread_bss\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 104)
-// CHECK: ('size', 8)
-// CHECK: ('offset', 0)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x12)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', 'cffaedfe 07000001')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 696)
-// CHECK: ('nsyms', 9)
-// CHECK: ('stroff', 840)
-// CHECK: ('strsize', 80)
-// CHECK: ('_string_data', '\x00_d$tlv$init\x00_c$tlv$init\x00_b$tlv$init\x00_a$tlv$init\x00___tlv_bootstrap\x00_d\x00_c\x00_b\x00_a\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 37)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 4)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 104)
-// CHECK: ('_string', '_a$tlv$init')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 25)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 4)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 108)
-// CHECK: ('_string', '_b$tlv$init')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 75)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 3)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 56)
-// CHECK: ('_string', '_a')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 72)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 3)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 80)
-// CHECK: ('_string', '_b')
-// CHECK: ),
-// CHECK: # Symbol 4
-// CHECK: (('n_strx', 69)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 3)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', '_c')
-// CHECK: ),
-// CHECK: # Symbol 5
-// CHECK: (('n_strx', 13)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_c$tlv$init')
-// CHECK: ),
-// CHECK: # Symbol 6
-// CHECK: (('n_strx', 66)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 3)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 32)
-// CHECK: ('_string', '_d')
-// CHECK: ),
-// CHECK: # Symbol 7
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 4)
-// CHECK: ('_string', '_d$tlv$init')
-// CHECK: ),
-// CHECK: # Symbol 8
-// CHECK: (('n_strx', 49)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '___tlv_bootstrap')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 2)
-// CHECK: ('iextdefsym', 2)
-// CHECK: ('nextdefsym', 6)
-// CHECK: ('iundefsym', 8)
-// CHECK: ('nundefsym', 1)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 3
+// CHECK: SizeOfLoadCommands: 496
+// CHECK: Flags [ (0x2000)
+// CHECK: MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 528
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __thread_data (5F 5F 74 68 72 65 61 64 5F 64 61 74 61 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x8
+// CHECK: Offset: 528
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x11
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 04000000 05000000 |........|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 2
+// CHECK: Name: __thread_vars (5F 5F 74 68 72 65 61 64 5F 76 61 72 73 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x8
+// CHECK: Size: 0x60
+// CHECK: Offset: 536
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x278
+// CHECK: RelocationCount: 8
+// CHECK: Type: 0x13
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0010: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0020: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0030: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0040: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0050: 00000000 00000000 00000000 00000000 |................|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 3
+// CHECK: Name: __thread_bss (5F 5F 74 68 72 65 61 64 5F 62 73 73 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x68
+// CHECK: Size: 0x8
+// CHECK: Offset: 0
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x12
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: CFFAEDFE 07000001 |........|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __thread_vars {
+// CHECK: 0x58 0 3 1 X86_64_RELOC_UNSIGNED 0 _b$tlv$init
+// CHECK: 0x48 0 3 1 X86_64_RELOC_UNSIGNED 0 ___tlv_bootstrap
+// CHECK: 0x40 0 3 1 X86_64_RELOC_UNSIGNED 0 _a$tlv$init
+// CHECK: 0x30 0 3 1 X86_64_RELOC_UNSIGNED 0 ___tlv_bootstrap
+// CHECK: 0x28 0 3 1 X86_64_RELOC_UNSIGNED 0 _d$tlv$init
+// CHECK: 0x18 0 3 1 X86_64_RELOC_UNSIGNED 0 ___tlv_bootstrap
+// CHECK: 0x10 0 3 1 X86_64_RELOC_UNSIGNED 0 _c$tlv$init
+// CHECK: 0x0 0 3 1 X86_64_RELOC_UNSIGNED 0 ___tlv_bootstrap
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _a$tlv$init (37)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_bss (0x4)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x68
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _b$tlv$init (25)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_bss (0x4)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x6C
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _a (75)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_vars (0x3)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x38
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _b (72)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_vars (0x3)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x50
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _c (69)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_vars (0x3)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _c$tlv$init (13)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _d (66)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_vars (0x3)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x20
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _d$tlv$init (1)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x4
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: ___tlv_bootstrap (49)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 392
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x70
+// CHECK: fileoff: 528
+// CHECK: filesize: 104
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 4
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 2
+// CHECK: iextdefsym: 2
+// CHECK: nextdefsym: 6
+// CHECK: iundefsym: 8
+// CHECK: nundefsym: 1
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/tlv-bss.ll b/test/MC/MachO/tlv-bss.ll
index 3dbf4b07e16e..9a6ea20266fc 100644
--- a/test/MC/MachO/tlv-bss.ll
+++ b/test/MC/MachO/tlv-bss.ll
@@ -1,11 +1,16 @@
-; RUN: llc -O0 -mtriple=x86_64-apple-darwin12 -filetype=obj -o - %s | macho-dump | FileCheck %s
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin12 -filetype=obj -o - %s | llvm-readobj -s | FileCheck %s
; Test that we emit weak_odr thread_locals correctly into the thread_bss section
; PR15972
-; CHECK: __thread_bss
-; CHECK: 'size', 8
-; CHECK: 'alignment', 3
-; CHECK: __thread_vars
+; CHECK: Section {
+; CHECK: Index: 1
+; CHECK: Name: __thread_bss (5F 5F 74 68 72 65 61 64 5F 62 73 73 00 00 00 00)
+; CHECK: Size: 0x8
+; CHECK: Alignment: 3
+; CHECK: }
+; CHECK: Section {
+; CHECK: Index: 2
+; CHECK: Name: __thread_vars (5F 5F 74 68 72 65 61 64 5F 76 61 72 73 00 00 00)
; Generated from this C++ source
; template<class T>
diff --git a/test/MC/MachO/tlv-reloc.s b/test/MC/MachO/tlv-reloc.s
index 80e0565c59b7..ca6873478dce 100644
--- a/test/MC/MachO/tlv-reloc.s
+++ b/test/MC/MachO/tlv-reloc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.tdata
_a$tlv$init:
@@ -21,154 +21,171 @@ _foo:
call *(%rdi) # returns &a in %rax
ret
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 416)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 312)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 38)
-// CHECK: ('file_offset', 448)
-// CHECK: ('file_size', 38)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 3)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 10)
-// CHECK: ('offset', 448)
-// CHECK: ('alignment', 4)
-// CHECK: ('reloc_offset', 488)
-// CHECK: ('num_reloc', 1)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0x3),
-// CHECK: ('word-1', 0x9d000001)),
-// CHECK: ])
-// CHECK: ('_section_data', '488b3d00 000000ff 17c3')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__thread_data\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 10)
-// CHECK: ('size', 4)
-// CHECK: ('offset', 458)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x11)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '04000000')
-// CHECK: # Section 2
-// CHECK: (('section_name', '__thread_vars\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 14)
-// CHECK: ('size', 24)
-// CHECK: ('offset', 462)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 496)
-// CHECK: ('num_reloc', 2)
-// CHECK: ('flags', 0x13)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: # Relocation 0
-// CHECK: (('word-0', 0x10),
-// CHECK: ('word-1', 0xe000000)),
-// CHECK: # Relocation 1
-// CHECK: (('word-0', 0x0),
-// CHECK: ('word-1', 0xe000003)),
-// CHECK: ])
-// CHECK: ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 512)
-// CHECK: ('nsyms', 4)
-// CHECK: ('stroff', 576)
-// CHECK: ('strsize', 40)
-// CHECK: ('_string_data', '\x00_a$tlv$init\x00__tlv_bootstrap\x00_foo\x00_a\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 10)
-// CHECK: ('_string', '_a$tlv$init')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 34)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 3)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 14)
-// CHECK: ('_string', '_a')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 29)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_foo')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 13)
-// CHECK: ('n_type', 0x1)
-// CHECK: ('n_sect', 0)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '__tlv_bootstrap')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 1)
-// CHECK: ('iextdefsym', 1)
-// CHECK: ('nextdefsym', 2)
-// CHECK: ('iundefsym', 3)
-// CHECK: ('nundefsym', 1)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 3
+// CHECK: SizeOfLoadCommands: 416
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0xA
+// CHECK: Offset: 448
+// CHECK: Alignment: 4
+// CHECK: RelocationOffset: 0x1E8
+// CHECK: RelocationCount: 1
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 488B3D00 000000FF 17C3 |H.=.......|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __thread_data (5F 5F 74 68 72 65 61 64 5F 64 61 74 61 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0xA
+// CHECK: Size: 0x4
+// CHECK: Offset: 458
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x11
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 04000000 |....|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 2
+// CHECK: Name: __thread_vars (5F 5F 74 68 72 65 61 64 5F 76 61 72 73 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0xE
+// CHECK: Size: 0x18
+// CHECK: Offset: 462
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x1F0
+// CHECK: RelocationCount: 2
+// CHECK: Type: 0x13
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0010: 00000000 00000000 |........|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: Section __text {
+// CHECK: 0x3 1 2 1 X86_64_RELOC_TLV 0 _a
+// CHECK: }
+// CHECK: Section __thread_vars {
+// CHECK: 0x10 0 3 1 X86_64_RELOC_UNSIGNED 0 _a$tlv$init
+// CHECK: 0x0 0 3 1 X86_64_RELOC_UNSIGNED 0 __tlv_bootstrap
+// CHECK: }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _a$tlv$init (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0xA
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _a (34)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_vars (0x3)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0xE
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _foo (29)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: __tlv_bootstrap (13)
+// CHECK: Extern
+// CHECK: Type: Undef (0x0)
+// CHECK: Section: (0x0)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 312
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x26
+// CHECK: fileoff: 448
+// CHECK: filesize: 38
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 3
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 1
+// CHECK: iextdefsym: 1
+// CHECK: nextdefsym: 2
+// CHECK: iundefsym: 3
+// CHECK: nundefsym: 1
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/tlv.s b/test/MC/MachO/tlv.s
index 0fe028e7d501..57d74448aea5 100644
--- a/test/MC/MachO/tlv.s
+++ b/test/MC/MachO/tlv.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.tlv
.globl _a
@@ -7,104 +7,113 @@ _a:
.quad 0
.quad 0
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 336)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 232)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 24)
-// CHECK: ('file_offset', 368)
-// CHECK: ('file_size', 24)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 368)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__thread_vars\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 24)
-// CHECK: ('offset', 368)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x13)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 392)
-// CHECK: ('nsyms', 1)
-// CHECK: ('stroff', 408)
-// CHECK: ('strsize', 4)
-// CHECK: ('_string_data', '\x00_a\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_a')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 0)
-// CHECK: ('iextdefsym', 0)
-// CHECK: ('nextdefsym', 1)
-// CHECK: ('iundefsym', 1)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 3
+// CHECK: SizeOfLoadCommands: 336
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 368
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __thread_vars (5F 5F 74 68 72 65 61 64 5F 76 61 72 73 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x18
+// CHECK: Offset: 368
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x13
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00000000 00000000 00000000 00000000 |................|
+// CHECK: 0010: 00000000 00000000 |........|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _a (1)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __thread_vars (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 232
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x18
+// CHECK: fileoff: 368
+// CHECK: filesize: 24
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 0
+// CHECK: iextdefsym: 0
+// CHECK: nextdefsym: 1
+// CHECK: iundefsym: 1
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/values.s b/test/MC/MachO/values.s
index 96115990636e..0bdd0946770f 100644
--- a/test/MC/MachO/values.s
+++ b/test/MC/MachO/values.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.long 0
text_def_int:
@@ -17,119 +17,132 @@ data_def_int:
data_def_ext:
.long 0
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 296)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 192)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 24)
-// CHECK: ('file_offset', 324)
-// CHECK: ('file_size', 24)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 12)
-// CHECK: ('offset', 324)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 1
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 12)
-// CHECK: ('size', 12)
-// CHECK: ('offset', 336)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 348)
-// CHECK: ('nsyms', 4)
-// CHECK: ('stroff', 396)
-// CHECK: ('strsize', 56)
-// CHECK: ('_string_data', '\x00text_def_ext\x00data_def_ext\x00text_def_int\x00data_def_int\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 27)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 4)
-// CHECK: ('_string', 'text_def_int')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 40)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 16)
-// CHECK: ('_string', 'data_def_int')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 14)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 20)
-// CHECK: ('_string', 'data_def_ext')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', 'text_def_ext')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 2)
-// CHECK: ('iextdefsym', 2)
-// CHECK: ('nextdefsym', 2)
-// CHECK: ('iundefsym', 4)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 312
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0xC
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0xC
+// CHECK: Size: 0xC
+// CHECK: Offset: 352
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: text_def_int (27)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x4
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: data_def_int (40)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x10
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: data_def_ext (14)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x14
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: text_def_ext (1)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 192
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x18
+// CHECK: fileoff: 340
+// CHECK: filesize: 24
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 2
+// CHECK: iextdefsym: 2
+// CHECK: nextdefsym: 2
+// CHECK: iundefsym: 4
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/variable-exprs.s b/test/MC/MachO/variable-exprs.s
index ac781ef7f67d..85f395fd3be8 100644
--- a/test/MC/MachO/variable-exprs.s
+++ b/test/MC/MachO/variable-exprs.s
@@ -1,9 +1,9 @@
// RUN: llvm-mc -triple i386-apple-darwin10 %s -filetype=obj -o %t.o
-// RUN: macho-dump --dump-section-data < %t.o > %t.dump
+// RUN: llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols < %t.o > %t.dump
// RUN: FileCheck --check-prefix=CHECK-I386 < %t.dump %s
// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o %t.o
-// RUN: macho-dump --dump-section-data < %t.o > %t.dump
+// RUN: llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols < %t.o > %t.dump
// RUN: FileCheck --check-prefix=CHECK-X86_64 < %t.dump %s
.data
@@ -46,401 +46,417 @@ Lt0_b:
Lt0_x = Lt0_a - Lt0_b
.quad Lt0_x
-// CHECK-I386: ('cputype', 7)
-// CHECK-I386: ('cpusubtype', 3)
-// CHECK-I386: ('filetype', 1)
-// CHECK-I386: ('num_load_commands', 3)
-// CHECK-I386: ('load_commands_size', 296)
-// CHECK-I386: ('flag', 0)
-// CHECK-I386: ('load_commands', [
-// CHECK-I386: # Load Command 0
-// CHECK-I386: (('command', 1)
-// CHECK-I386: ('size', 192)
-// CHECK-I386: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-I386: ('vm_addr', 0)
-// CHECK-I386: ('vm_size', 57)
-// CHECK-I386: ('file_offset', 324)
-// CHECK-I386: ('file_size', 57)
-// CHECK-I386: ('maxprot', 7)
-// CHECK-I386: ('initprot', 7)
-// CHECK-I386: ('num_sections', 2)
-// CHECK-I386: ('flags', 0)
-// CHECK-I386: ('sections', [
-// CHECK-I386: # Section 0
-// CHECK-I386: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-I386: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-I386: ('address', 0)
-// CHECK-I386: ('size', 1)
-// CHECK-I386: ('offset', 324)
-// CHECK-I386: ('alignment', 0)
-// CHECK-I386: ('reloc_offset', 0)
-// CHECK-I386: ('num_reloc', 0)
-// CHECK-I386: ('flags', 0x80000400)
-// CHECK-I386: ('reserved1', 0)
-// CHECK-I386: ('reserved2', 0)
-// CHECK-I386: ),
-// CHECK-I386: ('_relocations', [
-// CHECK-I386: ])
-// CHECK-I386: ('_section_data', 'c3')
-// CHECK-I386: # Section 1
-// CHECK-I386: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-I386: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-I386: ('address', 1)
-// CHECK-I386: ('size', 56)
-// CHECK-I386: ('offset', 325)
-// CHECK-I386: ('alignment', 0)
-// CHECK-I386: ('reloc_offset', 384)
-// CHECK-I386: ('num_reloc', 9)
-// CHECK-I386: ('flags', 0x0)
-// CHECK-I386: ('reserved1', 0)
-// CHECK-I386: ('reserved2', 0)
-// CHECK-I386: ),
-// CHECK-I386: ('_relocations', [
-// CHECK-I386: # Relocation 0
-// CHECK-I386: (('word-0', 0x2c),
-// CHECK-I386: ('word-1', 0x4000002)),
-// CHECK-I386: # Relocation 1
-// CHECK-I386: (('word-0', 0x28),
-// CHECK-I386: ('word-1', 0x4000002)),
-// CHECK-I386: # Relocation 2
-// CHECK-I386: (('word-0', 0x24),
-// CHECK-I386: ('word-1', 0xc000009)),
-// CHECK-I386: # Relocation 3
-// CHECK-I386: (('word-0', 0x20),
-// CHECK-I386: ('word-1', 0xc000008)),
-// CHECK-I386: # Relocation 4
-// CHECK-I386: (('word-0', 0x1c),
-// CHECK-I386: ('word-1', 0xc000007)),
-// CHECK-I386: # Relocation 5
-// CHECK-I386: (('word-0', 0xa0000018),
-// CHECK-I386: ('word-1', 0x5)),
-// CHECK-I386: # Relocation 6
-// CHECK-I386: (('word-0', 0x14),
-// CHECK-I386: ('word-1', 0x4000002)),
-// CHECK-I386: # Relocation 7
-// CHECK-I386: (('word-0', 0x10),
-// CHECK-I386: ('word-1', 0x4000002)),
-// CHECK-I386: # Relocation 8
-// CHECK-I386: (('word-0', 0x8),
-// CHECK-I386: ('word-1', 0x4000002)),
-// CHECK-I386: ])
-// CHECK-I386: ('_section_data', '00000000 00000000 05000000 00000000 05000000 09000000 09000000 00000000 00000000 00000000 0d000000 0d000000 cfffffff ffffffff')
-// CHECK-I386: ])
-// CHECK-I386: ),
-// CHECK-I386: # Load Command 1
-// CHECK-I386: (('command', 2)
-// CHECK-I386: ('size', 24)
-// CHECK-I386: ('symoff', 456)
-// CHECK-I386: ('nsyms', 10)
-// CHECK-I386: ('stroff', 576)
-// CHECK-I386: ('strsize', 24)
-// CHECK-I386: ('_string_data', '\x00g\x00f\x00e\x00d\x00c\x00b\x00a\x00d3\x00d2\x00t0\x00')
-// CHECK-I386: ('_symbols', [
-// CHECK-I386: # Symbol 0
-// CHECK-I386: (('n_strx', 13)
-// CHECK-I386: ('n_type', 0xe)
-// CHECK-I386: ('n_sect', 2)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 5)
-// CHECK-I386: ('_string', 'a')
-// CHECK-I386: ),
-// CHECK-I386: # Symbol 1
-// CHECK-I386: (('n_strx', 11)
-// CHECK-I386: ('n_type', 0xe)
-// CHECK-I386: ('n_sect', 2)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 5)
-// CHECK-I386: ('_string', 'b')
-// CHECK-I386: ),
-// CHECK-I386: # Symbol 2
-// CHECK-I386: (('n_strx', 9)
-// CHECK-I386: ('n_type', 0xe)
-// CHECK-I386: ('n_sect', 2)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 9)
-// CHECK-I386: ('_string', 'c')
-// CHECK-I386: ),
-// CHECK-I386: # Symbol 3
-// CHECK-I386: (('n_strx', 5)
-// CHECK-I386: ('n_type', 0xe)
-// CHECK-I386: ('n_sect', 2)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 9)
-// CHECK-I386: ('_string', 'e')
-// CHECK-I386: ),
-// CHECK-I386: # Symbol 4
-// CHECK-I386: (('n_strx', 1)
-// CHECK-I386: ('n_type', 0xe)
-// CHECK-I386: ('n_sect', 2)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 13)
-// CHECK-I386: ('_string', 'g')
-// CHECK-I386: ),
-// CHECK-I386: # Symbol 5
-// CHECK-I386: (('n_strx', 3)
-// CHECK-I386: ('n_type', 0xe)
-// CHECK-I386: ('n_sect', 2)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 13)
-// CHECK-I386: ('_string', 'f')
-// CHECK-I386: ),
-// CHECK-I386: # Symbol 6
-// CHECK-I386: (('n_strx', 21)
-// CHECK-I386: ('n_type', 0xe)
-// CHECK-I386: ('n_sect', 1)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 0)
-// CHECK-I386: ('_string', 't0')
-// CHECK-I386: ),
-// CHECK-I386: # Symbol 7
-// CHECK-I386: (('n_strx', 7)
-// CHECK-I386: ('n_type', 0x1)
-// CHECK-I386: ('n_sect', 0)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 0)
-// CHECK-I386: ('_string', 'd')
-// CHECK-I386: ),
-// CHECK-I386: # Symbol 8
-// CHECK-I386: (('n_strx', 18)
-// CHECK-I386: ('n_type', 0xb)
-// CHECK-I386: ('n_sect', 0)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 7)
-// CHECK-I386: ('_string', 'd2')
-// CHECK-I386: ),
-// CHECK-I386: # Symbol 9
-// CHECK-I386: (('n_strx', 15)
-// CHECK-I386: ('n_type', 0x1)
-// CHECK-I386: ('n_sect', 0)
-// CHECK-I386: ('n_desc', 0)
-// CHECK-I386: ('n_value', 0)
-// CHECK-I386: ('_string', 'd3')
-// CHECK-I386: ),
-// CHECK-I386: ])
-// CHECK-I386: ),
-// CHECK-I386: # Load Command 2
-// CHECK-I386: (('command', 11)
-// CHECK-I386: ('size', 80)
-// CHECK-I386: ('ilocalsym', 0)
-// CHECK-I386: ('nlocalsym', 7)
-// CHECK-I386: ('iextdefsym', 7)
-// CHECK-I386: ('nextdefsym', 0)
-// CHECK-I386: ('iundefsym', 7)
-// CHECK-I386: ('nundefsym', 3)
-// CHECK-I386: ('tocoff', 0)
-// CHECK-I386: ('ntoc', 0)
-// CHECK-I386: ('modtaboff', 0)
-// CHECK-I386: ('nmodtab', 0)
-// CHECK-I386: ('extrefsymoff', 0)
-// CHECK-I386: ('nextrefsyms', 0)
-// CHECK-I386: ('indirectsymoff', 0)
-// CHECK-I386: ('nindirectsyms', 0)
-// CHECK-I386: ('extreloff', 0)
-// CHECK-I386: ('nextrel', 0)
-// CHECK-I386: ('locreloff', 0)
-// CHECK-I386: ('nlocrel', 0)
-// CHECK-I386: ('_indirect_symbols', [
-// CHECK-I386: ])
-// CHECK-I386: ),
-// CHECK-I386: ])
+// CHECK-I386: File: <stdin>
+// CHECK-I386: Format: Mach-O 32-bit i386
+// CHECK-I386: Arch: i386
+// CHECK-I386: AddressSize: 32bit
+// CHECK-I386: MachHeader {
+// CHECK-I386: Magic: Magic (0xFEEDFACE)
+// CHECK-I386: CpuType: X86 (0x7)
+// CHECK-I386: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK-I386: FileType: Relocatable (0x1)
+// CHECK-I386: NumOfLoadCommands: 4
+// CHECK-I386: SizeOfLoadCommands: 312
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: }
+// CHECK-I386: Sections [
+// CHECK-I386: Section {
+// CHECK-I386: Index: 0
+// CHECK-I386: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK-I386: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK-I386: Address: 0x0
+// CHECK-I386: Size: 0x1
+// CHECK-I386: Offset: 340
+// CHECK-I386: Alignment: 0
+// CHECK-I386: RelocationOffset: 0x0
+// CHECK-I386: RelocationCount: 0
+// CHECK-I386: Type: 0x0
+// CHECK-I386: Attributes [ (0x800004)
+// CHECK-I386: PureInstructions (0x800000)
+// CHECK-I386: SomeInstructions (0x4)
+// CHECK-I386: ]
+// CHECK-I386: Reserved1: 0x0
+// CHECK-I386: Reserved2: 0x0
+// CHECK-I386: SectionData (
+// CHECK-I386: 0000: C3 |.|
+// CHECK-I386: )
+// CHECK-I386: }
+// CHECK-I386: Section {
+// CHECK-I386: Index: 1
+// CHECK-I386: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK-I386: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK-I386: Address: 0x1
+// CHECK-I386: Size: 0x38
+// CHECK-I386: Offset: 341
+// CHECK-I386: Alignment: 0
+// CHECK-I386: RelocationOffset: 0x190
+// CHECK-I386: RelocationCount: 9
+// CHECK-I386: Type: 0x0
+// CHECK-I386: Attributes [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Reserved1: 0x0
+// CHECK-I386: Reserved2: 0x0
+// CHECK-I386: SectionData (
+// CHECK-I386: 0000: 00000000 00000000 05000000 00000000 |................|
+// CHECK-I386: 0010: 05000000 09000000 09000000 00000000 |................|
+// CHECK-I386: 0020: 00000000 00000000 0D000000 0D000000 |................|
+// CHECK-I386: 0030: CFFFFFFF FFFFFFFF |........|
+// CHECK-I386: )
+// CHECK-I386: }
+// CHECK-I386: ]
+// CHECK-I386: Relocations [
+// CHECK-I386: Section __data {
+// CHECK-I386: 0x2C 0 2 0 GENERIC_RELOC_VANILLA 0 __data
+// CHECK-I386: 0x28 0 2 0 GENERIC_RELOC_VANILLA 0 __data
+// CHECK-I386: 0x24 0 2 1 GENERIC_RELOC_VANILLA 0 d3
+// CHECK-I386: 0x20 0 2 1 GENERIC_RELOC_VANILLA 0 d2
+// CHECK-I386: 0x1C 0 2 1 GENERIC_RELOC_VANILLA 0 d
+// CHECK-I386: 0x18 0 2 n/a GENERIC_RELOC_VANILLA 1 0x5
+// CHECK-I386: 0x14 0 2 0 GENERIC_RELOC_VANILLA 0 __data
+// CHECK-I386: 0x10 0 2 0 GENERIC_RELOC_VANILLA 0 __data
+// CHECK-I386: 0x8 0 2 0 GENERIC_RELOC_VANILLA 0 __data
+// CHECK-I386: }
+// CHECK-I386: ]
+// CHECK-I386: Symbols [
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: a (13)
+// CHECK-I386: Type: Section (0xE)
+// CHECK-I386: Section: __data (0x2)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0x5
+// CHECK-I386: }
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: b (11)
+// CHECK-I386: Type: Section (0xE)
+// CHECK-I386: Section: __data (0x2)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0x5
+// CHECK-I386: }
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: c (9)
+// CHECK-I386: Type: Section (0xE)
+// CHECK-I386: Section: __data (0x2)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0x9
+// CHECK-I386: }
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: e (5)
+// CHECK-I386: Type: Section (0xE)
+// CHECK-I386: Section: __data (0x2)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0x9
+// CHECK-I386: }
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: g (1)
+// CHECK-I386: Type: Section (0xE)
+// CHECK-I386: Section: __data (0x2)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0xD
+// CHECK-I386: }
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: f (3)
+// CHECK-I386: Type: Section (0xE)
+// CHECK-I386: Section: __data (0x2)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0xD
+// CHECK-I386: }
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: t0 (21)
+// CHECK-I386: Type: Section (0xE)
+// CHECK-I386: Section: __text (0x1)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0x0
+// CHECK-I386: }
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: d (7)
+// CHECK-I386: Extern
+// CHECK-I386: Type: Undef (0x0)
+// CHECK-I386: Section: (0x0)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0x0
+// CHECK-I386: }
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: d2 (18)
+// CHECK-I386: Extern
+// CHECK-I386: Type: Indirect (0xA)
+// CHECK-I386: Section: (0x0)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0x7
+// CHECK-I386: }
+// CHECK-I386: Symbol {
+// CHECK-I386: Name: d3 (15)
+// CHECK-I386: Extern
+// CHECK-I386: Type: Undef (0x0)
+// CHECK-I386: Section: (0x0)
+// CHECK-I386: RefType: UndefinedNonLazy (0x0)
+// CHECK-I386: Flags [ (0x0)
+// CHECK-I386: ]
+// CHECK-I386: Value: 0x0
+// CHECK-I386: }
+// CHECK-I386: ]
+// CHECK-I386: Indirect Symbols {
+// CHECK-I386: Number: 0
+// CHECK-I386: Symbols [
+// CHECK-I386: ]
+// CHECK-I386: }
+// CHECK-I386: Segment {
+// CHECK-I386: Cmd: LC_SEGMENT
+// CHECK-I386: Name:
+// CHECK-I386: Size: 192
+// CHECK-I386: vmaddr: 0x0
+// CHECK-I386: vmsize: 0x39
+// CHECK-I386: fileoff: 340
+// CHECK-I386: filesize: 57
+// CHECK-I386: maxprot: rwx
+// CHECK-I386: initprot: rwx
+// CHECK-I386: nsects: 2
+// CHECK-I386: flags: 0x0
+// CHECK-I386: }
+// CHECK-I386: Dysymtab {
+// CHECK-I386: ilocalsym: 0
+// CHECK-I386: nlocalsym: 7
+// CHECK-I386: iextdefsym: 7
+// CHECK-I386: nextdefsym: 0
+// CHECK-I386: iundefsym: 7
+// CHECK-I386: nundefsym: 3
+// CHECK-I386: tocoff: 0
+// CHECK-I386: ntoc: 0
+// CHECK-I386: modtaboff: 0
+// CHECK-I386: nmodtab: 0
+// CHECK-I386: extrefsymoff: 0
+// CHECK-I386: nextrefsyms: 0
+// CHECK-I386: indirectsymoff: 0
+// CHECK-I386: nindirectsyms: 0
+// CHECK-I386: extreloff: 0
+// CHECK-I386: nextrel: 0
+// CHECK-I386: locreloff: 0
+// CHECK-I386: nlocrel: 0
+// CHECK-I386: }
-// CHECK-X86_64: ('cputype', 16777223)
-// CHECK-X86_64: ('cpusubtype', 3)
-// CHECK-X86_64: ('filetype', 1)
-// CHECK-X86_64: ('num_load_commands', 3)
-// CHECK-X86_64: ('load_commands_size', 336)
-// CHECK-X86_64: ('flag', 0)
-// CHECK-X86_64: ('reserved', 0)
-// CHECK-X86_64: ('load_commands', [
-// CHECK-X86_64: # Load Command 0
-// CHECK-X86_64: (('command', 25)
-// CHECK-X86_64: ('size', 232)
-// CHECK-X86_64: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_64: ('vm_addr', 0)
-// CHECK-X86_64: ('vm_size', 57)
-// CHECK-X86_64: ('file_offset', 368)
-// CHECK-X86_64: ('file_size', 57)
-// CHECK-X86_64: ('maxprot', 7)
-// CHECK-X86_64: ('initprot', 7)
-// CHECK-X86_64: ('num_sections', 2)
-// CHECK-X86_64: ('flags', 0)
-// CHECK-X86_64: ('sections', [
-// CHECK-X86_64: # Section 0
-// CHECK-X86_64: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_64: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_64: ('address', 0)
-// CHECK-X86_64: ('size', 1)
-// CHECK-X86_64: ('offset', 368)
-// CHECK-X86_64: ('alignment', 0)
-// CHECK-X86_64: ('reloc_offset', 0)
-// CHECK-X86_64: ('num_reloc', 0)
-// CHECK-X86_64: ('flags', 0x80000400)
-// CHECK-X86_64: ('reserved1', 0)
-// CHECK-X86_64: ('reserved2', 0)
-// CHECK-X86_64: ('reserved3', 0)
-// CHECK-X86_64: ),
-// CHECK-X86_64: ('_relocations', [
-// CHECK-X86_64: ])
-// CHECK-X86_64: ('_section_data', 'c3')
-// CHECK-X86_64: # Section 1
-// CHECK-X86_64: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_64: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-X86_64: ('address', 1)
-// CHECK-X86_64: ('size', 56)
-// CHECK-X86_64: ('offset', 369)
-// CHECK-X86_64: ('alignment', 0)
-// CHECK-X86_64: ('reloc_offset', 428)
-// CHECK-X86_64: ('num_reloc', 9)
-// CHECK-X86_64: ('flags', 0x0)
-// CHECK-X86_64: ('reserved1', 0)
-// CHECK-X86_64: ('reserved2', 0)
-// CHECK-X86_64: ('reserved3', 0)
-// CHECK-X86_64: ),
-// CHECK-X86_64: ('_relocations', [
-// CHECK-X86_64: # Relocation 0
-// CHECK-X86_64: (('word-0', 0x2c),
-// CHECK-X86_64: ('word-1', 0xc000004)),
-// CHECK-X86_64: # Relocation 1
-// CHECK-X86_64: (('word-0', 0x28),
-// CHECK-X86_64: ('word-1', 0xc000005)),
-// CHECK-X86_64: # Relocation 2
-// CHECK-X86_64: (('word-0', 0x24),
-// CHECK-X86_64: ('word-1', 0xc000009)),
-// CHECK-X86_64: # Relocation 3
-// CHECK-X86_64: (('word-0', 0x20),
-// CHECK-X86_64: ('word-1', 0xc000008)),
-// CHECK-X86_64: # Relocation 4
-// CHECK-X86_64: (('word-0', 0x1c),
-// CHECK-X86_64: ('word-1', 0xc000007)),
-// CHECK-X86_64: # Relocation 5
-// CHECK-X86_64: (('word-0', 0x18),
-// CHECK-X86_64: ('word-1', 0xc000000)),
-// CHECK-X86_64: # Relocation 6
-// CHECK-X86_64: (('word-0', 0x14),
-// CHECK-X86_64: ('word-1', 0xc000003)),
-// CHECK-X86_64: # Relocation 7
-// CHECK-X86_64: (('word-0', 0x10),
-// CHECK-X86_64: ('word-1', 0xc000001)),
-// CHECK-X86_64: # Relocation 8
-// CHECK-X86_64: (('word-0', 0x8),
-// CHECK-X86_64: ('word-1', 0xc000001)),
-// CHECK-X86_64: ])
-// CHECK-X86_64: ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000 04000000 00000000 00000000 00000000 00000000 00000000 cfffffff ffffffff')
-// CHECK-X86_64: ])
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Load Command 1
-// CHECK-X86_64: (('command', 2)
-// CHECK-X86_64: ('size', 24)
-// CHECK-X86_64: ('symoff', 500)
-// CHECK-X86_64: ('nsyms', 10)
-// CHECK-X86_64: ('stroff', 660)
-// CHECK-X86_64: ('strsize', 24)
-// CHECK-X86_64: ('_string_data', '\x00g\x00f\x00e\x00d\x00c\x00b\x00a\x00d3\x00d2\x00t0\x00')
-// CHECK-X86_64: ('_symbols', [
-// CHECK-X86_64: # Symbol 0
-// CHECK-X86_64: (('n_strx', 13)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 2)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 5)
-// CHECK-X86_64: ('_string', 'a')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 1
-// CHECK-X86_64: (('n_strx', 11)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 2)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 5)
-// CHECK-X86_64: ('_string', 'b')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 2
-// CHECK-X86_64: (('n_strx', 9)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 2)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 9)
-// CHECK-X86_64: ('_string', 'c')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 3
-// CHECK-X86_64: (('n_strx', 5)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 2)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 9)
-// CHECK-X86_64: ('_string', 'e')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 4
-// CHECK-X86_64: (('n_strx', 1)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 2)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 13)
-// CHECK-X86_64: ('_string', 'g')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 5
-// CHECK-X86_64: (('n_strx', 3)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 2)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 13)
-// CHECK-X86_64: ('_string', 'f')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 6
-// CHECK-X86_64: (('n_strx', 21)
-// CHECK-X86_64: ('n_type', 0xe)
-// CHECK-X86_64: ('n_sect', 1)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 't0')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 7
-// CHECK-X86_64: (('n_strx', 7)
-// CHECK-X86_64: ('n_type', 0x1)
-// CHECK-X86_64: ('n_sect', 0)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'd')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 8
-// CHECK-X86_64: (('n_strx', 18)
-// CHECK-X86_64: ('n_type', 0xb)
-// CHECK-X86_64: ('n_sect', 0)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 7)
-// CHECK-X86_64: ('_string', 'd2')
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Symbol 9
-// CHECK-X86_64: (('n_strx', 15)
-// CHECK-X86_64: ('n_type', 0x1)
-// CHECK-X86_64: ('n_sect', 0)
-// CHECK-X86_64: ('n_desc', 0)
-// CHECK-X86_64: ('n_value', 0)
-// CHECK-X86_64: ('_string', 'd3')
-// CHECK-X86_64: ),
-// CHECK-X86_64: ])
-// CHECK-X86_64: ),
-// CHECK-X86_64: # Load Command 2
-// CHECK-X86_64: (('command', 11)
-// CHECK-X86_64: ('size', 80)
-// CHECK-X86_64: ('ilocalsym', 0)
-// CHECK-X86_64: ('nlocalsym', 7)
-// CHECK-X86_64: ('iextdefsym', 7)
-// CHECK-X86_64: ('nextdefsym', 0)
-// CHECK-X86_64: ('iundefsym', 7)
-// CHECK-X86_64: ('nundefsym', 3)
-// CHECK-X86_64: ('tocoff', 0)
-// CHECK-X86_64: ('ntoc', 0)
-// CHECK-X86_64: ('modtaboff', 0)
-// CHECK-X86_64: ('nmodtab', 0)
-// CHECK-X86_64: ('extrefsymoff', 0)
-// CHECK-X86_64: ('nextrefsyms', 0)
-// CHECK-X86_64: ('indirectsymoff', 0)
-// CHECK-X86_64: ('nindirectsyms', 0)
-// CHECK-X86_64: ('extreloff', 0)
-// CHECK-X86_64: ('nextrel', 0)
-// CHECK-X86_64: ('locreloff', 0)
-// CHECK-X86_64: ('nlocrel', 0)
-// CHECK-X86_64: ('_indirect_symbols', [
-// CHECK-X86_64: ])
-// CHECK-X86_64: ),
-// CHECK-X86_64: ])
+// CHECK-X86_64: File: <stdin>
+// CHECK-X86_64: Format: Mach-O 64-bit x86-64
+// CHECK-X86_64: Arch: x86_64
+// CHECK-X86_64: AddressSize: 64bit
+// CHECK-X86_64: MachHeader {
+// CHECK-X86_64: Magic: Magic64 (0xFEEDFACF)
+// CHECK-X86_64: CpuType: X86-64 (0x1000007)
+// CHECK-X86_64: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK-X86_64: FileType: Relocatable (0x1)
+// CHECK-X86_64: NumOfLoadCommands: 4
+// CHECK-X86_64: SizeOfLoadCommands: 352
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Reserved: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Sections [
+// CHECK-X86_64: Section {
+// CHECK-X86_64: Index: 0
+// CHECK-X86_64: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK-X86_64: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK-X86_64: Address: 0x0
+// CHECK-X86_64: Size: 0x1
+// CHECK-X86_64: Offset: 384
+// CHECK-X86_64: Alignment: 0
+// CHECK-X86_64: RelocationOffset: 0x0
+// CHECK-X86_64: RelocationCount: 0
+// CHECK-X86_64: Type: 0x0
+// CHECK-X86_64: Attributes [ (0x800004)
+// CHECK-X86_64: PureInstructions (0x800000)
+// CHECK-X86_64: SomeInstructions (0x4)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Reserved1: 0x0
+// CHECK-X86_64: Reserved2: 0x0
+// CHECK-X86_64: Reserved3: 0x0
+// CHECK-X86_64: SectionData (
+// CHECK-X86_64: 0000: C3 |.|
+// CHECK-X86_64: )
+// CHECK-X86_64: }
+// CHECK-X86_64: Section {
+// CHECK-X86_64: Index: 1
+// CHECK-X86_64: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK-X86_64: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK-X86_64: Address: 0x1
+// CHECK-X86_64: Size: 0x38
+// CHECK-X86_64: Offset: 385
+// CHECK-X86_64: Alignment: 0
+// CHECK-X86_64: RelocationOffset: 0x1BC
+// CHECK-X86_64: RelocationCount: 9
+// CHECK-X86_64: Type: 0x0
+// CHECK-X86_64: Attributes [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Reserved1: 0x0
+// CHECK-X86_64: Reserved2: 0x0
+// CHECK-X86_64: Reserved3: 0x0
+// CHECK-X86_64: SectionData (
+// CHECK-X86_64: 0000: 00000000 00000000 00000000 00000000 |................|
+// CHECK-X86_64: 0010: 00000000 00000000 04000000 00000000 |................|
+// CHECK-X86_64: 0020: 00000000 00000000 00000000 00000000 |................|
+// CHECK-X86_64: 0030: CFFFFFFF FFFFFFFF |........|
+// CHECK-X86_64: )
+// CHECK-X86_64: }
+// CHECK-X86_64: ]
+// CHECK-X86_64: Relocations [
+// CHECK-X86_64: Section __data {
+// CHECK-X86_64: 0x2C 0 2 1 X86_64_RELOC_UNSIGNED 0 g
+// CHECK-X86_64: 0x28 0 2 1 X86_64_RELOC_UNSIGNED 0 f
+// CHECK-X86_64: 0x24 0 2 1 X86_64_RELOC_UNSIGNED 0 d3
+// CHECK-X86_64: 0x20 0 2 1 X86_64_RELOC_UNSIGNED 0 d2
+// CHECK-X86_64: 0x1C 0 2 1 X86_64_RELOC_UNSIGNED 0 d
+// CHECK-X86_64: 0x18 0 2 1 X86_64_RELOC_UNSIGNED 0 a
+// CHECK-X86_64: 0x14 0 2 1 X86_64_RELOC_UNSIGNED 0 e
+// CHECK-X86_64: 0x10 0 2 1 X86_64_RELOC_UNSIGNED 0 b
+// CHECK-X86_64: 0x8 0 2 1 X86_64_RELOC_UNSIGNED 0 b
+// CHECK-X86_64: }
+// CHECK-X86_64: ]
+// CHECK-X86_64: Symbols [
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: a (13)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __data (0x2)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x5
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: b (11)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __data (0x2)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x5
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: c (9)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __data (0x2)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x9
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: e (5)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __data (0x2)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x9
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: g (1)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __data (0x2)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0xD
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: f (3)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __data (0x2)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0xD
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: t0 (21)
+// CHECK-X86_64: Type: Section (0xE)
+// CHECK-X86_64: Section: __text (0x1)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: d (7)
+// CHECK-X86_64: Extern
+// CHECK-X86_64: Type: Undef (0x0)
+// CHECK-X86_64: Section: (0x0)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: d2 (18)
+// CHECK-X86_64: Extern
+// CHECK-X86_64: Type: Indirect (0xA)
+// CHECK-X86_64: Section: (0x0)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x7
+// CHECK-X86_64: }
+// CHECK-X86_64: Symbol {
+// CHECK-X86_64: Name: d3 (15)
+// CHECK-X86_64: Extern
+// CHECK-X86_64: Type: Undef (0x0)
+// CHECK-X86_64: Section: (0x0)
+// CHECK-X86_64: RefType: UndefinedNonLazy (0x0)
+// CHECK-X86_64: Flags [ (0x0)
+// CHECK-X86_64: ]
+// CHECK-X86_64: Value: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: ]
+// CHECK-X86_64: Indirect Symbols {
+// CHECK-X86_64: Number: 0
+// CHECK-X86_64: Symbols [
+// CHECK-X86_64: ]
+// CHECK-X86_64: }
+// CHECK-X86_64: Segment {
+// CHECK-X86_64: Cmd: LC_SEGMENT_64
+// CHECK-X86_64: Name:
+// CHECK-X86_64: Size: 232
+// CHECK-X86_64: vmaddr: 0x0
+// CHECK-X86_64: vmsize: 0x39
+// CHECK-X86_64: fileoff: 384
+// CHECK-X86_64: filesize: 57
+// CHECK-X86_64: maxprot: rwx
+// CHECK-X86_64: initprot: rwx
+// CHECK-X86_64: nsects: 2
+// CHECK-X86_64: flags: 0x0
+// CHECK-X86_64: }
+// CHECK-X86_64: Dysymtab {
+// CHECK-X86_64: ilocalsym: 0
+// CHECK-X86_64: nlocalsym: 7
+// CHECK-X86_64: iextdefsym: 7
+// CHECK-X86_64: nextdefsym: 0
+// CHECK-X86_64: iundefsym: 7
+// CHECK-X86_64: nundefsym: 3
+// CHECK-X86_64: tocoff: 0
+// CHECK-X86_64: ntoc: 0
+// CHECK-X86_64: modtaboff: 0
+// CHECK-X86_64: nmodtab: 0
+// CHECK-X86_64: extrefsymoff: 0
+// CHECK-X86_64: nextrefsyms: 0
+// CHECK-X86_64: indirectsymoff: 0
+// CHECK-X86_64: nindirectsyms: 0
+// CHECK-X86_64: extreloff: 0
+// CHECK-X86_64: nextrel: 0
+// CHECK-X86_64: locreloff: 0
+// CHECK-X86_64: nlocrel: 0
+// CHECK-X86_64: }
diff --git a/test/MC/MachO/weakdef.s b/test/MC/MachO/weakdef.s
index 494079df5004..2043dc86f74b 100644
--- a/test/MC/MachO/weakdef.s
+++ b/test/MC/MachO/weakdef.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
.section __DATA,__datacoal_nt,coalesced
.section __TEXT,__const_coal,coalesced
@@ -14,128 +14,142 @@ __ZTS3optIbE:
__ZTI3optIbE:
.long __ZTS3optIbE
-// CHECK: ('cputype', 7)
-// CHECK-NEXT: ('cpusubtype', 3)
-// CHECK-NEXT: ('filetype', 1)
-// CHECK-NEXT: ('num_load_commands', 3)
-// CHECK-NEXT: ('load_commands_size', 364)
-// CHECK-NEXT: ('flag', 0)
-// CHECK-NEXT: ('load_commands', [
-// CHECK-NEXT: # Load Command 0
-// CHECK-NEXT: (('command', 1)
-// CHECK-NEXT: ('size', 260)
-// CHECK-NEXT: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('vm_addr', 0)
-// CHECK-NEXT: ('vm_size', 4)
-// CHECK-NEXT: ('file_offset', 392)
-// CHECK-NEXT: ('file_size', 4)
-// CHECK-NEXT: ('maxprot', 7)
-// CHECK-NEXT: ('initprot', 7)
-// CHECK-NEXT: ('num_sections', 3)
-// CHECK-NEXT: ('flags', 0)
-// CHECK-NEXT: ('sections', [
-// CHECK-NEXT: # Section 0
-// CHECK-NEXT: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 0)
-// CHECK-NEXT: ('size', 0)
-// CHECK-NEXT: ('offset', 392)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 0)
-// CHECK-NEXT: ('num_reloc', 0)
-// CHECK-NEXT: ('flags', 0x80000000)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', '')
-// CHECK-NEXT: # Section 1
-// CHECK-NEXT: (('section_name', '__datacoal_nt\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 0)
-// CHECK-NEXT: ('size', 4)
-// CHECK-NEXT: ('offset', 392)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 396)
-// CHECK-NEXT: ('num_reloc', 1)
-// CHECK-NEXT: ('flags', 0xb)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: # Relocation 0
-// CHECK-NEXT: (('word-0', 0x0),
-// CHECK-NEXT: ('word-1', 0xc000001)),
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', '00000000')
-// CHECK-NEXT: # Section 2
-// CHECK-NEXT: (('section_name', '__const_coal\x00\x00\x00\x00')
-// CHECK-NEXT: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT: ('address', 4)
-// CHECK-NEXT: ('size', 0)
-// CHECK-NEXT: ('offset', 396)
-// CHECK-NEXT: ('alignment', 0)
-// CHECK-NEXT: ('reloc_offset', 0)
-// CHECK-NEXT: ('num_reloc', 0)
-// CHECK-NEXT: ('flags', 0xb)
-// CHECK-NEXT: ('reserved1', 0)
-// CHECK-NEXT: ('reserved2', 0)
-// CHECK-NEXT: ),
-// CHECK-NEXT: ('_relocations', [
-// CHECK-NEXT: ])
-// CHECK-NEXT: ('_section_data', '')
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Load Command 1
-// CHECK-NEXT: (('command', 2)
-// CHECK-NEXT: ('size', 24)
-// CHECK-NEXT: ('symoff', 404)
-// CHECK-NEXT: ('nsyms', 2)
-// CHECK-NEXT: ('stroff', 428)
-// CHECK-NEXT: ('strsize', 28)
-// CHECK-NEXT: ('_string_data', '\x00__ZTS3optIbE\x00__ZTI3optIbE\x00\x00')
-// CHECK-NEXT: ('_symbols', [
-// CHECK-NEXT: # Symbol 0
-// CHECK-NEXT: (('n_strx', 14)
-// CHECK-NEXT: ('n_type', 0xf)
-// CHECK-NEXT: ('n_sect', 2)
-// CHECK-NEXT: ('n_desc', 128)
-// CHECK-NEXT: ('n_value', 0)
-// CHECK-NEXT: ('_string', '__ZTI3optIbE')
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 1
-// CHECK-NEXT: (('n_strx', 1)
-// CHECK-NEXT: ('n_type', 0xf)
-// CHECK-NEXT: ('n_sect', 3)
-// CHECK-NEXT: ('n_desc', 128)
-// CHECK-NEXT: ('n_value', 4)
-// CHECK-NEXT: ('_string', '__ZTS3optIbE')
-// CHECK-NEXT: ),
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Load Command 2
-// CHECK-NEXT: (('command', 11)
-// CHECK-NEXT: ('size', 80)
-// CHECK-NEXT: ('ilocalsym', 0)
-// CHECK-NEXT: ('nlocalsym', 0)
-// CHECK-NEXT: ('iextdefsym', 0)
-// CHECK-NEXT: ('nextdefsym', 2)
-// CHECK-NEXT: ('iundefsym', 2)
-// CHECK-NEXT: ('nundefsym', 0)
-// CHECK-NEXT: ('tocoff', 0)
-// CHECK-NEXT: ('ntoc', 0)
-// CHECK-NEXT: ('modtaboff', 0)
-// CHECK-NEXT: ('nmodtab', 0)
-// CHECK-NEXT: ('extrefsymoff', 0)
-// CHECK-NEXT: ('nextrefsyms', 0)
-// CHECK-NEXT: ('indirectsymoff', 0)
-// CHECK-NEXT: ('nindirectsyms', 0)
-// CHECK-NEXT: ('extreloff', 0)
-// CHECK-NEXT: ('nextrel', 0)
-// CHECK-NEXT: ('locreloff', 0)
-// CHECK-NEXT: ('nlocrel', 0)
-// CHECK-NEXT: ('_indirect_symbols', [
-// CHECK-NEXT: ])
-// CHECK-NEXT: ),
-// CHECK-NEXT: ])
+// CHECK: File: <stdin>
+// CHECK-NEXT: Format: Mach-O 32-bit i386
+// CHECK-NEXT: Arch: i386
+// CHECK-NEXT: AddressSize: 32bit
+// CHECK-NEXT: MachHeader {
+// CHECK-NEXT: Magic: Magic (0xFEEDFACE)
+// CHECK-NEXT: CpuType: X86 (0x7)
+// CHECK-NEXT: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK-NEXT: FileType: Relocatable (0x1)
+// CHECK-NEXT: NumOfLoadCommands: 4
+// CHECK-NEXT: SizeOfLoadCommands: 380
+// CHECK-NEXT: Flags [ (0x0)
+// CHECK-NEXT: ]
+// CHECK-NEXT: }
+// CHECK-NEXT: Sections [
+// CHECK-NEXT: Section {
+// CHECK-NEXT: Index: 0
+// CHECK-NEXT: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT: Address: 0x0
+// CHECK-NEXT: Size: 0x0
+// CHECK-NEXT: Offset: 408
+// CHECK-NEXT: Alignment: 0
+// CHECK-NEXT: RelocationOffset: 0x0
+// CHECK-NEXT: RelocationCount: 0
+// CHECK-NEXT: Type: 0x0
+// CHECK-NEXT: Attributes [ (0x800000)
+// CHECK-NEXT: PureInstructions (0x800000)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Reserved1: 0x0
+// CHECK-NEXT: Reserved2: 0x0
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT: )
+// CHECK-NEXT: }
+// CHECK-NEXT: Section {
+// CHECK-NEXT: Index: 1
+// CHECK-NEXT: Name: __datacoal_nt (5F 5F 64 61 74 61 63 6F 61 6C 5F 6E 74 00 00 00)
+// CHECK-NEXT: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT: Address: 0x0
+// CHECK-NEXT: Size: 0x4
+// CHECK-NEXT: Offset: 408
+// CHECK-NEXT: Alignment: 0
+// CHECK-NEXT: RelocationOffset: 0x19C
+// CHECK-NEXT: RelocationCount: 1
+// CHECK-NEXT: Type: 0xB
+// CHECK-NEXT: Attributes [ (0x0)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Reserved1: 0x0
+// CHECK-NEXT: Reserved2: 0x0
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT: 0000: 00000000 |....|
+// CHECK-NEXT: )
+// CHECK-NEXT: }
+// CHECK-NEXT: Section {
+// CHECK-NEXT: Index: 2
+// CHECK-NEXT: Name: __const_coal (5F 5F 63 6F 6E 73 74 5F 63 6F 61 6C 00 00 00 00)
+// CHECK-NEXT: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT: Address: 0x4
+// CHECK-NEXT: Size: 0x0
+// CHECK-NEXT: Offset: 412
+// CHECK-NEXT: Alignment: 0
+// CHECK-NEXT: RelocationOffset: 0x0
+// CHECK-NEXT: RelocationCount: 0
+// CHECK-NEXT: Type: 0xB
+// CHECK-NEXT: Attributes [ (0x0)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Reserved1: 0x0
+// CHECK-NEXT: Reserved2: 0x0
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT: )
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: Relocations [
+// CHECK-NEXT: Section __datacoal_nt {
+// CHECK-NEXT: 0x0 0 2 1 GENERIC_RELOC_VANILLA 0 __ZTS3optIbE
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: Symbols [
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: __ZTI3optIbE (14)
+// CHECK-NEXT: Extern
+// CHECK-NEXT: Type: Section (0xE)
+// CHECK-NEXT: Section: __datacoal_nt (0x2)
+// CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT: Flags [ (0x80)
+// CHECK-NEXT: WeakDef (0x80)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Value: 0x0
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: __ZTS3optIbE (1)
+// CHECK-NEXT: Extern
+// CHECK-NEXT: Type: Section (0xE)
+// CHECK-NEXT: Section: __const_coal (0x3)
+// CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT: Flags [ (0x80)
+// CHECK-NEXT: WeakDef (0x80)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Value: 0x4
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: Indirect Symbols {
+// CHECK-NEXT: Number: 0
+// CHECK-NEXT: Symbols [
+// CHECK-NEXT: ]
+// CHECK-NEXT: }
+// CHECK-NEXT: Segment {
+// CHECK-NEXT: Cmd: LC_SEGMENT
+// CHECK-NEXT: Name:
+// CHECK-NEXT: Size: 260
+// CHECK-NEXT: vmaddr: 0x0
+// CHECK-NEXT: vmsize: 0x4
+// CHECK-NEXT: fileoff: 408
+// CHECK-NEXT: filesize: 4
+// CHECK-NEXT: maxprot: rwx
+// CHECK-NEXT: initprot: rwx
+// CHECK-NEXT: nsects: 3
+// CHECK-NEXT: flags: 0x0
+// CHECK-NEXT: }
+// CHECK-NEXT: Dysymtab {
+// CHECK-NEXT: ilocalsym: 0
+// CHECK-NEXT: nlocalsym: 0
+// CHECK-NEXT: iextdefsym: 0
+// CHECK-NEXT: nextdefsym: 2
+// CHECK-NEXT: iundefsym: 2
+// CHECK-NEXT: nundefsym: 0
+// CHECK-NEXT: tocoff: 0
+// CHECK-NEXT: ntoc: 0
+// CHECK-NEXT: modtaboff: 0
+// CHECK-NEXT: nmodtab: 0
+// CHECK-NEXT: extrefsymoff: 0
+// CHECK-NEXT: nextrefsyms: 0
+// CHECK-NEXT: indirectsymoff: 0
+// CHECK-NEXT: nindirectsyms: 0
+// CHECK-NEXT: extreloff: 0
+// CHECK-NEXT: nextrel: 0
+// CHECK-NEXT: locreloff: 0
+// CHECK-NEXT: nlocrel: 0
+// CHECK-NEXT: }
diff --git a/test/MC/MachO/x86-data-in-code.ll b/test/MC/MachO/x86-data-in-code.ll
index c2e136fbeb08..b8d3a185cbd9 100644
--- a/test/MC/MachO/x86-data-in-code.ll
+++ b/test/MC/MachO/x86-data-in-code.ll
@@ -1,9 +1,8 @@
-; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | macho-dump | FileCheck %s
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-readobj -macho-data-in-code | FileCheck %s
; There should not be a data-in-code load command (type 0x29) for x86_64
; jump tables, even though they are in the text section.
-; CHECK: 'num_load_commands'
-; CHECK-NOT: (('command', 41)
+; CHECK-NOT: DataInCode {
define void @foo(i32* %ptr) nounwind ssp {
%tmp = load i32, i32* %ptr, align 4
diff --git a/test/MC/MachO/x86_32-optimal_nop.s b/test/MC/MachO/x86_32-optimal_nop.s
index 24751409bdb4..01d8a1f6eb2a 100644
--- a/test/MC/MachO/x86_32-optimal_nop.s
+++ b/test/MC/MachO/x86_32-optimal_nop.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t -macho-segment -macho-dysymtab -macho-indirect-symbols | FileCheck %s
# 1 byte nop test
.align 4, 0 # start with 16 byte alignment filled with zeros
@@ -157,101 +157,132 @@ f0:
.align 4, 0x90
.long 0
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 296)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 192)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 372)
-// CHECK: ('file_offset', 324)
-// CHECK: ('file_size', 372)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 337)
-// CHECK: ('offset', 324)
-// CHECK: ('alignment', 4)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000400)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', 'c390c300 00000000 00000000 00000000 c3c36690 c3000000 00000000 00000000 c30f1f00 c3000000 00000000 00000000 c3c3c3c3 0f1f4000 c3000000 00000000 c3c3c30f 1f440000 c3000000 00000000 c3c3660f 1f440000 c3000000 00000000 c30f1f80 00000000 c3000000 00000000 c3c3c3c3 c3c3c3c3 c3000000 00000000 c3c3c3c3 c3c3c366 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c3c3c3 c3c3c366 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c3c3c3 c366662e 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c3c3c3 6666662e 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c3c366 6666662e 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3c36666 6666662e 0f1f8400 00000000 c3000000 00000000 00000000 00000000 c3666666 6666662e 0f1f8400 00000000 c3')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 352)
-// CHECK: ('size', 20)
-// CHECK: ('offset', 676)
-// CHECK: ('alignment', 4)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '00909090 90909090 90909090 90909090 00000000')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 696)
-// CHECK: ('nsyms', 1)
-// CHECK: ('stroff', 708)
-// CHECK: ('strsize', 4)
-// CHECK: ('_string_data', '\x00f0\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 352)
-// CHECK: ('_string', 'f0')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 1)
-// CHECK: ('iextdefsym', 1)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 1)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 312
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x151
+// CHECK: Offset: 340
+// CHECK: Alignment: 4
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800004)
+// CHECK: PureInstructions (0x800000)
+// CHECK: SomeInstructions (0x4)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: C390C300 00000000 00000000 00000000 |................|
+// CHECK: 0010: C3C36690 C3000000 00000000 00000000 |..f.............|
+// CHECK: 0020: C30F1F00 C3000000 00000000 00000000 |................|
+// CHECK: 0030: C3C3C3C3 0F1F4000 C3000000 00000000 |......@.........|
+// CHECK: 0040: C3C3C30F 1F440000 C3000000 00000000 |.....D..........|
+// CHECK: 0050: C3C3660F 1F440000 C3000000 00000000 |..f..D..........|
+// CHECK: 0060: C30F1F80 00000000 C3000000 00000000 |................|
+// CHECK: 0070: C3C3C3C3 C3C3C3C3 C3000000 00000000 |................|
+// CHECK: 0080: C3C3C3C3 C3C3C366 0F1F8400 00000000 |.......f........|
+// CHECK: 0090: C3000000 00000000 00000000 00000000 |................|
+// CHECK: 00A0: C3C3C3C3 C3C3C366 0F1F8400 00000000 |.......f........|
+// CHECK: 00B0: C3000000 00000000 00000000 00000000 |................|
+// CHECK: 00C0: C3C3C3C3 C366662E 0F1F8400 00000000 |.....ff.........|
+// CHECK: 00D0: C3000000 00000000 00000000 00000000 |................|
+// CHECK: 00E0: C3C3C3C3 6666662E 0F1F8400 00000000 |....fff.........|
+// CHECK: 00F0: C3000000 00000000 00000000 00000000 |................|
+// CHECK: 0100: C3C3C366 6666662E 0F1F8400 00000000 |...ffff.........|
+// CHECK: 0110: C3000000 00000000 00000000 00000000 |................|
+// CHECK: 0120: C3C36666 6666662E 0F1F8400 00000000 |..fffff.........|
+// CHECK: 0130: C3000000 00000000 00000000 00000000 |................|
+// CHECK: 0140: C3666666 6666662E 0F1F8400 00000000 |.ffffff.........|
+// CHECK: 0150: C3 |.|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __const (5F 5F 63 6F 6E 73 74 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x160
+// CHECK: Size: 0x14
+// CHECK: Offset: 692
+// CHECK: Alignment: 4
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00909090 90909090 90909090 90909090 |................|
+// CHECK: 0010: 00000000 |....|
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: f0 (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __const (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x160
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 192
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x174
+// CHECK: fileoff: 340
+// CHECK: filesize: 372
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 1
+// CHECK: iextdefsym: 1
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 1
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/x86_32-scattered-reloc-fallback.s b/test/MC/MachO/x86_32-scattered-reloc-fallback.s
index 3de52b4228d0..b2dc27b3e1cb 100644
--- a/test/MC/MachO/x86_32-scattered-reloc-fallback.s
+++ b/test/MC/MachO/x86_32-scattered-reloc-fallback.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -s -sd | FileCheck %s
// rdar://15526046
@@ -24,4 +24,8 @@ _key64b_9:
// be relocated, in this case _key64b_9+4, value correct in the instruction.
// 01020f55 c7056475530100000000 movl $0x0, 0x1537564
-// CHECK: 90c70564 75530100 000000')
+// CHECK: SectionData (
+// CHECK: F75530: 90909090 90909090 90909090 90909090 |................|
+// CHECK: 1020F50: 90909090 90C70564 75530100 000000 |.......duS.....|
+// CHECK: 75530: 00000000 00000000 00000000 00000000 |................|
+// CHECK: )
diff --git a/test/MC/MachO/x86_32-sections.s b/test/MC/MachO/x86_32-sections.s
index 66ada2807ef9..a78ac17db1d4 100644
--- a/test/MC/MachO/x86_32-sections.s
+++ b/test/MC/MachO/x86_32-sections.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd --macho-segment | FileCheck %s
.text
.section __TEXT,__text,regular,pure_instructions
@@ -43,494 +43,700 @@
.section __TEXT,__picsymbolstub4,symbol_stubs,none,16
.subsections_via_symbols
-
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
-// CHECK: ('load_commands_size', 2504)
-// CHECK: ('flag', 8192)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 2504)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 0)
-// CHECK: ('file_offset', 2532)
-// CHECK: ('file_size', 0)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 36)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 1
-// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 2
-// CHECK: (('section_name', '__static_const\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 3
-// CHECK: (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x2)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 4
-// CHECK: (('section_name', '__literal4\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x3)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 5
-// CHECK: (('section_name', '__literal8\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 3)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x4)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 6
-// CHECK: (('section_name', '__literal16\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 4)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0xe)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 7
-// CHECK: (('section_name', '__constructor\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 8
-// CHECK: (('section_name', '__destructor\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 9
-// CHECK: (('section_name', '__symbol_stub\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000008)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 16)
-// CHECK: ),
-// CHECK: # Section 10
-// CHECK: (('section_name', '__picsymbol_stub')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000008)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 26)
-// CHECK: ),
-// CHECK: # Section 11
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 12
-// CHECK: (('section_name', '__static_data\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 13
-// CHECK: (('section_name', '__nl_symbol_ptr\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x6)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 14
-// CHECK: (('section_name', '__la_symbol_ptr\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x7)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 15
-// CHECK: (('section_name', '__dyld\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 16
-// CHECK: (('section_name', '__mod_init_func\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x9)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 17
-// CHECK: (('section_name', '__mod_term_func\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0xa)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 18
-// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 19
-// CHECK: (('section_name', '__class\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 20
-// CHECK: (('section_name', '__meta_class\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 21
-// CHECK: (('section_name', '__cat_cls_meth\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 22
-// CHECK: (('section_name', '__cat_inst_meth\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 23
-// CHECK: (('section_name', '__protocol\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 24
-// CHECK: (('section_name', '__string_object\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 25
-// CHECK: (('section_name', '__cls_meth\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 26
-// CHECK: (('section_name', '__inst_meth\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 27
-// CHECK: (('section_name', '__cls_refs\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000005)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 28
-// CHECK: (('section_name', '__message_refs\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000005)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 29
-// CHECK: (('section_name', '__symbols\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 30
-// CHECK: (('section_name', '__category\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 31
-// CHECK: (('section_name', '__class_vars\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 32
-// CHECK: (('section_name', '__instance_vars\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 33
-// CHECK: (('section_name', '__module_info\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: # Section 34
-// CHECK: (('section_name', '__selector_strs\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x2)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: # Section 35
-// CHECK: (('section_name', '__picsymbolstub4')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2532)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x8)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 16)
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 2
+// CHECK: SizeOfLoadCommands: 2520
+// CHECK: Flags [ (0x2000)
+// CHECK: MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __const (5F 5F 63 6F 6E 73 74 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 2
+// CHECK: Name: __static_const (5F 5F 73 74 61 74 69 63 5F 63 6F 6E 73 74 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 3
+// CHECK: Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: ExtReloc (0x2)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 4
+// CHECK: Name: __literal4 (5F 5F 6C 69 74 65 72 61 6C 34 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x3
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 5
+// CHECK: Name: __literal8 (5F 5F 6C 69 74 65 72 61 6C 38 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 3
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: SomeInstructions (0x4)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 6
+// CHECK: Name: __literal16 (5F 5F 6C 69 74 65 72 61 6C 31 36 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 4
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0xE
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 7
+// CHECK: Name: __constructor (5F 5F 63 6F 6E 73 74 72 75 63 74 6F 72 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 8
+// CHECK: Name: __destructor (5F 5F 64 65 73 74 72 75 63 74 6F 72 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 9
+// CHECK: Name: __symbol_stub (5F 5F 73 79 6D 62 6F 6C 5F 73 74 75 62 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x8
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x10
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 10
+// CHECK: Name: __picsymbol_stub (5F 5F 70 69 63 73 79 6D 62 6F 6C 5F 73 74 75 62)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x8
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x1A
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 11
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 12
+// CHECK: Name: __static_data (5F 5F 73 74 61 74 69 63 5F 64 61 74 61 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 13
+// CHECK: Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x6
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 14
+// CHECK: Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x7
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 15
+// CHECK: Name: __dyld (5F 5F 64 79 6C 64 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 16
+// CHECK: Name: __mod_init_func (5F 5F 6D 6F 64 5F 69 6E 69 74 5F 66 75 6E 63 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x9
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 17
+// CHECK: Name: __mod_term_func (5F 5F 6D 6F 64 5F 74 65 72 6D 5F 66 75 6E 63 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0xA
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 18
+// CHECK: Name: __const (5F 5F 63 6F 6E 73 74 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 19
+// CHECK: Name: __class (5F 5F 63 6C 61 73 73 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 20
+// CHECK: Name: __meta_class (5F 5F 6D 65 74 61 5F 63 6C 61 73 73 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 21
+// CHECK: Name: __cat_cls_meth (5F 5F 63 61 74 5F 63 6C 73 5F 6D 65 74 68 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 22
+// CHECK: Name: __cat_inst_meth (5F 5F 63 61 74 5F 69 6E 73 74 5F 6D 65 74 68 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 23
+// CHECK: Name: __protocol (5F 5F 70 72 6F 74 6F 63 6F 6C 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 24
+// CHECK: Name: __string_object (5F 5F 73 74 72 69 6E 67 5F 6F 62 6A 65 63 74 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 25
+// CHECK: Name: __cls_meth (5F 5F 63 6C 73 5F 6D 65 74 68 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 26
+// CHECK: Name: __inst_meth (5F 5F 69 6E 73 74 5F 6D 65 74 68 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 27
+// CHECK: Name: __cls_refs (5F 5F 63 6C 73 5F 72 65 66 73 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x5
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 28
+// CHECK: Name: __message_refs (5F 5F 6D 65 73 73 61 67 65 5F 72 65 66 73 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x5
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 29
+// CHECK: Name: __symbols (5F 5F 73 79 6D 62 6F 6C 73 00 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 30
+// CHECK: Name: __category (5F 5F 63 61 74 65 67 6F 72 79 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 31
+// CHECK: Name: __class_vars (5F 5F 63 6C 61 73 73 5F 76 61 72 73 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 32
+// CHECK: Name: __instance_vars (5F 5F 69 6E 73 74 61 6E 63 65 5F 76 61 72 73 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 33
+// CHECK: Name: __module_info (5F 5F 6D 6F 64 75 6C 65 5F 69 6E 66 6F 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 34
+// CHECK: Name: __selector_strs (5F 5F 73 65 6C 65 63 74 6F 72 5F 73 74 72 73 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: ExtReloc (0x2)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 35
+// CHECK: Name: __picsymbolstub4 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 34)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2548
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x8
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x10
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 2504
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x0
+// CHECK: fileoff: 2548
+// CHECK: filesize: 0
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 36
+// CHECK: flags: 0x0
+// CHECK: }
diff --git a/test/MC/MachO/x86_32-symbols.s b/test/MC/MachO/x86_32-symbols.s
index 95aa507305fd..1a4ffafb745d 100644
--- a/test/MC/MachO/x86_32-symbols.s
+++ b/test/MC/MachO/x86_32-symbols.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t --macho-segment --macho-dysymtab --macho-indirect-symbols | FileCheck %s
.text
L0:
@@ -121,921 +121,1087 @@ D38:
L39:
D39:
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 2608)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 2504)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 0)
-// CHECK: ('file_offset', 2636)
-// CHECK: ('file_size', 0)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 36)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 1
-// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 2
-// CHECK: (('section_name', '__static_const\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 3
-// CHECK: (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x2)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 4
-// CHECK: (('section_name', '__literal4\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x3)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 5
-// CHECK: (('section_name', '__literal8\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 3)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x4)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 6
-// CHECK: (('section_name', '__literal16\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 4)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0xe)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 7
-// CHECK: (('section_name', '__constructor\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 8
-// CHECK: (('section_name', '__destructor\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 9
-// CHECK: (('section_name', '__symbol_stub\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000008)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 16)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 10
-// CHECK: (('section_name', '__picsymbol_stub')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000008)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 26)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 11
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 12
-// CHECK: (('section_name', '__static_data\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 13
-// CHECK: (('section_name', '__nl_symbol_ptr\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x6)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 14
-// CHECK: (('section_name', '__la_symbol_ptr\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x7)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 15
-// CHECK: (('section_name', '__dyld\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 16
-// CHECK: (('section_name', '__mod_init_func\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x9)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 17
-// CHECK: (('section_name', '__mod_term_func\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0xa)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 18
-// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 19
-// CHECK: (('section_name', '__class\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 20
-// CHECK: (('section_name', '__meta_class\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 21
-// CHECK: (('section_name', '__cat_cls_meth\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 22
-// CHECK: (('section_name', '__cat_inst_meth\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 23
-// CHECK: (('section_name', '__protocol\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 24
-// CHECK: (('section_name', '__string_object\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 25
-// CHECK: (('section_name', '__cls_meth\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 26
-// CHECK: (('section_name', '__inst_meth\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 27
-// CHECK: (('section_name', '__cls_refs\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000005)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 28
-// CHECK: (('section_name', '__message_refs\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000005)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 29
-// CHECK: (('section_name', '__symbols\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 30
-// CHECK: (('section_name', '__category\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 31
-// CHECK: (('section_name', '__class_vars\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 32
-// CHECK: (('section_name', '__instance_vars\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 33
-// CHECK: (('section_name', '__module_info\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 34
-// CHECK: (('section_name', '__selector_strs\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x2)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 35
-// CHECK: (('section_name', '__picsymbolstub4')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2636)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x8)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 16)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 2636)
-// CHECK: ('nsyms', 40)
-// CHECK: ('stroff', 3116)
-// CHECK: ('strsize', 152)
-// CHECK: ('_string_data', '\x00D9\x00D39\x00D29\x00D19\x00D8\x00D38\x00D28\x00D18\x00D7\x00D37\x00D27\x00D17\x00D6\x00D36\x00D26\x00D16\x00D5\x00D35\x00D25\x00D15\x00D4\x00D34\x00D24\x00D14\x00D3\x00D33\x00D23\x00D13\x00D2\x00D32\x00D22\x00D12\x00D1\x00D31\x00D21\x00D11\x00D0\x00D30\x00D20\x00D10\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 136)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D0')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 121)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 1)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D1')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 106)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D2')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 91)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 3)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D3')
-// CHECK: ),
-// CHECK: # Symbol 4
-// CHECK: (('n_strx', 76)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 4)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D4')
-// CHECK: ),
-// CHECK: # Symbol 5
-// CHECK: (('n_strx', 61)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 5)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D5')
-// CHECK: ),
-// CHECK: # Symbol 6
-// CHECK: (('n_strx', 46)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 6)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D6')
-// CHECK: ),
-// CHECK: # Symbol 7
-// CHECK: (('n_strx', 31)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 7)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D7')
-// CHECK: ),
-// CHECK: # Symbol 8
-// CHECK: (('n_strx', 16)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 8)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D8')
-// CHECK: ),
-// CHECK: # Symbol 9
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 9)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D9')
-// CHECK: ),
-// CHECK: # Symbol 10
-// CHECK: (('n_strx', 147)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 10)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D10')
-// CHECK: ),
-// CHECK: # Symbol 11
-// CHECK: (('n_strx', 132)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 11)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D11')
-// CHECK: ),
-// CHECK: # Symbol 12
-// CHECK: (('n_strx', 117)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 12)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D12')
-// CHECK: ),
-// CHECK: # Symbol 13
-// CHECK: (('n_strx', 102)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 13)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D13')
-// CHECK: ),
-// CHECK: # Symbol 14
-// CHECK: (('n_strx', 87)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 14)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D14')
-// CHECK: ),
-// CHECK: # Symbol 15
-// CHECK: (('n_strx', 72)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 15)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D15')
-// CHECK: ),
-// CHECK: # Symbol 16
-// CHECK: (('n_strx', 57)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 16)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D16')
-// CHECK: ),
-// CHECK: # Symbol 17
-// CHECK: (('n_strx', 42)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 17)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D17')
-// CHECK: ),
-// CHECK: # Symbol 18
-// CHECK: (('n_strx', 27)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 18)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D18')
-// CHECK: ),
-// CHECK: # Symbol 19
-// CHECK: (('n_strx', 12)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 19)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D19')
-// CHECK: ),
-// CHECK: # Symbol 20
-// CHECK: (('n_strx', 143)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 20)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D20')
-// CHECK: ),
-// CHECK: # Symbol 21
-// CHECK: (('n_strx', 128)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 21)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D21')
-// CHECK: ),
-// CHECK: # Symbol 22
-// CHECK: (('n_strx', 113)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 22)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D22')
-// CHECK: ),
-// CHECK: # Symbol 23
-// CHECK: (('n_strx', 98)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 23)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D23')
-// CHECK: ),
-// CHECK: # Symbol 24
-// CHECK: (('n_strx', 83)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 24)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D24')
-// CHECK: ),
-// CHECK: # Symbol 25
-// CHECK: (('n_strx', 68)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 25)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D25')
-// CHECK: ),
-// CHECK: # Symbol 26
-// CHECK: (('n_strx', 53)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 26)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D26')
-// CHECK: ),
-// CHECK: # Symbol 27
-// CHECK: (('n_strx', 38)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 27)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D27')
-// CHECK: ),
-// CHECK: # Symbol 28
-// CHECK: (('n_strx', 23)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 28)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D28')
-// CHECK: ),
-// CHECK: # Symbol 29
-// CHECK: (('n_strx', 8)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 29)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D29')
-// CHECK: ),
-// CHECK: # Symbol 30
-// CHECK: (('n_strx', 139)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 30)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D30')
-// CHECK: ),
-// CHECK: # Symbol 31
-// CHECK: (('n_strx', 124)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 31)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D31')
-// CHECK: ),
-// CHECK: # Symbol 32
-// CHECK: (('n_strx', 109)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 32)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D32')
-// CHECK: ),
-// CHECK: # Symbol 33
-// CHECK: (('n_strx', 94)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 33)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D33')
-// CHECK: ),
-// CHECK: # Symbol 34
-// CHECK: (('n_strx', 79)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 34)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D34')
-// CHECK: ),
-// CHECK: # Symbol 35
-// CHECK: (('n_strx', 64)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 4)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D35')
-// CHECK: ),
-// CHECK: # Symbol 36
-// CHECK: (('n_strx', 49)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 4)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D36')
-// CHECK: ),
-// CHECK: # Symbol 37
-// CHECK: (('n_strx', 34)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 4)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D37')
-// CHECK: ),
-// CHECK: # Symbol 38
-// CHECK: (('n_strx', 19)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 35)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D38')
-// CHECK: ),
-// CHECK: # Symbol 39
-// CHECK: (('n_strx', 4)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 36)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'D39')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 40)
-// CHECK: ('iextdefsym', 40)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 40)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 2624
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __const (5F 5F 63 6F 6E 73 74 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 2
+// CHECK: Name: __static_const (5F 5F 73 74 61 74 69 63 5F 63 6F 6E 73 74 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 3
+// CHECK: Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: ExtReloc (0x2)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 4
+// CHECK: Name: __literal4 (5F 5F 6C 69 74 65 72 61 6C 34 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x3
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 5
+// CHECK: Name: __literal8 (5F 5F 6C 69 74 65 72 61 6C 38 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 3
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: SomeInstructions (0x4)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 6
+// CHECK: Name: __literal16 (5F 5F 6C 69 74 65 72 61 6C 31 36 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 4
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0xE
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 7
+// CHECK: Name: __constructor (5F 5F 63 6F 6E 73 74 72 75 63 74 6F 72 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 8
+// CHECK: Name: __destructor (5F 5F 64 65 73 74 72 75 63 74 6F 72 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 9
+// CHECK: Name: __symbol_stub (5F 5F 73 79 6D 62 6F 6C 5F 73 74 75 62 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x8
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x10
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 10
+// CHECK: Name: __picsymbol_stub (5F 5F 70 69 63 73 79 6D 62 6F 6C 5F 73 74 75 62)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x8
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x1A
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 11
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 12
+// CHECK: Name: __static_data (5F 5F 73 74 61 74 69 63 5F 64 61 74 61 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 13
+// CHECK: Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x6
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 14
+// CHECK: Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x7
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 15
+// CHECK: Name: __dyld (5F 5F 64 79 6C 64 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 16
+// CHECK: Name: __mod_init_func (5F 5F 6D 6F 64 5F 69 6E 69 74 5F 66 75 6E 63 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x9
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 17
+// CHECK: Name: __mod_term_func (5F 5F 6D 6F 64 5F 74 65 72 6D 5F 66 75 6E 63 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0xA
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 18
+// CHECK: Name: __const (5F 5F 63 6F 6E 73 74 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 19
+// CHECK: Name: __class (5F 5F 63 6C 61 73 73 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 20
+// CHECK: Name: __meta_class (5F 5F 6D 65 74 61 5F 63 6C 61 73 73 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 21
+// CHECK: Name: __cat_cls_meth (5F 5F 63 61 74 5F 63 6C 73 5F 6D 65 74 68 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 22
+// CHECK: Name: __cat_inst_meth (5F 5F 63 61 74 5F 69 6E 73 74 5F 6D 65 74 68 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 23
+// CHECK: Name: __protocol (5F 5F 70 72 6F 74 6F 63 6F 6C 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 24
+// CHECK: Name: __string_object (5F 5F 73 74 72 69 6E 67 5F 6F 62 6A 65 63 74 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 25
+// CHECK: Name: __cls_meth (5F 5F 63 6C 73 5F 6D 65 74 68 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 26
+// CHECK: Name: __inst_meth (5F 5F 69 6E 73 74 5F 6D 65 74 68 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 27
+// CHECK: Name: __cls_refs (5F 5F 63 6C 73 5F 72 65 66 73 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x5
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 28
+// CHECK: Name: __message_refs (5F 5F 6D 65 73 73 61 67 65 5F 72 65 66 73 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x5
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 29
+// CHECK: Name: __symbols (5F 5F 73 79 6D 62 6F 6C 73 00 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 30
+// CHECK: Name: __category (5F 5F 63 61 74 65 67 6F 72 79 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 31
+// CHECK: Name: __class_vars (5F 5F 63 6C 61 73 73 5F 76 61 72 73 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 32
+// CHECK: Name: __instance_vars (5F 5F 69 6E 73 74 61 6E 63 65 5F 76 61 72 73 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 33
+// CHECK: Name: __module_info (5F 5F 6D 6F 64 75 6C 65 5F 69 6E 66 6F 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 34
+// CHECK: Name: __selector_strs (5F 5F 73 65 6C 65 63 74 6F 72 5F 73 74 72 73 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: ExtReloc (0x2)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 35
+// CHECK: Name: __picsymbolstub4 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 34)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2652
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x8
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x10
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: D0 (136)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D1 (121)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __text (0x1)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D2 (106)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __const (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D3 (91)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __static_const (0x3)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D4 (76)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __cstring (0x4)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D5 (61)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __literal4 (0x5)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D6 (46)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __literal8 (0x6)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D7 (31)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __literal16 (0x7)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D8 (16)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __constructor (0x8)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D9 (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __destructor (0x9)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D10 (147)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __symbol_stub (0xA)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D11 (132)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __picsymbol_stub (0xB)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D12 (117)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __data (0xC)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D13 (102)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __static_data (0xD)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D14 (87)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __nl_symbol_ptr (0xE)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D15 (72)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __la_symbol_ptr (0xF)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D16 (57)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __dyld (0x10)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D17 (42)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __mod_init_func (0x11)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D18 (27)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __mod_term_func (0x12)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D19 (12)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __const (0x13)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D20 (143)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __class (0x14)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D21 (128)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __meta_class (0x15)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D22 (113)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __cat_cls_meth (0x16)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D23 (98)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __cat_inst_meth (0x17)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D24 (83)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __protocol (0x18)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D25 (68)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __string_object (0x19)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D26 (53)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __cls_meth (0x1A)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D27 (38)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __inst_meth (0x1B)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D28 (23)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __cls_refs (0x1C)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D29 (8)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __message_refs (0x1D)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D30 (139)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __symbols (0x1E)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D31 (124)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __category (0x1F)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D32 (109)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __class_vars (0x20)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D33 (94)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __instance_vars (0x21)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D34 (79)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __module_info (0x22)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D35 (64)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __cstring (0x4)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D36 (49)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __cstring (0x4)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D37 (34)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __cstring (0x4)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D38 (19)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __selector_strs (0x23)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: D39 (4)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __picsymbolstub4 (0x24)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 2504
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x0
+// CHECK: fileoff: 2652
+// CHECK: filesize: 0
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 36
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 40
+// CHECK: iextdefsym: 40
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 40
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/x86_64-reloc-arithmetic.s b/test/MC/MachO/x86_64-reloc-arithmetic.s
index e82f69b6d477..de524791faa3 100644
--- a/test/MC/MachO/x86_64-reloc-arithmetic.s
+++ b/test/MC/MachO/x86_64-reloc-arithmetic.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -r -t | FileCheck %s
// rdar://9906375
.org 0x100
@@ -7,15 +7,31 @@ _bar = _foo + 2
_baz:
leaq _bar(%rip), %rcx
-// CHECK: ('_relocations', [
-// CHECK-NEXT: # Relocation 0
-// CHECK-NEXT: (('word-0', 0x103),
-// CHECK-NEXT: ('word-1', 0x1d000001))
-
-// CHECK: # Symbol 1
-// CHECK-NEXT: (('n_strx', 6)
-// CHECK-NEXT: ('n_type', 0xe)
-// CHECK-NEXT: ('n_sect', 1)
-// CHECK-NEXT: ('n_desc', 0)
-// CHECK-NEXT: ('n_value', 258)
-// CHECK-NEXT: ('_string', '_bar')
+// CHECK: File: <stdin>
+// CHECK-NEXT: Format: Mach-O 64-bit x86-64
+// CHECK-NEXT: Arch: x86_64
+// CHECK-NEXT: AddressSize: 64bit
+// CHECK-NEXT: Relocations [
+// CHECK-NEXT: Section __text {
+// CHECK-NEXT: 0x103 1 2 1 X86_64_RELOC_SIGNED 0 _bar
+// CHECK-NEXT: }
+// CHECK-NEXT: ]
+// CHECK-NEXT: Symbols [
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: _foo (11)
+// CHECK-NEXT: Type: Section (0xE)
+// CHECK-NEXT: Section: __text (0x1)
+// CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT: Flags [ (0x0)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Value: 0x100
+// CHECK-NEXT: }
+// CHECK-NEXT: Symbol {
+// CHECK-NEXT: Name: _bar (6)
+// CHECK-NEXT: Type: Section (0xE)
+// CHECK-NEXT: Section: __text (0x1)
+// CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT: Flags [ (0x0)
+// CHECK-NEXT: ]
+// CHECK-NEXT: Value: 0x102
+// CHECK-NEXT: }
diff --git a/test/MC/MachO/x86_64-sections.s b/test/MC/MachO/x86_64-sections.s
index 8efd35e6cbff..5ca83257f301 100644
--- a/test/MC/MachO/x86_64-sections.s
+++ b/test/MC/MachO/x86_64-sections.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r --macho-segment | FileCheck %s
.text
.section __TEXT,__text,regular,pure_instructions
@@ -39,523 +39,641 @@
.subsections_via_symbols
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 1)
-// CHECK: ('load_commands_size', 2552)
-// CHECK: ('flag', 8192)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 2552)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 0)
-// CHECK: ('file_offset', 2584)
-// CHECK: ('file_size', 0)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 31)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 1
-// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 2
-// CHECK: (('section_name', '__static_const\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 3
-// CHECK: (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x2)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 4
-// CHECK: (('section_name', '__literal4\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x3)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 5
-// CHECK: (('section_name', '__literal8\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 3)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x4)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 6
-// CHECK: (('section_name', '__literal16\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 4)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0xe)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 7
-// CHECK: (('section_name', '__constructor\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 8
-// CHECK: (('section_name', '__destructor\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 9
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 10
-// CHECK: (('section_name', '__static_data\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 11
-// CHECK: (('section_name', '__dyld\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 12
-// CHECK: (('section_name', '__mod_init_func\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x9)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 13
-// CHECK: (('section_name', '__mod_term_func\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0xa)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 14
-// CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 15
-// CHECK: (('section_name', '__class\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 16
-// CHECK: (('section_name', '__meta_class\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 17
-// CHECK: (('section_name', '__cat_cls_meth\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 18
-// CHECK: (('section_name', '__cat_inst_meth\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 19
-// CHECK: (('section_name', '__protocol\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 20
-// CHECK: (('section_name', '__string_object\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 21
-// CHECK: (('section_name', '__cls_meth\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 22
-// CHECK: (('section_name', '__inst_meth\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 23
-// CHECK: (('section_name', '__cls_refs\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000005)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 24
-// CHECK: (('section_name', '__message_refs\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 2)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000005)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 25
-// CHECK: (('section_name', '__symbols\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 26
-// CHECK: (('section_name', '__category\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 27
-// CHECK: (('section_name', '__class_vars\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 28
-// CHECK: (('section_name', '__instance_vars\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 29
-// CHECK: (('section_name', '__module_info\x00\x00\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x10000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 30
-// CHECK: (('section_name', '__selector_strs\x00')
-// CHECK: ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 2584)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x2)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 2
+// CHECK: SizeOfLoadCommands: 2568
+// CHECK: Flags [ (0x2000)
+// CHECK: MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __const (5F 5F 63 6F 6E 73 74 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 2
+// CHECK: Name: __static_const (5F 5F 73 74 61 74 69 63 5F 63 6F 6E 73 74 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 3
+// CHECK: Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: ExtReloc (0x2)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 4
+// CHECK: Name: __literal4 (5F 5F 6C 69 74 65 72 61 6C 34 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x3
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 5
+// CHECK: Name: __literal8 (5F 5F 6C 69 74 65 72 61 6C 38 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 3
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: SomeInstructions (0x4)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 6
+// CHECK: Name: __literal16 (5F 5F 6C 69 74 65 72 61 6C 31 36 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 4
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0xE
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 7
+// CHECK: Name: __constructor (5F 5F 63 6F 6E 73 74 72 75 63 74 6F 72 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 8
+// CHECK: Name: __destructor (5F 5F 64 65 73 74 72 75 63 74 6F 72 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 9
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 10
+// CHECK: Name: __static_data (5F 5F 73 74 61 74 69 63 5F 64 61 74 61 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 11
+// CHECK: Name: __dyld (5F 5F 64 79 6C 64 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 12
+// CHECK: Name: __mod_init_func (5F 5F 6D 6F 64 5F 69 6E 69 74 5F 66 75 6E 63 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x9
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 13
+// CHECK: Name: __mod_term_func (5F 5F 6D 6F 64 5F 74 65 72 6D 5F 66 75 6E 63 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0xA
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 14
+// CHECK: Name: __const (5F 5F 63 6F 6E 73 74 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 15
+// CHECK: Name: __class (5F 5F 63 6C 61 73 73 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 16
+// CHECK: Name: __meta_class (5F 5F 6D 65 74 61 5F 63 6C 61 73 73 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 17
+// CHECK: Name: __cat_cls_meth (5F 5F 63 61 74 5F 63 6C 73 5F 6D 65 74 68 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 18
+// CHECK: Name: __cat_inst_meth (5F 5F 63 61 74 5F 69 6E 73 74 5F 6D 65 74 68 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 19
+// CHECK: Name: __protocol (5F 5F 70 72 6F 74 6F 63 6F 6C 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 20
+// CHECK: Name: __string_object (5F 5F 73 74 72 69 6E 67 5F 6F 62 6A 65 63 74 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 21
+// CHECK: Name: __cls_meth (5F 5F 63 6C 73 5F 6D 65 74 68 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 22
+// CHECK: Name: __inst_meth (5F 5F 69 6E 73 74 5F 6D 65 74 68 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 23
+// CHECK: Name: __cls_refs (5F 5F 63 6C 73 5F 72 65 66 73 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x5
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 24
+// CHECK: Name: __message_refs (5F 5F 6D 65 73 73 61 67 65 5F 72 65 66 73 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 2
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x5
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 25
+// CHECK: Name: __symbols (5F 5F 73 79 6D 62 6F 6C 73 00 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 26
+// CHECK: Name: __category (5F 5F 63 61 74 65 67 6F 72 79 00 00 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 27
+// CHECK: Name: __class_vars (5F 5F 63 6C 61 73 73 5F 76 61 72 73 00 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 28
+// CHECK: Name: __instance_vars (5F 5F 69 6E 73 74 61 6E 63 65 5F 76 61 72 73 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 29
+// CHECK: Name: __module_info (5F 5F 6D 6F 64 75 6C 65 5F 69 6E 66 6F 00 00 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x100000)
+// CHECK: NoDeadStrip (0x100000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 30
+// CHECK: Name: __selector_strs (5F 5F 73 65 6C 65 63 74 6F 72 5F 73 74 72 73 00)
+// CHECK: Segment: __OBJC (5F 5F 4F 42 4A 43 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 2600
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: ExtReloc (0x2)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 2552
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x0
+// CHECK: fileoff: 2600
+// CHECK: filesize: 0
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 31
+// CHECK: flags: 0x0
+// CHECK: }
diff --git a/test/MC/MachO/zerofill-1.s b/test/MC/MachO/zerofill-1.s
index 805a7861e1fe..c3de2ebbf795 100644
--- a/test/MC/MachO/zerofill-1.s
+++ b/test/MC/MachO/zerofill-1.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t --macho-segment --macho-dysymtab --macho-indirect-symbols | FileCheck %s
.text
.byte 0 // Align to 2**3 bytes, not 2**1
@@ -8,114 +8,124 @@
.data
.align 3
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 364)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 260)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 10)
-// CHECK: ('file_offset', 392)
-// CHECK: ('file_size', 8)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 3)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 1)
-// CHECK: ('offset', 392)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 1
-// CHECK: (('section_name', '__common\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 8)
-// CHECK: ('size', 2)
-// CHECK: ('offset', 0)
-// CHECK: ('alignment', 1)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x1)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 2
-// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 8)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 400)
-// CHECK: ('alignment', 3)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x0)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 400)
-// CHECK: ('nsyms', 1)
-// CHECK: ('stroff', 412)
-// CHECK: ('strsize', 8)
-// CHECK: ('_string_data', '\x00zfill\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', 'zfill')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 1)
-// CHECK: ('iextdefsym', 1)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 1)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 380
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x1
+// CHECK: Offset: 408
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00 |.|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __common (5F 5F 63 6F 6D 6D 6F 6E 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x8
+// CHECK: Size: 0x2
+// CHECK: Offset: 0
+// CHECK: Alignment: 1
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: LocReloc (0x1)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 2
+// CHECK: Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x8
+// CHECK: Size: 0x0
+// CHECK: Offset: 416
+// CHECK: Alignment: 3
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: zfill (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __common (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 260
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0xA
+// CHECK: fileoff: 408
+// CHECK: filesize: 8
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 3
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 1
+// CHECK: iextdefsym: 1
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 1
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/zerofill-2.s b/test/MC/MachO/zerofill-2.s
index 16577e41d03a..e2e2dfac6fe6 100644
--- a/test/MC/MachO/zerofill-2.s
+++ b/test/MC/MachO/zerofill-2.s
@@ -1,103 +1,110 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t --macho-segment --macho-dysymtab --macho-indirect-symbols | FileCheck %s
.byte 0
// This file has size 2, the tail padding doesn't count.
.zerofill __DATA, __bss, sym_a, 1
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 296)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 192)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 2)
-// CHECK: ('file_offset', 324)
-// CHECK: ('file_size', 1)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 1)
-// CHECK: ('offset', 324)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 1
-// CHECK: (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 1)
-// CHECK: ('size', 1)
-// CHECK: ('offset', 0)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x1)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 328)
-// CHECK: ('nsyms', 1)
-// CHECK: ('stroff', 340)
-// CHECK: ('strsize', 8)
-// CHECK: ('_string_data', '\x00sym_a\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 1)
-// CHECK: ('_string', 'sym_a')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 1)
-// CHECK: ('iextdefsym', 1)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 1)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 312
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x1
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 00 |.|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __bss (5F 5F 62 73 73 00 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x1
+// CHECK: Size: 0x1
+// CHECK: Offset: 0
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: LocReloc (0x1)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: sym_a (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x1
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 192
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x2
+// CHECK: fileoff: 340
+// CHECK: filesize: 1
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 1
+// CHECK: iextdefsym: 1
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 1
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/zerofill-3.s b/test/MC/MachO/zerofill-3.s
index a4cd31ec0a40..2e4ff2781329 100644
--- a/test/MC/MachO/zerofill-3.s
+++ b/test/MC/MachO/zerofill-3.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t --macho-segment --macho-dysymtab --macho-indirect-symbols | FileCheck %s
// FIXME: We don't get the order right currently, the assembler first
// orders the symbols, then assigns addresses. :(
@@ -19,123 +19,134 @@
.lcomm sym_lcomm_B, 4
.endif
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 296)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 1)
-// CHECK: ('size', 192)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 16)
-// CHECK: ('file_offset', 324)
-// CHECK: ('file_size', 0)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 0)
-// CHECK: ('offset', 324)
-// CHECK: ('alignment', 0)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: # Section 1
-// CHECK: (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 16)
-// CHECK: ('offset', 0)
-// CHECK: ('alignment', 4)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x1)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 324)
-// CHECK: ('nsyms', 4)
-// CHECK: ('stroff', 372)
-// CHECK: ('strsize', 52)
-// CHECK: ('_string_data', '\x00sym_lcomm_D\x00sym_lcomm_C\x00sym_lcomm_B\x00sym_lcomm_A\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 37)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', 'sym_lcomm_A')
-// CHECK: ),
-// CHECK: # Symbol 1
-// CHECK: (('n_strx', 25)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 12)
-// CHECK: ('_string', 'sym_lcomm_B')
-// CHECK: ),
-// CHECK: # Symbol 2
-// CHECK: (('n_strx', 13)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', 'sym_lcomm_C')
-// CHECK: ),
-// CHECK: # Symbol 3
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xf)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 4)
-// CHECK: ('_string', 'sym_lcomm_D')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 2)
-// CHECK: ('iextdefsym', 2)
-// CHECK: ('nextdefsym', 2)
-// CHECK: ('iundefsym', 4)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic (0xFEEDFACE)
+// CHECK: CpuType: X86 (0x7)
+// CHECK: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 312
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x0
+// CHECK: Offset: 340
+// CHECK: Alignment: 0
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: SectionData (
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __bss (5F 5F 62 73 73 00 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x10
+// CHECK: Offset: 0
+// CHECK: Alignment: 4
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: LocReloc (0x1)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: sym_lcomm_A (37)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lcomm_B (25)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0xC
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lcomm_C (13)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: sym_lcomm_D (1)
+// CHECK: Extern
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x4
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT
+// CHECK: Name:
+// CHECK: Size: 192
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x10
+// CHECK: fileoff: 340
+// CHECK: filesize: 0
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 2
+// CHECK: iextdefsym: 2
+// CHECK: nextdefsym: 2
+// CHECK: iundefsym: 4
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/zerofill-4.s b/test/MC/MachO/zerofill-4.s
index d9c987c9b65a..b99e6289691e 100644
--- a/test/MC/MachO/zerofill-4.s
+++ b/test/MC/MachO/zerofill-4.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -t | FileCheck %s
.zerofill __DATA,__bss,_fill0,1,0
.zerofill __DATA,__bss,_a,4,2
@@ -9,27 +9,81 @@
.zerofill __DATA,__bss,_fill3,1,0
.zerofill __DATA,__bss,_d,4,5
-// CHECK: # Symbol 0
-// CHECK: ('n_value', 0)
-// CHECK: ('_string', '_fill0')
-// CHECK: # Symbol 1
-// CHECK: ('n_value', 4)
-// CHECK: ('_string', '_a')
-// CHECK: # Symbol 2
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', '_fill1')
-// CHECK: # Symbol 3
-// CHECK: ('n_value', 16)
-// CHECK: ('_string', '_b')
-// CHECK: # Symbol 4
-// CHECK: ('n_value', 20)
-// CHECK: ('_string', '_fill2')
-// CHECK: # Symbol 5
-// CHECK: ('n_value', 32)
-// CHECK: ('_string', '_c')
-// CHECK: # Symbol 6
-// CHECK: ('n_value', 36)
-// CHECK: ('_string', '_fill3')
-// CHECK: # Symbol 7
-// CHECK: ('n_value', 64)
-// CHECK: ('_string', '_d')
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _fill0 (34)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x0
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _a (10)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x4
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _fill1 (27)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _b (7)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x10
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _fill2 (20)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x14
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _c (4)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x20
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _fill3 (13)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x24
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _d (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x40
+// CHECK: }
+// CHECK: ]
diff --git a/test/MC/MachO/zerofill-5.s b/test/MC/MachO/zerofill-5.s
index 91f251b9983c..b688e6b0692e 100644
--- a/test/MC/MachO/zerofill-5.s
+++ b/test/MC/MachO/zerofill-5.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -file-headers -s -sd -r -t --macho-segment --macho-dysymtab --macho-indirect-symbols | FileCheck %s
.text
.align 3
@@ -6,104 +6,109 @@
.zerofill __DATA,__bss,_g0,8,3
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 336)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK: # Load Command 0
-// CHECK: (('command', 25)
-// CHECK: ('size', 232)
-// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('vm_addr', 0)
-// CHECK: ('vm_size', 16)
-// CHECK: ('file_offset', 368)
-// CHECK: ('file_size', 4)
-// CHECK: ('maxprot', 7)
-// CHECK: ('initprot', 7)
-// CHECK: ('num_sections', 2)
-// CHECK: ('flags', 0)
-// CHECK: ('sections', [
-// CHECK: # Section 0
-// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 0)
-// CHECK: ('size', 4)
-// CHECK: ('offset', 368)
-// CHECK: ('alignment', 3)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x80000000)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', '02000000')
-// CHECK: # Section 1
-// CHECK: (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK: ('address', 8)
-// CHECK: ('size', 8)
-// CHECK: ('offset', 0)
-// CHECK: ('alignment', 3)
-// CHECK: ('reloc_offset', 0)
-// CHECK: ('num_reloc', 0)
-// CHECK: ('flags', 0x1)
-// CHECK: ('reserved1', 0)
-// CHECK: ('reserved2', 0)
-// CHECK: ('reserved3', 0)
-// CHECK: ),
-// CHECK: ('_relocations', [
-// CHECK: ])
-// CHECK: ('_section_data', 'cffaedfe 07000001')
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 1
-// CHECK: (('command', 2)
-// CHECK: ('size', 24)
-// CHECK: ('symoff', 372)
-// CHECK: ('nsyms', 1)
-// CHECK: ('stroff', 388)
-// CHECK: ('strsize', 8)
-// CHECK: ('_string_data', '\x00_g0\x00\x00\x00\x00')
-// CHECK: ('_symbols', [
-// CHECK: # Symbol 0
-// CHECK: (('n_strx', 1)
-// CHECK: ('n_type', 0xe)
-// CHECK: ('n_sect', 2)
-// CHECK: ('n_desc', 0)
-// CHECK: ('n_value', 8)
-// CHECK: ('_string', '_g0')
-// CHECK: ),
-// CHECK: ])
-// CHECK: ),
-// CHECK: # Load Command 2
-// CHECK: (('command', 11)
-// CHECK: ('size', 80)
-// CHECK: ('ilocalsym', 0)
-// CHECK: ('nlocalsym', 1)
-// CHECK: ('iextdefsym', 1)
-// CHECK: ('nextdefsym', 0)
-// CHECK: ('iundefsym', 1)
-// CHECK: ('nundefsym', 0)
-// CHECK: ('tocoff', 0)
-// CHECK: ('ntoc', 0)
-// CHECK: ('modtaboff', 0)
-// CHECK: ('nmodtab', 0)
-// CHECK: ('extrefsymoff', 0)
-// CHECK: ('nextrefsyms', 0)
-// CHECK: ('indirectsymoff', 0)
-// CHECK: ('nindirectsyms', 0)
-// CHECK: ('extreloff', 0)
-// CHECK: ('nextrel', 0)
-// CHECK: ('locreloff', 0)
-// CHECK: ('nlocrel', 0)
-// CHECK: ('_indirect_symbols', [
-// CHECK: ])
-// CHECK: ),
-// CHECK: ])
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 64-bit x86-64
+// CHECK: Arch: x86_64
+// CHECK: AddressSize: 64bit
+// CHECK: MachHeader {
+// CHECK: Magic: Magic64 (0xFEEDFACF)
+// CHECK: CpuType: X86-64 (0x1000007)
+// CHECK: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+// CHECK: FileType: Relocatable (0x1)
+// CHECK: NumOfLoadCommands: 4
+// CHECK: SizeOfLoadCommands: 352
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Reserved: 0x0
+// CHECK: }
+// CHECK: Sections [
+// CHECK: Section {
+// CHECK: Index: 0
+// CHECK: Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x0
+// CHECK: Size: 0x4
+// CHECK: Offset: 384
+// CHECK: Alignment: 3
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0x0
+// CHECK: Attributes [ (0x800000)
+// CHECK: PureInstructions (0x800000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: SectionData (
+// CHECK: 0000: 02000000 |....|
+// CHECK: )
+// CHECK: }
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __bss (5F 5F 62 73 73 00 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x8
+// CHECK: Size: 0x8
+// CHECK: Offset: 0
+// CHECK: Alignment: 3
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: LocReloc (0x1)
+// CHECK: Attributes [ (0x0)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: Reserved3: 0x0
+// CHECK: }
+// CHECK: ]
+// CHECK: Relocations [
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _g0 (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x8
+// CHECK: }
+// CHECK: ]
+// CHECK: Indirect Symbols {
+// CHECK: Number: 0
+// CHECK: Symbols [
+// CHECK: ]
+// CHECK: }
+// CHECK: Segment {
+// CHECK: Cmd: LC_SEGMENT_64
+// CHECK: Name:
+// CHECK: Size: 232
+// CHECK: vmaddr: 0x0
+// CHECK: vmsize: 0x10
+// CHECK: fileoff: 384
+// CHECK: filesize: 4
+// CHECK: maxprot: rwx
+// CHECK: initprot: rwx
+// CHECK: nsects: 2
+// CHECK: flags: 0x0
+// CHECK: }
+// CHECK: Dysymtab {
+// CHECK: ilocalsym: 0
+// CHECK: nlocalsym: 1
+// CHECK: iextdefsym: 1
+// CHECK: nextdefsym: 0
+// CHECK: iundefsym: 1
+// CHECK: nundefsym: 0
+// CHECK: tocoff: 0
+// CHECK: ntoc: 0
+// CHECK: modtaboff: 0
+// CHECK: nmodtab: 0
+// CHECK: extrefsymoff: 0
+// CHECK: nextrefsyms: 0
+// CHECK: indirectsymoff: 0
+// CHECK: nindirectsyms: 0
+// CHECK: extreloff: 0
+// CHECK: nextrel: 0
+// CHECK: locreloff: 0
+// CHECK: nlocrel: 0
+// CHECK: }
diff --git a/test/MC/MachO/zerofill-sect-align.s b/test/MC/MachO/zerofill-sect-align.s
index 5d7730f439e9..d950b7f103c2 100644
--- a/test/MC/MachO/zerofill-sect-align.s
+++ b/test/MC/MachO/zerofill-sect-align.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -t | FileCheck %s
//
// Check that the section itself is aligned.
@@ -7,9 +7,27 @@
.zerofill __DATA,__bss,_a,1,0
.zerofill __DATA,__bss,_b,4,4
-// CHECK: # Symbol 0
-// CHECK: ('n_value', 16)
-// CHECK: ('_string', '_a')
-// CHECK: # Symbol 1
-// CHECK: ('n_value', 32)
-// CHECK: ('_string', '_b')
+// CHECK: File: <stdin>
+// CHECK: Format: Mach-O 32-bit i386
+// CHECK: Arch: i386
+// CHECK: AddressSize: 32bit
+// CHECK: Symbols [
+// CHECK: Symbol {
+// CHECK: Name: _a (4)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x10
+// CHECK: }
+// CHECK: Symbol {
+// CHECK: Name: _b (1)
+// CHECK: Type: Section (0xE)
+// CHECK: Section: __bss (0x2)
+// CHECK: RefType: UndefinedNonLazy (0x0)
+// CHECK: Flags [ (0x0)
+// CHECK: ]
+// CHECK: Value: 0x20
+// CHECK: }
+// CHECK: ]
diff --git a/test/MC/Mips/branch-pseudos-bad.s b/test/MC/Mips/branch-pseudos-bad.s
index fcbf84af84d0..3a0193b2e94b 100644
--- a/test/MC/Mips/branch-pseudos-bad.s
+++ b/test/MC/Mips/branch-pseudos-bad.s
@@ -19,3 +19,20 @@ local_label:
# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
bgtu $7, $8, local_label
# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
+
+ bltl $7, $8, local_label
+# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
+ bltul $7, $8, local_label
+# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
+ blel $7, $8, local_label
+# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
+ bleul $7, $8, local_label
+# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
+ bgel $7, $8, local_label
+# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
+ bgeul $7, $8, local_label
+# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
+ bgtl $7, $8, local_label
+# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
+ bgtul $7, $8, local_label
+# CHECK: :[[@LINE-1]]:3: error: pseudo-instruction requires $at, which is not available
diff --git a/test/MC/Mips/branch-pseudos.s b/test/MC/Mips/branch-pseudos.s
index d5b06f78d800..56841e29f425 100644
--- a/test/MC/Mips/branch-pseudos.s
+++ b/test/MC/Mips/branch-pseudos.s
@@ -187,3 +187,183 @@ local_label:
# CHECK: bnez $zero, local_label # encoding: [0x14,0x00,A,A]
# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
# CHECK: nop
+
+ bltl $7,$8,local_label
+# CHECK: slt $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2a]
+# CHECK: bnel $1, $zero, local_label # encoding: [0x54,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bltl $7,$8,global_label
+# CHECK: slt $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2a]
+# CHECK: bnel $1, $zero, global_label # encoding: [0x54,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bltl $7,$0,local_label
+# CHECK: bltz $7, local_label # encoding: [0x04,0xe0,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bltl $0,$8,local_label
+# CHECK: bgtz $8, local_label # encoding: [0x1d,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bltl $0,$0,local_label
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+
+ blel $7,$8,local_label
+# CHECK: slt $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2a]
+# CHECK: beql $1, $zero, local_label # encoding: [0x50,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ blel $7,$8,global_label
+# CHECK: slt $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2a]
+# CHECK: beql $1, $zero, global_label # encoding: [0x50,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ blel $7,$0,local_label
+# CHECK: blez $7, local_label # encoding: [0x18,0xe0,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ blel $0,$8,local_label
+# CHECK: bgez $8, local_label # encoding: [0x05,0x01,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ blel $0,$0,local_label
+# WARNING: :[[@LINE-1]]:3: warning: branch is always taken
+# CHECK: b local_label # encoding: [0x10,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+
+ bgel $7,$8,local_label
+# CHECK: slt $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2a]
+# CHECK: beql $1, $zero, local_label # encoding: [0x50,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgel $7,$8,global_label
+# CHECK: slt $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2a]
+# CHECK: beql $1, $zero, global_label # encoding: [0x50,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgel $7,$0,local_label
+# CHECK: bgez $7, local_label # encoding: [0x04,0xe1,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgel $0,$8,local_label
+# CHECK: blez $8, local_label # encoding: [0x19,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgel $0,$0,local_label
+# WARNING: :[[@LINE-1]]:3: warning: branch is always taken
+# CHECK: b local_label # encoding: [0x10,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+
+ bgtl $7,$8,local_label
+# CHECK: slt $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2a]
+# CHECK: bnel $1, $zero, local_label # encoding: [0x54,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgtl $7,$8,global_label
+# CHECK: slt $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2a]
+# CHECK: bnel $1, $zero, global_label # encoding: [0x54,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgtl $7,$0,local_label
+# CHECK: bgtz $7, local_label # encoding: [0x1c,0xe0,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgtl $0,$8,local_label
+# CHECK: bltz $8, local_label # encoding: [0x05,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgtl $0,$0,local_label
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+
+ bltul $7,$8,local_label
+# CHECK: sltu $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2b]
+# CHECK: bnel $1, $zero, local_label # encoding: [0x54,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bltul $7,$8,global_label
+# CHECK: sltu $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2b]
+# CHECK: bnel $1, $zero, global_label # encoding: [0x54,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bltul $7,$0,local_label
+# CHECK: bnez $7, local_label # encoding: [0x14,0xe0,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bltul $0,$8,local_label
+# CHECK: bnez $8, local_label # encoding: [0x15,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bltul $0,$0,local_label
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+
+ bleul $7,$8,local_label
+# CHECK: sltu $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2b]
+# CHECK: beql $1, $zero, local_label # encoding: [0x50,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bleul $7,$8,global_label
+# CHECK: sltu $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2b]
+# CHECK: beql $1, $zero, global_label # encoding: [0x50,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bleul $7,$0,local_label
+# CHECK: beqz $7, local_label # encoding: [0x10,0xe0,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bleul $0,$8,local_label
+# CHECK: beqz $8, local_label # encoding: [0x11,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bleul $0,$0,local_label
+# WARNING: :[[@LINE-1]]:3: warning: branch is always taken
+# CHECK: b local_label # encoding: [0x10,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+
+ bgeul $7,$8,local_label
+# CHECK: sltu $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2b]
+# CHECK: beql $1, $zero, local_label # encoding: [0x50,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgeul $7,$8,global_label
+# CHECK: sltu $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2b]
+# CHECK: beql $1, $zero, global_label # encoding: [0x50,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgeul $7,$0,local_label
+# CHECK: beqz $7, local_label # encoding: [0x10,0xe0,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgeul $0,$8,local_label
+# CHECK: beqz $8, local_label # encoding: [0x11,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgeul $0,$0,local_label
+# WARNING: :[[@LINE-1]]:3: warning: branch is always taken
+# CHECK: b local_label # encoding: [0x10,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+
+ bgtul $7,$8,local_label
+# CHECK: sltu $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2b]
+# CHECK: bnel $1, $zero, local_label # encoding: [0x54,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgtul $7,$8,global_label
+# CHECK: sltu $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2b]
+# CHECK: bnel $1, $zero, global_label # encoding: [0x54,0x20,A,A]
+# CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgtul $7,$0,local_label
+# CHECK: bnez $7, local_label # encoding: [0x14,0xe0,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgtul $0,$8,local_label
+# CHECK: bnez $8, local_label # encoding: [0x15,0x00,A,A]
+# CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+ bgtul $0,$0,local_label
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
diff --git a/test/MC/Mips/cnmips/invalid.s b/test/MC/Mips/cnmips/invalid.s
new file mode 100644
index 000000000000..52e71102e46f
--- /dev/null
+++ b/test/MC/Mips/cnmips/invalid.s
@@ -0,0 +1,15 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=octeon 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+foo:
+ bbit0 $19, -1, foo # CHECK: :[[@LINE]]:16: error: expected 6-bit unsigned immediate
+ bbit0 $19, 64, foo # CHECK: :[[@LINE]]:16: error: expected 6-bit unsigned immediate
+ bbit032 $19, -1, foo # CHECK: :[[@LINE]]:18: error: expected 5-bit unsigned immediate
+ bbit032 $19, 32, foo # CHECK: :[[@LINE]]:18: error: expected 5-bit unsigned immediate
+ bbit1 $19, -1, foo # CHECK: :[[@LINE]]:16: error: expected 6-bit unsigned immediate
+ bbit1 $19, 64, foo # CHECK: :[[@LINE]]:16: error: expected 6-bit unsigned immediate
+ bbit132 $19, -1, foo # CHECK: :[[@LINE]]:18: error: expected 5-bit unsigned immediate
+ bbit132 $19, 32, foo # CHECK: :[[@LINE]]:18: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/cprestore-bad.s b/test/MC/Mips/cprestore-bad.s
new file mode 100644
index 000000000000..d2fb037091d1
--- /dev/null
+++ b/test/MC/Mips/cprestore-bad.s
@@ -0,0 +1,23 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32 -relocation-model=pic 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .text
+ .set noreorder
+ .cpload $25
+
+ .set mips16
+ .cprestore 8
+# CHECK: :[[@LINE-1]]:14: error: .cprestore is not supported in Mips16 mode
+ .set nomips16
+
+ .cprestore
+# CHECK: :[[@LINE-1]]:13: error: expected stack offset value
+
+ .cprestore foo
+# CHECK: :[[@LINE-1]]:17: error: stack offset is not an absolute expression
+
+ .cprestore -8
+# CHECK: :[[@LINE-1]]:3: warning: .cprestore with negative stack offset has no effect
+
+ .cprestore 8, 35, bar
+# CHECK: :[[@LINE-1]]:15: error: unexpected token, expected end of statement
diff --git a/test/MC/Mips/cprestore-noreorder.s b/test/MC/Mips/cprestore-noreorder.s
new file mode 100644
index 000000000000..750c95a63f0c
--- /dev/null
+++ b/test/MC/Mips/cprestore-noreorder.s
@@ -0,0 +1,97 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -relocation-model=pic -show-encoding | \
+# RUN: FileCheck %s
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -relocation-model=pic -filetype=obj -o -| \
+# RUN: llvm-objdump -d -r -arch=mips - | \
+# RUN: FileCheck %s -check-prefix=CHECK-FOR-STORE
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+micromips -relocation-model=pic -show-encoding | \
+# RUN: FileCheck %s -check-prefix=MICROMIPS
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -relocation-model=static -show-encoding | \
+# RUN: FileCheck %s -check-prefix=NO-PIC
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64 -target-abi n32 -relocation-model=pic -show-encoding | \
+# RUN: FileCheck %s -check-prefix=BAD-ABI -check-prefix=BAD-ABI-N32
+
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n64 -relocation-model=pic -show-encoding | \
+# RUN: FileCheck %s -check-prefix=BAD-ABI -check-prefix=BAD-ABI-N64
+
+ .text
+ .ent foo
+foo:
+ .frame $sp, 0, $ra
+ .set noreorder
+
+ .cpload $25
+ .cprestore 8
+
+ jal $25
+ jal $4, $25
+ jal foo
+
+ .end foo
+
+# CHECK-FOR-STORE: sw $gp, 8($sp)
+
+# CHECK: .cprestore 8
+# CHECK: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# CHECK: jalr $4, $25 # encoding: [0x03,0x20,0x20,0x09]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# CHECK: lw $25, %got(foo)($gp) # encoding: [0x8f,0x99,A,A]
+# CHECK: # fixup A - offset: 0, value: foo@GOT, kind: fixup_Mips_GOT_Local
+# CHECK: addiu $25, $25, %lo(foo) # encoding: [0x27,0x39,A,A]
+# CHECK: # fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+# CHECK: .end foo
+
+# MICROMIPS: .cprestore 8
+# MICROMIPS: jalrs16 $25 # encoding: [0x45,0xf9]
+# MICROMIPS: nop # encoding: [0x00,0x00,0x00,0x00]
+# MICROMIPS: lw $gp, 8($sp) # encoding: [0xff,0x9d,0x00,0x08]
+
+# MICROMIPS: jalrs $4, $25 # encoding: [0x00,0x99,0x4f,0x3c]
+# MICROMIPS: nop # encoding: [0x00,0x00,0x00,0x00]
+# MICROMIPS: lw $gp, 8($sp) # encoding: [0xff,0x9d,0x00,0x08]
+
+# MICROMIPS: lw $25, %got(foo)($gp) # encoding: [0xff,0x3c,A,A]
+# MICROMIPS: # fixup A - offset: 0, value: foo@GOT, kind: fixup_MICROMIPS_GOT16
+# MICROMIPS: addiu $25, $25, %lo(foo) # encoding: [0x33,0x39,A,A]
+# MICROMIPS: # fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_MICROMIPS_LO16
+# MICROMIPS: jalrs $ra, $25 # encoding: [0x03,0xf9,0x4f,0x3c]
+# MICROMIPS: nop # encoding: [0x0c,0x00]
+# MICROMIPS: lw $gp, 8($sp) # encoding: [0xff,0x9d,0x00,0x08]
+# MICROMIPS: .end foo
+
+# NO-PIC: .cprestore 8
+# NO-PIC: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# NO-PIC-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# NO-PIC: jalr $4, $25 # encoding: [0x03,0x20,0x20,0x09]
+# NO-PIC-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# NO-PIC: jal foo # encoding: [0b000011AA,A,A,A]
+# NO-PIC: # fixup A - offset: 0, value: foo, kind: fixup_Mips_26
+# NO-PIC-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+# NO-PIC: .end foo
+
+# BAD-ABI: .cprestore 8
+# BAD-ABI: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# BAD-ABI-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# BAD-ABI: jalr $4, $25 # encoding: [0x03,0x20,0x20,0x09]
+# BAD-ABI-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# BAD-ABI-N32: lw $25, %got_disp(foo)($gp) # encoding: [0x8f,0x99,A,A]
+# BAD-ABI-N64: ld $25, %got_disp(foo)($gp) # encoding: [0xdf,0x99,A,A]
+# BAD-ABI: # fixup A - offset: 0, value: foo@GOT_DISP, kind: fixup_Mips_GOT_DISP
+# BAD-ABI: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# BAD-ABI-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+# BAD-ABI: .end foo
diff --git a/test/MC/Mips/cprestore-reorder.s b/test/MC/Mips/cprestore-reorder.s
new file mode 100644
index 000000000000..e037701ede0a
--- /dev/null
+++ b/test/MC/Mips/cprestore-reorder.s
@@ -0,0 +1,98 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -relocation-model=pic -show-encoding | \
+# RUN: FileCheck %s
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -relocation-model=pic -filetype=obj -o -| \
+# RUN: llvm-objdump -d -r -arch=mips - | \
+# RUN: FileCheck %s -check-prefix=CHECK-FOR-STORE
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=+micromips -relocation-model=pic -show-encoding | \
+# RUN: FileCheck %s -check-prefix=MICROMIPS
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -relocation-model=static -show-encoding | \
+# RUN: FileCheck %s -check-prefix=NO-PIC
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64 -target-abi n32 -relocation-model=pic -show-encoding | \
+# RUN: FileCheck %s -check-prefix=BAD-ABI -check-prefix=BAD-ABI-N32
+
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n64 -relocation-model=pic -show-encoding | \
+# RUN: FileCheck %s -check-prefix=BAD-ABI -check-prefix=BAD-ABI-N64
+
+ .text
+ .ent foo
+foo:
+ .frame $sp, 0, $ra
+ .set noreorder
+ .cpload $25
+ .set reorder
+
+ .cprestore 8
+
+ jal $25
+ jal $4, $25
+ jal foo
+
+ .end foo
+
+# CHECK-FOR-STORE: sw $gp, 8($sp)
+
+# CHECK: .cprestore 8
+# CHECK: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# CHECK: jalr $4, $25 # encoding: [0x03,0x20,0x20,0x09]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# CHECK: lw $25, %got(foo)($gp) # encoding: [0x8f,0x99,A,A]
+# CHECK: # fixup A - offset: 0, value: foo@GOT, kind: fixup_Mips_GOT_Local
+# CHECK: addiu $25, $25, %lo(foo) # encoding: [0x27,0x39,A,A]
+# CHECK: # fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+# CHECK: .end foo
+
+# MICROMIPS: .cprestore 8
+# MICROMIPS: jalrs16 $25 # encoding: [0x45,0xf9]
+# MICROMIPS: nop # encoding: [0x0c,0x00]
+# MICROMIPS: lw $gp, 8($sp) # encoding: [0xff,0x9d,0x00,0x08]
+
+# MICROMIPS: jalrs $4, $25 # encoding: [0x00,0x99,0x4f,0x3c]
+# MICROMIPS: nop # encoding: [0x0c,0x00]
+# MICROMIPS: lw $gp, 8($sp) # encoding: [0xff,0x9d,0x00,0x08]
+
+# MICROMIPS: lw $25, %got(foo)($gp) # encoding: [0xff,0x3c,A,A]
+# MICROMIPS: # fixup A - offset: 0, value: foo@GOT, kind: fixup_MICROMIPS_GOT16
+# MICROMIPS: addiu $25, $25, %lo(foo) # encoding: [0x33,0x39,A,A]
+# MICROMIPS: # fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_MICROMIPS_LO16
+# MICROMIPS: jalrs $ra, $25 # encoding: [0x03,0xf9,0x4f,0x3c]
+# MICROMIPS: nop # encoding: [0x0c,0x00]
+# MICROMIPS: lw $gp, 8($sp) # encoding: [0xff,0x9d,0x00,0x08]
+# MICROMIPS: .end foo
+
+# NO-PIC: .cprestore 8
+# NO-PIC: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# NO-PIC-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# NO-PIC: jalr $4, $25 # encoding: [0x03,0x20,0x20,0x09]
+# NO-PIC-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# NO-PIC: jal foo # encoding: [0b000011AA,A,A,A]
+# NO-PIC: # fixup A - offset: 0, value: foo, kind: fixup_Mips_26
+# NO-PIC-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+# NO-PIC: .end foo
+
+# BAD-ABI: .cprestore 8
+# BAD-ABI: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# BAD-ABI-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# BAD-ABI: jalr $4, $25 # encoding: [0x03,0x20,0x20,0x09]
+# BAD-ABI-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+
+# BAD-ABI-N32: lw $25, %got_disp(foo)($gp) # encoding: [0x8f,0x99,A,A]
+# BAD-ABI-N64: ld $25, %got_disp(foo)($gp) # encoding: [0xdf,0x99,A,A]
+# BAD-ABI: # fixup A - offset: 0, value: foo@GOT_DISP, kind: fixup_Mips_GOT_DISP
+# BAD-ABI: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# BAD-ABI-NOT: lw $gp, 8($sp) # encoding: [0x8f,0xbc,0x00,0x08]
+# BAD-ABI: .end foo
diff --git a/test/MC/Mips/cprestore-warning-unused.s b/test/MC/Mips/cprestore-warning-unused.s
new file mode 100644
index 000000000000..41a5df715977
--- /dev/null
+++ b/test/MC/Mips/cprestore-warning-unused.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -relocation-model=pic 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .text
+ .set noreorder
+ .cpload $25
+ .set reorder
+
+ jal $25
+# CHECK: :[[@LINE-1]]:3: warning: no .cprestore used in PIC mode
diff --git a/test/MC/Mips/cpsetup.s b/test/MC/Mips/cpsetup.s
index a3ffae67aaec..95d84d95d851 100644
--- a/test/MC/Mips/cpsetup.s
+++ b/test/MC/Mips/cpsetup.s
@@ -1,29 +1,33 @@
# RUN: llvm-mc -triple mips64-unknown-unknown -target-abi o32 -filetype=obj -o - %s | \
# RUN: llvm-objdump -d -r -arch=mips64 - | \
-# RUN: FileCheck -check-prefix=O32 %s
+# RUN: FileCheck -check-prefix=ALL -check-prefix=O32 %s
# RUN: llvm-mc -triple mips64-unknown-unknown -target-abi o32 %s | \
-# RUN: FileCheck -check-prefix=ASM %s
+# RUN: FileCheck -check-prefix=ALL -check-prefix=ASM %s
# RUN: llvm-mc -triple mips64-unknown-unknown -target-abi n32 -filetype=obj -o - %s | \
# RUN: llvm-objdump -d -r -t -arch=mips64 - | \
-# RUN: FileCheck -check-prefix=NXX -check-prefix=N32 %s
+# RUN: FileCheck -check-prefix=ALL -check-prefix=NXX -check-prefix=N32 %s
# RUN: llvm-mc -triple mips64-unknown-unknown -target-abi n32 %s | \
-# RUN: FileCheck -check-prefix=ASM %s
+# RUN: FileCheck -check-prefix=ALL -check-prefix=ASM %s
# RUN: llvm-mc -triple mips64-unknown-unknown %s -filetype=obj -o - | \
# RUN: llvm-objdump -d -r -t -arch=mips64 - | \
-# RUN: FileCheck -check-prefix=NXX -check-prefix=N64 %s
+# RUN: FileCheck -check-prefix=ALL -check-prefix=NXX -check-prefix=N64 %s
# RUN: llvm-mc -triple mips64-unknown-unknown %s | \
-# RUN: FileCheck -check-prefix=ASM %s
+# RUN: FileCheck -check-prefix=ALL -check-prefix=ASM %s
.text
.option pic2
t1:
.cpsetup $25, 8, __cerror
+ nop
+ .cpreturn
+ nop
+# ALL-LABEL: t1:
# O32-NOT: __cerror
@@ -31,19 +35,30 @@ t1:
# N32 doesn't allow 3 operations to be specified in the same relocation
# record like N64 does.
-# NXX: sd $gp, 8($sp)
-# NXX: lui $gp, 0
-# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror
-# NXX: addiu $gp, $gp, 0
-# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror
-# N32: addu $gp, $gp, $25
-# N64: daddu $gp, $gp, $25
+# NXX-NEXT: sd $gp, 8($sp)
+# NXX-NEXT: lui $gp, 0
+# NXX-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror
+# NXX-NEXT: addiu $gp, $gp, 0
+# NXX-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror
+# N32-NEXT: addu $gp, $gp, $25
+# N64-NEXT: daddu $gp, $gp, $25
-# ASM: .cpsetup $25, 8, __cerror
+# ASM-NEXT: .cpsetup $25, 8, __cerror
-t2:
+# ALL-NEXT: nop
+# ASM-NEXT: .cpreturn
+# NXX-NEXT: ld $gp, 8($sp)
+
+# ALL-NEXT: nop
+
+t2:
.cpsetup $25, $2, __cerror
+ nop
+ .cpreturn
+ nop
+
+# ALL-LABEL: t2:
# O32-NOT: __cerror
@@ -51,60 +66,115 @@ t2:
# N32 doesn't allow 3 operations to be specified in the same relocation
# record like N64 does.
-# NXX: move $2, $gp
-# NXX: lui $gp, 0
-# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror
-# NXX: addiu $gp, $gp, 0
-# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror
-# N32: addu $gp, $gp, $25
-# N64: daddu $gp, $gp, $25
+# NXX-NEXT: move $2, $gp
+# NXX-NEXT: lui $gp, 0
+# NXX-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror
+# NXX-NEXT: addiu $gp, $gp, 0
+# NXX-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror
+# N32-NEXT: addu $gp, $gp, $25
+# N64-NEXT: daddu $gp, $gp, $25
+
+# ASM-NEXT: .cpsetup $25, $2, __cerror
+
+# ALL-NEXT: nop
-# ASM: .cpsetup $25, $2, __cerror
+# ASM-NEXT: .cpreturn
+# NXX-NEXT: move $gp, $2
+
+# ALL-NEXT: nop
# .cpsetup with local labels (PR22518):
+
+# The '1:' label isn't emitted in all cases but we still want a label to match
+# so we force one here.
+
+t3:
+ nop
1:
.cpsetup $25, $2, 1b
nop
sub $3, $3, $2
- nop
-# O32: t2:
-# O32: nop
-# O32: sub $3, $3, $2
-# O32: nop
+# ALL-LABEL: t3:
+# ALL-NEXT: nop
+
+# O32-NEXT: nop
+# O32-NEXT: sub $3, $3, $2
# FIXME: Direct object emission for N32 is still under development.
# N32 doesn't allow 3 operations to be specified in the same relocation
# record like N64 does.
-# NXX: move $2, $gp
-# NXX: lui $gp, 0
-# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 $tmp0
-# NXX: addiu $gp, $gp, 0
-# NXX: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 $tmp0
-# N32: addu $gp, $gp, $25
-# N64: daddu $gp, $gp, $25
-# NXX: nop
-# NXX: sub $3, $3, $2
-# NXX: nop
+# NXX: $tmp0:
+# NXX-NEXT: move $2, $gp
+# NXX-NEXT: lui $gp, 0
+# NXX-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 $tmp0
+# NXX-NEXT: addiu $gp, $gp, 0
+# NXX-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 $tmp0
+# N32-NEXT: addu $gp, $gp, $25
+# N64-NEXT: daddu $gp, $gp, $25
+# NXX-NEXT: nop
+# NXX-NEXT: sub $3, $3, $2
-# ASM: .cpsetup $25, $2, $tmp0
+# ASM: $tmp0:
+# ASM-NEXT: .cpsetup $25, $2, $tmp0
+
+# Ensure we have at least one instruction between labels so that the labels
+# we're matching aren't removed.
+ nop
+# ALL-NEXT: nop
-t3:
.option pic0
+t4:
nop
.cpsetup $25, 8, __cerror
nop
+ .cpreturn
+ nop
# Testing that .cpsetup expands to nothing in this case
# by checking that the next instruction after the first
# nop is also a 'nop'.
-# NXX: nop
+
+# ALL-LABEL: t4:
+
+# NXX-NEXT: nop
# NXX-NEXT: nop
+# NXX-NEXT: nop
+
+# ASM-NEXT: nop
+# ASM-NEXT: .cpsetup $25, 8, __cerror
+# ASM-NEXT: nop
+# ASM-NEXT: .cpreturn
+# ASM-NEXT: nop
+
+# Test that we accept constant expressions.
+ .option pic2
+t5:
+ .cpsetup $25, ((8*4) - (3*8)), __cerror
+ nop
+
+# ALL-LABEL: t5:
+
+# O32-NOT: __cerror
+
+# FIXME: Direct object emission for N32 is still under development.
+# N32 doesn't allow 3 operations to be specified in the same relocation
+# record like N64 does.
+
+# NXX-NEXT: sd $gp, 8($sp)
+# NXX-NEXT: lui $gp, 0
+# NXX-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16 __cerror
+# NXX-NEXT: addiu $gp, $gp, 0
+# NXX-NEXT: R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16 __cerror
+# N32-NEXT: addu $gp, $gp, $25
+# N64-NEXT: daddu $gp, $gp, $25
+
+# ASM-NEXT: .cpsetup $25, 8, __cerror
+
+# ALL-NEXT: nop
-# ASM: nop
-# ASM: .cpsetup $25, 8, __cerror
-# ASM: nop
+# NXX-LABEL: SYMBOL TABLE:
# For .cpsetup with local labels, we need to check if $tmp0 is in the symbol
# table:
diff --git a/test/MC/Mips/directive-ent.s b/test/MC/Mips/directive-ent.s
new file mode 100644
index 000000000000..b9b8bf902f6d
--- /dev/null
+++ b/test/MC/Mips/directive-ent.s
@@ -0,0 +1,50 @@
+# The effects of .ent on the .pdr section are tested in mips-pdr*.s. Test
+# everything else here.
+#
+# RUN: llvm-mc -mcpu=mips32 -triple mips-unknown-unknown %s | \
+# RUN: FileCheck -check-prefix=ASM %s
+# RUN: llvm-mc -filetype=obj -mcpu=mips32 -triple mips-unknown-unknown %s | \
+# RUN: llvm-readobj -symbols | \
+# RUN: FileCheck -check-prefix=OBJ -check-prefix=OBJ-32 %s
+#
+# RUN: llvm-mc -mcpu=mips32 -mattr=micromips -triple mips-unknown-unknown %s | \
+# RUN: FileCheck -check-prefix=ASM %s
+# RUN: llvm-mc -filetype=obj -mcpu=mips32 -mattr=micromips \
+# RUN: -triple mips-unknown-unknown %s | \
+# RUN: llvm-readobj -symbols | \
+# RUN: FileCheck -check-prefix=OBJ -check-prefix=OBJ-MM %s
+#
+ .ent a
+a:
+
+# ASM: .ent a
+# ASM: a:
+
+# OBJ: Name: a
+# OBJ: Value: 0x0
+# OBJ: Size: 0
+# OBJ: Binding: Local
+# OBJ: Type: Function
+# OBJ: Other: 0
+# OBJ: Section: .text
+# OBJ: }
+
+ .ent b
+b:
+ nop
+ nop
+ .end b
+
+# ASM: .ent b
+# ASM: b:
+
+# OBJ: Name: b
+# OBJ: Value: 0x0
+# OBJ-32: Size: 8
+# FIXME: microMIPS uses the 4-byte nop instead of the 2-byte nop.
+# OBJ-MM: Size: 8
+# OBJ: Binding: Local
+# OBJ: Type: Function
+# OBJ: Other: 0
+# OBJ: Section: .text
+# OBJ: }
diff --git a/test/MC/Mips/dsp/invalid.s b/test/MC/Mips/dsp/invalid.s
new file mode 100644
index 000000000000..8bd0906e67f8
--- /dev/null
+++ b/test/MC/Mips/dsp/invalid.s
@@ -0,0 +1,25 @@
+# RUN: not llvm-mc %s -triple=mips-unknown-unknown -show-encoding -mattr=dsp 2>%t1
+# RUN: FileCheck %s < %t1
+
+ shll.ph $3, $4, 16 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shll.ph $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shll_s.ph $3, $4, 16 # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
+ shll_s.ph $3, $4, -1 # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
+ shll.qb $3, $4, 8 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shll.qb $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ // FIXME: Following invalid tests are temporarely disabled, until operand check for uimm5 is added
+ shll_s.w $3, $4, 32 # -CHECK: :[[@LINE]]:20: error: expected 5-bit unsigned immediate
+ shll_s.w $3, $4, -1 # -CHECK: :[[@LINE]]:20: error: expected 5-bit unsigned immediate
+ shra.ph $3, $4, 16 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shra.ph $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shra_r.ph $3, $4, 16 # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
+ shra_r.ph $3, $4, -1 # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
+ // FIXME: Following invalid tests are temporarely disabled, until operand check for uimm5 is added
+ shra_r.w $3, $4, 32 # -CHECK: :[[@LINE]]:20: error: expected 5-bit unsigned immediate
+ shra_r.w $3, $4, -1 # -CHECK: :[[@LINE]]:20: error: expected 5-bit unsigned immediate
+ shrl.qb $3, $4, 8 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shrl.qb $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shilo $ac1, 64 # CHECK: :[[@LINE]]:15: error: expected 6-bit signed immediate
+ shilo $ac1, -64 # CHECK: :[[@LINE]]:15: error: expected 6-bit signed immediate
+ wrdsp $5, 1024 # CHECK: :[[@LINE]]:13: error: expected 10-bit unsigned immediate
+ wrdsp $5, -1 # CHECK: :[[@LINE]]:13: error: expected 10-bit unsigned immediate
diff --git a/test/MC/Mips/dsp/valid.s b/test/MC/Mips/dsp/valid.s
new file mode 100644
index 000000000000..f5926f3e2593
--- /dev/null
+++ b/test/MC/Mips/dsp/valid.s
@@ -0,0 +1,131 @@
+# RUN: llvm-mc -show-encoding -triple=mips-unknown-unknown -mattr=dsp %s | FileCheck %s
+#
+# CHECK: .text
+ .set noat
+ absq_s.ph $1, $2 # CHECK: absq_s.ph $1, $2 # encoding: [0x7c,0x02,0x0a,0x52]
+ absq_s.w $5, $6 # CHECK: absq_s.w $5, $6 # encoding: [0x7c,0x06,0x2c,0x52]
+ addq.ph $7, $8, $9 # CHECK: addq.ph $7, $8, $9 # encoding: [0x7d,0x09,0x3a,0x90]
+ addq_s.ph $10, $11, $12 # CHECK: addq_s.ph $10, $11, $12 # encoding: [0x7d,0x6c,0x53,0x90]
+ addq_s.w $13, $14, $15 # CHECK: addq_s.w $13, $14, $15 # encoding: [0x7d,0xcf,0x6d,0x90]
+ addsc $gp, $sp, $fp # CHECK: addsc $gp, $sp, $fp # encoding: [0x7f,0xbe,0xe4,0x10]
+ addu.qb $6, $7, $8 # CHECK: addu.qb $6, $7, $8 # encoding: [0x7c,0xe8,0x30,0x10]
+ addu_s.qb $9, $10, $11 # CHECK: addu_s.qb $9, $10, $11 # encoding: [0x7d,0x4b,0x49,0x10]
+ addwc $12, $13, $14 # CHECK: addwc $12, $13, $14 # encoding: [0x7d,0xae,0x64,0x50]
+ bitrev $25, $26 # CHECK: bitrev $25, $26 # encoding: [0x7c,0x1a,0xce,0xd2]
+ bposge32 21100 # CHECK: bposge32 21100 # encoding: [0x04,0x1c,0x14,0x9b]
+ cmp.eq.ph $27, $gp # CHECK: cmp.eq.ph $27, $gp # encoding: [0x7f,0x7c,0x02,0x11]
+ cmp.lt.ph $sp, $fp # CHECK: cmp.lt.ph $sp, $fp # encoding: [0x7f,0xbe,0x02,0x51]
+ cmp.le.ph $ra, $1 # CHECK: cmp.le.ph $ra, $1 # encoding: [0x7f,0xe1,0x02,0x91]
+ cmpgu.eq.qb $11, $12, $13 # CHECK: cmpgu.eq.qb $11, $12, $13 # encoding: [0x7d,0x8d,0x59,0x11]
+ cmpgu.lt.qb $14, $15, $16 # CHECK: cmpgu.lt.qb $14, $15, $16 # encoding: [0x7d,0xf0,0x71,0x51]
+ cmpgu.le.qb $17, $18, $19 # CHECK: cmpgu.le.qb $17, $18, $19 # encoding: [0x7e,0x53,0x89,0x91]
+ cmpu.eq.qb $20, $21 # CHECK: cmpu.eq.qb $20, $21 # encoding: [0x7e,0x95,0x00,0x11]
+ cmpu.lt.qb $22, $23 # CHECK: cmpu.lt.qb $22, $23 # encoding: [0x7e,0xd7,0x00,0x51]
+ cmpu.le.qb $24, $25 # CHECK: cmpu.le.qb $24, $25 # encoding: [0x7f,0x19,0x00,0x91]
+ dpaq_s.w.ph $ac1, $1, $2 # CHECK: dpaq_s.w.ph $ac1, $1, $2 # encoding: [0x7c,0x22,0x09,0x30]
+ dpaq_sa.l.w $ac2, $3, $4 # CHECK: dpaq_sa.l.w $ac2, $3, $4 # encoding: [0x7c,0x64,0x13,0x30]
+ dpau.h.qbl $ac1, $9, $10 # CHECK: dpau.h.qbl $ac1, $9, $10 # encoding: [0x7d,0x2a,0x08,0xf0]
+ dpau.h.qbr $ac1, $11, $12 # CHECK: dpau.h.qbr $ac1, $11, $12 # encoding: [0x7d,0x6c,0x09,0xf0]
+ dpsq_s.w.ph $ac0, $17, $18 # CHECK: dpsq_s.w.ph $ac0, $17, $18 # encoding: [0x7e,0x32,0x01,0x70]
+ dpsq_sa.l.w $ac1, $19, $20 # CHECK: dpsq_sa.l.w $ac1, $19, $20 # encoding: [0x7e,0x74,0x0b,0x70]
+ dpsu.h.qbl $ac0, $5, $6 # CHECK: dpsu.h.qbl $ac0, $5, $6 # encoding: [0x7c,0xa6,0x02,0xf0]
+ dpsu.h.qbr $ac1, $7, $8 # CHECK: dpsu.h.qbr $ac1, $7, $8 # encoding: [0x7c,0xe8,0x0b,0xf0]
+ extp $1, $ac0, 31 # CHECK: extp $1, $ac0, 31 # encoding: [0x7f,0xe1,0x00,0xb8]
+ extpdp $2, $ac1, 0 # CHECK: extpdp $2, $ac1, 0 # encoding: [0x7c,0x02,0x0a,0xb8]
+ extpdpv $3, $ac2, $4 # CHECK: extpdpv $3, $ac2, $4 # encoding: [0x7c,0x83,0x12,0xf8]
+ extpv $5, $ac3, $6 # CHECK: extpv $5, $ac3, $6 # encoding: [0x7c,0xc5,0x18,0xf8]
+ extr.w $7, $ac0, 31 # CHECK: extr.w $7, $ac0, 31 # encoding: [0x7f,0xe7,0x00,0x38]
+ extr_r.w $8, $ac1, 15 # CHECK: extr_r.w $8, $ac1, 15 # encoding: [0x7d,0xe8,0x09,0x38]
+ extr_rs.w $9, $ac2, 7 # CHECK: extr_rs.w $9, $ac2, 7 # encoding: [0x7c,0xe9,0x11,0xb8]
+ extr_s.h $10, $ac3, 3 # CHECK: extr_s.h $10, $ac3, 3 # encoding: [0x7c,0x6a,0x1b,0xb8]
+ extrv.w $11, $ac0, $12 # CHECK: extrv.w $11, $ac0, $12 # encoding: [0x7d,0x8b,0x00,0x78]
+ extrv_r.w $13, $ac1, $14 # CHECK: extrv_r.w $13, $ac1, $14 # encoding: [0x7d,0xcd,0x09,0x78]
+ extrv_rs.w $15, $ac2, $16 # CHECK: extrv_rs.w $15, $ac2, $16 # encoding: [0x7e,0x0f,0x11,0xf8]
+ extrv_s.h $17, $ac3, $18 # CHECK: extrv_s.h $17, $ac3, $18 # encoding: [0x7e,0x51,0x1b,0xf8]
+ insv $19, $20 # CHECK: insv $19, $20 # encoding: [0x7e,0x93,0x00,0x0c]
+ lbux $10, $20($26) # CHECK: lbux $10, $20($26) # encoding: [0x7f,0x54,0x51,0x8a]
+ lhx $11, $21($27) # CHECK: lhx $11, $21($27) # encoding: [0x7f,0x75,0x59,0x0a]
+ lwx $12, $22($gp) # CHECK: lwx $12, $22($gp) # encoding: [0x7f,0x96,0x60,0x0a]
+ madd $ac1, $6, $7 # CHECK: madd $ac1, $6, $7 # encoding: [0x70,0xc7,0x08,0x00]
+ maddu $ac0, $8, $9 # CHECK: maddu $ac0, $8, $9 # encoding: [0x71,0x09,0x00,0x01]
+ madd $6, $7 # CHECK: madd $6, $7 # encoding: [0x70,0xc7,0x00,0x00]
+ maddu $8, $9 # CHECK: maddu $8, $9 # encoding: [0x71,0x09,0x00,0x01]
+ maq_s.w.phl $ac2, $3, $4 # CHECK: maq_s.w.phl $ac2, $3, $4 # encoding: [0x7c,0x64,0x15,0x30]
+ maq_sa.w.phl $ac3, $5, $6 # CHECK: maq_sa.w.phl $ac3, $5, $6 # encoding: [0x7c,0xa6,0x1c,0x30]
+ maq_s.w.phr $ac0, $7, $8 # CHECK: maq_s.w.phr $ac0, $7, $8 # encoding: [0x7c,0xe8,0x05,0xb0]
+ maq_sa.w.phr $ac1, $9, $10 # CHECK: maq_sa.w.phr $ac1, $9, $10 # encoding: [0x7d,0x2a,0x0c,0xb0]
+ mfhi $14, $ac1 # CHECK: mfhi $14, $ac1 # encoding: [0x00,0x20,0x70,0x10]
+ mflo $15, $ac0 # CHECK: mflo $15, $ac0 # encoding: [0x00,0x00,0x78,0x12]
+ mfhi $14 # CHECK: mfhi $14 # encoding: [0x00,0x00,0x70,0x10]
+ mflo $15 # CHECK: mflo $15 # encoding: [0x00,0x00,0x78,0x12]
+ modsub $11, $12, $13 # CHECK: modsub $11, $12, $13 # encoding: [0x7d,0x8d,0x5c,0x90]
+ msub $ac3, $10, $11 # CHECK: msub $ac3, $10, $11 # encoding: [0x71,0x4b,0x18,0x04]
+ msubu $ac2, $12, $13 # CHECK: msubu $ac2, $12, $13 # encoding: [0x71,0x8d,0x10,0x05]
+ msub $10, $11 # CHECK: msub $10, $11 # encoding: [0x71,0x4b,0x00,0x04]
+ msubu $12, $13 # CHECK: msubu $12, $13 # encoding: [0x71,0x8d,0x00,0x05]
+ mthi $16, $ac3 # CHECK: mthi $16, $ac3 # encoding: [0x02,0x00,0x18,0x11]
+ mthi $16 # CHECK: mthi $16 # encoding: [0x02,0x00,0x00,0x11]
+ mthlip $14, $ac2 # CHECK: mthlip $14, $ac2 # encoding: [0x7d,0xc0,0x17,0xf8]
+ mtlo $17, $ac2 # CHECK: mtlo $17, $ac2 # encoding: [0x02,0x20,0x10,0x13]
+ mtlo $17 # CHECK: mtlo $17 # encoding: [0x02,0x20,0x00,0x13]
+ muleq_s.w.phl $21, $22, $23 # CHECK: muleq_s.w.phl $21, $22, $23 # encoding: [0x7e,0xd7,0xaf,0x10]
+ muleq_s.w.phr $24, $25, $26 # CHECK: muleq_s.w.phr $24, $25, $26 # encoding: [0x7f,0x3a,0xc7,0x50]
+ muleu_s.ph.qbl $27, $gp, $sp # CHECK: muleu_s.ph.qbl $27, $gp, $sp # encoding: [0x7f,0x9d,0xd9,0x90]
+ muleu_s.ph.qbr $fp, $ra, $1 # CHECK: muleu_s.ph.qbr $fp, $ra, $1 # encoding: [0x7f,0xe1,0xf1,0xd0]
+ mulq_rs.ph $2, $3, $4 # CHECK: mulq_rs.ph $2, $3, $4 # encoding: [0x7c,0x64,0x17,0xd0]
+ mulsaq_s.w.ph $ac0, $16, $17 # CHECK: mulsaq_s.w.ph $ac0, $16, $17 # encoding: [0x7e,0x11,0x01,0xb0]
+ mult $ac3, $2, $3 # CHECK: mult $ac3, $2, $3 # encoding: [0x00,0x43,0x18,0x18]
+ multu $ac2, $4, $5 # CHECK: multu $ac2, $4, $5 # encoding: [0x00,0x85,0x10,0x19]
+ mult $2, $3 # CHECK: mult $2, $3 # encoding: [0x00,0x43,0x00,0x18]
+ multu $4, $5 # CHECK: multu $4, $5 # encoding: [0x00,0x85,0x00,0x19]
+ packrl.ph $18, $19, $20 # CHECK: packrl.ph $18, $19, $20 # encoding: [0x7e,0x74,0x93,0x91]
+ pick.ph $7, $15, $3 # CHECK: pick.ph $7, $15, $3 # encoding: [0x7d,0xe3,0x3a,0xd1]
+ pick.qb $2, $4, $8 # CHECK: pick.qb $2, $4, $8 # encoding: [0x7c,0x88,0x10,0xd1]
+ preceq.w.phl $20, $21 # CHECK: preceq.w.phl $20, $21 # encoding: [0x7c,0x15,0xa3,0x12]
+ preceq.w.phr $21, $22 # CHECK: preceq.w.phr $21, $22 # encoding: [0x7c,0x16,0xab,0x52]
+ precequ.ph.qbl $22, $23 # CHECK: precequ.ph.qbl $22, $23 # encoding: [0x7c,0x17,0xb1,0x12]
+ precequ.ph.qbla $24, $25 # CHECK: precequ.ph.qbla $24, $25 # encoding: [0x7c,0x19,0xc1,0x92]
+ precequ.ph.qbr $23, $24 # CHECK: precequ.ph.qbr $23, $24 # encoding: [0x7c,0x18,0xb9,0x52]
+ precequ.ph.qbra $25, $26 # CHECK: precequ.ph.qbra $25, $26 # encoding: [0x7c,0x1a,0xc9,0xd2]
+ preceu.ph.qbl $26, $27 # CHECK: preceu.ph.qbl $26, $27 # encoding: [0x7c,0x1b,0xd7,0x12]
+ preceu.ph.qbla $gp, $sp # CHECK: preceu.ph.qbla $gp, $sp # encoding: [0x7c,0x1d,0xe7,0x92]
+ preceu.ph.qbr $27, $gp # CHECK: preceu.ph.qbr $27, $gp # encoding: [0x7c,0x1c,0xdf,0x52]
+ preceu.ph.qbra $sp, $fp # CHECK: preceu.ph.qbra $sp, $fp # encoding: [0x7c,0x1e,0xef,0xd2]
+ precrq.ph.w $17, $18, $19 # CHECK: precrq.ph.w $17, $18, $19 # encoding: [0x7e,0x53,0x8d,0x11]
+ precrq.qb.ph $16, $17, $18 # CHECK: precrq.qb.ph $16, $17, $18 # encoding: [0x7e,0x32,0x83,0x11]
+ precrqu_s.qb.ph $19, $20, $21 # CHECK: precrqu_s.qb.ph $19, $20, $21 # encoding: [0x7e,0x95,0x9b,0xd1]
+ precrq_rs.ph.w $18, $19, $20 # CHECK: precrq_rs.ph.w $18, $19, $20 # encoding: [0x7e,0x74,0x95,0x51]
+ raddu.w.qb $1, $2 # CHECK: raddu.w.qb $1, $2 # encoding: [0x7c,0x40,0x0d,0x10]
+ rddsp $5, 256 # CHECK: rddsp $5, 256 # encoding: [0x7d,0x00,0x2c,0xb8]
+ repl.ph $2, 12 # CHECK: repl.ph $2, 12 # encoding: [0x7c,0x0c,0x12,0x92]
+ repl.qb $1, 85 # CHECK: repl.qb $1, 85 # encoding: [0x7c,0x55,0x08,0x92]
+ replv.ph $1, $2 # CHECK: replv.ph $1, $2 # encoding: [0x7c,0x02,0x0a,0xd2]
+ replv.qb $1, $2 # CHECK: replv.qb $1, $2 # encoding: [0x7c,0x02,0x08,0xd2]
+ shilo $ac1, 3 # CHECK: shilo $ac1, 3 # encoding: [0x7c,0x30,0x0e,0xb8]
+ shilo $ac1, 16 # CHECK: shilo $ac1, 16 # encoding: [0x7d,0x00,0x0e,0xb8]
+ shilov $ac1, $2 # CHECK: shilov $ac1, $2 # encoding: [0x7c,0x40,0x0e,0xf8]
+ shll.ph $1, $2, 3 # CHECK: shll.ph $1, $2, 3 # encoding: [0x7c,0x62,0x0a,0x13]
+ shll_s.ph $1, $2, 3 # CHECK: shll_s.ph $1, $2, 3 # encoding: [0x7c,0x62,0x0b,0x13]
+ shll.qb $1, $2, 3 # CHECK: shll.qb $1, $2, 3 # encoding: [0x7c,0x62,0x08,0x13]
+ shllv.ph $1, $2, $3 # CHECK: shllv.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0a,0x93]
+ shllv_s.ph $1, $2, $3 # CHECK: shllv_s.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0b,0x93]
+ shllv.qb $1, $2, $3 # CHECK: shllv.qb $1, $2, $3 # encoding: [0x7c,0x62,0x08,0x93]
+ shllv_s.w $1, $2, $3 # CHECK: shllv_s.w $1, $2, $3 # encoding: [0x7c,0x62,0x0d,0x93]
+ shll_s.w $1, $2, 3 # CHECK: shll_s.w $1, $2, 3 # encoding: [0x7c,0x62,0x0d,0x13]
+ shra.ph $5, $2, 1 # CHECK: shra.ph $5, $2, 1 # encoding: [0x7c,0x22,0x2a,0x53]
+ shra_r.ph $5, $2, 1 # CHECK: shra_r.ph $5, $2, 1 # encoding: [0x7c,0x22,0x2b,0x53]
+ shrav.ph $1, $2, $3 # CHECK: shrav.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0a,0xd3]
+ shrav_r.ph $1, $2, $3 # CHECK: shrav_r.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0b,0xd3]
+ shrav_r.w $1, $2, $3 # CHECK: shrav_r.w $1, $2, $3 # encoding: [0x7c,0x62,0x0d,0xd3]
+ shra_r.w $1, $2, 1 # CHECK: shra_r.w $1, $2, 1 # encoding: [0x7c,0x22,0x0d,0x53]
+ shrl.qb $1, $2, 2 # CHECK: shrl.qb $1, $2, 2 # encoding: [0x7c,0x42,0x08,0x53]
+ shrlv.qb $1, $2, $3 # CHECK: shrlv.qb $1, $2, $3 # encoding: [0x7c,0x62,0x08,0xd3]
+ subq.ph $1, $2, $3 # CHECK: subq.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0a,0xd0]
+ subq_s.ph $1, $2, $3 # CHECK: subq_s.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0b,0xd0]
+ subq_s.w $1, $2, $3 # CHECK: subq_s.w $1, $2, $3 # encoding: [0x7c,0x43,0x0d,0xd0]
+ subu.qb $1, $2, $3 # CHECK: subu.qb $1, $2, $3 # encoding: [0x7c,0x43,0x08,0x50]
+ subu_s.qb $1, $2, $3 # CHECK: subu_s.qb $1, $2, $3 # encoding: [0x7c,0x43,0x09,0x50]
+ wrdsp $1, 0 # CHECK: wrdsp $1, 0 # encoding: [0x7c,0x20,0x04,0xf8]
+ wrdsp $5 # CHECK: wrdsp $5 # encoding: [0x7c,0xa0,0xfc,0xf8]
+ wrdsp $5, 2 # CHECK: wrdsp $5, 2 # encoding: [0x7c,0xa0,0x14,0xf8]
+ wrdsp $5, 31 # CHECK: wrdsp $5 # encoding: [0x7c,0xa0,0xfc,0xf8]
diff --git a/test/MC/Mips/dspr2/invalid.s b/test/MC/Mips/dspr2/invalid.s
new file mode 100644
index 000000000000..16eb8faa112a
--- /dev/null
+++ b/test/MC/Mips/dspr2/invalid.s
@@ -0,0 +1,20 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -mattr=+dspr2 -show-encoding 2>%t1
+# RUN: FileCheck %s < %t1
+ append $2, $3, -1 # CHECK: :[[@LINE]]:18: error: expected 5-bit unsigned immediate
+ append $2, $3, 32 # CHECK: :[[@LINE]]:18: error: expected 5-bit unsigned immediate
+ balign $2, $3, -1 # CHECK: :[[@LINE]]:18: error: expected 2-bit unsigned immediate
+ balign $2, $3, 4 # CHECK: :[[@LINE]]:18: error: expected 2-bit unsigned immediate
+ precr_sra.ph.w $24, $25, -1 # CHECK: :[[@LINE]]:28: error: expected 5-bit unsigned immediate
+ precr_sra.ph.w $24, $25, 32 # CHECK: :[[@LINE]]:28: error: expected 5-bit unsigned immediate
+ precr_sra_r.ph.w $25 ,$26, -1 # CHECK: :[[@LINE]]:30: error: expected 5-bit unsigned immediate
+ precr_sra_r.ph.w $25 ,$26, 32 # CHECK: :[[@LINE]]:30: error: expected 5-bit unsigned immediate
+ prepend $2, $3, -1 # CHECK: :[[@LINE]]:19: error: expected 5-bit unsigned immediate
+ prepend $2, $3, 32 # CHECK: :[[@LINE]]:19: error: expected 5-bit unsigned immediate
+ shra.qb $3, $4, 8 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shra.qb $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shra_r.qb $3, $4, 8 # CHECK: :[[@LINE]]:21: error: expected 3-bit unsigned immediate
+ shra_r.qb $3, $4, -1 # CHECK: :[[@LINE]]:21: error: expected 3-bit unsigned immediate
+ shrl.ph $3, $4, 16 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shrl.ph $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
diff --git a/test/MC/Mips/dspr2/valid.s b/test/MC/Mips/dspr2/valid.s
new file mode 100644
index 000000000000..c50e9d6e5ae1
--- /dev/null
+++ b/test/MC/Mips/dspr2/valid.s
@@ -0,0 +1,179 @@
+# RUN: llvm-mc -show-encoding -triple=mips-unknown-unknown -mattr=dspr2 %s | FileCheck %s
+#
+# CHECK: .text
+ .set noat
+ absq_s.ph $1, $2 # CHECK: absq_s.ph $1, $2 # encoding: [0x7c,0x02,0x0a,0x52]
+ absq_s.qb $3, $4 # CHECK: absq_s.qb $3, $4 # encoding: [0x7c,0x04,0x18,0x52]
+ absq_s.w $5, $6 # CHECK: absq_s.w $5, $6 # encoding: [0x7c,0x06,0x2c,0x52]
+ addq.ph $7, $8, $9 # CHECK: addq.ph $7, $8, $9 # encoding: [0x7d,0x09,0x3a,0x90]
+ addq_s.ph $10, $11, $12 # CHECK: addq_s.ph $10, $11, $12 # encoding: [0x7d,0x6c,0x53,0x90]
+ addq_s.w $13, $14, $15 # CHECK: addq_s.w $13, $14, $15 # encoding: [0x7d,0xcf,0x6d,0x90]
+ addqh.ph $16, $17, $18 # CHECK: addqh.ph $16, $17, $18 # encoding: [0x7e,0x32,0x82,0x18]
+ addqh_r.ph $19, $20, $21 # CHECK: addqh_r.ph $19, $20, $21 # encoding: [0x7e,0x95,0x9a,0x98]
+ addqh.w $22, $23, $24 # CHECK: addqh.w $22, $23, $24 # encoding: [0x7e,0xf8,0xb4,0x18]
+ addqh_r.w $25, $26, $27 # CHECK: addqh_r.w $25, $26, $27 # encoding: [0x7f,0x5b,0xcc,0x98]
+ addsc $gp, $sp, $fp # CHECK: addsc $gp, $sp, $fp # encoding: [0x7f,0xbe,0xe4,0x10]
+ addu.ph $ra, $1, $2 # CHECK: addu.ph $ra, $1, $2 # encoding: [0x7c,0x22,0xfa,0x10]
+ addu_s.ph $3, $4, $5 # CHECK: addu_s.ph $3, $4, $5 # encoding: [0x7c,0x85,0x1b,0x10]
+ addu.qb $6, $7, $8 # CHECK: addu.qb $6, $7, $8 # encoding: [0x7c,0xe8,0x30,0x10]
+ addu_s.qb $9, $10, $11 # CHECK: addu_s.qb $9, $10, $11 # encoding: [0x7d,0x4b,0x49,0x10]
+ addwc $12, $13, $14 # CHECK: addwc $12, $13, $14 # encoding: [0x7d,0xae,0x64,0x50]
+ adduh.qb $15, $16, $17 # CHECK: adduh.qb $15, $16, $17 # encoding: [0x7e,0x11,0x78,0x18]
+ adduh_r.qb $18, $19, $20 # CHECK: adduh_r.qb $18, $19, $20 # encoding: [0x7e,0x74,0x90,0x98]
+ append $21, $22, 0 # CHECK: append $21, $22, 0 # encoding: [0x7e,0xd5,0x00,0x31]
+ balign $23, $24, 3 # CHECK: balign $23, $24, 3 # encoding: [0x7f,0x17,0x1c,0x31]
+ bitrev $25, $26 # CHECK: bitrev $25, $26 # encoding: [0x7c,0x1a,0xce,0xd2]
+ bposge32 21100 # CHECK: bposge32 21100 # encoding: [0x04,0x1c,0x14,0x9b]
+ cmp.eq.ph $27, $gp # CHECK: cmp.eq.ph $27, $gp # encoding: [0x7f,0x7c,0x02,0x11]
+ cmp.lt.ph $sp, $fp # CHECK: cmp.lt.ph $sp, $fp # encoding: [0x7f,0xbe,0x02,0x51]
+ cmp.le.ph $ra, $1 # CHECK: cmp.le.ph $ra, $1 # encoding: [0x7f,0xe1,0x02,0x91]
+ cmpgdu.eq.qb $2, $3, $4 # CHECK: cmpgdu.eq.qb $2, $3, $4 # encoding: [0x7c,0x64,0x16,0x11]
+ cmpgdu.lt.qb $5, $6, $7 # CHECK: cmpgdu.lt.qb $5, $6, $7 # encoding: [0x7c,0xc7,0x2e,0x51]
+ cmpgdu.le.qb $8, $9, $10 # CHECK: cmpgdu.le.qb $8, $9, $10 # encoding: [0x7d,0x2a,0x46,0x91]
+ cmpgu.eq.qb $11, $12, $13 # CHECK: cmpgu.eq.qb $11, $12, $13 # encoding: [0x7d,0x8d,0x59,0x11]
+ cmpgu.lt.qb $14, $15, $16 # CHECK: cmpgu.lt.qb $14, $15, $16 # encoding: [0x7d,0xf0,0x71,0x51]
+ cmpgu.le.qb $17, $18, $19 # CHECK: cmpgu.le.qb $17, $18, $19 # encoding: [0x7e,0x53,0x89,0x91]
+ cmpu.eq.qb $20, $21 # CHECK: cmpu.eq.qb $20, $21 # encoding: [0x7e,0x95,0x00,0x11]
+ cmpu.lt.qb $22, $23 # CHECK: cmpu.lt.qb $22, $23 # encoding: [0x7e,0xd7,0x00,0x51]
+ cmpu.le.qb $24, $25 # CHECK: cmpu.le.qb $24, $25 # encoding: [0x7f,0x19,0x00,0x91]
+ dpa.w.ph $ac0, $26, $27 # CHECK: dpa.w.ph $ac0, $26, $27 # encoding: [0x7f,0x5b,0x00,0x30]
+ dpaq_s.w.ph $ac1, $1, $2 # CHECK: dpaq_s.w.ph $ac1, $1, $2 # encoding: [0x7c,0x22,0x09,0x30]
+ dpaq_sa.l.w $ac2, $3, $4 # CHECK: dpaq_sa.l.w $ac2, $3, $4 # encoding: [0x7c,0x64,0x13,0x30]
+ dpaqx_s.w.ph $ac3, $5, $6 # CHECK: dpaqx_s.w.ph $ac3, $5, $6 # encoding: [0x7c,0xa6,0x1e,0x30]
+ dpaqx_sa.w.ph $ac0, $7, $8 # CHECK: dpaqx_sa.w.ph $ac0, $7, $8 # encoding: [0x7c,0xe8,0x06,0xb0]
+ dpau.h.qbl $ac1, $9, $10 # CHECK: dpau.h.qbl $ac1, $9, $10 # encoding: [0x7d,0x2a,0x08,0xf0]
+ dpau.h.qbr $ac1, $11, $12 # CHECK: dpau.h.qbr $ac1, $11, $12 # encoding: [0x7d,0x6c,0x09,0xf0]
+ dpax.w.ph $ac2, $13, $14 # CHECK: dpax.w.ph $ac2, $13, $14 # encoding: [0x7d,0xae,0x12,0x30]
+ dps.w.ph $ac3, $15, $16 # CHECK: dps.w.ph $ac3, $15, $16 # encoding: [0x7d,0xf0,0x18,0x70]
+ dpsq_s.w.ph $ac0, $17, $18 # CHECK: dpsq_s.w.ph $ac0, $17, $18 # encoding: [0x7e,0x32,0x01,0x70]
+ dpsq_sa.l.w $ac1, $19, $20 # CHECK: dpsq_sa.l.w $ac1, $19, $20 # encoding: [0x7e,0x74,0x0b,0x70]
+ dpsqx_s.w.ph $ac2, $1, $2 # CHECK: dpsqx_s.w.ph $ac2, $1, $2 # encoding: [0x7c,0x22,0x16,0x70]
+ dpsqx_sa.w.ph $ac3, $3, $4 # CHECK: dpsqx_sa.w.ph $ac3, $3, $4 # encoding: [0x7c,0x64,0x1e,0xf0]
+ dpsu.h.qbl $ac0, $5, $6 # CHECK: dpsu.h.qbl $ac0, $5, $6 # encoding: [0x7c,0xa6,0x02,0xf0]
+ dpsu.h.qbr $ac1, $7, $8 # CHECK: dpsu.h.qbr $ac1, $7, $8 # encoding: [0x7c,0xe8,0x0b,0xf0]
+ dpsx.w.ph $ac2, $9, $10 # CHECK: dpsx.w.ph $ac2, $9, $10 # encoding: [0x7d,0x2a,0x12,0x70]
+ extp $1, $ac0, 31 # CHECK: extp $1, $ac0, 31 # encoding: [0x7f,0xe1,0x00,0xb8]
+ extpdp $2, $ac1, 0 # CHECK: extpdp $2, $ac1, 0 # encoding: [0x7c,0x02,0x0a,0xb8]
+ extpdpv $3, $ac2, $4 # CHECK: extpdpv $3, $ac2, $4 # encoding: [0x7c,0x83,0x12,0xf8]
+ extpv $5, $ac3, $6 # CHECK: extpv $5, $ac3, $6 # encoding: [0x7c,0xc5,0x18,0xf8]
+ extr.w $7, $ac0, 31 # CHECK: extr.w $7, $ac0, 31 # encoding: [0x7f,0xe7,0x00,0x38]
+ extr_r.w $8, $ac1, 15 # CHECK: extr_r.w $8, $ac1, 15 # encoding: [0x7d,0xe8,0x09,0x38]
+ extr_rs.w $9, $ac2, 7 # CHECK: extr_rs.w $9, $ac2, 7 # encoding: [0x7c,0xe9,0x11,0xb8]
+ extr_s.h $10, $ac3, 3 # CHECK: extr_s.h $10, $ac3, 3 # encoding: [0x7c,0x6a,0x1b,0xb8]
+ extrv.w $11, $ac0, $12 # CHECK: extrv.w $11, $ac0, $12 # encoding: [0x7d,0x8b,0x00,0x78]
+ extrv_r.w $13, $ac1, $14 # CHECK: extrv_r.w $13, $ac1, $14 # encoding: [0x7d,0xcd,0x09,0x78]
+ extrv_rs.w $15, $ac2, $16 # CHECK: extrv_rs.w $15, $ac2, $16 # encoding: [0x7e,0x0f,0x11,0xf8]
+ extrv_s.h $17, $ac3, $18 # CHECK: extrv_s.h $17, $ac3, $18 # encoding: [0x7e,0x51,0x1b,0xf8]
+ insv $19, $20 # CHECK: insv $19, $20 # encoding: [0x7e,0x93,0x00,0x0c]
+ lbux $10, $20($26) # CHECK: lbux $10, $20($26) # encoding: [0x7f,0x54,0x51,0x8a]
+ lhx $11, $21($27) # CHECK: lhx $11, $21($27) # encoding: [0x7f,0x75,0x59,0x0a]
+ lwx $12, $22($gp) # CHECK: lwx $12, $22($gp) # encoding: [0x7f,0x96,0x60,0x0a]
+ madd $ac1, $6, $7 # CHECK: madd $ac1, $6, $7 # encoding: [0x70,0xc7,0x08,0x00]
+ maddu $ac0, $8, $9 # CHECK: maddu $ac0, $8, $9 # encoding: [0x71,0x09,0x00,0x01]
+ madd $6, $7 # CHECK: madd $6, $7 # encoding: [0x70,0xc7,0x00,0x00]
+ maddu $8, $9 # CHECK: maddu $8, $9 # encoding: [0x71,0x09,0x00,0x01]
+ maq_s.w.phl $ac2, $3, $4 # CHECK: maq_s.w.phl $ac2, $3, $4 # encoding: [0x7c,0x64,0x15,0x30]
+ maq_sa.w.phl $ac3, $5, $6 # CHECK: maq_sa.w.phl $ac3, $5, $6 # encoding: [0x7c,0xa6,0x1c,0x30]
+ maq_s.w.phr $ac0, $7, $8 # CHECK: maq_s.w.phr $ac0, $7, $8 # encoding: [0x7c,0xe8,0x05,0xb0]
+ maq_sa.w.phr $ac1, $9, $10 # CHECK: maq_sa.w.phr $ac1, $9, $10 # encoding: [0x7d,0x2a,0x0c,0xb0]
+ mfhi $14, $ac1 # CHECK: mfhi $14, $ac1 # encoding: [0x00,0x20,0x70,0x10]
+ mflo $15, $ac0 # CHECK: mflo $15, $ac0 # encoding: [0x00,0x00,0x78,0x12]
+ mfhi $14 # CHECK: mfhi $14 # encoding: [0x00,0x00,0x70,0x10]
+ mflo $15 # CHECK: mflo $15 # encoding: [0x00,0x00,0x78,0x12]
+ modsub $11, $12, $13 # CHECK: modsub $11, $12, $13 # encoding: [0x7d,0x8d,0x5c,0x90]
+ msub $ac3, $10, $11 # CHECK: msub $ac3, $10, $11 # encoding: [0x71,0x4b,0x18,0x04]
+ msubu $ac2, $12, $13 # CHECK: msubu $ac2, $12, $13 # encoding: [0x71,0x8d,0x10,0x05]
+ msub $10, $11 # CHECK: msub $10, $11 # encoding: [0x71,0x4b,0x00,0x04]
+ msubu $12, $13 # CHECK: msubu $12, $13 # encoding: [0x71,0x8d,0x00,0x05]
+ mthi $16, $ac3 # CHECK: mthi $16, $ac3 # encoding: [0x02,0x00,0x18,0x11]
+ mthi $16 # CHECK: mthi $16 # encoding: [0x02,0x00,0x00,0x11]
+ mthlip $14, $ac2 # CHECK: mthlip $14, $ac2 # encoding: [0x7d,0xc0,0x17,0xf8]
+ mtlo $17, $ac2 # CHECK: mtlo $17, $ac2 # encoding: [0x02,0x20,0x10,0x13]
+ mtlo $17 # CHECK: mtlo $17 # encoding: [0x02,0x20,0x00,0x13]
+ mul.ph $15, $16, $17 # CHECK: mul.ph $15, $16, $17 # encoding: [0x7e,0x11,0x7b,0x18]
+ mul_s.ph $18, $19, $20 # CHECK: mul_s.ph $18, $19, $20 # encoding: [0x7e,0x74,0x93,0x98]
+ muleq_s.w.phl $21, $22, $23 # CHECK: muleq_s.w.phl $21, $22, $23 # encoding: [0x7e,0xd7,0xaf,0x10]
+ muleq_s.w.phr $24, $25, $26 # CHECK: muleq_s.w.phr $24, $25, $26 # encoding: [0x7f,0x3a,0xc7,0x50]
+ muleu_s.ph.qbl $27, $gp, $sp # CHECK: muleu_s.ph.qbl $27, $gp, $sp # encoding: [0x7f,0x9d,0xd9,0x90]
+ muleu_s.ph.qbr $fp, $ra, $1 # CHECK: muleu_s.ph.qbr $fp, $ra, $1 # encoding: [0x7f,0xe1,0xf1,0xd0]
+ mulq_rs.ph $2, $3, $4 # CHECK: mulq_rs.ph $2, $3, $4 # encoding: [0x7c,0x64,0x17,0xd0]
+ mulq_rs.w $5, $6, $7 # CHECK: mulq_rs.w $5, $6, $7 # encoding: [0x7c,0xc7,0x2d,0xd8]
+ mulq_s.ph $8, $9, $10 # CHECK: mulq_s.ph $8, $9, $10 # encoding: [0x7d,0x2a,0x47,0x90]
+ mulq_s.w $11, $12, $13 # CHECK: mulq_s.w $11, $12, $13 # encoding: [0x7d,0x8d,0x5d,0x98]
+ mulsa.w.ph $ac3, $14, $15 # CHECK: mulsa.w.ph $ac3, $14, $15 # encoding: [0x7d,0xcf,0x18,0xb0]
+ mulsaq_s.w.ph $ac0, $16, $17 # CHECK: mulsaq_s.w.ph $ac0, $16, $17 # encoding: [0x7e,0x11,0x01,0xb0]
+ mult $ac3, $2, $3 # CHECK: mult $ac3, $2, $3 # encoding: [0x00,0x43,0x18,0x18]
+ multu $ac2, $4, $5 # CHECK: multu $ac2, $4, $5 # encoding: [0x00,0x85,0x10,0x19]
+ mult $2, $3 # CHECK: mult $2, $3 # encoding: [0x00,0x43,0x00,0x18]
+ multu $4, $5 # CHECK: multu $4, $5 # encoding: [0x00,0x85,0x00,0x19]
+ packrl.ph $18, $19, $20 # CHECK: packrl.ph $18, $19, $20 # encoding: [0x7e,0x74,0x93,0x91]
+ pick.ph $7, $15, $3 # CHECK: pick.ph $7, $15, $3 # encoding: [0x7d,0xe3,0x3a,0xd1]
+ pick.qb $2, $4, $8 # CHECK: pick.qb $2, $4, $8 # encoding: [0x7c,0x88,0x10,0xd1]
+ preceq.w.phl $20,$21 # CHECK: preceq.w.phl $20, $21 # encoding: [0x7c,0x15,0xa3,0x12]
+ preceq.w.phr $21,$22 # CHECK: preceq.w.phr $21, $22 # encoding: [0x7c,0x16,0xab,0x52]
+ precequ.ph.qbl $22,$23 # CHECK: precequ.ph.qbl $22, $23 # encoding: [0x7c,0x17,0xb1,0x12]
+ precequ.ph.qbla $24,$25 # CHECK: precequ.ph.qbla $24, $25 # encoding: [0x7c,0x19,0xc1,0x92]
+ precequ.ph.qbr $23,$24 # CHECK: precequ.ph.qbr $23, $24 # encoding: [0x7c,0x18,0xb9,0x52]
+ precequ.ph.qbra $25,$26 # CHECK: precequ.ph.qbra $25, $26 # encoding: [0x7c,0x1a,0xc9,0xd2]
+ preceu.ph.qbl $26,$27 # CHECK: preceu.ph.qbl $26, $27 # encoding: [0x7c,0x1b,0xd7,0x12]
+ preceu.ph.qbla $28,$29 # CHECK: preceu.ph.qbla $gp, $sp # encoding: [0x7c,0x1d,0xe7,0x92]
+ preceu.ph.qbr $27,$28 # CHECK: preceu.ph.qbr $27, $gp # encoding: [0x7c,0x1c,0xdf,0x52]
+ preceu.ph.qbra $29,$30 # CHECK: preceu.ph.qbra $sp, $fp # encoding: [0x7c,0x1e,0xef,0xd2]
+ precr.qb.ph $23,$24,$25 # CHECK: precr.qb.ph $23, $24, $25 # encoding: [0x7f,0x19,0xbb,0x51]
+ precr_sra.ph.w $24,$25,0 # CHECK: precr_sra.ph.w $24, $25, 0 # encoding: [0x7f,0x38,0x07,0x91]
+ precr_sra.ph.w $24,$25,31 # CHECK: precr_sra.ph.w $24, $25, 31 # encoding: [0x7f,0x38,0xff,0x91]
+ precr_sra_r.ph.w $25,$26,0 # CHECK: precr_sra_r.ph.w $25, $26, 0 # encoding: [0x7f,0x59,0x07,0xd1]
+ precr_sra_r.ph.w $25,$26,31 # CHECK: precr_sra_r.ph.w $25, $26, 31 # encoding: [0x7f,0x59,0xff,0xd1]
+ precrq.ph.w $17,$18,$19 # CHECK: precrq.ph.w $17, $18, $19 # encoding: [0x7e,0x53,0x8d,0x11]
+ precrq.qb.ph $16,$17,$18 # CHECK: precrq.qb.ph $16, $17, $18 # encoding: [0x7e,0x32,0x83,0x11]
+ precrqu_s.qb.ph $19,$20,$21 # CHECK: precrqu_s.qb.ph $19, $20, $21 # encoding: [0x7e,0x95,0x9b,0xd1]
+ precrq_rs.ph.w $18,$19,$20 # CHECK: precrq_rs.ph.w $18, $19, $20 # encoding: [0x7e,0x74,0x95,0x51]
+ prepend $1, $2, 3 # CHECK: prepend $1, $2, 3 # encoding: [0x7c,0x41,0x18,0x71]
+ raddu.w.qb $1, $2 # CHECK: raddu.w.qb $1, $2 # encoding: [0x7c,0x40,0x0d,0x10]
+ rddsp $5, 256 # CHECK: rddsp $5, 256 # encoding: [0x7d,0x00,0x2c,0xb8]
+ repl.ph $2, 12 # CHECK: repl.ph $2, 12 # encoding: [0x7c,0x0c,0x12,0x92]
+ repl.qb $1, 85 # CHECK: repl.qb $1, 85 # encoding: [0x7c,0x55,0x08,0x92]
+ replv.ph $1, $2 # CHECK: replv.ph $1, $2 # encoding: [0x7c,0x02,0x0a,0xd2]
+ replv.qb $1, $2 # CHECK: replv.qb $1, $2 # encoding: [0x7c,0x02,0x08,0xd2]
+ shilo $ac1, 3 # CHECK: shilo $ac1, 3 # encoding: [0x7c,0x30,0x0e,0xb8]
+ shilo $ac1, 16 # CHECK: shilo $ac1, 16 # encoding: [0x7d,0x00,0x0e,0xb8]
+ shilov $ac1, $2 # CHECK: shilov $ac1, $2 # encoding: [0x7c,0x40,0x0e,0xf8]
+ shll.ph $1, $2, 3 # CHECK: shll.ph $1, $2, 3 # encoding: [0x7c,0x62,0x0a,0x13]
+ shll_s.ph $1, $2, 3 # CHECK: shll_s.ph $1, $2, 3 # encoding: [0x7c,0x62,0x0b,0x13]
+ shll.qb $1, $2, 3 # CHECK: shll.qb $1, $2, 3 # encoding: [0x7c,0x62,0x08,0x13]
+ shllv.ph $1, $2, $3 # CHECK: shllv.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0a,0x93]
+ shllv_s.ph $1, $2, $3 # CHECK: shllv_s.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0b,0x93]
+ shllv.qb $1, $2, $3 # CHECK: shllv.qb $1, $2, $3 # encoding: [0x7c,0x62,0x08,0x93]
+ shllv_s.w $1, $2, $3 # CHECK: shllv_s.w $1, $2, $3 # encoding: [0x7c,0x62,0x0d,0x93]
+ shll_s.w $1, $2, 3 # CHECK: shll_s.w $1, $2, 3 # encoding: [0x7c,0x62,0x0d,0x13]
+ shra.qb $2, $16, 2 # CHECK: shra.qb $2, $16, 2 # encoding: [0x7c,0x50,0x11,0x13]
+ shra_r.qb $2, $16, 2 # CHECK: shra_r.qb $2, $16, 2 # encoding: [0x7c,0x50,0x11,0x53]
+ shra.ph $5, $2, 1 # CHECK: shra.ph $5, $2, 1 # encoding: [0x7c,0x22,0x2a,0x53]
+ shra_r.ph $5, $2, 1 # CHECK: shra_r.ph $5, $2, 1 # encoding: [0x7c,0x22,0x2b,0x53]
+ shrav.ph $1, $2, $3 # CHECK: shrav.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0a,0xd3]
+ shrav_r.ph $1, $2, $3 # CHECK: shrav_r.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0b,0xd3]
+ shrav.qb $1, $2, $3 # CHECK: shrav.qb $1, $2, $3 # encoding: [0x7c,0x62,0x09,0x93]
+ shrav_r.qb $1, $2, $3 # CHECK: shrav_r.qb $1, $2, $3 # encoding: [0x7c,0x62,0x09,0xd3]
+ shrav_r.w $1, $2, $3 # CHECK: shrav_r.w $1, $2, $3 # encoding: [0x7c,0x62,0x0d,0xd3]
+ shra_r.w $1, $2, 1 # CHECK: shra_r.w $1, $2, 1 # encoding: [0x7c,0x22,0x0d,0x53]
+ shrl.ph $1, $2, 2 # CHECK: shrl.ph $1, $2, 2 # encoding: [0x7c,0x42,0x0e,0x53]
+ shrl.qb $1, $2, 2 # CHECK: shrl.qb $1, $2, 2 # encoding: [0x7c,0x42,0x08,0x53]
+ shrlv.ph $1, $2, $3 # CHECK: shrlv.ph $1, $2, $3 # encoding: [0x7c,0x62,0x0e,0xd3]
+ shrlv.qb $1, $2, $3 # CHECK: shrlv.qb $1, $2, $3 # encoding: [0x7c,0x62,0x08,0xd3]
+ subq.ph $1, $2, $3 # CHECK: subq.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0a,0xd0]
+ subq_s.ph $1, $2, $3 # CHECK: subq_s.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0b,0xd0]
+ subq_s.w $1, $2, $3 # CHECK: subq_s.w $1, $2, $3 # encoding: [0x7c,0x43,0x0d,0xd0]
+ subqh.ph $1, $2, $3 # CHECK: subqh.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0a,0x58]
+ subqh_r.ph $1, $2, $3 # CHECK: subqh_r.ph $1, $2, $3 # encoding: [0x7c,0x43,0x0a,0xd8]
+ subqh.w $1, $2, $3 # CHECK: subqh.w $1, $2, $3 # encoding: [0x7c,0x43,0x0c,0x58]
+ subqh_r.w $1, $2, $3 # CHECK: subqh_r.w $1, $2, $3 # encoding: [0x7c,0x43,0x0c,0xd8]
+ subu.ph $6, $2, $9 # CHECK: subu.ph $6, $2, $9 # encoding: [0x7c,0x49,0x32,0x50]
+ subu_s.ph $2, $3, $4 # CHECK: subu_s.ph $2, $3, $4 # encoding: [0x7c,0x64,0x13,0x50]
+ subu.qb $1, $2, $3 # CHECK: subu.qb $1, $2, $3 # encoding: [0x7c,0x43,0x08,0x50]
+ subu_s.qb $1, $2, $3 # CHECK: subu_s.qb $1, $2, $3 # encoding: [0x7c,0x43,0x09,0x50]
+ subuh.qb $1, $2, $3 # CHECK: subuh.qb $1, $2, $3 # encoding: [0x7c,0x43,0x08,0x58]
+ subuh_r.qb $1, $2, $3 # CHECK: subuh_r.qb $1, $2, $3 # encoding: [0x7c,0x43,0x08,0xd8]
+ wrdsp $1, 0 # CHECK: wrdsp $1, 0 # encoding: [0x7c,0x20,0x04,0xf8]
+ wrdsp $5 # CHECK: wrdsp $5 # encoding: [0x7c,0xa0,0xfc,0xf8]
+ wrdsp $5, 2 # CHECK: wrdsp $5, 2 # encoding: [0x7c,0xa0,0x14,0xf8]
+ wrdsp $5, 31 # CHECK: wrdsp $5 # encoding: [0x7c,0xa0,0xfc,0xf8]
diff --git a/test/MC/Mips/elf_basic.s b/test/MC/Mips/elf_basic.s
index 6c1e7690126d..4739247f67fa 100644
--- a/test/MC/Mips/elf_basic.s
+++ b/test/MC/Mips/elf_basic.s
@@ -36,6 +36,6 @@
// CHECK-LE64: Ident {
// CHECK-LE64: Class: 64-bit
// CHECK-LE64: DataEncoding: LittleEndian
-// CHECK-LE64: OS/ABI: GNU/Linux
+// CHECK-LE64: OS/ABI: SystemV
// CHECK-LE64: }
// CHECK-LE64: }
diff --git a/test/MC/Mips/eva/invalid-noeva-wrong-error.s b/test/MC/Mips/eva/invalid-noeva-wrong-error.s
new file mode 100644
index 000000000000..77b25645fe20
--- /dev/null
+++ b/test/MC/Mips/eva/invalid-noeva-wrong-error.s
@@ -0,0 +1,69 @@
+# invalid operand for instructions that are invalid without -mattr=+eva flag and
+# are correctly rejected but use the wrong error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r3 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r5 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r6 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r2 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r3 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r5 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ cachee 31, 255($7) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ cachee 0, -256($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ cachee 5, -140($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lbe $10,-256($25) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lbe $13,255($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lbe $11,146($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lbue $13,-256($v1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lbue $13,255($v0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lbue $13,-190($v1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhe $13,-256($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhe $12,255($s0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhe $13,81($s0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhue $s2,-256($v1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhue $s2,255($v1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhue $s6,-168($v0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lle $v0,-256($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lle $v1,255($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lle $v1,-71($s6) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwe $15,255($a2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwe $13,-256($a2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwe $15,-200($a1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwle $s6,255($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwle $s7,-256($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwle $s7,-176($13) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwre $zero,255($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwre $zero,-256($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwre $zero,-176($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ prefe 14, -256($2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ prefe 11, 255($3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ prefe 14, -37($3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sbe $s1,255($11) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sbe $s1,-256($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sbe $s3,0($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sce $9,255($s2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sce $12,-256($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sce $13,-31($s7) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ she $14,255($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ she $14,-256($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ she $9,235($11) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swe $ra,255($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swe $ra,-256($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swe $ra,-53($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swle $9,255($s1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swle $10,-256($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swle $8,131($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $s4,255($13) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $s4,-256($13) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $s2,86($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/eva/invalid-noeva.s b/test/MC/Mips/eva/invalid-noeva.s
new file mode 100644
index 000000000000..ee9ce12b2054
--- /dev/null
+++ b/test/MC/Mips/eva/invalid-noeva.s
@@ -0,0 +1,22 @@
+# invalid operand for instructions that are invalid without -mattr=+eva flag
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r3 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r5 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r6 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r2 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r3 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r5 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ tlbinv # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ tlbinvf # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/eva/invalid.s b/test/MC/Mips/eva/invalid.s
new file mode 100644
index 000000000000..2ef4eaaab08d
--- /dev/null
+++ b/test/MC/Mips/eva/invalid.s
@@ -0,0 +1,11 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r2 \
+# RUN: -mattr==eva 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ cachee -1, 255($7) # CHECK: :[[@LINE]]:12: error: expected 5-bit unsigned immediate
+ cachee 32, 255($7) # CHECK: :[[@LINE]]:12: error: expected 5-bit unsigned immediate
+ prefe -1, 255($7) # CHECK: :[[@LINE]]:11: error: expected 5-bit unsigned immediate
+ prefe 32, 255($7) # CHECK: :[[@LINE]]:11: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/eva/invalid_R6.s b/test/MC/Mips/eva/invalid_R6.s
new file mode 100644
index 000000000000..81c322845a63
--- /dev/null
+++ b/test/MC/Mips/eva/invalid_R6.s
@@ -0,0 +1,20 @@
+# Instructions that are invalid as they were removed in R6
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips32r6 -mattr=+eva 2>%t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 -mattr=+eva 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ lwle $s6,255($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwle $s7,-256($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwle $s7,-176($13) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwre $zero,255($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwre $zero,-256($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwre $zero,-176($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swle $9,255($s1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swle $10,-256($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swle $8,131($s5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $s4,255($13) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $s4,-256($13) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $s2,86($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/eva/valid_R6.s b/test/MC/Mips/eva/valid_R6.s
new file mode 100644
index 000000000000..75ecf7538bb3
--- /dev/null
+++ b/test/MC/Mips/eva/valid_R6.s
@@ -0,0 +1,47 @@
+# Instructions that are valid
+#
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 -mattr=+eva | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 -mattr=+eva | FileCheck %s
+a:
+ .set noat
+ cachee 31, 255($7) # CHECK: cachee 31, 255($7) # encoding: [0x7c,0xff,0x7f,0x9b]
+ cachee 0, -256($4) # CHECK: cachee 0, -256($4) # encoding: [0x7c,0x80,0x80,0x1b]
+ cachee 5, -140($4) # CHECK: cachee 5, -140($4) # encoding: [0x7c,0x85,0xba,0x1b]
+ lbe $10,-256($25) # CHECK: lbe $10, -256($25) # encoding: [0x7f,0x2a,0x80,0x2c]
+ lbe $13,255($15) # CHECK: lbe $13, 255($15) # encoding: [0x7d,0xed,0x7f,0xac]
+ lbe $11,146($14) # CHECK: lbe $11, 146($14) # encoding: [0x7d,0xcb,0x49,0x2c]
+ lbue $13,-256($v1) # CHECK: lbue $13, -256($3) # encoding: [0x7c,0x6d,0x80,0x28]
+ lbue $13,255($v0) # CHECK: lbue $13, 255($2) # encoding: [0x7c,0x4d,0x7f,0xa8]
+ lbue $13,-190($v1) # CHECK: lbue $13, -190($3) # encoding: [0x7c,0x6d,0xa1,0x28]
+ lhe $13,-256($s5) # CHECK: lhe $13, -256($21) # encoding: [0x7e,0xad,0x80,0x2d]
+ lhe $12,255($s0) # CHECK: lhe $12, 255($16) # encoding: [0x7e,0x0c,0x7f,0xad]
+ lhe $13,81($s0) # CHECK: lhe $13, 81($16) # encoding: [0x7e,0x0d,0x28,0xad]
+ lhue $s2,-256($v1) # CHECK: lhue $18, -256($3) # encoding: [0x7c,0x72,0x80,0x29]
+ lhue $s2,255($v1) # CHECK: lhue $18, 255($3) # encoding: [0x7c,0x72,0x7f,0xa9]
+ lhue $s6,-168($v0) # CHECK: lhue $22, -168($2) # encoding: [0x7c,0x56,0xac,0x29]
+ lle $v0,-256($s5) # CHECK: lle $2, -256($21) # encoding: [0x7e,0xa2,0x80,0x2e]
+ lle $v1,255($s3) # CHECK: lle $3, 255($19) # encoding: [0x7e,0x63,0x7f,0xae]
+ lle $v1,-71($s6) # CHECK: lle $3, -71($22) # encoding: [0x7e,0xc3,0xdc,0xae]
+ lwe $15,255($a2) # CHECK: lwe $15, 255($6) # encoding: [0x7c,0xcf,0x7f,0xaf]
+ lwe $13,-256($a2) # CHECK: lwe $13, -256($6) # encoding: [0x7c,0xcd,0x80,0x2f]
+ lwe $15,-200($a1) # CHECK: lwe $15, -200($5) # encoding: [0x7c,0xaf,0x9c,0x2f]
+ prefe 14, -256($2) # CHECK: prefe 14, -256($2) # encoding: [0x7c,0x4e,0x80,0x23]
+ prefe 11, 255($3) # CHECK: prefe 11, 255($3) # encoding: [0x7c,0x6b,0x7f,0xa3]
+ prefe 14, -37($3) # CHECK: prefe 14, -37($3) # encoding: [0x7c,0x6e,0xed,0xa3]
+ sbe $s1,255($11) # CHECK: sbe $17, 255($11) # encoding: [0x7d,0x71,0x7f,0x9c]
+ sbe $s1,-256($10) # CHECK: sbe $17, -256($10) # encoding: [0x7d,0x51,0x80,0x1c]
+ sbe $s3,0($14) # CHECK: sbe $19, 0($14) # encoding: [0x7d,0xd3,0x00,0x1c]
+ sce $9,255($s2) # CHECK: sce $9, 255($18) # encoding: [0x7e,0x49,0x7f,0x9e]
+ sce $12,-256($s5) # CHECK: sce $12, -256($21) # encoding: [0x7e,0xac,0x80,0x1e]
+ sce $13,-31($s7) # CHECK: sce $13, -31($23) # encoding: [0x7e,0xed,0xf0,0x9e]
+ she $14,255($15) # CHECK: she $14, 255($15) # encoding: [0x7d,0xee,0x7f,0x9d]
+ she $14,-256($15) # CHECK: she $14, -256($15) # encoding: [0x7d,0xee,0x80,0x1d]
+ she $9,235($11) # CHECK: she $9, 235($11) # encoding: [0x7d,0x69,0x75,0x9d]
+ swe $ra,255($sp) # CHECK: swe $ra, 255($sp) # encoding: [0x7f,0xbf,0x7f,0x9f]
+ swe $ra,-256($sp) # CHECK: swe $ra, -256($sp) # encoding: [0x7f,0xbf,0x80,0x1f]
+ swe $ra,-53($sp) # CHECK: swe $ra, -53($sp) # encoding: [0x7f,0xbf,0xe5,0x9f]
+ tlbinv # CHECK: tlbinv # encoding: [0x42,0x00,0x00,0x03]
+ tlbinvf # CHECK: tlbinvf # encoding: [0x42,0x00,0x00,0x04]
+
+
+1:
diff --git a/test/MC/Mips/eva/valid_preR6.s b/test/MC/Mips/eva/valid_preR6.s
new file mode 100644
index 000000000000..b7ae76c7819a
--- /dev/null
+++ b/test/MC/Mips/eva/valid_preR6.s
@@ -0,0 +1,62 @@
+# Instructions that are valid
+#
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 -mattr=+eva | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r3 -mattr=+eva | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r5 -mattr=+eva | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r2 -mattr=+eva | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r3 -mattr=+eva | FileCheck %s
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r5 -mattr=+eva | FileCheck %s
+a:
+ .set noat
+ cachee 31, 255($7) # CHECK: cachee 31, 255($7) # encoding: [0x7c,0xff,0x7f,0x9b]
+ cachee 0, -256($4) # CHECK: cachee 0, -256($4) # encoding: [0x7c,0x80,0x80,0x1b]
+ cachee 5, -140($4) # CHECK: cachee 5, -140($4) # encoding: [0x7c,0x85,0xba,0x1b]
+ lbe $10,-256($25) # CHECK: lbe $10, -256($25) # encoding: [0x7f,0x2a,0x80,0x2c]
+ lbe $13,255($15) # CHECK: lbe $13, 255($15) # encoding: [0x7d,0xed,0x7f,0xac]
+ lbe $11,146($14) # CHECK: lbe $11, 146($14) # encoding: [0x7d,0xcb,0x49,0x2c]
+ lbue $13,-256($v1) # CHECK: lbue $13, -256($3) # encoding: [0x7c,0x6d,0x80,0x28]
+ lbue $13,255($v0) # CHECK: lbue $13, 255($2) # encoding: [0x7c,0x4d,0x7f,0xa8]
+ lbue $13,-190($v1) # CHECK: lbue $13, -190($3) # encoding: [0x7c,0x6d,0xa1,0x28]
+ lhe $13,-256($s5) # CHECK: lhe $13, -256($21) # encoding: [0x7e,0xad,0x80,0x2d]
+ lhe $12,255($s0) # CHECK: lhe $12, 255($16) # encoding: [0x7e,0x0c,0x7f,0xad]
+ lhe $13,81($s0) # CHECK: lhe $13, 81($16) # encoding: [0x7e,0x0d,0x28,0xad]
+ lhue $s2,-256($v1) # CHECK: lhue $18, -256($3) # encoding: [0x7c,0x72,0x80,0x29]
+ lhue $s2,255($v1) # CHECK: lhue $18, 255($3) # encoding: [0x7c,0x72,0x7f,0xa9]
+ lhue $s6,-168($v0) # CHECK: lhue $22, -168($2) # encoding: [0x7c,0x56,0xac,0x29]
+ lle $v0,-256($s5) # CHECK: lle $2, -256($21) # encoding: [0x7e,0xa2,0x80,0x2e]
+ lle $v1,255($s3) # CHECK: lle $3, 255($19) # encoding: [0x7e,0x63,0x7f,0xae]
+ lle $v1,-71($s6) # CHECK: lle $3, -71($22) # encoding: [0x7e,0xc3,0xdc,0xae]
+ lwe $15,255($a2) # CHECK: lwe $15, 255($6) # encoding: [0x7c,0xcf,0x7f,0xaf]
+ lwe $13,-256($a2) # CHECK: lwe $13, -256($6) # encoding: [0x7c,0xcd,0x80,0x2f]
+ lwe $15,-200($a1) # CHECK: lwe $15, -200($5) # encoding: [0x7c,0xaf,0x9c,0x2f]
+ lwle $s6,255($15) # CHECK: lwle $22, 255($15) # encoding: [0x7d,0xf6,0x7f,0x99]
+ lwle $s7,-256($10) # CHECK: lwle $23, -256($10) # encoding: [0x7d,0x57,0x80,0x19]
+ lwle $s7,-176($13) # CHECK: lwle $23, -176($13) # encoding: [0x7d,0xb7,0xa8,0x19]
+ lwre $zero,255($gp) # CHECK: lwre $zero, 255($gp) # encoding: [0x7f,0x80,0x7f,0x9a]
+ lwre $zero,-256($gp) # CHECK: lwre $zero, -256($gp) # encoding: [0x7f,0x80,0x80,0x1a]
+ lwre $zero,-176($gp) # CHECK: lwre $zero, -176($gp) # encoding: [0x7f,0x80,0xa8,0x1a]
+ prefe 14, -256($2) # CHECK: prefe 14, -256($2) # encoding: [0x7c,0x4e,0x80,0x23]
+ prefe 11, 255($3) # CHECK: prefe 11, 255($3) # encoding: [0x7c,0x6b,0x7f,0xa3]
+ prefe 14, -37($3) # CHECK: prefe 14, -37($3) # encoding: [0x7c,0x6e,0xed,0xa3]
+ sbe $s1,255($11) # CHECK: sbe $17, 255($11) # encoding: [0x7d,0x71,0x7f,0x9c]
+ sbe $s1,-256($10) # CHECK: sbe $17, -256($10) # encoding: [0x7d,0x51,0x80,0x1c]
+ sbe $s3,0($14) # CHECK: sbe $19, 0($14) # encoding: [0x7d,0xd3,0x00,0x1c]
+ sce $9,255($s2) # CHECK: sce $9, 255($18) # encoding: [0x7e,0x49,0x7f,0x9e]
+ sce $12,-256($s5) # CHECK: sce $12, -256($21) # encoding: [0x7e,0xac,0x80,0x1e]
+ sce $13,-31($s7) # CHECK: sce $13, -31($23) # encoding: [0x7e,0xed,0xf0,0x9e]
+ she $14,255($15) # CHECK: she $14, 255($15) # encoding: [0x7d,0xee,0x7f,0x9d]
+ she $14,-256($15) # CHECK: she $14, -256($15) # encoding: [0x7d,0xee,0x80,0x1d]
+ she $9,235($11) # CHECK: she $9, 235($11) # encoding: [0x7d,0x69,0x75,0x9d]
+ swe $ra,255($sp) # CHECK: swe $ra, 255($sp) # encoding: [0x7f,0xbf,0x7f,0x9f]
+ swe $ra,-256($sp) # CHECK: swe $ra, -256($sp) # encoding: [0x7f,0xbf,0x80,0x1f]
+ swe $ra,-53($sp) # CHECK: swe $ra, -53($sp) # encoding: [0x7f,0xbf,0xe5,0x9f]
+ swle $9,255($s1) # CHECK: swle $9, 255($17) # encoding: [0x7e,0x29,0x7f,0xa1]
+ swle $10,-256($s3) # CHECK: swle $10, -256($19) # encoding: [0x7e,0x6a,0x80,0x21]
+ swle $8,131($s5) # CHECK: swle $8, 131($21) # encoding: [0x7e,0xa8,0x41,0xa1]
+ swre $s4,255($13) # CHECK: swre $20, 255($13) # encoding: [0x7d,0xb4,0x7f,0xa2]
+ swre $s4,-256($13) # CHECK: swre $20, -256($13) # encoding: [0x7d,0xb4,0x80,0x22]
+ swre $s2,86($14) # CHECK: swre $18, 86($14) # encoding: [0x7d,0xd2,0x2b,0x22]
+ tlbinv # CHECK: tlbinv # encoding: [0x42,0x00,0x00,0x03]
+ tlbinvf # CHECK: tlbinvf # encoding: [0x42,0x00,0x00,0x04]
+
+1:
diff --git a/test/MC/Mips/expansion-jal-sym-pic.s b/test/MC/Mips/expansion-jal-sym-pic.s
new file mode 100644
index 000000000000..23a4396276f7
--- /dev/null
+++ b/test/MC/Mips/expansion-jal-sym-pic.s
@@ -0,0 +1,183 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -show-encoding |\
+# RUN: FileCheck %s -check-prefix=ALL -check-prefix=NORMAL -check-prefix=O32
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64 -target-abi n32 -show-encoding |\
+# RUN: FileCheck %s -check-prefix=ALL -check-prefix=NORMAL -check-prefix=N32
+
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n64 -show-encoding |\
+# RUN: FileCheck %s -check-prefix=ALL -check-prefix=NORMAL -check-prefix=N64
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -mattr=micromips -show-encoding |\
+# RUN: FileCheck %s -check-prefix=ALL -check-prefix=MICROMIPS -check-prefix=O32-MICROMIPS
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64 -target-abi n32 -mattr=micromips -show-encoding |\
+# RUN: FileCheck %s -check-prefix=ALL -check-prefix=MICROMIPS -check-prefix=N32-MICROMIPS
+
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n64 -mattr=micromips -show-encoding |\
+# RUN: FileCheck %s -check-prefix=ALL -check-prefix=MICROMIPS -check-prefix=N64-MICROMIPS
+
+ .weak weak_label
+
+ .text
+ .option pic2
+
+ .ent local_label
+local_label:
+ .frame $sp, 0, $ra
+ .set noreorder
+
+ jal local_label
+ nop
+
+ jal weak_label
+ nop
+
+ jal global_label
+ nop
+
+ jal .text
+ nop
+
+ # local labels ($tmp symbols)
+ jal 1f
+ nop
+
+ .end local_label
+
+1:
+ nop
+ add $8, $8, $8
+ nop
+
+# Expanding "jal local_label":
+# O32: lw $25, %got(local_label)($gp) # encoding: [0x8f,0x99,A,A]
+# O32: # fixup A - offset: 0, value: local_label@GOT, kind: fixup_Mips_GOT_Local
+# O32: addiu $25, $25, %lo(local_label) # encoding: [0x27,0x39,A,A]
+# O32: # fixup A - offset: 0, value: local_label@ABS_LO, kind: fixup_Mips_LO16
+
+# N32: lw $25, %got_disp(local_label)($gp) # encoding: [0x8f,0x99,A,A]
+# N32: # fixup A - offset: 0, value: local_label@GOT_DISP, kind: fixup_Mips_GOT_DISP
+
+# N64: ld $25, %got_disp(local_label)($gp) # encoding: [0xdf,0x99,A,A]
+# N64: # fixup A - offset: 0, value: local_label@GOT_DISP, kind: fixup_Mips_GOT_DISP
+
+# O32-MICROMIPS: lw $25, %got(local_label)($gp) # encoding: [0xff,0x3c,A,A]
+# O32-MICROMIPS: # fixup A - offset: 0, value: local_label@GOT, kind: fixup_MICROMIPS_GOT16
+# O32-MICROMIPS: addiu $25, $25, %lo(local_label) # encoding: [0x33,0x39,A,A]
+# O32-MICROMIPS: # fixup A - offset: 0, value: local_label@ABS_LO, kind: fixup_MICROMIPS_LO16
+
+# N32-MICROMIPS: lw $25, %got_disp(local_label)($gp) # encoding: [0xff,0x3c,A,A]
+# N32-MICROMIPS: # fixup A - offset: 0, value: local_label@GOT_DISP, kind: fixup_MICROMIPS_GOT_DISP
+
+# N64-MICROMIPS: ld $25, %got_disp(local_label)($gp) # encoding: [0xdf,0x99,A,A]
+# N64-MICROMIPS: # fixup A - offset: 0, value: local_label@GOT_DISP, kind: fixup_MICROMIPS_GOT_DISP
+
+# NORMAL: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# MICROMIPS: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c]
+# ALL: nop # encoding: [0x00,0x00,0x00,0x00]
+
+
+# Expanding "jal weak_label":
+# O32: lw $25, %call16(weak_label)($gp) # encoding: [0x8f,0x99,A,A]
+# O32: # fixup A - offset: 0, value: weak_label@GOT_CALL, kind: fixup_Mips_CALL16
+
+# N32: lw $25, %call16(weak_label)($gp) # encoding: [0x8f,0x99,A,A]
+# N32: # fixup A - offset: 0, value: weak_label@GOT_CALL, kind: fixup_Mips_CALL16
+
+# N64: ld $25, %call16(weak_label)($gp) # encoding: [0xdf,0x99,A,A]
+# N64: # fixup A - offset: 0, value: weak_label@GOT_CALL, kind: fixup_Mips_CALL16
+
+# O32-MICROMIPS: lw $25, %call16(weak_label)($gp) # encoding: [0xff,0x3c,A,A]
+# O32-MICROMIPS: # fixup A - offset: 0, value: weak_label@GOT_CALL, kind: fixup_MICROMIPS_CALL16
+
+# N32-MICROMIPS: lw $25, %call16(weak_label)($gp) # encoding: [0xff,0x3c,A,A]
+# N32-MICROMIPS: # fixup A - offset: 0, value: weak_label@GOT_CALL, kind: fixup_MICROMIPS_CALL16
+
+# N64-MICROMIPS: ld $25, %call16(weak_label)($gp) # encoding: [0xdf,0x99,A,A]
+# N64-MICROMIPS: # fixup A - offset: 0, value: weak_label@GOT_CALL, kind: fixup_MICROMIPS_CALL16
+
+# NORMAL: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# MICROMIPS: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c]
+# ALL: nop # encoding: [0x00,0x00,0x00,0x00]
+
+
+# Expanding "jal global_label":
+# O32: lw $25, %call16(global_label)($gp) # encoding: [0x8f,0x99,A,A]
+# O32: # fixup A - offset: 0, value: global_label@GOT_CALL, kind: fixup_Mips_CALL16
+
+# N32: lw $25, %call16(global_label)($gp) # encoding: [0x8f,0x99,A,A]
+# N32: # fixup A - offset: 0, value: global_label@GOT_CALL, kind: fixup_Mips_CALL16
+
+# N64: ld $25, %call16(global_label)($gp) # encoding: [0xdf,0x99,A,A]
+# N64: # fixup A - offset: 0, value: global_label@GOT_CALL, kind: fixup_Mips_CALL16
+
+# O32-MICROMIPS: lw $25, %call16(global_label)($gp) # encoding: [0xff,0x3c,A,A]
+# O32-MICROMIPS: # fixup A - offset: 0, value: global_label@GOT_CALL, kind: fixup_MICROMIPS_CALL16
+
+# N32-MICROMIPS: lw $25, %call16(global_label)($gp) # encoding: [0xff,0x3c,A,A]
+# N32-MICROMIPS: # fixup A - offset: 0, value: global_label@GOT_CALL, kind: fixup_MICROMIPS_CALL16
+
+# N64-MICROMIPS: ld $25, %call16(global_label)($gp) # encoding: [0xdf,0x99,A,A]
+# N64-MICROMIPS: # fixup A - offset: 0, value: global_label@GOT_CALL, kind: fixup_MICROMIPS_CALL16
+
+# NORMAL: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# MICROMIPS: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c]
+# ALL: nop # encoding: [0x00,0x00,0x00,0x00]
+
+
+# FIXME: The .text section MCSymbol isn't created when printing assembly. However,
+# it is created when generating an ELF object file.
+# Expanding "jal .text":
+# O32-FIXME: lw $25, %got(.text)($gp) # encoding: [0x8f,0x99,A,A]
+# O32-FIXME: # fixup A - offset: 0, value: .text@GOT, kind: fixup_Mips_GOT_Local
+# O32-FIXME: addiu $25, $25, %lo(.text) # encoding: [0x27,0x39,A,A]
+# O32-FIXME: # fixup A - offset: 0, value: .text@ABS_LO, kind: fixup_Mips_LO16
+
+# N32-FIXME: lw $25, %got_disp(.text)($gp) # encoding: [0x8f,0x99,A,A]
+# N32-FIXME: # fixup A - offset: 0, value: .text@GOT_DISP, kind: fixup_Mips_GOT_DISP
+
+# N64-FIXME: ld $25, %got_disp(.text)($gp) # encoding: [0xdf,0x99,A,A]
+# N64-FIXME: # fixup A - offset: 0, value: .text@GOT_DISP, kind: fixup_Mips_GOT_DISP
+
+# O32-MICROMIPS-FIXME: lw $25, %got(.text)($gp) # encoding: [0xff,0x3c,A,A]
+# O32-MICROMIPS-FIXME: # fixup A - offset: 0, value: .text@GOT, kind: fixup_MICROMIPS_GOT16
+# O32-MICROMIPS-FIXME: addiu $25, $25, %lo(.text) # encoding: [0x33,0x39,A,A]
+# O32-MICROMIPS-FIXME: # fixup A - offset: 0, value: .text@ABS_LO, kind: fixup_MICROMIPS_LO16
+
+# N32-MICROMIPS-FIXME: lw $25, %got_disp(.text)($gp) # encoding: [0xff,0x3c,A,A]
+# N32-MICROMIPS-FIXME: # fixup A - offset: 0, value: .text@GOT_DISP, kind: fixup_MICROMIPS_GOT_DISP
+
+# N64-MICROMIPS-FIXME: ld $25, %got_disp(.text)($gp) # encoding: [0xdf,0x99,A,A]
+# N64-MICROMIPS-FIXME: # fixup A - offset: 0, value: .text@GOT_DISP, kind: fixup_MICROMIPS_GOT_DISP
+
+# NORMAL: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# MICROMIPS: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c]
+# ALL: nop # encoding: [0x00,0x00,0x00,0x00]
+
+
+# Expanding "jal 1f":
+# O32: lw $25, %got($tmp0)($gp) # encoding: [0x8f,0x99,A,A]
+# O32: # fixup A - offset: 0, value: ($tmp0)@GOT, kind: fixup_Mips_GOT_Local
+# O32: addiu $25, $25, %lo($tmp0) # encoding: [0x27,0x39,A,A]
+# O32: # fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_Mips_LO16
+
+# N32: lw $25, %got_disp($tmp0)($gp) # encoding: [0x8f,0x99,A,A]
+# N32: # fixup A - offset: 0, value: ($tmp0)@GOT_DISP, kind: fixup_Mips_GOT_DISP
+
+# N64: ld $25, %got_disp($tmp0)($gp) # encoding: [0xdf,0x99,A,A]
+# N64: # fixup A - offset: 0, value: ($tmp0)@GOT_DISP, kind: fixup_Mips_GOT_DISP
+
+# O32-MICROMIPS: lw $25, %got($tmp0)($gp) # encoding: [0xff,0x3c,A,A]
+# O32-MICROMIPS: # fixup A - offset: 0, value: ($tmp0)@GOT, kind: fixup_MICROMIPS_GOT16
+# O32-MICROMIPS: addiu $25, $25, %lo($tmp0) # encoding: [0x33,0x39,A,A]
+# O32-MICROMIPS: # fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_MICROMIPS_LO16
+
+# N32-MICROMIPS: lw $25, %got_disp($tmp0)($gp) # encoding: [0xff,0x3c,A,A]
+# N32-MICROMIPS: # fixup A - offset: 0, value: ($tmp0)@GOT_DISP, kind: fixup_MICROMIPS_GOT_DISP
+
+# N64-MICROMIPS: ld $25, %got_disp($tmp0)($gp) # encoding: [0xdf,0x99,A,A]
+# N64-MICROMIPS: # fixup A - offset: 0, value: ($tmp0)@GOT_DISP, kind: fixup_MICROMIPS_GOT_DISP
+
+# NORMAL: jalr $25 # encoding: [0x03,0x20,0xf8,0x09]
+# MICROMIPS: jalr $ra, $25 # encoding: [0x03,0xf9,0x0f,0x3c]
+# ALL: nop # encoding: [0x00,0x00,0x00,0x00]
diff --git a/test/MC/Mips/instalias-imm-expanding.s b/test/MC/Mips/instalias-imm-expanding.s
new file mode 100644
index 000000000000..b3667ef8bf81
--- /dev/null
+++ b/test/MC/Mips/instalias-imm-expanding.s
@@ -0,0 +1,273 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -show-encoding | FileCheck %s
+
+
+ .text
+text_label:
+
+ add $4, -0x80000000
+# CHECK: lui $1, 32768 # encoding: [0x00,0x80,0x01,0x3c]
+# CHECK: add $4, $4, $1 # encoding: [0x20,0x20,0x81,0x00]
+ add $4, -0x8001
+# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK: ori $1, $1, 32767 # encoding: [0xff,0x7f,0x21,0x34]
+# CHECK: add $4, $4, $1 # encoding: [0x20,0x20,0x81,0x00]
+ add $4, -0x8000
+# CHECK: addi $4, $4, -32768 # encoding: [0x00,0x80,0x84,0x20]
+ add $4, 0
+# CHECK: addi $4, $4, 0 # encoding: [0x00,0x00,0x84,0x20]
+ add $4, 0xFFFF
+# CHECK: ori $1, $zero, 65535 # encoding: [0xff,0xff,0x01,0x34]
+# CHECK: add $4, $4, $1 # encoding: [0x20,0x20,0x81,0x00]
+ add $4, 0x10000
+# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: add $4, $4, $1 # encoding: [0x20,0x20,0x81,0x00]
+ add $4, 0xFFFFFFFF
+# CHECK: addiu $1, $zero, -1 # encoding: [0xff,0xff,0x01,0x24]
+# CHECK: add $4, $4, $1 # encoding: [0x20,0x20,0x81,0x00]
+
+ add $4, $5, -0x80000000
+# CHECK: lui $4, 32768 # encoding: [0x00,0x80,0x04,0x3c]
+# CHECK: add $4, $4, $5 # encoding: [0x20,0x20,0x85,0x00]
+ add $4, $5, -0x8001
+# CHECK: lui $4, 65535 # encoding: [0xff,0xff,0x04,0x3c]
+# CHECK: ori $4, $4, 32767 # encoding: [0xff,0x7f,0x84,0x34]
+# CHECK: add $4, $4, $5 # encoding: [0x20,0x20,0x85,0x00]
+ add $4, $5, -0x8000
+# CHECK: addi $4, $5, -32768 # encoding: [0x00,0x80,0xa4,0x20]
+ add $4, $5, 0
+# CHECK: addi $4, $5, 0 # encoding: [0x00,0x00,0xa4,0x20]
+ add $4, $5, 0xFFFF
+# CHECK: ori $4, $zero, 65535 # encoding: [0xff,0xff,0x04,0x34]
+# CHECK: add $4, $4, $5 # encoding: [0x20,0x20,0x85,0x00]
+ add $4, $5, 0x10000
+# CHECK: lui $4, 1 # encoding: [0x01,0x00,0x04,0x3c]
+# CHECK: add $4, $4, $5 # encoding: [0x20,0x20,0x85,0x00]
+ add $4, $5, 0xFFFFFFFF
+# CHECK: addiu $4, $zero, -1 # encoding: [0xff,0xff,0x04,0x24]
+# CHECK: add $4, $4, $5 # encoding: [0x20,0x20,0x85,0x00]
+
+ addu $4, -0x80000000
+# CHECK: lui $1, 32768 # encoding: [0x00,0x80,0x01,0x3c]
+# CHECK: addu $4, $4, $1 # encoding: [0x21,0x20,0x81,0x00]
+ addu $4, -0x8001
+# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK: ori $1, $1, 32767 # encoding: [0xff,0x7f,0x21,0x34]
+# CHECK: addu $4, $4, $1 # encoding: [0x21,0x20,0x81,0x00]
+ addu $4, -0x8000
+# CHECK: addiu $4, $4, -32768 # encoding: [0x00,0x80,0x84,0x24]
+ addu $4, 0
+# CHECK: addiu $4, $4, 0 # encoding: [0x00,0x00,0x84,0x24]
+ addu $4, 0xFFFF
+# CHECK: ori $1, $zero, 65535 # encoding: [0xff,0xff,0x01,0x34]
+# CHECK: addu $4, $4, $1 # encoding: [0x21,0x20,0x81,0x00]
+ addu $4, 0x10000
+# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: addu $4, $4, $1 # encoding: [0x21,0x20,0x81,0x00]
+ addu $4, 0xFFFFFFFF
+# CHECK: addiu $1, $zero, -1 # encoding: [0xff,0xff,0x01,0x24]
+# CHECK: addu $4, $4, $1 # encoding: [0x21,0x20,0x81,0x00]
+
+ addu $4, $5, -0x80000000
+# CHECK: lui $4, 32768 # encoding: [0x00,0x80,0x04,0x3c]
+# CHECK: addu $4, $4, $5 # encoding: [0x21,0x20,0x85,0x00]
+ addu $4, $5, -0x8001
+# CHECK: lui $4, 65535 # encoding: [0xff,0xff,0x04,0x3c]
+# CHECK: ori $4, $4, 32767 # encoding: [0xff,0x7f,0x84,0x34]
+# CHECK: addu $4, $4, $5 # encoding: [0x21,0x20,0x85,0x00]
+ addu $4, $5, -0x8000
+# CHECK: addiu $4, $5, -32768 # encoding: [0x00,0x80,0xa4,0x24]
+ addu $4, $5, 0
+# CHECK: addiu $4, $5, 0 # encoding: [0x00,0x00,0xa4,0x24]
+ addu $4, $5, 0xFFFF
+# CHECK: ori $4, $zero, 65535 # encoding: [0xff,0xff,0x04,0x34]
+# CHECK: addu $4, $4, $5 # encoding: [0x21,0x20,0x85,0x00]
+ addu $4, $5, 0x10000
+# CHECK: lui $4, 1 # encoding: [0x01,0x00,0x04,0x3c]
+# CHECK: addu $4, $4, $5 # encoding: [0x21,0x20,0x85,0x00]
+ addu $4, $5, 0xFFFFFFFF
+# CHECK: addiu $4, $zero, -1 # encoding: [0xff,0xff,0x04,0x24]
+# CHECK: addu $4, $4, $5 # encoding: [0x21,0x20,0x85,0x00]
+
+ and $4, -0x80000000
+# CHECK: lui $1, 32768 # encoding: [0x00,0x80,0x01,0x3c]
+# CHECK: and $4, $4, $1 # encoding: [0x24,0x20,0x81,0x00]
+ and $4, -0x8001
+# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK: ori $1, $1, 32767 # encoding: [0xff,0x7f,0x21,0x34]
+# CHECK: and $4, $4, $1 # encoding: [0x24,0x20,0x81,0x00]
+ and $4, -0x8000
+# CHECK: addiu $1, $zero, -32768 # encoding: [0x00,0x80,0x01,0x24]
+ and $4, 0
+# CHECK: andi $4, $4, 0 # encoding: [0x00,0x00,0x84,0x30]
+ and $4, 0xFFFF
+# CHECK: andi $4, $4, 65535 # encoding: [0xff,0xff,0x84,0x30]
+ and $4, 0x10000
+# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: and $4, $4, $1 # encoding: [0x24,0x20,0x81,0x00]
+ and $4, 0xFFFFFFFF
+# CHECK: addiu $1, $zero, -1 # encoding: [0xff,0xff,0x01,0x24]
+# CHECK: and $4, $4, $1 # encoding: [0x24,0x20,0x81,0x00]
+
+ and $4, $5, -0x80000000
+# CHECK: lui $4, 32768 # encoding: [0x00,0x80,0x04,0x3c]
+# CHECK: and $4, $4, $5 # encoding: [0x24,0x20,0x85,0x00]
+ and $4, $5, -0x8001
+# CHECK: lui $4, 65535 # encoding: [0xff,0xff,0x04,0x3c]
+# CHECK: ori $4, $4, 32767 # encoding: [0xff,0x7f,0x84,0x34]
+# CHECK: and $4, $4, $5 # encoding: [0x24,0x20,0x85,0x00]
+ and $4, $5, -0x8000
+# CHECK: addiu $4, $zero, -32768 # encoding: [0x00,0x80,0x04,0x24]
+# CHECK: and $4, $4, $5 # encoding: [0x24,0x20,0x85,0x00]
+ and $4, $5, 0
+# CHECK: andi $4, $5, 0 # encoding: [0x00,0x00,0xa4,0x30]
+ and $4, $5, 0xFFFF
+# CHECK: andi $4, $5, 65535 # encoding: [0xff,0xff,0xa4,0x30]
+ and $4, $5, 0x10000
+# CHECK: lui $4, 1 # encoding: [0x01,0x00,0x04,0x3c]
+# CHECK: and $4, $4, $5 # encoding: [0x24,0x20,0x85,0x00]
+ and $4, $5, 0xFFFFFFFF
+# CHECK: addiu $4, $zero, -1 # encoding: [0xff,0xff,0x04,0x24]
+# CHECK: and $4, $4, $5 # encoding: [0x24,0x20,0x85,0x00]
+
+ nor $4, $5, 0
+# CHECK: addiu $4, $zero, 0 # encoding: [0x00,0x00,0x04,0x24]
+# CHECK: nor $4, $4, $5 # encoding: [0x27,0x20,0x85,0x00]
+ nor $4, $5, 1
+# CHECK: addiu $4, $zero, 1 # encoding: [0x01,0x00,0x04,0x24]
+# CHECK: nor $4, $4, $5 # encoding: [0x27,0x20,0x85,0x00]
+ nor $4, $5, 0x8000
+# CHECK: ori $4, $zero, 32768 # encoding: [0x00,0x80,0x04,0x34]
+# CHECK: nor $4, $4, $5 # encoding: [0x27,0x20,0x85,0x00]
+ nor $4, $5, -0x8000
+# CHECK: addiu $4, $zero, -32768 # encoding: [0x00,0x80,0x04,0x24]
+# CHECK: nor $4, $4, $5 # encoding: [0x27,0x20,0x85,0x00]
+ nor $4, $5, 0x10000
+# CHECK: lui $4, 1 # encoding: [0x01,0x00,0x04,0x3c]
+# CHECK: nor $4, $4, $5 # encoding: [0x27,0x20,0x85,0x00]
+ nor $4, $5, 0x1a5a5
+# CHECK: lui $4, 1 # encoding: [0x01,0x00,0x04,0x3c]
+# CHECK: ori $4, $4, 42405 # encoding: [0xa5,0xa5,0x84,0x34]
+# CHECK: nor $4, $4, $5 # encoding: [0x27,0x20,0x85,0x00]
+
+ or $4, -0x80000000
+# CHECK: lui $1, 32768 # encoding: [0x00,0x80,0x01,0x3c]
+# CHECK: or $4, $4, $1 # encoding: [0x25,0x20,0x81,0x00]
+ or $4, -0x8001
+# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK: ori $1, $1, 32767 # encoding: [0xff,0x7f,0x21,0x34]
+# CHECK: or $4, $4, $1 # encoding: [0x25,0x20,0x81,0x00]
+ or $4, -0x8000
+# CHECK: addiu $1, $zero, -32768 # encoding: [0x00,0x80,0x01,0x24]
+# CHECK: or $4, $4, $1 # encoding: [0x25,0x20,0x81,0x00]
+ or $4, 0
+# CHECK: ori $4, $4, 0 # encoding: [0x00,0x00,0x84,0x34]
+ or $4, 0xFFFF
+# CHECK: ori $4, $4, 65535 # encoding: [0xff,0xff,0x84,0x34]
+ or $4, 0x10000
+# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: or $4, $4, $1 # encoding: [0x25,0x20,0x81,0x00]
+ or $4, 0xFFFFFFFF
+# CHECK: addiu $1, $zero, -1 # encoding: [0xff,0xff,0x01,0x24]
+# CHECK: or $4, $4, $1 # encoding: [0x25,0x20,0x81,0x00]
+
+ or $4, $5, -0x80000000
+# CHECK: lui $4, 32768 # encoding: [0x00,0x80,0x04,0x3c]
+# CHECK: or $4, $4, $5 # encoding: [0x25,0x20,0x85,0x00]
+ or $4, $5, -0x8001
+# CHECK: lui $4, 65535 # encoding: [0xff,0xff,0x04,0x3c]
+# CHECK: ori $4, $4, 32767 # encoding: [0xff,0x7f,0x84,0x34]
+# CHECK: or $4, $4, $5 # encoding: [0x25,0x20,0x85,0x00]
+ or $4, $5, -0x8000
+# CHECK: addiu $4, $zero, -32768 # encoding: [0x00,0x80,0x04,0x24]
+# CHECK: or $4, $4, $5 # encoding: [0x25,0x20,0x85,0x00]
+ or $4, $5, 0
+# CHECK: ori $4, $5, 0 # encoding: [0x00,0x00,0xa4,0x34]
+ or $4, $5, 0xFFFF
+# CHECK: ori $4, $5, 65535 # encoding: [0xff,0xff,0xa4,0x34]
+ or $4, $5, 0x10000
+# CHECK: lui $4, 1 # encoding: [0x01,0x00,0x04,0x3c]
+# CHECK: or $4, $4, $5 # encoding: [0x25,0x20,0x85,0x00]
+ or $4, $5, 0xFFFFFFFF
+# CHECK: addiu $4, $zero, -1 # encoding: [0xff,0xff,0x04,0x24]
+# CHECK: or $4, $4, $5 # encoding: [0x25,0x20,0x85,0x00]
+
+ slt $4, $5, -0x80000000
+# CHECK: lui $4, 32768 # encoding: [0x00,0x80,0x04,0x3c]
+# CHECK: slt $4, $4, $5 # encoding: [0x2a,0x20,0x85,0x00]
+ slt $4, $5, -0x8001
+# CHECK: lui $4, 65535 # encoding: [0xff,0xff,0x04,0x3c]
+# CHECK: ori $4, $4, 32767 # encoding: [0xff,0x7f,0x84,0x34]
+# CHECK: slt $4, $4, $5 # encoding: [0x2a,0x20,0x85,0x00]
+ slt $4, $5, -0x8000
+# CHECK: slti $4, $5, -32768 # encoding: [0x00,0x80,0xa4,0x28]
+ slt $4, $5, 0
+# CHECK: slti $4, $5, 0 # encoding: [0x00,0x00,0xa4,0x28]
+ slt $4, $5, 0xFFFF
+# CHECK: ori $4, $zero, 65535 # encoding: [0xff,0xff,0x04,0x34]
+ slt $4, $5, 0x10000
+# CHECK: lui $4, 1 # encoding: [0x01,0x00,0x04,0x3c]
+# CHECK: slt $4, $4, $5 # encoding: [0x2a,0x20,0x85,0x00]
+ slt $4, $5, 0xFFFFFFFF
+# CHECK: addiu $4, $zero, -1 # encoding: [0xff,0xff,0x04,0x24]
+# CHECK: slt $4, $4, $5 # encoding: [0x2a,0x20,0x85,0x00]
+
+ sltu $4, $5, -0x80000000
+# CHECK: lui $4, 32768 # encoding: [0x00,0x80,0x04,0x3c]
+# CHECK: sltu $4, $4, $5 # encoding: [0x2b,0x20,0x85,0x00]
+ sltu $4, $5, -0x8001
+# CHECK: lui $4, 65535 # encoding: [0xff,0xff,0x04,0x3c]
+# CHECK: ori $4, $4, 32767 # encoding: [0xff,0x7f,0x84,0x34]
+# CHECK: sltu $4, $4, $5 # encoding: [0x2b,0x20,0x85,0x00]
+ sltu $4, $5, -0x8000
+# CHECK: sltiu $4, $5, -32768 # encoding: [0x00,0x80,0xa4,0x2c]
+ sltu $4, $5, 0
+# CHECK: sltiu $4, $5, 0 # encoding: [0x00,0x00,0xa4,0x2c]
+ sltu $4, $5, 0xFFFF
+# CHECK: ori $4, $zero, 65535 # encoding: [0xff,0xff,0x04,0x34]
+ sltu $4, $5, 0x10000
+# CHECK: lui $4, 1 # encoding: [0x01,0x00,0x04,0x3c]
+# CHECK: sltu $4, $4, $5 # encoding: [0x2b,0x20,0x85,0x00]
+ sltu $4, $5, 0xFFFFFFFF
+# CHECK: addiu $4, $zero, -1 # encoding: [0xff,0xff,0x04,0x24]
+# CHECK: sltu $4, $4, $5 # encoding: [0x2b,0x20,0x85,0x00]
+
+ xor $4, -0x80000000
+# CHECK: lui $1, 32768 # encoding: [0x00,0x80,0x01,0x3c]
+# CHECK: xor $4, $4, $1 # encoding: [0x26,0x20,0x81,0x00]
+ xor $4, -0x8001
+# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK: ori $1, $1, 32767 # encoding: [0xff,0x7f,0x21,0x34]
+# CHECK: xor $4, $4, $1 # encoding: [0x26,0x20,0x81,0x00]
+ xor $4, -0x8000
+# CHECK: addiu $1, $zero, -32768 # encoding: [0x00,0x80,0x01,0x24]
+ xor $4, 0
+# CHECK: xori $4, $4, 0 # encoding: [0x00,0x00,0x84,0x38]
+ xor $4, 0xFFFF
+# CHECK: xori $4, $4, 65535 # encoding: [0xff,0xff,0x84,0x38]
+ xor $4, 0x10000
+# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: xor $4, $4, $1 # encoding: [0x26,0x20,0x81,0x00]
+ xor $4, 0xFFFFFFFF
+# CHECK: addiu $1, $zero, -1 # encoding: [0xff,0xff,0x01,0x24]
+# CHECK: xor $4, $4, $1 # encoding: [0x26,0x20,0x81,0x00]
+
+ xor $4, $5, -0x80000000
+# CHECK: lui $4, 32768 # encoding: [0x00,0x80,0x04,0x3c]
+# CHECK: xor $4, $4, $5 # encoding: [0x26,0x20,0x85,0x00]
+ xor $4, $5, -0x8001
+# CHECK: lui $4, 65535 # encoding: [0xff,0xff,0x04,0x3c]
+# CHECK: ori $4, $4, 32767 # encoding: [0xff,0x7f,0x84,0x34]
+# CHECK: xor $4, $4, $5 # encoding: [0x26,0x20,0x85,0x00]
+ xor $4, $5, -0x8000
+# CHECK: addiu $4, $zero, -32768 # encoding: [0x00,0x80,0x04,0x24]
+# CHECK: xor $4, $4, $5 # encoding: [0x26,0x20,0x85,0x00]
+ xor $4, $5, 0
+# CHECK: xori $4, $5, 0 # encoding: [0x00,0x00,0xa4,0x38]
+ xor $4, $5, 0xFFFF
+# CHECK: xori $4, $5, 65535 # encoding: [0xff,0xff,0xa4,0x38]
+ xor $4, $5, 0x10000
+# CHECK: lui $4, 1 # encoding: [0x01,0x00,0x04,0x3c]
+# CHECK: xor $4, $4, $5 # encoding: [0x26,0x20,0x85,0x00]
+ xor $4, $5, 0xFFFFFFFF
+# CHECK: addiu $4, $zero, -1 # encoding: [0xff,0xff,0x04,0x24]
+# CHECK: xor $4, $4, $5 # encoding: [0x26,0x20,0x85,0x00]
diff --git a/test/MC/Mips/macro-bcc-imm-bad.s b/test/MC/Mips/macro-bcc-imm-bad.s
new file mode 100644
index 000000000000..bcf08bdc34d5
--- /dev/null
+++ b/test/MC/Mips/macro-bcc-imm-bad.s
@@ -0,0 +1,12 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=ALL
+
+ .text
+ .set noat
+foo:
+ blt $a2, 16, foo # ALL: :[[@LINE]]:5: error: pseudo-instruction requires $at, which is not available
+ .set at
+ .set noreorder
+ .set nomacro
+ blt $a2, 16, foo # ALL: :[[@LINE]]:5: warning: macro instruction expanded into multiple instructions
+ # ALL-NOT: :[[@LINE-1]]:5: warning: macro instruction expanded into multiple instructions
diff --git a/test/MC/Mips/macro-bcc-imm.s b/test/MC/Mips/macro-bcc-imm.s
new file mode 100644
index 000000000000..fbc4662d6833
--- /dev/null
+++ b/test/MC/Mips/macro-bcc-imm.s
@@ -0,0 +1,69 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -show-encoding 2>&1 | \
+# RUN: FileCheck %s --check-prefix=ALL
+
+ .text
+foo: # ALL-LABEL: foo:
+ blt $a2, 16, foo # ALL: addiu $1, $zero, 16
+ # ALL: slt $1, $6, $1
+ # ALL: bnez $1, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ ble $a2, 16, foo # ALL: addiu $1, $zero, 16
+ # ALL: slt $1, $1, $6
+ # ALL: beqz $1, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bge $a2, 32767, foo # ALL: addiu $1, $zero, 32767
+ # ALL: slt $1, $6, $1
+ # ALL: beqz $1, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bgt $a2, 32768, foo # ALL: ori $1, $zero, 32768
+ # ALL: slt $1, $1, $6
+ # ALL: bnez $1, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bltu $a2, 16, foo # ALL: addiu $1, $zero, 16
+ # ALL: sltu $1, $6, $1
+ # ALL: bnez $1, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bleu $a2, 16, foo # ALL: addiu $1, $zero, 16
+ # ALL: sltu $1, $1, $6
+ # ALL: beqz $1, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bgeu $a2, 32767, foo # ALL: addiu $1, $zero, 32767
+ # ALL: sltu $1, $6, $1
+ # ALL: beqz $1, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bgtu $a2, 32768, foo # ALL: ori $1, $zero, 32768
+ # ALL: sltu $1, $1, $6
+ # ALL: bnez $1, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bltl $a2, 16, foo # ALL: addiu $1, $zero, 16
+ # ALL: slt $1, $6, $1
+ # ALL: bnel $1, $zero, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ blel $a2, 16, foo # ALL: addiu $1, $zero, 16
+ # ALL: slt $1, $1, $6
+ # ALL: beql $1, $zero, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bgel $a2, 32767, foo # ALL: addiu $1, $zero, 32767
+ # ALL: slt $1, $6, $1
+ # ALL: beql $1, $zero, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bgtl $a2, 32768, foo # ALL: ori $1, $zero, 32768
+ # ALL: slt $1, $1, $6
+ # ALL: bnel $1, $zero, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bltul $a2, 16, foo # ALL: addiu $1, $zero, 16
+ # ALL: sltu $1, $6, $1
+ # ALL: bnel $1, $zero, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bleul $a2, 16, foo # ALL: addiu $1, $zero, 16
+ # ALL: sltu $1, $1, $6
+ # ALL: beql $1, $zero, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bgeul $a2, 32767, foo # ALL: addiu $1, $zero, 32767
+ # ALL: sltu $1, $6, $1
+ # ALL: beql $1, $zero, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
+ bgtul $a2, 65536, foo # ALL: lui $1, 1
+ # ALL: sltu $1, $1, $6
+ # ALL: bnel $1, $zero, foo
+ # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16
diff --git a/test/MC/Mips/macro-ddiv-bad.s b/test/MC/Mips/macro-ddiv-bad.s
new file mode 100644
index 000000000000..350a0fbaeda6
--- /dev/null
+++ b/test/MC/Mips/macro-ddiv-bad.s
@@ -0,0 +1,18 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r6 2>&1 | \
+# RUN: FileCheck %s --check-prefix=R6
+# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64r6 2>&1 | \
+# RUN: FileCheck %s --check-prefix=R6
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=NOT-R6
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=NOT-R6
+
+ .text
+ ddiv $25, $11
+ # R6: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+
+ ddiv $25, $0
+ # NOT-R6: :[[@LINE-1]]:3: warning: division by zero
+
+ ddiv $0,$0
+ # NOT-R6: :[[@LINE-1]]:3: warning: dividing zero by zero
diff --git a/test/MC/Mips/macro-ddiv.s b/test/MC/Mips/macro-ddiv.s
new file mode 100644
index 000000000000..99bc5450d015
--- /dev/null
+++ b/test/MC/Mips/macro-ddiv.s
@@ -0,0 +1,85 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r2 | \
+# RUN: FileCheck %s --check-prefix=CHECK-NOTRAP
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r2 \
+# RUN: -mattr=+use-tcc-in-div | FileCheck %s --check-prefix=CHECK-TRAP
+
+ ddiv $25, $11
+# CHECK-NOTRAP: bne $11, $zero, 8 # encoding: [0x15,0x60,0x00,0x02]
+# CHECK-NOTRAP: ddiv $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1e]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-NOTRAP: bne $11, $1, 20 # encoding: [0x15,0x61,0x00,0x05]
+# CHECK-NOTRAP: addiu $1, $zero, 1 # encoding: [0x24,0x01,0x00,0x01]
+# CHECK-NOTRAP: dsll32 $1, $1, 31 # encoding: [0x00,0x01,0x0f,0xfc]
+# CHECK-NOTRAP: bne $25, $1, 8 # encoding: [0x17,0x21,0x00,0x02]
+# CHECK-NOTRAP: sll $zero, $zero, 0 # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-NOTRAP: break 6 # encoding: [0x00,0x06,0x00,0x0d]
+# CHECK-NOTRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ ddiv $24,$12
+# CHECK-NOTRAP: bne $12, $zero, 8 # encoding: [0x15,0x80,0x00,0x02]
+# CHECK-NOTRAP: ddiv $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1e]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-NOTRAP: bne $12, $1, 20 # encoding: [0x15,0x81,0x00,0x05]
+# CHECK-NOTRAP: addiu $1, $zero, 1 # encoding: [0x24,0x01,0x00,0x01]
+# CHECK-NOTRAP: dsll32 $1, $1, 31 # encoding: [0x00,0x01,0x0f,0xfc]
+# CHECK-NOTRAP: bne $24, $1, 8 # encoding: [0x17,0x01,0x00,0x02]
+# CHECK-NOTRAP: sll $zero, $zero, 0 # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-NOTRAP: break 6 # encoding: [0x00,0x06,0x00,0x0d]
+# CHECK-NOTRAP: mflo $24 # encoding: [0x00,0x00,0xc0,0x12]
+
+ ddiv $25,$0
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+
+ ddiv $0,$9
+# CHECK-NOTRAP: bne $9, $zero, 8 # encoding: [0x15,0x20,0x00,0x02]
+# CHECK-NOTRAP: ddiv $zero, $zero, $9 # encoding: [0x00,0x09,0x00,0x1e]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-NOTRAP: bne $9, $1, 20 # encoding: [0x15,0x21,0x00,0x05]
+# CHECK-NOTRAP: addiu $1, $zero, 1 # encoding: [0x24,0x01,0x00,0x01]
+# CHECK-NOTRAP: dsll32 $1, $1, 31 # encoding: [0x00,0x01,0x0f,0xfc]
+# CHECK-NOTRAP: bne $zero, $1, 8 # encoding: [0x14,0x01,0x00,0x02]
+# CHECK-NOTRAP: sll $zero, $zero, 0 # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-NOTRAP: break 6 # encoding: [0x00,0x06,0x00,0x0d]
+# CHECK-NOTRAP: mflo $zero # encoding: [0x00,0x00,0x00,0x12]
+
+ ddiv $0,$0
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+
+ ddiv $25,$11
+# CHECK-TRAP: teq $11, $zero, 7 # encoding: [0x01,0x60,0x01,0xf4]
+# CHECK-TRAP: ddiv $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1e]
+# CHECK-TRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-TRAP: bne $11, $1, 12 # encoding: [0x15,0x61,0x00,0x03]
+# CHECK-TRAP: addiu $1, $zero, 1 # encoding: [0x24,0x01,0x00,0x01]
+# CHECK-TRAP: dsll32 $1, $1, 31 # encoding: [0x00,0x01,0x0f,0xfc]
+# CHECK-TRAP: teq $25, $1, 6 # encoding: [0x03,0x21,0x01,0xb4]
+# CHECK-TRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ ddiv $24,$12
+# CHECK-TRAP: teq $12, $zero, 7 # encoding: [0x01,0x80,0x01,0xf4]
+# CHECK-TRAP: ddiv $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1e]
+# CHECK-TRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-TRAP: bne $12, $1, 12 # encoding: [0x15,0x81,0x00,0x03]
+# CHECK-TRAP: addiu $1, $zero, 1 # encoding: [0x24,0x01,0x00,0x01]
+# CHECK-TRAP: dsll32 $1, $1, 31 # encoding: [0x00,0x01,0x0f,0xfc]
+# CHECK-TRAP: teq $24, $1, 6 # encoding: [0x03,0x01,0x01,0xb4]
+# CHECK-TRAP: mflo $24 # encoding: [0x00,0x00,0xc0,0x12]
+
+ ddiv $25,$0
+# CHECK-TRAP: teq $zero, $zero, 7 # encoding: [0x00,0x00,0x01,0xf4]
+
+ ddiv $0,$9
+# CHECK-TRAP: teq $9, $zero, 7 # encoding: [0x01,0x20,0x01,0xf4]
+# CHECK-TRAP: ddiv $zero, $zero, $9 # encoding: [0x00,0x09,0x00,0x1e]
+# CHECK-TRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-TRAP: bne $9, $1, 12 # encoding: [0x15,0x21,0x00,0x03]
+# CHECK-TRAP: addiu $1, $zero, 1 # encoding: [0x24,0x01,0x00,0x01]
+# CHECK-TRAP: dsll32 $1, $1, 31 # encoding: [0x00,0x01,0x0f,0xfc]
+# CHECK-TRAP: teq $zero, $1, 6 # encoding: [0x00,0x01,0x01,0xb4]
+# CHECK-TRAP: mflo $zero # encoding: [0x00,0x00,0x00,0x12]
+
+ ddiv $0,$0
+# CHECK-TRAP: teq $zero, $zero, 7 # encoding: [0x00,0x00,0x01,0xf4]
diff --git a/test/MC/Mips/macro-ddivu-bad.s b/test/MC/Mips/macro-ddivu-bad.s
new file mode 100644
index 000000000000..7a6c7e0bd528
--- /dev/null
+++ b/test/MC/Mips/macro-ddivu-bad.s
@@ -0,0 +1,18 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r6 2>&1 | \
+# RUN: FileCheck %s --check-prefix=R6
+# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64r6 2>&1 | \
+# RUN: FileCheck %s --check-prefix=R6
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=NOT-R6
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=NOT-R6
+
+ .text
+ ddivu $25, $11
+ # R6: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+
+ ddivu $25, $0
+ # NOT-R6: :[[@LINE-1]]:3: warning: division by zero
+
+ ddivu $0,$0
+ # NOT-R6: :[[@LINE-1]]:3: warning: dividing zero by zero
diff --git a/test/MC/Mips/macro-ddivu.s b/test/MC/Mips/macro-ddivu.s
new file mode 100644
index 000000000000..72d923802785
--- /dev/null
+++ b/test/MC/Mips/macro-ddivu.s
@@ -0,0 +1,59 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r2 | \
+# RUN: FileCheck %s --check-prefix=CHECK-NOTRAP
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r2 \
+# RUN: -mattr=+use-tcc-in-div | FileCheck %s --check-prefix=CHECK-TRAP
+
+ ddivu $25,$11
+# CHECK-NOTRAP: bne $11, $zero, 8 # encoding: [0x15,0x60,0x00,0x02]
+# CHECK-NOTRAP: ddivu $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1f]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ ddivu $24,$12
+# CHECK-NOTRAP: bne $12, $zero, 8 # encoding: [0x15,0x80,0x00,0x02]
+# CHECK-NOTRAP: ddivu $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1f]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: mflo $24 # encoding: [0x00,0x00,0xc0,0x12]
+
+ ddivu $25,$0
+# CHECK-NOTRAP: bne $zero, $zero, 8 # encoding: [0x14,0x00,0x00,0x02]
+# CHECK-NOTRAP: ddivu $zero, $25, $zero # encoding: [0x03,0x20,0x00,0x1f]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ ddivu $0,$9
+# CHECK-NOTRAP: bne $9, $zero, 8 # encoding: [0x15,0x20,0x00,0x02]
+# CHECK-NOTRAP: ddivu $zero, $zero, $9 # encoding: [0x00,0x09,0x00,0x1f]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: mflo $zero # encoding: [0x00,0x00,0x00,0x12]
+
+ ddivu $0,$0
+# CHECK-NOTRAP: bne $zero, $zero, 8 # encoding: [0x14,0x00,0x00,0x02]
+# CHECK-NOTRAP: ddivu $zero, $zero, $zero # encoding: [0x00,0x00,0x00,0x1f]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: mflo $zero # encoding: [0x00,0x00,0x00,0x12]
+
+ ddivu $25, $11
+# CHECK-TRAP: teq $11, $zero, 7 # encoding: [0x01,0x60,0x01,0xf4]
+# CHECK-TRAP: ddivu $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1f]
+# CHECK-TRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ ddivu $24,$12
+# CHECK-TRAP: teq $12, $zero, 7 # encoding: [0x01,0x80,0x01,0xf4]
+# CHECK-TRAP: ddivu $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1f]
+# CHECK-TRAP: mflo $24 # encoding: [0x00,0x00,0xc0,0x12]
+
+ ddivu $25,$0
+# CHECK-TRAP: teq $zero, $zero, 7 # encoding: [0x00,0x00,0x01,0xf4]
+# CHECK-TRAP: ddivu $zero, $25, $zero # encoding: [0x03,0x20,0x00,0x1f]
+# CHECK-TRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ ddivu $0,$9
+# CHECK-TRAP: teq $9, $zero, 7 # encoding: [0x01,0x20,0x01,0xf4]
+# CHECK-TRAP: ddivu $zero, $zero, $9 # encoding: [0x00,0x09,0x00,0x1f]
+# CHECK-TRAP: mflo $zero # encoding: [0x00,0x00,0x00,0x12]
+
+ ddivu $0,$0
+# CHECK-TRAP: teq $zero, $zero, 7 # encoding: [0x00,0x00,0x01,0xf4]
+# CHECK-TRAP: ddivu $zero, $zero, $zero # encoding: [0x00,0x00,0x00,0x1f]
+# CHECK-TRAP: mflo $zero # encoding: [0x00,0x00,0x00,0x12]
diff --git a/test/MC/Mips/macro-div-bad.s b/test/MC/Mips/macro-div-bad.s
new file mode 100644
index 000000000000..086e8b441885
--- /dev/null
+++ b/test/MC/Mips/macro-div-bad.s
@@ -0,0 +1,18 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r6 2>&1 | \
+# RUN: FileCheck %s --check-prefix=R6
+# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64r6 2>&1 | \
+# RUN: FileCheck %s --check-prefix=R6
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=NOT-R6
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=NOT-R6
+
+ .text
+ div $25, $11
+ # R6: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+
+ div $25, $0
+ # NOT-R6: :[[@LINE-1]]:3: warning: division by zero
+
+ div $0,$0
+ # NOT-R6: :[[@LINE-1]]:3: warning: dividing zero by zero
diff --git a/test/MC/Mips/macro-div.s b/test/MC/Mips/macro-div.s
new file mode 100644
index 000000000000..9efd6e19db15
--- /dev/null
+++ b/test/MC/Mips/macro-div.s
@@ -0,0 +1,64 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 | \
+# RUN: FileCheck %s --check-prefix=CHECK-NOTRAP
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 \
+# RUN: -mattr=+use-tcc-in-div | FileCheck %s --check-prefix=CHECK-TRAP
+
+ div $25,$11
+# CHECK-NOTRAP: bnez $11, 8 # encoding: [0x15,0x60,0x00,0x02]
+# CHECK-NOTRAP: div $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1a]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-NOTRAP: bne $11, $1, 16 # encoding: [0x15,0x61,0x00,0x04]
+# CHECK-NOTRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00]
+# CHECK-NOTRAP: bne $25, $1, 8 # encoding: [0x17,0x21,0x00,0x02]
+# CHECK-NOTRAP: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-NOTRAP: break 6 # encoding: [0x00,0x06,0x00,0x0d]
+# CHECK-NOTRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ div $24,$12
+# CHECK-NOTRAP: bnez $12, 8 # encoding: [0x15,0x80,0x00,0x02]
+# CHECK-NOTRAP: div $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1a]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-NOTRAP: bne $12, $1, 16 # encoding: [0x15,0x81,0x00,0x04]
+# CHECK-NOTRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00]
+# CHECK-NOTRAP: bne $24, $1, 8 # encoding: [0x17,0x01,0x00,0x02]
+# CHECK-NOTRAP: nop # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-NOTRAP: break 6 # encoding: [0x00,0x06,0x00,0x0d]
+# CHECK-NOTRAP: mflo $24 # encoding: [0x00,0x00,0xc0,0x12]
+
+ div $25,$0
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+
+ div $0,$9
+# CHECK-NOTRAP: div $zero, $zero, $9 # encoding: [0x00,0x09,0x00,0x1a]
+
+ div $0,$0
+# CHECK-NOTRAP: div $zero, $zero, $zero # encoding: [0x00,0x00,0x00,0x1a]
+
+ div $25, $11
+# CHECK-TRAP: teq $11, $zero, 7 # encoding: [0x01,0x60,0x01,0xf4]
+# CHECK-TRAP: div $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1a]
+# CHECK-TRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-TRAP: bne $11, $1, 8 # encoding: [0x15,0x61,0x00,0x02]
+# CHECK-TRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00]
+# CHECK-TRAP: teq $25, $1, 6 # encoding: [0x03,0x21,0x01,0xb4]
+# CHECK-TRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ div $24,$12
+# CHECK-TRAP: teq $12, $zero, 7 # encoding: [0x01,0x80,0x01,0xf4]
+# CHECK-TRAP: div $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1a]
+# CHECK-TRAP: addiu $1, $zero, -1 # encoding: [0x24,0x01,0xff,0xff]
+# CHECK-TRAP: bne $12, $1, 8 # encoding: [0x15,0x81,0x00,0x02]
+# CHECK-TRAP: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x00]
+# CHECK-TRAP: teq $24, $1, 6 # encoding: [0x03,0x01,0x01,0xb4]
+# CHECK-TRAP: mflo $24 # encoding: [0x00,0x00,0xc0,0x12]
+
+ div $25,$0
+# CHECK-TRAP: teq $zero, $zero, 7 # encoding: [0x00,0x00,0x01,0xf4]
+
+ div $0,$9
+# CHECK-TRAP: div $zero, $zero, $9 # encoding: [0x00,0x09,0x00,0x1a]
+
+ div $0,$0
+# CHECK-TRAP: div $zero, $zero, $zero # encoding: [0x00,0x00,0x00,0x1a]
diff --git a/test/MC/Mips/macro-divu-bad.s b/test/MC/Mips/macro-divu-bad.s
new file mode 100644
index 000000000000..45cef1f81820
--- /dev/null
+++ b/test/MC/Mips/macro-divu-bad.s
@@ -0,0 +1,18 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r6 2>&1 | \
+# RUN: FileCheck %s --check-prefix=R6
+# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64r6 2>&1 | \
+# RUN: FileCheck %s --check-prefix=R6
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=NOT-R6
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 2>&1 | \
+# RUN: FileCheck %s --check-prefix=NOT-R6
+
+ .text
+ divu $25, $11
+ # R6: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+
+ divu $25, $0
+ # NOT-R6: :[[@LINE-1]]:3: warning: division by zero
+
+ divu $0,$0
+ # NOT-R6: :[[@LINE-1]]:3: warning: dividing zero by zero
diff --git a/test/MC/Mips/macro-divu.s b/test/MC/Mips/macro-divu.s
new file mode 100644
index 000000000000..95630d34bd16
--- /dev/null
+++ b/test/MC/Mips/macro-divu.s
@@ -0,0 +1,49 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 | \
+# RUN: FileCheck %s --check-prefix=CHECK-NOTRAP
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 \
+# RUN: -mattr=+use-tcc-in-div | FileCheck %s --check-prefix=CHECK-TRAP
+
+ divu $25,$11
+# CHECK-NOTRAP: bnez $11, 8 # encoding: [0x15,0x60,0x00,0x02]
+# CHECK-NOTRAP: divu $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1b]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ divu $24,$12
+# CHECK-NOTRAP: bnez $12, 8 # encoding: [0x15,0x80,0x00,0x02]
+# CHECK-NOTRAP: divu $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1b]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: mflo $24 # encoding: [0x00,0x00,0xc0,0x12]
+
+ divu $25,$0
+# CHECK-NOTRAP: bnez $zero, 8 # encoding: [0x14,0x00,0x00,0x02]
+# CHECK-NOTRAP: divu $zero, $25, $zero # encoding: [0x03,0x20,0x00,0x1b]
+# CHECK-NOTRAP: break 7 # encoding: [0x00,0x07,0x00,0x0d]
+# CHECK-NOTRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ divu $0,$9
+# CHECK-NOTRAP: divu $zero, $zero, $9 # encoding: [0x00,0x09,0x00,0x1b]
+
+ divu $0,$0
+# CHECK-NOTRAP: divu $zero, $zero, $zero # encoding: [0x00,0x00,0x00,0x1b]
+
+ divu $25, $11
+# CHECK-TRAP: teq $11, $zero, 7 # encoding: [0x01,0x60,0x01,0xf4]
+# CHECK-TRAP: divu $zero, $25, $11 # encoding: [0x03,0x2b,0x00,0x1b]
+# CHECK-TRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ divu $24,$12
+# CHECK-TRAP: teq $12, $zero, 7 # encoding: [0x01,0x80,0x01,0xf4]
+# CHECK-TRAP: divu $zero, $24, $12 # encoding: [0x03,0x0c,0x00,0x1b]
+# CHECK-TRAP: mflo $24 # encoding: [0x00,0x00,0xc0,0x12]
+
+ divu $25,$0
+# CHECK-TRAP: teq $zero, $zero, 7 # encoding: [0x00,0x00,0x01,0xf4]
+# CHECK-TRAP: divu $zero, $25, $zero # encoding: [0x03,0x20,0x00,0x1b]
+# CHECK-TRAP: mflo $25 # encoding: [0x00,0x00,0xc8,0x12]
+
+ divu $0,$9
+# CHECK-TRAP: divu $zero, $zero, $9 # encoding: [0x00,0x09,0x00,0x1b]
+
+ divu $0,$0
+# CHECK-TRAP: divu $zero, $zero, $zero # encoding: [0x00,0x00,0x00,0x1b]
diff --git a/test/MC/Mips/macro-dla.s b/test/MC/Mips/macro-dla.s
new file mode 100644
index 000000000000..e3b558e9e514
--- /dev/null
+++ b/test/MC/Mips/macro-dla.s
@@ -0,0 +1,707 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r2 | \
+# RUN: FileCheck %s
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 | \
+# RUN: FileCheck %s
+
+bits_32_to_47_0x0000: # CHECK-LABEL: bits_32_to_47_0x0000:
+dla $5, 0x0000000000000001 # CHECK: daddiu $5, $zero, 1 # encoding: [0x64,0x05,0x00,0x01]
+dla $5, 0x0000000000000002 # CHECK: daddiu $5, $zero, 2 # encoding: [0x64,0x05,0x00,0x02]
+dla $5, 0x0000000000004000 # CHECK: daddiu $5, $zero, 16384 # encoding: [0x64,0x05,0x40,0x00]
+dla $5, 0x0000000000008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+dla $5, 0x00000000ffff8000 # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x00000000ffffc000 # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 49152 # encoding: [0x34,0xa5,0xc0,0x00]
+dla $5, 0x00000000fffffffe # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 65534 # encoding: [0x34,0xa5,0xff,0xfe]
+dla $5, 0x00000000ffffffff # CHECK: lui $5, 65535 # encoding: [0x3c,0x05,0xff,0xff]
+ # CHECK: dsrl32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3e]
+dla $5, 0x0000000000010000 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+dla $5, 0x0000000000020000 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+dla $5, 0x0000000040000000 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+dla $5, 0x0000000080000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x00000000c0000000 # CHECK: ori $5, $zero, 49152 # encoding: [0x34,0x05,0xc0,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x00000000fffe0000 # CHECK: ori $5, $zero, 65534 # encoding: [0x34,0x05,0xff,0xfe]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x00000000ffff0000 # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000000000010001 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000000020001 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000040000001 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000080000001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000000010002 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000000020002 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000040000002 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000080000002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000000014000 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000000024000 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000040004000 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000080004000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000000018000 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000000028000 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000040008000 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000080008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x00000000c0008000 # CHECK: ori $5, $zero, 49152 # encoding: [0x34,0x05,0xc0,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x00000000fffe8000 # CHECK: ori $5, $zero, 65534 # encoding: [0x34,0x05,0xff,0xfe]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x00000000ffff8000 # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+bits_32_to_47_0x0001: # CHECK-LABEL: bits_32_to_47_0x0001:
+dla $5, 0x0000000100000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 17 # encoding: [0x00,0x05,0x2c,0x78]
+dla $5, 0x0000000100000001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000100000002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000100004000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000100008000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000100010000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000000100010001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000100010002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000100014000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000100018000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000100020000 # CHECK: ori $5, $zero, 32769 # encoding: [0x34,0x05,0x80,0x01]
+ # CHECK: dsll $5, $5, 17 # encoding: [0x00,0x05,0x2c,0x78]
+dla $5, 0x0000000100020001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000100020002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000100024000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000100028000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000140000000 # CHECK: ori $5, $zero, 40960 # encoding: [0x34,0x05,0xa0,0x00]
+ # CHECK: dsll $5, $5, 17 # encoding: [0x00,0x05,0x2c,0x78]
+dla $5, 0x0000000140000001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000140000002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000140004000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000140008000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000180000000 # CHECK: ori $5, $zero, 49152 # encoding: [0x34,0x05,0xc0,0x00]
+ # CHECK: dsll $5, $5, 17 # encoding: [0x00,0x05,0x2c,0x78]
+dla $5, 0x0000000180000001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000180000002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000180004000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000180008000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+bits_32_to_47_0x0002: # CHECK-LABEL: bits_32_to_47_0x0002:
+dla $5, 0x0000000200000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 18 # encoding: [0x00,0x05,0x2c,0xb8]
+dla $5, 0x0000000200000001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000200000002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000200004000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000200008000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000200010000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000000200010001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000200010002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000200014000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000200018000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000200020000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000000200020001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000200020002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000200024000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000200028000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000240000000 # CHECK: ori $5, $zero, 36864 # encoding: [0x34,0x05,0x90,0x00]
+ # CHECK: dsll $5, $5, 18 # encoding: [0x00,0x05,0x2c,0xb8]
+dla $5, 0x0000000240000001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000240000002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000240004000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000240008000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000000280000000 # CHECK: ori $5, $zero, 40960 # encoding: [0x34,0x05,0xa0,0x00]
+ # CHECK: dsll $5, $5, 18 # encoding: [0x00,0x05,0x2c,0xb8]
+dla $5, 0x0000000280000001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000000280000002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000000280004000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000000280008000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+bits_32_to_47_0x4000: # CHECK-LABEL: bits_32_to_47_0x4000:
+dla $5, 0x0000400000000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 31 # encoding: [0x00,0x05,0x2f,0xf8]
+dla $5, 0x0000400000000001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000400000000002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000400000004000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000400000008000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000400000010000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000400000010001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000400000010002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000400000014000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000400000018000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000400000020000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000400000020001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000400000020002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000400000024000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000400000028000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000400040000000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000400040000001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000400040000002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000400040004000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000400040008000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000400080000000 # CHECK: ori $5, $zero, 32769 # encoding: [0x34,0x05,0x80,0x01]
+ # CHECK: dsll $5, $5, 31 # encoding: [0x00,0x05,0x2f,0xf8]
+dla $5, 0x0000400080000001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000400080000002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000400080004000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000400080008000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+bits_32_to_47_0x8000: # CHECK-LABEL: bits_32_to_47_0x8000:
+dla $5, 0x0000800000000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 32 # encoding: [0x00,0x05,0x28,0x3c]
+dla $5, 0x0000800000000001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000800000000002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000800000004000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000800000008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000800000010000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000800000010001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000800000010002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000800000014000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000800000018000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000800000020000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000800000020001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000800000020002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000800000024000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000800000028000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000800040000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000800040000001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000800040000002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000800040004000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000800040008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0000800080000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dla $5, 0x0000800080000001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dla $5, 0x0000800080000002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dla $5, 0x0000800080004000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dla $5, 0x0000800080008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+# Only test a few with bits 48-63 non-zero. It just adds an lui to the cases we've already done.
+dla $5, 0x0001800080008000 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x0002800080008000 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x4000800080008000 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dla $5, 0x8000800080008000 # CHECK: lui $5, 32768 # encoding: [0x3c,0x05,0x80,0x00]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+
+dla $5, 0x000000000($6) # CHECK: daddiu $5, $6, 0 # encoding: [0x64,0xc5,0x00,0x00]
+dla $5, 0x00000001($6) # CHECK: daddiu $5, $6, 1 # encoding: [0x64,0xc5,0x00,0x01]
+dla $5, 0x00000002($6) # CHECK: daddiu $5, $6, 2 # encoding: [0x64,0xc5,0x00,0x02]
+dla $5, 0x00004000($6) # CHECK: daddiu $5, $6, 16384 # encoding: [0x64,0xc5,0x40,0x00]
+dla $5, 0x00008000($6) # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0xffffffff($6) # CHECK: lui $5, 65535 # encoding: [0x3c,0x05,0xff,0xff]
+ # CHECK: dsrl32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3e]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0xfffffffe($6) # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 65534 # encoding: [0x34,0xa5,0xff,0xfe]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0xffffc000($6) # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 49152 # encoding: [0x34,0xa5,0xc0,0x00]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0xffff8000($6) # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+
+dla $5, 0x00010000($6) # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0x00020000($6) # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0x40000000($6) # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0x80000000($6) # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0xffff0000($6) # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0xfffe0000($6) # CHECK: ori $5, $zero, 65534 # encoding: [0x34,0x05,0xff,0xfe]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0xc0000000($6) # CHECK: ori $5, $zero, 49152 # encoding: [0x34,0x05,0xc0,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+dla $5, 0x80000000($6) # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+
+dla $5, 0x00010001($6) # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: daddu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x2d]
+# There are no further interesting immediates.
+
+dla $6, 0x00000001($6) # CHECK: daddiu $6, $6, 1 # encoding: [0x64,0xc6,0x00,0x01]
+dla $6, 0x00000002($6) # CHECK: daddiu $6, $6, 2 # encoding: [0x64,0xc6,0x00,0x02]
+dla $6, 0x00004000($6) # CHECK: daddiu $6, $6, 16384 # encoding: [0x64,0xc6,0x40,0x00]
+dla $6, 0x00008000($6) # CHECK: ori $1, $zero, 32768 # encoding: [0x34,0x01,0x80,0x00]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0xffffffff($6) # CHECK: lui $1, 65535 # encoding: [0x3c,0x01,0xff,0xff]
+ # CHECK: dsrl32 $1, $1, 0 # encoding: [0x00,0x01,0x08,0x3e]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0xfffffffe($6) # CHECK: ori $1, $zero, 65535 # encoding: [0x34,0x01,0xff,0xff]
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: ori $1, $1, 65534 # encoding: [0x34,0x21,0xff,0xfe]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0xffffc000($6) # CHECK: ori $1, $zero, 65535 # encoding: [0x34,0x01,0xff,0xff]
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: ori $1, $1, 49152 # encoding: [0x34,0x21,0xc0,0x00]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0xffff8000($6) # CHECK: ori $1, $zero, 65535 # encoding: [0x34,0x01,0xff,0xff]
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: ori $1, $1, 32768 # encoding: [0x34,0x21,0x80,0x00]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+
+dla $6, 0x00010000($6) # CHECK: lui $1, 1 # encoding: [0x3c,0x01,0x00,0x01]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0x00020000($6) # CHECK: lui $1, 2 # encoding: [0x3c,0x01,0x00,0x02]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0x40000000($6) # CHECK: lui $1, 16384 # encoding: [0x3c,0x01,0x40,0x00]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0x80000000($6) # CHECK: ori $1, $zero, 32768 # encoding: [0x34,0x01,0x80,0x00]
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0xffff0000($6) # CHECK: ori $1, $zero, 65535 # encoding: [0x34,0x01,0xff,0xff]
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0xfffe0000($6) # CHECK: ori $1, $zero, 65534 # encoding: [0x34,0x01,0xff,0xfe]
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0xc0000000($6) # CHECK: ori $1, $zero, 49152 # encoding: [0x34,0x01,0xc0,0x00]
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+dla $6, 0x80000000($6) # CHECK: ori $1, $zero, 32768 # encoding: [0x34,0x01,0x80,0x00]
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+
+dla $6, 0x00010001($6) # CHECK: lui $1, 1 # encoding: [0x3c,0x01,0x00,0x01]
+ # CHECK: ori $1, $1, 1 # encoding: [0x34,0x21,0x00,0x01]
+ # CHECK: daddu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x2d]
+# There are no further interesting immediates.
+
+symbol: # CHECK-LABEL: symbol:
+.extern extern_sym
+.option pic0
+dla $5, extern_sym # CHECK: lui $5, %highest(extern_sym) # encoding: [0x3c,0x05,A,A]
+ # CHECK: # fixup A - offset: 0, value: %highest(extern_sym), kind: fixup_Mips_HIGHEST
+ # CHECK: lui $1, %hi(extern_sym) # encoding: [0x3c,0x01,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(extern_sym), kind: fixup_Mips_HI16
+ # CHECK: daddiu $5, $5, %higher(extern_sym) # encoding: [0x64,0xa5,A,A]
+ # CHECK: # fixup A - offset: 0, value: %higher(extern_sym), kind: fixup_Mips_HIGHER
+ # CHECK: daddiu $1, $1, %lo(extern_sym) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(extern_sym), kind: fixup_Mips_LO16
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: daddu $5, $5, $1 # encoding: [0x00,0xa1,0x28,0x2d]
+
+dla $5, extern_sym($8) # CHECK: lui $5, %highest(extern_sym) # encoding: [0x3c,0x05,A,A]
+ # CHECK: # fixup A - offset: 0, value: %highest(extern_sym), kind: fixup_Mips_HIGHEST
+ # CHECK: lui $1, %hi(extern_sym) # encoding: [0x3c,0x01,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(extern_sym), kind: fixup_Mips_HI16
+ # CHECK: daddiu $5, $5, %higher(extern_sym) # encoding: [0x64,0xa5,A,A]
+ # CHECK: # fixup A - offset: 0, value: %higher(extern_sym), kind: fixup_Mips_HIGHER
+ # CHECK: daddiu $1, $1, %lo(extern_sym) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(extern_sym), kind: fixup_Mips_LO16
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: daddu $5, $5, $1 # encoding: [0x00,0xa1,0x28,0x2d]
+ # CHECK: daddu $5, $5, $8 # encoding: [0x00,0xa8,0x28,0x2d]
+
+dla $5, extern_sym($5) # CHECK: lui $1, %highest(extern_sym) # encoding: [0x3c,0x01,A,A]
+ # CHECK: # fixup A - offset: 0, value: %highest(extern_sym), kind: fixup_Mips_HIGHEST
+ # CHECK: daddiu $1, $1, %higher(extern_sym) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %higher(extern_sym), kind: fixup_Mips_HIGHER
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: daddiu $1, $1, %hi(extern_sym) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(extern_sym), kind: fixup_Mips_HI16
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: daddiu $1, $1, %lo(extern_sym) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(extern_sym), kind: fixup_Mips_LO16
+ # CHECK: daddu $5, $1, $5 # encoding: [0x00,0x25,0x28,0x2d]
+
+dla $5, extern_sym+8 # CHECK: lui $5, %highest(extern_sym+8) # encoding: [0x3c,0x05,A,A]
+ # CHECK: # fixup A - offset: 0, value: %highest(extern_sym+8), kind: fixup_Mips_HIGHEST
+ # CHECK: lui $1, %hi(extern_sym+8) # encoding: [0x3c,0x01,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(extern_sym+8), kind: fixup_Mips_HI16
+ # CHECK: daddiu $5, $5, %higher(extern_sym+8) # encoding: [0x64,0xa5,A,A]
+ # CHECK: # fixup A - offset: 0, value: %higher(extern_sym+8), kind: fixup_Mips_HIGHER
+ # CHECK: daddiu $1, $1, %lo(extern_sym+8) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(extern_sym+8), kind: fixup_Mips_LO16
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: daddu $5, $5, $1 # encoding: [0x00,0xa1,0x28,0x2d]
+
+dla $5, extern_sym+8($8) # CHECK: lui $5, %highest(extern_sym+8) # encoding: [0x3c,0x05,A,A]
+ # CHECK: # fixup A - offset: 0, value: %highest(extern_sym+8), kind: fixup_Mips_HIGHEST
+ # CHECK: lui $1, %hi(extern_sym+8) # encoding: [0x3c,0x01,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(extern_sym+8), kind: fixup_Mips_HI16
+ # CHECK: daddiu $5, $5, %higher(extern_sym+8) # encoding: [0x64,0xa5,A,A]
+ # CHECK: # fixup A - offset: 0, value: %higher(extern_sym+8), kind: fixup_Mips_HIGHER
+ # CHECK: daddiu $1, $1, %lo(extern_sym+8) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(extern_sym+8), kind: fixup_Mips_LO16
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: daddu $5, $5, $1 # encoding: [0x00,0xa1,0x28,0x2d]
+ # CHECK: daddu $5, $5, $8 # encoding: [0x00,0xa8,0x28,0x2d]
+
+dla $5, extern_sym+8($5) # CHECK: lui $1, %highest(extern_sym+8) # encoding: [0x3c,0x01,A,A]
+ # CHECK: # fixup A - offset: 0, value: %highest(extern_sym+8), kind: fixup_Mips_HIGHEST
+ # CHECK: daddiu $1, $1, %higher(extern_sym+8) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %higher(extern_sym+8), kind: fixup_Mips_HIGHER
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: daddiu $1, $1, %hi(extern_sym+8) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(extern_sym+8), kind: fixup_Mips_HI16
+ # CHECK: dsll $1, $1, 16 # encoding: [0x00,0x01,0x0c,0x38]
+ # CHECK: daddiu $1, $1, %lo(extern_sym+8) # encoding: [0x64,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(extern_sym+8), kind: fixup_Mips_LO16
+ # CHECK: daddu $5, $1, $5 # encoding: [0x00,0x25,0x28,0x2d]
+
+.option pic2
+#dla $5, symbol
diff --git a/test/MC/Mips/macro-dli.s b/test/MC/Mips/macro-dli.s
new file mode 100644
index 000000000000..6faf5051e5fc
--- /dev/null
+++ b/test/MC/Mips/macro-dli.s
@@ -0,0 +1,534 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r2 | \
+# RUN: FileCheck %s
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 | \
+# RUN: FileCheck %s
+
+bits_32_to_47_0x0000: # CHECK-LABEL: bits_32_to_47_0x0000:
+dli $5, 0x0000000000000001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+dli $5, 0x0000000000000002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+dli $5, 0x0000000000004000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+dli $5, 0x0000000000008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+dli $5, 0x00000000ffff8000 # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x00000000ffffc000 # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 49152 # encoding: [0x34,0xa5,0xc0,0x00]
+dli $5, 0x00000000fffffffe # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 65534 # encoding: [0x34,0xa5,0xff,0xfe]
+dli $5, 0x00000000ffffffff # CHECK: lui $5, 65535 # encoding: [0x3c,0x05,0xff,0xff]
+ # CHECK: dsrl32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3e]
+dli $5, 0x0000000000010000 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+dli $5, 0x0000000000020000 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+dli $5, 0x0000000040000000 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+dli $5, 0x0000000080000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x00000000c0000000 # CHECK: ori $5, $zero, 49152 # encoding: [0x34,0x05,0xc0,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x00000000fffe0000 # CHECK: ori $5, $zero, 65534 # encoding: [0x34,0x05,0xff,0xfe]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x00000000ffff0000 # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000000000010001 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000000020001 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000040000001 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000080000001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000000010002 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000000020002 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000040000002 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000080000002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000000014000 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000000024000 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000040004000 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000080004000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000000018000 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000000028000 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000040008000 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000080008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x00000000c0008000 # CHECK: ori $5, $zero, 49152 # encoding: [0x34,0x05,0xc0,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x00000000fffe8000 # CHECK: ori $5, $zero, 65534 # encoding: [0x34,0x05,0xff,0xfe]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x00000000ffff8000 # CHECK: ori $5, $zero, 65535 # encoding: [0x34,0x05,0xff,0xff]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+bits_32_to_47_0x0001: # CHECK-LABEL: bits_32_to_47_0x0001:
+dli $5, 0x0000000100000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 17 # encoding: [0x00,0x05,0x2c,0x78]
+dli $5, 0x0000000100000001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000100000002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000100004000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000100008000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000100010000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000000100010001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000100010002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000100014000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000100018000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000100020000 # CHECK: ori $5, $zero, 32769 # encoding: [0x34,0x05,0x80,0x01]
+ # CHECK: dsll $5, $5, 17 # encoding: [0x00,0x05,0x2c,0x78]
+dli $5, 0x0000000100020001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000100020002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000100024000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000100028000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000140000000 # CHECK: ori $5, $zero, 40960 # encoding: [0x34,0x05,0xa0,0x00]
+ # CHECK: dsll $5, $5, 17 # encoding: [0x00,0x05,0x2c,0x78]
+dli $5, 0x0000000140000001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000140000002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000140004000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000140008000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000180000000 # CHECK: ori $5, $zero, 49152 # encoding: [0x34,0x05,0xc0,0x00]
+ # CHECK: dsll $5, $5, 17 # encoding: [0x00,0x05,0x2c,0x78]
+dli $5, 0x0000000180000001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000180000002 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000180004000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000180008000 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+bits_32_to_47_0x0002: # CHECK-LABEL: bits_32_to_47_0x0002:
+dli $5, 0x0000000200000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 18 # encoding: [0x00,0x05,0x2c,0xb8]
+dli $5, 0x0000000200000001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000200000002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000200004000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000200008000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000200010000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000000200010001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000200010002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000200014000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000200018000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000200020000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000000200020001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000200020002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000200024000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000200028000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000240000000 # CHECK: ori $5, $zero, 36864 # encoding: [0x34,0x05,0x90,0x00]
+ # CHECK: dsll $5, $5, 18 # encoding: [0x00,0x05,0x2c,0xb8]
+dli $5, 0x0000000240000001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000240000002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000240004000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000240008000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000000280000000 # CHECK: ori $5, $zero, 40960 # encoding: [0x34,0x05,0xa0,0x00]
+ # CHECK: dsll $5, $5, 18 # encoding: [0x00,0x05,0x2c,0xb8]
+dli $5, 0x0000000280000001 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000000280000002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000000280004000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000000280008000 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+bits_32_to_47_0x4000: # CHECK-LABEL: bits_32_to_47_0x4000:
+dli $5, 0x0000400000000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 31 # encoding: [0x00,0x05,0x2f,0xf8]
+dli $5, 0x0000400000000001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000400000000002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000400000004000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000400000008000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000400000010000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000400000010001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000400000010002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000400000014000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000400000018000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000400000020000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000400000020001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000400000020002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000400000024000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000400000028000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000400040000000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000400040000001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000400040000002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000400040004000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000400040008000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000400080000000 # CHECK: ori $5, $zero, 32769 # encoding: [0x34,0x05,0x80,0x01]
+ # CHECK: dsll $5, $5, 31 # encoding: [0x00,0x05,0x2f,0xf8]
+dli $5, 0x0000400080000001 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000400080000002 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000400080004000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000400080008000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+bits_32_to_47_0x8000: # CHECK-LABEL: bits_32_to_47_0x8000:
+dli $5, 0x0000800000000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 32 # encoding: [0x00,0x05,0x28,0x3c]
+dli $5, 0x0000800000000001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000800000000002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000800000004000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000800000008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll32 $5, $5, 0 # encoding: [0x00,0x05,0x28,0x3c]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000800000010000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000800000010001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000800000010002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000800000014000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000800000018000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000800000020000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000800000020001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000800000020002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000800000024000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000800000028000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000800040000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000800040000001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000800040000002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000800040004000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000800040008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0000800080000000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+dli $5, 0x0000800080000001 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 1 # encoding: [0x34,0xa5,0x00,0x01]
+dli $5, 0x0000800080000002 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 2 # encoding: [0x34,0xa5,0x00,0x02]
+dli $5, 0x0000800080004000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 16384 # encoding: [0x34,0xa5,0x40,0x00]
+dli $5, 0x0000800080008000 # CHECK: ori $5, $zero, 32768 # encoding: [0x34,0x05,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+# Only test a few with bits 48-63 non-zero. It just adds an lui to the cases we've already done.
+dli $5, 0x0001800080008000 # CHECK: lui $5, 1 # encoding: [0x3c,0x05,0x00,0x01]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x0002800080008000 # CHECK: lui $5, 2 # encoding: [0x3c,0x05,0x00,0x02]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x4000800080008000 # CHECK: lui $5, 16384 # encoding: [0x3c,0x05,0x40,0x00]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+dli $5, 0x8000800080008000 # CHECK: lui $5, 32768 # encoding: [0x3c,0x05,0x80,0x00]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
+ # CHECK: dsll $5, $5, 16 # encoding: [0x00,0x05,0x2c,0x38]
+ # CHECK: ori $5, $5, 32768 # encoding: [0x34,0xa5,0x80,0x00]
diff --git a/test/MC/Mips/macro-la-bad.s b/test/MC/Mips/macro-la-bad.s
index 89d334030ec6..2a8cf2a9d894 100644
--- a/test/MC/Mips/macro-la-bad.s
+++ b/test/MC/Mips/macro-la-bad.s
@@ -1,17 +1,23 @@
# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>%t1
-# RUN: FileCheck %s < %t1 --check-prefix=32-BIT
+# RUN: FileCheck %s < %t1 --check-prefix=O32
# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n32 2>&1 | \
-# RUN: FileCheck %s --check-prefix=64-BIT --check-prefix=N32-ONLY
+# RUN: FileCheck %s --check-prefix=N32
# RUN: not llvm-mc %s -arch=mips64 -mcpu=mips64 -target-abi n64 2>&1 | \
-# RUN: FileCheck %s --check-prefix=64-BIT --check-prefix=N64-ONLY
+# RUN: FileCheck %s --check-prefix=N64
.text
la $5, 0x100000000
- # 32-BIT: :[[@LINE-1]]:3: error: instruction requires a 32-bit immediate
- # 64-BIT: :[[@LINE-2]]:3: error: instruction requires a 32-bit immediate
+ # O32: :[[@LINE-1]]:3: error: instruction requires a 32-bit immediate
+ # N32: :[[@LINE-2]]:3: error: instruction requires a 32-bit immediate
+ # N64: :[[@LINE-3]]:3: error: la used to load 64-bit address
+
la $5, 0x100000000($6)
- # 32-BIT: :[[@LINE-1]]:3: error: instruction requires a 32-bit immediate
- # 64-BIT: :[[@LINE-2]]:3: error: instruction requires a 32-bit immediate
+ # O32: :[[@LINE-1]]:3: error: instruction requires a 32-bit immediate
+ # N32: :[[@LINE-2]]:3: error: instruction requires a 32-bit immediate
+ # N64: :[[@LINE-3]]:3: error: la used to load 64-bit address
+
+ # FIXME: These should be warnings but we lack la -> dla promotion at the
+ # moment.
la $5, symbol
- # N64-ONLY: :[[@LINE-1]]:3: warning: instruction loads the 32-bit address of a 64-bit symbol
- # N32-ONLY-NOT: :[[@LINE-2]]:3: warning: instruction loads the 32-bit address of a 64-bit symbol
+ # N32-NOT: :[[@LINE-1]]:3: error: la used to load 64-bit address
+ # N64: :[[@LINE-2]]:3: error: la used to load 64-bit address
diff --git a/test/MC/Mips/macro-la.s b/test/MC/Mips/macro-la.s
index 8c183a7b23e4..3428cd78cb84 100644
--- a/test/MC/Mips/macro-la.s
+++ b/test/MC/Mips/macro-la.s
@@ -2,11 +2,13 @@
# RUN: FileCheck %s
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 | \
# RUN: FileCheck %s
-# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r2 | \
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r2 -target-abi=n32 | \
# RUN: FileCheck %s
-# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 | \
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -show-encoding -mcpu=mips64r6 -target-abi=n32 | \
# RUN: FileCheck %s
+# N64 should be acceptable too but we cannot convert la to dla yet.
+
la $5, 0x00000001 # CHECK: addiu $5, $zero, 1 # encoding: [0x24,0x05,0x00,0x01]
la $5, 0x00000002 # CHECK: addiu $5, $zero, 2 # encoding: [0x24,0x05,0x00,0x02]
la $5, 0x00004000 # CHECK: addiu $5, $zero, 16384 # encoding: [0x24,0x05,0x40,0x00]
@@ -243,21 +245,35 @@ la $6, 0x80008000($6) # CHECK: lui $1, 32768 # encoding: [0x3c,0x01,0x80,0x0
# CHECK: addu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x21]
la $5, symbol # CHECK: lui $5, %hi(symbol) # encoding: [0x3c,0x05,A,A]
- # CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+ # CHECK: # fixup A - offset: 0, value: %hi(symbol), kind: fixup_Mips_HI16
# CHECK: addiu $5, $5, %lo(symbol) # encoding: [0x24,0xa5,A,A]
- # CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+ # CHECK: # fixup A - offset: 0, value: %lo(symbol), kind: fixup_Mips_LO16
la $5, symbol($6) # CHECK: lui $5, %hi(symbol) # encoding: [0x3c,0x05,A,A]
- # CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+ # CHECK: # fixup A - offset: 0, value: %hi(symbol), kind: fixup_Mips_HI16
# CHECK: addiu $5, $5, %lo(symbol) # encoding: [0x24,0xa5,A,A]
- # CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+ # CHECK: # fixup A - offset: 0, value: %lo(symbol), kind: fixup_Mips_LO16
# CHECK: addu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x21]
la $6, symbol($6) # CHECK: lui $1, %hi(symbol) # encoding: [0x3c,0x01,A,A]
- # CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+ # CHECK: # fixup A - offset: 0, value: %hi(symbol), kind: fixup_Mips_HI16
# CHECK: addiu $1, $1, %lo(symbol) # encoding: [0x24,0x21,A,A]
- # CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+ # CHECK: # fixup A - offset: 0, value: %lo(symbol), kind: fixup_Mips_LO16
# CHECK: addu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x21]
-la $5, 1f # CHECK: lui $5, %hi($tmp0) # encoding: [0x3c,0x05,A,A]
- # CHECK: # fixup A - offset: 0, value: ($tmp0)@ABS_HI, kind: fixup_Mips_HI16
- # CHECK: addiu $5, $5, %lo($tmp0) # encoding: [0x24,0xa5,A,A]
- # CHECK: # fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_Mips_LO16
+la $5, symbol+8 # CHECK: lui $5, %hi(symbol+8) # encoding: [0x3c,0x05,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(symbol+8), kind: fixup_Mips_HI16
+ # CHECK: addiu $5, $5, %lo(symbol+8) # encoding: [0x24,0xa5,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(symbol+8), kind: fixup_Mips_LO16
+la $5, symbol+8($6) # CHECK: lui $5, %hi(symbol+8) # encoding: [0x3c,0x05,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(symbol+8), kind: fixup_Mips_HI16
+ # CHECK: addiu $5, $5, %lo(symbol+8) # encoding: [0x24,0xa5,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(symbol+8), kind: fixup_Mips_LO16
+ # CHECK: addu $5, $5, $6 # encoding: [0x00,0xa6,0x28,0x21]
+la $6, symbol+8($6) # CHECK: lui $1, %hi(symbol+8) # encoding: [0x3c,0x01,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(symbol+8), kind: fixup_Mips_HI16
+ # CHECK: addiu $1, $1, %lo(symbol+8) # encoding: [0x24,0x21,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(symbol+8), kind: fixup_Mips_LO16
+ # CHECK: addu $6, $1, $6 # encoding: [0x00,0x26,0x30,0x21]
+la $5, 1f # CHECK: lui $5, %hi(($tmp0)) # encoding: [0x3c,0x05,A,A]
+ # CHECK: # fixup A - offset: 0, value: %hi(($tmp0)), kind: fixup_Mips_HI16
+ # CHECK: addiu $5, $5, %lo(($tmp0)) # encoding: [0x24,0xa5,A,A]
+ # CHECK: # fixup A - offset: 0, value: %lo(($tmp0)), kind: fixup_Mips_LO16
1:
diff --git a/test/MC/Mips/micromips-control-instructions.s b/test/MC/Mips/micromips-control-instructions.s
index 76c953f85d55..2276b492e3d9 100644
--- a/test/MC/Mips/micromips-control-instructions.s
+++ b/test/MC/Mips/micromips-control-instructions.s
@@ -11,10 +11,10 @@
#------------------------------------------------------------------------------
# CHECK-EL: sdbbp # encoding: [0x00,0x00,0x7c,0xdb]
# CHECK-EL: sdbbp 34 # encoding: [0x22,0x00,0x7c,0xdb]
-# CHECK-EL: .set push
-# CHECK-EL: .set mips32r2
-# CHECK-EL: rdhwr $5, $29
-# CHECK-EL: .set pop # encoding: [0xbd,0x00,0x3c,0x6b]
+# CHECK-EL-NOT: .set push
+# CHECK-EL-NOT: .set mips32r2
+# CHECK-EL: rdhwr $5, $29 # encoding: [0xbd,0x00,0x3c,0x6b]
+# CHECK-EL-NOT: .set pop
# CHECK-EL: cache 1, 8($5) # encoding: [0x25,0x20,0x08,0x60]
# CHECK-EL: pref 1, 8($5) # encoding: [0x25,0x60,0x08,0x20]
# CHECK-EL: ssnop # encoding: [0x00,0x00,0x00,0x08]
@@ -39,15 +39,24 @@
# CHECK-EL: tlbr # encoding: [0x00,0x00,0x7c,0x13]
# CHECK-EL: tlbwi # encoding: [0x00,0x00,0x7c,0x23]
# CHECK-EL: tlbwr # encoding: [0x00,0x00,0x7c,0x33]
+# CHECK-EL: prefe 1, 8($5) # encoding: [0x25,0x60,0x08,0xa4]
+# CHECK-EL: cachee 1, 8($5) # encoding: [0x25,0x60,0x08,0xa6]
+# CHECK-EL: prefx 1, $3($5) # encoding: [0x65,0x54,0xa0,0x09]
+# CHECK-EL: swre $24, 5($3) # encoding: [0x03,0x63,0x05,0xa2]
+# CHECK-EL: swle $24, 5($3) # encoding: [0x03,0x63,0x05,0xa0]
+# CHECK-EL: lwre $24, 5($3) # encoding: [0x03,0x63,0x05,0x66]
+# CHECK-EL: lwle $24, 2($4) # encoding: [0x04,0x63,0x02,0x64]
+# CHECK-EL: lle $2, 8($4) # encoding: [0x44,0x60,0x08,0x6c]
+# CHECK-EL: sce $2, 8($4) # encoding: [0x44,0x60,0x08,0xac]
#------------------------------------------------------------------------------
# Big endian
#------------------------------------------------------------------------------
# CHECK-EB: sdbbp # encoding: [0x00,0x00,0xdb,0x7c]
# CHECK-EB: sdbbp 34 # encoding: [0x00,0x22,0xdb,0x7c]
-# CHECK-EB: .set push
-# CHECK-EB: .set mips32r2
-# CHECK-EB: rdhwr $5, $29
-# CHECK-EB: .set pop # encoding: [0x00,0xbd,0x6b,0x3c]
+# CHECK-EB-NOT: .set push
+# CHECK-EB-NOT: .set mips32r2
+# CHECK-EB: rdhwr $5, $29 # encoding: [0x00,0xbd,0x6b,0x3c]
+# CHECK-EB-NOT: .set pop
# CHECK-EB: cache 1, 8($5) # encoding: [0x20,0x25,0x60,0x08]
# CHECK-EB: pref 1, 8($5) # encoding: [0x60,0x25,0x20,0x08]
# CHECK-EB: ssnop # encoding: [0x00,0x00,0x08,0x00]
@@ -72,6 +81,15 @@
# CHECK-EB: tlbr # encoding: [0x00,0x00,0x13,0x7c]
# CHECK-EB: tlbwi # encoding: [0x00,0x00,0x23,0x7c]
# CHECK-EB: tlbwr # encoding: [0x00,0x00,0x33,0x7c]
+# CHECK-EB: prefe 1, 8($5) # encoding: [0x60,0x25,0xa4,0x08]
+# CHECK-EB: cachee 1, 8($5) # encoding: [0x60,0x25,0xa6,0x08]
+# CHECK-EB: prefx 1, $3($5) # encoding: [0x54,0x65,0x09,0xa0]
+# CHECK-EB: swre $24, 5($3) # encoding: [0x63,0x03,0xa2,0x05]
+# CHECK-EB: swle $24, 5($3) # encoding: [0x63,0x03,0xa0,0x05]
+# CHECK-EB: lwre $24, 5($3) # encoding: [0x63,0x03,0x66,0x05]
+# CHECK-EB: lwle $24, 2($4) # encoding: [0x63,0x04,0x64,0x02]
+# CHECK-EB: lle $2, 8($4) # encoding: [0x60,0x44,0x6c,0x08]
+# CHECK-EB: sce $2, 8($4) # encoding: [0x60,0x44,0xac,0x08]
sdbbp
sdbbp 34
@@ -100,3 +118,12 @@
tlbr
tlbwi
tlbwr
+ prefe 1, 8($5)
+ cachee 1, 8($5)
+ prefx 1, $3($5)
+ swre $24, 5($3)
+ swle $24, 5($3)
+ lwre $24, 5($3)
+ lwle $24, 2($4)
+ lle $2, 8($4)
+ sce $2, 8($4)
diff --git a/test/MC/Mips/micromips-diagnostic-fixup.s b/test/MC/Mips/micromips-diagnostic-fixup.s
index 041338ac2d3b..4a94f9a3cd51 100644
--- a/test/MC/Mips/micromips-diagnostic-fixup.s
+++ b/test/MC/Mips/micromips-diagnostic-fixup.s
@@ -1,9 +1,12 @@
# RUN: not llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -arch=mips -mattr=+micromips 2>&1 -filetype=obj | FileCheck %s
-#
-# CHECK: LLVM ERROR: out of range PC16 fixup
+
+# Two instructions, to check that this is not a fatal error
+# CHECK: error: out of range PC16 fixup
+# CHECK: error: out of range PC16 fixup
.text
b foo
+ b foo
.space 65536 - 6, 1 # -6 = size of b instr plus size of automatically inserted nop
nop # This instr makes the branch too long to fit into a 17-bit offset
foo:
diff --git a/test/MC/Mips/micromips-dsp/invalid-wrong-error.s b/test/MC/Mips/micromips-dsp/invalid-wrong-error.s
new file mode 100644
index 000000000000..d1ba873809db
--- /dev/null
+++ b/test/MC/Mips/micromips-dsp/invalid-wrong-error.s
@@ -0,0 +1,7 @@
+# Instructions that are correctly rejected but emit a wrong or misleading error.
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 -mattr=micromips -mattr=+dsp 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ wrdsp $5, 128 # CHECK: :[[@LINE]]:3: error: instruction requires a CPU feature not currently enabled
+ wrdsp $5, -1 # CHECK: :[[@LINE]]:13: error: expected 10-bit unsigned immediate
diff --git a/test/MC/Mips/micromips-dsp/invalid.s b/test/MC/Mips/micromips-dsp/invalid.s
new file mode 100644
index 000000000000..55a6f8e28737
--- /dev/null
+++ b/test/MC/Mips/micromips-dsp/invalid.s
@@ -0,0 +1,23 @@
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 -mattr=micromips -mattr=+dsp 2>%t1
+# RUN: FileCheck %s < %t1
+
+ shll.ph $3, $4, 16 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shll.ph $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shll_s.ph $3, $4, 16 # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
+ shll_s.ph $3, $4, -1 # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
+ shll.qb $3, $4, 8 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shll.qb $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ // FIXME: Following invalid tests are temporarely disabled, until operand check for uimm5 is added
+ shll_s.w $3, $4, 32 # -CHECK: :[[@LINE]]:20: error: expected 5-bit unsigned immediate
+ shll_s.w $3, $4, -1 # -CHECK: :[[@LINE]]:20: error: expected 5-bit unsigned immediate
+ shra.ph $3, $4, 16 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shra.ph $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shra_r.ph $3, $4, 16 # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
+ shra_r.ph $3, $4, -1 # CHECK: :[[@LINE]]:21: error: expected 4-bit unsigned immediate
+ // FIXME: Following invalid tests are temporarely disabled, until operand check for uimm5 is added
+ shra_r.w $3, $4, 32 # -CHECK: :[[@LINE]]:20: error: expected 5-bit unsigned immediate
+ shra_r.w $3, $4, -1 # -CHECK: :[[@LINE]]:20: error: expected 5-bit unsigned immediate
+ shrl.qb $3, $4, 8 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shrl.qb $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shilo $ac1, 64 # CHECK: :[[@LINE]]:15: error: expected 6-bit signed immediate
+ shilo $ac1, -64 # CHECK: :[[@LINE]]:15: error: expected 6-bit signed immediate
diff --git a/test/MC/Mips/micromips-dsp/valid.s b/test/MC/Mips/micromips-dsp/valid.s
new file mode 100644
index 000000000000..c147a6d850ac
--- /dev/null
+++ b/test/MC/Mips/micromips-dsp/valid.s
@@ -0,0 +1,105 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 -mattr=micromips -mattr=+dsp | FileCheck %s
+
+ .set noat
+ absq_s.ph $3, $4 # CHECK: absq_s.ph $3, $4 # encoding: [0x00,0x64,0x11,0x3c]
+ absq_s.w $3, $4 # CHECK: absq_s.w $3, $4 # encoding: [0x00,0x64,0x21,0x3c]
+ addu.qb $3, $4, $5 # CHECK: addu.qb $3, $4, $5 # encoding: [0x00,0xa4,0x18,0xcd]
+ addu_s.qb $3, $4, $5 # CHECK: addu_s.qb $3, $4, $5 # encoding: [0x00,0xa4,0x1c,0xcd]
+ addsc $3, $4, $5 # CHECK: addsc $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0x85]
+ addwc $3, $4, $5 # CHECK: addwc $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0xc5]
+ addq.ph $3, $4, $5 # CHECK: addq.ph $3, $4, $5 # encoding: [0x00,0xa4,0x18,0x0d]
+ addq_s.ph $3, $4, $5 # CHECK: addq_s.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1c,0x0d]
+ addq_s.w $3, $4, $5 # CHECK: addq_s.w $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0x05]
+ dpaq_s.w.ph $ac1, $5, $3 # CHECK: dpaq_s.w.ph $ac1, $5, $3 # encoding: [0x00,0x65,0x42,0xbc]
+ dpaq_sa.l.w $ac2, $4, $3 # CHECK: dpaq_sa.l.w $ac2, $4, $3 # encoding: [0x00,0x64,0x92,0xbc]
+ dpau.h.qbl $ac1, $3, $4 # CHECK: dpau.h.qbl $ac1, $3, $4 # encoding: [0x00,0x83,0x60,0xbc]
+ dpau.h.qbr $ac2, $20, $21 # CHECK: dpau.h.qbr $ac2, $20, $21 # encoding: [0x02,0xb4,0xb0,0xbc]
+ extp $zero, $ac1, 6 # CHECK: extp $zero, $ac1, 6 # encoding: [0x00,0x06,0x66,0x7c]
+ extpdp $2, $ac1, 2 # CHECK: extpdp $2, $ac1, 2 # encoding: [0x00,0x42,0x76,0x7c]
+ extpdpv $4, $ac2, $8 # CHECK: extpdpv $4, $ac2, $8 # encoding: [0x00,0x88,0xb8,0xbc]
+ extpv $15, $ac3, $7 # CHECK: extpv $15, $ac3, $7 # encoding: [0x01,0xe7,0xe8,0xbc]
+ extr.w $27, $ac3, 31 # CHECK: extr.w $27, $ac3, 31 # encoding: [0x03,0x7f,0xce,0x7c]
+ extr_r.w $12, $ac0, 24 # CHECK: extr_r.w $12, $ac0, 24 # encoding: [0x01,0x98,0x1e,0x7c]
+ extr_rs.w $27, $ac3, 9 # CHECK: extr_rs.w $27, $ac3, 9 # encoding: [0x03,0x69,0xee,0x7c]
+ extr_s.h $3, $ac2, 1 # CHECK: extr_s.h $3, $ac2, 1 # encoding: [0x00,0x61,0xbe,0x7c]
+ extrv.w $5, $ac0, $6 # CHECK: extrv.w $5, $ac0, $6 # encoding: [0x00,0xa6,0x0e,0xbc]
+ extrv_r.w $10, $ac0, $3 # CHECK: extrv_r.w $10, $ac0, $3 # encoding: [0x01,0x43,0x1e,0xbc]
+ extrv_rs.w $15, $ac1, $20 # CHECK: extrv_rs.w $15, $ac1, $20 # encoding: [0x01,0xf4,0x6e,0xbc]
+ extrv_s.h $8, $ac2, $16 # CHECK: extrv_s.h $8, $ac2, $16 # encoding: [0x01,0x10,0xbe,0xbc]
+ insv $3, $4 # CHECK: insv $3, $4 # encoding: [0x00,0x64,0x41,0x3c]
+ madd $ac1, $6, $7 # CHECK: madd $ac1, $6, $7 # encoding: [0x00,0xe6,0x4a,0xbc]
+ maddu $ac0, $8, $9 # CHECK: maddu $ac0, $8, $9 # encoding: [0x01,0x28,0x1a,0xbc]
+ msub $ac3, $10, $11 # CHECK: msub $ac3, $10, $11 # encoding: [0x01,0x6a,0xea,0xbc]
+ msubu $ac2, $12, $13 # CHECK: msubu $ac2, $12, $13 # encoding: [0x01,0xac,0xba,0xbc]
+ mult $ac3, $2, $3 # CHECK: mult $ac3, $2, $3 # encoding: [0x00,0x62,0xcc,0xbc]
+ multu $ac2, $4, $5 # CHECK: multu $ac2, $4, $5 # encoding: [0x00,0xa4,0x9c,0xbc]
+ packrl.ph $3, $4, $5 # CHECK: packrl.ph $3, $4, $5 # encoding: [0x00,0xa4,0x19,0xad]
+ pick.ph $3, $4, $5 # CHECK: pick.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0x2d]
+ pick.qb $3, $4, $5 # CHECK: pick.qb $3, $4, $5 # encoding: [0x00,0xa4,0x19,0xed]
+ preceq.w.phl $1, $2 # CHECK: preceq.w.phl $1, $2 # encoding: [0x00,0x22,0x51,0x3c]
+ preceq.w.phr $3, $4 # CHECK: preceq.w.phr $3, $4 # encoding: [0x00,0x64,0x61,0x3c]
+ precequ.ph.qbl $5, $6 # CHECK: precequ.ph.qbl $5, $6 # encoding: [0x00,0xa6,0x71,0x3c]
+ precequ.ph.qbla $7, $8 # CHECK: precequ.ph.qbla $7, $8 # encoding: [0x00,0xe8,0x73,0x3c]
+ precequ.ph.qbr $9, $10 # CHECK: precequ.ph.qbr $9, $10 # encoding: [0x01,0x2a,0x91,0x3c]
+ precequ.ph.qbra $11, $12 # CHECK: precequ.ph.qbra $11, $12 # encoding: [0x01,0x6c,0x93,0x3c]
+ preceu.ph.qbl $13, $14 # CHECK: preceu.ph.qbl $13, $14 # encoding: [0x01,0xae,0xb1,0x3c]
+ preceu.ph.qbla $15, $16 # CHECK: preceu.ph.qbla $15, $16 # encoding: [0x01,0xf0,0xb3,0x3c]
+ preceu.ph.qbr $17, $18 # CHECK: preceu.ph.qbr $17, $18 # encoding: [0x02,0x32,0xd1,0x3c]
+ preceu.ph.qbra $19, $20 # CHECK: preceu.ph.qbra $19, $20 # encoding: [0x02,0x74,0xd3,0x3c]
+ precrq.ph.w $8, $9, $10 # CHECK: precrq.ph.w $8, $9, $10 # encoding: [0x01,0x49,0x40,0xed]
+ precrq.qb.ph $11, $12, $13 # CHECK: precrq.qb.ph $11, $12, $13 # encoding: [0x01,0xac,0x58,0xad]
+ precrqu_s.qb.ph $14, $15, $16 # CHECK: precrqu_s.qb.ph $14, $15, $16 # encoding: [0x02,0x0f,0x71,0x6d]
+ precrq_rs.ph.w $17, $18, $19 # CHECK: precrq_rs.ph.w $17, $18, $19 # encoding: [0x02,0x72,0x89,0x2d]
+ shilo $ac1, 3 # CHECK: shilo $ac1, 3 # encoding: [0x00,0x03,0x40,0x1d]
+ shilov $ac1, $5 # CHECK: shilov $ac1, $5 # encoding: [0x00,0x05,0x52,0x7c]
+ shll.ph $3, $4, 5 # CHECK: shll.ph $3, $4, 5 # encoding: [0x00,0x64,0x53,0xb5]
+ shll_s.ph $3, $4, 5 # CHECK: shll_s.ph $3, $4, 5 # encoding: [0x00,0x64,0x5b,0xb5]
+ shll.qb $3, $4, 5 # CHECK: shll.qb $3, $4, 5 # encoding: [0x00,0x64,0xa8,0x7c]
+ shllv.ph $3, $4, $5 # CHECK: shllv.ph $3, $4, $5 # encoding: [0x00,0x85,0x18,0x0e]
+ shllv_s.ph $3, $4, $5 # CHECK: shllv_s.ph $3, $4, $5 # encoding: [0x00,0x85,0x1c,0x0e]
+ shllv.qb $3, $4, $5 # CHECK: shllv.qb $3, $4, $5 # encoding: [0x00,0x85,0x1b,0x95]
+ shllv_s.w $3, $4, $5 # CHECK: shllv_s.w $3, $4, $5 # encoding: [0x00,0x85,0x1b,0xd5]
+ shll_s.w $3, $4, 5 # CHECK: shll_s.w $3, $4, 5 # encoding: [0x00,0x64,0x2b,0xf5]
+ shra.ph $3, $4, 5 # CHECK: shra.ph $3, $4, 5 # encoding: [0x00,0x64,0x53,0x35]
+ shra_r.ph $3, $4, 5 # CHECK: shra_r.ph $3, $4, 5 # encoding: [0x00,0x64,0x57,0x35]
+ shrav.ph $3, $4, $5 # CHECK: shrav.ph $3, $4, $5 # encoding: [0x00,0x85,0x19,0x8d]
+ shrav_r.ph $3, $4, $5 # CHECK: shrav_r.ph $3, $4, $5 # encoding: [0x00,0x85,0x1d,0x8d]
+ shrav_r.w $3, $4, $5 # CHECK: shrav_r.w $3, $4, $5 # encoding: [0x00,0x85,0x1a,0xd5]
+ shra_r.w $3, $4, 5 # CHECK: shra_r.w $3, $4, 5 # encoding: [0x00,0x64,0x2a,0xf5]
+ shrl.qb $3, $4, 5 # CHECK: shrl.qb $3, $4, 5 # encoding: [0x00,0x64,0xb8,0x7c]
+ shrlv.qb $3, $4, $5 # CHECK: shrlv.qb $3, $4, $5 # encoding: [0x00,0x85,0x1b,0x55]
+ subq.ph $3, $4, $5 # CHECK: subq.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0x0d]
+ subq_s.ph $3, $4, $5 # CHECK: subq_s.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1e,0x0d]
+ subq_s.w $3, $4, $5 # CHECK: subq_s.w $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0x45]
+ subu.qb $3, $4, $5 # CHECK: subu.qb $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0xcd]
+ subu_s.qb $3, $4, $5 # CHECK: subu_s.qb $3, $4, $5 # encoding: [0x00,0xa4,0x1e,0xcd]
+ dpsq_s.w.ph $ac1, $4, $6 # CHECK: dpsq_s.w.ph $ac1, $4, $6 # encoding: [0x00,0xc4,0x46,0xbc]
+ dpsq_sa.l.w $ac1, $4, $6 # CHECK: dpsq_sa.l.w $ac1, $4, $6 # encoding: [0x00,0xc4,0x56,0xbc]
+ dpsu.h.qbl $ac1, $4, $6 # CHECK: dpsu.h.qbl $ac1, $4, $6 # encoding: [0x00,0xc4,0x64,0xbc]
+ dpsu.h.qbr $ac1, $4, $6 # CHECK: dpsu.h.qbr $ac1, $4, $6 # encoding: [0x00,0xc4,0x74,0xbc]
+ muleq_s.w.phl $1, $2, $3 # CHECK: muleq_s.w.phl $1, $2, $3 # encoding: [0x00,0x62,0x08,0x25]
+ muleq_s.w.phr $1, $2, $3 # CHECK: muleq_s.w.phr $1, $2, $3 # encoding: [0x00,0x62,0x08,0x65]
+ muleu_s.ph.qbl $1, $2, $3 # CHECK: muleu_s.ph.qbl $1, $2, $3 # encoding: [0x00,0x62,0x08,0x95]
+ muleu_s.ph.qbr $1, $2, $3 # CHECK: muleu_s.ph.qbr $1, $2, $3 # encoding: [0x00,0x62,0x08,0xd5]
+ mulq_rs.ph $1, $2, $3 # CHECK: mulq_rs.ph $1, $2, $3 # encoding: [0x00,0x62,0x09,0x15]
+ lbux $1, $2($3) # CHECK: lbux $1, $2($3) # encoding: [0x00,0x43,0x0a,0x25]
+ lhx $1, $2($3) # CHECK: lhx $1, $2($3) # encoding: [0x00,0x43,0x09,0x65]
+ lwx $1, $2($3) # CHECK: lwx $1, $2($3) # encoding: [0x00,0x43,0x09,0xa5]
+ maq_s.w.phl $ac1, $2, $3 # CHECK: maq_s.w.phl $ac1, $2, $3 # encoding: [0x00,0x62,0x5a,0x7c]
+ maq_sa.w.phl $ac1, $2, $3 # CHECK: maq_sa.w.phl $ac1, $2, $3 # encoding: [0x00,0x62,0x7a,0x7c]
+ maq_s.w.phr $ac1, $2, $3 # CHECK: maq_s.w.phr $ac1, $2, $3 # encoding: [0x00,0x62,0x4a,0x7c]
+ maq_sa.w.phr $ac1, $2, $3 # CHECK: maq_sa.w.phr $ac1, $2, $3 # encoding: [0x00,0x62,0x6a,0x7c]
+ mfhi $2, $ac1 # CHECK: mfhi $2, $ac1 # encoding: [0x00,0x02,0x40,0x7c]
+ mflo $1, $ac1 # CHECK: mflo $1, $ac1 # encoding: [0x00,0x01,0x50,0x7c]
+ mthi $1, $ac1 # CHECK: mthi $1, $ac1 # encoding: [0x00,0x01,0x60,0x7c]
+ mtlo $1, $ac1 # CHECK: mtlo $1, $ac1 # encoding: [0x00,0x01,0x70,0x7c]
+ raddu.w.qb $1, $2 # CHECK: raddu.w.qb $1, $2 # encoding: [0x00,0x22,0xf1,0x3c]
+ rddsp $1, 2 # CHECK: rddsp $1, 2 # encoding: [0x00,0x20,0x86,0x7c]
+ repl.ph $1, 512 # CHECK: repl.ph $1, 512 # encoding: [0x02,0x00,0x08,0x3d]
+ repl.qb $1, 128 # CHECK: repl.qb $1, 128 # encoding: [0x00,0x30,0x05,0xfc]
+ replv.ph $1, $2 # CHECK: replv.ph $1, $2 # encoding: [0x00,0x22,0x03,0x3c]
+ replv.qb $1, $2 # CHECK: replv.qb $1, $2 # encoding: [0x00,0x22,0x13,0x3c]
+ mthlip $1, $ac2 # CHECK: mthlip $1, $ac2 # encoding: [0x00,0x01,0x82,0x7c]
+ wrdsp $5 # CHECK: wrdsp $5 # encoding: [0x00,0xa7,0xd6,0x7c]
+ wrdsp $5, 2 # CHECK: wrdsp $5, 2 # encoding: [0x00,0xa0,0x96,0x7c]
+ wrdsp $5, 31 # CHECK: wrdsp $5 # encoding: [0x00,0xa7,0xd6,0x7c]
diff --git a/test/MC/Mips/micromips-dspr2/invalid.s b/test/MC/Mips/micromips-dspr2/invalid.s
new file mode 100644
index 000000000000..5ea203d981c1
--- /dev/null
+++ b/test/MC/Mips/micromips-dspr2/invalid.s
@@ -0,0 +1,9 @@
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 -mattr=micromips -mattr=+dspr2 2>%t1
+# RUN: FileCheck %s < %t1
+
+ shra.qb $3, $4, 8 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shra.qb $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 3-bit unsigned immediate
+ shra_r.qb $3, $4, 8 # CHECK: :[[@LINE]]:21: error: expected 3-bit unsigned immediate
+ shra_r.qb $3, $4, -1 # CHECK: :[[@LINE]]:21: error: expected 3-bit unsigned immediate
+ shrl.ph $3, $4, 16 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
+ shrl.ph $3, $4, -1 # CHECK: :[[@LINE]]:19: error: expected 4-bit unsigned immediate
diff --git a/test/MC/Mips/micromips-dspr2/valid.s b/test/MC/Mips/micromips-dspr2/valid.s
new file mode 100644
index 000000000000..b1d09cbc84c7
--- /dev/null
+++ b/test/MC/Mips/micromips-dspr2/valid.s
@@ -0,0 +1,127 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r6 -mattr=micromips -mattr=+dspr2 | FileCheck %s
+
+ .set noat
+ absq_s.ph $3, $4 # CHECK: absq_s.ph $3, $4 # encoding: [0x00,0x64,0x11,0x3c]
+ absq_s.qb $3, $4 # CHECK: absq_s.qb $3, $4 # encoding: [0x00,0x64,0x01,0x3c]
+ absq_s.w $3, $4 # CHECK: absq_s.w $3, $4 # encoding: [0x00,0x64,0x21,0x3c]
+ addqh.ph $3, $4, $5 # CHECK: addqh.ph $3, $4, $5 # encoding: [0x00,0xa4,0x18,0x4d]
+ addqh_r.ph $3, $4, $5 # CHECK: addqh_r.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1c,0x4d]
+ addqh.w $3, $4, $5 # CHECK: addqh.w $3, $4, $5 # encoding: [0x00,0xa4,0x18,0x8d]
+ addqh_r.w $3, $4, $5 # CHECK: addqh_r.w $3, $4, $5 # encoding: [0x00,0xa4,0x1c,0x8d]
+ addu.ph $3, $4, $5 # CHECK: addu.ph $3, $4, $5 # encoding: [0x00,0xa4,0x19,0x0d]
+ addu.qb $3, $4, $5 # CHECK: addu.qb $3, $4, $5 # encoding: [0x00,0xa4,0x18,0xcd]
+ addu_s.ph $3, $4, $5 # CHECK: addu_s.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1d,0x0d]
+ addu_s.qb $3, $4, $5 # CHECK: addu_s.qb $3, $4, $5 # encoding: [0x00,0xa4,0x1c,0xcd]
+ adduh.qb $3, $4, $5 # CHECK: adduh.qb $3, $4, $5 # encoding: [0x00,0xa4,0x19,0x4d]
+ adduh_r.qb $3, $4, $5 # CHECK: adduh_r.qb $3, $4, $5 # encoding: [0x00,0xa4,0x1d,0x4d]
+ addsc $3, $4, $5 # CHECK: addsc $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0x85]
+ addwc $3, $4, $5 # CHECK: addwc $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0xc5]
+ addq.ph $3, $4, $5 # CHECK: addq.ph $3, $4, $5 # encoding: [0x00,0xa4,0x18,0x0d]
+ addq_s.ph $3, $4, $5 # CHECK: addq_s.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1c,0x0d]
+ addq_s.w $3, $4, $5 # CHECK: addq_s.w $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0x05]
+ dpa.w.ph $ac0, $3, $2 # CHECK: dpa.w.ph $ac0, $3, $2 # encoding: [0x00,0x43,0x00,0xbc]
+ dpaq_s.w.ph $ac1, $5, $3 # CHECK: dpaq_s.w.ph $ac1, $5, $3 # encoding: [0x00,0x65,0x42,0xbc]
+ dpaq_sa.l.w $ac2, $4, $3 # CHECK: dpaq_sa.l.w $ac2, $4, $3 # encoding: [0x00,0x64,0x92,0xbc]
+ dpaqx_s.w.ph $ac3, $12, $7 # CHECK: dpaqx_s.w.ph $ac3, $12, $7 # encoding: [0x00,0xec,0xe2,0xbc]
+ dpaqx_sa.w.ph $ac0, $5, $6 # CHECK: dpaqx_sa.w.ph $ac0, $5, $6 # encoding: [0x00,0xc5,0x32,0xbc]
+ dpau.h.qbl $ac1, $3, $4 # CHECK: dpau.h.qbl $ac1, $3, $4 # encoding: [0x00,0x83,0x60,0xbc]
+ dpau.h.qbr $ac2, $20, $21 # CHECK: dpau.h.qbr $ac2, $20, $21 # encoding: [0x02,0xb4,0xb0,0xbc]
+ dpax.w.ph $ac3, $2, $1 # CHECK: dpax.w.ph $ac3, $2, $1 # encoding: [0x00,0x22,0xd0,0xbc]
+ extp $zero, $ac1, 6 # CHECK: extp $zero, $ac1, 6 # encoding: [0x00,0x06,0x66,0x7c]
+ extpdp $2, $ac1, 2 # CHECK: extpdp $2, $ac1, 2 # encoding: [0x00,0x42,0x76,0x7c]
+ extpdpv $4, $ac2, $8 # CHECK: extpdpv $4, $ac2, $8 # encoding: [0x00,0x88,0xb8,0xbc]
+ extpv $15, $ac3, $7 # CHECK: extpv $15, $ac3, $7 # encoding: [0x01,0xe7,0xe8,0xbc]
+ extr.w $27, $ac3, 31 # CHECK: extr.w $27, $ac3, 31 # encoding: [0x03,0x7f,0xce,0x7c]
+ extr_r.w $12, $ac0, 24 # CHECK: extr_r.w $12, $ac0, 24 # encoding: [0x01,0x98,0x1e,0x7c]
+ extr_rs.w $27, $ac3, 9 # CHECK: extr_rs.w $27, $ac3, 9 # encoding: [0x03,0x69,0xee,0x7c]
+ extr_s.h $3, $ac2, 1 # CHECK: extr_s.h $3, $ac2, 1 # encoding: [0x00,0x61,0xbe,0x7c]
+ extrv.w $5, $ac0, $6 # CHECK: extrv.w $5, $ac0, $6 # encoding: [0x00,0xa6,0x0e,0xbc]
+ extrv_r.w $10, $ac0, $3 # CHECK: extrv_r.w $10, $ac0, $3 # encoding: [0x01,0x43,0x1e,0xbc]
+ extrv_rs.w $15, $ac1, $20 # CHECK: extrv_rs.w $15, $ac1, $20 # encoding: [0x01,0xf4,0x6e,0xbc]
+ extrv_s.h $8, $ac2, $16 # CHECK: extrv_s.h $8, $ac2, $16 # encoding: [0x01,0x10,0xbe,0xbc]
+ insv $3, $4 # CHECK: insv $3, $4 # encoding: [0x00,0x64,0x41,0x3c]
+ madd $ac1, $6, $7 # CHECK: madd $ac1, $6, $7 # encoding: [0x00,0xe6,0x4a,0xbc]
+ maddu $ac0, $8, $9 # CHECK: maddu $ac0, $8, $9 # encoding: [0x01,0x28,0x1a,0xbc]
+ msub $ac3, $10, $11 # CHECK: msub $ac3, $10, $11 # encoding: [0x01,0x6a,0xea,0xbc]
+ msubu $ac2, $12, $13 # CHECK: msubu $ac2, $12, $13 # encoding: [0x01,0xac,0xba,0xbc]
+ mult $ac3, $2, $3 # CHECK: mult $ac3, $2, $3 # encoding: [0x00,0x62,0xcc,0xbc]
+ multu $ac2, $4, $5 # CHECK: multu $ac2, $4, $5 # encoding: [0x00,0xa4,0x9c,0xbc]
+ packrl.ph $3, $4, $5 # CHECK: packrl.ph $3, $4, $5 # encoding: [0x00,0xa4,0x19,0xad]
+ pick.ph $3, $4, $5 # CHECK: pick.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0x2d]
+ pick.qb $3, $4, $5 # CHECK: pick.qb $3, $4, $5 # encoding: [0x00,0xa4,0x19,0xed]
+ preceq.w.phl $1, $2 # CHECK: preceq.w.phl $1, $2 # encoding: [0x00,0x22,0x51,0x3c]
+ preceq.w.phr $3, $4 # CHECK: preceq.w.phr $3, $4 # encoding: [0x00,0x64,0x61,0x3c]
+ precequ.ph.qbl $5, $6 # CHECK: precequ.ph.qbl $5, $6 # encoding: [0x00,0xa6,0x71,0x3c]
+ precequ.ph.qbla $7, $8 # CHECK: precequ.ph.qbla $7, $8 # encoding: [0x00,0xe8,0x73,0x3c]
+ precequ.ph.qbr $9, $10 # CHECK: precequ.ph.qbr $9, $10 # encoding: [0x01,0x2a,0x91,0x3c]
+ precequ.ph.qbra $11, $12 # CHECK: precequ.ph.qbra $11, $12 # encoding: [0x01,0x6c,0x93,0x3c]
+ preceu.ph.qbl $13, $14 # CHECK: preceu.ph.qbl $13, $14 # encoding: [0x01,0xae,0xb1,0x3c]
+ preceu.ph.qbla $15, $16 # CHECK: preceu.ph.qbla $15, $16 # encoding: [0x01,0xf0,0xb3,0x3c]
+ preceu.ph.qbr $17, $18 # CHECK: preceu.ph.qbr $17, $18 # encoding: [0x02,0x32,0xd1,0x3c]
+ preceu.ph.qbra $19, $20 # CHECK: preceu.ph.qbra $19, $20 # encoding: [0x02,0x74,0xd3,0x3c]
+ precr.qb.ph $1, $2, $3 # CHECK: precr.qb.ph $1, $2, $3 # encoding: [0x00,0x62,0x08,0x6d]
+ precr_sra.ph.w $4, $5, 1 # CHECK: precr_sra.ph.w $4, $5, 1 # encoding: [0x00,0x85,0x0b,0xcd]
+ precr_sra_r.ph.w $6, $7, 2 # CHECK: precr_sra_r.ph.w $6, $7, 2 # encoding: [0x00,0xc7,0x17,0xcd]
+ precrq.ph.w $8, $9, $10 # CHECK: precrq.ph.w $8, $9, $10 # encoding: [0x01,0x49,0x40,0xed]
+ precrq.qb.ph $11, $12, $13 # CHECK: precrq.qb.ph $11, $12, $13 # encoding: [0x01,0xac,0x58,0xad]
+ precrqu_s.qb.ph $14, $15, $16 # CHECK: precrqu_s.qb.ph $14, $15, $16 # encoding: [0x02,0x0f,0x71,0x6d]
+ precrq_rs.ph.w $17, $18, $19 # CHECK: precrq_rs.ph.w $17, $18, $19 # encoding: [0x02,0x72,0x89,0x2d]
+ shilo $ac1, 3 # CHECK: shilo $ac1, 3 # encoding: [0x00,0x03,0x40,0x1d]
+ shilov $ac1, $5 # CHECK: shilov $ac1, $5 # encoding: [0x00,0x05,0x52,0x7c]
+ shll.ph $3, $4, 5 # CHECK: shll.ph $3, $4, 5 # encoding: [0x00,0x64,0x53,0xb5]
+ shll_s.ph $3, $4, 5 # CHECK: shll_s.ph $3, $4, 5 # encoding: [0x00,0x64,0x5b,0xb5]
+ shll.qb $3, $4, 5 # CHECK: shll.qb $3, $4, 5 # encoding: [0x00,0x64,0xa8,0x7c]
+ shllv.ph $3, $4, $5 # CHECK: shllv.ph $3, $4, $5 # encoding: [0x00,0x85,0x18,0x0e]
+ shllv_s.ph $3, $4, $5 # CHECK: shllv_s.ph $3, $4, $5 # encoding: [0x00,0x85,0x1c,0x0e]
+ shllv.qb $3, $4, $5 # CHECK: shllv.qb $3, $4, $5 # encoding: [0x00,0x85,0x1b,0x95]
+ shllv_s.w $3, $4, $5 # CHECK: shllv_s.w $3, $4, $5 # encoding: [0x00,0x85,0x1b,0xd5]
+ shll_s.w $3, $4, 5 # CHECK: shll_s.w $3, $4, 5 # encoding: [0x00,0x64,0x2b,0xf5]
+ shra.ph $3, $4, 5 # CHECK: shra.ph $3, $4, 5 # encoding: [0x00,0x64,0x53,0x35]
+ shra.qb $3, $4, 5 # CHECK: shra.qb $3, $4, 5 # encoding: [0x00,0x64,0xa1,0xfc]
+ shra_r.ph $3, $4, 5 # CHECK: shra_r.ph $3, $4, 5 # encoding: [0x00,0x64,0x57,0x35]
+ shra_r.qb $3, $4, 5 # CHECK: shra_r.qb $3, $4, 5 # encoding: [0x00,0x64,0xb1,0xfc]
+ shrav.ph $3, $4, $5 # CHECK: shrav.ph $3, $4, $5 # encoding: [0x00,0x85,0x19,0x8d]
+ shrav.qb $3, $4, $5 # CHECK: shrav.qb $3, $4, $5 # encoding: [0x00,0x85,0x19,0xcd]
+ shrav_r.ph $3, $4, $5 # CHECK: shrav_r.ph $3, $4, $5 # encoding: [0x00,0x85,0x1d,0x8d]
+ shrav_r.qb $3, $4, $5 # CHECK: shrav_r.qb $3, $4, $5 # encoding: [0x00,0x85,0x1d,0xcd]
+ shrav_r.w $3, $4, $5 # CHECK: shrav_r.w $3, $4, $5 # encoding: [0x00,0x85,0x1a,0xd5]
+ shra_r.w $3, $4, 5 # CHECK: shra_r.w $3, $4, 5 # encoding: [0x00,0x64,0x2a,0xf5]
+ shrl.ph $3, $4, 5 # CHECK: shrl.ph $3, $4, 5 # encoding: [0x00,0x64,0x53,0xfc]
+ shrl.qb $3, $4, 5 # CHECK: shrl.qb $3, $4, 5 # encoding: [0x00,0x64,0xb8,0x7c]
+ shrlv.ph $3, $4, $5 # CHECK: shrlv.ph $3, $4, $5 # encoding: [0x00,0x85,0x1b,0x15]
+ shrlv.qb $3, $4, $5 # CHECK: shrlv.qb $3, $4, $5 # encoding: [0x00,0x85,0x1b,0x55]
+ subq.ph $3, $4, $5 # CHECK: subq.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0x0d]
+ subq_s.ph $3, $4, $5 # CHECK: subq_s.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1e,0x0d]
+ subq_s.w $3, $4, $5 # CHECK: subq_s.w $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0x45]
+ subqh.ph $3, $4, $5 # CHECK: subqh.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0x4d]
+ subqh_r.ph $3, $4, $5 # CHECK: subqh_r.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1e,0x4d]
+ subqh.w $3, $4, $5 # CHECK: subqh.w $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0x8d]
+ subqh_r.w $3, $4, $5 # CHECK: subqh_r.w $3, $4, $5 # encoding: [0x00,0xa4,0x1e,0x8d]
+ subu.ph $3, $4, $5 # CHECK: subu.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0x0d]
+ subu_s.ph $3, $4, $5 # CHECK: subu_s.ph $3, $4, $5 # encoding: [0x00,0xa4,0x1f,0x0d]
+ subu.qb $3, $4, $5 # CHECK: subu.qb $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0xcd]
+ subu_s.qb $3, $4, $5 # CHECK: subu_s.qb $3, $4, $5 # encoding: [0x00,0xa4,0x1e,0xcd]
+ subuh.qb $3, $4, $5 # CHECK: subuh.qb $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0x4d]
+ subuh_r.qb $3, $4, $5 # CHECK: subuh_r.qb $3, $4, $5 # encoding: [0x00,0xa4,0x1f,0x4d]
+ dpsq_s.w.ph $ac1, $4, $6 # CHECK: dpsq_s.w.ph $ac1, $4, $6 # encoding: [0x00,0xc4,0x46,0xbc]
+ dpsq_sa.l.w $ac1, $4, $6 # CHECK: dpsq_sa.l.w $ac1, $4, $6 # encoding: [0x00,0xc4,0x56,0xbc]
+ dpsu.h.qbl $ac1, $4, $6 # CHECK: dpsu.h.qbl $ac1, $4, $6 # encoding: [0x00,0xc4,0x64,0xbc]
+ dpsu.h.qbr $ac1, $4, $6 # CHECK: dpsu.h.qbr $ac1, $4, $6 # encoding: [0x00,0xc4,0x74,0xbc]
+ dps.w.ph $ac1, $4, $6 # CHECK: dps.w.ph $ac1, $4, $6 # encoding: [0x00,0xc4,0x44,0xbc]
+ dpsqx_s.w.ph $ac1, $4, $6 # CHECK: dpsqx_s.w.ph $ac1, $4, $6 # encoding: [0x00,0xc4,0x66,0xbc]
+ dpsqx_sa.w.ph $ac1, $4, $6 # CHECK: dpsqx_sa.w.ph $ac1, $4, $6 # encoding: [0x00,0xc4,0x76,0xbc]
+ dpsx.w.ph $ac1, $4, $6 # CHECK: dpsx.w.ph $ac1, $4, $6 # encoding: [0x00,0xc4,0x54,0xbc]
+ mul.ph $1, $2, $3 # CHECK: mul.ph $1, $2, $3 # encoding: [0x00,0x62,0x08,0x2d]
+ mul_s.ph $1, $2, $3 # CHECK: mul_s.ph $1, $2, $3 # encoding: [0x00,0x62,0x0c,0x2d]
+ mulq_rs.w $1, $2, $3 # CHECK: mulq_rs.w $1, $2, $3 # encoding: [0x00,0x62,0x09,0x95]
+ mulq_s.ph $1, $2, $3 # CHECK: mulq_s.ph $1, $2, $3 # encoding: [0x00,0x62,0x09,0x55]
+ mulq_s.w $1, $2, $3 # CHECK: mulq_s.w $1, $2, $3 # encoding: [0x00,0x62,0x09,0xd5]
+ muleq_s.w.phl $1, $2, $3 # CHECK: muleq_s.w.phl $1, $2, $3 # encoding: [0x00,0x62,0x08,0x25]
+ muleq_s.w.phr $1, $2, $3 # CHECK: muleq_s.w.phr $1, $2, $3 # encoding: [0x00,0x62,0x08,0x65]
+ muleu_s.ph.qbl $1, $2, $3 # CHECK: muleu_s.ph.qbl $1, $2, $3 # encoding: [0x00,0x62,0x08,0x95]
+ muleu_s.ph.qbr $1, $2, $3 # CHECK: muleu_s.ph.qbr $1, $2, $3 # encoding: [0x00,0x62,0x08,0xd5]
+ mulq_rs.ph $1, $2, $3 # CHECK: mulq_rs.ph $1, $2, $3 # encoding: [0x00,0x62,0x09,0x15]
+ prepend $1, $2, 3 # CHECK: prepend $1, $2, 3 # encoding: [0x00,0x22,0x1a,0x55]
+ wrdsp $5 # CHECK: wrdsp $5 # encoding: [0x00,0xa7,0xd6,0x7c]
+ wrdsp $5, 2 # CHECK: wrdsp $5, 2 # encoding: [0x00,0xa0,0x96,0x7c]
+ wrdsp $5, 31 # CHECK: wrdsp $5 # encoding: [0x00,0xa7,0xd6,0x7c]
diff --git a/test/MC/Mips/micromips-invalid.s b/test/MC/Mips/micromips-invalid.s
index 74a62ceeba0a..7d34e79cf714 100644
--- a/test/MC/Mips/micromips-invalid.s
+++ b/test/MC/Mips/micromips-invalid.s
@@ -65,16 +65,29 @@
sb16 $7, 4($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
sh16 $7, 8($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
sw16 $7, 4($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- cache 256, 8($5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
- pref 256, 8($5) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ cache -1, 8($5) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ cache 32, 8($5) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ pref -1, 8($5) # CHECK: :[[@LINE]]:8: error: expected 5-bit unsigned immediate
+ pref 32, 8($5) # CHECK: :[[@LINE]]:8: error: expected 5-bit unsigned immediate
beqz16 $9, 20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
bnez16 $9, 20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
movep $5, $21, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
movep $8, $6, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
movep $5, $6, $5, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
movep $5, $6, $2, $9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 1024, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 7, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 1024, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- wait 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ break 1024 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break 1024, 5 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break 7, 1024 # CHECK: :[[@LINE]]:12: error: expected 10-bit unsigned immediate
+ break 1024, 1024 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ wait 1024 # CHECK: :[[@LINE]]:8: error: expected 10-bit unsigned immediate
+ prefx -1, $8($5) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ prefx 32, $8($5) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ jraddiusp 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 33 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 125 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 132 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
diff --git a/test/MC/Mips/micromips-loadstore-instructions.s b/test/MC/Mips/micromips-loadstore-instructions.s
index f22719dff94e..4865713a3c72 100644
--- a/test/MC/Mips/micromips-loadstore-instructions.s
+++ b/test/MC/Mips/micromips-loadstore-instructions.s
@@ -44,6 +44,14 @@
# CHECK-EL: swm32 $16, $17, 8($sp) # encoding: [0x5d,0x20,0x08,0xd0]
# CHECK-EL: swp $16, 8($4) # encoding: [0x04,0x22,0x08,0x90]
# CHECK-EL: lwp $16, 8($4) # encoding: [0x04,0x22,0x08,0x10]
+# CHECK-EL: lhue $4, 8($2) # encoding: [0x82,0x60,0x08,0x62]
+# CHECK-EL: lbe $4, 8($2) # encoding: [0x82,0x60,0x08,0x68]
+# CHECK-EL: lbue $4, 8($2) # encoding: [0x82,0x60,0x08,0x60]
+# CHECK-EL: lhe $4, 8($2) # encoding: [0x82,0x60,0x08,0x6a]
+# CHECK-EL: lwe $4, 8($2) # encoding: [0x82,0x60,0x08,0x6e]
+# CHECK-EL: sbe $5, 8($4) # encoding: [0xa4,0x60,0x08,0xa8]
+# CHECK-EL: she $5, 8($4) # encoding: [0xa4,0x60,0x08,0xaa]
+# CHECK-EL: swe $5, 8($4) # encoding: [0xa4,0x60,0x08,0xae]
#------------------------------------------------------------------------------
# Big endian
#------------------------------------------------------------------------------
@@ -82,6 +90,14 @@
# CHECK-EB: swm32 $16, $17, 8($sp) # encoding: [0x20,0x5d,0xd0,0x08]
# CHECK-EB: swp $16, 8($4) # encoding: [0x22,0x04,0x90,0x08]
# CHECK-EB: lwp $16, 8($4) # encoding: [0x22,0x04,0x10,0x08]
+# CHECK-EB: lhue $4, 8($2) # encoding: [0x60,0x82,0x62,0x08]
+# CHECK-EB: lbe $4, 8($2) # encoding: [0x60,0x82,0x68,0x08]
+# CHECK-EB: lbue $4, 8($2) # encoding: [0x60,0x82,0x60,0x08]
+# CHECK-EB: lhe $4, 8($2) # encoding: [0x60,0x82,0x6a,0x08]
+# CHECK-EB: lwe $4, 8($2) # encoding: [0x60,0x82,0x6e,0x08]
+# CHECK-EB: sbe $5, 8($4) # encoding: [0x60,0xa4,0xa8,0x08]
+# CHECK-EB: she $5, 8($4) # encoding: [0x60,0xa4,0xaa,0x08]
+# CHECK-EB: swe $5, 8($4) # encoding: [0x60,0xa4,0xae,0x08]
lb $5, 8($4)
lbu $6, 8($4)
lh $2, 8($4)
@@ -117,3 +133,11 @@
swm $16, $17, 8($sp)
swp $16, 8($4)
lwp $16, 8($4)
+ lhue $4, 8($2)
+ lbe $4, 8($2)
+ lbue $4, 8($2)
+ lhe $4, 8($2)
+ lwe $4, 8($2)
+ sbe $5, 8($4)
+ she $5, 8($4)
+ swe $5, 8($4)
diff --git a/test/MC/Mips/micromips-pc16-fixup.s b/test/MC/Mips/micromips-pc16-fixup.s
index 146a1550b499..7725b4e6f0eb 100644
--- a/test/MC/Mips/micromips-pc16-fixup.s
+++ b/test/MC/Mips/micromips-pc16-fixup.s
@@ -1,6 +1,6 @@
# RUN: llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r2 -arch=mips -mattr=+micromips 2>&1 -filetype=obj | FileCheck %s
#
-# CHECK-NOT: LLVM ERROR: out of range PC16 fixup
+# CHECK-NOT: error: out of range PC16 fixup
.text
b foo
diff --git a/test/MC/Mips/micromips/invalid.s b/test/MC/Mips/micromips/invalid.s
new file mode 100644
index 000000000000..b091062fdccf
--- /dev/null
+++ b/test/MC/Mips/micromips/invalid.s
@@ -0,0 +1,35 @@
+# RUN: not llvm-mc %s -triple=mips -show-encoding -mattr=micromips 2>%t1
+# RUN: FileCheck %s < %t1
+
+ break -1 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break 1024 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break -1, 5 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break 1024, 5 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break 7, -1 # CHECK: :[[@LINE]]:12: error: expected 10-bit unsigned immediate
+ break 7, 1024 # CHECK: :[[@LINE]]:12: error: expected 10-bit unsigned immediate
+ break16 -1 # CHECK: :[[@LINE]]:11: error: expected 4-bit unsigned immediate
+ break16 16 # CHECK: :[[@LINE]]:11: error: expected 4-bit unsigned immediate
+ cache -1, 255($7) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ # FIXME: Check '0 < pos + size <= 32' constraint on ext
+ ext $2, $3, -1, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ ext $2, $3, 32, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ ext $2, $3, 1, 0 # CHECK: :[[@LINE]]:18: error: expected immediate in range 1 .. 32
+ ext $2, $3, 1, 33 # CHECK: :[[@LINE]]:18: error: expected immediate in range 1 .. 32
+ ins $2, $3, -1, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ ins $2, $3, 32, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ jraddiusp -1 # CHECK: :[[@LINE]]:13: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp -4 # CHECK: :[[@LINE]]:13: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 125 # CHECK: :[[@LINE]]:13: error: expected both 7-bit unsigned immediate and multiple of 4
+ jraddiusp 128 # CHECK: :[[@LINE]]:13: error: expected both 7-bit unsigned immediate and multiple of 4
+ pref -1, 255($7) # CHECK: :[[@LINE]]:8: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:8: error: expected 5-bit unsigned immediate
+ rotr $2, $3, 32 # CHECK: :[[@LINE]]:16: error: expected 5-bit unsigned immediate
+ sdbbp16 -1 # CHECK: :[[@LINE]]:11: error: expected 4-bit unsigned immediate
+ sdbbp16 16 # CHECK: :[[@LINE]]:11: error: expected 4-bit unsigned immediate
+ sll $2, $3, -1 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ sll $2, $3, 32 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ sra $2, $3, -1 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ sra $2, $3, 32 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ srl $2, $3, -1 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ srl $2, $3, 32 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/micromips32r6/invalid.s b/test/MC/Mips/micromips32r6/invalid.s
index 8ba787ae1aa4..41c661b04920 100644
--- a/test/MC/Mips/micromips32r6/invalid.s
+++ b/test/MC/Mips/micromips32r6/invalid.s
@@ -1,6 +1,121 @@
# RUN: not llvm-mc %s -triple=mips -show-encoding -mcpu=mips32r6 -mattr=micromips 2>%t1
# RUN: FileCheck %s < %t1
- break 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 1023, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ addiur1sp $7, 260 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ addiur1sp $7, 241 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: misaligned immediate operand value
+ addiur1sp $8, 240 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ addiur2 $9, $7, -1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ addiur2 $6, $7, 10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ addius5 $7, 9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ addiusp 1032 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ align $4, $2, $3, -1 # CHECK: :[[@LINE]]:21: error: expected 2-bit unsigned immediate
+ align $4, $2, $3, 4 # CHECK: :[[@LINE]]:21: error: expected 2-bit unsigned immediate
+ beqzc16 $9, 20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ beqzc16 $6, 31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: branch to misaligned address
+ beqzc16 $6, 130 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: branch target out of range
+ bnezc16 $9, 20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ bnezc16 $6, 31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: branch to misaligned address
+ bnezc16 $6, 130 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: branch target out of range
+ break -1 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break 1024 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break -1, 5 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break 1024, 5 # CHECK: :[[@LINE]]:9: error: expected 10-bit unsigned immediate
+ break 7, -1 # CHECK: :[[@LINE]]:12: error: expected 10-bit unsigned immediate
+ break 7, 1024 # CHECK: :[[@LINE]]:12: error: expected 10-bit unsigned immediate
+ break 1023, 1024 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ cache -1, 255($7) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ # FIXME: Check '0 < pos + size <= 32' constraint on ext
+ ext $2, $3, -1, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ ext $2, $3, 32, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ ext $2, $3, 1, 0 # CHECK: :[[@LINE]]:18: error: expected immediate in range 1 .. 32
+ ext $2, $3, 1, 33 # CHECK: :[[@LINE]]:18: error: expected immediate in range 1 .. 32
+ ins $2, $3, -1, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ ins $2, $3, 32, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
ei $32 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swe $33, 8($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swe $5, 8($34) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swe $5, 512($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lbu16 $9, 8($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lbu16 $3, -2($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lbu16 $3, -2($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lbu16 $16, 8($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhu16 $9, 4($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhu16 $3, 64($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lhu16 $3, 64($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lhu16 $16, 4($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lsa $4, $2, $3, 0 # CHECK: :[[@LINE]]:21: error: expected immediate in range 1 .. 4
+ lsa $4, $2, $3, 5 # CHECK: :[[@LINE]]:21: error: expected immediate in range 1 .. 4
+ lw16 $9, 8($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lw16 $4, 68($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lw16 $4, 68($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lw16 $17, 8($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ pref -1, 255($7) # CHECK: :[[@LINE]]:8: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:8: error: expected 5-bit unsigned immediate
+ teq $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ teq $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ teq $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tge $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tge $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tge $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tgeu $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tgeu $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tgeu $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tlt $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tlt $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tlt $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tltu $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tltu $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tltu $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tne $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tne $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tne $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ teq $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tge $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tgeu $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tlt $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tltu $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tne $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ wait -1 # CHECK: :[[@LINE]]:8: error: expected 10-bit unsigned immediate
+ wait 1024 # CHECK: :[[@LINE]]:8: error: expected 10-bit unsigned immediate
+ wrpgpr $34, $4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ wrpgpr $3, $33 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ wsbh $34, $4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ wsbh $3, $33 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ jrcaddiusp 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 33 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 125 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 132 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ lwm16 $5, $6, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: $16 or $31 expected
+ lwm16 $16, $19, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: consecutive register numbers expected
+ lwm16 $16-$25, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid register operand
+ lwm16 $16, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwm16 $16, $17, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwm16 $16-$20, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwm16 $16, $17, $ra, 8($fp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwm16 $16, $17, $ra, 64($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sb16 $9, 4($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sb16 $3, 64($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ sb16 $16, 4($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sb16 $7, 4($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sh16 $9, 8($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sh16 $4, 68($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ sh16 $16, 8($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sh16 $7, 8($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sw16 $9, 4($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sw16 $4, 64($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ sw16 $16, 4($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sw16 $7, 4($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $5, $6, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: $16 or $31 expected
+ swm16 $16, $19, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: consecutive register numbers expected
+ swm16 $16-$25, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid register operand
+ swm16 $16, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $16, $17, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $16-$20, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $16, $17, $ra, 8($fp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $16, $17, $ra, 64($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/micromips32r6/valid.s b/test/MC/Mips/micromips32r6/valid.s
index a49622a507de..62048a91163d 100644
--- a/test/MC/Mips/micromips32r6/valid.s
+++ b/test/MC/Mips/micromips32r6/valid.s
@@ -5,6 +5,15 @@
addiu $3, $4, 1234 # CHECK: addiu $3, $4, 1234 # encoding: [0x30,0x64,0x04,0xd2]
addu $3, $4, $5 # CHECK: addu $3, $4, $5 # encoding: [0x00,0xa4,0x19,0x50]
addiupc $4, 100 # CHECK: addiupc $4, 100 # encoding: [0x78,0x80,0x00,0x19]
+ addiur1sp $7, 4 # CHECK: addiur1sp $7, 4 # encoding: [0x6f,0x83]
+ addiur2 $6, $7, -1 # CHECK: addiur2 $6, $7, -1 # encoding: [0x6f,0x7e]
+ addiur2 $6, $7, 12 # CHECK: addiur2 $6, $7, 12 # encoding: [0x6f,0x76]
+ addius5 $7, -2 # CHECK: addius5 $7, -2 # encoding: [0x4c,0xfc]
+ addiusp -1028 # CHECK: addiusp -1028 # encoding: [0x4f,0xff]
+ addiusp -1032 # CHECK: addiusp -1032 # encoding: [0x4f,0xfd]
+ addiusp 1024 # CHECK: addiusp 1024 # encoding: [0x4c,0x01]
+ addiusp 1028 # CHECK: addiusp 1028 # encoding: [0x4c,0x03]
+ addiusp -16 # CHECK: addiusp -16 # encoding: [0x4f,0xf9]
aluipc $3, 56 # CHECK: aluipc $3, 56 # encoding: [0x78,0x7f,0x00,0x38]
and $3, $4, $5 # CHECK: and $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0x50]
andi $3, $4, 1234 # CHECK: andi $3, $4, 1234 # encoding: [0xd0,0x64,0x04,0xd2]
@@ -17,8 +26,12 @@
bgtzalc $2, 1332 # CHECK: bgtzalc $2, 1332 # encoding: [0xe0,0x40,0x02,0x9a]
bltzalc $2, 1332 # CHECK: bltzalc $2, 1332 # encoding: [0xe0,0x42,0x02,0x9a]
blezalc $2, 1332 # CHECK: blezalc $2, 1332 # encoding: [0xc0,0x40,0x02,0x9a]
- balc 14572256 # CHECK: balc 14572256 # encoding: [0xb4,0x37,0x96,0xb8]
- bc 14572256 # CHECK: bc 14572256 # encoding: [0x94,0x37,0x96,0xb8]
+ balc 7286128 # CHECK: balc 7286128 # encoding: [0xb4,0x37,0x96,0xb8]
+ b 132 # CHECK: bc16 132 # encoding: [0xcc,0x42]
+ bc 7286128 # CHECK: bc 7286128 # encoding: [0x94,0x37,0x96,0xb8]
+ bc16 132 # CHECK: bc16 132 # encoding: [0xcc,0x42]
+ beqzc16 $6, 20 # CHECK: beqzc16 $6, 20 # encoding: [0x8f,0x0a]
+ bnezc16 $6, 20 # CHECK: bnezc16 $6, 20 # encoding: [0xaf,0x0a]
bitswap $4, $2 # CHECK: bitswap $4, $2 # encoding: [0x00,0x44,0x0b,0x3c]
break # CHECK: break # encoding: [0x00,0x00,0x00,0x07]
break 7 # CHECK: break 7 # encoding: [0x00,0x07,0x00,0x07]
@@ -32,12 +45,20 @@
ei # CHECK: ei # encoding: [0x00,0x00,0x57,0x7c]
ei $0 # CHECK: ei # encoding: [0x00,0x00,0x57,0x7c]
ei $10 # CHECK: ei $10 # encoding: [0x00,0x0a,0x57,0x7c]
+ di # CHECK: di # encoding: [0x00,0x00,0x47,0x7c]
+ di $0 # CHECK: di # encoding: [0x00,0x00,0x47,0x7c]
+ di $15 # CHECK: di $15 # encoding: [0x00,0x0f,0x47,0x7c]
eret # CHECK: eret # encoding: [0x00,0x00,0xf3,0x7c]
eretnc # CHECK: eretnc # encoding: [0x00,0x01,0xf3,0x7c]
+ jalr $9 # CHECK: jalr $9 # encoding: [0x45,0x2b]
jialc $5, 256 # CHECK: jialc $5, 256 # encoding: [0x80,0x05,0x01,0x00]
jic $5, 256 # CHECK: jic $5, 256 # encoding: [0xa0,0x05,0x01,0x00]
- lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x43,0x26,0x0f]
+ jrc16 $9 # CHECK: jrc16 $9 # encoding: [0x45,0x23]
+ jrcaddiusp 20 # CHECK: jrcaddiusp 20 # encoding: [0x44,0xb3]
+ lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x43,0x24,0x0f]
lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0x78,0x48,0x00,0x43]
+ lwm $16, $17, $ra, 8($sp) # CHECK: lwm16 $16, $17, $ra, 8($sp) # encoding: [0x45,0x22]
+ lwm16 $16, $17, $ra, 8($sp) # CHECK: lwm16 $16, $17, $ra, 8($sp) # encoding: [0x45,0x22]
mod $3, $4, $5 # CHECK: mod $3, $4, $5 # encoding: [0x00,0xa4,0x19,0x58]
modu $3, $4, $5 # CHECK: modu $3, $4, $5 # encoding: [0x00,0xa4,0x19,0xd8]
mul $3, $4, $5 # CHECK mul $3, $4, $5 # encoding: [0x00,0xa4,0x18,0x18]
@@ -49,13 +70,183 @@
or $3, $4, $5 # CHECK: or $3, $4, $5 # encoding: [0x00,0xa4,0x1a,0x90]
ori $3, $4, 1234 # CHECK: ori $3, $4, 1234 # encoding: [0x50,0x64,0x04,0xd2]
pref 1, 8($5) # CHECK: pref 1, 8($5) # encoding: [0x60,0x25,0x20,0x08]
+ sb16 $3, 4($16) # CHECK: sb16 $3, 4($16) # encoding: [0x89,0x84]
seb $3, $4 # CHECK: seb $3, $4 # encoding: [0x00,0x64,0x2b,0x3c]
seh $3, $4 # CHECK: seh $3, $4 # encoding: [0x00,0x64,0x3b,0x3c]
seleqz $2,$3,$4 # CHECK: seleqz $2, $3, $4 # encoding: [0x00,0x83,0x11,0x40]
selnez $2,$3,$4 # CHECK: selnez $2, $3, $4 # encoding: [0x00,0x83,0x11,0x80]
+ sh16 $4, 8($17) # CHECK: sh16 $4, 8($17) # encoding: [0xaa,0x14]
sll $4, $3, 7 # CHECK: sll $4, $3, 7 # encoding: [0x00,0x83,0x38,0x00]
sub $3, $4, $5 # CHECK: sub $3, $4, $5 # encoding: [0x00,0xa4,0x19,0x90]
subu $3, $4, $5 # CHECK: subu $3, $4, $5 # encoding: [0x00,0xa4,0x19,0xd0]
+ sw $4, 124($sp) # CHECK: sw $4, 124($sp) # encoding: [0xc8,0x9f]
+ sw16 $4, 4($17) # CHECK: sw16 $4, 4($17) # encoding: [0xea,0x11]
+ sw16 $0, 4($17) # CHECK: sw16 $zero, 4($17) # encoding: [0xe8,0x11]
+ swm $16, $17, $ra, 8($sp) # CHECK: swm16 $16, $17, $ra, 8($sp) # encoding: [0x45,0x2a]
+ swm16 $16, $17, $ra, 8($sp) # CHECK: swm16 $16, $17, $ra, 8($sp) # encoding: [0x45,0x2a]
+ wrpgpr $3, $4 # CHECK: wrpgpr $3, $4 # encoding: [0x00,0x64,0xf1,0x7c]
+ wsbh $3, $4 # CHECK: wsbh $3, $4 # encoding: [0x00,0x64,0x7b,0x3c]
+ pause # CHECK: pause # encoding: [0x00,0x00,0x28,0x00]
+ rdhwr $5, $29, 2 # CHECK: rdhwr $5, $29, 2 # encoding: [0x00,0xbd,0x11,0xc0]
+ rdhwr $5, $29, 0 # CHECK: rdhwr $5, $29 # encoding: [0x00,0xbd,0x01,0xc0]
+ rdhwr $5, $29 # CHECK: rdhwr $5, $29 # encoding: [0x00,0xbd,0x01,0xc0]
+ wait # CHECK: wait # encoding: [0x00,0x00,0x93,0x7c]
+ wait 17 # CHECK: wait 17 # encoding: [0x00,0x11,0x93,0x7c]
+ ssnop # CHECK: ssnop # encoding: [0x00,0x00,0x08,0x00]
+ sync # CHECK: sync # encoding: [0x00,0x00,0x6b,0x7c]
+ sync 17 # CHECK: sync 17 # encoding: [0x00,0x11,0x6b,0x7c]
+ synci 8($5) # CHECK: synci 8($5) # encoding: [0x41,0x85,0x00,0x08]
+ rdpgpr $3, $9 # CHECK: $3, $9 # encoding: [0x00,0x69,0xe1,0x7c]
+ sdbbp # CHECK: sdbbp # encoding: [0x00,0x00,0xdb,0x7c]
+ sdbbp 34 # CHECK: sdbbp 34 # encoding: [0x00,0x22,0xdb,0x7c]
xor $3, $4, $5 # CHECK: xor $3, $4, $5 # encoding: [0x00,0xa4,0x1b,0x10]
xori $3, $4, 1234 # CHECK: xori $3, $4, 1234 # encoding: [0x70,0x64,0x04,0xd2]
-
+ sw $5, 4($6) # CHECK: sw $5, 4($6) # encoding: [0xf8,0xa6,0x00,0x04]
+ swe $5, 8($4) # CHECK: swe $5, 8($4) # encoding: [0x60,0xa4,0xae,0x08]
+ add.s $f3, $f4, $f5 # CHECK: add.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x18,0x30]
+ add.d $f2, $f4, $f6 # CHECK: add.d $f2, $f4, $f6 # encoding: [0x54,0xc4,0x11,0x30]
+ sub.s $f3, $f4, $f5 # CHECK: sub.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x18,0x70]
+ sub.d $f2, $f4, $f6 # CHECK: sub.d $f2, $f4, $f6 # encoding: [0x54,0xc4,0x11,0x70]
+ mul.s $f3, $f4, $f5 # CHECK: mul.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x18,0xb0]
+ mul.d $f2, $f4, $f6 # CHECK: mul.d $f2, $f4, $f6 # encoding: [0x54,0xc4,0x11,0xb0]
+ div.s $f3, $f4, $f5 # CHECK: div.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x18,0xf0]
+ div.d $f2, $f4, $f6 # CHECK: div.d $f2, $f4, $f6 # encoding: [0x54,0xc4,0x11,0xf0]
+ maddf.s $f3, $f4, $f5 # CHECK: maddf.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x19,0xb8]
+ maddf.d $f3, $f4, $f5 # CHECK: maddf.d $f3, $f4, $f5 # encoding: [0x54,0xa4,0x1b,0xb8]
+ msubf.s $f3, $f4, $f5 # CHECK: msubf.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x19,0xf8]
+ msubf.d $f3, $f4, $f5 # CHECK: msubf.d $f3, $f4, $f5 # encoding: [0x54,0xa4,0x1b,0xf8]
+ mov.s $f6, $f7 # CHECK: mov.s $f6, $f7 # encoding: [0x54,0xc7,0x00,0x7b]
+ mov.d $f4, $f6 # CHECK: mov.d $f4, $f6 # encoding: [0x54,0x86,0x20,0x7b]
+ neg.s $f6, $f7 # CHECK: neg.s $f6, $f7 # encoding: [0x54,0xc7,0x0b,0x7b]
+ neg.d $f4, $f6 # CHECK: neg.d $f4, $f6 # encoding: [0x54,0x86,0x2b,0x7b]
+ max.s $f5, $f4, $f3 # CHECK: max.s $f5, $f4, $f3 # encoding: [0x54,0x64,0x28,0x0b]
+ max.d $f5, $f4, $f3 # CHECK: max.d $f5, $f4, $f3 # encoding: [0x54,0x64,0x2a,0x0b]
+ maxa.s $f5, $f4, $f3 # CHECK: maxa.s $f5, $f4, $f3 # encoding: [0x54,0x64,0x28,0x2b]
+ maxa.d $f5, $f4, $f3 # CHECK: maxa.d $f5, $f4, $f3 # encoding: [0x54,0x64,0x2a,0x2b]
+ min.s $f5, $f4, $f3 # CHECK: min.s $f5, $f4, $f3 # encoding: [0x54,0x64,0x28,0x03]
+ min.d $f5, $f4, $f3 # CHECK: min.d $f5, $f4, $f3 # encoding: [0x54,0x64,0x2a,0x03]
+ mina.s $f5, $f4, $f3 # CHECK: mina.s $f5, $f4, $f3 # encoding: [0x54,0x64,0x28,0x23]
+ mina.d $f5, $f4, $f3 # CHECK: mina.d $f5, $f4, $f3 # encoding: [0x54,0x64,0x2a,0x23]
+ cmp.af.s $f2, $f3, $f4 # CHECK: cmp.af.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x05]
+ cmp.af.d $f2, $f3, $f4 # CHECK: cmp.af.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x15]
+ cmp.un.s $f2, $f3, $f4 # CHECK: cmp.un.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x45]
+ cmp.un.d $f2, $f3, $f4 # CHECK: cmp.un.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x55]
+ cmp.eq.s $f2, $f3, $f4 # CHECK: cmp.eq.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x85]
+ cmp.eq.d $f2, $f3, $f4 # CHECK: cmp.eq.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x95]
+ cmp.ueq.s $f2, $f3, $f4 # CHECK: cmp.ueq.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0xc5]
+ cmp.ueq.d $f2, $f3, $f4 # CHECK: cmp.ueq.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0xd5]
+ cmp.lt.s $f2, $f3, $f4 # CHECK: cmp.lt.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x05]
+ cmp.lt.d $f2, $f3, $f4 # CHECK: cmp.lt.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x15]
+ cmp.ult.s $f2, $f3, $f4 # CHECK: cmp.ult.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x45]
+ cmp.ult.d $f2, $f3, $f4 # CHECK: cmp.ult.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x55]
+ cmp.le.s $f2, $f3, $f4 # CHECK: cmp.le.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x85]
+ cmp.le.d $f2, $f3, $f4 # CHECK: cmp.le.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x95]
+ cmp.ule.s $f2, $f3, $f4 # CHECK: cmp.ule.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0xc5]
+ cmp.ule.d $f2, $f3, $f4 # CHECK: cmp.ule.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0xd5]
+ cmp.saf.s $f2, $f3, $f4 # CHECK: cmp.saf.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x05]
+ cmp.saf.d $f2, $f3, $f4 # CHECK: cmp.saf.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x15]
+ cmp.sun.s $f2, $f3, $f4 # CHECK: cmp.sun.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x45]
+ cmp.sun.d $f2, $f3, $f4 # CHECK: cmp.sun.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x55]
+ cmp.seq.s $f2, $f3, $f4 # CHECK: cmp.seq.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x85]
+ cmp.seq.d $f2, $f3, $f4 # CHECK: cmp.seq.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x95]
+ cmp.sueq.s $f2, $f3, $f4 # CHECK: cmp.sueq.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0xc5]
+ cmp.sueq.d $f2, $f3, $f4 # CHECK: cmp.sueq.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0xd5]
+ cmp.slt.s $f2, $f3, $f4 # CHECK: cmp.slt.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x05]
+ cmp.slt.d $f2, $f3, $f4 # CHECK: cmp.slt.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x15]
+ cmp.sult.s $f2, $f3, $f4 # CHECK: cmp.sult.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x45]
+ cmp.sult.d $f2, $f3, $f4 # CHECK: cmp.sult.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x55]
+ cmp.sle.s $f2, $f3, $f4 # CHECK: cmp.sle.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x85]
+ cmp.sle.d $f2, $f3, $f4 # CHECK: cmp.sle.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x95]
+ cmp.sule.s $f2, $f3, $f4 # CHECK: cmp.sule.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0xc5]
+ cmp.sule.d $f2, $f3, $f4 # CHECK: cmp.sule.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0xd5]
+ cvt.l.s $f3, $f4 # CHECK: cvt.l.s $f3, $f4 # encoding: [0x54,0x64,0x01,0x3b]
+ cvt.l.d $f3, $f4 # CHECK: cvt.l.d $f3, $f4 # encoding: [0x54,0x64,0x41,0x3b]
+ cvt.w.s $f3, $f4 # CHECK: cvt.w.s $f3, $f4 # encoding: [0x54,0x64,0x09,0x3b]
+ cvt.w.d $f3, $f4 # CHECK: cvt.w.d $f3, $f4 # encoding: [0x54,0x64,0x49,0x3b]
+ cvt.d.s $f2, $f4 # CHECK: cvt.d.s $f2, $f4 # encoding: [0x54,0x44,0x13,0x7b]
+ cvt.d.w $f2, $f4 # CHECK: cvt.d.w $f2, $f4 # encoding: [0x54,0x44,0x33,0x7b]
+ cvt.d.l $f2, $f4 # CHECK: cvt.d.l $f2, $f4 # encoding: [0x54,0x44,0x53,0x7b]
+ cvt.s.d $f2, $f4 # CHECK: cvt.s.d $f2, $f4 # encoding: [0x54,0x44,0x1b,0x7b]
+ cvt.s.w $f3, $f4 # CHECK: cvt.s.w $f3, $f4 # encoding: [0x54,0x64,0x3b,0x7b]
+ cvt.s.l $f3, $f4 # CHECK: cvt.s.l $f3, $f4 # encoding: [0x54,0x64,0x5b,0x7b]
+ abs.s $f3, $f5 # CHECK: abs.s $f3, $f5 # encoding: [0x54,0x65,0x03,0x7b]
+ abs.d $f2, $f4 # CHECK: abs.d $f2, $f4 # encoding: [0x54,0x44,0x23,0x7b]
+ floor.l.s $f3, $f5 # CHECK: floor.l.s $f3, $f5 # encoding: [0x54,0x65,0x03,0x3b]
+ floor.l.d $f2, $f4 # CHECK: floor.l.d $f2, $f4 # encoding: [0x54,0x44,0x43,0x3b]
+ floor.w.s $f3, $f5 # CHECK: floor.w.s $f3, $f5 # encoding: [0x54,0x65,0x0b,0x3b]
+ floor.w.d $f2, $f4 # CHECK: floor.w.d $f2, $f4 # encoding: [0x54,0x44,0x4b,0x3b]
+ ceil.l.s $f3, $f5 # CHECK: ceil.l.s $f3, $f5 # encoding: [0x54,0x65,0x13,0x3b]
+ ceil.l.d $f2, $f4 # CHECK: ceil.l.d $f2, $f4 # encoding: [0x54,0x44,0x53,0x3b]
+ ceil.w.s $f3, $f5 # CHECK: ceil.w.s $f3, $f5 # encoding: [0x54,0x65,0x1b,0x3b]
+ ceil.w.d $f2, $f4 # CHECK: ceil.w.d $f2, $f4 # encoding: [0x54,0x44,0x5b,0x3b]
+ trunc.l.s $f3, $f5 # CHECK: trunc.l.s $f3, $f5 # encoding: [0x54,0x65,0x23,0x3b]
+ trunc.l.d $f2, $f4 # CHECK: trunc.l.d $f2, $f4 # encoding: [0x54,0x44,0x63,0x3b]
+ trunc.w.s $f3, $f5 # CHECK: trunc.w.s $f3, $f5 # encoding: [0x54,0x65,0x2b,0x3b]
+ trunc.w.d $f2, $f4 # CHECK: trunc.w.d $f2, $f4 # encoding: [0x54,0x44,0x6b,0x3b]
+ sqrt.s $f3, $f5 # CHECK: sqrt.s $f3, $f5 # encoding: [0x54,0x65,0x0a,0x3b]
+ sqrt.d $f2, $f4 # CHECK: sqrt.d $f2, $f4 # encoding: [0x54,0x44,0x4a,0x3b]
+ rsqrt.s $f3, $f5 # CHECK: rsqrt.s $f3, $f5 # encoding: [0x54,0x65,0x02,0x3b]
+ rsqrt.d $f2, $f4 # CHECK: rsqrt.d $f2, $f4 # encoding: [0x54,0x44,0x42,0x3b]
+ lw $3, 32($gp) # CHECK: lw $3, 32($gp) # encoding: [0x65,0x88]
+ lw $3, 24($sp) # CHECK: lw $3, 24($sp) # encoding: [0x48,0x66]
+ lw16 $4, 8($17) # CHECK: lw16 $4, 8($17) # encoding: [0x6a,0x12]
+ lhu16 $3, 4($16) # CHECK: lhu16 $3, 4($16) # encoding: [0x29,0x82]
+ lbu16 $3, 4($17) # CHECK: lbu16 $3, 4($17) # encoding: [0x09,0x94]
+ lbu16 $3, -1($17) # CHECK: lbu16 $3, -1($17) # encoding: [0x09,0x9f]
+ sb $4, 6($5) # CHECK: sb $4, 6($5) # encoding: [0x18,0x85,0x00,0x06]
+ sbe $4, 6($5) # CHECK: sbe $4, 6($5) # encoding: [0x60,0x85,0xa8,0x06]
+ sce $4, 6($5) # CHECK: sce $4, 6($5) # encoding: [0x60,0x85,0xac,0x06]
+ sh $4, 6($5) # CHECK: sh $4, 6($5) # encoding: [0x38,0x85,0x00,0x06]
+ she $4, 6($5) # CHECK: she $4, 6($5) # encoding: [0x60,0x85,0xaa,0x06]
+ lle $4, 6($5) # CHECK: lle $4, 6($5) # encoding: [0x60,0x85,0x6c,0x06]
+ lwe $4, 6($5) # CHECK: lwe $4, 6($5) # encoding: [0x60,0x85,0x6e,0x06]
+ lw $4, 6($5) # CHECK: lw $4, 6($5) # encoding: [0xfc,0x85,0x00,0x06]
+ lui $6, 17767 # CHECK: lui $6, 17767 # encoding: [0x10,0xc0,0x45,0x67]
+ addu16 $6, $17, $4 # CHECK: addu16 $6, $17, $4 # encoding: [0x04,0xcc]
+ and16 $16, $2 # CHECK: and16 $16, $2 # encoding: [0x44,0x21]
+ andi16 $4, $5, 8 # CHECK: andi16 $4, $5, 8 # encoding: [0x2e,0x56]
+ not16 $4, $7 # CHECK: not16 $4, $7 # encoding: [0x46,0x70]
+ or16 $3, $7 # CHECK: or16 $3, $7 # encoding: [0x45,0xf9]
+ sll16 $3, $6, 8 # CHECK: sll16 $3, $6, 8 # encoding: [0x25,0xe0]
+ srl16 $3, $6, 8 # CHECK: srl16 $3, $6, 8 # encoding: [0x25,0xe1]
+ prefe 1, 8($5) # CHECK: prefe 1, 8($5) # encoding: [0x60,0x25,0xa4,0x08]
+ cachee 1, 8($5) # CHECK: cachee 1, 8($5) # encoding: [0x60,0x25,0xa6,0x08]
+ teq $8, $9 # CHECK: teq $8, $9 # encoding: [0x01,0x28,0x00,0x3c]
+ teq $5, $7, 15 # CHECK: teq $5, $7, 15 # encoding: [0x00,0xe5,0xf0,0x3c]
+ tge $7, $10 # CHECK: tge $7, $10 # encoding: [0x01,0x47,0x02,0x3c]
+ tge $7, $19, 15 # CHECK: tge $7, $19, 15 # encoding: [0x02,0x67,0xf2,0x3c]
+ tgeu $22, $gp # CHECK: tgeu $22, $gp # encoding: [0x03,0x96,0x04,0x3c]
+ tgeu $20, $14, 15 # CHECK: tgeu $20, $14, 15 # encoding: [0x01,0xd4,0xf4,0x3c]
+ tlt $15, $13 # CHECK: tlt $15, $13 # encoding: [0x01,0xaf,0x08,0x3c]
+ tlt $2, $19, 15 # CHECK: tlt $2, $19, 15 # encoding: [0x02,0x62,0xf8,0x3c]
+ tltu $11, $16 # CHECK: tltu $11, $16 # encoding: [0x02,0x0b,0x0a,0x3c]
+ tltu $16, $sp, 15 # CHECK: tltu $16, $sp, 15 # encoding: [0x03,0xb0,0xfa,0x3c]
+ tne $6, $17 # CHECK: tne $6, $17 # encoding: [0x02,0x26,0x0c,0x3c]
+ tne $7, $8, 15 # CHECK: tne $7, $8, 15 # encoding: [0x01,0x07,0xfc,0x3c]
+ break16 8 # CHECK: break16 8 # encoding: [0x46,0x1b]
+ li16 $3, -1 # CHECK: li16 $3, -1 # encoding: [0xed,0xff]
+ move16 $3, $5 # CHECK: move16 $3, $5 # encoding: [0x0c,0x65]
+ sdbbp16 8 # CHECK: sdbbp16 8 # encoding: [0x46,0x3b]
+ subu16 $5, $16, $3 # CHECK: subu16 $5, $16, $3 # encoding: [0x04,0x3b]
+ xor16 $17, $5 # CHECK: xor16 $17, $5 # encoding: [0x44,0xd8]
+ lb $4, 8($5) # CHECK: lb $4, 8($5) # encoding: [0x1c,0x85,0x00,0x08]
+ lbu $4, 8($5) # CHECK: lbu $4, 8($5) # encoding: [0x14,0x85,0x00,0x08]
+ lbe $4, 8($5) # CHECK: lbe $4, 8($5) # encoding: [0x60,0x85,0x68,0x08]
+ lbue $4, 8($5) # CHECK: lbue $4, 8($5) # encoding: [0x60,0x85,0x60,0x08]
+ recip.s $f2, $f4 # CHECK: recip.s $f2, $f4 # encoding: [0x54,0x44,0x12,0x3b]
+ recip.d $f2, $f4 # CHECK: recip.d $f2, $f4 # encoding: [0x54,0x44,0x52,0x3b]
+ rint.s $f2, $f4 # CHECK: rint.s $f2, $f4 # encoding: [0x54,0x82,0x00,0x20]
+ rint.d $f2, $f4 # CHECK: rint.d $f2, $f4 # encoding: [0x54,0x82,0x02,0x20]
+ round.l.s $f2, $f4 # CHECK: round.l.s $f2, $f4 # encoding: [0x54,0x44,0x33,0x3b]
+ round.l.d $f2, $f4 # CHECK: round.l.d $f2, $f4 # encoding: [0x54,0x44,0x73,0x3b]
+ round.w.s $f2, $f4 # CHECK: round.w.s $f2, $f4 # encoding: [0x54,0x44,0x3b,0x3b]
+ round.w.d $f2, $f4 # CHECK: round.w.d $f2, $f4 # encoding: [0x54,0x44,0x7b,0x3b]
+ sel.s $f1, $f1, $f2 # CHECK: sel.s $f1, $f1, $f2 # encoding: [0x54,0x41,0x08,0xb8]
+ sel.d $f0, $f2, $f4 # CHECK: sel.d $f0, $f2, $f4 # encoding: [0x54,0x82,0x02,0xb8]
+ seleqz.s $f1, $f2, $f3 # CHECK: seleqz.s $f1, $f2, $f3 # encoding: [0x54,0x62,0x08,0x38]
+ seleqz.d $f2, $f4, $f8 # CHECK: seleqz.d $f2, $f4, $f8 # encoding: [0x55,0x04,0x12,0x38]
+ selnez.s $f1, $f2, $f3 # CHECK: selnez.s $f1, $f2, $f3 # encoding: [0x54,0x62,0x08,0x78]
+ selnez.d $f2, $f4, $f8 # CHECK: selnez.d $f2, $f4, $f8 # encoding: [0x55,0x04,0x12,0x78]
+ class.s $f2, $f3 # CHECK: class.s $f2, $f3 # encoding: [0x54,0x62,0x00,0x60]
+ class.d $f2, $f4 # CHECK: class.d $f2, $f4 # encoding: [0x54,0x82,0x02,0x60]
+ deret # CHECK: deret # encoding: [0x00,0x00,0xe3,0x7c]
diff --git a/test/MC/Mips/micromips64r6/invalid.s b/test/MC/Mips/micromips64r6/invalid.s
new file mode 100644
index 000000000000..df1005dfa0b1
--- /dev/null
+++ b/test/MC/Mips/micromips64r6/invalid.s
@@ -0,0 +1,145 @@
+# RUN: not llvm-mc %s -triple=mips -show-encoding -mcpu=mips64r6 -mattr=micromips 2>%t1
+# RUN: FileCheck %s < %t1
+
+ addiur1sp $7, 260 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ addiur1sp $7, 241 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: misaligned immediate operand value
+ addiur1sp $8, 240 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ addiur2 $9, $7, -1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ addiur2 $6, $7, 10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ addius5 $7, 9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ addiusp 1032 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ align $4, $2, $3, -1 # CHECK: :[[@LINE]]:21: error: expected 2-bit unsigned immediate
+ align $4, $2, $3, 4 # CHECK: :[[@LINE]]:21: error: expected 2-bit unsigned immediate
+ beqzc16 $9, 20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ beqzc16 $6, 31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: branch to misaligned address
+ beqzc16 $6, 130 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: branch target out of range
+ bnezc16 $9, 20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ bnezc16 $6, 31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: branch to misaligned address
+ bnezc16 $6, 130 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: branch target out of range
+ cache -1, 255($7) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:9: error: expected 5-bit unsigned immediate
+ # FIXME: Check various 'pos + size' constraints on dext*
+ dext $2, $3, -1, 1 # CHECK: :[[@LINE]]:16: error: expected 5-bit unsigned immediate
+ dext $2, $3, 32, 1 # CHECK: :[[@LINE]]:16: error: expected 5-bit unsigned immediate
+ dext $2, $3, 1, 0 # CHECK: :[[@LINE]]:19: error: expected immediate in range 1 .. 32
+ dext $2, $3, 1, 33 # CHECK: :[[@LINE]]:19: error: expected immediate in range 1 .. 32
+ dextm $2, $3, -1, 1 # CHECK: :[[@LINE]]:17: error: expected 5-bit unsigned immediate
+ dextm $2, $3, 32, 1 # CHECK: :[[@LINE]]:17: error: expected 5-bit unsigned immediate
+ dextm $2, $3, -1, 33 # CHECK: :[[@LINE]]:17: error: expected 5-bit unsigned immediate
+ dextm $2, $3, 32, 33 # CHECK: :[[@LINE]]:17: error: expected 5-bit unsigned immediate
+ dextm $2, $3, 1, 32 # CHECK: :[[@LINE]]:20: error: expected immediate in range 33 .. 64
+ dextm $2, $3, 1, 65 # CHECK: :[[@LINE]]:20: error: expected immediate in range 33 .. 64
+ dextu $2, $3, 31, 1 # CHECK: :[[@LINE]]:17: error: expected immediate in range 32 .. 63
+ dextu $2, $3, 64, 1 # CHECK: :[[@LINE]]:17: error: expected immediate in range 32 .. 63
+ dextu $2, $3, 32, 0 # CHECK: :[[@LINE]]:21: error: expected immediate in range 1 .. 32
+ dextu $2, $3, 32, 33 # CHECK: :[[@LINE]]:21: error: expected immediate in range 1 .. 32
+ # FIXME: Check size on dins*
+ dins $2, $3, -1, 1 # CHECK: :[[@LINE]]:16: error: expected 6-bit unsigned immediate
+ dins $2, $3, 64, 1 # CHECK: :[[@LINE]]:16: error: expected 6-bit unsigned immediate
+ dinsm $2, $3, -1, 1 # CHECK: :[[@LINE]]:17: error: expected 5-bit unsigned immediate
+ dinsm $2, $3, 32, 1 # CHECK: :[[@LINE]]:17: error: expected 5-bit unsigned immediate
+ dinsu $2, $3, 31, 1 # CHECK: :[[@LINE]]:17: error: expected immediate in range 32 .. 63
+ dinsu $2, $3, 64, 1 # CHECK: :[[@LINE]]:17: error: expected immediate in range 32 .. 63
+ # FIXME: Check '0 < pos + size <= 32' constraint on ext
+ ext $2, $3, -1, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ ext $2, $3, 32, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ ext $2, $3, 1, 0 # CHECK: :[[@LINE]]:18: error: expected immediate in range 1 .. 32
+ ext $2, $3, 1, 33 # CHECK: :[[@LINE]]:18: error: expected immediate in range 1 .. 32
+ ins $2, $3, -1, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ ins $2, $3, 32, 31 # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ dalign $4, $2, $3, -1 # CHECK: :[[@LINE]]:23: error: expected 3-bit unsigned immediate
+ dalign $4, $2, $3, 8 # CHECK: :[[@LINE]]:23: error: expected 3-bit unsigned immediate
+ lbu16 $9, 8($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lbu16 $3, -2($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lbu16 $3, -2($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lbu16 $16, 8($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhu16 $9, 4($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lhu16 $3, 64($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lhu16 $3, 64($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lhu16 $16, 4($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lsa $4, $2, $3, 0 # CHECK: :[[@LINE]]:21: error: expected immediate in range 1 .. 4
+ lsa $4, $2, $3, 5 # CHECK: :[[@LINE]]:21: error: expected immediate in range 1 .. 4
+ lw16 $9, 8($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lw16 $4, 68($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lw16 $4, 68($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ lw16 $17, 8($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ddiv $32, $4, $5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ddiv $3, $34, $5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ddiv $3, $4, $35 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ dmod $32, $4, $5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ dmod $3, $34, $5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ dmod $3, $4, $35 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ddivu $32, $4, $5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ddivu $3, $34, $5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ ddivu $3, $4, $35 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ dmodu $32, $4, $5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ dmodu $3, $34, $5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ dmodu $3, $4, $35 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ pref -1, 255($7) # CHECK: :[[@LINE]]:8: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:8: error: expected 5-bit unsigned immediate
+ teq $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ teq $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ teq $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tge $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tge $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tge $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tgeu $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tgeu $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tgeu $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tlt $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tlt $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tlt $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tltu $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tltu $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tltu $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ tne $34, $9, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tne $8, $35, 6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tne $8, $9, 16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ teq $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tge $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tgeu $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tlt $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tltu $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ tne $8, $9, $2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ wrpgpr $34, $4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ wrpgpr $3, $33 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ wsbh $34, $4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ wsbh $3, $33 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ jrcaddiusp 1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 33 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 125 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 128 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ jrcaddiusp 132 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: expected both 7-bit unsigned immediate and multiple of 4
+ lwm16 $5, $6, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: $16 or $31 expected
+ lwm16 $16, $19, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: consecutive register numbers expected
+ lwm16 $16-$25, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid register operand
+ lwm16 $16, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwm16 $16, $17, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwm16 $16-$20, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwm16 $16, $17, $ra, 8($fp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwm16 $16, $17, $ra, 64($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sb16 $9, 4($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sb16 $3, 64($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ sb16 $16, 4($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sb16 $7, 4($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sh16 $9, 8($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sh16 $4, 68($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ sh16 $16, 8($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sh16 $7, 8($9) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sw16 $9, 4($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sw16 $4, 64($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+ sw16 $16, 4($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ sw16 $7, 4($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $5, $6, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: $16 or $31 expected
+ swm16 $16, $19, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: consecutive register numbers expected
+ swm16 $16-$25, $ra, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid register operand
+ swm16 $16, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $16, $17, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $16-$20, 8($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $16, $17, $ra, 8($fp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swm16 $16, $17, $ra, 64($sp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/micromips64r6/valid.s b/test/MC/Mips/micromips64r6/valid.s
new file mode 100644
index 000000000000..b41c86bd553c
--- /dev/null
+++ b/test/MC/Mips/micromips64r6/valid.s
@@ -0,0 +1,154 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r6 -mattr=micromips | FileCheck %s
+a:
+ .set noat
+ addiur1sp $7, 4 # CHECK: addiur1sp $7, 4 # encoding: [0x6f,0x83]
+ addiur2 $6, $7, -1 # CHECK: addiur2 $6, $7, -1 # encoding: [0x6f,0x7e]
+ addiur2 $6, $7, 12 # CHECK: addiur2 $6, $7, 12 # encoding: [0x6f,0x76]
+ addius5 $7, -2 # CHECK: addius5 $7, -2 # encoding: [0x4c,0xfc]
+ addiusp -1028 # CHECK: addiusp -1028 # encoding: [0x4f,0xff]
+ addiusp -1032 # CHECK: addiusp -1032 # encoding: [0x4f,0xfd]
+ addiusp 1024 # CHECK: addiusp 1024 # encoding: [0x4c,0x01]
+ addiusp 1028 # CHECK: addiusp 1028 # encoding: [0x4c,0x03]
+ addiusp -16 # CHECK: addiusp -16 # encoding: [0x4f,0xf9]
+ b 132 # CHECK: bc16 132 # encoding: [0xcc,0x42]
+ bc16 132 # CHECK: bc16 132 # encoding: [0xcc,0x42]
+ beqzc16 $6, 20 # CHECK: beqzc16 $6, 20 # encoding: [0x8f,0x0a]
+ bnezc16 $6, 20 # CHECK: bnezc16 $6, 20 # encoding: [0xaf,0x0a]
+ daui $3, $4, 5 # CHECK: daui $3, $4, 5 # encoding: [0xf0,0x64,0x00,0x05]
+ dahi $3, 4 # CHECK: dahi $3, 4 # encoding: [0x42,0x23,0x00,0x04]
+ dati $3, 4 # CHECK: dati $3, 4 # encoding: [0x42,0x03,0x00,0x04]
+ dext $9, $6, 3, 7 # CHECK: dext $9, $6, 3, 7 # encoding: [0x59,0x26,0x30,0xec]
+ dextm $9, $6, 3, 39 # CHECK: dextm $9, $6, 3, 39 # encoding: [0x59,0x26,0x30,0xe4]
+ dextu $9, $6, 35, 7 # CHECK: dextu $9, $6, 35, 7 # encoding: [0x59,0x26,0x30,0xd4]
+ dalign $4, $2, $3, 5 # CHECK: dalign $4, $2, $3, 5 # encoding: [0x58,0x43,0x25,0x1c]
+ lw $3, 32($gp) # CHECK: lw $3, 32($gp) # encoding: [0x65,0x88]
+ lw $3, 24($sp) # CHECK: lw $3, 24($sp) # encoding: [0x48,0x66]
+ lw16 $4, 8($17) # CHECK: lw16 $4, 8($17) # encoding: [0x6a,0x12]
+ lhu16 $3, 4($16) # CHECK: lhu16 $3, 4($16) # encoding: [0x29,0x82]
+ lbu16 $3, 4($17) # CHECK: lbu16 $3, 4($17) # encoding: [0x09,0x94]
+ lbu16 $3, -1($17) # CHECK: lbu16 $3, -1($17) # encoding: [0x09,0x9f]
+ ddiv $3, $4, $5 # CHECK: ddiv $3, $4, $5 # encoding: [0x58,0x64,0x29,0x18]
+ dmod $3, $4, $5 # CHECK: dmod $3, $4, $5 # encoding: [0x58,0x64,0x29,0x58]
+ ddivu $3, $4, $5 # CHECK: ddivu $3, $4, $5 # encoding: [0x58,0x64,0x29,0x98]
+ dmodu $3, $4, $5 # CHECK: dmodu $3, $4, $5 # encoding: [0x58,0x64,0x29,0xd8]
+ add.s $f3, $f4, $f5 # CHECK: add.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x18,0x30]
+ add.d $f2, $f4, $f6 # CHECK: add.d $f2, $f4, $f6 # encoding: [0x54,0xc4,0x11,0x30]
+ sub.s $f3, $f4, $f5 # CHECK: sub.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x18,0x70]
+ sub.d $f2, $f4, $f6 # CHECK: sub.d $f2, $f4, $f6 # encoding: [0x54,0xc4,0x11,0x70]
+ mul.s $f3, $f4, $f5 # CHECK: mul.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x18,0xb0]
+ mul.d $f2, $f4, $f6 # CHECK: mul.d $f2, $f4, $f6 # encoding: [0x54,0xc4,0x11,0xb0]
+ div.s $f3, $f4, $f5 # CHECK: div.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x18,0xf0]
+ div.d $f2, $f4, $f6 # CHECK: div.d $f2, $f4, $f6 # encoding: [0x54,0xc4,0x11,0xf0]
+ maddf.s $f3, $f4, $f5 # CHECK: maddf.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x19,0xb8]
+ maddf.d $f3, $f4, $f5 # CHECK: maddf.d $f3, $f4, $f5 # encoding: [0x54,0xa4,0x1b,0xb8]
+ msubf.s $f3, $f4, $f5 # CHECK: msubf.s $f3, $f4, $f5 # encoding: [0x54,0xa4,0x19,0xf8]
+ msubf.d $f3, $f4, $f5 # CHECK: msubf.d $f3, $f4, $f5 # encoding: [0x54,0xa4,0x1b,0xf8]
+ mov.s $f6, $f7 # CHECK: mov.s $f6, $f7 # encoding: [0x54,0xc7,0x00,0x7b]
+ mov.d $f4, $f6 # CHECK: mov.d $f4, $f6 # encoding: [0x54,0x86,0x20,0x7b]
+ neg.s $f6, $f7 # CHECK: neg.s $f6, $f7 # encoding: [0x54,0xc7,0x0b,0x7b]
+ neg.d $f4, $f6 # CHECK: neg.d $f4, $f6 # encoding: [0x54,0x86,0x2b,0x7b]
+ max.s $f5, $f4, $f3 # CHECK: max.s $f5, $f4, $f3 # encoding: [0x54,0x64,0x28,0x0b]
+ max.d $f5, $f4, $f3 # CHECK: max.d $f5, $f4, $f3 # encoding: [0x54,0x64,0x2a,0x0b]
+ maxa.s $f5, $f4, $f3 # CHECK: maxa.s $f5, $f4, $f3 # encoding: [0x54,0x64,0x28,0x2b]
+ maxa.d $f5, $f4, $f3 # CHECK: maxa.d $f5, $f4, $f3 # encoding: [0x54,0x64,0x2a,0x2b]
+ min.s $f5, $f4, $f3 # CHECK: min.s $f5, $f4, $f3 # encoding: [0x54,0x64,0x28,0x03]
+ min.d $f5, $f4, $f3 # CHECK: min.d $f5, $f4, $f3 # encoding: [0x54,0x64,0x2a,0x03]
+ mina.s $f5, $f4, $f3 # CHECK: mina.s $f5, $f4, $f3 # encoding: [0x54,0x64,0x28,0x23]
+ mina.d $f5, $f4, $f3 # CHECK: mina.d $f5, $f4, $f3 # encoding: [0x54,0x64,0x2a,0x23]
+ cmp.af.s $f2, $f3, $f4 # CHECK: cmp.af.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x05]
+ cmp.af.d $f2, $f3, $f4 # CHECK: cmp.af.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x15]
+ cmp.un.s $f2, $f3, $f4 # CHECK: cmp.un.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x45]
+ cmp.un.d $f2, $f3, $f4 # CHECK: cmp.un.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x55]
+ cmp.eq.s $f2, $f3, $f4 # CHECK: cmp.eq.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x85]
+ cmp.eq.d $f2, $f3, $f4 # CHECK: cmp.eq.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0x95]
+ cmp.ueq.s $f2, $f3, $f4 # CHECK: cmp.ueq.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0xc5]
+ cmp.ueq.d $f2, $f3, $f4 # CHECK: cmp.ueq.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x10,0xd5]
+ cmp.lt.s $f2, $f3, $f4 # CHECK: cmp.lt.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x05]
+ cmp.lt.d $f2, $f3, $f4 # CHECK: cmp.lt.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x15]
+ cmp.ult.s $f2, $f3, $f4 # CHECK: cmp.ult.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x45]
+ cmp.ult.d $f2, $f3, $f4 # CHECK: cmp.ult.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x55]
+ cmp.le.s $f2, $f3, $f4 # CHECK: cmp.le.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x85]
+ cmp.le.d $f2, $f3, $f4 # CHECK: cmp.le.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0x95]
+ cmp.ule.s $f2, $f3, $f4 # CHECK: cmp.ule.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0xc5]
+ cmp.ule.d $f2, $f3, $f4 # CHECK: cmp.ule.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x11,0xd5]
+ cmp.saf.s $f2, $f3, $f4 # CHECK: cmp.saf.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x05]
+ cmp.saf.d $f2, $f3, $f4 # CHECK: cmp.saf.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x15]
+ cmp.sun.s $f2, $f3, $f4 # CHECK: cmp.sun.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x45]
+ cmp.sun.d $f2, $f3, $f4 # CHECK: cmp.sun.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x55]
+ cmp.seq.s $f2, $f3, $f4 # CHECK: cmp.seq.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x85]
+ cmp.seq.d $f2, $f3, $f4 # CHECK: cmp.seq.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0x95]
+ cmp.sueq.s $f2, $f3, $f4 # CHECK: cmp.sueq.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0xc5]
+ cmp.sueq.d $f2, $f3, $f4 # CHECK: cmp.sueq.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x12,0xd5]
+ cmp.slt.s $f2, $f3, $f4 # CHECK: cmp.slt.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x05]
+ cmp.slt.d $f2, $f3, $f4 # CHECK: cmp.slt.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x15]
+ cmp.sult.s $f2, $f3, $f4 # CHECK: cmp.sult.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x45]
+ cmp.sult.d $f2, $f3, $f4 # CHECK: cmp.sult.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x55]
+ cmp.sle.s $f2, $f3, $f4 # CHECK: cmp.sle.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x85]
+ cmp.sle.d $f2, $f3, $f4 # CHECK: cmp.sle.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0x95]
+ cmp.sule.s $f2, $f3, $f4 # CHECK: cmp.sule.s $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0xc5]
+ cmp.sule.d $f2, $f3, $f4 # CHECK: cmp.sule.d $f2, $f3, $f4 # encoding: [0x54,0x83,0x13,0xd5]
+ cvt.l.s $f3, $f4 # CHECK: cvt.l.s $f3, $f4 # encoding: [0x54,0x64,0x01,0x3b]
+ cvt.l.d $f3, $f4 # CHECK: cvt.l.d $f3, $f4 # encoding: [0x54,0x64,0x41,0x3b]
+ cvt.w.s $f3, $f4 # CHECK: cvt.w.s $f3, $f4 # encoding: [0x54,0x64,0x09,0x3b]
+ cvt.w.d $f3, $f4 # CHECK: cvt.w.d $f3, $f4 # encoding: [0x54,0x64,0x49,0x3b]
+ cvt.d.s $f2, $f4 # CHECK: cvt.d.s $f2, $f4 # encoding: [0x54,0x44,0x13,0x7b]
+ cvt.d.w $f2, $f4 # CHECK: cvt.d.w $f2, $f4 # encoding: [0x54,0x44,0x33,0x7b]
+ cvt.d.l $f2, $f4 # CHECK: cvt.d.l $f2, $f4 # encoding: [0x54,0x44,0x53,0x7b]
+ cvt.s.d $f2, $f4 # CHECK: cvt.s.d $f2, $f4 # encoding: [0x54,0x44,0x1b,0x7b]
+ cvt.s.w $f3, $f4 # CHECK: cvt.s.w $f3, $f4 # encoding: [0x54,0x64,0x3b,0x7b]
+ cvt.s.l $f3, $f4 # CHECK: cvt.s.l $f3, $f4 # encoding: [0x54,0x64,0x5b,0x7b]
+ teq $8, $9 # CHECK: teq $8, $9 # encoding: [0x01,0x28,0x00,0x3c]
+ teq $5, $7, 15 # CHECK: teq $5, $7, 15 # encoding: [0x00,0xe5,0xf0,0x3c]
+ tge $7, $10 # CHECK: tge $7, $10 # encoding: [0x01,0x47,0x02,0x3c]
+ tge $7, $19, 15 # CHECK: tge $7, $19, 15 # encoding: [0x02,0x67,0xf2,0x3c]
+ tgeu $22, $gp # CHECK: tgeu $22, $gp # encoding: [0x03,0x96,0x04,0x3c]
+ tgeu $20, $14, 15 # CHECK: tgeu $20, $14, 15 # encoding: [0x01,0xd4,0xf4,0x3c]
+ tlt $15, $13 # CHECK: tlt $15, $13 # encoding: [0x01,0xaf,0x08,0x3c]
+ tlt $2, $19, 15 # CHECK: tlt $2, $19, 15 # encoding: [0x02,0x62,0xf8,0x3c]
+ tltu $11, $16 # CHECK: tltu $11, $16 # encoding: [0x02,0x0b,0x0a,0x3c]
+ tltu $16, $sp, 15 # CHECK: tltu $16, $sp, 15 # encoding: [0x03,0xb0,0xfa,0x3c]
+ tne $6, $17 # CHECK: tne $6, $17 # encoding: [0x02,0x26,0x0c,0x3c]
+ tne $7, $8, 15 # CHECK: tne $7, $8, 15 # encoding: [0x01,0x07,0xfc,0x3c]
+ cachee 1, 8($5) # CHECK: cachee 1, 8($5) # encoding: [0x60,0x25,0xa6,0x08]
+ wrpgpr $3, $4 # CHECK: wrpgpr $3, $4 # encoding: [0x00,0x64,0xf1,0x7c]
+ wsbh $3, $4 # CHECK: wsbh $3, $4 # encoding: [0x00,0x64,0x7b,0x3c]
+ jalr $9 # CHECK: jalr $9 # encoding: [0x45,0x2b]
+ jrc16 $9 # CHECK: jrc16 $9 # encoding: [0x45,0x23]
+ jrcaddiusp 20 # CHECK: jrcaddiusp 20 # encoding: [0x44,0xb3]
+ break16 8 # CHECK: break16 8 # encoding: [0x46,0x1b]
+ li16 $3, -1 # CHECK: li16 $3, -1 # encoding: [0xed,0xff]
+ move16 $3, $5 # CHECK: move16 $3, $5 # encoding: [0x0c,0x65]
+ sdbbp16 8 # CHECK: sdbbp16 8 # encoding: [0x46,0x3b]
+ subu16 $5, $16, $3 # CHECK: subu16 $5, $16, $3 # encoding: [0x04,0x3b]
+ xor16 $17, $5 # CHECK: xor16 $17, $5 # encoding: [0x44,0xd8]
+ lwm $16, $17, $ra, 8($sp) # CHECK: lwm16 $16, $17, $ra, 8($sp) # encoding: [0x45,0x22]
+ lwm16 $16, $17, $ra, 8($sp) # CHECK: lwm16 $16, $17, $ra, 8($sp) # encoding: [0x45,0x22]
+ sb16 $3, 4($16) # CHECK: sb16 $3, 4($16) # encoding: [0x89,0x84]
+ sh16 $4, 8($17) # CHECK: sh16 $4, 8($17) # encoding: [0xaa,0x14]
+ sw $4, 124($sp) # CHECK: sw $4, 124($sp) # encoding: [0xc8,0x9f]
+ sw16 $4, 4($17) # CHECK: sw16 $4, 4($17) # encoding: [0xea,0x11]
+ sw16 $0, 4($17) # CHECK: sw16 $zero, 4($17) # encoding: [0xe8,0x11]
+ swm $16, $17, $ra, 8($sp) # CHECK: swm16 $16, $17, $ra, 8($sp) # encoding: [0x45,0x2a]
+ swm16 $16, $17, $ra, 8($sp) # CHECK: swm16 $16, $17, $ra, 8($sp) # encoding: [0x45,0x2a]
+ recip.s $f2, $f4 # CHECK: recip.s $f2, $f4 # encoding: [0x54,0x44,0x12,0x3b]
+ recip.d $f2, $f4 # CHECK: recip.d $f2, $f4 # encoding: [0x54,0x44,0x52,0x3b]
+ rint.s $f2, $f4 # CHECK: rint.s $f2, $f4 # encoding: [0x54,0x82,0x00,0x20]
+ rint.d $f2, $f4 # CHECK: rint.d $f2, $f4 # encoding: [0x54,0x82,0x02,0x20]
+ round.l.s $f2, $f4 # CHECK: round.l.s $f2, $f4 # encoding: [0x54,0x44,0x33,0x3b]
+ round.l.d $f2, $f4 # CHECK: round.l.d $f2, $f4 # encoding: [0x54,0x44,0x73,0x3b]
+ round.w.s $f2, $f4 # CHECK: round.w.s $f2, $f4 # encoding: [0x54,0x44,0x3b,0x3b]
+ round.w.d $f2, $f4 # CHECK: round.w.d $f2, $f4 # encoding: [0x54,0x44,0x7b,0x3b]
+ sel.s $f1, $f1, $f2 # CHECK: sel.s $f1, $f1, $f2 # encoding: [0x54,0x41,0x08,0xb8]
+ sel.d $f0, $f2, $f4 # CHECK: sel.d $f0, $f2, $f4 # encoding: [0x54,0x82,0x02,0xb8]
+ seleqz.s $f1, $f2, $f3 # CHECK: seleqz.s $f1, $f2, $f3 # encoding: [0x54,0x62,0x08,0x38]
+ seleqz.d $f2, $f4, $f8 # CHECK: seleqz.d $f2, $f4, $f8 # encoding: [0x55,0x04,0x12,0x38]
+ selnez.s $f1, $f2, $f3 # CHECK: selnez.s $f1, $f2, $f3 # encoding: [0x54,0x62,0x08,0x78]
+ selnez.d $f2, $f4, $f8 # CHECK: selnez.d $f2, $f4, $f8 # encoding: [0x55,0x04,0x12,0x78]
+ class.s $f2, $f3 # CHECK: class.s $f2, $f3 # encoding: [0x54,0x62,0x00,0x60]
+ class.d $f2, $f4 # CHECK: class.d $f2, $f4 # encoding: [0x54,0x82,0x02,0x60]
+ deret # CHECK: deret # encoding: [0x00,0x00,0xe3,0x7c]
+ di # CHECK: di # encoding: [0x00,0x00,0x47,0x7c]
+ di $0 # CHECK: di # encoding: [0x00,0x00,0x47,0x7c]
+ di $15 # CHECK: di $15 # encoding: [0x00,0x0f,0x47,0x7c]
+
+1:
diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s
index b25394b39a67..9c133abbae36 100644
--- a/test/MC/Mips/mips-alu-instructions.s
+++ b/test/MC/Mips/mips-alu-instructions.s
@@ -91,7 +91,7 @@
# CHECK: addiu $sp, $sp, -40 # encoding: [0xd8,0xff,0xbd,0x27]
# CHECK: neg $6, $7 # encoding: [0x22,0x30,0x07,0x00]
# CHECK: negu $6, $7 # encoding: [0x23,0x30,0x07,0x00]
-# CHECK: move $7, $8 # encoding: [0x21,0x38,0x00,0x01]
+# CHECK: move $7, $8 # encoding: [0x25,0x38,0x00,0x01]
# CHECK: .set push
# CHECK: .set mips32r2
# CHECK: rdhwr $5, $29
diff --git a/test/MC/Mips/mips-diagnostic-fixup.s b/test/MC/Mips/mips-diagnostic-fixup.s
index 864d7397271d..7bfe0d6263db 100644
--- a/test/MC/Mips/mips-diagnostic-fixup.s
+++ b/test/MC/Mips/mips-diagnostic-fixup.s
@@ -1,9 +1,12 @@
# RUN: not llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -arch=mips 2>&1 -filetype=obj | FileCheck %s
-#
-# CHECK: LLVM ERROR: out of range PC16 fixup
+
+# Two instructions, to check that this is not a fatal error
+# CHECK: error: out of range PC16 fixup
+# CHECK: error: out of range PC16 fixup
.text
b foo
+ b foo
.space 131072 - 8, 1 # -8 = size of b instr plus size of automatically inserted nop
nop # This instr makes the branch too long to fit into a 18-bit offset
foo:
diff --git a/test/MC/Mips/mips-dsp-instructions.s b/test/MC/Mips/mips-dsp-instructions.s
deleted file mode 100644
index 5a9e8ea9db39..000000000000
--- a/test/MC/Mips/mips-dsp-instructions.s
+++ /dev/null
@@ -1,97 +0,0 @@
-# RUN: llvm-mc -show-encoding -triple=mips-unknown-unknown -mattr=dspr2 %s | FileCheck %s
-#
-# CHECK: .text
-# CHECK: precrq.qb.ph $16, $17, $18 # encoding: [0x7e,0x32,0x83,0x11]
-# CHECK: precrq.ph.w $17, $18, $19 # encoding: [0x7e,0x53,0x8d,0x11]
-# CHECK: precrq_rs.ph.w $18, $19, $20 # encoding: [0x7e,0x74,0x95,0x51]
-# CHECK: precrqu_s.qb.ph $19, $20, $21 # encoding: [0x7e,0x95,0x9b,0xd1]
-# CHECK: preceq.w.phl $20, $21 # encoding: [0x7c,0x15,0xa3,0x12]
-# CHECK: preceq.w.phr $21, $22 # encoding: [0x7c,0x16,0xab,0x52]
-# CHECK: precequ.ph.qbl $22, $23 # encoding: [0x7c,0x17,0xb1,0x12]
-# CHECK: precequ.ph.qbr $23, $24 # encoding: [0x7c,0x18,0xb9,0x52]
-# CHECK: precequ.ph.qbla $24, $25 # encoding: [0x7c,0x19,0xc1,0x92]
-# CHECK: precequ.ph.qbra $25, $26 # encoding: [0x7c,0x1a,0xc9,0xd2]
-# CHECK: preceu.ph.qbl $26, $27 # encoding: [0x7c,0x1b,0xd7,0x12]
-# CHECK: preceu.ph.qbr $27, $gp # encoding: [0x7c,0x1c,0xdf,0x52]
-# CHECK: preceu.ph.qbla $gp, $sp # encoding: [0x7c,0x1d,0xe7,0x92]
-# CHECK: preceu.ph.qbra $sp, $fp # encoding: [0x7c,0x1e,0xef,0xd2]
-
-# CHECK: precr.qb.ph $23, $24, $25 # encoding: [0x7f,0x19,0xbb,0x51]
-# CHECK: precr_sra.ph.w $24, $25, 0 # encoding: [0x7f,0x38,0x07,0x91]
-# CHECK: precr_sra.ph.w $24, $25, 31 # encoding: [0x7f,0x38,0xff,0x91]
-# CHECK: precr_sra_r.ph.w $25, $26, 0 # encoding: [0x7f,0x59,0x07,0xd1]
-# CHECK: precr_sra_r.ph.w $25, $26, 31 # encoding: [0x7f,0x59,0xff,0xd1]
-
-# CHECK: lbux $10, $20($26) # encoding: [0x7f,0x54,0x51,0x8a]
-# CHECK: lhx $11, $21($27) # encoding: [0x7f,0x75,0x59,0x0a]
-# CHECK: lwx $12, $22($gp) # encoding: [0x7f,0x96,0x60,0x0a]
-
-# CHECK: mult $ac3, $2, $3 # encoding: [0x00,0x43,0x18,0x18]
-# CHECK: multu $ac2, $4, $5 # encoding: [0x00,0x85,0x10,0x19]
-# CHECK: madd $ac1, $6, $7 # encoding: [0x70,0xc7,0x08,0x00]
-# CHECK: maddu $ac0, $8, $9 # encoding: [0x71,0x09,0x00,0x01]
-# CHECK: msub $ac3, $10, $11 # encoding: [0x71,0x4b,0x18,0x04]
-# CHECK: msubu $ac2, $12, $13 # encoding: [0x71,0x8d,0x10,0x05]
-# CHECK: mfhi $14, $ac1 # encoding: [0x00,0x20,0x70,0x10]
-# CHECK: mflo $15, $ac0 # encoding: [0x00,0x00,0x78,0x12]
-# CHECK: mthi $16, $ac3 # encoding: [0x02,0x00,0x18,0x11]
-# CHECK: mtlo $17, $ac2 # encoding: [0x02,0x20,0x10,0x13]
-
-# CHECK: mult $2, $3 # encoding: [0x00,0x43,0x00,0x18]
-# CHECK: multu $4, $5 # encoding: [0x00,0x85,0x00,0x19]
-# CHECK: madd $6, $7 # encoding: [0x70,0xc7,0x00,0x00]
-# CHECK: maddu $8, $9 # encoding: [0x71,0x09,0x00,0x01]
-# CHECK: msub $10, $11 # encoding: [0x71,0x4b,0x00,0x04]
-# CHECK: msubu $12, $13 # encoding: [0x71,0x8d,0x00,0x05]
-# CHECK: mfhi $14 # encoding: [0x00,0x00,0x70,0x10]
-# CHECK: mflo $15 # encoding: [0x00,0x00,0x78,0x12]
-# CHECK: mthi $16 # encoding: [0x02,0x00,0x00,0x11]
-# CHECK: mtlo $17 # encoding: [0x02,0x20,0x00,0x13]
-
-
- precrq.qb.ph $16,$17,$18
- precrq.ph.w $17,$18,$19
- precrq_rs.ph.w $18,$19,$20
- precrqu_s.qb.ph $19,$20,$21
- preceq.w.phl $20,$21
- preceq.w.phr $21,$22
- precequ.ph.qbl $22,$23
- precequ.ph.qbr $23,$24
- precequ.ph.qbla $24,$25
- precequ.ph.qbra $25,$26
- preceu.ph.qbl $26,$27
- preceu.ph.qbr $27,$28
- preceu.ph.qbla $28,$29
- preceu.ph.qbra $29,$30
-
- precr.qb.ph $23,$24,$25
- precr_sra.ph.w $24,$25,0
- precr_sra.ph.w $24,$25,31
- precr_sra_r.ph.w $25,$26,0
- precr_sra_r.ph.w $25,$26,31
-
- lbux $10, $s4($26)
- lhx $11, $s5($27)
- lwx $12, $s6($28)
-
- mult $ac3, $2, $3
- multu $ac2, $4, $5
- madd $ac1, $6, $7
- maddu $ac0, $8, $9
- msub $ac3, $10, $11
- msubu $ac2, $12, $13
- mfhi $14, $ac1
- mflo $15, $ac0
- mthi $16, $ac3
- mtlo $17, $ac2
-
- mult $2, $3
- multu $4, $5
- madd $6, $7
- maddu $8, $9
- msub $10, $11
- msubu $12, $13
- mfhi $14
- mflo $15
- mthi $16
- mtlo $17
diff --git a/test/MC/Mips/mips-expansions-bad.s b/test/MC/Mips/mips-expansions-bad.s
index cd74f7d4aa88..3d6824687939 100644
--- a/test/MC/Mips/mips-expansions-bad.s
+++ b/test/MC/Mips/mips-expansions-bad.s
@@ -13,14 +13,23 @@
beq $2, 0x100010001, 1332
# 32-BIT: :[[@LINE-1]]:3: error: instruction requires a 32-bit immediate
.set mips32r6
+ ulh $5, 0
+ # 32-BIT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+ # 64-BIT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
ulhu $5, 0
# 32-BIT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
# 64-BIT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
.set mips32
+ ulh $5, 1
+ # 32-BIT-NOT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+ # 64-BIT-NOT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
ulhu $5, 1
# 32-BIT-NOT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
# 64-BIT-NOT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
.set mips64r6
+ ulh $5, 2
+ # 32-BIT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
+ # 64-BIT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
ulhu $5, 2
# 32-BIT: :[[@LINE-1]]:3: error: instruction not supported on mips32r6 or mips64r6
# 64-BIT: :[[@LINE-2]]:3: error: instruction not supported on mips32r6 or mips64r6
diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s
index 93c6b7cd75a8..625b77f2d884 100644
--- a/test/MC/Mips/mips-expansions.s
+++ b/test/MC/Mips/mips-expansions.s
@@ -1,17 +1,17 @@
# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \
-# RUN: FileCheck %s --check-prefix=CHECK-LE
+# RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 | \
-# RUN: FileCheck %s --check-prefix=CHECK-BE
+# RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
# Check that the IAS expands macro instructions in the same way as GAS.
# Load address, done by MipsAsmParser::expandLoadAddressReg()
# and MipsAsmParser::expandLoadAddressImm():
la $8, 1f
-# CHECK-LE: lui $8, %hi($tmp0) # encoding: [A,A,0x08,0x3c]
-# CHECK-LE: # fixup A - offset: 0, value: ($tmp0)@ABS_HI, kind: fixup_Mips_HI16
-# CHECK-LE: addiu $8, $8, %lo($tmp0) # encoding: [A,A,0x08,0x25]
-# CHECK-LE: # fixup A - offset: 0, value: ($tmp0)@ABS_LO, kind: fixup_Mips_LO16
+# CHECK-LE: lui $8, %hi(($tmp0)) # encoding: [A,A,0x08,0x3c]
+# CHECK-LE: # fixup A - offset: 0, value: %hi(($tmp0)), kind: fixup_Mips_HI16
+# CHECK-LE: addiu $8, $8, %lo(($tmp0)) # encoding: [A,A,0x08,0x25]
+# CHECK-LE: # fixup A - offset: 0, value: %lo(($tmp0)), kind: fixup_Mips_LO16
# LW/SW and LDC1/SDC1 of symbol address, done by MipsAsmParser::expandMemInst():
.set noat
@@ -131,7 +131,210 @@
# CHECK-LE: beq $2, $1, 1332 # encoding: [0x4d,0x01,0x41,0x10]
# CHECK-LE: nop # encoding: [0x00,0x00,0x00,0x00]
+# Test ULH with immediate operand.
+ulh_imm: # CHECK-LABEL: ulh_imm:
+ ulh $8, 0
+# CHECK-BE: lb $1, 0($zero) # encoding: [0x80,0x01,0x00,0x00]
+# CHECK-BE: lbu $8, 1($zero) # encoding: [0x90,0x08,0x00,0x01]
+# CHECK-BE: sll $1, $1, 8 # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lb $1, 1($zero) # encoding: [0x01,0x00,0x01,0x80]
+# CHECK-LE: lbu $8, 0($zero) # encoding: [0x00,0x00,0x08,0x90]
+# CHECK-LE: sll $1, $1, 8 # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 2
+# CHECK-BE: lb $1, 2($zero) # encoding: [0x80,0x01,0x00,0x02]
+# CHECK-BE: lbu $8, 3($zero) # encoding: [0x90,0x08,0x00,0x03]
+# CHECK-BE: sll $1, $1, 8 # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lb $1, 3($zero) # encoding: [0x03,0x00,0x01,0x80]
+# CHECK-LE: lbu $8, 2($zero) # encoding: [0x02,0x00,0x08,0x90]
+# CHECK-LE: sll $1, $1, 8 # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 0x8000
+# CHECK-BE: ori $1, $zero, 32768 # encoding: [0x34,0x01,0x80,0x00]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: ori $1, $zero, 32768 # encoding: [0x00,0x80,0x01,0x34]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, -0x8000
+# CHECK-BE: lb $1, -32768($zero) # encoding: [0x80,0x01,0x80,0x00]
+# CHECK-BE: lbu $8, -32767($zero) # encoding: [0x90,0x08,0x80,0x01]
+# CHECK-BE: sll $1, $1, 8 # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lb $1, -32767($zero) # encoding: [0x01,0x80,0x01,0x80]
+# CHECK-LE: lbu $8, -32768($zero) # encoding: [0x00,0x80,0x08,0x90]
+# CHECK-LE: sll $1, $1, 8 # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 0x10000
+# CHECK-BE: lui $1, 1 # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 0x18888
+# CHECK-BE: lui $1, 1 # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: ori $1, $1, 34952 # encoding: [0x34,0x21,0x88,0x88]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: ori $1, $1, 34952 # encoding: [0x88,0x88,0x21,0x34]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, -32769
+# CHECK-BE: lui $1, 65535 # encoding: [0x3c,0x01,0xff,0xff]
+# CHECK-BE: ori $1, $1, 32767 # encoding: [0x34,0x21,0x7f,0xff]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK-LE: ori $1, $1, 32767 # encoding: [0xff,0x7f,0x21,0x34]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 32767
+# CHECK-BE: addiu $1, $zero, 32767 # encoding: [0x24,0x01,0x7f,0xff]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: addiu $1, $zero, 32767 # encoding: [0xff,0x7f,0x01,0x24]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+# Test ULH with immediate offset and a source register operand.
+ulh_reg: # CHECK-LABEL: ulh_reg:
+ ulh $8, 0($9)
+# CHECK-BE: lb $1, 0($9) # encoding: [0x81,0x21,0x00,0x00]
+# CHECK-BE: lbu $8, 1($9) # encoding: [0x91,0x28,0x00,0x01]
+# CHECK-BE: sll $1, $1, 8 # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lb $1, 1($9) # encoding: [0x01,0x00,0x21,0x81]
+# CHECK-LE: lbu $8, 0($9) # encoding: [0x00,0x00,0x28,0x91]
+# CHECK-LE: sll $1, $1, 8 # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 2($9)
+# CHECK-BE: lb $1, 2($9) # encoding: [0x81,0x21,0x00,0x02]
+# CHECK-BE: lbu $8, 3($9) # encoding: [0x91,0x28,0x00,0x03]
+# CHECK-BE: sll $1, $1, 8 # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lb $1, 3($9) # encoding: [0x03,0x00,0x21,0x81]
+# CHECK-LE: lbu $8, 2($9) # encoding: [0x02,0x00,0x28,0x91]
+# CHECK-LE: sll $1, $1, 8 # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 0x8000($9)
+# CHECK-BE: ori $1, $zero, 32768 # encoding: [0x34,0x01,0x80,0x00]
+# CHECK-BE: addu $1, $1, $9 # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: ori $1, $zero, 32768 # encoding: [0x00,0x80,0x01,0x34]
+# CHECK-LE: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, -0x8000($9)
+# CHECK-BE: lb $1, -32768($9) # encoding: [0x81,0x21,0x80,0x00]
+# CHECK-BE: lbu $8, -32767($9) # encoding: [0x91,0x28,0x80,0x01]
+# CHECK-BE: sll $1, $1, 8 # encoding: [0x00,0x01,0x0a,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lb $1, -32767($9) # encoding: [0x01,0x80,0x21,0x81]
+# CHECK-LE: lbu $8, -32768($9) # encoding: [0x00,0x80,0x28,0x91]
+# CHECK-LE: sll $1, $1, 8 # encoding: [0x00,0x0a,0x01,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 0x10000($9)
+# CHECK-BE: lui $1, 1 # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: addu $1, $1, $9 # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 0x18888($9)
+# CHECK-BE: lui $1, 1 # encoding: [0x3c,0x01,0x00,0x01]
+# CHECK-BE: ori $1, $1, 34952 # encoding: [0x34,0x21,0x88,0x88]
+# CHECK-BE: addu $1, $1, $9 # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK-LE: ori $1, $1, 34952 # encoding: [0x88,0x88,0x21,0x34]
+# CHECK-LE: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, -32769($9)
+# CHECK-BE: lui $1, 65535 # encoding: [0x3c,0x01,0xff,0xff]
+# CHECK-BE: ori $1, $1, 32767 # encoding: [0x34,0x21,0x7f,0xff]
+# CHECK-BE: addu $1, $1, $9 # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
+# CHECK-LE: ori $1, $1, 32767 # encoding: [0xff,0x7f,0x21,0x34]
+# CHECK-LE: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
+ ulh $8, 32767($9)
+# CHECK-BE: addiu $1, $zero, 32767 # encoding: [0x24,0x01,0x7f,0xff]
+# CHECK-BE: addu $1, $1, $9 # encoding: [0x00,0x29,0x08,0x21]
+# CHECK-BE: lb $8, 0($1) # encoding: [0x80,0x28,0x00,0x00]
+# CHECK-BE: lbu $1, 1($1) # encoding: [0x90,0x21,0x00,0x01]
+# CHECK-BE: sll $8, $8, 8 # encoding: [0x00,0x08,0x42,0x00]
+# CHECK-BE: or $8, $8, $1 # encoding: [0x01,0x01,0x40,0x25]
+# CHECK-LE: addiu $1, $zero, 32767 # encoding: [0xff,0x7f,0x01,0x24]
+# CHECK-LE: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00]
+# CHECK-LE: lb $8, 1($1) # encoding: [0x01,0x00,0x28,0x80]
+# CHECK-LE: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
+# CHECK-LE: sll $8, $8, 8 # encoding: [0x00,0x42,0x08,0x00]
+# CHECK-LE: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
+
# Test ULHU with immediate operand.
+ulhu_imm: # CHECK-LABEL: ulhu_imm:
ulhu $8, 0
# CHECK-BE: lbu $1, 0($zero) # encoding: [0x90,0x01,0x00,0x00]
# CHECK-BE: lbu $8, 1($zero) # encoding: [0x90,0x08,0x00,0x01]
diff --git a/test/MC/Mips/mips-pc16-fixup.s b/test/MC/Mips/mips-pc16-fixup.s
index 5443532d6125..ae4c915d97d5 100644
--- a/test/MC/Mips/mips-pc16-fixup.s
+++ b/test/MC/Mips/mips-pc16-fixup.s
@@ -1,6 +1,6 @@
# RUN: llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r2 -arch=mips 2>&1 -filetype=obj | FileCheck %s
#
-# CHECK-NOT: LLVM ERROR: out of range PC16 fixup
+# CHECK-NOT: error: out of range PC16 fixup
.text
b foo
diff --git a/test/MC/Mips/mips-pdr.s b/test/MC/Mips/mips-pdr.s
index 79e824bda64b..8177e3e23448 100644
--- a/test/MC/Mips/mips-pdr.s
+++ b/test/MC/Mips/mips-pdr.s
@@ -33,7 +33,7 @@
# We should also check if relocation information was correctly generated.
# OBJOUT: Relocations [
-# OBJOUT-NEXT: Section (6) .rel.pdr {
+# OBJOUT-NEXT: Section ({{.*}}) .rel.pdr {
# OBJOUT-NEXT: 0x0 R_MIPS_32 .text 0x0
# OBJOUT-NEXT: 0x20 R_MIPS_32 _global_foo 0x0
# OBJOUT-NEXT: }
diff --git a/test/MC/Mips/mips1/valid.s b/test/MC/Mips/mips1/valid.s
index e2feeac4dd56..80f0f8b047c6 100644
--- a/test/MC/Mips/mips1/valid.s
+++ b/test/MC/Mips/mips1/valid.s
@@ -64,8 +64,8 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $s8,$a0
- move $25,$a2
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
mtc1 $s8,$f9
mthi $s1
mtlo $sp
diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s
index 93fdbafdfcd0..c57d386d9d05 100644
--- a/test/MC/Mips/mips2/valid.s
+++ b/test/MC/Mips/mips2/valid.s
@@ -84,8 +84,8 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $s8,$a0
- move $25,$a2
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
mtc1 $s8,$f9
mthi $s1
mtlo $sp
diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s
index 1c878c97d158..cf51753712e6 100644
--- a/test/MC/Mips/mips3/valid.s
+++ b/test/MC/Mips/mips3/valid.s
@@ -142,10 +142,10 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $25,$a2
+ move $a0,$a3 # CHECK: move $4, $7 # encoding: [0x00,0xe0,0x20,0x25]
+ move $s5,$a0 # CHECK: move $21, $4 # encoding: [0x00,0x80,0xa8,0x25]
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
mtc1 $s8,$f9
mthi $s1
mtlo $sp
diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s
index fbe1551f6c26..2fdbdfe65223 100644
--- a/test/MC/Mips/mips32/valid.s
+++ b/test/MC/Mips/mips32/valid.s
@@ -40,7 +40,7 @@ a:
bltzall $6,488 # CHECK: bltzall $6, 488 # encoding: [0x04,0xd2,0x00,0x7a]
bltzl $s1,-9964 # CHECK: bltzl $17, -9964 # encoding: [0x06,0x22,0xf6,0x45]
bnel $gp,$s4,5107 # CHECK: bnel $gp, $20, 5107 # encoding: [0x57,0x94,0x04,0xfc]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
c.ngl.d $f29,$f29
c.ngle.d $f0,$f16
c.sf.d $f30,$f0
@@ -96,8 +96,8 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $s8,$a0
- move $25,$a2
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips32r2/invalid-dsp.s b/test/MC/Mips/mips32r2/invalid-dsp.s
new file mode 100644
index 000000000000..66e5f63129ac
--- /dev/null
+++ b/test/MC/Mips/mips32r2/invalid-dsp.s
@@ -0,0 +1,97 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding \
+# RUN: -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ absq_s.ph $8,$a0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ absq_s.w $s3,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addq.ph $s1,$15,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addq_s.ph $s3,$s6,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addq_s.w $a2,$8,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addsc $s8,$15,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addu.qb $s6,$v1,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addu_s.qb $s4,$s8,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addwc $k0,$s6,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bitrev $14,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmp.eq.ph $s7,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmp.le.ph $8,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmp.lt.ph $k0,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpgu.eq.qb $14,$s6,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpgu.le.qb $9,$a3,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpgu.lt.qb $sp,$at,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpu.eq.qb $v0,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpu.le.qb $s1,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpu.lt.qb $at,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpaq_sa.l.w $ac0,$a2,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpau.h.qbl $ac1,$10,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpau.h.qbr $ac1,$s7,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsq_s.w.ph $ac0,$gp,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsq_sa.l.w $ac0,$a3,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsu.h.qbl $ac2,$14,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsu.h.qbr $ac2,$a1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extpdpv $s6,$ac0,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extpv $13,$ac0,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extrv.w $8,$ac3,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extrv_r.w $8,$ac1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extrv_rs.w $gp,$ac1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extrv_s.h $s2,$ac1,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ insv $s2,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lbux $9,$14($v0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lhx $sp,$k0($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lwx $12,$12($s4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $ac2,$sp,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $ac2,$a1,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maq_s.w.phl $ac2,$25,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maq_s.w.phr $ac0,$10,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maq_sa.w.phl $ac3,$a1,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maq_sa.w.phr $ac1,$at,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfhi $9,$ac2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mflo $9,$ac2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ modsub $a3,$12,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mthi $v0,$ac1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mthlip $a3,$ac0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mtlo $v0,$ac1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ muleq_s.w.phl $11,$s4,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ muleq_s.w.phr $s6,$a0,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ muleu_s.ph.qbl $a2,$14,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ muleu_s.ph.qbr $a1,$ra,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mulq_rs.ph $s2,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mulsaq_s.w.ph $ac0,$ra,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mult $ac1, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ multu $ac1, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ packrl.ph $ra,$24,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pick.ph $ra,$a2,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pick.qb $11,$a0,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precequ.ph.qbl $s7,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precequ.ph.qbla $a0,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precequ.ph.qbr $ra,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precequ.ph.qbra $24,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ preceu.ph.qbl $sp,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ preceu.ph.qbla $s6,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ preceu.ph.qbr $gp,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ preceu.ph.qbra $k1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precrq.ph.w $14,$s8,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precrq.qb.ph $a2,$12,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precrq_rs.ph.w $a1,$k0,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precrqu_s.qb.ph $zero,$gp,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ raddu.w.qb $25,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ repl.ph $at,-307 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ replv.ph $v1,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ replv.qb $25,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shilo $ac1,26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shilov $ac2,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shllv.ph $10,$s0,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shllv.qb $gp,$v1,$zero # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shllv_s.ph $k1,$at,$13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shllv_s.w $s1,$ra,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrav.ph $25,$s2,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrav_r.ph $s3,$11,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrav_r.w $s7,$s4,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrlv.qb $a2,$s2,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subq.ph $ra,$9,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subq_s.ph $13,$s8,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subq_s.w $k1,$a2,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subu.qb $s6,$a2,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subu_s.qb $s1,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r2/invalid-dspr2.s b/test/MC/Mips/mips32r2/invalid-dspr2.s
new file mode 100644
index 000000000000..5c31b465ca1f
--- /dev/null
+++ b/test/MC/Mips/mips32r2/invalid-dspr2.s
@@ -0,0 +1,134 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding \
+# RUN: -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ absq_s.ph $8,$a0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ absq_s.qb $15,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ absq_s.w $s3,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addq.ph $s1,$15,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addq_s.ph $s3,$s6,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addq_s.w $a2,$8,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addqh.ph $s4,$14,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addqh_r.ph $sp,$25,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addsc $s8,$15,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addu.ph $a2,$14,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addu.qb $s6,$v1,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addu_s.ph $a3,$s3,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addu_s.qb $s4,$s8,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ adduh.qb $a1,$a1,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ adduh_r.qb $a0,$9,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ addwc $k0,$s6,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bitrev $14,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmp.eq.ph $s7,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmp.le.ph $8,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmp.lt.ph $k0,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpgdu.eq.qb $s3,$zero,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpgdu.le.qb $v1,$15,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpgdu.lt.qb $s0,$gp,$sp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpgu.eq.qb $14,$s6,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpgu.le.qb $9,$a3,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpgu.lt.qb $sp,$at,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpu.eq.qb $v0,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpu.le.qb $s1,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cmpu.lt.qb $at,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpa.w.ph $ac1,$s7,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpaq_sa.l.w $ac0,$a2,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpaqx_s.w.ph $ac3,$a0,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpaqx_sa.w.ph $ac1,$zero,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpau.h.qbl $ac1,$10,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpau.h.qbr $ac1,$s7,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpax.w.ph $ac3,$a0,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dps.w.ph $ac1,$a3,$a1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsq_s.w.ph $ac0,$gp,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsq_sa.l.w $ac0,$a3,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsqx_s.w.ph $ac3,$13,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsqx_sa.w.ph $ac3,$sp,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsu.h.qbl $ac2,$14,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsu.h.qbr $ac2,$a1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ dpsx.w.ph $ac0,$s7,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extpdpv $s6,$ac0,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extpv $13,$ac0,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extrv.w $8,$ac3,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extrv_r.w $8,$ac1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extrv_rs.w $gp,$ac1,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ extrv_s.h $s2,$ac1,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ insv $s2,$at # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lbux $9,$14($v0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lhx $sp,$k0($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ lwx $12,$12($s4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ madd $ac2,$sp,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maddu $ac2,$a1,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maq_s.w.phl $ac2,$25,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maq_s.w.phr $ac0,$10,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maq_sa.w.phl $ac3,$a1,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ maq_sa.w.phr $ac1,$at,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mfhi $9,$ac2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mflo $9,$ac2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ modsub $a3,$12,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mthi $v0,$ac1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mthlip $a3,$ac0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mtlo $v0,$ac1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mul.ph $10,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mul.ph $s4,$24,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mul_s.ph $10,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ muleq_s.w.phl $11,$s4,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ muleq_s.w.phr $s6,$a0,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ muleu_s.ph.qbl $a2,$14,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ muleu_s.ph.qbr $a1,$ra,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mulq_rs.ph $s2,$14,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mulq_rs.w $at,$s4,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mulq_s.ph $s0,$k1,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mulq_s.w $9,$a3,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mulsa.w.ph $ac1,$s4,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mulsaq_s.w.ph $ac0,$ra,$s2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ mult $ac1, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ multu $ac1, $2, $3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ packrl.ph $ra,$24,$14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pick.ph $ra,$a2,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pick.qb $11,$a0,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precequ.ph.qbl $s7,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precequ.ph.qbla $a0,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precequ.ph.qbr $ra,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precequ.ph.qbra $24,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ preceu.ph.qbl $sp,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ preceu.ph.qbla $s6,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ preceu.ph.qbr $gp,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ preceu.ph.qbra $k1,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precr.qb.ph $v0,$12,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precrq.ph.w $14,$s8,$24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precrq.qb.ph $a2,$12,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precrq_rs.ph.w $a1,$k0,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ precrqu_s.qb.ph $zero,$gp,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ raddu.w.qb $25,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ repl.ph $at,-307 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ replv.ph $v1,$s7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ replv.qb $25,$12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shilo $ac1,26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shilov $ac2,$10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shllv.ph $10,$s0,$s0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shllv.qb $gp,$v1,$zero # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shllv_s.ph $k1,$at,$13 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shllv_s.w $s1,$ra,$k0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrav.ph $25,$s2,$s1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrav.qb $zero,$24,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrav_r.ph $s3,$11,$25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrav_r.qb $a0,$sp,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrav_r.w $s7,$s4,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrlv.ph $14,$10,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ shrlv.qb $a2,$s2,$11 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subq.ph $ra,$9,$s8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subq_s.ph $13,$s8,$s5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subq_s.w $k1,$a2,$a3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subqh.ph $10,$at,$9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subqh.w $v0,$a2,$zero # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subqh_r.ph $a0,$12,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subqh_r.w $10,$a2,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subu.ph $9,$s6,$s4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subu.qb $s6,$a2,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subu_s.ph $v1,$a1,$s3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subu_s.qb $s1,$at,$ra # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subuh.qb $zero,$gp,$gp # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ subuh_r.qb $s4,$s8,$s6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r2/invalid-msa.s b/test/MC/Mips/mips32r2/invalid-msa.s
new file mode 100644
index 000000000000..2ad99b147209
--- /dev/null
+++ b/test/MC/Mips/mips32r2/invalid-msa.s
@@ -0,0 +1,62 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips64-unknown-linux -show-encoding \
+# RUN: -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ and.v $w10,$w25,$w29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bmnz.v $w15,$w2,$w28 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bmz.v $w13,$w11,$w21 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bsel.v $w28,$w7,$w0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fclass.d $w14,$w27 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fclass.w $w19,$w28 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fexupl.d $w10,$w29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fexupl.w $w12,$w27 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fexupr.d $w31,$w15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fexupr.w $w29,$w12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ffint_s.d $w1,$w30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ffint_s.w $w16,$w14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ffint_u.d $w23,$w18 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ffint_u.w $w19,$w12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ffql.d $w2,$w3 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ffql.w $w9,$w0 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ffqr.d $w25,$w24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ffqr.w $w10,$w6 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fill.b $w9,$v1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fill.h $w9,$8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fill.w $w31,$15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ flog2.d $w12,$w16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ flog2.w $w19,$w23 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ frcp.d $w12,$w4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ frcp.w $w30,$w8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ frint.d $w20,$w8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ frint.w $w11,$w29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ frsqrt.d $w29,$w2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ frsqrt.w $w9,$w8 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fsqrt.d $w3,$w1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ fsqrt.w $w5,$w15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ftint_s.d $w31,$w26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ftint_s.w $w27,$w14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ftint_u.d $w5,$w31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ftint_u.w $w12,$w29 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ftrunc_s.d $w4,$w22 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ftrunc_s.w $w24,$w7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ftrunc_u.d $w20,$w25 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ ftrunc_u.w $w7,$w26 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ move.v $w8,$w17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nloc.b $w12,$w30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nloc.d $w16,$w7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nloc.h $w21,$w17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nloc.w $w17,$w16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nlzc.b $w12,$w7 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nlzc.d $w14,$w14 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nlzc.h $w24,$w24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nlzc.w $w10,$w4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ nor.v $w20,$w20,$w15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ or.v $w13,$w23,$w12 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pcnt.b $w30,$w15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pcnt.d $w5,$w16 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pcnt.h $w20,$w24 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ pcnt.w $w22,$w20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ xor.v $w20,$w21,$w30 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r2/invalid.s b/test/MC/Mips/mips32r2/invalid.s
index ebccc43834e0..6001aeacf9b3 100644
--- a/test/MC/Mips/mips32r2/invalid.s
+++ b/test/MC/Mips/mips32r2/invalid.s
@@ -2,9 +2,29 @@
# invalid set of operands or operand's restrictions not met).
# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r2 2>%t1
-# RUN: FileCheck %s < %t1 -check-prefix=ASM
+# RUN: FileCheck %s < %t1
.text
.set noreorder
- jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+ cache -1, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ # FIXME: Check '0 < pos + size <= 32' constraint on ext
+ ext $2, $3, -1, 1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ ext $2, $3, 32, 1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ ext $2, $3, 1, 0 # CHECK: :[[@LINE]]:24: error: expected immediate in range 1 .. 32
+ ext $2, $3, 1, 33 # CHECK: :[[@LINE]]:24: error: expected immediate in range 1 .. 32
+ # FIXME: Check size on ins
+ ins $2, $3, -1, 1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ ins $2, $3, 32, 1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ pref -1, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ sll $2, $3, -1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ sll $2, $3, 32 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ srl $2, $3, -1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ srl $2, $3, 32 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ sra $2, $3, -1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ sra $2, $3, 32 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ rotr $2, $3, -1 # CHECK: :[[@LINE]]:22: error: expected 5-bit unsigned immediate
+ rotr $2, $3, 32 # CHECK: :[[@LINE]]:22: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/mips32r2/valid-xfail.s b/test/MC/Mips/mips32r2/valid-xfail.s
index 13385d06ce81..5a61eb6cbfb7 100644
--- a/test/MC/Mips/mips32r2/valid-xfail.s
+++ b/test/MC/Mips/mips32r2/valid-xfail.s
@@ -8,31 +8,10 @@
.set noat
abs.ps $f22,$f8
- absq_s.ph $8,$a0
- absq_s.qb $15,$s1
- absq_s.w $s3,$ra
add.ps $f25,$f27,$f13
- addq.ph $s1,$15,$at
- addq_s.ph $s3,$s6,$s2
- addq_s.w $a2,$8,$at
- addqh.ph $s4,$14,$s1
addqh.w $s7,$s7,$k1
- addqh_r.ph $sp,$25,$s8
addqh_r.w $8,$v1,$zero
- addsc $s8,$15,$12
- addu.ph $a2,$14,$s3
- addu.qb $s6,$v1,$v1
- addu_s.ph $a3,$s3,$gp
- addu_s.qb $s4,$s8,$s1
- adduh.qb $a1,$a1,$at
- adduh_r.qb $a0,$9,$12
- addwc $k0,$s6,$s7
alnv.ps $f12,$f18,$f30,$12
- and.v $w10,$w25,$w29
- bitrev $14,$at
- bmnz.v $w15,$w2,$w28
- bmz.v $w13,$w11,$w21
- bsel.v $w28,$w7,$w0
c.eq.d $fcc1,$f15,$f15
c.eq.ps $fcc5,$f0,$f9
c.eq.s $fcc5,$f24,$f17
@@ -80,18 +59,6 @@
ceil.l.d $f1,$f3
ceil.l.s $f18,$f13
cfcmsa $s6,$19
- cmp.eq.ph $s7,$14
- cmp.le.ph $8,$14
- cmp.lt.ph $k0,$sp
- cmpgdu.eq.qb $s3,$zero,$k0
- cmpgdu.le.qb $v1,$15,$s2
- cmpgdu.lt.qb $s0,$gp,$sp
- cmpgu.eq.qb $14,$s6,$s8
- cmpgu.le.qb $9,$a3,$s4
- cmpgu.lt.qb $sp,$at,$8
- cmpu.eq.qb $v0,$24
- cmpu.le.qb $s1,$a1
- cmpu.lt.qb $at,$a3
ctcmsa $31,$s7
cvt.d.l $f4,$f16
cvt.ps.s $f3,$f18,$f19
@@ -99,162 +66,44 @@
cvt.s.pl $f30,$f1
cvt.s.pu $f14,$f25
dmt $k0
- dpa.w.ph $ac1,$s7,$k0
- dpaq_s.w.ph $ac2,$a0,$13
- dpaq_sa.l.w $ac0,$a2,$14
- dpaqx_s.w.ph $ac3,$a0,$24
- dpaqx_sa.w.ph $ac1,$zero,$s5
- dpau.h.qbl $ac1,$10,$24
- dpau.h.qbr $ac1,$s7,$s6
- dpax.w.ph $ac3,$a0,$k0
- dps.w.ph $ac1,$a3,$a1
- dpsq_s.w.ph $ac0,$gp,$k0
- dpsq_sa.l.w $ac0,$a3,$15
- dpsqx_s.w.ph $ac3,$13,$a3
- dpsqx_sa.w.ph $ac3,$sp,$s2
- dpsu.h.qbl $ac2,$14,$10
- dpsu.h.qbr $ac2,$a1,$s6
- dpsx.w.ph $ac0,$s7,$gp
dvpe $s6
emt $8
evpe $v0
- extpdpv $s6,$ac0,$s8
- extpv $13,$ac0,$14
- extrv.w $8,$ac3,$at
- extrv_r.w $8,$ac1,$s6
- extrv_rs.w $gp,$ac1,$s6
- extrv_s.h $s2,$ac1,$14
- fclass.d $w14,$w27
- fclass.w $w19,$w28
- fexupl.d $w10,$w29
- fexupl.w $w12,$w27
- fexupr.d $w31,$w15
- fexupr.w $w29,$w12
- ffint_s.d $w1,$w30
- ffint_s.w $w16,$w14
- ffint_u.d $w23,$w18
- ffint_u.w $w19,$w12
- ffql.d $w2,$w3
- ffql.w $w9,$w0
- ffqr.d $w25,$w24
- ffqr.w $w10,$w6
- fill.b $w9,$v1
- fill.h $w9,$8
- fill.w $w31,$15
- flog2.d $w12,$w16
- flog2.w $w19,$w23
floor.l.d $f26,$f7
floor.l.s $f12,$f5
fork $s2,$8,$a0
- frcp.d $w12,$w4
- frcp.w $w30,$w8
- frint.d $w20,$w8
- frint.w $w11,$w29
- frsqrt.d $w29,$w2
- frsqrt.w $w9,$w8
- fsqrt.d $w3,$w1
- fsqrt.w $w5,$w15
- ftint_s.d $w31,$w26
- ftint_s.w $w27,$w14
- ftint_u.d $w5,$w31
- ftint_u.w $w12,$w29
- ftrunc_s.d $w4,$w22
- ftrunc_s.w $w24,$w7
- ftrunc_u.d $w20,$w25
- ftrunc_u.w $w7,$w26
- insv $s2,$at
iret
lbe $14,122($9)
lbue $11,-108($10)
- lbux $9,$14($v0)
lhe $s6,219($v1)
lhue $gp,118($11)
- lhx $sp,$k0($15)
lle $gp,-237($ra)
lwe $ra,-145($14)
lwle $11,-42($11)
lwre $sp,-152($24)
- lwx $12,$12($s4)
madd.ps $f22,$f3,$f14,$f3
- maq_s.w.phl $ac2,$25,$11
- maq_s.w.phr $ac0,$10,$25
- maq_sa.w.phl $ac3,$a1,$v1
- maq_sa.w.phr $ac1,$at,$10
mfgc0 $s6,c0_datahi1
- mflo $9,$ac2
- modsub $a3,$12,$a3
mov.ps $f22,$f17
- move.v $w8,$w17
movf.ps $f10,$f28,$fcc6
movn.ps $f31,$f31,$s3
movt.ps $f20,$f25,$fcc2
movz.ps $f18,$f17,$ra
- msub $ac2,$sp,$14
msub.ps $f12,$f14,$f29,$f17
- msubu $ac2,$a1,$24
mtc0 $9,c0_datahi1
mtgc0 $s4,$21,7
- mthi $v0,$ac1
- mthlip $a3,$ac0
- mul.ph $s4,$24,$s0
mul.ps $f14,$f0,$f16
- mul_s.ph $10,$14,$15
- muleq_s.w.phl $11,$s4,$s4
- muleq_s.w.phr $s6,$a0,$s8
- muleu_s.ph.qbl $a2,$14,$8
- muleu_s.ph.qbr $a1,$ra,$9
- mulq_rs.ph $s2,$14,$15
- mulq_rs.w $at,$s4,$25
- mulq_s.ph $s0,$k1,$15
- mulq_s.w $9,$a3,$s0
- mulsa.w.ph $ac1,$s4,$s6
- mulsaq_s.w.ph $ac0,$ra,$s2
neg.ps $f19,$f13
- nloc.b $w12,$w30
- nloc.d $w16,$w7
- nloc.h $w21,$w17
- nloc.w $w17,$w16
- nlzc.b $w12,$w7
- nlzc.d $w14,$w14
- nlzc.h $w24,$w24
- nlzc.w $w10,$w4
nmadd.ps $f27,$f4,$f9,$f25
nmsub.ps $f6,$f12,$f14,$f17
- nor.v $w20,$w20,$w15
- or.v $w13,$w23,$w12
- packrl.ph $ra,$24,$14
- pcnt.b $w30,$w15
- pcnt.d $w5,$w16
- pcnt.h $w20,$w24
- pcnt.w $w22,$w20
- pick.ph $ra,$a2,$gp
- pick.qb $11,$a0,$gp
pll.ps $f25,$f9,$f30
plu.ps $f1,$f26,$f29
preceq.w.phl $s8,$gp
preceq.w.phr $s5,$15
- precequ.ph.qbl $s7,$ra
- precequ.ph.qbla $a0,$9
- precequ.ph.qbr $ra,$s3
- precequ.ph.qbra $24,$8
- preceu.ph.qbl $sp,$8
- preceu.ph.qbla $s6,$11
- preceu.ph.qbr $gp,$s1
- preceu.ph.qbra $k1,$s0
- precr.qb.ph $v0,$12,$s8
- precrq.ph.w $14,$s8,$24
- precrq.qb.ph $a2,$12,$12
- precrq_rs.ph.w $a1,$k0,$a3
- precrqu_s.qb.ph $zero,$gp,$s5
pul.ps $f9,$f30,$f26
puu.ps $f24,$f9,$f2
- raddu.w.qb $25,$s3
rdpgpr $s3,$9
recip.d $f19,$f6
recip.s $f3,$f30
- repl.ph $at,-307
- replv.ph $v1,$s7
- replv.qb $25,$12
rorv $13,$a3,$s5
round.l.d $f12,$f1
round.l.s $f25,$f5
@@ -263,33 +112,7 @@
sbe $s7,33($s1)
sce $sp,189($10)
she $24,105($v0)
- shilo $ac1,26
- shilov $ac2,$10
- shllv.ph $10,$s0,$s0
- shllv.qb $gp,$v1,$zero
- shllv_s.ph $k1,$at,$13
- shllv_s.w $s1,$ra,$k0
- shrav.ph $25,$s2,$s1
- shrav.qb $zero,$24,$11
- shrav_r.ph $s3,$11,$25
- shrav_r.qb $a0,$sp,$s5
- shrav_r.w $s7,$s4,$s6
- shrlv.ph $14,$10,$9
- shrlv.qb $a2,$s2,$11
sub.ps $f5,$f14,$f26
- subq.ph $ra,$9,$s8
- subq_s.ph $13,$s8,$s5
- subq_s.w $k1,$a2,$a3
- subqh.ph $10,$at,$9
- subqh.w $v0,$a2,$zero
- subqh_r.ph $a0,$12,$s6
- subqh_r.w $10,$a2,$gp
- subu.ph $9,$s6,$s4
- subu.qb $s6,$a2,$s6
- subu_s.ph $v1,$a1,$s3
- subu_s.qb $s1,$at,$ra
- subuh.qb $zero,$gp,$gp
- subuh_r.qb $s4,$s8,$s6
swe $24,94($k0)
swle $v1,-209($gp)
swre $k0,-202($s2)
@@ -304,5 +127,4 @@
trunc.l.d $f23,$f23
trunc.l.s $f28,$f31
wrpgpr $zero,$13
- xor.v $w20,$w21,$w30
yield $v1,$s0
diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s
index 2e4366ab40f1..7ebc60d946a8 100644
--- a/test/MC/Mips/mips32r2/valid.s
+++ b/test/MC/Mips/mips32r2/valid.s
@@ -40,7 +40,7 @@ a:
bltzall $6,488 # CHECK: bltzall $6, 488 # encoding: [0x04,0xd2,0x00,0x7a]
bltzl $s1,-9964 # CHECK: bltzl $17, -9964 # encoding: [0x06,0x22,0xf6,0x45]
bnel $gp,$s4,5107 # CHECK: bnel $gp, $20, 5107 # encoding: [0x57,0x94,0x04,0xfc]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
c.ngl.d $f29,$f29
c.ngle.d $f0,$f16
c.sf.d $f30,$f0
@@ -111,8 +111,8 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $s8,$a0
- move $25,$a2
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips32r3/invalid.s b/test/MC/Mips/mips32r3/invalid.s
index f67f4c55ecb3..9051088fb38f 100644
--- a/test/MC/Mips/mips32r3/invalid.s
+++ b/test/MC/Mips/mips32r3/invalid.s
@@ -2,9 +2,13 @@
# invalid set of operands or operand's restrictions not met).
# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r3 2>%t1
-# RUN: FileCheck %s < %t1 -check-prefix=ASM
+# RUN: FileCheck %s < %t1
.text
.set noreorder
- jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+ cache -1, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ pref -1, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/mips32r3/valid-xfail.s b/test/MC/Mips/mips32r3/valid-xfail.s
index b0fc3a1d23f7..defa388c4df2 100644
--- a/test/MC/Mips/mips32r3/valid-xfail.s
+++ b/test/MC/Mips/mips32r3/valid-xfail.s
@@ -8,31 +8,10 @@
.set noat
abs.ps $f22,$f8
- absq_s.ph $8,$a0
- absq_s.qb $15,$s1
- absq_s.w $s3,$ra
add.ps $f25,$f27,$f13
- addq.ph $s1,$15,$at
- addq_s.ph $s3,$s6,$s2
- addq_s.w $a2,$8,$at
- addqh.ph $s4,$14,$s1
addqh.w $s7,$s7,$k1
- addqh_r.ph $sp,$25,$s8
addqh_r.w $8,$v1,$zero
- addsc $s8,$15,$12
- addu.ph $a2,$14,$s3
- addu.qb $s6,$v1,$v1
- addu_s.ph $a3,$s3,$gp
- addu_s.qb $s4,$s8,$s1
- adduh.qb $a1,$a1,$at
- adduh_r.qb $a0,$9,$12
- addwc $k0,$s6,$s7
alnv.ps $f12,$f18,$f30,$12
- and.v $w10,$w25,$w29
- bitrev $14,$at
- bmnz.v $w15,$w2,$w28
- bmz.v $w13,$w11,$w21
- bsel.v $w28,$w7,$w0
c.eq.d $fcc1,$f15,$f15
c.eq.ps $fcc5,$f0,$f9
c.eq.s $fcc5,$f24,$f17
@@ -80,18 +59,6 @@
ceil.l.d $f1,$f3
ceil.l.s $f18,$f13
cfcmsa $s6,$19
- cmp.eq.ph $s7,$14
- cmp.le.ph $8,$14
- cmp.lt.ph $k0,$sp
- cmpgdu.eq.qb $s3,$zero,$k0
- cmpgdu.le.qb $v1,$15,$s2
- cmpgdu.lt.qb $s0,$gp,$sp
- cmpgu.eq.qb $14,$s6,$s8
- cmpgu.le.qb $9,$a3,$s4
- cmpgu.lt.qb $sp,$at,$8
- cmpu.eq.qb $v0,$24
- cmpu.le.qb $s1,$a1
- cmpu.lt.qb $at,$a3
ctcmsa $31,$s7
cvt.d.l $f4,$f16
cvt.ps.s $f3,$f18,$f19
@@ -99,162 +66,44 @@
cvt.s.pl $f30,$f1
cvt.s.pu $f14,$f25
dmt $k0
- dpa.w.ph $ac1,$s7,$k0
- dpaq_s.w.ph $ac2,$a0,$13
- dpaq_sa.l.w $ac0,$a2,$14
- dpaqx_s.w.ph $ac3,$a0,$24
- dpaqx_sa.w.ph $ac1,$zero,$s5
- dpau.h.qbl $ac1,$10,$24
- dpau.h.qbr $ac1,$s7,$s6
- dpax.w.ph $ac3,$a0,$k0
- dps.w.ph $ac1,$a3,$a1
- dpsq_s.w.ph $ac0,$gp,$k0
- dpsq_sa.l.w $ac0,$a3,$15
- dpsqx_s.w.ph $ac3,$13,$a3
- dpsqx_sa.w.ph $ac3,$sp,$s2
- dpsu.h.qbl $ac2,$14,$10
- dpsu.h.qbr $ac2,$a1,$s6
- dpsx.w.ph $ac0,$s7,$gp
dvpe $s6
emt $8
evpe $v0
- extpdpv $s6,$ac0,$s8
- extpv $13,$ac0,$14
- extrv.w $8,$ac3,$at
- extrv_r.w $8,$ac1,$s6
- extrv_rs.w $gp,$ac1,$s6
- extrv_s.h $s2,$ac1,$14
- fclass.d $w14,$w27
- fclass.w $w19,$w28
- fexupl.d $w10,$w29
- fexupl.w $w12,$w27
- fexupr.d $w31,$w15
- fexupr.w $w29,$w12
- ffint_s.d $w1,$w30
- ffint_s.w $w16,$w14
- ffint_u.d $w23,$w18
- ffint_u.w $w19,$w12
- ffql.d $w2,$w3
- ffql.w $w9,$w0
- ffqr.d $w25,$w24
- ffqr.w $w10,$w6
- fill.b $w9,$v1
- fill.h $w9,$8
- fill.w $w31,$15
- flog2.d $w12,$w16
- flog2.w $w19,$w23
floor.l.d $f26,$f7
floor.l.s $f12,$f5
fork $s2,$8,$a0
- frcp.d $w12,$w4
- frcp.w $w30,$w8
- frint.d $w20,$w8
- frint.w $w11,$w29
- frsqrt.d $w29,$w2
- frsqrt.w $w9,$w8
- fsqrt.d $w3,$w1
- fsqrt.w $w5,$w15
- ftint_s.d $w31,$w26
- ftint_s.w $w27,$w14
- ftint_u.d $w5,$w31
- ftint_u.w $w12,$w29
- ftrunc_s.d $w4,$w22
- ftrunc_s.w $w24,$w7
- ftrunc_u.d $w20,$w25
- ftrunc_u.w $w7,$w26
- insv $s2,$at
iret
lbe $14,122($9)
lbue $11,-108($10)
- lbux $9,$14($v0)
lhe $s6,219($v1)
lhue $gp,118($11)
- lhx $sp,$k0($15)
lle $gp,-237($ra)
lwe $ra,-145($14)
lwle $11,-42($11)
lwre $sp,-152($24)
- lwx $12,$12($s4)
madd.ps $f22,$f3,$f14,$f3
- maq_s.w.phl $ac2,$25,$11
- maq_s.w.phr $ac0,$10,$25
- maq_sa.w.phl $ac3,$a1,$v1
- maq_sa.w.phr $ac1,$at,$10
mfgc0 $s6,c0_datahi1
- mflo $9,$ac2
- modsub $a3,$12,$a3
mov.ps $f22,$f17
- move.v $w8,$w17
movf.ps $f10,$f28,$fcc6
movn.ps $f31,$f31,$s3
movt.ps $f20,$f25,$fcc2
movz.ps $f18,$f17,$ra
- msub $ac2,$sp,$14
msub.ps $f12,$f14,$f29,$f17
- msubu $ac2,$a1,$24
mtc0 $9,c0_datahi1
mtgc0 $s4,$21,7
- mthi $v0,$ac1
- mthlip $a3,$ac0
- mul.ph $s4,$24,$s0
mul.ps $f14,$f0,$f16
- mul_s.ph $10,$14,$15
- muleq_s.w.phl $11,$s4,$s4
- muleq_s.w.phr $s6,$a0,$s8
- muleu_s.ph.qbl $a2,$14,$8
- muleu_s.ph.qbr $a1,$ra,$9
- mulq_rs.ph $s2,$14,$15
- mulq_rs.w $at,$s4,$25
- mulq_s.ph $s0,$k1,$15
- mulq_s.w $9,$a3,$s0
- mulsa.w.ph $ac1,$s4,$s6
- mulsaq_s.w.ph $ac0,$ra,$s2
neg.ps $f19,$f13
- nloc.b $w12,$w30
- nloc.d $w16,$w7
- nloc.h $w21,$w17
- nloc.w $w17,$w16
- nlzc.b $w12,$w7
- nlzc.d $w14,$w14
- nlzc.h $w24,$w24
- nlzc.w $w10,$w4
nmadd.ps $f27,$f4,$f9,$f25
nmsub.ps $f6,$f12,$f14,$f17
- nor.v $w20,$w20,$w15
- or.v $w13,$w23,$w12
- packrl.ph $ra,$24,$14
- pcnt.b $w30,$w15
- pcnt.d $w5,$w16
- pcnt.h $w20,$w24
- pcnt.w $w22,$w20
- pick.ph $ra,$a2,$gp
- pick.qb $11,$a0,$gp
pll.ps $f25,$f9,$f30
plu.ps $f1,$f26,$f29
preceq.w.phl $s8,$gp
preceq.w.phr $s5,$15
- precequ.ph.qbl $s7,$ra
- precequ.ph.qbla $a0,$9
- precequ.ph.qbr $ra,$s3
- precequ.ph.qbra $24,$8
- preceu.ph.qbl $sp,$8
- preceu.ph.qbla $s6,$11
- preceu.ph.qbr $gp,$s1
- preceu.ph.qbra $k1,$s0
- precr.qb.ph $v0,$12,$s8
- precrq.ph.w $14,$s8,$24
- precrq.qb.ph $a2,$12,$12
- precrq_rs.ph.w $a1,$k0,$a3
- precrqu_s.qb.ph $zero,$gp,$s5
pul.ps $f9,$f30,$f26
puu.ps $f24,$f9,$f2
- raddu.w.qb $25,$s3
rdpgpr $s3,$9
recip.d $f19,$f6
recip.s $f3,$f30
- repl.ph $at,-307
- replv.ph $v1,$s7
- replv.qb $25,$12
rorv $13,$a3,$s5
round.l.d $f12,$f1
round.l.s $f25,$f5
@@ -263,33 +112,7 @@
sbe $s7,33($s1)
sce $sp,189($10)
she $24,105($v0)
- shilo $ac1,26
- shilov $ac2,$10
- shllv.ph $10,$s0,$s0
- shllv.qb $gp,$v1,$zero
- shllv_s.ph $k1,$at,$13
- shllv_s.w $s1,$ra,$k0
- shrav.ph $25,$s2,$s1
- shrav.qb $zero,$24,$11
- shrav_r.ph $s3,$11,$25
- shrav_r.qb $a0,$sp,$s5
- shrav_r.w $s7,$s4,$s6
- shrlv.ph $14,$10,$9
- shrlv.qb $a2,$s2,$11
sub.ps $f5,$f14,$f26
- subq.ph $ra,$9,$s8
- subq_s.ph $13,$s8,$s5
- subq_s.w $k1,$a2,$a3
- subqh.ph $10,$at,$9
- subqh.w $v0,$a2,$zero
- subqh_r.ph $a0,$12,$s6
- subqh_r.w $10,$a2,$gp
- subu.ph $9,$s6,$s4
- subu.qb $s6,$a2,$s6
- subu_s.ph $v1,$a1,$s3
- subu_s.qb $s1,$at,$ra
- subuh.qb $zero,$gp,$gp
- subuh_r.qb $s4,$s8,$s6
swe $24,94($k0)
swle $v1,-209($gp)
swre $k0,-202($s2)
@@ -304,5 +127,4 @@
trunc.l.d $f23,$f23
trunc.l.s $f28,$f31
wrpgpr $zero,$13
- xor.v $w20,$w21,$w30
yield $v1,$s0
diff --git a/test/MC/Mips/mips32r3/valid.s b/test/MC/Mips/mips32r3/valid.s
index f6ef1d356c1f..3431e1cbc8d5 100644
--- a/test/MC/Mips/mips32r3/valid.s
+++ b/test/MC/Mips/mips32r3/valid.s
@@ -40,7 +40,7 @@ a:
bltzall $6,488 # CHECK: bltzall $6, 488 # encoding: [0x04,0xd2,0x00,0x7a]
bltzl $s1,-9964 # CHECK: bltzl $17, -9964 # encoding: [0x06,0x22,0xf6,0x45]
bnel $gp,$s4,5107 # CHECK: bnel $gp, $20, 5107 # encoding: [0x57,0x94,0x04,0xfc]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
c.ngl.d $f29,$f29
c.ngle.d $f0,$f16
c.sf.d $f30,$f0
@@ -111,8 +111,8 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $s8,$a0
- move $25,$a2
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips32r5/invalid-mips32.s b/test/MC/Mips/mips32r5/invalid-mips32.s
new file mode 100644
index 000000000000..fc1a516f811a
--- /dev/null
+++ b/test/MC/Mips/mips32r5/invalid-mips32.s
@@ -0,0 +1,8 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ eretnc # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r5/invalid-mips32r2.s b/test/MC/Mips/mips32r5/invalid-mips32r2.s
new file mode 100644
index 000000000000..2e7a29720899
--- /dev/null
+++ b/test/MC/Mips/mips32r5/invalid-mips32r2.s
@@ -0,0 +1,8 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ eretnc # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r5/invalid-mips32r3.s b/test/MC/Mips/mips32r5/invalid-mips32r3.s
new file mode 100644
index 000000000000..3ee188f148d3
--- /dev/null
+++ b/test/MC/Mips/mips32r5/invalid-mips32r3.s
@@ -0,0 +1,8 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r3 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ eretnc # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r5/invalid.s b/test/MC/Mips/mips32r5/invalid.s
index fec30e1426cd..0d83005d5ccc 100644
--- a/test/MC/Mips/mips32r5/invalid.s
+++ b/test/MC/Mips/mips32r5/invalid.s
@@ -2,9 +2,13 @@
# invalid set of operands or operand's restrictions not met).
# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r5 2>%t1
-# RUN: FileCheck %s < %t1 -check-prefix=ASM
+# RUN: FileCheck %s < %t1
.text
.set noreorder
- jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+ cache -1, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ pref -1, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/mips32r5/valid-xfail.s b/test/MC/Mips/mips32r5/valid-xfail.s
index a821dddb85ca..c1bf7a4b3a29 100644
--- a/test/MC/Mips/mips32r5/valid-xfail.s
+++ b/test/MC/Mips/mips32r5/valid-xfail.s
@@ -8,31 +8,10 @@
.set noat
abs.ps $f22,$f8
- absq_s.ph $8,$a0
- absq_s.qb $15,$s1
- absq_s.w $s3,$ra
add.ps $f25,$f27,$f13
- addq.ph $s1,$15,$at
- addq_s.ph $s3,$s6,$s2
- addq_s.w $a2,$8,$at
- addqh.ph $s4,$14,$s1
addqh.w $s7,$s7,$k1
- addqh_r.ph $sp,$25,$s8
addqh_r.w $8,$v1,$zero
- addsc $s8,$15,$12
- addu.ph $a2,$14,$s3
- addu.qb $s6,$v1,$v1
- addu_s.ph $a3,$s3,$gp
- addu_s.qb $s4,$s8,$s1
- adduh.qb $a1,$a1,$at
- adduh_r.qb $a0,$9,$12
- addwc $k0,$s6,$s7
alnv.ps $f12,$f18,$f30,$12
- and.v $w10,$w25,$w29
- bitrev $14,$at
- bmnz.v $w15,$w2,$w28
- bmz.v $w13,$w11,$w21
- bsel.v $w28,$w7,$w0
c.eq.d $fcc1,$f15,$f15
c.eq.ps $fcc5,$f0,$f9
c.eq.s $fcc5,$f24,$f17
@@ -80,18 +59,6 @@
ceil.l.d $f1,$f3
ceil.l.s $f18,$f13
cfcmsa $s6,$19
- cmp.eq.ph $s7,$14
- cmp.le.ph $8,$14
- cmp.lt.ph $k0,$sp
- cmpgdu.eq.qb $s3,$zero,$k0
- cmpgdu.le.qb $v1,$15,$s2
- cmpgdu.lt.qb $s0,$gp,$sp
- cmpgu.eq.qb $14,$s6,$s8
- cmpgu.le.qb $9,$a3,$s4
- cmpgu.lt.qb $sp,$at,$8
- cmpu.eq.qb $v0,$24
- cmpu.le.qb $s1,$a1
- cmpu.lt.qb $at,$a3
ctcmsa $31,$s7
cvt.d.l $f4,$f16
cvt.ps.s $f3,$f18,$f19
@@ -99,162 +66,44 @@
cvt.s.pl $f30,$f1
cvt.s.pu $f14,$f25
dmt $k0
- dpa.w.ph $ac1,$s7,$k0
- dpaq_s.w.ph $ac2,$a0,$13
- dpaq_sa.l.w $ac0,$a2,$14
- dpaqx_s.w.ph $ac3,$a0,$24
- dpaqx_sa.w.ph $ac1,$zero,$s5
- dpau.h.qbl $ac1,$10,$24
- dpau.h.qbr $ac1,$s7,$s6
- dpax.w.ph $ac3,$a0,$k0
- dps.w.ph $ac1,$a3,$a1
- dpsq_s.w.ph $ac0,$gp,$k0
- dpsq_sa.l.w $ac0,$a3,$15
- dpsqx_s.w.ph $ac3,$13,$a3
- dpsqx_sa.w.ph $ac3,$sp,$s2
- dpsu.h.qbl $ac2,$14,$10
- dpsu.h.qbr $ac2,$a1,$s6
- dpsx.w.ph $ac0,$s7,$gp
dvpe $s6
emt $8
evpe $v0
- extpdpv $s6,$ac0,$s8
- extpv $13,$ac0,$14
- extrv.w $8,$ac3,$at
- extrv_r.w $8,$ac1,$s6
- extrv_rs.w $gp,$ac1,$s6
- extrv_s.h $s2,$ac1,$14
- fclass.d $w14,$w27
- fclass.w $w19,$w28
- fexupl.d $w10,$w29
- fexupl.w $w12,$w27
- fexupr.d $w31,$w15
- fexupr.w $w29,$w12
- ffint_s.d $w1,$w30
- ffint_s.w $w16,$w14
- ffint_u.d $w23,$w18
- ffint_u.w $w19,$w12
- ffql.d $w2,$w3
- ffql.w $w9,$w0
- ffqr.d $w25,$w24
- ffqr.w $w10,$w6
- fill.b $w9,$v1
- fill.h $w9,$8
- fill.w $w31,$15
- flog2.d $w12,$w16
- flog2.w $w19,$w23
floor.l.d $f26,$f7
floor.l.s $f12,$f5
fork $s2,$8,$a0
- frcp.d $w12,$w4
- frcp.w $w30,$w8
- frint.d $w20,$w8
- frint.w $w11,$w29
- frsqrt.d $w29,$w2
- frsqrt.w $w9,$w8
- fsqrt.d $w3,$w1
- fsqrt.w $w5,$w15
- ftint_s.d $w31,$w26
- ftint_s.w $w27,$w14
- ftint_u.d $w5,$w31
- ftint_u.w $w12,$w29
- ftrunc_s.d $w4,$w22
- ftrunc_s.w $w24,$w7
- ftrunc_u.d $w20,$w25
- ftrunc_u.w $w7,$w26
- insv $s2,$at
iret
lbe $14,122($9)
lbue $11,-108($10)
- lbux $9,$14($v0)
lhe $s6,219($v1)
lhue $gp,118($11)
- lhx $sp,$k0($15)
lle $gp,-237($ra)
lwe $ra,-145($14)
lwle $11,-42($11)
lwre $sp,-152($24)
- lwx $12,$12($s4)
madd.ps $f22,$f3,$f14,$f3
- maq_s.w.phl $ac2,$25,$11
- maq_s.w.phr $ac0,$10,$25
- maq_sa.w.phl $ac3,$a1,$v1
- maq_sa.w.phr $ac1,$at,$10
mfgc0 $s6,c0_datahi1
- mflo $9,$ac2
- modsub $a3,$12,$a3
mov.ps $f22,$f17
- move.v $w8,$w17
movf.ps $f10,$f28,$fcc6
movn.ps $f31,$f31,$s3
movt.ps $f20,$f25,$fcc2
movz.ps $f18,$f17,$ra
- msub $ac2,$sp,$14
msub.ps $f12,$f14,$f29,$f17
- msubu $ac2,$a1,$24
mtc0 $9,c0_datahi1
mtgc0 $s4,$21,7
- mthi $v0,$ac1
- mthlip $a3,$ac0
- mul.ph $s4,$24,$s0
mul.ps $f14,$f0,$f16
- mul_s.ph $10,$14,$15
- muleq_s.w.phl $11,$s4,$s4
- muleq_s.w.phr $s6,$a0,$s8
- muleu_s.ph.qbl $a2,$14,$8
- muleu_s.ph.qbr $a1,$ra,$9
- mulq_rs.ph $s2,$14,$15
- mulq_rs.w $at,$s4,$25
- mulq_s.ph $s0,$k1,$15
- mulq_s.w $9,$a3,$s0
- mulsa.w.ph $ac1,$s4,$s6
- mulsaq_s.w.ph $ac0,$ra,$s2
neg.ps $f19,$f13
- nloc.b $w12,$w30
- nloc.d $w16,$w7
- nloc.h $w21,$w17
- nloc.w $w17,$w16
- nlzc.b $w12,$w7
- nlzc.d $w14,$w14
- nlzc.h $w24,$w24
- nlzc.w $w10,$w4
nmadd.ps $f27,$f4,$f9,$f25
nmsub.ps $f6,$f12,$f14,$f17
- nor.v $w20,$w20,$w15
- or.v $w13,$w23,$w12
- packrl.ph $ra,$24,$14
- pcnt.b $w30,$w15
- pcnt.d $w5,$w16
- pcnt.h $w20,$w24
- pcnt.w $w22,$w20
- pick.ph $ra,$a2,$gp
- pick.qb $11,$a0,$gp
pll.ps $f25,$f9,$f30
plu.ps $f1,$f26,$f29
preceq.w.phl $s8,$gp
preceq.w.phr $s5,$15
- precequ.ph.qbl $s7,$ra
- precequ.ph.qbla $a0,$9
- precequ.ph.qbr $ra,$s3
- precequ.ph.qbra $24,$8
- preceu.ph.qbl $sp,$8
- preceu.ph.qbla $s6,$11
- preceu.ph.qbr $gp,$s1
- preceu.ph.qbra $k1,$s0
- precr.qb.ph $v0,$12,$s8
- precrq.ph.w $14,$s8,$24
- precrq.qb.ph $a2,$12,$12
- precrq_rs.ph.w $a1,$k0,$a3
- precrqu_s.qb.ph $zero,$gp,$s5
pul.ps $f9,$f30,$f26
puu.ps $f24,$f9,$f2
- raddu.w.qb $25,$s3
rdpgpr $s3,$9
recip.d $f19,$f6
recip.s $f3,$f30
- repl.ph $at,-307
- replv.ph $v1,$s7
- replv.qb $25,$12
rorv $13,$a3,$s5
round.l.d $f12,$f1
round.l.s $f25,$f5
@@ -263,33 +112,7 @@
sbe $s7,33($s1)
sce $sp,189($10)
she $24,105($v0)
- shilo $ac1,26
- shilov $ac2,$10
- shllv.ph $10,$s0,$s0
- shllv.qb $gp,$v1,$zero
- shllv_s.ph $k1,$at,$13
- shllv_s.w $s1,$ra,$k0
- shrav.ph $25,$s2,$s1
- shrav.qb $zero,$24,$11
- shrav_r.ph $s3,$11,$25
- shrav_r.qb $a0,$sp,$s5
- shrav_r.w $s7,$s4,$s6
- shrlv.ph $14,$10,$9
- shrlv.qb $a2,$s2,$11
sub.ps $f5,$f14,$f26
- subq.ph $ra,$9,$s8
- subq_s.ph $13,$s8,$s5
- subq_s.w $k1,$a2,$a3
- subqh.ph $10,$at,$9
- subqh.w $v0,$a2,$zero
- subqh_r.ph $a0,$12,$s6
- subqh_r.w $10,$a2,$gp
- subu.ph $9,$s6,$s4
- subu.qb $s6,$a2,$s6
- subu_s.ph $v1,$a1,$s3
- subu_s.qb $s1,$at,$ra
- subuh.qb $zero,$gp,$gp
- subuh_r.qb $s4,$s8,$s6
swe $24,94($k0)
swle $v1,-209($gp)
swre $k0,-202($s2)
@@ -304,5 +127,4 @@
trunc.l.d $f23,$f23
trunc.l.s $f28,$f31
wrpgpr $zero,$13
- xor.v $w20,$w21,$w30
yield $v1,$s0
diff --git a/test/MC/Mips/mips32r5/valid.s b/test/MC/Mips/mips32r5/valid.s
index f12d75113203..0c477f4fa2ae 100644
--- a/test/MC/Mips/mips32r5/valid.s
+++ b/test/MC/Mips/mips32r5/valid.s
@@ -40,7 +40,7 @@ a:
bltzall $6,488 # CHECK: bltzall $6, 488 # encoding: [0x04,0xd2,0x00,0x7a]
bltzl $s1,-9964 # CHECK: bltzl $17, -9964 # encoding: [0x06,0x22,0xf6,0x45]
bnel $gp,$s4,5107 # CHECK: bnel $gp, $20, 5107 # encoding: [0x57,0x94,0x04,0xfc]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
c.ngl.d $f29,$f29
c.ngle.d $f0,$f16
c.sf.d $f30,$f0
@@ -70,6 +70,7 @@ a:
ei $14 # CHECK: ei $14 # encoding: [0x41,0x6e,0x60,0x20]
ei # CHECK: ei # encoding: [0x41,0x60,0x60,0x20]
eret
+ eretnc # CHECK: eretnc # encoding: [0x42,0x00,0x00,0x58]
floor.w.d $f14,$f11
floor.w.s $f8,$f9
j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
@@ -111,8 +112,8 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $s8,$a0
- move $25,$a2
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
index cc7d403eaf8b..6d569d12b396 100644
--- a/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
+++ b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
@@ -11,7 +11,8 @@
lwr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
swl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
swr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+ lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $24, 5($3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s
index 06bf58c1e8fa..1cec777c27a6 100644
--- a/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s
@@ -8,4 +8,3 @@
.set noat
bc2tl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
bc2fl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- prefx 0,$2($31) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips4.s b/test/MC/Mips/mips32r6/invalid-mips4.s
index 9d8f02fb6d67..6a0370870112 100644
--- a/test/MC/Mips/mips32r6/invalid-mips4.s
+++ b/test/MC/Mips/mips32r6/invalid-mips4.s
@@ -11,3 +11,4 @@
lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
sdxc1 $f11,$10($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
swxc1 $f19,$12($k0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ prefx 0,$2($31) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid.s b/test/MC/Mips/mips32r6/invalid.s
index 0ce75e6143c2..1656ac1da350 100644
--- a/test/MC/Mips/mips32r6/invalid.s
+++ b/test/MC/Mips/mips32r6/invalid.s
@@ -2,17 +2,40 @@
# the assembler (e.g. invalid set of operands or operand's restrictions not met).
# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r6 2>%t1
-# RUN: FileCheck %s < %t1 -check-prefix=ASM
+# RUN: FileCheck %s < %t1
.text
+local_label:
.set noreorder
.set noat
- jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- ldc2 $8,-21181($at) # ASM: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- sdc2 $20,23157($s2) # ASM: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- swc2 $25,24880($s0) # ASM: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- break 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 1024, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 7, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 1024, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ align $4, $2, $3, -1 # CHECK: :[[@LINE]]:29: error: expected 2-bit unsigned immediate
+ align $4, $2, $3, 4 # CHECK: :[[@LINE]]:29: error: expected 2-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ ldc2 $8,-21181($at) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ sdc2 $20,23157($s2) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ swc2 $25,24880($s0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ break -1 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ break 1024 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ break -1, 5 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ break 1024, 5 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ break 7, -1 # CHECK: :[[@LINE]]:18: error: expected 10-bit unsigned immediate
+ break 7, 1024 # CHECK: :[[@LINE]]:18: error: expected 10-bit unsigned immediate
+ break 1024, 1024 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ // FIXME: Following tests are temporarely disabled, until "PredicateControl not in hierarchy" problem is resolved
+ bltl $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bltul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ blel $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bleul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bgel $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bgeul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bgtl $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bgtul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cache -1, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ lsa $2, $3, $4, 0 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 4
+ lsa $2, $3, $4, 5 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 4
+ pref -1, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s
index 52752c5a6997..226acd96a5a6 100644
--- a/test/MC/Mips/mips32r6/valid.s
+++ b/test/MC/Mips/mips32r6/valid.s
@@ -64,7 +64,7 @@ a:
bovc $0, $0, 4 # CHECK: bovc $zero, $zero, 4 # encoding: [0x20,0x00,0x00,0x01]
bovc $2, $0, 4 # CHECK: bovc $2, $zero, 4 # encoding: [0x20,0x40,0x00,0x01]
bovc $4, $2, 4 # CHECK: bovc $4, $2, 4 # encoding: [0x20,0x82,0x00,0x01]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0x7c,0xa1,0x04,0x25]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0x7c,0xa1,0x04,0x25]
cmp.af.s $f2,$f3,$f4 # CHECK: cmp.af.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x80]
cmp.af.d $f2,$f3,$f4 # CHECK: cmp.af.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x80]
cmp.un.s $f2,$f3,$f4 # CHECK: cmp.un.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x81]
@@ -103,14 +103,20 @@ a:
divu $2,$3,$4 # CHECK: divu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9b]
ei $14 # CHECK: ei $14 # encoding: [0x41,0x6e,0x60,0x20]
ei # CHECK: ei # encoding: [0x41,0x60,0x60,0x20]
+ eret
+ eretnc # CHECK: eretnc # encoding: [0x42,0x00,0x00,0x58]
jialc $5, 256 # CHECK: jialc $5, 256 # encoding: [0xf8,0x05,0x01,0x00]
jic $5, 256 # CHECK: jic $5, 256 # encoding: [0xd8,0x05,0x01,0x00]
- lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5]
+ lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0x85]
lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43]
lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43]
mfc0 $8,$15,1 # CHECK: mfc0 $8, $15, 1 # encoding: [0x40,0x08,0x78,0x01]
mod $2,$3,$4 # CHECK: mod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xda]
modu $2,$3,$4 # CHECK: modu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdb]
+ move $a0,$a3 # CHECK: move $4, $7 # encoding: [0x00,0xe0,0x20,0x25]
+ move $s5,$a0 # CHECK: move $21, $4 # encoding: [0x00,0x80,0xa8,0x25]
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
mtc0 $9,$15,1 # CHECK: mtc0 $9, $15, 1 # encoding: [0x40,0x89,0x78,0x01]
mul $2,$3,$4 # CHECK: mul $2, $3, $4 # encoding: [0x00,0x64,0x10,0x98]
muh $2,$3,$4 # CHECK: muh $2, $3, $4 # encoding: [0x00,0x64,0x10,0xd8]
diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s
index b89026dd714a..9bf98d1c29ff 100644
--- a/test/MC/Mips/mips4/valid.s
+++ b/test/MC/Mips/mips4/valid.s
@@ -150,10 +150,10 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $25,$a2
+ move $a0,$a3 # CHECK: move $4, $7 # encoding: [0x00,0xe0,0x20,0x25]
+ move $s5,$a0 # CHECK: move $21, $4 # encoding: [0x00,0x80,0xa8,0x25]
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s
index b444274d78ec..cb30de38c295 100644
--- a/test/MC/Mips/mips5/valid.s
+++ b/test/MC/Mips/mips5/valid.s
@@ -151,10 +151,10 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $25,$a2
+ move $a0,$a3 # CHECK: move $4, $7 # encoding: [0x00,0xe0,0x20,0x25]
+ move $s5,$a0 # CHECK: move $21, $4 # encoding: [0x00,0x80,0xa8,0x25]
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips64-alu-instructions.s b/test/MC/Mips/mips64-alu-instructions.s
index 19ed1ffad1ca..409d59e5e88b 100644
--- a/test/MC/Mips/mips64-alu-instructions.s
+++ b/test/MC/Mips/mips64-alu-instructions.s
@@ -74,7 +74,7 @@
# CHECK: daddiu $9, $9, -15001 # encoding: [0x67,0xc5,0x29,0x65]
# CHECK: daddu $9, $6, $7 # encoding: [0x2d,0x48,0xc7,0x00]
# CHECK: drotr $9, $6, 20 # encoding: [0x3a,0x4d,0x26,0x00]
-# CHECK: drotr32 $9, $6, 52 # encoding: [0x3e,0x4d,0x26,0x00]
+# CHECK: drotr32 $9, $6, 20 # encoding: [0x3e,0x4d,0x26,0x00]
# CHECK: madd $6, $7 # encoding: [0x00,0x00,0xc7,0x70]
# CHECK: maddu $6, $7 # encoding: [0x01,0x00,0xc7,0x70]
# CHECK: msub $6, $7 # encoding: [0x04,0x00,0xc7,0x70]
@@ -84,7 +84,7 @@
# CHECK: dsub $9, $6, $7 # encoding: [0x2e,0x48,0xc7,0x00]
# CHECK: dsubu $4, $3, $5 # encoding: [0x2f,0x20,0x65,0x00]
# CHECK: daddiu $9, $6, -17767 # encoding: [0x99,0xba,0xc9,0x64]
-# CHECK: move $7, $8 # encoding: [0x2d,0x38,0x00,0x01]
+# CHECK: move $7, $8 # encoding: [0x25,0x38,0x00,0x01]
# CHECK: .set push
# CHECK: .set mips32r2
# CHECK: rdhwr $5, $29
@@ -99,7 +99,7 @@
daddiu $9,-15001
daddu $9,$6,$7
drotr $9, $6, 20
- drotr32 $9, $6, 52
+ drotr32 $9, $6, 20
madd $6,$7
maddu $6,$7
msub $6,$7
diff --git a/test/MC/Mips/mips64-expansions.s b/test/MC/Mips/mips64-expansions.s
index b8f1e7a3e87e..0b56cf501283 100644
--- a/test/MC/Mips/mips64-expansions.s
+++ b/test/MC/Mips/mips64-expansions.s
@@ -1,183 +1,4 @@
# RUN: llvm-mc %s -triple=mips64el-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
-#
-# Test the 'dli' and 'dla' 64-bit variants of 'li' and 'la'.
-
-# Immediate is <= 32 bits.
- dli $5, 123
-# CHECK: addiu $5, $zero, 123 # encoding: [0x7b,0x00,0x05,0x24]
-
- dli $6, -2345
-# CHECK: addiu $6, $zero, -2345 # encoding: [0xd7,0xf6,0x06,0x24]
-
- dli $7, 65538
-# CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c]
-# CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34]
-
- dli $8, ~7
-# CHECK: addiu $8, $zero, -8 # encoding: [0xf8,0xff,0x08,0x24]
-
- dli $9, 0x10000
-# CHECK: lui $9, 1 # encoding: [0x01,0x00,0x09,0x3c]
-# CHECK-NOT: ori $9, $9, 0 # encoding: [0x00,0x00,0x29,0x35]
-
-
-# Positive immediate which is => 32 bits and <= 48 bits.
- dli $8, 0x100000000
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-
- dli $8, 0x100000001
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-
- dli $8, 0x100010000
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-
- dli $8, 0x100010001
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-
-
-# Positive immediate which is > 48 bits.
- dli $8, 0x1000000000000
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: dsll32 $8, $8, 0 # encoding: [0x3c,0x40,0x08,0x00]
-
- dli $8, 0x1000000000001
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: dsll32 $8, $8, 0 # encoding: [0x3c,0x40,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-
- dli $8, 0x1000000010000
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-
- dli $8, 0x1000100000000
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll32 $8, $8, 0 # encoding: [0x3c,0x40,0x08,0x00]
-
- dli $8, 0x1000000010001
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-
- dli $8, 0x1000100010000
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-
- dli $8, 0x1000100000001
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll32 $8, $8, 0 # encoding: [0x3c,0x40,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-
- dli $8, 0x1000100010001
-# CHECK: lui $8, 1 # encoding: [0x01,0x00,0x08,0x3c]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 1 # encoding: [0x01,0x00,0x08,0x35]
-
-
-# Negative immediate which is => 32 bits and <= 48 bits.
- dli $8, -0x100000000
-# CHECK: lui $8, 65535 # encoding: [0xff,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll32 $8, $8, 0 # encoding: [0x3c,0x40,0x08,0x00]
-
- dli $8, -0x100000001
-# CHECK: lui $8, 65535 # encoding: [0xff,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65534 # encoding: [0xfe,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-
- dli $8, -0x100010000
-# CHECK: lui $8, 65535 # encoding: [0xff,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65534 # encoding: [0xfe,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-
- dli $8, -0x100010001
-# CHECK: lui $8, 65535 # encoding: [0xff,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65534 # encoding: [0xfe,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65534 # encoding: [0xfe,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-
-
-# Negative immediate which is > 48 bits.
- dli $8, -0x1000000000000
-# CHECK: lui $8, 65535 # encoding: [0xff,0xff,0x08,0x3c]
-# CHECK: dsll32 $8, $8, 0 # encoding: [0x3c,0x40,0x08,0x00]
-
- dli $8, -0x1000000000001
-# CHECK: lui $8, 65534 # encoding: [0xfe,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-
- dli $8, -0x1000000010000
-# CHECK: lui $8, 65534 # encoding: [0xfe,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-
- dli $8, -0x1000100000000
-# CHECK: lui $8, 65534 # encoding: [0xfe,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll32 $8, $8, 0 # encoding: [0x3c,0x40,0x08,0x00]
-
- dli $8, -0x1000000010001
-# CHECK: lui $8, 65534 # encoding: [0xfe,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65534 # encoding: [0xfe,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-
- dli $8, -0x1000100010000
-# CHECK: lui $8, 65534 # encoding: [0xfe,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65534 # encoding: [0xfe,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-
- dli $8, -0x1000100000001
-# CHECK: lui $8, 65534 # encoding: [0xfe,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65534 # encoding: [0xfe,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
-
- dli $8, -0x1000100010001
-# CHECK: lui $8, 65534 # encoding: [0xfe,0xff,0x08,0x3c]
-# CHECK: ori $8, $8, 65534 # encoding: [0xfe,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65534 # encoding: [0xfe,0xff,0x08,0x35]
-# CHECK: dsll $8, $8, 16 # encoding: [0x38,0x44,0x08,0x00]
-# CHECK: ori $8, $8, 65535 # encoding: [0xff,0xff,0x08,0x35]
# Check that signed negative 32-bit immediates are loaded correctly:
li $10, ~(0x101010)
@@ -185,18 +6,10 @@
# CHECK: ori $10, $10, 61423 # encoding: [0xef,0xef,0x4a,0x35]
# CHECK-NOT: dsll
- dli $10, ~(0x202020)
-# CHECK: lui $10, 65503 # encoding: [0xdf,0xff,0x0a,0x3c]
-# CHECK: ori $10, $10, 57311 # encoding: [0xdf,0xdf,0x4a,0x35]
-# CHECK-NOT: dsll
-
- dli $9, 0x80000000
-# CHECK: ori $9, $zero, 32768 # encoding: [0x00,0x80,0x09,0x34]
-# CHECK: dsll $9, $9, 16 # encoding: [0x38,0x4c,0x09,0x00]
-
# Test bne with an immediate as the 2nd operand.
bne $2, 0x100010001, 1332
-# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: addiu $1, $zero, 1 # encoding: [0x01,0x00,0x01,0x24]
+# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
@@ -214,8 +27,7 @@
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
bne $2, -0x100010001, 1332
-# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
-# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: addiu $1, $zero, -2 # encoding: [0xfe,0xff,0x01,0x24]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
@@ -235,7 +47,8 @@
# Test beq with an immediate as the 2nd operand.
beq $2, 0x100010001, 1332
-# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: addiu $1, $zero, 1 # encoding: [0x01,0x00,0x01,0x24]
+# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
@@ -253,8 +66,7 @@
# CHECK: nop # encoding: [0x00,0x00,0x00,0x00]
beq $2, -0x100010001, 1332
-# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
-# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: addiu $1, $zero, -2 # encoding: [0xfe,0xff,0x01,0x24]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
@@ -274,7 +86,7 @@
# Test ulhu with 64-bit immediate addresses.
ulhu $8, 0x100010001
-# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: addiu $1, $zero, 1 # encoding: [0x01,0x00,0x01,0x24]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
@@ -296,8 +108,7 @@
# CHECK: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
ulhu $8, -0x100010001
-# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
-# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: addiu $1, $zero, -2 # encoding: [0xfe,0xff,0x01,0x24]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
@@ -321,7 +132,7 @@
# Test ulhu with source register and 64-bit immediate offset.
ulhu $8, 0x100010001($9)
-# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: addiu $1, $zero, 1 # encoding: [0x01,0x00,0x01,0x24]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
@@ -345,12 +156,10 @@
# CHECK: or $8, $8, $1 # encoding: [0x25,0x40,0x01,0x01]
ulhu $8, -0x100010001($9)
-# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
-# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: addiu $1, $zero, -2 # encoding: [0xfe,0xff,0x01,0x24]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
-# CHECK: ori $1, $1, 65535 # encoding: [0xff,0xff,0x21,0x34]
# CHECK: daddu $1, $1, $9 # encoding: [0x2d,0x08,0x29,0x00]
# CHECK: lbu $8, 1($1) # encoding: [0x01,0x00,0x28,0x90]
# CHECK: lbu $1, 0($1) # encoding: [0x00,0x00,0x21,0x90]
@@ -372,7 +181,7 @@
# Test ulw with 64-bit immediate addresses.
ulw $8, 0x100010001
-# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: addiu $1, $zero, 1 # encoding: [0x01,0x00,0x01,0x24]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
@@ -390,8 +199,7 @@
# CHECK: lwr $8, 0($1) # encoding: [0x00,0x00,0x28,0x98]
ulw $8, -0x100010001
-# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
-# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: addiu $1, $zero, -2 # encoding: [0xfe,0xff,0x01,0x24]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
@@ -411,7 +219,7 @@
# Test ulw with source register and 64-bit immediate offset.
ulw $8, 0x100010001($9)
-# CHECK: lui $1, 1 # encoding: [0x01,0x00,0x01,0x3c]
+# CHECK: addiu $1, $zero, 1 # encoding: [0x01,0x00,0x01,0x24]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 1 # encoding: [0x01,0x00,0x21,0x34]
@@ -431,8 +239,7 @@
# CHECK: lwr $8, 0($1) # encoding: [0x00,0x00,0x28,0x98]
ulw $8, -0x100010001($9)
-# CHECK: lui $1, 65535 # encoding: [0xff,0xff,0x01,0x3c]
-# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
+# CHECK: addiu $1, $zero, -2 # encoding: [0xfe,0xff,0x01,0x24]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
# CHECK: ori $1, $1, 65534 # encoding: [0xfe,0xff,0x21,0x34]
# CHECK: dsll $1, $1, 16 # encoding: [0x38,0x0c,0x01,0x00]
diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s
index 03ea6c15c333..24ed1ffc8d60 100644
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@@ -40,7 +40,7 @@ a:
bltzall $6,488 # CHECK: bltzall $6, 488 # encoding: [0x04,0xd2,0x00,0x7a]
bltzl $s1,-9964 # CHECK: bltzl $17, -9964 # encoding: [0x06,0x22,0xf6,0x45]
bnel $gp,$s4,5107 # CHECK: bnel $gp, $20, 5107 # encoding: [0x57,0x94,0x04,0xfc]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
c.ngl.d $f29,$f29
c.ngle.d $f0,$f16
c.sf.d $f30,$f0
@@ -163,10 +163,10 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $25,$a2
+ move $a0,$a3 # CHECK: move $4, $7 # encoding: [0x00,0xe0,0x20,0x25]
+ move $s5,$a0 # CHECK: move $21, $4 # encoding: [0x00,0x80,0xa8,0x25]
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips64r2/invalid.s b/test/MC/Mips/mips64r2/invalid.s
index f53cfff40438..86bfc959fff0 100644
--- a/test/MC/Mips/mips64r2/invalid.s
+++ b/test/MC/Mips/mips64r2/invalid.s
@@ -2,9 +2,67 @@
# invalid set of operands or operand's restrictions not met).
# RUN: not llvm-mc %s -triple=mips64-unknown-linux -mcpu=mips64r2 2>%t1
-# RUN: FileCheck %s < %t1 -check-prefix=ASM
+# RUN: FileCheck %s < %t1
.text
.set noreorder
- jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+ cache -1, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ # FIXME: Check various 'pos + size' constraints on dext*
+ dext $2, $3, -1, 1 # CHECK: :[[@LINE]]:22: error: expected 5-bit unsigned immediate
+ dext $2, $3, 32, 1 # CHECK: :[[@LINE]]:22: error: expected 5-bit unsigned immediate
+ dext $2, $3, 1, 0 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 32
+ dext $2, $3, 1, 33 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 32
+ dextm $2, $3, -1, 1 # CHECK: :[[@LINE]]:23: error: expected 5-bit unsigned immediate
+ dextm $2, $3, 32, 1 # CHECK: :[[@LINE]]:23: error: expected 5-bit unsigned immediate
+ dextm $2, $3, -1, 33 # CHECK: :[[@LINE]]:23: error: expected 5-bit unsigned immediate
+ dextm $2, $3, 32, 33 # CHECK: :[[@LINE]]:23: error: expected 5-bit unsigned immediate
+ dextm $2, $3, 1, 32 # CHECK: :[[@LINE]]:26: error: expected immediate in range 33 .. 64
+ dextm $2, $3, 1, 65 # CHECK: :[[@LINE]]:26: error: expected immediate in range 33 .. 64
+ dextu $2, $3, 31, 1 # CHECK: :[[@LINE]]:23: error: expected immediate in range 32 .. 63
+ dextu $2, $3, 64, 1 # CHECK: :[[@LINE]]:23: error: expected immediate in range 32 .. 63
+ dextu $2, $3, 32, 0 # CHECK: :[[@LINE]]:27: error: expected immediate in range 1 .. 32
+ dextu $2, $3, 32, 33 # CHECK: :[[@LINE]]:27: error: expected immediate in range 1 .. 32
+ # FIXME: Check size on dins*
+ dins $2, $3, -1, 1 # CHECK: :[[@LINE]]:22: error: expected 6-bit unsigned immediate
+ dins $2, $3, 64, 1 # CHECK: :[[@LINE]]:22: error: expected 6-bit unsigned immediate
+ dinsm $2, $3, -1, 1 # CHECK: :[[@LINE]]:23: error: expected 5-bit unsigned immediate
+ dinsm $2, $3, 32, 1 # CHECK: :[[@LINE]]:23: error: expected 5-bit unsigned immediate
+ dinsu $2, $3, 31, 1 # CHECK: :[[@LINE]]:23: error: expected immediate in range 32 .. 63
+ dinsu $2, $3, 64, 1 # CHECK: :[[@LINE]]:23: error: expected immediate in range 32 .. 63
+ drotr $2, $3, -1 # CHECK: :[[@LINE]]:23: error: expected 6-bit unsigned immediate
+ drotr $2, $3, 64 # CHECK: :[[@LINE]]:23: error: expected 6-bit unsigned immediate
+ drotr32 $2, $3, -1 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ drotr32 $2, $3, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ dsll $2, $3, -1 # CHECK: :[[@LINE]]:22: error: expected 6-bit unsigned immediate
+ dsll $2, $3, 64 # CHECK: :[[@LINE]]:22: error: expected 6-bit unsigned immediate
+ dsll32 $2, $3, -1 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ dsll32 $2, $3, 32 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ dsrl $2, $3, -1 # CHECK: :[[@LINE]]:22: error: expected 6-bit unsigned immediate
+ dsrl $2, $3, 64 # CHECK: :[[@LINE]]:22: error: expected 6-bit unsigned immediate
+ dsrl32 $2, $3, -1 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ dsrl32 $2, $3, 64 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ dsra $2, $3, -1 # CHECK: :[[@LINE]]:22: error: expected 6-bit unsigned immediate
+ dsra $2, $3, 64 # CHECK: :[[@LINE]]:22: error: expected 6-bit unsigned immediate
+ dsra32 $2, $3, -1 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ dsra32 $2, $3, 64 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ # FIXME: Check '0 < pos + size <= 32' constraint on ext
+ ext $2, $3, -1, 1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ ext $2, $3, 32, 1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ ext $2, $3, 1, 0 # CHECK: :[[@LINE]]:24: error: expected immediate in range 1 .. 32
+ ext $2, $3, 1, 33 # CHECK: :[[@LINE]]:24: error: expected immediate in range 1 .. 32
+ # FIXME: Check size on ins
+ ins $2, $3, -1, 1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ ins $2, $3, 32, 1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: source and destination must be different
+ pref -1, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ sll $2, $3, -1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ sll $2, $3, 32 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ srl $2, $3, -1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ srl $2, $3, 32 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ sra $2, $3, -1 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ sra $2, $3, 32 # CHECK: :[[@LINE]]:21: error: expected 5-bit unsigned immediate
+ rotr $2, $3, -1 # CHECK: :[[@LINE]]:22: error: expected 5-bit unsigned immediate
+ rotr $2, $3, 32 # CHECK: :[[@LINE]]:22: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/mips64r2/valid-xfail.s b/test/MC/Mips/mips64r2/valid-xfail.s
index 148758cd3263..bf17b35c446c 100644
--- a/test/MC/Mips/mips64r2/valid-xfail.s
+++ b/test/MC/Mips/mips64r2/valid-xfail.s
@@ -8,34 +8,10 @@
.set noat
abs.ps $f22,$f8
- absq_s.ph $8,$a0
- absq_s.qb $15,$s1
- absq_s.w $s3,$ra
add.ps $f25,$f27,$f13
- addq.ph $s1,$15,$at
- addq_s.ph $s3,$s6,$s2
- addq_s.w $a2,$8,$at
- addqh.ph $s4,$14,$s1
addqh.w $s7,$s7,$k1
- addqh_r.ph $sp,$25,$s8
addqh_r.w $8,$v1,$zero
- addsc $s8,$15,$12
- addu.ph $a2,$14,$s3
- addu.qb $s6,$v1,$v1
- addu_s.ph $a3,$s3,$gp
- addu_s.qb $s4,$s8,$s1
- adduh.qb $a1,$a1,$at
- adduh_r.qb $a0,$9,$12
- addwc $k0,$s6,$s7
- alnv.ob $v22,$v19,$v30,$v1
- alnv.ob $v31,$v23,$v30,$at
- alnv.ob $v8,$v17,$v30,$a1
alnv.ps $f12,$f18,$f30,$12
- and.v $w10,$w25,$w29
- bitrev $14,$at
- bmnz.v $w15,$w2,$w28
- bmz.v $w13,$w11,$w21
- bsel.v $w28,$w7,$w0
c.eq.d $fcc1,$f15,$f15
c.eq.ps $fcc5,$f0,$f9
c.eq.s $fcc5,$f24,$f17
@@ -81,18 +57,6 @@
c.un.ps $fcc4,$f2,$f26
c.un.s $fcc1,$f30,$f4
cvt.ps.s $f3,$f18,$f19
- cmp.eq.ph $s7,$14
- cmp.le.ph $8,$14
- cmp.lt.ph $k0,$sp
- cmpgdu.eq.qb $s3,$zero,$k0
- cmpgdu.le.qb $v1,$15,$s2
- cmpgdu.lt.qb $s0,$gp,$sp
- cmpgu.eq.qb $14,$s6,$s8
- cmpgu.le.qb $9,$a3,$s4
- cmpgu.lt.qb $sp,$at,$8
- cmpu.eq.qb $v0,$24
- cmpu.le.qb $s1,$a1
- cmpu.lt.qb $at,$a3
cvt.s.pl $f30,$f1
cvt.s.pu $f14,$f25
dmfc0 $10,c0_watchhi,2
@@ -100,90 +64,22 @@
dmt $k0
dmtc0 $15,c0_datalo
dmtgc0 $a2,c0_watchlo,2
- dpa.w.ph $ac1,$s7,$k0
- dpaq_s.w.ph $ac2,$a0,$13
- dpaq_sa.l.w $ac0,$a2,$14
- dpaqx_s.w.ph $ac3,$a0,$24
- dpaqx_sa.w.ph $ac1,$zero,$s5
- dpau.h.qbl $ac1,$10,$24
- dpau.h.qbr $ac1,$s7,$s6
- dpax.w.ph $ac3,$a0,$k0
- dps.w.ph $ac1,$a3,$a1
- dpsq_s.w.ph $ac0,$gp,$k0
- dpsq_sa.l.w $ac0,$a3,$15
- dpsqx_s.w.ph $ac3,$13,$a3
- dpsqx_sa.w.ph $ac3,$sp,$s2
- dpsu.h.qbl $ac2,$14,$10
- dpsu.h.qbr $ac2,$a1,$s6
- dpsx.w.ph $ac0,$s7,$gp
drorv $at,$a1,$s7
dvpe $s6
emt $8
evpe $v0
- extpdpv $s6,$ac0,$s8
- extpv $13,$ac0,$14
- extrv.w $8,$ac3,$at
- extrv_r.w $8,$ac1,$s6
- extrv_rs.w $gp,$ac1,$s6
- extrv_s.h $s2,$ac1,$14
- fclass.d $w14,$w27
- fclass.w $w19,$w28
- fexupl.d $w10,$w29
- fexupl.w $w12,$w27
- fexupr.d $w31,$w15
- fexupr.w $w29,$w12
- ffint_s.d $w1,$w30
- ffint_s.w $w16,$w14
- ffint_u.d $w23,$w18
- ffint_u.w $w19,$w12
- ffql.d $w2,$w3
- ffql.w $w9,$w0
- ffqr.d $w25,$w24
- ffqr.w $w10,$w6
- fill.b $w9,$v1
- fill.d $w28,$8
- fill.h $w9,$8
- fill.w $w31,$15
- flog2.d $w12,$w16
- flog2.w $w19,$w23
fork $s2,$8,$a0
- frcp.d $w12,$w4
- frcp.w $w30,$w8
- frint.d $w20,$w8
- frint.w $w11,$w29
- frsqrt.d $w29,$w2
- frsqrt.w $w9,$w8
- fsqrt.d $w3,$w1
- fsqrt.w $w5,$w15
- ftint_s.d $w31,$w26
- ftint_s.w $w27,$w14
- ftint_u.d $w5,$w31
- ftint_u.w $w12,$w29
- ftrunc_s.d $w4,$w22
- ftrunc_s.w $w24,$w7
- ftrunc_u.d $w20,$w25
- ftrunc_u.w $w7,$w26
- insv $s2,$at
iret
- lbe $14,122($9)
+ lbe $14,122($9)
lbue $11,-108($10)
- lbux $9,$14($v0)
- lhe $s6,219($v1)
+ lhe $s6,219($v1)
lhue $gp,118($11)
- lhx $sp,$k0($15)
- lle $gp,-237($ra)
- lwe $ra,-145($14)
+ lle $gp,-237($ra)
+ lwe $ra,-145($14)
lwle $11,-42($11)
lwre $sp,-152($24)
- lwx $12,$12($s4)
madd.ps $f22,$f3,$f14,$f3
- maq_s.w.phl $ac2,$25,$11
- maq_s.w.phr $ac0,$10,$25
- maq_sa.w.phl $ac3,$a1,$v1
- maq_sa.w.phr $ac1,$at,$10
mfgc0 $s6,c0_datahi1
- mflo $9,$ac2
- modsub $a3,$12,$a3
mov.ps $f22,$f17
movf.ps $f10,$f28,$fcc6
movn.ps $f31,$f31,$s3
@@ -191,106 +87,30 @@
movz.ps $f18,$f17,$ra
msgn.qh $v0,$v24,$v20
msgn.qh $v12,$v21,$v0[1]
- msub $ac2,$sp,$14
msub.ps $f12,$f14,$f29,$f17
- msubu $ac2,$a1,$24
mtc0 $9,c0_datahi1
mtgc0 $s4,$21,7
- mthi $v0,$ac1
- mthlip $a3,$ac0
- mul.ph $s4,$24,$s0
mul.ps $f14,$f0,$f16
- mul_s.ph $10,$14,$15
- muleq_s.w.phl $11,$s4,$s4
- muleq_s.w.phr $s6,$a0,$s8
- muleu_s.ph.qbl $a2,$14,$8
- muleu_s.ph.qbr $a1,$ra,$9
- mulq_rs.ph $s2,$14,$15
- mulq_rs.w $at,$s4,$25
- mulq_s.ph $s0,$k1,$15
- mulq_s.w $9,$a3,$s0
- mulsa.w.ph $ac1,$s4,$s6
- mulsaq_s.w.ph $ac0,$ra,$s2
neg.ps $f19,$f13
- nloc.b $w12,$w30
- nloc.d $w16,$w7
- nloc.h $w21,$w17
- nloc.w $w17,$w16
- nlzc.b $w12,$w7
- nlzc.d $w14,$w14
- nlzc.h $w24,$w24
- nlzc.w $w10,$w4
nmadd.ps $f27,$f4,$f9,$f25
nmsub.ps $f6,$f12,$f14,$f17
- nor.v $w20,$w20,$w15
- or.v $w13,$w23,$w12
- packrl.ph $ra,$24,$14
- pcnt.b $w30,$w15
- pcnt.d $w5,$w16
- pcnt.h $w20,$w24
- pcnt.w $w22,$w20
- pick.ph $ra,$a2,$gp
- pick.qb $11,$a0,$gp
pll.ps $f25,$f9,$f30
plu.ps $f1,$f26,$f29
preceq.w.phl $s8,$gp
preceq.w.phr $s5,$15
- precequ.ph.qbl $s7,$ra
- precequ.ph.qbla $a0,$9
- precequ.ph.qbr $ra,$s3
- precequ.ph.qbra $24,$8
- preceu.ph.qbl $sp,$8
- preceu.ph.qbla $s6,$11
- preceu.ph.qbr $gp,$s1
- preceu.ph.qbra $k1,$s0
- precr.qb.ph $v0,$12,$s8
- precrq.ph.w $14,$s8,$24
- precrq.qb.ph $a2,$12,$12
- precrq_rs.ph.w $a1,$k0,$a3
- precrqu_s.qb.ph $zero,$gp,$s5
pul.ps $f9,$f30,$f26
puu.ps $f24,$f9,$f2
- raddu.w.qb $25,$s3
rdpgpr $s3,$9
recip.d $f19,$f6
recip.s $f3,$f30
- repl.ph $at,-307
- replv.ph $v1,$s7
- replv.qb $25,$12
rorv $13,$a3,$s5
rsqrt.d $f3,$f28
rsqrt.s $f4,$f8
- sbe $s7,33($s1)
- sce $sp,189($10)
- she $24,105($v0)
- shilo $ac1,26
- shilov $ac2,$10
- shllv.ph $10,$s0,$s0
- shllv.qb $gp,$v1,$zero
- shllv_s.ph $k1,$at,$13
- shllv_s.w $s1,$ra,$k0
- shrav.ph $25,$s2,$s1
- shrav.qb $zero,$24,$11
- shrav_r.ph $s3,$11,$25
- shrav_r.qb $a0,$sp,$s5
- shrav_r.w $s7,$s4,$s6
- shrlv.ph $14,$10,$9
- shrlv.qb $a2,$s2,$11
+ sbe $s7,33($s1)
+ sce $sp,189($10)
+ she $24,105($v0)
sub.ps $f5,$f14,$f26
- subq.ph $ra,$9,$s8
- subq_s.ph $13,$s8,$s5
- subq_s.w $k1,$a2,$a3
- subqh.ph $10,$at,$9
- subqh.w $v0,$a2,$zero
- subqh_r.ph $a0,$12,$s6
- subqh_r.w $10,$a2,$gp
- subu.ph $9,$s6,$s4
- subu.qb $s6,$a2,$s6
- subu_s.ph $v1,$a1,$s3
- subu_s.qb $s1,$at,$ra
- subuh.qb $zero,$gp,$gp
- subuh_r.qb $s4,$s8,$s6
- swe $24,94($k0)
+ swe $24,94($k0)
swle $v1,-209($gp)
swre $k0,-202($s2)
tlbginv
@@ -302,5 +122,4 @@
tlbinv
tlbinvf
wrpgpr $zero,$13
- xor.v $w20,$w21,$w30
yield $v1,$s0
diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s
index 37753ae0a878..e571d9365913 100644
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@@ -40,7 +40,7 @@ a:
bltzall $6,488 # CHECK: bltzall $6, 488 # encoding: [0x04,0xd2,0x00,0x7a]
bltzl $s1,-9964 # CHECK: bltzl $17, -9964 # encoding: [0x06,0x22,0xf6,0x45]
bnel $gp,$s4,5107 # CHECK: bnel $gp, $20, 5107 # encoding: [0x57,0x94,0x04,0xfc]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
c.ngl.d $f29,$f29
c.ngle.d $f0,$f16
c.sf.d $f30,$f0
@@ -179,10 +179,10 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $25,$a2
+ move $a0,$a3 # CHECK: move $4, $7 # encoding: [0x00,0xe0,0x20,0x25]
+ move $s5,$a0 # CHECK: move $21, $4 # encoding: [0x00,0x80,0xa8,0x25]
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips64r3/invalid.s b/test/MC/Mips/mips64r3/invalid.s
index 99cd0803d202..99071211ba4b 100644
--- a/test/MC/Mips/mips64r3/invalid.s
+++ b/test/MC/Mips/mips64r3/invalid.s
@@ -2,9 +2,15 @@
# invalid set of operands or operand's restrictions not met).
# RUN: not llvm-mc %s -triple=mips64-unknown-linux -mcpu=mips64r3 2>%t1
-# RUN: FileCheck %s < %t1 -check-prefix=ASM
+# RUN: FileCheck %s < %t1
.text
.set noreorder
- jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+ cache -1, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ drotr32 $2, $3, -1 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ drotr32 $2, $3, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: source and destination must be different
+ pref -1, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/mips64r3/valid-xfail.s b/test/MC/Mips/mips64r3/valid-xfail.s
index f2949c4f2dda..7e94200dfd62 100644
--- a/test/MC/Mips/mips64r3/valid-xfail.s
+++ b/test/MC/Mips/mips64r3/valid-xfail.s
@@ -8,34 +8,13 @@
.set noat
abs.ps $f22,$f8
- absq_s.ph $8,$a0
- absq_s.qb $15,$s1
- absq_s.w $s3,$ra
add.ps $f25,$f27,$f13
- addq.ph $s1,$15,$at
- addq_s.ph $s3,$s6,$s2
- addq_s.w $a2,$8,$at
- addqh.ph $s4,$14,$s1
addqh.w $s7,$s7,$k1
- addqh_r.ph $sp,$25,$s8
addqh_r.w $8,$v1,$zero
- addsc $s8,$15,$12
- addu.ph $a2,$14,$s3
- addu.qb $s6,$v1,$v1
- addu_s.ph $a3,$s3,$gp
- addu_s.qb $s4,$s8,$s1
- adduh.qb $a1,$a1,$at
- adduh_r.qb $a0,$9,$12
- addwc $k0,$s6,$s7
alnv.ob $v22,$v19,$v30,$v1
alnv.ob $v31,$v23,$v30,$at
alnv.ob $v8,$v17,$v30,$a1
alnv.ps $f12,$f18,$f30,$12
- and.v $w10,$w25,$w29
- bitrev $14,$at
- bmnz.v $w15,$w2,$w28
- bmz.v $w13,$w11,$w21
- bsel.v $w28,$w7,$w0
c.eq.d $fcc1,$f15,$f15
c.eq.ps $fcc5,$f0,$f9
c.eq.s $fcc5,$f24,$f17
@@ -81,18 +60,6 @@
c.un.ps $fcc4,$f2,$f26
c.un.s $fcc1,$f30,$f4
cvt.ps.s $f3,$f18,$f19
- cmp.eq.ph $s7,$14
- cmp.le.ph $8,$14
- cmp.lt.ph $k0,$sp
- cmpgdu.eq.qb $s3,$zero,$k0
- cmpgdu.le.qb $v1,$15,$s2
- cmpgdu.lt.qb $s0,$gp,$sp
- cmpgu.eq.qb $14,$s6,$s8
- cmpgu.le.qb $9,$a3,$s4
- cmpgu.lt.qb $sp,$at,$8
- cmpu.eq.qb $v0,$24
- cmpu.le.qb $s1,$a1
- cmpu.lt.qb $at,$a3
cvt.s.pl $f30,$f1
cvt.s.pu $f14,$f25
dmfc0 $10,c0_watchhi,2
@@ -100,90 +67,22 @@
dmt $k0
dmtc0 $15,c0_datalo
dmtgc0 $a2,c0_watchlo,2
- dpa.w.ph $ac1,$s7,$k0
- dpaq_s.w.ph $ac2,$a0,$13
- dpaq_sa.l.w $ac0,$a2,$14
- dpaqx_s.w.ph $ac3,$a0,$24
- dpaqx_sa.w.ph $ac1,$zero,$s5
- dpau.h.qbl $ac1,$10,$24
- dpau.h.qbr $ac1,$s7,$s6
- dpax.w.ph $ac3,$a0,$k0
- dps.w.ph $ac1,$a3,$a1
- dpsq_s.w.ph $ac0,$gp,$k0
- dpsq_sa.l.w $ac0,$a3,$15
- dpsqx_s.w.ph $ac3,$13,$a3
- dpsqx_sa.w.ph $ac3,$sp,$s2
- dpsu.h.qbl $ac2,$14,$10
- dpsu.h.qbr $ac2,$a1,$s6
- dpsx.w.ph $ac0,$s7,$gp
drorv $at,$a1,$s7
dvpe $s6
emt $8
evpe $v0
- extpdpv $s6,$ac0,$s8
- extpv $13,$ac0,$14
- extrv.w $8,$ac3,$at
- extrv_r.w $8,$ac1,$s6
- extrv_rs.w $gp,$ac1,$s6
- extrv_s.h $s2,$ac1,$14
- fclass.d $w14,$w27
- fclass.w $w19,$w28
- fexupl.d $w10,$w29
- fexupl.w $w12,$w27
- fexupr.d $w31,$w15
- fexupr.w $w29,$w12
- ffint_s.d $w1,$w30
- ffint_s.w $w16,$w14
- ffint_u.d $w23,$w18
- ffint_u.w $w19,$w12
- ffql.d $w2,$w3
- ffql.w $w9,$w0
- ffqr.d $w25,$w24
- ffqr.w $w10,$w6
- fill.b $w9,$v1
- fill.d $w28,$8
- fill.h $w9,$8
- fill.w $w31,$15
- flog2.d $w12,$w16
- flog2.w $w19,$w23
fork $s2,$8,$a0
- frcp.d $w12,$w4
- frcp.w $w30,$w8
- frint.d $w20,$w8
- frint.w $w11,$w29
- frsqrt.d $w29,$w2
- frsqrt.w $w9,$w8
- fsqrt.d $w3,$w1
- fsqrt.w $w5,$w15
- ftint_s.d $w31,$w26
- ftint_s.w $w27,$w14
- ftint_u.d $w5,$w31
- ftint_u.w $w12,$w29
- ftrunc_s.d $w4,$w22
- ftrunc_s.w $w24,$w7
- ftrunc_u.d $w20,$w25
- ftrunc_u.w $w7,$w26
- insv $s2,$at
iret
- lbe $14,122($9)
+ lbe $14,122($9)
lbue $11,-108($10)
- lbux $9,$14($v0)
- lhe $s6,219($v1)
+ lhe $s6,219($v1)
lhue $gp,118($11)
- lhx $sp,$k0($15)
- lle $gp,-237($ra)
- lwe $ra,-145($14)
+ lle $gp,-237($ra)
+ lwe $ra,-145($14)
lwle $11,-42($11)
lwre $sp,-152($24)
- lwx $12,$12($s4)
madd.ps $f22,$f3,$f14,$f3
- maq_s.w.phl $ac2,$25,$11
- maq_s.w.phr $ac0,$10,$25
- maq_sa.w.phl $ac3,$a1,$v1
- maq_sa.w.phr $ac1,$at,$10
mfgc0 $s6,c0_datahi1
- mflo $9,$ac2
- modsub $a3,$12,$a3
mov.ps $f22,$f17
movf.ps $f10,$f28,$fcc6
movn.ps $f31,$f31,$s3
@@ -191,106 +90,30 @@
movz.ps $f18,$f17,$ra
msgn.qh $v0,$v24,$v20
msgn.qh $v12,$v21,$v0[1]
- msub $ac2,$sp,$14
msub.ps $f12,$f14,$f29,$f17
- msubu $ac2,$a1,$24
mtc0 $9,c0_datahi1
mtgc0 $s4,$21,7
- mthi $v0,$ac1
- mthlip $a3,$ac0
- mul.ph $s4,$24,$s0
mul.ps $f14,$f0,$f16
- mul_s.ph $10,$14,$15
- muleq_s.w.phl $11,$s4,$s4
- muleq_s.w.phr $s6,$a0,$s8
- muleu_s.ph.qbl $a2,$14,$8
- muleu_s.ph.qbr $a1,$ra,$9
- mulq_rs.ph $s2,$14,$15
- mulq_rs.w $at,$s4,$25
- mulq_s.ph $s0,$k1,$15
- mulq_s.w $9,$a3,$s0
- mulsa.w.ph $ac1,$s4,$s6
- mulsaq_s.w.ph $ac0,$ra,$s2
neg.ps $f19,$f13
- nloc.b $w12,$w30
- nloc.d $w16,$w7
- nloc.h $w21,$w17
- nloc.w $w17,$w16
- nlzc.b $w12,$w7
- nlzc.d $w14,$w14
- nlzc.h $w24,$w24
- nlzc.w $w10,$w4
nmadd.ps $f27,$f4,$f9,$f25
nmsub.ps $f6,$f12,$f14,$f17
- nor.v $w20,$w20,$w15
- or.v $w13,$w23,$w12
- packrl.ph $ra,$24,$14
- pcnt.b $w30,$w15
- pcnt.d $w5,$w16
- pcnt.h $w20,$w24
- pcnt.w $w22,$w20
- pick.ph $ra,$a2,$gp
- pick.qb $11,$a0,$gp
pll.ps $f25,$f9,$f30
plu.ps $f1,$f26,$f29
preceq.w.phl $s8,$gp
preceq.w.phr $s5,$15
- precequ.ph.qbl $s7,$ra
- precequ.ph.qbla $a0,$9
- precequ.ph.qbr $ra,$s3
- precequ.ph.qbra $24,$8
- preceu.ph.qbl $sp,$8
- preceu.ph.qbla $s6,$11
- preceu.ph.qbr $gp,$s1
- preceu.ph.qbra $k1,$s0
- precr.qb.ph $v0,$12,$s8
- precrq.ph.w $14,$s8,$24
- precrq.qb.ph $a2,$12,$12
- precrq_rs.ph.w $a1,$k0,$a3
- precrqu_s.qb.ph $zero,$gp,$s5
pul.ps $f9,$f30,$f26
puu.ps $f24,$f9,$f2
- raddu.w.qb $25,$s3
rdpgpr $s3,$9
recip.d $f19,$f6
recip.s $f3,$f30
- repl.ph $at,-307
- replv.ph $v1,$s7
- replv.qb $25,$12
rorv $13,$a3,$s5
rsqrt.d $f3,$f28
rsqrt.s $f4,$f8
- sbe $s7,33($s1)
- sce $sp,189($10)
- she $24,105($v0)
- shilo $ac1,26
- shilov $ac2,$10
- shllv.ph $10,$s0,$s0
- shllv.qb $gp,$v1,$zero
- shllv_s.ph $k1,$at,$13
- shllv_s.w $s1,$ra,$k0
- shrav.ph $25,$s2,$s1
- shrav.qb $zero,$24,$11
- shrav_r.ph $s3,$11,$25
- shrav_r.qb $a0,$sp,$s5
- shrav_r.w $s7,$s4,$s6
- shrlv.ph $14,$10,$9
- shrlv.qb $a2,$s2,$11
+ sbe $s7,33($s1)
+ sce $sp,189($10)
+ she $24,105($v0)
sub.ps $f5,$f14,$f26
- subq.ph $ra,$9,$s8
- subq_s.ph $13,$s8,$s5
- subq_s.w $k1,$a2,$a3
- subqh.ph $10,$at,$9
- subqh.w $v0,$a2,$zero
- subqh_r.ph $a0,$12,$s6
- subqh_r.w $10,$a2,$gp
- subu.ph $9,$s6,$s4
- subu.qb $s6,$a2,$s6
- subu_s.ph $v1,$a1,$s3
- subu_s.qb $s1,$at,$ra
- subuh.qb $zero,$gp,$gp
- subuh_r.qb $s4,$s8,$s6
- swe $24,94($k0)
+ swe $24,94($k0)
swle $v1,-209($gp)
swre $k0,-202($s2)
tlbginv
@@ -302,5 +125,4 @@
tlbinv
tlbinvf
wrpgpr $zero,$13
- xor.v $w20,$w21,$w30
yield $v1,$s0
diff --git a/test/MC/Mips/mips64r3/valid.s b/test/MC/Mips/mips64r3/valid.s
index c5d4848458d2..4bde82eb8ec1 100644
--- a/test/MC/Mips/mips64r3/valid.s
+++ b/test/MC/Mips/mips64r3/valid.s
@@ -40,7 +40,7 @@ a:
bltzall $6,488 # CHECK: bltzall $6, 488 # encoding: [0x04,0xd2,0x00,0x7a]
bltzl $s1,-9964 # CHECK: bltzl $17, -9964 # encoding: [0x06,0x22,0xf6,0x45]
bnel $gp,$s4,5107 # CHECK: bnel $gp, $20, 5107 # encoding: [0x57,0x94,0x04,0xfc]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
c.ngl.d $f29,$f29
c.ngle.d $f0,$f16
c.sf.d $f30,$f0
@@ -179,10 +179,10 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $25,$a2
+ move $a0,$a3 # CHECK: move $4, $7 # encoding: [0x00,0xe0,0x20,0x25]
+ move $s5,$a0 # CHECK: move $21, $4 # encoding: [0x00,0x80,0xa8,0x25]
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips64r5/invalid-mips64.s b/test/MC/Mips/mips64r5/invalid-mips64.s
new file mode 100644
index 000000000000..412fb31a1043
--- /dev/null
+++ b/test/MC/Mips/mips64r5/invalid-mips64.s
@@ -0,0 +1,8 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ eretnc # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r5/invalid-mips64r2.s b/test/MC/Mips/mips64r5/invalid-mips64r2.s
new file mode 100644
index 000000000000..1ee159429a70
--- /dev/null
+++ b/test/MC/Mips/mips64r5/invalid-mips64r2.s
@@ -0,0 +1,8 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r2 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ eretnc # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r5/invalid-mips64r3.s b/test/MC/Mips/mips64r5/invalid-mips64r3.s
new file mode 100644
index 000000000000..e80d7a1e9b4f
--- /dev/null
+++ b/test/MC/Mips/mips64r5/invalid-mips64r3.s
@@ -0,0 +1,8 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64r3 \
+# RUN: 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ eretnc # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r5/invalid.s b/test/MC/Mips/mips64r5/invalid.s
index 8319debaa30c..3bdea15f5236 100644
--- a/test/MC/Mips/mips64r5/invalid.s
+++ b/test/MC/Mips/mips64r5/invalid.s
@@ -2,9 +2,15 @@
# invalid set of operands or operand's restrictions not met).
# RUN: not llvm-mc %s -triple=mips64-unknown-linux -mcpu=mips64r5 2>%t1
-# RUN: FileCheck %s < %t1 -check-prefix=ASM
+# RUN: FileCheck %s < %t1
.text
.set noreorder
- jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
+ cache -1, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ drotr32 $2, $3, -1 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ drotr32 $2, $3, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: source and destination must be different
+ pref -1, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/mips64r5/valid-xfail.s b/test/MC/Mips/mips64r5/valid-xfail.s
index 04221ddb8630..b5ecdcbfb726 100644
--- a/test/MC/Mips/mips64r5/valid-xfail.s
+++ b/test/MC/Mips/mips64r5/valid-xfail.s
@@ -8,34 +8,13 @@
.set noat
abs.ps $f22,$f8
- absq_s.ph $8,$a0
- absq_s.qb $15,$s1
- absq_s.w $s3,$ra
add.ps $f25,$f27,$f13
- addq.ph $s1,$15,$at
- addq_s.ph $s3,$s6,$s2
- addq_s.w $a2,$8,$at
- addqh.ph $s4,$14,$s1
addqh.w $s7,$s7,$k1
- addqh_r.ph $sp,$25,$s8
addqh_r.w $8,$v1,$zero
- addsc $s8,$15,$12
- addu.ph $a2,$14,$s3
- addu.qb $s6,$v1,$v1
- addu_s.ph $a3,$s3,$gp
- addu_s.qb $s4,$s8,$s1
- adduh.qb $a1,$a1,$at
- adduh_r.qb $a0,$9,$12
- addwc $k0,$s6,$s7
alnv.ob $v22,$v19,$v30,$v1
alnv.ob $v31,$v23,$v30,$at
alnv.ob $v8,$v17,$v30,$a1
alnv.ps $f12,$f18,$f30,$12
- and.v $w10,$w25,$w29
- bitrev $14,$at
- bmnz.v $w15,$w2,$w28
- bmz.v $w13,$w11,$w21
- bsel.v $w28,$w7,$w0
c.eq.d $fcc1,$f15,$f15
c.eq.ps $fcc5,$f0,$f9
c.eq.s $fcc5,$f24,$f17
@@ -81,18 +60,6 @@
c.un.ps $fcc4,$f2,$f26
c.un.s $fcc1,$f30,$f4
cvt.ps.s $f3,$f18,$f19
- cmp.eq.ph $s7,$14
- cmp.le.ph $8,$14
- cmp.lt.ph $k0,$sp
- cmpgdu.eq.qb $s3,$zero,$k0
- cmpgdu.le.qb $v1,$15,$s2
- cmpgdu.lt.qb $s0,$gp,$sp
- cmpgu.eq.qb $14,$s6,$s8
- cmpgu.le.qb $9,$a3,$s4
- cmpgu.lt.qb $sp,$at,$8
- cmpu.eq.qb $v0,$24
- cmpu.le.qb $s1,$a1
- cmpu.lt.qb $at,$a3
cvt.s.pl $f30,$f1
cvt.s.pu $f14,$f25
dmfc0 $10,c0_watchhi,2
@@ -100,90 +67,22 @@
dmt $k0
dmtc0 $15,c0_datalo
dmtgc0 $a2,c0_watchlo,2
- dpa.w.ph $ac1,$s7,$k0
- dpaq_s.w.ph $ac2,$a0,$13
- dpaq_sa.l.w $ac0,$a2,$14
- dpaqx_s.w.ph $ac3,$a0,$24
- dpaqx_sa.w.ph $ac1,$zero,$s5
- dpau.h.qbl $ac1,$10,$24
- dpau.h.qbr $ac1,$s7,$s6
- dpax.w.ph $ac3,$a0,$k0
- dps.w.ph $ac1,$a3,$a1
- dpsq_s.w.ph $ac0,$gp,$k0
- dpsq_sa.l.w $ac0,$a3,$15
- dpsqx_s.w.ph $ac3,$13,$a3
- dpsqx_sa.w.ph $ac3,$sp,$s2
- dpsu.h.qbl $ac2,$14,$10
- dpsu.h.qbr $ac2,$a1,$s6
- dpsx.w.ph $ac0,$s7,$gp
drorv $at,$a1,$s7
dvpe $s6
emt $8
evpe $v0
- extpdpv $s6,$ac0,$s8
- extpv $13,$ac0,$14
- extrv.w $8,$ac3,$at
- extrv_r.w $8,$ac1,$s6
- extrv_rs.w $gp,$ac1,$s6
- extrv_s.h $s2,$ac1,$14
- fclass.d $w14,$w27
- fclass.w $w19,$w28
- fexupl.d $w10,$w29
- fexupl.w $w12,$w27
- fexupr.d $w31,$w15
- fexupr.w $w29,$w12
- ffint_s.d $w1,$w30
- ffint_s.w $w16,$w14
- ffint_u.d $w23,$w18
- ffint_u.w $w19,$w12
- ffql.d $w2,$w3
- ffql.w $w9,$w0
- ffqr.d $w25,$w24
- ffqr.w $w10,$w6
- fill.b $w9,$v1
- fill.d $w28,$8
- fill.h $w9,$8
- fill.w $w31,$15
- flog2.d $w12,$w16
- flog2.w $w19,$w23
fork $s2,$8,$a0
- frcp.d $w12,$w4
- frcp.w $w30,$w8
- frint.d $w20,$w8
- frint.w $w11,$w29
- frsqrt.d $w29,$w2
- frsqrt.w $w9,$w8
- fsqrt.d $w3,$w1
- fsqrt.w $w5,$w15
- ftint_s.d $w31,$w26
- ftint_s.w $w27,$w14
- ftint_u.d $w5,$w31
- ftint_u.w $w12,$w29
- ftrunc_s.d $w4,$w22
- ftrunc_s.w $w24,$w7
- ftrunc_u.d $w20,$w25
- ftrunc_u.w $w7,$w26
- insv $s2,$at
iret
- lbe $14,122($9)
+ lbe $14,122($9)
lbue $11,-108($10)
- lbux $9,$14($v0)
- lhe $s6,219($v1)
+ lhe $s6,219($v1)
lhue $gp,118($11)
- lhx $sp,$k0($15)
- lle $gp,-237($ra)
- lwe $ra,-145($14)
+ lle $gp,-237($ra)
+ lwe $ra,-145($14)
lwle $11,-42($11)
lwre $sp,-152($24)
- lwx $12,$12($s4)
madd.ps $f22,$f3,$f14,$f3
- maq_s.w.phl $ac2,$25,$11
- maq_s.w.phr $ac0,$10,$25
- maq_sa.w.phl $ac3,$a1,$v1
- maq_sa.w.phr $ac1,$at,$10
mfgc0 $s6,c0_datahi1
- mflo $9,$ac2
- modsub $a3,$12,$a3
mov.ps $f22,$f17
movf.ps $f10,$f28,$fcc6
movn.ps $f31,$f31,$s3
@@ -191,106 +90,30 @@
movz.ps $f18,$f17,$ra
msgn.qh $v0,$v24,$v20
msgn.qh $v12,$v21,$v0[1]
- msub $ac2,$sp,$14
msub.ps $f12,$f14,$f29,$f17
- msubu $ac2,$a1,$24
mtc0 $9,c0_datahi1
mtgc0 $s4,$21,7
- mthi $v0,$ac1
- mthlip $a3,$ac0
- mul.ph $s4,$24,$s0
mul.ps $f14,$f0,$f16
- mul_s.ph $10,$14,$15
- muleq_s.w.phl $11,$s4,$s4
- muleq_s.w.phr $s6,$a0,$s8
- muleu_s.ph.qbl $a2,$14,$8
- muleu_s.ph.qbr $a1,$ra,$9
- mulq_rs.ph $s2,$14,$15
- mulq_rs.w $at,$s4,$25
- mulq_s.ph $s0,$k1,$15
- mulq_s.w $9,$a3,$s0
- mulsa.w.ph $ac1,$s4,$s6
- mulsaq_s.w.ph $ac0,$ra,$s2
neg.ps $f19,$f13
- nloc.b $w12,$w30
- nloc.d $w16,$w7
- nloc.h $w21,$w17
- nloc.w $w17,$w16
- nlzc.b $w12,$w7
- nlzc.d $w14,$w14
- nlzc.h $w24,$w24
- nlzc.w $w10,$w4
nmadd.ps $f27,$f4,$f9,$f25
nmsub.ps $f6,$f12,$f14,$f17
- nor.v $w20,$w20,$w15
- or.v $w13,$w23,$w12
- packrl.ph $ra,$24,$14
- pcnt.b $w30,$w15
- pcnt.d $w5,$w16
- pcnt.h $w20,$w24
- pcnt.w $w22,$w20
- pick.ph $ra,$a2,$gp
- pick.qb $11,$a0,$gp
pll.ps $f25,$f9,$f30
plu.ps $f1,$f26,$f29
preceq.w.phl $s8,$gp
preceq.w.phr $s5,$15
- precequ.ph.qbl $s7,$ra
- precequ.ph.qbla $a0,$9
- precequ.ph.qbr $ra,$s3
- precequ.ph.qbra $24,$8
- preceu.ph.qbl $sp,$8
- preceu.ph.qbla $s6,$11
- preceu.ph.qbr $gp,$s1
- preceu.ph.qbra $k1,$s0
- precr.qb.ph $v0,$12,$s8
- precrq.ph.w $14,$s8,$24
- precrq.qb.ph $a2,$12,$12
- precrq_rs.ph.w $a1,$k0,$a3
- precrqu_s.qb.ph $zero,$gp,$s5
pul.ps $f9,$f30,$f26
puu.ps $f24,$f9,$f2
- raddu.w.qb $25,$s3
rdpgpr $s3,$9
recip.d $f19,$f6
recip.s $f3,$f30
- repl.ph $at,-307
- replv.ph $v1,$s7
- replv.qb $25,$12
rorv $13,$a3,$s5
rsqrt.d $f3,$f28
rsqrt.s $f4,$f8
- sbe $s7,33($s1)
- sce $sp,189($10)
- she $24,105($v0)
- shilo $ac1,26
- shilov $ac2,$10
- shllv.ph $10,$s0,$s0
- shllv.qb $gp,$v1,$zero
- shllv_s.ph $k1,$at,$13
- shllv_s.w $s1,$ra,$k0
- shrav.ph $25,$s2,$s1
- shrav.qb $zero,$24,$11
- shrav_r.ph $s3,$11,$25
- shrav_r.qb $a0,$sp,$s5
- shrav_r.w $s7,$s4,$s6
- shrlv.ph $14,$10,$9
- shrlv.qb $a2,$s2,$11
+ sbe $s7,33($s1)
+ sce $sp,189($10)
+ she $24,105($v0)
sub.ps $f5,$f14,$f26
- subq.ph $ra,$9,$s8
- subq_s.ph $13,$s8,$s5
- subq_s.w $k1,$a2,$a3
- subqh.ph $10,$at,$9
- subqh.w $v0,$a2,$zero
- subqh_r.ph $a0,$12,$s6
- subqh_r.w $10,$a2,$gp
- subu.ph $9,$s6,$s4
- subu.qb $s6,$a2,$s6
- subu_s.ph $v1,$a1,$s3
- subu_s.qb $s1,$at,$ra
- subuh.qb $zero,$gp,$gp
- subuh_r.qb $s4,$s8,$s6
- swe $24,94($k0)
+ swe $24,94($k0)
swle $v1,-209($gp)
swre $k0,-202($s2)
tlbginv
@@ -302,5 +125,4 @@
tlbinv
tlbinvf
wrpgpr $zero,$13
- xor.v $w20,$w21,$w30
yield $v1,$s0
diff --git a/test/MC/Mips/mips64r5/valid.s b/test/MC/Mips/mips64r5/valid.s
index d4e52dcca67e..029dfa9438c7 100644
--- a/test/MC/Mips/mips64r5/valid.s
+++ b/test/MC/Mips/mips64r5/valid.s
@@ -40,7 +40,7 @@ a:
bltzall $6,488 # CHECK: bltzall $6, 488 # encoding: [0x04,0xd2,0x00,0x7a]
bltzl $s1,-9964 # CHECK: bltzl $17, -9964 # encoding: [0x06,0x22,0xf6,0x45]
bnel $gp,$s4,5107 # CHECK: bnel $gp, $20, 5107 # encoding: [0x57,0x94,0x04,0xfc]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0xbc,0xa1,0x00,0x08]
c.ngl.d $f29,$f29
c.ngle.d $f0,$f16
c.sf.d $f30,$f0
@@ -132,6 +132,7 @@ a:
ei $14 # CHECK: ei $14 # encoding: [0x41,0x6e,0x60,0x20]
ei # CHECK: ei # encoding: [0x41,0x60,0x60,0x20]
eret
+ eretnc # CHECK: eretnc # encoding: [0x42,0x00,0x00,0x58]
floor.l.d $f26,$f7
floor.l.s $f12,$f5
floor.w.d $f14,$f11
@@ -179,10 +180,10 @@ a:
mflo $s1
mov.d $f20,$f14
mov.s $f2,$f27
- move $a0,$a3
- move $s5,$a0
- move $s8,$a0
- move $25,$a2
+ move $a0,$a3 # CHECK: move $4, $7 # encoding: [0x00,0xe0,0x20,0x25]
+ move $s5,$a0 # CHECK: move $21, $4 # encoding: [0x00,0x80,0xa8,0x25]
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
movf $gp,$8,$fcc7
movf.d $f6,$f11,$fcc5
movf.s $f23,$f5,$fcc6
diff --git a/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
index 5156429340a1..06d95fd8665b 100644
--- a/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
@@ -11,7 +11,7 @@
lwr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
swl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
swr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+ lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
index eda18ac9012e..a111726fca38 100644
--- a/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
@@ -17,7 +17,7 @@
lwr $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
swl $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
swr $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+ lwle $s4,-4231($15) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ lwre $zero,-19147($gp) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swle $15,13694($s3) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ swre $s1,-26590($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s
index 06bf58c1e8fa..1cec777c27a6 100644
--- a/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s
@@ -8,4 +8,3 @@
.set noat
bc2tl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
bc2fl 4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
- prefx 0,$2($31) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips4.s b/test/MC/Mips/mips64r6/invalid-mips4.s
index 82a1196daf84..a25eeaa09871 100644
--- a/test/MC/Mips/mips64r6/invalid-mips4.s
+++ b/test/MC/Mips/mips64r6/invalid-mips4.s
@@ -14,3 +14,4 @@
lwxc1 $f12,$s1($s8) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
sdxc1 $f11,$10($14) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
swxc1 $f19,$12($k0) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ prefx 0,$2($31) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/invalid.s b/test/MC/Mips/mips64r6/invalid.s
index ae980347f306..cba20b15eea4 100644
--- a/test/MC/Mips/mips64r6/invalid.s
+++ b/test/MC/Mips/mips64r6/invalid.s
@@ -2,15 +2,44 @@
# the assembler (e.g. invalid set of operands or operand's restrictions not met).
# RUN: not llvm-mc %s -triple=mips64-unknown-linux -mcpu=mips64r6 2>%t1
-# RUN: FileCheck %s < %t1 -check-prefix=ASM
+# RUN: FileCheck %s < %t1
.text
+local_label:
.set noreorder
.set noat
- jalr.hb $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- jalr.hb $31, $31 # ASM: :[[@LINE]]:9: error: source and destination must be different
- ldc2 $8,-21181($at) # ASM: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
- break 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 1024, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 7, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
- break 1024, 1024 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+ align $4, $2, $3, -1 # CHECK: :[[@LINE]]:29: error: expected 2-bit unsigned immediate
+ align $4, $2, $3, 4 # CHECK: :[[@LINE]]:29: error: expected 2-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:9: error: source and destination must be different
+ ldc2 $8,-21181($at) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ break -1 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ break 1024 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ break -1, 5 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ break 1024, 5 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ break 7, -1 # CHECK: :[[@LINE]]:18: error: expected 10-bit unsigned immediate
+ break 7, 1024 # CHECK: :[[@LINE]]:18: error: expected 10-bit unsigned immediate
+ break 1024, 1024 # CHECK: :[[@LINE]]:15: error: expected 10-bit unsigned immediate
+ // FIXME: Following tests are temporarely disabled, until "PredicateControl not in hierarchy" problem is resolved
+ bltl $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bltul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ blel $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bleul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bgel $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bgeul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bgtl $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ bgtul $7, $8, local_label # -CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ cache -1, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ cache 32, 255($7) # CHECK: :[[@LINE]]:15: error: expected 5-bit unsigned immediate
+ dalign $4, $2, $3, -1 # CHECK: :[[@LINE]]:29: error: expected 3-bit unsigned immediate
+ dalign $4, $2, $3, 8 # CHECK: :[[@LINE]]:29: error: expected 3-bit unsigned immediate
+ dlsa $2, $3, $4, 0 # CHECK: :[[@LINE]]:29: error: expected immediate in range 1 .. 4
+ dlsa $2, $3, $4, 5 # CHECK: :[[@LINE]]:29: error: expected immediate in range 1 .. 4
+ drotr32 $2, $3, -1 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ drotr32 $2, $3, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ jalr.hb $31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: source and destination must be different
+ jalr.hb $31, $31 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: source and destination must be different
+ lsa $2, $3, $4, 0 # CHECK: :[[@LINE]]:29: error: expected immediate in range 1 .. 4
+ lsa $2, $3, $4, 5 # CHECK: :[[@LINE]]:29: error: expected immediate in range 1 .. 4
+ pref -1, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
+ pref 32, 255($7) # CHECK: :[[@LINE]]:14: error: expected 5-bit unsigned immediate
diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s
index 3dc771a80d54..0b4b6b187afd 100644
--- a/test/MC/Mips/mips64r6/valid.s
+++ b/test/MC/Mips/mips64r6/valid.s
@@ -64,7 +64,7 @@ a:
bovc $0, $0, 4 # CHECK: bovc $zero, $zero, 4 # encoding: [0x20,0x00,0x00,0x01]
bovc $2, $0, 4 # CHECK: bovc $2, $zero, 4 # encoding: [0x20,0x40,0x00,0x01]
bovc $4, $2, 4 # CHECK: bovc $4, $2, 4 # encoding: [0x20,0x82,0x00,0x01]
- cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0x7c,0xa1,0x04,0x25]
+ cache 1, 8($5) # CHECK: cache 1, 8($5) # encoding: [0x7c,0xa1,0x04,0x25]
class.d $f2, $f4 # CHECK: class.d $f2, $f4 # encoding: [0x46,0x20,0x20,0x9b]
class.s $f2, $f4 # CHECK: class.s $f2, $f4 # encoding: [0x46,0x00,0x20,0x9b]
clo $11,$a1 # CHECK: clo $11, $5 # encoding: [0x00,0xa0,0x58,0x51]
@@ -116,7 +116,7 @@ a:
di $s8 # CHECK: di $fp # encoding: [0x41,0x7e,0x60,0x00]
div $2,$3,$4 # CHECK: div $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9a]
divu $2,$3,$4 # CHECK: divu $2, $3, $4 # encoding: [0x00,0x64,0x10,0x9b]
- dlsa $2, $3, $4, 3 # CHECK: dlsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xd5]
+ dlsa $2, $3, $4, 3 # CHECK: dlsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0x95]
dmfc0 $10, $16, 2 # CHECK: dmfc0 $10, $16, 2 # encoding: [0x40,0x2a,0x80,0x02]
dmod $2,$3,$4 # CHECK: dmod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xde]
dmodu $2,$3,$4 # CHECK: dmodu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdf]
@@ -132,6 +132,7 @@ a:
dsubu $15,$11,5025 # CHECK: daddiu $15, $11, -5025 # encoding: [0x65,0x6f,0xec,0x5f]
ei # CHECK: ei # encoding: [0x41,0x60,0x60,0x20]
ei $14 # CHECK: ei $14 # encoding: [0x41,0x6e,0x60,0x20]
+ eretnc # CHECK: eretnc # encoding: [0x42,0x00,0x00,0x58]
j 1f # CHECK: j $tmp0 # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: ($tmp0), kind: fixup_Mips_26
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
@@ -146,7 +147,7 @@ a:
ldpc $2,123456 # CHECK: ldpc $2, 123456 # encoding: [0xec,0x58,0x3c,0x48]
ll $v0,-153($s2) # CHECK: ll $2, -153($18) # encoding: [0x7e,0x42,0xb3,0xb6]
lld $zero,112($ra) # CHECK: lld $zero, 112($ra) # encoding: [0x7f,0xe0,0x38,0x37]
- lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5]
+ lsa $2, $3, $4, 3 # CHECK: lsa $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0x85]
lwc2 $18,-841($a2) # CHECK: lwc2 $18, -841($6) # encoding: [0x49,0x52,0x34,0xb7]
lwpc $2,268 # CHECK: lwpc $2, 268 # encoding: [0xec,0x48,0x00,0x43]
lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43]
@@ -163,6 +164,10 @@ a:
mfc0 $8,$15,1 # CHECK: mfc0 $8, $15, 1 # encoding: [0x40,0x08,0x78,0x01]
mod $2,$3,$4 # CHECK: mod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xda]
modu $2,$3,$4 # CHECK: modu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdb]
+ move $a0,$a3 # CHECK: move $4, $7 # encoding: [0x00,0xe0,0x20,0x25]
+ move $s5,$a0 # CHECK: move $21, $4 # encoding: [0x00,0x80,0xa8,0x25]
+ move $s8,$a0 # CHECK: move $fp, $4 # encoding: [0x00,0x80,0xf0,0x25]
+ move $25,$a2 # CHECK: move $25, $6 # encoding: [0x00,0xc0,0xc8,0x25]
mtc0 $9,$15,1 # CHECK: mtc0 $9, $15, 1 # encoding: [0x40,0x89,0x78,0x01]
msubf.d $f2,$f3,$f4 # CHECK: msubf.d $f2, $f3, $f4 # encoding: [0x46,0x24,0x18,0x99]
msubf.s $f2,$f3,$f4 # CHECK: msubf.s $f2, $f3, $f4 # encoding: [0x46,0x04,0x18,0x99]
diff --git a/test/MC/Mips/msa/invalid-64.s b/test/MC/Mips/msa/invalid-64.s
new file mode 100644
index 000000000000..a15ee270bccf
--- /dev/null
+++ b/test/MC/Mips/msa/invalid-64.s
@@ -0,0 +1,66 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips64r2 -mattr=+msa \
+# RUN: -show-encoding 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ dlsa $2, $3, $4, 0 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 4
+ dlsa $2, $3, $4, 5 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 4
+ insve.b $w25[-1], $w9[0] # CHECK: :[[@LINE]]:18: error: expected 4-bit unsigned immediate
+ insve.b $w25[16], $w9[0] # CHECK: :[[@LINE]]:18: error: expected 4-bit unsigned immediate
+ insve.h $w24[-1], $w2[0] # CHECK: :[[@LINE]]:18: error: expected 3-bit unsigned immediate
+ insve.h $w24[8], $w2[0] # CHECK: :[[@LINE]]:18: error: expected 3-bit unsigned immediate
+ insve.w $w0[-1], $w13[0] # CHECK: :[[@LINE]]:17: error: expected 2-bit unsigned immediate
+ insve.w $w0[4], $w13[0] # CHECK: :[[@LINE]]:17: error: expected 2-bit unsigned immediate
+ insve.d $w3[-1], $w18[0] # CHECK: :[[@LINE]]:17: error: expected 1-bit unsigned immediate
+ insve.d $w3[2], $w18[0] # CHECK: :[[@LINE]]:17: error: expected 1-bit unsigned immediate
+ insve.b $w25[3], $w9[1] # CHECK: :[[@LINE]]:26: error: expected '0'
+ insve.h $w24[2], $w2[1] # CHECK: :[[@LINE]]:26: error: expected '0'
+ insve.w $w0[2], $w13[1] # CHECK: :[[@LINE]]:26: error: expected '0'
+ insve.d $w3[0], $w18[1] # CHECK: :[[@LINE]]:26: error: expected '0'
+ lsa $2, $3, $4, 0 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 4
+ lsa $2, $3, $4, 5 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 4
+ sat_s.b $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 3-bit unsigned immediate
+ sat_s.b $w31, $w31, 8 # CHECK: :[[@LINE]]:25: error: expected 3-bit unsigned immediate
+ sat_s.h $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 4-bit unsigned immediate
+ sat_s.h $w31, $w31, 16 # CHECK: :[[@LINE]]:25: error: expected 4-bit unsigned immediate
+ sat_s.w $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ sat_s.w $w31, $w31, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ sat_s.d $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate
+ sat_s.d $w31, $w31, 64 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate
+ sat_u.b $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 3-bit unsigned immediate
+ sat_u.b $w31, $w31, 8 # CHECK: :[[@LINE]]:25: error: expected 3-bit unsigned immediate
+ sat_u.h $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 4-bit unsigned immediate
+ sat_u.h $w31, $w31, 16 # CHECK: :[[@LINE]]:25: error: expected 4-bit unsigned immediate
+ sat_u.w $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ sat_u.w $w31, $w31, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ sat_u.d $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate
+ sat_u.d $w31, $w31, 64 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate
+ shf.b $w19, $w30, -1 # CHECK: :[[@LINE]]:23: error: expected 8-bit unsigned immediate
+ shf.h $w17, $w8, -1 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate
+ shf.w $w14, $w3, -1 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate
+ sldi.b $w0, $w29[-1] # CHECK: :[[@LINE]]:22: error: expected 4-bit unsigned immediate
+ sldi.b $w0, $w29[16] # CHECK: :[[@LINE]]:22: error: expected 4-bit unsigned immediate
+ sldi.d $w4, $w12[-1] # CHECK: :[[@LINE]]:22: error: expected 1-bit unsigned immediate
+ sldi.d $w4, $w12[2] # CHECK: :[[@LINE]]:22: error: expected 1-bit unsigned immediate
+ sldi.h $w8, $w17[-1] # CHECK: :[[@LINE]]:22: error: expected 3-bit unsigned immediate
+ sldi.h $w8, $w17[8] # CHECK: :[[@LINE]]:22: error: expected 3-bit unsigned immediate
+ sldi.w $w20, $w27[-1] # CHECK: :[[@LINE]]:23: error: expected 2-bit unsigned immediate
+ sldi.w $w20, $w27[4] # CHECK: :[[@LINE]]:23: error: expected 2-bit unsigned immediate
+ srari.b $w5, $w25, -1 # CHECK: :[[@LINE]]:24: error: expected 3-bit unsigned immediate
+ srari.b $w5, $w25, 8 # CHECK: :[[@LINE]]:24: error: expected 3-bit unsigned immediate
+ srari.h $w5, $w25, -1 # CHECK: :[[@LINE]]:24: error: expected 4-bit unsigned immediate
+ srari.h $w5, $w25, 16 # CHECK: :[[@LINE]]:24: error: expected 4-bit unsigned immediate
+ srari.w $w5, $w25, -1 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ srari.w $w5, $w25, 32 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ srari.d $w5, $w25, -1 # CHECK: :[[@LINE]]:24: error: expected 6-bit unsigned immediate
+ srari.d $w5, $w25, 64 # CHECK: :[[@LINE]]:24: error: expected 6-bit unsigned immediate
+ srlri.b $w18, $w3, -1 # CHECK: :[[@LINE]]:24: error: expected 3-bit unsigned immediate
+ srlri.b $w18, $w3, 8 # CHECK: :[[@LINE]]:24: error: expected 3-bit unsigned immediate
+ srlri.h $w18, $w3, -1 # CHECK: :[[@LINE]]:24: error: expected 4-bit unsigned immediate
+ srlri.h $w18, $w3, 16 # CHECK: :[[@LINE]]:24: error: expected 4-bit unsigned immediate
+ srlri.w $w18, $w3, -1 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ srlri.w $w18, $w3, 32 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ srlri.d $w18, $w3, -1 # CHECK: :[[@LINE]]:24: error: expected 6-bit unsigned immediate
+ srlri.d $w18, $w3, 64 # CHECK: :[[@LINE]]:24: error: expected 6-bit unsigned immediate
diff --git a/test/MC/Mips/msa/invalid.s b/test/MC/Mips/msa/invalid.s
new file mode 100644
index 000000000000..724d9c193e0a
--- /dev/null
+++ b/test/MC/Mips/msa/invalid.s
@@ -0,0 +1,67 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r2 -mattr=+msa \
+# RUN: -show-encoding 2>%t1
+# RUN: FileCheck %s < %t1
+
+ .set noat
+ insve.b $w25[-1], $w9[0] # CHECK: :[[@LINE]]:18: error: expected 4-bit unsigned immediate
+ insve.b $w25[16], $w9[0] # CHECK: :[[@LINE]]:18: error: expected 4-bit unsigned immediate
+ insve.h $w24[-1], $w2[0] # CHECK: :[[@LINE]]:18: error: expected 3-bit unsigned immediate
+ insve.h $w24[8], $w2[0] # CHECK: :[[@LINE]]:18: error: expected 3-bit unsigned immediate
+ insve.w $w0[-1], $w13[0] # CHECK: :[[@LINE]]:17: error: expected 2-bit unsigned immediate
+ insve.w $w0[4], $w13[0] # CHECK: :[[@LINE]]:17: error: expected 2-bit unsigned immediate
+ insve.d $w3[-1], $w18[0] # CHECK: :[[@LINE]]:17: error: expected 1-bit unsigned immediate
+ insve.d $w3[2], $w18[0] # CHECK: :[[@LINE]]:17: error: expected 1-bit unsigned immediate
+ insve.b $w25[3], $w9[1] # CHECK: :[[@LINE]]:26: error: expected '0'
+ insve.h $w24[2], $w2[1] # CHECK: :[[@LINE]]:26: error: expected '0'
+ insve.w $w0[2], $w13[1] # CHECK: :[[@LINE]]:26: error: expected '0'
+ insve.d $w3[0], $w18[1] # CHECK: :[[@LINE]]:26: error: expected '0'
+ lsa $2, $3, $4, 0 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 4
+ lsa $2, $3, $4, 5 # CHECK: :[[@LINE]]:25: error: expected immediate in range 1 .. 4
+ sat_s.b $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 3-bit unsigned immediate
+ sat_s.b $w31, $w31, 8 # CHECK: :[[@LINE]]:25: error: expected 3-bit unsigned immediate
+ sat_s.h $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 4-bit unsigned immediate
+ sat_s.h $w31, $w31, 16 # CHECK: :[[@LINE]]:25: error: expected 4-bit unsigned immediate
+ sat_s.w $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ sat_s.w $w31, $w31, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ sat_s.d $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate
+ sat_s.d $w31, $w31, 64 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate
+ sat_u.b $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 3-bit unsigned immediate
+ sat_u.b $w31, $w31, 8 # CHECK: :[[@LINE]]:25: error: expected 3-bit unsigned immediate
+ sat_u.h $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 4-bit unsigned immediate
+ sat_u.h $w31, $w31, 16 # CHECK: :[[@LINE]]:25: error: expected 4-bit unsigned immediate
+ sat_u.w $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ sat_u.w $w31, $w31, 32 # CHECK: :[[@LINE]]:25: error: expected 5-bit unsigned immediate
+ sat_u.d $w31, $w31, -1 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate
+ sat_u.d $w31, $w31, 64 # CHECK: :[[@LINE]]:25: error: expected 6-bit unsigned immediate
+ shf.b $w19, $w30, -1 # CHECK: :[[@LINE]]:23: error: expected 8-bit unsigned immediate
+ shf.b $w19, $w30, 256 # CHECK: :[[@LINE]]:23: error: expected 8-bit unsigned immediate
+ shf.h $w17, $w8, -1 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate
+ shf.h $w17, $w8, 256 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate
+ shf.w $w14, $w3, -1 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate
+ shf.w $w14, $w3, 256 # CHECK: :[[@LINE]]:22: error: expected 8-bit unsigned immediate
+ sldi.b $w0, $w29[-1] # CHECK: :[[@LINE]]:22: error: expected 4-bit unsigned immediate
+ sldi.b $w0, $w29[16] # CHECK: :[[@LINE]]:22: error: expected 4-bit unsigned immediate
+ sldi.d $w4, $w12[-1] # CHECK: :[[@LINE]]:22: error: expected 1-bit unsigned immediate
+ sldi.d $w4, $w12[2] # CHECK: :[[@LINE]]:22: error: expected 1-bit unsigned immediate
+ sldi.h $w8, $w17[-1] # CHECK: :[[@LINE]]:22: error: expected 3-bit unsigned immediate
+ sldi.h $w8, $w17[8] # CHECK: :[[@LINE]]:22: error: expected 3-bit unsigned immediate
+ sldi.w $w20, $w27[-1] # CHECK: :[[@LINE]]:23: error: expected 2-bit unsigned immediate
+ sldi.w $w20, $w27[4] # CHECK: :[[@LINE]]:23: error: expected 2-bit unsigned immediate
+ srari.b $w5, $w25, -1 # CHECK: :[[@LINE]]:24: error: expected 3-bit unsigned immediate
+ srari.b $w5, $w25, 8 # CHECK: :[[@LINE]]:24: error: expected 3-bit unsigned immediate
+ srari.h $w5, $w25, -1 # CHECK: :[[@LINE]]:24: error: expected 4-bit unsigned immediate
+ srari.h $w5, $w25, 16 # CHECK: :[[@LINE]]:24: error: expected 4-bit unsigned immediate
+ srari.w $w5, $w25, -1 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ srari.w $w5, $w25, 32 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ srari.d $w5, $w25, -1 # CHECK: :[[@LINE]]:24: error: expected 6-bit unsigned immediate
+ srari.d $w5, $w25, 64 # CHECK: :[[@LINE]]:24: error: expected 6-bit unsigned immediate
+ srlri.b $w18, $w3, -1 # CHECK: :[[@LINE]]:24: error: expected 3-bit unsigned immediate
+ srlri.b $w18, $w3, 8 # CHECK: :[[@LINE]]:24: error: expected 3-bit unsigned immediate
+ srlri.h $w18, $w3, -1 # CHECK: :[[@LINE]]:24: error: expected 4-bit unsigned immediate
+ srlri.h $w18, $w3, 16 # CHECK: :[[@LINE]]:24: error: expected 4-bit unsigned immediate
+ srlri.w $w18, $w3, -1 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ srlri.w $w18, $w3, 32 # CHECK: :[[@LINE]]:24: error: expected 5-bit unsigned immediate
+ srlri.d $w18, $w3, -1 # CHECK: :[[@LINE]]:24: error: expected 6-bit unsigned immediate
+ srlri.d $w18, $w3, 64 # CHECK: :[[@LINE]]:24: error: expected 6-bit unsigned immediate
diff --git a/test/MC/Mips/msa/test_elm.s b/test/MC/Mips/msa/test_elm.s
index dbe6d5c700b5..ca6f18c9584f 100644
--- a/test/MC/Mips/msa/test_elm.s
+++ b/test/MC/Mips/msa/test_elm.s
@@ -1,33 +1,16 @@
# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+msa -show-encoding | FileCheck %s
-#
-# CHECK: copy_s.b $13, $w8[2] # encoding: [0x78,0x82,0x43,0x59]
-# CHECK: copy_s.h $1, $w25[0] # encoding: [0x78,0xa0,0xc8,0x59]
-# CHECK: copy_s.w $22, $w5[1] # encoding: [0x78,0xb1,0x2d,0x99]
-# CHECK: copy_u.b $22, $w20[4] # encoding: [0x78,0xc4,0xa5,0x99]
-# CHECK: copy_u.h $20, $w4[0] # encoding: [0x78,0xe0,0x25,0x19]
-# CHECK: copy_u.w $fp, $w13[2] # encoding: [0x78,0xf2,0x6f,0x99]
-# CHECK: sldi.b $w0, $w29[4] # encoding: [0x78,0x04,0xe8,0x19]
-# CHECK: sldi.h $w8, $w17[0] # encoding: [0x78,0x20,0x8a,0x19]
-# CHECK: sldi.w $w20, $w27[2] # encoding: [0x78,0x32,0xdd,0x19]
-# CHECK: sldi.d $w4, $w12[0] # encoding: [0x78,0x38,0x61,0x19]
-# CHECK: splati.b $w25, $w3[2] # encoding: [0x78,0x42,0x1e,0x59]
-# CHECK: splati.h $w24, $w28[1] # encoding: [0x78,0x61,0xe6,0x19]
-# CHECK: splati.w $w13, $w18[0] # encoding: [0x78,0x70,0x93,0x59]
-# CHECK: splati.d $w28, $w1[0] # encoding: [0x78,0x78,0x0f,0x19]
-# CHECK: move.v $w23, $w24 # encoding: [0x78,0xbe,0xc5,0xd9]
- copy_s.b $13, $w8[2]
- copy_s.h $1, $w25[0]
- copy_s.w $22, $w5[1]
- copy_u.b $22, $w20[4]
- copy_u.h $20, $w4[0]
- copy_u.w $30, $w13[2]
- sldi.b $w0, $w29[4]
- sldi.h $w8, $w17[0]
- sldi.w $w20, $w27[2]
- sldi.d $w4, $w12[0]
- splati.b $w25, $w3[2]
- splati.h $w24, $w28[1]
- splati.w $w13, $w18[0]
- splati.d $w28, $w1[0]
- move.v $w23, $w24
+copy_s.b $13, $w8[2] # CHECK: copy_s.b $13, $w8[2] # encoding: [0x78,0x82,0x43,0x59]
+copy_s.h $1, $w25[0] # CHECK: copy_s.h $1, $w25[0] # encoding: [0x78,0xa0,0xc8,0x59]
+copy_s.w $22, $w5[1] # CHECK: copy_s.w $22, $w5[1] # encoding: [0x78,0xb1,0x2d,0x99]
+copy_u.b $22, $w20[4] # CHECK: copy_u.b $22, $w20[4] # encoding: [0x78,0xc4,0xa5,0x99]
+copy_u.h $20, $w4[0] # CHECK: copy_u.h $20, $w4[0] # encoding: [0x78,0xe0,0x25,0x19]
+sldi.b $w0, $w29[4] # CHECK: sldi.b $w0, $w29[4] # encoding: [0x78,0x04,0xe8,0x19]
+sldi.h $w8, $w17[0] # CHECK: sldi.h $w8, $w17[0] # encoding: [0x78,0x20,0x8a,0x19]
+sldi.w $w20, $w27[2] # CHECK: sldi.w $w20, $w27[2] # encoding: [0x78,0x32,0xdd,0x19]
+sldi.d $w4, $w12[0] # CHECK: sldi.d $w4, $w12[0] # encoding: [0x78,0x38,0x61,0x19]
+splati.b $w25, $w3[2] # CHECK: splati.b $w25, $w3[2] # encoding: [0x78,0x42,0x1e,0x59]
+splati.h $w24, $w28[1] # CHECK: splati.h $w24, $w28[1] # encoding: [0x78,0x61,0xe6,0x19]
+splati.w $w13, $w18[0] # CHECK: splati.w $w13, $w18[0] # encoding: [0x78,0x70,0x93,0x59]
+splati.d $w28, $w1[0] # CHECK: splati.d $w28, $w1[0] # encoding: [0x78,0x78,0x0f,0x19]
+move.v $w23, $w24 # CHECK: move.v $w23, $w24 # encoding: [0x78,0xbe,0xc5,0xd9]
diff --git a/test/MC/Mips/msa/test_elm_msa64.s b/test/MC/Mips/msa/test_elm_msa64.s
index 5cc9147df77a..8e4c540c5019 100644
--- a/test/MC/Mips/msa/test_elm_msa64.s
+++ b/test/MC/Mips/msa/test_elm_msa64.s
@@ -1,7 +1,3 @@
# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+msa -show-encoding | FileCheck %s
-#
-# CHECK: copy_s.d $19, $w31[0] # encoding: [0x78,0xb8,0xfc,0xd9]
-# CHECK: copy_u.d $18, $w29[1] # encoding: [0x78,0xf9,0xec,0x99]
- copy_s.d $19, $w31[0]
- copy_u.d $18, $w29[1]
+copy_s.d $19, $w31[0] # CHECK: copy_s.d $19, $w31[0] # encoding: [0x78,0xb8,0xfc,0xd9]
diff --git a/test/MC/Mips/reloc-directive-bad.s b/test/MC/Mips/reloc-directive-bad.s
new file mode 100644
index 000000000000..41f21ed36de7
--- /dev/null
+++ b/test/MC/Mips/reloc-directive-bad.s
@@ -0,0 +1,6 @@
+# RUN: not llvm-mc -triple mips-unknown-linux < %s -show-encoding -target-abi=o32 \
+# RUN: 2>&1 | FileCheck %s
+ .text
+foo:
+ .reloc 0, R_MIPS_32, .text+.text # CHECK: :[[@LINE]]:23: error: expression must be relocatable
+ nop
diff --git a/test/MC/Mips/reloc-directive.s b/test/MC/Mips/reloc-directive.s
new file mode 100644
index 000000000000..f42a1bc699f4
--- /dev/null
+++ b/test/MC/Mips/reloc-directive.s
@@ -0,0 +1,58 @@
+# RUN: llvm-mc -triple mips-unknown-linux < %s -show-encoding -target-abi=o32 \
+# RUN: | FileCheck -check-prefix=ASM %s
+# RUN: llvm-mc -triple mips64-unknown-linux < %s -show-encoding -target-abi=n32 \
+# RUN: | FileCheck -check-prefix=ASM %s
+# RUN: llvm-mc -triple mips64-unknown-linux < %s -show-encoding -target-abi=n64 \
+# RUN: | FileCheck -check-prefix=ASM %s
+# RUN: llvm-mc -triple mips-unknown-linux < %s -show-encoding -target-abi=o32 \
+# RUN: -filetype=obj | llvm-readobj -sections -section-data -r | \
+# RUN: FileCheck -check-prefix=OBJ-O32 %s
+# RUN: llvm-mc -triple mips64-unknown-linux < %s -show-encoding -target-abi=n32 \
+# RUN: -filetype=obj | llvm-readobj -sections -section-data -r | \
+# RUN: FileCheck -check-prefix=OBJ-N32 %s
+# RUN: llvm-mc -triple mips64-unknown-linux < %s -show-encoding -target-abi=n64 \
+# RUN: -filetype=obj | llvm-readobj -sections -section-data -r | \
+# RUN: FileCheck -check-prefix=OBJ-N64 %s
+ .text
+foo:
+ .reloc 4, R_MIPS_NONE, foo # ASM: .reloc 4, R_MIPS_NONE, foo
+ .reloc 0, R_MIPS_NONE, foo+4 # ASM: .reloc 0, R_MIPS_NONE, foo+4
+ .reloc 8, R_MIPS_32, foo+8 # ASM: .reloc 8, R_MIPS_32, foo+8
+ nop
+ nop
+ nop
+ .reloc 12, R_MIPS_NONE # ASM: .reloc 12, R_MIPS_NONE{{$}}
+ nop
+
+# OBJ-O32-LABEL: Name: .text
+# OBJ-O32: 0000: 00000000 00000000 00000008
+# OBJ-O32-LABEL: }
+# OBJ-O32-LABEL: Relocations [
+# OBJ-O32: 0x0 R_MIPS_NONE foo 0x0
+# OBJ-O32: 0x4 R_MIPS_NONE foo 0x0
+# OBJ-O32: 0x8 R_MIPS_32 .text 0x0
+# OBJ-O32: 0xC R_MIPS_NONE - 0x0
+
+# FIXME: We can't get N32 correct at the moment. If we use a mips-* triple then
+# we incorrectly drop the addend. If we use a mips64-* triple then we
+# incorrectly use the 3-reloc encoding (and ELF64). mips64-* triples
+# are closest to being correct so we use them for now.
+# This should be corrected once the triple bugfixes allow us to be ABI
+# dependent rather than triple dependent.
+# OBJ-N32-LABEL: Name: .text
+# OBJ-N32: 0000: 00000000 00000000 00000000
+# OBJ-N32-LABEL: }
+# OBJ-N32-LABEL: Relocations [
+# OBJ-N32: 0x0 R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE foo 0x4
+# OBJ-N32: 0x4 R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE foo 0x0
+# OBJ-N32: 0x8 R_MIPS_32/R_MIPS_NONE/R_MIPS_NONE .text 0x8
+# OBJ-N32: 0xC R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE - 0x0
+
+# OBJ-N64-LABEL: Name: .text
+# OBJ-N64: 0000: 00000000 00000000 00000000
+# OBJ-N64-LABEL: }
+# OBJ-N64-LABEL: Relocations [
+# OBJ-N64: 0x0 R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE foo 0x4
+# OBJ-N64: 0x4 R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE foo 0x0
+# OBJ-N64: 0x8 R_MIPS_32/R_MIPS_NONE/R_MIPS_NONE .text 0x8
+# OBJ-N64: 0xC R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE - 0x0
diff --git a/test/MC/Mips/rotations32-bad.s b/test/MC/Mips/rotations32-bad.s
new file mode 100644
index 000000000000..600000e7caef
--- /dev/null
+++ b/test/MC/Mips/rotations32-bad.s
@@ -0,0 +1,31 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32 -show-encoding 2> %t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 -show-encoding 2> %t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r3 -show-encoding 2> %t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r5 -show-encoding 2> %t1
+# RUN: FileCheck %s < %t1
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r6 -show-encoding 2> %t1
+# RUN: FileCheck %s < %t1
+
+ .text
+foo:
+
+ drol $4,$5
+# CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drol $4,$5,$6
+# CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drol $4,0
+# CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+ drol $4,$5,0
+# CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+
+ dror $4,$5
+# CHECK: [[@LINE-1]]:3: error: instruction requires a CPU feature not currently enabled
+ dror $4,$5,$6
+# CHECK: [[@LINE-1]]:3: error: instruction requires a CPU feature not currently enabled
+ dror $4,0
+# CHECK: [[@LINE-1]]:3: error: instruction requires a CPU feature not currently enabled
+ dror $4,$5,0
+# CHECK: [[@LINE-1]]:3: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/rotations32.s b/test/MC/Mips/rotations32.s
new file mode 100644
index 000000000000..64207708aa94
--- /dev/null
+++ b/test/MC/Mips/rotations32.s
@@ -0,0 +1,87 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32 -show-encoding | FileCheck %s -check-prefix=CHECK-32
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -show-encoding | FileCheck %s -check-prefix=CHECK-32R
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r3 -show-encoding | FileCheck %s -check-prefix=CHECK-32R
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r5 -show-encoding | FileCheck %s -check-prefix=CHECK-32R
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r6 -show-encoding | FileCheck %s -check-prefix=CHECK-32R
+
+ .text
+foo:
+ rol $4,$5
+# CHECK-32: negu $1, $5 # encoding: [0x00,0x05,0x08,0x23]
+# CHECK-32: srlv $1, $4, $1 # encoding: [0x00,0x24,0x08,0x06]
+# CHECK-32: sllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x04]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: negu $1, $5 # encoding: [0x00,0x05,0x08,0x23]
+# CHECK-32R: rotrv $4, $4, $1 # encoding: [0x00,0x24,0x20,0x46]
+ rol $4,$5,$6
+# CHECK-32: negu $1, $6 # encoding: [0x00,0x06,0x08,0x23]
+# CHECK-32: srlv $1, $5, $1 # encoding: [0x00,0x25,0x08,0x06]
+# CHECK-32: sllv $4, $5, $6 # encoding: [0x00,0xc5,0x20,0x04]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: negu $4, $6 # encoding: [0x00,0x06,0x20,0x23]
+# CHECK-32R: rotrv $4, $5, $4 # encoding: [0x00,0x85,0x20,0x46]
+ rol $4,0
+# CHECK-32: srl $4, $4, 0 # encoding: [0x00,0x04,0x20,0x02]
+# CHECK-32R: rotr $4, $4, 0 # encoding: [0x00,0x24,0x20,0x02]
+ rol $4,$5,0
+# CHECK-32: srl $4, $5, 0 # encoding: [0x00,0x05,0x20,0x02]
+# CHECK-32R: rotr $4, $5, 0 # encoding: [0x00,0x25,0x20,0x02]
+ rol $4,1
+# CHECK-32: sll $1, $4, 1 # encoding: [0x00,0x04,0x08,0x40]
+# CHECK-32: srl $4, $4, 31 # encoding: [0x00,0x04,0x27,0xc2]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotr $4, $4, 31 # encoding: [0x00,0x24,0x27,0xc2]
+ rol $4,$5,1
+# CHECK-32: sll $1, $5, 1 # encoding: [0x00,0x05,0x08,0x40]
+# CHECK-32: srl $4, $5, 31 # encoding: [0x00,0x05,0x27,0xc2]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotr $4, $5, 31 # encoding: [0x00,0x25,0x27,0xc2]
+ rol $4,2
+# CHECK-32: sll $1, $4, 2 # encoding: [0x00,0x04,0x08,0x80]
+# CHECK-32: srl $4, $4, 30 # encoding: [0x00,0x04,0x27,0x82]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotr $4, $4, 30 # encoding: [0x00,0x24,0x27,0x82]
+ rol $4,$5,2
+# CHECK-32: sll $1, $5, 2 # encoding: [0x00,0x05,0x08,0x80]
+# CHECK-32: srl $4, $5, 30 # encoding: [0x00,0x05,0x27,0x82]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotr $4, $5, 30 # encoding: [0x00,0x25,0x27,0x82]
+
+ ror $4,$5
+# CHECK-32: negu $1, $5 # encoding: [0x00,0x05,0x08,0x23]
+# CHECK-32: sllv $1, $4, $1 # encoding: [0x00,0x24,0x08,0x04]
+# CHECK-32: srlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x06]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotrv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x46]
+ ror $4,$5,$6
+# CHECK-32: negu $1, $6 # encoding: [0x00,0x06,0x08,0x23]
+# CHECK-32: sllv $1, $5, $1 # encoding: [0x00,0x25,0x08,0x04]
+# CHECK-32: srlv $4, $5, $6 # encoding: [0x00,0xc5,0x20,0x06]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotrv $4, $5, $6 # encoding: [0x00,0xc5,0x20,0x46]
+ ror $4,0
+# CHECK-32: srl $4, $4, 0 # encoding: [0x00,0x04,0x20,0x02]
+# CHECK-32R: rotr $4, $4, 0 # encoding: [0x00,0x24,0x20,0x02]
+ ror $4,$5,0
+# CHECK-32: srl $4, $5, 0 # encoding: [0x00,0x05,0x20,0x02]
+# CHECK-32R: rotr $4, $5, 0 # encoding: [0x00,0x25,0x20,0x02]
+ ror $4,1
+# CHECK-32: srl $1, $4, 1 # encoding: [0x00,0x04,0x08,0x42]
+# CHECK-32: sll $4, $4, 31 # encoding: [0x00,0x04,0x27,0xc0]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotr $4, $4, 1 # encoding: [0x00,0x24,0x20,0x42]
+ ror $4,$5,1
+# CHECK-32: srl $1, $5, 1 # encoding: [0x00,0x05,0x08,0x42]
+# CHECK-32: sll $4, $5, 31 # encoding: [0x00,0x05,0x27,0xc0]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotr $4, $5, 1 # encoding: [0x00,0x25,0x20,0x42]
+ ror $4,2
+# CHECK-32: srl $1, $4, 2 # encoding: [0x00,0x04,0x08,0x82]
+# CHECK-32: sll $4, $4, 30 # encoding: [0x00,0x04,0x27,0x80]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotr $4, $4, 2 # encoding: [0x00,0x24,0x20,0x82]
+ ror $4,$5,2
+# CHECK-32: srl $1, $5, 2 # encoding: [0x00,0x05,0x08,0x82]
+# CHECK-32: sll $4, $5, 30 # encoding: [0x00,0x05,0x27,0x80]
+# CHECK-32: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-32R: rotr $4, $5, 2 # encoding: [0x00,0x25,0x20,0x82]
diff --git a/test/MC/Mips/rotations64.s b/test/MC/Mips/rotations64.s
new file mode 100644
index 000000000000..f25b48ad87fd
--- /dev/null
+++ b/test/MC/Mips/rotations64.s
@@ -0,0 +1,238 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64 -show-encoding | FileCheck %s -check-prefix=CHECK-64
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64r2 -show-encoding | FileCheck %s -check-prefix=CHECK-64R
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64r3 -show-encoding | FileCheck %s -check-prefix=CHECK-64R
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64r5 -show-encoding | FileCheck %s -check-prefix=CHECK-64R
+# RUN: llvm-mc %s -arch=mips -mcpu=mips64r6 -show-encoding | FileCheck %s -check-prefix=CHECK-64R
+
+ .text
+foo:
+ rol $4,$5
+# CHECK-64: subu $1, $zero, $5 # encoding: [0x00,0x05,0x08,0x23]
+# CHECK-64: srlv $1, $4, $1 # encoding: [0x00,0x24,0x08,0x06]
+# CHECK-64: sllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x04]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: subu $1, $zero, $5 # encoding: [0x00,0x05,0x08,0x23]
+# CHECK-64R: rotrv $4, $4, $1 # encoding: [0x00,0x24,0x20,0x46]
+ rol $4,$5,$6
+# CHECK-64: subu $1, $zero, $6 # encoding: [0x00,0x06,0x08,0x23]
+# CHECK-64: srlv $1, $5, $1 # encoding: [0x00,0x25,0x08,0x06]
+# CHECK-64: sllv $4, $5, $6 # encoding: [0x00,0xc5,0x20,0x04]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: negu $4, $6 # encoding: [0x00,0x06,0x20,0x23]
+# CHECK-64R: rotrv $4, $5, $4 # encoding: [0x00,0x85,0x20,0x46]
+ rol $4,0
+# CHECK-64: srl $4, $4, 0 # encoding: [0x00,0x04,0x20,0x02]
+# CHECK-64R: rotr $4, $4, 0 # encoding: [0x00,0x24,0x20,0x02]
+ rol $4,$5,0
+# CHECK-64: srl $4, $5, 0 # encoding: [0x00,0x05,0x20,0x02]
+# CHECK-64R: rotr $4, $5, 0 # encoding: [0x00,0x25,0x20,0x02]
+ rol $4,1
+# CHECK-64: sll $1, $4, 1 # encoding: [0x00,0x04,0x08,0x40]
+# CHECK-64: srl $4, $4, 31 # encoding: [0x00,0x04,0x27,0xc2]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotr $4, $4, 31 # encoding: [0x00,0x24,0x27,0xc2]
+ rol $4,$5,1
+# CHECK-64: sll $1, $5, 1 # encoding: [0x00,0x05,0x08,0x40]
+# CHECK-64: srl $4, $5, 31 # encoding: [0x00,0x05,0x27,0xc2]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotr $4, $5, 31 # encoding: [0x00,0x25,0x27,0xc2]
+ rol $4,2
+# CHECK-64: sll $1, $4, 2 # encoding: [0x00,0x04,0x08,0x80]
+# CHECK-64: srl $4, $4, 30 # encoding: [0x00,0x04,0x27,0x82]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotr $4, $4, 30 # encoding: [0x00,0x24,0x27,0x82]
+ rol $4,$5,2
+# CHECK-64: sll $1, $5, 2 # encoding: [0x00,0x05,0x08,0x80]
+# CHECK-64: srl $4, $5, 30 # encoding: [0x00,0x05,0x27,0x82]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotr $4, $5, 30 # encoding: [0x00,0x25,0x27,0x82]
+
+ ror $4,$5
+# CHECK-64: subu $1, $zero, $5 # encoding: [0x00,0x05,0x08,0x23]
+# CHECK-64: sllv $1, $4, $1 # encoding: [0x00,0x24,0x08,0x04]
+# CHECK-64: srlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x06]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotrv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x46]
+ ror $4,$5,$6
+# CHECK-64: subu $1, $zero, $6 # encoding: [0x00,0x06,0x08,0x23]
+# CHECK-64: sllv $1, $5, $1 # encoding: [0x00,0x25,0x08,0x04]
+# CHECK-64: srlv $4, $5, $6 # encoding: [0x00,0xc5,0x20,0x06]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotrv $4, $5, $6 # encoding: [0x00,0xc5,0x20,0x46]
+ ror $4,0
+# CHECK-64: srl $4, $4, 0 # encoding: [0x00,0x04,0x20,0x02]
+# CHECK-64R: rotr $4, $4, 0 # encoding: [0x00,0x24,0x20,0x02]
+ ror $4,$5,0
+# CHECK-64: srl $4, $5, 0 # encoding: [0x00,0x05,0x20,0x02]
+# CHECK-64R: rotr $4, $5, 0 # encoding: [0x00,0x25,0x20,0x02]
+ ror $4,1
+# CHECK-64: srl $1, $4, 1 # encoding: [0x00,0x04,0x08,0x42]
+# CHECK-64: sll $4, $4, 31 # encoding: [0x00,0x04,0x27,0xc0]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotr $4, $4, 1 # encoding: [0x00,0x24,0x20,0x42]
+ ror $4,$5,1
+# CHECK-64: srl $1, $5, 1 # encoding: [0x00,0x05,0x08,0x42]
+# CHECK-64: sll $4, $5, 31 # encoding: [0x00,0x05,0x27,0xc0]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotr $4, $5, 1 # encoding: [0x00,0x25,0x20,0x42]
+ ror $4,2
+# CHECK-64: srl $1, $4, 2 # encoding: [0x00,0x04,0x08,0x82]
+# CHECK-64: sll $4, $4, 30 # encoding: [0x00,0x04,0x27,0x80]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotr $4, $4, 2 # encoding: [0x00,0x24,0x20,0x82]
+ ror $4,$5,2
+# CHECK-64: srl $1, $5, 2 # encoding: [0x00,0x05,0x08,0x82]
+# CHECK-64: sll $4, $5, 30 # encoding: [0x00,0x05,0x27,0x80]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: rotr $4, $5, 2 # encoding: [0x00,0x25,0x20,0x82]
+
+ drol $4,$5
+# CHECK-64: dsubu $1, $zero, $5 # encoding: [0x00,0x05,0x08,0x2f]
+# CHECK-64: dsrlv $1, $4, $1 # encoding: [0x00,0x24,0x08,0x16]
+# CHECK-64: dsllv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x14]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: dsubu $1, $zero, $5 # encoding: [0x00,0x05,0x08,0x2f]
+# CHECK-64R: drotrv $4, $4, $1 # encoding: [0x00,0x24,0x20,0x56]
+ drol $4,$5,$6
+# CHECK-64: dsubu $1, $zero, $6 # encoding: [0x00,0x06,0x08,0x2f]
+# CHECK-64: dsrlv $1, $5, $1 # encoding: [0x00,0x25,0x08,0x16]
+# CHECK-64: dsllv $4, $5, $6 # encoding: [0x00,0xc5,0x20,0x14]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: dsubu $4, $zero, $6 # encoding: [0x00,0x06,0x20,0x2f]
+# CHECK-64R: drotrv $4, $5, $4 # encoding: [0x00,0x85,0x20,0x56]
+
+ drol $4,1
+# CHECK-64: dsll $1, $4, 1 # encoding: [0x00,0x04,0x08,0x78]
+# CHECK-64: dsrl32 $4, $4, 31 # encoding: [0x00,0x04,0x27,0xfe]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $4, 31 # encoding: [0x00,0x24,0x27,0xfe]
+ drol $4,$5,0
+# CHECK-64: dsrl $4, $5, 0 # encoding: [0x00,0x05,0x20,0x3a]
+# CHECK-64R: drotr $4, $5, 0 # encoding: [0x00,0x25,0x20,0x3a]
+ drol $4,$5,1
+# CHECK-64: dsll $1, $5, 1 # encoding: [0x00,0x05,0x08,0x78]
+# CHECK-64: dsrl32 $4, $5, 31 # encoding: [0x00,0x05,0x27,0xfe]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 31 # encoding: [0x00,0x25,0x27,0xfe]
+ drol $4,$5,31
+# CHECK-64: dsll $1, $5, 31 # encoding: [0x00,0x05,0x0f,0xf8]
+# CHECK-64: dsrl32 $4, $5, 1 # encoding: [0x00,0x05,0x20,0x7e]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 1 # encoding: [0x00,0x25,0x20,0x7e]
+ drol $4,$5,32
+# CHECK-64: dsll32 $1, $5, 0 # encoding: [0x00,0x05,0x08,0x3c]
+# CHECK-64: dsrl32 $4, $5, 0 # encoding: [0x00,0x05,0x20,0x3e]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 0 # encoding: [0x00,0x25,0x20,0x3e]
+ drol $4,$5,33
+# CHECK-64: dsll32 $1, $5, 1 # encoding: [0x00,0x05,0x08,0x7c]
+# CHECK-64: dsrl $4, $5, 31 # encoding: [0x00,0x05,0x27,0xfa]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr $4, $5, 31 # encoding: [0x00,0x25,0x27,0xfa]
+ drol $4,$5,63
+# CHECK-64: dsll32 $1, $5, 31 # encoding: [0x00,0x05,0x0f,0xfc]
+# CHECK-64: dsrl $4, $5, 1 # encoding: [0x00,0x05,0x20,0x7a]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr $4, $5, 1 # encoding: [0x00,0x25,0x20,0x7a]
+ drol $4,$5,64
+# CHECK-64: dsrl $4, $5, 0 # encoding: [0x00,0x05,0x20,0x3a]
+# CHECK-64R: drotr $4, $5, 0 # encoding: [0x00,0x25,0x20,0x3a]
+ drol $4,$5,65
+# CHECK-64: dsll $1, $5, 1 # encoding: [0x00,0x05,0x08,0x78]
+# CHECK-64: dsrl32 $4, $5, 31 # encoding: [0x00,0x05,0x27,0xfe]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 31 # encoding: [0x00,0x25,0x27,0xfe]
+ drol $4,$5,95
+# CHECK-64: dsll $1, $5, 31 # encoding: [0x00,0x05,0x0f,0xf8]
+# CHECK-64: dsrl32 $4, $5, 1 # encoding: [0x00,0x05,0x20,0x7e]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 1 # encoding: [0x00,0x25,0x20,0x7e]
+ drol $4,$5,96
+# CHECK-64: dsll32 $1, $5, 0 # encoding: [0x00,0x05,0x08,0x3c]
+# CHECK-64: dsrl32 $4, $5, 0 # encoding: [0x00,0x05,0x20,0x3e]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 0 # encoding: [0x00,0x25,0x20,0x3e]
+ drol $4,$5,97
+# CHECK-64: dsll32 $1, $5, 1 # encoding: [0x00,0x05,0x08,0x7c]
+# CHECK-64: dsrl $4, $5, 31 # encoding: [0x00,0x05,0x27,0xfa]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr $4, $5, 31 # encoding: [0x00,0x25,0x27,0xfa]
+ drol $4,$5,127
+# CHECK-64: dsll32 $1, $5, 31 # encoding: [0x00,0x05,0x0f,0xfc]
+# CHECK-64: dsrl $4, $5, 1 # encoding: [0x00,0x05,0x20,0x7a]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr $4, $5, 1 # encoding: [0x00,0x25,0x20,0x7a]
+
+ dror $4,$5
+# CHECK-64: dsubu $1, $zero, $5 # encoding: [0x00,0x05,0x08,0x2f]
+# CHECK-64: dsllv $1, $4, $1 # encoding: [0x00,0x24,0x08,0x14]
+# CHECK-64: dsrlv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x16]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotrv $4, $4, $5 # encoding: [0x00,0xa4,0x20,0x56]
+ dror $4,$5,$6
+# CHECK-64: dsubu $1, $zero, $6 # encoding: [0x00,0x06,0x08,0x2f]
+# CHECK-64: dsllv $1, $5, $1 # encoding: [0x00,0x25,0x08,0x14]
+# CHECK-64: dsrlv $4, $5, $6 # encoding: [0x00,0xc5,0x20,0x16]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotrv $4, $5, $6 # encoding: [0x00,0xc5,0x20,0x56]
+ dror $4,1
+# CHECK-64: dsrl $1, $4, 1 # encoding: [0x00,0x04,0x08,0x7a]
+# CHECK-64: dsll32 $4, $4, 31 # encoding: [0x00,0x04,0x27,0xfc]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr $4, $4, 1 # encoding: [0x00,0x24,0x20,0x7a]
+ dror $4,$5,0
+# CHECK-64: dsrl $4, $5, 0 # encoding: [0x00,0x05,0x20,0x3a]
+# CHECK-64R: drotr $4, $5, 0 # encoding: [0x00,0x25,0x20,0x3a]
+ dror $4,$5,1
+# CHECK-64: dsrl $1, $5, 1 # encoding: [0x00,0x05,0x08,0x7a]
+# CHECK-64: dsll32 $4, $5, 31 # encoding: [0x00,0x05,0x27,0xfc]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr $4, $5, 1 # encoding: [0x00,0x25,0x20,0x7a]
+ dror $4,$5,31
+# CHECK-64: dsrl $1, $5, 31 # encoding: [0x00,0x05,0x0f,0xfa]
+# CHECK-64: dsll32 $4, $5, 1 # encoding: [0x00,0x05,0x20,0x7c]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr $4, $5, 31 # encoding: [0x00,0x25,0x27,0xfa]
+ dror $4,$5,32
+# CHECK-64: dsrl32 $1, $5, 0 # encoding: [0x00,0x05,0x08,0x3e]
+# CHECK-64: dsll32 $4, $5, 0 # encoding: [0x00,0x05,0x20,0x3c]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 0 # encoding: [0x00,0x25,0x20,0x3e]
+ dror $4,$5,33
+# CHECK-64: dsrl32 $1, $5, 1 # encoding: [0x00,0x05,0x08,0x7e]
+# CHECK-64: dsll $4, $5, 31 # encoding: [0x00,0x05,0x27,0xf8]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 1 # encoding: [0x00,0x25,0x20,0x7e]
+ dror $4,$5,63
+# CHECK-64: dsrl32 $1, $5, 31 # encoding: [0x00,0x05,0x0f,0xfe]
+# CHECK-64: dsll $4, $5, 1 # encoding: [0x00,0x05,0x20,0x78]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 31 # encoding: [0x00,0x25,0x27,0xfe]
+ dror $4,$5,64
+# CHECK-64: dsrl $4, $5, 0 # encoding: [0x00,0x05,0x20,0x3a]
+# CHECK-64R: drotr $4, $5, 0 # encoding: [0x00,0x25,0x20,0x3a]
+ dror $4,$5,65
+# CHECK-64: dsrl $1, $5, 1 # encoding: [0x00,0x05,0x08,0x7a]
+# CHECK-64: dsll32 $4, $5, 31 # encoding: [0x00,0x05,0x27,0xfc]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr $4, $5, 1 # encoding: [0x00,0x25,0x20,0x7a]
+ dror $4,$5,95
+# CHECK-64: dsrl $1, $5, 31 # encoding: [0x00,0x05,0x0f,0xfa]
+# CHECK-64: dsll32 $4, $5, 1 # encoding: [0x00,0x05,0x20,0x7c]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr $4, $5, 31 # encoding: [0x00,0x25,0x27,0xfa]
+ dror $4,$5,96
+# CHECK-64: dsrl32 $1, $5, 0 # encoding: [0x00,0x05,0x08,0x3e]
+# CHECK-64: dsll32 $4, $5, 0 # encoding: [0x00,0x05,0x20,0x3c]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 0 # encoding: [0x00,0x25,0x20,0x3e]
+ dror $4,$5,97
+# CHECK-64: dsrl32 $1, $5, 1 # encoding: [0x00,0x05,0x08,0x7e]
+# CHECK-64: dsll $4, $5, 31 # encoding: [0x00,0x05,0x27,0xf8]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 1 # encoding: [0x00,0x25,0x20,0x7e]
+ dror $4,$5,127
+# CHECK-64: dsrl32 $1, $5, 31 # encoding: [0x00,0x05,0x0f,0xfe]
+# CHECK-64: dsll $4, $5, 1 # encoding: [0x00,0x05,0x20,0x78]
+# CHECK-64: or $4, $4, $1 # encoding: [0x00,0x81,0x20,0x25]
+# CHECK-64R: drotr32 $4, $5, 31 # encoding: [0x00,0x25,0x27,0xfe]
diff --git a/test/MC/Mips/set-nomacro.s b/test/MC/Mips/set-nomacro.s
index 3f82f8161315..1b7a49fbaffb 100644
--- a/test/MC/Mips/set-nomacro.s
+++ b/test/MC/Mips/set-nomacro.s
@@ -60,6 +60,7 @@
bgtu $0, $8, local_label
bgtu $0, $0, local_label
+ ulh $5, 0
ulhu $5, 0
ulw $8, 2
@@ -67,6 +68,11 @@
ulw $8, 2($9)
ulw $8, 0x8000($9)
+ jal foo
+ .option pic2
+ jal foo
+ .option pic0
+
add $4, $5, $6
.set noreorder
@@ -175,6 +181,8 @@
bgtu $0, $0, local_label
# CHECK-NOT: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
+ ulh $5, 0
+# CHECK: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
ulhu $5, 0
# CHECK: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
@@ -187,5 +195,12 @@
ulw $8, 0x8000($9)
# CHECK: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
+ jal foo
+# CHECK-NOT: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
+ .option pic2
+ jal foo
+# CHECK: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
+ .option pic0
+
add $4, $5, $6
# CHECK-NOT: [[@LINE-1]]:3: warning: macro instruction expanded into multiple instructions
diff --git a/test/MC/PowerPC/ppc-llong.s b/test/MC/PowerPC/ppc-llong.s
index 5d92fe319679..bbe0c95a084e 100644
--- a/test/MC/PowerPC/ppc-llong.s
+++ b/test/MC/PowerPC/ppc-llong.s
@@ -21,7 +21,7 @@
# CHECK-NEXT: Size: 8
# CHECK-NEXT: Link: 0
# CHECK-NEXT: Info: 0
-# CHECK-NEXT: AddressAlignment: 4
+# CHECK-NEXT: AddressAlignment:
# CHECK-NEXT: EntrySize: 0
# CHECK-NEXT: SectionData (
# CHECK-NEXT: 0000: 00000000 00000000
diff --git a/test/MC/PowerPC/ppc-word.s b/test/MC/PowerPC/ppc-word.s
index e69de54bca08..c52129c57cd3 100644
--- a/test/MC/PowerPC/ppc-word.s
+++ b/test/MC/PowerPC/ppc-word.s
@@ -21,7 +21,7 @@
# CHECK-NEXT: Size: 2
# CHECK-NEXT: Link: 0
# CHECK-NEXT: Info: 0
-# CHECK-NEXT: AddressAlignment: 4
+# CHECK-NEXT: AddressAlignment:
# CHECK-NEXT: EntrySize: 0
# CHECK-NEXT: SectionData (
# CHECK-NEXT: 0000: 0000
diff --git a/test/MC/PowerPC/ppc64-encoding.s b/test/MC/PowerPC/ppc64-encoding.s
index 4a698adadda3..b2aac8874ffe 100644
--- a/test/MC/PowerPC/ppc64-encoding.s
+++ b/test/MC/PowerPC/ppc64-encoding.s
@@ -634,18 +634,20 @@
# CHECK-LE: extsh. 2, 3 # encoding: [0x35,0x07,0x62,0x7c]
extsh. 2, 3
-# CHECK-BE: cntlz 2, 3 # encoding: [0x7c,0x62,0x00,0x34]
-# CHECK-LE: cntlz 2, 3 # encoding: [0x34,0x00,0x62,0x7c]
+# CHECK-BE: cntlzw 2, 3 # encoding: [0x7c,0x62,0x00,0x34]
+# CHECK-LE: cntlzw 2, 3 # encoding: [0x34,0x00,0x62,0x7c]
cntlzw 2, 3
-# CHECK-BE: cntlz. 2, 3 # encoding: [0x7c,0x62,0x00,0x35]
-# CHECK-LE: cntlz. 2, 3 # encoding: [0x35,0x00,0x62,0x7c]
+# CHECK-BE: cntlzw. 2, 3 # encoding: [0x7c,0x62,0x00,0x35]
+# CHECK-LE: cntlzw. 2, 3 # encoding: [0x35,0x00,0x62,0x7c]
cntlzw. 2, 3
-# CHECK-BE: cntlz 2, 3 # encoding: [0x7c,0x62,0x00,0x34]
-# CHECK-LE: cntlz 2, 3 # encoding: [0x34,0x00,0x62,0x7c]
- cntlz 2, 3
-# CHECK-BE: cntlz. 2, 3 # encoding: [0x7c,0x62,0x00,0x35]
-# CHECK-LE: cntlz. 2, 3 # encoding: [0x35,0x00,0x62,0x7c]
- cntlz. 2, 3
+#
+# The POWER variant of cntlzw
+# CHECK-BE: cntlzw 2, 3 # encoding: [0x7c,0x62,0x00,0x34]
+# CHECK-LE: cntlzw 2, 3 # encoding: [0x34,0x00,0x62,0x7c]
+ cntlz 2, 3
+# CHECK-BE: cntlzw. 2, 3 # encoding: [0x7c,0x62,0x00,0x35]
+# CHECK-LE: cntlzw. 2, 3 # encoding: [0x35,0x00,0x62,0x7c]
+ cntlz. 2, 3
cmpb 7, 21, 4
# CHECK-BE: cmpb 7, 21, 4 # encoding: [0x7e,0xa7,0x23,0xf8]
# CHECK-LE: cmpb 7, 21, 4 # encoding: [0xf8,0x23,0xa7,0x7e]
diff --git a/test/MC/PowerPC/ppc64-fixup-apply.s b/test/MC/PowerPC/ppc64-fixup-apply.s
index f98b46d6b18d..c3b7d06eb483 100644
--- a/test/MC/PowerPC/ppc64-fixup-apply.s
+++ b/test/MC/PowerPC/ppc64-fixup-apply.s
@@ -101,7 +101,7 @@ addis 1, 1, target7@highesta
# CHECK-NEXT: Size: 15
# CHECK-NEXT: Link: 0
# CHECK-NEXT: Info: 0
-# CHECK-NEXT: AddressAlignment: 4
+# CHECK-NEXT: AddressAlignment:
# CHECK-NEXT: EntrySize: 0
# CHECK-NEXT: SectionData (
# CHECK-BE-NEXT: 0000: 12345678 9ABCDEF0 87654321 BEEF42
diff --git a/test/MC/PowerPC/pr24686.s b/test/MC/PowerPC/pr24686.s
new file mode 100644
index 000000000000..28cba230b8c4
--- /dev/null
+++ b/test/MC/PowerPC/pr24686.s
@@ -0,0 +1,7 @@
+# RUN: not llvm-mc -triple=powerpc64le-unknown-linux-gnu -filetype=obj %s \
+# RUN: 2>&1 | FileCheck %s
+
+_stext:
+ld %r5, p_end - _stext(%r5)
+
+# CHECK: LLVM ERROR: Invalid PC-relative half16ds relocation
diff --git a/test/MC/PowerPC/st-other-crash.s b/test/MC/PowerPC/st-other-crash.s
index fcc56ad70c56..20f51f761373 100644
--- a/test/MC/PowerPC/st-other-crash.s
+++ b/test/MC/PowerPC/st-other-crash.s
@@ -10,7 +10,7 @@
// CHECK-NEXT: Binding: Local (0x0)
// CHECK-NEXT: Type: None (0x0)
// CHECK-NEXT: Other: 64
-// CHECK-NEXT: Section: .group (0x5)
+// CHECK-NEXT: Section: .group
// CHECK-NEXT: }
diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s
index 352fc5173800..2c03659c9cde 100644
--- a/test/MC/PowerPC/vsx.s
+++ b/test/MC/PowerPC/vsx.s
@@ -62,6 +62,9 @@
# CHECK-BE: xscvdpsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0x24]
# CHECK-LE: xscvdpsp 7, 27 # encoding: [0x24,0xdc,0xe0,0xf0]
xscvdpsp 7, 27
+# CHECK-BE: xscvdpspn 7, 27 # encoding: [0xf0,0xe0,0xdc,0x2c]
+# CHECK-LE: xscvdpspn 7, 27 # encoding: [0x2c,0xdc,0xe0,0xf0]
+ xscvdpspn 7, 27
# CHECK-BE: xscvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x60]
# CHECK-LE: xscvdpsxds 7, 27 # encoding: [0x60,0xdd,0xe0,0xf0]
xscvdpsxds 7, 27
@@ -77,9 +80,18 @@
# CHECK-BE: xscvspdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x24]
# CHECK-LE: xscvspdp 7, 27 # encoding: [0x24,0xdd,0xe0,0xf0]
xscvspdp 7, 27
+# CHECK-BE: xscvspdpn 7, 27 # encoding: [0xf0,0xe0,0xdd,0x2c]
+# CHECK-LE: xscvspdpn 7, 27 # encoding: [0x2c,0xdd,0xe0,0xf0]
+ xscvspdpn 7, 27
+# CHECK-BE: xscvsxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xe0]
+# CHECK-LE: xscvsxdsp 7, 27 # encoding: [0xe0,0xdc,0xe0,0xf0]
+ xscvsxdsp 7, 27
# CHECK-BE: xscvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe0]
# CHECK-LE: xscvsxddp 7, 27 # encoding: [0xe0,0xdd,0xe0,0xf0]
xscvsxddp 7, 27
+# CHECK-BE: xscvuxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xa0]
+# CHECK-LE: xscvuxdsp 7, 27 # encoding: [0xa0,0xdc,0xe0,0xf0]
+ xscvuxdsp 7, 27
# CHECK-BE: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0]
# CHECK-LE: xscvuxddp 7, 27 # encoding: [0xa0,0xdd,0xe0,0xf0]
xscvuxddp 7, 27
diff --git a/test/MC/Sparc/sparc-alu-instructions.s b/test/MC/Sparc/sparc-alu-instructions.s
index 98caf1d6d673..b08ab43f13e9 100644
--- a/test/MC/Sparc/sparc-alu-instructions.s
+++ b/test/MC/Sparc/sparc-alu-instructions.s
@@ -91,6 +91,12 @@
! CHECK: smulcc %g2, %g1, %g3 ! encoding: [0x86,0xd8,0x80,0x01]
smulcc %g2, %g1, %g3
+ ! CHECK: mulscc %g2, %g1, %g3 ! encoding: [0x87,0x20,0x80,0x01]
+ mulscc %g2, %g1, %g3
+
+ ! CHECK: mulscc %g2, 254, %g3 ! encoding: [0x87,0x20,0xa0,0xfe]
+ mulscc %g2, 254, %g3
+
! CHECK: udivcc %g2, %g1, %g3 ! encoding: [0x86,0xf0,0x80,0x01]
udivcc %g2, %g1, %g3
diff --git a/test/MC/Sparc/sparc-asm-errors.s b/test/MC/Sparc/sparc-asm-errors.s
new file mode 100644
index 000000000000..6a4128f683f8
--- /dev/null
+++ b/test/MC/Sparc/sparc-asm-errors.s
@@ -0,0 +1,8 @@
+! RUN: not llvm-mc %s -arch=sparc -show-encoding 2>&1 | FileCheck %s
+! RUN: not llvm-mc %s -arch=sparcv9 -show-encoding 2>&1 | FileCheck %s
+
+! Test the lower and upper bounds of 'set'
+ ! CHECK: argument must be between
+ set -2147483649, %o1
+ ! CHECK: argument must be between
+ set 4294967296, %o1
diff --git a/test/MC/Sparc/sparc-assembly-exprs.s b/test/MC/Sparc/sparc-assembly-exprs.s
index 7fdc5a50345f..1729ee48dcf8 100644
--- a/test/MC/Sparc/sparc-assembly-exprs.s
+++ b/test/MC/Sparc/sparc-assembly-exprs.s
@@ -3,4 +3,11 @@
! CHECK: mov 1033, %o1 ! encoding: [0x92,0x10,0x24,0x09]
mov (0x400|9), %o1
! CHECK: mov 60, %o2 ! encoding: [0x94,0x10,0x20,0x3c]
- mov (12+3<<2), %o2
+ mov ((12+3)<<2), %o2
+
+! "." is exactly like a temporary symbol equated to the current line.
+! RUN: llvm-mc %s -arch=sparc | FileCheck %s --check-prefix=DOTEXPR
+
+ ! DOTEXPR: .Ltmp0
+ ! DOTEXPR-NEXT: ba .Ltmp0+8
+ b . + 8
diff --git a/test/MC/Sparc/sparc-atomic-instructions.s b/test/MC/Sparc/sparc-atomic-instructions.s
index 17f97d4b5354..4ce19d5cdda0 100644
--- a/test/MC/Sparc/sparc-atomic-instructions.s
+++ b/test/MC/Sparc/sparc-atomic-instructions.s
@@ -12,3 +12,12 @@
! CHECK: swapa [%i0+%l6] 131, %o2 ! encoding: [0xd4,0xfe,0x10,0x76]
swapa [%i0+%l6] 131, %o2
+
+ ! CHECK: ldstub [%i0+40], %g1 ! encoding: [0xc2,0x6e,0x20,0x28]
+ ldstub [%i0+40], %g1
+
+ ! CHECK: ldstub [%i0+%i2], %g1 ! encoding: [0xc2,0x6e,0x00,0x1a]
+ ldstub [%i0+%i2], %g1
+
+ ! CHECK: ldstuba [%i0+%i2] 131, %g1 ! encoding: [0xc2,0xee,0x10,0x7a]
+ ldstuba [%i0+%i2] 131, %g1
diff --git a/test/MC/Sparc/sparc-ctrl-instructions.s b/test/MC/Sparc/sparc-ctrl-instructions.s
index a2608fdba608..bcb625b17e61 100644
--- a/test/MC/Sparc/sparc-ctrl-instructions.s
+++ b/test/MC/Sparc/sparc-ctrl-instructions.s
@@ -64,6 +64,10 @@
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
bz .BB0
+ ! CHECK: be .BB0 ! encoding: [0x02,0b10AAAAAA,A,A]
+ ! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
+ beq .BB0
+
! CHECK: bg .BB0 ! encoding: [0x14,0b10AAAAAA,A,A]
! CHECK-NEXT: ! fixup A - offset: 0, value: .BB0, kind: fixup_sparc_br22
bg .BB0
diff --git a/test/MC/Sparc/sparc-fp-instructions.s b/test/MC/Sparc/sparc-fp-instructions.s
index f8c130f6e5f9..64f1538cb910 100644
--- a/test/MC/Sparc/sparc-fp-instructions.s
+++ b/test/MC/Sparc/sparc-fp-instructions.s
@@ -138,3 +138,12 @@
fdtox %f0, %f4
fqtox %f0, %f4
+ ! CHECK: ld [%l0], %f29 ! encoding: [0xfb,0x04,0x00,0x00]
+ ! CHECK: ldd [%l0], %f48 ! encoding: [0xe3,0x1c,0x00,0x00]
+ ld [%l0], %f29
+ ldd [%l0], %f48
+
+ ! CHECK: st %f29, [%l0] ! encoding: [0xfb,0x24,0x00,0x00]
+ ! CHECK: std %f48, [%l0] ! encoding: [0xe3,0x3c,0x00,0x00]
+ st %f29, [%l0]
+ std %f48, [%l0]
diff --git a/test/MC/Sparc/sparc-mem-instructions.s b/test/MC/Sparc/sparc-mem-instructions.s
index c10c8781fd88..1d098fe158fd 100644
--- a/test/MC/Sparc/sparc-mem-instructions.s
+++ b/test/MC/Sparc/sparc-mem-instructions.s
@@ -46,6 +46,15 @@
! CHECK: lda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x86,0x10,0x76]
lda [%i0 + %l6] 131, %o2
+ ! CHECK: ldd [%i0+%l6], %o2 ! encoding: [0xd4,0x1e,0x00,0x16]
+ ldd [%i0 + %l6], %o2
+ ! CHECK: ldd [%i0+32], %o2 ! encoding: [0xd4,0x1e,0x20,0x20]
+ ldd [%i0 + 32], %o2
+ ! CHECK: ldd [%g1], %o2 ! encoding: [0xd4,0x18,0x40,0x00]
+ ldd [%g1], %o2
+ ! CHECK: ldda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x9e,0x10,0x76]
+ ldda [%i0 + %l6] 131, %o2
+
! CHECK: stb %o2, [%i0+%l6] ! encoding: [0xd4,0x2e,0x00,0x16]
stb %o2, [%i0 + %l6]
! CHECK: stb %o2, [%i0+32] ! encoding: [0xd4,0x2e,0x20,0x20]
@@ -73,6 +82,15 @@
! CHECK: sta %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xa6,0x10,0x76]
sta %o2, [%i0 + %l6] 131
+ ! CHECK: std %o2, [%i0+%l6] ! encoding: [0xd4,0x3e,0x00,0x16]
+ std %o2, [%i0 + %l6]
+ ! CHECK: std %o2, [%i0+32] ! encoding: [0xd4,0x3e,0x20,0x20]
+ std %o2, [%i0 + 32]
+ ! CHECK: std %o2, [%g1] ! encoding: [0xd4,0x38,0x40,0x00]
+ std %o2, [%g1]
+ ! CHECK: stda %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xbe,0x10,0x76]
+ stda %o2, [%i0 + %l6] 131
+
! CHECK: flush %g1+%g2 ! encoding: [0x81,0xd8,0x40,0x02]
flush %g1 + %g2
! CHECK: flush %g1+8 ! encoding: [0x81,0xd8,0x60,0x08]
diff --git a/test/MC/Sparc/sparc-pic.s b/test/MC/Sparc/sparc-pic.s
index 5430d1fea103..1c935e0842ce 100644
--- a/test/MC/Sparc/sparc-pic.s
+++ b/test/MC/Sparc/sparc-pic.s
@@ -1,16 +1,36 @@
-! RUN: llvm-mc %s -arch=sparcv9 --relocation-model=pic -filetype=obj | llvm-readobj -r | FileCheck %s
+! RUN: llvm-mc %s -arch=sparcv9 --relocation-model=pic -filetype=obj | llvm-readobj -r | FileCheck --check-prefix=PIC %s
+! RUN: llvm-mc %s -arch=sparcv9 --relocation-model=static -filetype=obj | llvm-readobj -r | FileCheck --check-prefix=NOPIC %s
-! CHECK: Relocations [
-! CHECK-NOT: 0x{{[0-9,A-F]+}} R_SPARC_WPLT30 .text 0xC
-! CHECK: 0x{{[0-9,A-F]+}} R_SPARC_PC22 _GLOBAL_OFFSET_TABLE_ 0x4
-! CHECK-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_PC10 _GLOBAL_OFFSET_TABLE_ 0x8
-! CHECK-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT22 AGlobalVar 0x0
-! CHECK-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT10 AGlobalVar 0x0
-! CHECK-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT22 .LC0 0x0
-! CHECK-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT10 .LC0 0x0
-! CHECK-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_WPLT30 bar 0x0
-! CHECK: ]
+! PIC: Relocations [
+! PIC-NOT: 0x{{[0-9,A-F]+}} R_SPARC_WPLT30 .text 0xC
+! PIC: 0x{{[0-9,A-F]+}} R_SPARC_PC22 _GLOBAL_OFFSET_TABLE_ 0x4
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_PC10 _GLOBAL_OFFSET_TABLE_ 0x8
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_PC22 _GLOBAL_OFFSET_TABLE_ 0x0
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_PC10 _GLOBAL_OFFSET_TABLE_ 0x0
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT22 AGlobalVar 0x0
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT10 AGlobalVar 0x0
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT22 AGlobalVar 0x0
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT10 AGlobalVar 0x0
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT22 .LC0 0x0
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_GOT10 .LC0 0x0
+! PIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_WPLT30 bar 0x0
+! PIC: ]
+
+! NOPIC: Relocations [
+! NOPIC-NOT: 0x{{[0-9,A-F]+}} R_SPARC_WPLT30 .text 0xC
+! NOPIC: 0x{{[0-9,A-F]+}} R_SPARC_HI22 _GLOBAL_OFFSET_TABLE_ 0x4
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_LO10 _GLOBAL_OFFSET_TABLE_ 0x8
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_HI22 _GLOBAL_OFFSET_TABLE_ 0x0
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_LO10 _GLOBAL_OFFSET_TABLE_ 0x0
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_HI22 AGlobalVar 0x0
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_LO10 AGlobalVar 0x0
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_HI22 AGlobalVar 0x0
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_LO10 AGlobalVar 0x0
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_HI22 .rodata 0x0
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_LO10 .rodata 0x0
+! NOPIC-NEXT: 0x{{[0-9,A-F]+}} R_SPARC_WDISP30 bar 0x0
+! NOPIC: ]
.section ".rodata"
.align 8
@@ -33,9 +53,11 @@ foo:
sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp6-.Ltmp4)), %i1
.Ltmp5:
or %i1, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.Ltmp4)), %i1
+ set _GLOBAL_OFFSET_TABLE_, %i1
add %i1, %o7, %i1
sethi %hi(AGlobalVar), %i2
add %i2, %lo(AGlobalVar), %i2
+ set AGlobalVar, %i2
ldx [%i1+%i2], %i3
ldx [%i3], %i3
sethi %hi(.LC0), %i2
diff --git a/test/MC/Sparc/sparc-relocations.s b/test/MC/Sparc/sparc-relocations.s
index a5b7bafa4f57..58ad37e1da53 100644
--- a/test/MC/Sparc/sparc-relocations.s
+++ b/test/MC/Sparc/sparc-relocations.s
@@ -18,7 +18,7 @@
call foo
! CHECK: or %g1, %lo(sym), %g3 ! encoding: [0x86,0x10,0b011000AA,A]
- ! CHECK-NEXT ! fixup A - offset: 0, value: %lo(sym), kind: fixup_sparc_lo10
+ ! CHECK-NEXT: ! fixup A - offset: 0, value: %lo(sym), kind: fixup_sparc_lo10
or %g1, %lo(sym), %g3
! CHECK: sethi %hi(sym), %l0 ! encoding: [0x21,0b00AAAAAA,A,A]
@@ -26,15 +26,15 @@
sethi %hi(sym), %l0
! CHECK: sethi %h44(sym), %l0 ! encoding: [0x21,0b00AAAAAA,A,A]
- ! CHECK-NEXT: ! fixup A - offset: 0, value: %h44(sym), kind: fixup_sparc_h44
+ ! CHECK-NEXT: ! fixup A - offset: 0, value: %h44(sym), kind: fixup_sparc_h44
sethi %h44(sym), %l0
! CHECK: or %g1, %m44(sym), %g3 ! encoding: [0x86,0x10,0b011000AA,A]
- ! CHECK-NEXT ! fixup A - offset: 0, value: %m44(sym), kind: fixup_sparc_m44
+ ! CHECK-NEXT: ! fixup A - offset: 0, value: %m44(sym), kind: fixup_sparc_m44
or %g1, %m44(sym), %g3
! CHECK: or %g1, %l44(sym), %g3 ! encoding: [0x86,0x10,0b0110AAAA,A]
- ! CHECK-NEXT ! fixup A - offset: 0, value: %l44(sym), kind: fixup_sparc_l44
+ ! CHECK-NEXT: ! fixup A - offset: 0, value: %l44(sym), kind: fixup_sparc_l44
or %g1, %l44(sym), %g3
! CHECK: sethi %hh(sym), %l0 ! encoding: [0x21,0b00AAAAAA,A,A]
@@ -42,5 +42,5 @@
sethi %hh(sym), %l0
! CHECK: or %g1, %hm(sym), %g3 ! encoding: [0x86,0x10,0b011000AA,A]
- ! CHECK-NEXT ! fixup A - offset: 0, value: %hm(sym), kind: fixup_sparc_hm
+ ! CHECK-NEXT: ! fixup A - offset: 0, value: %hm(sym), kind: fixup_sparc_hm
or %g1, %hm(sym), %g3
diff --git a/test/MC/Sparc/sparc-special-registers.s b/test/MC/Sparc/sparc-special-registers.s
index 2cb57d720c4f..235c4cfedcf8 100644
--- a/test/MC/Sparc/sparc-special-registers.s
+++ b/test/MC/Sparc/sparc-special-registers.s
@@ -33,3 +33,21 @@
! CHECK: wr %i0, 5, %tbr ! encoding: [0x81,0x9e,0x20,0x05]
wr %i0, 5, %tbr
+
+ ! CHECK: rd %asr6, %i0 ! encoding: [0xb1,0x41,0x80,0x00]
+ rd %fprs, %i0
+
+ ! CHECK: wr %i0, 7, %asr6 ! encoding: [0x8d,0x86,0x20,0x07]
+ wr %i0, 7, %fprs
+
+ ! CHECK: ld [%g2+20], %fsr ! encoding: [0xc1,0x08,0xa0,0x14]
+ ld [%g2 + 20],%fsr
+
+ ! CHECK: ld [%g2+%i5], %fsr ! encoding: [0xc1,0x08,0x80,0x1d]
+ ld [%g2 + %i5],%fsr
+
+ ! CHECK: st %fsr, [%g2+20] ! encoding: [0xc1,0x28,0xa0,0x14]
+ st %fsr,[%g2 + 20]
+
+ ! CHECK: st %fsr, [%g2+%i5] ! encoding: [0xc1,0x28,0x80,0x1d]
+ st %fsr,[%g2 + %i5]
diff --git a/test/MC/Sparc/sparc-synthetic-instructions.s b/test/MC/Sparc/sparc-synthetic-instructions.s
index 09fd30c09e28..f83c8c2893ac 100644
--- a/test/MC/Sparc/sparc-synthetic-instructions.s
+++ b/test/MC/Sparc/sparc-synthetic-instructions.s
@@ -27,13 +27,33 @@
! CHECK: or %g1, %lo(40000), %g1 ! encoding: [0x82,0x10,0b011000AA,A]
! CHECK: ! fixup A - offset: 0, value: %lo(40000), kind: fixup_sparc_lo10
set 40000, %g1
- ! CHECK: mov %lo(1), %g1 ! encoding: [0x82,0x10,0b001000AA,A]
- ! CHECK: ! fixup A - offset: 0, value: %lo(1), kind: fixup_sparc_lo10
+ ! CHECK: mov 1, %g1 ! encoding: [0x82,0x10,0x20,0x01]
set 1, %g1
! CHECK: sethi %hi(32768), %g1 ! encoding: [0x03,0b00AAAAAA,A,A]
! CHECK: ! fixup A - offset: 0, value: %hi(32768), kind: fixup_sparc_hi22
set 32768, %g1
+ ! Expect a 'sethi' without an 'or'.
+ ! CHECK: sethi %hi(268431360), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! CHECK: ! fixup A - offset: 0, value: %hi(268431360), kind: fixup_sparc_hi22
+ set 0x0ffff000, %o1
+
+ ! CHECK: sethi %hi(268433408), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! CHECK: ! fixup A - offset: 0, value: %hi(268433408), kind: fixup_sparc_hi22
+ set 0x0ffff800, %o1
+
+ ! This is the boundary case that uses the lowest of the 22 bits in sethi.
+ ! CHECK: sethi %hi(268434432), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! CHECK: ! fixup A - offset: 0, value: %hi(268434432), kind: fixup_sparc_hi22
+ set 0x0ffffc00, %o1
+
+ ! Now the synthetic instruction becomes two instructions.
+ ! CHECK: sethi %hi(2147483647), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! CHECK: ! fixup A - offset: 0, value: %hi(2147483647), kind: fixup_sparc_hi22
+ ! CHECK: or %o1, %lo(2147483647), %o1 ! encoding: [0x92,0x12,0b011000AA,A]
+ ! CHECK: ! fixup A - offset: 0, value: %lo(2147483647), kind: fixup_sparc_lo10
+ set 2147483647, %o1
+
! CHECK: xnor %g1, %g0, %g2 ! encoding: [0x84,0x38,0x40,0x00]
not %g1, %g2
! CHECK: xnor %g1, %g0, %g1 ! encoding: [0x82,0x38,0x40,0x00]
@@ -143,3 +163,51 @@
wr %i0, %tbr
! CHECK: wr %g0, 5, %tbr ! encoding: [0x81,0x98,0x20,0x05]
wr 5, %tbr
+
+! The following tests exercise 'set' in such a way that its output differs
+! depending on whether targeting V8 or V9.
+!
+! RUN: llvm-mc %s -arch=sparc -show-encoding | FileCheck %s --check-prefix=V8
+! RUN: llvm-mc %s -arch=sparcv9 -show-encoding | FileCheck %s --check-prefix=V9
+
+ ! V8: mov -1, %o1 ! encoding: [0x92,0x10,0x3f,0xff]
+ ! V9: sethi %hi(-1), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! V9: ! fixup A - offset: 0, value: %hi(-1), kind: fixup_sparc_hi22
+ ! V9: or %o1, %lo(-1), %o1 ! encoding: [0x92,0x12,0b011000AA,A]
+ ! V9: ! fixup A - offset: 0, value: %lo(-1), kind: fixup_sparc_lo10
+ set 0xffffffff, %o1
+
+ ! V8: mov -2, %o1 ! encoding: [0x92,0x10,0x3f,0xfe]
+ ! V9: sethi %hi(-2), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! V9: ! fixup A - offset: 0, value: %hi(-2), kind: fixup_sparc_hi22
+ ! V9: or %o1, %lo(-2), %o1 ! encoding: [0x92,0x12,0b011000AA,A]
+ ! V9: ! fixup A - offset: 0, value: %lo(-2), kind: fixup_sparc_lo10
+ set 0xfffffffe, %o1
+
+ ! V8: mov -16, %o1 ! encoding: [0x92,0x10,0x3f,0xf0]
+ ! V9: sethi %hi(-16), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! V9: ! fixup A - offset: 0, value: %hi(-16), kind: fixup_sparc_hi22
+ ! V9: or %o1, %lo(-16), %o1 ! encoding: [0x92,0x12,0b011000AA,A]
+ ! V9: ! fixup A - offset: 0, value: %lo(-16), kind: fixup_sparc_lo10
+ set 0xfffffff0, %o1
+
+ ! V8: mov -256, %o1 ! encoding: [0x92,0x10,0x3f,0x00]
+ ! V9: sethi %hi(-256), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! V9: ! fixup A - offset: 0, value: %hi(-256), kind: fixup_sparc_hi22
+ ! V9: or %o1, %lo(-256), %o1 ! encoding: [0x92,0x12,0b011000AA,A]
+ ! V9: ! fixup A - offset: 0, value: %lo(-256), kind: fixup_sparc_lo10
+ set 0xffffff00, %o1
+
+ ! V8: mov -4096, %o1 ! encoding: [0x92,0x10,0x30,0x00]
+ ! V9: sethi %hi(-4096), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! V9: ! fixup A - offset: 0, value: %hi(-4096), kind: fixup_sparc_hi22
+ set 0xfffff000, %o1
+
+ ! These results are the same for V8 and V9, so this test could have
+ ! been with the others that weren't segregated by architecture,
+ ! but logically it belongs here as a boundary case.
+ ! V8: sethi %hi(-8192), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! V8: ! fixup A - offset: 0, value: %hi(-8192), kind: fixup_sparc_hi22
+ ! V9: sethi %hi(-8192), %o1 ! encoding: [0x13,0b00AAAAAA,A,A]
+ ! V9: ! fixup A - offset: 0, value: %hi(-8192), kind: fixup_sparc_hi22
+ set 0xffffe000, %o1
diff --git a/test/MC/Sparc/sparcv9-instructions.s b/test/MC/Sparc/sparcv9-instructions.s
index 37f4c8b2f6b9..2f90d4360dd9 100644
--- a/test/MC/Sparc/sparcv9-instructions.s
+++ b/test/MC/Sparc/sparcv9-instructions.s
@@ -21,3 +21,275 @@
! V9: subxcc %g1, %g2, %g3 ! encoding: [0x86,0xe0,0x40,0x02]
subccc %g1, %g2, %g3
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: popc %g1, %g2
+ ! V9: popc %g1, %g2 ! encoding: [0x85,0x70,0x00,0x01]
+ popc %g1, %g2
+
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: signx %g1, %g2
+ ! V9: sra %g1, %g0, %g2 ! encoding: [0x85,0x38,0x40,0x00]
+ signx %g1, %g2
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: signx %g1
+ ! V9: sra %g1, %g0, %g1 ! encoding: [0x83,0x38,0x40,0x00]
+ signx %g1
+
+ ! V8: error: invalid instruction mnemonic
+ ! V8-NEXT: lduw [%i0 + %l6], %o2
+ ! V9: ld [%i0+%l6], %o2 ! encoding: [0xd4,0x06,0x00,0x16]
+ lduw [%i0 + %l6], %o2
+ ! V8: error: invalid instruction mnemonic
+ ! V8-NEXT: lduw [%i0 + 32], %o2
+ ! V9: ld [%i0+32], %o2 ! encoding: [0xd4,0x06,0x20,0x20]
+ lduw [%i0 + 32], %o2
+ ! V8: error: invalid instruction mnemonic
+ ! V8-NEXT: lduw [%g1], %o2
+ ! V9: ld [%g1], %o2 ! encoding: [0xd4,0x00,0x40,0x00]
+ lduw [%g1], %o2
+ ! V8: error: invalid instruction mnemonic
+ ! V8-NEXT: lduwa [%i0 + %l6] 131, %o2
+ ! V9: lda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x86,0x10,0x76]
+ lduwa [%i0 + %l6] 131, %o2
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: lda [%l0] 0xf0, %f29
+ ! V9: lda [%l0] 240, %f29 ! encoding: [0xfb,0x84,0x1e,0x00]
+ lda [%l0] 0xf0, %f29
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: ldda [%l0] 0xf0, %f48
+ ! V9: ldda [%l0] 240, %f48 ! encoding: [0xe3,0x9c,0x1e,0x00]
+ ldda [%l0] 0xf0, %f48
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: ldqa [%l0] 0xf0, %f48
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: ldq [%l0], %f48
+ ! V9: ldqa [%l0] 240, %f48 ! encoding: [0xe3,0x94,0x1e,0x00]
+ ! V9: ldq [%l0], %f48 ! encoding: [0xe3,0x14,0x00,0x00]
+ ldqa [%l0] 0xf0, %f48
+ ldq [%l0], %f48
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: sta %f29, [%l0] 0xf0
+ ! V9: sta %f29, [%l0] 240 ! encoding: [0xfb,0xa4,0x1e,0x00]
+ sta %f29, [%l0] 0xf0
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: stda %f48, [%l0] 0xf0
+ ! V9: stda %f48, [%l0] 240 ! encoding: [0xe3,0xbc,0x1e,0x00]
+ stda %f48, [%l0] 0xf0
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: stqa %f48, [%l0] 0xf0
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: stq %f48, [%l0]
+ ! V9: stqa %f48, [%l0] 240 ! encoding: [0xe3,0xb4,0x1e,0x00]
+ ! V9: stq %f48, [%l0] ! encoding: [0xe3,0x34,0x00,0x00]
+ stqa %f48, [%l0] 0xf0
+ stq %f48, [%l0]
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: ldx [%g2 + 20],%fsr
+ ! V9: ldx [%g2+20], %fsr ! encoding: [0xc3,0x08,0xa0,0x14]
+ ldx [%g2 + 20],%fsr
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: ldx [%g2 + %i5],%fsr
+ ! V9: ldx [%g2+%i5], %fsr ! encoding: [0xc3,0x08,0x80,0x1d]
+ ldx [%g2 + %i5],%fsr
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: stx %fsr,[%g2 + 20]
+ ! V9: stx %fsr, [%g2+20] ! encoding: [0xc3,0x28,0xa0,0x14]
+ stx %fsr,[%g2 + 20]
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: stx %fsr,[%g2 + %i5]
+ ! V9: stx %fsr, [%g2+%i5] ! encoding: [0xc3,0x28,0x80,0x1d]
+ stx %fsr,[%g2 + %i5]
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%tpc
+ ! V9: wrpr %g6, %fp, %tpc ! encoding: [0x81,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%tpc
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%tnpc
+ ! V9: wrpr %g6, %fp, %tnpc ! encoding: [0x83,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%tnpc
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%tstate
+ ! V9: wrpr %g6, %fp, %tstate ! encoding: [0x85,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%tstate
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%tt
+ ! V9: wrpr %g6, %fp, %tt ! encoding: [0x87,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%tt
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%tick
+ ! V9: wrpr %g6, %fp, %tick ! encoding: [0x89,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%tick
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%tba
+ ! V9: wrpr %g6, %fp, %tba ! encoding: [0x8b,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%tba
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%pstate
+ ! V9: wrpr %g6, %fp, %pstate ! encoding: [0x8d,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%pstate
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%tl
+ ! V9: wrpr %g6, %fp, %tl ! encoding: [0x8f,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%tl
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%pil
+ ! V9: wrpr %g6, %fp, %pil ! encoding: [0x91,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%pil
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%cwp
+ ! V9: wrpr %g6, %fp, %cwp ! encoding: [0x93,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%cwp
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%cansave
+ ! V9: wrpr %g6, %fp, %cansave ! encoding: [0x95,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%cansave
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%canrestore
+ ! V9: wrpr %g6, %fp, %canrestore ! encoding: [0x97,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%canrestore
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%cleanwin
+ ! V9: wrpr %g6, %fp, %cleanwin ! encoding: [0x99,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%cleanwin
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%otherwin
+ ! V9: wrpr %g6, %fp, %otherwin ! encoding: [0x9b,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%otherwin
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,%i6,%wstate
+ ! V9: wrpr %g6, %fp, %wstate ! encoding: [0x9d,0x91,0x80,0x1e]
+ wrpr %g6,%i6,%wstate
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%tpc
+ ! V9: wrpr %g6, 255, %tpc ! encoding: [0x81,0x91,0xa0,0xff]
+ wrpr %g6,255,%tpc
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%tnpc
+ ! V9: wrpr %g6, 255, %tnpc ! encoding: [0x83,0x91,0xa0,0xff]
+ wrpr %g6,255,%tnpc
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%tstate
+ ! V9: wrpr %g6, 255, %tstate ! encoding: [0x85,0x91,0xa0,0xff]
+ wrpr %g6,255,%tstate
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%tt
+ ! V9: wrpr %g6, 255, %tt ! encoding: [0x87,0x91,0xa0,0xff]
+ wrpr %g6,255,%tt
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%tick
+ ! V9: wrpr %g6, 255, %tick ! encoding: [0x89,0x91,0xa0,0xff]
+ wrpr %g6,255,%tick
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%tba
+ ! V9: wrpr %g6, 255, %tba ! encoding: [0x8b,0x91,0xa0,0xff]
+ wrpr %g6,255,%tba
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%pstate
+ ! V9: wrpr %g6, 255, %pstate ! encoding: [0x8d,0x91,0xa0,0xff]
+ wrpr %g6,255,%pstate
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%tl
+ ! V9: wrpr %g6, 255, %tl ! encoding: [0x8f,0x91,0xa0,0xff]
+ wrpr %g6,255,%tl
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%pil
+ ! V9: wrpr %g6, 255, %pil ! encoding: [0x91,0x91,0xa0,0xff]
+ wrpr %g6,255,%pil
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%cwp
+ ! V9: wrpr %g6, 255, %cwp ! encoding: [0x93,0x91,0xa0,0xff]
+ wrpr %g6,255,%cwp
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%cansave
+ ! V9: wrpr %g6, 255, %cansave ! encoding: [0x95,0x91,0xa0,0xff]
+ wrpr %g6,255,%cansave
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%canrestore
+ ! V9: wrpr %g6, 255, %canrestore ! encoding: [0x97,0x91,0xa0,0xff]
+ wrpr %g6,255,%canrestore
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%cleanwin
+ ! V9: wrpr %g6, 255, %cleanwin ! encoding: [0x99,0x91,0xa0,0xff]
+ wrpr %g6,255,%cleanwin
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%otherwin
+ ! V9: wrpr %g6, 255, %otherwin ! encoding: [0x9b,0x91,0xa0,0xff]
+ wrpr %g6,255,%otherwin
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: wrpr %g6,255,%wstate
+ ! V9: wrpr %g6, 255, %wstate ! encoding: [0x9d,0x91,0xa0,0xff]
+ wrpr %g6,255,%wstate
+
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %tpc,%i5
+ ! V9: rdpr %tpc, %i5 ! encoding: [0xbb,0x50,0x00,0x00]
+ rdpr %tpc,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %tnpc,%i5
+ ! V9: rdpr %tnpc, %i5 ! encoding: [0xbb,0x50,0x40,0x00]
+ rdpr %tnpc,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %tstate,%i5
+ ! V9: rdpr %tstate, %i5 ! encoding: [0xbb,0x50,0x80,0x00]
+ rdpr %tstate,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %tt,%i5
+ ! V9: rdpr %tt, %i5 ! encoding: [0xbb,0x50,0xc0,0x00]
+ rdpr %tt,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %tick,%i5
+ ! V9: rdpr %tick, %i5 ! encoding: [0xbb,0x51,0x00,0x00]
+ rdpr %tick,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %tba,%i5
+ ! V9: rdpr %tba, %i5 ! encoding: [0xbb,0x51,0x40,0x00]
+ rdpr %tba,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %pstate,%i5
+ ! V9: rdpr %pstate, %i5 ! encoding: [0xbb,0x51,0x80,0x00]
+ rdpr %pstate,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %tl,%i5
+ ! V9: rdpr %tl, %i5 ! encoding: [0xbb,0x51,0xc0,0x00]
+ rdpr %tl,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %pil,%i5
+ ! V9: rdpr %pil, %i5 ! encoding: [0xbb,0x52,0x00,0x00]
+ rdpr %pil,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %cwp,%i5
+ ! V9: rdpr %cwp, %i5 ! encoding: [0xbb,0x52,0x40,0x00]
+ rdpr %cwp,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %cansave,%i5
+ ! V9: rdpr %cansave, %i5 ! encoding: [0xbb,0x52,0x80,0x00]
+ rdpr %cansave,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %canrestore,%i5
+ ! V9: rdpr %canrestore, %i5 ! encoding: [0xbb,0x52,0xc0,0x00]
+ rdpr %canrestore,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %cleanwin,%i5
+ ! V9: rdpr %cleanwin, %i5 ! encoding: [0xbb,0x53,0x00,0x00]
+ rdpr %cleanwin,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %otherwin,%i5
+ ! V9: rdpr %otherwin, %i5 ! encoding: [0xbb,0x53,0x40,0x00]
+ rdpr %otherwin,%i5
+ ! V8: error: instruction requires a CPU feature not currently enabled
+ ! V8-NEXT: rdpr %wstate,%i5
+ ! V9: rdpr %wstate, %i5 ! encoding: [0xbb,0x53,0x80,0x00]
+ rdpr %wstate,%i5
diff --git a/test/MC/SystemZ/fixups.s b/test/MC/SystemZ/fixups.s
index 8354121a01e9..ea3b690d253b 100644
--- a/test/MC/SystemZ/fixups.s
+++ b/test/MC/SystemZ/fixups.s
@@ -37,16 +37,16 @@
# CHECK: brasl %r14, target@PLT:tls_gdcall:sym # encoding: [0xc0,0xe5,A,A,A,A]
# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL
# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSGD, kind: FK_390_TLS_CALL
-# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0
# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2
+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0
.align 16
brasl %r14, target@plt:tls_gdcall:sym
# CHECK: brasl %r14, target@PLT:tls_ldcall:sym # encoding: [0xc0,0xe5,A,A,A,A]
# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL
# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSLDM, kind: FK_390_TLS_CALL
-# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0
# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2
+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0
.align 16
brasl %r14, target@plt:tls_ldcall:sym
@@ -65,16 +65,16 @@
# CHECK: bras %r14, target@PLT:tls_gdcall:sym # encoding: [0xa7,0xe5,A,A]
# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL
# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSGD, kind: FK_390_TLS_CALL
-# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0
# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2
+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0
.align 16
bras %r14, target@plt:tls_gdcall:sym
# CHECK: bras %r14, target@PLT:tls_ldcall:sym # encoding: [0xa7,0xe5,A,A]
# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL
# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSLDM, kind: FK_390_TLS_CALL
-# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0
# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2
+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0
.align 16
bras %r14, target@plt:tls_ldcall:sym
diff --git a/test/MC/SystemZ/insn-good-z13.s b/test/MC/SystemZ/insn-good-z13.s
index 621b946d19dd..db783295e546 100644
--- a/test/MC/SystemZ/insn-good-z13.s
+++ b/test/MC/SystemZ/insn-good-z13.s
@@ -4681,6 +4681,7 @@
vzero %v31
#CHECK: wcdgb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc3]
+#CHECK: wcdgb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc3]
#CHECK: wcdgb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc3]
#CHECK: wcdgb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3]
#CHECK: wcdgb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3]
@@ -4689,6 +4690,7 @@
#CHECK: wcdgb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc3]
wcdgb %v0, %v0, 0, 0
+ wcdgb %f0, %f0, 0, 0
wcdgb %v0, %v0, 0, 15
wcdgb %v0, %v0, 4, 0
wcdgb %v0, %v0, 12, 0
@@ -4697,6 +4699,7 @@
wcdgb %v14, %v17, 4, 10
#CHECK: wcdlgb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc1]
+#CHECK: wcdlgb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc1]
#CHECK: wcdlgb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc1]
#CHECK: wcdlgb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1]
#CHECK: wcdlgb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1]
@@ -4705,6 +4708,7 @@
#CHECK: wcdlgb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc1]
wcdlgb %v0, %v0, 0, 0
+ wcdlgb %f0, %f0, 0, 0
wcdlgb %v0, %v0, 0, 15
wcdlgb %v0, %v0, 4, 0
wcdlgb %v0, %v0, 12, 0
@@ -4713,6 +4717,7 @@
wcdlgb %v14, %v17, 4, 10
#CHECK: wcgdb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc2]
+#CHECK: wcgdb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc2]
#CHECK: wcgdb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc2]
#CHECK: wcgdb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2]
#CHECK: wcgdb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2]
@@ -4721,6 +4726,7 @@
#CHECK: wcgdb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc2]
wcgdb %v0, %v0, 0, 0
+ wcgdb %f0, %f0, 0, 0
wcgdb %v0, %v0, 0, 15
wcgdb %v0, %v0, 4, 0
wcgdb %v0, %v0, 12, 0
@@ -4729,6 +4735,7 @@
wcgdb %v14, %v17, 4, 10
#CHECK: wclgdb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc0]
+#CHECK: wclgdb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc0]
#CHECK: wclgdb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc0]
#CHECK: wclgdb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0]
#CHECK: wclgdb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0]
@@ -4737,6 +4744,7 @@
#CHECK: wclgdb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc0]
wclgdb %v0, %v0, 0, 0
+ wclgdb %f0, %f0, 0, 0
wclgdb %v0, %v0, 0, 15
wclgdb %v0, %v0, 4, 0
wclgdb %v0, %v0, 12, 0
@@ -4745,31 +4753,36 @@
wclgdb %v14, %v17, 4, 10
#CHECK: wfadb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe3]
+#CHECK: wfadb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe3]
#CHECK: wfadb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe3]
#CHECK: wfadb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe3]
#CHECK: wfadb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe3]
#CHECK: wfadb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe3]
wfadb %v0, %v0, %v0
+ wfadb %f0, %f0, %f0
wfadb %v0, %v0, %v31
wfadb %v0, %v31, %v0
wfadb %v31, %v0, %v0
wfadb %v18, %v3, %v20
#CHECK: wfcdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xcb]
+#CHECK: wfcdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xcb]
#CHECK: wfcdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xcb]
#CHECK: wfcdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xcb]
#CHECK: wfcdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xcb]
#CHECK: wfcdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xcb]
#CHECK: wfcdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xcb]
-
+
wfcdb %v0, %v0
+ wfcdb %f0, %f0
wfcdb %v0, %v15
wfcdb %v0, %v31
wfcdb %v15, %v0
wfcdb %v31, %v0
wfcdb %v14, %v17
-
+
+#CHECK: wfcedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe8]
#CHECK: wfcedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe8]
#CHECK: wfcedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe8]
#CHECK: wfcedb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe8]
@@ -4777,84 +4790,98 @@
#CHECK: wfcedb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe8]
wfcedb %v0, %v0, %v0
+ wfcedb %f0, %f0, %f0
wfcedb %v0, %v0, %v31
wfcedb %v0, %v31, %v0
wfcedb %v31, %v0, %v0
wfcedb %v18, %v3, %v20
#CHECK: wfcedbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xe8]
+#CHECK: wfcedbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xe8]
#CHECK: wfcedbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xe8]
#CHECK: wfcedbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xe8]
#CHECK: wfcedbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xe8]
#CHECK: wfcedbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xe8]
wfcedbs %v0, %v0, %v0
+ wfcedbs %f0, %f0, %f0
wfcedbs %v0, %v0, %v31
wfcedbs %v0, %v31, %v0
wfcedbs %v31, %v0, %v0
wfcedbs %v18, %v3, %v20
#CHECK: wfchdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xeb]
+#CHECK: wfchdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xeb]
#CHECK: wfchdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xeb]
#CHECK: wfchdb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xeb]
#CHECK: wfchdb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xeb]
#CHECK: wfchdb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xeb]
wfchdb %v0, %v0, %v0
+ wfchdb %f0, %f0, %f0
wfchdb %v0, %v0, %v31
wfchdb %v0, %v31, %v0
wfchdb %v31, %v0, %v0
wfchdb %v18, %v3, %v20
#CHECK: wfchdbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xeb]
+#CHECK: wfchdbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xeb]
#CHECK: wfchdbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xeb]
#CHECK: wfchdbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xeb]
#CHECK: wfchdbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xeb]
#CHECK: wfchdbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xeb]
-
+
wfchdbs %v0, %v0, %v0
+ wfchdbs %f0, %f0, %f0
wfchdbs %v0, %v0, %v31
wfchdbs %v0, %v31, %v0
wfchdbs %v31, %v0, %v0
wfchdbs %v18, %v3, %v20
#CHECK: wfchedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xea]
+#CHECK: wfchedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xea]
#CHECK: wfchedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xea]
#CHECK: wfchedb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xea]
#CHECK: wfchedb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xea]
#CHECK: wfchedb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xea]
wfchedb %v0, %v0, %v0
+ wfchedb %f0, %f0, %f0
wfchedb %v0, %v0, %v31
wfchedb %v0, %v31, %v0
wfchedb %v31, %v0, %v0
wfchedb %v18, %v3, %v20
#CHECK: wfchedbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xea]
+#CHECK: wfchedbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xea]
#CHECK: wfchedbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xea]
#CHECK: wfchedbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xea]
#CHECK: wfchedbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xea]
#CHECK: wfchedbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xea]
wfchedbs %v0, %v0, %v0
+ wfchedbs %f0, %f0, %f0
wfchedbs %v0, %v0, %v31
wfchedbs %v0, %v31, %v0
wfchedbs %v31, %v0, %v0
wfchedbs %v18, %v3, %v20
#CHECK: wfddb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe5]
+#CHECK: wfddb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe5]
#CHECK: wfddb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe5]
#CHECK: wfddb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe5]
#CHECK: wfddb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe5]
#CHECK: wfddb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe5]
wfddb %v0, %v0, %v0
+ wfddb %f0, %f0, %f0
wfddb %v0, %v0, %v31
wfddb %v0, %v31, %v0
wfddb %v31, %v0, %v0
wfddb %v18, %v3, %v20
#CHECK: wfidb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc7]
+#CHECK: wfidb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc7]
#CHECK: wfidb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc7]
#CHECK: wfidb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7]
#CHECK: wfidb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7]
@@ -4863,6 +4890,7 @@
#CHECK: wfidb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc7]
wfidb %v0, %v0, 0, 0
+ wfidb %f0, %f0, 0, 0
wfidb %v0, %v0, 0, 15
wfidb %v0, %v0, 4, 0
wfidb %v0, %v0, 12, 0
@@ -4871,6 +4899,7 @@
wfidb %v14, %v17, 4, 10
#CHECK: wfkdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xca]
+#CHECK: wfkdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xca]
#CHECK: wfkdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xca]
#CHECK: wfkdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xca]
#CHECK: wfkdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xca]
@@ -4878,6 +4907,7 @@
#CHECK: wfkdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xca]
wfkdb %v0, %v0
+ wfkdb %f0, %f0
wfkdb %v0, %v15
wfkdb %v0, %v31
wfkdb %v15, %v0
@@ -4885,6 +4915,7 @@
wfkdb %v14, %v17
#CHECK: wflcdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xcc]
+#CHECK: wflcdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xcc]
#CHECK: wflcdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0xcc]
#CHECK: wflcdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xcc]
#CHECK: wflcdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0xcc]
@@ -4892,6 +4923,7 @@
#CHECK: wflcdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x34,0xcc]
wflcdb %v0, %v0
+ wflcdb %f0, %f0
wflcdb %v0, %v15
wflcdb %v0, %v31
wflcdb %v15, %v0
@@ -4899,6 +4931,7 @@
wflcdb %v14, %v17
#CHECK: wflndb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xcc]
+#CHECK: wflndb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xcc]
#CHECK: wflndb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x18,0x30,0xcc]
#CHECK: wflndb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xcc]
#CHECK: wflndb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x30,0xcc]
@@ -4906,6 +4939,7 @@
#CHECK: wflndb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x18,0x34,0xcc]
wflndb %v0, %v0
+ wflndb %f0, %f0
wflndb %v0, %v15
wflndb %v0, %v31
wflndb %v15, %v0
@@ -4913,6 +4947,7 @@
wflndb %v14, %v17
#CHECK: wflpdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x28,0x30,0xcc]
+#CHECK: wflpdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x28,0x30,0xcc]
#CHECK: wflpdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x28,0x30,0xcc]
#CHECK: wflpdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x28,0x34,0xcc]
#CHECK: wflpdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x28,0x30,0xcc]
@@ -4920,6 +4955,7 @@
#CHECK: wflpdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x28,0x34,0xcc]
wflpdb %v0, %v0
+ wflpdb %f0, %f0
wflpdb %v0, %v15
wflpdb %v0, %v31
wflpdb %v15, %v0
@@ -4927,6 +4963,7 @@
wflpdb %v14, %v17
#CHECK: wfmadb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8f]
+#CHECK: wfmadb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8f]
#CHECK: wfmadb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x8f]
#CHECK: wfmadb %f0, %f0, %v31, %f0 # encoding: [0xe7,0x00,0xf3,0x08,0x02,0x8f]
#CHECK: wfmadb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x03,0x08,0x04,0x8f]
@@ -4934,6 +4971,7 @@
#CHECK: wfmadb %f13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x08,0x97,0x8f]
wfmadb %v0, %v0, %v0, %v0
+ wfmadb %f0, %f0, %f0, %f0
wfmadb %v0, %v0, %v0, %v31
wfmadb %v0, %v0, %v31, %v0
wfmadb %v0, %v31, %v0, %v0
@@ -4941,18 +4979,21 @@
wfmadb %v13, %v17, %v21, %v25
#CHECK: wfmdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe7]
+#CHECK: wfmdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe7]
#CHECK: wfmdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe7]
#CHECK: wfmdb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe7]
#CHECK: wfmdb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe7]
#CHECK: wfmdb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe7]
wfmdb %v0, %v0, %v0
+ wfmdb %f0, %f0, %f0
wfmdb %v0, %v0, %v31
wfmdb %v0, %v31, %v0
wfmdb %v31, %v0, %v0
wfmdb %v18, %v3, %v20
#CHECK: wfmsdb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8e]
+#CHECK: wfmsdb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8e]
#CHECK: wfmsdb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x8e]
#CHECK: wfmsdb %f0, %f0, %v31, %f0 # encoding: [0xe7,0x00,0xf3,0x08,0x02,0x8e]
#CHECK: wfmsdb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x03,0x08,0x04,0x8e]
@@ -4960,6 +5001,7 @@
#CHECK: wfmsdb %f13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x08,0x97,0x8e]
wfmsdb %v0, %v0, %v0, %v0
+ wfmsdb %f0, %f0, %f0, %f0
wfmsdb %v0, %v0, %v0, %v31
wfmsdb %v0, %v0, %v31, %v0
wfmsdb %v0, %v31, %v0, %v0
@@ -4967,25 +5009,29 @@
wfmsdb %v13, %v17, %v21, %v25
#CHECK: wfsdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe2]
+#CHECK: wfsdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe2]
#CHECK: wfsdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe2]
#CHECK: wfsdb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe2]
#CHECK: wfsdb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe2]
#CHECK: wfsdb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe2]
-
+
wfsdb %v0, %v0, %v0
+ wfsdb %f0, %f0, %f0
wfsdb %v0, %v0, %v31
wfsdb %v0, %v31, %v0
wfsdb %v31, %v0, %v0
wfsdb %v18, %v3, %v20
-
+
+#CHECK: wfsqdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xce]
#CHECK: wfsqdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xce]
#CHECK: wfsqdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0xce]
#CHECK: wfsqdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xce]
#CHECK: wfsqdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0xce]
#CHECK: wfsqdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xce]
#CHECK: wfsqdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x34,0xce]
-
+
wfsqdb %v0, %v0
+ wfsqdb %f0, %f0
wfsqdb %v0, %v15
wfsqdb %v0, %v31
wfsqdb %v15, %v0
@@ -4993,14 +5039,16 @@
wfsqdb %v14, %v17
#CHECK: wftcidb %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0x4a]
+#CHECK: wftcidb %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0x4a]
#CHECK: wftcidb %f0, %f0, 4095 # encoding: [0xe7,0x00,0xff,0xf8,0x30,0x4a]
#CHECK: wftcidb %f0, %f15, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0x4a]
#CHECK: wftcidb %f0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0x4a]
#CHECK: wftcidb %f15, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0x4a]
#CHECK: wftcidb %v31, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0x4a]
#CHECK: wftcidb %f4, %v21, 1656 # encoding: [0xe7,0x45,0x67,0x88,0x34,0x4a]
-
+
wftcidb %v0, %v0, 0
+ wftcidb %f0, %f0, 0
wftcidb %v0, %v0, 4095
wftcidb %v0, %v15, 0
wftcidb %v0, %v31, 0
@@ -5009,6 +5057,7 @@
wftcidb %v4, %v21, 0x678
#CHECK: wldeb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc4]
+#CHECK: wldeb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc4]
#CHECK: wldeb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xc4]
#CHECK: wldeb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc4]
#CHECK: wldeb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x20,0xc4]
@@ -5016,6 +5065,7 @@
#CHECK: wldeb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x24,0xc4]
wldeb %v0, %v0
+ wldeb %f0, %f0
wldeb %v0, %v15
wldeb %v0, %v31
wldeb %v15, %v0
@@ -5023,6 +5073,7 @@
wldeb %v14, %v17
#CHECK: wledb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc5]
+#CHECK: wledb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc5]
#CHECK: wledb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc5]
#CHECK: wledb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5]
#CHECK: wledb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5]
@@ -5031,6 +5082,7 @@
#CHECK: wledb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc5]
wledb %v0, %v0, 0, 0
+ wledb %f0, %f0, 0, 0
wledb %v0, %v0, 0, 15
wledb %v0, %v0, 4, 0
wledb %v0, %v0, 12, 0
diff --git a/test/MC/SystemZ/insn-good.s b/test/MC/SystemZ/insn-good.s
index 23bd68a2f5d9..0e8cad4bdb7a 100644
--- a/test/MC/SystemZ/insn-good.s
+++ b/test/MC/SystemZ/insn-good.s
@@ -7946,6 +7946,62 @@
stc %r0, 4095(%r15,%r1)
stc %r15, 0
+#CHECK: stck 0 # encoding: [0xb2,0x05,0x00,0x00]
+#CHECK: stck 0(%r1) # encoding: [0xb2,0x05,0x10,0x00]
+#CHECK: stck 0(%r15) # encoding: [0xb2,0x05,0xf0,0x00]
+#CHECK: stck 4095 # encoding: [0xb2,0x05,0x0f,0xff]
+#CHECK: stck 4095(%r1) # encoding: [0xb2,0x05,0x1f,0xff]
+#CHECK: stck 4095(%r15) # encoding: [0xb2,0x05,0xff,0xff]
+
+ stck 0
+ stck 0(%r1)
+ stck 0(%r15)
+ stck 4095
+ stck 4095(%r1)
+ stck 4095(%r15)
+
+#CHECK: stckf 0 # encoding: [0xb2,0x7c,0x00,0x00]
+#CHECK: stckf 0(%r1) # encoding: [0xb2,0x7c,0x10,0x00]
+#CHECK: stckf 0(%r15) # encoding: [0xb2,0x7c,0xf0,0x00]
+#CHECK: stckf 4095 # encoding: [0xb2,0x7c,0x0f,0xff]
+#CHECK: stckf 4095(%r1) # encoding: [0xb2,0x7c,0x1f,0xff]
+#CHECK: stckf 4095(%r15) # encoding: [0xb2,0x7c,0xff,0xff]
+
+ stckf 0
+ stckf 0(%r1)
+ stckf 0(%r15)
+ stckf 4095
+ stckf 4095(%r1)
+ stckf 4095(%r15)
+
+#CHECK: stcke 0 # encoding: [0xb2,0x78,0x00,0x00]
+#CHECK: stcke 0(%r1) # encoding: [0xb2,0x78,0x10,0x00]
+#CHECK: stcke 0(%r15) # encoding: [0xb2,0x78,0xf0,0x00]
+#CHECK: stcke 4095 # encoding: [0xb2,0x78,0x0f,0xff]
+#CHECK: stcke 4095(%r1) # encoding: [0xb2,0x78,0x1f,0xff]
+#CHECK: stcke 4095(%r15) # encoding: [0xb2,0x78,0xff,0xff]
+
+ stcke 0
+ stcke 0(%r1)
+ stcke 0(%r15)
+ stcke 4095
+ stcke 4095(%r1)
+ stcke 4095(%r15)
+
+#CHECK: stfle 0 # encoding: [0xb2,0xb0,0x00,0x00]
+#CHECK: stfle 0(%r1) # encoding: [0xb2,0xb0,0x10,0x00]
+#CHECK: stfle 0(%r15) # encoding: [0xb2,0xb0,0xf0,0x00]
+#CHECK: stfle 4095 # encoding: [0xb2,0xb0,0x0f,0xff]
+#CHECK: stfle 4095(%r1) # encoding: [0xb2,0xb0,0x1f,0xff]
+#CHECK: stfle 4095(%r15) # encoding: [0xb2,0xb0,0xff,0xff]
+
+ stfle 0
+ stfle 0(%r1)
+ stfle 0(%r15)
+ stfle 4095
+ stfle 4095(%r1)
+ stfle 4095(%r15)
+
#CHECK: stcy %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x72]
#CHECK: stcy %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x72]
#CHECK: stcy %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x72]
diff --git a/test/MC/SystemZ/lit.local.cfg b/test/MC/SystemZ/lit.local.cfg
index 78c5738c7acc..2f3cf7d3f043 100644
--- a/test/MC/SystemZ/lit.local.cfg
+++ b/test/MC/SystemZ/lit.local.cfg
@@ -1,6 +1,2 @@
if not 'SystemZ' in config.root.targets:
config.unsupported = True
-
-# http://llvm.org/bugs/show_bug.cgi?id=20980
-if 'ubsan' in config.available_features:
- config.unsupported = True
diff --git a/test/MC/X86/X86_64-pku.s b/test/MC/X86/X86_64-pku.s
new file mode 100644
index 000000000000..8f8b227fa0c2
--- /dev/null
+++ b/test/MC/X86/X86_64-pku.s
@@ -0,0 +1,8 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+pku --show-encoding < %s | FileCheck %s
+// CHECK: rdpkru
+// CHECK: encoding: [0x0f,0x01,0xee]
+ rdpkru
+
+// CHECK: wrpkru
+// CHECK: encoding: [0x0f,0x01,0xef]
+ wrpkru \ No newline at end of file
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s
index 3bb7a5bcd2c3..658ca715a32a 100644
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@@ -12846,6 +12846,342 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xf1,0xfd,0x58,0x5a,0xaa,0xf8,0xfb,0xff,0xff]
vcvtpd2ps -1032(%rdx){1to8}, %ymm5
+// CHECK: vscalefsd %xmm21, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xcd,0x00,0x2d,0xed]
+ vscalefsd %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd %xmm21, %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xcd,0x02,0x2d,0xed]
+ vscalefsd %xmm21, %xmm22, %xmm21 {%k2}
+
+// CHECK: vscalefsd %xmm21, %xmm22, %xmm21 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0xcd,0x82,0x2d,0xed]
+ vscalefsd %xmm21, %xmm22, %xmm21 {%k2} {z}
+
+// CHECK: vscalefsd {rn-sae}, %xmm21, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xcd,0x10,0x2d,0xed]
+ vscalefsd {rn-sae}, %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd {ru-sae}, %xmm21, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xcd,0x50,0x2d,0xed]
+ vscalefsd {ru-sae}, %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd {rd-sae}, %xmm21, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xcd,0x30,0x2d,0xed]
+ vscalefsd {rd-sae}, %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd {rz-sae}, %xmm21, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xcd,0x70,0x2d,0xed]
+ vscalefsd {rz-sae}, %xmm21, %xmm22, %xmm21
+
+// CHECK: vscalefsd (%rcx), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x29]
+ vscalefsd (%rcx), %xmm22, %xmm21
+
+// CHECK: vscalefsd 291(%rax,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xcd,0x00,0x2d,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vscalefsd 291(%rax,%r14,8), %xmm22, %xmm21
+
+// CHECK: vscalefsd 1016(%rdx), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x6a,0x7f]
+ vscalefsd 1016(%rdx), %xmm22, %xmm21
+
+// CHECK: vscalefsd 1024(%rdx), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0xaa,0x00,0x04,0x00,0x00]
+ vscalefsd 1024(%rdx), %xmm22, %xmm21
+
+// CHECK: vscalefsd -1024(%rdx), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0x6a,0x80]
+ vscalefsd -1024(%rdx), %xmm22, %xmm21
+
+// CHECK: vscalefsd -1032(%rdx), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xcd,0x00,0x2d,0xaa,0xf8,0xfb,0xff,0xff]
+ vscalefsd -1032(%rdx), %xmm22, %xmm21
+
+// CHECK: vscalefss %xmm23, %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x32,0x05,0x08,0x2d,0xef]
+ vscalefss %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss %xmm23, %xmm15, %xmm13 {%k3}
+// CHECK: encoding: [0x62,0x32,0x05,0x0b,0x2d,0xef]
+ vscalefss %xmm23, %xmm15, %xmm13 {%k3}
+
+// CHECK: vscalefss %xmm23, %xmm15, %xmm13 {%k3} {z}
+// CHECK: encoding: [0x62,0x32,0x05,0x8b,0x2d,0xef]
+ vscalefss %xmm23, %xmm15, %xmm13 {%k3} {z}
+
+// CHECK: vscalefss {rn-sae}, %xmm23, %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x32,0x05,0x18,0x2d,0xef]
+ vscalefss {rn-sae}, %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss {ru-sae}, %xmm23, %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x32,0x05,0x58,0x2d,0xef]
+ vscalefss {ru-sae}, %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss {rd-sae}, %xmm23, %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x32,0x05,0x38,0x2d,0xef]
+ vscalefss {rd-sae}, %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss {rz-sae}, %xmm23, %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x32,0x05,0x78,0x2d,0xef]
+ vscalefss {rz-sae}, %xmm23, %xmm15, %xmm13
+
+// CHECK: vscalefss (%rcx), %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0x29]
+ vscalefss (%rcx), %xmm15, %xmm13
+
+// CHECK: vscalefss 291(%rax,%r14,8), %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x32,0x05,0x08,0x2d,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vscalefss 291(%rax,%r14,8), %xmm15, %xmm13
+
+// CHECK: vscalefss 508(%rdx), %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0x6a,0x7f]
+ vscalefss 508(%rdx), %xmm15, %xmm13
+
+// CHECK: vscalefss 512(%rdx), %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0xaa,0x00,0x02,0x00,0x00]
+ vscalefss 512(%rdx), %xmm15, %xmm13
+
+// CHECK: vscalefss -512(%rdx), %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0x6a,0x80]
+ vscalefss -512(%rdx), %xmm15, %xmm13
+
+// CHECK: vscalefss -516(%rdx), %xmm15, %xmm13
+// CHECK: encoding: [0x62,0x72,0x05,0x08,0x2d,0xaa,0xfc,0xfd,0xff,0xff]
+ vscalefss -516(%rdx), %xmm15, %xmm13
+
+// CHECK: vrndscalepd $171, %zmm7, %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xf7,0xab]
+ vrndscalepd $0xab, %zmm7, %zmm22
+
+// CHECK: vrndscalepd $171, %zmm7, %zmm22 {%k1}
+// CHECK: encoding: [0x62,0xe3,0xfd,0x49,0x09,0xf7,0xab]
+ vrndscalepd $0xab, %zmm7, %zmm22 {%k1}
+
+// CHECK: vrndscalepd $171, %zmm7, %zmm22 {%k1} {z}
+// CHECK: encoding: [0x62,0xe3,0xfd,0xc9,0x09,0xf7,0xab]
+ vrndscalepd $0xab, %zmm7, %zmm22 {%k1} {z}
+
+// CHECK: vrndscalepd $171,{sae}, %zmm7, %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0xab]
+ vrndscalepd $0xab,{sae}, %zmm7, %zmm22
+
+// CHECK: vrndscalepd $123, %zmm7, %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xf7,0x7b]
+ vrndscalepd $0x7b, %zmm7, %zmm22
+
+// CHECK: vrndscalepd $123,{sae}, %zmm7, %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0x7b]
+ vrndscalepd $0x7b,{sae}, %zmm7, %zmm22
+
+// CHECK: vrndscalepd $123, (%rcx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0x31,0x7b]
+ vrndscalepd $0x7b, (%rcx), %zmm22
+
+// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x09,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalepd $0x7b, 291(%rax,%r14,8), %zmm22
+
+// CHECK: vrndscalepd $123, (%rcx){1to8}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0x31,0x7b]
+ vrndscalepd $0x7b, (%rcx){1to8}, %zmm22
+
+// CHECK: vrndscalepd $123, 8128(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0x72,0x7f,0x7b]
+ vrndscalepd $0x7b, 8128(%rdx), %zmm22
+
+// CHECK: vrndscalepd $123, 8192(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xb2,0x00,0x20,0x00,0x00,0x7b]
+ vrndscalepd $0x7b, 8192(%rdx), %zmm22
+
+// CHECK: vrndscalepd $123, -8192(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0x72,0x80,0x7b]
+ vrndscalepd $0x7b, -8192(%rdx), %zmm22
+
+// CHECK: vrndscalepd $123, -8256(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xb2,0xc0,0xdf,0xff,0xff,0x7b]
+ vrndscalepd $0x7b, -8256(%rdx), %zmm22
+
+// CHECK: vrndscalepd $123, 1016(%rdx){1to8}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0x72,0x7f,0x7b]
+ vrndscalepd $0x7b, 1016(%rdx){1to8}, %zmm22
+
+// CHECK: vrndscalepd $123, 1024(%rdx){1to8}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0xb2,0x00,0x04,0x00,0x00,0x7b]
+ vrndscalepd $0x7b, 1024(%rdx){1to8}, %zmm22
+
+// CHECK: vrndscalepd $123, -1024(%rdx){1to8}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0x72,0x80,0x7b]
+ vrndscalepd $0x7b, -1024(%rdx){1to8}, %zmm22
+
+// CHECK: vrndscalepd $123, -1032(%rdx){1to8}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x09,0xb2,0xf8,0xfb,0xff,0xff,0x7b]
+ vrndscalepd $0x7b, -1032(%rdx){1to8}, %zmm22
+
+// CHECK: vrndscaleps $171, %zmm7, %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xef,0xab]
+ vrndscaleps $0xab, %zmm7, %zmm13
+
+// CHECK: vrndscaleps $171, %zmm7, %zmm13 {%k1}
+// CHECK: encoding: [0x62,0x73,0x7d,0x49,0x08,0xef,0xab]
+ vrndscaleps $0xab, %zmm7, %zmm13 {%k1}
+
+// CHECK: vrndscaleps $171, %zmm7, %zmm13 {%k1} {z}
+// CHECK: encoding: [0x62,0x73,0x7d,0xc9,0x08,0xef,0xab]
+ vrndscaleps $0xab, %zmm7, %zmm13 {%k1} {z}
+
+// CHECK: vrndscaleps $171,{sae}, %zmm7, %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0xab]
+ vrndscaleps $0xab,{sae}, %zmm7, %zmm13
+
+// CHECK: vrndscaleps $123, %zmm7, %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xef,0x7b]
+ vrndscaleps $0x7b, %zmm7, %zmm13
+
+// CHECK: vrndscaleps $123,{sae}, %zmm7, %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0x7b]
+ vrndscaleps $0x7b,{sae}, %zmm7, %zmm13
+
+// CHECK: vrndscaleps $123, (%rcx), %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0x29,0x7b]
+ vrndscaleps $0x7b, (%rcx), %zmm13
+
+// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %zmm13
+// CHECK: encoding: [0x62,0x33,0x7d,0x48,0x08,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrndscaleps $0x7b, 291(%rax,%r14,8), %zmm13
+
+// CHECK: vrndscaleps $123, (%rcx){1to16}, %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0x29,0x7b]
+ vrndscaleps $0x7b, (%rcx){1to16}, %zmm13
+
+// CHECK: vrndscaleps $123, 8128(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0x6a,0x7f,0x7b]
+ vrndscaleps $0x7b, 8128(%rdx), %zmm13
+
+// CHECK: vrndscaleps $123, 8192(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xaa,0x00,0x20,0x00,0x00,0x7b]
+ vrndscaleps $0x7b, 8192(%rdx), %zmm13
+
+// CHECK: vrndscaleps $123, -8192(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0x6a,0x80,0x7b]
+ vrndscaleps $0x7b, -8192(%rdx), %zmm13
+
+// CHECK: vrndscaleps $123, -8256(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
+ vrndscaleps $0x7b, -8256(%rdx), %zmm13
+
+// CHECK: vrndscaleps $123, 508(%rdx){1to16}, %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0x6a,0x7f,0x7b]
+ vrndscaleps $0x7b, 508(%rdx){1to16}, %zmm13
+
+// CHECK: vrndscaleps $123, 512(%rdx){1to16}, %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0xaa,0x00,0x02,0x00,0x00,0x7b]
+ vrndscaleps $0x7b, 512(%rdx){1to16}, %zmm13
+
+// CHECK: vrndscaleps $123, -512(%rdx){1to16}, %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0x6a,0x80,0x7b]
+ vrndscaleps $0x7b, -512(%rdx){1to16}, %zmm13
+
+// CHECK: vrndscaleps $123, -516(%rdx){1to16}, %zmm13
+// CHECK: encoding: [0x62,0x73,0x7d,0x58,0x08,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+ vrndscaleps $0x7b, -516(%rdx){1to16}, %zmm13
+
+// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x43,0x9d,0x08,0x0b,0xcf,0xab]
+ vrndscalesd $0xab, %xmm15, %xmm12, %xmm25
+
+// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25 {%k6}
+// CHECK: encoding: [0x62,0x43,0x9d,0x0e,0x0b,0xcf,0xab]
+ vrndscalesd $0xab, %xmm15, %xmm12, %xmm25 {%k6}
+
+// CHECK: vrndscalesd $171, %xmm15, %xmm12, %xmm25 {%k6} {z}
+// CHECK: encoding: [0x62,0x43,0x9d,0x8e,0x0b,0xcf,0xab]
+ vrndscalesd $0xab, %xmm15, %xmm12, %xmm25 {%k6} {z}
+
+// CHECK: vrndscalesd $171, {sae}, %xmm15, %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x43,0x9d,0x18,0x0b,0xcf,0xab]
+ vrndscalesd $0xab,{sae}, %xmm15, %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, %xmm15, %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x43,0x9d,0x08,0x0b,0xcf,0x7b]
+ vrndscalesd $0x7b, %xmm15, %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, {sae}, %xmm15, %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x43,0x9d,0x18,0x0b,0xcf,0x7b]
+ vrndscalesd $0x7b,{sae}, %xmm15, %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, (%rcx), %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x09,0x7b]
+ vrndscalesd $0x7b, (%rcx), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, 291(%rax,%r14,8), %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x23,0x9d,0x08,0x0b,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalesd $0x7b, 291(%rax,%r14,8), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, 1016(%rdx), %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x4a,0x7f,0x7b]
+ vrndscalesd $0x7b, 1016(%rdx), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, 1024(%rdx), %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vrndscalesd $0x7b, 1024(%rdx), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, -1024(%rdx), %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x4a,0x80,0x7b]
+ vrndscalesd $0x7b, -1024(%rdx), %xmm12, %xmm25
+
+// CHECK: vrndscalesd $123, -1032(%rdx), %xmm12, %xmm25
+// CHECK: encoding: [0x62,0x63,0x9d,0x08,0x0b,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vrndscalesd $0x7b, -1032(%rdx), %xmm12, %xmm25
+
+// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x33,0x25,0x08,0x0a,0xd9,0xab]
+ vrndscaless $0xab, %xmm17, %xmm11, %xmm11
+
+// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11 {%k3}
+// CHECK: encoding: [0x62,0x33,0x25,0x0b,0x0a,0xd9,0xab]
+ vrndscaless $0xab, %xmm17, %xmm11, %xmm11 {%k3}
+
+// CHECK: vrndscaless $171, %xmm17, %xmm11, %xmm11 {%k3} {z}
+// CHECK: encoding: [0x62,0x33,0x25,0x8b,0x0a,0xd9,0xab]
+ vrndscaless $0xab, %xmm17, %xmm11, %xmm11 {%k3} {z}
+
+// CHECK: vrndscaless $171, {sae}, %xmm17, %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x33,0x25,0x18,0x0a,0xd9,0xab]
+ vrndscaless $0xab,{sae}, %xmm17, %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, %xmm17, %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x33,0x25,0x08,0x0a,0xd9,0x7b]
+ vrndscaless $0x7b, %xmm17, %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, {sae}, %xmm17, %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x33,0x25,0x18,0x0a,0xd9,0x7b]
+ vrndscaless $0x7b,{sae}, %xmm17, %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, (%rcx), %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x19,0x7b]
+ vrndscaless $0x7b, (%rcx), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, 291(%rax,%r14,8), %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x33,0x25,0x08,0x0a,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrndscaless $0x7b, 291(%rax,%r14,8), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, 508(%rdx), %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x5a,0x7f,0x7b]
+ vrndscaless $0x7b, 508(%rdx), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, 512(%rdx), %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vrndscaless $0x7b, 512(%rdx), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, -512(%rdx), %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x5a,0x80,0x7b]
+ vrndscaless $0x7b, -512(%rdx), %xmm11, %xmm11
+
+// CHECK: vrndscaless $123, -516(%rdx), %xmm11, %xmm11
+// CHECK: encoding: [0x62,0x73,0x25,0x08,0x0a,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vrndscaless $0x7b, -516(%rdx), %xmm11, %xmm11
+
// CHECK: vfmadd132ss %xmm22, %xmm17, %xmm30
// CHECK: encoding: [0x62,0x22,0x75,0x00,0x99,0xf6]
vfmadd132ss %xmm22, %xmm17, %xmm30
@@ -14094,3 +14430,4848 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xf2,0xdd,0x00,0xbf,0xb2,0xf8,0xfb,0xff,0xff]
vfnmsub231sd -1032(%rdx), %xmm20, %xmm6
+// CHECK: vunpckhps %zmm16, %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xb1,0x0c,0x48,0x15,0xe8]
+ vunpckhps %zmm16, %zmm14, %zmm5
+
+// CHECK: vunpckhps %zmm16, %zmm14, %zmm5 {%k6}
+// CHECK: encoding: [0x62,0xb1,0x0c,0x4e,0x15,0xe8]
+ vunpckhps %zmm16, %zmm14, %zmm5 {%k6}
+
+// CHECK: vunpckhps %zmm16, %zmm14, %zmm5 {%k6} {z}
+// CHECK: encoding: [0x62,0xb1,0x0c,0xce,0x15,0xe8]
+ vunpckhps %zmm16, %zmm14, %zmm5 {%k6} {z}
+
+// CHECK: vunpckhps (%rcx), %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0x29]
+ vunpckhps (%rcx), %zmm14, %zmm5
+
+// CHECK: vunpckhps 291(%rax,%r14,8), %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xb1,0x0c,0x48,0x15,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vunpckhps 291(%rax,%r14,8), %zmm14, %zmm5
+
+// CHECK: vunpckhps (%rcx){1to16}, %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0x29]
+ vunpckhps (%rcx){1to16}, %zmm14, %zmm5
+
+// CHECK: vunpckhps 8128(%rdx), %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0x6a,0x7f]
+ vunpckhps 8128(%rdx), %zmm14, %zmm5
+
+// CHECK: vunpckhps 8192(%rdx), %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0xaa,0x00,0x20,0x00,0x00]
+ vunpckhps 8192(%rdx), %zmm14, %zmm5
+
+// CHECK: vunpckhps -8192(%rdx), %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0x6a,0x80]
+ vunpckhps -8192(%rdx), %zmm14, %zmm5
+
+// CHECK: vunpckhps -8256(%rdx), %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0xaa,0xc0,0xdf,0xff,0xff]
+ vunpckhps -8256(%rdx), %zmm14, %zmm5
+
+// CHECK: vunpckhps 508(%rdx){1to16}, %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0x6a,0x7f]
+ vunpckhps 508(%rdx){1to16}, %zmm14, %zmm5
+
+// CHECK: vunpckhps 512(%rdx){1to16}, %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0xaa,0x00,0x02,0x00,0x00]
+ vunpckhps 512(%rdx){1to16}, %zmm14, %zmm5
+
+// CHECK: vunpckhps -512(%rdx){1to16}, %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0x6a,0x80]
+ vunpckhps -512(%rdx){1to16}, %zmm14, %zmm5
+
+// CHECK: vunpckhps -516(%rdx){1to16}, %zmm14, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0xaa,0xfc,0xfd,0xff,0xff]
+ vunpckhps -516(%rdx){1to16}, %zmm14, %zmm5
+
+// CHECK: vunpcklps %zmm2, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0xca]
+ vunpcklps %zmm2, %zmm3, %zmm1
+
+// CHECK: vunpcklps %zmm2, %zmm3, %zmm1 {%k3}
+// CHECK: encoding: [0x62,0xf1,0x64,0x4b,0x14,0xca]
+ vunpcklps %zmm2, %zmm3, %zmm1 {%k3}
+
+// CHECK: vunpcklps %zmm2, %zmm3, %zmm1 {%k3} {z}
+// CHECK: encoding: [0x62,0xf1,0x64,0xcb,0x14,0xca]
+ vunpcklps %zmm2, %zmm3, %zmm1 {%k3} {z}
+
+// CHECK: vunpcklps (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x09]
+ vunpcklps (%rcx), %zmm3, %zmm1
+
+// CHECK: vunpcklps 291(%rax,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb1,0x64,0x48,0x14,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vunpcklps 291(%rax,%r14,8), %zmm3, %zmm1
+
+// CHECK: vunpcklps (%rcx){1to16}, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x09]
+ vunpcklps (%rcx){1to16}, %zmm3, %zmm1
+
+// CHECK: vunpcklps 8128(%rdx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x4a,0x7f]
+ vunpcklps 8128(%rdx), %zmm3, %zmm1
+
+// CHECK: vunpcklps 8192(%rdx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x8a,0x00,0x20,0x00,0x00]
+ vunpcklps 8192(%rdx), %zmm3, %zmm1
+
+// CHECK: vunpcklps -8192(%rdx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x4a,0x80]
+ vunpcklps -8192(%rdx), %zmm3, %zmm1
+
+// CHECK: vunpcklps -8256(%rdx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x8a,0xc0,0xdf,0xff,0xff]
+ vunpcklps -8256(%rdx), %zmm3, %zmm1
+
+// CHECK: vunpcklps 508(%rdx){1to16}, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x4a,0x7f]
+ vunpcklps 508(%rdx){1to16}, %zmm3, %zmm1
+
+// CHECK: vunpcklps 512(%rdx){1to16}, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x8a,0x00,0x02,0x00,0x00]
+ vunpcklps 512(%rdx){1to16}, %zmm3, %zmm1
+
+// CHECK: vunpcklps -512(%rdx){1to16}, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x4a,0x80]
+ vunpcklps -512(%rdx){1to16}, %zmm3, %zmm1
+
+// CHECK: vunpcklps -516(%rdx){1to16}, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x8a,0xfc,0xfd,0xff,0xff]
+ vunpcklps -516(%rdx){1to16}, %zmm3, %zmm1
+
+// CHECK: vunpckhpd %zmm26, %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x01,0xe5,0x40,0x15,0xca]
+ vunpckhpd %zmm26, %zmm19, %zmm25
+
+// CHECK: vunpckhpd %zmm26, %zmm19, %zmm25 {%k5}
+// CHECK: encoding: [0x62,0x01,0xe5,0x45,0x15,0xca]
+ vunpckhpd %zmm26, %zmm19, %zmm25 {%k5}
+
+// CHECK: vunpckhpd %zmm26, %zmm19, %zmm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x01,0xe5,0xc5,0x15,0xca]
+ vunpckhpd %zmm26, %zmm19, %zmm25 {%k5} {z}
+
+// CHECK: vunpckhpd (%rcx), %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x09]
+ vunpckhpd (%rcx), %zmm19, %zmm25
+
+// CHECK: vunpckhpd 291(%rax,%r14,8), %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x21,0xe5,0x40,0x15,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vunpckhpd 291(%rax,%r14,8), %zmm19, %zmm25
+
+// CHECK: vunpckhpd (%rcx){1to8}, %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x09]
+ vunpckhpd (%rcx){1to8}, %zmm19, %zmm25
+
+// CHECK: vunpckhpd 8128(%rdx), %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x4a,0x7f]
+ vunpckhpd 8128(%rdx), %zmm19, %zmm25
+
+// CHECK: vunpckhpd 8192(%rdx), %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x8a,0x00,0x20,0x00,0x00]
+ vunpckhpd 8192(%rdx), %zmm19, %zmm25
+
+// CHECK: vunpckhpd -8192(%rdx), %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x4a,0x80]
+ vunpckhpd -8192(%rdx), %zmm19, %zmm25
+
+// CHECK: vunpckhpd -8256(%rdx), %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x8a,0xc0,0xdf,0xff,0xff]
+ vunpckhpd -8256(%rdx), %zmm19, %zmm25
+
+// CHECK: vunpckhpd 1016(%rdx){1to8}, %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x4a,0x7f]
+ vunpckhpd 1016(%rdx){1to8}, %zmm19, %zmm25
+
+// CHECK: vunpckhpd 1024(%rdx){1to8}, %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x8a,0x00,0x04,0x00,0x00]
+ vunpckhpd 1024(%rdx){1to8}, %zmm19, %zmm25
+
+// CHECK: vunpckhpd -1024(%rdx){1to8}, %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x4a,0x80]
+ vunpckhpd -1024(%rdx){1to8}, %zmm19, %zmm25
+
+// CHECK: vunpckhpd -1032(%rdx){1to8}, %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x8a,0xf8,0xfb,0xff,0xff]
+ vunpckhpd -1032(%rdx){1to8}, %zmm19, %zmm25
+
+// CHECK: vunpcklpd %zmm21, %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xa1,0x95,0x40,0x14,0xd5]
+ vunpcklpd %zmm21, %zmm29, %zmm18
+
+// CHECK: vunpcklpd %zmm21, %zmm29, %zmm18 {%k6}
+// CHECK: encoding: [0x62,0xa1,0x95,0x46,0x14,0xd5]
+ vunpcklpd %zmm21, %zmm29, %zmm18 {%k6}
+
+// CHECK: vunpcklpd %zmm21, %zmm29, %zmm18 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0x95,0xc6,0x14,0xd5]
+ vunpcklpd %zmm21, %zmm29, %zmm18 {%k6} {z}
+
+// CHECK: vunpcklpd (%rcx), %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x11]
+ vunpcklpd (%rcx), %zmm29, %zmm18
+
+// CHECK: vunpcklpd 291(%rax,%r14,8), %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xa1,0x95,0x40,0x14,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vunpcklpd 291(%rax,%r14,8), %zmm29, %zmm18
+
+// CHECK: vunpcklpd (%rcx){1to8}, %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x11]
+ vunpcklpd (%rcx){1to8}, %zmm29, %zmm18
+
+// CHECK: vunpcklpd 8128(%rdx), %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x52,0x7f]
+ vunpcklpd 8128(%rdx), %zmm29, %zmm18
+
+// CHECK: vunpcklpd 8192(%rdx), %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x92,0x00,0x20,0x00,0x00]
+ vunpcklpd 8192(%rdx), %zmm29, %zmm18
+
+// CHECK: vunpcklpd -8192(%rdx), %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x52,0x80]
+ vunpcklpd -8192(%rdx), %zmm29, %zmm18
+
+// CHECK: vunpcklpd -8256(%rdx), %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x92,0xc0,0xdf,0xff,0xff]
+ vunpcklpd -8256(%rdx), %zmm29, %zmm18
+
+// CHECK: vunpcklpd 1016(%rdx){1to8}, %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x52,0x7f]
+ vunpcklpd 1016(%rdx){1to8}, %zmm29, %zmm18
+
+// CHECK: vunpcklpd 1024(%rdx){1to8}, %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x92,0x00,0x04,0x00,0x00]
+ vunpcklpd 1024(%rdx){1to8}, %zmm29, %zmm18
+
+// CHECK: vunpcklpd -1024(%rdx){1to8}, %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x52,0x80]
+ vunpcklpd -1024(%rdx){1to8}, %zmm29, %zmm18
+
+// CHECK: vunpcklpd -1032(%rdx){1to8}, %zmm29, %zmm18
+// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x92,0xf8,0xfb,0xff,0xff]
+ vunpcklpd -1032(%rdx){1to8}, %zmm29, %zmm18
+
+// CHECK: vpunpckldq %zmm17, %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x21,0x65,0x48,0x62,0xc1]
+ vpunpckldq %zmm17, %zmm3, %zmm24
+
+// CHECK: vpunpckldq %zmm17, %zmm3, %zmm24 {%k3}
+// CHECK: encoding: [0x62,0x21,0x65,0x4b,0x62,0xc1]
+ vpunpckldq %zmm17, %zmm3, %zmm24 {%k3}
+
+// CHECK: vpunpckldq %zmm17, %zmm3, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x21,0x65,0xcb,0x62,0xc1]
+ vpunpckldq %zmm17, %zmm3, %zmm24 {%k3} {z}
+
+// CHECK: vpunpckldq (%rcx), %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x01]
+ vpunpckldq (%rcx), %zmm3, %zmm24
+
+// CHECK: vpunpckldq 291(%rax,%r14,8), %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x21,0x65,0x48,0x62,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckldq 291(%rax,%r14,8), %zmm3, %zmm24
+
+// CHECK: vpunpckldq (%rcx){1to16}, %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x01]
+ vpunpckldq (%rcx){1to16}, %zmm3, %zmm24
+
+// CHECK: vpunpckldq 8128(%rdx), %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x42,0x7f]
+ vpunpckldq 8128(%rdx), %zmm3, %zmm24
+
+// CHECK: vpunpckldq 8192(%rdx), %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x82,0x00,0x20,0x00,0x00]
+ vpunpckldq 8192(%rdx), %zmm3, %zmm24
+
+// CHECK: vpunpckldq -8192(%rdx), %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x42,0x80]
+ vpunpckldq -8192(%rdx), %zmm3, %zmm24
+
+// CHECK: vpunpckldq -8256(%rdx), %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x82,0xc0,0xdf,0xff,0xff]
+ vpunpckldq -8256(%rdx), %zmm3, %zmm24
+
+// CHECK: vpunpckldq 508(%rdx){1to16}, %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x42,0x7f]
+ vpunpckldq 508(%rdx){1to16}, %zmm3, %zmm24
+
+// CHECK: vpunpckldq 512(%rdx){1to16}, %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x82,0x00,0x02,0x00,0x00]
+ vpunpckldq 512(%rdx){1to16}, %zmm3, %zmm24
+
+// CHECK: vpunpckldq -512(%rdx){1to16}, %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x42,0x80]
+ vpunpckldq -512(%rdx){1to16}, %zmm3, %zmm24
+
+// CHECK: vpunpckldq -516(%rdx){1to16}, %zmm3, %zmm24
+// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x82,0xfc,0xfd,0xff,0xff]
+ vpunpckldq -516(%rdx){1to16}, %zmm3, %zmm24
+
+// CHECK: vpunpckhdq %zmm13, %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xd1,0x5d,0x48,0x6a,0xf5]
+ vpunpckhdq %zmm13, %zmm4, %zmm6
+
+// CHECK: vpunpckhdq %zmm13, %zmm4, %zmm6 {%k5}
+// CHECK: encoding: [0x62,0xd1,0x5d,0x4d,0x6a,0xf5]
+ vpunpckhdq %zmm13, %zmm4, %zmm6 {%k5}
+
+// CHECK: vpunpckhdq %zmm13, %zmm4, %zmm6 {%k5} {z}
+// CHECK: encoding: [0x62,0xd1,0x5d,0xcd,0x6a,0xf5]
+ vpunpckhdq %zmm13, %zmm4, %zmm6 {%k5} {z}
+
+// CHECK: vpunpckhdq (%rcx), %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0x31]
+ vpunpckhdq (%rcx), %zmm4, %zmm6
+
+// CHECK: vpunpckhdq 291(%rax,%r14,8), %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xb1,0x5d,0x48,0x6a,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhdq 291(%rax,%r14,8), %zmm4, %zmm6
+
+// CHECK: vpunpckhdq (%rcx){1to16}, %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0x31]
+ vpunpckhdq (%rcx){1to16}, %zmm4, %zmm6
+
+// CHECK: vpunpckhdq 8128(%rdx), %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0x72,0x7f]
+ vpunpckhdq 8128(%rdx), %zmm4, %zmm6
+
+// CHECK: vpunpckhdq 8192(%rdx), %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0xb2,0x00,0x20,0x00,0x00]
+ vpunpckhdq 8192(%rdx), %zmm4, %zmm6
+
+// CHECK: vpunpckhdq -8192(%rdx), %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0x72,0x80]
+ vpunpckhdq -8192(%rdx), %zmm4, %zmm6
+
+// CHECK: vpunpckhdq -8256(%rdx), %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0xb2,0xc0,0xdf,0xff,0xff]
+ vpunpckhdq -8256(%rdx), %zmm4, %zmm6
+
+// CHECK: vpunpckhdq 508(%rdx){1to16}, %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0x72,0x7f]
+ vpunpckhdq 508(%rdx){1to16}, %zmm4, %zmm6
+
+// CHECK: vpunpckhdq 512(%rdx){1to16}, %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0xb2,0x00,0x02,0x00,0x00]
+ vpunpckhdq 512(%rdx){1to16}, %zmm4, %zmm6
+
+// CHECK: vpunpckhdq -512(%rdx){1to16}, %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0x72,0x80]
+ vpunpckhdq -512(%rdx){1to16}, %zmm4, %zmm6
+
+// CHECK: vpunpckhdq -516(%rdx){1to16}, %zmm4, %zmm6
+// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0xb2,0xfc,0xfd,0xff,0xff]
+ vpunpckhdq -516(%rdx){1to16}, %zmm4, %zmm6
+
+// CHECK: vpunpcklqdq %zmm17, %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xb1,0xdd,0x48,0x6c,0xd9]
+ vpunpcklqdq %zmm17, %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq %zmm17, %zmm4, %zmm3 {%k1}
+// CHECK: encoding: [0x62,0xb1,0xdd,0x49,0x6c,0xd9]
+ vpunpcklqdq %zmm17, %zmm4, %zmm3 {%k1}
+
+// CHECK: vpunpcklqdq %zmm17, %zmm4, %zmm3 {%k1} {z}
+// CHECK: encoding: [0x62,0xb1,0xdd,0xc9,0x6c,0xd9]
+ vpunpcklqdq %zmm17, %zmm4, %zmm3 {%k1} {z}
+
+// CHECK: vpunpcklqdq (%rcx), %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x19]
+ vpunpcklqdq (%rcx), %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq 291(%rax,%r14,8), %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xb1,0xdd,0x48,0x6c,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpunpcklqdq 291(%rax,%r14,8), %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq (%rcx){1to8}, %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x19]
+ vpunpcklqdq (%rcx){1to8}, %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq 8128(%rdx), %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x5a,0x7f]
+ vpunpcklqdq 8128(%rdx), %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq 8192(%rdx), %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x9a,0x00,0x20,0x00,0x00]
+ vpunpcklqdq 8192(%rdx), %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq -8192(%rdx), %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x5a,0x80]
+ vpunpcklqdq -8192(%rdx), %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq -8256(%rdx), %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x9a,0xc0,0xdf,0xff,0xff]
+ vpunpcklqdq -8256(%rdx), %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq 1016(%rdx){1to8}, %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x5a,0x7f]
+ vpunpcklqdq 1016(%rdx){1to8}, %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq 1024(%rdx){1to8}, %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x9a,0x00,0x04,0x00,0x00]
+ vpunpcklqdq 1024(%rdx){1to8}, %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq -1024(%rdx){1to8}, %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x5a,0x80]
+ vpunpcklqdq -1024(%rdx){1to8}, %zmm4, %zmm3
+
+// CHECK: vpunpcklqdq -1032(%rdx){1to8}, %zmm4, %zmm3
+// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x9a,0xf8,0xfb,0xff,0xff]
+ vpunpcklqdq -1032(%rdx){1to8}, %zmm4, %zmm3
+
+// CHECK: vpunpckhqdq %zmm16, %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x21,0x85,0x48,0x6d,0xd8]
+ vpunpckhqdq %zmm16, %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq %zmm16, %zmm15, %zmm27 {%k3}
+// CHECK: encoding: [0x62,0x21,0x85,0x4b,0x6d,0xd8]
+ vpunpckhqdq %zmm16, %zmm15, %zmm27 {%k3}
+
+// CHECK: vpunpckhqdq %zmm16, %zmm15, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x21,0x85,0xcb,0x6d,0xd8]
+ vpunpckhqdq %zmm16, %zmm15, %zmm27 {%k3} {z}
+
+// CHECK: vpunpckhqdq (%rcx), %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x19]
+ vpunpckhqdq (%rcx), %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq 291(%rax,%r14,8), %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x21,0x85,0x48,0x6d,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhqdq 291(%rax,%r14,8), %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq (%rcx){1to8}, %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x19]
+ vpunpckhqdq (%rcx){1to8}, %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq 8128(%rdx), %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x5a,0x7f]
+ vpunpckhqdq 8128(%rdx), %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq 8192(%rdx), %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x9a,0x00,0x20,0x00,0x00]
+ vpunpckhqdq 8192(%rdx), %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq -8192(%rdx), %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x5a,0x80]
+ vpunpckhqdq -8192(%rdx), %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq -8256(%rdx), %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x9a,0xc0,0xdf,0xff,0xff]
+ vpunpckhqdq -8256(%rdx), %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq 1016(%rdx){1to8}, %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x5a,0x7f]
+ vpunpckhqdq 1016(%rdx){1to8}, %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq 1024(%rdx){1to8}, %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x9a,0x00,0x04,0x00,0x00]
+ vpunpckhqdq 1024(%rdx){1to8}, %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq -1024(%rdx){1to8}, %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x5a,0x80]
+ vpunpckhqdq -1024(%rdx){1to8}, %zmm15, %zmm27
+
+// CHECK: vpunpckhqdq -1032(%rdx){1to8}, %zmm15, %zmm27
+// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x9a,0xf8,0xfb,0xff,0xff]
+ vpunpckhqdq -1032(%rdx){1to8}, %zmm15, %zmm27
+
+// CHECK: vgetexpss %xmm26, %xmm1, %xmm20
+// CHECK: encoding: [0x62,0x82,0x75,0x08,0x43,0xe2]
+ vgetexpss %xmm26, %xmm1, %xmm20
+
+// CHECK: vgetexpss %xmm26, %xmm1, %xmm20 {%k7}
+// CHECK: encoding: [0x62,0x82,0x75,0x0f,0x43,0xe2]
+ vgetexpss %xmm26, %xmm1, %xmm20 {%k7}
+
+// CHECK: vgetexpss %xmm26, %xmm1, %xmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x75,0x8f,0x43,0xe2]
+ vgetexpss %xmm26, %xmm1, %xmm20 {%k7} {z}
+
+// CHECK: vgetexpss {sae}, %xmm26, %xmm1, %xmm20
+// CHECK: encoding: [0x62,0x82,0x75,0x18,0x43,0xe2]
+ vgetexpss {sae}, %xmm26, %xmm1, %xmm20
+
+// CHECK: vgetexpss (%rcx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0x21]
+ vgetexpss (%rcx), %xmm1, %xmm20
+
+// CHECK: vgetexpss 291(%rax,%r14,8), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x75,0x08,0x43,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vgetexpss 291(%rax,%r14,8), %xmm1, %xmm20
+
+// CHECK: vgetexpss 508(%rdx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0x62,0x7f]
+ vgetexpss 508(%rdx), %xmm1, %xmm20
+
+// CHECK: vgetexpss 512(%rdx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0xa2,0x00,0x02,0x00,0x00]
+ vgetexpss 512(%rdx), %xmm1, %xmm20
+
+// CHECK: vgetexpss -512(%rdx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0x62,0x80]
+ vgetexpss -512(%rdx), %xmm1, %xmm20
+
+// CHECK: vgetexpss -516(%rdx), %xmm1, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x75,0x08,0x43,0xa2,0xfc,0xfd,0xff,0xff]
+ vgetexpss -516(%rdx), %xmm1, %xmm20
+
+// CHECK: vgetexpsd %xmm2, %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0xd2]
+ vgetexpsd %xmm2, %xmm7, %xmm2
+
+// CHECK: vgetexpsd %xmm2, %xmm7, %xmm2 {%k5}
+// CHECK: encoding: [0x62,0xf2,0xc5,0x0d,0x43,0xd2]
+ vgetexpsd %xmm2, %xmm7, %xmm2 {%k5}
+
+// CHECK: vgetexpsd %xmm2, %xmm7, %xmm2 {%k5} {z}
+// CHECK: encoding: [0x62,0xf2,0xc5,0x8d,0x43,0xd2]
+ vgetexpsd %xmm2, %xmm7, %xmm2 {%k5} {z}
+
+// CHECK: vgetexpsd {sae}, %xmm2, %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x18,0x43,0xd2]
+ vgetexpsd {sae}, %xmm2, %xmm7, %xmm2
+
+// CHECK: vgetexpsd (%rcx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x11]
+ vgetexpsd (%rcx), %xmm7, %xmm2
+
+// CHECK: vgetexpsd 291(%rax,%r14,8), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xb2,0xc5,0x08,0x43,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vgetexpsd 291(%rax,%r14,8), %xmm7, %xmm2
+
+// CHECK: vgetexpsd 1016(%rdx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x52,0x7f]
+ vgetexpsd 1016(%rdx), %xmm7, %xmm2
+
+// CHECK: vgetexpsd 1024(%rdx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0x00,0x04,0x00,0x00]
+ vgetexpsd 1024(%rdx), %xmm7, %xmm2
+
+// CHECK: vgetexpsd -1024(%rdx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x52,0x80]
+ vgetexpsd -1024(%rdx), %xmm7, %xmm2
+
+// CHECK: vgetexpsd -1032(%rdx), %xmm7, %xmm2
+// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0xf8,0xfb,0xff,0xff]
+ vgetexpsd -1032(%rdx), %xmm7, %xmm2
+
+// CHECK: vcmpss $171, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x08,0xc2,0xe4,0xab]
+ vcmpss $0xab, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $171, %xmm12, %xmm15, %k4 {%k5}
+// CHECK: encoding: [0x62,0xd1,0x06,0x0d,0xc2,0xe4,0xab]
+ vcmpss $0xab, %xmm12, %xmm15, %k4 {%k5}
+
+// CHECK: vcmpss $171,{sae}, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0xab]
+ vcmpss $0xab,{sae}, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x08,0xc2,0xe4,0x7b]
+ vcmpss $0x7b, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123,{sae}, %xmm12, %xmm15, %k4
+// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0x7b]
+ vcmpss $0x7b,{sae}, %xmm12, %xmm15, %k4
+
+// CHECK: vcmpss $123, (%rcx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x21,0x7b]
+ vcmpss $0x7b, (%rcx), %xmm15, %k4
+
+// CHECK: vcmpss $123, 291(%rax,%r14,8), %xmm15, %k4
+// CHECK: encoding: [0x62,0xb1,0x06,0x08,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpss $0x7b, 291(%rax,%r14,8), %xmm15, %k4
+
+// CHECK: vcmpss $123, 508(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x62,0x7f,0x7b]
+ vcmpss $0x7b, 508(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, 512(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vcmpss $0x7b, 512(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, -512(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0x62,0x80,0x7b]
+ vcmpss $0x7b, -512(%rdx), %xmm15, %k4
+
+// CHECK: vcmpss $123, -516(%rdx), %xmm15, %k4
+// CHECK: encoding: [0x62,0xf1,0x06,0x08,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vcmpss $0x7b, -516(%rdx), %xmm15, %k4
+
+// CHECK: vcmpsd $171, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xec,0xab]
+ vcmpsd $0xab, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $171, %xmm4, %xmm19, %k5 {%k1}
+// CHECK: encoding: [0x62,0xf1,0xe7,0x01,0xc2,0xec,0xab]
+ vcmpsd $0xab, %xmm4, %xmm19, %k5 {%k1}
+
+// CHECK: vcmpsd $171,{sae}, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0xab]
+ vcmpsd $0xab,{sae}, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xec,0x7b]
+ vcmpsd $0x7b, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123,{sae}, %xmm4, %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0x7b]
+ vcmpsd $0x7b,{sae}, %xmm4, %xmm19, %k5
+
+// CHECK: vcmpsd $123, (%rcx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x29,0x7b]
+ vcmpsd $0x7b, (%rcx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 291(%rax,%r14,8), %xmm19, %k5
+// CHECK: encoding: [0x62,0xb1,0xe7,0x00,0xc2,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcmpsd $0x7b, 291(%rax,%r14,8), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 1016(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x6a,0x7f,0x7b]
+ vcmpsd $0x7b, 1016(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, 1024(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xaa,0x00,0x04,0x00,0x00,0x7b]
+ vcmpsd $0x7b, 1024(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, -1024(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0x6a,0x80,0x7b]
+ vcmpsd $0x7b, -1024(%rdx), %xmm19, %k5
+
+// CHECK: vcmpsd $123, -1032(%rdx), %xmm19, %k5
+// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+ vcmpsd $0x7b, -1032(%rdx), %xmm19, %k5
+
+// CHECK: vsqrtss %xmm8, %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xc1,0x66,0x00,0x51,0xf0]
+ vsqrtss %xmm8, %xmm19, %xmm22
+
+// CHECK: vsqrtss %xmm8, %xmm19, %xmm22 {%k1}
+// CHECK: encoding: [0x62,0xc1,0x66,0x01,0x51,0xf0]
+ vsqrtss %xmm8, %xmm19, %xmm22 {%k1}
+
+// CHECK: vsqrtss %xmm8, %xmm19, %xmm22 {%k1} {z}
+// CHECK: encoding: [0x62,0xc1,0x66,0x81,0x51,0xf0]
+ vsqrtss %xmm8, %xmm19, %xmm22 {%k1} {z}
+
+// CHECK: vsqrtss {rn-sae}, %xmm8, %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xc1,0x66,0x10,0x51,0xf0]
+ vsqrtss {rn-sae}, %xmm8, %xmm19, %xmm22
+
+// CHECK: vsqrtss {ru-sae}, %xmm8, %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xc1,0x66,0x50,0x51,0xf0]
+ vsqrtss {ru-sae}, %xmm8, %xmm19, %xmm22
+
+// CHECK: vsqrtss {rd-sae}, %xmm8, %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xc1,0x66,0x30,0x51,0xf0]
+ vsqrtss {rd-sae}, %xmm8, %xmm19, %xmm22
+
+// CHECK: vsqrtss {rz-sae}, %xmm8, %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xc1,0x66,0x70,0x51,0xf0]
+ vsqrtss {rz-sae}, %xmm8, %xmm19, %xmm22
+
+// CHECK: vsqrtss (%rcx), %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x66,0x00,0x51,0x31]
+ vsqrtss (%rcx), %xmm19, %xmm22
+
+// CHECK: vsqrtss 291(%rax,%r14,8), %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x66,0x00,0x51,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vsqrtss 291(%rax,%r14,8), %xmm19, %xmm22
+
+// CHECK: vsqrtss 508(%rdx), %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x66,0x00,0x51,0x72,0x7f]
+ vsqrtss 508(%rdx), %xmm19, %xmm22
+
+// CHECK: vsqrtss 512(%rdx), %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x66,0x00,0x51,0xb2,0x00,0x02,0x00,0x00]
+ vsqrtss 512(%rdx), %xmm19, %xmm22
+
+// CHECK: vsqrtss -512(%rdx), %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x66,0x00,0x51,0x72,0x80]
+ vsqrtss -512(%rdx), %xmm19, %xmm22
+
+// CHECK: vsqrtss -516(%rdx), %xmm19, %xmm22
+// CHECK: encoding: [0x62,0xe1,0x66,0x00,0x51,0xb2,0xfc,0xfd,0xff,0xff]
+ vsqrtss -516(%rdx), %xmm19, %xmm22
+
+// CHECK: vsqrtsd %xmm12, %xmm2, %xmm26
+// CHECK: encoding: [0x62,0x41,0xef,0x08,0x51,0xd4]
+ vsqrtsd %xmm12, %xmm2, %xmm26
+
+// CHECK: vsqrtsd %xmm12, %xmm2, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xd1,0xef,0x0f,0x51,0xf4]
+ vsqrtsd %xmm12, %xmm2, %xmm6 {%k7}
+
+// CHECK: vsqrtsd %xmm12, %xmm2, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xd1,0xef,0x8f,0x51,0xf4]
+ vsqrtsd %xmm12, %xmm2, %xmm6 {%k7} {z}
+
+// CHECK: vsqrtsd {rn-sae}, %xmm12, %xmm2, %xmm6
+// CHECK: encoding: [0x62,0xd1,0xef,0x18,0x51,0xf4]
+ vsqrtsd {rn-sae}, %xmm12, %xmm2, %xmm6
+
+// CHECK: vsqrtsd {ru-sae}, %xmm12, %xmm2, %xmm6
+// CHECK: encoding: [0x62,0xd1,0xef,0x58,0x51,0xf4]
+ vsqrtsd {ru-sae}, %xmm12, %xmm2, %xmm6
+
+// CHECK: vsqrtsd {rd-sae}, %xmm12, %xmm2, %xmm6
+// CHECK: encoding: [0x62,0xd1,0xef,0x38,0x51,0xf4]
+ vsqrtsd {rd-sae}, %xmm12, %xmm2, %xmm6
+
+// CHECK: vsqrtsd {rz-sae}, %xmm12, %xmm2, %xmm6
+// CHECK: encoding: [0x62,0xd1,0xef,0x78,0x51,0xf4]
+ vsqrtsd {rz-sae}, %xmm12, %xmm2, %xmm6
+
+// CHECK: vsqrtsd (%rcx), %xmm2, %xmm26
+// CHECK: encoding: [0x62,0x61,0xef,0x08,0x51,0x11]
+ vsqrtsd (%rcx), %xmm2, %xmm26
+
+// CHECK: vsqrtsd 291(%rax,%r14,8), %xmm2, %xmm26
+// CHECK: encoding: [0x62,0x21,0xef,0x08,0x51,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vsqrtsd 291(%rax,%r14,8), %xmm2, %xmm26
+
+// CHECK: vsqrtsd 1016(%rdx), %xmm2, %xmm26
+// CHECK: encoding: [0x62,0x61,0xef,0x08,0x51,0x52,0x7f]
+ vsqrtsd 1016(%rdx), %xmm2, %xmm26
+
+// CHECK: vsqrtsd 1024(%rdx), %xmm2, %xmm26
+// CHECK: encoding: [0x62,0x61,0xef,0x08,0x51,0x92,0x00,0x04,0x00,0x00]
+ vsqrtsd 1024(%rdx), %xmm2, %xmm26
+
+// CHECK: vsqrtsd -1024(%rdx), %xmm2, %xmm26
+// CHECK: encoding: [0x62,0x61,0xef,0x08,0x51,0x52,0x80]
+ vsqrtsd -1024(%rdx), %xmm2, %xmm26
+
+// CHECK: vsqrtsd -1032(%rdx), %xmm2, %xmm26
+// CHECK: encoding: [0x62,0x61,0xef,0x08,0x51,0x92,0xf8,0xfb,0xff,0xff]
+ vsqrtsd -1032(%rdx), %xmm2, %xmm26
+
+// CHECK: vinsertf32x4 $171, %xmm3, %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0xdb,0xab]
+ vinsertf32x4 $0xab, %xmm3, %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $171, %xmm3, %zmm26, %zmm11 {%k1}
+// CHECK: encoding: [0x62,0x73,0x2d,0x41,0x18,0xdb,0xab]
+ vinsertf32x4 $0xab, %xmm3, %zmm26, %zmm11 {%k1}
+
+// CHECK: vinsertf32x4 $171, %xmm3, %zmm26, %zmm11 {%k1} {z}
+// CHECK: encoding: [0x62,0x73,0x2d,0xc1,0x18,0xdb,0xab]
+ vinsertf32x4 $0xab, %xmm3, %zmm26, %zmm11 {%k1} {z}
+
+// CHECK: vinsertf32x4 $123, %xmm3, %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0xdb,0x7b]
+ vinsertf32x4 $0x7b, %xmm3, %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, (%rcx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x19,0x7b]
+ vinsertf32x4 $0x7b, (%rcx), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, 291(%rax,%r14,8), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x33,0x2d,0x40,0x18,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf32x4 $0x7b, 291(%rax,%r14,8), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, 2032(%rdx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x5a,0x7f,0x7b]
+ vinsertf32x4 $0x7b, 2032(%rdx), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, 2048(%rdx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf32x4 $0x7b, 2048(%rdx), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, -2048(%rdx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x5a,0x80,0x7b]
+ vinsertf32x4 $0x7b, -2048(%rdx), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, -2064(%rdx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf32x4 $0x7b, -2064(%rdx), %zmm26, %zmm11
+
+// CHECK: vinsertf64x4 $171, %ymm7, %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0xcf,0xab]
+ vinsertf64x4 $0xab, %ymm7, %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $171, %ymm7, %zmm5, %zmm1 {%k1}
+// CHECK: encoding: [0x62,0xf3,0xd5,0x49,0x1a,0xcf,0xab]
+ vinsertf64x4 $0xab, %ymm7, %zmm5, %zmm1 {%k1}
+
+// CHECK: vinsertf64x4 $171, %ymm7, %zmm5, %zmm1 {%k1} {z}
+// CHECK: encoding: [0x62,0xf3,0xd5,0xc9,0x1a,0xcf,0xab]
+ vinsertf64x4 $0xab, %ymm7, %zmm5, %zmm1 {%k1} {z}
+
+// CHECK: vinsertf64x4 $123, %ymm7, %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0xcf,0x7b]
+ vinsertf64x4 $0x7b, %ymm7, %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, (%rcx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x09,0x7b]
+ vinsertf64x4 $0x7b, (%rcx), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, 291(%rax,%r14,8), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xd5,0x48,0x1a,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf64x4 $0x7b, 291(%rax,%r14,8), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, 4064(%rdx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x4a,0x7f,0x7b]
+ vinsertf64x4 $0x7b, 4064(%rdx), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, 4096(%rdx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vinsertf64x4 $0x7b, 4096(%rdx), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, -4096(%rdx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x4a,0x80,0x7b]
+ vinsertf64x4 $0x7b, -4096(%rdx), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, -4128(%rdx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vinsertf64x4 $0x7b, -4128(%rdx), %zmm5, %zmm1
+
+// CHECK: vinserti32x4 $171, %xmm10, %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xc3,0x15,0x48,0x38,0xca,0xab]
+ vinserti32x4 $0xab, %xmm10, %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $171, %xmm10, %zmm13, %zmm17 {%k6}
+// CHECK: encoding: [0x62,0xc3,0x15,0x4e,0x38,0xca,0xab]
+ vinserti32x4 $0xab, %xmm10, %zmm13, %zmm17 {%k6}
+
+// CHECK: vinserti32x4 $171, %xmm10, %zmm13, %zmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xc3,0x15,0xce,0x38,0xca,0xab]
+ vinserti32x4 $0xab, %xmm10, %zmm13, %zmm17 {%k6} {z}
+
+// CHECK: vinserti32x4 $123, %xmm10, %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xc3,0x15,0x48,0x38,0xca,0x7b]
+ vinserti32x4 $0x7b, %xmm10, %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, (%rcx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x09,0x7b]
+ vinserti32x4 $0x7b, (%rcx), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, 291(%rax,%r14,8), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xa3,0x15,0x48,0x38,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti32x4 $0x7b, 291(%rax,%r14,8), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, 2032(%rdx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x4a,0x7f,0x7b]
+ vinserti32x4 $0x7b, 2032(%rdx), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, 2048(%rdx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vinserti32x4 $0x7b, 2048(%rdx), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, -2048(%rdx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x4a,0x80,0x7b]
+ vinserti32x4 $0x7b, -2048(%rdx), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, -2064(%rdx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti32x4 $0x7b, -2064(%rdx), %zmm13, %zmm17
+
+// CHECK: vinserti64x4 $171, %ymm4, %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xe4,0xab]
+ vinserti64x4 $0xab, %ymm4, %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $171, %ymm4, %zmm25, %zmm4 {%k1}
+// CHECK: encoding: [0x62,0xf3,0xb5,0x41,0x3a,0xe4,0xab]
+ vinserti64x4 $0xab, %ymm4, %zmm25, %zmm4 {%k1}
+
+// CHECK: vinserti64x4 $171, %ymm4, %zmm25, %zmm4 {%k1} {z}
+// CHECK: encoding: [0x62,0xf3,0xb5,0xc1,0x3a,0xe4,0xab]
+ vinserti64x4 $0xab, %ymm4, %zmm25, %zmm4 {%k1} {z}
+
+// CHECK: vinserti64x4 $123, %ymm4, %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xe4,0x7b]
+ vinserti64x4 $0x7b, %ymm4, %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, (%rcx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0x21,0x7b]
+ vinserti64x4 $0x7b, (%rcx), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, 291(%rax,%r14,8), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xb3,0xb5,0x40,0x3a,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti64x4 $0x7b, 291(%rax,%r14,8), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, 4064(%rdx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0x62,0x7f,0x7b]
+ vinserti64x4 $0x7b, 4064(%rdx), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, 4096(%rdx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vinserti64x4 $0x7b, 4096(%rdx), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, -4096(%rdx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0x62,0x80,0x7b]
+ vinserti64x4 $0x7b, -4096(%rdx), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, -4128(%rdx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vinserti64x4 $0x7b, -4128(%rdx), %zmm25, %zmm4
+
+// CHECK: vextractf32x4 $171, %zmm21, %xmm15
+// CHECK: encoding: [0x62,0xc3,0x7d,0x48,0x19,0xef,0xab]
+ vextractf32x4 $0xab, %zmm21, %xmm15
+
+// CHECK: vextractf32x4 $171, %zmm21, %xmm15 {%k1}
+// CHECK: encoding: [0x62,0xc3,0x7d,0x49,0x19,0xef,0xab]
+ vextractf32x4 $0xab, %zmm21, %xmm15 {%k1}
+
+// CHECK: vextractf32x4 $171, %zmm21, %xmm15 {%k1} {z}
+// CHECK: encoding: [0x62,0xc3,0x7d,0xc9,0x19,0xef,0xab]
+ vextractf32x4 $0xab, %zmm21, %xmm15 {%k1} {z}
+
+// CHECK: vextractf32x4 $123, %zmm21, %xmm15
+// CHECK: encoding: [0x62,0xc3,0x7d,0x48,0x19,0xef,0x7b]
+ vextractf32x4 $0x7b, %zmm21, %xmm15
+
+// CHECK: vextractf32x4 $171, %zmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x19,0x21,0xab]
+ vextractf32x4 $0xab, %zmm20, (%rcx)
+
+// CHECK: vextractf32x4 $171, %zmm20, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0xe3,0x7d,0x4f,0x19,0x21,0xab]
+ vextractf32x4 $0xab, %zmm20, (%rcx) {%k7}
+
+// CHECK: vextractf32x4 $123, %zmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x19,0x21,0x7b]
+ vextractf32x4 $0x7b, %zmm20, (%rcx)
+
+// CHECK: vextractf32x4 $123, %zmm20, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x19,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextractf32x4 $0x7b, %zmm20, 291(%rax,%r14,8)
+
+// CHECK: vextractf32x4 $123, %zmm20, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x19,0x62,0x7f,0x7b]
+ vextractf32x4 $0x7b, %zmm20, 2032(%rdx)
+
+// CHECK: vextractf32x4 $123, %zmm20, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x19,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vextractf32x4 $0x7b, %zmm20, 2048(%rdx)
+
+// CHECK: vextractf32x4 $123, %zmm20, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x19,0x62,0x80,0x7b]
+ vextractf32x4 $0x7b, %zmm20, -2048(%rdx)
+
+// CHECK: vextractf32x4 $123, %zmm20, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x19,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vextractf32x4 $0x7b, %zmm20, -2064(%rdx)
+
+// CHECK: vextractf64x4 $171, %zmm24, %ymm11
+// CHECK: encoding: [0x62,0x43,0xfd,0x48,0x1b,0xc3,0xab]
+ vextractf64x4 $0xab, %zmm24, %ymm11
+
+// CHECK: vextractf64x4 $171, %zmm24, %ymm11 {%k5}
+// CHECK: encoding: [0x62,0x43,0xfd,0x4d,0x1b,0xc3,0xab]
+ vextractf64x4 $0xab, %zmm24, %ymm11 {%k5}
+
+// CHECK: vextractf64x4 $171, %zmm24, %ymm11 {%k5} {z}
+// CHECK: encoding: [0x62,0x43,0xfd,0xcd,0x1b,0xc3,0xab]
+ vextractf64x4 $0xab, %zmm24, %ymm11 {%k5} {z}
+
+// CHECK: vextractf64x4 $123, %zmm24, %ymm11
+// CHECK: encoding: [0x62,0x43,0xfd,0x48,0x1b,0xc3,0x7b]
+ vextractf64x4 $0x7b, %zmm24, %ymm11
+
+// CHECK: vextractf64x4 $171, %zmm5, (%rcx)
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x1b,0x29,0xab]
+ vextractf64x4 $0xab, %zmm5, (%rcx)
+
+// CHECK: vextractf64x4 $171, %zmm5, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0xf3,0xfd,0x4c,0x1b,0x29,0xab]
+ vextractf64x4 $0xab, %zmm5, (%rcx) {%k4}
+
+// CHECK: vextractf64x4 $123, %zmm5, (%rcx)
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x1b,0x29,0x7b]
+ vextractf64x4 $0x7b, %zmm5, (%rcx)
+
+// CHECK: vextractf64x4 $123, %zmm5, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x1b,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextractf64x4 $0x7b, %zmm5, 291(%rax,%r14,8)
+
+// CHECK: vextractf64x4 $123, %zmm5, 4064(%rdx)
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x1b,0x6a,0x7f,0x7b]
+ vextractf64x4 $0x7b, %zmm5, 4064(%rdx)
+
+// CHECK: vextractf64x4 $123, %zmm5, 4096(%rdx)
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vextractf64x4 $0x7b, %zmm5, 4096(%rdx)
+
+// CHECK: vextractf64x4 $123, %zmm5, -4096(%rdx)
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x1b,0x6a,0x80,0x7b]
+ vextractf64x4 $0x7b, %zmm5, -4096(%rdx)
+
+// CHECK: vextractf64x4 $123, %zmm5, -4128(%rdx)
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vextractf64x4 $0x7b, %zmm5, -4128(%rdx)
+
+// CHECK: vextracti32x4 $171, %zmm16, %xmm13
+// CHECK: encoding: [0x62,0xc3,0x7d,0x48,0x39,0xc5,0xab]
+ vextracti32x4 $0xab, %zmm16, %xmm13
+
+// CHECK: vextracti32x4 $171, %zmm16, %xmm13 {%k5}
+// CHECK: encoding: [0x62,0xc3,0x7d,0x4d,0x39,0xc5,0xab]
+ vextracti32x4 $0xab, %zmm16, %xmm13 {%k5}
+
+// CHECK: vextracti32x4 $171, %zmm16, %xmm13 {%k5} {z}
+// CHECK: encoding: [0x62,0xc3,0x7d,0xcd,0x39,0xc5,0xab]
+ vextracti32x4 $0xab, %zmm16, %xmm13 {%k5} {z}
+
+// CHECK: vextracti32x4 $123, %zmm16, %xmm13
+// CHECK: encoding: [0x62,0xc3,0x7d,0x48,0x39,0xc5,0x7b]
+ vextracti32x4 $0x7b, %zmm16, %xmm13
+
+// CHECK: vextracti32x4 $171, %zmm29, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x39,0x29,0xab]
+ vextracti32x4 $0xab, %zmm29, (%rcx)
+
+// CHECK: vextracti32x4 $171, %zmm29, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x63,0x7d,0x4a,0x39,0x29,0xab]
+ vextracti32x4 $0xab, %zmm29, (%rcx) {%k2}
+
+// CHECK: vextracti32x4 $123, %zmm29, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x39,0x29,0x7b]
+ vextracti32x4 $0x7b, %zmm29, (%rcx)
+
+// CHECK: vextracti32x4 $123, %zmm29, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0x7d,0x48,0x39,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextracti32x4 $0x7b, %zmm29, 291(%rax,%r14,8)
+
+// CHECK: vextracti32x4 $123, %zmm29, 2032(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x39,0x6a,0x7f,0x7b]
+ vextracti32x4 $0x7b, %zmm29, 2032(%rdx)
+
+// CHECK: vextracti32x4 $123, %zmm29, 2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x39,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vextracti32x4 $0x7b, %zmm29, 2048(%rdx)
+
+// CHECK: vextracti32x4 $123, %zmm29, -2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x39,0x6a,0x80,0x7b]
+ vextracti32x4 $0x7b, %zmm29, -2048(%rdx)
+
+// CHECK: vextracti32x4 $123, %zmm29, -2064(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x39,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vextracti32x4 $0x7b, %zmm29, -2064(%rdx)
+
+// CHECK: vextracti64x4 $171, %zmm16, %ymm13
+// CHECK: encoding: [0x62,0xc3,0xfd,0x48,0x3b,0xc5,0xab]
+ vextracti64x4 $0xab, %zmm16, %ymm13
+
+// CHECK: vextracti64x4 $171, %zmm16, %ymm13 {%k3}
+// CHECK: encoding: [0x62,0xc3,0xfd,0x4b,0x3b,0xc5,0xab]
+ vextracti64x4 $0xab, %zmm16, %ymm13 {%k3}
+
+// CHECK: vextracti64x4 $171, %zmm16, %ymm13 {%k3} {z}
+// CHECK: encoding: [0x62,0xc3,0xfd,0xcb,0x3b,0xc5,0xab]
+ vextracti64x4 $0xab, %zmm16, %ymm13 {%k3} {z}
+
+// CHECK: vextracti64x4 $123, %zmm16, %ymm13
+// CHECK: encoding: [0x62,0xc3,0xfd,0x48,0x3b,0xc5,0x7b]
+ vextracti64x4 $0x7b, %zmm16, %ymm13
+
+// CHECK: vextracti64x4 $171, %zmm30, (%rcx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x3b,0x31,0xab]
+ vextracti64x4 $0xab, %zmm30, (%rcx)
+
+// CHECK: vextracti64x4 $171, %zmm30, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0x63,0xfd,0x4c,0x3b,0x31,0xab]
+ vextracti64x4 $0xab, %zmm30, (%rcx) {%k4}
+
+// CHECK: vextracti64x4 $123, %zmm30, (%rcx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x3b,0x31,0x7b]
+ vextracti64x4 $0x7b, %zmm30, (%rcx)
+
+// CHECK: vextracti64x4 $123, %zmm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0xfd,0x48,0x3b,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextracti64x4 $0x7b, %zmm30, 291(%rax,%r14,8)
+
+// CHECK: vextracti64x4 $123, %zmm30, 4064(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x3b,0x72,0x7f,0x7b]
+ vextracti64x4 $0x7b, %zmm30, 4064(%rdx)
+
+// CHECK: vextracti64x4 $123, %zmm30, 4096(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x3b,0xb2,0x00,0x10,0x00,0x00,0x7b]
+ vextracti64x4 $0x7b, %zmm30, 4096(%rdx)
+
+// CHECK: vextracti64x4 $123, %zmm30, -4096(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x3b,0x72,0x80,0x7b]
+ vextracti64x4 $0x7b, %zmm30, -4096(%rdx)
+
+// CHECK: vextracti64x4 $123, %zmm30, -4128(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x3b,0xb2,0xe0,0xef,0xff,0xff,0x7b]
+ vextracti64x4 $0x7b, %zmm30, -4128(%rdx)
+
+// CHECK: kunpckbw %k6, %k5, %k5
+// CHECK: encoding: [0xc5,0xd5,0x4b,0xee]
+ kunpckbw %k6, %k5, %k5
+
+// CHECK: vgetmantss $171, %xmm12, %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xd3,0x6d,0x08,0x27,0xdc,0xab]
+ vgetmantss $0xab, %xmm12, %xmm2, %xmm3
+
+// CHECK: vgetmantss $171, %xmm12, %xmm2, %xmm3 {%k7}
+// CHECK: encoding: [0x62,0xd3,0x6d,0x0f,0x27,0xdc,0xab]
+ vgetmantss $0xab, %xmm12, %xmm2, %xmm3 {%k7}
+
+// CHECK: vgetmantss $171, %xmm12, %xmm2, %xmm3 {%k7} {z}
+// CHECK: encoding: [0x62,0xd3,0x6d,0x8f,0x27,0xdc,0xab]
+ vgetmantss $0xab, %xmm12, %xmm2, %xmm3 {%k7} {z}
+
+// CHECK: vgetmantss $171,{sae}, %xmm12, %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xd3,0x6d,0x18,0x27,0xdc,0xab]
+ vgetmantss $0xab,{sae}, %xmm12, %xmm2, %xmm3
+
+// CHECK: vgetmantss $123, %xmm12, %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xd3,0x6d,0x08,0x27,0xdc,0x7b]
+ vgetmantss $0x7b, %xmm12, %xmm2, %xmm3
+
+// CHECK: vgetmantss $123,{sae}, %xmm12, %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xd3,0x6d,0x18,0x27,0xdc,0x7b]
+ vgetmantss $0x7b,{sae}, %xmm12, %xmm2, %xmm3
+
+// CHECK: vgetmantss $123, (%rcx), %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xf3,0x6d,0x08,0x27,0x19,0x7b]
+ vgetmantss $0x7b, (%rcx), %xmm2, %xmm3
+
+// CHECK: vgetmantss $123, 291(%rax,%r14,8), %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xb3,0x6d,0x08,0x27,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantss $0x7b, 291(%rax,%r14,8), %xmm2, %xmm3
+
+// CHECK: vgetmantss $123, 508(%rdx), %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xf3,0x6d,0x08,0x27,0x5a,0x7f,0x7b]
+ vgetmantss $0x7b, 508(%rdx), %xmm2, %xmm3
+
+// CHECK: vgetmantss $123, 512(%rdx), %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xf3,0x6d,0x08,0x27,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vgetmantss $0x7b, 512(%rdx), %xmm2, %xmm3
+
+// CHECK: vgetmantss $123, -512(%rdx), %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xf3,0x6d,0x08,0x27,0x5a,0x80,0x7b]
+ vgetmantss $0x7b, -512(%rdx), %xmm2, %xmm3
+
+// CHECK: vgetmantss $123, -516(%rdx), %xmm2, %xmm3
+// CHECK: encoding: [0x62,0xf3,0x6d,0x08,0x27,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vgetmantss $0x7b, -516(%rdx), %xmm2, %xmm3
+
+// CHECK: vgetmantsd $171, %xmm8, %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xd3,0xa5,0x00,0x27,0xd8,0xab]
+ vgetmantsd $0xab, %xmm8, %xmm27, %xmm3
+
+// CHECK: vgetmantsd $171, %xmm8, %xmm27, %xmm3 {%k6}
+// CHECK: encoding: [0x62,0xd3,0xa5,0x06,0x27,0xd8,0xab]
+ vgetmantsd $0xab, %xmm8, %xmm27, %xmm3 {%k6}
+
+// CHECK: vgetmantsd $171, %xmm8, %xmm27, %xmm3 {%k6} {z}
+// CHECK: encoding: [0x62,0xd3,0xa5,0x86,0x27,0xd8,0xab]
+ vgetmantsd $0xab, %xmm8, %xmm27, %xmm3 {%k6} {z}
+
+// CHECK: vgetmantsd $171,{sae}, %xmm8, %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xd3,0xa5,0x10,0x27,0xd8,0xab]
+ vgetmantsd $0xab,{sae}, %xmm8, %xmm27, %xmm3
+
+// CHECK: vgetmantsd $123, %xmm8, %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xd3,0xa5,0x00,0x27,0xd8,0x7b]
+ vgetmantsd $0x7b, %xmm8, %xmm27, %xmm3
+
+// CHECK: vgetmantsd $123,{sae}, %xmm8, %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xd3,0xa5,0x10,0x27,0xd8,0x7b]
+ vgetmantsd $0x7b,{sae}, %xmm8, %xmm27, %xmm3
+
+// CHECK: vgetmantsd $123, (%rcx), %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xf3,0xa5,0x00,0x27,0x19,0x7b]
+ vgetmantsd $0x7b, (%rcx), %xmm27, %xmm3
+
+// CHECK: vgetmantsd $123, 291(%rax,%r14,8), %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xb3,0xa5,0x00,0x27,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantsd $0x7b, 291(%rax,%r14,8), %xmm27, %xmm3
+
+// CHECK: vgetmantsd $123, 1016(%rdx), %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xf3,0xa5,0x00,0x27,0x5a,0x7f,0x7b]
+ vgetmantsd $0x7b, 1016(%rdx), %xmm27, %xmm3
+
+// CHECK: vgetmantsd $123, 1024(%rdx), %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xf3,0xa5,0x00,0x27,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vgetmantsd $0x7b, 1024(%rdx), %xmm27, %xmm3
+
+// CHECK: vgetmantsd $123, -1024(%rdx), %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xf3,0xa5,0x00,0x27,0x5a,0x80,0x7b]
+ vgetmantsd $0x7b, -1024(%rdx), %xmm27, %xmm3
+
+// CHECK: vgetmantsd $123, -1032(%rdx), %xmm27, %xmm3
+// CHECK: encoding: [0x62,0xf3,0xa5,0x00,0x27,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vgetmantsd $0x7b, -1032(%rdx), %xmm27, %xmm3
+
+// CHECK: vgetmantps $171, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x26,0xf4,0xab]
+ vgetmantps $0xab, %zmm28, %zmm22
+
+// CHECK: vgetmantps $171, %zmm28, %zmm22 {%k3}
+// CHECK: encoding: [0x62,0x83,0x7d,0x4b,0x26,0xf4,0xab]
+ vgetmantps $0xab, %zmm28, %zmm22 {%k3}
+
+// CHECK: vgetmantps $171, %zmm28, %zmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0x83,0x7d,0xcb,0x26,0xf4,0xab]
+ vgetmantps $0xab, %zmm28, %zmm22 {%k3} {z}
+
+// CHECK: vgetmantps $171,{sae}, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x26,0xf4,0xab]
+ vgetmantps $0xab,{sae}, %zmm28, %zmm22
+
+// CHECK: vgetmantps $123, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x26,0xf4,0x7b]
+ vgetmantps $0x7b, %zmm28, %zmm22
+
+// CHECK: vgetmantps $123,{sae}, %zmm28, %zmm22
+// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x26,0xf4,0x7b]
+ vgetmantps $0x7b,{sae}, %zmm28, %zmm22
+
+// CHECK: vgetmantps $123, (%rcx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x26,0x31,0x7b]
+ vgetmantps $0x7b, (%rcx), %zmm22
+
+// CHECK: vgetmantps $123, 291(%rax,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x26,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantps $0x7b, 291(%rax,%r14,8), %zmm22
+
+// CHECK: vgetmantps $123, (%rcx){1to16}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x26,0x31,0x7b]
+ vgetmantps $0x7b, (%rcx){1to16}, %zmm22
+
+// CHECK: vgetmantps $123, 8128(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x26,0x72,0x7f,0x7b]
+ vgetmantps $0x7b, 8128(%rdx), %zmm22
+
+// CHECK: vgetmantps $123, 8192(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x26,0xb2,0x00,0x20,0x00,0x00,0x7b]
+ vgetmantps $0x7b, 8192(%rdx), %zmm22
+
+// CHECK: vgetmantps $123, -8192(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x26,0x72,0x80,0x7b]
+ vgetmantps $0x7b, -8192(%rdx), %zmm22
+
+// CHECK: vgetmantps $123, -8256(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x26,0xb2,0xc0,0xdf,0xff,0xff,0x7b]
+ vgetmantps $0x7b, -8256(%rdx), %zmm22
+
+// CHECK: vgetmantps $123, 508(%rdx){1to16}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x26,0x72,0x7f,0x7b]
+ vgetmantps $0x7b, 508(%rdx){1to16}, %zmm22
+
+// CHECK: vgetmantps $123, 512(%rdx){1to16}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x26,0xb2,0x00,0x02,0x00,0x00,0x7b]
+ vgetmantps $0x7b, 512(%rdx){1to16}, %zmm22
+
+// CHECK: vgetmantps $123, -512(%rdx){1to16}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x26,0x72,0x80,0x7b]
+ vgetmantps $0x7b, -512(%rdx){1to16}, %zmm22
+
+// CHECK: vgetmantps $123, -516(%rdx){1to16}, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x26,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+ vgetmantps $0x7b, -516(%rdx){1to16}, %zmm22
+
+// CHECK: vgetmantpd $171, %zmm26, %zmm2
+// CHECK: encoding: [0x62,0x93,0xfd,0x48,0x26,0xd2,0xab]
+ vgetmantpd $0xab, %zmm26, %zmm2
+
+// CHECK: vgetmantpd $171, %zmm26, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0x93,0xfd,0x4f,0x26,0xd2,0xab]
+ vgetmantpd $0xab, %zmm26, %zmm2 {%k7}
+
+// CHECK: vgetmantpd $171, %zmm26, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0x93,0xfd,0xcf,0x26,0xd2,0xab]
+ vgetmantpd $0xab, %zmm26, %zmm2 {%k7} {z}
+
+// CHECK: vgetmantpd $171,{sae}, %zmm26, %zmm2
+// CHECK: encoding: [0x62,0x93,0xfd,0x18,0x26,0xd2,0xab]
+ vgetmantpd $0xab,{sae}, %zmm26, %zmm2
+
+// CHECK: vgetmantpd $123, %zmm26, %zmm2
+// CHECK: encoding: [0x62,0x93,0xfd,0x48,0x26,0xd2,0x7b]
+ vgetmantpd $0x7b, %zmm26, %zmm2
+
+// CHECK: vgetmantpd $123,{sae}, %zmm26, %zmm2
+// CHECK: encoding: [0x62,0x93,0xfd,0x18,0x26,0xd2,0x7b]
+ vgetmantpd $0x7b,{sae}, %zmm26, %zmm2
+
+// CHECK: vgetmantpd $123, (%rcx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x26,0x11,0x7b]
+ vgetmantpd $0x7b, (%rcx), %zmm2
+
+// CHECK: vgetmantpd $123, 291(%rax,%r14,8), %zmm2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x26,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpd $0x7b, 291(%rax,%r14,8), %zmm2
+
+// CHECK: vgetmantpd $123, (%rcx){1to8}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x26,0x11,0x7b]
+ vgetmantpd $0x7b, (%rcx){1to8}, %zmm2
+
+// CHECK: vgetmantpd $123, 8128(%rdx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x26,0x52,0x7f,0x7b]
+ vgetmantpd $0x7b, 8128(%rdx), %zmm2
+
+// CHECK: vgetmantpd $123, 8192(%rdx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x26,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vgetmantpd $0x7b, 8192(%rdx), %zmm2
+
+// CHECK: vgetmantpd $123, -8192(%rdx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x26,0x52,0x80,0x7b]
+ vgetmantpd $0x7b, -8192(%rdx), %zmm2
+
+// CHECK: vgetmantpd $123, -8256(%rdx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x26,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vgetmantpd $0x7b, -8256(%rdx), %zmm2
+
+// CHECK: vgetmantpd $123, 1016(%rdx){1to8}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x26,0x52,0x7f,0x7b]
+ vgetmantpd $0x7b, 1016(%rdx){1to8}, %zmm2
+
+// CHECK: vgetmantpd $123, 1024(%rdx){1to8}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x26,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vgetmantpd $0x7b, 1024(%rdx){1to8}, %zmm2
+
+// CHECK: vgetmantpd $123, -1024(%rdx){1to8}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x26,0x52,0x80,0x7b]
+ vgetmantpd $0x7b, -1024(%rdx){1to8}, %zmm2
+
+// CHECK: vgetmantpd $123, -1032(%rdx){1to8}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x26,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vgetmantpd $0x7b, -1032(%rdx){1to8}, %zmm2
+
+// CHECK: vshufps $171, %zmm9, %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xd1,0x4c,0x48,0xc6,0xe9,0xab]
+ vshufps $0xab, %zmm9, %zmm6, %zmm5
+
+// CHECK: vshufps $171, %zmm9, %zmm6, %zmm5 {%k6}
+// CHECK: encoding: [0x62,0xd1,0x4c,0x4e,0xc6,0xe9,0xab]
+ vshufps $0xab, %zmm9, %zmm6, %zmm5 {%k6}
+
+// CHECK: vshufps $171, %zmm9, %zmm6, %zmm5 {%k6} {z}
+// CHECK: encoding: [0x62,0xd1,0x4c,0xce,0xc6,0xe9,0xab]
+ vshufps $0xab, %zmm9, %zmm6, %zmm5 {%k6} {z}
+
+// CHECK: vshufps $123, %zmm9, %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xd1,0x4c,0x48,0xc6,0xe9,0x7b]
+ vshufps $0x7b, %zmm9, %zmm6, %zmm5
+
+// CHECK: vshufps $123, (%rcx), %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x48,0xc6,0x29,0x7b]
+ vshufps $0x7b, (%rcx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, 291(%rax,%r14,8), %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xb1,0x4c,0x48,0xc6,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufps $0x7b, 291(%rax,%r14,8), %zmm6, %zmm5
+
+// CHECK: vshufps $123, (%rcx){1to16}, %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x58,0xc6,0x29,0x7b]
+ vshufps $0x7b, (%rcx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufps $123, 8128(%rdx), %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x48,0xc6,0x6a,0x7f,0x7b]
+ vshufps $0x7b, 8128(%rdx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, 8192(%rdx), %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x48,0xc6,0xaa,0x00,0x20,0x00,0x00,0x7b]
+ vshufps $0x7b, 8192(%rdx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, -8192(%rdx), %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x48,0xc6,0x6a,0x80,0x7b]
+ vshufps $0x7b, -8192(%rdx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, -8256(%rdx), %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x48,0xc6,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
+ vshufps $0x7b, -8256(%rdx), %zmm6, %zmm5
+
+// CHECK: vshufps $123, 508(%rdx){1to16}, %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x58,0xc6,0x6a,0x7f,0x7b]
+ vshufps $0x7b, 508(%rdx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufps $123, 512(%rdx){1to16}, %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x58,0xc6,0xaa,0x00,0x02,0x00,0x00,0x7b]
+ vshufps $0x7b, 512(%rdx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufps $123, -512(%rdx){1to16}, %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x58,0xc6,0x6a,0x80,0x7b]
+ vshufps $0x7b, -512(%rdx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufps $123, -516(%rdx){1to16}, %zmm6, %zmm5
+// CHECK: encoding: [0x62,0xf1,0x4c,0x58,0xc6,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+ vshufps $0x7b, -516(%rdx){1to16}, %zmm6, %zmm5
+
+// CHECK: vshufpd $171, %zmm22, %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x21,0xbd,0x48,0xc6,0xe6,0xab]
+ vshufpd $0xab, %zmm22, %zmm8, %zmm28
+
+// CHECK: vshufpd $171, %zmm22, %zmm8, %zmm28 {%k2}
+// CHECK: encoding: [0x62,0x21,0xbd,0x4a,0xc6,0xe6,0xab]
+ vshufpd $0xab, %zmm22, %zmm8, %zmm28 {%k2}
+
+// CHECK: vshufpd $171, %zmm22, %zmm8, %zmm28 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0xbd,0xca,0xc6,0xe6,0xab]
+ vshufpd $0xab, %zmm22, %zmm8, %zmm28 {%k2} {z}
+
+// CHECK: vshufpd $123, %zmm22, %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x21,0xbd,0x48,0xc6,0xe6,0x7b]
+ vshufpd $0x7b, %zmm22, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, (%rcx), %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x48,0xc6,0x21,0x7b]
+ vshufpd $0x7b, (%rcx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 291(%rax,%r14,8), %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x21,0xbd,0x48,0xc6,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufpd $0x7b, 291(%rax,%r14,8), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, (%rcx){1to8}, %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x58,0xc6,0x21,0x7b]
+ vshufpd $0x7b, (%rcx){1to8}, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 8128(%rdx), %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x48,0xc6,0x62,0x7f,0x7b]
+ vshufpd $0x7b, 8128(%rdx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 8192(%rdx), %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x48,0xc6,0xa2,0x00,0x20,0x00,0x00,0x7b]
+ vshufpd $0x7b, 8192(%rdx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, -8192(%rdx), %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x48,0xc6,0x62,0x80,0x7b]
+ vshufpd $0x7b, -8192(%rdx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, -8256(%rdx), %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x48,0xc6,0xa2,0xc0,0xdf,0xff,0xff,0x7b]
+ vshufpd $0x7b, -8256(%rdx), %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 1016(%rdx){1to8}, %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x58,0xc6,0x62,0x7f,0x7b]
+ vshufpd $0x7b, 1016(%rdx){1to8}, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, 1024(%rdx){1to8}, %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x58,0xc6,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vshufpd $0x7b, 1024(%rdx){1to8}, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, -1024(%rdx){1to8}, %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x58,0xc6,0x62,0x80,0x7b]
+ vshufpd $0x7b, -1024(%rdx){1to8}, %zmm8, %zmm28
+
+// CHECK: vshufpd $123, -1032(%rdx){1to8}, %zmm8, %zmm28
+// CHECK: encoding: [0x62,0x61,0xbd,0x58,0xc6,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vshufpd $0x7b, -1032(%rdx){1to8}, %zmm8, %zmm28
+
+// CHECK: kortestw %k6, %k2
+// CHECK: encoding: [0xc5,0xf8,0x98,0xd6]
+ kortestw %k6, %k2
+
+// CHECK: vscatterqps %ymm6, 123(%r14,%zmm27,8) {%k1}
+// CHECK: encoding: [0x62,0x92,0x7d,0x41,0xa3,0xb4,0xde,0x7b,0x00,0x00,0x00]
+ vscatterqps %ymm6, 123(%r14,%zmm27,8) {%k1}
+
+// CHECK: vscatterqps %ymm6, 123(%r14,%zmm27,8) {%k1}
+// CHECK: encoding: [0x62,0x92,0x7d,0x41,0xa3,0xb4,0xde,0x7b,0x00,0x00,0x00]
+ vscatterqps %ymm6, 123(%r14,%zmm27,8) {%k1}
+
+// CHECK: vscatterqps %ymm6, 256(%r9,%zmm27) {%k1}
+// CHECK: encoding: [0x62,0x92,0x7d,0x41,0xa3,0x74,0x19,0x40]
+ vscatterqps %ymm6, 256(%r9,%zmm27) {%k1}
+
+// CHECK: vscatterqps %ymm6, 1024(%rcx,%zmm27,4) {%k1}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x41,0xa3,0xb4,0x99,0x00,0x04,0x00,0x00]
+ vscatterqps %ymm6, 1024(%rcx,%zmm27,4) {%k1}
+
+// CHECK: vscatterqpd %zmm22, 123(%r14,%zmm28,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x41,0xa3,0xb4,0xe6,0x7b,0x00,0x00,0x00]
+ vscatterqpd %zmm22, 123(%r14,%zmm28,8) {%k1}
+
+// CHECK: vscatterqpd %zmm22, 123(%r14,%zmm28,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x41,0xa3,0xb4,0xe6,0x7b,0x00,0x00,0x00]
+ vscatterqpd %zmm22, 123(%r14,%zmm28,8) {%k1}
+
+// CHECK: vscatterqpd %zmm22, 256(%r9,%zmm28) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x41,0xa3,0x74,0x21,0x20]
+ vscatterqpd %zmm22, 256(%r9,%zmm28) {%k1}
+
+// CHECK: vscatterqpd %zmm22, 1024(%rcx,%zmm28,4) {%k1}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x41,0xa3,0xb4,0xa1,0x00,0x04,0x00,0x00]
+ vscatterqpd %zmm22, 1024(%rcx,%zmm28,4) {%k1}
+
+// CHECK: vscatterdps %zmm17, 123(%r14,%zmm19,8) {%k1}
+// CHECK: encoding: [0x62,0xc2,0x7d,0x41,0xa2,0x8c,0xde,0x7b,0x00,0x00,0x00]
+ vscatterdps %zmm17, 123(%r14, %zmm19,8) {%k1}
+
+// CHECK: vscatterdps %zmm17, 123(%r14,%zmm19,8) {%k1}
+// CHECK: encoding: [0x62,0xc2,0x7d,0x41,0xa2,0x8c,0xde,0x7b,0x00,0x00,0x00]
+ vscatterdps %zmm17, 123(%r14, %zmm19,8) {%k1}
+
+// CHECK: vscatterdps %zmm17, 256(%r9,%zmm19) {%k1}
+// CHECK: encoding: [0x62,0xc2,0x7d,0x41,0xa2,0x4c,0x19,0x40]
+ vscatterdps %zmm17, 256(%r9, %zmm19) {%k1}
+
+// CHECK: vscatterdps %zmm17, 1024(%rcx,%zmm19,4) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x41,0xa2,0x8c,0x99,0x00,0x04,0x00,0x00]
+ vscatterdps %zmm17, 1024(%rcx, %zmm19,4) {%k1}
+
+// CHECK: vscatterdpd %zmm18, 123(%r14,%ymm24,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x41,0xa2,0x94,0xc6,0x7b,0x00,0x00,0x00]
+ vscatterdpd %zmm18, 123(%r14, %ymm24,8) {%k1}
+
+// CHECK: vscatterdpd %zmm18, 123(%r14,%ymm24,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x41,0xa2,0x94,0xc6,0x7b,0x00,0x00,0x00]
+ vscatterdpd %zmm18, 123(%r14, %ymm24,8) {%k1}
+
+// CHECK: vscatterdpd %zmm18, 256(%r9,%ymm24) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x41,0xa2,0x54,0x01,0x20]
+ vscatterdpd %zmm18, 256(%r9, %ymm24) {%k1}
+
+// CHECK: vscatterdpd %zmm18, 1024(%rcx,%ymm24,4) {%k1}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x41,0xa2,0x94,0x81,0x00,0x04,0x00,0x00]
+ vscatterdpd %zmm18, 1024(%rcx, %ymm24,4) {%k1}
+
+// CHECK: vpermilps $171, %zmm22, %zmm2
+// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0xd6,0xab]
+ vpermilps $0xab, %zmm22, %zmm2
+
+// CHECK: vpermilps $171, %zmm22, %zmm2 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x7d,0x4a,0x04,0xd6,0xab]
+ vpermilps $0xab, %zmm22, %zmm2 {%k2}
+
+// CHECK: vpermilps $171, %zmm22, %zmm2 {%k2} {z}
+// CHECK: encoding: [0x62,0xb3,0x7d,0xca,0x04,0xd6,0xab]
+ vpermilps $0xab, %zmm22, %zmm2 {%k2} {z}
+
+// CHECK: vpermilps $123, %zmm22, %zmm2
+// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0xd6,0x7b]
+ vpermilps $0x7b, %zmm22, %zmm2
+
+// CHECK: vpermilps $123, (%rcx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x11,0x7b]
+ vpermilps $0x7b, (%rcx), %zmm2
+
+// CHECK: vpermilps $123, 291(%rax,%r14,8), %zmm2
+// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x04,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpermilps $0x7b, 291(%rax,%r14,8), %zmm2
+
+// CHECK: vpermilps $123, (%rcx){1to16}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x11,0x7b]
+ vpermilps $0x7b, (%rcx){1to16}, %zmm2
+
+// CHECK: vpermilps $123, 8128(%rdx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x52,0x7f,0x7b]
+ vpermilps $0x7b, 8128(%rdx), %zmm2
+
+// CHECK: vpermilps $123, 8192(%rdx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vpermilps $0x7b, 8192(%rdx), %zmm2
+
+// CHECK: vpermilps $123, -8192(%rdx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x52,0x80,0x7b]
+ vpermilps $0x7b, -8192(%rdx), %zmm2
+
+// CHECK: vpermilps $123, -8256(%rdx), %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x04,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vpermilps $0x7b, -8256(%rdx), %zmm2
+
+// CHECK: vpermilps $123, 508(%rdx){1to16}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x52,0x7f,0x7b]
+ vpermilps $0x7b, 508(%rdx){1to16}, %zmm2
+
+// CHECK: vpermilps $123, 512(%rdx){1to16}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x92,0x00,0x02,0x00,0x00,0x7b]
+ vpermilps $0x7b, 512(%rdx){1to16}, %zmm2
+
+// CHECK: vpermilps $123, -512(%rdx){1to16}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x52,0x80,0x7b]
+ vpermilps $0x7b, -512(%rdx){1to16}, %zmm2
+
+// CHECK: vpermilps $123, -516(%rdx){1to16}, %zmm2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x04,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+ vpermilps $0x7b, -516(%rdx){1to16}, %zmm2
+
+// CHECK: vpermilps %zmm2, %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xea]
+ vpermilps %zmm2, %zmm20, %zmm13
+
+// CHECK: vpermilps %zmm2, %zmm20, %zmm13 {%k1}
+// CHECK: encoding: [0x62,0x72,0x5d,0x41,0x0c,0xea]
+ vpermilps %zmm2, %zmm20, %zmm13 {%k1}
+
+// CHECK: vpermilps %zmm2, %zmm20, %zmm13 {%k1} {z}
+// CHECK: encoding: [0x62,0x72,0x5d,0xc1,0x0c,0xea]
+ vpermilps %zmm2, %zmm20, %zmm13 {%k1} {z}
+
+// CHECK: vpermilps (%rcx), %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x29]
+ vpermilps (%rcx), %zmm20, %zmm13
+
+// CHECK: vpermilps 291(%rax,%r14,8), %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x32,0x5d,0x40,0x0c,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpermilps 291(%rax,%r14,8), %zmm20, %zmm13
+
+// CHECK: vpermilps (%rcx){1to16}, %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x29]
+ vpermilps (%rcx){1to16}, %zmm20, %zmm13
+
+// CHECK: vpermilps 8128(%rdx), %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x6a,0x7f]
+ vpermilps 8128(%rdx), %zmm20, %zmm13
+
+// CHECK: vpermilps 8192(%rdx), %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xaa,0x00,0x20,0x00,0x00]
+ vpermilps 8192(%rdx), %zmm20, %zmm13
+
+// CHECK: vpermilps -8192(%rdx), %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0x6a,0x80]
+ vpermilps -8192(%rdx), %zmm20, %zmm13
+
+// CHECK: vpermilps -8256(%rdx), %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x40,0x0c,0xaa,0xc0,0xdf,0xff,0xff]
+ vpermilps -8256(%rdx), %zmm20, %zmm13
+
+// CHECK: vpermilps 508(%rdx){1to16}, %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x6a,0x7f]
+ vpermilps 508(%rdx){1to16}, %zmm20, %zmm13
+
+// CHECK: vpermilps 512(%rdx){1to16}, %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0xaa,0x00,0x02,0x00,0x00]
+ vpermilps 512(%rdx){1to16}, %zmm20, %zmm13
+
+// CHECK: vpermilps -512(%rdx){1to16}, %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0x6a,0x80]
+ vpermilps -512(%rdx){1to16}, %zmm20, %zmm13
+
+// CHECK: vpermilps -516(%rdx){1to16}, %zmm20, %zmm13
+// CHECK: encoding: [0x62,0x72,0x5d,0x50,0x0c,0xaa,0xfc,0xfd,0xff,0xff]
+ vpermilps -516(%rdx){1to16}, %zmm20, %zmm13
+
+// CHECK: vpermilpd $171, %zmm4, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0xdc,0xab]
+ vpermilpd $0xab, %zmm4, %zmm19
+
+// CHECK: vpermilpd $171, %zmm4, %zmm19 {%k1}
+// CHECK: encoding: [0x62,0xe3,0xfd,0x49,0x05,0xdc,0xab]
+ vpermilpd $0xab, %zmm4, %zmm19 {%k1}
+
+// CHECK: vpermilpd $171, %zmm4, %zmm19 {%k1} {z}
+// CHECK: encoding: [0x62,0xe3,0xfd,0xc9,0x05,0xdc,0xab]
+ vpermilpd $0xab, %zmm4, %zmm19 {%k1} {z}
+
+// CHECK: vpermilpd $123, %zmm4, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0xdc,0x7b]
+ vpermilpd $0x7b, %zmm4, %zmm19
+
+// CHECK: vpermilpd $123, (%rcx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x19,0x7b]
+ vpermilpd $0x7b, (%rcx), %zmm19
+
+// CHECK: vpermilpd $123, 291(%rax,%r14,8), %zmm19
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x05,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpermilpd $0x7b, 291(%rax,%r14,8), %zmm19
+
+// CHECK: vpermilpd $123, (%rcx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x19,0x7b]
+ vpermilpd $0x7b, (%rcx){1to8}, %zmm19
+
+// CHECK: vpermilpd $123, 8128(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x5a,0x7f,0x7b]
+ vpermilpd $0x7b, 8128(%rdx), %zmm19
+
+// CHECK: vpermilpd $123, 8192(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x9a,0x00,0x20,0x00,0x00,0x7b]
+ vpermilpd $0x7b, 8192(%rdx), %zmm19
+
+// CHECK: vpermilpd $123, -8192(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x5a,0x80,0x7b]
+ vpermilpd $0x7b, -8192(%rdx), %zmm19
+
+// CHECK: vpermilpd $123, -8256(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x05,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+ vpermilpd $0x7b, -8256(%rdx), %zmm19
+
+// CHECK: vpermilpd $123, 1016(%rdx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x5a,0x7f,0x7b]
+ vpermilpd $0x7b, 1016(%rdx){1to8}, %zmm19
+
+// CHECK: vpermilpd $123, 1024(%rdx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vpermilpd $0x7b, 1024(%rdx){1to8}, %zmm19
+
+// CHECK: vpermilpd $123, -1024(%rdx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x5a,0x80,0x7b]
+ vpermilpd $0x7b, -1024(%rdx){1to8}, %zmm19
+
+// CHECK: vpermilpd $123, -1032(%rdx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x05,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vpermilpd $0x7b, -1032(%rdx){1to8}, %zmm19
+
+// CHECK: vpermilpd %zmm21, %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xad,0x40,0x0d,0xcd]
+ vpermilpd %zmm21, %zmm26, %zmm1
+
+// CHECK: vpermilpd %zmm21, %zmm26, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xad,0x42,0x0d,0xcd]
+ vpermilpd %zmm21, %zmm26, %zmm1 {%k2}
+
+// CHECK: vpermilpd %zmm21, %zmm26, %zmm1 {%k2} {z}
+// CHECK: encoding: [0x62,0xb2,0xad,0xc2,0x0d,0xcd]
+ vpermilpd %zmm21, %zmm26, %zmm1 {%k2} {z}
+
+// CHECK: vpermilpd (%rcx), %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x09]
+ vpermilpd (%rcx), %zmm26, %zmm1
+
+// CHECK: vpermilpd 291(%rax,%r14,8), %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xad,0x40,0x0d,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpermilpd 291(%rax,%r14,8), %zmm26, %zmm1
+
+// CHECK: vpermilpd (%rcx){1to8}, %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x09]
+ vpermilpd (%rcx){1to8}, %zmm26, %zmm1
+
+// CHECK: vpermilpd 8128(%rdx), %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x4a,0x7f]
+ vpermilpd 8128(%rdx), %zmm26, %zmm1
+
+// CHECK: vpermilpd 8192(%rdx), %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x8a,0x00,0x20,0x00,0x00]
+ vpermilpd 8192(%rdx), %zmm26, %zmm1
+
+// CHECK: vpermilpd -8192(%rdx), %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x4a,0x80]
+ vpermilpd -8192(%rdx), %zmm26, %zmm1
+
+// CHECK: vpermilpd -8256(%rdx), %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x40,0x0d,0x8a,0xc0,0xdf,0xff,0xff]
+ vpermilpd -8256(%rdx), %zmm26, %zmm1
+
+// CHECK: vpermilpd 1016(%rdx){1to8}, %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x4a,0x7f]
+ vpermilpd 1016(%rdx){1to8}, %zmm26, %zmm1
+
+// CHECK: vpermilpd 1024(%rdx){1to8}, %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x8a,0x00,0x04,0x00,0x00]
+ vpermilpd 1024(%rdx){1to8}, %zmm26, %zmm1
+
+// CHECK: vpermilpd -1024(%rdx){1to8}, %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x4a,0x80]
+ vpermilpd -1024(%rdx){1to8}, %zmm26, %zmm1
+
+// CHECK: vpermilpd -1032(%rdx){1to8}, %zmm26, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xad,0x50,0x0d,0x8a,0xf8,0xfb,0xff,0xff]
+ vpermilpd -1032(%rdx){1to8}, %zmm26, %zmm1
+
+// CHECK: vcvtpd2dq %zmm15, %ymm24
+// CHECK: encoding: [0x62,0x41,0xff,0x48,0xe6,0xc7]
+ vcvtpd2dq %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq %zmm15, %ymm24 {%k3}
+// CHECK: encoding: [0x62,0x41,0xff,0x4b,0xe6,0xc7]
+ vcvtpd2dq %zmm15, %ymm24 {%k3}
+
+// CHECK: vcvtpd2dq %zmm15, %ymm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x41,0xff,0xcb,0xe6,0xc7]
+ vcvtpd2dq %zmm15, %ymm24 {%k3} {z}
+
+// CHECK: vcvtpd2dq {rn-sae}, %zmm15, %ymm24
+// CHECK: encoding: [0x62,0x41,0xff,0x18,0xe6,0xc7]
+ vcvtpd2dq {rn-sae}, %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq {ru-sae}, %zmm15, %ymm24
+// CHECK: encoding: [0x62,0x41,0xff,0x58,0xe6,0xc7]
+ vcvtpd2dq {ru-sae}, %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq {rd-sae}, %zmm15, %ymm24
+// CHECK: encoding: [0x62,0x41,0xff,0x38,0xe6,0xc7]
+ vcvtpd2dq {rd-sae}, %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq {rz-sae}, %zmm15, %ymm24
+// CHECK: encoding: [0x62,0x41,0xff,0x78,0xe6,0xc7]
+ vcvtpd2dq {rz-sae}, %zmm15, %ymm24
+
+// CHECK: vcvtpd2dq (%rcx), %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0xe6,0x01]
+ vcvtpd2dq (%rcx), %ymm24
+
+// CHECK: vcvtpd2dq 291(%rax,%r14,8), %ymm24
+// CHECK: encoding: [0x62,0x21,0xff,0x48,0xe6,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvtpd2dq 291(%rax,%r14,8), %ymm24
+
+// CHECK: vcvtpd2dq (%rcx){1to8}, %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0xe6,0x01]
+ vcvtpd2dq (%rcx){1to8}, %ymm24
+
+// CHECK: vcvtpd2dq 8128(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0xe6,0x42,0x7f]
+ vcvtpd2dq 8128(%rdx), %ymm24
+
+// CHECK: vcvtpd2dq 8192(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0xe6,0x82,0x00,0x20,0x00,0x00]
+ vcvtpd2dq 8192(%rdx), %ymm24
+
+// CHECK: vcvtpd2dq -8192(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0xe6,0x42,0x80]
+ vcvtpd2dq -8192(%rdx), %ymm24
+
+// CHECK: vcvtpd2dq -8256(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0xe6,0x82,0xc0,0xdf,0xff,0xff]
+ vcvtpd2dq -8256(%rdx), %ymm24
+
+// CHECK: vcvtpd2dq 1016(%rdx){1to8}, %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0xe6,0x42,0x7f]
+ vcvtpd2dq 1016(%rdx){1to8}, %ymm24
+
+// CHECK: vcvtpd2dq 1024(%rdx){1to8}, %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0xe6,0x82,0x00,0x04,0x00,0x00]
+ vcvtpd2dq 1024(%rdx){1to8}, %ymm24
+
+// CHECK: vcvtpd2dq -1024(%rdx){1to8}, %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0xe6,0x42,0x80]
+ vcvtpd2dq -1024(%rdx){1to8}, %ymm24
+
+// CHECK: vcvtpd2dq -1032(%rdx){1to8}, %ymm24
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0xe6,0x82,0xf8,0xfb,0xff,0xff]
+ vcvtpd2dq -1032(%rdx){1to8}, %ymm24
+
+// CHECK: vcvtpd2udq %zmm19, %ymm15
+// CHECK: encoding: [0x62,0x31,0xfc,0x48,0x79,0xfb]
+ vcvtpd2udq %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq %zmm19, %ymm15 {%k7}
+// CHECK: encoding: [0x62,0x31,0xfc,0x4f,0x79,0xfb]
+ vcvtpd2udq %zmm19, %ymm15 {%k7}
+
+// CHECK: vcvtpd2udq %zmm19, %ymm15 {%k7} {z}
+// CHECK: encoding: [0x62,0x31,0xfc,0xcf,0x79,0xfb]
+ vcvtpd2udq %zmm19, %ymm15 {%k7} {z}
+
+// CHECK: vcvtpd2udq {rn-sae}, %zmm19, %ymm15
+// CHECK: encoding: [0x62,0x31,0xfc,0x18,0x79,0xfb]
+ vcvtpd2udq {rn-sae}, %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq {ru-sae}, %zmm19, %ymm15
+// CHECK: encoding: [0x62,0x31,0xfc,0x58,0x79,0xfb]
+ vcvtpd2udq {ru-sae}, %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq {rd-sae}, %zmm19, %ymm15
+// CHECK: encoding: [0x62,0x31,0xfc,0x38,0x79,0xfb]
+ vcvtpd2udq {rd-sae}, %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq {rz-sae}, %zmm19, %ymm15
+// CHECK: encoding: [0x62,0x31,0xfc,0x78,0x79,0xfb]
+ vcvtpd2udq {rz-sae}, %zmm19, %ymm15
+
+// CHECK: vcvtpd2udq (%rcx), %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x48,0x79,0x39]
+ vcvtpd2udq (%rcx), %ymm15
+
+// CHECK: vcvtpd2udq 291(%rax,%r14,8), %ymm15
+// CHECK: encoding: [0x62,0x31,0xfc,0x48,0x79,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vcvtpd2udq 291(%rax,%r14,8), %ymm15
+
+// CHECK: vcvtpd2udq (%rcx){1to8}, %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x58,0x79,0x39]
+ vcvtpd2udq (%rcx){1to8}, %ymm15
+
+// CHECK: vcvtpd2udq 8128(%rdx), %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x48,0x79,0x7a,0x7f]
+ vcvtpd2udq 8128(%rdx), %ymm15
+
+// CHECK: vcvtpd2udq 8192(%rdx), %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x48,0x79,0xba,0x00,0x20,0x00,0x00]
+ vcvtpd2udq 8192(%rdx), %ymm15
+
+// CHECK: vcvtpd2udq -8192(%rdx), %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x48,0x79,0x7a,0x80]
+ vcvtpd2udq -8192(%rdx), %ymm15
+
+// CHECK: vcvtpd2udq -8256(%rdx), %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x48,0x79,0xba,0xc0,0xdf,0xff,0xff]
+ vcvtpd2udq -8256(%rdx), %ymm15
+
+// CHECK: vcvtpd2udq 1016(%rdx){1to8}, %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x58,0x79,0x7a,0x7f]
+ vcvtpd2udq 1016(%rdx){1to8}, %ymm15
+
+// CHECK: vcvtpd2udq 1024(%rdx){1to8}, %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x58,0x79,0xba,0x00,0x04,0x00,0x00]
+ vcvtpd2udq 1024(%rdx){1to8}, %ymm15
+
+// CHECK: vcvtpd2udq -1024(%rdx){1to8}, %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x58,0x79,0x7a,0x80]
+ vcvtpd2udq -1024(%rdx){1to8}, %ymm15
+
+// CHECK: vcvtpd2udq -1032(%rdx){1to8}, %ymm15
+// CHECK: encoding: [0x62,0x71,0xfc,0x58,0x79,0xba,0xf8,0xfb,0xff,0xff]
+ vcvtpd2udq -1032(%rdx){1to8}, %ymm15
+
+// CHECK: vcvttpd2udq %zmm20, %ymm16
+// CHECK: encoding: [0x62,0xa1,0xfc,0x48,0x78,0xc4]
+ vcvttpd2udq %zmm20, %ymm16
+
+// CHECK: vcvttpd2udq %zmm20, %ymm16 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xfc,0x4f,0x78,0xc4]
+ vcvttpd2udq %zmm20, %ymm16 {%k7}
+
+// CHECK: vcvttpd2udq %zmm20, %ymm16 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xfc,0xcf,0x78,0xc4]
+ vcvttpd2udq %zmm20, %ymm16 {%k7} {z}
+
+// CHECK: vcvttpd2udq {sae}, %zmm20, %ymm16
+// CHECK: encoding: [0x62,0xa1,0xfc,0x18,0x78,0xc4]
+ vcvttpd2udq {sae}, %zmm20, %ymm16
+
+// CHECK: vcvttpd2udq (%rcx), %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x48,0x78,0x01]
+ vcvttpd2udq (%rcx), %ymm16
+
+// CHECK: vcvttpd2udq 291(%rax,%r14,8), %ymm16
+// CHECK: encoding: [0x62,0xa1,0xfc,0x48,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvttpd2udq 291(%rax,%r14,8), %ymm16
+
+// CHECK: vcvttpd2udq (%rcx){1to8}, %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x58,0x78,0x01]
+ vcvttpd2udq (%rcx){1to8}, %ymm16
+
+// CHECK: vcvttpd2udq 8128(%rdx), %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x48,0x78,0x42,0x7f]
+ vcvttpd2udq 8128(%rdx), %ymm16
+
+// CHECK: vcvttpd2udq 8192(%rdx), %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x48,0x78,0x82,0x00,0x20,0x00,0x00]
+ vcvttpd2udq 8192(%rdx), %ymm16
+
+// CHECK: vcvttpd2udq -8192(%rdx), %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x48,0x78,0x42,0x80]
+ vcvttpd2udq -8192(%rdx), %ymm16
+
+// CHECK: vcvttpd2udq -8256(%rdx), %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x48,0x78,0x82,0xc0,0xdf,0xff,0xff]
+ vcvttpd2udq -8256(%rdx), %ymm16
+
+// CHECK: vcvttpd2udq 1016(%rdx){1to8}, %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x58,0x78,0x42,0x7f]
+ vcvttpd2udq 1016(%rdx){1to8}, %ymm16
+
+// CHECK: vcvttpd2udq 1024(%rdx){1to8}, %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x58,0x78,0x82,0x00,0x04,0x00,0x00]
+ vcvttpd2udq 1024(%rdx){1to8}, %ymm16
+
+// CHECK: vcvttpd2udq -1024(%rdx){1to8}, %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x58,0x78,0x42,0x80]
+ vcvttpd2udq -1024(%rdx){1to8}, %ymm16
+
+// CHECK: vcvttpd2udq -1032(%rdx){1to8}, %ymm16
+// CHECK: encoding: [0x62,0xe1,0xfc,0x58,0x78,0x82,0xf8,0xfb,0xff,0xff]
+ vcvttpd2udq -1032(%rdx){1to8}, %ymm16
+
+// CHECK: vcvttpd2dq %zmm9, %ymm27
+// CHECK: encoding: [0x62,0x41,0xfd,0x48,0xe6,0xd9]
+ vcvttpd2dq %zmm9, %ymm27
+
+// CHECK: vcvttpd2dq %zmm9, %ymm27 {%k5}
+// CHECK: encoding: [0x62,0x41,0xfd,0x4d,0xe6,0xd9]
+ vcvttpd2dq %zmm9, %ymm27 {%k5}
+
+// CHECK: vcvttpd2dq %zmm9, %ymm27 {%k5} {z}
+// CHECK: encoding: [0x62,0x41,0xfd,0xcd,0xe6,0xd9]
+ vcvttpd2dq %zmm9, %ymm27 {%k5} {z}
+
+// CHECK: vcvttpd2dq {sae}, %zmm9, %ymm27
+// CHECK: encoding: [0x62,0x41,0xfd,0x18,0xe6,0xd9]
+ vcvttpd2dq {sae}, %zmm9, %ymm27
+
+// CHECK: vcvttpd2dq (%rcx), %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x48,0xe6,0x19]
+ vcvttpd2dq (%rcx), %ymm27
+
+// CHECK: vcvttpd2dq 291(%rax,%r14,8), %ymm27
+// CHECK: encoding: [0x62,0x21,0xfd,0x48,0xe6,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vcvttpd2dq 291(%rax,%r14,8), %ymm27
+
+// CHECK: vcvttpd2dq (%rcx){1to8}, %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x58,0xe6,0x19]
+ vcvttpd2dq (%rcx){1to8}, %ymm27
+
+// CHECK: vcvttpd2dq 8128(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x48,0xe6,0x5a,0x7f]
+ vcvttpd2dq 8128(%rdx), %ymm27
+
+// CHECK: vcvttpd2dq 8192(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x48,0xe6,0x9a,0x00,0x20,0x00,0x00]
+ vcvttpd2dq 8192(%rdx), %ymm27
+
+// CHECK: vcvttpd2dq -8192(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x48,0xe6,0x5a,0x80]
+ vcvttpd2dq -8192(%rdx), %ymm27
+
+// CHECK: vcvttpd2dq -8256(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x48,0xe6,0x9a,0xc0,0xdf,0xff,0xff]
+ vcvttpd2dq -8256(%rdx), %ymm27
+
+// CHECK: vcvttpd2dq 1016(%rdx){1to8}, %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x58,0xe6,0x5a,0x7f]
+ vcvttpd2dq 1016(%rdx){1to8}, %ymm27
+
+// CHECK: vcvttpd2dq 1024(%rdx){1to8}, %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x58,0xe6,0x9a,0x00,0x04,0x00,0x00]
+ vcvttpd2dq 1024(%rdx){1to8}, %ymm27
+
+// CHECK: vcvttpd2dq -1024(%rdx){1to8}, %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x58,0xe6,0x5a,0x80]
+ vcvttpd2dq -1024(%rdx){1to8}, %ymm27
+
+// CHECK: vcvttpd2dq -1032(%rdx){1to8}, %ymm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x58,0xe6,0x9a,0xf8,0xfb,0xff,0xff]
+ vcvttpd2dq -1032(%rdx){1to8}, %ymm27
+
+// CHECK: vcvtsd2ss %xmm12, %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xc1,0xb7,0x48,0x5a,0xcc]
+ vcvtsd2ss %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss %xmm12, %xmm9, %xmm17 {%k6}
+// CHECK: encoding: [0x62,0xc1,0xb7,0x4e,0x5a,0xcc]
+ vcvtsd2ss %xmm12, %xmm9, %xmm17 {%k6}
+
+// CHECK: vcvtsd2ss %xmm12, %xmm9, %xmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xc1,0xb7,0xce,0x5a,0xcc]
+ vcvtsd2ss %xmm12, %xmm9, %xmm17 {%k6} {z}
+
+// CHECK: vcvtsd2ss {rn-sae}, %xmm12, %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xc1,0xb7,0x18,0x5a,0xcc]
+ vcvtsd2ss {rn-sae}, %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss {ru-sae}, %xmm12, %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xc1,0xb7,0x58,0x5a,0xcc]
+ vcvtsd2ss {ru-sae}, %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss {rd-sae}, %xmm12, %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xc1,0xb7,0x38,0x5a,0xcc]
+ vcvtsd2ss {rd-sae}, %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss {rz-sae}, %xmm12, %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xc1,0xb7,0x78,0x5a,0xcc]
+ vcvtsd2ss {rz-sae}, %xmm12, %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss (%rcx), %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x09]
+ vcvtsd2ss (%rcx), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss 291(%rax,%r14,8), %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xa1,0xb7,0x48,0x5a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vcvtsd2ss 291(%rax,%r14,8), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss 1016(%rdx), %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x4a,0x7f]
+ vcvtsd2ss 1016(%rdx), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss 1024(%rdx), %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x8a,0x00,0x04,0x00,0x00]
+ vcvtsd2ss 1024(%rdx), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss -1024(%rdx), %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x4a,0x80]
+ vcvtsd2ss -1024(%rdx), %xmm9, %xmm17
+
+// CHECK: vcvtsd2ss -1032(%rdx), %xmm9, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xb7,0x48,0x5a,0x8a,0xf8,0xfb,0xff,0xff]
+ vcvtsd2ss -1032(%rdx), %xmm9, %xmm17
+
+// CHECK: vcvtss2sd %xmm6, %xmm6, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4e,0x48,0x5a,0xe6]
+ vcvtss2sd %xmm6, %xmm6, %xmm28
+
+// CHECK: vcvtss2sd %xmm6, %xmm6, %xmm28 {%k3}
+// CHECK: encoding: [0x62,0x61,0x4e,0x4b,0x5a,0xe6]
+ vcvtss2sd %xmm6, %xmm6, %xmm28 {%k3}
+
+// CHECK: vcvtss2sd %xmm6, %xmm6, %xmm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x61,0x4e,0xcb,0x5a,0xe6]
+ vcvtss2sd %xmm6, %xmm6, %xmm28 {%k3} {z}
+
+// CHECK: vcvtss2sd {sae}, %xmm6, %xmm6, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4e,0x18,0x5a,0xe6]
+ vcvtss2sd {sae}, %xmm6, %xmm6, %xmm28
+
+// CHECK: vcvtss2sd (%rcx), %xmm6, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4e,0x48,0x5a,0x21]
+ vcvtss2sd (%rcx), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd 291(%rax,%r14,8), %xmm6, %xmm28
+// CHECK: encoding: [0x62,0x21,0x4e,0x48,0x5a,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtss2sd 291(%rax,%r14,8), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd 508(%rdx), %xmm6, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4e,0x48,0x5a,0x62,0x7f]
+ vcvtss2sd 508(%rdx), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd 512(%rdx), %xmm6, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4e,0x48,0x5a,0xa2,0x00,0x02,0x00,0x00]
+ vcvtss2sd 512(%rdx), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd -512(%rdx), %xmm6, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4e,0x48,0x5a,0x62,0x80]
+ vcvtss2sd -512(%rdx), %xmm6, %xmm28
+
+// CHECK: vcvtss2sd -516(%rdx), %xmm6, %xmm28
+// CHECK: encoding: [0x62,0x61,0x4e,0x48,0x5a,0xa2,0xfc,0xfd,0xff,0xff]
+ vcvtss2sd -516(%rdx), %xmm6, %xmm28
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm7, %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2d,0xc7]
+ vcvtsd2si {rn-sae}, %xmm7, %eax
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm7, %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x58,0x2d,0xc7]
+ vcvtsd2si {ru-sae}, %xmm7, %eax
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm7, %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x38,0x2d,0xc7]
+ vcvtsd2si {rd-sae}, %xmm7, %eax
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm7, %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x78,0x2d,0xc7]
+ vcvtsd2si {rz-sae}, %xmm7, %eax
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm7, %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2d,0xef]
+ vcvtsd2si {rn-sae}, %xmm7, %ebp
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm7, %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x58,0x2d,0xef]
+ vcvtsd2si {ru-sae}, %xmm7, %ebp
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm7, %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x38,0x2d,0xef]
+ vcvtsd2si {rd-sae}, %xmm7, %ebp
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm7, %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x78,0x2d,0xef]
+ vcvtsd2si {rz-sae}, %xmm7, %ebp
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm7, %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x18,0x2d,0xef]
+ vcvtsd2si {rn-sae}, %xmm7, %r13d
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm7, %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x58,0x2d,0xef]
+ vcvtsd2si {ru-sae}, %xmm7, %r13d
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm7, %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x38,0x2d,0xef]
+ vcvtsd2si {rd-sae}, %xmm7, %r13d
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm7, %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x78,0x2d,0xef]
+ vcvtsd2si {rz-sae}, %xmm7, %r13d
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm10, %rax
+// CHECK: encoding: [0x62,0xd1,0xff,0x18,0x2d,0xc2]
+ vcvtsd2si {rn-sae}, %xmm10, %rax
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm10, %rax
+// CHECK: encoding: [0x62,0xd1,0xff,0x58,0x2d,0xc2]
+ vcvtsd2si {ru-sae}, %xmm10, %rax
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm10, %rax
+// CHECK: encoding: [0x62,0xd1,0xff,0x38,0x2d,0xc2]
+ vcvtsd2si {rd-sae}, %xmm10, %rax
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm10, %rax
+// CHECK: encoding: [0x62,0xd1,0xff,0x78,0x2d,0xc2]
+ vcvtsd2si {rz-sae}, %xmm10, %rax
+
+// CHECK: vcvtsd2si {rn-sae}, %xmm10, %r8
+// CHECK: encoding: [0x62,0x51,0xff,0x18,0x2d,0xc2]
+ vcvtsd2si {rn-sae}, %xmm10, %r8
+
+// CHECK: vcvtsd2si {ru-sae}, %xmm10, %r8
+// CHECK: encoding: [0x62,0x51,0xff,0x58,0x2d,0xc2]
+ vcvtsd2si {ru-sae}, %xmm10, %r8
+
+// CHECK: vcvtsd2si {rd-sae}, %xmm10, %r8
+// CHECK: encoding: [0x62,0x51,0xff,0x38,0x2d,0xc2]
+ vcvtsd2si {rd-sae}, %xmm10, %r8
+
+// CHECK: vcvtsd2si {rz-sae}, %xmm10, %r8
+// CHECK: encoding: [0x62,0x51,0xff,0x78,0x2d,0xc2]
+ vcvtsd2si {rz-sae}, %xmm10, %r8
+
+// CHECK: vcvtsd2usi %xmm30, %eax
+// CHECK: encoding: [0x62,0x91,0x7f,0x08,0x79,0xc6]
+ vcvtsd2usi %xmm30, %eax
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm30, %eax
+// CHECK: encoding: [0x62,0x91,0x7f,0x18,0x79,0xc6]
+ vcvtsd2usi {rn-sae}, %xmm30, %eax
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm30, %eax
+// CHECK: encoding: [0x62,0x91,0x7f,0x58,0x79,0xc6]
+ vcvtsd2usi {ru-sae}, %xmm30, %eax
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm30, %eax
+// CHECK: encoding: [0x62,0x91,0x7f,0x38,0x79,0xc6]
+ vcvtsd2usi {rd-sae}, %xmm30, %eax
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm30, %eax
+// CHECK: encoding: [0x62,0x91,0x7f,0x78,0x79,0xc6]
+ vcvtsd2usi {rz-sae}, %xmm30, %eax
+
+// CHECK: vcvtsd2usi (%rcx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0x01]
+ vcvtsd2usi (%rcx), %eax
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %eax
+// CHECK: encoding: [0x62,0xb1,0x7f,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvtsd2usi 291(%rax,%r14,8), %eax
+
+// CHECK: vcvtsd2usi 1016(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0x42,0x7f]
+ vcvtsd2usi 1016(%rdx), %eax
+
+// CHECK: vcvtsd2usi 1024(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0x82,0x00,0x04,0x00,0x00]
+ vcvtsd2usi 1024(%rdx), %eax
+
+// CHECK: vcvtsd2usi -1024(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0x42,0x80]
+ vcvtsd2usi -1024(%rdx), %eax
+
+// CHECK: vcvtsd2usi -1032(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0x82,0xf8,0xfb,0xff,0xff]
+ vcvtsd2usi -1032(%rdx), %eax
+
+// CHECK: vcvtsd2usi %xmm30, %ebp
+// CHECK: encoding: [0x62,0x91,0x7f,0x08,0x79,0xee]
+ vcvtsd2usi %xmm30, %ebp
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm30, %ebp
+// CHECK: encoding: [0x62,0x91,0x7f,0x18,0x79,0xee]
+ vcvtsd2usi {rn-sae}, %xmm30, %ebp
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm30, %ebp
+// CHECK: encoding: [0x62,0x91,0x7f,0x58,0x79,0xee]
+ vcvtsd2usi {ru-sae}, %xmm30, %ebp
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm30, %ebp
+// CHECK: encoding: [0x62,0x91,0x7f,0x38,0x79,0xee]
+ vcvtsd2usi {rd-sae}, %xmm30, %ebp
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm30, %ebp
+// CHECK: encoding: [0x62,0x91,0x7f,0x78,0x79,0xee]
+ vcvtsd2usi {rz-sae}, %xmm30, %ebp
+
+// CHECK: vcvtsd2usi (%rcx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0x29]
+ vcvtsd2usi (%rcx), %ebp
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %ebp
+// CHECK: encoding: [0x62,0xb1,0x7f,0x08,0x79,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvtsd2usi 291(%rax,%r14,8), %ebp
+
+// CHECK: vcvtsd2usi 1016(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0x6a,0x7f]
+ vcvtsd2usi 1016(%rdx), %ebp
+
+// CHECK: vcvtsd2usi 1024(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0xaa,0x00,0x04,0x00,0x00]
+ vcvtsd2usi 1024(%rdx), %ebp
+
+// CHECK: vcvtsd2usi -1024(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0x6a,0x80]
+ vcvtsd2usi -1024(%rdx), %ebp
+
+// CHECK: vcvtsd2usi -1032(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x79,0xaa,0xf8,0xfb,0xff,0xff]
+ vcvtsd2usi -1032(%rdx), %ebp
+
+// CHECK: vcvtsd2usi %xmm30, %r13d
+// CHECK: encoding: [0x62,0x11,0x7f,0x08,0x79,0xee]
+ vcvtsd2usi %xmm30, %r13d
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm30, %r13d
+// CHECK: encoding: [0x62,0x11,0x7f,0x18,0x79,0xee]
+ vcvtsd2usi {rn-sae}, %xmm30, %r13d
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm30, %r13d
+// CHECK: encoding: [0x62,0x11,0x7f,0x58,0x79,0xee]
+ vcvtsd2usi {ru-sae}, %xmm30, %r13d
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm30, %r13d
+// CHECK: encoding: [0x62,0x11,0x7f,0x38,0x79,0xee]
+ vcvtsd2usi {rd-sae}, %xmm30, %r13d
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm30, %r13d
+// CHECK: encoding: [0x62,0x11,0x7f,0x78,0x79,0xee]
+ vcvtsd2usi {rz-sae}, %xmm30, %r13d
+
+// CHECK: vcvtsd2usi (%rcx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x79,0x29]
+ vcvtsd2usi (%rcx), %r13d
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %r13d
+// CHECK: encoding: [0x62,0x31,0x7f,0x08,0x79,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvtsd2usi 291(%rax,%r14,8), %r13d
+
+// CHECK: vcvtsd2usi 1016(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x79,0x6a,0x7f]
+ vcvtsd2usi 1016(%rdx), %r13d
+
+// CHECK: vcvtsd2usi 1024(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x79,0xaa,0x00,0x04,0x00,0x00]
+ vcvtsd2usi 1024(%rdx), %r13d
+
+// CHECK: vcvtsd2usi -1024(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x79,0x6a,0x80]
+ vcvtsd2usi -1024(%rdx), %r13d
+
+// CHECK: vcvtsd2usi -1032(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x79,0xaa,0xf8,0xfb,0xff,0xff]
+ vcvtsd2usi -1032(%rdx), %r13d
+
+// CHECK: vcvtsd2usi %xmm18, %rax
+// CHECK: encoding: [0x62,0xb1,0xff,0x08,0x79,0xc2]
+ vcvtsd2usi %xmm18, %rax
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm18, %rax
+// CHECK: encoding: [0x62,0xb1,0xff,0x18,0x79,0xc2]
+ vcvtsd2usi {rn-sae}, %xmm18, %rax
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm18, %rax
+// CHECK: encoding: [0x62,0xb1,0xff,0x58,0x79,0xc2]
+ vcvtsd2usi {ru-sae}, %xmm18, %rax
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm18, %rax
+// CHECK: encoding: [0x62,0xb1,0xff,0x38,0x79,0xc2]
+ vcvtsd2usi {rd-sae}, %xmm18, %rax
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm18, %rax
+// CHECK: encoding: [0x62,0xb1,0xff,0x78,0x79,0xc2]
+ vcvtsd2usi {rz-sae}, %xmm18, %rax
+
+// CHECK: vcvtsd2usi (%rcx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x79,0x01]
+ vcvtsd2usi (%rcx), %rax
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %rax
+// CHECK: encoding: [0x62,0xb1,0xff,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvtsd2usi 291(%rax,%r14,8), %rax
+
+// CHECK: vcvtsd2usi 1016(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x79,0x42,0x7f]
+ vcvtsd2usi 1016(%rdx), %rax
+
+// CHECK: vcvtsd2usi 1024(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x79,0x82,0x00,0x04,0x00,0x00]
+ vcvtsd2usi 1024(%rdx), %rax
+
+// CHECK: vcvtsd2usi -1024(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x79,0x42,0x80]
+ vcvtsd2usi -1024(%rdx), %rax
+
+// CHECK: vcvtsd2usi -1032(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x79,0x82,0xf8,0xfb,0xff,0xff]
+ vcvtsd2usi -1032(%rdx), %rax
+
+// CHECK: vcvtsd2usi %xmm18, %r8
+// CHECK: encoding: [0x62,0x31,0xff,0x08,0x79,0xc2]
+ vcvtsd2usi %xmm18, %r8
+
+// CHECK: vcvtsd2usi {rn-sae}, %xmm18, %r8
+// CHECK: encoding: [0x62,0x31,0xff,0x18,0x79,0xc2]
+ vcvtsd2usi {rn-sae}, %xmm18, %r8
+
+// CHECK: vcvtsd2usi {ru-sae}, %xmm18, %r8
+// CHECK: encoding: [0x62,0x31,0xff,0x58,0x79,0xc2]
+ vcvtsd2usi {ru-sae}, %xmm18, %r8
+
+// CHECK: vcvtsd2usi {rd-sae}, %xmm18, %r8
+// CHECK: encoding: [0x62,0x31,0xff,0x38,0x79,0xc2]
+ vcvtsd2usi {rd-sae}, %xmm18, %r8
+
+// CHECK: vcvtsd2usi {rz-sae}, %xmm18, %r8
+// CHECK: encoding: [0x62,0x31,0xff,0x78,0x79,0xc2]
+ vcvtsd2usi {rz-sae}, %xmm18, %r8
+
+// CHECK: vcvtsd2usi (%rcx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x79,0x01]
+ vcvtsd2usi (%rcx), %r8
+
+// CHECK: vcvtsd2usi 291(%rax,%r14,8), %r8
+// CHECK: encoding: [0x62,0x31,0xff,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvtsd2usi 291(%rax,%r14,8), %r8
+
+// CHECK: vcvtsd2usi 1016(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x79,0x42,0x7f]
+ vcvtsd2usi 1016(%rdx), %r8
+
+// CHECK: vcvtsd2usi 1024(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x79,0x82,0x00,0x04,0x00,0x00]
+ vcvtsd2usi 1024(%rdx), %r8
+
+// CHECK: vcvtsd2usi -1024(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x79,0x42,0x80]
+ vcvtsd2usi -1024(%rdx), %r8
+
+// CHECK: vcvtsd2usi -1032(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x79,0x82,0xf8,0xfb,0xff,0xff]
+ vcvtsd2usi -1032(%rdx), %r8
+
+// CHECK: vcvtss2si {rn-sae}, %xmm22, %eax
+// CHECK: encoding: [0x62,0xb1,0x7e,0x18,0x2d,0xc6]
+ vcvtss2si {rn-sae}, %xmm22, %eax
+
+// CHECK: vcvtss2si {ru-sae}, %xmm22, %eax
+// CHECK: encoding: [0x62,0xb1,0x7e,0x58,0x2d,0xc6]
+ vcvtss2si {ru-sae}, %xmm22, %eax
+
+// CHECK: vcvtss2si {rd-sae}, %xmm22, %eax
+// CHECK: encoding: [0x62,0xb1,0x7e,0x38,0x2d,0xc6]
+ vcvtss2si {rd-sae}, %xmm22, %eax
+
+// CHECK: vcvtss2si {rz-sae}, %xmm22, %eax
+// CHECK: encoding: [0x62,0xb1,0x7e,0x78,0x2d,0xc6]
+ vcvtss2si {rz-sae}, %xmm22, %eax
+
+// CHECK: vcvtss2si {rn-sae}, %xmm22, %ebp
+// CHECK: encoding: [0x62,0xb1,0x7e,0x18,0x2d,0xee]
+ vcvtss2si {rn-sae}, %xmm22, %ebp
+
+// CHECK: vcvtss2si {ru-sae}, %xmm22, %ebp
+// CHECK: encoding: [0x62,0xb1,0x7e,0x58,0x2d,0xee]
+ vcvtss2si {ru-sae}, %xmm22, %ebp
+
+// CHECK: vcvtss2si {rd-sae}, %xmm22, %ebp
+// CHECK: encoding: [0x62,0xb1,0x7e,0x38,0x2d,0xee]
+ vcvtss2si {rd-sae}, %xmm22, %ebp
+
+// CHECK: vcvtss2si {rz-sae}, %xmm22, %ebp
+// CHECK: encoding: [0x62,0xb1,0x7e,0x78,0x2d,0xee]
+ vcvtss2si {rz-sae}, %xmm22, %ebp
+
+// CHECK: vcvtss2si {rn-sae}, %xmm22, %r13d
+// CHECK: encoding: [0x62,0x31,0x7e,0x18,0x2d,0xee]
+ vcvtss2si {rn-sae}, %xmm22, %r13d
+
+// CHECK: vcvtss2si {ru-sae}, %xmm22, %r13d
+// CHECK: encoding: [0x62,0x31,0x7e,0x58,0x2d,0xee]
+ vcvtss2si {ru-sae}, %xmm22, %r13d
+
+// CHECK: vcvtss2si {rd-sae}, %xmm22, %r13d
+// CHECK: encoding: [0x62,0x31,0x7e,0x38,0x2d,0xee]
+ vcvtss2si {rd-sae}, %xmm22, %r13d
+
+// CHECK: vcvtss2si {rz-sae}, %xmm22, %r13d
+// CHECK: encoding: [0x62,0x31,0x7e,0x78,0x2d,0xee]
+ vcvtss2si {rz-sae}, %xmm22, %r13d
+
+// CHECK: vcvtss2si {rn-sae}, %xmm29, %rax
+// CHECK: encoding: [0x62,0x91,0xfe,0x18,0x2d,0xc5]
+ vcvtss2si {rn-sae}, %xmm29, %rax
+
+// CHECK: vcvtss2si {ru-sae}, %xmm29, %rax
+// CHECK: encoding: [0x62,0x91,0xfe,0x58,0x2d,0xc5]
+ vcvtss2si {ru-sae}, %xmm29, %rax
+
+// CHECK: vcvtss2si {rd-sae}, %xmm29, %rax
+// CHECK: encoding: [0x62,0x91,0xfe,0x38,0x2d,0xc5]
+ vcvtss2si {rd-sae}, %xmm29, %rax
+
+// CHECK: vcvtss2si {rz-sae}, %xmm29, %rax
+// CHECK: encoding: [0x62,0x91,0xfe,0x78,0x2d,0xc5]
+ vcvtss2si {rz-sae}, %xmm29, %rax
+
+// CHECK: vcvtss2si {rn-sae}, %xmm29, %r8
+// CHECK: encoding: [0x62,0x11,0xfe,0x18,0x2d,0xc5]
+ vcvtss2si {rn-sae}, %xmm29, %r8
+
+// CHECK: vcvtss2si {ru-sae}, %xmm29, %r8
+// CHECK: encoding: [0x62,0x11,0xfe,0x58,0x2d,0xc5]
+ vcvtss2si {ru-sae}, %xmm29, %r8
+
+// CHECK: vcvtss2si {rd-sae}, %xmm29, %r8
+// CHECK: encoding: [0x62,0x11,0xfe,0x38,0x2d,0xc5]
+ vcvtss2si {rd-sae}, %xmm29, %r8
+
+// CHECK: vcvtss2si {rz-sae}, %xmm29, %r8
+// CHECK: encoding: [0x62,0x11,0xfe,0x78,0x2d,0xc5]
+ vcvtss2si {rz-sae}, %xmm29, %r8
+
+// CHECK: vcvtss2usi %xmm28, %eax
+// CHECK: encoding: [0x62,0x91,0x7e,0x08,0x79,0xc4]
+ vcvtss2usi %xmm28, %eax
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm28, %eax
+// CHECK: encoding: [0x62,0x91,0x7e,0x18,0x79,0xc4]
+ vcvtss2usi {rn-sae}, %xmm28, %eax
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm28, %eax
+// CHECK: encoding: [0x62,0x91,0x7e,0x58,0x79,0xc4]
+ vcvtss2usi {ru-sae}, %xmm28, %eax
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm28, %eax
+// CHECK: encoding: [0x62,0x91,0x7e,0x38,0x79,0xc4]
+ vcvtss2usi {rd-sae}, %xmm28, %eax
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm28, %eax
+// CHECK: encoding: [0x62,0x91,0x7e,0x78,0x79,0xc4]
+ vcvtss2usi {rz-sae}, %xmm28, %eax
+
+// CHECK: vcvtss2usi (%rcx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0x01]
+ vcvtss2usi (%rcx), %eax
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %eax
+// CHECK: encoding: [0x62,0xb1,0x7e,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvtss2usi 291(%rax,%r14,8), %eax
+
+// CHECK: vcvtss2usi 508(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0x42,0x7f]
+ vcvtss2usi 508(%rdx), %eax
+
+// CHECK: vcvtss2usi 512(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0x82,0x00,0x02,0x00,0x00]
+ vcvtss2usi 512(%rdx), %eax
+
+// CHECK: vcvtss2usi -512(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0x42,0x80]
+ vcvtss2usi -512(%rdx), %eax
+
+// CHECK: vcvtss2usi -516(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0x82,0xfc,0xfd,0xff,0xff]
+ vcvtss2usi -516(%rdx), %eax
+
+// CHECK: vcvtss2usi %xmm28, %ebp
+// CHECK: encoding: [0x62,0x91,0x7e,0x08,0x79,0xec]
+ vcvtss2usi %xmm28, %ebp
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm28, %ebp
+// CHECK: encoding: [0x62,0x91,0x7e,0x18,0x79,0xec]
+ vcvtss2usi {rn-sae}, %xmm28, %ebp
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm28, %ebp
+// CHECK: encoding: [0x62,0x91,0x7e,0x58,0x79,0xec]
+ vcvtss2usi {ru-sae}, %xmm28, %ebp
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm28, %ebp
+// CHECK: encoding: [0x62,0x91,0x7e,0x38,0x79,0xec]
+ vcvtss2usi {rd-sae}, %xmm28, %ebp
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm28, %ebp
+// CHECK: encoding: [0x62,0x91,0x7e,0x78,0x79,0xec]
+ vcvtss2usi {rz-sae}, %xmm28, %ebp
+
+// CHECK: vcvtss2usi (%rcx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0x29]
+ vcvtss2usi (%rcx), %ebp
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %ebp
+// CHECK: encoding: [0x62,0xb1,0x7e,0x08,0x79,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvtss2usi 291(%rax,%r14,8), %ebp
+
+// CHECK: vcvtss2usi 508(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0x6a,0x7f]
+ vcvtss2usi 508(%rdx), %ebp
+
+// CHECK: vcvtss2usi 512(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0xaa,0x00,0x02,0x00,0x00]
+ vcvtss2usi 512(%rdx), %ebp
+
+// CHECK: vcvtss2usi -512(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0x6a,0x80]
+ vcvtss2usi -512(%rdx), %ebp
+
+// CHECK: vcvtss2usi -516(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x79,0xaa,0xfc,0xfd,0xff,0xff]
+ vcvtss2usi -516(%rdx), %ebp
+
+// CHECK: vcvtss2usi %xmm28, %r13d
+// CHECK: encoding: [0x62,0x11,0x7e,0x08,0x79,0xec]
+ vcvtss2usi %xmm28, %r13d
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm28, %r13d
+// CHECK: encoding: [0x62,0x11,0x7e,0x18,0x79,0xec]
+ vcvtss2usi {rn-sae}, %xmm28, %r13d
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm28, %r13d
+// CHECK: encoding: [0x62,0x11,0x7e,0x58,0x79,0xec]
+ vcvtss2usi {ru-sae}, %xmm28, %r13d
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm28, %r13d
+// CHECK: encoding: [0x62,0x11,0x7e,0x38,0x79,0xec]
+ vcvtss2usi {rd-sae}, %xmm28, %r13d
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm28, %r13d
+// CHECK: encoding: [0x62,0x11,0x7e,0x78,0x79,0xec]
+ vcvtss2usi {rz-sae}, %xmm28, %r13d
+
+// CHECK: vcvtss2usi (%rcx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x79,0x29]
+ vcvtss2usi (%rcx), %r13d
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %r13d
+// CHECK: encoding: [0x62,0x31,0x7e,0x08,0x79,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvtss2usi 291(%rax,%r14,8), %r13d
+
+// CHECK: vcvtss2usi 508(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x79,0x6a,0x7f]
+ vcvtss2usi 508(%rdx), %r13d
+
+// CHECK: vcvtss2usi 512(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x79,0xaa,0x00,0x02,0x00,0x00]
+ vcvtss2usi 512(%rdx), %r13d
+
+// CHECK: vcvtss2usi -512(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x79,0x6a,0x80]
+ vcvtss2usi -512(%rdx), %r13d
+
+// CHECK: vcvtss2usi -516(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x79,0xaa,0xfc,0xfd,0xff,0xff]
+ vcvtss2usi -516(%rdx), %r13d
+
+// CHECK: vcvtss2usi %xmm23, %rax
+// CHECK: encoding: [0x62,0xb1,0xfe,0x08,0x79,0xc7]
+ vcvtss2usi %xmm23, %rax
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm23, %rax
+// CHECK: encoding: [0x62,0xb1,0xfe,0x18,0x79,0xc7]
+ vcvtss2usi {rn-sae}, %xmm23, %rax
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm23, %rax
+// CHECK: encoding: [0x62,0xb1,0xfe,0x58,0x79,0xc7]
+ vcvtss2usi {ru-sae}, %xmm23, %rax
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm23, %rax
+// CHECK: encoding: [0x62,0xb1,0xfe,0x38,0x79,0xc7]
+ vcvtss2usi {rd-sae}, %xmm23, %rax
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm23, %rax
+// CHECK: encoding: [0x62,0xb1,0xfe,0x78,0x79,0xc7]
+ vcvtss2usi {rz-sae}, %xmm23, %rax
+
+// CHECK: vcvtss2usi (%rcx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x79,0x01]
+ vcvtss2usi (%rcx), %rax
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %rax
+// CHECK: encoding: [0x62,0xb1,0xfe,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvtss2usi 291(%rax,%r14,8), %rax
+
+// CHECK: vcvtss2usi 508(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x79,0x42,0x7f]
+ vcvtss2usi 508(%rdx), %rax
+
+// CHECK: vcvtss2usi 512(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x79,0x82,0x00,0x02,0x00,0x00]
+ vcvtss2usi 512(%rdx), %rax
+
+// CHECK: vcvtss2usi -512(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x79,0x42,0x80]
+ vcvtss2usi -512(%rdx), %rax
+
+// CHECK: vcvtss2usi -516(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x79,0x82,0xfc,0xfd,0xff,0xff]
+ vcvtss2usi -516(%rdx), %rax
+
+// CHECK: vcvtss2usi %xmm23, %r8
+// CHECK: encoding: [0x62,0x31,0xfe,0x08,0x79,0xc7]
+ vcvtss2usi %xmm23, %r8
+
+// CHECK: vcvtss2usi {rn-sae}, %xmm23, %r8
+// CHECK: encoding: [0x62,0x31,0xfe,0x18,0x79,0xc7]
+ vcvtss2usi {rn-sae}, %xmm23, %r8
+
+// CHECK: vcvtss2usi {ru-sae}, %xmm23, %r8
+// CHECK: encoding: [0x62,0x31,0xfe,0x58,0x79,0xc7]
+ vcvtss2usi {ru-sae}, %xmm23, %r8
+
+// CHECK: vcvtss2usi {rd-sae}, %xmm23, %r8
+// CHECK: encoding: [0x62,0x31,0xfe,0x38,0x79,0xc7]
+ vcvtss2usi {rd-sae}, %xmm23, %r8
+
+// CHECK: vcvtss2usi {rz-sae}, %xmm23, %r8
+// CHECK: encoding: [0x62,0x31,0xfe,0x78,0x79,0xc7]
+ vcvtss2usi {rz-sae}, %xmm23, %r8
+
+// CHECK: vcvtss2usi (%rcx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x79,0x01]
+ vcvtss2usi (%rcx), %r8
+
+// CHECK: vcvtss2usi 291(%rax,%r14,8), %r8
+// CHECK: encoding: [0x62,0x31,0xfe,0x08,0x79,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvtss2usi 291(%rax,%r14,8), %r8
+
+// CHECK: vcvtss2usi 508(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x79,0x42,0x7f]
+ vcvtss2usi 508(%rdx), %r8
+
+// CHECK: vcvtss2usi 512(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x79,0x82,0x00,0x02,0x00,0x00]
+ vcvtss2usi 512(%rdx), %r8
+
+// CHECK: vcvtss2usi -512(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x79,0x42,0x80]
+ vcvtss2usi -512(%rdx), %r8
+
+// CHECK: vcvtss2usi -516(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x79,0x82,0xfc,0xfd,0xff,0xff]
+ vcvtss2usi -516(%rdx), %r8
+
+// CHECK: vcvttsd2si {sae}, %xmm3, %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2c,0xc3]
+ vcvttsd2si {sae}, %xmm3, %eax
+
+// CHECK: vcvttsd2si {sae}, %xmm3, %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x18,0x2c,0xeb]
+ vcvttsd2si {sae}, %xmm3, %ebp
+
+// CHECK: vcvttsd2si {sae}, %xmm3, %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x18,0x2c,0xeb]
+ vcvttsd2si {sae}, %xmm3, %r13d
+
+// CHECK: vcvttsd2si {sae}, %xmm1, %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x18,0x2c,0xc1]
+ vcvttsd2si {sae}, %xmm1, %rax
+
+// CHECK: vcvttsd2usi %xmm21, %eax
+// CHECK: encoding: [0x62,0xb1,0x7f,0x08,0x78,0xc5]
+ vcvttsd2usi %xmm21, %eax
+
+// CHECK: vcvttsd2usi {sae}, %xmm21, %eax
+// CHECK: encoding: [0x62,0xb1,0x7f,0x18,0x78,0xc5]
+ vcvttsd2usi {sae}, %xmm21, %eax
+
+// CHECK: vcvttsd2usi (%rcx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0x01]
+ vcvttsd2usi (%rcx), %eax
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %eax
+// CHECK: encoding: [0x62,0xb1,0x7f,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvttsd2usi 291(%rax,%r14,8), %eax
+
+// CHECK: vcvttsd2usi 1016(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0x42,0x7f]
+ vcvttsd2usi 1016(%rdx), %eax
+
+// CHECK: vcvttsd2usi 1024(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0x82,0x00,0x04,0x00,0x00]
+ vcvttsd2usi 1024(%rdx), %eax
+
+// CHECK: vcvttsd2usi -1024(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0x42,0x80]
+ vcvttsd2usi -1024(%rdx), %eax
+
+// CHECK: vcvttsd2usi -1032(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0x82,0xf8,0xfb,0xff,0xff]
+ vcvttsd2usi -1032(%rdx), %eax
+
+// CHECK: vcvttsd2usi %xmm21, %ebp
+// CHECK: encoding: [0x62,0xb1,0x7f,0x08,0x78,0xed]
+ vcvttsd2usi %xmm21, %ebp
+
+// CHECK: vcvttsd2usi {sae}, %xmm21, %ebp
+// CHECK: encoding: [0x62,0xb1,0x7f,0x18,0x78,0xed]
+ vcvttsd2usi {sae}, %xmm21, %ebp
+
+// CHECK: vcvttsd2usi (%rcx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0x29]
+ vcvttsd2usi (%rcx), %ebp
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %ebp
+// CHECK: encoding: [0x62,0xb1,0x7f,0x08,0x78,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvttsd2usi 291(%rax,%r14,8), %ebp
+
+// CHECK: vcvttsd2usi 1016(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0x6a,0x7f]
+ vcvttsd2usi 1016(%rdx), %ebp
+
+// CHECK: vcvttsd2usi 1024(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0xaa,0x00,0x04,0x00,0x00]
+ vcvttsd2usi 1024(%rdx), %ebp
+
+// CHECK: vcvttsd2usi -1024(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0x6a,0x80]
+ vcvttsd2usi -1024(%rdx), %ebp
+
+// CHECK: vcvttsd2usi -1032(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7f,0x08,0x78,0xaa,0xf8,0xfb,0xff,0xff]
+ vcvttsd2usi -1032(%rdx), %ebp
+
+// CHECK: vcvttsd2usi %xmm21, %r13d
+// CHECK: encoding: [0x62,0x31,0x7f,0x08,0x78,0xed]
+ vcvttsd2usi %xmm21, %r13d
+
+// CHECK: vcvttsd2usi {sae}, %xmm21, %r13d
+// CHECK: encoding: [0x62,0x31,0x7f,0x18,0x78,0xed]
+ vcvttsd2usi {sae}, %xmm21, %r13d
+
+// CHECK: vcvttsd2usi (%rcx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x78,0x29]
+ vcvttsd2usi (%rcx), %r13d
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %r13d
+// CHECK: encoding: [0x62,0x31,0x7f,0x08,0x78,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvttsd2usi 291(%rax,%r14,8), %r13d
+
+// CHECK: vcvttsd2usi 1016(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x78,0x6a,0x7f]
+ vcvttsd2usi 1016(%rdx), %r13d
+
+// CHECK: vcvttsd2usi 1024(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x78,0xaa,0x00,0x04,0x00,0x00]
+ vcvttsd2usi 1024(%rdx), %r13d
+
+// CHECK: vcvttsd2usi -1024(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x78,0x6a,0x80]
+ vcvttsd2usi -1024(%rdx), %r13d
+
+// CHECK: vcvttsd2usi -1032(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7f,0x08,0x78,0xaa,0xf8,0xfb,0xff,0xff]
+ vcvttsd2usi -1032(%rdx), %r13d
+
+// CHECK: vcvttsd2usi %xmm7, %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x78,0xc7]
+ vcvttsd2usi %xmm7, %rax
+
+// CHECK: vcvttsd2usi {sae}, %xmm7, %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x18,0x78,0xc7]
+ vcvttsd2usi {sae}, %xmm7, %rax
+
+// CHECK: vcvttsd2usi (%rcx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x78,0x01]
+ vcvttsd2usi (%rcx), %rax
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %rax
+// CHECK: encoding: [0x62,0xb1,0xff,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvttsd2usi 291(%rax,%r14,8), %rax
+
+// CHECK: vcvttsd2usi 1016(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x78,0x42,0x7f]
+ vcvttsd2usi 1016(%rdx), %rax
+
+// CHECK: vcvttsd2usi 1024(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x78,0x82,0x00,0x04,0x00,0x00]
+ vcvttsd2usi 1024(%rdx), %rax
+
+// CHECK: vcvttsd2usi -1024(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x78,0x42,0x80]
+ vcvttsd2usi -1024(%rdx), %rax
+
+// CHECK: vcvttsd2usi -1032(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xff,0x08,0x78,0x82,0xf8,0xfb,0xff,0xff]
+ vcvttsd2usi -1032(%rdx), %rax
+
+// CHECK: vcvttsd2usi %xmm7, %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x78,0xc7]
+ vcvttsd2usi %xmm7, %r8
+
+// CHECK: vcvttsd2usi {sae}, %xmm7, %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x18,0x78,0xc7]
+ vcvttsd2usi {sae}, %xmm7, %r8
+
+// CHECK: vcvttsd2usi (%rcx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x78,0x01]
+ vcvttsd2usi (%rcx), %r8
+
+// CHECK: vcvttsd2usi 291(%rax,%r14,8), %r8
+// CHECK: encoding: [0x62,0x31,0xff,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvttsd2usi 291(%rax,%r14,8), %r8
+
+// CHECK: vcvttsd2usi 1016(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x78,0x42,0x7f]
+ vcvttsd2usi 1016(%rdx), %r8
+
+// CHECK: vcvttsd2usi 1024(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x78,0x82,0x00,0x04,0x00,0x00]
+ vcvttsd2usi 1024(%rdx), %r8
+
+// CHECK: vcvttsd2usi -1024(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x78,0x42,0x80]
+ vcvttsd2usi -1024(%rdx), %r8
+
+// CHECK: vcvttsd2usi -1032(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xff,0x08,0x78,0x82,0xf8,0xfb,0xff,0xff]
+ vcvttsd2usi -1032(%rdx), %r8
+
+// CHECK: vcvttss2si {sae}, %xmm14, %eax
+// CHECK: encoding: [0x62,0xd1,0x7e,0x18,0x2c,0xc6]
+ vcvttss2si {sae}, %xmm14, %eax
+
+// CHECK: vcvttss2si {sae}, %xmm14, %ebp
+// CHECK: encoding: [0x62,0xd1,0x7e,0x18,0x2c,0xee]
+ vcvttss2si {sae}, %xmm14, %ebp
+
+// CHECK: vcvttss2si {sae}, %xmm14, %r13d
+// CHECK: encoding: [0x62,0x51,0x7e,0x18,0x2c,0xee]
+ vcvttss2si {sae}, %xmm14, %r13d
+
+// CHECK: vcvttss2si {sae}, %xmm21, %rax
+// CHECK: encoding: [0x62,0xb1,0xfe,0x18,0x2c,0xc5]
+ vcvttss2si {sae}, %xmm21, %rax
+
+// CHECK: vcvttss2si {sae}, %xmm21, %r8
+// CHECK: encoding: [0x62,0x31,0xfe,0x18,0x2c,0xc5]
+ vcvttss2si {sae}, %xmm21, %r8
+
+// CHECK: vcvttss2usi %xmm18, %eax
+// CHECK: encoding: [0x62,0xb1,0x7e,0x08,0x78,0xc2]
+ vcvttss2usi %xmm18, %eax
+
+// CHECK: vcvttss2usi {sae}, %xmm18, %eax
+// CHECK: encoding: [0x62,0xb1,0x7e,0x18,0x78,0xc2]
+ vcvttss2usi {sae}, %xmm18, %eax
+
+// CHECK: vcvttss2usi (%rcx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0x01]
+ vcvttss2usi (%rcx), %eax
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %eax
+// CHECK: encoding: [0x62,0xb1,0x7e,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvttss2usi 291(%rax,%r14,8), %eax
+
+// CHECK: vcvttss2usi 508(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0x42,0x7f]
+ vcvttss2usi 508(%rdx), %eax
+
+// CHECK: vcvttss2usi 512(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0x82,0x00,0x02,0x00,0x00]
+ vcvttss2usi 512(%rdx), %eax
+
+// CHECK: vcvttss2usi -512(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0x42,0x80]
+ vcvttss2usi -512(%rdx), %eax
+
+// CHECK: vcvttss2usi -516(%rdx), %eax
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0x82,0xfc,0xfd,0xff,0xff]
+ vcvttss2usi -516(%rdx), %eax
+
+// CHECK: vcvttss2usi %xmm18, %ebp
+// CHECK: encoding: [0x62,0xb1,0x7e,0x08,0x78,0xea]
+ vcvttss2usi %xmm18, %ebp
+
+// CHECK: vcvttss2usi {sae}, %xmm18, %ebp
+// CHECK: encoding: [0x62,0xb1,0x7e,0x18,0x78,0xea]
+ vcvttss2usi {sae}, %xmm18, %ebp
+
+// CHECK: vcvttss2usi (%rcx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0x29]
+ vcvttss2usi (%rcx), %ebp
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %ebp
+// CHECK: encoding: [0x62,0xb1,0x7e,0x08,0x78,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvttss2usi 291(%rax,%r14,8), %ebp
+
+// CHECK: vcvttss2usi 508(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0x6a,0x7f]
+ vcvttss2usi 508(%rdx), %ebp
+
+// CHECK: vcvttss2usi 512(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0xaa,0x00,0x02,0x00,0x00]
+ vcvttss2usi 512(%rdx), %ebp
+
+// CHECK: vcvttss2usi -512(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0x6a,0x80]
+ vcvttss2usi -512(%rdx), %ebp
+
+// CHECK: vcvttss2usi -516(%rdx), %ebp
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x78,0xaa,0xfc,0xfd,0xff,0xff]
+ vcvttss2usi -516(%rdx), %ebp
+
+// CHECK: vcvttss2usi %xmm18, %r13d
+// CHECK: encoding: [0x62,0x31,0x7e,0x08,0x78,0xea]
+ vcvttss2usi %xmm18, %r13d
+
+// CHECK: vcvttss2usi {sae}, %xmm18, %r13d
+// CHECK: encoding: [0x62,0x31,0x7e,0x18,0x78,0xea]
+ vcvttss2usi {sae}, %xmm18, %r13d
+
+// CHECK: vcvttss2usi (%rcx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x78,0x29]
+ vcvttss2usi (%rcx), %r13d
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %r13d
+// CHECK: encoding: [0x62,0x31,0x7e,0x08,0x78,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvttss2usi 291(%rax,%r14,8), %r13d
+
+// CHECK: vcvttss2usi 508(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x78,0x6a,0x7f]
+ vcvttss2usi 508(%rdx), %r13d
+
+// CHECK: vcvttss2usi 512(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x78,0xaa,0x00,0x02,0x00,0x00]
+ vcvttss2usi 512(%rdx), %r13d
+
+// CHECK: vcvttss2usi -512(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x78,0x6a,0x80]
+ vcvttss2usi -512(%rdx), %r13d
+
+// CHECK: vcvttss2usi -516(%rdx), %r13d
+// CHECK: encoding: [0x62,0x71,0x7e,0x08,0x78,0xaa,0xfc,0xfd,0xff,0xff]
+ vcvttss2usi -516(%rdx), %r13d
+
+// CHECK: vcvttss2usi %xmm27, %rax
+// CHECK: encoding: [0x62,0x91,0xfe,0x08,0x78,0xc3]
+ vcvttss2usi %xmm27, %rax
+
+// CHECK: vcvttss2usi {sae}, %xmm27, %rax
+// CHECK: encoding: [0x62,0x91,0xfe,0x18,0x78,0xc3]
+ vcvttss2usi {sae}, %xmm27, %rax
+
+// CHECK: vcvttss2usi (%rcx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x78,0x01]
+ vcvttss2usi (%rcx), %rax
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %rax
+// CHECK: encoding: [0x62,0xb1,0xfe,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvttss2usi 291(%rax,%r14,8), %rax
+
+// CHECK: vcvttss2usi 508(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x78,0x42,0x7f]
+ vcvttss2usi 508(%rdx), %rax
+
+// CHECK: vcvttss2usi 512(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x78,0x82,0x00,0x02,0x00,0x00]
+ vcvttss2usi 512(%rdx), %rax
+
+// CHECK: vcvttss2usi -512(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x78,0x42,0x80]
+ vcvttss2usi -512(%rdx), %rax
+
+// CHECK: vcvttss2usi -516(%rdx), %rax
+// CHECK: encoding: [0x62,0xf1,0xfe,0x08,0x78,0x82,0xfc,0xfd,0xff,0xff]
+ vcvttss2usi -516(%rdx), %rax
+
+// CHECK: vcvttss2usi %xmm27, %r8
+// CHECK: encoding: [0x62,0x11,0xfe,0x08,0x78,0xc3]
+ vcvttss2usi %xmm27, %r8
+
+// CHECK: vcvttss2usi {sae}, %xmm27, %r8
+// CHECK: encoding: [0x62,0x11,0xfe,0x18,0x78,0xc3]
+ vcvttss2usi {sae}, %xmm27, %r8
+
+// CHECK: vcvttss2usi (%rcx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x78,0x01]
+ vcvttss2usi (%rcx), %r8
+
+// CHECK: vcvttss2usi 291(%rax,%r14,8), %r8
+// CHECK: encoding: [0x62,0x31,0xfe,0x08,0x78,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vcvttss2usi 291(%rax,%r14,8), %r8
+
+// CHECK: vcvttss2usi 508(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x78,0x42,0x7f]
+ vcvttss2usi 508(%rdx), %r8
+
+// CHECK: vcvttss2usi 512(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x78,0x82,0x00,0x02,0x00,0x00]
+ vcvttss2usi 512(%rdx), %r8
+
+// CHECK: vcvttss2usi -512(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x78,0x42,0x80]
+ vcvttss2usi -512(%rdx), %r8
+
+// CHECK: vcvttss2usi -516(%rdx), %r8
+// CHECK: encoding: [0x62,0x71,0xfe,0x08,0x78,0x82,0xfc,0xfd,0xff,0xff]
+ vcvttss2usi -516(%rdx), %r8
+// CHECK: vrsqrt14sd %xmm10, %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x42,0xcd,0x08,0x4f,0xd2]
+ vrsqrt14sd %xmm10, %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5}
+// CHECK: encoding: [0x62,0x42,0xcd,0x0d,0x4f,0xd2]
+ vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5}
+
+// CHECK: vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5} {z}
+// CHECK: encoding: [0x62,0x42,0xcd,0x8d,0x4f,0xd2]
+ vrsqrt14sd %xmm10, %xmm6, %xmm26 {%k5} {z}
+
+// CHECK: vrsqrt14sd (%rcx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x11]
+ vrsqrt14sd (%rcx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd 291(%rax,%r14,8), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x22,0xcd,0x08,0x4f,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vrsqrt14sd 291(%rax,%r14,8), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd 1016(%rdx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x52,0x7f]
+ vrsqrt14sd 1016(%rdx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd 1024(%rdx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x92,0x00,0x04,0x00,0x00]
+ vrsqrt14sd 1024(%rdx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd -1024(%rdx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x52,0x80]
+ vrsqrt14sd -1024(%rdx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14sd -1032(%rdx), %xmm6, %xmm26
+// CHECK: encoding: [0x62,0x62,0xcd,0x08,0x4f,0x92,0xf8,0xfb,0xff,0xff]
+ vrsqrt14sd -1032(%rdx), %xmm6, %xmm26
+
+// CHECK: vrsqrt14ss %xmm9, %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x52,0x0d,0x08,0x4f,0xf1]
+ vrsqrt14ss %xmm9, %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1}
+// CHECK: encoding: [0x62,0x52,0x0d,0x09,0x4f,0xf1]
+ vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1}
+
+// CHECK: vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1} {z}
+// CHECK: encoding: [0x62,0x52,0x0d,0x89,0x4f,0xf1]
+ vrsqrt14ss %xmm9, %xmm14, %xmm14 {%k1} {z}
+
+// CHECK: vrsqrt14ss (%rcx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0x31]
+ vrsqrt14ss (%rcx), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss 291(%rax,%r14,8), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x32,0x0d,0x08,0x4f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vrsqrt14ss 291(%rax,%r14,8), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss 508(%rdx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0x72,0x7f]
+ vrsqrt14ss 508(%rdx), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss 512(%rdx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0xb2,0x00,0x02,0x00,0x00]
+ vrsqrt14ss 512(%rdx), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss -512(%rdx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0x72,0x80]
+ vrsqrt14ss -512(%rdx), %xmm14, %xmm14
+
+// CHECK: vrsqrt14ss -516(%rdx), %xmm14, %xmm14
+// CHECK: encoding: [0x62,0x72,0x0d,0x08,0x4f,0xb2,0xfc,0xfd,0xff,0xff]
+ vrsqrt14ss -516(%rdx), %xmm14, %xmm14
+
+// CHECK: vrcp14sd %xmm14, %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x52,0xcd,0x00,0x4d,0xe6]
+ vrcp14sd %xmm14, %xmm22, %xmm12
+
+// CHECK: vrcp14sd %xmm14, %xmm22, %xmm12 {%k2}
+// CHECK: encoding: [0x62,0x52,0xcd,0x02,0x4d,0xe6]
+ vrcp14sd %xmm14, %xmm22, %xmm12 {%k2}
+
+// CHECK: vrcp14sd %xmm14, %xmm22, %xmm12 {%k2} {z}
+// CHECK: encoding: [0x62,0x52,0xcd,0x82,0x4d,0xe6]
+ vrcp14sd %xmm14, %xmm22, %xmm12 {%k2} {z}
+
+// CHECK: vrcp14sd (%rcx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0x21]
+ vrcp14sd (%rcx), %xmm22, %xmm12
+
+// CHECK: vrcp14sd 291(%rax,%r14,8), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x32,0xcd,0x00,0x4d,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vrcp14sd 291(%rax,%r14,8), %xmm22, %xmm12
+
+// CHECK: vrcp14sd 1016(%rdx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0x62,0x7f]
+ vrcp14sd 1016(%rdx), %xmm22, %xmm12
+
+// CHECK: vrcp14sd 1024(%rdx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0xa2,0x00,0x04,0x00,0x00]
+ vrcp14sd 1024(%rdx), %xmm22, %xmm12
+
+// CHECK: vrcp14sd -1024(%rdx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0x62,0x80]
+ vrcp14sd -1024(%rdx), %xmm22, %xmm12
+
+// CHECK: vrcp14sd -1032(%rdx), %xmm22, %xmm12
+// CHECK: encoding: [0x62,0x72,0xcd,0x00,0x4d,0xa2,0xf8,0xfb,0xff,0xff]
+ vrcp14sd -1032(%rdx), %xmm22, %xmm12
+
+// CHECK: vrcp14ss %xmm3, %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0xc3]
+ vrcp14ss %xmm3, %xmm8, %xmm8
+
+// CHECK: vrcp14ss %xmm3, %xmm8, %xmm8 {%k7}
+// CHECK: encoding: [0x62,0x72,0x3d,0x0f,0x4d,0xc3]
+ vrcp14ss %xmm3, %xmm8, %xmm8 {%k7}
+
+// CHECK: vrcp14ss %xmm3, %xmm8, %xmm8 {%k7} {z}
+// CHECK: encoding: [0x62,0x72,0x3d,0x8f,0x4d,0xc3]
+ vrcp14ss %xmm3, %xmm8, %xmm8 {%k7} {z}
+
+// CHECK: vrcp14ss (%rcx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x01]
+ vrcp14ss (%rcx), %xmm8, %xmm8
+
+// CHECK: vrcp14ss 291(%rax,%r14,8), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x32,0x3d,0x08,0x4d,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vrcp14ss 291(%rax,%r14,8), %xmm8, %xmm8
+
+// CHECK: vrcp14ss 508(%rdx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x42,0x7f]
+ vrcp14ss 508(%rdx), %xmm8, %xmm8
+
+// CHECK: vrcp14ss 512(%rdx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x82,0x00,0x02,0x00,0x00]
+ vrcp14ss 512(%rdx), %xmm8, %xmm8
+
+// CHECK: vrcp14ss -512(%rdx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x42,0x80]
+ vrcp14ss -512(%rdx), %xmm8, %xmm8
+
+// CHECK: vrcp14ss -516(%rdx), %xmm8, %xmm8
+// CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x82,0xfc,0xfd,0xff,0xff]
+ vrcp14ss -516(%rdx), %xmm8, %xmm8
+
+// CHECK: vpternlogd $171, %zmm20, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x33,0x0d,0x48,0x25,0xe4,0xab]
+ vpternlogd $0xab, %zmm20, %zmm14, %zmm12
+
+// CHECK: vpternlogd $171, %zmm20, %zmm14, %zmm12 {%k7}
+// CHECK: encoding: [0x62,0x33,0x0d,0x4f,0x25,0xe4,0xab]
+ vpternlogd $0xab, %zmm20, %zmm14, %zmm12 {%k7}
+
+// CHECK: vpternlogd $171, %zmm20, %zmm14, %zmm12 {%k7} {z}
+// CHECK: encoding: [0x62,0x33,0x0d,0xcf,0x25,0xe4,0xab]
+ vpternlogd $0xab, %zmm20, %zmm14, %zmm12 {%k7} {z}
+
+// CHECK: vpternlogd $123, %zmm20, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x33,0x0d,0x48,0x25,0xe4,0x7b]
+ vpternlogd $0x7b, %zmm20, %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, (%rcx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0x21,0x7b]
+ vpternlogd $0x7b, (%rcx), %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, 291(%rax,%r14,8), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x33,0x0d,0x48,0x25,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpternlogd $0x7b, 291(%rax,%r14,8), %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, (%rcx){1to16}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0x21,0x7b]
+ vpternlogd $0x7b, (%rcx){1to16}, %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, 8128(%rdx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0x62,0x7f,0x7b]
+ vpternlogd $0x7b, 8128(%rdx), %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, 8192(%rdx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0xa2,0x00,0x20,0x00,0x00,0x7b]
+ vpternlogd $0x7b, 8192(%rdx), %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, -8192(%rdx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0x62,0x80,0x7b]
+ vpternlogd $0x7b, -8192(%rdx), %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, -8256(%rdx), %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0xa2,0xc0,0xdf,0xff,0xff,0x7b]
+ vpternlogd $0x7b, -8256(%rdx), %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, 508(%rdx){1to16}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0x62,0x7f,0x7b]
+ vpternlogd $0x7b, 508(%rdx){1to16}, %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, 512(%rdx){1to16}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vpternlogd $0x7b, 512(%rdx){1to16}, %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, -512(%rdx){1to16}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0x62,0x80,0x7b]
+ vpternlogd $0x7b, -512(%rdx){1to16}, %zmm14, %zmm12
+
+// CHECK: vpternlogd $123, -516(%rdx){1to16}, %zmm14, %zmm12
+// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vpternlogd $0x7b, -516(%rdx){1to16}, %zmm14, %zmm12
+
+// CHECK: vpternlogq $171, %zmm21, %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x33,0xed,0x48,0x25,0xfd,0xab]
+ vpternlogq $0xab, %zmm21, %zmm2, %zmm15
+
+// CHECK: vpternlogq $171, %zmm21, %zmm2, %zmm15 {%k3}
+// CHECK: encoding: [0x62,0x33,0xed,0x4b,0x25,0xfd,0xab]
+ vpternlogq $0xab, %zmm21, %zmm2, %zmm15 {%k3}
+
+// CHECK: vpternlogq $171, %zmm21, %zmm2, %zmm15 {%k3} {z}
+// CHECK: encoding: [0x62,0x33,0xed,0xcb,0x25,0xfd,0xab]
+ vpternlogq $0xab, %zmm21, %zmm2, %zmm15 {%k3} {z}
+
+// CHECK: vpternlogq $123, %zmm21, %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x33,0xed,0x48,0x25,0xfd,0x7b]
+ vpternlogq $0x7b, %zmm21, %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, (%rcx), %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0x39,0x7b]
+ vpternlogq $0x7b, (%rcx), %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, 291(%rax,%r14,8), %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x33,0xed,0x48,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpternlogq $0x7b, 291(%rax,%r14,8), %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, (%rcx){1to8}, %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0x39,0x7b]
+ vpternlogq $0x7b, (%rcx){1to8}, %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, 8128(%rdx), %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0x7a,0x7f,0x7b]
+ vpternlogq $0x7b, 8128(%rdx), %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, 8192(%rdx), %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0xba,0x00,0x20,0x00,0x00,0x7b]
+ vpternlogq $0x7b, 8192(%rdx), %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, -8192(%rdx), %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0x7a,0x80,0x7b]
+ vpternlogq $0x7b, -8192(%rdx), %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, -8256(%rdx), %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0xba,0xc0,0xdf,0xff,0xff,0x7b]
+ vpternlogq $0x7b, -8256(%rdx), %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, 1016(%rdx){1to8}, %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0x7a,0x7f,0x7b]
+ vpternlogq $0x7b, 1016(%rdx){1to8}, %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, 1024(%rdx){1to8}, %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0xba,0x00,0x04,0x00,0x00,0x7b]
+ vpternlogq $0x7b, 1024(%rdx){1to8}, %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, -1024(%rdx){1to8}, %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0x7a,0x80,0x7b]
+ vpternlogq $0x7b, -1024(%rdx){1to8}, %zmm2, %zmm15
+
+// CHECK: vpternlogq $123, -1032(%rdx){1to8}, %zmm2, %zmm15
+// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0xba,0xf8,0xfb,0xff,0xff,0x7b]
+ vpternlogq $0x7b, -1032(%rdx){1to8}, %zmm2, %zmm15
+
+// CHECK: vpbroadcastd (%rcx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x11]
+ vpbroadcastd (%rcx), %zmm26
+
+// CHECK: vpbroadcastd (%rcx), %zmm26 {%k2}
+// CHECK: encoding: [0x62,0x62,0x7d,0x4a,0x58,0x11]
+ vpbroadcastd (%rcx), %zmm26 {%k2}
+
+// CHECK: vpbroadcastd (%rcx), %zmm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xca,0x58,0x11]
+ vpbroadcastd (%rcx), %zmm26 {%k2} {z}
+
+// CHECK: vpbroadcastd 291(%rax,%r14,8), %zmm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x58,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastd 291(%rax,%r14,8), %zmm26
+
+// CHECK: vpbroadcastd 508(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x52,0x7f]
+ vpbroadcastd 508(%rdx), %zmm26
+
+// CHECK: vpbroadcastd 512(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x92,0x00,0x02,0x00,0x00]
+ vpbroadcastd 512(%rdx), %zmm26
+
+// CHECK: vpbroadcastd -512(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x52,0x80]
+ vpbroadcastd -512(%rdx), %zmm26
+
+// CHECK: vpbroadcastd -516(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x92,0xfc,0xfd,0xff,0xff]
+ vpbroadcastd -516(%rdx), %zmm26
+
+// CHECK: vpbroadcastd %xmm22, %zmm10
+// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x58,0xd6]
+ vpbroadcastd %xmm22, %zmm10
+
+// CHECK: vpbroadcastd %xmm22, %zmm10 {%k7}
+// CHECK: encoding: [0x62,0x32,0x7d,0x4f,0x58,0xd6]
+ vpbroadcastd %xmm22, %zmm10 {%k7}
+
+// CHECK: vpbroadcastd %xmm22, %zmm10 {%k7} {z}
+// CHECK: encoding: [0x62,0x32,0x7d,0xcf,0x58,0xd6]
+ vpbroadcastd %xmm22, %zmm10 {%k7} {z}
+
+// CHECK: vpbroadcastd %eax, %zmm11
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x7c,0xd8]
+ vpbroadcastd %eax, %zmm11
+
+// CHECK: vpbroadcastd %eax, %zmm11 {%k6}
+// CHECK: encoding: [0x62,0x72,0x7d,0x4e,0x7c,0xd8]
+ vpbroadcastd %eax, %zmm11 {%k6}
+
+// CHECK: vpbroadcastd %eax, %zmm11 {%k6} {z}
+// CHECK: encoding: [0x62,0x72,0x7d,0xce,0x7c,0xd8]
+ vpbroadcastd %eax, %zmm11 {%k6} {z}
+
+// CHECK: vpbroadcastd %ebp, %zmm11
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x7c,0xdd]
+ vpbroadcastd %ebp, %zmm11
+
+// CHECK: vpbroadcastd %r13d, %zmm11
+// CHECK: encoding: [0x62,0x52,0x7d,0x48,0x7c,0xdd]
+ vpbroadcastd %r13d, %zmm11
+
+// CHECK: vpbroadcastq (%rcx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x09]
+ vpbroadcastq (%rcx), %zmm25
+
+// CHECK: vpbroadcastq (%rcx), %zmm25 {%k2}
+// CHECK: encoding: [0x62,0x62,0xfd,0x4a,0x59,0x09]
+ vpbroadcastq (%rcx), %zmm25 {%k2}
+
+// CHECK: vpbroadcastq (%rcx), %zmm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x62,0xfd,0xca,0x59,0x09]
+ vpbroadcastq (%rcx), %zmm25 {%k2} {z}
+
+// CHECK: vpbroadcastq 291(%rax,%r14,8), %zmm25
+// CHECK: encoding: [0x62,0x22,0xfd,0x48,0x59,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastq 291(%rax,%r14,8), %zmm25
+
+// CHECK: vpbroadcastq 1016(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x4a,0x7f]
+ vpbroadcastq 1016(%rdx), %zmm25
+
+// CHECK: vpbroadcastq 1024(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x8a,0x00,0x04,0x00,0x00]
+ vpbroadcastq 1024(%rdx), %zmm25
+
+// CHECK: vpbroadcastq -1024(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x4a,0x80]
+ vpbroadcastq -1024(%rdx), %zmm25
+
+// CHECK: vpbroadcastq -1032(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x8a,0xf8,0xfb,0xff,0xff]
+ vpbroadcastq -1032(%rdx), %zmm25
+
+// CHECK: vpbroadcastq %xmm5, %zmm3
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x59,0xdd]
+ vpbroadcastq %xmm5, %zmm3
+
+// CHECK: vpbroadcastq %xmm5, %zmm3 {%k5}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4d,0x59,0xdd]
+ vpbroadcastq %xmm5, %zmm3 {%k5}
+
+// CHECK: vpbroadcastq %xmm5, %zmm3 {%k5} {z}
+// CHECK: encoding: [0x62,0xf2,0xfd,0xcd,0x59,0xdd]
+ vpbroadcastq %xmm5, %zmm3 {%k5} {z}
+
+// CHECK: vpbroadcastq %rax, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xc8]
+ vpbroadcastq %rax, %zmm1
+
+// CHECK: vpbroadcastq %rax, %zmm1 {%k6}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4e,0x7c,0xc8]
+ vpbroadcastq %rax, %zmm1 {%k6}
+
+// CHECK: vpbroadcastq %rax, %zmm1 {%k6} {z}
+// CHECK: encoding: [0x62,0xf2,0xfd,0xce,0x7c,0xc8]
+ vpbroadcastq %rax, %zmm1 {%k6} {z}
+
+// CHECK: vpbroadcastq %r8, %zmm1
+// CHECK: encoding: [0x62,0xd2,0xfd,0x48,0x7c,0xc8]
+ vpbroadcastq %r8, %zmm1
+
+// CHECK: vcvtph2ps %ymm27, %zmm13
+// CHECK: encoding: [0x62,0x12,0x7d,0x48,0x13,0xeb]
+ vcvtph2ps %ymm27, %zmm13
+
+// CHECK: vcvtph2ps %ymm27, %zmm13 {%k3}
+// CHECK: encoding: [0x62,0x12,0x7d,0x4b,0x13,0xeb]
+ vcvtph2ps %ymm27, %zmm13 {%k3}
+
+// CHECK: vcvtph2ps %ymm27, %zmm13 {%k3} {z}
+// CHECK: encoding: [0x62,0x12,0x7d,0xcb,0x13,0xeb]
+ vcvtph2ps %ymm27, %zmm13 {%k3} {z}
+
+// CHECK: vcvtph2ps {sae}, %ymm27, %zmm13
+// CHECK: encoding: [0x62,0x12,0x7d,0x18,0x13,0xeb]
+ vcvtph2ps {sae}, %ymm27, %zmm13
+
+// CHECK: vcvtph2ps (%rcx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0x29]
+ vcvtph2ps (%rcx), %zmm13
+
+// CHECK: vcvtph2ps 291(%rax,%r14,8), %zmm13
+// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x13,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vcvtph2ps 291(%rax,%r14,8), %zmm13
+
+// CHECK: vcvtph2ps 4064(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0x6a,0x7f]
+ vcvtph2ps 4064(%rdx), %zmm13
+
+// CHECK: vcvtph2ps 4096(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0xaa,0x00,0x10,0x00,0x00]
+ vcvtph2ps 4096(%rdx), %zmm13
+
+// CHECK: vcvtph2ps -4096(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0x6a,0x80]
+ vcvtph2ps -4096(%rdx), %zmm13
+
+// CHECK: vcvtph2ps -4128(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x13,0xaa,0xe0,0xef,0xff,0xff]
+ vcvtph2ps -4128(%rdx), %zmm13
+
+// CHECK: vcvtps2ph $171, %zmm14, %ymm11
+// CHECK: encoding: [0x62,0x53,0x7d,0x48,0x1d,0xf3,0xab]
+ vcvtps2ph $0xab, %zmm14, %ymm11
+
+// CHECK: vcvtps2ph $171, %zmm14, %ymm11 {%k6}
+// CHECK: encoding: [0x62,0x53,0x7d,0x4e,0x1d,0xf3,0xab]
+ vcvtps2ph $0xab, %zmm14, %ymm11 {%k6}
+
+// CHECK: vcvtps2ph $171, %zmm14, %ymm11 {%k6} {z}
+// CHECK: encoding: [0x62,0x53,0x7d,0xce,0x1d,0xf3,0xab]
+ vcvtps2ph $0xab, %zmm14, %ymm11 {%k6} {z}
+
+// CHECK: vcvtps2ph $171, {sae}, %zmm14, %ymm11
+// CHECK: encoding: [0x62,0x53,0x7d,0x18,0x1d,0xf3,0xab]
+ vcvtps2ph $0xab,{sae}, %zmm14, %ymm11
+
+// CHECK: vcvtps2ph $123, %zmm14, %ymm11
+// CHECK: encoding: [0x62,0x53,0x7d,0x48,0x1d,0xf3,0x7b]
+ vcvtps2ph $0x7b, %zmm14, %ymm11
+
+// CHECK: vcvtps2ph $123, {sae}, %zmm14, %ymm11
+// CHECK: encoding: [0x62,0x53,0x7d,0x18,0x1d,0xf3,0x7b]
+ vcvtps2ph $0x7b,{sae}, %zmm14, %ymm11
+
+// CHECK: vcvtps2ph $171, %zmm19, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x19,0xab]
+ vcvtps2ph $0xab, %zmm19, (%rcx)
+
+// CHECK: vcvtps2ph $171, %zmm19, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe3,0x7d,0x4e,0x1d,0x19,0xab]
+ vcvtps2ph $0xab, %zmm19, (%rcx) {%k6}
+
+// CHECK: vcvtps2ph $123, %zmm19, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x19,0x7b]
+ vcvtps2ph $0x7b, %zmm19, (%rcx)
+
+// CHECK: vcvtps2ph $123, %zmm19, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x1d,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcvtps2ph $0x7b, %zmm19, 291(%rax,%r14,8)
+
+// CHECK: vcvtps2ph $123, %zmm19, 4064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x5a,0x7f,0x7b]
+ vcvtps2ph $0x7b, %zmm19, 4064(%rdx)
+
+// CHECK: vcvtps2ph $123, %zmm19, 4096(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x9a,0x00,0x10,0x00,0x00,0x7b]
+ vcvtps2ph $0x7b, %zmm19, 4096(%rdx)
+
+// CHECK: vcvtps2ph $123, %zmm19, -4096(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x5a,0x80,0x7b]
+ vcvtps2ph $0x7b, %zmm19, -4096(%rdx)
+
+// CHECK: vcvtps2ph $123, %zmm19, -4128(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1d,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+ vcvtps2ph $0x7b, %zmm19, -4128(%rdx)
+
+// CHECK: vmovq %rax, %xmm29
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x6e,0xe8]
+ vmovq %rax, %xmm29
+
+// CHECK: vmovq %r8, %xmm29
+// CHECK: encoding: [0x62,0x41,0xfd,0x08,0x6e,0xe8]
+ vmovq %r8, %xmm29
+
+// CHECK: vmovq (%rcx), %xmm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0x29]
+ vmovq (%rcx), %xmm29
+
+// CHECK: vmovq 291(%rax,%r14,8), %xmm29
+// CHECK: encoding: [0x62,0x21,0xfe,0x08,0x7e,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vmovq 291(%rax,%r14,8), %xmm29
+
+// CHECK: vmovq 1016(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0x6a,0x7f]
+ vmovq 1016(%rdx), %xmm29
+
+// CHECK: vmovq 1024(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0xaa,0x00,0x04,0x00,0x00]
+ vmovq 1024(%rdx), %xmm29
+
+// CHECK: vmovq -1024(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0x6a,0x80]
+ vmovq -1024(%rdx), %xmm29
+
+// CHECK: vmovq -1032(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0xaa,0xf8,0xfb,0xff,0xff]
+ vmovq -1032(%rdx), %xmm29
+
+// CHECK: vmovq %xmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x09]
+ vmovq %xmm17, (%rcx)
+
+// CHECK: vmovq %xmm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0xd6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovq %xmm17, 291(%rax,%r14,8)
+
+// CHECK: vmovq %xmm17, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x4a,0x7f]
+ vmovq %xmm17, 1016(%rdx)
+
+// CHECK: vmovq %xmm17, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x8a,0x00,0x04,0x00,0x00]
+ vmovq %xmm17, 1024(%rdx)
+
+// CHECK: vmovq %xmm17, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x4a,0x80]
+ vmovq %xmm17, -1024(%rdx)
+
+// CHECK: vmovq %xmm17, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x8a,0xf8,0xfb,0xff,0xff]
+ vmovq %xmm17, -1032(%rdx)
+
+// CHECK: vmovq %xmm3, %xmm24
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0xc3]
+ vmovq %xmm3, %xmm24
+
+// CHECK: vmovq (%rcx), %xmm24
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0x01]
+ vmovq (%rcx), %xmm24
+
+// CHECK: vmovq 291(%rax,%r14,8), %xmm24
+// CHECK: encoding: [0x62,0x21,0xfe,0x08,0x7e,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vmovq 291(%rax,%r14,8), %xmm24
+
+// CHECK: vmovq 1016(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0x42,0x7f]
+ vmovq 1016(%rdx), %xmm24
+
+// CHECK: vmovq 1024(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0x82,0x00,0x04,0x00,0x00]
+ vmovq 1024(%rdx), %xmm24
+
+// CHECK: vmovq -1024(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0x42,0x80]
+ vmovq -1024(%rdx), %xmm24
+
+// CHECK: vmovq -1032(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x61,0xfe,0x08,0x7e,0x82,0xf8,0xfb,0xff,0xff]
+ vmovq -1032(%rdx), %xmm24
+
+// CHECK: vmovq %xmm19, (%rcx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x19]
+ vmovq %xmm19, (%rcx)
+
+// CHECK: vmovq %xmm19, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0xd6,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vmovq %xmm19, 291(%rax,%r14,8)
+
+// CHECK: vmovq %xmm19, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x5a,0x7f]
+ vmovq %xmm19, 1016(%rdx)
+
+// CHECK: vmovq %xmm19, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x9a,0x00,0x04,0x00,0x00]
+ vmovq %xmm19, 1024(%rdx)
+
+// CHECK: vmovq %xmm19, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x5a,0x80]
+ vmovq %xmm19, -1024(%rdx)
+
+// CHECK: vmovq %xmm19, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0xd6,0x9a,0xf8,0xfb,0xff,0xff]
+ vmovq %xmm19, -1032(%rdx)
+
+// CHECK: vmovq %xmm27, %rax
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x7e,0xd8]
+ vmovq %xmm27, %rax
+
+// CHECK: vmovq %xmm27, %rax
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x7e,0xd8]
+ vmovq %xmm27, %rax
+
+// CHECK: vmovq %xmm27, %r8
+// CHECK: encoding: [0x62,0x41,0xfd,0x08,0x7e,0xd8]
+ vmovq %xmm27, %r8
+
+// CHECK: vmovq %xmm27, %r8
+// CHECK: encoding: [0x62,0x41,0xfd,0x08,0x7e,0xd8]
+ vmovq %xmm27, %r8
+
+// CHECK: vmovq %xmm22, %rax
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x7e,0xf0]
+ vmovq %xmm22, %rax
+
+// CHECK: vmovq %xmm22, %rax
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x7e,0xf0]
+ vmovq %xmm22, %rax
+
+// CHECK: vmovq %xmm22, %r8
+// CHECK: encoding: [0x62,0xc1,0xfd,0x08,0x7e,0xf0]
+ vmovq %xmm22, %r8
+
+// CHECK: vmovq %xmm22, %r8
+// CHECK: encoding: [0x62,0xc1,0xfd,0x08,0x7e,0xf0]
+ vmovq %xmm22, %r8
+
+// CHECK: vmovq %xmm29, %xmm29
+// CHECK: encoding: [0x62,0x01,0xfe,0x08,0x7e,0xed]
+ vmovq %xmm29, %xmm29
+
+// CHECK: vmovq %xmm25, %xmm8
+// CHECK: encoding: [0x62,0x11,0xfe,0x08,0x7e,0xc1]
+ vmovq %xmm25, %xmm8
+
+// CHECK: vmovq %xmm29, %rax
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x7e,0xe8]
+ vmovq %xmm29, %rax
+
+// CHECK: vmovq %xmm29, %rax
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x7e,0xe8]
+ vmovq %xmm29, %rax
+
+// CHECK: vmovq %xmm29, %r8
+// CHECK: encoding: [0x62,0x41,0xfd,0x08,0x7e,0xe8]
+ vmovq %xmm29, %r8
+
+// CHECK: vmovq %xmm29, %r8
+// CHECK: encoding: [0x62,0x41,0xfd,0x08,0x7e,0xe8]
+ vmovq %xmm29, %r8
+
+// CHECK: vmovq %xmm20, %rax
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x7e,0xe0]
+ vmovq %xmm20, %rax
+
+// CHECK: vmovq %xmm20, %rax
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x7e,0xe0]
+ vmovq %xmm20, %rax
+
+// CHECK: vmovq %xmm20, %r8
+// CHECK: encoding: [0x62,0xc1,0xfd,0x08,0x7e,0xe0]
+ vmovq %xmm20, %r8
+
+// CHECK: vmovq %xmm20, %r8
+// CHECK: encoding: [0x62,0xc1,0xfd,0x08,0x7e,0xe0]
+ vmovq %xmm20, %r8
+
+// CHECK: vmovq %xmm14, %xmm25
+// CHECK: encoding: [0x62,0x41,0xfe,0x08,0x7e,0xce]
+ vmovq %xmm14, %xmm25
+
+// CHECK: vmovq %xmm24, %xmm12
+// CHECK: encoding: [0x62,0x11,0xfe,0x08,0x7e,0xe0]
+ vmovq %xmm24, %xmm12
+
+// CHECK: vmovd %xmm5, %eax
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xe8]
+ vmovd %xmm5, %eax
+
+// CHECK: vmovd %xmm5, %eax
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xe8]
+ vmovd %xmm5, %eax
+
+// CHECK: vmovd %xmm5, %ebp
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xed]
+ vmovd %xmm5, %ebp
+
+// CHECK: vmovd %xmm5, %ebp
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xed]
+ vmovd %xmm5, %ebp
+
+// CHECK: vmovd %xmm5, %r13d
+// CHECK: encoding: [0xc4,0xc1,0x79,0x7e,0xed]
+ vmovd %xmm5, %r13d
+
+// CHECK: vmovd %xmm5, %r13d
+// CHECK: encoding: [0xc4,0xc1,0x79,0x7e,0xed]
+ vmovd %xmm5, %r13d
+
+// CHECK: vmovd %xmm28, %eax
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x7e,0xe0]
+ vmovd %xmm28, %eax
+
+// CHECK: vmovd %xmm28, %eax
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x7e,0xe0]
+ vmovd %xmm28, %eax
+
+// CHECK: vmovd %xmm28, %ebp
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x7e,0xe5]
+ vmovd %xmm28, %ebp
+
+// CHECK: vmovd %xmm28, %ebp
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x7e,0xe5]
+ vmovd %xmm28, %ebp
+
+// CHECK: vmovd %xmm28, %r13d
+// CHECK: encoding: [0x62,0x41,0x7d,0x08,0x7e,0xe5]
+ vmovd %xmm28, %r13d
+
+// CHECK: vmovd %xmm28, %r13d
+// CHECK: encoding: [0x62,0x41,0x7d,0x08,0x7e,0xe5]
+ vmovd %xmm28, %r13d
+
+// CHECK: vmovd %xmm14, %eax
+// CHECK: encoding: [0xc5,0x79,0x7e,0xf0]
+ vmovd %xmm14, %eax
+
+// CHECK: vmovd %xmm14, %eax
+// CHECK: encoding: [0xc5,0x79,0x7e,0xf0]
+ vmovd %xmm14, %eax
+
+// CHECK: vmovd %xmm14, %ebp
+// CHECK: encoding: [0xc5,0x79,0x7e,0xf5]
+ vmovd %xmm14, %ebp
+
+// CHECK: vmovd %xmm14, %ebp
+// CHECK: encoding: [0xc5,0x79,0x7e,0xf5]
+ vmovd %xmm14, %ebp
+
+// CHECK: vmovd %xmm14, %r13d
+// CHECK: encoding: [0xc4,0x41,0x79,0x7e,0xf5]
+ vmovd %xmm14, %r13d
+
+// CHECK: vmovd %xmm14, %r13d
+// CHECK: encoding: [0xc4,0x41,0x79,0x7e,0xf5]
+ vmovd %xmm14, %r13d
+
+// CHECK: vmovd %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
+ vmovd %xmm1, %eax
+
+// CHECK: vmovd %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
+ vmovd %xmm1, %eax
+
+// CHECK: vmovd %xmm1, %ebp
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xcd]
+ vmovd %xmm1, %ebp
+
+// CHECK: vmovd %xmm1, %ebp
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xcd]
+ vmovd %xmm1, %ebp
+
+// CHECK: vmovd %xmm1, %r13d
+// CHECK: encoding: [0xc4,0xc1,0x79,0x7e,0xcd]
+ vmovd %xmm1, %r13d
+
+// CHECK: vmovd %xmm1, %r13d
+// CHECK: encoding: [0xc4,0xc1,0x79,0x7e,0xcd]
+ vmovd %xmm1, %r13d
+
+// CHECK: vmovd %eax, %xmm26
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x6e,0xd0]
+ vmovd %eax, %xmm26
+
+// CHECK: vmovd %ebp, %xmm26
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x6e,0xd5]
+ vmovd %ebp, %xmm26
+
+// CHECK: vmovd %r13d, %xmm26
+// CHECK: encoding: [0x62,0x41,0x7d,0x08,0x6e,0xd5]
+ vmovd %r13d, %xmm26
+
+// CHECK: vmovd (%rcx), %xmm26
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x6e,0x11]
+ vmovd (%rcx), %xmm26
+
+// CHECK: vmovd 291(%rax,%r14,8), %xmm26
+// CHECK: encoding: [0x62,0x21,0x7d,0x08,0x6e,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vmovd 291(%rax,%r14,8), %xmm26
+
+// CHECK: vmovd 508(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x6e,0x52,0x7f]
+ vmovd 508(%rdx), %xmm26
+
+// CHECK: vmovd 512(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x6e,0x92,0x00,0x02,0x00,0x00]
+ vmovd 512(%rdx), %xmm26
+
+// CHECK: vmovd -512(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x6e,0x52,0x80]
+ vmovd -512(%rdx), %xmm26
+
+// CHECK: vmovd -516(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x61,0x7d,0x08,0x6e,0x92,0xfc,0xfd,0xff,0xff]
+ vmovd -516(%rdx), %xmm26
+
+// CHECK: vmovd %xmm5, (%rcx)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x29]
+ vmovd %xmm5, (%rcx)
+
+// CHECK: vmovd %xmm5, 291(%rax,%r14,8)
+// CHECK: encoding: [0xc4,0xa1,0x79,0x7e,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vmovd %xmm5, 291(%rax,%r14,8)
+
+// CHECK: vmovd %xmm5, 508(%rdx)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0xfc,0x01,0x00,0x00]
+ vmovd %xmm5, 508(%rdx)
+
+// CHECK: vmovd %xmm5, 512(%rdx)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0x00,0x02,0x00,0x00]
+ vmovd %xmm5, 512(%rdx)
+
+// CHECK: vmovd %xmm5, -512(%rdx)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0x00,0xfe,0xff,0xff]
+ vmovd %xmm5, -512(%rdx)
+
+// CHECK: vmovd %xmm5, -516(%rdx)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0xfc,0xfd,0xff,0xff]
+ vmovd %xmm5, -516(%rdx)
+
+// CHECK: vmovshdup %zmm27, %zmm16
+// CHECK: encoding: [0x62,0x81,0x7e,0x48,0x16,0xc3]
+ vmovshdup %zmm27, %zmm16
+
+// CHECK: vmovshdup %zmm27, %zmm16 {%k4}
+// CHECK: encoding: [0x62,0x81,0x7e,0x4c,0x16,0xc3]
+ vmovshdup %zmm27, %zmm16 {%k4}
+
+// CHECK: vmovshdup %zmm27, %zmm16 {%k4} {z}
+// CHECK: encoding: [0x62,0x81,0x7e,0xcc,0x16,0xc3]
+ vmovshdup %zmm27, %zmm16 {%k4} {z}
+
+// CHECK: vmovshdup (%rcx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x01]
+ vmovshdup (%rcx), %zmm16
+
+// CHECK: vmovshdup 291(%rax,%r14,8), %zmm16
+// CHECK: encoding: [0x62,0xa1,0x7e,0x48,0x16,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vmovshdup 291(%rax,%r14,8), %zmm16
+
+// CHECK: vmovshdup 8128(%rdx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x42,0x7f]
+ vmovshdup 8128(%rdx), %zmm16
+
+// CHECK: vmovshdup 8192(%rdx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x82,0x00,0x20,0x00,0x00]
+ vmovshdup 8192(%rdx), %zmm16
+
+// CHECK: vmovshdup -8192(%rdx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x42,0x80]
+ vmovshdup -8192(%rdx), %zmm16
+
+// CHECK: vmovshdup -8256(%rdx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x82,0xc0,0xdf,0xff,0xff]
+ vmovshdup -8256(%rdx), %zmm16
+
+// CHECK: vmovsldup %zmm14, %zmm13
+// CHECK: encoding: [0x62,0x51,0x7e,0x48,0x12,0xee]
+ vmovsldup %zmm14, %zmm13
+
+// CHECK: vmovsldup %zmm14, %zmm13 {%k6}
+// CHECK: encoding: [0x62,0x51,0x7e,0x4e,0x12,0xee]
+ vmovsldup %zmm14, %zmm13 {%k6}
+
+// CHECK: vmovsldup %zmm14, %zmm13 {%k6} {z}
+// CHECK: encoding: [0x62,0x51,0x7e,0xce,0x12,0xee]
+ vmovsldup %zmm14, %zmm13 {%k6} {z}
+
+// CHECK: vmovsldup (%rcx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0x29]
+ vmovsldup (%rcx), %zmm13
+
+// CHECK: vmovsldup 291(%rax,%r14,8), %zmm13
+// CHECK: encoding: [0x62,0x31,0x7e,0x48,0x12,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vmovsldup 291(%rax,%r14,8), %zmm13
+
+// CHECK: vmovsldup 8128(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0x6a,0x7f]
+ vmovsldup 8128(%rdx), %zmm13
+
+// CHECK: vmovsldup 8192(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0xaa,0x00,0x20,0x00,0x00]
+ vmovsldup 8192(%rdx), %zmm13
+
+// CHECK: vmovsldup -8192(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0x6a,0x80]
+ vmovsldup -8192(%rdx), %zmm13
+
+// CHECK: vmovsldup -8256(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0xaa,0xc0,0xdf,0xff,0xff]
+ vmovsldup -8256(%rdx), %zmm13
+
+// CHECK: vmovlps (%rcx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x39]
+ vmovlps (%rcx), %xmm20, %xmm7
+
+// CHECK: vmovlps 291(%rax,%r14,8), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xb1,0x5c,0x00,0x12,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vmovlps 291(%rax,%r14,8), %xmm20, %xmm7
+
+// CHECK: vmovlps 1016(%rdx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x7a,0x7f]
+ vmovlps 1016(%rdx), %xmm20, %xmm7
+
+// CHECK: vmovlps 1024(%rdx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0xba,0x00,0x04,0x00,0x00]
+ vmovlps 1024(%rdx), %xmm20, %xmm7
+
+// CHECK: vmovlps -1024(%rdx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x7a,0x80]
+ vmovlps -1024(%rdx), %xmm20, %xmm7
+
+// CHECK: vmovlps -1032(%rdx), %xmm20, %xmm7
+// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0xba,0xf8,0xfb,0xff,0xff]
+ vmovlps -1032(%rdx), %xmm20, %xmm7
+
+// CHECK: vmovlps %xmm27, (%rcx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x19]
+ vmovlps %xmm27, (%rcx)
+
+// CHECK: vmovlps %xmm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x13,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vmovlps %xmm27, 291(%rax,%r14,8)
+
+// CHECK: vmovlps %xmm27, 1016(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x5a,0x7f]
+ vmovlps %xmm27, 1016(%rdx)
+
+// CHECK: vmovlps %xmm27, 1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x9a,0x00,0x04,0x00,0x00]
+ vmovlps %xmm27, 1024(%rdx)
+
+// CHECK: vmovlps %xmm27, -1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x5a,0x80]
+ vmovlps %xmm27, -1024(%rdx)
+
+// CHECK: vmovlps %xmm27, -1032(%rdx)
+// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x13,0x9a,0xf8,0xfb,0xff,0xff]
+ vmovlps %xmm27, -1032(%rdx)
+
+// CHECK: vmovlpd (%rcx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x29]
+ vmovlpd (%rcx), %xmm6, %xmm29
+
+// CHECK: vmovlpd 291(%rax,%r14,8), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x21,0xcd,0x08,0x12,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vmovlpd 291(%rax,%r14,8), %xmm6, %xmm29
+
+// CHECK: vmovlpd 1016(%rdx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x6a,0x7f]
+ vmovlpd 1016(%rdx), %xmm6, %xmm29
+
+// CHECK: vmovlpd 1024(%rdx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0xaa,0x00,0x04,0x00,0x00]
+ vmovlpd 1024(%rdx), %xmm6, %xmm29
+
+// CHECK: vmovlpd -1024(%rdx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0x6a,0x80]
+ vmovlpd -1024(%rdx), %xmm6, %xmm29
+
+// CHECK: vmovlpd -1032(%rdx), %xmm6, %xmm29
+// CHECK: encoding: [0x62,0x61,0xcd,0x08,0x12,0xaa,0xf8,0xfb,0xff,0xff]
+ vmovlpd -1032(%rdx), %xmm6, %xmm29
+
+// CHECK: vmovlpd %xmm25, (%rcx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x09]
+ vmovlpd %xmm25, (%rcx)
+
+// CHECK: vmovlpd %xmm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x13,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovlpd %xmm25, 291(%rax,%r14,8)
+
+// CHECK: vmovlpd %xmm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x4a,0x7f]
+ vmovlpd %xmm25, 1016(%rdx)
+
+// CHECK: vmovlpd %xmm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x8a,0x00,0x04,0x00,0x00]
+ vmovlpd %xmm25, 1024(%rdx)
+
+// CHECK: vmovlpd %xmm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x4a,0x80]
+ vmovlpd %xmm25, -1024(%rdx)
+
+// CHECK: vmovlpd %xmm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x13,0x8a,0xf8,0xfb,0xff,0xff]
+ vmovlpd %xmm25, -1032(%rdx)
+
+// CHECK: vmovhps (%rcx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x21]
+ vmovhps (%rcx), %xmm17, %xmm20
+
+// CHECK: vmovhps 291(%rax,%r14,8), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xa1,0x74,0x00,0x16,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vmovhps 291(%rax,%r14,8), %xmm17, %xmm20
+
+// CHECK: vmovhps 1016(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x62,0x7f]
+ vmovhps 1016(%rdx), %xmm17, %xmm20
+
+// CHECK: vmovhps 1024(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0xa2,0x00,0x04,0x00,0x00]
+ vmovhps 1024(%rdx), %xmm17, %xmm20
+
+// CHECK: vmovhps -1024(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0x62,0x80]
+ vmovhps -1024(%rdx), %xmm17, %xmm20
+
+// CHECK: vmovhps -1032(%rdx), %xmm17, %xmm20
+// CHECK: encoding: [0x62,0xe1,0x74,0x00,0x16,0xa2,0xf8,0xfb,0xff,0xff]
+ vmovhps -1032(%rdx), %xmm17, %xmm20
+
+// CHECK: vmovhps %xmm18, (%rcx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x11]
+ vmovhps %xmm18, (%rcx)
+
+// CHECK: vmovhps %xmm18, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x17,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vmovhps %xmm18, 291(%rax,%r14,8)
+
+// CHECK: vmovhps %xmm18, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x52,0x7f]
+ vmovhps %xmm18, 1016(%rdx)
+
+// CHECK: vmovhps %xmm18, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x92,0x00,0x04,0x00,0x00]
+ vmovhps %xmm18, 1024(%rdx)
+
+// CHECK: vmovhps %xmm18, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x52,0x80]
+ vmovhps %xmm18, -1024(%rdx)
+
+// CHECK: vmovhps %xmm18, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x17,0x92,0xf8,0xfb,0xff,0xff]
+ vmovhps %xmm18, -1032(%rdx)
+
+// CHECK: vmovhpd (%rcx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x19]
+ vmovhpd (%rcx), %xmm28, %xmm19
+
+// CHECK: vmovhpd 291(%rax,%r14,8), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xa1,0x9d,0x00,0x16,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vmovhpd 291(%rax,%r14,8), %xmm28, %xmm19
+
+// CHECK: vmovhpd 1016(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x5a,0x7f]
+ vmovhpd 1016(%rdx), %xmm28, %xmm19
+
+// CHECK: vmovhpd 1024(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x9a,0x00,0x04,0x00,0x00]
+ vmovhpd 1024(%rdx), %xmm28, %xmm19
+
+// CHECK: vmovhpd -1024(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x5a,0x80]
+ vmovhpd -1024(%rdx), %xmm28, %xmm19
+
+// CHECK: vmovhpd -1032(%rdx), %xmm28, %xmm19
+// CHECK: encoding: [0x62,0xe1,0x9d,0x00,0x16,0x9a,0xf8,0xfb,0xff,0xff]
+ vmovhpd -1032(%rdx), %xmm28, %xmm19
+
+// CHECK: vmovhpd %xmm25, (%rcx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x09]
+ vmovhpd %xmm25, (%rcx)
+
+// CHECK: vmovhpd %xmm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x17,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovhpd %xmm25, 291(%rax,%r14,8)
+
+// CHECK: vmovhpd %xmm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x4a,0x7f]
+ vmovhpd %xmm25, 1016(%rdx)
+
+// CHECK: vmovhpd %xmm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0x00,0x04,0x00,0x00]
+ vmovhpd %xmm25, 1024(%rdx)
+
+// CHECK: vmovhpd %xmm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x4a,0x80]
+ vmovhpd %xmm25, -1024(%rdx)
+
+// CHECK: vmovhpd %xmm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x17,0x8a,0xf8,0xfb,0xff,0xff]
+ vmovhpd %xmm25, -1032(%rdx)
+
+// CHECK: vmovddup %zmm29, %zmm5
+// CHECK: encoding: [0x62,0x91,0xff,0x48,0x12,0xed]
+ vmovddup %zmm29, %zmm5
+
+// CHECK: vmovddup %zmm29, %zmm5 {%k4}
+// CHECK: encoding: [0x62,0x91,0xff,0x4c,0x12,0xed]
+ vmovddup %zmm29, %zmm5 {%k4}
+
+// CHECK: vmovddup %zmm29, %zmm5 {%k4} {z}
+// CHECK: encoding: [0x62,0x91,0xff,0xcc,0x12,0xed]
+ vmovddup %zmm29, %zmm5 {%k4} {z}
+
+// CHECK: vmovddup (%rcx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0x29]
+ vmovddup (%rcx), %zmm5
+
+// CHECK: vmovddup 291(%rax,%r14,8), %zmm5
+// CHECK: encoding: [0x62,0xb1,0xff,0x48,0x12,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vmovddup 291(%rax,%r14,8), %zmm5
+
+// CHECK: vmovddup 8128(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0x6a,0x7f]
+ vmovddup 8128(%rdx), %zmm5
+
+// CHECK: vmovddup 8192(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0xaa,0x00,0x20,0x00,0x00]
+ vmovddup 8192(%rdx), %zmm5
+
+// CHECK: vmovddup -8192(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0x6a,0x80]
+ vmovddup -8192(%rdx), %zmm5
+
+// CHECK: vmovddup -8256(%rdx), %zmm5
+// CHECK: encoding: [0x62,0xf1,0xff,0x48,0x12,0xaa,0xc0,0xdf,0xff,0xff]
+ vmovddup -8256(%rdx), %zmm5
+
+// CHECK: vmovsd.s %xmm15, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0x31,0xcf,0x00,0x11,0xfd]
+ vmovsd.s %xmm15, %xmm22, %xmm21
+
+// CHECK: vmovsd.s %xmm15, %xmm22, %xmm21 {%k7}
+// CHECK: encoding: [0x62,0x31,0xcf,0x07,0x11,0xfd]
+ vmovsd.s %xmm15, %xmm22, %xmm21 {%k7}
+
+// CHECK: vmovsd.s %xmm15, %xmm22, %xmm21 {%k7} {z}
+// CHECK: encoding: [0x62,0x31,0xcf,0x87,0x11,0xfd]
+ vmovsd.s %xmm15, %xmm22, %xmm21 {%k7} {z}
+
+// CHECK: vmovsd.s %xmm8, %xmm13, %xmm23
+// CHECK: encoding: [0x62,0x31,0x97,0x08,0x11,0xc7]
+ vmovsd.s %xmm8, %xmm13, %xmm23
+
+// CHECK: vmovsd.s %xmm8, %xmm13, %xmm3 {%k5}
+// CHECK: encoding: [0x62,0x71,0x97,0x0d,0x11,0xc3]
+ vmovsd.s %xmm8, %xmm13, %xmm3 {%k5}
+
+// CHECK: vmovsd.s %xmm8, %xmm13, %xmm3 {%k5} {z}
+// CHECK: encoding: [0x62,0x71,0x97,0x8d,0x11,0xc3]
+ vmovsd.s %xmm8, %xmm13, %xmm3 {%k5} {z}
+
+// CHECK: vmovsd.s %xmm4, %xmm15, %xmm24
+// CHECK: encoding: [0x62,0x91,0x87,0x08,0x11,0xe0]
+ vmovsd.s %xmm4, %xmm15, %xmm24
+
+// CHECK: vmovsd.s %xmm4, %xmm15, %xmm4 {%k6}
+// CHECK: encoding: [0x62,0xf1,0x87,0x0e,0x11,0xe4]
+ vmovsd.s %xmm4, %xmm15, %xmm4 {%k6}
+
+// CHECK: vmovsd.s %xmm4, %xmm15, %xmm4 {%k6} {z}
+// CHECK: encoding: [0x62,0xf1,0x87,0x8e,0x11,0xe4]
+ vmovsd.s %xmm4, %xmm15, %xmm4 {%k6} {z}
+
+// CHECK: vmovsd.s %xmm14, %xmm2, %xmm20
+// CHECK: encoding: [0x62,0x31,0xef,0x08,0x11,0xf4]
+ vmovsd.s %xmm14, %xmm2, %xmm20
+
+// CHECK: vmovsd.s %xmm14, %xmm2, %xmm20 {%k7}
+// CHECK: encoding: [0x62,0x31,0xef,0x0f,0x11,0xf4]
+ vmovsd.s %xmm14, %xmm2, %xmm20 {%k7}
+
+// CHECK: vmovsd.s %xmm14, %xmm2, %xmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0x31,0xef,0x8f,0x11,0xf4]
+ vmovsd.s %xmm14, %xmm2, %xmm20 {%k7} {z}
+
+// CHECK: vmovss.s %xmm2, %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xb1,0x26,0x00,0x11,0xd1]
+ vmovss.s %xmm2, %xmm27, %xmm17
+
+// CHECK: vmovss.s %xmm2, %xmm27, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0xb1,0x26,0x02,0x11,0xd1]
+ vmovss.s %xmm2, %xmm27, %xmm17 {%k2}
+
+// CHECK: vmovss.s %xmm2, %xmm27, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0xb1,0x26,0x82,0x11,0xd1]
+ vmovss.s %xmm2, %xmm27, %xmm17 {%k2} {z}
+
+// CHECK: vmovss.s %xmm23, %xmm19, %xmm10
+// CHECK: encoding: [0x62,0xc1,0x66,0x00,0x11,0xfa]
+ vmovss.s %xmm23, %xmm19, %xmm10
+
+// CHECK: vmovss.s %xmm23, %xmm19, %xmm10 {%k3}
+// CHECK: encoding: [0x62,0xc1,0x66,0x03,0x11,0xfa]
+ vmovss.s %xmm23, %xmm19, %xmm10 {%k3}
+
+// CHECK: vmovss.s %xmm23, %xmm19, %xmm10 {%k3} {z}
+// CHECK: encoding: [0x62,0xc1,0x66,0x83,0x11,0xfa]
+ vmovss.s %xmm23, %xmm19, %xmm10 {%k3} {z}
+
+// CHECK: vmovss.s %xmm19, %xmm11, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x26,0x08,0x11,0xdd]
+ vmovss.s %xmm19, %xmm11, %xmm21
+
+// CHECK: vmovss.s %xmm19, %xmm11, %xmm21 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x26,0x0b,0x11,0xdd]
+ vmovss.s %xmm19, %xmm11, %xmm21 {%k3}
+
+// CHECK: vmovss.s %xmm19, %xmm11, %xmm21 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x26,0x8b,0x11,0xdd]
+ vmovss.s %xmm19, %xmm11, %xmm21 {%k3} {z}
+
+// CHECK: vmovss.s %xmm24, %xmm27, %xmm15
+// CHECK: encoding: [0x62,0x41,0x26,0x00,0x11,0xc7]
+ vmovss.s %xmm24, %xmm27, %xmm15
+
+// CHECK: vmovss.s %xmm24, %xmm27, %xmm15 {%k2}
+// CHECK: encoding: [0x62,0x41,0x26,0x02,0x11,0xc7]
+ vmovss.s %xmm24, %xmm27, %xmm15 {%k2}
+
+// CHECK: vmovss.s %xmm24, %xmm27, %xmm15 {%k2} {z}
+// CHECK: encoding: [0x62,0x41,0x26,0x82,0x11,0xc7]
+ vmovss.s %xmm24, %xmm27, %xmm15 {%k2} {z}
+
+// CHECK: vmovapd.s %zmm29, %zmm13
+// CHECK: encoding: [0x62,0x41,0xfd,0x48,0x29,0xed]
+ vmovapd.s %zmm29, %zmm13
+
+// CHECK: vmovapd.s %zmm29, %zmm13 {%k4}
+// CHECK: encoding: [0x62,0x41,0xfd,0x4c,0x29,0xed]
+ vmovapd.s %zmm29, %zmm13 {%k4}
+
+// CHECK: vmovapd.s %zmm29, %zmm13 {%k4} {z}
+// CHECK: encoding: [0x62,0x41,0xfd,0xcc,0x29,0xed]
+ vmovapd.s %zmm29, %zmm13 {%k4} {z}
+
+// CHECK: vmovapd.s %zmm1, %zmm17
+// CHECK: encoding: [0x62,0xb1,0xfd,0x48,0x29,0xc9]
+ vmovapd.s %zmm1, %zmm17
+
+// CHECK: vmovapd.s %zmm1, %zmm17 {%k5}
+// CHECK: encoding: [0x62,0xb1,0xfd,0x4d,0x29,0xc9]
+ vmovapd.s %zmm1, %zmm17 {%k5}
+
+// CHECK: vmovapd.s %zmm1, %zmm17 {%k5} {z}
+// CHECK: encoding: [0x62,0xb1,0xfd,0xcd,0x29,0xc9]
+ vmovapd.s %zmm1, %zmm17 {%k5} {z}
+
+// CHECK: vmovapd.s %zmm16, %zmm26
+// CHECK: encoding: [0x62,0x81,0xfd,0x48,0x29,0xc2]
+ vmovapd.s %zmm16, %zmm26
+
+// CHECK: vmovapd.s %zmm16, %zmm26 {%k1}
+// CHECK: encoding: [0x62,0x81,0xfd,0x49,0x29,0xc2]
+ vmovapd.s %zmm16, %zmm26 {%k1}
+
+// CHECK: vmovapd.s %zmm16, %zmm26 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0xfd,0xc9,0x29,0xc2]
+ vmovapd.s %zmm16, %zmm26 {%k1} {z}
+
+// CHECK: vmovapd.s %zmm7, %zmm4
+// CHECK: encoding: [0x62,0xf1,0xfd,0x48,0x29,0xfc]
+ vmovapd.s %zmm7, %zmm4
+
+// CHECK: vmovapd.s %zmm7, %zmm4 {%k5}
+// CHECK: encoding: [0x62,0xf1,0xfd,0x4d,0x29,0xfc]
+ vmovapd.s %zmm7, %zmm4 {%k5}
+
+// CHECK: vmovapd.s %zmm7, %zmm4 {%k5} {z}
+// CHECK: encoding: [0x62,0xf1,0xfd,0xcd,0x29,0xfc]
+ vmovapd.s %zmm7, %zmm4 {%k5} {z}
+
+// CHECK: vmovaps.s %zmm6, %zmm2
+// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x29,0xf2]
+ vmovaps.s %zmm6, %zmm2
+
+// CHECK: vmovaps.s %zmm6, %zmm2 {%k7}
+// CHECK: encoding: [0x62,0xf1,0x7c,0x4f,0x29,0xf2]
+ vmovaps.s %zmm6, %zmm2 {%k7}
+
+// CHECK: vmovaps.s %zmm6, %zmm2 {%k7} {z}
+// CHECK: encoding: [0x62,0xf1,0x7c,0xcf,0x29,0xf2]
+ vmovaps.s %zmm6, %zmm2 {%k7} {z}
+
+// CHECK: vmovaps.s %zmm2, %zmm8
+// CHECK: encoding: [0x62,0xd1,0x7c,0x48,0x29,0xd0]
+ vmovaps.s %zmm2, %zmm8
+
+// CHECK: vmovaps.s %zmm2, %zmm8 {%k5}
+// CHECK: encoding: [0x62,0xd1,0x7c,0x4d,0x29,0xd0]
+ vmovaps.s %zmm2, %zmm8 {%k5}
+
+// CHECK: vmovaps.s %zmm2, %zmm8 {%k5} {z}
+// CHECK: encoding: [0x62,0xd1,0x7c,0xcd,0x29,0xd0]
+ vmovaps.s %zmm2, %zmm8 {%k5} {z}
+
+// CHECK: vmovaps.s %zmm1, %zmm21
+// CHECK: encoding: [0x62,0xb1,0x7c,0x48,0x29,0xcd]
+ vmovaps.s %zmm1, %zmm21
+
+// CHECK: vmovaps.s %zmm1, %zmm21 {%k5}
+// CHECK: encoding: [0x62,0xb1,0x7c,0x4d,0x29,0xcd]
+ vmovaps.s %zmm1, %zmm21 {%k5}
+
+// CHECK: vmovaps.s %zmm1, %zmm21 {%k5} {z}
+// CHECK: encoding: [0x62,0xb1,0x7c,0xcd,0x29,0xcd]
+ vmovaps.s %zmm1, %zmm21 {%k5} {z}
+
+// CHECK: vmovaps.s %zmm12, %zmm30
+// CHECK: encoding: [0x62,0x11,0x7c,0x48,0x29,0xe6]
+ vmovaps.s %zmm12, %zmm30
+
+// CHECK: vmovaps.s %zmm12, %zmm30 {%k3}
+// CHECK: encoding: [0x62,0x11,0x7c,0x4b,0x29,0xe6]
+ vmovaps.s %zmm12, %zmm30 {%k3}
+
+// CHECK: vmovaps.s %zmm12, %zmm30 {%k3} {z}
+// CHECK: encoding: [0x62,0x11,0x7c,0xcb,0x29,0xe6]
+ vmovaps.s %zmm12, %zmm30 {%k3} {z}
+
+// CHECK: vmovdqa32.s %zmm17, %zmm4
+// CHECK: encoding: [0x62,0xe1,0x7d,0x48,0x7f,0xcc]
+ vmovdqa32.s %zmm17, %zmm4
+
+// CHECK: vmovdqa32.s %zmm17, %zmm4 {%k4}
+// CHECK: encoding: [0x62,0xe1,0x7d,0x4c,0x7f,0xcc]
+ vmovdqa32.s %zmm17, %zmm4 {%k4}
+
+// CHECK: vmovdqa32.s %zmm17, %zmm4 {%k4} {z}
+// CHECK: encoding: [0x62,0xe1,0x7d,0xcc,0x7f,0xcc]
+ vmovdqa32.s %zmm17, %zmm4 {%k4} {z}
+
+// CHECK: vmovdqa32.s %zmm1, %zmm18
+// CHECK: encoding: [0x62,0xb1,0x7d,0x48,0x7f,0xca]
+ vmovdqa32.s %zmm1, %zmm18
+
+// CHECK: vmovdqa32.s %zmm1, %zmm18 {%k1}
+// CHECK: encoding: [0x62,0xb1,0x7d,0x49,0x7f,0xca]
+ vmovdqa32.s %zmm1, %zmm18 {%k1}
+
+// CHECK: vmovdqa32.s %zmm1, %zmm18 {%k1} {z}
+// CHECK: encoding: [0x62,0xb1,0x7d,0xc9,0x7f,0xca]
+ vmovdqa32.s %zmm1, %zmm18 {%k1} {z}
+
+// CHECK: vmovdqa32.s %zmm28, %zmm14
+// CHECK: encoding: [0x62,0x41,0x7d,0x48,0x7f,0xe6]
+ vmovdqa32.s %zmm28, %zmm14
+
+// CHECK: vmovdqa32.s %zmm28, %zmm14 {%k5}
+// CHECK: encoding: [0x62,0x41,0x7d,0x4d,0x7f,0xe6]
+ vmovdqa32.s %zmm28, %zmm14 {%k5}
+
+// CHECK: vmovdqa32.s %zmm28, %zmm14 {%k5} {z}
+// CHECK: encoding: [0x62,0x41,0x7d,0xcd,0x7f,0xe6]
+ vmovdqa32.s %zmm28, %zmm14 {%k5} {z}
+
+// CHECK: vmovdqa32.s %zmm24, %zmm10
+// CHECK: encoding: [0x62,0x41,0x7d,0x48,0x7f,0xc2]
+ vmovdqa32.s %zmm24, %zmm10
+
+// CHECK: vmovdqa32.s %zmm24, %zmm10 {%k1}
+// CHECK: encoding: [0x62,0x41,0x7d,0x49,0x7f,0xc2]
+ vmovdqa32.s %zmm24, %zmm10 {%k1}
+
+// CHECK: vmovdqa32.s %zmm24, %zmm10 {%k1} {z}
+// CHECK: encoding: [0x62,0x41,0x7d,0xc9,0x7f,0xc2]
+ vmovdqa32.s %zmm24, %zmm10 {%k1} {z}
+
+// CHECK: vmovdqa64.s %zmm25, %zmm18
+// CHECK: encoding: [0x62,0x21,0xfd,0x48,0x7f,0xca]
+ vmovdqa64.s %zmm25, %zmm18
+
+// CHECK: vmovdqa64.s %zmm25, %zmm18 {%k7}
+// CHECK: encoding: [0x62,0x21,0xfd,0x4f,0x7f,0xca]
+ vmovdqa64.s %zmm25, %zmm18 {%k7}
+
+// CHECK: vmovdqa64.s %zmm25, %zmm18 {%k7} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0xcf,0x7f,0xca]
+ vmovdqa64.s %zmm25, %zmm18 {%k7} {z}
+
+// CHECK: vmovdqa64.s %zmm18, %zmm21
+// CHECK: encoding: [0x62,0xa1,0xfd,0x48,0x7f,0xd5]
+ vmovdqa64.s %zmm18, %zmm21
+
+// CHECK: vmovdqa64.s %zmm18, %zmm21 {%k6}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x4e,0x7f,0xd5]
+ vmovdqa64.s %zmm18, %zmm21 {%k6}
+
+// CHECK: vmovdqa64.s %zmm18, %zmm21 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0xfd,0xce,0x7f,0xd5]
+ vmovdqa64.s %zmm18, %zmm21 {%k6} {z}
+
+// CHECK: vmovdqa64.s %zmm14, %zmm4
+// CHECK: encoding: [0x62,0x71,0xfd,0x48,0x7f,0xf4]
+ vmovdqa64.s %zmm14, %zmm4
+
+// CHECK: vmovdqa64.s %zmm14, %zmm4 {%k3}
+// CHECK: encoding: [0x62,0x71,0xfd,0x4b,0x7f,0xf4]
+ vmovdqa64.s %zmm14, %zmm4 {%k3}
+
+// CHECK: vmovdqa64.s %zmm14, %zmm4 {%k3} {z}
+// CHECK: encoding: [0x62,0x71,0xfd,0xcb,0x7f,0xf4]
+ vmovdqa64.s %zmm14, %zmm4 {%k3} {z}
+
+// CHECK: vmovdqa64.s %zmm7, %zmm21
+// CHECK: encoding: [0x62,0xb1,0xfd,0x48,0x7f,0xfd]
+ vmovdqa64.s %zmm7, %zmm21
+
+// CHECK: vmovdqa64.s %zmm7, %zmm21 {%k7}
+// CHECK: encoding: [0x62,0xb1,0xfd,0x4f,0x7f,0xfd]
+ vmovdqa64.s %zmm7, %zmm21 {%k7}
+
+// CHECK: vmovdqa64.s %zmm7, %zmm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xb1,0xfd,0xcf,0x7f,0xfd]
+ vmovdqa64.s %zmm7, %zmm21 {%k7} {z}
+
+// CHECK: vmovdqu32.s %zmm19, %zmm10
+// CHECK: encoding: [0x62,0xc1,0x7e,0x48,0x7f,0xda]
+ vmovdqu32.s %zmm19, %zmm10
+
+// CHECK: vmovdqu32.s %zmm19, %zmm10 {%k4}
+// CHECK: encoding: [0x62,0xc1,0x7e,0x4c,0x7f,0xda]
+ vmovdqu32.s %zmm19, %zmm10 {%k4}
+
+// CHECK: vmovdqu32.s %zmm19, %zmm10 {%k4} {z}
+// CHECK: encoding: [0x62,0xc1,0x7e,0xcc,0x7f,0xda]
+ vmovdqu32.s %zmm19, %zmm10 {%k4} {z}
+
+// CHECK: vmovdqu32.s %zmm16, %zmm18
+// CHECK: encoding: [0x62,0xa1,0x7e,0x48,0x7f,0xc2]
+ vmovdqu32.s %zmm16, %zmm18
+
+// CHECK: vmovdqu32.s %zmm16, %zmm18 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x4b,0x7f,0xc2]
+ vmovdqu32.s %zmm16, %zmm18 {%k3}
+
+// CHECK: vmovdqu32.s %zmm16, %zmm18 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x7e,0xcb,0x7f,0xc2]
+ vmovdqu32.s %zmm16, %zmm18 {%k3} {z}
+
+// CHECK: vmovdqu32.s %zmm9, %zmm7
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x7f,0xcf]
+ vmovdqu32.s %zmm9, %zmm7
+
+// CHECK: vmovdqu32.s %zmm9, %zmm7 {%k4}
+// CHECK: encoding: [0x62,0x71,0x7e,0x4c,0x7f,0xcf]
+ vmovdqu32.s %zmm9, %zmm7 {%k4}
+
+// CHECK: vmovdqu32.s %zmm9, %zmm7 {%k4} {z}
+// CHECK: encoding: [0x62,0x71,0x7e,0xcc,0x7f,0xcf]
+ vmovdqu32.s %zmm9, %zmm7 {%k4} {z}
+
+// CHECK: vmovdqu32.s %zmm9, %zmm13
+// CHECK: encoding: [0x62,0x51,0x7e,0x48,0x7f,0xcd]
+ vmovdqu32.s %zmm9, %zmm13
+
+// CHECK: vmovdqu32.s %zmm9, %zmm13 {%k4}
+// CHECK: encoding: [0x62,0x51,0x7e,0x4c,0x7f,0xcd]
+ vmovdqu32.s %zmm9, %zmm13 {%k4}
+
+// CHECK: vmovdqu32.s %zmm9, %zmm13 {%k4} {z}
+// CHECK: encoding: [0x62,0x51,0x7e,0xcc,0x7f,0xcd]
+ vmovdqu32.s %zmm9, %zmm13 {%k4} {z}
+
+// CHECK: vmovdqu64.s %zmm27, %zmm21
+// CHECK: encoding: [0x62,0x21,0xfe,0x48,0x7f,0xdd]
+ vmovdqu64.s %zmm27, %zmm21
+
+// CHECK: vmovdqu64.s %zmm27, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0x21,0xfe,0x4a,0x7f,0xdd]
+ vmovdqu64.s %zmm27, %zmm21 {%k2}
+
+// CHECK: vmovdqu64.s %zmm27, %zmm21 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0xfe,0xca,0x7f,0xdd]
+ vmovdqu64.s %zmm27, %zmm21 {%k2} {z}
+
+// CHECK: vmovdqu64.s %zmm25, %zmm12
+// CHECK: encoding: [0x62,0x41,0xfe,0x48,0x7f,0xcc]
+ vmovdqu64.s %zmm25, %zmm12
+
+// CHECK: vmovdqu64.s %zmm25, %zmm12 {%k3}
+// CHECK: encoding: [0x62,0x41,0xfe,0x4b,0x7f,0xcc]
+ vmovdqu64.s %zmm25, %zmm12 {%k3}
+
+// CHECK: vmovdqu64.s %zmm25, %zmm12 {%k3} {z}
+// CHECK: encoding: [0x62,0x41,0xfe,0xcb,0x7f,0xcc]
+ vmovdqu64.s %zmm25, %zmm12 {%k3} {z}
+
+// CHECK: vmovdqu64.s %zmm7, %zmm19
+// CHECK: encoding: [0x62,0xb1,0xfe,0x48,0x7f,0xfb]
+ vmovdqu64.s %zmm7, %zmm19
+
+// CHECK: vmovdqu64.s %zmm7, %zmm19 {%k7}
+// CHECK: encoding: [0x62,0xb1,0xfe,0x4f,0x7f,0xfb]
+ vmovdqu64.s %zmm7, %zmm19 {%k7}
+
+// CHECK: vmovdqu64.s %zmm7, %zmm19 {%k7} {z}
+// CHECK: encoding: [0x62,0xb1,0xfe,0xcf,0x7f,0xfb]
+ vmovdqu64.s %zmm7, %zmm19 {%k7} {z}
+
+// CHECK: vmovdqu64.s %zmm24, %zmm17
+// CHECK: encoding: [0x62,0x21,0xfe,0x48,0x7f,0xc1]
+ vmovdqu64.s %zmm24, %zmm17
+
+// CHECK: vmovdqu64.s %zmm24, %zmm17 {%k1}
+// CHECK: encoding: [0x62,0x21,0xfe,0x49,0x7f,0xc1]
+ vmovdqu64.s %zmm24, %zmm17 {%k1}
+
+// CHECK: vmovdqu64.s %zmm24, %zmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0x21,0xfe,0xc9,0x7f,0xc1]
+ vmovdqu64.s %zmm24, %zmm17 {%k1} {z}
+
+// CHECK: vmovupd.s %zmm17, %zmm3
+// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x11,0xcb]
+ vmovupd.s %zmm17, %zmm3
+
+// CHECK: vmovupd.s %zmm17, %zmm3 {%k6}
+// CHECK: encoding: [0x62,0xe1,0xfd,0x4e,0x11,0xcb]
+ vmovupd.s %zmm17, %zmm3 {%k6}
+
+// CHECK: vmovupd.s %zmm17, %zmm3 {%k6} {z}
+// CHECK: encoding: [0x62,0xe1,0xfd,0xce,0x11,0xcb]
+ vmovupd.s %zmm17, %zmm3 {%k6} {z}
+
+// CHECK: vmovupd.s %zmm26, %zmm7
+// CHECK: encoding: [0x62,0x61,0xfd,0x48,0x11,0xd7]
+ vmovupd.s %zmm26, %zmm7
+
+// CHECK: vmovupd.s %zmm26, %zmm7 {%k5}
+// CHECK: encoding: [0x62,0x61,0xfd,0x4d,0x11,0xd7]
+ vmovupd.s %zmm26, %zmm7 {%k5}
+
+// CHECK: vmovupd.s %zmm26, %zmm7 {%k5} {z}
+// CHECK: encoding: [0x62,0x61,0xfd,0xcd,0x11,0xd7]
+ vmovupd.s %zmm26, %zmm7 {%k5} {z}
+
+// CHECK: vmovupd.s %zmm3, %zmm15
+// CHECK: encoding: [0x62,0xd1,0xfd,0x48,0x11,0xdf]
+ vmovupd.s %zmm3, %zmm15
+
+// CHECK: vmovupd.s %zmm3, %zmm15 {%k1}
+// CHECK: encoding: [0x62,0xd1,0xfd,0x49,0x11,0xdf]
+ vmovupd.s %zmm3, %zmm15 {%k1}
+
+// CHECK: vmovupd.s %zmm3, %zmm15 {%k1} {z}
+// CHECK: encoding: [0x62,0xd1,0xfd,0xc9,0x11,0xdf]
+ vmovupd.s %zmm3, %zmm15 {%k1} {z}
+
+// CHECK: vmovupd.s %zmm8, %zmm19
+// CHECK: encoding: [0x62,0x31,0xfd,0x48,0x11,0xc3]
+ vmovupd.s %zmm8, %zmm19
+
+// CHECK: vmovupd.s %zmm8, %zmm19 {%k1}
+// CHECK: encoding: [0x62,0x31,0xfd,0x49,0x11,0xc3]
+ vmovupd.s %zmm8, %zmm19 {%k1}
+
+// CHECK: vmovupd.s %zmm8, %zmm19 {%k1} {z}
+// CHECK: encoding: [0x62,0x31,0xfd,0xc9,0x11,0xc3]
+ vmovupd.s %zmm8, %zmm19 {%k1} {z}
+
+// CHECK: vmovups.s %zmm27, %zmm16
+// CHECK: encoding: [0x62,0x21,0x7c,0x48,0x11,0xd8]
+ vmovups.s %zmm27, %zmm16
+
+// CHECK: vmovups.s %zmm27, %zmm16 {%k1}
+// CHECK: encoding: [0x62,0x21,0x7c,0x49,0x11,0xd8]
+ vmovups.s %zmm27, %zmm16 {%k1}
+
+// CHECK: vmovups.s %zmm27, %zmm16 {%k1} {z}
+// CHECK: encoding: [0x62,0x21,0x7c,0xc9,0x11,0xd8]
+ vmovups.s %zmm27, %zmm16 {%k1} {z}
+
+// CHECK: vmovups.s %zmm5, %zmm19
+// CHECK: encoding: [0x62,0xb1,0x7c,0x48,0x11,0xeb]
+ vmovups.s %zmm5, %zmm19
+
+// CHECK: vmovups.s %zmm5, %zmm19 {%k2}
+// CHECK: encoding: [0x62,0xb1,0x7c,0x4a,0x11,0xeb]
+ vmovups.s %zmm5, %zmm19 {%k2}
+
+// CHECK: vmovups.s %zmm5, %zmm19 {%k2} {z}
+// CHECK: encoding: [0x62,0xb1,0x7c,0xca,0x11,0xeb]
+ vmovups.s %zmm5, %zmm19 {%k2} {z}
+
+// CHECK: vmovups.s %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x11,0xe9]
+ vmovups.s %zmm5, %zmm1
+
+// CHECK: vmovups.s %zmm5, %zmm1 {%k3}
+// CHECK: encoding: [0x62,0xf1,0x7c,0x4b,0x11,0xe9]
+ vmovups.s %zmm5, %zmm1 {%k3}
+
+// CHECK: vmovups.s %zmm5, %zmm1 {%k3} {z}
+// CHECK: encoding: [0x62,0xf1,0x7c,0xcb,0x11,0xe9]
+ vmovups.s %zmm5, %zmm1 {%k3} {z}
+
+// CHECK: vmovups.s %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x81,0x7c,0x48,0x11,0xe3]
+ vmovups.s %zmm20, %zmm27
+
+// CHECK: vmovups.s %zmm20, %zmm27 {%k6}
+// CHECK: encoding: [0x62,0x81,0x7c,0x4e,0x11,0xe3]
+ vmovups.s %zmm20, %zmm27 {%k6}
+
+// CHECK: vmovups.s %zmm20, %zmm27 {%k6} {z}
+// CHECK: encoding: [0x62,0x81,0x7c,0xce,0x11,0xe3]
+ vmovups.s %zmm20, %zmm27 {%k6} {z}
+
+// CHECK: vmovq.s %xmm9, %xmm29
+// CHECK: encoding: [0x62,0x11,0xfd,0x08,0xd6,0xcd]
+ vmovq.s %xmm9, %xmm29
+
+// CHECK: vmovq.s %xmm5, %xmm18
+// CHECK: encoding: [0x62,0xb1,0xfd,0x08,0xd6,0xea]
+ vmovq.s %xmm5, %xmm18
+
+// CHECK: vmovq.s %xmm14, %xmm25
+// CHECK: encoding: [0x62,0x11,0xfd,0x08,0xd6,0xf1]
+ vmovq.s %xmm14, %xmm25
+
+// CHECK: vmovq.s %xmm24, %xmm12
+// CHECK: encoding: [0x62,0x41,0xfd,0x08,0xd6,0xc4]
+ vmovq.s %xmm24, %xmm12
+
+// CHECK: vcomisd %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x2f,0xfd]
+ vcomisd %xmm21, %xmm23
+
+// CHECK: vcomisd {sae}, %xmm21, %xmm23
+// CHECK: encoding: [0x62,0xa1,0xfd,0x18,0x2f,0xfd]
+ vcomisd {sae}, %xmm21, %xmm23
+
+// CHECK: vcomisd (%rcx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x39]
+ vcomisd (%rcx), %xmm23
+
+// CHECK: vcomisd 291(%rax,%r14,8), %xmm23
+// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x2f,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vcomisd 291(%rax,%r14,8), %xmm23
+
+// CHECK: vcomisd 1016(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x7a,0x7f]
+ vcomisd 1016(%rdx), %xmm23
+
+// CHECK: vcomisd 1024(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0xba,0x00,0x04,0x00,0x00]
+ vcomisd 1024(%rdx), %xmm23
+
+// CHECK: vcomisd -1024(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x7a,0x80]
+ vcomisd -1024(%rdx), %xmm23
+
+// CHECK: vcomisd -1032(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0xba,0xf8,0xfb,0xff,0xff]
+ vcomisd -1032(%rdx), %xmm23
+
+// CHECK: vcomiss %xmm28, %xmm14
+// CHECK: encoding: [0x62,0x11,0x7c,0x08,0x2f,0xf4]
+ vcomiss %xmm28, %xmm14
+
+// CHECK: vcomiss {sae}, %xmm28, %xmm14
+// CHECK: encoding: [0x62,0x11,0x7c,0x18,0x2f,0xf4]
+ vcomiss {sae}, %xmm28, %xmm14
+
+// CHECK: vcomiss (%rcx), %xmm14
+// CHECK: encoding: [0xc5,0x78,0x2f,0x31]
+ vcomiss (%rcx), %xmm14
+
+// CHECK: vcomiss 291(%rax,%r14,8), %xmm14
+// CHECK: encoding: [0xc4,0x21,0x78,0x2f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcomiss 291(%rax,%r14,8), %xmm14
+
+// CHECK: vcomiss 508(%rdx), %xmm14
+// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0xfc,0x01,0x00,0x00]
+ vcomiss 508(%rdx), %xmm14
+
+// CHECK: vcomiss 512(%rdx), %xmm14
+// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0x00,0x02,0x00,0x00]
+ vcomiss 512(%rdx), %xmm14
+
+// CHECK: vcomiss -512(%rdx), %xmm14
+// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0x00,0xfe,0xff,0xff]
+ vcomiss -512(%rdx), %xmm14
+
+// CHECK: vcomiss -516(%rdx), %xmm14
+// CHECK: encoding: [0xc5,0x78,0x2f,0xb2,0xfc,0xfd,0xff,0xff]
+ vcomiss -516(%rdx), %xmm14
+
+// CHECK: vucomisd %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xda]
+ vucomisd %xmm10, %xmm11
+
+// CHECK: vucomisd {sae}, %xmm10, %xmm11
+// CHECK: encoding: [0x62,0x51,0xfd,0x18,0x2e,0xda]
+ vucomisd {sae}, %xmm10, %xmm11
+
+// CHECK: vucomisd (%rcx), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x2e,0x19]
+ vucomisd (%rcx), %xmm11
+
+// CHECK: vucomisd 291(%rax,%r14,8), %xmm11
+// CHECK: encoding: [0xc4,0x21,0x79,0x2e,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vucomisd 291(%rax,%r14,8), %xmm11
+
+// CHECK: vucomisd 1016(%rdx), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0xf8,0x03,0x00,0x00]
+ vucomisd 1016(%rdx), %xmm11
+
+// CHECK: vucomisd 1024(%rdx), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0x00,0x04,0x00,0x00]
+ vucomisd 1024(%rdx), %xmm11
+
+// CHECK: vucomisd -1024(%rdx), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0x00,0xfc,0xff,0xff]
+ vucomisd -1024(%rdx), %xmm11
+
+// CHECK: vucomisd -1032(%rdx), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x2e,0x9a,0xf8,0xfb,0xff,0xff]
+ vucomisd -1032(%rdx), %xmm11
+
+// CHECK: vucomiss %xmm11, %xmm22
+// CHECK: encoding: [0x62,0xc1,0x7c,0x08,0x2e,0xf3]
+ vucomiss %xmm11, %xmm22
+
+// CHECK: vucomiss {sae}, %xmm11, %xmm22
+// CHECK: encoding: [0x62,0xc1,0x7c,0x18,0x2e,0xf3]
+ vucomiss {sae}, %xmm11, %xmm22
+
+// CHECK: vucomiss (%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0x31]
+ vucomiss (%rcx), %xmm22
+
+// CHECK: vucomiss 291(%rax,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x2e,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vucomiss 291(%rax,%r14,8), %xmm22
+
+// CHECK: vucomiss 508(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0x72,0x7f]
+ vucomiss 508(%rdx), %xmm22
+
+// CHECK: vucomiss 512(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0xb2,0x00,0x02,0x00,0x00]
+ vucomiss 512(%rdx), %xmm22
+
+// CHECK: vucomiss -512(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0x72,0x80]
+ vucomiss -512(%rdx), %xmm22
+
+// CHECK: vucomiss -516(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0xb2,0xfc,0xfd,0xff,0xff]
+ vucomiss -516(%rdx), %xmm22
+// CHECK: vmovsd (%rcx), %xmm25 {%k3}
+// CHECK: encoding: [0x62,0x61,0xff,0x0b,0x10,0x09]
+ vmovsd (%rcx), %xmm25 {%k3}
+
+// CHECK: vmovsd (%rcx), %xmm25 {%k3} {z}
+// CHECK: encoding: [0x62,0x61,0xff,0x8b,0x10,0x09]
+ vmovsd (%rcx), %xmm25 {%k3} {z}
+
+// CHECK: vmovsd %xmm19, %xmm3, %xmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x21,0xe7,0x8b,0x10,0xdb]
+ vmovsd %xmm19, %xmm3, %xmm27 {%k3} {z}
+
+// CHECK: vmovss (%rcx), %xmm2 {%k4}
+// CHECK: encoding: [0x62,0xf1,0x7e,0x0c,0x10,0x11]
+ vmovss (%rcx), %xmm2 {%k4}
+
+// CHECK: vmovss (%rcx), %xmm2 {%k4} {z}
+// CHECK: encoding: [0x62,0xf1,0x7e,0x8c,0x10,0x11]
+ vmovss (%rcx), %xmm2 {%k4} {z}
+
+// CHECK: vmovss %xmm26, %xmm9, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x01,0x36,0x8c,0x10,0xe2]
+ vmovss %xmm26, %xmm9, %xmm28 {%k4} {z}
+
+// CHECK: vmovsd %xmm15, %xmm22, %xmm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xc1,0xcf,0x87,0x10,0xef]
+ vmovsd %xmm15, %xmm22, %xmm21 {%k7} {z}
+
+// CHECK: vmovsd %xmm8, %xmm13, %xmm3 {%k5} {z}
+// CHECK: encoding: [0x62,0xd1,0x97,0x8d,0x10,0xd8]
+ vmovsd %xmm8, %xmm13, %xmm3 {%k5} {z}
+
+// CHECK: vmovss %xmm2, %xmm27, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0xe1,0x26,0x82,0x10,0xca]
+ vmovss %xmm2, %xmm27, %xmm17 {%k2} {z}
+
+// CHECK: vmovss %xmm23, %xmm19, %xmm10 {%k3} {z}
+// CHECK: encoding: [0x62,0x31,0x66,0x83,0x10,0xd7]
+ vmovss %xmm23, %xmm19, %xmm10 {%k3} {z}
+
+// CHECK: vmovsd %xmm4, %xmm15, %xmm4 {%k6} {z}
+// CHECK: encoding: [0x62,0xf1,0x87,0x8e,0x10,0xe4]
+ vmovsd %xmm4, %xmm15, %xmm4 {%k6} {z}
+
+// CHECK: vmovsd %xmm14, %xmm2, %xmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0xc1,0xef,0x8f,0x10,0xe6]
+ vmovsd %xmm14, %xmm2, %xmm20 {%k7} {z}
+
+// CHECK: vmovss %xmm19, %xmm11, %xmm21 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x26,0x8b,0x10,0xeb]
+ vmovss %xmm19, %xmm11, %xmm21 {%k3} {z}
+
+// CHECK: vmovss %xmm24, %xmm27, %xmm15 {%k2} {z}
+// CHECK: encoding: [0x62,0x11,0x26,0x82,0x10,0xf8]
+ vmovss %xmm24, %xmm27, %xmm15 {%k2} {z}
diff --git a/test/MC/X86/avx512vl-encoding.s b/test/MC/X86/avx512vl-encoding.s
index e1fc32848ccd..2e3eaf2aa4c7 100644
--- a/test/MC/X86/avx512vl-encoding.s
+++ b/test/MC/X86/avx512vl-encoding.s
@@ -860,6 +860,215 @@
// CHECK: encoding: [0x62,0xf2,0xbe,0x30,0x27,0xa2,0xf8,0xfb,0xff,0xff]
vptestnmq -1032(%rdx){1to4}, %ymm24, %k4
+// CHECK: vptestnmq %xmm19, %xmm28, %k2
+// CHECK: encoding: [0x62,0xb2,0x9e,0x00,0x27,0xd3]
+ vptestnmq %xmm19, %xmm28,%k2
+
+// CHECK: vptestnmq %xmm19, %xmm28, %k2 {%k1}
+// CHECK: encoding: [0x62,0xb2,0x9e,0x01,0x27,0xd3]
+ vptestnmq %xmm19, %xmm28,%k2 {%k1}
+
+// CHECK: vptestnmq (%rcx), %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x00,0x27,0x11]
+ vptestnmq (%rcx), %xmm28,%k2
+
+// CHECK: vptestnmq 4660(%rax,%r14,8), %xmm28, %k2
+// CHECK: encoding: [0x62,0xb2,0x9e,0x00,0x27,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vptestnmq 4660(%rax,%r14,8), %xmm28,%k2
+
+// CHECK: vptestnmq (%rcx){1to2}, %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x10,0x27,0x11]
+ vptestnmq (%rcx){1to2}, %xmm28,%k2
+
+// CHECK: vptestnmq 2032(%rdx), %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x00,0x27,0x52,0x7f]
+ vptestnmq 2032(%rdx), %xmm28,%k2
+
+// CHECK: vptestnmq 2048(%rdx), %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x00,0x27,0x92,0x00,0x08,0x00,0x00]
+ vptestnmq 2048(%rdx), %xmm28,%k2
+
+// CHECK: vptestnmq -2048(%rdx), %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x00,0x27,0x52,0x80]
+ vptestnmq -2048(%rdx), %xmm28,%k2
+
+// CHECK: vptestnmq -2064(%rdx), %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x00,0x27,0x92,0xf0,0xf7,0xff,0xff]
+ vptestnmq -2064(%rdx), %xmm28,%k2
+
+// CHECK: vptestnmq 1016(%rdx){1to2}, %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x10,0x27,0x52,0x7f]
+ vptestnmq 1016(%rdx){1to2}, %xmm28,%k2
+
+// CHECK: vptestnmq 1024(%rdx){1to2}, %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x10,0x27,0x92,0x00,0x04,0x00,0x00]
+ vptestnmq 1024(%rdx){1to2}, %xmm28,%k2
+
+// CHECK: vptestnmq -1024(%rdx){1to2}, %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x10,0x27,0x52,0x80]
+ vptestnmq -1024(%rdx){1to2}, %xmm28,%k2
+
+// CHECK: vptestnmq -1032(%rdx){1to2}, %xmm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x10,0x27,0x92,0xf8,0xfb,0xff,0xff]
+ vptestnmq -1032(%rdx){1to2}, %xmm28,%k2
+
+// CHECK: vptestnmq %ymm17, %ymm26, %k4
+// CHECK: encoding: [0x62,0xb2,0xae,0x20,0x27,0xe1]
+ vptestnmq %ymm17, %ymm26,%k4
+
+// CHECK: vptestnmq %ymm17, %ymm26, %k4 {%k1}
+// CHECK: encoding: [0x62,0xb2,0xae,0x21,0x27,0xe1]
+ vptestnmq %ymm17, %ymm26,%k4 {%k1}
+
+// CHECK: vptestnmq (%rcx), %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x20,0x27,0x21]
+ vptestnmq (%rcx), %ymm26,%k4
+
+// CHECK: vptestnmq 4660(%rax,%r14,8), %ymm26, %k4
+// CHECK: encoding: [0x62,0xb2,0xae,0x20,0x27,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vptestnmq 4660(%rax,%r14,8), %ymm26,%k4
+
+// CHECK: vptestnmq (%rcx){1to4}, %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x30,0x27,0x21]
+ vptestnmq (%rcx){1to4}, %ymm26,%k4
+
+// CHECK: vptestnmq 4064(%rdx), %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x20,0x27,0x62,0x7f]
+ vptestnmq 4064(%rdx), %ymm26,%k4
+
+// CHECK: vptestnmq 4096(%rdx), %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x20,0x27,0xa2,0x00,0x10,0x00,0x00]
+ vptestnmq 4096(%rdx), %ymm26,%k4
+
+// CHECK: vptestnmq -4096(%rdx), %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x20,0x27,0x62,0x80]
+ vptestnmq -4096(%rdx), %ymm26,%k4
+
+// CHECK: vptestnmq -4128(%rdx), %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x20,0x27,0xa2,0xe0,0xef,0xff,0xff]
+ vptestnmq -4128(%rdx), %ymm26,%k4
+
+// CHECK: vptestnmq 1016(%rdx){1to4}, %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x30,0x27,0x62,0x7f]
+ vptestnmq 1016(%rdx){1to4}, %ymm26,%k4
+
+// CHECK: vptestnmq 1024(%rdx){1to4}, %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x30,0x27,0xa2,0x00,0x04,0x00,0x00]
+ vptestnmq 1024(%rdx){1to4}, %ymm26,%k4
+
+// CHECK: vptestnmq -1024(%rdx){1to4}, %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x30,0x27,0x62,0x80]
+ vptestnmq -1024(%rdx){1to4}, %ymm26,%k4
+
+// CHECK: vptestnmq -1032(%rdx){1to4}, %ymm26, %k4
+// CHECK: encoding: [0x62,0xf2,0xae,0x30,0x27,0xa2,0xf8,0xfb,0xff,0xff]
+ vptestnmq -1032(%rdx){1to4}, %ymm26,%k4
+
+
+// CHECK: vptestnmd %xmm21, %xmm25, %k4
+// CHECK: encoding: [0x62,0xb2,0x36,0x00,0x27,0xe5]
+ vptestnmd %xmm21, %xmm25,%k4
+
+// CHECK: vptestnmd %xmm21, %xmm25, %k4 {%k5}
+// CHECK: encoding: [0x62,0xb2,0x36,0x05,0x27,0xe5]
+ vptestnmd %xmm21, %xmm25,%k4 {%k5}
+
+// CHECK: vptestnmd (%rcx), %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x00,0x27,0x21]
+ vptestnmd (%rcx), %xmm25,%k4
+
+// CHECK: vptestnmd 4660(%rax,%r14,8), %xmm25, %k4
+// CHECK: encoding: [0x62,0xb2,0x36,0x00,0x27,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vptestnmd 4660(%rax,%r14,8), %xmm25,%k4
+
+// CHECK: vptestnmd (%rcx){1to4}, %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x10,0x27,0x21]
+ vptestnmd (%rcx){1to4}, %xmm25,%k4
+
+// CHECK: vptestnmd 2032(%rdx), %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x00,0x27,0x62,0x7f]
+ vptestnmd 2032(%rdx), %xmm25,%k4
+
+// CHECK: vptestnmd 2048(%rdx), %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x00,0x27,0xa2,0x00,0x08,0x00,0x00]
+ vptestnmd 2048(%rdx), %xmm25,%k4
+
+// CHECK: vptestnmd -2048(%rdx), %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x00,0x27,0x62,0x80]
+ vptestnmd -2048(%rdx), %xmm25,%k4
+
+// CHECK: vptestnmd -2064(%rdx), %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x00,0x27,0xa2,0xf0,0xf7,0xff,0xff]
+ vptestnmd -2064(%rdx), %xmm25,%k4
+
+// CHECK: vptestnmd 508(%rdx){1to4}, %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x10,0x27,0x62,0x7f]
+ vptestnmd 508(%rdx){1to4}, %xmm25,%k4
+
+// CHECK: vptestnmd 512(%rdx){1to4}, %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x10,0x27,0xa2,0x00,0x02,0x00,0x00]
+ vptestnmd 512(%rdx){1to4}, %xmm25,%k4
+
+// CHECK: vptestnmd -512(%rdx){1to4}, %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x10,0x27,0x62,0x80]
+ vptestnmd -512(%rdx){1to4}, %xmm25,%k4
+
+// CHECK: vptestnmd -516(%rdx){1to4}, %xmm25, %k4
+// CHECK: encoding: [0x62,0xf2,0x36,0x10,0x27,0xa2,0xfc,0xfd,0xff,0xff]
+ vptestnmd -516(%rdx){1to4}, %xmm25,%k4
+
+// CHECK: vptestnmd %ymm26, %ymm29, %k4
+// CHECK: encoding: [0x62,0x92,0x16,0x20,0x27,0xe2]
+ vptestnmd %ymm26, %ymm29,%k4
+
+// CHECK: vptestnmd %ymm26, %ymm29, %k4 {%k4}
+// CHECK: encoding: [0x62,0x92,0x16,0x24,0x27,0xe2]
+ vptestnmd %ymm26, %ymm29,%k4 {%k4}
+
+// CHECK: vptestnmd (%rcx), %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x20,0x27,0x21]
+ vptestnmd (%rcx), %ymm29,%k4
+
+// CHECK: vptestnmd 4660(%rax,%r14,8), %ymm29, %k4
+// CHECK: encoding: [0x62,0xb2,0x16,0x20,0x27,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vptestnmd 4660(%rax,%r14,8), %ymm29,%k4
+
+// CHECK: vptestnmd (%rcx){1to8}, %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x30,0x27,0x21]
+ vptestnmd (%rcx){1to8}, %ymm29,%k4
+
+// CHECK: vptestnmd 4064(%rdx), %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x20,0x27,0x62,0x7f]
+ vptestnmd 4064(%rdx), %ymm29,%k4
+
+// CHECK: vptestnmd 4096(%rdx), %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x20,0x27,0xa2,0x00,0x10,0x00,0x00]
+ vptestnmd 4096(%rdx), %ymm29,%k4
+
+// CHECK: vptestnmd -4096(%rdx), %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x20,0x27,0x62,0x80]
+ vptestnmd -4096(%rdx), %ymm29,%k4
+
+// CHECK: vptestnmd -4128(%rdx), %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x20,0x27,0xa2,0xe0,0xef,0xff,0xff]
+ vptestnmd -4128(%rdx), %ymm29,%k4
+
+// CHECK: vptestnmd 508(%rdx){1to8}, %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x30,0x27,0x62,0x7f]
+ vptestnmd 508(%rdx){1to8}, %ymm29,%k4
+
+// CHECK: vptestnmd 512(%rdx){1to8}, %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x30,0x27,0xa2,0x00,0x02,0x00,0x00]
+ vptestnmd 512(%rdx){1to8}, %ymm29,%k4
+
+// CHECK: vptestnmd -512(%rdx){1to8}, %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x30,0x27,0x62,0x80]
+ vptestnmd -512(%rdx){1to8}, %ymm29,%k4
+
+// CHECK: vptestnmd -516(%rdx){1to8}, %ymm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x16,0x30,0x27,0xa2,0xfc,0xfd,0xff,0xff]
+ vptestnmd -516(%rdx){1to8}, %ymm29,%k4
+
// CHECK: vpmovd2m %xmm27, %k3
// CHECK: encoding: [0x62,0x92,0x7e,0x08,0x39,0xdb]
vpmovd2m %xmm27, %k3
diff --git a/test/MC/X86/cfi_def_cfa-crash.s b/test/MC/X86/cfi_def_cfa-crash.s
index 9d22d6e281cf..74b41d9943a9 100644
--- a/test/MC/X86/cfi_def_cfa-crash.s
+++ b/test/MC/X86/cfi_def_cfa-crash.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin -filetype=obj %s -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin -filetype=obj %s -o - | llvm-readobj -sections | FileCheck %s
// We were trying to generate compact unwind info for assembly like this.
// The .cfi_def_cfa directive, however, throws a wrench into that and was
@@ -68,6 +68,22 @@ _foo:
ret
.cfi_endproc
-
-
-// CHECK: 'section_name', '__eh_frame\x00
+// CHECK: Section {
+// CHECK: Index: 1
+// CHECK: Name: __eh_frame (5F 5F 65 68 5F 66 72 61 6D 65 00 00 00 00 00 00)
+// CHECK: Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+// CHECK: Address: 0x70
+// CHECK: Size: 0x40
+// CHECK: Offset: 480
+// CHECK: Alignment: 3
+// CHECK: RelocationOffset: 0x0
+// CHECK: RelocationCount: 0
+// CHECK: Type: 0xB
+// CHECK: Attributes [ (0x680000)
+// CHECK: LiveSupport (0x80000)
+// CHECK: NoTOC (0x400000)
+// CHECK: StripStaticSyms (0x200000)
+// CHECK: ]
+// CHECK: Reserved1: 0x0
+// CHECK: Reserved2: 0x0
+// CHECK: }
diff --git a/test/MC/X86/encoder-fail.s b/test/MC/X86/encoder-fail.s
new file mode 100644
index 000000000000..3e845fe7561b
--- /dev/null
+++ b/test/MC/X86/encoder-fail.s
@@ -0,0 +1,3 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown --show-encoding %s 2>&1 | FileCheck %s
+// CHECK: LLVM ERROR: Cannot encode high byte register in REX-prefixed instruction
+ movzx %dh, %rsi
diff --git a/test/MC/X86/expand-var.s b/test/MC/X86/expand-var.s
index 8d5529a9a469..d9b8bb9394a4 100644
--- a/test/MC/X86/expand-var.s
+++ b/test/MC/X86/expand-var.s
@@ -1,9 +1,13 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux < %s | llvm-readobj -r | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux < %s | llvm-readobj -r -t | FileCheck %s
// CHECK: Section {{.*}} .rela.text {
// CHECK-NEXT: 0x0 R_X86_64_32 d 0x0
// CHECK-NEXT: }
+// CHECK: Symbol {
+// CHECK: Name: d2
+// CHECK-NEXT: Value: 0x2A
+
a:
b = a
c = a
@@ -16,3 +20,4 @@ a2:
.weak b2
b2 = a2
c2 = b2 - a2
+ d2 = b2 - a2 + 42
diff --git a/test/MC/X86/i386-darwin-frame-register.ll b/test/MC/X86/i386-darwin-frame-register.ll
index c994a1efd203..12b1e1e5c548 100644
--- a/test/MC/X86/i386-darwin-frame-register.ll
+++ b/test/MC/X86/i386-darwin-frame-register.ll
@@ -29,7 +29,7 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 230514) (llvm/trunk 230518)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 230514) (llvm/trunk 230518)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.c", directory: "/tmp")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/MC/X86/intel-syntax-2.s b/test/MC/X86/intel-syntax-2.s
index f7bdaf92dbb5..aead5766db4d 100644
--- a/test/MC/X86/intel-syntax-2.s
+++ b/test/MC/X86/intel-syntax-2.s
@@ -15,3 +15,17 @@ _test2:
.att_syntax prefix
movl $255, -4(%rsp)
// CHECK: movl $255, -4(%rsp)
+
+_test3:
+fadd
+// CHECK: faddp %st(1)
+fmul
+// CHECK: fmulp %st(1)
+fsub
+// CHECK: fsubp %st(1)
+fsubr
+// CHECK: fsubrp %st(1)
+fdiv
+// CHECK: fdivp %st(1)
+fdivr
+// CHECK: fdivrp %st(1)
diff --git a/test/MC/X86/intel-syntax-ambiguous.s b/test/MC/X86/intel-syntax-ambiguous.s
index fe1fe5023902..e90cca820043 100644
--- a/test/MC/X86/intel-syntax-ambiguous.s
+++ b/test/MC/X86/intel-syntax-ambiguous.s
@@ -45,3 +45,15 @@ add rax, 3
fadd "?half@?0??bar@@YAXXZ@4NA"
// CHECK: error: ambiguous operand size for instruction 'fadd'
+
+// Instruction line with PTR inside check that they don't accept register as memory.
+
+// CHECK: error: expected memory operand after 'ptr', found register operand instead
+// CHECK: andps xmm1, xmmword ptr xmm1
+andps xmm1, xmmword ptr xmm1
+// CHECK: error: expected memory operand after 'ptr', found register operand instead
+// CHECK: andps xmmword ptr xmm1, xmm1
+andps xmmword ptr xmm1, xmm1
+// CHECK: error: expected memory operand after 'ptr', found register operand instead
+// CHECK: mov dword ptr eax, ebx
+mov dword ptr eax, ebx
diff --git a/test/MC/X86/intel-syntax-avx512.s b/test/MC/X86/intel-syntax-avx512.s
index 86a1af8bc16f..c5ab7dde1106 100644
--- a/test/MC/X86/intel-syntax-avx512.s
+++ b/test/MC/X86/intel-syntax-avx512.s
@@ -256,3 +256,99 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae}
// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rdx - 1032], 123
// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
vfixupimmsd xmm13,xmm26,QWORD PTR [rdx-0x408],0x7b
+
+// CHECK: vcomisd xmm23, qword ptr [rcx]
+// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2f,0x39]
+ vcomisd xmm23, QWORD PTR [rcx]
+
+// CHECK: vcomiss xmm16, dword ptr [rcx]
+// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2f,0x01]
+ vcomiss xmm16, DWORD PTR [rcx]
+
+// CHECK: vmovss dword ptr [rcx] {k2}, xmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x0a,0x11,0x29]
+ vmovss dword ptr [rcx]{k2},xmm13
+
+// CHECK: vmovss dword ptr [rax + 8*r14 + 4660], xmm13
+// CHECK: encoding: [0xc4,0x21,0x7a,0x11,0xac,0xf0,0x34,0x12,0x00,0x00]
+ vmovss dword ptr [rax+r14*8+0x1234],xmm13
+
+// CHECK: vmovss dword ptr [rdx + 508], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0xfc,0x01,0x00,0x00]
+ vmovss dword ptr [rdx+0x1fc],xmm13
+
+// CHECK: vmovss dword ptr [rdx + 512], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0x00,0x02,0x00,0x00]
+ vmovss dword ptr [rdx+0x200],xmm13
+
+// CHECK: vmovss dword ptr [rdx - 512], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0x00,0xfe,0xff,0xff]
+ vmovss dword ptr [rdx-0x200],xmm13
+
+// CHECK: vmovss dword ptr [rdx - 516], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0xfc,0xfd,0xff,0xff]
+ vmovss dword ptr [rdx-0x204],xmm13
+
+// CHECK: vmovss dword ptr [rdx + 508], xmm5
+// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0xfc,0x01,0x00,0x00]
+ vmovss dword ptr [rdx+0x1fc],xmm5
+
+// CHECK: vmovss dword ptr [rdx + 512], xmm5
+// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0x00,0x02,0x00,0x00]
+ vmovss dword ptr [rdx+0x200],xmm5
+
+// CHECK: vmovss dword ptr [rdx - 512], xmm5
+// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0x00,0xfe,0xff,0xff]
+ vmovss dword ptr [rdx-0x200], xmm5
+
+// CHECK: vmovss dword ptr [rdx - 516], xmm5
+// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0xfc,0xfd,0xff,0xff]
+ vmovss dword ptr [rdx-0x204],xmm5
+
+// CHECK: vmovss dword ptr [rcx], xmm13
+// CHECK: encoding: [0xc5,0x7a,0x11,0x29]
+ vmovss dword ptr [rcx],xmm13
+
+// CHECK: vmovss xmm2, dword ptr [rcx]
+// CHECK: encoding: [0xc5,0xfa,0x10,0x11]
+ vmovss xmm2, dword ptr [rcx]
+
+// CHECK: vmovss xmm2 {k4}, dword ptr [rcx]
+// CHECK: encoding: [0x62,0xf1,0x7e,0x0c,0x10,0x11]
+ vmovss xmm2{k4}, dword ptr [rcx]
+
+// CHECK: vmovss xmm2 {k4} {z}, dword ptr [rcx]
+// CHECK: encoding: [0x62,0xf1,0x7e,0x8c,0x10,0x11]
+ vmovss xmm2{k4} {z}, dword ptr [rcx]
+
+// CHECK: vmovsd xmm25 , qword ptr [rcx]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x09]
+ vmovsd xmm25, qword ptr [rcx]
+
+// CHECK: vmovsd xmm25 {k3}, qword ptr [rcx]
+// CHECK: encoding: [0x62,0x61,0xff,0x0b,0x10,0x09]
+ vmovsd xmm25{k3}, qword ptr [rcx]
+
+// CHECK: vmovsd xmm25 {k3} {z}, qword ptr [rcx]
+// CHECK: encoding: [0x62,0x61,0xff,0x8b,0x10,0x09]
+ vmovsd xmm25{k3} {z}, qword ptr [rcx]
+
+// CHECK: vmovsd xmm25 , qword ptr [rax + 8*r14 + 291]
+// CHECK: encoding: [0x62,0x21,0xff,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovsd xmm25, qword ptr [rax+r14*8+0x123]
+
+// CHECK: vmovsd xmm25 , qword ptr [rdx + 1016]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x7f]
+ vmovsd xmm25, qword ptr [rdx+0x3f8]
+
+// CHECK: vmovsd xmm25 , qword ptr [rdx + 1024]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0x00,0x04,0x00,0x00]
+ vmovsd xmm25, qword ptr [rdx+0x400]
+
+// CHECK: vmovsd xmm25 , qword ptr [rdx - 1024]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x80]
+ vmovsd xmm25, qword ptr [rdx-0x400]
+
+// CHECK: vmovsd xmm25 , qword ptr [rdx - 1032]
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff]
+ vmovsd xmm25, qword ptr [rdx-0x408]
diff --git a/test/MC/X86/intel-syntax-print.ll b/test/MC/X86/intel-syntax-print.ll
new file mode 100644
index 000000000000..14ef46705be9
--- /dev/null
+++ b/test/MC/X86/intel-syntax-print.ll
@@ -0,0 +1,10 @@
+; RUN: llc -x86-asm-syntax=intel < %s | FileCheck %s -check-prefix=INTEL
+; RUN: llc -x86-asm-syntax=att < %s | FileCheck %s -check-prefix=ATT
+
+; INTEL: .intel_syntax noprefix
+; ATT-NOT: .intel_syntax noprefix
+target triple = "x86_64-unknown-unknown"
+define i32 @test() {
+entry:
+ ret i32 0
+}
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 6fde42bd898d..c7ec77eadfee 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -489,10 +489,12 @@ test [ECX], AL
// CHECK: fnstsw %ax
// CHECK: fnstsw %ax
// CHECK: fnstsw %ax
+// CHECK: fnstsw (%eax)
fnstsw
fnstsw AX
fnstsw EAX
fnstsw AL
+fnstsw WORD PTR [EAX]
// CHECK: faddp %st(1)
// CHECK: fmulp %st(1)
@@ -533,6 +535,20 @@ fsubrp ST(1)
fdivp ST(1)
fdivrp ST(1)
+
+// CHECK: faddp %st(1)
+// CHECK: fmulp %st(1)
+// CHECK: fsubrp %st(1)
+// CHECK: fsubp %st(1)
+// CHECK: fdivrp %st(1)
+// CHECK: fdivp %st(1)
+fadd
+fmul
+fsub
+fsubr
+fdiv
+fdivr
+
// CHECK: faddp %st(1)
// CHECK: fmulp %st(1)
// CHECK: fsubrp %st(1)
@@ -635,10 +651,12 @@ add byte ptr [rax], 1
// CHECK: addw $1, (%rax)
// CHECK: addb $1, (%rax)
+fstp tbyte ptr [rax]
fstp xword ptr [rax]
fstp qword ptr [rax]
fstp dword ptr [rax]
// CHECK: fstpt (%rax)
+// CHECK: fstpt (%rax)
// CHECK: fstpl (%rax)
// CHECK: fstps (%rax)
@@ -679,3 +697,57 @@ outsd
// CHECK: outsb (%rsi), %dx
// CHECK: outsw (%rsi), %dx
// CHECK: outsl (%rsi), %dx
+
+imul bx, 123
+imul ebx, 123
+imul rbx, 123
+// CHECK: imulw $123, %bx
+// CHECK: imull $123, %ebx
+// CHECK: imulq $123, %rbx
+
+repe cmpsb
+repz cmpsb
+repne cmpsb
+repnz cmpsb
+// CHECK: rep
+// CHECK: cmpsb %es:(%rdi), (%rsi)
+// CHECK: rep
+// CHECK: cmpsb %es:(%rdi), (%rsi)
+// CHECK: repne
+// CHECK: cmpsb %es:(%rdi), (%rsi)
+// CHECK: repne
+// CHECK: cmpsb %es:(%rdi), (%rsi)
+
+sal eax, 123
+// CHECK: shll $123, %eax
+
+psignw mm0, MMWORD PTR t2
+// CHECK: psignw t2, %mm0
+
+comisd xmm0, QWORD PTR [eax]
+comiss xmm0, DWORD PTR [eax]
+vcomisd xmm0, QWORD PTR [eax]
+vcomiss xmm0, DWORD PTR [eax]
+
+// CHECK: comisd (%eax), %xmm0
+// CHECK: comiss (%eax), %xmm0
+// CHECK: vcomisd (%eax), %xmm0
+// CHECK: vcomiss (%eax), %xmm0
+
+fbld tbyte ptr [eax]
+fbstp tbyte ptr [eax]
+// CHECK: fbld (%eax)
+// CHECK: fbstp (%eax)
+
+fcomip st, st(2)
+fucomip st, st(2)
+// CHECK: fcompi %st(2)
+// CHECK: fucompi %st(2)
+
+loopz _foo
+loopnz _foo
+// CHECK: loope _foo
+// CHECK: loopne _foo
+
+sidt fword ptr [eax]
+// CHECK: sidtq (%eax)
diff --git a/test/MC/X86/large-bss.s b/test/MC/X86/large-bss.s
new file mode 100644
index 000000000000..edb111e9092a
--- /dev/null
+++ b/test/MC/X86/large-bss.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64-pc-linux -filetype=obj %s -o - | llvm-readobj -s | FileCheck %s
+
+.bss
+.zero 0x10000000000000
+
+// CHECK: Name: .bss
+// CHECK-NEXT: Type: SHT_NOBITS
+// CHECK-NEXT: Flags [
+// CHECK-NEXT: SHF_ALLOC
+// CHECK-NEXT: SHF_WRITE
+// CHECK-NEXT: ]
+// CHECK-NEXT: Address: 0x0
+// CHECK-NEXT: Offset: 0x40
+// CHECK-NEXT: Size: 4503599627370496
diff --git a/test/MC/X86/macho-reloc-errors-x86.s b/test/MC/X86/macho-reloc-errors-x86.s
new file mode 100644
index 000000000000..4af202220073
--- /dev/null
+++ b/test/MC/X86/macho-reloc-errors-x86.s
@@ -0,0 +1,15 @@
+// RUN: not llvm-mc -triple=i686-apple-darwin -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+ .space 0x1000000
+ mov %eax, thing-thing2
+ mov %eax, defined-thing2
+ mov %eax, later-defined
+
+ .section __DATA,__tim
+defined:
+
+ .section __DATA,__tim2
+later:
+
+// CHECK-ERROR: 3:9: error: symbol 'thing' can not be undefined in a subtraction expression
+// CHECK-ERROR: 4:9: error: symbol 'thing2' can not be undefined in a subtraction expression
+// CHECK-ERROR: 5:9: error: Section too large, can't encode r_address (0x100000b) into 24 bits of scattered relocation entry.
diff --git a/test/MC/X86/macho-reloc-errors-x86_64.s b/test/MC/X86/macho-reloc-errors-x86_64.s
new file mode 100644
index 000000000000..05f77c495b24
--- /dev/null
+++ b/test/MC/X86/macho-reloc-errors-x86_64.s
@@ -0,0 +1,19 @@
+// RUN: not llvm-mc -triple=x86_64-apple-darwin -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+
+ mov %rax, thing
+ mov %rax, thing@GOT-thing2@GOT
+ mov %rax, (thing-thing2)(%rip)
+ mov %rax, thing-thing
+ mov %rax, thing-thing2
+ mov %rax, thing@PLT
+ jmp thing@PLT
+ mov %rax, thing@TLVP
+
+// CHECK-ERROR: 3:9: error: 32-bit absolute addressing is not supported in 64-bit mode
+// CHECK-ERROR: 4:9: error: unsupported relocation of modified symbol
+// CHECK-ERROR: 5:9: error: unsupported pc-relative relocation of difference
+// CHECK-ERROR: 6:9: error: unsupported relocation with identical base
+// CHECK-ERROR: 7:9: error: unsupported relocation with subtraction expression, symbol 'thing' can not be undefined in a subtraction expression
+// CHECK-ERROR: 8:9: error: unsupported symbol modifier in relocation
+// CHECK-ERROR: 9:9: error: unsupported symbol modifier in branch relocation
+// CHECK-ERROR: 10:9: error: TLVP symbol modifier should have been rip-rel
diff --git a/test/MC/X86/validate-inst-att.s b/test/MC/X86/validate-inst-att.s
index dec8bfdf6600..7ac851e76150 100644
--- a/test/MC/X86/validate-inst-att.s
+++ b/test/MC/X86/validate-inst-att.s
@@ -2,6 +2,21 @@
.text
int $65535
-# CHECK: error: interrupt vector must be in range [0-255]
+# CHECK: error: invalid operand for instruction
# CHECK: int $65535
# CHECK: ^
+
+ int $-129
+# CHECK: error: invalid operand for instruction
+# CHECK: int $-129
+# CHECK: ^
+
+ inb $65535, %al
+# CHECK: error: invalid operand for instruction
+# CHECK: inb $65535, %al
+# CHECK: ^
+
+ outb %al, $65535
+# CHECK: error: invalid operand for instruction
+# CHECK: outb %al, $65535
+# CHECK: ^
diff --git a/test/MC/X86/validate-inst-intel.s b/test/MC/X86/validate-inst-intel.s
index 9a7d122ca781..466b906fee73 100644
--- a/test/MC/X86/validate-inst-intel.s
+++ b/test/MC/X86/validate-inst-intel.s
@@ -3,7 +3,13 @@
.text
int 65535
-# CHECK: error: interrupt vector must be in range [0-255]
+# CHECK: error: invalid operand for instruction
# CHECK: int 65535
# CHECK: ^
+ .text
+ int -129
+# CHECK: error: invalid operand for instruction
+# CHECK: int -129
+# CHECK: ^
+
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index e14031d67f1e..10a1288bd57f 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -10742,3 +10742,23 @@ btcq $4, (%eax)
// CHECK: getsec
// CHECK: encoding: [0x0f,0x37]
getsec
+
+// CHECK: monitorx
+// CHECK: encoding: [0x0f,0x01,0xfa]
+ monitorx
+
+// CHECK: monitorx
+// CHECK: encoding: [0x0f,0x01,0xfa]
+ monitorx %eax, %ecx, %edx
+
+// CHECK: mwaitx
+// CHECK: encoding: [0x0f,0x01,0xfb]
+ mwaitx
+
+// CHECK: mwaitx
+// CHECK: encoding: [0x0f,0x01,0xfb]
+ mwaitx %eax, %ecx, %ebx
+
+// CHECK: clzero
+// CHECK: encoding: [0x0f,0x01,0xfc]
+ clzero
diff --git a/test/MC/X86/x86-64-avx512bw.s b/test/MC/X86/x86-64-avx512bw.s
index 95eabfdb3411..b5ba2af64f0b 100644
--- a/test/MC/X86/x86-64-avx512bw.s
+++ b/test/MC/X86/x86-64-avx512bw.s
@@ -3668,6 +3668,126 @@
// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x1d,0xb2,0xc0,0xdf,0xff,0xff]
vpabsw -8256(%rdx), %zmm30
+// CHECK: vpmovwb %zmm27, %ymm22
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x30,0xde]
+ vpmovwb %zmm27, %ymm22
+
+// CHECK: vpmovwb %zmm27, %ymm22 {%k1}
+// CHECK: encoding: [0x62,0x22,0x7e,0x49,0x30,0xde]
+ vpmovwb %zmm27, %ymm22 {%k1}
+
+// CHECK: vpmovwb %zmm27, %ymm22 {%k1} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0xc9,0x30,0xde]
+ vpmovwb %zmm27, %ymm22 {%k1} {z}
+
+// CHECK: vpmovwb %zmm22, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x31]
+ vpmovwb %zmm22, (%rcx)
+
+// CHECK: vpmovwb %zmm22, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x4c,0x30,0x31]
+ vpmovwb %zmm22, (%rcx) {%k4}
+
+// CHECK: vpmovwb %zmm22, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovwb %zmm22, 291(%rax,%r14,8)
+
+// CHECK: vpmovwb %zmm22, 4064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x7f]
+ vpmovwb %zmm22, 4064(%rdx)
+
+// CHECK: vpmovwb %zmm22, 4096(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0x00,0x10,0x00,0x00]
+ vpmovwb %zmm22, 4096(%rdx)
+
+// CHECK: vpmovwb %zmm22, -4096(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0x72,0x80]
+ vpmovwb %zmm22, -4096(%rdx)
+
+// CHECK: vpmovwb %zmm22, -4128(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x48,0x30,0xb2,0xe0,0xef,0xff,0xff]
+ vpmovwb %zmm22, -4128(%rdx)
+
+// CHECK: vpmovswb %zmm18, %ymm23
+// CHECK: encoding: [0x62,0xa2,0x7e,0x48,0x20,0xd7]
+ vpmovswb %zmm18, %ymm23
+
+// CHECK: vpmovswb %zmm18, %ymm23 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x4a,0x20,0xd7]
+ vpmovswb %zmm18, %ymm23 {%k2}
+
+// CHECK: vpmovswb %zmm18, %ymm23 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xca,0x20,0xd7]
+ vpmovswb %zmm18, %ymm23 {%k2} {z}
+
+// CHECK: vpmovswb %zmm24, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x01]
+ vpmovswb %zmm24, (%rcx)
+
+// CHECK: vpmovswb %zmm24, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x4f,0x20,0x01]
+ vpmovswb %zmm24, (%rcx) {%k7}
+
+// CHECK: vpmovswb %zmm24, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x20,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovswb %zmm24, 291(%rax,%r14,8)
+
+// CHECK: vpmovswb %zmm24, 4064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x7f]
+ vpmovswb %zmm24, 4064(%rdx)
+
+// CHECK: vpmovswb %zmm24, 4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0x00,0x10,0x00,0x00]
+ vpmovswb %zmm24, 4096(%rdx)
+
+// CHECK: vpmovswb %zmm24, -4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x42,0x80]
+ vpmovswb %zmm24, -4096(%rdx)
+
+// CHECK: vpmovswb %zmm24, -4128(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x20,0x82,0xe0,0xef,0xff,0xff]
+ vpmovswb %zmm24, -4128(%rdx)
+
+// CHECK: vpmovuswb %zmm22, %ymm28
+// CHECK: encoding: [0x62,0x82,0x7e,0x48,0x10,0xf4]
+ vpmovuswb %zmm22, %ymm28
+
+// CHECK: vpmovuswb %zmm22, %ymm28 {%k3}
+// CHECK: encoding: [0x62,0x82,0x7e,0x4b,0x10,0xf4]
+ vpmovuswb %zmm22, %ymm28 {%k3}
+
+// CHECK: vpmovuswb %zmm22, %ymm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xcb,0x10,0xf4]
+ vpmovuswb %zmm22, %ymm28 {%k3} {z}
+
+// CHECK: vpmovuswb %zmm27, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x19]
+ vpmovuswb %zmm27, (%rcx)
+
+// CHECK: vpmovuswb %zmm27, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x4a,0x10,0x19]
+ vpmovuswb %zmm27, (%rcx) {%k2}
+
+// CHECK: vpmovuswb %zmm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x48,0x10,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovuswb %zmm27, 291(%rax,%r14,8)
+
+// CHECK: vpmovuswb %zmm27, 4064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x7f]
+ vpmovuswb %zmm27, 4064(%rdx)
+
+// CHECK: vpmovuswb %zmm27, 4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0x00,0x10,0x00,0x00]
+ vpmovuswb %zmm27, 4096(%rdx)
+
+// CHECK: vpmovuswb %zmm27, -4096(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x5a,0x80]
+ vpmovuswb %zmm27, -4096(%rdx)
+
+// CHECK: vpmovuswb %zmm27, -4128(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x10,0x9a,0xe0,0xef,0xff,0xff]
+ vpmovuswb %zmm27, -4128(%rdx)
+
// CHECK: vpmulhuw %zmm21, %zmm24, %zmm21
// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0xe4,0xed]
vpmulhuw %zmm21, %zmm24, %zmm21
@@ -3776,3 +3896,942 @@
// CHECK: encoding: [0x62,0xe2,0x25,0x40,0x0b,0xaa,0xc0,0xdf,0xff,0xff]
vpmulhrsw -8256(%rdx), %zmm27, %zmm21
+// CHECK: vpmaddubsw %zmm25, %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x02,0x5d,0x40,0x04,0xd9]
+ vpmaddubsw %zmm25, %zmm20, %zmm27
+
+// CHECK: vpmaddubsw %zmm25, %zmm20, %zmm27 {%k3}
+// CHECK: encoding: [0x62,0x02,0x5d,0x43,0x04,0xd9]
+ vpmaddubsw %zmm25, %zmm20, %zmm27 {%k3}
+
+// CHECK: vpmaddubsw %zmm25, %zmm20, %zmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x5d,0xc3,0x04,0xd9]
+ vpmaddubsw %zmm25, %zmm20, %zmm27 {%k3} {z}
+
+// CHECK: vpmaddubsw (%rcx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x19]
+ vpmaddubsw (%rcx), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw 291(%rax,%r14,8), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x22,0x5d,0x40,0x04,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddubsw 291(%rax,%r14,8), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw 8128(%rdx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x5a,0x7f]
+ vpmaddubsw 8128(%rdx), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw 8192(%rdx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x9a,0x00,0x20,0x00,0x00]
+ vpmaddubsw 8192(%rdx), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw -8192(%rdx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x5a,0x80]
+ vpmaddubsw -8192(%rdx), %zmm20, %zmm27
+
+// CHECK: vpmaddubsw -8256(%rdx), %zmm20, %zmm27
+// CHECK: encoding: [0x62,0x62,0x5d,0x40,0x04,0x9a,0xc0,0xdf,0xff,0xff]
+ vpmaddubsw -8256(%rdx), %zmm20, %zmm27
+
+// CHECK: vpmaddwd %zmm25, %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x01,0x4d,0x40,0xf5,0xd1]
+ vpmaddwd %zmm25, %zmm22, %zmm26
+
+// CHECK: vpmaddwd %zmm25, %zmm22, %zmm26 {%k2}
+// CHECK: encoding: [0x62,0x01,0x4d,0x42,0xf5,0xd1]
+ vpmaddwd %zmm25, %zmm22, %zmm26 {%k2}
+
+// CHECK: vpmaddwd %zmm25, %zmm22, %zmm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0x4d,0xc2,0xf5,0xd1]
+ vpmaddwd %zmm25, %zmm22, %zmm26 {%k2} {z}
+
+// CHECK: vpmaddwd (%rcx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x11]
+ vpmaddwd (%rcx), %zmm22, %zmm26
+
+// CHECK: vpmaddwd 291(%rax,%r14,8), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x21,0x4d,0x40,0xf5,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddwd 291(%rax,%r14,8), %zmm22, %zmm26
+
+// CHECK: vpmaddwd 8128(%rdx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x52,0x7f]
+ vpmaddwd 8128(%rdx), %zmm22, %zmm26
+
+// CHECK: vpmaddwd 8192(%rdx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x92,0x00,0x20,0x00,0x00]
+ vpmaddwd 8192(%rdx), %zmm22, %zmm26
+
+// CHECK: vpmaddwd -8192(%rdx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x52,0x80]
+ vpmaddwd -8192(%rdx), %zmm22, %zmm26
+
+// CHECK: vpmaddwd -8256(%rdx), %zmm22, %zmm26
+// CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x92,0xc0,0xdf,0xff,0xff]
+ vpmaddwd -8256(%rdx), %zmm22, %zmm26
+
+// CHECK: vpunpcklbw %zmm24, %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x01,0x75,0x40,0x60,0xe0]
+ vpunpcklbw %zmm24, %zmm17, %zmm28
+
+// CHECK: vpunpcklbw %zmm24, %zmm17, %zmm28 {%k1}
+// CHECK: encoding: [0x62,0x01,0x75,0x41,0x60,0xe0]
+ vpunpcklbw %zmm24, %zmm17, %zmm28 {%k1}
+
+// CHECK: vpunpcklbw %zmm24, %zmm17, %zmm28 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0x75,0xc1,0x60,0xe0]
+ vpunpcklbw %zmm24, %zmm17, %zmm28 {%k1} {z}
+
+// CHECK: vpunpcklbw (%rcx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0x21]
+ vpunpcklbw (%rcx), %zmm17, %zmm28
+
+// CHECK: vpunpcklbw 291(%rax,%r14,8), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x21,0x75,0x40,0x60,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpcklbw 291(%rax,%r14,8), %zmm17, %zmm28
+
+// CHECK: vpunpcklbw 8128(%rdx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0x62,0x7f]
+ vpunpcklbw 8128(%rdx), %zmm17, %zmm28
+
+// CHECK: vpunpcklbw 8192(%rdx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0xa2,0x00,0x20,0x00,0x00]
+ vpunpcklbw 8192(%rdx), %zmm17, %zmm28
+
+// CHECK: vpunpcklbw -8192(%rdx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0x62,0x80]
+ vpunpcklbw -8192(%rdx), %zmm17, %zmm28
+
+// CHECK: vpunpcklbw -8256(%rdx), %zmm17, %zmm28
+// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0xa2,0xc0,0xdf,0xff,0xff]
+ vpunpcklbw -8256(%rdx), %zmm17, %zmm28
+
+// CHECK: vpunpckhbw %zmm23, %zmm19, %zmm30
+// CHECK: encoding: [0x62,0x21,0x65,0x40,0x68,0xf7]
+ vpunpckhbw %zmm23, %zmm19, %zmm30
+
+// CHECK: vpunpckhbw %zmm23, %zmm19, %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x21,0x65,0x47,0x68,0xf7]
+ vpunpckhbw %zmm23, %zmm19, %zmm30 {%k7}
+
+// CHECK: vpunpckhbw %zmm23, %zmm19, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x21,0x65,0xc7,0x68,0xf7]
+ vpunpckhbw %zmm23, %zmm19, %zmm30 {%k7} {z}
+
+// CHECK: vpunpckhbw (%rcx), %zmm19, %zmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0x31]
+ vpunpckhbw (%rcx), %zmm19, %zmm30
+
+// CHECK: vpunpckhbw 291(%rax,%r14,8), %zmm19, %zmm30
+// CHECK: encoding: [0x62,0x21,0x65,0x40,0x68,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhbw 291(%rax,%r14,8), %zmm19, %zmm30
+
+// CHECK: vpunpckhbw 8128(%rdx), %zmm19, %zmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0x72,0x7f]
+ vpunpckhbw 8128(%rdx), %zmm19, %zmm30
+
+// CHECK: vpunpckhbw 8192(%rdx), %zmm19, %zmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0xb2,0x00,0x20,0x00,0x00]
+ vpunpckhbw 8192(%rdx), %zmm19, %zmm30
+
+// CHECK: vpunpckhbw -8192(%rdx), %zmm19, %zmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0x72,0x80]
+ vpunpckhbw -8192(%rdx), %zmm19, %zmm30
+
+// CHECK: vpunpckhbw -8256(%rdx), %zmm19, %zmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0xb2,0xc0,0xdf,0xff,0xff]
+ vpunpckhbw -8256(%rdx), %zmm19, %zmm30
+
+// CHECK: vpunpcklwd %zmm18, %zmm24, %zmm20
+// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0x61,0xe2]
+ vpunpcklwd %zmm18, %zmm24, %zmm20
+
+// CHECK: vpunpcklwd %zmm18, %zmm24, %zmm20 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x3d,0x47,0x61,0xe2]
+ vpunpcklwd %zmm18, %zmm24, %zmm20 {%k7}
+
+// CHECK: vpunpcklwd %zmm18, %zmm24, %zmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x3d,0xc7,0x61,0xe2]
+ vpunpcklwd %zmm18, %zmm24, %zmm20 {%k7} {z}
+
+// CHECK: vpunpcklwd (%rcx), %zmm24, %zmm20
+// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0x21]
+ vpunpcklwd (%rcx), %zmm24, %zmm20
+
+// CHECK: vpunpcklwd 291(%rax,%r14,8), %zmm24, %zmm20
+// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0x61,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpcklwd 291(%rax,%r14,8), %zmm24, %zmm20
+
+// CHECK: vpunpcklwd 8128(%rdx), %zmm24, %zmm20
+// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0x62,0x7f]
+ vpunpcklwd 8128(%rdx), %zmm24, %zmm20
+
+// CHECK: vpunpcklwd 8192(%rdx), %zmm24, %zmm20
+// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0xa2,0x00,0x20,0x00,0x00]
+ vpunpcklwd 8192(%rdx), %zmm24, %zmm20
+
+// CHECK: vpunpcklwd -8192(%rdx), %zmm24, %zmm20
+// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0x62,0x80]
+ vpunpcklwd -8192(%rdx), %zmm24, %zmm20
+
+// CHECK: vpunpcklwd -8256(%rdx), %zmm24, %zmm20
+// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0xa2,0xc0,0xdf,0xff,0xff]
+ vpunpcklwd -8256(%rdx), %zmm24, %zmm20
+
+// CHECK: vpunpckhwd %zmm24, %zmm26, %zmm30
+// CHECK: encoding: [0x62,0x01,0x2d,0x40,0x69,0xf0]
+ vpunpckhwd %zmm24, %zmm26, %zmm30
+
+// CHECK: vpunpckhwd %zmm24, %zmm26, %zmm30 {%k4}
+// CHECK: encoding: [0x62,0x01,0x2d,0x44,0x69,0xf0]
+ vpunpckhwd %zmm24, %zmm26, %zmm30 {%k4}
+
+// CHECK: vpunpckhwd %zmm24, %zmm26, %zmm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x01,0x2d,0xc4,0x69,0xf0]
+ vpunpckhwd %zmm24, %zmm26, %zmm30 {%k4} {z}
+
+// CHECK: vpunpckhwd (%rcx), %zmm26, %zmm30
+// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0x31]
+ vpunpckhwd (%rcx), %zmm26, %zmm30
+
+// CHECK: vpunpckhwd 291(%rax,%r14,8), %zmm26, %zmm30
+// CHECK: encoding: [0x62,0x21,0x2d,0x40,0x69,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhwd 291(%rax,%r14,8), %zmm26, %zmm30
+
+// CHECK: vpunpckhwd 8128(%rdx), %zmm26, %zmm30
+// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0x72,0x7f]
+ vpunpckhwd 8128(%rdx), %zmm26, %zmm30
+
+// CHECK: vpunpckhwd 8192(%rdx), %zmm26, %zmm30
+// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0xb2,0x00,0x20,0x00,0x00]
+ vpunpckhwd 8192(%rdx), %zmm26, %zmm30
+
+// CHECK: vpunpckhwd -8192(%rdx), %zmm26, %zmm30
+// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0x72,0x80]
+ vpunpckhwd -8192(%rdx), %zmm26, %zmm30
+
+// CHECK: vpunpckhwd -8256(%rdx), %zmm26, %zmm30
+// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0xb2,0xc0,0xdf,0xff,0xff]
+ vpunpckhwd -8256(%rdx), %zmm26, %zmm30
+
+// CHECK: vpextrb $171, %xmm17, %eax
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x14,0xc8,0xab]
+ vpextrb $171, %xmm17, %eax
+
+// CHECK: vpextrb $123, %xmm17, %eax
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x14,0xc8,0x7b]
+ vpextrb $123, %xmm17, %eax
+
+// CHECK: vpextrb $123, %xmm17, %r8d
+// CHECK: encoding: [0x62,0xc3,0x7d,0x08,0x14,0xc8,0x7b]
+ vpextrb $123, %xmm17,%r8d
+
+// CHECK: vpextrb $123, %xmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x14,0x09,0x7b]
+ vpextrb $123, %xmm17, (%rcx)
+
+// CHECK: vpextrb $123, %xmm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x14,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpextrb $123, %xmm17, 291(%rax,%r14,8)
+
+// CHECK: vpextrb $123, %xmm17, 127(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x14,0x4a,0x7f,0x7b]
+ vpextrb $123, %xmm17, 127(%rdx)
+
+// CHECK: vpextrb $123, %xmm17, 128(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x14,0x8a,0x80,0x00,0x00,0x00,0x7b]
+ vpextrb $123, %xmm17, 128(%rdx)
+
+// CHECK: vpextrb $123, %xmm17, -128(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x14,0x4a,0x80,0x7b]
+ vpextrb $123, %xmm17, -128(%rdx)
+
+// CHECK: vpextrb $123, %xmm17, -129(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x14,0x8a,0x7f,0xff,0xff,0xff,0x7b]
+ vpextrb $123, %xmm17, -129(%rdx)
+// CHECK: vpinsrb $171, %eax, %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x63,0x35,0x00,0x20,0xc8,0xab]
+ vpinsrb $171,%eax, %xmm25, %xmm25
+
+// CHECK: vpinsrb $123, %eax, %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x63,0x35,0x00,0x20,0xc8,0x7b]
+ vpinsrb $123,%eax, %xmm25, %xmm25
+
+// CHECK: vpinsrb $123, %ebp, %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x63,0x35,0x00,0x20,0xcd,0x7b]
+ vpinsrb $123,%ebp, %xmm25, %xmm25
+
+// CHECK: vpinsrb $123, %r13d, %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x43,0x35,0x00,0x20,0xcd,0x7b]
+ vpinsrb $123,%r13d, %xmm25, %xmm25
+
+// CHECK: vpinsrb $123, (%rcx), %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x63,0x35,0x00,0x20,0x09,0x7b]
+ vpinsrb $123, (%rcx), %xmm25, %xmm25
+
+// CHECK: vpinsrb $123, 291(%rax,%r14,8), %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x23,0x35,0x00,0x20,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpinsrb $123, 291(%rax,%r14,8), %xmm25, %xmm25
+
+// CHECK: vpinsrb $123, 127(%rdx), %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x63,0x35,0x00,0x20,0x4a,0x7f,0x7b]
+ vpinsrb $123, 127(%rdx), %xmm25, %xmm25
+
+// CHECK: vpinsrb $123, 128(%rdx), %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x63,0x35,0x00,0x20,0x8a,0x80,0x00,0x00,0x00,0x7b]
+ vpinsrb $123, 128(%rdx), %xmm25, %xmm25
+
+// CHECK: vpinsrb $123, -128(%rdx), %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x63,0x35,0x00,0x20,0x4a,0x80,0x7b]
+ vpinsrb $123, -128(%rdx), %xmm25, %xmm25
+
+// CHECK: vpinsrb $123, -129(%rdx), %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x63,0x35,0x00,0x20,0x8a,0x7f,0xff,0xff,0xff,0x7b]
+ vpinsrb $123, -129(%rdx), %xmm25, %xmm25
+
+// CHECK: vpinsrw $171, %eax, %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0xc4,0xd0,0xab]
+ vpinsrw $171,%eax, %xmm25, %xmm18
+
+// CHECK: vpinsrw $123, %eax, %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0xc4,0xd0,0x7b]
+ vpinsrw $123,%eax, %xmm25, %xmm18
+
+// CHECK: vpinsrw $123, %ebp, %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0xc4,0xd5,0x7b]
+ vpinsrw $123,%ebp, %xmm25, %xmm18
+
+// CHECK: vpinsrw $123, %r13d, %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xc1,0x35,0x00,0xc4,0xd5,0x7b]
+ vpinsrw $123,%r13d, %xmm25, %xmm18
+
+// CHECK: vpinsrw $123, (%rcx), %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0xc4,0x11,0x7b]
+ vpinsrw $123, (%rcx), %xmm25, %xmm18
+
+// CHECK: vpinsrw $123, 291(%rax,%r14,8), %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xa1,0x35,0x00,0xc4,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpinsrw $123, 291(%rax,%r14,8), %xmm25, %xmm18
+
+// CHECK: vpinsrw $123, 254(%rdx), %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0xc4,0x52,0x7f,0x7b]
+ vpinsrw $123, 254(%rdx), %xmm25, %xmm18
+
+// CHECK: vpinsrw $123, 256(%rdx), %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0xc4,0x92,0x00,0x01,0x00,0x00,0x7b]
+ vpinsrw $123, 256(%rdx), %xmm25, %xmm18
+
+// CHECK: vpinsrw $123, -256(%rdx), %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0xc4,0x52,0x80,0x7b]
+ vpinsrw $123, -256(%rdx), %xmm25, %xmm18
+
+// CHECK: vpinsrw $123, -258(%rdx), %xmm25, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0xc4,0x92,0xfe,0xfe,0xff,0xff,0x7b]
+ vpinsrw $123, -258(%rdx), %xmm25, %xmm18
+
+// CHECK: vpextrw $123, %xmm28, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x15,0x21,0x7b]
+ vpextrw $123, %xmm28, (%rcx)
+
+// CHECK: vpextrw $123, %xmm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x15,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpextrw $123, %xmm28, 291(%rax,%r14,8)
+
+// CHECK: vpextrw $123, %xmm28, 254(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x15,0x62,0x7f,0x7b]
+ vpextrw $123, %xmm28, 254(%rdx)
+
+// CHECK: vpextrw $123, %xmm28, 256(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x15,0xa2,0x00,0x01,0x00,0x00,0x7b]
+ vpextrw $123, %xmm28, 256(%rdx)
+
+// CHECK: vpextrw $123, %xmm28, -256(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x15,0x62,0x80,0x7b]
+ vpextrw $123, %xmm28, -256(%rdx)
+
+// CHECK: vpextrw $123, %xmm28, -258(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x15,0xa2,0xfe,0xfe,0xff,0xff,0x7b]
+ vpextrw $123, %xmm28, -258(%rdx)
+
+// CHECK: vpextrw $171, %xmm30, %eax
+// CHECK: encoding: [0x62,0x91,0x7d,0x08,0xc5,0xc6,0xab]
+ vpextrw $171, %xmm30,%rax
+
+// CHECK: vpextrw $123, %xmm30, %eax
+// CHECK: encoding: [0x62,0x91,0x7d,0x08,0xc5,0xc6,0x7b]
+ vpextrw $123, %xmm30,%rax
+
+// CHECK: vpextrw $123, %xmm30, %r8d
+// CHECK: encoding: [0x62,0x11,0x7d,0x08,0xc5,0xc6,0x7b]
+ vpextrw $123, %xmm30,%r8
+
+// CHECK: vpextrw $171, %xmm28, %eax
+// CHECK: encoding: [0x62,0x91,0x7d,0x08,0xc5,0xc4,0xab]
+ vpextrw $0xab, %xmm28, %eax
+
+// CHECK: vpextrw $123, %xmm28, %eax
+// CHECK: encoding: [0x62,0x91,0x7d,0x08,0xc5,0xc4,0x7b]
+ vpextrw $0x7b, %xmm28, %eax
+
+// CHECK: vpextrw $123, %xmm28, %r8d
+// CHECK: encoding: [0x62,0x11,0x7d,0x08,0xc5,0xc4,0x7b]
+ vpextrw $0x7b, %xmm28, %r8d
+
+// CHECK: vpextrw $171, %xmm28, %eax
+// CHECK: encoding: [0x62,0x91,0x7d,0x08,0xc5,0xc4,0xab]
+ vpextrw $0xab, %xmm28, %eax
+
+// CHECK: vpextrw $123, %xmm28, %eax
+// CHECK: encoding: [0x62,0x91,0x7d,0x08,0xc5,0xc4,0x7b]
+ vpextrw $0x7b, %xmm28, %eax
+
+// CHECK: vpextrw $123, %xmm28, %r8d
+// CHECK: encoding: [0x62,0x11,0x7d,0x08,0xc5,0xc4,0x7b]
+ vpextrw $0x7b, %xmm28, %r8d
+
+// CHECK: vpextrw $171, %xmm20, %eax
+// CHECK: encoding: [0x62,0xb1,0x7d,0x08,0xc5,0xc4,0xab]
+ vpextrw $0xab, %xmm20, %eax
+
+// CHECK: vpextrw $123, %xmm20, %eax
+// CHECK: encoding: [0x62,0xb1,0x7d,0x08,0xc5,0xc4,0x7b]
+ vpextrw $0x7b, %xmm20, %eax
+
+// CHECK: vpextrw $123, %xmm20, %r8d
+// CHECK: encoding: [0x62,0x31,0x7d,0x08,0xc5,0xc4,0x7b]
+ vpextrw $0x7b, %xmm20, %r8d
+
+// CHECK: vpextrw $171, %xmm19, %eax
+// CHECK: encoding: [0x62,0xb1,0x7d,0x08,0xc5,0xc3,0xab]
+ vpextrw $0xab, %xmm19, %eax
+
+// CHECK: vpextrw $123, %xmm19, %eax
+// CHECK: encoding: [0x62,0xb1,0x7d,0x08,0xc5,0xc3,0x7b]
+ vpextrw $0x7b, %xmm19, %eax
+
+// CHECK: vpextrw $123, %xmm19, %r8d
+// CHECK: encoding: [0x62,0x31,0x7d,0x08,0xc5,0xc3,0x7b]
+ vpextrw $0x7b, %xmm19, %r8d
+
+// CHECK: kunpckdq %k4, %k6, %k4
+// CHECK: encoding: [0xc4,0xe1,0xcc,0x4b,0xe4]
+ kunpckdq %k4, %k6, %k4
+
+// CHECK: kunpckwd %k6, %k5, %k5
+// CHECK: encoding: [0xc5,0xd4,0x4b,0xee]
+ kunpckwd %k6, %k5, %k5
+
+// CHECK: ktestd %k3, %k3
+// CHECK: encoding: [0xc4,0xe1,0xf9,0x99,0xdb]
+ ktestd %k3, %k3
+
+// CHECK: ktestq %k6, %k2
+// CHECK: encoding: [0xc4,0xe1,0xf8,0x99,0xd6]
+ ktestq %k6, %k2
+
+// CHECK: kortestd %k3, %k4
+// CHECK: encoding: [0xc4,0xe1,0xf9,0x98,0xe3]
+ kortestd %k3, %k4
+
+// CHECK: kortestq %k4, %k5
+// CHECK: encoding: [0xc4,0xe1,0xf8,0x98,0xec]
+ kortestq %k4, %k5
+
+// CHECK: kaddd %k6, %k6, %k2
+// CHECK: encoding: [0xc4,0xe1,0xcd,0x4a,0xd6]
+ kaddd %k6, %k6, %k2
+
+// CHECK: kaddq %k4, %k6, %k3
+// CHECK: encoding: [0xc4,0xe1,0xcc,0x4a,0xdc]
+ kaddq %k4, %k6, %k3
+
+// CHECK: vpalignr $171, %zmm17, %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x2d,0x40,0x0f,0xf1,0xab]
+ vpalignr $171, %zmm17, %zmm26, %zmm22
+
+// CHECK: vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3}
+// CHECK: encoding: [0x62,0xa3,0x2d,0x43,0x0f,0xf1,0xab]
+ vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3}
+
+// CHECK: vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0xa3,0x2d,0xc3,0x0f,0xf1,0xab]
+ vpalignr $171, %zmm17, %zmm26, %zmm22 {%k3} {z}
+
+// CHECK: vpalignr $123, %zmm17, %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x2d,0x40,0x0f,0xf1,0x7b]
+ vpalignr $123, %zmm17, %zmm26, %zmm22
+
+// CHECK: vpalignr $123, (%rcx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0x31,0x7b]
+ vpalignr $123, (%rcx), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, 291(%rax,%r14,8), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xa3,0x2d,0x40,0x0f,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpalignr $123, 291(%rax,%r14,8), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, 8128(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0x72,0x7f,0x7b]
+ vpalignr $123, 8128(%rdx), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, 8192(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0xb2,0x00,0x20,0x00,0x00,0x7b]
+ vpalignr $123, 8192(%rdx), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, -8192(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0x72,0x80,0x7b]
+ vpalignr $123, -8192(%rdx), %zmm26, %zmm22
+
+// CHECK: vpalignr $123, -8256(%rdx), %zmm26, %zmm22
+// CHECK: encoding: [0x62,0xe3,0x2d,0x40,0x0f,0xb2,0xc0,0xdf,0xff,0xff,0x7b]
+ vpalignr $123, -8256(%rdx), %zmm26, %zmm22
+
+// CHECK: vdbpsadbw $171, %zmm18, %zmm20, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x5d,0x40,0x42,0xea,0xab]
+ vdbpsadbw $171, %zmm18, %zmm20, %zmm21
+
+// CHECK: vdbpsadbw $171, %zmm18, %zmm20, %zmm21 {%k1}
+// CHECK: encoding: [0x62,0xa3,0x5d,0x41,0x42,0xea,0xab]
+ vdbpsadbw $171, %zmm18, %zmm20, %zmm21 {%k1}
+
+// CHECK: vdbpsadbw $171, %zmm18, %zmm20, %zmm21 {%k1} {z}
+// CHECK: encoding: [0x62,0xa3,0x5d,0xc1,0x42,0xea,0xab]
+ vdbpsadbw $171, %zmm18, %zmm20, %zmm21 {%k1} {z}
+
+// CHECK: vdbpsadbw $123, %zmm18, %zmm20, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x5d,0x40,0x42,0xea,0x7b]
+ vdbpsadbw $123, %zmm18, %zmm20, %zmm21
+
+// CHECK: vdbpsadbw $123, (%rcx), %zmm20, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x5d,0x40,0x42,0x29,0x7b]
+ vdbpsadbw $123, (%rcx), %zmm20, %zmm21
+
+// CHECK: vdbpsadbw $123, 291(%rax,%r14,8), %zmm20, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x5d,0x40,0x42,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vdbpsadbw $123, 291(%rax,%r14,8), %zmm20, %zmm21
+
+// CHECK: vdbpsadbw $123, 8128(%rdx), %zmm20, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x5d,0x40,0x42,0x6a,0x7f,0x7b]
+ vdbpsadbw $123, 8128(%rdx), %zmm20, %zmm21
+
+// CHECK: vdbpsadbw $123, 8192(%rdx), %zmm20, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x5d,0x40,0x42,0xaa,0x00,0x20,0x00,0x00,0x7b]
+ vdbpsadbw $123, 8192(%rdx), %zmm20, %zmm21
+
+// CHECK: vdbpsadbw $123, -8192(%rdx), %zmm20, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x5d,0x40,0x42,0x6a,0x80,0x7b]
+ vdbpsadbw $123, -8192(%rdx), %zmm20, %zmm21
+
+// CHECK: vdbpsadbw $123, -8256(%rdx), %zmm20, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x5d,0x40,0x42,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
+ vdbpsadbw $123, -8256(%rdx), %zmm20, %zmm21
+
+// CHECK: vpslldq $171, %zmm28, %zmm20
+// CHECK: encoding: [0x62,0x91,0x5d,0x40,0x73,0xfc,0xab]
+ vpslldq $171, %zmm28, %zmm20
+
+// CHECK: vpslldq $123, %zmm28, %zmm20
+// CHECK: encoding: [0x62,0x91,0x5d,0x40,0x73,0xfc,0x7b]
+ vpslldq $123, %zmm28, %zmm20
+
+// CHECK: vpslldq $123, (%rcx), %zmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0x39,0x7b]
+ vpslldq $123, (%rcx), %zmm20
+
+// CHECK: vpslldq $123, 291(%rax,%r14,8), %zmm20
+// CHECK: encoding: [0x62,0xb1,0x5d,0x40,0x73,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpslldq $123, 291(%rax,%r14,8), %zmm20
+
+// CHECK: vpslldq $123, 8128(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0x7a,0x7f,0x7b]
+ vpslldq $123, 8128(%rdx), %zmm20
+
+// CHECK: vpslldq $123, 8192(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0xba,0x00,0x20,0x00,0x00,0x7b]
+ vpslldq $123, 8192(%rdx), %zmm20
+
+// CHECK: vpslldq $123, -8192(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0x7a,0x80,0x7b]
+ vpslldq $123, -8192(%rdx), %zmm20
+
+// CHECK: vpslldq $123, -8256(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x40,0x73,0xba,0xc0,0xdf,0xff,0xff,0x7b]
+ vpslldq $123, -8256(%rdx), %zmm20
+
+// CHECK: vpsrldq $171, %zmm26, %zmm18
+// CHECK: encoding: [0x62,0x91,0x6d,0x40,0x73,0xda,0xab]
+ vpsrldq $171, %zmm26, %zmm18
+
+// CHECK: vpsrldq $123, %zmm26, %zmm18
+// CHECK: encoding: [0x62,0x91,0x6d,0x40,0x73,0xda,0x7b]
+ vpsrldq $123, %zmm26, %zmm18
+
+// CHECK: vpsrldq $123, (%rcx), %zmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x19,0x7b]
+ vpsrldq $123, (%rcx), %zmm18
+
+// CHECK: vpsrldq $123, 291(%rax,%r14,8), %zmm18
+// CHECK: encoding: [0x62,0xb1,0x6d,0x40,0x73,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpsrldq $123, 291(%rax,%r14,8), %zmm18
+
+// CHECK: vpsrldq $123, 8128(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x5a,0x7f,0x7b]
+ vpsrldq $123, 8128(%rdx), %zmm18
+
+// CHECK: vpsrldq $123, 8192(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x9a,0x00,0x20,0x00,0x00,0x7b]
+ vpsrldq $123, 8192(%rdx), %zmm18
+
+// CHECK: vpsrldq $123, -8192(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x5a,0x80,0x7b]
+ vpsrldq $123, -8192(%rdx), %zmm18
+
+// CHECK: vpsrldq $123, -8256(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x40,0x73,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+ vpsrldq $123, -8256(%rdx), %zmm18
+
+// CHECK: vpsadbw %zmm22, %zmm25, %zmm28
+// CHECK: encoding: [0x62,0x21,0x35,0x40,0xf6,0xe6]
+ vpsadbw %zmm22, %zmm25, %zmm28
+
+// CHECK: vpsadbw (%rcx), %zmm25, %zmm28
+// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0x21]
+ vpsadbw (%rcx), %zmm25, %zmm28
+
+// CHECK: vpsadbw 291(%rax,%r14,8), %zmm25, %zmm28
+// CHECK: encoding: [0x62,0x21,0x35,0x40,0xf6,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpsadbw 291(%rax,%r14,8), %zmm25, %zmm28
+
+// CHECK: vpsadbw 8128(%rdx), %zmm25, %zmm28
+// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0x62,0x7f]
+ vpsadbw 8128(%rdx), %zmm25, %zmm28
+
+// CHECK: vpsadbw 8192(%rdx), %zmm25, %zmm28
+// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0xa2,0x00,0x20,0x00,0x00]
+ vpsadbw 8192(%rdx), %zmm25, %zmm28
+
+// CHECK: vpsadbw -8192(%rdx), %zmm25, %zmm28
+// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0x62,0x80]
+ vpsadbw -8192(%rdx), %zmm25, %zmm28
+
+// CHECK: vpsadbw -8256(%rdx), %zmm25, %zmm28
+// CHECK: encoding: [0x62,0x61,0x35,0x40,0xf6,0xa2,0xc0,0xdf,0xff,0xff]
+ vpsadbw -8256(%rdx), %zmm25, %zmm28
+
+// CHECK: vpshuflw $171, %zmm28, %zmm26
+// CHECK: encoding: [0x62,0x01,0x7f,0x48,0x70,0xd4,0xab]
+ vpshuflw $171, %zmm28, %zmm26
+
+// CHECK: vpshuflw $171, %zmm28, %zmm26 {%k1}
+// CHECK: encoding: [0x62,0x01,0x7f,0x49,0x70,0xd4,0xab]
+ vpshuflw $171, %zmm28, %zmm26 {%k1}
+
+// CHECK: vpshuflw $171, %zmm28, %zmm26 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0x7f,0xc9,0x70,0xd4,0xab]
+ vpshuflw $171, %zmm28, %zmm26 {%k1} {z}
+
+// CHECK: vpshuflw $123, %zmm28, %zmm26
+// CHECK: encoding: [0x62,0x01,0x7f,0x48,0x70,0xd4,0x7b]
+ vpshuflw $123, %zmm28, %zmm26
+
+// CHECK: vpshuflw $123, (%rcx), %zmm26
+// CHECK: encoding: [0x62,0x61,0x7f,0x48,0x70,0x11,0x7b]
+ vpshuflw $123, (%rcx), %zmm26
+
+// CHECK: vpshuflw $123, 291(%rax,%r14,8), %zmm26
+// CHECK: encoding: [0x62,0x21,0x7f,0x48,0x70,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshuflw $123, 291(%rax,%r14,8), %zmm26
+
+// CHECK: vpshuflw $123, 8128(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x61,0x7f,0x48,0x70,0x52,0x7f,0x7b]
+ vpshuflw $123, 8128(%rdx), %zmm26
+
+// CHECK: vpshuflw $123, 8192(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x61,0x7f,0x48,0x70,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vpshuflw $123, 8192(%rdx), %zmm26
+
+// CHECK: vpshuflw $123, -8192(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x61,0x7f,0x48,0x70,0x52,0x80,0x7b]
+ vpshuflw $123, -8192(%rdx), %zmm26
+
+// CHECK: vpshuflw $123, -8256(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x61,0x7f,0x48,0x70,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vpshuflw $123, -8256(%rdx), %zmm26
+
+// CHECK: vpshufhw $171, %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xa1,0x7e,0x48,0x70,0xd2,0xab]
+ vpshufhw $171, %zmm18, %zmm18
+
+// CHECK: vpshufhw $171, %zmm18, %zmm18 {%k4}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x4c,0x70,0xd2,0xab]
+ vpshufhw $171, %zmm18, %zmm18 {%k4}
+
+// CHECK: vpshufhw $171, %zmm18, %zmm18 {%k4} {z}
+// CHECK: encoding: [0x62,0xa1,0x7e,0xcc,0x70,0xd2,0xab]
+ vpshufhw $171, %zmm18, %zmm18 {%k4} {z}
+
+// CHECK: vpshufhw $123, %zmm18, %zmm18
+// CHECK: encoding: [0x62,0xa1,0x7e,0x48,0x70,0xd2,0x7b]
+ vpshufhw $123, %zmm18, %zmm18
+
+// CHECK: vpshufhw $123, (%rcx), %zmm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x70,0x11,0x7b]
+ vpshufhw $123, (%rcx), %zmm18
+
+// CHECK: vpshufhw $123, 291(%rax,%r14,8), %zmm18
+// CHECK: encoding: [0x62,0xa1,0x7e,0x48,0x70,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshufhw $123, 291(%rax,%r14,8), %zmm18
+
+// CHECK: vpshufhw $123, 8128(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x70,0x52,0x7f,0x7b]
+ vpshufhw $123, 8128(%rdx), %zmm18
+
+// CHECK: vpshufhw $123, 8192(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x70,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vpshufhw $123, 8192(%rdx), %zmm18
+
+// CHECK: vpshufhw $123, -8192(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x70,0x52,0x80,0x7b]
+ vpshufhw $123, -8192(%rdx), %zmm18
+
+// CHECK: vpshufhw $123, -8256(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x70,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vpshufhw $123, -8256(%rdx), %zmm18
+
+// CHECK: vpbroadcastb %xmm23, %zmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x78,0xcf]
+ vpbroadcastb %xmm23, %zmm25
+
+// CHECK: vpbroadcastb %xmm23, %zmm25 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x4f,0x78,0xcf]
+ vpbroadcastb %xmm23, %zmm25 {%k7}
+
+// CHECK: vpbroadcastb %xmm23, %zmm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xcf,0x78,0xcf]
+ vpbroadcastb %xmm23, %zmm25 {%k7} {z}
+
+// CHECK: vpbroadcastb (%rcx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x09]
+ vpbroadcastb (%rcx), %zmm25
+
+// CHECK: vpbroadcastb 291(%rax,%r14,8), %zmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x78,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastb 291(%rax,%r14,8), %zmm25
+
+// CHECK: vpbroadcastb 127(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x4a,0x7f]
+ vpbroadcastb 127(%rdx), %zmm25
+
+// CHECK: vpbroadcastb 128(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x8a,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %zmm25
+
+// CHECK: vpbroadcastb -128(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x4a,0x80]
+ vpbroadcastb -128(%rdx), %zmm25
+
+// CHECK: vpbroadcastb -129(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x8a,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %zmm25
+
+// CHECK: vpbroadcastb %eax, %zmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x7a,0xd8]
+ vpbroadcastb %eax, %zmm19
+
+// CHECK: vpbroadcastb %eax, %zmm19 {%k7}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4f,0x7a,0xd8]
+ vpbroadcastb %eax, %zmm19 {%k7}
+
+// CHECK: vpbroadcastb %eax, %zmm19 {%k7} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0xcf,0x7a,0xd8]
+ vpbroadcastb %eax, %zmm19 {%k7} {z}
+
+// CHECK: vpbroadcastw %xmm19, %zmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x79,0xf3]
+ vpbroadcastw %xmm19, %zmm30
+
+// CHECK: vpbroadcastw %xmm19, %zmm30 {%k4}
+// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0x79,0xf3]
+ vpbroadcastw %xmm19, %zmm30 {%k4}
+
+// CHECK: vpbroadcastw %xmm19, %zmm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0x79,0xf3]
+ vpbroadcastw %xmm19, %zmm30 {%k4} {z}
+
+// CHECK: vpbroadcastw (%rcx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0x31]
+ vpbroadcastw (%rcx), %zmm30
+
+// CHECK: vpbroadcastw 291(%rax,%r14,8), %zmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x79,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastw 291(%rax,%r14,8), %zmm30
+
+// CHECK: vpbroadcastw 254(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0x72,0x7f]
+ vpbroadcastw 254(%rdx), %zmm30
+
+// CHECK: vpbroadcastw 256(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0xb2,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %zmm30
+
+// CHECK: vpbroadcastw -256(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0x72,0x80]
+ vpbroadcastw -256(%rdx), %zmm30
+
+// CHECK: vpbroadcastw -258(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0xb2,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %zmm30
+
+// CHECK: vpbroadcastw %eax, %zmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x7b,0xc0]
+ vpbroadcastw %eax, %zmm24
+
+// CHECK: vpbroadcastw %eax, %zmm24 {%k1}
+// CHECK: encoding: [0x62,0x62,0x7d,0x49,0x7b,0xc0]
+ vpbroadcastw %eax, %zmm24 {%k1}
+
+// CHECK: vpbroadcastw %eax, %zmm24 {%k1} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xc9,0x7b,0xc0]
+ vpbroadcastw %eax, %zmm24 {%k1} {z}
+
+// CHECK: vpextrw.s $171, %xmm28, %eax
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x15,0xe0,0xab]
+ vpextrw.s $0xab, %xmm28, %eax
+
+// CHECK: vpextrw.s $123, %xmm28, %eax
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x15,0xe0,0x7b]
+ vpextrw.s $0x7b, %xmm28, %eax
+
+// CHECK: vpextrw.s $123, %xmm28, %r8d
+// CHECK: encoding: [0x62,0x43,0x7d,0x08,0x15,0xe0,0x7b]
+ vpextrw.s $0x7b, %xmm28, %r8d
+
+// CHECK: vpextrw.s $171, %xmm28, %eax
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x15,0xe0,0xab]
+ vpextrw.s $0xab, %xmm28, %eax
+
+// CHECK: vpextrw.s $123, %xmm28, %eax
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x15,0xe0,0x7b]
+ vpextrw.s $0x7b, %xmm28, %eax
+
+// CHECK: vpextrw.s $123, %xmm28, %r8d
+// CHECK: encoding: [0x62,0x43,0x7d,0x08,0x15,0xe0,0x7b]
+ vpextrw.s $0x7b, %xmm28, %r8d
+
+// CHECK: vpextrw.s $171, %xmm20, %eax
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x15,0xe0,0xab]
+ vpextrw.s $0xab, %xmm20, %eax
+
+// CHECK: vpextrw.s $123, %xmm20, %eax
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x15,0xe0,0x7b]
+ vpextrw.s $0x7b, %xmm20, %eax
+
+// CHECK: vpextrw.s $123, %xmm20, %r8d
+// CHECK: encoding: [0x62,0xc3,0x7d,0x08,0x15,0xe0,0x7b]
+ vpextrw.s $0x7b, %xmm20, %r8d
+
+// CHECK: vpextrw.s $171, %xmm19, %eax
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x15,0xd8,0xab]
+ vpextrw.s $0xab, %xmm19, %eax
+
+// CHECK: vpextrw.s $123, %xmm19, %eax
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x15,0xd8,0x7b]
+ vpextrw.s $0x7b, %xmm19, %eax
+
+// CHECK: vpextrw.s $123, %xmm19, %r8d
+// CHECK: encoding: [0x62,0xc3,0x7d,0x08,0x15,0xd8,0x7b]
+ vpextrw.s $0x7b, %xmm19, %r8d
+
+// CHECK: vmovdqu16.s %zmm19, %zmm22
+// CHECK: encoding: [0x62,0xa1,0xff,0x48,0x7f,0xde]
+ vmovdqu16.s %zmm19, %zmm22
+
+// CHECK: vmovdqu16.s %zmm19, %zmm22 {%k3}
+// CHECK: encoding: [0x62,0xa1,0xff,0x4b,0x7f,0xde]
+ vmovdqu16.s %zmm19, %zmm22 {%k3}
+
+// CHECK: vmovdqu16.s %zmm19, %zmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0xff,0xcb,0x7f,0xde]
+ vmovdqu16.s %zmm19, %zmm22 {%k3} {z}
+
+// CHECK: vmovdqu16.s %zmm22, %zmm20
+// CHECK: encoding: [0x62,0xa1,0xff,0x48,0x7f,0xf4]
+ vmovdqu16.s %zmm22, %zmm20
+
+// CHECK: vmovdqu16.s %zmm22, %zmm20 {%k3}
+// CHECK: encoding: [0x62,0xa1,0xff,0x4b,0x7f,0xf4]
+ vmovdqu16.s %zmm22, %zmm20 {%k3}
+
+// CHECK: vmovdqu16.s %zmm22, %zmm20 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0xff,0xcb,0x7f,0xf4]
+ vmovdqu16.s %zmm22, %zmm20 {%k3} {z}
+
+// CHECK: vmovdqu16.s %zmm26, %zmm26
+// CHECK: encoding: [0x62,0x01,0xff,0x48,0x7f,0xd2]
+ vmovdqu16.s %zmm26, %zmm26
+
+// CHECK: vmovdqu16.s %zmm26, %zmm26 {%k2}
+// CHECK: encoding: [0x62,0x01,0xff,0x4a,0x7f,0xd2]
+ vmovdqu16.s %zmm26, %zmm26 {%k2}
+
+// CHECK: vmovdqu16.s %zmm26, %zmm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0xff,0xca,0x7f,0xd2]
+ vmovdqu16.s %zmm26, %zmm26 {%k2} {z}
+
+// CHECK: vmovdqu16.s %zmm22, %zmm19
+// CHECK: encoding: [0x62,0xa1,0xff,0x48,0x7f,0xf3]
+ vmovdqu16.s %zmm22, %zmm19
+
+// CHECK: vmovdqu16.s %zmm22, %zmm19 {%k7}
+// CHECK: encoding: [0x62,0xa1,0xff,0x4f,0x7f,0xf3]
+ vmovdqu16.s %zmm22, %zmm19 {%k7}
+
+// CHECK: vmovdqu16.s %zmm22, %zmm19 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0xff,0xcf,0x7f,0xf3]
+ vmovdqu16.s %zmm22, %zmm19 {%k7} {z}
+
+// CHECK: vmovdqu8.s %zmm23, %zmm24
+// CHECK: encoding: [0x62,0x81,0x7f,0x48,0x7f,0xf8]
+ vmovdqu8.s %zmm23, %zmm24
+
+// CHECK: vmovdqu8.s %zmm23, %zmm24 {%k3}
+// CHECK: encoding: [0x62,0x81,0x7f,0x4b,0x7f,0xf8]
+ vmovdqu8.s %zmm23, %zmm24 {%k3}
+
+// CHECK: vmovdqu8.s %zmm23, %zmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0x7f,0xcb,0x7f,0xf8]
+ vmovdqu8.s %zmm23, %zmm24 {%k3} {z}
+
+// CHECK: vmovdqu8.s %zmm27, %zmm23
+// CHECK: encoding: [0x62,0x21,0x7f,0x48,0x7f,0xdf]
+ vmovdqu8.s %zmm27, %zmm23
+
+// CHECK: vmovdqu8.s %zmm27, %zmm23 {%k5}
+// CHECK: encoding: [0x62,0x21,0x7f,0x4d,0x7f,0xdf]
+ vmovdqu8.s %zmm27, %zmm23 {%k5}
+
+// CHECK: vmovdqu8.s %zmm27, %zmm23 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0x7f,0xcd,0x7f,0xdf]
+ vmovdqu8.s %zmm27, %zmm23 {%k5} {z}
+
+// CHECK: vmovdqu8.s %zmm17, %zmm23
+// CHECK: encoding: [0x62,0xa1,0x7f,0x48,0x7f,0xcf]
+ vmovdqu8.s %zmm17, %zmm23
+
+// CHECK: vmovdqu8.s %zmm17, %zmm23 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x4a,0x7f,0xcf]
+ vmovdqu8.s %zmm17, %zmm23 {%k2}
+
+// CHECK: vmovdqu8.s %zmm17, %zmm23 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x7f,0xca,0x7f,0xcf]
+ vmovdqu8.s %zmm17, %zmm23 {%k2} {z}
+
+// CHECK: vmovdqu8.s %zmm19, %zmm28
+// CHECK: encoding: [0x62,0x81,0x7f,0x48,0x7f,0xdc]
+ vmovdqu8.s %zmm19, %zmm28
+
+// CHECK: vmovdqu8.s %zmm19, %zmm28 {%k4}
+// CHECK: encoding: [0x62,0x81,0x7f,0x4c,0x7f,0xdc]
+ vmovdqu8.s %zmm19, %zmm28 {%k4}
+
+// CHECK: vmovdqu8.s %zmm19, %zmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x81,0x7f,0xcc,0x7f,0xdc]
+ vmovdqu8.s %zmm19, %zmm28 {%k4} {z}
+
diff --git a/test/MC/X86/x86-64-avx512bw_vl.s b/test/MC/X86/x86-64-avx512bw_vl.s
index 014be27564bf..1c3784a61527 100644
--- a/test/MC/X86/x86-64-avx512bw_vl.s
+++ b/test/MC/X86/x86-64-avx512bw_vl.s
@@ -5776,165 +5776,325 @@
// CHECK: encoding: [0x62,0x61,0x35,0x20,0xd9,0x9a,0xe0,0xef,0xff,0xff]
vpsubusw -4128(%rdx), %ymm25, %ymm27
-// CHECK: vpshufhw $171, %xmm19, %xmm23
-// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x70,0xfb,0xab]
- vpshufhw $171, %xmm19, %xmm23
+// CHECK: vpshufhw $171, %xmm27, %xmm28
+// CHECK: encoding: [0x62,0x01,0x7e,0x08,0x70,0xe3,0xab]
+ vpshufhw $171, %xmm27, %xmm28
-// CHECK: vpshufhw $171, %xmm19, %xmm23 {%k7}
-// CHECK: encoding: [0x62,0xa1,0xfe,0x0f,0x70,0xfb,0xab]
- vpshufhw $171, %xmm19, %xmm23 {%k7}
+// CHECK: vpshufhw $171, %xmm27, %xmm28 {%k6}
+// CHECK: encoding: [0x62,0x01,0x7e,0x0e,0x70,0xe3,0xab]
+ vpshufhw $171, %xmm27, %xmm28 {%k6}
-// CHECK: vpshufhw $171, %xmm19, %xmm23 {%k7} {z}
-// CHECK: encoding: [0x62,0xa1,0xfe,0x8f,0x70,0xfb,0xab]
- vpshufhw $171, %xmm19, %xmm23 {%k7} {z}
+// CHECK: vpshufhw $171, %xmm27, %xmm28 {%k6} {z}
+// CHECK: encoding: [0x62,0x01,0x7e,0x8e,0x70,0xe3,0xab]
+ vpshufhw $171, %xmm27, %xmm28 {%k6} {z}
-// CHECK: vpshufhw $123, %xmm19, %xmm23
-// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x70,0xfb,0x7b]
- vpshufhw $123, %xmm19, %xmm23
+// CHECK: vpshufhw $123, %xmm27, %xmm28
+// CHECK: encoding: [0x62,0x01,0x7e,0x08,0x70,0xe3,0x7b]
+ vpshufhw $123, %xmm27, %xmm28
-// CHECK: vpshufhw $123, (%rcx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0x39,0x7b]
- vpshufhw $123, (%rcx), %xmm23
+// CHECK: vpshufhw $123, (%rcx), %xmm28
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x70,0x21,0x7b]
+ vpshufhw $123, (%rcx), %xmm28
-// CHECK: vpshufhw $123, 291(%rax,%r14,8), %xmm23
-// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x70,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
- vpshufhw $123, 291(%rax,%r14,8), %xmm23
+// CHECK: vpshufhw $123, 291(%rax,%r14,8), %xmm28
+// CHECK: encoding: [0x62,0x21,0x7e,0x08,0x70,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshufhw $123, 291(%rax,%r14,8), %xmm28
-// CHECK: vpshufhw $123, 2032(%rdx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0x7a,0x7f,0x7b]
- vpshufhw $123, 2032(%rdx), %xmm23
+// CHECK: vpshufhw $123, 2032(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x70,0x62,0x7f,0x7b]
+ vpshufhw $123, 2032(%rdx), %xmm28
-// CHECK: vpshufhw $123, 2048(%rdx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0xba,0x00,0x08,0x00,0x00,0x7b]
- vpshufhw $123, 2048(%rdx), %xmm23
+// CHECK: vpshufhw $123, 2048(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x70,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vpshufhw $123, 2048(%rdx), %xmm28
-// CHECK: vpshufhw $123, -2048(%rdx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0x7a,0x80,0x7b]
- vpshufhw $123, -2048(%rdx), %xmm23
+// CHECK: vpshufhw $123, -2048(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x70,0x62,0x80,0x7b]
+ vpshufhw $123, -2048(%rdx), %xmm28
-// CHECK: vpshufhw $123, -2064(%rdx), %xmm23
-// CHECK: encoding: [0x62,0xe1,0xfe,0x08,0x70,0xba,0xf0,0xf7,0xff,0xff,0x7b]
- vpshufhw $123, -2064(%rdx), %xmm23
+// CHECK: vpshufhw $123, -2064(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x70,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vpshufhw $123, -2064(%rdx), %xmm28
-// CHECK: vpshufhw $171, %ymm17, %ymm29
-// CHECK: encoding: [0x62,0x21,0xfe,0x28,0x70,0xe9,0xab]
- vpshufhw $171, %ymm17, %ymm29
+// CHECK: vpshufhw $171, %ymm22, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x7e,0x28,0x70,0xde,0xab]
+ vpshufhw $171, %ymm22, %ymm19
-// CHECK: vpshufhw $171, %ymm17, %ymm29 {%k7}
-// CHECK: encoding: [0x62,0x21,0xfe,0x2f,0x70,0xe9,0xab]
- vpshufhw $171, %ymm17, %ymm29 {%k7}
+// CHECK: vpshufhw $171, %ymm22, %ymm19 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x29,0x70,0xde,0xab]
+ vpshufhw $171, %ymm22, %ymm19 {%k1}
-// CHECK: vpshufhw $171, %ymm17, %ymm29 {%k7} {z}
-// CHECK: encoding: [0x62,0x21,0xfe,0xaf,0x70,0xe9,0xab]
- vpshufhw $171, %ymm17, %ymm29 {%k7} {z}
+// CHECK: vpshufhw $171, %ymm22, %ymm19 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x7e,0xa9,0x70,0xde,0xab]
+ vpshufhw $171, %ymm22, %ymm19 {%k1} {z}
-// CHECK: vpshufhw $123, %ymm17, %ymm29
-// CHECK: encoding: [0x62,0x21,0xfe,0x28,0x70,0xe9,0x7b]
- vpshufhw $123, %ymm17, %ymm29
+// CHECK: vpshufhw $123, %ymm22, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x7e,0x28,0x70,0xde,0x7b]
+ vpshufhw $123, %ymm22, %ymm19
-// CHECK: vpshufhw $123, (%rcx), %ymm29
-// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0x29,0x7b]
- vpshufhw $123, (%rcx), %ymm29
+// CHECK: vpshufhw $123, (%rcx), %ymm19
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x70,0x19,0x7b]
+ vpshufhw $123, (%rcx), %ymm19
-// CHECK: vpshufhw $123, 291(%rax,%r14,8), %ymm29
-// CHECK: encoding: [0x62,0x21,0xfe,0x28,0x70,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
- vpshufhw $123, 291(%rax,%r14,8), %ymm29
+// CHECK: vpshufhw $123, 291(%rax,%r14,8), %ymm19
+// CHECK: encoding: [0x62,0xa1,0x7e,0x28,0x70,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshufhw $123, 291(%rax,%r14,8), %ymm19
-// CHECK: vpshufhw $123, 4064(%rdx), %ymm29
-// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0x6a,0x7f,0x7b]
- vpshufhw $123, 4064(%rdx), %ymm29
+// CHECK: vpshufhw $123, 4064(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x70,0x5a,0x7f,0x7b]
+ vpshufhw $123, 4064(%rdx), %ymm19
-// CHECK: vpshufhw $123, 4096(%rdx), %ymm29
-// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0xaa,0x00,0x10,0x00,0x00,0x7b]
- vpshufhw $123, 4096(%rdx), %ymm29
+// CHECK: vpshufhw $123, 4096(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x70,0x9a,0x00,0x10,0x00,0x00,0x7b]
+ vpshufhw $123, 4096(%rdx), %ymm19
-// CHECK: vpshufhw $123, -4096(%rdx), %ymm29
-// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0x6a,0x80,0x7b]
- vpshufhw $123, -4096(%rdx), %ymm29
+// CHECK: vpshufhw $123, -4096(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x70,0x5a,0x80,0x7b]
+ vpshufhw $123, -4096(%rdx), %ymm19
-// CHECK: vpshufhw $123, -4128(%rdx), %ymm29
-// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x70,0xaa,0xe0,0xef,0xff,0xff,0x7b]
- vpshufhw $123, -4128(%rdx), %ymm29
+// CHECK: vpshufhw $123, -4128(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x70,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+ vpshufhw $123, -4128(%rdx), %ymm19
-// CHECK: vpshuflw $171, %xmm27, %xmm30
-// CHECK: encoding: [0x62,0x01,0xff,0x08,0x70,0xf3,0xab]
- vpshuflw $171, %xmm27, %xmm30
+// CHECK: vpshufhw $171, %xmm22, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x70,0xf6,0xab]
+ vpshufhw $0xab, %xmm22, %xmm22
-// CHECK: vpshuflw $171, %xmm27, %xmm30 {%k6}
-// CHECK: encoding: [0x62,0x01,0xff,0x0e,0x70,0xf3,0xab]
- vpshuflw $171, %xmm27, %xmm30 {%k6}
+// CHECK: vpshufhw $171, %xmm22, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x0f,0x70,0xf6,0xab]
+ vpshufhw $0xab, %xmm22, %xmm22 {%k7}
-// CHECK: vpshuflw $171, %xmm27, %xmm30 {%k6} {z}
-// CHECK: encoding: [0x62,0x01,0xff,0x8e,0x70,0xf3,0xab]
- vpshuflw $171, %xmm27, %xmm30 {%k6} {z}
+// CHECK: vpshufhw $171, %xmm22, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x8f,0x70,0xf6,0xab]
+ vpshufhw $0xab, %xmm22, %xmm22 {%k7} {z}
-// CHECK: vpshuflw $123, %xmm27, %xmm30
-// CHECK: encoding: [0x62,0x01,0xff,0x08,0x70,0xf3,0x7b]
- vpshuflw $123, %xmm27, %xmm30
+// CHECK: vpshufhw $123, %xmm22, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x70,0xf6,0x7b]
+ vpshufhw $0x7b, %xmm22, %xmm22
-// CHECK: vpshuflw $123, (%rcx), %xmm30
-// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0x31,0x7b]
- vpshuflw $123, (%rcx), %xmm30
+// CHECK: vpshufhw $123, (%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x70,0x31,0x7b]
+ vpshufhw $0x7b,(%rcx), %xmm22
-// CHECK: vpshuflw $123, 291(%rax,%r14,8), %xmm30
-// CHECK: encoding: [0x62,0x21,0xff,0x08,0x70,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
- vpshuflw $123, 291(%rax,%r14,8), %xmm30
+// CHECK: vpshufhw $123, 4660(%rax,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x70,0xb4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpshufhw $0x7b,4660(%rax,%r14,8), %xmm22
-// CHECK: vpshuflw $123, 2032(%rdx), %xmm30
-// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0x72,0x7f,0x7b]
- vpshuflw $123, 2032(%rdx), %xmm30
+// CHECK: vpshufhw $123, 2032(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x70,0x72,0x7f,0x7b]
+ vpshufhw $0x7b,2032(%rdx), %xmm22
-// CHECK: vpshuflw $123, 2048(%rdx), %xmm30
-// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0xb2,0x00,0x08,0x00,0x00,0x7b]
- vpshuflw $123, 2048(%rdx), %xmm30
+// CHECK: vpshufhw $123, 2048(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x70,0xb2,0x00,0x08,0x00,0x00,0x7b]
+ vpshufhw $0x7b,2048(%rdx), %xmm22
-// CHECK: vpshuflw $123, -2048(%rdx), %xmm30
-// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0x72,0x80,0x7b]
- vpshuflw $123, -2048(%rdx), %xmm30
+// CHECK: vpshufhw $123, -2048(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x70,0x72,0x80,0x7b]
+ vpshufhw $0x7b,-2048(%rdx), %xmm22
-// CHECK: vpshuflw $123, -2064(%rdx), %xmm30
-// CHECK: encoding: [0x62,0x61,0xff,0x08,0x70,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
- vpshuflw $123, -2064(%rdx), %xmm30
+// CHECK: vpshufhw $123, -2064(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x70,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+ vpshufhw $0x7b,-2064(%rdx), %xmm22
-// CHECK: vpshuflw $171, %ymm25, %ymm25
-// CHECK: encoding: [0x62,0x01,0xff,0x28,0x70,0xc9,0xab]
- vpshuflw $171, %ymm25, %ymm25
+// CHECK: vpshufhw $171, %ymm24, %ymm27
+// CHECK: encoding: [0x62,0x01,0x7e,0x28,0x70,0xd8,0xab]
+ vpshufhw $0xab, %ymm24, %ymm27
-// CHECK: vpshuflw $171, %ymm25, %ymm25 {%k5}
-// CHECK: encoding: [0x62,0x01,0xff,0x2d,0x70,0xc9,0xab]
- vpshuflw $171, %ymm25, %ymm25 {%k5}
+// CHECK: vpshufhw $171, %ymm24, %ymm27 {%k1}
+// CHECK: encoding: [0x62,0x01,0x7e,0x29,0x70,0xd8,0xab]
+ vpshufhw $0xab, %ymm24, %ymm27 {%k1}
-// CHECK: vpshuflw $171, %ymm25, %ymm25 {%k5} {z}
-// CHECK: encoding: [0x62,0x01,0xff,0xad,0x70,0xc9,0xab]
- vpshuflw $171, %ymm25, %ymm25 {%k5} {z}
+// CHECK: vpshufhw $171, %ymm24, %ymm27 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0x7e,0xa9,0x70,0xd8,0xab]
+ vpshufhw $0xab, %ymm24, %ymm27 {%k1} {z}
-// CHECK: vpshuflw $123, %ymm25, %ymm25
-// CHECK: encoding: [0x62,0x01,0xff,0x28,0x70,0xc9,0x7b]
- vpshuflw $123, %ymm25, %ymm25
+// CHECK: vpshufhw $123, %ymm24, %ymm27
+// CHECK: encoding: [0x62,0x01,0x7e,0x28,0x70,0xd8,0x7b]
+ vpshufhw $0x7b, %ymm24, %ymm27
-// CHECK: vpshuflw $123, (%rcx), %ymm25
-// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x09,0x7b]
- vpshuflw $123, (%rcx), %ymm25
+// CHECK: vpshufhw $123, (%rcx), %ymm27
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x70,0x19,0x7b]
+ vpshufhw $0x7b,(%rcx), %ymm27
-// CHECK: vpshuflw $123, 291(%rax,%r14,8), %ymm25
-// CHECK: encoding: [0x62,0x21,0xff,0x28,0x70,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
- vpshuflw $123, 291(%rax,%r14,8), %ymm25
+// CHECK: vpshufhw $123, 4660(%rax,%r14,8), %ymm27
+// CHECK: encoding: [0x62,0x21,0x7e,0x28,0x70,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpshufhw $0x7b,4660(%rax,%r14,8), %ymm27
-// CHECK: vpshuflw $123, 4064(%rdx), %ymm25
-// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x4a,0x7f,0x7b]
- vpshuflw $123, 4064(%rdx), %ymm25
+// CHECK: vpshufhw $123, 4064(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x70,0x5a,0x7f,0x7b]
+ vpshufhw $0x7b,4064(%rdx), %ymm27
-// CHECK: vpshuflw $123, 4096(%rdx), %ymm25
-// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x8a,0x00,0x10,0x00,0x00,0x7b]
- vpshuflw $123, 4096(%rdx), %ymm25
+// CHECK: vpshufhw $123, 4096(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x70,0x9a,0x00,0x10,0x00,0x00,0x7b]
+ vpshufhw $0x7b,4096(%rdx), %ymm27
-// CHECK: vpshuflw $123, -4096(%rdx), %ymm25
-// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x4a,0x80,0x7b]
- vpshuflw $123, -4096(%rdx), %ymm25
+// CHECK: vpshufhw $123, -4096(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x70,0x5a,0x80,0x7b]
+ vpshufhw $0x7b,-4096(%rdx), %ymm27
-// CHECK: vpshuflw $123, -4128(%rdx), %ymm25
-// CHECK: encoding: [0x62,0x61,0xff,0x28,0x70,0x8a,0xe0,0xef,0xff,0xff,0x7b]
- vpshuflw $123, -4128(%rdx), %ymm25
+// CHECK: vpshufhw $123, -4128(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x70,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+ vpshufhw $0x7b,-4128(%rdx), %ymm27
+
+// CHECK: vpshuflw $171, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x70,0xf7,0xab]
+ vpshuflw $171, %xmm23, %xmm22
+
+// CHECK: vpshuflw $171, %xmm23, %xmm22 {%k5}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x0d,0x70,0xf7,0xab]
+ vpshuflw $171, %xmm23, %xmm22 {%k5}
+
+// CHECK: vpshuflw $171, %xmm23, %xmm22 {%k5} {z}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x8d,0x70,0xf7,0xab]
+ vpshuflw $171, %xmm23, %xmm22 {%k5} {z}
+
+// CHECK: vpshuflw $123, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x70,0xf7,0x7b]
+ vpshuflw $123, %xmm23, %xmm22
+
+// CHECK: vpshuflw $123, (%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0x31,0x7b]
+ vpshuflw $123, (%rcx), %xmm22
+
+// CHECK: vpshuflw $123, 291(%rax,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x70,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshuflw $123, 291(%rax,%r14,8), %xmm22
+
+// CHECK: vpshuflw $123, 2032(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0x72,0x7f,0x7b]
+ vpshuflw $123, 2032(%rdx), %xmm22
+
+// CHECK: vpshuflw $123, 2048(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0xb2,0x00,0x08,0x00,0x00,0x7b]
+ vpshuflw $123, 2048(%rdx), %xmm22
+
+// CHECK: vpshuflw $123, -2048(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0x72,0x80,0x7b]
+ vpshuflw $123, -2048(%rdx), %xmm22
+
+// CHECK: vpshuflw $123, -2064(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+ vpshuflw $123, -2064(%rdx), %xmm22
+
+// CHECK: vpshuflw $171, %ymm26, %ymm24
+// CHECK: encoding: [0x62,0x01,0x7f,0x28,0x70,0xc2,0xab]
+ vpshuflw $171, %ymm26, %ymm24
+
+// CHECK: vpshuflw $171, %ymm26, %ymm24 {%k2}
+// CHECK: encoding: [0x62,0x01,0x7f,0x2a,0x70,0xc2,0xab]
+ vpshuflw $171, %ymm26, %ymm24 {%k2}
+
+// CHECK: vpshuflw $171, %ymm26, %ymm24 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0x7f,0xaa,0x70,0xc2,0xab]
+ vpshuflw $171, %ymm26, %ymm24 {%k2} {z}
+
+// CHECK: vpshuflw $123, %ymm26, %ymm24
+// CHECK: encoding: [0x62,0x01,0x7f,0x28,0x70,0xc2,0x7b]
+ vpshuflw $123, %ymm26, %ymm24
+
+// CHECK: vpshuflw $123, (%rcx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7f,0x28,0x70,0x01,0x7b]
+ vpshuflw $123, (%rcx), %ymm24
+
+// CHECK: vpshuflw $123, 291(%rax,%r14,8), %ymm24
+// CHECK: encoding: [0x62,0x21,0x7f,0x28,0x70,0x84,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpshuflw $123, 291(%rax,%r14,8), %ymm24
+
+// CHECK: vpshuflw $123, 4064(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7f,0x28,0x70,0x42,0x7f,0x7b]
+ vpshuflw $123, 4064(%rdx), %ymm24
+
+// CHECK: vpshuflw $123, 4096(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7f,0x28,0x70,0x82,0x00,0x10,0x00,0x00,0x7b]
+ vpshuflw $123, 4096(%rdx), %ymm24
+
+// CHECK: vpshuflw $123, -4096(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7f,0x28,0x70,0x42,0x80,0x7b]
+ vpshuflw $123, -4096(%rdx), %ymm24
+
+// CHECK: vpshuflw $123, -4128(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7f,0x28,0x70,0x82,0xe0,0xef,0xff,0xff,0x7b]
+ vpshuflw $123, -4128(%rdx), %ymm24
+
+// CHECK: vpshuflw $171, %xmm28, %xmm21
+// CHECK: encoding: [0x62,0x81,0x7f,0x08,0x70,0xec,0xab]
+ vpshuflw $0xab, %xmm28, %xmm21
+
+// CHECK: vpshuflw $171, %xmm28, %xmm21 {%k6}
+// CHECK: encoding: [0x62,0x81,0x7f,0x0e,0x70,0xec,0xab]
+ vpshuflw $0xab, %xmm28, %xmm21 {%k6}
+
+// CHECK: vpshuflw $171, %xmm28, %xmm21 {%k6} {z}
+// CHECK: encoding: [0x62,0x81,0x7f,0x8e,0x70,0xec,0xab]
+ vpshuflw $0xab, %xmm28, %xmm21 {%k6} {z}
+
+// CHECK: vpshuflw $123, %xmm28, %xmm21
+// CHECK: encoding: [0x62,0x81,0x7f,0x08,0x70,0xec,0x7b]
+ vpshuflw $0x7b, %xmm28, %xmm21
+
+// CHECK: vpshuflw $123, (%rcx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0x29,0x7b]
+ vpshuflw $0x7b,(%rcx), %xmm21
+
+// CHECK: vpshuflw $123, 4660(%rax,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x70,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpshuflw $0x7b,4660(%rax,%r14,8), %xmm21
+
+// CHECK: vpshuflw $123, 2032(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0x6a,0x7f,0x7b]
+ vpshuflw $0x7b,2032(%rdx), %xmm21
+
+// CHECK: vpshuflw $123, 2048(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vpshuflw $0x7b,2048(%rdx), %xmm21
+
+// CHECK: vpshuflw $123, -2048(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0x6a,0x80,0x7b]
+ vpshuflw $0x7b,-2048(%rdx), %xmm21
+
+// CHECK: vpshuflw $123, -2064(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0x7f,0x08,0x70,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vpshuflw $0x7b,-2064(%rdx), %xmm21
+
+// CHECK: vpshuflw $171, %ymm29, %ymm20
+// CHECK: encoding: [0x62,0x81,0x7f,0x28,0x70,0xe5,0xab]
+ vpshuflw $0xab, %ymm29, %ymm20
+
+// CHECK: vpshuflw $171, %ymm29, %ymm20 {%k3}
+// CHECK: encoding: [0x62,0x81,0x7f,0x2b,0x70,0xe5,0xab]
+ vpshuflw $0xab, %ymm29, %ymm20 {%k3}
+
+// CHECK: vpshuflw $171, %ymm29, %ymm20 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0x7f,0xab,0x70,0xe5,0xab]
+ vpshuflw $0xab, %ymm29, %ymm20 {%k3} {z}
+
+// CHECK: vpshuflw $123, %ymm29, %ymm20
+// CHECK: encoding: [0x62,0x81,0x7f,0x28,0x70,0xe5,0x7b]
+ vpshuflw $0x7b, %ymm29, %ymm20
+
+// CHECK: vpshuflw $123, (%rcx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7f,0x28,0x70,0x21,0x7b]
+ vpshuflw $0x7b,(%rcx), %ymm20
+
+// CHECK: vpshuflw $123, 4660(%rax,%r14,8), %ymm20
+// CHECK: encoding: [0x62,0xa1,0x7f,0x28,0x70,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpshuflw $0x7b,4660(%rax,%r14,8), %ymm20
+
+// CHECK: vpshuflw $123, 4064(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7f,0x28,0x70,0x62,0x7f,0x7b]
+ vpshuflw $0x7b,4064(%rdx), %ymm20
+
+// CHECK: vpshuflw $123, 4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7f,0x28,0x70,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vpshuflw $0x7b,4096(%rdx), %ymm20
+
+// CHECK: vpshuflw $123, -4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7f,0x28,0x70,0x62,0x80,0x7b]
+ vpshuflw $0x7b,-4096(%rdx), %ymm20
+
+// CHECK: vpshuflw $123, -4128(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe1,0x7f,0x28,0x70,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vpshuflw $0x7b,-4128(%rdx), %ymm20
// CHECK: vpermi2w %xmm21, %xmm29, %xmm19
// CHECK: encoding: [0x62,0xa2,0x95,0x00,0x75,0xdd]
@@ -6583,6 +6743,486 @@
// CHECK: encoding: [0x62,0xe2,0x6d,0x20,0x00,0x9a,0xe0,0xef,0xff,0xff]
vpshufb -4128(%rdx), %ymm18, %ymm19
+// CHECK: vpmovwb %xmm28, %xmm27
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x30,0xe3]
+ vpmovwb %xmm28, %xmm27
+
+// CHECK: vpmovwb %xmm28, %xmm27 {%k2}
+// CHECK: encoding: [0x62,0x02,0x7e,0x0a,0x30,0xe3]
+ vpmovwb %xmm28, %xmm27 {%k2}
+
+// CHECK: vpmovwb %xmm28, %xmm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0x8a,0x30,0xe3]
+ vpmovwb %xmm28, %xmm27 {%k2} {z}
+
+// CHECK: vpmovwb %ymm26, %xmm26
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x30,0xd2]
+ vpmovwb %ymm26, %xmm26
+
+// CHECK: vpmovwb %ymm26, %xmm26 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x30,0xd2]
+ vpmovwb %ymm26, %xmm26 {%k4}
+
+// CHECK: vpmovwb %ymm26, %xmm26 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x30,0xd2]
+ vpmovwb %ymm26, %xmm26 {%k4} {z}
+
+// CHECK: vpmovwb %xmm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x39]
+ vpmovwb %xmm23,(%rcx)
+
+// CHECK: vpmovwb %xmm23, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0e,0x30,0x39]
+ vpmovwb %xmm23,(%rcx) {%k6}
+
+// CHECK: vpmovwb %xmm23, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vpmovwb %xmm23,4660(%rax,%r14,8)
+
+// CHECK: vpmovwb %xmm23, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x7f]
+ vpmovwb %xmm23, 1016(%rdx)
+
+// CHECK: vpmovwb %xmm23, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0x00,0x04,0x00,0x00]
+ vpmovwb %xmm23, 1024(%rdx)
+
+// CHECK: vpmovwb %xmm23, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x7a,0x80]
+ vpmovwb %xmm23,-1024(%rdx)
+
+// CHECK: vpmovwb %xmm23, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xba,0xf8,0xfb,0xff,0xff]
+ vpmovwb %xmm23,-1032(%rdx)
+
+// CHECK: vpmovwb %ymm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x29]
+ vpmovwb %ymm21,(%rcx)
+
+// CHECK: vpmovwb %ymm21, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x30,0x29]
+ vpmovwb %ymm21,(%rcx) {%k5}
+
+// CHECK: vpmovwb %ymm21, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xac,0xf0,0x34,0x12,0x00,0x00]
+ vpmovwb %ymm21, 4660(%rax,%r14,8)
+
+// CHECK: vpmovwb %ymm21, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x7f]
+ vpmovwb %ymm21, 2032(%rdx)
+
+// CHECK: vpmovwb %ymm21, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0x00,0x08,0x00,0x00]
+ vpmovwb %ymm21, 2048(%rdx)
+
+// CHECK: vpmovwb %ymm21, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x6a,0x80]
+ vpmovwb %ymm21,-2048(%rdx)
+
+// CHECK: vpmovwb %ymm21, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xaa,0xf0,0xf7,0xff,0xff]
+ vpmovwb %ymm21, -2064(%rdx)
+
+// CHECK: vpmovswb %xmm19, %xmm17
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0xd9]
+ vpmovswb %xmm19, %xmm17
+
+// CHECK: vpmovswb %xmm19, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x20,0xd9]
+ vpmovswb %xmm19, %xmm17 {%k1}
+
+// CHECK: vpmovswb %xmm19, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x20,0xd9]
+ vpmovswb %xmm19, %xmm17 {%k1} {z}
+
+// CHECK: vpmovswb %ymm19, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xdd]
+ vpmovswb %ymm19, %xmm21
+
+// CHECK: vpmovswb %ymm19, %xmm21 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2c,0x20,0xdd]
+ vpmovswb %ymm19, %xmm21 {%k4}
+
+// CHECK: vpmovswb %ymm19, %xmm21 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xac,0x20,0xdd]
+ vpmovswb %ymm19, %xmm21 {%k4} {z}
+
+// CHECK: vpmovswb %xmm18, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x11]
+ vpmovswb %xmm18,(%rcx)
+
+// CHECK: vpmovswb %xmm18, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x20,0x11]
+ vpmovswb %xmm18,(%rcx) {%k2}
+
+// CHECK: vpmovswb %xmm18, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x20,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vpmovswb %xmm18, 4660(%rax,%r14,8)
+
+// CHECK: vpmovswb %xmm18, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x7f]
+ vpmovswb %xmm18, 1016(%rdx)
+
+// CHECK: vpmovswb %xmm18, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0x00,0x04,0x00,0x00]
+ vpmovswb %xmm18, 1024(%rdx)
+
+// CHECK: vpmovswb %xmm18, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x52,0x80]
+ vpmovswb %xmm18, -1024(%rdx)
+
+// CHECK: vpmovswb %xmm18, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x20,0x92,0xf8,0xfb,0xff,0xff]
+ vpmovswb %xmm18, -1032(%rdx)
+
+// CHECK: vpmovswb %ymm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x39]
+ vpmovswb %ymm23,(%rcx)
+
+// CHECK: vpmovswb %ymm23, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2a,0x20,0x39]
+ vpmovswb %ymm23,(%rcx) {%k2}
+
+// CHECK: vpmovswb %ymm23, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x20,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vpmovswb %ymm23, 4660(%rax,%r14,8)
+
+// CHECK: vpmovswb %ymm23, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x7f]
+ vpmovswb %ymm23, 2032(%rdx)
+
+// CHECK: vpmovswb %ymm23, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0x00,0x08,0x00,0x00]
+ vpmovswb %ymm23, 2048(%rdx)
+
+// CHECK: vpmovswb %ymm23, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0x7a,0x80]
+ vpmovswb %ymm23, -2048(%rdx)
+
+// CHECK: vpmovswb %ymm23, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x20,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovswb %ymm23, -2064(%rdx)
+
+// CHECK: vpmovuswb %xmm17, %xmm26
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x10,0xca]
+ vpmovuswb %xmm17, %xmm26
+
+// CHECK: vpmovuswb %xmm17, %xmm26 {%k6}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0e,0x10,0xca]
+ vpmovuswb %xmm17, %xmm26 {%k6}
+
+// CHECK: vpmovuswb %xmm17, %xmm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8e,0x10,0xca]
+ vpmovuswb %xmm17, %xmm26 {%k6} {z}
+
+// CHECK: vpmovuswb %ymm26, %xmm17
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xd1]
+ vpmovuswb %ymm26, %xmm17
+
+// CHECK: vpmovuswb %ymm26, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7e,0x2a,0x10,0xd1]
+ vpmovuswb %ymm26, %xmm17 {%k2}
+
+// CHECK: vpmovuswb %ymm26, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0xaa,0x10,0xd1]
+ vpmovuswb %ymm26, %xmm17 {%k2} {z}
+
+// CHECK: vpmovuswb %xmm19, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x19]
+ vpmovuswb %xmm19,(%rcx)
+
+// CHECK: vpmovuswb %xmm19, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x10,0x19]
+ vpmovuswb %xmm19,(%rcx) {%k1}
+
+// CHECK: vpmovuswb %xmm19, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vpmovuswb %xmm19, 4660(%rax,%r14,8)
+
+// CHECK: vpmovuswb %xmm19, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x7f]
+ vpmovuswb %xmm19, 1016(%rdx)
+
+// CHECK: vpmovuswb %xmm19, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0x00,0x04,0x00,0x00]
+ vpmovuswb %xmm19, 1024(%rdx)
+
+// CHECK: vpmovuswb %xmm19, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x5a,0x80]
+ vpmovuswb %xmm19, -1024(%rdx)
+
+// CHECK: vpmovuswb %xmm19, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x10,0x9a,0xf8,0xfb,0xff,0xff]
+ vpmovuswb %xmm19, -1032(%rdx)
+
+// CHECK: vpmovuswb %ymm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x39]
+ vpmovuswb %ymm23,(%rcx)
+
+// CHECK: vpmovuswb %ymm23, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x10,0x39]
+ vpmovuswb %ymm23,(%rcx) {%k6}
+
+// CHECK: vpmovuswb %ymm23, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x10,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vpmovuswb %ymm23, 4660(%rax,%r14,8)
+
+// CHECK: vpmovuswb %ymm23, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x7f]
+ vpmovuswb %ymm23, 2032(%rdx)
+
+// CHECK: vpmovuswb %ymm23, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0x00,0x08,0x00,0x00]
+ vpmovuswb %ymm23, 2048(%rdx)
+
+// CHECK: vpmovuswb %ymm23, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0x7a,0x80]
+ vpmovuswb %ymm23, -2048(%rdx)
+
+// CHECK: vpmovuswb %ymm23, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x10,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovuswb %ymm23, -2064(%rdx)
+
+// CHECK: vpmovwb %xmm17, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xcd]
+ vpmovwb %xmm17, %xmm21
+
+// CHECK: vpmovwb %xmm17, %xmm21 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x09,0x30,0xcd]
+ vpmovwb %xmm17, %xmm21 {%k1}
+
+// CHECK: vpmovwb %xmm17, %xmm21 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x89,0x30,0xcd]
+ vpmovwb %xmm17, %xmm21 {%k1} {z}
+
+// CHECK: vpmovwb %ymm23, %xmm26
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x30,0xfa]
+ vpmovwb %ymm23, %xmm26
+
+// CHECK: vpmovwb %ymm23, %xmm26 {%k7}
+// CHECK: encoding: [0x62,0x82,0x7e,0x2f,0x30,0xfa]
+ vpmovwb %ymm23, %xmm26 {%k7}
+
+// CHECK: vpmovwb %ymm23, %xmm26 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xaf,0x30,0xfa]
+ vpmovwb %ymm23, %xmm26 {%k7} {z}
+
+// CHECK: vpmovwb %xmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x29]
+ vpmovwb %xmm21, (%rcx)
+
+// CHECK: vpmovwb %xmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x30,0x29]
+ vpmovwb %xmm21, (%rcx) {%k2}
+
+// CHECK: vpmovwb %xmm21, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x30,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovwb %xmm21, 291(%rax,%r14,8)
+
+// CHECK: vpmovwb %xmm21, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x7f]
+ vpmovwb %xmm21, 1016(%rdx)
+
+// CHECK: vpmovwb %xmm21, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0x00,0x04,0x00,0x00]
+ vpmovwb %xmm21, 1024(%rdx)
+
+// CHECK: vpmovwb %xmm21, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0x6a,0x80]
+ vpmovwb %xmm21, -1024(%rdx)
+
+// CHECK: vpmovwb %xmm21, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x30,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovwb %xmm21, -1032(%rdx)
+
+// CHECK: vpmovwb %ymm20, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x21]
+ vpmovwb %ymm20, (%rcx)
+
+// CHECK: vpmovwb %ymm20, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2c,0x30,0x21]
+ vpmovwb %ymm20, (%rcx) {%k4}
+
+// CHECK: vpmovwb %ymm20, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x30,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovwb %ymm20, 291(%rax,%r14,8)
+
+// CHECK: vpmovwb %ymm20, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x7f]
+ vpmovwb %ymm20, 2032(%rdx)
+
+// CHECK: vpmovwb %ymm20, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0x00,0x08,0x00,0x00]
+ vpmovwb %ymm20, 2048(%rdx)
+
+// CHECK: vpmovwb %ymm20, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0x62,0x80]
+ vpmovwb %ymm20, -2048(%rdx)
+
+// CHECK: vpmovwb %ymm20, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x30,0xa2,0xf0,0xf7,0xff,0xff]
+ vpmovwb %ymm20, -2064(%rdx)
+
+// CHECK: vpmovswb %xmm20, %xmm24
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x20,0xe0]
+ vpmovswb %xmm20, %xmm24
+
+// CHECK: vpmovswb %xmm20, %xmm24 {%k4}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0c,0x20,0xe0]
+ vpmovswb %xmm20, %xmm24 {%k4}
+
+// CHECK: vpmovswb %xmm20, %xmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8c,0x20,0xe0]
+ vpmovswb %xmm20, %xmm24 {%k4} {z}
+
+// CHECK: vpmovswb %ymm18, %xmm27
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x20,0xd3]
+ vpmovswb %ymm18, %xmm27
+
+// CHECK: vpmovswb %ymm18, %xmm27 {%k1}
+// CHECK: encoding: [0x62,0x82,0x7e,0x29,0x20,0xd3]
+ vpmovswb %ymm18, %xmm27 {%k1}
+
+// CHECK: vpmovswb %ymm18, %xmm27 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xa9,0x20,0xd3]
+ vpmovswb %ymm18, %xmm27 {%k1} {z}
+
+// CHECK: vpmovswb %xmm24, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x01]
+ vpmovswb %xmm24, (%rcx)
+
+// CHECK: vpmovswb %xmm24, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x20,0x01]
+ vpmovswb %xmm24, (%rcx) {%k3}
+
+// CHECK: vpmovswb %xmm24, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x20,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovswb %xmm24, 291(%rax,%r14,8)
+
+// CHECK: vpmovswb %xmm24, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x7f]
+ vpmovswb %xmm24, 1016(%rdx)
+
+// CHECK: vpmovswb %xmm24, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0x00,0x04,0x00,0x00]
+ vpmovswb %xmm24, 1024(%rdx)
+
+// CHECK: vpmovswb %xmm24, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x42,0x80]
+ vpmovswb %xmm24, -1024(%rdx)
+
+// CHECK: vpmovswb %xmm24, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x20,0x82,0xf8,0xfb,0xff,0xff]
+ vpmovswb %xmm24, -1032(%rdx)
+
+// CHECK: vpmovswb %ymm27, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x19]
+ vpmovswb %ymm27, (%rcx)
+
+// CHECK: vpmovswb %ymm27, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2f,0x20,0x19]
+ vpmovswb %ymm27, (%rcx) {%k7}
+
+// CHECK: vpmovswb %ymm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x20,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovswb %ymm27, 291(%rax,%r14,8)
+
+// CHECK: vpmovswb %ymm27, 2032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x7f]
+ vpmovswb %ymm27, 2032(%rdx)
+
+// CHECK: vpmovswb %ymm27, 2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0x00,0x08,0x00,0x00]
+ vpmovswb %ymm27, 2048(%rdx)
+
+// CHECK: vpmovswb %ymm27, -2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x5a,0x80]
+ vpmovswb %ymm27, -2048(%rdx)
+
+// CHECK: vpmovswb %ymm27, -2064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x20,0x9a,0xf0,0xf7,0xff,0xff]
+ vpmovswb %ymm27, -2064(%rdx)
+
+// CHECK: vpmovuswb %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x10,0xdf]
+ vpmovuswb %xmm19, %xmm23
+
+// CHECK: vpmovuswb %xmm19, %xmm23 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0c,0x10,0xdf]
+ vpmovuswb %xmm19, %xmm23 {%k4}
+
+// CHECK: vpmovuswb %xmm19, %xmm23 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8c,0x10,0xdf]
+ vpmovuswb %xmm19, %xmm23 {%k4} {z}
+
+// CHECK: vpmovuswb %ymm23, %xmm28
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x10,0xfc]
+ vpmovuswb %ymm23, %xmm28
+
+// CHECK: vpmovuswb %ymm23, %xmm28 {%k6}
+// CHECK: encoding: [0x62,0x82,0x7e,0x2e,0x10,0xfc]
+ vpmovuswb %ymm23, %xmm28 {%k6}
+
+// CHECK: vpmovuswb %ymm23, %xmm28 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xae,0x10,0xfc]
+ vpmovuswb %ymm23, %xmm28 {%k6} {z}
+
+// CHECK: vpmovuswb %xmm25, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x09]
+ vpmovuswb %xmm25, (%rcx)
+
+// CHECK: vpmovuswb %xmm25, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x10,0x09]
+ vpmovuswb %xmm25, (%rcx) {%k3}
+
+// CHECK: vpmovuswb %xmm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovuswb %xmm25, 291(%rax,%r14,8)
+
+// CHECK: vpmovuswb %xmm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x7f]
+ vpmovuswb %xmm25, 1016(%rdx)
+
+// CHECK: vpmovuswb %xmm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0x00,0x04,0x00,0x00]
+ vpmovuswb %xmm25, 1024(%rdx)
+
+// CHECK: vpmovuswb %xmm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x4a,0x80]
+ vpmovuswb %xmm25, -1024(%rdx)
+
+// CHECK: vpmovuswb %xmm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovuswb %xmm25, -1032(%rdx)
+
+// CHECK: vpmovuswb %ymm28, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x21]
+ vpmovuswb %ymm28, (%rcx)
+
+// CHECK: vpmovuswb %ymm28, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x10,0x21]
+ vpmovuswb %ymm28, (%rcx) {%k2}
+
+// CHECK: vpmovuswb %ymm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x10,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovuswb %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vpmovuswb %ymm28, 2032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x7f]
+ vpmovuswb %ymm28, 2032(%rdx)
+
+// CHECK: vpmovuswb %ymm28, 2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0x00,0x08,0x00,0x00]
+ vpmovuswb %ymm28, 2048(%rdx)
+
+// CHECK: vpmovuswb %ymm28, -2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0x62,0x80]
+ vpmovuswb %ymm28, -2048(%rdx)
+
+// CHECK: vpmovuswb %ymm28, -2064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x10,0xa2,0xf0,0xf7,0xff,0xff]
+ vpmovuswb %ymm28, -2064(%rdx)
+
// CHECK: vpmulhuw %xmm18, %xmm21, %xmm24
// CHECK: encoding: [0x62,0x21,0x55,0x00,0xe4,0xc2]
vpmulhuw %xmm18, %xmm21, %xmm24
@@ -6799,3 +7439,2387 @@
// CHECK: encoding: [0x62,0x62,0x5d,0x20,0x0b,0xa2,0xe0,0xef,0xff,0xff]
vpmulhrsw -4128(%rdx), %ymm20, %ymm28
+// CHECK: vpmaddubsw %xmm20, %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x22,0x55,0x00,0x04,0xe4]
+ vpmaddubsw %xmm20, %xmm21, %xmm28
+
+// CHECK: vpmaddubsw %xmm20, %xmm21, %xmm28 {%k6}
+// CHECK: encoding: [0x62,0x22,0x55,0x06,0x04,0xe4]
+ vpmaddubsw %xmm20, %xmm21, %xmm28 {%k6}
+
+// CHECK: vpmaddubsw %xmm20, %xmm21, %xmm28 {%k6} {z}
+// CHECK: encoding: [0x62,0x22,0x55,0x86,0x04,0xe4]
+ vpmaddubsw %xmm20, %xmm21, %xmm28 {%k6} {z}
+
+// CHECK: vpmaddubsw (%rcx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0x21]
+ vpmaddubsw (%rcx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw 291(%rax,%r14,8), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x22,0x55,0x00,0x04,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddubsw 291(%rax,%r14,8), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw 2032(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0x62,0x7f]
+ vpmaddubsw 2032(%rdx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw 2048(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0xa2,0x00,0x08,0x00,0x00]
+ vpmaddubsw 2048(%rdx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw -2048(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0x62,0x80]
+ vpmaddubsw -2048(%rdx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw -2064(%rdx), %xmm21, %xmm28
+// CHECK: encoding: [0x62,0x62,0x55,0x00,0x04,0xa2,0xf0,0xf7,0xff,0xff]
+ vpmaddubsw -2064(%rdx), %xmm21, %xmm28
+
+// CHECK: vpmaddubsw %ymm26, %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x02,0x2d,0x20,0x04,0xf2]
+ vpmaddubsw %ymm26, %ymm26, %ymm30
+
+// CHECK: vpmaddubsw %ymm26, %ymm26, %ymm30 {%k5}
+// CHECK: encoding: [0x62,0x02,0x2d,0x25,0x04,0xf2]
+ vpmaddubsw %ymm26, %ymm26, %ymm30 {%k5}
+
+// CHECK: vpmaddubsw %ymm26, %ymm26, %ymm30 {%k5} {z}
+// CHECK: encoding: [0x62,0x02,0x2d,0xa5,0x04,0xf2]
+ vpmaddubsw %ymm26, %ymm26, %ymm30 {%k5} {z}
+
+// CHECK: vpmaddubsw (%rcx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0x31]
+ vpmaddubsw (%rcx), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw 291(%rax,%r14,8), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x22,0x2d,0x20,0x04,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddubsw 291(%rax,%r14,8), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw 4064(%rdx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0x72,0x7f]
+ vpmaddubsw 4064(%rdx), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw 4096(%rdx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0xb2,0x00,0x10,0x00,0x00]
+ vpmaddubsw 4096(%rdx), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw -4096(%rdx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0x72,0x80]
+ vpmaddubsw -4096(%rdx), %ymm26, %ymm30
+
+// CHECK: vpmaddubsw -4128(%rdx), %ymm26, %ymm30
+// CHECK: encoding: [0x62,0x62,0x2d,0x20,0x04,0xb2,0xe0,0xef,0xff,0xff]
+ vpmaddubsw -4128(%rdx), %ymm26, %ymm30
+
+// CHECK: vpmaddwd %xmm28, %xmm24, %xmm17
+// CHECK: encoding: [0x62,0x81,0x3d,0x00,0xf5,0xcc]
+ vpmaddwd %xmm28, %xmm24, %xmm17
+
+// CHECK: vpmaddwd %xmm28, %xmm24, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0x81,0x3d,0x01,0xf5,0xcc]
+ vpmaddwd %xmm28, %xmm24, %xmm17 {%k1}
+
+// CHECK: vpmaddwd %xmm28, %xmm24, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0x3d,0x81,0xf5,0xcc]
+ vpmaddwd %xmm28, %xmm24, %xmm17 {%k1} {z}
+
+// CHECK: vpmaddwd (%rcx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x09]
+ vpmaddwd (%rcx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd 291(%rax,%r14,8), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x3d,0x00,0xf5,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddwd 291(%rax,%r14,8), %xmm24, %xmm17
+
+// CHECK: vpmaddwd 2032(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x4a,0x7f]
+ vpmaddwd 2032(%rdx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd 2048(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x8a,0x00,0x08,0x00,0x00]
+ vpmaddwd 2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd -2048(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x4a,0x80]
+ vpmaddwd -2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd -2064(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf5,0x8a,0xf0,0xf7,0xff,0xff]
+ vpmaddwd -2064(%rdx), %xmm24, %xmm17
+
+// CHECK: vpmaddwd %ymm19, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x21,0x45,0x20,0xf5,0xc3]
+ vpmaddwd %ymm19, %ymm23, %ymm24
+
+// CHECK: vpmaddwd %ymm19, %ymm23, %ymm24 {%k4}
+// CHECK: encoding: [0x62,0x21,0x45,0x24,0xf5,0xc3]
+ vpmaddwd %ymm19, %ymm23, %ymm24 {%k4}
+
+// CHECK: vpmaddwd %ymm19, %ymm23, %ymm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0x45,0xa4,0xf5,0xc3]
+ vpmaddwd %ymm19, %ymm23, %ymm24 {%k4} {z}
+
+// CHECK: vpmaddwd (%rcx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x01]
+ vpmaddwd (%rcx), %ymm23, %ymm24
+
+// CHECK: vpmaddwd 291(%rax,%r14,8), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x21,0x45,0x20,0xf5,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmaddwd 291(%rax,%r14,8), %ymm23, %ymm24
+
+// CHECK: vpmaddwd 4064(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x42,0x7f]
+ vpmaddwd 4064(%rdx), %ymm23, %ymm24
+
+// CHECK: vpmaddwd 4096(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x82,0x00,0x10,0x00,0x00]
+ vpmaddwd 4096(%rdx), %ymm23, %ymm24
+
+// CHECK: vpmaddwd -4096(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x42,0x80]
+ vpmaddwd -4096(%rdx), %ymm23, %ymm24
+
+// CHECK: vpmaddwd -4128(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0xf5,0x82,0xe0,0xef,0xff,0xff]
+ vpmaddwd -4128(%rdx), %ymm23, %ymm24
+
+// CHECK: vpmaddubsw %xmm25, %xmm23, %xmm19
+// CHECK: encoding: [0x62,0x82,0x45,0x00,0x04,0xd9]
+ vpmaddubsw %xmm25, %xmm23, %xmm19
+
+// CHECK: vpmaddubsw %xmm25, %xmm23, %xmm19 {%k2}
+// CHECK: encoding: [0x62,0x82,0x45,0x02,0x04,0xd9]
+ vpmaddubsw %xmm25, %xmm23, %xmm19 {%k2}
+
+// CHECK: vpmaddubsw %xmm25, %xmm23, %xmm19 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x45,0x82,0x04,0xd9]
+ vpmaddubsw %xmm25, %xmm23, %xmm19 {%k2} {z}
+
+// CHECK: vpmaddubsw (%rcx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x19]
+ vpmaddubsw (%rcx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw 4660(%rax,%r14,8), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x04,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vpmaddubsw 4660(%rax,%r14,8), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw 2032(%rdx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x5a,0x7f]
+ vpmaddubsw 2032(%rdx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw 2048(%rdx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x9a,0x00,0x08,0x00,0x00]
+ vpmaddubsw 2048(%rdx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw -2048(%rdx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x5a,0x80]
+ vpmaddubsw -2048(%rdx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw -2064(%rdx), %xmm23, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x04,0x9a,0xf0,0xf7,0xff,0xff]
+ vpmaddubsw -2064(%rdx), %xmm23, %xmm19
+
+// CHECK: vpmaddubsw %ymm22, %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xa2,0x65,0x20,0x04,0xce]
+ vpmaddubsw %ymm22, %ymm19, %ymm17
+
+// CHECK: vpmaddubsw %ymm22, %ymm19, %ymm17 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x65,0x27,0x04,0xce]
+ vpmaddubsw %ymm22, %ymm19, %ymm17 {%k7}
+
+// CHECK: vpmaddubsw %ymm22, %ymm19, %ymm17 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x65,0xa7,0x04,0xce]
+ vpmaddubsw %ymm22, %ymm19, %ymm17 {%k7} {z}
+
+// CHECK: vpmaddubsw (%rcx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x09]
+ vpmaddubsw (%rcx), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw 4660(%rax,%r14,8), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xa2,0x65,0x20,0x04,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vpmaddubsw 4660(%rax,%r14,8), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw 4064(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x4a,0x7f]
+ vpmaddubsw 4064(%rdx), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw 4096(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x8a,0x00,0x10,0x00,0x00]
+ vpmaddubsw 4096(%rdx), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw -4096(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x4a,0x80]
+ vpmaddubsw -4096(%rdx), %ymm19, %ymm17
+
+// CHECK: vpmaddubsw -4128(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x65,0x20,0x04,0x8a,0xe0,0xef,0xff,0xff]
+ vpmaddubsw -4128(%rdx), %ymm19, %ymm17
+
+// CHECK: vpmaddwd %xmm20, %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xa1,0x4d,0x00,0xf5,0xfc]
+ vpmaddwd %xmm20, %xmm22, %xmm23
+
+// CHECK: vpmaddwd %xmm20, %xmm22, %xmm23 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x4d,0x03,0xf5,0xfc]
+ vpmaddwd %xmm20, %xmm22, %xmm23 {%k3}
+
+// CHECK: vpmaddwd %xmm20, %xmm22, %xmm23 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x4d,0x83,0xf5,0xfc]
+ vpmaddwd %xmm20, %xmm22, %xmm23 {%k3} {z}
+
+// CHECK: vpmaddwd (%rcx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0x39]
+ vpmaddwd (%rcx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd 4660(%rax,%r14,8), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xa1,0x4d,0x00,0xf5,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vpmaddwd 4660(%rax,%r14,8), %xmm22, %xmm23
+
+// CHECK: vpmaddwd 2032(%rdx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0x7a,0x7f]
+ vpmaddwd 2032(%rdx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd 2048(%rdx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0xba,0x00,0x08,0x00,0x00]
+ vpmaddwd 2048(%rdx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd -2048(%rdx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0x7a,0x80]
+ vpmaddwd -2048(%rdx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd -2064(%rdx), %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0xf5,0xba,0xf0,0xf7,0xff,0xff]
+ vpmaddwd -2064(%rdx), %xmm22, %xmm23
+
+// CHECK: vpmaddwd %ymm17, %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x5d,0x20,0xf5,0xd9]
+ vpmaddwd %ymm17, %ymm20, %ymm19
+
+// CHECK: vpmaddwd %ymm17, %ymm20, %ymm19 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x5d,0x22,0xf5,0xd9]
+ vpmaddwd %ymm17, %ymm20, %ymm19 {%k2}
+
+// CHECK: vpmaddwd %ymm17, %ymm20, %ymm19 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x5d,0xa2,0xf5,0xd9]
+ vpmaddwd %ymm17, %ymm20, %ymm19 {%k2} {z}
+
+// CHECK: vpmaddwd (%rcx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x19]
+ vpmaddwd (%rcx), %ymm20, %ymm19
+
+// CHECK: vpmaddwd 4660(%rax,%r14,8), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x5d,0x20,0xf5,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vpmaddwd 4660(%rax,%r14,8), %ymm20, %ymm19
+
+// CHECK: vpmaddwd 4064(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x5a,0x7f]
+ vpmaddwd 4064(%rdx), %ymm20, %ymm19
+
+// CHECK: vpmaddwd 4096(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x9a,0x00,0x10,0x00,0x00]
+ vpmaddwd 4096(%rdx), %ymm20, %ymm19
+
+// CHECK: vpmaddwd -4096(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x5a,0x80]
+ vpmaddwd -4096(%rdx), %ymm20, %ymm19
+
+// CHECK: vpmaddwd -4128(%rdx), %ymm20, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x9a,0xe0,0xef,0xff,0xff]
+ vpmaddwd -4128(%rdx), %ymm20, %ymm19
+
+// CHECK: vptestnmw %xmm24, %xmm29, %k4
+// CHECK: encoding: [0x62,0x92,0x96,0x00,0x26,0xe0]
+ vptestnmw %xmm24, %xmm29, %k4
+
+// CHECK: vptestnmw %xmm24, %xmm29, %k4 {%k5}
+// CHECK: encoding: [0x62,0x92,0x96,0x05,0x26,0xe0]
+ vptestnmw %xmm24, %xmm29, %k4 {%k5}
+
+// CHECK: vptestnmw (%rcx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x96,0x00,0x26,0x21]
+ vptestnmw (%rcx), %xmm29, %k4
+
+// CHECK: vptestnmw 291(%rax,%r14,8), %xmm29, %k4
+// CHECK: encoding: [0x62,0xb2,0x96,0x00,0x26,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vptestnmw 291(%rax,%r14,8), %xmm29, %k4
+
+// CHECK: vptestnmw 2032(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x96,0x00,0x26,0x62,0x7f]
+ vptestnmw 2032(%rdx), %xmm29, %k4
+
+// CHECK: vptestnmw 2048(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x96,0x00,0x26,0xa2,0x00,0x08,0x00,0x00]
+ vptestnmw 2048(%rdx), %xmm29, %k4
+
+// CHECK: vptestnmw -2048(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x96,0x00,0x26,0x62,0x80]
+ vptestnmw -2048(%rdx), %xmm29, %k4
+
+// CHECK: vptestnmw -2064(%rdx), %xmm29, %k4
+// CHECK: encoding: [0x62,0xf2,0x96,0x00,0x26,0xa2,0xf0,0xf7,0xff,0xff]
+ vptestnmw -2064(%rdx), %xmm29, %k4
+
+// CHECK: vptestnmw %ymm17, %ymm28, %k2
+// CHECK: encoding: [0x62,0xb2,0x9e,0x20,0x26,0xd1]
+ vptestnmw %ymm17, %ymm28, %k2
+
+// CHECK: vptestnmw %ymm17, %ymm28, %k2 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x9e,0x22,0x26,0xd1]
+ vptestnmw %ymm17, %ymm28, %k2 {%k2}
+
+// CHECK: vptestnmw (%rcx), %ymm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x20,0x26,0x11]
+ vptestnmw (%rcx), %ymm28, %k2
+
+// CHECK: vptestnmw 291(%rax,%r14,8), %ymm28, %k2
+// CHECK: encoding: [0x62,0xb2,0x9e,0x20,0x26,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vptestnmw 291(%rax,%r14,8), %ymm28, %k2
+
+// CHECK: vptestnmw 4064(%rdx), %ymm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x20,0x26,0x52,0x7f]
+ vptestnmw 4064(%rdx), %ymm28, %k2
+
+// CHECK: vptestnmw 4096(%rdx), %ymm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x20,0x26,0x92,0x00,0x10,0x00,0x00]
+ vptestnmw 4096(%rdx), %ymm28, %k2
+
+// CHECK: vptestnmw -4096(%rdx), %ymm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x20,0x26,0x52,0x80]
+ vptestnmw -4096(%rdx), %ymm28, %k2
+
+// CHECK: vptestnmw -4128(%rdx), %ymm28, %k2
+// CHECK: encoding: [0x62,0xf2,0x9e,0x20,0x26,0x92,0xe0,0xef,0xff,0xff]
+ vptestnmw -4128(%rdx), %ymm28, %k2
+
+// CHECK: vptestnmw %xmm21, %xmm27, %k2
+// CHECK: encoding: [0x62,0xb2,0xa6,0x00,0x26,0xd5]
+ vptestnmw %xmm21, %xmm27, %k2
+
+// CHECK: vptestnmw %xmm21, %xmm27, %k2 {%k3}
+// CHECK: encoding: [0x62,0xb2,0xa6,0x03,0x26,0xd5]
+ vptestnmw %xmm21, %xmm27, %k2 {%k3}
+
+// CHECK: vptestnmw (%rcx), %xmm27, %k2
+// CHECK: encoding: [0x62,0xf2,0xa6,0x00,0x26,0x11]
+ vptestnmw (%rcx), %xmm27, %k2
+
+// CHECK: vptestnmw 4660(%rax,%r14,8), %xmm27, %k2
+// CHECK: encoding: [0x62,0xb2,0xa6,0x00,0x26,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vptestnmw 4660(%rax,%r14,8), %xmm27, %k2
+
+// CHECK: vptestnmw 2032(%rdx), %xmm27, %k2
+// CHECK: encoding: [0x62,0xf2,0xa6,0x00,0x26,0x52,0x7f]
+ vptestnmw 2032(%rdx), %xmm27, %k2
+
+// CHECK: vptestnmw 2048(%rdx), %xmm27, %k2
+// CHECK: encoding: [0x62,0xf2,0xa6,0x00,0x26,0x92,0x00,0x08,0x00,0x00]
+ vptestnmw 2048(%rdx), %xmm27, %k2
+
+// CHECK: vptestnmw -2048(%rdx), %xmm27, %k2
+// CHECK: encoding: [0x62,0xf2,0xa6,0x00,0x26,0x52,0x80]
+ vptestnmw -2048(%rdx), %xmm27, %k2
+
+// CHECK: vptestnmw -2064(%rdx), %xmm27, %k2
+// CHECK: encoding: [0x62,0xf2,0xa6,0x00,0x26,0x92,0xf0,0xf7,0xff,0xff]
+ vptestnmw -2064(%rdx), %xmm27, %k2
+
+// CHECK: vptestnmw %ymm23, %ymm19, %k4
+// CHECK: encoding: [0x62,0xb2,0xe6,0x20,0x26,0xe7]
+ vptestnmw %ymm23, %ymm19, %k4
+
+// CHECK: vptestnmw %ymm23, %ymm19, %k4 {%k1}
+// CHECK: encoding: [0x62,0xb2,0xe6,0x21,0x26,0xe7]
+ vptestnmw %ymm23, %ymm19, %k4 {%k1}
+
+// CHECK: vptestnmw (%rcx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf2,0xe6,0x20,0x26,0x21]
+ vptestnmw (%rcx), %ymm19, %k4
+
+// CHECK: vptestnmw 4660(%rax,%r14,8), %ymm19, %k4
+// CHECK: encoding: [0x62,0xb2,0xe6,0x20,0x26,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vptestnmw 4660(%rax,%r14,8), %ymm19, %k4
+
+// CHECK: vptestnmw 4064(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf2,0xe6,0x20,0x26,0x62,0x7f]
+ vptestnmw 4064(%rdx), %ymm19, %k4
+
+// CHECK: vptestnmw 4096(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf2,0xe6,0x20,0x26,0xa2,0x00,0x10,0x00,0x00]
+ vptestnmw 4096(%rdx), %ymm19, %k4
+
+// CHECK: vptestnmw -4096(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf2,0xe6,0x20,0x26,0x62,0x80]
+ vptestnmw -4096(%rdx), %ymm19, %k4
+
+// CHECK: vptestnmw -4128(%rdx), %ymm19, %k4
+// CHECK: encoding: [0x62,0xf2,0xe6,0x20,0x26,0xa2,0xe0,0xef,0xff,0xff]
+ vptestnmw -4128(%rdx), %ymm19, %k4
+
+// CHECK: vptestnmb %xmm22, %xmm27, %k4
+// CHECK: encoding: [0x62,0xb2,0x26,0x00,0x26,0xe6]
+ vptestnmb %xmm22, %xmm27, %k4
+
+// CHECK: vptestnmb %xmm22, %xmm27, %k4 {%k1}
+// CHECK: encoding: [0x62,0xb2,0x26,0x01,0x26,0xe6]
+ vptestnmb %xmm22, %xmm27, %k4 {%k1}
+
+// CHECK: vptestnmb (%rcx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0x21]
+ vptestnmb (%rcx), %xmm27, %k4
+
+// CHECK: vptestnmb 291(%rax,%r14,8), %xmm27, %k4
+// CHECK: encoding: [0x62,0xb2,0x26,0x00,0x26,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vptestnmb 291(%rax,%r14,8), %xmm27, %k4
+
+// CHECK: vptestnmb 2032(%rdx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0x62,0x7f]
+ vptestnmb 2032(%rdx), %xmm27, %k4
+
+// CHECK: vptestnmb 2048(%rdx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0xa2,0x00,0x08,0x00,0x00]
+ vptestnmb 2048(%rdx), %xmm27, %k4
+
+// CHECK: vptestnmb -2048(%rdx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0x62,0x80]
+ vptestnmb -2048(%rdx), %xmm27, %k4
+
+// CHECK: vptestnmb -2064(%rdx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0xa2,0xf0,0xf7,0xff,0xff]
+ vptestnmb -2064(%rdx), %xmm27, %k4
+
+// CHECK: vptestnmb %ymm17, %ymm25, %k5
+// CHECK: encoding: [0x62,0xb2,0x36,0x20,0x26,0xe9]
+ vptestnmb %ymm17, %ymm25, %k5
+
+// CHECK: vptestnmb %ymm17, %ymm25, %k5 {%k6}
+// CHECK: encoding: [0x62,0xb2,0x36,0x26,0x26,0xe9]
+ vptestnmb %ymm17, %ymm25, %k5 {%k6}
+
+// CHECK: vptestnmb (%rcx), %ymm25, %k5
+// CHECK: encoding: [0x62,0xf2,0x36,0x20,0x26,0x29]
+ vptestnmb (%rcx), %ymm25, %k5
+
+// CHECK: vptestnmb 291(%rax,%r14,8), %ymm25, %k5
+// CHECK: encoding: [0x62,0xb2,0x36,0x20,0x26,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vptestnmb 291(%rax,%r14,8), %ymm25, %k5
+
+// CHECK: vptestnmb 4064(%rdx), %ymm25, %k5
+// CHECK: encoding: [0x62,0xf2,0x36,0x20,0x26,0x6a,0x7f]
+ vptestnmb 4064(%rdx), %ymm25, %k5
+
+// CHECK: vptestnmb 4096(%rdx), %ymm25, %k5
+// CHECK: encoding: [0x62,0xf2,0x36,0x20,0x26,0xaa,0x00,0x10,0x00,0x00]
+ vptestnmb 4096(%rdx), %ymm25, %k5
+
+// CHECK: vptestnmb -4096(%rdx), %ymm25, %k5
+// CHECK: encoding: [0x62,0xf2,0x36,0x20,0x26,0x6a,0x80]
+ vptestnmb -4096(%rdx), %ymm25, %k5
+
+// CHECK: vptestnmb -4128(%rdx), %ymm25, %k5
+// CHECK: encoding: [0x62,0xf2,0x36,0x20,0x26,0xaa,0xe0,0xef,0xff,0xff]
+ vptestnmb -4128(%rdx), %ymm25, %k5
+
+// CHECK: vptestnmb %xmm19, %xmm27, %k4
+// CHECK: encoding: [0x62,0xb2,0x26,0x00,0x26,0xe3]
+ vptestnmb %xmm19, %xmm27, %k4
+
+// CHECK: vptestnmb %xmm19, %xmm27, %k4 {%k3}
+// CHECK: encoding: [0x62,0xb2,0x26,0x03,0x26,0xe3]
+ vptestnmb %xmm19, %xmm27, %k4 {%k3}
+
+// CHECK: vptestnmb (%rcx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0x21]
+ vptestnmb (%rcx), %xmm27, %k4
+
+// CHECK: vptestnmb 4660(%rax,%r14,8), %xmm27, %k4
+// CHECK: encoding: [0x62,0xb2,0x26,0x00,0x26,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vptestnmb 4660(%rax,%r14,8), %xmm27, %k4
+
+// CHECK: vptestnmb 2032(%rdx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0x62,0x7f]
+ vptestnmb 2032(%rdx), %xmm27, %k4
+
+// CHECK: vptestnmb 2048(%rdx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0xa2,0x00,0x08,0x00,0x00]
+ vptestnmb 2048(%rdx), %xmm27, %k4
+
+// CHECK: vptestnmb -2048(%rdx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0x62,0x80]
+ vptestnmb -2048(%rdx), %xmm27, %k4
+
+// CHECK: vptestnmb -2064(%rdx), %xmm27, %k4
+// CHECK: encoding: [0x62,0xf2,0x26,0x00,0x26,0xa2,0xf0,0xf7,0xff,0xff]
+ vptestnmb -2064(%rdx), %xmm27, %k4
+
+// CHECK: vptestnmb %ymm24, %ymm28, %k4
+// CHECK: encoding: [0x62,0x92,0x1e,0x20,0x26,0xe0]
+ vptestnmb %ymm24, %ymm28, %k4
+
+// CHECK: vptestnmb %ymm24, %ymm28, %k4 {%k1}
+// CHECK: encoding: [0x62,0x92,0x1e,0x21,0x26,0xe0]
+ vptestnmb %ymm24, %ymm28, %k4 {%k1}
+
+// CHECK: vptestnmb (%rcx), %ymm28, %k4
+// CHECK: encoding: [0x62,0xf2,0x1e,0x20,0x26,0x21]
+ vptestnmb (%rcx), %ymm28, %k4
+
+// CHECK: vptestnmb 4660(%rax,%r14,8), %ymm28, %k4
+// CHECK: encoding: [0x62,0xb2,0x1e,0x20,0x26,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vptestnmb 4660(%rax,%r14,8), %ymm28, %k4
+
+// CHECK: vptestnmb 4064(%rdx), %ymm28, %k4
+// CHECK: encoding: [0x62,0xf2,0x1e,0x20,0x26,0x62,0x7f]
+ vptestnmb 4064(%rdx), %ymm28, %k4
+
+// CHECK: vptestnmb 4096(%rdx), %ymm28, %k4
+// CHECK: encoding: [0x62,0xf2,0x1e,0x20,0x26,0xa2,0x00,0x10,0x00,0x00]
+ vptestnmb 4096(%rdx), %ymm28, %k4
+
+// CHECK: vptestnmb -4096(%rdx), %ymm28, %k4
+// CHECK: encoding: [0x62,0xf2,0x1e,0x20,0x26,0x62,0x80]
+ vptestnmb -4096(%rdx), %ymm28, %k4
+
+// CHECK: vptestnmb -4128(%rdx), %ymm28, %k4
+// CHECK: encoding: [0x62,0xf2,0x1e,0x20,0x26,0xa2,0xe0,0xef,0xff,0xff]
+ vptestnmb -4128(%rdx), %ymm28, %k4
+
+// CHECK: vpunpcklbw %xmm20, %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x21,0x65,0x00,0x60,0xf4]
+ vpunpcklbw %xmm20, %xmm19, %xmm30
+
+// CHECK: vpunpcklbw %xmm20, %xmm19, %xmm30 {%k4}
+// CHECK: encoding: [0x62,0x21,0x65,0x04,0x60,0xf4]
+ vpunpcklbw %xmm20, %xmm19, %xmm30 {%k4}
+
+// CHECK: vpunpcklbw %xmm20, %xmm19, %xmm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0x65,0x84,0x60,0xf4]
+ vpunpcklbw %xmm20, %xmm19, %xmm30 {%k4} {z}
+
+// CHECK: vpunpcklbw (%rcx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0x31]
+ vpunpcklbw (%rcx), %xmm19, %xmm30
+
+// CHECK: vpunpcklbw 4660(%rax,%r14,8), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x21,0x65,0x00,0x60,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vpunpcklbw 4660(%rax,%r14,8), %xmm19, %xmm30
+
+// CHECK: vpunpcklbw 2032(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0x72,0x7f]
+ vpunpcklbw 2032(%rdx), %xmm19, %xmm30
+
+// CHECK: vpunpcklbw 2048(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0xb2,0x00,0x08,0x00,0x00]
+ vpunpcklbw 2048(%rdx), %xmm19, %xmm30
+
+// CHECK: vpunpcklbw -2048(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0x72,0x80]
+ vpunpcklbw -2048(%rdx), %xmm19, %xmm30
+
+// CHECK: vpunpcklbw -2064(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0xb2,0xf0,0xf7,0xff,0xff]
+ vpunpcklbw -2064(%rdx), %xmm19, %xmm30
+
+// CHECK: vpunpcklbw %ymm22, %ymm28, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x1d,0x20,0x60,0xe6]
+ vpunpcklbw %ymm22, %ymm28, %ymm20
+
+// CHECK: vpunpcklbw %ymm22, %ymm28, %ymm20 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x1d,0x21,0x60,0xe6]
+ vpunpcklbw %ymm22, %ymm28, %ymm20 {%k1}
+
+// CHECK: vpunpcklbw %ymm22, %ymm28, %ymm20 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x1d,0xa1,0x60,0xe6]
+ vpunpcklbw %ymm22, %ymm28, %ymm20 {%k1} {z}
+
+// CHECK: vpunpcklbw (%rcx), %ymm28, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0x21]
+ vpunpcklbw (%rcx), %ymm28, %ymm20
+
+// CHECK: vpunpcklbw 4660(%rax,%r14,8), %ymm28, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x1d,0x20,0x60,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vpunpcklbw 4660(%rax,%r14,8), %ymm28, %ymm20
+
+// CHECK: vpunpcklbw 4064(%rdx), %ymm28, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0x62,0x7f]
+ vpunpcklbw 4064(%rdx), %ymm28, %ymm20
+
+// CHECK: vpunpcklbw 4096(%rdx), %ymm28, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0xa2,0x00,0x10,0x00,0x00]
+ vpunpcklbw 4096(%rdx), %ymm28, %ymm20
+
+// CHECK: vpunpcklbw -4096(%rdx), %ymm28, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0x62,0x80]
+ vpunpcklbw -4096(%rdx), %ymm28, %ymm20
+
+// CHECK: vpunpcklbw -4128(%rdx), %ymm28, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0xa2,0xe0,0xef,0xff,0xff]
+ vpunpcklbw -4128(%rdx), %ymm28, %ymm20
+
+// CHECK: vpunpckhbw %xmm27, %xmm22, %xmm18
+// CHECK: encoding: [0x62,0x81,0x4d,0x00,0x68,0xd3]
+ vpunpckhbw %xmm27, %xmm22, %xmm18
+
+// CHECK: vpunpckhbw %xmm27, %xmm22, %xmm18 {%k1}
+// CHECK: encoding: [0x62,0x81,0x4d,0x01,0x68,0xd3]
+ vpunpckhbw %xmm27, %xmm22, %xmm18 {%k1}
+
+// CHECK: vpunpckhbw %xmm27, %xmm22, %xmm18 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0x4d,0x81,0x68,0xd3]
+ vpunpckhbw %xmm27, %xmm22, %xmm18 {%k1} {z}
+
+// CHECK: vpunpckhbw (%rcx), %xmm22, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x11]
+ vpunpckhbw (%rcx), %xmm22, %xmm18
+
+// CHECK: vpunpckhbw 4660(%rax,%r14,8), %xmm22, %xmm18
+// CHECK: encoding: [0x62,0xa1,0x4d,0x00,0x68,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vpunpckhbw 4660(%rax,%r14,8), %xmm22, %xmm18
+
+// CHECK: vpunpckhbw 2032(%rdx), %xmm22, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x52,0x7f]
+ vpunpckhbw 2032(%rdx), %xmm22, %xmm18
+
+// CHECK: vpunpckhbw 2048(%rdx), %xmm22, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x92,0x00,0x08,0x00,0x00]
+ vpunpckhbw 2048(%rdx), %xmm22, %xmm18
+
+// CHECK: vpunpckhbw -2048(%rdx), %xmm22, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x52,0x80]
+ vpunpckhbw -2048(%rdx), %xmm22, %xmm18
+
+// CHECK: vpunpckhbw -2064(%rdx), %xmm22, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x92,0xf0,0xf7,0xff,0xff]
+ vpunpckhbw -2064(%rdx), %xmm22, %xmm18
+
+// CHECK: vpunpckhbw %ymm24, %ymm20, %ymm17
+// CHECK: encoding: [0x62,0x81,0x5d,0x20,0x68,0xc8]
+ vpunpckhbw %ymm24, %ymm20, %ymm17
+
+// CHECK: vpunpckhbw %ymm24, %ymm20, %ymm17 {%k5}
+// CHECK: encoding: [0x62,0x81,0x5d,0x25,0x68,0xc8]
+ vpunpckhbw %ymm24, %ymm20, %ymm17 {%k5}
+
+// CHECK: vpunpckhbw %ymm24, %ymm20, %ymm17 {%k5} {z}
+// CHECK: encoding: [0x62,0x81,0x5d,0xa5,0x68,0xc8]
+ vpunpckhbw %ymm24, %ymm20, %ymm17 {%k5} {z}
+
+// CHECK: vpunpckhbw (%rcx), %ymm20, %ymm17
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x09]
+ vpunpckhbw (%rcx), %ymm20, %ymm17
+
+// CHECK: vpunpckhbw 4660(%rax,%r14,8), %ymm20, %ymm17
+// CHECK: encoding: [0x62,0xa1,0x5d,0x20,0x68,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vpunpckhbw 4660(%rax,%r14,8), %ymm20, %ymm17
+
+// CHECK: vpunpckhbw 4064(%rdx), %ymm20, %ymm17
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x4a,0x7f]
+ vpunpckhbw 4064(%rdx), %ymm20, %ymm17
+
+// CHECK: vpunpckhbw 4096(%rdx), %ymm20, %ymm17
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x8a,0x00,0x10,0x00,0x00]
+ vpunpckhbw 4096(%rdx), %ymm20, %ymm17
+
+// CHECK: vpunpckhbw -4096(%rdx), %ymm20, %ymm17
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x4a,0x80]
+ vpunpckhbw -4096(%rdx), %ymm20, %ymm17
+
+// CHECK: vpunpckhbw -4128(%rdx), %ymm20, %ymm17
+// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x8a,0xe0,0xef,0xff,0xff]
+ vpunpckhbw -4128(%rdx), %ymm20, %ymm17
+
+// CHECK: vpunpcklwd %xmm17, %xmm27, %xmm27
+// CHECK: encoding: [0x62,0x21,0x25,0x00,0x61,0xd9]
+ vpunpcklwd %xmm17, %xmm27, %xmm27
+
+// CHECK: vpunpcklwd %xmm17, %xmm27, %xmm27 {%k5}
+// CHECK: encoding: [0x62,0x21,0x25,0x05,0x61,0xd9]
+ vpunpcklwd %xmm17, %xmm27, %xmm27 {%k5}
+
+// CHECK: vpunpcklwd %xmm17, %xmm27, %xmm27 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0x25,0x85,0x61,0xd9]
+ vpunpcklwd %xmm17, %xmm27, %xmm27 {%k5} {z}
+
+// CHECK: vpunpcklwd (%rcx), %xmm27, %xmm27
+// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x19]
+ vpunpcklwd (%rcx), %xmm27, %xmm27
+
+// CHECK: vpunpcklwd 4660(%rax,%r14,8), %xmm27, %xmm27
+// CHECK: encoding: [0x62,0x21,0x25,0x00,0x61,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vpunpcklwd 4660(%rax,%r14,8), %xmm27, %xmm27
+
+// CHECK: vpunpcklwd 2032(%rdx), %xmm27, %xmm27
+// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x5a,0x7f]
+ vpunpcklwd 2032(%rdx), %xmm27, %xmm27
+
+// CHECK: vpunpcklwd 2048(%rdx), %xmm27, %xmm27
+// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x9a,0x00,0x08,0x00,0x00]
+ vpunpcklwd 2048(%rdx), %xmm27, %xmm27
+
+// CHECK: vpunpcklwd -2048(%rdx), %xmm27, %xmm27
+// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x5a,0x80]
+ vpunpcklwd -2048(%rdx), %xmm27, %xmm27
+
+// CHECK: vpunpcklwd -2064(%rdx), %xmm27, %xmm27
+// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x9a,0xf0,0xf7,0xff,0xff]
+ vpunpcklwd -2064(%rdx), %xmm27, %xmm27
+
+// CHECK: vpunpcklwd %ymm23, %ymm25, %ymm18
+// CHECK: encoding: [0x62,0xa1,0x35,0x20,0x61,0xd7]
+ vpunpcklwd %ymm23, %ymm25, %ymm18
+
+// CHECK: vpunpcklwd %ymm23, %ymm25, %ymm18 {%k5}
+// CHECK: encoding: [0x62,0xa1,0x35,0x25,0x61,0xd7]
+ vpunpcklwd %ymm23, %ymm25, %ymm18 {%k5}
+
+// CHECK: vpunpcklwd %ymm23, %ymm25, %ymm18 {%k5} {z}
+// CHECK: encoding: [0x62,0xa1,0x35,0xa5,0x61,0xd7]
+ vpunpcklwd %ymm23, %ymm25, %ymm18 {%k5} {z}
+
+// CHECK: vpunpcklwd (%rcx), %ymm25, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x11]
+ vpunpcklwd (%rcx), %ymm25, %ymm18
+
+// CHECK: vpunpcklwd 4660(%rax,%r14,8), %ymm25, %ymm18
+// CHECK: encoding: [0x62,0xa1,0x35,0x20,0x61,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vpunpcklwd 4660(%rax,%r14,8), %ymm25, %ymm18
+
+// CHECK: vpunpcklwd 4064(%rdx), %ymm25, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x52,0x7f]
+ vpunpcklwd 4064(%rdx), %ymm25, %ymm18
+
+// CHECK: vpunpcklwd 4096(%rdx), %ymm25, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x92,0x00,0x10,0x00,0x00]
+ vpunpcklwd 4096(%rdx), %ymm25, %ymm18
+
+// CHECK: vpunpcklwd -4096(%rdx), %ymm25, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x52,0x80]
+ vpunpcklwd -4096(%rdx), %ymm25, %ymm18
+
+// CHECK: vpunpcklwd -4128(%rdx), %ymm25, %ymm18
+// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x92,0xe0,0xef,0xff,0xff]
+ vpunpcklwd -4128(%rdx), %ymm25, %ymm18
+
+// CHECK: vpunpckhwd %xmm17, %xmm28, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x1d,0x00,0x69,0xc9]
+ vpunpckhwd %xmm17, %xmm28, %xmm17
+
+// CHECK: vpunpckhwd %xmm17, %xmm28, %xmm17 {%k7}
+// CHECK: encoding: [0x62,0xa1,0x1d,0x07,0x69,0xc9]
+ vpunpckhwd %xmm17, %xmm28, %xmm17 {%k7}
+
+// CHECK: vpunpckhwd %xmm17, %xmm28, %xmm17 {%k7} {z}
+// CHECK: encoding: [0x62,0xa1,0x1d,0x87,0x69,0xc9]
+ vpunpckhwd %xmm17, %xmm28, %xmm17 {%k7} {z}
+
+// CHECK: vpunpckhwd (%rcx), %xmm28, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x09]
+ vpunpckhwd (%rcx), %xmm28, %xmm17
+
+// CHECK: vpunpckhwd 4660(%rax,%r14,8), %xmm28, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x1d,0x00,0x69,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vpunpckhwd 4660(%rax,%r14,8), %xmm28, %xmm17
+
+// CHECK: vpunpckhwd 2032(%rdx), %xmm28, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x4a,0x7f]
+ vpunpckhwd 2032(%rdx), %xmm28, %xmm17
+
+// CHECK: vpunpckhwd 2048(%rdx), %xmm28, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x8a,0x00,0x08,0x00,0x00]
+ vpunpckhwd 2048(%rdx), %xmm28, %xmm17
+
+// CHECK: vpunpckhwd -2048(%rdx), %xmm28, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x4a,0x80]
+ vpunpckhwd -2048(%rdx), %xmm28, %xmm17
+
+// CHECK: vpunpckhwd -2064(%rdx), %xmm28, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x8a,0xf0,0xf7,0xff,0xff]
+ vpunpckhwd -2064(%rdx), %xmm28, %xmm17
+
+// CHECK: vpunpckhwd %ymm20, %ymm25, %ymm24
+// CHECK: encoding: [0x62,0x21,0x35,0x20,0x69,0xc4]
+ vpunpckhwd %ymm20, %ymm25, %ymm24
+
+// CHECK: vpunpckhwd %ymm20, %ymm25, %ymm24 {%k1}
+// CHECK: encoding: [0x62,0x21,0x35,0x21,0x69,0xc4]
+ vpunpckhwd %ymm20, %ymm25, %ymm24 {%k1}
+
+// CHECK: vpunpckhwd %ymm20, %ymm25, %ymm24 {%k1} {z}
+// CHECK: encoding: [0x62,0x21,0x35,0xa1,0x69,0xc4]
+ vpunpckhwd %ymm20, %ymm25, %ymm24 {%k1} {z}
+
+// CHECK: vpunpckhwd (%rcx), %ymm25, %ymm24
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x01]
+ vpunpckhwd (%rcx), %ymm25, %ymm24
+
+// CHECK: vpunpckhwd 4660(%rax,%r14,8), %ymm25, %ymm24
+// CHECK: encoding: [0x62,0x21,0x35,0x20,0x69,0x84,0xf0,0x34,0x12,0x00,0x00]
+ vpunpckhwd 4660(%rax,%r14,8), %ymm25, %ymm24
+
+// CHECK: vpunpckhwd 4064(%rdx), %ymm25, %ymm24
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x42,0x7f]
+ vpunpckhwd 4064(%rdx), %ymm25, %ymm24
+
+// CHECK: vpunpckhwd 4096(%rdx), %ymm25, %ymm24
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x82,0x00,0x10,0x00,0x00]
+ vpunpckhwd 4096(%rdx), %ymm25, %ymm24
+
+// CHECK: vpunpckhwd -4096(%rdx), %ymm25, %ymm24
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x42,0x80]
+ vpunpckhwd -4096(%rdx), %ymm25, %ymm24
+
+// CHECK: vpunpckhwd -4128(%rdx), %ymm25, %ymm24
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x82,0xe0,0xef,0xff,0xff]
+ vpunpckhwd -4128(%rdx), %ymm25, %ymm24
+
+// CHECK: vpunpcklbw %xmm23, %xmm29, %xmm18
+// CHECK: encoding: [0x62,0xa1,0x15,0x00,0x60,0xd7]
+ vpunpcklbw %xmm23, %xmm29, %xmm18
+
+// CHECK: vpunpcklbw %xmm23, %xmm29, %xmm18 {%k4}
+// CHECK: encoding: [0x62,0xa1,0x15,0x04,0x60,0xd7]
+ vpunpcklbw %xmm23, %xmm29, %xmm18 {%k4}
+
+// CHECK: vpunpcklbw %xmm23, %xmm29, %xmm18 {%k4} {z}
+// CHECK: encoding: [0x62,0xa1,0x15,0x84,0x60,0xd7]
+ vpunpcklbw %xmm23, %xmm29, %xmm18 {%k4} {z}
+
+// CHECK: vpunpcklbw (%rcx), %xmm29, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x11]
+ vpunpcklbw (%rcx), %xmm29, %xmm18
+
+// CHECK: vpunpcklbw 291(%rax,%r14,8), %xmm29, %xmm18
+// CHECK: encoding: [0x62,0xa1,0x15,0x00,0x60,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpunpcklbw 291(%rax,%r14,8), %xmm29, %xmm18
+
+// CHECK: vpunpcklbw 2032(%rdx), %xmm29, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x52,0x7f]
+ vpunpcklbw 2032(%rdx), %xmm29, %xmm18
+
+// CHECK: vpunpcklbw 2048(%rdx), %xmm29, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x92,0x00,0x08,0x00,0x00]
+ vpunpcklbw 2048(%rdx), %xmm29, %xmm18
+
+// CHECK: vpunpcklbw -2048(%rdx), %xmm29, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x52,0x80]
+ vpunpcklbw -2048(%rdx), %xmm29, %xmm18
+
+// CHECK: vpunpcklbw -2064(%rdx), %xmm29, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x92,0xf0,0xf7,0xff,0xff]
+ vpunpcklbw -2064(%rdx), %xmm29, %xmm18
+
+// CHECK: vpunpcklbw %ymm21, %ymm28, %ymm27
+// CHECK: encoding: [0x62,0x21,0x1d,0x20,0x60,0xdd]
+ vpunpcklbw %ymm21, %ymm28, %ymm27
+
+// CHECK: vpunpcklbw %ymm21, %ymm28, %ymm27 {%k4}
+// CHECK: encoding: [0x62,0x21,0x1d,0x24,0x60,0xdd]
+ vpunpcklbw %ymm21, %ymm28, %ymm27 {%k4}
+
+// CHECK: vpunpcklbw %ymm21, %ymm28, %ymm27 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0x1d,0xa4,0x60,0xdd]
+ vpunpcklbw %ymm21, %ymm28, %ymm27 {%k4} {z}
+
+// CHECK: vpunpcklbw (%rcx), %ymm28, %ymm27
+// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x19]
+ vpunpcklbw (%rcx), %ymm28, %ymm27
+
+// CHECK: vpunpcklbw 291(%rax,%r14,8), %ymm28, %ymm27
+// CHECK: encoding: [0x62,0x21,0x1d,0x20,0x60,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpunpcklbw 291(%rax,%r14,8), %ymm28, %ymm27
+
+// CHECK: vpunpcklbw 4064(%rdx), %ymm28, %ymm27
+// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x5a,0x7f]
+ vpunpcklbw 4064(%rdx), %ymm28, %ymm27
+
+// CHECK: vpunpcklbw 4096(%rdx), %ymm28, %ymm27
+// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x9a,0x00,0x10,0x00,0x00]
+ vpunpcklbw 4096(%rdx), %ymm28, %ymm27
+
+// CHECK: vpunpcklbw -4096(%rdx), %ymm28, %ymm27
+// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x5a,0x80]
+ vpunpcklbw -4096(%rdx), %ymm28, %ymm27
+
+// CHECK: vpunpcklbw -4128(%rdx), %ymm28, %ymm27
+// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x9a,0xe0,0xef,0xff,0xff]
+ vpunpcklbw -4128(%rdx), %ymm28, %ymm27
+
+// CHECK: vpunpckhbw %xmm24, %xmm21, %xmm18
+// CHECK: encoding: [0x62,0x81,0x55,0x00,0x68,0xd0]
+ vpunpckhbw %xmm24, %xmm21, %xmm18
+
+// CHECK: vpunpckhbw %xmm24, %xmm21, %xmm18 {%k6}
+// CHECK: encoding: [0x62,0x81,0x55,0x06,0x68,0xd0]
+ vpunpckhbw %xmm24, %xmm21, %xmm18 {%k6}
+
+// CHECK: vpunpckhbw %xmm24, %xmm21, %xmm18 {%k6} {z}
+// CHECK: encoding: [0x62,0x81,0x55,0x86,0x68,0xd0]
+ vpunpckhbw %xmm24, %xmm21, %xmm18 {%k6} {z}
+
+// CHECK: vpunpckhbw (%rcx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x11]
+ vpunpckhbw (%rcx), %xmm21, %xmm18
+
+// CHECK: vpunpckhbw 291(%rax,%r14,8), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xa1,0x55,0x00,0x68,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhbw 291(%rax,%r14,8), %xmm21, %xmm18
+
+// CHECK: vpunpckhbw 2032(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x52,0x7f]
+ vpunpckhbw 2032(%rdx), %xmm21, %xmm18
+
+// CHECK: vpunpckhbw 2048(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x92,0x00,0x08,0x00,0x00]
+ vpunpckhbw 2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vpunpckhbw -2048(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x52,0x80]
+ vpunpckhbw -2048(%rdx), %xmm21, %xmm18
+
+// CHECK: vpunpckhbw -2064(%rdx), %xmm21, %xmm18
+// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x92,0xf0,0xf7,0xff,0xff]
+ vpunpckhbw -2064(%rdx), %xmm21, %xmm18
+
+// CHECK: vpunpckhbw %ymm23, %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x2d,0x20,0x68,0xe7]
+ vpunpckhbw %ymm23, %ymm26, %ymm20
+
+// CHECK: vpunpckhbw %ymm23, %ymm26, %ymm20 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x2d,0x21,0x68,0xe7]
+ vpunpckhbw %ymm23, %ymm26, %ymm20 {%k1}
+
+// CHECK: vpunpckhbw %ymm23, %ymm26, %ymm20 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x2d,0xa1,0x68,0xe7]
+ vpunpckhbw %ymm23, %ymm26, %ymm20 {%k1} {z}
+
+// CHECK: vpunpckhbw (%rcx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0x21]
+ vpunpckhbw (%rcx), %ymm26, %ymm20
+
+// CHECK: vpunpckhbw 291(%rax,%r14,8), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x2d,0x20,0x68,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhbw 291(%rax,%r14,8), %ymm26, %ymm20
+
+// CHECK: vpunpckhbw 4064(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0x62,0x7f]
+ vpunpckhbw 4064(%rdx), %ymm26, %ymm20
+
+// CHECK: vpunpckhbw 4096(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0xa2,0x00,0x10,0x00,0x00]
+ vpunpckhbw 4096(%rdx), %ymm26, %ymm20
+
+// CHECK: vpunpckhbw -4096(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0x62,0x80]
+ vpunpckhbw -4096(%rdx), %ymm26, %ymm20
+
+// CHECK: vpunpckhbw -4128(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0xa2,0xe0,0xef,0xff,0xff]
+ vpunpckhbw -4128(%rdx), %ymm26, %ymm20
+
+// CHECK: vpunpcklwd %xmm21, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x35,0x00,0x61,0xed]
+ vpunpcklwd %xmm21, %xmm25, %xmm21
+
+// CHECK: vpunpcklwd %xmm21, %xmm25, %xmm21 {%k6}
+// CHECK: encoding: [0x62,0xa1,0x35,0x06,0x61,0xed]
+ vpunpcklwd %xmm21, %xmm25, %xmm21 {%k6}
+
+// CHECK: vpunpcklwd %xmm21, %xmm25, %xmm21 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0x35,0x86,0x61,0xed]
+ vpunpcklwd %xmm21, %xmm25, %xmm21 {%k6} {z}
+
+// CHECK: vpunpcklwd (%rcx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0x29]
+ vpunpcklwd (%rcx), %xmm25, %xmm21
+
+// CHECK: vpunpcklwd 291(%rax,%r14,8), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x35,0x00,0x61,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpunpcklwd 291(%rax,%r14,8), %xmm25, %xmm21
+
+// CHECK: vpunpcklwd 2032(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0x6a,0x7f]
+ vpunpcklwd 2032(%rdx), %xmm25, %xmm21
+
+// CHECK: vpunpcklwd 2048(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0xaa,0x00,0x08,0x00,0x00]
+ vpunpcklwd 2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vpunpcklwd -2048(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0x6a,0x80]
+ vpunpcklwd -2048(%rdx), %xmm25, %xmm21
+
+// CHECK: vpunpcklwd -2064(%rdx), %xmm25, %xmm21
+// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0xaa,0xf0,0xf7,0xff,0xff]
+ vpunpcklwd -2064(%rdx), %xmm25, %xmm21
+
+// CHECK: vpunpcklwd %ymm26, %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x01,0x45,0x20,0x61,0xd2]
+ vpunpcklwd %ymm26, %ymm23, %ymm26
+
+// CHECK: vpunpcklwd %ymm26, %ymm23, %ymm26 {%k2}
+// CHECK: encoding: [0x62,0x01,0x45,0x22,0x61,0xd2]
+ vpunpcklwd %ymm26, %ymm23, %ymm26 {%k2}
+
+// CHECK: vpunpcklwd %ymm26, %ymm23, %ymm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0x45,0xa2,0x61,0xd2]
+ vpunpcklwd %ymm26, %ymm23, %ymm26 {%k2} {z}
+
+// CHECK: vpunpcklwd (%rcx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x11]
+ vpunpcklwd (%rcx), %ymm23, %ymm26
+
+// CHECK: vpunpcklwd 291(%rax,%r14,8), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x21,0x45,0x20,0x61,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpunpcklwd 291(%rax,%r14,8), %ymm23, %ymm26
+
+// CHECK: vpunpcklwd 4064(%rdx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x52,0x7f]
+ vpunpcklwd 4064(%rdx), %ymm23, %ymm26
+
+// CHECK: vpunpcklwd 4096(%rdx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x92,0x00,0x10,0x00,0x00]
+ vpunpcklwd 4096(%rdx), %ymm23, %ymm26
+
+// CHECK: vpunpcklwd -4096(%rdx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x52,0x80]
+ vpunpcklwd -4096(%rdx), %ymm23, %ymm26
+
+// CHECK: vpunpcklwd -4128(%rdx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x92,0xe0,0xef,0xff,0xff]
+ vpunpcklwd -4128(%rdx), %ymm23, %ymm26
+
+// CHECK: vpunpckhwd %xmm23, %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x6d,0x00,0x69,0xcf]
+ vpunpckhwd %xmm23, %xmm18, %xmm17
+
+// CHECK: vpunpckhwd %xmm23, %xmm18, %xmm17 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x6d,0x03,0x69,0xcf]
+ vpunpckhwd %xmm23, %xmm18, %xmm17 {%k3}
+
+// CHECK: vpunpckhwd %xmm23, %xmm18, %xmm17 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x6d,0x83,0x69,0xcf]
+ vpunpckhwd %xmm23, %xmm18, %xmm17 {%k3} {z}
+
+// CHECK: vpunpckhwd (%rcx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x09]
+ vpunpckhwd (%rcx), %xmm18, %xmm17
+
+// CHECK: vpunpckhwd 291(%rax,%r14,8), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x6d,0x00,0x69,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhwd 291(%rax,%r14,8), %xmm18, %xmm17
+
+// CHECK: vpunpckhwd 2032(%rdx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x4a,0x7f]
+ vpunpckhwd 2032(%rdx), %xmm18, %xmm17
+
+// CHECK: vpunpckhwd 2048(%rdx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x8a,0x00,0x08,0x00,0x00]
+ vpunpckhwd 2048(%rdx), %xmm18, %xmm17
+
+// CHECK: vpunpckhwd -2048(%rdx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x4a,0x80]
+ vpunpckhwd -2048(%rdx), %xmm18, %xmm17
+
+// CHECK: vpunpckhwd -2064(%rdx), %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x8a,0xf0,0xf7,0xff,0xff]
+ vpunpckhwd -2064(%rdx), %xmm18, %xmm17
+
+// CHECK: vpunpckhwd %ymm26, %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x01,0x35,0x20,0x69,0xe2]
+ vpunpckhwd %ymm26, %ymm25, %ymm28
+
+// CHECK: vpunpckhwd %ymm26, %ymm25, %ymm28 {%k4}
+// CHECK: encoding: [0x62,0x01,0x35,0x24,0x69,0xe2]
+ vpunpckhwd %ymm26, %ymm25, %ymm28 {%k4}
+
+// CHECK: vpunpckhwd %ymm26, %ymm25, %ymm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x01,0x35,0xa4,0x69,0xe2]
+ vpunpckhwd %ymm26, %ymm25, %ymm28 {%k4} {z}
+
+// CHECK: vpunpckhwd (%rcx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x21]
+ vpunpckhwd (%rcx), %ymm25, %ymm28
+
+// CHECK: vpunpckhwd 291(%rax,%r14,8), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x21,0x35,0x20,0x69,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhwd 291(%rax,%r14,8), %ymm25, %ymm28
+
+// CHECK: vpunpckhwd 4064(%rdx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x62,0x7f]
+ vpunpckhwd 4064(%rdx), %ymm25, %ymm28
+
+// CHECK: vpunpckhwd 4096(%rdx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0xa2,0x00,0x10,0x00,0x00]
+ vpunpckhwd 4096(%rdx), %ymm25, %ymm28
+
+// CHECK: vpunpckhwd -4096(%rdx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x62,0x80]
+ vpunpckhwd -4096(%rdx), %ymm25, %ymm28
+
+// CHECK: vpunpckhwd -4128(%rdx), %ymm25, %ymm28
+// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0xa2,0xe0,0xef,0xff,0xff]
+ vpunpckhwd -4128(%rdx), %ymm25, %ymm28
+
+
+// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0xdd,0xab]
+ vpalignr $171, %xmm21, %xmm26, %xmm19
+
+// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4}
+// CHECK: encoding: [0x62,0xa3,0x2d,0x04,0x0f,0xdd,0xab]
+ vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4}
+
+// CHECK: vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4} {z}
+// CHECK: encoding: [0x62,0xa3,0x2d,0x84,0x0f,0xdd,0xab]
+ vpalignr $171, %xmm21, %xmm26, %xmm19 {%k4} {z}
+
+// CHECK: vpalignr $123, %xmm21, %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0xdd,0x7b]
+ vpalignr $123, %xmm21, %xmm26, %xmm19
+
+// CHECK: vpalignr $123, (%rcx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x19,0x7b]
+ vpalignr $123, (%rcx), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, 291(%rax,%r14,8), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xa3,0x2d,0x00,0x0f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpalignr $123, 291(%rax,%r14,8), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, 2032(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x5a,0x7f,0x7b]
+ vpalignr $123, 2032(%rdx), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, 2048(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vpalignr $123, 2048(%rdx), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, -2048(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x5a,0x80,0x7b]
+ vpalignr $123, -2048(%rdx), %xmm26, %xmm19
+
+// CHECK: vpalignr $123, -2064(%rdx), %xmm26, %xmm19
+// CHECK: encoding: [0x62,0xe3,0x2d,0x00,0x0f,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vpalignr $123, -2064(%rdx), %xmm26, %xmm19
+
+// CHECK: vpalignr $171, %ymm22, %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x0f,0xde,0xab]
+ vpalignr $171, %ymm22, %ymm21, %ymm27
+
+// CHECK: vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2}
+// CHECK: encoding: [0x62,0x23,0x55,0x22,0x0f,0xde,0xab]
+ vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2}
+
+// CHECK: vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x23,0x55,0xa2,0x0f,0xde,0xab]
+ vpalignr $171, %ymm22, %ymm21, %ymm27 {%k2} {z}
+
+// CHECK: vpalignr $123, %ymm22, %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x0f,0xde,0x7b]
+ vpalignr $123, %ymm22, %ymm21, %ymm27
+
+// CHECK: vpalignr $123, (%rcx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x19,0x7b]
+ vpalignr $123, (%rcx), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, 291(%rax,%r14,8), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x23,0x55,0x20,0x0f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpalignr $123, 291(%rax,%r14,8), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, 4064(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x5a,0x7f,0x7b]
+ vpalignr $123, 4064(%rdx), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, 4096(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x9a,0x00,0x10,0x00,0x00,0x7b]
+ vpalignr $123, 4096(%rdx), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, -4096(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x5a,0x80,0x7b]
+ vpalignr $123, -4096(%rdx), %ymm21, %ymm27
+
+// CHECK: vpalignr $123, -4128(%rdx), %ymm21, %ymm27
+// CHECK: encoding: [0x62,0x63,0x55,0x20,0x0f,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+ vpalignr $123, -4128(%rdx), %ymm21, %ymm27
+
+// CHECK: vpalignr $171, %xmm25, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x03,0x5d,0x00,0x0f,0xf1,0xab]
+ vpalignr $0xab, %xmm25, %xmm20, %xmm30
+
+// CHECK: vpalignr $171, %xmm25, %xmm20, %xmm30 {%k2}
+// CHECK: encoding: [0x62,0x03,0x5d,0x02,0x0f,0xf1,0xab]
+ vpalignr $0xab, %xmm25, %xmm20, %xmm30 {%k2}
+
+// CHECK: vpalignr $171, %xmm25, %xmm20, %xmm30 {%k2} {z}
+// CHECK: encoding: [0x62,0x03,0x5d,0x82,0x0f,0xf1,0xab]
+ vpalignr $0xab, %xmm25, %xmm20, %xmm30 {%k2} {z}
+
+// CHECK: vpalignr $123, %xmm25, %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x03,0x5d,0x00,0x0f,0xf1,0x7b]
+ vpalignr $0x7b, %xmm25, %xmm20, %xmm30
+
+// CHECK: vpalignr $123, (%rcx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0x31,0x7b]
+ vpalignr $0x7b,(%rcx), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, 4660(%rax,%r14,8), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x23,0x5d,0x00,0x0f,0xb4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpalignr $0x7b,4660(%rax,%r14,8), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, 2032(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0x72,0x7f,0x7b]
+ vpalignr $0x7b,2032(%rdx), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, 2048(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0xb2,0x00,0x08,0x00,0x00,0x7b]
+ vpalignr $0x7b,2048(%rdx), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, -2048(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0x72,0x80,0x7b]
+ vpalignr $0x7b,-2048(%rdx), %xmm20, %xmm30
+
+// CHECK: vpalignr $123, -2064(%rdx), %xmm20, %xmm30
+// CHECK: encoding: [0x62,0x63,0x5d,0x00,0x0f,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+ vpalignr $0x7b,-2064(%rdx), %xmm20, %xmm30
+
+// CHECK: vpalignr $171, %ymm27, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0x83,0x75,0x20,0x0f,0xeb,0xab]
+ vpalignr $0xab, %ymm27, %ymm17, %ymm21
+
+// CHECK: vpalignr $171, %ymm27, %ymm17, %ymm21 {%k7}
+// CHECK: encoding: [0x62,0x83,0x75,0x27,0x0f,0xeb,0xab]
+ vpalignr $0xab, %ymm27, %ymm17, %ymm21 {%k7}
+
+// CHECK: vpalignr $171, %ymm27, %ymm17, %ymm21 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x75,0xa7,0x0f,0xeb,0xab]
+ vpalignr $0xab, %ymm27, %ymm17, %ymm21 {%k7} {z}
+
+// CHECK: vpalignr $123, %ymm27, %ymm17, %ymm21
+// CHECK: encoding: [0x62,0x83,0x75,0x20,0x0f,0xeb,0x7b]
+ vpalignr $0x7b, %ymm27, %ymm17, %ymm21
+
+// CHECK: vpalignr $123, (%rcx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0x29,0x7b]
+ vpalignr $0x7b,(%rcx), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, 4660(%rax,%r14,8), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x75,0x20,0x0f,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpalignr $0x7b,4660(%rax,%r14,8), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, 4064(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0x6a,0x7f,0x7b]
+ vpalignr $0x7b,4064(%rdx), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, 4096(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vpalignr $0x7b,4096(%rdx), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, -4096(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0x6a,0x80,0x7b]
+ vpalignr $0x7b,-4096(%rdx), %ymm17, %ymm21
+
+// CHECK: vpalignr $123, -4128(%rdx), %ymm17, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x75,0x20,0x0f,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vpalignr $0x7b,-4128(%rdx), %ymm17, %ymm21
+
+// CHECK: vdbpsadbw $171, %xmm20, %xmm29, %xmm17
+// CHECK: encoding: [0x62,0xa3,0x15,0x00,0x42,0xcc,0xab]
+ vdbpsadbw $0xab, %xmm20, %xmm29, %xmm17
+
+// CHECK: vdbpsadbw $171, %xmm20, %xmm29, %xmm17 {%k4}
+// CHECK: encoding: [0x62,0xa3,0x15,0x04,0x42,0xcc,0xab]
+ vdbpsadbw $0xab, %xmm20, %xmm29, %xmm17 {%k4}
+
+// CHECK: vdbpsadbw $171, %xmm20, %xmm29, %xmm17 {%k4} {z}
+// CHECK: encoding: [0x62,0xa3,0x15,0x84,0x42,0xcc,0xab]
+ vdbpsadbw $0xab, %xmm20, %xmm29, %xmm17 {%k4} {z}
+
+// CHECK: vdbpsadbw $123, %xmm20, %xmm29, %xmm17
+// CHECK: encoding: [0x62,0xa3,0x15,0x00,0x42,0xcc,0x7b]
+ vdbpsadbw $0x7b, %xmm20, %xmm29, %xmm17
+
+// CHECK: vdbpsadbw $123, (%rcx), %xmm29, %xmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x42,0x09,0x7b]
+ vdbpsadbw $0x7b,(%rcx), %xmm29, %xmm17
+
+// CHECK: vdbpsadbw $123, 4660(%rax,%r14,8), %xmm29, %xmm17
+// CHECK: encoding: [0x62,0xa3,0x15,0x00,0x42,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vdbpsadbw $0x7b,4660(%rax,%r14,8), %xmm29, %xmm17
+
+// CHECK: vdbpsadbw $123, 2032(%rdx), %xmm29, %xmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x42,0x4a,0x7f,0x7b]
+ vdbpsadbw $0x7b,2032(%rdx), %xmm29, %xmm17
+
+// CHECK: vdbpsadbw $123, 2048(%rdx), %xmm29, %xmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x42,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vdbpsadbw $0x7b,2048(%rdx), %xmm29, %xmm17
+
+// CHECK: vdbpsadbw $123, -2048(%rdx), %xmm29, %xmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x42,0x4a,0x80,0x7b]
+ vdbpsadbw $0x7b,-2048(%rdx), %xmm29, %xmm17
+
+// CHECK: vdbpsadbw $123, -2064(%rdx), %xmm29, %xmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x42,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vdbpsadbw $0x7b,-2064(%rdx), %xmm29, %xmm17
+
+// CHECK: vdbpsadbw $171, %ymm26, %ymm28, %ymm26
+// CHECK: encoding: [0x62,0x03,0x1d,0x20,0x42,0xd2,0xab]
+ vdbpsadbw $0xab, %ymm26, %ymm28, %ymm26
+
+// CHECK: vdbpsadbw $171, %ymm26, %ymm28, %ymm26 {%k4}
+// CHECK: encoding: [0x62,0x03,0x1d,0x24,0x42,0xd2,0xab]
+ vdbpsadbw $0xab, %ymm26, %ymm28, %ymm26 {%k4}
+
+// CHECK: vdbpsadbw $171, %ymm26, %ymm28, %ymm26 {%k4} {z}
+// CHECK: encoding: [0x62,0x03,0x1d,0xa4,0x42,0xd2,0xab]
+ vdbpsadbw $0xab, %ymm26, %ymm28, %ymm26 {%k4} {z}
+
+// CHECK: vdbpsadbw $123, %ymm26, %ymm28, %ymm26
+// CHECK: encoding: [0x62,0x03,0x1d,0x20,0x42,0xd2,0x7b]
+ vdbpsadbw $0x7b, %ymm26, %ymm28, %ymm26
+
+// CHECK: vdbpsadbw $123, (%rcx), %ymm28, %ymm26
+// CHECK: encoding: [0x62,0x63,0x1d,0x20,0x42,0x11,0x7b]
+ vdbpsadbw $0x7b,(%rcx), %ymm28, %ymm26
+
+// CHECK: vdbpsadbw $123, 4660(%rax,%r14,8), %ymm28, %ymm26
+// CHECK: encoding: [0x62,0x23,0x1d,0x20,0x42,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vdbpsadbw $0x7b,4660(%rax,%r14,8), %ymm28, %ymm26
+
+// CHECK: vdbpsadbw $123, 4064(%rdx), %ymm28, %ymm26
+// CHECK: encoding: [0x62,0x63,0x1d,0x20,0x42,0x52,0x7f,0x7b]
+ vdbpsadbw $0x7b,4064(%rdx), %ymm28, %ymm26
+
+// CHECK: vdbpsadbw $123, 4096(%rdx), %ymm28, %ymm26
+// CHECK: encoding: [0x62,0x63,0x1d,0x20,0x42,0x92,0x00,0x10,0x00,0x00,0x7b]
+ vdbpsadbw $0x7b,4096(%rdx), %ymm28, %ymm26
+
+// CHECK: vdbpsadbw $123, -4096(%rdx), %ymm28, %ymm26
+// CHECK: encoding: [0x62,0x63,0x1d,0x20,0x42,0x52,0x80,0x7b]
+ vdbpsadbw $0x7b,-4096(%rdx), %ymm28, %ymm26
+
+// CHECK: vdbpsadbw $123, -4128(%rdx), %ymm28, %ymm26
+// CHECK: encoding: [0x62,0x63,0x1d,0x20,0x42,0x92,0xe0,0xef,0xff,0xff,0x7b]
+ vdbpsadbw $0x7b,-4128(%rdx), %ymm28, %ymm26
+
+// CHECK: vdbpsadbw $171, %xmm17, %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x35,0x00,0x42,0xf1,0xab]
+ vdbpsadbw $171, %xmm17, %xmm25, %xmm22
+
+// CHECK: vdbpsadbw $171, %xmm17, %xmm25, %xmm22 {%k3}
+// CHECK: encoding: [0x62,0xa3,0x35,0x03,0x42,0xf1,0xab]
+ vdbpsadbw $171, %xmm17, %xmm25, %xmm22 {%k3}
+
+// CHECK: vdbpsadbw $171, %xmm17, %xmm25, %xmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0xa3,0x35,0x83,0x42,0xf1,0xab]
+ vdbpsadbw $171, %xmm17, %xmm25, %xmm22 {%k3} {z}
+
+// CHECK: vdbpsadbw $123, %xmm17, %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x35,0x00,0x42,0xf1,0x7b]
+ vdbpsadbw $123, %xmm17, %xmm25, %xmm22
+
+// CHECK: vdbpsadbw $123, (%rcx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x42,0x31,0x7b]
+ vdbpsadbw $123, (%rcx), %xmm25, %xmm22
+
+// CHECK: vdbpsadbw $123, 291(%rax,%r14,8), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x35,0x00,0x42,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vdbpsadbw $123, 291(%rax,%r14,8), %xmm25, %xmm22
+
+// CHECK: vdbpsadbw $123, 2032(%rdx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x42,0x72,0x7f,0x7b]
+ vdbpsadbw $123, 2032(%rdx), %xmm25, %xmm22
+
+// CHECK: vdbpsadbw $123, 2048(%rdx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x42,0xb2,0x00,0x08,0x00,0x00,0x7b]
+ vdbpsadbw $123, 2048(%rdx), %xmm25, %xmm22
+
+// CHECK: vdbpsadbw $123, -2048(%rdx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x42,0x72,0x80,0x7b]
+ vdbpsadbw $123, -2048(%rdx), %xmm25, %xmm22
+
+// CHECK: vdbpsadbw $123, -2064(%rdx), %xmm25, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x42,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+ vdbpsadbw $123, -2064(%rdx), %xmm25, %xmm22
+
+// CHECK: vdbpsadbw $171, %ymm20, %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xa3,0x65,0x20,0x42,0xcc,0xab]
+ vdbpsadbw $171, %ymm20, %ymm19, %ymm17
+
+// CHECK: vdbpsadbw $171, %ymm20, %ymm19, %ymm17 {%k5}
+// CHECK: encoding: [0x62,0xa3,0x65,0x25,0x42,0xcc,0xab]
+ vdbpsadbw $171, %ymm20, %ymm19, %ymm17 {%k5}
+
+// CHECK: vdbpsadbw $171, %ymm20, %ymm19, %ymm17 {%k5} {z}
+// CHECK: encoding: [0x62,0xa3,0x65,0xa5,0x42,0xcc,0xab]
+ vdbpsadbw $171, %ymm20, %ymm19, %ymm17 {%k5} {z}
+
+// CHECK: vdbpsadbw $123, %ymm20, %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xa3,0x65,0x20,0x42,0xcc,0x7b]
+ vdbpsadbw $123, %ymm20, %ymm19, %ymm17
+
+// CHECK: vdbpsadbw $123, (%rcx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x65,0x20,0x42,0x09,0x7b]
+ vdbpsadbw $123, (%rcx), %ymm19, %ymm17
+
+// CHECK: vdbpsadbw $123, 291(%rax,%r14,8), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xa3,0x65,0x20,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vdbpsadbw $123, 291(%rax,%r14,8), %ymm19, %ymm17
+
+// CHECK: vdbpsadbw $123, 4064(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x65,0x20,0x42,0x4a,0x7f,0x7b]
+ vdbpsadbw $123, 4064(%rdx), %ymm19, %ymm17
+
+// CHECK: vdbpsadbw $123, 4096(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x65,0x20,0x42,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vdbpsadbw $123, 4096(%rdx), %ymm19, %ymm17
+
+// CHECK: vdbpsadbw $123, -4096(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x65,0x20,0x42,0x4a,0x80,0x7b]
+ vdbpsadbw $123, -4096(%rdx), %ymm19, %ymm17
+
+// CHECK: vdbpsadbw $123, -4128(%rdx), %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x65,0x20,0x42,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vdbpsadbw $123, -4128(%rdx), %ymm19, %ymm17
+// CHECK: vpslldq $171, %xmm24, %xmm20
+// CHECK: encoding: [0x62,0x91,0x5d,0x00,0x73,0xf8,0xab]
+ vpslldq $171, %xmm24, %xmm20
+
+// CHECK: vpslldq $123, %xmm24, %xmm20
+// CHECK: encoding: [0x62,0x91,0x5d,0x00,0x73,0xf8,0x7b]
+ vpslldq $123, %xmm24, %xmm20
+
+// CHECK: vpslldq $123, (%rcx), %xmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0x39,0x7b]
+ vpslldq $123, (%rcx), %xmm20
+
+// CHECK: vpslldq $123, 291(%rax,%r14,8), %xmm20
+// CHECK: encoding: [0x62,0xb1,0x5d,0x00,0x73,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpslldq $123, 291(%rax,%r14,8), %xmm20
+
+// CHECK: vpslldq $123, 2032(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0x7a,0x7f,0x7b]
+ vpslldq $123, 2032(%rdx), %xmm20
+
+// CHECK: vpslldq $123, 2048(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0xba,0x00,0x08,0x00,0x00,0x7b]
+ vpslldq $123, 2048(%rdx), %xmm20
+
+// CHECK: vpslldq $123, -2048(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0x7a,0x80,0x7b]
+ vpslldq $123, -2048(%rdx), %xmm20
+
+// CHECK: vpslldq $123, -2064(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x00,0x73,0xba,0xf0,0xf7,0xff,0xff,0x7b]
+ vpslldq $123, -2064(%rdx), %xmm20
+
+// CHECK: vpslldq $171, %ymm25, %ymm26
+// CHECK: encoding: [0x62,0x91,0x2d,0x20,0x73,0xf9,0xab]
+ vpslldq $171, %ymm25, %ymm26
+
+// CHECK: vpslldq $123, %ymm25, %ymm26
+// CHECK: encoding: [0x62,0x91,0x2d,0x20,0x73,0xf9,0x7b]
+ vpslldq $123, %ymm25, %ymm26
+
+// CHECK: vpslldq $123, (%rcx), %ymm26
+// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0x39,0x7b]
+ vpslldq $123, (%rcx), %ymm26
+
+// CHECK: vpslldq $123, 291(%rax,%r14,8), %ymm26
+// CHECK: encoding: [0x62,0xb1,0x2d,0x20,0x73,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpslldq $123, 291(%rax,%r14,8), %ymm26
+
+// CHECK: vpslldq $123, 4064(%rdx), %ymm26
+// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0x7a,0x7f,0x7b]
+ vpslldq $123, 4064(%rdx), %ymm26
+
+// CHECK: vpslldq $123, 4096(%rdx), %ymm26
+// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0xba,0x00,0x10,0x00,0x00,0x7b]
+ vpslldq $123, 4096(%rdx), %ymm26
+
+// CHECK: vpslldq $123, -4096(%rdx), %ymm26
+// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0x7a,0x80,0x7b]
+ vpslldq $123, -4096(%rdx), %ymm26
+
+// CHECK: vpslldq $123, -4128(%rdx), %ymm26
+// CHECK: encoding: [0x62,0xf1,0x2d,0x20,0x73,0xba,0xe0,0xef,0xff,0xff,0x7b]
+ vpslldq $123, -4128(%rdx), %ymm26
+
+// CHECK: vpslldq $171, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xb1,0x45,0x00,0x73,0xfb,0xab]
+ vpslldq $0xab, %xmm19, %xmm23
+
+// CHECK: vpslldq $123, %xmm19, %xmm23
+// CHECK: encoding: [0x62,0xb1,0x45,0x00,0x73,0xfb,0x7b]
+ vpslldq $0x7b, %xmm19, %xmm23
+
+// CHECK: vpslldq $123, (%rcx), %xmm23
+// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0x39,0x7b]
+ vpslldq $0x7b,(%rcx), %xmm23
+
+// CHECK: vpslldq $123, 4660(%rax,%r14,8), %xmm23
+// CHECK: encoding: [0x62,0xb1,0x45,0x00,0x73,0xbc,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpslldq $0x7b,4660(%rax,%r14,8), %xmm23
+
+// CHECK: vpslldq $123, 2032(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0x7a,0x7f,0x7b]
+ vpslldq $0x7b,2032(%rdx), %xmm23
+
+// CHECK: vpslldq $123, 2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0xba,0x00,0x08,0x00,0x00,0x7b]
+ vpslldq $0x7b,2048(%rdx), %xmm23
+
+// CHECK: vpslldq $123, -2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0x7a,0x80,0x7b]
+ vpslldq $0x7b,-2048(%rdx), %xmm23
+
+// CHECK: vpslldq $123, -2064(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xf1,0x45,0x00,0x73,0xba,0xf0,0xf7,0xff,0xff,0x7b]
+ vpslldq $0x7b,-2064(%rdx), %xmm23
+
+// CHECK: vpslldq $171, %ymm25, %ymm29
+// CHECK: encoding: [0x62,0x91,0x15,0x20,0x73,0xf9,0xab]
+ vpslldq $0xab, %ymm25, %ymm29
+
+// CHECK: vpslldq $123, %ymm25, %ymm29
+// CHECK: encoding: [0x62,0x91,0x15,0x20,0x73,0xf9,0x7b]
+ vpslldq $0x7b, %ymm25, %ymm29
+
+// CHECK: vpslldq $123, (%rcx), %ymm29
+// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0x39,0x7b]
+ vpslldq $0x7b,(%rcx), %ymm29
+
+// CHECK: vpslldq $123, 4660(%rax,%r14,8), %ymm29
+// CHECK: encoding: [0x62,0xb1,0x15,0x20,0x73,0xbc,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpslldq $0x7b,4660(%rax,%r14,8), %ymm29
+
+// CHECK: vpslldq $123, 4064(%rdx), %ymm29
+// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0x7a,0x7f,0x7b]
+ vpslldq $0x7b,4064(%rdx), %ymm29
+
+// CHECK: vpslldq $123, 4096(%rdx), %ymm29
+// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0xba,0x00,0x10,0x00,0x00,0x7b]
+ vpslldq $0x7b,4096(%rdx), %ymm29
+
+// CHECK: vpslldq $123, -4096(%rdx), %ymm29
+// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0x7a,0x80,0x7b]
+ vpslldq $0x7b,-4096(%rdx), %ymm29
+
+// CHECK: vpslldq $123, -4128(%rdx), %ymm29
+// CHECK: encoding: [0x62,0xf1,0x15,0x20,0x73,0xba,0xe0,0xef,0xff,0xff,0x7b]
+ vpslldq $0x7b,-4128(%rdx), %ymm29
+
+// CHECK: vpsrldq $171, %xmm21, %xmm24
+// CHECK: encoding: [0x62,0xb1,0x3d,0x00,0x73,0xdd,0xab]
+ vpsrldq $171, %xmm21, %xmm24
+
+// CHECK: vpsrldq $123, %xmm21, %xmm24
+// CHECK: encoding: [0x62,0xb1,0x3d,0x00,0x73,0xdd,0x7b]
+ vpsrldq $123, %xmm21, %xmm24
+
+// CHECK: vpsrldq $123, (%rcx), %xmm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x19,0x7b]
+ vpsrldq $123, (%rcx), %xmm24
+
+// CHECK: vpsrldq $123, 291(%rax,%r14,8), %xmm24
+// CHECK: encoding: [0x62,0xb1,0x3d,0x00,0x73,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpsrldq $123, 291(%rax,%r14,8), %xmm24
+
+// CHECK: vpsrldq $123, 2032(%rdx), %xmm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x5a,0x7f,0x7b]
+ vpsrldq $123, 2032(%rdx), %xmm24
+
+// CHECK: vpsrldq $123, 2048(%rdx), %xmm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vpsrldq $123, 2048(%rdx), %xmm24
+
+// CHECK: vpsrldq $123, -2048(%rdx), %xmm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x5a,0x80,0x7b]
+ vpsrldq $123, -2048(%rdx), %xmm24
+
+// CHECK: vpsrldq $123, -2064(%rdx), %xmm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x00,0x73,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vpsrldq $123, -2064(%rdx), %xmm24
+
+// CHECK: vpsrldq $171, %ymm25, %ymm24
+// CHECK: encoding: [0x62,0x91,0x3d,0x20,0x73,0xd9,0xab]
+ vpsrldq $171, %ymm25, %ymm24
+
+// CHECK: vpsrldq $123, %ymm25, %ymm24
+// CHECK: encoding: [0x62,0x91,0x3d,0x20,0x73,0xd9,0x7b]
+ vpsrldq $123, %ymm25, %ymm24
+
+// CHECK: vpsrldq $123, (%rcx), %ymm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x19,0x7b]
+ vpsrldq $123, (%rcx), %ymm24
+
+// CHECK: vpsrldq $123, 291(%rax,%r14,8), %ymm24
+// CHECK: encoding: [0x62,0xb1,0x3d,0x20,0x73,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpsrldq $123, 291(%rax,%r14,8), %ymm24
+
+// CHECK: vpsrldq $123, 4064(%rdx), %ymm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x5a,0x7f,0x7b]
+ vpsrldq $123, 4064(%rdx), %ymm24
+
+// CHECK: vpsrldq $123, 4096(%rdx), %ymm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x9a,0x00,0x10,0x00,0x00,0x7b]
+ vpsrldq $123, 4096(%rdx), %ymm24
+
+// CHECK: vpsrldq $123, -4096(%rdx), %ymm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x5a,0x80,0x7b]
+ vpsrldq $123, -4096(%rdx), %ymm24
+
+// CHECK: vpsrldq $123, -4128(%rdx), %ymm24
+// CHECK: encoding: [0x62,0xf1,0x3d,0x20,0x73,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+ vpsrldq $123, -4128(%rdx), %ymm24
+
+// CHECK: vpsrldq $171, %xmm17, %xmm18
+// CHECK: encoding: [0x62,0xb1,0x6d,0x00,0x73,0xd9,0xab]
+ vpsrldq $0xab, %xmm17, %xmm18
+
+// CHECK: vpsrldq $123, %xmm17, %xmm18
+// CHECK: encoding: [0x62,0xb1,0x6d,0x00,0x73,0xd9,0x7b]
+ vpsrldq $0x7b, %xmm17, %xmm18
+
+// CHECK: vpsrldq $123, (%rcx), %xmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x19,0x7b]
+ vpsrldq $0x7b,(%rcx), %xmm18
+
+// CHECK: vpsrldq $123, 4660(%rax,%r14,8), %xmm18
+// CHECK: encoding: [0x62,0xb1,0x6d,0x00,0x73,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpsrldq $0x7b,4660(%rax,%r14,8), %xmm18
+
+// CHECK: vpsrldq $123, 2032(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x5a,0x7f,0x7b]
+ vpsrldq $0x7b,2032(%rdx), %xmm18
+
+// CHECK: vpsrldq $123, 2048(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vpsrldq $0x7b,2048(%rdx), %xmm18
+
+// CHECK: vpsrldq $123, -2048(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x5a,0x80,0x7b]
+ vpsrldq $0x7b,-2048(%rdx), %xmm18
+
+// CHECK: vpsrldq $123, -2064(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xf1,0x6d,0x00,0x73,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vpsrldq $0x7b,-2064(%rdx), %xmm18
+
+// CHECK: vpsrldq $171, %ymm28, %ymm20
+// CHECK: encoding: [0x62,0x91,0x5d,0x20,0x73,0xdc,0xab]
+ vpsrldq $0xab, %ymm28, %ymm20
+
+// CHECK: vpsrldq $123, %ymm28, %ymm20
+// CHECK: encoding: [0x62,0x91,0x5d,0x20,0x73,0xdc,0x7b]
+ vpsrldq $0x7b, %ymm28, %ymm20
+
+// CHECK: vpsrldq $123, (%rcx), %ymm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x19,0x7b]
+ vpsrldq $0x7b,(%rcx), %ymm20
+
+// CHECK: vpsrldq $123, 4660(%rax,%r14,8), %ymm20
+// CHECK: encoding: [0x62,0xb1,0x5d,0x20,0x73,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpsrldq $0x7b,4660(%rax,%r14,8), %ymm20
+
+// CHECK: vpsrldq $123, 4064(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x5a,0x7f,0x7b]
+ vpsrldq $0x7b,4064(%rdx), %ymm20
+
+// CHECK: vpsrldq $123, 4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x9a,0x00,0x10,0x00,0x00,0x7b]
+ vpsrldq $0x7b,4096(%rdx), %ymm20
+
+// CHECK: vpsrldq $123, -4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x5a,0x80,0x7b]
+ vpsrldq $0x7b,-4096(%rdx), %ymm20
+
+// CHECK: vpsrldq $123, -4128(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xf1,0x5d,0x20,0x73,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+ vpsrldq $0x7b,-4128(%rdx), %ymm20
+
+// CHECK: vpsadbw %xmm24, %xmm24, %xmm17
+// CHECK: encoding: [0x62,0x81,0x3d,0x00,0xf6,0xc8]
+ vpsadbw %xmm24, %xmm24, %xmm17
+
+// CHECK: vpsadbw (%rcx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x09]
+ vpsadbw (%rcx), %xmm24, %xmm17
+
+// CHECK: vpsadbw 291(%rax,%r14,8), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x3d,0x00,0xf6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpsadbw 291(%rax,%r14,8), %xmm24, %xmm17
+
+// CHECK: vpsadbw 2032(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x4a,0x7f]
+ vpsadbw 2032(%rdx), %xmm24, %xmm17
+
+// CHECK: vpsadbw 2048(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x8a,0x00,0x08,0x00,0x00]
+ vpsadbw 2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vpsadbw -2048(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x4a,0x80]
+ vpsadbw -2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vpsadbw -2064(%rdx), %xmm24, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x3d,0x00,0xf6,0x8a,0xf0,0xf7,0xff,0xff]
+ vpsadbw -2064(%rdx), %xmm24, %xmm17
+
+// CHECK: vpsadbw %ymm24, %ymm27, %ymm19
+// CHECK: encoding: [0x62,0x81,0x25,0x20,0xf6,0xd8]
+ vpsadbw %ymm24, %ymm27, %ymm19
+
+// CHECK: vpsadbw (%rcx), %ymm27, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x19]
+ vpsadbw (%rcx), %ymm27, %ymm19
+
+// CHECK: vpsadbw 291(%rax,%r14,8), %ymm27, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x25,0x20,0xf6,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpsadbw 291(%rax,%r14,8), %ymm27, %ymm19
+
+// CHECK: vpsadbw 4064(%rdx), %ymm27, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x5a,0x7f]
+ vpsadbw 4064(%rdx), %ymm27, %ymm19
+
+// CHECK: vpsadbw 4096(%rdx), %ymm27, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x9a,0x00,0x10,0x00,0x00]
+ vpsadbw 4096(%rdx), %ymm27, %ymm19
+
+// CHECK: vpsadbw -4096(%rdx), %ymm27, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x5a,0x80]
+ vpsadbw -4096(%rdx), %ymm27, %ymm19
+
+// CHECK: vpsadbw -4128(%rdx), %ymm27, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x25,0x20,0xf6,0x9a,0xe0,0xef,0xff,0xff]
+ vpsadbw -4128(%rdx), %ymm27, %ymm19
+
+// CHECK: vpsadbw %xmm21, %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x21,0x65,0x00,0xf6,0xf5]
+ vpsadbw %xmm21, %xmm19, %xmm30
+
+// CHECK: vpsadbw (%rcx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0x31]
+ vpsadbw (%rcx), %xmm19, %xmm30
+
+// CHECK: vpsadbw 4660(%rax,%r14,8), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x21,0x65,0x00,0xf6,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vpsadbw 4660(%rax,%r14,8), %xmm19, %xmm30
+
+// CHECK: vpsadbw 2032(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0x72,0x7f]
+ vpsadbw 2032(%rdx), %xmm19, %xmm30
+
+// CHECK: vpsadbw 2048(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0xb2,0x00,0x08,0x00,0x00]
+ vpsadbw 2048(%rdx), %xmm19, %xmm30
+
+// CHECK: vpsadbw -2048(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0x72,0x80]
+ vpsadbw -2048(%rdx), %xmm19, %xmm30
+
+// CHECK: vpsadbw -2064(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x65,0x00,0xf6,0xb2,0xf0,0xf7,0xff,0xff]
+ vpsadbw -2064(%rdx), %xmm19, %xmm30
+
+// CHECK: vpsadbw %ymm27, %ymm26, %ymm20
+// CHECK: encoding: [0x62,0x81,0x2d,0x20,0xf6,0xe3]
+ vpsadbw %ymm27, %ymm26, %ymm20
+
+// CHECK: vpsadbw (%rcx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0x21]
+ vpsadbw (%rcx), %ymm26, %ymm20
+
+// CHECK: vpsadbw 4660(%rax,%r14,8), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x2d,0x20,0xf6,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vpsadbw 4660(%rax,%r14,8), %ymm26, %ymm20
+
+// CHECK: vpsadbw 4064(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0x62,0x7f]
+ vpsadbw 4064(%rdx), %ymm26, %ymm20
+
+// CHECK: vpsadbw 4096(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0xa2,0x00,0x10,0x00,0x00]
+ vpsadbw 4096(%rdx), %ymm26, %ymm20
+
+// CHECK: vpsadbw -4096(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0x62,0x80]
+ vpsadbw -4096(%rdx), %ymm26, %ymm20
+
+// CHECK: vpsadbw -4128(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0xa2,0xe0,0xef,0xff,0xff]
+ vpsadbw -4128(%rdx), %ymm26, %ymm20
+
+// CHECK: vpbroadcastb %xmm28, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x78,0xf4]
+ vpbroadcastb %xmm28, %xmm30
+
+// CHECK: vpbroadcastb %xmm28, %xmm30 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7d,0x0c,0x78,0xf4]
+ vpbroadcastb %xmm28, %xmm30 {%k4}
+
+// CHECK: vpbroadcastb %xmm28, %xmm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x8c,0x78,0xf4]
+ vpbroadcastb %xmm28, %xmm30 {%k4} {z}
+
+// CHECK: vpbroadcastb (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0x31]
+ vpbroadcastb (%rcx), %xmm30
+
+// CHECK: vpbroadcastb 4660(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x78,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vpbroadcastb 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vpbroadcastb 127(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0x72,0x7f]
+ vpbroadcastb 127(%rdx), %xmm30
+
+// CHECK: vpbroadcastb 128(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0xb2,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %xmm30
+
+// CHECK: vpbroadcastb -128(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0x72,0x80]
+ vpbroadcastb -128(%rdx), %xmm30
+
+// CHECK: vpbroadcastb -129(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0xb2,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %xmm30
+
+// CHECK: vpbroadcastb %xmm25, %ymm17
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x78,0xc9]
+ vpbroadcastb %xmm25, %ymm17
+
+// CHECK: vpbroadcastb %xmm25, %ymm17 {%k2}
+// CHECK: encoding: [0x62,0x82,0x7d,0x2a,0x78,0xc9]
+ vpbroadcastb %xmm25, %ymm17 {%k2}
+
+// CHECK: vpbroadcastb %xmm25, %ymm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xaa,0x78,0xc9]
+ vpbroadcastb %xmm25, %ymm17 {%k2} {z}
+
+// CHECK: vpbroadcastb (%rcx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x09]
+ vpbroadcastb (%rcx), %ymm17
+
+// CHECK: vpbroadcastb 4660(%rax,%r14,8), %ymm17
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x78,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vpbroadcastb 4660(%rax,%r14,8), %ymm17
+
+// CHECK: vpbroadcastb 127(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x4a,0x7f]
+ vpbroadcastb 127(%rdx), %ymm17
+
+// CHECK: vpbroadcastb 128(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x8a,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %ymm17
+
+// CHECK: vpbroadcastb -128(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x4a,0x80]
+ vpbroadcastb -128(%rdx), %ymm17
+
+// CHECK: vpbroadcastb -129(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x8a,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %ymm17
+
+// CHECK: vpbroadcastb %eax, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7a,0xe0]
+ vpbroadcastb %eax, %xmm20
+
+// CHECK: vpbroadcastb %eax, %xmm20 {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0b,0x7a,0xe0]
+ vpbroadcastb %eax, %xmm20 {%k3}
+
+// CHECK: vpbroadcastb %eax, %xmm20 {%k3} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x8b,0x7a,0xe0]
+ vpbroadcastb %eax, %xmm20 {%k3} {z}
+
+// CHECK: vpbroadcastb %eax, %ymm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7a,0xd8]
+ vpbroadcastb %eax, %ymm27
+
+// CHECK: vpbroadcastb %eax, %ymm27 {%k6}
+// CHECK: encoding: [0x62,0x62,0x7d,0x2e,0x7a,0xd8]
+ vpbroadcastb %eax, %ymm27 {%k6}
+
+// CHECK: vpbroadcastb %eax, %ymm27 {%k6} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xae,0x7a,0xd8]
+ vpbroadcastb %eax, %ymm27 {%k6} {z}
+
+// CHECK: vpbroadcastw %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x79,0xf0]
+ vpbroadcastw %xmm24, %xmm30
+
+// CHECK: vpbroadcastw %xmm24, %xmm30 {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x09,0x79,0xf0]
+ vpbroadcastw %xmm24, %xmm30 {%k1}
+
+// CHECK: vpbroadcastw %xmm24, %xmm30 {%k1} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x89,0x79,0xf0]
+ vpbroadcastw %xmm24, %xmm30 {%k1} {z}
+
+// CHECK: vpbroadcastw (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0x31]
+ vpbroadcastw (%rcx), %xmm30
+
+// CHECK: vpbroadcastw 4660(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x79,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vpbroadcastw 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vpbroadcastw 254(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0x72,0x7f]
+ vpbroadcastw 254(%rdx), %xmm30
+
+// CHECK: vpbroadcastw 256(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0xb2,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %xmm30
+
+// CHECK: vpbroadcastw -256(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0x72,0x80]
+ vpbroadcastw -256(%rdx), %xmm30
+
+// CHECK: vpbroadcastw -258(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0xb2,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %xmm30
+
+// CHECK: vpbroadcastw %xmm18, %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0xe2]
+ vpbroadcastw %xmm18, %ymm28
+
+// CHECK: vpbroadcastw %xmm18, %ymm28 {%k3}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2b,0x79,0xe2]
+ vpbroadcastw %xmm18, %ymm28 {%k3}
+
+// CHECK: vpbroadcastw %xmm18, %ymm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xab,0x79,0xe2]
+ vpbroadcastw %xmm18, %ymm28 {%k3} {z}
+
+// CHECK: vpbroadcastw (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x21]
+ vpbroadcastw (%rcx), %ymm28
+
+// CHECK: vpbroadcastw 4660(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vpbroadcastw 4660(%rax,%r14,8), %ymm28
+
+// CHECK: vpbroadcastw 254(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x62,0x7f]
+ vpbroadcastw 254(%rdx), %ymm28
+
+// CHECK: vpbroadcastw 256(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0xa2,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %ymm28
+
+// CHECK: vpbroadcastw -256(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x62,0x80]
+ vpbroadcastw -256(%rdx), %ymm28
+
+// CHECK: vpbroadcastw -258(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0xa2,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %ymm28
+
+// CHECK: vpbroadcastw %eax, %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x7b,0xc0]
+ vpbroadcastw %eax, %xmm24
+
+// CHECK: vpbroadcastw %eax, %xmm24 {%k6}
+// CHECK: encoding: [0x62,0x62,0x7d,0x0e,0x7b,0xc0]
+ vpbroadcastw %eax, %xmm24 {%k6}
+
+// CHECK: vpbroadcastw %eax, %xmm24 {%k6} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0x8e,0x7b,0xc0]
+ vpbroadcastw %eax, %xmm24 {%k6} {z}
+
+// CHECK: vpbroadcastw %eax, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x7b,0xd8]
+ vpbroadcastw %eax, %ymm19
+
+// CHECK: vpbroadcastw %eax, %ymm19 {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2b,0x7b,0xd8]
+ vpbroadcastw %eax, %ymm19 {%k3}
+
+// CHECK: vpbroadcastw %eax, %ymm19 {%k3} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0xab,0x7b,0xd8]
+ vpbroadcastw %eax, %ymm19 {%k3} {z}
+
+// CHECK: vpbroadcastb %xmm20, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x78,0xe4]
+ vpbroadcastb %xmm20, %xmm20
+
+// CHECK: vpbroadcastb %xmm20, %xmm20 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0f,0x78,0xe4]
+ vpbroadcastb %xmm20, %xmm20 {%k7}
+
+// CHECK: vpbroadcastb %xmm20, %xmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x8f,0x78,0xe4]
+ vpbroadcastb %xmm20, %xmm20 {%k7} {z}
+
+// CHECK: vpbroadcastb (%rcx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0x21]
+ vpbroadcastb (%rcx), %xmm20
+
+// CHECK: vpbroadcastb 291(%rax,%r14,8), %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x78,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastb 291(%rax,%r14,8), %xmm20
+
+// CHECK: vpbroadcastb 127(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0x62,0x7f]
+ vpbroadcastb 127(%rdx), %xmm20
+
+// CHECK: vpbroadcastb 128(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0xa2,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %xmm20
+
+// CHECK: vpbroadcastb -128(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0x62,0x80]
+ vpbroadcastb -128(%rdx), %xmm20
+
+// CHECK: vpbroadcastb -129(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0xa2,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %xmm20
+
+// CHECK: vpbroadcastb %xmm27, %ymm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x78,0xf3]
+ vpbroadcastb %xmm27, %ymm30
+
+// CHECK: vpbroadcastb %xmm27, %ymm30 {%k6}
+// CHECK: encoding: [0x62,0x02,0x7d,0x2e,0x78,0xf3]
+ vpbroadcastb %xmm27, %ymm30 {%k6}
+
+// CHECK: vpbroadcastb %xmm27, %ymm30 {%k6} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xae,0x78,0xf3]
+ vpbroadcastb %xmm27, %ymm30 {%k6} {z}
+
+// CHECK: vpbroadcastb (%rcx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0x31]
+ vpbroadcastb (%rcx), %ymm30
+
+// CHECK: vpbroadcastb 291(%rax,%r14,8), %ymm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x78,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastb 291(%rax,%r14,8), %ymm30
+
+// CHECK: vpbroadcastb 127(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0x72,0x7f]
+ vpbroadcastb 127(%rdx), %ymm30
+
+// CHECK: vpbroadcastb 128(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0xb2,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %ymm30
+
+// CHECK: vpbroadcastb -128(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0x72,0x80]
+ vpbroadcastb -128(%rdx), %ymm30
+
+// CHECK: vpbroadcastb -129(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0xb2,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %ymm30
+
+// CHECK: vpbroadcastb %eax, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7a,0xf0]
+ vpbroadcastb %eax, %xmm22
+
+// CHECK: vpbroadcastb %eax, %xmm22 {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0b,0x7a,0xf0]
+ vpbroadcastb %eax, %xmm22 {%k3}
+
+// CHECK: vpbroadcastb %eax, %xmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x8b,0x7a,0xf0]
+ vpbroadcastb %eax, %xmm22 {%k3} {z}
+
+// CHECK: vpbroadcastb %eax, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x7a,0xc8]
+ vpbroadcastb %eax, %ymm17
+
+// CHECK: vpbroadcastb %eax, %ymm17 {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x29,0x7a,0xc8]
+ vpbroadcastb %eax, %ymm17 {%k1}
+
+// CHECK: vpbroadcastb %eax, %ymm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0xa9,0x7a,0xc8]
+ vpbroadcastb %eax, %ymm17 {%k1} {z}
+
+// CHECK: vpbroadcastw %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x79,0xdc]
+ vpbroadcastw %xmm20, %xmm19
+
+// CHECK: vpbroadcastw %xmm20, %xmm19 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x79,0xdc]
+ vpbroadcastw %xmm20, %xmm19 {%k2}
+
+// CHECK: vpbroadcastw %xmm20, %xmm19 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x8a,0x79,0xdc]
+ vpbroadcastw %xmm20, %xmm19 {%k2} {z}
+
+// CHECK: vpbroadcastw (%rcx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x19]
+ vpbroadcastw (%rcx), %xmm19
+
+// CHECK: vpbroadcastw 291(%rax,%r14,8), %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x79,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastw 291(%rax,%r14,8), %xmm19
+
+// CHECK: vpbroadcastw 254(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x5a,0x7f]
+ vpbroadcastw 254(%rdx), %xmm19
+
+// CHECK: vpbroadcastw 256(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x9a,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %xmm19
+
+// CHECK: vpbroadcastw -256(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x5a,0x80]
+ vpbroadcastw -256(%rdx), %xmm19
+
+// CHECK: vpbroadcastw -258(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x9a,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %xmm19
+
+// CHECK: vpbroadcastw %xmm17, %ymm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0xc9]
+ vpbroadcastw %xmm17, %ymm25
+
+// CHECK: vpbroadcastw %xmm17, %ymm25 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x79,0xc9]
+ vpbroadcastw %xmm17, %ymm25 {%k7}
+
+// CHECK: vpbroadcastw %xmm17, %ymm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x79,0xc9]
+ vpbroadcastw %xmm17, %ymm25 {%k7} {z}
+
+// CHECK: vpbroadcastw (%rcx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x09]
+ vpbroadcastw (%rcx), %ymm25
+
+// CHECK: vpbroadcastw 291(%rax,%r14,8), %ymm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastw 291(%rax,%r14,8), %ymm25
+
+// CHECK: vpbroadcastw 254(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x4a,0x7f]
+ vpbroadcastw 254(%rdx), %ymm25
+
+// CHECK: vpbroadcastw 256(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x8a,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %ymm25
+
+// CHECK: vpbroadcastw -256(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x4a,0x80]
+ vpbroadcastw -256(%rdx), %ymm25
+
+// CHECK: vpbroadcastw -258(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x8a,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %ymm25
+
+// CHECK: vpbroadcastw %eax, %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x7b,0xe8]
+ vpbroadcastw %eax, %xmm29
+
+// CHECK: vpbroadcastw %eax, %xmm29 {%k1}
+// CHECK: encoding: [0x62,0x62,0x7d,0x09,0x7b,0xe8]
+ vpbroadcastw %eax, %xmm29 {%k1}
+
+// CHECK: vpbroadcastw %eax, %xmm29 {%k1} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0x89,0x7b,0xe8]
+ vpbroadcastw %eax, %xmm29 {%k1} {z}
+
+// CHECK: vpbroadcastw %eax, %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7b,0xe0]
+ vpbroadcastw %eax, %ymm28
+
+// CHECK: vpbroadcastw %eax, %ymm28 {%k4}
+// CHECK: encoding: [0x62,0x62,0x7d,0x2c,0x7b,0xe0]
+ vpbroadcastw %eax, %ymm28 {%k4}
+
+// CHECK: vpbroadcastw %eax, %ymm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xac,0x7b,0xe0]
+ vpbroadcastw %eax, %ymm28 {%k4} {z}
+
+// CHECK: vmovdqu16.s %xmm27, %xmm23
+// CHECK: encoding: [0x62,0x21,0xff,0x08,0x7f,0xdf]
+ vmovdqu16.s %xmm27, %xmm23
+
+// CHECK: vmovdqu16.s %xmm27, %xmm23 {%k2}
+// CHECK: encoding: [0x62,0x21,0xff,0x0a,0x7f,0xdf]
+ vmovdqu16.s %xmm27, %xmm23 {%k2}
+
+// CHECK: vmovdqu16.s %xmm27, %xmm23 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0xff,0x8a,0x7f,0xdf]
+ vmovdqu16.s %xmm27, %xmm23 {%k2} {z}
+
+// CHECK: vmovdqu16.s %xmm17, %xmm27
+// CHECK: encoding: [0x62,0x81,0xff,0x08,0x7f,0xcb]
+ vmovdqu16.s %xmm17, %xmm27
+
+// CHECK: vmovdqu16.s %xmm17, %xmm27 {%k2}
+// CHECK: encoding: [0x62,0x81,0xff,0x0a,0x7f,0xcb]
+ vmovdqu16.s %xmm17, %xmm27 {%k2}
+
+// CHECK: vmovdqu16.s %xmm17, %xmm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x81,0xff,0x8a,0x7f,0xcb]
+ vmovdqu16.s %xmm17, %xmm27 {%k2} {z}
+
+// CHECK: vmovdqu16.s %ymm29, %ymm27
+// CHECK: encoding: [0x62,0x01,0xff,0x28,0x7f,0xeb]
+ vmovdqu16.s %ymm29, %ymm27
+
+// CHECK: vmovdqu16.s %ymm29, %ymm27 {%k6}
+// CHECK: encoding: [0x62,0x01,0xff,0x2e,0x7f,0xeb]
+ vmovdqu16.s %ymm29, %ymm27 {%k6}
+
+// CHECK: vmovdqu16.s %ymm29, %ymm27 {%k6} {z}
+// CHECK: encoding: [0x62,0x01,0xff,0xae,0x7f,0xeb]
+ vmovdqu16.s %ymm29, %ymm27 {%k6} {z}
+
+// CHECK: vmovdqu16.s %ymm29, %ymm29
+// CHECK: encoding: [0x62,0x01,0xff,0x28,0x7f,0xed]
+ vmovdqu16.s %ymm29, %ymm29
+
+// CHECK: vmovdqu16.s %ymm29, %ymm29 {%k1}
+// CHECK: encoding: [0x62,0x01,0xff,0x29,0x7f,0xed]
+ vmovdqu16.s %ymm29, %ymm29 {%k1}
+
+// CHECK: vmovdqu16.s %ymm29, %ymm29 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0xff,0xa9,0x7f,0xed]
+ vmovdqu16.s %ymm29, %ymm29 {%k1} {z}
+
+// CHECK: vmovdqu16.s %xmm26, %xmm24
+// CHECK: encoding: [0x62,0x01,0xff,0x08,0x7f,0xd0]
+ vmovdqu16.s %xmm26, %xmm24
+
+// CHECK: vmovdqu16.s %xmm26, %xmm24 {%k5}
+// CHECK: encoding: [0x62,0x01,0xff,0x0d,0x7f,0xd0]
+ vmovdqu16.s %xmm26, %xmm24 {%k5}
+
+// CHECK: vmovdqu16.s %xmm26, %xmm24 {%k5} {z}
+// CHECK: encoding: [0x62,0x01,0xff,0x8d,0x7f,0xd0]
+ vmovdqu16.s %xmm26, %xmm24 {%k5} {z}
+
+// CHECK: vmovdqu16.s %xmm20, %xmm23
+// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x7f,0xe7]
+ vmovdqu16.s %xmm20, %xmm23
+
+// CHECK: vmovdqu16.s %xmm20, %xmm23 {%k1}
+// CHECK: encoding: [0x62,0xa1,0xff,0x09,0x7f,0xe7]
+ vmovdqu16.s %xmm20, %xmm23 {%k1}
+
+// CHECK: vmovdqu16.s %xmm20, %xmm23 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0xff,0x89,0x7f,0xe7]
+ vmovdqu16.s %xmm20, %xmm23 {%k1} {z}
+
+// CHECK: vmovdqu16.s %ymm23, %ymm28
+// CHECK: encoding: [0x62,0x81,0xff,0x28,0x7f,0xfc]
+ vmovdqu16.s %ymm23, %ymm28
+
+// CHECK: vmovdqu16.s %ymm23, %ymm28 {%k4}
+// CHECK: encoding: [0x62,0x81,0xff,0x2c,0x7f,0xfc]
+ vmovdqu16.s %ymm23, %ymm28 {%k4}
+
+// CHECK: vmovdqu16.s %ymm23, %ymm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x81,0xff,0xac,0x7f,0xfc]
+ vmovdqu16.s %ymm23, %ymm28 {%k4} {z}
+
+// CHECK: vmovdqu16.s %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x01,0xff,0x28,0x7f,0xc2]
+ vmovdqu16.s %ymm24, %ymm26
+
+// CHECK: vmovdqu16.s %ymm24, %ymm26 {%k2}
+// CHECK: encoding: [0x62,0x01,0xff,0x2a,0x7f,0xc2]
+ vmovdqu16.s %ymm24, %ymm26 {%k2}
+
+// CHECK: vmovdqu16.s %ymm24, %ymm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0xff,0xaa,0x7f,0xc2]
+ vmovdqu16.s %ymm24, %ymm26 {%k2} {z}
+
+// CHECK: vmovdqu8.s %xmm22, %xmm18
+// CHECK: encoding: [0x62,0xa1,0x7f,0x08,0x7f,0xf2]
+ vmovdqu8.s %xmm22, %xmm18
+
+// CHECK: vmovdqu8.s %xmm22, %xmm18 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x09,0x7f,0xf2]
+ vmovdqu8.s %xmm22, %xmm18 {%k1}
+
+// CHECK: vmovdqu8.s %xmm22, %xmm18 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x89,0x7f,0xf2]
+ vmovdqu8.s %xmm22, %xmm18 {%k1} {z}
+
+// CHECK: vmovdqu8.s %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x81,0x7f,0x08,0x7f,0xdc]
+ vmovdqu8.s %xmm19, %xmm28
+
+// CHECK: vmovdqu8.s %xmm19, %xmm28 {%k1}
+// CHECK: encoding: [0x62,0x81,0x7f,0x09,0x7f,0xdc]
+ vmovdqu8.s %xmm19, %xmm28 {%k1}
+
+// CHECK: vmovdqu8.s %xmm19, %xmm28 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0x7f,0x89,0x7f,0xdc]
+ vmovdqu8.s %xmm19, %xmm28 {%k1} {z}
+
+// CHECK: vmovdqu8.s %ymm19, %ymm28
+// CHECK: encoding: [0x62,0x81,0x7f,0x28,0x7f,0xdc]
+ vmovdqu8.s %ymm19, %ymm28
+
+// CHECK: vmovdqu8.s %ymm19, %ymm28 {%k3}
+// CHECK: encoding: [0x62,0x81,0x7f,0x2b,0x7f,0xdc]
+ vmovdqu8.s %ymm19, %ymm28 {%k3}
+
+// CHECK: vmovdqu8.s %ymm19, %ymm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0x7f,0xab,0x7f,0xdc]
+ vmovdqu8.s %ymm19, %ymm28 {%k3} {z}
+
+// CHECK: vmovdqu8.s %ymm17, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x7f,0x28,0x7f,0xcb]
+ vmovdqu8.s %ymm17, %ymm19
+
+// CHECK: vmovdqu8.s %ymm17, %ymm19 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x2a,0x7f,0xcb]
+ vmovdqu8.s %ymm17, %ymm19 {%k2}
+
+// CHECK: vmovdqu8.s %ymm17, %ymm19 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x7f,0xaa,0x7f,0xcb]
+ vmovdqu8.s %ymm17, %ymm19 {%k2} {z}
+
+// CHECK: vmovdqu8.s %xmm17, %xmm24
+// CHECK: encoding: [0x62,0x81,0x7f,0x08,0x7f,0xc8]
+ vmovdqu8.s %xmm17, %xmm24
+
+// CHECK: vmovdqu8.s %xmm17, %xmm24 {%k3}
+// CHECK: encoding: [0x62,0x81,0x7f,0x0b,0x7f,0xc8]
+ vmovdqu8.s %xmm17, %xmm24 {%k3}
+
+// CHECK: vmovdqu8.s %xmm17, %xmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0x7f,0x8b,0x7f,0xc8]
+ vmovdqu8.s %xmm17, %xmm24 {%k3} {z}
+
+// CHECK: vmovdqu8.s %xmm27, %xmm23
+// CHECK: encoding: [0x62,0x21,0x7f,0x08,0x7f,0xdf]
+ vmovdqu8.s %xmm27, %xmm23
+
+// CHECK: vmovdqu8.s %xmm27, %xmm23 {%k2}
+// CHECK: encoding: [0x62,0x21,0x7f,0x0a,0x7f,0xdf]
+ vmovdqu8.s %xmm27, %xmm23 {%k2}
+
+// CHECK: vmovdqu8.s %xmm27, %xmm23 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0x7f,0x8a,0x7f,0xdf]
+ vmovdqu8.s %xmm27, %xmm23 {%k2} {z}
+
+// CHECK: vmovdqu8.s %ymm21, %ymm17
+// CHECK: encoding: [0x62,0xa1,0x7f,0x28,0x7f,0xe9]
+ vmovdqu8.s %ymm21, %ymm17
+
+// CHECK: vmovdqu8.s %ymm21, %ymm17 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x29,0x7f,0xe9]
+ vmovdqu8.s %ymm21, %ymm17 {%k1}
+
+// CHECK: vmovdqu8.s %ymm21, %ymm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x7f,0xa9,0x7f,0xe9]
+ vmovdqu8.s %ymm21, %ymm17 {%k1} {z}
+
+// CHECK: vmovdqu8.s %ymm19, %ymm17
+// CHECK: encoding: [0x62,0xa1,0x7f,0x28,0x7f,0xd9]
+ vmovdqu8.s %ymm19, %ymm17
+
+// CHECK: vmovdqu8.s %ymm19, %ymm17 {%k6}
+// CHECK: encoding: [0x62,0xa1,0x7f,0x2e,0x7f,0xd9]
+ vmovdqu8.s %ymm19, %ymm17 {%k6}
+
+// CHECK: vmovdqu8.s %ymm19, %ymm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0x7f,0xae,0x7f,0xd9]
+ vmovdqu8.s %ymm19, %ymm17 {%k6} {z}
+
diff --git a/test/MC/X86/x86-64-avx512cd.s b/test/MC/X86/x86-64-avx512cd.s
new file mode 100644
index 000000000000..49c5ccfa3637
--- /dev/null
+++ b/test/MC/X86/x86-64-avx512cd.s
@@ -0,0 +1,450 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512cd --show-encoding %s | FileCheck %s
+
+// CHECK: vplzcntq %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x44,0xee]
+ vplzcntq %zmm22, %zmm21
+
+// CHECK: vplzcntq %zmm22, %zmm21 {%k7}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4f,0x44,0xee]
+ vplzcntq %zmm22, %zmm21 {%k7}
+
+// CHECK: vplzcntq %zmm22, %zmm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0xcf,0x44,0xee]
+ vplzcntq %zmm22, %zmm21 {%k7} {z}
+
+// CHECK: vplzcntq (%rcx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x29]
+ vplzcntq (%rcx), %zmm21
+
+// CHECK: vplzcntq 291(%rax,%r14,8), %zmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x44,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vplzcntq 291(%rax,%r14,8), %zmm21
+
+// CHECK: vplzcntq (%rcx){1to8}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x29]
+ vplzcntq (%rcx){1to8}, %zmm21
+
+// CHECK: vplzcntq 4064(%rdx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xaa,0xe0,0x0f,0x00,0x00]
+ vplzcntq 4064(%rdx), %zmm21
+
+// CHECK: vplzcntq 4096(%rdx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x6a,0x40]
+ vplzcntq 4096(%rdx), %zmm21
+
+// CHECK: vplzcntq -4096(%rdx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x6a,0xc0]
+ vplzcntq -4096(%rdx), %zmm21
+
+// CHECK: vplzcntq -4128(%rdx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xaa,0xe0,0xef,0xff,0xff]
+ vplzcntq -4128(%rdx), %zmm21
+
+// CHECK: vplzcntq 1016(%rdx){1to8}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x6a,0x7f]
+ vplzcntq 1016(%rdx){1to8}, %zmm21
+
+// CHECK: vplzcntq 1024(%rdx){1to8}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xaa,0x00,0x04,0x00,0x00]
+ vplzcntq 1024(%rdx){1to8}, %zmm21
+
+// CHECK: vplzcntq -1024(%rdx){1to8}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x6a,0x80]
+ vplzcntq -1024(%rdx){1to8}, %zmm21
+
+// CHECK: vplzcntq -1032(%rdx){1to8}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xaa,0xf8,0xfb,0xff,0xff]
+ vplzcntq -1032(%rdx){1to8}, %zmm21
+
+// CHECK: vplzcntq %zmm27, %zmm23
+// CHECK: encoding: [0x62,0x82,0xfd,0x48,0x44,0xfb]
+ vplzcntq %zmm27, %zmm23
+
+// CHECK: vplzcntq %zmm27, %zmm23 {%k5}
+// CHECK: encoding: [0x62,0x82,0xfd,0x4d,0x44,0xfb]
+ vplzcntq %zmm27, %zmm23 {%k5}
+
+// CHECK: vplzcntq %zmm27, %zmm23 {%k5} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0xcd,0x44,0xfb]
+ vplzcntq %zmm27, %zmm23 {%k5} {z}
+
+// CHECK: vplzcntq (%rcx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x39]
+ vplzcntq (%rcx), %zmm23
+
+// CHECK: vplzcntq 4660(%rax,%r14,8), %zmm23
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x44,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vplzcntq 4660(%rax,%r14,8), %zmm23
+
+// CHECK: vplzcntq (%rcx){1to8}, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x39]
+ vplzcntq (%rcx){1to8}, %zmm23
+
+// CHECK: vplzcntq 4064(%rdx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xba,0xe0,0x0f,0x00,0x00]
+ vplzcntq 4064(%rdx), %zmm23
+
+// CHECK: vplzcntq 4096(%rdx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x7a,0x40]
+ vplzcntq 4096(%rdx), %zmm23
+
+// CHECK: vplzcntq -4096(%rdx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x7a,0xc0]
+ vplzcntq -4096(%rdx), %zmm23
+
+// CHECK: vplzcntq -4128(%rdx), %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xba,0xe0,0xef,0xff,0xff]
+ vplzcntq -4128(%rdx), %zmm23
+
+// CHECK: vplzcntq 1016(%rdx){1to8}, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x7a,0x7f]
+ vplzcntq 1016(%rdx){1to8}, %zmm23
+
+// CHECK: vplzcntq 1024(%rdx){1to8}, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xba,0x00,0x04,0x00,0x00]
+ vplzcntq 1024(%rdx){1to8}, %zmm23
+
+// CHECK: vplzcntq -1024(%rdx){1to8}, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x7a,0x80]
+ vplzcntq -1024(%rdx){1to8}, %zmm23
+
+// CHECK: vplzcntq -1032(%rdx){1to8}, %zmm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xba,0xf8,0xfb,0xff,0xff]
+ vplzcntq -1032(%rdx){1to8}, %zmm23
+
+// CHECK: vplzcntd %zmm22, %zmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x44,0xce]
+ vplzcntd %zmm22, %zmm25
+
+// CHECK: vplzcntd %zmm22, %zmm25 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7d,0x4a,0x44,0xce]
+ vplzcntd %zmm22, %zmm25 {%k2}
+
+// CHECK: vplzcntd %zmm22, %zmm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xca,0x44,0xce]
+ vplzcntd %zmm22, %zmm25 {%k2} {z}
+
+// CHECK: vplzcntd (%rcx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x09]
+ vplzcntd (%rcx), %zmm25
+
+// CHECK: vplzcntd 291(%rax,%r14,8), %zmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x44,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vplzcntd 291(%rax,%r14,8), %zmm25
+
+// CHECK: vplzcntd (%rcx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x09]
+ vplzcntd (%rcx){1to16}, %zmm25
+
+// CHECK: vplzcntd 4064(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x8a,0xe0,0x0f,0x00,0x00]
+ vplzcntd 4064(%rdx), %zmm25
+
+// CHECK: vplzcntd 4096(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x4a,0x40]
+ vplzcntd 4096(%rdx), %zmm25
+
+// CHECK: vplzcntd -4096(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x4a,0xc0]
+ vplzcntd -4096(%rdx), %zmm25
+
+// CHECK: vplzcntd -4128(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x8a,0xe0,0xef,0xff,0xff]
+ vplzcntd -4128(%rdx), %zmm25
+
+// CHECK: vplzcntd 508(%rdx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x4a,0x7f]
+ vplzcntd 508(%rdx){1to16}, %zmm25
+
+// CHECK: vplzcntd 512(%rdx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x8a,0x00,0x02,0x00,0x00]
+ vplzcntd 512(%rdx){1to16}, %zmm25
+
+// CHECK: vplzcntd -512(%rdx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x4a,0x80]
+ vplzcntd -512(%rdx){1to16}, %zmm25
+
+// CHECK: vplzcntd -516(%rdx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x8a,0xfc,0xfd,0xff,0xff]
+ vplzcntd -516(%rdx){1to16}, %zmm25
+
+// CHECK: vplzcntd %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x44,0xee]
+ vplzcntd %zmm22, %zmm21
+
+// CHECK: vplzcntd %zmm22, %zmm21 {%k3}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4b,0x44,0xee]
+ vplzcntd %zmm22, %zmm21 {%k3}
+
+// CHECK: vplzcntd %zmm22, %zmm21 {%k3} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0xcb,0x44,0xee]
+ vplzcntd %zmm22, %zmm21 {%k3} {z}
+
+// CHECK: vplzcntd (%rcx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0x29]
+ vplzcntd (%rcx), %zmm21
+
+// CHECK: vplzcntd 4660(%rax,%r14,8), %zmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x44,0xac,0xf0,0x34,0x12,0x00,0x00]
+ vplzcntd 4660(%rax,%r14,8), %zmm21
+
+// CHECK: vplzcntd (%rcx){1to16}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0x29]
+ vplzcntd (%rcx){1to16}, %zmm21
+
+// CHECK: vplzcntd 4064(%rdx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0xaa,0xe0,0x0f,0x00,0x00]
+ vplzcntd 4064(%rdx), %zmm21
+
+// CHECK: vplzcntd 4096(%rdx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0x6a,0x40]
+ vplzcntd 4096(%rdx), %zmm21
+
+// CHECK: vplzcntd -4096(%rdx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0x6a,0xc0]
+ vplzcntd -4096(%rdx), %zmm21
+
+// CHECK: vplzcntd -4128(%rdx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0xaa,0xe0,0xef,0xff,0xff]
+ vplzcntd -4128(%rdx), %zmm21
+
+// CHECK: vplzcntd 508(%rdx){1to16}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0x6a,0x7f]
+ vplzcntd 508(%rdx){1to16}, %zmm21
+
+// CHECK: vplzcntd 512(%rdx){1to16}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0xaa,0x00,0x02,0x00,0x00]
+ vplzcntd 512(%rdx){1to16}, %zmm21
+
+// CHECK: vplzcntd -512(%rdx){1to16}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0x6a,0x80]
+ vplzcntd -512(%rdx){1to16}, %zmm21
+
+// CHECK: vplzcntd -516(%rdx){1to16}, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0xaa,0xfc,0xfd,0xff,0xff]
+ vplzcntd -516(%rdx){1to16}, %zmm21
+
+// CHECK: vpconflictq %zmm25, %zmm20
+// CHECK: encoding: [0x62,0x82,0xfd,0x48,0xc4,0xe1]
+ vpconflictq %zmm25, %zmm20
+
+// CHECK: vpconflictq %zmm25, %zmm20 {%k6}
+// CHECK: encoding: [0x62,0x82,0xfd,0x4e,0xc4,0xe1]
+ vpconflictq %zmm25, %zmm20 {%k6}
+
+// CHECK: vpconflictq %zmm25, %zmm20 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0xce,0xc4,0xe1]
+ vpconflictq %zmm25, %zmm20 {%k6} {z}
+
+// CHECK: vpconflictq (%rcx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x21]
+ vpconflictq (%rcx), %zmm20
+
+// CHECK: vpconflictq 291(%rax,%r14,8), %zmm20
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0xc4,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpconflictq 291(%rax,%r14,8), %zmm20
+
+// CHECK: vpconflictq (%rcx){1to8}, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x21]
+ vpconflictq (%rcx){1to8}, %zmm20
+
+// CHECK: vpconflictq 4064(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0xa2,0xe0,0x0f,0x00,0x00]
+ vpconflictq 4064(%rdx), %zmm20
+
+// CHECK: vpconflictq 4096(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x62,0x40]
+ vpconflictq 4096(%rdx), %zmm20
+
+// CHECK: vpconflictq -4096(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x62,0xc0]
+ vpconflictq -4096(%rdx), %zmm20
+
+// CHECK: vpconflictq -4128(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0xa2,0xe0,0xef,0xff,0xff]
+ vpconflictq -4128(%rdx), %zmm20
+
+// CHECK: vpconflictq 1016(%rdx){1to8}, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x62,0x7f]
+ vpconflictq 1016(%rdx){1to8}, %zmm20
+
+// CHECK: vpconflictq 1024(%rdx){1to8}, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0xa2,0x00,0x04,0x00,0x00]
+ vpconflictq 1024(%rdx){1to8}, %zmm20
+
+// CHECK: vpconflictq -1024(%rdx){1to8}, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x62,0x80]
+ vpconflictq -1024(%rdx){1to8}, %zmm20
+
+// CHECK: vpconflictq -1032(%rdx){1to8}, %zmm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0xa2,0xf8,0xfb,0xff,0xff]
+ vpconflictq -1032(%rdx){1to8}, %zmm20
+
+// CHECK: vpconflictq %zmm21, %zmm17
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0xc4,0xcd]
+ vpconflictq %zmm21, %zmm17
+
+// CHECK: vpconflictq %zmm21, %zmm17 {%k6}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4e,0xc4,0xcd]
+ vpconflictq %zmm21, %zmm17 {%k6}
+
+// CHECK: vpconflictq %zmm21, %zmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0xce,0xc4,0xcd]
+ vpconflictq %zmm21, %zmm17 {%k6} {z}
+
+// CHECK: vpconflictq (%rcx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x09]
+ vpconflictq (%rcx), %zmm17
+
+// CHECK: vpconflictq 4660(%rax,%r14,8), %zmm17
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0xc4,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vpconflictq 4660(%rax,%r14,8), %zmm17
+
+// CHECK: vpconflictq (%rcx){1to8}, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x09]
+ vpconflictq (%rcx){1to8}, %zmm17
+
+// CHECK: vpconflictq 4064(%rdx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x8a,0xe0,0x0f,0x00,0x00]
+ vpconflictq 4064(%rdx), %zmm17
+
+// CHECK: vpconflictq 4096(%rdx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x4a,0x40]
+ vpconflictq 4096(%rdx), %zmm17
+
+// CHECK: vpconflictq -4096(%rdx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x4a,0xc0]
+ vpconflictq -4096(%rdx), %zmm17
+
+// CHECK: vpconflictq -4128(%rdx), %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x8a,0xe0,0xef,0xff,0xff]
+ vpconflictq -4128(%rdx), %zmm17
+
+// CHECK: vpconflictq 1016(%rdx){1to8}, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x4a,0x7f]
+ vpconflictq 1016(%rdx){1to8}, %zmm17
+
+// CHECK: vpconflictq 1024(%rdx){1to8}, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x8a,0x00,0x04,0x00,0x00]
+ vpconflictq 1024(%rdx){1to8}, %zmm17
+
+// CHECK: vpconflictq -1024(%rdx){1to8}, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x4a,0x80]
+ vpconflictq -1024(%rdx){1to8}, %zmm17
+
+// CHECK: vpconflictq -1032(%rdx){1to8}, %zmm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x8a,0xf8,0xfb,0xff,0xff]
+ vpconflictq -1032(%rdx){1to8}, %zmm17
+
+// CHECK: vpconflictd %zmm19, %zmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0xcb]
+ vpconflictd %zmm19, %zmm25
+
+// CHECK: vpconflictd %zmm19, %zmm25 {%k4}
+// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0xc4,0xcb]
+ vpconflictd %zmm19, %zmm25 {%k4}
+
+// CHECK: vpconflictd %zmm19, %zmm25 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0xc4,0xcb]
+ vpconflictd %zmm19, %zmm25 {%k4} {z}
+
+// CHECK: vpconflictd (%rcx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x09]
+ vpconflictd (%rcx), %zmm25
+
+// CHECK: vpconflictd 291(%rax,%r14,8), %zmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpconflictd 291(%rax,%r14,8), %zmm25
+
+// CHECK: vpconflictd (%rcx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x09]
+ vpconflictd (%rcx){1to16}, %zmm25
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512cd --show-encoding %s | FileCheck %s
+
+// CHECK: vpconflictd 4064(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x8a,0xe0,0x0f,0x00,0x00]
+ vpconflictd 4064(%rdx), %zmm25
+
+// CHECK: vpconflictd 4096(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x4a,0x40]
+ vpconflictd 4096(%rdx), %zmm25
+
+// CHECK: vpconflictd -4096(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x4a,0xc0]
+ vpconflictd -4096(%rdx), %zmm25
+
+// CHECK: vpconflictd -4128(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x8a,0xe0,0xef,0xff,0xff]
+ vpconflictd -4128(%rdx), %zmm25
+
+// CHECK: vpconflictd 508(%rdx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x4a,0x7f]
+ vpconflictd 508(%rdx){1to16}, %zmm25
+
+// CHECK: vpconflictd 512(%rdx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x8a,0x00,0x02,0x00,0x00]
+ vpconflictd 512(%rdx){1to16}, %zmm25
+
+// CHECK: vpconflictd -512(%rdx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x4a,0x80]
+ vpconflictd -512(%rdx){1to16}, %zmm25
+
+// CHECK: vpconflictd -516(%rdx){1to16}, %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x8a,0xfc,0xfd,0xff,0xff]
+ vpconflictd -516(%rdx){1to16}, %zmm25
+
+// CHECK: vpconflictd %zmm21, %zmm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0xd5]
+ vpconflictd %zmm21, %zmm26
+
+// CHECK: vpconflictd %zmm21, %zmm26 {%k4}
+// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0xc4,0xd5]
+ vpconflictd %zmm21, %zmm26 {%k4}
+
+// CHECK: vpconflictd %zmm21, %zmm26 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0xc4,0xd5]
+ vpconflictd %zmm21, %zmm26 {%k4} {z}
+
+// CHECK: vpconflictd (%rcx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x11]
+ vpconflictd (%rcx), %zmm26
+
+// CHECK: vpconflictd 4660(%rax,%r14,8), %zmm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vpconflictd 4660(%rax,%r14,8), %zmm26
+
+// CHECK: vpconflictd (%rcx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x11]
+ vpconflictd (%rcx){1to16}, %zmm26
+
+// CHECK: vpconflictd 4064(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x92,0xe0,0x0f,0x00,0x00]
+ vpconflictd 4064(%rdx), %zmm26
+
+// CHECK: vpconflictd 4096(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x52,0x40]
+ vpconflictd 4096(%rdx), %zmm26
+
+// CHECK: vpconflictd -4096(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x52,0xc0]
+ vpconflictd -4096(%rdx), %zmm26
+
+// CHECK: vpconflictd -4128(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x92,0xe0,0xef,0xff,0xff]
+ vpconflictd -4128(%rdx), %zmm26
+
+// CHECK: vpconflictd 508(%rdx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x52,0x7f]
+ vpconflictd 508(%rdx){1to16}, %zmm26
+
+// CHECK: vpconflictd 512(%rdx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x92,0x00,0x02,0x00,0x00]
+ vpconflictd 512(%rdx){1to16}, %zmm26
+
+// CHECK: vpconflictd -512(%rdx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x52,0x80]
+ vpconflictd -512(%rdx){1to16}, %zmm26
+
+// CHECK: vpconflictd -516(%rdx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x92,0xfc,0xfd,0xff,0xff]
+ vpconflictd -516(%rdx){1to16}, %zmm26
diff --git a/test/MC/X86/x86-64-avx512cd_vl.s b/test/MC/X86/x86-64-avx512cd_vl.s
new file mode 100644
index 000000000000..ddaeae69a92c
--- /dev/null
+++ b/test/MC/X86/x86-64-avx512cd_vl.s
@@ -0,0 +1,913 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl --show-encoding %s | FileCheck %s
+
+// CHECK: vplzcntq %xmm20, %xmm18
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x44,0xd4]
+ vplzcntq %xmm20, %xmm18
+
+// CHECK: vplzcntq %xmm20, %xmm18 {%k1}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x09,0x44,0xd4]
+ vplzcntq %xmm20, %xmm18 {%k1}
+
+// CHECK: vplzcntq %xmm20, %xmm18 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x89,0x44,0xd4]
+ vplzcntq %xmm20, %xmm18 {%k1} {z}
+
+// CHECK: vplzcntq (%rcx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x11]
+ vplzcntq (%rcx), %xmm18
+
+// CHECK: vplzcntq 291(%rax,%r14,8), %xmm18
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x44,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vplzcntq 291(%rax,%r14,8), %xmm18
+
+// CHECK: vplzcntq (%rcx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x11]
+ vplzcntq (%rcx){1to2}, %xmm18
+
+// CHECK: vplzcntq 2032(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x52,0x7f]
+ vplzcntq 2032(%rdx), %xmm18
+
+// CHECK: vplzcntq 2048(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x92,0x00,0x08,0x00,0x00]
+ vplzcntq 2048(%rdx), %xmm18
+
+// CHECK: vplzcntq -2048(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x52,0x80]
+ vplzcntq -2048(%rdx), %xmm18
+
+// CHECK: vplzcntq -2064(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x92,0xf0,0xf7,0xff,0xff]
+ vplzcntq -2064(%rdx), %xmm18
+
+// CHECK: vplzcntq 1016(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x52,0x7f]
+ vplzcntq 1016(%rdx){1to2}, %xmm18
+
+// CHECK: vplzcntq 1024(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x92,0x00,0x04,0x00,0x00]
+ vplzcntq 1024(%rdx){1to2}, %xmm18
+
+// CHECK: vplzcntq -1024(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x52,0x80]
+ vplzcntq -1024(%rdx){1to2}, %xmm18
+
+// CHECK: vplzcntq -1032(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x92,0xf8,0xfb,0xff,0xff]
+ vplzcntq -1032(%rdx){1to2}, %xmm18
+
+// CHECK: vplzcntq %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x44,0xee]
+ vplzcntq %ymm22, %ymm21
+
+// CHECK: vplzcntq %ymm22, %ymm21 {%k7}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2f,0x44,0xee]
+ vplzcntq %ymm22, %ymm21 {%k7}
+
+// CHECK: vplzcntq %ymm22, %ymm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0xaf,0x44,0xee]
+ vplzcntq %ymm22, %ymm21 {%k7} {z}
+
+// CHECK: vplzcntq (%rcx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x29]
+ vplzcntq (%rcx), %ymm21
+
+// CHECK: vplzcntq 291(%rax,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x44,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vplzcntq 291(%rax,%r14,8), %ymm21
+
+// CHECK: vplzcntq (%rcx){1to4}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x29]
+ vplzcntq (%rcx){1to4}, %ymm21
+
+// CHECK: vplzcntq 4064(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x6a,0x7f]
+ vplzcntq 4064(%rdx), %ymm21
+
+// CHECK: vplzcntq 4096(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0xaa,0x00,0x10,0x00,0x00]
+ vplzcntq 4096(%rdx), %ymm21
+
+// CHECK: vplzcntq -4096(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x6a,0x80]
+ vplzcntq -4096(%rdx), %ymm21
+
+// CHECK: vplzcntq -4128(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0xaa,0xe0,0xef,0xff,0xff]
+ vplzcntq -4128(%rdx), %ymm21
+
+// CHECK: vplzcntq 1016(%rdx){1to4}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x6a,0x7f]
+ vplzcntq 1016(%rdx){1to4}, %ymm21
+
+// CHECK: vplzcntq 1024(%rdx){1to4}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0xaa,0x00,0x04,0x00,0x00]
+ vplzcntq 1024(%rdx){1to4}, %ymm21
+
+// CHECK: vplzcntq -1024(%rdx){1to4}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x6a,0x80]
+ vplzcntq -1024(%rdx){1to4}, %ymm21
+
+// CHECK: vplzcntq -1032(%rdx){1to4}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0xaa,0xf8,0xfb,0xff,0xff]
+ vplzcntq -1032(%rdx){1to4}, %ymm21
+
+// CHECK: vplzcntq %xmm20, %xmm24
+// CHECK: encoding: [0x62,0x22,0xfd,0x08,0x44,0xc4]
+ vplzcntq %xmm20, %xmm24
+
+// CHECK: vplzcntq %xmm20, %xmm24 {%k3}
+// CHECK: encoding: [0x62,0x22,0xfd,0x0b,0x44,0xc4]
+ vplzcntq %xmm20, %xmm24 {%k3}
+
+// CHECK: vplzcntq %xmm20, %xmm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x22,0xfd,0x8b,0x44,0xc4]
+ vplzcntq %xmm20, %xmm24 {%k3} {z}
+
+// CHECK: vplzcntq (%rcx), %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x01]
+ vplzcntq (%rcx), %xmm24
+
+// CHECK: vplzcntq 4660(%rax,%r14,8), %xmm24
+// CHECK: encoding: [0x62,0x22,0xfd,0x08,0x44,0x84,0xf0,0x34,0x12,0x00,0x00]
+ vplzcntq 4660(%rax,%r14,8), %xmm24
+
+// CHECK: vplzcntq (%rcx){1to2}, %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x01]
+ vplzcntq (%rcx){1to2}, %xmm24
+
+// CHECK: vplzcntq 2032(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x42,0x7f]
+ vplzcntq 2032(%rdx), %xmm24
+
+// CHECK: vplzcntq 2048(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x82,0x00,0x08,0x00,0x00]
+ vplzcntq 2048(%rdx), %xmm24
+
+// CHECK: vplzcntq -2048(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x42,0x80]
+ vplzcntq -2048(%rdx), %xmm24
+
+// CHECK: vplzcntq -2064(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x82,0xf0,0xf7,0xff,0xff]
+ vplzcntq -2064(%rdx), %xmm24
+
+// CHECK: vplzcntq 1016(%rdx){1to2}, %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x42,0x7f]
+ vplzcntq 1016(%rdx){1to2}, %xmm24
+
+// CHECK: vplzcntq 1024(%rdx){1to2}, %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x82,0x00,0x04,0x00,0x00]
+ vplzcntq 1024(%rdx){1to2}, %xmm24
+
+// CHECK: vplzcntq -1024(%rdx){1to2}, %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x42,0x80]
+ vplzcntq -1024(%rdx){1to2}, %xmm24
+
+// CHECK: vplzcntq -1032(%rdx){1to2}, %xmm24
+// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x82,0xf8,0xfb,0xff,0xff]
+ vplzcntq -1032(%rdx){1to2}, %xmm24
+
+// CHECK: vplzcntq %ymm27, %ymm23
+// CHECK: encoding: [0x62,0x82,0xfd,0x28,0x44,0xfb]
+ vplzcntq %ymm27, %ymm23
+
+// CHECK: vplzcntq %ymm27, %ymm23 {%k5}
+// CHECK: encoding: [0x62,0x82,0xfd,0x2d,0x44,0xfb]
+ vplzcntq %ymm27, %ymm23 {%k5}
+
+// CHECK: vplzcntq %ymm27, %ymm23 {%k5} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0xad,0x44,0xfb]
+ vplzcntq %ymm27, %ymm23 {%k5} {z}
+
+// CHECK: vplzcntq (%rcx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x39]
+ vplzcntq (%rcx), %ymm23
+
+// CHECK: vplzcntq 4660(%rax,%r14,8), %ymm23
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x44,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vplzcntq 4660(%rax,%r14,8), %ymm23
+
+// CHECK: vplzcntq (%rcx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x39]
+ vplzcntq (%rcx){1to4}, %ymm23
+
+// CHECK: vplzcntq 4064(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x7a,0x7f]
+ vplzcntq 4064(%rdx), %ymm23
+
+// CHECK: vplzcntq 4096(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0xba,0x00,0x10,0x00,0x00]
+ vplzcntq 4096(%rdx), %ymm23
+
+// CHECK: vplzcntq -4096(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x7a,0x80]
+ vplzcntq -4096(%rdx), %ymm23
+
+// CHECK: vplzcntq -4128(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0xba,0xe0,0xef,0xff,0xff]
+ vplzcntq -4128(%rdx), %ymm23
+
+// CHECK: vplzcntq 1016(%rdx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x7a,0x7f]
+ vplzcntq 1016(%rdx){1to4}, %ymm23
+
+// CHECK: vplzcntq 1024(%rdx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0xba,0x00,0x04,0x00,0x00]
+ vplzcntq 1024(%rdx){1to4}, %ymm23
+
+// CHECK: vplzcntq -1024(%rdx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x7a,0x80]
+ vplzcntq -1024(%rdx){1to4}, %ymm23
+
+// CHECK: vplzcntq -1032(%rdx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0xba,0xf8,0xfb,0xff,0xff]
+ vplzcntq -1032(%rdx){1to4}, %ymm23
+
+// CHECK: vplzcntd %xmm26, %xmm26
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x44,0xd2]
+ vplzcntd %xmm26, %xmm26
+
+// CHECK: vplzcntd %xmm26, %xmm26 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7d,0x0c,0x44,0xd2]
+ vplzcntd %xmm26, %xmm26 {%k4}
+
+// CHECK: vplzcntd %xmm26, %xmm26 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x8c,0x44,0xd2]
+ vplzcntd %xmm26, %xmm26 {%k4} {z}
+
+// CHECK: vplzcntd (%rcx), %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x11]
+ vplzcntd (%rcx), %xmm26
+
+// CHECK: vplzcntd 291(%rax,%r14,8), %xmm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x44,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vplzcntd 291(%rax,%r14,8), %xmm26
+
+// CHECK: vplzcntd (%rcx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x11]
+ vplzcntd (%rcx){1to4}, %xmm26
+
+// CHECK: vplzcntd 2032(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x52,0x7f]
+ vplzcntd 2032(%rdx), %xmm26
+
+// CHECK: vplzcntd 2048(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x92,0x00,0x08,0x00,0x00]
+ vplzcntd 2048(%rdx), %xmm26
+
+// CHECK: vplzcntd -2048(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x52,0x80]
+ vplzcntd -2048(%rdx), %xmm26
+
+// CHECK: vplzcntd -2064(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x92,0xf0,0xf7,0xff,0xff]
+ vplzcntd -2064(%rdx), %xmm26
+
+// CHECK: vplzcntd 508(%rdx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x52,0x7f]
+ vplzcntd 508(%rdx){1to4}, %xmm26
+
+// CHECK: vplzcntd 512(%rdx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x92,0x00,0x02,0x00,0x00]
+ vplzcntd 512(%rdx){1to4}, %xmm26
+
+// CHECK: vplzcntd -512(%rdx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x52,0x80]
+ vplzcntd -512(%rdx){1to4}, %xmm26
+
+// CHECK: vplzcntd -516(%rdx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x92,0xfc,0xfd,0xff,0xff]
+ vplzcntd -516(%rdx){1to4}, %xmm26
+
+// CHECK: vplzcntd %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x44,0xce]
+ vplzcntd %ymm22, %ymm25
+
+// CHECK: vplzcntd %ymm22, %ymm25 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2a,0x44,0xce]
+ vplzcntd %ymm22, %ymm25 {%k2}
+
+// CHECK: vplzcntd %ymm22, %ymm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xaa,0x44,0xce]
+ vplzcntd %ymm22, %ymm25 {%k2} {z}
+
+// CHECK: vplzcntd (%rcx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x09]
+ vplzcntd (%rcx), %ymm25
+
+// CHECK: vplzcntd 291(%rax,%r14,8), %ymm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x44,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vplzcntd 291(%rax,%r14,8), %ymm25
+
+// CHECK: vplzcntd (%rcx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x09]
+ vplzcntd (%rcx){1to8}, %ymm25
+
+// CHECK: vplzcntd 4064(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x4a,0x7f]
+ vplzcntd 4064(%rdx), %ymm25
+
+// CHECK: vplzcntd 4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x8a,0x00,0x10,0x00,0x00]
+ vplzcntd 4096(%rdx), %ymm25
+
+// CHECK: vplzcntd -4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x4a,0x80]
+ vplzcntd -4096(%rdx), %ymm25
+
+// CHECK: vplzcntd -4128(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x8a,0xe0,0xef,0xff,0xff]
+ vplzcntd -4128(%rdx), %ymm25
+
+// CHECK: vplzcntd 508(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x4a,0x7f]
+ vplzcntd 508(%rdx){1to8}, %ymm25
+
+// CHECK: vplzcntd 512(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x8a,0x00,0x02,0x00,0x00]
+ vplzcntd 512(%rdx){1to8}, %ymm25
+
+// CHECK: vplzcntd -512(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x4a,0x80]
+ vplzcntd -512(%rdx){1to8}, %ymm25
+
+// CHECK: vplzcntd -516(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x8a,0xfc,0xfd,0xff,0xff]
+ vplzcntd -516(%rdx){1to8}, %ymm25
+
+// CHECK: vplzcntd %xmm22, %xmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x44,0xf6]
+ vplzcntd %xmm22, %xmm30
+
+// CHECK: vplzcntd %xmm22, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0f,0x44,0xf6]
+ vplzcntd %xmm22, %xmm30 {%k7}
+
+// CHECK: vplzcntd %xmm22, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8f,0x44,0xf6]
+ vplzcntd %xmm22, %xmm30 {%k7} {z}
+
+// CHECK: vplzcntd (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x31]
+ vplzcntd (%rcx), %xmm30
+
+// CHECK: vplzcntd 4660(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x44,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vplzcntd 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vplzcntd (%rcx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x31]
+ vplzcntd (%rcx){1to4}, %xmm30
+
+// CHECK: vplzcntd 2032(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x72,0x7f]
+ vplzcntd 2032(%rdx), %xmm30
+
+// CHECK: vplzcntd 2048(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0xb2,0x00,0x08,0x00,0x00]
+ vplzcntd 2048(%rdx), %xmm30
+
+// CHECK: vplzcntd -2048(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x72,0x80]
+ vplzcntd -2048(%rdx), %xmm30
+
+// CHECK: vplzcntd -2064(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0xb2,0xf0,0xf7,0xff,0xff]
+ vplzcntd -2064(%rdx), %xmm30
+
+// CHECK: vplzcntd 508(%rdx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x72,0x7f]
+ vplzcntd 508(%rdx){1to4}, %xmm30
+
+// CHECK: vplzcntd 512(%rdx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0xb2,0x00,0x02,0x00,0x00]
+ vplzcntd 512(%rdx){1to4}, %xmm30
+
+// CHECK: vplzcntd -512(%rdx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x72,0x80]
+ vplzcntd -512(%rdx){1to4}, %xmm30
+
+// CHECK: vplzcntd -516(%rdx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0xb2,0xfc,0xfd,0xff,0xff]
+ vplzcntd -516(%rdx){1to4}, %xmm30
+
+// CHECK: vplzcntd %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x44,0xee]
+ vplzcntd %ymm22, %ymm21
+
+// CHECK: vplzcntd %ymm22, %ymm21 {%k3}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2b,0x44,0xee]
+ vplzcntd %ymm22, %ymm21 {%k3}
+
+// CHECK: vplzcntd %ymm22, %ymm21 {%k3} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0xab,0x44,0xee]
+ vplzcntd %ymm22, %ymm21 {%k3} {z}
+
+// CHECK: vplzcntd (%rcx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0x29]
+ vplzcntd (%rcx), %ymm21
+
+// CHECK: vplzcntd 4660(%rax,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x44,0xac,0xf0,0x34,0x12,0x00,0x00]
+ vplzcntd 4660(%rax,%r14,8), %ymm21
+
+// CHECK: vplzcntd (%rcx){1to8}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0x29]
+ vplzcntd (%rcx){1to8}, %ymm21
+
+// CHECK: vplzcntd 4064(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0x6a,0x7f]
+ vplzcntd 4064(%rdx), %ymm21
+
+// CHECK: vplzcntd 4096(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0xaa,0x00,0x10,0x00,0x00]
+ vplzcntd 4096(%rdx), %ymm21
+
+// CHECK: vplzcntd -4096(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0x6a,0x80]
+ vplzcntd -4096(%rdx), %ymm21
+
+// CHECK: vplzcntd -4128(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0xaa,0xe0,0xef,0xff,0xff]
+ vplzcntd -4128(%rdx), %ymm21
+
+// CHECK: vplzcntd 508(%rdx){1to8}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0x6a,0x7f]
+ vplzcntd 508(%rdx){1to8}, %ymm21
+
+// CHECK: vplzcntd 512(%rdx){1to8}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0xaa,0x00,0x02,0x00,0x00]
+ vplzcntd 512(%rdx){1to8}, %ymm21
+
+// CHECK: vplzcntd -512(%rdx){1to8}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0x6a,0x80]
+ vplzcntd -512(%rdx){1to8}, %ymm21
+
+// CHECK: vplzcntd -516(%rdx){1to8}, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0xaa,0xfc,0xfd,0xff,0xff]
+ vplzcntd -516(%rdx){1to8}, %ymm21
+
+// CHECK: vpconflictq %xmm24, %xmm19
+// CHECK: encoding: [0x62,0x82,0xfd,0x08,0xc4,0xd8]
+ vpconflictq %xmm24, %xmm19
+
+// CHECK: vpconflictq %xmm24, %xmm19 {%k7}
+// CHECK: encoding: [0x62,0x82,0xfd,0x0f,0xc4,0xd8]
+ vpconflictq %xmm24, %xmm19 {%k7}
+
+// CHECK: vpconflictq %xmm24, %xmm19 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0x8f,0xc4,0xd8]
+ vpconflictq %xmm24, %xmm19 {%k7} {z}
+
+// CHECK: vpconflictq (%rcx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x19]
+ vpconflictq (%rcx), %xmm19
+
+// CHECK: vpconflictq 291(%rax,%r14,8), %xmm19
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0xc4,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpconflictq 291(%rax,%r14,8), %xmm19
+
+// CHECK: vpconflictq (%rcx){1to2}, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x19]
+ vpconflictq (%rcx){1to2}, %xmm19
+
+// CHECK: vpconflictq 2032(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x5a,0x7f]
+ vpconflictq 2032(%rdx), %xmm19
+
+// CHECK: vpconflictq 2048(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x9a,0x00,0x08,0x00,0x00]
+ vpconflictq 2048(%rdx), %xmm19
+
+// CHECK: vpconflictq -2048(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x5a,0x80]
+ vpconflictq -2048(%rdx), %xmm19
+
+// CHECK: vpconflictq -2064(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x9a,0xf0,0xf7,0xff,0xff]
+ vpconflictq -2064(%rdx), %xmm19
+
+// CHECK: vpconflictq 1016(%rdx){1to2}, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x5a,0x7f]
+ vpconflictq 1016(%rdx){1to2}, %xmm19
+
+// CHECK: vpconflictq 1024(%rdx){1to2}, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x9a,0x00,0x04,0x00,0x00]
+ vpconflictq 1024(%rdx){1to2}, %xmm19
+
+// CHECK: vpconflictq -1024(%rdx){1to2}, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x5a,0x80]
+ vpconflictq -1024(%rdx){1to2}, %xmm19
+
+// CHECK: vpconflictq -1032(%rdx){1to2}, %xmm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x9a,0xf8,0xfb,0xff,0xff]
+ vpconflictq -1032(%rdx){1to2}, %xmm19
+
+// CHECK: vpconflictq %ymm25, %ymm20
+// CHECK: encoding: [0x62,0x82,0xfd,0x28,0xc4,0xe1]
+ vpconflictq %ymm25, %ymm20
+
+// CHECK: vpconflictq %ymm25, %ymm20 {%k6}
+// CHECK: encoding: [0x62,0x82,0xfd,0x2e,0xc4,0xe1]
+ vpconflictq %ymm25, %ymm20 {%k6}
+
+// CHECK: vpconflictq %ymm25, %ymm20 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0xae,0xc4,0xe1]
+ vpconflictq %ymm25, %ymm20 {%k6} {z}
+
+// CHECK: vpconflictq (%rcx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x21]
+ vpconflictq (%rcx), %ymm20
+
+// CHECK: vpconflictq 291(%rax,%r14,8), %ymm20
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0xc4,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpconflictq 291(%rax,%r14,8), %ymm20
+
+// CHECK: vpconflictq (%rcx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x21]
+ vpconflictq (%rcx){1to4}, %ymm20
+
+// CHECK: vpconflictq 4064(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x62,0x7f]
+ vpconflictq 4064(%rdx), %ymm20
+
+// CHECK: vpconflictq 4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0xa2,0x00,0x10,0x00,0x00]
+ vpconflictq 4096(%rdx), %ymm20
+
+// CHECK: vpconflictq -4096(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x62,0x80]
+ vpconflictq -4096(%rdx), %ymm20
+
+// CHECK: vpconflictq -4128(%rdx), %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0xa2,0xe0,0xef,0xff,0xff]
+ vpconflictq -4128(%rdx), %ymm20
+
+// CHECK: vpconflictq 1016(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x62,0x7f]
+ vpconflictq 1016(%rdx){1to4}, %ymm20
+
+// CHECK: vpconflictq 1024(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0xa2,0x00,0x04,0x00,0x00]
+ vpconflictq 1024(%rdx){1to4}, %ymm20
+
+// CHECK: vpconflictq -1024(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x62,0x80]
+ vpconflictq -1024(%rdx){1to4}, %ymm20
+
+// CHECK: vpconflictq -1032(%rdx){1to4}, %ymm20
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0xa2,0xf8,0xfb,0xff,0xff]
+ vpconflictq -1032(%rdx){1to4}, %ymm20
+
+// CHECK: vpconflictq %xmm27, %xmm18
+// CHECK: encoding: [0x62,0x82,0xfd,0x08,0xc4,0xd3]
+ vpconflictq %xmm27, %xmm18
+
+// CHECK: vpconflictq %xmm27, %xmm18 {%k4}
+// CHECK: encoding: [0x62,0x82,0xfd,0x0c,0xc4,0xd3]
+ vpconflictq %xmm27, %xmm18 {%k4}
+
+// CHECK: vpconflictq %xmm27, %xmm18 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0x8c,0xc4,0xd3]
+ vpconflictq %xmm27, %xmm18 {%k4} {z}
+
+// CHECK: vpconflictq (%rcx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x11]
+ vpconflictq (%rcx), %xmm18
+
+// CHECK: vpconflictq 4660(%rax,%r14,8), %xmm18
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0xc4,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vpconflictq 4660(%rax,%r14,8), %xmm18
+
+// CHECK: vpconflictq (%rcx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x11]
+ vpconflictq (%rcx){1to2}, %xmm18
+
+// CHECK: vpconflictq 2032(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x52,0x7f]
+ vpconflictq 2032(%rdx), %xmm18
+
+// CHECK: vpconflictq 2048(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x92,0x00,0x08,0x00,0x00]
+ vpconflictq 2048(%rdx), %xmm18
+
+// CHECK: vpconflictq -2048(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x52,0x80]
+ vpconflictq -2048(%rdx), %xmm18
+
+// CHECK: vpconflictq -2064(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x92,0xf0,0xf7,0xff,0xff]
+ vpconflictq -2064(%rdx), %xmm18
+
+// CHECK: vpconflictq 1016(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x52,0x7f]
+ vpconflictq 1016(%rdx){1to2}, %xmm18
+
+// CHECK: vpconflictq 1024(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x92,0x00,0x04,0x00,0x00]
+ vpconflictq 1024(%rdx){1to2}, %xmm18
+
+// CHECK: vpconflictq -1024(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x52,0x80]
+ vpconflictq -1024(%rdx){1to2}, %xmm18
+
+// CHECK: vpconflictq -1032(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x92,0xf8,0xfb,0xff,0xff]
+ vpconflictq -1032(%rdx){1to2}, %xmm18
+
+// CHECK: vpconflictq %ymm21, %ymm17
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0xc4,0xcd]
+ vpconflictq %ymm21, %ymm17
+
+// CHECK: vpconflictq %ymm21, %ymm17 {%k6}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2e,0xc4,0xcd]
+ vpconflictq %ymm21, %ymm17 {%k6}
+
+// CHECK: vpconflictq %ymm21, %ymm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0xfd,0xae,0xc4,0xcd]
+ vpconflictq %ymm21, %ymm17 {%k6} {z}
+
+// CHECK: vpconflictq (%rcx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x09]
+ vpconflictq (%rcx), %ymm17
+
+// CHECK: vpconflictq 4660(%rax,%r14,8), %ymm17
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0xc4,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vpconflictq 4660(%rax,%r14,8), %ymm17
+
+// CHECK: vpconflictq (%rcx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x09]
+ vpconflictq (%rcx){1to4}, %ymm17
+
+// CHECK: vpconflictq 4064(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x4a,0x7f]
+ vpconflictq 4064(%rdx), %ymm17
+
+// CHECK: vpconflictq 4096(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x8a,0x00,0x10,0x00,0x00]
+ vpconflictq 4096(%rdx), %ymm17
+
+// CHECK: vpconflictq -4096(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x4a,0x80]
+ vpconflictq -4096(%rdx), %ymm17
+
+// CHECK: vpconflictq -4128(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x8a,0xe0,0xef,0xff,0xff]
+ vpconflictq -4128(%rdx), %ymm17
+
+// CHECK: vpconflictq 1016(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x4a,0x7f]
+ vpconflictq 1016(%rdx){1to4}, %ymm17
+
+// CHECK: vpconflictq 1024(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x8a,0x00,0x04,0x00,0x00]
+ vpconflictq 1024(%rdx){1to4}, %ymm17
+
+// CHECK: vpconflictq -1024(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x4a,0x80]
+ vpconflictq -1024(%rdx){1to4}, %ymm17
+
+// CHECK: vpconflictq -1032(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x8a,0xf8,0xfb,0xff,0xff]
+ vpconflictq -1032(%rdx){1to4}, %ymm17
+
+// CHECK: vpconflictd %xmm27, %xmm21
+// CHECK: encoding: [0x62,0x82,0x7d,0x08,0xc4,0xeb]
+ vpconflictd %xmm27, %xmm21
+
+// CHECK: vpconflictd %xmm27, %xmm21 {%k5}
+// CHECK: encoding: [0x62,0x82,0x7d,0x0d,0xc4,0xeb]
+ vpconflictd %xmm27, %xmm21 {%k5}
+
+// CHECK: vpconflictd %xmm27, %xmm21 {%k5} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0x8d,0xc4,0xeb]
+ vpconflictd %xmm27, %xmm21 {%k5} {z}
+
+// CHECK: vpconflictd (%rcx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0x29]
+ vpconflictd (%rcx), %xmm21
+
+// CHECK: vpconflictd 291(%rax,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0xc4,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpconflictd 291(%rax,%r14,8), %xmm21
+
+// CHECK: vpconflictd (%rcx){1to4}, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0x29]
+ vpconflictd (%rcx){1to4}, %xmm21
+
+// CHECK: vpconflictd 2032(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0x6a,0x7f]
+ vpconflictd 2032(%rdx), %xmm21
+
+// CHECK: vpconflictd 2048(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0xaa,0x00,0x08,0x00,0x00]
+ vpconflictd 2048(%rdx), %xmm21
+
+// CHECK: vpconflictd -2048(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0x6a,0x80]
+ vpconflictd -2048(%rdx), %xmm21
+
+// CHECK: vpconflictd -2064(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0xaa,0xf0,0xf7,0xff,0xff]
+ vpconflictd -2064(%rdx), %xmm21
+
+// CHECK: vpconflictd 508(%rdx){1to4}, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0x6a,0x7f]
+ vpconflictd 508(%rdx){1to4}, %xmm21
+
+// CHECK: vpconflictd 512(%rdx){1to4}, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0xaa,0x00,0x02,0x00,0x00]
+ vpconflictd 512(%rdx){1to4}, %xmm21
+
+// CHECK: vpconflictd -512(%rdx){1to4}, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0x6a,0x80]
+ vpconflictd -512(%rdx){1to4}, %xmm21
+
+// CHECK: vpconflictd -516(%rdx){1to4}, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0xaa,0xfc,0xfd,0xff,0xff]
+ vpconflictd -516(%rdx){1to4}, %xmm21
+
+// CHECK: vpconflictd %ymm19, %ymm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0xc4,0xcb]
+ vpconflictd %ymm19, %ymm25
+
+// CHECK: vpconflictd %ymm19, %ymm25 {%k4}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2c,0xc4,0xcb]
+ vpconflictd %ymm19, %ymm25 {%k4}
+
+// CHECK: vpconflictd %ymm19, %ymm25 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xac,0xc4,0xcb]
+ vpconflictd %ymm19, %ymm25 {%k4} {z}
+
+// CHECK: vpconflictd (%rcx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x09]
+ vpconflictd (%rcx), %ymm25
+
+// CHECK: vpconflictd 291(%rax,%r14,8), %ymm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0xc4,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpconflictd 291(%rax,%r14,8), %ymm25
+
+// CHECK: vpconflictd (%rcx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x09]
+ vpconflictd (%rcx){1to8}, %ymm25
+
+// CHECK: vpconflictd 4064(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x4a,0x7f]
+ vpconflictd 4064(%rdx), %ymm25
+
+// CHECK: vpconflictd 4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x8a,0x00,0x10,0x00,0x00]
+ vpconflictd 4096(%rdx), %ymm25
+
+// CHECK: vpconflictd -4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x4a,0x80]
+ vpconflictd -4096(%rdx), %ymm25
+
+// CHECK: vpconflictd -4128(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x8a,0xe0,0xef,0xff,0xff]
+ vpconflictd -4128(%rdx), %ymm25
+
+// CHECK: vpconflictd 508(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x4a,0x7f]
+ vpconflictd 508(%rdx){1to8}, %ymm25
+
+// CHECK: vpconflictd 512(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x8a,0x00,0x02,0x00,0x00]
+ vpconflictd 512(%rdx){1to8}, %ymm25
+
+// CHECK: vpconflictd -512(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x4a,0x80]
+ vpconflictd -512(%rdx){1to8}, %ymm25
+
+// CHECK: vpconflictd -516(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x8a,0xfc,0xfd,0xff,0xff]
+ vpconflictd -516(%rdx){1to8}, %ymm25
+
+// CHECK: vpconflictd %xmm28, %xmm27
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0xc4,0xdc]
+ vpconflictd %xmm28, %xmm27
+
+// CHECK: vpconflictd %xmm28, %xmm27 {%k3}
+// CHECK: encoding: [0x62,0x02,0x7d,0x0b,0xc4,0xdc]
+ vpconflictd %xmm28, %xmm27 {%k3}
+
+// CHECK: vpconflictd %xmm28, %xmm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x8b,0xc4,0xdc]
+ vpconflictd %xmm28, %xmm27 {%k3} {z}
+
+// CHECK: vpconflictd (%rcx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x19]
+ vpconflictd (%rcx), %xmm27
+
+// CHECK: vpconflictd 4660(%rax,%r14,8), %xmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0xc4,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vpconflictd 4660(%rax,%r14,8), %xmm27
+
+// CHECK: vpconflictd (%rcx){1to4}, %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x19]
+ vpconflictd (%rcx){1to4}, %xmm27
+
+// CHECK: vpconflictd 2032(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x5a,0x7f]
+ vpconflictd 2032(%rdx), %xmm27
+
+// CHECK: vpconflictd 2048(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x9a,0x00,0x08,0x00,0x00]
+ vpconflictd 2048(%rdx), %xmm27
+
+// CHECK: vpconflictd -2048(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x5a,0x80]
+ vpconflictd -2048(%rdx), %xmm27
+
+// CHECK: vpconflictd -2064(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x9a,0xf0,0xf7,0xff,0xff]
+ vpconflictd -2064(%rdx), %xmm27
+
+// CHECK: vpconflictd 508(%rdx){1to4}, %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x5a,0x7f]
+ vpconflictd 508(%rdx){1to4}, %xmm27
+
+// CHECK: vpconflictd 512(%rdx){1to4}, %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x9a,0x00,0x02,0x00,0x00]
+ vpconflictd 512(%rdx){1to4}, %xmm27
+
+// CHECK: vpconflictd -512(%rdx){1to4}, %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x5a,0x80]
+ vpconflictd -512(%rdx){1to4}, %xmm27
+
+// CHECK: vpconflictd -516(%rdx){1to4}, %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x9a,0xfc,0xfd,0xff,0xff]
+ vpconflictd -516(%rdx){1to4}, %xmm27
+
+// CHECK: vpconflictd %ymm21, %ymm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0xc4,0xd5]
+ vpconflictd %ymm21, %ymm26
+
+// CHECK: vpconflictd %ymm21, %ymm26 {%k4}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2c,0xc4,0xd5]
+ vpconflictd %ymm21, %ymm26 {%k4}
+
+// CHECK: vpconflictd %ymm21, %ymm26 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xac,0xc4,0xd5]
+ vpconflictd %ymm21, %ymm26 {%k4} {z}
+
+// CHECK: vpconflictd (%rcx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x11]
+ vpconflictd (%rcx), %ymm26
+
+// CHECK: vpconflictd 4660(%rax,%r14,8), %ymm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0xc4,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vpconflictd 4660(%rax,%r14,8), %ymm26
+
+// CHECK: vpconflictd (%rcx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x11]
+ vpconflictd (%rcx){1to8}, %ymm26
+
+// CHECK: vpconflictd 4064(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x52,0x7f]
+ vpconflictd 4064(%rdx), %ymm26
+
+// CHECK: vpconflictd 4096(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x92,0x00,0x10,0x00,0x00]
+ vpconflictd 4096(%rdx), %ymm26
+
+// CHECK: vpconflictd -4096(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x52,0x80]
+ vpconflictd -4096(%rdx), %ymm26
+
+// CHECK: vpconflictd -4128(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x92,0xe0,0xef,0xff,0xff]
+ vpconflictd -4128(%rdx), %ymm26
+
+// CHECK: vpconflictd 508(%rdx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x52,0x7f]
+ vpconflictd 508(%rdx){1to8}, %ymm26
+
+// CHECK: vpconflictd 512(%rdx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x92,0x00,0x02,0x00,0x00]
+ vpconflictd 512(%rdx){1to8}, %ymm26
+
+// CHECK: vpconflictd -512(%rdx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x52,0x80]
+ vpconflictd -512(%rdx){1to8}, %ymm26
+
+// CHECK: vpconflictd -516(%rdx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x92,0xfc,0xfd,0xff,0xff]
+ vpconflictd -516(%rdx){1to8}, %ymm26
+
+// CHECK: vpbroadcastmw2d %k4, %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x3a,0xdc]
+ vpbroadcastmw2d %k4, %xmm19
+
+// CHECK: vpbroadcastmw2d %k3, %ymm24
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x3a,0xc3]
+ vpbroadcastmw2d %k3, %ymm24
+
+// CHECK: vpbroadcastmw2d %k4, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x3a,0xec]
+ vpbroadcastmw2d %k4, %xmm21
+
+// CHECK: vpbroadcastmw2d %k4, %ymm27
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x3a,0xdc]
+ vpbroadcastmw2d %k4, %ymm27
diff --git a/test/MC/X86/x86-64-avx512dq.s b/test/MC/X86/x86-64-avx512dq.s
index 4b26f7a0b80e..d0b91d69ebde 100644
--- a/test/MC/X86/x86-64-avx512dq.s
+++ b/test/MC/X86/x86-64-avx512dq.s
@@ -1391,6 +1391,470 @@
// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x51,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
vrangess $0x7b,-516(%rdx), %xmm24, %xmm25
+// CHECK: vreducepd $171, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0xdb,0xab]
+ vreducepd $0xab, %zmm19, %zmm19
+
+// CHECK: vreducepd $171, %zmm19, %zmm19 {%k6}
+// CHECK: encoding: [0x62,0xa3,0xfd,0x4e,0x56,0xdb,0xab]
+ vreducepd $0xab, %zmm19, %zmm19 {%k6}
+
+// CHECK: vreducepd $171, %zmm19, %zmm19 {%k6} {z}
+// CHECK: encoding: [0x62,0xa3,0xfd,0xce,0x56,0xdb,0xab]
+ vreducepd $0xab, %zmm19, %zmm19 {%k6} {z}
+
+// CHECK: vreducepd $171,{sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0xab]
+ vreducepd $0xab,{sae}, %zmm19, %zmm19
+
+// CHECK: vreducepd $123, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0xdb,0x7b]
+ vreducepd $0x7b, %zmm19, %zmm19
+
+// CHECK: vreducepd $123,{sae}, %zmm19, %zmm19
+// CHECK: encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0x7b]
+ vreducepd $0x7b,{sae}, %zmm19, %zmm19
+
+// CHECK: vreducepd $123, (%rcx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x19,0x7b]
+ vreducepd $0x7b,(%rcx), %zmm19
+
+// CHECK: vreducepd $123, 291(%rax,%r14,8), %zmm19
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vreducepd $0x7b,291(%rax,%r14,8), %zmm19
+
+// CHECK: vreducepd $123, (%rcx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x19,0x7b]
+ vreducepd $0x7b,(%rcx){1to8}, %zmm19
+
+// CHECK: vreducepd $123, 8128(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x5a,0x7f,0x7b]
+ vreducepd $0x7b,8128(%rdx), %zmm19
+
+// CHECK: vreducepd $123, 8192(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x9a,0x00,0x20,0x00,0x00,0x7b]
+ vreducepd $0x7b,8192(%rdx), %zmm19
+
+// CHECK: vreducepd $123, -8192(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x5a,0x80,0x7b]
+ vreducepd $0x7b,-8192(%rdx), %zmm19
+
+// CHECK: vreducepd $123, -8256(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+ vreducepd $0x7b,-8256(%rdx), %zmm19
+
+// CHECK: vreducepd $123, 1016(%rdx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x5a,0x7f,0x7b]
+ vreducepd $0x7b,1016(%rdx){1to8}, %zmm19
+
+// CHECK: vreducepd $123, 1024(%rdx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vreducepd $0x7b,1024(%rdx){1to8}, %zmm19
+
+// CHECK: vreducepd $123, -1024(%rdx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x5a,0x80,0x7b]
+ vreducepd $0x7b,-1024(%rdx){1to8}, %zmm19
+
+// CHECK: vreducepd $123, -1032(%rdx){1to8}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vreducepd $0x7b,-1032(%rdx){1to8}, %zmm19
+
+// CHECK: vreduceps $171, %zmm29, %zmm19
+// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x56,0xdd,0xab]
+ vreduceps $0xab, %zmm29, %zmm19
+
+// CHECK: vreduceps $171, %zmm29, %zmm19 {%k3}
+// CHECK: encoding: [0x62,0x83,0x7d,0x4b,0x56,0xdd,0xab]
+ vreduceps $0xab, %zmm29, %zmm19 {%k3}
+
+// CHECK: vreduceps $171, %zmm29, %zmm19 {%k3} {z}
+// CHECK: encoding: [0x62,0x83,0x7d,0xcb,0x56,0xdd,0xab]
+ vreduceps $0xab, %zmm29, %zmm19 {%k3} {z}
+
+// CHECK: vreduceps $171,{sae}, %zmm29, %zmm19
+// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0xab]
+ vreduceps $0xab,{sae}, %zmm29, %zmm19
+
+// CHECK: vreduceps $123, %zmm29, %zmm19
+// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x56,0xdd,0x7b]
+ vreduceps $0x7b, %zmm29, %zmm19
+
+// CHECK: vreduceps $123,{sae}, %zmm29, %zmm19
+// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0x7b]
+ vreduceps $0x7b,{sae}, %zmm29, %zmm19
+
+// CHECK: vreduceps $123, (%rcx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x19,0x7b]
+ vreduceps $0x7b,(%rcx), %zmm19
+
+// CHECK: vreduceps $123, 291(%rax,%r14,8), %zmm19
+// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x56,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vreduceps $0x7b,291(%rax,%r14,8), %zmm19
+
+// CHECK: vreduceps $123, (%rcx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x19,0x7b]
+ vreduceps $0x7b,(%rcx){1to16}, %zmm19
+
+// CHECK: vreduceps $123, 8128(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x5a,0x7f,0x7b]
+ vreduceps $0x7b,8128(%rdx), %zmm19
+
+// CHECK: vreduceps $123, 8192(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x9a,0x00,0x20,0x00,0x00,0x7b]
+ vreduceps $0x7b,8192(%rdx), %zmm19
+
+// CHECK: vreduceps $123, -8192(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x5a,0x80,0x7b]
+ vreduceps $0x7b,-8192(%rdx), %zmm19
+
+// CHECK: vreduceps $123, -8256(%rdx), %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x56,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+ vreduceps $0x7b,-8256(%rdx), %zmm19
+
+// CHECK: vreduceps $123, 508(%rdx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x5a,0x7f,0x7b]
+ vreduceps $0x7b,508(%rdx){1to16}, %zmm19
+
+// CHECK: vreduceps $123, 512(%rdx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vreduceps $0x7b,512(%rdx){1to16}, %zmm19
+
+// CHECK: vreduceps $123, -512(%rdx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x5a,0x80,0x7b]
+ vreduceps $0x7b,-512(%rdx){1to16}, %zmm19
+
+// CHECK: vreduceps $123, -516(%rdx){1to16}, %zmm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x58,0x56,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vreduceps $0x7b,-516(%rdx){1to16}, %zmm19
+
+// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17
+// CHECK: encoding: [0x62,0x83,0xf5,0x00,0x57,0xc9,0xab]
+ vreducesd $0xab, %xmm25, %xmm17, %xmm17
+
+// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17 {%k6}
+// CHECK: encoding: [0x62,0x83,0xf5,0x06,0x57,0xc9,0xab]
+ vreducesd $0xab, %xmm25, %xmm17, %xmm17 {%k6}
+
+// CHECK: vreducesd $171, %xmm25, %xmm17, %xmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0x83,0xf5,0x86,0x57,0xc9,0xab]
+ vreducesd $0xab, %xmm25, %xmm17, %xmm17 {%k6} {z}
+
+// CHECK: vreducesd $171,{sae}, %xmm25, %xmm17, %xmm17
+// CHECK: encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0xab]
+ vreducesd $0xab,{sae}, %xmm25, %xmm17, %xmm17
+
+// CHECK: vreducesd $123, %xmm25, %xmm17, %xmm17
+// CHECK: encoding: [0x62,0x83,0xf5,0x00,0x57,0xc9,0x7b]
+ vreducesd $0x7b, %xmm25, %xmm17, %xmm17
+
+// CHECK: vreducesd $123,{sae}, %xmm25, %xmm17, %xmm17
+// CHECK: encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0x7b]
+ vreducesd $0x7b,{sae}, %xmm25, %xmm17, %xmm17
+
+// CHECK: vreducesd $123, (%rcx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x09,0x7b]
+ vreducesd $0x7b,(%rcx), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, 291(%rax,%r14,8), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xf5,0x00,0x57,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vreducesd $0x7b,291(%rax,%r14,8), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, 1016(%rdx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x4a,0x7f,0x7b]
+ vreducesd $0x7b,1016(%rdx), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, 1024(%rdx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vreducesd $0x7b,1024(%rdx), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, -1024(%rdx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x4a,0x80,0x7b]
+ vreducesd $0x7b,-1024(%rdx), %xmm17, %xmm17
+
+// CHECK: vreducesd $123, -1032(%rdx), %xmm17, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xf5,0x00,0x57,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vreducesd $0x7b,-1032(%rdx), %xmm17, %xmm17
+
+// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xf5,0xab]
+ vreducess $0xab, %xmm21, %xmm29, %xmm30
+
+// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30 {%k1}
+// CHECK: encoding: [0x62,0x23,0x15,0x01,0x57,0xf5,0xab]
+ vreducess $0xab, %xmm21, %xmm29, %xmm30 {%k1}
+
+// CHECK: vreducess $171, %xmm21, %xmm29, %xmm30 {%k1} {z}
+// CHECK: encoding: [0x62,0x23,0x15,0x81,0x57,0xf5,0xab]
+ vreducess $0xab, %xmm21, %xmm29, %xmm30 {%k1} {z}
+
+// CHECK: vreducess $171,{sae}, %xmm21, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0xab]
+ vreducess $0xab,{sae}, %xmm21, %xmm29, %xmm30
+
+// CHECK: vreducess $123, %xmm21, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xf5,0x7b]
+ vreducess $0x7b, %xmm21, %xmm29, %xmm30
+
+// CHECK: vreducess $123,{sae}, %xmm21, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0x7b]
+ vreducess $0x7b,{sae}, %xmm21, %xmm29, %xmm30
+
+// CHECK: vreducess $123, (%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0x31,0x7b]
+ vreducess $0x7b,(%rcx), %xmm29, %xmm30
+
+// CHECK: vreducess $123, 291(%rax,%r14,8), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vreducess $0x7b,291(%rax,%r14,8), %xmm29, %xmm30
+
+// CHECK: vreducess $123, 508(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0x72,0x7f,0x7b]
+ vreducess $0x7b,508(%rdx), %xmm29, %xmm30
+
+// CHECK: vreducess $123, 512(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0xb2,0x00,0x02,0x00,0x00,0x7b]
+ vreducess $0x7b,512(%rdx), %xmm29, %xmm30
+
+// CHECK: vreducess $123, -512(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0x72,0x80,0x7b]
+ vreducess $0x7b,-512(%rdx), %xmm29, %xmm30
+
+// CHECK: vreducess $123, -516(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x63,0x15,0x00,0x57,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+ vreducess $0x7b,-516(%rdx), %xmm29, %xmm30
+
+// CHECK: vreducepd $171, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0x83,0xfd,0x48,0x56,0xd4,0xab]
+ vreducepd $0xab, %zmm28, %zmm18
+
+// CHECK: vreducepd $171, %zmm28, %zmm18 {%k5}
+// CHECK: encoding: [0x62,0x83,0xfd,0x4d,0x56,0xd4,0xab]
+ vreducepd $0xab, %zmm28, %zmm18 {%k5}
+
+// CHECK: vreducepd $171, %zmm28, %zmm18 {%k5} {z}
+// CHECK: encoding: [0x62,0x83,0xfd,0xcd,0x56,0xd4,0xab]
+ vreducepd $0xab, %zmm28, %zmm18 {%k5} {z}
+
+// CHECK: vreducepd $171,{sae}, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0xab]
+ vreducepd $0xab,{sae}, %zmm28, %zmm18
+
+// CHECK: vreducepd $123, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0x83,0xfd,0x48,0x56,0xd4,0x7b]
+ vreducepd $0x7b, %zmm28, %zmm18
+
+// CHECK: vreducepd $123,{sae}, %zmm28, %zmm18
+// CHECK: encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0x7b]
+ vreducepd $0x7b,{sae}, %zmm28, %zmm18
+
+// CHECK: vreducepd $123, (%rcx), %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x11,0x7b]
+ vreducepd $0x7b,(%rcx), %zmm18
+
+// CHECK: vreducepd $123, 4660(%rax,%r14,8), %zmm18
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vreducepd $0x7b,4660(%rax,%r14,8), %zmm18
+
+// CHECK: vreducepd $123, (%rcx){1to8}, %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x11,0x7b]
+ vreducepd $0x7b,(%rcx){1to8}, %zmm18
+
+// CHECK: vreducepd $123, 8128(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x52,0x7f,0x7b]
+ vreducepd $0x7b,8128(%rdx), %zmm18
+
+// CHECK: vreducepd $123, 8192(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vreducepd $0x7b,8192(%rdx), %zmm18
+
+// CHECK: vreducepd $123, -8192(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x52,0x80,0x7b]
+ vreducepd $0x7b,-8192(%rdx), %zmm18
+
+// CHECK: vreducepd $123, -8256(%rdx), %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x56,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vreducepd $0x7b,-8256(%rdx), %zmm18
+
+// CHECK: vreducepd $123, 1016(%rdx){1to8}, %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x52,0x7f,0x7b]
+ vreducepd $0x7b,1016(%rdx){1to8}, %zmm18
+
+// CHECK: vreducepd $123, 1024(%rdx){1to8}, %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vreducepd $0x7b,1024(%rdx){1to8}, %zmm18
+
+// CHECK: vreducepd $123, -1024(%rdx){1to8}, %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x52,0x80,0x7b]
+ vreducepd $0x7b,-1024(%rdx){1to8}, %zmm18
+
+// CHECK: vreducepd $123, -1032(%rdx){1to8}, %zmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x58,0x56,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vreducepd $0x7b,-1032(%rdx){1to8}, %zmm18
+
+// CHECK: vreduceps $171, %zmm25, %zmm26
+// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x56,0xd1,0xab]
+ vreduceps $0xab, %zmm25, %zmm26
+
+// CHECK: vreduceps $171, %zmm25, %zmm26 {%k3}
+// CHECK: encoding: [0x62,0x03,0x7d,0x4b,0x56,0xd1,0xab]
+ vreduceps $0xab, %zmm25, %zmm26 {%k3}
+
+// CHECK: vreduceps $171, %zmm25, %zmm26 {%k3} {z}
+// CHECK: encoding: [0x62,0x03,0x7d,0xcb,0x56,0xd1,0xab]
+ vreduceps $0xab, %zmm25, %zmm26 {%k3} {z}
+
+// CHECK: vreduceps $171,{sae}, %zmm25, %zmm26
+// CHECK: encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0xab]
+ vreduceps $0xab,{sae}, %zmm25, %zmm26
+
+// CHECK: vreduceps $123, %zmm25, %zmm26
+// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x56,0xd1,0x7b]
+ vreduceps $0x7b, %zmm25, %zmm26
+
+// CHECK: vreduceps $123,{sae}, %zmm25, %zmm26
+// CHECK: encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0x7b]
+ vreduceps $0x7b,{sae}, %zmm25, %zmm26
+
+// CHECK: vreduceps $123, (%rcx), %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x11,0x7b]
+ vreduceps $0x7b,(%rcx), %zmm26
+
+// CHECK: vreduceps $123, 4660(%rax,%r14,8), %zmm26
+// CHECK: encoding: [0x62,0x23,0x7d,0x48,0x56,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vreduceps $0x7b,4660(%rax,%r14,8), %zmm26
+
+// CHECK: vreduceps $123, (%rcx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x11,0x7b]
+ vreduceps $0x7b,(%rcx){1to16}, %zmm26
+
+// CHECK: vreduceps $123, 8128(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x52,0x7f,0x7b]
+ vreduceps $0x7b,8128(%rdx), %zmm26
+
+// CHECK: vreduceps $123, 8192(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vreduceps $0x7b,8192(%rdx), %zmm26
+
+// CHECK: vreduceps $123, -8192(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x52,0x80,0x7b]
+ vreduceps $0x7b,-8192(%rdx), %zmm26
+
+// CHECK: vreduceps $123, -8256(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x56,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vreduceps $0x7b,-8256(%rdx), %zmm26
+
+// CHECK: vreduceps $123, 508(%rdx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x52,0x7f,0x7b]
+ vreduceps $0x7b,508(%rdx){1to16}, %zmm26
+
+// CHECK: vreduceps $123, 512(%rdx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x92,0x00,0x02,0x00,0x00,0x7b]
+ vreduceps $0x7b,512(%rdx){1to16}, %zmm26
+
+// CHECK: vreduceps $123, -512(%rdx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x52,0x80,0x7b]
+ vreduceps $0x7b,-512(%rdx){1to16}, %zmm26
+
+// CHECK: vreduceps $123, -516(%rdx){1to16}, %zmm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x58,0x56,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+ vreduceps $0x7b,-516(%rdx){1to16}, %zmm26
+
+// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x03,0xe5,0x00,0x57,0xc8,0xab]
+ vreducesd $0xab, %xmm24, %xmm19, %xmm25
+
+// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25 {%k3}
+// CHECK: encoding: [0x62,0x03,0xe5,0x03,0x57,0xc8,0xab]
+ vreducesd $0xab, %xmm24, %xmm19, %xmm25 {%k3}
+
+// CHECK: vreducesd $171, %xmm24, %xmm19, %xmm25 {%k3} {z}
+// CHECK: encoding: [0x62,0x03,0xe5,0x83,0x57,0xc8,0xab]
+ vreducesd $0xab, %xmm24, %xmm19, %xmm25 {%k3} {z}
+
+// CHECK: vreducesd $171,{sae}, %xmm24, %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0xab]
+ vreducesd $0xab,{sae}, %xmm24, %xmm19, %xmm25
+
+// CHECK: vreducesd $123, %xmm24, %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x03,0xe5,0x00,0x57,0xc8,0x7b]
+ vreducesd $0x7b, %xmm24, %xmm19, %xmm25
+
+// CHECK: vreducesd $123,{sae}, %xmm24, %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0x7b]
+ vreducesd $0x7b,{sae}, %xmm24, %xmm19, %xmm25
+
+// CHECK: vreducesd $123, (%rcx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x09,0x7b]
+ vreducesd $0x7b,(%rcx), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, 4660(%rax,%r14,8), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x23,0xe5,0x00,0x57,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vreducesd $0x7b,4660(%rax,%r14,8), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, 1016(%rdx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x4a,0x7f,0x7b]
+ vreducesd $0x7b,1016(%rdx), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, 1024(%rdx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vreducesd $0x7b,1024(%rdx), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, -1024(%rdx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x4a,0x80,0x7b]
+ vreducesd $0x7b,-1024(%rdx), %xmm19, %xmm25
+
+// CHECK: vreducesd $123, -1032(%rdx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x57,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vreducesd $0x7b,-1032(%rdx), %xmm19, %xmm25
+
+// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xf5,0xab]
+ vreducess $0xab, %xmm21, %xmm24, %xmm30
+
+// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30 {%k2}
+// CHECK: encoding: [0x62,0x23,0x3d,0x02,0x57,0xf5,0xab]
+ vreducess $0xab, %xmm21, %xmm24, %xmm30 {%k2}
+
+// CHECK: vreducess $171, %xmm21, %xmm24, %xmm30 {%k2} {z}
+// CHECK: encoding: [0x62,0x23,0x3d,0x82,0x57,0xf5,0xab]
+ vreducess $0xab, %xmm21, %xmm24, %xmm30 {%k2} {z}
+
+// CHECK: vreducess $171,{sae}, %xmm21, %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0xab]
+ vreducess $0xab,{sae}, %xmm21, %xmm24, %xmm30
+
+// CHECK: vreducess $123, %xmm21, %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xf5,0x7b]
+ vreducess $0x7b, %xmm21, %xmm24, %xmm30
+
+// CHECK: vreducess $123,{sae}, %xmm21, %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0x7b]
+ vreducess $0x7b,{sae}, %xmm21, %xmm24, %xmm30
+
+// CHECK: vreducess $123, (%rcx), %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0x31,0x7b]
+ vreducess $0x7b,(%rcx), %xmm24, %xmm30
+
+// CHECK: vreducess $123, 4660(%rax,%r14,8), %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xb4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vreducess $0x7b,4660(%rax,%r14,8), %xmm24, %xmm30
+
+// CHECK: vreducess $123, 508(%rdx), %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0x72,0x7f,0x7b]
+ vreducess $0x7b,508(%rdx), %xmm24, %xmm30
+
+// CHECK: vreducess $123, 512(%rdx), %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0xb2,0x00,0x02,0x00,0x00,0x7b]
+ vreducess $0x7b,512(%rdx), %xmm24, %xmm30
+
+// CHECK: vreducess $123, -512(%rdx), %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0x72,0x80,0x7b]
+ vreducess $0x7b,-512(%rdx), %xmm24, %xmm30
+
+// CHECK: vreducess $123, -516(%rdx), %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x63,0x3d,0x00,0x57,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+ vreducess $0x7b,-516(%rdx), %xmm24, %xmm30
+
// CHECK: vcvtpd2qq %zmm29, %zmm18
// CHECK: encoding: [0x62,0x81,0xfd,0x48,0x7b,0xd5]
vcvtpd2qq %zmm29, %zmm18
@@ -1823,6 +2287,78 @@
// CHECK: encoding: [0x62,0xe1,0xfc,0x58,0x5b,0xa2,0xf8,0xfb,0xff,0xff]
vcvtqq2ps -1032(%rdx){1to8}, %ymm20
+// CHECK: vcvtqq2ps %zmm19, %ymm28
+// CHECK: encoding: [0x62,0x21,0xfc,0x48,0x5b,0xe3]
+ vcvtqq2ps %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps %zmm19, %ymm28 {%k3}
+// CHECK: encoding: [0x62,0x21,0xfc,0x4b,0x5b,0xe3]
+ vcvtqq2ps %zmm19, %ymm28 {%k3}
+
+// CHECK: vcvtqq2ps %zmm19, %ymm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x21,0xfc,0xcb,0x5b,0xe3]
+ vcvtqq2ps %zmm19, %ymm28 {%k3} {z}
+
+// CHECK: vcvtqq2ps {rn-sae}, %zmm19, %ymm28
+// CHECK: encoding: [0x62,0x21,0xfc,0x18,0x5b,0xe3]
+ vcvtqq2ps {rn-sae}, %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps {ru-sae}, %zmm19, %ymm28
+// CHECK: encoding: [0x62,0x21,0xfc,0x58,0x5b,0xe3]
+ vcvtqq2ps {ru-sae}, %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps {rd-sae}, %zmm19, %ymm28
+// CHECK: encoding: [0x62,0x21,0xfc,0x38,0x5b,0xe3]
+ vcvtqq2ps {rd-sae}, %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps {rz-sae}, %zmm19, %ymm28
+// CHECK: encoding: [0x62,0x21,0xfc,0x78,0x5b,0xe3]
+ vcvtqq2ps {rz-sae}, %zmm19, %ymm28
+
+// CHECK: vcvtqq2ps (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0x21]
+ vcvtqq2ps (%rcx), %ymm28
+
+// CHECK: vcvtqq2ps 4660(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x21,0xfc,0x48,0x5b,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vcvtqq2ps 4660(%rax,%r14,8), %ymm28
+
+// CHECK: vcvtqq2ps (%rcx){1to8}, %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0x21]
+ vcvtqq2ps (%rcx){1to8}, %ymm28
+
+// CHECK: vcvtqq2ps 8128(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0x62,0x7f]
+ vcvtqq2ps 8128(%rdx), %ymm28
+
+// CHECK: vcvtqq2ps 8192(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0xa2,0x00,0x20,0x00,0x00]
+ vcvtqq2ps 8192(%rdx), %ymm28
+
+// CHECK: vcvtqq2ps -8192(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0x62,0x80]
+ vcvtqq2ps -8192(%rdx), %ymm28
+
+// CHECK: vcvtqq2ps -8256(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x48,0x5b,0xa2,0xc0,0xdf,0xff,0xff]
+ vcvtqq2ps -8256(%rdx), %ymm28
+
+// CHECK: vcvtqq2ps 1016(%rdx){1to8}, %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0x62,0x7f]
+ vcvtqq2ps 1016(%rdx){1to8}, %ymm28
+
+// CHECK: vcvtqq2ps 1024(%rdx){1to8}, %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0xa2,0x00,0x04,0x00,0x00]
+ vcvtqq2ps 1024(%rdx){1to8}, %ymm28
+
+// CHECK: vcvtqq2ps -1024(%rdx){1to8}, %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0x62,0x80]
+ vcvtqq2ps -1024(%rdx){1to8}, %ymm28
+
+// CHECK: vcvtqq2ps -1032(%rdx){1to8}, %ymm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x58,0x5b,0xa2,0xf8,0xfb,0xff,0xff]
+ vcvtqq2ps -1032(%rdx){1to8}, %ymm28
+
// CHECK: vcvtuqq2pd %zmm29, %zmm21
// CHECK: encoding: [0x62,0x81,0xfe,0x48,0x7a,0xed]
vcvtuqq2pd %zmm29, %zmm21
@@ -1907,3 +2443,1696 @@
// CHECK: encoding: [0x62,0xa1,0xff,0xca,0x7a,0xd5]
vcvtuqq2ps %zmm21, %ymm18 {%k2} {z}
+// CHECK: vpextrd $171, %xmm28, %eax
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x16,0xe0,0xab]
+ vpextrd $0xab, %xmm28, %eax
+
+// CHECK: vpextrd $123, %xmm28, %eax
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x16,0xe0,0x7b]
+ vpextrd $0x7b, %xmm28, %eax
+
+// CHECK: vpextrd $123, %xmm28, %ebp
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x16,0xe5,0x7b]
+ vpextrd $0x7b, %xmm28, %ebp
+
+// CHECK: vpextrd $123, %xmm28, %r13d
+// CHECK: encoding: [0x62,0x43,0x7d,0x08,0x16,0xe5,0x7b]
+ vpextrd $0x7b, %xmm28, %r13d
+
+// CHECK: vpextrd $123, %xmm28, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x16,0x21,0x7b]
+ vpextrd $0x7b, %xmm28, (%rcx)
+
+// CHECK: vpextrd $123, %xmm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x16,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpextrd $0x7b, %xmm28, 291(%rax,%r14,8)
+
+// CHECK: vpextrd $123, %xmm28, 508(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x16,0x62,0x7f,0x7b]
+ vpextrd $0x7b, %xmm28, 508(%rdx)
+
+// CHECK: vpextrd $123, %xmm28, 512(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x16,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vpextrd $0x7b, %xmm28, 512(%rdx)
+
+// CHECK: vpextrd $123, %xmm28, -512(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x16,0x62,0x80,0x7b]
+ vpextrd $0x7b, %xmm28, -512(%rdx)
+
+// CHECK: vpextrd $123, %xmm28, -516(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x16,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vpextrd $0x7b, %xmm28, -516(%rdx)
+
+// CHECK: vpextrd $171, %xmm20, %eax
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x16,0xe0,0xab]
+ vpextrd $0xab, %xmm20, %eax
+
+// CHECK: vpextrd $123, %xmm20, %eax
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x16,0xe0,0x7b]
+ vpextrd $0x7b, %xmm20, %eax
+
+// CHECK: vpextrd $123, %xmm20, %ebp
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x16,0xe5,0x7b]
+ vpextrd $0x7b, %xmm20, %ebp
+
+// CHECK: vpextrd $123, %xmm20, %r13d
+// CHECK: encoding: [0x62,0xc3,0x7d,0x08,0x16,0xe5,0x7b]
+ vpextrd $0x7b, %xmm20, %r13d
+
+// CHECK: vpextrd $123, %xmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x16,0x21,0x7b]
+ vpextrd $0x7b, %xmm20, (%rcx)
+
+// CHECK: vpextrd $123, %xmm20, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x16,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpextrd $0x7b, %xmm20, 4660(%rax,%r14,8)
+
+// CHECK: vpextrd $123, %xmm20, 508(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x16,0x62,0x7f,0x7b]
+ vpextrd $0x7b, %xmm20, 508(%rdx)
+
+// CHECK: vpextrd $123, %xmm20, 512(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x16,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vpextrd $0x7b, %xmm20, 512(%rdx)
+
+// CHECK: vpextrd $123, %xmm20, -512(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x16,0x62,0x80,0x7b]
+ vpextrd $0x7b, %xmm20, -512(%rdx)
+
+// CHECK: vpextrd $123, %xmm20, -516(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x16,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vpextrd $0x7b, %xmm20, -516(%rdx)
+
+// CHECK: vpextrq $171, %xmm24, %rax
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x16,0xc0,0xab]
+ vpextrq $0xab, %xmm24, %rax
+
+// CHECK: vpextrq $123, %xmm24, %rax
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x16,0xc0,0x7b]
+ vpextrq $0x7b, %xmm24, %rax
+
+// CHECK: vpextrq $123, %xmm24, %r8
+// CHECK: encoding: [0x62,0x43,0xfd,0x08,0x16,0xc0,0x7b]
+ vpextrq $0x7b, %xmm24, %r8
+
+// CHECK: vpextrq $123, %xmm24, (%rcx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x16,0x01,0x7b]
+ vpextrq $0x7b, %xmm24, (%rcx)
+
+// CHECK: vpextrq $123, %xmm24, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x16,0x84,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpextrq $0x7b, %xmm24, 291(%rax,%r14,8)
+
+// CHECK: vpextrq $123, %xmm24, 1016(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x16,0x42,0x7f,0x7b]
+ vpextrq $0x7b, %xmm24, 1016(%rdx)
+
+// CHECK: vpextrq $123, %xmm24, 1024(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x16,0x82,0x00,0x04,0x00,0x00,0x7b]
+ vpextrq $0x7b, %xmm24, 1024(%rdx)
+
+// CHECK: vpextrq $123, %xmm24, -1024(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x16,0x42,0x80,0x7b]
+ vpextrq $0x7b, %xmm24, -1024(%rdx)
+
+// CHECK: vpextrq $123, %xmm24, -1032(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x16,0x82,0xf8,0xfb,0xff,0xff,0x7b]
+ vpextrq $0x7b, %xmm24, -1032(%rdx)
+
+// CHECK: vpextrq $171, %xmm20, %rax
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x16,0xe0,0xab]
+ vpextrq $0xab, %xmm20, %rax
+
+// CHECK: vpextrq $123, %xmm20, %rax
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x16,0xe0,0x7b]
+ vpextrq $0x7b, %xmm20, %rax
+
+// CHECK: vpextrq $123, %xmm20, %r8
+// CHECK: encoding: [0x62,0xc3,0xfd,0x08,0x16,0xe0,0x7b]
+ vpextrq $0x7b, %xmm20, %r8
+
+// CHECK: vpextrq $123, %xmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x16,0x21,0x7b]
+ vpextrq $0x7b, %xmm20, (%rcx)
+
+// CHECK: vpextrq $123, %xmm20, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x16,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpextrq $0x7b, %xmm20, 4660(%rax,%r14,8)
+
+// CHECK: vpextrq $123, %xmm20, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x16,0x62,0x7f,0x7b]
+ vpextrq $0x7b, %xmm20, 1016(%rdx)
+
+// CHECK: vpextrq $123, %xmm20, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x16,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vpextrq $0x7b, %xmm20, 1024(%rdx)
+
+// CHECK: vpextrq $123, %xmm20, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x16,0x62,0x80,0x7b]
+ vpextrq $0x7b, %xmm20, -1024(%rdx)
+
+// CHECK: vpextrq $123, %xmm20, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x16,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vpextrq $0x7b, %xmm20, -1032(%rdx)
+
+// CHECK: vpinsrd $171, %eax, %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x22,0xf8,0xab]
+ vpinsrd $0xab,%eax, %xmm25, %xmm23
+
+// CHECK: vpinsrd $123, %eax, %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x22,0xf8,0x7b]
+ vpinsrd $0x7b,%eax, %xmm25, %xmm23
+
+// CHECK: vpinsrd $123, %ebp, %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x22,0xfd,0x7b]
+ vpinsrd $0x7b,%ebp, %xmm25, %xmm23
+
+// CHECK: vpinsrd $123, %r13d, %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xc3,0x35,0x00,0x22,0xfd,0x7b]
+ vpinsrd $0x7b,%r13d, %xmm25, %xmm23
+
+// CHECK: vpinsrd $123, (%rcx), %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x22,0x39,0x7b]
+ vpinsrd $0x7b,(%rcx), %xmm25, %xmm23
+
+// CHECK: vpinsrd $123, 291(%rax,%r14,8), %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xa3,0x35,0x00,0x22,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpinsrd $0x7b,291(%rax,%r14,8), %xmm25, %xmm23
+
+// CHECK: vpinsrd $123, 508(%rdx), %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x22,0x7a,0x7f,0x7b]
+ vpinsrd $0x7b,508(%rdx), %xmm25, %xmm23
+
+// CHECK: vpinsrd $123, 512(%rdx), %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x22,0xba,0x00,0x02,0x00,0x00,0x7b]
+ vpinsrd $0x7b,512(%rdx), %xmm25, %xmm23
+
+// CHECK: vpinsrd $123, -512(%rdx), %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x22,0x7a,0x80,0x7b]
+ vpinsrd $0x7b,-512(%rdx), %xmm25, %xmm23
+
+// CHECK: vpinsrd $123, -516(%rdx), %xmm25, %xmm23
+// CHECK: encoding: [0x62,0xe3,0x35,0x00,0x22,0xba,0xfc,0xfd,0xff,0xff,0x7b]
+ vpinsrd $0x7b,-516(%rdx), %xmm25, %xmm23
+
+// CHECK: vpinsrd $171, %eax, %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x22,0xf0,0xab]
+ vpinsrd $0xab,%eax, %xmm29, %xmm22
+
+// CHECK: vpinsrd $123, %eax, %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x22,0xf0,0x7b]
+ vpinsrd $0x7b,%eax, %xmm29, %xmm22
+
+// CHECK: vpinsrd $123, %ebp, %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x22,0xf5,0x7b]
+ vpinsrd $0x7b,%ebp, %xmm29, %xmm22
+
+// CHECK: vpinsrd $123, %r13d, %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xc3,0x15,0x00,0x22,0xf5,0x7b]
+ vpinsrd $0x7b,%r13d, %xmm29, %xmm22
+
+// CHECK: vpinsrd $123, (%rcx), %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x22,0x31,0x7b]
+ vpinsrd $0x7b,(%rcx), %xmm29, %xmm22
+
+// CHECK: vpinsrd $123, 4660(%rax,%r14,8), %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xa3,0x15,0x00,0x22,0xb4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpinsrd $0x7b,4660(%rax,%r14,8), %xmm29, %xmm22
+
+// CHECK: vpinsrd $123, 508(%rdx), %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x22,0x72,0x7f,0x7b]
+ vpinsrd $0x7b,508(%rdx), %xmm29, %xmm22
+
+// CHECK: vpinsrd $123, 512(%rdx), %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x22,0xb2,0x00,0x02,0x00,0x00,0x7b]
+ vpinsrd $0x7b,512(%rdx), %xmm29, %xmm22
+
+// CHECK: vpinsrd $123, -512(%rdx), %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x22,0x72,0x80,0x7b]
+ vpinsrd $0x7b,-512(%rdx), %xmm29, %xmm22
+
+// CHECK: vpinsrd $123, -516(%rdx), %xmm29, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x15,0x00,0x22,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+ vpinsrd $0x7b,-516(%rdx), %xmm29, %xmm22
+
+// CHECK: vpinsrq $171, %rax, %xmm20, %xmm22
+// CHECK: encoding: [0x62,0xe3,0xdd,0x00,0x22,0xf0,0xab]
+ vpinsrq $0xab,%rax, %xmm20, %xmm22
+
+// CHECK: vpinsrq $123, %rax, %xmm20, %xmm22
+// CHECK: encoding: [0x62,0xe3,0xdd,0x00,0x22,0xf0,0x7b]
+ vpinsrq $0x7b,%rax, %xmm20, %xmm22
+
+// CHECK: vpinsrq $123, %r8, %xmm20, %xmm22
+// CHECK: encoding: [0x62,0xc3,0xdd,0x00,0x22,0xf0,0x7b]
+ vpinsrq $0x7b,%r8, %xmm20, %xmm22
+
+// CHECK: vpinsrq $123, (%rcx), %xmm20, %xmm22
+// CHECK: encoding: [0x62,0xe3,0xdd,0x00,0x22,0x31,0x7b]
+ vpinsrq $0x7b,(%rcx), %xmm20, %xmm22
+
+// CHECK: vpinsrq $123, 291(%rax,%r14,8), %xmm20, %xmm22
+// CHECK: encoding: [0x62,0xa3,0xdd,0x00,0x22,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpinsrq $0x7b,291(%rax,%r14,8), %xmm20, %xmm22
+
+// CHECK: vpinsrq $123, 1016(%rdx), %xmm20, %xmm22
+// CHECK: encoding: [0x62,0xe3,0xdd,0x00,0x22,0x72,0x7f,0x7b]
+ vpinsrq $0x7b,1016(%rdx), %xmm20, %xmm22
+
+// CHECK: vpinsrq $123, 1024(%rdx), %xmm20, %xmm22
+// CHECK: encoding: [0x62,0xe3,0xdd,0x00,0x22,0xb2,0x00,0x04,0x00,0x00,0x7b]
+ vpinsrq $0x7b,1024(%rdx), %xmm20, %xmm22
+
+// CHECK: vpinsrq $123, -1024(%rdx), %xmm20, %xmm22
+// CHECK: encoding: [0x62,0xe3,0xdd,0x00,0x22,0x72,0x80,0x7b]
+ vpinsrq $0x7b,-1024(%rdx), %xmm20, %xmm22
+
+// CHECK: vpinsrq $123, -1032(%rdx), %xmm20, %xmm22
+// CHECK: encoding: [0x62,0xe3,0xdd,0x00,0x22,0xb2,0xf8,0xfb,0xff,0xff,0x7b]
+ vpinsrq $0x7b,-1032(%rdx), %xmm20, %xmm22
+
+// CHECK: vpinsrq $171, %rax, %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x22,0xc8,0xab]
+ vpinsrq $0xab,%rax, %xmm19, %xmm25
+
+// CHECK: vpinsrq $123, %rax, %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x22,0xc8,0x7b]
+ vpinsrq $0x7b,%rax, %xmm19, %xmm25
+
+// CHECK: vpinsrq $123, %r8, %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x43,0xe5,0x00,0x22,0xc8,0x7b]
+ vpinsrq $0x7b,%r8, %xmm19, %xmm25
+
+// CHECK: vpinsrq $123, (%rcx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x22,0x09,0x7b]
+ vpinsrq $0x7b,(%rcx), %xmm19, %xmm25
+
+// CHECK: vpinsrq $123, 4660(%rax,%r14,8), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x23,0xe5,0x00,0x22,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vpinsrq $0x7b,4660(%rax,%r14,8), %xmm19, %xmm25
+
+// CHECK: vpinsrq $123, 1016(%rdx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x22,0x4a,0x7f,0x7b]
+ vpinsrq $0x7b,1016(%rdx), %xmm19, %xmm25
+
+// CHECK: vpinsrq $123, 1024(%rdx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x22,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vpinsrq $0x7b,1024(%rdx), %xmm19, %xmm25
+
+// CHECK: vpinsrq $123, -1024(%rdx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x22,0x4a,0x80,0x7b]
+ vpinsrq $0x7b,-1024(%rdx), %xmm19, %xmm25
+
+// CHECK: vpinsrq $123, -1032(%rdx), %xmm19, %xmm25
+// CHECK: encoding: [0x62,0x63,0xe5,0x00,0x22,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vpinsrq $0x7b,-1032(%rdx), %xmm19, %xmm25
+
+// CHECK: vinsertf32x8 $171, %ymm24, %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x03,0x75,0x40,0x1a,0xe8,0xab]
+ vinsertf32x8 $0xab, %ymm24, %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $171, %ymm24, %zmm17, %zmm29 {%k3}
+// CHECK: encoding: [0x62,0x03,0x75,0x43,0x1a,0xe8,0xab]
+ vinsertf32x8 $0xab, %ymm24, %zmm17, %zmm29 {%k3}
+
+// CHECK: vinsertf32x8 $171, %ymm24, %zmm17, %zmm29 {%k3} {z}
+// CHECK: encoding: [0x62,0x03,0x75,0xc3,0x1a,0xe8,0xab]
+ vinsertf32x8 $0xab, %ymm24, %zmm17, %zmm29 {%k3} {z}
+
+// CHECK: vinsertf32x8 $123, %ymm24, %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x03,0x75,0x40,0x1a,0xe8,0x7b]
+ vinsertf32x8 $0x7b, %ymm24, %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, (%rcx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0x29,0x7b]
+ vinsertf32x8 $0x7b,(%rcx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, 291(%rax,%r14,8), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x23,0x75,0x40,0x1a,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf32x8 $0x7b,291(%rax,%r14,8), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4064(%rdx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0x6a,0x7f,0x7b]
+ vinsertf32x8 $0x7b,4064(%rdx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4096(%rdx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vinsertf32x8 $0x7b,4096(%rdx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, -4096(%rdx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0x6a,0x80,0x7b]
+ vinsertf32x8 $0x7b,-4096(%rdx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, -4128(%rdx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vinsertf32x8 $0x7b,-4128(%rdx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $171, %ymm22, %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x23,0x1d,0x40,0x1a,0xee,0xab]
+ vinsertf32x8 $0xab, %ymm22, %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $171, %ymm22, %zmm28, %zmm29 {%k5}
+// CHECK: encoding: [0x62,0x23,0x1d,0x45,0x1a,0xee,0xab]
+ vinsertf32x8 $0xab, %ymm22, %zmm28, %zmm29 {%k5}
+
+// CHECK: vinsertf32x8 $171, %ymm22, %zmm28, %zmm29 {%k5} {z}
+// CHECK: encoding: [0x62,0x23,0x1d,0xc5,0x1a,0xee,0xab]
+ vinsertf32x8 $0xab, %ymm22, %zmm28, %zmm29 {%k5} {z}
+
+// CHECK: vinsertf32x8 $123, %ymm22, %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x23,0x1d,0x40,0x1a,0xee,0x7b]
+ vinsertf32x8 $0x7b, %ymm22, %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, (%rcx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0x29,0x7b]
+ vinsertf32x8 $0x7b,(%rcx), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4660(%rax,%r14,8), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x23,0x1d,0x40,0x1a,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinsertf32x8 $0x7b,4660(%rax,%r14,8), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4064(%rdx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0x6a,0x7f,0x7b]
+ vinsertf32x8 $0x7b,4064(%rdx), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4096(%rdx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vinsertf32x8 $0x7b,4096(%rdx), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, -4096(%rdx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0x6a,0x80,0x7b]
+ vinsertf32x8 $0x7b,-4096(%rdx), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, -4128(%rdx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vinsertf32x8 $0x7b,-4128(%rdx), %zmm28, %zmm29
+
+// CHECK: vinsertf64x2 $171, %xmm25, %zmm28, %zmm17
+// CHECK: encoding: [0x62,0x83,0x9d,0x40,0x18,0xc9,0xab]
+ vinsertf64x2 $0xab, %xmm25, %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $171, %xmm25, %zmm28, %zmm17 {%k2}
+// CHECK: encoding: [0x62,0x83,0x9d,0x42,0x18,0xc9,0xab]
+ vinsertf64x2 $0xab, %xmm25, %zmm28, %zmm17 {%k2}
+
+// CHECK: vinsertf64x2 $171, %xmm25, %zmm28, %zmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x83,0x9d,0xc2,0x18,0xc9,0xab]
+ vinsertf64x2 $0xab, %xmm25, %zmm28, %zmm17 {%k2} {z}
+
+// CHECK: vinsertf64x2 $123, %xmm25, %zmm28, %zmm17
+// CHECK: encoding: [0x62,0x83,0x9d,0x40,0x18,0xc9,0x7b]
+ vinsertf64x2 $0x7b, %xmm25, %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, (%rcx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x09,0x7b]
+ vinsertf64x2 $0x7b,(%rcx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, 291(%rax,%r14,8), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xa3,0x9d,0x40,0x18,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,291(%rax,%r14,8), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, 2032(%rdx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x4a,0x7f,0x7b]
+ vinsertf64x2 $0x7b,2032(%rdx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, 2048(%rdx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,2048(%rdx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, -2048(%rdx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x4a,0x80,0x7b]
+ vinsertf64x2 $0x7b,-2048(%rdx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, -2064(%rdx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf64x2 $0x7b,-2064(%rdx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $171, %xmm28, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0x83,0xf5,0x40,0x18,0xe4,0xab]
+ vinsertf64x2 $0xab, %xmm28, %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $171, %xmm28, %zmm17, %zmm20 {%k7}
+// CHECK: encoding: [0x62,0x83,0xf5,0x47,0x18,0xe4,0xab]
+ vinsertf64x2 $0xab, %xmm28, %zmm17, %zmm20 {%k7}
+
+// CHECK: vinsertf64x2 $171, %xmm28, %zmm17, %zmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0xf5,0xc7,0x18,0xe4,0xab]
+ vinsertf64x2 $0xab, %xmm28, %zmm17, %zmm20 {%k7} {z}
+
+// CHECK: vinsertf64x2 $123, %xmm28, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0x83,0xf5,0x40,0x18,0xe4,0x7b]
+ vinsertf64x2 $0x7b, %xmm28, %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, (%rcx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0x21,0x7b]
+ vinsertf64x2 $0x7b,(%rcx), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, 4660(%rax,%r14,8), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xa3,0xf5,0x40,0x18,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,4660(%rax,%r14,8), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, 2032(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0x62,0x7f,0x7b]
+ vinsertf64x2 $0x7b,2032(%rdx), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, 2048(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,2048(%rdx), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, -2048(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0x62,0x80,0x7b]
+ vinsertf64x2 $0x7b,-2048(%rdx), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, -2064(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf64x2 $0x7b,-2064(%rdx), %zmm17, %zmm20
+
+// CHECK: vinserti32x8 $171, %ymm22, %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x23,0x3d,0x40,0x3a,0xe6,0xab]
+ vinserti32x8 $0xab, %ymm22, %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $171, %ymm22, %zmm24, %zmm28 {%k2}
+// CHECK: encoding: [0x62,0x23,0x3d,0x42,0x3a,0xe6,0xab]
+ vinserti32x8 $0xab, %ymm22, %zmm24, %zmm28 {%k2}
+
+// CHECK: vinserti32x8 $171, %ymm22, %zmm24, %zmm28 {%k2} {z}
+// CHECK: encoding: [0x62,0x23,0x3d,0xc2,0x3a,0xe6,0xab]
+ vinserti32x8 $0xab, %ymm22, %zmm24, %zmm28 {%k2} {z}
+
+// CHECK: vinserti32x8 $123, %ymm22, %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x23,0x3d,0x40,0x3a,0xe6,0x7b]
+ vinserti32x8 $0x7b, %ymm22, %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, (%rcx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0x21,0x7b]
+ vinserti32x8 $0x7b,(%rcx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, 291(%rax,%r14,8), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x23,0x3d,0x40,0x3a,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti32x8 $0x7b,291(%rax,%r14,8), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, 4064(%rdx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0x62,0x7f,0x7b]
+ vinserti32x8 $0x7b,4064(%rdx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, 4096(%rdx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vinserti32x8 $0x7b,4096(%rdx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, -4096(%rdx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0x62,0x80,0x7b]
+ vinserti32x8 $0x7b,-4096(%rdx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, -4128(%rdx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vinserti32x8 $0x7b,-4128(%rdx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $171, %ymm24, %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x03,0x5d,0x40,0x3a,0xe0,0xab]
+ vinserti32x8 $0xab, %ymm24, %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $171, %ymm24, %zmm20, %zmm28 {%k7}
+// CHECK: encoding: [0x62,0x03,0x5d,0x47,0x3a,0xe0,0xab]
+ vinserti32x8 $0xab, %ymm24, %zmm20, %zmm28 {%k7}
+
+// CHECK: vinserti32x8 $171, %ymm24, %zmm20, %zmm28 {%k7} {z}
+// CHECK: encoding: [0x62,0x03,0x5d,0xc7,0x3a,0xe0,0xab]
+ vinserti32x8 $0xab, %ymm24, %zmm20, %zmm28 {%k7} {z}
+
+// CHECK: vinserti32x8 $123, %ymm24, %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x03,0x5d,0x40,0x3a,0xe0,0x7b]
+ vinserti32x8 $0x7b, %ymm24, %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, (%rcx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0x21,0x7b]
+ vinserti32x8 $0x7b,(%rcx), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, 4660(%rax,%r14,8), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x23,0x5d,0x40,0x3a,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinserti32x8 $0x7b,4660(%rax,%r14,8), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, 4064(%rdx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0x62,0x7f,0x7b]
+ vinserti32x8 $0x7b,4064(%rdx), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, 4096(%rdx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vinserti32x8 $0x7b,4096(%rdx), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, -4096(%rdx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0x62,0x80,0x7b]
+ vinserti32x8 $0x7b,-4096(%rdx), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, -4128(%rdx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vinserti32x8 $0x7b,-4128(%rdx), %zmm20, %zmm28
+
+// CHECK: vinserti64x2 $171, %xmm26, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x03,0xed,0x40,0x38,0xe2,0xab]
+ vinserti64x2 $0xab, %xmm26, %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $171, %xmm26, %zmm18, %zmm28 {%k7}
+// CHECK: encoding: [0x62,0x03,0xed,0x47,0x38,0xe2,0xab]
+ vinserti64x2 $0xab, %xmm26, %zmm18, %zmm28 {%k7}
+
+// CHECK: vinserti64x2 $171, %xmm26, %zmm18, %zmm28 {%k7} {z}
+// CHECK: encoding: [0x62,0x03,0xed,0xc7,0x38,0xe2,0xab]
+ vinserti64x2 $0xab, %xmm26, %zmm18, %zmm28 {%k7} {z}
+
+// CHECK: vinserti64x2 $123, %xmm26, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x03,0xed,0x40,0x38,0xe2,0x7b]
+ vinserti64x2 $0x7b, %xmm26, %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, (%rcx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x21,0x7b]
+ vinserti64x2 $0x7b,(%rcx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, 291(%rax,%r14,8), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,291(%rax,%r14,8), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, 2032(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x62,0x7f,0x7b]
+ vinserti64x2 $0x7b,2032(%rdx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, 2048(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,2048(%rdx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, -2048(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x62,0x80,0x7b]
+ vinserti64x2 $0x7b,-2048(%rdx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, -2064(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti64x2 $0x7b,-2064(%rdx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $171, %xmm21, %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $171, %xmm21, %zmm18, %zmm27 {%k2}
+// CHECK: encoding: [0x62,0x23,0xed,0x42,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %zmm18, %zmm27 {%k2}
+
+// CHECK: vinserti64x2 $171, %xmm21, %zmm18, %zmm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x23,0xed,0xc2,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %zmm18, %zmm27 {%k2} {z}
+
+// CHECK: vinserti64x2 $123, %xmm21, %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0xdd,0x7b]
+ vinserti64x2 $0x7b, %xmm21, %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, (%rcx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x19,0x7b]
+ vinserti64x2 $0x7b,(%rcx), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, 4660(%rax,%r14,8), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,4660(%rax,%r14,8), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, 2032(%rdx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x5a,0x7f,0x7b]
+ vinserti64x2 $0x7b,2032(%rdx), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, 2048(%rdx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,2048(%rdx), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, -2048(%rdx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x5a,0x80,0x7b]
+ vinserti64x2 $0x7b,-2048(%rdx), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, -2064(%rdx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti64x2 $0x7b,-2064(%rdx), %zmm18, %zmm27
+
+// CHECK: vextractf32x8 $171, %zmm18, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x1b,0xd5,0xab]
+ vextractf32x8 $0xab, %zmm18, %ymm21
+
+// CHECK: vextractf32x8 $171, %zmm18, %ymm21 {%k1}
+// CHECK: encoding: [0x62,0xa3,0x7d,0x49,0x1b,0xd5,0xab]
+ vextractf32x8 $0xab, %zmm18, %ymm21 {%k1}
+
+// CHECK: vextractf32x8 $171, %zmm18, %ymm21 {%k1} {z}
+// CHECK: encoding: [0x62,0xa3,0x7d,0xc9,0x1b,0xd5,0xab]
+ vextractf32x8 $0xab, %zmm18, %ymm21 {%k1} {z}
+
+// CHECK: vextractf32x8 $123, %zmm18, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x1b,0xd5,0x7b]
+ vextractf32x8 $0x7b, %zmm18, %ymm21
+
+// CHECK: vextractf32x8 $171, %zmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0x29,0xab]
+ vextractf32x8 $0xab, %zmm21,(%rcx)
+
+// CHECK: vextractf32x8 $171, %zmm21, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0xe3,0x7d,0x4b,0x1b,0x29,0xab]
+ vextractf32x8 $0xab, %zmm21,(%rcx) {%k3}
+
+// CHECK: vextractf32x8 $123, %zmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0x29,0x7b]
+ vextractf32x8 $0x7b, %zmm21,(%rcx)
+
+// CHECK: vextractf32x8 $123, %zmm21, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x1b,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextractf32x8 $0x7b, %zmm21,291(%rax,%r14,8)
+
+// CHECK: vextractf32x8 $123, %zmm21, 4064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0x6a,0x7f,0x7b]
+ vextractf32x8 $0x7b, %zmm21,4064(%rdx)
+
+// CHECK: vextractf32x8 $123, %zmm21, 4096(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vextractf32x8 $0x7b, %zmm21,4096(%rdx)
+
+// CHECK: vextractf32x8 $123, %zmm21, -4096(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0x6a,0x80,0x7b]
+ vextractf32x8 $0x7b, %zmm21,-4096(%rdx)
+
+// CHECK: vextractf32x8 $123, %zmm21, -4128(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vextractf32x8 $0x7b, %zmm21,-4128(%rdx)
+
+// CHECK: vextractf32x8 $171, %zmm26, %ymm30
+// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x1b,0xd6,0xab]
+ vextractf32x8 $0xab, %zmm26, %ymm30
+
+// CHECK: vextractf32x8 $171, %zmm26, %ymm30 {%k3}
+// CHECK: encoding: [0x62,0x03,0x7d,0x4b,0x1b,0xd6,0xab]
+ vextractf32x8 $0xab, %zmm26, %ymm30 {%k3}
+
+// CHECK: vextractf32x8 $171, %zmm26, %ymm30 {%k3} {z}
+// CHECK: encoding: [0x62,0x03,0x7d,0xcb,0x1b,0xd6,0xab]
+ vextractf32x8 $0xab, %zmm26, %ymm30 {%k3} {z}
+
+// CHECK: vextractf32x8 $123, %zmm26, %ymm30
+// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x1b,0xd6,0x7b]
+ vextractf32x8 $0x7b, %zmm26, %ymm30
+
+// CHECK: vextractf32x8 $171, %zmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0x21,0xab]
+ vextractf32x8 $0xab, %zmm20,(%rcx)
+
+// CHECK: vextractf32x8 $171, %zmm20, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0xe3,0x7d,0x4b,0x1b,0x21,0xab]
+ vextractf32x8 $0xab, %zmm20,(%rcx) {%k3}
+
+// CHECK: vextractf32x8 $123, %zmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0x21,0x7b]
+ vextractf32x8 $0x7b, %zmm20,(%rcx)
+
+// CHECK: vextractf32x8 $123, %zmm20, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x1b,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vextractf32x8 $0x7b, %zmm20,4660(%rax,%r14,8)
+
+// CHECK: vextractf32x8 $123, %zmm20, 4064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0x62,0x7f,0x7b]
+ vextractf32x8 $0x7b, %zmm20,4064(%rdx)
+
+// CHECK: vextractf32x8 $123, %zmm20, 4096(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vextractf32x8 $0x7b, %zmm20,4096(%rdx)
+
+// CHECK: vextractf32x8 $123, %zmm20, -4096(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0x62,0x80,0x7b]
+ vextractf32x8 $0x7b, %zmm20,-4096(%rdx)
+
+// CHECK: vextractf32x8 $123, %zmm20, -4128(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x1b,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vextractf32x8 $0x7b, %zmm20,-4128(%rdx)
+
+// CHECK: vextractf64x2 $171, %zmm26, %xmm28
+// CHECK: encoding: [0x62,0x03,0xfd,0x48,0x19,0xd4,0xab]
+ vextractf64x2 $0xab, %zmm26, %xmm28
+
+// CHECK: vextractf64x2 $171, %zmm26, %xmm28 {%k5}
+// CHECK: encoding: [0x62,0x03,0xfd,0x4d,0x19,0xd4,0xab]
+ vextractf64x2 $0xab, %zmm26, %xmm28 {%k5}
+
+// CHECK: vextractf64x2 $171, %zmm26, %xmm28 {%k5} {z}
+// CHECK: encoding: [0x62,0x03,0xfd,0xcd,0x19,0xd4,0xab]
+ vextractf64x2 $0xab, %zmm26, %xmm28 {%k5} {z}
+
+// CHECK: vextractf64x2 $123, %zmm26, %xmm28
+// CHECK: encoding: [0x62,0x03,0xfd,0x48,0x19,0xd4,0x7b]
+ vextractf64x2 $0x7b, %zmm26, %xmm28
+
+// CHECK: vextractf64x2 $171, %zmm28, (%rcx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x19,0x21,0xab]
+ vextractf64x2 $0xab, %zmm28,(%rcx)
+
+// CHECK: vextractf64x2 $171, %zmm28, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x63,0xfd,0x4b,0x19,0x21,0xab]
+ vextractf64x2 $0xab, %zmm28,(%rcx) {%k3}
+
+// CHECK: vextractf64x2 $123, %zmm28, (%rcx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x19,0x21,0x7b]
+ vextractf64x2 $0x7b, %zmm28,(%rcx)
+
+// CHECK: vextractf64x2 $123, %zmm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0xfd,0x48,0x19,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextractf64x2 $0x7b, %zmm28,291(%rax,%r14,8)
+
+// CHECK: vextractf64x2 $123, %zmm28, 2032(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x19,0x62,0x7f,0x7b]
+ vextractf64x2 $0x7b, %zmm28,2032(%rdx)
+
+// CHECK: vextractf64x2 $123, %zmm28, 2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x19,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vextractf64x2 $0x7b, %zmm28,2048(%rdx)
+
+// CHECK: vextractf64x2 $123, %zmm28, -2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x19,0x62,0x80,0x7b]
+ vextractf64x2 $0x7b, %zmm28,-2048(%rdx)
+
+// CHECK: vextractf64x2 $123, %zmm28, -2064(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x19,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vextractf64x2 $0x7b, %zmm28,-2064(%rdx)
+
+// CHECK: vextractf64x2 $171, %zmm26, %xmm19
+// CHECK: encoding: [0x62,0x23,0xfd,0x48,0x19,0xd3,0xab]
+ vextractf64x2 $0xab, %zmm26, %xmm19
+
+// CHECK: vextractf64x2 $171, %zmm26, %xmm19 {%k3}
+// CHECK: encoding: [0x62,0x23,0xfd,0x4b,0x19,0xd3,0xab]
+ vextractf64x2 $0xab, %zmm26, %xmm19 {%k3}
+
+// CHECK: vextractf64x2 $171, %zmm26, %xmm19 {%k3} {z}
+// CHECK: encoding: [0x62,0x23,0xfd,0xcb,0x19,0xd3,0xab]
+ vextractf64x2 $0xab, %zmm26, %xmm19 {%k3} {z}
+
+// CHECK: vextractf64x2 $123, %zmm26, %xmm19
+// CHECK: encoding: [0x62,0x23,0xfd,0x48,0x19,0xd3,0x7b]
+ vextractf64x2 $0x7b, %zmm26, %xmm19
+
+// CHECK: vextractf64x2 $171, %zmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x19,0x09,0xab]
+ vextractf64x2 $0xab, %zmm17,(%rcx)
+
+// CHECK: vextractf64x2 $171, %zmm17, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe3,0xfd,0x49,0x19,0x09,0xab]
+ vextractf64x2 $0xab, %zmm17,(%rcx) {%k1}
+
+// CHECK: vextractf64x2 $123, %zmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x19,0x09,0x7b]
+ vextractf64x2 $0x7b, %zmm17,(%rcx)
+
+// CHECK: vextractf64x2 $123, %zmm17, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x19,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vextractf64x2 $0x7b, %zmm17,4660(%rax,%r14,8)
+
+// CHECK: vextractf64x2 $123, %zmm17, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x19,0x4a,0x7f,0x7b]
+ vextractf64x2 $0x7b, %zmm17,2032(%rdx)
+
+// CHECK: vextractf64x2 $123, %zmm17, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x19,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vextractf64x2 $0x7b, %zmm17,2048(%rdx)
+
+// CHECK: vextractf64x2 $123, %zmm17, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x19,0x4a,0x80,0x7b]
+ vextractf64x2 $0x7b, %zmm17,-2048(%rdx)
+
+// CHECK: vextractf64x2 $123, %zmm17, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x19,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vextractf64x2 $0x7b, %zmm17,-2064(%rdx)
+
+// CHECK: vextracti32x8 $171, %zmm24, %ymm20
+// CHECK: encoding: [0x62,0x23,0x7d,0x48,0x3b,0xc4,0xab]
+ vextracti32x8 $0xab, %zmm24, %ymm20
+
+// CHECK: vextracti32x8 $171, %zmm24, %ymm20 {%k1}
+// CHECK: encoding: [0x62,0x23,0x7d,0x49,0x3b,0xc4,0xab]
+ vextracti32x8 $0xab, %zmm24, %ymm20 {%k1}
+
+// CHECK: vextracti32x8 $171, %zmm24, %ymm20 {%k1} {z}
+// CHECK: encoding: [0x62,0x23,0x7d,0xc9,0x3b,0xc4,0xab]
+ vextracti32x8 $0xab, %zmm24, %ymm20 {%k1} {z}
+
+// CHECK: vextracti32x8 $123, %zmm24, %ymm20
+// CHECK: encoding: [0x62,0x23,0x7d,0x48,0x3b,0xc4,0x7b]
+ vextracti32x8 $0x7b, %zmm24, %ymm20
+
+// CHECK: vextracti32x8 $171, %zmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x3b,0x21,0xab]
+ vextracti32x8 $0xab, %zmm20,(%rcx)
+
+// CHECK: vextracti32x8 $171, %zmm20, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0xe3,0x7d,0x4b,0x3b,0x21,0xab]
+ vextracti32x8 $0xab, %zmm20,(%rcx) {%k3}
+
+// CHECK: vextracti32x8 $123, %zmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x3b,0x21,0x7b]
+ vextracti32x8 $0x7b, %zmm20,(%rcx)
+
+// CHECK: vextracti32x8 $123, %zmm20, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x3b,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextracti32x8 $0x7b, %zmm20,291(%rax,%r14,8)
+
+// CHECK: vextracti32x8 $123, %zmm20, 4064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x3b,0x62,0x7f,0x7b]
+ vextracti32x8 $0x7b, %zmm20,4064(%rdx)
+
+// CHECK: vextracti32x8 $123, %zmm20, 4096(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x3b,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vextracti32x8 $0x7b, %zmm20,4096(%rdx)
+
+// CHECK: vextracti32x8 $123, %zmm20, -4096(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x3b,0x62,0x80,0x7b]
+ vextracti32x8 $0x7b, %zmm20,-4096(%rdx)
+
+// CHECK: vextracti32x8 $123, %zmm20, -4128(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x48,0x3b,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vextracti32x8 $0x7b, %zmm20,-4128(%rdx)
+
+// CHECK: vextracti32x8 $171, %zmm29, %ymm27
+// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x3b,0xeb,0xab]
+ vextracti32x8 $0xab, %zmm29, %ymm27
+
+// CHECK: vextracti32x8 $171, %zmm29, %ymm27 {%k2}
+// CHECK: encoding: [0x62,0x03,0x7d,0x4a,0x3b,0xeb,0xab]
+ vextracti32x8 $0xab, %zmm29, %ymm27 {%k2}
+
+// CHECK: vextracti32x8 $171, %zmm29, %ymm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x03,0x7d,0xca,0x3b,0xeb,0xab]
+ vextracti32x8 $0xab, %zmm29, %ymm27 {%k2} {z}
+
+// CHECK: vextracti32x8 $123, %zmm29, %ymm27
+// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x3b,0xeb,0x7b]
+ vextracti32x8 $0x7b, %zmm29, %ymm27
+
+// CHECK: vextracti32x8 $171, %zmm26, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x3b,0x11,0xab]
+ vextracti32x8 $0xab, %zmm26,(%rcx)
+
+// CHECK: vextracti32x8 $171, %zmm26, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x63,0x7d,0x4a,0x3b,0x11,0xab]
+ vextracti32x8 $0xab, %zmm26,(%rcx) {%k2}
+
+// CHECK: vextracti32x8 $123, %zmm26, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x3b,0x11,0x7b]
+ vextracti32x8 $0x7b, %zmm26,(%rcx)
+
+// CHECK: vextracti32x8 $123, %zmm26, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0x7d,0x48,0x3b,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vextracti32x8 $0x7b, %zmm26,4660(%rax,%r14,8)
+
+// CHECK: vextracti32x8 $123, %zmm26, 4064(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x3b,0x52,0x7f,0x7b]
+ vextracti32x8 $0x7b, %zmm26,4064(%rdx)
+
+// CHECK: vextracti32x8 $123, %zmm26, 4096(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x3b,0x92,0x00,0x10,0x00,0x00,0x7b]
+ vextracti32x8 $0x7b, %zmm26,4096(%rdx)
+
+// CHECK: vextracti32x8 $123, %zmm26, -4096(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x3b,0x52,0x80,0x7b]
+ vextracti32x8 $0x7b, %zmm26,-4096(%rdx)
+
+// CHECK: vextracti32x8 $123, %zmm26, -4128(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x48,0x3b,0x92,0xe0,0xef,0xff,0xff,0x7b]
+ vextracti32x8 $0x7b, %zmm26,-4128(%rdx)
+
+// CHECK: vextracti64x2 $171, %zmm20, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x39,0xe1,0xab]
+ vextracti64x2 $0xab, %zmm20, %xmm17
+
+// CHECK: vextracti64x2 $171, %zmm20, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xfd,0x4a,0x39,0xe1,0xab]
+ vextracti64x2 $0xab, %zmm20, %xmm17 {%k2}
+
+// CHECK: vextracti64x2 $171, %zmm20, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0xa3,0xfd,0xca,0x39,0xe1,0xab]
+ vextracti64x2 $0xab, %zmm20, %xmm17 {%k2} {z}
+
+// CHECK: vextracti64x2 $123, %zmm20, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x39,0xe1,0x7b]
+ vextracti64x2 $0x7b, %zmm20, %xmm17
+
+// CHECK: vextracti64x2 $171, %zmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x39,0x09,0xab]
+ vextracti64x2 $0xab, %zmm17,(%rcx)
+
+// CHECK: vextracti64x2 $171, %zmm17, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0xe3,0xfd,0x4d,0x39,0x09,0xab]
+ vextracti64x2 $0xab, %zmm17,(%rcx) {%k5}
+
+// CHECK: vextracti64x2 $123, %zmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x39,0x09,0x7b]
+ vextracti64x2 $0x7b, %zmm17,(%rcx)
+
+// CHECK: vextracti64x2 $123, %zmm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x39,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextracti64x2 $0x7b, %zmm17,291(%rax,%r14,8)
+
+// CHECK: vextracti64x2 $123, %zmm17, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x39,0x4a,0x7f,0x7b]
+ vextracti64x2 $0x7b, %zmm17,2032(%rdx)
+
+// CHECK: vextracti64x2 $123, %zmm17, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x39,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vextracti64x2 $0x7b, %zmm17,2048(%rdx)
+
+// CHECK: vextracti64x2 $123, %zmm17, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x39,0x4a,0x80,0x7b]
+ vextracti64x2 $0x7b, %zmm17,-2048(%rdx)
+
+// CHECK: vextracti64x2 $123, %zmm17, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x39,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vextracti64x2 $0x7b, %zmm17,-2064(%rdx)
+
+// CHECK: vextracti64x2 $171, %zmm23, %xmm27
+// CHECK: encoding: [0x62,0x83,0xfd,0x48,0x39,0xfb,0xab]
+ vextracti64x2 $0xab, %zmm23, %xmm27
+
+// CHECK: vextracti64x2 $171, %zmm23, %xmm27 {%k5}
+// CHECK: encoding: [0x62,0x83,0xfd,0x4d,0x39,0xfb,0xab]
+ vextracti64x2 $0xab, %zmm23, %xmm27 {%k5}
+
+// CHECK: vextracti64x2 $171, %zmm23, %xmm27 {%k5} {z}
+// CHECK: encoding: [0x62,0x83,0xfd,0xcd,0x39,0xfb,0xab]
+ vextracti64x2 $0xab, %zmm23, %xmm27 {%k5} {z}
+
+// CHECK: vextracti64x2 $123, %zmm23, %xmm27
+// CHECK: encoding: [0x62,0x83,0xfd,0x48,0x39,0xfb,0x7b]
+ vextracti64x2 $0x7b, %zmm23, %xmm27
+
+// CHECK: vextracti64x2 $171, %zmm24, (%rcx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x39,0x01,0xab]
+ vextracti64x2 $0xab, %zmm24,(%rcx)
+
+// CHECK: vextracti64x2 $171, %zmm24, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x63,0xfd,0x4b,0x39,0x01,0xab]
+ vextracti64x2 $0xab, %zmm24,(%rcx) {%k3}
+
+// CHECK: vextracti64x2 $123, %zmm24, (%rcx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x39,0x01,0x7b]
+ vextracti64x2 $0x7b, %zmm24,(%rcx)
+
+// CHECK: vextracti64x2 $123, %zmm24, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0xfd,0x48,0x39,0x84,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vextracti64x2 $0x7b, %zmm24,4660(%rax,%r14,8)
+
+// CHECK: vextracti64x2 $123, %zmm24, 2032(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x39,0x42,0x7f,0x7b]
+ vextracti64x2 $0x7b, %zmm24,2032(%rdx)
+
+// CHECK: vextracti64x2 $123, %zmm24, 2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x39,0x82,0x00,0x08,0x00,0x00,0x7b]
+ vextracti64x2 $0x7b, %zmm24,2048(%rdx)
+
+// CHECK: vextracti64x2 $123, %zmm24, -2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x39,0x42,0x80,0x7b]
+ vextracti64x2 $0x7b, %zmm24,-2048(%rdx)
+
+// CHECK: vextracti64x2 $123, %zmm24, -2064(%rdx)
+// CHECK: encoding: [0x62,0x63,0xfd,0x48,0x39,0x82,0xf0,0xf7,0xff,0xff,0x7b]
+ vextracti64x2 $0x7b, %zmm24,-2064(%rdx)
+
+// CHECK: ktestb %k6, %k4
+// CHECK: encoding: [0xc5,0xf9,0x99,0xe6]
+ ktestb %k6, %k4
+
+// CHECK: ktestb %k4, %k5
+// CHECK: encoding: [0xc5,0xf9,0x99,0xec]
+ ktestb %k4, %k5
+
+// CHECK: ktestw %k4, %k5
+// CHECK: encoding: [0xc5,0xf8,0x99,0xec]
+ ktestw %k4, %k5
+
+// CHECK: ktestw %k6, %k2
+// CHECK: encoding: [0xc5,0xf8,0x99,0xd6]
+ ktestw %k6, %k2
+
+// CHECK: kortestb %k3, %k2
+// CHECK: encoding: [0xc5,0xf9,0x98,0xd3]
+ kortestb %k3, %k2
+
+// CHECK: kortestb %k6, %k2
+// CHECK: encoding: [0xc5,0xf9,0x98,0xd6]
+ kortestb %k6, %k2
+
+// CHECK: kaddb %k7, %k4, %k5
+// CHECK: encoding: [0xc5,0xdd,0x4a,0xef]
+ kaddb %k7, %k4, %k5
+
+// CHECK: kaddb %k4, %k6, %k5
+// CHECK: encoding: [0xc5,0xcd,0x4a,0xec]
+ kaddb %k4, %k6, %k5
+
+// CHECK: kaddw %k4, %k3, %k2
+// CHECK: encoding: [0xc5,0xe4,0x4a,0xd4]
+ kaddw %k4, %k3, %k2
+
+// CHECK: kaddw %k6, %k6, %k2
+// CHECK: encoding: [0xc5,0xcc,0x4a,0xd6]
+ kaddw %k6, %k6, %k2
+
+// CHECK: vfpclasspd $171, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x66,0xd1,0xab]
+ vfpclasspd $0xab, %zmm17, %k2
+
+// CHECK: vfpclasspd $171, %zmm17, %k2 {%k1}
+// CHECK: encoding: [0x62,0xb3,0xfd,0x49,0x66,0xd1,0xab]
+ vfpclasspd $0xab, %zmm17, %k2 {%k1}
+
+// CHECK: vfpclasspd $123, %zmm17, %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x66,0xd1,0x7b]
+ vfpclasspd $0x7b, %zmm17, %k2
+
+// CHECK: vfpclasspdz $123, (%rcx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x11,0x7b]
+ vfpclasspdz $0x7b,(%rcx), %k2
+
+// CHECK: vfpclasspdz $123, 291(%rax,%r14,8), %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x66,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspdz $0x7b,291(%rax,%r14,8), %k2
+
+// CHECK: vfpclasspdq $123, (%rcx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x11,0x7b]
+ vfpclasspdq $0x7b,(%rcx){1to8}, %k2
+
+// CHECK: vfpclasspdz $123, 8128(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x52,0x7f,0x7b]
+ vfpclasspdz $0x7b,8128(%rdx), %k2
+
+// CHECK: vfpclasspdz $123, 8192(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vfpclasspdz $0x7b,8192(%rdx), %k2
+
+// CHECK: vfpclasspdz $123, -8192(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x52,0x80,0x7b]
+ vfpclasspdz $0x7b,-8192(%rdx), %k2
+
+// CHECK: vfpclasspdz $123, -8256(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vfpclasspdz $0x7b,-8256(%rdx), %k2
+
+// CHECK: vfpclasspdq $123, 1016(%rdx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x52,0x7f,0x7b]
+ vfpclasspdq $0x7b,1016(%rdx){1to8}, %k2
+
+// CHECK: vfpclasspdq $123, 1024(%rdx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vfpclasspdq $0x7b,1024(%rdx){1to8}, %k2
+
+// CHECK: vfpclasspdq $123, -1024(%rdx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x52,0x80,0x7b]
+ vfpclasspdq $0x7b,-1024(%rdx){1to8}, %k2
+
+// CHECK: vfpclasspdq $123, -1032(%rdx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vfpclasspdq $0x7b,-1032(%rdx){1to8}, %k2
+
+// CHECK: vfpclassps $171, %zmm21, %k2
+// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x66,0xd5,0xab]
+ vfpclassps $0xab, %zmm21, %k2
+
+// CHECK: vfpclassps $171, %zmm21, %k2 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x7d,0x4a,0x66,0xd5,0xab]
+ vfpclassps $0xab, %zmm21, %k2 {%k2}
+
+// CHECK: vfpclassps $123, %zmm21, %k2
+// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x66,0xd5,0x7b]
+ vfpclassps $0x7b, %zmm21, %k2
+
+// CHECK: vfpclasspsz $123, (%rcx), %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0x11,0x7b]
+ vfpclasspsz $0x7b,(%rcx), %k2
+
+// CHECK: vfpclasspsz $123, 291(%rax,%r14,8), %k2
+// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x66,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspsz $0x7b,291(%rax,%r14,8), %k2
+
+// CHECK: vfpclasspsl $123, (%rcx){1to16}, %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0x11,0x7b]
+ vfpclasspsl $0x7b,(%rcx){1to16}, %k2
+
+// CHECK: vfpclasspsz $123, 8128(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0x52,0x7f,0x7b]
+ vfpclasspsz $0x7b,8128(%rdx), %k2
+
+// CHECK: vfpclasspsz $123, 8192(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vfpclasspsz $0x7b,8192(%rdx), %k2
+
+// CHECK: vfpclasspsz $123, -8192(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0x52,0x80,0x7b]
+ vfpclasspsz $0x7b,-8192(%rdx), %k2
+
+// CHECK: vfpclasspsz $123, -8256(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vfpclasspsz $0x7b,-8256(%rdx), %k2
+
+// CHECK: vfpclasspsl $123, 508(%rdx){1to16}, %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0x52,0x7f,0x7b]
+ vfpclasspsl $0x7b,508(%rdx){1to16}, %k2
+
+// CHECK: vfpclasspsl $123, 512(%rdx){1to16}, %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0x92,0x00,0x02,0x00,0x00,0x7b]
+ vfpclasspsl $0x7b,512(%rdx){1to16}, %k2
+
+// CHECK: vfpclasspsl $123, -512(%rdx){1to16}, %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0x52,0x80,0x7b]
+ vfpclasspsl $0x7b,-512(%rdx){1to16}, %k2
+
+// CHECK: vfpclasspsl $123, -516(%rdx){1to16}, %k2
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+ vfpclasspsl $0x7b,-516(%rdx){1to16}, %k2
+
+// CHECK: vfpclasspd $171, %zmm19, %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x66,0xd3,0xab]
+ vfpclasspd $0xab, %zmm19, %k2
+
+// CHECK: vfpclasspd $171, %zmm19, %k2 {%k6}
+// CHECK: encoding: [0x62,0xb3,0xfd,0x4e,0x66,0xd3,0xab]
+ vfpclasspd $0xab, %zmm19, %k2 {%k6}
+
+// CHECK: vfpclasspd $123, %zmm19, %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x66,0xd3,0x7b]
+ vfpclasspd $0x7b, %zmm19, %k2
+
+// CHECK: vfpclasspdz $123, (%rcx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x11,0x7b]
+ vfpclasspdz $0x7b,(%rcx), %k2
+
+// CHECK: vfpclasspdz $123, 4660(%rax,%r14,8), %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x48,0x66,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vfpclasspdz $0x7b,4660(%rax,%r14,8), %k2
+
+// CHECK: vfpclasspdq $123, (%rcx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x11,0x7b]
+ vfpclasspdq $0x7b,(%rcx){1to8}, %k2
+
+// CHECK: vfpclasspdz $123, 8128(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x52,0x7f,0x7b]
+ vfpclasspdz $0x7b,8128(%rdx), %k2
+
+// CHECK: vfpclasspdz $123, 8192(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x92,0x00,0x20,0x00,0x00,0x7b]
+ vfpclasspdz $0x7b,8192(%rdx), %k2
+
+// CHECK: vfpclasspdz $123, -8192(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x52,0x80,0x7b]
+ vfpclasspdz $0x7b,-8192(%rdx), %k2
+
+// CHECK: vfpclasspdz $123, -8256(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x66,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+ vfpclasspdz $0x7b,-8256(%rdx), %k2
+
+// CHECK: vfpclasspdq $123, 1016(%rdx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x52,0x7f,0x7b]
+ vfpclasspdq $0x7b,1016(%rdx){1to8}, %k2
+
+// CHECK: vfpclasspdq $123, 1024(%rdx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vfpclasspdq $0x7b,1024(%rdx){1to8}, %k2
+
+// CHECK: vfpclasspdq $123, -1024(%rdx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x52,0x80,0x7b]
+ vfpclasspdq $0x7b,-1024(%rdx){1to8}, %k2
+
+// CHECK: vfpclasspdq $123, -1032(%rdx){1to8}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x58,0x66,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vfpclasspdq $0x7b,-1032(%rdx){1to8}, %k2
+
+// CHECK: vfpclassps $171, %zmm17, %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x66,0xe1,0xab]
+ vfpclassps $0xab, %zmm17, %k4
+
+// CHECK: vfpclassps $171, %zmm17, %k4 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x7d,0x4a,0x66,0xe1,0xab]
+ vfpclassps $0xab, %zmm17, %k4 {%k2}
+
+// CHECK: vfpclassps $123, %zmm17, %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x66,0xe1,0x7b]
+ vfpclassps $0x7b, %zmm17, %k4
+
+// CHECK: vfpclasspsz $123, (%rcx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0x21,0x7b]
+ vfpclasspsz $0x7b,(%rcx), %k4
+
+// CHECK: vfpclasspsz $123, 4660(%rax,%r14,8), %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x48,0x66,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vfpclasspsz $0x7b,4660(%rax,%r14,8), %k4
+
+// CHECK: vfpclasspsl $123, (%rcx){1to16}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0x21,0x7b]
+ vfpclasspsl $0x7b,(%rcx){1to16}, %k4
+
+// CHECK: vfpclasspsz $123, 8128(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0x62,0x7f,0x7b]
+ vfpclasspsz $0x7b,8128(%rdx), %k4
+
+// CHECK: vfpclasspsz $123, 8192(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0xa2,0x00,0x20,0x00,0x00,0x7b]
+ vfpclasspsz $0x7b,8192(%rdx), %k4
+
+// CHECK: vfpclasspsz $123, -8192(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0x62,0x80,0x7b]
+ vfpclasspsz $0x7b,-8192(%rdx), %k4
+
+// CHECK: vfpclasspsz $123, -8256(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x48,0x66,0xa2,0xc0,0xdf,0xff,0xff,0x7b]
+ vfpclasspsz $0x7b,-8256(%rdx), %k4
+
+// CHECK: vfpclasspsl $123, 508(%rdx){1to16}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0x62,0x7f,0x7b]
+ vfpclasspsl $0x7b,508(%rdx){1to16}, %k4
+
+// CHECK: vfpclasspsl $123, 512(%rdx){1to16}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vfpclasspsl $0x7b,512(%rdx){1to16}, %k4
+
+// CHECK: vfpclasspsl $123, -512(%rdx){1to16}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0x62,0x80,0x7b]
+ vfpclasspsl $0x7b,-512(%rdx){1to16}, %k4
+
+// CHECK: vfpclasspsl $123, -516(%rdx){1to16}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x58,0x66,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vfpclasspsl $0x7b,-516(%rdx){1to16}, %k4
+
+
+// CHECK: vcvtuqq2ps {rn-sae}, %zmm21, %ymm18
+// CHECK: encoding: [0x62,0xa1,0xff,0x18,0x7a,0xd5]
+ vcvtuqq2ps {rn-sae}, %zmm21, %ymm18
+
+// CHECK: vcvtuqq2ps {ru-sae}, %zmm21, %ymm18
+// CHECK: encoding: [0x62,0xa1,0xff,0x58,0x7a,0xd5]
+ vcvtuqq2ps {ru-sae}, %zmm21, %ymm18
+
+// CHECK: vcvtuqq2ps {rd-sae}, %zmm21, %ymm18
+// CHECK: encoding: [0x62,0xa1,0xff,0x38,0x7a,0xd5]
+ vcvtuqq2ps {rd-sae}, %zmm21, %ymm18
+
+// CHECK: vcvtuqq2ps {rz-sae}, %zmm21, %ymm18
+// CHECK: encoding: [0x62,0xa1,0xff,0x78,0x7a,0xd5]
+ vcvtuqq2ps {rz-sae}, %zmm21, %ymm18
+
+// CHECK: vcvtuqq2ps (%rcx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x11]
+ vcvtuqq2ps (%rcx), %ymm18
+
+// CHECK: vcvtuqq2ps 291(%rax,%r14,8), %ymm18
+// CHECK: encoding: [0x62,0xa1,0xff,0x48,0x7a,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vcvtuqq2ps 291(%rax,%r14,8), %ymm18
+
+// CHECK: vcvtuqq2ps (%rcx){1to8}, %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x11]
+ vcvtuqq2ps (%rcx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps 8128(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x52,0x7f]
+ vcvtuqq2ps 8128(%rdx), %ymm18
+
+// CHECK: vcvtuqq2ps 8192(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x92,0x00,0x20,0x00,0x00]
+ vcvtuqq2ps 8192(%rdx), %ymm18
+
+// CHECK: vcvtuqq2ps -8192(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x52,0x80]
+ vcvtuqq2ps -8192(%rdx), %ymm18
+
+// CHECK: vcvtuqq2ps -8256(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x48,0x7a,0x92,0xc0,0xdf,0xff,0xff]
+ vcvtuqq2ps -8256(%rdx), %ymm18
+
+// CHECK: vcvtuqq2ps 1016(%rdx){1to8}, %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x52,0x7f]
+ vcvtuqq2ps 1016(%rdx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps 1024(%rdx){1to8}, %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x92,0x00,0x04,0x00,0x00]
+ vcvtuqq2ps 1024(%rdx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps -1024(%rdx){1to8}, %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x52,0x80]
+ vcvtuqq2ps -1024(%rdx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps -1032(%rdx){1to8}, %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x58,0x7a,0x92,0xf8,0xfb,0xff,0xff]
+ vcvtuqq2ps -1032(%rdx){1to8}, %ymm18
+
+// CHECK: vcvtuqq2ps %zmm26, %ymm25
+// CHECK: encoding: [0x62,0x01,0xff,0x48,0x7a,0xca]
+ vcvtuqq2ps %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps %zmm26, %ymm25 {%k2}
+// CHECK: encoding: [0x62,0x01,0xff,0x4a,0x7a,0xca]
+ vcvtuqq2ps %zmm26, %ymm25 {%k2}
+
+// CHECK: vcvtuqq2ps %zmm26, %ymm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0xff,0xca,0x7a,0xca]
+ vcvtuqq2ps %zmm26, %ymm25 {%k2} {z}
+
+// CHECK: vcvtuqq2ps {rn-sae}, %zmm26, %ymm25
+// CHECK: encoding: [0x62,0x01,0xff,0x18,0x7a,0xca]
+ vcvtuqq2ps {rn-sae}, %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps {ru-sae}, %zmm26, %ymm25
+// CHECK: encoding: [0x62,0x01,0xff,0x58,0x7a,0xca]
+ vcvtuqq2ps {ru-sae}, %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps {rd-sae}, %zmm26, %ymm25
+// CHECK: encoding: [0x62,0x01,0xff,0x38,0x7a,0xca]
+ vcvtuqq2ps {rd-sae}, %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps {rz-sae}, %zmm26, %ymm25
+// CHECK: encoding: [0x62,0x01,0xff,0x78,0x7a,0xca]
+ vcvtuqq2ps {rz-sae}, %zmm26, %ymm25
+
+// CHECK: vcvtuqq2ps (%rcx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x09]
+ vcvtuqq2ps (%rcx), %ymm25
+
+// CHECK: vcvtuqq2ps 4660(%rax,%r14,8), %ymm25
+// CHECK: encoding: [0x62,0x21,0xff,0x48,0x7a,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vcvtuqq2ps 4660(%rax,%r14,8), %ymm25
+
+// CHECK: vcvtuqq2ps (%rcx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x09]
+ vcvtuqq2ps (%rcx){1to8}, %ymm25
+
+// CHECK: vcvtuqq2ps 8128(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x4a,0x7f]
+ vcvtuqq2ps 8128(%rdx), %ymm25
+
+// CHECK: vcvtuqq2ps 8192(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x8a,0x00,0x20,0x00,0x00]
+ vcvtuqq2ps 8192(%rdx), %ymm25
+
+// CHECK: vcvtuqq2ps -8192(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x4a,0x80]
+ vcvtuqq2ps -8192(%rdx), %ymm25
+
+// CHECK: vcvtuqq2ps -8256(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x48,0x7a,0x8a,0xc0,0xdf,0xff,0xff]
+ vcvtuqq2ps -8256(%rdx), %ymm25
+
+// CHECK: vcvtuqq2ps 1016(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x4a,0x7f]
+ vcvtuqq2ps 1016(%rdx){1to8}, %ymm25
+
+// CHECK: vcvtuqq2ps 1024(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x8a,0x00,0x04,0x00,0x00]
+ vcvtuqq2ps 1024(%rdx){1to8}, %ymm25
+
+// CHECK: vcvtuqq2ps -1024(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x4a,0x80]
+ vcvtuqq2ps -1024(%rdx){1to8}, %ymm25
+
+// CHECK: vcvtuqq2ps -1032(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x61,0xff,0x58,0x7a,0x8a,0xf8,0xfb,0xff,0xff]
+ vcvtuqq2ps -1032(%rdx){1to8}, %ymm25
+
+// CHECK: vfpclasssd $171, %xmm28, %k4
+// CHECK: encoding: [0x62,0x93,0xfd,0x08,0x67,0xe4,0xab]
+ vfpclasssd $0xab, %xmm28, %k4
+
+// CHECK: vfpclasssd $171, %xmm28, %k4 {%k3}
+// CHECK: encoding: [0x62,0x93,0xfd,0x0b,0x67,0xe4,0xab]
+ vfpclasssd $0xab, %xmm28, %k4 {%k3}
+
+// CHECK: vfpclasssd $123, %xmm28, %k4
+// CHECK: encoding: [0x62,0x93,0xfd,0x08,0x67,0xe4,0x7b]
+ vfpclasssd $0x7b, %xmm28, %k4
+
+// CHECK: vfpclasssd $123, (%rcx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x21,0x7b]
+ vfpclasssd $0x7b,(%rcx), %k4
+
+// CHECK: vfpclasssd $123, 291(%rax,%r14,8), %k4
+// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasssd $0x7b,291(%rax,%r14,8), %k4
+
+// CHECK: vfpclasssd $123, 1016(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x62,0x7f,0x7b]
+ vfpclasssd $0x7b,1016(%rdx), %k4
+
+// CHECK: vfpclasssd $123, 1024(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vfpclasssd $0x7b,1024(%rdx), %k4
+
+// CHECK: vfpclasssd $123, -1024(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x62,0x80,0x7b]
+ vfpclasssd $0x7b,-1024(%rdx), %k4
+
+// CHECK: vfpclasssd $123, -1032(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vfpclasssd $0x7b,-1032(%rdx), %k4
+
+// CHECK: vfpclassss $171, %xmm26, %k5
+// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xea,0xab]
+ vfpclassss $0xab, %xmm26, %k5
+
+// CHECK: vfpclassss $171, %xmm26, %k5 {%k4}
+// CHECK: encoding: [0x62,0x93,0x7d,0x0c,0x67,0xea,0xab]
+ vfpclassss $0xab, %xmm26, %k5 {%k4}
+
+// CHECK: vfpclassss $123, %xmm26, %k5
+// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xea,0x7b]
+ vfpclassss $0x7b, %xmm26, %k5
+
+// CHECK: vfpclassss $123, (%rcx), %k5
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x29,0x7b]
+ vfpclassss $0x7b,(%rcx), %k5
+
+// CHECK: vfpclassss $123, 291(%rax,%r14,8), %k5
+// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x67,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfpclassss $0x7b,291(%rax,%r14,8), %k5
+
+// CHECK: vfpclassss $123, 508(%rdx), %k5
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x6a,0x7f,0x7b]
+ vfpclassss $0x7b,508(%rdx), %k5
+
+// CHECK: vfpclassss $123, 512(%rdx), %k5
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xaa,0x00,0x02,0x00,0x00,0x7b]
+ vfpclassss $0x7b,512(%rdx), %k5
+
+// CHECK: vfpclassss $123, -512(%rdx), %k5
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x6a,0x80,0x7b]
+ vfpclassss $0x7b,-512(%rdx), %k5
+
+// CHECK: vfpclassss $123, -516(%rdx), %k5
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+ vfpclassss $0x7b,-516(%rdx), %k5
+
+// CHECK: vfpclasssd $171, %xmm20, %k3
+// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0xdc,0xab]
+ vfpclasssd $0xab, %xmm20, %k3
+
+// CHECK: vfpclasssd $171, %xmm20, %k3 {%k6}
+// CHECK: encoding: [0x62,0xb3,0xfd,0x0e,0x67,0xdc,0xab]
+ vfpclasssd $0xab, %xmm20, %k3 {%k6}
+
+// CHECK: vfpclasssd $123, %xmm20, %k3
+// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0xdc,0x7b]
+ vfpclasssd $0x7b, %xmm20, %k3
+
+// CHECK: vfpclasssd $123, (%rcx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x19,0x7b]
+ vfpclasssd $0x7b,(%rcx), %k3
+
+// CHECK: vfpclasssd $123, 4660(%rax,%r14,8), %k3
+// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x67,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vfpclasssd $0x7b,4660(%rax,%r14,8), %k3
+
+// CHECK: vfpclasssd $123, 1016(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x5a,0x7f,0x7b]
+ vfpclasssd $0x7b,1016(%rdx), %k3
+
+// CHECK: vfpclasssd $123, 1024(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vfpclasssd $0x7b,1024(%rdx), %k3
+
+// CHECK: vfpclasssd $123, -1024(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x5a,0x80,0x7b]
+ vfpclasssd $0x7b,-1024(%rdx), %k3
+
+// CHECK: vfpclasssd $123, -1032(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x67,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vfpclasssd $0x7b,-1032(%rdx), %k3
+
+// CHECK: vfpclassss $171, %xmm28, %k4
+// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xe4,0xab]
+ vfpclassss $0xab, %xmm28, %k4
+
+// CHECK: vfpclassss $171, %xmm28, %k4 {%k6}
+// CHECK: encoding: [0x62,0x93,0x7d,0x0e,0x67,0xe4,0xab]
+ vfpclassss $0xab, %xmm28, %k4 {%k6}
+
+// CHECK: vfpclassss $123, %xmm28, %k4
+// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x67,0xe4,0x7b]
+ vfpclassss $0x7b, %xmm28, %k4
+
+// CHECK: vfpclassss $123, (%rcx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x21,0x7b]
+ vfpclassss $0x7b,(%rcx), %k4
+
+// CHECK: vfpclassss $123, 4660(%rax,%r14,8), %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x67,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vfpclassss $0x7b,4660(%rax,%r14,8), %k4
+
+// CHECK: vfpclassss $123, 508(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x62,0x7f,0x7b]
+ vfpclassss $0x7b,508(%rdx), %k4
+
+// CHECK: vfpclassss $123, 512(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vfpclassss $0x7b,512(%rdx), %k4
+
+// CHECK: vfpclassss $123, -512(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0x62,0x80,0x7b]
+ vfpclassss $0x7b,-512(%rdx), %k4
+
+// CHECK: vfpclassss $123, -516(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x67,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vfpclassss $0x7b,-516(%rdx), %k4
+
+// CHECK: vbroadcasti32x2 %xmm31, %zmm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x48,0x59,0xf7]
+ vbroadcasti32x2 %xmm31, %zmm30
+
+// CHECK: vbroadcasti32x2 %xmm31, %zmm30 {%k5}
+// CHECK: encoding: [0x62,0x02,0x7d,0x4d,0x59,0xf7]
+ vbroadcasti32x2 %xmm31, %zmm30 {%k5}
+
+// CHECK: vbroadcasti32x2 %xmm31, %zmm30 {%k5} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xcd,0x59,0xf7]
+ vbroadcasti32x2 %xmm31, %zmm30 {%k5} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0x31]
+ vbroadcasti32x2 (%rcx), %zmm30
+
+// CHECK: vbroadcasti32x2 291(%rax,%r14,8), %zmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcasti32x2 291(%rax,%r14,8), %zmm30
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0x72,0x7f]
+ vbroadcasti32x2 1016(%rdx), %zmm30
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0xb2,0x00,0x04,0x00,0x00]
+ vbroadcasti32x2 1024(%rdx), %zmm30
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0x72,0x80]
+ vbroadcasti32x2 -1024(%rdx), %zmm30
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x59,0xb2,0xf8,0xfb,0xff,0xff]
+ vbroadcasti32x2 -1032(%rdx), %zmm30
+
+// CHECK: vbroadcasti32x2 %xmm17, %zmm20
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x59,0xe1]
+ vbroadcasti32x2 %xmm17, %zmm20
+
+// CHECK: vbroadcasti32x2 %xmm17, %zmm20 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x49,0x59,0xe1]
+ vbroadcasti32x2 %xmm17, %zmm20 {%k1}
+
+// CHECK: vbroadcasti32x2 %xmm17, %zmm20 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0xc9,0x59,0xe1]
+ vbroadcasti32x2 %xmm17, %zmm20 {%k1} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0x21]
+ vbroadcasti32x2 (%rcx), %zmm20
+
+// CHECK: vbroadcasti32x2 4660(%rax,%r14,8), %zmm20
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x59,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vbroadcasti32x2 4660(%rax,%r14,8), %zmm20
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0x62,0x7f]
+ vbroadcasti32x2 1016(%rdx), %zmm20
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0xa2,0x00,0x04,0x00,0x00]
+ vbroadcasti32x2 1024(%rdx), %zmm20
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0x62,0x80]
+ vbroadcasti32x2 -1024(%rdx), %zmm20
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %zmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x59,0xa2,0xf8,0xfb,0xff,0xff]
+ vbroadcasti32x2 -1032(%rdx), %zmm20
+
+// CHECK: vbroadcastf32x2 %xmm23, %zmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x19,0xdf]
+ vbroadcastf32x2 %xmm23, %zmm27
+
+// CHECK: vbroadcastf32x2 %xmm23, %zmm27 {%k6}
+// CHECK: encoding: [0x62,0x22,0x7d,0x4e,0x19,0xdf]
+ vbroadcastf32x2 %xmm23, %zmm27 {%k6}
+
+// CHECK: vbroadcastf32x2 %xmm23, %zmm27 {%k6} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xce,0x19,0xdf]
+ vbroadcastf32x2 %xmm23, %zmm27 {%k6} {z}
+
+// CHECK: vbroadcastf32x2 (%rcx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x19]
+ vbroadcastf32x2 (%rcx), %zmm27
+
+// CHECK: vbroadcastf32x2 291(%rax,%r14,8), %zmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x19,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcastf32x2 291(%rax,%r14,8), %zmm27
+
+// CHECK: vbroadcastf32x2 1016(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x7f]
+ vbroadcastf32x2 1016(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 1024(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0x00,0x04,0x00,0x00]
+ vbroadcastf32x2 1024(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 -1024(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x80]
+ vbroadcastf32x2 -1024(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 -1032(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0xf8,0xfb,0xff,0xff]
+ vbroadcastf32x2 -1032(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 %xmm21, %zmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x19,0xdd]
+ vbroadcastf32x2 %xmm21, %zmm27
+
+// CHECK: vbroadcastf32x2 %xmm21, %zmm27 {%k5}
+// CHECK: encoding: [0x62,0x22,0x7d,0x4d,0x19,0xdd]
+ vbroadcastf32x2 %xmm21, %zmm27 {%k5}
+
+// CHECK: vbroadcastf32x2 %xmm21, %zmm27 {%k5} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xcd,0x19,0xdd]
+ vbroadcastf32x2 %xmm21, %zmm27 {%k5} {z}
+
+// CHECK: vbroadcastf32x2 (%rcx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x19]
+ vbroadcastf32x2 (%rcx), %zmm27
+
+// CHECK: vbroadcastf32x2 4660(%rax,%r14,8), %zmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x19,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vbroadcastf32x2 4660(%rax,%r14,8), %zmm27
+
+// CHECK: vbroadcastf32x2 1016(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x7f]
+ vbroadcastf32x2 1016(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 1024(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0x00,0x04,0x00,0x00]
+ vbroadcastf32x2 1024(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 -1024(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x5a,0x80]
+ vbroadcastf32x2 -1024(%rdx), %zmm27
+
+// CHECK: vbroadcastf32x2 -1032(%rdx), %zmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0xf8,0xfb,0xff,0xff]
+ vbroadcastf32x2 -1032(%rdx), %zmm27
+
diff --git a/test/MC/X86/x86-64-avx512dq_vl.s b/test/MC/X86/x86-64-avx512dq_vl.s
index 17c37c08335c..eef6b0cf33e2 100644
--- a/test/MC/X86/x86-64-avx512dq_vl.s
+++ b/test/MC/X86/x86-64-avx512dq_vl.s
@@ -2208,6 +2208,486 @@
// CHECK: encoding: [0x62,0x63,0x45,0x30,0x50,0x82,0xfc,0xfd,0xff,0xff,0x7b]
vrangeps $0x7b,-516(%rdx){1to8}, %ymm23, %ymm24
+// CHECK: vreducepd $171, %xmm17, %xmm18
+// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x56,0xd1,0xab]
+ vreducepd $0xab, %xmm17, %xmm18
+
+// CHECK: vreducepd $171, %xmm17, %xmm18 {%k3}
+// CHECK: encoding: [0x62,0xa3,0xfd,0x0b,0x56,0xd1,0xab]
+ vreducepd $0xab, %xmm17, %xmm18 {%k3}
+
+// CHECK: vreducepd $171, %xmm17, %xmm18 {%k3} {z}
+// CHECK: encoding: [0x62,0xa3,0xfd,0x8b,0x56,0xd1,0xab]
+ vreducepd $0xab, %xmm17, %xmm18 {%k3} {z}
+
+// CHECK: vreducepd $123, %xmm17, %xmm18
+// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x56,0xd1,0x7b]
+ vreducepd $0x7b, %xmm17, %xmm18
+
+// CHECK: vreducepd $123, (%rcx), %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x11,0x7b]
+ vreducepd $0x7b,(%rcx), %xmm18
+
+// CHECK: vreducepd $123, 291(%rax,%r14,8), %xmm18
+// CHECK: encoding: [0x62,0xa3,0xfd,0x08,0x56,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vreducepd $0x7b,291(%rax,%r14,8), %xmm18
+
+// CHECK: vreducepd $171, %xmm28, %xmm25
+// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x56,0xcc,0xab]
+ vreducepd $0xab, %xmm28, %xmm25
+
+// CHECK: vreducepd $171, %xmm28, %xmm25 {%k4}
+// CHECK: encoding: [0x62,0x03,0xfd,0x0c,0x56,0xcc,0xab]
+ vreducepd $0xab, %xmm28, %xmm25 {%k4}
+
+// CHECK: vreducepd $171, %xmm28, %xmm25 {%k4} {z}
+// CHECK: encoding: [0x62,0x03,0xfd,0x8c,0x56,0xcc,0xab]
+ vreducepd $0xab, %xmm28, %xmm25 {%k4} {z}
+
+// CHECK: vreducepd $123, %xmm28, %xmm25
+// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x56,0xcc,0x7b]
+ vreducepd $0x7b, %xmm28, %xmm25
+
+// CHECK: vreducepd $123, (%rcx), %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x09,0x7b]
+ vreducepd $0x7b,(%rcx), %xmm25
+
+// CHECK: vreducepd $123, 4660(%rax,%r14,8), %xmm25
+// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x56,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vreducepd $0x7b,4660(%rax,%r14,8), %xmm25
+
+// CHECK: vreducepd $123, (%rcx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x09,0x7b]
+ vreducepd $0x7b,(%rcx){1to2}, %xmm25
+
+// CHECK: vreducepd $123, 2032(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x4a,0x7f,0x7b]
+ vreducepd $0x7b,2032(%rdx), %xmm25
+
+// CHECK: vreducepd $123, 2048(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vreducepd $0x7b,2048(%rdx), %xmm25
+
+// CHECK: vreducepd $123, -2048(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x4a,0x80,0x7b]
+ vreducepd $0x7b,-2048(%rdx), %xmm25
+
+// CHECK: vreducepd $123, -2064(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x56,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vreducepd $0x7b,-2064(%rdx), %xmm25
+
+// CHECK: vreducepd $123, 1016(%rdx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x4a,0x7f,0x7b]
+ vreducepd $0x7b,1016(%rdx){1to2}, %xmm25
+
+// CHECK: vreducepd $123, 1024(%rdx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vreducepd $0x7b,1024(%rdx){1to2}, %xmm25
+
+// CHECK: vreducepd $123, -1024(%rdx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x4a,0x80,0x7b]
+ vreducepd $0x7b,-1024(%rdx){1to2}, %xmm25
+
+// CHECK: vreducepd $123, -1032(%rdx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x56,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vreducepd $0x7b,-1032(%rdx){1to2}, %xmm25
+
+// CHECK: vreducepd $171, %ymm17, %ymm28
+// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0xe1,0xab]
+ vreducepd $0xab, %ymm17, %ymm28
+
+// CHECK: vreducepd $171, %ymm17, %ymm28 {%k4}
+// CHECK: encoding: [0x62,0x23,0xfd,0x2c,0x56,0xe1,0xab]
+ vreducepd $0xab, %ymm17, %ymm28 {%k4}
+
+// CHECK: vreducepd $171, %ymm17, %ymm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x23,0xfd,0xac,0x56,0xe1,0xab]
+ vreducepd $0xab, %ymm17, %ymm28 {%k4} {z}
+
+// CHECK: vreducepd $123, %ymm17, %ymm28
+// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0xe1,0x7b]
+ vreducepd $0x7b, %ymm17, %ymm28
+
+// CHECK: vreducepd $123, (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x21,0x7b]
+ vreducepd $0x7b,(%rcx), %ymm28
+
+// CHECK: vreducepd $123, 4660(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vreducepd $0x7b,4660(%rax,%r14,8), %ymm28
+
+// CHECK: vreducepd $123, (%rcx){1to4}, %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x21,0x7b]
+ vreducepd $0x7b,(%rcx){1to4}, %ymm28
+
+// CHECK: vreducepd $123, 4064(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x62,0x7f,0x7b]
+ vreducepd $0x7b,4064(%rdx), %ymm28
+
+// CHECK: vreducepd $123, 4096(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vreducepd $0x7b,4096(%rdx), %ymm28
+
+// CHECK: vreducepd $123, -4096(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x62,0x80,0x7b]
+ vreducepd $0x7b,-4096(%rdx), %ymm28
+
+// CHECK: vreducepd $123, -4128(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vreducepd $0x7b,-4128(%rdx), %ymm28
+
+// CHECK: vreducepd $123, 1016(%rdx){1to4}, %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x62,0x7f,0x7b]
+ vreducepd $0x7b,1016(%rdx){1to4}, %ymm28
+
+// CHECK: vreducepd $123, 1024(%rdx){1to4}, %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vreducepd $0x7b,1024(%rdx){1to4}, %ymm28
+
+// CHECK: vreducepd $123, -1024(%rdx){1to4}, %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x62,0x80,0x7b]
+ vreducepd $0x7b,-1024(%rdx){1to4}, %ymm28
+
+// CHECK: vreducepd $123, -1032(%rdx){1to4}, %ymm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vreducepd $0x7b,-1032(%rdx){1to4}, %ymm28
+
+// CHECK: vreduceps $171, %xmm21, %xmm29
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x56,0xed,0xab]
+ vreduceps $0xab, %xmm21, %xmm29
+
+// CHECK: vreduceps $171, %xmm21, %xmm29 {%k7}
+// CHECK: encoding: [0x62,0x23,0x7d,0x0f,0x56,0xed,0xab]
+ vreduceps $0xab, %xmm21, %xmm29 {%k7}
+
+// CHECK: vreduceps $171, %xmm21, %xmm29 {%k7} {z}
+// CHECK: encoding: [0x62,0x23,0x7d,0x8f,0x56,0xed,0xab]
+ vreduceps $0xab, %xmm21, %xmm29 {%k7} {z}
+
+// CHECK: vreduceps $123, %xmm21, %xmm29
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x56,0xed,0x7b]
+ vreduceps $0x7b, %xmm21, %xmm29
+
+// CHECK: vreduceps $123, (%rcx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0x29,0x7b]
+ vreduceps $0x7b,(%rcx), %xmm29
+
+// CHECK: vreduceps $123, 4660(%rax,%r14,8), %xmm29
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x56,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vreduceps $0x7b,4660(%rax,%r14,8), %xmm29
+
+// CHECK: vreduceps $123, (%rcx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0x29,0x7b]
+ vreduceps $0x7b,(%rcx){1to4}, %xmm29
+
+// CHECK: vreduceps $123, 2032(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0x6a,0x7f,0x7b]
+ vreduceps $0x7b,2032(%rdx), %xmm29
+
+// CHECK: vreduceps $123, 2048(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vreduceps $0x7b,2048(%rdx), %xmm29
+
+// CHECK: vreduceps $123, -2048(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0x6a,0x80,0x7b]
+ vreduceps $0x7b,-2048(%rdx), %xmm29
+
+// CHECK: vreduceps $123, -2064(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x56,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vreduceps $0x7b,-2064(%rdx), %xmm29
+
+// CHECK: vreduceps $123, 508(%rdx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0x6a,0x7f,0x7b]
+ vreduceps $0x7b,508(%rdx){1to4}, %xmm29
+
+// CHECK: vreduceps $123, 512(%rdx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0xaa,0x00,0x02,0x00,0x00,0x7b]
+ vreduceps $0x7b,512(%rdx){1to4}, %xmm29
+
+// CHECK: vreduceps $123, -512(%rdx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0x6a,0x80,0x7b]
+ vreduceps $0x7b,-512(%rdx){1to4}, %xmm29
+
+// CHECK: vreduceps $123, -516(%rdx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x56,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+ vreduceps $0x7b,-516(%rdx){1to4}, %xmm29
+
+// CHECK: vreduceps $171, %ymm23, %ymm25
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xcf,0xab]
+ vreduceps $0xab, %ymm23, %ymm25
+
+// CHECK: vreduceps $171, %ymm23, %ymm25 {%k3}
+// CHECK: encoding: [0x62,0x23,0x7d,0x2b,0x56,0xcf,0xab]
+ vreduceps $0xab, %ymm23, %ymm25 {%k3}
+
+// CHECK: vreduceps $171, %ymm23, %ymm25 {%k3} {z}
+// CHECK: encoding: [0x62,0x23,0x7d,0xab,0x56,0xcf,0xab]
+ vreduceps $0xab, %ymm23, %ymm25 {%k3} {z}
+
+// CHECK: vreduceps $123, %ymm23, %ymm25
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xcf,0x7b]
+ vreduceps $0x7b, %ymm23, %ymm25
+
+// CHECK: vreduceps $123, (%rcx), %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x09,0x7b]
+ vreduceps $0x7b,(%rcx), %ymm25
+
+// CHECK: vreduceps $123, 4660(%rax,%r14,8), %ymm25
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vreduceps $0x7b,4660(%rax,%r14,8), %ymm25
+
+// CHECK: vreduceps $123, (%rcx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x09,0x7b]
+ vreduceps $0x7b,(%rcx){1to8}, %ymm25
+
+// CHECK: vreduceps $123, 4064(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x4a,0x7f,0x7b]
+ vreduceps $0x7b,4064(%rdx), %ymm25
+
+// CHECK: vreduceps $123, 4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vreduceps $0x7b,4096(%rdx), %ymm25
+
+// CHECK: vreduceps $123, -4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x4a,0x80,0x7b]
+ vreduceps $0x7b,-4096(%rdx), %ymm25
+
+// CHECK: vreduceps $123, -4128(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vreduceps $0x7b,-4128(%rdx), %ymm25
+
+// CHECK: vreduceps $123, 508(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x4a,0x7f,0x7b]
+ vreduceps $0x7b,508(%rdx){1to8}, %ymm25
+
+// CHECK: vreduceps $123, 512(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x8a,0x00,0x02,0x00,0x00,0x7b]
+ vreduceps $0x7b,512(%rdx){1to8}, %ymm25
+
+// CHECK: vreduceps $123, -512(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x4a,0x80,0x7b]
+ vreduceps $0x7b,-512(%rdx){1to8}, %ymm25
+
+// CHECK: vreduceps $123, -516(%rdx){1to8}, %ymm25
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+ vreduceps $0x7b,-516(%rdx){1to8}, %ymm25
+
+// CHECK: vreducepd $123, (%rcx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x11,0x7b]
+ vreducepd $0x7b,(%rcx){1to2}, %xmm18
+
+// CHECK: vreducepd $123, 2032(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x52,0x7f,0x7b]
+ vreducepd $0x7b,2032(%rdx), %xmm18
+
+// CHECK: vreducepd $123, 2048(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x92,0x00,0x08,0x00,0x00,0x7b]
+ vreducepd $0x7b,2048(%rdx), %xmm18
+
+// CHECK: vreducepd $123, -2048(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x52,0x80,0x7b]
+ vreducepd $0x7b,-2048(%rdx), %xmm18
+
+// CHECK: vreducepd $123, -2064(%rdx), %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x08,0x56,0x92,0xf0,0xf7,0xff,0xff,0x7b]
+ vreducepd $0x7b,-2064(%rdx), %xmm18
+
+// CHECK: vreducepd $123, 1016(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x52,0x7f,0x7b]
+ vreducepd $0x7b,1016(%rdx){1to2}, %xmm18
+
+// CHECK: vreducepd $123, 1024(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vreducepd $0x7b,1024(%rdx){1to2}, %xmm18
+
+// CHECK: vreducepd $123, -1024(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x52,0x80,0x7b]
+ vreducepd $0x7b,-1024(%rdx){1to2}, %xmm18
+
+// CHECK: vreducepd $123, -1032(%rdx){1to2}, %xmm18
+// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x56,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vreducepd $0x7b,-1032(%rdx){1to2}, %xmm18
+
+// CHECK: vreducepd $171, %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x56,0xcd,0xab]
+ vreducepd $0xab, %ymm29, %ymm25
+
+// CHECK: vreducepd $171, %ymm29, %ymm25 {%k1}
+// CHECK: encoding: [0x62,0x03,0xfd,0x29,0x56,0xcd,0xab]
+ vreducepd $0xab, %ymm29, %ymm25 {%k1}
+
+// CHECK: vreducepd $171, %ymm29, %ymm25 {%k1} {z}
+// CHECK: encoding: [0x62,0x03,0xfd,0xa9,0x56,0xcd,0xab]
+ vreducepd $0xab, %ymm29, %ymm25 {%k1} {z}
+
+// CHECK: vreducepd $123, %ymm29, %ymm25
+// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x56,0xcd,0x7b]
+ vreducepd $0x7b, %ymm29, %ymm25
+
+// CHECK: vreducepd $123, (%rcx), %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x09,0x7b]
+ vreducepd $0x7b,(%rcx), %ymm25
+
+// CHECK: vreducepd $123, 291(%rax,%r14,8), %ymm25
+// CHECK: encoding: [0x62,0x23,0xfd,0x28,0x56,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vreducepd $0x7b,291(%rax,%r14,8), %ymm25
+
+// CHECK: vreducepd $123, (%rcx){1to4}, %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x09,0x7b]
+ vreducepd $0x7b,(%rcx){1to4}, %ymm25
+
+// CHECK: vreducepd $123, 4064(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x4a,0x7f,0x7b]
+ vreducepd $0x7b,4064(%rdx), %ymm25
+
+// CHECK: vreducepd $123, 4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vreducepd $0x7b,4096(%rdx), %ymm25
+
+// CHECK: vreducepd $123, -4096(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x4a,0x80,0x7b]
+ vreducepd $0x7b,-4096(%rdx), %ymm25
+
+// CHECK: vreducepd $123, -4128(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x28,0x56,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vreducepd $0x7b,-4128(%rdx), %ymm25
+
+// CHECK: vreducepd $123, 1016(%rdx){1to4}, %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x4a,0x7f,0x7b]
+ vreducepd $0x7b,1016(%rdx){1to4}, %ymm25
+
+// CHECK: vreducepd $123, 1024(%rdx){1to4}, %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vreducepd $0x7b,1024(%rdx){1to4}, %ymm25
+
+// CHECK: vreducepd $123, -1024(%rdx){1to4}, %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x4a,0x80,0x7b]
+ vreducepd $0x7b,-1024(%rdx){1to4}, %ymm25
+
+// CHECK: vreducepd $123, -1032(%rdx){1to4}, %ymm25
+// CHECK: encoding: [0x62,0x63,0xfd,0x38,0x56,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vreducepd $0x7b,-1032(%rdx){1to4}, %ymm25
+
+// CHECK: vreduceps $171, %xmm23, %xmm20
+// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x56,0xe7,0xab]
+ vreduceps $0xab, %xmm23, %xmm20
+
+// CHECK: vreduceps $171, %xmm23, %xmm20 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7d,0x0f,0x56,0xe7,0xab]
+ vreduceps $0xab, %xmm23, %xmm20 {%k7}
+
+// CHECK: vreduceps $171, %xmm23, %xmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7d,0x8f,0x56,0xe7,0xab]
+ vreduceps $0xab, %xmm23, %xmm20 {%k7} {z}
+
+// CHECK: vreduceps $123, %xmm23, %xmm20
+// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x56,0xe7,0x7b]
+ vreduceps $0x7b, %xmm23, %xmm20
+
+// CHECK: vreduceps $123, (%rcx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0x21,0x7b]
+ vreduceps $0x7b,(%rcx), %xmm20
+
+// CHECK: vreduceps $123, 291(%rax,%r14,8), %xmm20
+// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x56,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vreduceps $0x7b,291(%rax,%r14,8), %xmm20
+
+// CHECK: vreduceps $123, (%rcx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0x21,0x7b]
+ vreduceps $0x7b,(%rcx){1to4}, %xmm20
+
+// CHECK: vreduceps $123, 2032(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0x62,0x7f,0x7b]
+ vreduceps $0x7b,2032(%rdx), %xmm20
+
+// CHECK: vreduceps $123, 2048(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vreduceps $0x7b,2048(%rdx), %xmm20
+
+// CHECK: vreduceps $123, -2048(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0x62,0x80,0x7b]
+ vreduceps $0x7b,-2048(%rdx), %xmm20
+
+// CHECK: vreduceps $123, -2064(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x56,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vreduceps $0x7b,-2064(%rdx), %xmm20
+
+// CHECK: vreduceps $123, 508(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0x62,0x7f,0x7b]
+ vreduceps $0x7b,508(%rdx){1to4}, %xmm20
+
+// CHECK: vreduceps $123, 512(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vreduceps $0x7b,512(%rdx){1to4}, %xmm20
+
+// CHECK: vreduceps $123, -512(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0x62,0x80,0x7b]
+ vreduceps $0x7b,-512(%rdx){1to4}, %xmm20
+
+// CHECK: vreduceps $123, -516(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x56,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vreduceps $0x7b,-516(%rdx){1to4}, %xmm20
+
+// CHECK: vreduceps $171, %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xd6,0xab]
+ vreduceps $0xab, %ymm22, %ymm26
+
+// CHECK: vreduceps $171, %ymm22, %ymm26 {%k6}
+// CHECK: encoding: [0x62,0x23,0x7d,0x2e,0x56,0xd6,0xab]
+ vreduceps $0xab, %ymm22, %ymm26 {%k6}
+
+// CHECK: vreduceps $171, %ymm22, %ymm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x23,0x7d,0xae,0x56,0xd6,0xab]
+ vreduceps $0xab, %ymm22, %ymm26 {%k6} {z}
+
+// CHECK: vreduceps $123, %ymm22, %ymm26
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0xd6,0x7b]
+ vreduceps $0x7b, %ymm22, %ymm26
+
+// CHECK: vreduceps $123, (%rcx), %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x11,0x7b]
+ vreduceps $0x7b,(%rcx), %ymm26
+
+// CHECK: vreduceps $123, 291(%rax,%r14,8), %ymm26
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x56,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vreduceps $0x7b,291(%rax,%r14,8), %ymm26
+
+// CHECK: vreduceps $123, (%rcx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x11,0x7b]
+ vreduceps $0x7b,(%rcx){1to8}, %ymm26
+
+// CHECK: vreduceps $123, 4064(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x52,0x7f,0x7b]
+ vreduceps $0x7b,4064(%rdx), %ymm26
+
+// CHECK: vreduceps $123, 4096(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x92,0x00,0x10,0x00,0x00,0x7b]
+ vreduceps $0x7b,4096(%rdx), %ymm26
+
+// CHECK: vreduceps $123, -4096(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x52,0x80,0x7b]
+ vreduceps $0x7b,-4096(%rdx), %ymm26
+
+// CHECK: vreduceps $123, -4128(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x56,0x92,0xe0,0xef,0xff,0xff,0x7b]
+ vreduceps $0x7b,-4128(%rdx), %ymm26
+
+// CHECK: vreduceps $123, 508(%rdx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x52,0x7f,0x7b]
+ vreduceps $0x7b,508(%rdx){1to8}, %ymm26
+
+// CHECK: vreduceps $123, 512(%rdx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x92,0x00,0x02,0x00,0x00,0x7b]
+ vreduceps $0x7b,512(%rdx){1to8}, %ymm26
+
+// CHECK: vreduceps $123, -512(%rdx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x52,0x80,0x7b]
+ vreduceps $0x7b,-512(%rdx){1to8}, %ymm26
+
+// CHECK: vreduceps $123, -516(%rdx){1to8}, %ymm26
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x56,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+ vreduceps $0x7b,-516(%rdx){1to8}, %ymm26
+
// CHECK: vcvtpd2qq %xmm22, %xmm24
// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x7b,0xc6]
vcvtpd2qq %xmm22, %xmm24
@@ -2880,6 +3360,118 @@
// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x5b,0x9a,0xf8,0xfb,0xff,0xff]
vcvtqq2ps -1032(%rdx){1to4}, %xmm27
+// CHECK: vcvtqq2ps %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x01,0xfc,0x08,0x5b,0xf2]
+ vcvtqq2ps %xmm26, %xmm30
+
+// CHECK: vcvtqq2ps %xmm26, %xmm30 {%k4}
+// CHECK: encoding: [0x62,0x01,0xfc,0x0c,0x5b,0xf2]
+ vcvtqq2ps %xmm26, %xmm30 {%k4}
+
+// CHECK: vcvtqq2ps %xmm26, %xmm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x01,0xfc,0x8c,0x5b,0xf2]
+ vcvtqq2ps %xmm26, %xmm30 {%k4} {z}
+
+// CHECK: vcvtqq2psx (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0x31]
+ vcvtqq2psx (%rcx), %xmm30
+
+// CHECK: vcvtqq2psx 4660(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x21,0xfc,0x08,0x5b,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vcvtqq2psx 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vcvtqq2ps (%rcx){1to2}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0x31]
+ vcvtqq2ps (%rcx){1to2}, %xmm30
+
+// CHECK: vcvtqq2psx 2032(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0x72,0x7f]
+ vcvtqq2psx 2032(%rdx), %xmm30
+
+// CHECK: vcvtqq2psx 2048(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0xb2,0x00,0x08,0x00,0x00]
+ vcvtqq2psx 2048(%rdx), %xmm30
+
+// CHECK: vcvtqq2psx -2048(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0x72,0x80]
+ vcvtqq2psx -2048(%rdx), %xmm30
+
+// CHECK: vcvtqq2psx -2064(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x08,0x5b,0xb2,0xf0,0xf7,0xff,0xff]
+ vcvtqq2psx -2064(%rdx), %xmm30
+
+// CHECK: vcvtqq2ps 1016(%rdx){1to2}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0x72,0x7f]
+ vcvtqq2ps 1016(%rdx){1to2}, %xmm30
+
+// CHECK: vcvtqq2ps 1024(%rdx){1to2}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0xb2,0x00,0x04,0x00,0x00]
+ vcvtqq2ps 1024(%rdx){1to2}, %xmm30
+
+// CHECK: vcvtqq2ps -1024(%rdx){1to2}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0x72,0x80]
+ vcvtqq2ps -1024(%rdx){1to2}, %xmm30
+
+// CHECK: vcvtqq2ps -1032(%rdx){1to2}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xfc,0x18,0x5b,0xb2,0xf8,0xfb,0xff,0xff]
+ vcvtqq2ps -1032(%rdx){1to2}, %xmm30
+
+// CHECK: vcvtqq2ps %ymm28, %xmm20
+// CHECK: encoding: [0x62,0x81,0xfc,0x28,0x5b,0xe4]
+ vcvtqq2ps %ymm28, %xmm20
+
+// CHECK: vcvtqq2ps %ymm28, %xmm20 {%k3}
+// CHECK: encoding: [0x62,0x81,0xfc,0x2b,0x5b,0xe4]
+ vcvtqq2ps %ymm28, %xmm20 {%k3}
+
+// CHECK: vcvtqq2ps %ymm28, %xmm20 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0xfc,0xab,0x5b,0xe4]
+ vcvtqq2ps %ymm28, %xmm20 {%k3} {z}
+
+// CHECK: vcvtqq2psy (%rcx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0x21]
+ vcvtqq2psy (%rcx), %xmm20
+
+// CHECK: vcvtqq2psy 4660(%rax,%r14,8), %xmm20
+// CHECK: encoding: [0x62,0xa1,0xfc,0x28,0x5b,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vcvtqq2psy 4660(%rax,%r14,8), %xmm20
+
+// CHECK: vcvtqq2ps (%rcx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0x21]
+ vcvtqq2ps (%rcx){1to4}, %xmm20
+
+// CHECK: vcvtqq2psy 4064(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0x62,0x7f]
+ vcvtqq2psy 4064(%rdx), %xmm20
+
+// CHECK: vcvtqq2psy 4096(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0xa2,0x00,0x10,0x00,0x00]
+ vcvtqq2psy 4096(%rdx), %xmm20
+
+// CHECK: vcvtqq2psy -4096(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0x62,0x80]
+ vcvtqq2psy -4096(%rdx), %xmm20
+
+// CHECK: vcvtqq2psy -4128(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x28,0x5b,0xa2,0xe0,0xef,0xff,0xff]
+ vcvtqq2psy -4128(%rdx), %xmm20
+
+// CHECK: vcvtqq2ps 1016(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0x62,0x7f]
+ vcvtqq2ps 1016(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtqq2ps 1024(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0xa2,0x00,0x04,0x00,0x00]
+ vcvtqq2ps 1024(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtqq2ps -1024(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0x62,0x80]
+ vcvtqq2ps -1024(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtqq2ps -1032(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfc,0x38,0x5b,0xa2,0xf8,0xfb,0xff,0xff]
+ vcvtqq2ps -1032(%rdx){1to4}, %xmm20
+
// CHECK: vcvtuqq2pd %xmm20, %xmm19
// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x7a,0xdc]
vcvtuqq2pd %xmm20, %xmm19
@@ -3104,3 +3696,1131 @@
// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0xa2,0xf8,0xfb,0xff,0xff]
vcvtuqq2ps -1032(%rdx){1to4}, %xmm28
+// CHECK: vcvtuqq2ps %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x7a,0xee]
+ vcvtuqq2ps %xmm22, %xmm21
+
+// CHECK: vcvtuqq2ps %xmm22, %xmm21 {%k6}
+// CHECK: encoding: [0x62,0xa1,0xff,0x0e,0x7a,0xee]
+ vcvtuqq2ps %xmm22, %xmm21 {%k6}
+
+// CHECK: vcvtuqq2ps %xmm22, %xmm21 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0xff,0x8e,0x7a,0xee]
+ vcvtuqq2ps %xmm22, %xmm21 {%k6} {z}
+
+// CHECK: vcvtuqq2psx (%rcx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0x29]
+ vcvtuqq2psx (%rcx), %xmm21
+
+// CHECK: vcvtuqq2psx 4660(%rax,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x7a,0xac,0xf0,0x34,0x12,0x00,0x00]
+ vcvtuqq2psx 4660(%rax,%r14,8), %xmm21
+
+// CHECK: vcvtuqq2ps (%rcx){1to2}, %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0x29]
+ vcvtuqq2ps (%rcx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2psx 2032(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0x6a,0x7f]
+ vcvtuqq2psx 2032(%rdx), %xmm21
+
+// CHECK: vcvtuqq2psx 2048(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0xaa,0x00,0x08,0x00,0x00]
+ vcvtuqq2psx 2048(%rdx), %xmm21
+
+// CHECK: vcvtuqq2psx -2048(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0x6a,0x80]
+ vcvtuqq2psx -2048(%rdx), %xmm21
+
+// CHECK: vcvtuqq2psx -2064(%rdx), %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x7a,0xaa,0xf0,0xf7,0xff,0xff]
+ vcvtuqq2psx -2064(%rdx), %xmm21
+
+// CHECK: vcvtuqq2ps 1016(%rdx){1to2}, %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0x6a,0x7f]
+ vcvtuqq2ps 1016(%rdx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2ps 1024(%rdx){1to2}, %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0xaa,0x00,0x04,0x00,0x00]
+ vcvtuqq2ps 1024(%rdx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2ps -1024(%rdx){1to2}, %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0x6a,0x80]
+ vcvtuqq2ps -1024(%rdx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2ps -1032(%rdx){1to2}, %xmm21
+// CHECK: encoding: [0x62,0xe1,0xff,0x18,0x7a,0xaa,0xf8,0xfb,0xff,0xff]
+ vcvtuqq2ps -1032(%rdx){1to2}, %xmm21
+
+// CHECK: vcvtuqq2ps %ymm17, %xmm26
+// CHECK: encoding: [0x62,0x21,0xff,0x28,0x7a,0xd1]
+ vcvtuqq2ps %ymm17, %xmm26
+
+// CHECK: vcvtuqq2ps %ymm17, %xmm26 {%k4}
+// CHECK: encoding: [0x62,0x21,0xff,0x2c,0x7a,0xd1]
+ vcvtuqq2ps %ymm17, %xmm26 {%k4}
+
+// CHECK: vcvtuqq2ps %ymm17, %xmm26 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0xff,0xac,0x7a,0xd1]
+ vcvtuqq2ps %ymm17, %xmm26 {%k4} {z}
+
+// CHECK: vcvtuqq2psy (%rcx), %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x11]
+ vcvtuqq2psy (%rcx), %xmm26
+
+// CHECK: vcvtuqq2psy 4660(%rax,%r14,8), %xmm26
+// CHECK: encoding: [0x62,0x21,0xff,0x28,0x7a,0x94,0xf0,0x34,0x12,0x00,0x00]
+ vcvtuqq2psy 4660(%rax,%r14,8), %xmm26
+
+// CHECK: vcvtuqq2ps (%rcx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x11]
+ vcvtuqq2ps (%rcx){1to4}, %xmm26
+
+// CHECK: vcvtuqq2psy 4064(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x52,0x7f]
+ vcvtuqq2psy 4064(%rdx), %xmm26
+
+// CHECK: vcvtuqq2psy 4096(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x92,0x00,0x10,0x00,0x00]
+ vcvtuqq2psy 4096(%rdx), %xmm26
+
+// CHECK: vcvtuqq2psy -4096(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x52,0x80]
+ vcvtuqq2psy -4096(%rdx), %xmm26
+
+// CHECK: vcvtuqq2psy -4128(%rdx), %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0x7a,0x92,0xe0,0xef,0xff,0xff]
+ vcvtuqq2psy -4128(%rdx), %xmm26
+
+// CHECK: vcvtuqq2ps 1016(%rdx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x52,0x7f]
+ vcvtuqq2ps 1016(%rdx){1to4}, %xmm26
+
+// CHECK: vcvtuqq2ps 1024(%rdx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x92,0x00,0x04,0x00,0x00]
+ vcvtuqq2ps 1024(%rdx){1to4}, %xmm26
+
+// CHECK: vcvtuqq2ps -1024(%rdx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x52,0x80]
+ vcvtuqq2ps -1024(%rdx){1to4}, %xmm26
+
+// CHECK: vcvtuqq2ps -1032(%rdx){1to4}, %xmm26
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0x92,0xf8,0xfb,0xff,0xff]
+ vcvtuqq2ps -1032(%rdx){1to4}, %xmm26
+
+// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xef,0xab]
+ vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21 {%k7}
+// CHECK: encoding: [0x62,0xa3,0xa5,0x27,0x18,0xef,0xab]
+ vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21 {%k7}
+
+// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0xa5,0xa7,0x18,0xef,0xab]
+ vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21 {%k7} {z}
+
+// CHECK: vinsertf64x2 $123, %xmm23, %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xef,0x7b]
+ vinsertf64x2 $0x7b, %xmm23, %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, (%rcx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0x29,0x7b]
+ vinsertf64x2 $0x7b,(%rcx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, 291(%rax,%r14,8), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,291(%rax,%r14,8), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, 2032(%rdx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0x6a,0x7f,0x7b]
+ vinsertf64x2 $0x7b,2032(%rdx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, 2048(%rdx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,2048(%rdx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, -2048(%rdx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0x6a,0x80,0x7b]
+ vinsertf64x2 $0x7b,-2048(%rdx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, -2064(%rdx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf64x2 $0x7b,-2064(%rdx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $171, %xmm27, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x18,0xc3,0xab]
+ vinsertf64x2 $0xab, %xmm27, %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $171, %xmm27, %ymm23, %ymm24 {%k5}
+// CHECK: encoding: [0x62,0x03,0xc5,0x25,0x18,0xc3,0xab]
+ vinsertf64x2 $0xab, %xmm27, %ymm23, %ymm24 {%k5}
+
+// CHECK: vinsertf64x2 $171, %xmm27, %ymm23, %ymm24 {%k5} {z}
+// CHECK: encoding: [0x62,0x03,0xc5,0xa5,0x18,0xc3,0xab]
+ vinsertf64x2 $0xab, %xmm27, %ymm23, %ymm24 {%k5} {z}
+
+// CHECK: vinsertf64x2 $123, %xmm27, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x18,0xc3,0x7b]
+ vinsertf64x2 $0x7b, %xmm27, %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, (%rcx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x01,0x7b]
+ vinsertf64x2 $0x7b,(%rcx), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, 4660(%rax,%r14,8), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x23,0xc5,0x20,0x18,0x84,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,4660(%rax,%r14,8), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, 2032(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x42,0x7f,0x7b]
+ vinsertf64x2 $0x7b,2032(%rdx), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, 2048(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x82,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,2048(%rdx), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, -2048(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x42,0x80,0x7b]
+ vinsertf64x2 $0x7b,-2048(%rdx), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, -2064(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x82,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf64x2 $0x7b,-2064(%rdx), %ymm23, %ymm24
+
+// CHECK: vinserti64x2 $171, %xmm21, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xa3,0xb5,0x20,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $171, %xmm21, %ymm25, %ymm19 {%k6}
+// CHECK: encoding: [0x62,0xa3,0xb5,0x26,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %ymm25, %ymm19 {%k6}
+
+// CHECK: vinserti64x2 $171, %xmm21, %ymm25, %ymm19 {%k6} {z}
+// CHECK: encoding: [0x62,0xa3,0xb5,0xa6,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %ymm25, %ymm19 {%k6} {z}
+
+// CHECK: vinserti64x2 $123, %xmm21, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xa3,0xb5,0x20,0x38,0xdd,0x7b]
+ vinserti64x2 $0x7b, %xmm21, %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, (%rcx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x19,0x7b]
+ vinserti64x2 $0x7b,(%rcx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, 291(%rax,%r14,8), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xa3,0xb5,0x20,0x38,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,291(%rax,%r14,8), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, 2032(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x5a,0x7f,0x7b]
+ vinserti64x2 $0x7b,2032(%rdx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, 2048(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,2048(%rdx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, -2048(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x5a,0x80,0x7b]
+ vinserti64x2 $0x7b,-2048(%rdx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, -2064(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti64x2 $0x7b,-2064(%rdx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $171, %xmm25, %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x38,0xe9,0xab]
+ vinserti64x2 $0xab, %xmm25, %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $171, %xmm25, %ymm24, %ymm29 {%k2}
+// CHECK: encoding: [0x62,0x03,0xbd,0x22,0x38,0xe9,0xab]
+ vinserti64x2 $0xab, %xmm25, %ymm24, %ymm29 {%k2}
+
+// CHECK: vinserti64x2 $171, %xmm25, %ymm24, %ymm29 {%k2} {z}
+// CHECK: encoding: [0x62,0x03,0xbd,0xa2,0x38,0xe9,0xab]
+ vinserti64x2 $0xab, %xmm25, %ymm24, %ymm29 {%k2} {z}
+
+// CHECK: vinserti64x2 $123, %xmm25, %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x38,0xe9,0x7b]
+ vinserti64x2 $0x7b, %xmm25, %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, (%rcx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0x29,0x7b]
+ vinserti64x2 $0x7b,(%rcx), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, 4660(%rax,%r14,8), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x23,0xbd,0x20,0x38,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,4660(%rax,%r14,8), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, 2032(%rdx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0x6a,0x7f,0x7b]
+ vinserti64x2 $0x7b,2032(%rdx), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, 2048(%rdx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,2048(%rdx), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, -2048(%rdx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0x6a,0x80,0x7b]
+ vinserti64x2 $0x7b,-2048(%rdx), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, -2064(%rdx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti64x2 $0x7b,-2064(%rdx), %ymm24, %ymm29
+
+// CHECK: vextractf64x2 $171, %ymm21, %xmm27
+// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x19,0xeb,0xab]
+ vextractf64x2 $0xab, %ymm21, %xmm27
+
+// CHECK: vextractf64x2 $171, %ymm21, %xmm27 {%k7}
+// CHECK: encoding: [0x62,0x83,0xfd,0x2f,0x19,0xeb,0xab]
+ vextractf64x2 $0xab, %ymm21, %xmm27 {%k7}
+
+// CHECK: vextractf64x2 $171, %ymm21, %xmm27 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0xfd,0xaf,0x19,0xeb,0xab]
+ vextractf64x2 $0xab, %ymm21, %xmm27 {%k7} {z}
+
+// CHECK: vextractf64x2 $123, %ymm21, %xmm27
+// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x19,0xeb,0x7b]
+ vextractf64x2 $0x7b, %ymm21, %xmm27
+
+// CHECK: vextractf64x2 $171, %ymm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x21,0xab]
+ vextractf64x2 $0xab, %ymm20,(%rcx)
+
+// CHECK: vextractf64x2 $171, %ymm20, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe3,0xfd,0x29,0x19,0x21,0xab]
+ vextractf64x2 $0xab, %ymm20,(%rcx) {%k1}
+
+// CHECK: vextractf64x2 $123, %ymm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x21,0x7b]
+ vextractf64x2 $0x7b, %ymm20,(%rcx)
+
+// CHECK: vextractf64x2 $123, %ymm20, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x19,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextractf64x2 $0x7b, %ymm20,291(%rax,%r14,8)
+
+// CHECK: vextractf64x2 $123, %ymm20, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x62,0x7f,0x7b]
+ vextractf64x2 $0x7b, %ymm20,2032(%rdx)
+
+// CHECK: vextractf64x2 $123, %ymm20, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vextractf64x2 $0x7b, %ymm20,2048(%rdx)
+
+// CHECK: vextractf64x2 $123, %ymm20, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x62,0x80,0x7b]
+ vextractf64x2 $0x7b, %ymm20,-2048(%rdx)
+
+// CHECK: vextractf64x2 $123, %ymm20, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vextractf64x2 $0x7b, %ymm20,-2064(%rdx)
+
+// CHECK: vextractf64x2 $171, %ymm26, %xmm28
+// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x19,0xd4,0xab]
+ vextractf64x2 $0xab, %ymm26, %xmm28
+
+// CHECK: vextractf64x2 $171, %ymm26, %xmm28 {%k4}
+// CHECK: encoding: [0x62,0x03,0xfd,0x2c,0x19,0xd4,0xab]
+ vextractf64x2 $0xab, %ymm26, %xmm28 {%k4}
+
+// CHECK: vextractf64x2 $171, %ymm26, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x03,0xfd,0xac,0x19,0xd4,0xab]
+ vextractf64x2 $0xab, %ymm26, %xmm28 {%k4} {z}
+
+// CHECK: vextractf64x2 $123, %ymm26, %xmm28
+// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x19,0xd4,0x7b]
+ vextractf64x2 $0x7b, %ymm26, %xmm28
+
+// CHECK: vextractf64x2 $171, %ymm17, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x09,0xab]
+ vextractf64x2 $0xab, %ymm17,(%rcx)
+
+// CHECK: vextractf64x2 $171, %ymm17, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe3,0xfd,0x2a,0x19,0x09,0xab]
+ vextractf64x2 $0xab, %ymm17,(%rcx) {%k2}
+
+// CHECK: vextractf64x2 $123, %ymm17, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x09,0x7b]
+ vextractf64x2 $0x7b, %ymm17,(%rcx)
+
+// CHECK: vextractf64x2 $123, %ymm17, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x19,0x8c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vextractf64x2 $0x7b, %ymm17,4660(%rax,%r14,8)
+
+// CHECK: vextractf64x2 $123, %ymm17, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x4a,0x7f,0x7b]
+ vextractf64x2 $0x7b, %ymm17,2032(%rdx)
+
+// CHECK: vextractf64x2 $123, %ymm17, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vextractf64x2 $0x7b, %ymm17,2048(%rdx)
+
+// CHECK: vextractf64x2 $123, %ymm17, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x4a,0x80,0x7b]
+ vextractf64x2 $0x7b, %ymm17,-2048(%rdx)
+
+// CHECK: vextractf64x2 $123, %ymm17, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x19,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vextractf64x2 $0x7b, %ymm17,-2064(%rdx)
+
+// CHECK: vextracti64x2 $171, %ymm24, %xmm29
+// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x39,0xc5,0xab]
+ vextracti64x2 $0xab, %ymm24, %xmm29
+
+// CHECK: vextracti64x2 $171, %ymm24, %xmm29 {%k7}
+// CHECK: encoding: [0x62,0x03,0xfd,0x2f,0x39,0xc5,0xab]
+ vextracti64x2 $0xab, %ymm24, %xmm29 {%k7}
+
+// CHECK: vextracti64x2 $171, %ymm24, %xmm29 {%k7} {z}
+// CHECK: encoding: [0x62,0x03,0xfd,0xaf,0x39,0xc5,0xab]
+ vextracti64x2 $0xab, %ymm24, %xmm29 {%k7} {z}
+
+// CHECK: vextracti64x2 $123, %ymm24, %xmm29
+// CHECK: encoding: [0x62,0x03,0xfd,0x28,0x39,0xc5,0x7b]
+ vextracti64x2 $0x7b, %ymm24, %xmm29
+
+// CHECK: vextracti64x2 $171, %ymm17, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x09,0xab]
+ vextracti64x2 $0xab, %ymm17,(%rcx)
+
+// CHECK: vextracti64x2 $171, %ymm17, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe3,0xfd,0x29,0x39,0x09,0xab]
+ vextracti64x2 $0xab, %ymm17,(%rcx) {%k1}
+
+// CHECK: vextracti64x2 $123, %ymm17, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x09,0x7b]
+ vextracti64x2 $0x7b, %ymm17,(%rcx)
+
+// CHECK: vextracti64x2 $123, %ymm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x39,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextracti64x2 $0x7b, %ymm17,291(%rax,%r14,8)
+
+// CHECK: vextracti64x2 $123, %ymm17, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x4a,0x7f,0x7b]
+ vextracti64x2 $0x7b, %ymm17,2032(%rdx)
+
+// CHECK: vextracti64x2 $123, %ymm17, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vextracti64x2 $0x7b, %ymm17,2048(%rdx)
+
+// CHECK: vextracti64x2 $123, %ymm17, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x4a,0x80,0x7b]
+ vextracti64x2 $0x7b, %ymm17,-2048(%rdx)
+
+// CHECK: vextracti64x2 $123, %ymm17, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vextracti64x2 $0x7b, %ymm17,-2064(%rdx)
+
+// CHECK: vextracti64x2 $171, %ymm17, %xmm29
+// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x39,0xcd,0xab]
+ vextracti64x2 $0xab, %ymm17, %xmm29
+
+// CHECK: vextracti64x2 $171, %ymm17, %xmm29 {%k5}
+// CHECK: encoding: [0x62,0x83,0xfd,0x2d,0x39,0xcd,0xab]
+ vextracti64x2 $0xab, %ymm17, %xmm29 {%k5}
+
+// CHECK: vextracti64x2 $171, %ymm17, %xmm29 {%k5} {z}
+// CHECK: encoding: [0x62,0x83,0xfd,0xad,0x39,0xcd,0xab]
+ vextracti64x2 $0xab, %ymm17, %xmm29 {%k5} {z}
+
+// CHECK: vextracti64x2 $123, %ymm17, %xmm29
+// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x39,0xcd,0x7b]
+ vextracti64x2 $0x7b, %ymm17, %xmm29
+
+// CHECK: vextracti64x2 $171, %ymm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x21,0xab]
+ vextracti64x2 $0xab, %ymm20,(%rcx)
+
+// CHECK: vextracti64x2 $171, %ymm20, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe3,0xfd,0x2a,0x39,0x21,0xab]
+ vextracti64x2 $0xab, %ymm20,(%rcx) {%k2}
+
+// CHECK: vextracti64x2 $123, %ymm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x21,0x7b]
+ vextracti64x2 $0x7b, %ymm20,(%rcx)
+
+// CHECK: vextracti64x2 $123, %ymm20, 4660(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x39,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vextracti64x2 $0x7b, %ymm20,4660(%rax,%r14,8)
+
+// CHECK: vextracti64x2 $123, %ymm20, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x62,0x7f,0x7b]
+ vextracti64x2 $0x7b, %ymm20,2032(%rdx)
+
+// CHECK: vextracti64x2 $123, %ymm20, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vextracti64x2 $0x7b, %ymm20,2048(%rdx)
+
+// CHECK: vextracti64x2 $123, %ymm20, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0x62,0x80,0x7b]
+ vextracti64x2 $0x7b, %ymm20,-2048(%rdx)
+
+// CHECK: vextracti64x2 $123, %ymm20, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x39,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vextracti64x2 $0x7b, %ymm20,-2064(%rdx)
+
+// CHECK: vfpclasspd $171, %xmm18, %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x66,0xd2,0xab]
+ vfpclasspd $0xab, %xmm18, %k2
+
+// CHECK: vfpclasspd $171, %xmm18, %k2 {%k7}
+// CHECK: encoding: [0x62,0xb3,0xfd,0x0f,0x66,0xd2,0xab]
+ vfpclasspd $0xab, %xmm18, %k2 {%k7}
+
+// CHECK: vfpclasspd $123, %xmm18, %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x66,0xd2,0x7b]
+ vfpclasspd $0x7b, %xmm18, %k2
+
+// CHECK: vfpclasspdx $123, (%rcx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x11,0x7b]
+ vfpclasspdx $0x7b,(%rcx), %k2
+
+// CHECK: vfpclasspdx $123, 291(%rax,%r14,8), %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x66,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspdx $0x7b,291(%rax,%r14,8), %k2
+
+// CHECK: vfpclasspdq $123, (%rcx){1to2}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x11,0x7b]
+ vfpclasspdq $0x7b,(%rcx){1to2}, %k2
+
+// CHECK: vfpclasspdx $123, 2032(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x52,0x7f,0x7b]
+ vfpclasspdx $0x7b,2032(%rdx), %k2
+
+// CHECK: vfpclasspdx $123, 2048(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x92,0x00,0x08,0x00,0x00,0x7b]
+ vfpclasspdx $0x7b,2048(%rdx), %k2
+
+// CHECK: vfpclasspdx $123, -2048(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x52,0x80,0x7b]
+ vfpclasspdx $0x7b,-2048(%rdx), %k2
+
+// CHECK: vfpclasspdx $123, -2064(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x92,0xf0,0xf7,0xff,0xff,0x7b]
+ vfpclasspdx $0x7b,-2064(%rdx), %k2
+
+// CHECK: vfpclasspdq $123, 1016(%rdx){1to2}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x52,0x7f,0x7b]
+ vfpclasspdq $0x7b,1016(%rdx){1to2}, %k2
+
+// CHECK: vfpclasspdq $123, 1024(%rdx){1to2}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vfpclasspdq $0x7b,1024(%rdx){1to2}, %k2
+
+// CHECK: vfpclasspdq $123, -1024(%rdx){1to2}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x52,0x80,0x7b]
+ vfpclasspdq $0x7b,-1024(%rdx){1to2}, %k2
+
+// CHECK: vfpclasspdq $123, -1032(%rdx){1to2}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vfpclasspdq $0x7b,-1032(%rdx){1to2}, %k2
+
+// CHECK: vfpclasspd $171, %ymm25, %k4
+// CHECK: encoding: [0x62,0x93,0xfd,0x28,0x66,0xe1,0xab]
+ vfpclasspd $0xab, %ymm25, %k4
+
+// CHECK: vfpclasspd $171, %ymm25, %k4 {%k6}
+// CHECK: encoding: [0x62,0x93,0xfd,0x2e,0x66,0xe1,0xab]
+ vfpclasspd $0xab, %ymm25, %k4 {%k6}
+
+// CHECK: vfpclasspd $123, %ymm25, %k4
+// CHECK: encoding: [0x62,0x93,0xfd,0x28,0x66,0xe1,0x7b]
+ vfpclasspd $0x7b, %ymm25, %k4
+
+// CHECK: vfpclasspdy $123, (%rcx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0x21,0x7b]
+ vfpclasspdy $0x7b,(%rcx), %k4
+
+// CHECK: vfpclasspdy $123, 291(%rax,%r14,8), %k4
+// CHECK: encoding: [0x62,0xb3,0xfd,0x28,0x66,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspdy $0x7b,291(%rax,%r14,8), %k4
+
+// CHECK: vfpclasspdq $123, (%rcx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0x21,0x7b]
+ vfpclasspdq $0x7b,(%rcx){1to4}, %k4
+
+// CHECK: vfpclasspdy $123, 4064(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0x62,0x7f,0x7b]
+ vfpclasspdy $0x7b,4064(%rdx), %k4
+
+// CHECK: vfpclasspdy $123, 4096(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vfpclasspdy $0x7b,4096(%rdx), %k4
+
+// CHECK: vfpclasspdy $123, -4096(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0x62,0x80,0x7b]
+ vfpclasspdy $0x7b,-4096(%rdx), %k4
+
+// CHECK: vfpclasspdy $123, -4128(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vfpclasspdy $0x7b,-4128(%rdx), %k4
+
+// CHECK: vfpclasspdq $123, 1016(%rdx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0x62,0x7f,0x7b]
+ vfpclasspdq $0x7b,1016(%rdx){1to4}, %k4
+
+// CHECK: vfpclasspdq $123, 1024(%rdx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vfpclasspdq $0x7b,1024(%rdx){1to4}, %k4
+
+// CHECK: vfpclasspdq $123, -1024(%rdx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0x62,0x80,0x7b]
+ vfpclasspdq $0x7b,-1024(%rdx){1to4}, %k4
+
+// CHECK: vfpclasspdq $123, -1032(%rdx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vfpclasspdq $0x7b,-1032(%rdx){1to4}, %k4
+
+// CHECK: vfpclassps $171, %xmm20, %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x66,0xe4,0xab]
+ vfpclassps $0xab, %xmm20, %k4
+
+// CHECK: vfpclassps $171, %xmm20, %k4 {%k5}
+// CHECK: encoding: [0x62,0xb3,0x7d,0x0d,0x66,0xe4,0xab]
+ vfpclassps $0xab, %xmm20, %k4 {%k5}
+
+// CHECK: vfpclassps $123, %xmm20, %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x66,0xe4,0x7b]
+ vfpclassps $0x7b, %xmm20, %k4
+
+// CHECK: vfpclasspsx $123, (%rcx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0x21,0x7b]
+ vfpclasspsx $0x7b,(%rcx), %k4
+
+// CHECK: vfpclasspsx $123, 291(%rax,%r14,8), %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x66,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspsx $0x7b,291(%rax,%r14,8), %k4
+
+// CHECK: vfpclasspsl $123, (%rcx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0x21,0x7b]
+ vfpclasspsl $0x7b,(%rcx){1to4}, %k4
+
+// CHECK: vfpclasspsx $123, 2032(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0x62,0x7f,0x7b]
+ vfpclasspsx $0x7b,2032(%rdx), %k4
+
+// CHECK: vfpclasspsx $123, 2048(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vfpclasspsx $0x7b,2048(%rdx), %k4
+
+// CHECK: vfpclasspsx $123, -2048(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0x62,0x80,0x7b]
+ vfpclasspsx $0x7b,-2048(%rdx), %k4
+
+// CHECK: vfpclasspsx $123, -2064(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vfpclasspsx $0x7b,-2064(%rdx), %k4
+
+// CHECK: vfpclasspsl $123, 508(%rdx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0x62,0x7f,0x7b]
+ vfpclasspsl $0x7b,508(%rdx){1to4}, %k4
+
+// CHECK: vfpclasspsl $123, 512(%rdx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vfpclasspsl $0x7b,512(%rdx){1to4}, %k4
+
+// CHECK: vfpclasspsl $123, -512(%rdx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0x62,0x80,0x7b]
+ vfpclasspsl $0x7b,-512(%rdx){1to4}, %k4
+
+// CHECK: vfpclasspsl $123, -516(%rdx){1to4}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vfpclasspsl $0x7b,-516(%rdx){1to4}, %k4
+
+// CHECK: vfpclassps $171, %ymm17, %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x28,0x66,0xe1,0xab]
+ vfpclassps $0xab, %ymm17, %k4
+
+// CHECK: vfpclassps $171, %ymm17, %k4 {%k5}
+// CHECK: encoding: [0x62,0xb3,0x7d,0x2d,0x66,0xe1,0xab]
+ vfpclassps $0xab, %ymm17, %k4 {%k5}
+
+// CHECK: vfpclassps $123, %ymm17, %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x28,0x66,0xe1,0x7b]
+ vfpclassps $0x7b, %ymm17, %k4
+
+// CHECK: vfpclasspsy $123, (%rcx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0x21,0x7b]
+ vfpclasspsy $0x7b,(%rcx), %k4
+
+// CHECK: vfpclasspsy $123, 291(%rax,%r14,8), %k4
+// CHECK: encoding: [0x62,0xb3,0x7d,0x28,0x66,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vfpclasspsy $0x7b,291(%rax,%r14,8), %k4
+
+// CHECK: vfpclasspsl $123, (%rcx){1to8}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0x21,0x7b]
+ vfpclasspsl $0x7b,(%rcx){1to8}, %k4
+
+// CHECK: vfpclasspsy $123, 4064(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0x62,0x7f,0x7b]
+ vfpclasspsy $0x7b,4064(%rdx), %k4
+
+// CHECK: vfpclasspsy $123, 4096(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vfpclasspsy $0x7b,4096(%rdx), %k4
+
+// CHECK: vfpclasspsy $123, -4096(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0x62,0x80,0x7b]
+ vfpclasspsy $0x7b,-4096(%rdx), %k4
+
+// CHECK: vfpclasspsy $123, -4128(%rdx), %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vfpclasspsy $0x7b,-4128(%rdx), %k4
+
+// CHECK: vfpclasspsl $123, 508(%rdx){1to8}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0x62,0x7f,0x7b]
+ vfpclasspsl $0x7b,508(%rdx){1to8}, %k4
+
+// CHECK: vfpclasspsl $123, 512(%rdx){1to8}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vfpclasspsl $0x7b,512(%rdx){1to8}, %k4
+
+// CHECK: vfpclasspsl $123, -512(%rdx){1to8}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0x62,0x80,0x7b]
+ vfpclasspsl $0x7b,-512(%rdx){1to8}, %k4
+
+// CHECK: vfpclasspsl $123, -516(%rdx){1to8}, %k4
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vfpclasspsl $0x7b,-516(%rdx){1to8}, %k4
+
+// CHECK: vfpclasspd $171, %xmm26, %k3
+// CHECK: encoding: [0x62,0x93,0xfd,0x08,0x66,0xda,0xab]
+ vfpclasspd $0xab, %xmm26, %k3
+
+// CHECK: vfpclasspd $171, %xmm26, %k3 {%k5}
+// CHECK: encoding: [0x62,0x93,0xfd,0x0d,0x66,0xda,0xab]
+ vfpclasspd $0xab, %xmm26, %k3 {%k5}
+
+// CHECK: vfpclasspd $123, %xmm26, %k3
+// CHECK: encoding: [0x62,0x93,0xfd,0x08,0x66,0xda,0x7b]
+ vfpclasspd $0x7b, %xmm26, %k3
+
+// CHECK: vfpclasspdx $123, (%rcx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x19,0x7b]
+ vfpclasspdx $0x7b,(%rcx), %k3
+
+// CHECK: vfpclasspdx $123, 4660(%rax,%r14,8), %k3
+// CHECK: encoding: [0x62,0xb3,0xfd,0x08,0x66,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vfpclasspdx $0x7b,4660(%rax,%r14,8), %k3
+
+// CHECK: vfpclasspdq $123, (%rcx){1to2}, %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x19,0x7b]
+ vfpclasspdq $0x7b,(%rcx){1to2}, %k3
+
+// CHECK: vfpclasspdx $123, 2032(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x5a,0x7f,0x7b]
+ vfpclasspdx $0x7b,2032(%rdx), %k3
+
+// CHECK: vfpclasspdx $123, 2048(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vfpclasspdx $0x7b,2048(%rdx), %k3
+
+// CHECK: vfpclasspdx $123, -2048(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x5a,0x80,0x7b]
+ vfpclasspdx $0x7b,-2048(%rdx), %k3
+
+// CHECK: vfpclasspdx $123, -2064(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x08,0x66,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vfpclasspdx $0x7b,-2064(%rdx), %k3
+
+// CHECK: vfpclasspdq $123, 1016(%rdx){1to2}, %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x5a,0x7f,0x7b]
+ vfpclasspdq $0x7b,1016(%rdx){1to2}, %k3
+
+// CHECK: vfpclasspdq $123, 1024(%rdx){1to2}, %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vfpclasspdq $0x7b,1024(%rdx){1to2}, %k3
+
+// CHECK: vfpclasspdq $123, -1024(%rdx){1to2}, %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x5a,0x80,0x7b]
+ vfpclasspdq $0x7b,-1024(%rdx){1to2}, %k3
+
+// CHECK: vfpclasspdq $123, -1032(%rdx){1to2}, %k3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x18,0x66,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vfpclasspdq $0x7b,-1032(%rdx){1to2}, %k3
+
+// CHECK: vfpclasspd $171, %ymm26, %k2
+// CHECK: encoding: [0x62,0x93,0xfd,0x28,0x66,0xd2,0xab]
+ vfpclasspd $0xab, %ymm26, %k2
+
+// CHECK: vfpclasspd $171, %ymm26, %k2 {%k6}
+// CHECK: encoding: [0x62,0x93,0xfd,0x2e,0x66,0xd2,0xab]
+ vfpclasspd $0xab, %ymm26, %k2 {%k6}
+
+// CHECK: vfpclasspd $123, %ymm26, %k2
+// CHECK: encoding: [0x62,0x93,0xfd,0x28,0x66,0xd2,0x7b]
+ vfpclasspd $0x7b, %ymm26, %k2
+
+// CHECK: vfpclasspdy $123, (%rcx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0x11,0x7b]
+ vfpclasspdy $0x7b,(%rcx), %k2
+
+// CHECK: vfpclasspdy $123, 4660(%rax,%r14,8), %k2
+// CHECK: encoding: [0x62,0xb3,0xfd,0x28,0x66,0x94,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vfpclasspdy $0x7b,4660(%rax,%r14,8), %k2
+
+// CHECK: vfpclasspdq $123, (%rcx){1to4}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0x11,0x7b]
+ vfpclasspdq $0x7b,(%rcx){1to4}, %k2
+
+// CHECK: vfpclasspdy $123, 4064(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0x52,0x7f,0x7b]
+ vfpclasspdy $0x7b,4064(%rdx), %k2
+
+// CHECK: vfpclasspdy $123, 4096(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0x92,0x00,0x10,0x00,0x00,0x7b]
+ vfpclasspdy $0x7b,4096(%rdx), %k2
+
+// CHECK: vfpclasspdy $123, -4096(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0x52,0x80,0x7b]
+ vfpclasspdy $0x7b,-4096(%rdx), %k2
+
+// CHECK: vfpclasspdy $123, -4128(%rdx), %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x28,0x66,0x92,0xe0,0xef,0xff,0xff,0x7b]
+ vfpclasspdy $0x7b,-4128(%rdx), %k2
+
+// CHECK: vfpclasspdq $123, 1016(%rdx){1to4}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0x52,0x7f,0x7b]
+ vfpclasspdq $0x7b,1016(%rdx){1to4}, %k2
+
+// CHECK: vfpclasspdq $123, 1024(%rdx){1to4}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vfpclasspdq $0x7b,1024(%rdx){1to4}, %k2
+
+// CHECK: vfpclasspdq $123, -1024(%rdx){1to4}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0x52,0x80,0x7b]
+ vfpclasspdq $0x7b,-1024(%rdx){1to4}, %k2
+
+// CHECK: vfpclasspdq $123, -1032(%rdx){1to4}, %k2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x38,0x66,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vfpclasspdq $0x7b,-1032(%rdx){1to4}, %k2
+
+// CHECK: vfpclassps $171, %xmm29, %k3
+// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x66,0xdd,0xab]
+ vfpclassps $0xab, %xmm29, %k3
+
+// CHECK: vfpclassps $171, %xmm29, %k3 {%k6}
+// CHECK: encoding: [0x62,0x93,0x7d,0x0e,0x66,0xdd,0xab]
+ vfpclassps $0xab, %xmm29, %k3 {%k6}
+
+// CHECK: vfpclassps $123, %xmm29, %k3
+// CHECK: encoding: [0x62,0x93,0x7d,0x08,0x66,0xdd,0x7b]
+ vfpclassps $0x7b, %xmm29, %k3
+
+// CHECK: vfpclasspsx $123, (%rcx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0x19,0x7b]
+ vfpclasspsx $0x7b,(%rcx), %k3
+
+// CHECK: vfpclasspsx $123, 4660(%rax,%r14,8), %k3
+// CHECK: encoding: [0x62,0xb3,0x7d,0x08,0x66,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vfpclasspsx $0x7b,4660(%rax,%r14,8), %k3
+
+// CHECK: vfpclasspsl $123, (%rcx){1to4}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0x19,0x7b]
+ vfpclasspsl $0x7b,(%rcx){1to4}, %k3
+
+// CHECK: vfpclasspsx $123, 2032(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0x5a,0x7f,0x7b]
+ vfpclasspsx $0x7b,2032(%rdx), %k3
+
+// CHECK: vfpclasspsx $123, 2048(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vfpclasspsx $0x7b,2048(%rdx), %k3
+
+// CHECK: vfpclasspsx $123, -2048(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0x5a,0x80,0x7b]
+ vfpclasspsx $0x7b,-2048(%rdx), %k3
+
+// CHECK: vfpclasspsx $123, -2064(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x08,0x66,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vfpclasspsx $0x7b,-2064(%rdx), %k3
+
+// CHECK: vfpclasspsl $123, 508(%rdx){1to4}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0x5a,0x7f,0x7b]
+ vfpclasspsl $0x7b,508(%rdx){1to4}, %k3
+
+// CHECK: vfpclasspsl $123, 512(%rdx){1to4}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vfpclasspsl $0x7b,512(%rdx){1to4}, %k3
+
+// CHECK: vfpclasspsl $123, -512(%rdx){1to4}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0x5a,0x80,0x7b]
+ vfpclasspsl $0x7b,-512(%rdx){1to4}, %k3
+
+// CHECK: vfpclasspsl $123, -516(%rdx){1to4}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x18,0x66,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vfpclasspsl $0x7b,-516(%rdx){1to4}, %k3
+
+// CHECK: vfpclassps $171, %ymm19, %k3
+// CHECK: encoding: [0x62,0xb3,0x7d,0x28,0x66,0xdb,0xab]
+ vfpclassps $0xab, %ymm19, %k3
+
+// CHECK: vfpclassps $171, %ymm19, %k3 {%k3}
+// CHECK: encoding: [0x62,0xb3,0x7d,0x2b,0x66,0xdb,0xab]
+ vfpclassps $0xab, %ymm19, %k3 {%k3}
+
+// CHECK: vfpclassps $123, %ymm19, %k3
+// CHECK: encoding: [0x62,0xb3,0x7d,0x28,0x66,0xdb,0x7b]
+ vfpclassps $0x7b, %ymm19, %k3
+
+// CHECK: vfpclasspsy $123, (%rcx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0x19,0x7b]
+ vfpclasspsy $0x7b,(%rcx), %k3
+
+// CHECK: vfpclasspsy $123, 4660(%rax,%r14,8), %k3
+// CHECK: encoding: [0x62,0xb3,0x7d,0x28,0x66,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vfpclasspsy $0x7b,4660(%rax,%r14,8), %k3
+
+// CHECK: vfpclasspsl $123, (%rcx){1to8}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0x19,0x7b]
+ vfpclasspsl $0x7b,(%rcx){1to8}, %k3
+
+// CHECK: vfpclasspsy $123, 4064(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0x5a,0x7f,0x7b]
+ vfpclasspsy $0x7b,4064(%rdx), %k3
+
+// CHECK: vfpclasspsy $123, 4096(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0x9a,0x00,0x10,0x00,0x00,0x7b]
+ vfpclasspsy $0x7b,4096(%rdx), %k3
+
+// CHECK: vfpclasspsy $123, -4096(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0x5a,0x80,0x7b]
+ vfpclasspsy $0x7b,-4096(%rdx), %k3
+
+// CHECK: vfpclasspsy $123, -4128(%rdx), %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x28,0x66,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+ vfpclasspsy $0x7b,-4128(%rdx), %k3
+
+// CHECK: vfpclasspsl $123, 508(%rdx){1to8}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0x5a,0x7f,0x7b]
+ vfpclasspsl $0x7b,508(%rdx){1to8}, %k3
+
+// CHECK: vfpclasspsl $123, 512(%rdx){1to8}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vfpclasspsl $0x7b,512(%rdx){1to8}, %k3
+
+// CHECK: vfpclasspsl $123, -512(%rdx){1to8}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0x5a,0x80,0x7b]
+ vfpclasspsl $0x7b,-512(%rdx){1to8}, %k3
+
+// CHECK: vfpclasspsl $123, -516(%rdx){1to8}, %k3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x38,0x66,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vfpclasspsl $0x7b,-516(%rdx){1to8}, %k3
+
+// CHECK: vbroadcasti32x2 %xmm30, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x59,0xf6]
+ vbroadcasti32x2 %xmm30, %xmm30
+
+// CHECK: vbroadcasti32x2 %xmm30, %xmm30 {%k2}
+// CHECK: encoding: [0x62,0x02,0x7d,0x0a,0x59,0xf6]
+ vbroadcasti32x2 %xmm30, %xmm30 {%k2}
+
+// CHECK: vbroadcasti32x2 %xmm30, %xmm30 {%k2} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x8a,0x59,0xf6]
+ vbroadcasti32x2 %xmm30, %xmm30 {%k2} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x31]
+ vbroadcasti32x2 (%rcx), %xmm30
+
+// CHECK: vbroadcasti32x2 291(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcasti32x2 291(%rax,%r14,8), %xmm30
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x7f]
+ vbroadcasti32x2 1016(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0x00,0x04,0x00,0x00]
+ vbroadcasti32x2 1024(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x80]
+ vbroadcasti32x2 -1024(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff]
+ vbroadcasti32x2 -1032(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm26
+// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x59,0xd4]
+ vbroadcasti32x2 %xmm28, %ymm26
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm26 {%k7}
+// CHECK: encoding: [0x62,0x02,0x7d,0x2f,0x59,0xd4]
+ vbroadcasti32x2 %xmm28, %ymm26 {%k7}
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm26 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xaf,0x59,0xd4]
+ vbroadcasti32x2 %xmm28, %ymm26 {%k7} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x11]
+ vbroadcasti32x2 (%rcx), %ymm26
+
+// CHECK: vbroadcasti32x2 291(%rax,%r14,8), %ymm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x59,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcasti32x2 291(%rax,%r14,8), %ymm26
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x52,0x7f]
+ vbroadcasti32x2 1016(%rdx), %ymm26
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x92,0x00,0x04,0x00,0x00]
+ vbroadcasti32x2 1024(%rdx), %ymm26
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x52,0x80]
+ vbroadcasti32x2 -1024(%rdx), %ymm26
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x59,0x92,0xf8,0xfb,0xff,0xff]
+ vbroadcasti32x2 -1032(%rdx), %ymm26
+
+// CHECK: vbroadcasti32x2 %xmm28, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x59,0xf4]
+ vbroadcasti32x2 %xmm28, %xmm30
+
+// CHECK: vbroadcasti32x2 %xmm28, %xmm30 {%k6}
+// CHECK: encoding: [0x62,0x02,0x7d,0x0e,0x59,0xf4]
+ vbroadcasti32x2 %xmm28, %xmm30 {%k6}
+
+// CHECK: vbroadcasti32x2 %xmm28, %xmm30 {%k6} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x8e,0x59,0xf4]
+ vbroadcasti32x2 %xmm28, %xmm30 {%k6} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x31]
+ vbroadcasti32x2 (%rcx), %xmm30
+
+// CHECK: vbroadcasti32x2 4660(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x59,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vbroadcasti32x2 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x7f]
+ vbroadcasti32x2 1016(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0x00,0x04,0x00,0x00]
+ vbroadcasti32x2 1024(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0x72,0x80]
+ vbroadcasti32x2 -1024(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff]
+ vbroadcasti32x2 -1032(%rdx), %xmm30
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm23
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x59,0xfc]
+ vbroadcasti32x2 %xmm28, %ymm23
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm23 {%k1}
+// CHECK: encoding: [0x62,0x82,0x7d,0x29,0x59,0xfc]
+ vbroadcasti32x2 %xmm28, %ymm23 {%k1}
+
+// CHECK: vbroadcasti32x2 %xmm28, %ymm23 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xa9,0x59,0xfc]
+ vbroadcasti32x2 %xmm28, %ymm23 {%k1} {z}
+
+// CHECK: vbroadcasti32x2 (%rcx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0x39]
+ vbroadcasti32x2 (%rcx), %ymm23
+
+// CHECK: vbroadcasti32x2 4660(%rax,%r14,8), %ymm23
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x59,0xbc,0xf0,0x34,0x12,0x00,0x00]
+ vbroadcasti32x2 4660(%rax,%r14,8), %ymm23
+
+// CHECK: vbroadcasti32x2 1016(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0x7a,0x7f]
+ vbroadcasti32x2 1016(%rdx), %ymm23
+
+// CHECK: vbroadcasti32x2 1024(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0xba,0x00,0x04,0x00,0x00]
+ vbroadcasti32x2 1024(%rdx), %ymm23
+
+// CHECK: vbroadcasti32x2 -1024(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0x7a,0x80]
+ vbroadcasti32x2 -1024(%rdx), %ymm23
+
+// CHECK: vbroadcasti32x2 -1032(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x59,0xba,0xf8,0xfb,0xff,0xff]
+ vbroadcasti32x2 -1032(%rdx), %ymm23
+
+// CHECK: vbroadcastf32x2 %xmm18, %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x19,0xe2]
+ vbroadcastf32x2 %xmm18, %ymm28
+
+// CHECK: vbroadcastf32x2 %xmm18, %ymm28 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x19,0xe2]
+ vbroadcastf32x2 %xmm18, %ymm28 {%k7}
+
+// CHECK: vbroadcastf32x2 %xmm18, %ymm28 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x19,0xe2]
+ vbroadcastf32x2 %xmm18, %ymm28 {%k7} {z}
+
+// CHECK: vbroadcastf32x2 (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0x21]
+ vbroadcastf32x2 (%rcx), %ymm28
+
+// CHECK: vbroadcastf32x2 291(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x19,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vbroadcastf32x2 291(%rax,%r14,8), %ymm28
+
+// CHECK: vbroadcastf32x2 1016(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0x62,0x7f]
+ vbroadcastf32x2 1016(%rdx), %ymm28
+
+// CHECK: vbroadcastf32x2 1024(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0xa2,0x00,0x04,0x00,0x00]
+ vbroadcastf32x2 1024(%rdx), %ymm28
+
+// CHECK: vbroadcastf32x2 -1024(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0x62,0x80]
+ vbroadcastf32x2 -1024(%rdx), %ymm28
+
+// CHECK: vbroadcastf32x2 -1032(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x19,0xa2,0xf8,0xfb,0xff,0xff]
+ vbroadcastf32x2 -1032(%rdx), %ymm28
+
+// CHECK: vbroadcastf32x2 %xmm26, %ymm19
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x19,0xda]
+ vbroadcastf32x2 %xmm26, %ymm19
+
+// CHECK: vbroadcastf32x2 %xmm26, %ymm19 {%k7}
+// CHECK: encoding: [0x62,0x82,0x7d,0x2f,0x19,0xda]
+ vbroadcastf32x2 %xmm26, %ymm19 {%k7}
+
+// CHECK: vbroadcastf32x2 %xmm26, %ymm19 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xaf,0x19,0xda]
+ vbroadcastf32x2 %xmm26, %ymm19 {%k7} {z}
+
+// CHECK: vbroadcastf32x2 (%rcx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x19]
+ vbroadcastf32x2 (%rcx), %ymm19
+
+// CHECK: vbroadcastf32x2 4660(%rax,%r14,8), %ymm19
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x19,0x9c,0xf0,0x34,0x12,0x00,0x00]
+ vbroadcastf32x2 4660(%rax,%r14,8), %ymm19
+
+// CHECK: vbroadcastf32x2 1016(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x5a,0x7f]
+ vbroadcastf32x2 1016(%rdx), %ymm19
+
+// CHECK: vbroadcastf32x2 1024(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x9a,0x00,0x04,0x00,0x00]
+ vbroadcastf32x2 1024(%rdx), %ymm19
+
+// CHECK: vbroadcastf32x2 -1024(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x5a,0x80]
+ vbroadcastf32x2 -1024(%rdx), %ymm19
+
+// CHECK: vbroadcastf32x2 -1032(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x9a,0xf8,0xfb,0xff,0xff]
+ vbroadcastf32x2 -1032(%rdx), %ymm19
+
diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s
index c746e6627f7a..8031c097a1e5 100644
--- a/test/MC/X86/x86-64-avx512f_vl.s
+++ b/test/MC/X86/x86-64-avx512f_vl.s
@@ -16285,6 +16285,1446 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: encoding: [0x62,0x62,0x4d,0x30,0x2c,0x8a,0xfc,0xfd,0xff,0xff]
vscalefps -516(%rdx){1to8}, %ymm22, %ymm25
+// CHECK: vpmovqb %xmm29, %xmm24
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x32,0xe8]
+ vpmovqb %xmm29, %xmm24
+
+// CHECK: vpmovqb %xmm29, %xmm24 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7e,0x0c,0x32,0xe8]
+ vpmovqb %xmm29, %xmm24 {%k4}
+
+// CHECK: vpmovqb %xmm29, %xmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0x8c,0x32,0xe8]
+ vpmovqb %xmm29, %xmm24 {%k4} {z}
+
+// CHECK: vpmovqb %ymm29, %xmm17
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x32,0xe9]
+ vpmovqb %ymm29, %xmm17
+
+// CHECK: vpmovqb %ymm29, %xmm17 {%k3}
+// CHECK: encoding: [0x62,0x22,0x7e,0x2b,0x32,0xe9]
+ vpmovqb %ymm29, %xmm17 {%k3}
+
+// CHECK: vpmovqb %ymm29, %xmm17 {%k3} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0xab,0x32,0xe9]
+ vpmovqb %ymm29, %xmm17 {%k3} {z}
+
+// CHECK: vpmovqb %xmm27, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x19]
+ vpmovqb %xmm27, (%rcx)
+
+// CHECK: vpmovqb %xmm27, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0a,0x32,0x19]
+ vpmovqb %xmm27, (%rcx) {%k2}
+
+// CHECK: vpmovqb %xmm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x32,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqb %xmm27, 291(%rax,%r14,8)
+
+// CHECK: vpmovqb %xmm27, 254(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x5a,0x7f]
+ vpmovqb %xmm27, 254(%rdx)
+
+// CHECK: vpmovqb %xmm27, 256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x9a,0x00,0x01,0x00,0x00]
+ vpmovqb %xmm27, 256(%rdx)
+
+// CHECK: vpmovqb %xmm27, -256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x5a,0x80]
+ vpmovqb %xmm27, -256(%rdx)
+
+// CHECK: vpmovqb %xmm27, -258(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x32,0x9a,0xfe,0xfe,0xff,0xff]
+ vpmovqb %xmm27, -258(%rdx)
+
+// CHECK: vpmovqb %ymm28, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x21]
+ vpmovqb %ymm28, (%rcx)
+
+// CHECK: vpmovqb %ymm28, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2f,0x32,0x21]
+ vpmovqb %ymm28, (%rcx) {%k7}
+
+// CHECK: vpmovqb %ymm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x32,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqb %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vpmovqb %ymm28, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x62,0x7f]
+ vpmovqb %ymm28, 508(%rdx)
+
+// CHECK: vpmovqb %ymm28, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0xa2,0x00,0x02,0x00,0x00]
+ vpmovqb %ymm28, 512(%rdx)
+
+// CHECK: vpmovqb %ymm28, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0x62,0x80]
+ vpmovqb %ymm28, -512(%rdx)
+
+// CHECK: vpmovqb %ymm28, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x32,0xa2,0xfc,0xfd,0xff,0xff]
+ vpmovqb %ymm28, -516(%rdx)
+
+// CHECK: vpmovsqb %xmm19, %xmm26
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x22,0xda]
+ vpmovsqb %xmm19, %xmm26
+
+// CHECK: vpmovsqb %xmm19, %xmm26 {%k1}
+// CHECK: encoding: [0x62,0x82,0x7e,0x09,0x22,0xda]
+ vpmovsqb %xmm19, %xmm26 {%k1}
+
+// CHECK: vpmovsqb %xmm19, %xmm26 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x89,0x22,0xda]
+ vpmovsqb %xmm19, %xmm26 {%k1} {z}
+
+// CHECK: vpmovsqb %ymm20, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x22,0xe4]
+ vpmovsqb %ymm20, %xmm20
+
+// CHECK: vpmovsqb %ymm20, %xmm20 {%k6}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x22,0xe4]
+ vpmovsqb %ymm20, %xmm20 {%k6}
+
+// CHECK: vpmovsqb %ymm20, %xmm20 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x22,0xe4]
+ vpmovsqb %ymm20, %xmm20 {%k6} {z}
+
+// CHECK: vpmovsqb %xmm25, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x09]
+ vpmovsqb %xmm25, (%rcx)
+
+// CHECK: vpmovsqb %xmm25, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0f,0x22,0x09]
+ vpmovsqb %xmm25, (%rcx) {%k7}
+
+// CHECK: vpmovsqb %xmm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqb %xmm25, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqb %xmm25, 254(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x4a,0x7f]
+ vpmovsqb %xmm25, 254(%rdx)
+
+// CHECK: vpmovsqb %xmm25, 256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x8a,0x00,0x01,0x00,0x00]
+ vpmovsqb %xmm25, 256(%rdx)
+
+// CHECK: vpmovsqb %xmm25, -256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x4a,0x80]
+ vpmovsqb %xmm25, -256(%rdx)
+
+// CHECK: vpmovsqb %xmm25, -258(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x22,0x8a,0xfe,0xfe,0xff,0xff]
+ vpmovsqb %xmm25, -258(%rdx)
+
+// CHECK: vpmovsqb %ymm17, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x09]
+ vpmovsqb %ymm17, (%rcx)
+
+// CHECK: vpmovsqb %ymm17, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2c,0x22,0x09]
+ vpmovsqb %ymm17, (%rcx) {%k4}
+
+// CHECK: vpmovsqb %ymm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqb %ymm17, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqb %ymm17, 508(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x4a,0x7f]
+ vpmovsqb %ymm17, 508(%rdx)
+
+// CHECK: vpmovsqb %ymm17, 512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x8a,0x00,0x02,0x00,0x00]
+ vpmovsqb %ymm17, 512(%rdx)
+
+// CHECK: vpmovsqb %ymm17, -512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x4a,0x80]
+ vpmovsqb %ymm17, -512(%rdx)
+
+// CHECK: vpmovsqb %ymm17, -516(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x22,0x8a,0xfc,0xfd,0xff,0xff]
+ vpmovsqb %ymm17, -516(%rdx)
+
+// CHECK: vpmovusqb %xmm22, %xmm28
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x12,0xf4]
+ vpmovusqb %xmm22, %xmm28
+
+// CHECK: vpmovusqb %xmm22, %xmm28 {%k2}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0a,0x12,0xf4]
+ vpmovusqb %xmm22, %xmm28 {%k2}
+
+// CHECK: vpmovusqb %xmm22, %xmm28 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8a,0x12,0xf4]
+ vpmovusqb %xmm22, %xmm28 {%k2} {z}
+
+// CHECK: vpmovusqb %ymm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x12,0xfe]
+ vpmovusqb %ymm23, %xmm22
+
+// CHECK: vpmovusqb %ymm23, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2f,0x12,0xfe]
+ vpmovusqb %ymm23, %xmm22 {%k7}
+
+// CHECK: vpmovusqb %ymm23, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xaf,0x12,0xfe]
+ vpmovusqb %ymm23, %xmm22 {%k7} {z}
+
+// CHECK: vpmovusqb %xmm26, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x11]
+ vpmovusqb %xmm26, (%rcx)
+
+// CHECK: vpmovusqb %xmm26, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0d,0x12,0x11]
+ vpmovusqb %xmm26, (%rcx) {%k5}
+
+// CHECK: vpmovusqb %xmm26, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x12,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqb %xmm26, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqb %xmm26, 254(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x52,0x7f]
+ vpmovusqb %xmm26, 254(%rdx)
+
+// CHECK: vpmovusqb %xmm26, 256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x92,0x00,0x01,0x00,0x00]
+ vpmovusqb %xmm26, 256(%rdx)
+
+// CHECK: vpmovusqb %xmm26, -256(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x52,0x80]
+ vpmovusqb %xmm26, -256(%rdx)
+
+// CHECK: vpmovusqb %xmm26, -258(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x12,0x92,0xfe,0xfe,0xff,0xff]
+ vpmovusqb %xmm26, -258(%rdx)
+
+// CHECK: vpmovusqb %ymm30, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x31]
+ vpmovusqb %ymm30, (%rcx)
+
+// CHECK: vpmovusqb %ymm30, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x12,0x31]
+ vpmovusqb %ymm30, (%rcx) {%k2}
+
+// CHECK: vpmovusqb %ymm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x12,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqb %ymm30, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqb %ymm30, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x72,0x7f]
+ vpmovusqb %ymm30, 508(%rdx)
+
+// CHECK: vpmovusqb %ymm30, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0xb2,0x00,0x02,0x00,0x00]
+ vpmovusqb %ymm30, 512(%rdx)
+
+// CHECK: vpmovusqb %ymm30, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0x72,0x80]
+ vpmovusqb %ymm30, -512(%rdx)
+
+// CHECK: vpmovusqb %ymm30, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x12,0xb2,0xfc,0xfd,0xff,0xff]
+ vpmovusqb %ymm30, -516(%rdx)
+
+// CHECK: vpmovqw %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x34,0xd3]
+ vpmovqw %xmm18, %xmm19
+
+// CHECK: vpmovqw %xmm18, %xmm19 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0c,0x34,0xd3]
+ vpmovqw %xmm18, %xmm19 {%k4}
+
+// CHECK: vpmovqw %xmm18, %xmm19 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8c,0x34,0xd3]
+ vpmovqw %xmm18, %xmm19 {%k4} {z}
+
+// CHECK: vpmovqw %ymm22, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x34,0xf3]
+ vpmovqw %ymm22, %xmm19
+
+// CHECK: vpmovqw %ymm22, %xmm19 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2d,0x34,0xf3]
+ vpmovqw %ymm22, %xmm19 {%k5}
+
+// CHECK: vpmovqw %ymm22, %xmm19 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xad,0x34,0xf3]
+ vpmovqw %ymm22, %xmm19 {%k5} {z}
+
+// CHECK: vpmovqw %xmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x29]
+ vpmovqw %xmm21, (%rcx)
+
+// CHECK: vpmovqw %xmm21, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0b,0x34,0x29]
+ vpmovqw %xmm21, (%rcx) {%k3}
+
+// CHECK: vpmovqw %xmm21, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x34,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqw %xmm21, 291(%rax,%r14,8)
+
+// CHECK: vpmovqw %xmm21, 508(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x6a,0x7f]
+ vpmovqw %xmm21, 508(%rdx)
+
+// CHECK: vpmovqw %xmm21, 512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0xaa,0x00,0x02,0x00,0x00]
+ vpmovqw %xmm21, 512(%rdx)
+
+// CHECK: vpmovqw %xmm21, -512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0x6a,0x80]
+ vpmovqw %xmm21, -512(%rdx)
+
+// CHECK: vpmovqw %xmm21, -516(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x34,0xaa,0xfc,0xfd,0xff,0xff]
+ vpmovqw %xmm21, -516(%rdx)
+
+// CHECK: vpmovqw %ymm28, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x21]
+ vpmovqw %ymm28, (%rcx)
+
+// CHECK: vpmovqw %ymm28, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x34,0x21]
+ vpmovqw %ymm28, (%rcx) {%k6}
+
+// CHECK: vpmovqw %ymm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x34,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqw %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vpmovqw %ymm28, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x62,0x7f]
+ vpmovqw %ymm28, 1016(%rdx)
+
+// CHECK: vpmovqw %ymm28, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0xa2,0x00,0x04,0x00,0x00]
+ vpmovqw %ymm28, 1024(%rdx)
+
+// CHECK: vpmovqw %ymm28, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0x62,0x80]
+ vpmovqw %ymm28, -1024(%rdx)
+
+// CHECK: vpmovqw %ymm28, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x34,0xa2,0xf8,0xfb,0xff,0xff]
+ vpmovqw %ymm28, -1032(%rdx)
+
+// CHECK: vpmovsqw %xmm18, %xmm26
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x24,0xd2]
+ vpmovsqw %xmm18, %xmm26
+
+// CHECK: vpmovsqw %xmm18, %xmm26 {%k7}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0f,0x24,0xd2]
+ vpmovsqw %xmm18, %xmm26 {%k7}
+
+// CHECK: vpmovsqw %xmm18, %xmm26 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8f,0x24,0xd2]
+ vpmovsqw %xmm18, %xmm26 {%k7} {z}
+
+// CHECK: vpmovsqw %ymm20, %xmm28
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x24,0xe4]
+ vpmovsqw %ymm20, %xmm28
+
+// CHECK: vpmovsqw %ymm20, %xmm28 {%k4}
+// CHECK: encoding: [0x62,0x82,0x7e,0x2c,0x24,0xe4]
+ vpmovsqw %ymm20, %xmm28 {%k4}
+
+// CHECK: vpmovsqw %ymm20, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xac,0x24,0xe4]
+ vpmovsqw %ymm20, %xmm28 {%k4} {z}
+
+// CHECK: vpmovsqw %xmm30, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x31]
+ vpmovsqw %xmm30, (%rcx)
+
+// CHECK: vpmovsqw %xmm30, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0c,0x24,0x31]
+ vpmovsqw %xmm30, (%rcx) {%k4}
+
+// CHECK: vpmovsqw %xmm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x24,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqw %xmm30, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqw %xmm30, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x72,0x7f]
+ vpmovsqw %xmm30, 508(%rdx)
+
+// CHECK: vpmovsqw %xmm30, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0xb2,0x00,0x02,0x00,0x00]
+ vpmovsqw %xmm30, 512(%rdx)
+
+// CHECK: vpmovsqw %xmm30, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0x72,0x80]
+ vpmovsqw %xmm30, -512(%rdx)
+
+// CHECK: vpmovsqw %xmm30, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x24,0xb2,0xfc,0xfd,0xff,0xff]
+ vpmovsqw %xmm30, -516(%rdx)
+
+// CHECK: vpmovsqw %ymm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x29]
+ vpmovsqw %ymm21, (%rcx)
+
+// CHECK: vpmovsqw %ymm21, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x24,0x29]
+ vpmovsqw %ymm21, (%rcx) {%k5}
+
+// CHECK: vpmovsqw %ymm21, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x24,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqw %ymm21, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqw %ymm21, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x6a,0x7f]
+ vpmovsqw %ymm21, 1016(%rdx)
+
+// CHECK: vpmovsqw %ymm21, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0xaa,0x00,0x04,0x00,0x00]
+ vpmovsqw %ymm21, 1024(%rdx)
+
+// CHECK: vpmovsqw %ymm21, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0x6a,0x80]
+ vpmovsqw %ymm21, -1024(%rdx)
+
+// CHECK: vpmovsqw %ymm21, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x24,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovsqw %ymm21, -1032(%rdx)
+
+// CHECK: vpmovusqw %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x14,0xe5]
+ vpmovusqw %xmm20, %xmm29
+
+// CHECK: vpmovusqw %xmm20, %xmm29 {%k1}
+// CHECK: encoding: [0x62,0x82,0x7e,0x09,0x14,0xe5]
+ vpmovusqw %xmm20, %xmm29 {%k1}
+
+// CHECK: vpmovusqw %xmm20, %xmm29 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x89,0x14,0xe5]
+ vpmovusqw %xmm20, %xmm29 {%k1} {z}
+
+// CHECK: vpmovusqw %ymm21, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x14,0xec]
+ vpmovusqw %ymm21, %xmm20
+
+// CHECK: vpmovusqw %ymm21, %xmm20 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2d,0x14,0xec]
+ vpmovusqw %ymm21, %xmm20 {%k5}
+
+// CHECK: vpmovusqw %ymm21, %xmm20 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xad,0x14,0xec]
+ vpmovusqw %ymm21, %xmm20 {%k5} {z}
+
+// CHECK: vpmovusqw %xmm18, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x11]
+ vpmovusqw %xmm18, (%rcx)
+
+// CHECK: vpmovusqw %xmm18, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x14,0x11]
+ vpmovusqw %xmm18, (%rcx) {%k1}
+
+// CHECK: vpmovusqw %xmm18, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x14,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqw %xmm18, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqw %xmm18, 508(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x52,0x7f]
+ vpmovusqw %xmm18, 508(%rdx)
+
+// CHECK: vpmovusqw %xmm18, 512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x92,0x00,0x02,0x00,0x00]
+ vpmovusqw %xmm18, 512(%rdx)
+
+// CHECK: vpmovusqw %xmm18, -512(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x52,0x80]
+ vpmovusqw %xmm18, -512(%rdx)
+
+// CHECK: vpmovusqw %xmm18, -516(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x14,0x92,0xfc,0xfd,0xff,0xff]
+ vpmovusqw %xmm18, -516(%rdx)
+
+// CHECK: vpmovusqw %ymm18, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x11]
+ vpmovusqw %ymm18, (%rcx)
+
+// CHECK: vpmovusqw %ymm18, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2a,0x14,0x11]
+ vpmovusqw %ymm18, (%rcx) {%k2}
+
+// CHECK: vpmovusqw %ymm18, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x14,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqw %ymm18, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqw %ymm18, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x52,0x7f]
+ vpmovusqw %ymm18, 1016(%rdx)
+
+// CHECK: vpmovusqw %ymm18, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x92,0x00,0x04,0x00,0x00]
+ vpmovusqw %ymm18, 1024(%rdx)
+
+// CHECK: vpmovusqw %ymm18, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x52,0x80]
+ vpmovusqw %ymm18, -1024(%rdx)
+
+// CHECK: vpmovusqw %ymm18, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x14,0x92,0xf8,0xfb,0xff,0xff]
+ vpmovusqw %ymm18, -1032(%rdx)
+
+// CHECK: vpmovqd %xmm25, %xmm21
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x35,0xcd]
+ vpmovqd %xmm25, %xmm21
+
+// CHECK: vpmovqd %xmm25, %xmm21 {%k5}
+// CHECK: encoding: [0x62,0x22,0x7e,0x0d,0x35,0xcd]
+ vpmovqd %xmm25, %xmm21 {%k5}
+
+// CHECK: vpmovqd %xmm25, %xmm21 {%k5} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0x8d,0x35,0xcd]
+ vpmovqd %xmm25, %xmm21 {%k5} {z}
+
+// CHECK: vpmovqd %ymm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x35,0xf5]
+ vpmovqd %ymm22, %xmm21
+
+// CHECK: vpmovqd %ymm22, %xmm21 {%k6}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x35,0xf5]
+ vpmovqd %ymm22, %xmm21 {%k6}
+
+// CHECK: vpmovqd %ymm22, %xmm21 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x35,0xf5]
+ vpmovqd %ymm22, %xmm21 {%k6} {z}
+
+// CHECK: vpmovqd %xmm29, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x29]
+ vpmovqd %xmm29, (%rcx)
+
+// CHECK: vpmovqd %xmm29, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0e,0x35,0x29]
+ vpmovqd %xmm29, (%rcx) {%k6}
+
+// CHECK: vpmovqd %xmm29, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x35,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqd %xmm29, 291(%rax,%r14,8)
+
+// CHECK: vpmovqd %xmm29, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x6a,0x7f]
+ vpmovqd %xmm29, 1016(%rdx)
+
+// CHECK: vpmovqd %xmm29, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0xaa,0x00,0x04,0x00,0x00]
+ vpmovqd %xmm29, 1024(%rdx)
+
+// CHECK: vpmovqd %xmm29, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0x6a,0x80]
+ vpmovqd %xmm29, -1024(%rdx)
+
+// CHECK: vpmovqd %xmm29, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x35,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovqd %xmm29, -1032(%rdx)
+
+// CHECK: vpmovqd %ymm30, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x31]
+ vpmovqd %ymm30, (%rcx)
+
+// CHECK: vpmovqd %ymm30, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2a,0x35,0x31]
+ vpmovqd %ymm30, (%rcx) {%k2}
+
+// CHECK: vpmovqd %ymm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x35,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovqd %ymm30, 291(%rax,%r14,8)
+
+// CHECK: vpmovqd %ymm30, 2032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x72,0x7f]
+ vpmovqd %ymm30, 2032(%rdx)
+
+// CHECK: vpmovqd %ymm30, 2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0xb2,0x00,0x08,0x00,0x00]
+ vpmovqd %ymm30, 2048(%rdx)
+
+// CHECK: vpmovqd %ymm30, -2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0x72,0x80]
+ vpmovqd %ymm30, -2048(%rdx)
+
+// CHECK: vpmovqd %ymm30, -2064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x35,0xb2,0xf0,0xf7,0xff,0xff]
+ vpmovqd %ymm30, -2064(%rdx)
+
+// CHECK: vpmovsqd %xmm21, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x25,0xed]
+ vpmovsqd %xmm21, %xmm21
+
+// CHECK: vpmovsqd %xmm21, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0a,0x25,0xed]
+ vpmovsqd %xmm21, %xmm21 {%k2}
+
+// CHECK: vpmovsqd %xmm21, %xmm21 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8a,0x25,0xed]
+ vpmovsqd %xmm21, %xmm21 {%k2} {z}
+
+// CHECK: vpmovsqd %ymm29, %xmm29
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x25,0xed]
+ vpmovsqd %ymm29, %xmm29
+
+// CHECK: vpmovsqd %ymm29, %xmm29 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x25,0xed]
+ vpmovsqd %ymm29, %xmm29 {%k4}
+
+// CHECK: vpmovsqd %ymm29, %xmm29 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x25,0xed]
+ vpmovsqd %ymm29, %xmm29 {%k4} {z}
+
+// CHECK: vpmovsqd %xmm17, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x09]
+ vpmovsqd %xmm17, (%rcx)
+
+// CHECK: vpmovsqd %xmm17, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x25,0x09]
+ vpmovsqd %xmm17, (%rcx) {%k2}
+
+// CHECK: vpmovsqd %xmm17, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x25,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqd %xmm17, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqd %xmm17, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x4a,0x7f]
+ vpmovsqd %xmm17, 1016(%rdx)
+
+// CHECK: vpmovsqd %xmm17, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x8a,0x00,0x04,0x00,0x00]
+ vpmovsqd %xmm17, 1024(%rdx)
+
+// CHECK: vpmovsqd %xmm17, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x4a,0x80]
+ vpmovsqd %xmm17, -1024(%rdx)
+
+// CHECK: vpmovsqd %xmm17, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x25,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovsqd %xmm17, -1032(%rdx)
+
+// CHECK: vpmovsqd %ymm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x39]
+ vpmovsqd %ymm23, (%rcx)
+
+// CHECK: vpmovsqd %ymm23, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2d,0x25,0x39]
+ vpmovsqd %ymm23, (%rcx) {%k5}
+
+// CHECK: vpmovsqd %ymm23, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsqd %ymm23, 291(%rax,%r14,8)
+
+// CHECK: vpmovsqd %ymm23, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x7a,0x7f]
+ vpmovsqd %ymm23, 2032(%rdx)
+
+// CHECK: vpmovsqd %ymm23, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0xba,0x00,0x08,0x00,0x00]
+ vpmovsqd %ymm23, 2048(%rdx)
+
+// CHECK: vpmovsqd %ymm23, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0x7a,0x80]
+ vpmovsqd %ymm23, -2048(%rdx)
+
+// CHECK: vpmovsqd %ymm23, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x25,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovsqd %ymm23, -2064(%rdx)
+
+// CHECK: vpmovusqd %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x15,0xe9]
+ vpmovusqd %xmm21, %xmm25
+
+// CHECK: vpmovusqd %xmm21, %xmm25 {%k5}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0d,0x15,0xe9]
+ vpmovusqd %xmm21, %xmm25 {%k5}
+
+// CHECK: vpmovusqd %xmm21, %xmm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8d,0x15,0xe9]
+ vpmovusqd %xmm21, %xmm25 {%k5} {z}
+
+// CHECK: vpmovusqd %ymm21, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x15,0xec]
+ vpmovusqd %ymm21, %xmm20
+
+// CHECK: vpmovusqd %ymm21, %xmm20 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2a,0x15,0xec]
+ vpmovusqd %ymm21, %xmm20 {%k2}
+
+// CHECK: vpmovusqd %ymm21, %xmm20 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xaa,0x15,0xec]
+ vpmovusqd %ymm21, %xmm20 {%k2} {z}
+
+// CHECK: vpmovusqd %xmm18, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x11]
+ vpmovusqd %xmm18, (%rcx)
+
+// CHECK: vpmovusqd %xmm18, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x09,0x15,0x11]
+ vpmovusqd %xmm18, (%rcx) {%k1}
+
+// CHECK: vpmovusqd %xmm18, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x15,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqd %xmm18, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqd %xmm18, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x52,0x7f]
+ vpmovusqd %xmm18, 1016(%rdx)
+
+// CHECK: vpmovusqd %xmm18, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x92,0x00,0x04,0x00,0x00]
+ vpmovusqd %xmm18, 1024(%rdx)
+
+// CHECK: vpmovusqd %xmm18, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x52,0x80]
+ vpmovusqd %xmm18, -1024(%rdx)
+
+// CHECK: vpmovusqd %xmm18, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x15,0x92,0xf8,0xfb,0xff,0xff]
+ vpmovusqd %xmm18, -1032(%rdx)
+
+// CHECK: vpmovusqd %ymm29, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x29]
+ vpmovusqd %ymm29, (%rcx)
+
+// CHECK: vpmovusqd %ymm29, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x15,0x29]
+ vpmovusqd %ymm29, (%rcx) {%k6}
+
+// CHECK: vpmovusqd %ymm29, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x15,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusqd %ymm29, 291(%rax,%r14,8)
+
+// CHECK: vpmovusqd %ymm29, 2032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x6a,0x7f]
+ vpmovusqd %ymm29, 2032(%rdx)
+
+// CHECK: vpmovusqd %ymm29, 2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0xaa,0x00,0x08,0x00,0x00]
+ vpmovusqd %ymm29, 2048(%rdx)
+
+// CHECK: vpmovusqd %ymm29, -2048(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0x6a,0x80]
+ vpmovusqd %ymm29, -2048(%rdx)
+
+// CHECK: vpmovusqd %ymm29, -2064(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x15,0xaa,0xf0,0xf7,0xff,0xff]
+ vpmovusqd %ymm29, -2064(%rdx)
+
+// CHECK: vpmovdb %xmm21, %xmm30
+// CHECK: encoding: [0x62,0x82,0x7e,0x08,0x31,0xee]
+ vpmovdb %xmm21, %xmm30
+
+// CHECK: vpmovdb %xmm21, %xmm30 {%k3}
+// CHECK: encoding: [0x62,0x82,0x7e,0x0b,0x31,0xee]
+ vpmovdb %xmm21, %xmm30 {%k3}
+
+// CHECK: vpmovdb %xmm21, %xmm30 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0x8b,0x31,0xee]
+ vpmovdb %xmm21, %xmm30 {%k3} {z}
+
+// CHECK: vpmovdb %ymm21, %xmm23
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x31,0xef]
+ vpmovdb %ymm21, %xmm23
+
+// CHECK: vpmovdb %ymm21, %xmm23 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2c,0x31,0xef]
+ vpmovdb %ymm21, %xmm23 {%k4}
+
+// CHECK: vpmovdb %ymm21, %xmm23 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xac,0x31,0xef]
+ vpmovdb %ymm21, %xmm23 {%k4} {z}
+
+// CHECK: vpmovdb %xmm29, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x29]
+ vpmovdb %xmm29, (%rcx)
+
+// CHECK: vpmovdb %xmm29, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x31,0x29]
+ vpmovdb %xmm29, (%rcx) {%k3}
+
+// CHECK: vpmovdb %xmm29, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x31,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdb %xmm29, 291(%rax,%r14,8)
+
+// CHECK: vpmovdb %xmm29, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x6a,0x7f]
+ vpmovdb %xmm29, 508(%rdx)
+
+// CHECK: vpmovdb %xmm29, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0xaa,0x00,0x02,0x00,0x00]
+ vpmovdb %xmm29, 512(%rdx)
+
+// CHECK: vpmovdb %xmm29, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0x6a,0x80]
+ vpmovdb %xmm29, -512(%rdx)
+
+// CHECK: vpmovdb %xmm29, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x31,0xaa,0xfc,0xfd,0xff,0xff]
+ vpmovdb %xmm29, -516(%rdx)
+
+// CHECK: vpmovdb %ymm26, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x11]
+ vpmovdb %ymm26, (%rcx)
+
+// CHECK: vpmovdb %ymm26, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x31,0x11]
+ vpmovdb %ymm26, (%rcx) {%k6}
+
+// CHECK: vpmovdb %ymm26, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x31,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdb %ymm26, 291(%rax,%r14,8)
+
+// CHECK: vpmovdb %ymm26, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x52,0x7f]
+ vpmovdb %ymm26, 1016(%rdx)
+
+// CHECK: vpmovdb %ymm26, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x92,0x00,0x04,0x00,0x00]
+ vpmovdb %ymm26, 1024(%rdx)
+
+// CHECK: vpmovdb %ymm26, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x52,0x80]
+ vpmovdb %ymm26, -1024(%rdx)
+
+// CHECK: vpmovdb %ymm26, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x31,0x92,0xf8,0xfb,0xff,0xff]
+ vpmovdb %ymm26, -1032(%rdx)
+
+// CHECK: vpmovsdb %xmm27, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x21,0xde]
+ vpmovsdb %xmm27, %xmm30
+
+// CHECK: vpmovsdb %xmm27, %xmm30 {%k1}
+// CHECK: encoding: [0x62,0x02,0x7e,0x09,0x21,0xde]
+ vpmovsdb %xmm27, %xmm30 {%k1}
+
+// CHECK: vpmovsdb %xmm27, %xmm30 {%k1} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0x89,0x21,0xde]
+ vpmovsdb %xmm27, %xmm30 {%k1} {z}
+
+// CHECK: vpmovsdb %ymm27, %xmm26
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x21,0xda]
+ vpmovsdb %ymm27, %xmm26
+
+// CHECK: vpmovsdb %ymm27, %xmm26 {%k3}
+// CHECK: encoding: [0x62,0x02,0x7e,0x2b,0x21,0xda]
+ vpmovsdb %ymm27, %xmm26 {%k3}
+
+// CHECK: vpmovsdb %ymm27, %xmm26 {%k3} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xab,0x21,0xda]
+ vpmovsdb %ymm27, %xmm26 {%k3} {z}
+
+// CHECK: vpmovsdb %xmm30, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x31]
+ vpmovsdb %xmm30, (%rcx)
+
+// CHECK: vpmovsdb %xmm30, (%rcx) {%k3}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0b,0x21,0x31]
+ vpmovsdb %xmm30, (%rcx) {%k3}
+
+// CHECK: vpmovsdb %xmm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x21,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdb %xmm30, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdb %xmm30, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x72,0x7f]
+ vpmovsdb %xmm30, 508(%rdx)
+
+// CHECK: vpmovsdb %xmm30, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0xb2,0x00,0x02,0x00,0x00]
+ vpmovsdb %xmm30, 512(%rdx)
+
+// CHECK: vpmovsdb %xmm30, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0x72,0x80]
+ vpmovsdb %xmm30, -512(%rdx)
+
+// CHECK: vpmovsdb %xmm30, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x21,0xb2,0xfc,0xfd,0xff,0xff]
+ vpmovsdb %xmm30, -516(%rdx)
+
+// CHECK: vpmovsdb %ymm25, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x09]
+ vpmovsdb %ymm25, (%rcx)
+
+// CHECK: vpmovsdb %ymm25, (%rcx) {%k5}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2d,0x21,0x09]
+ vpmovsdb %ymm25, (%rcx) {%k5}
+
+// CHECK: vpmovsdb %ymm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x21,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdb %ymm25, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdb %ymm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x4a,0x7f]
+ vpmovsdb %ymm25, 1016(%rdx)
+
+// CHECK: vpmovsdb %ymm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x8a,0x00,0x04,0x00,0x00]
+ vpmovsdb %ymm25, 1024(%rdx)
+
+// CHECK: vpmovsdb %ymm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x4a,0x80]
+ vpmovsdb %ymm25, -1024(%rdx)
+
+// CHECK: vpmovsdb %ymm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x21,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovsdb %ymm25, -1032(%rdx)
+
+// CHECK: vpmovusdb %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x11,0xee]
+ vpmovusdb %xmm29, %xmm30
+
+// CHECK: vpmovusdb %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x7e,0x0f,0x11,0xee]
+ vpmovusdb %xmm29, %xmm30 {%k7}
+
+// CHECK: vpmovusdb %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0x8f,0x11,0xee]
+ vpmovusdb %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vpmovusdb %ymm17, %xmm23
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x11,0xcf]
+ vpmovusdb %ymm17, %xmm23
+
+// CHECK: vpmovusdb %ymm17, %xmm23 {%k6}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2e,0x11,0xcf]
+ vpmovusdb %ymm17, %xmm23 {%k6}
+
+// CHECK: vpmovusdb %ymm17, %xmm23 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xae,0x11,0xcf]
+ vpmovusdb %ymm17, %xmm23 {%k6} {z}
+
+// CHECK: vpmovusdb %xmm26, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x11]
+ vpmovusdb %xmm26, (%rcx)
+
+// CHECK: vpmovusdb %xmm26, (%rcx) {%k7}
+// CHECK: encoding: [0x62,0x62,0x7e,0x0f,0x11,0x11]
+ vpmovusdb %xmm26, (%rcx) {%k7}
+
+// CHECK: vpmovusdb %xmm26, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x11,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdb %xmm26, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdb %xmm26, 508(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x52,0x7f]
+ vpmovusdb %xmm26, 508(%rdx)
+
+// CHECK: vpmovusdb %xmm26, 512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x92,0x00,0x02,0x00,0x00]
+ vpmovusdb %xmm26, 512(%rdx)
+
+// CHECK: vpmovusdb %xmm26, -512(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x52,0x80]
+ vpmovusdb %xmm26, -512(%rdx)
+
+// CHECK: vpmovusdb %xmm26, -516(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x11,0x92,0xfc,0xfd,0xff,0xff]
+ vpmovusdb %xmm26, -516(%rdx)
+
+// CHECK: vpmovusdb %ymm25, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x09]
+ vpmovusdb %ymm25, (%rcx)
+
+// CHECK: vpmovusdb %ymm25, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x62,0x7e,0x2e,0x11,0x09]
+ vpmovusdb %ymm25, (%rcx) {%k6}
+
+// CHECK: vpmovusdb %ymm25, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x28,0x11,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdb %ymm25, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdb %ymm25, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x4a,0x7f]
+ vpmovusdb %ymm25, 1016(%rdx)
+
+// CHECK: vpmovusdb %ymm25, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x8a,0x00,0x04,0x00,0x00]
+ vpmovusdb %ymm25, 1024(%rdx)
+
+// CHECK: vpmovusdb %ymm25, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x4a,0x80]
+ vpmovusdb %ymm25, -1024(%rdx)
+
+// CHECK: vpmovusdb %ymm25, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x11,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovusdb %ymm25, -1032(%rdx)
+
+// CHECK: vpmovdw %xmm25, %xmm17
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x33,0xc9]
+ vpmovdw %xmm25, %xmm17
+
+// CHECK: vpmovdw %xmm25, %xmm17 {%k5}
+// CHECK: encoding: [0x62,0x22,0x7e,0x0d,0x33,0xc9]
+ vpmovdw %xmm25, %xmm17 {%k5}
+
+// CHECK: vpmovdw %xmm25, %xmm17 {%k5} {z}
+// CHECK: encoding: [0x62,0x22,0x7e,0x8d,0x33,0xc9]
+ vpmovdw %xmm25, %xmm17 {%k5} {z}
+
+// CHECK: vpmovdw %ymm19, %xmm25
+// CHECK: encoding: [0x62,0x82,0x7e,0x28,0x33,0xd9]
+ vpmovdw %ymm19, %xmm25
+
+// CHECK: vpmovdw %ymm19, %xmm25 {%k4}
+// CHECK: encoding: [0x62,0x82,0x7e,0x2c,0x33,0xd9]
+ vpmovdw %ymm19, %xmm25 {%k4}
+
+// CHECK: vpmovdw %ymm19, %xmm25 {%k4} {z}
+// CHECK: encoding: [0x62,0x82,0x7e,0xac,0x33,0xd9]
+ vpmovdw %ymm19, %xmm25 {%k4} {z}
+
+// CHECK: vpmovdw %xmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x29]
+ vpmovdw %xmm21, (%rcx)
+
+// CHECK: vpmovdw %xmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0a,0x33,0x29]
+ vpmovdw %xmm21, (%rcx) {%k2}
+
+// CHECK: vpmovdw %xmm21, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x33,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdw %xmm21, 291(%rax,%r14,8)
+
+// CHECK: vpmovdw %xmm21, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x6a,0x7f]
+ vpmovdw %xmm21, 1016(%rdx)
+
+// CHECK: vpmovdw %xmm21, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0xaa,0x00,0x04,0x00,0x00]
+ vpmovdw %xmm21, 1024(%rdx)
+
+// CHECK: vpmovdw %xmm21, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0x6a,0x80]
+ vpmovdw %xmm21, -1024(%rdx)
+
+// CHECK: vpmovdw %xmm21, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x33,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovdw %xmm21, -1032(%rdx)
+
+// CHECK: vpmovdw %ymm22, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x31]
+ vpmovdw %ymm22, (%rcx)
+
+// CHECK: vpmovdw %ymm22, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x33,0x31]
+ vpmovdw %ymm22, (%rcx) {%k6}
+
+// CHECK: vpmovdw %ymm22, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x33,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovdw %ymm22, 291(%rax,%r14,8)
+
+// CHECK: vpmovdw %ymm22, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x72,0x7f]
+ vpmovdw %ymm22, 2032(%rdx)
+
+// CHECK: vpmovdw %ymm22, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0xb2,0x00,0x08,0x00,0x00]
+ vpmovdw %ymm22, 2048(%rdx)
+
+// CHECK: vpmovdw %ymm22, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0x72,0x80]
+ vpmovdw %ymm22, -2048(%rdx)
+
+// CHECK: vpmovdw %ymm22, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x33,0xb2,0xf0,0xf7,0xff,0xff]
+ vpmovdw %ymm22, -2064(%rdx)
+
+// CHECK: vpmovsdw %xmm18, %xmm18
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x23,0xd2]
+ vpmovsdw %xmm18, %xmm18
+
+// CHECK: vpmovsdw %xmm18, %xmm18 {%k6}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0e,0x23,0xd2]
+ vpmovsdw %xmm18, %xmm18 {%k6}
+
+// CHECK: vpmovsdw %xmm18, %xmm18 {%k6} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8e,0x23,0xd2]
+ vpmovsdw %xmm18, %xmm18 {%k6} {z}
+
+// CHECK: vpmovsdw %ymm18, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x23,0xd4]
+ vpmovsdw %ymm18, %xmm20
+
+// CHECK: vpmovsdw %ymm18, %xmm20 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x2a,0x23,0xd4]
+ vpmovsdw %ymm18, %xmm20 {%k2}
+
+// CHECK: vpmovsdw %ymm18, %xmm20 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0xaa,0x23,0xd4]
+ vpmovsdw %ymm18, %xmm20 {%k2} {z}
+
+// CHECK: vpmovsdw %xmm29, (%rcx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x29]
+ vpmovsdw %xmm29, (%rcx)
+
+// CHECK: vpmovsdw %xmm29, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0x62,0x7e,0x09,0x23,0x29]
+ vpmovsdw %xmm29, (%rcx) {%k1}
+
+// CHECK: vpmovsdw %xmm29, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x22,0x7e,0x08,0x23,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdw %xmm29, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdw %xmm29, 1016(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x6a,0x7f]
+ vpmovsdw %xmm29, 1016(%rdx)
+
+// CHECK: vpmovsdw %xmm29, 1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0xaa,0x00,0x04,0x00,0x00]
+ vpmovsdw %xmm29, 1024(%rdx)
+
+// CHECK: vpmovsdw %xmm29, -1024(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0x6a,0x80]
+ vpmovsdw %xmm29, -1024(%rdx)
+
+// CHECK: vpmovsdw %xmm29, -1032(%rdx)
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x23,0xaa,0xf8,0xfb,0xff,0xff]
+ vpmovsdw %xmm29, -1032(%rdx)
+
+// CHECK: vpmovsdw %ymm19, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x19]
+ vpmovsdw %ymm19, (%rcx)
+
+// CHECK: vpmovsdw %ymm19, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x2e,0x23,0x19]
+ vpmovsdw %ymm19, (%rcx) {%k6}
+
+// CHECK: vpmovsdw %ymm19, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x23,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsdw %ymm19, 291(%rax,%r14,8)
+
+// CHECK: vpmovsdw %ymm19, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x5a,0x7f]
+ vpmovsdw %ymm19, 2032(%rdx)
+
+// CHECK: vpmovsdw %ymm19, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x9a,0x00,0x08,0x00,0x00]
+ vpmovsdw %ymm19, 2048(%rdx)
+
+// CHECK: vpmovsdw %ymm19, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x5a,0x80]
+ vpmovsdw %ymm19, -2048(%rdx)
+
+// CHECK: vpmovsdw %ymm19, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x23,0x9a,0xf0,0xf7,0xff,0xff]
+ vpmovsdw %ymm19, -2064(%rdx)
+
+// CHECK: vpmovusdw %xmm18, %xmm18
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x13,0xd2]
+ vpmovusdw %xmm18, %xmm18
+
+// CHECK: vpmovusdw %xmm18, %xmm18 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x0a,0x13,0xd2]
+ vpmovusdw %xmm18, %xmm18 {%k2}
+
+// CHECK: vpmovusdw %xmm18, %xmm18 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7e,0x8a,0x13,0xd2]
+ vpmovusdw %xmm18, %xmm18 {%k2} {z}
+
+// CHECK: vpmovusdw %ymm25, %xmm28
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x13,0xcc]
+ vpmovusdw %ymm25, %xmm28
+
+// CHECK: vpmovusdw %ymm25, %xmm28 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7e,0x2c,0x13,0xcc]
+ vpmovusdw %ymm25, %xmm28 {%k4}
+
+// CHECK: vpmovusdw %ymm25, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7e,0xac,0x13,0xcc]
+ vpmovusdw %ymm25, %xmm28 {%k4} {z}
+
+// CHECK: vpmovusdw %xmm20, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x21]
+ vpmovusdw %xmm20, (%rcx)
+
+// CHECK: vpmovusdw %xmm20, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x0e,0x13,0x21]
+ vpmovusdw %xmm20, (%rcx) {%k6}
+
+// CHECK: vpmovusdw %xmm20, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x08,0x13,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdw %xmm20, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdw %xmm20, 1016(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x62,0x7f]
+ vpmovusdw %xmm20, 1016(%rdx)
+
+// CHECK: vpmovusdw %xmm20, 1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0xa2,0x00,0x04,0x00,0x00]
+ vpmovusdw %xmm20, 1024(%rdx)
+
+// CHECK: vpmovusdw %xmm20, -1024(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0x62,0x80]
+ vpmovusdw %xmm20, -1024(%rdx)
+
+// CHECK: vpmovusdw %xmm20, -1032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x08,0x13,0xa2,0xf8,0xfb,0xff,0xff]
+ vpmovusdw %xmm20, -1032(%rdx)
+
+// CHECK: vpmovusdw %ymm23, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x39]
+ vpmovusdw %ymm23, (%rcx)
+
+// CHECK: vpmovusdw %ymm23, (%rcx) {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7e,0x29,0x13,0x39]
+ vpmovusdw %ymm23, (%rcx) {%k1}
+
+// CHECK: vpmovusdw %ymm23, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7e,0x28,0x13,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovusdw %ymm23, 291(%rax,%r14,8)
+
+// CHECK: vpmovusdw %ymm23, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x7a,0x7f]
+ vpmovusdw %ymm23, 2032(%rdx)
+
+// CHECK: vpmovusdw %ymm23, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0xba,0x00,0x08,0x00,0x00]
+ vpmovusdw %ymm23, 2048(%rdx)
+
+// CHECK: vpmovusdw %ymm23, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0x7a,0x80]
+ vpmovusdw %ymm23, -2048(%rdx)
+
+// CHECK: vpmovusdw %ymm23, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe2,0x7e,0x28,0x13,0xba,0xf0,0xf7,0xff,0xff]
+ vpmovusdw %ymm23, -2064(%rdx)
+
+// CHECK: vrndscalepd $171, %xmm28, %xmm29
+// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0xab]
+ vrndscalepd $0xab, %xmm28, %xmm29
+
+// CHECK: vrndscalepd $171, %xmm28, %xmm29 {%k4}
+// CHECK: encoding: [0x62,0x03,0xfd,0x0c,0x09,0xec,0xab]
+ vrndscalepd $0xab, %xmm28, %xmm29 {%k4}
+
+// CHECK: vrndscalepd $171, %xmm28, %xmm29 {%k4} {z}
+// CHECK: encoding: [0x62,0x03,0xfd,0x8c,0x09,0xec,0xab]
+ vrndscalepd $0xab, %xmm28, %xmm29 {%k4} {z}
+
+// CHECK: vrndscalepd $123, %xmm28, %xmm29
+// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x09,0xec,0x7b]
+ vrndscalepd $0x7b, %xmm28, %xmm29
+
+// CHECK: vrndscalepd $123, (%rcx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0x29,0x7b]
+ vrndscalepd $0x7b, (%rcx), %xmm29
+
+// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %xmm29
+// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x09,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalepd $0x7b, 291(%rax,%r14,8), %xmm29
+
+// CHECK: vrndscalepd $123, (%rcx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0x29,0x7b]
+ vrndscalepd $0x7b, (%rcx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $123, 2032(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0x6a,0x7f,0x7b]
+ vrndscalepd $0x7b, 2032(%rdx), %xmm29
+
+// CHECK: vrndscalepd $123, 2048(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vrndscalepd $0x7b, 2048(%rdx), %xmm29
+
+// CHECK: vrndscalepd $123, -2048(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0x6a,0x80,0x7b]
+ vrndscalepd $0x7b, -2048(%rdx), %xmm29
+
+// CHECK: vrndscalepd $123, -2064(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x09,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vrndscalepd $0x7b, -2064(%rdx), %xmm29
+
+// CHECK: vrndscalepd $123, 1016(%rdx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0x6a,0x7f,0x7b]
+ vrndscalepd $0x7b, 1016(%rdx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $123, 1024(%rdx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0xaa,0x00,0x04,0x00,0x00,0x7b]
+ vrndscalepd $0x7b, 1024(%rdx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $123, -1024(%rdx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0x6a,0x80,0x7b]
+ vrndscalepd $0x7b, -1024(%rdx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $123, -1032(%rdx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x09,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+ vrndscalepd $0x7b, -1032(%rdx){1to2}, %xmm29
+
+// CHECK: vrndscalepd $171, %ymm22, %ymm17
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x09,0xce,0xab]
+ vrndscalepd $0xab, %ymm22, %ymm17
+
+// CHECK: vrndscalepd $171, %ymm22, %ymm17 {%k7}
+// CHECK: encoding: [0x62,0xa3,0xfd,0x2f,0x09,0xce,0xab]
+ vrndscalepd $0xab, %ymm22, %ymm17 {%k7}
+
+// CHECK: vrndscalepd $171, %ymm22, %ymm17 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0xfd,0xaf,0x09,0xce,0xab]
+ vrndscalepd $0xab, %ymm22, %ymm17 {%k7} {z}
+
+// CHECK: vrndscalepd $123, %ymm22, %ymm17
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x09,0xce,0x7b]
+ vrndscalepd $0x7b, %ymm22, %ymm17
+
+// CHECK: vrndscalepd $123, (%rcx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x09,0x7b]
+ vrndscalepd $0x7b, (%rcx), %ymm17
+
+// CHECK: vrndscalepd $123, 291(%rax,%r14,8), %ymm17
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x09,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrndscalepd $0x7b, 291(%rax,%r14,8), %ymm17
+
+// CHECK: vrndscalepd $123, (%rcx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x09,0x7b]
+ vrndscalepd $0x7b, (%rcx){1to4}, %ymm17
+
+// CHECK: vrndscalepd $123, 4064(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x4a,0x7f,0x7b]
+ vrndscalepd $0x7b, 4064(%rdx), %ymm17
+
+// CHECK: vrndscalepd $123, 4096(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vrndscalepd $0x7b, 4096(%rdx), %ymm17
+
+// CHECK: vrndscalepd $123, -4096(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x4a,0x80,0x7b]
+ vrndscalepd $0x7b, -4096(%rdx), %ymm17
+
+// CHECK: vrndscalepd $123, -4128(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x09,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vrndscalepd $0x7b, -4128(%rdx), %ymm17
+
+// CHECK: vrndscalepd $123, 1016(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x4a,0x7f,0x7b]
+ vrndscalepd $0x7b, 1016(%rdx){1to4}, %ymm17
+
+// CHECK: vrndscalepd $123, 1024(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vrndscalepd $0x7b, 1024(%rdx){1to4}, %ymm17
+
+// CHECK: vrndscalepd $123, -1024(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x4a,0x80,0x7b]
+ vrndscalepd $0x7b, -1024(%rdx){1to4}, %ymm17
+
+// CHECK: vrndscalepd $123, -1032(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x09,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vrndscalepd $0x7b, -1032(%rdx){1to4}, %ymm17
+
+// CHECK: vrndscaleps $171, %xmm26, %xmm22
+// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x08,0xf2,0xab]
+ vrndscaleps $0xab, %xmm26, %xmm22
+
+// CHECK: vrndscaleps $171, %xmm26, %xmm22 {%k4}
+// CHECK: encoding: [0x62,0x83,0x7d,0x0c,0x08,0xf2,0xab]
+ vrndscaleps $0xab, %xmm26, %xmm22 {%k4}
+
+// CHECK: vrndscaleps $171, %xmm26, %xmm22 {%k4} {z}
+// CHECK: encoding: [0x62,0x83,0x7d,0x8c,0x08,0xf2,0xab]
+ vrndscaleps $0xab, %xmm26, %xmm22 {%k4} {z}
+
+// CHECK: vrndscaleps $123, %xmm26, %xmm22
+// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x08,0xf2,0x7b]
+ vrndscaleps $0x7b, %xmm26, %xmm22
+
+// CHECK: vrndscaleps $123, (%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0x31,0x7b]
+ vrndscaleps $0x7b, (%rcx), %xmm22
+
+// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x08,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrndscaleps $0x7b, 291(%rax,%r14,8), %xmm22
+
+// CHECK: vrndscaleps $123, (%rcx){1to4}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0x31,0x7b]
+ vrndscaleps $0x7b, (%rcx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $123, 2032(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0x72,0x7f,0x7b]
+ vrndscaleps $0x7b, 2032(%rdx), %xmm22
+
+// CHECK: vrndscaleps $123, 2048(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0xb2,0x00,0x08,0x00,0x00,0x7b]
+ vrndscaleps $0x7b, 2048(%rdx), %xmm22
+
+// CHECK: vrndscaleps $123, -2048(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0x72,0x80,0x7b]
+ vrndscaleps $0x7b, -2048(%rdx), %xmm22
+
+// CHECK: vrndscaleps $123, -2064(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x08,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+ vrndscaleps $0x7b, -2064(%rdx), %xmm22
+
+// CHECK: vrndscaleps $123, 508(%rdx){1to4}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0x72,0x7f,0x7b]
+ vrndscaleps $0x7b, 508(%rdx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $123, 512(%rdx){1to4}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0xb2,0x00,0x02,0x00,0x00,0x7b]
+ vrndscaleps $0x7b, 512(%rdx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $123, -512(%rdx){1to4}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0x72,0x80,0x7b]
+ vrndscaleps $0x7b, -512(%rdx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $123, -516(%rdx){1to4}, %xmm22
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x08,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+ vrndscaleps $0x7b, -516(%rdx){1to4}, %xmm22
+
+// CHECK: vrndscaleps $171, %ymm17, %ymm19
+// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x08,0xd9,0xab]
+ vrndscaleps $0xab, %ymm17, %ymm19
+
+// CHECK: vrndscaleps $171, %ymm17, %ymm19 {%k7}
+// CHECK: encoding: [0x62,0xa3,0x7d,0x2f,0x08,0xd9,0xab]
+ vrndscaleps $0xab, %ymm17, %ymm19 {%k7}
+
+// CHECK: vrndscaleps $171, %ymm17, %ymm19 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0x7d,0xaf,0x08,0xd9,0xab]
+ vrndscaleps $0xab, %ymm17, %ymm19 {%k7} {z}
+
+// CHECK: vrndscaleps $123, %ymm17, %ymm19
+// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x08,0xd9,0x7b]
+ vrndscaleps $0x7b, %ymm17, %ymm19
+
+// CHECK: vrndscaleps $123, (%rcx), %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x19,0x7b]
+ vrndscaleps $0x7b, (%rcx), %ymm19
+
+// CHECK: vrndscaleps $123, 291(%rax,%r14,8), %ymm19
+// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x08,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vrndscaleps $0x7b, 291(%rax,%r14,8), %ymm19
+
+// CHECK: vrndscaleps $123, (%rcx){1to8}, %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x19,0x7b]
+ vrndscaleps $0x7b, (%rcx){1to8}, %ymm19
+
+// CHECK: vrndscaleps $123, 4064(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x5a,0x7f,0x7b]
+ vrndscaleps $0x7b, 4064(%rdx), %ymm19
+
+// CHECK: vrndscaleps $123, 4096(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x9a,0x00,0x10,0x00,0x00,0x7b]
+ vrndscaleps $0x7b, 4096(%rdx), %ymm19
+
+// CHECK: vrndscaleps $123, -4096(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x5a,0x80,0x7b]
+ vrndscaleps $0x7b, -4096(%rdx), %ymm19
+
+// CHECK: vrndscaleps $123, -4128(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x08,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+ vrndscaleps $0x7b, -4128(%rdx), %ymm19
+
+// CHECK: vrndscaleps $123, 508(%rdx){1to8}, %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x5a,0x7f,0x7b]
+ vrndscaleps $0x7b, 508(%rdx){1to8}, %ymm19
+
+// CHECK: vrndscaleps $123, 512(%rdx){1to8}, %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vrndscaleps $0x7b, 512(%rdx){1to8}, %ymm19
+
+// CHECK: vrndscaleps $123, -512(%rdx){1to8}, %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x5a,0x80,0x7b]
+ vrndscaleps $0x7b, -512(%rdx){1to8}, %ymm19
+
+// CHECK: vrndscaleps $123, -516(%rdx){1to8}, %ymm19
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x08,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vrndscaleps $0x7b, -516(%rdx){1to8}, %ymm19
+
// CHECK: vcvtps2pd %xmm27, %xmm20
// CHECK: encoding: [0x62,0x81,0x7c,0x08,0x5a,0xe3]
vcvtps2pd %xmm27, %xmm20
@@ -17403,3 +18843,4123 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: vcvtdq2ps -516(%rdx){1to8}, %ymm24
// CHECK: encoding: [0x62,0x61,0x7c,0x38,0x5b,0x82,0xfc,0xfd,0xff,0xff]
vcvtdq2ps -516(%rdx){1to8}, %ymm24
+// CHECK: vunpckhps %xmm27, %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x01,0x64,0x00,0x15,0xf3]
+ vunpckhps %xmm27, %xmm19, %xmm30
+
+// CHECK: vunpckhps %xmm27, %xmm19, %xmm30 {%k2}
+// CHECK: encoding: [0x62,0x01,0x64,0x02,0x15,0xf3]
+ vunpckhps %xmm27, %xmm19, %xmm30 {%k2}
+
+// CHECK: vunpckhps %xmm27, %xmm19, %xmm30 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0x64,0x82,0x15,0xf3]
+ vunpckhps %xmm27, %xmm19, %xmm30 {%k2} {z}
+
+// CHECK: vunpckhps (%rcx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0x31]
+ vunpckhps (%rcx), %xmm19, %xmm30
+
+// CHECK: vunpckhps 291(%rax,%r14,8), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x21,0x64,0x00,0x15,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vunpckhps 291(%rax,%r14,8), %xmm19, %xmm30
+
+// CHECK: vunpckhps (%rcx){1to4}, %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0x31]
+ vunpckhps (%rcx){1to4}, %xmm19, %xmm30
+
+// CHECK: vunpckhps 2032(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0x72,0x7f]
+ vunpckhps 2032(%rdx), %xmm19, %xmm30
+
+// CHECK: vunpckhps 2048(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0xb2,0x00,0x08,0x00,0x00]
+ vunpckhps 2048(%rdx), %xmm19, %xmm30
+
+// CHECK: vunpckhps -2048(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0x72,0x80]
+ vunpckhps -2048(%rdx), %xmm19, %xmm30
+
+// CHECK: vunpckhps -2064(%rdx), %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0xb2,0xf0,0xf7,0xff,0xff]
+ vunpckhps -2064(%rdx), %xmm19, %xmm30
+
+// CHECK: vunpckhps 508(%rdx){1to4}, %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0x72,0x7f]
+ vunpckhps 508(%rdx){1to4}, %xmm19, %xmm30
+
+// CHECK: vunpckhps 512(%rdx){1to4}, %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0xb2,0x00,0x02,0x00,0x00]
+ vunpckhps 512(%rdx){1to4}, %xmm19, %xmm30
+
+// CHECK: vunpckhps -512(%rdx){1to4}, %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0x72,0x80]
+ vunpckhps -512(%rdx){1to4}, %xmm19, %xmm30
+
+// CHECK: vunpckhps -516(%rdx){1to4}, %xmm19, %xmm30
+// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0xb2,0xfc,0xfd,0xff,0xff]
+ vunpckhps -516(%rdx){1to4}, %xmm19, %xmm30
+
+// CHECK: vunpckhps %ymm25, %ymm25, %ymm21
+// CHECK: encoding: [0x62,0x81,0x34,0x20,0x15,0xe9]
+ vunpckhps %ymm25, %ymm25, %ymm21
+
+// CHECK: vunpckhps %ymm25, %ymm25, %ymm21 {%k5}
+// CHECK: encoding: [0x62,0x81,0x34,0x25,0x15,0xe9]
+ vunpckhps %ymm25, %ymm25, %ymm21 {%k5}
+
+// CHECK: vunpckhps %ymm25, %ymm25, %ymm21 {%k5} {z}
+// CHECK: encoding: [0x62,0x81,0x34,0xa5,0x15,0xe9]
+ vunpckhps %ymm25, %ymm25, %ymm21 {%k5} {z}
+
+// CHECK: vunpckhps (%rcx), %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0x29]
+ vunpckhps (%rcx), %ymm25, %ymm21
+
+// CHECK: vunpckhps 291(%rax,%r14,8), %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xa1,0x34,0x20,0x15,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vunpckhps 291(%rax,%r14,8), %ymm25, %ymm21
+
+// CHECK: vunpckhps (%rcx){1to8}, %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0x29]
+ vunpckhps (%rcx){1to8}, %ymm25, %ymm21
+
+// CHECK: vunpckhps 4064(%rdx), %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0x6a,0x7f]
+ vunpckhps 4064(%rdx), %ymm25, %ymm21
+
+// CHECK: vunpckhps 4096(%rdx), %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0xaa,0x00,0x10,0x00,0x00]
+ vunpckhps 4096(%rdx), %ymm25, %ymm21
+
+// CHECK: vunpckhps -4096(%rdx), %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0x6a,0x80]
+ vunpckhps -4096(%rdx), %ymm25, %ymm21
+
+// CHECK: vunpckhps -4128(%rdx), %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0xaa,0xe0,0xef,0xff,0xff]
+ vunpckhps -4128(%rdx), %ymm25, %ymm21
+
+// CHECK: vunpckhps 508(%rdx){1to8}, %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0x6a,0x7f]
+ vunpckhps 508(%rdx){1to8}, %ymm25, %ymm21
+
+// CHECK: vunpckhps 512(%rdx){1to8}, %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0xaa,0x00,0x02,0x00,0x00]
+ vunpckhps 512(%rdx){1to8}, %ymm25, %ymm21
+
+// CHECK: vunpckhps -512(%rdx){1to8}, %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0x6a,0x80]
+ vunpckhps -512(%rdx){1to8}, %ymm25, %ymm21
+
+// CHECK: vunpckhps -516(%rdx){1to8}, %ymm25, %ymm21
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0xaa,0xfc,0xfd,0xff,0xff]
+ vunpckhps -516(%rdx){1to8}, %ymm25, %ymm21
+
+// CHECK: vunpcklps %xmm26, %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x01,0x3c,0x00,0x14,0xea]
+ vunpcklps %xmm26, %xmm24, %xmm29
+
+// CHECK: vunpcklps %xmm26, %xmm24, %xmm29 {%k6}
+// CHECK: encoding: [0x62,0x01,0x3c,0x06,0x14,0xea]
+ vunpcklps %xmm26, %xmm24, %xmm29 {%k6}
+
+// CHECK: vunpcklps %xmm26, %xmm24, %xmm29 {%k6} {z}
+// CHECK: encoding: [0x62,0x01,0x3c,0x86,0x14,0xea]
+ vunpcklps %xmm26, %xmm24, %xmm29 {%k6} {z}
+
+// CHECK: vunpcklps (%rcx), %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0x29]
+ vunpcklps (%rcx), %xmm24, %xmm29
+
+// CHECK: vunpcklps 291(%rax,%r14,8), %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x21,0x3c,0x00,0x14,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vunpcklps 291(%rax,%r14,8), %xmm24, %xmm29
+
+// CHECK: vunpcklps (%rcx){1to4}, %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0x29]
+ vunpcklps (%rcx){1to4}, %xmm24, %xmm29
+
+// CHECK: vunpcklps 2032(%rdx), %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0x6a,0x7f]
+ vunpcklps 2032(%rdx), %xmm24, %xmm29
+
+// CHECK: vunpcklps 2048(%rdx), %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0xaa,0x00,0x08,0x00,0x00]
+ vunpcklps 2048(%rdx), %xmm24, %xmm29
+
+// CHECK: vunpcklps -2048(%rdx), %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0x6a,0x80]
+ vunpcklps -2048(%rdx), %xmm24, %xmm29
+
+// CHECK: vunpcklps -2064(%rdx), %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0xaa,0xf0,0xf7,0xff,0xff]
+ vunpcklps -2064(%rdx), %xmm24, %xmm29
+
+// CHECK: vunpcklps 508(%rdx){1to4}, %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0x6a,0x7f]
+ vunpcklps 508(%rdx){1to4}, %xmm24, %xmm29
+
+// CHECK: vunpcklps 512(%rdx){1to4}, %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0xaa,0x00,0x02,0x00,0x00]
+ vunpcklps 512(%rdx){1to4}, %xmm24, %xmm29
+
+// CHECK: vunpcklps -512(%rdx){1to4}, %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0x6a,0x80]
+ vunpcklps -512(%rdx){1to4}, %xmm24, %xmm29
+
+// CHECK: vunpcklps -516(%rdx){1to4}, %xmm24, %xmm29
+// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0xaa,0xfc,0xfd,0xff,0xff]
+ vunpcklps -516(%rdx){1to4}, %xmm24, %xmm29
+
+// CHECK: vunpcklps %ymm18, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x21,0x6c,0x20,0x14,0xd2]
+ vunpcklps %ymm18, %ymm18, %ymm26
+
+// CHECK: vunpcklps %ymm18, %ymm18, %ymm26 {%k1}
+// CHECK: encoding: [0x62,0x21,0x6c,0x21,0x14,0xd2]
+ vunpcklps %ymm18, %ymm18, %ymm26 {%k1}
+
+// CHECK: vunpcklps %ymm18, %ymm18, %ymm26 {%k1} {z}
+// CHECK: encoding: [0x62,0x21,0x6c,0xa1,0x14,0xd2]
+ vunpcklps %ymm18, %ymm18, %ymm26 {%k1} {z}
+
+// CHECK: vunpcklps (%rcx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x11]
+ vunpcklps (%rcx), %ymm18, %ymm26
+
+// CHECK: vunpcklps 291(%rax,%r14,8), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x21,0x6c,0x20,0x14,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vunpcklps 291(%rax,%r14,8), %ymm18, %ymm26
+
+// CHECK: vunpcklps (%rcx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x11]
+ vunpcklps (%rcx){1to8}, %ymm18, %ymm26
+
+// CHECK: vunpcklps 4064(%rdx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x52,0x7f]
+ vunpcklps 4064(%rdx), %ymm18, %ymm26
+
+// CHECK: vunpcklps 4096(%rdx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x92,0x00,0x10,0x00,0x00]
+ vunpcklps 4096(%rdx), %ymm18, %ymm26
+
+// CHECK: vunpcklps -4096(%rdx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x52,0x80]
+ vunpcklps -4096(%rdx), %ymm18, %ymm26
+
+// CHECK: vunpcklps -4128(%rdx), %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x92,0xe0,0xef,0xff,0xff]
+ vunpcklps -4128(%rdx), %ymm18, %ymm26
+
+// CHECK: vunpcklps 508(%rdx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x52,0x7f]
+ vunpcklps 508(%rdx){1to8}, %ymm18, %ymm26
+
+// CHECK: vunpcklps 512(%rdx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x92,0x00,0x02,0x00,0x00]
+ vunpcklps 512(%rdx){1to8}, %ymm18, %ymm26
+
+// CHECK: vunpcklps -512(%rdx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x52,0x80]
+ vunpcklps -512(%rdx){1to8}, %ymm18, %ymm26
+
+// CHECK: vunpcklps -516(%rdx){1to8}, %ymm18, %ymm26
+// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x92,0xfc,0xfd,0xff,0xff]
+ vunpcklps -516(%rdx){1to8}, %ymm18, %ymm26
+
+// CHECK: vunpckhpd %xmm26, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0x81,0xb5,0x00,0x15,0xda]
+ vunpckhpd %xmm26, %xmm25, %xmm19
+
+// CHECK: vunpckhpd %xmm26, %xmm25, %xmm19 {%k3}
+// CHECK: encoding: [0x62,0x81,0xb5,0x03,0x15,0xda]
+ vunpckhpd %xmm26, %xmm25, %xmm19 {%k3}
+
+// CHECK: vunpckhpd %xmm26, %xmm25, %xmm19 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0xb5,0x83,0x15,0xda]
+ vunpckhpd %xmm26, %xmm25, %xmm19 {%k3} {z}
+
+// CHECK: vunpckhpd (%rcx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x19]
+ vunpckhpd (%rcx), %xmm25, %xmm19
+
+// CHECK: vunpckhpd 291(%rax,%r14,8), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xa1,0xb5,0x00,0x15,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vunpckhpd 291(%rax,%r14,8), %xmm25, %xmm19
+
+// CHECK: vunpckhpd (%rcx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x19]
+ vunpckhpd (%rcx){1to2}, %xmm25, %xmm19
+
+// CHECK: vunpckhpd 2032(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x5a,0x7f]
+ vunpckhpd 2032(%rdx), %xmm25, %xmm19
+
+// CHECK: vunpckhpd 2048(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x9a,0x00,0x08,0x00,0x00]
+ vunpckhpd 2048(%rdx), %xmm25, %xmm19
+
+// CHECK: vunpckhpd -2048(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x5a,0x80]
+ vunpckhpd -2048(%rdx), %xmm25, %xmm19
+
+// CHECK: vunpckhpd -2064(%rdx), %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x9a,0xf0,0xf7,0xff,0xff]
+ vunpckhpd -2064(%rdx), %xmm25, %xmm19
+
+// CHECK: vunpckhpd 1016(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x5a,0x7f]
+ vunpckhpd 1016(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vunpckhpd 1024(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x9a,0x00,0x04,0x00,0x00]
+ vunpckhpd 1024(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vunpckhpd -1024(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x5a,0x80]
+ vunpckhpd -1024(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vunpckhpd -1032(%rdx){1to2}, %xmm25, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x9a,0xf8,0xfb,0xff,0xff]
+ vunpckhpd -1032(%rdx){1to2}, %xmm25, %xmm19
+
+// CHECK: vunpckhpd %ymm24, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x01,0xcd,0x20,0x15,0xc8]
+ vunpckhpd %ymm24, %ymm22, %ymm25
+
+// CHECK: vunpckhpd %ymm24, %ymm22, %ymm25 {%k7}
+// CHECK: encoding: [0x62,0x01,0xcd,0x27,0x15,0xc8]
+ vunpckhpd %ymm24, %ymm22, %ymm25 {%k7}
+
+// CHECK: vunpckhpd %ymm24, %ymm22, %ymm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x01,0xcd,0xa7,0x15,0xc8]
+ vunpckhpd %ymm24, %ymm22, %ymm25 {%k7} {z}
+
+// CHECK: vunpckhpd (%rcx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x09]
+ vunpckhpd (%rcx), %ymm22, %ymm25
+
+// CHECK: vunpckhpd 291(%rax,%r14,8), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x21,0xcd,0x20,0x15,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vunpckhpd 291(%rax,%r14,8), %ymm22, %ymm25
+
+// CHECK: vunpckhpd (%rcx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x09]
+ vunpckhpd (%rcx){1to4}, %ymm22, %ymm25
+
+// CHECK: vunpckhpd 4064(%rdx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x4a,0x7f]
+ vunpckhpd 4064(%rdx), %ymm22, %ymm25
+
+// CHECK: vunpckhpd 4096(%rdx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x8a,0x00,0x10,0x00,0x00]
+ vunpckhpd 4096(%rdx), %ymm22, %ymm25
+
+// CHECK: vunpckhpd -4096(%rdx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x4a,0x80]
+ vunpckhpd -4096(%rdx), %ymm22, %ymm25
+
+// CHECK: vunpckhpd -4128(%rdx), %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x8a,0xe0,0xef,0xff,0xff]
+ vunpckhpd -4128(%rdx), %ymm22, %ymm25
+
+// CHECK: vunpckhpd 1016(%rdx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x4a,0x7f]
+ vunpckhpd 1016(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vunpckhpd 1024(%rdx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x8a,0x00,0x04,0x00,0x00]
+ vunpckhpd 1024(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vunpckhpd -1024(%rdx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x4a,0x80]
+ vunpckhpd -1024(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vunpckhpd -1032(%rdx){1to4}, %ymm22, %ymm25
+// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x8a,0xf8,0xfb,0xff,0xff]
+ vunpckhpd -1032(%rdx){1to4}, %ymm22, %ymm25
+
+// CHECK: vunpcklpd %xmm18, %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x21,0xad,0x00,0x14,0xf2]
+ vunpcklpd %xmm18, %xmm26, %xmm30
+
+// CHECK: vunpcklpd %xmm18, %xmm26, %xmm30 {%k5}
+// CHECK: encoding: [0x62,0x21,0xad,0x05,0x14,0xf2]
+ vunpcklpd %xmm18, %xmm26, %xmm30 {%k5}
+
+// CHECK: vunpcklpd %xmm18, %xmm26, %xmm30 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0xad,0x85,0x14,0xf2]
+ vunpcklpd %xmm18, %xmm26, %xmm30 {%k5} {z}
+
+// CHECK: vunpcklpd (%rcx), %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0x31]
+ vunpcklpd (%rcx), %xmm26, %xmm30
+
+// CHECK: vunpcklpd 291(%rax,%r14,8), %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x21,0xad,0x00,0x14,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vunpcklpd 291(%rax,%r14,8), %xmm26, %xmm30
+
+// CHECK: vunpcklpd (%rcx){1to2}, %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0x31]
+ vunpcklpd (%rcx){1to2}, %xmm26, %xmm30
+
+// CHECK: vunpcklpd 2032(%rdx), %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0x72,0x7f]
+ vunpcklpd 2032(%rdx), %xmm26, %xmm30
+
+// CHECK: vunpcklpd 2048(%rdx), %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0xb2,0x00,0x08,0x00,0x00]
+ vunpcklpd 2048(%rdx), %xmm26, %xmm30
+
+// CHECK: vunpcklpd -2048(%rdx), %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0x72,0x80]
+ vunpcklpd -2048(%rdx), %xmm26, %xmm30
+
+// CHECK: vunpcklpd -2064(%rdx), %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0xb2,0xf0,0xf7,0xff,0xff]
+ vunpcklpd -2064(%rdx), %xmm26, %xmm30
+
+// CHECK: vunpcklpd 1016(%rdx){1to2}, %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0x72,0x7f]
+ vunpcklpd 1016(%rdx){1to2}, %xmm26, %xmm30
+
+// CHECK: vunpcklpd 1024(%rdx){1to2}, %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0xb2,0x00,0x04,0x00,0x00]
+ vunpcklpd 1024(%rdx){1to2}, %xmm26, %xmm30
+
+// CHECK: vunpcklpd -1024(%rdx){1to2}, %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0x72,0x80]
+ vunpcklpd -1024(%rdx){1to2}, %xmm26, %xmm30
+
+// CHECK: vunpcklpd -1032(%rdx){1to2}, %xmm26, %xmm30
+// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0xb2,0xf8,0xfb,0xff,0xff]
+ vunpcklpd -1032(%rdx){1to2}, %xmm26, %xmm30
+
+// CHECK: vunpcklpd %ymm17, %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xa1,0xad,0x20,0x14,0xe1]
+ vunpcklpd %ymm17, %ymm26, %ymm20
+
+// CHECK: vunpcklpd %ymm17, %ymm26, %ymm20 {%k2}
+// CHECK: encoding: [0x62,0xa1,0xad,0x22,0x14,0xe1]
+ vunpcklpd %ymm17, %ymm26, %ymm20 {%k2}
+
+// CHECK: vunpcklpd %ymm17, %ymm26, %ymm20 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0xad,0xa2,0x14,0xe1]
+ vunpcklpd %ymm17, %ymm26, %ymm20 {%k2} {z}
+
+// CHECK: vunpcklpd (%rcx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0x21]
+ vunpcklpd (%rcx), %ymm26, %ymm20
+
+// CHECK: vunpcklpd 291(%rax,%r14,8), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xa1,0xad,0x20,0x14,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vunpcklpd 291(%rax,%r14,8), %ymm26, %ymm20
+
+// CHECK: vunpcklpd (%rcx){1to4}, %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0x21]
+ vunpcklpd (%rcx){1to4}, %ymm26, %ymm20
+
+// CHECK: vunpcklpd 4064(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0x62,0x7f]
+ vunpcklpd 4064(%rdx), %ymm26, %ymm20
+
+// CHECK: vunpcklpd 4096(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0xa2,0x00,0x10,0x00,0x00]
+ vunpcklpd 4096(%rdx), %ymm26, %ymm20
+
+// CHECK: vunpcklpd -4096(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0x62,0x80]
+ vunpcklpd -4096(%rdx), %ymm26, %ymm20
+
+// CHECK: vunpcklpd -4128(%rdx), %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0xa2,0xe0,0xef,0xff,0xff]
+ vunpcklpd -4128(%rdx), %ymm26, %ymm20
+
+// CHECK: vunpcklpd 1016(%rdx){1to4}, %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0x62,0x7f]
+ vunpcklpd 1016(%rdx){1to4}, %ymm26, %ymm20
+
+// CHECK: vunpcklpd 1024(%rdx){1to4}, %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0xa2,0x00,0x04,0x00,0x00]
+ vunpcklpd 1024(%rdx){1to4}, %ymm26, %ymm20
+
+// CHECK: vunpcklpd -1024(%rdx){1to4}, %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0x62,0x80]
+ vunpcklpd -1024(%rdx){1to4}, %ymm26, %ymm20
+
+// CHECK: vunpcklpd -1032(%rdx){1to4}, %ymm26, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0xa2,0xf8,0xfb,0xff,0xff]
+ vunpcklpd -1032(%rdx){1to4}, %ymm26, %ymm20
+
+// CHECK: vpunpckldq %xmm17, %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x21,0x55,0x00,0x62,0xc9]
+ vpunpckldq %xmm17, %xmm21, %xmm25
+
+// CHECK: vpunpckldq %xmm17, %xmm21, %xmm25 {%k7}
+// CHECK: encoding: [0x62,0x21,0x55,0x07,0x62,0xc9]
+ vpunpckldq %xmm17, %xmm21, %xmm25 {%k7}
+
+// CHECK: vpunpckldq %xmm17, %xmm21, %xmm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x21,0x55,0x87,0x62,0xc9]
+ vpunpckldq %xmm17, %xmm21, %xmm25 {%k7} {z}
+
+// CHECK: vpunpckldq (%rcx), %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x09]
+ vpunpckldq (%rcx), %xmm21, %xmm25
+
+// CHECK: vpunpckldq 291(%rax,%r14,8), %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x21,0x55,0x00,0x62,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckldq 291(%rax,%r14,8), %xmm21, %xmm25
+
+// CHECK: vpunpckldq (%rcx){1to4}, %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x09]
+ vpunpckldq (%rcx){1to4}, %xmm21, %xmm25
+
+// CHECK: vpunpckldq 2032(%rdx), %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x4a,0x7f]
+ vpunpckldq 2032(%rdx), %xmm21, %xmm25
+
+// CHECK: vpunpckldq 2048(%rdx), %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x8a,0x00,0x08,0x00,0x00]
+ vpunpckldq 2048(%rdx), %xmm21, %xmm25
+
+// CHECK: vpunpckldq -2048(%rdx), %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x4a,0x80]
+ vpunpckldq -2048(%rdx), %xmm21, %xmm25
+
+// CHECK: vpunpckldq -2064(%rdx), %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x8a,0xf0,0xf7,0xff,0xff]
+ vpunpckldq -2064(%rdx), %xmm21, %xmm25
+
+// CHECK: vpunpckldq 508(%rdx){1to4}, %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x4a,0x7f]
+ vpunpckldq 508(%rdx){1to4}, %xmm21, %xmm25
+
+// CHECK: vpunpckldq 512(%rdx){1to4}, %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x8a,0x00,0x02,0x00,0x00]
+ vpunpckldq 512(%rdx){1to4}, %xmm21, %xmm25
+
+// CHECK: vpunpckldq -512(%rdx){1to4}, %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x4a,0x80]
+ vpunpckldq -512(%rdx){1to4}, %xmm21, %xmm25
+
+// CHECK: vpunpckldq -516(%rdx){1to4}, %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x8a,0xfc,0xfd,0xff,0xff]
+ vpunpckldq -516(%rdx){1to4}, %xmm21, %xmm25
+
+// CHECK: vpunpckldq %ymm26, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0x81,0x65,0x20,0x62,0xe2]
+ vpunpckldq %ymm26, %ymm19, %ymm20
+
+// CHECK: vpunpckldq %ymm26, %ymm19, %ymm20 {%k7}
+// CHECK: encoding: [0x62,0x81,0x65,0x27,0x62,0xe2]
+ vpunpckldq %ymm26, %ymm19, %ymm20 {%k7}
+
+// CHECK: vpunpckldq %ymm26, %ymm19, %ymm20 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0x65,0xa7,0x62,0xe2]
+ vpunpckldq %ymm26, %ymm19, %ymm20 {%k7} {z}
+
+// CHECK: vpunpckldq (%rcx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0x21]
+ vpunpckldq (%rcx), %ymm19, %ymm20
+
+// CHECK: vpunpckldq 291(%rax,%r14,8), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x65,0x20,0x62,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckldq 291(%rax,%r14,8), %ymm19, %ymm20
+
+// CHECK: vpunpckldq (%rcx){1to8}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0x21]
+ vpunpckldq (%rcx){1to8}, %ymm19, %ymm20
+
+// CHECK: vpunpckldq 4064(%rdx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0x62,0x7f]
+ vpunpckldq 4064(%rdx), %ymm19, %ymm20
+
+// CHECK: vpunpckldq 4096(%rdx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0xa2,0x00,0x10,0x00,0x00]
+ vpunpckldq 4096(%rdx), %ymm19, %ymm20
+
+// CHECK: vpunpckldq -4096(%rdx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0x62,0x80]
+ vpunpckldq -4096(%rdx), %ymm19, %ymm20
+
+// CHECK: vpunpckldq -4128(%rdx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0xa2,0xe0,0xef,0xff,0xff]
+ vpunpckldq -4128(%rdx), %ymm19, %ymm20
+
+// CHECK: vpunpckldq 508(%rdx){1to8}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0x62,0x7f]
+ vpunpckldq 508(%rdx){1to8}, %ymm19, %ymm20
+
+// CHECK: vpunpckldq 512(%rdx){1to8}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0xa2,0x00,0x02,0x00,0x00]
+ vpunpckldq 512(%rdx){1to8}, %ymm19, %ymm20
+
+// CHECK: vpunpckldq -512(%rdx){1to8}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0x62,0x80]
+ vpunpckldq -512(%rdx){1to8}, %ymm19, %ymm20
+
+// CHECK: vpunpckldq -516(%rdx){1to8}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0xa2,0xfc,0xfd,0xff,0xff]
+ vpunpckldq -516(%rdx){1to8}, %ymm19, %ymm20
+
+// CHECK: vpunpckhdq %xmm27, %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x01,0x3d,0x00,0x6a,0xe3]
+ vpunpckhdq %xmm27, %xmm24, %xmm28
+
+// CHECK: vpunpckhdq %xmm27, %xmm24, %xmm28 {%k7}
+// CHECK: encoding: [0x62,0x01,0x3d,0x07,0x6a,0xe3]
+ vpunpckhdq %xmm27, %xmm24, %xmm28 {%k7}
+
+// CHECK: vpunpckhdq %xmm27, %xmm24, %xmm28 {%k7} {z}
+// CHECK: encoding: [0x62,0x01,0x3d,0x87,0x6a,0xe3]
+ vpunpckhdq %xmm27, %xmm24, %xmm28 {%k7} {z}
+
+// CHECK: vpunpckhdq (%rcx), %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0x21]
+ vpunpckhdq (%rcx), %xmm24, %xmm28
+
+// CHECK: vpunpckhdq 291(%rax,%r14,8), %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x21,0x3d,0x00,0x6a,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhdq 291(%rax,%r14,8), %xmm24, %xmm28
+
+// CHECK: vpunpckhdq (%rcx){1to4}, %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0x21]
+ vpunpckhdq (%rcx){1to4}, %xmm24, %xmm28
+
+// CHECK: vpunpckhdq 2032(%rdx), %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0x62,0x7f]
+ vpunpckhdq 2032(%rdx), %xmm24, %xmm28
+
+// CHECK: vpunpckhdq 2048(%rdx), %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0xa2,0x00,0x08,0x00,0x00]
+ vpunpckhdq 2048(%rdx), %xmm24, %xmm28
+
+// CHECK: vpunpckhdq -2048(%rdx), %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0x62,0x80]
+ vpunpckhdq -2048(%rdx), %xmm24, %xmm28
+
+// CHECK: vpunpckhdq -2064(%rdx), %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0xa2,0xf0,0xf7,0xff,0xff]
+ vpunpckhdq -2064(%rdx), %xmm24, %xmm28
+
+// CHECK: vpunpckhdq 508(%rdx){1to4}, %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0x62,0x7f]
+ vpunpckhdq 508(%rdx){1to4}, %xmm24, %xmm28
+
+// CHECK: vpunpckhdq 512(%rdx){1to4}, %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0xa2,0x00,0x02,0x00,0x00]
+ vpunpckhdq 512(%rdx){1to4}, %xmm24, %xmm28
+
+// CHECK: vpunpckhdq -512(%rdx){1to4}, %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0x62,0x80]
+ vpunpckhdq -512(%rdx){1to4}, %xmm24, %xmm28
+
+// CHECK: vpunpckhdq -516(%rdx){1to4}, %xmm24, %xmm28
+// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0xa2,0xfc,0xfd,0xff,0xff]
+ vpunpckhdq -516(%rdx){1to4}, %xmm24, %xmm28
+
+// CHECK: vpunpckhdq %ymm28, %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x01,0x3d,0x20,0x6a,0xd4]
+ vpunpckhdq %ymm28, %ymm24, %ymm26
+
+// CHECK: vpunpckhdq %ymm28, %ymm24, %ymm26 {%k2}
+// CHECK: encoding: [0x62,0x01,0x3d,0x22,0x6a,0xd4]
+ vpunpckhdq %ymm28, %ymm24, %ymm26 {%k2}
+
+// CHECK: vpunpckhdq %ymm28, %ymm24, %ymm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0x3d,0xa2,0x6a,0xd4]
+ vpunpckhdq %ymm28, %ymm24, %ymm26 {%k2} {z}
+
+// CHECK: vpunpckhdq (%rcx), %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x11]
+ vpunpckhdq (%rcx), %ymm24, %ymm26
+
+// CHECK: vpunpckhdq 291(%rax,%r14,8), %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x21,0x3d,0x20,0x6a,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhdq 291(%rax,%r14,8), %ymm24, %ymm26
+
+// CHECK: vpunpckhdq (%rcx){1to8}, %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x11]
+ vpunpckhdq (%rcx){1to8}, %ymm24, %ymm26
+
+// CHECK: vpunpckhdq 4064(%rdx), %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x52,0x7f]
+ vpunpckhdq 4064(%rdx), %ymm24, %ymm26
+
+// CHECK: vpunpckhdq 4096(%rdx), %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x92,0x00,0x10,0x00,0x00]
+ vpunpckhdq 4096(%rdx), %ymm24, %ymm26
+
+// CHECK: vpunpckhdq -4096(%rdx), %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x52,0x80]
+ vpunpckhdq -4096(%rdx), %ymm24, %ymm26
+
+// CHECK: vpunpckhdq -4128(%rdx), %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x92,0xe0,0xef,0xff,0xff]
+ vpunpckhdq -4128(%rdx), %ymm24, %ymm26
+
+// CHECK: vpunpckhdq 508(%rdx){1to8}, %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x52,0x7f]
+ vpunpckhdq 508(%rdx){1to8}, %ymm24, %ymm26
+
+// CHECK: vpunpckhdq 512(%rdx){1to8}, %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x92,0x00,0x02,0x00,0x00]
+ vpunpckhdq 512(%rdx){1to8}, %ymm24, %ymm26
+
+// CHECK: vpunpckhdq -512(%rdx){1to8}, %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x52,0x80]
+ vpunpckhdq -512(%rdx){1to8}, %ymm24, %ymm26
+
+// CHECK: vpunpckhdq -516(%rdx){1to8}, %ymm24, %ymm26
+// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x92,0xfc,0xfd,0xff,0xff]
+ vpunpckhdq -516(%rdx){1to8}, %ymm24, %ymm26
+
+// CHECK: vpunpcklqdq %xmm23, %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x6c,0xcf]
+ vpunpcklqdq %xmm23, %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq %xmm23, %xmm27, %xmm17 {%k6}
+// CHECK: encoding: [0x62,0xa1,0xa5,0x06,0x6c,0xcf]
+ vpunpcklqdq %xmm23, %xmm27, %xmm17 {%k6}
+
+// CHECK: vpunpcklqdq %xmm23, %xmm27, %xmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0xa5,0x86,0x6c,0xcf]
+ vpunpcklqdq %xmm23, %xmm27, %xmm17 {%k6} {z}
+
+// CHECK: vpunpcklqdq (%rcx), %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x09]
+ vpunpcklqdq (%rcx), %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq 291(%rax,%r14,8), %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x6c,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpunpcklqdq 291(%rax,%r14,8), %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq (%rcx){1to2}, %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x09]
+ vpunpcklqdq (%rcx){1to2}, %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq 2032(%rdx), %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x4a,0x7f]
+ vpunpcklqdq 2032(%rdx), %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq 2048(%rdx), %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x8a,0x00,0x08,0x00,0x00]
+ vpunpcklqdq 2048(%rdx), %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq -2048(%rdx), %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x4a,0x80]
+ vpunpcklqdq -2048(%rdx), %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq -2064(%rdx), %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x8a,0xf0,0xf7,0xff,0xff]
+ vpunpcklqdq -2064(%rdx), %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq 1016(%rdx){1to2}, %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x4a,0x7f]
+ vpunpcklqdq 1016(%rdx){1to2}, %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq 1024(%rdx){1to2}, %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x8a,0x00,0x04,0x00,0x00]
+ vpunpcklqdq 1024(%rdx){1to2}, %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq -1024(%rdx){1to2}, %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x4a,0x80]
+ vpunpcklqdq -1024(%rdx){1to2}, %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq -1032(%rdx){1to2}, %xmm27, %xmm17
+// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x8a,0xf8,0xfb,0xff,0xff]
+ vpunpcklqdq -1032(%rdx){1to2}, %xmm27, %xmm17
+
+// CHECK: vpunpcklqdq %ymm28, %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x01,0xdd,0x20,0x6c,0xec]
+ vpunpcklqdq %ymm28, %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq %ymm28, %ymm20, %ymm29 {%k7}
+// CHECK: encoding: [0x62,0x01,0xdd,0x27,0x6c,0xec]
+ vpunpcklqdq %ymm28, %ymm20, %ymm29 {%k7}
+
+// CHECK: vpunpcklqdq %ymm28, %ymm20, %ymm29 {%k7} {z}
+// CHECK: encoding: [0x62,0x01,0xdd,0xa7,0x6c,0xec]
+ vpunpcklqdq %ymm28, %ymm20, %ymm29 {%k7} {z}
+
+// CHECK: vpunpcklqdq (%rcx), %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0x29]
+ vpunpcklqdq (%rcx), %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq 291(%rax,%r14,8), %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x21,0xdd,0x20,0x6c,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpunpcklqdq 291(%rax,%r14,8), %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq (%rcx){1to4}, %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0x29]
+ vpunpcklqdq (%rcx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq 4064(%rdx), %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0x6a,0x7f]
+ vpunpcklqdq 4064(%rdx), %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq 4096(%rdx), %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0xaa,0x00,0x10,0x00,0x00]
+ vpunpcklqdq 4096(%rdx), %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq -4096(%rdx), %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0x6a,0x80]
+ vpunpcklqdq -4096(%rdx), %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq -4128(%rdx), %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0xaa,0xe0,0xef,0xff,0xff]
+ vpunpcklqdq -4128(%rdx), %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq 1016(%rdx){1to4}, %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0x6a,0x7f]
+ vpunpcklqdq 1016(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq 1024(%rdx){1to4}, %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0xaa,0x00,0x04,0x00,0x00]
+ vpunpcklqdq 1024(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq -1024(%rdx){1to4}, %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0x6a,0x80]
+ vpunpcklqdq -1024(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpunpcklqdq -1032(%rdx){1to4}, %ymm20, %ymm29
+// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0xaa,0xf8,0xfb,0xff,0xff]
+ vpunpcklqdq -1032(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpunpckhqdq %xmm24, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0x81,0xe5,0x00,0x6d,0xd8]
+ vpunpckhqdq %xmm24, %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq %xmm24, %xmm19, %xmm19 {%k6}
+// CHECK: encoding: [0x62,0x81,0xe5,0x06,0x6d,0xd8]
+ vpunpckhqdq %xmm24, %xmm19, %xmm19 {%k6}
+
+// CHECK: vpunpckhqdq %xmm24, %xmm19, %xmm19 {%k6} {z}
+// CHECK: encoding: [0x62,0x81,0xe5,0x86,0x6d,0xd8]
+ vpunpckhqdq %xmm24, %xmm19, %xmm19 {%k6} {z}
+
+// CHECK: vpunpckhqdq (%rcx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x19]
+ vpunpckhqdq (%rcx), %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq 291(%rax,%r14,8), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xa1,0xe5,0x00,0x6d,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhqdq 291(%rax,%r14,8), %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq (%rcx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x19]
+ vpunpckhqdq (%rcx){1to2}, %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq 2032(%rdx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x5a,0x7f]
+ vpunpckhqdq 2032(%rdx), %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq 2048(%rdx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x9a,0x00,0x08,0x00,0x00]
+ vpunpckhqdq 2048(%rdx), %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq -2048(%rdx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x5a,0x80]
+ vpunpckhqdq -2048(%rdx), %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq -2064(%rdx), %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x9a,0xf0,0xf7,0xff,0xff]
+ vpunpckhqdq -2064(%rdx), %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq 1016(%rdx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x5a,0x7f]
+ vpunpckhqdq 1016(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq 1024(%rdx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x9a,0x00,0x04,0x00,0x00]
+ vpunpckhqdq 1024(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq -1024(%rdx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x5a,0x80]
+ vpunpckhqdq -1024(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq -1032(%rdx){1to2}, %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x9a,0xf8,0xfb,0xff,0xff]
+ vpunpckhqdq -1032(%rdx){1to2}, %xmm19, %xmm19
+
+// CHECK: vpunpckhqdq %ymm28, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0x81,0xe5,0x20,0x6d,0xe4]
+ vpunpckhqdq %ymm28, %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq %ymm28, %ymm19, %ymm20 {%k6}
+// CHECK: encoding: [0x62,0x81,0xe5,0x26,0x6d,0xe4]
+ vpunpckhqdq %ymm28, %ymm19, %ymm20 {%k6}
+
+// CHECK: vpunpckhqdq %ymm28, %ymm19, %ymm20 {%k6} {z}
+// CHECK: encoding: [0x62,0x81,0xe5,0xa6,0x6d,0xe4]
+ vpunpckhqdq %ymm28, %ymm19, %ymm20 {%k6} {z}
+
+// CHECK: vpunpckhqdq (%rcx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0x21]
+ vpunpckhqdq (%rcx), %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq 291(%rax,%r14,8), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xa1,0xe5,0x20,0x6d,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpunpckhqdq 291(%rax,%r14,8), %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq (%rcx){1to4}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0x21]
+ vpunpckhqdq (%rcx){1to4}, %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq 4064(%rdx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0x62,0x7f]
+ vpunpckhqdq 4064(%rdx), %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq 4096(%rdx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0xa2,0x00,0x10,0x00,0x00]
+ vpunpckhqdq 4096(%rdx), %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq -4096(%rdx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0x62,0x80]
+ vpunpckhqdq -4096(%rdx), %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq -4128(%rdx), %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0xa2,0xe0,0xef,0xff,0xff]
+ vpunpckhqdq -4128(%rdx), %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq 1016(%rdx){1to4}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0x62,0x7f]
+ vpunpckhqdq 1016(%rdx){1to4}, %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq 1024(%rdx){1to4}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0xa2,0x00,0x04,0x00,0x00]
+ vpunpckhqdq 1024(%rdx){1to4}, %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq -1024(%rdx){1to4}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0x62,0x80]
+ vpunpckhqdq -1024(%rdx){1to4}, %ymm19, %ymm20
+
+// CHECK: vpunpckhqdq -1032(%rdx){1to4}, %ymm19, %ymm20
+// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0xa2,0xf8,0xfb,0xff,0xff]
+ vpunpckhqdq -1032(%rdx){1to4}, %ymm19, %ymm20
+
+// CHECK: vinsertf32x4 $171, %xmm27, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0x83,0x6d,0x20,0x18,0xd3,0xab]
+ vinsertf32x4 $0xab, %xmm27, %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $171, %xmm27, %ymm18, %ymm18 {%k7}
+// CHECK: encoding: [0x62,0x83,0x6d,0x27,0x18,0xd3,0xab]
+ vinsertf32x4 $0xab, %xmm27, %ymm18, %ymm18 {%k7}
+
+// CHECK: vinsertf32x4 $171, %xmm27, %ymm18, %ymm18 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x6d,0xa7,0x18,0xd3,0xab]
+ vinsertf32x4 $0xab, %xmm27, %ymm18, %ymm18 {%k7} {z}
+
+// CHECK: vinsertf32x4 $123, %xmm27, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0x83,0x6d,0x20,0x18,0xd3,0x7b]
+ vinsertf32x4 $0x7b, %xmm27, %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, (%rcx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x11,0x7b]
+ vinsertf32x4 $0x7b, (%rcx), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, 291(%rax,%r14,8), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xa3,0x6d,0x20,0x18,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf32x4 $0x7b, 291(%rax,%r14,8), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, 2032(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x52,0x7f,0x7b]
+ vinsertf32x4 $0x7b, 2032(%rdx), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, 2048(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x92,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf32x4 $0x7b, 2048(%rdx), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, -2048(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x52,0x80,0x7b]
+ vinsertf32x4 $0x7b, -2048(%rdx), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, -2064(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x92,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf32x4 $0x7b, -2064(%rdx), %ymm18, %ymm18
+
+// CHECK: vinserti32x4 $171, %xmm24, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0x83,0x1d,0x20,0x38,0xc8,0xab]
+ vinserti32x4 $0xab, %xmm24, %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $171, %xmm24, %ymm28, %ymm17 {%k3}
+// CHECK: encoding: [0x62,0x83,0x1d,0x23,0x38,0xc8,0xab]
+ vinserti32x4 $0xab, %xmm24, %ymm28, %ymm17 {%k3}
+
+// CHECK: vinserti32x4 $171, %xmm24, %ymm28, %ymm17 {%k3} {z}
+// CHECK: encoding: [0x62,0x83,0x1d,0xa3,0x38,0xc8,0xab]
+ vinserti32x4 $0xab, %xmm24, %ymm28, %ymm17 {%k3} {z}
+
+// CHECK: vinserti32x4 $123, %xmm24, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0x83,0x1d,0x20,0x38,0xc8,0x7b]
+ vinserti32x4 $0x7b, %xmm24, %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, (%rcx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x09,0x7b]
+ vinserti32x4 $0x7b, (%rcx), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, 291(%rax,%r14,8), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xa3,0x1d,0x20,0x38,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti32x4 $0x7b, 291(%rax,%r14,8), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, 2032(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x4a,0x7f,0x7b]
+ vinserti32x4 $0x7b, 2032(%rdx), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, 2048(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vinserti32x4 $0x7b, 2048(%rdx), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, -2048(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x4a,0x80,0x7b]
+ vinserti32x4 $0x7b, -2048(%rdx), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, -2064(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti32x4 $0x7b, -2064(%rdx), %ymm28, %ymm17
+
+// CHECK: vextractf32x4 $171, %ymm17, %xmm28
+// CHECK: encoding: [0x62,0x83,0x7d,0x28,0x19,0xcc,0xab]
+ vextractf32x4 $0xab, %ymm17, %xmm28
+
+// CHECK: vextractf32x4 $171, %ymm17, %xmm28 {%k6}
+// CHECK: encoding: [0x62,0x83,0x7d,0x2e,0x19,0xcc,0xab]
+ vextractf32x4 $0xab, %ymm17, %xmm28 {%k6}
+
+// CHECK: vextractf32x4 $171, %ymm17, %xmm28 {%k6} {z}
+// CHECK: encoding: [0x62,0x83,0x7d,0xae,0x19,0xcc,0xab]
+ vextractf32x4 $0xab, %ymm17, %xmm28 {%k6} {z}
+
+// CHECK: vextractf32x4 $123, %ymm17, %xmm28
+// CHECK: encoding: [0x62,0x83,0x7d,0x28,0x19,0xcc,0x7b]
+ vextractf32x4 $0x7b, %ymm17, %xmm28
+
+// CHECK: vextractf32x4 $171, %ymm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x19,0x21,0xab]
+ vextractf32x4 $0xab, %ymm20, (%rcx)
+
+// CHECK: vextractf32x4 $171, %ymm20, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe3,0x7d,0x2a,0x19,0x21,0xab]
+ vextractf32x4 $0xab, %ymm20, (%rcx) {%k2}
+
+// CHECK: vextractf32x4 $123, %ymm20, (%rcx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x19,0x21,0x7b]
+ vextractf32x4 $0x7b, %ymm20, (%rcx)
+
+// CHECK: vextractf32x4 $123, %ymm20, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x19,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextractf32x4 $0x7b, %ymm20, 291(%rax,%r14,8)
+
+// CHECK: vextractf32x4 $123, %ymm20, 2032(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x19,0x62,0x7f,0x7b]
+ vextractf32x4 $0x7b, %ymm20, 2032(%rdx)
+
+// CHECK: vextractf32x4 $123, %ymm20, 2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x19,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vextractf32x4 $0x7b, %ymm20, 2048(%rdx)
+
+// CHECK: vextractf32x4 $123, %ymm20, -2048(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x19,0x62,0x80,0x7b]
+ vextractf32x4 $0x7b, %ymm20, -2048(%rdx)
+
+// CHECK: vextractf32x4 $123, %ymm20, -2064(%rdx)
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x19,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vextractf32x4 $0x7b, %ymm20, -2064(%rdx)
+
+// CHECK: vextracti32x4 $171, %ymm21, %xmm20
+// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x39,0xec,0xab]
+ vextracti32x4 $0xab, %ymm21, %xmm20
+
+// CHECK: vextracti32x4 $171, %ymm21, %xmm20 {%k4}
+// CHECK: encoding: [0x62,0xa3,0x7d,0x2c,0x39,0xec,0xab]
+ vextracti32x4 $0xab, %ymm21, %xmm20 {%k4}
+
+// CHECK: vextracti32x4 $171, %ymm21, %xmm20 {%k4} {z}
+// CHECK: encoding: [0x62,0xa3,0x7d,0xac,0x39,0xec,0xab]
+ vextracti32x4 $0xab, %ymm21, %xmm20 {%k4} {z}
+
+// CHECK: vextracti32x4 $123, %ymm21, %xmm20
+// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x39,0xec,0x7b]
+ vextracti32x4 $0x7b, %ymm21, %xmm20
+
+// CHECK: vextracti32x4 $171, %ymm28, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x39,0x21,0xab]
+ vextracti32x4 $0xab, %ymm28, (%rcx)
+
+// CHECK: vextracti32x4 $171, %ymm28, (%rcx) {%k6}
+// CHECK: encoding: [0x62,0x63,0x7d,0x2e,0x39,0x21,0xab]
+ vextracti32x4 $0xab, %ymm28, (%rcx) {%k6}
+
+// CHECK: vextracti32x4 $123, %ymm28, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x39,0x21,0x7b]
+ vextracti32x4 $0x7b, %ymm28, (%rcx)
+
+// CHECK: vextracti32x4 $123, %ymm28, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x39,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vextracti32x4 $0x7b, %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vextracti32x4 $123, %ymm28, 2032(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x39,0x62,0x7f,0x7b]
+ vextracti32x4 $0x7b, %ymm28, 2032(%rdx)
+
+// CHECK: vextracti32x4 $123, %ymm28, 2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x39,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vextracti32x4 $0x7b, %ymm28, 2048(%rdx)
+
+// CHECK: vextracti32x4 $123, %ymm28, -2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x39,0x62,0x80,0x7b]
+ vextracti32x4 $0x7b, %ymm28, -2048(%rdx)
+
+// CHECK: vextracti32x4 $123, %ymm28, -2064(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x39,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vextracti32x4 $0x7b, %ymm28, -2064(%rdx)
+
+// CHECK: vgetmantps $171, %xmm23, %xmm29
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x26,0xef,0xab]
+ vgetmantps $0xab, %xmm23, %xmm29
+
+// CHECK: vgetmantps $171, %xmm23, %xmm29 {%k5}
+// CHECK: encoding: [0x62,0x23,0x7d,0x0d,0x26,0xef,0xab]
+ vgetmantps $0xab, %xmm23, %xmm29 {%k5}
+
+// CHECK: vgetmantps $171, %xmm23, %xmm29 {%k5} {z}
+// CHECK: encoding: [0x62,0x23,0x7d,0x8d,0x26,0xef,0xab]
+ vgetmantps $0xab, %xmm23, %xmm29 {%k5} {z}
+
+// CHECK: vgetmantps $123, %xmm23, %xmm29
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x26,0xef,0x7b]
+ vgetmantps $0x7b, %xmm23, %xmm29
+
+// CHECK: vgetmantps $123, (%rcx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x26,0x29,0x7b]
+ vgetmantps $0x7b, (%rcx), %xmm29
+
+// CHECK: vgetmantps $123, 291(%rax,%r14,8), %xmm29
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x26,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantps $0x7b, 291(%rax,%r14,8), %xmm29
+
+// CHECK: vgetmantps $123, (%rcx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x26,0x29,0x7b]
+ vgetmantps $0x7b, (%rcx){1to4}, %xmm29
+
+// CHECK: vgetmantps $123, 2032(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x26,0x6a,0x7f,0x7b]
+ vgetmantps $0x7b, 2032(%rdx), %xmm29
+
+// CHECK: vgetmantps $123, 2048(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x26,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vgetmantps $0x7b, 2048(%rdx), %xmm29
+
+// CHECK: vgetmantps $123, -2048(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x26,0x6a,0x80,0x7b]
+ vgetmantps $0x7b, -2048(%rdx), %xmm29
+
+// CHECK: vgetmantps $123, -2064(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x26,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vgetmantps $0x7b, -2064(%rdx), %xmm29
+
+// CHECK: vgetmantps $123, 508(%rdx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x26,0x6a,0x7f,0x7b]
+ vgetmantps $0x7b, 508(%rdx){1to4}, %xmm29
+
+// CHECK: vgetmantps $123, 512(%rdx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x26,0xaa,0x00,0x02,0x00,0x00,0x7b]
+ vgetmantps $0x7b, 512(%rdx){1to4}, %xmm29
+
+// CHECK: vgetmantps $123, -512(%rdx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x26,0x6a,0x80,0x7b]
+ vgetmantps $0x7b, -512(%rdx){1to4}, %xmm29
+
+// CHECK: vgetmantps $123, -516(%rdx){1to4}, %xmm29
+// CHECK: encoding: [0x62,0x63,0x7d,0x18,0x26,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+ vgetmantps $0x7b, -516(%rdx){1to4}, %xmm29
+
+// CHECK: vgetmantps $171, %ymm29, %ymm17
+// CHECK: encoding: [0x62,0x83,0x7d,0x28,0x26,0xcd,0xab]
+ vgetmantps $0xab, %ymm29, %ymm17
+
+// CHECK: vgetmantps $171, %ymm29, %ymm17 {%k1}
+// CHECK: encoding: [0x62,0x83,0x7d,0x29,0x26,0xcd,0xab]
+ vgetmantps $0xab, %ymm29, %ymm17 {%k1}
+
+// CHECK: vgetmantps $171, %ymm29, %ymm17 {%k1} {z}
+// CHECK: encoding: [0x62,0x83,0x7d,0xa9,0x26,0xcd,0xab]
+ vgetmantps $0xab, %ymm29, %ymm17 {%k1} {z}
+
+// CHECK: vgetmantps $123, %ymm29, %ymm17
+// CHECK: encoding: [0x62,0x83,0x7d,0x28,0x26,0xcd,0x7b]
+ vgetmantps $0x7b, %ymm29, %ymm17
+
+// CHECK: vgetmantps $123, (%rcx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x26,0x09,0x7b]
+ vgetmantps $0x7b, (%rcx), %ymm17
+
+// CHECK: vgetmantps $123, 291(%rax,%r14,8), %ymm17
+// CHECK: encoding: [0x62,0xa3,0x7d,0x28,0x26,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantps $0x7b, 291(%rax,%r14,8), %ymm17
+
+// CHECK: vgetmantps $123, (%rcx){1to8}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x26,0x09,0x7b]
+ vgetmantps $0x7b, (%rcx){1to8}, %ymm17
+
+// CHECK: vgetmantps $123, 4064(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x26,0x4a,0x7f,0x7b]
+ vgetmantps $0x7b, 4064(%rdx), %ymm17
+
+// CHECK: vgetmantps $123, 4096(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x26,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vgetmantps $0x7b, 4096(%rdx), %ymm17
+
+// CHECK: vgetmantps $123, -4096(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x26,0x4a,0x80,0x7b]
+ vgetmantps $0x7b, -4096(%rdx), %ymm17
+
+// CHECK: vgetmantps $123, -4128(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x28,0x26,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vgetmantps $0x7b, -4128(%rdx), %ymm17
+
+// CHECK: vgetmantps $123, 508(%rdx){1to8}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x26,0x4a,0x7f,0x7b]
+ vgetmantps $0x7b, 508(%rdx){1to8}, %ymm17
+
+// CHECK: vgetmantps $123, 512(%rdx){1to8}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x26,0x8a,0x00,0x02,0x00,0x00,0x7b]
+ vgetmantps $0x7b, 512(%rdx){1to8}, %ymm17
+
+// CHECK: vgetmantps $123, -512(%rdx){1to8}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x26,0x4a,0x80,0x7b]
+ vgetmantps $0x7b, -512(%rdx){1to8}, %ymm17
+
+// CHECK: vgetmantps $123, -516(%rdx){1to8}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x7d,0x38,0x26,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+ vgetmantps $0x7b, -516(%rdx){1to8}, %ymm17
+
+// CHECK: vgetmantpd $171, %xmm29, %xmm28
+// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x26,0xe5,0xab]
+ vgetmantpd $0xab, %xmm29, %xmm28
+
+// CHECK: vgetmantpd $171, %xmm29, %xmm28 {%k4}
+// CHECK: encoding: [0x62,0x03,0xfd,0x0c,0x26,0xe5,0xab]
+ vgetmantpd $0xab, %xmm29, %xmm28 {%k4}
+
+// CHECK: vgetmantpd $171, %xmm29, %xmm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x03,0xfd,0x8c,0x26,0xe5,0xab]
+ vgetmantpd $0xab, %xmm29, %xmm28 {%k4} {z}
+
+// CHECK: vgetmantpd $123, %xmm29, %xmm28
+// CHECK: encoding: [0x62,0x03,0xfd,0x08,0x26,0xe5,0x7b]
+ vgetmantpd $0x7b, %xmm29, %xmm28
+
+// CHECK: vgetmantpd $123, (%rcx), %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x26,0x21,0x7b]
+ vgetmantpd $0x7b, (%rcx), %xmm28
+
+// CHECK: vgetmantpd $123, 291(%rax,%r14,8), %xmm28
+// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x26,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpd $0x7b, 291(%rax,%r14,8), %xmm28
+
+// CHECK: vgetmantpd $123, (%rcx){1to2}, %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x26,0x21,0x7b]
+ vgetmantpd $0x7b, (%rcx){1to2}, %xmm28
+
+// CHECK: vgetmantpd $123, 2032(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x26,0x62,0x7f,0x7b]
+ vgetmantpd $0x7b, 2032(%rdx), %xmm28
+
+// CHECK: vgetmantpd $123, 2048(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x26,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vgetmantpd $0x7b, 2048(%rdx), %xmm28
+
+// CHECK: vgetmantpd $123, -2048(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x26,0x62,0x80,0x7b]
+ vgetmantpd $0x7b, -2048(%rdx), %xmm28
+
+// CHECK: vgetmantpd $123, -2064(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x26,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vgetmantpd $0x7b, -2064(%rdx), %xmm28
+
+// CHECK: vgetmantpd $123, 1016(%rdx){1to2}, %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x26,0x62,0x7f,0x7b]
+ vgetmantpd $0x7b, 1016(%rdx){1to2}, %xmm28
+
+// CHECK: vgetmantpd $123, 1024(%rdx){1to2}, %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x26,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vgetmantpd $0x7b, 1024(%rdx){1to2}, %xmm28
+
+// CHECK: vgetmantpd $123, -1024(%rdx){1to2}, %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x26,0x62,0x80,0x7b]
+ vgetmantpd $0x7b, -1024(%rdx){1to2}, %xmm28
+
+// CHECK: vgetmantpd $123, -1032(%rdx){1to2}, %xmm28
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x26,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vgetmantpd $0x7b, -1032(%rdx){1to2}, %xmm28
+
+// CHECK: vgetmantpd $171, %ymm23, %ymm23
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x26,0xff,0xab]
+ vgetmantpd $0xab, %ymm23, %ymm23
+
+// CHECK: vgetmantpd $171, %ymm23, %ymm23 {%k5}
+// CHECK: encoding: [0x62,0xa3,0xfd,0x2d,0x26,0xff,0xab]
+ vgetmantpd $0xab, %ymm23, %ymm23 {%k5}
+
+// CHECK: vgetmantpd $171, %ymm23, %ymm23 {%k5} {z}
+// CHECK: encoding: [0x62,0xa3,0xfd,0xad,0x26,0xff,0xab]
+ vgetmantpd $0xab, %ymm23, %ymm23 {%k5} {z}
+
+// CHECK: vgetmantpd $123, %ymm23, %ymm23
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x26,0xff,0x7b]
+ vgetmantpd $0x7b, %ymm23, %ymm23
+
+// CHECK: vgetmantpd $123, (%rcx), %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x26,0x39,0x7b]
+ vgetmantpd $0x7b, (%rcx), %ymm23
+
+// CHECK: vgetmantpd $123, 291(%rax,%r14,8), %ymm23
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x26,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vgetmantpd $0x7b, 291(%rax,%r14,8), %ymm23
+
+// CHECK: vgetmantpd $123, (%rcx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x26,0x39,0x7b]
+ vgetmantpd $0x7b, (%rcx){1to4}, %ymm23
+
+// CHECK: vgetmantpd $123, 4064(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x26,0x7a,0x7f,0x7b]
+ vgetmantpd $0x7b, 4064(%rdx), %ymm23
+
+// CHECK: vgetmantpd $123, 4096(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x26,0xba,0x00,0x10,0x00,0x00,0x7b]
+ vgetmantpd $0x7b, 4096(%rdx), %ymm23
+
+// CHECK: vgetmantpd $123, -4096(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x26,0x7a,0x80,0x7b]
+ vgetmantpd $0x7b, -4096(%rdx), %ymm23
+
+// CHECK: vgetmantpd $123, -4128(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x26,0xba,0xe0,0xef,0xff,0xff,0x7b]
+ vgetmantpd $0x7b, -4128(%rdx), %ymm23
+
+// CHECK: vgetmantpd $123, 1016(%rdx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x26,0x7a,0x7f,0x7b]
+ vgetmantpd $0x7b, 1016(%rdx){1to4}, %ymm23
+
+// CHECK: vgetmantpd $123, 1024(%rdx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x26,0xba,0x00,0x04,0x00,0x00,0x7b]
+ vgetmantpd $0x7b, 1024(%rdx){1to4}, %ymm23
+
+// CHECK: vgetmantpd $123, -1024(%rdx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x26,0x7a,0x80,0x7b]
+ vgetmantpd $0x7b, -1024(%rdx){1to4}, %ymm23
+
+// CHECK: vgetmantpd $123, -1032(%rdx){1to4}, %ymm23
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x26,0xba,0xf8,0xfb,0xff,0xff,0x7b]
+ vgetmantpd $0x7b, -1032(%rdx){1to4}, %ymm23
+
+// CHECK: vshufps $171, %xmm21, %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x44,0x00,0xc6,0xcd,0xab]
+ vshufps $0xab, %xmm21, %xmm23, %xmm17
+
+// CHECK: vshufps $171, %xmm21, %xmm23, %xmm17 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x44,0x03,0xc6,0xcd,0xab]
+ vshufps $0xab, %xmm21, %xmm23, %xmm17 {%k3}
+
+// CHECK: vshufps $171, %xmm21, %xmm23, %xmm17 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x44,0x83,0xc6,0xcd,0xab]
+ vshufps $0xab, %xmm21, %xmm23, %xmm17 {%k3} {z}
+
+// CHECK: vshufps $123, %xmm21, %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x44,0x00,0xc6,0xcd,0x7b]
+ vshufps $0x7b, %xmm21, %xmm23, %xmm17
+
+// CHECK: vshufps $123, (%rcx), %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x00,0xc6,0x09,0x7b]
+ vshufps $0x7b, (%rcx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, 291(%rax,%r14,8), %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xa1,0x44,0x00,0xc6,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufps $0x7b, 291(%rax,%r14,8), %xmm23, %xmm17
+
+// CHECK: vshufps $123, (%rcx){1to4}, %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x10,0xc6,0x09,0x7b]
+ vshufps $0x7b, (%rcx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $123, 2032(%rdx), %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x00,0xc6,0x4a,0x7f,0x7b]
+ vshufps $0x7b, 2032(%rdx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, 2048(%rdx), %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x00,0xc6,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vshufps $0x7b, 2048(%rdx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, -2048(%rdx), %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x00,0xc6,0x4a,0x80,0x7b]
+ vshufps $0x7b, -2048(%rdx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, -2064(%rdx), %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x00,0xc6,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vshufps $0x7b, -2064(%rdx), %xmm23, %xmm17
+
+// CHECK: vshufps $123, 508(%rdx){1to4}, %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x10,0xc6,0x4a,0x7f,0x7b]
+ vshufps $0x7b, 508(%rdx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $123, 512(%rdx){1to4}, %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x10,0xc6,0x8a,0x00,0x02,0x00,0x00,0x7b]
+ vshufps $0x7b, 512(%rdx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $123, -512(%rdx){1to4}, %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x10,0xc6,0x4a,0x80,0x7b]
+ vshufps $0x7b, -512(%rdx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $123, -516(%rdx){1to4}, %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xe1,0x44,0x10,0xc6,0x8a,0xfc,0xfd,0xff,0xff,0x7b]
+ vshufps $0x7b, -516(%rdx){1to4}, %xmm23, %xmm17
+
+// CHECK: vshufps $171, %ymm23, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x34,0x20,0xc6,0xdf,0xab]
+ vshufps $0xab, %ymm23, %ymm25, %ymm19
+
+// CHECK: vshufps $171, %ymm23, %ymm25, %ymm19 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x34,0x23,0xc6,0xdf,0xab]
+ vshufps $0xab, %ymm23, %ymm25, %ymm19 {%k3}
+
+// CHECK: vshufps $171, %ymm23, %ymm25, %ymm19 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x34,0xa3,0xc6,0xdf,0xab]
+ vshufps $0xab, %ymm23, %ymm25, %ymm19 {%k3} {z}
+
+// CHECK: vshufps $123, %ymm23, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x34,0x20,0xc6,0xdf,0x7b]
+ vshufps $0x7b, %ymm23, %ymm25, %ymm19
+
+// CHECK: vshufps $123, (%rcx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0xc6,0x19,0x7b]
+ vshufps $0x7b, (%rcx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, 291(%rax,%r14,8), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x34,0x20,0xc6,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufps $0x7b, 291(%rax,%r14,8), %ymm25, %ymm19
+
+// CHECK: vshufps $123, (%rcx){1to8}, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0xc6,0x19,0x7b]
+ vshufps $0x7b, (%rcx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufps $123, 4064(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0xc6,0x5a,0x7f,0x7b]
+ vshufps $0x7b, 4064(%rdx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, 4096(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0xc6,0x9a,0x00,0x10,0x00,0x00,0x7b]
+ vshufps $0x7b, 4096(%rdx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, -4096(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0xc6,0x5a,0x80,0x7b]
+ vshufps $0x7b, -4096(%rdx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, -4128(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x20,0xc6,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+ vshufps $0x7b, -4128(%rdx), %ymm25, %ymm19
+
+// CHECK: vshufps $123, 508(%rdx){1to8}, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0xc6,0x5a,0x7f,0x7b]
+ vshufps $0x7b, 508(%rdx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufps $123, 512(%rdx){1to8}, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0xc6,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vshufps $0x7b, 512(%rdx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufps $123, -512(%rdx){1to8}, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0xc6,0x5a,0x80,0x7b]
+ vshufps $0x7b, -512(%rdx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufps $123, -516(%rdx){1to8}, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe1,0x34,0x30,0xc6,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vshufps $0x7b, -516(%rdx){1to8}, %ymm25, %ymm19
+
+// CHECK: vshufpd $171, %xmm22, %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0xc6,0xe6,0xab]
+ vshufpd $0xab, %xmm22, %xmm21, %xmm20
+
+// CHECK: vshufpd $171, %xmm22, %xmm21, %xmm20 {%k3}
+// CHECK: encoding: [0x62,0xa1,0xd5,0x03,0xc6,0xe6,0xab]
+ vshufpd $0xab, %xmm22, %xmm21, %xmm20 {%k3}
+
+// CHECK: vshufpd $171, %xmm22, %xmm21, %xmm20 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0xd5,0x83,0xc6,0xe6,0xab]
+ vshufpd $0xab, %xmm22, %xmm21, %xmm20 {%k3} {z}
+
+// CHECK: vshufpd $123, %xmm22, %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0xc6,0xe6,0x7b]
+ vshufpd $0x7b, %xmm22, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, (%rcx), %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0xc6,0x21,0x7b]
+ vshufpd $0x7b, (%rcx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 291(%rax,%r14,8), %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xa1,0xd5,0x00,0xc6,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufpd $0x7b, 291(%rax,%r14,8), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, (%rcx){1to2}, %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0xc6,0x21,0x7b]
+ vshufpd $0x7b, (%rcx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 2032(%rdx), %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0xc6,0x62,0x7f,0x7b]
+ vshufpd $0x7b, 2032(%rdx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 2048(%rdx), %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0xc6,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vshufpd $0x7b, 2048(%rdx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, -2048(%rdx), %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0xc6,0x62,0x80,0x7b]
+ vshufpd $0x7b, -2048(%rdx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, -2064(%rdx), %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x00,0xc6,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vshufpd $0x7b, -2064(%rdx), %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 1016(%rdx){1to2}, %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0xc6,0x62,0x7f,0x7b]
+ vshufpd $0x7b, 1016(%rdx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, 1024(%rdx){1to2}, %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0xc6,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ vshufpd $0x7b, 1024(%rdx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, -1024(%rdx){1to2}, %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0xc6,0x62,0x80,0x7b]
+ vshufpd $0x7b, -1024(%rdx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $123, -1032(%rdx){1to2}, %xmm21, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xd5,0x10,0xc6,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ vshufpd $0x7b, -1032(%rdx){1to2}, %xmm21, %xmm20
+
+// CHECK: vshufpd $171, %ymm22, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0xc6,0xc6,0xab]
+ vshufpd $0xab, %ymm22, %ymm29, %ymm24
+
+// CHECK: vshufpd $171, %ymm22, %ymm29, %ymm24 {%k6}
+// CHECK: encoding: [0x62,0x21,0x95,0x26,0xc6,0xc6,0xab]
+ vshufpd $0xab, %ymm22, %ymm29, %ymm24 {%k6}
+
+// CHECK: vshufpd $171, %ymm22, %ymm29, %ymm24 {%k6} {z}
+// CHECK: encoding: [0x62,0x21,0x95,0xa6,0xc6,0xc6,0xab]
+ vshufpd $0xab, %ymm22, %ymm29, %ymm24 {%k6} {z}
+
+// CHECK: vshufpd $123, %ymm22, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0xc6,0xc6,0x7b]
+ vshufpd $0x7b, %ymm22, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, (%rcx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0xc6,0x01,0x7b]
+ vshufpd $0x7b, (%rcx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 291(%rax,%r14,8), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x21,0x95,0x20,0xc6,0x84,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vshufpd $0x7b, 291(%rax,%r14,8), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, (%rcx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0xc6,0x01,0x7b]
+ vshufpd $0x7b, (%rcx){1to4}, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 4064(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0xc6,0x42,0x7f,0x7b]
+ vshufpd $0x7b, 4064(%rdx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 4096(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0xc6,0x82,0x00,0x10,0x00,0x00,0x7b]
+ vshufpd $0x7b, 4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, -4096(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0xc6,0x42,0x80,0x7b]
+ vshufpd $0x7b, -4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, -4128(%rdx), %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x20,0xc6,0x82,0xe0,0xef,0xff,0xff,0x7b]
+ vshufpd $0x7b, -4128(%rdx), %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 1016(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0xc6,0x42,0x7f,0x7b]
+ vshufpd $0x7b, 1016(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, 1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0xc6,0x82,0x00,0x04,0x00,0x00,0x7b]
+ vshufpd $0x7b, 1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, -1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0xc6,0x42,0x80,0x7b]
+ vshufpd $0x7b, -1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vshufpd $123, -1032(%rdx){1to4}, %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x61,0x95,0x30,0xc6,0x82,0xf8,0xfb,0xff,0xff,0x7b]
+ vshufpd $0x7b, -1032(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vscatterqps %xmm28, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x01,0xa3,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterqps %xmm28, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterqps %xmm28, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x01,0xa3,0xa4,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterqps %xmm28, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterqps %xmm28, 256(%r9,%xmm31) {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x01,0xa3,0x64,0x39,0x40]
+ vscatterqps %xmm28, 256(%r9, %xmm31) {%k1}
+
+// CHECK: vscatterqps %xmm28, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK: encoding: [0x62,0x22,0x7d,0x01,0xa3,0xa4,0xb9,0x00,0x04,0x00,0x00]
+ vscatterqps %xmm28, 1024(%rcx, %xmm31,4) {%k1}
+
+// CHECK: vscatterqps %xmm25, 123(%r14,%ymm31,8) {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa3,0x8c,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterqps %xmm25, 123(%r14, %ymm31,8) {%k1}
+
+// CHECK: vscatterqps %xmm25, 123(%r14,%ymm31,8) {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa3,0x8c,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterqps %xmm25, 123(%r14, %ymm31,8) {%k1}
+
+// CHECK: vscatterqps %xmm25, 256(%r9,%ymm31) {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x21,0xa3,0x4c,0x39,0x40]
+ vscatterqps %xmm25, 256(%r9, %ymm31) {%k1}
+
+// CHECK: vscatterqps %xmm25, 1024(%rcx,%ymm31,4) {%k1}
+// CHECK: encoding: [0x62,0x22,0x7d,0x21,0xa3,0x8c,0xb9,0x00,0x04,0x00,0x00]
+ vscatterqps %xmm25, 1024(%rcx, %ymm31,4) {%k1}
+
+// CHECK: vscatterqpd %xmm21, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa3,0xac,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterqpd %xmm21, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterqpd %xmm21, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa3,0xac,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterqpd %xmm21, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterqpd %xmm21, 256(%r9,%xmm31) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa3,0x6c,0x39,0x20]
+ vscatterqpd %xmm21, 256(%r9, %xmm31) {%k1}
+
+// CHECK: vscatterqpd %xmm21, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0xa3,0xac,0xb9,0x00,0x04,0x00,0x00]
+ vscatterqpd %xmm21, 1024(%rcx, %xmm31,4) {%k1}
+
+// CHECK: vscatterqpd %ymm23, 123(%r14,%ymm31,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x21,0xa3,0xbc,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterqpd %ymm23, 123(%r14, %ymm31,8) {%k1}
+
+// CHECK: vscatterqpd %ymm23, 123(%r14,%ymm31,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x21,0xa3,0xbc,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterqpd %ymm23, 123(%r14, %ymm31,8) {%k1}
+
+// CHECK: vscatterqpd %ymm23, 256(%r9,%ymm31) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x21,0xa3,0x7c,0x39,0x20]
+ vscatterqpd %ymm23, 256(%r9, %ymm31) {%k1}
+
+// CHECK: vscatterqpd %ymm23, 1024(%rcx,%ymm31,4) {%k1}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x21,0xa3,0xbc,0xb9,0x00,0x04,0x00,0x00]
+ vscatterqpd %ymm23, 1024(%rcx, %ymm31,4) {%k1}
+
+// CHECK: vscatterdps %xmm24, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x01,0xa2,0x84,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterdps %xmm24, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterdps %xmm24, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x01,0xa2,0x84,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterdps %xmm24, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterdps %xmm24, 256(%r9,%xmm31) {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x01,0xa2,0x44,0x39,0x40]
+ vscatterdps %xmm24, 256(%r9, %xmm31) {%k1}
+
+// CHECK: vscatterdps %xmm24, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK: encoding: [0x62,0x22,0x7d,0x01,0xa2,0x84,0xb9,0x00,0x04,0x00,0x00]
+ vscatterdps %xmm24, 1024(%rcx, %xmm31,4) {%k1}
+
+// CHECK: vscatterdps %ymm23, 123(%r14,%ymm31,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0x7d,0x21,0xa2,0xbc,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterdps %ymm23, 123(%r14, %ymm31,8) {%k1}
+
+// CHECK: vscatterdps %ymm23, 123(%r14,%ymm31,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0x7d,0x21,0xa2,0xbc,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterdps %ymm23, 123(%r14, %ymm31,8) {%k1}
+
+// CHECK: vscatterdps %ymm23, 256(%r9,%ymm31) {%k1}
+// CHECK: encoding: [0x62,0x82,0x7d,0x21,0xa2,0x7c,0x39,0x40]
+ vscatterdps %ymm23, 256(%r9, %ymm31) {%k1}
+
+// CHECK: vscatterdps %ymm23, 1024(%rcx,%ymm31,4) {%k1}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x21,0xa2,0xbc,0xb9,0x00,0x04,0x00,0x00]
+ vscatterdps %ymm23, 1024(%rcx, %ymm31,4) {%k1}
+
+// CHECK: vscatterdpd %xmm18, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa2,0x94,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterdpd %xmm18, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterdpd %xmm18, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa2,0x94,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterdpd %xmm18, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterdpd %xmm18, 256(%r9,%xmm31) {%k1}
+// CHECK: encoding: [0x62,0x82,0xfd,0x01,0xa2,0x54,0x39,0x20]
+ vscatterdpd %xmm18, 256(%r9, %xmm31) {%k1}
+
+// CHECK: vscatterdpd %xmm18, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x01,0xa2,0x94,0xb9,0x00,0x04,0x00,0x00]
+ vscatterdpd %xmm18, 1024(%rcx, %xmm31,4) {%k1}
+
+// CHECK: vscatterdpd %ymm30, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x02,0xfd,0x21,0xa2,0xb4,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterdpd %ymm30, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterdpd %ymm30, 123(%r14,%xmm31,8) {%k1}
+// CHECK: encoding: [0x62,0x02,0xfd,0x21,0xa2,0xb4,0xfe,0x7b,0x00,0x00,0x00]
+ vscatterdpd %ymm30, 123(%r14, %xmm31,8) {%k1}
+
+// CHECK: vscatterdpd %ymm30, 256(%r9,%xmm31) {%k1}
+// CHECK: encoding: [0x62,0x02,0xfd,0x21,0xa2,0x74,0x39,0x20]
+ vscatterdpd %ymm30, 256(%r9, %xmm31) {%k1}
+
+// CHECK: vscatterdpd %ymm30, 1024(%rcx,%xmm31,4) {%k1}
+// CHECK: encoding: [0x62,0x22,0xfd,0x21,0xa2,0xb4,0xb9,0x00,0x04,0x00,0x00]
+ vscatterdpd %ymm30, 1024(%rcx, %xmm31,4) {%k1}
+
+// CHECK: vpermilps $171, %xmm28, %xmm20
+// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x04,0xe4,0xab]
+ vpermilps $0xab, %xmm28, %xmm20
+
+// CHECK: vpermilps $171, %xmm28, %xmm20 {%k4}
+// CHECK: encoding: [0x62,0x83,0x7d,0x0c,0x04,0xe4,0xab]
+ vpermilps $0xab, %xmm28, %xmm20 {%k4}
+
+// CHECK: vpermilps $171, %xmm28, %xmm20 {%k4} {z}
+// CHECK: encoding: [0x62,0x83,0x7d,0x8c,0x04,0xe4,0xab]
+ vpermilps $0xab, %xmm28, %xmm20 {%k4} {z}
+
+// CHECK: vpermilps $123, %xmm28, %xmm20
+// CHECK: encoding: [0x62,0x83,0x7d,0x08,0x04,0xe4,0x7b]
+ vpermilps $0x7b, %xmm28, %xmm20
+
+// CHECK: vpermilps $123, (%rcx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x21,0x7b]
+ vpermilps $0x7b, (%rcx), %xmm20
+
+// CHECK: vpermilps $123, 291(%rax,%r14,8), %xmm20
+// CHECK: encoding: [0x62,0xa3,0x7d,0x08,0x04,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpermilps $0x7b, 291(%rax,%r14,8), %xmm20
+
+// CHECK: vpermilps $123, (%rcx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x21,0x7b]
+ vpermilps $0x7b, (%rcx){1to4}, %xmm20
+
+// CHECK: vpermilps $123, 2032(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x62,0x7f,0x7b]
+ vpermilps $0x7b, 2032(%rdx), %xmm20
+
+// CHECK: vpermilps $123, 2048(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vpermilps $0x7b, 2048(%rdx), %xmm20
+
+// CHECK: vpermilps $123, -2048(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0x62,0x80,0x7b]
+ vpermilps $0x7b, -2048(%rdx), %xmm20
+
+// CHECK: vpermilps $123, -2064(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x08,0x04,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vpermilps $0x7b, -2064(%rdx), %xmm20
+
+// CHECK: vpermilps $123, 508(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x62,0x7f,0x7b]
+ vpermilps $0x7b, 508(%rdx){1to4}, %xmm20
+
+// CHECK: vpermilps $123, 512(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0xa2,0x00,0x02,0x00,0x00,0x7b]
+ vpermilps $0x7b, 512(%rdx){1to4}, %xmm20
+
+// CHECK: vpermilps $123, -512(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0x62,0x80,0x7b]
+ vpermilps $0x7b, -512(%rdx){1to4}, %xmm20
+
+// CHECK: vpermilps $123, -516(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe3,0x7d,0x18,0x04,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+ vpermilps $0x7b, -516(%rdx){1to4}, %xmm20
+
+// CHECK: vpermilps $171, %ymm17, %ymm30
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xf1,0xab]
+ vpermilps $0xab, %ymm17, %ymm30
+
+// CHECK: vpermilps $171, %ymm17, %ymm30 {%k5}
+// CHECK: encoding: [0x62,0x23,0x7d,0x2d,0x04,0xf1,0xab]
+ vpermilps $0xab, %ymm17, %ymm30 {%k5}
+
+// CHECK: vpermilps $171, %ymm17, %ymm30 {%k5} {z}
+// CHECK: encoding: [0x62,0x23,0x7d,0xad,0x04,0xf1,0xab]
+ vpermilps $0xab, %ymm17, %ymm30 {%k5} {z}
+
+// CHECK: vpermilps $123, %ymm17, %ymm30
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xf1,0x7b]
+ vpermilps $0x7b, %ymm17, %ymm30
+
+// CHECK: vpermilps $123, (%rcx), %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x31,0x7b]
+ vpermilps $0x7b, (%rcx), %ymm30
+
+// CHECK: vpermilps $123, 291(%rax,%r14,8), %ymm30
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x04,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpermilps $0x7b, 291(%rax,%r14,8), %ymm30
+
+// CHECK: vpermilps $123, (%rcx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x31,0x7b]
+ vpermilps $0x7b, (%rcx){1to8}, %ymm30
+
+// CHECK: vpermilps $123, 4064(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x72,0x7f,0x7b]
+ vpermilps $0x7b, 4064(%rdx), %ymm30
+
+// CHECK: vpermilps $123, 4096(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0xb2,0x00,0x10,0x00,0x00,0x7b]
+ vpermilps $0x7b, 4096(%rdx), %ymm30
+
+// CHECK: vpermilps $123, -4096(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0x72,0x80,0x7b]
+ vpermilps $0x7b, -4096(%rdx), %ymm30
+
+// CHECK: vpermilps $123, -4128(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x04,0xb2,0xe0,0xef,0xff,0xff,0x7b]
+ vpermilps $0x7b, -4128(%rdx), %ymm30
+
+// CHECK: vpermilps $123, 508(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x72,0x7f,0x7b]
+ vpermilps $0x7b, 508(%rdx){1to8}, %ymm30
+
+// CHECK: vpermilps $123, 512(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0xb2,0x00,0x02,0x00,0x00,0x7b]
+ vpermilps $0x7b, 512(%rdx){1to8}, %ymm30
+
+// CHECK: vpermilps $123, -512(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0x72,0x80,0x7b]
+ vpermilps $0x7b, -512(%rdx){1to8}, %ymm30
+
+// CHECK: vpermilps $123, -516(%rdx){1to8}, %ymm30
+// CHECK: encoding: [0x62,0x63,0x7d,0x38,0x04,0xb2,0xfc,0xfd,0xff,0xff,0x7b]
+ vpermilps $0x7b, -516(%rdx){1to8}, %ymm30
+
+// CHECK: vpermilps %xmm22, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x22,0x1d,0x00,0x0c,0xe6]
+ vpermilps %xmm22, %xmm28, %xmm28
+
+// CHECK: vpermilps %xmm22, %xmm28, %xmm28 {%k6}
+// CHECK: encoding: [0x62,0x22,0x1d,0x06,0x0c,0xe6]
+ vpermilps %xmm22, %xmm28, %xmm28 {%k6}
+
+// CHECK: vpermilps %xmm22, %xmm28, %xmm28 {%k6} {z}
+// CHECK: encoding: [0x62,0x22,0x1d,0x86,0x0c,0xe6]
+ vpermilps %xmm22, %xmm28, %xmm28 {%k6} {z}
+
+// CHECK: vpermilps (%rcx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x21]
+ vpermilps (%rcx), %xmm28, %xmm28
+
+// CHECK: vpermilps 291(%rax,%r14,8), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x22,0x1d,0x00,0x0c,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpermilps 291(%rax,%r14,8), %xmm28, %xmm28
+
+// CHECK: vpermilps (%rcx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x21]
+ vpermilps (%rcx){1to4}, %xmm28, %xmm28
+
+// CHECK: vpermilps 2032(%rdx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x62,0x7f]
+ vpermilps 2032(%rdx), %xmm28, %xmm28
+
+// CHECK: vpermilps 2048(%rdx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0xa2,0x00,0x08,0x00,0x00]
+ vpermilps 2048(%rdx), %xmm28, %xmm28
+
+// CHECK: vpermilps -2048(%rdx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0x62,0x80]
+ vpermilps -2048(%rdx), %xmm28, %xmm28
+
+// CHECK: vpermilps -2064(%rdx), %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x00,0x0c,0xa2,0xf0,0xf7,0xff,0xff]
+ vpermilps -2064(%rdx), %xmm28, %xmm28
+
+// CHECK: vpermilps 508(%rdx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x62,0x7f]
+ vpermilps 508(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vpermilps 512(%rdx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0xa2,0x00,0x02,0x00,0x00]
+ vpermilps 512(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vpermilps -512(%rdx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0x62,0x80]
+ vpermilps -512(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vpermilps -516(%rdx){1to4}, %xmm28, %xmm28
+// CHECK: encoding: [0x62,0x62,0x1d,0x10,0x0c,0xa2,0xfc,0xfd,0xff,0xff]
+ vpermilps -516(%rdx){1to4}, %xmm28, %xmm28
+
+// CHECK: vpermilps %ymm21, %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x0c,0xed]
+ vpermilps %ymm21, %ymm28, %ymm29
+
+// CHECK: vpermilps %ymm21, %ymm28, %ymm29 {%k2}
+// CHECK: encoding: [0x62,0x22,0x1d,0x22,0x0c,0xed]
+ vpermilps %ymm21, %ymm28, %ymm29 {%k2}
+
+// CHECK: vpermilps %ymm21, %ymm28, %ymm29 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x1d,0xa2,0x0c,0xed]
+ vpermilps %ymm21, %ymm28, %ymm29 {%k2} {z}
+
+// CHECK: vpermilps (%rcx), %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x29]
+ vpermilps (%rcx), %ymm28, %ymm29
+
+// CHECK: vpermilps 291(%rax,%r14,8), %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x22,0x1d,0x20,0x0c,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpermilps 291(%rax,%r14,8), %ymm28, %ymm29
+
+// CHECK: vpermilps (%rcx){1to8}, %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x29]
+ vpermilps (%rcx){1to8}, %ymm28, %ymm29
+
+// CHECK: vpermilps 4064(%rdx), %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x6a,0x7f]
+ vpermilps 4064(%rdx), %ymm28, %ymm29
+
+// CHECK: vpermilps 4096(%rdx), %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0xaa,0x00,0x10,0x00,0x00]
+ vpermilps 4096(%rdx), %ymm28, %ymm29
+
+// CHECK: vpermilps -4096(%rdx), %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0x6a,0x80]
+ vpermilps -4096(%rdx), %ymm28, %ymm29
+
+// CHECK: vpermilps -4128(%rdx), %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x20,0x0c,0xaa,0xe0,0xef,0xff,0xff]
+ vpermilps -4128(%rdx), %ymm28, %ymm29
+
+// CHECK: vpermilps 508(%rdx){1to8}, %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x6a,0x7f]
+ vpermilps 508(%rdx){1to8}, %ymm28, %ymm29
+
+// CHECK: vpermilps 512(%rdx){1to8}, %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0xaa,0x00,0x02,0x00,0x00]
+ vpermilps 512(%rdx){1to8}, %ymm28, %ymm29
+
+// CHECK: vpermilps -512(%rdx){1to8}, %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0x6a,0x80]
+ vpermilps -512(%rdx){1to8}, %ymm28, %ymm29
+
+// CHECK: vpermilps -516(%rdx){1to8}, %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x62,0x1d,0x30,0x0c,0xaa,0xfc,0xfd,0xff,0xff]
+ vpermilps -516(%rdx){1to8}, %ymm28, %ymm29
+
+// CHECK: vpermilpd $171, %xmm19, %xmm29
+// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xeb,0xab]
+ vpermilpd $0xab, %xmm19, %xmm29
+
+// CHECK: vpermilpd $171, %xmm19, %xmm29 {%k7}
+// CHECK: encoding: [0x62,0x23,0xfd,0x0f,0x05,0xeb,0xab]
+ vpermilpd $0xab, %xmm19, %xmm29 {%k7}
+
+// CHECK: vpermilpd $171, %xmm19, %xmm29 {%k7} {z}
+// CHECK: encoding: [0x62,0x23,0xfd,0x8f,0x05,0xeb,0xab]
+ vpermilpd $0xab, %xmm19, %xmm29 {%k7} {z}
+
+// CHECK: vpermilpd $123, %xmm19, %xmm29
+// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xeb,0x7b]
+ vpermilpd $0x7b, %xmm19, %xmm29
+
+// CHECK: vpermilpd $123, (%rcx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x29,0x7b]
+ vpermilpd $0x7b, (%rcx), %xmm29
+
+// CHECK: vpermilpd $123, 291(%rax,%r14,8), %xmm29
+// CHECK: encoding: [0x62,0x23,0xfd,0x08,0x05,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpermilpd $0x7b, 291(%rax,%r14,8), %xmm29
+
+// CHECK: vpermilpd $123, (%rcx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x29,0x7b]
+ vpermilpd $0x7b, (%rcx){1to2}, %xmm29
+
+// CHECK: vpermilpd $123, 2032(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x6a,0x7f,0x7b]
+ vpermilpd $0x7b, 2032(%rdx), %xmm29
+
+// CHECK: vpermilpd $123, 2048(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vpermilpd $0x7b, 2048(%rdx), %xmm29
+
+// CHECK: vpermilpd $123, -2048(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0x6a,0x80,0x7b]
+ vpermilpd $0x7b, -2048(%rdx), %xmm29
+
+// CHECK: vpermilpd $123, -2064(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x08,0x05,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vpermilpd $0x7b, -2064(%rdx), %xmm29
+
+// CHECK: vpermilpd $123, 1016(%rdx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x6a,0x7f,0x7b]
+ vpermilpd $0x7b, 1016(%rdx){1to2}, %xmm29
+
+// CHECK: vpermilpd $123, 1024(%rdx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0xaa,0x00,0x04,0x00,0x00,0x7b]
+ vpermilpd $0x7b, 1024(%rdx){1to2}, %xmm29
+
+// CHECK: vpermilpd $123, -1024(%rdx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0x6a,0x80,0x7b]
+ vpermilpd $0x7b, -1024(%rdx){1to2}, %xmm29
+
+// CHECK: vpermilpd $123, -1032(%rdx){1to2}, %xmm29
+// CHECK: encoding: [0x62,0x63,0xfd,0x18,0x05,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+ vpermilpd $0x7b, -1032(%rdx){1to2}, %xmm29
+
+// CHECK: vpermilpd $171, %ymm24, %ymm17
+// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x05,0xc8,0xab]
+ vpermilpd $0xab, %ymm24, %ymm17
+
+// CHECK: vpermilpd $171, %ymm24, %ymm17 {%k6}
+// CHECK: encoding: [0x62,0x83,0xfd,0x2e,0x05,0xc8,0xab]
+ vpermilpd $0xab, %ymm24, %ymm17 {%k6}
+
+// CHECK: vpermilpd $171, %ymm24, %ymm17 {%k6} {z}
+// CHECK: encoding: [0x62,0x83,0xfd,0xae,0x05,0xc8,0xab]
+ vpermilpd $0xab, %ymm24, %ymm17 {%k6} {z}
+
+// CHECK: vpermilpd $123, %ymm24, %ymm17
+// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x05,0xc8,0x7b]
+ vpermilpd $0x7b, %ymm24, %ymm17
+
+// CHECK: vpermilpd $123, (%rcx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x09,0x7b]
+ vpermilpd $0x7b, (%rcx), %ymm17
+
+// CHECK: vpermilpd $123, 291(%rax,%r14,8), %ymm17
+// CHECK: encoding: [0x62,0xa3,0xfd,0x28,0x05,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpermilpd $0x7b, 291(%rax,%r14,8), %ymm17
+
+// CHECK: vpermilpd $123, (%rcx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x09,0x7b]
+ vpermilpd $0x7b, (%rcx){1to4}, %ymm17
+
+// CHECK: vpermilpd $123, 4064(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x4a,0x7f,0x7b]
+ vpermilpd $0x7b, 4064(%rdx), %ymm17
+
+// CHECK: vpermilpd $123, 4096(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vpermilpd $0x7b, 4096(%rdx), %ymm17
+
+// CHECK: vpermilpd $123, -4096(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x4a,0x80,0x7b]
+ vpermilpd $0x7b, -4096(%rdx), %ymm17
+
+// CHECK: vpermilpd $123, -4128(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x28,0x05,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vpermilpd $0x7b, -4128(%rdx), %ymm17
+
+// CHECK: vpermilpd $123, 1016(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x4a,0x7f,0x7b]
+ vpermilpd $0x7b, 1016(%rdx){1to4}, %ymm17
+
+// CHECK: vpermilpd $123, 1024(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vpermilpd $0x7b, 1024(%rdx){1to4}, %ymm17
+
+// CHECK: vpermilpd $123, -1024(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x4a,0x80,0x7b]
+ vpermilpd $0x7b, -1024(%rdx){1to4}, %ymm17
+
+// CHECK: vpermilpd $123, -1032(%rdx){1to4}, %ymm17
+// CHECK: encoding: [0x62,0xe3,0xfd,0x38,0x05,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vpermilpd $0x7b, -1032(%rdx){1to4}, %ymm17
+
+// CHECK: vpermilpd %xmm17, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x0d,0xd1]
+ vpermilpd %xmm17, %xmm27, %xmm26
+
+// CHECK: vpermilpd %xmm17, %xmm27, %xmm26 {%k2}
+// CHECK: encoding: [0x62,0x22,0xa5,0x02,0x0d,0xd1]
+ vpermilpd %xmm17, %xmm27, %xmm26 {%k2}
+
+// CHECK: vpermilpd %xmm17, %xmm27, %xmm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0xa5,0x82,0x0d,0xd1]
+ vpermilpd %xmm17, %xmm27, %xmm26 {%k2} {z}
+
+// CHECK: vpermilpd (%rcx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x11]
+ vpermilpd (%rcx), %xmm27, %xmm26
+
+// CHECK: vpermilpd 291(%rax,%r14,8), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x22,0xa5,0x00,0x0d,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpermilpd 291(%rax,%r14,8), %xmm27, %xmm26
+
+// CHECK: vpermilpd (%rcx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x11]
+ vpermilpd (%rcx){1to2}, %xmm27, %xmm26
+
+// CHECK: vpermilpd 2032(%rdx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x52,0x7f]
+ vpermilpd 2032(%rdx), %xmm27, %xmm26
+
+// CHECK: vpermilpd 2048(%rdx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x92,0x00,0x08,0x00,0x00]
+ vpermilpd 2048(%rdx), %xmm27, %xmm26
+
+// CHECK: vpermilpd -2048(%rdx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x52,0x80]
+ vpermilpd -2048(%rdx), %xmm27, %xmm26
+
+// CHECK: vpermilpd -2064(%rdx), %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x00,0x0d,0x92,0xf0,0xf7,0xff,0xff]
+ vpermilpd -2064(%rdx), %xmm27, %xmm26
+
+// CHECK: vpermilpd 1016(%rdx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x52,0x7f]
+ vpermilpd 1016(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vpermilpd 1024(%rdx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x92,0x00,0x04,0x00,0x00]
+ vpermilpd 1024(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vpermilpd -1024(%rdx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x52,0x80]
+ vpermilpd -1024(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vpermilpd -1032(%rdx){1to2}, %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x62,0xa5,0x10,0x0d,0x92,0xf8,0xfb,0xff,0xff]
+ vpermilpd -1032(%rdx){1to2}, %xmm27, %xmm26
+
+// CHECK: vpermilpd %ymm24, %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x02,0xad,0x20,0x0d,0xd0]
+ vpermilpd %ymm24, %ymm26, %ymm26
+
+// CHECK: vpermilpd %ymm24, %ymm26, %ymm26 {%k5}
+// CHECK: encoding: [0x62,0x02,0xad,0x25,0x0d,0xd0]
+ vpermilpd %ymm24, %ymm26, %ymm26 {%k5}
+
+// CHECK: vpermilpd %ymm24, %ymm26, %ymm26 {%k5} {z}
+// CHECK: encoding: [0x62,0x02,0xad,0xa5,0x0d,0xd0]
+ vpermilpd %ymm24, %ymm26, %ymm26 {%k5} {z}
+
+// CHECK: vpermilpd (%rcx), %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x11]
+ vpermilpd (%rcx), %ymm26, %ymm26
+
+// CHECK: vpermilpd 291(%rax,%r14,8), %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x22,0xad,0x20,0x0d,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpermilpd 291(%rax,%r14,8), %ymm26, %ymm26
+
+// CHECK: vpermilpd (%rcx){1to4}, %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x11]
+ vpermilpd (%rcx){1to4}, %ymm26, %ymm26
+
+// CHECK: vpermilpd 4064(%rdx), %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x52,0x7f]
+ vpermilpd 4064(%rdx), %ymm26, %ymm26
+
+// CHECK: vpermilpd 4096(%rdx), %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x92,0x00,0x10,0x00,0x00]
+ vpermilpd 4096(%rdx), %ymm26, %ymm26
+
+// CHECK: vpermilpd -4096(%rdx), %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x52,0x80]
+ vpermilpd -4096(%rdx), %ymm26, %ymm26
+
+// CHECK: vpermilpd -4128(%rdx), %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x20,0x0d,0x92,0xe0,0xef,0xff,0xff]
+ vpermilpd -4128(%rdx), %ymm26, %ymm26
+
+// CHECK: vpermilpd 1016(%rdx){1to4}, %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x52,0x7f]
+ vpermilpd 1016(%rdx){1to4}, %ymm26, %ymm26
+
+// CHECK: vpermilpd 1024(%rdx){1to4}, %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x92,0x00,0x04,0x00,0x00]
+ vpermilpd 1024(%rdx){1to4}, %ymm26, %ymm26
+
+// CHECK: vpermilpd -1024(%rdx){1to4}, %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x52,0x80]
+ vpermilpd -1024(%rdx){1to4}, %ymm26, %ymm26
+
+// CHECK: vpermilpd -1032(%rdx){1to4}, %ymm26, %ymm26
+// CHECK: encoding: [0x62,0x62,0xad,0x30,0x0d,0x92,0xf8,0xfb,0xff,0xff]
+ vpermilpd -1032(%rdx){1to4}, %ymm26, %ymm26
+
+// CHECK: vcvtpd2dq %xmm20, %xmm25
+// CHECK: encoding: [0x62,0x21,0xff,0x08,0xe6,0xcc]
+ vcvtpd2dq %xmm20, %xmm25
+
+// CHECK: vcvtpd2dq %xmm20, %xmm25 {%k2}
+// CHECK: encoding: [0x62,0x21,0xff,0x0a,0xe6,0xcc]
+ vcvtpd2dq %xmm20, %xmm25 {%k2}
+
+// CHECK: vcvtpd2dq %xmm20, %xmm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0xff,0x8a,0xe6,0xcc]
+ vcvtpd2dq %xmm20, %xmm25 {%k2} {z}
+
+// CHECK: vcvtpd2dqx (%rcx), %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x09]
+ vcvtpd2dqx (%rcx), %xmm25
+
+// CHECK: vcvtpd2dqx 291(%rax,%r14,8), %xmm25
+// CHECK: encoding: [0x62,0x21,0xff,0x08,0xe6,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vcvtpd2dqx 291(%rax,%r14,8), %xmm25
+
+// CHECK: vcvtpd2dq (%rcx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x09]
+ vcvtpd2dq (%rcx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dqx 2032(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x4a,0x7f]
+ vcvtpd2dqx 2032(%rdx), %xmm25
+
+// CHECK: vcvtpd2dqx 2048(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x8a,0x00,0x08,0x00,0x00]
+ vcvtpd2dqx 2048(%rdx), %xmm25
+
+// CHECK: vcvtpd2dqx -2048(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x4a,0x80]
+ vcvtpd2dqx -2048(%rdx), %xmm25
+
+// CHECK: vcvtpd2dqx -2064(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x08,0xe6,0x8a,0xf0,0xf7,0xff,0xff]
+ vcvtpd2dqx -2064(%rdx), %xmm25
+
+// CHECK: vcvtpd2dq 1016(%rdx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x4a,0x7f]
+ vcvtpd2dq 1016(%rdx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dq 1024(%rdx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x8a,0x00,0x04,0x00,0x00]
+ vcvtpd2dq 1024(%rdx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dq -1024(%rdx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x4a,0x80]
+ vcvtpd2dq -1024(%rdx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dq -1032(%rdx){1to2}, %xmm25
+// CHECK: encoding: [0x62,0x61,0xff,0x18,0xe6,0x8a,0xf8,0xfb,0xff,0xff]
+ vcvtpd2dq -1032(%rdx){1to2}, %xmm25
+
+// CHECK: vcvtpd2dq %ymm28, %xmm30
+// CHECK: encoding: [0x62,0x01,0xff,0x28,0xe6,0xf4]
+ vcvtpd2dq %ymm28, %xmm30
+
+// CHECK: vcvtpd2dq %ymm28, %xmm30 {%k3}
+// CHECK: encoding: [0x62,0x01,0xff,0x2b,0xe6,0xf4]
+ vcvtpd2dq %ymm28, %xmm30 {%k3}
+
+// CHECK: vcvtpd2dq %ymm28, %xmm30 {%k3} {z}
+// CHECK: encoding: [0x62,0x01,0xff,0xab,0xe6,0xf4]
+ vcvtpd2dq %ymm28, %xmm30 {%k3} {z}
+
+// CHECK: vcvtpd2dqy (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0x31]
+ vcvtpd2dqy (%rcx), %xmm30
+
+// CHECK: vcvtpd2dqy 291(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x21,0xff,0x28,0xe6,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtpd2dqy 291(%rax,%r14,8), %xmm30
+
+// CHECK: vcvtpd2dq (%rcx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0x31]
+ vcvtpd2dq (%rcx){1to4}, %xmm30
+
+// CHECK: vcvtpd2dqy 4064(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0x72,0x7f]
+ vcvtpd2dqy 4064(%rdx), %xmm30
+
+// CHECK: vcvtpd2dqy 4096(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0xb2,0x00,0x10,0x00,0x00]
+ vcvtpd2dqy 4096(%rdx), %xmm30
+
+// CHECK: vcvtpd2dqy -4096(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0x72,0x80]
+ vcvtpd2dqy -4096(%rdx), %xmm30
+
+// CHECK: vcvtpd2dqy -4128(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x28,0xe6,0xb2,0xe0,0xef,0xff,0xff]
+ vcvtpd2dqy -4128(%rdx), %xmm30
+
+// CHECK: vcvtpd2dq 1016(%rdx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0x72,0x7f]
+ vcvtpd2dq 1016(%rdx){1to4}, %xmm30
+
+// CHECK: vcvtpd2dq 1024(%rdx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0xb2,0x00,0x04,0x00,0x00]
+ vcvtpd2dq 1024(%rdx){1to4}, %xmm30
+
+// CHECK: vcvtpd2dq -1024(%rdx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0x72,0x80]
+ vcvtpd2dq -1024(%rdx){1to4}, %xmm30
+
+// CHECK: vcvtpd2dq -1032(%rdx){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x61,0xff,0x38,0xe6,0xb2,0xf8,0xfb,0xff,0xff]
+ vcvtpd2dq -1032(%rdx){1to4}, %xmm30
+
+// CHECK: vcvtpd2ps %xmm27, %xmm27
+// CHECK: encoding: [0x62,0x01,0xfd,0x08,0x5a,0xdb]
+ vcvtpd2ps %xmm27, %xmm27
+
+// CHECK: vcvtpd2ps %xmm27, %xmm27 {%k7}
+// CHECK: encoding: [0x62,0x01,0xfd,0x0f,0x5a,0xdb]
+ vcvtpd2ps %xmm27, %xmm27 {%k7}
+
+// CHECK: vcvtpd2ps %xmm27, %xmm27 {%k7} {z}
+// CHECK: encoding: [0x62,0x01,0xfd,0x8f,0x5a,0xdb]
+ vcvtpd2ps %xmm27, %xmm27 {%k7} {z}
+
+// CHECK: vcvtpd2psx (%rcx), %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x19]
+ vcvtpd2psx (%rcx), %xmm27
+
+// CHECK: vcvtpd2psx 291(%rax,%r14,8), %xmm27
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x5a,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vcvtpd2psx 291(%rax,%r14,8), %xmm27
+
+// CHECK: vcvtpd2ps (%rcx){1to2}, %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x19]
+ vcvtpd2ps (%rcx){1to2}, %xmm27
+
+// CHECK: vcvtpd2psx 2032(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x5a,0x7f]
+ vcvtpd2psx 2032(%rdx), %xmm27
+
+// CHECK: vcvtpd2psx 2048(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x9a,0x00,0x08,0x00,0x00]
+ vcvtpd2psx 2048(%rdx), %xmm27
+
+// CHECK: vcvtpd2psx -2048(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x5a,0x80]
+ vcvtpd2psx -2048(%rdx), %xmm27
+
+// CHECK: vcvtpd2psx -2064(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x08,0x5a,0x9a,0xf0,0xf7,0xff,0xff]
+ vcvtpd2psx -2064(%rdx), %xmm27
+
+// CHECK: vcvtpd2ps 1016(%rdx){1to2}, %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x5a,0x7f]
+ vcvtpd2ps 1016(%rdx){1to2}, %xmm27
+
+// CHECK: vcvtpd2ps 1024(%rdx){1to2}, %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x9a,0x00,0x04,0x00,0x00]
+ vcvtpd2ps 1024(%rdx){1to2}, %xmm27
+
+// CHECK: vcvtpd2ps -1024(%rdx){1to2}, %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x5a,0x80]
+ vcvtpd2ps -1024(%rdx){1to2}, %xmm27
+
+// CHECK: vcvtpd2ps -1032(%rdx){1to2}, %xmm27
+// CHECK: encoding: [0x62,0x61,0xfd,0x18,0x5a,0x9a,0xf8,0xfb,0xff,0xff]
+ vcvtpd2ps -1032(%rdx){1to2}, %xmm27
+
+// CHECK: vcvtpd2ps %ymm20, %xmm20
+// CHECK: encoding: [0x62,0xa1,0xfd,0x28,0x5a,0xe4]
+ vcvtpd2ps %ymm20, %xmm20
+
+// CHECK: vcvtpd2ps %ymm20, %xmm20 {%k6}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x2e,0x5a,0xe4]
+ vcvtpd2ps %ymm20, %xmm20 {%k6}
+
+// CHECK: vcvtpd2ps %ymm20, %xmm20 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0xfd,0xae,0x5a,0xe4]
+ vcvtpd2ps %ymm20, %xmm20 {%k6} {z}
+
+// CHECK: vcvtpd2psy (%rcx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0x21]
+ vcvtpd2psy (%rcx), %xmm20
+
+// CHECK: vcvtpd2psy 291(%rax,%r14,8), %xmm20
+// CHECK: encoding: [0x62,0xa1,0xfd,0x28,0x5a,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtpd2psy 291(%rax,%r14,8), %xmm20
+
+// CHECK: vcvtpd2ps (%rcx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0x21]
+ vcvtpd2ps (%rcx){1to4}, %xmm20
+
+// CHECK: vcvtpd2psy 4064(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0x62,0x7f]
+ vcvtpd2psy 4064(%rdx), %xmm20
+
+// CHECK: vcvtpd2psy 4096(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0xa2,0x00,0x10,0x00,0x00]
+ vcvtpd2psy 4096(%rdx), %xmm20
+
+// CHECK: vcvtpd2psy -4096(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0x62,0x80]
+ vcvtpd2psy -4096(%rdx), %xmm20
+
+// CHECK: vcvtpd2psy -4128(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x28,0x5a,0xa2,0xe0,0xef,0xff,0xff]
+ vcvtpd2psy -4128(%rdx), %xmm20
+
+// CHECK: vcvtpd2ps 1016(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0x62,0x7f]
+ vcvtpd2ps 1016(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtpd2ps 1024(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0xa2,0x00,0x04,0x00,0x00]
+ vcvtpd2ps 1024(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtpd2ps -1024(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0x62,0x80]
+ vcvtpd2ps -1024(%rdx){1to4}, %xmm20
+
+// CHECK: vcvtpd2ps -1032(%rdx){1to4}, %xmm20
+// CHECK: encoding: [0x62,0xe1,0xfd,0x38,0x5a,0xa2,0xf8,0xfb,0xff,0xff]
+ vcvtpd2ps -1032(%rdx){1to4}, %xmm20
+
+// CHECK: vcvttpd2udq %xmm26, %xmm23
+// CHECK: encoding: [0x62,0x81,0xfc,0x08,0x78,0xfa]
+ vcvttpd2udq %xmm26, %xmm23
+
+// CHECK: vcvttpd2udq %xmm26, %xmm23 {%k2}
+// CHECK: encoding: [0x62,0x81,0xfc,0x0a,0x78,0xfa]
+ vcvttpd2udq %xmm26, %xmm23 {%k2}
+
+// CHECK: vcvttpd2udq %xmm26, %xmm23 {%k2} {z}
+// CHECK: encoding: [0x62,0x81,0xfc,0x8a,0x78,0xfa]
+ vcvttpd2udq %xmm26, %xmm23 {%k2} {z}
+
+// CHECK: vcvttpd2udqx (%rcx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0x39]
+ vcvttpd2udqx (%rcx), %xmm23
+
+// CHECK: vcvttpd2udqx 291(%rax,%r14,8), %xmm23
+// CHECK: encoding: [0x62,0xa1,0xfc,0x08,0x78,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vcvttpd2udqx 291(%rax,%r14,8), %xmm23
+
+// CHECK: vcvttpd2udq (%rcx){1to2}, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0x39]
+ vcvttpd2udq (%rcx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udqx 2032(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0x7a,0x7f]
+ vcvttpd2udqx 2032(%rdx), %xmm23
+
+// CHECK: vcvttpd2udqx 2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0xba,0x00,0x08,0x00,0x00]
+ vcvttpd2udqx 2048(%rdx), %xmm23
+
+// CHECK: vcvttpd2udqx -2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0x7a,0x80]
+ vcvttpd2udqx -2048(%rdx), %xmm23
+
+// CHECK: vcvttpd2udqx -2064(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x08,0x78,0xba,0xf0,0xf7,0xff,0xff]
+ vcvttpd2udqx -2064(%rdx), %xmm23
+
+// CHECK: vcvttpd2udq 1016(%rdx){1to2}, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0x7a,0x7f]
+ vcvttpd2udq 1016(%rdx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udq 1024(%rdx){1to2}, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0xba,0x00,0x04,0x00,0x00]
+ vcvttpd2udq 1024(%rdx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udq -1024(%rdx){1to2}, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0x7a,0x80]
+ vcvttpd2udq -1024(%rdx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udq -1032(%rdx){1to2}, %xmm23
+// CHECK: encoding: [0x62,0xe1,0xfc,0x18,0x78,0xba,0xf8,0xfb,0xff,0xff]
+ vcvttpd2udq -1032(%rdx){1to2}, %xmm23
+
+// CHECK: vcvttpd2udq %ymm23, %xmm28
+// CHECK: encoding: [0x62,0x21,0xfc,0x28,0x78,0xe7]
+ vcvttpd2udq %ymm23, %xmm28
+
+// CHECK: vcvttpd2udq %ymm23, %xmm28 {%k6}
+// CHECK: encoding: [0x62,0x21,0xfc,0x2e,0x78,0xe7]
+ vcvttpd2udq %ymm23, %xmm28 {%k6}
+
+// CHECK: vcvttpd2udq %ymm23, %xmm28 {%k6} {z}
+// CHECK: encoding: [0x62,0x21,0xfc,0xae,0x78,0xe7]
+ vcvttpd2udq %ymm23, %xmm28 {%k6} {z}
+
+// CHECK: vcvttpd2udqy (%rcx), %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0x21]
+ vcvttpd2udqy (%rcx), %xmm28
+
+// CHECK: vcvttpd2udqy 291(%rax,%r14,8), %xmm28
+// CHECK: encoding: [0x62,0x21,0xfc,0x28,0x78,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vcvttpd2udqy 291(%rax,%r14,8), %xmm28
+
+// CHECK: vcvttpd2udq (%rcx){1to4}, %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0x21]
+ vcvttpd2udq (%rcx){1to4}, %xmm28
+
+// CHECK: vcvttpd2udqy 4064(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0x62,0x7f]
+ vcvttpd2udqy 4064(%rdx), %xmm28
+
+// CHECK: vcvttpd2udqy 4096(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0xa2,0x00,0x10,0x00,0x00]
+ vcvttpd2udqy 4096(%rdx), %xmm28
+
+// CHECK: vcvttpd2udqy -4096(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0x62,0x80]
+ vcvttpd2udqy -4096(%rdx), %xmm28
+
+// CHECK: vcvttpd2udqy -4128(%rdx), %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x28,0x78,0xa2,0xe0,0xef,0xff,0xff]
+ vcvttpd2udqy -4128(%rdx), %xmm28
+
+// CHECK: vcvttpd2udq 1016(%rdx){1to4}, %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0x62,0x7f]
+ vcvttpd2udq 1016(%rdx){1to4}, %xmm28
+
+// CHECK: vcvttpd2udq 1024(%rdx){1to4}, %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0xa2,0x00,0x04,0x00,0x00]
+ vcvttpd2udq 1024(%rdx){1to4}, %xmm28
+
+// CHECK: vcvttpd2udq -1024(%rdx){1to4}, %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0x62,0x80]
+ vcvttpd2udq -1024(%rdx){1to4}, %xmm28
+
+// CHECK: vcvttpd2udq -1032(%rdx){1to4}, %xmm28
+// CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0xa2,0xf8,0xfb,0xff,0xff]
+ vcvttpd2udq -1032(%rdx){1to4}, %xmm28
+
+// CHECK: vpternlogd $171, %xmm25, %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x03,0x65,0x00,0x25,0xd9,0xab]
+ vpternlogd $0xab, %xmm25, %xmm19, %xmm27
+
+// CHECK: vpternlogd $171, %xmm25, %xmm19, %xmm27 {%k7}
+// CHECK: encoding: [0x62,0x03,0x65,0x07,0x25,0xd9,0xab]
+ vpternlogd $0xab, %xmm25, %xmm19, %xmm27 {%k7}
+
+// CHECK: vpternlogd $171, %xmm25, %xmm19, %xmm27 {%k7} {z}
+// CHECK: encoding: [0x62,0x03,0x65,0x87,0x25,0xd9,0xab]
+ vpternlogd $0xab, %xmm25, %xmm19, %xmm27 {%k7} {z}
+
+// CHECK: vpternlogd $123, %xmm25, %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x03,0x65,0x00,0x25,0xd9,0x7b]
+ vpternlogd $0x7b, %xmm25, %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, (%rcx), %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x19,0x7b]
+ vpternlogd $0x7b, (%rcx), %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, 291(%rax,%r14,8), %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x23,0x65,0x00,0x25,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpternlogd $0x7b, 291(%rax,%r14,8), %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, (%rcx){1to4}, %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x19,0x7b]
+ vpternlogd $0x7b, (%rcx){1to4}, %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, 2032(%rdx), %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x5a,0x7f,0x7b]
+ vpternlogd $0x7b, 2032(%rdx), %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, 2048(%rdx), %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vpternlogd $0x7b, 2048(%rdx), %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, -2048(%rdx), %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x5a,0x80,0x7b]
+ vpternlogd $0x7b, -2048(%rdx), %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, -2064(%rdx), %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vpternlogd $0x7b, -2064(%rdx), %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, 508(%rdx){1to4}, %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x5a,0x7f,0x7b]
+ vpternlogd $0x7b, 508(%rdx){1to4}, %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, 512(%rdx){1to4}, %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x9a,0x00,0x02,0x00,0x00,0x7b]
+ vpternlogd $0x7b, 512(%rdx){1to4}, %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, -512(%rdx){1to4}, %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x5a,0x80,0x7b]
+ vpternlogd $0x7b, -512(%rdx){1to4}, %xmm19, %xmm27
+
+// CHECK: vpternlogd $123, -516(%rdx){1to4}, %xmm19, %xmm27
+// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+ vpternlogd $0x7b, -516(%rdx){1to4}, %xmm19, %xmm27
+
+// CHECK: vpternlogd $171, %ymm20, %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x23,0x75,0x20,0x25,0xec,0xab]
+ vpternlogd $0xab, %ymm20, %ymm17, %ymm29
+
+// CHECK: vpternlogd $171, %ymm20, %ymm17, %ymm29 {%k3}
+// CHECK: encoding: [0x62,0x23,0x75,0x23,0x25,0xec,0xab]
+ vpternlogd $0xab, %ymm20, %ymm17, %ymm29 {%k3}
+
+// CHECK: vpternlogd $171, %ymm20, %ymm17, %ymm29 {%k3} {z}
+// CHECK: encoding: [0x62,0x23,0x75,0xa3,0x25,0xec,0xab]
+ vpternlogd $0xab, %ymm20, %ymm17, %ymm29 {%k3} {z}
+
+// CHECK: vpternlogd $123, %ymm20, %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x23,0x75,0x20,0x25,0xec,0x7b]
+ vpternlogd $0x7b, %ymm20, %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, (%rcx), %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0x29,0x7b]
+ vpternlogd $0x7b, (%rcx), %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, 291(%rax,%r14,8), %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x23,0x75,0x20,0x25,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpternlogd $0x7b, 291(%rax,%r14,8), %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, (%rcx){1to8}, %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0x29,0x7b]
+ vpternlogd $0x7b, (%rcx){1to8}, %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, 4064(%rdx), %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0x6a,0x7f,0x7b]
+ vpternlogd $0x7b, 4064(%rdx), %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, 4096(%rdx), %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vpternlogd $0x7b, 4096(%rdx), %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, -4096(%rdx), %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0x6a,0x80,0x7b]
+ vpternlogd $0x7b, -4096(%rdx), %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, -4128(%rdx), %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vpternlogd $0x7b, -4128(%rdx), %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, 508(%rdx){1to8}, %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0x6a,0x7f,0x7b]
+ vpternlogd $0x7b, 508(%rdx){1to8}, %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, 512(%rdx){1to8}, %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0xaa,0x00,0x02,0x00,0x00,0x7b]
+ vpternlogd $0x7b, 512(%rdx){1to8}, %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, -512(%rdx){1to8}, %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0x6a,0x80,0x7b]
+ vpternlogd $0x7b, -512(%rdx){1to8}, %ymm17, %ymm29
+
+// CHECK: vpternlogd $123, -516(%rdx){1to8}, %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+ vpternlogd $0x7b, -516(%rdx){1to8}, %ymm17, %ymm29
+
+// CHECK: vpternlogq $171, %xmm22, %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xb5,0x00,0x25,0xce,0xab]
+ vpternlogq $0xab, %xmm22, %xmm25, %xmm17
+
+// CHECK: vpternlogq $171, %xmm22, %xmm25, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0xa3,0xb5,0x01,0x25,0xce,0xab]
+ vpternlogq $0xab, %xmm22, %xmm25, %xmm17 {%k1}
+
+// CHECK: vpternlogq $171, %xmm22, %xmm25, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xa3,0xb5,0x81,0x25,0xce,0xab]
+ vpternlogq $0xab, %xmm22, %xmm25, %xmm17 {%k1} {z}
+
+// CHECK: vpternlogq $123, %xmm22, %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xb5,0x00,0x25,0xce,0x7b]
+ vpternlogq $0x7b, %xmm22, %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, (%rcx), %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x09,0x7b]
+ vpternlogq $0x7b, (%rcx), %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, 291(%rax,%r14,8), %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xa3,0xb5,0x00,0x25,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpternlogq $0x7b, 291(%rax,%r14,8), %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, (%rcx){1to2}, %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x09,0x7b]
+ vpternlogq $0x7b, (%rcx){1to2}, %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, 2032(%rdx), %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x4a,0x7f,0x7b]
+ vpternlogq $0x7b, 2032(%rdx), %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, 2048(%rdx), %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vpternlogq $0x7b, 2048(%rdx), %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, -2048(%rdx), %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x4a,0x80,0x7b]
+ vpternlogq $0x7b, -2048(%rdx), %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, -2064(%rdx), %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vpternlogq $0x7b, -2064(%rdx), %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, 1016(%rdx){1to2}, %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x4a,0x7f,0x7b]
+ vpternlogq $0x7b, 1016(%rdx){1to2}, %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, 1024(%rdx){1to2}, %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ vpternlogq $0x7b, 1024(%rdx){1to2}, %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, -1024(%rdx){1to2}, %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x4a,0x80,0x7b]
+ vpternlogq $0x7b, -1024(%rdx){1to2}, %xmm25, %xmm17
+
+// CHECK: vpternlogq $123, -1032(%rdx){1to2}, %xmm25, %xmm17
+// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ vpternlogq $0x7b, -1032(%rdx){1to2}, %xmm25, %xmm17
+
+// CHECK: vpternlogq $171, %ymm25, %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x25,0xd1,0xab]
+ vpternlogq $0xab, %ymm25, %ymm23, %ymm26
+
+// CHECK: vpternlogq $171, %ymm25, %ymm23, %ymm26 {%k6}
+// CHECK: encoding: [0x62,0x03,0xc5,0x26,0x25,0xd1,0xab]
+ vpternlogq $0xab, %ymm25, %ymm23, %ymm26 {%k6}
+
+// CHECK: vpternlogq $171, %ymm25, %ymm23, %ymm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x03,0xc5,0xa6,0x25,0xd1,0xab]
+ vpternlogq $0xab, %ymm25, %ymm23, %ymm26 {%k6} {z}
+
+// CHECK: vpternlogq $123, %ymm25, %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x25,0xd1,0x7b]
+ vpternlogq $0x7b, %ymm25, %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, (%rcx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x11,0x7b]
+ vpternlogq $0x7b, (%rcx), %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, 291(%rax,%r14,8), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x23,0xc5,0x20,0x25,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vpternlogq $0x7b, 291(%rax,%r14,8), %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, (%rcx){1to4}, %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x11,0x7b]
+ vpternlogq $0x7b, (%rcx){1to4}, %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, 4064(%rdx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x52,0x7f,0x7b]
+ vpternlogq $0x7b, 4064(%rdx), %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, 4096(%rdx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x92,0x00,0x10,0x00,0x00,0x7b]
+ vpternlogq $0x7b, 4096(%rdx), %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, -4096(%rdx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x52,0x80,0x7b]
+ vpternlogq $0x7b, -4096(%rdx), %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, -4128(%rdx), %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x92,0xe0,0xef,0xff,0xff,0x7b]
+ vpternlogq $0x7b, -4128(%rdx), %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, 1016(%rdx){1to4}, %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x52,0x7f,0x7b]
+ vpternlogq $0x7b, 1016(%rdx){1to4}, %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, 1024(%rdx){1to4}, %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x92,0x00,0x04,0x00,0x00,0x7b]
+ vpternlogq $0x7b, 1024(%rdx){1to4}, %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, -1024(%rdx){1to4}, %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x52,0x80,0x7b]
+ vpternlogq $0x7b, -1024(%rdx){1to4}, %ymm23, %ymm26
+
+// CHECK: vpternlogq $123, -1032(%rdx){1to4}, %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x92,0xf8,0xfb,0xff,0xff,0x7b]
+ vpternlogq $0x7b, -1032(%rdx){1to4}, %ymm23, %ymm26
+
+// CHECK: vpbroadcastd (%rcx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0x29]
+ vpbroadcastd (%rcx), %xmm29
+
+// CHECK: vpbroadcastd (%rcx), %xmm29 {%k1}
+// CHECK: encoding: [0x62,0x62,0x7d,0x09,0x58,0x29]
+ vpbroadcastd (%rcx), %xmm29 {%k1}
+
+// CHECK: vpbroadcastd (%rcx), %xmm29 {%k1} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0x89,0x58,0x29]
+ vpbroadcastd (%rcx), %xmm29 {%k1} {z}
+
+// CHECK: vpbroadcastd 291(%rax,%r14,8), %xmm29
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x58,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastd 291(%rax,%r14,8), %xmm29
+
+// CHECK: vpbroadcastd 508(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0x6a,0x7f]
+ vpbroadcastd 508(%rdx), %xmm29
+
+// CHECK: vpbroadcastd 512(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0xaa,0x00,0x02,0x00,0x00]
+ vpbroadcastd 512(%rdx), %xmm29
+
+// CHECK: vpbroadcastd -512(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0x6a,0x80]
+ vpbroadcastd -512(%rdx), %xmm29
+
+// CHECK: vpbroadcastd -516(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0xaa,0xfc,0xfd,0xff,0xff]
+ vpbroadcastd -516(%rdx), %xmm29
+
+// CHECK: vpbroadcastd (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0x21]
+ vpbroadcastd (%rcx), %ymm28
+
+// CHECK: vpbroadcastd (%rcx), %ymm28 {%k2}
+// CHECK: encoding: [0x62,0x62,0x7d,0x2a,0x58,0x21]
+ vpbroadcastd (%rcx), %ymm28 {%k2}
+
+// CHECK: vpbroadcastd (%rcx), %ymm28 {%k2} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xaa,0x58,0x21]
+ vpbroadcastd (%rcx), %ymm28 {%k2} {z}
+
+// CHECK: vpbroadcastd 291(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x58,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastd 291(%rax,%r14,8), %ymm28
+
+// CHECK: vpbroadcastd 508(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0x62,0x7f]
+ vpbroadcastd 508(%rdx), %ymm28
+
+// CHECK: vpbroadcastd 512(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0xa2,0x00,0x02,0x00,0x00]
+ vpbroadcastd 512(%rdx), %ymm28
+
+// CHECK: vpbroadcastd -512(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0x62,0x80]
+ vpbroadcastd -512(%rdx), %ymm28
+
+// CHECK: vpbroadcastd -516(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0xa2,0xfc,0xfd,0xff,0xff]
+ vpbroadcastd -516(%rdx), %ymm28
+
+// CHECK: vpbroadcastd %xmm18, %xmm29
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x58,0xea]
+ vpbroadcastd %xmm18, %xmm29
+
+// CHECK: vpbroadcastd %xmm18, %xmm29 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0a,0x58,0xea]
+ vpbroadcastd %xmm18, %xmm29 {%k2}
+
+// CHECK: vpbroadcastd %xmm18, %xmm29 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8a,0x58,0xea]
+ vpbroadcastd %xmm18, %xmm29 {%k2} {z}
+
+// CHECK: vpbroadcastd %xmm26, %ymm17
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x58,0xca]
+ vpbroadcastd %xmm26, %ymm17
+
+// CHECK: vpbroadcastd %xmm26, %ymm17 {%k3}
+// CHECK: encoding: [0x62,0x82,0x7d,0x2b,0x58,0xca]
+ vpbroadcastd %xmm26, %ymm17 {%k3}
+
+// CHECK: vpbroadcastd %xmm26, %ymm17 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xab,0x58,0xca]
+ vpbroadcastd %xmm26, %ymm17 {%k3} {z}
+
+// CHECK: vpbroadcastd %eax, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf0]
+ vpbroadcastd %eax, %xmm22
+
+// CHECK: vpbroadcastd %eax, %xmm22 {%k5}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0d,0x7c,0xf0]
+ vpbroadcastd %eax, %xmm22 {%k5}
+
+// CHECK: vpbroadcastd %eax, %xmm22 {%k5} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x8d,0x7c,0xf0]
+ vpbroadcastd %eax, %xmm22 {%k5} {z}
+
+// CHECK: vpbroadcastd %ebp, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf5]
+ vpbroadcastd %ebp, %xmm22
+
+// CHECK: vpbroadcastd %r13d, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x7d,0x08,0x7c,0xf5]
+ vpbroadcastd %r13d, %xmm22
+
+// CHECK: vpbroadcastd %eax, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7c,0xc8]
+ vpbroadcastd %eax, %ymm25
+
+// CHECK: vpbroadcastd %eax, %ymm25 {%k5}
+// CHECK: encoding: [0x62,0x62,0x7d,0x2d,0x7c,0xc8]
+ vpbroadcastd %eax, %ymm25 {%k5}
+
+// CHECK: vpbroadcastd %eax, %ymm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xad,0x7c,0xc8]
+ vpbroadcastd %eax, %ymm25 {%k5} {z}
+
+// CHECK: vpbroadcastd %ebp, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7c,0xcd]
+ vpbroadcastd %ebp, %ymm25
+
+// CHECK: vpbroadcastd %r13d, %ymm25
+// CHECK: encoding: [0x62,0x42,0x7d,0x28,0x7c,0xcd]
+ vpbroadcastd %r13d, %ymm25
+
+// CHECK: vpbroadcastq (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0x31]
+ vpbroadcastq (%rcx), %xmm30
+
+// CHECK: vpbroadcastq (%rcx), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x62,0xfd,0x0f,0x59,0x31]
+ vpbroadcastq (%rcx), %xmm30 {%k7}
+
+// CHECK: vpbroadcastq (%rcx), %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0xfd,0x8f,0x59,0x31]
+ vpbroadcastq (%rcx), %xmm30 {%k7} {z}
+
+// CHECK: vpbroadcastq 291(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0xfd,0x08,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastq 291(%rax,%r14,8), %xmm30
+
+// CHECK: vpbroadcastq 1016(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0x72,0x7f]
+ vpbroadcastq 1016(%rdx), %xmm30
+
+// CHECK: vpbroadcastq 1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0xb2,0x00,0x04,0x00,0x00]
+ vpbroadcastq 1024(%rdx), %xmm30
+
+// CHECK: vpbroadcastq -1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0x72,0x80]
+ vpbroadcastq -1024(%rdx), %xmm30
+
+// CHECK: vpbroadcastq -1032(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff]
+ vpbroadcastq -1032(%rdx), %xmm30
+
+// CHECK: vpbroadcastq (%rcx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x19]
+ vpbroadcastq (%rcx), %ymm19
+
+// CHECK: vpbroadcastq (%rcx), %ymm19 {%k7}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2f,0x59,0x19]
+ vpbroadcastq (%rcx), %ymm19 {%k7}
+
+// CHECK: vpbroadcastq (%rcx), %ymm19 {%k7} {z}
+// CHECK: encoding: [0x62,0xe2,0xfd,0xaf,0x59,0x19]
+ vpbroadcastq (%rcx), %ymm19 {%k7} {z}
+
+// CHECK: vpbroadcastq 291(%rax,%r14,8), %ymm19
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x59,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastq 291(%rax,%r14,8), %ymm19
+
+// CHECK: vpbroadcastq 1016(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x5a,0x7f]
+ vpbroadcastq 1016(%rdx), %ymm19
+
+// CHECK: vpbroadcastq 1024(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x9a,0x00,0x04,0x00,0x00]
+ vpbroadcastq 1024(%rdx), %ymm19
+
+// CHECK: vpbroadcastq -1024(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x5a,0x80]
+ vpbroadcastq -1024(%rdx), %ymm19
+
+// CHECK: vpbroadcastq -1032(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x9a,0xf8,0xfb,0xff,0xff]
+ vpbroadcastq -1032(%rdx), %ymm19
+
+// CHECK: vpbroadcastq %xmm24, %xmm19
+// CHECK: encoding: [0x62,0x82,0xfd,0x08,0x59,0xd8]
+ vpbroadcastq %xmm24, %xmm19
+
+// CHECK: vpbroadcastq %xmm24, %xmm19 {%k6}
+// CHECK: encoding: [0x62,0x82,0xfd,0x0e,0x59,0xd8]
+ vpbroadcastq %xmm24, %xmm19 {%k6}
+
+// CHECK: vpbroadcastq %xmm24, %xmm19 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0x8e,0x59,0xd8]
+ vpbroadcastq %xmm24, %xmm19 {%k6} {z}
+
+// CHECK: vpbroadcastq %xmm26, %ymm19
+// CHECK: encoding: [0x62,0x82,0xfd,0x28,0x59,0xda]
+ vpbroadcastq %xmm26, %ymm19
+
+// CHECK: vpbroadcastq %xmm26, %ymm19 {%k6}
+// CHECK: encoding: [0x62,0x82,0xfd,0x2e,0x59,0xda]
+ vpbroadcastq %xmm26, %ymm19 {%k6}
+
+// CHECK: vpbroadcastq %xmm26, %ymm19 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0xae,0x59,0xda]
+ vpbroadcastq %xmm26, %ymm19 {%k6} {z}
+
+// CHECK: vpbroadcastq %rax, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x7c,0xf0]
+ vpbroadcastq %rax, %xmm22
+
+// CHECK: vpbroadcastq %rax, %xmm22 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x7c,0xf0]
+ vpbroadcastq %rax, %xmm22 {%k2}
+
+// CHECK: vpbroadcastq %rax, %xmm22 {%k2} {z}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x8a,0x7c,0xf0]
+ vpbroadcastq %rax, %xmm22 {%k2} {z}
+
+// CHECK: vpbroadcastq %r8, %xmm22
+// CHECK: encoding: [0x62,0xc2,0xfd,0x08,0x7c,0xf0]
+ vpbroadcastq %r8, %xmm22
+
+// CHECK: vpbroadcastq %rax, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x7c,0xd8]
+ vpbroadcastq %rax, %ymm19
+
+// CHECK: vpbroadcastq %rax, %ymm19 {%k5}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2d,0x7c,0xd8]
+ vpbroadcastq %rax, %ymm19 {%k5}
+
+// CHECK: vpbroadcastq %rax, %ymm19 {%k5} {z}
+// CHECK: encoding: [0x62,0xe2,0xfd,0xad,0x7c,0xd8]
+ vpbroadcastq %rax, %ymm19 {%k5} {z}
+
+// CHECK: vpbroadcastq %r8, %ymm19
+// CHECK: encoding: [0x62,0xc2,0xfd,0x28,0x7c,0xd8]
+ vpbroadcastq %r8, %ymm19
+
+// CHECK: vcvtph2ps %xmm17, %xmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x13,0xd9]
+ vcvtph2ps %xmm17, %xmm27
+
+// CHECK: vcvtph2ps %xmm17, %xmm27 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0a,0x13,0xd9]
+ vcvtph2ps %xmm17, %xmm27 {%k2}
+
+// CHECK: vcvtph2ps %xmm17, %xmm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8a,0x13,0xd9]
+ vcvtph2ps %xmm17, %xmm27 {%k2} {z}
+
+// CHECK: vcvtph2ps (%rcx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x19]
+ vcvtph2ps (%rcx), %xmm27
+
+// CHECK: vcvtph2ps 291(%rax,%r14,8), %xmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x13,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vcvtph2ps 291(%rax,%r14,8), %xmm27
+
+// CHECK: vcvtph2ps 1016(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x5a,0x7f]
+ vcvtph2ps 1016(%rdx), %xmm27
+
+// CHECK: vcvtph2ps 1024(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x9a,0x00,0x04,0x00,0x00]
+ vcvtph2ps 1024(%rdx), %xmm27
+
+// CHECK: vcvtph2ps -1024(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x5a,0x80]
+ vcvtph2ps -1024(%rdx), %xmm27
+
+// CHECK: vcvtph2ps -1032(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x13,0x9a,0xf8,0xfb,0xff,0xff]
+ vcvtph2ps -1032(%rdx), %xmm27
+
+// CHECK: vcvtph2ps %xmm22, %ymm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x13,0xf6]
+ vcvtph2ps %xmm22, %ymm30
+
+// CHECK: vcvtph2ps %xmm22, %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x13,0xf6]
+ vcvtph2ps %xmm22, %ymm30 {%k7}
+
+// CHECK: vcvtph2ps %xmm22, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x13,0xf6]
+ vcvtph2ps %xmm22, %ymm30 {%k7} {z}
+
+// CHECK: vcvtph2ps (%rcx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0x31]
+ vcvtph2ps (%rcx), %ymm30
+
+// CHECK: vcvtph2ps 291(%rax,%r14,8), %ymm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x13,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtph2ps 291(%rax,%r14,8), %ymm30
+
+// CHECK: vcvtph2ps 2032(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0x72,0x7f]
+ vcvtph2ps 2032(%rdx), %ymm30
+
+// CHECK: vcvtph2ps 2048(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0xb2,0x00,0x08,0x00,0x00]
+ vcvtph2ps 2048(%rdx), %ymm30
+
+// CHECK: vcvtph2ps -2048(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0x72,0x80]
+ vcvtph2ps -2048(%rdx), %ymm30
+
+// CHECK: vcvtph2ps -2064(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x13,0xb2,0xf0,0xf7,0xff,0xff]
+ vcvtph2ps -2064(%rdx), %ymm30
+
+// CHECK: vcvtps2ph $171, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x1d,0xcd,0xab]
+ vcvtps2ph $0xab, %xmm25, %xmm21
+
+// CHECK: vcvtps2ph $171, %xmm25, %xmm21 {%k5}
+// CHECK: encoding: [0x62,0x23,0x7d,0x0d,0x1d,0xcd,0xab]
+ vcvtps2ph $0xab, %xmm25, %xmm21 {%k5}
+
+// CHECK: vcvtps2ph $171, %xmm25, %xmm21 {%k5} {z}
+// CHECK: encoding: [0x62,0x23,0x7d,0x8d,0x1d,0xcd,0xab]
+ vcvtps2ph $0xab, %xmm25, %xmm21 {%k5} {z}
+
+// CHECK: vcvtps2ph $123, %xmm25, %xmm21
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x1d,0xcd,0x7b]
+ vcvtps2ph $0x7b, %xmm25, %xmm21
+
+// CHECK: vcvtps2ph $171, %ymm28, %xmm23
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x1d,0xe7,0xab]
+ vcvtps2ph $0xab, %ymm28, %xmm23
+
+// CHECK: vcvtps2ph $171, %ymm28, %xmm23 {%k3}
+// CHECK: encoding: [0x62,0x23,0x7d,0x2b,0x1d,0xe7,0xab]
+ vcvtps2ph $0xab, %ymm28, %xmm23 {%k3}
+
+// CHECK: vcvtps2ph $171, %ymm28, %xmm23 {%k3} {z}
+// CHECK: encoding: [0x62,0x23,0x7d,0xab,0x1d,0xe7,0xab]
+ vcvtps2ph $0xab, %ymm28, %xmm23 {%k3} {z}
+
+// CHECK: vcvtps2ph $123, %ymm28, %xmm23
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x1d,0xe7,0x7b]
+ vcvtps2ph $0x7b, %ymm28, %xmm23
+
+// CHECK: vcvtps2ph $171, %xmm27, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x19,0xab]
+ vcvtps2ph $0xab, %xmm27, (%rcx)
+
+// CHECK: vcvtps2ph $171, %xmm27, (%rcx) {%k4}
+// CHECK: encoding: [0x62,0x63,0x7d,0x0c,0x1d,0x19,0xab]
+ vcvtps2ph $0xab, %xmm27, (%rcx) {%k4}
+
+// CHECK: vcvtps2ph $123, %xmm27, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x19,0x7b]
+ vcvtps2ph $0x7b, %xmm27, (%rcx)
+
+// CHECK: vcvtps2ph $123, %xmm27, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0x7d,0x08,0x1d,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcvtps2ph $0x7b, %xmm27, 291(%rax,%r14,8)
+
+// CHECK: vcvtps2ph $123, %xmm27, 1016(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x5a,0x7f,0x7b]
+ vcvtps2ph $0x7b, %xmm27, 1016(%rdx)
+
+// CHECK: vcvtps2ph $123, %xmm27, 1024(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ vcvtps2ph $0x7b, %xmm27, 1024(%rdx)
+
+// CHECK: vcvtps2ph $123, %xmm27, -1024(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x5a,0x80,0x7b]
+ vcvtps2ph $0x7b, %xmm27, -1024(%rdx)
+
+// CHECK: vcvtps2ph $123, %xmm27, -1032(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x08,0x1d,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ vcvtps2ph $0x7b, %xmm27, -1032(%rdx)
+
+// CHECK: vcvtps2ph $171, %ymm30, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0x31,0xab]
+ vcvtps2ph $0xab, %ymm30, (%rcx)
+
+// CHECK: vcvtps2ph $171, %ymm30, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0x63,0x7d,0x2a,0x1d,0x31,0xab]
+ vcvtps2ph $0xab, %ymm30, (%rcx) {%k2}
+
+// CHECK: vcvtps2ph $123, %ymm30, (%rcx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0x31,0x7b]
+ vcvtps2ph $0x7b, %ymm30, (%rcx)
+
+// CHECK: vcvtps2ph $123, %ymm30, 291(%rax,%r14,8)
+// CHECK: encoding: [0x62,0x23,0x7d,0x28,0x1d,0xb4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vcvtps2ph $0x7b, %ymm30, 291(%rax,%r14,8)
+
+// CHECK: vcvtps2ph $123, %ymm30, 2032(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0x72,0x7f,0x7b]
+ vcvtps2ph $0x7b, %ymm30, 2032(%rdx)
+
+// CHECK: vcvtps2ph $123, %ymm30, 2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0xb2,0x00,0x08,0x00,0x00,0x7b]
+ vcvtps2ph $0x7b, %ymm30, 2048(%rdx)
+
+// CHECK: vcvtps2ph $123, %ymm30, -2048(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0x72,0x80,0x7b]
+ vcvtps2ph $0x7b, %ymm30, -2048(%rdx)
+
+// CHECK: vcvtps2ph $123, %ymm30, -2064(%rdx)
+// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
+ vcvtps2ph $0x7b, %ymm30, -2064(%rdx)
+
+// CHECK: vmovshdup %xmm18, %xmm23
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x16,0xfa]
+ vmovshdup %xmm18, %xmm23
+
+// CHECK: vmovshdup %xmm18, %xmm23 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x0a,0x16,0xfa]
+ vmovshdup %xmm18, %xmm23 {%k2}
+
+// CHECK: vmovshdup %xmm18, %xmm23 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x8a,0x16,0xfa]
+ vmovshdup %xmm18, %xmm23 {%k2} {z}
+
+// CHECK: vmovshdup (%rcx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0x39]
+ vmovshdup (%rcx), %xmm23
+
+// CHECK: vmovshdup 291(%rax,%r14,8), %xmm23
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x16,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vmovshdup 291(%rax,%r14,8), %xmm23
+
+// CHECK: vmovshdup 2032(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0x7a,0x7f]
+ vmovshdup 2032(%rdx), %xmm23
+
+// CHECK: vmovshdup 2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0xba,0x00,0x08,0x00,0x00]
+ vmovshdup 2048(%rdx), %xmm23
+
+// CHECK: vmovshdup -2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0x7a,0x80]
+ vmovshdup -2048(%rdx), %xmm23
+
+// CHECK: vmovshdup -2064(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0xba,0xf0,0xf7,0xff,0xff]
+ vmovshdup -2064(%rdx), %xmm23
+
+// CHECK: vmovshdup %ymm24, %ymm18
+// CHECK: encoding: [0x62,0x81,0x7e,0x28,0x16,0xd0]
+ vmovshdup %ymm24, %ymm18
+
+// CHECK: vmovshdup %ymm24, %ymm18 {%k3}
+// CHECK: encoding: [0x62,0x81,0x7e,0x2b,0x16,0xd0]
+ vmovshdup %ymm24, %ymm18 {%k3}
+
+// CHECK: vmovshdup %ymm24, %ymm18 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0x7e,0xab,0x16,0xd0]
+ vmovshdup %ymm24, %ymm18 {%k3} {z}
+
+// CHECK: vmovshdup (%rcx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x11]
+ vmovshdup (%rcx), %ymm18
+
+// CHECK: vmovshdup 291(%rax,%r14,8), %ymm18
+// CHECK: encoding: [0x62,0xa1,0x7e,0x28,0x16,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vmovshdup 291(%rax,%r14,8), %ymm18
+
+// CHECK: vmovshdup 4064(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x52,0x7f]
+ vmovshdup 4064(%rdx), %ymm18
+
+// CHECK: vmovshdup 4096(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x92,0x00,0x10,0x00,0x00]
+ vmovshdup 4096(%rdx), %ymm18
+
+// CHECK: vmovshdup -4096(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x52,0x80]
+ vmovshdup -4096(%rdx), %ymm18
+
+// CHECK: vmovshdup -4128(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x92,0xe0,0xef,0xff,0xff]
+ vmovshdup -4128(%rdx), %ymm18
+
+// CHECK: vmovsldup %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x21,0x7e,0x08,0x12,0xcd]
+ vmovsldup %xmm21, %xmm25
+
+// CHECK: vmovsldup %xmm21, %xmm25 {%k5}
+// CHECK: encoding: [0x62,0x21,0x7e,0x0d,0x12,0xcd]
+ vmovsldup %xmm21, %xmm25 {%k5}
+
+// CHECK: vmovsldup %xmm21, %xmm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0x7e,0x8d,0x12,0xcd]
+ vmovsldup %xmm21, %xmm25 {%k5} {z}
+
+// CHECK: vmovsldup (%rcx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x09]
+ vmovsldup (%rcx), %xmm25
+
+// CHECK: vmovsldup 291(%rax,%r14,8), %xmm25
+// CHECK: encoding: [0x62,0x21,0x7e,0x08,0x12,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovsldup 291(%rax,%r14,8), %xmm25
+
+// CHECK: vmovsldup 2032(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x4a,0x7f]
+ vmovsldup 2032(%rdx), %xmm25
+
+// CHECK: vmovsldup 2048(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x8a,0x00,0x08,0x00,0x00]
+ vmovsldup 2048(%rdx), %xmm25
+
+// CHECK: vmovsldup -2048(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x4a,0x80]
+ vmovsldup -2048(%rdx), %xmm25
+
+// CHECK: vmovsldup -2064(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x8a,0xf0,0xf7,0xff,0xff]
+ vmovsldup -2064(%rdx), %xmm25
+
+// CHECK: vmovsldup %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x01,0x7e,0x28,0x12,0xc5]
+ vmovsldup %ymm29, %ymm24
+
+// CHECK: vmovsldup %ymm29, %ymm24 {%k5}
+// CHECK: encoding: [0x62,0x01,0x7e,0x2d,0x12,0xc5]
+ vmovsldup %ymm29, %ymm24 {%k5}
+
+// CHECK: vmovsldup %ymm29, %ymm24 {%k5} {z}
+// CHECK: encoding: [0x62,0x01,0x7e,0xad,0x12,0xc5]
+ vmovsldup %ymm29, %ymm24 {%k5} {z}
+
+// CHECK: vmovsldup (%rcx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x01]
+ vmovsldup (%rcx), %ymm24
+
+// CHECK: vmovsldup 291(%rax,%r14,8), %ymm24
+// CHECK: encoding: [0x62,0x21,0x7e,0x28,0x12,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vmovsldup 291(%rax,%r14,8), %ymm24
+
+// CHECK: vmovsldup 4064(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x42,0x7f]
+ vmovsldup 4064(%rdx), %ymm24
+
+// CHECK: vmovsldup 4096(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x82,0x00,0x10,0x00,0x00]
+ vmovsldup 4096(%rdx), %ymm24
+
+// CHECK: vmovsldup -4096(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x42,0x80]
+ vmovsldup -4096(%rdx), %ymm24
+
+// CHECK: vmovsldup -4128(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x82,0xe0,0xef,0xff,0xff]
+ vmovsldup -4128(%rdx), %ymm24
+
+// CHECK: vmovddup %xmm23, %xmm17
+// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x12,0xcf]
+ vmovddup %xmm23, %xmm17
+
+// CHECK: vmovddup %xmm23, %xmm17 {%k6}
+// CHECK: encoding: [0x62,0xa1,0xff,0x0e,0x12,0xcf]
+ vmovddup %xmm23, %xmm17 {%k6}
+
+// CHECK: vmovddup %xmm23, %xmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0xff,0x8e,0x12,0xcf]
+ vmovddup %xmm23, %xmm17 {%k6} {z}
+
+// CHECK: vmovddup (%rcx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x09]
+ vmovddup (%rcx), %xmm17
+
+// CHECK: vmovddup 291(%rax,%r14,8), %xmm17
+// CHECK: encoding: [0x62,0xa1,0xff,0x08,0x12,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovddup 291(%rax,%r14,8), %xmm17
+
+// CHECK: vmovddup 1016(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x4a,0x7f]
+ vmovddup 1016(%rdx), %xmm17
+
+// CHECK: vmovddup 1024(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x8a,0x00,0x04,0x00,0x00]
+ vmovddup 1024(%rdx), %xmm17
+
+// CHECK: vmovddup -1024(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x4a,0x80]
+ vmovddup -1024(%rdx), %xmm17
+
+// CHECK: vmovddup -1032(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe1,0xff,0x08,0x12,0x8a,0xf8,0xfb,0xff,0xff]
+ vmovddup -1032(%rdx), %xmm17
+
+// CHECK: vmovddup %ymm25, %ymm18
+// CHECK: encoding: [0x62,0x81,0xff,0x28,0x12,0xd1]
+ vmovddup %ymm25, %ymm18
+
+// CHECK: vmovddup %ymm25, %ymm18 {%k4}
+// CHECK: encoding: [0x62,0x81,0xff,0x2c,0x12,0xd1]
+ vmovddup %ymm25, %ymm18 {%k4}
+
+// CHECK: vmovddup %ymm25, %ymm18 {%k4} {z}
+// CHECK: encoding: [0x62,0x81,0xff,0xac,0x12,0xd1]
+ vmovddup %ymm25, %ymm18 {%k4} {z}
+
+// CHECK: vmovddup (%rcx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x11]
+ vmovddup (%rcx), %ymm18
+
+// CHECK: vmovddup 291(%rax,%r14,8), %ymm18
+// CHECK: encoding: [0x62,0xa1,0xff,0x28,0x12,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vmovddup 291(%rax,%r14,8), %ymm18
+
+// CHECK: vmovddup 4064(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x52,0x7f]
+ vmovddup 4064(%rdx), %ymm18
+
+// CHECK: vmovddup 4096(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x92,0x00,0x10,0x00,0x00]
+ vmovddup 4096(%rdx), %ymm18
+
+// CHECK: vmovddup -4096(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x52,0x80]
+ vmovddup -4096(%rdx), %ymm18
+
+// CHECK: vmovddup -4128(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0xff,0x28,0x12,0x92,0xe0,0xef,0xff,0xff]
+ vmovddup -4128(%rdx), %ymm18
+
+// CHECK: vmovapd.s %xmm27, %xmm26
+// CHECK: encoding: [0x62,0x01,0xfd,0x08,0x29,0xda]
+ vmovapd.s %xmm27, %xmm26
+
+// CHECK: vmovapd.s %xmm27, %xmm26 {%k6}
+// CHECK: encoding: [0x62,0x01,0xfd,0x0e,0x29,0xda]
+ vmovapd.s %xmm27, %xmm26 {%k6}
+
+// CHECK: vmovapd.s %xmm27, %xmm26 {%k6} {z}
+// CHECK: encoding: [0x62,0x01,0xfd,0x8e,0x29,0xda]
+ vmovapd.s %xmm27, %xmm26 {%k6} {z}
+
+// CHECK: vmovapd.s %xmm26, %xmm17
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x29,0xd1]
+ vmovapd.s %xmm26, %xmm17
+
+// CHECK: vmovapd.s %xmm26, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0x21,0xfd,0x0a,0x29,0xd1]
+ vmovapd.s %xmm26, %xmm17 {%k2}
+
+// CHECK: vmovapd.s %xmm26, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0x8a,0x29,0xd1]
+ vmovapd.s %xmm26, %xmm17 {%k2} {z}
+
+// CHECK: vmovapd.s %ymm20, %ymm18
+// CHECK: encoding: [0x62,0xa1,0xfd,0x28,0x29,0xe2]
+ vmovapd.s %ymm20, %ymm18
+
+// CHECK: vmovapd.s %ymm20, %ymm18 {%k2}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x2a,0x29,0xe2]
+ vmovapd.s %ymm20, %ymm18 {%k2}
+
+// CHECK: vmovapd.s %ymm20, %ymm18 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0xfd,0xaa,0x29,0xe2]
+ vmovapd.s %ymm20, %ymm18 {%k2} {z}
+
+// CHECK: vmovapd.s %ymm20, %ymm21
+// CHECK: encoding: [0x62,0xa1,0xfd,0x28,0x29,0xe5]
+ vmovapd.s %ymm20, %ymm21
+
+// CHECK: vmovapd.s %ymm20, %ymm21 {%k6}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x2e,0x29,0xe5]
+ vmovapd.s %ymm20, %ymm21 {%k6}
+
+// CHECK: vmovapd.s %ymm20, %ymm21 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0xfd,0xae,0x29,0xe5]
+ vmovapd.s %ymm20, %ymm21 {%k6} {z}
+
+// CHECK: vmovapd.s %xmm29, %xmm17
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x29,0xe9]
+ vmovapd.s %xmm29, %xmm17
+
+// CHECK: vmovapd.s %xmm29, %xmm17 {%k1}
+// CHECK: encoding: [0x62,0x21,0xfd,0x09,0x29,0xe9]
+ vmovapd.s %xmm29, %xmm17 {%k1}
+
+// CHECK: vmovapd.s %xmm29, %xmm17 {%k1} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0x89,0x29,0xe9]
+ vmovapd.s %xmm29, %xmm17 {%k1} {z}
+
+// CHECK: vmovapd.s %xmm27, %xmm19
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x29,0xdb]
+ vmovapd.s %xmm27, %xmm19
+
+// CHECK: vmovapd.s %xmm27, %xmm19 {%k1}
+// CHECK: encoding: [0x62,0x21,0xfd,0x09,0x29,0xdb]
+ vmovapd.s %xmm27, %xmm19 {%k1}
+
+// CHECK: vmovapd.s %xmm27, %xmm19 {%k1} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0x89,0x29,0xdb]
+ vmovapd.s %xmm27, %xmm19 {%k1} {z}
+
+// CHECK: vmovapd.s %ymm18, %ymm19
+// CHECK: encoding: [0x62,0xa1,0xfd,0x28,0x29,0xd3]
+ vmovapd.s %ymm18, %ymm19
+
+// CHECK: vmovapd.s %ymm18, %ymm19 {%k4}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x2c,0x29,0xd3]
+ vmovapd.s %ymm18, %ymm19 {%k4}
+
+// CHECK: vmovapd.s %ymm18, %ymm19 {%k4} {z}
+// CHECK: encoding: [0x62,0xa1,0xfd,0xac,0x29,0xd3]
+ vmovapd.s %ymm18, %ymm19 {%k4} {z}
+
+// CHECK: vmovapd.s %ymm28, %ymm20
+// CHECK: encoding: [0x62,0x21,0xfd,0x28,0x29,0xe4]
+ vmovapd.s %ymm28, %ymm20
+
+// CHECK: vmovapd.s %ymm28, %ymm20 {%k5}
+// CHECK: encoding: [0x62,0x21,0xfd,0x2d,0x29,0xe4]
+ vmovapd.s %ymm28, %ymm20 {%k5}
+
+// CHECK: vmovapd.s %ymm28, %ymm20 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0xad,0x29,0xe4]
+ vmovapd.s %ymm28, %ymm20 {%k5} {z}
+
+// CHECK: vmovaps.s %xmm23, %xmm18
+// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x29,0xfa]
+ vmovaps.s %xmm23, %xmm18
+
+// CHECK: vmovaps.s %xmm23, %xmm18 {%k5}
+// CHECK: encoding: [0x62,0xa1,0x7c,0x0d,0x29,0xfa]
+ vmovaps.s %xmm23, %xmm18 {%k5}
+
+// CHECK: vmovaps.s %xmm23, %xmm18 {%k5} {z}
+// CHECK: encoding: [0x62,0xa1,0x7c,0x8d,0x29,0xfa]
+ vmovaps.s %xmm23, %xmm18 {%k5} {z}
+
+// CHECK: vmovaps.s %xmm24, %xmm18
+// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x29,0xc2]
+ vmovaps.s %xmm24, %xmm18
+
+// CHECK: vmovaps.s %xmm24, %xmm18 {%k6}
+// CHECK: encoding: [0x62,0x21,0x7c,0x0e,0x29,0xc2]
+ vmovaps.s %xmm24, %xmm18 {%k6}
+
+// CHECK: vmovaps.s %xmm24, %xmm18 {%k6} {z}
+// CHECK: encoding: [0x62,0x21,0x7c,0x8e,0x29,0xc2]
+ vmovaps.s %xmm24, %xmm18 {%k6} {z}
+
+// CHECK: vmovaps.s %ymm23, %ymm26
+// CHECK: encoding: [0x62,0x81,0x7c,0x28,0x29,0xfa]
+ vmovaps.s %ymm23, %ymm26
+
+// CHECK: vmovaps.s %ymm23, %ymm26 {%k5}
+// CHECK: encoding: [0x62,0x81,0x7c,0x2d,0x29,0xfa]
+ vmovaps.s %ymm23, %ymm26 {%k5}
+
+// CHECK: vmovaps.s %ymm23, %ymm26 {%k5} {z}
+// CHECK: encoding: [0x62,0x81,0x7c,0xad,0x29,0xfa]
+ vmovaps.s %ymm23, %ymm26 {%k5} {z}
+
+// CHECK: vmovaps.s %ymm17, %ymm27
+// CHECK: encoding: [0x62,0x81,0x7c,0x28,0x29,0xcb]
+ vmovaps.s %ymm17, %ymm27
+
+// CHECK: vmovaps.s %ymm17, %ymm27 {%k1}
+// CHECK: encoding: [0x62,0x81,0x7c,0x29,0x29,0xcb]
+ vmovaps.s %ymm17, %ymm27 {%k1}
+
+// CHECK: vmovaps.s %ymm17, %ymm27 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0x7c,0xa9,0x29,0xcb]
+ vmovaps.s %ymm17, %ymm27 {%k1} {z}
+
+// CHECK: vmovaps.s %xmm28, %xmm17
+// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x29,0xe1]
+ vmovaps.s %xmm28, %xmm17
+
+// CHECK: vmovaps.s %xmm28, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0x21,0x7c,0x0a,0x29,0xe1]
+ vmovaps.s %xmm28, %xmm17 {%k2}
+
+// CHECK: vmovaps.s %xmm28, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0x7c,0x8a,0x29,0xe1]
+ vmovaps.s %xmm28, %xmm17 {%k2} {z}
+
+// CHECK: vmovaps.s %xmm19, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x29,0xdd]
+ vmovaps.s %xmm19, %xmm21
+
+// CHECK: vmovaps.s %xmm19, %xmm21 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x7c,0x09,0x29,0xdd]
+ vmovaps.s %xmm19, %xmm21 {%k1}
+
+// CHECK: vmovaps.s %xmm19, %xmm21 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x7c,0x89,0x29,0xdd]
+ vmovaps.s %xmm19, %xmm21 {%k1} {z}
+
+// CHECK: vmovaps.s %ymm26, %ymm18
+// CHECK: encoding: [0x62,0x21,0x7c,0x28,0x29,0xd2]
+ vmovaps.s %ymm26, %ymm18
+
+// CHECK: vmovaps.s %ymm26, %ymm18 {%k5}
+// CHECK: encoding: [0x62,0x21,0x7c,0x2d,0x29,0xd2]
+ vmovaps.s %ymm26, %ymm18 {%k5}
+
+// CHECK: vmovaps.s %ymm26, %ymm18 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0x7c,0xad,0x29,0xd2]
+ vmovaps.s %ymm26, %ymm18 {%k5} {z}
+
+// CHECK: vmovaps.s %ymm27, %ymm17
+// CHECK: encoding: [0x62,0x21,0x7c,0x28,0x29,0xd9]
+ vmovaps.s %ymm27, %ymm17
+
+// CHECK: vmovaps.s %ymm27, %ymm17 {%k6}
+// CHECK: encoding: [0x62,0x21,0x7c,0x2e,0x29,0xd9]
+ vmovaps.s %ymm27, %ymm17 {%k6}
+
+// CHECK: vmovaps.s %ymm27, %ymm17 {%k6} {z}
+// CHECK: encoding: [0x62,0x21,0x7c,0xae,0x29,0xd9]
+ vmovaps.s %ymm27, %ymm17 {%k6} {z}
+
+// CHECK: vmovdqa32.s %xmm25, %xmm25
+// CHECK: encoding: [0x62,0x01,0x7d,0x08,0x7f,0xc9]
+ vmovdqa32.s %xmm25, %xmm25
+
+// CHECK: vmovdqa32.s %xmm25, %xmm25 {%k5}
+// CHECK: encoding: [0x62,0x01,0x7d,0x0d,0x7f,0xc9]
+ vmovdqa32.s %xmm25, %xmm25 {%k5}
+
+// CHECK: vmovdqa32.s %xmm25, %xmm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x01,0x7d,0x8d,0x7f,0xc9]
+ vmovdqa32.s %xmm25, %xmm25 {%k5} {z}
+
+// CHECK: vmovdqa32.s %xmm17, %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7f,0xce]
+ vmovdqa32.s %xmm17, %xmm22
+
+// CHECK: vmovdqa32.s %xmm17, %xmm22 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x7d,0x0a,0x7f,0xce]
+ vmovdqa32.s %xmm17, %xmm22 {%k2}
+
+// CHECK: vmovdqa32.s %xmm17, %xmm22 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x7d,0x8a,0x7f,0xce]
+ vmovdqa32.s %xmm17, %xmm22 {%k2} {z}
+
+// CHECK: vmovdqa32.s %ymm20, %ymm30
+// CHECK: encoding: [0x62,0x81,0x7d,0x28,0x7f,0xe6]
+ vmovdqa32.s %ymm20, %ymm30
+
+// CHECK: vmovdqa32.s %ymm20, %ymm30 {%k2}
+// CHECK: encoding: [0x62,0x81,0x7d,0x2a,0x7f,0xe6]
+ vmovdqa32.s %ymm20, %ymm30 {%k2}
+
+// CHECK: vmovdqa32.s %ymm20, %ymm30 {%k2} {z}
+// CHECK: encoding: [0x62,0x81,0x7d,0xaa,0x7f,0xe6]
+ vmovdqa32.s %ymm20, %ymm30 {%k2} {z}
+
+// CHECK: vmovdqa32.s %ymm17, %ymm20
+// CHECK: encoding: [0x62,0xa1,0x7d,0x28,0x7f,0xcc]
+ vmovdqa32.s %ymm17, %ymm20
+
+// CHECK: vmovdqa32.s %ymm17, %ymm20 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x7d,0x2b,0x7f,0xcc]
+ vmovdqa32.s %ymm17, %ymm20 {%k3}
+
+// CHECK: vmovdqa32.s %ymm17, %ymm20 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x7d,0xab,0x7f,0xcc]
+ vmovdqa32.s %ymm17, %ymm20 {%k3} {z}
+
+// CHECK: vmovdqa32.s %xmm27, %xmm21
+// CHECK: encoding: [0x62,0x21,0x7d,0x08,0x7f,0xdd]
+ vmovdqa32.s %xmm27, %xmm21
+
+// CHECK: vmovdqa32.s %xmm27, %xmm21 {%k4}
+// CHECK: encoding: [0x62,0x21,0x7d,0x0c,0x7f,0xdd]
+ vmovdqa32.s %xmm27, %xmm21 {%k4}
+
+// CHECK: vmovdqa32.s %xmm27, %xmm21 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0x7d,0x8c,0x7f,0xdd]
+ vmovdqa32.s %xmm27, %xmm21 {%k4} {z}
+
+// CHECK: vmovdqa32.s %xmm20, %xmm24
+// CHECK: encoding: [0x62,0x81,0x7d,0x08,0x7f,0xe0]
+ vmovdqa32.s %xmm20, %xmm24
+
+// CHECK: vmovdqa32.s %xmm20, %xmm24 {%k4}
+// CHECK: encoding: [0x62,0x81,0x7d,0x0c,0x7f,0xe0]
+ vmovdqa32.s %xmm20, %xmm24 {%k4}
+
+// CHECK: vmovdqa32.s %xmm20, %xmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x81,0x7d,0x8c,0x7f,0xe0]
+ vmovdqa32.s %xmm20, %xmm24 {%k4} {z}
+
+// CHECK: vmovdqa32.s %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xa1,0x7d,0x28,0x7f,0xf2]
+ vmovdqa32.s %ymm22, %ymm18
+
+// CHECK: vmovdqa32.s %ymm22, %ymm18 {%k5}
+// CHECK: encoding: [0x62,0xa1,0x7d,0x2d,0x7f,0xf2]
+ vmovdqa32.s %ymm22, %ymm18 {%k5}
+
+// CHECK: vmovdqa32.s %ymm22, %ymm18 {%k5} {z}
+// CHECK: encoding: [0x62,0xa1,0x7d,0xad,0x7f,0xf2]
+ vmovdqa32.s %ymm22, %ymm18 {%k5} {z}
+
+// CHECK: vmovdqa32.s %ymm22, %ymm18
+// CHECK: encoding: [0x62,0xa1,0x7d,0x28,0x7f,0xf2]
+ vmovdqa32.s %ymm22, %ymm18
+
+// CHECK: vmovdqa32.s %ymm22, %ymm18 {%k3}
+// CHECK: encoding: [0x62,0xa1,0x7d,0x2b,0x7f,0xf2]
+ vmovdqa32.s %ymm22, %ymm18 {%k3}
+
+// CHECK: vmovdqa32.s %ymm22, %ymm18 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0x7d,0xab,0x7f,0xf2]
+ vmovdqa32.s %ymm22, %ymm18 {%k3} {z}
+
+// CHECK: vmovdqa64.s %xmm22, %xmm23
+// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x7f,0xf7]
+ vmovdqa64.s %xmm22, %xmm23
+
+// CHECK: vmovdqa64.s %xmm22, %xmm23 {%k3}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x0b,0x7f,0xf7]
+ vmovdqa64.s %xmm22, %xmm23 {%k3}
+
+// CHECK: vmovdqa64.s %xmm22, %xmm23 {%k3} {z}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x8b,0x7f,0xf7]
+ vmovdqa64.s %xmm22, %xmm23 {%k3} {z}
+
+// CHECK: vmovdqa64.s %xmm25, %xmm18
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x7f,0xca]
+ vmovdqa64.s %xmm25, %xmm18
+
+// CHECK: vmovdqa64.s %xmm25, %xmm18 {%k4}
+// CHECK: encoding: [0x62,0x21,0xfd,0x0c,0x7f,0xca]
+ vmovdqa64.s %xmm25, %xmm18 {%k4}
+
+// CHECK: vmovdqa64.s %xmm25, %xmm18 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0x8c,0x7f,0xca]
+ vmovdqa64.s %xmm25, %xmm18 {%k4} {z}
+
+// CHECK: vmovdqa64.s %ymm26, %ymm22
+// CHECK: encoding: [0x62,0x21,0xfd,0x28,0x7f,0xd6]
+ vmovdqa64.s %ymm26, %ymm22
+
+// CHECK: vmovdqa64.s %ymm26, %ymm22 {%k1}
+// CHECK: encoding: [0x62,0x21,0xfd,0x29,0x7f,0xd6]
+ vmovdqa64.s %ymm26, %ymm22 {%k1}
+
+// CHECK: vmovdqa64.s %ymm26, %ymm22 {%k1} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0xa9,0x7f,0xd6]
+ vmovdqa64.s %ymm26, %ymm22 {%k1} {z}
+
+// CHECK: vmovdqa64.s %ymm24, %ymm21
+// CHECK: encoding: [0x62,0x21,0xfd,0x28,0x7f,0xc5]
+ vmovdqa64.s %ymm24, %ymm21
+
+// CHECK: vmovdqa64.s %ymm24, %ymm21 {%k6}
+// CHECK: encoding: [0x62,0x21,0xfd,0x2e,0x7f,0xc5]
+ vmovdqa64.s %ymm24, %ymm21 {%k6}
+
+// CHECK: vmovdqa64.s %ymm24, %ymm21 {%k6} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0xae,0x7f,0xc5]
+ vmovdqa64.s %ymm24, %ymm21 {%k6} {z}
+
+// CHECK: vmovdqa64.s %xmm29, %xmm25
+// CHECK: encoding: [0x62,0x01,0xfd,0x08,0x7f,0xe9]
+ vmovdqa64.s %xmm29, %xmm25
+
+// CHECK: vmovdqa64.s %xmm29, %xmm25 {%k1}
+// CHECK: encoding: [0x62,0x01,0xfd,0x09,0x7f,0xe9]
+ vmovdqa64.s %xmm29, %xmm25 {%k1}
+
+// CHECK: vmovdqa64.s %xmm29, %xmm25 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0xfd,0x89,0x7f,0xe9]
+ vmovdqa64.s %xmm29, %xmm25 {%k1} {z}
+
+// CHECK: vmovdqa64.s %xmm19, %xmm26
+// CHECK: encoding: [0x62,0x81,0xfd,0x08,0x7f,0xda]
+ vmovdqa64.s %xmm19, %xmm26
+
+// CHECK: vmovdqa64.s %xmm19, %xmm26 {%k3}
+// CHECK: encoding: [0x62,0x81,0xfd,0x0b,0x7f,0xda]
+ vmovdqa64.s %xmm19, %xmm26 {%k3}
+
+// CHECK: vmovdqa64.s %xmm19, %xmm26 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0xfd,0x8b,0x7f,0xda]
+ vmovdqa64.s %xmm19, %xmm26 {%k3} {z}
+
+// CHECK: vmovdqa64.s %ymm28, %ymm29
+// CHECK: encoding: [0x62,0x01,0xfd,0x28,0x7f,0xe5]
+ vmovdqa64.s %ymm28, %ymm29
+
+// CHECK: vmovdqa64.s %ymm28, %ymm29 {%k1}
+// CHECK: encoding: [0x62,0x01,0xfd,0x29,0x7f,0xe5]
+ vmovdqa64.s %ymm28, %ymm29 {%k1}
+
+// CHECK: vmovdqa64.s %ymm28, %ymm29 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0xfd,0xa9,0x7f,0xe5]
+ vmovdqa64.s %ymm28, %ymm29 {%k1} {z}
+
+// CHECK: vmovdqa64.s %ymm21, %ymm17
+// CHECK: encoding: [0x62,0xa1,0xfd,0x28,0x7f,0xe9]
+ vmovdqa64.s %ymm21, %ymm17
+
+// CHECK: vmovdqa64.s %ymm21, %ymm17 {%k4}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x2c,0x7f,0xe9]
+ vmovdqa64.s %ymm21, %ymm17 {%k4}
+
+// CHECK: vmovdqa64.s %ymm21, %ymm17 {%k4} {z}
+// CHECK: encoding: [0x62,0xa1,0xfd,0xac,0x7f,0xe9]
+ vmovdqa64.s %ymm21, %ymm17 {%k4} {z}
+
+// CHECK: vmovdqu32.s %xmm22, %xmm18
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7f,0xf2]
+ vmovdqu32.s %xmm22, %xmm18
+
+// CHECK: vmovdqu32.s %xmm22, %xmm18 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x09,0x7f,0xf2]
+ vmovdqu32.s %xmm22, %xmm18 {%k1}
+
+// CHECK: vmovdqu32.s %xmm22, %xmm18 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x89,0x7f,0xf2]
+ vmovdqu32.s %xmm22, %xmm18 {%k1} {z}
+
+// CHECK: vmovdqu32.s %xmm26, %xmm25
+// CHECK: encoding: [0x62,0x01,0x7e,0x08,0x7f,0xd1]
+ vmovdqu32.s %xmm26, %xmm25
+
+// CHECK: vmovdqu32.s %xmm26, %xmm25 {%k2}
+// CHECK: encoding: [0x62,0x01,0x7e,0x0a,0x7f,0xd1]
+ vmovdqu32.s %xmm26, %xmm25 {%k2}
+
+// CHECK: vmovdqu32.s %xmm26, %xmm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x01,0x7e,0x8a,0x7f,0xd1]
+ vmovdqu32.s %xmm26, %xmm25 {%k2} {z}
+
+// CHECK: vmovdqu32.s %ymm26, %ymm22
+// CHECK: encoding: [0x62,0x21,0x7e,0x28,0x7f,0xd6]
+ vmovdqu32.s %ymm26, %ymm22
+
+// CHECK: vmovdqu32.s %ymm26, %ymm22 {%k3}
+// CHECK: encoding: [0x62,0x21,0x7e,0x2b,0x7f,0xd6]
+ vmovdqu32.s %ymm26, %ymm22 {%k3}
+
+// CHECK: vmovdqu32.s %ymm26, %ymm22 {%k3} {z}
+// CHECK: encoding: [0x62,0x21,0x7e,0xab,0x7f,0xd6]
+ vmovdqu32.s %ymm26, %ymm22 {%k3} {z}
+
+// CHECK: vmovdqu32.s %ymm24, %ymm27
+// CHECK: encoding: [0x62,0x01,0x7e,0x28,0x7f,0xc3]
+ vmovdqu32.s %ymm24, %ymm27
+
+// CHECK: vmovdqu32.s %ymm24, %ymm27 {%k1}
+// CHECK: encoding: [0x62,0x01,0x7e,0x29,0x7f,0xc3]
+ vmovdqu32.s %ymm24, %ymm27 {%k1}
+
+// CHECK: vmovdqu32.s %ymm24, %ymm27 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0x7e,0xa9,0x7f,0xc3]
+ vmovdqu32.s %ymm24, %ymm27 {%k1} {z}
+
+// CHECK: vmovdqu32.s %xmm19, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7f,0xdd]
+ vmovdqu32.s %xmm19, %xmm21
+
+// CHECK: vmovdqu32.s %xmm19, %xmm21 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x09,0x7f,0xdd]
+ vmovdqu32.s %xmm19, %xmm21 {%k1}
+
+// CHECK: vmovdqu32.s %xmm19, %xmm21 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x89,0x7f,0xdd]
+ vmovdqu32.s %xmm19, %xmm21 {%k1} {z}
+
+// CHECK: vmovdqu32.s %xmm25, %xmm22
+// CHECK: encoding: [0x62,0x21,0x7e,0x08,0x7f,0xce]
+ vmovdqu32.s %xmm25, %xmm22
+
+// CHECK: vmovdqu32.s %xmm25, %xmm22 {%k7}
+// CHECK: encoding: [0x62,0x21,0x7e,0x0f,0x7f,0xce]
+ vmovdqu32.s %xmm25, %xmm22 {%k7}
+
+// CHECK: vmovdqu32.s %xmm25, %xmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0x21,0x7e,0x8f,0x7f,0xce]
+ vmovdqu32.s %xmm25, %xmm22 {%k7} {z}
+
+// CHECK: vmovdqu32.s %ymm17, %ymm29
+// CHECK: encoding: [0x62,0x81,0x7e,0x28,0x7f,0xcd]
+ vmovdqu32.s %ymm17, %ymm29
+
+// CHECK: vmovdqu32.s %ymm17, %ymm29 {%k1}
+// CHECK: encoding: [0x62,0x81,0x7e,0x29,0x7f,0xcd]
+ vmovdqu32.s %ymm17, %ymm29 {%k1}
+
+// CHECK: vmovdqu32.s %ymm17, %ymm29 {%k1} {z}
+// CHECK: encoding: [0x62,0x81,0x7e,0xa9,0x7f,0xcd]
+ vmovdqu32.s %ymm17, %ymm29 {%k1} {z}
+
+// CHECK: vmovdqu32.s %ymm26, %ymm24
+// CHECK: encoding: [0x62,0x01,0x7e,0x28,0x7f,0xd0]
+ vmovdqu32.s %ymm26, %ymm24
+
+// CHECK: vmovdqu32.s %ymm26, %ymm24 {%k1}
+// CHECK: encoding: [0x62,0x01,0x7e,0x29,0x7f,0xd0]
+ vmovdqu32.s %ymm26, %ymm24 {%k1}
+
+// CHECK: vmovdqu32.s %ymm26, %ymm24 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0x7e,0xa9,0x7f,0xd0]
+ vmovdqu32.s %ymm26, %ymm24 {%k1} {z}
+
+// CHECK: vmovdqu64.s %xmm17, %xmm23
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x7f,0xcf]
+ vmovdqu64.s %xmm17, %xmm23
+
+// CHECK: vmovdqu64.s %xmm17, %xmm23 {%k1}
+// CHECK: encoding: [0x62,0xa1,0xfe,0x09,0x7f,0xcf]
+ vmovdqu64.s %xmm17, %xmm23 {%k1}
+
+// CHECK: vmovdqu64.s %xmm17, %xmm23 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0xfe,0x89,0x7f,0xcf]
+ vmovdqu64.s %xmm17, %xmm23 {%k1} {z}
+
+// CHECK: vmovdqu64.s %xmm29, %xmm26
+// CHECK: encoding: [0x62,0x01,0xfe,0x08,0x7f,0xea]
+ vmovdqu64.s %xmm29, %xmm26
+
+// CHECK: vmovdqu64.s %xmm29, %xmm26 {%k1}
+// CHECK: encoding: [0x62,0x01,0xfe,0x09,0x7f,0xea]
+ vmovdqu64.s %xmm29, %xmm26 {%k1}
+
+// CHECK: vmovdqu64.s %xmm29, %xmm26 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0xfe,0x89,0x7f,0xea]
+ vmovdqu64.s %xmm29, %xmm26 {%k1} {z}
+
+// CHECK: vmovdqu64.s %ymm26, %ymm19
+// CHECK: encoding: [0x62,0x21,0xfe,0x28,0x7f,0xd3]
+ vmovdqu64.s %ymm26, %ymm19
+
+// CHECK: vmovdqu64.s %ymm26, %ymm19 {%k3}
+// CHECK: encoding: [0x62,0x21,0xfe,0x2b,0x7f,0xd3]
+ vmovdqu64.s %ymm26, %ymm19 {%k3}
+
+// CHECK: vmovdqu64.s %ymm26, %ymm19 {%k3} {z}
+// CHECK: encoding: [0x62,0x21,0xfe,0xab,0x7f,0xd3]
+ vmovdqu64.s %ymm26, %ymm19 {%k3} {z}
+
+// CHECK: vmovdqu64.s %ymm17, %ymm24
+// CHECK: encoding: [0x62,0x81,0xfe,0x28,0x7f,0xc8]
+ vmovdqu64.s %ymm17, %ymm24
+
+// CHECK: vmovdqu64.s %ymm17, %ymm24 {%k6}
+// CHECK: encoding: [0x62,0x81,0xfe,0x2e,0x7f,0xc8]
+ vmovdqu64.s %ymm17, %ymm24 {%k6}
+
+// CHECK: vmovdqu64.s %ymm17, %ymm24 {%k6} {z}
+// CHECK: encoding: [0x62,0x81,0xfe,0xae,0x7f,0xc8]
+ vmovdqu64.s %ymm17, %ymm24 {%k6} {z}
+
+// CHECK: vmovdqu64.s %xmm26, %xmm21
+// CHECK: encoding: [0x62,0x21,0xfe,0x08,0x7f,0xd5]
+ vmovdqu64.s %xmm26, %xmm21
+
+// CHECK: vmovdqu64.s %xmm26, %xmm21 {%k1}
+// CHECK: encoding: [0x62,0x21,0xfe,0x09,0x7f,0xd5]
+ vmovdqu64.s %xmm26, %xmm21 {%k1}
+
+// CHECK: vmovdqu64.s %xmm26, %xmm21 {%k1} {z}
+// CHECK: encoding: [0x62,0x21,0xfe,0x89,0x7f,0xd5]
+ vmovdqu64.s %xmm26, %xmm21 {%k1} {z}
+
+// CHECK: vmovdqu64.s %xmm17, %xmm18
+// CHECK: encoding: [0x62,0xa1,0xfe,0x08,0x7f,0xca]
+ vmovdqu64.s %xmm17, %xmm18
+
+// CHECK: vmovdqu64.s %xmm17, %xmm18 {%k6}
+// CHECK: encoding: [0x62,0xa1,0xfe,0x0e,0x7f,0xca]
+ vmovdqu64.s %xmm17, %xmm18 {%k6}
+
+// CHECK: vmovdqu64.s %xmm17, %xmm18 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0xfe,0x8e,0x7f,0xca]
+ vmovdqu64.s %xmm17, %xmm18 {%k6} {z}
+
+// CHECK: vmovdqu64.s %ymm24, %ymm27
+// CHECK: encoding: [0x62,0x01,0xfe,0x28,0x7f,0xc3]
+ vmovdqu64.s %ymm24, %ymm27
+
+// CHECK: vmovdqu64.s %ymm24, %ymm27 {%k3}
+// CHECK: encoding: [0x62,0x01,0xfe,0x2b,0x7f,0xc3]
+ vmovdqu64.s %ymm24, %ymm27 {%k3}
+
+// CHECK: vmovdqu64.s %ymm24, %ymm27 {%k3} {z}
+// CHECK: encoding: [0x62,0x01,0xfe,0xab,0x7f,0xc3]
+ vmovdqu64.s %ymm24, %ymm27 {%k3} {z}
+
+// CHECK: vmovdqu64.s %ymm17, %ymm18
+// CHECK: encoding: [0x62,0xa1,0xfe,0x28,0x7f,0xca]
+ vmovdqu64.s %ymm17, %ymm18
+
+// CHECK: vmovdqu64.s %ymm17, %ymm18 {%k6}
+// CHECK: encoding: [0x62,0xa1,0xfe,0x2e,0x7f,0xca]
+ vmovdqu64.s %ymm17, %ymm18 {%k6}
+
+// CHECK: vmovdqu64.s %ymm17, %ymm18 {%k6} {z}
+// CHECK: encoding: [0x62,0xa1,0xfe,0xae,0x7f,0xca]
+ vmovdqu64.s %ymm17, %ymm18 {%k6} {z}
+
+// CHECK: vmovupd.s %xmm27, %xmm17
+// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x11,0xd9]
+ vmovupd.s %xmm27, %xmm17
+
+// CHECK: vmovupd.s %xmm27, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0x21,0xfd,0x0a,0x11,0xd9]
+ vmovupd.s %xmm27, %xmm17 {%k2}
+
+// CHECK: vmovupd.s %xmm27, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0x8a,0x11,0xd9]
+ vmovupd.s %xmm27, %xmm17 {%k2} {z}
+
+// CHECK: vmovupd.s %xmm21, %xmm17
+// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x11,0xe9]
+ vmovupd.s %xmm21, %xmm17
+
+// CHECK: vmovupd.s %xmm21, %xmm17 {%k2}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x0a,0x11,0xe9]
+ vmovupd.s %xmm21, %xmm17 {%k2}
+
+// CHECK: vmovupd.s %xmm21, %xmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x8a,0x11,0xe9]
+ vmovupd.s %xmm21, %xmm17 {%k2} {z}
+
+// CHECK: vmovupd.s %ymm21, %ymm24
+// CHECK: encoding: [0x62,0x81,0xfd,0x28,0x11,0xe8]
+ vmovupd.s %ymm21, %ymm24
+
+// CHECK: vmovupd.s %ymm21, %ymm24 {%k7}
+// CHECK: encoding: [0x62,0x81,0xfd,0x2f,0x11,0xe8]
+ vmovupd.s %ymm21, %ymm24 {%k7}
+
+// CHECK: vmovupd.s %ymm21, %ymm24 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0xfd,0xaf,0x11,0xe8]
+ vmovupd.s %ymm21, %ymm24 {%k7} {z}
+
+// CHECK: vmovupd.s %ymm27, %ymm21
+// CHECK: encoding: [0x62,0x21,0xfd,0x28,0x11,0xdd]
+ vmovupd.s %ymm27, %ymm21
+
+// CHECK: vmovupd.s %ymm27, %ymm21 {%k6}
+// CHECK: encoding: [0x62,0x21,0xfd,0x2e,0x11,0xdd]
+ vmovupd.s %ymm27, %ymm21 {%k6}
+
+// CHECK: vmovupd.s %ymm27, %ymm21 {%k6} {z}
+// CHECK: encoding: [0x62,0x21,0xfd,0xae,0x11,0xdd]
+ vmovupd.s %ymm27, %ymm21 {%k6} {z}
+
+// CHECK: vmovupd.s %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x81,0xfd,0x08,0x11,0xfb]
+ vmovupd.s %xmm23, %xmm27
+
+// CHECK: vmovupd.s %xmm23, %xmm27 {%k7}
+// CHECK: encoding: [0x62,0x81,0xfd,0x0f,0x11,0xfb]
+ vmovupd.s %xmm23, %xmm27 {%k7}
+
+// CHECK: vmovupd.s %xmm23, %xmm27 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0xfd,0x8f,0x11,0xfb]
+ vmovupd.s %xmm23, %xmm27 {%k7} {z}
+
+// CHECK: vmovupd.s %xmm17, %xmm30
+// CHECK: encoding: [0x62,0x81,0xfd,0x08,0x11,0xce]
+ vmovupd.s %xmm17, %xmm30
+
+// CHECK: vmovupd.s %xmm17, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x81,0xfd,0x0f,0x11,0xce]
+ vmovupd.s %xmm17, %xmm30 {%k7}
+
+// CHECK: vmovupd.s %xmm17, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x81,0xfd,0x8f,0x11,0xce]
+ vmovupd.s %xmm17, %xmm30 {%k7} {z}
+
+// CHECK: vmovupd.s %ymm28, %ymm24
+// CHECK: encoding: [0x62,0x01,0xfd,0x28,0x11,0xe0]
+ vmovupd.s %ymm28, %ymm24
+
+// CHECK: vmovupd.s %ymm28, %ymm24 {%k6}
+// CHECK: encoding: [0x62,0x01,0xfd,0x2e,0x11,0xe0]
+ vmovupd.s %ymm28, %ymm24 {%k6}
+
+// CHECK: vmovupd.s %ymm28, %ymm24 {%k6} {z}
+// CHECK: encoding: [0x62,0x01,0xfd,0xae,0x11,0xe0]
+ vmovupd.s %ymm28, %ymm24 {%k6} {z}
+
+// CHECK: vmovupd.s %ymm22, %ymm20
+// CHECK: encoding: [0x62,0xa1,0xfd,0x28,0x11,0xf4]
+ vmovupd.s %ymm22, %ymm20
+
+// CHECK: vmovupd.s %ymm22, %ymm20 {%k1}
+// CHECK: encoding: [0x62,0xa1,0xfd,0x29,0x11,0xf4]
+ vmovupd.s %ymm22, %ymm20 {%k1}
+
+// CHECK: vmovupd.s %ymm22, %ymm20 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0xfd,0xa9,0x11,0xf4]
+ vmovupd.s %ymm22, %ymm20 {%k1} {z}
+
+// CHECK: vmovups.s %xmm20, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7c,0x08,0x11,0xe5]
+ vmovups.s %xmm20, %xmm21
+
+// CHECK: vmovups.s %xmm20, %xmm21 {%k5}
+// CHECK: encoding: [0x62,0xa1,0x7c,0x0d,0x11,0xe5]
+ vmovups.s %xmm20, %xmm21 {%k5}
+
+// CHECK: vmovups.s %xmm20, %xmm21 {%k5} {z}
+// CHECK: encoding: [0x62,0xa1,0x7c,0x8d,0x11,0xe5]
+ vmovups.s %xmm20, %xmm21 {%k5} {z}
+
+// CHECK: vmovups.s %xmm19, %xmm28
+// CHECK: encoding: [0x62,0x81,0x7c,0x08,0x11,0xdc]
+ vmovups.s %xmm19, %xmm28
+
+// CHECK: vmovups.s %xmm19, %xmm28 {%k5}
+// CHECK: encoding: [0x62,0x81,0x7c,0x0d,0x11,0xdc]
+ vmovups.s %xmm19, %xmm28 {%k5}
+
+// CHECK: vmovups.s %xmm19, %xmm28 {%k5} {z}
+// CHECK: encoding: [0x62,0x81,0x7c,0x8d,0x11,0xdc]
+ vmovups.s %xmm19, %xmm28 {%k5} {z}
+
+// CHECK: vmovups.s %ymm19, %ymm19
+// CHECK: encoding: [0x62,0xa1,0x7c,0x28,0x11,0xdb]
+ vmovups.s %ymm19, %ymm19
+
+// CHECK: vmovups.s %ymm19, %ymm19 {%k5}
+// CHECK: encoding: [0x62,0xa1,0x7c,0x2d,0x11,0xdb]
+ vmovups.s %ymm19, %ymm19 {%k5}
+
+// CHECK: vmovups.s %ymm19, %ymm19 {%k5} {z}
+// CHECK: encoding: [0x62,0xa1,0x7c,0xad,0x11,0xdb]
+ vmovups.s %ymm19, %ymm19 {%k5} {z}
+
+// CHECK: vmovups.s %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x01,0x7c,0x28,0x11,0xee]
+ vmovups.s %ymm29, %ymm30
+
+// CHECK: vmovups.s %ymm29, %ymm30 {%k6}
+// CHECK: encoding: [0x62,0x01,0x7c,0x2e,0x11,0xee]
+ vmovups.s %ymm29, %ymm30 {%k6}
+
+// CHECK: vmovups.s %ymm29, %ymm30 {%k6} {z}
+// CHECK: encoding: [0x62,0x01,0x7c,0xae,0x11,0xee]
+ vmovups.s %ymm29, %ymm30 {%k6} {z}
+
+// CHECK: vmovups.s %xmm25, %xmm23
+// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x11,0xcf]
+ vmovups.s %xmm25, %xmm23
+
+// CHECK: vmovups.s %xmm25, %xmm23 {%k4}
+// CHECK: encoding: [0x62,0x21,0x7c,0x0c,0x11,0xcf]
+ vmovups.s %xmm25, %xmm23 {%k4}
+
+// CHECK: vmovups.s %xmm25, %xmm23 {%k4} {z}
+// CHECK: encoding: [0x62,0x21,0x7c,0x8c,0x11,0xcf]
+ vmovups.s %xmm25, %xmm23 {%k4} {z}
+
+// CHECK: vmovups.s %xmm21, %xmm29
+// CHECK: encoding: [0x62,0x81,0x7c,0x08,0x11,0xed]
+ vmovups.s %xmm21, %xmm29
+
+// CHECK: vmovups.s %xmm21, %xmm29 {%k3}
+// CHECK: encoding: [0x62,0x81,0x7c,0x0b,0x11,0xed]
+ vmovups.s %xmm21, %xmm29 {%k3}
+
+// CHECK: vmovups.s %xmm21, %xmm29 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0x7c,0x8b,0x11,0xed]
+ vmovups.s %xmm21, %xmm29 {%k3} {z}
+
+// CHECK: vmovups.s %ymm28, %ymm30
+// CHECK: encoding: [0x62,0x01,0x7c,0x28,0x11,0xe6]
+ vmovups.s %ymm28, %ymm30
+
+// CHECK: vmovups.s %ymm28, %ymm30 {%k1}
+// CHECK: encoding: [0x62,0x01,0x7c,0x29,0x11,0xe6]
+ vmovups.s %ymm28, %ymm30 {%k1}
+
+// CHECK: vmovups.s %ymm28, %ymm30 {%k1} {z}
+// CHECK: encoding: [0x62,0x01,0x7c,0xa9,0x11,0xe6]
+ vmovups.s %ymm28, %ymm30 {%k1} {z}
+
+// CHECK: vmovups.s %ymm19, %ymm22
+// CHECK: encoding: [0x62,0xa1,0x7c,0x28,0x11,0xde]
+ vmovups.s %ymm19, %ymm22
+
+// CHECK: vmovups.s %ymm19, %ymm22 {%k1}
+// CHECK: encoding: [0x62,0xa1,0x7c,0x29,0x11,0xde]
+ vmovups.s %ymm19, %ymm22 {%k1}
+
+// CHECK: vmovups.s %ymm19, %ymm22 {%k1} {z}
+// CHECK: encoding: [0x62,0xa1,0x7c,0xa9,0x11,0xde]
+ vmovups.s %ymm19, %ymm22 {%k1} {z}
+
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index 096e90039abd..71f8557cde1c 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -413,7 +413,7 @@ fcmovae %st(1), %st(0) // CHECK: fcmovnb %st(1), %st(0)
fcmova %st(1), %st(0) // CHECK: fcmovnbe %st(1), %st(0)
// rdar://8456417
-.byte 88 + 1 & 15 // CHECK: .byte 9
+.byte (88 + 1) & 15 // CHECK: .byte 9
// rdar://8456412
mov %rdx, %cr0
@@ -1392,3 +1392,19 @@ vmovq %xmm0, %rax
// CHECK: seto 3735928559(%r10,%r9,8)
// CHECK: encoding: [0x43,0x0f,0x90,0x84,0xca,0xef,0xbe,0xad,0xde]
seto 0xdeadbeef(%r10,%r9,8)
+
+// CHECK: monitorx
+// CHECK: encoding: [0x0f,0x01,0xfa]
+ monitorx
+
+// CHECK: monitorx
+// CHECK: encoding: [0x0f,0x01,0xfa]
+ monitorx %rax, %rcx, %rdx
+
+// CHECK: mwaitx
+// CHECK: encoding: [0x0f,0x01,0xfb]
+ mwaitx
+
+// CHECK: mwaitx
+// CHECK: encoding: [0x0f,0x01,0xfb]
+ mwaitx %rax, %rcx, %rbx
diff --git a/test/MC/X86/x86-evenDirective.s b/test/MC/X86/x86-evenDirective.s
new file mode 100644
index 000000000000..6d6555e70776
--- /dev/null
+++ b/test/MC/X86/x86-evenDirective.s
@@ -0,0 +1,47 @@
+# RUN: llvm-mc -triple -x86_64-unknown-unknown -filetype obj -o - %s | llvm-readobj -s -sd \
+# RUN: | FileCheck %s
+
+ .text
+ even_check:
+ .byte 0x00
+ .byte 0x01
+ .byte 0x02
+ .byte 0x03
+ .byte 0x04
+ .byte 0x05
+ .byte 0x06
+ .byte 0x07
+ .byte 0x08
+ .byte 0x09
+ .byte 0x10
+ .even
+ .byte 0x11
+ .byte 0x12
+ .even
+ .byte 0x13
+ .even
+ .byte 0x00
+ .byte 0x01
+ .byte 0x02
+ .byte 0x03
+ .byte 0x04
+ .byte 0x05
+ .byte 0x06
+ .byte 0x07
+ .byte 0x08
+ .byte 0x09
+ .byte 0x10
+ .byte 0x11
+ .byte 0x12
+ .byte 0x13
+ .byte 0x14
+ .byte 0x15
+
+# CHECK: Section {
+# CHECK: Name: .text
+# CHECK: SectionData (
+# CHECK: 0000: 00010203 04050607 08091090 11121390
+# CHECK: 0010: 00010203 04050607 08091011 12131415
+# CHECK: )
+# CHECK: }
+
diff --git a/test/MC/X86/x86_nop.s b/test/MC/X86/x86_nop.s
index 572487bfdaca..feac4e4cf039 100644
--- a/test/MC/X86/x86_nop.s
+++ b/test/MC/X86/x86_nop.s
@@ -22,13 +22,7 @@ inc %eax
inc %eax
// CHECK: 0: 40 incl %eax
-// CHECK: 1: 90 nop
-// CHECK: 2: 90 nop
-// CHECK: 3: 90 nop
-// CHECK: 4: 90 nop
-// CHECK: 5: 90 nop
-// CHECK: 6: 90 nop
-// CHECK: 7: 90 nop
+// CHECK: 1: 8d b4 26 00 00 00 00 leal (%esi), %esi
// CHECK: 8: 40 incl %eax
diff --git a/test/Makefile b/test/Makefile
index 558762e39dc2..03194520caa2 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -134,6 +134,7 @@ lit.site.cfg: FORCE
@$(ECHOPATH) s!@OCAMLFLAGS@!$(addprefix -cclib ,$(LDFLAGS))!g >> lit.tmp
@$(ECHOPATH) s=@HAVE_OCAMLOPT@=$(HAVE_OCAMLOPT)=g >> lit.tmp
@$(ECHOPATH) s=@HAVE_OCAML_OUNIT@=$(HAVE_OCAML_OUNIT)=g >> lit.tmp
+ @$(ECHOPATH) s=@LLVM_INCLUDE_GO_TESTS@=ON=g >> lit.tmp
@$(ECHOPATH) s=@GO_EXECUTABLE@=$(GO)=g >> lit.tmp
@$(ECHOPATH) s!@HOST_CC@!$(CC)!g >> lit.tmp
@$(ECHOPATH) s!@HOST_CXX@!$(CXX)!g >> lit.tmp
@@ -146,6 +147,8 @@ lit.site.cfg: FORCE
@$(ECHOPATH) s=@HOST_ARCH@=$(HOST_ARCH)=g >> lit.tmp
@$(ECHOPATH) s=@HAVE_LIBZ@=$(HAVE_LIBZ)=g >> lit.tmp
@$(ECHOPATH) s=@HAVE_DIA_SDK@=0=g >> lit.tmp
+ @$(ECHOPATH) s=@ENABLE_EXAMPLES@=$(BUILD_EXAMPLES)=g >> lit.tmp
+ @$(ECHOPATH) s=@ENABLE_TIMESTAMPS@=$(ENABLE_TIMESTAMPS)=g >> lit.tmp
@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
@-rm -f lit.tmp
diff --git a/test/Object/AMDGPU/elf-definitios.yaml b/test/Object/AMDGPU/elf-definitios.yaml
new file mode 100644
index 000000000000..5f5e200b49b3
--- /dev/null
+++ b/test/Object/AMDGPU/elf-definitios.yaml
@@ -0,0 +1,27 @@
+# RUN: yaml2obj -format=elf %s > %t.o
+# RUN: llvm-readobj -s -file-headers %t.o | FileCheck %s
+
+# CHECK: Machine: EM_AMDGPU (0xE0)
+# CHECK: Sections [
+# CHECK: Section {
+# CHECK: Name: .shf_amdgpu
+# CHECK: Flags [ (0xF00000)
+# CHECK: SHF_AMDGPU_HSA_AGENT (0x800000)
+# CHECK: SHF_AMDGPU_HSA_CODE (0x400000)
+# CHECK: SHF_AMDGPU_HSA_GLOBAL (0x100000)
+# CHECK: SHF_AMDGPU_HSA_READONLY (0x200000)
+# CHECK: }
+
+---
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_REL
+ Machine: EM_AMDGPU
+
+Sections:
+ - Name: .shf_amdgpu
+ Type: SHT_PROGBITS
+ Flags: [ SHF_AMDGPU_HSA_GLOBAL, SHF_AMDGPU_HSA_READONLY,
+ SHF_AMDGPU_HSA_CODE, SHF_AMDGPU_HSA_AGENT]
+...
diff --git a/test/Object/Inputs/coff-short-import-code b/test/Object/Inputs/coff-short-import-code
new file mode 100644
index 000000000000..446279037b53
--- /dev/null
+++ b/test/Object/Inputs/coff-short-import-code
Binary files differ
diff --git a/test/Object/Inputs/coff-short-import-data b/test/Object/Inputs/coff-short-import-data
new file mode 100644
index 000000000000..71b635ba1920
--- /dev/null
+++ b/test/Object/Inputs/coff-short-import-data
Binary files differ
diff --git a/test/Object/Inputs/corrupt-invalid-dynamic-table-offset.elf.x86-64 b/test/Object/Inputs/corrupt-invalid-dynamic-table-offset.elf.x86-64
new file mode 100755
index 000000000000..d164d8243d5c
--- /dev/null
+++ b/test/Object/Inputs/corrupt-invalid-dynamic-table-offset.elf.x86-64
Binary files differ
diff --git a/test/Object/Inputs/corrupt-invalid-dynamic-table-size.elf.x86-64 b/test/Object/Inputs/corrupt-invalid-dynamic-table-size.elf.x86-64
new file mode 100755
index 000000000000..ee29a4162e39
--- /dev/null
+++ b/test/Object/Inputs/corrupt-invalid-dynamic-table-size.elf.x86-64
Binary files differ
diff --git a/test/Object/Inputs/corrupt-invalid-dynamic-table-too-large.elf.x86-64 b/test/Object/Inputs/corrupt-invalid-dynamic-table-too-large.elf.x86-64
new file mode 100755
index 000000000000..122194ae6f0d
--- /dev/null
+++ b/test/Object/Inputs/corrupt-invalid-dynamic-table-too-large.elf.x86-64
Binary files differ
diff --git a/test/Object/Inputs/corrupt-invalid-phentsize.elf.x86-64 b/test/Object/Inputs/corrupt-invalid-phentsize.elf.x86-64
new file mode 100755
index 000000000000..148d4c5fd64b
--- /dev/null
+++ b/test/Object/Inputs/corrupt-invalid-phentsize.elf.x86-64
Binary files differ
diff --git a/test/Object/Inputs/corrupt-invalid-relocation-size.elf.x86-64 b/test/Object/Inputs/corrupt-invalid-relocation-size.elf.x86-64
new file mode 100755
index 000000000000..2798ab34e174
--- /dev/null
+++ b/test/Object/Inputs/corrupt-invalid-relocation-size.elf.x86-64
Binary files differ
diff --git a/test/Object/Inputs/corrupt-invalid-strtab.elf.x86-64 b/test/Object/Inputs/corrupt-invalid-strtab.elf.x86-64
new file mode 100755
index 000000000000..bdaa3cf92082
--- /dev/null
+++ b/test/Object/Inputs/corrupt-invalid-strtab.elf.x86-64
Binary files differ
diff --git a/test/Object/Inputs/corrupt-invalid-virtual-addr.elf.x86-64 b/test/Object/Inputs/corrupt-invalid-virtual-addr.elf.x86-64
new file mode 100755
index 000000000000..58d995933eca
--- /dev/null
+++ b/test/Object/Inputs/corrupt-invalid-virtual-addr.elf.x86-64
Binary files differ
diff --git a/test/Object/Inputs/invalid-symbol-table-size.elf b/test/Object/Inputs/invalid-symbol-table-size.elf
new file mode 100755
index 000000000000..8329abbbd712
--- /dev/null
+++ b/test/Object/Inputs/invalid-symbol-table-size.elf
Binary files differ
diff --git a/test/Object/Inputs/invalid-xindex-size.elf b/test/Object/Inputs/invalid-xindex-size.elf
new file mode 100644
index 000000000000..2852b85ca042
--- /dev/null
+++ b/test/Object/Inputs/invalid-xindex-size.elf
Binary files differ
diff --git a/test/Object/Inputs/main-ret-zero-pe-i386.dll b/test/Object/Inputs/main-ret-zero-pe-i386.dll
new file mode 100755
index 000000000000..d4a343088b17
--- /dev/null
+++ b/test/Object/Inputs/main-ret-zero-pe-i386.dll
Binary files differ
diff --git a/test/Object/Inputs/main-ret-zero-pe-i386.exe b/test/Object/Inputs/main-ret-zero-pe-i386.exe
new file mode 100755
index 000000000000..366767b9b773
--- /dev/null
+++ b/test/Object/Inputs/main-ret-zero-pe-i386.exe
Binary files differ
diff --git a/test/Object/Inputs/no-section-header-string-table.elf-x86-64 b/test/Object/Inputs/no-section-header-string-table.elf-x86-64
new file mode 100644
index 000000000000..9024ad9b7573
--- /dev/null
+++ b/test/Object/Inputs/no-section-header-string-table.elf-x86-64
Binary files differ
diff --git a/test/Object/Inputs/pr25877.lib b/test/Object/Inputs/pr25877.lib
new file mode 100644
index 000000000000..401cbeb172a9
--- /dev/null
+++ b/test/Object/Inputs/pr25877.lib
Binary files differ
diff --git a/test/Object/Inputs/rel-no-sec-table.elf-x86-64 b/test/Object/Inputs/rel-no-sec-table.elf-x86-64
new file mode 100755
index 000000000000..d527305d6597
--- /dev/null
+++ b/test/Object/Inputs/rel-no-sec-table.elf-x86-64
Binary files differ
diff --git a/test/Object/Inputs/shndx.elf b/test/Object/Inputs/shndx.elf
new file mode 100644
index 000000000000..f1e0a3a8df7d
--- /dev/null
+++ b/test/Object/Inputs/shndx.elf
Binary files differ
diff --git a/test/Object/Inputs/trivial-object-test.elf-avr b/test/Object/Inputs/trivial-object-test.elf-avr
new file mode 100755
index 000000000000..e0ff8a3d5569
--- /dev/null
+++ b/test/Object/Inputs/trivial-object-test.elf-avr
Binary files differ
diff --git a/test/Object/X86/nm-ir.ll b/test/Object/X86/nm-ir.ll
index 881397c00a42..d517b09d3e92 100644
--- a/test/Object/X86/nm-ir.ll
+++ b/test/Object/X86/nm-ir.ll
@@ -27,8 +27,8 @@ module asm ".long undef_asm_sym"
@g3 = common global i32 0
@g4 = private global i32 42
-@a1 = alias i32* @g1
-@a2 = internal alias i32* @g1
+@a1 = alias i32, i32* @g1
+@a2 = internal alias i32, i32* @g1
define void @f1() {
ret void
diff --git a/test/Object/archive-format.test b/test/Object/archive-format.test
index 40af9a33d8d7..651a9b9ef157 100644
--- a/test/Object/archive-format.test
+++ b/test/Object/archive-format.test
@@ -6,17 +6,26 @@ RUN: cd %t
RUN: echo -n bar. > 0123456789abcde
RUN: echo -n zed. > 0123456789abcdef
+RUN: mkdir -p foo
+RUN: echo -n bar2 > foo/0123456789abcde
+RUN: echo -n zed2 > foo/0123456789abcdef
RUN: rm -f %t.a
-RUN: llvm-ar --format=gnu rc %t.a 0123456789abcde 0123456789abcdef
+RUN: llvm-ar --format=gnu rc %t.a 0123456789abcde 0123456789abcdef foo/0123456789abcde foo/0123456789abcdef
RUN: cat %t.a | FileCheck -strict-whitespace %s
CHECK: !<arch>
-CHECK-NEXT: // 18 `
+CHECK-NEXT: // 36 `
+CHECK-NEXT: 0123456789abcdef/
CHECK-NEXT: 0123456789abcdef/
CHECK-NEXT: 0123456789abcde/0 0 0 644 4 `
-CHECK-NEXT: bar./0 0 0 0 644 4 `
+CHECK-NEXT: bar.
+CHECK-SAME: /0 0 0 0 644 4 `
CHECK-NEXT: zed.
+CHECK-SAME: 0123456789abcde/0 0 0 644 4 `
+CHECK-NEXT: bar2
+CHECK-SAME: /18 0 0 0 644 4 `
+CHECK-NEXT: zed2
RUN: rm -f %t.a
RUN: llvm-ar --format=bsd rc %t.a 0123456789abcde 0123456789abcdef
@@ -27,3 +36,29 @@ BSD-NEXT: #1/20 0 0 0 644 24 `
BSD-NEXT: 0123456789abcde{{.....}}bar.
BSD-SAME: #1/16 0 0 0 644 20 `
BSD-NEXT: 0123456789abcdefzed.
+
+RUN: rm -f test.a
+RUN: llvm-ar --format=gnu rcT test.a 0123456789abcde 0123456789abcdef
+RUN: cat test.a | FileCheck -strict-whitespace --check-prefix=THIN %s
+THIN: !<thin>
+THIN-NEXT: // 36 `
+THIN-NEXT: 0123456789abcde/
+THIN-NEXT: 0123456789abcdef/{{$}}
+THIN: {{^$}}
+THIN: /0 0 0 0 644 4 `
+THIN-NEXT: /17 0 0 0 644 4 `
+
+RUN: mkdir -p bar
+RUN: rm -f bar/test.a
+RUN: llvm-ar --format=gnu rcT bar/test.a 0123456789abcde 0123456789abcdef foo/0123456789abcde foo/0123456789abcdef
+RUN: cat bar/test.a | FileCheck -strict-whitespace --check-prefix=THIN-PATH %s
+THIN-PATH: !<thin>
+THIN-PATH-NEXT: // 90 `
+THIN-PATH-NEXT: ..{{/|\\}}0123456789abcde/
+THIN-PATH-NEXT: ..{{/|\\}}0123456789abcdef/
+THIN-PATH-NEXT: ..{{/|\\}}foo{{/|\\}}0123456789abcde/
+THIN-PATH-NEXT: ..{{/|\\}}foo{{/|\\}}0123456789abcdef/
+THIN-PATH-NEXT: /0 0 0 0 644 4 `
+THIN-PATH-NEXT: /20 0 0 0 644 4 `
+THIN-PATH-NEXT: /41 0 0 0 644 4 `
+THIN-PATH-NEXT: /65 0 0 0 644 4 `
diff --git a/test/Object/archive-symtab.test b/test/Object/archive-symtab.test
index 6e4c76fb3768..6bad783a8c82 100644
--- a/test/Object/archive-symtab.test
+++ b/test/Object/archive-symtab.test
@@ -8,6 +8,17 @@ CHECK-NEXT: foo in trivial-object-test2.elf-x86-64
CHECK-NEXT: main in trivial-object-test2.elf-x86-64
CHECK-NOT: bar
+
+RUN: rm -f %t.a
+RUN: llvm-ar --format=gnu rcT %t.a %p/Inputs/trivial-object-test.elf-x86-64 %p/Inputs/trivial-object-test2.elf-x86-64
+RUN: llvm-nm -M %t.a | FileCheck --check-prefix=THIN %s
+
+THIN: Archive map
+THIN-NEXT: main in {{.*}}/Inputs/trivial-object-test.elf-x86-64
+THIN-NEXT: foo in {{.*}}/Inputs/trivial-object-test2.elf-x86-64
+THIN-NEXT: main in {{.*}}/Inputs/trivial-object-test2.elf-x86-64
+
+
CHECK: trivial-object-test.elf-x86-64:
CHECK-NEXT: U SomeOtherFunction
CHECK-NEXT: 0000000000000000 T main
@@ -75,7 +86,7 @@ MACHO: Archive map
MACHO-NEXT: _main in trivial-object-test.macho-x86-64
MACHO-NEXT: _foo in trivial-object-test2.macho-x86-64
MACHO-NEXT: _main in trivial-object-test2.macho-x86-64
-MACHO-NOT: bar
+MACHO-NOT: {{^}}bar
MACHO: trivial-object-test.macho-x86-64
MACHO-NEXT: 0000000000000028 s L_.str
@@ -88,6 +99,16 @@ MACHO-NEXT: 0000000000000000 t _bar
MACHO-NEXT: 0000000000000001 T _foo
MACHO-NEXT: 0000000000000002 T _main
+RUN: rm -f %t.a
+RUN: llvm-ar --format=gnu rcsU %t.a %p/Inputs/coff-short-import-code %p/Inputs/coff-short-import-data
+RUN: llvm-nm -M %t.a | FileCheck --check-prefix=COFF-SHORT-IMPORT %s
+
+COFF-SHORT-IMPORT: Archive map
+COFF-SHORT-IMPORT-NEXT: __imp__foo in coff-short-import-code
+COFF-SHORT-IMPORT-NEXT: _foo in coff-short-import-code
+COFF-SHORT-IMPORT-NEXT: __imp__bar in coff-short-import-data
+COFF-SHORT-IMPORT-NOT: _bar in coff-short-import-data
+
Test that we pad the symbol table so that it ends in a multiple of 4 bytes:
8 + 60 + 36 == 104
RUN: rm -f %t.a
diff --git a/test/Object/archive-update.test b/test/Object/archive-update.test
index 147db90ba951..fd1ea4113c39 100644
--- a/test/Object/archive-update.test
+++ b/test/Object/archive-update.test
@@ -7,12 +7,13 @@ Create a file named evenlen that is newer than the evenlen on the source dir.
RUN: mkdir -p %t.older
RUN: echo older > %t.older/evenlen
+RUN: mkdir -p %t.newer
+
Either the shell supports the 'touch' command with a flag to manually set the
mtime or we sleep for over two seconds so that the mtime is definitely
observable.
RUN: touch -m -t 200001010000 %t.older/evenlen || sleep 2.1
-RUN: mkdir -p %t.newer
RUN: echo newer > %t.newer/evenlen
RUN: touch %t.newer/evenlen
@@ -34,3 +35,17 @@ RUN: llvm-ar p %t.a | FileCheck --check-prefix=NEWER %s
NEWER: newer
OLDER: older
+
+RUN: rm -f %t.a
+RUN: echo foo > foo
+RUN: echo bar > bar
+RUN: llvm-ar --format=gnu rcT %t.a foo
+RUN: llvm-ar --format=gnu rcT %t.a bar
+RUN: llvm-ar t %t.a | FileCheck --check-prefix=BOTH-FILES %s
+BOTH-FILES: foo
+BOTH-FILES: bar
+
+RUN: rm -f %t.a
+RUN: llvm-ar --format=gnu rc %t.a foo
+RUN: not llvm-ar --format=gnu rcT %t.a bar 2>&1 | FileCheck --check-prefix=ERROR %s
+ERROR: Cannot convert a regular archive to a thin one.
diff --git a/test/Object/corrupt.test b/test/Object/corrupt.test
index ef72a0979b52..0d9aad378f88 100644
--- a/test/Object/corrupt.test
+++ b/test/Object/corrupt.test
@@ -2,23 +2,71 @@
RUN: not llvm-readobj %p/Inputs/corrupt.elf-x86-64 -sections \
RUN: 2>&1 | FileCheck --check-prefix=SECNAME %s
+SECNAME: Error reading file: Invalid data was encountered while parsing the file.
+
+
// Section data offset past end of file.
RUN: not llvm-readobj %p/Inputs/corrupt.elf-x86-64 -sections -section-data \
RUN: 2>&1 | FileCheck --check-prefix=SECDATA %s
+SECDATA: Error reading file: Invalid data was encountered while parsing the file.
+
+
// Symbol name offset overflows string table.
RUN: not llvm-readobj %p/Inputs/corrupt.elf-x86-64 -symbols \
RUN: 2>&1 | FileCheck --check-prefix=SYMNAME %s
+SYMNAME: Error reading file: Invalid data was encountered while parsing the file.
+
+
// Version index in .gnu.version overflows the version map.
RUN: not llvm-readobj %p/Inputs/corrupt-version.elf-x86_64 -dt \
RUN: 2>&1 | FileCheck --check-prefix=VER %s
-SECNAME: Error reading file: Invalid data was encountered while parsing the file.
+VER: Error reading file: Invalid version entry.
-SECDATA: Error reading file: Invalid data was encountered while parsing the file.
-SECDATA: Error reading file: Invalid data was encountered while parsing the file.
-SYMNAME: Error reading file: Invalid data was encountered while parsing the file.
+// The file is missing the dynamic string table but has references to it.
+RUN: not llvm-readobj -dynamic-table %p/Inputs/corrupt-invalid-strtab.elf.x86-64 \
+RUN: 2>&1 | FileCheck --check-prefix=STRTAB %s
+
+STRTAB: Invalid dynamic string table reference
+
+RUN: not llvm-readobj -program-headers \
+RUN: %p/Inputs/corrupt-invalid-phentsize.elf.x86-64 2>&1 | \
+RUN: FileCheck --check-prefix=PHENTSIZE %s
+
+PHENTSIZE: Invalid program header size
+
+RUN: not llvm-readobj -dynamic-table \
+RUN: %p/Inputs/corrupt-invalid-virtual-addr.elf.x86-64 2>&1 | \
+RUN: FileCheck --check-prefix=VIRTADDR %s
+
+VIRTADDR: Virtual address is not in any segment
+
+
+RUN: not llvm-readobj -dyn-relocations \
+RUN: %p/Inputs/corrupt-invalid-relocation-size.elf.x86-64 2>&1 | \
+RUN: FileCheck --check-prefix=RELOC %s
+
+RELOC: Invalid relocation entry size
+
+RUN: not llvm-readobj -dyn-relocations \
+RUN: %p/Inputs/corrupt-invalid-dynamic-table-size.elf.x86-64 2>&1 | \
+RUN: FileCheck --check-prefix=DYN-TABLE-SIZE %s
+
+DYN-TABLE-SIZE: Invalid dynamic table size
+
+
+RUN: not llvm-readobj -dyn-relocations \
+RUN: %p/Inputs/corrupt-invalid-dynamic-table-offset.elf.x86-64 2>&1 | \
+RUN: FileCheck --check-prefix=DYN-TABLE-OFFSET %s
+
+DYN-TABLE-OFFSET: Invalid data was encountered while parsing the file.
+
+
+RUN: not llvm-readobj -dyn-relocations \
+RUN: %p/Inputs/corrupt-invalid-dynamic-table-too-large.elf.x86-64 2>&1 | \
+RUN: FileCheck --check-prefix=DYN-TABLE-TOO-LARGE %s
-VER: Error reading file: Invalid data was encountered while parsing the file.
+DYN-TABLE-TOO-LARGE: Invalid data was encountered while parsing the file.
diff --git a/test/Object/invalid.test b/test/Object/invalid.test
index 1d5a70b3487d..cc5cd68304c6 100644
--- a/test/Object/invalid.test
+++ b/test/Object/invalid.test
@@ -44,3 +44,11 @@ INVALID-SECTION-INDEX: Invalid section index
RUN: not llvm-readobj -s %p/Inputs/invalid-section-size.elf 2>&1 | FileCheck --check-prefix=INVALID-SECTION-SIZE %s
INVALID-SECTION-SIZE: Invalid section header entry size (e_shentsize) in ELF header
+
+
+RUN: not llvm-readobj -t %p/Inputs/invalid-symbol-table-size.elf 2>&1 | FileCheck --check-prefix=INVALID-SYMTAB-SIZE %s
+INVALID-SYMTAB-SIZE: Invalid symbol table size
+
+
+RUN: not llvm-readobj -t %p/Inputs/invalid-xindex-size.elf 2>&1 | FileCheck --check-prefix=INVALID-XINDEX-SIZE %s
+INVALID-XINDEX-SIZE: Invalid data was encountered while parsing the file.
diff --git a/test/Object/nm-archive.test b/test/Object/nm-archive.test
index 4cd58d33cf28..8f8cdbe9f08c 100644
--- a/test/Object/nm-archive.test
+++ b/test/Object/nm-archive.test
@@ -58,3 +58,12 @@ GNU AR is able to parse the unaligned member and warns about the member with
the unknown format. We should probably simply warn on both. For now just check
that we don't produce an error.
RUN: llvm-nm %p/Inputs/corrupt-archive.a
+
+
+RUN: llvm-nm %p/Inputs/thin.a | FileCheck %s -check-prefix THIN
+
+THIN: IsNAN.o:
+THIN: 00000014 T _ZN4llvm5IsNANEd
+THIN: 00000000 T _ZN4llvm5IsNANEf
+THIN: U __isnan
+THIN: U __isnanf
diff --git a/test/Object/nm-pe-image.test b/test/Object/nm-pe-image.test
new file mode 100644
index 000000000000..d1ca5845ef61
--- /dev/null
+++ b/test/Object/nm-pe-image.test
@@ -0,0 +1,31 @@
+The executable was generated like so:
+$ cat t.c
+int main() { return 0; }
+$ clang --target=i686-windows -c t.c -o t.o
+$ lld-link t.o -out:t.exe -entry:main -debug
+
+It has a mingw-style symbol table in the executable, which MSVC-produced images
+don't have.
+
+RUN: llvm-nm %p/Inputs/main-ret-zero-pe-i386.exe \
+RUN: | FileCheck %s -check-prefix PE-EXE
+
+PE-EXE: 00401000 t .bss
+PE-EXE: 00401000 t .data
+PE-EXE: 00401000 t .text
+PE-EXE: 00401000 T _main
+
+The DLL was generated like so:
+$ cat t.c
+int DllMain(void *mod, long reason, void *reserved) { return 1; }
+$ clang --target=i686-windows -c t.c -o t.o
+$ lld-link t.o -out:t.exe -entry:DllMain -debug -dll
+
+
+RUN: llvm-nm %p/Inputs/main-ret-zero-pe-i386.dll \
+RUN: | FileCheck %s -check-prefix PE-DLL
+
+PE-DLL: 10001000 t .bss
+PE-DLL: 10001000 t .data
+PE-DLL: 10001000 t .text
+PE-DLL: 10001000 T _DllMain
diff --git a/test/Object/no-section-header-string-table.test b/test/Object/no-section-header-string-table.test
new file mode 100644
index 000000000000..1f449328cabc
--- /dev/null
+++ b/test/Object/no-section-header-string-table.test
@@ -0,0 +1,10 @@
+RUN: llvm-readobj %p/Inputs/no-section-header-string-table.elf-x86-64 --sections \
+RUN: | FileCheck %s
+
+CHECK: Type: SHT_PROGBITS (0x1)
+CHECK: Type: SHT_PROGBITS (0x1)
+CHECK: Type: SHT_PROGBITS (0x1)
+CHECK: Type: SHT_RELA (0x4)
+CHECK: Type: SHT_SYMTAB (0x2)
+CHECK: Type: SHT_STRTAB (0x3)
+CHECK: Type: SHT_STRTAB (0x3)
diff --git a/test/Object/obj2yaml.test b/test/Object/obj2yaml.test
index 8054b23eb560..60d46a83a52e 100644
--- a/test/Object/obj2yaml.test
+++ b/test/Object/obj2yaml.test
@@ -3,6 +3,7 @@ RUN: obj2yaml %p/Inputs/trivial-object-test.coff-x86-64 | FileCheck %s --check-p
RUN: obj2yaml %p/Inputs/trivial-object-test.elf-mipsel | FileCheck %s --check-prefix ELF-MIPSEL
RUN: obj2yaml %p/Inputs/trivial-object-test.elf-mips64el | FileCheck %s --check-prefix ELF-MIPS64EL
RUN: obj2yaml %p/Inputs/trivial-object-test.elf-x86-64 | FileCheck %s --check-prefix ELF-X86-64
+RUN: obj2yaml %p/Inputs/trivial-object-test.elf-avr | FileCheck %s --check-prefix ELF-AVR
RUN: obj2yaml %p/Inputs/unwind-section.elf-x86-64 \
RUN: | FileCheck %s --check-prefix ELF-X86-64-UNWIND
@@ -409,8 +410,72 @@ ELF-X86-64-NEXT: Size: 0x0000000000000026
ELF-X86-64-NEXT: - Name: SomeOtherFunction
ELF-X86-64-NEXT: - Name: puts
+
+ELF-AVR: FileHeader:
+ELF-AVR-NEXT: Class: ELFCLASS32
+ELF-AVR-NEXT: Data: ELFDATA2LSB
+ELF-AVR-NEXT: Type: ET_EXEC
+ELF-AVR-NEXT: Machine: EM_AVR
+ELF-AVR-NEXT: Flags: [ EF_AVR_ARCH_AVR2 ]
+ELF-AVR-NEXT: Sections:
+ELF-AVR-NEXT: - Name: .text
+ELF-AVR-NEXT: Type: SHT_PROGBITS
+ELF-AVR-NEXT: Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ELF-AVR-NEXT: AddressAlign: 0x0000000000000002
+ELF-AVR-NEXT: Content: C20E0895
+ELF-AVR-NEXT: - Name: .data
+ELF-AVR-NEXT: Type: SHT_PROGBITS
+ELF-AVR-NEXT: Flags: [ SHF_WRITE, SHF_ALLOC ]
+ELF-AVR-NEXT: Address: 0x0000000000800060
+ELF-AVR-NEXT: AddressAlign: 0x0000000000000001
+ELF-AVR-NEXT: Content: ''
+ELF-AVR-NEXT: Symbols:
+ELF-AVR-NEXT: Local:
+ELF-AVR-NEXT: - Type: STT_SECTION
+ELF-AVR-NEXT: Section: .text
+ELF-AVR-NEXT: - Type: STT_SECTION
+ELF-AVR-NEXT: Section: .data
+ELF-AVR-NEXT: Value: 0x0000000000800060
+ELF-AVR-NEXT: - Name: a.o
+ELF-AVR-NEXT: Type: STT_FILE
+ELF-AVR-NEXT: - Name: main
+ELF-AVR-NEXT: Section: .text
+ELF-AVR-NEXT: Global:
+ELF-AVR-NEXT: - Name: __trampolines_start
+ELF-AVR-NEXT: Section: .text
+ELF-AVR-NEXT: - Name: _etext
+ELF-AVR-NEXT: Section: .text
+ELF-AVR-NEXT: Value: 0x0000000000000004
+ELF-AVR-NEXT: - Name: __data_load_end
+ELF-AVR-NEXT: Value: 0x0000000000000004
+ELF-AVR-NEXT: - Name: __trampolines_end
+ELF-AVR-NEXT: Section: .text
+ELF-AVR-NEXT: - Name: __data_load_start
+ELF-AVR-NEXT: Value: 0x0000000000000004
+ELF-AVR-NEXT: - Name: __dtors_end
+ELF-AVR-NEXT: Section: .text
+ELF-AVR-NEXT: - Name: __eeprom_end
+ELF-AVR-NEXT: Section: .data
+ELF-AVR-NEXT: Value: 0x0000000000810000
+ELF-AVR-NEXT: - Name: __ctors_start
+ELF-AVR-NEXT: Section: .text
+ELF-AVR-NEXT: - Name: __dtors_start
+ELF-AVR-NEXT: Section: .text
+ELF-AVR-NEXT: - Name: __ctors_end
+ELF-AVR-NEXT: Section: .text
+ELF-AVR-NEXT: - Name: _edata
+ELF-AVR-NEXT: Section: .data
+ELF-AVR-NEXT: Value: 0x0000000000800060
+ELF-AVR-NEXT: - Name: _end
+ELF-AVR-NEXT: Section: .data
+ELF-AVR-NEXT: Value: 0x0000000000800060
+
+
ELF-X86-64-UNWIND: - Name: .eh_frame
ELF-X86-64-UNWIND-NEXT: Type: SHT_X86_64_UNWIND
ELF-X86-64-UNWIND-NEXT: Flags: [ SHF_ALLOC ]
ELF-X86-64-UNWIND-NEXT: AddressAlign: 0x0000000000000001
ELF-X86-64-UNWIND-NEXT: Content: ''
+
+RUN: not obj2yaml %t.blah 2>&1 | FileCheck --check-prefix=ENOENT %s
+ENOENT: Error: '{{[Nn]}}o such file or directory'
diff --git a/test/Object/objdump-shndx.test b/test/Object/objdump-shndx.test
new file mode 100644
index 000000000000..dcdb3c2f0931
--- /dev/null
+++ b/test/Object/objdump-shndx.test
@@ -0,0 +1,8 @@
+RUN: llvm-objdump -r %p/Inputs/shndx.elf | FileCheck %s
+
+Test that llvm-objdump can handle shndx. The relocation points to a section
+symbol that has st_shndx == SHN_XINDEX. To print the section name llvm-objdump
+has to use the shndx section.
+
+CHECK: RELOCATION RECORDS FOR [.rela.text]:
+CHECK-NEXT: 0000000000000000 R_X86_64_32 bar+0
diff --git a/test/Object/pr25877.test b/test/Object/pr25877.test
new file mode 100644
index 000000000000..c323d6259fa4
--- /dev/null
+++ b/test/Object/pr25877.test
@@ -0,0 +1,9 @@
+; RUN: echo create %t.a > %t.mri
+; RUN: echo addlib %p/Inputs/pr25877.lib >> %t.mri
+; RUN: echo save >> %t.mri
+; RUN: echo end >> %t.mri
+
+; RUN: llvm-ar -M < %t.mri
+; RUN: llvm-ar t %t.a | FileCheck %s
+
+; CHECK: foo.obj
diff --git a/test/Object/readobj-absent.test b/test/Object/readobj-absent.test
new file mode 100644
index 000000000000..0968f3c31022
--- /dev/null
+++ b/test/Object/readobj-absent.test
@@ -0,0 +1,2 @@
+// Don't crash if required information is absent
+RUN: llvm-readobj -dyn-symbols %p/Inputs/trivial-object-test.elf-x86-64
diff --git a/test/Object/readobj-shared-object.test b/test/Object/readobj-shared-object.test
index 508caca9717d..173581e60c39 100644
--- a/test/Object/readobj-shared-object.test
+++ b/test/Object/readobj-shared-object.test
@@ -300,18 +300,32 @@ ELF: Section: Absolute (0xFFF1)
ELF: }
ELF: ]
-ELF: DynamicSection [ (9 entries)
-ELF: Tag Type Name/Value
-ELF: 00000001 NEEDED SharedLibrary (libc.so.6)
-ELF: 00000001 NEEDED SharedLibrary (libm.so.6)
-ELF: 0000000E SONAME LibrarySoname (libfoo.so)
-ELF: 00000004 HASH {{[0-9a-f]+}}
-ELF: 00000005 STRTAB {{[0-9a-f]+}}
-ELF: 00000006 SYMTAB {{[0-9a-f]+}}
-ELF: 0000000A STRSZ {{[0-9]+}} (bytes)
-ELF: 0000000B SYMENT {{[0-9]+}} (bytes)
-ELF: 00000000 NULL 0x0
-ELF: ]
+ELF32: DynamicSection [ (9 entries)
+ELF32: Tag Type Name/Value
+ELF32: 0x00000001 NEEDED SharedLibrary (libc.so.6)
+ELF32: 0x00000001 NEEDED SharedLibrary (libm.so.6)
+ELF32: 0x0000000E SONAME LibrarySoname (libfoo.so)
+ELF32: 0x00000004 HASH {{[0-9a-f]+}}
+ELF32: 0x00000005 STRTAB {{[0-9a-f]+}}
+ELF32: 0x00000006 SYMTAB {{[0-9a-f]+}}
+ELF32: 0x0000000A STRSZ {{[0-9]+}} (bytes)
+ELF32: 0x0000000B SYMENT {{[0-9]+}} (bytes)
+ELF32: 0x00000000 NULL 0x0
+ELF32: ]
+
+ELF64: DynamicSection [ (9 entries)
+ELF64: Tag Type Name/Value
+ELF64: 0x0000000000000001 NEEDED SharedLibrary (libc.so.6)
+ELF64: 0x0000000000000001 NEEDED SharedLibrary (libm.so.6)
+ELF64: 0x000000000000000E SONAME LibrarySoname (libfoo.so)
+ELF64: 0x0000000000000004 HASH {{[0-9a-f]+}}
+ELF64: 0x0000000000000005 STRTAB {{[0-9a-f]+}}
+ELF64: 0x0000000000000006 SYMTAB {{[0-9a-f]+}}
+ELF64: 0x000000000000000A STRSZ {{[0-9]+}} (bytes)
+ELF64: 0x000000000000000B SYMENT {{[0-9]+}} (bytes)
+ELF64: 0x0000000000000000 NULL 0x0
+ELF64: ]
+
ELF: NeededLibraries [
ELF-NEXT: libc.so.6
diff --git a/test/Object/relocation-executable.test b/test/Object/relocation-executable.test
index 38ad5968af8d..93d4dee3089b 100644
--- a/test/Object/relocation-executable.test
+++ b/test/Object/relocation-executable.test
@@ -35,3 +35,15 @@ RUN: %p/Inputs/hello-world.elf-x86-64 | FileCheck %s --check-prefix=DYN
// DYN-NEXT: Addend: 0x0
// DYN-NEXT: }
// DYN-NEXT: }
+
+RUN: llvm-readobj -dyn-relocations -expand-relocs \
+RUN: %p/Inputs/rel-no-sec-table.elf-x86-64 | FileCheck %s --check-prefix=DYN2
+
+// DYN2: Dynamic Relocations {
+// DYN2-NEXT: Relocation {
+// DYN2-NEXT: Offset: 0x12F0
+// DYN2-NEXT: Type: R_X86_64_GLOB_DAT (6)
+// DYN2-NEXT: Symbol: g
+// DYN2-NEXT: Addend: 0x0
+// DYN2-NEXT: }
+// DYN2-NEXT: }
diff --git a/test/Other/2010-05-06-Printer.ll b/test/Other/2010-05-06-Printer.ll
index 1cbe78dab7de..e57b9825b334 100644
--- a/test/Other/2010-05-06-Printer.ll
+++ b/test/Other/2010-05-06-Printer.ll
@@ -1,4 +1,5 @@
; RUN: llc -O2 -print-after-all < %s 2>/dev/null
+; REQUIRES: default_triple
define void @tester(){
ret void
diff --git a/test/Other/extract-alias.ll b/test/Other/extract-alias.ll
index dbc650ec6903..ac7776dd7f39 100644
--- a/test/Other/extract-alias.ll
+++ b/test/Other/extract-alias.ll
@@ -12,10 +12,10 @@
; CHECK: declare void @a0bar()
; DELETE: @zed = global i32 0
-; DELETE: @zeda0 = alias i32* @zed
-; DELETE-NEXT: @a0foo = alias i32* ()* @foo
-; DELETE-NEXT: @a0a0bar = alias void ()* @bar
-; DELETE-NEXT: @a0bar = alias void ()* @bar
+; DELETE: @zeda0 = alias i32, i32* @zed
+; DELETE-NEXT: @a0foo = alias i32* (), i32* ()* @foo
+; DELETE-NEXT: @a0a0bar = alias void (), void ()* @bar
+; DELETE-NEXT: @a0bar = alias void (), void ()* @bar
; DELETE: declare i32* @foo()
; DELETE: define void @bar() {
; DELETE-NEXT: %c = call i32* @foo()
@@ -23,25 +23,25 @@
; DELETE-NEXT: }
; ALIAS: @zed = external global i32
-; ALIAS: @zeda0 = alias i32* @zed
+; ALIAS: @zeda0 = alias i32, i32* @zed
-; ALIASRE: @a0a0bar = alias void ()* @bar
-; ALIASRE: @a0bar = alias void ()* @bar
+; ALIASRE: @a0a0bar = alias void (), void ()* @bar
+; ALIASRE: @a0bar = alias void (), void ()* @bar
; ALIASRE: declare void @bar()
@zed = global i32 0
-@zeda0 = alias i32* @zed
+@zeda0 = alias i32, i32* @zed
-@a0foo = alias i32* ()* @foo
+@a0foo = alias i32* (), i32* ()* @foo
define i32* @foo() {
call void @a0bar()
ret i32* @zeda0
}
-@a0a0bar = alias void ()* @bar
+@a0a0bar = alias void (), void ()* @bar
-@a0bar = alias void ()* @bar
+@a0bar = alias void (), void ()* @bar
define void @bar() {
%c = call i32* @foo()
diff --git a/test/Other/llvm-nm-without-aliases.ll b/test/Other/llvm-nm-without-aliases.ll
index 6ef72c742328..4df1a751a04f 100644
--- a/test/Other/llvm-nm-without-aliases.ll
+++ b/test/Other/llvm-nm-without-aliases.ll
@@ -12,13 +12,13 @@
; WITH: T bar
; WITH: T foo
-@a0foo = alias void ()* @foo
+@a0foo = alias void (), void ()* @foo
define void @foo() {
ret void
}
-@a0bar = alias void ()* @bar
+@a0bar = alias void (), void ()* @bar
define void @bar() {
ret void
diff --git a/test/Other/opt-twice.ll b/test/Other/opt-twice.ll
new file mode 100644
index 000000000000..6bff52e34e35
--- /dev/null
+++ b/test/Other/opt-twice.ll
@@ -0,0 +1,14 @@
+; The pass here doesn't matter (we use deadargelim), but test
+; that the -run-twice options exists, generates output, and
+; doesn't crash
+; RUN: opt -run-twice -deadargelim -S < %s | FileCheck %s
+
+; CHECK: define internal void @test
+define internal {} @test() {
+ ret {} undef
+}
+
+define void @caller() {
+ call {} @test()
+ ret void
+}
diff --git a/test/SymbolRewriter/rewrite.ll b/test/SymbolRewriter/rewrite.ll
index e8a0db6d606c..1d2365cb1d8f 100644
--- a/test/SymbolRewriter/rewrite.ll
+++ b/test/SymbolRewriter/rewrite.ll
@@ -20,7 +20,7 @@ define i32 @caller() {
}
%struct.S = type { i8 }
-@_ZN1SC1Ev = alias void (%struct.S*)* @_ZN1SC2Ev
+@_ZN1SC1Ev = alias void (%struct.S*), void (%struct.S*)* @_ZN1SC2Ev
define void @_ZN1SC2Ev(%struct.S* %this) unnamed_addr align 2 {
entry:
%this.addr = alloca %struct.S*, align 4
diff --git a/test/TableGen/cast-list-initializer.td b/test/TableGen/cast-list-initializer.td
new file mode 100644
index 000000000000..4c83773a5a79
--- /dev/null
+++ b/test/TableGen/cast-list-initializer.td
@@ -0,0 +1,10 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+
+class Foo<bits<8> b> {
+// CHECK: list<int> ListOfInts = [170];
+// CHECK: list<int> AnotherList = [170, 7];
+ list<int> ListOfInts = [!cast<int>(b)];
+ list<int> AnotherList = [!cast<int>(b), !cast<int>({1, 1, 1})];
+}
+
+def : Foo<{1, 0, 1, 0, 1, 0, 1, 0}>;
diff --git a/test/TableGen/intrinsic-varargs.td b/test/TableGen/intrinsic-varargs.td
index 380d79d4a22a..42ce8a959625 100644
--- a/test/TableGen/intrinsic-varargs.td
+++ b/test/TableGen/intrinsic-varargs.td
@@ -23,8 +23,8 @@ class Intrinsic<string name, list<LLVMType> param_types = []> {
}
// isVoid needs to match the definition in ValueTypes.td
-def isVoid : ValueType<0, 57>; // Produces no value
+def isVoid : ValueType<0, 66>; // Produces no value
def llvm_vararg_ty : LLVMType<isVoid>; // this means vararg here
-// CHECK: /* 0 */ 0, 28, 0,
+// CHECK: /* 0 */ 0, 29, 0,
def int_foo : Intrinsic<"llvm.foo", [llvm_vararg_ty]>;
diff --git a/test/TableGen/trydecode-emission.td b/test/TableGen/trydecode-emission.td
new file mode 100644
index 000000000000..91c0e123857b
--- /dev/null
+++ b/test/TableGen/trydecode-emission.td
@@ -0,0 +1,43 @@
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+
+// Check that if decoding of an instruction fails and the instruction does not
+// have a complete decoder method that can determine if the bitpattern is valid
+// or not then the decoder tries to find a more general instruction that
+// matches the bitpattern too.
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+ let InstructionSet = archInstrInfo;
+}
+
+class TestInstruction : Instruction {
+ let Size = 1;
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
+ field bits<8> Inst;
+ field bits<8> SoftFail = 0;
+}
+
+def InstA : TestInstruction {
+ let Inst = {0,0,0,0,?,?,?,?};
+ let AsmString = "InstA";
+}
+
+def InstB : TestInstruction {
+ let Inst = {0,0,0,0,0,0,?,?};
+ let AsmString = "InstB";
+ let DecoderMethod = "DecodeInstB";
+ let hasCompleteDecoder = 0;
+}
+
+// CHECK: /* 0 */ MCD::OPC_ExtractField, 4, 4, // Inst{7-4} ...
+// CHECK-NEXT: /* 3 */ MCD::OPC_FilterValue, 0, 14, 0, // Skip to: 21
+// CHECK-NEXT: /* 7 */ MCD::OPC_CheckField, 2, 2, 0, 5, 0, // Skip to: 18
+// CHECK-NEXT: /* 13 */ MCD::OPC_TryDecode, 24, 0, 0, 0, // Opcode: InstB, skip to: 18
+// CHECK-NEXT: /* 18 */ MCD::OPC_Decode, 23, 1, // Opcode: InstA
+// CHECK-NEXT: /* 21 */ MCD::OPC_Fail,
+
+// CHECK: if (DecodeInstB(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; }
diff --git a/test/TableGen/trydecode-emission2.td b/test/TableGen/trydecode-emission2.td
new file mode 100644
index 000000000000..56ca6d33c241
--- /dev/null
+++ b/test/TableGen/trydecode-emission2.td
@@ -0,0 +1,44 @@
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+ let InstructionSet = archInstrInfo;
+}
+
+class TestInstruction : Instruction {
+ let Size = 1;
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
+ field bits<8> Inst;
+ field bits<8> SoftFail = 0;
+}
+
+def InstA : TestInstruction {
+ let Inst = {0,0,0,0,0,0,?,?};
+ let AsmString = "InstA";
+ let DecoderMethod = "DecodeInstA";
+ let hasCompleteDecoder = 0;
+}
+
+def InstB : TestInstruction {
+ let Inst = {0,0,0,?,?,0,1,1};
+ let AsmString = "InstB";
+ let DecoderMethod = "DecodeInstB";
+ let hasCompleteDecoder = 0;
+}
+
+// CHECK: /* 0 */ MCD::OPC_ExtractField, 2, 1, // Inst{2} ...
+// CHECK-NEXT: /* 3 */ MCD::OPC_FilterValue, 0, 29, 0, // Skip to: 36
+// CHECK-NEXT: /* 7 */ MCD::OPC_ExtractField, 5, 3, // Inst{7-5} ...
+// CHECK-NEXT: /* 10 */ MCD::OPC_FilterValue, 0, 22, 0, // Skip to: 36
+// CHECK-NEXT: /* 14 */ MCD::OPC_CheckField, 0, 2, 3, 5, 0, // Skip to: 25
+// CHECK-NEXT: /* 20 */ MCD::OPC_TryDecode, 24, 0, 0, 0, // Opcode: InstB, skip to: 25
+// CHECK-NEXT: /* 25 */ MCD::OPC_CheckField, 3, 2, 0, 5, 0, // Skip to: 36
+// CHECK-NEXT: /* 31 */ MCD::OPC_TryDecode, 23, 1, 0, 0, // Opcode: InstA, skip to: 36
+// CHECK-NEXT: /* 36 */ MCD::OPC_Fail,
+
+// CHECK: if (DecodeInstB(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; }
+// CHECK: if (DecodeInstA(MI, insn, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; }
diff --git a/test/TableGen/trydecode-emission3.td b/test/TableGen/trydecode-emission3.td
new file mode 100644
index 000000000000..ad21eefa897a
--- /dev/null
+++ b/test/TableGen/trydecode-emission3.td
@@ -0,0 +1,44 @@
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+ let InstructionSet = archInstrInfo;
+}
+
+class TestInstruction : Instruction {
+ let Size = 1;
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
+ field bits<8> Inst;
+ field bits<8> SoftFail = 0;
+}
+
+def InstA : TestInstruction {
+ let Inst = {0,0,0,0,?,?,?,?};
+ let AsmString = "InstA";
+}
+
+def InstBOp : Operand<i32> {
+ let DecoderMethod = "DecodeInstBOp";
+ let hasCompleteDecoder = 0;
+}
+
+def InstB : TestInstruction {
+ bits<2> op;
+ let Inst{7-2} = {0,0,0,0,0,0};
+ let Inst{1-0} = op;
+ let OutOperandList = (outs InstBOp:$op);
+ let AsmString = "InstB";
+}
+
+// CHECK: /* 0 */ MCD::OPC_ExtractField, 4, 4, // Inst{7-4} ...
+// CHECK-NEXT: /* 3 */ MCD::OPC_FilterValue, 0, 14, 0, // Skip to: 21
+// CHECK-NEXT: /* 7 */ MCD::OPC_CheckField, 2, 2, 0, 5, 0, // Skip to: 18
+// CHECK-NEXT: /* 13 */ MCD::OPC_TryDecode, 24, 0, 0, 0, // Opcode: InstB, skip to: 18
+// CHECK-NEXT: /* 18 */ MCD::OPC_Decode, 23, 1, // Opcode: InstA
+// CHECK-NEXT: /* 21 */ MCD::OPC_Fail,
+
+// CHECK: if (DecodeInstBOp(MI, tmp, Address, Decoder) == MCDisassembler::Fail) { DecodeComplete = false; return MCDisassembler::Fail; }
diff --git a/test/Transforms/ADCE/basictest.ll b/test/Transforms/ADCE/basictest.ll
index 378d70288f3f..aaacc1842253 100644
--- a/test/Transforms/ADCE/basictest.ll
+++ b/test/Transforms/ADCE/basictest.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -adce -simplifycfg | llvm-dis
+; RUN: opt < %s -passes=adce | llvm-dis
define i32 @Test(i32 %A, i32 %B) {
BB1:
@@ -15,5 +16,3 @@ BB4: ; preds = %BB1
%X = phi i32 [ %A, %BB1 ] ; <i32> [#uses=1]
br label %BB3
}
-
-
diff --git a/test/Transforms/AddDiscriminators/basic.ll b/test/Transforms/AddDiscriminators/basic.ll
index cabf707fe5d6..0588562c7377 100644
--- a/test/Transforms/AddDiscriminators/basic.ll
+++ b/test/Transforms/AddDiscriminators/basic.ll
@@ -11,7 +11,7 @@
; if (i < 10) x = i;
; }
-define void @foo(i32 %i) #0 {
+define void @foo(i32 %i) #0 !dbg !4 {
entry:
%i.addr = alloca i32, align 4
%x = alloca i32, align 4
@@ -37,15 +37,19 @@ if.end: ; preds = %if.then, %entry
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-!llvm.dbg.cu = !{!0}
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "basic.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "basic.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
@@ -55,7 +59,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
!12 = !DILocation(line: 4, scope: !4)
-; CHECK: ![[FOO:[0-9]+]] = !DISubprogram(name: "foo"
+; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
; CHECK: ![[BLOCK:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[BLOCKFILE:[0-9]+]])
; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 1)
diff --git a/test/Transforms/AddDiscriminators/call.ll b/test/Transforms/AddDiscriminators/call.ll
new file mode 100644
index 000000000000..b123b25f2af2
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/call.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; Discriminator support for calls that are defined in one line:
+; #1 void bar();
+; #2
+; #3 void foo() {
+; #4 bar();bar()/*discriminator 1*/;bar()/*discriminator 2*/;
+; #5 }
+
+; Function Attrs: uwtable
+define void @_Z3foov() #0 {
+ call void @_Z3barv(), !dbg !10
+; CHECK: call void @_Z3barv(), !dbg ![[CALL0:[0-9]+]]
+ call void @_Z3barv(), !dbg !11
+; CHECK: call void @_Z3barv(), !dbg ![[CALL1:[0-9]+]]
+ call void @_Z3barv(), !dbg !12
+; CHECK: call void @_Z3barv(), !dbg ![[CALL2:[0-9]+]]
+ ret void, !dbg !13
+}
+
+declare void @_Z3barv() #1
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "c.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)"}
+!10 = !DILocation(line: 4, column: 3, scope: !4)
+!11 = !DILocation(line: 4, column: 9, scope: !4)
+!12 = !DILocation(line: 4, column: 15, scope: !4)
+!13 = !DILocation(line: 5, column: 1, scope: !4)
+
+; CHECK: ![[CALL1]] = !DILocation(line: 4, column: 9, scope: ![[CALL1BLOCK:[0-9]+]])
+; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
+; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 15, scope: ![[CALL2BLOCK:[0-9]+]])
+; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
diff --git a/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll b/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll
new file mode 100644
index 000000000000..5e90d32a62eb
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -add-discriminators < %s | FileCheck %s
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+; This checks whether the add-discriminators pass producess valid metadata on
+; llvm.dbg.declare instructions
+;
+; CHECK-LABEL: @test_valid_metadata
+define void @test_valid_metadata() {
+ %a = alloca i8
+ call void @llvm.dbg.declare(metadata i8* %a, metadata !2, metadata !5), !dbg !6
+ %b = alloca i8
+ call void @llvm.dbg.declare(metadata i8* %b, metadata !9, metadata !5), !dbg !11
+ ret void
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !DILocalVariable(scope: !3)
+!3 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false)
+!4 = !DIFile(filename: "a.cpp", directory: "/tmp")
+!5 = !DIExpression()
+!6 = !DILocation(line: 0, scope: !3, inlinedAt: !7)
+!7 = distinct !DILocation(line: 0, scope: !8)
+!8 = distinct !DISubprogram(linkageName: "test_valid_metadata", scope: null, isLocal: false, isDefinition: true, isOptimized: false)
+!9 = !DILocalVariable(scope: !10)
+!10 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false)
+!11 = !DILocation(line: 0, scope: !10)
diff --git a/test/Transforms/AddDiscriminators/diamond.ll b/test/Transforms/AddDiscriminators/diamond.ll
new file mode 100644
index 000000000000..2ca638a83ec3
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/diamond.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; Discriminator support for diamond-shaped CFG.:
+; #1 void bar(int);
+; #2
+; #3 void foo(int i) {
+; #4 if (i > 10)
+; #5 bar(5); else bar(3);
+; #6 }
+
+; bar(5): discriminator 0
+; bar(3): discriminator 1
+
+; Function Attrs: uwtable
+define void @_Z3fooi(i32 %i) #0 !dbg !4 {
+ %1 = alloca i32, align 4
+ store i32 %i, i32* %1, align 4
+ call void @llvm.dbg.declare(metadata i32* %1, metadata !11, metadata !12), !dbg !13
+ %2 = load i32, i32* %1, align 4, !dbg !14
+ %3 = icmp sgt i32 %2, 10, !dbg !16
+ br i1 %3, label %4, label %5, !dbg !17
+
+; <label>:4 ; preds = %0
+ call void @_Z3bari(i32 5), !dbg !18
+ br label %6, !dbg !18
+
+; <label>:5 ; preds = %0
+ call void @_Z3bari(i32 3), !dbg !19
+; CHECK: call void @_Z3bari(i32 3), !dbg ![[ELSE:[0-9]+]]
+ br label %6
+
+; <label>:6 ; preds = %5, %4
+ ret void, !dbg !20
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @_Z3bari(i32) #2
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253273)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "a.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 253273)"}
+!11 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!12 = !DIExpression()
+!13 = !DILocation(line: 3, column: 14, scope: !4)
+!14 = !DILocation(line: 4, column: 7, scope: !15)
+!15 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 7)
+!16 = !DILocation(line: 4, column: 9, scope: !15)
+!17 = !DILocation(line: 4, column: 7, scope: !4)
+!18 = !DILocation(line: 5, column: 5, scope: !15)
+!19 = !DILocation(line: 5, column: 18, scope: !15)
+!20 = !DILocation(line: 6, column: 1, scope: !4)
+
+; CHECK: ![[ELSE]] = !DILocation(line: 5, column: 18, scope: ![[ELSEBLOCK:[0-9]+]])
+; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
diff --git a/test/Transforms/AddDiscriminators/first-only.ll b/test/Transforms/AddDiscriminators/first-only.ll
index 7f1ea2b15cf6..20d88b55e96e 100644
--- a/test/Transforms/AddDiscriminators/first-only.ll
+++ b/test/Transforms/AddDiscriminators/first-only.ll
@@ -13,7 +13,7 @@
; }
; }
-define void @foo(i32 %i) #0 {
+define void @foo(i32 %i) #0 !dbg !4 {
entry:
%i.addr = alloca i32, align 4
%x = alloca i32, align 4
@@ -46,15 +46,19 @@ if.end: ; preds = %if.then, %entry
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-!llvm.dbg.cu = !{!0}
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "first-only.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "first-only.c", directory: ".")
!6 = !DISubroutineType(types: !{null})
!7 = !{i32 2, !"Dwarf Version", i32 4}
@@ -63,7 +67,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!10 = !DILocation(line: 3, scope: !11)
!11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
-; CHECK: ![[FOO:[0-9]+]] = !DISubprogram(name: "foo"
+; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
; CHECK: ![[BLOCK1:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
!12 = !DILocation(line: 3, scope: !13)
diff --git a/test/Transforms/AddDiscriminators/multiple.ll b/test/Transforms/AddDiscriminators/multiple.ll
index 621a7117571c..9a05fcd86864 100644
--- a/test/Transforms/AddDiscriminators/multiple.ll
+++ b/test/Transforms/AddDiscriminators/multiple.ll
@@ -10,7 +10,7 @@
; The two stores inside the if-then-else line must have different discriminator
; values.
-define void @foo(i32 %i) #0 {
+define void @foo(i32 %i) #0 !dbg !4 {
entry:
%i.addr = alloca i32, align 4
%x = alloca i32, align 4
@@ -47,15 +47,19 @@ if.end: ; preds = %if.else, %if.then
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-!llvm.dbg.cu = !{!0}
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "multiple.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "multiple.c", directory: ".")
!6 = !DISubroutineType(types: !{null, !13})
!13 = !DIBasicType(encoding: DW_ATE_signed, name: "int", size: 32, align: 32)
diff --git a/test/Transforms/AddDiscriminators/no-discriminators.ll b/test/Transforms/AddDiscriminators/no-discriminators.ll
index 895967e73b4d..bbba9dc62c4e 100644
--- a/test/Transforms/AddDiscriminators/no-discriminators.ll
+++ b/test/Transforms/AddDiscriminators/no-discriminators.ll
@@ -12,7 +12,7 @@
; altered. If they are, it means that the discriminators pass added a
; new lexical scope.
-define i32 @foo(i64 %i) #0 {
+define i32 @foo(i64 %i) #0 !dbg !4 {
entry:
%retval = alloca i32, align 4
%i.addr = alloca i64, align 8
@@ -44,16 +44,20 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "no-discriminators", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i64)* @foo, variables: !2)
-; CHECK: ![[FOO:[0-9]+]] = !DISubprogram(name: "foo"
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
!5 = !DIFile(filename: "no-discriminators", directory: ".")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
@@ -63,7 +67,7 @@ attributes #1 = { nounwind readnone }
; CHECK: !{i32 2, !"Dwarf Version", i32 2}
!11 = !{i32 1, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.5.0 "}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !9)
+!13 = !DILocalVariable(name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !9)
!14 = !DILocation(line: 1, scope: !4)
!15 = !DILocation(line: 2, scope: !16)
; CHECK: ![[ENTRY]] = !DILocation(line: 2, scope: ![[BLOCK:[0-9]+]])
diff --git a/test/Transforms/AddDiscriminators/oneline.ll b/test/Transforms/AddDiscriminators/oneline.ll
new file mode 100644
index 000000000000..ebee3935dd66
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/oneline.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; Discriminator support for code that is written in one line:
+; #1 int foo(int i) {
+; #2 if (i == 3 || i == 5) return 100; else return 99;
+; #3 }
+
+; i == 3: discriminator 0
+; i == 5: discriminator 1
+; return 100: discriminator 2
+; return 99: discriminator 3
+
+define i32 @_Z3fooi(i32 %i) #0 !dbg !4 {
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ store i32 %i, i32* %2, align 4, !tbaa !13
+ call void @llvm.dbg.declare(metadata i32* %2, metadata !9, metadata !17), !dbg !18
+ %3 = load i32, i32* %2, align 4, !dbg !19, !tbaa !13
+ %4 = icmp eq i32 %3, 3, !dbg !21
+ br i1 %4, label %8, label %5, !dbg !22
+
+; <label>:5 ; preds = %0
+ %6 = load i32, i32* %2, align 4, !dbg !23, !tbaa !13
+; CHECK: %6 = load i32, i32* %2, align 4, !dbg ![[THEN1:[0-9]+]],{{.*}}
+
+ %7 = icmp eq i32 %6, 5, !dbg !24
+; CHECK: %7 = icmp eq i32 %6, 5, !dbg ![[THEN2:[0-9]+]]
+
+ br i1 %7, label %8, label %9, !dbg !25
+; CHECK: br i1 %7, label %8, label %9, !dbg ![[THEN3:[0-9]+]]
+
+; <label>:8 ; preds = %5, %0
+ store i32 100, i32* %1, align 4, !dbg !26
+; CHECK: store i32 100, i32* %1, align 4, !dbg ![[ELSE:[0-9]+]]
+
+ br label %10, !dbg !26
+; CHECK: br label %10, !dbg ![[ELSE]]
+
+; <label>:9 ; preds = %5
+ store i32 99, i32* %1, align 4, !dbg !27
+; CHECK: store i32 99, i32* %1, align 4, !dbg ![[COMBINE:[0-9]+]]
+
+ br label %10, !dbg !27
+; CHECK: br label %10, !dbg ![[COMBINE]]
+
+; <label>:10 ; preds = %9, %8
+ %11 = load i32, i32* %1, align 4, !dbg !28
+ ret i32 %11, !dbg !28
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "a.cc", directory: "/usr/local/google/home/dehao/discr")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{!"clang version 3.8.0 (trunk 250915)"}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"int", !15, i64 0}
+!15 = !{!"omnipotent char", !16, i64 0}
+!16 = !{!"Simple C/C++ TBAA"}
+!17 = !DIExpression()
+!18 = !DILocation(line: 1, column: 13, scope: !4)
+!19 = !DILocation(line: 2, column: 7, scope: !20)
+!20 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 7)
+!21 = !DILocation(line: 2, column: 9, scope: !20)
+!22 = !DILocation(line: 2, column: 14, scope: !20)
+!23 = !DILocation(line: 2, column: 17, scope: !20)
+!24 = !DILocation(line: 2, column: 19, scope: !20)
+!25 = !DILocation(line: 2, column: 7, scope: !4)
+!26 = !DILocation(line: 2, column: 25, scope: !20)
+!27 = !DILocation(line: 2, column: 42, scope: !20)
+!28 = !DILocation(line: 3, column: 1, scope: !4)
+
+; CHECK: ![[THEN1]] = !DILocation(line: 2, column: 17, scope: ![[THENBLOCK:[0-9]+]])
+; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
+; CHECK: ![[THEN2]] = !DILocation(line: 2, column: 19, scope: ![[THENBLOCK]])
+; CHECK: ![[THEN3]] = !DILocation(line: 2, column: 7, scope: ![[THENBLOCK]])
+; CHECK: ![[ELSE]] = !DILocation(line: 2, column: 25, scope: ![[ELSEBLOCK:[0-9]+]])
+; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
+; CHECK: ![[COMBINE]] = !DILocation(line: 2, column: 42, scope: ![[COMBINEBLOCK:[0-9]+]])
+; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 3)
diff --git a/test/Transforms/ArgumentPromotion/dbg.ll b/test/Transforms/ArgumentPromotion/dbg.ll
index 17a34cb62239..dbdccacf42ba 100644
--- a/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/test/Transforms/ArgumentPromotion/dbg.ll
@@ -1,10 +1,9 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
-; CHECK: call void @test(i32 %
-; CHECK: !DISubprogram(name: "test",{{.*}} function: void (i32)* @test
declare void @sink(i32)
-define internal void @test(i32** %X) {
+; CHECK: define internal void @test({{.*}} !dbg [[SP:![0-9]+]]
+define internal void @test(i32** %X) !dbg !2 {
%1 = load i32*, i32** %X, align 8
%2 = load i32, i32* %1, align 8
call void @sink(i32 %2)
@@ -12,16 +11,19 @@ define internal void @test(i32** %X) {
}
define void @caller(i32** %Y) {
+; CHECK: call void @test(i32 %
call void @test(i32** %Y)
ret void
}
+; CHECK: [[SP]] = distinct !DISubprogram(name: "test",
+
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!3}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !DILocation(line: 8, scope: !2)
-!2 = !DISubprogram(name: "test", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, scope: null, function: void (i32**)* @test)
-!3 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 2, file: !5, subprograms: !4)
+!2 = distinct !DISubprogram(name: "test", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, scope: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 2, file: !5, subprograms: !4)
!4 = !{!2}
!5 = !DIFile(filename: "test.c", directory: "")
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
index 282d42f75f05..4647e8fd6d9e 100644
--- a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
@@ -229,7 +229,7 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
@@ -241,6 +241,10 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
@@ -263,7 +267,7 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
@@ -275,6 +279,10 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
@@ -296,7 +304,7 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
@@ -307,6 +315,10 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
@@ -335,7 +347,7 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -350,6 +362,10 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK-NOT: dmb
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
index 42d7b781006d..7bb6ffed397d 100644
--- a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
@@ -91,7 +91,7 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
@@ -103,6 +103,10 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE]]
@@ -125,7 +129,7 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
@@ -137,6 +141,10 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE]]
@@ -158,7 +166,7 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
@@ -169,6 +177,10 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE]]
@@ -197,7 +209,7 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -212,6 +224,10 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE]]
diff --git a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
index 54653000f5d8..f9aa524fac98 100644
--- a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
+++ b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
@@ -9,17 +9,21 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
-; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END]]
@@ -41,7 +45,7 @@ define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
@@ -52,6 +56,10 @@ define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[END]]
@@ -73,7 +81,7 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
@@ -84,6 +92,10 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
; CHECK-NOT: dmb
; CHECK: br label %[[END:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[END]]
diff --git a/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
new file mode 100644
index 000000000000..792fb1ec4f70
--- /dev/null
+++ b/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S %s -atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
+; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
+; functionality, please move this test to a target which still is.
+
+define float @float_load_expand(float* %ptr) {
+; CHECK-LABEL: @float_load_expand
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic i32, i32* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+ %res = load atomic float, float* %ptr unordered, align 4
+ ret float %res
+}
+
+define float @float_load_expand_seq_cst(float* %ptr) {
+; CHECK-LABEL: @float_load_expand_seq_cst
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic i32, i32* %1 seq_cst, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+ %res = load atomic float, float* %ptr seq_cst, align 4
+ ret float %res
+}
+
+define float @float_load_expand_vol(float* %ptr) {
+; CHECK-LABEL: @float_load_expand_vol
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic volatile i32, i32* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+ %res = load atomic volatile float, float* %ptr unordered, align 4
+ ret float %res
+}
+
+define float @float_load_expand_addr1(float addrspace(1)* %ptr) {
+; CHECK-LABEL: @float_load_expand_addr1
+; CHECK: %1 = bitcast float addrspace(1)* %ptr to i32 addrspace(1)*
+; CHECK: %2 = load atomic i32, i32 addrspace(1)* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+ %res = load atomic float, float addrspace(1)* %ptr unordered, align 4
+ ret float %res
+}
+
+define void @float_store_expand(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic i32 %1, i32* %2 unordered, align 4
+ store atomic float %v, float* %ptr unordered, align 4
+ ret void
+}
+
+define void @float_store_expand_seq_cst(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_seq_cst
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic i32 %1, i32* %2 seq_cst, align 4
+ store atomic float %v, float* %ptr seq_cst, align 4
+ ret void
+}
+
+define void @float_store_expand_vol(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_vol
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic volatile i32 %1, i32* %2 unordered, align 4
+ store atomic volatile float %v, float* %ptr unordered, align 4
+ ret void
+}
+
+define void @float_store_expand_addr1(float addrspace(1)* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_addr1
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float addrspace(1)* %ptr to i32 addrspace(1)*
+; CHECK: store atomic i32 %1, i32 addrspace(1)* %2 unordered, align 4
+ store atomic float %v, float addrspace(1)* %ptr unordered, align 4
+ ret void
+}
+
diff --git a/test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll b/test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
new file mode 100644
index 000000000000..029a0e7b3e92
--- /dev/null
+++ b/test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
@@ -0,0 +1,11 @@
+; RUN: opt -S %s -atomic-expand -mtriple=i686-linux-gnu | FileCheck %s
+
+; This file tests the function `llvm::expandAtomicRMWToCmpXchg`.
+; It isn't technically target specific, but is exposed through a pass that is.
+
+define i8 @test_initial_load(i8* %ptr, i8 %value) {
+ %res = atomicrmw nand i8* %ptr, i8 %value seq_cst
+ ret i8 %res
+}
+; CHECK-LABEL: @test_initial_load
+; CHECK-NEXT: %1 = load i8, i8* %ptr, align 1
diff --git a/test/Transforms/AtomicExpand/X86/lit.local.cfg b/test/Transforms/AtomicExpand/X86/lit.local.cfg
new file mode 100644
index 000000000000..afde89be896d
--- /dev/null
+++ b/test/Transforms/AtomicExpand/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/BBVectorize/X86/wr-aliases.ll b/test/Transforms/BBVectorize/X86/wr-aliases.ll
index 56448c0e5471..a6ea27fc3ecb 100644
--- a/test/Transforms/BBVectorize/X86/wr-aliases.ll
+++ b/test/Transforms/BBVectorize/X86/wr-aliases.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -bb-vectorize -S < %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -disable-basicaa -bb-vectorize -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll
index da7f94149414..6edf7f07ac1d 100644
--- a/test/Transforms/BBVectorize/simple3.ll
+++ b/test/Transforms/BBVectorize/simple3.ll
@@ -4,12 +4,12 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; Basic depth-3 chain
define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
; CHECK-LABEL: @test1(
-; CHECK: %X1.v.i1.1.1 = insertelement <3 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2.2 = insertelement <3 x double> %X1.v.i1.1.1, double %B2, i32 1
-; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.2.2, double %B3, i32 2
-; CHECK: %X1.v.i0.1.3 = insertelement <3 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2.4 = insertelement <3 x double> %X1.v.i0.1.3, double %A2, i32 1
-; CHECK: %X1.v.i0 = insertelement <3 x double> %X1.v.i0.2.4, double %A3, i32 2
+; CHECK: %X1.v.i1.11 = insertelement <3 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.22 = insertelement <3 x double> %X1.v.i1.11, double %B2, i32 1
+; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.22, double %B3, i32 2
+; CHECK: %X1.v.i0.13 = insertelement <3 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.24 = insertelement <3 x double> %X1.v.i0.13, double %A2, i32 1
+; CHECK: %X1.v.i0 = insertelement <3 x double> %X1.v.i0.24, double %A3, i32 2
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%X3 = fsub double %A3, %B3
@@ -24,11 +24,11 @@ define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2,
; CHECK: %Z1 = fadd <3 x double> %Y1, %X1.v.i1
%R1 = fmul double %Z1, %Z2
%R = fmul double %R1, %Z3
-; CHECK: %Z1.v.r2.10 = extractelement <3 x double> %Z1, i32 2
+; CHECK: %Z1.v.r210 = extractelement <3 x double> %Z1, i32 2
; CHECK: %Z1.v.r1 = extractelement <3 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <3 x double> %Z1, i32 1
; CHECK: %R1 = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: %R = fmul double %R1, %Z1.v.r2.10
+; CHECK: %R = fmul double %R1, %Z1.v.r210
ret double %R
; CHECK: ret double %R
}
diff --git a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
index d4b94fe62c71..43fcc6051210 100644
--- a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
+++ b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
@@ -1,13 +1,18 @@
-; RUN: opt < %s -O3 -o - | llc -no-integrated-as -o - | grep bork_directive | wc -l | grep 2
+; RUN: opt < %s -O3 | llc -no-integrated-as | FileCheck %s
+; REQUIRES: default_triple
;; We don't want branch folding to fold asm directives.
+; CHECK: bork_directive
+; CHECK: bork_directive
+; CHECK-NOT: bork_directive
+
define void @bork(i32 %param) {
entry:
%tmp = icmp eq i32 %param, 0
br i1 %tmp, label %cond_true, label %cond_false
-cond_true:
+cond_true:
call void asm sideeffect ".bork_directive /* ${0:c}:${1:c} */", "i,i,~{dirflag},~{fpsr},~{flags}"( i32 37, i32 927 )
ret void
diff --git a/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll b/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll
new file mode 100644
index 000000000000..c3c11a1c4949
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S -codegenprepare -mtriple=aarch64-linux %s | FileCheck %s
+
+; Test for CodeGenPrepare::optimizeLoadExt(): simple case: two loads
+; feeding a phi that zext's each loaded value.
+define i32 @test_free_zext(i32* %ptr, i32* %ptr2, i32 %c) {
+; CHECK-LABEL: @test_free_zext(
+bb1:
+; CHECK-LABEL: bb1:
+; CHECK: %[[T1:.*]] = load
+; CHECK: %[[A1:.*]] = and i32 %[[T1]], 65535
+ %load1 = load i32, i32* %ptr, align 4
+ %cmp = icmp ne i32 %c, 0
+ br i1 %cmp, label %bb2, label %bb3
+bb2:
+; CHECK-LABEL: bb2:
+; CHECK: %[[T2:.*]] = load
+; CHECK: %[[A2:.*]] = and i32 %[[T2]], 65535
+ %load2 = load i32, i32* %ptr2, align 4
+ br label %bb3
+bb3:
+; CHECK-LABEL: bb3:
+; CHECK: phi i32 [ %[[A1]], %bb1 ], [ %[[A2]], %bb2 ]
+ %phi = phi i32 [ %load1, %bb1 ], [ %load2, %bb2 ]
+ %and = and i32 %phi, 65535
+ ret i32 %and
+}
+
+; Test for CodeGenPrepare::optimizeLoadExt(): exercise all opcode
+; cases of active bit calculation.
+define i32 @test_free_zext2(i32* %ptr, i16* %dst16, i32* %dst32, i32 %c) {
+; CHECK-LABEL: @test_free_zext2(
+bb1:
+; CHECK-LABEL: bb1:
+; CHECK: %[[T1:.*]] = load
+; CHECK: %[[A1:.*]] = and i32 %[[T1]], 65535
+ %load1 = load i32, i32* %ptr, align 4
+ %cmp = icmp ne i32 %c, 0
+ br i1 %cmp, label %bb2, label %bb4
+bb2:
+; CHECK-LABEL: bb2:
+ %trunc = trunc i32 %load1 to i16
+ store i16 %trunc, i16* %dst16, align 2
+ br i1 %cmp, label %bb3, label %bb4
+bb3:
+; CHECK-LABEL: bb3:
+ %shl = shl i32 %load1, 16
+ store i32 %shl, i32* %dst32, align 4
+ br label %bb4
+bb4:
+; CHECK-LABEL: bb4:
+; CHECK-NOT: and
+; CHECK: ret i32 %[[A1]]
+ %and = and i32 %load1, 65535
+ ret i32 %and
+}
+
+; Test for CodeGenPrepare::optimizeLoadExt(): check case of zext-able
+; load feeding a phi in the same block.
+define void @test_free_zext3(i32* %ptr, i32* %ptr2, i32* %dst, i64* %c) {
+; CHECK-LABEL: @test_free_zext3(
+bb1:
+; CHECK-LABEL: bb1:
+; CHECK: %[[T1:.*]] = load
+; CHECK: %[[A1:.*]] = and i32 %[[T1]], 65535
+ %load1 = load i32, i32* %ptr, align 4
+ br label %loop
+loop:
+; CHECK-LABEL: loop:
+; CHECK: phi i32 [ %[[A1]], %bb1 ], [ %[[A2]], %loop ]
+ %phi = phi i32 [ %load1, %bb1 ], [ %load2, %loop ]
+ %and = and i32 %phi, 65535
+ store i32 %and, i32* %dst, align 4
+ %idx = load volatile i64, i64* %c, align 4
+ %addr = getelementptr inbounds i32, i32* %ptr2, i64 %idx
+; CHECK: %[[T2:.*]] = load i32
+; CHECK: %[[A2:.*]] = and i32 %[[T2]], 65535
+ %load2 = load i32, i32* %addr, align 4
+ %cmp = icmp ne i64 %idx, 0
+ br i1 %cmp, label %loop, label %end
+end:
+ ret void
+}
diff --git a/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll b/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll
new file mode 100644
index 000000000000..172541a46080
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll
@@ -0,0 +1,95 @@
+;; AArch64 is arbitralily chosen as a 32/64-bit RISC representative to show the transform in all tests.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=ARM64
+
+; AArch64 widens to 32-bit.
+
+define i32 @widen_switch_i16(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i16
+ switch i16 %trunc, label %sw.default [
+ i16 1, label %sw.bb0
+ i16 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; ARM64-LABEL: @widen_switch_i16(
+; ARM64: %0 = zext i16 %trunc to i32
+; ARM64-NEXT: switch i32 %0, label %sw.default [
+; ARM64-NEXT: i32 1, label %return
+; ARM64-NEXT: i32 65535, label %sw.bb1
+}
+
+; Widen to 32-bit from a smaller, non-native type.
+
+define i32 @widen_switch_i17(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i17
+ switch i17 %trunc, label %sw.default [
+ i17 10, label %sw.bb0
+ i17 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; ARM64-LABEL: @widen_switch_i17(
+; ARM64: %0 = zext i17 %trunc to i32
+; ARM64-NEXT: switch i32 %0, label %sw.default [
+; ARM64-NEXT: i32 10, label %return
+; ARM64-NEXT: i32 131071, label %sw.bb1
+}
+
+; If the switch condition is a sign-extended function argument, then the
+; condition and cases should be sign-extended rather than zero-extended
+; because the sign-extension can be optimized away.
+
+define i32 @widen_switch_i16_sext(i2 signext %a) {
+entry:
+ switch i2 %a, label %sw.default [
+ i2 1, label %sw.bb0
+ i2 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; ARM64-LABEL: @widen_switch_i16_sext(
+; ARM64: %0 = sext i2 %a to i32
+; ARM64-NEXT: switch i32 %0, label %sw.default [
+; ARM64-NEXT: i32 1, label %return
+; ARM64-NEXT: i32 -1, label %sw.bb1
+}
+
diff --git a/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll b/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
new file mode 100644
index 000000000000..8c5e01e3634f
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
@@ -0,0 +1,118 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+; The following target lines are needed for the test to exercise what it should.
+; Without these lines, CodeGenPrepare does not try to sink the bitcasts.
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @f()
+
+declare void @g(i8*)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+; CodeGenPrepare will want to sink these bitcasts, but it selects the catchpad
+; blocks as the place to which the bitcast should be sunk. Since catchpads
+; do not allow non-phi instructions before the terminator, this isn't possible.
+
+; CHECK-LABEL: @test(
+define void @test(i32* %addr) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %x = getelementptr i32, i32* %addr, i32 1
+ %p1 = bitcast i32* %x to i8*
+ invoke void @f()
+ to label %invoke.cont unwind label %catch1
+
+; CHECK: invoke.cont:
+; CHECK-NEXT: %y = getelementptr i32, i32* %addr, i32 2
+invoke.cont:
+ %y = getelementptr i32, i32* %addr, i32 2
+ %p2 = bitcast i32* %y to i8*
+ invoke void @f()
+ to label %done unwind label %catch2
+
+done:
+ ret void
+
+catch1:
+ %cs1 = catchswitch within none [label %handler1] unwind to caller
+
+handler1:
+ %cp1 = catchpad within %cs1 []
+ br label %catch.shared
+; CHECK: handler1:
+; CHECK-NEXT: catchpad within %cs1
+; CHECK: %[[p1:[0-9]+]] = bitcast i32* %x to i8*
+
+catch2:
+ %cs2 = catchswitch within none [label %handler2] unwind to caller
+
+handler2:
+ %cp2 = catchpad within %cs2 []
+ br label %catch.shared
+; CHECK: handler2:
+; CHECK: catchpad within %cs2
+; CHECK: %[[p2:[0-9]+]] = bitcast i32* %y to i8*
+
+; CHECK: catch.shared:
+; CHECK-NEXT: %p = phi i8* [ %[[p1]], %handler1 ], [ %[[p2]], %handler2 ]
+catch.shared:
+ %p = phi i8* [ %p1, %handler1 ], [ %p2, %handler2 ]
+ call void @g(i8* %p)
+ unreachable
+}
+
+; CodeGenPrepare will want to hoist these llvm.dbg.value calls to the phi, but
+; there is no insertion point in a catchpad block.
+
+; CHECK-LABEL: @test_dbg_value(
+define void @test_dbg_value() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %a = alloca i8
+ %b = alloca i8
+ invoke void @f() to label %next unwind label %catch.dispatch
+next:
+ invoke void @f() to label %ret unwind label %catch.dispatch
+ret:
+ ret void
+
+catch.dispatch:
+ %p = phi i8* [%a, %entry], [%b, %next]
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %cp1 = catchpad within %cs1 []
+ tail call void @llvm.dbg.value(metadata i8* %p, i64 0, metadata !11, metadata !13), !dbg !14
+ call void @g(i8* %p)
+ catchret from %cp1 to label %ret
+
+; CHECK: catch.dispatch:
+; CHECK-NEXT: phi i8
+; CHECK-NEXT: catchswitch
+; CHECK-NOT: llvm.dbg.value
+
+; CHECK: catch:
+; CHECK-NEXT: catchpad
+; CHECK-NEXT: call void @llvm.dbg.value
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: null, subprograms: !3)
+!1 = !DIFile(filename: "t.c", directory: "D:\5Csrc\5Cllvm\5Cbuild")
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: null)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"PIC Level", i32 2}
+!10 = !{!"clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)"}
+!11 = !DILocalVariable(name: "p", scope: !4, file: !1, line: 2, type: !12)
+!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!13 = !DIExpression()
+!14 = !DILocation(line: 2, column: 8, scope: !4)
+!15 = !DILocation(line: 3, column: 1, scope: !4)
diff --git a/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll b/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
new file mode 100644
index 000000000000..72d82e2a162e
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s --check-prefix=SLOW
+; RUN: opt -S -codegenprepare -mattr=+bmi < %s | FileCheck %s --check-prefix=FAST_TZ
+; RUN: opt -S -codegenprepare -mattr=+lzcnt < %s | FileCheck %s --check-prefix=FAST_LZ
+
+target triple = "x86_64-unknown-unknown"
+target datalayout = "e-n32:64"
+
+; If the intrinsic is cheap, nothing should change.
+; If the intrinsic is expensive, check if the input is zero to avoid the call.
+; This is undoing speculation that may have been created by SimplifyCFG + InstCombine.
+
+define i64 @cttz(i64 %A) {
+entry:
+ %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+ ret i64 %z
+
+; SLOW-LABEL: @cttz(
+; SLOW: entry:
+; SLOW: %cmpz = icmp eq i64 %A, 0
+; SLOW: br i1 %cmpz, label %cond.end, label %cond.false
+; SLOW: cond.false:
+; SLOW: %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; SLOW: br label %cond.end
+; SLOW: cond.end:
+; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+; SLOW: ret i64 %ctz
+
+; FAST_TZ-LABEL: @cttz(
+; FAST_TZ: %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+; FAST_TZ: ret i64 %z
+}
+
+define i64 @ctlz(i64 %A) {
+entry:
+ %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+ ret i64 %z
+
+; SLOW-LABEL: @ctlz(
+; SLOW: entry:
+; SLOW: %cmpz = icmp eq i64 %A, 0
+; SLOW: br i1 %cmpz, label %cond.end, label %cond.false
+; SLOW: cond.false:
+; SLOW: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; SLOW: br label %cond.end
+; SLOW: cond.end:
+; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+; SLOW: ret i64 %ctz
+
+; FAST_LZ-LABEL: @ctlz(
+; FAST_LZ: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+; FAST_LZ: ret i64 %z
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
diff --git a/test/Transforms/CodeGenPrepare/X86/select.ll b/test/Transforms/CodeGenPrepare/X86/select.ll
new file mode 100644
index 000000000000..a26938ad5ee4
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/select.ll
@@ -0,0 +1,141 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+; Nothing to sink here, but this gets converted to a branch to
+; avoid stalling an out-of-order CPU on a predictable branch.
+
+define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) {
+entry:
+ %load = load double, double* %b, align 8
+ %cmp = fcmp olt double %load, %a
+ %sel = select i1 %cmp, i32 %x, i32 %y
+ ret i32 %sel
+
+; CHECK-LABEL: @no_sink(
+; CHECK: %load = load double, double* %b, align 8
+; CHECK: %cmp = fcmp olt double %load, %a
+; CHECK: br i1 %cmp, label %select.end, label %select.false
+; CHECK: select.false:
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi i32 [ %x, %entry ], [ %y, %select.false ]
+; CHECK: ret i32 %sel
+}
+
+
+; An 'fdiv' is expensive, so sink it rather than speculatively execute it.
+
+define float @fdiv_true_sink(float %a, float %b) {
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %div, float 2.0
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_true_sink(
+; CHECK: %cmp = fcmp ogt float %a, 1.0
+; CHECK: br i1 %cmp, label %select.true.sink, label %select.end
+; CHECK: select.true.sink:
+; CHECK: %div = fdiv float %a, %b
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi float [ %div, %select.true.sink ], [ 2.000000e+00, %entry ]
+; CHECK: ret float %sel
+}
+
+define float @fdiv_false_sink(float %a, float %b) {
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 3.0
+ %sel = select i1 %cmp, float 4.0, float %div
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_false_sink(
+; CHECK: %cmp = fcmp ogt float %a, 3.0
+; CHECK: br i1 %cmp, label %select.end, label %select.false.sink
+; CHECK: select.false.sink:
+; CHECK: %div = fdiv float %a, %b
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ]
+; CHECK: ret float %sel
+}
+
+define float @fdiv_both_sink(float %a, float %b) {
+entry:
+ %div1 = fdiv float %a, %b
+ %div2 = fdiv float %b, %a
+ %cmp = fcmp ogt float %a, 5.0
+ %sel = select i1 %cmp, float %div1, float %div2
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_both_sink(
+; CHECK: %cmp = fcmp ogt float %a, 5.0
+; CHECK: br i1 %cmp, label %select.true.sink, label %select.false.sink
+; CHECK: select.true.sink:
+; CHECK: %div1 = fdiv float %a, %b
+; CHECK: br label %select.end
+; CHECK: select.false.sink:
+; CHECK: %div2 = fdiv float %b, %a
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ]
+; CHECK: ret float %sel
+}
+
+; An 'fadd' is not too expensive, so it's ok to speculate.
+
+define float @fadd_no_sink(float %a, float %b) {
+ %add = fadd float %a, %b
+ %cmp = fcmp ogt float 6.0, %a
+ %sel = select i1 %cmp, float %add, float 7.0
+ ret float %sel
+
+; CHECK-LABEL: @fadd_no_sink(
+; CHECK: %sel = select i1 %cmp, float %add, float 7.0
+}
+
+; Possible enhancement: sinkability is only calculated with the direct
+; operand of the select, so we don't try to sink this. The fdiv cost is not
+; taken into account.
+
+define float @fdiv_no_sink(float %a, float %b) {
+entry:
+ %div = fdiv float %a, %b
+ %add = fadd float %div, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %add, float 8.0
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_no_sink(
+; CHECK: %sel = select i1 %cmp, float %add, float 8.0
+}
+
+; Do not transform the CFG if the select operands may have side effects.
+
+declare i64* @bar(i32, i32, i32)
+declare i64* @baz(i32, i32, i32)
+
+define i64* @calls_no_sink(i32 %in) {
+ %call1 = call i64* @bar(i32 1, i32 2, i32 3)
+ %call2 = call i64* @baz(i32 1, i32 2, i32 3)
+ %tobool = icmp ne i32 %in, 0
+ %sel = select i1 %tobool, i64* %call1, i64* %call2
+ ret i64* %sel
+
+; CHECK-LABEL: @calls_no_sink(
+; CHECK: %sel = select i1 %tobool, i64* %call1, i64* %call2
+}
+
+define i32 @sdiv_no_sink(i32 %a, i32 %b) {
+ %div1 = sdiv i32 %a, %b
+ %div2 = sdiv i32 %b, %a
+ %cmp = icmp sgt i32 %a, 5
+ %sel = select i1 %cmp, i32 %div1, i32 %div2
+ ret i32 %sel
+
+; CHECK-LABEL: @sdiv_no_sink(
+; CHECK: %sel = select i1 %cmp, i32 %div1, i32 %div2
+}
+
diff --git a/test/Transforms/CodeGenPrepare/X86/widen_switch.ll b/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
new file mode 100644
index 000000000000..53c9cc073558
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
@@ -0,0 +1,95 @@
+;; x86 is chosen to show the transform when 8-bit and 16-bit registers are available.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X86
+
+; No change for x86 because 16-bit registers are part of the architecture.
+
+define i32 @widen_switch_i16(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i16
+ switch i16 %trunc, label %sw.default [
+ i16 1, label %sw.bb0
+ i16 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; X86-LABEL: @widen_switch_i16(
+; X86: %trunc = trunc i32 %a to i16
+; X86-NEXT: switch i16 %trunc, label %sw.default [
+; X86-NEXT: i16 1, label %return
+; X86-NEXT: i16 -1, label %sw.bb1
+}
+
+; Widen to 32-bit from a smaller, non-native type.
+
+define i32 @widen_switch_i17(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i17
+ switch i17 %trunc, label %sw.default [
+ i17 10, label %sw.bb0
+ i17 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; X86-LABEL: @widen_switch_i17(
+; X86: %0 = zext i17 %trunc to i32
+; X86-NEXT: switch i32 %0, label %sw.default [
+; X86-NEXT: i32 10, label %return
+; X86-NEXT: i32 131071, label %sw.bb1
+}
+
+; If the switch condition is a sign-extended function argument, then the
+; condition and cases should be sign-extended rather than zero-extended
+; because the sign-extension can be optimized away.
+
+define i32 @widen_switch_i16_sext(i2 signext %a) {
+entry:
+ switch i2 %a, label %sw.default [
+ i2 1, label %sw.bb0
+ i2 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; X86-LABEL: @widen_switch_i16_sext(
+; X86: %0 = sext i2 %a to i8
+; X86-NEXT: switch i8 %0, label %sw.default [
+; X86-NEXT: i8 1, label %return
+; X86-NEXT: i8 -1, label %sw.bb1
+}
+
diff --git a/test/Transforms/CodeGenPrepare/invariant.group.ll b/test/Transforms/CodeGenPrepare/invariant.group.ll
new file mode 100644
index 000000000000..e8f1e42ddcbb
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/invariant.group.ll
@@ -0,0 +1,23 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+@tmp = global i8 0
+
+; CHECK-LABEL: define void @foo() {
+define void @foo() {
+enter:
+ ; CHECK-NOT: !invariant.group
+ ; CHECK-NOT: @llvm.invariant.group.barrier(
+ ; CHECK: %val = load i8, i8* @tmp
+ %val = load i8, i8* @tmp, !invariant.group !0
+ %ptr = call i8* @llvm.invariant.group.barrier(i8* @tmp)
+
+ ; CHECK: store i8 42, i8* @tmp
+ store i8 42, i8* %ptr, !invariant.group !0
+
+ ret void
+}
+; CHECK-LABEL: }
+
+declare i8* @llvm.invariant.group.barrier(i8*)
+
+!0 = !{!"something"} \ No newline at end of file
diff --git a/test/Transforms/CodeGenPrepare/statepoint-relocate.ll b/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
index b6898b373113..b31dfe7f3fa6 100644
--- a/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
+++ b/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
@@ -10,9 +10,9 @@ define i32 @test_sor_basic(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base-new, i32 15
entry:
%ptr = getelementptr i32, i32* %base, i32 15
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
- %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
@@ -25,10 +25,10 @@ define i32 @test_sor_two_derived(i32* %base) gc "statepoint-example" {
entry:
%ptr = getelementptr i32, i32* %base, i32 15
%ptr2 = getelementptr i32, i32* %base, i32 12
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
- %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
- %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 9)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+ %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
@@ -38,9 +38,9 @@ define i32 @test_sor_ooo(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base-new, i32 15
entry:
%ptr = getelementptr i32, i32* %base, i32 15
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
- %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
@@ -50,9 +50,9 @@ define i32 @test_sor_gep_smallint([3 x i32]* %base) gc "statepoint-example" {
; CHECK: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 2
entry:
%ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 2
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
- %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 7, i32 7)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
+ %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token %tok, i32 7, i32 7)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
@@ -62,27 +62,66 @@ define i32 @test_sor_gep_largeint([3 x i32]* %base) gc "statepoint-example" {
; CHECK-NOT: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 21
entry:
%ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 21
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
- %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 7, i32 7)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
+ %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token %tok, i32 7, i32 7)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
define i32 @test_sor_noop(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base, i32 15
-; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
-; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 9)
+; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
entry:
%ptr = getelementptr i32, i32* %base, i32 15
%ptr2 = getelementptr i32, i32* %base, i32 12
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
- %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 9)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)
-declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32, i32, i32)
+define i32 @test_sor_basic_wrong_order(i32* %base) gc "statepoint-example" {
+; CHECK-LABEL: @test_sor_basic_wrong_order
+; Here we have base relocate inserted after derived. Make sure that we don't
+; produce uses of the relocated base pointer before it's definition.
+entry:
+ %ptr = getelementptr i32, i32* %base, i32 15
+ ; CHECK: getelementptr i32, i32* %base, i32 15
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ ; CHECK: %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ ; CHECK-NEXT: getelementptr i32, i32* %base-new, i32 15
+ %ret = load i32, i32* %ptr-new
+ ret i32 %ret
+}
+
+define i32 @test_sor_noop_cross_bb(i1 %external-cond, i32* %base) gc "statepoint-example" {
+; CHECK-LABEL: @test_sor_noop_cross_bb
+; Here base relocate doesn't dominate derived relocate. Make sure that we don't
+; produce undefined use of the relocated base pointer.
+entry:
+ %ptr = getelementptr i32, i32* %base, i32 15
+ ; CHECK: getelementptr i32, i32* %base, i32 15
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ br i1 %external-cond, label %left, label %right
+
+left:
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ ; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %ret-new = load i32, i32* %ptr-new
+ ret i32 %ret-new
+
+right:
+ %ptr-base = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ ; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ %ret-base = load i32, i32* %ptr-base
+ ret i32 %ret-base
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.relocate.p0i32(token, i32, i32)
+declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token, i32, i32)
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 7541418b06ec..e65d8b28fe7d 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -1,17 +1,52 @@
; RUN: opt < %s -constprop -S | FileCheck %s
; RUN: opt < %s -constprop -disable-simplify-libcalls -S | FileCheck %s --check-prefix=FNOBUILTIN
+declare double @acos(double)
+declare double @asin(double)
+declare double @atan(double)
+declare double @atan2(double, double)
+declare double @ceil(double)
declare double @cos(double)
-
+declare double @cosh(double)
+declare double @exp(double)
+declare double @exp2(double)
+declare double @fabs(double)
+declare double @floor(double)
+declare double @fmod(double, double)
+declare double @log(double)
+declare double @log10(double)
+declare double @pow(double, double)
declare double @sin(double)
-
+declare double @sinh(double)
+declare double @sqrt(double)
declare double @tan(double)
+declare double @tanh(double)
-declare double @sqrt(double)
-declare double @exp2(double)
+declare float @acosf(float)
+declare float @asinf(float)
+declare float @atanf(float)
+declare float @atan2f(float, float)
+declare float @ceilf(float)
+declare float @cosf(float)
+declare float @coshf(float)
+declare float @expf(float)
+declare float @exp2f(float)
+declare float @fabsf(float)
+declare float @floorf(float)
+declare float @fmodf(float, float)
+declare float @logf(float)
+declare float @log10f(float)
+declare float @powf(float, float)
+declare float @sinf(float)
+declare float @sinhf(float)
+declare float @sqrtf(float)
+declare float @tanf(float)
+declare float @tanhf(float)
define double @T() {
; CHECK-LABEL: @T(
+; FNOBUILTIN-LABEL: @T(
+
; CHECK-NOT: call
; CHECK: ret
%A = call double @cos(double 0.000000e+00)
@@ -22,6 +57,119 @@ define double @T() {
%D = call double @sqrt(double 4.000000e+00)
%c = fadd double %b, %D
+ %slot = alloca double
+ %slotf = alloca float
+; FNOBUILTIN: call
+ %1 = call double @acos(double 1.000000e+00)
+ store double %1, double* %slot
+; FNOBUILTIN: call
+ %2 = call double @asin(double 1.000000e+00)
+ store double %2, double* %slot
+; FNOBUILTIN: call
+ %3 = call double @atan(double 3.000000e+00)
+ store double %3, double* %slot
+; FNOBUILTIN: call
+ %4 = call double @atan2(double 3.000000e+00, double 4.000000e+00)
+ store double %4, double* %slot
+; FNOBUILTIN: call
+ %5 = call double @ceil(double 3.000000e+00)
+ store double %5, double* %slot
+; FNOBUILTIN: call
+ %6 = call double @cosh(double 3.000000e+00)
+ store double %6, double* %slot
+; FNOBUILTIN: call
+ %7 = call double @exp(double 3.000000e+00)
+ store double %7, double* %slot
+; FNOBUILTIN: call
+ %8 = call double @exp2(double 3.000000e+00)
+ store double %8, double* %slot
+; FNOBUILTIN: call
+ %9 = call double @fabs(double 3.000000e+00)
+ store double %9, double* %slot
+; FNOBUILTIN: call
+ %10 = call double @floor(double 3.000000e+00)
+ store double %10, double* %slot
+; FNOBUILTIN: call
+ %11 = call double @fmod(double 3.000000e+00, double 4.000000e+00)
+ store double %11, double* %slot
+; FNOBUILTIN: call
+ %12 = call double @log(double 3.000000e+00)
+ store double %12, double* %slot
+; FNOBUILTIN: call
+ %13 = call double @log10(double 3.000000e+00)
+ store double %13, double* %slot
+; FNOBUILTIN: call
+ %14 = call double @pow(double 3.000000e+00, double 4.000000e+00)
+ store double %14, double* %slot
+; FNOBUILTIN: call
+ %15 = call double @sinh(double 3.000000e+00)
+ store double %15, double* %slot
+; FNOBUILTIN: call
+ %16 = call double @tanh(double 3.000000e+00)
+ store double %16, double* %slot
+; FNOBUILTIN: call
+ %17 = call float @acosf(float 1.000000e+00)
+ store float %17, float* %slotf
+; FNOBUILTIN: call
+ %18 = call float @asinf(float 1.000000e+00)
+ store float %18, float* %slotf
+; FNOBUILTIN: call
+ %19 = call float @atanf(float 3.000000e+00)
+ store float %19, float* %slotf
+; FNOBUILTIN: call
+ %20 = call float @atan2f(float 3.000000e+00, float 4.000000e+00)
+ store float %20, float* %slotf
+; FNOBUILTIN: call
+ %21 = call float @ceilf(float 3.000000e+00)
+ store float %21, float* %slotf
+; FNOBUILTIN: call
+ %22 = call float @cosf(float 3.000000e+00)
+ store float %22, float* %slotf
+; FNOBUILTIN: call
+ %23 = call float @coshf(float 3.000000e+00)
+ store float %23, float* %slotf
+; FNOBUILTIN: call
+ %24 = call float @expf(float 3.000000e+00)
+ store float %24, float* %slotf
+; FNOBUILTIN: call
+ %25 = call float @exp2f(float 3.000000e+00)
+ store float %25, float* %slotf
+; FNOBUILTIN: call
+ %26 = call float @fabsf(float 3.000000e+00)
+ store float %26, float* %slotf
+; FNOBUILTIN: call
+ %27 = call float @floorf(float 3.000000e+00)
+ store float %27, float* %slotf
+; FNOBUILTIN: call
+ %28 = call float @fmodf(float 3.000000e+00, float 4.000000e+00)
+ store float %28, float* %slotf
+; FNOBUILTIN: call
+ %29 = call float @logf(float 3.000000e+00)
+ store float %29, float* %slotf
+; FNOBUILTIN: call
+ %30 = call float @log10f(float 3.000000e+00)
+ store float %30, float* %slotf
+; FNOBUILTIN: call
+ %31 = call float @powf(float 3.000000e+00, float 4.000000e+00)
+ store float %31, float* %slotf
+; FNOBUILTIN: call
+ %32 = call float @sinf(float 3.000000e+00)
+ store float %32, float* %slotf
+; FNOBUILTIN: call
+ %33 = call float @sinhf(float 3.000000e+00)
+ store float %33, float* %slotf
+; FNOBUILTIN: call
+ %34 = call float @sqrtf(float 3.000000e+00)
+ store float %34, float* %slotf
+; FNOBUILTIN: call
+ %35 = call float @tanf(float 3.000000e+00)
+ store float %35, float* %slotf
+; FNOBUILTIN: call
+ %36 = call float @tanhf(float 3.000000e+00)
+ store float %36, float* %slotf
+
+; FNOBUILTIN: ret
+
; PR9315
%E = call double @exp2(double 4.0)
%d = fadd double %c, %E
@@ -65,85 +213,9 @@ define double @test_intrinsic_pow() nounwind uwtable ssp {
entry:
; CHECK-LABEL: @test_intrinsic_pow(
; CHECK-NOT: call
+; CHECK: ret
%0 = call double @llvm.pow.f64(double 1.500000e+00, double 3.000000e+00)
ret double %0
}
-declare double @llvm.pow.f64(double, double) nounwind readonly
-
-; Shouldn't fold because of -fno-builtin
-define double @sin_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @sin_(
-; FNOBUILTIN: %1 = call double @sin(double 3.000000e+00)
- %1 = call double @sin(double 3.000000e+00)
- ret double %1
-}
-
-; Shouldn't fold because of -fno-builtin
-define double @sqrt_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @sqrt_(
-; FNOBUILTIN: %1 = call double @sqrt(double 3.000000e+00)
- %1 = call double @sqrt(double 3.000000e+00)
- ret double %1
-}
-; Shouldn't fold because of -fno-builtin
-define float @sqrtf_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @sqrtf_(
-; FNOBUILTIN: %1 = call float @sqrtf(float 3.000000e+00)
- %1 = call float @sqrtf(float 3.000000e+00)
- ret float %1
-}
-declare float @sqrtf(float)
-
-; Shouldn't fold because of -fno-builtin
-define float @sinf_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @sinf_(
-; FNOBUILTIN: %1 = call float @sinf(float 3.000000e+00)
- %1 = call float @sinf(float 3.000000e+00)
- ret float %1
-}
-declare float @sinf(float)
-
-; Shouldn't fold because of -fno-builtin
-define double @tan_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @tan_(
-; FNOBUILTIN: %1 = call double @tan(double 3.000000e+00)
- %1 = call double @tan(double 3.000000e+00)
- ret double %1
-}
-
-; Shouldn't fold because of -fno-builtin
-define double @tanh_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @tanh_(
-; FNOBUILTIN: %1 = call double @tanh(double 3.000000e+00)
- %1 = call double @tanh(double 3.000000e+00)
- ret double %1
-}
-declare double @tanh(double)
-
-; Shouldn't fold because of -fno-builtin
-define double @pow_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @pow_(
-; FNOBUILTIN: %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
- %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
- ret double %1
-}
-declare double @pow(double, double)
-
-; Shouldn't fold because of -fno-builtin
-define double @fmod_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @fmod_(
-; FNOBUILTIN: %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
- %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
- ret double %1
-}
-declare double @fmod(double, double)
-
-; Shouldn't fold because of -fno-builtin
-define double @atan2_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @atan2_(
-; FNOBUILTIN: %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
- %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
- ret double %1
-}
-declare double @atan2(double, double)
+declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/test/Transforms/ConstProp/insertvalue.ll b/test/Transforms/ConstProp/insertvalue.ll
index dce2b728b93b..606f7ddc679c 100644
--- a/test/Transforms/ConstProp/insertvalue.ll
+++ b/test/Transforms/ConstProp/insertvalue.ll
@@ -74,3 +74,13 @@ define i32 @test-float-Nan() {
; CHECK: @test-float-Nan
; CHECK: ret i32 2139171423
}
+
+define i16 @test-half-Nan() {
+ %A = bitcast i16 32256 to half
+ %B = insertvalue [1 x half] undef, half %A, 0
+ %C = extractvalue [1 x half] %B, 0
+ %D = bitcast half %C to i16
+ ret i16 %D
+; CHECK: @test-half-Nan
+; CHECK: ret i16 32256
+}
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index 5426ad0f8adb..89387ad06ba8 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -40,13 +40,16 @@ define i16 @test2_addrspacecast() {
%r = load i16, i16 addrspace(1)* addrspacecast(i32* getelementptr ({{i32,i8},i32}, {{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16 addrspace(1)*)
ret i16 %r
+; FIXME: Should be able to load through a constant addrspacecast.
; 0xBEEF
; LE-LABEL: @test2_addrspacecast(
-; LE: ret i16 -16657
+; XLE: ret i16 -16657
+; LE: load i16, i16 addrspace(1)* addrspacecast
; 0xDEAD
; BE-LABEL: @test2_addrspacecast(
-; BE: ret i16 -8531
+; XBE: ret i16 -8531
+; BE: load i16, i16 addrspace(1)* addrspacecast
}
; Load of second 16 bits of 32-bit value.
diff --git a/test/Transforms/ConstantMerge/merge-both.ll b/test/Transforms/ConstantMerge/merge-both.ll
index 11b0621d42d7..514c789b4701 100644
--- a/test/Transforms/ConstantMerge/merge-both.ll
+++ b/test/Transforms/ConstantMerge/merge-both.ll
@@ -25,7 +25,7 @@ declare void @helper([16 x i8]*)
; CHECK-NEXT: @var7 = internal constant [16 x i8] c"foo1bar2foo3bar\00"
; CHECK-NEXT: @var8 = private constant [16 x i8] c"foo1bar2foo3bar\00", align 16
-@var4a = alias %struct.foobar* @var4
+@var4a = alias %struct.foobar, %struct.foobar* @var4
@llvm.used = appending global [1 x %struct.foobar*] [%struct.foobar* @var4a], section "llvm.metadata"
define i32 @main() {
diff --git a/test/Transforms/CorrelatedValuePropagation/non-null.ll b/test/Transforms/CorrelatedValuePropagation/non-null.ll
index 6bb8bb07c45f..6fb4cb6e3582 100644
--- a/test/Transforms/CorrelatedValuePropagation/non-null.ll
+++ b/test/Transforms/CorrelatedValuePropagation/non-null.ll
@@ -101,3 +101,63 @@ bb:
; CHECK: KEEP2
ret void
}
+
+declare void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+define void @test10(i8* %arg1, i8* %arg2, i32 %non-pointer-arg) {
+; CHECK-LABEL: @test10
+entry:
+ %is_null = icmp eq i8* %arg1, null
+ br i1 %is_null, label %null, label %non_null
+
+non_null:
+ call void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+ ; CHECK: call void @test10_helper(i8* nonnull %arg1, i8* %arg2, i32 %non-pointer-arg)
+ br label %null
+
+null:
+ call void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+ ; CHECK: call void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+ ret void
+}
+
+declare void @test11_helper(i8* %arg)
+define void @test11(i8* %arg1, i8** %arg2) {
+; CHECK-LABEL: @test11
+entry:
+ %is_null = icmp eq i8* %arg1, null
+ br i1 %is_null, label %null, label %non_null
+
+non_null:
+ br label %merge
+
+null:
+ %another_arg = alloca i8
+ br label %merge
+
+merge:
+ %merged_arg = phi i8* [%another_arg, %null], [%arg1, %non_null]
+ call void @test11_helper(i8* %merged_arg)
+ ; CHECK: call void @test11_helper(i8* nonnull %merged_arg)
+ ret void
+}
+
+declare void @test12_helper(i8* %arg)
+define void @test12(i8* %arg1, i8** %arg2) {
+; CHECK-LABEL: @test12
+entry:
+ %is_null = icmp eq i8* %arg1, null
+ br i1 %is_null, label %null, label %non_null
+
+non_null:
+ br label %merge
+
+null:
+ %another_arg = load i8*, i8** %arg2, !nonnull !{}
+ br label %merge
+
+merge:
+ %merged_arg = phi i8* [%another_arg, %null], [%arg1, %non_null]
+ call void @test12_helper(i8* %merged_arg)
+ ; CHECK: call void @test12_helper(i8* nonnull %merged_arg)
+ ret void
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll
index e40c63919078..884cc8bdc125 100644
--- a/test/Transforms/CorrelatedValuePropagation/range.ll
+++ b/test/Transforms/CorrelatedValuePropagation/range.ll
@@ -165,3 +165,27 @@ sw.default:
%or2 = or i1 %cmp7, %cmp8
ret i1 false
}
+
+define i1 @test8(i64* %p) {
+; CHECK-LABEL: @test8
+; CHECK: ret i1 false
+ %a = load i64, i64* %p, !range !{i64 4, i64 255}
+ %res = icmp eq i64 %a, 0
+ ret i1 %res
+}
+
+define i1 @test9(i64* %p) {
+; CHECK-LABEL: @test9
+; CHECK: ret i1 true
+ %a = load i64, i64* %p, !range !{i64 0, i64 1}
+ %res = icmp eq i64 %a, 0
+ ret i1 %res
+}
+
+define i1 @test10(i64* %p) {
+; CHECK-LABEL: @test10
+; CHECK: ret i1 false
+ %a = load i64, i64* %p, !range !{i64 4, i64 8, i64 15, i64 20}
+ %res = icmp eq i64 %a, 0
+ ret i1 %res
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/select.ll b/test/Transforms/CorrelatedValuePropagation/select.ll
index d88e3e462a20..be44bdcd921d 100644
--- a/test/Transforms/CorrelatedValuePropagation/select.ll
+++ b/test/Transforms/CorrelatedValuePropagation/select.ll
@@ -71,5 +71,5 @@ for.body:
if.end:
ret i32 %sel
-; CHECK: ret i32 %[[sel]]
+; CHECK: ret i32 1
}
diff --git a/test/Transforms/CrossDSOCFI/basic.ll b/test/Transforms/CrossDSOCFI/basic.ll
new file mode 100644
index 000000000000..49b3e8f23ccf
--- /dev/null
+++ b/test/Transforms/CrossDSOCFI/basic.ll
@@ -0,0 +1,88 @@
+; RUN: opt -S -cross-dso-cfi < %s | FileCheck %s
+
+; CHECK: define void @__cfi_check(i64 %[[TYPE:.*]], i8* %[[ADDR:.*]]) align 4096
+; CHECK: switch i64 %[[TYPE]], label %[[TRAP:.*]] [
+; CHECK-NEXT: i64 111, label %[[L1:.*]]
+; CHECK-NEXT: i64 222, label %[[L2:.*]]
+; CHECK-NEXT: i64 333, label %[[L3:.*]]
+; CHECK-NEXT: i64 444, label %[[L4:.*]]
+; CHECK-NEXT: {{]$}}
+
+; CHECK: [[TRAP]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-MEXT: unreachable
+
+; CHECK: [[EXIT:.*]]:
+; CHECK-NEXT: ret void
+
+; CHECK: [[L1]]:
+; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 111)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+
+; CHECK: [[L2]]:
+; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 222)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+
+; CHECK: [[L3]]:
+; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 333)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+
+; CHECK: [[L4]]:
+; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 444)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@_ZTV1A = constant i8 0
+@_ZTI1A = constant i8 0
+@_ZTS1A = constant i8 0
+@_ZTV1B = constant i8 0
+@_ZTI1B = constant i8 0
+@_ZTS1B = constant i8 0
+
+define signext i8 @f11() {
+entry:
+ ret i8 1
+}
+
+define signext i8 @f12() {
+entry:
+ ret i8 2
+}
+
+define signext i8 @f13() {
+entry:
+ ret i8 3
+}
+
+define i32 @f21() {
+entry:
+ ret i32 4
+}
+
+define i32 @f22() {
+entry:
+ ret i32 5
+}
+
+!llvm.bitsets = !{!0, !1, !2, !3, !4, !7, !8, !9, !10, !11, !12, !13, !14, !15}
+!llvm.module.flags = !{!17}
+
+!0 = !{!"_ZTSFcvE", i8 ()* @f11, i64 0}
+!1 = !{i64 111, i8 ()* @f11, i64 0}
+!2 = !{!"_ZTSFcvE", i8 ()* @f12, i64 0}
+!3 = !{i64 111, i8 ()* @f12, i64 0}
+!4 = !{!"_ZTSFcvE", i8 ()* @f13, i64 0}
+!5 = !{i64 111, i8 ()* @f13, i64 0}
+!6 = !{!"_ZTSFivE", i32 ()* @f21, i64 0}
+!7 = !{i64 222, i32 ()* @f21, i64 0}
+!8 = !{!"_ZTSFivE", i32 ()* @f22, i64 0}
+!9 = !{i64 222, i32 ()* @f22, i64 0}
+!10 = !{!"_ZTS1A", i8* @_ZTV1A, i64 16}
+!11 = !{i64 333, i8* @_ZTV1A, i64 16}
+!12 = !{!"_ZTS1A", i8* @_ZTV1B, i64 16}
+!13 = !{i64 333, i8* @_ZTV1B, i64 16}
+!14 = !{!"_ZTS1B", i8* @_ZTV1B, i64 16}
+!15 = !{i64 444, i8* @_ZTV1B, i64 16}
+!17= !{i32 4, !"Cross-DSO CFI", i32 1}
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 5ff05f0d6858..10578761cd73 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -45,30 +45,30 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!30}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "name", line: 8, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "vfs_addname", linkageName: "vfs_addname", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !4)
+!0 = !DILocalVariable(name: "name", line: 8, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "vfs_addname", linkageName: "vfs_addname", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !4)
!2 = !DIFile(filename: "tail.c", directory: "/Users/echeng/LLVM/radars/r7927803/")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !6, !9, !9, !9}
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !28, scope: !2, baseType: !7)
!7 = !DIDerivedType(tag: DW_TAG_const_type, size: 8, align: 8, file: !28, scope: !2, baseType: !8)
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "len", line: 9, arg: 0, scope: !1, file: !2, type: !9)
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "hash", line: 10, arg: 0, scope: !1, file: !2, type: !9)
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "flags", line: 11, arg: 0, scope: !1, file: !2, type: !9)
+!10 = !DILocalVariable(name: "len", line: 9, arg: 2, scope: !1, file: !2, type: !9)
+!11 = !DILocalVariable(name: "hash", line: 10, arg: 3, scope: !1, file: !2, type: !9)
+!12 = !DILocalVariable(name: "flags", line: 11, arg: 4, scope: !1, file: !2, type: !9)
!13 = !DILocation(line: 13, scope: !14)
!14 = distinct !DILexicalBlock(line: 12, column: 0, file: !28, scope: !1)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "name", line: 17, arg: 0, scope: !16, file: !2, type: !6)
-!16 = !DISubprogram(name: "add_name_internal", linkageName: "add_name_internal", line: 22, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !17)
+!15 = !DILocalVariable(name: "name", line: 17, arg: 1, scope: !16, file: !2, type: !6)
+!16 = distinct !DISubprogram(name: "add_name_internal", linkageName: "add_name_internal", line: 22, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !17)
!17 = !DISubroutineType(types: !18)
!18 = !{!6, !6, !9, !9, !19, !9}
!19 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "len", line: 18, arg: 0, scope: !16, file: !2, type: !9)
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "hash", line: 19, arg: 0, scope: !16, file: !2, type: !9)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "extra", line: 20, arg: 0, scope: !16, file: !2, type: !19)
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "flags", line: 21, arg: 0, scope: !16, file: !2, type: !9)
+!20 = !DILocalVariable(name: "len", line: 18, arg: 2, scope: !16, file: !2, type: !9)
+!21 = !DILocalVariable(name: "hash", line: 19, arg: 3, scope: !16, file: !2, type: !9)
+!22 = !DILocalVariable(name: "extra", line: 20, arg: 4, scope: !16, file: !2, type: !19)
+!23 = !DILocalVariable(name: "flags", line: 21, arg: 5, scope: !16, file: !2, type: !9)
!24 = !DILocation(line: 23, scope: !25)
!25 = distinct !DILexicalBlock(line: 22, column: 0, file: !28, scope: !16)
!26 = !DILocation(line: 24, scope: !25)
diff --git a/test/Transforms/DeadArgElim/aggregates.ll b/test/Transforms/DeadArgElim/aggregates.ll
index 68d253425587..2eca76a4a4e3 100644
--- a/test/Transforms/DeadArgElim/aggregates.ll
+++ b/test/Transforms/DeadArgElim/aggregates.ll
@@ -159,4 +159,28 @@ define internal i8 @outer() {
%val = load i8, i8* %resptr
ret i8 %val
-} \ No newline at end of file
+}
+
+define internal { i32 } @agg_ret() {
+entry:
+ unreachable
+}
+
+; CHECK-LABEL: define void @PR24906
+; CHECK: %[[invoke:.*]] = invoke i32 @agg_ret()
+; CHECK: %[[oldret:.*]] = insertvalue { i32 } undef, i32 %[[invoke]], 0
+; CHECK: phi { i32 } [ %[[oldret]],
+define void @PR24906() personality i32 (i32)* undef {
+entry:
+ %tmp2 = invoke { i32 } @agg_ret()
+ to label %bb3 unwind label %bb4
+
+bb3:
+ %tmp3 = phi { i32 } [ %tmp2, %entry ]
+ unreachable
+
+bb4:
+ %tmp4 = landingpad { i8*, i32 }
+ cleanup
+ unreachable
+}
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index efafd9bbc9e2..a19d4b1fd1ab 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -14,20 +14,21 @@
; the function->debug info mapping on update to ensure it's accurate when used
; again for the next removal.
-; CHECK: !DISubprogram(name: "f1",{{.*}} function: void ()* @_ZL2f1iz
+; CHECK: define internal void @_ZL2f1iz({{.*}} !dbg [[SP:![0-9]+]]
+; CHECK: [[SP]] = distinct !DISubprogram(name: "f1"
; Check that debug info metadata for subprograms stores pointers to
; updated LLVM functions.
; Function Attrs: uwtable
-define void @_Z2f2v() #0 {
+define void @_Z2f2v() #0 !dbg !4 {
entry:
call void (i32, ...) @_ZL2f1iz(i32 1), !dbg !15
ret void, !dbg !16
}
; Function Attrs: nounwind uwtable
-define internal void @_ZL2f1iz(i32, ...) #1 {
+define internal void @_ZL2f1iz(i32, ...) #1 !dbg !8 {
entry:
call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !17, metadata !18), !dbg !19
ret void, !dbg !20
@@ -47,15 +48,15 @@ attributes #2 = { nounwind readnone }
!llvm.module.flags = !{!12, !13}
!llvm.ident = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "dbg.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !8}
-!4 = !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, function: void ()* @_Z2f2v, variables: !2)
+!4 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "dbg.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
-!8 = !DISubprogram(name: "f1", linkageName: "_ZL2f1iz", line: 1, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !9, function: void (i32, ...)* @_ZL2f1iz, variables: !2)
+!8 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1iz", line: 1, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !9, variables: !2)
!9 = !DISubroutineType(types: !10)
!10 = !{null, !11, null}
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
@@ -64,7 +65,7 @@ attributes #2 = { nounwind readnone }
!14 = !{!"clang version 3.6.0 "}
!15 = !DILocation(line: 5, column: 3, scope: !4)
!16 = !DILocation(line: 6, column: 1, scope: !4)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 1, arg: 1, scope: !8, file: !5, type: !11)
+!17 = !DILocalVariable(name: "", line: 1, arg: 1, scope: !8, file: !5, type: !11)
!18 = !DIExpression()
!19 = !DILocation(line: 1, column: 19, scope: !8)
!20 = !DILocation(line: 2, column: 1, scope: !8)
diff --git a/test/Transforms/DeadArgElim/naked_functions.ll b/test/Transforms/DeadArgElim/naked_functions.ll
new file mode 100644
index 000000000000..b7955a10127d
--- /dev/null
+++ b/test/Transforms/DeadArgElim/naked_functions.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -deadargelim %s | FileCheck %s
+
+; Don't eliminate dead arugments from naked functions.
+; CHECK: define internal i32 @naked(i32 %x)
+
+define internal i32 @naked(i32 %x) #0 {
+ tail call void asm sideeffect inteldialect "mov eax, [esp + $$4]\0A\09ret", "~{eax},~{dirflag},~{fpsr},~{flags}"()
+ unreachable
+}
+
+
+; Don't eliminate dead varargs from naked functions.
+; CHECK: define internal i32 @naked_va(i32 %x, ...)
+
+define internal i32 @naked_va(i32 %x, ...) #0 {
+ tail call void asm sideeffect inteldialect "mov eax, [esp + $$8]\0A\09ret", "~{eax},~{dirflag},~{fpsr},~{flags}"()
+ unreachable
+}
+
+define i32 @f(i32 %x, i32 %y) {
+ %r = call i32 @naked(i32 %x)
+ %s = call i32 (i32, ...) @naked_va(i32 %x, i32 %r)
+
+; Make sure the arguments are still there: not removed or replaced with undef.
+; CHECK: %r = call i32 @naked(i32 %x)
+; CHECK: %s = call i32 (i32, ...) @naked_va(i32 %x, i32 %r)
+
+ ret i32 %s
+}
+
+attributes #0 = { naked }
diff --git a/test/Transforms/DeadArgElim/operandbundle.ll b/test/Transforms/DeadArgElim/operandbundle.ll
new file mode 100644
index 000000000000..aa112b1c0501
--- /dev/null
+++ b/test/Transforms/DeadArgElim/operandbundle.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+define internal void @f(i32 %arg) {
+entry:
+ call void @g() [ "foo"(i32 %arg) ]
+ ret void
+}
+
+; CHECK-LABEL: define internal void @f(
+; CHECK: call void @g() [ "foo"(i32 %arg) ]
+
+declare void @g()
diff --git a/test/Transforms/DeadStoreElimination/calloc-store.ll b/test/Transforms/DeadStoreElimination/calloc-store.ll
new file mode 100644
index 000000000000..daba61332065
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/calloc-store.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+declare noalias i8* @calloc(i64, i64)
+
+define i32* @test1() {
+; CHECK-LABEL: test1
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ %2 = bitcast i8* %1 to i32*
+ ; This store is dead and should be removed
+ store i32 0, i32* %2, align 4
+; CHECK-NOT: store i32 0, i32* %2, align 4
+ ret i32* %2
+}
+
+define i32* @test2() {
+; CHECK-LABEL: test2
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ %2 = bitcast i8* %1 to i32*
+ %3 = getelementptr i32, i32* %2, i32 5
+ store i32 0, i32* %3, align 4
+; CHECK-NOT: store i32 0, i32* %2, align 4
+ ret i32* %2
+}
+
+define i32* @test3(i32 *%arg) {
+; CHECK-LABEL: test3
+ store i32 0, i32* %arg, align 4
+; CHECK: store i32 0, i32* %arg, align 4
+ ret i32* %arg
+}
+
+declare void @clobber_memory(i8*)
+define i8* @test4() {
+; CHECK-LABEL: test4
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ call void @clobber_memory(i8* %1)
+ store i8 0, i8* %1, align 4
+; CHECK: store i8 0, i8* %1, align 4
+ ret i8* %1
+}
+
+define i32* @test5() {
+; CHECK-LABEL: test5
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ %2 = bitcast i8* %1 to i32*
+ store volatile i32 0, i32* %2, align 4
+; CHECK: store volatile i32 0, i32* %2, align 4
+ ret i32* %2
+}
+
+define i8* @test6() {
+; CHECK-LABEL: test6
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ store i8 5, i8* %1, align 4
+; CHECK: store i8 5, i8* %1, align 4
+ ret i8* %1
+}
+
+define i8* @test7(i8 %arg) {
+; CHECK-LABEL: test7
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ store i8 %arg, i8* %1, align 4
+; CHECK: store i8 %arg, i8* %1, align 4
+ ret i8* %1
+}
diff --git a/test/Transforms/DeadStoreElimination/inst-limits.ll b/test/Transforms/DeadStoreElimination/inst-limits.ll
index 54e41c8b413b..5848ab89bc88 100644
--- a/test/Transforms/DeadStoreElimination/inst-limits.ll
+++ b/test/Transforms/DeadStoreElimination/inst-limits.ll
@@ -9,7 +9,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@x = global i32 0, align 4
; Function Attrs: nounwind
-define i32 @test_within_limit() {
+define i32 @test_within_limit() !dbg !4 {
entry:
; The first store; later there is a second store to the same location,
; so this store should be optimized away by DSE.
@@ -245,17 +245,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !13}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "/home/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test_within_limit", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, function: i32 ()* @test_within_limit, variables: !2)
+!4 = distinct !DISubprogram(name: "test_within_limit", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "/home/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", scope: !4, type: !8)
+!10 = !DILocalVariable(name: "x", scope: !4, type: !8)
!11 = !{i32 2, !"Dwarf Version", i32 4}
!12 = !{i32* undef}
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 2ffe0539098e..4f6221db2454 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -350,3 +350,150 @@ define i8* @test25(i8* %p) nounwind {
store i8 %tmp, i8* %p.4, align 1
ret i8* %q
}
+
+; Remove redundant store if loaded value is in another block.
+; CHECK-LABEL: @test26(
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test26(i1 %c, i32* %p) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ store i32 %v, i32* %p, align 4
+ br label %bb3
+bb3:
+ ret i32 0
+}
+
+; Remove redundant store if loaded value is in another block.
+; CHECK-LABEL: @test27(
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test27(i1 %c, i32* %p) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
+; Don't remove redundant store because of may-aliased store.
+; CHECK-LABEL: @test28(
+; CHECK: bb3:
+; CHECK-NEXT: store i32 %v
+define i32 @test28(i1 %c, i32* %p, i32* %p2, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+
+ ; Might overwrite value at %p
+ store i32 %i, i32* %p2, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
+; Don't remove redundant store because of may-aliased store.
+; CHECK-LABEL: @test29(
+; CHECK: bb3:
+; CHECK-NEXT: store i32 %v
+define i32 @test29(i1 %c, i32* %p, i32* %p2, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ ; Might overwrite value at %p
+ store i32 %i, i32* %p2, align 4
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
+declare void @unknown_func()
+
+; Don't remove redundant store because of unknown call.
+; CHECK-LABEL: @test30(
+; CHECK: bb3:
+; CHECK-NEXT: store i32 %v
+define i32 @test30(i1 %c, i32* %p, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ ; Might overwrite value at %p
+ call void @unknown_func()
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
+; Remove redundant store if loaded value is in another block inside a loop.
+; CHECK-LABEL: @test31(
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test31(i1 %c, i32* %p, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br label %bb1
+bb1:
+ store i32 %v, i32* %p, align 4
+ br i1 undef, label %bb1, label %bb2
+bb2:
+ ret i32 0
+}
+
+; Don't remove redundant store in a loop with a may-alias store.
+; CHECK-LABEL: @test32(
+; CHECK: bb1:
+; CHECK-NEXT: store i32 %v
+; CHECK-NEXT: call void @unknown_func
+define i32 @test32(i1 %c, i32* %p, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br label %bb1
+bb1:
+ store i32 %v, i32* %p, align 4
+ ; Might read and overwrite value at %p
+ call void @unknown_func()
+ br i1 undef, label %bb1, label %bb2
+bb2:
+ ret i32 0
+}
+
+; Remove redundant store, which is in the lame loop as the load.
+; CHECK-LABEL: @test33(
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test33(i1 %c, i32* %p, i32 %i) {
+entry:
+ br label %bb1
+bb1:
+ %v = load i32, i32* %p, align 4
+ br label %bb2
+bb2:
+ store i32 %v, i32* %p, align 4
+ ; Might read and overwrite value at %p, but doesn't matter.
+ call void @unknown_func()
+ br i1 undef, label %bb1, label %bb3
+bb3:
+ ret i32 0
+}
+
diff --git a/test/Transforms/EarlyCSE/AArch64/ldstN.ll b/test/Transforms/EarlyCSE/AArch64/ldstN.ll
new file mode 100644
index 000000000000..cc1af31429e1
--- /dev/null
+++ b/test/Transforms/EarlyCSE/AArch64/ldstN.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>*)
+
+; Although the store and the ld4 are using the same pointer, the
+; data can not be reused because ld4 accesses multiple elements.
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @foo() {
+entry:
+ store <4 x i16> undef, <4 x i16>* undef, align 8
+ %0 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* undef)
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %0
+; CHECK-LABEL: @foo(
+; CHECK: store
+; CHECK-NEXT: call
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/EarlyCSE/atomics.ll b/test/Transforms/EarlyCSE/atomics.ll
new file mode 100644
index 000000000000..21c19cd8e880
--- /dev/null
+++ b/test/Transforms/EarlyCSE/atomics.ll
@@ -0,0 +1,259 @@
+; RUN: opt < %s -S -early-cse | FileCheck %s
+
+; CHECK-LABEL: @test12(
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+ %load0 = load i32, i32* %P1
+ %1 = load atomic i32, i32* %P2 seq_cst, align 4
+ %load1 = load i32, i32* %P1
+ %sel = select i1 %B, i32 %load0, i32 %load1
+ ret i32 %sel
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load i32, i32* %P1
+}
+
+; CHECK-LABEL: @test13(
+; atomic to non-atomic forwarding is legal
+define i32 @test13(i1 %B, i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load i32, i32* %P1
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test14(
+; atomic to unordered atomic forwarding is legal
+define i32 @test14(i1 %B, i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1 seq_cst
+ ; CHECK-NEXT: ret i32 0
+}
+
+; CHECK-LABEL: @test15(
+; implementation restriction: can't forward to stonger
+; than unordered
+define i32 @test15(i1 %B, i32* %P1, i32* %P2) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 seq_cst, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16(
+; forwarding non-atomic to atomic is wrong! (However,
+; it would be legal to use the later value in place of the
+; former in this particular example. We just don't
+; do that right now.)
+define i32 @test16(i1 %B, i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst_store(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_store
+; CHECK: store
+; CHECK: store atomic
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ store atomic i32 0, i32* %P2 seq_cst, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst
+; CHECK: store
+; CHECK: fence seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_asm_sideeffect(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_sideeffect
+; CHECK: store
+; CHECK: call void asm sideeffect
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm sideeffect "", ""()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_asm_memory(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_memory
+; CHECK: store
+; CHECK: call void asm
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm "", "~{memory}"()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't remove a volatile load
+define i32 @volatile_load(i1 %B, i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @volatile_load
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+}
+
+; Can't remove redundant volatile loads
+define i32 @redundant_volatile_load(i1 %B, i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @redundant_volatile_load
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: sub
+}
+
+; Can't DSE a volatile store
+define void @volatile_store(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @volatile_store
+; CHECK: store volatile
+; CHECK: store
+ store volatile i32 0, i32* %P1, align 4
+ store i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE a redundant volatile store
+define void @redundant_volatile_store(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @redundant_volatile_store
+; CHECK: store volatile
+; CHECK: store volatile
+ store volatile i32 0, i32* %P1, align 4
+ store volatile i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can value forward from volatiles
+define i32 @test20(i1 %B, i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @test20
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; Can DSE a non-volatile store in favor of a volatile one
+; currently a missed optimization
+define void @test21(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test21
+; CHECK: store
+; CHECK: store volatile
+ store i32 0, i32* %P1, align 4
+ store volatile i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can DSE a normal store in favor of a unordered one
+define void @test22(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test22
+; CHECK-NEXT: store atomic
+ store i32 0, i32* %P1, align 4
+ store atomic i32 3, i32* %P1 unordered, align 4
+ ret void
+}
+
+; Can also DSE a unordered store in favor of a normal one
+define void @test23(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test23
+; CHECK-NEXT: store i32 0
+ store atomic i32 3, i32* %P1 unordered, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; As an implementation limitation, can't remove ordered stores
+; Note that we could remove the earlier store if we could
+; represent the required ordering.
+define void @test24(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test24
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: store i32 0
+ store atomic i32 3, i32* %P1 release, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't remove volatile stores - each is independently observable and
+; the count of such stores is an observable program side effect.
+define void @test25(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test25
+; CHECK-NEXT: store volatile
+; CHECK-NEXT: store volatile
+ store volatile i32 3, i32* %P1, align 4
+ store volatile i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can DSE a unordered store in favor of a unordered one
+define void @test26(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test26
+; CHECK-NEXT: store atomic i32 3, i32* %P1 unordered, align 4
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %P1 unordered, align 4
+ store atomic i32 3, i32* %P1 unordered, align 4
+ ret void
+}
+
+; Can DSE a unordered store in favor of a ordered one,
+; but current don't due to implementation limits
+define void @test27(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test27
+; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4
+; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %P1 unordered, align 4
+ store atomic i32 3, i32* %P1 release, align 4
+ ret void
+}
+
+; Can DSE an unordered atomic store in favor of an
+; ordered one, but current don't due to implementation limits
+define void @test28(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test28
+; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4
+; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %P1 unordered, align 4
+ store atomic i32 3, i32* %P1 release, align 4
+ ret void
+}
+
+; As an implementation limitation, can't remove ordered stores
+; see also: @test24
+define void @test29(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test29
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: store atomic
+ store atomic i32 3, i32* %P1 release, align 4
+ store atomic i32 0, i32* %P1 unordered, align 4
+ ret void
+}
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 43b5e6098f6a..8c9b74b4d0e1 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -203,3 +203,77 @@ define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
; CHECK: load i32, i32* %P1
; CHECK: load i32, i32* %P1
}
+
+define void @dse1(i32 *%P) {
+; CHECK-LABEL: @dse1
+; CHECK-NOT: store
+ %v = load i32, i32* %P
+ store i32 %v, i32* %P
+ ret void
+}
+
+define void @dse2(i32 *%P) {
+; CHECK-LABEL: @dse2
+; CHECK-NOT: store
+ %v = load atomic i32, i32* %P seq_cst, align 4
+ store i32 %v, i32* %P
+ ret void
+}
+
+define void @dse3(i32 *%P) {
+; CHECK-LABEL: @dse3
+; CHECK-NOT: store
+ %v = load atomic i32, i32* %P seq_cst, align 4
+ store atomic i32 %v, i32* %P unordered, align 4
+ ret void
+}
+
+define i32 @dse4(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse4
+; CHECK-NOT: store
+; CHECK: ret i32 0
+ %a = load i32, i32* %Q
+ %v = load atomic i32, i32* %P unordered, align 4
+ store atomic i32 %v, i32* %P unordered, align 4
+ %b = load i32, i32* %Q
+ %res = sub i32 %a, %b
+ ret i32 %res
+}
+
+; Note that in this example, %P and %Q could in fact be the same
+; pointer. %v could be different than the value observed for %a
+; and that's okay because we're using relaxed memory ordering.
+; The only guarantee we have to provide is that each of the loads
+; has to observe some value written to that location. We do
+; not have to respect the order in which those writes were done.
+define i32 @dse5(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse5
+; CHECK-NOT: store
+; CHECK: ret i32 0
+ %v = load atomic i32, i32* %P unordered, align 4
+ %a = load atomic i32, i32* %Q unordered, align 4
+ store atomic i32 %v, i32* %P unordered, align 4
+ %b = load atomic i32, i32* %Q unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+}
+
+
+define void @dse_neg1(i32 *%P) {
+; CHECK-LABEL: @dse_neg1
+; CHECK: store
+ %v = load i32, i32* %P
+ store i32 5, i32* %P
+ ret void
+}
+
+; Could remove the store, but only if ordering was somehow
+; encoded.
+define void @dse_neg2(i32 *%P) {
+; CHECK-LABEL: @dse_neg2
+; CHECK: store
+ %v = load i32, i32* %P
+ store atomic i32 %v, i32* %P seq_cst, align 4
+ ret void
+}
+
diff --git a/test/Transforms/EarlyCSE/fence.ll b/test/Transforms/EarlyCSE/fence.ll
new file mode 100644
index 000000000000..c6d47e9fb22e
--- /dev/null
+++ b/test/Transforms/EarlyCSE/fence.ll
@@ -0,0 +1,86 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+; NOTE: This file is testing the current implementation. Some of
+; the transforms used as negative tests below would be legal, but
+; only if reached through a chain of logic which EarlyCSE is incapable
+; of performing. To say it differently, this file tests a conservative
+; version of the memory model. If we want to extend EarlyCSE to be more
+; aggressive in the future, we may need to relax some of the negative tests.
+
+; We can value forward across the fence since we can (semantically)
+; reorder the following load before the fence.
+define i32 @test(i32* %addr.i) {
+; CHECK-LABEL: @test
+; CHECK: store
+; CHECK: fence
+; CHECK-NOT: load
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ %a = load i32, i32* %addr.i, align 4
+ ret i32 %a
+}
+
+; Same as above
+define i32 @test2(i32* noalias %addr.i, i32* noalias %otheraddr) {
+; CHECK-LABEL: @test2
+; CHECK: load
+; CHECK: fence
+; CHECK-NOT: load
+; CHECK: ret
+ %a = load i32, i32* %addr.i, align 4
+ fence release
+ %a2 = load i32, i32* %addr.i, align 4
+ %res = sub i32 %a, %a2
+ ret i32 %a
+}
+
+; We can not value forward across an acquire barrier since we might
+; be syncronizing with another thread storing to the same variable
+; followed by a release fence. If this thread observed the release
+; had happened, we must present a consistent view of memory at the
+; fence. Note that it would be legal to reorder '%a' after the fence
+; and then remove '%a2'. The current implementation doesn't know how
+; to do this, but if it learned, this test will need revised.
+define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
+; CHECK-LABEL: @test3
+; CHECK: load
+; CHECK: fence
+; CHECK: load
+; CHECK: sub
+; CHECK: ret
+ %a = load i32, i32* %addr.i, align 4
+ fence acquire
+ %a2 = load i32, i32* %addr.i, align 4
+ %res = sub i32 %a, %a2
+ ret i32 %res
+}
+
+; We can not dead store eliminate accross the fence. We could in
+; principal reorder the second store above the fence and then DSE either
+; store, but this is beyond the simple last-store DSE which EarlyCSE
+; implements.
+define void @test4(i32* %addr.i) {
+; CHECK-LABEL: @test4
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ store i32 5, i32* %addr.i, align 4
+ ret void
+}
+
+; We *could* DSE across this fence, but don't. No other thread can
+; observe the order of the acquire fence and the store.
+define void @test5(i32* %addr.i) {
+; CHECK-LABEL: @test5
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence acquire
+ store i32 5, i32* %addr.i, align 4
+ ret void
+}
diff --git a/test/Transforms/Float2Int/basic.ll b/test/Transforms/Float2Int/basic.ll
index f4d946914cd4..7f04a594dc80 100644
--- a/test/Transforms/Float2Int/basic.ll
+++ b/test/Transforms/Float2Int/basic.ll
@@ -254,3 +254,13 @@ define i32 @neg_calluser(i32 %value) {
ret i32 %7
}
declare double @g(double)
+
+; CHECK-LABEL: @neg_vector
+; CHECK: %1 = uitofp <4 x i8> %a to <4 x float>
+; CHECK: %2 = fptoui <4 x float> %1 to <4 x i16>
+; CHECK: ret <4 x i16> %2
+define <4 x i16> @neg_vector(<4 x i8> %a) {
+ %1 = uitofp <4 x i8> %a to <4 x float>
+ %2 = fptoui <4 x float> %1 to <4 x i16>
+ ret <4 x i16> %2
+}
diff --git a/test/Transforms/ForcedFunctionAttrs/forced.ll b/test/Transforms/ForcedFunctionAttrs/forced.ll
new file mode 100644
index 000000000000..a41e9c0efbe4
--- /dev/null
+++ b/test/Transforms/ForcedFunctionAttrs/forced.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -S -forceattrs | FileCheck %s --check-prefix=CHECK-CONTROL
+; RUN: opt < %s -S -forceattrs -force-attribute foo:noinline | FileCheck %s --check-prefix=CHECK-FOO
+; RUN: opt < %s -S -passes=forceattrs -force-attribute foo:noinline | FileCheck %s --check-prefix=CHECK-FOO
+
+; CHECK-CONTROL: define void @foo() {
+; CHECK-FOO: define void @foo() #0 {
+define void @foo() {
+ ret void
+}
+
+
+; CHECK-FOO: attributes #0 = { noinline }
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
index ca05d63743b5..b62698a776fb 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
@@ -10,15 +10,16 @@ define i32 @f() {
ret i32 %tmp
}
-; CHECK: define i32 @g() #0
+; CHECK: define i32 @g() #1
define i32 @g() readonly {
ret i32 0
}
-; CHECK: define i32 @h() #0
+; CHECK: define i32 @h() #1
define i32 @h() readnone {
%tmp = load i32, i32* @x ; <i32> [#uses=1]
ret i32 %tmp
}
; CHECK: attributes #0 = { readnone }
+; CHECK: attributes #1 = { norecurse readnone }
diff --git a/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
deleted file mode 100644
index fa06cc718a93..000000000000
--- a/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: opt < %s -functionattrs -S | FileCheck %s
-
-; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) #0
-declare i8* @fopen(i8*, i8*)
-
-; CHECK: declare i8 @strlen(i8* nocapture) #1
-declare i8 @strlen(i8*)
-
-; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) #0
-declare i32* @realloc(i32*, i32)
-
-; Test deliberately wrong declaration
-declare i32 @strcpy(...)
-
-; CHECK-NOT: strcpy{{.*}}noalias
-; CHECK-NOT: strcpy{{.*}}nocapture
-; CHECK-NOT: strcpy{{.*}}nounwind
-; CHECK-NOT: strcpy{{.*}}readonly
-
-; CHECK: attributes #0 = { nounwind }
-; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
index 1a64a8393804..23bb18e92b4c 100644
--- a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
+++ b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
@@ -4,7 +4,9 @@
@g = constant i32 1
define void @foo() {
-; CHECK: void @foo() {
+; CHECK: void @foo() #0 {
%tmp = load volatile i32, i32* @g
ret void
}
+
+; CHECK: attributes #0 = { norecurse }
diff --git a/test/Transforms/FunctionAttrs/annotate-1.ll b/test/Transforms/FunctionAttrs/annotate-1.ll
deleted file mode 100644
index 9fba7a9f2882..000000000000
--- a/test/Transforms/FunctionAttrs/annotate-1.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: opt < %s -functionattrs -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -functionattrs -S | FileCheck -check-prefix=CHECK-POSIX %s
-
-declare i8* @fopen(i8*, i8*)
-; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) [[G0:#[0-9]]]
-
-declare i8 @strlen(i8*)
-; CHECK: declare i8 @strlen(i8* nocapture) [[G1:#[0-9]]]
-
-declare i32* @realloc(i32*, i32)
-; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) [[G0]]
-
-; Test deliberately wrong declaration
-
-declare i32 @strcpy(...)
-; CHECK: declare i32 @strcpy(...)
-
-declare i32 @gettimeofday(i8*, i8*)
-; CHECK-POSIX: declare i32 @gettimeofday(i8* nocapture, i8* nocapture) [[G0:#[0-9]+]]
-
-; CHECK: attributes [[G0]] = { nounwind }
-; CHECK: attributes [[G1]] = { nounwind readonly }
-; CHECK-POSIX: attributes [[G0]] = { nounwind }
diff --git a/test/Transforms/FunctionAttrs/atomic.ll b/test/Transforms/FunctionAttrs/atomic.ll
index bb867011cc2a..dd915a6027f2 100644
--- a/test/Transforms/FunctionAttrs/atomic.ll
+++ b/test/Transforms/FunctionAttrs/atomic.ll
@@ -19,5 +19,5 @@ entry:
ret i32 %r
}
-; CHECK: attributes #0 = { readnone ssp uwtable }
-; CHECK: attributes #1 = { ssp uwtable }
+; CHECK: attributes #0 = { norecurse readnone ssp uwtable }
+; CHECK: attributes #1 = { norecurse ssp uwtable }
diff --git a/test/Transforms/FunctionAttrs/nonnull.ll b/test/Transforms/FunctionAttrs/nonnull.ll
new file mode 100644
index 000000000000..1fb64b7434ab
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/nonnull.ll
@@ -0,0 +1,74 @@
+; RUN: opt -S -functionattrs %s | FileCheck %s
+declare nonnull i8* @ret_nonnull()
+
+; Return a pointer trivially nonnull (call return attribute)
+define i8* @test1() {
+; CHECK: define nonnull i8* @test1
+ %ret = call i8* @ret_nonnull()
+ ret i8* %ret
+}
+
+; Return a pointer trivially nonnull (argument attribute)
+define i8* @test2(i8* nonnull %p) {
+; CHECK: define nonnull i8* @test2
+ ret i8* %p
+}
+
+; Given an SCC where one of the functions can not be marked nonnull,
+; can we still mark the other one which is trivially nonnull
+define i8* @scc_binder() {
+; CHECK: define i8* @scc_binder
+ call i8* @test3()
+ ret i8* null
+}
+
+define i8* @test3() {
+; CHECK: define nonnull i8* @test3
+ call i8* @scc_binder()
+ %ret = call i8* @ret_nonnull()
+ ret i8* %ret
+}
+
+; Given a mutual recursive set of functions, we can mark them
+; nonnull if neither can ever return null. (In this case, they
+; just never return period.)
+define i8* @test4_helper() {
+; CHECK: define noalias nonnull i8* @test4_helper
+ %ret = call i8* @test4()
+ ret i8* %ret
+}
+
+define i8* @test4() {
+; CHECK: define noalias nonnull i8* @test4
+ %ret = call i8* @test4_helper()
+ ret i8* %ret
+}
+
+; Given a mutual recursive set of functions which *can* return null
+; make sure we haven't marked them as nonnull.
+define i8* @test5_helper() {
+; CHECK: define noalias i8* @test5_helper
+ %ret = call i8* @test5()
+ ret i8* null
+}
+
+define i8* @test5() {
+; CHECK: define noalias i8* @test5
+ %ret = call i8* @test5_helper()
+ ret i8* %ret
+}
+
+; Local analysis, but going through a self recursive phi
+define i8* @test6() {
+entry:
+; CHECK: define nonnull i8* @test6
+ %ret = call i8* @ret_nonnull()
+ br label %loop
+loop:
+ %phi = phi i8* [%ret, %entry], [%phi, %loop]
+ br i1 undef, label %loop, label %exit
+exit:
+ ret i8* %phi
+}
+
+
diff --git a/test/Transforms/FunctionAttrs/norecurse.ll b/test/Transforms/FunctionAttrs/norecurse.ll
new file mode 100644
index 000000000000..47481191d278
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/norecurse.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
+
+; CHECK: define i32 @leaf() #0
+define i32 @leaf() {
+ ret i32 1
+}
+
+; CHECK: define i32 @self_rec() #1
+define i32 @self_rec() {
+ %a = call i32 @self_rec()
+ ret i32 4
+}
+
+; CHECK: define i32 @indirect_rec() #1
+define i32 @indirect_rec() {
+ %a = call i32 @indirect_rec2()
+ ret i32 %a
+}
+; CHECK: define i32 @indirect_rec2() #1
+define i32 @indirect_rec2() {
+ %a = call i32 @indirect_rec()
+ ret i32 %a
+}
+
+; CHECK: define i32 @extern() #1
+define i32 @extern() {
+ %a = call i32 @k()
+ ret i32 %a
+}
+declare i32 @k() readnone
+
+; CHECK: define internal i32 @called_by_norecurse() #0
+define internal i32 @called_by_norecurse() {
+ %a = call i32 @k()
+ ret i32 %a
+}
+define void @m() norecurse {
+ %a = call i32 @called_by_norecurse()
+ ret void
+}
+
+; CHECK: define internal i32 @called_by_norecurse_indirectly() #0
+define internal i32 @called_by_norecurse_indirectly() {
+ %a = call i32 @k()
+ ret i32 %a
+}
+define internal void @o() {
+ %a = call i32 @called_by_norecurse_indirectly()
+ ret void
+}
+define void @p() norecurse {
+ call void @o()
+ ret void
+}
+
+; CHECK: attributes #0 = { norecurse readnone }
+; CHECK: attributes #1 = { readnone }
diff --git a/test/Transforms/FunctionAttrs/optnone.ll b/test/Transforms/FunctionAttrs/optnone.ll
index 7694bfe13aa5..441ff4da65ec 100644
--- a/test/Transforms/FunctionAttrs/optnone.ll
+++ b/test/Transforms/FunctionAttrs/optnone.ll
@@ -16,9 +16,11 @@ define void @test_optnone(i8* %p) noinline optnone {
declare i8 @strlen(i8*) noinline optnone
; CHECK-LABEL: @strlen
-; CHECK: (i8*) #1
+; CHECK: (i8*) #2
; CHECK-LABEL: attributes #0
-; CHECK: = { readnone }
+; CHECK: = { norecurse readnone }
; CHECK-LABEL: attributes #1
+; CHECK: = { noinline norecurse optnone }
+; CHECK-LABEL: attributes #2
; CHECK: = { noinline optnone }
diff --git a/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll b/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll
new file mode 100644
index 000000000000..db9a895f97ea
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll
@@ -0,0 +1,30 @@
+; RUN: opt -functionattrs -S < %s | FileCheck %s
+
+; This checks for an iterator wraparound bug in FunctionAttrs. The previous
+; "incorrect" behavior was inferring readonly for the %x argument in @caller.
+; Inferring readonly for %x *is* actually correct, since @va_func is marked
+; readonly, but FunctionAttrs was inferring readonly for the wrong reasons (and
+; we _need_ the readonly on @va_func to trigger the problematic code path). It
+; is possible that in the future FunctionAttrs becomes smart enough to infer
+; readonly for %x for the right reasons, and at that point this test will have
+; to be marked invalid.
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+
+define void @va_func(i32* readonly %b, ...) readonly nounwind {
+; CHECK-LABEL: define void @va_func(i32* nocapture readonly %b, ...)
+ entry:
+ %valist = alloca i8
+ call void @llvm.va_start(i8* %valist)
+ call void @llvm.va_end(i8* %valist)
+ %x = call i32 @caller(i32* %b)
+ ret void
+}
+
+define i32 @caller(i32* %x) {
+; CHECK-LABEL: define i32 @caller(i32* nocapture %x)
+ entry:
+ call void(i32*,...) @va_func(i32* null, i32 0, i32 0, i32 0, i32* %x)
+ ret i32 42
+}
diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll
index 7f22e6f2a2c5..aabdfe8d2005 100644
--- a/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/test/Transforms/FunctionAttrs/readattrs.ll
@@ -65,3 +65,41 @@ entry:
store i32 10, i32* %call, align 4
ret void
}
+
+; CHECK: declare void @llvm.masked.scatter
+declare void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>)
+
+; CHECK-NOT: readnone
+; CHECK-NOT: readonly
+; CHECK: define void @test9
+define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
+ call void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>)
+ ret void
+}
+
+; CHECK: declare <4 x i32> @llvm.masked.gather
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
+; CHECK: readonly
+; CHECK: define <4 x i32> @test10
+define <4 x i32> @test10(<4 x i32*> %ptrs) {
+ %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef)
+ ret <4 x i32> %res
+}
+
+; CHECK: declare <4 x i32> @test11_1
+declare <4 x i32> @test11_1(<4 x i32*>) argmemonly nounwind readonly
+; CHECK: readonly
+; CHECK-NOT: readnone
+; CHECK: define <4 x i32> @test11_2
+define <4 x i32> @test11_2(<4 x i32*> %ptrs) {
+ %res = call <4 x i32> @test11_1(<4 x i32*> %ptrs)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @test12_1(<4 x i32*>) argmemonly nounwind
+; CHECK-NOT: readnone
+; CHECK: define <4 x i32> @test12_2
+define <4 x i32> @test12_2(<4 x i32*> %ptrs) {
+ %res = call <4 x i32> @test12_1(<4 x i32*> %ptrs)
+ ret <4 x i32> %res
+}
diff --git a/test/Transforms/FunctionImport/Inputs/funcimport.ll b/test/Transforms/FunctionImport/Inputs/funcimport.ll
new file mode 100644
index 000000000000..96555892fe3c
--- /dev/null
+++ b/test/Transforms/FunctionImport/Inputs/funcimport.ll
@@ -0,0 +1,87 @@
+@globalvar = global i32 1, align 4
+@staticvar = internal global i32 1, align 4
+@staticconstvar = internal unnamed_addr constant [2 x i32] [i32 10, i32 20], align 4
+@commonvar = common global i32 0, align 4
+@P = internal global void ()* null, align 8
+
+@weakalias = weak alias void (...), bitcast (void ()* @globalfunc1 to void (...)*)
+@analias = alias void (...), bitcast (void ()* @globalfunc2 to void (...)*)
+@linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc to void (...)*)
+
+define void @globalfunc1() #0 {
+entry:
+ ret void
+}
+
+define void @globalfunc2() #0 {
+entry:
+ ret void
+}
+
+define linkonce_odr void @linkoncefunc() #0 {
+entry:
+ ret void
+}
+
+define i32 @referencestatics(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %call = call i32 @staticfunc()
+ %0 = load i32, i32* @staticvar, align 4
+ %add = add nsw i32 %call, %0
+ %1 = load i32, i32* %i.addr, align 4
+ %idxprom = sext i32 %1 to i64
+ %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* @staticconstvar, i64 0, i64 %idxprom
+ %2 = load i32, i32* %arrayidx, align 4
+ %add1 = add nsw i32 %add, %2
+ ret i32 %add1
+}
+
+define i32 @referenceglobals(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ call void @globalfunc1()
+ %0 = load i32, i32* @globalvar, align 4
+ ret i32 %0
+}
+
+define i32 @referencecommon(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* @commonvar, align 4
+ ret i32 %0
+}
+
+define void @setfuncptr() #0 {
+entry:
+ store void ()* @staticfunc2, void ()** @P, align 8
+ ret void
+}
+
+define void @callfuncptr() #0 {
+entry:
+ %0 = load void ()*, void ()** @P, align 8
+ call void %0()
+ ret void
+}
+
+@weakvar = weak global i32 1, align 4
+define weak void @weakfunc() #0 {
+entry:
+ ret void
+}
+
+define internal i32 @staticfunc() #0 {
+entry:
+ ret i32 1
+}
+
+define internal void @staticfunc2() #0 {
+entry:
+ ret void
+}
+
+
diff --git a/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll b/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll
new file mode 100644
index 000000000000..35c62a262903
--- /dev/null
+++ b/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll
@@ -0,0 +1,27 @@
+; ModuleID = 'funcimport_debug.o'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @func() #0 !dbg !4 {
+entry:
+ ret void, !dbg !10
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "funcimport_debug.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)"}
+!10 = !DILocation(line: 2, column: 1, scope: !4)
diff --git a/test/Transforms/FunctionImport/funcimport.ll b/test/Transforms/FunctionImport/funcimport.ll
new file mode 100644
index 000000000000..c099b9766477
--- /dev/null
+++ b/test/Transforms/FunctionImport/funcimport.ll
@@ -0,0 +1,75 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: llvm-as -function-summary %s -o %t.bc
+; RUN: llvm-as -function-summary %p/Inputs/funcimport.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Do the import now
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -S | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIMDEF
+
+; Test import with smaller instruction limit
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -import-instr-limit=5 -S | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIM5
+; INSTLIM5-NOT: @staticfunc.llvm.2
+
+define i32 @main() #0 {
+entry:
+ call void (...) @weakalias()
+ call void (...) @analias()
+ call void (...) @linkoncealias()
+ %call = call i32 (...) @referencestatics()
+ %call1 = call i32 (...) @referenceglobals()
+ %call2 = call i32 (...) @referencecommon()
+ call void (...) @setfuncptr()
+ call void (...) @callfuncptr()
+ call void (...) @weakfunc()
+ ret i32 0
+}
+
+; Won't import weak alias
+; CHECK-DAG: declare void @weakalias
+declare void @weakalias(...) #1
+
+; Cannot create an alias to available_externally
+; CHECK-DAG: declare void @analias
+declare void @analias(...) #1
+
+; Aliases import the aliasee function
+declare void @linkoncealias(...) #1
+; CHECK-DAG: define linkonce_odr void @linkoncefunc()
+; CHECK-DAG: @linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc to void (...)*
+
+; INSTLIMDEF-DAG: define available_externally i32 @referencestatics(i32 %i)
+; INSTLIM5-DAG: declare i32 @referencestatics(...)
+declare i32 @referencestatics(...) #1
+
+; The import of referencestatics will expose call to staticfunc that
+; should in turn be imported as a promoted/renamed and hidden function.
+; Ensure that the call is to the properly-renamed function.
+; INSTLIMDEF-DAG: %call = call i32 @staticfunc.llvm.2()
+; INSTLIMDEF-DAG: define available_externally hidden i32 @staticfunc.llvm.2()
+
+; CHECK-DAG: define available_externally i32 @referenceglobals(i32 %i)
+declare i32 @referenceglobals(...) #1
+
+; The import of referenceglobals will expose call to globalfunc1 that
+; should in turn be imported.
+; CHECK-DAG: define available_externally void @globalfunc1()
+
+; CHECK-DAG: define available_externally i32 @referencecommon(i32 %i)
+declare i32 @referencecommon(...) #1
+
+; CHECK-DAG: define available_externally void @setfuncptr()
+declare void @setfuncptr(...) #1
+
+; CHECK-DAG: define available_externally void @callfuncptr()
+declare void @callfuncptr(...) #1
+
+; Ensure that all uses of local variable @P which has used in setfuncptr
+; and callfuncptr are to the same promoted/renamed global.
+; CHECK-DAG: @P.llvm.2 = available_externally hidden global void ()* null
+; CHECK-DAG: %0 = load void ()*, void ()** @P.llvm.2,
+; CHECK-DAG: store void ()* @staticfunc2.llvm.2, void ()** @P.llvm.2,
+
+; Won't import weak func
+; CHECK-DAG: declare void @weakfunc(...)
+declare void @weakfunc(...) #1
+
diff --git a/test/Transforms/FunctionImport/funcimport_debug.ll b/test/Transforms/FunctionImport/funcimport_debug.ll
new file mode 100644
index 000000000000..c57b5e14af1b
--- /dev/null
+++ b/test/Transforms/FunctionImport/funcimport_debug.ll
@@ -0,0 +1,45 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: llvm-as -function-summary %s -o %t.bc
+; RUN: llvm-as -function-summary %p/Inputs/funcimport_debug.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Do the import now and confirm that metadata is linked for imported function.
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -S | FileCheck %s
+
+; CHECK: define available_externally void @func()
+; CHECK: distinct !DISubprogram(name: "main"
+; CHECK: distinct !DISubprogram(name: "func"
+
+; ModuleID = 'funcimport_debug.o'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !dbg !4 {
+entry:
+ call void (...) @func(), !dbg !11
+ ret i32 0, !dbg !12
+}
+
+declare void @func(...) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "funcimport_debug.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)"}
+!11 = !DILocation(line: 3, column: 3, scope: !4)
+!12 = !DILocation(line: 4, column: 1, scope: !4)
diff --git a/test/Transforms/GCOVProfiling/function-numbering.ll b/test/Transforms/GCOVProfiling/function-numbering.ll
index 5a704e4d047b..f94d5ad30bbc 100644
--- a/test/Transforms/GCOVProfiling/function-numbering.ll
+++ b/test/Transforms/GCOVProfiling/function-numbering.ll
@@ -22,16 +22,16 @@ target triple = "x86_64-apple-macosx10.10.0"
; GCNO-NOT: == bar ({{[0-9]+}}) @
; GCNO: == baz (1) @
-define void @foo() {
+define void @foo() !dbg !4 {
ret void, !dbg !12
}
-define void @bar() {
+define void @bar() !dbg !7 {
; This function is referenced by the debug info, but no lines have locations.
ret void
}
-define void @baz() {
+define void @baz() !dbg !8 {
ret void, !dbg !13
}
@@ -40,15 +40,15 @@ define void @baz() {
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/function-numbering.ll", directory: "")
!2 = !{}
!3 = !{!4, !7, !8}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/function-numbering.ll", directory: "")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: void ()* @bar, variables: !2)
-!8 = !DISubprogram(name: "baz", line: 3, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: void ()* @baz, variables: !2)
+!7 = distinct !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "baz", line: 3, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 2}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.6.0 "}
diff --git a/test/Transforms/GCOVProfiling/global-ctor.ll b/test/Transforms/GCOVProfiling/global-ctor.ll
index 29c46d6c2107..47600c7bfcad 100644
--- a/test/Transforms/GCOVProfiling/global-ctor.ll
+++ b/test/Transforms/GCOVProfiling/global-ctor.ll
@@ -8,7 +8,7 @@
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_global-ctor.ll, i8* null }]
; Function Attrs: nounwind
-define internal void @__cxx_global_var_init() #0 section ".text.startup" {
+define internal void @__cxx_global_var_init() #0 section ".text.startup" !dbg !4 {
entry:
br label %0
@@ -38,15 +38,15 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
!llvm.gcov = !{!16}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 210217)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 210217)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "/home/nlewycky")
!2 = !{}
!3 = !{!4, !8}
-!4 = !DISubprogram(name: "__cxx_global_var_init", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !6, type: !7, function: void ()* @__cxx_global_var_init, variables: !2)
+!4 = distinct !DISubprogram(name: "__cxx_global_var_init", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "global-ctor.ll", directory: "/home/nlewycky")
!6 = !DIFile(filename: "global-ctor.ll", directory: "/home/nlewycky")
!7 = !DISubroutineType(types: !2)
-!8 = !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_global-ctor.ll", isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !9, type: !7, function: void ()* @_GLOBAL__sub_I_global-ctor.ll, variables: !2)
+!8 = distinct !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_global-ctor.ll", isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !9, type: !7, variables: !2)
!9 = !DIFile(filename: "<stdin>", directory: "/home/nlewycky")
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/GCOVProfiling/linezero.ll b/test/Transforms/GCOVProfiling/linezero.ll
index 9e172b752d78..e071c4e6dbf7 100644
--- a/test/Transforms/GCOVProfiling/linezero.ll
+++ b/test/Transforms/GCOVProfiling/linezero.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.vector = type { i8 }
; Function Attrs: nounwind
-define i32 @_Z4testv() #0 {
+define i32 @_Z4testv() #0 !dbg !15 {
entry:
%retval = alloca i32, align 4
%__range = alloca %struct.vector*, align 8
@@ -75,7 +75,7 @@ declare i8* @_ZN6vector3endEv(%struct.vector*) #2
declare void @llvm.trap() #3
; Function Attrs: nounwind
-define void @_Z2f1v() #0 {
+define void @_Z2f1v() #0 !dbg !20 {
entry:
br label %0
@@ -93,7 +93,7 @@ attributes #3 = { noreturn nounwind }
!llvm.gcov = !{!25}
!llvm.ident = !{!26}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 209871)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 209871)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "PATTERN")
!2 = !{}
!3 = !{!4}
@@ -108,29 +108,29 @@ attributes #3 = { noreturn nounwind }
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS6vector")
!13 = !DISubprogram(name: "end", linkageName: "_ZN6vector3endEv", line: 26, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 26, file: !5, scope: !"_ZTS6vector", type: !8)
!14 = !{!15, !20}
-!15 = !DISubprogram(name: "test", linkageName: "_Z4testv", line: 50, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 50, file: !5, scope: !16, type: !17, function: i32 ()* @_Z4testv, variables: !2)
+!15 = distinct !DISubprogram(name: "test", linkageName: "_Z4testv", line: 50, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 50, file: !5, scope: !16, type: !17, variables: !2)
!16 = !DIFile(filename: "linezero.cc", directory: "PATTERN")
!17 = !DISubroutineType(types: !18)
!18 = !{!19}
!19 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!20 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 54, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 54, file: !5, scope: !16, type: !21, function: void ()* @_Z2f1v, variables: !2)
+!20 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 54, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 54, file: !5, scope: !16, type: !21, variables: !2)
!21 = !DISubroutineType(types: !22)
!22 = !{null}
!23 = !{i32 2, !"Dwarf Version", i32 4}
!24 = !{i32 2, !"Debug Info Version", i32 3}
!25 = !{!"PATTERN/linezero.o", !0}
!26 = !{!"clang version 3.5.0 (trunk 209871)"}
-!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "__range", flags: DIFlagArtificial, scope: !28, type: !29)
+!27 = !DILocalVariable(name: "__range", flags: DIFlagArtificial, scope: !28, type: !29)
!28 = distinct !DILexicalBlock(line: 51, column: 0, file: !5, scope: !15)
!29 = !DIDerivedType(tag: DW_TAG_rvalue_reference_type, baseType: !"_ZTS6vector")
!30 = !DILocation(line: 0, scope: !28)
!31 = !DILocation(line: 51, scope: !28)
-!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "__begin", flags: DIFlagArtificial, scope: !28, type: !10)
-!33 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "__end", flags: DIFlagArtificial, scope: !28, type: !10)
+!32 = !DILocalVariable(name: "__begin", flags: DIFlagArtificial, scope: !28, type: !10)
+!33 = !DILocalVariable(name: "__end", flags: DIFlagArtificial, scope: !28, type: !10)
!34 = !DILocation(line: 51, scope: !35)
!35 = distinct !DILexicalBlock(line: 51, column: 0, file: !5, scope: !36)
!36 = distinct !DILexicalBlock(line: 51, column: 0, file: !5, scope: !28)
-!37 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "spec", line: 51, scope: !28, file: !16, type: !11)
+!37 = !DILocalVariable(name: "spec", line: 51, scope: !28, file: !16, type: !11)
!38 = !DILocation(line: 51, scope: !39)
!39 = distinct !DILexicalBlock(line: 51, column: 0, file: !5, scope: !28)
!40 = !DILocation(line: 51, scope: !41)
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
index 7a4119802c9a..65830bf78025 100644
--- a/test/Transforms/GCOVProfiling/linkagename.ll
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -4,7 +4,7 @@
; RUN: grep _Z3foov %T/linkagename.gcno
; RUN: rm %T/linkagename.gcno
-define void @_Z3foov() {
+define void @_Z3foov() !dbg !5 {
entry:
ret void, !dbg !8
}
@@ -13,12 +13,12 @@ entry:
!llvm.module.flags = !{!10}
!llvm.gcov = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 177323)", isOptimized: false, emissionKind: 0, file: !2, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3, imports: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 177323)", isOptimized: false, emissionKind: 0, file: !2, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3, imports: !3)
!1 = !DIFile(filename: "hello.cc", directory: "/home/nlewycky")
!2 = !DIFile(filename: "hello.cc", directory: "/home/nlewycky")
!3 = !{}
!4 = !{!5}
-!5 = !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !1, type: !6, function: void ()* @_Z3foov, variables: !3)
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !1, type: !6, variables: !3)
!6 = !DISubroutineType(types: !7)
!7 = !{null}
!8 = !DILocation(line: 1, scope: !5)
diff --git a/test/Transforms/GCOVProfiling/return-block.ll b/test/Transforms/GCOVProfiling/return-block.ll
index 38b5b75e3c2d..9b502a14bfa2 100644
--- a/test/Transforms/GCOVProfiling/return-block.ll
+++ b/test/Transforms/GCOVProfiling/return-block.ll
@@ -16,7 +16,7 @@ target triple = "x86_64-unknown-linux-gnu"
@A = common global i32 0, align 4
; Function Attrs: nounwind uwtable
-define void @test() #0 {
+define void @test() #0 !dbg !4 {
entry:
tail call void (...) @f() #2, !dbg !14
%0 = load i32, i32* @A, align 4, !dbg !15
@@ -44,11 +44,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!11, !12}
!llvm.ident = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 223182)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !8, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 223182)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !8, imports: !2)
!1 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/return-block.ll", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test", line: 5, isLocal: false, isDefinition: true, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !6, function: void ()* @test, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 5, isLocal: false, isDefinition: true, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/return-block.ll", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
index 487e72123cb0..67bfb3c97612 100644
--- a/test/Transforms/GCOVProfiling/version.ll
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -8,7 +8,7 @@
; RUN: head -c8 %T/version.gcno | grep '^oncg.704'
; RUN: rm %T/version.gcno
-define void @test() {
+define void @test() !dbg !5 {
ret void, !dbg !8
}
@@ -16,11 +16,11 @@ define void @test() {
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 176994)", isOptimized: false, emissionKind: 0, file: !11, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 176994)", isOptimized: false, emissionKind: 0, file: !11, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3)
!2 = !DIFile(filename: "version", directory: "/usr/local/google/home/nlewycky")
!3 = !{}
!4 = !{!5}
-!5 = !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !10, scope: !6, type: !7, function: void ()* @test, variables: !3)
+!5 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !10, scope: !6, type: !7, variables: !3)
!6 = !DIFile(filename: "<stdin>", directory: ".")
!7 = !DISubroutineType(types: !{null})
!8 = !DILocation(line: 1, scope: !5)
diff --git a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
index fdf17e0b46df..a0cf92989b81 100644
--- a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
+++ b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
@@ -17,20 +17,20 @@ target triple = "i386-pc-linux-gnu"
%"struct.std::pair<std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,bool>" = type { %"struct.std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >", i8 }
%"struct.std::pair<void* const,void*>" = type { i8*, i8* }
-@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = weak alias i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*)* @pthread_mutexattr_init ; <i32 (%struct.__sched_param*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype ; <i32 (%struct.__sched_param*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy ; <i32 (%struct.__sched_param*)*> [#uses=0]
+@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*), i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
+@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32), i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
+@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*), i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = weak alias i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*), i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i32), i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.__sched_param*), i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*), i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32), i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_init ; <i32 (%struct.__sched_param*)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.__sched_param*, i32), i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype ; <i32 (%struct.__sched_param*, i32)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy ; <i32 (%struct.__sched_param*)*> [#uses=0]
declare fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind readnone
diff --git a/test/Transforms/GVN/assume-equal.ll b/test/Transforms/GVN/assume-equal.ll
new file mode 100644
index 000000000000..f9304a8fc7c6
--- /dev/null
+++ b/test/Transforms/GVN/assume-equal.ll
@@ -0,0 +1,235 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+%struct.A = type { i32 (...)** }
+@_ZTV1A = available_externally unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)], align 8
+@_ZTI1A = external constant i8*
+
+; Checks if indirect calls can be replaced with direct
+; assuming that %vtable == @_ZTV1A (with alignment).
+; Checking const propagation across other BBs
+; CHECK-LABEL: define void @_Z1gb(
+
+define void @_Z1gb(i1 zeroext %p) {
+entry:
+ %call = tail call noalias i8* @_Znwm(i64 8) #4
+ %0 = bitcast i8* %call to %struct.A*
+ tail call void @_ZN1AC1Ev(%struct.A* %0) #1
+ %1 = bitcast i8* %call to i8***
+ %vtable = load i8**, i8*** %1, align 8
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+ tail call void @llvm.assume(i1 %cmp.vtables)
+ br i1 %p, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+ %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+
+ ; CHECK: call i32 @_ZN1A3fooEv(
+ %call2 = tail call i32 %2(%struct.A* %0) #1
+
+ br label %if.end
+
+if.else: ; preds = %entry
+ %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1
+ %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)**
+
+ ; CHECK: call i32 @_ZN1A3barEv(
+ %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8
+
+ %call5 = tail call i32 %3(%struct.A* %0) #1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
+; Check integration with invariant.group handling
+; CHECK-LABEL: define void @invariantGroupHandling(i1 zeroext %p) {
+define void @invariantGroupHandling(i1 zeroext %p) {
+entry:
+ %call = tail call noalias i8* @_Znwm(i64 8) #4
+ %0 = bitcast i8* %call to %struct.A*
+ tail call void @_ZN1AC1Ev(%struct.A* %0) #1
+ %1 = bitcast i8* %call to i8***
+ %vtable = load i8**, i8*** %1, align 8, !invariant.group !0
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+ tail call void @llvm.assume(i1 %cmp.vtables)
+ br i1 %p, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+ %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+
+; CHECK: call i32 @_ZN1A3fooEv(
+ %call2 = tail call i32 %2(%struct.A* %0) #1
+ %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0
+ %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)**
+ %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8
+; FIXME: those loads could be also direct, but right now the invariant.group
+; analysis works only on single block
+; CHECK-NOT: call i32 @_ZN1A3fooEv(
+ %callx = tail call i32 %call1(%struct.A* %0) #1
+
+ %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0
+ %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)**
+ %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8
+; CHECK-NOT: call i32 @_ZN1A3fooEv(
+ %cally = tail call i32 %call4(%struct.A* %0) #1
+
+ %b = bitcast i8* %call to %struct.A**
+ %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0
+ %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)**
+ %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8
+; CHECK-NOT: call i32 @_ZN1A3fooEv(
+ %unknown = tail call i32 %vfun(%struct.A* %0) #1
+
+ br label %if.end
+
+if.else: ; preds = %entry
+ %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1
+ %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)**
+
+ ; CHECK: call i32 @_ZN1A3barEv(
+ %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8
+
+ %call5 = tail call i32 %3(%struct.A* %0) #1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
+
+; Checking const propagation in the same BB
+; CHECK-LABEL: define i32 @main()
+
+define i32 @main() {
+entry:
+ %call = tail call noalias i8* @_Znwm(i64 8)
+ %0 = bitcast i8* %call to %struct.A*
+ tail call void @_ZN1AC1Ev(%struct.A* %0)
+ %1 = bitcast i8* %call to i8***
+ %vtable = load i8**, i8*** %1, align 8
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+ tail call void @llvm.assume(i1 %cmp.vtables)
+ %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+
+ ; CHECK: call i32 @_ZN1A3fooEv(
+ %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+
+ %call2 = tail call i32 %2(%struct.A* %0)
+ ret i32 0
+}
+
+; This tests checks const propatation with fcmp instruction.
+; CHECK-LABEL: define float @_Z1gf(float %p)
+
+define float @_Z1gf(float %p) {
+entry:
+ %p.addr = alloca float, align 4
+ %f = alloca float, align 4
+ store float %p, float* %p.addr, align 4
+
+ store float 3.000000e+00, float* %f, align 4
+ %0 = load float, float* %p.addr, align 4
+ %1 = load float, float* %f, align 4
+ %cmp = fcmp oeq float %1, %0 ; note const on lhs
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: ret float 3.000000e+00
+ ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1hf(float %p)
+
+define float @_Z1hf(float %p) {
+entry:
+ %p.addr = alloca float, align 4
+ store float %p, float* %p.addr, align 4
+
+ %0 = load float, float* %p.addr, align 4
+ %cmp = fcmp nnan ueq float %0, 3.000000e+00
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: ret float 3.000000e+00
+ ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1if(float %p)
+define float @_Z1if(float %p) {
+entry:
+ %p.addr = alloca float, align 4
+ store float %p, float* %p.addr, align 4
+
+ %0 = load float, float* %p.addr, align 4
+ %cmp = fcmp ueq float %0, 3.000000e+00 ; no nnan flag - can't propagate
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK-NOT: ret float 3.000000e+00
+ ret float %0
+}
+
+; This test checks if constant propagation works for multiple node edges
+; CHECK-LABEL: define i32 @_Z1ii(i32 %p)
+define i32 @_Z1ii(i32 %p) {
+entry:
+ %cmp = icmp eq i32 %p, 42
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: br i1 true, label %bb2, label %bb2
+ br i1 %cmp, label %bb2, label %bb2
+bb2:
+ call void @llvm.assume(i1 true)
+ ; CHECK: br i1 true, label %bb2, label %bb2
+ br i1 %cmp, label %bb2, label %bb2
+
+ ; CHECK: ret i32 42
+ ret i32 %p
+}
+
+; CHECK-LABEL: define i32 @_Z1ij(i32 %p)
+define i32 @_Z1ij(i32 %p) {
+entry:
+ %cmp = icmp eq i32 %p, 42
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: br i1 true, label %bb2, label %bb2
+ br i1 %cmp, label %bb2, label %bb2
+bb2:
+ ; CHECK-NOT: %cmp2 =
+ %cmp2 = icmp eq i32 %p, 42
+ ; CHECK-NOT: call void @llvm.assume(
+ call void @llvm.assume(i1 %cmp2)
+
+ ; CHECK: br i1 true, label %bb2, label %bb2
+ br i1 %cmp, label %bb2, label %bb2
+
+ ; CHECK: ret i32 42
+ ret i32 %p
+}
+
+; CHECK-LABEL: define i32 @_Z1ik(i32 %p)
+define i32 @_Z1ik(i32 %p) {
+entry:
+ %cmp = icmp eq i32 %p, 42
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: br i1 true, label %bb2, label %bb3
+ br i1 %cmp, label %bb2, label %bb3
+bb2:
+ ; CHECK-NOT: %cmp3 =
+ %cmp3 = icmp eq i32 %p, 43
+ ; CHECK: store i8 undef, i8* null
+ call void @llvm.assume(i1 %cmp3)
+ ret i32 15
+bb3:
+ ret i32 17
+}
+
+declare noalias i8* @_Znwm(i64)
+declare void @_ZN1AC1Ev(%struct.A*)
+declare void @llvm.assume(i1)
+declare i32 @_ZN1A3fooEv(%struct.A*)
+declare i32 @_ZN1A3barEv(%struct.A*)
+
+!0 = !{!"struct A"}
diff --git a/test/Transforms/GVN/crash-no-aa.ll b/test/Transforms/GVN/crash-no-aa.ll
index f076a8d81ace..0d09ecedc6ac 100644
--- a/test/Transforms/GVN/crash-no-aa.ll
+++ b/test/Transforms/GVN/crash-no-aa.ll
@@ -1,4 +1,4 @@
-; RUN: opt -no-aa -gvn -S < %s
+; RUN: opt -disable-basicaa -gvn -S < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-freebsd8.0"
diff --git a/test/Transforms/GVN/funclet.ll b/test/Transforms/GVN/funclet.ll
new file mode 100644
index 000000000000..2669256f0bdc
--- /dev/null
+++ b/test/Transforms/GVN/funclet.ll
@@ -0,0 +1,44 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+%eh.ThrowInfo = type { i32, i8*, i8*, i8* }
+%struct.A = type { i32* }
+
+@"_TI1?AUA@@" = external constant %eh.ThrowInfo
+
+define i8 @f() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %b = alloca i8
+ %c = alloca i8
+ store i8 42, i8* %b
+ store i8 13, i8* %c
+ invoke void @_CxxThrowException(i8* %b, %eh.ThrowInfo* nonnull @"_TI1?AUA@@")
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %catchpad = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ store i8 5, i8* %b
+ catchret from %catchpad to label %try.cont
+
+try.cont: ; preds = %catch
+ %load_b = load i8, i8* %b
+ %load_c = load i8, i8* %c
+ %add = add i8 %load_b, %load_c
+ ret i8 %add
+
+unreachable: ; preds = %entry
+ unreachable
+}
+; CHECK-LABEL: define i8 @f(
+; CHECK: %[[load_b:.*]] = load i8, i8* %b
+; CHECK-NEXT: %[[load_c:.*]] = load i8, i8* %c
+; CHECK-NEXT: %[[add:.*]] = add i8 %[[load_b]], %[[load_c]]
+; CHECK-NEXT: ret i8 %[[add]]
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*)
diff --git a/test/Transforms/GVN/invariant-load.ll b/test/Transforms/GVN/invariant-load.ll
index 982da8cfe486..f74fd3392c18 100644
--- a/test/Transforms/GVN/invariant-load.ll
+++ b/test/Transforms/GVN/invariant-load.ll
@@ -114,6 +114,23 @@ entry:
ret i32 %res
}
+define i32 @test8(i1 %cnd, i32* %p) {
+; CHECK-LABEL: test8
+; CHECK: @bar
+; CHECK: load i32, i32* %p2, !invariant.load
+; CHECK: br label %merge
+entry:
+ %v1 = load i32, i32* %p, !invariant.load !0
+ br i1 %cnd, label %taken, label %merge
+taken:
+ %p2 = call i32* (...) @bar(i32* %p)
+ br label %merge
+merge:
+ %p3 = phi i32* [%p, %entry], [%p2, %taken]
+ %v2 = load i32, i32* %p3, !invariant.load !0
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
!0 = !{ }
diff --git a/test/Transforms/GVN/invariant.group.ll b/test/Transforms/GVN/invariant.group.ll
new file mode 100644
index 000000000000..f703fda93f23
--- /dev/null
+++ b/test/Transforms/GVN/invariant.group.ll
@@ -0,0 +1,337 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+%struct.A = type { i32 (...)** }
+@_ZTV1A = available_externally unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8
+@_ZTI1A = external constant i8*
+
+@unknownPtr = external global i8
+
+; CHECK-LABEL: define i8 @simple() {
+define i8 @simple() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+
+ %a = load i8, i8* %ptr, !invariant.group !0
+ %b = load i8, i8* %ptr, !invariant.group !0
+ %c = load i8, i8* %ptr, !invariant.group !0
+; CHECK: ret i8 42
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @optimizable1() {
+define i8 @optimizable1() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+ %a = load i8, i8* %ptr, !invariant.group !0
+
+ call void @foo(i8* %ptr2); call to use %ptr2
+; CHECK: ret i8 42
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @optimizable2() {
+define i8 @optimizable2() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+
+ store i8 13, i8* %ptr ; can't use this store with invariant.group
+ %a = load i8, i8* %ptr
+ call void @bar(i8 %a) ; call to use %a
+
+ call void @foo(i8* %ptr)
+ %b = load i8, i8* %ptr, !invariant.group !0
+
+; CHECK: ret i8 42
+ ret i8 %b
+}
+
+; CHECK-LABEL: define i8 @unoptimizable1() {
+define i8 @unoptimizable1() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+ %a = load i8, i8* %ptr, !invariant.group !0
+; CHECK: ret i8 %a
+ ret i8 %a
+}
+
+; CHECK-LABEL: define void @indirectLoads() {
+define void @indirectLoads() {
+entry:
+ %a = alloca %struct.A*, align 8
+ %0 = bitcast %struct.A** %a to i8*
+
+ %call = call i8* @getPointer(i8* null)
+ %1 = bitcast i8* %call to %struct.A*
+ call void @_ZN1AC1Ev(%struct.A* %1)
+ %2 = bitcast %struct.A* %1 to i8***
+
+; CHECK: %vtable = load {{.*}} !invariant.group
+ %vtable = load i8**, i8*** %2, align 8, !invariant.group !2
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2)
+ call void @llvm.assume(i1 %cmp.vtables)
+
+ store %struct.A* %1, %struct.A** %a, align 8
+ %3 = load %struct.A*, %struct.A** %a, align 8
+ %4 = bitcast %struct.A* %3 to void (%struct.A*)***
+
+; CHECK: call void @_ZN1A3fooEv(
+ %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2
+ %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0
+ %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8
+ call void %5(%struct.A* %3)
+ %6 = load %struct.A*, %struct.A** %a, align 8
+ %7 = bitcast %struct.A* %6 to void (%struct.A*)***
+
+; CHECK: call void @_ZN1A3fooEv(
+ %vtable2 = load void (%struct.A*)**, void (%struct.A*)*** %7, align 8, !invariant.group !2
+ %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0
+ %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8
+
+ call void %8(%struct.A* %6)
+ %9 = load %struct.A*, %struct.A** %a, align 8
+ %10 = bitcast %struct.A* %9 to void (%struct.A*)***
+
+ %vtable4 = load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !2
+ %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0
+ %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8
+; CHECK: call void @_ZN1A3fooEv(
+ call void %11(%struct.A* %9)
+
+ %vtable5 = load i8**, i8*** %2, align 8, !invariant.group !2
+ %vfn6 = getelementptr inbounds i8*, i8** %vtable5, i64 0
+ %12 = bitcast i8** %vfn6 to void (%struct.A*)**
+ %13 = load void (%struct.A*)*, void (%struct.A*)** %12, align 8
+; CHECK: call void @_ZN1A3fooEv(
+ call void %13(%struct.A* %9)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @combiningBitCastWithLoad() {
+define void @combiningBitCastWithLoad() {
+entry:
+ %a = alloca %struct.A*, align 8
+ %0 = bitcast %struct.A** %a to i8*
+
+ %call = call i8* @getPointer(i8* null)
+ %1 = bitcast i8* %call to %struct.A*
+ call void @_ZN1AC1Ev(%struct.A* %1)
+ %2 = bitcast %struct.A* %1 to i8***
+
+; CHECK: %vtable = load {{.*}} !invariant.group
+ %vtable = load i8**, i8*** %2, align 8, !invariant.group !2
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2)
+
+ store %struct.A* %1, %struct.A** %a, align 8
+; CHECK-NOT: !invariant.group
+ %3 = load %struct.A*, %struct.A** %a, align 8
+ %4 = bitcast %struct.A* %3 to void (%struct.A*)***
+
+ %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2
+ %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0
+ %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8
+ call void %5(%struct.A* %3)
+
+ ret void
+}
+
+; CHECK-LABEL:define void @loadCombine() {
+define void @loadCombine() {
+enter:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+; CHECK: %[[A:.*]] = load i8, i8* %ptr, !invariant.group
+ %a = load i8, i8* %ptr, !invariant.group !0
+; CHECK-NOT: load
+ %b = load i8, i8* %ptr, !invariant.group !1
+; CHECK: call void @bar(i8 %[[A]])
+ call void @bar(i8 %a)
+; CHECK: call void @bar(i8 %[[A]])
+ call void @bar(i8 %b)
+ ret void
+}
+
+; CHECK-LABEL: define void @loadCombine1() {
+define void @loadCombine1() {
+enter:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+; CHECK: %[[D:.*]] = load i8, i8* %ptr, !invariant.group
+ %c = load i8, i8* %ptr
+; CHECK-NOT: load
+ %d = load i8, i8* %ptr, !invariant.group !1
+; CHECK: call void @bar(i8 %[[D]])
+ call void @bar(i8 %c)
+; CHECK: call void @bar(i8 %[[D]])
+ call void @bar(i8 %d)
+ ret void
+}
+
+; CHECK-LABEL: define void @loadCombine2() {
+define void @loadCombine2() {
+enter:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group
+ %e = load i8, i8* %ptr, !invariant.group !1
+; CHECK-NOT: load
+ %f = load i8, i8* %ptr
+; CHECK: call void @bar(i8 %[[E]])
+ call void @bar(i8 %e)
+; CHECK: call void @bar(i8 %[[E]])
+ call void @bar(i8 %f)
+ ret void
+}
+
+; CHECK-LABEL: define void @loadCombine3() {
+define void @loadCombine3() {
+enter:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group ![[OneMD:[0-9]]]
+ %e = load i8, i8* %ptr, !invariant.group !1
+; CHECK-NOT: load
+ %f = load i8, i8* %ptr, !invariant.group !1
+; CHECK: call void @bar(i8 %[[E]])
+ call void @bar(i8 %e)
+; CHECK: call void @bar(i8 %[[E]])
+ call void @bar(i8 %f)
+ ret void
+}
+
+; CHECK-LABEL: define i8 @unoptimizable2() {
+define i8 @unoptimizable2() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+ %a = load i8, i8* %ptr
+ call void @foo(i8* %ptr)
+ %b = load i8, i8* %ptr, !invariant.group !0
+
+; CHECK: ret i8 %a
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @unoptimizable3() {
+define i8 @unoptimizable3() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ %ptr2 = call i8* @getPointer(i8* %ptr)
+ %a = load i8, i8* %ptr2, !invariant.group !0
+
+; CHECK: ret i8 %a
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @unoptimizable4() {
+define i8 @unoptimizable4() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+ %a = load i8, i8* %ptr2, !invariant.group !0
+
+; CHECK: ret i8 %a
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @volatile1() {
+define i8 @volatile1() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+ %a = load i8, i8* %ptr, !invariant.group !0
+ %b = load volatile i8, i8* %ptr
+; CHECK: call void @bar(i8 %b)
+ call void @bar(i8 %b)
+
+ %c = load volatile i8, i8* %ptr, !invariant.group !0
+; FIXME: we could change %c to 42, preserving volatile load
+; CHECK: call void @bar(i8 %c)
+ call void @bar(i8 %c)
+; CHECK: ret i8 42
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @volatile2() {
+define i8 @volatile2() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+ %a = load i8, i8* %ptr, !invariant.group !0
+ %b = load volatile i8, i8* %ptr
+; CHECK: call void @bar(i8 %b)
+ call void @bar(i8 %b)
+
+ %c = load volatile i8, i8* %ptr, !invariant.group !0
+; FIXME: we could change %c to 42, preserving volatile load
+; CHECK: call void @bar(i8 %c)
+ call void @bar(i8 %c)
+; CHECK: ret i8 42
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @fun() {
+define i8 @fun() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+
+ %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
+; CHECK: call void @bar(i8 42)
+ call void @bar(i8 %a)
+
+ call void @foo(i8* %ptr)
+ %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed
+; CHECK: call void @bar(i8 %b)
+ call void @bar(i8 %b)
+
+ %newPtr = call i8* @getPointer(i8* %ptr)
+ %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
+; CHECK: call void @bar(i8 %c)
+ call void @bar(i8 %c)
+
+ %unknownValue = load i8, i8* @unknownPtr
+; FIXME: Can assume that %unknownValue == 42
+; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0
+ store i8 %unknownValue, i8* %ptr, !invariant.group !0
+
+ %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+ %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr
+; CHECK: ret i8 %d
+ ret i8 %d
+}
+
+declare void @foo(i8*)
+declare void @bar(i8)
+declare i8* @getPointer(i8*)
+declare void @_ZN1A3fooEv(%struct.A*)
+declare void @_ZN1AC1Ev(%struct.A*)
+declare i8* @llvm.invariant.group.barrier(i8*)
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1 %cmp.vtables) #0
+
+
+attributes #0 = { nounwind }
+; CHECK: ![[OneMD]] = !{!"other ptr"}
+!0 = !{!"magic ptr"}
+!1 = !{!"other ptr"}
+!2 = !{!"vtable_of_a"}
diff --git a/test/Transforms/GVN/load-pre-nonlocal.ll b/test/Transforms/GVN/load-pre-nonlocal.ll
index e9827a158ade..e0e886653076 100644
--- a/test/Transforms/GVN/load-pre-nonlocal.ll
+++ b/test/Transforms/GVN/load-pre-nonlocal.ll
@@ -61,7 +61,7 @@ for.end:
; CHECK-NOT: %1 = load i32, i32*
; CHECK: [[LSHR_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
-define i32 @overaligned_load(i32 %a, i32* nocapture %b) {
+define i32 @overaligned_load(i32 %a, i32* nocapture %b) !dbg !13 {
entry:
%cmp = icmp sgt i32 %a, 0, !dbg !14
br i1 %cmp, label %if.then, label %if.else, !dbg !14
@@ -99,7 +99,7 @@ if.end:
!10 = !{}
!11 = !DISubroutineType(types: !10)
!12 = !DIFile(filename: "test.cpp", directory: "/tmp")
-!13 = !DISubprogram(name: "test", scope: !12, file: !12, line: 99, type: !11, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32, i32*)* @overaligned_load, variables: !10)
+!13 = distinct !DISubprogram(name: "test", scope: !12, file: !12, line: 99, type: !11, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !10)
!14 = !DILocation(line: 100, column: 1, scope: !13)
!15 = !DILocation(line: 101, column: 1, scope: !13)
!16 = !DILocation(line: 102, column: 1, scope: !13)
diff --git a/test/Transforms/GVN/no_speculative_loads_with_asan.ll b/test/Transforms/GVN/no_speculative_loads_with_asan.ll
new file mode 100644
index 000000000000..2e790db1b2ad
--- /dev/null
+++ b/test/Transforms/GVN/no_speculative_loads_with_asan.ll
@@ -0,0 +1,57 @@
+; RUN: opt -O3 -S %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+declare noalias i8* @_Znam(i64) #1
+
+define i32 @TestNoAsan() {
+ %1 = tail call noalias i8* @_Znam(i64 2)
+ %2 = getelementptr inbounds i8, i8* %1, i64 1
+ store i8 0, i8* %2, align 1
+ store i8 0, i8* %1, align 1
+ %3 = bitcast i8* %1 to i16*
+ %4 = load i16, i16* %3, align 4
+ %5 = icmp eq i16 %4, 0
+ br i1 %5, label %11, label %6
+
+; <label>:6 ; preds = %0
+ %7 = getelementptr inbounds i8, i8* %1, i64 2
+ %8 = bitcast i8* %7 to i16*
+ %9 = load i16, i16* %8, align 2
+ %10 = sext i16 %9 to i32
+ br label %11
+
+; <label>:11 ; preds = %0, %6
+ %12 = phi i32 [ %10, %6 ], [ 0, %0 ]
+ ret i32 %12
+}
+
+; CHECK-LABEL: @TestNoAsan
+; CHECK: %[[LOAD:[^ ]+]] = load i32
+; CHECK: {{.*}} = ashr i32 %[[LOAD]]
+; CHECK-NOT: {{.*}} = phi
+
+define i32 @TestAsan() sanitize_address {
+ %1 = tail call noalias i8* @_Znam(i64 2)
+ %2 = getelementptr inbounds i8, i8* %1, i64 1
+ store i8 0, i8* %2, align 1
+ store i8 0, i8* %1, align 1
+ %3 = bitcast i8* %1 to i16*
+ %4 = load i16, i16* %3, align 4
+ %5 = icmp eq i16 %4, 0
+ br i1 %5, label %11, label %6
+
+; <label>:6 ; preds = %0
+ %7 = getelementptr inbounds i8, i8* %1, i64 2
+ %8 = bitcast i8* %7 to i16*
+ %9 = load i16, i16* %8, align 2
+ %10 = sext i16 %9 to i32
+ br label %11
+
+; <label>:11 ; preds = %0, %6
+ %12 = phi i32 [ %10, %6 ], [ 0, %0 ]
+ ret i32 %12
+}
+
+; CHECK-LABEL: @TestAsan
+; CHECK-NOT: %[[LOAD:[^ ]+]] = load i32
+; CHECK: {{.*}} = phi
+
diff --git a/test/Transforms/GVN/phi-translate.ll b/test/Transforms/GVN/phi-translate.ll
index 9e37b882f222..67036ab9746c 100644
--- a/test/Transforms/GVN/phi-translate.ll
+++ b/test/Transforms/GVN/phi-translate.ll
@@ -18,7 +18,7 @@ target datalayout = "e-p:64:64:64"
; CHECK-DAG: [[N_LOC]] = !DILocation(line: 47, column: 1, scope: !{{.*}})
@G = external global [100 x i32]
-define i32 @foo(i32 %x, i32 %z) {
+define i32 @foo(i32 %x, i32 %z) !dbg !6 {
entry:
%tobool = icmp eq i32 %z, 0, !dbg !7
br i1 %tobool, label %end, label %then, !dbg !7
@@ -44,7 +44,7 @@ end:
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "a.cc", directory: "/tmp")
-!6 = !DISubprogram(name: "foo", scope: !5, file: !5, line: 42, type: !4, isLocal: false, isDefinition: true, scopeLine: 43, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32, i32)* @foo, variables: !3)
+!6 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 42, type: !4, isLocal: false, isDefinition: true, scopeLine: 43, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
!7 = !DILocation(line: 43, column: 1, scope: !6)
!8 = !DILocation(line: 44, column: 1, scope: !6)
!9 = !DILocation(line: 45, column: 1, scope: !6)
diff --git a/test/Transforms/GVN/pr14166.ll b/test/Transforms/GVN/pr14166.ll
index eafe418dbdc6..ec1b1717f067 100644
--- a/test/Transforms/GVN/pr14166.ll
+++ b/test/Transforms/GVN/pr14166.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn -S < %s | FileCheck %s
+; RUN: opt -disable-basicaa -gvn -S < %s | FileCheck %s
target datalayout = "e-p:32:32:32"
target triple = "i386-pc-linux-gnu"
define <2 x i32> @test1() {
diff --git a/test/Transforms/GVN/pr24426.ll b/test/Transforms/GVN/pr24426.ll
new file mode 100644
index 000000000000..76b190f8fc22
--- /dev/null
+++ b/test/Transforms/GVN/pr24426.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -memcpyopt -mldst-motion -gvn -S | FileCheck %s
+
+declare void @check(i8)
+
+declare void @write(i8* %res)
+
+define void @test1() {
+ %1 = alloca [10 x i8]
+ %2 = bitcast [10 x i8]* %1 to i8*
+ call void @write(i8* %2)
+ %3 = load i8, i8* %2
+
+; CHECK-NOT: undef
+ call void @check(i8 %3)
+
+ ret void
+}
+
diff --git a/test/Transforms/GVN/pr25440.ll b/test/Transforms/GVN/pr25440.ll
new file mode 100644
index 000000000000..14e2c30f04b2
--- /dev/null
+++ b/test/Transforms/GVN/pr25440.ll
@@ -0,0 +1,108 @@
+;RUN: opt -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
+target triple = "thumbv7--linux-gnueabi"
+
+%struct.a = type { i16, i16, [1 x %union.a] }
+%union.a = type { i32 }
+
+@length = external global [0 x i32], align 4
+
+; Function Attrs: nounwind
+define fastcc void @foo(%struct.a* nocapture readonly %x) {
+;CHECK-LABEL: foo
+entry:
+ br label %bb0
+
+bb0: ; preds = %land.lhs.true, %entry
+;CHECK: bb0:
+ %x.tr = phi %struct.a* [ %x, %entry ], [ null, %land.lhs.true ]
+ %code1 = getelementptr inbounds %struct.a, %struct.a* %x.tr, i32 0, i32 0
+ %0 = load i16, i16* %code1, align 4
+; CHECK: load i32, i32*
+ %conv = zext i16 %0 to i32
+ switch i32 %conv, label %if.end.50 [
+ i32 43, label %cleanup
+ i32 52, label %if.then.5
+ ]
+
+if.then.5: ; preds = %bb0
+ br i1 undef, label %land.lhs.true, label %if.then.26
+
+land.lhs.true: ; preds = %if.then.5
+ br i1 undef, label %cleanup, label %bb0
+
+if.then.26: ; preds = %if.then.5
+ %x.tr.lcssa163 = phi %struct.a* [ %x.tr, %if.then.5 ]
+ br i1 undef, label %cond.end, label %cond.false
+
+cond.false: ; preds = %if.then.26
+; CHECK: cond.false:
+; CHECK-NOT: load
+ %mode = getelementptr inbounds %struct.a, %struct.a* %x.tr.lcssa163, i32 0, i32 1
+ %bf.load = load i16, i16* %mode, align 2
+ %bf.shl = shl i16 %bf.load, 8
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %if.then.26
+ br i1 undef, label %if.then.44, label %cleanup
+
+if.then.44: ; preds = %cond.end
+ unreachable
+
+if.end.50: ; preds = %bb0
+;%CHECK: if.end.50:
+ %conv.lcssa = phi i32 [ %conv, %bb0 ]
+ %arrayidx52 = getelementptr inbounds [0 x i32], [0 x i32]* @length, i32 0, i32 %conv.lcssa
+ %1 = load i32, i32* %arrayidx52, align 4
+ br i1 undef, label %for.body.57, label %cleanup
+
+for.body.57: ; preds = %if.end.50
+ %i.2157 = add nsw i32 %1, -1
+ unreachable
+
+cleanup: ; preds = %if.end.50, %cond.end, %land.lhs.true, %bb0
+ ret void
+}
+
+@yy_c_buf_p = external unnamed_addr global i8*, align 4
+@dfg_text = external global i8*, align 4
+
+define void @dfg_lex() {
+;CHECK-LABEL: dfg_lex
+entry:
+ br label %while.bodythread-pre-split
+
+while.bodythread-pre-split: ; preds = %while.end, %while.end, %entry
+ br i1 undef, label %if.then.14, label %if.end.15
+
+if.then.14: ; preds = %while.end, %while.bodythread-pre-split
+ %v1 = load i32, i32* bitcast (i8** @dfg_text to i32*), align 4
+ %sub.ptr.sub = sub i32 undef, %v1
+ br label %if.end.15
+
+if.end.15: ; preds = %if.then.14, %while.bodythread-pre-split
+ %v2 = load i8*, i8** @yy_c_buf_p, align 4
+ br label %while.cond.16
+
+while.cond.16: ; preds = %while.cond.16, %if.end.15
+ br i1 undef, label %while.cond.16, label %while.end
+
+while.end: ; preds = %while.cond.16
+ %add.ptr = getelementptr inbounds i8, i8* %v2, i32 undef
+ store i8* %add.ptr, i8** @dfg_text, align 4
+ %sub.ptr.rhs.cast25 = ptrtoint i8* %add.ptr to i32
+ %sub.ptr.sub26 = sub i32 0, %sub.ptr.rhs.cast25
+ switch i32 undef, label %sw.default [
+ i32 65, label %while.bodythread-pre-split
+ i32 3, label %return
+ i32 57, label %while.bodythread-pre-split
+ i32 60, label %if.then.14
+ ]
+
+sw.default: ; preds = %while.end
+ unreachable
+
+return: ; preds = %while.end
+ ret void
+}
diff --git a/test/Transforms/GVN/pre-gep-load.ll b/test/Transforms/GVN/pre-gep-load.ll
index 291af359a7a1..a46dc22ade89 100644
--- a/test/Transforms/GVN/pre-gep-load.ll
+++ b/test/Transforms/GVN/pre-gep-load.ll
@@ -47,3 +47,34 @@ return: ; preds = %sw.default, %sw.bb2
%retval.0 = phi double [ 0.000000e+00, %sw.default ], [ %sub6, %sw.bb2 ], [ %sub, %if.then ]
ret double %retval.0
}
+
+; The load causes the GEP's operands to be PREd earlier than normal. The
+; resulting sext ends up in pre.dest and in the GVN system before that BB is
+; actually processed. Make sure we can deal with the situation.
+
+define void @test_shortcut_safe(i1 %tst, i32 %p1, i32* %a) {
+; CHECK-LABEL: define void @test_shortcut_safe
+; CHECK: [[SEXT1:%.*]] = sext i32 %p1 to i64
+; CHECK: [[PHI1:%.*]] = phi i64 [ [[SEXT1]], {{%.*}} ], [ [[PHI2:%.*]], {{%.*}} ]
+; CHECK: [[SEXT2:%.*]] = sext i32 %p1 to i64
+; CHECK: [[PHI2]] = phi i64 [ [[SEXT2]], {{.*}} ], [ [[PHI1]], {{%.*}} ]
+; CHECK: getelementptr inbounds i32, i32* %a, i64 [[PHI2]]
+
+ br i1 %tst, label %sext1, label %pre.dest
+
+pre.dest:
+ br label %sext.use
+
+sext1:
+ %idxprom = sext i32 %p1 to i64
+ br label %sext.use
+
+sext.use:
+ %idxprom2 = sext i32 %p1 to i64
+ %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %idxprom2
+ %val = load i32, i32* %arrayidx3, align 4
+ tail call void (i32) @g(i32 %val)
+ br label %pre.dest
+}
+
+declare void @g(i32)
diff --git a/test/Transforms/GVN/pre-load.ll b/test/Transforms/GVN/pre-load.ll
index 24221d540f22..685df24f62b6 100644
--- a/test/Transforms/GVN/pre-load.ll
+++ b/test/Transforms/GVN/pre-load.ll
@@ -389,3 +389,44 @@ block5:
; CHECK: block4:
; CHECK-NEXT: phi i32
}
+
+declare void @f()
+declare void @g(i32)
+declare i32 @__CxxFrameHandler3(...)
+
+; Test that loads aren't PRE'd into EH pads.
+define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test12(
+block1:
+ invoke void @f()
+ to label %block2 unwind label %catch.dispatch
+
+block2:
+ invoke void @f()
+ to label %block3 unwind label %cleanup
+
+block3:
+ ret void
+
+catch.dispatch:
+ %cs1 = catchswitch within none [label %catch] unwind label %cleanup2
+
+catch:
+ %c = catchpad within %cs1 []
+ catchret from %c to label %block2
+
+cleanup:
+ %c1 = cleanuppad within none []
+ store i32 0, i32* %p
+ cleanupret from %c1 unwind label %cleanup2
+
+; CHECK: cleanup2:
+; CHECK-NOT: phi
+; CHECK-NEXT: %c2 = cleanuppad within none []
+; CHECK-NEXT: %NOTPRE = load i32, i32* %p
+cleanup2:
+ %c2 = cleanuppad within none []
+ %NOTPRE = load i32, i32* %p
+ call void @g(i32 %NOTPRE)
+ cleanupret from %c2 unwind to caller
+}
diff --git a/test/Transforms/GVN/range.ll b/test/Transforms/GVN/range.ll
index 297c6aac88dd..39acc0c35157 100644
--- a/test/Transforms/GVN/range.ll
+++ b/test/Transforms/GVN/range.ll
@@ -1,7 +1,7 @@
; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
define i32 @test1(i32* %p) {
-; CHECK: @test1(i32* %p)
+; CHECK-LABEL: @test1(i32* %p)
; CHECK: %a = load i32, i32* %p, !range !0
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !0
@@ -11,7 +11,7 @@ define i32 @test1(i32* %p) {
}
define i32 @test2(i32* %p) {
-; CHECK: @test2(i32* %p)
+; CHECK-LABEL: @test2(i32* %p)
; CHECK: %a = load i32, i32* %p
; CHECK-NOT: range
; CHECK: %c = add i32 %a, %a
@@ -22,7 +22,7 @@ define i32 @test2(i32* %p) {
}
define i32 @test3(i32* %p) {
-; CHECK: @test3(i32* %p)
+; CHECK-LABEL: @test3(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[DISJOINT_RANGE:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !0
@@ -32,7 +32,7 @@ define i32 @test3(i32* %p) {
}
define i32 @test4(i32* %p) {
-; CHECK: @test4(i32* %p)
+; CHECK-LABEL: @test4(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[MERGED_RANGE:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !0
@@ -42,7 +42,7 @@ define i32 @test4(i32* %p) {
}
define i32 @test5(i32* %p) {
-; CHECK: @test5(i32* %p)
+; CHECK-LABEL: @test5(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[MERGED_SIGNED_RANGE:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !3
@@ -52,7 +52,7 @@ define i32 @test5(i32* %p) {
}
define i32 @test6(i32* %p) {
-; CHECK: @test6(i32* %p)
+; CHECK-LABEL: @test6(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[MERGED_TEST6:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !5
@@ -62,7 +62,7 @@ define i32 @test6(i32* %p) {
}
define i32 @test7(i32* %p) {
-; CHECK: @test7(i32* %p)
+; CHECK-LABEL: @test7(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[MERGED_TEST7:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !7
@@ -72,7 +72,7 @@ define i32 @test7(i32* %p) {
}
define i32 @test8(i32* %p) {
-; CHECK: @test8(i32* %p)
+; CHECK-LABEL: @test8(i32* %p)
; CHECK: %a = load i32, i32* %p
; CHECK-NOT: range
; CHECK: %c = add i32 %a, %a
@@ -84,17 +84,17 @@ define i32 @test8(i32* %p) {
; CHECK: ![[DISJOINT_RANGE]] = !{i32 0, i32 2, i32 3, i32 5}
; CHECK: ![[MERGED_RANGE]] = !{i32 0, i32 5}
-; CHECK: ![[MERGED_SIGNED_RANGE]] = !{i32 -3, i32 -2, i32 1, i32 2}
+; CHECK: ![[MERGED_SIGNED_RANGE]] = !{i32 -5, i32 -2, i32 1, i32 5}
; CHECK: ![[MERGED_TEST6]] = !{i32 10, i32 1}
; CHECK: ![[MERGED_TEST7]] = !{i32 3, i32 4, i32 5, i32 2}
!0 = !{i32 0, i32 2}
!1 = !{i32 3, i32 5}
!2 = !{i32 2, i32 5}
-!3 = !{i32 -3, i32 -2}
-!4 = !{i32 1, i32 2}
+!3 = !{i32 -5, i32 -2}
+!4 = !{i32 1, i32 5}
!5 = !{i32 10, i32 1}
-!6 = !{i32 12, i32 13}
+!6 = !{i32 12, i32 16}
!7 = !{i32 1, i32 2, i32 3, i32 4}
!8 = !{i32 5, i32 1}
!9 = !{i32 1, i32 5}
diff --git a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
index 584f0bf467fa..c62ec10df790 100644
--- a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
+++ b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
@@ -5,14 +5,14 @@
@A = global i32 0
; CHECK: @A = global i32 0
-@D = internal alias i32* @A
+@D = internal alias i32, i32* @A
; DEAD-NOT: @D
-@L1 = alias i32* @A
-; CHECK: @L1 = alias i32* @A
+@L1 = alias i32, i32* @A
+; CHECK: @L1 = alias i32, i32* @A
-@L2 = internal alias i32* @L1
-; CHECK: @L2 = internal alias i32* @L1
+@L2 = internal alias i32, i32* @L1
+; CHECK: @L2 = internal alias i32, i32* @L1
-@L3 = alias i32* @L2
-; CHECK: @L3 = alias i32* @L2
+@L3 = alias i32, i32* @L2
+; CHECK: @L3 = alias i32, i32* @L2
diff --git a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
index 5fb4444c6ba8..17474888d79b 100644
--- a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
+++ b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
@@ -1,4 +1,4 @@
; RUN: opt < %s -globaldce
-@A = internal alias void ()* @F
+@A = internal alias void (), void ()* @F
define internal void @F() { ret void }
diff --git a/test/Transforms/GlobalDCE/pr20981.ll b/test/Transforms/GlobalDCE/pr20981.ll
index 0eaa6b899091..c3e06699da77 100644
--- a/test/Transforms/GlobalDCE/pr20981.ll
+++ b/test/Transforms/GlobalDCE/pr20981.ll
@@ -3,8 +3,8 @@
$c1 = comdat any
; CHECK: $c1 = comdat any
-@a1 = linkonce_odr alias void ()* @f1
-; CHECK: @a1 = linkonce_odr alias void ()* @f1
+@a1 = linkonce_odr alias void (), void ()* @f1
+; CHECK: @a1 = linkonce_odr alias void (), void ()* @f1
define linkonce_odr void @f1() comdat($c1) {
ret void
diff --git a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
index a3e90045d64b..e6337adefa13 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
@@ -2,7 +2,7 @@
@g = global i32 0
-@a = alias bitcast (i32* @g to i8*)
+@a = alias i8, bitcast (i32* @g to i8*)
define void @f() {
%tmp = load i8, i8* @a
diff --git a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
index 6933d4a8d96c..42c243d9d7c0 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
@@ -6,14 +6,14 @@ define internal void @f() {
ret void
}
-@a = alias void ()* @f
+@a = alias void (), void ()* @f
define void @g() {
call void() @a()
ret void
}
-@b = internal alias void ()* @g
+@b = internal alias void (), void ()* @g
; CHECK-NOT: @b
define void @h() {
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index a8d618ae522d..f74f2081dc20 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -56,10 +56,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.gv = !{!0}
!0 = !DIGlobalVariable(name: "Stop", line: 2, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !2, variable: i32* @Stop)
-!1 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
+!1 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
!2 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!3 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 4, arg: 0, scope: !4, file: !1, type: !2)
-!4 = !DISubprogram(name: "foo", linkageName: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !5)
+!3 = !DILocalVariable(name: "i", line: 4, arg: 1, scope: !4, file: !1, type: !2)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !5)
!5 = !DISubroutineType(types: !6)
!6 = !{!2, !2}
!7 = !DILocation(line: 5, scope: !8)
@@ -70,7 +70,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!12 = !DILocation(line: 11, scope: !8)
!13 = !DILocation(line: 14, scope: !14)
!14 = distinct !DILexicalBlock(line: 0, column: 0, file: !20, scope: !15)
-!15 = !DISubprogram(name: "bar", linkageName: "bar", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !16)
+!15 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !16)
!16 = !DISubroutineType(types: !17)
!17 = !{!2}
!18 = !DILocation(line: 15, scope: !14)
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index 090d78455226..46b90ec29b9d 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -1,20 +1,20 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
-@foo1 = alias void ()* @foo2
-; CHECK: @foo1 = alias void ()* @bar2
+@foo1 = alias void (), void ()* @foo2
+; CHECK: @foo1 = alias void (), void ()* @bar2
-@foo2 = alias void()* @bar1
-; CHECK: @foo2 = alias void ()* @bar2
+@foo2 = alias void(), void()* @bar1
+; CHECK: @foo2 = alias void (), void ()* @bar2
-@bar1 = alias void ()* @bar2
-; CHECK: @bar1 = alias void ()* @bar2
+@bar1 = alias void (), void ()* @bar2
+; CHECK: @bar1 = alias void (), void ()* @bar2
-@weak1 = weak alias void ()* @bar2
-; CHECK: @weak1 = weak alias void ()* @bar2
+@weak1 = weak alias void (), void ()* @bar2
+; CHECK: @weak1 = weak alias void (), void ()* @bar2
@bar4 = private unnamed_addr constant [2 x i8*] zeroinitializer
-@foo4 = linkonce_odr unnamed_addr alias getelementptr inbounds ([2 x i8*], [2 x i8*]* @bar4, i32 0, i32 1)
-; CHECK: @foo4 = linkonce_odr unnamed_addr alias getelementptr inbounds ([2 x i8*], [2 x i8*]* @bar4, i32 0, i32 1)
+@foo4 = weak_odr unnamed_addr alias i8*, getelementptr inbounds ([2 x i8*], [2 x i8*]* @bar4, i32 0, i32 1)
+; CHECK: @foo4 = weak_odr unnamed_addr alias i8*, getelementptr inbounds ([2 x i8*], [2 x i8*]* @bar4, i32 0, i32 1)
define void @bar2() {
ret void
@@ -37,7 +37,7 @@ entry:
ret void
}
-@foo3 = alias void ()* @bar3
+@foo3 = alias void (), void ()* @bar3
; CHECK-NOT: bar3
define internal void @bar3() {
diff --git a/test/Transforms/GlobalOpt/alias-used-address-space.ll b/test/Transforms/GlobalOpt/alias-used-address-space.ll
index 62e74ba2ab48..367f375ec900 100644
--- a/test/Transforms/GlobalOpt/alias-used-address-space.ll
+++ b/test/Transforms/GlobalOpt/alias-used-address-space.ll
@@ -7,7 +7,7 @@ target datalayout = "p:32:32:32-p1:16:16:16"
@i = internal addrspace(1) global i8 42
; CHECK: @ia = internal addrspace(1) global i8 42
-@ia = internal alias i8 addrspace(1)* @i
+@ia = internal alias i8, i8 addrspace(1)* @i
@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
; CHECK-DAG: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
@@ -18,8 +18,8 @@ target datalayout = "p:32:32:32-p1:16:16:16"
@sameAsUsed = global [1 x i8*] [i8* addrspacecast(i8 addrspace(1)* @ca to i8*)]
; CHECK-DAG: @sameAsUsed = global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @c to i8*)]
-@ca = internal alias i8 addrspace(1)* @c
-; CHECK: @ca = internal alias i8 addrspace(1)* @c
+@ca = internal alias i8, i8 addrspace(1)* @c
+; CHECK: @ca = internal alias i8, i8 addrspace(1)* @c
define i8 addrspace(1)* @h() {
ret i8 addrspace(1)* @ca
diff --git a/test/Transforms/GlobalOpt/alias-used-section.ll b/test/Transforms/GlobalOpt/alias-used-section.ll
index 4dab2f5a02f5..a3657dfd16bc 100644
--- a/test/Transforms/GlobalOpt/alias-used-section.ll
+++ b/test/Transforms/GlobalOpt/alias-used-section.ll
@@ -1,7 +1,7 @@
; RUN: opt -S -globalopt < %s | FileCheck %s
@_Z17in_custom_section = internal global i8 42, section "CUSTOM"
-@in_custom_section = internal dllexport alias i8* @_Z17in_custom_section
+@in_custom_section = internal dllexport alias i8, i8* @_Z17in_custom_section
; CHECK: @in_custom_section = internal dllexport global i8 42, section "CUSTOM"
diff --git a/test/Transforms/GlobalOpt/alias-used.ll b/test/Transforms/GlobalOpt/alias-used.ll
index 21f06b7be5ff..9ced3974ee87 100644
--- a/test/Transforms/GlobalOpt/alias-used.ll
+++ b/test/Transforms/GlobalOpt/alias-used.ll
@@ -4,10 +4,10 @@
@i = internal global i8 42
; CHECK: @ia = internal global i8 42
-@ia = internal alias i8* @i
+@ia = internal alias i8, i8* @i
@llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
-; CHECK-DAG: @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
+; CHECK-DAG: @llvm.used = appending global [3 x i8*] [i8* @ca, i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*)], section "llvm.metadata"
@llvm.compiler.used = appending global [4 x i8*] [i8* bitcast (void ()* @fa3 to i8*), i8* bitcast (void ()* @fa to i8*), i8* @ia, i8* @i], section "llvm.metadata"
; CHECK-DAG: @llvm.compiler.used = appending global [2 x i8*] [i8* bitcast (void ()* @fa3 to i8*), i8* @ia], section "llvm.metadata"
@@ -18,17 +18,17 @@
@other = global i32* bitcast (void ()* @fa to i32*)
; CHECK-DAG: @other = global i32* bitcast (void ()* @f to i32*)
-@fa = internal alias void ()* @f
-; CHECK: @fa = internal alias void ()* @f
+@fa = internal alias void (), void ()* @f
+; CHECK: @fa = internal alias void (), void ()* @f
-@fa2 = internal alias void ()* @f
+@fa2 = internal alias void (), void ()* @f
; CHECK-NOT: @fa2
-@fa3 = internal alias void ()* @f
+@fa3 = internal alias void (), void ()* @f
; CHECK: @fa3
-@ca = internal alias i8* @c
-; CHECK: @ca = internal alias i8* @c
+@ca = internal alias i8, i8* @c
+; CHECK: @ca = internal alias i8, i8* @c
define void @f() {
ret void
diff --git a/test/Transforms/GlobalOpt/assume.ll b/test/Transforms/GlobalOpt/assume.ll
new file mode 100644
index 000000000000..3f3157a38fbb
--- /dev/null
+++ b/test/Transforms/GlobalOpt/assume.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; CHECK: @tmp = global i32 42
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@tmp = global i32 0
+
+define i32 @TheAnswerToLifeTheUniverseAndEverything() {
+ ret i32 42
+}
+
+define void @_GLOBAL__I_a() {
+enter:
+ %tmp1 = call i32 @TheAnswerToLifeTheUniverseAndEverything()
+ store i32 %tmp1, i32* @tmp
+ %cmp = icmp eq i32 %tmp1, 42
+ call void @llvm.assume(i1 %cmp)
+ ret void
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/GlobalOpt/available_externally_global_ctors.ll b/test/Transforms/GlobalOpt/available_externally_global_ctors.ll
new file mode 100644
index 000000000000..7092a5ae2226
--- /dev/null
+++ b/test/Transforms/GlobalOpt/available_externally_global_ctors.ll
@@ -0,0 +1,22 @@
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; Verify that the initialization of the available_externally global is not eliminated
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo_static_init, i8* null }]
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo_static_init, i8* null }]
+@foo_external = available_externally global void ()* null
+
+define internal void @foo_static_init() {
+entry:
+ store void ()* @foo_impl, void ()** @foo_external
+ ret void
+}
+
+define internal void @foo_impl() {
+entry:
+ ret void
+}
+
diff --git a/test/Transforms/GlobalOpt/deadglobal.ll b/test/Transforms/GlobalOpt/deadglobal.ll
index 9563a23b2c29..f5eed44cbb6e 100644
--- a/test/Transforms/GlobalOpt/deadglobal.ll
+++ b/test/Transforms/GlobalOpt/deadglobal.ll
@@ -1,11 +1,14 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
@G1 = internal global i32 123 ; <i32*> [#uses=1]
+@A1 = internal alias i32, i32* @G1
; CHECK-NOT: @G1
; CHECK: @G2
; CHECK-NOT: @G3
+; CHECK-NOT: @A1
+
define void @foo1() {
; CHECK: define void @foo
; CHECK-NEXT: ret
diff --git a/test/Transforms/GlobalOpt/externally-initialized-aggregate.ll b/test/Transforms/GlobalOpt/externally-initialized-aggregate.ll
new file mode 100644
index 000000000000..b446d24f1fd2
--- /dev/null
+++ b/test/Transforms/GlobalOpt/externally-initialized-aggregate.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -S -globalopt | FileCheck %s
+
+; This global is externally_initialized, so if we split it into scalars we
+; should keep that flag set on all of the new globals. This will prevent the
+; store to @a[0] from being constant propagated to the load in @foo, but will not
+; prevent @a[1] from being removed since it is dead.
+; CHECK: @a.0 = internal unnamed_addr externally_initialized global i32 undef
+; CHECK-NOT @a.1
+@a = internal externally_initialized global [2 x i32] undef, align 4
+; This is the same, but a struct rather than an array.
+; CHECK: @b.0 = internal unnamed_addr externally_initialized global i32 undef
+; CHECK-NOT @b.1
+@b = internal externally_initialized global {i32, i32} undef, align 4
+
+define i32 @foo() {
+; CHECK-LABEL: define i32 @foo
+entry:
+; This load uses the split global, but cannot be constant-propagated away.
+; CHECK: %0 = load i32, i32* @a.0
+ %0 = load i32, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i32 0, i32 0), align 4
+ ret i32 %0
+}
+
+define i32 @bar() {
+; CHECK-LABEL: define i32 @bar
+entry:
+; This load uses the split global, but cannot be constant-propagated away.
+; CHECK: %0 = load i32, i32* @b.0
+ %0 = load i32, i32* getelementptr inbounds ({i32, i32}, {i32, i32}* @b, i32 0, i32 0), align 4
+ ret i32 %0
+}
+
+define void @init() {
+; CHECK-LABEL: define void @init
+entry:
+; This store uses the split global, but cannot be constant-propagated away.
+; CHECK: store i32 1, i32* @a.0
+ store i32 1, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i32 0, i32 0), align 4
+; This store can be removed, because the second element of @a is never read.
+; CHECK-NOT: store i32 2, i32* @a.1
+ store i32 2, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i32 0, i32 1), align 4
+
+; This store uses the split global, but cannot be constant-propagated away.
+; CHECK: store i32 3, i32* @b.0
+ store i32 3, i32* getelementptr inbounds ({i32, i32}, {i32, i32}* @b, i32 0, i32 0), align 4
+; This store can be removed, because the second element of @b is never read.
+; CHECK-NOT: store i32 4, i32* @b.1
+ store i32 4, i32* getelementptr inbounds ({i32, i32}, {i32, i32}* @b, i32 0, i32 1), align 4
+ ret void
+}
diff --git a/test/Transforms/GlobalOpt/externally-initialized.ll b/test/Transforms/GlobalOpt/externally-initialized.ll
new file mode 100644
index 000000000000..c01ba10f49c9
--- /dev/null
+++ b/test/Transforms/GlobalOpt/externally-initialized.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -S -globalopt | FileCheck %s
+
+; This global is externally_initialized, which may modify the value between
+; it's static initializer and any code in this module being run, so the only
+; write to it cannot be merged into the static initialiser.
+; CHECK: @a = internal unnamed_addr externally_initialized global i32 undef
+@a = internal externally_initialized global i32 undef
+
+; This global is stored to by the external initialization, so cannot be
+; constant-propagated and removed, despite the fact that there are no writes
+; to it.
+; CHECK: @b = internal unnamed_addr externally_initialized global i32 undef
+@b = internal externally_initialized global i32 undef
+
+
+define void @foo() {
+; CHECK-LABEL: foo
+entry:
+; CHECK: store i32 42, i32* @a
+ store i32 42, i32* @a
+ ret void
+}
+define i32 @bar() {
+; CHECK-LABEL: bar
+entry:
+; CHECK: %val = load i32, i32* @a
+ %val = load i32, i32* @a
+ ret i32 %val
+}
+
+define i32 @baz() {
+; CHECK-LABEL: baz
+entry:
+; CHECK: %val = load i32, i32* @b
+ %val = load i32, i32* @b
+ ret i32 %val
+}
diff --git a/test/Transforms/GlobalOpt/global-demotion.ll b/test/Transforms/GlobalOpt/global-demotion.ll
new file mode 100644
index 000000000000..7965cb896208
--- /dev/null
+++ b/test/Transforms/GlobalOpt/global-demotion.ll
@@ -0,0 +1,80 @@
+; RUN: opt -globalopt -S < %s | FileCheck %s
+
+@G1 = internal global i32 5
+@G2 = internal global i32 5
+@G3 = internal global i32 5
+@G4 = internal global i32 5
+@G5 = internal global i32 5
+
+; CHECK-LABEL: @test1
+define internal i32 @test1() norecurse {
+; CHECK-NOT: @G1
+ store i32 4, i32* @G1
+ %a = load i32, i32* @G1
+; CHECK: ret
+ ret i32 %a
+}
+
+; The load comes before the store which makes @G2 live before the call.
+; CHECK-LABEL: @test2
+define internal i32 @test2() norecurse {
+; CHECK-NOT: %G2
+ %a = load i32, i32* @G2
+ store i32 4, i32* @G2
+; CHECK: ret
+ ret i32 %a
+}
+
+; This global is indexed by a GEP - this makes it partial alias and we bail out.
+; FIXME: We don't actually have to bail out in this case.
+
+; CHECK-LABEL: @test3
+define internal i32 @test3() norecurse {
+; CHECK-NOT: %G3
+ %x = getelementptr i32,i32* @G3, i32 0
+ %a = load i32, i32* %x
+ store i32 4, i32* @G3
+; CHECK: ret
+ ret i32 %a
+}
+
+; The global is casted away to a larger type then loaded. The store only partially
+; covers the load, so we must not demote.
+
+; CHECK-LABEL: @test4
+define internal i32 @test4() norecurse {
+; CHECK-NOT: %G4
+ store i32 4, i32* @G4
+ %x = bitcast i32* @G4 to i64*
+ %a = load i64, i64* %x
+ %b = trunc i64 %a to i32
+; CHECK: ret
+ ret i32 %b
+}
+
+; The global is casted away to a smaller type then loaded. This one is fine.
+
+; CHECK-LABEL: @test5
+define internal i32 @test5() norecurse {
+; CHECK-NOT: @G5
+ store i32 4, i32* @G5
+ %x = bitcast i32* @G5 to i16*
+ %a = load i16, i16* %x
+ %b = zext i16 %a to i32
+; CHECK: ret
+ ret i32 %b
+}
+
+define i32 @main() norecurse {
+ %a = call i32 @test1()
+ %b = call i32 @test2()
+ %c = call i32 @test3()
+ %d = call i32 @test4()
+ %e = call i32 @test5()
+
+ %x = or i32 %a, %b
+ %y = or i32 %x, %c
+ %z = or i32 %y, %d
+ %w = or i32 %z, %e
+ ret i32 %w
+}
diff --git a/test/Transforms/GlobalOpt/invariant.group.barrier.ll b/test/Transforms/GlobalOpt/invariant.group.barrier.ll
new file mode 100644
index 000000000000..54d91d408019
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invariant.group.barrier.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; This test is hint, what could globalOpt optimize and what it can't
+; FIXME: @tmp and @tmp2 can be safely set to 42
+; CHECK: @tmp = global i32 0
+; CHECK: @tmp2 = global i32 0
+; CHECK: @tmp3 = global i32 0
+
+@tmp = global i32 0
+@tmp2 = global i32 0
+@tmp3 = global i32 0
+@ptrToTmp3 = global i32* null
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define i32 @TheAnswerToLifeTheUniverseAndEverything() {
+ ret i32 42
+}
+
+define void @_GLOBAL__I_a() {
+enter:
+ call void @_optimizable()
+ call void @_not_optimizable()
+ ret void
+}
+
+define void @_optimizable() {
+enter:
+ %valptr = alloca i32
+
+ %val = call i32 @TheAnswerToLifeTheUniverseAndEverything()
+ store i32 %val, i32* @tmp
+ store i32 %val, i32* %valptr
+
+ %0 = bitcast i32* %valptr to i8*
+ %barr = call i8* @llvm.invariant.group.barrier(i8* %0)
+ %1 = bitcast i8* %barr to i32*
+
+ %val2 = load i32, i32* %1
+ store i32 %val2, i32* @tmp2
+ ret void
+}
+
+; We can't step through invariant.group.barrier here, because that would change
+; this load in @usage_of_globals()
+; val = load i32, i32* %ptrVal, !invariant.group !0
+; into
+; %val = load i32, i32* @tmp3, !invariant.group !0
+; and then we could assume that %val and %val2 to be the same, which coud be
+; false, because @changeTmp3ValAndCallBarrierInside() may change the value
+; of @tmp3.
+define void @_not_optimizable() {
+enter:
+ store i32 13, i32* @tmp3, !invariant.group !0
+
+ %0 = bitcast i32* @tmp3 to i8*
+ %barr = call i8* @llvm.invariant.group.barrier(i8* %0)
+ %1 = bitcast i8* %barr to i32*
+
+ store i32* %1, i32** @ptrToTmp3
+ store i32 42, i32* %1, !invariant.group !0
+
+ ret void
+}
+define void @usage_of_globals() {
+entry:
+ %ptrVal = load i32*, i32** @ptrToTmp3
+ %val = load i32, i32* %ptrVal, !invariant.group !0
+
+ call void @changeTmp3ValAndCallBarrierInside()
+ %val2 = load i32, i32* @tmp3, !invariant.group !0
+ ret void;
+}
+
+declare void @changeTmp3ValAndCallBarrierInside()
+
+declare i8* @llvm.invariant.group.barrier(i8*)
+
+!0 = !{!"something"}
diff --git a/test/Transforms/GlobalOpt/localize-constexpr.ll b/test/Transforms/GlobalOpt/localize-constexpr.ll
new file mode 100644
index 000000000000..3fa7db89b04b
--- /dev/null
+++ b/test/Transforms/GlobalOpt/localize-constexpr.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S < %s -globalopt | FileCheck %s
+
+@G = internal global i32 42
+
+define i8 @f() norecurse {
+; CHECK-LABEL: @f
+; CHECK: alloca
+; CHECK-NOT: @G
+; CHECK: }
+ store i32 42, i32* @G
+ %a = load i8, i8* bitcast (i32* @G to i8*)
+ ret i8 %a
+}
+
+@H = internal global i32 42
+@Halias = alias i32, i32* @H
+
+; @H can't be localized because @Halias uses it, and @Halias can't be converted to an instruction.
+define i8 @g() norecurse {
+; CHECK-LABEL: @g
+; CHECK-NOT: alloca
+; CHECK: @H
+; CHECK: }
+ store i32 42, i32* @H
+ %a = load i8, i8* bitcast (i32* @H to i8*)
+ ret i8 %a
+}
+
diff --git a/test/Transforms/GlobalOpt/metadata.ll b/test/Transforms/GlobalOpt/metadata.ll
index fb60b66ab58e..152d58e6e320 100644
--- a/test/Transforms/GlobalOpt/metadata.ll
+++ b/test/Transforms/GlobalOpt/metadata.ll
@@ -5,7 +5,7 @@
; to that containing %G should likewise drop to null.
@G = internal global i8** null
-define i32 @main(i32 %argc, i8** %argv) {
+define i32 @main(i32 %argc, i8** %argv) norecurse {
; CHECK-LABEL: @main(
; CHECK: %G = alloca
store i8** %argv, i8*** @G
diff --git a/test/Transforms/GlobalOpt/tls.ll b/test/Transforms/GlobalOpt/tls.ll
index f3cb4a65704d..d010b96188f1 100644
--- a/test/Transforms/GlobalOpt/tls.ll
+++ b/test/Transforms/GlobalOpt/tls.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
+; RUN: opt -emulated-tls < %s -globalopt -S | FileCheck %s
declare void @wait()
declare void @signal()
diff --git a/test/Transforms/GlobalOpt/unnamed-addr.ll b/test/Transforms/GlobalOpt/unnamed-addr.ll
index 85ed829c7112..de436c62a347 100644
--- a/test/Transforms/GlobalOpt/unnamed-addr.ll
+++ b/test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -12,7 +12,13 @@
; CHECK: @d = internal unnamed_addr constant [4 x i8] c"foo\00", align 1
; CHECK: @e = linkonce_odr global i32 0
+; CHECK: define internal fastcc void @used_internal() unnamed_addr {
+define internal void @used_internal() {
+ ret void
+}
+
define i32 @get_e() {
+ call void @used_internal()
%t = load i32, i32* @e
ret i32 %t
}
diff --git a/test/Transforms/IndVarSimplify/bec-cmp.ll b/test/Transforms/IndVarSimplify/bec-cmp.ll
new file mode 100644
index 000000000000..06a7d5ebe4dc
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/bec-cmp.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(i32* nocapture %a, i32* nocapture readonly %b, i32 signext %n) #0 {
+entry:
+
+; CHECK-LABEL: @foo
+
+ %cmp.10 = icmp sgt i32 %n, 0
+ br i1 %cmp.10, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge: ; preds = %for.inc
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
+ ret void
+
+for.body: ; preds = %for.body.lr.ph, %for.inc
+ %i.011 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ %cmp1 = icmp sgt i32 %i.011, %n
+ br i1 %cmp1, label %if.then, label %for.inc
+
+; CHECK-NOT: br i1 %cmp1, label %if.then, label %for.inc
+; CHECK: br i1 false, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %idxprom = sext i32 %i.011 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, 1
+ %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ store i32 %add, i32* %arrayidx3, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %inc = add nsw i32 %i.011, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/IndVarSimplify/const_phi.ll b/test/Transforms/IndVarSimplify/const_phi.ll
new file mode 100644
index 000000000000..33dc5514d3cc
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/const_phi.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; PR25372
+; We can compute the expression of %phi0 and that is a SCEV
+; constant. However, instcombine can't deduce this, so we can
+; potentially end up trying to handle a constant when replacing
+; congruent IVs.
+
+; CHECK-LABEL: crash
+define void @crash() {
+entry:
+ br i1 false, label %not_taken, label %pre
+
+not_taken:
+ br label %pre
+
+pre:
+; %phi0.pre and %phi1.pre are evaluated by SCEV to constant 0.
+ %phi0.pre = phi i32 [ 0, %entry ], [ 2, %not_taken ]
+ %phi1.pre = phi i32 [ 0, %entry ], [ 1, %not_taken ]
+ br label %loop
+
+loop:
+; %phi0 and %phi1 are evaluated by SCEV to constant 0.
+ %phi0 = phi i32 [ 0, %loop ], [ %phi0.pre, %pre ]
+ %phi1 = phi i32 [ 0, %loop ], [ %phi1.pre, %pre ]
+ br i1 undef, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
index 4d14b3681c5d..612f01e3cade 100644
--- a/test/Transforms/IndVarSimplify/eliminate-comparison.ll
+++ b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
@@ -209,3 +209,351 @@ assert77: ; preds = %noassert68
unrolledend: ; preds = %forcond38
ret i32 0
}
+
+declare void @side_effect()
+
+define void @func_13(i32* %len.ptr) {
+; CHECK-LABEL: @func_13(
+ entry:
+ %len = load i32, i32* %len.ptr, !range !0
+ %len.sub.1 = add i32 %len, -1
+ %len.is.zero = icmp eq i32 %len, 0
+ br i1 %len.is.zero, label %leave, label %loop
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.cmp = icmp ult i32 %iv, %len
+ br i1 %iv.cmp, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp ult i32 %iv, %len.sub.1
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_14(i32* %len.ptr) {
+; CHECK-LABEL: @func_14(
+ entry:
+ %len = load i32, i32* %len.ptr, !range !0
+ %len.sub.1 = add i32 %len, -1
+ %len.is.zero = icmp eq i32 %len, 0
+ %len.is.int_min = icmp eq i32 %len, 2147483648
+ %no.entry = or i1 %len.is.zero, %len.is.int_min
+ br i1 %no.entry, label %leave, label %loop
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.cmp = icmp slt i32 %iv, %len
+ br i1 %iv.cmp, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv, %len.sub.1
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_15(i32* %len.ptr) {
+; CHECK-LABEL: @func_15(
+ entry:
+ %len = load i32, i32* %len.ptr, !range !0
+ %len.add.1 = add i32 %len, 1
+ %len.add.1.is.zero = icmp eq i32 %len.add.1, 0
+ br i1 %len.add.1.is.zero, label %leave, label %loop
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.cmp = icmp ult i32 %iv, %len.add.1
+ br i1 %iv.cmp, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp ult i32 %iv, %len
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_16(i32* %len.ptr) {
+; CHECK-LABEL: @func_16(
+ entry:
+ %len = load i32, i32* %len.ptr, !range !0
+ %len.add.5 = add i32 %len, 5
+ %entry.cond.0 = icmp slt i32 %len, 2147483643
+ %entry.cond.1 = icmp slt i32 4, %len.add.5
+ %entry.cond = and i1 %entry.cond.0, %entry.cond.1
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.add.4 = add i32 %iv, 4
+ %iv.cmp = icmp slt i32 %iv.add.4, %len.add.5
+ br i1 %iv.cmp, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv, %len
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_17(i32* %len.ptr) {
+; CHECK-LABEL: @func_17(
+ entry:
+ %len = load i32, i32* %len.ptr
+ %len.add.5 = add i32 %len, -5
+ %entry.cond.0 = icmp slt i32 %len, 2147483653 ;; 2147483653 == INT_MIN - (-5)
+ %entry.cond.1 = icmp slt i32 -6, %len.add.5
+ %entry.cond = and i1 %entry.cond.0, %entry.cond.1
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.inc, %be ]
+ %iv = phi i32 [ -6, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.2.inc = add i32 %iv.2, 1
+ %iv.cmp = icmp slt i32 %iv, %len.add.5
+
+; Deduces {-5,+,1} s< (-5 + %len) from {0,+,1} < %len
+; since %len s< INT_MIN - (-5) from the entry condition
+
+; CHECK: br i1 true, label %be, label %leave
+ br i1 %iv.cmp, label %be, label %leave
+
+ be:
+; CHECK: be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.2, %len
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define i1 @func_18(i16* %tmp20, i32* %len.addr) {
+; CHECK-LABEL: @func_18(
+entry:
+ %len = load i32, i32* %len.addr, !range !0
+ %tmp18 = icmp eq i32 %len, 0
+ br i1 %tmp18, label %bb2, label %bb0.preheader
+
+bb0.preheader:
+ br label %bb0
+
+bb0:
+; CHECK: bb0:
+ %var_0.in = phi i32 [ %var_0, %bb1 ], [ %len, %bb0.preheader ]
+ %var_1 = phi i32 [ %tmp30, %bb1 ], [ 0, %bb0.preheader ]
+ %var_0 = add nsw i32 %var_0.in, -1
+ %tmp23 = icmp ult i32 %var_1, %len
+; CHECK: br i1 true, label %stay, label %bb2.loopexit
+ br i1 %tmp23, label %stay, label %bb2
+
+stay:
+; CHECK: stay:
+ %tmp25 = getelementptr inbounds i16, i16* %tmp20, i32 %var_1
+ %tmp26 = load i16, i16* %tmp25
+ %tmp29 = icmp eq i16 %tmp26, 0
+ br i1 %tmp29, label %bb1, label %bb2
+
+bb1:
+ %tmp30 = add i32 %var_1, 1
+ %tmp31 = icmp eq i32 %var_0, 0
+ br i1 %tmp31, label %bb3, label %bb0
+
+bb2:
+ ret i1 false
+
+bb3:
+ ret i1 true
+}
+
+define void @func_19(i32* %length.ptr) {
+; CHECK-LABEL: @func_19(
+ entry:
+ %length = load i32, i32* %length.ptr, !range !0
+ %length.is.nonzero = icmp ne i32 %length, 0
+ br i1 %length.is.nonzero, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp ult i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.inc, %length
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_20(i32* %length.ptr) {
+; Like @func_19, but %length is no longer provably positive, so
+; %range.check cannot be proved to be always true.
+
+; CHECK-LABEL: @func_20(
+ entry:
+ %length = load i32, i32* %length.ptr
+ %length.is.nonzero = icmp ne i32 %length, 0
+ br i1 %length.is.nonzero, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp ult i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 %range.check, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.inc, %length
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_21(i32* %length.ptr) {
+; CHECK-LABEL: @func_21(
+
+; This checks that the backedge condition, (I + 1) < Length - 1 implies
+; (I + 1) < Length
+ entry:
+ %length = load i32, i32* %length.ptr, !range !0
+ %lim = sub i32 %length, 1
+ %entry.cond = icmp sgt i32 %length, 1
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp slt i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.inc, %lim
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_22(i32* %length.ptr) {
+; CHECK-LABEL: @func_22(
+
+; This checks that the backedge condition, (I + 1) < Length - 1 implies
+; (I + 1) < Length
+ entry:
+ %length = load i32, i32* %length.ptr, !range !0
+ %lim = sub i32 %length, 1
+ %entry.cond = icmp sgt i32 %length, 1
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp sle i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp sle i32 %iv.inc, %lim
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_23(i32* %length.ptr) {
+; CHECK-LABEL: @func_23(
+ entry:
+ %length = load i32, i32* %length.ptr, !range !0
+ %entry.cond = icmp ult i32 4, %length
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 4, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp slt i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.inc, %length
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_24(i32* %init.ptr) {
+; CHECK-LABEL: @func_24(
+ entry:
+ %init = load i32, i32* %init.ptr, !range !0
+ %entry.cond = icmp ugt i32 %init, 4
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.dec, %be ]
+ %iv.dec = add i32 %iv, -1
+ %range.check = icmp sgt i32 %iv, 4
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp sgt i32 %iv.dec, 4
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+
+!0 = !{i32 0, i32 2147483647}
diff --git a/test/Transforms/IndVarSimplify/iv-widen.ll b/test/Transforms/IndVarSimplify/iv-widen.ll
index 464b03ce5595..ccf9fa0aa0ac 100644
--- a/test/Transforms/IndVarSimplify/iv-widen.ll
+++ b/test/Transforms/IndVarSimplify/iv-widen.ll
@@ -6,7 +6,7 @@ target datalayout = "n8:16:32:64"
target triple = "x86_64-apple-darwin"
-; CHECK-LABEL: @sloop
+; CHECK-LABEL: @loop_0
; CHECK-LABEL: B18:
; Only one phi now.
; CHECK: phi
@@ -16,7 +16,7 @@ target triple = "x86_64-apple-darwin"
; One trunc for the dummy() call.
; CHECK-LABEL: exit24:
; CHECK: trunc i64 {{.*}}lcssa.wide to i32
-define void @sloop(i32* %a) {
+define void @loop_0(i32* %a) {
Prologue:
br i1 undef, label %B18, label %B6
@@ -41,4 +41,30 @@ exit24: ; preds = %B18
unreachable
}
+define void @loop_1(i32 %lim) {
+; CHECK-LABEL: @loop_1(
+ entry:
+ %entry.cond = icmp ne i32 %lim, 0
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+; CHECK: %indvars.iv = phi i64 [ 1, %loop.preheader ], [ %indvars.iv.next, %loop ]
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: [[IV_INC:%[^ ]+]] = add nsw i64 %indvars.iv, -1
+; CHECK: call void @dummy.i64(i64 [[IV_INC]])
+
+ %iv = phi i32 [ 1, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+ %iv.inc.sub = add i32 %iv, -1
+ %iv.inc.sub.zext = zext i32 %iv.inc.sub to i64
+ call void @dummy.i64(i64 %iv.inc.sub.zext)
+ %be.cond = icmp ult i32 %iv.inc, %lim
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
declare void @dummy(i32)
+declare void @dummy.i64(i64)
diff --git a/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll b/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll
new file mode 100644
index 000000000000..eee321da2395
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll
@@ -0,0 +1,279 @@
+; RUN: opt -S -indvars %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1(i64 %start) {
+; CHECK-LABEL: @test1
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+; CHECK: %cmp1 = icmp slt i64 %start, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test2(i64 %start) {
+; CHECK-LABEL: @test2
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+; CHECK: %cmp1 = icmp sle i64 %start, -1
+ %cmp1 = icmp sle i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+; As long as the test dominates the backedge, we're good
+define void @test3(i64 %start) {
+; CHECK-LABEL: @test3
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp slt i64 %start, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test4(i64 %start) {
+; CHECK-LABEL: @test4
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp sgt i64 %start, -1
+ %cmp1 = icmp sgt i64 %indvars.iv, -1
+ br i1 %cmp1, label %loop, label %for.end
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test5(i64 %start) {
+; CHECK-LABEL: @test5
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nuw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp ugt i64 %start, 100
+ %cmp1 = icmp ugt i64 %indvars.iv, 100
+ br i1 %cmp1, label %loop, label %for.end
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test6(i64 %start) {
+; CHECK-LABEL: @test6
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nuw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp ult i64 %start, 100
+ %cmp1 = icmp ult i64 %indvars.iv, 100
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test7(i64 %start, i64* %inc_ptr) {
+; CHECK-LABEL: @test7
+entry:
+ %inc = load i64, i64* %inc_ptr, !range !0
+ %ok = icmp sge i64 %inc, 0
+ br i1 %ok, label %loop, label %for.end
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, %inc
+; CHECK: %cmp1 = icmp slt i64 %start, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+!0 = !{i64 0, i64 100}
+
+; Negative test - we can't show that the internal branch executes, so we can't
+; fold the test to a loop invariant one.
+define void @test1_neg(i64 %start) {
+; CHECK-LABEL: @test1_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %skip
+skip:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %backedge
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+ br label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+; Slightly subtle version of @test4 where the icmp dominates the backedge,
+; but the exit branch doesn't.
+define void @test2_neg(i64 %start) {
+; CHECK-LABEL: @test2_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp, label %backedge, label %skip
+skip:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+ br i1 %cmp1, label %for.end, label %backedge
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+ br label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+; The branch has to exit the loop if the condition is true
+define void @test3_neg(i64 %start) {
+; CHECK-LABEL: @test3_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %loop, label %for.end
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test4_neg(i64 %start) {
+; CHECK-LABEL: @test4_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp sgt i64 %indvars.iv, -1
+ %cmp1 = icmp sgt i64 %indvars.iv, -1
+
+; %cmp1 can be made loop invariant only if the branch below goes to
+; %the header when %cmp1 is true.
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test5_neg(i64 %start, i64 %inc) {
+; CHECK-LABEL: @test5_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, %inc
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test8(i64 %start, i64* %inc_ptr) {
+; CHECK-LABEL: @test8
+entry:
+ %inc = load i64, i64* %inc_ptr, !range !1
+ %ok = icmp sge i64 %inc, 0
+ br i1 %ok, label %loop, label %for.end
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, %inc
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+!1 = !{i64 -1, i64 100}
+
+
+declare void @foo()
diff --git a/test/Transforms/IndVarSimplify/pr24356.ll b/test/Transforms/IndVarSimplify/pr24356.ll
new file mode 100644
index 000000000000..eac4204c0e16
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24356.ll
@@ -0,0 +1,63 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind ssp uwtable
+define void @fn1() {
+; CHECK-LABEL: @fn1(
+bb:
+ br label %bb4.preheader
+
+bb4.preheader: ; preds = %bb, %bb16
+; CHECK-LABEL: bb4.preheader:
+ %b.03 = phi i8 [ 0, %bb ], [ %tmp17, %bb16 ]
+; CHECK: %tmp9 = icmp ugt i8 %b.03, 1
+; CHECK-NOT: %tmp9 = icmp ugt i8 0, 1
+
+ %tmp9 = icmp ugt i8 %b.03, 1
+ br i1 %tmp9, label %bb4.preheader.bb18.loopexit.split_crit_edge, label %bb4.preheader.bb4.preheader.split_crit_edge
+
+bb4.preheader.bb4.preheader.split_crit_edge: ; preds = %bb4.preheader
+ br label %bb4.preheader.split
+
+bb4.preheader.bb18.loopexit.split_crit_edge: ; preds = %bb4.preheader
+ store i32 0, i32* @a, align 4
+ br label %bb18.loopexit.split
+
+bb4.preheader.split: ; preds = %bb4.preheader.bb4.preheader.split_crit_edge
+ br label %bb7
+
+bb4: ; preds = %bb7
+ %tmp6 = icmp slt i32 %storemerge2, 0
+ br i1 %tmp6, label %bb7, label %bb16
+
+bb7: ; preds = %bb4.preheader.split, %bb4
+ %storemerge2 = phi i32 [ 0, %bb4.preheader.split ], [ %tmp14, %bb4 ]
+ %tmp14 = add nsw i32 %storemerge2, 1
+ br i1 false, label %bb18.loopexit, label %bb4
+
+bb16: ; preds = %bb4
+ %tmp14.lcssa5 = phi i32 [ %tmp14, %bb4 ]
+ %tmp17 = add i8 %b.03, -1
+ %tmp2 = icmp eq i8 %tmp17, -2
+ br i1 %tmp2, label %bb18.loopexit1, label %bb4.preheader
+
+bb18.loopexit: ; preds = %bb7
+ br label %bb18.loopexit.split
+
+bb18.loopexit.split: ; preds = %bb4.preheader.bb18.loopexit.split_crit_edge, %bb18.loopexit
+ br label %bb18
+
+bb18.loopexit1: ; preds = %bb16
+ %tmp14.lcssa5.lcssa = phi i32 [ %tmp14.lcssa5, %bb16 ]
+ store i32 %tmp14.lcssa5.lcssa, i32* @a, align 4
+ br label %bb18
+
+bb18: ; preds = %bb18.loopexit1, %bb18.loopexit.split
+ ret void
+}
+
+declare void @abort()
diff --git a/test/Transforms/IndVarSimplify/pr24783.ll b/test/Transforms/IndVarSimplify/pr24783.ll
new file mode 100644
index 000000000000..637cb1e196c5
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24783.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @f(i32* %end.s, i8** %loc, i32 %p) {
+; CHECK-LABEL: @f(
+entry:
+; CHECK: [[P_SEXT:%[0-9a-z]+]] = sext i32 %p to i64
+; CHECK: [[END:%[0-9a-z]+]] = getelementptr i32, i32* %end.s, i64 [[P_SEXT]]
+
+ %end = getelementptr inbounds i32, i32* %end.s, i32 %p
+ %init = bitcast i32* %end.s to i8*
+ br label %while.body.i
+
+while.body.i:
+ %ptr = phi i8* [ %ptr.inc, %while.body.i ], [ %init, %entry ]
+ %ptr.inc = getelementptr inbounds i8, i8* %ptr, i8 1
+ %ptr.inc.cast = bitcast i8* %ptr.inc to i32*
+ %cmp.i = icmp eq i32* %ptr.inc.cast, %end
+ br i1 %cmp.i, label %loop.exit, label %while.body.i
+
+loop.exit:
+; CHECK: loop.exit:
+; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %scevgep to i8*
+; CHECK: store i8* [[END_BCASTED]], i8** %loc
+ %ptr.inc.lcssa = phi i8* [ %ptr.inc, %while.body.i ]
+ store i8* %ptr.inc.lcssa, i8** %loc
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr24804.ll b/test/Transforms/IndVarSimplify/pr24804.ll
new file mode 100644
index 000000000000..6f89481853ad
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24804.ll
@@ -0,0 +1,25 @@
+; RUN: opt -indvars -loop-idiom -loop-deletion -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Checking for a crash
+
+define void @f(i32* %a) {
+; CHECK-LABEL: @f(
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %for.cond, %entry
+ %iv = phi i32 [ 0, %entry ], [ %add, %for.inc ], [ %iv, %for.cond ]
+ %add = add nsw i32 %iv, 1
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ br i1 undef, label %for.cond, label %for.inc
+
+for.inc: ; preds = %for.cond
+ br i1 undef, label %for.cond, label %for.end
+
+for.end: ; preds = %for.inc
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr24952.ll b/test/Transforms/IndVarSimplify/pr24952.ll
new file mode 100644
index 000000000000..c430cae98f58
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24952.ll
@@ -0,0 +1,27 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+declare void @use(i1)
+
+define void @f() {
+; CHECK-LABEL: @f(
+ entry:
+ %x = alloca i32
+ %y = alloca i32
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+
+ %x.gep = getelementptr i32, i32* %x, i32 %iv
+ %eql = icmp eq i32* %x.gep, %y
+; CHECK-NOT: @use(i1 true)
+ call void @use(i1 %eql)
+
+ ; %be.cond deliberately 'false' -- we want want the trip count to be 0.
+ %be.cond = icmp ult i32 %iv, 0
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr24956.ll b/test/Transforms/IndVarSimplify/pr24956.ll
new file mode 100644
index 000000000000..58688912cc37
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24956.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; Check that this test does not crash.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define void @re_update_line(i8* %new) {
+; CHECK: @re_update_line(
+entry:
+ %incdec.ptr6 = getelementptr inbounds i8, i8* %new, i64 1
+ br label %for.cond.11.preheader
+
+for.cond.11.preheader: ; preds = %for.inc.26, %entry
+ %n.154 = phi i8* [ %new, %entry ], [ %incdec.ptr27, %for.inc.26 ]
+ %cmp12.52 = icmp ult i8* %n.154, %incdec.ptr6
+ br i1 %cmp12.52, label %land.rhs.16.lr.ph, label %for.inc.26
+
+land.rhs.16.lr.ph: ; preds = %for.cond.11.preheader
+ br label %land.rhs.16
+
+for.cond.11: ; preds = %land.rhs.16
+ %incdec.ptr24 = getelementptr inbounds i8, i8* %p.053, i64 1
+ %cmp12 = icmp ult i8* %p.053, %new
+ br i1 %cmp12, label %land.rhs.16, label %for.inc.26
+
+land.rhs.16: ; preds = %for.cond.11, %land.rhs.16.lr.ph
+ %p.053 = phi i8* [ %n.154, %land.rhs.16.lr.ph ], [ %incdec.ptr24, %for.cond.11 ]
+ br i1 undef, label %for.cond.11, label %for.inc.26
+
+for.inc.26: ; preds = %land.rhs.16, %for.cond.11, %for.cond.11.preheader
+ %incdec.ptr27 = getelementptr inbounds i8, i8* %n.154, i64 1
+ br i1 false, label %for.cond.11.preheader, label %for.end.28
+
+for.end.28: ; preds = %for.inc.26
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr25047.ll b/test/Transforms/IndVarSimplify/pr25047.ll
new file mode 100644
index 000000000000..dc39a78c7eb9
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25047.ll
@@ -0,0 +1,49 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define void @fn1(i1 %c0, i1 %c1) {
+; CHECK-LABEL: @fn1(
+entry:
+ br i1 %c0, label %for.end.34, label %for.cond.1thread-pre-split
+
+for.cond.loopexit: ; preds = %for.end.29, %for.end.7
+ %f.lcssa = phi i32 [ %f.1, %for.end.29 ], [ %f.1, %for.end.7 ]
+ br i1 %c1, label %for.end.34, label %for.cond.1thread-pre-split
+
+for.cond.1thread-pre-split: ; preds = %for.cond.loopexit, %entry
+ %f.047 = phi i32 [ %f.lcssa, %for.cond.loopexit ], [ 0, %entry ]
+ br label %for.cond.1
+
+for.cond.1: ; preds = %for.cond.1, %for.cond.1thread-pre-split
+ br i1 %c1, label %for.cond.4, label %for.cond.1
+
+for.cond.4: ; preds = %for.end.29, %for.cond.1
+ %f.1 = phi i32 [ 0, %for.end.29 ], [ %f.047, %for.cond.1 ]
+ br label %for.cond.5
+
+for.cond.5: ; preds = %for.cond.5, %for.cond.4
+ %h.0 = phi i32 [ 0, %for.cond.4 ], [ %inc, %for.cond.5 ]
+ %cmp = icmp slt i32 %h.0, 1
+ %inc = add nsw i32 %h.0, 1
+ br i1 %cmp, label %for.cond.5, label %for.end.7
+
+for.end.7: ; preds = %for.cond.5
+ %g.lcssa = phi i32 [ %h.0, %for.cond.5 ]
+ %tobool10 = icmp eq i32 %g.lcssa, 0
+ br i1 %tobool10, label %for.end.8, label %for.cond.loopexit
+
+for.end.8: ; preds = %for.end.7
+ br i1 %c1, label %for.cond.19, label %for.end.29
+
+for.cond.19: ; preds = %for.cond.19, %for.end.8
+ br label %for.cond.19
+
+for.end.29: ; preds = %for.end.8
+ %tobool30 = icmp eq i32 %f.1, 0
+ br i1 %tobool30, label %for.cond.4, label %for.cond.loopexit
+
+for.end.34: ; preds = %for.cond.loopexit, %entry
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr25051.ll b/test/Transforms/IndVarSimplify/pr25051.ll
new file mode 100644
index 000000000000..a02d539a66dd
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25051.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define i32 @somefunc(double* %arr) {
+; CHECK-LABEL: @somefunc(
+entry:
+ br label %for.cond.1.preheader
+
+for.cond.1.preheader: ; preds = %for.inc.9, %entry
+ %index3.013 = phi i32 [ 0, %entry ], [ %index3.1.lcssa, %for.inc.9 ]
+ %index.012 = phi i32 [ 0, %entry ], [ %inc10, %for.inc.9 ]
+ %cmp2.9 = icmp sgt i32 %index.012, 0
+ br i1 %cmp2.9, label %for.body.3.lr.ph, label %for.inc.9
+
+for.body.3.lr.ph: ; preds = %for.cond.1.preheader
+ %idxprom5 = sext i32 %index.012 to i64
+ br label %for.body.3
+
+for.body.3: ; preds = %for.body.3, %for.body.3.lr.ph
+ %index3.111 = phi i32 [ %index3.013, %for.body.3.lr.ph ], [ %inc, %for.body.3 ]
+ %index2.010 = phi i32 [ 0, %for.body.3.lr.ph ], [ %inc8, %for.body.3 ]
+ %inc = add nsw i32 %index3.111, 1
+ %idxprom = sext i32 %index3.111 to i64
+ %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
+ %idxprom4 = sext i32 %index2.010 to i64
+ %inc8 = add nsw i32 %index2.010, 1
+ %cmp2 = icmp slt i32 %inc8, %index.012
+ br i1 %cmp2, label %for.body.3, label %for.inc.9.loopexit
+
+for.inc.9.loopexit: ; preds = %for.body.3
+ %inc.lcssa = phi i32 [ %inc, %for.body.3 ]
+ br label %for.inc.9
+
+for.inc.9: ; preds = %for.inc.9.loopexit, %for.cond.1.preheader
+ %index3.1.lcssa = phi i32 [ %index3.013, %for.cond.1.preheader ], [ %inc.lcssa, %for.inc.9.loopexit ]
+ %inc10 = add nsw i32 %index.012, 1
+ %cmp = icmp slt i32 %inc10, 10
+ br i1 %cmp, label %for.cond.1.preheader, label %for.end.11
+
+for.end.11: ; preds = %for.inc.9
+ ret i32 1
+}
diff --git a/test/Transforms/IndVarSimplify/pr25060.ll b/test/Transforms/IndVarSimplify/pr25060.ll
new file mode 100644
index 000000000000..25863fff2d36
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25060.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+define i16 @fn1() {
+; CHECK-LABEL: @fn1(
+entry:
+ br label %bb1
+
+bb1:
+ %i = phi i16 [ 0, %entry ], [ 1, %bb1 ]
+ %storemerge = phi i16 [ %storemerge2, %bb1 ], [ 0, %entry ]
+ %storemerge2 = phi i16 [ 10, %entry ], [ 200, %bb1 ]
+ %tmp10 = icmp eq i16 %i, 1
+ br i1 %tmp10, label %bb5, label %bb1
+
+bb5:
+ %storemerge.lcssa = phi i16 [ %storemerge, %bb1 ]
+; CHECK: ret i16 10
+ ret i16 %storemerge.lcssa
+}
+
+define i16 @fn2() {
+; CHECK-LABEL: @fn2(
+entry:
+ br label %bb1
+
+bb1:
+ %canary = phi i16 [ 0, %entry ], [ %canary.inc, %bb1 ]
+ %i = phi i16 [ 0, %entry ], [ %storemerge, %bb1 ]
+ %storemerge = phi i16 [ 0, %bb1 ], [ 10, %entry ]
+ %canary.inc = add i16 %canary, 1
+ %_tmp10 = icmp eq i16 %i, 10
+ br i1 %_tmp10, label %bb5, label %bb1
+
+bb5:
+; CHECK: ret i16 1
+ ret i16 %canary
+}
diff --git a/test/Transforms/IndVarSimplify/pr25360.ll b/test/Transforms/IndVarSimplify/pr25360.ll
new file mode 100644
index 000000000000..9f6df7051ea8
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25360.ll
@@ -0,0 +1,33 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+
+; Ensure that does not crash
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f() {
+; CHECK-LABEL: @f(
+entry:
+ br label %for.end
+
+for.condt: ; preds = %for.end
+ br i1 true, label %for.cond.0, label %for.end
+
+for.end: ; preds = %for.body.3
+ %inc = select i1 undef, i32 2, i32 1
+ br i1 false, label %for.condt, label %for.cond.0
+
+for.cond.0: ; preds = %for.end, %for.condt
+ %init = phi i32 [ 0, %for.condt ], [ %inc, %for.end ]
+ br i1 true, label %for.end.13, label %for.body.9
+
+for.body.9: ; preds = %for.body.9, %for.cond.0
+ %p1.addr.22 = phi i32 [ %inc10, %for.body.9 ], [ %init, %for.cond.0 ]
+ %inc10 = add i32 %p1.addr.22, 1
+ br i1 true, label %for.end.13, label %for.body.9
+
+for.end.13: ; preds = %for.cond.7.for.end.13_crit_edge, %for.cond.0
+ %p1.addr.2.lcssa = phi i32 [ %inc10, %for.body.9 ], [ %init, %for.cond.0 ]
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr25421.ll b/test/Transforms/IndVarSimplify/pr25421.ll
new file mode 100644
index 000000000000..efb71f9c3039
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25421.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare void @use(i1)
+
+define void @f(i32 %x) {
+; CHECK-LABEL: @f(
+ entry:
+ %conv = sext i32 %x to i64
+ %sub = add i64 %conv, -1
+ %ec = icmp sgt i32 %x, 0
+ br i1 %ec, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i64 %iv, 1
+ %cmp = icmp slt i64 %iv, %sub
+ call void @use(i1 %cmp)
+; CHECK: call void @use(i1 %cmp)
+; CHECK-NOT: call void @use(i1 true)
+
+ %be.cond = icmp slt i64 %iv.inc, %conv
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr25578.ll b/test/Transforms/IndVarSimplify/pr25578.ll
new file mode 100644
index 000000000000..bc648b517bbe
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25578.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @foo
+define void @foo() {
+entry:
+ br label %L1_header
+
+L1_header:
+ br label %L2_header
+
+; CHECK: L2_header:
+; CHECK: %[[INDVAR:.*]] = phi i64
+; CHECK: %[[TRUNC:.*]] = trunc i64 %[[INDVAR]] to i32
+L2_header:
+ %i = phi i32 [ 0, %L1_header ], [ %i_next, %L2_latch ]
+ %i_prom = sext i32 %i to i64
+ br label %L3_header
+
+L3_header:
+ br i1 undef, label %L3_latch, label %L2_exiting_1
+
+L3_latch:
+ br i1 undef, label %L3_header, label %L2_exiting_2
+
+L2_exiting_1:
+ br i1 undef, label %L2_latch, label %L1_latch
+
+L2_exiting_2:
+ br i1 undef, label %L2_latch, label %L1_latch
+
+L2_latch:
+ %i_next = add nsw i32 %i, 1
+ br label %L2_header
+
+L1_latch:
+; CHECK: L1_latch:
+; CHECK: %i_lcssa = phi i32 [ %[[TRUNC]], %L2_exiting_1 ], [ %[[TRUNC]], %L2_exiting_2 ]
+
+ %i_lcssa = phi i32 [ %i, %L2_exiting_1 ], [ %i, %L2_exiting_2 ]
+ br i1 undef, label %exit, label %L1_header
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/tripcount_infinite.ll b/test/Transforms/IndVarSimplify/tripcount_infinite.ll
index 0495b50c3e48..658598d3b7e4 100644
--- a/test/Transforms/IndVarSimplify/tripcount_infinite.ll
+++ b/test/Transforms/IndVarSimplify/tripcount_infinite.ll
@@ -1,38 +1,45 @@
; These tests have an infinite trip count. We obviously shouldn't remove the
; loops! :)
;
-; RUN: opt < %s -indvars -adce -simplifycfg -S | grep icmp | wc -l > %t2
-; RUN: llvm-as < %s | llvm-dis | grep icmp | wc -l > %t1
-; RUN: diff %t1 %t2
+; RUN: opt < %s -indvars -adce -simplifycfg -S | FileCheck %s
;; test for (i = 1; i != 100; i += 2)
define i32 @infinite_linear() {
+; CHECK-LABEL: @infinite_linear(
entry:
br label %loop
loop: ; preds = %loop, %entry
+; CHECK-LABEL: loop:
%i = phi i32 [ 1, %entry ], [ %i.next, %loop ] ; <i32> [#uses=3]
%i.next = add i32 %i, 2 ; <i32> [#uses=1]
%c = icmp ne i32 %i, 100 ; <i1> [#uses=1]
+; CHECK: icmp
+; CHECK: br
br i1 %c, label %loop, label %loopexit
loopexit: ; preds = %loop
+; CHECK-LABEL: loopexit:
ret i32 %i
}
;; test for (i = 1; i*i != 63; ++i)
define i32 @infinite_quadratic() {
+; CHECK-LABEL: @infinite_quadratic(
entry:
br label %loop
loop: ; preds = %loop, %entry
+; CHECK-LABEL: loop:
%i = phi i32 [ 1, %entry ], [ %i.next, %loop ] ; <i32> [#uses=4]
%isquare = mul i32 %i, %i ; <i32> [#uses=1]
%i.next = add i32 %i, 1 ; <i32> [#uses=1]
%c = icmp ne i32 %isquare, 63 ; <i1> [#uses=1]
+; CHECK: icmp
+; CHECK: br
br i1 %c, label %loop, label %loopexit
loopexit: ; preds = %loop
+; CHECK-LABEL: loopexit:
ret i32 %i
}
-
diff --git a/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index eb81ceb700b0..b87cd0550192 100644
--- a/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -193,3 +193,163 @@ for.body:
for.end:
ret i32 %sum.0
}
+
+define i32 @test6(i32* %a, i32 %b) {
+; CHECK-LABEL: @test6(
+; CHECK: [[B_SEXT:%[a-z0-9]+]] = sext i32 %b to i64
+; CHECK: for.cond:
+; CHECK: icmp sle i64 %indvars.iv, [[B_SEXT]]
+
+entry:
+ br label %for.cond
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp sle i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret i32 %sum.0
+}
+
+define i32 @test7(i32* %a, i32 %b) {
+; CHECK-LABEL: @test7(
+; CHECK: [[B_ZEXT:%[a-z0-9]+]] = zext i32 %b to i64
+; CHECK: [[B_SEXT:%[a-z0-9]+]] = sext i32 %b to i64
+; CHECK: for.cond:
+; CHECK: icmp ule i64 %indvars.iv, [[B_ZEXT]]
+; CHECK: for.body:
+; CHECK: icmp sle i64 %indvars.iv, [[B_SEXT]]
+
+entry:
+ br label %for.cond
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp ule i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ %cmp2 = icmp sle i32 %i.0, %b
+ br i1 %cmp2, label %for.cond, label %for.end
+
+for.end:
+ ret i32 %sum.0
+}
+
+define i32 @test8(i32* %a, i32 %b, i32 %init) {
+; CHECK-LABEL: @test8(
+; CHECK: [[INIT_SEXT:%[a-z0-9]+]] = sext i32 %init to i64
+; CHECK: [[B_ZEXT:%[a-z0-9]+]] = zext i32 %b to i64
+; CHECK: for.cond:
+; Note: %indvars.iv is the sign extension of %i.0
+; CHECK: %indvars.iv = phi i64 [ [[INIT_SEXT]], %for.cond.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK: icmp ule i64 %indvars.iv, [[B_ZEXT]]
+
+entry:
+ %e = icmp sgt i32 %init, 0
+ br i1 %e, label %for.cond, label %leave
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ %init, %entry ], [ %inc, %for.body ]
+ %cmp = icmp ule i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ %cmp2 = icmp slt i32 0, %inc
+ br i1 %cmp2, label %for.cond, label %for.end
+
+for.end:
+ ret i32 %sum.0
+
+leave:
+ ret i32 0
+}
+
+define i32 @test9(i32* %a, i32 %b, i32 %init) {
+; CHECK-LABEL: @test9(
+; CHECK: [[INIT_ZEXT:%[a-z0-9]+]] = zext i32 %init to i64
+; CHECK: [[B_SEXT:%[a-z0-9]+]] = sext i32 %b to i64
+; CHECK: for.cond:
+; Note: %indvars.iv is the zero extension of %i.0
+; CHECK: %indvars.iv = phi i64 [ [[INIT_ZEXT]], %for.cond.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK: icmp slt i64 %indvars.iv, [[B_SEXT]]
+
+entry:
+ %e = icmp sgt i32 %init, 0
+ br i1 %e, label %for.cond, label %leave
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ %init, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ %cmp2 = icmp slt i32 0, %inc
+ br i1 %cmp2, label %for.cond, label %for.end
+
+for.end:
+ ret i32 %sum.0
+
+leave:
+ ret i32 0
+}
+
+declare void @consume.i64(i64)
+declare void @consume.i1(i1)
+
+define i32 @test10(i32 %v) {
+; CHECK-LABEL: @test10(
+ entry:
+; CHECK-NOT: zext
+ br label %loop
+
+ loop:
+; CHECK: loop:
+; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: [[MUL:%[a-z0-9]+]] = mul nsw i64 %indvars.iv, -1
+; CHECK: [[MUL_TRUNC:%[a-z0-9]+]] = trunc i64 [[MUL]] to i32
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[MUL_TRUNC]], %v
+; CHECK: call void @consume.i1(i1 [[CMP]])
+
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+ %i.inc = add i32 %i, 1
+ %iv = mul i32 %i, -1
+ %cmp = icmp eq i32 %iv, %v
+ call void @consume.i1(i1 %cmp)
+ %be.cond = icmp slt i32 %i.inc, 11
+ %ext = sext i32 %iv to i64
+ call void @consume.i64(i64 %ext)
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret i32 22
+}
diff --git a/test/Transforms/IndVarSimplify/zext-nuw.ll b/test/Transforms/IndVarSimplify/zext-nuw.ll
new file mode 100644
index 000000000000..13138de6a507
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/zext-nuw.ll
@@ -0,0 +1,49 @@
+; RUN: opt -indvars -S %s | FileCheck %s
+
+%struct.A = type { i8 }
+
+@c = global %struct.A* null
+@d = global i32 4
+
+define void @_Z3fn1v() {
+ %x2 = load i32, i32* @d
+ %x3 = icmp slt i32 %x2, 1
+ %x4 = select i1 %x3, i32 1, i32 %x2
+ %x5 = load %struct.A*, %struct.A** @c
+ %j.sroa.0.0..sroa_idx = getelementptr %struct.A, %struct.A* %x5, i64 0, i32 0
+ %j.sroa.0.0.copyload = load i8, i8* %j.sroa.0.0..sroa_idx
+ br label %.preheader4.lr.ph
+
+.preheader4.lr.ph: ; preds = %0
+ ; CHECK-NOT: add i64 {{.*}}, 4294967296
+ br label %.preheader4
+
+.preheader4: ; preds = %x22, %.preheader4.lr.ph
+ %k.09 = phi i8* [ undef, %.preheader4.lr.ph ], [ %x25, %x22 ]
+ %x8 = icmp ult i32 0, 4
+ br i1 %x8, label %.preheader.lr.ph, label %x22
+
+.preheader.lr.ph: ; preds = %.preheader4
+ br label %.preheader
+
+.preheader: ; preds = %x17, %.preheader.lr.ph
+ %k.17 = phi i8* [ %k.09, %.preheader.lr.ph ], [ %x19, %x17 ]
+ %v.06 = phi i32 [ 0, %.preheader.lr.ph ], [ %x20, %x17 ]
+ br label %x17
+
+x17: ; preds = %.preheader
+ %x18 = sext i8 %j.sroa.0.0.copyload to i64
+ %x19 = getelementptr i8, i8* %k.17, i64 %x18
+ %x20 = add i32 %v.06, 1
+ %x21 = icmp ult i32 %x20, %x4
+ br i1 %x21, label %.preheader, label %._crit_edge.8
+
+._crit_edge.8: ; preds = %x17
+ %split = phi i8* [ %x19, %x17 ]
+ br label %x22
+
+x22: ; preds = %._crit_edge.8, %.preheader4
+ %k.1.lcssa = phi i8* [ %split, %._crit_edge.8 ], [ %k.09, %.preheader4 ]
+ %x25 = getelementptr i8, i8* %k.1.lcssa
+ br label %.preheader4
+}
diff --git a/test/Transforms/InferFunctionAttrs/annotate.ll b/test/Transforms/InferFunctionAttrs/annotate.ll
new file mode 100644
index 000000000000..963f484eb55e
--- /dev/null
+++ b/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -inferattrs -S | FileCheck %s
+; RUN: opt < %s -passes=inferattrs -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -inferattrs -S | FileCheck -check-prefix=CHECK-POSIX %s
+
+declare i8* @fopen(i8*, i8*)
+; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) [[G0:#[0-9]]]
+
+declare i8 @strlen(i8*)
+; CHECK: declare i8 @strlen(i8* nocapture) [[G1:#[0-9]]]
+
+declare i32* @realloc(i32*, i32)
+; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) [[G0]]
+
+; Test deliberately wrong declaration
+
+declare i32 @strcpy(...)
+; CHECK: declare i32 @strcpy(...)
+
+declare i32 @gettimeofday(i8*, i8*)
+; CHECK-POSIX: declare i32 @gettimeofday(i8* nocapture, i8* nocapture) [[G0:#[0-9]+]]
+
+; CHECK: attributes [[G0]] = { nounwind }
+; CHECK: attributes [[G1]] = { nounwind readonly }
+; CHECK-POSIX: attributes [[G0]] = { nounwind }
diff --git a/test/Transforms/Inline/alloca-dbgdeclare-merge.ll b/test/Transforms/Inline/alloca-dbgdeclare-merge.ll
new file mode 100644
index 000000000000..5314f0b8397d
--- /dev/null
+++ b/test/Transforms/Inline/alloca-dbgdeclare-merge.ll
@@ -0,0 +1,102 @@
+; Test that alloca merging in the inliner places dbg.declare calls immediately
+; after the merged alloca. Not at the end of the entry BB, and definitely not
+; before the alloca.
+;
+; clang -g -S -emit-llvm -Xclang -disable-llvm-optzns
+;
+;__attribute__((always_inline)) void f() {
+; char aaa[100];
+; aaa[10] = 1;
+;}
+;
+;__attribute__((always_inline)) void g() {
+; char bbb[100];
+; bbb[20] = 1;
+;}
+;
+;void h() {
+; f();
+; g();
+;}
+;
+; RUN: opt -always-inline -S < %s | FileCheck %s
+;
+; CHECK: define void @h()
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[AI:.*]] = alloca [100 x i8]
+; CHECK-NEXT: call void @llvm.dbg.declare(metadata [100 x i8]* %[[AI]],
+; CHECK-NEXT: call void @llvm.dbg.declare(metadata [100 x i8]* %[[AI]],
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: alwaysinline nounwind uwtable
+define void @f() #0 !dbg !4 {
+entry:
+ %aaa = alloca [100 x i8], align 16
+ call void @llvm.dbg.declare(metadata [100 x i8]* %aaa, metadata !12, metadata !17), !dbg !18
+ %arrayidx = getelementptr inbounds [100 x i8], [100 x i8]* %aaa, i64 0, i64 10, !dbg !19
+ store i8 1, i8* %arrayidx, align 2, !dbg !20
+ ret void, !dbg !21
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: alwaysinline nounwind uwtable
+define void @g() #0 !dbg !7 {
+entry:
+ %bbb = alloca [100 x i8], align 16
+ call void @llvm.dbg.declare(metadata [100 x i8]* %bbb, metadata !22, metadata !17), !dbg !23
+ %arrayidx = getelementptr inbounds [100 x i8], [100 x i8]* %bbb, i64 0, i64 20, !dbg !24
+ store i8 1, i8* %arrayidx, align 4, !dbg !25
+ ret void, !dbg !26
+}
+
+; Function Attrs: nounwind uwtable
+define void @h() #2 !dbg !8 {
+entry:
+ call void @f(), !dbg !27
+ call void @g(), !dbg !28
+ ret void, !dbg !29
+}
+
+attributes #0 = { alwaysinline nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "../1.c", directory: "/code/llvm-git/build")
+!2 = !{}
+!3 = !{!4, !7, !8}
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: false, variables: !2)
+!8 = distinct !DISubprogram(name: "h", scope: !1, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, scopeLine: 11, isOptimized: false, variables: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)"}
+!12 = !DILocalVariable(name: "aaa", scope: !4, file: !1, line: 2, type: !13)
+!13 = !DICompositeType(tag: DW_TAG_array_type, baseType: !14, size: 800, align: 8, elements: !15)
+!14 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!15 = !{!16}
+!16 = !DISubrange(count: 100)
+!17 = !DIExpression()
+!18 = !DILocation(line: 2, column: 8, scope: !4)
+!19 = !DILocation(line: 3, column: 3, scope: !4)
+!20 = !DILocation(line: 3, column: 11, scope: !4)
+!21 = !DILocation(line: 4, column: 1, scope: !4)
+!22 = !DILocalVariable(name: "bbb", scope: !7, file: !1, line: 7, type: !13)
+!23 = !DILocation(line: 7, column: 8, scope: !7)
+!24 = !DILocation(line: 8, column: 3, scope: !7)
+!25 = !DILocation(line: 8, column: 11, scope: !7)
+!26 = !DILocation(line: 9, column: 1, scope: !7)
+!27 = !DILocation(line: 12, column: 3, scope: !8)
+!28 = !DILocation(line: 13, column: 3, scope: !8)
+!29 = !DILocation(line: 14, column: 1, scope: !8)
diff --git a/test/Transforms/Inline/alloca-dbgdeclare.ll b/test/Transforms/Inline/alloca-dbgdeclare.ll
index 286f2931ff22..39575311b4b4 100644
--- a/test/Transforms/Inline/alloca-dbgdeclare.ll
+++ b/test/Transforms/Inline/alloca-dbgdeclare.ll
@@ -34,7 +34,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
; Function Attrs: nounwind
-define void @_Z3fn4v() #0 {
+define void @_Z3fn4v() #0 !dbg !21 {
entry:
; Test that the dbg.declare is moved together with the alloca.
; CHECK: define void @_Z3fn5v()
@@ -61,7 +61,7 @@ _Z3fn31A.exit: ; preds = %entry, %if.then.i
}
; Function Attrs: noreturn nounwind
-define void @_Z3fn5v() #3 {
+define void @_Z3fn5v() #3 !dbg !24 {
entry:
br label %while.body, !dbg !55
@@ -82,7 +82,7 @@ attributes #3 = { noreturn nounwind }
!llvm.module.flags = !{!28, !29}
!llvm.ident = !{!30}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227480) (llvm/trunk 227517)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !25, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227480) (llvm/trunk 227517)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !25, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "")
!2 = !{}
!3 = !{!4}
@@ -97,16 +97,16 @@ attributes #3 = { noreturn nounwind }
!12 = !{!13}
!13 = !DISubrange(count: 2)
!14 = !{!15, !21, !24}
-!15 = !DISubprogram(name: "fn3", linkageName: "_Z3fn31A", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !5, scope: !16, type: !17, function: void (%struct.A*)* @_Z3fn31A, variables: !19)
+!15 = distinct !DISubprogram(name: "fn3", linkageName: "_Z3fn31A", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !5, scope: !16, type: !17, variables: !19)
!16 = !DIFile(filename: "test.cpp", directory: "")
!17 = !DISubroutineType(types: !18)
!18 = !{null, !"_ZTS1A"}
!19 = !{!20}
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
-!21 = !DISubprogram(name: "fn4", linkageName: "_Z3fn4v", line: 11, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !5, scope: !16, type: !22, function: void ()* @_Z3fn4v, variables: !2)
+!20 = !DILocalVariable(name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
+!21 = distinct !DISubprogram(name: "fn4", linkageName: "_Z3fn4v", line: 11, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !5, scope: !16, type: !22, variables: !2)
!22 = !DISubroutineType(types: !23)
!23 = !{null}
-!24 = !DISubprogram(name: "fn5", linkageName: "_Z3fn5v", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !16, type: !22, function: void ()* @_Z3fn5v, variables: !2)
+!24 = distinct !DISubprogram(name: "fn5", linkageName: "_Z3fn5v", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !16, type: !22, variables: !2)
!25 = !{!26, !27}
!26 = !DIGlobalVariable(name: "a", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !"_ZTS1A", variable: %struct.A* @a)
!27 = !DIGlobalVariable(name: "b", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !"_ZTS1A", variable: %struct.A* @b)
@@ -128,7 +128,7 @@ attributes #3 = { noreturn nounwind }
!43 = !{!37, !37, i64 0}
!44 = !{!38, !38, i64 0}
!45 = !DILocation(line: 9, scope: !15)
-!46 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
+!46 = !DILocalVariable(name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
!47 = distinct !DILocation(line: 11, scope: !21)
!48 = !DIExpression(DW_OP_bit_piece, 32, 160)
!49 = !DILocation(line: 6, scope: !15, inlinedAt: !47)
diff --git a/test/Transforms/Inline/debug-info-duplicate-calls.ll b/test/Transforms/Inline/debug-info-duplicate-calls.ll
index 4e3d9d92fdb3..442ff325863c 100644
--- a/test/Transforms/Inline/debug-info-duplicate-calls.ll
+++ b/test/Transforms/Inline/debug-info-duplicate-calls.ll
@@ -33,10 +33,10 @@
; CHECK: call void @_Z2f1v(), !dbg [[fcs2_f4_f3cs1_f2:![0-9]+]]
; CHECK: call void @_Z2f1v(), !dbg [[fcs2_f4_f3cs2_f2:![0-9]+]]
-; CHECK-DAG: [[F:![0-9]+]] = !DISubprogram(name: "f"
-; CHECK-DAG: [[F2:![0-9]+]] = !DISubprogram(name: "f2"
-; CHECK-DAG: [[F3:![0-9]+]] = !DISubprogram(name: "f3"
-; CHECK-DAG: [[F4:![0-9]+]] = !DISubprogram(name: "f4"
+; CHECK-DAG: [[F:![0-9]+]] = distinct !DISubprogram(name: "f"
+; CHECK-DAG: [[F2:![0-9]+]] = distinct !DISubprogram(name: "f2"
+; CHECK-DAG: [[F3:![0-9]+]] = distinct !DISubprogram(name: "f3"
+; CHECK-DAG: [[F4:![0-9]+]] = distinct !DISubprogram(name: "f4"
; CHECK: [[fcs1_f4_f3cs1_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs1_f4_f3cs1:![0-9]+]])
; CHECK: [[fcs1_f4_f3cs1]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs1_f4:![0-9]+]])
@@ -59,7 +59,7 @@ $_Z2f3v = comdat any
$_Z2f2v = comdat any
; Function Attrs: uwtable
-define void @_Z1fv() #0 {
+define void @_Z1fv() #0 !dbg !4 {
entry:
call void @_Z2f4v(), !dbg !13
call void @_Z2f4v(), !dbg !13
@@ -67,14 +67,14 @@ entry:
}
; Function Attrs: alwaysinline inlinehint uwtable
-define linkonce_odr void @_Z2f4v() #1 comdat {
+define linkonce_odr void @_Z2f4v() #1 comdat !dbg !7 {
entry:
call void @_Z2f3v(), !dbg !15
ret void, !dbg !16
}
; Function Attrs: alwaysinline inlinehint uwtable
-define linkonce_odr void @_Z2f3v() #1 comdat {
+define linkonce_odr void @_Z2f3v() #1 comdat !dbg !8 {
entry:
call void @_Z2f2v(), !dbg !17
call void @_Z2f2v(), !dbg !17
@@ -82,7 +82,7 @@ entry:
}
; Function Attrs: alwaysinline inlinehint uwtable
-define linkonce_odr void @_Z2f2v() #1 comdat {
+define linkonce_odr void @_Z2f2v() #1 comdat !dbg !9 {
entry:
call void @_Z2f1v(), !dbg !19
ret void, !dbg !20
@@ -98,16 +98,16 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "debug-info-duplicate-calls.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !7, !8, !9}
-!4 = !DISubprogram(name: "f", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !1, scope: !5, type: !6, function: void ()* @_Z1fv, variables: !2)
+!4 = distinct !DISubprogram(name: "f", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "debug-info-duplicate-calls.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "f4", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: void ()* @_Z2f4v, variables: !2)
-!8 = !DISubprogram(name: "f3", line: 7, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, function: void ()* @_Z2f3v, variables: !2)
-!9 = !DISubprogram(name: "f2", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, function: void ()* @_Z2f2v, variables: !2)
+!7 = distinct !DISubprogram(name: "f4", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "f3", line: 7, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
+!9 = distinct !DISubprogram(name: "f2", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)"}
diff --git a/test/Transforms/Inline/debug-invoke.ll b/test/Transforms/Inline/debug-invoke.ll
index ca407acdd659..c547559d8c2a 100644
--- a/test/Transforms/Inline/debug-invoke.ll
+++ b/test/Transforms/Inline/debug-invoke.ll
@@ -4,7 +4,7 @@
; CHECK: invoke void @test()
; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:!.*]]
-; CHECK: [[SP:.*]] = !DISubprogram(
+; CHECK: [[SP:.*]] = distinct !DISubprogram(
; CHECK: [[INL_LOC]] = !DILocation(line: 1, scope: [[SP]], inlinedAt: [[INL_AT:.*]])
; CHECK: [[INL_AT]] = distinct !DILocation(line: 2, scope: [[SP]])
@@ -32,6 +32,6 @@ lpad:
!llvm.module.flags = !{!1}
!1 = !{i32 2, !"Debug Info Version", i32 3}
-!2 = !DISubprogram()
+!2 = distinct !DISubprogram()
!3 = !DILocation(line: 1, scope: !2)
!4 = !DILocation(line: 2, scope: !2)
diff --git a/test/Transforms/Inline/deopt-bundles.ll b/test/Transforms/Inline/deopt-bundles.ll
new file mode 100644
index 000000000000..3e3c52f7d2d5
--- /dev/null
+++ b/test/Transforms/Inline/deopt-bundles.ll
@@ -0,0 +1,203 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare void @f()
+declare i32 @g()
+declare fastcc i32 @g.fastcc()
+
+define i32 @callee_0() alwaysinline {
+ entry:
+ call void @f()
+ ret i32 2
+}
+
+define i32 @caller_0() {
+; CHECK-LABEL: @caller_0(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 2
+ %x = call i32 @callee_0() [ "deopt"(i32 5) ]
+ ret i32 %x
+}
+
+define i32 @callee_1() alwaysinline {
+ entry:
+ call void @f() [ "deopt"() ]
+ call void @f() [ "deopt"(i32 0, i32 1) ]
+ call void @f() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 2
+}
+
+define i32 @caller_1() {
+; CHECK-LABEL: @caller_1(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: call void @f() [ "deopt"(i32 5) ]
+; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1) ]
+; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT: ret i32 2
+
+ %x = call i32 @callee_1() [ "deopt"(i32 5) ]
+ ret i32 %x
+}
+
+define i32 @callee_2() alwaysinline {
+ entry:
+ %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_2(i32 %val) {
+; CHECK-LABEL: @caller_2(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: [[RVAL:%[^ ]+]] = call i32 @g() [ "deopt"(i32 %val, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT: ret i32 [[RVAL]]
+ %x = call i32 @callee_2() [ "deopt"(i32 %val) ]
+ ret i32 %x
+}
+
+define i32 @callee_3() alwaysinline {
+ entry:
+ %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_3() personality i8 3 {
+; CHECK-LABEL: @caller_3(
+ entry:
+ %x = invoke i32 @callee_3() [ "deopt"(i32 7) ] to label %normal unwind label %unwind
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+
+ normal:
+ ret i32 %x
+
+ unwind:
+ %cleanup = landingpad i8 cleanup
+ ret i32 101
+}
+
+define i32 @callee_4() alwaysinline personality i8 3 {
+ entry:
+ %v = invoke i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind
+
+ normal:
+ ret i32 %v
+
+ unwind:
+ %cleanup = landingpad i8 cleanup
+ ret i32 100
+}
+
+define i32 @caller_4() {
+; CHECK-LABEL: @caller_4(
+ entry:
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+ %x = call i32 @callee_4() [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+define i32 @callee_5() alwaysinline personality i8 3 {
+ entry:
+ %v = invoke fastcc i32 @g.fastcc() #0 [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind
+
+ normal:
+ ret i32 %v
+
+ unwind:
+ %cleanup = landingpad i8 cleanup
+ ret i32 100
+}
+
+define i32 @caller_5() {
+; CHECK-LABEL: @caller_5(
+ entry:
+; CHECK: invoke fastcc i32 @g.fastcc() #[[FOO_BAR_ATTR_IDX:[0-9]+]] [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+ %x = call i32 @callee_5() [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+define i32 @callee_6() alwaysinline personality i8 3 {
+ entry:
+ %v = call fastcc i32 @g.fastcc() #0 [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_6() {
+; CHECK-LABEL: @caller_6(
+ entry:
+; CHECK: call fastcc i32 @g.fastcc() #[[FOO_BAR_ATTR_IDX]] [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+ %x = call i32 @callee_6() [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+define i32 @callee_7(i1 %val) alwaysinline personality i8 3 {
+; We want something that PruningFunctionCloner is not smart enough to
+; recognize, but can be recognized by recursivelySimplifyInstruction.
+
+ entry:
+ br i1 %val, label %check, label %precheck
+
+ precheck:
+ br label %check
+
+ check:
+ %p = phi i1 [ %val, %entry ], [ true, %precheck ]
+ br i1 %p, label %do.not, label %do
+
+ do.not:
+ ret i32 0
+
+ do:
+ %v = call fastcc i32 @g.fastcc() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_7() {
+; CHECK-LABEL: @caller_7(
+ entry:
+; CHECK-NOT: call fastcc i32 @g.fastcc()
+; CHECK: ret i32 0
+ %x = call i32 @callee_7(i1 true) [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+define i32 @callee_8(i1 %val) alwaysinline personality i8 3 {
+; We want something that PruningFunctionCloner is not smart enough to
+; recognize, but can be recognized by recursivelySimplifyInstruction.
+
+ entry:
+ br i1 %val, label %check, label %precheck
+
+ precheck:
+ br label %check
+
+ check:
+ %p = phi i1 [ %val, %entry ], [ true, %precheck ]
+ br i1 %p, label %do.not, label %do
+
+ do.not:
+ ret i32 0
+
+ do:
+ %phi = phi i32 [ 0, %check ], [ %v, %do ]
+ %v = call fastcc i32 @g.fastcc() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ %ic = icmp eq i32 %v, 42
+ br i1 %ic, label %do, label %done
+
+ done:
+ ret i32 %phi
+}
+
+define i32 @caller_8() {
+; CHECK-LABEL: @caller_8(
+ entry:
+; CHECK-NOT: call fastcc i32 @g.fastcc()
+; CHECK: ret i32 0
+ %x = call i32 @callee_8(i1 true) [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+attributes #0 = { "foo"="bar" }
+
+; CHECK: attributes #[[FOO_BAR_ATTR_IDX]] = { "foo"="bar" }
diff --git a/test/Transforms/Inline/ignore-debug-info.ll b/test/Transforms/Inline/ignore-debug-info.ll
index eb92bc52cc94..f4f046846e82 100644
--- a/test/Transforms/Inline/ignore-debug-info.ll
+++ b/test/Transforms/Inline/ignore-debug-info.ll
@@ -12,11 +12,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
define <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) {
entry:
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%mul = fmul <4 x float> %a, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%mul1 = fmul <4 x float> %b, <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%add = fadd <4 x float> %mul, %mul1
ret <4 x float> %add
}
@@ -27,10 +27,10 @@ define float @outer_vectors(<4 x float> %a, <4 x float> %b) {
; CHECK: ret float
entry:
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%call = call <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b)
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%vecext = extractelement <4 x float> %call, i32 0
%vecext1 = extractelement <4 x float> %call, i32 1
%add = fadd float %vecext, %vecext1
@@ -47,10 +47,10 @@ attributes #0 = { nounwind readnone }
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !{!6}, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !{!6}, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 1, !"Debug Info Version", i32 3}
!5 = !{!""}
-!6 = !DISubprogram()
+!6 = distinct !DISubprogram()
diff --git a/test/Transforms/Inline/inline-assume.ll b/test/Transforms/Inline/inline-assume.ll
new file mode 100644
index 000000000000..4a7dc3edb22d
--- /dev/null
+++ b/test/Transforms/Inline/inline-assume.ll
@@ -0,0 +1,31 @@
+; RUN: opt -inline -S -o - < %s | FileCheck %s
+
+%0 = type opaque
+%struct.Foo = type { i32, %0* }
+
+; Test that we don't crash when inlining @bar (rdar://22521387).
+define void @foo(%struct.Foo* align 4 %a) {
+entry:
+ call fastcc void @bar(%struct.Foo* nonnull align 4 undef)
+
+; CHECK: call void @llvm.assume(i1 undef)
+; CHECK: unreachable
+
+ ret void
+}
+
+define fastcc void @bar(%struct.Foo* align 4 %a) {
+; CHECK-LABEL: @bar
+entry:
+ %b = getelementptr inbounds %struct.Foo, %struct.Foo* %a, i32 0, i32 1
+ br i1 undef, label %if.end, label %if.then.i.i
+
+if.then.i.i:
+ call void @llvm.assume(i1 undef)
+ unreachable
+
+if.end:
+ ret void
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/Inline/inline-cold-callee.ll b/test/Transforms/Inline/inline-cold-callee.ll
new file mode 100644
index 000000000000..1fd9f105db50
--- /dev/null
+++ b/test/Transforms/Inline/inline-cold-callee.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -inline -inlinecold-threshold=0 -S | FileCheck %s
+
+; This tests that a cold callee gets the (lower) inlinecold-threshold even without
+; Cold hint and does not get inlined because the cost exceeds the inlinecold-threshold.
+; A callee with identical body does gets inlined because cost fits within the
+; inline-threshold
+
+define i32 @callee1(i32 %x) !prof !1 {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @callee2(i32 %x) !prof !2 {
+; CHECK-LABEL: @callee2(
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @caller2(i32 %y1) !prof !2 {
+; CHECK-LABEL: @caller2(
+; CHECK: call i32 @callee2
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %y2 = call i32 @callee2(i32 %y1)
+ %y3 = call i32 @callee1(i32 %y2)
+ ret i32 %y3
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
+!1 = !{!"function_entry_count", i64 100}
+!2 = !{!"function_entry_count", i64 1}
+
diff --git a/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll b/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll
new file mode 100644
index 000000000000..498a995ecd45
--- /dev/null
+++ b/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+
+target datalayout = "e-p3:32:32-p4:64:64-n32"
+
+@lds = internal addrspace(3) global [64 x i64] zeroinitializer
+
+; CHECK-LABEL: @constexpr_addrspacecast_ptr_size_change(
+; CHECK: load i64, i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* addrspacecast (i64 addrspace(3)* getelementptr inbounds ([64 x i64], [64 x i64] addrspace(3)* @lds, i32 0, i32 0) to i64 addrspace(4)*), i64 undef)
+; CHECK-NEXT: br
+define void @constexpr_addrspacecast_ptr_size_change() #0 {
+ %tmp0 = call i32 @foo(i64 addrspace(4)* addrspacecast (i64 addrspace(3)* getelementptr inbounds ([64 x i64], [64 x i64] addrspace(3)* @lds, i32 0, i32 0) to i64 addrspace(4)*)) #1
+ ret void
+}
+
+define i32 @foo(i64 addrspace(4)* %arg) #1 {
+bb:
+ %tmp = getelementptr i64, i64 addrspace(4)* %arg, i64 undef
+ %tmp1 = load i64, i64 addrspace(4)* %tmp
+ br i1 undef, label %bb2, label %bb3
+
+bb2:
+ store i64 0, i64 addrspace(4)* %tmp
+ br label %bb3
+
+bb3:
+ unreachable
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { alwaysinline nounwind }
diff --git a/test/Transforms/Inline/inline-hot-callee.ll b/test/Transforms/Inline/inline-hot-callee.ll
new file mode 100644
index 000000000000..93ea9d43c78d
--- /dev/null
+++ b/test/Transforms/Inline/inline-hot-callee.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -inline -inline-threshold=0 -inlinehint-threshold=100 -S | FileCheck %s
+
+; This tests that a hot callee gets the (higher) inlinehint-threshold even without
+; inline hints and gets inlined because the cost is less than inlinehint-threshold.
+; A cold callee with identical body does not get inlined because cost exceeds the
+; inline-threshold
+
+define i32 @callee1(i32 %x) !prof !1 {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @callee2(i32 %x) !prof !2 {
+; CHECK-LABEL: @callee2(
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @caller2(i32 %y1) !prof !2 {
+; CHECK-LABEL: @caller2(
+; CHECK: call i32 @callee2
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %y2 = call i32 @callee2(i32 %y1)
+ %y3 = call i32 @callee1(i32 %y2)
+ ret i32 %y3
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"MaxFunctionCount", i32 10}
+!1 = !{!"function_entry_count", i64 10}
+!2 = !{!"function_entry_count", i64 1}
+
diff --git a/test/Transforms/Inline/inline-optsize.ll b/test/Transforms/Inline/inline-optsize.ll
index 820e56f7f8ef..b01a1f657f31 100644
--- a/test/Transforms/Inline/inline-optsize.ll
+++ b/test/Transforms/Inline/inline-optsize.ll
@@ -3,7 +3,7 @@
; The inline threshold for a function with the optsize attribute is currently
; the same as the global inline threshold for -Os. Check that the optsize
-; function attribute don't alter the function specific inline threshold if the
+; function attribute doesn't alter the function-specific inline threshold if the
; global inline threshold is lower (as for -Oz).
@a = global i32 4
diff --git a/test/Transforms/Inline/inline_dbg_declare.ll b/test/Transforms/Inline/inline_dbg_declare.ll
index f3ad7ef8b8fc..3c701c41459a 100644
--- a/test/Transforms/Inline/inline_dbg_declare.ll
+++ b/test/Transforms/Inline/inline_dbg_declare.ll
@@ -23,7 +23,7 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
target triple = "i686-pc-windows-msvc"
; Function Attrs: nounwind
-define float @foo(float %x) #0 {
+define float @foo(float %x) #0 !dbg !4 {
entry:
%x.addr = alloca float, align 4
store float %x, float* %x.addr, align 4
@@ -38,10 +38,11 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; CHECK: define void @bar
; Function Attrs: nounwind
-define void @bar(float* %dst) #0 {
+define void @bar(float* %dst) #0 !dbg !9 {
entry:
; CHECK: [[x_addr_i:%[a-zA-Z0-9.]+]] = alloca float, align 4
+; CHECK-NEXT: void @llvm.dbg.declare(metadata float* [[x_addr_i]], metadata [[m23:![0-9]+]], metadata !{{[0-9]+}}), !dbg [[m24:![0-9]+]]
%dst.addr = alloca float*, align 4
store float* %dst, float** %dst.addr, align 4
@@ -52,7 +53,6 @@ entry:
%call = call float @foo(float %1), !dbg !22
; CHECK-NOT: call float @foo
-; CHECK: void @llvm.dbg.declare(metadata float* [[x_addr_i]], metadata [[m23:![0-9]+]], metadata !{{[0-9]+}}), !dbg [[m24:![0-9]+]]
%2 = load float*, float** %dst.addr, align 4, !dbg !22
%arrayidx1 = getelementptr inbounds float, float* %2, i32 0, !dbg !22
@@ -67,33 +67,33 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!13, !14}
!llvm.ident = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.c", directory: "")
!2 = !{}
!3 = !{!4, !9}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: float (float)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "foo.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
-!9 = !DISubprogram(name: "bar", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !10, function: void (float*)* @bar, variables: !2)
+!9 = distinct !DISubprogram(name: "bar", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !10, variables: !2)
!10 = !DISubroutineType(types: !11)
!11 = !{null, !12}
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !8)
!13 = !{i32 2, !"Dwarf Version", i32 4}
!14 = !{i32 2, !"Debug Info Version", i32 3}
!15 = !{!"clang version 3.6.0 (trunk)"}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(name: "x", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!17 = !DIExpression()
!18 = !DILocation(line: 1, column: 17, scope: !4)
!19 = !DILocation(line: 3, column: 5, scope: !4)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "dst", line: 6, arg: 1, scope: !9, file: !5, type: !12)
+!20 = !DILocalVariable(name: "dst", line: 6, arg: 1, scope: !9, file: !5, type: !12)
!21 = !DILocation(line: 6, column: 17, scope: !9)
!22 = !DILocation(line: 8, column: 14, scope: !9)
!23 = !DILocation(line: 9, column: 1, scope: !9)
-; CHECK: [[FOO:![0-9]+]] = !DISubprogram(name: "foo",
-; CHECK: [[BAR:![0-9]+]] = !DISubprogram(name: "bar",
-; CHECK: [[m23]] = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: [[FOO]]
-; CHECK: [[CALL_SITE:![0-9]+]] = distinct !DILocation(line: 8, column: 14, scope: [[BAR]])
-; CHECK: [[m24]] = !DILocation(line: 1, column: 17, scope: [[FOO]], inlinedAt: [[CALL_SITE]])
+; CHECK: [[FOO:![0-9]+]] = distinct !DISubprogram(name: "foo",
+; CHECK: [[BAR:![0-9]+]] = distinct !DISubprogram(name: "bar",
+; CHECK: [[m23]] = !DILocalVariable(name: "x", arg: 1, scope: [[FOO]]
+; CHECK: [[m24]] = !DILocation(line: 1, column: 17, scope: [[FOO]], inlinedAt: [[CALL_SITE:![0-9]+]])
+; CHECK: [[CALL_SITE]] = distinct !DILocation(line: 8, column: 14, scope: [[BAR]])
diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll
index 2ef216e2d38a..6784e16b1d87 100644
--- a/test/Transforms/Inline/inline_invoke.ll
+++ b/test/Transforms/Inline/inline_invoke.ll
@@ -344,4 +344,5 @@ terminate:
; CHECK: attributes [[NUW]] = { nounwind }
; CHECK: attributes #1 = { nounwind readnone }
; CHECK: attributes #2 = { ssp uwtable }
-; CHECK: attributes #3 = { noreturn nounwind }
+; CHECK: attributes #3 = { argmemonly nounwind }
+; CHECK: attributes #4 = { noreturn nounwind }
diff --git a/test/Transforms/Inline/noalias-calls.ll b/test/Transforms/Inline/noalias-calls.ll
index c09d2a673297..56d5c6dc0818 100644
--- a/test/Transforms/Inline/noalias-calls.ll
+++ b/test/Transforms/Inline/noalias-calls.ll
@@ -16,24 +16,25 @@ entry:
ret void
}
-define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 {
+define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 {
entry:
tail call void @hello(i8* %a, i8* %c, i8* %b)
ret void
}
-; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 {
+; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 {
; CHECK: entry:
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 false) #0, !noalias !0
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 false) #0, !noalias !3
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 false) #0, !alias.scope !5
-; CHECK: call void @hey() #0, !noalias !5
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i32 16, i1 false) #0, !noalias !3
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 false) #1, !noalias !0
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 false) #1, !noalias !3
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 false) #1, !alias.scope !5
+; CHECK: call void @hey() #1, !noalias !5
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i32 16, i1 false) #1, !noalias !3
; CHECK: ret void
; CHECK: }
-attributes #0 = { nounwind }
-attributes #1 = { nounwind uwtable }
+attributes #0 = { nounwind argmemonly }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind uwtable }
; CHECK: !0 = !{!1}
; CHECK: !1 = distinct !{!1, !2, !"hello: %c"}
diff --git a/test/Transforms/Inline/noalias-cs.ll b/test/Transforms/Inline/noalias-cs.ll
index 0bff1882e832..8528a391cf95 100644
--- a/test/Transforms/Inline/noalias-cs.ll
+++ b/test/Transforms/Inline/noalias-cs.ll
@@ -34,13 +34,13 @@ entry:
; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 7
; CHECK: store float %1, float* %arrayidx.i, align 4, !noalias !16
; CHECK: %2 = load float, float* %a, align 4, !alias.scope !16, !noalias !17
-; CHECK: %arrayidx.i.i.1 = getelementptr inbounds float, float* %b, i64 5
-; CHECK: store float %2, float* %arrayidx.i.i.1, align 4, !alias.scope !21, !noalias !22
-; CHECK: %arrayidx1.i.i.2 = getelementptr inbounds float, float* %b, i64 8
-; CHECK: store float %2, float* %arrayidx1.i.i.2, align 4, !alias.scope !23, !noalias !24
+; CHECK: %arrayidx.i.i1 = getelementptr inbounds float, float* %b, i64 5
+; CHECK: store float %2, float* %arrayidx.i.i1, align 4, !alias.scope !21, !noalias !22
+; CHECK: %arrayidx1.i.i2 = getelementptr inbounds float, float* %b, i64 8
+; CHECK: store float %2, float* %arrayidx1.i.i2, align 4, !alias.scope !23, !noalias !24
; CHECK: %3 = load float, float* %a, align 4, !alias.scope !16
-; CHECK: %arrayidx.i.3 = getelementptr inbounds float, float* %b, i64 7
-; CHECK: store float %3, float* %arrayidx.i.3, align 4, !alias.scope !16
+; CHECK: %arrayidx.i3 = getelementptr inbounds float, float* %b, i64 7
+; CHECK: store float %3, float* %arrayidx.i3, align 4, !alias.scope !16
; CHECK: ret void
; CHECK: }
diff --git a/test/Transforms/Inline/noalias2.ll b/test/Transforms/Inline/noalias2.ll
index df135b0a318a..432fccf431c0 100644
--- a/test/Transforms/Inline/noalias2.ll
+++ b/test/Transforms/Inline/noalias2.ll
@@ -61,8 +61,8 @@ entry:
; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 7
; CHECK: store float %1, float* %arrayidx.i, align 4, !alias.scope !14, !noalias !13
; CHECK: %2 = load float, float* %c, align 4, !noalias !15
-; CHECK: %arrayidx.i.1 = getelementptr inbounds float, float* %a, i64 6
-; CHECK: store float %2, float* %arrayidx.i.1, align 4, !alias.scope !19, !noalias !20
+; CHECK: %arrayidx.i1 = getelementptr inbounds float, float* %a, i64 6
+; CHECK: store float %2, float* %arrayidx.i1, align 4, !alias.scope !19, !noalias !20
; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8
; CHECK: store float %2, float* %arrayidx1.i, align 4, !alias.scope !20, !noalias !19
; CHECK: %3 = load float, float* %c, align 4
diff --git a/test/Transforms/Inline/zero-cost.ll b/test/Transforms/Inline/zero-cost.ll
new file mode 100644
index 000000000000..8e7194a1963b
--- /dev/null
+++ b/test/Transforms/Inline/zero-cost.ll
@@ -0,0 +1,17 @@
+; RUN: opt -inline -S %s | FileCheck %s
+
+define void @f() {
+entry:
+ tail call void @g()
+ unreachable
+
+; CHECK-LABEL: @f
+; CHECK-NOT: call
+; CHECK: unreachable
+}
+
+define void @g() {
+entry:
+ unreachable
+}
+
diff --git a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
index 7f06f009515f..c303ddd58974 100644
--- a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
+++ b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -instcombine -S | grep icmp
; PR1646
-@__gthrw_pthread_cancel = weak alias i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=1]
+@__gthrw_pthread_cancel = weak alias i32 (i32), i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=1]
@__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*) ; <i8**> [#uses=1]
define weak i32 @pthread_cancel(i32) {
ret i32 0
diff --git a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
index 3793a860e8e9..7c6df1f984a4 100644
--- a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
+++ b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -instcombine -S | grep icmp
; PR1678
-@A = weak alias void ()* @B ; <void ()*> [#uses=1]
+@A = weak alias void (), void ()* @B ; <void ()*> [#uses=1]
define weak void @B() {
ret void
diff --git a/test/Transforms/InstCombine/LandingPadClauses.ll b/test/Transforms/InstCombine/LandingPadClauses.ll
index a4d77cbe8efb..75050c91bbb4 100644
--- a/test/Transforms/InstCombine/LandingPadClauses.ll
+++ b/test/Transforms/InstCombine/LandingPadClauses.ll
@@ -69,9 +69,11 @@ lpad.c:
filter [1 x i32*] [i32* @T1]
catch i32* @T2
unreachable
+; Caught types should not be removed from filters
; CHECK: %c = landingpad
-; CHECK-NEXT: @T1
-; CHECK-NEXT: filter [0 x i32*]
+; CHECK-NEXT: catch i32* @T1
+; CHECK-NEXT: filter [1 x i32*] [i32* @T1]
+; CHECK-NEXT: catch i32* @T2
; CHECK-NEXT: unreachable
lpad.d:
@@ -87,9 +89,10 @@ lpad.e:
catch i32* @T1
filter [3 x i32*] [i32* @T1, i32* @T2, i32* @T2]
unreachable
+; Caught types should not be removed from filters
; CHECK: %e = landingpad
-; CHECK-NEXT: @T1
-; CHECK-NEXT: filter [1 x i32*] [i32* @T2]
+; CHECK-NEXT: catch i32* @T1
+; CHECK-NEXT: filter [2 x i32*] [i32* @T1, i32* @T2]
; CHECK-NEXT: unreachable
lpad.f:
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index 1af2b0ffbf0a..2fe9e8cadeb7 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -395,3 +395,13 @@ define i8 @add_of_mul(i8 %x, i8 %y, i8 %z) {
%sum = add nsw i8 %mA, %mB
ret i8 %sum
}
+
+define i32 @add_of_selects(i1 %A, i32 %B) {
+ %sel0 = select i1 %A, i32 0, i32 -2
+ %sel1 = select i1 %A, i32 %B, i32 2
+ %add = add i32 %sel0, %sel1
+ ret i32 %add
+; CHECK-LABEL: @add_of_selects(
+; CHECK-NEXT: %[[sel:.*]] = select i1 %A, i32 %B, i32 0
+; CHECK-NEXT: ret i32 %[[sel]]
+}
diff --git a/test/Transforms/InstCombine/alias-recursion.ll b/test/Transforms/InstCombine/alias-recursion.ll
index 74254f3e8dff..efc1899e1f47 100644
--- a/test/Transforms/InstCombine/alias-recursion.ll
+++ b/test/Transforms/InstCombine/alias-recursion.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-pc-windows-msvc"
@0 = constant [1 x i8*] zeroinitializer
-@vtbl = alias getelementptr inbounds ([1 x i8*], [1 x i8*]* @0, i32 0, i32 0)
+@vtbl = alias i8*, getelementptr inbounds ([1 x i8*], [1 x i8*]* @0, i32 0, i32 0)
define i32 (%class.A*)* @test() {
; CHECK-LABEL: test
diff --git a/test/Transforms/InstCombine/all-bits-shift.ll b/test/Transforms/InstCombine/all-bits-shift.ll
new file mode 100644
index 000000000000..b9eb19cf2ad1
--- /dev/null
+++ b/test/Transforms/InstCombine/all-bits-shift.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@d = global i32 15, align 4
+@b = global i32* @d, align 8
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main() #1 {
+entry:
+ %0 = load i32*, i32** @b, align 8
+ %1 = load i32, i32* @a, align 4
+ %lnot = icmp eq i32 %1, 0
+ %lnot.ext = zext i1 %lnot to i32
+ %shr.i = lshr i32 2072, %lnot.ext
+ %call.lobit = lshr i32 %shr.i, 7
+ %2 = and i32 %call.lobit, 1
+ %3 = load i32, i32* %0, align 4
+ %or = or i32 %2, %3
+ store i32 %or, i32* %0, align 4
+ %4 = load i32, i32* @a, align 4
+ %lnot.1 = icmp eq i32 %4, 0
+ %lnot.ext.1 = zext i1 %lnot.1 to i32
+ %shr.i.1 = lshr i32 2072, %lnot.ext.1
+ %call.lobit.1 = lshr i32 %shr.i.1, 7
+ %5 = and i32 %call.lobit.1, 1
+ %or.1 = or i32 %5, %or
+ store i32 %or.1, i32* %0, align 4
+ ret i32 %or.1
+
+; Check that both InstCombine and InstSimplify can use computeKnownBits to
+; realize that:
+; ((2072 >> (L == 0)) >> 7) & 1
+; is always zero.
+
+; CHECK-LABEL: @main
+; CHECK: %[[V1:[0-9]+]] = load i32*, i32** @b, align 8
+; CHECK: %[[V2:[0-9]+]] = load i32, i32* %[[V1]], align 4
+; CHECK: ret i32 %[[V2]]
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index b61b75e9f9f3..2ee0372e5e0a 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -163,3 +163,14 @@ entry:
call void (...) @use(i1* %v32, i1* %v64, i1* %v33)
ret void
}
+
+define void @test11() {
+entry:
+; ALL-LABEL: @test11(
+; ALL: %y = alloca i32
+; ALL: call void (...) @use(i32* nonnull @int) [ "blah"(i32* %y) ]
+; ALL: ret void
+ %y = alloca i32
+ call void (...) @use(i32* nonnull @int) [ "blah"(i32* %y) ]
+ ret void
+}
diff --git a/test/Transforms/InstCombine/and-compare.ll b/test/Transforms/InstCombine/and-compare.ll
index 037641b90ad7..53ea81d1c0d4 100644
--- a/test/Transforms/InstCombine/and-compare.ll
+++ b/test/Transforms/InstCombine/and-compare.ll
@@ -1,6 +1,9 @@
; RUN: opt < %s -instcombine -S | \
; RUN: FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
; Should be optimized to one and.
define i1 @test1(i32 %a, i32 %b) {
; CHECK-LABEL: @test1(
@@ -13,3 +16,23 @@ define i1 @test1(i32 %a, i32 %b) {
%tmp = icmp ne i32 %tmp1, %tmp3 ; <i1> [#uses=1]
ret i1 %tmp
}
+
+define zeroext i1 @test2(i64 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: %[[trunc:.*]] = trunc i64 %A to i8
+; CHECK-NEXT: %[[icmp:.*]] = icmp sgt i8 %[[trunc]], -1
+; CHECK-NEXT: ret i1 %[[icmp]]
+ %and = and i64 %A, 128
+ %cmp = icmp eq i64 %and, 0
+ ret i1 %cmp
+}
+
+define zeroext i1 @test3(i64 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: %[[trunc:.*]] = trunc i64 %A to i8
+; CHECK-NEXT: %[[icmp:.*]] = icmp slt i8 %[[trunc]], 0
+; CHECK-NEXT: ret i1 %[[icmp]]
+ %and = and i64 %A, 128
+ %cmp = icmp ne i64 %and, 0
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index 96b535dda99d..326bfda38553 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -77,3 +77,71 @@ define i1 @test8(i32 %i) {
%cond = and i1 %cmp1, %cmp2
ret i1 %cond
}
+
+; combine -x & 1 into x & 1
+define i64 @test9(i64 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NOT: %sub = sub nsw i64 0, %x
+; CHECK-NOT: %and = and i64 %sub, 1
+; CHECK-NEXT: %and = and i64 %x, 1
+; CHECK-NEXT: ret i64 %and
+ %sub = sub nsw i64 0, %x
+ %and = and i64 %sub, 1
+ ret i64 %and
+}
+
+define i64 @test10(i64 %x) {
+; CHECK-LABEL: @test10(
+; CHECK-NOT: %sub = sub nsw i64 0, %x
+; CHECK-NEXT: %and = and i64 %x, 1
+; CHECK-NOT: %add = add i64 %sub, %and
+; CHECK-NEXT: %add = sub i64 %and, %x
+; CHECK-NEXT: ret i64 %add
+ %sub = sub nsw i64 0, %x
+ %and = and i64 %sub, 1
+ %add = add i64 %sub, %and
+ ret i64 %add
+}
+
+define i64 @fabs_double(double %x) {
+; CHECK-LABEL: @fabs_double(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %and = bitcast double %fabs to i64
+; CHECK-NEXT: ret i64 %and
+ %bc = bitcast double %x to i64
+ %and = and i64 %bc, 9223372036854775807
+ ret i64 %and
+}
+
+define i64 @fabs_double_swap(double %x) {
+; CHECK-LABEL: @fabs_double_swap(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %and = bitcast double %fabs to i64
+; CHECK-NEXT: ret i64 %and
+ %bc = bitcast double %x to i64
+ %and = and i64 9223372036854775807, %bc
+ ret i64 %and
+}
+
+define i32 @fabs_float(float %x) {
+; CHECK-LABEL: @fabs_float(
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: %and = bitcast float %fabs to i32
+; CHECK-NEXT: ret i32 %and
+ %bc = bitcast float %x to i32
+ %and = and i32 %bc, 2147483647
+ ret i32 %and
+}
+
+; Make sure that only a bitcast is transformed.
+
+define i64 @fabs_double_not_bitcast(double %x) {
+; CHECK-LABEL: @fabs_double_not_bitcast(
+; CHECK-NEXT: %bc = fptoui double %x to i64
+; CHECK-NEXT: %and = and i64 %bc, 9223372036854775807
+; CHECK-NEXT: ret i64 %and
+ %bc = fptoui double %x to i64
+ %and = and i64 %bc, 9223372036854775807
+ ret i64 %and
+}
+
diff --git a/test/Transforms/InstCombine/apint-or.ll b/test/Transforms/InstCombine/apint-or.ll
new file mode 100644
index 000000000000..e2312b61f2b9
--- /dev/null
+++ b/test/Transforms/InstCombine/apint-or.ll
@@ -0,0 +1,79 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; These tests are for Integer BitWidth <= 64 && BitWidth % 2 != 0.
+define i23 @test1(i23 %A) {
+ ;; A | ~A == -1
+ %NotA = xor i23 -1, %A
+ %B = or i23 %A, %NotA
+ ret i23 %B
+; CHECK-LABEL: @test1
+; CHECK-NEXT: ret i23 -1
+}
+
+define i39 @test2(i39 %V, i39 %M) {
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943
+ %N = and i39 %M, 274877906944
+ %A = add i39 %V, %N
+ %B = and i39 %A, %C1
+ %D = and i39 %V, 274877906943
+ %R = or i39 %B, %D
+ ret i39 %R
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %N = and i39 %M, -274877906944
+; CHECK-NEXT: %A = add i39 %N, %V
+; CHECK-NEXT: ret i39 %A
+}
+
+define i43 @test3(i43 %A, i43 %B) {
+ ;; (~A | ~B) == (~(A & B)) - De Morgan's Law
+ %NotA = xor i43 %A, -1
+ %NotB = xor i43 %B, -1
+ %C1 = or i43 %NotA, %NotB
+ ret i43 %C1
+; CHECK-LABEL: @test3
+; CHECK-NEXT: %C1.demorgan = and i43 %A, %B
+; CHECK-NEXT: %C1 = xor i43 %C1.demorgan, -1
+; CHECK-NEXT: ret i43 %C1
+}
+
+; These tests are for Integer BitWidth > 64 && BitWidth <= 1024.
+define i1023 @test4(i1023 %A) {
+ ;; A | ~A == -1
+ %NotA = xor i1023 -1, %A
+ %B = or i1023 %A, %NotA
+ ret i1023 %B
+; CHECK-LABEL: @test4
+; CHECK-NEXT: ret i1023 -1
+}
+
+define i399 @test5(i399 %V, i399 %M) {
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943
+ %N = and i399 %M, 18446742974197923840
+ %A = add i399 %V, %N
+ %B = and i399 %A, %C1
+ %D = and i399 %V, 274877906943
+ %R = or i399 %B, %D
+ ret i399 %R
+; CHECK-LABEL: @test5
+; CHECK-NEXT: %N = and i399 %M, 18446742974197923840
+; CHECK-NEXT: %A = add i399 %N, %V
+; CHECK-NEXT: ret i399 %A
+}
+
+define i129 @test6(i129 %A, i129 %B) {
+ ;; (~A | ~B) == (~(A & B)) - De Morgan's Law
+ %NotA = xor i129 %A, -1
+ %NotB = xor i129 %B, -1
+ %C1 = or i129 %NotA, %NotB
+ ret i129 %C1
+; CHECK-LABEL: @test6
+; CHECK-NEXT: %C1.demorgan = and i129 %A, %B
+; CHECK-NEXT: %C1 = xor i129 %C1.demorgan, -1
+; CHECK-NEXT: ret i129 %C1
+}
diff --git a/test/Transforms/InstCombine/apint-or1.ll b/test/Transforms/InstCombine/apint-or1.ll
deleted file mode 100644
index d4f87ac894d9..000000000000
--- a/test/Transforms/InstCombine/apint-or1.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; This test makes sure that or instructions are properly eliminated.
-; This test is for Integer BitWidth <= 64 && BitWidth % 2 != 0.
-;
-
-; RUN: opt < %s -instcombine -S | not grep or
-
-
-define i7 @test0(i7 %X) {
- %Y = or i7 %X, 0
- ret i7 %Y
-}
-
-define i17 @test1(i17 %X) {
- %Y = or i17 %X, -1
- ret i17 %Y
-}
-
-define i23 @test2(i23 %A) {
- ;; A | ~A == -1
- %NotA = xor i23 -1, %A
- %B = or i23 %A, %NotA
- ret i23 %B
-}
-
-define i39 @test3(i39 %V, i39 %M) {
- ;; If we have: ((V + N) & C1) | (V & C2)
- ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
- ;; replace with V+N.
- %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943
- %N = and i39 %M, 274877906944
- %A = add i39 %V, %N
- %B = and i39 %A, %C1
- %D = and i39 %V, 274877906943
- %R = or i39 %B, %D
- ret i39 %R
-}
diff --git a/test/Transforms/InstCombine/apint-or2.ll b/test/Transforms/InstCombine/apint-or2.ll
deleted file mode 100644
index d7de255f7fd2..000000000000
--- a/test/Transforms/InstCombine/apint-or2.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; This test makes sure that or instructions are properly eliminated.
-; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
-;
-; RUN: opt < %s -instcombine -S | not grep or
-
-
-define i777 @test0(i777 %X) {
- %Y = or i777 %X, 0
- ret i777 %Y
-}
-
-define i117 @test1(i117 %X) {
- %Y = or i117 %X, -1
- ret i117 %Y
-}
-
-define i1023 @test2(i1023 %A) {
- ;; A | ~A == -1
- %NotA = xor i1023 -1, %A
- %B = or i1023 %A, %NotA
- ret i1023 %B
-}
-
-define i399 @test3(i399 %V, i399 %M) {
- ;; If we have: ((V + N) & C1) | (V & C2)
- ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
- ;; replace with V+N.
- %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943
- %N = and i399 %M, 18446742974197923840
- %A = add i399 %V, %N
- %B = and i399 %A, %C1
- %D = and i399 %V, 274877906943
- %R = or i399 %B, %D
- ret i399 %R
-}
diff --git a/test/Transforms/InstCombine/assume-redundant.ll b/test/Transforms/InstCombine/assume-redundant.ll
index 4b869ef2c50e..4bdbcc8d086a 100644
--- a/test/Transforms/InstCombine/assume-redundant.ll
+++ b/test/Transforms/InstCombine/assume-redundant.ll
@@ -47,6 +47,32 @@ for.end: ; preds = %for.body
ret void
}
+declare align 8 i8* @get()
+
+; Check that redundant align assume is removed
+; CHECK-LABEL: @test
+; CHECK-NOT: call void @llvm.assume
+define void @test1() {
+ %p = call align 8 i8* @get()
+ %ptrint = ptrtoint i8* %p to i64
+ %maskedptr = and i64 %ptrint, 7
+ %maskcond = icmp eq i64 %maskedptr, 0
+ call void @llvm.assume(i1 %maskcond)
+ ret void
+}
+
+; Check that redundant align assume is removed
+; CHECK-LABEL: @test
+; CHECK-NOT: call void @llvm.assume
+define void @test3() {
+ %p = alloca i8, align 8
+ %ptrint = ptrtoint i8* %p to i64
+ %maskedptr = and i64 %ptrint, 7
+ %maskcond = icmp eq i64 %maskedptr, 0
+ call void @llvm.assume(i1 %maskcond)
+ ret void
+}
+
; Function Attrs: nounwind
declare void @llvm.assume(i1) #1
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
index 1a598a5d4153..b04308e10e23 100644
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -6,46 +6,46 @@ target datalayout = "e-p:32:32:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16
; Cases that should be bitcast
; Test cast between scalars with same bit sizes
-@alias_i32_to_f32 = alias bitcast (i32 (i32)* @func_i32 to float (float)*)
+@alias_i32_to_f32 = alias float (float), bitcast (i32 (i32)* @func_i32 to float (float)*)
; Test cast between vectors with same number of elements and bit sizes
-@alias_v2i32_to_v2f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
+@alias_v2i32_to_v2f32 = alias <2 x float> (<2 x float>), bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
; Test cast from vector to scalar with same number of bits
-@alias_v2f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
+@alias_v2f32_to_i64 = alias <2 x float> (<2 x float>), bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
; Test cast from scalar to vector with same number of bits
-@alias_i64_to_v2f32 = alias bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
+@alias_i64_to_v2f32 = alias i64 (i64), bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
; Test cast between vectors of pointers
-@alias_v2i32p_to_v2i64p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
+@alias_v2i32p_to_v2i64p = alias <2 x i64*> (<2 x i64*>), bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
; Cases that should be invalid and unchanged
; Test cast between scalars with different bit sizes
-@alias_i64_to_f32 = alias bitcast (i64 (i64)* @func_i64 to float (float)*)
+@alias_i64_to_f32 = alias float (float), bitcast (i64 (i64)* @func_i64 to float (float)*)
; Test cast between vectors with different bit sizes but the
; same number of elements
-@alias_v2i64_to_v2f32 = alias bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
+@alias_v2i64_to_v2f32 = alias <2 x float> (<2 x float>), bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
; Test cast between vectors with same number of bits and different
; numbers of elements
-@alias_v2i32_to_v4f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
+@alias_v2i32_to_v4f32 = alias <4 x float> (<4 x float>), bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
; Test cast between scalar and vector with different number of bits
-@alias_i64_to_v4f32 = alias bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
+@alias_i64_to_v4f32 = alias i64 (i64), bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
; Test cast between vector and scalar with different number of bits
-@alias_v4f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
+@alias_v4f32_to_i64 = alias <4 x float> (<4 x float>), bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
; Test cast from scalar to vector of pointers with same number of bits
; We don't know the pointer size at this point, so this can't be done
-@alias_i64_to_v2i32p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
+@alias_i64_to_v2i32p = alias i64 (i64), bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
; Test cast between vector of pointers and scalar with different number of bits
-@alias_v4i32p_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
+@alias_v4i32p_to_i64 = alias <4 x i32*> (<4 x i32*>), bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
diff --git a/test/Transforms/InstCombine/bitcast-bitcast.ll b/test/Transforms/InstCombine/bitcast-bitcast.ll
new file mode 100644
index 000000000000..0f46ff53bc18
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast-bitcast.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check all scalar / vector combinations for a pair of bitcasts.
+
+define ppc_fp128 @bitcast_bitcast_s_s_s(i128 %a) {
+ %bc1 = bitcast i128 %a to fp128
+ %bc2 = bitcast fp128 %bc1 to ppc_fp128
+ ret ppc_fp128 %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_s_s(
+; CHECK-NEXT: %bc2 = bitcast i128 %a to ppc_fp128
+; CHECK-NEXT: ret ppc_fp128 %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_s_s_v(i64 %a) {
+ %bc1 = bitcast i64 %a to double
+ %bc2 = bitcast double %bc1 to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_s_v(
+; CHECK-NEXT: %bc2 = bitcast i64 %a to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
+define double @bitcast_bitcast_s_v_s(i64 %a) {
+ %bc1 = bitcast i64 %a to <2 x i32>
+ %bc2 = bitcast <2 x i32> %bc1 to double
+ ret double %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_v_s(
+; CHECK-NEXT: %bc2 = bitcast i64 %a to double
+; CHECK-NEXT: ret double %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_s_v_v(i64 %a) {
+ %bc1 = bitcast i64 %a to <4 x i16>
+ %bc2 = bitcast <4 x i16> %bc1 to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_v_v(
+; CHECK-NEXT: %bc2 = bitcast i64 %a to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
+define i64 @bitcast_bitcast_v_s_s(<2 x i32> %a) {
+ %bc1 = bitcast <2 x i32> %a to double
+ %bc2 = bitcast double %bc1 to i64
+ ret i64 %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_s_s(
+; CHECK-NEXT: %bc2 = bitcast <2 x i32> %a to i64
+; CHECK-NEXT: ret i64 %bc2
+}
+
+define <4 x i16> @bitcast_bitcast_v_s_v(<2 x i32> %a) {
+ %bc1 = bitcast <2 x i32> %a to double
+ %bc2 = bitcast double %bc1 to <4 x i16>
+ ret <4 x i16> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_s_v(
+; CHECK-NEXT: %bc2 = bitcast <2 x i32> %a to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> %bc2
+}
+
+define double @bitcast_bitcast_v_v_s(<2 x float> %a) {
+ %bc1 = bitcast <2 x float> %a to <4 x i16>
+ %bc2 = bitcast <4 x i16> %bc1 to double
+ ret double %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_v_s(
+; CHECK-NEXT: %bc2 = bitcast <2 x float> %a to double
+; CHECK-NEXT: ret double %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_v_v_v(<2 x float> %a) {
+ %bc1 = bitcast <2 x float> %a to <4 x i16>
+ %bc2 = bitcast <4 x i16> %bc1 to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_v_v(
+; CHECK-NEXT: %bc2 = bitcast <2 x float> %a to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
diff --git a/test/Transforms/InstCombine/bitcast-vec-canon.ll b/test/Transforms/InstCombine/bitcast-vec-canon.ll
index d27765e89424..97145221099e 100644
--- a/test/Transforms/InstCombine/bitcast-vec-canon.ll
+++ b/test/Transforms/InstCombine/bitcast-vec-canon.ll
@@ -1,22 +1,41 @@
-; RUN: opt < %s -instcombine -S | grep element | count 4
+; RUN: opt < %s -instcombine -S | FileCheck %s
define double @a(<1 x i64> %y) {
%c = bitcast <1 x i64> %y to double
- ret double %c
+ ret double %c
+
+; CHECK-LABEL: @a(
+; CHECK-NEXT: bitcast <1 x i64> %y to <1 x double>
+; CHECK-NEXT: extractelement <1 x double> {{.*}}, i32 0
+; CHECK-NEXT: ret double
}
define i64 @b(<1 x i64> %y) {
%c = bitcast <1 x i64> %y to i64
- ret i64 %c
+ ret i64 %c
+
+; CHECK-LABEL: @b(
+; CHECK-NEXT: extractelement <1 x i64> %y, i32 0
+; CHECK-NEXT: ret i64
}
define <1 x i64> @c(double %y) {
%c = bitcast double %y to <1 x i64>
ret <1 x i64> %c
+
+; CHECK-LABEL: @c(
+; CHECK-NEXT: bitcast double %y to i64
+; CHECK-NEXT: insertelement <1 x i64> undef, i64 {{.*}}, i32 0
+; CHECK-NEXT: ret <1 x i64>
}
define <1 x i64> @d(i64 %y) {
%c = bitcast i64 %y to <1 x i64>
ret <1 x i64> %c
+
+; CHECK-LABEL: @d(
+; CHECK-NEXT: insertelement <1 x i64> undef, i64 %y, i32 0
+; CHECK-NEXT: ret <1 x i64>
}
+
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 579839e4245b..bccd19cc32ea 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -64,6 +64,61 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
; CHECK-NEXT: ret float %add
}
+; Both bitcasts are unnecessary; change the extractelement.
+
+define float @bitcast_extelt1(<2 x float> %A) {
+ %bc1 = bitcast <2 x float> %A to <2 x i32>
+ %ext = extractelement <2 x i32> %bc1, i32 0
+ %bc2 = bitcast i32 %ext to float
+ ret float %bc2
+
+; CHECK-LABEL: @bitcast_extelt1(
+; CHECK-NEXT: %bc2 = extractelement <2 x float> %A, i32 0
+; CHECK-NEXT: ret float %bc2
+}
+
+; Second bitcast can be folded into the first.
+
+define i64 @bitcast_extelt2(<4 x float> %A) {
+ %bc1 = bitcast <4 x float> %A to <2 x double>
+ %ext = extractelement <2 x double> %bc1, i32 1
+ %bc2 = bitcast double %ext to i64
+ ret i64 %bc2
+
+; CHECK-LABEL: @bitcast_extelt2(
+; CHECK-NEXT: %bc = bitcast <4 x float> %A to <2 x i64>
+; CHECK-NEXT: %bc2 = extractelement <2 x i64> %bc, i32 1
+; CHECK-NEXT: ret i64 %bc2
+}
+
+; TODO: This should return %A.
+
+define <2 x i32> @bitcast_extelt3(<2 x i32> %A) {
+ %bc1 = bitcast <2 x i32> %A to <1 x i64>
+ %ext = extractelement <1 x i64> %bc1, i32 0
+ %bc2 = bitcast i64 %ext to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_extelt3(
+; CHECK-NEXT: %bc1 = bitcast <2 x i32> %A to <1 x i64>
+; CHECK-NEXT: %ext = extractelement <1 x i64> %bc1, i32 0
+; CHECK-NEXT: %bc2 = bitcast i64 %ext to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
+; Handle the case where the input is not a vector.
+
+define double @bitcast_extelt4(i128 %A) {
+ %bc1 = bitcast i128 %A to <2 x i64>
+ %ext = extractelement <2 x i64> %bc1, i32 0
+ %bc2 = bitcast i64 %ext to double
+ ret double %bc2
+
+; CHECK-LABEL: @bitcast_extelt4(
+; CHECK-NEXT: %bc = bitcast i128 %A to <2 x double>
+; CHECK-NEXT: %bc2 = extractelement <2 x double> %bc, i32 0
+; CHECK-NEXT: ret double %bc2
+}
define <2 x i32> @test4(i32 %A, i32 %B){
%tmp38 = zext i32 %A to i64
diff --git a/test/Transforms/InstCombine/bitreverse-fold.ll b/test/Transforms/InstCombine/bitreverse-fold.ll
new file mode 100644
index 000000000000..ad7fc3a74644
--- /dev/null
+++ b/test/Transforms/InstCombine/bitreverse-fold.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @test1(i32 %p) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: ret i32 %p
+ %a = call i32 @llvm.bitreverse.i32(i32 %p)
+ %b = call i32 @llvm.bitreverse.i32(i32 %a)
+ ret i32 %b
+}
+
+declare i32 @llvm.bitreverse.i32(i32) readnone
diff --git a/test/Transforms/InstCombine/bitreverse-recognize.ll b/test/Transforms/InstCombine/bitreverse-recognize.ll
new file mode 100644
index 000000000000..fbd5cb6d139c
--- /dev/null
+++ b/test/Transforms/InstCombine/bitreverse-recognize.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define zeroext i8 @f_u8(i8 zeroext %a) {
+; CHECK-LABEL: @f_u8
+; CHECK-NEXT: %[[A:.*]] = call i8 @llvm.bitreverse.i8(i8 %a)
+; CHECK-NEXT: ret i8 %[[A]]
+ %1 = shl i8 %a, 7
+ %2 = shl i8 %a, 5
+ %3 = and i8 %2, 64
+ %4 = shl i8 %a, 3
+ %5 = and i8 %4, 32
+ %6 = shl i8 %a, 1
+ %7 = and i8 %6, 16
+ %8 = lshr i8 %a, 1
+ %9 = and i8 %8, 8
+ %10 = lshr i8 %a, 3
+ %11 = and i8 %10, 4
+ %12 = lshr i8 %a, 5
+ %13 = and i8 %12, 2
+ %14 = lshr i8 %a, 7
+ %15 = or i8 %14, %1
+ %16 = or i8 %15, %3
+ %17 = or i8 %16, %5
+ %18 = or i8 %17, %7
+ %19 = or i8 %18, %9
+ %20 = or i8 %19, %11
+ %21 = or i8 %20, %13
+ ret i8 %21
+}
+
+; The ANDs with 32 and 64 have been swapped here, so the sequence does not
+; completely match a bitreverse.
+define zeroext i8 @f_u8_fail(i8 zeroext %a) {
+; CHECK-LABEL: @f_u8_fail
+; CHECK-NOT: call
+; CHECK: ret i8
+ %1 = shl i8 %a, 7
+ %2 = shl i8 %a, 5
+ %3 = and i8 %2, 32
+ %4 = shl i8 %a, 3
+ %5 = and i8 %4, 64
+ %6 = shl i8 %a, 1
+ %7 = and i8 %6, 16
+ %8 = lshr i8 %a, 1
+ %9 = and i8 %8, 8
+ %10 = lshr i8 %a, 3
+ %11 = and i8 %10, 4
+ %12 = lshr i8 %a, 5
+ %13 = and i8 %12, 2
+ %14 = lshr i8 %a, 7
+ %15 = or i8 %14, %1
+ %16 = or i8 %15, %3
+ %17 = or i8 %16, %5
+ %18 = or i8 %17, %7
+ %19 = or i8 %18, %9
+ %20 = or i8 %19, %11
+ %21 = or i8 %20, %13
+ ret i8 %21
+}
+
+define zeroext i16 @f_u16(i16 zeroext %a) {
+; CHECK-LABEL: @f_u16
+; CHECK-NEXT: %[[A:.*]] = call i16 @llvm.bitreverse.i16(i16 %a)
+; CHECK-NEXT: ret i16 %[[A]]
+ %1 = shl i16 %a, 15
+ %2 = shl i16 %a, 13
+ %3 = and i16 %2, 16384
+ %4 = shl i16 %a, 11
+ %5 = and i16 %4, 8192
+ %6 = shl i16 %a, 9
+ %7 = and i16 %6, 4096
+ %8 = shl i16 %a, 7
+ %9 = and i16 %8, 2048
+ %10 = shl i16 %a, 5
+ %11 = and i16 %10, 1024
+ %12 = shl i16 %a, 3
+ %13 = and i16 %12, 512
+ %14 = shl i16 %a, 1
+ %15 = and i16 %14, 256
+ %16 = lshr i16 %a, 1
+ %17 = and i16 %16, 128
+ %18 = lshr i16 %a, 3
+ %19 = and i16 %18, 64
+ %20 = lshr i16 %a, 5
+ %21 = and i16 %20, 32
+ %22 = lshr i16 %a, 7
+ %23 = and i16 %22, 16
+ %24 = lshr i16 %a, 9
+ %25 = and i16 %24, 8
+ %26 = lshr i16 %a, 11
+ %27 = and i16 %26, 4
+ %28 = lshr i16 %a, 13
+ %29 = and i16 %28, 2
+ %30 = lshr i16 %a, 15
+ %31 = or i16 %30, %1
+ %32 = or i16 %31, %3
+ %33 = or i16 %32, %5
+ %34 = or i16 %33, %7
+ %35 = or i16 %34, %9
+ %36 = or i16 %35, %11
+ %37 = or i16 %36, %13
+ %38 = or i16 %37, %15
+ %39 = or i16 %38, %17
+ %40 = or i16 %39, %19
+ %41 = or i16 %40, %21
+ %42 = or i16 %41, %23
+ %43 = or i16 %42, %25
+ %44 = or i16 %43, %27
+ %45 = or i16 %44, %29
+ ret i16 %45
+} \ No newline at end of file
diff --git a/test/Transforms/InstCombine/blend_x86.ll b/test/Transforms/InstCombine/blend_x86.ll
index 778d44ba342c..eb0b8d7584ab 100644
--- a/test/Transforms/InstCombine/blend_x86.ll
+++ b/test/Transforms/InstCombine/blend_x86.ll
@@ -2,42 +2,118 @@
define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd
-; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x double> %ab, <2 x double> %xy
+; CHECK-NEXT: %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %ab, <2 x double> %xy
+; CHECK-NEXT: ret <2 x double> %1
%1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00>)
ret <2 x double> %1
}
+define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_zero
+; CHECK-NEXT: ret <2 x double> %xy
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer)
+ ret <2 x double> %1
+}
+
+define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_dup
+; CHECK-NEXT: ret <2 x double> %xy
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel)
+ ret <2 x double> %1
+}
+
define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps
-; CHECK: select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %abcd, <4 x float> %xyzw
+; CHECK-NEXT: %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %abcd, <4 x float> %xyzw
+; CHECK-NEXT: ret <4 x float> %1
%1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
ret <4 x float> %1
}
+define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_zero
+; CHECK-NEXT: ret <4 x float> %xyzw
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer)
+ ret <4 x float> %1
+}
+
+define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_dup
+; CHECK-NEXT: ret <4 x float> %xyzw
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel)
+ ret <4 x float> %1
+}
+
define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb
-; CHECK: select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %abcd, <16 x i8> %xyzw
+; CHECK-NEXT: %1 = select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %abcd, <16 x i8> %xyzw
+; CHECK-NEXT: ret <16 x i8> %1
%1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
ret <16 x i8> %1
}
+define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_zero
+; CHECK-NEXT: ret <16 x i8> %xyzw
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_dup
+; CHECK-NEXT: ret <16 x i8> %xyzw
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel)
+ ret <16 x i8> %1
+}
+
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd_avx
-; CHECK: select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %ab, <4 x double> %xy
+; CHECK-NEXT: %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %ab, <4 x double> %xy
+; CHECK-NEXT: ret <4 x double> %1
%1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00, double 0xFFFFFFFFE0000000, double 0.000000e+00>)
ret <4 x double> %1
}
+define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_avx_zero
+; CHECK-NEXT: ret <4 x double> %xy
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer)
+ ret <4 x double> %1
+}
+
+define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_avx_dup
+; CHECK-NEXT: ret <4 x double> %xy
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel)
+ ret <4 x double> %1
+}
+
define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps_avx
-; CHECK: select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %abcd, <8 x float> %xyzw
+; CHECK-NEXT: %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %abcd, <8 x float> %xyzw
+; CHECK-NEXT: ret <8 x float> %1
%1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
ret <8 x float> %1
}
+define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_avx_zero
+; CHECK-NEXT: ret <8 x float> %xyzw
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer)
+ ret <8 x float> %1
+}
+
+define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_avx_dup
+; CHECK-NEXT: ret <8 x float> %xyzw
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel)
+ ret <8 x float> %1
+}
+
define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb_avx2
-; CHECK: select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %abcd, <32 x i8> %xyzw
+; CHECK-NEXT: %1 = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %abcd, <32 x i8> %xyzw
+; CHECK-NEXT: ret <32 x i8> %1
%1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
<32 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
@@ -46,6 +122,20 @@ define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
ret <32 x i8> %1
}
+define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_avx2_zero
+; CHECK-NEXT: ret <32 x i8> %xyzw
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_avx2_dup
+; CHECK-NEXT: ret <32 x i8> %xyzw
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel)
+ ret <32 x i8> %1
+}
+
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index 63b0775e4aff..edf9572f1e11 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -51,7 +51,7 @@ define i32 @test5(i32 %a) nounwind {
define i32 @test6(i32 %a) nounwind {
; CHECK-LABEL: @test6
; CHECK-NEXT: %tmp2 = lshr i32 %a, 24
-; CHECK-NEXT ret i32 %tmp4
+; CHECK-NEXT: ret i32 %tmp2
%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
%tmp4 = and i32 %tmp2, 255
ret i32 %tmp4
@@ -62,7 +62,7 @@ define i16 @test7(i32 %A) {
; CHECK-LABEL: @test7
; CHECK-NEXT: %1 = lshr i32 %A, 16
; CHECK-NEXT: %D = trunc i32 %1 to i16
-; CHECK-NEXT ret i16 %D
+; CHECK-NEXT: ret i16 %D
%B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
%C = trunc i32 %B to i16
%D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
@@ -73,7 +73,7 @@ define i16 @test8(i64 %A) {
; CHECK-LABEL: @test8
; CHECK-NEXT: %1 = lshr i64 %A, 48
; CHECK-NEXT: %D = trunc i64 %1 to i16
-; CHECK-NEXT ret i16 %D
+; CHECK-NEXT: ret i16 %D
%B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind
%C = trunc i64 %B to i16
%D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
diff --git a/test/Transforms/InstCombine/bswap-known-bits.ll b/test/Transforms/InstCombine/bswap-known-bits.ll
new file mode 100644
index 000000000000..1f3285af65cc
--- /dev/null
+++ b/test/Transforms/InstCombine/bswap-known-bits.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+; Note: This is testing functionality in computeKnownBits. I'd have rather
+; used instsimplify, but the bit test folding is apparently only in instcombine.
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+
+define i1 @test1(i16 %arg) {
+; CHECK-LABEL: @test1
+; CHECK: ret i1 true
+ %a = or i16 %arg, 511
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 256
+ ret i1 %res
+}
+
+define i1 @test2(i16 %arg) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 true
+ %a = or i16 %arg, 1
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 256
+ ret i1 %res
+}
+
+
+define i1 @test3(i16 %arg) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 true
+ %a = or i16 %arg, 256
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 1
+ %res = icmp eq i16 %and, 1
+ ret i1 %res
+}
+
+define i1 @test4(i32 %arg) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 true
+ %a = or i32 %arg, 2147483647 ; i32_MAX
+ %b = call i32 @llvm.bswap.i32(i32 %a)
+ %and = and i32 %b, 127
+ %res = icmp eq i32 %and, 127
+ ret i1 %res
+}
diff --git a/test/Transforms/InstCombine/bswap.ll b/test/Transforms/InstCombine/bswap.ll
index ba7df3125f4e..b48b2a57c8ce 100644
--- a/test/Transforms/InstCombine/bswap.ll
+++ b/test/Transforms/InstCombine/bswap.ll
@@ -1,7 +1,7 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
; RUN: opt < %s -instcombine -S | \
-; RUN: grep "call.*llvm.bswap" | count 6
+; RUN: grep "call.*llvm.bswap" | count 7
define i32 @test1(i32 %i) {
%tmp1 = lshr i32 %i, 24 ; <i32> [#uses=1]
@@ -72,3 +72,15 @@ define i32 @test6(i32 %x) nounwind readnone {
ret i32 %tmp7
}
+; PR23863
+define i32 @test7(i32 %x) {
+ %shl = shl i32 %x, 16
+ %shr = lshr i32 %x, 16
+ %or = or i32 %shl, %shr
+ %and2 = shl i32 %or, 8
+ %shl3 = and i32 %and2, -16711936
+ %and4 = lshr i32 %or, 8
+ %shr5 = and i32 %and4, 16711935
+ %or6 = or i32 %shl3, %shr5
+ ret i32 %or6
+}
diff --git a/test/Transforms/InstCombine/call_nonnull_arg.ll b/test/Transforms/InstCombine/call_nonnull_arg.ll
new file mode 100644
index 000000000000..b10411f622be
--- /dev/null
+++ b/test/Transforms/InstCombine/call_nonnull_arg.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; InstCombine should mark null-checked argument as nonnull at callsite
+declare void @dummy(i32*, i32)
+
+define void @test(i32* %a, i32 %b) {
+; CHECK-LABEL: @test
+; CHECK: call void @dummy(i32* nonnull %a, i32 %b)
+entry:
+ %cond1 = icmp eq i32* %a, null
+ br i1 %cond1, label %dead, label %not_null
+not_null:
+ %cond2 = icmp eq i32 %b, 0
+ br i1 %cond2, label %dead, label %not_zero
+not_zero:
+ call void @dummy(i32* %a, i32 %b)
+ ret void
+dead:
+ unreachable
+}
diff --git a/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll b/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll
new file mode 100644
index 000000000000..0f8601b855cf
--- /dev/null
+++ b/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll
@@ -0,0 +1,11 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @foo(i32)
+
+define void @g() {
+; CHECK-LABEL: @g(
+ entry:
+; CHECK: call void @foo(i32 0) [ "deopt"() ]
+ call void bitcast (void (i32)* @foo to void ()*) () [ "deopt"() ]
+ ret void
+}
diff --git a/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll b/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
index 551d0efce5ea..2e87a7d78020 100644
--- a/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
+++ b/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
@@ -10,8 +10,8 @@ define i1 @i32_cast_cmp_oeq_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp oeq
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_oeq_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp oeq float %f, -0.0
@@ -28,8 +28,8 @@ define i1 @i32_cast_cmp_oeq_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp oeq
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_oeq_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp oeq float %f, -0.0
@@ -46,8 +46,8 @@ define i1 @i32_cast_cmp_one_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_one_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp one
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_one_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp one float %f, -0.0
@@ -64,8 +64,8 @@ define i1 @i32_cast_cmp_one_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_one_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp one
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_one_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp one float %f, -0.0
@@ -82,8 +82,8 @@ define i1 @i32_cast_cmp_ueq_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp ueq
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_ueq_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp ueq float %f, -0.0
@@ -100,8 +100,8 @@ define i1 @i32_cast_cmp_ueq_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp ueq
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_ueq_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp ueq float %f, -0.0
@@ -118,8 +118,8 @@ define i1 @i32_cast_cmp_une_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_une_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp une
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_une_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp une float %f, -0.0
@@ -136,8 +136,8 @@ define i1 @i32_cast_cmp_une_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_une_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp une
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_une_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp une float %f, -0.0
@@ -154,8 +154,8 @@ define i1 @i32_cast_cmp_ogt_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp ogt
+; CHECK: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_ogt_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp ogt float %f, -0.0
@@ -172,8 +172,8 @@ define i1 @i32_cast_cmp_ogt_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp ogt
+; CHECK: icmp sgt i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_ogt_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp ogt float %f, -0.0
@@ -261,12 +261,13 @@ define i1 @i32_cast_cmp_oeq_int_0_uitofp_ppcf128(i32 %i) {
ret i1 %cmp
}
-; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp oeq
+; Since 0xFFFFFF fits in a float, and one less and
+; one more than it also fits without rounding, the
+; test can be optimized to an integer compare.
-; XCHECK: icmp eq i32 %i, 16777215
-; XCHECK-NEXT: ret
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_uitofp(
+; CHECK: icmp eq i32 %i, 16777215
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_oeq_int_i24max_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp oeq float %f, 0x416FFFFFE0000000
@@ -274,17 +275,18 @@ define i1 @i32_cast_cmp_oeq_int_i24max_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp oeq
-
-; XCHECK: icmp eq i32 %i, 16777215
-; XCHECK-NEXT: ret
+; CHECK: icmp eq i32 %i, 16777215
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_oeq_int_i24max_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp oeq float %f, 0x416FFFFFE0000000
ret i1 %cmp
}
+; Though 0x1000000 fits in a float, one more than it
+; would round to it too, hence a single integer comparison
+; does not suffice.
+
; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24maxp1_uitofp(
; CHECK: uitofp
; CHECK: fcmp oeq
@@ -319,10 +321,18 @@ define i1 @i32_cast_cmp_oeq_int_i32umax_uitofp(i32 %i) {
ret i1 %cmp
}
+; 32-bit unsigned integer cannot possibly round up to 1<<33
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_big_uitofp(
+; CHECK-NEXT: ret i1 false
+define i1 @i32_cast_cmp_oeq_int_big_uitofp(i32 %i) {
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x4200000000000000
+ ret i1 %cmp
+}
+
+; 32-bit signed integer cannot possibly round up to 1<<32
; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32umax_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp oeq
-; CHECK-NEXT: ret
+; CHECK-NEXT: ret i1 false
define i1 @i32_cast_cmp_oeq_int_i32umax_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp oeq float %f, 0x41F0000000000000
@@ -379,10 +389,9 @@ define i1 @i32_cast_cmp_oeq_int_negi32umax_uitofp(i32 %i) {
ret i1 %cmp
}
+; 32-bit signed integer cannot possibly round to -1<<32
; CHECK-LABEL: @i32_cast_cmp_oeq_int_negi32umax_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp oeq
-; CHECK-NEXT: ret
+; CHECK-NEXT: ret i1 false
define i1 @i32_cast_cmp_oeq_int_negi32umax_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp oeq float %f, 0xC1F0000000000000
@@ -452,3 +461,30 @@ define i1 @i32_cast_cmp_une_half_sitofp(i32 %i) {
%cmp = fcmp une float %f, 0.5
ret i1 %cmp
}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_inf_uitofp(
+; CHECK-NEXT: ret i1 false
+define i1 @i32_cast_cmp_oeq_int_inf_uitofp(i32 %i) {
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x7FF0000000000000
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_inf_sitofp(
+; CHECK-NEXT: ret i1 false
+define i1 @i32_cast_cmp_oeq_int_inf_sitofp(i32 %i) {
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x7FF0000000000000
+ ret i1 %cmp
+}
+
+; An i128 could round to an IEEE single-precision infinity.
+; CHECK-LABEL: @i128_cast_cmp_oeq_int_inf_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i128_cast_cmp_oeq_int_inf_uitofp(i128 %i) {
+ %f = uitofp i128 %i to float
+ %cmp = fcmp oeq float %f, 0x7FF0000000000000
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/cast-set.ll b/test/Transforms/InstCombine/cast-set.ll
index 47ba920d9286..8f19bdcdfde3 100644
--- a/test/Transforms/InstCombine/cast-set.ll
+++ b/test/Transforms/InstCombine/cast-set.ll
@@ -10,7 +10,7 @@ define i1 @test1(i32 %X) {
; Convert to setne int %X, 12
%c = icmp ne i32 %A, 12 ; <i1> [#uses=1]
ret i1 %c
-; CHECK-LABEL @test1(
+; CHECK-LABEL: @test1(
; CHECK: %c = icmp ne i32 %X, 12
; CHECK: ret i1 %c
}
@@ -21,7 +21,7 @@ define i1 @test2(i32 %X, i32 %Y) {
; Convert to setne int %X, %Y
%c = icmp ne i32 %A, %B ; <i1> [#uses=1]
ret i1 %c
-; CHECK-LABEL @test2(
+; CHECK-LABEL: @test2(
; CHECK: %c = icmp ne i32 %X, %Y
; CHECK: ret i1 %c
}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 7fe54ef8469b..016b6aa64558 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -187,8 +187,8 @@ define i32 @test21(i32 %X) {
%c2 = sext i8 %c1 to i32 ; <i32> [#uses=1]
%RV = and i32 %c2, 255 ; <i32> [#uses=1]
ret i32 %RV
-; CHECK: %c2.1 = and i32 %X, 255
-; CHECK: ret i32 %c2.1
+; CHECK: %c21 = and i32 %X, 255
+; CHECK: ret i32 %c21
}
define i32 @test22(i32 %X) {
@@ -722,7 +722,7 @@ define i1 @test67(i1 %a, i32 %b) {
; CHECK: ret i1 false
}
-%s = type { i32, i32, i32 }
+%s = type { i32, i32, i16 }
define %s @test68(%s *%p, i64 %i) {
; CHECK-LABEL: @test68(
@@ -1062,6 +1062,43 @@ define i8 @test85(i32 %a) {
; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
}
+define i16 @test86(i16 %v) {
+ %a = sext i16 %v to i32
+ %s = ashr i32 %a, 4
+ %t = trunc i32 %s to i16
+ ret i16 %t
+
+; CHECK-LABEL: @test86(
+; CHECK: [[ASHR:%.*]] = ashr i16 %v, 4
+; CHECK-NEXT: ret i16 [[ASHR]]
+}
+
+define i16 @test87(i16 %v) {
+ %c = sext i16 %v to i32
+ %m = mul nsw i32 %c, 16
+ %a = ashr i32 %m, 16
+ %t = trunc i32 %a to i16
+ ret i16 %t
+
+; CHECK-LABEL: @test87(
+; CHECK: [[ASHR:%.*]] = ashr i16 %v, 12
+; CHECK-NEXT: ret i16 [[ASHR]]
+}
+
+define i16 @test88(i16 %v) {
+ %a = sext i16 %v to i32
+ %s = ashr i32 %a, 18
+ %t = trunc i32 %s to i16
+ ret i16 %t
+
+; Do not optimize to ashr i16 (shift by 18)
+; CHECK-LABEL: @test88(
+; CHECK: [[SEXT:%.*]] = sext i16 %v to i32
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[SEXT]], 18
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[ASHR]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+}
+
; Overflow on a float to int or int to float conversion is undefined (PR21130).
define i8 @overflow_fptosi() {
@@ -1137,3 +1174,14 @@ define i1 @PR23309v2(i32 %A, i32 %B) {
%trunc = trunc i32 %sub to i1
ret i1 %trunc
}
+
+define i16 @PR24763(i8 %V) {
+; CHECK-LABEL: @PR24763(
+; CHECK-NEXT: %[[sh:.*]] = ashr i8
+; CHECK-NEXT: %[[ext:.*]] = sext i8 %[[sh]] to i16
+; CHECK-NEXT: ret i16 %[[ext]]
+ %conv = sext i8 %V to i32
+ %l = lshr i32 %conv, 1
+ %t = trunc i32 %l to i16
+ ret i16 %t
+}
diff --git a/test/Transforms/InstCombine/compare-alloca.ll b/test/Transforms/InstCombine/compare-alloca.ll
new file mode 100644
index 000000000000..ca24da191779
--- /dev/null
+++ b/test/Transforms/InstCombine/compare-alloca.ll
@@ -0,0 +1,97 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+target datalayout = "p:32:32"
+
+
+define i1 @alloca_argument_compare(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %arg, %alloc
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare
+ ; CHECK: ret i1 false
+}
+
+define i1 @alloca_argument_compare_swapped(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %alloc, %arg
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_swapped
+ ; CHECK: ret i1 false
+}
+
+define i1 @alloca_argument_compare_ne(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp ne i64* %arg, %alloc
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_ne
+ ; CHECK: ret i1 true
+}
+
+define i1 @alloca_argument_compare_derived_ptrs(i64* %arg, i64 %x) {
+ %alloc = alloca i64, i64 8
+ %p = getelementptr i64, i64* %arg, i64 %x
+ %q = getelementptr i64, i64* %alloc, i64 3
+ %cmp = icmp eq i64* %p, %q
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_derived_ptrs
+ ; CHECK: ret i1 false
+}
+
+declare void @escape(i64*)
+define i1 @alloca_argument_compare_escaped_alloca(i64* %arg) {
+ %alloc = alloca i64
+ call void @escape(i64* %alloc)
+ %cmp = icmp eq i64* %alloc, %arg
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_escaped_alloca
+ ; CHECK: %cmp = icmp eq i64* %alloc, %arg
+ ; CHECK: ret i1 %cmp
+}
+
+declare void @check_compares(i1, i1)
+define void @alloca_argument_compare_two_compares(i64* %p) {
+ %q = alloca i64, i64 8
+ %r = getelementptr i64, i64* %p, i64 1
+ %s = getelementptr i64, i64* %q, i64 2
+ %cmp1 = icmp eq i64* %p, %q
+ %cmp2 = icmp eq i64* %r, %s
+ call void @check_compares(i1 %cmp1, i1 %cmp2)
+ ret void
+ ; We will only fold if there is a single cmp.
+ ; CHECK-LABEL: alloca_argument_compare_two_compares
+ ; CHECK: call void @check_compares(i1 %cmp1, i1 %cmp2)
+}
+
+define i1 @alloca_argument_compare_escaped_through_store(i64* %arg, i64** %ptr) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %alloc, %arg
+ %p = getelementptr i64, i64* %alloc, i64 1
+ store i64* %p, i64** %ptr
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_escaped_through_store
+ ; CHECK: %cmp = icmp eq i64* %alloc, %arg
+ ; CHECK: ret i1 %cmp
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+define i1 @alloca_argument_compare_benign_instrs(i8* %arg) {
+ %alloc = alloca i8
+ call void @llvm.lifetime.start(i64 1, i8* %alloc)
+ %cmp = icmp eq i8* %arg, %alloc
+ %x = load i8, i8* %arg
+ store i8 %x, i8* %alloc
+ call void @llvm.lifetime.end(i64 1, i8* %alloc)
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_benign_instrs
+ ; CHECK: ret i1 false
+}
+
+declare i64* @allocator()
+define i1 @alloca_call_compare() {
+ %p = alloca i64
+ %q = call i64* @allocator()
+ %cmp = icmp eq i64* %p, %q
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_call_compare
+ ; CHECK: ret i1 false
+}
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index 62cd5b3f94d5..0ed0ac7d8d9c 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -56,3 +56,43 @@ entry:
; CHECK-NOT: zext
; CHECK: ret i32 %2
}
+
+define i1 @test4a(i32 %a) {
+; CHECK-LABEL: @test4a(
+ entry:
+; CHECK: %c = icmp slt i32 %a, 1
+; CHECK-NEXT: ret i1 %c
+ %l = ashr i32 %a, 31
+ %na = sub i32 0, %a
+ %r = lshr i32 %na, 31
+ %signum = or i32 %l, %r
+ %c = icmp slt i32 %signum, 1
+ ret i1 %c
+}
+
+define i1 @test4b(i64 %a) {
+; CHECK-LABEL: @test4b(
+ entry:
+; CHECK: %c = icmp slt i64 %a, 1
+; CHECK-NEXT: ret i1 %c
+ %l = ashr i64 %a, 63
+ %na = sub i64 0, %a
+ %r = lshr i64 %na, 63
+ %signum = or i64 %l, %r
+ %c = icmp slt i64 %signum, 1
+ ret i1 %c
+}
+
+define i1 @test4c(i64 %a) {
+; CHECK-LABEL: @test4c(
+ entry:
+; CHECK: %c = icmp slt i64 %a, 1
+; CHECK-NEXT: ret i1 %c
+ %l = ashr i64 %a, 63
+ %na = sub i64 0, %a
+ %r = lshr i64 %na, 63
+ %signum = or i64 %l, %r
+ %signum.trunc = trunc i64 %signum to i32
+ %c = icmp slt i32 %signum.trunc, 1
+ ret i1 %c
+}
diff --git a/test/Transforms/InstCombine/constant-fold-alias.ll b/test/Transforms/InstCombine/constant-fold-alias.ll
index c872f57c37e1..810687255f61 100644
--- a/test/Transforms/InstCombine/constant-fold-alias.ll
+++ b/test/Transforms/InstCombine/constant-fold-alias.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p1:16:16-p2:32:32-p3:64:64"
@G2 = global i32 42
@G3 = global [4 x i8] zeroinitializer, align 1
-@A1 = alias bitcast (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 2) to i32*)
-@A2 = alias inttoptr (i64 and (i64 ptrtoint (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 3) to i64), i64 -4) to i32*)
+@A1 = alias i32, bitcast (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 2) to i32*)
+@A2 = alias i32, inttoptr (i64 and (i64 ptrtoint (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 3) to i64), i64 -4) to i32*)
define i64 @f1() {
; This cannot be constant folded because G1 is underaligned.
diff --git a/test/Transforms/InstCombine/ctpop.ll b/test/Transforms/InstCombine/ctpop.ll
new file mode 100644
index 000000000000..38612c92aaa4
--- /dev/null
+++ b/test/Transforms/InstCombine/ctpop.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+declare i32 @llvm.ctpop.i32(i32)
+declare i8 @llvm.ctpop.i8(i8)
+declare void @llvm.assume(i1)
+
+define i1 @test1(i32 %arg) {
+; CHECK: @test1
+; CHECK: ret i1 false
+ %and = and i32 %arg, 15
+ %cnt = call i32 @llvm.ctpop.i32(i32 %and)
+ %res = icmp eq i32 %cnt, 9
+ ret i1 %res
+}
+
+define i1 @test2(i32 %arg) {
+; CHECK: @test2
+; CHECK: ret i1 false
+ %and = and i32 %arg, 1
+ %cnt = call i32 @llvm.ctpop.i32(i32 %and)
+ %res = icmp eq i32 %cnt, 2
+ ret i1 %res
+}
+
+define i1 @test3(i32 %arg) {
+; CHECK: @test3
+; CHECK: ret i1 false
+ ;; Use an assume to make all the bits known without triggering constant
+ ;; folding. This is trying to hit a corner case where we have to avoid
+ ;; taking the log of 0.
+ %assume = icmp eq i32 %arg, 0
+ call void @llvm.assume(i1 %assume)
+ %cnt = call i32 @llvm.ctpop.i32(i32 %arg)
+ %res = icmp eq i32 %cnt, 2
+ ret i1 %res
+}
+
+; Negative test for when we know nothing
+define i1 @test4(i8 %arg) {
+; CHECK: @test4
+; CHECK: ret i1 %res
+ %cnt = call i8 @llvm.ctpop.i8(i8 %arg)
+ %res = icmp eq i8 %cnt, 2
+ ret i1 %res
+}
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index 823ec98ebe2b..4b1db9db353b 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -3,7 +3,7 @@
@.str = private constant [3 x i8] c"%c\00"
-define void @foo() nounwind ssp {
+define void @foo() nounwind ssp !dbg !0 {
;CHECK: call i32 @putchar{{.+}} !dbg
%1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 97), !dbg !5
ret void, !dbg !7
@@ -15,9 +15,9 @@ declare i32 @printf(i8*, ...)
!llvm.module.flags = !{!10}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3, function: void ()* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3)
!1 = !DIFile(filename: "m.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocation(line: 5, column: 2, scope: !6)
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index 3875bcc9b8c6..9c8b2a8e4154 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -6,7 +6,7 @@ declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
declare i8* @foo(i8*, i32, i64, i64) nounwind
-define hidden i8* @foobar(i8* %__dest, i32 %__val, i64 %__len) nounwind inlinehint ssp {
+define hidden i8* @foobar(i8* %__dest, i32 %__val, i64 %__len) nounwind inlinehint ssp !dbg !1 {
entry:
%__dest.addr = alloca i8*, align 8
%__val.addr = alloca i32, align 4
@@ -31,16 +31,16 @@ entry:
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!30}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "__dest", line: 78, arg: 1, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "foobar", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 79, file: !27, scope: !2, type: !4, function: i8* (i8*, i32, i64)* @foobar, variables: !25)
+!0 = !DILocalVariable(name: "__dest", line: 78, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "foobar", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 79, file: !27, scope: !2, type: !4, variables: !25)
!2 = !DIFile(filename: "string.h", directory: "Game")
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29, subprograms: !24)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29, subprograms: !24)
!4 = !DISubroutineType(types: !5)
!5 = !{!6}
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !3, baseType: null)
-!7 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "__val", line: 78, arg: 2, scope: !1, file: !2, type: !8)
+!7 = !DILocalVariable(name: "__val", line: 78, arg: 2, scope: !1, file: !2, type: !8)
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "__len", line: 78, arg: 3, scope: !1, file: !2, type: !10)
+!9 = !DILocalVariable(name: "__len", line: 78, arg: 3, scope: !1, file: !2, type: !10)
!10 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", line: 80, file: !27, scope: !3, baseType: !11)
!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "__darwin_size_t", line: 90, file: !27, scope: !3, baseType: !12)
!12 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
diff --git a/test/Transforms/InstCombine/demorgan-zext.ll b/test/Transforms/InstCombine/demorgan-zext.ll
new file mode 100644
index 000000000000..da41fac3e350
--- /dev/null
+++ b/test/Transforms/InstCombine/demorgan-zext.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR22723: Recognize De Morgan's Laws when obfuscated by zexts.
+
+define i32 @demorgan_or(i1 %X, i1 %Y) {
+ %zextX = zext i1 %X to i32
+ %zextY = zext i1 %Y to i32
+ %notX = xor i32 %zextX, 1
+ %notY = xor i32 %zextY, 1
+ %or = or i32 %notX, %notY
+ ret i32 %or
+
+; CHECK-LABEL: demorgan_or(
+; CHECK-NEXT: %[[AND:.*]] = and i1 %X, %Y
+; CHECK-NEXT: %[[ZEXT:.*]] = zext i1 %[[AND]] to i32
+; CHECK-NEXT: %[[XOR:.*]] = xor i32 %[[ZEXT]], 1
+; CHECK-NEXT: ret i32 %[[XOR]]
+}
+
+define i32 @demorgan_and(i1 %X, i1 %Y) {
+ %zextX = zext i1 %X to i32
+ %zextY = zext i1 %Y to i32
+ %notX = xor i32 %zextX, 1
+ %notY = xor i32 %zextY, 1
+ %and = and i32 %notX, %notY
+ ret i32 %and
+
+; CHECK-LABEL: demorgan_and(
+; CHECK-NEXT: %[[OR:.*]] = or i1 %X, %Y
+; CHECK-NEXT: %[[ZEXT:.*]] = zext i1 %[[OR]] to i32
+; CHECK-NEXT: %[[XOR:.*]] = xor i32 %[[ZEXT]], 1
+; CHECK-NEXT: ret i32 %[[XOR]]
+}
+
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index e0ff07baae7c..27a316113e52 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -163,7 +163,7 @@ define i32 @test19(i32 %x) {
; CHECK-LABEL: @test19(
; CHECK-NEXT: icmp eq i32 %x, 1
; CHECK-NEXT: zext i1 %{{.*}} to i32
-; CHECK-NEXT ret i32
+; CHECK-NEXT: ret i32
}
define i32 @test20(i32 %x) {
@@ -270,9 +270,7 @@ define <2 x i32> @test31(<2 x i32> %x) {
%div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647>
ret <2 x i32> %div
; CHECK-LABEL: @test31(
-; CHECK-NEXT: %[[shr:.*]] = lshr <2 x i32> %x, <i32 31, i32 31>
-; CHECK-NEXT: udiv <2 x i32> %[[shr]], <i32 2147483647, i32 2147483647>
-; CHECK-NEXT: ret <2 x i32>
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
}
define i32 @test32(i32 %a, i32 %b) {
@@ -325,3 +323,21 @@ define i32 @test36(i32 %A) {
; CHECK-NEXT: %[[shr:.*]] = lshr exact i32 %[[and]], %A
; CHECK-NEXT: ret i32 %[[shr]]
}
+
+define i32 @test37(i32* %b) {
+entry:
+ store i32 0, i32* %b, align 4
+ %0 = load i32, i32* %b, align 4
+ br i1 undef, label %lor.rhs, label %lor.end
+
+lor.rhs: ; preds = %entry
+ %mul = mul nsw i32 undef, %0
+ br label %lor.end
+
+lor.end: ; preds = %lor.rhs, %entry
+ %t.0 = phi i32 [ %0, %entry ], [ %mul, %lor.rhs ]
+ %div = sdiv i32 %t.0, 2
+ ret i32 %div
+; CHECK-LABEL: @test37(
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/InstCombine/exp2-1.ll b/test/Transforms/InstCombine/exp2-1.ll
index 8e6a0e0d93f6..b6a56b9a9a7e 100644
--- a/test/Transforms/InstCombine/exp2-1.ll
+++ b/test/Transforms/InstCombine/exp2-1.ll
@@ -1,7 +1,8 @@
; Test that the exp2 library call simplifier works correctly.
;
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=CHECK -check-prefix=INTRINSIC -check-prefix=LDEXP -check-prefix=LDEXPF
+; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=INTRINSIC -check-prefix=LDEXP -check-prefix=NOLDEXPF
+; RUN: opt < %s -instcombine -S -mtriple=amdgcn-unknown-unknown | FileCheck %s -check-prefix=INTRINSIC -check-prefix=NOLDEXP -check-prefix=NOLDEXPF
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@@ -80,21 +81,19 @@ declare double @llvm.exp2.f64(double)
declare float @llvm.exp2.f32(float)
define double @test_simplify9(i8 zeroext %x) {
-; CHECK-LABEL: @test_simplify9(
-; CHECK-WIN-LABEL: @test_simplify9(
+; INTRINSIC-LABEL: @test_simplify9(
%conv = uitofp i8 %x to double
%ret = call double @llvm.exp2.f64(double %conv)
-; CHECK: call double @ldexp
-; CHECK-WIN: call double @ldexp
+; LDEXP: call double @ldexp
+; NOLDEXP-NOT: call double @ldexp
ret double %ret
}
define float @test_simplify10(i8 zeroext %x) {
-; CHECK-LABEL: @test_simplify10(
-; CHECK-WIN-LABEL: @test_simplify10(
+; INTRINSIC-LABEL: @test_simplify10(
%conv = uitofp i8 %x to float
%ret = call float @llvm.exp2.f32(float %conv)
-; CHECK: call float @ldexpf
-; CHECK-WIN-NOT: call float @ldexpf
+; LDEXPF: call float @ldexpf
+; NOLDEXPF-NOT: call float @ldexpf
ret float %ret
}
diff --git a/test/Transforms/InstCombine/extractvalue.ll b/test/Transforms/InstCombine/extractvalue.ll
index 6319590873a2..9c293581a069 100644
--- a/test/Transforms/InstCombine/extractvalue.ll
+++ b/test/Transforms/InstCombine/extractvalue.ll
@@ -48,16 +48,16 @@ define i32 @foo(i32 %a, i32 %b) {
; CHECK: call {{.*}}(i32 [[LOAD]])
; CHECK-NOT: extractvalue
; CHECK: ret i32 [[LOAD]]
-define i32 @extract2gep({i32, i32}* %pair, i32* %P) {
+define i32 @extract2gep({i16, i32}* %pair, i32* %P) {
; The load + extractvalue should be converted
; to an inbounds gep + smaller load.
; The new load should be in the same spot as the old load.
- %L = load {i32, i32}, {i32, i32}* %pair
+ %L = load {i16, i32}, {i16, i32}* %pair
store i32 0, i32* %P
br label %loop
loop:
- %E = extractvalue {i32, i32} %L, 1
+ %E = extractvalue {i16, i32} %L, 1
%C = call i32 @baz(i32 %E)
store i32 %C, i32* %P
%cond = icmp eq i32 %C, 0
@@ -67,17 +67,17 @@ end:
ret i32 %E
}
-; CHECK-LABEL: define i32 @doubleextract2gep(
+; CHECK-LABEL: define i16 @doubleextract2gep(
; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}, {{.*}}* %arg, i64 0, i32 1, i32 1
-; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32, i32* [[GEP]]
-; CHECK-NEXT: ret i32 [[LOAD]]
-define i32 @doubleextract2gep({i32, {i32, i32}}* %arg) {
+; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i16, i16* [[GEP]]
+; CHECK-NEXT: ret i16 [[LOAD]]
+define i16 @doubleextract2gep({i16, {i32, i16}}* %arg) {
; The load + extractvalues should be converted
; to a 3-index inbounds gep + smaller load.
- %L = load {i32, {i32, i32}}, {i32, {i32, i32}}* %arg
- %E1 = extractvalue {i32, {i32, i32}} %L, 1
- %E2 = extractvalue {i32, i32} %E1, 1
- ret i32 %E2
+ %L = load {i16, {i32, i16}}, {i16, {i32, i16}}* %arg
+ %E1 = extractvalue {i16, {i32, i16}} %L, 1
+ %E2 = extractvalue {i32, i16} %E1, 1
+ ret i16 %E2
}
; CHECK: define i32 @nogep-multiuse
diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll
index 0479549bea3f..941270df0e97 100644
--- a/test/Transforms/InstCombine/fabs.ll
+++ b/test/Transforms/InstCombine/fabs.ll
@@ -41,6 +41,7 @@ define fp128 @square_fabs_call_f128(fp128 %x) {
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare fp128 @llvm.fabs.f128(fp128)
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
define float @square_fabs_intrinsic_f32(float %x) {
%mul = fmul float %x, %x
@@ -98,3 +99,27 @@ define float @square_fabs_shrink_call2(float %x) {
; CHECK-NEXT: ret float %sq
}
+; A scalar fabs op makes the sign bit zero, so masking off all of the other bits means we can return zero.
+
+define i32 @fabs_value_tracking_f32(float %x) {
+ %call = call float @llvm.fabs.f32(float %x)
+ %bc = bitcast float %call to i32
+ %and = and i32 %bc, 2147483648
+ ret i32 %and
+
+; CHECK-LABEL: fabs_value_tracking_f32(
+; CHECK: ret i32 0
+}
+
+; TODO: A vector fabs op makes the sign bits zero, so masking off all of the other bits means we can return zero.
+
+define <4 x i32> @fabs_value_tracking_v4f32(<4 x float> %x) {
+ %call = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
+ %bc = bitcast <4 x float> %call to <4 x i32>
+ %and = and <4 x i32> %bc, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
+ ret <4 x i32> %and
+
+; CHECK-LABEL: fabs_value_tracking_v4f32(
+; CHECK: ret <4 x i32> %and
+}
+
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 4eebdbdfacf1..fd563481b3ed 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -570,7 +570,7 @@ define double @sqrt_intrinsic_arg_squared(double %x) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_arg_squared(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}
@@ -584,8 +584,8 @@ define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args1(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -597,8 +597,8 @@ define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args2(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -610,8 +610,8 @@ define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args3(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -623,8 +623,8 @@ define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args4(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -636,8 +636,8 @@ define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args5(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -649,8 +649,8 @@ define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args6(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -675,7 +675,7 @@ define double @sqrt_intrinsic_arg_5th(double %x) #0 {
; CHECK-LABEL: sqrt_intrinsic_arg_5th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -692,7 +692,7 @@ define float @sqrt_call_squared_f32(float %x) #0 {
ret float %sqrt
; CHECK-LABEL: sqrt_call_squared_f32(
-; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: %fabs = call fast float @llvm.fabs.f32(float %x)
; CHECK-NEXT: ret float %fabs
}
@@ -702,7 +702,7 @@ define double @sqrt_call_squared_f64(double %x) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_call_squared_f64(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}
@@ -712,7 +712,114 @@ define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
ret fp128 %sqrt
; CHECK-LABEL: sqrt_call_squared_f128(
-; CHECK-NEXT: %fabs = call fp128 @llvm.fabs.f128(fp128 %x)
+; CHECK-NEXT: %fabs = call fast fp128 @llvm.fabs.f128(fp128 %x)
; CHECK-NEXT: ret fp128 %fabs
}
+; =========================================================================
+;
+; Test-cases for fmin / fmax
+;
+; =========================================================================
+
+declare double @fmax(double, double)
+declare double @fmin(double, double)
+declare float @fmaxf(float, float)
+declare float @fminf(float, float)
+declare fp128 @fmaxl(fp128, fp128)
+declare fp128 @fminl(fp128, fp128)
+
+; No NaNs is the minimum requirement to replace these calls.
+; This should always be set when unsafe-fp-math is true, but
+; alternate the attributes for additional test coverage.
+; 'nsz' is implied by the definition of fmax or fmin itself.
+attributes #1 = { "no-nans-fp-math" = "true" }
+
+; Shrink and remove the call.
+define float @max1(float %a, float %b) #0 {
+ %c = fpext float %a to double
+ %d = fpext float %b to double
+ %e = call double @fmax(double %c, double %d)
+ %f = fptrunc double %e to float
+ ret float %f
+
+; CHECK-LABEL: max1(
+; CHECK-NEXT: fcmp fast ogt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+define float @max2(float %a, float %b) #1 {
+ %c = call float @fmaxf(float %a, float %b)
+ ret float %c
+
+; CHECK-LABEL: max2(
+; CHECK-NEXT: fcmp nnan nsz ogt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+
+define double @max3(double %a, double %b) #0 {
+ %c = call double @fmax(double %a, double %b)
+ ret double %c
+
+; CHECK-LABEL: max3(
+; CHECK-NEXT: fcmp fast ogt double %a, %b
+; CHECK-NEXT: select {{.*}} double %a, double %b
+; CHECK-NEXT: ret
+}
+
+define fp128 @max4(fp128 %a, fp128 %b) #1 {
+ %c = call fp128 @fmaxl(fp128 %a, fp128 %b)
+ ret fp128 %c
+
+; CHECK-LABEL: max4(
+; CHECK-NEXT: fcmp nnan nsz ogt fp128 %a, %b
+; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
+; CHECK-NEXT: ret
+}
+
+; Shrink and remove the call.
+define float @min1(float %a, float %b) #1 {
+ %c = fpext float %a to double
+ %d = fpext float %b to double
+ %e = call double @fmin(double %c, double %d)
+ %f = fptrunc double %e to float
+ ret float %f
+
+; CHECK-LABEL: min1(
+; CHECK-NEXT: fcmp nnan nsz olt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+define float @min2(float %a, float %b) #0 {
+ %c = call float @fminf(float %a, float %b)
+ ret float %c
+
+; CHECK-LABEL: min2(
+; CHECK-NEXT: fcmp fast olt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+define double @min3(double %a, double %b) #1 {
+ %c = call double @fmin(double %a, double %b)
+ ret double %c
+
+; CHECK-LABEL: min3(
+; CHECK-NEXT: fcmp nnan nsz olt double %a, %b
+; CHECK-NEXT: select {{.*}} double %a, double %b
+; CHECK-NEXT: ret
+}
+
+define fp128 @min4(fp128 %a, fp128 %b) #0 {
+ %c = call fp128 @fminl(fp128 %a, fp128 %b)
+ ret fp128 %c
+
+; CHECK-LABEL: min4(
+; CHECK-NEXT: fcmp fast olt fp128 %a, %b
+; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
index c8763dc199a9..d27fb5d89f09 100644
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -1,9 +1,12 @@
; Test that the ffs* library call simplifier works correctly.
;
; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-LINUX
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-FFS
+; RUN: opt -instcombine -mtriple=arm64-apple-ios9.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s
+; RUN: opt -instcombine -mtriple=arm64-apple-tvos9.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s
+; RUN: opt -instcombine -mtriple=thumbv7k-apple-watchos2.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s
+; RUN: opt -instcombine -mtriple=x86_64-apple-macosx10.11 -S %s | FileCheck --check-prefix=CHECK-FFS %s
+; RUN: opt -instcombine -mtriple=x86_64-freebsd-gnu -S %s | FileCheck --check-prefix=CHECK-FFS %s
declare i32 @ffs(i32)
declare i32 @ffsl(i32)
@@ -19,17 +22,17 @@ define i32 @test_simplify1() {
}
define i32 @test_simplify2() {
-; CHECK-LINUX-LABEL: @test_simplify2(
+; CHECK-FFS-LABEL: @test_simplify2(
%ret = call i32 @ffsl(i32 0)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 0
+; CHECK-FFS-NEXT: ret i32 0
}
define i32 @test_simplify3() {
-; CHECK-LINUX-LABEL: @test_simplify3(
+; CHECK-FFS-LABEL: @test_simplify3(
%ret = call i32 @ffsll(i64 0)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 0
+; CHECK-FFS-NEXT: ret i32 0
}
; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant.
@@ -56,45 +59,45 @@ define i32 @test_simplify6() {
}
define i32 @test_simplify7() {
-; CHECK-LINUX-LABEL: @test_simplify7(
+; CHECK-FFS-LABEL: @test_simplify7(
%ret = call i32 @ffsl(i32 65536)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 17
+; CHECK-FFS-NEXT: ret i32 17
}
define i32 @test_simplify8() {
-; CHECK-LINUX-LABEL: @test_simplify8(
+; CHECK-FFS-LABEL: @test_simplify8(
%ret = call i32 @ffsll(i64 1024)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 11
+; CHECK-FFS-NEXT: ret i32 11
}
define i32 @test_simplify9() {
-; CHECK-LINUX-LABEL: @test_simplify9(
+; CHECK-FFS-LABEL: @test_simplify9(
%ret = call i32 @ffsll(i64 65536)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 17
+; CHECK-FFS-NEXT: ret i32 17
}
define i32 @test_simplify10() {
-; CHECK-LINUX-LABEL: @test_simplify10(
+; CHECK-FFS-LABEL: @test_simplify10(
%ret = call i32 @ffsll(i64 17179869184)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 35
+; CHECK-FFS-NEXT: ret i32 35
}
define i32 @test_simplify11() {
-; CHECK-LINUX-LABEL: @test_simplify11(
+; CHECK-FFS-LABEL: @test_simplify11(
%ret = call i32 @ffsll(i64 281474976710656)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 49
+; CHECK-FFS-NEXT: ret i32 49
}
define i32 @test_simplify12() {
-; CHECK-LINUX-LABEL: @test_simplify12(
+; CHECK-FFS-LABEL: @test_simplify12(
%ret = call i32 @ffsll(i64 1152921504606846976)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 61
+; CHECK-FFS-NEXT: ret i32 61
}
; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0.
@@ -102,7 +105,7 @@ define i32 @test_simplify12() {
define i32 @test_simplify13(i32 %x) {
; CHECK-LABEL: @test_simplify13(
%ret = call i32 @ffs(i32 %x)
-; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
@@ -111,24 +114,24 @@ define i32 @test_simplify13(i32 %x) {
}
define i32 @test_simplify14(i32 %x) {
-; CHECK-LINUX-LABEL: @test_simplify14(
+; CHECK-FFS-LABEL: @test_simplify14(
%ret = call i32 @ffsl(i32 %x)
-; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
-; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
-; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
-; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
+; CHECK-FFS-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; CHECK-FFS-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
+; CHECK-FFS-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
+; CHECK-FFS-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 [[RET]]
+; CHECK-FFS-NEXT: ret i32 [[RET]]
}
define i32 @test_simplify15(i64 %x) {
-; CHECK-LINUX-LABEL: @test_simplify15(
+; CHECK-FFS-LABEL: @test_simplify15(
%ret = call i32 @ffsll(i64 %x)
-; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
-; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
-; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
-; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
-; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
+; CHECK-FFS-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; CHECK-FFS-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
+; CHECK-FFS-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
+; CHECK-FFS-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
+; CHECK-FFS-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 [[RET]]
+; CHECK-FFS-NEXT: ret i32 [[RET]]
}
diff --git a/test/Transforms/InstCombine/fold-phi-load-metadata.ll b/test/Transforms/InstCombine/fold-phi-load-metadata.ll
new file mode 100644
index 000000000000..7fa26b46e25d
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-phi-load-metadata.ll
@@ -0,0 +1,69 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+@g1 = common global i32* null, align 8
+
+%struct.S1 = type { i32, float }
+%struct.S2 = type { float, i32 }
+
+; Check that instcombine preserves metadata when it merges two loads.
+;
+; CHECK: return:
+; CHECK: load i32*, i32** %{{[a-z0-9.]+}}, align 8, !nonnull ![[EMPTYNODE:[0-9]+]]
+; CHECK: load i32, i32* %{{[a-z0-9.]+}}, align 4, !tbaa ![[TBAA:[0-9]+]], !range ![[RANGE:[0-9]+]], !invariant.load ![[EMPTYNODE:[0-9]+]], !alias.scope ![[ALIAS_SCOPE:[0-9]+]], !noalias ![[NOALIAS:[0-9]+]]
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @phi_load_metadata(%struct.S1* %s1, %struct.S2* %s2, i32 %c, i32** %x0, i32 **%x1) #0 {
+entry:
+ %tobool = icmp eq i32 %c, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %i = getelementptr inbounds %struct.S2, %struct.S2* %s2, i64 0, i32 1
+ %val = load i32, i32* %i, align 4, !tbaa !0, !alias.scope !13, !noalias !14, !invariant.load !17, !range !18
+ %p0 = load i32*, i32** %x0, align 8, !nonnull !17
+ br label %return
+
+if.end: ; preds = %entry
+ %i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i64 0, i32 0
+ %val2 = load i32, i32* %i2, align 4, !tbaa !2, !alias.scope !15, !noalias !16, !invariant.load !17, !range !19
+ %p1 = load i32*, i32** %x1, align 8, !nonnull !17
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %retval = phi i32 [ %val, %if.then ], [ %val2, %if.end ]
+ %pval = phi i32* [ %p0, %if.then ], [ %p1, %if.end ]
+ store i32* %pval, i32** @g1, align 8
+ ret i32 %retval
+}
+
+; CHECK: ![[EMPTYNODE]] = !{}
+; CHECK: ![[TBAA]] = !{![[TAG1:[0-9]+]], ![[TAG1]], i64 0}
+; CHECK: ![[TAG1]] = !{!"int", !{{[0-9]+}}, i64 0}
+; CHECK: ![[RANGE]] = !{i32 10, i32 25}
+; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE1:[0-9]+]], ![[SCOPE2:[0-9]+]]}
+; CHECK: ![[SCOPE0]] = distinct !{![[SCOPE0]], !{{[0-9]+}}, !"scope0"}
+; CHECK: ![[SCOPE1]] = distinct !{![[SCOPE1]], !{{[0-9]+}}, !"scope1"}
+; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"}
+; CHECK: ![[NOALIAS]] = !{![[SCOPE3:[0-9]+]]}
+; CHECK: ![[SCOPE3]] = distinct !{![[SCOPE3]], !{{[0-9]+}}, !"scope3"}
+
+!0 = !{!1, !4, i64 4}
+!1 = !{!"", !7, i64 0, !4, i64 4}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"float", !5, i64 0}
+!8 = !{!8, !"some domain"}
+!9 = !{!9, !8, !"scope0"}
+!10 = !{!10, !8, !"scope1"}
+!11 = !{!11, !8, !"scope2"}
+!12 = !{!12, !8, !"scope3"}
+!13 = !{!9, !10}
+!14 = !{!11, !12}
+!15 = !{!9, !11}
+!16 = !{!10, !12}
+!17 = !{}
+!18 = !{i32 10, i32 20}
+!19 = !{i32 15, i32 25}
diff --git a/test/Transforms/InstCombine/gc.relocate.ll b/test/Transforms/InstCombine/gc.relocate.ll
index a51aac10eb57..308258a19417 100644
--- a/test/Transforms/InstCombine/gc.relocate.ll
+++ b/test/Transforms/InstCombine/gc.relocate.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
; then the return attribute of gc.relocate is dereferenceable(N).
declare zeroext i1 @return_i1()
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
define i32 addrspace(1)* @deref(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" {
; Checks that a dereferenceabler pointer
@@ -15,7 +15,38 @@ define i32 addrspace(1)* @deref(i32 addrspace(1)* dereferenceable(8) %dparam) gc
; CHECK: call dereferenceable(8)
entry:
%load = load i32, i32 addrspace(1)* %dparam
- %tok = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
- %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %tok, i32 7, i32 7)
+ %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
ret i32 addrspace(1)* %relocate
}
+
+define i32 @explicit_nonnull(i32 addrspace(1)* nonnull %dparam) gc "statepoint-example" {
+; Checks that a nonnull pointer
+; CHECK-LABEL: @explicit_nonnull
+; CHECK: ret i32 1
+entry:
+ %load = load i32, i32 addrspace(1)* %dparam
+ %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %relocate, null
+ %ret_val = select i1 %cmp, i32 0, i32 1
+ ret i32 %ret_val
+}
+
+define i32 @implicit_nonnull(i32 addrspace(1)* %dparam) gc "statepoint-example" {
+; Checks that a nonnull pointer
+; CHECK-LABEL: @implicit_nonnull
+; CHECK: ret i32 1
+entry:
+ %cond = icmp eq i32 addrspace(1)* %dparam, null
+ br i1 %cond, label %no_gc, label %gc
+gc:
+ %load = load i32, i32 addrspace(1)* %dparam
+ %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %relocate, null
+ %ret_val = select i1 %cmp, i32 0, i32 1
+ ret i32 %ret_val
+no_gc:
+ unreachable
+}
diff --git a/test/Transforms/InstCombine/gepphigep.ll b/test/Transforms/InstCombine/gepphigep.ll
index b98ea4cd1159..cc90d714be73 100644
--- a/test/Transforms/InstCombine/gepphigep.ll
+++ b/test/Transforms/InstCombine/gepphigep.ll
@@ -134,3 +134,53 @@ exit:
; CHECK: getelementptr{{.*}}i64 1
; CHECK: exit:
}
+
+@.str.4 = external unnamed_addr constant [100 x i8], align 1
+
+; Instcombine shouldn't add new PHI nodes while folding GEPs if that will leave
+; old PHI nodes behind as this is not clearly beneficial.
+; CHECK-LABEL: @test5(
+define void @test5(i16 *%idx, i8 **%in) #0 {
+entry:
+ %0 = load i8*, i8** %in
+ %incdec.ptr = getelementptr inbounds i8, i8* %0, i32 1
+ %1 = load i8, i8* %incdec.ptr, align 1
+ %cmp23 = icmp eq i8 %1, 54
+ br i1 %cmp23, label %while.cond, label %if.then.25
+
+if.then.25:
+ call void @g(i8* getelementptr inbounds ([100 x i8], [100 x i8]* @.str.4, i32 0, i32 0))
+ br label %while.cond
+
+while.cond:
+; CHECK-LABEL: while.cond
+; CHECK-NOT: phi i8* [ %0, %entry ], [ %Ptr, %while.body ], [ %0, %if.then.25 ]
+ %Ptr = phi i8* [ %incdec.ptr, %entry ], [ %incdec.ptr32, %while.body], [%incdec.ptr, %if.then.25 ]
+ %2 = load i8, i8* %Ptr
+ %and = and i8 %2, 64
+ %lnot = icmp eq i8 %and, 0
+ br i1 %lnot, label %while.body, label %while.cond.33
+
+while.body:
+ %incdec.ptr32 = getelementptr inbounds i8, i8* %Ptr, i32 1
+ br label %while.cond
+
+while.cond.33:
+ %incdec.ptr34 = getelementptr inbounds i8, i8* %Ptr, i32 1
+ br label %while.cond.57
+
+while.cond.57:
+ %3 = load i8, i8* %incdec.ptr34, align 1
+ %conv59 = zext i8 %3 to i32
+ %arrayidx61 = getelementptr inbounds i16, i16* %idx, i32 %conv59
+ %4 = load i16, i16* %arrayidx61, align 2
+ %and63 = and i16 %4, 2048
+ %tobool64 = icmp eq i16 %and63, 0
+ br i1 %tobool64, label %while.cond.73, label %while.cond.57
+
+while.cond.73:
+ br label %while.cond.73
+
+}
+
+declare void @g(i8*)
diff --git a/test/Transforms/InstCombine/icmp-range.ll b/test/Transforms/InstCombine/icmp-range.ll
index 041adf76b5e1..f035683170e1 100644
--- a/test/Transforms/InstCombine/icmp-range.ll
+++ b/test/Transforms/InstCombine/icmp-range.ll
@@ -54,8 +54,97 @@ define i1 @test_nonzero6(i8* %argw) {
ret i1 %rval
}
+; Constant not in range, should return true.
+define i1 @test_not_in_range(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_not_in_range
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp ne i32 %val, 6
+ ret i1 %rval
+}
+
+; Constant in range, can not fold.
+define i1 @test_in_range(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_in_range
+; CHECK: icmp ne i32 %val, 3
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp ne i32 %val, 3
+ ret i1 %rval
+}
+
+; Values in range greater than constant.
+define i1 @test_range_sgt_constant(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_range_sgt_constant
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp sgt i32 %val, 0
+ ret i1 %rval
+}
+
+; Values in range less than constant.
+define i1 @test_range_slt_constant(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_range_slt_constant
+; CHECK: ret i1 false
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp sgt i32 %val, 6
+ ret i1 %rval
+}
+
+; Values in union of multiple sub ranges not equal to constant.
+define i1 @test_multi_range1(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_multi_range1
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !4
+ %rval = icmp ne i32 %val, 0
+ ret i1 %rval
+}
+
+; Values in multiple sub ranges not equal to constant, but in
+; union of sub ranges could possibly equal to constant. This
+; in theory could also be folded and might be implemented in
+; the future if shown profitable in practice.
+define i1 @test_multi_range2(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_multi_range2
+; CHECK: icmp ne i32 %val, 7
+ %val = load i32, i32* %arg, !range !4
+ %rval = icmp ne i32 %val, 7
+ ret i1 %rval
+}
+
+; Values' ranges overlap each other, so it can not be simplified.
+define i1 @test_two_ranges(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) {
+; CHECK-LABEL: test_two_ranges
+; CHECK: icmp ult i32 %val2, %val1
+ %val1 = load i32, i32* %arg1, !range !5
+ %val2 = load i32, i32* %arg2, !range !6
+ %rval = icmp ult i32 %val2, %val1
+ ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to false.
+define i1 @test_two_ranges2(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) {
+; CHECK-LABEL: test_two_ranges2
+; CHECK: ret i1 false
+ %val1 = load i32, i32* %arg1, !range !0
+ %val2 = load i32, i32* %arg2, !range !6
+ %rval = icmp ult i32 %val2, %val1
+ ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can simplified to true.
+define i1 @test_two_ranges3(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) {
+; CHECK-LABEL: test_two_ranges3
+; CHECK: ret i1 true
+ %val1 = load i32, i32* %arg1, !range !0
+ %val2 = load i32, i32* %arg2, !range !6
+ %rval = icmp ugt i32 %val2, %val1
+ ret i1 %rval
+}
!0 = !{i32 1, i32 6}
!1 = !{i32 0, i32 6}
!2 = !{i8 0, i8 1}
!3 = !{i8 0, i8 6}
+!4 = !{i32 1, i32 6, i32 8, i32 10}
+!5 = !{i32 5, i32 10}
+!6 = !{i32 8, i32 16}
diff --git a/test/Transforms/InstCombine/icmp-shr.ll b/test/Transforms/InstCombine/icmp-shr.ll
index 52414b99cca7..4fa85a72baf7 100644
--- a/test/Transforms/InstCombine/icmp-shr.ll
+++ b/test/Transforms/InstCombine/icmp-shr.ll
@@ -376,3 +376,12 @@ define i1 @PR21222(i32 %B) {
%cmp = icmp eq i32 %shr, -2
ret i1 %cmp
}
+
+; CHECK-LABEL: @PR24873(
+; CHECK: %[[icmp:.*]] = icmp ugt i64 %V, 61
+; CHECK-NEXT: ret i1 %[[icmp]]
+define i1 @PR24873(i64 %V) {
+ %ashr = ashr i64 -4611686018427387904, %V
+ %icmp = icmp eq i64 %ashr, -1
+ ret i1 %icmp
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index edcf76d5a7d2..7d6ec96b5328 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -819,8 +819,8 @@ define i1 @test68(i32 %x) nounwind uwtable {
; PR14708
; CHECK-LABEL: @test69(
-; CHECK: %1 = and i32 %c, -33
-; CHECK: %2 = icmp eq i32 %1, 65
+; CHECK: %1 = or i32 %c, 32
+; CHECK: %2 = icmp eq i32 %1, 97
; CHECK: ret i1 %2
define i1 @test69(i32 %c) nounwind uwtable {
%1 = icmp eq i32 %c, 97
@@ -1603,3 +1603,72 @@ define i32 @f7(i32 %a, i32 %b) {
%s = select i1 %cmp, i32 10000, i32 0
ret i32 %s
}
+
+; CHECK: @f8(
+; CHECK-NEXT: [[RESULT:%[a-z0-9]+]] = icmp ne i32 %lim, 0
+; CHECK-NEXT: ret i1 [[RESULT]]
+define i1 @f8(i32 %val, i32 %lim) {
+ %lim.sub = add i32 %lim, -1
+ %val.and = and i32 %val, %lim.sub
+ %r = icmp ult i32 %val.and, %lim
+ ret i1 %r
+}
+
+; CHECK: @f9(
+; CHECK-NEXT: [[RESULT:%[a-z0-9]+]] = icmp ne i32 %lim, 0
+; CHECK-NEXT: ret i1 [[RESULT]]
+define i1 @f9(i32 %val, i32 %lim) {
+ %lim.sub = sub i32 %lim, 1
+ %val.and = and i32 %val, %lim.sub
+ %r = icmp ult i32 %val.and, %lim
+ ret i1 %r
+}
+
+; CHECK: @f10(
+; CHECK: [[CMP:%.*]] = icmp uge i16 %p, mul (i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16), i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16))
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @f10(i16 %p) {
+entry:
+ %cmp580 = icmp ule i16 mul (i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16), i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16)), %p
+ ret i1 %cmp580
+}
+
+; CHECK-LABEL: @cmp_sgt_rhs_dec
+; CHECK-NOT: sub
+; CHECK: icmp sge i32 %conv, %i
+define i1 @cmp_sgt_rhs_dec(float %x, i32 %i) {
+ %conv = fptosi float %x to i32
+ %dec = sub nsw i32 %i, 1
+ %cmp = icmp sgt i32 %conv, %dec
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @cmp_sle_rhs_dec
+; CHECK-NOT: sub
+; CHECK: icmp slt i32 %conv, %i
+define i1 @cmp_sle_rhs_dec(float %x, i32 %i) {
+ %conv = fptosi float %x to i32
+ %dec = sub nsw i32 %i, 1
+ %cmp = icmp sle i32 %conv, %dec
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @cmp_sge_rhs_inc
+; CHECK-NOT: add
+; CHECK: icmp sgt i32 %conv, %i
+define i1 @cmp_sge_rhs_inc(float %x, i32 %i) {
+ %conv = fptosi float %x to i32
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp sge i32 %conv, %inc
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @cmp_slt_rhs_inc
+; CHECK-NOT: add
+; CHECK: icmp sle i32 %conv, %i
+define i1 @cmp_slt_rhs_inc(float %x, i32 %i) {
+ %conv = fptosi float %x to i32
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp slt i32 %conv, %inc
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/inline-intrinsic-assert.ll b/test/Transforms/InstCombine/inline-intrinsic-assert.ll
index af34277563e0..c6446d43cffd 100644
--- a/test/Transforms/InstCombine/inline-intrinsic-assert.ll
+++ b/test/Transforms/InstCombine/inline-intrinsic-assert.ll
@@ -9,7 +9,7 @@ define float @foo(float %f1) #0 {
ret float %call
; CHECK-LABEL: @foo(
-; CHECK-NEXT: call float @llvm.fabs.f32
+; CHECK-NEXT: call fast float @llvm.fabs.f32
; CHECK-NEXT: ret float
}
diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll
index 8929c82def7b..c75c771407e5 100644
--- a/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -24,14 +24,51 @@ define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) {
ret <4 x i16> %vec.3
}
-define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
+define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: @test_vcopyq_lane_p64
-; CHECK: extractelement
-; CHECK: insertelement
-; CHECK-NOT: shufflevector
-entry:
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <1 x i64> %b, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT: shufflevector <2 x i64> %a, <2 x i64> %[[WIDEVEC]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: ret <2 x i64> %res
%elt = extractelement <1 x i64> %b, i32 0
%res = insertelement <2 x i64> %a, i64 %elt, i32 1
ret <2 x i64> %res
}
+; PR2109: https://llvm.org/bugs/show_bug.cgi?id=2109
+
+define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) {
+; CHECK-LABEL: @widen_extract2(
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: shufflevector <4 x float> %ins, <4 x float> %[[WIDEVEC]], <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x float> %i2
+ %e1 = extractelement <2 x float> %ext, i32 0
+ %e2 = extractelement <2 x float> %ext, i32 1
+ %i1 = insertelement <4 x float> %ins, float %e1, i32 1
+ %i2 = insertelement <4 x float> %i1, float %e2, i32 3
+ ret <4 x float> %i2
+}
+
+define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) {
+; CHECK-LABEL: @widen_extract3(
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <3 x float> %ext, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: shufflevector <4 x float> %ins, <4 x float> %[[WIDEVEC]], <4 x i32> <i32 6, i32 5, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x float> %i3
+ %e1 = extractelement <3 x float> %ext, i32 0
+ %e2 = extractelement <3 x float> %ext, i32 1
+ %e3 = extractelement <3 x float> %ext, i32 2
+ %i1 = insertelement <4 x float> %ins, float %e1, i32 2
+ %i2 = insertelement <4 x float> %i1, float %e2, i32 1
+ %i3 = insertelement <4 x float> %i2, float %e3, i32 0
+ ret <4 x float> %i3
+}
+
+define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
+; CHECK-LABEL: @widen_extract4(
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: shufflevector <8 x float> %ins, <8 x float> %[[WIDEVEC]], <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x float> %i1
+ %e1 = extractelement <2 x float> %ext, i32 0
+ %i1 = insertelement <8 x float> %ins, float %e1, i32 2
+ ret <8 x float> %i1
+}
+
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index bea063787a75..88f032498271 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -19,6 +19,11 @@ declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
declare double @llvm.cos.f64(double %Val) nounwind readonly
declare double @llvm.sin.f64(double %Val) nounwind readonly
+declare double @llvm.floor.f64(double %Val) nounwind readonly
+declare double @llvm.ceil.f64(double %Val) nounwind readonly
+declare double @llvm.trunc.f64(double %Val) nounwind readonly
+declare double @llvm.rint.f64(double %Val) nounwind readonly
+declare double @llvm.nearbyint.f64(double %Val) nounwind readonly
define i8 @uaddtest1(i8 %A, i8 %B) {
%x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
@@ -447,3 +452,63 @@ entry:
; CHECK-LABEL: @sin(
; CHECK: store volatile double 0.000000e+00, double* %P
}
+
+define void @floor(double *%P) {
+entry:
+ %B = tail call double @llvm.floor.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.floor.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @floor(
+; CHECK: store volatile double 1.000000e+00, double* %P, align 8
+; CHECK: store volatile double -2.000000e+00, double* %P, align 8
+}
+
+define void @ceil(double *%P) {
+entry:
+ %B = tail call double @llvm.ceil.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.ceil.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @ceil(
+; CHECK: store volatile double 2.000000e+00, double* %P, align 8
+; CHECK: store volatile double -1.000000e+00, double* %P, align 8
+}
+
+define void @trunc(double *%P) {
+entry:
+ %B = tail call double @llvm.trunc.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.trunc.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @trunc(
+; CHECK: store volatile double 1.000000e+00, double* %P, align 8
+; CHECK: store volatile double -1.000000e+00, double* %P, align 8
+}
+
+define void @rint(double *%P) {
+entry:
+ %B = tail call double @llvm.rint.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.rint.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @rint(
+; CHECK: store volatile double 2.000000e+00, double* %P, align 8
+; CHECK: store volatile double -2.000000e+00, double* %P, align 8
+}
+
+define void @nearbyint(double *%P) {
+entry:
+ %B = tail call double @llvm.nearbyint.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.nearbyint.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @nearbyint(
+; CHECK: store volatile double 2.000000e+00, double* %P, align 8
+; CHECK: store volatile double -2.000000e+00, double* %P, align 8
+}
diff --git a/test/Transforms/InstCombine/lifetime.ll b/test/Transforms/InstCombine/lifetime.ll
new file mode 100644
index 000000000000..e5cbe3401410
--- /dev/null
+++ b/test/Transforms/InstCombine/lifetime.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @foo(i8* nocapture, i8* nocapture)
+
+define void @bar(i1 %flag) !dbg !4 {
+entry:
+; CHECK-LABEL: @bar(
+; CHECK: %[[T:[^ ]+]] = getelementptr inbounds [1 x i8], [1 x i8]* %text
+; CHECK: %[[B:[^ ]+]] = getelementptr inbounds [1 x i8], [1 x i8]* %buff
+; CHECK: if:
+; CHECK-NEXT: br label %bb2
+; CHECK: bb2:
+; CHECK-NEXT: br label %bb3
+; CHECK: bb3:
+; CHECK-NEXT: call void @llvm.dbg.declare
+; CHECK-NEXT: br label %fin
+; CHECK: call void @llvm.lifetime.start(i64 1, i8* %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.start(i64 1, i8* %[[B]])
+; CHECK-NEXT: call void @foo(i8* %[[B]], i8* %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* %[[B]])
+; CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* %[[T]])
+ %text = alloca [1 x i8], align 1
+ %buff = alloca [1 x i8], align 1
+ %0 = getelementptr inbounds [1 x i8], [1 x i8]* %text, i64 0, i64 0
+ %1 = getelementptr inbounds [1 x i8], [1 x i8]* %buff, i64 0, i64 0
+ br i1 %flag, label %if, label %else
+
+if:
+ call void @llvm.lifetime.start(i64 1, i8* %0)
+ call void @llvm.lifetime.start(i64 1, i8* %1)
+ call void @llvm.lifetime.end(i64 1, i8* %1)
+ call void @llvm.lifetime.end(i64 1, i8* %0)
+ br label %bb2
+
+bb2:
+ call void @llvm.lifetime.start(i64 1, i8* %0)
+ call void @llvm.lifetime.start(i64 1, i8* %1)
+ call void @llvm.lifetime.end(i64 1, i8* %0)
+ call void @llvm.lifetime.end(i64 1, i8* %1)
+ br label %bb3
+
+bb3:
+ call void @llvm.lifetime.start(i64 1, i8* %0)
+ call void @llvm.dbg.declare(metadata [1 x i8]* %text, metadata !14, metadata !25), !dbg !26
+ call void @llvm.lifetime.end(i64 1, i8* %0)
+ br label %fin
+
+else:
+ call void @llvm.lifetime.start(i64 1, i8* %0)
+ call void @llvm.lifetime.start(i64 1, i8* %1)
+ call void @foo(i8* %1, i8* %0)
+ call void @llvm.lifetime.end(i64 1, i8* %1)
+ call void @llvm.lifetime.end(i64 1, i8* %0)
+ br label %fin
+
+fin:
+ ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23}
+!llvm.ident = !{!24}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.cpp", directory: "/home/user")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIBasicType(name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
+!8 = !{!9, !11, !12, !14, !21}
+!9 = !DILocalVariable(name: "Size", arg: 1, scope: !4, file: !1, line: 2, type: !10)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "flag", arg: 2, scope: !4, file: !1, line: 2, type: !7)
+!12 = !DILocalVariable(name: "i", scope: !13, file: !1, line: 3, type: !10)
+!13 = distinct !DILexicalBlock(scope: !4, file: !1, line: 3, column: 3)
+!14 = !DILocalVariable(name: "text", scope: !15, file: !1, line: 4, type: !17)
+!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 3, column: 30)
+!16 = distinct !DILexicalBlock(scope: !13, file: !1, line: 3, column: 3)
+!17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 8, align: 8, elements: !19)
+!18 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!19 = !{!20}
+!20 = !DISubrange(count: 1)
+!21 = !DILocalVariable(name: "buff", scope: !15, file: !1, line: 5, type: !17)
+!22 = !{i32 2, !"Dwarf Version", i32 4}
+!23 = !{i32 2, !"Debug Info Version", i32 3}
+!24 = !{!"clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)"}
+!25 = !DIExpression()
+!26 = !DILocation(line: 4, column: 10, scope: !15)
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index b0bfdc4c4c54..fe1bf1517539 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -148,8 +148,8 @@ define i1 @test8(i32 %X) {
%S = icmp eq i16 %R, 0
ret i1 %S
; CHECK-LABEL: @test8(
-; CHECK-NEXT: and i32 %X, -2
-; CHECK-NEXT: icmp eq i32 {{.*}}, 8
+; CHECK-NEXT: or i32 %X, 1
+; CHECK-NEXT: icmp eq i32 {{.*}}, 9
; CHECK-NEXT: ret i1
}
@@ -233,7 +233,8 @@ define i1 @test10_struct_arr(i32 %x) {
define i1 @test10_struct_arr_noinbounds(i32 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds(
-; CHECK-NEXT %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+; CHECK-NEXT: %r = icmp ne i32 %x, 1
+; CHECK-NEXT: ret i1 %r
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
diff --git a/test/Transforms/InstCombine/load-combine-metadata-2.ll b/test/Transforms/InstCombine/load-combine-metadata-2.ll
new file mode 100644
index 000000000000..bec0d7d2c36b
--- /dev/null
+++ b/test/Transforms/InstCombine/load-combine-metadata-2.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that align metadata is combined
+; CHECK: load i32*, i32** %0
+; CHECK-SAME: !align ![[ALIGN:[0-9]+]]
+define void @test_load_load_combine_metadata(i32**, i32**, i32**) {
+ %a = load i32*, i32** %0, !align !0
+ %b = load i32*, i32** %0, !align !1
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+; CHECK: ![[ALIGN]] = !{i64 4}
+
+!0 = !{i64 4}
+!1 = !{i64 8} \ No newline at end of file
diff --git a/test/Transforms/InstCombine/load-combine-metadata-3.ll b/test/Transforms/InstCombine/load-combine-metadata-3.ll
new file mode 100644
index 000000000000..bad4bb240590
--- /dev/null
+++ b/test/Transforms/InstCombine/load-combine-metadata-3.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that dereferenceable metadata is combined
+; CHECK: load i32*, i32** %0
+; CHECK-SAME: !dereferenceable ![[DEREF:[0-9]+]]
+define void @test_load_load_combine_metadata(i32**, i32**, i32**) {
+ %a = load i32*, i32** %0, !dereferenceable !0
+ %b = load i32*, i32** %0, !dereferenceable !1
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 4}
+
+!0 = !{i64 4}
+!1 = !{i64 8} \ No newline at end of file
diff --git a/test/Transforms/InstCombine/load-combine-metadata-4.ll b/test/Transforms/InstCombine/load-combine-metadata-4.ll
new file mode 100644
index 000000000000..2a1ffcd0605e
--- /dev/null
+++ b/test/Transforms/InstCombine/load-combine-metadata-4.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that dereferenceable_or_null metadata is combined
+; CHECK: load i32*, i32** %0
+; CHECK-SAME: !dereferenceable_or_null ![[DEREF:[0-9]+]]
+define void @test_load_load_combine_metadata(i32**, i32**, i32**) {
+ %a = load i32*, i32** %0, !dereferenceable_or_null !0
+ %b = load i32*, i32** %0, !dereferenceable_or_null !1
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 4}
+
+!0 = !{i64 4}
+!1 = !{i64 8}
diff --git a/test/Transforms/InstCombine/load-combine-metadata.ll b/test/Transforms/InstCombine/load-combine-metadata.ll
index 9b9c1fe607b9..24b26fa42135 100644
--- a/test/Transforms/InstCombine/load-combine-metadata.ll
+++ b/test/Transforms/InstCombine/load-combine-metadata.ll
@@ -17,9 +17,9 @@ define void @test_load_load_combine_metadata(i32*, i32*, i32*) {
ret void
}
-; CHECK: ![[RANGE]] = !{i32 0, i32 1, i32 8, i32 9}
-!0 = !{ i32 0, i32 1 }
-!1 = !{ i32 8, i32 9 }
+; CHECK: ![[RANGE]] = !{i32 0, i32 5, i32 7, i32 9}
+!0 = !{ i32 0, i32 5 }
+!1 = !{ i32 7, i32 9 }
!2 = !{!2}
!3 = !{!3, !2}
!4 = !{!4, !2}
diff --git a/test/Transforms/InstCombine/loadstore-metadata.ll b/test/Transforms/InstCombine/loadstore-metadata.ll
index a30c0bc852ea..f72e36a7ea37 100644
--- a/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -31,7 +31,7 @@ define float @test_load_cast_combine_range(i32* %ptr) {
; CHECK-NOT: !range
; CHECK: ret float
entry:
- %l = load i32, i32* %ptr, !range !5
+ %l = load i32, i32* %ptr, !range !6
%c = bitcast i32 %l to float
ret float %c
}
@@ -57,6 +57,39 @@ entry:
ret i32 %c
}
+define i8* @test_load_cast_combine_align(i32** %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves align
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_align(
+; CHECK: load i8*, i8** %{{.*}}, !align !5
+entry:
+ %l = load i32*, i32** %ptr, !align !5
+ %c = bitcast i32* %l to i8*
+ ret i8* %c
+}
+
+define i8* @test_load_cast_combine_deref(i32** %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves dereferenceable
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_deref(
+; CHECK: load i8*, i8** %{{.*}}, !dereferenceable !5
+entry:
+ %l = load i32*, i32** %ptr, !dereferenceable !5
+ %c = bitcast i32* %l to i8*
+ ret i8* %c
+}
+
+define i8* @test_load_cast_combine_deref_or_null(i32** %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves
+; dereferenceable_or_null metadata.
+; CHECK-LABEL: @test_load_cast_combine_deref_or_null(
+; CHECK: load i8*, i8** %{{.*}}, !dereferenceable_or_null !5
+entry:
+ %l = load i32*, i32** %ptr, !dereferenceable_or_null !5
+ %c = bitcast i32* %l to i8*
+ ret i8* %c
+}
+
define void @test_load_cast_combine_loop(float* %src, i32* %dst, i32 %n) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access
; metadata.
@@ -110,4 +143,5 @@ entry:
!2 = !{ !2, !1 }
!3 = !{ }
!4 = !{ i32 1 }
-!5 = !{ i32 0, i32 42 }
+!5 = !{ i64 8 }
+!6 = !{ i32 0, i32 42 }
diff --git a/test/Transforms/InstCombine/log-pow-nofastmath.ll b/test/Transforms/InstCombine/log-pow-nofastmath.ll
new file mode 100644
index 000000000000..faaef97311ec
--- /dev/null
+++ b/test/Transforms/InstCombine/log-pow-nofastmath.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mylog(double %x, double %y) {
+entry:
+ %pow = call double @llvm.pow.f64(double %x, double %y)
+ %call = call double @log(double %pow)
+ ret double %call
+}
+
+; CHECK-LABEL: define double @mylog(
+; CHECK: %pow = call double @llvm.pow.f64(double %x, double %y)
+; CHECK: %call = call double @log(double %pow)
+; CHECK: ret double %call
+; CHECK: }
+
+define double @test3(double %x) {
+ %call2 = call double @exp2(double %x)
+ %call3 = call double @log(double %call2)
+ ret double %call3
+}
+
+; CHECK-LABEL: @test3
+; CHECK: %call2 = call double @exp2(double %x)
+; CHECK: %call3 = call double @log(double %call2)
+; CHECK: ret double %call3
+; CHECK: }
+
+declare double @log(double)
+declare double @exp2(double)
+declare double @llvm.pow.f64(double, double)
diff --git a/test/Transforms/InstCombine/log-pow.ll b/test/Transforms/InstCombine/log-pow.ll
new file mode 100644
index 000000000000..c5ca1688d34a
--- /dev/null
+++ b/test/Transforms/InstCombine/log-pow.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mylog(double %x, double %y) #0 {
+entry:
+ %pow = call double @llvm.pow.f64(double %x, double %y)
+ %call = call double @log(double %pow) #0
+ ret double %call
+}
+
+; CHECK-LABEL: define double @mylog(
+; CHECK: %log = call fast double @log(double %x) #0
+; CHECK: %mul = fmul fast double %log, %y
+; CHECK: ret double %mul
+; CHECK: }
+
+define double @test2(double ()* %fptr, double %p1) #0 {
+ %call1 = call double %fptr()
+ %pow = call double @log(double %call1)
+ ret double %pow
+}
+
+; CHECK-LABEL: @test2
+; CHECK: log
+
+define double @test3(double %x) #0 {
+ %call2 = call double @exp2(double %x) #0
+ %call3 = call double @log(double %call2) #0
+ ret double %call3
+}
+
+; CHECK-LABEL: @test3
+; CHECK: %call2 = call double @exp2(double %x) #0
+; CHECK: %logmul = fmul fast double %x, 0x3FE62E42FEFA39EF
+; CHECK: ret double %logmul
+; CHECK: }
+
+declare double @log(double) #0
+declare double @exp2(double) #0
+declare double @llvm.pow.f64(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 138001ace951..8fcb8214360d 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -186,3 +186,14 @@ define void @test8() {
call void @_ZdaPvj(i8* %naj, i32 32) builtin
ret void
}
+
+declare noalias i8* @"\01??2@YAPEAX_K@Z"(i64) nobuiltin
+declare void @"\01??3@YAXPEAX@Z"(i8*) nobuiltin
+
+; CHECK-LABEL: @test9(
+define void @test9() {
+ ; CHECK-NOT: call
+ %new_long_long = call noalias i8* @"\01??2@YAPEAX_K@Z"(i64 32) builtin
+ call void @"\01??3@YAXPEAX@Z"(i8* %new_long_long) builtin
+ ret void
+}
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index db15bd66b715..f9ff479e3add 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -2,7 +2,7 @@
;
; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32:64"
@foo = constant [4 x i8] c"foo\00"
@hel = constant [4 x i8] c"hel\00"
@@ -70,3 +70,54 @@ define i32 @test_simplify6() {
ret i32 %ret
; CHECK: ret i32 -1
}
+
+; Check memcmp(mem1, mem2, 8)==0 -> *(int64_t*)mem1 == *(int64_t*)mem2
+
+define i1 @test_simplify7(i64 %x, i64 %y) {
+; CHECK-LABEL: @test_simplify7(
+ %x.addr = alloca i64, align 8
+ %y.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ store i64 %y, i64* %y.addr, align 8
+ %xptr = bitcast i64* %x.addr to i8*
+ %yptr = bitcast i64* %y.addr to i8*
+ %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 8)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+; CHECK: %cmp = icmp eq i64 %x, %y
+; CHECK: ret i1 %cmp
+}
+
+; Check memcmp(mem1, mem2, 4)==0 -> *(int32_t*)mem1 == *(int32_t*)mem2
+
+define i1 @test_simplify8(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_simplify8(
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %xptr = bitcast i32* %x.addr to i8*
+ %yptr = bitcast i32* %y.addr to i8*
+ %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 4)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+; CHECK: %cmp = icmp eq i32 %x, %y
+; CHECK: ret i1 %cmp
+}
+
+; Check memcmp(mem1, mem2, 2)==0 -> *(int16_t*)mem1 == *(int16_t*)mem2
+
+define i1 @test_simplify9(i16 %x, i16 %y) {
+; CHECK-LABEL: @test_simplify9(
+ %x.addr = alloca i16, align 2
+ %y.addr = alloca i16, align 2
+ store i16 %x, i16* %x.addr, align 2
+ store i16 %y, i16* %y.addr, align 2
+ %xptr = bitcast i16* %x.addr to i8*
+ %yptr = bitcast i16* %y.addr to i8*
+ %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 2)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+; CHECK: %cmp = icmp eq i16 %x, %y
+; CHECK: ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll
index 27f7293a6bce..56ea14c8292d 100644
--- a/test/Transforms/InstCombine/memset_chk-1.ll
+++ b/test/Transforms/InstCombine/memset_chk-1.ll
@@ -63,4 +63,30 @@ define i8* @test_no_simplify2() {
ret i8* %ret
}
+; Test that RAUW in SimplifyLibCalls for __memset_chk generates valid IR
+define i32 @test_rauw(i8* %a, i8* %b, i8** %c) {
+; CHECK-LABEL: test_rauw
+entry:
+ %call49 = call i64 @strlen(i8* %a)
+ %add180 = add i64 %call49, 1
+ %yo107 = call i64 @llvm.objectsize.i64.p0i8(i8* %b, i1 false)
+ %call50 = call i8* @__memmove_chk(i8* %b, i8* %a, i64 %add180, i64 %yo107)
+; CHECK: %strlen = call i64 @strlen(i8* %b)
+; CHECK-NEXT: %strchr2 = getelementptr i8, i8* %b, i64 %strlen
+ %call51i = call i8* @strrchr(i8* %b, i32 0)
+ %d = load i8*, i8** %c, align 8
+ %sub182 = ptrtoint i8* %d to i64
+ %sub183 = ptrtoint i8* %b to i64
+ %sub184 = sub i64 %sub182, %sub183
+ %add52.i.i = add nsw i64 %sub184, 1
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %strchr2
+ %call185 = call i8* @__memset_chk(i8* %call51i, i32 0, i64 %add52.i.i, i64 -1)
+ ret i32 4
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64, i64)
+declare i8* @strrchr(i8*, i32)
+declare i64 @strlen(i8* nocapture)
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
+
declare i8* @__memset_chk(i8*, i32, i64, i64)
diff --git a/test/Transforms/InstCombine/minmax-fp.ll b/test/Transforms/InstCombine/minmax-fp.ll
new file mode 100644
index 000000000000..b90afe3405f7
--- /dev/null
+++ b/test/Transforms/InstCombine/minmax-fp.ll
@@ -0,0 +1,156 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; CHECK-LABEL: @t1
+; CHECK-NEXT: fcmp oge float %a, 5.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 5.000000e+00, float %a
+; CHECK-NEXT: fpext float %1 to double
+define double @t1(float %a) {
+ ; This is the canonical form for a type-changing min/max.
+ %1 = fcmp ult float %a, 5.0
+ %2 = select i1 %1, float %a, float 5.0
+ %3 = fpext float %2 to double
+ ret double %3
+}
+
+; CHECK-LABEL: @t2
+; CHECK-NEXT: fcmp oge float %a, 5.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 5.000000e+00, float %a
+; CHECK-NEXT: fpext float %1 to double
+define double @t2(float %a) {
+ ; Check this is converted into canonical form, as above.
+ %1 = fcmp ult float %a, 5.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double 5.0
+ ret double %3
+}
+
+; CHECK-LABEL: @t4
+; CHECK-NEXT: fcmp oge double %a, 5.000000e+00
+; CHECK-NEXT: select i1 %.inv, double 5.000000e+00, double %a
+; CHECK-NEXT: fptrunc double %1 to float
+define float @t4(double %a) {
+ ; Same again, with trunc.
+ %1 = fcmp ult double %a, 5.0
+ %2 = fptrunc double %a to float
+ %3 = select i1 %1, float %2, float 5.0
+ ret float %3
+}
+
+; CHECK-LABEL: @t5
+; CHECK-NEXT: fcmp ult float %a, 5.000000e+00
+; CHECK-NEXT: fpext float %a to double
+; CHECK-NEXT: select i1 %1, double %2, double 5.001
+define double @t5(float %a) {
+ ; different values, should not be converted.
+ %1 = fcmp ult float %a, 5.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double 5.001
+ ret double %3
+}
+
+; CHECK-LABEL: @t6
+; CHECK-NEXT: fcmp ult float %a, -0.0
+; CHECK-NEXT: fpext float %a to double
+; CHECK-NEXT: select i1 %1, double %2, double 0.0
+define double @t6(float %a) {
+ ; Signed zero, should not be converted
+ %1 = fcmp ult float %a, -0.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double 0.0
+ ret double %3
+}
+
+; CHECK-LABEL: @t7
+; CHECK-NEXT: fcmp ult float %a, 0.0
+; CHECK-NEXT: fpext float %a to double
+; CHECK-NEXT: select i1 %1, double %2, double -0.0
+define double @t7(float %a) {
+ ; Signed zero, should not be converted
+ %1 = fcmp ult float %a, 0.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double -0.0
+ ret double %3
+}
+
+; CHECK-LABEL: @t8
+; CHECK-NEXT: fcmp oge float %a, 5.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 5.000000e+00, float %a
+; CHECK-NEXT: fptoui float %1 to i64
+define i64 @t8(float %a) {
+ %1 = fcmp ult float %a, 5.0
+ %2 = fptoui float %a to i64
+ %3 = select i1 %1, i64 %2, i64 5
+ ret i64 %3
+}
+
+; CHECK-LABEL: @t9
+; CHECK-NEXT: fcmp oge float %a, 0.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 0.000000e+00, float %a
+; CHECK-NEXT: fptosi float %1 to i8
+define i8 @t9(float %a) {
+ %1 = fcmp ult float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 0
+ ret i8 %3
+}
+
+; CHECK-LABEL: @t11
+; CHECK-NEXT: fcmp fast oge float %b, %a
+; CHECK-NEXT: select i1 %.inv, float %a, float %b
+; CHECK-NEXT: fptosi
+define i8 @t11(float %a, float %b) {
+ ; Either operand could be NaN, but fast modifier applied.
+ %1 = fcmp fast ult float %b, %a
+ %2 = fptosi float %a to i8
+ %3 = fptosi float %b to i8
+ %4 = select i1 %1, i8 %3, i8 %2
+ ret i8 %4
+}
+
+; CHECK-LABEL: @t12
+; CHECK-NEXT: fcmp nnan oge float %b, %a
+; CHECK-NEXT: select i1 %.inv, float %a, float %b
+; CHECK-NEXT: fptosi float %.v to i8
+define i8 @t12(float %a, float %b) {
+ ; Either operand could be NaN, but nnan modifier applied.
+ %1 = fcmp nnan ult float %b, %a
+ %2 = fptosi float %a to i8
+ %3 = fptosi float %b to i8
+ %4 = select i1 %1, i8 %3, i8 %2
+ ret i8 %4
+}
+
+; CHECK-LABEL: @t13
+; CHECK-NEXT: fcmp ult float %a, 1.500000e+00
+; CHECK-NEXT: fptosi float %a to i8
+; CHECK-NEXT: select i1 %1, i8 %2, i8 1
+define i8 @t13(float %a) {
+ ; Float and int values do not match.
+ %1 = fcmp ult float %a, 1.5
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 1
+ ret i8 %3
+}
+
+; CHECK-LABEL: @t14
+; CHECK-NEXT: fcmp ule float %a, 0.000000e+00
+; CHECK-NEXT: fptosi float %a to i8
+; CHECK-NEXT: select i1 %1, i8 %2, i8 0
+define i8 @t14(float %a) {
+ ; <= comparison, where %a could be -0.0. Not safe.
+ %1 = fcmp ule float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 0
+ ret i8 %3
+}
+
+; CHECK-LABEL: @t15
+; CHECK-NEXT: fcmp nsz oge float %a, 0.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 0.000000e+00, float %a
+; CHECK-NEXT: fptosi float %1 to i8
+define i8 @t15(float %a) {
+ %1 = fcmp nsz ule float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 0
+ ret i8 %3
+}
diff --git a/test/Transforms/InstCombine/neon-intrinsics.ll b/test/Transforms/InstCombine/neon-intrinsics.ll
index 3ad09cc6c694..d22fa9c811dc 100644
--- a/test/Transforms/InstCombine/neon-intrinsics.ll
+++ b/test/Transforms/InstCombine/neon-intrinsics.ll
@@ -3,8 +3,8 @@
; The alignment arguments for NEON load/store intrinsics can be increased
; by instcombine. Check for this.
-; CHECK: vld4.v2i32({{.*}}, i32 32)
-; CHECK: vst4.v2i32({{.*}}, i32 16)
+; CHECK: vld4.v2i32.p0i8({{.*}}, i32 32)
+; CHECK: vst4.p0i8.v2i32({{.*}}, i32 16)
@x = common global [8 x i32] zeroinitializer, align 32
@y = common global [8 x i32] zeroinitializer, align 16
@@ -12,14 +12,14 @@
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
define void @test() nounwind ssp {
- %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3
- call void @llvm.arm.neon.vst4.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
+ call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
ret void
}
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst4.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/Transforms/InstCombine/no_cgscc_assert.ll b/test/Transforms/InstCombine/no_cgscc_assert.ll
index cec5297695b1..3df04d2c8902 100644
--- a/test/Transforms/InstCombine/no_cgscc_assert.ll
+++ b/test/Transforms/InstCombine/no_cgscc_assert.ll
@@ -10,7 +10,7 @@ define float @bar(float %f) #0 {
ret float %call1
; CHECK-LABEL: @bar(
-; CHECK-NEXT: call float @llvm.fabs.f32
+; CHECK-NEXT: call fast float @llvm.fabs.f32
; CHECK-NEXT: ret float
}
diff --git a/test/Transforms/InstCombine/nonnull-attribute.ll b/test/Transforms/InstCombine/nonnull-attribute.ll
new file mode 100644
index 000000000000..74fb09114927
--- /dev/null
+++ b/test/Transforms/InstCombine/nonnull-attribute.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This test makes sure that we do not assume globals in address spaces other
+; than 0 are able to be null.
+
+@as0 = external global i32
+@as1 = external addrspace(1) global i32
+
+declare void @addrspace0(i32*)
+declare void @addrspace1(i32 addrspace(1)*)
+
+; CHECK: call void @addrspace0(i32* nonnull @as0)
+; CHECK: call void @addrspace1(i32 addrspace(1)* @as1)
+
+define void @test() {
+ call void @addrspace0(i32* @as0)
+ call void @addrspace1(i32 addrspace(1)* @as1)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll
index 9d59edd7934d..edb402a125ac 100644
--- a/test/Transforms/InstCombine/not.ll
+++ b/test/Transforms/InstCombine/not.ll
@@ -5,51 +5,51 @@
; CHECK-NOT: xor
define i32 @test1(i32 %A) {
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- %C = xor i32 %B, -1 ; <i32> [#uses=1]
+ %B = xor i32 %A, -1
+ %C = xor i32 %B, -1
ret i32 %C
}
define i1 @test2(i32 %A, i32 %B) {
; Can change into setge
- %cond = icmp sle i32 %A, %B ; <i1> [#uses=1]
- %Ret = xor i1 %cond, true ; <i1> [#uses=1]
+ %cond = icmp sle i32 %A, %B
+ %Ret = xor i1 %cond, true
ret i1 %Ret
}
-; Test that demorgans law can be instcombined
+; Test that De Morgan's law can be instcombined.
define i32 @test3(i32 %A, i32 %B) {
- %a = xor i32 %A, -1 ; <i32> [#uses=1]
- %b = xor i32 %B, -1 ; <i32> [#uses=1]
- %c = and i32 %a, %b ; <i32> [#uses=1]
- %d = xor i32 %c, -1 ; <i32> [#uses=1]
+ %a = xor i32 %A, -1
+ %b = xor i32 %B, -1
+ %c = and i32 %a, %b
+ %d = xor i32 %c, -1
ret i32 %d
}
-; Test that demorgens law can work with constants
+; Test that De Morgan's law can work with constants.
define i32 @test4(i32 %A, i32 %B) {
- %a = xor i32 %A, -1 ; <i32> [#uses=1]
- %c = and i32 %a, 5 ; <i32> [#uses=1]
- %d = xor i32 %c, -1 ; <i32> [#uses=1]
+ %a = xor i32 %A, -1
+ %c = and i32 %a, 5
+ %d = xor i32 %c, -1
ret i32 %d
}
-; test the mirror of demorgans law...
+; Test the mirror of De Morgan's law.
define i32 @test5(i32 %A, i32 %B) {
- %a = xor i32 %A, -1 ; <i32> [#uses=1]
- %b = xor i32 %B, -1 ; <i32> [#uses=1]
- %c = or i32 %a, %b ; <i32> [#uses=1]
- %d = xor i32 %c, -1 ; <i32> [#uses=1]
+ %a = xor i32 %A, -1
+ %b = xor i32 %B, -1
+ %c = or i32 %a, %b
+ %d = xor i32 %c, -1
ret i32 %d
}
; PR2298
-define zeroext i8 @test6(i32 %a, i32 %b) nounwind {
+define zeroext i8 @test6(i32 %a, i32 %b) {
entry:
- %tmp1not = xor i32 %a, -1 ; <i32> [#uses=1]
- %tmp2not = xor i32 %b, -1 ; <i32> [#uses=1]
- %tmp3 = icmp slt i32 %tmp1not, %tmp2not ; <i1> [#uses=1]
- %retval67 = zext i1 %tmp3 to i8 ; <i8> [#uses=1]
+ %tmp1not = xor i32 %a, -1
+ %tmp2not = xor i32 %b, -1
+ %tmp3 = icmp slt i32 %tmp1not, %tmp2not
+ %retval67 = zext i1 %tmp3 to i8
ret i8 %retval67
}
@@ -58,3 +58,4 @@ define <2 x i1> @test7(<2 x i32> %A, <2 x i32> %B) {
%Ret = xor <2 x i1> %cond, <i1 true, i1 true>
ret <2 x i1> %Ret
}
+
diff --git a/test/Transforms/InstCombine/objsize-address-space.ll b/test/Transforms/InstCombine/objsize-address-space.ll
index 6046dad89790..ab4b64dfbf07 100644
--- a/test/Transforms/InstCombine/objsize-address-space.ll
+++ b/test/Transforms/InstCombine/objsize-address-space.ll
@@ -32,7 +32,7 @@ define i16 @foo_as3_i16() nounwind {
ret i16 %1
}
-@a_alias = weak alias [60 x i8] addrspace(3)* @a_as3
+@a_alias = weak alias [60 x i8], [60 x i8] addrspace(3)* @a_as3
define i32 @foo_alias() nounwind {
%1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false)
ret i32 %1
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 335a816e9ece..2af391f907cc 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -219,7 +219,7 @@ define i32 @test13(i8** %esc) {
ret i32 %1
}
-@globalalias = internal alias [60 x i8]* @a
+@globalalias = internal alias [60 x i8], [60 x i8]* @a
; CHECK-LABEL: @test18(
; CHECK-NEXT: ret i32 60
@@ -229,7 +229,7 @@ define i32 @test18() {
ret i32 %1
}
-@globalalias2 = weak alias [60 x i8]* @a
+@globalalias2 = weak alias [60 x i8], [60 x i8]* @a
; CHECK-LABEL: @test19(
; CHECK: llvm.objectsize
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index b91a5954d97e..a2bc4e7d9832 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -182,7 +182,7 @@ define i1 @test19(i32 %A) {
%D = or i1 %B, %C
ret i1 %D
; CHECK-LABEL: @test19(
-; CHECK: and i32
+; CHECK: or i32
; CHECK: icmp eq
; CHECK: ret i1
}
diff --git a/test/Transforms/InstCombine/phi-load-metadata-2.ll b/test/Transforms/InstCombine/phi-load-metadata-2.ll
new file mode 100644
index 000000000000..cfbf2dea8a7a
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-load-metadata-2.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that dereferenceable metadata is combined
+; CHECK-LABEL: cont:
+; CHECK: load i32*, i32**
+; CHECK-SAME: !dereferenceable ![[DEREF:[0-9]+]]
+define i32* @test_phi_combine_load_metadata(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !dereferenceable !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8, !dereferenceable !1
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/InstCombine/phi-load-metadata-3.ll b/test/Transforms/InstCombine/phi-load-metadata-3.ll
new file mode 100644
index 000000000000..39049c9c7181
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-load-metadata-3.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that dereferenceable_or_null metadata is combined
+; CHECK-LABEL: cont:
+; CHECK: load i32*, i32**
+; CHECK-SAME: !dereferenceable_or_null ![[DEREF:[0-9]+]]
+define i32* @test_phi_combine_load_metadata(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !dereferenceable_or_null !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8, !dereferenceable_or_null !1
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/InstCombine/phi-load-metadata.ll b/test/Transforms/InstCombine/phi-load-metadata.ll
new file mode 100644
index 000000000000..004a355ca441
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-load-metadata.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that align metadata is combined
+; CHECK-LABEL: cont:
+; CHECK: load i32*, i32**
+; CHECK-SAME: !align ![[ALIGN:[0-9]+]]
+define i32* @test_phi_combine_load_metadata(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !align !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8, !align !1
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+; CHECK: ![[ALIGN]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index 54cc4cfe4594..d0441d76d399 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -630,3 +630,133 @@ done:
%y = phi i32 [ undef, %entry ]
ret i32 %y
}
+
+; We should be able to fold the zexts to the other side of the phi
+; even though there's a constant value input to the phi. This is
+; because we can shrink that constant to the smaller phi type.
+
+define i1 @PR24766(i8 %x1, i8 %x2, i8 %condition) {
+entry:
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ 0, %entry ], [ %frombool2, %sw2 ], [ %frombool1, %sw1 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766(
+; CHECK: %[[RES:.*]] = phi i1 [ false, %entry ], [ %cmp2, %sw2 ], [ %cmp1, %sw1 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; Same as above (a phi with more than 2 operands), but no constants
+
+define i1 @PR24766_no_constants(i8 %x1, i8 %x2, i8 %condition, i1 %another_condition) {
+entry:
+ %frombool0 = zext i1 %another_condition to i8
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ %frombool0, %entry ], [ %frombool2, %sw2 ], [ %frombool1, %sw1 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766_no_constants(
+; CHECK: %[[RES:.*]] = phi i1 [ %another_condition, %entry ], [ %cmp2, %sw2 ], [ %cmp1, %sw1 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; Same as above (a phi with more than 2 operands), but two constants
+
+define i1 @PR24766_two_constants(i8 %x1, i8 %x2, i8 %condition) {
+entry:
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ 0, %entry ], [ 1, %sw2 ], [ %frombool1, %sw1 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766_two_constants(
+; CHECK: %[[RES:.*]] = phi i1 [ false, %entry ], [ true, %sw2 ], [ %cmp1, %sw1 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; Same as above (a phi with more than 2 operands), but two constants and two variables
+
+define i1 @PR24766_two_constants_two_var(i8 %x1, i8 %x2, i8 %condition) {
+entry:
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ i32 2, label %sw3
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+sw3:
+ %cmp3 = icmp sge i8 %x1, %x2
+ %frombool3 = zext i1 %cmp3 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ 0, %entry ], [ %frombool2, %sw2 ], [ %frombool1, %sw1 ], [ 1, %sw3 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766_two_constants_two_var(
+; CHECK: %[[RES:.*]] = phi i1 [ false, %entry ], [ %cmp2, %sw2 ], [ %cmp1, %sw1 ], [ true, %sw3 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index fb3b7d796160..f2b56fd33d64 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -6,6 +6,8 @@
; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-EXP10
; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-EXP10
; RUN: opt -instcombine -S < %s -mtriple=x86_64-netbsd | FileCheck %s --check-prefix=CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-tvos9.0 | FileCheck %s --check-prefix=CHECK-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-watchos2.0 | FileCheck %s --check-prefix=CHECK-EXP10
; rdar://7251832
; NOTE: The readonly attribute on the pow call should be preserved
diff --git a/test/Transforms/InstCombine/pow-4.ll b/test/Transforms/InstCombine/pow-4.ll
new file mode 100644
index 000000000000..76ef4c5de923
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-4.ll
@@ -0,0 +1,120 @@
+; Test that the pow library call simplifier works correctly.
+
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; Function Attrs: nounwind readnone
+declare double @llvm.pow.f64(double, double)
+declare float @llvm.pow.f32(float, float)
+
+; pow(x, 4.0f)
+define float @test_simplify_4f(float %x) #0 {
+; CHECK-LABEL: @test_simplify_4f(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul float %x, %x
+; CHECK-NEXT: %2 = fmul float %1, %1
+; CHECK-NEXT: ret float %2
+ %1 = call float @llvm.pow.f32(float %x, float 4.000000e+00)
+ ret float %1
+}
+
+; pow(x, 3.0)
+define double @test_simplify_3(double %x) #0 {
+; CHECK-LABEL: @test_simplify_3(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: ret double %2
+ %1 = call double @llvm.pow.f64(double %x, double 3.000000e+00)
+ ret double %1
+}
+
+; pow(x, 4.0)
+define double @test_simplify_4(double %x) #0 {
+; CHECK-LABEL: @test_simplify_4(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %1
+; CHECK-NEXT: ret double %2
+ %1 = call double @llvm.pow.f64(double %x, double 4.000000e+00)
+ ret double %1
+}
+
+; pow(x, 15.0)
+define double @test_simplify_15(double %x) #0 {
+; CHECK-LABEL: @test_simplify_15(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: %3 = fmul double %2, %2
+; CHECK-NEXT: %4 = fmul double %3, %3
+; CHECK-NEXT: %5 = fmul double %2, %4
+; CHECK-NEXT: ret double %5
+ %1 = call double @llvm.pow.f64(double %x, double 1.500000e+01)
+ ret double %1
+}
+
+; pow(x, -7.0)
+define double @test_simplify_neg_7(double %x) #0 {
+; CHECK-LABEL: @test_simplify_neg_7(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: %3 = fmul double %1, %2
+; CHECK-NEXT: %4 = fmul double %1, %3
+; CHECK-NEXT: %5 = fdiv double 1.000000e+00, %4
+; CHECK-NEXT: ret double %5
+ %1 = call double @llvm.pow.f64(double %x, double -7.000000e+00)
+ ret double %1
+}
+
+; pow(x, -19.0)
+define double @test_simplify_neg_19(double %x) #0 {
+; CHECK-LABEL: @test_simplify_neg_19(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %1
+; CHECK-NEXT: %3 = fmul double %2, %2
+; CHECK-NEXT: %4 = fmul double %3, %3
+; CHECK-NEXT: %5 = fmul double %1, %4
+; CHECK-NEXT: %6 = fmul double %5, %x
+; CHECK-NEXT: %7 = fdiv double 1.000000e+00, %6
+; CHECK-NEXT: ret double %7
+ %1 = call double @llvm.pow.f64(double %x, double -1.900000e+01)
+ ret double %1
+}
+
+; pow(x, 11.23)
+define double @test_simplify_11_23(double %x) #0 {
+; CHECK-LABEL: @test_simplify_11_23(
+; CHECK-NOT: fmul
+; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01)
+; CHECK-NEXT: ret double %1
+ %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01)
+ ret double %1
+}
+
+; pow(x, 32.0)
+define double @test_simplify_32(double %x) #0 {
+; CHECK-LABEL: @test_simplify_32(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %1
+; CHECK-NEXT: %3 = fmul double %2, %2
+; CHECK-NEXT: %4 = fmul double %3, %3
+; CHECK-NEXT: %5 = fmul double %4, %4
+; CHECK-NEXT: ret double %5
+ %1 = call double @llvm.pow.f64(double %x, double 3.200000e+01)
+ ret double %1
+}
+
+; pow(x, 33.0)
+define double @test_simplify_33(double %x) #0 {
+; CHECK-LABEL: @test_simplify_33(
+; CHECK-NOT: fmul
+; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01)
+; CHECK-NEXT: ret double %1
+ %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01)
+ ret double %1
+}
+
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="true" "use-soft-float"="false" }
diff --git a/test/Transforms/InstCombine/pow-exp-nofastmath.ll b/test/Transforms/InstCombine/pow-exp-nofastmath.ll
new file mode 100644
index 000000000000..9e596fa3a723
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-exp-nofastmath.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mypow(double %x, double %y) #0 {
+entry:
+ %call = call double @exp(double %x)
+ %pow = call double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @mypow(
+; CHECK: %call = call double @exp(double %x)
+; CHECK: %pow = call double @llvm.pow.f64(double %call, double %y)
+; CHECK: ret double %pow
+; CHECK: }
+
+declare double @exp(double) #1
+declare double @llvm.pow.f64(double, double)
diff --git a/test/Transforms/InstCombine/pow-exp.ll b/test/Transforms/InstCombine/pow-exp.ll
new file mode 100644
index 000000000000..acc512734ec5
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-exp.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mypow(double %x, double %y) #0 {
+entry:
+ %call = call double @exp(double %x)
+ %pow = call double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @mypow(
+; CHECK: %mul = fmul fast double %x, %y
+; CHECK: %exp = call fast double @exp(double %mul) #0
+; CHECK: ret double %exp
+; CHECK: }
+
+define double @test2(double ()* %fptr, double %p1) #0 {
+ %call1 = call double %fptr()
+ %pow = call double @llvm.pow.f64(double %call1, double %p1)
+ ret double %pow
+}
+
+; CHECK-LABEL: @test2
+; CHECK: llvm.pow.f64
+
+declare double @exp(double) #1
+declare double @llvm.pow.f64(double, double)
+attributes #0 = { "unsafe-fp-math"="true" }
+attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/pow-exp2.ll b/test/Transforms/InstCombine/pow-exp2.ll
new file mode 100644
index 000000000000..c42cab391e64
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-exp2.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mypow(double %x, double %y) #0 {
+entry:
+ %call = call double @exp2(double %x)
+ %pow = call double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @mypow(
+; CHECK: %mul = fmul fast double %x, %y
+; CHECK: %exp2 = call fast double @exp2(double %mul) #0
+; CHECK: ret double %exp2
+; CHECK: }
+
+declare double @exp2(double) #1
+declare double @llvm.pow.f64(double, double)
+attributes #0 = { "unsafe-fp-math"="true" }
+attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/pow-sqrt.ll b/test/Transforms/InstCombine/pow-sqrt.ll
new file mode 100644
index 000000000000..8fc74e4a0024
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-sqrt.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mypow(double %x) #0 {
+entry:
+ %pow = call double @llvm.pow.f64(double %x, double 5.000000e-01)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @mypow(
+; CHECK: %sqrt = call double @sqrt(double %x) #1
+; CHECK: ret double %sqrt
+; CHECK: }
+
+declare double @llvm.pow.f64(double, double)
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/pr20059.ll b/test/Transforms/InstCombine/pr20059.ll
deleted file mode 100644
index 0ef315936ff2..000000000000
--- a/test/Transforms/InstCombine/pr20059.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt -S -instcombine < %s | FileCheck %s
-
-; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
-; for an srem operation. This is not a valid optimization because it may cause a trap
-; on div-by-zero.
-
-; CHECK-LABEL: @do_not_reorder
-; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2
-define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) {
- %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
- %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
- %retval = srem <4 x i32> %splat1, %splat2
- ret <4 x i32> %retval
-}
diff --git a/test/Transforms/InstCombine/pr24605.ll b/test/Transforms/InstCombine/pr24605.ll
new file mode 100644
index 000000000000..4b7b36137e6a
--- /dev/null
+++ b/test/Transforms/InstCombine/pr24605.ll
@@ -0,0 +1,15 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i1 @f(i8* %a, i8 %b) {
+; CHECK-LABEL: @f(
+entry:
+ %or = or i8 %b, -117
+ %sub = add i8 %or, -1
+ store i8 %sub, i8* %a, align 1
+ %cmp = icmp ugt i8 %or, %sub
+ ret i1 %cmp
+; CHECK: ret i1 true
+}
diff --git a/test/Transforms/InstCombine/pr25745.ll b/test/Transforms/InstCombine/pr25745.ll
new file mode 100644
index 000000000000..3bf9efc92b90
--- /dev/null
+++ b/test/Transforms/InstCombine/pr25745.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; Checking for a crash
+
+declare void @use.i1(i1 %val)
+declare void @use.i64(i64 %val)
+
+define i64 @f(i32 %x) {
+; CHECK-LABEL: @f(
+ entry:
+ %x.wide = sext i32 %x to i64
+ %minus.x = sub i32 0, %x
+ %minus.x.wide = sext i32 %minus.x to i64
+ %c = icmp slt i32 %x, 0
+ %val = select i1 %c, i64 %x.wide, i64 %minus.x.wide
+ call void @use.i1(i1 %c)
+ call void @use.i64(i64 %x.wide)
+ ret i64 %val
+; CHECK: ret i64 %val
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 79c2ae28105e..0b5b5deb68c5 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -575,7 +575,7 @@ entry:
; CHECK: %0 = shl i8 %tmp4, 2
; CHECK: %tmp54 = and i8 %0, 16
%tmp55 = xor i8 %tmp54, %tmp51
-; CHECK: ret i8 %tmp55.1
+; CHECK: ret i8 %tmp551
ret i8 %tmp55
}
@@ -743,7 +743,7 @@ define i32 @test57(i32 %x) {
%or = or i32 %shl, 7
ret i32 %or
; CHECK-LABEL: @test57(
-; CHECK: %shl = shl i32 %shr.1, 4
+; CHECK: %shl = shl i32 %shr1, 4
}
diff --git a/test/Transforms/InstCombine/sincospi.ll b/test/Transforms/InstCombine/sincospi.ll
index f49fb35cb76a..10342c500961 100644
--- a/test/Transforms/InstCombine/sincospi.ll
+++ b/test/Transforms/InstCombine/sincospi.ll
@@ -90,3 +90,12 @@ define double @test_constant_f64() {
; CHECK-NO-SINCOS: call double @__sinpi
; CHECK-NO-SINCOS: call double @__cospi
}
+
+define double @test_fptr(double (double)* %fptr, double %p1) {
+ %sin = call double @__sinpi(double %p1) #0
+ %cos = call double %fptr(double %p1)
+ %res = fadd double %sin, %cos
+ ret double %res
+; CHECK-LABEL: @test_fptr
+; CHECK: __sinpi
+}
diff --git a/test/Transforms/InstCombine/sqrt-nofast.ll b/test/Transforms/InstCombine/sqrt-nofast.ll
new file mode 100644
index 000000000000..0d1dfc1542a5
--- /dev/null
+++ b/test/Transforms/InstCombine/sqrt-nofast.ll
@@ -0,0 +1,25 @@
+; Check that we skip transformations if the attribute unsafe-fp-math
+; is not set.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @mysqrt(float %x, float %y) #0 {
+entry:
+ %x.addr = alloca float, align 4
+ %y.addr = alloca float, align 4
+ store float %x, float* %x.addr, align 4
+ store float %y, float* %y.addr, align 4
+ %0 = load float, float* %x.addr, align 4
+ %1 = load float, float* %x.addr, align 4
+ %mul = fmul fast float %0, %1
+ %2 = call float @llvm.sqrt.f32(float %mul)
+ ret float %2
+}
+
+declare float @llvm.sqrt.f32(float) #1
+
+; CHECK: define float @mysqrt(float %x, float %y) {
+; CHECK: entry:
+; CHECK: %mul = fmul fast float %x, %x
+; CHECK: %0 = call float @llvm.sqrt.f32(float %mul)
+; CHECK: ret float %0
+; CHECK: }
diff --git a/test/Transforms/InstCombine/statepoint.ll b/test/Transforms/InstCombine/statepoint.ll
index f904f207bfdc..54fb6a7756ff 100644
--- a/test/Transforms/InstCombine/statepoint.ll
+++ b/test/Transforms/InstCombine/statepoint.ll
@@ -7,8 +7,8 @@ declare void @func()
define i1 @test_negative(i32 addrspace(1)* %p) gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
- %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
; CHECK-LABEL: test_negative
@@ -18,8 +18,8 @@ entry:
define i1 @test_nonnull(i32 addrspace(1)* nonnull %p) gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
- %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
; CHECK-LABEL: test_nonnull
@@ -28,8 +28,8 @@ entry:
define i1 @test_null() gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null)
- %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
; CHECK-LABEL: test_null
@@ -39,8 +39,8 @@ entry:
define i1 @test_undef() gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* undef)
- %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* undef)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
; CHECK-LABEL: test_undef
@@ -48,5 +48,5 @@ entry:
; CHECK: ret i1 undef
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index 5dfbd7140901..b8730413f1b5 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -113,6 +113,119 @@ for.end: ; preds = %for.cond
; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
}
+define void @dse1(i32* %p) {
+; CHECK-LABEL: dse1
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 0, i32* %p
+ store i32 0, i32* %p
+ ret void
+}
+
+; Slightly subtle: if we're mixing atomic and non-atomic access to the
+; same location, then the contents of the location are undefined if there's
+; an actual race. As such, we're free to pick either store under the
+; assumption that we're not racing with any other thread.
+define void @dse2(i32* %p) {
+; CHECK-LABEL: dse2
+; CHECK-NEXT: store i32 0, i32* %p
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %p unordered, align 4
+ store i32 0, i32* %p
+ ret void
+}
+
+define void @dse3(i32* %p) {
+; CHECK-LABEL: dse3
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+ store i32 0, i32* %p
+ store atomic i32 0, i32* %p unordered, align 4
+ ret void
+}
+
+define void @dse4(i32* %p) {
+; CHECK-LABEL: dse4
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %p unordered, align 4
+ store atomic i32 0, i32* %p unordered, align 4
+ ret void
+}
+
+; Implementation limit - could remove unordered store here, but
+; currently don't.
+define void @dse5(i32* %p) {
+; CHECK-LABEL: dse5
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %p unordered, align 4
+ store atomic i32 0, i32* %p seq_cst, align 4
+ ret void
+}
+
+define void @write_back1(i32* %p) {
+; CHECK-LABEL: write_back1
+; CHECK-NEXT: ret
+ %v = load i32, i32* %p
+ store i32 %v, i32* %p
+ ret void
+}
+
+define void @write_back2(i32* %p) {
+; CHECK-LABEL: write_back2
+; CHECK-NEXT: ret
+ %v = load atomic i32, i32* %p unordered, align 4
+ store i32 %v, i32* %p
+ ret void
+}
+
+define void @write_back3(i32* %p) {
+; CHECK-LABEL: write_back3
+; CHECK-NEXT: ret
+ %v = load i32, i32* %p
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+define void @write_back4(i32* %p) {
+; CHECK-LABEL: write_back4
+; CHECK-NEXT: ret
+ %v = load atomic i32, i32* %p unordered, align 4
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+; Can't remove store due to ordering side effect
+define void @write_back5(i32* %p) {
+; CHECK-LABEL: write_back5
+; CHECK-NEXT: load
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ %v = load atomic i32, i32* %p unordered, align 4
+ store atomic i32 %v, i32* %p seq_cst, align 4
+ ret void
+}
+
+define void @write_back6(i32* %p) {
+; CHECK-LABEL: write_back6
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+ %v = load atomic i32, i32* %p seq_cst, align 4
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+define void @write_back7(i32* %p) {
+; CHECK-LABEL: write_back7
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+ %v = load atomic volatile i32, i32* %p seq_cst, align 4
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/Transforms/InstCombine/strto-1.ll b/test/Transforms/InstCombine/strto-1.ll
index fc35dddcae5a..96f36e8d89c7 100644
--- a/test/Transforms/InstCombine/strto-1.ll
+++ b/test/Transforms/InstCombine/strto-1.ll
@@ -1,6 +1,6 @@
; Test that the strto* library call simplifiers works correctly.
;
-; RUN: opt < %s -instcombine -functionattrs -S | FileCheck %s
+; RUN: opt < %s -instcombine -inferattrs -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/InstCombine/tan-nofastmath.ll b/test/Transforms/InstCombine/tan-nofastmath.ll
new file mode 100644
index 000000000000..0fe7b2c1d522
--- /dev/null
+++ b/test/Transforms/InstCombine/tan-nofastmath.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @mytan(float %x) {
+entry:
+ %call = call float @atanf(float %x)
+ %call1 = call float @tanf(float %call)
+ ret float %call1
+}
+
+; CHECK-LABEL: define float @mytan(
+; CHECK: %call = call float @atanf(float %x)
+; CHECK-NEXT: %call1 = call float @tanf(float %call)
+; CHECK-NEXT: ret float %call1
+; CHECK-NEXT: }
+
+declare float @tanf(float)
+declare float @atanf(float)
diff --git a/test/Transforms/InstCombine/tan.ll b/test/Transforms/InstCombine/tan.ll
new file mode 100644
index 000000000000..15a832f253a9
--- /dev/null
+++ b/test/Transforms/InstCombine/tan.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @mytan(float %x) #0 {
+entry:
+ %call = call float @atanf(float %x)
+ %call1 = call float @tanf(float %call)
+ ret float %call1
+}
+
+; CHECK-LABEL: define float @mytan(
+; CHECK: ret float %x
+
+define float @test2(float ()* %fptr) #0 {
+ %call1 = call float %fptr()
+ %tan = call float @tanf(float %call1)
+ ret float %tan
+}
+
+; CHECK-LABEL: @test2
+; CHECK: tanf
+
+declare float @tanf(float) #0
+declare float @atanf(float) #0
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/token.ll b/test/Transforms/InstCombine/token.ll
new file mode 100644
index 000000000000..0929cf7ebee1
--- /dev/null
+++ b/test/Transforms/InstCombine/token.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+declare i32 @__CxxFrameHandler3(...)
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+bb:
+ unreachable
+
+unreachable:
+ %cl = cleanuppad within none []
+ cleanupret from %cl unwind to caller
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: unreachable:
+; CHECK: %cl = cleanuppad within none []
+; CHECK: cleanupret from %cl unwind to caller
+
+define void @test2(i8 %A, i8 %B) personality i32 (...)* @__CxxFrameHandler3 {
+bb:
+ %X = zext i8 %A to i32
+ invoke void @g(i32 0)
+ to label %cont
+ unwind label %catch
+
+cont:
+ %Y = zext i8 %B to i32
+ invoke void @g(i32 0)
+ to label %unreachable
+ unwind label %catch
+
+catch:
+ %phi = phi i32 [ %X, %bb ], [ %Y, %cont ]
+ %cs = catchswitch within none [label %doit] unwind to caller
+
+doit:
+ %cl = catchpad within %cs []
+ call void @g(i32 %phi)
+ unreachable
+
+unreachable:
+ unreachable
+}
+
+; CHECK-LABEL: define void @test2(
+; CHECK: %X = zext i8 %A to i32
+; CHECK: %Y = zext i8 %B to i32
+; CHECK: %phi = phi i32 [ %X, %bb ], [ %Y, %cont ]
+
+define void @test3(i8 %A, i8 %B) personality i32 (...)* @__CxxFrameHandler3 {
+bb:
+ %X = zext i8 %A to i32
+ invoke void @g(i32 0)
+ to label %cont
+ unwind label %catch
+
+cont:
+ %Y = zext i8 %B to i32
+ invoke void @g(i32 0)
+ to label %cont2
+ unwind label %catch
+
+cont2:
+ invoke void @g(i32 0)
+ to label %unreachable
+ unwind label %catch
+
+catch:
+ %phi = phi i32 [ %X, %bb ], [ %Y, %cont ], [ %Y, %cont2 ]
+ %cs = catchswitch within none [label %doit] unwind to caller
+
+doit:
+ %cl = catchpad within %cs []
+ call void @g(i32 %phi)
+ unreachable
+
+unreachable:
+ unreachable
+}
+
+; CHECK-LABEL: define void @test3(
+; CHECK: %X = zext i8 %A to i32
+; CHECK: %Y = zext i8 %B to i32
+; CHECK: %phi = phi i32 [ %X, %bb ], [ %Y, %cont ], [ %Y, %cont2 ]
+
+
+declare void @g(i32)
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll
index ee81cf8c3c5d..38f6b2804d63 100644
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -118,3 +118,45 @@ define i8 @test10(i32 %X) {
; CHECK: and
; CHECK: ret
}
+
+; PR25543
+; https://llvm.org/bugs/show_bug.cgi?id=25543
+; This is an extractelement.
+
+define i32 @trunc_bitcast1(<4 x i32> %v) {
+ %bc = bitcast <4 x i32> %v to i128
+ %shr = lshr i128 %bc, 32
+ %ext = trunc i128 %shr to i32
+ ret i32 %ext
+
+; CHECK-LABEL: @trunc_bitcast1(
+; CHECK-NEXT: %ext = extractelement <4 x i32> %v, i32 1
+; CHECK-NEXT: ret i32 %ext
+}
+
+; A bitcast may still be required.
+
+define i32 @trunc_bitcast2(<2 x i64> %v) {
+ %bc = bitcast <2 x i64> %v to i128
+ %shr = lshr i128 %bc, 64
+ %ext = trunc i128 %shr to i32
+ ret i32 %ext
+
+; CHECK-LABEL: @trunc_bitcast2(
+; CHECK-NEXT: %bc1 = bitcast <2 x i64> %v to <4 x i32>
+; CHECK-NEXT: %ext = extractelement <4 x i32> %bc1, i32 2
+; CHECK-NEXT: ret i32 %ext
+}
+
+; The right shift is optional.
+
+define i32 @trunc_bitcast3(<4 x i32> %v) {
+ %bc = bitcast <4 x i32> %v to i128
+ %ext = trunc i128 %bc to i32
+ ret i32 %ext
+
+; CHECK-LABEL: @trunc_bitcast3(
+; CHECK-NEXT: %ext = extractelement <4 x i32> %v, i32 0
+; CHECK-NEXT: ret i32 %ext
+}
+
diff --git a/test/Transforms/InstCombine/unpack-fca.ll b/test/Transforms/InstCombine/unpack-fca.ll
index 48bb157956aa..9b8d10457491 100644
--- a/test/Transforms/InstCombine/unpack-fca.ll
+++ b/test/Transforms/InstCombine/unpack-fca.ll
@@ -5,110 +5,134 @@ target triple = "x86_64-unknown-linux-gnu"
%A__vtbl = type { i8*, i32 (%A*)* }
%A = type { %A__vtbl* }
+%B = type { i8*, i64 }
@A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }
declare i32 @A.foo(%A* nocapture %this)
-declare i8* @allocmemory(i64)
-
-define void @storeA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to %A*
+define void @storeA(%A* %a.ptr) {
; CHECK-LABEL: storeA
-; CHECK: store %A__vtbl* @A__vtblZ
- store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store %A { %A__vtbl* @A__vtblZ }, %A* %a.ptr, align 8
+ ret void
+}
+
+define void @storeB(%B* %b.ptr) {
+; CHECK-LABEL: storeB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: store i8* null, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: store i64 42, i64* [[GEP2]], align 8
+; CHECK-NEXT: ret void
+ store %B { i8* null, i64 42 }, %B* %b.ptr, align 8
ret void
}
-define void @storeStructOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { %A }*
+define void @storeStructOfA({ %A }* %sa.ptr) {
; CHECK-LABEL: storeStructOfA
-; CHECK: store %A__vtbl* @A__vtblZ
- store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8
ret void
}
-define void @storeArrayOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to [1 x %A]*
+define void @storeArrayOfA([1 x %A]* %aa.ptr) {
; CHECK-LABEL: storeArrayOfA
-; CHECK: store %A__vtbl* @A__vtblZ
- store [1 x %A] [%A { %A__vtbl* @A__vtblZ }], [1 x %A]* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store [1 x %A] [%A { %A__vtbl* @A__vtblZ }], [1 x %A]* %aa.ptr, align 8
ret void
}
-define void @storeStructOfArrayOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { [1 x %A] }*
+define void @storeStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
; CHECK-LABEL: storeStructOfArrayOfA
-; CHECK: store %A__vtbl* @A__vtblZ
- store { [1 x %A] } { [1 x %A] [%A { %A__vtbl* @A__vtblZ }] }, { [1 x %A] }* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store { [1 x %A] } { [1 x %A] [%A { %A__vtbl* @A__vtblZ }] }, { [1 x %A] }* %saa.ptr, align 8
ret void
}
-define %A @loadA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to %A*
+define %A @loadA(%A* %a.ptr) {
; CHECK-LABEL: loadA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
- %2 = load %A, %A* %1, align 8
- ret %A %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: ret %A [[IV]]
+ %1 = load %A, %A* %a.ptr, align 8
+ ret %A %1
}
-define { %A } @loadStructOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { %A }*
+define %B @loadB(%B* %b.ptr) {
+; CHECK-LABEL: loadB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: [[LOAD1:%[a-z0-9\.]+]] = load i8*, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %B undef, i8* [[LOAD1]], 0
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: [[LOAD2:%[a-z0-9\.]+]] = load i64, i64* [[GEP2]], align 8
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue %B [[IV1]], i64 [[LOAD2]], 1
+; CHECK-NEXT: ret %B [[IV2]]
+ %1 = load %B, %B* %b.ptr, align 8
+ ret %B %1
+}
+
+define { %A } @loadStructOfA({ %A }* %sa.ptr) {
; CHECK-LABEL: loadStructOfA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-; CHECK: insertvalue { %A } undef, %A {{.*}}, 0
- %2 = load { %A }, { %A }* %1, align 8
- ret { %A } %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue { %A } undef, %A [[IV1]], 0
+; CHECK-NEXT: ret { %A } [[IV2]]
+ %1 = load { %A }, { %A }* %sa.ptr, align 8
+ ret { %A } %1
}
-define [1 x %A] @loadArrayOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to [1 x %A]*
+define [1 x %A] @loadArrayOfA([1 x %A]* %aa.ptr) {
; CHECK-LABEL: loadArrayOfA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-; CHECK: insertvalue [1 x %A] undef, %A {{.*}}, 0
- %2 = load [1 x %A], [1 x %A]* %1, align 8
- ret [1 x %A] %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0
+; CHECK-NEXT: ret [1 x %A] [[IV2]]
+ %1 = load [1 x %A], [1 x %A]* %aa.ptr, align 8
+ ret [1 x %A] %1
}
-define { [1 x %A] } @loadStructOfArrayOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { [1 x %A] }*
+define { [1 x %A] } @loadStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
; CHECK-LABEL: loadStructOfArrayOfA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-; CHECK: insertvalue [1 x %A] undef, %A {{.*}}, 0
-; CHECK: insertvalue { [1 x %A] } undef, [1 x %A] {{.*}}, 0
- %2 = load { [1 x %A] }, { [1 x %A] }* %1, align 8
- ret { [1 x %A] } %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0
+; CHECK-NEXT: [[IV3:%[a-z0-9\.]+]] = insertvalue { [1 x %A] } undef, [1 x %A] [[IV2]], 0
+; CHECK-NEXT: ret { [1 x %A] } [[IV3]]
+ %1 = load { [1 x %A] }, { [1 x %A] }* %saa.ptr, align 8
+ ret { [1 x %A] } %1
}
-define { %A } @structOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { %A }*
+define { %A } @structOfA({ %A }* %sa.ptr) {
; CHECK-LABEL: structOfA
-; CHECK: store %A__vtbl* @A__vtblZ
- store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
- %2 = load { %A }, { %A }* %1, align 8
-; CHECK-NOT: load
-; CHECK: ret { %A } { %A { %A__vtbl* @A__vtblZ } }
- ret { %A } %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret { %A } { %A { %A__vtbl* @A__vtblZ } }
+ store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8
+ %1 = load { %A }, { %A }* %sa.ptr, align 8
+ ret { %A } %1
+}
+
+define %B @structB(%B* %b.ptr) {
+; CHECK-LABEL: structB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: store i8* null, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: store i64 42, i64* [[GEP2]], align 8
+; CHECK-NEXT: ret %B { i8* null, i64 42 }
+ store %B { i8* null, i64 42 }, %B* %b.ptr, align 8
+ %1 = load %B, %B* %b.ptr, align 8
+ ret %B %1
}
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 4245c7a3c134..0b9663300c39 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -138,22 +138,6 @@ declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
-; <rdar://problem/6945110>
-define <4 x i32> @kernel3_vertical(<4 x i16> * %src, <8 x i16> * %foo) nounwind {
-entry:
- %tmp = load <4 x i16>, <4 x i16>* %src
- %tmp1 = load <8 x i16>, <8 x i16>* %foo
-; CHECK: %tmp2 = shufflevector
- %tmp2 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; pmovzxwd ignores the upper 64-bits of its input; -instcombine should remove this shuffle:
-; CHECK-NOT: shufflevector
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: pmovzxwd
- %0 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
- ret <4 x i32> %0
-}
-declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
-
define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
entry:
; CHECK-LABEL: define <4 x float> @dead_shuffle_elt(
@@ -210,130 +194,6 @@ define <4 x float> @test_select(float %f, float %g) {
ret <4 x float> %ret
}
-; We should optimize these two redundant insertqi into one
-; CHECK: define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
-; CHECK-NOT: insertqi
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 32)
- ret <2 x i64> %2
-}
-
-; The result of this insert is the second arg, since the top 64 bits of
-; the result are undefined, and we copy the bottom 64 bits from the
-; second arg
-; CHECK: define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> %i
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
- ret <2 x i64> %1
-}
-
-; Test the several types of ranges and ordering that exist for two insertqi
-; CHECK: define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 16)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 16)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 16)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 16)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
-; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
-; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> %i
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
- ret <2 x i64> %1
-}
-
-; CHECK: define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> undef
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
- ret <2 x i64> %1
-}
-
-; CHECK: define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> undef
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
- ret <2 x i64> %1
-}
-
-; CHECK: define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> undef
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
- ret <2 x i64> %1
-}
-
-; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
-declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
-
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
; CHECK-LABEL: @test_vpermilvar_ps(
@@ -394,212 +254,15 @@ define <4 x double> @test_vpermilvar_pd_256_zero(<4 x double> %v) {
ret <4 x double> %a
}
-define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
- %S = bitcast i32 1 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
- %6 = bitcast <8 x i16> %5 to <4 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
- %9 = bitcast <4 x i32> %8 to <2 x i64>
- %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
- %11 = bitcast <2 x i64> %10 to <8 x i16>
- %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
- %13 = bitcast <8 x i16> %12 to <4 x i32>
- %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
- %15 = bitcast <4 x i32> %14 to <2 x i64>
- %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
- ret <2 x i64> %16
-; CHECK: test_sse2_1
-; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
-}
-
-define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
- %S = bitcast i32 1 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
- %6 = bitcast <16 x i16> %5 to <8 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
- %9 = bitcast <8 x i32> %8 to <4 x i64>
- %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
- %11 = bitcast <4 x i64> %10 to <16 x i16>
- %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
- %13 = bitcast <16 x i16> %12 to <8 x i32>
- %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
- %15 = bitcast <8 x i32> %14 to <4 x i64>
- %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
- ret <4 x i64> %16
-; CHECK: test_avx2_1
-; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
-}
-
-define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
- %S = bitcast i32 128 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
- %6 = bitcast <8 x i16> %5 to <4 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
- %9 = bitcast <4 x i32> %8 to <2 x i64>
- %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
- %11 = bitcast <2 x i64> %10 to <8 x i16>
- %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
- %13 = bitcast <8 x i16> %12 to <4 x i32>
- %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
- %15 = bitcast <4 x i32> %14 to <2 x i64>
- %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
- ret <2 x i64> %16
-; CHECK: test_sse2_0
-; CHECK: ret <2 x i64> zeroinitializer
-}
-
-define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
- %S = bitcast i32 128 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
- %6 = bitcast <16 x i16> %5 to <8 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
- %9 = bitcast <8 x i32> %8 to <4 x i64>
- %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
- %11 = bitcast <4 x i64> %10 to <16 x i16>
- %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
- %13 = bitcast <16 x i16> %12 to <8 x i32>
- %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
- %15 = bitcast <8 x i32> %14 to <4 x i64>
- %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
- ret <4 x i64> %16
-; CHECK: test_avx2_0
-; CHECK: ret <4 x i64> zeroinitializer
-}
-define <2 x i64> @test_sse2_psrl_1() nounwind readnone uwtable {
- %S = bitcast i32 1 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
- %6 = bitcast <8 x i16> %5 to <4 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
- %9 = bitcast <4 x i32> %8 to <2 x i64>
- %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
- %11 = bitcast <2 x i64> %10 to <8 x i16>
- %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
- %13 = bitcast <8 x i16> %12 to <4 x i32>
- %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
- %15 = bitcast <4 x i32> %14 to <2 x i64>
- %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
- ret <2 x i64> %16
-; CHECK: test_sse2_psrl_1
-; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
-}
-
-define <4 x i64> @test_avx2_psrl_1() nounwind readnone uwtable {
- %S = bitcast i32 1 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
- %6 = bitcast <16 x i16> %5 to <8 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
- %9 = bitcast <8 x i32> %8 to <4 x i64>
- %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
- %11 = bitcast <4 x i64> %10 to <16 x i16>
- %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
- %13 = bitcast <16 x i16> %12 to <8 x i32>
- %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
- %15 = bitcast <8 x i32> %14 to <4 x i64>
- %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
- ret <4 x i64> %16
-; CHECK: test_avx2_psrl_1
-; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
-}
-
-define <2 x i64> @test_sse2_psrl_0() nounwind readnone uwtable {
- %S = bitcast i32 128 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
- %6 = bitcast <8 x i16> %5 to <4 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
- %9 = bitcast <4 x i32> %8 to <2 x i64>
- %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
- %11 = bitcast <2 x i64> %10 to <8 x i16>
- %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
- %13 = bitcast <8 x i16> %12 to <4 x i32>
- %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
- %15 = bitcast <4 x i32> %14 to <2 x i64>
- %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
- ret <2 x i64> %16
-; CHECK: test_sse2_psrl_0
-; CHECK: ret <2 x i64> zeroinitializer
-}
-
-define <4 x i64> @test_avx2_psrl_0() nounwind readnone uwtable {
- %S = bitcast i32 128 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
- %6 = bitcast <16 x i16> %5 to <8 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
- %9 = bitcast <8 x i32> %8 to <4 x i64>
- %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
- %11 = bitcast <4 x i64> %10 to <16 x i16>
- %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
- %13 = bitcast <16 x i16> %12 to <8 x i32>
- %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
- %15 = bitcast <8 x i32> %14 to <4 x i64>
- %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
- ret <4 x i64> %16
-; CHECK: test_avx2_psrl_0
-; CHECK: ret <4 x i64> zeroinitializer
+define <2 x i64> @PR24922(<2 x i64> %v) {
+; CHECK-LABEL: @PR24922
+; CHECK: select <2 x i1>
+;
+; Check that instcombine doesn't wrongly fold the select statement into a
+; ret <2 x i64> %v
+;
+; FIXME: We should be able to simplify the ConstantExpr in the select mask.
+entry:
+ %result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
+ ret <2 x i64> %result
}
-
-declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
-declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
-declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
-declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
-declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
-declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
-declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
-declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
-declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
-declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
-declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
-declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
-declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
-declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
-declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
-declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
-declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
-declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
-declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
-declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
-declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
-declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
-
-attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index d4d7f167ef07..d2cd2b90abc2 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -310,16 +310,16 @@ define <4 x i32> @shuffle_17addnuw(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtabl
ret <4 x i32> %r
}
-define <4 x float> @shuffle_17fsub(<4 x float> %v1, <4 x float> %v2) nounwind uwtable {
-; CHECK-LABEL: @shuffle_17fsub(
-; CHECK-NOT: shufflevector
-; CHECK: fsub <4 x float> %v1, %v2
-; CHECK: shufflevector
+define <4 x float> @shuffle_17fsub_fast(<4 x float> %v1, <4 x float> %v2) nounwind uwtable {
+; CHECK-LABEL: @shuffle_17fsub_fast(
+; CHECK-NEXT: [[VAR1:%[a-zA-Z0-9.]+]] = fsub fast <4 x float> %v1, %v2
+; CHECK-NEXT: shufflevector <4 x float> [[VAR1]], <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x float>
%t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer,
<4 x i32> <i32 1, i32 2, i32 3, i32 0>
%t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer,
<4 x i32> <i32 1, i32 2, i32 3, i32 0>
- %r = fsub <4 x float> %t1, %t2
+ %r = fsub fast <4 x float> %t1, %t2
ret <4 x float> %r
}
@@ -406,6 +406,21 @@ define i32 @pr19737(<4 x i32> %in0) {
ret i32 %rv
}
+; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
+; for an srem operation. This is not a valid optimization because it may cause a trap
+; on div-by-zero.
+
+define <4 x i32> @pr20059(<4 x i32> %p1, <4 x i32> %p2) {
+; CHECK-LABEL: @pr20059(
+; CHECK-NEXT: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2
+ %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+ %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+ %retval = srem <4 x i32> %splat1, %splat2
+ ret <4 x i32> %retval
+}
+
define <4 x i32> @pr20114(<4 x i32> %__mask) {
; CHECK-LABEL: @pr20114
; CHECK: shufflevector
diff --git a/test/Transforms/InstCombine/vector_gep2.ll b/test/Transforms/InstCombine/vector_gep2.ll
index d76a7d56cc7a..1b80ffd101c9 100644
--- a/test/Transforms/InstCombine/vector_gep2.ll
+++ b/test/Transforms/InstCombine/vector_gep2.ll
@@ -9,3 +9,26 @@ define <2 x i8*> @testa(<2 x i8*> %a) {
; CHECK: getelementptr i8, <2 x i8*> %a, <2 x i64> <i64 0, i64 1>
ret <2 x i8*> %g
}
+
+define <8 x double*> @vgep_s_v8i64(double* %a, <8 x i64>%i) {
+; CHECK-LABEL: @vgep_s_v8i64
+; CHECK: getelementptr double, double* %a, <8 x i64> %i
+ %VectorGep = getelementptr double, double* %a, <8 x i64> %i
+ ret <8 x double*> %VectorGep
+}
+
+define <8 x double*> @vgep_s_v8i32(double* %a, <8 x i32>%i) {
+; CHECK-LABEL: @vgep_s_v8i32
+; CHECK: %1 = sext <8 x i32> %i to <8 x i64>
+; CHECK: getelementptr double, double* %a, <8 x i64> %1
+ %VectorGep = getelementptr double, double* %a, <8 x i32> %i
+ ret <8 x double*> %VectorGep
+}
+
+define <8 x i8*> @vgep_v8iPtr_i32(<8 x i8*> %a, i32 %i) {
+; CHECK-LABEL: @vgep_v8iPtr_i32
+; CHECK: %1 = sext i32 %i to i64
+; CHECK: %VectorGep = getelementptr i8, <8 x i8*> %a, i64 %1
+ %VectorGep = getelementptr i8, <8 x i8*> %a, i32 %i
+ ret <8 x i8*> %VectorGep
+}
diff --git a/test/Transforms/InstCombine/x86-f16c.ll b/test/Transforms/InstCombine/x86-f16c.ll
new file mode 100644
index 000000000000..e10b339907e3
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-f16c.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
+declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
+
+;
+; Vector Demanded Bits
+;
+
+; Only bottom 4 elements required.
+define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
+; CHECK-LABEL: @demand_vcvtph2ps_128
+; CHECK-NEXT: %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
+; CHECK-NEXT: ret <4 x float> %1
+ %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
+ ret <4 x float> %2
+}
+
+; All 8 elements required.
+define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
+; CHECK-LABEL: @demand_vcvtph2ps_256
+; CHECK-NEXT: %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
+; CHECK-NEXT: ret <8 x float> %2
+ %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
+ ret <8 x float> %2
+}
+
+;
+; Constant Folding
+;
+
+define <4 x float> @fold_vcvtph2ps_128() {
+; CHECK-LABEL: @fold_vcvtph2ps_128
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
+ %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
+ ret <4 x float> %1
+}
+
+define <8 x float> @fold_vcvtph2ps_256() {
+; CHECK-LABEL: @fold_vcvtph2ps_256
+; CHECK-NEXT: ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
+ %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
+ ret <8 x float> %1
+}
+
+define <4 x float> @fold_vcvtph2ps_128_zero() {
+; CHECK-LABEL: @fold_vcvtph2ps_128_zero
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+ %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ ret <4 x float> %1
+}
+
+define <8 x float> @fold_vcvtph2ps_256_zero() {
+; CHECK-LABEL: @fold_vcvtph2ps_256_zero
+; CHECK-NEXT: ret <8 x float> zeroinitializer
+ %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ ret <8 x float> %1
+}
diff --git a/test/Transforms/InstCombine/x86-pmovsx.ll b/test/Transforms/InstCombine/x86-pmovsx.ll
new file mode 100644
index 000000000000..31bdc59b16a8
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-pmovsx.ll
@@ -0,0 +1,136 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
+
+;
+; Basic sign extension tests
+;
+
+define <4 x i32> @sse41_pmovsxbd(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxbd
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: sext <4 x i8> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %v)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @sse41_pmovsxbq(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxbq
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: sext <2 x i8> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %v)
+ ret <2 x i64> %res
+}
+
+define <8 x i16> @sse41_pmovsxbw(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxbw
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: sext <8 x i8> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+
+ %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %v)
+ ret <8 x i16> %res
+}
+
+define <2 x i64> @sse41_pmovsxdq(<4 x i32> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxdq
+; CHECK-NEXT: shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: sext <2 x i32> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %v)
+ ret <2 x i64> %res
+}
+
+define <4 x i32> @sse41_pmovsxwd(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxwd
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: sext <4 x i16> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %v)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @sse41_pmovsxwq(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxwq
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: sext <2 x i16> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %v)
+ ret <2 x i64> %res
+}
+
+define <8 x i32> @avx2_pmovsxbd(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxbd
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: sext <8 x i8> %1 to <8 x i32>
+; CHECK-NEXT: ret <8 x i32> %2
+
+ %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %v)
+ ret <8 x i32> %res
+}
+
+define <4 x i64> @avx2_pmovsxbq(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxbq
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: sext <4 x i8> %1 to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %2
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %v)
+ ret <4 x i64> %res
+}
+
+define <16 x i16> @avx2_pmovsxbw(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxbw
+; CHECK-NEXT: sext <16 x i8> %v to <16 x i16>
+; CHECK-NEXT: ret <16 x i16> %1
+
+ %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %v)
+ ret <16 x i16> %res
+}
+
+define <4 x i64> @avx2_pmovsxdq(<4 x i32> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxdq
+; CHECK-NEXT: sext <4 x i32> %v to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %1
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %v)
+ ret <4 x i64> %res
+}
+
+define <8 x i32> @avx2_pmovsxwd(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxwd
+; CHECK-NEXT: sext <8 x i16> %v to <8 x i32>
+; CHECK-NEXT: ret <8 x i32> %1
+
+ %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %v)
+ ret <8 x i32> %res
+}
+
+define <4 x i64> @avx2_pmovsxwq(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxwq
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: sext <4 x i16> %1 to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %2
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %v)
+ ret <4 x i64> %res
+}
diff --git a/test/Transforms/InstCombine/x86-pmovzx.ll b/test/Transforms/InstCombine/x86-pmovzx.ll
new file mode 100644
index 000000000000..31028cba26eb
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-pmovzx.ll
@@ -0,0 +1,136 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
+
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
+
+;
+; Basic zero extension tests
+;
+
+define <4 x i32> @sse41_pmovzxbd(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxbd
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: zext <4 x i8> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %v)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @sse41_pmovzxbq(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxbq
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: zext <2 x i8> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %v)
+ ret <2 x i64> %res
+}
+
+define <8 x i16> @sse41_pmovzxbw(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxbw
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: zext <8 x i8> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+
+ %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %v)
+ ret <8 x i16> %res
+}
+
+define <2 x i64> @sse41_pmovzxdq(<4 x i32> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxdq
+; CHECK-NEXT: shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: zext <2 x i32> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %v)
+ ret <2 x i64> %res
+}
+
+define <4 x i32> @sse41_pmovzxwd(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxwd
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: zext <4 x i16> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %v)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @sse41_pmovzxwq(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxwq
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: zext <2 x i16> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %v)
+ ret <2 x i64> %res
+}
+
+define <8 x i32> @avx2_pmovzxbd(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxbd
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: zext <8 x i8> %1 to <8 x i32>
+; CHECK-NEXT: ret <8 x i32> %2
+
+ %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %v)
+ ret <8 x i32> %res
+}
+
+define <4 x i64> @avx2_pmovzxbq(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxbq
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: zext <4 x i8> %1 to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %2
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %v)
+ ret <4 x i64> %res
+}
+
+define <16 x i16> @avx2_pmovzxbw(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxbw
+; CHECK-NEXT: zext <16 x i8> %v to <16 x i16>
+; CHECK-NEXT: ret <16 x i16> %1
+
+ %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %v)
+ ret <16 x i16> %res
+}
+
+define <4 x i64> @avx2_pmovzxdq(<4 x i32> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxdq
+; CHECK-NEXT: zext <4 x i32> %v to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %1
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %v)
+ ret <4 x i64> %res
+}
+
+define <8 x i32> @avx2_pmovzxwd(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxwd
+; CHECK-NEXT: zext <8 x i16> %v to <8 x i32>
+; CHECK-NEXT: ret <8 x i32> %1
+
+ %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %v)
+ ret <8 x i32> %res
+}
+
+define <4 x i64> @avx2_pmovzxwq(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxwq
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: zext <4 x i16> %1 to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %2
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %v)
+ ret <4 x i64> %res
+}
diff --git a/test/Transforms/InstCombine/x86-pshufb.ll b/test/Transforms/InstCombine/x86-pshufb.ll
new file mode 100644
index 000000000000..caaaed8910a8
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-pshufb.ll
@@ -0,0 +1,267 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Verify that instcombine is able to fold identity shuffles.
+
+define <16 x i8> @identity_test(<16 x i8> %InVec) {
+; CHECK-LABEL: @identity_test
+; CHECK: ret <16 x i8> %InVec
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx2
+; CHECK: ret <32 x i8> %InVec
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+
+; Verify that instcombine is able to fold byte shuffles with zero masks.
+
+define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector
+; CHECK: ret <16 x i8> zeroinitializer
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx2
+; CHECK: ret <32 x i8> zeroinitializer
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
+; with a shuffle mask of all zeroes.
+
+define <16 x i8> @splat_test(<16 x i8> %InVec) {
+; CHECK-LABEL: @splat_test
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> zeroinitializer
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+
+; In the test case below, elements in the low 128-bit lane of the result
+; vector are equal to the lower byte of %InVec (shuffle index 0).
+; Elements in the high 128-bit lane of the result vector are equal to
+; the lower byte in the high 128-bit lane of %InVec (shuffle index 16).
+
+define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @splat_test_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+; Each of the byte shuffles in the following tests is equivalent to a blend between
+; vector %InVec and a vector of all zeroes.
+
+define <16 x i8> @blend1(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend1
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend2(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend2
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend3(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend3
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend4(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend4
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend5(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend5
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @blend6(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend6
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @blend1_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend1_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 48, i32 17, i32 48, i32 19, i32 48, i32 21, i32 48, i32 23, i32 48, i32 25, i32 48, i32 27, i32 48, i32 29, i32 48, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend2_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend2_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 48, i32 48, i32 18, i32 19, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 26, i32 27, i32 48, i32 48, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend3_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend3_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 28, i32 29, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend4_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend4_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend5_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend5_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @blend6_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend6_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+; movq idiom.
+define <16 x i8> @movq_idiom(<16 x i8> %InVec) {
+; CHECK-LABEL: @movq_idiom
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @movq_idiom_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+; Vector permutations using byte shuffles.
+
+define <16 x i8> @permute1(<16 x i8> %InVec) {
+; CHECK-LABEL: @permute1
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @permute2(<16 x i8> %InVec) {
+; CHECK-LABEL: @permute2
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @permute1_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @permute1_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @permute2_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
+ ret <32 x i8> %1
+}
+
+; Test that instcombine correctly folds a pshufb with values that
+; are not -128 and that are not encoded in four bits.
+
+define <16 x i8> @identity_test2_2(<16 x i8> %InVec) {
+; CHECK-LABEL: @identity_test2_2
+; CHECK: ret <16 x i8> %InVec
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx2_2
+; CHECK: ret <32 x i8> %InVec
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 16, i8 33, i8 66, i8 19, i8 36, i8 69, i8 22, i8 39, i8 72, i8 25, i8 42, i8 75, i8 28, i8 45, i8 78, i8 31, i8 48, i8 81, i8 34, i8 51, i8 84, i8 37, i8 54, i8 87, i8 40, i8 57, i8 90, i8 43, i8 60, i8 93, i8 46, i8 63>)
+ ret <32 x i8> %1
+}
+
+define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_2
+; CHECK: ret <16 x i8> zeroinitializer
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -125, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx2_2
+; CHECK: ret <32 x i8> zeroinitializer
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -127, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15, i8 -126, i8 -2, i8 -52, i8 -31, i8 -5, i8 -8, i8 -34, i8 -67, i8 -100, i8 -119, i8 -101, i8 -23, i8 -16, i8 -2, i8 -12, i8 -16>)
+ ret <32 x i8> %1
+}
+
+define <16 x i8> @permute3(<16 x i8> %InVec) {
+; CHECK-LABEL: @permute3
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 48, i8 17, i8 34, i8 51, i8 20, i8 37, i8 54, i8 23, i8 16, i8 49, i8 66, i8 19, i8 52, i8 69, i8 22, i8 55>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @permute3_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 92, i8 93, i8 94, i8 95, i8 108, i8 109, i8 110, i8 111>)
+ ret <32 x i8> %1
+}
+
+
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
+declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
diff --git a/test/Transforms/InstCombine/x86-sse4a.ll b/test/Transforms/InstCombine/x86-sse4a.ll
new file mode 100644
index 000000000000..815d26bd2254
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-sse4a.ll
@@ -0,0 +1,336 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; EXTRQ
+;
+
+define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_call
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_zero_arg0
+; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_zero_arg1
+; CHECK-NEXT: ret <2 x i64> %x
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_to_extqi
+; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_constant
+; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_constant_undef
+; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+ ret <2 x i64> %1
+}
+
+;
+; EXTRQI
+;
+
+define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_call
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_shuffle_1zuu
+; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %3
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu
+; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %3
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_undef
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_zero
+; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_constant
+; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_constant_undef
+; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
+ ret <2 x i64> %1
+}
+
+;
+; INSERTQ
+;
+
+define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_call
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_to_insertqi
+; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_constant
+; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_constant_undef
+; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
+ ret <2 x i64> %1
+}
+
+;
+; INSERTQI
+;
+
+define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
+; CHECK-LABEL: @test_insertqi_shuffle_04uu
+; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <16 x i8> %1
+ %1 = bitcast <16 x i8> %v to <2 x i64>
+ %2 = bitcast <16 x i8> %i to <2 x i64>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
+ %4 = bitcast <2 x i64> %3 to <16 x i8>
+ ret <16 x i8> %4
+}
+
+define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
+; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu
+; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <16 x i8> %1
+ %1 = bitcast <16 x i8> %v to <2 x i64>
+ %2 = bitcast <16 x i8> %i to <2 x i64>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
+ %4 = bitcast <2 x i64> %3 to <16 x i8>
+ ret <16 x i8> %4
+}
+
+define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @test_insertqi_constant
+; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
+ ret <2 x i64> %1
+}
+
+; The result of this insert is the second arg, since the top 64 bits of
+; the result are undefined, and we copy the bottom 64 bits from the
+; second arg
+define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testInsert64Bits
+; CHECK-NEXT: ret <2 x i64> %i
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testZeroLength
+; CHECK-NEXT: ret <2 x i64> %i
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testUndefinedInsertq_1
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testUndefinedInsertq_2
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testUndefinedInsertq_3
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
+ ret <2 x i64> %1
+}
+
+;
+; Vector Demanded Bits
+;
+
+define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_arg0
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_arg1
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_args01
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_ret
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_arg0
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_ret
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_arg0
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_ret
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_arg0
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_arg1
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_args01
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_ret
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
+declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
+declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
+declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
diff --git a/test/Transforms/InstCombine/x86-vector-shifts.ll b/test/Transforms/InstCombine/x86-vector-shifts.ll
new file mode 100644
index 000000000000..59e445a40bef
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-vector-shifts.ll
@@ -0,0 +1,1318 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;
+; ASHR - Immediate
+;
+
+define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrai_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrai_w_15
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrai_w_64
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrai_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrai_d_15
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrai_d_64
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
+ ret <4 x i32> %1
+}
+
+define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrai_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrai_w_15
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrai_w_64
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrai_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrai_d_15
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrai_d_64
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
+ ret <8 x i32> %1
+}
+
+;
+; LSHR - Immediate
+;
+
+define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrli_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrli_w_15
+; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrli_w_64
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrli_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrli_d_15
+; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrli_d_64
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrli_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrli_q_15
+; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrli_q_64
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrli_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrli_w_15
+; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrli_w_64
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrli_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrli_d_15
+; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrli_d_64
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrli_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrli_q_15
+; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrli_q_64
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
+ ret <4 x i64> %1
+}
+
+;
+; SHL - Immediate
+;
+
+define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_pslli_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_pslli_w_15
+; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_pslli_w_64
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_pslli_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_pslli_d_15
+; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_pslli_d_64
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_pslli_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_pslli_q_15
+; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_pslli_q_64
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+ %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_pslli_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_pslli_w_15
+; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_pslli_w_64
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_pslli_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_pslli_d_15
+; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_pslli_d_64
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_pslli_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_pslli_q_15
+; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_pslli_q_64
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+ %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
+ ret <4 x i64> %1
+}
+
+;
+; ASHR - Constant Vector
+;
+
+define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_15
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_15_splat
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_64
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_15
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_15_splat
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_64
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_15
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_15_splat
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_64
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_15
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_15_splat
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_64
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+;
+; LSHR - Constant Vector
+;
+
+define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_15
+; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_15_splat
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_64
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_15
+; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_15_splat
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_64
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrl_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrl_q_15
+; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrl_q_64
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_15
+; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_15_splat
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_64
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_15
+; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_15_splat
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_64
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrl_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrl_q_15
+; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrl_q_64
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <4 x i64> %1
+}
+
+;
+; SHL - Constant Vector
+;
+
+define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_15
+; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_15_splat
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_64
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_15
+; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_15_splat
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_64
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_15
+; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_64
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_15
+; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_15_splat
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_64
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_15
+; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_15_splat
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_64
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_15
+; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_64
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <4 x i64> %1
+}
+
+;
+; Vector Demanded Bits
+;
+
+define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psra_w_var
+; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psra_w_var_bc
+; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <8 x i16>
+; CHECK-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = bitcast <2 x i64> %1 to <8 x i16>
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psra_d_var
+; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psra_d_var_bc
+; CHECK-NEXT: %1 = bitcast <8 x i16> %a to <4 x i32>
+; CHECK-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = bitcast <8 x i16> %1 to <4 x i32>
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
+ ret <4 x i32> %3
+}
+
+define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psra_w_var
+; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psra_d_var
+; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psrl_w_var
+; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psrl_d_var
+; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psrl_q_var
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psrl_w_var
+; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
+; CHECK-LABEL: @avx2_psrl_w_var_bc
+; CHECK-NEXT: %1 = bitcast <16 x i8> %a to <8 x i16>
+; CHECK-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
+; CHECK-NEXT: ret <16 x i16> %2
+ %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = bitcast <16 x i8> %1 to <8 x i16>
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
+ ret <16 x i16> %3
+}
+
+define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psrl_d_var
+; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psrl_d_var_bc
+; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <4 x i32>
+; CHECK-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
+; CHECK-NEXT: ret <8 x i32> %2
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = bitcast <2 x i64> %1 to <4 x i32>
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
+ ret <8 x i32> %3
+}
+
+define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psrl_q_var
+; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
+ ret <4 x i64> %2
+}
+
+define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psll_w_var
+; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psll_d_var
+; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psll_q_var
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psll_w_var
+; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psll_d_var
+; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psll_q_var
+; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
+ ret <4 x i64> %2
+}
+
+;
+; Constant Folding
+;
+
+define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
+; CHECK-LABEL: @test_sse2_psra_w_0
+; CHECK-NEXT: ret <8 x i16> %A
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @test_sse2_psra_w_8() {
+; CHECK-LABEL: @test_sse2_psra_w_8
+; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+ %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
+ ret <8 x i16> %4
+}
+
+define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
+; CHECK-LABEL: @test_sse2_psra_d_0
+; CHECK-NEXT: ret <4 x i32> %A
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0)
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @sse2_psra_d_8() {
+; CHECK-LABEL: @sse2_psra_d_8
+; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+ %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+ %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
+ ret <4 x i32> %4
+}
+
+define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
+; CHECK-LABEL: @test_avx2_psra_w_0
+; CHECK-NEXT: ret <16 x i16> %A
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
+; CHECK-LABEL: @test_avx2_psra_w_8
+; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+ %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
+ ret <16 x i16> %4
+}
+
+define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
+; CHECK-LABEL: @test_avx2_psra_d_0
+; CHECK-NEXT: ret <8 x i32> %A
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @test_avx2_psra_d_8() {
+; CHECK-LABEL: @test_avx2_psra_d_8
+; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+ %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+ %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
+ ret <8 x i32> %4
+}
+
+define <2 x i64> @test_sse2_1() {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_1
+; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
+}
+
+define <4 x i64> @test_avx2_1() {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_1
+; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
+}
+
+define <2 x i64> @test_sse2_0() {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_0
+; CHECK: ret <2 x i64> zeroinitializer
+}
+
+define <4 x i64> @test_avx2_0() {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_0
+; CHECK: ret <4 x i64> zeroinitializer
+}
+define <2 x i64> @test_sse2_psrl_1() {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_psrl_1
+; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
+}
+
+define <4 x i64> @test_avx2_psrl_1() {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_psrl_1
+; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
+}
+
+define <2 x i64> @test_sse2_psrl_0() {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_psrl_0
+; CHECK: ret <2 x i64> zeroinitializer
+}
+
+define <4 x i64> @test_avx2_psrl_0() {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_psrl_0
+; CHECK: ret <4 x i64> zeroinitializer
+}
+
+declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
+
+declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
+
+declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
+declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
+
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/x86-xop.ll b/test/Transforms/InstCombine/x86-xop.ll
new file mode 100644
index 000000000000..176c504989df
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-xop.ll
@@ -0,0 +1,209 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_slt_v2i64
+; CHECK-NEXT: %1 = icmp slt <2 x i64> %a, %b
+; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_ult_v2i64
+; CHECK-NEXT: %1 = icmp ult <2 x i64> %a, %b
+; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_sle_v2i64
+; CHECK-NEXT: %1 = icmp sle <2 x i64> %a, %b
+; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_ule_v2i64
+; CHECK-NEXT: %1 = icmp ule <2 x i64> %a, %b
+; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_sgt_v4i32
+; CHECK-NEXT: %1 = icmp sgt <4 x i32> %a, %b
+; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_ugt_v4i32
+; CHECK-NEXT: %1 = icmp ugt <4 x i32> %a, %b
+; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_sge_v4i32
+; CHECK-NEXT: %1 = icmp sge <4 x i32> %a, %b
+; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_uge_v4i32
+; CHECK-NEXT: %1 = icmp uge <4 x i32> %a, %b
+; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_seq_v8i16
+; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_ueq_v8i16
+; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_sne_v8i16
+; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_une_v8i16
+; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_strue_v16i8
+; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_utrue_v16i8
+; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_sfalse_v16i8
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_ufalse_v16i8
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index 33d5a2a9fda5..c8debcbac226 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -63,8 +63,8 @@ define i32 @test7(i32 %A, i32 %B) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: %A1 = and i32 %A, 7
; CHECK-NEXT: %B1 = and i32 %B, 128
-; CHECK-NEXT: %C1.1 = or i32 %A1, %B1
-; CHECK-NEXT: ret i32 %C1.1
+; CHECK-NEXT: %C11 = or i32 %A1, %B1
+; CHECK-NEXT: ret i32 %C11
%A1 = and i32 %A, 7 ; <i32> [#uses=1]
%B1 = and i32 %B, 128 ; <i32> [#uses=1]
%C1 = xor i32 %A1, %B1 ; <i32> [#uses=1]
@@ -96,8 +96,8 @@ define i1 @test9(i8 %A) {
define i8 @test10(i8 %A) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: %B = and i8 %A, 3
-; CHECK-NEXT: %C.1 = or i8 %B, 4
-; CHECK-NEXT: ret i8 %C.1
+; CHECK-NEXT: %C1 = or i8 %B, 4
+; CHECK-NEXT: ret i8 %C1
%B = and i8 %A, 3 ; <i8> [#uses=1]
%C = xor i8 %B, 4 ; <i8> [#uses=1]
ret i8 %C
diff --git a/test/Transforms/InstSimplify/add-mask.ll b/test/Transforms/InstSimplify/add-mask.ll
new file mode 100644
index 000000000000..1e53cc5bc7fa
--- /dev/null
+++ b/test/Transforms/InstSimplify/add-mask.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+define i1 @test(i32 %a) {
+; CHECK-LABEL: @test
+; CHECK: ret i1 false
+ %rhs = add i32 %a, -1
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+define i1 @test2(i32 %a) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 false
+ %rhs = add i32 %a, 1
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+define i1 @test3(i32 %a) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 false
+ %rhs = add i32 %a, 7
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+@B = external global i32
+declare void @llvm.assume(i1)
+
+; Known bits without a constant
+define i1 @test4(i32 %a) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 false
+ %b = load i32, i32* @B
+ %b.and = and i32 %b, 1
+ %b.cnd = icmp eq i32 %b.and, 1
+ call void @llvm.assume(i1 %b.cnd)
+
+ %rhs = add i32 %a, %b
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+; Negative test - even number
+define i1 @test5(i32 %a) {
+; CHECK-LABEL: @test5
+; CHECK: ret i1 %res
+ %rhs = add i32 %a, 2
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+define i1 @test6(i32 %a) {
+; CHECK-LABEL: @test6
+; CHECK: ret i1 false
+ %lhs = add i32 %a, -1
+ %and = and i32 %lhs, %a
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
diff --git a/test/Transforms/InstSimplify/apint-or.ll b/test/Transforms/InstSimplify/apint-or.ll
index 5d314db7133d..36844289aaf0 100644
--- a/test/Transforms/InstSimplify/apint-or.ll
+++ b/test/Transforms/InstSimplify/apint-or.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instsimplify -S | not grep or
+; RUN: opt < %s -instsimplify -S | FileCheck %s
; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0.
define i39 @test1(i39 %V, i39 %M) {
@@ -12,14 +12,28 @@ define i39 @test1(i39 %V, i39 %M) {
%D = and i39 %V, 274877906943
%R = or i39 %B, %D
ret i39 %R
-; CHECK-LABEL @test1
+; CHECK-LABEL: @test1
; CHECK-NEXT: and {{.*}}, -274877906944
; CHECK-NEXT: add
; CHECK-NEXT: ret
}
+define i7 @test2(i7 %X) {
+ %Y = or i7 %X, 0
+ ret i7 %Y
+; CHECK-LABEL: @test2
+; CHECK-NEXT: ret i7 %X
+}
+
+define i17 @test3(i17 %X) {
+ %Y = or i17 %X, -1
+ ret i17 %Y
+; CHECK-LABEL: @test3
+; CHECK-NEXT: ret i17 -1
+}
+
; Test the case where Integer BitWidth > 64 && BitWidth <= 1024.
-define i399 @test2(i399 %V, i399 %M) {
+define i399 @test4(i399 %V, i399 %M) {
;; If we have: ((V + N) & C1) | (V & C2)
;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
;; replace with V+N.
@@ -30,8 +44,22 @@ define i399 @test2(i399 %V, i399 %M) {
%D = and i399 %V, 274877906943
%R = or i399 %B, %D
ret i399 %R
-; CHECK-LABEL @test2
+; CHECK-LABEL: @test4
; CHECK-NEXT: and {{.*}}, 18446742974197923840
; CHECK-NEXT: add
; CHECK-NEXT: ret
}
+
+define i777 @test5(i777 %X) {
+ %Y = or i777 %X, 0
+ ret i777 %Y
+; CHECK-LABEL: @test5
+; CHECK-NEXT: ret i777 %X
+}
+
+define i117 @test6(i117 %X) {
+ %Y = or i117 %X, -1
+ ret i117 %Y
+; CHECK-LABEL: @test6
+; CHECK-NEXT: ret i117 -1
+}
diff --git a/test/Transforms/InstSimplify/bswap.ll b/test/Transforms/InstSimplify/bswap.ll
new file mode 100644
index 000000000000..7bc3af9e307f
--- /dev/null
+++ b/test/Transforms/InstSimplify/bswap.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -S -instsimplify | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+
+define i1 @test1(i16 %arg) {
+; CHECK-LABEL: @test1
+; CHECK: ret i1 false
+ %a = or i16 %arg, 1
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %res = icmp eq i16 %b, 0
+ ret i1 %res
+}
+
+define i1 @test2(i16 %arg) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 false
+ %a = or i16 %arg, 1024
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %res = icmp eq i16 %b, 0
+ ret i1 %res
+}
+
+define i1 @test3(i16 %arg) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 false
+ %a = and i16 %arg, 1
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 1
+ %res = icmp eq i16 %and, 1
+ ret i1 %res
+}
+
+define i1 @test4(i16 %arg) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 false
+ %a = and i16 %arg, 511
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 1
+ ret i1 %res
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 07c90d8f1eb8..6e66fbfede9f 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -1164,3 +1164,11 @@ define i1 @tautological8(i32 %A, i32 %B) {
; CHECK-LABEL: @tautological8(
; CHECK: ret i1 false
}
+
+define i1 @tautological9(i32 %x) {
+ %add = add nuw i32 %x, 13
+ %cmp = icmp ne i32 %add, 12
+ ret i1 %cmp
+; CHECK-LABEL: @tautological9(
+; CHECK: ret i1 true
+}
diff --git a/test/Transforms/InstSimplify/implies.ll b/test/Transforms/InstSimplify/implies.ll
new file mode 100644
index 000000000000..2e3c9591b079
--- /dev/null
+++ b/test/Transforms/InstSimplify/implies.ll
@@ -0,0 +1,217 @@
+; RUN: opt -S %s -instsimplify | FileCheck %s
+
+; A ==> A -> true
+define i1 @test(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test
+; CHECK: ret i1 true
+ %var29 = icmp slt i32 %i, %length.i
+ %res = icmp uge i1 %var29, %var29
+ ret i1 %res
+}
+
+; i +_{nsw} C_{>0} <s L ==> i <s L -> true
+define i1 @test2(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 true
+ %iplus1 = add nsw i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; i + C_{>0} <s L ==> i <s L -> unknown without the nsw
+define i1 @test2_neg(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2_neg
+; CHECK: ret i1 %res
+ %iplus1 = add i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; sle is not implication
+define i1 @test2_neg2(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2_neg2
+; CHECK: ret i1 %res
+ %iplus1 = add i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp sle i1 %var30, %var29
+ ret i1 %res
+}
+
+; The binary operator has to be an add
+define i1 @test2_neg3(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2_neg3
+; CHECK: ret i1 %res
+ %iplus1 = sub nsw i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; i +_{nsw} C_{>0} <s L ==> i <s L -> true
+; With an inverted conditional (ule B A rather than canonical ugt A B
+define i1 @test3(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 true
+ %iplus1 = add nsw i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp uge i1 %var29, %var30
+ ret i1 %res
+}
+
+; i +_{nuw} C <u L ==> i <u L
+define i1 @test4(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 true
+ %iplus1 = add nuw i32 %i, 1
+ %var29 = icmp ult i32 %i, %length.i
+ %var30 = icmp ult i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; A ==> A for vectors
+define <4 x i1> @test5(<4 x i1> %vec) {
+; CHECK-LABEL: @test5
+; CHECK: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+ %res = icmp ule <4 x i1> %vec, %vec
+ ret <4 x i1> %res
+}
+
+; Don't crash on vector inputs - pr25040
+define <4 x i1> @test6(<4 x i1> %a, <4 x i1> %b) {
+; CHECK-LABEL: @test6
+; CHECK: ret <4 x i1> %res
+ %res = icmp ule <4 x i1> %a, %b
+ ret <4 x i1> %res
+}
+
+; i +_{nsw} 1 <s L ==> i < L +_{nsw} 1
+define i1 @test7(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test7(
+; CHECK: ret i1 true
+ %iplus1 = add nsw i32 %i, 1
+ %len.plus.one = add nsw i32 %length.i, 1
+ %var29 = icmp slt i32 %i, %len.plus.one
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; i +_{nuw} 1 <s L ==> i < L +_{nuw} 1
+define i1 @test8(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test8(
+; CHECK: ret i1 true
+ %iplus1 = add nuw i32 %i, 1
+ %len.plus.one = add nuw i32 %length.i, 1
+ %var29 = icmp ult i32 %i, %len.plus.one
+ %var30 = icmp ult i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; i +_{nuw} C <s L ==> i < L, even if C is negative
+define i1 @test9(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test9(
+; CHECK: ret i1 true
+ %iplus1 = add nuw i32 %i, -100
+ %var29 = icmp ult i32 %i, %length.i
+ %var30 = icmp ult i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+define i1 @test10(i32 %length.i, i32 %x.full) {
+; CHECK-LABEL: @test10(
+; CHECK: ret i1 true
+
+ %x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000
+ %large = or i32 %x, 100
+ %small = or i32 %x, 90
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test11(i32 %length.i, i32 %x) {
+; CHECK-LABEL: @test11(
+; CHECK: %res = icmp ule i1 %known, %to.prove
+; CHECK: ret i1 %res
+
+ %large = or i32 %x, 100
+ %small = or i32 %x, 90
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test12(i32 %length.i, i32 %x.full) {
+; CHECK-LABEL: @test12(
+; CHECK: %res = icmp ule i1 %known, %to.prove
+; CHECK: ret i1 %res
+
+ %x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000
+ %large = or i32 %x, 65536 ;; 65536 == 0x00010000
+ %small = or i32 %x, 90
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test13(i32 %length.i, i32 %x) {
+; CHECK-LABEL: @test13(
+; CHECK: ret i1 true
+
+ %large = add nuw i32 %x, 100
+ %small = add nuw i32 %x, 90
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test14(i32 %length.i, i32 %x.full) {
+; CHECK-LABEL: @test14(
+; CHECK: ret i1 true
+
+ %x = and i32 %x.full, 4294905615 ;; 4294905615 == 0xffff0f0f
+ %large = or i32 %x, 8224 ;; == 0x2020
+ %small = or i32 %x, 4112 ;; == 0x1010
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test15(i32 %length.i, i32 %x) {
+; CHECK-LABEL: @test15(
+; CHECK: %res = icmp ule i1 %known, %to.prove
+; CHECK: ret i1 %res
+
+ %large = add nuw i32 %x, 100
+ %small = add nuw i32 %x, 110
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+; X >=(s) Y == X ==> Y (i1 1 becomes -1 for reasoning)
+define i1 @test_sge(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test_sge
+; CHECK: ret i1 true
+ %iplus1 = add nsw nuw i32 %i, 1
+ %var29 = icmp ult i32 %i, %length.i
+ %var30 = icmp ult i32 %iplus1, %length.i
+ %res = icmp sge i1 %var30, %var29
+ ret i1 %res
+}
diff --git a/test/Transforms/InstSimplify/shift-128-kb.ll b/test/Transforms/InstSimplify/shift-128-kb.ll
new file mode 100644
index 000000000000..3f69ecccaf5b
--- /dev/null
+++ b/test/Transforms/InstSimplify/shift-128-kb.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define zeroext i1 @_Z10isNegativemj(i64 %Val, i32 zeroext %IntegerBitWidth) {
+entry:
+ %conv = zext i32 %IntegerBitWidth to i64
+ %sub = sub i64 128, %conv
+ %conv1 = trunc i64 %sub to i32
+ %conv2 = zext i64 %Val to i128
+ %sh_prom = zext i32 %conv1 to i128
+ %shl = shl i128 %conv2, %sh_prom
+ %shr = ashr i128 %shl, %sh_prom
+ %cmp = icmp slt i128 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @_Z10isNegativemj
+; CHECK-NOT: ret i1 false
+; CHECK: ret i1 %cmp
+
diff --git a/test/Transforms/InstSimplify/shr-nop.ll b/test/Transforms/InstSimplify/shr-nop.ll
index b0dc8731a112..edabcc314ea6 100644
--- a/test/Transforms/InstSimplify/shr-nop.ll
+++ b/test/Transforms/InstSimplify/shr-nop.ll
@@ -244,7 +244,7 @@ define i1 @ashr_ne_opposite_msb(i8 %a) {
}
; CHECK-LABEL: @exact_ashr_eq_shift_gt
-; CHECK-NEXT : ret i1 false
+; CHECK-NEXT: ret i1 false
define i1 @exact_ashr_eq_shift_gt(i8 %a) {
%shr = ashr exact i8 -2, %a
%cmp = icmp eq i8 %shr, -8
@@ -252,7 +252,7 @@ define i1 @exact_ashr_eq_shift_gt(i8 %a) {
}
; CHECK-LABEL: @exact_ashr_ne_shift_gt
-; CHECK-NEXT : ret i1 true
+; CHECK-NEXT: ret i1 true
define i1 @exact_ashr_ne_shift_gt(i8 %a) {
%shr = ashr exact i8 -2, %a
%cmp = icmp ne i8 %shr, -8
@@ -260,7 +260,7 @@ define i1 @exact_ashr_ne_shift_gt(i8 %a) {
}
; CHECK-LABEL: @nonexact_ashr_eq_shift_gt
-; CHECK-NEXT : ret i1 false
+; CHECK-NEXT: ret i1 false
define i1 @nonexact_ashr_eq_shift_gt(i8 %a) {
%shr = ashr i8 -2, %a
%cmp = icmp eq i8 %shr, -8
@@ -268,7 +268,7 @@ define i1 @nonexact_ashr_eq_shift_gt(i8 %a) {
}
; CHECK-LABEL: @nonexact_ashr_ne_shift_gt
-; CHECK-NEXT : ret i1 true
+; CHECK-NEXT: ret i1 true
define i1 @nonexact_ashr_ne_shift_gt(i8 %a) {
%shr = ashr i8 -2, %a
%cmp = icmp ne i8 %shr, -8
@@ -292,7 +292,7 @@ define i1 @exact_lshr_ne_shift_gt(i8 %a) {
}
; CHECK-LABEL: @nonexact_lshr_eq_shift_gt
-; CHECK-NEXT : ret i1 false
+; CHECK-NEXT: ret i1 false
define i1 @nonexact_lshr_eq_shift_gt(i8 %a) {
%shr = lshr i8 2, %a
%cmp = icmp eq i8 %shr, 8
@@ -300,7 +300,7 @@ define i1 @nonexact_lshr_eq_shift_gt(i8 %a) {
}
; CHECK-LABEL: @nonexact_lshr_ne_shift_gt
-; CHECK-NEXT : ret i1 true
+; CHECK-NEXT: ret i1 true
define i1 @nonexact_lshr_ne_shift_gt(i8 %a) {
%shr = ashr i8 2, %a
%cmp = icmp ne i8 %shr, 8
diff --git a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
index c50b6fc61c8e..58f3c1d09cc2 100644
--- a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
+++ b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
@@ -3,11 +3,11 @@
@A = global i32 0
; CHECK: @A = internal global i32 0
-@B = alias i32* @A
-; CHECK: @B = internal alias i32* @A
+@B = alias i32, i32* @A
+; CHECK: @B = internal alias i32, i32* @A
-@C = alias i32* @A
-; CHECK: @C = internal alias i32* @A
+@C = alias i32, i32* @A
+; CHECK: @C = internal alias i32, i32* @A
define i32 @main() {
%tmp = load i32, i32* @C
diff --git a/test/Transforms/Internalize/comdat.ll b/test/Transforms/Internalize/comdat.ll
new file mode 100644
index 000000000000..ac536f7eb656
--- /dev/null
+++ b/test/Transforms/Internalize/comdat.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -internalize -internalize-public-api-list c1 -internalize-public-api-list c2 -internalize-public-api-list c3 -internalize-public-api-list c4 -S | FileCheck %s
+
+$c1 = comdat any
+$c2 = comdat any
+$c3 = comdat any
+$c4 = comdat any
+
+; CHECK: @c1_c = global i32 0, comdat($c1)
+@c1_c = global i32 0, comdat($c1)
+
+; CHECK: @c2_b = internal global i32 0{{$}}
+@c2_b = global i32 0, comdat($c2)
+
+; CHECK: @c3 = global i32 0, comdat{{$}}
+@c3 = global i32 0, comdat
+
+; CHECK: @c4_a = internal global i32 0, comdat($c4)
+@c4_a = internal global i32 0, comdat($c4)
+
+; CHECK: @c1_d = alias i32, i32* @c1_c
+@c1_d = alias i32, i32* @c1_c
+
+; CHECK: @c2_c = internal alias i32, i32* @c2_b
+@c2_c = alias i32, i32* @c2_b
+
+; CHECK: @c4 = alias i32, i32* @c4_a
+@c4 = alias i32, i32* @c4_a
+
+; CHECK: define void @c1() comdat {
+define void @c1() comdat {
+ ret void
+}
+
+; CHECK: define void @c1_a() comdat($c1) {
+define void @c1_a() comdat($c1) {
+ ret void
+}
+
+; CHECK: define internal void @c2() {
+define internal void @c2() comdat {
+ ret void
+}
+
+; CHECK: define internal void @c2_a() {
+define void @c2_a() comdat($c2) {
+ ret void
+}
+
+; CHECK: define void @c3_a() comdat($c3) {
+define void @c3_a() comdat($c3) {
+ ret void
+}
diff --git a/test/Transforms/Internalize/local-visibility.ll b/test/Transforms/Internalize/local-visibility.ll
index b09a136e5263..0d73f21972aa 100644
--- a/test/Transforms/Internalize/local-visibility.ll
+++ b/test/Transforms/Internalize/local-visibility.ll
@@ -10,10 +10,10 @@
; CHECK: @protected.variable = internal global i32 0
@protected.variable = protected global i32 0
-; CHECK: @hidden.alias = internal alias i32* @global
-@hidden.alias = hidden alias i32* @global
-; CHECK: @protected.alias = internal alias i32* @global
-@protected.alias = protected alias i32* @global
+; CHECK: @hidden.alias = internal alias i32, i32* @global
+@hidden.alias = hidden alias i32, i32* @global
+; CHECK: @protected.alias = internal alias i32, i32* @global
+@protected.alias = protected alias i32, i32* @global
; CHECK: define internal void @hidden.function() {
define hidden void @hidden.function() {
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index 32cc4de9285a..46c92bc1f577 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -483,7 +483,7 @@ declare void @g()
declare void @j()
declare void @k()
-; CHECK: define void @h(i32 %p) {
+; CHECK-LABEL: define void @h(i32 %p) {
define void @h(i32 %p) {
%x = icmp ult i32 %p, 5
br i1 %x, label %l1, label %l2
@@ -513,4 +513,36 @@ l5:
; CHECK: }
}
+; CHECK-LABEL: define void @h_con(i32 %p) {
+define void @h_con(i32 %p) {
+ %x = icmp ult i32 %p, 5
+ br i1 %x, label %l1, label %l2
+
+l1:
+ call void @j()
+ br label %l3
+
+l2:
+ call void @k()
+ br label %l3
+
+l3:
+; CHECK: call void @g() [[CON:#[0-9]+]]
+; CHECK-NOT: call void @g() [[CON]]
+ call void @g() convergent
+ %y = icmp ult i32 %p, 5
+ br i1 %y, label %l4, label %l5
+
+l4:
+ call void @j()
+ ret void
+
+l5:
+ call void @k()
+ ret void
+; CHECK: }
+}
+
+
; CHECK: attributes [[NOD]] = { noduplicate }
+; CHECK: attributes [[CON]] = { convergent }
diff --git a/test/Transforms/JumpThreading/implied-cond.ll b/test/Transforms/JumpThreading/implied-cond.ll
new file mode 100644
index 000000000000..3d1717e91261
--- /dev/null
+++ b/test/Transforms/JumpThreading/implied-cond.ll
@@ -0,0 +1,98 @@
+; RUN: opt -jump-threading -S < %s | FileCheck %s
+
+declare void @side_effect(i32)
+
+define void @test0(i32 %i, i32 %len) {
+; CHECK-LABEL: @test0(
+ entry:
+ call void @side_effect(i32 0)
+ %i.inc = add nuw i32 %i, 1
+ %c0 = icmp ult i32 %i.inc, %len
+ br i1 %c0, label %left, label %right
+
+ left:
+; CHECK: entry:
+; CHECK: br i1 %c0, label %left0, label %right
+
+; CHECK: left0:
+; CHECK: call void @side_effect
+; CHECK-NOT: br i1 %c1
+; CHECK: call void @side_effect
+ call void @side_effect(i32 0)
+ %c1 = icmp ult i32 %i, %len
+ br i1 %c1, label %left0, label %right
+
+ left0:
+ call void @side_effect(i32 0)
+ ret void
+
+ right:
+ %t = phi i32 [ 1, %left ], [ 2, %entry ]
+ call void @side_effect(i32 %t)
+ ret void
+}
+
+define void @test1(i32 %i, i32 %len) {
+; CHECK-LABEL: @test1(
+ entry:
+ call void @side_effect(i32 0)
+ %i.inc = add nsw i32 %i, 1
+ %c0 = icmp slt i32 %i.inc, %len
+ br i1 %c0, label %left, label %right
+
+ left:
+; CHECK: entry:
+; CHECK: br i1 %c0, label %left0, label %right
+
+; CHECK: left0:
+; CHECK: call void @side_effect
+; CHECK-NOT: br i1 %c1
+; CHECK: call void @side_effect
+ call void @side_effect(i32 0)
+ %c1 = icmp slt i32 %i, %len
+ br i1 %c1, label %left0, label %right
+
+ left0:
+ call void @side_effect(i32 0)
+ ret void
+
+ right:
+ %t = phi i32 [ 1, %left ], [ 2, %entry ]
+ call void @side_effect(i32 %t)
+ ret void
+}
+
+define void @test2(i32 %i, i32 %len, i1* %c.ptr) {
+; CHECK-LABEL: @test2(
+
+; CHECK: entry:
+; CHECK: br i1 %c0, label %cont, label %right
+; CHECK: cont:
+; CHECK: br i1 %c, label %left0, label %right
+; CHECK: left0:
+; CHECK: call void @side_effect(i32 0)
+; CHECK: call void @side_effect(i32 0)
+ entry:
+ call void @side_effect(i32 0)
+ %i.inc = add nsw i32 %i, 1
+ %c0 = icmp slt i32 %i.inc, %len
+ br i1 %c0, label %cont, label %right
+
+ cont:
+ %c = load i1, i1* %c.ptr
+ br i1 %c, label %left, label %right
+
+ left:
+ call void @side_effect(i32 0)
+ %c1 = icmp slt i32 %i, %len
+ br i1 %c1, label %left0, label %right
+
+ left0:
+ call void @side_effect(i32 0)
+ ret void
+
+ right:
+ %t = phi i32 [ 1, %left ], [ 2, %entry ], [ 3, %cont ]
+ call void @side_effect(i32 %t)
+ ret void
+}
diff --git a/test/Transforms/JumpThreading/phi-known.ll b/test/Transforms/JumpThreading/phi-known.ll
new file mode 100644
index 000000000000..8eaf57f748ac
--- /dev/null
+++ b/test/Transforms/JumpThreading/phi-known.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -jump-threading %s | FileCheck %s
+
+; Value of predicate known on all inputs (trivial case)
+; Note: InstCombine/EarlyCSE would also get this case
+define void @test(i8* %p, i8** %addr) {
+; CHECK-LABEL: @test
+entry:
+ %cmp0 = icmp eq i8* %p, null
+ br i1 %cmp0, label %exit, label %loop
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NEXT: phi
+; CHECK-NEXT: br label %loop
+ %p1 = phi i8* [%p, %entry], [%p1, %loop]
+ %cmp1 = icmp eq i8* %p1, null
+ br i1 %cmp1, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Value of predicate known on all inputs (non-trivial)
+define void @test2(i8* %p) {
+; CHECK-LABEL: @test2
+entry:
+ %cmp0 = icmp eq i8* %p, null
+ br i1 %cmp0, label %exit, label %loop
+loop:
+ %p1 = phi i8* [%p, %entry], [%p2, %backedge]
+ %cmp1 = icmp eq i8* %p1, null
+ br i1 %cmp1, label %exit, label %backedge
+backedge:
+; CHECK-LABEL: backedge:
+; CHECK-NEXT: phi
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: load
+; CHECK-NEXT: cmp
+; CHECK-NEXT: br
+; CHECK-DAG: label %backedge
+ %addr = bitcast i8* %p1 to i8**
+ %p2 = load i8*, i8** %addr
+ %cmp2 = icmp eq i8* %p2, null
+ br i1 %cmp2, label %exit, label %loop
+exit:
+ ret void
+}
+
+; If the inputs don't branch the same way, we can't rewrite
+; Well, we could unroll this loop exactly twice, but that's
+; a different transform.
+define void @test_mixed(i8* %p) {
+; CHECK-LABEL: @test_mixed
+entry:
+ %cmp0 = icmp eq i8* %p, null
+ br i1 %cmp0, label %exit, label %loop
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NEXT: phi
+; CHECK-NEXT: %cmp1 = icmp
+; CHECK-NEXT: br i1 %cmp1
+ %p1 = phi i8* [%p, %entry], [%p1, %loop]
+ %cmp1 = icmp ne i8* %p1, null
+ br i1 %cmp1, label %exit, label %loop
+exit:
+ ret void
+}
+
diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll
index d0df7725f722..595cacbcbf54 100644
--- a/test/Transforms/JumpThreading/select.ll
+++ b/test/Transforms/JumpThreading/select.ll
@@ -91,6 +91,36 @@ L3:
}
+; Jump threading of indirectbr with select as address. Test increased
+; duplication threshold for cases where indirectbr is being threaded
+; through.
+
+; CHECK-LABEL: @test_indirectbr_thresh(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %L1, label %L3
+; CHECK-NOT: indirectbr
+define void @test_indirectbr_thresh(i1 %cond, i8* %address) nounwind {
+entry:
+ br i1 %cond, label %L0, label %L3
+L0:
+ %indirect.goto.dest = select i1 %cond, i8* blockaddress(@test_indirectbr_thresh, %L1), i8* %address
+ call void @quux()
+ call void @quux()
+ call void @quux()
+ indirectbr i8* %indirect.goto.dest, [label %L1, label %L2, label %L3]
+
+L1:
+ call void @foo()
+ ret void
+L2:
+ call void @bar()
+ ret void
+L3:
+ call void @baz()
+ ret void
+}
+
+
; A more complicated case: the condition is a select based on a comparison.
; CHECK-LABEL: @test_switch_cmp(
diff --git a/test/Transforms/JumpThreading/update-edge-weight.ll b/test/Transforms/JumpThreading/update-edge-weight.ll
new file mode 100644
index 000000000000..58cd71861d8a
--- /dev/null
+++ b/test/Transforms/JumpThreading/update-edge-weight.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -jump-threading %s | FileCheck %s
+
+; Test if edge weights are properly updated after jump threading.
+
+; CHECK: !2 = !{!"branch_weights", i32 1629125526, i32 518358122}
+
+define void @foo(i32 %n) !prof !0 {
+entry:
+ %cmp = icmp sgt i32 %n, 10
+ br i1 %cmp, label %if.then.1, label %if.else.1, !prof !1
+
+if.then.1:
+ tail call void @a()
+ br label %if.cond
+
+if.else.1:
+ tail call void @b()
+ br label %if.cond
+
+if.cond:
+ %cmp1 = icmp sgt i32 %n, 5
+ br i1 %cmp1, label %if.then.2, label %if.else.2, !prof !2
+
+if.then.2:
+ tail call void @c()
+ br label %if.end
+
+if.else.2:
+ tail call void @d()
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare void @a()
+declare void @b()
+declare void @c()
+declare void @d()
+
+!0 = !{!"function_entry_count", i64 1}
+!1 = !{!"branch_weights", i32 10, i32 5}
+!2 = !{!"branch_weights", i32 10, i32 1}
diff --git a/test/Transforms/LCSSA/mixed-catch.ll b/test/Transforms/LCSSA/mixed-catch.ll
new file mode 100644
index 000000000000..95d5b17bf081
--- /dev/null
+++ b/test/Transforms/LCSSA/mixed-catch.ll
@@ -0,0 +1,95 @@
+; RUN: opt -lcssa -S < %s | FileCheck %s
+
+; This test is based on the following C++ code:
+;
+; void f()
+; {
+; for (int i=0; i<12; i++) {
+; try {
+; if (i==3)
+; throw i;
+; } catch (int) {
+; continue;
+; } catch (...) { }
+; if (i==3) break;
+; }
+; }
+;
+; The loop info analysis identifies the catch pad for the second catch as being
+; outside the loop (because it returns to %for.end) but the associated
+; catchswitch block is identified as being inside the loop. Because of this
+; analysis, the LCSSA pass wants to create a PHI node in the catchpad block
+; for the catchswitch value, but this is a token, so it can't.
+
+define void @f() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %tmp = alloca i32, align 4
+ %i7 = alloca i32, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, 12
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %cond = icmp eq i32 %i.0, 3
+ br i1 %cond, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ store i32 %i.0, i32* %tmp, align 4
+ %tmp1 = bitcast i32* %tmp to i8*
+ invoke void @_CxxThrowException(i8* %tmp1, %eh.ThrowInfo* nonnull @_TI1H) #1
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %if.then
+ %tmp2 = catchswitch within none [label %catch, label %catch2] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %tmp3 = catchpad within %tmp2 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i32* %i7]
+ catchret from %tmp3 to label %for.inc
+
+catch2: ; preds = %catch.dispatch
+ %tmp4 = catchpad within %tmp2 [i8* null, i32 64, i8* null]
+ catchret from %tmp4 to label %for.end
+
+for.inc: ; preds = %catch, %for.body
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %catch2, %for.cond
+ ret void
+
+unreachable: ; preds = %if.then
+ unreachable
+}
+
+; CHECK-LABEL: define void @f()
+; CHECK: catch2:
+; CHECK-NOT: phi
+; CHECK: %tmp4 = catchpad within %tmp2
+; CHECK: catchret from %tmp4 to label %for.end
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll b/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
index 73862db69819..e2b07facd48e 100644
--- a/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
+++ b/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalsmodref-aa -licm -disable-output
+; RUN: opt < %s -globals-aa -licm -disable-output
@PL_regcomp_parse = internal global i8* null ; <i8**> [#uses=2]
diff --git a/test/Transforms/LICM/argmemonly-call.ll b/test/Transforms/LICM/argmemonly-call.ll
new file mode 100644
index 000000000000..e2640a1c8deb
--- /dev/null
+++ b/test/Transforms/LICM/argmemonly-call.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -basicaa -licm %s | FileCheck %s
+declare i32 @foo() readonly argmemonly nounwind
+declare i32 @foo2() readonly nounwind
+declare i32 @bar(i32* %loc2) readonly argmemonly nounwind
+
+define void @test(i32* %loc) {
+; CHECK-LABEL: @test
+; CHECK: @foo
+; CHECK-LABEL: loop:
+ br label %loop
+
+loop:
+ %res = call i32 @foo()
+ store i32 %res, i32* %loc
+ br label %loop
+}
+
+; Negative test: show argmemonly is required
+define void @test_neg(i32* %loc) {
+; CHECK-LABEL: @test_neg
+; CHECK-LABEL: loop:
+; CHECK: @foo
+ br label %loop
+
+loop:
+ %res = call i32 @foo2()
+ store i32 %res, i32* %loc
+ br label %loop
+}
+
+define void @test2(i32* noalias %loc, i32* noalias %loc2) {
+; CHECK-LABEL: @test2
+; CHECK: @bar
+; CHECK-LABEL: loop:
+ br label %loop
+
+loop:
+ %res = call i32 @bar(i32* %loc2)
+ store i32 %res, i32* %loc
+ br label %loop
+}
+
+; Negative test: %might clobber gep
+define void @test3(i32* %loc) {
+; CHECK-LABEL: @test3
+; CHECK-LABEL: loop:
+; CHECK: @bar
+ br label %loop
+
+loop:
+ %res = call i32 @bar(i32* %loc)
+ %gep = getelementptr i32, i32 *%loc, i64 1000000
+ store i32 %res, i32* %gep
+ br label %loop
+}
+
+
+; Negative test: %loc might alias %loc2
+define void @test4(i32* %loc, i32* %loc2) {
+; CHECK-LABEL: @test4
+; CHECK-LABEL: loop:
+; CHECK: @bar
+ br label %loop
+
+loop:
+ %res = call i32 @bar(i32* %loc2)
+ store i32 %res, i32* %loc
+ br label %loop
+}
diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll
index 62429fdc3216..d8ae5e576641 100644
--- a/test/Transforms/LICM/debug-value.ll
+++ b/test/Transforms/LICM/debug-value.ll
@@ -36,17 +36,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!26}
!llvm.dbg.sp = !{!0, !6, !9, !10}
-!0 = !DISubprogram(name: "idamax", line: 112, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "idamax", line: 112, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !3)
!1 = !DIFile(filename: "/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127169)", isOptimized: true, emissionKind: 0, file: !25, enums: !8, retainedTypes: !8, subprograms: !8)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127169)", isOptimized: true, emissionKind: 0, file: !25, enums: !8, retainedTypes: !8, subprograms: !8)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "dscal", line: 206, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
+!6 = distinct !DISubprogram(name: "dscal", line: 206, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !DISubprogram(name: "daxpy", line: 230, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
-!10 = !DISubprogram(name: "dgefa", line: 267, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
+!9 = distinct !DISubprogram(name: "daxpy", line: 230, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
+!10 = distinct !DISubprogram(name: "dgefa", line: 267, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
!11 = !DILocation(line: 281, column: 9, scope: !12)
!12 = distinct !DILexicalBlock(line: 272, column: 5, file: !25, scope: !13)
!13 = distinct !DILexicalBlock(line: 271, column: 5, file: !25, scope: !14)
@@ -55,7 +55,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!16 = !DILocation(line: 284, column: 10, scope: !17)
!17 = distinct !DILexicalBlock(line: 282, column: 9, file: !25, scope: !12)
!18 = !{double undef}
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "temp", line: 268, scope: !14, file: !1, type: !20)
+!19 = !DILocalVariable(name: "temp", line: 268, scope: !14, file: !1, type: !20)
!20 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!21 = !DILocation(line: 286, column: 14, scope: !22)
!22 = distinct !DILexicalBlock(line: 285, column: 13, file: !25, scope: !17)
diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll
index c384a858d1e6..fd10c5d7503d 100644
--- a/test/Transforms/LICM/hoist-deref-load.ll
+++ b/test/Transforms/LICM/hoist-deref-load.ll
@@ -388,5 +388,49 @@ for.end: ; preds = %for.inc, %entry
ret void
}
+; In this test we should be able to only hoist load from %cptr. We can't hoist
+; load from %c because it's dereferenceability can depend on %cmp1 condition.
+; By moving it out of the loop we break this dependency and can not rely
+; on the dereferenceability anymore.
+; In other words this test checks that we strip dereferenceability metadata
+; after hoisting an instruction.
+
+; CHECK-LABEL: @test10
+; CHECK: %c = load i32*, i32** %cptr
+; CHECK-NOT: dereferenceable
+; CHECK: if.then:
+; CHECK: load i32, i32* %c, align 4
+
+define void @test10(i32* noalias %a, i32* %b, i32** dereferenceable(8) %cptr, i32 %n) #0 {
+entry:
+ %cmp11 = icmp sgt i32 %n, 0
+ br i1 %cmp11, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %c = load i32*, i32** %cptr, !dereferenceable !0
+ %1 = load i32, i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
attributes #0 = { nounwind uwtable }
!0 = !{i64 4}
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll
index aec155b5580a..ed669f383fc3 100644
--- a/test/Transforms/LICM/hoist-invariant-load.ll
+++ b/test/Transforms/LICM/hoist-invariant-load.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt < %s -licm -stats -S 2>&1 | grep "1 licm"
+; RUN: opt < %s -licm -disable-basicaa -stats -S 2>&1 | grep "1 licm"
@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
diff --git a/test/Transforms/LICM/pr23608.ll b/test/Transforms/LICM/pr23608.ll
index 249bc6bf5f63..fe6fd1a1810b 100644
--- a/test/Transforms/LICM/pr23608.ll
+++ b/test/Transforms/LICM/pr23608.ll
@@ -31,7 +31,7 @@ bb2: ; preds = %while.cond
br i1 %tobool, label %bb13, label %bb15
bb13: ; preds = %bb2
-; CHECK-LABEL bb13:
+; CHECK-LABEL: bb13:
; CHECK: %tmp8.le = inttoptr
%.lcssa7 = phi i32* [ %tmp8, %bb2 ]
call void @__msan_warning_noreturn()
diff --git a/test/Transforms/LoopDistribute/basic-with-memchecks.ll b/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 3aced4850411..dce5698595ac 100644
--- a/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -36,7 +36,7 @@ entry:
; Since the checks to A and A + 4 get merged, this will give us a
; total of 8 compares.
;
-; CHECK: for.body.lver.memcheck:
+; CHECK: for.body.lver.check:
; CHECK: = icmp
; CHECK: = icmp
diff --git a/test/Transforms/LoopDistribute/bounds-expansion-bug.ll b/test/Transforms/LoopDistribute/bounds-expansion-bug.ll
new file mode 100644
index 000000000000..5d1aac6c104c
--- /dev/null
+++ b/test/Transforms/LoopDistribute/bounds-expansion-bug.ll
@@ -0,0 +1,106 @@
+; RUN: opt -basicaa -loop-distribute -S < %s | FileCheck %s
+
+; When emitting the memchecks for:
+;
+; for (i = 0; i < n; i++) {
+; A[i + 1] = A[i] * B[i];
+; =======================
+; C[i] = D[i] * E[i];
+; }
+;
+; we had a bug when expanding the bounds for A and C. These are expanded
+; multiple times and rely on the caching in SCEV expansion to avoid any
+; redundancy. However, due to logic in SCEVExpander::ReuseOrCreateCast, we
+; can get earlier expanded values invalidated when casts are used. This test
+; ensure that we are not using the invalidated values.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %a1, i32* %a2,
+ i32* %b,
+ i32* %c1, i32* %c2,
+ i32* %d,
+ i32* %e) {
+entry:
+
+ %cond = icmp eq i32* %e, null
+ br i1 %cond, label %one, label %two
+one:
+ br label %join
+two:
+ br label %join
+join:
+
+; The pointers need to be defined by PHIs in order for the bug to trigger.
+; Because of the PHIs the existing casts won't be at the desired location so a
+; new cast will be emitted and the old cast will get invalidated.
+;
+; These are the steps:
+;
+; 1. After the bounds for A and C are first expanded:
+;
+; join:
+; %a = phi i32* [ %a1, %one ], [ %a2, %two ]
+; %c = phi i32* [ %c1, %one ], [ %c2, %two ]
+; %c5 = bitcast i32* %c to i8*
+; %a3 = bitcast i32* %a to i8*
+;
+; 2. After A is expanded again:
+;
+; join: ; preds = %two, %one
+; %a = phi i32* [ %a1, %one ], [ %a2, %two ]
+; %c = phi i32* [ %c1, %one ], [ %c2, %two ]
+; %a3 = bitcast i32* %a to i8* <--- new
+; %c5 = bitcast i32* %c to i8*
+; %0 = bitcast i32* undef to i8* <--- old, invalidated
+;
+; 3. Finally, when C is expanded again:
+;
+; join: ; preds = %two, %one
+; %a = phi i32* [ %a1, %one ], [ %a2, %two ]
+; %c = phi i32* [ %c1, %one ], [ %c2, %two ]
+; %c5 = bitcast i32* %c to i8* <--- new
+; %a3 = bitcast i32* %a to i8*
+; %0 = bitcast i32* undef to i8* <--- old, invalidated
+; %1 = bitcast i32* undef to i8*
+
+ %a = phi i32* [%a1, %one], [%a2, %two]
+ %c = phi i32* [%c1, %one], [%c2, %two]
+ br label %for.body
+
+
+; CHECK: [[VALUE:%[0-9a-z]+]] = bitcast i32* undef to i8*
+; CHECK-NOT: [[VALUE]]
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %join ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulA = mul i32 %loadB, %loadA
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+ %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+ %loadD = load i32, i32* %arrayidxD, align 4
+
+ %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+ %loadE = load i32, i32* %arrayidxE, align 4
+
+ %mulC = mul i32 %loadD, %loadE
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll b/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll
new file mode 100644
index 000000000000..2ba746dd6b9e
--- /dev/null
+++ b/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll
@@ -0,0 +1,57 @@
+; RUN: opt -basicaa -loop-distribute -S < %s | FileCheck %s
+
+; If we can't find the bounds for one of the arrays in order to generate the
+; memchecks (e.g., C[i * i] below), loop shold not get distributed.
+;
+; for (i = 0; i < n; i++) {
+; A[i + 1] = A[i] * 3;
+; -------------------------------
+; C[i * i] = B[i] * 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that we didn't distribute by checking that we still have the original
+; number of branches.
+
+@A = common global i32* null, align 8
+@B = common global i32* null, align 8
+@C = common global i32* null, align 8
+
+define void @f() {
+entry:
+ %a = load i32*, i32** @A, align 8
+ %b = load i32*, i32** @B, align 8
+ %c = load i32*, i32** @C, align 8
+ br label %for.body
+; CHECK: br
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %mulA = mul i32 %loadA, 3
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulC = mul i32 %loadB, 2
+
+ %ind_2 = mul i64 %ind, %ind
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind_2
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+; CHECK: br
+; CHECK-NOT: br
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index c633ae95d16f..27a955175b59 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -424,3 +424,110 @@ exit:
ret void
; CHECK: ret void
}
+
+; Recognize loops with a negative stride.
+define void @test15(i32* nocapture %f) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 0, i32* %arrayidx, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+; CHECK-LABEL: @test15(
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %f1, i8 0, i64 262148, i32 4, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+; Loop with a negative stride. Verify an aliasing write to f[65536] prevents
+; the creation of a memset.
+define void @test16(i32* nocapture %f) {
+entry:
+ %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 0, i32* %arrayidx, align 4
+ store i32 1, i32* %arrayidx1, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+; CHECK-LABEL: @test16(
+; CHECK-NOT: call void @llvm.memset.p0i8.i64
+; CHECK: ret void
+}
+
+; Handle memcpy-able loops with negative stride.
+define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
+entry:
+ %conv = sext i32 %c to i64
+ %mul = shl nsw i64 %conv, 2
+ %call = tail call noalias i8* @malloc(i64 %mul)
+ %0 = bitcast i8* %call to i32*
+ %tobool.9 = icmp eq i32 %c, 0
+ br i1 %tobool.9, label %while.end, label %while.body.preheader
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
+ %dec10 = add nsw i32 %dec10.in, -1
+ %idxprom = sext i32 %dec10 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom
+ store i32 %1, i32* %arrayidx2, align 4
+ %tobool = icmp eq i32 %dec10, 0
+ br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ ret i32* %0
+; CHECK-LABEL: @test17(
+; CHECK: call void @llvm.memcpy
+; CHECK: ret i32*
+}
+
+declare noalias i8* @malloc(i64)
+
+; Handle memcpy-able loops with negative stride.
+; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
+; for (int i = 2047; i >= 0; --i) {
+; a[i] = b[i];
+; }
+; }
+define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %0, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+; CHECK-LABEL: @test18(
+; CHECK: call void @llvm.memcpy
+; CHECK: ret
+}
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
index bcd862d7a729..a85e48997548 100644
--- a/test/Transforms/LoopIdiom/debug-line.ll
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin10.0.0"
-define void @foo(double* nocapture %a) nounwind ssp {
+define void @foo(double* nocapture %a) nounwind ssp !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata double* %a, i64 0, metadata !5, metadata !DIExpression()), !dbg !8
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !10, metadata !DIExpression()), !dbg !14
@@ -30,17 +30,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!19}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3, function: void (double*)* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3)
!1 = !DIFile(filename: "li.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: 0, file: !18, enums: !9, retainedTypes: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: 0, file: !18, enums: !9, retainedTypes: !9)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 2, arg: 1, scope: !0, file: !1, type: !6)
+!5 = !DILocalVariable(name: "a", line: 2, arg: 1, scope: !0, file: !1, type: !6)
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !7)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!8 = !DILocation(line: 2, column: 18, scope: !0)
!9 = !{}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3, scope: !11, file: !1, type: !13)
+!10 = !DILocalVariable(name: "i", line: 3, scope: !11, file: !1, type: !13)
!11 = distinct !DILexicalBlock(line: 3, column: 3, file: !18, scope: !12)
!12 = distinct !DILexicalBlock(line: 2, column: 21, file: !18, scope: !0)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/LoopLoadElim/backward.ll b/test/Transforms/LoopLoadElim/backward.ll
new file mode 100644
index 000000000000..7c750a51a2a3
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/backward.ll
@@ -0,0 +1,32 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+
+; Simple st->ld forwarding derived from a lexical backward dep.
+;
+; for (unsigned i = 0; i < 100; i++)
+; A[i+1] = A[i] + B[i];
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N) {
+entry:
+; CHECK: %load_initial = load i32, i32* %A
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %load = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %load_1 = load i32, i32* %arrayidx2, align 4
+; CHECK: %add = add i32 %load_1, %store_forwarded
+ %add = add i32 %load_1, %load
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ store i32 %add, i32* %arrayidx_next, align 4
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/def-store-before-load.ll b/test/Transforms/LoopLoadElim/def-store-before-load.ll
new file mode 100644
index 000000000000..3dc93f6786e7
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/def-store-before-load.ll
@@ -0,0 +1,35 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+
+; No loop-carried forwarding: The intervening store to A[i] kills the stored
+; value from the previous iteration.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i] = 1;
+; A[i+1] = A[i] + B[i];
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK-NOT: %store_forwarded
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ store i32 1, i32* %arrayidx, align 4
+ %a = load i32, i32* %arrayidx, align 4
+ %arrayidxB = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %b = load i32, i32* %arrayidxB, align 4
+; CHECK: %add = add i32 %b, %a
+ %add = add i32 %b, %a
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ store i32 %add, i32* %arrayidx_next, align 4
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/forward.ll b/test/Transforms/LoopLoadElim/forward.ll
new file mode 100644
index 000000000000..c2b1816530c1
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/forward.ll
@@ -0,0 +1,47 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+
+; Simple st->ld forwarding derived from a lexical forwrad dep.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+1] = B[i] + 2;
+; C[i] = A[i] * 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32* %B, i32* %C, i64 %N) {
+
+; CHECK: for.body.lver.check:
+; CHECK: %found.conflict{{.*}} =
+; CHECK-NOT: %found.conflict{{.*}} =
+
+entry:
+; for.body.ph:
+; CHECK: %load_initial = load i32, i32* %A
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %for.body.ph ], [ %a_p1, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; CHECK: %c = mul i32 %store_forwarded, 2
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/memcheck.ll b/test/Transforms/LoopLoadElim/memcheck.ll
new file mode 100644
index 000000000000..8eadd437a5ac
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/memcheck.ll
@@ -0,0 +1,52 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+; RUN: opt -loop-load-elim -S -runtime-check-per-loop-load-elim=2 < %s | FileCheck %s --check-prefix=AGGRESSIVE
+
+; This needs two pairs of memchecks (A * { C, D }) for a single load
+; elimination which is considered to expansive by default.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+1] = B[i] + 2;
+; C[i] = A[i] * 2;
+; D[i] = 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32* %B, i32* %C, i64 %N, i32* %D) {
+entry:
+ br label %for.body
+
+; AGGRESSIVE: for.body.lver.check:
+; AGGRESSIVE: %found.conflict{{.*}} =
+; AGGRESSIVE: %found.conflict{{.*}} =
+; AGGRESSIVE-NOT: %found.conflict{{.*}} =
+
+for.body: ; preds = %for.body, %entry
+; CHECK-NOT: %store_forwarded =
+; AGGRESSIVE: %store_forwarded =
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %Didx = getelementptr inbounds i32, i32* %D, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; CHECK: %c = mul i32 %a, 2
+; AGGRESSIVE: %c = mul i32 %store_forwarded, 2
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+ store i32 2, i32* %Didx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll b/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll
new file mode 100644
index 000000000000..b0c0f3dee86e
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll
@@ -0,0 +1,48 @@
+; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
+
+; In this case the later store forward to the load:
+;
+; for (unsigned i = 0; i < 100; i++) {
+; B[i] = A[i] + 1;
+; A[i+1] = C[i] + 2;
+; A[i+1] = D[i] + 3;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B,
+ i32* noalias nocapture %C, i32* noalias nocapture readonly %D,
+ i64 %N) {
+entry:
+; CHECK: %load_initial = load i32, i32* %A
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %addD, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidxA = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %loadA = load i32, i32* %arrayidxA, align 4
+; CHECK: %addA = add i32 %store_forwarded, 1
+ %addA = add i32 %loadA, 1
+
+ %arrayidxB = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ store i32 %addA, i32* %arrayidxB, align 4
+
+ %arrayidxC = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %loadC = load i32, i32* %arrayidxC, align 4
+ %addC = add i32 %loadC, 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidxA_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ store i32 %addC, i32* %arrayidxA_next, align 4
+
+ %arrayidxD = getelementptr inbounds i32, i32* %D, i64 %indvars.iv
+ %loadD = load i32, i32* %arrayidxD, align 4
+ %addD = add i32 %loadD, 3
+ store i32 %addD, i32* %arrayidxA_next, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/unknown-dep.ll b/test/Transforms/LoopLoadElim/unknown-dep.ll
new file mode 100644
index 000000000000..d2df718ca4c3
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/unknown-dep.ll
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Give up in the presence of unknown deps. Here, the different strides result
+; in unknown dependence:
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+1] = B[i] + 2;
+; A[2*i] = C[i] + 2;
+; D[i] = A[i] + 2;
+; }
+
+define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %C,
+ i32* noalias %D, i64 %N) {
+
+entry:
+; for.body.ph:
+; CHECK-NOT: %load_initial =
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK-NOT: %store_forwarded =
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Didx = getelementptr inbounds i32, i32* %D, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %indvars.m2 = mul nuw nsw i64 %indvars.iv, 2
+ %A2idx = getelementptr inbounds i32, i32* %A, i64 %indvars.m2
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %c = load i32, i32* %Cidx, align 4
+ %a_m2 = add i32 %c, 2
+ store i32 %a_m2, i32* %A2idx, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; CHECK-NOT: %d = add i32 %store_forwarded, 2
+; CHECK: %d = add i32 %a, 2
+ %d = add i32 %a, 2
+ store i32 %d, i32* %Didx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopReroll/negative.ll b/test/Transforms/LoopReroll/negative.ll
new file mode 100644
index 000000000000..36f6806e1c37
--- /dev/null
+++ b/test/Transforms/LoopReroll/negative.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -loop-reroll %s | FileCheck %s
+target triple = "aarch64--linux-gnu"
+@buf = global [16 x i8] c"\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A", align 1
+
+define i32 @test1(i32 %len, i8* nocapture readonly %buf) #0 {
+entry:
+ %cmp.13 = icmp sgt i32 %len, 1
+ br i1 %cmp.13, label %while.body.lr.ph, label %while.end
+
+while.body.lr.ph: ; preds = %entry
+ br label %while.body
+
+while.body:
+;CHECK-LABEL: while.body:
+;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
+;CHECK-NEXT: %sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
+;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -1
+;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -2
+;CHECK: br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body
+
+ %sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add4, %while.body ]
+ %len.addr.014 = phi i32 [ %len, %while.body.lr.ph ], [ %sub5, %while.body ]
+ %idxprom = sext i32 %len.addr.014 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %buf, i64 %idxprom
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i64
+ %add = add i64 %conv, %sum4.015
+ %sub = add nsw i32 %len.addr.014, -1
+ %idxprom1 = sext i32 %sub to i64
+ %arrayidx2 = getelementptr inbounds i8, i8* %buf, i64 %idxprom1
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i64
+ %add4 = add i64 %add, %conv3
+ %sub5 = add nsw i32 %len.addr.014, -2
+ %cmp = icmp sgt i32 %sub5, 1
+ br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge: ; preds = %while.body
+ %add4.lcssa = phi i64 [ %add4, %while.body ]
+ %phitmp = trunc i64 %add4.lcssa to i32
+ br label %while.end
+
+while.end: ; preds = %while.cond.while.end_crit_edge, %entry
+ %sum4.0.lcssa = phi i32 [ %phitmp, %while.cond.while.end_crit_edge ], [ 0, %entry ]
+ ret i32 %sum4.0.lcssa
+ unreachable
+}
+
diff --git a/test/Transforms/LoopReroll/reroll_with_dbg.ll b/test/Transforms/LoopReroll/reroll_with_dbg.ll
new file mode 100644
index 000000000000..78b457ed94ab
--- /dev/null
+++ b/test/Transforms/LoopReroll/reroll_with_dbg.ll
@@ -0,0 +1,139 @@
+;RUN: opt < %s -loop-reroll -S | FileCheck %s
+;void foo(float * restrict a, float * restrict b, int n) {
+; for(int i = 0; i < n; i+=4) {
+; a[i] = b[i];
+; a[i+1] = b[i+1];
+; a[i+2] = b[i+2];
+; a[i+3] = b[i+3];
+; }
+;}
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv4t--linux-gnueabi"
+
+; Function Attrs: nounwind
+define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %b, i32 %n) #0 !dbg !4 {
+entry:
+;CHECK-LABEL: @foo
+
+ tail call void @llvm.dbg.value(metadata float* %a, i64 0, metadata !12, metadata !22), !dbg !23
+ tail call void @llvm.dbg.value(metadata float* %b, i64 0, metadata !13, metadata !22), !dbg !24
+ tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !14, metadata !22), !dbg !25
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !22), !dbg !26
+ %cmp.30 = icmp sgt i32 %n, 0, !dbg !27
+ br i1 %cmp.30, label %for.body.preheader, label %for.cond.cleanup, !dbg !29
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !30
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup, !dbg !32
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void, !dbg !32
+
+for.body: ; preds = %for.body.preheader, %for.body
+;CHECK: for.body:
+;CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, {{.*}} ]
+;CHECK: load
+;CHECK: store
+;CHECK-NOT: load
+;CHECK-NOT: store
+;CHECK: call void @llvm.dbg.value
+;CHECK: %indvar.next = add i32 %indvar, 1
+;CHECK: icmp eq i32 %indvar
+ %i.031 = phi i32 [ %add13, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %b, i32 %i.031, !dbg !30
+ %0 = bitcast float* %arrayidx to i32*, !dbg !30
+ %1 = load i32, i32* %0, align 4, !dbg !30, !tbaa !33
+ %arrayidx1 = getelementptr inbounds float, float* %a, i32 %i.031, !dbg !37
+ %2 = bitcast float* %arrayidx1 to i32*, !dbg !38
+ store i32 %1, i32* %2, align 4, !dbg !38, !tbaa !33
+ %add = or i32 %i.031, 1, !dbg !39
+ %arrayidx2 = getelementptr inbounds float, float* %b, i32 %add, !dbg !40
+ %3 = bitcast float* %arrayidx2 to i32*, !dbg !40
+ %4 = load i32, i32* %3, align 4, !dbg !40, !tbaa !33
+ %arrayidx4 = getelementptr inbounds float, float* %a, i32 %add, !dbg !41
+ %5 = bitcast float* %arrayidx4 to i32*, !dbg !42
+ store i32 %4, i32* %5, align 4, !dbg !42, !tbaa !33
+ %add5 = or i32 %i.031, 2, !dbg !43
+ %arrayidx6 = getelementptr inbounds float, float* %b, i32 %add5, !dbg !44
+ %6 = bitcast float* %arrayidx6 to i32*, !dbg !44
+ %7 = load i32, i32* %6, align 4, !dbg !44, !tbaa !33
+ %arrayidx8 = getelementptr inbounds float, float* %a, i32 %add5, !dbg !45
+ %8 = bitcast float* %arrayidx8 to i32*, !dbg !46
+ store i32 %7, i32* %8, align 4, !dbg !46, !tbaa !33
+ %add9 = or i32 %i.031, 3, !dbg !47
+ %arrayidx10 = getelementptr inbounds float, float* %b, i32 %add9, !dbg !48
+ %9 = bitcast float* %arrayidx10 to i32*, !dbg !48
+ %10 = load i32, i32* %9, align 4, !dbg !48, !tbaa !33
+ %arrayidx12 = getelementptr inbounds float, float* %a, i32 %add9, !dbg !49
+ %11 = bitcast float* %arrayidx12 to i32*, !dbg !50
+ store i32 %10, i32* %11, align 4, !dbg !50, !tbaa !33
+ %add13 = add nuw nsw i32 %i.031, 4, !dbg !51
+ tail call void @llvm.dbg.value(metadata i32 %add13, i64 0, metadata !15, metadata !22), !dbg !26
+ %cmp = icmp slt i32 %add13, %n, !dbg !27
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !dbg !29
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18, !19, !20}
+!llvm.ident = !{!21}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (http://llvm.org/git/clang.git b1fbc23058e7fa1cdd954ab97ba84f1c549c9879) (http://llvm.org/git/llvm.git 054da58c5398a721d4dab7af63d7de8d7a1e1a1c)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.c", directory: "/home/weimingz/llvm-build/release/community-tip")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7, !7, !10}
+!7 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !8)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32, align: 32)
+!9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !{!12, !13, !14, !15}
+!12 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!13 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 1, type: !7)
+!14 = !DILocalVariable(name: "n", arg: 3, scope: !4, file: !1, line: 1, type: !10)
+!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 2, type: !10)
+!16 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 3)
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{i32 1, !"wchar_size", i32 4}
+!20 = !{i32 1, !"min_enum_size", i32 4}
+!21 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git b1fbc23058e7fa1cdd954ab97ba84f1c549c9879) (http://llvm.org/git/llvm.git 054da58c5398a721d4dab7af63d7de8d7a1e1a1c)"}
+!22 = !DIExpression()
+!23 = !DILocation(line: 1, column: 27, scope: !4)
+!24 = !DILocation(line: 1, column: 47, scope: !4)
+!25 = !DILocation(line: 1, column: 54, scope: !4)
+!26 = !DILocation(line: 2, column: 11, scope: !16)
+!27 = !DILocation(line: 2, column: 20, scope: !28)
+!28 = distinct !DILexicalBlock(scope: !16, file: !1, line: 2, column: 3)
+!29 = !DILocation(line: 2, column: 3, scope: !16)
+!30 = !DILocation(line: 3, column: 12, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !28, file: !1, line: 2, column: 31)
+!32 = !DILocation(line: 8, column: 1, scope: !4)
+!33 = !{!34, !34, i64 0}
+!34 = !{!"float", !35, i64 0}
+!35 = !{!"omnipotent char", !36, i64 0}
+!36 = !{!"Simple C/C++ TBAA"}
+!37 = !DILocation(line: 3, column: 5, scope: !31)
+!38 = !DILocation(line: 3, column: 10, scope: !31)
+!39 = !DILocation(line: 4, column: 17, scope: !31)
+!40 = !DILocation(line: 4, column: 14, scope: !31)
+!41 = !DILocation(line: 4, column: 5, scope: !31)
+!42 = !DILocation(line: 4, column: 12, scope: !31)
+!43 = !DILocation(line: 5, column: 17, scope: !31)
+!44 = !DILocation(line: 5, column: 14, scope: !31)
+!45 = !DILocation(line: 5, column: 5, scope: !31)
+!46 = !DILocation(line: 5, column: 12, scope: !31)
+!47 = !DILocation(line: 6, column: 17, scope: !31)
+!48 = !DILocation(line: 6, column: 14, scope: !31)
+!49 = !DILocation(line: 6, column: 5, scope: !31)
+!50 = !DILocation(line: 6, column: 12, scope: !31)
+!51 = !DILocation(line: 2, column: 26, scope: !28)
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 9bcca15ab551..d90841d16270 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -3,7 +3,7 @@
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp {
+define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !0 {
; CHECK-LABEL: define i32 @tak(
; CHECK: entry
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x
@@ -72,7 +72,7 @@ for.body:
for.inc:
%dec = add i64 %i.0, -1
- tail call void @llvm.dbg.value(metadata i64 %dec, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !0), metadata !DIExpression()), !dbg !DILocation(scope: !0)
+ tail call void @llvm.dbg.value(metadata i64 %dec, i64 0, metadata !DILocalVariable(scope: !0), metadata !DIExpression()), !dbg !DILocation(scope: !0)
br label %for.cond
for.end:
@@ -84,17 +84,17 @@ for.end:
!llvm.module.flags = !{!20}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "tak", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3, function: i32 (i32, i32, i32)* @tak)
+!0 = distinct !DISubprogram(name: "tak", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3)
!1 = !DIFile(filename: "/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", directory: "/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125492)", isOptimized: true, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125492)", isOptimized: true, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 32, arg: 0, scope: !0, file: !1, type: !5)
+!6 = !DILocalVariable(name: "x", line: 32, arg: 1, scope: !0, file: !1, type: !5)
!7 = !DILocation(line: 32, column: 13, scope: !0)
-!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 32, arg: 0, scope: !0, file: !1, type: !5)
+!8 = !DILocalVariable(name: "y", line: 32, arg: 2, scope: !0, file: !1, type: !5)
!9 = !DILocation(line: 32, column: 20, scope: !0)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "z", line: 32, arg: 0, scope: !0, file: !1, type: !5)
+!10 = !DILocalVariable(name: "z", line: 32, arg: 3, scope: !0, file: !1, type: !5)
!11 = !DILocation(line: 32, column: 27, scope: !0)
!12 = !DILocation(line: 33, column: 3, scope: !13)
!13 = distinct !DILexicalBlock(line: 32, column: 30, file: !18, scope: !0)
diff --git a/test/Transforms/LoopSimplify/dbg-loc.ll b/test/Transforms/LoopSimplify/dbg-loc.ll
index 073319bdac3c..b0e14bbcfd7f 100644
--- a/test/Transforms/LoopSimplify/dbg-loc.ll
+++ b/test/Transforms/LoopSimplify/dbg-loc.ll
@@ -16,7 +16,7 @@ declare void @f3()
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label %for.end, !dbg [[LOOPEXIT_LOC:![0-9]+]]
-define linkonce_odr hidden void @foo(%"Length"* %begin, %"Length"* %end) nounwind ssp uwtable align 2 {
+define linkonce_odr hidden void @foo(%"Length"* %begin, %"Length"* %end) nounwind ssp uwtable align 2 !dbg !6 {
entry:
%cmp.4 = icmp eq %"Length"* %begin, %end, !dbg !7
br i1 %cmp.4, label %for.end, label %for.body, !dbg !8
@@ -80,7 +80,7 @@ eh.resume: ; preds = %catch
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "Vector.h", directory: "/tmp")
-!6 = !DISubprogram(name: "destruct", scope: !5, file: !5, line: 71, type: !4, isLocal: false, isDefinition: true, scopeLine: 72, flags: DIFlagPrototyped, isOptimized: false, function: void (%"Length"*, %"Length"*)* @foo, variables: !3)
+!6 = distinct !DISubprogram(name: "destruct", scope: !5, file: !5, line: 71, type: !4, isLocal: false, isDefinition: true, scopeLine: 72, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
!7 = !DILocation(line: 73, column: 38, scope: !6)
!8 = !DILocation(line: 73, column: 13, scope: !6)
!9 = !DILocation(line: 73, column: 27, scope: !6)
diff --git a/test/Transforms/LoopSimplify/single-backedge.ll b/test/Transforms/LoopSimplify/single-backedge.ll
index 92fbdca8a6d2..6f3db8fb14fc 100644
--- a/test/Transforms/LoopSimplify/single-backedge.ll
+++ b/test/Transforms/LoopSimplify/single-backedge.ll
@@ -30,7 +30,7 @@ BE2: ; preds = %n br label %Loop
!2 = !{}
!3 = !DISubroutineType(types: !2)
!4 = !DIFile(filename: "atomic.cpp", directory: "/tmp")
-!5 = !DISubprogram(name: "test", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!6 = !DILocation(line: 100, column: 1, scope: !5)
!7 = !DILocation(line: 101, column: 1, scope: !5)
!8 = !DILocation(line: 102, column: 1, scope: !5)
diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll
new file mode 100644
index 000000000000..bf61112a3c3e
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll
@@ -0,0 +1,156 @@
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s
+
+; Test that loops with different maximum offsets for different address
+; spaces are correctly handled.
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_i32(
+; OPT: {{^}}.lr.ph:
+; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095
+; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1
+define void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i64 %indvars.iv, 4095
+ %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
+ %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
+ %tmp4 = sext i8 %tmp3 to i32
+ %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
+ %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
+ %tmp7 = add nsw i32 %tmp6, %tmp4
+ store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_p1_i32(
+; OPT: {{^}}.lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i8, i8 addrspace(1)* %arg1, i64 4096
+; OPT: br label %.lr.ph
+
+; OPT: {{^}}.lr.ph:
+; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1
+define void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i64 %indvars.iv, 4096
+ %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
+ %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
+ %tmp4 = sext i8 %tmp3 to i32
+ %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
+ %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
+ %tmp7 = add nsw i32 %tmp6, %tmp4
+ store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_i32(
+; OPT: {{^}}.lr.ph
+; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535
+; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1
+define void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i64 %indvars.iv, 65535
+ %tmp2 = trunc i64 %tmp1 to i32
+ %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
+ %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
+ %tmp5 = sext i8 %tmp4 to i32
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
+ %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
+ %tmp8 = add nsw i32 %tmp7, %tmp5
+ store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_p1_i32(
+; OPT: {{^}}.lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i8, i8 addrspace(3)* %arg1, i32 65536
+; OPT: br label %.lr.ph
+
+; OPT: {{^}}.lr.ph:
+; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1
+define void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i64 %indvars.iv, 65536
+ %tmp2 = trunc i64 %tmp1 to i32
+ %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
+ %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
+ %tmp5 = sext i8 %tmp4 to i32
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
+ %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
+ %tmp8 = add nsw i32 %tmp7, %tmp5
+ store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hawaii" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg b/test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..6baccf05fff0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
new file mode 100644
index 000000000000..bd80302a68b8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
@@ -0,0 +1,113 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -print-lsr-output < %s 2>&1 | FileCheck %s
+
+; Test various conditions where OptimizeLoopTermCond doesn't look at a
+; memory instruction use and fails to find the address space.
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; CHECK-LABEL: @local_cmp_user(
+; CHECK: bb11:
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ -2, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ undef, %entry ]
+
+; CHECK: bb:
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
+; CHECK: %scevgep = getelementptr i8, i8 addrspace(3)* %t, i32 %lsr.iv.next2
+; CHECK: %c1 = icmp ult i8 addrspace(3)* %scevgep, undef
+define void @local_cmp_user() nounwind {
+entry:
+ br label %bb11
+
+bb11:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
+ %ii = shl i32 %i, 1
+ %c0 = icmp eq i32 %i, undef
+ br i1 %c0, label %bb13, label %bb
+
+bb:
+ %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef
+ %p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii
+ %c1 = icmp ult i8 addrspace(3)* %p, undef
+ %i.next = add i32 %i, 1
+ br i1 %c1, label %bb11, label %bb13
+
+bb13:
+ unreachable
+}
+
+; CHECK-LABEL: @global_cmp_user(
+; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, 2
+; CHECK: %scevgep = getelementptr i8, i8 addrspace(1)* %t, i64 %lsr.iv.next2
+define void @global_cmp_user() nounwind {
+entry:
+ br label %bb11
+
+bb11:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %bb ]
+ %ii = shl i64 %i, 1
+ %c0 = icmp eq i64 %i, undef
+ br i1 %c0, label %bb13, label %bb
+
+bb:
+ %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
+ %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii
+ %c1 = icmp ult i8 addrspace(1)* %p, undef
+ %i.next = add i64 %i, 1
+ br i1 %c1, label %bb11, label %bb13
+
+bb13:
+ unreachable
+}
+
+; CHECK-LABEL: @global_gep_user(
+; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i32 %lsr.iv1
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
+define void @global_gep_user() nounwind {
+entry:
+ br label %bb11
+
+bb11:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
+ %ii = shl i32 %i, 1
+ %c0 = icmp eq i32 %i, undef
+ br i1 %c0, label %bb13, label %bb
+
+bb:
+ %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
+ %p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii
+ %c1 = icmp ult i8 addrspace(1)* %p, undef
+ %i.next = add i32 %i, 1
+ br i1 %c1, label %bb11, label %bb13
+
+bb13:
+ unreachable
+}
+
+; CHECK-LABEL: @global_sext_scale_user(
+; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
+define void @global_sext_scale_user() nounwind {
+entry:
+ br label %bb11
+
+bb11:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
+ %ii = shl i32 %i, 1
+ %ii.ext = sext i32 %ii to i64
+ %c0 = icmp eq i32 %i, undef
+ br i1 %c0, label %bb13, label %bb
+
+bb:
+ %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
+ %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
+ %c1 = icmp ult i8 addrspace(1)* %p, undef
+ %i.next = add i32 %i, 1
+ br i1 %c1, label %bb11, label %bb13
+
+bb13:
+ unreachable
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 2ad6c2ea52da..788842101080 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -239,33 +239,33 @@ define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i
%counter.04 = phi i32 [ 0, %.lr.ph ], [ %44, %11 ]
%result.03 = phi <16 x i8> [ zeroinitializer, %.lr.ph ], [ %41, %11 ]
%.012 = phi <16 x i8>* [ %data, %.lr.ph ], [ %43, %11 ]
- %12 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %.05, i32 1) nounwind
+ %12 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %.05, i32 1) nounwind
%13 = getelementptr inbounds i8, i8* %.05, i32 %ref_stride
- %14 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %13, i32 1) nounwind
+ %14 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %13, i32 1) nounwind
%15 = shufflevector <1 x i64> %12, <1 x i64> %14, <2 x i32> <i32 0, i32 1>
%16 = bitcast <2 x i64> %15 to <16 x i8>
%17 = getelementptr inbounds <16 x i8>, <16 x i8>* %.012, i32 1
store <16 x i8> %16, <16 x i8>* %.012, align 4
%18 = getelementptr inbounds i8, i8* %.05, i32 %2
- %19 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %18, i32 1) nounwind
+ %19 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %18, i32 1) nounwind
%20 = getelementptr inbounds i8, i8* %.05, i32 %3
- %21 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %20, i32 1) nounwind
+ %21 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %20, i32 1) nounwind
%22 = shufflevector <1 x i64> %19, <1 x i64> %21, <2 x i32> <i32 0, i32 1>
%23 = bitcast <2 x i64> %22 to <16 x i8>
%24 = getelementptr inbounds <16 x i8>, <16 x i8>* %.012, i32 2
store <16 x i8> %23, <16 x i8>* %17, align 4
%25 = getelementptr inbounds i8, i8* %.05, i32 %4
- %26 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %25, i32 1) nounwind
+ %26 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %25, i32 1) nounwind
%27 = getelementptr inbounds i8, i8* %.05, i32 %5
- %28 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %27, i32 1) nounwind
+ %28 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %27, i32 1) nounwind
%29 = shufflevector <1 x i64> %26, <1 x i64> %28, <2 x i32> <i32 0, i32 1>
%30 = bitcast <2 x i64> %29 to <16 x i8>
%31 = getelementptr inbounds <16 x i8>, <16 x i8>* %.012, i32 3
store <16 x i8> %30, <16 x i8>* %24, align 4
%32 = getelementptr inbounds i8, i8* %.05, i32 %6
- %33 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %32, i32 1) nounwind
+ %33 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %32, i32 1) nounwind
%34 = getelementptr inbounds i8, i8* %.05, i32 %7
- %35 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %34, i32 1) nounwind
+ %35 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %34, i32 1) nounwind
%36 = shufflevector <1 x i64> %33, <1 x i64> %35, <2 x i32> <i32 0, i32 1>
%37 = bitcast <2 x i64> %36 to <16 x i8>
store <16 x i8> %37, <16 x i8>* %31, align 4
@@ -290,7 +290,7 @@ define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i
ret void
}
-declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32) nounwind readonly
; Handle chains in which the same offset is used for both loads and
; stores to the same array.
@@ -328,32 +328,32 @@ for.body: ; preds = %for.body, %entry
%i.0110 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%src.addr = phi i8* [ %src, %entry ], [ %add.ptr45, %for.body ]
%add.ptr = getelementptr inbounds i8, i8* %src.addr, i32 %idx.neg
- %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr, i32 1)
+ %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr, i32 1)
%add.ptr3 = getelementptr inbounds i8, i8* %src.addr, i32 %idx.neg2
- %vld2 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr3, i32 1)
+ %vld2 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr3, i32 1)
%add.ptr7 = getelementptr inbounds i8, i8* %src.addr, i32 %idx.neg6
- %vld3 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr7, i32 1)
+ %vld3 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr7, i32 1)
%add.ptr11 = getelementptr inbounds i8, i8* %src.addr, i32 %idx.neg10
- %vld4 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr11, i32 1)
- %vld5 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %src.addr, i32 1)
+ %vld4 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr11, i32 1)
+ %vld5 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %src.addr, i32 1)
%add.ptr17 = getelementptr inbounds i8, i8* %src.addr, i32 %stride
- %vld6 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr17, i32 1)
+ %vld6 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr17, i32 1)
%add.ptr20 = getelementptr inbounds i8, i8* %src.addr, i32 %mul5
- %vld7 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr20, i32 1)
+ %vld7 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr20, i32 1)
%add.ptr23 = getelementptr inbounds i8, i8* %src.addr, i32 %mul1
- %vld8 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr23, i32 1)
+ %vld8 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr23, i32 1)
%vadd1 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld1, <8 x i8> %vld2) nounwind
%vadd2 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld2, <8 x i8> %vld3) nounwind
%vadd3 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld3, <8 x i8> %vld4) nounwind
%vadd4 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld4, <8 x i8> %vld5) nounwind
%vadd5 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld5, <8 x i8> %vld6) nounwind
%vadd6 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld6, <8 x i8> %vld7) nounwind
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr3, <8 x i8> %vadd1, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr7, <8 x i8> %vadd2, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr11, <8 x i8> %vadd3, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %src.addr, <8 x i8> %vadd4, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr17, <8 x i8> %vadd5, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr20, <8 x i8> %vadd6, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr3, <8 x i8> %vadd1, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr7, <8 x i8> %vadd2, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr11, <8 x i8> %vadd3, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %src.addr, <8 x i8> %vadd4, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr17, <8 x i8> %vadd5, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr20, <8 x i8> %vadd6, i32 1)
%inc = add nsw i32 %i.0110, 1
%add.ptr45 = getelementptr inbounds i8, i8* %src.addr, i32 8
%exitcond = icmp eq i32 %inc, 4
@@ -363,8 +363,8 @@ for.end: ; preds = %for.body
ret void
}
-declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v8i8(i8*, <8 x i8>, i32) nounwind
declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg b/test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg
new file mode 100644
index 000000000000..2cb98eb371b2
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll b/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll
new file mode 100644
index 000000000000..a16065b4dfbd
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; This confirms that NVPTXTTI considers a 64-to-32 integer trunc free. If such
+; truncs were not considered free, LSR would promote (int)i as a separate
+; induction variable in the following example.
+;
+; for (long i = begin; i != end; i += stride)
+; use((int)i);
+;
+; That would be worthless, because "i" is simulated by two 32-bit registers and
+; truncating it to 32-bit is as simple as directly using the register that
+; contains the low bits.
+define void @trunc_is_free(i64 %begin, i64 %stride, i64 %end) {
+; CHECK-LABEL: @trunc_is_free(
+entry:
+ %cmp.4 = icmp eq i64 %begin, %end
+ br i1 %cmp.4, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+; CHECK: for.body:
+ %i.05 = phi i64 [ %add, %for.body ], [ %begin, %for.body.preheader ]
+ %conv = trunc i64 %i.05 to i32
+; CHECK: trunc i64 %{{[^ ]+}} to i32
+ tail call void @_Z3usei(i32 %conv) #2
+ %add = add nsw i64 %i.05, %stride
+ %cmp = icmp eq i64 %add, %end
+ br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare void @_Z3usei(i32)
+
+!nvvm.annotations = !{!0}
+!0 = !{void (i64, i64, i64)* @trunc_is_free, !"kernel", i32 1}
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
index 24be0dc42d6d..7925bf01020e 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
@@ -23,7 +23,7 @@
; X32: add
; X32: add
; X32: add
-; X32: leal
+; X32: add
; X32: %for.body.3
define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/funclet.ll b/test/Transforms/LoopStrengthReduce/funclet.ll
new file mode 100644
index 000000000000..5d20646141c4
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/funclet.ll
@@ -0,0 +1,216 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+declare i32 @_except_handler3(...)
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @external(i32*)
+declare void @reserve()
+
+define void @f() personality i32 (...)* @_except_handler3 {
+entry:
+ br label %throw
+
+throw: ; preds = %throw, %entry
+ %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+ invoke void @reserve()
+ to label %throw unwind label %pad
+
+pad: ; preds = %throw
+ %phi2 = phi i8* [ %tmp96, %throw ]
+ %cs = catchswitch within none [label %unreachable] unwind label %blah2
+
+unreachable:
+ catchpad within %cs []
+ unreachable
+
+blah2:
+ %cleanuppadi4.i.i.i = cleanuppad within none []
+ br label %loop_body
+
+loop_body: ; preds = %iter, %pad
+ %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %blah2 ]
+ %tmp100 = icmp eq i8* %tmp99, undef
+ br i1 %tmp100, label %unwind_out, label %iter
+
+iter: ; preds = %loop_body
+ %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+ br i1 undef, label %unwind_out, label %loop_body
+
+unwind_out: ; preds = %iter, %loop_body
+ cleanupret from %cleanuppadi4.i.i.i unwind to caller
+}
+
+; CHECK-LABEL: define void @f(
+; CHECK: cleanuppad within none []
+; CHECK-NEXT: ptrtoint i8* %phi2 to i32
+
+define void @g() personality i32 (...)* @_except_handler3 {
+entry:
+ br label %throw
+
+throw: ; preds = %throw, %entry
+ %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+ invoke void @reserve()
+ to label %throw unwind label %pad
+
+pad:
+ %phi2 = phi i8* [ %tmp96, %throw ]
+ %cs = catchswitch within none [label %unreachable, label %blah] unwind to caller
+
+unreachable:
+ catchpad within %cs []
+ unreachable
+
+blah:
+ %catchpad = catchpad within %cs []
+ br label %loop_body
+
+unwind_out:
+ catchret from %catchpad to label %leave
+
+leave:
+ ret void
+
+loop_body: ; preds = %iter, %pad
+ %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %blah ]
+ %tmp100 = icmp eq i8* %tmp99, undef
+ br i1 %tmp100, label %unwind_out, label %iter
+
+iter: ; preds = %loop_body
+ %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+ br i1 undef, label %unwind_out, label %loop_body
+}
+
+; CHECK-LABEL: define void @g(
+; CHECK: blah:
+; CHECK-NEXT: catchpad within %cs []
+; CHECK-NEXT: ptrtoint i8* %phi2 to i32
+
+
+define void @h() personality i32 (...)* @_except_handler3 {
+entry:
+ br label %throw
+
+throw: ; preds = %throw, %entry
+ %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+ invoke void @reserve()
+ to label %throw unwind label %pad
+
+pad:
+ %cs = catchswitch within none [label %unreachable, label %blug] unwind to caller
+
+unreachable:
+ catchpad within %cs []
+ unreachable
+
+blug:
+ %phi2 = phi i8* [ %tmp96, %pad ]
+ %catchpad = catchpad within %cs []
+ br label %loop_body
+
+unwind_out:
+ catchret from %catchpad to label %leave
+
+leave:
+ ret void
+
+loop_body: ; preds = %iter, %pad
+ %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %blug ]
+ %tmp100 = icmp eq i8* %tmp99, undef
+ br i1 %tmp100, label %unwind_out, label %iter
+
+iter: ; preds = %loop_body
+ %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+ br i1 undef, label %unwind_out, label %loop_body
+}
+
+; CHECK-LABEL: define void @h(
+; CHECK: blug:
+; CHECK: catchpad within %cs []
+; CHECK-NEXT: ptrtoint i8* %phi2 to i32
+
+define void @i() personality i32 (...)* @_except_handler3 {
+entry:
+ br label %throw
+
+throw: ; preds = %throw, %entry
+ %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+ invoke void @reserve()
+ to label %throw unwind label %catchpad
+
+catchpad: ; preds = %throw
+ %phi2 = phi i8* [ %tmp96, %throw ]
+ %cs = catchswitch within none [label %cp_body] unwind label %cleanuppad
+
+cp_body:
+ catchpad within %cs []
+ br label %loop_head
+
+cleanuppad:
+ cleanuppad within none []
+ br label %loop_head
+
+loop_head:
+ br label %loop_body
+
+loop_body: ; preds = %iter, %catchpad
+ %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %loop_head ]
+ %tmp100 = icmp eq i8* %tmp99, undef
+ br i1 %tmp100, label %unwind_out, label %iter
+
+iter: ; preds = %loop_body
+ %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+ br i1 undef, label %unwind_out, label %loop_body
+
+unwind_out: ; preds = %iter, %loop_body
+ unreachable
+}
+
+; CHECK-LABEL: define void @i(
+; CHECK: ptrtoint i8* %phi2 to i32
+
+define void @test1(i32* %b, i32* %c) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %d.0 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.inc ]
+ invoke void @external(i32* %d.0)
+ to label %for.inc unwind label %catch.dispatch
+
+for.inc: ; preds = %for.cond
+ %incdec.ptr = getelementptr inbounds i32, i32* %d.0, i32 1
+ br label %for.cond
+
+catch.dispatch: ; preds = %for.cond
+ %cs = catchswitch within none [label %catch] unwind label %catch.dispatch.2
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs [i8* null, i32 64, i8* null]
+ catchret from %0 to label %try.cont
+
+try.cont: ; preds = %catch
+ invoke void @external(i32* %c)
+ to label %try.cont.7 unwind label %catch.dispatch.2
+
+catch.dispatch.2: ; preds = %try.cont, %catchendblock
+ %e.0 = phi i32* [ %c, %try.cont ], [ %b, %catch.dispatch ]
+ %cs2 = catchswitch within none [label %catch.4] unwind to caller
+
+catch.4: ; preds = %catch.dispatch.2
+ catchpad within %cs2 [i8* null, i32 64, i8* null]
+ unreachable
+
+try.cont.7: ; preds = %try.cont
+ ret void
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: for.cond:
+; CHECK: %d.0 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.inc ]
+
+; CHECK: catch.dispatch.2:
+; CHECK: %e.0 = phi i32* [ %c, %try.cont ], [ %b, %catch.dispatch ]
diff --git a/test/Transforms/LoopStrengthReduce/pr12018.ll b/test/Transforms/LoopStrengthReduce/pr12018.ll
index b15961a77904..bb5d1654fada 100644
--- a/test/Transforms/LoopStrengthReduce/pr12018.ll
+++ b/test/Transforms/LoopStrengthReduce/pr12018.ll
@@ -16,7 +16,7 @@ for.body: ; preds = %_ZN8nsTArray9Elemen
%tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
%arrayidx = getelementptr inbounds %struct.nsTArray, %struct.nsTArray* %tmp, i32 %i.06
%add = add nsw i32 %i.06, 1
- call void @llvm.dbg.value(metadata %struct.nsTArray* %aValues, i64 0, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !DISubprogram())
+ call void @llvm.dbg.value(metadata %struct.nsTArray* %aValues, i64 0, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !1)
br label %_ZN8nsTArray9ElementAtEi.exit
_ZN8nsTArray9ElementAtEi.exit: ; preds = %for.body
@@ -35,4 +35,5 @@ declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !DISubprogram())
+!0 = !DILocalVariable(scope: !1)
+!1 = distinct !DISubprogram()
diff --git a/test/Transforms/LoopStrengthReduce/pr25541.ll b/test/Transforms/LoopStrengthReduce/pr25541.ll
new file mode 100644
index 000000000000..011998b90893
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr25541.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+define void @f() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ br label %for.cond.i
+
+for.cond.i: ; preds = %for.inc.i, %entry
+ %_First.addr.0.i = phi i32* [ null, %entry ], [ %incdec.ptr.i, %for.inc.i ]
+ invoke void @g()
+ to label %for.inc.i unwind label %catch.dispatch.i
+
+catch.dispatch.i: ; preds = %for.cond.i
+ %cs = catchswitch within none [label %for.cond.1.preheader.i] unwind to caller
+
+for.cond.1.preheader.i: ; preds = %catch.dispatch.i
+ %0 = catchpad within %cs [i8* null, i32 64, i8* null]
+ %cmp.i = icmp eq i32* %_First.addr.0.i, null
+ br label %for.cond.1.i
+
+for.cond.1.i: ; preds = %for.body.i, %for.cond.1.preheader.i
+ br i1 %cmp.i, label %for.end.i, label %for.body.i
+
+for.body.i: ; preds = %for.cond.1.i
+ call void @g()
+ br label %for.cond.1.i
+
+for.inc.i: ; preds = %for.cond.i
+ %incdec.ptr.i = getelementptr inbounds i32, i32* %_First.addr.0.i, i64 1
+ br label %for.cond.i
+
+for.end.i: ; preds = %for.cond.1.i
+ catchret from %0 to label %leave
+
+leave: ; preds = %for.end.i
+ ret void
+}
+
+; CHECK-LABEL: define void @f(
+; CHECK: %[[PHI:.*]] = phi i64 [ %[[IV_NEXT:.*]], {{.*}} ], [ 0, {{.*}} ]
+; CHECK: %[[ITOP:.*]] = inttoptr i64 %[[PHI]] to i32*
+; CHECK: %[[CMP:.*]] = icmp eq i32* %[[ITOP]], null
+; CHECK: %[[IV_NEXT]] = add i64 %[[PHI]], -4
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index 483becc0e7b8..c6d6690e4302 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -28,7 +28,7 @@ exit:
; sure they aren't marked as post-inc users.
;
; CHECK-LABEL: IV Users for loop %test2.loop
-; CHECK: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test2.loop> in %f = ashr i32 %sext.us, 24
+; CHECK: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us))<nuw><nsw>,+,33554432}<%test2.loop> in %f = ashr i32 %sext.us, 24
define i32 @test2() {
entry:
br label %test2.loop
diff --git a/test/Transforms/LoopStrengthReduce/sext-ind-var.ll b/test/Transforms/LoopStrengthReduce/sext-ind-var.ll
new file mode 100644
index 000000000000..3cf8f536fa71
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/sext-ind-var.ll
@@ -0,0 +1,140 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; LSR used not to be able to generate a float* induction variable in
+; these cases due to scalar evolution not propagating nsw from an
+; instruction to the SCEV, preventing distributing sext into the
+; corresponding addrec.
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i + offset];
+;
+define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testadd
+; CHECK: sext i32 %offset to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = add nuw nsw i32 %i, %offset
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i - offset];
+;
+define float @testsub(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testsub
+; CHECK: sub i32 0, %offset
+; CHECK: sext i32
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = sub nuw nsw i32 %i, %offset
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i * stride];
+;
+define float @testmul(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @testmul
+; CHECK: sext i32 %stride to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = mul nuw nsw i32 %i, %stride
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[3 * (i << 7)];
+;
+; The multiplication by 3 is to make the address calculation expensive
+; enough to force the introduction of a pointer induction variable.
+define float @testshl(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @testshl
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = shl nuw nsw i32 %i, 7
+ %index32mul = mul nuw nsw i32 %index32, 3
+ %index64 = sext i32 %index32mul to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
diff --git a/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg b/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..6baccf05fff0
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll b/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
new file mode 100644
index 000000000000..3cbb70274da5
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
@@ -0,0 +1,33 @@
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test_unroll_convergent_barrier(
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK-NOT: br
+define void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx.in = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %indvars.iv
+ %arrayidx.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %indvars.iv
+ %load = load i32, i32 addrspace(1)* %arrayidx.in
+ call void @llvm.AMDGPU.barrier.global() #1
+ %add = add i32 %load, %sum.02
+ store i32 %add, i32 addrspace(1)* %arrayidx.out
+ %indvars.iv.next = add i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, 4
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare void @llvm.AMDGPU.barrier.global() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind convergent }
diff --git a/test/Transforms/LoopUnroll/X86/partial.ll b/test/Transforms/LoopUnroll/X86/partial.ll
index 4566f792deb4..104a38779e52 100644
--- a/test/Transforms/LoopUnroll/X86/partial.ll
+++ b/test/Transforms/LoopUnroll/X86/partial.ll
@@ -86,17 +86,20 @@ for.body: ; preds = %entry, %for.body
%reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i16, i16* %arr, i64 %indvars.iv
%0 = load i16, i16* %arrayidx, align 2
- %add = add i16 %0, %reduction.026
+ %mul = shl i16 %0, 1
+ %add = add i16 %mul, %reduction.026
%sext = mul i64 %indvars.iv, 12884901888
%idxprom3 = ashr exact i64 %sext, 32
%arrayidx4 = getelementptr inbounds i16, i16* %arr, i64 %idxprom3
%1 = load i16, i16* %arrayidx4, align 2
- %add7 = add i16 %add, %1
+ %mul2 = shl i16 %1, 1
+ %add7 = add i16 %add, %mul2
%sext28 = mul i64 %indvars.iv, 21474836480
%idxprom10 = ashr exact i64 %sext28, 32
%arrayidx11 = getelementptr inbounds i16, i16* %arr, i64 %idxprom10
%2 = load i16, i16* %arrayidx11, align 2
- %add14 = add i16 %add7, %2
+ %mul3 = shl i16 %2, 1
+ %add14 = add i16 %add7, %mul3
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
diff --git a/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll b/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll
deleted file mode 100644
index ac814526647e..000000000000
--- a/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; Check that we don't crash on corner cases.
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo1() {
-entry:
- br label %for.body
-
-for.body:
- %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %idx = zext i32 undef to i64
- %add.ptr = getelementptr inbounds i64, i64* null, i64 %idx
- %inc = add nuw nsw i64 %phi, 1
- %cmp = icmp ult i64 %inc, 999
- br i1 %cmp, label %for.body, label %for.exit
-
-for.exit:
- ret void
-}
-
-define void @foo2() {
-entry:
- br label %for.body
-
-for.body:
- %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %x = getelementptr i32, <4 x i32*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %inc = add nuw nsw i64 %phi, 1
- %cmp = icmp ult i64 %inc, 999
- br i1 %cmp, label %for.body, label %for.exit
-
-for.exit:
- ret void
-}
diff --git a/test/Transforms/LoopUnroll/full-unroll-crashers.ll b/test/Transforms/LoopUnroll/full-unroll-crashers.ll
new file mode 100644
index 000000000000..e932851042ad
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-crashers.ll
@@ -0,0 +1,102 @@
+; Check that we don't crash on corner cases.
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
+
+define void @foo1() {
+entry:
+ br label %for.body
+
+for.body:
+ %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %idx = zext i32 undef to i64
+ %add.ptr = getelementptr inbounds i64, i64* null, i64 %idx
+ %inc = add nuw nsw i64 %phi, 1
+ %cmp = icmp ult i64 %inc, 999
+ br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+ ret void
+}
+
+define void @foo2() {
+entry:
+ br label %for.body
+
+for.body:
+ %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %x = getelementptr i32, <4 x i32*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %inc = add nuw nsw i64 %phi, 1
+ %cmp = icmp ult i64 %inc, 999
+ br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+ ret void
+}
+
+define void @cmp_undef() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ %cmp = icmp eq i32 %x1, undef
+ br i1 %cmp, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret void
+}
+
+define void @switch() {
+entry:
+ br label %for.body
+
+for.body:
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ switch i32 %x1, label %l1 [
+ ]
+
+l1:
+ %x2 = add i32 %x1, 2
+ br label %for.inc
+
+for.inc:
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define <4 x i32> @vec_load() {
+entry:
+ br label %for.body
+
+for.body:
+ %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %vec_phi = phi <4 x i32> [ <i32 0, i32 0, i32 0, i32 0>, %entry ], [ %r, %for.body ]
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %phi
+ %bc = bitcast i32* %arrayidx to <4 x i32>*
+ %x = load <4 x i32>, < 4 x i32>* %bc, align 4
+ %r = add <4 x i32> %x, %vec_phi
+ %inc = add nuw nsw i64 %phi, 1
+ %cmp = icmp ult i64 %inc, 999
+ br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+ ret <4 x i32> %r
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
new file mode 100644
index 000000000000..5df48e8c380b
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+@weak_constant = weak unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+
+; Though @unknown_global is initialized with constant values, we can't consider
+; it as a constant, so we shouldn't unroll the loop.
+; CHECK-LABEL: @foo
+; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @unknown_global, i64 0, i64 %iv
+define i32 @foo(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @unknown_global, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %mul = mul nsw i32 %src_element, %const_array_element
+ %add = add nsw i32 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 9
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i32 [ %r, %loop ]
+ ret i32 %r.lcssa
+}
+
+; Similarly, we can't consider 'weak' symbols as a known constant value, so we
+; shouldn't unroll the loop.
+; CHECK-LABEL: @foo2
+; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @weak_constant, i64 0, i64 %iv
+define i32 @foo2(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @weak_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %mul = mul nsw i32 %src_element, %const_array_element
+ %add = add nsw i32 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 9
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i32 [ %r, %loop ]
+ ret i32 %r.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll
new file mode 100644
index 000000000000..cd8cfd75424f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1], align 16
+
+; We should be able to propagate constant data through different types of
+; casts. For example, in this test we have a load, which becomes constant after
+; unrolling, which then is truncated to i8. Obviously, truncated value is also a
+; constant, which can be used in the further simplifications.
+;
+; We expect this loop to be unrolled, because in this case load would become
+; constant, which is 0 in many cases, and which, in its turn, helps to simplify
+; following multiplication and addition. In total, unrolling should help to
+; optimize ~60% of all instructions in this case.
+;
+; CHECK-LABEL: @const_load_trunc
+; CHECK-NOT: br i1
+; CHECK: ret i8 %
+define i8 @const_load_trunc(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i8 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %x = trunc i32 %src_element to i8
+ %y = trunc i32 %const_array_element to i8
+ %mul = mul nsw i8 %x, %y
+ %add = add nsw i8 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 10
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i8 [ %r, %loop ]
+ ret i8 %r.lcssa
+}
+
+; The same test as before, but with ZEXT instead of TRUNC.
+; CHECK-LABEL: @const_load_zext
+; CHECK-NOT: br i1
+; CHECK: ret i64 %
+define i64 @const_load_zext(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i64 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %x = zext i32 %src_element to i64
+ %y = zext i32 %const_array_element to i64
+ %mul = mul nsw i64 %x, %y
+ %add = add nsw i64 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 10
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i64 [ %r, %loop ]
+ ret i64 %r.lcssa
+}
+
+; The same test as the first one, but with SEXT instead of TRUNC.
+; CHECK-LABEL: @const_load_sext
+; CHECK-NOT: br i1
+; CHECK: ret i64 %
+define i64 @const_load_sext(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i64 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %x = sext i32 %src_element to i64
+ %y = sext i32 %const_array_element to i64
+ %mul = mul nsw i64 %x, %y
+ %add = add nsw i64 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 10
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i64 [ %r, %loop ]
+ ret i64 %r.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
new file mode 100644
index 000000000000..f7758fa22008
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
@@ -0,0 +1,207 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
+
+; We should be able to propagate constant data through comparisons.
+; For example, in this test we have a load, which becomes constant after
+; unrolling, making comparison with 0 also known to be 0 (false) - and that
+; will trigger further simplifications.
+;
+; We expect this loop to be unrolled, because in this case load would become
+; constant, which is always 1, and which, in its turn, helps to simplify
+; following comparison, zero-extension, and addition. In total, unrolling should help to
+; optimize more than 50% of all instructions in this case.
+;
+; CHECK-LABEL: @const_compare
+; CHECK-NOT: br i1 %
+; CHECK: ret i32
+define i32 @const_compare(i32* noalias nocapture readonly %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
+ %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.body ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ %cmp = icmp eq i32 %x1, 0
+ %cast = zext i1 %cmp to i32
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ %r.1 = add i32 %r.0, %cast
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret i32 %r.1
+}
+
+; If we can figure out result of comparison on each iteration, we can resolve
+; the depending branch. That means, that the unrolled version of the loop would
+; have less code, because we don't need not-taken basic blocks there.
+; This test checks that this is taken into consideration.
+; We expect this loop to be unrolled, because the most complicated part of its
+; body (if.then block) is never actually executed.
+; CHECK-LABEL: @branch_folded
+; CHECK-NOT: br i1 %
+; CHECK: ret i32
+define i32 @branch_folded(i32* noalias nocapture readonly %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
+ %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ %cmp = icmp eq i32 %x1, 0
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ br i1 %cmp, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0
+ %x2 = load i32, i32* %arrayidx2, align 4
+ %add = add nsw i32 %x2, %r.0
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %r.1 = phi i32 [ %add, %if.then ], [ %x1, %for.body ]
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret i32 %r.1
+}
+
+; This test is similar to the previous one, but in this we use IV in comparison
+; (not a loaded value as we did there).
+; CHECK-LABEL: @branch_iv
+; CHECK-NOT: br i1 %
+; CHECK: ret i64
+define i64 @branch_iv(i64* noalias nocapture readonly %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
+ %r.030 = phi i64 [ 0, %entry ], [ %r.1, %for.inc ]
+ %cmp3 = icmp eq i64 %indvars.iv, 5
+ %tmp3 = add nuw nsw i64 %indvars.iv, 1
+ br i1 %cmp3, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %tmp3
+ %tmp1 = load i64, i64* %arrayidx2, align 4
+ %add = add nsw i64 %tmp1, %r.030
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %r.1 = phi i64 [ %add, %if.then ], [ %r.030, %for.body ]
+ %exitcond = icmp eq i64 %tmp3, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret i64 %r.1
+}
+
+; Induction variables are often casted to another type, and that shouldn't
+; prevent us from folding branches. Tthis test specifically checks if we can
+; handle this. Other than thatm it's similar to the previous test.
+; CHECK-LABEL: @branch_iv_trunc
+; CHECK-NOT: br i1 %
+; CHECK: ret i32
+define i32 @branch_iv_trunc(i32* noalias nocapture readonly %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
+ %r.030 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
+ %tmp2 = trunc i64 %indvars.iv to i32
+ %cmp3 = icmp eq i32 %tmp2, 5
+ %tmp3 = add nuw nsw i64 %indvars.iv, 1
+ br i1 %cmp3, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %tmp3
+ %tmp1 = load i32, i32* %arrayidx2, align 4
+ %add = add nsw i32 %tmp1, %r.030
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %r.1 = phi i32 [ %add, %if.then ], [ %r.030, %for.body ]
+ %exitcond = icmp eq i64 %tmp3, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret i32 %r.1
+}
+
+; Check that we don't crash when we analyze icmp with pointer-typed IV and a
+; pointer.
+; CHECK-LABEL: @ptr_cmp_crash
+; CHECK: ret void
+define void @ptr_cmp_crash() {
+entry:
+ br label %while.body
+
+while.body:
+ %iv.0 = phi i32* [ getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 0), %entry ], [ %iv.1, %while.body ]
+ %iv.1 = getelementptr inbounds i32, i32* %iv.0, i64 1
+ %exitcond = icmp eq i32* %iv.1, getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 9)
+ br i1 %exitcond, label %loop.exit, label %while.body
+
+loop.exit:
+ ret void
+}
+
+; Check that we don't crash when we analyze ptrtoint cast.
+; CHECK-LABEL: @ptrtoint_cast_crash
+; CHECK: ret void
+define void @ptrtoint_cast_crash(i8 * %a) {
+entry:
+ %limit = getelementptr i8, i8* %a, i64 512
+ br label %loop.body
+
+loop.body:
+ %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
+ %cast = ptrtoint i8* %iv.0 to i64
+ %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
+ %exitcond = icmp ne i8* %iv.1, %limit
+ br i1 %exitcond, label %loop.body, label %loop.exit
+
+loop.exit:
+ ret void
+}
+
+; Loop unroller should be able to predict that a comparison would become
+; constant if the operands are pointers with the same base and constant
+; offsets.
+; We expect this loop to be unrolled, since most of its instructions would
+; become constant after it.
+; CHECK-LABEL: @ptr_cmp
+; CHECK-NOT: br i1 %
+; CHECK: ret i64
+define i64 @ptr_cmp(i8 * %a) {
+entry:
+ %limit = getelementptr i8, i8* %a, i64 40
+ %start.iv2 = getelementptr i8, i8* %a, i64 7
+ br label %loop.body
+
+loop.body:
+ %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
+ %iv2.0 = phi i8* [ %start.iv2, %entry ], [ %iv2.1, %loop.body ]
+ %r.0 = phi i64 [ 0, %entry ], [ %r.1, %loop.body ]
+ %cast = ptrtoint i8* %iv.0 to i64
+ %cmp = icmp eq i8* %iv2.0, %iv.0
+ %sub = sext i1 %cmp to i64
+ %mul = mul i64 %sub, %cast
+ %r.1 = add i64 %r.0, %mul
+ %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
+ %iv2.1 = getelementptr inbounds i8, i8* %iv2.0, i64 1
+ %exitcond = icmp ne i8* %iv.1, %limit
+ br i1 %exitcond, label %loop.body, label %loop.exit
+
+loop.exit:
+ ret i64 %r.1
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
new file mode 100644
index 000000000000..dd8582e6877f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i64 @propagate_loop_phis() {
+; CHECK-LABEL: @propagate_loop_phis(
+; CHECK-NOT: br i1
+; CHECK: ret i64 3
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %x0 = phi i64 [ 0, %entry ], [ %x2, %loop ]
+ %x1 = or i64 %x0, 1
+ %x2 = or i64 %x1, 2
+ %inc = add nuw nsw i64 %iv, 1
+ %cond = icmp sge i64 %inc, 10
+ br i1 %cond, label %loop.end, label %loop
+
+loop.end:
+ %x.lcssa = phi i64 [ %x2, %loop ]
+ ret i64 %x.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/pr18861.ll b/test/Transforms/LoopUnroll/pr18861.ll
index 62f26106afb2..c01eef1d0e5d 100644
--- a/test/Transforms/LoopUnroll/pr18861.ll
+++ b/test/Transforms/LoopUnroll/pr18861.ll
@@ -2,42 +2,85 @@
@b = external global i32, align 4
+; Test that LoopUnroll does not break LCSSA form.
+;
+; In this function we have a following CFG:
+; ( entry )
+; |
+; v
+; ( outer.header ) <--
+; | \
+; v |
+; --> ( inner.header ) |
+; / / \ |
+; \ / \ |
+; \ v v /
+; ( inner.latch ) ( outer.latch )
+; |
+; v
+; ( exit )
+;
+; When the inner loop is unrolled, we inner.latch block has only one
+; predecessor and one successor, so it can be merged with exit block.
+; During the merge, however, we remove an LCSSA definition for
+; %storemerge1.lcssa, breaking LCSSA form for the outer loop.
+
; Function Attrs: nounwind uwtable
define void @fn1() #0 {
entry:
- br label %for.cond1thread-pre-split
-
-for.cond1thread-pre-split: ; preds = %for.inc8, %entry
- %storemerge1 = phi i32 [ 0, %entry ], [ %inc9, %for.inc8 ]
- br i1 undef, label %for.inc8, label %for.cond2.preheader.lr.ph
+ br label %outer.header
-for.cond2.preheader.lr.ph: ; preds = %for.cond1thread-pre-split
- br label %for.cond2.preheader
+outer.header: ; preds = %outer.latch, %entry
+ %storemerge1 = phi i32 [ 0, %entry ], [ %inc9, %outer.latch ]
+ br label %inner.header
-for.cond2.preheader: ; preds = %for.inc5, %for.cond2.preheader.lr.ph
- br label %for.cond2
-
-for.cond2: ; preds = %for.body3, %for.cond2.preheader
- %storemerge = phi i32 [ %add, %for.body3 ], [ 0, %for.cond2.preheader ]
+inner.header: ; preds = %inner.latch, %outer.header
+ %storemerge = phi i32 [ %add, %inner.latch ], [ 0, %outer.header ]
%cmp = icmp slt i32 %storemerge, 1
- br i1 %cmp, label %for.body3, label %for.inc5
+ br i1 %cmp, label %inner.latch, label %outer.latch
-for.body3: ; preds = %for.cond2
+inner.latch: ; preds = %inner.header
%tobool4 = icmp eq i32 %storemerge, 0
%add = add nsw i32 %storemerge, 1
- br i1 %tobool4, label %for.cond2, label %if.then
+ br i1 %tobool4, label %inner.header, label %exit
-if.then: ; preds = %for.body3
- store i32 %storemerge1, i32* @b, align 4
+exit: ; preds = %inner.latch
+ %storemerge1.lcssa = phi i32 [ %storemerge1, %inner.latch ]
+ store i32 %storemerge1.lcssa, i32* @b, align 4
ret void
-for.inc5: ; preds = %for.cond2
- br i1 undef, label %for.cond1.for.inc8_crit_edge, label %for.cond2.preheader
+outer.latch: ; preds = %inner.header
+ %inc9 = add nsw i32 %storemerge1, 1
+ br label %outer.header
+}
-for.cond1.for.inc8_crit_edge: ; preds = %for.inc5
- br label %for.inc8
+; This case is similar to the previous one, and has the same CFG.
+; The difference is that loop unrolling doesn't remove any LCSSA definition,
+; yet breaks LCSSA form for the outer loop. It happens because before unrolling
+; block inner.latch was inside outer loop (and consequently, didn't require
+; LCSSA definition for %x), but after unrolling it occurs out of the outer
+; loop, so we need to insert an LCSSA definition to keep LCSSA.
-for.inc8: ; preds = %for.cond1.for.inc8_crit_edge, %for.cond1thread-pre-split
- %inc9 = add nsw i32 %storemerge1, 1
- br label %for.cond1thread-pre-split
+; Function Attrs: nounwind uwtable
+define void @fn2() {
+entry:
+ br label %outer.header
+
+outer.header:
+ br label %inner.header
+
+inner.header:
+ %x = load i32, i32* undef, align 4
+ br i1 true, label %outer.latch, label %inner.latch
+
+inner.latch:
+ %inc6 = add nsw i32 %x, 1
+ store i32 %inc6, i32* undef, align 4
+ br i1 false, label %inner.header, label %exit
+
+exit:
+ ret void
+
+outer.latch:
+ br label %outer.header
}
diff --git a/test/Transforms/LoopUnroll/rebuild_lcssa.ll b/test/Transforms/LoopUnroll/rebuild_lcssa.ll
new file mode 100644
index 000000000000..49498492344a
--- /dev/null
+++ b/test/Transforms/LoopUnroll/rebuild_lcssa.ll
@@ -0,0 +1,119 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This test shows how unrolling an inner loop could break LCSSA for an outer
+; loop, and there is no cheap way to recover it.
+;
+; In this case the inner loop, L3, is being unrolled. It only runs one
+; iteration, so unrolling basically means replacing
+; br i1 true, label %exit, label %L3_header
+; with
+; br label %exit
+;
+; However, this change messes up the loops structure: for instance, block
+; L3_body no longer belongs to L2. It becomes an exit block for L2, so LCSSA
+; phis for definitions in L2 should now be placed there. In particular, we need
+; to insert such a definition for %y1.
+
+; CHECK-LABEL: @foo1
+define void @foo1() {
+entry:
+ br label %L1_header
+
+L1_header:
+ br label %L2_header
+
+L2_header:
+ %y1 = phi i64 [ undef, %L1_header ], [ %x.lcssa, %L2_latch ]
+ br label %L3_header
+
+L3_header:
+ %y2 = phi i64 [ 0, %L3_latch ], [ %y1, %L2_header ]
+ %x = add i64 undef, -1
+ br i1 true, label %L2_latch, label %L3_body
+
+L2_latch:
+ %x.lcssa = phi i64 [ %x, %L3_header ]
+ br label %L2_header
+
+; CHECK: L3_body:
+; CHECK-NEXT: %y1.lcssa = phi i64 [ %y1, %L3_header ]
+L3_body:
+ store i64 %y1, i64* undef
+ br i1 false, label %L3_latch, label %L1_latch
+
+L3_latch:
+ br i1 true, label %exit, label %L3_header
+
+L1_latch:
+ %y.lcssa = phi i64 [ %y2, %L3_body ]
+ br label %L1_header
+
+exit:
+ ret void
+}
+
+; Additional tests for some corner cases.
+;
+; CHECK-LABEL: @foo2
+define void @foo2() {
+entry:
+ br label %L1_header
+
+L1_header:
+ br label %L2_header
+
+L2_header:
+ %a = phi i64 [ undef, %L1_header ], [ %dec_us, %L3_header ]
+ br label %L3_header
+
+L3_header:
+ %b = phi i64 [ 0, %L3_latch ], [ %a, %L2_header ]
+ %dec_us = add i64 undef, -1
+ br i1 true, label %L2_header, label %L3_break_to_L1
+
+; CHECK: L3_break_to_L1:
+; CHECK-NEXT: %a.lcssa = phi i64 [ %a, %L3_header ]
+L3_break_to_L1:
+ br i1 false, label %L3_latch, label %L1_latch
+
+L1_latch:
+ %b_lcssa = phi i64 [ %b, %L3_break_to_L1 ]
+ br label %L1_header
+
+L3_latch:
+ br i1 true, label %Exit, label %L3_header
+
+Exit:
+ ret void
+}
+
+; CHECK-LABEL: @foo3
+define void @foo3() {
+entry:
+ br label %L1_header
+
+L1_header:
+ %a = phi i8* [ %b, %L1_latch ], [ null, %entry ]
+ br i1 undef, label %L2_header, label %L1_latch
+
+L2_header:
+ br i1 undef, label %L2_latch, label %L1_latch
+
+; CHECK: L2_latch:
+; CHECK-NEXT: %a.lcssa = phi i8* [ %a, %L2_header ]
+L2_latch:
+ br i1 true, label %L2_exit, label %L2_header
+
+L1_latch:
+ %b = phi i8* [ undef, %L1_header ], [ null, %L2_header ]
+ br label %L1_header
+
+L2_exit:
+ %a_lcssa1 = phi i8* [ %a, %L2_latch ]
+ br label %Exit
+
+Exit:
+ %a_lcssa2 = phi i8* [ %a_lcssa1, %L2_exit ]
+ ret void
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
index de61e847a5a7..dcf159a09a1d 100644
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -15,7 +15,7 @@
; CHECK-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
; CHECK-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
-define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly !dbg !6 {
entry:
%cmp1 = icmp eq i32 %n, 0, !dbg !7
br i1 %cmp1, label %for.end, label %for.body, !dbg !7
@@ -44,7 +44,7 @@ for.end: ; preds = %for.body, %entry
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "test.cpp", directory: "/tmp")
-!6 = !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32*, i32)* @test, variables: !3)
+!6 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
!7 = !DILocation(line: 100, column: 1, scope: !6)
!8 = !DILocation(line: 101, column: 1, scope: !6)
!9 = !DILocation(line: 102, column: 1, scope: !6)
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll
index 8e0d77513cc1..b915b4fdf489 100644
--- a/test/Transforms/LoopUnroll/unroll-pragmas.ll
+++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -256,3 +256,69 @@ for.end: ; preds = %for.body
ret void
}
!12 = !{!12, !4}
+
+; #pragma clang loop unroll(enable)
+; Loop should be fully unrolled.
+;
+; CHECK-LABEL: @loop64_with_enable(
+; CHECK-NOT: br i1
+define void @loop64_with_enable(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13
+
+for.end: ; preds = %for.body
+ ret void
+}
+!13 = !{!13, !14}
+!14 = !{!"llvm.loop.unroll.enable"}
+
+; #pragma clang loop unroll(enable)
+; Loop has a runtime trip count and should be runtime unrolled and duplicated
+; (original and 8x).
+;
+; CHECK-LABEL: @runtime_loop_with_enable(
+; CHECK: for.body.prol:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
+; CHECK: for.body:
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
+entry:
+ %cmp3 = icmp sgt i32 %b, 0
+ br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %b
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+!15 = !{!15, !14}
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
index abeea209f639..a35596aff11c 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -34,7 +34,7 @@
; CHECK-NEXT: br label %loop_begin.us1
; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us
-; CHECK-NEXT: %var_val.us.2 = load i32, i32* %var
+; CHECK-NEXT: %var_val.us2 = load i32, i32* %var
; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [
; CHECK-NEXT: i32 1, label %inc.us4
; CHECK-NEXT: i32 2, label %dec.us3
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
index 02552ea5cc40..20f03c987eb7 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -65,7 +65,7 @@
; CHECK-NEXT: br label %loop_begin.us1
; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us
-; CHECK-NEXT: %var_val.us.2 = load i32, i32* %var
+; CHECK-NEXT: %var_val.us2 = load i32, i32* %var
; CHECK-NEXT: switch i32 %c, label %second_switch.us3 [
; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us
; CHECK-NEXT: ]
diff --git a/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll b/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll
new file mode 100644
index 000000000000..0f74614da98e
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-unswitch -S | FileCheck %s
+
+; In cases where two address spaces do not have the same size pointer, the
+; input for the addrspacecast should not be used as a substitute for itself
+; when manipulating the pointer.
+
+target datalayout = "e-m:e-p:16:16-p1:32:16-i32:16-i64:16-n8:16"
+
+define void @foo() {
+; CHECK-LABEL: @foo
+entry:
+ %arrayidx.i1 = getelementptr inbounds i16, i16* undef, i16 undef
+ %arrayidx.i = addrspacecast i16* %arrayidx.i1 to i16 addrspace(1)*
+ br i1 undef, label %for.body.i, label %bar.exit
+
+for.body.i: ; preds = %for.body.i, %entry
+; When we call makeLoopInvariant (i.e. trivial LICM) on this load, it
+; will try to find the base object to prove deferenceability. If we look
+; through the addrspacecast, we'll fail an assertion about bitwidths matching
+; CHECK-LABEL: for.body.i
+; CHECK: %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+ %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+ %cmp1.i = icmp eq i16 %0, 0
+ br i1 %cmp1.i, label %bar.exit, label %for.body.i
+
+bar.exit: ; preds = %for.body.i, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll b/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll
new file mode 100644
index 000000000000..dd436f474ea0
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-threshold=0 -S 2>&1 | FileCheck %s
+
+; This is to test trivial loop unswitch only happens when trivial condition
+; itself is an LIV loop condition (not partial LIV which could occur in and/or).
+
+define i32 @test(i1 %cond1, i32 %var1) {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+ %cond2 = icmp eq i32 %var3, 10
+ %cond.and = and i1 %cond1, %cond2
+
+; %cond.and only has %cond1 as LIV so no unswitch should happen.
+; CHECK: br i1 %cond.and, label %do_something, label %loop_exit
+ br i1 %cond.and, label %do_something, label %loop_exit
+
+do_something:
+ %var2 = add i32 %var3, 1
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func() noreturn \ No newline at end of file
diff --git a/test/Transforms/LoopUnswitch/basictest.ll b/test/Transforms/LoopUnswitch/basictest.ll
index e990144d5ccc..a02a463764dd 100644
--- a/test/Transforms/LoopUnswitch/basictest.ll
+++ b/test/Transforms/LoopUnswitch/basictest.ll
@@ -64,5 +64,44 @@ loop_exit:
; CHECK: }
}
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the convergent call that is not control-dependent on the unswitch condition.
+
+; CHECK-LABEL: @test3(
+define i32 @test3(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32, i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32, i32* %var
+
+; CHECK: call void @conv()
+; CHECK-NOT: call void @conv()
+ call void @conv() convergent
+
+ switch i32 %c, label %default [
+ i32 1, label %inc
+ i32 2, label %dec
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+dec:
+ call void @decf() noreturn nounwind
+ br label %loop_begin
+default:
+ br label %loop_exit
+loop_exit:
+ ret i32 0
+; CHECK: }
+}
+
+
declare void @incf() noreturn
declare void @decf() noreturn
+declare void @conv() convergent
diff --git a/test/Transforms/LoopUnswitch/cleanuppad.ll b/test/Transforms/LoopUnswitch/cleanuppad.ll
new file mode 100644
index 000000000000..b06ebd7235c8
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/cleanuppad.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -loop-unswitch < %s | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+define void @f(i32 %doit, i1 %x, i1 %y) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %tobool = icmp eq i32 %doit, 0
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ br i1 %x, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ br i1 %tobool, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ br i1 %y, label %for.inc, label %delete.notnull
+
+delete.notnull: ; preds = %if.then
+ invoke void @g()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %delete.notnull
+ br label %for.inc
+
+lpad: ; preds = %delete.notnull
+ %cp = cleanuppad within none []
+ cleanupret from %cp unwind to caller
+
+for.inc: ; preds = %invoke.cont, %if.then, %for.body
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
+
+; CHECK-LABEL: define void @f(
+; CHECK: cleanuppad within none []
+; CHECK-NOT: cleanuppad
+
+attributes #0 = { ssp uwtable }
diff --git a/test/Transforms/LoopUnswitch/cold-loop.ll b/test/Transforms/LoopUnswitch/cold-loop.ll
new file mode 100644
index 000000000000..1fbc08038bbd
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/cold-loop.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-with-block-frequency -S 2>&1 | FileCheck %s
+
+;; trivial condition should be unswithed regardless of coldness.
+define i32 @test1(i1 %cond1, i1 %cond2) !prof !1 {
+ br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+; CHECK: br i1 true, label %continue, label %loop_exit.loopexit
+ br i1 %cond2, label %continue, label %loop_exit ; trivial condition
+
+continue:
+ call void @some_func1() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+;; cold non-trivial condition should not be unswitched.
+define i32 @test2(i32* %var, i1 %cond1, i1 %cond2) !prof !1 {
+ br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+ store i32 1, i32* %var
+; CHECK: br i1 %cond2, label %continue1, label %continue2
+ br i1 %cond2, label %continue1, label %continue2 ; non-trivial condition
+
+continue1:
+ call void @some_func1() noreturn nounwind
+ br label %joint
+
+continue2:
+ call void @some_func2() noreturn nounwind
+ br label %joint
+
+joint:
+;; unswitching will duplicate these calls.
+ call void @some_func3() noreturn nounwind
+ call void @some_func4() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func1() noreturn
+declare void @some_func2() noreturn
+declare void @some_func3() noreturn
+declare void @some_func4() noreturn
+
+!0 = !{!"branch_weights", i32 1, i32 100000000}
+!1 = !{!"function_entry_count", i64 100}
diff --git a/test/Transforms/LoopUnswitch/copy-metadata.ll b/test/Transforms/LoopUnswitch/copy-metadata.ll
new file mode 100644
index 000000000000..2a634c25a23d
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/copy-metadata.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -loop-unswitch -S < %s 2>&1 | FileCheck %s
+
+; This test checks if unswitched condition preserve make.implicit metadata.
+
+define i32 @test(i1 %cond) {
+; CHECK: br i1 %cond, label %..split_crit_edge, label %.loop_exit.split_crit_edge, !make.implicit !0
+ br label %loop_begin
+
+loop_begin:
+; CHECK: br i1 true, label %continue, label %loop_exit, !make.implicit !0
+ br i1 %cond, label %continue, label %loop_exit, !make.implicit !0
+
+continue:
+ call void @some_func()
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func()
+
+!0 = !{}
diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll
index e79d874d9ca6..3d1c895edec9 100644
--- a/test/Transforms/LoopUnswitch/infinite-loop.ll
+++ b/test/Transforms/LoopUnswitch/infinite-loop.ll
@@ -9,23 +9,23 @@
; It can trivially unswitch on the false cas of condition %a though.
; STATS: 2 loop-unswitch - Number of branches unswitched
-; STATS: 1 loop-unswitch - Number of unswitches that are trivial
+; STATS: 2 loop-unswitch - Number of unswitches that are trivial
; CHECK-LABEL: @func_16(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
; CHECK: entry.split:
-; CHECK-NEXT: br i1 %b, label %cond.end.us, label %abort1
+; CHECK-NEXT: br i1 %b, label %cond.end, label %abort1.split
-; CHECK: cond.end.us:
-; CHECK-NEXT: br label %cond.end.us
+; CHECK: cond.end:
+; CHECK-NEXT: br label %cond.end
; CHECK: abort0.split:
; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
; CHECK-NEXT: unreachable
-; CHECK: abort1:
+; CHECK: abort1.split:
; CHECK-NEXT: call void @end1() [[NOR_NUW]]
; CHECK-NEXT: unreachable
diff --git a/test/Transforms/LoopUnswitch/trivial-unswitch.ll b/test/Transforms/LoopUnswitch/trivial-unswitch.ll
new file mode 100644
index 000000000000..db3328278dae
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/trivial-unswitch.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-threshold=0 -verify-loop-info -S < %s 2>&1 | FileCheck %s
+
+; This test contains two trivial unswitch condition in one loop.
+; LoopUnswitch pass should be able to unswitch the second one
+; after unswitching the first one.
+
+
+; CHECK: br i1 %cond1, label %..split_crit_edge, label %.loop_exit.split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK: br label %.split
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK: br i1 %cond2, label %.split..split.split_crit_edge, label %.split.loop_exit.split1_crit_edge
+
+; CHECK: .split..split.split_crit_edge: ; preds = %.split
+; CHECK: br label %.split.split
+
+; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
+; CHECK: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %do_something, %.split.split
+; CHECK: br i1 true, label %continue, label %loop_exit
+
+; CHECK: continue: ; preds = %loop_begin
+; CHECK: %var_val = load i32, i32* %var
+; CHECK: br i1 true, label %do_something, label %loop_exit
+
+define i32 @test(i32* %var, i1 %cond1, i1 %cond2) {
+ br label %loop_begin
+
+loop_begin:
+ br i1 %cond1, label %continue, label %loop_exit ; first trivial condition
+
+continue:
+ %var_val = load i32, i32* %var
+ br i1 %cond2, label %do_something, label %loop_exit ; second trivial condition
+
+do_something:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func() noreturn \ No newline at end of file
diff --git a/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll b/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
index f16ee4171da9..58315a73ec13 100644
--- a/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
+++ b/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses=true | FileCheck %s
-; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -enable-interleaved-mem-accesses=true | FileCheck %s --check-prefix=FORCE-VEC
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 | FileCheck %s
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s --check-prefix=FORCE-VEC
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabi"
diff --git a/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
new file mode 100644
index 000000000000..65f5c4e6266b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -0,0 +1,54 @@
+; RUN: opt -S < %s -loop-vectorize -instcombine 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+;; See https://llvm.org/bugs/show_bug.cgi?id=25490
+;; Due to the data structures used, the LLVM IR was not determinisic.
+;; This test comes from the PR.
+
+;; CHECK-LABEL: @test(
+; CHECK: load <16 x i8>
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: load <16 x i8>
+; CHECK-NEXT: zext <16 x i8>
+; CHECK-NEXT: zext <16 x i8>
+define void @test(i32 %n, i8* nocapture %a, i8* nocapture %b, i8* nocapture readonly %c) {
+entry:
+ %cmp.28 = icmp eq i32 %n, 0
+ br i1 %cmp.28, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i8, i8* %c, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i32
+ %mul = mul nuw nsw i32 %conv3, %conv
+ %shr.26 = lshr i32 %mul, 8
+ %conv4 = trunc i32 %shr.26 to i8
+ store i8 %conv4, i8* %arrayidx2, align 1
+ %arrayidx8 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
+ %2 = load i8, i8* %arrayidx8, align 1
+ %conv9 = zext i8 %2 to i32
+ %mul10 = mul nuw nsw i32 %conv9, %conv
+ %shr11.27 = lshr i32 %mul10, 8
+ %conv12 = trunc i32 %shr11.27 to i8
+ store i8 %conv12, i8* %arrayidx8, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
new file mode 100644
index 000000000000..a0e741a3cdbe
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -debug-only=loop-vectorize -loop-vectorize -instcombine < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnueabi"
+
+@AB = common global [1024 x i8] zeroinitializer, align 4
+@CD = common global [1024 x i8] zeroinitializer, align 4
+
+define void @test_byte_interleaved_cost(i8 %C, i8 %D) {
+entry:
+ br label %for.body
+
+; 8xi8 and 16xi8 are valid i8 vector types, so the cost of the interleaved
+; access group is 2.
+
+; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
+; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %indvars.iv
+ %tmp = load i8, i8* %arrayidx0, align 4
+ %tmp1 = or i64 %indvars.iv, 1
+ %arrayidx1 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %tmp1
+ %tmp2 = load i8, i8* %arrayidx1, align 4
+ %add = add nsw i8 %tmp, %C
+ %mul = mul nsw i8 %tmp2, %D
+ %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %indvars.iv
+ store i8 %add, i8* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %tmp1
+ store i8 %mul, i8* %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
new file mode 100644
index 000000000000..eee310491805
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -0,0 +1,243 @@
+; RUN: opt -S < %s -basicaa -loop-vectorize -force-vector-interleave=1 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; CHECK-LABEL: @add_a(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add nuw nsw <16 x i8>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp8 = icmp sgt i32 %len, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %conv = zext i8 %0 to i32
+ %add = add nuw nsw i32 %conv, 2
+ %conv1 = trunc i32 %add to i8
+ %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %conv1, i8* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_b(
+; CHECK: load <8 x i16>, <8 x i16>*
+; CHECK: add nuw nsw <8 x i16>
+; CHECK: store <8 x i16>
+; Function Attrs: nounwind
+define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp9 = icmp sgt i32 %len, 0
+ br i1 %cmp9, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
+ %0 = load i16, i16* %arrayidx
+ %conv8 = zext i16 %0 to i32
+ %add = add nuw nsw i32 %conv8, 2
+ %conv1 = trunc i32 %add to i16
+ %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
+ store i16 %conv1, i16* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_c(
+; CHECK: load <8 x i8>, <8 x i8>*
+; CHECK: add nuw nsw <8 x i16>
+; CHECK: store <8 x i16>
+; Function Attrs: nounwind
+define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp8 = icmp sgt i32 %len, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %conv = zext i8 %0 to i32
+ %add = add nuw nsw i32 %conv, 2
+ %conv1 = trunc i32 %add to i16
+ %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
+ store i16 %conv1, i16* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_d(
+; CHECK: load <4 x i16>
+; CHECK: add nsw <4 x i32>
+; CHECK: store <4 x i32>
+define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp7 = icmp sgt i32 %len, 0
+ br i1 %cmp7, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
+ %0 = load i16, i16* %arrayidx
+ %conv = sext i16 %0 to i32
+ %add = add nsw i32 %conv, 2
+ %arrayidx2 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_e(
+; CHECK: load <16 x i8>
+; CHECK: shl <16 x i8>
+; CHECK: add nuw nsw <16 x i8>
+; CHECK: or <16 x i8>
+; CHECK: mul nuw nsw <16 x i8>
+; CHECK: and <16 x i8>
+; CHECK: xor <16 x i8>
+; CHECK: mul nuw nsw <16 x i8>
+; CHECK: store <16 x i8>
+define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
+entry:
+ %cmp.32 = icmp sgt i32 %len, 0
+ br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %conv11 = zext i8 %arg2 to i32
+ %conv13 = zext i8 %arg1 to i32
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %conv = zext i8 %0 to i32
+ %add = shl i32 %conv, 4
+ %conv2 = add nuw nsw i32 %add, 32
+ %or = or i32 %conv, 51
+ %mul = mul nuw nsw i32 %or, 60
+ %and = and i32 %conv2, %conv13
+ %mul.masked = and i32 %mul, 252
+ %conv17 = xor i32 %mul.masked, %conv11
+ %mul18 = mul nuw nsw i32 %conv17, %and
+ %conv19 = trunc i32 %mul18 to i8
+ %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %conv19, i8* %arrayidx21
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_f
+; CHECK: load <8 x i16>
+; CHECK: trunc <8 x i16>
+; CHECK: shl <8 x i8>
+; CHECK: add nsw <8 x i8>
+; CHECK: or <8 x i8>
+; CHECK: mul nuw nsw <8 x i8>
+; CHECK: and <8 x i8>
+; CHECK: xor <8 x i8>
+; CHECK: mul nuw nsw <8 x i8>
+; CHECK: store <8 x i8>
+define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
+entry:
+ %cmp.32 = icmp sgt i32 %len, 0
+ br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %conv11 = zext i8 %arg2 to i32
+ %conv13 = zext i8 %arg1 to i32
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
+ %0 = load i16, i16* %arrayidx
+ %conv = sext i16 %0 to i32
+ %add = shl i32 %conv, 4
+ %conv2 = add nsw i32 %add, 32
+ %or = and i32 %conv, 204
+ %conv8 = or i32 %or, 51
+ %mul = mul nuw nsw i32 %conv8, 60
+ %and = and i32 %conv2, %conv13
+ %mul.masked = and i32 %mul, 252
+ %conv17 = xor i32 %mul.masked, %conv11
+ %mul18 = mul nuw nsw i32 %conv17, %and
+ %conv19 = trunc i32 %mul18 to i8
+ %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %conv19, i8* %arrayidx21
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_g
+; CHECK: load <16 x i8>
+; CHECK: xor <16 x i8>
+; CHECK: icmp ult <16 x i8>
+; CHECK: select <16 x i1> {{.*}}, <16 x i8>
+; CHECK: store <16 x i8>
+define void @add_g(i8* noalias nocapture readonly %p, i8* noalias nocapture readonly %q, i8* noalias nocapture %r, i8 %arg1, i32 %len) #0 {
+ %1 = icmp sgt i32 %len, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %2 = sext i8 %arg1 to i64
+ br label %3
+
+._crit_edge: ; preds = %3, %0
+ ret void
+
+; <label>:3 ; preds = %3, %.lr.ph
+ %indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %3 ]
+ %x4 = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %x5 = load i8, i8* %x4
+ %x7 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ %x8 = load i8, i8* %x7
+ %x9 = zext i8 %x5 to i32
+ %x10 = xor i32 %x9, 255
+ %x11 = icmp ult i32 %x10, 24
+ %x12 = select i1 %x11, i32 %x10, i32 24
+ %x13 = trunc i32 %x12 to i8
+ store i8 %x13, i8* %x4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %._crit_edge, label %3
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
new file mode 100644
index 000000000000..be08a63b212c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
@@ -0,0 +1,191 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: @reduction_i8
+;
+; char reduction_i8(char *a, char *b, int n) {
+; char sum = 0;
+; for (int i = 0; i < n; ++i)
+; sum += (a[i] + b[i]);
+; return sum;
+; }
+;
+; CHECK: vector.body:
+; CHECK: phi <16 x i8>
+; CHECK: load <16 x i8>
+; CHECK: load <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: add <16 x i8>
+;
+; CHECK: middle.block:
+; CHECK: shufflevector <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: shufflevector <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: shufflevector <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: shufflevector <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <16 x i8>
+; CHECK: zext i8 [[Rdx]] to i32
+;
+define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
+entry:
+ %cmp.12 = icmp sgt i32 %n, 0
+ br i1 %cmp.12, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:
+ %add5.lcssa = phi i32 [ %add5, %for.body ]
+ %conv6 = trunc i32 %add5.lcssa to i8
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i8 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
+ ret i8 %sum.0.lcssa
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i32
+ %conv4 = and i32 %sum.013, 255
+ %add = add nuw nsw i32 %conv, %conv4
+ %add5 = add nuw nsw i32 %add, %conv3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
+}
+
+; CHECK-LABEL: @reduction_i16_1
+;
+; short reduction_i16_1(short *a, short *b, int n) {
+; short sum = 0;
+; for (int i = 0; i < n; ++i)
+; sum += (a[i] + b[i]);
+; return sum;
+; }
+;
+; CHECK: vector.body:
+; CHECK: phi <8 x i16>
+; CHECK: load <8 x i16>
+; CHECK: load <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: add <8 x i16>
+;
+; CHECK: middle.block:
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK: zext i16 [[Rdx]] to i32
+;
+define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) {
+entry:
+ %cmp.16 = icmp sgt i32 %n, 0
+ br i1 %cmp.16, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:
+ %add5.lcssa = phi i32 [ %add5, %for.body ]
+ %conv6 = trunc i32 %add5.lcssa to i16
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i16 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
+ ret i16 %sum.0.lcssa
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %sum.017 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %indvars.iv
+ %0 = load i16, i16* %arrayidx, align 2
+ %conv.14 = zext i16 %0 to i32
+ %arrayidx2 = getelementptr inbounds i16, i16* %b, i64 %indvars.iv
+ %1 = load i16, i16* %arrayidx2, align 2
+ %conv3.15 = zext i16 %1 to i32
+ %conv4.13 = and i32 %sum.017, 65535
+ %add = add nuw nsw i32 %conv.14, %conv4.13
+ %add5 = add nuw nsw i32 %add, %conv3.15
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
+}
+
+; CHECK-LABEL: @reduction_i16_2
+;
+; short reduction_i16_2(char *a, char *b, int n) {
+; short sum = 0;
+; for (int i = 0; i < n; ++i)
+; sum += (a[i] + b[i]);
+; return sum;
+; }
+;
+; CHECK: vector.body:
+; CHECK: phi <8 x i16>
+; CHECK: [[Ld1:%[a-zA-Z0-9.]+]] = load <8 x i8>
+; CHECK: zext <8 x i8> [[Ld1]] to <8 x i16>
+; CHECK: [[Ld2:%[a-zA-Z0-9.]+]] = load <8 x i8>
+; CHECK: zext <8 x i8> [[Ld2]] to <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: add <8 x i16>
+;
+; CHECK: middle.block:
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK: zext i16 [[Rdx]] to i32
+;
+define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
+entry:
+ %cmp.14 = icmp sgt i32 %n, 0
+ br i1 %cmp.14, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:
+ %add5.lcssa = phi i32 [ %add5, %for.body ]
+ %conv6 = trunc i32 %add5.lcssa to i16
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i16 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
+ ret i16 %sum.0.lcssa
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %sum.015 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i32
+ %conv4.13 = and i32 %sum.015, 65535
+ %add = add nuw nsw i32 %conv, %conv4.13
+ %add5 = add nuw nsw i32 %add, %conv3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
+}
diff --git a/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll b/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
new file mode 100644
index 000000000000..de3626b57d83
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -debug-only=loop-vectorize -loop-vectorize -instcombine < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "armv8--linux-gnueabihf"
+
+@AB = common global [1024 x i8] zeroinitializer, align 4
+@CD = common global [1024 x i8] zeroinitializer, align 4
+
+define void @test_byte_interleaved_cost(i8 %C, i8 %D) {
+entry:
+ br label %for.body
+
+; 8xi8 and 16xi8 are valid i8 vector types, so the cost of the interleaved
+; access group is 2.
+
+; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
+; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %indvars.iv
+ %tmp = load i8, i8* %arrayidx0, align 4
+ %tmp1 = or i64 %indvars.iv, 1
+ %arrayidx1 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %tmp1
+ %tmp2 = load i8, i8* %arrayidx1, align 4
+ %add = add nsw i8 %tmp, %C
+ %mul = mul nsw i8 %tmp2, %D
+ %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %indvars.iv
+ store i8 %add, i8* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %tmp1
+ store i8 %mul, i8* %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/ARM/vector_cast.ll b/test/Transforms/LoopVectorize/ARM/vector_cast.ll
new file mode 100644
index 000000000000..78af9960e064
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/vector_cast.ll
@@ -0,0 +1,37 @@
+; RUN: opt -loop-vectorize -tbaa -S -mattr=+neon < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabi"
+
+; This requires the loop vectorizer to create an interleaved access group
+; for the stores to the struct. Here we need to perform a bitcast from a vector
+; of pointers to a vector i32s.
+
+%class.A = type { i8*, i32 }
+
+; CHECK-LABEL: test0
+define void @test0(%class.A* %StartPtr, %class.A* %APtr) {
+entry:
+ br label %for.body.i
+
+for.body.i:
+ %addr = phi %class.A* [ %StartPtr, %entry ], [ %incdec.ptr.i, %for.body.i ]
+ %Data.i.i = getelementptr inbounds %class.A, %class.A* %addr, i32 0, i32 0
+ store i8* null, i8** %Data.i.i, align 4, !tbaa !8
+ %Length.i.i = getelementptr inbounds %class.A, %class.A* %addr, i32 0, i32 1
+ store i32 0, i32* %Length.i.i, align 4, !tbaa !11
+ %incdec.ptr.i = getelementptr inbounds %class.A, %class.A* %addr, i32 1
+ %cmp.i = icmp eq %class.A* %incdec.ptr.i, %APtr
+ br i1 %cmp.i, label %exit, label %for.body.i
+
+exit:
+ ret void
+}
+
+!5 = !{!"any pointer", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!9, !5, i64 0}
+!9 = !{!5, i64 0, !10, i64 4}
+!10 = !{!"int", !6, i64 0}
+!11 = !{!9, !10, i64 4}
diff --git a/test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll b/test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll
new file mode 100644
index 000000000000..3491e08bbaa2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -basicaa -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %c) #0 {
+entry:
+ br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: fmul <4 x double> %{{[^,]+}}, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: fmul <4 x double> %{{[^,]+}}, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+ %0 = load double, double* %arrayidx, align 8
+ %mul = fmul double %0, 2.000000e+00
+ %mul3 = fmul double %0, %mul
+ %arrayidx5 = getelementptr inbounds double, double* %c, i64 %indvars.iv
+ %1 = load double, double* %arrayidx5, align 8
+ %mul6 = fmul double %1, 3.000000e+00
+ %mul9 = fmul double %1, %mul6
+ %add = fadd double %mul3, %mul9
+ %mul12 = fmul double %0, 4.000000e+00
+ %mul15 = fmul double %mul12, %1
+ %add16 = fadd double %mul15, %add
+ %add17 = fadd double %add16, 1.000000e+00
+ %arrayidx19 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add17, double* %arrayidx19, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+
diff --git a/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll b/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
new file mode 100644
index 000000000000..0cb845520246
--- /dev/null
+++ b/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -basicaa -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: <2 x double>
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %0
+ %1 = load double, double* %arrayidx, align 8
+ %add = fadd double %1, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr8" }
+
diff --git a/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 8c375ccfd315..abe7d6de3f35 100644
--- a/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -499,4 +499,146 @@ for.end: ; preds = %for.cond
ret void
}
+; void foo7 (double * __restrict__ out, double ** __restrict__ in,
+; bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+; }
+
+;AVX512-LABEL: @foo7
+;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64(<8 x double*>*
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigger, i32 %size) #0 {
+entry:
+ %out.addr = alloca double*, align 8
+ %in.addr = alloca double**, align 8
+ %trigger.addr = alloca i8*, align 8
+ %size.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store double* %out, double** %out.addr, align 8
+ store double** %in, double*** %in.addr, align 8
+ store i8* %trigger, i8** %trigger.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %size.addr, align 4
+ %cmp = icmp ult i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %idxprom = zext i32 %2 to i64
+ %3 = load i8*, i8** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+ %4 = load i8, i8* %arrayidx, align 1
+ %tobool = trunc i8 %4 to i1
+ br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+ %5 = load i32, i32* %i, align 4
+ %idxprom1 = zext i32 %5 to i64
+ %6 = load double**, double*** %in.addr, align 8
+ %arrayidx2 = getelementptr inbounds double*, double** %6, i64 %idxprom1
+ %7 = load double*, double** %arrayidx2, align 8
+ %cmp3 = icmp ne double* %7, null
+ br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+ %8 = load i32, i32* %i, align 4
+ %idxprom4 = zext i32 %8 to i64
+ %9 = load double*, double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+ store double 5.000000e-01, double* %arrayidx5, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %10 = load i32, i32* %i, align 4
+ %inc = add i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+;typedef int (*fp)();
+;void foo8 (double* __restrict__ out, fp* __restrict__ in, bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+;}
+
+;AVX512-LABEL: @foo8
+;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f(<8 x i32 ()*>* %
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigger, i32 %size) #0 {
+entry:
+ %out.addr = alloca double*, align 8
+ %in.addr = alloca i32 ()**, align 8
+ %trigger.addr = alloca i8*, align 8
+ %size.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store double* %out, double** %out.addr, align 8
+ store i32 ()** %in, i32 ()*** %in.addr, align 8
+ store i8* %trigger, i8** %trigger.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %size.addr, align 4
+ %cmp = icmp ult i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %idxprom = zext i32 %2 to i64
+ %3 = load i8*, i8** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+ %4 = load i8, i8* %arrayidx, align 1
+ %tobool = trunc i8 %4 to i1
+ br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+ %5 = load i32, i32* %i, align 4
+ %idxprom1 = zext i32 %5 to i64
+ %6 = load i32 ()**, i32 ()*** %in.addr, align 8
+ %arrayidx2 = getelementptr inbounds i32 ()*, i32 ()** %6, i64 %idxprom1
+ %7 = load i32 ()*, i32 ()** %arrayidx2, align 8
+ %cmp3 = icmp ne i32 ()* %7, null
+ br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+ %8 = load i32, i32* %i, align 4
+ %idxprom4 = zext i32 %8 to i64
+ %9 = load double*, double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+ store double 5.000000e-01, double* %arrayidx5, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %10 = load i32, i32* %i, align 4
+ %inc = add i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index ba8e11e58749..74c0c16086fe 100644
--- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -60,7 +60,7 @@ for.body: ; preds = %for.body, %entry
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 32
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
for.end: ; preds = %for.body
@@ -111,7 +111,7 @@ for.body: ; preds = %for.body, %entry
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 32
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
@@ -162,7 +162,7 @@ for.body: ; preds = %for.body, %entry
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 32
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
for.end: ; preds = %for.body
diff --git a/test/Transforms/LoopVectorize/X86/no_fpmath.ll b/test/Transforms/LoopVectorize/X86/no_fpmath.ll
new file mode 100644
index 000000000000..0bb78ce177fe
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/no_fpmath.ll
@@ -0,0 +1,104 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: cannot prove it is safe to reorder floating-point operations
+; CHECK: remark: no_fpmath.c:6:14: loop not vectorized:
+; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 2)
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind readonly ssp uwtable
+define double @cond_sum(i32* nocapture readonly %v, i32 %n) #0 !dbg !4 {
+entry:
+ %cmp.7 = icmp sgt i32 %n, 0, !dbg !3
+ br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !8
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !9
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ %add.lcssa = phi double [ %add, %for.body ]
+ br label %for.cond.cleanup, !dbg !10
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ %a.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ]
+ ret double %a.0.lcssa, !dbg !10
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !9
+ %0 = load i32, i32* %arrayidx, align 4, !dbg !9, !tbaa !11
+ %cmp1 = icmp eq i32 %0, 0, !dbg !15
+ %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !9
+ %add = fadd double %a.08, %cond, !dbg !16
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !8
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !8
+ %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !8
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8, !llvm.loop !17
+}
+
+; Function Attrs: nounwind readonly ssp uwtable
+define double @cond_sum_loop_hint(i32* nocapture readonly %v, i32 %n) #0 !dbg !20 {
+entry:
+ %cmp.7 = icmp sgt i32 %n, 0, !dbg !19
+ br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !21
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !22
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ %add.lcssa = phi double [ %add, %for.body ]
+ br label %for.cond.cleanup, !dbg !23
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ %a.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ]
+ ret double %a.0.lcssa, !dbg !23
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !22
+ %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !11
+ %cmp1 = icmp eq i32 %0, 0, !dbg !24
+ %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !22
+ %add = fadd double %a.08, %cond, !dbg !25
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !21
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
+ %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !21
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !21, !llvm.loop !26
+}
+
+attributes #0 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{i32 1, !"PIC Level", i32 2}
+!2 = !{!"clang version 3.7.0"}
+!3 = !DILocation(line: 5, column: 20, scope: !4)
+!4 = distinct !DISubprogram(name: "cond_sum", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !7)
+!5 = !DIFile(filename: "no_fpmath.c", directory: "")
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !DILocation(line: 5, column: 3, scope: !4)
+!9 = !DILocation(line: 6, column: 14, scope: !4)
+!10 = !DILocation(line: 9, column: 3, scope: !4)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 6, column: 19, scope: !4)
+!16 = !DILocation(line: 6, column: 11, scope: !4)
+!17 = distinct !{!17, !18}
+!18 = !{!"llvm.loop.unroll.disable"}
+!19 = !DILocation(line: 16, column: 20, scope: !20)
+!20 = distinct !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, variables: !7)
+!21 = !DILocation(line: 16, column: 3, scope: !20)
+!22 = !DILocation(line: 17, column: 14, scope: !20)
+!23 = !DILocation(line: 20, column: 3, scope: !20)
+!24 = !DILocation(line: 17, column: 19, scope: !20)
+!25 = !DILocation(line: 17, column: 11, scope: !20)
+!26 = distinct !{!26, !27, !18}
+!27 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/test/Transforms/LoopVectorize/X86/powof2div.ll b/test/Transforms/LoopVectorize/X86/powof2div.ll
index 6bc738a7d143..3e4bef6d4d07 100644
--- a/test/Transforms/LoopVectorize/X86/powof2div.ll
+++ b/test/Transforms/LoopVectorize/X86/powof2div.ll
@@ -6,10 +6,10 @@ target triple = "x86_64-unknown-linux-gnu"
@Foo = common global %struct.anon zeroinitializer, align 4
-;CHECK-LABEL: @foo(
-;CHECK: load <4 x i32>, <4 x i32>*
-;CHECK: sdiv <4 x i32>
-;CHECK: store <4 x i32>
+; CHECK-LABEL: @foo(
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: sdiv <4 x i32>
+; CHECK: store <4 x i32>
define void @foo(){
entry:
diff --git a/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
index 3741b95d9859..6393002d5071 100644
--- a/test/Transforms/LoopVectorize/X86/reduction-crash.ll
+++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -mcpu=prescott < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -mcpu=prescott -disable-basicaa < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-apple-darwin"
diff --git a/test/Transforms/LoopVectorize/X86/reg-usage.ll b/test/Transforms/LoopVectorize/X86/reg-usage.ll
new file mode 100644
index 000000000000..47a6e1029eda
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/reg-usage.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global [1024 x i8] zeroinitializer, align 16
+@b = global [1024 x i8] zeroinitializer, align 16
+
+define i32 @foo() {
+; This function has a loop of SAD pattern. Here we check when VF = 16 the
+; register usage doesn't exceed 16.
+;
+; CHECK-LABEL: foo
+; CHECK: LV(REG): VF = 4
+; CHECK-NEXT: LV(REG): Found max usage: 4
+; CHECK: LV(REG): VF = 8
+; CHECK-NEXT: LV(REG): Found max usage: 7
+; CHECK: LV(REG): VF = 16
+; CHECK-NEXT: LV(REG): Found max usage: 13
+
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ %add.lcssa = phi i32 [ %add, %for.body ]
+ ret i32 %add.lcssa
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i32
+ %sub = sub nsw i32 %conv, %conv3
+ %ispos = icmp sgt i32 %sub, -1
+ %neg = sub nsw i32 0, %sub
+ %2 = select i1 %ispos, i32 %sub, i32 %neg
+ %add = add nsw i32 %2, %s.015
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define i64 @bar(i64* nocapture %a) {
+; CHECK-LABEL: bar
+; CHECK: LV(REG): VF = 2
+; CHECK: LV(REG): Found max usage: 4
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ %add2.lcssa = phi i64 [ %add2, %for.body ]
+ ret i64 %add2.lcssa
+
+for.body:
+ %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
+ %0 = load i64, i64* %arrayidx, align 8
+ %add = add nsw i64 %0, %i.012
+ store i64 %add, i64* %arrayidx, align 8
+ %add2 = add nsw i64 %add, %s.011
+ %inc = add nuw nsw i64 %i.012, 1
+ %exitcond = icmp eq i64 %inc, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
diff --git a/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
new file mode 100644
index 000000000000..fe9d59efc8b3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
@@ -0,0 +1,46 @@
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=corei7-avx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global [1000 x i8] zeroinitializer, align 16
+@b = global [1000 x i8] zeroinitializer, align 16
+@c = global [1000 x i8] zeroinitializer, align 16
+@u = global [1000 x i32] zeroinitializer, align 16
+@v = global [1000 x i32] zeroinitializer, align 16
+@w = global [1000 x i32] zeroinitializer, align 16
+
+; Tests that the vectorization factor is determined by the smallest instead of
+; widest type in the loop for maximum bandwidth when
+; -vectorizer-maximize-bandwidth is indicated.
+;
+; CHECK-label: foo
+; CHECK: LV: Selecting VF: 32.
+define void @foo() {
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [1000 x i8], [1000 x i8]* @b, i64 0, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %arrayidx2 = getelementptr inbounds [1000 x i8], [1000 x i8]* @c, i64 0, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %add = add i8 %1, %0
+ %arrayidx6 = getelementptr inbounds [1000 x i8], [1000 x i8]* @a, i64 0, i64 %indvars.iv
+ store i8 %add, i8* %arrayidx6, align 1
+ %arrayidx8 = getelementptr inbounds [1000 x i32], [1000 x i32]* @v, i64 0, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds [1000 x i32], [1000 x i32]* @w, i64 0, i64 %indvars.iv
+ %3 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %3, %2
+ %arrayidx13 = getelementptr inbounds [1000 x i32], [1000 x i32]* @u, i64 0, i64 %indvars.iv
+ store i32 %add11, i32* %arrayidx13, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
index 6cd3c9c3bc01..cca829b9457e 100644
--- a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
+++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -17,7 +17,7 @@ target triple = "x86_64-apple-macosx10.8.0"
; widest vector count.
;
; CHECK: test_consecutive_store
-; CHECK: The Widest type: 64 bits
+; CHECK: The Smallest and Widest types: 64 / 64 bits.
define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
%4 = load %0*, %0** %2, align 8
%5 = icmp eq %0** %0, %1
@@ -51,7 +51,7 @@ define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwt
; p[i][y] = (int*) (1 + q[i]);
; }
; CHECK: test_nonconsecutive_store
-; CHECK: The Widest type: 16 bits
+; CHECK: The Smallest and Widest types: 16 / 16 bits.
define void @test_nonconsecutive_store() nounwind ssp uwtable {
br label %1
@@ -93,7 +93,7 @@ define void @test_nonconsecutive_store() nounwind ssp uwtable {
;; Now we check the same rules for loads. We should take consecutive loads of
;; pointer types into account.
; CHECK: test_consecutive_ptr_load
-; CHECK: The Widest type: 64 bits
+; CHECK: The Smallest and Widest types: 8 / 64 bits.
define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
br label %1
@@ -117,7 +117,7 @@ define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
;; However, we should not take unconsecutive loads of pointers into account.
; CHECK: test_nonconsecutive_ptr_load
-; CHECK: The Widest type: 16 bits
+; CHECK: LV: The Smallest and Widest types: 16 / 16 bits.
define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable {
br label %1
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 65cabb05f2fb..02fab4447341 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -25,7 +25,7 @@
; File, line, and column should match those specified in the metadata
; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
-; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
; CHECK: remark: source.cpp:19:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
@@ -45,7 +45,7 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
entry:
%cmp10 = icmp sgt i32 %Length, 0, !dbg !12
br i1 %cmp10, label %for.body, label %for.end, !dbg !12, !llvm.loop !14
@@ -67,7 +67,7 @@ for.end: ; preds = %for.body, %entry
}
; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 {
+define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 {
entry:
%cmp4 = icmp sgt i32 %Length, 0, !dbg !25
br i1 %cmp4, label %for.body, label %for.end, !dbg !25, !llvm.loop !27
@@ -87,7 +87,7 @@ for.end: ; preds = %for.body, %entry
}
; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 {
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 {
entry:
%cmp9 = icmp sgt i32 %Length, 0, !dbg !32
br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34
@@ -122,15 +122,15 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.cpp", directory: ".")
!2 = !{}
!3 = !{!4, !7, !8}
-!4 = !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z4testPii, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z13test_disabledPii, variables: !2)
-!8 = !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !1, scope: !5, type: !6, function: void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, variables: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !1, scope: !5, type: !6, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 2}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5.0"}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
new file mode 100644
index 000000000000..df8c668f1262
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
@@ -0,0 +1,113 @@
+; RUN: opt < %s -loop-vectorize -pass-remarks-analysis='loop-vectorize' -mtriple=x86_64-unknown-linux -S 2>&1 | FileCheck %s
+
+; Verify analysis remarks are generated when interleaving is not beneficial.
+; CHECK: remark: vectorization-remarks-profitable.c:5:17: the cost-model indicates that vectorization is not beneficial
+; CHECK: remark: vectorization-remarks-profitable.c:5:17: the cost-model indicates that interleaving is not beneficial and is explicitly disabled or interleave count is set to 1
+; CHECK: remark: vectorization-remarks-profitable.c:12:17: the cost-model indicates that vectorization is not beneficial
+; CHECK: remark: vectorization-remarks-profitable.c:12:17: the cost-model indicates that interleaving is not beneficial
+
+; First loop.
+; #pragma clang loop interleave(disable) unroll(disable)
+; for(int i = 0; i < n; i++) {
+; out[i] = *in[i];
+; }
+
+; Second loop.
+; #pragma clang loop unroll(disable)
+; for(int i = 0; i < n; i++) {
+; out[i] = *in[i];
+; }
+
+; ModuleID = 'vectorization-remarks-profitable.ll'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind uwtable
+define void @do_not_interleave(float** noalias nocapture readonly %in, float* noalias nocapture %out, i32 %size) #0 !dbg !4 {
+entry:
+ %cmp.4 = icmp eq i32 %size, 0, !dbg !10
+ br i1 %cmp.4, label %for.end, label %for.body.preheader, !dbg !11
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !12
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float*, float** %in, i64 %indvars.iv, !dbg !12
+ %0 = bitcast float** %arrayidx to i32**, !dbg !12
+ %1 = load i32*, i32** %0, align 8, !dbg !12
+ %2 = load i32, i32* %1, align 4, !dbg !13
+ %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !14
+ %3 = bitcast float* %arrayidx2 to i32*, !dbg !15
+ store i32 %2, i32* %3, align 4, !dbg !15
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !11
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !11
+ %exitcond = icmp eq i32 %lftr.wideiv, %size, !dbg !11
+ br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !11, !llvm.loop !16
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end, !dbg !19
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void, !dbg !19
+}
+
+; Function Attrs: nounwind uwtable
+define void @interleave_not_profitable(float** noalias nocapture readonly %in, float* noalias nocapture %out, i32 %size) #0 !dbg !6 {
+entry:
+ %cmp.4 = icmp eq i32 %size, 0, !dbg !20
+ br i1 %cmp.4, label %for.end, label %for.body, !dbg !21
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float*, float** %in, i64 %indvars.iv, !dbg !22
+ %0 = bitcast float** %arrayidx to i32**, !dbg !22
+ %1 = load i32*, i32** %0, align 8, !dbg !22
+ %2 = load i32, i32* %1, align 4, !dbg !23
+ %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !24
+ %3 = bitcast float* %arrayidx2 to i32*, !dbg !25
+ store i32 %2, i32* %3, align 4, !dbg !25
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !21
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
+ %exitcond = icmp eq i32 %lftr.wideiv, %size, !dbg !21
+ br i1 %exitcond, label %for.end, label %for.body, !dbg !21, !llvm.loop !26
+
+for.end: ; preds = %for.body, %entry
+ ret void, !dbg !27
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250016)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "vectorization-remarks-profitable.c", directory: "")
+!2 = !{}
+!3 = !{!4, !6}
+!4 = distinct !DISubprogram(name: "do_not_interleave", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !2)
+!6 = distinct !DISubprogram(name: "interleave_not_profitable", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 250016)"}
+!10 = !DILocation(line: 4, column: 23, scope: !4)
+!11 = !DILocation(line: 4, column: 3, scope: !4)
+!12 = !DILocation(line: 5, column: 17, scope: !4)
+!13 = !DILocation(line: 5, column: 16, scope: !4)
+!14 = !DILocation(line: 5, column: 7, scope: !4)
+!15 = !DILocation(line: 5, column: 14, scope: !4)
+!16 = distinct !{!16, !17, !18}
+!17 = !{!"llvm.loop.interleave.count", i32 1}
+!18 = !{!"llvm.loop.unroll.disable"}
+!19 = !DILocation(line: 6, column: 1, scope: !4)
+!20 = !DILocation(line: 11, column: 23, scope: !6)
+!21 = !DILocation(line: 11, column: 3, scope: !6)
+!22 = !DILocation(line: 12, column: 17, scope: !6)
+!23 = !DILocation(line: 12, column: 16, scope: !6)
+!24 = !DILocation(line: 12, column: 7, scope: !6)
+!25 = !DILocation(line: 12, column: 14, scope: !6)
+!26 = distinct !{!26, !18}
+!27 = !DILocation(line: 13, column: 1, scope: !6)
+
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index 8640950be32e..77a405ebb434 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -9,13 +9,13 @@
; DEBUG-OUTPUT-NOT: .loc
; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
-; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vector width and interleave count are explicitly set to 1
; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 1)
-; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved by 4 (vectorization not beneficial)
+; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved loop (interleaved count: 4)
+; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-define i32 @foo(i32 %n) #0 {
+define i32 @foo(i32 %n) #0 !dbg !4 {
entry:
%diff = alloca i32, align 4
%cb = alloca [16 x i8], align 16
@@ -52,7 +52,7 @@ declare void @ibar(i32*) #1
!1 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/LoopVectorize/conditional-assignment.ll b/test/Transforms/LoopVectorize/conditional-assignment.ll
index f41f08df07a6..8d820e277b26 100644
--- a/test/Transforms/LoopVectorize/conditional-assignment.ll
+++ b/test/Transforms/LoopVectorize/conditional-assignment.ll
@@ -6,7 +6,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; Function Attrs: nounwind ssp uwtable
-define void @conditional_store(i32* noalias nocapture %indices) #0 {
+define void @conditional_store(i32* noalias nocapture %indices) #0 !dbg !4 {
entry:
br label %for.body, !dbg !10
@@ -36,11 +36,11 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "conditional_store", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*)* @conditional_store, variables: !2)
+!4 = distinct !DISubprogram(name: "conditional_store", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll
index c56f9122e462..a2fc69a6e907 100644
--- a/test/Transforms/LoopVectorize/control-flow.ll
+++ b/test/Transforms/LoopVectorize/control-flow.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' 2>&1 | FileCheck %s
; C/C++ code for control flow test
; int test(int *A, int Length) {
@@ -20,7 +20,7 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind optsize ssp uwtable
-define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
entry:
%cmp8 = icmp sgt i32 %Length, 0, !dbg !10
br i1 %cmp8, label %for.body.preheader, label %end, !dbg !10
@@ -55,11 +55,11 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.cpp", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 (i32*, i32)* @_Z4testPii, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
index c7440f84b2c9..f68b6865b072 100644
--- a/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
@C = global [1024 x i32] zeroinitializer, align 16
; CHECK-LABEL: @test(
-define i32 @test() #0 {
+define i32 @test() #0 !dbg !3 {
entry:
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !9, metadata !DIExpression()), !dbg !18
br label %for.body, !dbg !18
@@ -44,16 +44,16 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!26}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang", isOptimized: true, emissionKind: 0, file: !25, enums: !1, retainedTypes: !1, subprograms: !2, globals: !11)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang", isOptimized: true, emissionKind: 0, file: !25, enums: !1, retainedTypes: !1, subprograms: !2, globals: !11)
!1 = !{}
!2 = !{!3}
-!3 = !DISubprogram(name: "test", linkageName: "test", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !25, scope: !4, type: !5, function: i32 ()* @test, variables: !8)
+!3 = distinct !DISubprogram(name: "test", linkageName: "test", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !25, scope: !4, type: !5, variables: !8)
!4 = !DIFile(filename: "test", directory: "/path/to/somewhere")
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !{!9}
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 6, scope: !10, file: !4, type: !7)
+!9 = !DILocalVariable(name: "i", line: 6, scope: !10, file: !4, type: !7)
!10 = distinct !DILexicalBlock(line: 6, column: 0, file: !25, scope: !3)
!11 = !{!12, !16, !17}
!12 = !DIGlobalVariable(name: "A", line: 1, isLocal: false, isDefinition: true, scope: null, file: !4, type: !13, variable: [1024 x i32]* @A)
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll
index e691afdd6933..0214f1c4847c 100644
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -12,12 +12,12 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK: load <2 x i32>, <2 x i32>* {{.*}}, !dbg ![[LOC2]]
; CHECK: add <2 x i32> {{.*}}, !dbg ![[LOC2]]
; CHECK: add i64 %index, 2, !dbg ![[LOC]]
-; CHECK: icmp eq i64 %index.next, %end.idx.rnd.down, !dbg ![[LOC]]
+; CHECK: icmp eq i64 %index.next, %n.vec, !dbg ![[LOC]]
; CHECK: middle.block
-; CHECK: add <2 x i32> %rdx.vec.exit.phi, %rdx.shuf, !dbg ![[LOC2]]
+; CHECK: add <2 x i32> %{{.*}}, %rdx.shuf, !dbg ![[LOC2]]
; CHECK: extractelement <2 x i32> %bin.rdx, i32 0, !dbg ![[LOC2]]
-define i32 @f(i32* nocapture %a, i32 %size) #0 {
+define i32 @f(i32* nocapture %a, i32 %size) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !19
tail call void @llvm.dbg.value(metadata i32 %size, i64 0, metadata !14, metadata !DIExpression()), !dbg !19
@@ -63,11 +63,11 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18, !27}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 185038) (llvm/trunk 185097)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 185038) (llvm/trunk 185097)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "-", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, function: i32 (i32*, i32)* @f, variables: !12)
+!4 = distinct !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !12)
!5 = !DIFile(filename: "<stdin>", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
!6 = !DIFile(filename: "<stdin>", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
!7 = !DISubroutineType(types: !8)
@@ -76,10 +76,10 @@ attributes #1 = { nounwind readnone }
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
!12 = !{!13, !14, !15, !16}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !4, file: !6, type: !10)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "size", line: 3, arg: 2, scope: !4, file: !6, type: !11)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "sum", line: 4, scope: !4, file: !6, type: !11)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 5, scope: !17, file: !6, type: !11)
+!13 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !4, file: !6, type: !10)
+!14 = !DILocalVariable(name: "size", line: 3, arg: 2, scope: !4, file: !6, type: !11)
+!15 = !DILocalVariable(name: "sum", line: 4, scope: !4, file: !6, type: !11)
+!16 = !DILocalVariable(name: "i", line: 5, scope: !17, file: !6, type: !11)
!17 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !4)
!18 = !{i32 2, !"Dwarf Version", i32 3}
!19 = !DILocation(line: 3, scope: !4)
diff --git a/test/Transforms/LoopVectorize/gep_with_bitcast.ll b/test/Transforms/LoopVectorize/gep_with_bitcast.ll
new file mode 100644
index 000000000000..ab2fd5e4e1c6
--- /dev/null
+++ b/test/Transforms/LoopVectorize/gep_with_bitcast.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; Vectorization of loop with bitcast between GEP and load
+; Simplified source code:
+;void foo (double** __restrict__ in, bool * __restrict__ res) {
+;
+; for (int i = 0; i < 4096; ++i)
+; res[i] = ((unsigned long long)in[i] == 0);
+;}
+
+; CHECK-LABEL: @foo
+; CHECK: vector.body
+; CHECK: %0 = getelementptr inbounds double*, double** %in, i64 %index
+; CHECK: %1 = bitcast double** %0 to <4 x i64>*
+; CHECK: %wide.load = load <4 x i64>, <4 x i64>* %1, align 8
+; CHECK: %2 = icmp eq <4 x i64> %wide.load, zeroinitializer
+; CHECK: br i1
+
+define void @foo(double** noalias nocapture readonly %in, double** noalias nocapture readnone %out, i8* noalias nocapture %res) #0 {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double*, double** %in, i64 %indvars.iv
+ %tmp53 = bitcast double** %arrayidx to i64*
+ %tmp54 = load i64, i64* %tmp53, align 8
+ %cmp1 = icmp eq i64 %tmp54, 0
+ %arrayidx3 = getelementptr inbounds i8, i8* %res, i64 %indvars.iv
+ %frombool = zext i1 %cmp1 to i8
+ store i8 %frombool, i8* %arrayidx3, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 4096
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+} \ No newline at end of file
diff --git a/test/Transforms/LoopVectorize/if-pred-stores.ll b/test/Transforms/LoopVectorize/if-pred-stores.ll
index 991d027ada5c..0d70f557f834 100644
--- a/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -1,5 +1,8 @@
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg -instcombine < %s | FileCheck %s --check-prefix=VEC-IC
+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -14,27 +17,49 @@ entry:
; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0
; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true
+; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
+; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]
;
; VEC: [[cond]]:
-; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
-; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
; VEC: store i32 %[[v13]], i32* %[[v14]], align 4
; VEC: br label %[[else:.+]]
;
; VEC: [[else]]:
; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1
; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true
+; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
+; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]
;
; VEC: [[cond2]]:
-; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
-; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
; VEC: store i32 %[[v17]], i32* %[[v18]], align 4
; VEC: br label %[[else2:.+]]
;
; VEC: [[else2]]:
+; VEC-IC-LABEL: test
+; VEC-IC: %[[v1:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
+; VEC-IC: %[[v2:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20>
+; VEC-IC: %[[v3:.+]] = extractelement <2 x i1> %[[v1]], i32 0
+; VEC-IC: br i1 %[[v3]], label %[[cond:.+]], label %[[else:.+]]
+;
+; VEC-IC: [[cond]]:
+; VEC-IC: %[[v4:.+]] = extractelement <2 x i32> %[[v2]], i32 0
+; VEC-IC: store i32 %[[v4]], i32* %{{.*}}, align 4
+; VEC-IC: br label %[[else:.+]]
+;
+; VEC-IC: [[else]]:
+; VEC-IC: %[[v5:.+]] = extractelement <2 x i1> %[[v1]], i32 1
+; VEC-IC: br i1 %[[v5]], label %[[cond2:.+]], label %[[else2:.+]]
+;
+; VEC-IC: [[cond2]]:
+; VEC-IC: %[[v6:.+]] = extractelement <2 x i32> %[[v2]], i32 1
+; VEC-IC: store i32 %[[v6]], i32* %{{.*}}, align 4
+; VEC-IC: br label %[[else2:.+]]
+;
+; VEC-IC: [[else2]]:
+
; UNROLL-LABEL: test
; UNROLL: vector.body:
; UNROLL: %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0
@@ -90,9 +115,9 @@ for.end:
; vectorized loop body.
; PR18724
-; UNROLL-LABEL: bug18724
-; UNROLL: store i32
-; UNROLL: store i32
+; UNROLL-NOSIMPLIFY-LABEL: bug18724
+; UNROLL-NOSIMPLIFY: store i32
+; UNROLL-NOSIMPLIFY: store i32
define void @bug18724() {
entry:
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 2fbb2de797ae..59ee66a4a35d 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -6,8 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK-LABEL: @multi_int_induction(
; CHECK: vector.body:
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %normalized.idx = sub i64 %index, 0
-; CHECK: %[[VAR:.*]] = trunc i64 %normalized.idx to i32
+; CHECK: %[[VAR:.*]] = trunc i64 %index to i32
; CHECK: %offset.idx = add i32 190, %[[VAR]]
define void @multi_int_induction(i32* %A, i32 %N) {
for.body.lr.ph:
@@ -113,12 +112,11 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
; condition and branch directly to the scalar loop.
; CHECK-LABEL: max_i32_backedgetaken
-; CHECK: %backedge.overflow = icmp eq i32 -1, -1
-; CHECK: br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked
+; CHECK: br i1 true, label %scalar.ph, label %min.iters.checked
; CHECK: scalar.ph:
-; CHECK: %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ]
-; CHECK: %bc.merge.rdx = phi i32 [ 1, %0 ], [ %5, %middle.block ]
+; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %0 ]
+; CHECK: %bc.merge.rdx = phi i32 [ 1, %0 ], [ 1, %min.iters.checked ], [ %5, %middle.block ]
define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
@@ -142,11 +140,10 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
; CHECK-LABEL: testoverflowcheck
; CHECK: entry
; CHECK: %[[LOAD:.*]] = load i8
-; CHECK: %[[VAL:.*]] = zext i8 %[[LOAD]] to i32
; CHECK: br
; CHECK: scalar.ph
-; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ]
+; CHECK: phi i8 [ %{{.*}}, %middle.block ], [ %[[LOAD]], %entry ]
@e = global i8 1, align 1
@d = common global i32 0, align 4
diff --git a/test/Transforms/LoopVectorize/miniters.ll b/test/Transforms/LoopVectorize/miniters.ll
new file mode 100644
index 000000000000..81cb2d4ca5a1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/miniters.ll
@@ -0,0 +1,45 @@
+; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s -check-prefix=UNROLL
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [1000 x i32] zeroinitializer, align 16
+@c = common global [1000 x i32] zeroinitializer, align 16
+@a = common global [1000 x i32] zeroinitializer, align 16
+
+; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF.
+; CHECK-LABEL: foo(
+; CHECK: %min.iters.check = icmp ult i64 %N, 4
+; CHECK: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+; UNROLL-LABEL: foo(
+; UNROLL: %min.iters.check = icmp ult i64 %N, 8
+; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+
+define void @foo(i64 %N) {
+entry:
+ %cmp.8 = icmp sgt i64 %N, 0
+ br i1 %cmp.8, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %i.09 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @b, i64 0, i64 %i.09
+ %tmp = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds [1000 x i32], [1000 x i32]* @c, i64 0, i64 %i.09
+ %tmp1 = load i32, i32* %arrayidx1, align 4
+ %add = add nsw i32 %tmp1, %tmp
+ %arrayidx2 = getelementptr inbounds [1000 x i32], [1000 x i32]* @a, i64 0, i64 %i.09
+ store i32 %add, i32* %arrayidx2, align 4
+ %inc = add nuw nsw i64 %i.09, 1
+ %exitcond = icmp eq i64 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll
index 5a0356fe11a2..19a401213fd5 100644
--- a/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -412,10 +412,10 @@ for.end:
; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @max_red_float(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @max_red_float(float %max) #0 {
@@ -427,7 +427,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ogt float %0, %max.red.08
+ %cmp3 = fcmp fast ogt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -438,10 +438,10 @@ for.end:
}
; CHECK-LABEL: @max_red_float_ge(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @max_red_float_ge(float %max) #0 {
@@ -453,7 +453,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp oge float %0, %max.red.08
+ %cmp3 = fcmp fast oge float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -464,10 +464,10 @@ for.end:
}
; CHECK-LABEL: @inverted_max_red_float(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_max_red_float(float %max) #0 {
@@ -479,7 +479,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp olt float %0, %max.red.08
+ %cmp3 = fcmp fast olt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -490,10 +490,10 @@ for.end:
}
; CHECK-LABEL: @inverted_max_red_float_le(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_max_red_float_le(float %max) #0 {
@@ -505,7 +505,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ole float %0, %max.red.08
+ %cmp3 = fcmp fast ole float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -516,10 +516,10 @@ for.end:
}
; CHECK-LABEL: @unordered_max_red_float(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @unordered_max_red_float(float %max) #0 {
@@ -531,7 +531,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ugt float %0, %max.red.08
+ %cmp3 = fcmp fast ugt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -542,10 +542,10 @@ for.end:
}
; CHECK-LABEL: @unordered_max_red_float_ge(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @unordered_max_red_float_ge(float %max) #0 {
@@ -557,7 +557,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp uge float %0, %max.red.08
+ %cmp3 = fcmp fast uge float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -568,10 +568,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_max_red_float(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_unordered_max_red_float(float %max) #0 {
@@ -583,7 +583,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ult float %0, %max.red.08
+ %cmp3 = fcmp fast ult float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -594,10 +594,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_max_red_float_le(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_unordered_max_red_float_le(float %max) #0 {
@@ -609,7 +609,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ule float %0, %max.red.08
+ %cmp3 = fcmp fast ule float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -623,10 +623,10 @@ for.end:
; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @min_red_float(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @min_red_float(float %min) #0 {
@@ -638,7 +638,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp olt float %0, %min.red.08
+ %cmp3 = fcmp fast olt float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -649,10 +649,10 @@ for.end:
}
; CHECK-LABEL: @min_red_float_le(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @min_red_float_le(float %min) #0 {
@@ -664,7 +664,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ole float %0, %min.red.08
+ %cmp3 = fcmp fast ole float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -675,10 +675,10 @@ for.end:
}
; CHECK-LABEL: @inverted_min_red_float(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_min_red_float(float %min) #0 {
@@ -690,7 +690,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ogt float %0, %min.red.08
+ %cmp3 = fcmp fast ogt float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -701,10 +701,10 @@ for.end:
}
; CHECK-LABEL: @inverted_min_red_float_ge(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_min_red_float_ge(float %min) #0 {
@@ -716,7 +716,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp oge float %0, %min.red.08
+ %cmp3 = fcmp fast oge float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -727,10 +727,10 @@ for.end:
}
; CHECK-LABEL: @unordered_min_red_float(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @unordered_min_red_float(float %min) #0 {
@@ -742,7 +742,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ult float %0, %min.red.08
+ %cmp3 = fcmp fast ult float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -753,10 +753,10 @@ for.end:
}
; CHECK-LABEL: @unordered_min_red_float_le(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @unordered_min_red_float_le(float %min) #0 {
@@ -768,7 +768,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ule float %0, %min.red.08
+ %cmp3 = fcmp fast ule float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -779,10 +779,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_min_red_float(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_unordered_min_red_float(float %min) #0 {
@@ -794,7 +794,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ugt float %0, %min.red.08
+ %cmp3 = fcmp fast ugt float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -805,10 +805,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_unordered_min_red_float_ge(float %min) #0 {
@@ -820,7 +820,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp uge float %0, %min.red.08
+ %cmp3 = fcmp fast uge float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -832,10 +832,10 @@ for.end:
; Make sure we handle doubles, too.
; CHECK-LABEL: @min_red_double(
-; CHECK: fcmp olt <2 x double>
+; CHECK: fcmp fast olt <2 x double>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x double>
+; CHECK: fcmp fast olt <2 x double>
; CHECK: select i1
define double @min_red_double(double %min) #0 {
@@ -847,7 +847,7 @@ for.body:
%min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %indvars.iv
%0 = load double, double* %arrayidx, align 4
- %cmp3 = fcmp olt double %0, %min.red.08
+ %cmp3 = fcmp fast olt double %0, %min.red.08
%min.red.0 = select i1 %cmp3, double %0, double %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -871,7 +871,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ogt float %0, %max.red.08
+ %cmp3 = fcmp fast ogt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
diff --git a/test/Transforms/LoopVectorize/no_array_bounds.ll b/test/Transforms/LoopVectorize/no_array_bounds.ll
index f7c7ff7732b9..13cec71fc455 100644
--- a/test/Transforms/LoopVectorize/no_array_bounds.ll
+++ b/test/Transforms/LoopVectorize/no_array_bounds.ll
@@ -17,7 +17,7 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind ssp uwtable
-define void @_Z4testPiS_i(i32* nocapture %A, i32* nocapture %B, i32 %number) #0 {
+define void @_Z4testPiS_i(i32* nocapture %A, i32* nocapture %B, i32 %number) #0 !dbg !4 {
entry:
%cmp25 = icmp sgt i32 %number, 0, !dbg !10
br i1 %cmp25, label %for.body.preheader, label %for.end15, !dbg !10, !llvm.loop !12
@@ -72,11 +72,11 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "no_array_bounds.cpp", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, function: void (i32*, i32*, i32)* @_Z4testPiS_i, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "no_array_bounds.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll
index 7030b6b4df2d..2683b42dc717 100644
--- a/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll
index 1f139c26d790..842d262d3192 100644
--- a/test/Transforms/LoopVectorize/no_switch.ll
+++ b/test/Transforms/LoopVectorize/no_switch.ll
@@ -1,9 +1,17 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -S 2>&1 | FileCheck %s -check-prefix=NOANALYSIS
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -pass-remarks-missed='loop-vectorize' -S 2>&1 | FileCheck %s -check-prefix=MOREINFO
; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
-; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info (Force=true, Vector Width=4)
; CHECK: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization
+; NOANALYSIS-NOT: remark: {{.*}}
+; NOANALYSIS: warning: source.cpp:4:5: loop not interleaved: failed explicitly specified loop interleaving
+
+; MOREINFO: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
+; MOREINFO: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info (Force=true, Vector Width=4)
+; MOREINFO: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization
+
; CHECK: _Z11test_switchPii
; CHECK-NOT: x i32>
; CHECK: ret
@@ -11,7 +19,7 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 {
+define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
entry:
%cmp18 = icmp sgt i32 %Length, 0, !dbg !10
br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
@@ -59,11 +67,11 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.cpp", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z11test_switchPii, variables: !2)
+!4 = distinct !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/nontemporal.ll b/test/Transforms/LoopVectorize/nontemporal.ll
new file mode 100644
index 000000000000..106b19031228
--- /dev/null
+++ b/test/Transforms/LoopVectorize/nontemporal.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; CHECK-LABEL: @foo(
+define void @foo(float* noalias %a, float* noalias %b, float* noalias %c, i32 %N) {
+entry:
+ %cmp.4 = icmp sgt i32 %N, 0
+ br i1 %cmp.4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+
+; Check that we don't lose !nontemporal hint when vectorizing loads.
+; CHECK: %wide.load{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4, !nontemporal !0
+
+; Check that we don't introduce !nontemporal hint when the original scalar loads didn't have it.
+; CHECK: %wide.load{{[0-9]+}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
+ %arrayidx2 = getelementptr inbounds float, float* %c, i64 %indvars.iv
+ %1 = load float, float* %arrayidx2, align 4
+ %add = fadd float %0, %1
+
+; Check that we don't lose !nontemporal hint when vectorizing stores.
+; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
+ %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ store float %add, float* %arrayidx4, align 4, !nontemporal !0
+
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+; CHECK: ret void
+ ret void
+}
+
+!0 = !{i32 1}
diff --git a/test/Transforms/LoopVectorize/optsize.ll b/test/Transforms/LoopVectorize/optsize.ll
index e183fda099a2..513657cd3723 100644
--- a/test/Transforms/LoopVectorize/optsize.ll
+++ b/test/Transforms/LoopVectorize/optsize.ll
@@ -1,18 +1,17 @@
; This test verifies that the loop vectorizer will NOT produce a tail
-; loop with Optimize for size attibute.
+; loop with the optimize for size or the minimize size attributes.
; REQUIRES: asserts
-; RUN: opt < %s -loop-vectorize -Os -debug -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
-
-;CHECK-NOT: <2 x i8>
-;CHECK-NOT: <4 x i8>
-;CHECK: Aborting. A tail loop is required in Os.
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
@tab = common global [32 x i8] zeroinitializer, align 1
-; Function Attrs: nounwind optsize
-define i32 @foo() #0 {
+define i32 @foo_optsize() #0 {
+; CHECK-LABEL: @foo_optsize(
+; CHECK-NOT: <2 x i8>
+; CHECK-NOT: <4 x i8>
+
entry:
br label %for.body
@@ -31,4 +30,30 @@ for.end: ; preds = %for.body
ret i32 0
}
-attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { optsize }
+
+define i32 @foo_minsize() #1 {
+; CHECK-LABEL: @foo_minsize(
+; CHECK-NOT: <2 x i8>
+; CHECK-NOT: <4 x i8>
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+ %0 = load i8, i8* %arrayidx, align 1
+ %cmp1 = icmp eq i8 %0, 0
+ %. = select i1 %cmp1, i8 2, i8 1
+ store i8 %., i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %i.08, 202
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+attributes #1 = { minsize }
+
diff --git a/test/Transforms/LoopVectorize/ptr-induction.ll b/test/Transforms/LoopVectorize/ptr-induction.ll
new file mode 100644
index 000000000000..47d33352763d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ptr-induction.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; This testcase causes SCEV to return a pointer-typed exit value.
+
+; CHECK: @f
+; Expect that the pointer indvar has been converted into an integer indvar.
+; CHECK: %index.next = add i64 %index, 4
+define i32 @f(i32* readonly %a, i32* readnone %b) #0 {
+entry:
+ %cmp.6 = icmp ult i32* %a, %b
+ br i1 %cmp.6, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %a.pn = phi i32* [ %incdec.ptr8, %while.body ], [ %a, %while.body.preheader ]
+ %acc.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+ %incdec.ptr8 = getelementptr inbounds i32, i32* %a.pn, i64 1
+ %0 = load i32, i32* %incdec.ptr8, align 1
+ %add = add nuw nsw i32 %0, %acc.07
+ %exitcond = icmp eq i32* %incdec.ptr8, %b
+ br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body
+
+while.cond.while.end_crit_edge: ; preds = %while.body
+ %add.lcssa = phi i32 [ %add, %while.body ]
+ br label %while.end
+
+while.end: ; preds = %while.cond.while.end_crit_edge, %entry
+ %acc.0.lcssa = phi i32 [ %add.lcssa, %while.cond.while.end_crit_edge ], [ 0, %entry ]
+ ret i32 %acc.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 647e58a7e41f..63b138f1d560 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -175,8 +175,8 @@ for.end: ; preds = %for.body, %entry
}
;CHECK-LABEL: @reduction_and(
-;CHECK: and <4 x i32>
;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
+;CHECK: and <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: and <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll
index 6b63a0d8db6c..88dd2e4d66ca 100644
--- a/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -96,8 +96,7 @@ loopend:
; CHECK-LABEL: @reverse_forward_induction_i64_i8(
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %normalized.idx = sub i64 %index, 0
-; CHECK: %offset.idx = sub i64 1023, %normalized.idx
+; CHECK: %offset.idx = sub i64 1023, %index
; CHECK: trunc i64 %index to i8
define void @reverse_forward_induction_i64_i8() {
@@ -122,10 +121,8 @@ while.end:
; CHECK-LABEL: @reverse_forward_induction_i64_i8_signed(
; CHECK: vector.body:
-; CHECK: %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %normalized.idx = sub i64 %index, 129
-; CHECK: %offset.idx = sub i64 1023, %normalized.idx
-; CHECK: trunc i64 %index to i8
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i64 1023, %index
define void @reverse_forward_induction_i64_i8_signed() {
entry:
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 1f07d3f69594..3673b71db30d 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -11,9 +11,9 @@ target triple = "x86_64-apple-macosx10.9.0"
;CHECK-LABEL: define i32 @foo
;CHECK: for.body.preheader:
-;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
+;CHECK: br i1 %cmp.zero, label %scalar.ph, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
;CHECK: vector.memcheck:
-;CHECK: br i1 %memcheck.conflict, label %middle.block, label %vector.ph, !dbg [[BODY_LOC]]
+;CHECK: br i1 %memcheck.conflict, label %scalar.ph, label %vector.ph, !dbg [[BODY_LOC]]
;CHECK: load <4 x float>
define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
entry:
@@ -73,7 +73,7 @@ loopexit:
!2 = !{}
!3 = !DISubroutineType(types: !2)
!4 = !DIFile(filename: "test.cpp", directory: "/tmp")
-!5 = !DISubprogram(name: "foo", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = distinct !DISubprogram(name: "foo", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!6 = !DILocation(line: 100, column: 1, scope: !5)
!7 = !DILocation(line: 101, column: 1, scope: !5)
!8 = !DILocation(line: 102, column: 1, scope: !5)
diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll
index 6bc71e160ccd..a7f692cef170 100644
--- a/test/Transforms/LoopVectorize/runtime-limit.ll
+++ b/test/Transforms/LoopVectorize/runtime-limit.ll
@@ -1,12 +1,25 @@
-; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=OVERRIDE
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -pragma-vectorize-memory-check-threshold=6 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
+
+; First loop produced diagnostic pass remark.
+;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
+; Second loop produces diagnostic analysis remark.
+;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations
+
+; First loop produced diagnostic pass remark.
+;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
+; Second loop produces diagnostic pass remark.
+;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
; We are vectorizing with 6 runtime checks.
;CHECK-LABEL: func1x6(
;CHECK: <4 x i32>
;CHECK: ret
+;OVERRIDE-LABEL: func1x6(
+;OVERRIDE: <4 x i32>
+;OVERRIDE: ret
define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
entry:
br label %for.body
@@ -41,6 +54,10 @@ for.end: ; preds = %for.body
;CHECK-LABEL: func2x6(
;CHECK-NOT: <4 x i32>
;CHECK: ret
+; We vectorize with 12 checks if a vectorization hint is provided.
+;OVERRIDE-LABEL: func2x6(
+;OVERRIDE: <4 x i32>
+;OVERRIDE: ret
define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
entry:
br label %for.body
diff --git a/test/Transforms/LowerBitSets/function-ext.ll b/test/Transforms/LowerBitSets/function-ext.ll
new file mode 100644
index 000000000000..2a83bef2f074
--- /dev/null
+++ b/test/Transforms/LowerBitSets/function-ext.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+
+; Tests that we correctly handle external references, including the case where
+; all functions in a bitset are external references.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @foo()
+
+; CHECK: @[[JT:.*]] = private constant [1 x <{ i8, i32, i8, i8, i8 }>] [<{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @foo to i64), i64 ptrtoint ([1 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 5) to i32), i8 -52, i8 -52, i8 -52 }>], section ".text"
+
+define i1 @bar(i8* %ptr) {
+ ; CHECK: icmp eq i64 {{.*}}, ptrtoint ([1 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)
+ %p = call i1 @llvm.bitset.test(i8* %ptr, metadata !"void")
+ ret i1 %p
+}
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+!0 = !{!"void", void ()* @foo, i64 0}
+
+!llvm.bitsets = !{!0}
diff --git a/test/Transforms/LowerBitSets/function.ll b/test/Transforms/LowerBitSets/function.ll
new file mode 100644
index 000000000000..bf4043d61c41
--- /dev/null
+++ b/test/Transforms/LowerBitSets/function.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+
+; Tests that we correctly create a jump table for bitsets containing 2 or more
+; functions.
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-p:64:64"
+
+; CHECK: @[[JT:.*]] = private constant [2 x <{ i8, i32, i8, i8, i8 }>] [<{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @[[FNAME:.*]] to i64), i64 ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 5) to i32), i8 -52, i8 -52, i8 -52 }>, <{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @[[GNAME:.*]] to i64), i64 ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 13) to i32), i8 -52, i8 -52, i8 -52 }>], section ".text"
+
+; CHECK: @f = alias void (), bitcast ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to void ()*)
+; CHECK: @g = alias void (), bitcast (<{ i8, i32, i8, i8, i8 }>* getelementptr inbounds ([2 x <{ i8, i32, i8, i8, i8 }>], [2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]], i64 0, i64 1) to void ()*)
+
+; CHECK: define private void @[[FNAME]]() {
+define void @f() {
+ ret void
+}
+
+; CHECK: define private void @[[GNAME]]() {
+define void @g() {
+ ret void
+}
+
+!0 = !{!"bitset1", void ()* @f, i32 0}
+!1 = !{!"bitset1", void ()* @g, i32 0}
+
+!llvm.bitsets = !{ !0, !1 }
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+ ; CHECK: sub i64 {{.*}}, ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)
+ %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset1")
+ ret i1 %x
+}
diff --git a/test/Transforms/LowerBitSets/nonstring.ll b/test/Transforms/LowerBitSets/nonstring.ll
new file mode 100644
index 000000000000..e61c9123e086
--- /dev/null
+++ b/test/Transforms/LowerBitSets/nonstring.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+
+; Tests that non-string metadata nodes may be used as bitset identifiers.
+
+target datalayout = "e-p:32:32"
+
+; CHECK: @[[ANAME:.*]] = private constant { i32 }
+; CHECK: @[[BNAME:.*]] = private constant { [2 x i32] }
+
+@a = constant i32 1
+@b = constant [2 x i32] [i32 2, i32 3]
+
+!0 = !{!2, i32* @a, i32 0}
+!1 = !{!3, [2 x i32]* @b, i32 0}
+!2 = distinct !{}
+!3 = distinct !{}
+
+!llvm.bitsets = !{ !0, !1 }
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK-LABEL: @foo
+define i1 @foo(i8* %p) {
+ ; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ i32 }* @[[ANAME]] to i32)
+ %x = call i1 @llvm.bitset.test(i8* %p, metadata !2)
+ ret i1 %x
+}
+
+; CHECK-LABEL: @bar
+define i1 @bar(i8* %p) {
+ ; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ [2 x i32] }* @[[BNAME]] to i32)
+ %x = call i1 @llvm.bitset.test(i8* %p, metadata !3)
+ ret i1 %x
+}
diff --git a/test/Transforms/LowerBitSets/pr25902.ll b/test/Transforms/LowerBitSets/pr25902.ll
new file mode 100644
index 000000000000..b9a1203ec0e1
--- /dev/null
+++ b/test/Transforms/LowerBitSets/pr25902.ll
@@ -0,0 +1,21 @@
+; PR25902: gold plugin crash.
+; RUN: opt -mtriple=i686-pc -S -lowerbitsets < %s
+
+define void @f(void ()* %p) {
+entry:
+ %a = bitcast void ()* %p to i8*, !nosanitize !1
+ %b = call i1 @llvm.bitset.test(i8* %a, metadata !"_ZTSFvvE"), !nosanitize !1
+ ret void
+}
+
+define void @g() {
+entry:
+ ret void
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+
+!llvm.bitsets = !{!0}
+
+!0 = !{!"_ZTSFvvE", void ()* @g, i64 0}
+!1 = !{}
diff --git a/test/Transforms/LowerBitSets/simple.ll b/test/Transforms/LowerBitSets/simple.ll
index 0fcdf0b36d63..a22d998e2008 100644
--- a/test/Transforms/LowerBitSets/simple.ll
+++ b/test/Transforms/LowerBitSets/simple.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p:32:32"
; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }
@a = constant i32 1
-@b = constant [63 x i32] zeroinitializer
-@c = constant i32 3
+@b = hidden constant [63 x i32] zeroinitializer
+@c = protected constant i32 3
@d = constant [2 x i32] [i32 4, i32 5]
; CHECK: [[BA:@[^ ]*]] = private constant [68 x i8] c"\03\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\00\01"
@@ -26,26 +26,26 @@ target datalayout = "e-p:32:32"
!4 = !{!"bitset2", i32* @c, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0}
+; Entries whose second operand is null (the result of a global being DCE'd)
+; should be ignored.
+!5 = !{!"bitset2", null, i32 0}
+
; Offset 0, 4 byte alignment
-!5 = !{!"bitset3", i32* @a, i32 0}
+!6 = !{!"bitset3", i32* @a, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0}
-!6 = !{!"bitset3", i32* @c, i32 0}
+!7 = !{!"bitset3", i32* @c, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @c, i32 0}
-; Entries whose second operand is null (the result of a global being DCE'd)
-; should be ignored.
-!7 = !{!"bitset2", null, i32 0}
-
!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }
-; CHECK: @bits_use{{[0-9]*}} = private alias i8* @bits{{[0-9]*}}
-; CHECK: @bits_use.{{[0-9]*}} = private alias i8* @bits{{[0-9]*}}
-; CHECK: @bits_use.{{[0-9]*}} = private alias i8* @bits{{[0-9]*}}
+; CHECK: @bits_use{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
+; CHECK: @bits_use.{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
+; CHECK: @bits_use.{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
-; CHECK: @a = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
-; CHECK: @b = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
-; CHECK: @c = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
-; CHECK: @d = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
+; CHECK: @a = alias i32, getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
+; CHECK: @b = hidden alias [63 x i32], getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
+; CHECK: @c = protected alias i32, getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
+; CHECK: @d = alias [2 x i32], getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
; CHECK-DARWIN: @aptr = constant i32* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G:@[^ ]*]], i32 0, i32 0)
@aptr = constant i32* @a
@@ -61,8 +61,8 @@ target datalayout = "e-p:32:32"
; CHECK-DARWIN: [[G]] = private constant
-; CHECK: @bits{{[0-9]*}} = private alias getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
-; CHECK: @bits.{{[0-9]*}} = private alias getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
+; CHECK: @bits{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
+; CHECK: @bits.{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
diff --git a/test/Transforms/LowerExpectIntrinsic/basic.ll b/test/Transforms/LowerExpectIntrinsic/basic.ll
index 73d9f44ee7e5..69e67cd7c1dd 100644
--- a/test/Transforms/LowerExpectIntrinsic/basic.ll
+++ b/test/Transforms/LowerExpectIntrinsic/basic.ll
@@ -1,5 +1,5 @@
; RUN: opt -lower-expect -strip-dead-prototypes -S -o - < %s | FileCheck %s
-; RUN: opt -S -passes=lower-expect < %s | opt -strip-dead-prototypes -S | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect),strip-dead-prototypes' < %s | FileCheck %s
; CHECK-LABEL: @test1(
define i32 @test1(i32 %x) nounwind uwtable ssp {
diff --git a/test/Transforms/LowerSwitch/delete-default-block-crash.ll b/test/Transforms/LowerSwitch/delete-default-block-crash.ll
new file mode 100644
index 000000000000..23588d56c335
--- /dev/null
+++ b/test/Transforms/LowerSwitch/delete-default-block-crash.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -lowerswitch -disable-output
+
+; This test verify -lowerswitch does not crash after deleting the default block.
+
+declare i32 @f(i32)
+
+define i32 @unreachable(i32 %x) {
+
+entry:
+ switch i32 %x, label %unreachable [
+ i32 5, label %a
+ i32 6, label %a
+ i32 7, label %a
+ i32 10, label %b
+ i32 20, label %b
+ i32 30, label %b
+ i32 40, label %b
+ ]
+unreachable:
+ unreachable
+a:
+ %0 = call i32 @f(i32 0)
+ ret i32 %0
+b:
+ %1 = call i32 @f(i32 1)
+ ret i32 %1
+}
diff --git a/test/Transforms/LowerSwitch/feature.ll b/test/Transforms/LowerSwitch/feature.ll
index b82d93455436..09d25f0b06d4 100644
--- a/test/Transforms/LowerSwitch/feature.ll
+++ b/test/Transforms/LowerSwitch/feature.ll
@@ -4,49 +4,49 @@
; On output we should got binary comparison tree. Check that all is fine.
;CHECK: entry:
-;CHECK-NEXT: br label %NodeBlock.19
+;CHECK-NEXT: br label %NodeBlock19
-;CHECK: NodeBlock.19: ; preds = %entry
-;CHECK-NEXT: %Pivot.20 = icmp slt i32 %tmp158, 10
-;CHECK-NEXT: br i1 %Pivot.20, label %NodeBlock.5, label %NodeBlock.17
+;CHECK: NodeBlock19: ; preds = %entry
+;CHECK-NEXT: %Pivot20 = icmp slt i32 %tmp158, 10
+;CHECK-NEXT: br i1 %Pivot20, label %NodeBlock5, label %NodeBlock17
-;CHECK: NodeBlock.17: ; preds = %NodeBlock.19
-;CHECK-NEXT: %Pivot.18 = icmp slt i32 %tmp158, 13
-;CHECK-NEXT: br i1 %Pivot.18, label %NodeBlock.9, label %NodeBlock.15
+;CHECK: NodeBlock17: ; preds = %NodeBlock19
+;CHECK-NEXT: %Pivot18 = icmp slt i32 %tmp158, 13
+;CHECK-NEXT: br i1 %Pivot18, label %NodeBlock9, label %NodeBlock15
-;CHECK: NodeBlock.15: ; preds = %NodeBlock.17
-;CHECK-NEXT: %Pivot.16 = icmp slt i32 %tmp158, 14
-;CHECK-NEXT: br i1 %Pivot.16, label %bb330, label %NodeBlock.13
+;CHECK: NodeBlock15: ; preds = %NodeBlock17
+;CHECK-NEXT: %Pivot16 = icmp slt i32 %tmp158, 14
+;CHECK-NEXT: br i1 %Pivot16, label %bb330, label %NodeBlock13
-;CHECK: NodeBlock.13: ; preds = %NodeBlock.15
-;CHECK-NEXT: %Pivot.14 = icmp slt i32 %tmp158, 15
-;CHECK-NEXT: br i1 %Pivot.14, label %bb332, label %LeafBlock.11
+;CHECK: NodeBlock13: ; preds = %NodeBlock15
+;CHECK-NEXT: %Pivot14 = icmp slt i32 %tmp158, 15
+;CHECK-NEXT: br i1 %Pivot14, label %bb332, label %LeafBlock11
-;CHECK: LeafBlock.11: ; preds = %NodeBlock.13
+;CHECK: LeafBlock11: ; preds = %NodeBlock13
;CHECK-NEXT: %SwitchLeaf12 = icmp eq i32 %tmp158, 15
;CHECK-NEXT: br i1 %SwitchLeaf12, label %bb334, label %NewDefault
-;CHECK: NodeBlock.9: ; preds = %NodeBlock.17
-;CHECK-NEXT: %Pivot.10 = icmp slt i32 %tmp158, 11
-;CHECK-NEXT: br i1 %Pivot.10, label %bb324, label %NodeBlock.7
+;CHECK: NodeBlock9: ; preds = %NodeBlock17
+;CHECK-NEXT: %Pivot10 = icmp slt i32 %tmp158, 11
+;CHECK-NEXT: br i1 %Pivot10, label %bb324, label %NodeBlock7
-;CHECK: NodeBlock.7: ; preds = %NodeBlock.9
-;CHECK-NEXT: %Pivot.8 = icmp slt i32 %tmp158, 12
-;CHECK-NEXT: br i1 %Pivot.8, label %bb326, label %bb328
+;CHECK: NodeBlock7: ; preds = %NodeBlock9
+;CHECK-NEXT: %Pivot8 = icmp slt i32 %tmp158, 12
+;CHECK-NEXT: br i1 %Pivot8, label %bb326, label %bb328
-;CHECK: NodeBlock.5: ; preds = %NodeBlock.19
-;CHECK-NEXT: %Pivot.6 = icmp slt i32 %tmp158, 7
-;CHECK-NEXT: br i1 %Pivot.6, label %NodeBlock, label %NodeBlock.3
+;CHECK: NodeBlock5: ; preds = %NodeBlock19
+;CHECK-NEXT: %Pivot6 = icmp slt i32 %tmp158, 7
+;CHECK-NEXT: br i1 %Pivot6, label %NodeBlock, label %NodeBlock3
-;CHECK: NodeBlock.3: ; preds = %NodeBlock.5
-;CHECK-NEXT: %Pivot.4 = icmp slt i32 %tmp158, 8
-;CHECK-NEXT: br i1 %Pivot.4, label %bb, label %NodeBlock.1
+;CHECK: NodeBlock3: ; preds = %NodeBlock5
+;CHECK-NEXT: %Pivot4 = icmp slt i32 %tmp158, 8
+;CHECK-NEXT: br i1 %Pivot4, label %bb, label %NodeBlock1
-;CHECK: NodeBlock.1: ; preds = %NodeBlock.3
-;CHECK-NEXT: %Pivot.2 = icmp slt i32 %tmp158, 9
-;CHECK-NEXT: br i1 %Pivot.2, label %bb338, label %bb322
+;CHECK: NodeBlock1: ; preds = %NodeBlock3
+;CHECK-NEXT: %Pivot2 = icmp slt i32 %tmp158, 9
+;CHECK-NEXT: br i1 %Pivot2, label %bb338, label %bb322
-;CHECK: NodeBlock: ; preds = %NodeBlock.5
+;CHECK: NodeBlock: ; preds = %NodeBlock5
;CHECK-NEXT: %Pivot = icmp slt i32 %tmp158, 0
;CHECK-NEXT: br i1 %Pivot, label %LeafBlock, label %bb338
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 8a2eedd96baf..6aaf594b3056 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -mem2reg -S | FileCheck %s
-define double @testfunc(i32 %i, double %j) nounwind ssp {
+define double @testfunc(i32 %i, double %j) nounwind ssp !dbg !1 {
entry:
%i_addr = alloca i32 ; <i32*> [#uses=2]
%j_addr = alloca double ; <double*> [#uses=2]
@@ -10,8 +10,8 @@ entry:
call void @llvm.dbg.declare(metadata i32* %i_addr, metadata !0, metadata !DIExpression()), !dbg !8
; CHECK: call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata ![[IVAR:[0-9]*]], metadata {{.*}})
; CHECK: call void @llvm.dbg.value(metadata double %j, i64 0, metadata ![[JVAR:[0-9]*]], metadata {{.*}})
-; CHECK: ![[IVAR]] = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i"
-; CHECK: ![[JVAR]] = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j"
+; CHECK: ![[IVAR]] = !DILocalVariable(name: "i"
+; CHECK: ![[JVAR]] = !DILocalVariable(name: "j"
store i32 %i, i32* %i_addr
call void @llvm.dbg.declare(metadata double* %j_addr, metadata !9, metadata !DIExpression()), !dbg !8
store double %j, double* %j_addr
@@ -35,16 +35,16 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!14}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 2, arg: 0, scope: !1, file: !2, type: !7)
-!1 = !DISubprogram(name: "testfunc", linkageName: "testfunc", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !12, scope: !2, type: !4, function: double (i32, double)* @testfunc)
+!0 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !1, file: !2, type: !7)
+!1 = distinct !DISubprogram(name: "testfunc", linkageName: "testfunc", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !12, scope: !2, type: !4)
!2 = !DIFile(filename: "testfunc.c", directory: "/tmp")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13, subprograms: !{!1})
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !7, !6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !DILocation(line: 2, scope: !1)
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 2, arg: 0, scope: !1, file: !2, type: !6)
+!9 = !DILocalVariable(name: "j", line: 2, arg: 2, scope: !1, file: !2, type: !6)
!10 = !DILocation(line: 3, scope: !11)
!11 = distinct !DILexicalBlock(line: 2, column: 0, file: !12, scope: !1)
!12 = !DIFile(filename: "testfunc.c", directory: "/tmp")
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 831221b7f97f..071d708e1fe8 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -1,10 +1,18 @@
-; RUN: opt -mem2reg < %s | llvm-dis | grep ".dbg " | count 7
+; RUN: opt -S -mem2reg <%s | FileCheck %s
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare void @foo(i32, i64, i8*)
-define void @baz(i32 %a) nounwind ssp {
+define void @baz(i32 %a) nounwind ssp !dbg !1 {
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %"alloca point" = bitcast i32 0 to i32{{$}}
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a,{{.*}}, !dbg
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a,{{.*}}, !dbg
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 55,{{.*}}, !dbg
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* bitcast (void (i32)* @baz to i8*),{{.*}}, !dbg
+; CHECK-NEXT: call void @foo({{.*}}, !dbg
+; CHECK-NEXT: br label %return, !dbg
entry:
%x_addr.i = alloca i32 ; <i32*> [#uses=2]
%y_addr.i = alloca i64 ; <i64*> [#uses=2]
@@ -26,30 +34,32 @@ entry:
call void @foo(i32 %1, i64 %2, i8* %3) nounwind, !dbg !18
br label %return, !dbg !19
+; CHECK-LABEL: return:
+; CHECK-NEXT: ret void, !dbg
return: ; preds = %entry
ret void, !dbg !19
}
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!22}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 8, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "baz", linkageName: "baz", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 8, file: !20, scope: !2, type: !4, function: void (i32)* @baz)
+!0 = !DILocalVariable(name: "a", line: 8, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "baz", linkageName: "baz", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 8, file: !20, scope: !2, type: !4)
!2 = !DIFile(filename: "bar.c", directory: "/tmp/")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21, subprograms: !{!1})
!4 = !DISubroutineType(types: !5)
!5 = !{null, !6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!7 = !DILocation(line: 8, scope: !1)
!8 = !DILocation(line: 9, scope: !1)
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 4, arg: 0, scope: !10, file: !2, type: !6)
-!10 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !20, scope: !2, type: !11)
+!9 = !DILocalVariable(name: "x", line: 4, arg: 1, scope: !10, file: !2, type: !6)
+!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !20, scope: !2, type: !11)
!11 = !DISubroutineType(types: !12)
!12 = !{null, !6, !13, !14}
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !20, scope: !2, baseType: null)
!15 = !DILocation(line: 4, scope: !10, inlinedAt: !8)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 4, arg: 0, scope: !10, file: !2, type: !13)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "z", line: 4, arg: 0, scope: !10, file: !2, type: !14)
+!16 = !DILocalVariable(name: "y", line: 4, arg: 2, scope: !10, file: !2, type: !13)
+!17 = !DILocalVariable(name: "z", line: 4, arg: 3, scope: !10, file: !2, type: !14)
!18 = !DILocation(line: 5, scope: !10, inlinedAt: !8)
!19 = !DILocation(line: 10, scope: !1)
!20 = !DIFile(filename: "bar.c", directory: "/tmp/")
diff --git a/test/Transforms/Mem2Reg/optnone.ll b/test/Transforms/Mem2Reg/optnone.ll
new file mode 100644
index 000000000000..41ee77aff79d
--- /dev/null
+++ b/test/Transforms/Mem2Reg/optnone.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This function is optnone, so the allocas should not be eliminated.
+
+; CHECK-LABEL: @testfunc
+; CHECK: alloca
+; CHECK: alloca
+define double @testfunc(i32 %i, double %j) optnone noinline {
+ %I = alloca i32 ; <i32*> [#uses=4]
+ %J = alloca double ; <double*> [#uses=2]
+ store i32 %i, i32* %I
+ store double %j, double* %J
+ %t1 = load i32, i32* %I ; <i32> [#uses=1]
+ %t2 = add i32 %t1, 1 ; <i32> [#uses=1]
+ store i32 %t2, i32* %I
+ %t3 = load i32, i32* %I ; <i32> [#uses=1]
+ %t4 = sitofp i32 %t3 to double ; <double> [#uses=1]
+ %t5 = load double, double* %J ; <double> [#uses=1]
+ %t6 = fmul double %t4, %t5 ; <double> [#uses=1]
+ ret double %t6
+}
diff --git a/test/Transforms/Mem2Reg/pr24179.ll b/test/Transforms/Mem2Reg/pr24179.ll
new file mode 100644
index 000000000000..e4216ce4daa1
--- /dev/null
+++ b/test/Transforms/Mem2Reg/pr24179.ll
@@ -0,0 +1,44 @@
+; RUN: opt -mem2reg < %s -S | FileCheck %s
+
+declare i32 @def(i32)
+declare i1 @use(i32)
+
+; Special case of a single-BB alloca does not apply here since the load
+; is affected by the following store. Expect this case to be identified
+; and a PHI node to be created.
+define void @test1() {
+; CHECK-LABEL: @test1(
+ entry:
+ %t = alloca i32
+ br label %loop
+
+ loop:
+ %v = load i32, i32* %t
+ %c = call i1 @use(i32 %v)
+; CHECK: [[PHI:%.*]] = phi i32 [ undef, %entry ], [ %n, %loop ]
+; CHECK: call i1 @use(i32 [[PHI]])
+ %n = call i32 @def(i32 7)
+ store i32 %n, i32* %t
+ br i1 %c, label %loop, label %exit
+
+ exit:
+ ret void
+}
+
+; Same as above, except there is no following store. The alloca should just be
+; replaced with an undef
+define void @test2() {
+; CHECK-LABEL: @test2(
+ entry:
+ %t = alloca i32
+ br label %loop
+
+ loop:
+ %v = load i32, i32* %t
+ %c = call i1 @use(i32 %v)
+; CHECK: %c = call i1 @use(i32 undef)
+ br i1 %c, label %loop, label %exit
+
+ exit:
+ ret void
+}
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 731847440d76..6181543cfc63 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -206,5 +206,6 @@ declare void @f1(%struct.big* nocapture sret)
declare void @f2(%struct.big*)
; CHECK: attributes [[NUW]] = { nounwind }
-; CHECK: attributes #1 = { nounwind ssp }
-; CHECK: attributes #2 = { nounwind ssp uwtable }
+; CHECK: attributes #1 = { argmemonly nounwind }
+; CHECK: attributes #2 = { nounwind ssp }
+; CHECK: attributes #3 = { nounwind ssp uwtable }
diff --git a/test/Transforms/MemCpyOpt/nontemporal.ll b/test/Transforms/MemCpyOpt/nontemporal.ll
new file mode 100644
index 000000000000..d9dafcc7b816
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/nontemporal.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that we don't combine nontemporal stores into memset calls.
+
+define void @nontemporal_stores_1(<4 x float>* nocapture %dst) {
+; CHECK-LABEL: @nontemporal_stores_1
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr2, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr3, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr4, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr5, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr6, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr7, align 16, !nontemporal !0
+; CHECK-NEXT: ret void
+entry:
+ store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+ %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1
+ store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+ %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 2
+ store <4 x float> zeroinitializer, <4 x float>* %ptr2, align 16, !nontemporal !0
+ %ptr3 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 3
+ store <4 x float> zeroinitializer, <4 x float>* %ptr3, align 16, !nontemporal !0
+ %ptr4 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 4
+ store <4 x float> zeroinitializer, <4 x float>* %ptr4, align 16, !nontemporal !0
+ %ptr5 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 5
+ store <4 x float> zeroinitializer, <4 x float>* %ptr5, align 16, !nontemporal !0
+ %ptr6 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 6
+ store <4 x float> zeroinitializer, <4 x float>* %ptr6, align 16, !nontemporal !0
+ %ptr7 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 7
+ store <4 x float> zeroinitializer, <4 x float>* %ptr7, align 16, !nontemporal !0
+ ret void
+}
+
+define void @nontemporal_stores_2(<4 x float>* nocapture %dst) {
+; CHECK-LABEL: @nontemporal_stores_2
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+; CHECK-NEXT: ret void
+entry:
+ store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+ %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1
+ store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+ ret void
+}
+
+!0 = !{i32 1}
diff --git a/test/Transforms/MergeFunc/apply_function_attributes.ll b/test/Transforms/MergeFunc/apply_function_attributes.ll
new file mode 100644
index 000000000000..e9ede4518206
--- /dev/null
+++ b/test/Transforms/MergeFunc/apply_function_attributes.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+%Opaque_type = type opaque
+%S2i = type <{ i64, i64 }>
+%D2i = type <{ i64, i64 }>
+%Di = type <{ i32 }>
+%Si = type <{ i32 }>
+
+define void @B(%Opaque_type* sret %a, %S2i* %b, i32* %xp, i32* %yp) {
+ %x = load i32, i32* %xp
+ %y = load i32, i32* %yp
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = add i32 %sum2, %y
+ ret void
+}
+
+define void @C(%Opaque_type* sret %a, %S2i* %b, i32* %xp, i32* %yp) {
+ %x = load i32, i32* %xp
+ %y = load i32, i32* %yp
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = add i32 %sum2, %y
+ ret void
+}
+
+define void @A(%Opaque_type* sret %a, %D2i* %b, i32* %xp, i32* %yp) {
+ %x = load i32, i32* %xp
+ %y = load i32, i32* %yp
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = add i32 %sum2, %y
+ ret void
+}
+
+; Make sure we transfer the parameter attributes to the call site.
+; CHECK-LABEL: define void @C(%Opaque_type* sret
+; CHECK: tail call void bitcast (void (%Opaque_type*, %D2i*, i32*, i32*)* @A to void (%Opaque_type*, %S2i*, i32*, i32*)*)(%Opaque_type* sret %0, %S2i* %1, i32* %2, i32* %3)
+; CHECK: ret void
+
+
+; Make sure we transfer the parameter attributes to the call site.
+; CHECK-LABEL: define void @B(%Opaque_type* sret
+; CHECK: %5 = bitcast
+; CHECK: tail call void @A(%Opaque_type* sret %0, %D2i* %5, i32* %2, i32* %3)
+; CHECK: ret void
+
diff --git a/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
index b955e3c9582e..806ca3c17a6a 100644
--- a/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
+++ b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -63,14 +63,6 @@ lpad:
resume { i8*, i32 } zeroinitializer
}
-define i8 @call_with_same_range() {
-; CHECK-LABEL: @call_with_same_range
-; CHECK: tail call i8 @call_with_range
- bitcast i8 0 to i8
- %out = call i8 @dummy(), !range !0
- ret i8 %out
-}
-
define i8 @invoke_with_same_range() personality i8* undef {
; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
@@ -84,6 +76,16 @@ lpad:
resume { i8*, i32 } zeroinitializer
}
+define i8 @call_with_same_range() {
+; CHECK-LABEL: @call_with_same_range
+; CHECK: tail call i8 @call_with_range
+ bitcast i8 0 to i8
+ %out = call i8 @dummy(), !range !0
+ ret i8 %out
+}
+
+
+
declare i8 @dummy();
declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/MergeFunc/constant-entire-value.ll b/test/Transforms/MergeFunc/constant-entire-value.ll
new file mode 100644
index 000000000000..cb193d06ee41
--- /dev/null
+++ b/test/Transforms/MergeFunc/constant-entire-value.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; RUN: opt -S -mergefunc < %s | FileCheck -check-prefix=NOPLUS %s
+
+; This makes sure that zeros in constants don't cause problems with string based
+; memory comparisons
+define internal i32 @sum(i32 %x, i32 %y) {
+; CHECK-LABEL: @sum
+ %sum = add i32 %x, %y
+ %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 2 ], 2
+ %sum2 = add i32 %sum, %1
+ %sum3 = add i32 %sum2, %y
+ ret i32 %sum3
+}
+
+define internal i32 @add(i32 %x, i32 %y) {
+; CHECK-LABEL: @add
+ %sum = add i32 %x, %y
+ %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 1 ], 2
+ %sum2 = add i32 %sum, %1
+ %sum3 = add i32 %sum2, %y
+ ret i32 %sum3
+}
+
+define internal i32 @plus(i32 %x, i32 %y) {
+; NOPLUS-NOT: @plus
+ %sum = add i32 %x, %y
+ %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 5 ], 2
+ %sum2 = add i32 %sum, %1
+ %sum3 = add i32 %sum2, %y
+ ret i32 %sum3
+}
+
+define internal i32 @next(i32 %x, i32 %y) {
+; CHECK-LABEL: @next
+ %sum = add i32 %x, %y
+ %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 5 ], 2
+ %sum2 = add i32 %sum, %1
+ %sum3 = add i32 %sum2, %y
+ ret i32 %sum3
+}
+
diff --git a/test/Transforms/MergeFunc/crash2.ll b/test/Transforms/MergeFunc/crash2.ll
new file mode 100644
index 000000000000..4b3a3f911e7e
--- /dev/null
+++ b/test/Transforms/MergeFunc/crash2.ll
@@ -0,0 +1,54 @@
+; RUN: opt %s -mergefunc -globalopt -S -o - | FileCheck %s
+
+; Make sure we don't crash on this example. This test is supposed to test that
+; MergeFunctions clears its GlobalNumbers value map. If this map still contains
+; entries when running globalopt and the MergeFunctions instance is still alive
+; the optimization of @G would cause an assert because globalopt would do an
+; RAUW on @G which still exists as an entry in the GlobalNumbers ValueMap which
+; causes an assert in the ValueHandle call back because we are RAUWing with a
+; different type (AllocaInst) than its key type (GlobalValue).
+
+@G = internal global i8** null
+@G2 = internal global i8** null
+
+define i32 @main(i32 %argc, i8** %argv) norecurse {
+; CHECK: alloca
+ store i8** %argv, i8*** @G
+ ret i32 0
+}
+
+define internal i8** @dead1(i64 %p) {
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ %tmp = load i8**, i8*** @G
+ ret i8** %tmp
+}
+
+define internal i8** @dead2(i64 %p) {
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ %tmp = load i8**, i8*** @G2
+ ret i8** %tmp
+}
+
+define void @left(i64 %p) {
+entry-block:
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ ret void
+}
+
+define void @right(i64 %p) {
+entry-block:
+ call void @left(i64 %p)
+ call void @left(i64 %p)
+ call void @left(i64 %p)
+ call void @left(i64 %p)
+ ret void
+}
diff --git a/test/Transforms/MergeFunc/gep-base-type.ll b/test/Transforms/MergeFunc/gep-base-type.ll
new file mode 100644
index 000000000000..bfbb247fb3a5
--- /dev/null
+++ b/test/Transforms/MergeFunc/gep-base-type.ll
@@ -0,0 +1,46 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; These should not be merged, the type of the GEP pointer argument does not have
+; the same stride.
+
+%"struct1" = type <{ i8*, i32, [4 x i8] }>
+%"struct2" = type { i8*, { i64, i64 } }
+
+define internal %struct2* @Ffunc(%struct2* %P, i64 %i) {
+; CHECK-LABEL: @Ffunc(
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %2 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %3 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %4 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %5 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %6 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ ret %struct2* %6
+}
+
+
+define internal %struct1* @Gfunc(%struct1* %P, i64 %i) {
+; CHECK-LABEL: @Gfunc(
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %2 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %3 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %4 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %5 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %6 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ ret %struct1* %6
+}
+
diff --git a/test/Transforms/MergeFunc/inttoptr-address-space.ll b/test/Transforms/MergeFunc/inttoptr-address-space.ll
index 5f672debd919..86deb2c94953 100644
--- a/test/Transforms/MergeFunc/inttoptr-address-space.ll
+++ b/test/Transforms/MergeFunc/inttoptr-address-space.ll
@@ -21,7 +21,7 @@ define internal i8* @func35(%.qux.2585 addrspace(1)* nocapture %this) align 2 {
bb:
; CHECK-LABEL: @func35(
; CHECK: %[[V2:.+]] = bitcast %.qux.2585 addrspace(1)* %{{.*}} to %.qux.2496 addrspace(1)*
-; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496 addrspace(1)* %[[V2]])
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496 addrspace(1)* nocapture %[[V2]])
; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
%tmp = getelementptr inbounds %.qux.2585, %.qux.2585 addrspace(1)* %this, i32 0, i32 2
%tmp1 = load i8*, i8* addrspace(1)* %tmp, align 4
diff --git a/test/Transforms/MergeFunc/inttoptr.ll b/test/Transforms/MergeFunc/inttoptr.ll
index 0abbf6239a34..05ae766a6e37 100644
--- a/test/Transforms/MergeFunc/inttoptr.ll
+++ b/test/Transforms/MergeFunc/inttoptr.ll
@@ -48,7 +48,7 @@ define internal i8* @func35(%.qux.2585* nocapture %this) align 2 {
bb:
; CHECK-LABEL: @func35(
; CHECK: %[[V2:.+]] = bitcast %.qux.2585* %{{.*}} to %.qux.2496*
-; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* %[[V2]])
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* nocapture %[[V2]])
; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
%tmp = getelementptr inbounds %.qux.2585, %.qux.2585* %this, i32 0, i32 2
%tmp1 = load i8*, i8** %tmp, align 4
diff --git a/test/Transforms/MergeFunc/merge-block-address-other-function.ll b/test/Transforms/MergeFunc/merge-block-address-other-function.ll
new file mode 100644
index 000000000000..ca1a6f2fe2ab
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-block-address-other-function.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+ %retval = alloca i32, align 4
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
+ %cmp = icmp eq i32 %0, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i32 3, i32* %retval
+ br label %return
+
+if.end:
+ %1 = load i32, i32* %i.addr, align 4
+ %cmp1 = icmp eq i32 %1, 3
+ br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+ store i32 56, i32* %retval
+ br label %return
+
+if.end.3:
+ store i32 0, i32* %retval
+ br label %return
+
+return:
+ %2 = load i32, i32* %retval
+ ret i32 %2
+}
+
+
+define internal i8* @Afunc(i32* %P) {
+ store i32 1, i32* %P
+ store i32 3, i32* %P
+ ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-NOT: @Bfunc
+ store i32 1, i32* %P
+ store i32 3, i32* %P
+ ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
diff --git a/test/Transforms/MergeFunc/merge-block-address.ll b/test/Transforms/MergeFunc/merge-block-address.ll
new file mode 100644
index 000000000000..4ce13e5da874
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-block-address.ll
@@ -0,0 +1,91 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; These two functions are identical. The basic block labels are the same, and
+; induce the same CFG. We are testing that block addresses within different
+; functions are compared by their value, and not based on order. Both functions
+; come from the same C-code, but in the first the two val_0/val_1 basic blocks
+; are in a different order (they were manually switched post-compilation).
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %l = alloca i8*, align 8
+ store i32 %i, i32* %i.addr, align 4
+ store i32 0, i32* %ret, align 4
+ store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+ %0 = load i32, i32* %i.addr, align 4
+ %and = and i32 %0, 256
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+ br label %if.end
+
+if.end:
+ %1 = load i8*, i8** %l, align 8
+ br label %indirectgoto
+
+val_1:
+ store i32 42, i32* %ret, align 4
+ br label %end
+
+val_0:
+ store i32 12, i32* %ret, align 4
+ br label %end
+
+
+end:
+ %2 = load i32, i32* %ret, align 4
+ ret i32 %2
+
+indirectgoto:
+ %indirect.goto.dest = phi i8* [ %1, %if.end ]
+ indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: tail call i32 @_Z1fi
+; CHECK-NEXT: ret
+entry:
+ %i.addr = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %l = alloca i8*, align 8
+ store i32 %i, i32* %i.addr, align 4
+ store i32 0, i32* %ret, align 4
+ store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+ %0 = load i32, i32* %i.addr, align 4
+ %and = and i32 %0, 256
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+ br label %if.end
+
+if.end:
+ %1 = load i8*, i8** %l, align 8
+ br label %indirectgoto
+
+val_0:
+ store i32 12, i32* %ret, align 4
+ br label %end
+
+val_1:
+ store i32 42, i32* %ret, align 4
+ br label %end
+
+end:
+ %2 = load i32, i32* %ret, align 4
+ ret i32 %2
+
+indirectgoto:
+ %indirect.goto.dest = phi i8* [ %1, %if.end ]
+ indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
diff --git a/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll b/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll
new file mode 100644
index 000000000000..8c86ab1fbc33
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll
@@ -0,0 +1,20 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; RUN: opt -mergefunc -S < %s | FileCheck -check-prefix=MERGE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Afunc and Bfunc differ only in that one returns i64, the other a pointer.
+; These should be merged.
+define internal i64 @Afunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64 @Afunc
+ store i32 4, i32* %P
+ store i32 6, i32* %Q
+ ret i64 0
+}
+
+define internal i64* @Bfunc(i32* %P, i32* %Q) {
+; MERGE-NOT: @Bfunc
+ store i32 4, i32* %P
+ store i32 6, i32* %Q
+ ret i64* null
+}
+
diff --git a/test/Transforms/MergeFunc/merge-different-vector-types.ll b/test/Transforms/MergeFunc/merge-different-vector-types.ll
new file mode 100644
index 000000000000..7696139b332d
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-different-vector-types.ll
@@ -0,0 +1,18 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; RUN: opt -mergefunc -S < %s | FileCheck -check-prefix=MERGE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Merging should still work even if the values are wrapped in a vector.
+define internal <2 x i64> @Mfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal <2 x i64> @Mfunc
+ store i32 1, i32* %P
+ store i32 1, i32* %Q
+ ret <2 x i64> <i64 0, i64 0>
+}
+
+define internal <2 x i64*> @Nfunc(i32* %P, i32* %Q) {
+; MERGE-NOT: @Nfunc
+ store i32 1, i32* %P
+ store i32 1, i32* %Q
+ ret <2 x i64*> <i64* null, i64* null>
+}
diff --git a/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll b/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
new file mode 100644
index 000000000000..3024a9a76a7e
--- /dev/null
+++ b/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
@@ -0,0 +1,96 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; There is a slight different in these two functions, in that the label values
+; are switched. They are thus not mergeable. This tests that block addresses
+; referring to blocks within each respective compared function are correctly
+; ordered.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1fi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+ %i.addr = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %l = alloca i8*, align 8
+ store i32 %i, i32* %i.addr, align 4
+ store i32 0, i32* %ret, align 4
+; Right here, this is val_0, and later the if might assign val_1
+ store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+ %0 = load i32, i32* %i.addr, align 4
+ %and = and i32 %0, 256
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+ br label %if.end
+
+if.end:
+ %1 = load i8*, i8** %l, align 8
+ br label %indirectgoto
+
+val_0:
+ store i32 12, i32* %ret, align 4
+ br label %end
+
+val_1:
+ store i32 42, i32* %ret, align 4
+ br label %end
+
+end:
+ %2 = load i32, i32* %ret, align 4
+ ret i32 %2
+
+indirectgoto:
+ %indirect.goto.dest = phi i8* [ %1, %if.end ]
+ indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+ %i.addr = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %l = alloca i8*, align 8
+ store i32 %i, i32* %i.addr, align 4
+ store i32 0, i32* %ret, align 4
+; This time, we store val_1 initially, and later the if might assign val_0
+ store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+ %0 = load i32, i32* %i.addr, align 4
+ %and = and i32 %0, 256
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+ br label %if.end
+
+if.end:
+ %1 = load i8*, i8** %l, align 8
+ br label %indirectgoto
+
+val_0:
+ store i32 12, i32* %ret, align 4
+ br label %end
+
+val_1:
+ store i32 42, i32* %ret, align 4
+ br label %end
+
+end:
+ %2 = load i32, i32* %ret, align 4
+ ret i32 %2
+
+indirectgoto:
+ %indirect.goto.dest = phi i8* [ %1, %if.end ]
+ indirectbr i8* %indirect.goto.dest, [label %val_1, label %val_0]
+}
+
diff --git a/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll b/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
new file mode 100644
index 000000000000..e1aa30ac55a2
--- /dev/null
+++ b/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; We should not merge these two functions, because the blocks are different.
+; This tests the handling of block addresses from different functions.
+; ModuleID = '<stdin>'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+define internal i8* @Afunc(i32* %P) {
+; CHECK-LABEL: @Afunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 1, i32* %P
+ store i32 3, i32* %P
+ ret i8* blockaddress(@_Z1fi, %if.then)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-LABEL: @Bfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 1, i32* %P
+ store i32 3, i32* %P
+ ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+ %retval = alloca i32, align 4
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
+ %cmp = icmp eq i32 %0, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i32 3, i32* %retval
+ br label %return
+
+if.end:
+ %1 = load i32, i32* %i.addr, align 4
+ %cmp1 = icmp eq i32 %1, 3
+ br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+ store i32 56, i32* %retval
+ br label %return
+
+if.end.3:
+ store i32 0, i32* %retval
+ br label %return
+
+return:
+ %2 = load i32, i32* %retval
+ ret i32 %2
+}
diff --git a/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll b/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll
new file mode 100644
index 000000000000..c0c6dab792d0
--- /dev/null
+++ b/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll
@@ -0,0 +1,24 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; These should not be merged, as the datalayout says a pointer is 64 bits. No
+; sext/zext is specified, so these functions could lower differently.
+define internal i32 @Ffunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i32 @Ffunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 1, i32* %P
+ store i32 3, i32* %Q
+ ret i32 0
+}
+
+define internal i64* @Gfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64* @Gfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 1, i32* %P
+ store i32 3, i32* %Q
+ ret i64* null
+}
diff --git a/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll b/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll
new file mode 100644
index 000000000000..6bd656408ac1
--- /dev/null
+++ b/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll
@@ -0,0 +1,23 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; These should not be merged, as 1 != 0.
+define internal i64 @Ifunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64 @Ifunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 10, i32* %P
+ store i32 10, i32* %Q
+ ret i64 1
+}
+
+define internal i64* @Jfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64* @Jfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 10, i32* %P
+ store i32 10, i32* %Q
+ ret i64* null
+}
diff --git a/test/Transforms/MergeFunc/ranges-multiple.ll b/test/Transforms/MergeFunc/ranges-multiple.ll
new file mode 100644
index 000000000000..bfa775d217a7
--- /dev/null
+++ b/test/Transforms/MergeFunc/ranges-multiple.ll
@@ -0,0 +1,44 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+define i1 @cmp_with_range(i8*, i8*) {
+ %v1 = load i8, i8* %0, !range !0
+ %v2 = load i8, i8* %1, !range !0
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+define i1 @cmp_no_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_no_range
+; CHECK-NEXT: %v1 = load i8, i8* %0
+; CHECK-NEXT: %v2 = load i8, i8* %1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+ %v1 = load i8, i8* %0
+ %v2 = load i8, i8* %1
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+define i1 @cmp_different_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_different_range
+; CHECK-NEXT: %v1 = load i8, i8* %0, !range !1
+; CHECK-NEXT: %v2 = load i8, i8* %1, !range !1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+ %v1 = load i8, i8* %0, !range !1
+ %v2 = load i8, i8* %1, !range !1
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+define i1 @cmp_with_same_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_with_same_range
+; CHECK: tail call i1 @cmp_with_range
+ %v1 = load i8, i8* %0, !range !0
+ %v2 = load i8, i8* %1, !range !0
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+; The comparison must check every element of the range, not just the first pair.
+!0 = !{i8 0, i8 2, i8 21, i8 30}
+!1 = !{i8 0, i8 2, i8 21, i8 25}
diff --git a/test/Transforms/MergeFunc/ranges.ll b/test/Transforms/MergeFunc/ranges.ll
index 46a0c76cc7d1..44e71300703b 100644
--- a/test/Transforms/MergeFunc/ranges.ll
+++ b/test/Transforms/MergeFunc/ranges.ll
@@ -8,10 +8,10 @@ define i1 @cmp_with_range(i8*, i8*) {
define i1 @cmp_no_range(i8*, i8*) {
; CHECK-LABEL: @cmp_no_range
-; CHECK-NEXT %v1 = load i8, i8* %0
-; CHECK-NEXT %v2 = load i8, i8* %1
-; CHECK-NEXT %out = icmp eq i8 %v1, %v2
-; CHECK-NEXT ret i1 %out
+; CHECK-NEXT: %v1 = load i8, i8* %0
+; CHECK-NEXT: %v2 = load i8, i8* %1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
%v1 = load i8, i8* %0
%v2 = load i8, i8* %1
%out = icmp eq i8 %v1, %v2
diff --git a/test/Transforms/MergeFunc/self-referential-global.ll b/test/Transforms/MergeFunc/self-referential-global.ll
new file mode 100644
index 000000000000..d3d1c62aa7fe
--- /dev/null
+++ b/test/Transforms/MergeFunc/self-referential-global.ll
@@ -0,0 +1,40 @@
+; RUN: opt -mergefunc -disable-output < %s
+
+; A linked list type and simple payload
+%LL = type { %S, %LL* }
+%S = type { void (%S*, i32)* }
+
+; Table refers to itself via GEP
+@Table = internal global [3 x %LL] [%LL { %S { void (%S*, i32)* @B }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }, %LL { %S { void (%S*, i32)* @A }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }, %LL { %S { void (%S*, i32)* @A }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }], align 16
+
+; The body of this is irrelevant; it is long so that mergefunc doesn't skip it as a small function.
+define internal void @A(%S* %self, i32 %a) {
+ %1 = add i32 %a, 32
+ %2 = add i32 %1, 32
+ %3 = add i32 %2, 32
+ %4 = add i32 %3, 32
+ %5 = add i32 %4, 32
+ %6 = add i32 %5, 32
+ %7 = add i32 %6, 32
+ %8 = add i32 %7, 32
+ %9 = add i32 %8, 32
+ %10 = add i32 %9, 32
+ %11 = add i32 %10, 32
+ ret void
+}
+
+define internal void @B(%S* %self, i32 %a) {
+ %1 = add i32 %a, 32
+ %2 = add i32 %1, 32
+ %3 = add i32 %2, 32
+ %4 = add i32 %3, 32
+ %5 = add i32 %4, 32
+ %6 = add i32 %5, 32
+ %7 = add i32 %6, 32
+ %8 = add i32 %7, 32
+ %9 = add i32 %8, 32
+ %10 = add i32 %9, 32
+ %11 = add i32 %10, 32
+ ret void
+}
+
diff --git a/test/Transforms/MergeFunc/undef-different-types.ll b/test/Transforms/MergeFunc/undef-different-types.ll
new file mode 100644
index 000000000000..4694146e55f4
--- /dev/null
+++ b/test/Transforms/MergeFunc/undef-different-types.ll
@@ -0,0 +1,21 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; RUN: opt -mergefunc -S < %s | FileCheck -check-prefix=MERGE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Cfunc and Dfunc differ only in that one returns i64, the other a pointer, and
+; both return undef. They should be merged. Note undef cannot be merged with
+; anything else, because this implies the ordering will be inconsistent (i.e.
+; -1 == undef and undef == 1, but -1 < 1, so we must have undef != <any int>).
+define internal i64 @Cfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64 @Cfunc
+ store i32 4, i32* %P
+ store i32 6, i32* %Q
+ ret i64 undef
+}
+
+define internal i64* @Dfunc(i32* %P, i32* %Q) {
+; MERGE-NOT: @Dfunc
+ store i32 4, i32* %P
+ store i32 6, i32* %Q
+ ret i64* undef
+}
diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll
index e126bed9b232..213fbe3bbff7 100644
--- a/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/test/Transforms/MetaRenamer/metarenamer.ll
@@ -12,7 +12,7 @@ target triple = "x86_64-pc-linux-gnu"
@func_5_xxx.static_local_3_xxx = internal global i32 3, align 4
@global_3_xxx = common global i32 0, align 4
-@func_7_xxx = weak alias i32 (...)* @aliased_func_7_xxx
+@func_7_xxx = weak alias i32 (...), i32 (...)* @aliased_func_7_xxx
define i32 @aliased_func_7_xxx(...) {
ret i32 0
diff --git a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
index 92fbd20d2982..be219404d5be 100644
--- a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
+++ b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
@@ -123,4 +123,21 @@ define void @reassociate_gep_128(float* %a, i128 %i, i128 %j) {
ret void
}
+%struct.complex = type { float, float }
+
+declare void @bar(%struct.complex*)
+
+define void @different_types(%struct.complex* %input, i64 %i) {
+; CHECK-LABEL: @different_types(
+ %t1 = getelementptr %struct.complex, %struct.complex* %input, i64 %i
+ call void @bar(%struct.complex* %t1)
+ %j = add i64 %i, 5
+ %t2 = getelementptr %struct.complex, %struct.complex* %input, i64 %j, i32 0
+; CHECK: [[cast:[^ ]+]] = bitcast %struct.complex* %t1 to float*
+; CHECK-NEXT: %t2 = getelementptr float, float* [[cast]], i64 10
+; CHECK-NEXT: call void @foo(float* %t2)
+ call void @foo(float* %t2)
+ ret void
+}
+
declare void @llvm.assume(i1)
diff --git a/test/Transforms/NaryReassociate/nary-add.ll b/test/Transforms/NaryReassociate/nary-add.ll
index b3093ff6ecd6..654ef2c49617 100644
--- a/test/Transforms/NaryReassociate/nary-add.ll
+++ b/test/Transforms/NaryReassociate/nary-add.ll
@@ -17,8 +17,9 @@ define void @left_reassociate(i32 %a, i32 %b, i32 %c) {
call void @foo(i32 %1)
%2 = add i32 %b, %c
%3 = add i32 %a, %2
-; CHECK: add i32 [[BASE]], %b
+; CHECK: [[RESULT:%[a-zA-Z0-9]+]] = add i32 [[BASE]], %b
call void @foo(i32 %3)
+; CHECK-NEXT: call void @foo(i32 [[RESULT]])
ret void
}
@@ -35,8 +36,9 @@ define void @right_reassociate(i32 %a, i32 %b, i32 %c) {
call void @foo(i32 %1)
%2 = add i32 %a, %b
%3 = add i32 %2, %c
-; CHECK: add i32 [[BASE]], %b
+; CHECK: [[RESULT:%[a-zA-Z0-9]+]] = add i32 [[BASE]], %b
call void @foo(i32 %3)
+; CHECK-NEXT: call void @foo(i32 [[RESULT]])
ret void
}
diff --git a/test/Transforms/NaryReassociate/nary-mul.ll b/test/Transforms/NaryReassociate/nary-mul.ll
new file mode 100644
index 000000000000..467843c7a39a
--- /dev/null
+++ b/test/Transforms/NaryReassociate/nary-mul.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+declare void @foo(i32)
+
+; CHECK-LABEL: @bar(
+define void @bar(i32 %a, i32 %b, i32 %c) {
+ %1 = mul i32 %a, %c
+; CHECK: [[BASE:%[a-zA-Z0-9]+]] = mul i32 %a, %c
+ call void @foo(i32 %1)
+ %2 = mul i32 %a, %b
+ %3 = mul i32 %2, %c
+; CHECK: [[RESULT:%[a-zA-Z0-9]+]] = mul i32 [[BASE]], %b
+ call void @foo(i32 %3)
+; CHECK-NEXT: call void @foo(i32 [[RESULT]])
+ ret void
+}
+
diff --git a/test/Transforms/NaryReassociate/pr24301.ll b/test/Transforms/NaryReassociate/pr24301.ll
new file mode 100644
index 000000000000..898707831f95
--- /dev/null
+++ b/test/Transforms/NaryReassociate/pr24301.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+
+define i32 @foo(i32 %tmp4) {
+; CHECK-LABEL: @foo(
+entry:
+ %tmp5 = add i32 %tmp4, 8
+ %tmp13 = add i32 %tmp4, -128 ; deleted
+ %tmp14 = add i32 %tmp13, 8 ; => %tmp5 + -128
+ %tmp21 = add i32 119, %tmp4
+ ; do not rewrite %tmp23 against %tmp13 because %tmp13 is already deleted
+ %tmp23 = add i32 %tmp21, -128
+; CHECK: %tmp23 = add i32 %tmp21, -128
+ ret i32 %tmp23
+}
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 9fc5ad1f1008..fc1d087794d6 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -2684,8 +2684,8 @@ define {<2 x float>, <2 x float>} @"\01-[A z]"({}* %self, i8* nocapture %_cmd) n
invoke.cont:
%0 = bitcast {}* %self to i8*
%1 = tail call i8* @objc_retain(i8* %0) nounwind
- tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !2), metadata !DIExpression()), !dbg !DILocation(scope: !2)
- tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !2), metadata !DIExpression()), !dbg !DILocation(scope: !2)
+ tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !DILocalVariable(scope: !2), metadata !DIExpression()), !dbg !DILocation(scope: !2)
+ tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !DILocalVariable(scope: !2), metadata !DIExpression()), !dbg !DILocation(scope: !2)
%ivar = load i64, i64* @"OBJC_IVAR_$_A.myZ", align 8
%add.ptr = getelementptr i8, i8* %0, i64 %ivar
%tmp1 = bitcast i8* %add.ptr to float*
@@ -3018,7 +3018,7 @@ define void @test67(i8* %x) {
!0 = !{}
!1 = !{i32 1, !"Debug Info Version", i32 3}
-!2 = !DISubprogram()
+!2 = distinct !DISubprogram()
; CHECK: attributes #0 = { nounwind readnone }
; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index db3a780f91b0..ef8d8e52d1cc 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -34,7 +34,7 @@ target triple = "x86_64-apple-macosx10.9.0"
@"\01L_OBJC_SELECTOR_REFERENCES_5" = internal global i8* getelementptr inbounds ([14 x i8], [14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
@llvm.used = appending global [6 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1" to i8*), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_5" to i8*)], section "llvm.metadata"
-define i32 @main() uwtable ssp personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) {
+define i32 @main() uwtable ssp personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) !dbg !5 {
entry:
%tmp = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", align 8, !dbg !37
%tmp1 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !dbg !37, !invariant.load !38
@@ -84,7 +84,7 @@ declare void @objc_end_catch()
declare void @objc_exception_rethrow()
-define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp {
+define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp !dbg !27 {
entry:
%tmp = call i8* @objc_retain(i8* %obj) nounwind
call void @llvm.dbg.value(metadata i8* %obj, i64 0, metadata !32, metadata !DIExpression()), !dbg !55
@@ -113,16 +113,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33, !34, !35, !36, !61}
-!0 = !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: true, runtimeVersion: 2, emissionKind: 0, file: !60, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: true, runtimeVersion: 2, emissionKind: 0, file: !60, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
!1 = !{}
!3 = !{!5, !27}
-!5 = !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 10, file: !60, scope: !6, type: !7, function: i32 ()* @main, variables: !11)
+!5 = distinct !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 10, file: !60, scope: !6, type: !7, variables: !11)
!6 = !DIFile(filename: "test.m", directory: "/Volumes/Files/gottesmmcab/Radar/12906997")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !{!12, !21, !25}
-!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "obj", line: 11, scope: !13, file: !6, type: !14)
+!12 = !DILocalVariable(name: "obj", line: 11, scope: !13, file: !6, type: !14)
!13 = distinct !DILexicalBlock(line: 10, column: 0, file: !60, scope: !5)
!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "id", line: 11, file: !60, baseType: !15)
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !60, baseType: !16)
@@ -131,17 +131,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!18 = !DIDerivedType(tag: DW_TAG_member, name: "isa", size: 64, file: !60, scope: !16, baseType: !19)
!19 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, baseType: !20)
!20 = !DICompositeType(tag: DW_TAG_structure_type, name: "objc_class", flags: DIFlagFwdDecl, file: !60)
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ok", line: 13, scope: !22, file: !6, type: !23)
+!21 = !DILocalVariable(name: "ok", line: 13, scope: !22, file: !6, type: !23)
!22 = distinct !DILexicalBlock(line: 12, column: 0, file: !60, scope: !13)
!23 = !DIDerivedType(tag: DW_TAG_typedef, name: "BOOL", line: 62, file: !60, baseType: !24)
!24 = !DIBasicType(tag: DW_TAG_base_type, name: "signed char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "obj2", line: 15, scope: !26, file: !6, type: !14)
+!25 = !DILocalVariable(name: "obj2", line: 15, scope: !26, file: !6, type: !14)
!26 = distinct !DILexicalBlock(line: 14, column: 0, file: !60, scope: !22)
-!27 = !DISubprogram(name: "ThrowFunc", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !60, scope: !6, type: !28, function: void (i8*)* @ThrowFunc, variables: !31)
+!27 = distinct !DISubprogram(name: "ThrowFunc", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !60, scope: !6, type: !28, variables: !31)
!28 = !DISubroutineType(types: !29)
!29 = !{null, !14}
!31 = !{!32}
-!32 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "obj", line: 4, arg: 1, scope: !27, file: !6, type: !14)
+!32 = !DILocalVariable(name: "obj", line: 4, arg: 1, scope: !27, file: !6, type: !14)
!33 = !{i32 1, !"Objective-C Version", i32 2}
!34 = !{i32 1, !"Objective-C Image Info Version", i32 0}
!35 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 464426abfb06..cf14a1f9a663 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -819,5 +819,7 @@ entry:
ret void
}
-; CHECK: attributes [[NUW]] = { nounwind }
+
+; CHECK: attributes #0 = { argmemonly nounwind }
; CHECK: attributes #1 = { nonlazybind }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/provenance.ll b/test/Transforms/ObjCARC/provenance.ll
index aa5a932a86e9..2587c11d01ed 100644
--- a/test/Transforms/ObjCARC/provenance.ll
+++ b/test/Transforms/ObjCARC/provenance.ll
@@ -1,4 +1,4 @@
-; RUN: opt -disable-output -pa-eval %s 2>&1 | FileCheck %s
+; RUN: opt -disable-output -disable-basicaa -pa-eval %s 2>&1 | FileCheck %s
@"\01l_objc_msgSend_fixup_" = global i8 0
@g1 = global i8 0, section "__OBJC,__message_refs,literal_pointers,no_dead_strip"
diff --git a/test/Transforms/PGOProfile/Inputs/branch1.proftext b/test/Transforms/PGOProfile/Inputs/branch1.proftext
new file mode 100644
index 000000000000..3e28112706f1
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/branch1.proftext
@@ -0,0 +1,6 @@
+test_br_1
+25571299074
+2
+3
+2
+
diff --git a/test/Transforms/PGOProfile/Inputs/branch2.proftext b/test/Transforms/PGOProfile/Inputs/branch2.proftext
new file mode 100644
index 000000000000..7d9bd72b29f2
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/branch2.proftext
@@ -0,0 +1,6 @@
+test_br_2
+29667547796
+2
+1
+1
+
diff --git a/test/Transforms/PGOProfile/Inputs/criticaledge.proftext b/test/Transforms/PGOProfile/Inputs/criticaledge.proftext
new file mode 100644
index 000000000000..f369ba7c3504
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/criticaledge.proftext
@@ -0,0 +1,17 @@
+test_criticalEdge
+82323253069
+8
+2
+1
+2
+2
+0
+1
+2
+1
+
+<stdin>:bar
+12884901887
+1
+7
+
diff --git a/test/Transforms/PGOProfile/Inputs/diag.proftext b/test/Transforms/PGOProfile/Inputs/diag.proftext
new file mode 100644
index 000000000000..aaa137e3a420
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/diag.proftext
@@ -0,0 +1,5 @@
+foo
+12884999999
+1
+1
+
diff --git a/test/Transforms/PGOProfile/Inputs/landingpad.proftext b/test/Transforms/PGOProfile/Inputs/landingpad.proftext
new file mode 100644
index 000000000000..b2bd451611bf
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/landingpad.proftext
@@ -0,0 +1,14 @@
+foo
+59130013419
+4
+3
+1
+2
+0
+
+bar
+24868915205
+2
+1
+2
+
diff --git a/test/Transforms/PGOProfile/Inputs/loop1.proftext b/test/Transforms/PGOProfile/Inputs/loop1.proftext
new file mode 100644
index 000000000000..58c05fbe1676
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/loop1.proftext
@@ -0,0 +1,6 @@
+test_simple_for
+34137660316
+2
+96
+4
+
diff --git a/test/Transforms/PGOProfile/Inputs/loop2.proftext b/test/Transforms/PGOProfile/Inputs/loop2.proftext
new file mode 100644
index 000000000000..1c429ea5d5f4
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/loop2.proftext
@@ -0,0 +1,7 @@
+test_nested_for
+53929068288
+3
+33
+10
+6
+
diff --git a/test/Transforms/PGOProfile/Inputs/switch.proftext b/test/Transforms/PGOProfile/Inputs/switch.proftext
new file mode 100644
index 000000000000..7b406b87ef70
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/switch.proftext
@@ -0,0 +1,8 @@
+test_switch
+46200943743
+4
+0
+5
+2
+3
+
diff --git a/test/Transforms/PGOProfile/branch1.ll b/test/Transforms/PGOProfile/branch1.ll
new file mode 100644
index 000000000000..cc354d3425c6
--- /dev/null
+++ b/test/Transforms/PGOProfile/branch1.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/branch1.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_br_1 = private constant [9 x i8] c"test_br_1"
+
+define i32 @test_br_1(i32 %i) {
+entry:
+; GEN: entry:
+; GEN-NOT: llvm.instrprof.increment
+ %cmp = icmp sgt i32 %i, 0
+ br i1 %cmp, label %if.then, label %if.end
+; USE: br i1 %cmp, label %if.then, label %if.end
+; USE-SAME: !prof ![[BW_ENTRY:[0-9]+]]
+; USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 2, i32 1}
+
+if.then:
+; GEN: if.then:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_1, i32 0, i32 0), i64 25571299074, i32 2, i32 1)
+ %add = add nsw i32 %i, 2
+ br label %if.end
+
+if.end:
+; GEN: if.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_1, i32 0, i32 0), i64 25571299074, i32 2, i32 0)
+ %retv = phi i32 [ %add, %if.then ], [ %i, %entry ]
+ ret i32 %retv
+}
diff --git a/test/Transforms/PGOProfile/branch2.ll b/test/Transforms/PGOProfile/branch2.ll
new file mode 100644
index 000000000000..1e8bc5ec2a38
--- /dev/null
+++ b/test/Transforms/PGOProfile/branch2.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/branch2.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_br_2 = private constant [9 x i8] c"test_br_2"
+
+define i32 @test_br_2(i32 %i) {
+entry:
+; GEN: entry:
+; GEN-NOT: llvm.instrprof.increment
+ %cmp = icmp sgt i32 %i, 0
+ br i1 %cmp, label %if.then, label %if.else
+; USE: br i1 %cmp, label %if.then, label %if.else
+; USE-SAME: !prof ![[BW_ENTRY:[0-9]+]]
+; USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 1, i32 1}
+
+if.then:
+; GEN: if.then:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_2, i32 0, i32 0), i64 29667547796, i32 2, i32 0)
+ %add = add nsw i32 %i, 2
+ br label %if.end
+
+if.else:
+; GEN: if.else:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_2, i32 0, i32 0), i64 29667547796, i32 2, i32 1)
+ %sub = sub nsw i32 %i, 2
+ br label %if.end
+
+if.end:
+; GEN: if.end:
+; GEN-NOT: llvm.instrprof.increment
+ %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
+ ret i32 %retv
+; GEN: ret
+}
diff --git a/test/Transforms/PGOProfile/criticaledge.ll b/test/Transforms/PGOProfile/criticaledge.ll
new file mode 100644
index 000000000000..0089bbea1558
--- /dev/null
+++ b/test/Transforms/PGOProfile/criticaledge.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/criticaledge.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_criticalEdge = private constant [17 x i8] c"test_criticalEdge"
+; GEN: @__profn__stdin__bar = private constant [11 x i8] c"<stdin>:bar"
+
+define i32 @test_criticalEdge(i32 %i, i32 %j) {
+entry:
+; CHECK: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ switch i32 %i, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb2
+ i32 4, label %sw.bb2
+; CHECK: i32 3, label %entry.sw.bb2_crit_edge
+; CHECK: i32 4, label %entry.sw.bb2_crit_edge1
+ i32 5, label %sw.bb2
+ ]
+; USE: ]
+; USE-SAME: !prof ![[BW_SWITCH:[0-9]+]]
+
+; CHECK: entry.sw.bb2_crit_edge1:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 1)
+; CHECK: br label %sw.bb2
+
+; CHECK: entry.sw.bb2_crit_edge:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 0)
+; CHECK: br label %sw.bb2
+
+sw.bb:
+; GEN: sw.bb:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 5)
+ %call = call i32 @bar(i32 2)
+ br label %sw.epilog
+
+sw.bb1:
+; GEN: sw.bb1:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 4)
+ %call2 = call i32 @bar(i32 1024)
+ br label %sw.epilog
+
+sw.bb2:
+; GEN: sw.bb2:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %cmp = icmp eq i32 %j, 2
+ br i1 %cmp, label %if.then, label %if.end
+; USE: br i1 %cmp, label %if.then, label %if.end
+; USE-SAME: !prof ![[BW_SW_BB2:[0-9]+]]
+
+if.then:
+; GEN: if.then:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 2)
+ %call4 = call i32 @bar(i32 4)
+ br label %return
+
+if.end:
+; GEN: if.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 3)
+ %call5 = call i32 @bar(i32 8)
+ br label %sw.epilog
+
+sw.default:
+; GEN: sw.default:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %call6 = call i32 @bar(i32 32)
+ %cmp7 = icmp sgt i32 %j, 10
+ br i1 %cmp7, label %if.then8, label %if.end9
+; USE: br i1 %cmp7, label %if.then8, label %if.end9
+; USE-SAME: !prof ![[BW_SW_DEFAULT:[0-9]+]]
+
+if.then8:
+; GEN: if.then8:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 7)
+ %add = add nsw i32 %call6, 10
+ br label %if.end9
+
+if.end9:
+; GEN: if.end9:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 6)
+ %res.0 = phi i32 [ %add, %if.then8 ], [ %call6, %sw.default ]
+ br label %sw.epilog
+
+sw.epilog:
+; GEN: sw.epilog:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %res.1 = phi i32 [ %res.0, %if.end9 ], [ %call5, %if.end ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
+ br label %return
+
+return:
+; GEN: return:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %retval = phi i32 [ %res.1, %sw.epilog ], [ %call4, %if.then ]
+ ret i32 %retval
+}
+
+define internal i32 @bar(i32 %i) {
+entry:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__bar, i32 0, i32 0), i64 12884901887, i32 1, i32 0)
+ ret i32 %i
+}
+
+; USE: ![[BW_SWITCH]] = !{!"branch_weights", i32 2, i32 1, i32 0, i32 2, i32 1, i32 1}
+; USE: ![[BW_SW_BB2]] = !{!"branch_weights", i32 2, i32 2}
+; USE: ![[BW_SW_DEFAULT]] = !{!"branch_weights", i32 1, i32 1}
diff --git a/test/Transforms/PGOProfile/diag_mismatch.ll b/test/Transforms/PGOProfile/diag_mismatch.ll
new file mode 100644
index 000000000000..a2d0b20620f0
--- /dev/null
+++ b/test/Transforms/PGOProfile/diag_mismatch.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-profdata merge %S/Inputs/diag.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
+
+; CHECK: Function control flow change detected (hash mismatch) foo
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo() {
+entry:
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/diag_no_funcprofdata.ll b/test/Transforms/PGOProfile/diag_no_funcprofdata.ll
new file mode 100644
index 000000000000..2e5ec0444b42
--- /dev/null
+++ b/test/Transforms/PGOProfile/diag_no_funcprofdata.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-profdata merge %S/Inputs/diag.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
+
+; CHECK: No profile data available for function bar
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @bar() {
+entry:
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/diag_no_profile.ll b/test/Transforms/PGOProfile/diag_no_profile.ll
new file mode 100644
index 000000000000..ce7b59b8f69d
--- /dev/null
+++ b/test/Transforms/PGOProfile/diag_no_profile.ll
@@ -0,0 +1,9 @@
+; RUN: not opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo() {
+entry:
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/landingpad.ll b/test/Transforms/PGOProfile/landingpad.ll
new file mode 100644
index 000000000000..33fe62fbae03
--- /dev/null
+++ b/test/Transforms/PGOProfile/landingpad.ll
@@ -0,0 +1,124 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/landingpad.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@val = global i32 0, align 4
+@_ZTIi = external constant i8*
+; GEN: @__profn_bar = private constant [3 x i8] c"bar"
+; GEN: @__profn_foo = private constant [3 x i8] c"foo"
+
+define i32 @bar(i32 %i) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %rem = srem i32 %i, 3
+ %tobool = icmp ne i32 %rem, 0
+ br i1 %tobool, label %if.then, label %if.end
+; USE: br i1 %tobool, label %if.then, label %if.end
+; USE-SAME: !prof ![[BW_BAR_ENTRY:[0-9]+]]
+
+if.then:
+; GEN: if.then:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 24868915205, i32 2, i32 1)
+ %exception = call i8* @__cxa_allocate_exception(i64 4)
+ %tmp = bitcast i8* %exception to i32*
+ store i32 %i, i32* %tmp, align 16
+ call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+ unreachable
+
+if.end:
+; GEN: if.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 24868915205, i32 2, i32 0)
+ ret i32 0
+}
+
+declare i8* @__cxa_allocate_exception(i64)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+define i32 @foo(i32 %i) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %rem = srem i32 %i, 2
+ %tobool = icmp ne i32 %rem, 0
+ br i1 %tobool, label %if.then, label %if.end
+; USE: br i1 %tobool, label %if.then, label %if.end
+; USE-SAME: !prof ![[BW_FOO_ENTRY:[0-9]+]]
+
+if.then:
+; GEN: if.then:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %mul = mul nsw i32 %i, 7
+ %call = invoke i32 @bar(i32 %mul)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; GEN: invoke.cont:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 59130013419, i32 4, i32 1)
+ br label %if.end
+
+lpad:
+; GEN: lpad:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %tmp = landingpad { i8*, i32 }
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %tmp1 = extractvalue { i8*, i32 } %tmp, 0
+ %tmp2 = extractvalue { i8*, i32 } %tmp, 1
+ br label %catch.dispatch
+
+catch.dispatch:
+; GEN: catch.dispatch:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %tmp3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+ %matches = icmp eq i32 %tmp2, %tmp3
+ br i1 %matches, label %catch, label %eh.resume
+; USE: br i1 %matches, label %catch, label %eh.resume
+; USE-SAME: !prof ![[BW_CATCH_DISPATCH:[0-9]+]]
+
+catch:
+; GEN: catch:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 59130013419, i32 4, i32 2)
+ %tmp4 = call i8* @__cxa_begin_catch(i8* %tmp1)
+ %tmp5 = bitcast i8* %tmp4 to i32*
+ %tmp6 = load i32, i32* %tmp5, align 4
+ %tmp7 = load i32, i32* @val, align 4
+ %sub = sub nsw i32 %tmp7, %tmp6
+ store i32 %sub, i32* @val, align 4
+ call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont:
+; GEN: try.cont:
+; GEN-NOT: call void @llvm.instrprof.increment
+ ret i32 -1
+
+if.end:
+; GEN: if.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 59130013419, i32 4, i32 0)
+ %tmp8 = load i32, i32* @val, align 4
+ %add = add nsw i32 %tmp8, %i
+ store i32 %add, i32* @val, align 4
+ br label %try.cont
+
+eh.resume:
+; GEN: eh.resume:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 59130013419, i32 4, i32 3)
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %tmp1, 0
+ %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %tmp2, 1
+ resume { i8*, i32 } %lpad.val3
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+; USE: ![[BW_BAR_ENTRY]] = !{!"branch_weights", i32 2, i32 1}
+; USE: ![[BW_FOO_ENTRY]] = !{!"branch_weights", i32 3, i32 2}
+; USE: ![[BW_CATCH_DISPATCH]] = !{!"branch_weights", i32 2, i32 0}
diff --git a/test/Transforms/PGOProfile/loop1.ll b/test/Transforms/PGOProfile/loop1.ll
new file mode 100644
index 000000000000..aa5aa86b1e54
--- /dev/null
+++ b/test/Transforms/PGOProfile/loop1.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/loop1.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_simple_for = private constant [15 x i8] c"test_simple_for"
+
+define i32 @test_simple_for(i32 %n) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ br label %for.cond
+
+for.cond:
+; GEN: for.cond:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %i = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
+ %sum = phi i32 [ 1, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i, %n
+ br i1 %cmp, label %for.body, label %for.end
+; USE: br i1 %cmp, label %for.body, label %for.end
+; USE-SAME: !prof ![[BW_FOR_COND:[0-9]+]]
+; USE: ![[BW_FOR_COND]] = !{!"branch_weights", i32 96, i32 4}
+
+for.body:
+; GEN: for.body:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %inc = add nsw i32 %sum, 1
+ br label %for.inc
+
+for.inc:
+; GEN: for.inc:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_simple_for, i32 0, i32 0), i64 34137660316, i32 2, i32 0)
+ %inc1 = add nsw i32 %i, 1
+ br label %for.cond
+
+for.end:
+; GEN: for.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_simple_for, i32 0, i32 0), i64 34137660316, i32 2, i32 1)
+ ret i32 %sum
+}
diff --git a/test/Transforms/PGOProfile/loop2.ll b/test/Transforms/PGOProfile/loop2.ll
new file mode 100644
index 000000000000..ec3e16d461bc
--- /dev/null
+++ b/test/Transforms/PGOProfile/loop2.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/loop2.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_nested_for = private constant [15 x i8] c"test_nested_for"
+
+define i32 @test_nested_for(i32 %r, i32 %s) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ br label %for.cond.outer
+
+for.cond.outer:
+; GEN: for.cond.outer:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %i.0 = phi i32 [ 0, %entry ], [ %inc.2, %for.inc.outer ]
+ %sum.0 = phi i32 [ 1, %entry ], [ %sum.1, %for.inc.outer ]
+ %cmp = icmp slt i32 %i.0, %r
+ br i1 %cmp, label %for.body.outer, label %for.end.outer
+; USE: br i1 %cmp, label %for.body.outer, label %for.end.outer
+; USE-SAME: !prof ![[BW_FOR_COND_OUTER:[0-9]+]]
+
+for.body.outer:
+; GEN: for.body.outer:
+; GEN-NOT: call void @llvm.instrprof.increment
+ br label %for.cond.inner
+
+for.cond.inner:
+; GEN: for.cond.inner:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %j.0 = phi i32 [ 0, %for.body.outer ], [ %inc.1, %for.inc.inner ]
+ %sum.1 = phi i32 [ %sum.0, %for.body.outer ], [ %inc, %for.inc.inner ]
+ %cmp2 = icmp slt i32 %j.0, %s
+ br i1 %cmp2, label %for.body.inner, label %for.end.inner
+; USE: br i1 %cmp2, label %for.body.inner, label %for.end.inner
+; USE-SAME: !prof ![[BW_FOR_COND_INNER:[0-9]+]]
+
+for.body.inner:
+; GEN: for.body.inner:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %inc = add nsw i32 %sum.1, 1
+ br label %for.inc.inner
+
+for.inc.inner:
+; GEN: for.inc.inner:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_nested_for, i32 0, i32 0), i64 53929068288, i32 3, i32 0)
+ %inc.1 = add nsw i32 %j.0, 1
+ br label %for.cond.inner
+
+for.end.inner:
+; GEN: for.end.inner:
+ br label %for.inc.outer
+
+for.inc.outer:
+; GEN: for.inc.outer:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_nested_for, i32 0, i32 0), i64 53929068288, i32 3, i32 1)
+ %inc.2 = add nsw i32 %i.0, 1
+ br label %for.cond.outer
+
+for.end.outer:
+; GEN: for.end.outer:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_nested_for, i32 0, i32 0), i64 53929068288, i32 3, i32 2)
+ ret i32 %sum.0
+}
+
+; USE-DAG: ![[BW_FOR_COND_OUTER]] = !{!"branch_weights", i32 10, i32 6}
+; USE-DAG: ![[BW_FOR_COND_INNER]] = !{!"branch_weights", i32 33, i32 10}
+
diff --git a/test/Transforms/PGOProfile/single_bb.ll b/test/Transforms/PGOProfile/single_bb.ll
new file mode 100644
index 000000000000..f904d09b8e7a
--- /dev/null
+++ b/test/Transforms/PGOProfile/single_bb.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_single_bb = private constant [9 x i8] c"single_bb"
+
+define i32 @single_bb() {
+entry:
+; GEN: entry:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_single_bb, i32 0, i32 0), i64 12884901887, i32 1, i32 0)
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/switch.ll b/test/Transforms/PGOProfile/switch.ll
new file mode 100644
index 000000000000..3177dc0bd040
--- /dev/null
+++ b/test/Transforms/PGOProfile/switch.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/switch.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_switch = private constant [11 x i8] c"test_switch"
+
+define void @test_switch(i32 %i) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ switch i32 %i, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb2
+ ]
+; USE: ]
+; USE-SAME: !prof ![[BW_SWITCH:[0-9]+]]
+; USE: ![[BW_SWITCH]] = !{!"branch_weights", i32 3, i32 2, i32 0, i32 5}
+
+sw.bb:
+; GEN: sw.bb:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn_test_switch, i32 0, i32 0), i64 46200943743, i32 4, i32 2)
+ br label %sw.epilog
+
+sw.bb1:
+; GEN: sw.bb1:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn_test_switch, i32 0, i32 0), i64 46200943743, i32 4, i32 0)
+ br label %sw.epilog
+
+sw.bb2:
+; GEN: sw.bb2:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn_test_switch, i32 0, i32 0), i64 46200943743, i32 4, i32 1)
+ br label %sw.epilog
+
+sw.default:
+; GEN: sw.default:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn_test_switch, i32 0, i32 0), i64 46200943743, i32 4, i32 3)
+ br label %sw.epilog
+
+sw.epilog:
+; GEN: sw.epilog:
+; GEN-NOT: call void @llvm.instrprof.increment
+ ret void
+; GEN: ret void
+}
diff --git a/test/Transforms/PlaceSafepoints/basic.ll b/test/Transforms/PlaceSafepoints/basic.ll
index 32aa4da68f21..8cdbc217b849 100644
--- a/test/Transforms/PlaceSafepoints/basic.ll
+++ b/test/Transforms/PlaceSafepoints/basic.ll
@@ -74,7 +74,7 @@ define i1 @test_call_with_result() gc "statepoint-example" {
; CHECK: gc.statepoint.p0f_isVoidf
; CHECK: gc.statepoint.p0f_i1i1f
; CHECK: (i64 2882400000, i32 0, i1 (i1)* @i1_return_i1, i32 1, i32 0, i1 false, i32 0, i32 0)
-; CHECK: %call1.2 = call i1 @llvm.experimental.gc.result.i1
+; CHECK: %call12 = call i1 @llvm.experimental.gc.result.i1
entry:
%call1 = tail call i1 (i1) @i1_return_i1(i1 false)
ret i1 %call1
diff --git a/test/Transforms/PlaceSafepoints/call_gc_result.ll b/test/Transforms/PlaceSafepoints/call_gc_result.ll
index d78a0989c3b1..f2929bfd58ab 100644
--- a/test/Transforms/PlaceSafepoints/call_gc_result.ll
+++ b/test/Transforms/PlaceSafepoints/call_gc_result.ll
@@ -21,8 +21,8 @@ branch2:
merge:
;; CHECK: %phi = phi i32 [ %a, %branch2 ], [ %b, %branch1 ]
-;; CHECK-NEXT: %safepoint_token.1 = call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @foo, i32 0, i32 0, i32 0, i32 0)
-;; CHECK-NEXT: %ret.2 = call i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token.1)
+;; CHECK-NEXT: %safepoint_token1 = call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @foo, i32 0, i32 0, i32 0, i32 0)
+;; CHECK-NEXT: %ret2 = call i32 @llvm.experimental.gc.result.i32(token %safepoint_token1)
%phi = phi i32 [ %a, %branch2 ], [ %b, %branch1 ]
%ret = call i32 @foo()
ret i32 %ret
diff --git a/test/Transforms/PlaceSafepoints/finite-loops.ll b/test/Transforms/PlaceSafepoints/finite-loops.ll
index 3cc7158afcfe..b98073d6a6e6 100644
--- a/test/Transforms/PlaceSafepoints/finite-loops.ll
+++ b/test/Transforms/PlaceSafepoints/finite-loops.ll
@@ -1,6 +1,7 @@
; Tests to ensure that we are not placing backedge safepoints in
; loops which are clearly finite.
-;; RUN: opt %s -place-safepoints -S | FileCheck %s
+;; RUN: opt %s -place-safepoints -spp-counted-loop-trip-width=32 -S | FileCheck %s
+;; RUN: opt %s -place-safepoints -spp-counted-loop-trip-width=64 -S | FileCheck %s -check-prefix=COUNTED-64
; A simple counted loop with trivially known range
@@ -10,6 +11,7 @@ define void @test1(i32) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
entry:
br label %loop
@@ -31,6 +33,7 @@ define void @test2(i32) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
entry:
br label %loop
@@ -55,6 +58,7 @@ define void @test3(i8 %upper) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
entry:
br label %loop
@@ -69,6 +73,65 @@ exit:
ret void
}
+; The range is a 64 bit value
+define void @test4(i64 %upper) gc "statepoint-example" {
+; CHECK-LABEL: test4
+; CHECK-LABEL: entry
+; CHECK: statepoint
+; CHECK-LABEL: loop
+; CHECK: statepoint
+; CHECK-LABEL: exit
+
+; COUNTED-64-LABEL: test4
+; COUNTED-64-LABEL: entry
+; COUNTED-64: statepoint
+; COUNTED-64-LABEL: loop
+; COUNTED-64-NOT: statepoint
+; COUNTED-64-LABEL: exit
+
+entry:
+ br label %loop
+
+loop:
+ %counter = phi i64 [ 0 , %entry ], [ %counter.inc , %loop ]
+ %counter.inc = add i64 %counter, 1
+ %counter.cmp = icmp slt i64 %counter.inc, %upper
+ br i1 %counter.cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; This loop can run infinitely (for %upper == INT64_MAX) so it needs a
+; safepoint.
+define void @test5(i64 %upper) gc "statepoint-example" {
+; CHECK-LABEL: test5
+; CHECK-LABEL: entry
+; CHECK: statepoint
+; CHECK-LABEL: loop
+; CHECK: statepoint
+; CHECK-LABEL: exit
+
+; COUNTED-64-LABEL: test5
+; COUNTED-64-LABEL: entry
+; COUNTED-64: statepoint
+; COUNTED-64-LABEL: loop
+; COUNTED-64: statepoint
+; COUNTED-64-LABEL: exit
+
+entry:
+ br label %loop
+
+loop:
+ %counter = phi i64 [ 0 , %entry ], [ %counter.inc , %loop ]
+ %counter.inc = add i64 %counter, 1
+ %counter.cmp = icmp sle i64 %counter.inc, %upper
+ br i1 %counter.cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
; This function is inlined when inserting a poll.
declare void @do_safepoint()
diff --git a/test/Transforms/PlaceSafepoints/patchable-statepoints.ll b/test/Transforms/PlaceSafepoints/patchable-statepoints.ll
index 9387f42bf0ab..2303ac7ef515 100644
--- a/test/Transforms/PlaceSafepoints/patchable-statepoints.ll
+++ b/test/Transforms/PlaceSafepoints/patchable-statepoints.ll
@@ -7,7 +7,7 @@ define void @test_id() gc "statepoint-example" personality i32 ()* @personality_
; CHECK-LABEL: @test_id(
entry:
; CHECK-LABEL: entry:
-; CHECK: invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 100, i32 0, void ()* @f
+; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 100, i32 0, void ()* @f
invoke void @f() "statepoint-id"="100" to label %normal_return unwind label %exceptional_return
normal_return:
@@ -22,7 +22,7 @@ define void @test_num_patch_bytes() gc "statepoint-example" personality i32 ()*
; CHECK-LABEL: @test_num_patch_bytes(
entry:
; CHECK-LABEL: entry:
-; CHECK: invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 99, void ()* null,
+; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 99, void ()* @f,
invoke void @f() "statepoint-num-patch-bytes"="99" to label %normal_return unwind label %exceptional_return
normal_return:
diff --git a/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll b/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll
index 6048f63c7f7b..eaefefa7ad1d 100644
--- a/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll
+++ b/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll
@@ -6,7 +6,7 @@
define i64 addrspace(1)* @test_invoke_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality {
; CHECK-LABEL: @test_invoke_format(
; CHECK-LABEL: entry:
-; CHECK: invoke coldcc i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: invoke coldcc token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
entry:
%ret_val = invoke coldcc i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
to label %normal_return unwind label %exceptional_return
@@ -23,7 +23,7 @@ exceptional_return:
define i64 addrspace(1)* @test_call_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" {
; CHECK-LABEL: @test_call_format(
; CHECK-LABEL: entry:
-; CHECK: call coldcc i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: call coldcc token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
entry:
%ret_val = call coldcc i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
ret i64 addrspace(1)* %ret_val
diff --git a/test/Transforms/PlaceSafepoints/statepoint-format.ll b/test/Transforms/PlaceSafepoints/statepoint-format.ll
index 496091f552d1..c3712a3ace00 100644
--- a/test/Transforms/PlaceSafepoints/statepoint-format.ll
+++ b/test/Transforms/PlaceSafepoints/statepoint-format.ll
@@ -6,7 +6,7 @@
define i64 addrspace(1)* @test_invoke_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality {
; CHECK-LABEL: @test_invoke_format(
; CHECK-LABEL: entry:
-; CHECK: invoke i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: invoke token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
entry:
%ret_val = invoke i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
to label %normal_return unwind label %exceptional_return
@@ -23,7 +23,7 @@ exceptional_return:
define i64 addrspace(1)* @test_call_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" {
; CHECK-LABEL: @test_call_format(
; CHECK-LABEL: entry:
-; CHECK: call i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: call token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
entry:
%ret_val = call i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
ret i64 addrspace(1)* %ret_val
diff --git a/test/Transforms/PruneEH/operand-bundles.ll b/test/Transforms/PruneEH/operand-bundles.ll
new file mode 100644
index 000000000000..efe8f62a8fb2
--- /dev/null
+++ b/test/Transforms/PruneEH/operand-bundles.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -prune-eh -S | FileCheck %s
+
+declare void @nounwind() nounwind
+
+define internal void @foo() {
+ call void @nounwind()
+ ret void
+}
+
+define i32 @caller() personality i32 (...)* @__gxx_personality_v0 {
+; CHECK-LABEL: @caller(
+; CHECK-NOT: invoke
+; CHECK: call void @foo() [ "foo"(i32 0, i8 1) ]
+ invoke void @foo() [ "foo"(i32 0, i8 1) ]
+ to label %Normal unwind label %Except
+
+Normal: ; preds = %0
+ ret i32 0
+
+Except: ; preds = %0
+ landingpad { i8*, i32 }
+ catch i8* null
+ ret i32 1
+}
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll
index 9fbb5ccfe9a2..fb76b9d990b0 100644
--- a/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -16,9 +16,9 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: @test2
-; CHECK-NEXT: fadd fast <2 x float> %c, %b
-; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
-; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast <2 x float> %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast <2 x float> %a, %a
+; CHECK-NEXT: fmul fast <2 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret <2 x float>
%t0 = fmul fast <2 x float> %a, %b
@@ -133,8 +133,8 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
; Check x*y+y*x -> x*y*2.
define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @test11
-; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
+; CHECK-NEXT: %factor = fmul fast <2 x double> %x, <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %y
; CHECK-NEXT: ret <2 x double> %tmp1
%1 = fmul fast <2 x double> %x, %y
diff --git a/test/Transforms/Reassociate/fast-basictest.ll b/test/Transforms/Reassociate/fast-basictest.ll
index 64b74e3e8c16..c8a2bd9c1935 100644
--- a/test/Transforms/Reassociate/fast-basictest.ll
+++ b/test/Transforms/Reassociate/fast-basictest.ll
@@ -108,7 +108,7 @@ define float @test7(float %A, float %B, float %C) {
; CHECK-LABEL: @test7
; CHECK-NEXT: fadd fast float %C, %B
; CHECK-NEXT: fmul fast float %A, %A
-; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %tmp2
; CHECK-NEXT: ret float
%aa = fmul fast float %A, %A
diff --git a/test/Transforms/Reassociate/fast-fp-commute.ll b/test/Transforms/Reassociate/fast-fp-commute.ll
index ad89607a21e4..6565bbb3d201 100644
--- a/test/Transforms/Reassociate/fast-fp-commute.ll
+++ b/test/Transforms/Reassociate/fast-fp-commute.ll
@@ -33,8 +33,8 @@ define float @test2(float %x, float %y) {
define float @test3(float %x, float %y) {
; CHECK-LABEL: test3
-; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
-; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: %factor = fmul fast float %x, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %y
; CHECK-NEXT: ret float %tmp1
%1 = fmul fast float %x, %y
diff --git a/test/Transforms/Reassociate/fast-multistep.ll b/test/Transforms/Reassociate/fast-multistep.ll
index 45e15c7f3539..aea997cdcbda 100644
--- a/test/Transforms/Reassociate/fast-multistep.ll
+++ b/test/Transforms/Reassociate/fast-multistep.ll
@@ -3,9 +3,9 @@
define float @fmultistep1(float %a, float %b, float %c) {
; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
; CHECK-LABEL: @fmultistep1
-; CHECK-NEXT: fadd fast float %c, %b
-; CHECK-NEXT: fmul fast float %a, %tmp2
-; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast float %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast float %a, %a
+; CHECK-NEXT: fmul fast float [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret float
%t0 = fmul fast float %a, %b
diff --git a/test/Transforms/Reassociate/fp-expr.ll b/test/Transforms/Reassociate/fp-expr.ll
new file mode 100644
index 000000000000..5af3b1991c9e
--- /dev/null
+++ b/test/Transforms/Reassociate/fp-expr.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -reassociate < %s | FileCheck %s
+
+define void @test1() {
+; CHECK-LABEL: @test1
+; CHECK: call
+; CHECK: fsub
+; CHECK: fadd
+ %tmp = tail call <4 x float> @blam()
+ %tmp23 = fsub fast <4 x float> undef, %tmp
+ %tmp24 = fadd fast <4 x float> %tmp23, undef
+ tail call void @wombat(<4 x float> %tmp24)
+ ret void
+}
+
+define half @test2() {
+; CHECK-LABEL: @test2
+; CHECK: fsub
+; CHECK: fsub
+; CHECK: fadd
+ %tmp15 = fsub fast half undef, undef
+ %tmp17 = fsub fast half undef, %tmp15
+ %tmp18 = fadd fast half undef, %tmp17
+ ret half %tmp18
+}
+
+
+
+; Function Attrs: optsize
+declare <4 x float> @blam()
+
+; Function Attrs: optsize
+declare void @wombat(<4 x float>)
+
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index c499646a8b6a..5685bb949537 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -8,9 +8,9 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
%t2 = mul i64 %a, %c
%t3 = mul i64 %a, %t2 ; a*(a*c)
%t4 = add i64 %t1, %t3
-; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = add i64 %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = mul i64 %a, %a
+; CHECK-NEXT: mul i64 [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret
ret i64 %t4
}
diff --git a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
new file mode 100644
index 000000000000..c2cdffce61e4
--- /dev/null
+++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; CHECK-LABEL: faddsubAssoc1
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
+; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is A op (B op C)
+define half @faddsubAssoc1(half %a, half %b) {
+ %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+ %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+ %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+ %tmp4 = fsub fast half %tmp2, %tmp1 ; 5 * a - 3 * b
+ %tmp5 = fsub fast half %tmp3, %tmp4 ; 2 * b - ( 5 * a - 3 * b)
+ ret half %tmp5 ; = 5 * (b - a)
+}
+
+; CHECK-LABEL: faddsubAssoc2
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH3C00
+; CHECK: fadd fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is (A op B) op C
+define half @faddsubAssoc2(half %a, half %b) {
+ %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+ %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+ %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+ %tmp4 = fadd fast half %tmp2, %tmp1 ; 5 * a + 3 * b
+ %tmp5 = fsub fast half %tmp4, %tmp3 ; (5 * a + 3 * b) - (2 * b)
+ ret half %tmp5 ; = 5 * a + b
+}
+
diff --git a/test/Transforms/Reassociate/secondary.ll b/test/Transforms/Reassociate/secondary.ll
index a52000ada537..388cd6bcb6fe 100644
--- a/test/Transforms/Reassociate/secondary.ll
+++ b/test/Transforms/Reassociate/secondary.ll
@@ -6,7 +6,7 @@
; CHECK: define
; CHECK-NOT: undef
-; CHECK: %factor = mul i32 %tmp3, -2
+; CHECK: %factor = mul i32 %tmp3.neg, 2
; CHECK-NOT: undef
; CHECK: }
diff --git a/test/Transforms/Reassociate/vaarg_movable.ll b/test/Transforms/Reassociate/vaarg_movable.ll
new file mode 100644
index 000000000000..be4fe121fae9
--- /dev/null
+++ b/test/Transforms/Reassociate/vaarg_movable.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -reassociate -die < %s | FileCheck %s
+
+; The two va_arg instructions depend on the memory/context, are therfore not
+; identical and the sub should not be optimized to 0 by reassociate.
+;
+; CHECK-LABEL: @func(
+; ...
+; CHECK: %v0 = va_arg i8** %varargs, i32
+; CHECK: %v1 = va_arg i8** %varargs, i32
+; CHECK: %v0.neg = sub i32 0, %v0
+; CHECK: %sub = add i32 %v0.neg, 1
+; CHECK: %add = add i32 %sub, %v1
+; ...
+; CHECK: ret i32 %add
+define i32 @func(i32 %dummy, ...) {
+ %varargs = alloca i8*, align 8
+ %varargs1 = bitcast i8** %varargs to i8*
+ call void @llvm.va_start(i8* %varargs1)
+ %v0 = va_arg i8** %varargs, i32
+ %v1 = va_arg i8** %varargs, i32
+ %sub = sub nsw i32 %v1, %v0
+ %add = add nsw i32 %sub, 1
+ call void @llvm.va_end(i8* %varargs1)
+ ret i32 %add
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index a22689805fb5..0bed6f358808 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
%xor1 = xor i32 %xor, %and
ret i32 %xor1
; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %y, 123
-; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: %xor = xor i32 %x, 123
+; CHECK: %xor1 = xor i32 %xor, %y
; CHECK: ret i32 %xor1
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
index 1ff1abedd31c..3fd7fd9282f1 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
@@ -1,6 +1,6 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %merged_value base %base_phi
+; CHECK: derived %merged_value base %merged_value.base
declare void @site_for_call_safpeoint()
@@ -18,11 +18,11 @@ there:
merge:
; CHECK-LABEL: merge:
-; CHECK: %base_phi = phi i64 addrspace(1)* [ %base_obj_x, %here ], [ %base_obj_y, %there ]
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %here ], [ %base_obj_y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %here ], [ %y, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
index f4292a998485..19f1423eea03 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
@@ -1,8 +1,9 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %next_x base %base_obj_x
-; CHECK: derived %next_y base %base_obj_y
-; CHECK: derived %next base %base_phi
+; CHECK: Base Pairs (w/o Relocation):
+; CHECK-DAG: derived %next base %next.base
+; CHECK-DAG: derived %next_x base %base_obj_x
+; CHECK-DAG: derived %next_y base %base_obj_y
declare i1 @runtime_value()
declare void @do_safepoint()
@@ -30,8 +31,8 @@ false:
merge:
%next = phi i64 addrspace(1)* [ %next_x, %true ], [ %next_y, %false ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
index 2a7a7444adec..a28c925f7828 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
@@ -19,8 +19,8 @@ loop: ; preds = %loop, %entry
; CHECK-DAG: [ %next.relocated.casted, %loop ]
%current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
%next = getelementptr i64, i64 addrspace(1)* %current, i32 1
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll
new file mode 100644
index 000000000000..5ebff642347d
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll
@@ -0,0 +1,20 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %select base @global
+
+@global = external addrspace(1) global i8
+
+define i8 @test(i1 %cond) gc "statepoint-example" {
+ %derived1 = getelementptr i8, i8 addrspace(1)* @global, i64 1
+ %derived2 = getelementptr i8, i8 addrspace(1)* @global, i64 2
+ %select = select i1 %cond, i8 addrspace(1)* %derived1, i8 addrspace(1)* %derived2
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @extern, i32 0, i32 0, i32 0, i32 0)
+; CHECK-NOT: relocate
+; CHECK: %load = load i8, i8 addrspace(1)* %select
+ %load = load i8, i8 addrspace(1)* %select
+ ret i8 %load
+}
+
+declare void @extern() gc "statepoint-example"
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll
new file mode 100644
index 000000000000..8e43e638f989
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll
@@ -0,0 +1,19 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %derived base @global
+
+@global = external addrspace(1) global i8
+
+define i8 @test(i64 %offset) gc "statepoint-example" {
+ %derived = getelementptr i8, i8 addrspace(1)* @global, i64 %offset
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @extern, i32 0, i32 0, i32 0, i32 0)
+; CHECK-NOT: relocate
+; CHECK-NOT: remat
+; CHECK: %load = load i8, i8 addrspace(1)* %derived
+ %load = load i8, i8 addrspace(1)* %derived
+ ret i8 %load
+}
+
+declare void @extern() gc "statepoint-example"
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
index c2877d8b5393..802ce5d79a33 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
@@ -13,9 +13,9 @@ there:
merge:
%merged_value = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %derived_obj, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
index f72201b5138e..e0035d353887 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
@@ -12,9 +12,9 @@ loop:
%current.i32 = bitcast i64 addrspace(1)* %current to i32 addrspace(1)*
%next.i32 = getelementptr i32, i32 addrspace(1)* %current.i32, i32 1
%next.i64 = bitcast i32 addrspace(1)* %next.i32 to i64 addrspace(1)*
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
index 90d91d21d7b4..4e0bb14cb453 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
@@ -1,7 +1,7 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %obj_to_consume base %base_phi
+; CHECK: derived %obj_to_consume base %obj_to_consume
declare void @foo()
declare i64 addrspace(1)* @generate_obj()
@@ -13,10 +13,10 @@ entry:
loop:
; CHECK: loop:
-; CHECK: %safepoint_token1 = call i32 (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
+; CHECK: %safepoint_token1 = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
; CHECK-NEXT: %obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result
- %safepoint_token1 = call i32 (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- %obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32 %safepoint_token1)
+ %safepoint_token1 = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token %safepoint_token1)
switch i32 %condition, label %dest_a [
i32 0, label %dest_b
i32 1, label %dest_c
@@ -33,21 +33,20 @@ dest_c:
merge:
; CHECK: merge:
-; CHECK: %base_phi = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
%obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
- %safepoint_token3 = call i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @consume_obj, i32 1, i32 0, i64 addrspace(1)* %obj_to_consume, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token3 = call token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @consume_obj, i32 1, i32 0, i64 addrspace(1)* %obj_to_consume, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %merge.split
merge.split: ; preds = %merge
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
; Function Attrs: nounwind
-declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32) #0
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_p1i64f(i64, i32, i64 addrspace(1)* ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) \ No newline at end of file
+declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token) #0
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_p1i64f(i64, i32, i64 addrspace(1)* ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
index 9b73377e31cb..c5acd2962f9e 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
@@ -1,6 +1,6 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %merged_value base %base_phi
+; CHECK: derived %merged_value base %merged_value.base
declare void @foo()
@@ -20,11 +20,11 @@ there:
merge:
; CHECK: merge:
-; CHECK: %base_phi = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %base_obj_y, %there ]
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %base_obj_y, %there ]
; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
index 0b785d72d63c..95a42846a2fe 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
@@ -1,6 +1,6 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %merged_value base %base_phi
+; CHECK: derived %merged_value base %merged_value.base
declare void @site_for_call_safpeoint()
@@ -30,12 +30,12 @@ there:
merge:
; CHECK: merge:
-; CHECK: %base_phi = phi i64 addrspace(1)* [ %base_obj_x, %merge_here ], [ %base_obj_y, %there ]
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %merge_here ], [ %base_obj_y, %there ]
; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
index 467429b4d27d..49cf20eab191 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
@@ -1,6 +1,6 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %merged_value base %base_phi
+; CHECK: derived %merged_value base %merged_value.base
declare void @site_for_call_safpeoint()
@@ -24,7 +24,7 @@ bump_here_b:
merge_here:
; CHECK: merge_here:
-; CHECK-DAG: %base_phi
+; CHECK-DAG: %x.base
; CHECK-DAG: phi i64 addrspace(1)*
; CHECK-DAG: [ %base_obj_x, %bump_here_a ]
; CHECK-DAG: [ %base_obj_y, %bump_here_b ]
@@ -37,16 +37,16 @@ there:
merge:
; CHECK: merge:
-; CHECK-DAG: %base_phi1
+; CHECK-DAG: %merged_value.base
; CHECK-DAG: phi i64 addrspace(1)*
; CHECK-DAG: %merge_here
; CHECK-DAG: [ %base_obj_y, %there ]
; CHECK: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
index b89be7daa67b..e5ef42dda24b 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
@@ -24,7 +24,7 @@ check_for_null:
loop_back:
%next_element_ptr = getelementptr i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr, i32 1
%next_index = add i32 %index, 1
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop_check
not_found:
@@ -35,4 +35,4 @@ found:
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
index 848633b2a275..946d89a08e27 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
@@ -13,9 +13,9 @@ loop:
%condition = call i1 @runtime_value()
%maybe_next = getelementptr i64, i64 addrspace(1)* %current, i32 1
%next = select i1 %condition, i64 addrspace(1)* %maybe_next, i64 addrspace(1)* %current
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
index 879f3f9e24da..cd0473a67678 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
@@ -16,7 +16,7 @@ loop:
; CHECK-DAG: [ %obj.relocated.casted, %loop ]
; CHECK-DAG: [ %obj, %entry ]
call void @use_obj(i64 addrspace(1)* %obj)
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
@@ -55,9 +55,9 @@ define i64 addrspace(1)* @test1(i32 %caller, i8 addrspace(1)* %a, i8 addrspace(1
merge:
; CHECK: merge:
-; CHECK-NEXT: %base_phi = phi i64 addrspace(1)* [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_R]], %right ], !is_base_value !0
+; CHECK-NEXT: %value.base = phi i64 addrspace(1)* [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_R]], %right ], !is_base_value !0
%value = phi i64 addrspace(1)* [ %a.cast, %left], [ %a.cast, %left], [ %a.cast, %left], [ %b.cast, %right]
- %safepoint_token = call i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @parse_point, i32 1, i32 0, i64 addrspace(1)* %value, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @parse_point, i32 1, i32 0, i64 addrspace(1)* %value, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %value
}
@@ -74,16 +74,15 @@ entry:
loop: ; preds = %loop, %entry
; CHECK-LABEL: loop
-; CHECK: %base_phi = phi i64 addrspace(1)*
+; CHECK: %current.base = phi i64 addrspace(1)*
; CHECK-DAG: [ %base_obj, %entry ]
; Given the two selects are equivelent, so are their base phis - ideally,
; we'd have commoned these, but that's a missed optimization, not correctness.
-; CHECK-DAG: [ [[DISCARD:%base_select.*.relocated.casted]], %loop ]
-; CHECK-NOT: base_phi2
+; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ]
+; CHECK-NOT: extra.base
; CHECK: next = select
-; CHECK: base_select
+; CHECK: extra2.base = select
; CHECK: extra2 = select
-; CHECK: base_select
; CHECK: statepoint
;; Both 'next' and 'extra2' are live across the backedge safepoint...
%current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
@@ -91,10 +90,62 @@ loop: ; preds = %loop, %entry
%nexta = getelementptr i64, i64 addrspace(1)* %current, i32 1
%next = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
%extra2 = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
+define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj2)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test3
+entry:
+ br i1 %cnd, label %merge, label %taken
+taken:
+ br label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: gc.statepoint
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ]
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %bdv
+}
+
+define i64 addrspace(1)* @test4(i1 %cnd, i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj2)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test4
+entry:
+ br i1 %cnd, label %merge, label %taken
+taken:
+ br label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: gc.statepoint
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj, %taken ]
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %bdv
+}
+
+define i64 addrspace(1)* @test5(i1 %cnd, i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj2)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test5
+entry:
+ br label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: br i1
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %merge ]
+ br i1 %cnd, label %merge, label %next
+next:
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %bdv
+}
+
+
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-vector.ll b/test/Transforms/RewriteStatepointsForGC/base-vector.ll
new file mode 100644
index 000000000000..6084efeb0509
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/base-vector.ll
@@ -0,0 +1,167 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -S | FileCheck %s
+
+define i64 addrspace(1)* @test(<2 x i64 addrspace(1)*> %vec, i32 %idx) gc "statepoint-example" {
+; CHECK-LABEL: @test
+; CHECK: extractelement
+; CHECK: extractelement
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%base_ee, %base_ee)
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%base_ee, %obj)
+; Note that the second extractelement is actually redundant here. A correct output would
+; be to reuse the existing obj as a base since it is actually a base pointer.
+entry:
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test2(<2 x i64 addrspace(1)*>* %ptr, i1 %cnd, i32 %idx1, i32 %idx2)
+ gc "statepoint-example" {
+; CHECK-LABEL: test2
+entry:
+ br i1 %cnd, label %taken, label %untaken
+taken:
+ %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+untaken:
+ %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+merge:
+ %vec = phi <2 x i64 addrspace(1)*> [%obja, %taken], [%objb, %untaken]
+ br i1 %cnd, label %taken2, label %untaken2
+taken2:
+ %obj0 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx1
+ br label %merge2
+untaken2:
+ %obj1 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx2
+ br label %merge2
+merge2:
+; CHECK-LABEL: merge2:
+; CHECK-NEXT: %obj = phi i64 addrspace(1)*
+; CHECK-NEXT: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%obj, %obj)
+ %obj = phi i64 addrspace(1)* [%obj0, %taken2], [%obj1, %untaken2]
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test3(i64 addrspace(1)* %ptr)
+ gc "statepoint-example" {
+; CHECK-LABEL: test3
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %ptr, i32 0
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+; CHECK: insertelement
+; CHECK: extractelement
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%obj, %obj)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %obj
+}
+define i64 addrspace(1)* @test4(i64 addrspace(1)* %ptr)
+ gc "statepoint-example" {
+; CHECK-LABEL: test4
+entry:
+ %derived = getelementptr i64, i64 addrspace(1)* %ptr, i64 16
+ %veca = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %derived, i32 0
+ %vec = insertelement <2 x i64 addrspace(1)*> %veca, i64 addrspace(1)* %ptr, i32 1
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%ptr, %obj)
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%ptr, %ptr)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %obj
+}
+
+declare void @use(i64 addrspace(1)*)
+
+; When we can optimize an extractelement from a known
+; index and avoid introducing new base pointer instructions
+define void @test5(i1 %cnd, i64 addrspace(1)* %obj)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test5
+; CHECK: gc.relocate
+; CHECK-DAG: (%obj, %bdv)
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @use(i64 addrspace(1)* %bdv)
+ ret void
+}
+
+; When we fundementally have to duplicate
+define void @test6(i1 %cnd, i64 addrspace(1)* %obj, i64 %idx)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test6
+; CHECK: %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+; CHECK: %vec.base = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj, i32 0, !is_base_value !0
+; CHECK: %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+; CHECK: %bdv.base = extractelement <2 x i64 addrspace(1)*> %vec.base, i64 %idx, !is_base_value !0
+; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%bdv.base, %bdv)
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @use(i64 addrspace(1)* %bdv)
+ ret void
+}
+
+; A more complicated example involving vector and scalar bases.
+; This is derived from a failing test case when we didn't have correct
+; insertelement handling.
+define i64 addrspace(1)* @test7(i1 %cnd, i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj2)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test7
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj2, i32 0
+ br label %merge1
+merge1:
+; CHECK-LABEL: merge1:
+; CHECK: vec2.base
+; CHECK: vec2
+; CHECK: gep
+; CHECK: vec3.base
+; CHECK: vec3
+ %vec2 = phi <2 x i64 addrspace(1)*> [ %vec, %entry ], [ %vec3, %merge1 ]
+ %gep = getelementptr i64, i64 addrspace(1)* %obj2, i64 1
+ %vec3 = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ br i1 %cnd, label %merge1, label %next1
+next1:
+; CHECK-LABEL: next1:
+; CHECK: bdv.base =
+; CHECK: bdv =
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec2, i32 0
+ br label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK: %objb.base
+; CHECK: %objb
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%objb.base, %objb)
+
+ %objb = phi i64 addrspace(1)* [ %obj, %next1 ], [ %bdv, %merge ]
+ br i1 %cnd, label %merge, label %next
+next:
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %objb
+}
+
+
+declare void @do_safepoint()
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/basics.ll b/test/Transforms/RewriteStatepointsForGC/basics.ll
index 2a61924a5927..48f464356865 100644
--- a/test/Transforms/RewriteStatepointsForGC/basics.ll
+++ b/test/Transforms/RewriteStatepointsForGC/basics.ll
@@ -10,7 +10,7 @@ define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
@@ -23,8 +23,8 @@ define i8 addrspace(1)* @test2(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
@@ -39,7 +39,7 @@ define i8 @test3(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: load i8, i8 addrspace(1)* %obj.relocated
entry:
%derived = getelementptr i8, i8 addrspace(1)* %obj, i64 10
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
%a = load i8, i8 addrspace(1)* %derived
%b = load i8, i8 addrspace(1)* %obj
@@ -57,14 +57,14 @@ taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %merge
untaken:
; CHECK-LABEL: untaken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %merge
merge:
@@ -81,8 +81,8 @@ define i8 addrspace(1)* @test5(i8 addrspace(1)* %obj) gc "ocaml" {
; CHECK-NEXT: gc.statepoint
; CHECK-NOT: %obj.relocated = call coldcc i8 addrspace(1)*
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll b/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll
new file mode 100644
index 000000000000..8221cd0e0f82
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll
@@ -0,0 +1,74 @@
+; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
+
+; A null test of a single value
+define i1 @test(i8 addrspace(1)* %p, i1 %rare) gc "statepoint-example" {
+; CHECK-LABEL: @test
+entry:
+ %cond = icmp eq i8 addrspace(1)* %p, null
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+safepoint:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @safepoint, i32 0, i32 0, i32 0, i32 0)
+ br label %continue
+continue:
+; CHECK-LABEL: continue:
+; CHECK: phi
+; CHECK-DAG: [ %p.relocated, %safepoint ]
+; CHECK-DAG: [ %p, %entry ]
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+ br i1 %cond, label %taken, label %untaken
+taken:
+ ret i1 true
+untaken:
+ ret i1 false
+}
+
+; Comparing two pointers
+define i1 @test2(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+safepoint:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @safepoint, i32 0, i32 0, i32 0, i32 0)
+ br label %continue
+continue:
+; CHECK-LABEL: continue:
+; CHECK: phi
+; CHECK-DAG: [ %q.relocated, %safepoint ]
+; CHECK-DAG: [ %q, %entry ]
+; CHECK: phi
+; CHECK-DAG: [ %p.relocated, %safepoint ]
+; CHECK-DAG: [ %p, %entry ]
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+ br i1 %cond, label %taken, label %untaken
+taken:
+ ret i1 true
+untaken:
+ ret i1 false
+}
+
+; Sanity check that nothing bad happens if already last instruction
+; before terminator
+define i1 @test3(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test3
+entry:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @safepoint, i32 0, i32 0, i32 0, i32 0)
+; CHECK: gc.statepoint
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %cond, label %taken, label %untaken
+taken:
+ ret i1 true
+untaken:
+ ret i1 false
+}
+
+declare void @safepoint()
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+
+!0 = !{!"branch_weights", i32 1, i32 10000}
diff --git a/test/Transforms/RewriteStatepointsForGC/constants.ll b/test/Transforms/RewriteStatepointsForGC/constants.ll
index a30fdd7034a4..b30f64beba09 100644
--- a/test/Transforms/RewriteStatepointsForGC/constants.ll
+++ b/test/Transforms/RewriteStatepointsForGC/constants.ll
@@ -1,7 +1,7 @@
; RUN: opt -S -rewrite-statepoints-for-gc %s | FileCheck %s
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
; constants don't get relocated.
define i8 @test() gc "statepoint-example" {
@@ -9,7 +9,7 @@ define i8 @test() gc "statepoint-example" {
; CHECK: gc.statepoint
; CHECK-NEXT: load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
%res = load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
ret i8 %res
}
@@ -22,7 +22,7 @@ define i8 @test2(i8 addrspace(1)* %p) gc "statepoint-example" {
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: icmp
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
%cmp = icmp eq i8 addrspace(1)* %p, null
br i1 %cmp, label %taken, label %not_taken
@@ -52,9 +52,44 @@ define i8 @test3(i1 %always_true) gc "statepoint-example" {
; CHECK: gc.statepoint
; CHECK-NEXT: load i8, i8 addrspace(1)* @G
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
%res = load i8, i8 addrspace(1)* @G, align 1
ret i8 %res
}
+; Even for source languages without constant references, we can
+; see constants can show up along paths where the value is dead.
+; This is particular relevant when computing bases of PHIs.
+define i8 addrspace(1)* @test4(i8 addrspace(1)* %p) gc "statepoint-example" {
+; CHECK-LABEL: @test4
+entry:
+ %is_null = icmp eq i8 addrspace(1)* %p, null
+ br i1 %is_null, label %split, label %join
+
+split:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %arg_value_addr.i = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 8
+ %arg_value_addr_casted.i = bitcast i8 addrspace(1)* %arg_value_addr.i to i8 addrspace(1)* addrspace(1)*
+ br label %join
+
+join:
+; CHECK-LABEL: join
+; CHECK: %addr2.base =
+ %addr2 = phi i8 addrspace(1)* addrspace(1)* [ %arg_value_addr_casted.i, %split ], [ inttoptr (i64 8 to i8 addrspace(1)* addrspace(1)*), %entry ]
+ ;; NOTE: This particular example can be jump-threaded, but in general,
+ ;; we can't, and have to deal with the resulting IR.
+ br i1 %is_null, label %early-exit, label %use
+
+early-exit:
+ ret i8 addrspace(1)* null
+
+use:
+; CHECK-LABEL: use:
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %res = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %addr2, align 1
+ ret i8 addrspace(1)* %res
+}
+
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll
new file mode 100644
index 000000000000..6af2a3012b5c
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll
@@ -0,0 +1,25 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %merged_value.base
+
+declare void @site_for_call_safpeoint()
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition, label %here, label %there
+
+here: ; preds = %entry
+ %x = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
+ br label %merge
+
+there: ; preds = %entry
+ %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
+ br label %merge
+
+merge: ; preds = %there, %here
+; CHECK-LABEL: merge:
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %here ], [ %base_obj_y, %there ]
+ %merged_value = phi i64 addrspace(1)* [ %x, %here ], [ %y, %there ]
+ call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll
new file mode 100644
index 000000000000..8c486d6b3896
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll
@@ -0,0 +1,35 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+
+declare i1 @runtime_value() "gc-leaf-function"
+
+declare void @do_safepoint()
+
+define void @select_of_phi(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y) gc "statepoint-example" {
+entry:
+ br label %loop
+
+loop: ; preds = %merge, %entry
+ %current_x = phi i64 addrspace(1)* [ %base_obj_x, %entry ], [ %next_x, %merge ]
+ %current_y = phi i64 addrspace(1)* [ %base_obj_y, %entry ], [ %next_y, %merge ]
+ %current = phi i64 addrspace(1)* [ null, %entry ], [ %next, %merge ]
+ %condition = call i1 @runtime_value()
+ %next_x = getelementptr i64, i64 addrspace(1)* %current_x, i32 1
+ %next_y = getelementptr i64, i64 addrspace(1)* %current_y, i32 1
+ br i1 %condition, label %true, label %false
+
+true: ; preds = %loop
+ br label %merge
+
+false: ; preds = %loop
+ br label %merge
+
+merge: ; preds = %false, %true
+ %next = phi i64 addrspace(1)* [ %next_x, %true ], [ %next_y, %false ]
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+; CHECK: Base Pairs (w/o Relocation):
+; CHECK-DAG: derived %next base %next.base
+; CHECK-DAG: derived %next_x base %base_obj_x
+; CHECK-DAG: derived %next_y base %base_obj_y
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll
new file mode 100644
index 000000000000..ae793b2cb630
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll
@@ -0,0 +1,24 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %next base %base_obj
+
+declare void @do_safepoint()
+
+define void @test(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
+entry:
+ %obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
+ br label %loop
+
+loop: ; preds = %loop, %entry
+; CHECK-LABEL: loop:
+; CHECK: phi i64 addrspace(1)*
+; CHECK-DAG: [ %base_obj.relocated.casted, %loop ]
+; CHECK-DAG: [ %base_obj, %entry ]
+; CHECK: %current = phi i64 addrspace(1)*
+; CHECK-DAG: [ %obj, %entry ]
+; CHECK-DAG: [ %next.relocated.casted, %loop ]
+ %current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
+ %next = getelementptr i64, i64 addrspace(1)* %current, i32 1
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll
new file mode 100644
index 000000000000..2b9485388f80
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll
@@ -0,0 +1,19 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %base_obj
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj, i1 %runtime_condition) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition, label %merge, label %there
+
+there: ; preds = %entry
+ %derived_obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
+ br label %merge
+
+merge: ; preds = %there, %entry
+ %merged_value = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %derived_obj, %there ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
+
+declare void @foo()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll
new file mode 100644
index 000000000000..71bb309d1301
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll
@@ -0,0 +1,19 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %next.i64 base %base_obj
+
+define void @test(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
+entry:
+ %obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next.i64, %loop ]
+ %current.i32 = bitcast i64 addrspace(1)* %current to i32 addrspace(1)*
+ %next.i32 = getelementptr i32, i32 addrspace(1)* %current.i32, i32 1
+ %next.i64 = bitcast i32 addrspace(1)* %next.i32 to i64 addrspace(1)*
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll
new file mode 100644
index 000000000000..3fcbf26a6fc0
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll
@@ -0,0 +1,44 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %obj_to_consume base %obj_to_consume
+
+declare void @foo()
+
+declare i64 addrspace(1)* @generate_obj()
+
+declare void @consume_obj(i64 addrspace(1)*)
+
+define void @test(i32 %condition) gc "statepoint-example" {
+entry:
+ br label %loop
+
+loop: ; preds = %merge.split, %entry
+; CHECK: loop:
+; CHECK: [[TOKEN_0:%[^ ]+]] = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
+; CHECK-NEXT: [[RESULT_0:%[^ ]+]] = call i64 addrspace(1)* @llvm.experimental.gc.result
+ %0 = call i64 addrspace(1)* @generate_obj() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ switch i32 %condition, label %dest_a [
+ i32 0, label %dest_b
+ i32 1, label %dest_c
+ ]
+
+dest_a: ; preds = %loop
+ br label %merge
+
+dest_b: ; preds = %loop
+ br label %merge
+
+dest_c: ; preds = %loop
+ br label %merge
+
+merge: ; preds = %dest_c, %dest_b, %dest_a
+; CHECK: merge:
+; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ [[RESULT_0]], %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
+ %obj_to_consume = phi i64 addrspace(1)* [ %0, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
+ call void @consume_obj(i64 addrspace(1)* %obj_to_consume) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %merge.split
+
+merge.split: ; preds = %merge
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll
new file mode 100644
index 000000000000..4d43d7f7307c
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %merged_value.base
+
+declare void @foo()
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition, label %here, label %there
+
+here: ; preds = %entry
+ br label %bump
+
+bump: ; preds = %here
+ br label %merge
+
+there: ; preds = %entry
+ %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
+ br label %merge
+
+merge: ; preds = %there, %bump
+; CHECK: merge:
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %base_obj_y, %there ]
+; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
+ %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll
new file mode 100644
index 000000000000..2d555d179c29
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll
@@ -0,0 +1,37 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %merged_value.base
+
+declare void @site_for_call_safpeoint()
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition_x, i1 %runtime_condition_y) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition_x, label %here, label %there
+
+here: ; preds = %entry
+ br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
+
+bump_here_a: ; preds = %here
+ %x_a = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
+ br label %merge_here
+
+bump_here_b: ; preds = %here
+ %x_b = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 2
+ br label %merge_here
+
+merge_here: ; preds = %bump_here_b, %bump_here_a
+ %x = phi i64 addrspace(1)* [ %x_a, %bump_here_a ], [ %x_b, %bump_here_b ]
+ br label %merge
+
+there: ; preds = %entry
+ %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
+ br label %merge
+
+merge: ; preds = %there, %merge_here
+; CHECK: merge:
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %merge_here ], [ %base_obj_y, %there ]
+; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
+ %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
+ call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll
new file mode 100644
index 000000000000..e90ef63184ee
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll
@@ -0,0 +1,45 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %merged_value.base
+
+declare void @site_for_call_safpeoint()
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition_x, i1 %runtime_condition_y) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition_x, label %here, label %there
+
+here: ; preds = %entry
+ br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
+
+bump_here_a: ; preds = %here
+ %x_a = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
+ br label %merge_here
+
+bump_here_b: ; preds = %here
+ %x_b = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 2
+ br label %merge_here
+
+merge_here: ; preds = %bump_here_b, %bump_here_a
+; CHECK: merge_here:
+; CHECK-DAG: %x.base
+; CHECK-DAG: phi i64 addrspace(1)*
+; CHECK-DAG: [ %base_obj_x, %bump_here_a ]
+; CHECK-DAG: [ %base_obj_y, %bump_here_b ]
+ %x = phi i64 addrspace(1)* [ %x_a, %bump_here_a ], [ %x_b, %bump_here_b ]
+ br label %merge
+
+there: ; preds = %entry
+ %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
+ br label %merge
+
+merge: ; preds = %there, %merge_here
+; CHECK: merge:
+; CHECK-DAG: %merged_value.base
+; CHECK-DAG: phi i64 addrspace(1)*
+; CHECK-DAG: %merge_here
+; CHECK-DAG: [ %base_obj_y, %there ]
+; CHECK: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
+ %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
+ call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll
new file mode 100644
index 000000000000..628696ba2c2f
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll
@@ -0,0 +1,37 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %next_element_ptr base %array_obj
+
+define i32 @null_in_array(i64 addrspace(1)* %array_obj) gc "statepoint-example" {
+entry:
+ %array_len_pointer.i64 = getelementptr i64, i64 addrspace(1)* %array_obj, i32 1
+ %array_len_pointer.i32 = bitcast i64 addrspace(1)* %array_len_pointer.i64 to i32 addrspace(1)*
+ %array_len = load i32, i32 addrspace(1)* %array_len_pointer.i32
+ %array_elems = bitcast i32 addrspace(1)* %array_len_pointer.i32 to i64 addrspace(1)* addrspace(1)*
+ br label %loop_check
+
+loop_check: ; preds = %loop_back, %entry
+ %index = phi i32 [ 0, %entry ], [ %next_index, %loop_back ]
+ %current_element_ptr = phi i64 addrspace(1)* addrspace(1)* [ %array_elems, %entry ], [ %next_element_ptr, %loop_back ]
+ %index_lt = icmp ult i32 %index, %array_len
+ br i1 %index_lt, label %check_for_null, label %not_found
+
+check_for_null: ; preds = %loop_check
+ %current_element = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr
+ %is_null = icmp eq i64 addrspace(1)* %current_element, null
+ br i1 %is_null, label %found, label %loop_back
+
+loop_back: ; preds = %check_for_null
+ %next_element_ptr = getelementptr i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr, i32 1
+ %next_index = add i32 %index, 1
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop_check
+
+not_found: ; preds = %loop_check
+ ret i32 -1
+
+found: ; preds = %check_for_null
+ ret i32 %index
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll
new file mode 100644
index 000000000000..a82af3b96892
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll
@@ -0,0 +1,20 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %next base %base_obj
+
+declare i1 @runtime_value() "gc-leaf-function"
+
+define void @maybe_GEP(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %current = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %next, %loop ]
+ %condition = call i1 @runtime_value()
+ %maybe_next = getelementptr i64, i64 addrspace(1)* %current, i32 1
+ %next = select i1 %condition, i64 addrspace(1)* %maybe_next, i64 addrspace(1)* %current
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll
new file mode 100644
index 000000000000..a378d1502add
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll
@@ -0,0 +1,151 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S 2>&1 | FileCheck %s
+
+; The rewriting needs to make %obj loop variant by inserting a phi
+; of the original value and it's relocation.
+
+declare i64 addrspace(1)* @generate_obj() "gc-leaf-function"
+
+declare void @use_obj(i64 addrspace(1)*) "gc-leaf-function"
+
+define void @def_use_safepoint() gc "statepoint-example" {
+; CHECK-LABEL: def_use_safepoint
+; CHECK: phi i64 addrspace(1)*
+; CHECK-DAG: [ %obj.relocated.casted, %loop ]
+; CHECK-DAG: [ %obj, %entry ]
+entry:
+ %obj = call i64 addrspace(1)* @generate_obj()
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call void @use_obj(i64 addrspace(1)* %obj)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+
+declare void @do_safepoint()
+
+declare void @parse_point(i64 addrspace(1)*)
+
+define i64 addrspace(1)* @test1(i32 %caller, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i32 %unknown) gc "statepoint-example" {
+; CHECK-LABEL: test1
+entry:
+ br i1 undef, label %left, label %right
+
+left: ; preds = %entry
+; CHECK: left:
+; CHECK-NEXT: %a.cast = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
+; CHECK-NEXT: [[CAST_L:%.*]] = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
+; Our safepoint placement pass calls removeUnreachableBlocks, which does a bunch
+; of simplifications to branch instructions. This bug is visible only when
+; there are multiple branches into the same block from the same predecessor, and
+; the following ceremony is to make that artefact survive a call to
+; removeUnreachableBlocks. As an example, "br i1 undef, label %merge, label %merge"
+; will get simplified to "br label %merge" by removeUnreachableBlocks.
+ %a.cast = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
+ switch i32 %unknown, label %right [
+ i32 0, label %merge
+ i32 1, label %merge
+ i32 5, label %merge
+ i32 3, label %right
+ ]
+
+right: ; preds = %left, %left, %entry
+; CHECK: right:
+; CHECK-NEXT: %b.cast = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
+; CHECK-NEXT: [[CAST_R:%.*]] = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
+ %b.cast = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
+ br label %merge
+
+merge: ; preds = %right, %left, %left, %left
+; CHECK: merge:
+; CHECK-NEXT: %value.base = phi i64 addrspace(1)* [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_R]], %right ], !is_base_value !0
+ %value = phi i64 addrspace(1)* [ %a.cast, %left ], [ %a.cast, %left ], [ %a.cast, %left ], [ %b.cast, %right ]
+ call void @parse_point(i64 addrspace(1)* %value) [ "deopt"(i32 0, i32 0, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %value
+}
+
+;; The purpose of this test is to ensure that when two live values share a
+;; base defining value with inherent conflicts, we end up with a *single*
+;; base phi/select per such node. This is testing an optimization, not a
+;; fundemental correctness criteria
+define void @test2(i1 %cnd, i64 addrspace(1)* %base_obj, i64 addrspace(1)* %base_arg2) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+ %obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
+ br label %loop
+; CHECK-LABEL: loop
+; CHECK: %current.base = phi i64 addrspace(1)*
+; CHECK-DAG: [ %base_obj, %entry ]
+
+; Given the two selects are equivelent, so are their base phis - ideally,
+; we'd have commoned these, but that's a missed optimization, not correctness.
+; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ]
+; CHECK-NOT: extra.base
+; CHECK: next = select
+; CHECK: extra2.base = select
+; CHECK: extra2 = select
+; CHECK: statepoint
+;; Both 'next' and 'extra2' are live across the backedge safepoint...
+
+loop: ; preds = %loop, %entry
+ %current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
+ %extra = phi i64 addrspace(1)* [ %obj, %entry ], [ %extra2, %loop ]
+ %nexta = getelementptr i64, i64 addrspace(1)* %current, i32 1
+ %next = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
+ %extra2 = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+
+define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+entry:
+ br i1 %cnd, label %merge, label %taken
+
+taken: ; preds = %entry
+ br label %merge
+
+merge: ; preds = %taken, %entry
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: gc.statepoint
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %bdv
+}
+
+define i64 addrspace(1)* @test4(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
+; CHECK-LABEL: @test4
+entry:
+ br i1 %cnd, label %merge, label %taken
+
+taken: ; preds = %entry
+ br label %merge
+
+merge: ; preds = %taken, %entry
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: gc.statepoint
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj, %taken ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %bdv
+}
+
+define i64 addrspace(1)* @test5(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
+; CHECK-LABEL: @test5
+entry:
+ br label %merge
+
+merge: ; preds = %merge, %entry
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: br i1
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %merge ]
+ br i1 %cnd, label %merge, label %next
+
+next: ; preds = %merge
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %bdv
+}
+
+declare void @foo()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll
new file mode 100644
index 000000000000..96b7390b77bc
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll
@@ -0,0 +1,167 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S | FileCheck %s
+
+
+define i64 addrspace(1)* @test(<2 x i64 addrspace(1)*> %vec, i32 %idx) gc "statepoint-example" {
+; CHECK-LABEL: @test
+; CHECK: extractelement
+; CHECK: extractelement
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%base_ee, %base_ee)
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%base_ee, %obj)
+; Note that the second extractelement is actually redundant here. A correct output would
+; be to reuse the existing obj as a base since it is actually a base pointer.
+entry:
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test2(<2 x i64 addrspace(1)*>* %ptr, i1 %cnd, i32 %idx1, i32 %idx2) gc "statepoint-example" {
+; CHECK-LABEL: test2
+entry:
+ br i1 %cnd, label %taken, label %untaken
+
+taken: ; preds = %entry
+ %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+untaken: ; preds = %entry
+ %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+ %vec = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
+ br i1 %cnd, label %taken2, label %untaken2
+
+taken2: ; preds = %merge
+ %obj0 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx1
+ br label %merge2
+
+untaken2: ; preds = %merge
+ %obj1 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx2
+ br label %merge2
+
+merge2: ; preds = %untaken2, %taken2
+; CHECK-LABEL: merge2:
+; CHECK-NEXT: %obj = phi i64 addrspace(1)*
+; CHECK-NEXT: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%obj, %obj)
+ %obj = phi i64 addrspace(1)* [ %obj0, %taken2 ], [ %obj1, %untaken2 ]
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test3(i64 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test3
+; CHECK: insertelement
+; CHECK: extractelement
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%obj, %obj)
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %ptr, i32 0
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test4(i64 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test4
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%ptr, %obj)
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%ptr, %ptr)
+; When we can optimize an extractelement from a known
+; index and avoid introducing new base pointer instructions
+entry:
+ %derived = getelementptr i64, i64 addrspace(1)* %ptr, i64 16
+ %veca = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %derived, i32 0
+ %vec = insertelement <2 x i64 addrspace(1)*> %veca, i64 addrspace(1)* %ptr, i32 1
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+declare void @use(i64 addrspace(1)*) "gc-leaf-function"
+
+define void @test5(i1 %cnd, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test5
+; CHECK: gc.relocate
+; CHECK-DAG: (%obj, %bdv)
+; When we fundementally have to duplicate
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ call void @use(i64 addrspace(1)* %bdv)
+ ret void
+}
+
+define void @test6(i1 %cnd, i64 addrspace(1)* %obj, i64 %idx) gc "statepoint-example" {
+; CHECK-LABEL: @test6
+; CHECK: %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+; CHECK: %vec.base = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj, i32 0, !is_base_value !0
+; CHECK: %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+; CHECK: %bdv.base = extractelement <2 x i64 addrspace(1)*> %vec.base, i64 %idx, !is_base_value !0
+; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%bdv.base, %bdv)
+; A more complicated example involving vector and scalar bases.
+; This is derived from a failing test case when we didn't have correct
+; insertelement handling.
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ call void @use(i64 addrspace(1)* %bdv)
+ ret void
+}
+
+define i64 addrspace(1)* @test7(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
+; CHECK-LABEL: @test7
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj2, i32 0
+ br label %merge1
+
+merge1: ; preds = %merge1, %entry
+; CHECK-LABEL: merge1:
+; CHECK: vec2.base
+; CHECK: vec2
+; CHECK: gep
+; CHECK: vec3.base
+; CHECK: vec3
+ %vec2 = phi <2 x i64 addrspace(1)*> [ %vec, %entry ], [ %vec3, %merge1 ]
+ %gep = getelementptr i64, i64 addrspace(1)* %obj2, i64 1
+ %vec3 = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ br i1 %cnd, label %merge1, label %next1
+
+next1: ; preds = %merge1
+; CHECK-LABEL: next1:
+; CHECK: bdv.base =
+; CHECK: bdv =
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec2, i32 0
+ br label %merge
+
+merge: ; preds = %merge, %next1
+; CHECK-LABEL: merge:
+; CHECK: %objb.base
+; CHECK: %objb
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%objb.base, %objb)
+ %objb = phi i64 addrspace(1)* [ %obj, %next1 ], [ %bdv, %merge ]
+ br i1 %cnd, label %merge, label %next
+
+next: ; preds = %merge
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %objb
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll
new file mode 100644
index 000000000000..c0dc6940e5db
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles < %s | FileCheck %s
+
+declare void @g()
+declare i32 @h()
+
+define i32 addrspace(1)* @f0(i32 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: @f0(
+ entry:
+; CHECK: [[TOKEN_0:%[^ ]+]] = call token {{[^@]*}} @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg)
+ call void @g() [ "deopt"(i32 100) ]
+
+; CHECK: %arg.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_0]], i32 8, i32 8)
+ ret i32 addrspace(1)* %arg
+}
+
+define i32 addrspace(1)* @f1(i32 addrspace(1)* %arg) gc "statepoint-example" personality i32 8 {
+; CHECK-LABEL: @f1(
+ entry:
+; CHECK: [[TOKEN_1:%[^ ]+]] = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg)
+ invoke void @g() [ "deopt"(i32 100) ] to label %normal_dest unwind label %unwind_dest
+
+ normal_dest:
+; CHECK: %arg.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_1]], i32 8, i32 8)
+ ret i32 addrspace(1)* %arg
+
+ unwind_dest:
+ %lpad = landingpad token cleanup
+ resume token undef
+}
+
+define i32 addrspace(1)* @f2(i32 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: @f2(
+ entry:
+; CHECK: [[TOKEN_2:%[^ ]+]] = call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @h, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg)
+ %val = call i32 @h() [ "deopt"(i32 100) ]
+
+; CHECK: [[RESULT_F2:%[^ ]+]] = call i32 @llvm.experimental.gc.result.i32(token [[TOKEN_2]])
+; CHECK: %arg.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_2]], i32 8, i32 8)
+; CHECK: %arg.relocated.casted = bitcast i8 addrspace(1)* %arg.relocated to i32 addrspace(1)*
+
+ store i32 %val, i32 addrspace(1)* %arg
+; CHECK: store i32 [[RESULT_F2]], i32 addrspace(1)* %arg.relocated.casted
+ ret i32 addrspace(1)* %arg
+}
+
+define i32 addrspace(1)* @f3(i32 addrspace(1)* %arg) gc "statepoint-example" personality i32 8 {
+; CHECK-LABEL: @f3(
+ entry:
+; CHECK: [[TOKEN_3:%[^ ]+]] = invoke token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @h, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg)
+ %val = invoke i32 @h() [ "deopt"(i32 100) ] to label %normal_dest unwind label %unwind_dest
+
+ normal_dest:
+; CHECK: [[RESULT_F3:%[^ ]+]] = call i32 @llvm.experimental.gc.result.i32(token [[TOKEN_3]])
+; CHECK: [[ARG_RELOCATED:%[^ ]+]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_3]], i32 8, i32 8)
+; CHECK: [[ARG_RELOCATED_CASTED:%[^ ]+]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED]] to i32 addrspace(1)*
+
+ store i32 %val, i32 addrspace(1)* %arg
+
+; CHECK: store i32 [[RESULT_F3]], i32 addrspace(1)* [[ARG_RELOCATED_CASTED]]
+ ret i32 addrspace(1)* %arg
+
+ unwind_dest:
+ %lpad = landingpad token cleanup
+ resume token undef
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll
new file mode 100644
index 000000000000..48c45eaa1b01
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll
@@ -0,0 +1,88 @@
+; This is a collection of really basic tests for gc.statepoint rewriting.
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-rematerialization-threshold=0 -S | FileCheck %s
+
+; Trivial relocation over a single call
+
+declare void @foo()
+
+define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test1
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; Two safepoints in a row (i.e. consistent liveness)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %obj
+}
+
+define i8 addrspace(1)* @test2(i8 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
+; A simple derived pointer
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %obj
+}
+
+define i8 @test3(i8 addrspace(1)* %obj) gc "statepoint-example" {
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %derived.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: load i8, i8 addrspace(1)* %derived.relocated
+; CHECK-NEXT: load i8, i8 addrspace(1)* %obj.relocated
+; Tests to make sure we visit both the taken and untaken predeccessor
+; of merge. This was a bug in the dataflow liveness at one point.
+ %derived = getelementptr i8, i8 addrspace(1)* %obj, i64 10
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ %a = load i8, i8 addrspace(1)* %derived
+ %b = load i8, i8 addrspace(1)* %obj
+ %c = sub i8 %a, %b
+ ret i8 %c
+}
+
+define i8 addrspace(1)* @test4(i1 %cmp, i8 addrspace(1)* %obj) gc "statepoint-example" {
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %merge
+
+untaken: ; preds = %entry
+; CHECK-LABEL: untaken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %.0 = phi i8 addrspace(1)* [ %obj.relocated, %taken ], [ %obj.relocated2, %untaken ]
+; CHECK-NEXT: ret i8 addrspace(1)* %.0
+; When run over a function which doesn't opt in, should do nothing!
+ ret i8 addrspace(1)* %obj
+}
+
+define i8 addrspace(1)* @test5(i8 addrspace(1)* %obj) gc "ocaml" {
+; CHECK-LABEL: @test5
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NOT: %obj.relocated = call coldcc i8 addrspace(1)*
+ %0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %obj
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll
new file mode 100644
index 000000000000..f0da0c06db0a
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll
@@ -0,0 +1,81 @@
+; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S < %s | FileCheck %s
+
+; A null test of a single value
+
+define i1 @test(i8 addrspace(1)* %p, i1 %rare) gc "statepoint-example" {
+; CHECK-LABEL: @test
+entry:
+ %cond = icmp eq i8 addrspace(1)* %p, null
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+
+safepoint: ; preds = %entry
+ call void @safepoint() [ "deopt"() ]
+ br label %continue
+
+continue: ; preds = %safepoint, %entry
+; CHECK-LABEL: continue:
+; CHECK: phi
+; CHECK-DAG: [ %p.relocated, %safepoint ]
+; CHECK-DAG: [ %p, %entry ]
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+; Comparing two pointers
+ br i1 %cond, label %taken, label %untaken
+
+taken: ; preds = %continue
+ ret i1 true
+
+untaken: ; preds = %continue
+ ret i1 false
+}
+
+define i1 @test2(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+
+safepoint: ; preds = %entry
+ call void @safepoint() [ "deopt"() ]
+ br label %continue
+
+continue: ; preds = %safepoint, %entry
+; CHECK-LABEL: continue:
+; CHECK: phi
+; CHECK-DAG: [ %q.relocated, %safepoint ]
+; CHECK-DAG: [ %q, %entry ]
+; CHECK: phi
+; CHECK-DAG: [ %p.relocated, %safepoint ]
+; CHECK-DAG: [ %p, %entry ]
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+; Sanity check that nothing bad happens if already last instruction
+; before terminator
+ br i1 %cond, label %taken, label %untaken
+
+taken: ; preds = %continue
+ ret i1 true
+
+untaken: ; preds = %continue
+ ret i1 false
+}
+
+define i1 @test3(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+; CHECK: gc.statepoint
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+entry:
+ call void @safepoint() [ "deopt"() ]
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %cond, label %taken, label %untaken
+
+taken: ; preds = %entry
+ ret i1 true
+
+untaken: ; preds = %entry
+ ret i1 false
+}
+
+declare void @safepoint()
+!0 = !{!"branch_weights", i32 1, i32 10000}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll
new file mode 100644
index 000000000000..eede1b09d161
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll
@@ -0,0 +1,51 @@
+; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles %s | FileCheck %s
+
+; constants don't get relocated.
+@G = addrspace(1) global i8 5
+
+declare void @foo()
+
+define i8 @test() gc "statepoint-example" {
+; CHECK-LABEL: @test
+; CHECK: gc.statepoint
+; CHECK-NEXT: load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
+; Mostly just here to show reasonable code test can come from.
+entry:
+ call void @foo() [ "deopt"() ]
+ %res = load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
+ ret i8 %res
+}
+
+define i8 @test2(i8 addrspace(1)* %p) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+; CHECK: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: icmp
+; Globals don't move and thus don't get relocated
+entry:
+ call void @foo() [ "deopt"() ]
+ %cmp = icmp eq i8 addrspace(1)* %p, null
+ br i1 %cmp, label %taken, label %not_taken
+
+taken: ; preds = %not_taken, %entry
+ ret i8 0
+
+not_taken: ; preds = %entry
+ %cmp2 = icmp ne i8 addrspace(1)* %p, null
+ br i1 %cmp2, label %taken, label %dead
+
+dead: ; preds = %not_taken
+ %addr = getelementptr i8, i8 addrspace(1)* %p, i32 15
+ %res = load i8, i8 addrspace(1)* %addr
+ ret i8 %res
+}
+
+define i8 @test3(i1 %always_true) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+; CHECK: gc.statepoint
+; CHECK-NEXT: load i8, i8 addrspace(1)* @G
+entry:
+ call void @foo() [ "deopt"() ]
+ %res = load i8, i8 addrspace(1)* @G, align 1
+ ret i8 %res
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll
new file mode 100644
index 000000000000..f04c6784a878
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll
@@ -0,0 +1,104 @@
+; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles < %s | FileCheck %s
+
+; CHECK: declare i8 addrspace(1)* @some_function_ret_deref()
+; CHECK: define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* %a)
+; CHECK: define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* %a)
+; CHECK: define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* %a)
+
+declare void @foo()
+
+declare i8 addrspace(1)* @some_function() "gc-leaf-function"
+
+declare void @some_function_consumer(i8 addrspace(1)*) "gc-leaf-function"
+
+declare dereferenceable(4) i8 addrspace(1)* @some_function_ret_deref() "gc-leaf-function"
+declare noalias i8 addrspace(1)* @some_function_ret_noalias() "gc-leaf-function"
+
+define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* dereferenceable(4) %a) gc "statepoint-example" {
+entry:
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* dereferenceable_or_null(4) %a) gc "statepoint-example" {
+entry:
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* noalias %a) gc "statepoint-example" {
+entry:
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_deref_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_deref_retval(
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+entry:
+ %a = call dereferenceable(4) i8 addrspace(1)* @some_function()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_deref_or_null_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_deref_or_null_retval(
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+entry:
+ %a = call dereferenceable_or_null(4) i8 addrspace(1)* @some_function()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_noalias_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_noalias_retval(
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+entry:
+ %a = call noalias i8 addrspace(1)* @some_function()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 @test_md(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_md(
+; CHECK: %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
+entry:
+ %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 %tmp
+}
+
+define i8 addrspace(1)* @test_decl_only_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_decl_only_attribute(
+; No change here, but the prototype of some_function_ret_deref should have changed.
+; CHECK: call i8 addrspace(1)* @some_function_ret_deref()
+entry:
+ %a = call i8 addrspace(1)* @some_function_ret_deref()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_decl_only_noalias(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_decl_only_noalias(
+; No change here, but the prototype of some_function_ret_noalias should have changed.
+; CHECK: call i8 addrspace(1)* @some_function_ret_noalias()
+entry:
+ %a = call i8 addrspace(1)* @some_function_ret_noalias()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_callsite_arg_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_callsite_arg_attribute(
+; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"red", !2}
+; CHECK: !2 = !{!"blue"}
+entry:
+ call void @some_function_consumer(i8 addrspace(1)* dereferenceable(4) noalias %ptr)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %ptr
+}
+!0 = !{!1, !1, i64 0, i64 1}
+!1 = !{!"red", !2}
+!2 = !{!"blue"}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll
new file mode 100644
index 000000000000..0d53af704df2
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll
@@ -0,0 +1,22 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S 2>&1 | FileCheck %s
+
+; This test is to verify gc.relocate can handle pointer to vector of
+; pointers (<2 x i32 addrspace(1)*> addrspace(1)* in this case).
+; The old scheme to create a gc.relocate of <2 x i32 addrspace(1)*> addrspace(1)*
+; type will fail because llvm does not support mangling vector of pointers.
+; The new scheme will create all gc.relocate to i8 addrspace(1)* type and
+; then bitcast to the correct type.
+
+declare void @foo()
+
+declare void @use(...) "gc-leaf-function"
+
+define void @test1(<2 x i32 addrspace(1)*> addrspace(1)* %obj) gc "statepoint-example" {
+entry:
+; CHECK: %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token, i32 7, i32 7)
+; CHECK-NEXT: %obj.relocated.casted = bitcast i8 addrspace(1)* %obj.relocated to <2 x i32 addrspace(1)*> addrspace(1)*
+
+ call void @foo() [ "deopt"() ]
+ call void (...) @use(<2 x i32 addrspace(1)*> addrspace(1)* %obj)
+ ret void
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll
new file mode 100644
index 000000000000..00f28938cee9
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll
@@ -0,0 +1,149 @@
+; Test that we can correctly handle vectors of pointers in statepoint
+; rewriting. Currently, we scalarize, but that's an implementation detail.
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S | FileCheck %s
+
+; A non-vector relocation for comparison
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: test
+; CHECK: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
+; A base vector from a argument
+entry:
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoint-example" {
+; CHECK-LABEL: test2
+; CHECK: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
+; A base vector from a load
+entry:
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test3
+; CHECK: load
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
+; When a statepoint is an invoke rather than a call
+entry:
+ %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+declare i32 @fake_personality_function()
+
+define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" personality i32 ()* @fake_personality_function {
+; CHECK-LABEL: test4
+; CHECK: load
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+entry:
+ %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ invoke void @do_safepoint() [ "deopt"() ]
+ to label %normal_return unwind label %exceptional_return
+
+normal_return: ; preds = %entry
+; CHECK-LABEL: normal_return:
+; CHECK: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
+ ret <2 x i64 addrspace(1)*> %obj
+
+exceptional_return: ; preds = %entry
+; CHECK-LABEL: exceptional_return:
+; CHECK: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %13
+; Can we handle an insert element with a constant offset? This effectively
+; tests both the equal and inequal case since we have to relocate both indices
+; in the vector.
+ %landing_pad4 = landingpad token
+ cleanup
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p) gc "statepoint-example" {
+; CHECK-LABEL: test5
+; CHECK: insertelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
+; A base vector from a load
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %vec
+}
+
+define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test6
+entry:
+ br i1 %cnd, label %taken, label %untaken
+
+taken: ; preds = %entry
+ %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+untaken: ; preds = %entry
+ %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+; CHECK-LABEL: merge:
+; CHECK-NEXT: = phi
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
+ %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll
new file mode 100644
index 000000000000..c5b213f4c82d
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll
@@ -0,0 +1,165 @@
+; A collection of liveness test cases to ensure we're reporting the
+; correct live values at statepoints
+; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-rematerialization-threshold=0 -S < %s | FileCheck %s
+
+; Tests to make sure we consider %obj live in both the taken and untaken
+; predeccessor of merge.
+
+define i64 addrspace(1)* @test1(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test1
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: br label %merge
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+untaken: ; preds = %entry
+; CHECK-LABEL: untaken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: br label %merge
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %.0 = phi i64 addrspace(1)* [ %obj.relocated.casted, %taken ], [ %obj.relocated2.casted, %untaken ]
+; CHECK-NEXT: ret i64 addrspace(1)* %.0
+; A local kill should not effect liveness in predecessor block
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test2(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: br
+ call void @foo() [ "deopt"() ]
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: %obj = load
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
+; A local kill should effect values live from a successor phi. Also, we
+; should only propagate liveness from a phi to the appropriate predecessors.
+ %obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
+ call void @foo() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+
+untaken: ; preds = %entry
+ ret i64 addrspace(1)* null
+}
+
+define i64 addrspace(1)* @test3(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj = load
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: br label %merge
+ call void @foo() [ "deopt"() ]
+ %obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+untaken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: br label %merge
+; A base pointer must be live if it is needed at a later statepoint,
+; even if the base pointer is otherwise unused.
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+ %phi = phi i64 addrspace(1)* [ %obj, %taken ], [ null, %untaken ]
+ ret i64 addrspace(1)* %phi
+}
+
+define i64 addrspace(1)* @test4(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test4
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %derived = getelementptr
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %derived.relocated =
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: %obj.relocated =
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %derived.relocated2 =
+; CHECK-NEXT: bitcast
+
+; Note: It's legal to relocate obj again, but not strictly needed
+; CHECK-NEXT: %obj.relocated3 =
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret i64 addrspace(1)* %derived.relocated2.casted
+;
+; Make sure that a phi def visited during iteration is considered a kill.
+; Also, liveness after base pointer analysis can change based on new uses,
+; not just new defs.
+ %derived = getelementptr i64, i64 addrspace(1)* %obj, i64 8
+ call void @foo() [ "deopt"() ]
+ call void @foo() [ "deopt"() ]
+ ret i64 addrspace(1)* %derived
+}
+
+declare void @consume(...) readonly "gc-leaf-function"
+
+define i64 addrspace(1)* @test5(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test5
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: br label %merge
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+untaken: ; preds = %entry
+; CHECK-LABEL: untaken:
+; CHECK-NEXT: br label %merge
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %.0 = phi i64 addrspace(1)*
+; CHECK-NEXT: %obj2a = phi
+; CHECK-NEXT: @consume
+; CHECK-NEXT: br label %final
+ %obj2a = phi i64 addrspace(1)* [ %obj, %taken ], [ null, %untaken ]
+ call void (...) @consume(i64 addrspace(1)* %obj2a)
+ br label %final
+
+final: ; preds = %merge
+; CHECK-LABEL: final:
+; CHECK-NEXT: @consume
+; CHECK-NEXT: ret i64 addrspace(1)* %.0
+ call void (...) @consume(i64 addrspace(1)* %obj2a)
+ ret i64 addrspace(1)* %obj
+}
+
+declare void @foo()
+
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll
new file mode 100644
index 000000000000..8f5c0ff4a710
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles < %s | FileCheck %s
+
+declare void @f()
+declare i32 @personality_function()
+
+define void @test_id() gc "statepoint-example" personality i32 ()* @personality_function {
+; CHECK-LABEL: @test_id(
+entry:
+; CHECK-LABEL: entry:
+; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 100, i32 0, void ()* @f
+ invoke void @f() "statepoint-id"="100" to label %normal_return unwind label %exceptional_return
+
+normal_return:
+ ret void
+
+exceptional_return:
+ %landing_pad4 = landingpad {i8*, i32} cleanup
+ ret void
+}
+
+define void @test_num_patch_bytes() gc "statepoint-example" personality i32 ()* @personality_function {
+; CHECK-LABEL: @test_num_patch_bytes(
+entry:
+; CHECK-LABEL: entry:
+; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 99, void ()* @f,
+ invoke void @f() "statepoint-num-patch-bytes"="99" to label %normal_return unwind label %exceptional_return
+
+normal_return:
+ ret void
+
+exceptional_return:
+ %landing_pad4 = landingpad {i8*, i32} cleanup
+ ret void
+}
+
+declare void @do_safepoint()
+define void @gc.safepoint_poll() {
+entry:
+ call void @do_safepoint()
+ ret void
+}
+
+; CHECK-NOT: statepoint-id
+; CHECK-NOT: statepoint-num-patch_bytes
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll
new file mode 100644
index 000000000000..e0bd542aa5d5
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll
@@ -0,0 +1,62 @@
+; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S < %s | FileCheck %s
+
+; Test to make sure we destroy LCSSA's single entry phi nodes before
+; running liveness
+
+declare void @consume(...) "gc-leaf-function"
+
+define void @test6(i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test6
+entry:
+ br label %next
+
+next: ; preds = %entry
+; CHECK-LABEL: next:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
+; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
+; Need to delete unreachable gc.statepoint call
+ %obj2 = phi i64 addrspace(1)* [ %obj, %entry ]
+ call void @foo() [ "deopt"() ]
+ call void (...) @consume(i64 addrspace(1)* %obj2)
+ call void (...) @consume(i64 addrspace(1)* %obj)
+ ret void
+}
+
+define void @test7() gc "statepoint-example" {
+; CHECK-LABEL: test7
+; CHECK-NOT: gc.statepoint
+; Need to delete unreachable gc.statepoint invoke - tested seperately given
+; a correct implementation could only remove the instructions, not the block
+ ret void
+
+unreached: ; preds = %unreached
+ %obj = phi i64 addrspace(1)* [ null, %unreached ]
+ call void @foo() [ "deopt"() ]
+ call void (...) @consume(i64 addrspace(1)* %obj)
+ br label %unreached
+}
+
+define void @test8() gc "statepoint-example" personality i32 ()* undef {
+; CHECK-LABEL: test8
+; CHECK-NOT: gc.statepoint
+; Bound the last check-not
+ ret void
+
+unreached: ; No predecessors!
+ invoke void @foo() [ "deopt"() ]
+; CHECK-LABEL: @foo
+ to label %normal_return unwind label %exceptional_return
+
+normal_return: ; preds = %unreached
+ ret void
+
+exceptional_return: ; preds = %unreached
+ %landing_pad4 = landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+declare void @foo()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll
new file mode 100644
index 000000000000..688cf36168d4
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll
@@ -0,0 +1,32 @@
+
+;; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -verify -S < %s | FileCheck %s
+;; This test is to verify that RewriteStatepointsForGC correctly relocates values
+;; defined by invoke instruction results.
+
+declare i64* addrspace(1)* @non_gc_call() "gc-leaf-function"
+
+declare void @gc_call()
+
+declare i32* @fake_personality_function()
+
+define i64* addrspace(1)* @test() gc "statepoint-example" personality i32* ()* @fake_personality_function {
+; CHECK-LABEL: @test(
+
+entry:
+ %obj = invoke i64* addrspace(1)* @non_gc_call()
+ to label %normal_dest unwind label %unwind_dest
+
+unwind_dest: ; preds = %entry
+ %lpad = landingpad { i8*, i32 }
+ cleanup
+ resume { i8*, i32 } undef
+
+normal_dest: ; preds = %entry
+; CHECK: normal_dest:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+
+ call void @gc_call() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64* addrspace(1)* %obj
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll
new file mode 100644
index 000000000000..584dc32b7529
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll
@@ -0,0 +1,279 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-rematerialization-threshold=0 -S 2>&1 | FileCheck %s
+
+
+declare void @foo()
+
+declare void @use(...) "gc-leaf-function"
+
+define i64 addrspace(1)* @test1(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2, i1 %condition) gc "statepoint-example" {
+; CHECK-LABEL: @test1
+; CHECK-DAG: %obj.relocated
+; CHECK-DAG: %obj2.relocated
+entry:
+ call void @foo() [ "deopt"() ]
+ br label %joint
+
+joint: ; preds = %joint2, %entry
+; CHECK-LABEL: joint:
+; CHECK: %phi1 = phi i64 addrspace(1)* [ %obj.relocated.casted, %entry ], [ %obj3, %joint2 ]
+ %phi1 = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj3, %joint2 ]
+ br i1 %condition, label %use, label %joint2
+
+use: ; preds = %joint
+ br label %joint2
+
+joint2: ; preds = %use, %joint
+; CHECK-LABEL: joint2:
+; CHECK: %phi2 = phi i64 addrspace(1)* [ %obj.relocated.casted, %use ], [ %obj2.relocated.casted, %joint ]
+; CHECK: %obj3 = getelementptr i64, i64 addrspace(1)* %obj2.relocated.casted, i32 1
+ %phi2 = phi i64 addrspace(1)* [ %obj, %use ], [ %obj2, %joint ]
+ %obj3 = getelementptr i64, i64 addrspace(1)* %obj2, i32 1
+ br label %joint
+}
+
+declare i64 addrspace(1)* @generate_obj() "gc-leaf-function"
+
+declare void @consume_obj(i64 addrspace(1)*) "gc-leaf-function"
+
+declare i1 @rt() "gc-leaf-function"
+
+define void @test2() gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+ %obj_init = call i64 addrspace(1)* @generate_obj()
+ %obj = getelementptr i64, i64 addrspace(1)* %obj_init, i32 42
+ br label %loop
+
+loop: ; preds = %loop.backedge, %entry
+; CHECK: loop:
+; CHECK-DAG: [ %obj_init.relocated.casted, %loop.backedge ]
+; CHECK-DAG: [ %obj_init, %entry ]
+; CHECK-DAG: [ %obj.relocated.casted, %loop.backedge ]
+; CHECK-DAG: [ %obj, %entry ]
+; CHECK-NOT: %location = getelementptr i64, i64 addrspace(1)* %obj, i32 %index
+ %index = phi i32 [ 0, %entry ], [ %index.inc, %loop.backedge ]
+ %location = getelementptr i64, i64 addrspace(1)* %obj, i32 %index
+ call void @consume_obj(i64 addrspace(1)* %location)
+ %index.inc = add i32 %index, 1
+ %condition = call i1 @rt()
+ br i1 %condition, label %loop_x, label %loop_y
+
+loop_x: ; preds = %loop
+ br label %loop.backedge
+
+loop.backedge: ; preds = %loop_y, %loop_x
+ call void @do_safepoint() [ "deopt"() ]
+ br label %loop
+
+loop_y: ; preds = %loop
+ br label %loop.backedge
+}
+
+declare void @some_call(i8 addrspace(1)*) "gc-leaf-function"
+
+define void @relocate_merge(i1 %cnd, i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: @relocate_merge
+
+bci_0:
+ br i1 %cnd, label %if_branch, label %else_branch
+
+if_branch: ; preds = %bci_0
+; CHECK-LABEL: if_branch:
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+ call void @foo() [ "deopt"() ]
+ br label %join
+
+else_branch: ; preds = %bci_0
+; CHECK-LABEL: else_branch:
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; We need to end up with a single relocation phi updated from both paths
+ call void @foo() [ "deopt"() ]
+ br label %join
+
+join: ; preds = %else_branch, %if_branch
+; CHECK-LABEL: join:
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: [ %arg.relocated, %if_branch ]
+; CHECK-DAG: [ %arg.relocated2, %else_branch ]
+; CHECK-NOT: phi
+ call void @some_call(i8 addrspace(1)* %arg)
+ ret void
+}
+
+; Make sure a use in a statepoint gets properly relocated at a previous one.
+; This is basically just making sure that statepoints aren't accidentally
+; treated specially.
+define void @test3(i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+; CHECK: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.statepoint
+entry:
+ call void undef(i64 undef) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ %0 = call i32 undef(i64 addrspace(1)* %obj) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret void
+}
+
+; Check specifically for the case where the result of a statepoint needs to
+; be relocated itself
+define void @test4() gc "statepoint-example" {
+; CHECK-LABEL: @test4
+; CHECK: gc.statepoint
+; CHECK: gc.result
+; CHECK: gc.statepoint
+; CHECK: [[RELOCATED:%[^ ]+]] = call {{.*}}gc.relocate
+; CHECK: @use(i8 addrspace(1)* [[RELOCATED]])
+ %1 = call i8 addrspace(1)* undef() [ "deopt"() ]
+ %2 = call i8 addrspace(1)* undef() [ "deopt"() ]
+ call void (...) @use(i8 addrspace(1)* %1)
+ unreachable
+}
+
+; Test updating a phi where not all inputs are live to begin with
+define void @test5(i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: test5
+entry:
+ %0 = call i8 addrspace(1)* undef() [ "deopt"() ]
+ switch i32 undef, label %kill [
+ i32 10, label %merge
+ i32 13, label %merge
+ ]
+
+kill: ; preds = %entry
+ br label %merge
+
+merge: ; preds = %kill, %entry, %entry
+; CHECK: merge:
+; CHECK: %test = phi i8 addrspace(1)
+; CHECK-DAG: [ null, %kill ]
+; CHECK-DAG: [ %arg.relocated, %entry ]
+; CHECK-DAG: [ %arg.relocated, %entry ]
+ %test = phi i8 addrspace(1)* [ null, %kill ], [ %arg, %entry ], [ %arg, %entry ]
+ call void (...) @use(i8 addrspace(1)* %test)
+ unreachable
+}
+
+; Check to make sure we handle values live over an entry statepoint
+define void @test6(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3) gc "statepoint-example" {
+; CHECK-LABEL: @test6
+entry:
+ br i1 undef, label %gc.safepoint_poll.exit2, label %do_safepoint
+
+do_safepoint: ; preds = %entry
+; CHECK-LABEL: do_safepoint:
+; CHECK: gc.statepoint
+; CHECK: arg1.relocated =
+; CHECK: arg2.relocated =
+; CHECK: arg3.relocated =
+ call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3) ]
+ br label %gc.safepoint_poll.exit2
+
+gc.safepoint_poll.exit2: ; preds = %do_safepoint, %entry
+; CHECK-LABEL: gc.safepoint_poll.exit2:
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: [ %arg3, %entry ]
+; CHECK-DAG: [ %arg3.relocated, %do_safepoint ]
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: [ %arg2, %entry ]
+; CHECK-DAG: [ %arg2.relocated, %do_safepoint ]
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: [ %arg1, %entry ]
+; CHECK-DAG: [ %arg1.relocated, %do_safepoint ]
+ call void (...) @use(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
+ ret void
+}
+
+; Check relocation in a loop nest where a relocation happens in the outer
+; but not the inner loop
+define void @test_outer_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i1 %cmp) gc "statepoint-example" {
+; CHECK-LABEL: @test_outer_loop
+
+bci_0:
+ br label %outer-loop
+
+outer-loop: ; preds = %outer-inc, %bci_0
+; CHECK-LABEL: outer-loop:
+; CHECK: phi i8 addrspace(1)* [ %arg2, %bci_0 ], [ %arg2.relocated, %outer-inc ]
+; CHECK: phi i8 addrspace(1)* [ %arg1, %bci_0 ], [ %arg1.relocated, %outer-inc ]
+ br label %inner-loop
+
+inner-loop: ; preds = %inner-loop, %outer-loop
+ br i1 %cmp, label %inner-loop, label %outer-inc
+
+outer-inc: ; preds = %inner-loop
+; CHECK-LABEL: outer-inc:
+; CHECK: %arg1.relocated
+; CHECK: %arg2.relocated
+ call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2) ]
+ br label %outer-loop
+}
+
+; Check that both inner and outer loops get phis when relocation is in
+; inner loop
+define void @test_inner_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i1 %cmp) gc "statepoint-example" {
+; CHECK-LABEL: @test_inner_loop
+
+bci_0:
+ br label %outer-loop
+
+outer-loop: ; preds = %outer-inc, %bci_0
+; CHECK-LABEL: outer-loop:
+; CHECK: phi i8 addrspace(1)* [ %arg2, %bci_0 ], [ %arg2.relocated, %outer-inc ]
+; CHECK: phi i8 addrspace(1)* [ %arg1, %bci_0 ], [ %arg1.relocated, %outer-inc ]
+ br label %inner-loop
+; CHECK-LABEL: inner-loop
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: %outer-loop ]
+; CHECK-DAG: [ %arg2.relocated, %inner-loop ]
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: %outer-loop ]
+; CHECK-DAG: [ %arg1.relocated, %inner-loop ]
+; CHECK: gc.statepoint
+; CHECK: %arg1.relocated
+; CHECK: %arg2.relocated
+
+inner-loop: ; preds = %inner-loop, %outer-loop
+ call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2) ]
+ br i1 %cmp, label %inner-loop, label %outer-inc
+
+outer-inc: ; preds = %inner-loop
+; CHECK-LABEL: outer-inc:
+; This test shows why updating just those uses of the original value being
+; relocated dominated by the inserted relocation is not always sufficient.
+ br label %outer-loop
+}
+
+define i64 addrspace(1)* @test7(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2, i1 %condition) gc "statepoint-example" {
+; CHECK-LABEL: @test7
+entry:
+ br i1 %condition, label %branch2, label %join
+
+branch2: ; preds = %entry
+ br i1 %condition, label %callbb, label %join2
+
+callbb: ; preds = %branch2
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %join
+
+join: ; preds = %callbb, %entry
+; CHECK-LABEL: join:
+; CHECK: phi i64 addrspace(1)* [ %obj.relocated.casted, %callbb ], [ %obj, %entry ]
+; CHECK: phi i64 addrspace(1)*
+; CHECK-DAG: [ %obj, %entry ]
+; CHECK-DAG: [ %obj2.relocated.casted, %callbb ]
+ %phi1 = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %callbb ]
+ br label %join2
+
+join2: ; preds = %join, %branch2
+; CHECK-LABEL: join2:
+; CHECK: phi2 = phi i64 addrspace(1)*
+; CHECK-DAG: %join ]
+; CHECK-DAG: [ %obj2, %branch2 ]
+ %phi2 = phi i64 addrspace(1)* [ %obj, %join ], [ %obj2, %branch2 ]
+ ret i64 addrspace(1)* %phi2
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll
new file mode 100644
index 000000000000..0020c5116c13
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll
@@ -0,0 +1,150 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S 2>&1 | FileCheck %s
+
+
+declare void @use_obj16(i16 addrspace(1)*) "gc-leaf-function"
+declare void @use_obj32(i32 addrspace(1)*) "gc-leaf-function"
+declare void @use_obj64(i64 addrspace(1)*) "gc-leaf-function"
+
+declare void @do_safepoint()
+
+define void @test_gep_const(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_gep_const
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj32(i32 addrspace(1)* %ptr)
+ ret void
+}
+
+define void @test_gep_idx(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
+; CHECK-LABEL: test_gep_idx
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 %idx
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj32(i32 addrspace(1)* %ptr)
+ ret void
+}
+
+define void @test_bitcast(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_bitcast
+entry:
+ %ptr = bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj64(i64 addrspace(1)* %ptr)
+ ret void
+}
+
+define void @test_bitcast_gep(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_bitcast_gep
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ ret void
+}
+
+define void @test_intersecting_chains(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
+; CHECK-LABEL: test_intersecting_chains
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
+ %ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
+ ret void
+}
+
+define void @test_cost_threshold(i32 addrspace(1)* %base, i32 %idx1, i32 %idx2, i32 %idx3) gc "statepoint-example" {
+; CHECK-LABEL: test_cost_threshold
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 %idx1
+ %ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 %idx2
+ %ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 %idx3
+ %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep4 to i64 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ ret void
+}
+
+define void @test_two_derived(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_two_derived
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr2 = getelementptr i32, i32 addrspace(1)* %base, i32 12
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %ptr)
+ call void @use_obj32(i32 addrspace(1)* %ptr2)
+ ret void
+}
+
+define void @test_gep_smallint_array([3 x i32] addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_gep_smallint_array
+entry:
+ %ptr = getelementptr [3 x i32], [3 x i32] addrspace(1)* %base, i32 0, i32 2
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %ptr)
+ ret void
+}
+
+declare i32 @fake_personality_function()
+
+define void @test_invoke(i32 addrspace(1)* %base) gc "statepoint-example" personality i32 ()* @fake_personality_function {
+; CHECK-LABEL: test_invoke
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
+ %ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
+ invoke void @do_safepoint() [ "deopt"() ]
+ to label %normal unwind label %exception
+
+normal: ; preds = %entry
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
+ ret void
+
+exception: ; preds = %entry
+ %landing_pad4 = landingpad token
+ cleanup
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
+ ret void
+}
+
+define void @test_loop(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_loop
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call void @use_obj32(i32 addrspace(1)* %ptr.gep)
+ call void @do_safepoint() [ "deopt"() ]
+ br label %loop
+}
+
+define void @test_too_long(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_too_long
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.gep1 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 15
+ %ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep1, i32 15
+ %ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 15
+ %ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 15
+ %ptr.gep5 = getelementptr i32, i32 addrspace(1)* %ptr.gep4, i32 15
+ %ptr.gep6 = getelementptr i32, i32 addrspace(1)* %ptr.gep5, i32 15
+ %ptr.gep7 = getelementptr i32, i32 addrspace(1)* %ptr.gep6, i32 15
+ %ptr.gep8 = getelementptr i32, i32 addrspace(1)* %ptr.gep7, i32 15
+ %ptr.gep9 = getelementptr i32, i32 addrspace(1)* %ptr.gep8, i32 15
+ %ptr.gep10 = getelementptr i32, i32 addrspace(1)* %ptr.gep9, i32 15
+ %ptr.gep11 = getelementptr i32, i32 addrspace(1)* %ptr.gep10, i32 15
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %ptr.gep11)
+ ret void
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll
new file mode 100644
index 000000000000..e1d0140c1dcd
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll
@@ -0,0 +1,32 @@
+; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -verify -S < %s | FileCheck %s
+
+declare i8 addrspace(1)* @gc_call()
+
+declare i32* @fake_personality_function()
+
+define i8 addrspace(1)* @test(i1 %c) gc "statepoint-example" personality i32* ()* @fake_personality_function {
+; CHECK-LABEL: @test(
+entry:
+ br i1 %c, label %gc_invoke, label %normal_dest
+
+gc_invoke:
+; CHECK: [[TOKEN:%[^ ]+]] = invoke token {{[^@]+}}@llvm.experimental.gc.statepoint{{[^@]+}}@gc_call
+ %obj = invoke i8 addrspace(1)* @gc_call() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ to label %normal_dest unwind label %unwind_dest
+
+unwind_dest:
+; CHECK: unwind_dest:
+ %lpad = landingpad { i8*, i32 }
+ cleanup
+ resume { i8*, i32 } undef
+
+; CHECK: [[NORMAL_DEST_SPLIT:[^:]+:]]
+; CHECK-NEXT: [[RET_VAL:%[^ ]+]] = call i8 addrspace(1)* @llvm.experimental.gc.result.p1i8(token [[TOKEN]])
+; CHECK-NEXT: br label %normal_dest
+
+normal_dest:
+; CHECK: normal_dest:
+; CHECK-NEXT: %merge = phi i8 addrspace(1)* [ null, %entry ], [ %obj2, %normal_dest1 ]
+ %merge = phi i8 addrspace(1)* [ null, %entry ], [ %obj, %gc_invoke ]
+ ret i8 addrspace(1)* %merge
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
index 5913db21fcf3..b4954f6a9b60 100644
--- a/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
@@ -5,18 +5,20 @@ declare i8 addrspace(1)* @some_function()
declare void @some_function_consumer(i8 addrspace(1)*)
declare dereferenceable(4) i8 addrspace(1)* @some_function_ret_deref()
; CHECK: declare i8 addrspace(1)* @some_function_ret_deref()
+declare noalias i8 addrspace(1)* @some_function_ret_noalias()
+; CHECK: declare i8 addrspace(1)* @some_function_ret_noalias()
define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* dereferenceable(4) %a) gc "statepoint-example" {
; CHECK: define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* %a)
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* dereferenceable_or_null(4) %a) gc "statepoint-example" {
; CHECK: define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* %a)
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
@@ -25,7 +27,7 @@ define i8 addrspace(1)* @test_deref_retval() gc "statepoint-example" {
entry:
%a = call dereferenceable(4) i8 addrspace(1)* @some_function()
; CHECK: %a = call i8 addrspace(1)* @some_function()
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
@@ -34,7 +36,7 @@ define i8 addrspace(1)* @test_deref_or_null_retval() gc "statepoint-example" {
entry:
%a = call dereferenceable_or_null(4) i8 addrspace(1)* @some_function()
; CHECK: %a = call i8 addrspace(1)* @some_function()
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
@@ -43,7 +45,7 @@ define i8 @test_md(i8 addrspace(1)* %ptr) gc "statepoint-example" {
entry:
; CHECK: %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
%tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 %tmp
}
@@ -53,7 +55,7 @@ entry:
; No change here, but the prototype of some_function_ret_deref should have changed.
; CHECK: call i8 addrspace(1)* @some_function_ret_deref()
%a = call i8 addrspace(1)* @some_function_ret_deref()
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
@@ -62,11 +64,46 @@ define i8 addrspace(1)* @test_callsite_arg_attribute(i8 addrspace(1)* %ptr) gc "
entry:
; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
call void @some_function_consumer(i8 addrspace(1)* dereferenceable(4) %ptr)
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %ptr
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* noalias %a) gc "statepoint-example" {
+; CHECK: define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* %a)
+entry:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_noalias_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_noalias_retval(
+entry:
+ %a = call noalias i8 addrspace(1)* @some_function()
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_decl_only_noalias(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_decl_only_noalias(
+entry:
+; No change here, but the prototype of some_function_ret_noalias should have changed.
+; CHECK: call i8 addrspace(1)* @some_function_ret_noalias()
+ %a = call i8 addrspace(1)* @some_function_ret_noalias()
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_callsite_arg_noalias(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_callsite_arg_noalias(
+entry:
+; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
+ call void @some_function_consumer(i8 addrspace(1)* noalias %ptr)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %ptr
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
!0 = !{!1, !1, i64 0, i64 1}
!1 = !{!"red", !2}
diff --git a/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll b/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll
index 355ffa634f3c..3cd4bc65d1a5 100644
--- a/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll
+++ b/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll
@@ -8,12 +8,12 @@
declare void @foo()
declare void @use(...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
define void @test1(<2 x i32 addrspace(1)*> addrspace(1)* %obj) gc "statepoint-example" {
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
-; CHECK: %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+; CHECK: %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7)
; CHECK-NEXT: %obj.relocated.casted = bitcast i8 addrspace(1)* %obj.relocated to <2 x i32 addrspace(1)*> addrspace(1)*
call void (...) @use(<2 x i32 addrspace(1)*> addrspace(1)* %obj)
ret void
diff --git a/test/Transforms/RewriteStatepointsForGC/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
index 26ad73737adc..584fd7add1b6 100644
--- a/test/Transforms/RewriteStatepointsForGC/live-vector.ll
+++ b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
@@ -10,7 +10,7 @@ define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %obj
}
@@ -28,7 +28,7 @@ define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoi
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %obj
}
@@ -48,7 +48,7 @@ define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepo
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
entry:
%obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %obj
}
@@ -63,7 +63,7 @@ define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepo
; CHECK-NEXT: gc.statepoint
entry:
%obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
to label %normal_return unwind label %exceptional_return
; CHECK-LABEL: normal_return:
@@ -86,7 +86,7 @@ normal_return: ; preds = %entry
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %14
exceptional_return: ; preds = %entry
- %landing_pad4 = landingpad { i8*, i32 }
+ %landing_pad4 = landingpad token
cleanup
ret <2 x i64 addrspace(1)*> %obj
}
@@ -110,7 +110,7 @@ define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p)
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
entry:
%vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %vec
}
@@ -121,9 +121,6 @@ define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
; CHECK-LABEL: test6
; CHECK-LABEL: merge:
; CHECK-NEXT: = phi
-; CHECK-NEXT: = phi
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: gc.statepoint
@@ -131,12 +128,6 @@ define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
@@ -151,11 +142,11 @@ untaken:
merge:
%obj = phi <2 x i64 addrspace(1)*> [%obja, %taken], [%objb, %untaken]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %obj
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll b/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
index 6bc4d5324494..207003c17b5f 100644
--- a/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
+++ b/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
@@ -16,7 +16,7 @@ taken:
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
untaken:
@@ -25,7 +25,7 @@ untaken:
; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
merge:
@@ -42,7 +42,7 @@ entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: br
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br i1 %cmp, label %taken, label %untaken
taken:
@@ -54,7 +54,7 @@ taken:
; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
%obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %obj
untaken:
@@ -76,16 +76,16 @@ taken:
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
%obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
untaken:
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
merge:
@@ -114,9 +114,9 @@ entry:
; CHECK-NEXT: ret i64 addrspace(1)* %derived.relocated1.casted
;
%derived = getelementptr i64, i64 addrspace(1)* %obj, i64 8
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %derived
}
@@ -136,7 +136,7 @@ taken:
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
untaken:
@@ -163,4 +163,4 @@ final:
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/preprocess.ll b/test/Transforms/RewriteStatepointsForGC/preprocess.ll
index 012fff5c9e19..e1657497485b 100644
--- a/test/Transforms/RewriteStatepointsForGC/preprocess.ll
+++ b/test/Transforms/RewriteStatepointsForGC/preprocess.ll
@@ -17,7 +17,7 @@ next:
; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
%obj2 = phi i64 addrspace(1)* [ %obj, %entry ]
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
call void (...) @consume(i64 addrspace(1)* %obj2)
call void (...) @consume(i64 addrspace(1)* %obj)
ret void
@@ -33,7 +33,7 @@ define void @test7() gc "statepoint-example" {
unreached:
%obj = phi i64 addrspace(1)* [null, %unreached]
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
call void (...) @consume(i64 addrspace(1)* %obj)
br label %unreached
}
@@ -46,7 +46,7 @@ define void @test8() gc "statepoint-example" personality i32 ()* undef {
ret void
unreached:
- invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
to label %normal_return unwind label %exceptional_return
normal_return: ; preds = %entry
@@ -62,4 +62,4 @@ declare void @foo()
; Bound the last check-not
; CHECK-LABEL: @foo
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll b/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
index 1a5289b26656..d11441e9346f 100644
--- a/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
+++ b/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
@@ -25,9 +25,9 @@ normal_dest:
;; CHECK-NEXT: gc.statepoint
;; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
;; CHECK-NEXT: bitcast
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @gc_call, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @gc_call, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64* addrspace(1)* %obj
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/relocation.ll b/test/Transforms/RewriteStatepointsForGC/relocation.ll
index d7a84e5820c8..deea377c5a28 100644
--- a/test/Transforms/RewriteStatepointsForGC/relocation.ll
+++ b/test/Transforms/RewriteStatepointsForGC/relocation.ll
@@ -9,7 +9,7 @@ entry:
; CHECK-LABEL: @test1
; CHECK-DAG: %obj.relocated
; CHECK-DAG: %obj2.relocated
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %joint
joint:
@@ -61,7 +61,7 @@ loop_x:
br label %loop.backedge
loop.backedge:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
br label %loop
loop_y:
@@ -79,14 +79,14 @@ if_branch:
; CHECK-LABEL: if_branch:
; CHECK: gc.statepoint
; CHECK: gc.relocate
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %join
else_branch:
; CHECK-LABEL: else_branch:
; CHECK: gc.statepoint
; CHECK: gc.relocate
- %safepoint_token1 = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %join
join:
@@ -110,8 +110,8 @@ entry:
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.statepoint
- %safepoint_token = call i32 (i64, i32, void (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64 0, i32 0, void (i64)* undef, i32 1, i32 0, i64 undef, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- %safepoint_token1 = call i32 (i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64 0, i32 0, i32 (i64 addrspace(1)*)* undef, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64 0, i32 0, void (i64)* undef, i32 1, i32 0, i64 undef, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token1 = call token (i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64 0, i32 0, i32 (i64 addrspace(1)*)* undef, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret void
}
@@ -124,9 +124,9 @@ define void @test4() gc "statepoint-example" {
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK: @use(i8 addrspace(1)* %res.relocated)
- %safepoint_token2 = tail call i32 (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
- %res = call i8 addrspace(1)* @llvm.experimental.gc.result.ptr.p1i8(i32 %safepoint_token2)
- call i32 (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token2 = tail call token (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
+ %res = call i8 addrspace(1)* @llvm.experimental.gc.result.p1i8(token %safepoint_token2)
+ call token (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
call void (...) @use(i8 addrspace(1)* %res)
unreachable
}
@@ -136,7 +136,7 @@ define void @test4() gc "statepoint-example" {
define void @test5(i8 addrspace(1)* %arg) gc "statepoint-example" {
; CHECK-LABEL: test5
entry:
- call i32 (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
switch i32 undef, label %kill [
i32 10, label %merge
i32 13, label %merge
@@ -170,7 +170,7 @@ do_safepoint:
; CHECK: arg1.relocated =
; CHECK: arg2.relocated =
; CHECK: arg3.relocated =
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 3, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 3, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
br label %gc.safepoint_poll.exit2
gc.safepoint_poll.exit2:
@@ -209,7 +209,7 @@ outer-inc:
; CHECK-LABEL: outer-inc:
; CHECK: %arg1.relocated
; CHECK: %arg2.relocated
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
br label %outer-loop
}
@@ -232,13 +232,13 @@ inner-loop:
; CHECK: phi i8 addrspace(1)*
; CHECK-DAG: %outer-loop ]
; CHECK-DAG: [ %arg2.relocated, %inner-loop ]
-; CHECKL phi i8 addrspace(1)*
+; CHECK: phi i8 addrspace(1)*
; CHECK-DAG: %outer-loop ]
; CHECK-DAG: [ %arg1.relocated, %inner-loop ]
; CHECK: gc.statepoint
; CHECK: %arg1.relocated
; CHECK: %arg2.relocated
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
br i1 %cmp, label %inner-loop, label %outer-inc
outer-inc:
@@ -258,7 +258,7 @@ branch2:
br i1 %condition, label %callbb, label %join2
callbb:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %join
join:
@@ -285,11 +285,11 @@ join2:
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_p1i8f(i64, i32, i8 addrspace(1)* ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64, i32, void (i64)*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...)
-declare i8 addrspace(1)* @llvm.experimental.gc.result.ptr.p1i8(i32) #3
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_p1i8f(i64, i32, i8 addrspace(1)* ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64, i32, void (i64)*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.result.p1i8(token) #3
diff --git a/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll b/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll
index f04e7c797cad..445ab7bd768d 100644
--- a/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll
@@ -10,8 +10,8 @@ define void @"test_gep_const"(i32 addrspace(1)* %base) gc "statepoint-example" {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
; CHECK: getelementptr i32, i32 addrspace(1)* %base, i32 15
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %sp, i32 7, i32 7)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
; CHECK: bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
; CHECK: getelementptr i32, i32 addrspace(1)* %base.relocated.casted, i32 15
call void @use_obj32(i32 addrspace(1)* %base)
@@ -24,8 +24,8 @@ define void @"test_gep_idx"(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-ex
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %base, i32 %idx
; CHECK: getelementptr
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %sp, i32 7, i32 7)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
; CHECK: getelementptr i32, i32 addrspace(1)* %base.relocated.casted, i32 %idx
call void @use_obj32(i32 addrspace(1)* %base)
@@ -38,8 +38,8 @@ define void @"test_bitcast"(i32 addrspace(1)* %base) gc "statepoint-example" {
entry:
%ptr = bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
; CHECK: bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %sp, i32 7, i32 7)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
; CHECK: bitcast i32 addrspace(1)* %base.relocated.casted to i64 addrspace(1)*
call void @use_obj32(i32 addrspace(1)* %base)
@@ -47,6 +47,40 @@ entry:
ret void
}
+define void @"test_bitcast_bitcast"(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_bitcast_bitcast
+entry:
+ %ptr1 = bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
+ %ptr2 = bitcast i64 addrspace(1)* %ptr1 to i16 addrspace(1)*
+ ; CHECK: bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
+ ; CHECK: bitcast i64 addrspace(1)* %ptr1 to i16 addrspace(1)*
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
+ ; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
+ ; CHECK: bitcast i32 addrspace(1)* %base.relocated.casted to i64 addrspace(1)*
+ ; CHECK: bitcast i64 addrspace(1)* %ptr1.remat to i16 addrspace(1)*
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj16(i16 addrspace(1)* %ptr2)
+ ret void
+}
+
+define void @"test_addrspacecast_addrspacecast"(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_addrspacecast_addrspacecast
+entry:
+ %ptr1 = addrspacecast i32 addrspace(1)* %base to i32*
+ %ptr2 = addrspacecast i32* %ptr1 to i32 addrspace(1)*
+ ; CHECK: addrspacecast i32 addrspace(1)* %base to i32*
+ ; CHECK: addrspacecast i32* %ptr1 to i32 addrspace(1)*
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
+ ; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
+ ; CHECK: %ptr2.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 8)
+ ; CHECK: %ptr2.relocated.casted = bitcast i8 addrspace(1)* %ptr2.relocated to i32 addrspace(1)*
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj32(i32 addrspace(1)* %ptr2)
+ ret void
+}
+
define void @"test_bitcast_gep"(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_bitcast_gep
entry:
@@ -54,7 +88,7 @@ entry:
; CHECK: getelementptr
%ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
; CHECK: bitcast
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: getelementptr
@@ -73,7 +107,7 @@ entry:
; CHECK: bitcast
%ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
; CHECK: bitcast
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: getelementptr
; CHECK: bitcast
; CHECK: getelementptr
@@ -96,7 +130,7 @@ entry:
; CHECK: getelementptr
%ptr.cast = bitcast i32 addrspace(1)* %ptr.gep4 to i64 addrspace(1)*
; CHECK: bitcast
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: gc.relocate
@@ -112,7 +146,7 @@ entry:
%ptr2 = getelementptr i32, i32 addrspace(1)* %base, i32 12
; CHECK: getelementptr
; CHECK: getelementptr
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: getelementptr
@@ -127,7 +161,7 @@ define void @"test_gep_smallint_array"([3 x i32] addrspace(1)* %base) gc "statep
entry:
%ptr = getelementptr [3 x i32], [3 x i32] addrspace(1)* %base, i32 0, i32 2
; CHECK: getelementptr
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: getelementptr
@@ -146,7 +180,7 @@ entry:
; CHECK: bitcast
%ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
; CHECK: bitcast
- %sp = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
to label %normal unwind label %exception
normal:
@@ -163,7 +197,7 @@ normal:
exception:
; CHECK-LABEL: exception:
- %landing_pad4 = landingpad { i8*, i32 }
+ %landing_pad4 = landingpad token
cleanup
; CHECK: gc.relocate
; CHECK: bitcast
@@ -187,7 +221,7 @@ loop:
; CHECK: phi i32 addrspace(1)* [ %ptr.gep, %entry ], [ %ptr.gep.remat, %loop ]
; CHECK: phi i32 addrspace(1)* [ %base, %entry ], [ %base.relocated.casted, %loop ]
call void @use_obj32(i32 addrspace(1)* %ptr.gep)
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: getelementptr
@@ -209,7 +243,7 @@ entry:
%ptr.gep9 = getelementptr i32, i32 addrspace(1)* %ptr.gep8, i32 15
%ptr.gep10 = getelementptr i32, i32 addrspace(1)* %ptr.gep9, i32 15
%ptr.gep11 = getelementptr i32, i32 addrspace(1)* %ptr.gep10, i32 15
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: gc.relocate
@@ -219,4 +253,4 @@ entry:
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/SCCP/global-alias-constprop.ll b/test/Transforms/SCCP/global-alias-constprop.ll
new file mode 100644
index 000000000000..be7e083e6a67
--- /dev/null
+++ b/test/Transforms/SCCP/global-alias-constprop.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -sccp -S | FileCheck %s
+
+@0 = private unnamed_addr constant [2 x i32] [i32 -1, i32 1]
+@"\01??_7A@@6B@" = unnamed_addr alias i32, getelementptr inbounds ([2 x i32], [2 x i32]* @0, i32 0, i32 1)
+
+; CHECK: ret i32 1
+
+define i32 @main() {
+ %a = load i32, i32* @"\01??_7A@@6B@"
+ ret i32 %a
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/commute.ll b/test/Transforms/SLPVectorizer/AArch64/commute.ll
index 1cff73d9f695..2bce59c62000 100644
--- a/test/Transforms/SLPVectorizer/AArch64/commute.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/commute.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -slp-vectorizer %s | FileCheck %s
+; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
diff --git a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
new file mode 100644
index 000000000000..8f8bf2648aa2
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -0,0 +1,270 @@
+; RUN: opt -slp-vectorizer -slp-threshold=-6 -S < %s | FileCheck %s
+
+; FIXME: The threshold is changed to keep this test case a bit smaller.
+; The AArch64 cost model should not give such high costs to select statements.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux"
+
+; CHECK-LABEL: test_select
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: select <4 x i1>
+define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h) {
+entry:
+ %cmp.22 = icmp sgt i32 %h, 0
+ br i1 %cmp.22, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %idx.ext = sext i32 %lx to i64
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %s.026 = phi i32 [ 0, %for.body.lr.ph ], [ %add27, %for.body ]
+ %j.025 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %p2.024 = phi i32* [ %blk2, %for.body.lr.ph ], [ %add.ptr29, %for.body ]
+ %p1.023 = phi i32* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %for.body ]
+ %0 = load i32, i32* %p1.023, align 4
+ %1 = load i32, i32* %p2.024, align 4
+ %sub = sub nsw i32 %0, %1
+ %cmp2 = icmp slt i32 %sub, 0
+ %sub3 = sub nsw i32 0, %sub
+ %sub3.sub = select i1 %cmp2, i32 %sub3, i32 %sub
+ %add = add nsw i32 %sub3.sub, %s.026
+ %arrayidx4 = getelementptr inbounds i32, i32* %p1.023, i64 1
+ %2 = load i32, i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32* %p2.024, i64 1
+ %3 = load i32, i32* %arrayidx5, align 4
+ %sub6 = sub nsw i32 %2, %3
+ %cmp7 = icmp slt i32 %sub6, 0
+ %sub9 = sub nsw i32 0, %sub6
+ %v.1 = select i1 %cmp7, i32 %sub9, i32 %sub6
+ %add11 = add nsw i32 %add, %v.1
+ %arrayidx12 = getelementptr inbounds i32, i32* %p1.023, i64 2
+ %4 = load i32, i32* %arrayidx12, align 4
+ %arrayidx13 = getelementptr inbounds i32, i32* %p2.024, i64 2
+ %5 = load i32, i32* %arrayidx13, align 4
+ %sub14 = sub nsw i32 %4, %5
+ %cmp15 = icmp slt i32 %sub14, 0
+ %sub17 = sub nsw i32 0, %sub14
+ %sub17.sub14 = select i1 %cmp15, i32 %sub17, i32 %sub14
+ %add19 = add nsw i32 %add11, %sub17.sub14
+ %arrayidx20 = getelementptr inbounds i32, i32* %p1.023, i64 3
+ %6 = load i32, i32* %arrayidx20, align 4
+ %arrayidx21 = getelementptr inbounds i32, i32* %p2.024, i64 3
+ %7 = load i32, i32* %arrayidx21, align 4
+ %sub22 = sub nsw i32 %6, %7
+ %cmp23 = icmp slt i32 %sub22, 0
+ %sub25 = sub nsw i32 0, %sub22
+ %v.3 = select i1 %cmp23, i32 %sub25, i32 %sub22
+ %add27 = add nsw i32 %add19, %v.3
+ %add.ptr = getelementptr inbounds i32, i32* %p1.023, i64 %idx.ext
+ %add.ptr29 = getelementptr inbounds i32, i32* %p2.024, i64 %idx.ext
+ %inc = add nuw nsw i32 %j.025, 1
+ %exitcond = icmp eq i32 %inc, %h
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %s.0.lcssa = phi i32 [ 0, %entry ], [ %add27, %for.end.loopexit ]
+ ret i32 %s.0.lcssa
+}
+
+;; Check whether SLP can find a reduction phi whose incoming blocks are not
+;; the same as the block containing the phi.
+;;
+;; Came from code like,
+;;
+;; int s = 0;
+;; for (int j = 0; j < h; j++) {
+;; s += p1[0] * p2[0]
+;; s += p1[1] * p2[1];
+;; s += p1[2] * p2[2];
+;; s += p1[3] * p2[3];
+;; if (s >= lim)
+;; break;
+;; p1 += lx;
+;; p2 += lx;
+;; }
+define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) {
+; CHECK-LABEL: reduction_with_br
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: mul nsw <4 x i32>
+entry:
+ %cmp.16 = icmp sgt i32 %h, 0
+ br i1 %cmp.16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %idx.ext = sext i32 %lx to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %if.end
+ %s.020 = phi i32 [ 0, %for.body.lr.ph ], [ %add13, %if.end ]
+ %j.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
+ %p2.018 = phi i32* [ %blk2, %for.body.lr.ph ], [ %add.ptr16, %if.end ]
+ %p1.017 = phi i32* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end ]
+ %0 = load i32, i32* %p1.017, align 4
+ %1 = load i32, i32* %p2.018, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, %s.020
+ %arrayidx2 = getelementptr inbounds i32, i32* %p1.017, i64 1
+ %2 = load i32, i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %p2.018, i64 1
+ %3 = load i32, i32* %arrayidx3, align 4
+ %mul4 = mul nsw i32 %3, %2
+ %add5 = add nsw i32 %add, %mul4
+ %arrayidx6 = getelementptr inbounds i32, i32* %p1.017, i64 2
+ %4 = load i32, i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32, i32* %p2.018, i64 2
+ %5 = load i32, i32* %arrayidx7, align 4
+ %mul8 = mul nsw i32 %5, %4
+ %add9 = add nsw i32 %add5, %mul8
+ %arrayidx10 = getelementptr inbounds i32, i32* %p1.017, i64 3
+ %6 = load i32, i32* %arrayidx10, align 4
+ %arrayidx11 = getelementptr inbounds i32, i32* %p2.018, i64 3
+ %7 = load i32, i32* %arrayidx11, align 4
+ %mul12 = mul nsw i32 %7, %6
+ %add13 = add nsw i32 %add9, %mul12
+ %cmp14 = icmp slt i32 %add13, %lim
+ br i1 %cmp14, label %if.end, label %for.end.loopexit
+
+if.end: ; preds = %for.body
+ %add.ptr = getelementptr inbounds i32, i32* %p1.017, i64 %idx.ext
+ %add.ptr16 = getelementptr inbounds i32, i32* %p2.018, i64 %idx.ext
+ %inc = add nuw nsw i32 %j.019, 1
+ %cmp = icmp slt i32 %inc, %h
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body, %if.end
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %s.1 = phi i32 [ 0, %entry ], [ %add13, %for.end.loopexit ]
+ ret i32 %s.1
+}
+
+; CHECK: test_unrolled_select
+; CHECK: load <8 x i8>
+; CHECK: load <8 x i8>
+; CHECK: select <8 x i1>
+define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) #0 {
+entry:
+ %cmp.43 = icmp sgt i32 %h, 0
+ br i1 %cmp.43, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %idx.ext = sext i32 %lx to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %if.end.86
+ %s.047 = phi i32 [ 0, %for.body.lr.ph ], [ %add82, %if.end.86 ]
+ %j.046 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end.86 ]
+ %p2.045 = phi i8* [ %blk2, %for.body.lr.ph ], [ %add.ptr88, %if.end.86 ]
+ %p1.044 = phi i8* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end.86 ]
+ %0 = load i8, i8* %p1.044, align 1
+ %conv = zext i8 %0 to i32
+ %1 = load i8, i8* %p2.045, align 1
+ %conv2 = zext i8 %1 to i32
+ %sub = sub nsw i32 %conv, %conv2
+ %cmp3 = icmp slt i32 %sub, 0
+ %sub5 = sub nsw i32 0, %sub
+ %sub5.sub = select i1 %cmp3, i32 %sub5, i32 %sub
+ %add = add nsw i32 %sub5.sub, %s.047
+ %arrayidx6 = getelementptr inbounds i8, i8* %p1.044, i64 1
+ %2 = load i8, i8* %arrayidx6, align 1
+ %conv7 = zext i8 %2 to i32
+ %arrayidx8 = getelementptr inbounds i8, i8* %p2.045, i64 1
+ %3 = load i8, i8* %arrayidx8, align 1
+ %conv9 = zext i8 %3 to i32
+ %sub10 = sub nsw i32 %conv7, %conv9
+ %cmp11 = icmp slt i32 %sub10, 0
+ %sub14 = sub nsw i32 0, %sub10
+ %v.1 = select i1 %cmp11, i32 %sub14, i32 %sub10
+ %add16 = add nsw i32 %add, %v.1
+ %arrayidx17 = getelementptr inbounds i8, i8* %p1.044, i64 2
+ %4 = load i8, i8* %arrayidx17, align 1
+ %conv18 = zext i8 %4 to i32
+ %arrayidx19 = getelementptr inbounds i8, i8* %p2.045, i64 2
+ %5 = load i8, i8* %arrayidx19, align 1
+ %conv20 = zext i8 %5 to i32
+ %sub21 = sub nsw i32 %conv18, %conv20
+ %cmp22 = icmp slt i32 %sub21, 0
+ %sub25 = sub nsw i32 0, %sub21
+ %sub25.sub21 = select i1 %cmp22, i32 %sub25, i32 %sub21
+ %add27 = add nsw i32 %add16, %sub25.sub21
+ %arrayidx28 = getelementptr inbounds i8, i8* %p1.044, i64 3
+ %6 = load i8, i8* %arrayidx28, align 1
+ %conv29 = zext i8 %6 to i32
+ %arrayidx30 = getelementptr inbounds i8, i8* %p2.045, i64 3
+ %7 = load i8, i8* %arrayidx30, align 1
+ %conv31 = zext i8 %7 to i32
+ %sub32 = sub nsw i32 %conv29, %conv31
+ %cmp33 = icmp slt i32 %sub32, 0
+ %sub36 = sub nsw i32 0, %sub32
+ %v.3 = select i1 %cmp33, i32 %sub36, i32 %sub32
+ %add38 = add nsw i32 %add27, %v.3
+ %arrayidx39 = getelementptr inbounds i8, i8* %p1.044, i64 4
+ %8 = load i8, i8* %arrayidx39, align 1
+ %conv40 = zext i8 %8 to i32
+ %arrayidx41 = getelementptr inbounds i8, i8* %p2.045, i64 4
+ %9 = load i8, i8* %arrayidx41, align 1
+ %conv42 = zext i8 %9 to i32
+ %sub43 = sub nsw i32 %conv40, %conv42
+ %cmp44 = icmp slt i32 %sub43, 0
+ %sub47 = sub nsw i32 0, %sub43
+ %sub47.sub43 = select i1 %cmp44, i32 %sub47, i32 %sub43
+ %add49 = add nsw i32 %add38, %sub47.sub43
+ %arrayidx50 = getelementptr inbounds i8, i8* %p1.044, i64 5
+ %10 = load i8, i8* %arrayidx50, align 1
+ %conv51 = zext i8 %10 to i32
+ %arrayidx52 = getelementptr inbounds i8, i8* %p2.045, i64 5
+ %11 = load i8, i8* %arrayidx52, align 1
+ %conv53 = zext i8 %11 to i32
+ %sub54 = sub nsw i32 %conv51, %conv53
+ %cmp55 = icmp slt i32 %sub54, 0
+ %sub58 = sub nsw i32 0, %sub54
+ %v.5 = select i1 %cmp55, i32 %sub58, i32 %sub54
+ %add60 = add nsw i32 %add49, %v.5
+ %arrayidx61 = getelementptr inbounds i8, i8* %p1.044, i64 6
+ %12 = load i8, i8* %arrayidx61, align 1
+ %conv62 = zext i8 %12 to i32
+ %arrayidx63 = getelementptr inbounds i8, i8* %p2.045, i64 6
+ %13 = load i8, i8* %arrayidx63, align 1
+ %conv64 = zext i8 %13 to i32
+ %sub65 = sub nsw i32 %conv62, %conv64
+ %cmp66 = icmp slt i32 %sub65, 0
+ %sub69 = sub nsw i32 0, %sub65
+ %sub69.sub65 = select i1 %cmp66, i32 %sub69, i32 %sub65
+ %add71 = add nsw i32 %add60, %sub69.sub65
+ %arrayidx72 = getelementptr inbounds i8, i8* %p1.044, i64 7
+ %14 = load i8, i8* %arrayidx72, align 1
+ %conv73 = zext i8 %14 to i32
+ %arrayidx74 = getelementptr inbounds i8, i8* %p2.045, i64 7
+ %15 = load i8, i8* %arrayidx74, align 1
+ %conv75 = zext i8 %15 to i32
+ %sub76 = sub nsw i32 %conv73, %conv75
+ %cmp77 = icmp slt i32 %sub76, 0
+ %sub80 = sub nsw i32 0, %sub76
+ %v.7 = select i1 %cmp77, i32 %sub80, i32 %sub76
+ %add82 = add nsw i32 %add71, %v.7
+ %cmp83 = icmp slt i32 %add82, %lim
+ br i1 %cmp83, label %if.end.86, label %for.end.loopexit
+
+if.end.86: ; preds = %for.body
+ %add.ptr = getelementptr inbounds i8, i8* %p1.044, i64 %idx.ext
+ %add.ptr88 = getelementptr inbounds i8, i8* %p2.045, i64 %idx.ext
+ %inc = add nuw nsw i32 %j.046, 1
+ %cmp = icmp slt i32 %inc, %h
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body, %if.end.86
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %s.1 = phi i32 [ 0, %entry ], [ %add82, %for.end.loopexit ]
+ ret i32 %s.1
+}
+
diff --git a/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll b/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
new file mode 100644
index 000000000000..87d021d534cf
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
@@ -0,0 +1,76 @@
+; RUN: opt -S -basicaa -slp-vectorizer -dce < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; CHECK-LABEL: @foo
+define void @foo(float* noalias %a, float* noalias %b, float* noalias %c) {
+entry:
+; Check that we don't lose !nontemporal hint when vectorizing loads.
+; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
+ %b1 = load float, float* %b, align 4, !nontemporal !0
+ %arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
+ %b2 = load float, float* %arrayidx.1, align 4, !nontemporal !0
+ %arrayidx.2 = getelementptr inbounds float, float* %b, i64 2
+ %b3 = load float, float* %arrayidx.2, align 4, !nontemporal !0
+ %arrayidx.3 = getelementptr inbounds float, float* %b, i64 3
+ %b4 = load float, float* %arrayidx.3, align 4, !nontemporal !0
+
+; Check that we don't introduce !nontemporal hint when the original scalar loads didn't have it.
+; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
+ %c1 = load float, float* %c, align 4
+ %arrayidx2.1 = getelementptr inbounds float, float* %c, i64 1
+ %c2 = load float, float* %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds float, float* %c, i64 2
+ %c3 = load float, float* %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds float, float* %c, i64 3
+ %c4 = load float, float* %arrayidx2.3, align 4
+
+ %a1 = fadd float %b1, %c1
+ %a2 = fadd float %b2, %c2
+ %a3 = fadd float %b3, %c3
+ %a4 = fadd float %b4, %c4
+
+; Check that we don't lose !nontemporal hint when vectorizing stores.
+; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
+ store float %a1, float* %a, align 4, !nontemporal !0
+ %arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
+ store float %a2, float* %arrayidx3.1, align 4, !nontemporal !0
+ %arrayidx3.2 = getelementptr inbounds float, float* %a, i64 2
+ store float %a3, float* %arrayidx3.2, align 4, !nontemporal !0
+ %arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
+ store float %a4, float* %arrayidx3.3, align 4, !nontemporal !0
+
+; CHECK: ret void
+ ret void
+}
+
+; CHECK-LABEL: @foo2
+define void @foo2(float* noalias %a, float* noalias %b) {
+entry:
+; Check that we don't mark vector load with !nontemporal attribute if some of
+; the original scalar loads don't have it.
+; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
+ %b1 = load float, float* %b, align 4, !nontemporal !0
+ %arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
+ %b2 = load float, float* %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, float* %b, i64 2
+ %b3 = load float, float* %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, float* %b, i64 3
+ %b4 = load float, float* %arrayidx.3, align 4, !nontemporal !0
+
+; Check that we don't mark vector store with !nontemporal attribute if some of
+; the original scalar stores don't have it.
+; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4{{$}}
+ store float %b1, float* %a, align 4, !nontemporal !0
+ %arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
+ store float %b2, float* %arrayidx3.1, align 4
+ %arrayidx3.2 = getelementptr inbounds float, float* %a, i64 2
+ store float %b3, float* %arrayidx3.2, align 4
+ %arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
+ store float %b4, float* %arrayidx3.3, align 4, !nontemporal !0
+
+; CHECK: ret void
+ ret void
+}
+
+!0 = !{i32 1}
diff --git a/test/Transforms/SLPVectorizer/X86/bad_types.ll b/test/Transforms/SLPVectorizer/X86/bad_types.ll
index 2d8f3832ee29..98c29068bb96 100644
--- a/test/Transforms/SLPVectorizer/X86/bad_types.ll
+++ b/test/Transforms/SLPVectorizer/X86/bad_types.ll
@@ -47,4 +47,30 @@ exit:
ret void
}
+define i8 @test3(i8 *%addr) {
+; Check that we do not vectorize types that are padded to a bigger ones.
+;
+; CHECK-LABEL: @test3
+; CHECK-NOT: <4 x i2>
+; CHECK: ret i8
+entry:
+ %a = bitcast i8* %addr to i2*
+ %a0 = getelementptr inbounds i2, i2* %a, i64 0
+ %a1 = getelementptr inbounds i2, i2* %a, i64 1
+ %a2 = getelementptr inbounds i2, i2* %a, i64 2
+ %a3 = getelementptr inbounds i2, i2* %a, i64 3
+ %l0 = load i2, i2* %a0, align 1
+ %l1 = load i2, i2* %a1, align 1
+ %l2 = load i2, i2* %a2, align 1
+ %l3 = load i2, i2* %a3, align 1
+ br label %bb1
+bb1: ; preds = %entry
+ %p0 = phi i2 [ %l0, %entry ]
+ %p1 = phi i2 [ %l1, %entry ]
+ %p2 = phi i2 [ %l2, %entry ]
+ %p3 = phi i2 [ %l3, %entry ]
+ %r = zext i2 %p2 to i8
+ ret i8 %r
+}
+
declare void @f(i64, i64)
diff --git a/test/Transforms/SLPVectorizer/X86/commutativity.ll b/test/Transforms/SLPVectorizer/X86/commutativity.ll
new file mode 100644
index 000000000000..2798ccb15e48
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/commutativity.ll
@@ -0,0 +1,78 @@
+; RUN: opt -slp-vectorizer < %s -S | FileCheck %s
+
+; Verify that the SLP vectorizer is able to figure out that commutativity
+; offers the possibility to splat/broadcast %c and thus make it profitable
+; to vectorize this case
+
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+@cle = external unnamed_addr global [32 x i8], align 16
+@cle32 = external unnamed_addr global [32 x i32], align 16
+
+
+; Check that we correctly detect a splat/broadcast by leveraging the
+; commutativity property of `xor`.
+
+; CHECK-LABEL: @splat
+; CHECK: store <16 x i8>
+define void @splat(i8 %a, i8 %b, i8 %c) {
+ %1 = xor i8 %c, %a
+ store i8 %1, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16
+ %2 = xor i8 %a, %c
+ store i8 %2, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1)
+ %3 = xor i8 %a, %c
+ store i8 %3, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2)
+ %4 = xor i8 %a, %c
+ store i8 %4, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 3)
+ %5 = xor i8 %c, %a
+ store i8 %5, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 4)
+ %6 = xor i8 %c, %b
+ store i8 %6, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 5)
+ %7 = xor i8 %c, %a
+ store i8 %7, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 6)
+ %8 = xor i8 %c, %b
+ store i8 %8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 7)
+ %9 = xor i8 %a, %c
+ store i8 %9, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 8)
+ %10 = xor i8 %a, %c
+ store i8 %10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 9)
+ %11 = xor i8 %a, %c
+ store i8 %11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 10)
+ %12 = xor i8 %a, %c
+ store i8 %12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 11)
+ %13 = xor i8 %a, %c
+ store i8 %13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 12)
+ %14 = xor i8 %a, %c
+ store i8 %14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 13)
+ %15 = xor i8 %a, %c
+ store i8 %15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 14)
+ %16 = xor i8 %a, %c
+ store i8 %16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 15)
+ ret void
+}
+
+
+
+; Check that we correctly detect that we can have the same opcode on one side by
+; leveraging the commutativity property of `xor`.
+
+; CHECK-LABEL: @same_opcode_on_one_side
+; CHECK: store <4 x i32>
+define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
+ %add1 = add i32 %c, %a
+ %add2 = add i32 %c, %a
+ %add3 = add i32 %a, %c
+ %add4 = add i32 %c, %a
+ %1 = xor i32 %add1, %a
+ store i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16
+ %2 = xor i32 %b, %add2
+ store i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1)
+ %3 = xor i32 %c, %add3
+ store i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2)
+ %4 = xor i32 %a, %add4
+ store i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3)
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll
index 1d349fbc98b5..4472225811b1 100644
--- a/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -21,7 +21,7 @@ target triple = "x86_64-apple-macosx10.7.0"
;CHECK: ![[LOC]] = !DILocation(line: 4, scope:
;CHECK: ![[LOC2]] = !DILocation(line: 7, scope:
-define i32 @depth(double* nocapture %A, i32 %m) #0 {
+define i32 @depth(double* nocapture %A, i32 %m) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata double* %A, i64 0, metadata !12, metadata !DIExpression()), !dbg !19
tail call void @llvm.dbg.value(metadata i32 %m, i64 0, metadata !13, metadata !DIExpression()), !dbg !19
@@ -57,11 +57,11 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18, !32}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "file.c", directory: "/Users/nadav")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "depth", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (double*, i32)* @depth, variables: !11)
+!4 = distinct !DISubprogram(name: "depth", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !11)
!5 = !DIFile(filename: "file.c", directory: "/Users/nadav")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9, !8}
@@ -69,11 +69,11 @@ attributes #1 = { nounwind readnone }
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
!10 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!11 = !{!12, !13, !14, !15, !16}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "A", line: 1, arg: 1, scope: !4, file: !5, type: !9)
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "m", line: 1, arg: 2, scope: !4, file: !5, type: !8)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y0", line: 2, scope: !4, file: !5, type: !10)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y1", line: 2, scope: !4, file: !5, type: !10)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3, scope: !17, file: !5, type: !8)
+!12 = !DILocalVariable(name: "A", line: 1, arg: 1, scope: !4, file: !5, type: !9)
+!13 = !DILocalVariable(name: "m", line: 1, arg: 2, scope: !4, file: !5, type: !8)
+!14 = !DILocalVariable(name: "y0", line: 2, scope: !4, file: !5, type: !10)
+!15 = !DILocalVariable(name: "y1", line: 2, scope: !4, file: !5, type: !10)
+!16 = !DILocalVariable(name: "i", line: 3, scope: !17, file: !5, type: !8)
!17 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
!18 = !{i32 2, !"Dwarf Version", i32 2}
!19 = !DILocation(line: 1, scope: !4)
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 83b2e01f04e0..62cf4c1fcfb3 100644
--- a/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -1,4 +1,4 @@
-; RUN: opt -slp-vectorizer -slp-vectorize-hor -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSTORE
+; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSTORE
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/test/Transforms/SLPVectorizer/X86/pr23510.ll b/test/Transforms/SLPVectorizer/X86/pr23510.ll
new file mode 100644
index 000000000000..efdb0ecd9996
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr23510.ll
@@ -0,0 +1,38 @@
+; PR23510
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @_Z3fooPml(
+; CHECK: lshr <2 x i64>
+; CHECK: lshr <2 x i64>
+
+@total = global i64 0, align 8
+
+define void @_Z3fooPml(i64* nocapture %a, i64 %i) {
+entry:
+ %tmp = load i64, i64* %a, align 8
+ %shr = lshr i64 %tmp, 4
+ store i64 %shr, i64* %a, align 8
+ %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 1
+ %tmp1 = load i64, i64* %arrayidx1, align 8
+ %shr2 = lshr i64 %tmp1, 4
+ store i64 %shr2, i64* %arrayidx1, align 8
+ %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 %i
+ %tmp2 = load i64, i64* %arrayidx3, align 8
+ %tmp3 = load i64, i64* @total, align 8
+ %add = add i64 %tmp3, %tmp2
+ store i64 %add, i64* @total, align 8
+ %tmp4 = load i64, i64* %a, align 8
+ %shr5 = lshr i64 %tmp4, 4
+ store i64 %shr5, i64* %a, align 8
+ %tmp5 = load i64, i64* %arrayidx1, align 8
+ %shr7 = lshr i64 %tmp5, 4
+ store i64 %shr7, i64* %arrayidx1, align 8
+ %tmp6 = load i64, i64* %arrayidx3, align 8
+ %tmp7 = load i64, i64* @total, align 8
+ %add9 = add i64 %tmp7, %tmp6
+ store i64 %add9, i64* @total, align 8
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/schedule_budget.ll b/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
new file mode 100644
index 000000000000..2cb2373381c7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Test if the budget for the scheduling region size works.
+; We test with a reduced budget of 16 which should prevent vectorizing the loads.
+
+declare void @unknown()
+
+; CHECK-LABEL: @test
+; CHECK: load float
+; CHECK: load float
+; CHECK: load float
+; CHECK: load float
+; CHECK: call void @unknown
+; CHECK: store float
+; CHECK: store float
+; CHECK: store float
+; CHECK: store float
+; CHECK: load <4 x float>
+; CHECK: store <4 x float>
+define void @test(float * %a, float * %b, float * %c, float * %d) {
+entry:
+ ; Don't vectorize these loads.
+ %l0 = load float, float* %a
+ %a1 = getelementptr inbounds float, float* %a, i64 1
+ %l1 = load float, float* %a1
+ %a2 = getelementptr inbounds float, float* %a, i64 2
+ %l2 = load float, float* %a2
+ %a3 = getelementptr inbounds float, float* %a, i64 3
+ %l3 = load float, float* %a3
+
+ ; some unrelated instructions inbetween to enlarge the scheduling region
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+
+ ; Don't vectorize these stores because their operands are too far away.
+ store float %l0, float* %b
+ %b1 = getelementptr inbounds float, float* %b, i64 1
+ store float %l1, float* %b1
+ %b2 = getelementptr inbounds float, float* %b, i64 2
+ store float %l2, float* %b2
+ %b3 = getelementptr inbounds float, float* %b, i64 3
+ store float %l3, float* %b3
+
+ ; But still vectorize the following instructions, because even if the budget
+ ; is exceeded there is a minimum region size.
+ %l4 = load float, float* %c
+ %c1 = getelementptr inbounds float, float* %c, i64 1
+ %l5 = load float, float* %c1
+ %c2 = getelementptr inbounds float, float* %c, i64 2
+ %l6 = load float, float* %c2
+ %c3 = getelementptr inbounds float, float* %c, i64 3
+ %l7 = load float, float* %c3
+
+ store float %l4, float* %d
+ %d1 = getelementptr inbounds float, float* %d, i64 1
+ store float %l5, float* %d1
+ %d2 = getelementptr inbounds float, float* %d, i64 2
+ store float %l6, float* %d2
+ %d3 = getelementptr inbounds float, float* %d, i64 3
+ store float %l7, float* %d3
+
+ ret void
+}
+
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index ad2794167a5e..7b5daa9d7823 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -sroa -S | FileCheck %s
-; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
+; RUN: opt < %s -passes=sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -1610,3 +1610,26 @@ entry:
%load = load atomic volatile i64, i64* %ptr seq_cst, align 8
ret void
}
+
+define i16 @PR24463() {
+; Ensure we can handle a very interesting case where there is an integer-based
+; rewrite of the uses of the alloca, but where one of the integers in that is
+; a sub-integer that requires extraction *and* extends past the end of the
+; alloca. In this case, we should extract the i8 and then zext it to i16.
+;
+; CHECK-LABEL: @PR24463(
+; CHECK-NOT: alloca
+; CHECK: %[[SHIFT:.*]] = lshr i16 0, 8
+; CHECK: %[[TRUNC:.*]] = trunc i16 %[[SHIFT]] to i8
+; CHECK: %[[ZEXT:.*]] = zext i8 %[[TRUNC]] to i16
+; CHECK: ret i16 %[[ZEXT]]
+entry:
+ %alloca = alloca [3 x i8]
+ %gep1 = getelementptr inbounds [3 x i8], [3 x i8]* %alloca, i64 0, i64 1
+ %bc1 = bitcast i8* %gep1 to i16*
+ store i16 0, i16* %bc1
+ %gep2 = getelementptr inbounds [3 x i8], [3 x i8]* %alloca, i64 0, i64 2
+ %bc2 = bitcast i8* %gep2 to i16*
+ %load = load i16, i16* %bc2
+ ret i16 %load
+}
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
index 4de7bfcb898d..ea41a20fd38e 100644
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -sroa -S | FileCheck %s
-; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
diff --git a/test/Transforms/SROA/fca.ll b/test/Transforms/SROA/fca.ll
index 6eaa73f53189..707f680e64e8 100644
--- a/test/Transforms/SROA/fca.ll
+++ b/test/Transforms/SROA/fca.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -sroa -S | FileCheck %s
-; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
define { i32, i32 } @test0(i32 %x, i32 %y) {
diff --git a/test/Transforms/SafeStack/AArch64/abi.ll b/test/Transforms/SafeStack/AArch64/abi.ll
new file mode 100644
index 000000000000..cdec923eb74c
--- /dev/null
+++ b/test/Transforms/SafeStack/AArch64/abi.ll
@@ -0,0 +1,20 @@
+; RUN: opt -safe-stack -S -mtriple=aarch64-linux-android < %s -o - | FileCheck %s
+
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; CHECK: %[[TP:.*]] = call i8* @llvm.aarch64.thread.pointer()
+; CHECK: %[[SPA0:.*]] = getelementptr i8, i8* %[[TP]], i32 72
+; CHECK: %[[SPA:.*]] = bitcast i8* %[[SPA0]] to i8**
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: store i8* %[[USST]], i8** %[[SPA]]
+
+ %a = alloca i8, align 8
+ call void @Capture(i8* %a)
+
+; CHECK: store i8* %[[USP]], i8** %[[SPA]]
+ ret void
+}
+
+declare void @Capture(i8*)
diff --git a/test/Transforms/SafeStack/AArch64/lit.local.cfg b/test/Transforms/SafeStack/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..cec29af5bbe4
--- /dev/null
+++ b/test/Transforms/SafeStack/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/SafeStack/ARM/abi.ll b/test/Transforms/SafeStack/ARM/abi.ll
new file mode 100644
index 000000000000..e33c913ae916
--- /dev/null
+++ b/test/Transforms/SafeStack/ARM/abi.ll
@@ -0,0 +1,18 @@
+; RUN: opt -safe-stack -S -mtriple=arm-linux-android < %s -o - | FileCheck %s
+
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; CHECK: %[[SPA:.*]] = call i8** @__safestack_pointer_address()
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: store i8* %[[USST]], i8** %[[SPA]]
+
+ %a = alloca i8, align 8
+ call void @Capture(i8* %a)
+
+; CHECK: store i8* %[[USP]], i8** %[[SPA]]
+ ret void
+}
+
+declare void @Capture(i8*)
diff --git a/test/Transforms/SafeStack/ARM/lit.local.cfg b/test/Transforms/SafeStack/ARM/lit.local.cfg
new file mode 100644
index 000000000000..98c6700c209d
--- /dev/null
+++ b/test/Transforms/SafeStack/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'ARM' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/SafeStack/ARM/setjmp.ll b/test/Transforms/SafeStack/ARM/setjmp.ll
new file mode 100644
index 000000000000..8c57908bbe4b
--- /dev/null
+++ b/test/Transforms/SafeStack/ARM/setjmp.ll
@@ -0,0 +1,34 @@
+; Test stack pointer restore after setjmp() with the function-call safestack ABI.
+; RUN: opt -safe-stack -S -mtriple=arm-linux-androideabi < %s -o - | FileCheck %s
+
+@env = global [64 x i32] zeroinitializer, align 4
+
+define void @f(i32 %b) safestack {
+entry:
+; CHECK: %[[SPA:.*]] = call i8** @__safestack_pointer_address()
+; CHECK: %[[USDP:.*]] = alloca i8*
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: store i8* %[[USP]], i8** %[[USDP]]
+; CHECK: call i32 @setjmp
+
+ %call = call i32 @setjmp(i32* getelementptr inbounds ([64 x i32], [64 x i32]* @env, i32 0, i32 0)) returns_twice
+
+; CHECK: %[[USP2:.*]] = load i8*, i8** %[[USDP]]
+; CHECK: store i8* %[[USP2]], i8** %[[SPA]]
+
+ %tobool = icmp eq i32 %b, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+ %0 = alloca [42 x i8], align 1
+ %.sub = getelementptr inbounds [42 x i8], [42 x i8]* %0, i32 0, i32 0
+ call void @_Z7CapturePv(i8* %.sub)
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare i32 @setjmp(i32*) returns_twice
+
+declare void @_Z7CapturePv(i8*)
diff --git a/test/Transforms/SafeStack/X86/abi.ll b/test/Transforms/SafeStack/X86/abi.ll
new file mode 100644
index 000000000000..f437c4f7157d
--- /dev/null
+++ b/test/Transforms/SafeStack/X86/abi.ll
@@ -0,0 +1,30 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s --check-prefix=TLS
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s --check-prefix=TLS
+; RUN: opt -safe-stack -S -mtriple=i686-linux-android < %s -o - | FileCheck %s --check-prefix=DIRECT-TLS32
+; RUN: opt -safe-stack -S -mtriple=x86_64-linux-android < %s -o - | FileCheck %s --check-prefix=DIRECT-TLS64
+
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; TLS: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; TLS: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; TLS: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
+
+; DIRECT-TLS32: %[[USP:.*]] = load i8*, i8* addrspace(256)* inttoptr (i32 36 to i8* addrspace(256)*)
+; DIRECT-TLS32: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; DIRECT-TLS32: store i8* %[[USST]], i8* addrspace(256)* inttoptr (i32 36 to i8* addrspace(256)*)
+
+; DIRECT-TLS64: %[[USP:.*]] = load i8*, i8* addrspace(257)* inttoptr (i32 72 to i8* addrspace(257)*)
+; DIRECT-TLS64: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; DIRECT-TLS64: store i8* %[[USST]], i8* addrspace(257)* inttoptr (i32 72 to i8* addrspace(257)*)
+
+ %a = alloca i8, align 8
+ call void @Capture(i8* %a)
+
+; TLS: store i8* %[[USP]], i8** @__safestack_unsafe_stack_ptr
+; DIRECT-TLS32: store i8* %[[USP]], i8* addrspace(256)* inttoptr (i32 36 to i8* addrspace(256)*)
+; DIRECT-TLS64: store i8* %[[USP]], i8* addrspace(257)* inttoptr (i32 72 to i8* addrspace(257)*)
+ ret void
+}
+
+declare void @Capture(i8*)
diff --git a/test/Transforms/SafeStack/X86/lit.local.cfg b/test/Transforms/SafeStack/X86/lit.local.cfg
new file mode 100644
index 000000000000..e71f3cc4c41e
--- /dev/null
+++ b/test/Transforms/SafeStack/X86/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/SafeStack/array.ll b/test/Transforms/SafeStack/array.ll
index 6036bfc2c9c5..564213e6d58f 100644
--- a/test/Transforms/SafeStack/array.ll
+++ b/test/Transforms/SafeStack/array.ll
@@ -1,9 +1,14 @@
; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -safe-stack-usp-storage=single-thread -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck -check-prefix=SINGLE-THREAD %s
; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -safe-stack-usp-storage=single-thread -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck -check-prefix=SINGLE-THREAD %s
; array [4 x i8]
; Requires protector.
+; CHECK: @__safestack_unsafe_stack_ptr = external thread_local(initialexec) global i8*
+; SINGLE-THREAD: @__safestack_unsafe_stack_ptr = external global i8*
+
define void @foo(i8* %a) nounwind uwtable safestack {
entry:
; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
@@ -35,4 +40,52 @@ entry:
ret void
}
+; Load from an array at a fixed offset, no overflow.
+define i8 @StaticArrayFixedSafe() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: define i8 @StaticArrayFixedSafe(
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %buf = alloca i8, i32 4, align 1
+ %gep = getelementptr inbounds i8, i8* %buf, i32 2
+ %x = load i8, i8* %gep, align 1
+ ret i8 %x
+}
+
+; Load from an array at a fixed offset with overflow.
+define i8 @StaticArrayFixedUnsafe() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: define i8 @StaticArrayFixedUnsafe(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %buf = alloca i8, i32 4, align 1
+ %gep = getelementptr inbounds i8, i8* %buf, i32 5
+ %x = load i8, i8* %gep, align 1
+ ret i8 %x
+}
+
+; Load from an array at an unknown offset.
+define i8 @StaticArrayVariableUnsafe(i32 %ofs) nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: define i8 @StaticArrayVariableUnsafe(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %buf = alloca i8, i32 4, align 1
+ %gep = getelementptr inbounds i8, i8* %buf, i32 %ofs
+ %x = load i8, i8* %gep, align 1
+ ret i8 %x
+}
+
+; Load from an array of an unknown size.
+define i8 @DynamicArrayUnsafe(i32 %sz) nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: define i8 @DynamicArrayUnsafe(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %buf = alloca i8, i32 %sz, align 1
+ %gep = getelementptr inbounds i8, i8* %buf, i32 2
+ %x = load i8, i8* %gep, align 1
+ ret i8 %x
+}
+
declare i8* @strcpy(i8*, i8*)
diff --git a/test/Transforms/SafeStack/byval.ll b/test/Transforms/SafeStack/byval.ll
new file mode 100644
index 000000000000..f9a06e54d2df
--- /dev/null
+++ b/test/Transforms/SafeStack/byval.ll
@@ -0,0 +1,51 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.S = type { [100 x i32] }
+
+; Safe access to a byval argument.
+define i32 @ByValSafe(%struct.S* byval nocapture readonly align 8 %zzz) norecurse nounwind readonly safestack uwtable {
+entry:
+ ; CHECK-LABEL: @ByValSafe
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i32
+ %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 3
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; Unsafe access to a byval argument.
+; Argument is copied to the unsafe stack.
+define i32 @ByValUnsafe(%struct.S* byval nocapture readonly align 8 %zzz, i64 %idx) norecurse nounwind readonly safestack uwtable {
+entry:
+ ; CHECK-LABEL: @ByValUnsafe
+ ; CHECK: %[[A:.*]] = load {{.*}} @__safestack_unsafe_stack_ptr
+ ; CHECK: store {{.*}} @__safestack_unsafe_stack_ptr
+ ; CHECK: %[[B:.*]] = getelementptr i8, i8* %[[A]], i32 -400
+ ; CHECK: %[[C:.*]] = bitcast %struct.S* %zzz to i8*
+ ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[B]], i8* %[[C]], i64 400, i32 8, i1 false)
+ ; CHECK: ret i32
+ %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 %idx
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; Highly aligned byval argument.
+define i32 @ByValUnsafeAligned(%struct.S* byval nocapture readonly align 64 %zzz, i64 %idx) norecurse nounwind readonly safestack uwtable {
+entry:
+ ; CHECK-LABEL: @ByValUnsafeAligned
+ ; CHECK: %[[A:.*]] = load {{.*}} @__safestack_unsafe_stack_ptr
+ ; CHECK: %[[B:.*]] = ptrtoint i8* %[[A]] to i64
+ ; CHECK: and i64 %[[B]], -64
+ ; CHECK: ret i32
+ %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 0
+ %0 = load i32, i32* %arrayidx, align 64
+ %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 %idx
+ %1 = load i32, i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ ret i32 %add
+}
+
diff --git a/test/Transforms/SafeStack/call.ll b/test/Transforms/SafeStack/call.ll
index ac12ec02b0b1..cbac4ce1bb0d 100644
--- a/test/Transforms/SafeStack/call.ll
+++ b/test/Transforms/SafeStack/call.ll
@@ -6,10 +6,11 @@
; no arrays / no nested arrays
; Requires no protector.
-; CHECK-LABEL: @foo(
define void @foo(i8* %a) nounwind uwtable safestack {
entry:
+ ; CHECK-LABEL: define void @foo(
; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
%0 = load i8*, i8** %a.addr, align 8
@@ -18,3 +19,160 @@ entry:
}
declare i32 @printf(i8*, ...)
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @call_memset(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_memset
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 %len, i32 1, i1 false)
+ ret void
+}
+
+define void @call_constant_memset() safestack {
+entry:
+ ; CHECK-LABEL: define void @call_constant_memset
+ ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 2
+ call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 7, i32 1, i1 false)
+ ret void
+}
+
+define void @call_constant_overflow_memset() safestack {
+entry:
+ ; CHECK-LABEL: define void @call_constant_overflow_memset
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 7
+ call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 5, i32 1, i1 false)
+ ret void
+}
+
+define void @call_constant_underflow_memset() safestack {
+entry:
+ ; CHECK-LABEL: define void @call_constant_underflow_memset
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr [10 x i8], [10 x i8]* %q, i32 0, i32 -1
+ call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 3, i32 1, i1 false)
+ ret void
+}
+
+; Readnone nocapture -> safe
+define void @call_readnone(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readnone
+ ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readnone(i8* %arraydecay)
+ ret void
+}
+
+; Arg0 is readnone, arg1 is not. Pass alloca ptr as arg0 -> safe
+define void @call_readnone0_0(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readnone0_0
+ ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readnone0(i8* %arraydecay, i8* zeroinitializer)
+ ret void
+}
+
+; Arg0 is readnone, arg1 is not. Pass alloca ptr as arg1 -> unsafe
+define void @call_readnone0_1(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readnone0_1
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readnone0(i8 *zeroinitializer, i8* %arraydecay)
+ ret void
+}
+
+; Readonly nocapture -> unsafe
+define void @call_readonly(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readonly
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readonly(i8* %arraydecay)
+ ret void
+}
+
+; Readonly nocapture -> unsafe
+define void @call_arg_readonly(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_arg_readonly
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @arg_readonly(i8* %arraydecay)
+ ret void
+}
+
+; Readwrite nocapture -> unsafe
+define void @call_readwrite(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readwrite
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readwrite(i8* %arraydecay)
+ ret void
+}
+
+; Captures the argument -> unsafe
+define void @call_capture(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_capture
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @capture(i8* %arraydecay)
+ ret void
+}
+
+; Lifetime intrinsics are always safe.
+define void @call_lifetime(i32* %p) {
+ ; CHECK-LABEL: define void @call_lifetime
+ ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+entry:
+ %q = alloca [100 x i8], align 16
+ %0 = bitcast [100 x i8]* %q to i8*
+ call void @llvm.lifetime.start(i64 100, i8* %0)
+ call void @llvm.lifetime.end(i64 100, i8* %0)
+ ret void
+}
+
+declare void @readonly(i8* nocapture) readonly
+declare void @arg_readonly(i8* readonly nocapture)
+declare void @readwrite(i8* nocapture)
+declare void @capture(i8* readnone) readnone
+
+declare void @readnone(i8* nocapture) readnone
+declare void @readnone0(i8* nocapture readnone, i8* nocapture)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind argmemonly
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind argmemonly
diff --git a/test/Transforms/SafeStack/cast.ll b/test/Transforms/SafeStack/cast.ll
index df6273a117c3..23f525d5e0b1 100644
--- a/test/Transforms/SafeStack/cast.ll
+++ b/test/Transforms/SafeStack/cast.ll
@@ -4,14 +4,36 @@
@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
; PtrToInt/IntToPtr Cast
-; Requires no protector.
-; CHECK-LABEL: @foo(
-define void @foo() nounwind uwtable safestack {
+define void @IntToPtr() nounwind uwtable safestack {
entry:
+ ; CHECK-LABEL: @IntToPtr(
; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
%1 = inttoptr i64 %0 to i32*
ret void
}
+
+define i8 @BitCastNarrow() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @BitCastNarrow(
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ %1 = load i8, i8* %0, align 1
+ ret i8 %1
+}
+
+define i64 @BitCastWide() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @BitCastWide(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i64
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i64*
+ %1 = load i64, i64* %0, align 1
+ ret i64 %1
+}
diff --git a/test/Transforms/SafeStack/debug-loc.ll b/test/Transforms/SafeStack/debug-loc.ll
new file mode 100644
index 000000000000..e72d0e9d2ff2
--- /dev/null
+++ b/test/Transforms/SafeStack/debug-loc.ll
@@ -0,0 +1,83 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+
+; Test debug location for the local variables moved onto the unsafe stack.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.S = type { [100 x i8] }
+
+; Function Attrs: safestack uwtable
+define void @f(%struct.S* byval align 8 %zzz) #0 !dbg !12 {
+; CHECK: define void @f
+
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+
+ %xxx = alloca %struct.S, align 1
+ call void @llvm.dbg.declare(metadata %struct.S* %zzz, metadata !18, metadata !19), !dbg !20
+ call void @llvm.dbg.declare(metadata %struct.S* %xxx, metadata !21, metadata !19), !dbg !22
+
+; dbg.declare for %zzz and %xxx are gone; replaced with dbg.declare based off the unsafe stack pointer
+; CHECK-NOT: call void @llvm.dbg.declare
+; CHECK: call void @llvm.dbg.declare(metadata i8* %[[USP]], metadata ![[VAR_ARG:.*]], metadata ![[EXPR_ARG:.*]])
+; CHECK-NOT: call void @llvm.dbg.declare
+; CHECK: call void @llvm.dbg.declare(metadata i8* %[[USP]], metadata ![[VAR_LOCAL:.*]], metadata ![[EXPR_LOCAL:.*]])
+; CHECK-NOT: call void @llvm.dbg.declare
+
+ call void @Capture(%struct.S* %zzz), !dbg !23
+ call void @Capture(%struct.S* %xxx), !dbg !24
+
+; dbg.declare appears before the first use
+; CHECK: call void @Capture
+; CHECK: call void @Capture
+
+ ret void, !dbg !25
+}
+
+; CHECK-DAG: ![[VAR_ARG]] = !DILocalVariable(name: "zzz"
+; 100 aligned up to 8
+; CHECK-DAG: ![[EXPR_ARG]] = !DIExpression(DW_OP_deref, DW_OP_minus, 104
+
+; CHECK-DAG: ![[VAR_LOCAL]] = !DILocalVariable(name: "xxx"
+; CHECK-DAG: ![[EXPR_LOCAL]] = !DIExpression(DW_OP_deref, DW_OP_minus, 208
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @Capture(%struct.S*) #2
+
+attributes #0 = { safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16}
+!llvm.ident = !{!17}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 254019) (llvm/trunk 254036)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !11)
+!1 = !DIFile(filename: "../llvm/2.cc", directory: "/code/build-llvm")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !1, line: 4, size: 800, align: 8, elements: !5, identifier: "_ZTS1S")
+!5 = !{!6}
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !"_ZTS1S", file: !1, line: 5, baseType: !7, size: 800, align: 8)
+!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 800, align: 8, elements: !9)
+!8 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!9 = !{!10}
+!10 = !DISubrange(count: 100)
+!11 = !{!12}
+!12 = distinct !DISubprogram(name: "f", linkageName: "_Z1f1S", scope: !1, file: !1, line: 10, type: !13, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!13 = !DISubroutineType(types: !14)
+!14 = !{null, !"_ZTS1S"}
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{!"clang version 3.8.0 (trunk 254019) (llvm/trunk 254036)"}
+!18 = !DILocalVariable(name: "zzz", arg: 1, scope: !12, file: !1, line: 10, type: !"_ZTS1S")
+!19 = !DIExpression()
+!20 = !DILocation(line: 10, column: 10, scope: !12)
+!21 = !DILocalVariable(name: "xxx", scope: !12, file: !1, line: 11, type: !"_ZTS1S")
+!22 = !DILocation(line: 11, column: 5, scope: !12)
+!23 = !DILocation(line: 12, column: 3, scope: !12)
+!24 = !DILocation(line: 13, column: 3, scope: !12)
+!25 = !DILocation(line: 14, column: 1, scope: !12)
diff --git a/test/Transforms/SafeStack/ret.ll b/test/Transforms/SafeStack/ret.ll
new file mode 100644
index 000000000000..b2b8e5665297
--- /dev/null
+++ b/test/Transforms/SafeStack/ret.ll
@@ -0,0 +1,17 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Returns an alloca address.
+; Requires protector.
+
+define i64 @foo() nounwind readnone safestack {
+entry:
+ ; CHECK-LABEL: define i64 @foo(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i64
+ %x = alloca [100 x i32], align 16
+ %0 = ptrtoint [100 x i32]* %x to i64
+ ret i64 %0
+}
diff --git a/test/Transforms/SafeStack/setjmp2.ll b/test/Transforms/SafeStack/setjmp2.ll
index 65fd920d63da..bb15d7e03ace 100644
--- a/test/Transforms/SafeStack/setjmp2.ll
+++ b/test/Transforms/SafeStack/setjmp2.ll
@@ -25,7 +25,7 @@ entry:
; CHECK-NEXT: %[[INTTOPTR:.*]] = inttoptr i64 %[[AND]] to i8*
; CHECK-NEXT: store i8* %[[INTTOPTR]], i8** @__safestack_unsafe_stack_ptr
; CHECK-NEXT: store i8* %[[INTTOPTR]], i8** %unsafe_stack_dynamic_ptr
- ; CHECK-NEXT: %[[ALLOCA:.*]] = inttoptr i64 %[[SUB]] to i32*
+ ; CHECK-NEXT: %[[ALLOCA:.*]] = bitcast i8* %[[INTTOPTR]] to i32*
%a = alloca i32, i32 %size
; CHECK: setjmp
diff --git a/test/Transforms/SafeStack/store.ll b/test/Transforms/SafeStack/store.ll
new file mode 100644
index 000000000000..f493dd038bb8
--- /dev/null
+++ b/test/Transforms/SafeStack/store.ll
@@ -0,0 +1,63 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+define void @bad_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @bad_store(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = ptrtoint i32* %a to i64
+ %1 = inttoptr i64 %0 to i64*
+ store i64 zeroinitializer, i64* %1
+ ret void
+}
+
+define void @good_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @good_store(
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ store i8 zeroinitializer, i8* %0
+ ret void
+}
+
+define void @overflow_gep_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @overflow_gep_store(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ %1 = getelementptr i8, i8* %0, i32 4
+ store i8 zeroinitializer, i8* %1
+ ret void
+}
+
+define void @underflow_gep_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @underflow_gep_store(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ %1 = getelementptr i8, i8* %0, i32 -1
+ store i8 zeroinitializer, i8* %1
+ ret void
+}
+
+define void @good_gep_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @good_gep_store(
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ %1 = getelementptr i8, i8* %0, i32 3
+ store i8 zeroinitializer, i8* %1
+ ret void
+}
diff --git a/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof b/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof
index cc7f0d4f2773..30e26cc81841 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof
@@ -1,2 +1,2 @@
empty:100:0
-1.-3: 10
+ 1.-3: 10
diff --git a/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof b/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof
index abcb0ba38415..62227746655e 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof
@@ -1,3 +1,3 @@
3empty:100:BAD
-0: 0
-1: 100
+ 0: 0
+ 1: 100
diff --git a/test/Transforms/SampleProfile/Inputs/bad_mangle.prof b/test/Transforms/SampleProfile/Inputs/bad_mangle.prof
index 50fe86119b71..33b4c42cab44 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_mangle.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_mangle.prof
@@ -1,3 +1,3 @@
double convert<std::string, float>(float):2909472:181842
-0: 181842
-1: 181842
+ 0: 181842
+ 1: 181842
diff --git a/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof b/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof
index 038c45f77e30..608affa3ff94 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof
@@ -1,3 +1,3 @@
empty:100:0
-0: 0
-1: BAD
+ 0: 0
+ 1: BAD
diff --git a/test/Transforms/SampleProfile/Inputs/bad_samples.prof b/test/Transforms/SampleProfile/Inputs/bad_samples.prof
index a121d8c1ac40..bce7db9708d2 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_samples.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_samples.prof
@@ -1,2 +1,2 @@
empty:100:0
-1.3: -10
+ 1.3: -10
diff --git a/test/Transforms/SampleProfile/Inputs/branch.prof b/test/Transforms/SampleProfile/Inputs/branch.prof
index cd1cb5b1f16b..ac958e325c40 100644
--- a/test/Transforms/SampleProfile/Inputs/branch.prof
+++ b/test/Transforms/SampleProfile/Inputs/branch.prof
@@ -1,8 +1,10 @@
main:15680:0
-0: 0
-4: 0
-7: 0
-9: 10226
-10: 2243
-16: 0
-18: 0
+ 1: 2500
+ 4: 1000
+ 5: 1000
+ 6: 800
+ 7: 500
+ 9: 10226
+ 10: 2243
+ 16: 0
+ 18: 0
diff --git a/test/Transforms/SampleProfile/Inputs/calls.prof b/test/Transforms/SampleProfile/Inputs/calls.prof
index 57d3887dfb65..be64a1ead428 100644
--- a/test/Transforms/SampleProfile/Inputs/calls.prof
+++ b/test/Transforms/SampleProfile/Inputs/calls.prof
@@ -1,10 +1,10 @@
_Z3sumii:105580:5279
-0: 5279
-1: 5279
-2: 5279
+ 0: 5279
+ 1: 5279
+ 2: 5279
main:225715:0
-2.1: 5553
-3: 5391
-# This indicates that at line 3 of this function, the 'then' branch
-# of the conditional is taken (discriminator '1').
-3.1: 5752 _Z3sumii:5860
+ 2.1: 5553
+ 3: 5391
+ # This indicates that at line 3 of this function, the 'then' branch
+ # of the conditional is taken (discriminator '1').
+ 3.1: 5752 _Z3sumii:5860
diff --git a/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof b/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof
new file mode 100644
index 000000000000..528e42ca3880
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof
@@ -0,0 +1,10 @@
+main:20111403:0
+ 2.1: 404065
+ 3: 443089
+ 3.1: 0
+ 4: 404066
+ 6: 0
+ 7: 0
+ 3.1: _Z12never_calledi:0
+ 0: 0
+ 1: 0
diff --git a/test/Transforms/SampleProfile/Inputs/coverage-warning.prof b/test/Transforms/SampleProfile/Inputs/coverage-warning.prof
new file mode 100644
index 000000000000..57989b837a0f
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/coverage-warning.prof
@@ -0,0 +1,5 @@
+foo:30000:100
+ 2: 28000
+ 3: 1000
+# This profile is stale. Function foo() does not have a line 8 anymore.
+ 8: 1700
diff --git a/test/Transforms/SampleProfile/Inputs/discriminator.prof b/test/Transforms/SampleProfile/Inputs/discriminator.prof
index a6bcbc511a16..0c2561d725c3 100644
--- a/test/Transforms/SampleProfile/Inputs/discriminator.prof
+++ b/test/Transforms/SampleProfile/Inputs/discriminator.prof
@@ -1,8 +1,8 @@
foo:1000:0
-1: 1
-2: 1
-2.1: 100
-3: 100
-3.1: 5
-4: 100
-5: 1
+ 1: 1
+ 2: 1
+ 2.1: 100
+ 3: 100
+ 3.1: 5
+ 4: 100
+ 5: 1
diff --git a/test/Transforms/SampleProfile/Inputs/entry_counts.prof b/test/Transforms/SampleProfile/Inputs/entry_counts.prof
index 5c2172b5a4d3..95addc9f7a11 100644
--- a/test/Transforms/SampleProfile/Inputs/entry_counts.prof
+++ b/test/Transforms/SampleProfile/Inputs/entry_counts.prof
@@ -1,3 +1,3 @@
empty:100:13293
-0: 0
-1: 100
+ 0: 0
+ 1: 100
diff --git a/test/Transforms/SampleProfile/Inputs/fnptr.binprof b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
index 14d7fd555dae..a074f53db945 100644
--- a/test/Transforms/SampleProfile/Inputs/fnptr.binprof
+++ b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
Binary files differ
diff --git a/test/Transforms/SampleProfile/Inputs/fnptr.prof b/test/Transforms/SampleProfile/Inputs/fnptr.prof
index 6a3b4e2315bb..2491c427393b 100644
--- a/test/Transforms/SampleProfile/Inputs/fnptr.prof
+++ b/test/Transforms/SampleProfile/Inputs/fnptr.prof
@@ -1,12 +1,12 @@
_Z3fooi:7711:610
-1: 610
+ 1: 610
_Z3bari:20301:1437
-1: 1437
+ 1: 1437
main:184019:0
-4: 534
-6: 2080
-9: 2064 _Z3bari:1471 _Z3fooi:631
-5.1: 1075
-5: 1075
-7: 534
-4.2: 534
+ 4: 534
+ 6: 2080
+ 9: 2064 _Z3bari:1471 _Z3fooi:631
+ 5.1: 1075
+ 5: 1075
+ 7: 534
+ 4.2: 534
diff --git a/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo b/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo
new file mode 100644
index 000000000000..93f22ce30533
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo
Binary files differ
diff --git a/test/Transforms/SampleProfile/Inputs/inline-coverage.prof b/test/Transforms/SampleProfile/Inputs/inline-coverage.prof
new file mode 100644
index 000000000000..3d792733149a
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/inline-coverage.prof
@@ -0,0 +1,7 @@
+main:501438:0
+ 2.1: 23478
+ 3: 23478
+ 4: 0
+ 0: 0
+ 3: _Z3fool:172746
+ 1: 31878
diff --git a/test/Transforms/SampleProfile/Inputs/inline-hint.prof b/test/Transforms/SampleProfile/Inputs/inline-hint.prof
new file mode 100644
index 000000000000..a6840346eb43
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/inline-hint.prof
@@ -0,0 +1,3 @@
+_Z6hot_fnRxi:700:0
+_Z7cold_fnRxi:1:0
+other:299:0
diff --git a/test/Transforms/SampleProfile/Inputs/inline.prof b/test/Transforms/SampleProfile/Inputs/inline.prof
new file mode 100644
index 000000000000..386cdf8a7b5e
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/inline.prof
@@ -0,0 +1,7 @@
+main:225715:0
+ 2.1: 5553
+ 3: 5391
+ 3.1: _Z3sumii:5860
+ 0: 5279
+ 1: 5279
+ 2: 5279
diff --git a/test/Transforms/SampleProfile/Inputs/nolocinfo.prof b/test/Transforms/SampleProfile/Inputs/nolocinfo.prof
new file mode 100644
index 000000000000..fc69aa8ae783
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/nolocinfo.prof
@@ -0,0 +1,3 @@
+foo:30000:100
+ 2: 28000
+ 3: 1000
diff --git a/test/Transforms/SampleProfile/Inputs/offset.prof b/test/Transforms/SampleProfile/Inputs/offset.prof
new file mode 100644
index 000000000000..b07ce3504fb2
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/offset.prof
@@ -0,0 +1,4 @@
+_Z3fooi:300:1
+ 65532: 1000
+ 65533: 10
+ 65535: 990
diff --git a/test/Transforms/SampleProfile/Inputs/propagate.prof b/test/Transforms/SampleProfile/Inputs/propagate.prof
index b28609be66c1..ee9c6d62dfd1 100644
--- a/test/Transforms/SampleProfile/Inputs/propagate.prof
+++ b/test/Transforms/SampleProfile/Inputs/propagate.prof
@@ -1,17 +1,17 @@
_Z3fooiil:58139:0
-0: 0
-1: 0
-2: 0
-4: 1
-5: 10
-6: 0
-7: 5
-8: 3
-9: 0
-10: 0
-11: 6339
-12: 16191
-13: 8141
-16: 1
-18: 0
-19: 0
+ 0: 0
+ 1: 0
+ 2: 0
+ 4: 1
+ 5: 10
+ 6: 2
+ 7: 5
+ 8: 3
+ 9: 0
+ 10: 0
+ 11: 6339
+ 12: 16191
+ 13: 8141
+ 16: 1
+ 18: 0
+ 19: 0
diff --git a/test/Transforms/SampleProfile/Inputs/remarks.prof b/test/Transforms/SampleProfile/Inputs/remarks.prof
new file mode 100644
index 000000000000..1e905834cf41
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/remarks.prof
@@ -0,0 +1,7 @@
+main:623868:0
+ 0: 0
+ 0: _Z3foov:623868
+ 3: 18346
+ 4: 0
+ 6: 19475
+ 2: 18305
diff --git a/test/Transforms/SampleProfile/Inputs/syntax.prof b/test/Transforms/SampleProfile/Inputs/syntax.prof
index f3738912a9dc..465212d86e84 100644
--- a/test/Transforms/SampleProfile/Inputs/syntax.prof
+++ b/test/Transforms/SampleProfile/Inputs/syntax.prof
@@ -1,3 +1,3 @@
empty:100:0
-0: 0
-1: 100
+ 0: 0
+ 1: 100
diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll
index 25bd455a044c..1700749f0be9 100644
--- a/test/Transforms/SampleProfile/branch.ll
+++ b/test/Transforms/SampleProfile/branch.ll
@@ -4,14 +4,14 @@
;
; #include <stdio.h>
; #include <stdlib.h>
-;
+
; int main(int argc, char *argv[]) {
; if (argc < 2)
; return 1;
; double result;
; int limit = atoi(argv[1]);
; if (limit > 100) {
-; double s = 23.041968;
+; double s = 23.041968 * atoi(argv[2]);
; for (int u = 0; u < limit; u++) {
; double x = s;
; s = x + 3.049 + (double)u;
@@ -19,7 +19,7 @@
; }
; result = s;
; } else {
-; result = 0;
+; result = atoi(argv[2]);
; }
; printf("result is %lf\n", result);
; return 0;
@@ -27,117 +27,213 @@
@.str = private unnamed_addr constant [15 x i8] c"result is %lf\0A\00", align 1
-; Function Attrs: nounwind uwtable
-define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 {
+; Function Attrs: uwtable
+define i32 @main(i32 %argc, i8** %argv) #0 !dbg !6 {
; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main':
entry:
- tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !13, metadata !DIExpression()), !dbg !27
- tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !14, metadata !DIExpression()), !dbg !27
- %cmp = icmp slt i32 %argc, 2, !dbg !28
- br i1 %cmp, label %return, label %if.end, !dbg !28
-; CHECK: edge entry -> return probability is 0 / 1 = 0%
-; CHECK: edge entry -> if.end probability is 1 / 1 = 100%
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ %result = alloca double, align 8
+ %limit = alloca i32, align 4
+ %s = alloca double, align 8
+ %u = alloca i32, align 4
+ %x = alloca double, align 8
+ store i32 0, i32* %retval, align 4
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !16, metadata !17), !dbg !18
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !19, metadata !17), !dbg !20
+ %0 = load i32, i32* %argc.addr, align 4, !dbg !21
+ %cmp = icmp slt i32 %0, 2, !dbg !23
+ br i1 %cmp, label %if.then, label %if.end, !dbg !24
+; CHECK: edge entry -> if.then probability is 0x4ccccccd / 0x80000000 = 60.00%
+; CHECK: edge entry -> if.end probability is 0x33333333 / 0x80000000 = 40.00%
+
+if.then: ; preds = %entry
+ store i32 1, i32* %retval, align 4, !dbg !25
+ br label %return, !dbg !25
if.end: ; preds = %entry
- %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1, !dbg !30
- %0 = load i8*, i8** %arrayidx, align 8, !dbg !30, !tbaa !31
- %call = tail call i32 @atoi(i8* %0) #4, !dbg !30
- tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !17, metadata !DIExpression()), !dbg !30
- %cmp1 = icmp sgt i32 %call, 100, !dbg !35
- br i1 %cmp1, label %for.body, label %if.end6, !dbg !35
-; CHECK: edge if.end -> for.body probability is 0 / 1 = 0%
-; CHECK: edge if.end -> if.end6 probability is 1 / 1 = 100%
-
-for.body: ; preds = %if.end, %for.body
- %u.016 = phi i32 [ %inc, %for.body ], [ 0, %if.end ]
- %s.015 = phi double [ %sub, %for.body ], [ 0x40370ABE6A337A81, %if.end ]
- %add = fadd double %s.015, 3.049000e+00, !dbg !36
- %conv = sitofp i32 %u.016 to double, !dbg !36
- %add4 = fadd double %add, %conv, !dbg !36
- tail call void @llvm.dbg.value(metadata double %add4, i64 0, metadata !18, metadata !DIExpression()), !dbg !36
- %div = fdiv double 3.940000e+00, %s.015, !dbg !37
- %mul = fmul double %div, 3.200000e-01, !dbg !37
- %add5 = fadd double %add4, %mul, !dbg !37
- %sub = fsub double %add4, %add5, !dbg !37
- tail call void @llvm.dbg.value(metadata double %sub, i64 0, metadata !18, metadata !DIExpression()), !dbg !37
- %inc = add nsw i32 %u.016, 1, !dbg !38
- tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !21, metadata !DIExpression()), !dbg !38
- %exitcond = icmp eq i32 %inc, %call, !dbg !38
- br i1 %exitcond, label %if.end6, label %for.body, !dbg !38
-; CHECK: edge for.body -> if.end6 probability is 0 / 10226 = 0%
-; CHECK: edge for.body -> for.body probability is 10226 / 10226 = 100% [HOT edge]
-
-if.end6: ; preds = %for.body, %if.end
- %result.0 = phi double [ 0.000000e+00, %if.end ], [ %sub, %for.body ]
- %call7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i64 0, i64 0), double %result.0), !dbg !39
- br label %return, !dbg !40
-; CHECK: edge if.end6 -> return probability is 16 / 16 = 100% [HOT edge]
-
-return: ; preds = %entry, %if.end6
- %retval.0 = phi i32 [ 0, %if.end6 ], [ 1, %entry ]
- ret i32 %retval.0, !dbg !41
-}
+ call void @llvm.dbg.declare(metadata double* %result, metadata !26, metadata !17), !dbg !27
+ call void @llvm.dbg.declare(metadata i32* %limit, metadata !28, metadata !17), !dbg !29
+ %1 = load i8**, i8*** %argv.addr, align 8, !dbg !30
+ %arrayidx = getelementptr inbounds i8*, i8** %1, i64 1, !dbg !30
+ %2 = load i8*, i8** %arrayidx, align 8, !dbg !30
+ %call = call i32 @atoi(i8* %2) #4, !dbg !31
+ store i32 %call, i32* %limit, align 4, !dbg !29
+ %3 = load i32, i32* %limit, align 4, !dbg !32
+ %cmp1 = icmp sgt i32 %3, 100, !dbg !34
+ br i1 %cmp1, label %if.then.2, label %if.else, !dbg !35
+; CHECK: edge if.end -> if.then.2 probability is 0x66666666 / 0x80000000 = 80.00%
+; CHECK: edge if.end -> if.else probability is 0x1999999a / 0x80000000 = 20.00%
-; Function Attrs: nounwind readonly
-declare i32 @atoi(i8* nocapture) #1
+if.then.2: ; preds = %if.end
+ call void @llvm.dbg.declare(metadata double* %s, metadata !36, metadata !17), !dbg !38
+ %4 = load i8**, i8*** %argv.addr, align 8, !dbg !39
+ %arrayidx3 = getelementptr inbounds i8*, i8** %4, i64 2, !dbg !39
+ %5 = load i8*, i8** %arrayidx3, align 8, !dbg !39
+ %call4 = call i32 @atoi(i8* %5) #4, !dbg !40
+ %conv = sitofp i32 %call4 to double, !dbg !40
+ %mul = fmul double 0x40370ABE6A337A81, %conv, !dbg !41
+ store double %mul, double* %s, align 8, !dbg !38
+ call void @llvm.dbg.declare(metadata i32* %u, metadata !42, metadata !17), !dbg !44
+ store i32 0, i32* %u, align 4, !dbg !44
+ br label %for.cond, !dbg !45
+
+for.cond: ; preds = %for.inc, %if.then.2
+ %6 = load i32, i32* %u, align 4, !dbg !46
+ %7 = load i32, i32* %limit, align 4, !dbg !48
+ %cmp5 = icmp slt i32 %6, %7, !dbg !49
+ br i1 %cmp5, label %for.body, label %for.end, !dbg !50
+
+for.body: ; preds = %for.cond
+ call void @llvm.dbg.declare(metadata double* %x, metadata !51, metadata !17), !dbg !53
+ %8 = load double, double* %s, align 8, !dbg !54
+ store double %8, double* %x, align 8, !dbg !53
+ %9 = load double, double* %x, align 8, !dbg !55
+ %add = fadd double %9, 3.049000e+00, !dbg !56
+ %10 = load i32, i32* %u, align 4, !dbg !57
+ %conv6 = sitofp i32 %10 to double, !dbg !57
+ %add7 = fadd double %add, %conv6, !dbg !58
+ store double %add7, double* %s, align 8, !dbg !59
+ %11 = load double, double* %s, align 8, !dbg !60
+ %12 = load double, double* %x, align 8, !dbg !61
+ %div = fdiv double 3.940000e+00, %12, !dbg !62
+ %mul8 = fmul double %div, 3.200000e-01, !dbg !63
+ %add9 = fadd double %11, %mul8, !dbg !64
+ %13 = load double, double* %s, align 8, !dbg !65
+ %sub = fsub double %13, %add9, !dbg !65
+ store double %sub, double* %s, align 8, !dbg !65
+ br label %for.inc, !dbg !66
+
+for.inc: ; preds = %for.body
+ %14 = load i32, i32* %u, align 4, !dbg !67
+ %inc = add nsw i32 %14, 1, !dbg !67
+ store i32 %inc, i32* %u, align 4, !dbg !67
+ br label %for.cond, !dbg !68
-; Function Attrs: nounwind
-declare i32 @printf(i8* nocapture readonly, ...) #2
+for.end: ; preds = %for.cond
+ %15 = load double, double* %s, align 8, !dbg !69
+ store double %15, double* %result, align 8, !dbg !70
+ br label %if.end.13, !dbg !71
+
+if.else: ; preds = %if.end
+ %16 = load i8**, i8*** %argv.addr, align 8, !dbg !72
+ %arrayidx10 = getelementptr inbounds i8*, i8** %16, i64 2, !dbg !72
+ %17 = load i8*, i8** %arrayidx10, align 8, !dbg !72
+ %call11 = call i32 @atoi(i8* %17) #4, !dbg !74
+ %conv12 = sitofp i32 %call11 to double, !dbg !74
+ store double %conv12, double* %result, align 8, !dbg !75
+ br label %if.end.13
+
+if.end.13: ; preds = %if.else, %for.end
+ %18 = load double, double* %result, align 8, !dbg !76
+ %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i32 0, i32 0), double %18), !dbg !77
+ store i32 0, i32* %retval, align 4, !dbg !78
+ br label %return, !dbg !78
+
+return: ; preds = %if.end.13, %if.then
+ %19 = load i32, i32* %retval, align 4, !dbg !79
+ ret i32 %19, !dbg !79
+}
; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @atoi(i8*) #2
+
+declare i32 @printf(i8*, ...) #3
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readnone }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { nounwind readonly }
!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!25, !42}
-!llvm.ident = !{!26}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 192896) (llvm/trunk 192895)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "branch.cc", directory: ".")
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !5)
+!1 = !DIFile(filename: "test.cc", directory: "/ssd/llvm_commit")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, function: i32 (i32, i8**)* @main, variables: !12)
-!5 = !DIFile(filename: "branch.cc", directory: ".")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8, !8, !9}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
-!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !11)
-!11 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!12 = !{!13, !14, !15, !17, !18, !21, !23}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 4, arg: 1, scope: !4, file: !5, type: !8)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 4, arg: 2, scope: !4, file: !5, type: !9)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "result", line: 7, scope: !4, file: !5, type: !16)
-!16 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "limit", line: 8, scope: !4, file: !5, type: !8)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "s", line: 10, scope: !19, file: !5, type: !16)
-!19 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !20)
-!20 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !4)
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "u", line: 11, scope: !22, file: !5, type: !8)
-!22 = distinct !DILexicalBlock(line: 11, column: 0, file: !1, scope: !19)
-!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 12, scope: !24, file: !5, type: !16)
-!24 = distinct !DILexicalBlock(line: 11, column: 0, file: !1, scope: !22)
-!25 = !{i32 2, !"Dwarf Version", i32 4}
-!26 = !{!"clang version 3.4 (trunk 192896) (llvm/trunk 192895)"}
-!27 = !DILocation(line: 4, scope: !4)
-!28 = !DILocation(line: 5, scope: !29)
-!29 = distinct !DILexicalBlock(line: 5, column: 0, file: !1, scope: !4)
-!30 = !DILocation(line: 8, scope: !4)
-!31 = !{!32, !32, i64 0}
-!32 = !{!"any pointer", !33, i64 0}
-!33 = !{!"omnipotent char", !34, i64 0}
-!34 = !{!"Simple C/C++ TBAA"}
-!35 = !DILocation(line: 9, scope: !20)
-!36 = !DILocation(line: 13, scope: !24)
-!37 = !DILocation(line: 14, scope: !24)
-!38 = !DILocation(line: 11, scope: !22)
-!39 = !DILocation(line: 20, scope: !4)
-!40 = !DILocation(line: 21, scope: !4)
-!41 = !DILocation(line: 22, scope: !4)
-!42 = !{i32 1, !"Debug Info Version", i32 3}
+!4 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!5 = !{!6}
+!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 4, type: !7, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9, !10}
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64, align: 64)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64, align: 64)
+!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)"}
+!16 = !DILocalVariable(name: "argc", arg: 1, scope: !6, file: !1, line: 4, type: !9)
+!17 = !DIExpression()
+!18 = !DILocation(line: 4, column: 15, scope: !6)
+!19 = !DILocalVariable(name: "argv", arg: 2, scope: !6, file: !1, line: 4, type: !10)
+!20 = !DILocation(line: 4, column: 27, scope: !6)
+!21 = !DILocation(line: 5, column: 8, scope: !22)
+!22 = distinct !DILexicalBlock(scope: !6, file: !1, line: 5, column: 8)
+!23 = !DILocation(line: 5, column: 13, scope: !22)
+!24 = !DILocation(line: 5, column: 8, scope: !6)
+!25 = !DILocation(line: 6, column: 6, scope: !22)
+!26 = !DILocalVariable(name: "result", scope: !6, file: !1, line: 7, type: !4)
+!27 = !DILocation(line: 7, column: 11, scope: !6)
+!28 = !DILocalVariable(name: "limit", scope: !6, file: !1, line: 8, type: !9)
+!29 = !DILocation(line: 8, column: 8, scope: !6)
+!30 = !DILocation(line: 8, column: 21, scope: !6)
+!31 = !DILocation(line: 8, column: 16, scope: !6)
+!32 = !DILocation(line: 9, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(scope: !6, file: !1, line: 9, column: 8)
+!34 = !DILocation(line: 9, column: 14, scope: !33)
+!35 = !DILocation(line: 9, column: 8, scope: !6)
+!36 = !DILocalVariable(name: "s", scope: !37, file: !1, line: 10, type: !4)
+!37 = distinct !DILexicalBlock(scope: !33, file: !1, line: 9, column: 21)
+!38 = !DILocation(line: 10, column: 13, scope: !37)
+!39 = !DILocation(line: 10, column: 34, scope: !37)
+!40 = !DILocation(line: 10, column: 29, scope: !37)
+!41 = !DILocation(line: 10, column: 27, scope: !37)
+!42 = !DILocalVariable(name: "u", scope: !43, file: !1, line: 11, type: !9)
+!43 = distinct !DILexicalBlock(scope: !37, file: !1, line: 11, column: 6)
+!44 = !DILocation(line: 11, column: 15, scope: !43)
+!45 = !DILocation(line: 11, column: 11, scope: !43)
+!46 = !DILocation(line: 11, column: 22, scope: !47)
+!47 = distinct !DILexicalBlock(scope: !43, file: !1, line: 11, column: 6)
+!48 = !DILocation(line: 11, column: 26, scope: !47)
+!49 = !DILocation(line: 11, column: 24, scope: !47)
+!50 = !DILocation(line: 11, column: 6, scope: !43)
+!51 = !DILocalVariable(name: "x", scope: !52, file: !1, line: 12, type: !4)
+!52 = distinct !DILexicalBlock(scope: !47, file: !1, line: 11, column: 38)
+!53 = !DILocation(line: 12, column: 15, scope: !52)
+!54 = !DILocation(line: 12, column: 19, scope: !52)
+!55 = !DILocation(line: 13, column: 12, scope: !52)
+!56 = !DILocation(line: 13, column: 14, scope: !52)
+!57 = !DILocation(line: 13, column: 32, scope: !52)
+!58 = !DILocation(line: 13, column: 22, scope: !52)
+!59 = !DILocation(line: 13, column: 10, scope: !52)
+!60 = !DILocation(line: 14, column: 13, scope: !52)
+!61 = !DILocation(line: 14, column: 24, scope: !52)
+!62 = !DILocation(line: 14, column: 22, scope: !52)
+!63 = !DILocation(line: 14, column: 26, scope: !52)
+!64 = !DILocation(line: 14, column: 15, scope: !52)
+!65 = !DILocation(line: 14, column: 10, scope: !52)
+!66 = !DILocation(line: 15, column: 6, scope: !52)
+!67 = !DILocation(line: 11, column: 34, scope: !47)
+!68 = !DILocation(line: 11, column: 6, scope: !47)
+!69 = !DILocation(line: 16, column: 15, scope: !37)
+!70 = !DILocation(line: 16, column: 13, scope: !37)
+!71 = !DILocation(line: 17, column: 4, scope: !37)
+!72 = !DILocation(line: 18, column: 20, scope: !73)
+!73 = distinct !DILexicalBlock(scope: !33, file: !1, line: 17, column: 11)
+!74 = !DILocation(line: 18, column: 15, scope: !73)
+!75 = !DILocation(line: 18, column: 13, scope: !73)
+!76 = !DILocation(line: 20, column: 30, scope: !6)
+!77 = !DILocation(line: 20, column: 4, scope: !6)
+!78 = !DILocation(line: 21, column: 4, scope: !6)
+!79 = !DILocation(line: 22, column: 2, scope: !6)
diff --git a/test/Transforms/SampleProfile/calls.ll b/test/Transforms/SampleProfile/calls.ll
index 10f43a1a6126..53ea9297d7d0 100644
--- a/test/Transforms/SampleProfile/calls.ll
+++ b/test/Transforms/SampleProfile/calls.ll
@@ -24,7 +24,7 @@
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
; Function Attrs: nounwind uwtable
-define i32 @_Z3sumii(i32 %x, i32 %y) {
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 {
entry:
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
@@ -37,7 +37,7 @@ entry:
}
; Function Attrs: uwtable
-define i32 @main() {
+define i32 @main() !dbg !7 {
entry:
%retval = alloca i32, align 4
%s = alloca i32, align 4
@@ -52,8 +52,8 @@ while.cond: ; preds = %if.end, %entry
store i32 %inc, i32* %i, align 4, !dbg !14
%cmp = icmp slt i32 %0, 400000000, !dbg !14
br i1 %cmp, label %while.body, label %while.end, !dbg !14
-; CHECK: edge while.cond -> while.body probability is 5391 / 5391 = 100% [HOT edge]
-; CHECK: edge while.cond -> while.end probability is 0 / 5391 = 0%
+; CHECK: edge while.cond -> while.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge while.cond -> while.end probability is 0x00000000 / 0x80000000 = 0.00%
while.body: ; preds = %while.cond
%1 = load i32, i32* %i, align 4, !dbg !16
@@ -63,8 +63,8 @@ while.body: ; preds = %while.cond
; both branches out of while.body had the same weight. In reality,
; the edge while.body->if.then is taken most of the time.
;
-; CHECK: edge while.body -> if.then probability is 5752 / 5752 = 100% [HOT edge]
-; CHECK: edge while.body -> if.else probability is 0 / 5752 = 0%
+; CHECK: edge while.body -> if.then probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge while.body -> if.else probability is 0x00000000 / 0x80000000 = 0.00%
if.then: ; preds = %while.body
@@ -92,14 +92,14 @@ declare i32 @printf(i8*, ...) #2
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
!2 = !{}
!3 = !{!4, !7}
-!4 = !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: i32 (i32, i32)* @_Z3sumii, variables: !2)
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "calls.cc", directory: ".")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 1, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.5 "}
diff --git a/test/Transforms/SampleProfile/cov-zero-samples.ll b/test/Transforms/SampleProfile/cov-zero-samples.ll
new file mode 100644
index 000000000000..d81e6438ee01
--- /dev/null
+++ b/test/Transforms/SampleProfile/cov-zero-samples.ll
@@ -0,0 +1,142 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/cov-zero-samples.prof -sample-profile-check-record-coverage=100 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; CHECK: remark: cov-zero-samples.cc:9:25: Applied 404065 samples from profile (offset: 2.1)
+; CHECK: remark: cov-zero-samples.cc:10:9: Applied 443089 samples from profile (offset: 3)
+; CHECK: remark: cov-zero-samples.cc:10:36: Applied 0 samples from profile (offset: 3.1)
+; CHECK: remark: cov-zero-samples.cc:11:12: Applied 404066 samples from profile (offset: 4)
+; CHECK: remark: cov-zero-samples.cc:13:25: Applied 0 samples from profile (offset: 6)
+; CHECK: remark: cov-zero-samples.cc:14:3: Applied 0 samples from profile (offset: 7)
+; CHECK: remark: cov-zero-samples.cc:10:9: most popular destination for conditional branches at cov-zero-samples.cc:9:3
+; CHECK: remark: cov-zero-samples.cc:11:12: most popular destination for conditional branches at cov-zero-samples.cc:10:9
+;
+; Coverage for this profile should be 100%
+; CHECK-NOT: warning: cov-zero-samples.cc:1:
+
+@N = global i64 8000000000, align 8
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z12never_calledi(i32 %i) !dbg !4 {
+entry:
+ ret i32 0, !dbg !32
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+; Function Attrs: norecurse uwtable
+define i32 @main() !dbg !8 {
+entry:
+ %retval = alloca i32, align 4
+ %sum = alloca i32, align 4
+ %i = alloca i64, align 8
+ store i32 0, i32* %retval, align 4
+ call void @llvm.dbg.declare(metadata i32* %sum, metadata !33, metadata !19), !dbg !34
+ store i32 0, i32* %sum, align 4, !dbg !34
+ call void @llvm.dbg.declare(metadata i64* %i, metadata !35, metadata !19), !dbg !37
+ store i64 0, i64* %i, align 8, !dbg !37
+ br label %for.cond, !dbg !38
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i64, i64* %i, align 8, !dbg !39
+ %1 = load volatile i64, i64* @N, align 8, !dbg !42
+ %cmp = icmp slt i64 %0, %1, !dbg !43
+ br i1 %cmp, label %for.body, label %for.end, !dbg !44
+
+for.body: ; preds = %for.cond
+ %2 = load i64, i64* %i, align 8, !dbg !45
+ %3 = load volatile i64, i64* @N, align 8, !dbg !48
+ %cmp1 = icmp sgt i64 %2, %3, !dbg !49
+ br i1 %cmp1, label %if.then, label %if.end, !dbg !50
+
+if.then: ; preds = %for.body
+ %4 = load i64, i64* %i, align 8, !dbg !51
+ %conv = trunc i64 %4 to i32, !dbg !51
+ %call = call i32 @_Z12never_calledi(i32 %conv), !dbg !53
+ %5 = load i32, i32* %sum, align 4, !dbg !54
+ %add = add nsw i32 %5, %call, !dbg !54
+ store i32 %add, i32* %sum, align 4, !dbg !54
+ br label %if.end, !dbg !55
+
+if.end: ; preds = %if.then, %for.body
+ %6 = load i64, i64* %i, align 8, !dbg !56
+ %div = sdiv i64 %6, 239, !dbg !57
+ %7 = load i32, i32* %sum, align 4, !dbg !58
+ %conv2 = sext i32 %7 to i64, !dbg !58
+ %mul = mul nsw i64 %conv2, %div, !dbg !58
+ %conv3 = trunc i64 %mul to i32, !dbg !58
+ store i32 %conv3, i32* %sum, align 4, !dbg !58
+ br label %for.inc, !dbg !59
+
+for.inc: ; preds = %if.end
+ %8 = load i64, i64* %i, align 8, !dbg !60
+ %inc = add nsw i64 %8, 1, !dbg !60
+ store i64 %inc, i64* %i, align 8, !dbg !60
+ br label %for.cond, !dbg !62
+
+for.end: ; preds = %for.cond
+ %9 = load i32, i32* %sum, align 4, !dbg !63
+ %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %9), !dbg !64
+ ret i32 0, !dbg !65
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16}
+!llvm.ident = !{!17}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !11)
+!1 = !DIFile(filename: "cov-zero-samples.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = distinct !DISubprogram(name: "never_called", linkageName: "_Z12never_calledi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !9, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!7}
+!11 = !{!12}
+!12 = !DIGlobalVariable(name: "N", scope: !0, file: !1, line: 3, type: !13, isLocal: false, isDefinition: true, variable: i64* @N)
+!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !14)
+!14 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{!"clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)"}
+!19 = !DIExpression()
+!31 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 3)
+!32 = !DILocation(line: 5, column: 27, scope: !31)
+!33 = !DILocalVariable(name: "sum", scope: !8, file: !1, line: 8, type: !7)
+!34 = !DILocation(line: 8, column: 7, scope: !8)
+!35 = !DILocalVariable(name: "i", scope: !36, file: !1, line: 9, type: !14)
+!36 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9, column: 3)
+!37 = !DILocation(line: 9, column: 18, scope: !36)
+!38 = !DILocation(line: 9, column: 8, scope: !36)
+!39 = !DILocation(line: 9, column: 25, scope: !40)
+!40 = !DILexicalBlockFile(scope: !41, file: !1, discriminator: 1)
+!41 = distinct !DILexicalBlock(scope: !36, file: !1, line: 9, column: 3)
+!42 = !DILocation(line: 9, column: 29, scope: !40)
+!43 = !DILocation(line: 9, column: 27, scope: !40)
+!44 = !DILocation(line: 9, column: 3, scope: !40)
+!45 = !DILocation(line: 10, column: 9, scope: !46)
+!46 = distinct !DILexicalBlock(scope: !47, file: !1, line: 10, column: 9)
+!47 = distinct !DILexicalBlock(scope: !41, file: !1, line: 9, column: 37)
+!48 = !DILocation(line: 10, column: 13, scope: !46)
+!49 = !DILocation(line: 10, column: 11, scope: !46)
+!50 = !DILocation(line: 10, column: 9, scope: !47)
+!51 = !DILocation(line: 10, column: 36, scope: !52)
+!52 = !DILexicalBlockFile(scope: !46, file: !1, discriminator: 1)
+!53 = !DILocation(line: 10, column: 23, scope: !52)
+!54 = !DILocation(line: 10, column: 20, scope: !52)
+!55 = !DILocation(line: 10, column: 16, scope: !52)
+!56 = !DILocation(line: 11, column: 12, scope: !47)
+!57 = !DILocation(line: 11, column: 14, scope: !47)
+!58 = !DILocation(line: 11, column: 9, scope: !47)
+!59 = !DILocation(line: 12, column: 3, scope: !47)
+!60 = !DILocation(line: 9, column: 33, scope: !61)
+!61 = !DILexicalBlockFile(scope: !41, file: !1, discriminator: 2)
+!62 = !DILocation(line: 9, column: 3, scope: !61)
+!63 = !DILocation(line: 13, column: 25, scope: !8)
+!64 = !DILocation(line: 13, column: 3, scope: !8)
+!65 = !DILocation(line: 14, column: 3, scope: !8)
diff --git a/test/Transforms/SampleProfile/coverage-warning.ll b/test/Transforms/SampleProfile/coverage-warning.ll
new file mode 100644
index 000000000000..14a2710b0810
--- /dev/null
+++ b/test/Transforms/SampleProfile/coverage-warning.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s
+define i32 @foo(i32 %i) !dbg !4 {
+; The profile has samples for line locations that are no longer present.
+; Coverage does not reach 90%, so we should get this warning:
+;
+; CHECK: warning: coverage-warning.c:1: 2 of 3 available profile records (66%) were applied
+; CHECK: warning: coverage-warning.c:1: 29000 of 30700 available profile samples (94%) were applied
+entry:
+ %retval = alloca i32, align 4
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4, !dbg !9
+ %cmp = icmp sgt i32 %0, 1000, !dbg !10
+ br i1 %cmp, label %if.then, label %if.end, !dbg !9
+
+if.then: ; preds = %entry
+ store i32 30, i32* %retval, align 4, !dbg !11
+ br label %return, !dbg !11
+
+if.end: ; preds = %entry
+ store i32 3, i32* %retval, align 4, !dbg !12
+ br label %return, !dbg !12
+
+return: ; preds = %if.end, %if.then
+ %1 = load i32, i32* %retval, align 4, !dbg !13
+ ret i32 %1, !dbg !13
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "coverage-warning.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !2)
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{!"clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)"}
+!9 = !DILocation(line: 2, column: 7, scope: !4)
+!10 = !DILocation(line: 2, column: 9, scope: !4)
+!11 = !DILocation(line: 3, column: 5, scope: !4)
+!12 = !DILocation(line: 4, column: 3, scope: !4)
+!13 = !DILocation(line: 5, column: 1, scope: !4)
diff --git a/test/Transforms/SampleProfile/discriminator.ll b/test/Transforms/SampleProfile/discriminator.ll
index 2445c5c5d609..0915fc884f82 100644
--- a/test/Transforms/SampleProfile/discriminator.ll
+++ b/test/Transforms/SampleProfile/discriminator.ll
@@ -21,7 +21,7 @@
; This means that the predicate 'i < 5' (line 3) is executed 100 times,
; but the then branch (line 3.1) is only executed 5 times.
-define i32 @foo(i32 %i) #0 {
+define i32 @foo(i32 %i) #0 !dbg !4 {
; CHECK: Printing analysis 'Branch Probability Analysis' for function 'foo':
entry:
%i.addr = alloca i32, align 4
@@ -34,15 +34,15 @@ while.cond: ; preds = %if.end, %entry
%0 = load i32, i32* %i.addr, align 4, !dbg !12
%cmp = icmp slt i32 %0, 100, !dbg !12
br i1 %cmp, label %while.body, label %while.end, !dbg !12
-; CHECK: edge while.cond -> while.body probability is 100 / 101 = 99.0099% [HOT edge]
-; CHECK: edge while.cond -> while.end probability is 1 / 101 = 0.990099%
+; CHECK: edge while.cond -> while.body probability is 0x7ebb907a / 0x80000000 = 99.01% [HOT edge]
+; CHECK: edge while.cond -> while.end probability is 0x01446f86 / 0x80000000 = 0.99%
while.body: ; preds = %while.cond
%1 = load i32, i32* %i.addr, align 4, !dbg !14
%cmp1 = icmp slt i32 %1, 50, !dbg !14
br i1 %cmp1, label %if.then, label %if.end, !dbg !14
-; CHECK: edge while.body -> if.then probability is 5 / 100 = 5%
-; CHECK: edge while.body -> if.end probability is 95 / 100 = 95% [HOT edge]
+; CHECK: edge while.body -> if.then probability is 0x06666666 / 0x80000000 = 5.00%
+; CHECK: edge while.body -> if.end probability is 0x7999999a / 0x80000000 = 95.00% [HOT edge]
if.then: ; preds = %while.body
%2 = load i32, i32* %x, align 4, !dbg !17
@@ -66,11 +66,11 @@ while.end: ; preds = %while.cond
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "discriminator.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "discriminator.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/SampleProfile/entry_counts.ll b/test/Transforms/SampleProfile/entry_counts.ll
index bf66e693a9d1..50cd575295a9 100644
--- a/test/Transforms/SampleProfile/entry_counts.ll
+++ b/test/Transforms/SampleProfile/entry_counts.ll
@@ -3,7 +3,7 @@
; According to the profile, function empty() was called 13,293 times.
; CHECK: {{.*}} = !{!"function_entry_count", i64 13293}
-define void @empty() {
+define void @empty() !dbg !4 {
entry:
ret void, !dbg !9
}
@@ -12,11 +12,11 @@ entry:
!llvm.module.flags = !{!6, !7}
!llvm.ident = !{!8}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "entry_counts.c", directory: "/usr/local/google/home/dnovillo/llvm/test/pgo")
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "entry_counts.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, function: void ()* @empty, variables: !2)
+!4 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !2)
!6 = !{i32 2, !"Dwarf Version", i32 4}
!7 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/SampleProfile/fnptr.ll b/test/Transforms/SampleProfile/fnptr.ll
index 368da42fc8a1..7b07ca9679bb 100644
--- a/test/Transforms/SampleProfile/fnptr.ll
+++ b/test/Transforms/SampleProfile/fnptr.ll
@@ -5,12 +5,12 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s
-; CHECK: edge for.body3 -> if.then probability is 534 / 2598 = 20.5543%
-; CHECK: edge for.body3 -> if.else probability is 2064 / 2598 = 79.4457%
-; CHECK: edge for.inc -> for.inc12 probability is 1052 / 2598 = 40.4927%
-; CHECK: edge for.inc -> for.body3 probability is 1546 / 2598 = 59.5073%
-; CHECK: edge for.inc12 -> for.end14 probability is 518 / 1052 = 49.2395%
-; CHECK: edge for.inc12 -> for.cond1.preheader probability is 534 / 1052 = 50.7605%
+; CHECK: edge for.body3 -> if.then probability is 0x1a4f3959 / 0x80000000 = 20.55%
+; CHECK: edge for.body3 -> if.else probability is 0x65b0c6a7 / 0x80000000 = 79.45%
+; CHECK: edge for.inc -> for.inc12 probability is 0x33d4a4c1 / 0x80000000 = 40.49%
+; CHECK: edge for.inc -> for.body3 probability is 0x4c2b5b3f / 0x80000000 = 59.51%
+; CHECK: edge for.inc12 -> for.end14 probability is 0x3f06d04e / 0x80000000 = 49.24%
+; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x40f92fb2 / 0x80000000 = 50.76%
; Original C++ test case.
;
@@ -46,7 +46,7 @@
@.str = private unnamed_addr constant [9 x i8] c"S = %lf\0A\00", align 1
-define double @_Z3fooi(i32 %x) #0 {
+define double @_Z3fooi(i32 %x) #0 !dbg !3 {
entry:
%conv = sitofp i32 %x to double, !dbg !2
%call = tail call double @sin(double %conv) #3, !dbg !8
@@ -56,7 +56,7 @@ entry:
declare double @sin(double) #1
-define double @_Z3bari(i32 %x) #0 {
+define double @_Z3bari(i32 %x) #0 !dbg !10 {
entry:
%conv = sitofp i32 %x to double, !dbg !9
%call = tail call double @cos(double %conv) #3, !dbg !11
@@ -66,7 +66,7 @@ entry:
declare double @cos(double) #1
-define i32 @main() #2 {
+define i32 @main() #2 !dbg !13 {
entry:
br label %for.cond1.preheader, !dbg !12
@@ -130,17 +130,17 @@ declare i32 @printf(i8* nocapture readonly, ...) #1
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !{!"clang version 3.6.0 "}
!2 = !DILocation(line: 9, column: 3, scope: !3)
-!3 = !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !4, scope: !5, type: !6, function: double (i32)* @_Z3fooi, variables: !7)
+!3 = distinct !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !4, scope: !5, type: !6, variables: !7)
!4 = !DIFile(filename: "fnptr.cc", directory: ".")
!5 = !DIFile(filename: "fnptr.cc", directory: ".")
!6 = !DISubroutineType(types: !7)
!7 = !{}
!8 = !DILocation(line: 9, column: 14, scope: !3)
!9 = !DILocation(line: 13, column: 3, scope: !10)
-!10 = !DISubprogram(name: "bar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !4, scope: !5, type: !6, function: double (i32)* @_Z3bari, variables: !7)
+!10 = distinct !DISubprogram(name: "bar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !4, scope: !5, type: !6, variables: !7)
!11 = !DILocation(line: 13, column: 14, scope: !10)
!12 = !DILocation(line: 19, column: 3, scope: !13)
-!13 = !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !4, scope: !5, type: !6, function: i32 ()* @main, variables: !7)
+!13 = distinct !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !4, scope: !5, type: !6, variables: !7)
!14 = !DILocation(line: 20, column: 5, scope: !13)
!15 = !DILocation(line: 21, column: 15, scope: !13)
!16 = !DILocation(line: 22, column: 11, scope: !13)
diff --git a/test/Transforms/SampleProfile/gcc-simple.ll b/test/Transforms/SampleProfile/gcc-simple.ll
new file mode 100644
index 000000000000..1ae927158c11
--- /dev/null
+++ b/test/Transforms/SampleProfile/gcc-simple.ll
@@ -0,0 +1,218 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/gcc-simple.afdo -S | FileCheck %s
+; XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
+; Original code:
+;
+; #include <stdlib.h>
+;
+; long long int foo(long i) {
+; if (rand() < 500) return 2; else if (rand() > 5000) return 10; else return 90;
+; }
+;
+; int main() {
+; long long int sum = 0;
+; for (int k = 0; k < 3000; k++)
+; for (int i = 0; i < 200000; i++) sum += foo(i);
+; return sum > 0 ? 0 : 1;
+; }
+;
+; This test was compiled down to bytecode at -O0 to avoid inlining foo() into
+; main(). The profile was generated using a GCC-generated binary (also compiled
+; at -O0). The conversion from the Linux Perf profile to the GCC autofdo
+; profile used the converter at https://github.com/google/autofdo
+;
+; $ gcc -g -O0 gcc-simple.cc -o gcc-simple
+; $ perf record -b ./gcc-simple
+; $ create_gcov --binary=gcc-simple --gcov=gcc-simple.afdo
+
+define i64 @_Z3fool(i64 %i) #0 !dbg !4 {
+; CHECK: !prof ![[EC1:[0-9]+]]
+entry:
+ %retval = alloca i64, align 8
+ %i.addr = alloca i64, align 8
+ store i64 %i, i64* %i.addr, align 8
+ call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !16, metadata !17), !dbg !18
+ %call = call i32 @rand() #3, !dbg !19
+ %cmp = icmp slt i32 %call, 500, !dbg !21
+ br i1 %cmp, label %if.then, label %if.else, !dbg !22
+; CHECK: !prof ![[PROF1:[0-9]+]]
+
+if.then: ; preds = %entry
+ store i64 2, i64* %retval, align 8, !dbg !23
+ br label %return, !dbg !23
+
+if.else: ; preds = %entry
+ %call1 = call i32 @rand() #3, !dbg !25
+ %cmp2 = icmp sgt i32 %call1, 5000, !dbg !28
+ br i1 %cmp2, label %if.then.3, label %if.else.4, !dbg !29
+; CHECK: !prof ![[PROF2:[0-9]+]]
+
+if.then.3: ; preds = %if.else
+ store i64 10, i64* %retval, align 8, !dbg !30
+ br label %return, !dbg !30
+
+if.else.4: ; preds = %if.else
+ store i64 90, i64* %retval, align 8, !dbg !32
+ br label %return, !dbg !32
+
+return: ; preds = %if.else.4, %if.then.3, %if.then
+ %0 = load i64, i64* %retval, align 8, !dbg !34
+ ret i64 %0, !dbg !34
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind
+declare i32 @rand() #2
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !dbg !9 {
+; CHECK: !prof ![[EC2:[0-9]+]]
+entry:
+ %retval = alloca i32, align 4
+ %sum = alloca i64, align 8
+ %k = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ call void @llvm.dbg.declare(metadata i64* %sum, metadata !35, metadata !17), !dbg !36
+ store i64 0, i64* %sum, align 8, !dbg !36
+ call void @llvm.dbg.declare(metadata i32* %k, metadata !37, metadata !17), !dbg !39
+ store i32 0, i32* %k, align 4, !dbg !39
+ br label %for.cond, !dbg !40
+
+for.cond: ; preds = %for.inc.4, %entry
+ %0 = load i32, i32* %k, align 4, !dbg !41
+ %cmp = icmp slt i32 %0, 3000, !dbg !45
+ br i1 %cmp, label %for.body, label %for.end.6, !dbg !46
+; CHECK: !prof ![[PROF3:[0-9]+]]
+
+for.body: ; preds = %for.cond
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !47, metadata !17), !dbg !49
+ store i32 0, i32* %i, align 4, !dbg !49
+ br label %for.cond.1, !dbg !50
+
+for.cond.1: ; preds = %for.inc, %for.body
+ %1 = load i32, i32* %i, align 4, !dbg !51
+ %cmp2 = icmp slt i32 %1, 200000, !dbg !55
+ br i1 %cmp2, label %for.body.3, label %for.end, !dbg !56
+; CHECK: !prof ![[PROF4:[0-9]+]]
+
+for.body.3: ; preds = %for.cond.1
+ %2 = load i32, i32* %i, align 4, !dbg !57
+ %conv = sext i32 %2 to i64, !dbg !57
+ %call = call i64 @_Z3fool(i64 %conv), !dbg !59
+ %3 = load i64, i64* %sum, align 8, !dbg !60
+ %add = add nsw i64 %3, %call, !dbg !60
+ store i64 %add, i64* %sum, align 8, !dbg !60
+ br label %for.inc, !dbg !61
+
+for.inc: ; preds = %for.body.3
+ %4 = load i32, i32* %i, align 4, !dbg !62
+ %inc = add nsw i32 %4, 1, !dbg !62
+ store i32 %inc, i32* %i, align 4, !dbg !62
+ br label %for.cond.1, !dbg !64
+
+for.end: ; preds = %for.cond.1
+ br label %for.inc.4, !dbg !65
+
+for.inc.4: ; preds = %for.end
+ %5 = load i32, i32* %k, align 4, !dbg !67
+ %inc5 = add nsw i32 %5, 1, !dbg !67
+ store i32 %inc5, i32* %k, align 4, !dbg !67
+ br label %for.cond, !dbg !68
+
+for.end.6: ; preds = %for.cond
+ %6 = load i64, i64* %sum, align 8, !dbg !69
+ %cmp7 = icmp sgt i64 %6, 0, !dbg !70
+ %cond = select i1 %cmp7, i32 0, i32 1, !dbg !69
+ ret i32 %cond, !dbg !71
+}
+
+; CHECK ![[EC1]] = !{!"function_entry_count", i64 24108}
+; CHECK ![[PROF1]] = !{!"branch_weights", i32 1, i32 30124}
+; CHECK ![[PROF2]] = !{!"branch_weights", i32 30177, i32 29579}
+; CHECK ![[EC2]] = !{!"function_entry_count", i64 0}
+; CHECK ![[PROF3]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK ![[PROF4]] = !{!"branch_weights", i32 1, i32 20238}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "discriminator.cc", directory: "/usr/local/google/home/dnovillo/llvm/test/autofdo")
+!2 = !{}
+!3 = !{!4, !9}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !8}
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!12}
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)"}
+!16 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !8)
+!17 = !DIExpression()
+!18 = !DILocation(line: 3, column: 24, scope: !4)
+!19 = !DILocation(line: 4, column: 7, scope: !20)
+!20 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 7)
+!21 = !DILocation(line: 4, column: 14, scope: !20)
+!22 = !DILocation(line: 4, column: 7, scope: !4)
+!23 = !DILocation(line: 4, column: 21, scope: !24)
+!24 = !DILexicalBlockFile(scope: !20, file: !1, discriminator: 1)
+!25 = !DILocation(line: 4, column: 40, scope: !26)
+!26 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 2)
+!27 = distinct !DILexicalBlock(scope: !20, file: !1, line: 4, column: 40)
+!28 = !DILocation(line: 4, column: 47, scope: !27)
+!29 = !DILocation(line: 4, column: 40, scope: !20)
+!30 = !DILocation(line: 4, column: 55, scope: !31)
+!31 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 3)
+!32 = !DILocation(line: 4, column: 71, scope: !33)
+!33 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 4)
+!34 = !DILocation(line: 5, column: 1, scope: !4)
+!35 = !DILocalVariable(name: "sum", scope: !9, file: !1, line: 8, type: !7)
+!36 = !DILocation(line: 8, column: 17, scope: !9)
+!37 = !DILocalVariable(name: "k", scope: !38, file: !1, line: 9, type: !12)
+!38 = distinct !DILexicalBlock(scope: !9, file: !1, line: 9, column: 3)
+!39 = !DILocation(line: 9, column: 12, scope: !38)
+!40 = !DILocation(line: 9, column: 8, scope: !38)
+!41 = !DILocation(line: 9, column: 19, scope: !42)
+!42 = !DILexicalBlockFile(scope: !43, file: !1, discriminator: 2)
+!43 = !DILexicalBlockFile(scope: !44, file: !1, discriminator: 1)
+!44 = distinct !DILexicalBlock(scope: !38, file: !1, line: 9, column: 3)
+!45 = !DILocation(line: 9, column: 21, scope: !44)
+!46 = !DILocation(line: 9, column: 3, scope: !38)
+!47 = !DILocalVariable(name: "i", scope: !48, file: !1, line: 10, type: !12)
+!48 = distinct !DILexicalBlock(scope: !44, file: !1, line: 10, column: 5)
+!49 = !DILocation(line: 10, column: 14, scope: !48)
+!50 = !DILocation(line: 10, column: 10, scope: !48)
+!51 = !DILocation(line: 10, column: 21, scope: !52)
+!52 = !DILexicalBlockFile(scope: !53, file: !1, discriminator: 5)
+!53 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 1)
+!54 = distinct !DILexicalBlock(scope: !48, file: !1, line: 10, column: 5)
+!55 = !DILocation(line: 10, column: 23, scope: !54)
+!56 = !DILocation(line: 10, column: 5, scope: !48)
+!57 = !DILocation(line: 10, column: 49, scope: !58)
+!58 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 2)
+!59 = !DILocation(line: 10, column: 45, scope: !54)
+!60 = !DILocation(line: 10, column: 42, scope: !54)
+!61 = !DILocation(line: 10, column: 38, scope: !54)
+!62 = !DILocation(line: 10, column: 34, scope: !63)
+!63 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 4)
+!64 = !DILocation(line: 10, column: 5, scope: !54)
+!65 = !DILocation(line: 10, column: 50, scope: !66)
+!66 = !DILexicalBlockFile(scope: !48, file: !1, discriminator: 3)
+!67 = !DILocation(line: 9, column: 30, scope: !44)
+!68 = !DILocation(line: 9, column: 3, scope: !44)
+!69 = !DILocation(line: 11, column: 10, scope: !9)
+!70 = !DILocation(line: 11, column: 14, scope: !9)
+!71 = !DILocation(line: 11, column: 3, scope: !9)
diff --git a/test/Transforms/SampleProfile/inline-coverage.ll b/test/Transforms/SampleProfile/inline-coverage.ll
new file mode 100644
index 000000000000..7248540b4f7c
--- /dev/null
+++ b/test/Transforms/SampleProfile/inline-coverage.ll
@@ -0,0 +1,135 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-coverage.prof -sample-profile-check-record-coverage=100 -sample-profile-check-sample-coverage=110 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; Original code:
+;
+; 1 #include <stdlib.h>
+; 2
+; 3 long long int foo(long i) {
+; 4 return rand() * i;
+; 5 }
+; 6
+; 7 int main() {
+; 8 long long int sum = 0;
+; 9 for (int i = 0; i < 200000 * 3000; i++)
+; 10 sum += foo(i);
+; 11 return sum > 0 ? 0 : 1;
+; 12 }
+;
+; CHECK: remark: coverage.cc:10:12: inlined hot callee '_Z3fool' with 172746 samples into 'main'
+; CHECK: remark: coverage.cc:9:19: Applied 23478 samples from profile (offset: 2.1)
+; CHECK: remark: coverage.cc:10:16: Applied 23478 samples from profile (offset: 3)
+; CHECK: remark: coverage.cc:4:10: Applied 31878 samples from profile (offset: 1)
+; CHECK: remark: coverage.cc:11:10: Applied 0 samples from profile (offset: 4)
+; CHECK: remark: coverage.cc:10:16: most popular destination for conditional branches at coverage.cc:9:3
+;
+; There is one sample record with 0 samples at offset 4 in main() that we never
+; use:
+; CHECK: warning: coverage.cc:7: 4 of 5 available profile records (80%) were applied
+;
+; Since the unused sample record contributes no samples, sample coverage should
+; be 100%. Note that we get this warning because we are requesting an impossible
+; 110% coverage check.
+; CHECK: warning: coverage.cc:7: 78834 of 78834 available profile samples (100%) were applied
+
+define i64 @_Z3fool(i64 %i) !dbg !4 {
+entry:
+ %i.addr = alloca i64, align 8
+ store i64 %i, i64* %i.addr, align 8
+ call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !16, metadata !17), !dbg !18
+ %call = call i32 @rand(), !dbg !19
+ %conv = sext i32 %call to i64, !dbg !19
+ %0 = load i64, i64* %i.addr, align 8, !dbg !20
+ %mul = mul nsw i64 %conv, %0, !dbg !21
+ ret i64 %mul, !dbg !22
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+declare i32 @rand()
+
+define i32 @main() !dbg !9 {
+entry:
+ %retval = alloca i32, align 4
+ %sum = alloca i64, align 8
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ call void @llvm.dbg.declare(metadata i64* %sum, metadata !23, metadata !17), !dbg !24
+ store i64 0, i64* %sum, align 8, !dbg !24
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !25, metadata !17), !dbg !27
+ store i32 0, i32* %i, align 4, !dbg !27
+ br label %for.cond, !dbg !28
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4, !dbg !29
+ %cmp = icmp slt i32 %0, 600000000, !dbg !32
+ br i1 %cmp, label %for.body, label %for.end, !dbg !33
+
+for.body: ; preds = %for.cond
+ %1 = load i32, i32* %i, align 4, !dbg !34
+ %conv = sext i32 %1 to i64, !dbg !34
+ %call = call i64 @_Z3fool(i64 %conv), !dbg !35
+ %2 = load i64, i64* %sum, align 8, !dbg !36
+ %add = add nsw i64 %2, %call, !dbg !36
+ store i64 %add, i64* %sum, align 8, !dbg !36
+ br label %for.inc, !dbg !37
+
+for.inc: ; preds = %for.body
+ %3 = load i32, i32* %i, align 4, !dbg !38
+ %inc = add nsw i32 %3, 1, !dbg !38
+ store i32 %inc, i32* %i, align 4, !dbg !38
+ br label %for.cond, !dbg !39
+
+for.end: ; preds = %for.cond
+ %4 = load i64, i64* %sum, align 8, !dbg !40
+ %cmp1 = icmp sgt i64 %4, 0, !dbg !41
+ %cond = select i1 %cmp1, i32 0, i32 1, !dbg !40
+ ret i32 %cond, !dbg !42
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "coverage.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !9}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !8}
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!12}
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)"}
+!16 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !8)
+!17 = !DIExpression()
+!18 = !DILocation(line: 3, column: 24, scope: !4)
+!19 = !DILocation(line: 4, column: 10, scope: !4)
+!20 = !DILocation(line: 4, column: 19, scope: !4)
+!21 = !DILocation(line: 4, column: 17, scope: !4)
+!22 = !DILocation(line: 4, column: 3, scope: !4)
+!23 = !DILocalVariable(name: "sum", scope: !9, file: !1, line: 8, type: !7)
+!24 = !DILocation(line: 8, column: 17, scope: !9)
+!25 = !DILocalVariable(name: "i", scope: !26, file: !1, line: 9, type: !12)
+!26 = distinct !DILexicalBlock(scope: !9, file: !1, line: 9, column: 3)
+!27 = !DILocation(line: 9, column: 12, scope: !26)
+!28 = !DILocation(line: 9, column: 8, scope: !26)
+!29 = !DILocation(line: 9, column: 19, scope: !30)
+!30 = !DILexicalBlockFile(scope: !31, file: !1, discriminator: 1)
+!31 = distinct !DILexicalBlock(scope: !26, file: !1, line: 9, column: 3)
+!32 = !DILocation(line: 9, column: 21, scope: !30)
+!33 = !DILocation(line: 9, column: 3, scope: !30)
+!34 = !DILocation(line: 10, column: 16, scope: !31)
+!35 = !DILocation(line: 10, column: 12, scope: !31)
+!36 = !DILocation(line: 10, column: 9, scope: !31)
+!37 = !DILocation(line: 10, column: 5, scope: !31)
+!38 = !DILocation(line: 9, column: 39, scope: !31)
+!39 = !DILocation(line: 9, column: 3, scope: !31)
+!40 = !DILocation(line: 11, column: 10, scope: !9)
+!41 = !DILocation(line: 11, column: 14, scope: !9)
+!42 = !DILocation(line: 11, column: 3, scope: !9)
diff --git a/test/Transforms/SampleProfile/inline-hint.ll b/test/Transforms/SampleProfile/inline-hint.ll
new file mode 100644
index 000000000000..16c4e64ec5bb
--- /dev/null
+++ b/test/Transforms/SampleProfile/inline-hint.ll
@@ -0,0 +1,38 @@
+; RUN: opt %s -sample-profile -sample-profile-file=%S/Inputs/inline-hint.prof -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; CHECK: Applied cold hint to globally cold function '_Z7cold_fnRxi' with 0.1
+define void @_Z7cold_fnRxi() !dbg !4 {
+entry:
+ ret void, !dbg !29
+}
+
+; CHECK: Applied inline hint to globally hot function '_Z6hot_fnRxi' with 70.0
+define void @_Z6hot_fnRxi() #0 !dbg !10 {
+entry:
+ ret void, !dbg !38
+}
+
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!1 = !DIFile(filename: "inline-hint.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !10, !11, !14}
+!4 = distinct !DISubprogram(name: "cold_fn", linkageName: "_Z7cold_fnRxi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7, !9}
+!7 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !8, size: 64, align: 64)
+!8 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = distinct !DISubprogram(name: "hot_fn", linkageName: "_Z6hot_fnRxi", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!11 = distinct !DISubprogram(name: "compute", linkageName: "_Z7computex", scope: !1, file: !1, line: 11, type: !12, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!12 = !DISubroutineType(types: !13)
+!13 = !{!8, !8}
+!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !15, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!9}
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{!"clang version 3.8.0 (trunk 254067) (llvm/trunk 254079)"}
+!29 = !DILocation(line: 5, column: 1, scope: !4)
+!38 = !DILocation(line: 9, column: 1, scope: !10)
diff --git a/test/Transforms/SampleProfile/inline.ll b/test/Transforms/SampleProfile/inline.ll
new file mode 100644
index 000000000000..590a20f9d1d1
--- /dev/null
+++ b/test/Transforms/SampleProfile/inline.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -sample-profile-inline-hot-threshold=1 -S | FileCheck %s
+
+; Original C++ test case
+;
+; #include <stdio.h>
+;
+; int sum(int x, int y) {
+; return x + y;
+; }
+;
+; int main() {
+; int s, i = 0;
+; while (i++ < 20000 * 20000)
+; if (i != 100) s = sum(i, s); else s = 30;
+; printf("sum is %d\n", s);
+; return 0;
+; }
+;
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4, !dbg !11
+ %1 = load i32, i32* %y.addr, align 4, !dbg !11
+ %add = add nsw i32 %0, %1, !dbg !11
+ ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() !dbg !7 {
+entry:
+ %retval = alloca i32, align 4
+ %s = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* %i, align 4, !dbg !12
+ br label %while.cond, !dbg !13
+
+while.cond: ; preds = %if.end, %entry
+ %0 = load i32, i32* %i, align 4, !dbg !14
+ %inc = add nsw i32 %0, 1, !dbg !14
+ store i32 %inc, i32* %i, align 4, !dbg !14
+ %cmp = icmp slt i32 %0, 400000000, !dbg !14
+ br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body: ; preds = %while.cond
+ %1 = load i32, i32* %i, align 4, !dbg !16
+ %cmp1 = icmp ne i32 %1, 100, !dbg !16
+ br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+
+if.then: ; preds = %while.body
+ %2 = load i32, i32* %i, align 4, !dbg !18
+ %3 = load i32, i32* %s, align 4, !dbg !18
+ %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18
+; CHECK-NOT: call i32 @_Z3sumii
+ store i32 %call, i32* %s, align 4, !dbg !18
+ br label %if.end, !dbg !18
+
+if.else: ; preds = %while.body
+ store i32 30, i32* %s, align 4, !dbg !20
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ br label %while.cond, !dbg !22
+
+while.end: ; preds = %while.cond
+ %4 = load i32, i32* %s, align 4, !dbg !24
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+ ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !7}
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 3, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)
diff --git a/test/Transforms/SampleProfile/nolocinfo.ll b/test/Transforms/SampleProfile/nolocinfo.ll
new file mode 100644
index 000000000000..08bca20984dd
--- /dev/null
+++ b/test/Transforms/SampleProfile/nolocinfo.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
+
+define i32 @foo(i32 %i) !dbg !4 {
+entry:
+ %i.addr = alloca i32, align 4
+ %0 = load i32, i32* %i.addr, align 4
+ %cmp = icmp sgt i32 %0, 1000
+
+; Remarks for conditional branches need debug location information for the
+; referring branch. When that is not present, the compiler should not abort.
+;
+; CHECK: remark: nolocinfo.c:3:5: most popular destination for conditional branches at <UNKNOWN LOCATION>
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ ret i32 0, !dbg !18
+
+if.end:
+ ret i32 1
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "nolocinfo.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)"}
+!15 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 7)
+!18 = !DILocation(line: 3, column: 5, scope: !15)
diff --git a/test/Transforms/SampleProfile/offset.ll b/test/Transforms/SampleProfile/offset.ll
new file mode 100644
index 000000000000..499b2826402d
--- /dev/null
+++ b/test/Transforms/SampleProfile/offset.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ code for this test case:
+;
+; a.cc:
+; #1
+; #2
+; #3
+; #4
+; #5 int foo(int a) {
+; #6 #include "a.b"
+; #7}
+;
+; a.b:
+; #1 if (a > 0) {
+; #2 return 10;
+; #3 } else {
+; #4 return 20;
+; #5 }
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3fooi(i32 %a) #0 !dbg !4 {
+entry:
+ %retval = alloca i32, align 4
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !11, metadata !12), !dbg !13
+ %0 = load i32, i32* %a.addr, align 4, !dbg !14
+ %cmp = icmp sgt i32 %0, 0, !dbg !18
+ br i1 %cmp, label %if.then, label %if.else, !dbg !19
+; CHECK: edge entry -> if.then probability is 0x0147ae14 / 0x80000000 = 1.00%
+; CHECK: edge entry -> if.else probability is 0x7eb851ec / 0x80000000 = 99.00% [HOT edge]
+
+if.then: ; preds = %entry
+ store i32 10, i32* %retval, align 4, !dbg !20
+ br label %return, !dbg !20
+
+if.else: ; preds = %entry
+ store i32 20, i32* %retval, align 4, !dbg !22
+ br label %return, !dbg !22
+
+return: ; preds = %if.else, %if.then
+ %1 = load i32, i32* %retval, align 4, !dbg !24
+ ret i32 %1, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250750)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "a.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 250750)"}
+!11 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 5, type: !7)
+!12 = !DIExpression()
+!13 = !DILocation(line: 5, column: 13, scope: !4)
+!14 = !DILocation(line: 1, column: 5, scope: !15)
+!15 = distinct !DILexicalBlock(scope: !17, file: !16, line: 1, column: 5)
+!16 = !DIFile(filename: "./a.b", directory: "/tmp")
+!17 = !DILexicalBlockFile(scope: !4, file: !16, discriminator: 0)
+!18 = !DILocation(line: 1, column: 7, scope: !15)
+!19 = !DILocation(line: 1, column: 5, scope: !17)
+!20 = !DILocation(line: 2, column: 3, scope: !21)
+!21 = distinct !DILexicalBlock(scope: !15, file: !16, line: 1, column: 12)
+!22 = !DILocation(line: 4, column: 3, scope: !23)
+!23 = distinct !DILexicalBlock(scope: !15, file: !16, line: 3, column: 8)
+!24 = !DILocation(line: 7, column: 1, scope: !25)
+!25 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 0)
diff --git a/test/Transforms/SampleProfile/propagate.ll b/test/Transforms/SampleProfile/propagate.ll
index 620d125875f7..eef7b162eb7a 100644
--- a/test/Transforms/SampleProfile/propagate.ll
+++ b/test/Transforms/SampleProfile/propagate.ll
@@ -40,7 +40,7 @@ target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [24 x i8] c"foo(%d, %d, %ld) = %ld\0A\00", align 1
; Function Attrs: nounwind uwtable
-define i64 @_Z3fooiil(i32 %x, i32 %y, i64 %N) #0 {
+define i64 @_Z3fooiil(i32 %x, i32 %y, i64 %N) #0 !dbg !4 {
entry:
%retval = alloca i64, align 8
%x.addr = alloca i32, align 4
@@ -73,8 +73,8 @@ for.cond: ; preds = %for.inc16, %if.else
%5 = load i64, i64* %N.addr, align 8, !dbg !15
%cmp1 = icmp slt i64 %4, %5, !dbg !15
br i1 %cmp1, label %for.body, label %for.end18, !dbg !15
-; CHECK: edge for.cond -> for.body probability is 10 / 10 = 100% [HOT edge]
-; CHECK: edge for.cond -> for.end18 probability is 0 / 10 = 0%
+; CHECK: edge for.cond -> for.body probability is 0x745d1746 / 0x80000000 = 90.91% [HOT edge]
+; CHECK: edge for.cond -> for.end18 probability is 0x0ba2e8ba / 0x80000000 = 9.09%
for.body: ; preds = %for.cond
%6 = load i64, i64* %i, align 8, !dbg !18
@@ -82,8 +82,8 @@ for.body: ; preds = %for.cond
%div = sdiv i64 %7, 3, !dbg !18
%cmp2 = icmp sgt i64 %6, %div, !dbg !18
br i1 %cmp2, label %if.then3, label %if.end, !dbg !18
-; CHECK: edge for.body -> if.then3 probability is 1 / 5 = 20%
-; CHECK: edge for.body -> if.end probability is 4 / 5 = 80%
+; CHECK: edge for.body -> if.then3 probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge for.body -> if.end probability is 0x66666666 / 0x80000000 = 80.00%
if.then3: ; preds = %for.body
%8 = load i32, i32* %x.addr, align 4, !dbg !21
@@ -97,8 +97,8 @@ if.end: ; preds = %if.then3, %for.body
%div4 = sdiv i64 %10, 4, !dbg !22
%cmp5 = icmp sgt i64 %9, %div4, !dbg !22
br i1 %cmp5, label %if.then6, label %if.else7, !dbg !22
-; CHECK: edge if.end -> if.then6 probability is 3 / 6342 = 0.0473037%
-; CHECK: edge if.end -> if.else7 probability is 6339 / 6342 = 99.9527% [HOT edge]
+; CHECK: edge if.end -> if.then6 probability is 0x000f801f / 0x80000000 = 0.05%
+; CHECK: edge if.end -> if.else7 probability is 0x7ff07fe1 / 0x80000000 = 99.95% [HOT edge]
if.then6: ; preds = %if.end
%11 = load i32, i32* %y.addr, align 4, !dbg !24
@@ -119,8 +119,8 @@ for.cond8: ; preds = %for.inc, %if.else7
%14 = load i64, i64* %i, align 8, !dbg !28
%cmp10 = icmp slt i64 %conv9, %14, !dbg !28
br i1 %cmp10, label %for.body11, label %for.end, !dbg !28
-; CHECK: edge for.cond8 -> for.body11 probability is 16191 / 16191 = 100% [HOT edge]
-; CHECK: edge for.cond8 -> for.end probability is 0 / 16191 = 0%
+; CHECK: edge for.cond8 -> for.body11 probability is 0x5bfc7472 / 0x80000000 = 71.86%
+; CHECK: edge for.cond8 -> for.end probability is 0x24038b8e / 0x80000000 = 28.14%
for.body11: ; preds = %for.cond8
%15 = load i32, i32* %j, align 4, !dbg !31
@@ -167,7 +167,7 @@ return: ; preds = %if.end19, %if.then
}
; Function Attrs: uwtable
-define i32 @main() #1 {
+define i32 @main() #1 !dbg !7 {
entry:
%retval = alloca i32, align 4
%x = alloca i32, align 4
@@ -198,14 +198,14 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "propagate.cc", directory: ".")
!2 = !{}
!3 = !{!4, !7}
-!4 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: i64 (i32, i32, i64)* @_Z3fooiil, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "propagate.cc", directory: ".")
!6 = !DISubroutineType(types: !{null})
-!7 = !DISubprogram(name: "main", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!7 = distinct !DISubprogram(name: "main", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !1, scope: !5, type: !6, variables: !2)
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 1, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.5 "}
diff --git a/test/Transforms/SampleProfile/remarks.ll b/test/Transforms/SampleProfile/remarks.ll
new file mode 100644
index 000000000000..a0e6a9deb8a8
--- /dev/null
+++ b/test/Transforms/SampleProfile/remarks.ll
@@ -0,0 +1,185 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/remarks.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
+;
+; Original test case.
+;
+; 1 #include <stdlib.h>
+; 2
+; 3 long long foo() {
+; 4 long long int sum = 0;
+; 5 for (int i = 0; i < 500000000; i++)
+; 6 if (i < 1000)
+; 7 sum -= i;
+; 8 else
+; 9 sum += -i * rand();
+; 10 return sum;
+; 11 }
+; 12
+; 13 int main() { return foo() > 0; }
+
+; We are expecting foo() to be inlined in main() (almost all the cycles are
+; spent inside foo).
+; CHECK: remark: remarks.cc:13:21: inlined hot callee '_Z3foov' with 623868 samples into 'main'
+
+; The back edge for the loop is the hottest edge in the loop subgraph.
+; CHECK: remark: remarks.cc:6:9: most popular destination for conditional branches at remarks.cc:5:3
+
+; The predicate almost always chooses the 'else' branch.
+; CHECK: remark: remarks.cc:9:15: most popular destination for conditional branches at remarks.cc:6:9
+
+; Function Attrs: nounwind uwtable
+define i64 @_Z3foov() #0 !dbg !4 {
+entry:
+ %sum = alloca i64, align 8
+ %i = alloca i32, align 4
+ %0 = bitcast i64* %sum to i8*, !dbg !19
+ call void @llvm.lifetime.start(i64 8, i8* %0) #4, !dbg !19
+ call void @llvm.dbg.declare(metadata i64* %sum, metadata !9, metadata !20), !dbg !21
+ store i64 0, i64* %sum, align 8, !dbg !21, !tbaa !22
+ %1 = bitcast i32* %i to i8*, !dbg !26
+ call void @llvm.lifetime.start(i64 4, i8* %1) #4, !dbg !26
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !10, metadata !20), !dbg !27
+ store i32 0, i32* %i, align 4, !dbg !27, !tbaa !28
+ br label %for.cond, !dbg !26
+
+for.cond: ; preds = %for.inc, %entry
+ %2 = load i32, i32* %i, align 4, !dbg !30, !tbaa !28
+ %cmp = icmp slt i32 %2, 500000000, !dbg !34
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !35
+
+for.cond.cleanup: ; preds = %for.cond
+ %3 = bitcast i32* %i to i8*, !dbg !36
+ call void @llvm.lifetime.end(i64 4, i8* %3) #4, !dbg !36
+ br label %for.end
+
+for.body: ; preds = %for.cond
+ %4 = load i32, i32* %i, align 4, !dbg !38, !tbaa !28
+ %cmp1 = icmp slt i32 %4, 1000, !dbg !40
+ br i1 %cmp1, label %if.then, label %if.else, !dbg !41
+
+if.then: ; preds = %for.body
+ %5 = load i32, i32* %i, align 4, !dbg !42, !tbaa !28
+ %conv = sext i32 %5 to i64, !dbg !42
+ %6 = load i64, i64* %sum, align 8, !dbg !43, !tbaa !22
+ %sub = sub nsw i64 %6, %conv, !dbg !43
+ store i64 %sub, i64* %sum, align 8, !dbg !43, !tbaa !22
+ br label %if.end, !dbg !44
+
+if.else: ; preds = %for.body
+ %7 = load i32, i32* %i, align 4, !dbg !45, !tbaa !28
+ %sub2 = sub nsw i32 0, %7, !dbg !46
+ %call = call i32 @rand() #4, !dbg !47
+ %mul = mul nsw i32 %sub2, %call, !dbg !48
+ %conv3 = sext i32 %mul to i64, !dbg !46
+ %8 = load i64, i64* %sum, align 8, !dbg !49, !tbaa !22
+ %add = add nsw i64 %8, %conv3, !dbg !49
+ store i64 %add, i64* %sum, align 8, !dbg !49, !tbaa !22
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ br label %for.inc, !dbg !50
+
+for.inc: ; preds = %if.end
+ %9 = load i32, i32* %i, align 4, !dbg !51, !tbaa !28
+ %inc = add nsw i32 %9, 1, !dbg !51
+ store i32 %inc, i32* %i, align 4, !dbg !51, !tbaa !28
+ br label %for.cond, !dbg !52
+
+for.end: ; preds = %for.cond.cleanup
+ %10 = load i64, i64* %sum, align 8, !dbg !53, !tbaa !22
+ %11 = bitcast i64* %sum to i8*, !dbg !54
+ call void @llvm.lifetime.end(i64 8, i8* %11) #4, !dbg !54
+ ret i64 %10, !dbg !55
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
+
+; Function Attrs: nounwind
+declare i32 @rand() #3
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !dbg !13 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ %call = call i64 @_Z3foov(), !dbg !56
+ %cmp = icmp sgt i64 %call, 0, !dbg !57
+ %conv = zext i1 %cmp to i32, !dbg !56
+ ret i32 %conv, !dbg !58
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind argmemonly }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "remarks.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !13}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !{!9, !10}
+!9 = !DILocalVariable(name: "sum", scope: !4, file: !1, line: 4, type: !7)
+!10 = !DILocalVariable(name: "i", scope: !11, file: !1, line: 5, type: !12)
+!11 = distinct !DILexicalBlock(scope: !4, file: !1, line: 5, column: 3)
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!12}
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 3}
+!18 = !{!"clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)"}
+!19 = !DILocation(line: 4, column: 3, scope: !4)
+!20 = !DIExpression()
+!21 = !DILocation(line: 4, column: 17, scope: !4)
+!22 = !{!23, !23, i64 0}
+!23 = !{!"long long", !24, i64 0}
+!24 = !{!"omnipotent char", !25, i64 0}
+!25 = !{!"Simple C/C++ TBAA"}
+!26 = !DILocation(line: 5, column: 8, scope: !11)
+!27 = !DILocation(line: 5, column: 12, scope: !11)
+!28 = !{!29, !29, i64 0}
+!29 = !{!"int", !24, i64 0}
+!30 = !DILocation(line: 5, column: 19, scope: !31)
+!31 = !DILexicalBlockFile(scope: !32, file: !1, discriminator: 3)
+!32 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 1)
+!33 = distinct !DILexicalBlock(scope: !11, file: !1, line: 5, column: 3)
+!34 = !DILocation(line: 5, column: 21, scope: !33)
+!35 = !DILocation(line: 5, column: 3, scope: !11)
+!36 = !DILocation(line: 5, column: 3, scope: !37)
+!37 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 2)
+!38 = !DILocation(line: 6, column: 9, scope: !39)
+!39 = distinct !DILexicalBlock(scope: !33, file: !1, line: 6, column: 9)
+!40 = !DILocation(line: 6, column: 11, scope: !39)
+!41 = !DILocation(line: 6, column: 9, scope: !33)
+!42 = !DILocation(line: 7, column: 14, scope: !39)
+!43 = !DILocation(line: 7, column: 11, scope: !39)
+!44 = !DILocation(line: 7, column: 7, scope: !39)
+!45 = !DILocation(line: 9, column: 15, scope: !39)
+!46 = !DILocation(line: 9, column: 14, scope: !39)
+!47 = !DILocation(line: 9, column: 19, scope: !39)
+!48 = !DILocation(line: 9, column: 17, scope: !39)
+!49 = !DILocation(line: 9, column: 11, scope: !39)
+!50 = !DILocation(line: 6, column: 13, scope: !39)
+!51 = !DILocation(line: 5, column: 35, scope: !33)
+!52 = !DILocation(line: 5, column: 3, scope: !33)
+!53 = !DILocation(line: 10, column: 10, scope: !4)
+!54 = !DILocation(line: 11, column: 1, scope: !4)
+!55 = !DILocation(line: 10, column: 3, scope: !4)
+!56 = !DILocation(line: 13, column: 21, scope: !13)
+!57 = !DILocation(line: 13, column: 27, scope: !13)
+!58 = !DILocation(line: 13, column: 14, scope: !13)
diff --git a/test/Transforms/SampleProfile/syntax.ll b/test/Transforms/SampleProfile/syntax.ll
index ed38a175288a..debbc7c87ddb 100644
--- a/test/Transforms/SampleProfile/syntax.ll
+++ b/test/Transforms/SampleProfile/syntax.ll
@@ -13,7 +13,7 @@ entry:
}
; NO-DEBUG: warning: No debug information found in function empty: Function profile not used
; MISSING-FILE: missing.prof: Could not open profile:
-; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof:1: Expected 'mangled_name:NUM:NUM', found 3empty:100:BAD
+; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof: Could not open profile: Unrecognized sample profile encoding format
; BAD-SAMPLE-LINE: error: {{.*}}bad_sample_line.prof:3: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1: BAD
; BAD-LINE-VALUES: error: {{.*}}bad_line_values.prof:2: Expected 'mangled_name:NUM:NUM', found -1: 10
; BAD-DISCRIMINATOR-VALUE: error: {{.*}}bad_discriminator_value.prof:2: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1.-3: 10
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index 8e5d4ff773dd..4daa610ccdcb 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.6.0"
; CHECK: llvm.dbg.value
; CHECK: llvm.dbg.value
-define i32 @f(i32 %a, i32 %b) nounwind ssp {
+define i32 @f(i32 %a, i32 %b) nounwind ssp !dbg !1 {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca i32, align 4
@@ -42,17 +42,17 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!20}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131941)", isOptimized: false, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19, subprograms: !17)
-!1 = !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !18, scope: !2, type: !3, function: i32 (i32, i32)* @f)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131941)", isOptimized: false, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19, subprograms: !17)
+!1 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !18, scope: !2, type: !3)
!2 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !1, file: !2, type: !5)
+!6 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !1, file: !2, type: !5)
!7 = !DILocation(line: 1, column: 11, scope: !1)
-!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 1, arg: 2, scope: !1, file: !2, type: !5)
+!8 = !DILocalVariable(name: "b", line: 1, arg: 2, scope: !1, file: !2, type: !5)
!9 = !DILocation(line: 1, column: 18, scope: !1)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 2, scope: !11, file: !2, type: !5)
+!10 = !DILocalVariable(name: "c", line: 2, scope: !11, file: !2, type: !5)
!11 = distinct !DILexicalBlock(line: 1, column: 21, file: !18, scope: !1)
!12 = !DILocation(line: 2, column: 9, scope: !11)
!13 = !DILocation(line: 2, column: 14, scope: !11)
diff --git a/test/Transforms/Scalarizer/dbginfo.ll b/test/Transforms/Scalarizer/dbginfo.ll
index 3770a3e8c642..09252a09d4b4 100644
--- a/test/Transforms/Scalarizer/dbginfo.ll
+++ b/test/Transforms/Scalarizer/dbginfo.ll
@@ -2,7 +2,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Function Attrs: nounwind uwtable
-define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x i32>* nocapture readonly %c) #0 {
+define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x i32>* nocapture readonly %c) #0 !dbg !4 {
; CHECK: @f1(
; CHECK: %a.i0 = bitcast <4 x i32>* %a to i32*
; CHECK: %a.i1 = getelementptr i32, i32* %a.i0, i32 1
@@ -57,11 +57,11 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!18, !26}
!llvm.ident = !{!19}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 194134) (llvm/trunk 194126)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 194134) (llvm/trunk 194126)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f1", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, function: void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @f1, variables: !14)
+!4 = distinct !DISubprogram(name: "f1", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !14)
!5 = !DIFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8, !8, !8}
@@ -72,9 +72,9 @@ attributes #1 = { nounwind readnone }
!12 = !{!13}
!13 = !DISubrange(count: 4)
!14 = !{!15, !16, !17}
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !4, file: !5, type: !8)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 3, arg: 2, scope: !4, file: !5, type: !8)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 3, arg: 3, scope: !4, file: !5, type: !8)
+!15 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(name: "b", line: 3, arg: 2, scope: !4, file: !5, type: !8)
+!17 = !DILocalVariable(name: "c", line: 3, arg: 3, scope: !4, file: !5, type: !8)
!18 = !{i32 2, !"Dwarf Version", i32 4}
!19 = !{!"clang version 3.4 (trunk 194134) (llvm/trunk 194126)"}
!20 = !DILocation(line: 3, scope: !4)
diff --git a/test/Transforms/Scalarizer/store-bug.ll b/test/Transforms/Scalarizer/store-bug.ll
new file mode 100644
index 000000000000..84c2b3f840a0
--- /dev/null
+++ b/test/Transforms/Scalarizer/store-bug.ll
@@ -0,0 +1,25 @@
+; RUN: opt -scalarizer -scalarize-load-store -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; This input caused the scalarizer not to clear cached results
+; properly.
+;
+; Any regressions should trigger an assert in the scalarizer.
+
+define void @func(<4 x float> %val, <4 x float> *%ptr) {
+ store <4 x float> %val, <4 x float> *%ptr
+ ret void
+; CHECK: store float %val.i0, float* %ptr.i0, align 16
+; CHECK: store float %val.i1, float* %ptr.i1, align 4
+; CHECK: store float %val.i2, float* %ptr.i2, align 8
+; CHECK: store float %val.i3, float* %ptr.i3, align 4
+}
+
+define void @func.copy(<4 x float> %val, <4 x float> *%ptr) {
+ store <4 x float> %val, <4 x float> *%ptr
+ ret void
+; CHECK: store float %val.i0, float* %ptr.i0, align 16
+; CHECK: store float %val.i1, float* %ptr.i1, align 4
+; CHECK: store float %val.i2, float* %ptr.i2, align 8
+; CHECK: store float %val.i3, float* %ptr.i3, align 4
+}
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
index 527634db0f5b..6f117697dded 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
@@ -6,9 +6,9 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
; IR-LABEL: @sum_of_array(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp = sext i32 %y to i64
%tmp1 = sext i32 %x to i64
@@ -38,7 +38,7 @@ define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output)
; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
; IR: add i32 %x, 256
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
@@ -71,9 +71,9 @@ define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(
; DS instructions have a larger immediate offset, so make sure these are OK.
; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383
define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
%tmp4 = load float, float addrspace(3)* %tmp2, align 4
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 073313d40e77..e7b3545839c3 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -52,9 +52,9 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
; IR-LABEL: @sum_of_array(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; @sum_of_array2 is very similar to @sum_of_array. The only difference is in
; the order of "sext" and "add" when computing the array indices. @sum_of_array
@@ -95,9 +95,9 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
; IR-LABEL: @sum_of_array2(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; This function loads
@@ -145,9 +145,9 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
; IR-LABEL: @sum_of_array3(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; This function loads
@@ -191,6 +191,44 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
; IR-LABEL: @sum_of_array4(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
+
+
+; The source code is:
+; p0 = &input[sext(x + y)];
+; p1 = &input[sext(x + (y + 5))];
+;
+; Without reuniting extensions, SeparateConstOffsetFromGEP would emit
+; p0 = &input[sext(x + y)];
+; t1 = &input[sext(x) + sext(y)];
+; p1 = &t1[5];
+;
+; With reuniting extensions, it merges p0 and t1 and thus emits
+; p0 = &input[sext(x + y)];
+; p1 = &p0[5];
+define void @reunion(i32 %x, i32 %y, float* %input) {
+; IR-LABEL: @reunion(
+; PTX-LABEL: reunion(
+entry:
+ %xy = add nsw i32 %x, %y
+ %0 = sext i32 %xy to i64
+ %p0 = getelementptr inbounds float, float* %input, i64 %0
+ %v0 = load float, float* %p0, align 4
+; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0:%rd[0-9]+]]{{\]}}
+ call void @use(float %v0)
+
+ %y5 = add nsw i32 %y, 5
+ %xy5 = add nsw i32 %x, %y5
+ %1 = sext i32 %xy5 to i64
+ %p1 = getelementptr inbounds float, float* %input, i64 %1
+; IR: getelementptr inbounds float, float* %p0, i64 5
+ %v1 = load float, float* %p1, align 4
+; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0]]+20{{\]}}
+ call void @use(float %v1)
+
+ ret void
+}
+
+declare void @use(float)
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 2fdd158a35ed..eeeac1963741 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -44,7 +44,7 @@ entry:
; CHECK: add i32 %j, -2
; CHECK: sext
; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 32
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 32
; We should be able to trace into sext/zext if it can be distributed to both
; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
@@ -65,7 +65,7 @@ define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
}
; CHECK-LABEL: @ext_add_no_overflow(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 33
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 33
; Verifies we handle nested sext/zext correctly.
define void @sext_zext(i32 %a, i32 %b, float** %out1, float** %out2) {
@@ -110,7 +110,7 @@ entry:
}
; CHECK-LABEL: @sext_or(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 32
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 32
; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
@@ -125,7 +125,7 @@ entry:
}
; CHECK-LABEL: @expr(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 160
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 160
; CHECK: store i64 %b5, i64* %out
; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8
@@ -143,7 +143,7 @@ entry:
; CHECK: sext i32
; CHECK: sext i32
; CHECK: sext i32
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 8
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 8
; Verifies we handle "sub" correctly.
define float* @sub(i64 %i, i64 %j) {
@@ -155,7 +155,7 @@ define float* @sub(i64 %i, i64 %j) {
; CHECK-LABEL: @sub(
; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 -155
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 -155
%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
@@ -173,7 +173,7 @@ entry:
; CHECK-LABEL: @packed_struct(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed], [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}}
; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
-; CHECK: %uglygep = getelementptr i8, i8* [[CASTED_PTR]], i64 100
+; CHECK: %uglygep = getelementptr inbounds i8, i8* [[CASTED_PTR]], i64 100
; CHECK: bitcast i8* %uglygep to i64*
; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))",
@@ -272,7 +272,7 @@ entry:
%ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
-; CHECK: getelementptr i8, i8* [[PTR1]], i64 -64
+; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64
; CHECK: bitcast
ret %struct2* %ptr2
; CHECK-NEXT: ret
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
index d1a0f33d5a21..601ca5291353 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
@@ -25,7 +25,7 @@ then:
%or = or i64 %i, 3
%p = getelementptr inbounds float, float* %input, i64 %or
; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i
-; CHECK: getelementptr float, float* [[base]], i64 3
+; CHECK: getelementptr inbounds float, float* [[base]], i64 3
ret float* %p
exit:
diff --git a/test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll
new file mode 100644
index 000000000000..e32d711143dc
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -simplifycfg -mtriple=aarch64 < %s | FileCheck %s
+
+define i32 @ctlz(i32 %A) {
+; CHECK-LABEL: @ctlz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+define i32 @cttz(i32 %A) {
+; CHECK-LABEL: @cttz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
diff --git a/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
new file mode 100644
index 000000000000..ffcf2175091f
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -simplifycfg -mtriple=arm -mattr=+v6t2 < %s | FileCheck %s
+
+define i32 @ctlz(i32 %A) {
+; CHECK-LABEL: @ctlz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+define i32 @cttz(i32 %A) {
+; CHECK-LABEL: @cttz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
diff --git a/test/Transforms/SimplifyCFG/ARM/lit.local.cfg b/test/Transforms/SimplifyCFG/ARM/lit.local.cfg
new file mode 100644
index 000000000000..5a3b8565213d
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/ARM/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+ config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll
new file mode 100644
index 000000000000..b4bfb51dd142
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -simplifycfg -mtriple=mips-linux-gnu < %s | FileCheck %s
+
+define i32 @ctlz(i32 %A) {
+; CHECK-LABEL: @ctlz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+define i32 @cttz(i32 %A) {
+; CHECK-LABEL: @cttz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
diff --git a/test/Transforms/SimplifyCFG/Mips/lit.local.cfg b/test/Transforms/SimplifyCFG/Mips/lit.local.cfg
new file mode 100644
index 000000000000..683bfdccb742
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/Mips/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Mips' in targets:
+ config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/PR25267.ll b/test/Transforms/SimplifyCFG/PR25267.ll
new file mode 100644
index 000000000000..a13d45a0f271
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR25267.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+define void @f() {
+entry:
+ br label %for.cond
+
+for.cond:
+ %phi = phi i1 [ false, %entry ], [ true, %for.body ]
+ %select = select i1 %phi, i32 1, i32 2
+ br label %for.body
+
+for.body:
+ switch i32 %select, label %for.cond [
+ i32 1, label %return
+ i32 2, label %for.body
+ ]
+
+return:
+ ret void
+}
+
+; CHECK-LABEL: define void @f(
+; CHECK: br label %[[LABEL:.*]]
+; CHECK: br label %[[LABEL]]
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index c23a96df52ee..73f9a0f88aca 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -69,3 +69,29 @@ end:
ret i8* %x10
}
+
+define i32* @test5(i32 %a, i32 %b, i32 %c, i32* dereferenceable(10) %ptr1,
+ i32* dereferenceable(10) %ptr2, i32** dereferenceable(10) %ptr3) nounwind {
+; CHECK-LABEL: @test5(
+entry:
+ %tmp1 = icmp eq i32 %b, 0
+ br i1 %tmp1, label %bb1, label %bb3
+
+bb1: ; preds = %entry
+ %tmp2 = icmp sgt i32 %c, 1
+ br i1 %tmp2, label %bb2, label %bb3
+; CHECK: bb1:
+; CHECK-NEXT: icmp sgt i32 %c, 1
+; CHECK-NEXT: load i32*, i32** %ptr3
+; CHECK-NOT: dereferenceable
+; CHECK-NEXT: select i1 %tmp2, i32* %tmp3, i32* %ptr2
+; CHECK-NEXT: ret i32* %tmp3.ptr2
+
+bb2: ; preds = bb1
+ %tmp3 = load i32*, i32** %ptr3, !dereferenceable !{i64 10}
+ br label %bb3
+
+bb3: ; preds = %bb2, %entry
+ %tmp4 = phi i32* [ %ptr1, %entry ], [ %ptr2, %bb1 ], [ %tmp3, %bb2 ]
+ ret i32* %tmp4
+}
diff --git a/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
index 69f6c69059d4..bee80e6acce0 100644
--- a/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
+++ b/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
@@ -7,9 +7,7 @@ define i64 @test1(i64 %A) {
; ALL-LABEL: @test1(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
; ALL: ret
entry:
%tobool = icmp eq i64 %A, 0
@@ -28,9 +26,7 @@ define i32 @test2(i32 %A) {
; ALL-LABEL: @test2(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
; ALL: ret
entry:
%tobool = icmp eq i32 %A, 0
@@ -50,9 +46,7 @@ define signext i16 @test3(i16 signext %A) {
; ALL-LABEL: @test3(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
; ALL: ret
entry:
%tobool = icmp eq i16 %A, 0
@@ -72,9 +66,7 @@ define i64 @test1b(i64 %A) {
; ALL-LABEL: @test1b(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
; ALL: ret
entry:
%tobool = icmp eq i64 %A, 0
@@ -94,9 +86,7 @@ define i32 @test2b(i32 %A) {
; ALL-LABEL: @test2b(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
; ALL: ret
entry:
%tobool = icmp eq i32 %A, 0
@@ -116,9 +106,7 @@ define signext i16 @test3b(i16 signext %A) {
; ALL-LABEL: @test3b(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
; ALL: ret
entry:
%tobool = icmp eq i16 %A, 0
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index d228499b2ec5..d4a9c81e506d 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -50,7 +50,7 @@ define i8 @test6f() {
; CHECK: alloca i8, align 1
; CHECK-NEXT: call i8 @test6g
; CHECK-NEXT: icmp eq i8 %tmp, 0
-; CHECK-NEXT: load i8, i8* %r, align 1{{$}}
+; CHECK-NEXT: load i8, i8* %r, align 1, !dbg !{{[0-9]+$}}
bb0:
%r = alloca i8, align 1
@@ -58,7 +58,7 @@ bb0:
%tmp1 = icmp eq i8 %tmp, 0
br i1 %tmp1, label %bb2, label %bb1
bb1:
- %tmp3 = load i8, i8* %r, align 1, !range !2, !tbaa !1
+ %tmp3 = load i8, i8* %r, align 1, !range !2, !tbaa !1, !dbg !5
%tmp4 = icmp eq i8 %tmp3, 1
br i1 %tmp4, label %bb2, label %bb3
bb2:
@@ -69,6 +69,16 @@ bb3:
}
declare i8 @test6g(i8*)
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!8, !9}
+
!0 = !{!1, !1, i64 0}
!1 = !{!"foo"}
!2 = !{i8 0, i8 2}
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, file: !7, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !4, subprograms: !4, globals: !4)
+!4 = !{}
+!5 = !DILocation(line: 23, scope: !6)
+!6 = distinct !DISubprogram(name: "foo", scope: !3, file: !7, line: 1, type: !DISubroutineType(types: !4), isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !4)
+!7 = !DIFile(filename: "foo.c", directory: "/")
+!8 = !{i32 2, !"Dwarf Version", i32 2}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index fac5b186e89d..34871063bbcc 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -4,7 +4,7 @@
@0 = external hidden constant [5 x %0], align 4
-define void @foo(i32) nounwind ssp {
+define void @foo(i32) nounwind ssp !dbg !0 {
Entry:
%1 = icmp slt i32 %0, 0, !dbg !5
br i1 %1, label %BB5, label %BB1, !dbg !5
@@ -41,14 +41,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 231, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !15, scope: !1, type: !3, function: void (i32)* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 231, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !15, scope: !1, type: !3)
!1 = !DIFile(filename: "a.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang (trunk 129006)", isOptimized: true, emissionKind: 0, file: !15, enums: !4, retainedTypes: !4)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang (trunk 129006)", isOptimized: true, emissionKind: 0, file: !15, enums: !4, retainedTypes: !4)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocation(line: 131, column: 2, scope: !0)
!6 = !DILocation(line: 134, column: 2, scope: !0)
-!7 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "bar", line: 232, scope: !8, file: !1, type: !9)
+!7 = !DILocalVariable(name: "bar", line: 232, scope: !8, file: !1, type: !9)
!8 = distinct !DILexicalBlock(line: 231, column: 1, file: !15, scope: !0)
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !10)
!10 = !DIDerivedType(tag: DW_TAG_const_type, scope: !2, baseType: !11)
diff --git a/test/Transforms/SimplifyCFG/empty-cleanuppad.ll b/test/Transforms/SimplifyCFG/empty-cleanuppad.ll
new file mode 100644
index 000000000000..57b362889955
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/empty-cleanuppad.ll
@@ -0,0 +1,415 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; ModuleID = 'cppeh-simplify.cpp'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+
+; This case arises when two objects with empty destructors are cleaned up.
+;
+; void f1() {
+; S a;
+; S b;
+; g();
+; }
+;
+; In this case, both cleanup pads can be eliminated and the invoke can be
+; converted to a call.
+;
+; CHECK: define void @f1()
+; CHECK: entry:
+; CHECK: call void @g()
+; CHECK: ret void
+; CHECK-NOT: cleanuppad
+; CHECK: }
+;
+define void @f1() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g() to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ ret void
+
+ehcleanup: ; preds = %entry
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %ehcleanup.1
+
+ehcleanup.1: ; preds = %ehcleanup
+ %1 = cleanuppad within none []
+ cleanupret from %1 unwind to caller
+}
+
+
+; This case arises when an object with an empty destructor must be cleaned up
+; outside of a try-block and an object with a non-empty destructor must be
+; cleaned up within the try-block.
+;
+; void f2() {
+; S a;
+; try {
+; S2 b;
+; g();
+; } catch (...) {}
+; }
+;
+; In this case, the outermost cleanup pad can be eliminated and the catch block
+; should unwind to the caller (that is, exception handling continues with the
+; parent frame of the caller).
+;
+; CHECK: define void @f2()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: ehcleanup:
+; CHECK: cleanuppad within none
+; CHECK: call void @"\01??1S2@@QEAA@XZ"(%struct.S2* %b)
+; CHECK: cleanupret from %0 unwind label %catch.dispatch
+; CHECK: catch.dispatch:
+; CHECK: catchswitch within none [label %catch] unwind to caller
+; CHECK: catch:
+; CHECK: catchpad
+; CHECK: catchret
+; CHECK-NOT: cleanuppad
+; CHECK: }
+;
+define void @f2() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %b = alloca %struct.S2, align 1
+ invoke void @g() to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+ehcleanup: ; preds = %entry
+ %0 = cleanuppad within none []
+ call void @"\01??1S2@@QEAA@XZ"(%struct.S2* %b)
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup
+ %cs1 = catchswitch within none [label %catch] unwind label %ehcleanup.1
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %1 to label %catchret.dest
+
+catchret.dest: ; preds = %catch
+ br label %try.cont
+
+try.cont: ; preds = %catchret.dest, %invoke.cont
+ ret void
+
+ehcleanup.1:
+ %2 = cleanuppad within none []
+ cleanupret from %2 unwind to caller
+}
+
+
+; This case arises when an object with a non-empty destructor must be cleaned up
+; outside of a try-block and an object with an empty destructor must be cleaned
+; within the try-block.
+;
+; void f3() {
+; S2 a;
+; try {
+; S b;
+; g();
+; } catch (...) {}
+; }
+;
+; In this case the inner cleanup pad should be eliminated and the invoke of g()
+; should unwind directly to the catchpad.
+;
+; CHECK-LABEL: define void @f3()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: to label %try.cont unwind label %catch.dispatch
+; CHECK: catch.dispatch:
+; CHECK-NEXT: catchswitch within none [label %catch] unwind label %ehcleanup.1
+; CHECK: catch:
+; CHECK: catchpad within %cs1 [i8* null, i32 64, i8* null]
+; CHECK: catchret
+; CHECK: ehcleanup.1:
+; CHECK: cleanuppad
+; CHECK: call void @"\01??1S2@@QEAA@XZ"(%struct.S2* %a)
+; CHECK: cleanupret from %cp3 unwind to caller
+; CHECK: }
+;
+define void @f3() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %a = alloca %struct.S2, align 1
+ invoke void @g() to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+ehcleanup: ; preds = %entry
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup
+ %cs1 = catchswitch within none [label %catch] unwind label %ehcleanup.1
+
+catch: ; preds = %catch.dispatch
+ %cp2 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %cp2 to label %catchret.dest
+
+catchret.dest: ; preds = %catch
+ br label %try.cont
+
+try.cont: ; preds = %catchret.dest, %invoke.cont
+ ret void
+
+ehcleanup.1:
+ %cp3 = cleanuppad within none []
+ call void @"\01??1S2@@QEAA@XZ"(%struct.S2* %a)
+ cleanupret from %cp3 unwind to caller
+}
+
+
+; This case arises when an object with an empty destructor may require cleanup
+; from either inside or outside of a try-block.
+;
+; void f4() {
+; S a;
+; g();
+; try {
+; g();
+; } catch (...) {}
+; }
+;
+; In this case, the cleanuppad should be eliminated, the invoke outside of the
+; catch block should be converted to a call (that is, that is, exception
+; handling continues with the parent frame of the caller).)
+;
+; CHECK-LABEL: define void @f4()
+; CHECK: entry:
+; CHECK: call void @g
+; Note: The cleanuppad simplification will insert an unconditional branch here
+; but it will be eliminated, placing the following invoke in the entry BB.
+; CHECK: invoke void @g()
+; CHECK: to label %try.cont unwind label %catch.dispatch
+; CHECK: catch.dispatch:
+; CHECK: catchswitch within none [label %catch] unwind to caller
+; CHECK: catch:
+; CHECK: catchpad
+; CHECK: catchret
+; CHECK-NOT: cleanuppad
+; CHECK: }
+;
+define void @f4() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ invoke void @g()
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %invoke.cont
+ %cs1 = catchswitch within none [label %catch] unwind label %ehcleanup
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %0 to label %try.cont
+
+try.cont: ; preds = %catch, %invoke.cont
+ ret void
+
+ehcleanup:
+ %cp2 = cleanuppad within none []
+ cleanupret from %cp2 unwind to caller
+}
+
+; This case tests simplification of an otherwise empty cleanup pad that contains
+; a PHI node.
+;
+; int f6() {
+; int state = 1;
+; try {
+; S a;
+; g();
+; state = 2;
+; g();
+; } catch (...) {
+; return state;
+; }
+; return 0;
+; }
+;
+; In this case, the cleanup pad should be eliminated and the PHI node in the
+; cleanup pad should be sunk into the catch dispatch block.
+;
+; CHECK-LABEL: define i32 @f6()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: invoke.cont:
+; CHECK: invoke void @g()
+; CHECK-NOT: ehcleanup:
+; CHECK-NOT: cleanuppad
+; CHECK: catch.dispatch:
+; CHECK: %state.0 = phi i32 [ 2, %invoke.cont ], [ 1, %entry ]
+; CHECK: }
+define i32 @f6() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ invoke void @g()
+ to label %return unwind label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont, %entry
+ %state.0 = phi i32 [ 2, %invoke.cont ], [ 1, %entry ]
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %1 to label %return
+
+return: ; preds = %invoke.cont, %catch
+ %retval.0 = phi i32 [ %state.0, %catch ], [ 0, %invoke.cont ]
+ ret i32 %retval.0
+}
+
+; This case tests another variation of simplification of an otherwise empty
+; cleanup pad that contains a PHI node.
+;
+; int f7() {
+; int state = 1;
+; try {
+; g();
+; state = 2;
+; S a;
+; g();
+; state = 3;
+; g();
+; } catch (...) {
+; return state;
+; }
+; return 0;
+; }
+;
+; In this case, the cleanup pad should be eliminated and the PHI node in the
+; cleanup pad should be merged with the PHI node in the catch dispatch block.
+;
+; CHECK-LABEL: define i32 @f7()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: invoke.cont:
+; CHECK: invoke void @g()
+; CHECK: invoke.cont.1:
+; CHECK: invoke void @g()
+; CHECK-NOT: ehcleanup:
+; CHECK-NOT: cleanuppad
+; CHECK: catch.dispatch:
+; CHECK: %state.1 = phi i32 [ 1, %entry ], [ 3, %invoke.cont.1 ], [ 2, %invoke.cont ]
+; CHECK: }
+define i32 @f7() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont unwind label %catch.dispatch
+
+invoke.cont: ; preds = %entry
+ invoke void @g()
+ to label %invoke.cont.1 unwind label %ehcleanup
+
+invoke.cont.1: ; preds = %invoke.cont
+ invoke void @g()
+ to label %return unwind label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont.1, %invoke.cont
+ %state.0 = phi i32 [ 3, %invoke.cont.1 ], [ 2, %invoke.cont ]
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup, %entry
+ %state.1 = phi i32 [ %state.0, %ehcleanup ], [ 1, %entry ]
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %1 to label %return
+
+return: ; preds = %invoke.cont.1, %catch
+ %retval.0 = phi i32 [ %state.1, %catch ], [ 0, %invoke.cont.1 ]
+ ret i32 %retval.0
+}
+
+; This case tests a scenario where an empty cleanup pad is not dominated by all
+; of the predecessors of its successor, but the successor references a PHI node
+; in the empty cleanup pad.
+;
+; Conceptually, the case being modeled is something like this:
+;
+; int f8() {
+; int x = 1;
+; try {
+; S a;
+; g();
+; x = 2;
+; retry:
+; g();
+; return
+; } catch (...) {
+; use_x(x);
+; }
+; goto retry;
+; }
+;
+; While that C++ syntax isn't legal, the IR below is.
+;
+; In this case, the PHI node that is sunk from ehcleanup to catch.dispatch
+; should have an incoming value entry for path from 'foo' that references the
+; PHI node itself.
+;
+; CHECK-LABEL: define void @f8()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: invoke.cont:
+; CHECK: invoke void @g()
+; CHECK-NOT: ehcleanup:
+; CHECK-NOT: cleanuppad
+; CHECK: catch.dispatch:
+; CHECK: %x = phi i32 [ 2, %invoke.cont ], [ 1, %entry ], [ %x, %catch.cont ]
+; CHECK: }
+define void @f8() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ invoke void @g()
+ to label %return unwind label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont, %entry
+ %x = phi i32 [ 2, %invoke.cont ], [ 1, %entry ]
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup, %catch.cont
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ call void @use_x(i32 %x)
+ catchret from %1 to label %catch.cont
+
+catch.cont: ; preds = %catch
+ invoke void @g()
+ to label %return unwind label %catch.dispatch
+
+return: ; preds = %invoke.cont, %catch.cont
+ ret void
+}
+
+%struct.S = type { i8 }
+%struct.S2 = type { i8 }
+declare void @"\01??1S2@@QEAA@XZ"(%struct.S2*)
+declare void @g()
+declare void @use_x(i32 %x)
+
+declare i32 @__CxxFrameHandler3(...)
+
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
index 345cf6282e3c..887373a2d3db 100644
--- a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
+++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -1,6 +1,6 @@
; RUN: opt -simplifycfg -S < %s | FileCheck %s
-define i32 @foo(i32 %i) nounwind ssp {
+define i32 @foo(i32 %i) nounwind ssp !dbg !0 {
call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !6, metadata !DIExpression()), !dbg !7
call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !9, metadata !DIExpression()), !dbg !11
%1 = icmp ne i32 %i, 0, !dbg !12
@@ -32,16 +32,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!21}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !1, type: !3, function: i32 (i32)* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !1, type: !3)
!1 = !DIFile(filename: "b.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !20, enums: !8, retainedTypes: !8)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !20, enums: !8, retainedTypes: !8)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 2, arg: 1, scope: !0, file: !1, type: !5)
+!6 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !0, file: !1, type: !5)
!7 = !DILocation(line: 2, column: 13, scope: !0)
!8 = !{i32 0}
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3, scope: !10, file: !1, type: !5)
+!9 = !DILocalVariable(name: "k", line: 3, scope: !10, file: !1, type: !5)
!10 = distinct !DILexicalBlock(line: 2, column: 16, file: !20, scope: !0)
!11 = !DILocation(line: 3, column: 12, scope: !10)
!12 = !DILocation(line: 4, column: 3, scope: !10)
diff --git a/test/Transforms/SimplifyCFG/implied-cond.ll b/test/Transforms/SimplifyCFG/implied-cond.ll
new file mode 100644
index 000000000000..317adc4c3472
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/implied-cond.ll
@@ -0,0 +1,81 @@
+; RUN: opt %s -S -simplifycfg | FileCheck %s
+; Check for when one branch implies the value of a successors conditional and
+; it's not simply the same conditional repeated.
+
+define void @test(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test
+ %iplus1 = add nsw i32 %i, 1
+ %var29 = icmp slt i32 %iplus1, %length.i
+; CHECK: br i1 %var29, label %in_bounds, label %out_of_bounds
+ br i1 %var29, label %next, label %out_of_bounds
+
+next:
+; CHECK-LABEL: in_bounds:
+; CHECK-NEXT: ret void
+ %var30 = icmp slt i32 %i, %length.i
+ br i1 %var30, label %in_bounds, label %out_of_bounds2
+
+in_bounds:
+ ret void
+
+out_of_bounds:
+ call void @foo(i64 0)
+ unreachable
+
+out_of_bounds2:
+ call void @foo(i64 1)
+ unreachable
+}
+
+; If the add is not nsw, it's not safe to use the fact about i+1 to imply the
+; i condition since it could have overflowed.
+define void @test_neg(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test_neg
+ %iplus1 = add i32 %i, 1
+ %var29 = icmp slt i32 %iplus1, %length.i
+; CHECK: br i1 %var29, label %next, label %out_of_bounds
+ br i1 %var29, label %next, label %out_of_bounds
+
+next:
+ %var30 = icmp slt i32 %i, %length.i
+; CHECK: br i1 %var30, label %in_bounds, label %out_of_bounds2
+ br i1 %var30, label %in_bounds, label %out_of_bounds2
+
+in_bounds:
+ ret void
+
+out_of_bounds:
+ call void @foo(i64 0)
+ unreachable
+
+out_of_bounds2:
+ call void @foo(i64 1)
+ unreachable
+}
+
+
+define void @test2(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2
+ %iplus100 = add nsw i32 %i, 100
+ %var29 = icmp slt i32 %iplus100, %length.i
+; CHECK: br i1 %var29, label %in_bounds, label %out_of_bounds
+ br i1 %var29, label %next, label %out_of_bounds
+
+next:
+ %var30 = icmp slt i32 %i, %length.i
+ br i1 %var30, label %in_bounds, label %out_of_bounds2
+
+in_bounds:
+ ret void
+
+out_of_bounds:
+ call void @foo(i64 0)
+ unreachable
+
+out_of_bounds2:
+ call void @foo(i64 1)
+ unreachable
+}
+
+declare void @foo(i64)
+
diff --git a/test/Transforms/SimplifyCFG/invoke_unwind.ll b/test/Transforms/SimplifyCFG/invoke_unwind.ll
index 3b4c09d96f77..100bfd4e9e3e 100644
--- a/test/Transforms/SimplifyCFG/invoke_unwind.ll
+++ b/test/Transforms/SimplifyCFG/invoke_unwind.ll
@@ -17,4 +17,17 @@ Rethrow:
resume { i8*, i32 } %exn
}
+define i32 @test2() personality i32 (...)* @__gxx_personality_v0 {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: call void @bar() [ "foo"(i32 100) ]
+; CHECK-NEXT: ret i32 0
+ invoke void @bar( ) [ "foo"(i32 100) ]
+ to label %1 unwind label %Rethrow
+ ret i32 0
+Rethrow:
+ %exn = landingpad {i8*, i32}
+ catch i8* null
+ resume { i8*, i32 } %exn
+}
+
declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll b/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll
new file mode 100644
index 000000000000..fe498b5334e8
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll
@@ -0,0 +1,215 @@
+; RUN: opt -S < %s -simplifycfg -simplifycfg-merge-cond-stores=true -simplifycfg-merge-cond-stores-aggressively=false -phi-node-folding-threshold=2 | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+; This is a bit reversal that has been run through the early optimizer (-mem2reg -gvn -instcombine).
+; There should be no additional PHIs created at all. The store should be on its own in a predicated
+; block and there should be no PHIs.
+
+; CHECK-LABEL: @f
+; Exactly 15 phis, as there are 15 in the original test case.
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK-NOT: select
+; CHECK: br i1 {{.*}}, label %[[L:.*]], label %[[R:.*]]
+; CHECK: [[L]] ; preds =
+; CHECK-NEXT: store
+; CHECK-NEXT: br label %[[R]]
+; CHECK: [[R]] ; preds =
+; CHECK-NEXT: ret i32 0
+
+define i32 @f(i32* %b) {
+entry:
+ %0 = load i32, i32* %b, align 4
+ %and = and i32 %0, 1
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %or = or i32 %0, -2147483648
+ store i32 %or, i32* %b, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %1 = phi i32 [ %0, %entry ], [ %or, %if.then ]
+ %and1 = and i32 %1, 2
+ %tobool2 = icmp eq i32 %and1, 0
+ br i1 %tobool2, label %if.end5, label %if.then3
+
+if.then3: ; preds = %if.end
+ %or4 = or i32 %1, 1073741824
+ store i32 %or4, i32* %b, align 4
+ br label %if.end5
+
+if.end5: ; preds = %if.end, %if.then3
+ %2 = phi i32 [ %1, %if.end ], [ %or4, %if.then3 ]
+ %and6 = and i32 %2, 4
+ %tobool7 = icmp eq i32 %and6, 0
+ br i1 %tobool7, label %if.end10, label %if.then8
+
+if.then8: ; preds = %if.end5
+ %or9 = or i32 %2, 536870912
+ store i32 %or9, i32* %b, align 4
+ br label %if.end10
+
+if.end10: ; preds = %if.end5, %if.then8
+ %3 = phi i32 [ %2, %if.end5 ], [ %or9, %if.then8 ]
+ %and11 = and i32 %3, 8
+ %tobool12 = icmp eq i32 %and11, 0
+ br i1 %tobool12, label %if.end15, label %if.then13
+
+if.then13: ; preds = %if.end10
+ %or14 = or i32 %3, 268435456
+ store i32 %or14, i32* %b, align 4
+ br label %if.end15
+
+if.end15: ; preds = %if.end10, %if.then13
+ %4 = phi i32 [ %3, %if.end10 ], [ %or14, %if.then13 ]
+ %and16 = and i32 %4, 16
+ %tobool17 = icmp eq i32 %and16, 0
+ br i1 %tobool17, label %if.end20, label %if.then18
+
+if.then18: ; preds = %if.end15
+ %or19 = or i32 %4, 134217728
+ store i32 %or19, i32* %b, align 4
+ br label %if.end20
+
+if.end20: ; preds = %if.end15, %if.then18
+ %5 = phi i32 [ %4, %if.end15 ], [ %or19, %if.then18 ]
+ %and21 = and i32 %5, 32
+ %tobool22 = icmp eq i32 %and21, 0
+ br i1 %tobool22, label %if.end25, label %if.then23
+
+if.then23: ; preds = %if.end20
+ %or24 = or i32 %5, 67108864
+ store i32 %or24, i32* %b, align 4
+ br label %if.end25
+
+if.end25: ; preds = %if.end20, %if.then23
+ %6 = phi i32 [ %5, %if.end20 ], [ %or24, %if.then23 ]
+ %and26 = and i32 %6, 64
+ %tobool27 = icmp eq i32 %and26, 0
+ br i1 %tobool27, label %if.end30, label %if.then28
+
+if.then28: ; preds = %if.end25
+ %or29 = or i32 %6, 33554432
+ store i32 %or29, i32* %b, align 4
+ br label %if.end30
+
+if.end30: ; preds = %if.end25, %if.then28
+ %7 = phi i32 [ %6, %if.end25 ], [ %or29, %if.then28 ]
+ %and31 = and i32 %7, 256
+ %tobool32 = icmp eq i32 %and31, 0
+ br i1 %tobool32, label %if.end35, label %if.then33
+
+if.then33: ; preds = %if.end30
+ %or34 = or i32 %7, 8388608
+ store i32 %or34, i32* %b, align 4
+ br label %if.end35
+
+if.end35: ; preds = %if.end30, %if.then33
+ %8 = phi i32 [ %7, %if.end30 ], [ %or34, %if.then33 ]
+ %and36 = and i32 %8, 512
+ %tobool37 = icmp eq i32 %and36, 0
+ br i1 %tobool37, label %if.end40, label %if.then38
+
+if.then38: ; preds = %if.end35
+ %or39 = or i32 %8, 4194304
+ store i32 %or39, i32* %b, align 4
+ br label %if.end40
+
+if.end40: ; preds = %if.end35, %if.then38
+ %9 = phi i32 [ %8, %if.end35 ], [ %or39, %if.then38 ]
+ %and41 = and i32 %9, 1024
+ %tobool42 = icmp eq i32 %and41, 0
+ br i1 %tobool42, label %if.end45, label %if.then43
+
+if.then43: ; preds = %if.end40
+ %or44 = or i32 %9, 2097152
+ store i32 %or44, i32* %b, align 4
+ br label %if.end45
+
+if.end45: ; preds = %if.end40, %if.then43
+ %10 = phi i32 [ %9, %if.end40 ], [ %or44, %if.then43 ]
+ %and46 = and i32 %10, 2048
+ %tobool47 = icmp eq i32 %and46, 0
+ br i1 %tobool47, label %if.end50, label %if.then48
+
+if.then48: ; preds = %if.end45
+ %or49 = or i32 %10, 1048576
+ store i32 %or49, i32* %b, align 4
+ br label %if.end50
+
+if.end50: ; preds = %if.end45, %if.then48
+ %11 = phi i32 [ %10, %if.end45 ], [ %or49, %if.then48 ]
+ %and51 = and i32 %11, 4096
+ %tobool52 = icmp eq i32 %and51, 0
+ br i1 %tobool52, label %if.end55, label %if.then53
+
+if.then53: ; preds = %if.end50
+ %or54 = or i32 %11, 524288
+ store i32 %or54, i32* %b, align 4
+ br label %if.end55
+
+if.end55: ; preds = %if.end50, %if.then53
+ %12 = phi i32 [ %11, %if.end50 ], [ %or54, %if.then53 ]
+ %and56 = and i32 %12, 8192
+ %tobool57 = icmp eq i32 %and56, 0
+ br i1 %tobool57, label %if.end60, label %if.then58
+
+if.then58: ; preds = %if.end55
+ %or59 = or i32 %12, 262144
+ store i32 %or59, i32* %b, align 4
+ br label %if.end60
+
+if.end60: ; preds = %if.end55, %if.then58
+ %13 = phi i32 [ %12, %if.end55 ], [ %or59, %if.then58 ]
+ %and61 = and i32 %13, 16384
+ %tobool62 = icmp eq i32 %and61, 0
+ br i1 %tobool62, label %if.end65, label %if.then63
+
+if.then63: ; preds = %if.end60
+ %or64 = or i32 %13, 131072
+ store i32 %or64, i32* %b, align 4
+ br label %if.end65
+
+if.end65: ; preds = %if.end60, %if.then63
+ %14 = phi i32 [ %13, %if.end60 ], [ %or64, %if.then63 ]
+ %and66 = and i32 %14, 32768
+ %tobool67 = icmp eq i32 %and66, 0
+ br i1 %tobool67, label %if.end70, label %if.then68
+
+if.then68: ; preds = %if.end65
+ %or69 = or i32 %14, 65536
+ store i32 %or69, i32* %b, align 4
+ br label %if.end70
+
+if.end70: ; preds = %if.end65, %if.then68
+ %15 = phi i32 [ %14, %if.end65 ], [ %or69, %if.then68 ]
+ %and71 = and i32 %15, 128
+ %tobool72 = icmp eq i32 %and71, 0
+ br i1 %tobool72, label %if.end75, label %if.then73
+
+if.then73: ; preds = %if.end70
+ %or74 = or i32 %15, 16777216
+ store i32 %or74, i32* %b, align 4
+ br label %if.end75
+
+if.end75: ; preds = %if.end70, %if.then73
+ ret i32 0
+}
diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
new file mode 100644
index 000000000000..77e3158d9bbd
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -0,0 +1,241 @@
+; RUN: opt -simplifycfg -instcombine < %s -simplifycfg-merge-cond-stores=true -simplifycfg-merge-cond-stores-aggressively=false -phi-node-folding-threshold=2 -S | FileCheck %s
+
+; CHECK-LABEL: @test_simple
+; This test should succeed and end up if-converted.
+; CHECK: icmp eq i32 %b, 0
+; CHECK-NEXT: icmp ne i32 %a, 0
+; CHECK-NEXT: xor i1 %x2, true
+; CHECK-NEXT: %[[x:.*]] = or i1 %{{.*}}, %{{.*}}
+; CHECK-NEXT: br i1 %[[x]]
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define void @test_simple(i32* %p, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %end, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+end:
+ ret void
+}
+
+; CHECK-LABEL: @test_recursive
+; This test should entirely fold away, leaving one large basic block.
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %next, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %next
+
+next:
+ %x3 = icmp eq i32 %c, 0
+ br i1 %x3, label %fallthrough2, label %yes3
+
+yes3:
+ store i32 2, i32* %p
+ br label %fallthrough2
+
+fallthrough2:
+ %x4 = icmp eq i32 %d, 0
+ br i1 %x4, label %end, label %yes4
+
+yes4:
+ store i32 3, i32* %p
+ br label %end
+
+
+end:
+ ret void
+}
+
+; CHECK-LABEL: @test_not_ifconverted
+; The code in each diamond is too large - it won't be if-converted so our
+; heuristics should say no.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define void @test_not_ifconverted(i32* %p, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ %y1 = or i32 %b, 55
+ %y2 = add i32 %y1, 24
+ %y3 = and i32 %y2, 67
+ store i32 %y3, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %end, label %yes2
+
+yes2:
+ %z1 = or i32 %a, 55
+ %z2 = add i32 %z1, 24
+ %z3 = and i32 %z2, 67
+ store i32 %z3, i32* %p
+ br label %end
+
+end:
+ ret void
+}
+
+; CHECK-LABEL: @test_aliasing1
+; The store to %p clobbers the previous store, so if-converting this would
+; be illegal.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define void @test_aliasing1(i32* %p, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %y1 = load i32, i32* %p
+ %x2 = icmp eq i32 %y1, 0
+ br i1 %x2, label %end, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+end:
+ ret void
+}
+
+; CHECK-LABEL: @test_aliasing2
+; The load from %q aliases with %p, so if-converting this would be illegal.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define void @test_aliasing2(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %y1 = load i32, i32* %q
+ %x2 = icmp eq i32 %y1, 0
+ br i1 %x2, label %end, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+end:
+ ret void
+}
+
+declare void @f()
+
+; CHECK-LABEL: @test_diamond_simple
+; This should get if-converted.
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test_diamond_simple(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %no1, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+no1:
+ %z1 = add i32 %a, %b
+ br label %fallthrough
+
+fallthrough:
+ %z2 = phi i32 [ %z1, %no1 ], [ 0, %yes1 ]
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %no2, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+no2:
+ %z3 = sub i32 %z2, %b
+ br label %end
+
+end:
+ %z4 = phi i32 [ %z3, %no2 ], [ 3, %yes2 ]
+ ret i32 %z4
+}
+
+; CHECK-LABEL: @test_diamond_alias3
+; Now there is a call to f() in the bottom branch. The store in the first
+; branch would now be reordered with respect to the call if we if-converted,
+; so we must not.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define i32 @test_diamond_alias3(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %no1, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+no1:
+ call void @f()
+ %z1 = add i32 %a, %b
+ br label %fallthrough
+
+fallthrough:
+ %z2 = phi i32 [ %z1, %no1 ], [ 0, %yes1 ]
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %no2, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+no2:
+ call void @f()
+ %z3 = sub i32 %z2, %b
+ br label %end
+
+end:
+ %z4 = phi i32 [ %z3, %no2 ], [ 3, %yes2 ]
+ ret i32 %z4
+}
diff --git a/test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll b/test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll
new file mode 100644
index 000000000000..063bde83f7b3
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll
@@ -0,0 +1,40 @@
+; RUN: opt -simplifycfg -S %s | FileCheck %s
+; Make sure we don't speculate loads under AddressSanitizer.
+@g = global i32 0, align 4
+
+define i32 @TestNoAsan(i32 %cond) nounwind readonly uwtable {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* @g, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval = phi i32 [ %0, %if.then ], [ 0, %entry ]
+ ret i32 %retval
+; CHECK-LABEL: @TestNoAsan
+; CHECK: %[[LOAD:[^ ]*]] = load
+; CHECK: select{{.*}}[[LOAD]]
+; CHECK: ret i32
+}
+
+define i32 @TestAsan(i32 %cond) nounwind readonly uwtable sanitize_address {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* @g, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval = phi i32 [ %0, %if.then ], [ 0, %entry ]
+ ret i32 %retval
+; CHECK-LABEL: @TestAsan
+; CHECK: br i1
+; CHECK: load i32, i32* @g
+; CHECK: br label
+; CHECK: ret i32
+}
diff --git a/test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll b/test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll
new file mode 100644
index 000000000000..94d3565ce985
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare void @bar(i32*)
+declare void @baz(i32*)
+
+; CHECK-LABEL: @test_load_combine_metadata(
+; Check that dereferenceable metadata is combined
+; CHECK: load i32*, i32** %p
+; CHECK-SAME: !dereferenceable ![[DEREF:[0-9]+]]
+; CHECK: t:
+; CHECK: f:
+define void @test_load_combine_metadata(i1 %c, i32** %p) {
+ br i1 %c, label %t, label %f
+
+t:
+ %v1 = load i32*, i32** %p, !dereferenceable !0
+ call void @bar(i32* %v1)
+ br label %cont
+
+f:
+ %v2 = load i32*, i32** %p, !dereferenceable !1
+ call void @baz(i32* %v2)
+ br label %cont
+
+cont:
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll b/test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll
new file mode 100644
index 000000000000..92bdf6ec5c1a
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare void @bar(i32*)
+declare void @baz(i32*)
+
+; CHECK-LABEL: @test_load_combine_metadata(
+; Check that dereferenceable_or_null metadata is combined
+; CHECK: load i32*, i32** %p
+; CHECK-SAME: !dereferenceable_or_null ![[DEREF:[0-9]+]]
+; CHECK: t:
+; CHECK: f:
+define void @test_load_combine_metadata(i1 %c, i32** %p) {
+ br i1 %c, label %t, label %f
+
+t:
+ %v1 = load i32*, i32** %p, !dereferenceable_or_null !0
+ call void @bar(i32* %v1)
+ br label %cont
+
+f:
+ %v2 = load i32*, i32** %p, !dereferenceable_or_null !1
+ call void @baz(i32* %v2)
+ br label %cont
+
+cont:
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/SimplifyCFG/preserve-load-metadata.ll b/test/Transforms/SimplifyCFG/preserve-load-metadata.ll
new file mode 100644
index 000000000000..89815c843152
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-load-metadata.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare void @bar(i32*)
+declare void @baz(i32*)
+
+; CHECK-LABEL: @test_load_combine_metadata(
+; Check that align metadata is combined
+; CHECK: load i32*, i32** %p
+; CHECK-SAME: !align ![[ALIGN:[0-9]+]]
+; CHECK: t:
+; CHECK: f:
+define void @test_load_combine_metadata(i1 %c, i32** %p) {
+ br i1 %c, label %t, label %f
+
+t:
+ %v1 = load i32*, i32** %p, !align !0
+ call void @bar(i32* %v1)
+ br label %cont
+
+f:
+ %v2 = load i32*, i32** %p, !align !1
+ call void @baz(i32* %v2)
+ br label %cont
+
+cont:
+ ret void
+}
+
+; CHECK: ![[ALIGN]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll b/test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll
new file mode 100644
index 000000000000..0e95336bbc1f
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll
@@ -0,0 +1,30 @@
+; RUN: opt %s -simplifycfg -S | FileCheck %s
+
+; ConstantFoldTerminator function can convert SwitchInst with one case (and default) to
+; a conditional BranchInst. This test checks the converted BranchInst preserve the
+; make.implicit metadata.
+
+declare i32 @consume(i32*)
+declare void @trap()
+
+define i32 @copy-metadata(i32* %x) {
+
+entry:
+ %x.int = ptrtoint i32* %x to i64
+
+; CHECK: br i1 %cond, label %is_null, label %default, !make.implicit !0
+ switch i64 %x.int, label %default [
+ i64 0, label %is_null
+ ], !make.implicit !0
+
+default:
+ %0 = call i32 @consume(i32* %x)
+ ret i32 %0
+
+is_null:
+ call void @trap()
+ unreachable
+}
+
+!0 = !{}
+
diff --git a/test/Transforms/SimplifyCFG/speculate-math.ll b/test/Transforms/SimplifyCFG/speculate-math.ll
index 0ba93d29117a..5655d5d78821 100644
--- a/test/Transforms/SimplifyCFG/speculate-math.ll
+++ b/test/Transforms/SimplifyCFG/speculate-math.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S -simplifycfg -phi-node-folding-threshold=2 < %s | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck %s --check-prefix=EXPENSIVE --check-prefix=ALL
+; RUN: opt -S -simplifycfg -speculate-one-expensive-inst=false < %s | FileCheck %s --check-prefix=CHEAP --check-prefix=ALL
declare float @llvm.sqrt.f32(float) nounwind readonly
declare float @llvm.fma.f32(float, float, float) nounwind readonly
@@ -7,8 +8,26 @@ declare float @llvm.fabs.f32(float) nounwind readonly
declare float @llvm.minnum.f32(float, float) nounwind readonly
declare float @llvm.maxnum.f32(float, float) nounwind readonly
-; CHECK-LABEL: @sqrt_test(
-; CHECK: select
+; ALL-LABEL: @fdiv_test(
+; EXPENSIVE: select i1 %cmp, double %div, double 0.0
+; CHEAP-NOT: select
+
+define double @fdiv_test(double %a, double %b) {
+entry:
+ %cmp = fcmp ogt double %a, 0.0
+ br i1 %cmp, label %cond.true, label %cond.end
+
+cond.true:
+ %div = fdiv double %b, %a
+ br label %cond.end
+
+cond.end:
+ %cond = phi double [ %div, %cond.true ], [ 0.0, %entry ]
+ ret double %cond
+}
+
+; ALL-LABEL: @sqrt_test(
+; ALL: select
define void @sqrt_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -24,8 +43,8 @@ test_sqrt.exit: ; preds = %cond.else.i, %entry
ret void
}
-; CHECK-LABEL: @fabs_test(
-; CHECK: select
+; ALL-LABEL: @fabs_test(
+; ALL: select
define void @fabs_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -41,8 +60,8 @@ test_fabs.exit: ; preds = %cond.else.i, %entry
ret void
}
-; CHECK-LABEL: @fma_test(
-; CHECK: select
+; ALL-LABEL: @fma_test(
+; ALL: select
define void @fma_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -58,8 +77,8 @@ test_fma.exit: ; preds = %cond.else.i, %entry
ret void
}
-; CHECK-LABEL: @fmuladd_test(
-; CHECK: select
+; ALL-LABEL: @fmuladd_test(
+; ALL: select
define void @fmuladd_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -75,8 +94,8 @@ test_fmuladd.exit: ; preds = %cond.else.i, %en
ret void
}
-; CHECK-LABEL: @minnum_test(
-; CHECK: select
+; ALL-LABEL: @minnum_test(
+; ALL: select
define void @minnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -92,8 +111,8 @@ test_minnum.exit: ; preds = %cond.else.i, %ent
ret void
}
-; CHECK-LABEL: @maxnum_test(
-; CHECK: select
+; ALL-LABEL: @maxnum_test(
+; ALL: select
define void @maxnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
diff --git a/test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll b/test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll
index 994e47eb0d64..53daa8292da7 100644
--- a/test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll
+++ b/test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll
@@ -3,15 +3,15 @@
; not optimized into call
declare i64 addrspace(1)* @gc_call()
-declare i32 @llvm.experimental.gc.statepoint.p0f_p1i64f(i64, i32, i64 addrspace(1)* ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_p1i64f(i64, i32, i64 addrspace(1)* ()*, i32, i32, ...)
declare i32* @fake_personality_function()
define i32 @test() gc "statepoint-example" personality i32* ()* @fake_personality_function {
; CHECK-LABEL: test
entry:
; CHECK-LABEL: entry:
- ; CHECK-NEXT: %sp = invoke i32 (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f
- %sp = invoke i32 (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @gc_call, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK-NEXT: %sp = invoke token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f
+ %sp = invoke token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @gc_call, i32 0, i32 0, i32 0, i32 0)
to label %normal unwind label %exception
exception:
diff --git a/test/Transforms/SimplifyCFG/switch-dead-default.ll b/test/Transforms/SimplifyCFG/switch-dead-default.ll
new file mode 100644
index 000000000000..e5c2ef65b318
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -0,0 +1,179 @@
+; RUN: opt %s -S -simplifycfg | FileCheck %s
+declare void @foo(i32)
+
+define void @test(i1 %a) {
+; CHECK-LABEL: @test
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+ switch i1 %a, label %default [i1 1, label %true
+ i1 0, label %false]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+define void @test2(i2 %a) {
+; CHECK-LABEL: @test2
+ switch i2 %a, label %default [i2 0, label %case0
+ i2 1, label %case1
+ i2 2, label %case2
+ i2 3, label %case3]
+case0:
+ call void @foo(i32 0)
+ ret void
+case1:
+ call void @foo(i32 1)
+ ret void
+case2:
+ call void @foo(i32 2)
+ ret void
+case3:
+ call void @foo(i32 3)
+ ret void
+default:
+; CHECK-LABEL: default1:
+; CHECK-NEXT: unreachable
+ call void @foo(i32 4)
+ ret void
+}
+
+; This one is a negative test - we know the value of the default,
+; but that's about it
+define void @test3(i2 %a) {
+; CHECK-LABEL: @test3
+ switch i2 %a, label %default [i2 0, label %case0
+ i2 1, label %case1
+ i2 2, label %case2]
+
+case0:
+ call void @foo(i32 0)
+ ret void
+case1:
+ call void @foo(i32 1)
+ ret void
+case2:
+ call void @foo(i32 2)
+ ret void
+default:
+; CHECK-LABEL: default:
+; CHECK-NEXT: call void @foo
+ call void @foo(i32 0)
+ ret void
+}
+
+; Negative test - check for possible overflow when computing
+; number of possible cases.
+define void @test4(i128 %a) {
+; CHECK-LABEL: @test4
+ switch i128 %a, label %default [i128 0, label %case0
+ i128 1, label %case1]
+
+case0:
+ call void @foo(i32 0)
+ ret void
+case1:
+ call void @foo(i32 1)
+ ret void
+default:
+; CHECK-LABEL: default:
+; CHECK-NEXT: call void @foo
+ call void @foo(i32 0)
+ ret void
+}
+
+; All but one bit known zero
+define void @test5(i8 %a) {
+; CHECK-LABEL: @test5
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+ %cmp = icmp ult i8 %a, 2
+ call void @llvm.assume(i1 %cmp)
+ switch i8 %a, label %default [i8 1, label %true
+ i8 0, label %false]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+;; All but one bit known one
+define void @test6(i8 %a) {
+; CHECK-LABEL: @test6
+; CHECK: @llvm.assume
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+ %and = and i8 %a, 254
+ %cmp = icmp eq i8 %and, 254
+ call void @llvm.assume(i1 %cmp)
+ switch i8 %a, label %default [i8 255, label %true
+ i8 254, label %false]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+; Check that we can eliminate both dead cases and dead defaults
+; within a single run of simplify-cfg
+define void @test7(i8 %a) {
+; CHECK-LABEL: @test7
+; CHECK: @llvm.assume
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+ %and = and i8 %a, 254
+ %cmp = icmp eq i8 %and, 254
+ call void @llvm.assume(i1 %cmp)
+ switch i8 %a, label %default [i8 255, label %true
+ i8 254, label %false
+ i8 0, label %also_dead]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+also_dead:
+ call void @foo(i32 5)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+;; All but one bit known undef
+;; Note: This is currently testing an optimization which doesn't trigger. The
+;; case this is protecting against is that a bit could be assumed both zero
+;; *or* one given we know it's undef. ValueTracking doesn't do this today,
+;; but it doesn't hurt to confirm.
+define void @test8(i8 %a) {
+; CHECK-LABEL: @test8(
+; CHECK: switch i8
+ %and = and i8 %a, 254
+ %cmp = icmp eq i8 %and, undef
+ call void @llvm.assume(i1 %cmp)
+ switch i8 %a, label %default [i8 255, label %true
+ i8 254, label %false]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll
index fedf6b172d75..2887aaf52eee 100644
--- a/test/Transforms/SimplifyCFG/trap-debugloc.ll
+++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll
@@ -1,7 +1,7 @@
; RUN: opt -S -simplifycfg < %s | FileCheck %s
; Radar 9342286
; Assign DebugLoc to trap instruction.
-define void @foo() nounwind ssp {
+define void @foo() nounwind ssp !dbg !0 {
; CHECK: call void @llvm.trap(), !dbg
store i32 42, i32* null, !dbg !5
ret void, !dbg !7
@@ -11,9 +11,9 @@ define void @foo() nounwind ssp {
!llvm.module.flags = !{!10}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3, function: void ()* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3)
!1 = !DIFile(filename: "foo.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocation(line: 4, column: 2, scope: !6)
diff --git a/test/Transforms/SimplifyCFG/wineh-unreachable.ll b/test/Transforms/SimplifyCFG/wineh-unreachable.ll
new file mode 100644
index 000000000000..670119467dae
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/wineh-unreachable.ll
@@ -0,0 +1,83 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+declare void @Personality()
+declare void @f()
+
+; CHECK-LABEL: define void @test1()
+define void @test1() personality i8* bitcast (void ()* @Personality to i8*) {
+entry:
+ ; CHECK: call void @f()
+ invoke void @f()
+ to label %exit unwind label %unreachable.unwind
+exit:
+ ret void
+unreachable.unwind:
+ cleanuppad within none []
+ unreachable
+}
+
+; CHECK-LABEL: define void @test2()
+define void @test2() personality i8* bitcast (void ()* @Personality to i8*) {
+entry:
+ invoke void @f()
+ to label %exit unwind label %catch.pad
+catch.pad:
+ %cs1 = catchswitch within none [label %catch.body] unwind label %unreachable.unwind
+ ; CHECK: catch.pad:
+ ; CHECK-NEXT: catchswitch within none [label %catch.body] unwind to caller
+catch.body:
+ ; CHECK: catch.body:
+ ; CHECK-NEXT: catchpad within %cs1
+ ; CHECK-NEXT: call void @f()
+ ; CHECK-NEXT: unreachable
+ %catch = catchpad within %cs1 []
+ call void @f()
+ catchret from %catch to label %unreachable
+exit:
+ ret void
+unreachable.unwind:
+ cleanuppad within none []
+ unreachable
+unreachable:
+ unreachable
+}
+
+; CHECK-LABEL: define void @test3()
+define void @test3() personality i8* bitcast (void ()* @Personality to i8*) {
+entry:
+ invoke void @f()
+ to label %exit unwind label %cleanup.pad
+cleanup.pad:
+ ; CHECK: %cleanup = cleanuppad within none []
+ ; CHECK-NEXT: call void @f()
+ ; CHECK-NEXT: unreachable
+ %cleanup = cleanuppad within none []
+ invoke void @f()
+ to label %cleanup.ret unwind label %unreachable.unwind
+cleanup.ret:
+ ; This cleanupret should be rewritten to unreachable,
+ ; and merged into the pred block.
+ cleanupret from %cleanup unwind label %unreachable.unwind
+exit:
+ ret void
+unreachable.unwind:
+ cleanuppad within none []
+ unreachable
+}
+
+; CHECK-LABEL: define void @test5()
+define void @test5() personality i8* bitcast (void ()* @Personality to i8*) {
+entry:
+ invoke void @f()
+ to label %exit unwind label %catch.pad
+
+catch.pad:
+ %cs1 = catchswitch within none [label %catch.body] unwind to caller
+
+catch.body:
+ %catch = catchpad within %cs1 []
+ catchret from %catch to label %exit
+
+exit:
+ unreachable
+}
diff --git a/test/Transforms/Sink/catchswitch.ll b/test/Transforms/Sink/catchswitch.ll
new file mode 100644
index 000000000000..2648f85f3eb4
--- /dev/null
+++ b/test/Transforms/Sink/catchswitch.ll
@@ -0,0 +1,37 @@
+; RUN: opt -sink -S < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+define void @h() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %call = call i32 @g(i32 1) readnone
+ invoke void @_CxxThrowException(i8* null, i8* null) noreturn
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %cp = catchpad within %cs [i8* null, i32 64, i8* null]
+ catchret from %cp to label %try.cont
+
+try.cont: ; preds = %catch
+ call void @k(i32 %call)
+ ret void
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare i32 @g(i32) readnone
+
+declare void @k(i32)
+
+; CHECK-LABEL: define void @h(
+; CHECK: call i32 @g(i32 1)
+; CHECK-NEXT: invoke void @_CxxThrowException(
diff --git a/test/Transforms/Sink/landingpad.ll b/test/Transforms/Sink/landingpad.ll
new file mode 100644
index 000000000000..10548fd5b7d4
--- /dev/null
+++ b/test/Transforms/Sink/landingpad.ll
@@ -0,0 +1,33 @@
+; Test that we don't sink landingpads
+; RUN: opt -sink -S < %s | FileCheck %s
+
+declare hidden void @g()
+declare void @h()
+declare i32 @__gxx_personality_v0(...)
+
+define void @f() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont.15 unwind label %lpad
+
+invoke.cont.15:
+ unreachable
+
+; CHECK: lpad:
+; CHECK: %0 = landingpad { i8*, i32 }
+lpad:
+ %0 = landingpad { i8*, i32 }
+ catch i8* null
+ invoke void @h()
+ to label %invoke.cont unwind label %lpad.1
+
+; CHECK: invoke.cont
+; CHECK-NOT: %0 = landingpad { i8*, i32 }
+invoke.cont:
+ ret void
+
+lpad.1:
+ %1 = landingpad { i8*, i32 }
+ cleanup
+ resume { i8*, i32 } %1
+}
diff --git a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
index 278250a9c80e..f2853aca698f 100644
--- a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
+++ b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
@@ -57,10 +57,10 @@ bb:
; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383
; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383
define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
bb:
%i2 = shl nsw i32 %i, 1
diff --git a/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll b/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll
new file mode 100644
index 000000000000..cb73565b152e
--- /dev/null
+++ b/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CUDA code
+; __global__ void foo(int b, int s) {
+; #pragma unroll
+; for (int i = 0; i < 4; ++i) {
+; if (cond(i))
+; use((b + i) * s);
+; }
+; }
+define void @foo(i32 %b, i32 %s) {
+; CHECK-LABEL: .visible .entry foo(
+entry:
+; CHECK: ld.param.u32 [[s:%r[0-9]+]], [foo_param_1];
+; CHECK: ld.param.u32 [[b:%r[0-9]+]], [foo_param_0];
+ %call = tail call zeroext i1 @cond(i32 0)
+ br i1 %call, label %if.then, label %for.inc
+
+if.then: ; preds = %entry
+ %mul = mul nsw i32 %b, %s
+; CHECK: mul.lo.s32 [[a0:%r[0-9]+]], [[b]], [[s]]
+ tail call void @use(i32 %mul)
+ br label %for.inc
+
+for.inc: ; preds = %entry, %if.then
+ %call.1 = tail call zeroext i1 @cond(i32 1)
+ br i1 %call.1, label %if.then.1, label %for.inc.1
+
+if.then.1: ; preds = %for.inc
+ %add.1 = add nsw i32 %b, 1
+ %mul.1 = mul nsw i32 %add.1, %s
+; CHECK: add.s32 [[a1:%r[0-9]+]], [[a0]], [[s]]
+ tail call void @use(i32 %mul.1)
+ br label %for.inc.1
+
+for.inc.1: ; preds = %if.then.1, %for.inc
+ %call.2 = tail call zeroext i1 @cond(i32 2)
+ br i1 %call.2, label %if.then.2, label %for.inc.2
+
+if.then.2: ; preds = %for.inc.1
+ %add.2 = add nsw i32 %b, 2
+ %mul.2 = mul nsw i32 %add.2, %s
+; CHECK: add.s32 [[a2:%r[0-9]+]], [[a1]], [[s]]
+ tail call void @use(i32 %mul.2)
+ br label %for.inc.2
+
+for.inc.2: ; preds = %if.then.2, %for.inc.1
+ %call.3 = tail call zeroext i1 @cond(i32 3)
+ br i1 %call.3, label %if.then.3, label %for.inc.3
+
+if.then.3: ; preds = %for.inc.2
+ %add.3 = add nsw i32 %b, 3
+ %mul.3 = mul nsw i32 %add.3, %s
+; CHECK: add.s32 [[a3:%r[0-9]+]], [[a2]], [[s]]
+ tail call void @use(i32 %mul.3)
+ br label %for.inc.3
+
+for.inc.3: ; preds = %if.then.3, %for.inc.2
+ ret void
+}
+
+declare zeroext i1 @cond(i32)
+
+declare void @use(i32)
+
+!nvvm.annotations = !{!0}
+
+!0 = !{void (i32, i32)* @foo, !"kernel", i32 1}
diff --git a/test/Transforms/StripDeadPrototypes/basic.ll b/test/Transforms/StripDeadPrototypes/basic.ll
new file mode 100644
index 000000000000..6845faf7d03e
--- /dev/null
+++ b/test/Transforms/StripDeadPrototypes/basic.ll
@@ -0,0 +1,12 @@
+; RUN: opt -strip-dead-prototypes -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes=strip-dead-prototypes < %s | FileCheck %s
+
+; CHECK: declare i32 @f
+declare i32 @f()
+; CHECK-NOT: declare i32 @g
+declare i32 @g()
+
+define i32 @foo() {
+ %call = call i32 @f()
+ ret i32 %call
+}
diff --git a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
index 3b1fd74b9813..32d7e77b20df 100644
--- a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
+++ b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
@@ -4,7 +4,7 @@
@x = common global i32 0 ; <i32*> [#uses=0]
-define void @foo() nounwind readnone optsize ssp {
+define void @foo() nounwind readnone optsize ssp !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !5, metadata !{}), !dbg !10
ret void, !dbg !11
@@ -18,12 +18,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.lv.foo = !{!5}
!llvm.dbg.gv = !{!8}
-!0 = !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3, function: void ()* @foo)
+!0 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3)
!1 = !DIFile(filename: "b.c", directory: "/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12, enums: !4, retainedTypes: !4)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 3, scope: !6, file: !1, type: !7)
+!5 = !DILocalVariable(name: "y", line: 3, scope: !6, file: !1, type: !7)
!6 = distinct !DILexicalBlock(line: 2, column: 0, file: !12, scope: !0)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !DIGlobalVariable(name: "x", line: 1, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !7, variable: i32* @x)
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index f77ed11d912c..ba8979c9772f 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -1,5 +1,5 @@
; RUN: opt -strip-dead-debug-info -disable-output < %s
-define i32 @foo() nounwind ssp {
+define i32 @foo() nounwind ssp !dbg !0 {
entry:
ret i32 0, !dbg !8
}
@@ -7,9 +7,9 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!14}
-!0 = !DISubprogram(name: "foo", linkageName: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !10, scope: !1, type: !3, function: i32 ()* @foo)
+!0 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !10, scope: !1, type: !3)
!1 = !DIFile(filename: "/tmp/a.c", directory: "/Volumes/Lalgate/clean/D.CW")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.8 (trunk 112062)", isOptimized: true, emissionKind: 1, file: !10, enums: !11, retainedTypes: !11, subprograms: !12, globals: !13)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.8 (trunk 112062)", isOptimized: true, emissionKind: 1, file: !10, enums: !11, retainedTypes: !11, subprograms: !12, globals: !13)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
index 08eff003dfca..39038c955617 100644
--- a/test/Transforms/StripSymbols/strip-dead-debug-info.ll
+++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -10,13 +10,13 @@
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #0
; Function Attrs: nounwind readnone ssp
-define i32 @fn() #1 {
+define i32 @fn() #1 !dbg !6 {
entry:
ret i32 0, !dbg !18
}
; Function Attrs: nounwind readonly ssp
-define i32 @foo(i32 %i) #2 {
+define i32 @foo(i32 %i) #2 !dbg !10 {
entry:
tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !15, metadata !DIExpression()), !dbg !20
%.0 = load i32, i32* @xyz, align 4
@@ -30,22 +30,22 @@ attributes #2 = { nounwind readonly ssp }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!25}
-!0 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !23, globals: !24)
+!0 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !23, globals: !24)
!1 = !DIFile(filename: "g.c", directory: "/tmp/")
!2 = !{null}
-!3 = !DISubprogram(name: "bar", line: 5, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !4)
+!3 = distinct !DISubprogram(name: "bar", line: 5, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !4)
!4 = !DISubroutineType(types: !2)
!5 = !DIFile(filename: "g.c", directory: "/tmp/")
-!6 = !DISubprogram(name: "fn", linkageName: "fn", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !7, function: i32 ()* @fn)
+!6 = distinct !DISubprogram(name: "fn", linkageName: "fn", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !7)
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !11, function: i32 (i32)* @foo)
+!10 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !11)
!11 = !DISubroutineType(types: !12)
!12 = !{!9, !9}
-!13 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "bb", line: 5, scope: !14, file: !5, type: !9)
+!13 = !DILocalVariable(name: "bb", line: 5, scope: !14, file: !5, type: !9)
!14 = distinct !DILexicalBlock(line: 5, column: 0, file: !1, scope: !3)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 7, arg: 0, scope: !10, file: !5, type: !9)
+!15 = !DILocalVariable(name: "i", line: 7, arg: 1, scope: !10, file: !5, type: !9)
!16 = !DIGlobalVariable(name: "abcd", line: 2, isLocal: true, isDefinition: true, scope: !5, file: !5, type: !9)
!17 = !DIGlobalVariable(name: "xyz", line: 3, isLocal: false, isDefinition: true, scope: !5, file: !5, type: !9, variable: i32* @xyz)
!18 = !DILocation(line: 6, scope: !19)
diff --git a/test/Transforms/StructurizeCFG/nested-loop-order.ll b/test/Transforms/StructurizeCFG/nested-loop-order.ll
index fee1ff0433b5..8a506c3e3962 100644
--- a/test/Transforms/StructurizeCFG/nested-loop-order.ll
+++ b/test/Transforms/StructurizeCFG/nested-loop-order.ll
@@ -41,7 +41,7 @@ ENDIF: ; preds = %LOOP
br i1 %tmp31, label %IF29, label %ENDIF28
; CHECK: Flow:
-; CHECK br i1 %{{[0-9]+}}, label %Flow, label %LOOP
+; CHECK: br i1 %{{[0-9]+}}, label %Flow2, label %LOOP
; CHECK: IF29:
; CHECK: br label %Flow1
diff --git a/test/Transforms/TailCallElim/basic.ll b/test/Transforms/TailCallElim/basic.ll
index 2488b552d8f3..b303fa743ca9 100644
--- a/test/Transforms/TailCallElim/basic.ll
+++ b/test/Transforms/TailCallElim/basic.ll
@@ -156,7 +156,7 @@ define void @test9(i32* byval %a) {
declare void @ctor(%struct.X*)
define void @test10(%struct.X* noalias sret %agg.result, i1 zeroext %b) {
-; CHECK-LABEL @test10
+; CHECK-LABEL: @test10
entry:
%x = alloca %struct.X, align 8
br i1 %b, label %if.then, label %if.end
@@ -188,3 +188,13 @@ define void @test11() {
; CHECK: call void @test11_helper2
ret void
}
+
+; PR25928
+define void @test12() {
+entry:
+; CHECK-LABEL: @test12
+; CHECK: {{^ *}} call void undef(i8* undef) [ "foo"(i8* %e) ]
+ %e = alloca i8
+ call void undef(i8* undef) [ "foo"(i8* %e) ]
+ unreachable
+}
diff --git a/test/Transforms/TailCallElim/notail.ll b/test/Transforms/TailCallElim/notail.ll
new file mode 100644
index 000000000000..e6fdbd1ec77d
--- /dev/null
+++ b/test/Transforms/TailCallElim/notail.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
+
+; CHECK: tail call void @callee0()
+; CHECK: notail call void @callee1()
+
+define void @foo1(i32 %a) {
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+ call void @callee0()
+ br label %if.end
+
+if.else:
+ notail call void @callee1()
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare void @callee0()
+declare void @callee1()
diff --git a/test/Transforms/Util/lowerswitch.ll b/test/Transforms/Util/lowerswitch.ll
index 6e21f916c038..1eddb43c1a06 100644
--- a/test/Transforms/Util/lowerswitch.ll
+++ b/test/Transforms/Util/lowerswitch.ll
@@ -3,7 +3,7 @@
; Test that we don't crash and have a different basic block for each incoming edge.
define void @test0() {
; CHECK-LABEL: @test0
-; CHECK: %merge = phi i64 [ 1, %BB3 ], [ 0, %NewDefault ], [ 0, %NodeBlock.5 ], [ 0, %LeafBlock.1 ]
+; CHECK: %merge = phi i64 [ 1, %BB3 ], [ 0, %NewDefault ], [ 0, %NodeBlock5 ], [ 0, %LeafBlock1 ]
BB1:
switch i32 undef, label %BB2 [
i32 3, label %BB2
@@ -43,9 +43,9 @@ bb2:
bb3:
; CHECK-LABEL: bb3
-; CHECK: %tmp = phi i32 [ 1, %NodeBlock ], [ 0, %bb2 ], [ 1, %LeafBlock.3 ]
+; CHECK: %tmp = phi i32 [ 1, %NodeBlock ], [ 0, %bb2 ], [ 1, %LeafBlock3 ]
%tmp = phi i32 [ 1, %bb1 ], [ 0, %bb2 ], [ 1, %bb1 ], [ 1, %bb1 ]
-; CHECK-NEXT: %tmp2 = phi i32 [ 2, %NodeBlock ], [ 5, %bb2 ], [ 2, %LeafBlock.3 ]
+; CHECK-NEXT: %tmp2 = phi i32 [ 2, %NodeBlock ], [ 5, %bb2 ], [ 2, %LeafBlock3 ]
%tmp2 = phi i32 [ 2, %bb1 ], [ 2, %bb1 ], [ 5, %bb2 ], [ 2, %bb1 ]
br label %exit
diff --git a/test/Transforms/Util/simplify-dbg-declare-load.ll b/test/Transforms/Util/simplify-dbg-declare-load.ll
new file mode 100644
index 000000000000..0357a5e6facb
--- /dev/null
+++ b/test/Transforms/Util/simplify-dbg-declare-load.ll
@@ -0,0 +1,52 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+%foo = type { i64, i32, i32 }
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #0
+
+; Function Attrs: sspreq
+define void @julia_fastshortest_6256() #1 {
+top:
+ %cp = alloca %foo, align 8
+ call void @llvm.dbg.declare(metadata %foo* %cp, metadata !1, metadata !16), !dbg !17
+ br i1 undef, label %idxend, label %fail
+
+fail: ; preds = %top
+ unreachable
+
+idxend: ; preds = %top
+; CHECK-NOT call void @llvm.dbg.value(metadata %foo* %cp, i64 0, metadata !1, metadata !16), !dbg !17
+ %0 = load volatile %foo, %foo* %cp, align 8
+; CHECK: call void @llvm.dbg.value(metadata %foo %0, i64 0, metadata !1, metadata !16), !dbg !17
+ store volatile %foo %0, %foo* undef, align 8
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { sspreq }
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{}
+
+!0 = !{i32 1, !"Debug Info Version", i32 3}
+!1 = !DILocalVariable(name: "cp", scope: !2, file: !3, line: 106, type: !12)
+!2 = distinct !DISubprogram(name: "fastshortest", linkageName: "julia_fastshortest_6256", scope: null, file: !3, type: !4, isLocal: false, isDefinition: true, isOptimized: true, variables: !11)
+!3 = !DIFile(filename: "grisu/fastshortest.jl", directory: ".")
+!4 = !DISubroutineType(types: !5)
+!5 = !{!6, !7}
+!6 = !DIBasicType(name: "Float64", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 64)
+!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "jl_value_t", file: !9, line: 71, align: 64, elements: !10)
+!9 = !DIFile(filename: "julia.h", directory: "")
+!10 = !{!7}
+!11 = !{}
+!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "Float", size: 128, align: 64, elements: !13, runtimeLang: DW_LANG_Julia)
+!13 = !{!14, !15, !15}
+!14 = !DIBasicType(name: "UInt64", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!15 = !DIBasicType(name: "Int32", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!16 = !DIExpression()
+!17 = !DILocation(line: 106, scope: !2)
diff --git a/test/Verifier/alias.ll b/test/Verifier/alias.ll
index dd04ae05f634..1847c0d4214e 100644
--- a/test/Verifier/alias.ll
+++ b/test/Verifier/alias.ll
@@ -2,18 +2,24 @@
declare void @f()
-@fa = alias void ()* @f
+@fa = alias void (), void ()* @f
; CHECK: Alias must point to a definition
; CHECK-NEXT: @fa
@g = external global i32
-@ga = alias i32* @g
+@ga = alias i32, i32* @g
; CHECK: Alias must point to a definition
; CHECK-NEXT: @ga
+define available_externally void @f2() {
+ ret void
+}
+@fa2 = alias void(), void()* @f2
+; CHECK: Alias must point to a definition
+; CHECK-NEXT: @fa2
-@test2_a = alias i32* @test2_b
-@test2_b = alias i32* @test2_a
+@test2_a = alias i32, i32* @test2_b
+@test2_b = alias i32, i32* @test2_a
; CHECK: Aliases cannot form a cycle
; CHECK-NEXT: i32* @test2_a
; CHECK-NEXT: Aliases cannot form a cycle
@@ -21,7 +27,7 @@ declare void @f()
@test3_a = global i32 42
-@test3_b = weak alias i32* @test3_a
-@test3_c = alias i32* @test3_b
+@test3_b = weak alias i32, i32* @test3_a
+@test3_c = alias i32, i32* @test3_b
; CHECK: Alias cannot point to a weak alias
; CHECK-NEXT: i32* @test3_c
diff --git a/test/Verifier/align-md.ll b/test/Verifier/align-md.ll
new file mode 100644
index 000000000000..2de489ec21eb
--- /dev/null
+++ b/test/Verifier/align-md.ll
@@ -0,0 +1,59 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare i8* @foo()
+
+define void @f1() {
+entry:
+ call i8* @foo(), !align !{i64 2}
+ ret void
+}
+; CHECK: align applies only to load instructions
+; CHECK-NEXT: call i8* @foo()
+
+define i8 @f2(i8* %x) {
+entry:
+ %y = load i8, i8* %x, !align !{i64 2}
+ ret i8 %y
+}
+; CHECK: align applies only to pointer types
+; CHECK-NEXT: load i8, i8* %x
+
+define i8* @f3(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !align !{}
+ ret i8* %y
+}
+; CHECK: align takes one operand
+; CHECK-NEXT: load i8*, i8** %x
+
+define i8* @f4(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !align !{!"str"}
+ ret i8* %y
+}
+; CHECK: align metadata value must be an i64!
+; CHECK-NEXT: load i8*, i8** %x
+
+define i8* @f5(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !align !{i32 2}
+ ret i8* %y
+}
+; CHECK: align metadata value must be an i64!
+; CHECK-NEXT: load i8*, i8** %x
+
+define i8* @f6(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !align !{i64 3}
+ ret i8* %y
+}
+; CHECK: align metadata value must be a power of 2!
+; CHECK-NEXT: load i8*, i8** %x
+
+define i8* @f7(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !align !{i64 1073741824}
+ ret i8* %y
+}
+; CHECK: alignment is larger that implementation defined limit
+; CHECK-NEXT: load i8*, i8** %x \ No newline at end of file
diff --git a/test/Verifier/atomics.ll b/test/Verifier/atomics.ll
new file mode 100644
index 000000000000..e49a0eb7beb2
--- /dev/null
+++ b/test/Verifier/atomics.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+; CHECK: atomic store operand must have integer, pointer, or floating point type!
+; CHECK: atomic load operand must have integer, pointer, or floating point type!
+
+define void @foo(x86_mmx* %P, x86_mmx %v) {
+ store atomic x86_mmx %v, x86_mmx* %P unordered, align 8
+ ret void
+}
+
+define x86_mmx @bar(x86_mmx* %P) {
+ %v = load atomic x86_mmx, x86_mmx* %P unordered, align 8
+ ret x86_mmx %v
+}
diff --git a/test/Verifier/bitcast-alias-address-space.ll b/test/Verifier/bitcast-alias-address-space.ll
index d9794d9e338a..d5f2266aa619 100644
--- a/test/Verifier/bitcast-alias-address-space.ll
+++ b/test/Verifier/bitcast-alias-address-space.ll
@@ -7,4 +7,4 @@ target datalayout = "e-p:32:32:32-p1:16:16:16-p2:32:32:32-i1:8:32-i8:8:32-i16:16
@data = addrspace(2) global i32 27
-@illegal_alias_data = alias bitcast (i32 addrspace(2)* @data to i32 addrspace(1)*)
+@illegal_alias_data = alias i32, bitcast (i32 addrspace(2)* @data to i32 addrspace(1)*)
diff --git a/test/Verifier/dbg-null-retained-type.ll b/test/Verifier/dbg-null-retained-type.ll
new file mode 100644
index 000000000000..f0368c8c4877
--- /dev/null
+++ b/test/Verifier/dbg-null-retained-type.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as -disable-output <%s 2>&1 | FileCheck %s
+; CHECK: assembly parsed, but does not verify
+; CHECK-NEXT: invalid retained type
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!llvm.dbg.cu = !{!1}
+!1 = distinct !DICompileUnit(file: !2, language: DW_LANG_C99, retainedTypes: !3)
+!2 = !DIFile(filename: "file.c", directory: "/path/to/dir")
+!3 = !{null}
diff --git a/test/Verifier/dbg-typerefs.ll b/test/Verifier/dbg-typerefs.ll
index dc50fce0715c..2370f8b64e5b 100644
--- a/test/Verifier/dbg-typerefs.ll
+++ b/test/Verifier/dbg-typerefs.ll
@@ -14,7 +14,7 @@
; Add a minimal compile unit to resolve some of the type references.
!llvm.dbg.cu = !{!5}
-!5 = !DICompileUnit(file: !6, language: DW_LANG_C99, retainedTypes: !7)
+!5 = distinct !DICompileUnit(file: !6, language: DW_LANG_C99, retainedTypes: !7)
!6 = !DIFile(filename: "file.c", directory: "/path/to/dir")
!7 = !{!8, !9}
!8 = !DICompositeType(tag: DW_TAG_structure_type, identifier: "1.good")
diff --git a/test/Verifier/dbg.ll b/test/Verifier/dbg.ll
index 395806b1299a..d5728a4e8272 100644
--- a/test/Verifier/dbg.ll
+++ b/test/Verifier/dbg.ll
@@ -2,7 +2,7 @@
define void @foo() {
entry:
- br label %exit, !dbg !DILocation(scope: !DISubprogram(), inlinedAt: !{})
+ br label %exit, !dbg !DILocation(scope: !1, inlinedAt: !{})
; CHECK: inlined-at should be a location
; CHECK-NEXT: !{{[0-9]+}} = !DILocation(line: 0, scope: !{{[0-9]+}}, inlinedAt: ![[IA:[0-9]+]])
; CHECK-NEXT: ![[IA]] = !{}
@@ -16,3 +16,4 @@ exit:
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = distinct !DISubprogram()
diff --git a/test/Verifier/dereferenceable-md.ll b/test/Verifier/dereferenceable-md.ll
new file mode 100644
index 000000000000..94c89c332022
--- /dev/null
+++ b/test/Verifier/dereferenceable-md.ll
@@ -0,0 +1,86 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+declare i8* @foo()
+
+define void @f1() {
+entry:
+ call i8* @foo(), !dereferenceable !{i64 2}
+ ret void
+}
+; CHECK: dereferenceable, dereferenceable_or_null apply only to load instructions, use attributes for calls or invokes
+; CHECK-NEXT: call i8* @foo()
+
+define void @f2() {
+entry:
+ call i8* @foo(), !dereferenceable_or_null !{i64 2}
+ ret void
+}
+; CHECK: dereferenceable, dereferenceable_or_null apply only to load instructions, use attributes for calls or invokes
+; CHECK-NEXT: call i8* @foo()
+
+define i8 @f3(i8* %x) {
+entry:
+ %y = load i8, i8* %x, !dereferenceable !{i64 2}
+ ret i8 %y
+}
+; CHECK: dereferenceable, dereferenceable_or_null apply only to pointer types
+; CHECK-NEXT: load i8, i8* %x
+
+define i8 @f4(i8* %x) {
+entry:
+ %y = load i8, i8* %x, !dereferenceable_or_null !{i64 2}
+ ret i8 %y
+}
+; CHECK: dereferenceable, dereferenceable_or_null apply only to pointer types
+; CHECK-NEXT: load i8, i8* %x
+
+define i8* @f5(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !dereferenceable !{}
+ ret i8* %y
+}
+; CHECK: dereferenceable, dereferenceable_or_null take one operand
+; CHECK-NEXT: load i8*, i8** %x
+
+
+define i8* @f6(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !dereferenceable_or_null !{}
+ ret i8* %y
+}
+; CHECK: dereferenceable, dereferenceable_or_null take one operand
+; CHECK-NEXT: load i8*, i8** %x
+
+define i8* @f7(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !dereferenceable !{!"str"}
+ ret i8* %y
+}
+; CHECK: dereferenceable, dereferenceable_or_null metadata value must be an i64!
+; CHECK-NEXT: load i8*, i8** %x
+
+
+define i8* @f8(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !dereferenceable_or_null !{!"str"}
+ ret i8* %y
+}
+; CHECK: dereferenceable, dereferenceable_or_null metadata value must be an i64!
+; CHECK-NEXT: load i8*, i8** %x
+
+define i8* @f9(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !dereferenceable !{i32 2}
+ ret i8* %y
+}
+; CHECK: dereferenceable, dereferenceable_or_null metadata value must be an i64!
+; CHECK-NEXT: load i8*, i8** %x
+
+
+define i8* @f10(i8** %x) {
+entry:
+ %y = load i8*, i8** %x, !dereferenceable_or_null !{i32 2}
+ ret i8* %y
+}
+; CHECK: dereferenceable, dereferenceable_or_null metadata value must be an i64!
+; CHECK-NEXT: load i8*, i8** %x \ No newline at end of file
diff --git a/test/Verifier/func-dbg.ll b/test/Verifier/func-dbg.ll
new file mode 100644
index 000000000000..e56de94d18c9
--- /dev/null
+++ b/test/Verifier/func-dbg.ll
@@ -0,0 +1,25 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+define i32 @foo() !dbg !4 {
+entry:
+ ret i32 0, !dbg !6
+}
+
+define i32 @bar() !dbg !5 {
+entry:
+; CHECK: !dbg attachment points at wrong subprogram for function
+ ret i32 0, !dbg !6
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: 0, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "dwarf-test.c", directory: "test")
+!2 = !{}
+!3 = !{!4, !5}
+!4 = distinct !DISubprogram(name: "foo", scope: !0, isDefinition: true)
+!5 = distinct !DISubprogram(name: "bar", scope: !0, isDefinition: true)
+!6 = !DILocation(line: 7, scope: !4)
+!7 = !{i32 2, !"Dwarf Version", i32 3}
+!8 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Verifier/gc_relocate_addrspace.ll b/test/Verifier/gc_relocate_addrspace.ll
index ddc1b230a8c2..ccf1fbbe95ca 100644
--- a/test/Verifier/gc_relocate_addrspace.ll
+++ b/test/Verifier/gc_relocate_addrspace.ll
@@ -3,21 +3,21 @@
; address space with the relocated value.
; CHECK: gc.relocate: relocating a pointer shouldn't change its address space
-; CHECK-NEXT: %obj.relocated = call coldcc i8* @llvm.experimental.gc.relocate.p0i8(i32 %safepoint_token, i32 7, i32 7) ;
+; CHECK-NEXT: %obj.relocated = call coldcc i8* @llvm.experimental.gc.relocate.p0i8(token %safepoint_token, i32 7, i32 7) ;
declare void @foo()
; Function Attrs: nounwind
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) #0
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) #0
define void @test1(i64 addrspace(1)* %obj) gc "statepoint-example" {
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj)
- %obj.relocated = call coldcc i8* @llvm.experimental.gc.relocate.p0i8(i32 %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj)
+ %obj.relocated = call coldcc i8* @llvm.experimental.gc.relocate.p0i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
ret void
}
; Function Attrs: nounwind
-declare i8* @llvm.experimental.gc.relocate.p0i8(i32, i32, i32) #0
+declare i8* @llvm.experimental.gc.relocate.p0i8(token, i32, i32) #0
attributes #0 = { nounwind }
diff --git a/test/Verifier/gc_relocate_operand.ll b/test/Verifier/gc_relocate_operand.ll
index c28b8d870365..f7c919ec1e93 100644
--- a/test/Verifier/gc_relocate_operand.ll
+++ b/test/Verifier/gc_relocate_operand.ll
@@ -5,17 +5,17 @@
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
define void @test1(i64 %obj) gc "statepoint-example" {
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i64 %obj)
- %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i64 %obj)
+ %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
ret void
}
; Function Attrs: nounwind
-declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32, i32, i32) #0
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) #0
attributes #0 = { nounwind }
diff --git a/test/Verifier/gc_relocate_return.ll b/test/Verifier/gc_relocate_return.ll
index 3957d4c0cec8..77207f6c47b2 100644
--- a/test/Verifier/gc_relocate_return.ll
+++ b/test/Verifier/gc_relocate_return.ll
@@ -6,17 +6,17 @@
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
define void @test1(<2 x i32 addrspace(1)*> addrspace(1)* %obj) gc "statepoint-example" {
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, <2 x i32 addrspace(1)*> addrspace(1)* %obj)
- %obj.relocated = call coldcc i8 @llvm.experimental.gc.relocate.i8(i32 %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, <2 x i32 addrspace(1)*> addrspace(1)* %obj)
+ %obj.relocated = call coldcc i8 @llvm.experimental.gc.relocate.i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
ret void
}
; Function Attrs: nounwind
-declare i8 @llvm.experimental.gc.relocate.i8(i32, i32, i32) #0
+declare i8 @llvm.experimental.gc.relocate.i8(token, i32, i32) #0
attributes #0 = { nounwind }
diff --git a/test/Verifier/invalid-eh.ll b/test/Verifier/invalid-eh.ll
new file mode 100644
index 000000000000..906b24a15c30
--- /dev/null
+++ b/test/Verifier/invalid-eh.ll
@@ -0,0 +1,38 @@
+; RUN: sed -e s/.T1:// %s | not llvm-as -disable-output 2>&1 | FileCheck --check-prefix=CHECK1 %s
+; RUN: sed -e s/.T2:// %s | not llvm-as -disable-output 2>&1 | FileCheck --check-prefix=CHECK2 %s
+; RUN: sed -e s/.T3:// %s | not llvm-as -disable-output 2>&1 | FileCheck --check-prefix=CHECK3 %s
+; RUN: sed -e s/.T4:// %s | not llvm-as -disable-output 2>&1 | FileCheck --check-prefix=CHECK4 %s
+
+;T1: define void @f() {
+;T1: entry:
+;T1: catchret from undef to label %next
+;T1: ; CHECK1: CatchReturnInst needs to be provided a CatchPad
+;T1: next:
+;T1: unreachable
+;T1: }
+
+;T2: define void @f() {
+;T2: entry:
+;T2: %x = cleanuppad within none []
+;T2: ; catchret's first operand's operator must be catchpad
+;T2: catchret from %x to label %entry
+;T2: ; CHECK2: CatchReturnInst needs to be provided a CatchPad
+;T2: }
+
+;T3: define void @f() {
+;T3: entry:
+;T3: cleanupret from undef unwind label %next
+;T3: ; CHECK3: CleanupReturnInst needs to be provided a CleanupPad
+;T3: next:
+;T3: unreachable
+;T3: }
+
+;T4: define void @f() {
+;T4: entry:
+;T4: %cs = catchswitch within none [label %next] unwind to caller
+;T4: next:
+;T4: %x = catchpad within %cs []
+;T4: ; cleanupret first operand's operator must be cleanuppad
+;T4: cleanupret from %x unwind to caller
+;T4: ; CHECK4: CleanupReturnInst needs to be provided a CleanupPad
+;T4: }
diff --git a/test/Verifier/invalid-patchable-statepoint.ll b/test/Verifier/invalid-patchable-statepoint.ll
deleted file mode 100644
index 4783fa57f8fa..000000000000
--- a/test/Verifier/invalid-patchable-statepoint.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: not opt -verify 2>&1 < %s | FileCheck %s
-
-; CHECK: gc.statepoint must have null as call target if number of patchable bytes is non zero
-
-define i1 @invalid_patchable_statepoint() gc "statepoint-example" {
-entry:
- %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 3, i1 ()* @func, i32 0, i32 0, i32 0, i32 0)
- %call1 = call i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
- ret i1 %call1
-}
-
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i1 @llvm.experimental.gc.result.i1(i32)
-declare i1 @func()
diff --git a/test/Verifier/invalid-statepoint.ll b/test/Verifier/invalid-statepoint.ll
index 6c3525a16df5..6b4bc087b3e0 100644
--- a/test/Verifier/invalid-statepoint.ll
+++ b/test/Verifier/invalid-statepoint.ll
@@ -5,15 +5,15 @@
declare zeroext i1 @return0i1()
; Function Attrs: nounwind
-declare i32 @llvm.experimental.gc.statepoint.p0f0i1f(i64, i32, i1 ()*, i32, i32, ...) #0
+declare token @llvm.experimental.gc.statepoint.p0f0i1f(i64, i32, i1 ()*, i32, i32, ...) #0
; Function Attrs: nounwind
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #0
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #0
define i32 addrspace(1)* @0(i32 addrspace(1)* %dparam) {
%a00 = load i32, i32 addrspace(1)* %dparam
- %to0 = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f0i1f(i64 0, i32 0, i1 ()* @return0i1, i32 9, i32 0, i2 0, i32 addrspace(1)* %dparam)
- %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %to0, i32 2, i32 6)
+ %to0 = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f0i1f(i64 0, i32 0, i1 ()* @return0i1, i32 9, i32 0, i2 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %to0, i32 2, i32 6)
ret i32 addrspace(1)* %relocate
}
diff --git a/test/Verifier/invalid-statepoint2.ll b/test/Verifier/invalid-statepoint2.ll
index c8b453c31e69..10bcd4f4f318 100644
--- a/test/Verifier/invalid-statepoint2.ll
+++ b/test/Verifier/invalid-statepoint2.ll
@@ -3,16 +3,16 @@
; CHECK: gc.statepoint: number of deoptimization arguments must be a constant integer
declare void @use(...)
-declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32, i32, i32)
-declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32, i32, i32)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
+declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token, i32, i32)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare i32 @"personality_function"()
;; Basic usage
define i64 addrspace(1)* @test1(i8 addrspace(1)* %arg, i32 %val) gc "statepoint-example" {
entry:
%cast = bitcast i8 addrspace(1)* %arg to i64 addrspace(1)*
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 %val, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
- %reloc = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %safepoint_token, i32 12, i32 13)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 %val, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+ %reloc = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %safepoint_token, i32 12, i32 13)
ret i64 addrspace(1)* %reloc
}
diff --git a/test/Verifier/invoke.ll b/test/Verifier/invoke.ll
index b56b72f84b9e..8fa9923c0cb8 100644
--- a/test/Verifier/invoke.ll
+++ b/test/Verifier/invoke.ll
@@ -2,7 +2,7 @@
; PR1042
define i32 @foo() {
-; CHECK: The unwind destination does not have a landingpad instruction
+; CHECK: The unwind destination does not have an exception handling instruction
%A = invoke i32 @foo( )
to label %L unwind label %L ; <i32> [#uses=1]
L: ; preds = %0, %0
@@ -18,7 +18,7 @@ L1: ; preds = %0
L2: ; preds = %0
br label %L
L: ; preds = %L2, %L1, %L1
-; CHECK: The unwind destination does not have a landingpad instruction
+; CHECK: The unwind destination does not have an exception handling instruction
ret i32 %A
}
diff --git a/test/Verifier/llvm.dbg.declare-address.ll b/test/Verifier/llvm.dbg.declare-address.ll
index ba132ad53465..90cf72aea681 100644
--- a/test/Verifier/llvm.dbg.declare-address.ll
+++ b/test/Verifier/llvm.dbg.declare-address.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.declare(metadata !"", metadata !DILocalVariable(tag: DW_TAG_arg_variable, scope: !1), metadata !DIExpression()), !dbg !DILocation(scope: !1)
+ call void @llvm.dbg.declare(metadata !"", metadata !DILocalVariable(scope: !1), metadata !DIExpression()), !dbg !DILocation(scope: !1)
ret void
}
@@ -14,4 +14,4 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !DISubprogram()
+!1 = distinct !DISubprogram()
diff --git a/test/Verifier/llvm.dbg.declare-expression.ll b/test/Verifier/llvm.dbg.declare-expression.ll
index 4c1bd65230a9..54ee1f750d4a 100644
--- a/test/Verifier/llvm.dbg.declare-expression.ll
+++ b/test/Verifier/llvm.dbg.declare-expression.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.declare(metadata i32* %s, metadata !DILocalVariable(tag: DW_TAG_arg_variable, scope: !1), metadata !"")
+ call void @llvm.dbg.declare(metadata i32* %s, metadata !DILocalVariable(scope: !1), metadata !"")
ret void
}
@@ -14,4 +14,4 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !DISubprogram()
+!1 = distinct !DISubprogram()
diff --git a/test/Verifier/llvm.dbg.declare-variable.ll b/test/Verifier/llvm.dbg.declare-variable.ll
index deef50fd717d..6f415b7c1fa0 100644
--- a/test/Verifier/llvm.dbg.declare-variable.ll
+++ b/test/Verifier/llvm.dbg.declare-variable.ll
@@ -14,4 +14,4 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !DISubprogram()
+!1 = distinct !DISubprogram()
diff --git a/test/Verifier/llvm.dbg.intrinsic-dbg-attachment.ll b/test/Verifier/llvm.dbg.intrinsic-dbg-attachment.ll
index 717794b4274b..9612643aa9d3 100644
--- a/test/Verifier/llvm.dbg.intrinsic-dbg-attachment.ll
+++ b/test/Verifier/llvm.dbg.intrinsic-dbg-attachment.ll
@@ -4,7 +4,7 @@ entry:
call void @llvm.dbg.value(
metadata i8* undef,
i64 0,
- metadata !DILocalVariable(tag: DW_TAG_arg_variable, scope: !1),
+ metadata !DILocalVariable(scope: !1),
metadata !DIExpression())
; CHECK-LABEL: llvm.dbg.value intrinsic requires a !dbg attachment
; CHECK-NEXT: call void @llvm.dbg.value({{.*}})
@@ -13,7 +13,7 @@ entry:
call void @llvm.dbg.declare(
metadata i8* undef,
- metadata !DILocalVariable(tag: DW_TAG_arg_variable, scope: !1),
+ metadata !DILocalVariable(scope: !1),
metadata !DIExpression())
; CHECK-LABEL: llvm.dbg.declare intrinsic requires a !dbg attachment
; CHECK-NEXT: call void @llvm.dbg.declare({{.*}})
@@ -23,7 +23,7 @@ entry:
call void @llvm.dbg.value(
metadata i8* undef,
i64 0,
- metadata !DILocalVariable(tag: DW_TAG_arg_variable, scope: !1),
+ metadata !DILocalVariable(scope: !1),
metadata !DIExpression()),
!dbg !DILocation(scope: !2)
; CHECK-LABEL: mismatched subprogram between llvm.dbg.value variable and !dbg attachment
@@ -31,13 +31,13 @@ entry:
; CHECK-NEXT: label %entry
; CHECK-NEXT: void ()* @foo
; CHECK-NEXT: ![[VAR]] = !DILocalVariable({{.*}}scope: ![[VARSP:[0-9]+]]
-; CHECK-NEXT: ![[VARSP]] = !DISubprogram(
+; CHECK-NEXT: ![[VARSP]] = distinct !DISubprogram(
; CHECK-NEXT: ![[LOC]] = !DILocation({{.*}}scope: ![[LOCSP:[0-9]+]]
-; CHECK-NEXT: ![[LOCSP]] = !DISubprogram(
+; CHECK-NEXT: ![[LOCSP]] = distinct !DISubprogram(
call void @llvm.dbg.declare(
metadata i8* undef,
- metadata !DILocalVariable(tag: DW_TAG_arg_variable, scope: !1),
+ metadata !DILocalVariable(scope: !1),
metadata !DIExpression()),
!dbg !DILocation(scope: !2)
; CHECK-LABEL: mismatched subprogram between llvm.dbg.declare variable and !dbg attachment
@@ -45,9 +45,9 @@ entry:
; CHECK-NEXT: label %entry
; CHECK-NEXT: void ()* @foo
; CHECK-NEXT: ![[VAR]] = !DILocalVariable({{.*}}scope: ![[VARSP:[0-9]+]]
-; CHECK-NEXT: ![[VARSP]] = !DISubprogram(
+; CHECK-NEXT: ![[VARSP]] = distinct !DISubprogram(
; CHECK-NEXT: ![[LOC]] = !DILocation({{.*}}scope: ![[LOCSP:[0-9]+]]
-; CHECK-NEXT: ![[LOCSP]] = !DISubprogram(
+; CHECK-NEXT: ![[LOCSP]] = distinct !DISubprogram(
ret void
}
@@ -57,5 +57,5 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !DISubprogram(name: "foo")
-!2 = !DISubprogram(name: "bar")
+!1 = distinct !DISubprogram(name: "foo")
+!2 = distinct !DISubprogram(name: "bar")
diff --git a/test/Verifier/llvm.dbg.value-expression.ll b/test/Verifier/llvm.dbg.value-expression.ll
index 78a7c50f233d..dd3c29f91073 100644
--- a/test/Verifier/llvm.dbg.value-expression.ll
+++ b/test/Verifier/llvm.dbg.value-expression.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.value(metadata i32* %s, i64 0, metadata !DILocalVariable(tag: DW_TAG_arg_variable, scope: !1), metadata !""), !dbg !DILocation(scope: !1)
+ call void @llvm.dbg.value(metadata i32* %s, i64 0, metadata !DILocalVariable(scope: !1), metadata !""), !dbg !DILocation(scope: !1)
ret void
}
@@ -14,4 +14,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !DISubprogram()
+!1 = distinct !DISubprogram()
diff --git a/test/Verifier/llvm.dbg.value-value.ll b/test/Verifier/llvm.dbg.value-value.ll
index 1acb5f8132fa..e1d02de484c6 100644
--- a/test/Verifier/llvm.dbg.value-value.ll
+++ b/test/Verifier/llvm.dbg.value-value.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
%s = alloca i32
- call void @llvm.dbg.value(metadata !"", i64 0, metadata !DILocalVariable(tag: DW_TAG_arg_variable, scope: !1), metadata !DIExpression()), !dbg !DILocation(scope: !1)
+ call void @llvm.dbg.value(metadata !"", i64 0, metadata !DILocalVariable(scope: !1), metadata !DIExpression()), !dbg !DILocation(scope: !1)
ret void
}
@@ -14,4 +14,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !DISubprogram()
+!1 = distinct !DISubprogram()
diff --git a/test/Verifier/llvm.dbg.value-variable.ll b/test/Verifier/llvm.dbg.value-variable.ll
index 66329f9fdaa7..745f7ada5873 100644
--- a/test/Verifier/llvm.dbg.value-variable.ll
+++ b/test/Verifier/llvm.dbg.value-variable.ll
@@ -14,4 +14,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.module.flags = !{!0}
!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !DISubprogram()
+!1 = distinct !DISubprogram()
diff --git a/test/Verifier/metadata-function-dbg.ll b/test/Verifier/metadata-function-dbg.ll
new file mode 100644
index 000000000000..2a6fd8bbb48e
--- /dev/null
+++ b/test/Verifier/metadata-function-dbg.ll
@@ -0,0 +1,23 @@
+; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s
+
+define void @foo() !dbg !4 !dbg !4 {
+ unreachable
+}
+
+; CHECK-NOT: !dbg
+; CHECK: function !dbg attachment must be a subprogram
+; CHECK-NEXT: void ()* @bar
+; CHECK-NEXT: !{{[0-9]+}} = !{}
+define void @bar() !dbg !6 {
+ unreachable
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+
+!llvm.dbg.cu = !{!1}
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !3)
+!2 = !DIFile(filename: "t.c", directory: "/path/to/dir")
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !2)
+!6 = !{}
diff --git a/test/Verifier/operand-bundles.ll b/test/Verifier/operand-bundles.ll
new file mode 100644
index 000000000000..d822568a0445
--- /dev/null
+++ b/test/Verifier/operand-bundles.ll
@@ -0,0 +1,49 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+; Operand bundles uses are like regular uses, and need to be dominated
+; by their defs.
+
+declare void @g()
+
+define void @f0(i32* %ptr) {
+; CHECK: Instruction does not dominate all uses!
+; CHECK-NEXT: %x = add i32 42, 1
+; CHECK-NEXT: call void @g() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+
+ entry:
+ %l = load i32, i32* %ptr
+ call void @g() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.0, i64 100, i32 %l) ]
+ %x = add i32 42, 1
+ ret void
+}
+
+define void @f1(i32* %ptr) personality i8 3 {
+; CHECK: Instruction does not dominate all uses!
+; CHECK-NEXT: %x = add i32 42, 1
+; CHECK-NEXT: invoke void @g() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+
+ entry:
+ %l = load i32, i32* %ptr
+ invoke void @g() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.0, i64 100, i32 %l) ] to label %normal unwind label %exception
+
+exception:
+ %cleanup = landingpad i8 cleanup
+ br label %normal
+
+normal:
+ %x = add i32 42, 1
+ ret void
+}
+
+define void @f_deopt(i32* %ptr) {
+; CHECK: Multiple deopt operand bundles
+; CHECK-NEXT: call void @g() [ "deopt"(i32 42, i64 100, i32 %x), "deopt"(float 0.000000e+00, i64 100, i32 %l) ]
+; CHECK-NOT: call void @g() [ "deopt"(i32 42, i64 120, i32 %x) ]
+
+ entry:
+ %l = load i32, i32* %ptr
+ call void @g() [ "deopt"(i32 42, i64 100, i32 %x), "deopt"(float 0.0, i64 100, i32 %l) ]
+ call void @g() [ "deopt"(i32 42, i64 120) ] ;; The verifier should not complain about this one
+ %x = add i32 42, 1
+ ret void
+}
diff --git a/test/Verifier/statepoint.ll b/test/Verifier/statepoint.ll
index 2807620f79ea..c07a85b9bd36 100644
--- a/test/Verifier/statepoint.ll
+++ b/test/Verifier/statepoint.ll
@@ -1,20 +1,20 @@
; RUN: opt -S %s -verify | FileCheck %s
declare void @use(...)
-declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32, i32, i32)
-declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32, i32, i32)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
+declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token, i32, i32)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare i32 @"personality_function"()
;; Basic usage
define i64 addrspace(1)* @test1(i8 addrspace(1)* %arg) gc "statepoint-example" {
entry:
%cast = bitcast i8 addrspace(1)* %arg to i64 addrspace(1)*
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
- %reloc = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %safepoint_token, i32 12, i32 13)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+ %reloc = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %safepoint_token, i32 12, i32 13)
;; It is perfectly legal to relocate the same value multiple times...
- %reloc2 = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %safepoint_token, i32 12, i32 13)
- %reloc3 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %safepoint_token, i32 13, i32 12)
+ %reloc2 = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %safepoint_token, i32 12, i32 13)
+ %reloc3 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 13, i32 12)
ret i64 addrspace(1)* %reloc
; CHECK-LABEL: test1
; CHECK: statepoint
@@ -39,8 +39,8 @@ notequal:
ret void
equal:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
- %reloc = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %safepoint_token, i32 12, i32 13)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+ %reloc = call i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %safepoint_token, i32 12, i32 13)
call void undef(i64 addrspace(1)* %reloc)
ret void
; CHECK-LABEL: test2
@@ -57,7 +57,7 @@ define i8 addrspace(1)* @test3(i8 addrspace(1)* %obj, i8 addrspace(1)* %obj1) gc
entry:
; CHECK-LABEL: entry
; CHECK: statepoint
- %0 = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj, i8 addrspace(1)* %obj1)
+ %0 = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj, i8 addrspace(1)* %obj1)
to label %normal_dest unwind label %exceptional_return
normal_dest:
@@ -65,18 +65,17 @@ normal_dest:
; CHECK: gc.relocate
; CHECK: gc.relocate
; CHECK: ret
- %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %0, i32 12, i32 12)
- %obj1.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %0, i32 12, i32 12)
+ %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 12, i32 12)
+ %obj1.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 12, i32 12)
ret i8 addrspace(1)* %obj.relocated
exceptional_return:
; CHECK-LABEL: exceptional_return
; CHECK: gc.relocate
; CHECK: gc.relocate
- %landing_pad = landingpad { i8*, i32 }
+ %landing_pad = landingpad token
cleanup
- %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
- %obj.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %relocate_token, i32 12, i32 12)
- %obj1.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %relocate_token, i32 12, i32 12)
+ %obj.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %landing_pad, i32 12, i32 12)
+ %obj1.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %landing_pad, i32 12, i32 12)
ret i8 addrspace(1)* %obj1.relocated1
}
diff --git a/test/Verifier/token1.ll b/test/Verifier/token1.ll
new file mode 100644
index 000000000000..ac7ff30948ea
--- /dev/null
+++ b/test/Verifier/token1.ll
@@ -0,0 +1,11 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @f(token %A, token %B) {
+entry:
+ br label %bb
+
+bb:
+ %phi = phi token [ %A, %bb ], [ %B, %entry]
+; CHECK: PHI nodes cannot have token type!
+ br label %bb
+}
diff --git a/test/Verifier/token2.ll b/test/Verifier/token2.ll
new file mode 100644
index 000000000000..b58079de770d
--- /dev/null
+++ b/test/Verifier/token2.ll
@@ -0,0 +1,11 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @f(token %A, token %B) {
+entry:
+ br label %bb
+
+bb:
+ %sel = select i1 undef, token %A, token %B
+; CHECK: select values cannot have token type
+ br label %bb
+}
diff --git a/test/Verifier/token3.ll b/test/Verifier/token3.ll
new file mode 100644
index 000000000000..2cce6b83e7fd
--- /dev/null
+++ b/test/Verifier/token3.ll
@@ -0,0 +1,8 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @f(token %A, token %B) {
+entry:
+ alloca token
+; CHECK: invalid type for alloca
+ ret void
+}
diff --git a/test/Verifier/token4.ll b/test/Verifier/token4.ll
new file mode 100644
index 000000000000..87a8b14efa00
--- /dev/null
+++ b/test/Verifier/token4.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+@GV = external global token
+; CHECK: invalid type for global variable
diff --git a/test/Verifier/token5.ll b/test/Verifier/token5.ll
new file mode 100644
index 000000000000..6fc1b045375f
--- /dev/null
+++ b/test/Verifier/token5.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @f(token %A) {
+entry:
+ ret void
+}
+; CHECK: Function takes token but isn't an intrinsic
diff --git a/test/Verifier/token6.ll b/test/Verifier/token6.ll
new file mode 100644
index 000000000000..9614b91db737
--- /dev/null
+++ b/test/Verifier/token6.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define token @f() {
+entry:
+ ret token undef
+}
+; CHECK: Functions returns a token but isn't an intrinsic
diff --git a/test/Verifier/token7.ll b/test/Verifier/token7.ll
new file mode 100644
index 000000000000..939878cc4275
--- /dev/null
+++ b/test/Verifier/token7.ll
@@ -0,0 +1,8 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+define void @f() {
+entry:
+ call token () undef ()
+ ret void
+}
+; CHECK: Return type cannot be token for indirect call!
diff --git a/test/lit.cfg b/test/lit.cfg
index 6d3c41256422..36b4c7044083 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -221,6 +221,32 @@ else:
# also have a post-assertion to not match a trailing hyphen (foo-).
NOJUNK = r"(?<!\.|-|\^|/)"
+
+def find_tool_substitution(pattern):
+ # Extract the tool name from the pattern. This relies on the tool
+ # name being surrounded by \b word match operators. If the
+ # pattern starts with "| ", include it in the string to be
+ # substituted.
+ tool_match = re.match(r"^(\\)?((\| )?)\W+b([0-9A-Za-z-_]+)\\b\W*$",
+ pattern)
+ tool_pipe = tool_match.group(2)
+ tool_name = tool_match.group(4)
+ # Did the user specify the tool path + arguments? This allows things like
+ # llvm-lit "-Dllc=llc -enable-misched -verify-machineinstrs"
+ tool_path = lit_config.params.get(tool_name)
+ if tool_path is None:
+ tool_path = lit.util.which(tool_name, llvm_tools_dir)
+ if tool_path is None:
+ return tool_name, tool_path, tool_pipe
+ if (tool_name == "llc" and
+ 'LLVM_ENABLE_MACHINE_VERIFIER' in os.environ and
+ os.environ['LLVM_ENABLE_MACHINE_VERIFIER'] == "1"):
+ tool_path += " -verify-machineinstrs"
+ if (tool_name == "llvm-go"):
+ tool_path += " go=" + config.go_executable
+ return tool_name, tool_path, tool_pipe
+
+
for pattern in [r"\bbugpoint\b(?!-)",
NOJUNK + r"\bllc\b",
r"\blli\b",
@@ -235,7 +261,6 @@ for pattern in [r"\bbugpoint\b(?!-)",
r"\bllvm-dsymutil\b",
r"\bllvm-dwarfdump\b",
r"\bllvm-extract\b",
- r"\bllvm-go\b",
r"\bllvm-lib\b",
r"\bllvm-link\b",
r"\bllvm-lto\b",
@@ -248,12 +273,13 @@ for pattern in [r"\bbugpoint\b(?!-)",
r"\bllvm-readobj\b",
r"\bllvm-rtdyld\b",
r"\bllvm-size\b",
+ r"\bllvm-split\b",
r"\bllvm-tblgen\b",
r"\bllvm-c-test\b",
- r"\bmacho-dump\b",
NOJUNK + r"\bopt\b",
r"\bFileCheck\b",
r"\bobj2yaml\b",
+ NOJUNK + r"\bsancov\b",
r"\byaml2obj\b",
r"\byaml-bench\b",
r"\bverify-uselistorder\b",
@@ -261,29 +287,29 @@ for pattern in [r"\bbugpoint\b(?!-)",
# for during testing.
r"\| \bcount\b",
r"\| \bnot\b"]:
- # Extract the tool name from the pattern. This relies on the tool
- # name being surrounded by \b word match operators. If the
- # pattern starts with "| ", include it in the string to be
- # substituted.
- tool_match = re.match(r"^(\\)?((\| )?)\W+b([0-9A-Za-z-_]+)\\b\W*$",
- pattern)
- tool_pipe = tool_match.group(2)
- tool_name = tool_match.group(4)
- # Did the user specify the tool path + arguments? This allows things like
- # llvm-lit "-Dllc=llc -enable-misched -verify-machineinstrs"
- tool_path = lit_config.params.get(tool_name)
- if tool_path is None:
- tool_path = lit.util.which(tool_name, llvm_tools_dir)
- if tool_path is None:
+ tool_name, tool_path, tool_pipe = find_tool_substitution(pattern)
+ if not tool_path:
# Warn, but still provide a substitution.
lit_config.note('Did not find ' + tool_name + ' in ' + llvm_tools_dir)
tool_path = llvm_tools_dir + '/' + tool_name
- if (tool_name == "llc" and
- 'LLVM_ENABLE_MACHINE_VERIFIER' in os.environ and
- os.environ['LLVM_ENABLE_MACHINE_VERIFIER'] == "1"):
- tool_path += " -verify-machineinstrs"
config.substitutions.append((pattern, tool_pipe + tool_path))
+# For tools that are optional depending on the config, we won't warn
+# if they're missing.
+for pattern in [r"\bllvm-go\b",
+ r"\bKaleidoscope-Ch3\b",
+ r"\bKaleidoscope-Ch4\b",
+ r"\bKaleidoscope-Ch5\b",
+ r"\bKaleidoscope-Ch6\b",
+ r"\bKaleidoscope-Ch7\b",
+ r"\bKaleidoscope-Ch8\b"]:
+ tool_name, tool_path, tool_pipe = find_tool_substitution(pattern)
+ if not tool_path:
+ # Provide a substitution anyway, for the sake of consistent errors.
+ tool_path = llvm_tools_dir + '/' + tool_name
+ config.substitutions.append((pattern, tool_pipe + tool_path))
+
+
### Targets
config.targets = frozenset(config.targets_to_build.split())
@@ -314,6 +340,8 @@ if config.llvm_use_sanitizer == "Address":
if (config.llvm_use_sanitizer == "Memory" or
config.llvm_use_sanitizer == "MemoryWithOrigins"):
config.available_features.add("msan")
+else:
+ config.available_features.add("not_msan")
if config.llvm_use_sanitizer == "Undefined":
config.available_features.add("ubsan")
else:
@@ -324,8 +352,7 @@ if lit_config.params.get("run_long_tests", None) == "true":
config.available_features.add("long_tests")
# Direct object generation
-# Suppress x86_64-mingw32 while investigating since r219108.
-if not 'hexagon' in config.target_triple and not re.match(r'^x86_64.*-(mingw32|windows-gnu|win32)', config.target_triple):
+if not 'hexagon' in config.target_triple:
config.available_features.add("object-emission")
if config.have_zlib == "1":
@@ -333,7 +360,14 @@ if config.have_zlib == "1":
else:
config.available_features.add("nozlib")
-# Native compilation: host arch == target arch
+# LLVM can be configured with an empty default triple
+# Some tests are "generic" and require a valid default triple
+if config.target_triple:
+ config.available_features.add("default_triple")
+ if re.match(r'^x86_64.*-linux', config.target_triple):
+ config.available_features.add("x86_64-linux")
+
+# Native compilation: host arch == default triple arch
# FIXME: Consider cases that target can be executed
# even if host_triple were different from target_triple.
if config.host_triple == config.target_triple:
@@ -345,7 +379,7 @@ def have_ld_plugin_support():
if not os.path.exists(os.path.join(config.llvm_shlib_dir, 'LLVMgold.so')):
return False
- ld_cmd = subprocess.Popen([config.gold_executable, '--help'], stdout = subprocess.PIPE)
+ ld_cmd = subprocess.Popen([config.gold_executable, '--help'], stdout = subprocess.PIPE, env={'LANG': 'C'})
ld_out = ld_cmd.stdout.read().decode()
ld_cmd.wait()
@@ -366,7 +400,7 @@ def have_ld_plugin_support():
if 'elf32ppc' in emulations:
config.available_features.add('ld_emu_elf32ppc')
- ld_version = subprocess.Popen([config.gold_executable, '--version'], stdout = subprocess.PIPE)
+ ld_version = subprocess.Popen([config.gold_executable, '--version'], stdout = subprocess.PIPE, env={'LANG': 'C'})
if not 'GNU gold' in ld_version.stdout.read().decode():
return False
ld_version.wait()
@@ -417,10 +451,18 @@ if 'darwin' == sys.platform:
config.available_features.add('fma3')
sysctl_cmd.wait()
+if platform.system() in ['Windows'] and re.match(r'.*-win32$', config.target_triple):
+ # For tests that require Windows to run.
+ config.available_features.add('system-windows')
+
# .debug_frame is not emitted for targeting Windows x64.
if not re.match(r'^x86_64.*-(mingw32|windows-gnu|win32)', config.target_triple):
config.available_features.add('debug_frame')
+# Check if we are embedding timestamps.
+if config.enable_timestamps == '1':
+ config.available_features.add('timestamps')
+
# Check if we should use gmalloc.
use_gmalloc_str = lit_config.params.get('use_gmalloc', None)
if use_gmalloc_str is not None:
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 0f5d223a3abf..ae5814f02f41 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -19,10 +19,12 @@ config.ocamlfind_executable = "@OCAMLFIND@"
config.have_ocamlopt = "@HAVE_OCAMLOPT@"
config.have_ocaml_ounit = "@HAVE_OCAML_OUNIT@"
config.ocaml_flags = "@OCAMLFLAGS@"
+config.include_go_tests = "@LLVM_INCLUDE_GO_TESTS@"
config.go_executable = "@GO_EXECUTABLE@"
config.enable_shared = @ENABLE_SHARED@
config.enable_assertions = @ENABLE_ASSERTIONS@
config.targets_to_build = "@TARGETS_TO_BUILD@"
+config.native_target = "@LLVM_NATIVE_ARCH@"
config.llvm_bindings = "@LLVM_BINDINGS@".split(' ')
config.host_os = "@HOST_OS@"
config.host_arch = "@HOST_ARCH@"
@@ -34,6 +36,8 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.have_zlib = "@HAVE_LIBZ@"
config.have_dia_sdk = @HAVE_DIA_SDK@
config.enable_ffi = "@LLVM_ENABLE_FFI@"
+config.enable_timestamps = "@ENABLE_TIMESTAMPS@"
+config.test_examples = "@ENABLE_EXAMPLES@"
# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.
diff --git a/test/tools/dsymutil/ARM/dummy-debug-map-amr64.map b/test/tools/dsymutil/ARM/dummy-debug-map-amr64.map
new file mode 100644
index 000000000000..a23e0c34c3f8
--- /dev/null
+++ b/test/tools/dsymutil/ARM/dummy-debug-map-amr64.map
@@ -0,0 +1,15 @@
+# This is a dummy debug map used for some tests where the contents of the
+# map are just an implementation detail. The tests wanting to use that file
+# should put all there object files in an explicitely named sub-directory
+# of Inputs, and they should be named 1.o, 2.o, ...
+# As not finding an object file or symbols isn't a fatal error for dsymutil,
+# you can extend this file with as much object files and symbols as needed.
+
+---
+triple: 'arm64-apple-darwin'
+objects:
+ - filename: 1.o
+ symbols:
+ - { sym: _bar, objAddr: 0x0, binAddr: 0x10000, size: 0x10 }
+...
+
diff --git a/test/tools/dsymutil/ARM/empty-map.test b/test/tools/dsymutil/ARM/empty-map.test
new file mode 100644
index 000000000000..54d9a35cc6e4
--- /dev/null
+++ b/test/tools/dsymutil/ARM/empty-map.test
@@ -0,0 +1,8 @@
+# REQUIRES: object-emission
+# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - 2>&1 | FileCheck %s
+
+---
+triple: 'thumbv7-apple-darwin'
+...
+
+# CHECK: warning: no debug symbols in executable (-arch armv7)
diff --git a/test/tools/dsymutil/ARM/fat-arch-name.test b/test/tools/dsymutil/ARM/fat-arch-name.test
new file mode 100644
index 000000000000..ac7af1e55d55
--- /dev/null
+++ b/test/tools/dsymutil/ARM/fat-arch-name.test
@@ -0,0 +1,21 @@
+# REQUIRES: object-emission
+# RUN: llvm-dsymutil -no-output %p/../Inputs/fat-test.arm.dylib -o %t.dSYM -verbose 2>&1 | FileCheck %s
+
+# We detect thumb triples from the binaries, because those are the only ones
+# that are guaranteed to be able to generate a Target instance (for example
+# we would detect armv7m-apple-darwin as non-thumb triple, but you can't
+# instantiate a Target from that). In the user-visible architecture names, and
+# in the lipo invocation, we need to rewrite the thumb arch names to the arm
+# ones.
+
+# CHECK: warning: no debug symbols in executable (-arch armv7)
+
+# CHECK: warning: no debug symbols in executable (-arch armv7s)
+
+# CHECK: warning: no debug symbols in executable (-arch arm64)
+
+# CHECK: Running lipo
+# CHECK-NEXT: lipo -create
+# CHECK-SAME: -segalign armv7
+# CHECK-SAME: -segalign armv7s
+# CHECK-SAME: -segalign arm64
diff --git a/test/tools/dsymutil/ARM/fat-arch-not-found.test b/test/tools/dsymutil/ARM/fat-arch-not-found.test
new file mode 100644
index 000000000000..e15d9b69b5fe
--- /dev/null
+++ b/test/tools/dsymutil/ARM/fat-arch-not-found.test
@@ -0,0 +1,13 @@
+# REQUIRES: object-emission
+# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - 2>&1 | FileCheck %s
+
+---
+triple: 'armv7-apple-darwin'
+objects:
+ - filename: libfat-test.a(fat-test.o)
+ symbols:
+ - { sym: _armv7_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 }
+...
+
+# CHECK: libfat-test.a(fat-test.o): No object file for requested architecture
+
diff --git a/test/tools/dsymutil/ARM/inlined-low_pc.c b/test/tools/dsymutil/ARM/inlined-low_pc.c
new file mode 100644
index 000000000000..7ade33e3e44b
--- /dev/null
+++ b/test/tools/dsymutil/ARM/inlined-low_pc.c
@@ -0,0 +1,15 @@
+/* Compiled with: clang -arch=arm64 -O2 -g -c inlined_low_pc.c */
+
+static int foo(int i) { return 42 + i; }
+int bar(int a) { return foo(a); }
+
+// RUN: llvm-dsymutil -f -y %p/dummy-debug-map-amr64.map -oso-prepend-path %p/../Inputs/inlined-low_pc -o - | llvm-dwarfdump - | FileCheck %s
+
+// CHECK: DW_TAG_subprogram
+// CHECK: DW_AT_low_pc{{.*}}0x0000000000010000
+// CHECK: DW_AT_name{{.*}}"bar"
+// CHECK-NOT: NULL
+// CHECK: DW_TAG_inlined_subroutine
+// CHECK-NEXT: DW_AT_abstract_origin{{.*}}"foo"
+// CHECK-NEXT: DW_AT_low_pc{{.*}}0x0000000000010000
+
diff --git a/test/tools/dsymutil/ARM/lit.local.cfg b/test/tools/dsymutil/ARM/lit.local.cfg
new file mode 100644
index 000000000000..442cd554bfed
--- /dev/null
+++ b/test/tools/dsymutil/ARM/lit.local.cfg
@@ -0,0 +1,7 @@
+if not 'ARM' in config.root.targets:
+ config.unsupported = True
+if not 'AArch64' in config.root.targets:
+ config.unsupported = True
+
+config.suffixes = ['.test', '.cpp', '.c']
+
diff --git a/test/tools/dsymutil/Inputs/absolute_sym.macho.i386 b/test/tools/dsymutil/Inputs/absolute_sym.macho.i386
new file mode 100755
index 000000000000..5ca0f2d6868a
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/absolute_sym.macho.i386
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/absolute_sym.macho.i386.o b/test/tools/dsymutil/Inputs/absolute_sym.macho.i386.o
new file mode 100644
index 000000000000..445e32271cf5
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/absolute_sym.macho.i386.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/basic.macho.i386 b/test/tools/dsymutil/Inputs/basic.macho.i386
new file mode 100755
index 000000000000..ee6be096fe5a
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic.macho.i386
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.o b/test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.o
new file mode 100644
index 000000000000..07b36a20c1da
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/basic2.c b/test/tools/dsymutil/Inputs/basic2.c
index 13c6d07c2a08..4524e9bc6cc3 100644
--- a/test/tools/dsymutil/Inputs/basic2.c
+++ b/test/tools/dsymutil/Inputs/basic2.c
@@ -20,3 +20,9 @@ int foo(int arg) {
return bar(arg+val) + inc() + baz++;
}
+/* This file was also used to create basic2-custom-linetable.macho.x86_64.o
+ with a custom clang that had different settings for the linetable
+ encoding constants: line_base == -1 and line_range == 4.
+
+ clang -c -g basic2.c -o basic2-custom-linetable.macho.x86_64.o
+*/
diff --git a/test/tools/dsymutil/Inputs/dead-stripped/1.o b/test/tools/dsymutil/Inputs/dead-stripped/1.o
new file mode 100644
index 000000000000..fbdffbc61a77
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/dead-stripped/1.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/empty_range/1.o b/test/tools/dsymutil/Inputs/empty_range/1.o
new file mode 100644
index 000000000000..671bf4a0ccdb
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/empty_range/1.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/fat-test.arm.dylib b/test/tools/dsymutil/Inputs/fat-test.arm.dylib
new file mode 100755
index 000000000000..a8d4f37be4de
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/fat-test.arm.dylib
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/fat-test.arm.o b/test/tools/dsymutil/Inputs/fat-test.arm.o
new file mode 100644
index 000000000000..1cf16803e892
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/fat-test.arm.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/fat-test.c b/test/tools/dsymutil/Inputs/fat-test.c
new file mode 100644
index 000000000000..f6c4c76fe2f4
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/fat-test.c
@@ -0,0 +1,28 @@
+/* Compile with:
+ clang -c -g -arch x86_64h -arch x86_64 -arch i386 fat-test.c
+ libtool -static -o libfat-test.a fat-test.o
+
+ to generate a dylib instead:
+ clang -arch ... -arch ... -arch ... -dynamiclib fat-test.o -o fat-test.dylib
+
+ To reduce the size of the fat binary:
+ lipo -thin i386 -o fat-test.i386.o fat-test.o
+ lipo -thin x86_64 -o fat-test.x86_64.o fat-test.o
+ lipo -thin x86_64h -o fat-test.x86_64h.o fat-test.o
+ lipo -create -arch x86_64h fat-test.x86_64h.o -arch x86_64 fat-test.x86_64.o -arch i386 fat-test.i386.o -o fat-test.o -segalign i386 8 -segalign x86_64 8 -segalign x86_64h 8
+ */
+#ifdef __x86_64h__
+int x86_64h_var;
+#elif defined(__x86_64__)
+int x86_64_var;
+#elif defined(__i386__)
+int i386_var;
+#elif defined(__ARM_ARCH_7S__)
+int armv7s_var;
+#elif defined(__ARM_ARCH_7A__)
+int armv7_var;
+#elif defined(__ARM64_ARCH_8__)
+int arm64_var;
+#else
+#error "Unknown architecture"
+#endif
diff --git a/test/tools/dsymutil/Inputs/fat-test.dylib b/test/tools/dsymutil/Inputs/fat-test.dylib
new file mode 100755
index 000000000000..4def340a9507
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/fat-test.dylib
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/fat-test.o b/test/tools/dsymutil/Inputs/fat-test.o
new file mode 100644
index 000000000000..8159cc749772
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/fat-test.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/frame-dw2.ll b/test/tools/dsymutil/Inputs/frame-dw2.ll
index 7ffc93397315..d07c529d3108 100644
--- a/test/tools/dsymutil/Inputs/frame-dw2.ll
+++ b/test/tools/dsymutil/Inputs/frame-dw2.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.11.0"
; Function Attrs: nounwind ssp
-define i32 @bar(i32 %b) #0 {
+define i32 @bar(i32 %b) #0 !dbg !4 {
entry:
%b.addr = alloca i32, align 4
%var = alloca i32, align 4
@@ -24,7 +24,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare i32 @foo(i32*) #2
; Function Attrs: nounwind ssp
-define i32 @baz(i32 %b) #0 {
+define i32 @baz(i32 %b) #0 !dbg !8 {
entry:
%b.addr = alloca i32, align 4
store i32 %b, i32* %b.addr, align 4
@@ -42,29 +42,29 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!9, !10, !11}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "frame.c", directory: "/tmp")
!2 = !{}
!3 = !{!4, !8}
-!4 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @bar, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @baz, variables: !2)
+!8 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 2}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{i32 1, !"PIC Level", i32 2}
!12 = !{!"clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)"}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!13 = !DILocalVariable(name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
!14 = !DIExpression()
!15 = !DILocation(line: 3, column: 13, scope: !4)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var", scope: !4, file: !1, line: 4, type: !7)
+!16 = !DILocalVariable(name: "var", scope: !4, file: !1, line: 4, type: !7)
!17 = !DILocation(line: 4, column: 6, scope: !4)
!18 = !DILocation(line: 4, column: 12, scope: !4)
!19 = !DILocation(line: 4, column: 14, scope: !4)
!20 = !DILocation(line: 5, column: 9, scope: !4)
!21 = !DILocation(line: 5, column: 2, scope: !4)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
+!22 = !DILocalVariable(name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
!23 = !DILocation(line: 8, column: 13, scope: !8)
!24 = !DILocation(line: 9, column: 13, scope: !8)
!25 = !DILocation(line: 9, column: 9, scope: !8)
diff --git a/test/tools/dsymutil/Inputs/frame-dw4.ll b/test/tools/dsymutil/Inputs/frame-dw4.ll
index c8674b13e585..f3df896a3bb5 100644
--- a/test/tools/dsymutil/Inputs/frame-dw4.ll
+++ b/test/tools/dsymutil/Inputs/frame-dw4.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.11.0"
; Function Attrs: nounwind ssp
-define i32 @bar(i32 %b) #0 {
+define i32 @bar(i32 %b) #0 !dbg !4 {
entry:
%b.addr = alloca i32, align 4
%var = alloca i32, align 4
@@ -24,7 +24,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare i32 @foo(i32*) #2
; Function Attrs: nounwind ssp
-define i32 @baz(i32 %b) #0 {
+define i32 @baz(i32 %b) #0 !dbg !8 {
entry:
%b.addr = alloca i32, align 4
store i32 %b, i32* %b.addr, align 4
@@ -42,29 +42,29 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!9, !10, !11}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "frame.c", directory: "/tmp")
!2 = !{}
!3 = !{!4, !8}
-!4 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @bar, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @baz, variables: !2)
+!8 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{i32 1, !"PIC Level", i32 2}
!12 = !{!"clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)"}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!13 = !DILocalVariable(name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
!14 = !DIExpression()
!15 = !DILocation(line: 3, column: 13, scope: !4)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var", scope: !4, file: !1, line: 4, type: !7)
+!16 = !DILocalVariable(name: "var", scope: !4, file: !1, line: 4, type: !7)
!17 = !DILocation(line: 4, column: 6, scope: !4)
!18 = !DILocation(line: 4, column: 12, scope: !4)
!19 = !DILocation(line: 4, column: 14, scope: !4)
!20 = !DILocation(line: 5, column: 9, scope: !4)
!21 = !DILocation(line: 5, column: 2, scope: !4)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
+!22 = !DILocalVariable(name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
!23 = !DILocation(line: 8, column: 13, scope: !8)
!24 = !DILocation(line: 9, column: 13, scope: !8)
!25 = !DILocation(line: 9, column: 9, scope: !8)
diff --git a/test/tools/dsymutil/Inputs/inlined-low_pc/1.o b/test/tools/dsymutil/Inputs/inlined-low_pc/1.o
new file mode 100644
index 000000000000..7ab4e9205bbd
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/inlined-low_pc/1.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/libfat-test.a b/test/tools/dsymutil/Inputs/libfat-test.a
new file mode 100644
index 000000000000..6b34b0fd6f90
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/libfat-test.a
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/mismatch/1.o b/test/tools/dsymutil/Inputs/mismatch/1.o
new file mode 100644
index 000000000000..2a855ae293dc
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/mismatch/1.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/mismatch/mismatch.pcm b/test/tools/dsymutil/Inputs/mismatch/mismatch.pcm
new file mode 100644
index 000000000000..94f3cf5fffb6
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/mismatch/mismatch.pcm
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/modules/1.o b/test/tools/dsymutil/Inputs/modules/1.o
new file mode 100644
index 000000000000..eca930c905c4
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/modules/1.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/modules/Bar.pcm b/test/tools/dsymutil/Inputs/modules/Bar.pcm
new file mode 100644
index 000000000000..f3c0d07f2c50
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/modules/Bar.pcm
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/modules/Foo.pcm b/test/tools/dsymutil/Inputs/modules/Foo.pcm
new file mode 100644
index 000000000000..4a39a06c24dd
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/modules/Foo.pcm
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/odr-anon-namespace/1.o b/test/tools/dsymutil/Inputs/odr-anon-namespace/1.o
new file mode 100644
index 000000000000..20d4b7fd84ed
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/odr-anon-namespace/1.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/odr-anon-namespace/2.o b/test/tools/dsymutil/Inputs/odr-anon-namespace/2.o
new file mode 100644
index 000000000000..df8e567bc3a5
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/odr-anon-namespace/2.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/odr-member-functions/1.o b/test/tools/dsymutil/Inputs/odr-member-functions/1.o
new file mode 100644
index 000000000000..f957a9b9d423
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/odr-member-functions/1.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/odr-member-functions/2.o b/test/tools/dsymutil/Inputs/odr-member-functions/2.o
new file mode 100644
index 000000000000..c696866fcad6
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/odr-member-functions/2.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/odr-member-functions/3.o b/test/tools/dsymutil/Inputs/odr-member-functions/3.o
new file mode 100644
index 000000000000..962bf6e2b382
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/odr-member-functions/3.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/odr-uniquing/1.o b/test/tools/dsymutil/Inputs/odr-uniquing/1.o
new file mode 100644
index 000000000000..c52093030055
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/odr-uniquing/1.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/odr-uniquing/2.o b/test/tools/dsymutil/Inputs/odr-uniquing/2.o
new file mode 100644
index 000000000000..c52093030055
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/odr-uniquing/2.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/submodules/1.o b/test/tools/dsymutil/Inputs/submodules/1.o
new file mode 100644
index 000000000000..d38e30f67c2c
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/submodules/1.o
Binary files differ
diff --git a/test/tools/dsymutil/Inputs/submodules/Parent.pcm b/test/tools/dsymutil/Inputs/submodules/Parent.pcm
new file mode 100644
index 000000000000..e6909d7afa5e
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/submodules/Parent.pcm
Binary files differ
diff --git a/test/tools/dsymutil/X86/basic-linking-bundle.test b/test/tools/dsymutil/X86/basic-linking-bundle.test
new file mode 100644
index 000000000000..c07fa5894f36
--- /dev/null
+++ b/test/tools/dsymutil/X86/basic-linking-bundle.test
@@ -0,0 +1,38 @@
+RUN: rm -rf %T/basic-linking-bundle
+RUN: mkdir -p %T/basic-linking-bundle/dsymdest
+RUN: cat %p/../Inputs/basic.macho.x86_64 > %T/basic-linking-bundle/basic.macho.x86_64
+
+RUN: llvm-dsymutil -oso-prepend-path=%p/.. %T/basic-linking-bundle/basic.macho.x86_64
+
+Check that the object file in the bundle exists and is sane:
+RUN: llvm-dwarfdump %T/basic-linking-bundle/basic.macho.x86_64.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 | FileCheck %S/basic-linking-x86.test
+
+Check that llvm-dwarfdump recognizes the bundle as a dSYM:
+RUN: llvm-dwarfdump %T/basic-linking-bundle/basic.macho.x86_64.dSYM | FileCheck %S/basic-linking-x86.test
+
+RUN: FileCheck %s --input-file %T/basic-linking-bundle/basic.macho.x86_64.dSYM/Contents/Info.plist
+
+RUN: llvm-dsymutil -oso-prepend-path=%p/.. %T/basic-linking-bundle/basic.macho.x86_64 -o %T/basic-linking-bundle/dsymdest/basic.macho.x86_64.dSYM
+RUN: llvm-dwarfdump %T/basic-linking-bundle/dsymdest/basic.macho.x86_64.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 | FileCheck %S/basic-linking-x86.test
+RUN: FileCheck %s --input-file %T/basic-linking-bundle/dsymdest/basic.macho.x86_64.dSYM/Contents/Info.plist
+
+CHECK: <?xml version="1.0" encoding="UTF-8"?>
+CHECK-NEXT: <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+CHECK-NEXT: <plist version="1.0">
+CHECK-NEXT: <dict>
+CHECK-NEXT: <key>CFBundleDevelopmentRegion</key>
+CHECK-NEXT: <string>English</string>
+CHECK-NEXT: <key>CFBundleIdentifier</key>
+CHECK-NEXT: <string>com.apple.xcode.dsym.basic.macho.x86_64</string>
+CHECK-NEXT: <key>CFBundleInfoDictionaryVersion</key>
+CHECK-NEXT: <string>6.0</string>
+CHECK-NEXT: <key>CFBundlePackageType</key>
+CHECK-NEXT: <string>dSYM</string>
+CHECK-NEXT: <key>CFBundleSignature</key>
+CHECK-NEXT: <string>????</string>
+CHECK-NEXT: <key>CFBundleShortVersionString</key>
+CHECK-NEXT: <string>1.0</string>
+CHECK-NEXT: <key>CFBundleVersion</key>
+CHECK-NEXT: <string>1</string>
+CHECK-NEXT: </dict>
+CHECK-NEXT: </plist>
diff --git a/test/tools/dsymutil/X86/basic-linking-x86.test b/test/tools/dsymutil/X86/basic-linking-x86.test
index 19b4e3bef663..37797a323504 100644
--- a/test/tools/dsymutil/X86/basic-linking-x86.test
+++ b/test/tools/dsymutil/X86/basic-linking-x86.test
@@ -1,13 +1,12 @@
-REQUIRES: shell
RUN: cat %p/../Inputs/basic.macho.x86_64 > %t1
-RUN: llvm-dsymutil -oso-prepend-path=%p/.. %t1
+RUN: llvm-dsymutil -f -oso-prepend-path=%p/.. %t1
RUN: llvm-dwarfdump %t1.dwarf | FileCheck %s
-RUN: llvm-dsymutil -o %t2 -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64
+RUN: llvm-dsymutil -f -o %t2 -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64
RUN: llvm-dwarfdump %t2 | FileCheck %s
-RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC
-RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE
-RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dsymutil -y -o - - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC
-RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dsymutil -o - -y - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE
+RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC
+RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE
+RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dsymutil -f -y -o - - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC
+RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dsymutil -f -o - -y - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE
CHECK: file format Mach-O 64-bit x86-64
diff --git a/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test b/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test
index ad3ba5a15b6b..56b78588b758 100644
--- a/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test
+++ b/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test
@@ -1,5 +1,4 @@
-REQUIRES: shell
-RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto-dw4.macho.x86_64 | llvm-dwarfdump - | FileCheck %s
+RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto-dw4.macho.x86_64 | llvm-dwarfdump - | FileCheck %s
CHECK: file format Mach-O 64-bit x86-64
diff --git a/test/tools/dsymutil/X86/basic-lto-linking-x86.test b/test/tools/dsymutil/X86/basic-lto-linking-x86.test
index 395234e96166..68103aa44ca9 100644
--- a/test/tools/dsymutil/X86/basic-lto-linking-x86.test
+++ b/test/tools/dsymutil/X86/basic-lto-linking-x86.test
@@ -1,6 +1,5 @@
-REQUIRES: shell
-RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto.macho.x86_64 | llvm-dwarfdump - | FileCheck %s
-RUN: llvm-dsymutil -oso-prepend-path=%p/.. -dump-debug-map %p/../Inputs/basic-lto.macho.x86_64 | llvm-dsymutil -o - -y - | llvm-dwarfdump - | FileCheck %s
+RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto.macho.x86_64 | llvm-dwarfdump - | FileCheck %s
+RUN: llvm-dsymutil -oso-prepend-path=%p/.. -dump-debug-map %p/../Inputs/basic-lto.macho.x86_64 | llvm-dsymutil -f -o - -y - | llvm-dwarfdump - | FileCheck %s
CHECK: file format Mach-O 64-bit x86-64
@@ -117,7 +116,7 @@ CHECK: DW_AT_type [DW_FORM_ref_addr] (0x0000000000000063)
CHECK: DW_AT_location [DW_FORM_data4] (0x00000025)
CHECK: DW_TAG_lexical_block [14] *
CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000100000f94)
-CHECK DW_AT_high_pc [DW_FORM_addr] (0x0000000100000fa7)
+CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000100000fa7)
CHECK: DW_TAG_inlined_subroutine [15]
CHECK: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x009a => {0x000001d4} "inc")
CHECK: DW_AT_ranges [DW_FORM_data4] (0x00000000
diff --git a/test/tools/dsymutil/X86/custom-line-table.test b/test/tools/dsymutil/X86/custom-line-table.test
new file mode 100644
index 000000000000..86fd7e294686
--- /dev/null
+++ b/test/tools/dsymutil/X86/custom-line-table.test
@@ -0,0 +1,40 @@
+# RUN: llvm-dsymutil -oso-prepend-path %p/../Inputs -y %s -f -o - | llvm-dwarfdump - -debug-dump=line | FileCheck %s
+
+# This test runs dsymutil on an object file with non-standard (as far
+# as llvm is concerned) line table settings.
+
+---
+triple: 'x86_64-apple-darwin'
+objects:
+ - filename: basic2-custom-linetable.macho.x86_64.o
+ symbols:
+ - { sym: _foo, objAddr: 0x0, binAddr: 0x1000, size: 0x12 }
+
+# CHECK: 0x0000000000001000 19 0 {{.*}} is_stmt
+# CHECK: 0x0000000000001012 20 14 {{.*}} is_stmt prologue_end
+# CHECK: 0x0000000000001015 20 18 {{.*}}
+# CHECK: 0x0000000000001017 20 17 {{.*}}
+# CHECK: 0x0000000000001019 20 10 {{.*}}
+# CHECK: 0x000000000000101e 20 25 {{.*}}
+# CHECK: 0x0000000000001026 20 23 {{.*}}
+# CHECK: 0x000000000000102b 20 36 {{.*}}
+# CHECK: 0x000000000000103c 20 31 {{.*}}
+# CHECK: 0x000000000000103e 20 3 {{.*}}
+# CHECK: 0x0000000000001046 20 3 {{.*}} end_sequence
+
+ - { sym: _inc, objAddr: 0x0, binAddr: 0x2000, size: 0x12 }
+
+# CHECK: 0x0000000000002000 14 0 {{.*}} is_stmt
+# CHECK: 0x0000000000002004 15 10 {{.*}} is_stmt prologue_end
+# CHECK: 0x0000000000002013 15 3 {{.*}}
+# CHECK: 0x0000000000002015 15 3 {{.*}} end_sequence
+
+ - { sym: _unused1, objAddr: 0x0, binAddr: 0x3000, size: 0x12 }
+
+# CHECK: 0x0000000000003000 10 0 {{.*}} is_stmt
+# CHECK: 0x0000000000003004 11 7 {{.*}} is_stmt prologue_end
+# CHECK: 0x000000000000300e 11 3 {{.*}}
+# CHECK: 0x0000000000003013 12 1 {{.*}} is_stmt
+# CHECK: 0x000000000000301c 12 1 {{.*}} is_stmt end_sequence
+...
+
diff --git a/test/tools/dsymutil/X86/dead-stripped.cpp b/test/tools/dsymutil/X86/dead-stripped.cpp
new file mode 100644
index 000000000000..ecab7580ec0f
--- /dev/null
+++ b/test/tools/dsymutil/X86/dead-stripped.cpp
@@ -0,0 +1,48 @@
+// RUN: llvm-dsymutil -f -y %p/dummy-debug-map.map -oso-prepend-path %p/../Inputs/dead-stripped -o - | llvm-dwarfdump - -debug-dump=info | FileCheck %s
+
+// The test was compiled with:
+// clang++ -O2 -g -c dead-strip.cpp -o 1.o
+
+// The goal of the test is to exercise dsymutil's behavior in presence of
+// functions/variables that have been dead-stripped by the linker but
+// that are still present in the linked debug info (in this case because
+// they have been DW_TAG_import'd in another namespace).
+
+// Everything in the N namespace bellow doesn't have a debug map entry, and
+// thus is considered dead (::foo() has a debug map entry, otherwse dsymutil
+// would just drop the CU altogether).
+
+namespace N {
+int blah = 42;
+// This is actually a dsymutil-classic bug that we reproduced
+// CHECK: DW_TAG_variable
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_location
+
+__attribute__((always_inline)) int foo() { return blah; }
+// CHECK: DW_TAG_subprogram
+// CHECK-NOT: DW_AT_low_pc
+// CHECK-NOT: DW_AT_high_pc
+// CHECK: DW_AT_frame_base
+
+// CHECK: DW_TAG_subprogram
+
+int bar(unsigned i) {
+ int val = foo();
+ if (i)
+ return val + bar(i-1);
+ return foo();
+}
+// CHECK: DW_TAG_subprogram
+// CHECK-NOT: DW_AT_low_pc
+// CHECK-NOT: DW_AT_high_pc
+// CHECK: DW_AT_frame_base
+// CHECK-NOT: DW_AT_location
+// CHECK-NOT: DW_AT_low_pc
+// CHECK-NOT: DW_AT_high_pc
+
+}
+// CHECK: TAG_imported_module
+using namespace N;
+
+void foo() {}
diff --git a/test/tools/dsymutil/X86/dsym-companion.test b/test/tools/dsymutil/X86/dsym-companion.test
new file mode 100644
index 000000000000..4327a2953d97
--- /dev/null
+++ b/test/tools/dsymutil/X86/dsym-companion.test
@@ -0,0 +1,339 @@
+RUN: llvm-dsymutil -o - %p/../Inputs/basic.macho.i386 -f | llvm-readobj -file-headers -program-headers -sections -symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK32
+RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 -f | llvm-readobj -file-headers -program-headers -sections -symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK64
+
+This test checks that the dSYM companion binaries generated in 32 and 64 bits
+are correct. The check are pretty strict (we check even the offsets and sizes
+of the sections) in order to test the VM address layout algorithm. As the
+debug sections are generated, this is a bit risky, but I don't expect
+llvm-dsymutil's output to change much for these tiny C programs so this should
+be OK.
+The 32bits version doesn't have object files, thus it has basically no debug
+sections.
+
+CHECK32: Format: Mach-O 32-bit i386
+CHECK32: Arch: i386
+CHECK32: AddressSize: 32bit
+CHECK64: Format: Mach-O 64-bit x86-64
+CHECK64: Arch: x86_64
+CHECK64: AddressSize: 64bit
+CHECK: MachHeader {
+CHECK32: Magic: Magic (0xFEEDFACE)
+CHECK32: CpuType: X86 (0x7)
+CHECK32: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+CHECK64: Magic: Magic64 (0xFEEDFACF)
+CHECK64: CpuType: X86-64 (0x1000007)
+CHECK64: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+CHECK: FileType: DWARFSymbol (0xA)
+CHECK: NumOfLoadCommands: 7
+CHECK: Flags [ (0x0)
+CHECK: }
+CHECK: Sections [
+CHECK: Section {
+CHECK: Name: __text
+CHECK: Segment: __TEXT
+CHECK32: Address: 0x1E90
+CHECK32: Size: 0x11A
+CHECK64: Address: 0x100000EA0
+CHECK64: Size: 0x109
+CHECK: Offset: 0
+CHECK: Alignment: 4
+CHECK: RelocationOffset: 0x0
+CHECK: RelocationCount: 0
+CHECK: Type: 0x0
+CHECK: Attributes [ (0x800004)
+CHECK: PureInstructions (0x800000)
+CHECK: SomeInstructions (0x4)
+CHECK: ]
+CHECK: Reserved1: 0x0
+CHECK: Reserved2: 0x0
+CHECK: }
+CHECK: Section {
+CHECK: Name: __unwind_info
+CHECK: Segment: __TEXT
+CHECK32: Address: 0x1FAC
+CHECK64: Address: 0x100000FAC
+CHECK: Size: 0x48
+CHECK: Offset: 0
+CHECK: Alignment: 2
+CHECK: RelocationOffset: 0x0
+CHECK: RelocationCount: 0
+CHECK: Type: 0x0
+CHECK: Attributes [ (0x0)
+CHECK: ]
+CHECK: Reserved1: 0x0
+CHECK: Reserved2: 0x0
+CHECK: }
+CHECK32: Section {
+CHECK32: Name: __nl_symbol_ptr
+CHECK32: Segment: __DATA
+CHECK32: Address: 0x2000
+CHECK32: Size: 0x4
+CHECK32: Offset: 0
+CHECK32: Alignment: 2
+CHECK32: RelocationOffset: 0x0
+CHECK32: RelocationCount: 0
+CHECK32: Type: 0x6
+CHECK32: Attributes [ (0x0)
+CHECK32: ]
+CHECK32: Reserved1: 0x0
+CHECK32: Reserved2: 0x0
+CHECK32: }
+CHECK: Section {
+CHECK: Name: __data
+CHECK: Segment: __DATA
+CHECK32: Address: 0x2004
+CHECK64: Address: 0x100001000
+CHECK: Size: 0x4
+CHECK: Offset: 0
+CHECK: Alignment: 2
+CHECK: RelocationOffset: 0x0
+CHECK: RelocationCount: 0
+CHECK: Type: 0x0
+CHECK: Attributes [ (0x0)
+CHECK: ]
+CHECK: Reserved1: 0x0
+CHECK: Reserved2: 0x0
+CHECK: }
+CHECK: Section {
+CHECK: Name: __common
+CHECK: Segment: __DATA
+CHECK32: Address: 0x2008
+CHECK64: Address: 0x100001004
+CHECK: Size: 0x4
+CHECK: Offset: 0
+CHECK: Alignment: 2
+CHECK: RelocationOffset: 0x0
+CHECK: RelocationCount: 0
+CHECK: Type: LocReloc (0x1)
+CHECK: Attributes [ (0x0)
+CHECK: ]
+CHECK: Reserved1: 0x0
+CHECK: Reserved2: 0x0
+CHECK: }
+CHECK: Section {
+CHECK: Name: __bss
+CHECK: Segment: __DATA
+CHECK32: Address: 0x200C
+CHECK64: Address: 0x100001008
+CHECK: Size: 0x4
+CHECK: Offset: 0
+CHECK: Alignment: 2
+CHECK: RelocationOffset: 0x0
+CHECK: RelocationCount: 0
+CHECK: Type: LocReloc (0x1)
+CHECK: Attributes [ (0x0)
+CHECK: ]
+CHECK: Reserved1: 0x0
+CHECK: Reserved2: 0x0
+CHECK: }
+CHECK64: Section {
+CHECK64: Name: __debug_line
+CHECK64: Segment: __DWARF
+CHECK64: Address: 0x100003000
+CHECK64: Size: 0xEA
+CHECK64: Offset: 8192
+CHECK64: Alignment: 0
+CHECK64: RelocationOffset: 0x0
+CHECK64: RelocationCount: 0
+CHECK64: Type: 0x0
+CHECK64: Attributes [ (0x0)
+CHECK64: ]
+CHECK64: Reserved1: 0x0
+CHECK64: Reserved2: 0x0
+CHECK64: }
+CHECK64: Section {
+CHECK64: Name: __debug_pubnames
+CHECK64: Segment: __DWARF
+CHECK64: Address: 0x1000030EA
+CHECK64: Size: 0x7F
+CHECK64: Offset: 8426
+CHECK64: Alignment: 0
+CHECK64: RelocationOffset: 0x0
+CHECK64: RelocationCount: 0
+CHECK64: Type: 0x0
+CHECK64: Attributes [ (0x0)
+CHECK64: ]
+CHECK64: Reserved1: 0x0
+CHECK64: Reserved2: 0x0
+CHECK64: }
+CHECK64: Section {
+CHECK64: Name: __debug_pubtypes
+CHECK64: Segment: __DWARF
+CHECK64: Address: 0x100003169
+CHECK64: Size: 0x57
+CHECK64: Offset: 8553
+CHECK64: Alignment: 0
+CHECK64: RelocationOffset: 0x0
+CHECK64: RelocationCount: 0
+CHECK64: Type: 0x0
+CHECK64: Attributes [ (0x0)
+CHECK64: ]
+CHECK64: Reserved1: 0x0
+CHECK64: Reserved2: 0x0
+CHECK64: }
+CHECK64: Section {
+CHECK64: Name: __debug_aranges
+CHECK64: Segment: __DWARF
+CHECK64: Address: 0x1000031C0
+CHECK64: Size: 0xB0
+CHECK64: Offset: 8640
+CHECK64: Alignment: 0
+CHECK64: RelocationOffset: 0x0
+CHECK64: RelocationCount: 0
+CHECK64: Type: 0x0
+CHECK64: Attributes [ (0x0)
+CHECK64: ]
+CHECK64: Reserved1: 0x0
+CHECK64: Reserved2: 0x0
+CHECK64: }
+CHECK64: Section {
+CHECK64: Name: __debug_info
+CHECK64: Segment: __DWARF
+CHECK64: Address: 0x100003270
+CHECK64: Size: 0x1BC
+CHECK64: Offset: 8816
+CHECK64: Alignment: 0
+CHECK64: RelocationOffset: 0x0
+CHECK64: RelocationCount: 0
+CHECK64: Type: 0x0
+CHECK64: Attributes [ (0x0)
+CHECK64: ]
+CHECK64: Reserved1: 0x0
+CHECK64: Reserved2: 0x0
+CHECK64: }
+CHECK: Section {
+CHECK: Name: __debug_abbrev
+CHECK: Segment: __DWARF
+CHECK32: Address: 0x4000
+CHECK32: Size: 0x1
+CHECK32: Offset: 8192
+CHECK64: Address: 0x10000342C
+CHECK64: Size: 0x8F
+CHECK64: Offset: 9260
+CHECK: Alignment: 0
+CHECK: RelocationOffset: 0x0
+CHECK: RelocationCount: 0
+CHECK: Type: 0x0
+CHECK: Attributes [ (0x0)
+CHECK: ]
+CHECK: Reserved1: 0x0
+CHECK: Reserved2: 0x0
+CHECK: }
+CHECK: Section {
+CHECK: Name: __debug_str
+CHECK: Segment: __DWARF
+CHECK32: Address: 0x4001
+CHECK32: Size: 0x1
+CHECK32: Offset: 8193
+CHECK64: Address: 0x1000034BB
+CHECK64: Size: 0x9F
+CHECK64: Offset: 9403
+CHECK: Alignment: 0
+CHECK: RelocationOffset: 0x0
+CHECK: RelocationCount: 0
+CHECK: Type: 0x0
+CHECK: Attributes [ (0x0)
+CHECK: ]
+CHECK: Reserved1: 0x0
+CHECK: Reserved2: 0x0
+CHECK: }
+CHECK: ]
+CHECK: Symbols [
+CHECK: Symbol {
+CHECK: Name: _inc (2)
+CHECK: Type: Section (0xE)
+CHECK: Section: __text
+CHECK: RefType: UndefinedNonLazy (0x0)
+CHECK: Flags [ (0x0)
+CHECK: ]
+CHECK32: Value: 0x1F20
+CHECK64: Value: 0x100000F20
+CHECK: }
+CHECK: Symbol {
+CHECK: Name: _inc (2)
+CHECK: Type: Section (0xE)
+CHECK: Section: __text
+CHECK: RefType: UndefinedNonLazy (0x0)
+CHECK: Flags [ (0x0)
+CHECK: ]
+CHECK32: Value: 0x1F90
+CHECK64: Value: 0x100000F90
+CHECK: }
+CHECK: Symbol {
+CHECK: Name: _baz (7)
+CHECK: Type: Section (0xE)
+CHECK: Section: __data
+CHECK: RefType: UndefinedNonLazy (0x0)
+CHECK: Flags [ (0x0)
+CHECK: ]
+CHECK32: Value: 0x2004
+CHECK64: Value: 0x100001000
+CHECK: }
+CHECK: Symbol {
+CHECK: Name: _private_int (12)
+CHECK: Type: Section (0xE)
+CHECK: Section: __bss
+CHECK: RefType: UndefinedNonLazy (0x0)
+CHECK: Flags [ (0x0)
+CHECK: ]
+CHECK32: Value: 0x200C
+CHECK64: Value: 0x100001008
+CHECK: }
+CHECK: Symbol {
+CHECK: Name: __mh_execute_header (25)
+CHECK: Extern
+CHECK: Type: Section (0xE)
+CHECK: Section: __text
+CHECK: RefType: UndefinedNonLazy (0x0)
+CHECK: Flags [ (0x10)
+CHECK: ReferencedDynamically (0x10)
+CHECK: ]
+CHECK32: Value: 0x1000
+CHECK64: Value: 0x100000000
+CHECK: }
+CHECK: Symbol {
+CHECK: Name: _bar (45)
+CHECK: Extern
+CHECK: Type: Section (0xE)
+CHECK: Section: __text
+CHECK: RefType: UndefinedNonLazy (0x0)
+CHECK: Flags [ (0x0)
+CHECK: ]
+CHECK32: Value: 0x1F40
+CHECK64: Value: 0x100000F40
+CHECK: }
+CHECK: Symbol {
+CHECK: Name: _foo (50)
+CHECK: Extern
+CHECK: Type: Section (0xE)
+CHECK: Section: __text
+CHECK: RefType: UndefinedNonLazy (0x0)
+CHECK: Flags [ (0x0)
+CHECK: ]
+CHECK32: Value: 0x1EC0
+CHECK64: Value: 0x100000ED0
+CHECK: }
+CHECK: Symbol {
+CHECK: Name: _main (55)
+CHECK: Extern
+CHECK: Type: Section (0xE)
+CHECK: Section: __text
+CHECK: RefType: UndefinedNonLazy (0x0)
+CHECK: Flags [ (0x0)
+CHECK: ]
+CHECK32: Value: 0x1E90
+CHECK64: Value: 0x100000EA0
+CHECK: }
+CHECK: Symbol {
+CHECK: Name: _val (61)
+CHECK: Extern
+CHECK: Type: Section (0xE)
+CHECK: Section: __common
+CHECK: RefType: UndefinedNonLazy (0x0)
+CHECK: Flags [ (0x0)
+CHECK: ]
+CHECK32: Value: 0x2008
+CHECK64: Value: 0x100001004
+CHECK: }
+CHECK: ]
+
diff --git a/test/tools/dsymutil/X86/dummy-debug-map.map b/test/tools/dsymutil/X86/dummy-debug-map.map
new file mode 100644
index 000000000000..f9bc7b099858
--- /dev/null
+++ b/test/tools/dsymutil/X86/dummy-debug-map.map
@@ -0,0 +1,22 @@
+# This is a dummy debug map used for some tests where the contents of the
+# map are just an implementation detail. The tests wanting to use that file
+# should put all there object files in an explicitely named sub-directory
+# of Inputs, and they should be named 1.o, 2.o, ...
+# As not finding an object file or symbols isn't a fatal error for dsymutil,
+# you can extend this file with as much object files and symbols as needed.
+
+---
+triple: 'x86_64-apple-darwin'
+objects:
+ - filename: 1.o
+ symbols:
+ - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x10000, size: 0x10 }
+ - filename: 2.o
+ symbols:
+ - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x20000, size: 0x10 }
+ - filename: 3.o
+ symbols:
+ - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x30000, size: 0x10 }
+ - { sym: __ZN1S3bazIiEEvT_, objAddr: 0x0, binAddr: 0x30010, size: 0x10 }
+...
+
diff --git a/test/tools/dsymutil/X86/empty_range.s b/test/tools/dsymutil/X86/empty_range.s
new file mode 100644
index 000000000000..dfe734f1b2bb
--- /dev/null
+++ b/test/tools/dsymutil/X86/empty_range.s
@@ -0,0 +1,61 @@
+# This test verifies that an empty range list in the .debug_ranges section
+# doesn't crash llvm-dsymutil. As clang does not produce this kind of debug
+# info anymore, we used this hand-crafted assembly file to produce a testcase
+# Compile with:
+# llvm-mc -triple x86_64-apple-darwin -filetype=obj -o 1.o empty_range.o
+
+# RUN: llvm-dsymutil -f -y %p/dummy-debug-map.map -oso-prepend-path %p/../Inputs/empty_range -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+ .section __TEXT,__text,regular,pure_instructions
+ .macosx_version_min 10, 11
+ .globl __Z3foov
+ .align 4, 0x90
+__Z3foov: ## @_Z3foov
+Lfunc_begin0:
+ pushq %rbp
+ movq %rsp, %rbp
+ popq %rbp
+ retq
+Lfunc_end0:
+ .section __DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+ .byte 1 ## Abbreviation Code
+ .byte 17 ## DW_TAG_compile_unit
+ .byte 1 ## DW_CHILDREN_yes
+ .byte 0 ## EOM(1)
+ .byte 0 ## EOM(2)
+ .byte 2 ## Abbreviation Code
+ .byte 46 ## DW_TAG_subprogram
+ .byte 0 ## DW_CHILDREN_no
+ .byte 17 ## DW_AT_low_pc
+ .byte 1 ## DW_FORM_addr
+ .byte 0x55 ## DW_AT_ranges
+ .byte 6 ## DW_FORM_data4
+ .byte 0 ## EOM(1)
+ .byte 0 ## EOM(2)
+ .byte 0 ## EOM(3)
+ .section __DWARF,__debug_info,regular,debug
+Lsection_info:
+ .long 22 ## Length of Unit
+ .short 2 ## DWARF version number
+ .long 0 ## Offset Into Abbrev. Section
+ .byte 8 ## Address Size (in bytes)
+ .byte 1 ## Abbrev [1] DW_TAG_compile_unit
+ .byte 2 ## Abbrev [2] DW_TAG_subprogram
+ .quad Lfunc_begin0 ## DW_AT_low_pc
+ .long 0 ## DW_AT_ranges (pointing at an empty entry)
+ .byte 0 ## End Of Children Mark
+ .section __DWARF,__debug_ranges,regular,debug
+Ldebug_range:
+ .long 0
+ .long 0
+
+# CHECK: DW_TAG_compile_unit
+# CHECK: DW_TAG_subprogram
+# CHECK-NEXT: DW_AT_low_pc{{.*}}(0x0000000000010000)
+# CHECK-NEXT: DW_AT_ranges{{.*}}(0x00000000)
+
+# There was a bug that would use the currently active object file when a
+# debug map object isn't found. Check that we only linked one file.
+# CHECK-NOT: DW_TAG_compile_unit
+
diff --git a/test/tools/dsymutil/X86/fat-archive-input-i386.test b/test/tools/dsymutil/X86/fat-archive-input-i386.test
new file mode 100644
index 000000000000..f4ea288768c8
--- /dev/null
+++ b/test/tools/dsymutil/X86/fat-archive-input-i386.test
@@ -0,0 +1,16 @@
+# REQUIRES: object-emission
+# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+---
+triple: 'i386-apple-darwin'
+objects:
+ - filename: libfat-test.a(fat-test.o)
+ symbols:
+ - { sym: _i386_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 }
+...
+
+# CHECK: .debug_info contents:
+# CHECK: DW_TAG_variable
+# CHECK-NOT: {{DW_TAG|NULL}}
+# CHECK: DW_AT_name{{.*}}"i386_var"
+
diff --git a/test/tools/dsymutil/X86/fat-object-input-x86_64.test b/test/tools/dsymutil/X86/fat-object-input-x86_64.test
new file mode 100644
index 000000000000..cdd5a4c08d9c
--- /dev/null
+++ b/test/tools/dsymutil/X86/fat-object-input-x86_64.test
@@ -0,0 +1,16 @@
+# REQUIRES: object-emission
+# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+---
+triple: 'x86_64-apple-darwin'
+objects:
+ - filename: fat-test.o
+ symbols:
+ - { sym: _x86_64_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 }
+...
+
+# CHECK: .debug_info contents:
+# CHECK: DW_TAG_variable
+# CHECK-NOT: {{DW_TAG|NULL}}
+# CHECK: DW_AT_name{{.*}}"x86_64_var"
+
diff --git a/test/tools/dsymutil/X86/fat-object-input-x86_64h.test b/test/tools/dsymutil/X86/fat-object-input-x86_64h.test
new file mode 100644
index 000000000000..53aed1ec4443
--- /dev/null
+++ b/test/tools/dsymutil/X86/fat-object-input-x86_64h.test
@@ -0,0 +1,16 @@
+# REQUIRES: object-emission
+# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+---
+triple: 'x86_64h-apple-darwin'
+objects:
+ - filename: fat-test.o
+ symbols:
+ - { sym: _x86_64h_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 }
+...
+
+# CHECK: .debug_info contents:
+# CHECK: DW_TAG_variable
+# CHECK-NOT: {{DW_TAG|NULL}}
+# CHECK: DW_AT_name{{.*}}"x86_64h_var"
+
diff --git a/test/tools/dsymutil/X86/frame-1.test b/test/tools/dsymutil/X86/frame-1.test
index 7852e68a142a..27bc17d75984 100644
--- a/test/tools/dsymutil/X86/frame-1.test
+++ b/test/tools/dsymutil/X86/frame-1.test
@@ -2,14 +2,14 @@
# RUN: rm -rf %t
# RUN: mkdir -p %t
# RUN: llc -filetype=obj %p/../Inputs/frame-dw2.ll -o %t/frame-dw2.o
-# RUN: llvm-dsymutil -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s
+# RUN: llvm-dsymutil -f -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s
# This test is meant to verify that identical CIEs will get reused
# in the same file but also inbetween files. For this to happen, we
# link twice the same file using this made-up debug map:
---
-triple: 'i386-unknown-unknown-macho'
+triple: 'i386-apple-darwin'
objects:
- filename: frame-dw2.o
symbols:
diff --git a/test/tools/dsymutil/X86/frame-2.test b/test/tools/dsymutil/X86/frame-2.test
index 168e342a4f74..89a7670f86f0 100644
--- a/test/tools/dsymutil/X86/frame-2.test
+++ b/test/tools/dsymutil/X86/frame-2.test
@@ -3,7 +3,7 @@
# RUN: mkdir -p %t
# RUN: llc -filetype=obj %p/../Inputs/frame-dw2.ll -o %t/frame-dw2.o
# RUN: llc -filetype=obj %p/../Inputs/frame-dw4.ll -o %t/frame-dw4.o
-# RUN: llvm-dsymutil -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s
+# RUN: llvm-dsymutil -f -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s
# Check the handling of multiple different CIEs. To have CIEs that
# appear to be different, use a dwarf2 version of the file along with
@@ -12,7 +12,7 @@
# appears again. This is a behavior we inherited from dsymutil-classic
# but this should be fixed (see comment in patchFrameInfoForObject())
---
-triple: 'i386-unknown-unknown-macho'
+triple: 'i386-apple-darwin'
objects:
- filename: frame-dw2.o
symbols:
diff --git a/test/tools/dsymutil/X86/lit.local.cfg b/test/tools/dsymutil/X86/lit.local.cfg
index c8625f4d9d24..05f8b38b3346 100644
--- a/test/tools/dsymutil/X86/lit.local.cfg
+++ b/test/tools/dsymutil/X86/lit.local.cfg
@@ -1,2 +1,4 @@
if not 'X86' in config.root.targets:
config.unsupported = True
+
+config.suffixes = ['.test', '.cpp', '.m', '.s']
diff --git a/test/tools/dsymutil/X86/mismatch.m b/test/tools/dsymutil/X86/mismatch.m
new file mode 100644
index 000000000000..33ae782df057
--- /dev/null
+++ b/test/tools/dsymutil/X86/mismatch.m
@@ -0,0 +1,23 @@
+/* Compile with:
+ cat >modules.modulemap <<EOF
+ module mismatch {
+ header "mismatch.h"
+ }
+ EOF
+ echo "struct s;"> mismatch.h
+ clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \
+ -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=. \
+ -fdisable-module-hash mismatch.m -o 1.o
+ echo > mismatch.h
+ clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \
+ -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=. \
+ -fdisable-module-hash mismatch.m -o /dev/null
+*/
+
+// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/mismatch \
+// RUN: -y %p/dummy-debug-map.map -o %t.bin 2>&1 | FileCheck %s
+
+@import mismatch;
+
+void f() {}
+// CHECK: warning: hash mismatch
diff --git a/test/tools/dsymutil/X86/modules.m b/test/tools/dsymutil/X86/modules.m
new file mode 100644
index 000000000000..046a8c1304a0
--- /dev/null
+++ b/test/tools/dsymutil/X86/modules.m
@@ -0,0 +1,117 @@
+/* Compile with:
+ cat >modules.modulemap <<EOF
+ module Foo {
+ header "Foo.h"
+ export *
+ }
+ module Bar {
+ header "Bar.h"
+ export *
+ }
+EOF
+ clang -D BAR_H -E -o Bar.h modules.m
+ clang -D FOO_H -E -o Foo.h modules.m
+ clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \
+ -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=. \
+ -fdisable-module-hash modules.m -o 1.o
+*/
+
+// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/modules \
+// RUN: -y %p/dummy-debug-map.map -o - \
+// RUN: | llvm-dwarfdump --debug-dump=info - | FileCheck %s
+
+// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/modules -y \
+// RUN: %p/dummy-debug-map.map -o %t 2>&1 | FileCheck --check-prefix=WARN %s
+
+// WARN-NOT: warning: hash mismatch
+
+// ---------------------------------------------------------------------
+#ifdef BAR_H
+// ---------------------------------------------------------------------
+// CHECK: DW_TAG_compile_unit
+// CHECK-NOT: DW_TAG
+// CHECK: DW_TAG_module
+// CHECK-NEXT: DW_AT_name{{.*}}"Bar"
+// CHECK: 0x0[[BAR:.*]]: DW_TAG_structure_type
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name {{.*}}"Bar"
+// CHECK-NOT: DW_TAG
+// CHECK: DW_TAG_member
+// CHECK: DW_AT_name {{.*}}"value"
+// CHECK: DW_TAG_structure_type
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name {{.*}}"PruneMeNot"
+
+struct Bar {
+ int value;
+};
+
+struct PruneMeNot;
+
+#else
+// ---------------------------------------------------------------------
+#ifdef FOO_H
+// ---------------------------------------------------------------------
+// CHECK: DW_TAG_compile_unit
+// CHECK-NOT: DW_TAG
+// CHECK: 0x0[[FOO:.*]]: DW_TAG_module
+// CHECK-NEXT: DW_AT_name{{.*}}"Foo"
+// CHECK-NOT: DW_TAG
+// CHECK: DW_TAG_typedef
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{0*}}[[BAR]])
+// CHECK: DW_TAG_structure_type
+// CHECK-NEXT: DW_AT_name{{.*}}"S"
+// CHECK-NOT: DW_TAG
+// CHECK: 0x0[[INTERFACE:.*]]: DW_TAG_structure_type
+// CHECK-NEXT: DW_AT_name{{.*}}"Foo"
+
+@import Bar;
+typedef struct Bar Bar;
+struct S {};
+
+@interface Foo {
+ int ivar;
+}
+@end
+
+// ---------------------------------------------------------------------
+#else
+// ---------------------------------------------------------------------
+
+// CHECK: DW_TAG_compile_unit
+// CHECK: DW_AT_low_pc
+// CHECK-NOT:DW_TAG
+// CHECK: DW_TAG_module
+// CHECK-NEXT: DW_AT_name{{.*}}"Foo"
+// CHECK-NOT: DW_TAG
+// CHECK: DW_TAG_typedef
+// CHECK-NOT: DW_TAG
+// CHECK: NULL
+//
+// CHECK: DW_TAG_imported_declaration
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_import [DW_FORM_ref_addr] (0x{{0*}}[[FOO]]
+//
+// CHECK: DW_TAG_subprogram
+// CHECK: DW_AT_name {{.*}}"main"
+//
+// CHECK: DW_TAG_variable
+// CHECK: DW_TAG_variable
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name{{.*}}"foo"
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_type {{.*}}{0x{{0*}}[[PTR:.*]]}
+//
+// CHECK: 0x{{0*}}[[PTR]]: DW_TAG_pointer_type
+// CHECK-NEXT DW_AT_type [DW_FORM_ref_addr] {0x{{0*}}[[INTERFACE]])
+
+@import Foo;
+int main(int argc, char **argv) {
+ Bar bar;
+ Foo *foo = 0;
+ bar.value = 42;
+ return bar.value;
+}
+#endif
+#endif
diff --git a/test/tools/dsymutil/X86/multiple-inputs.test b/test/tools/dsymutil/X86/multiple-inputs.test
new file mode 100644
index 000000000000..01b4f1bf52b5
--- /dev/null
+++ b/test/tools/dsymutil/X86/multiple-inputs.test
@@ -0,0 +1,31 @@
+RUN: rm -rf %T/multiple-inputs
+RUN: mkdir -p %T/multiple-inputs
+
+RUN: cat %p/../Inputs/basic.macho.x86_64 > %T/multiple-inputs/basic.macho.x86_64
+RUN: cat %p/../Inputs/basic-archive.macho.x86_64 > %T/multiple-inputs/basic-archive.macho.x86_64
+RUN: cat %p/../Inputs/basic-lto.macho.x86_64 > %T/multiple-inputs/basic-lto.macho.x86_64
+RUN: cat %p/../Inputs/basic-lto-dw4.macho.x86_64 > %T/multiple-inputs/basic-lto-dw4.macho.x86_64
+
+# Multiple inputs in flat mode
+RUN: llvm-dsymutil -f -oso-prepend-path=%p/.. %T/multiple-inputs/basic.macho.x86_64 %T/multiple-inputs/basic-archive.macho.x86_64 %T/multiple-inputs/basic-lto.macho.x86_64 %T/multiple-inputs/basic-lto-dw4.macho.x86_64
+RUN: llvm-dwarfdump %T/multiple-inputs/basic.macho.x86_64.dwarf \
+RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=BASIC
+RUN: llvm-dwarfdump %T/multiple-inputs/basic-archive.macho.x86_64.dwarf \
+RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=ARCHIVE
+RUN: llvm-dwarfdump %T/multiple-inputs/basic-lto.macho.x86_64.dwarf | FileCheck %S/basic-lto-linking-x86.test
+RUN: llvm-dwarfdump %T/multiple-inputs/basic-lto-dw4.macho.x86_64.dwarf | FileCheck %S/basic-lto-dw4-linking-x86.test
+
+# Multiple inputs that end up in the same named bundle
+RUN: llvm-dsymutil -oso-prepend-path=%p/.. %T/multiple-inputs/basic.macho.x86_64 %T/multiple-inputs/basic-archive.macho.x86_64 %T/multiple-inputs/basic-lto.macho.x86_64 %T/multiple-inputs/basic-lto-dw4.macho.x86_64 -o %t.dSYM
+RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 \
+RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=BASIC
+RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic-archive.macho.x86_64 \
+RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=ARCHIVE
+RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic-lto.macho.x86_64 | FileCheck %S/basic-lto-linking-x86.test
+RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic-lto-dw4.macho.x86_64 | FileCheck %S/basic-lto-dw4-linking-x86.test
+
+# Multiple inputs in a named bundle in flat mode... impossible.
+RUN: not llvm-dsymutil -f -oso-prepend-path=%p/.. %T/multiple-inputs/basic.macho.x86_64 %T/multiple-inputs/basic-archive.macho.x86_64 %T/multiple-inputs/basic-lto.macho.x86_64 %T/multiple-inputs/basic-lto-dw4.macho.x86_64 -o %t.dSYM 2>&1 | FileCheck %s
+
+CHECK: error: cannot use -o with multiple inputs in flat mode
+
diff --git a/test/tools/dsymutil/X86/odr-anon-namespace.cpp b/test/tools/dsymutil/X86/odr-anon-namespace.cpp
new file mode 100644
index 000000000000..a66fc830b67a
--- /dev/null
+++ b/test/tools/dsymutil/X86/odr-anon-namespace.cpp
@@ -0,0 +1,65 @@
+/* Compile with:
+ for FILE in `seq 2`; do
+ clang -g -c odr-anon-namespace.cpp -DFILE$FILE -o odr-anon-namespace/$FILE.o
+ done
+ */
+
+// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-anon-namespace -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+#ifdef FILE1
+// Currently llvm-dsymutil will unique the contents of anonymous
+// namespaces if they are from the same file/line. Force this
+// namespace to appear different eventhough it's the same (this
+// uniquing is actually a bug kept for backward compatibility, see the
+// comments in DeclContextTree::getChildDeclContext()).
+#line 42
+#endif
+namespace {
+class C {};
+}
+
+void foo() {
+ C c;
+}
+
+// Keep the ifdef guards for FILE1 and FILE2 even if all the code is
+// above to clearly show what the CHECK lines are testing.
+#ifdef FILE1
+
+// CHECK: TAG_compile_unit
+// CHECK-NOT: DW_TAG
+// CHECK: AT_name{{.*}}"odr-anon-namespace.cpp"
+
+// CHECK: DW_TAG_variable
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name {{.*}}"c"
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_type {{.*}}0x00000000[[C_FILE1:[0-9a-f]*]]
+
+// CHECK: DW_TAG_namespace
+// CHECK-NOT: {{DW_AT_name|NULL|DW_TAG}}
+// CHECK: 0x[[C_FILE1]]:{{.*}}DW_TAG_class_type
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name{{.*}}"C"
+
+#elif defined(FILE2)
+
+// CHECK: TAG_compile_unit
+// CHECK-NOT: DW_TAG
+// CHECK: AT_name{{.*}}"odr-anon-namespace.cpp"
+
+// CHECK: DW_TAG_variable
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name {{.*}}"c"
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_type {{.*}}0x00000000[[C_FILE2:[0-9a-f]*]]
+
+// CHECK: DW_TAG_namespace
+// CHECK-NOT: {{DW_AT_name|NULL|DW_TAG}}
+// CHECK: 0x[[C_FILE2]]:{{.*}}DW_TAG_class_type
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name{{.*}}"C"
+
+#else
+#error "You must define which file you generate"
+#endif
diff --git a/test/tools/dsymutil/X86/odr-member-functions.cpp b/test/tools/dsymutil/X86/odr-member-functions.cpp
new file mode 100644
index 000000000000..737d5a7abf61
--- /dev/null
+++ b/test/tools/dsymutil/X86/odr-member-functions.cpp
@@ -0,0 +1,109 @@
+/* Compile with:
+ for FILE in `seq 3`; do
+ clang -g -c odr-member-functions.cpp -DFILE$FILE -o odr-member-functions/$FILE.o
+ done
+ */
+
+// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-member-functions -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+struct S {
+ __attribute__((always_inline)) void foo() { bar(); }
+ __attribute__((always_inline)) void foo(int i) { if (i) bar(); }
+ void bar();
+
+ template<typename T> void baz(T t) {}
+};
+
+#ifdef FILE1
+void foo() {
+ S s;
+}
+
+// CHECK: TAG_compile_unit
+// CHECK-NOT: {{DW_TAG|NULL}}
+// CHECK: AT_name{{.*}}"odr-member-functions.cpp"
+
+// CHECK: 0x[[S:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type
+// CHECK-NOT: {{DW_TAG|NULL}}
+// CHECK: DW_AT_name{{.*}}"S"
+// CHECK-NOT: NULL
+// CHECK: 0x[[FOO:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+// CHECK-NEXT: DW_AT_MIPS_linkage_name{{.*}}"_ZN1S3fooEv"
+// CHECK: NULL
+// CHECK: 0x[[FOOI:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+// CHECK-NEXT: DW_AT_MIPS_linkage_name{{.*}}"_ZN1S3fooEi"
+
+#elif defined(FILE2)
+void foo() {
+ S s;
+ // Check that the overloaded member functions are resolved correctly
+ s.foo();
+ s.foo(1);
+}
+
+// CHECK: TAG_compile_unit
+// CHECK-NOT: DW_TAG
+// CHECK: AT_name{{.*}}"odr-member-functions.cpp"
+
+// Normal member functions should be desribed by the type in the first
+// CU, thus we should be able to reuse its definition and avoid
+// reemiting it.
+// CHECK-NOT: DW_TAG_structure_type
+
+// CHECK: 0x[[FOO_SUB:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+// CHECK-NEXT: DW_AT_specification{{.*}}[[FOO]]
+// CHECK-NOT: DW_TAG_structure_type
+// CHECK: 0x[[FOOI_SUB:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+// CHECK-NEXT: DW_AT_specification{{.*}}[[FOOI]]
+// CHECK-NOT: DW_TAG_structure_type
+
+// CHECK: DW_TAG_variable
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name {{.*}}"s"
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_type {{.*}}[[S]])
+// CHECK: DW_TAG_inlined_subroutine
+// CHECK-NEXT: DW_AT_abstract_origin{{.*}}[[FOO_SUB]]
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_call_line{{.*}}40
+// CHECK: DW_TAG_inlined_subroutine
+// CHECK-NEXT: DW_AT_abstract_origin{{.*}}[[FOOI_SUB]]
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_call_line{{.*}}41
+
+#elif defined(FILE3)
+void foo() {
+ S s;
+ s.baz<int>(42);
+}
+
+// CHECK: TAG_compile_unit
+// CHECK-NOT: DW_TAG
+// CHECK: AT_name{{.*}}"odr-member-functions.cpp"
+
+// Template or other implicit members will be included in the type
+// only if they are generated. Thus actually creating a new type.
+// CHECK: DW_TAG_structure_type
+
+// Skip 'normal' member functions
+// CHECK: DW_TAG_subprogram
+// CHECK: DW_TAG_subprogram
+// CHECK: DW_TAG_subprogram
+
+// This is the 'baz' member
+// CHECK: 0x[[BAZ:[0-9a-f]*]]: DW_TAG_subprogram
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_MIPS_linkage_name {{.*}}"_ZN1S3bazIiEEvT_"
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name {{.*}}"baz<int>"
+
+// Skip foo3
+// CHECK: DW_TAG_subprogram
+
+// baz instanciation:
+// CHECK: DW_TAG_subprogram
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_specification {{.*}}[[BAZ]] "_ZN1S3bazIiEEvT_"
+#else
+#error "You must define which file you generate"
+#endif
diff --git a/test/tools/dsymutil/X86/odr-uniquing.cpp b/test/tools/dsymutil/X86/odr-uniquing.cpp
new file mode 100644
index 000000000000..bb7ae50a2c72
--- /dev/null
+++ b/test/tools/dsymutil/X86/odr-uniquing.cpp
@@ -0,0 +1,187 @@
+/* Compile with:
+ clang -g -c odr-uniquing.cpp -o odr-uniquing/1.o
+ cp odr-uniquing/1.o odr-uniquing/2.o
+ The aim of these test is to check that all the 'type types' that
+ should be uniqued through the ODR really are.
+
+ The resulting object file is linked against itself using a fake
+ debug map. The end result is:
+ - with ODR uniquing: all types (expect for the union for now) in
+ the second CU should point back to the types of the first CU.
+ - without ODR uniquing: all types are re-emited in the second CU
+ */
+
+// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck -check-prefix=ODR -check-prefix=CHECK %s
+// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -no-odr -o - | llvm-dwarfdump -debug-dump=info - | FileCheck -check-prefix=NOODR -check-prefix=CHECK %s
+
+// The first compile unit contains all the types:
+// CHECK: TAG_compile_unit
+// CHECK-NOT: DW_TAG
+// CHECK: AT_name{{.*}}"odr-uniquing.cpp"
+
+struct S {
+ struct Nested {};
+};
+
+// CHECK: 0x[[S:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type
+// CHECK-NEXT: DW_AT_name{{.*}}"S"
+// CHECK-NOT: NULL
+// CHECK: 0x[[NESTED:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name{{.*}}"Nested"
+// CHECK: NULL
+
+namespace N {
+class C {};
+}
+
+// CHECK: DW_TAG_namespace
+// CHECK-NEXT: DW_AT_name{{.*}}"N"
+// CHECK-NOT: NULL
+// CHECK: 0x[[NC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type
+// CHECK-NEXT: DW_AT_name{{.*}}"C"
+// CHECK: NULL
+
+union U {
+ class C {} C;
+ struct S {} S;
+};
+
+// CHECK: 0x[[U:[0-9a-f]*]]:{{.*}}DW_TAG_union_type
+// CHECK-NEXT: DW_AT_name{{.*}}"U"
+// CHECK-NOT: NULL
+// CHECK: 0x[[UC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type
+// CHECK-NOT: NULL
+// CHECK: 0x[[US:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type
+// CHECK: NULL
+
+typedef S AliasForS;
+
+// CHECK: 0x[[ALIASFORS:[0-9a-f]*]]:{{.*}}DW_TAG_typedef
+// CHECK-NEXT: DW_AT_type{{.*}}[[S]]
+// CHECK-NEXT: DW_AT_name{{.*}}"AliasForS"
+
+namespace {
+class AnonC {};
+}
+
+// CHECK: DW_TAG_namespace
+// CHECK-NOT: {{DW_AT_name|NULL|DW_TAG}}
+// CHECK: 0x[[ANONC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type
+// CHECK-NEXT: DW_AT_name{{.*}}"AnonC"
+
+// This function is only here to hold objects that refer to the above types.
+void foo() {
+ AliasForS s;
+ S::Nested n;
+ N::C nc;
+ AnonC ac;
+ U u;
+}
+
+// The second CU contents depend on wether we disabled ODR uniquing or
+// not.
+
+// CHECK: TAG_compile_unit
+// CHECK-NOT: DW_TAG
+// CHECK: AT_name{{.*}}"odr-uniquing.cpp"
+
+// The union itself is not uniqued for now (for dsymutil-compatibility),
+// but the types defined inside it should be.
+// ODR: DW_TAG_union_type
+// ODR-NEXT: DW_AT_name{{.*}}"U"
+// ODR: DW_TAG_member
+// ODR-NEXT: DW_AT_name{{.*}}"C"
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_type{{.*}}[[UC]]
+// ODR: DW_TAG_member
+// ODR-NEXT: DW_AT_name{{.*}}"S"
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_type{{.*}}[[US]]
+
+// Check that the variables point to the right type
+// ODR: DW_TAG_subprogram
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_name{{.*}}"foo"
+// ODR-NOT: NULL
+// ODR: DW_TAG_variable
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_name{{.*}}"s"
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_type{{.*}}[[ALIASFORS]]
+// ODR: DW_AT_name{{.*}}"n"
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_type{{.*}}[[NESTED]]
+// ODR: DW_TAG_variable
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_name{{.*}}"nc"
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_type{{.*}}[[NC]]
+// ODR: DW_TAG_variable
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_name{{.*}}"ac"
+// ODR-NOT: DW_TAG
+// ODR: DW_AT_type{{.*}}[[ANONC]]
+
+// With no ODR uniquing, we should get copies of all the types:
+
+// This is "struct S"
+// NOODR: 0x[[DUP_S:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type
+// NOODR-NEXT: DW_AT_name{{.*}}"S"
+// NOODR-NOT: NULL
+// NOODR: 0x[[DUP_NESTED:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_name{{.*}}"Nested"
+
+// This is "class N::C"
+// NOODR: DW_TAG_namespace
+// NOODR-NEXT: DW_AT_name{{.*}}"N"
+// NOODR: 0x[[DUP_NC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type
+// NOODR-NEXT: DW_AT_name{{.*}}"C"
+
+// This is "union U"
+// NOODR: 0x[[DUP_U:[0-9a-f]*]]:{{.*}}DW_TAG_union_type
+// NOODR-NEXT: DW_AT_name{{.*}}"U"
+// NOODR-NOT: NULL
+// NOODR: 0x[[DUP_UC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type
+// NOODR-NOT: NULL
+// NOODR: 0x[[DUP_US:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type
+// NOODR: NULL
+
+// Check that the variables point to the right type
+// NOODR: DW_TAG_subprogram
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_name{{.*}}"foo"
+// NOODR-NOT: NULL
+// NOODR: DW_TAG_variable
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_name{{.*}}"s"
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_type{{.*}}0x[[DUP_ALIASFORS:[0-9a-f]*]]
+// NOODR: DW_TAG_variable
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_name{{.*}}"n"
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_type{{.*}}[[DUP_NESTED]]
+// NOODR: DW_TAG_variable
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_name{{.*}}"nc"
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_type{{.*}}[[DUP_NC]]
+// NOODR: DW_TAG_variable
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_name{{.*}}"ac"
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_type{{.*}}0x[[DUP_ANONC:[0-9a-f]*]]
+
+// This is "AliasForS"
+// NOODR: 0x[[DUP_ALIASFORS]]:{{.*}}DW_TAG_typedef
+// NOODR-NOT: DW_TAG
+// NOODR: DW_AT_name{{.*}}"AliasForS"
+
+// This is "(anonymous namespace)::AnonC"
+// NOODR: DW_TAG_namespace
+// NOODR-NOT: {{DW_AT_name|NULL|DW_TAG}}
+// NOODR: 0x[[DUP_ANONC]]:{{.*}}DW_TAG_class_type
+// NOODR-NEXT: DW_AT_name{{.*}}"AnonC"
+
diff --git a/test/tools/dsymutil/X86/submodules.m b/test/tools/dsymutil/X86/submodules.m
new file mode 100644
index 000000000000..b2425a91cbda
--- /dev/null
+++ b/test/tools/dsymutil/X86/submodules.m
@@ -0,0 +1,52 @@
+/* Compile with:
+ cat >modules.modulemap <<EOF
+ module Parent {
+ module Child {
+ header "Child.h"
+ }
+ module Empty {
+ header "Empty.h"
+ }
+ }
+EOF
+ clang -D CHILD_H -E -o Child.h submodules.m
+ touch empty.h
+ clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \
+ -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=. \
+ -fdisable-module-hash submodules.m -o 1.o
+*/
+
+// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/submodules \
+// RUN: -y %p/dummy-debug-map.map -o - \
+// RUN: | llvm-dwarfdump --debug-dump=info - | FileCheck %s
+
+// ---------------------------------------------------------------------
+#ifdef CHILD_H
+// ---------------------------------------------------------------------
+
+// CHECK: DW_TAG_compile_unit
+// CHECK-NOT: DW_TAG
+// CHECK: DW_TAG_module
+// CHECK-NEXT: DW_AT_name{{.*}}"Parent"
+// CHECK: DW_TAG_module
+// CHECK-NEXT: DW_AT_name{{.*}}"Child"
+// CHECK: DW_TAG_structure_type
+// CHECK-NOT: DW_TAG
+// CHECK: DW_AT_name {{.*}}"PruneMeNot"
+struct PruneMeNot;
+
+// ---------------------------------------------------------------------
+#else
+// ---------------------------------------------------------------------
+
+// CHECK: DW_TAG_compile_unit
+// CHECK: DW_TAG_module
+// CHECK-NEXT: DW_AT_name{{.*}}"Parent"
+// CHECK: 0x0[[EMPTY:.*]]: DW_TAG_module
+// CHECK-NEXT: DW_AT_name{{.*}}"Empty"
+
+// CHECK: DW_AT_import {{.*}}0x{{0*}}[[EMPTY]]
+@import Parent.Child;
+@import Parent.Empty;
+int main(int argc, char **argv) { return 0; }
+#endif
diff --git a/test/tools/dsymutil/absolute_symbol.test b/test/tools/dsymutil/absolute_symbol.test
new file mode 100644
index 000000000000..cdd6ae832136
--- /dev/null
+++ b/test/tools/dsymutil/absolute_symbol.test
@@ -0,0 +1,16 @@
+RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path %p %p/Inputs/absolute_sym.macho.i386 | FileCheck %s
+
+The tested object file has been created by the dummy Objective-C code:
+@interface Foo
+@end
+
+@implementation Foo
+@end
+
+int main() { return 0; }
+
+compiled for i386. This create an absolute symbol .objc_class_name_Foo
+We must not consider this symbol for debug info linking as its address
+might conflict with other real symbols in the same file.
+
+CHECK-NOT: objc_class_name_Foo
diff --git a/test/tools/dsymutil/arch-option.test b/test/tools/dsymutil/arch-option.test
new file mode 100644
index 000000000000..0920efb529d6
--- /dev/null
+++ b/test/tools/dsymutil/arch-option.test
@@ -0,0 +1,39 @@
+Processing of the -arch option happens at debug map parsing time, thus just
+looking at the dumped debug maps is enough to validate their effects.
+
+RUN: llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib | FileCheck %s -check-prefix=ARM64 -check-prefix=ARMV7S -check-prefix=ARMV7 -check-prefix=CHECK
+RUN: llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch all | FileCheck %s -check-prefix=ARM64 -check-prefix=ARMV7S -check-prefix=ARMV7 -check-prefix=CHECK
+RUN: llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch='*' | FileCheck %s -check-prefix=ARM64 -check-prefix=ARMV7S -check-prefix=ARMV7 -check-prefix=CHECK
+RUN: llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm64 | FileCheck %s -check-prefix=ARM64 -check-prefix=CHECK
+RUN: llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm | FileCheck %s -check-prefix=ARMV7S -check-prefix=ARMV7 -check-prefix=CHECK
+RUN: llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch armv7 | FileCheck %s -check-prefix=ARMV7 -check-prefix=CHECK
+RUN: llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm64 -arch armv7s | FileCheck %s -check-prefix=ARM64 -check-prefix=ARMV7S -check-prefix=CHECK
+RUN: not llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm42 2>&1 | FileCheck %s -check-prefix=BADARCH
+RUN: not llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch i386 2>&1 | FileCheck %s -check-prefix=EMPTY
+
+
+ARMV7: ---
+ARMV7-NOT: ...
+ARMV7: triple: 'thumbv7-apple-darwin'
+ARMV7-NOT: ...
+ARMV7: sym: _armv7_var
+ARMV7-NOT: ---
+
+ARMV7S: ---
+ARMV7S-NOT: ...
+ARMV7S: triple: 'thumbv7s-apple-darwin'
+ARMV7S-NOT: ...
+ARMV7S: sym: _armv7s_var
+ARMV7S-NOT: ---
+
+ARM64: ---
+ARM64-NOT: ...
+ARM64: triple: 'arm64-apple-darwin'
+ARM64-NOT: ...
+ARM64: sym: _arm64_var
+ARM64-NOT: ---
+
+CHECK: ...
+
+BADARCH: error: Unsupported cpu architecture: 'arm42'
+EMPTY: error: no architecture to link
diff --git a/test/tools/dsymutil/archive-timestamp.test b/test/tools/dsymutil/archive-timestamp.test
new file mode 100644
index 000000000000..f3f2162fa595
--- /dev/null
+++ b/test/tools/dsymutil/archive-timestamp.test
@@ -0,0 +1,24 @@
+# RUN: llvm-dsymutil -no-output -oso-prepend-path=%p -y %s 2>&1 | FileCheck %s
+
+# This is the archive member part of basic-archive.macho.x86_64 debug map with corrupted timestamps.
+
+# CHECK: warning: {{.*}}libbasic.a(basic2.macho.x86_64.o): {{[Nn]o}} such file
+# CHECK: warning: {{.*}}libbasic.a(basic3.macho.x86_64.o): {{[Nn]o}} such file
+
+---
+triple: 'x86_64-apple-darwin'
+objects:
+ - filename: '/Inputs/libbasic.a(basic2.macho.x86_64.o)'
+ timestamp: 141869239
+ symbols:
+ - { sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050 }
+ - { sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0000000100001004, size: 0x00000000 }
+ - { sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F20, size: 0x00000017 }
+ - { sym: _baz, objAddr: 0x0000000000000310, binAddr: 0x0000000100001000, size: 0x00000000 }
+ - filename: '/Inputs/libbasic.a(basic3.macho.x86_64.o)'
+ timestamp: 418692393
+ symbols:
+ - { sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000 }
+ - { sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050 }
+ - { sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F90, size: 0x00000019 }
+...
diff --git a/test/tools/dsymutil/basic-linking.test b/test/tools/dsymutil/basic-linking.test
index ec6a5b771461..bff5b5df9e66 100644
--- a/test/tools/dsymutil/basic-linking.test
+++ b/test/tools/dsymutil/basic-linking.test
@@ -1,6 +1,7 @@
-RUN: llvm-dsymutil -no-output -v -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s
-RUN: llvm-dsymutil -no-output -v -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO
-RUN: llvm-dsymutil -no-output -v -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE
+RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s
+RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO
+RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE
+RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 %p/Inputs/basic-lto.macho.x86_64 %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LTO --check-prefix=CHECK-ARCHIVE
This test check the basic Dwarf linking process through the debug dumps.
diff --git a/test/tools/dsymutil/debug-map-parsing.test b/test/tools/dsymutil/debug-map-parsing.test
index 5091dfbfc24d..2b9d0917609d 100644
--- a/test/tools/dsymutil/debug-map-parsing.test
+++ b/test/tools/dsymutil/debug-map-parsing.test
@@ -1,15 +1,16 @@
-RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s
-RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO
-RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE
-RUN: llvm-dsymutil -v -dump-debug-map %p/Inputs/basic.macho.x86_64 2>&1 | FileCheck %s --check-prefix=NOT-FOUND
-RUN: not llvm-dsymutil -v -dump-debug-map %p/Inputs/inexistant 2>&1 | FileCheck %s --check-prefix=NO-EXECUTABLE
+RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s
+RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO
+RUN: llvm-dsymutil -verbose -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE
+RUN: llvm-dsymutil -dump-debug-map %p/Inputs/basic.macho.x86_64 2>&1 | FileCheck %s --check-prefix=NOT-FOUND
+RUN: not llvm-dsymutil -dump-debug-map %p/Inputs/inexistant 2>&1 | FileCheck %s --check-prefix=NO-EXECUTABLE
Check that We can parse the debug map of the basic executable.
CHECK-NOT: error
CHECK: ---
-CHECK: triple: 'x86_64-unknown-unknown-macho'
+CHECK: triple: 'x86_64-apple-darwin'
+CHECK: binary-path:{{.*}}/Inputs/basic.macho.x86_64
CHECK: filename:{{.*}}/Inputs/basic1.macho.x86_64.o
CHECK-DAG: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024
CHECK: filename{{.*}}/Inputs/basic2.macho.x86_64.o
@@ -28,7 +29,8 @@ Check that we can parse the debug-map of the basic-lto executable
CHECK-LTO-NOT: error
CHECK-LTO: ---
-CHECK-LTO: triple: 'x86_64-unknown-unknown-macho'
+CHECK-LTO: triple: 'x86_64-apple-darwin'
+CHECK-LTO: binary-path:{{.*}}/Inputs/basic-lto.macho.x86_64
CHECK-LTO: /Inputs/basic-lto.macho.x86_64.o
CHECK-LTO-DAG: sym: _bar, objAddr: 0x0000000000000050, binAddr: 0x0000000100000F90, size: 0x00000024
CHECK-LTO-DAG: sym: _baz, objAddr: 0x0000000000000658, binAddr: 0x0000000100001000, size: 0x00000000
@@ -51,7 +53,8 @@ CHECK-ARCHIVE-NEXT: found member in current archive.
CHECK-ARCHIVE-NEXT: trying to open {{.*}}/libbasic.a(basic3.macho.x86_64.o)'
CHECK-ARCHIVE-NEXT: found member in current archive.
CHECK-ARCHIVE: ---
-CHECK-ARCHIVE: triple: 'x86_64-unknown-unknown-macho'
+CHECK-ARCHIVE: triple: 'x86_64-apple-darwin'
+CHECK-ARCHIVE: binary-path:{{.*}}/Inputs/basic-archive.macho.x86_64
CHECK-ARCHIVE: /Inputs/basic1.macho.x86_64.o
CHECK-ARCHIVE-DAG: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024
CHECK-ARCHIVE: /Inputs/./libbasic.a(basic2.macho.x86_64.o)
@@ -72,7 +75,8 @@ NOT-FOUND: cannot open{{.*}}"/Inputs/basic1.macho.x86_64.o": {{[Nn]o}} such file
NOT-FOUND: cannot open{{.*}}"/Inputs/basic2.macho.x86_64.o": {{[Nn]o}} such file
NOT-FOUND: cannot open{{.*}}"/Inputs/basic3.macho.x86_64.o": {{[Nn]o}} such file
NOT-FOUND: ---
-NOT-FOUND-NEXT: triple: 'x86_64-unknown-unknown-macho'
+NOT-FOUND-NEXT: triple: 'x86_64-apple-darwin'
+NOT-FOUND-NEXT: binary-path:{{.*}}/Inputs/basic.macho.x86_64
NOT-FOUND-NEXT: ...
Check that we correctly error out on invalid executatble.
diff --git a/test/tools/dsymutil/dump-symtab.test b/test/tools/dsymutil/dump-symtab.test
new file mode 100644
index 000000000000..b83ac7f7ad17
--- /dev/null
+++ b/test/tools/dsymutil/dump-symtab.test
@@ -0,0 +1,44 @@
+RUN: llvm-dsymutil -s %p/Inputs/fat-test.dylib | FileCheck -check-prefix=ALL -check-prefix=I386 %s
+RUN: llvm-dsymutil -arch i386 -s %p/Inputs/fat-test.dylib | FileCheck -check-prefix=I386 -check-prefix=ONE %s
+
+
+ALL: ----------------------------------------------------------------------
+ALL-NEXT: Symbol table for: '{{.*}}fat-test.dylib' (x86_64)
+ALL-NEXT: ----------------------------------------------------------------------
+ALL-NEXT: Index n_strx n_type n_sect n_desc n_value
+ALL-NEXT: ======== -------- ------------------ ------ ------ ----------------
+ALL-NEXT: [ 0] 00000002 64 (N_SO ) 00 0000 0000000000000000 '/Inputs/'
+ALL-NEXT: [ 1] 0000000b 64 (N_SO ) 00 0000 0000000000000000 'fat-test.c'
+ALL-NEXT: [ 2] 00000016 66 (N_OSO ) 03 0001 0000000055b1d0b9 '/Inputs/fat-test.o'
+ALL-NEXT: [ 3] 00000029 20 (N_GSYM ) 00 0000 0000000000000000 '_x86_64_var'
+ALL-NEXT: [ 4] 00000001 64 (N_SO ) 01 0000 0000000000000000
+ALL-NEXT: [ 5] 00000035 0f ( SECT EXT) 02 0000 0000000000001000 '_x86_64_var'
+ALL-NEXT: [ 6] 00000041 01 ( UNDF EXT) 00 0100 0000000000000000 'dyld_stub_binder'
+
+I386: ----------------------------------------------------------------------
+I386-NEXT: Symbol table for: '{{.*}}fat-test.dylib' (i386)
+I386-NEXT: ----------------------------------------------------------------------
+I386-NEXT: Index n_strx n_type n_sect n_desc n_value
+I386-NEXT: ======== -------- ------------------ ------ ------ ----------------
+I386-NEXT: [ 0] 00000002 64 (N_SO ) 00 0000 0000000000000000 '/Inputs/'
+I386-NEXT: [ 1] 0000000b 64 (N_SO ) 00 0000 0000000000000000 'fat-test.c'
+I386-NEXT: [ 2] 00000016 66 (N_OSO ) 03 0001 0000000055b1d0b9 '/Inputs/fat-test.o'
+I386-NEXT: [ 3] 00000029 20 (N_GSYM ) 00 0000 0000000000000000 '_i386_var'
+I386-NEXT: [ 4] 00000001 64 (N_SO ) 01 0000 0000000000000000
+I386-NEXT: [ 5] 00000033 0f ( SECT EXT) 02 0000 0000000000001000 '_i386_var'
+I386-NEXT: [ 6] 0000003d 01 ( UNDF EXT) 00 0100 0000000000000000 'dyld_stub_binder'
+
+ONE-NOT: Symbol table
+
+ALL: ----------------------------------------------------------------------
+ALL-NEXT: Symbol table for: '{{.*}}fat-test.dylib' (x86_64h)
+ALL-NEXT: ----------------------------------------------------------------------
+ALL-NEXT: Index n_strx n_type n_sect n_desc n_value
+ALL-NEXT: ======== -------- ------------------ ------ ------ ----------------
+ALL-NEXT: [ 0] 00000002 64 (N_SO ) 00 0000 0000000000000000 '/Inputs/'
+ALL-NEXT: [ 1] 0000000b 64 (N_SO ) 00 0000 0000000000000000 'fat-test.c'
+ALL-NEXT: [ 2] 00000016 66 (N_OSO ) 08 0001 0000000055b1d0b9 '/Inputs/fat-test.o'
+ALL-NEXT: [ 3] 00000029 20 (N_GSYM ) 00 0000 0000000000000000 '_x86_64h_var'
+ALL-NEXT: [ 4] 00000001 64 (N_SO ) 01 0000 0000000000000000
+ALL-NEXT: [ 5] 00000036 0f ( SECT EXT) 02 0000 0000000000001000 '_x86_64h_var'
+ALL-NEXT: [ 6] 00000043 01 ( UNDF EXT) 00 0100 0000000000000000 'dyld_stub_binder' \ No newline at end of file
diff --git a/test/tools/dsymutil/fat-binary-output.test b/test/tools/dsymutil/fat-binary-output.test
new file mode 100644
index 000000000000..fafef14ebe9b
--- /dev/null
+++ b/test/tools/dsymutil/fat-binary-output.test
@@ -0,0 +1,32 @@
+RUN: llvm-dsymutil -f -verbose -no-output %p/Inputs/fat-test.dylib -oso-prepend-path %p | FileCheck %s
+
+This test doesn't produce any filesytstem output, we just look at the verbose
+log output.
+
+For each arch in the binary, check that we emit the right triple with the right
+file and the right symbol inside it (each slice has a different symbol, so that
+means that the logic is looking at the right file slice too).
+
+After the link of each architecture, check that lipo is correctly invoked to
+generate the fat output binary.
+
+CHECK: triple: 'x86_64-apple-darwin'
+CHECK: - filename: {{'?}}[[INPUTS_PATH:.*]]fat-test.o
+CHECK: DW_AT_name{{.*}} "x86_64_var"
+
+CHECK: triple: 'i386-apple-darwin'
+CHECK: - filename: {{'?}}[[INPUTS_PATH]]fat-test.o
+CHECK: DW_AT_name{{.*}} "i386_var"
+
+CHECK: triple: 'x86_64h-apple-darwin'
+CHECK: - filename: {{'?}}[[INPUTS_PATH]]fat-test.o
+CHECK: DW_AT_name{{.*}} "x86_64h_var"
+
+CHECK: Running lipo
+CHECK-NEXT: lipo -create
+CHECK-SAME: [[INPUTS_PATH]]fat-test.dylib.tmp{{......}}.dwarf
+CHECK-SAME: [[INPUTS_PATH]]fat-test.dylib.tmp{{......}}.dwarf
+CHECK-SAME: [[INPUTS_PATH]]fat-test.dylib.tmp{{......}}.dwarf
+CHECK-SAME: -segalign x86_64 20 -segalign i386 20 -segalign x86_64h 20
+CHECK-SAME: -output [[INPUTS_PATH]]fat-test.dylib.dwarf
+
diff --git a/test/tools/dsymutil/yaml-object-address-rewrite.test b/test/tools/dsymutil/yaml-object-address-rewrite.test
index dcb39be891cd..749719fc5bd9 100644
--- a/test/tools/dsymutil/yaml-object-address-rewrite.test
+++ b/test/tools/dsymutil/yaml-object-address-rewrite.test
@@ -1,16 +1,19 @@
-# RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p -y %s | FileCheck %s
+# RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p -y %s | FileCheck %s
#
# The YAML debug map bellow is the one from basic-archive.macho.x86_64 with
# the object addresses set to zero. Check that the YAML import is able to
# rewrite these addresses to the right values.
#
# CHECK: ---
-# CHECK-NEXT: triple:{{.*}}'x86_64-unknown-unknown-macho'
+# CHECK-NEXT: triple:{{.*}}'x86_64-apple-darwin'
+# CHECK-NEXT: binary-path:{{.*}}''
# CHECK-NEXT: objects:
# CHECK-NEXT: filename:{{.*}}/Inputs/basic1.macho.x86_64.o
+# CHECK-NEXT: timestamp: 0
# CHECK-NEXT: symbols:
# CHECK-NEXT: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024
# CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic2.macho.x86_64.o)'
+# CHECK-NEXT: timestamp: 0
# CHECK-NEXT: symbols:
# CHECK-DAG: sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050
# CHECK-DAG: sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0000000100001004, size: 0x00000000
@@ -18,6 +21,7 @@
# CHECK-DAG: sym: _baz, objAddr: 0x0000000000000310, binAddr: 0x0000000100001000, size: 0x00000000
# CHECK-NOT: { sym:
# CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic3.macho.x86_64.o)'
+# CHECK-NEXT: timestamp: 0
# CHECK-NEXT: symbols:
# CHECK-DAG: sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000
# CHECK-DAG: sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050
@@ -25,7 +29,7 @@
# CHECK-NOT: { sym:
# CHECK-NEXT: ...
---
-triple: 'x86_64-unknown-unknown-macho'
+triple: 'x86_64-apple-darwin'
objects:
- filename: /Inputs/basic1.macho.x86_64.o
symbols:
diff --git a/test/tools/gold/Inputs/comdat.ll b/test/tools/gold/Inputs/comdat.ll
deleted file mode 100644
index 464aefa49dc1..000000000000
--- a/test/tools/gold/Inputs/comdat.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-$c2 = comdat any
-$c1 = comdat any
-
-; This is only present in this file. The linker will keep $c1 from the first
-; file and this will be undefined.
-@will_be_undefined = global i32 1, comdat($c1)
-
-@v1 = weak_odr global i32 41, comdat($c2)
-define weak_odr protected i32 @f1(i8* %this) comdat($c2) {
-bb20:
- store i8* %this, i8** null
- br label %bb21
-bb21:
- ret i32 41
-}
-
-@r21 = global i32* @v1
-@r22 = global i32(i8*)* @f1
-
-@a21 = alias i32* @v1
-@a22 = alias bitcast (i32* @v1 to i16*)
-
-@a23 = alias i32(i8*)* @f1
-@a24 = alias bitcast (i32(i8*)* @f1 to i16*)
-@a25 = alias i16* @a24
diff --git a/test/tools/gold/Inputs/linkonce-weak.ll b/test/tools/gold/Inputs/linkonce-weak.ll
deleted file mode 100644
index f42af8faa844..000000000000
--- a/test/tools/gold/Inputs/linkonce-weak.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-define weak_odr void @f() {
- ret void
-}
diff --git a/test/tools/gold/PowerPC/lit.local.cfg b/test/tools/gold/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..d968938d24ce
--- /dev/null
+++ b/test/tools/gold/PowerPC/lit.local.cfg
@@ -0,0 +1,3 @@
+if (not 'ld_plugin' in config.available_features or
+ not 'PowerPC' in config.root.targets):
+ config.unsupported = True
diff --git a/test/tools/gold/mtriple.ll b/test/tools/gold/PowerPC/mtriple.ll
index 87b226014cab..87b226014cab 100644
--- a/test/tools/gold/mtriple.ll
+++ b/test/tools/gold/PowerPC/mtriple.ll
diff --git a/test/tools/gold/Inputs/alias-1.ll b/test/tools/gold/X86/Inputs/alias-1.ll
index 96183aa9537c..96183aa9537c 100644
--- a/test/tools/gold/Inputs/alias-1.ll
+++ b/test/tools/gold/X86/Inputs/alias-1.ll
diff --git a/test/tools/gold/X86/Inputs/available-externally.ll b/test/tools/gold/X86/Inputs/available-externally.ll
new file mode 100644
index 000000000000..cbc5c12c65d5
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/available-externally.ll
@@ -0,0 +1,3 @@
+define void @zed() {
+ ret void
+}
diff --git a/test/tools/gold/Inputs/bcsection.s b/test/tools/gold/X86/Inputs/bcsection.s
index ede1e5c532dd..ede1e5c532dd 100644
--- a/test/tools/gold/Inputs/bcsection.s
+++ b/test/tools/gold/X86/Inputs/bcsection.s
diff --git a/test/tools/gold/X86/Inputs/comdat.ll b/test/tools/gold/X86/Inputs/comdat.ll
new file mode 100644
index 000000000000..734115180fff
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/comdat.ll
@@ -0,0 +1,25 @@
+$c2 = comdat any
+$c1 = comdat any
+
+; This is only present in this file. The linker will keep $c1 from the first
+; file and this will be undefined.
+@will_be_undefined = global i32 1, comdat($c1)
+
+@v1 = weak_odr global i32 41, comdat($c2)
+define weak_odr protected i32 @f1(i8* %this) comdat($c2) {
+bb20:
+ store i8* %this, i8** null
+ br label %bb21
+bb21:
+ ret i32 41
+}
+
+@r21 = global i32* @v1
+@r22 = global i32(i8*)* @f1
+
+@a21 = alias i32, i32* @v1
+@a22 = alias i16, bitcast (i32* @v1 to i16*)
+
+@a23 = alias i32(i8*), i32(i8*)* @f1
+@a24 = alias i16, bitcast (i32(i8*)* @f1 to i16*)
+@a25 = alias i16, i16* @a24
diff --git a/test/tools/gold/X86/Inputs/comdat2.ll b/test/tools/gold/X86/Inputs/comdat2.ll
new file mode 100644
index 000000000000..5b7f74cf0b24
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/comdat2.ll
@@ -0,0 +1,9 @@
+$foo = comdat any
+@foo = global i8 1, comdat
+define void @zed() {
+ call void @bar()
+ ret void
+}
+define void @bar() comdat($foo) {
+ ret void
+}
diff --git a/test/tools/gold/Inputs/common.ll b/test/tools/gold/X86/Inputs/common.ll
index 46f199e167af..46f199e167af 100644
--- a/test/tools/gold/Inputs/common.ll
+++ b/test/tools/gold/X86/Inputs/common.ll
diff --git a/test/tools/gold/X86/Inputs/ctors2.ll b/test/tools/gold/X86/Inputs/ctors2.ll
new file mode 100644
index 000000000000..af1590eb277c
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/ctors2.ll
@@ -0,0 +1,5 @@
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @bar, i8* null }]
+
+define void @bar() {
+ ret void
+}
diff --git a/test/tools/gold/Inputs/drop-debug.bc b/test/tools/gold/X86/Inputs/drop-debug.bc
index f9c471f8e0d1..f9c471f8e0d1 100644
--- a/test/tools/gold/Inputs/drop-debug.bc
+++ b/test/tools/gold/X86/Inputs/drop-debug.bc
Binary files differ
diff --git a/test/tools/gold/X86/Inputs/drop-linkage.ll b/test/tools/gold/X86/Inputs/drop-linkage.ll
new file mode 100644
index 000000000000..075306114331
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/drop-linkage.ll
@@ -0,0 +1,9 @@
+$foo = comdat any
+define linkonce void @foo() comdat {
+ ret void
+}
+
+define void @bar() {
+ call void @foo()
+ ret void
+}
diff --git a/test/tools/gold/Inputs/invalid.bc b/test/tools/gold/X86/Inputs/invalid.bc
index 2e7ca8d2e105..2e7ca8d2e105 100644
--- a/test/tools/gold/Inputs/invalid.bc
+++ b/test/tools/gold/X86/Inputs/invalid.bc
Binary files differ
diff --git a/test/tools/gold/Inputs/linker-script.export b/test/tools/gold/X86/Inputs/linker-script.export
index 2062a081ffe2..2062a081ffe2 100644
--- a/test/tools/gold/Inputs/linker-script.export
+++ b/test/tools/gold/X86/Inputs/linker-script.export
diff --git a/test/tools/gold/X86/Inputs/linkonce-weak.ll b/test/tools/gold/X86/Inputs/linkonce-weak.ll
new file mode 100644
index 000000000000..3b7dad1b1eff
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/linkonce-weak.ll
@@ -0,0 +1,19 @@
+define weak_odr void @f() !dbg !4 {
+ ret void, !dbg !10
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "linkonce-weak.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"}
+!10 = !DILocation(line: 2, column: 1, scope: !4)
diff --git a/test/tools/gold/Inputs/pr19901-1.ll b/test/tools/gold/X86/Inputs/pr19901-1.ll
index 2f7153268aca..2f7153268aca 100644
--- a/test/tools/gold/Inputs/pr19901-1.ll
+++ b/test/tools/gold/X86/Inputs/pr19901-1.ll
diff --git a/test/tools/gold/X86/Inputs/resolve-to-alias.ll b/test/tools/gold/X86/Inputs/resolve-to-alias.ll
new file mode 100644
index 000000000000..eff02a6f4d1e
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/resolve-to-alias.ll
@@ -0,0 +1,4 @@
+@bar = alias void (), void ()* @zed
+define void @zed() {
+ ret void
+}
diff --git a/test/tools/gold/X86/Inputs/thinlto.ll b/test/tools/gold/X86/Inputs/thinlto.ll
new file mode 100644
index 000000000000..4e0840f3691e
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/thinlto.ll
@@ -0,0 +1,4 @@
+define void @g() {
+entry:
+ ret void
+}
diff --git a/test/tools/gold/X86/Inputs/type-merge.ll b/test/tools/gold/X86/Inputs/type-merge.ll
new file mode 100644
index 000000000000..4dc214922dc5
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/type-merge.ll
@@ -0,0 +1,5 @@
+define void @zed() {
+ call void @bar()
+ ret void
+}
+declare void @bar()
diff --git a/test/tools/gold/X86/Inputs/type-merge2.ll b/test/tools/gold/X86/Inputs/type-merge2.ll
new file mode 100644
index 000000000000..a354757ee2e7
--- /dev/null
+++ b/test/tools/gold/X86/Inputs/type-merge2.ll
@@ -0,0 +1,5 @@
+%zed = type { i16 }
+define void @bar(%zed* %this) {
+ store %zed* %this, %zed** null
+ ret void
+}
diff --git a/test/tools/gold/Inputs/weak.ll b/test/tools/gold/X86/Inputs/weak.ll
index 53b1d1650d1b..53b1d1650d1b 100644
--- a/test/tools/gold/Inputs/weak.ll
+++ b/test/tools/gold/X86/Inputs/weak.ll
diff --git a/test/tools/gold/X86/alias.ll b/test/tools/gold/X86/alias.ll
new file mode 100644
index 000000000000..c659f73d7e83
--- /dev/null
+++ b/test/tools/gold/X86/alias.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/alias-1.ll -o %t2.o
+; RUN: %gold -shared -o %t3.o -plugin %llvmshlibdir/LLVMgold.so %t2.o %t.o \
+; RUN: -plugin-opt=emit-llvm
+; RUN: llvm-dis < %t3.o -o - | FileCheck %s
+
+; CHECK-NOT: alias
+; CHECK: @a = global i32 42
+; CHECK-NEXT: @b = global i32 1
+; CHECK-NOT: alias
+
+@a = weak alias i32, i32* @b
+@b = global i32 1
diff --git a/test/tools/gold/X86/alias2.ll b/test/tools/gold/X86/alias2.ll
new file mode 100644
index 000000000000..4727e0508fae
--- /dev/null
+++ b/test/tools/gold/X86/alias2.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: %gold -shared -o %t2.bc -plugin %llvmshlibdir/LLVMgold.so %t.o -plugin-opt=emit-llvm
+; RUN: llvm-dis %t2.bc -o - | FileCheck %s
+
+@bar = alias void (), void ()* @zed
+define void @foo() {
+ call void @bar()
+ ret void
+}
+define void @zed() {
+ ret void
+}
+
+; CHECK: @bar = alias void (), void ()* @zed
+
+; CHECK: define void @foo() {
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+; CHECK: define void @zed() {
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
diff --git a/test/tools/gold/X86/available-externally.ll b/test/tools/gold/X86/available-externally.ll
new file mode 100644
index 000000000000..d47a536dc094
--- /dev/null
+++ b/test/tools/gold/X86/available-externally.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/available-externally.ll -o %t2.o
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t2.o %t.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+define void @foo() {
+ call void @bar()
+ call void @zed()
+ ret void
+}
+define available_externally void @bar() {
+ ret void
+}
+define available_externally void @zed() {
+ ret void
+}
+
+; CHECK-DAG: define available_externally void @bar() {
+; CHECK-DAG: define void @zed() {
diff --git a/test/tools/gold/X86/bad-alias.ll b/test/tools/gold/X86/bad-alias.ll
new file mode 100644
index 000000000000..c4e3c3fe82fc
--- /dev/null
+++ b/test/tools/gold/X86/bad-alias.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as %s -o %t.o
+
+; RUN: not %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o -o %t2.o 2>&1 | FileCheck %s
+
+; CHECK: Unable to determine comdat of alias!
+
+@g1 = global i32 1
+@g2 = global i32 2
+
+@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32),
+ i32 ptrtoint (i32* @g2 to i32)) to i32*)
diff --git a/test/tools/gold/X86/bcsection.ll b/test/tools/gold/X86/bcsection.ll
new file mode 100644
index 000000000000..f7ebe375770e
--- /dev/null
+++ b/test/tools/gold/X86/bcsection.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as -o %T/bcsection.bc %s
+
+; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-unknown-unknown -o %T/bcsection.bco %p/Inputs/bcsection.s
+; RUN: llvm-nm -no-llvm-bc %T/bcsection.bco | count 0
+; RUN: %gold -r -o %T/bcsection.o -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so %T/bcsection.bco
+; RUN: llvm-nm -no-llvm-bc %T/bcsection.o | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+; CHECK: main
+define i32 @main() {
+ ret i32 0
+}
diff --git a/test/tools/gold/coff.ll b/test/tools/gold/X86/coff.ll
index 5d8a1c9da5fc..5d8a1c9da5fc 100644
--- a/test/tools/gold/coff.ll
+++ b/test/tools/gold/X86/coff.ll
diff --git a/test/tools/gold/X86/comdat.ll b/test/tools/gold/X86/comdat.ll
new file mode 100644
index 000000000000..f65ca73c8063
--- /dev/null
+++ b/test/tools/gold/X86/comdat.ll
@@ -0,0 +1,65 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/comdat.ll -o %t2.o
+; RUN: %gold -shared -o %t3.o -plugin %llvmshlibdir/LLVMgold.so %t.o %t2.o \
+; RUN: -plugin-opt=emit-llvm
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+$c1 = comdat any
+
+@v1 = weak_odr global i32 42, comdat($c1)
+define weak_odr i32 @f1(i8*) comdat($c1) {
+bb10:
+ br label %bb11
+bb11:
+ ret i32 42
+}
+
+@r11 = global i32* @v1
+@r12 = global i32 (i8*)* @f1
+
+@a11 = alias i32, i32* @v1
+@a12 = alias i16, bitcast (i32* @v1 to i16*)
+
+@a13 = alias i32 (i8*), i32 (i8*)* @f1
+@a14 = alias i16, bitcast (i32 (i8*)* @f1 to i16*)
+@a15 = alias i16, i16* @a14
+
+; CHECK: $c1 = comdat any
+; CHECK: $c2 = comdat any
+
+; CHECK-DAG: @v1 = weak_odr global i32 42, comdat($c1)
+
+; CHECK-DAG: @r11 = global i32* @v1{{$}}
+; CHECK-DAG: @r12 = global i32 (i8*)* @f1{{$}}
+
+; CHECK-DAG: @r21 = global i32* @v1{{$}}
+; CHECK-DAG: @r22 = global i32 (i8*)* @f1{{$}}
+
+; CHECK-DAG: @v1.1 = internal global i32 41, comdat($c2)
+
+; CHECK-DAG: @a11 = alias i32, i32* @v1{{$}}
+; CHECK-DAG: @a12 = alias i16, bitcast (i32* @v1 to i16*)
+
+; CHECK-DAG: @a13 = alias i32 (i8*), i32 (i8*)* @f1{{$}}
+; CHECK-DAG: @a14 = alias i16, bitcast (i32 (i8*)* @f1 to i16*)
+
+; CHECK-DAG: @a21 = alias i32, i32* @v1.1{{$}}
+; CHECK-DAG: @a22 = alias i16, bitcast (i32* @v1.1 to i16*)
+
+; CHECK-DAG: @a23 = alias i32 (i8*), i32 (i8*)* @f1.2{{$}}
+; CHECK-DAG: @a24 = alias i16, bitcast (i32 (i8*)* @f1.2 to i16*)
+
+; CHECK: define weak_odr protected i32 @f1(i8*) comdat($c1) {
+; CHECK-NEXT: bb10:
+; CHECK-NEXT: br label %bb11{{$}}
+; CHECK: bb11:
+; CHECK-NEXT: ret i32 42
+; CHECK-NEXT: }
+
+; CHECK: define internal i32 @f1.2(i8* %this) comdat($c2) {
+; CHECK-NEXT: bb20:
+; CHECK-NEXT: store i8* %this, i8** null
+; CHECK-NEXT: br label %bb21
+; CHECK: bb21:
+; CHECK-NEXT: ret i32 41
+; CHECK-NEXT: }
diff --git a/test/tools/gold/X86/comdat2.ll b/test/tools/gold/X86/comdat2.ll
new file mode 100644
index 000000000000..2156efd207bb
--- /dev/null
+++ b/test/tools/gold/X86/comdat2.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-as %p/Inputs/comdat2.ll -o %t2.bc
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.bc %t2.bc -o %t3.bc
+; RUN: llvm-dis %t3.bc -o - | FileCheck %s
+
+
+$foo = comdat any
+@foo = global i8 0, comdat
+
+; CHECK: @foo = global i8 0, comdat
+
+; CHECK: define void @zed() {
+; CHECK: call void @bar()
+; CHECK: ret void
+; CHECK: }
+
+; CHECK: declare void @bar()
diff --git a/test/tools/gold/common.ll b/test/tools/gold/X86/common.ll
index ef18e6831045..ef18e6831045 100644
--- a/test/tools/gold/common.ll
+++ b/test/tools/gold/X86/common.ll
diff --git a/test/tools/gold/X86/ctors.ll b/test/tools/gold/X86/ctors.ll
new file mode 100644
index 000000000000..24c8e342beb0
--- /dev/null
+++ b/test/tools/gold/X86/ctors.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o -o %t2.o
+; RUN: llvm-dis %t2.o -o - | FileCheck %s
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }]
+
+define internal void @foo() {
+ ret void
+}
+
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }]
diff --git a/test/tools/gold/X86/ctors2.ll b/test/tools/gold/X86/ctors2.ll
new file mode 100644
index 000000000000..c39cb7132d93
--- /dev/null
+++ b/test/tools/gold/X86/ctors2.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/ctors2.ll -o %t2.o
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }]
+
+define void @foo() {
+ ret void
+}
+
+; CHECK: @llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }, { i32, void ()*, i8* } { i32 65535, void ()* @bar, i8* null }]
diff --git a/test/tools/gold/X86/disable-verify.ll b/test/tools/gold/X86/disable-verify.ll
new file mode 100644
index 000000000000..5b8dbb054478
--- /dev/null
+++ b/test/tools/gold/X86/disable-verify.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as %s -o %t.o
+; REQUIRES: asserts
+
+; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=disable-verify \
+; RUN: --plugin-opt=-debug-pass=Arguments \
+; RUN: -shared %t.o -o %t2.o 2>&1 | FileCheck %s
+
+; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=-debug-pass=Arguments \
+; RUN: -shared %t.o -o %t2.o 2>&1 | FileCheck %s -check-prefix=VERIFY
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; -disable-verify should disable output verification from the optimization
+; pipeline.
+; CHECK: Pass Arguments: {{.*}} -verify -forceattrs
+; CHECK-NOT: -verify
+
+; VERIFY: Pass Arguments: {{.*}} -verify {{.*}} -verify
+
+define void @f() {
+entry:
+ ret void
+}
diff --git a/test/tools/gold/drop-debug.ll b/test/tools/gold/X86/drop-debug.ll
index b8c4d8c62a9b..b8c4d8c62a9b 100644
--- a/test/tools/gold/drop-debug.ll
+++ b/test/tools/gold/X86/drop-debug.ll
diff --git a/test/tools/gold/X86/drop-linkage.ll b/test/tools/gold/X86/drop-linkage.ll
new file mode 100644
index 000000000000..d4c1dd052c79
--- /dev/null
+++ b/test/tools/gold/X86/drop-linkage.ll
@@ -0,0 +1,14 @@
+; RUN: llc %s -o %t.s
+; RUN: llvm-mc %t.s -o %t.o -filetype=obj
+; RUN: llvm-as %p/Inputs/drop-linkage.ll -o %t2.o
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+define void @foo() {
+ ret void
+}
+
+; CHECK: declare extern_weak void @foo(){{$}}
diff --git a/test/tools/gold/X86/emit-llvm.ll b/test/tools/gold/X86/emit-llvm.ll
new file mode 100644
index 000000000000..4a6d5963cff0
--- /dev/null
+++ b/test/tools/gold/X86/emit-llvm.ll
@@ -0,0 +1,93 @@
+; RUN: llvm-as %s -o %t.o
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: --plugin-opt=generate-api-file \
+; RUN: -shared %t.o -o %t2.o
+; RUN: llvm-dis %t2.o -o - | FileCheck %s
+; RUN: FileCheck --check-prefix=API %s < %T/../apifile.txt
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: -m elf_x86_64 --plugin-opt=save-temps \
+; RUN: -shared %t.o -o %t3.o
+; RUN: llvm-dis %t3.o.bc -o - | FileCheck %s
+; RUN: llvm-dis %t3.o.opt.bc -o - | FileCheck --check-prefix=OPT %s
+; RUN: llvm-dis %t3.o.opt.bc -o - | FileCheck --check-prefix=OPT2 %s
+; RUN: llvm-nm %t3.o.o | FileCheck --check-prefix=NM %s
+
+; RUN: rm -f %t4.o
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: -m elf_x86_64 --plugin-opt=disable-output \
+; RUN: -shared %t.o -o %t4.o
+; RUN: not test -a %t4.o
+
+; NM: T f3
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@g7 = extern_weak global i32
+; CHECK-DAG: @g7 = extern_weak global i32
+
+@g8 = external global i32
+
+; CHECK-DAG: define internal void @f1()
+; OPT2-NOT: @f1
+define hidden void @f1() {
+ ret void
+}
+
+; CHECK-DAG: define hidden void @f2()
+; OPT-DAG: define hidden void @f2()
+define hidden void @f2() {
+ ret void
+}
+
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (void ()* @f2 to i8*)]
+
+; CHECK-DAG: define void @f3()
+; OPT-DAG: define void @f3()
+define void @f3() {
+ call void @f4()
+ ret void
+}
+
+; CHECK-DAG: define internal void @f4()
+; OPT2-NOT: @f4
+define linkonce_odr void @f4() {
+ ret void
+}
+
+; CHECK-DAG: define linkonce_odr void @f5()
+; OPT-DAG: define linkonce_odr void @f5()
+define linkonce_odr void @f5() {
+ ret void
+}
+@g5 = global void()* @f5
+
+; CHECK-DAG: define internal void @f6() unnamed_addr
+; OPT-DAG: define internal void @f6() unnamed_addr
+define linkonce_odr void @f6() unnamed_addr {
+ ret void
+}
+@g6 = global void()* @f6
+
+define i32* @f7() {
+ ret i32* @g7
+}
+
+define i32* @f8() {
+ ret i32* @g8
+}
+
+; API: f1 PREVAILING_DEF_IRONLY
+; API: f2 PREVAILING_DEF_IRONLY
+; API: f3 PREVAILING_DEF_IRONLY_EXP
+; API: f4 PREVAILING_DEF_IRONLY_EXP
+; API: f5 PREVAILING_DEF_IRONLY_EXP
+; API: f6 PREVAILING_DEF_IRONLY_EXP
+; API: f7 PREVAILING_DEF_IRONLY_EXP
+; API: f8 PREVAILING_DEF_IRONLY_EXP
+; API: g7 UNDEF
+; API: g8 UNDEF
+; API: g5 PREVAILING_DEF_IRONLY_EXP
+; API: g6 PREVAILING_DEF_IRONLY_EXP
diff --git a/test/tools/gold/invalid.ll b/test/tools/gold/X86/invalid.ll
index 858cd47adbed..858cd47adbed 100644
--- a/test/tools/gold/invalid.ll
+++ b/test/tools/gold/X86/invalid.ll
diff --git a/test/tools/gold/linker-script.ll b/test/tools/gold/X86/linker-script.ll
index 7c88b0ffd5fb..7c88b0ffd5fb 100644
--- a/test/tools/gold/linker-script.ll
+++ b/test/tools/gold/X86/linker-script.ll
diff --git a/test/tools/gold/X86/linkonce-weak.ll b/test/tools/gold/X86/linkonce-weak.ll
new file mode 100644
index 000000000000..3397c3480a7c
--- /dev/null
+++ b/test/tools/gold/X86/linkonce-weak.ll
@@ -0,0 +1,39 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/linkonce-weak.ll -o %t2.o
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t2.o %t.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+define linkonce_odr void @f() !dbg !4 {
+ ret void, !dbg !10
+}
+
+; Test that we get a weak_odr regardless of the order of the files
+; CHECK: define weak_odr void @f()
+
+; Test that we only get a single DISubprogram for @f
+; CHECK: !DISubprogram(name: "f"
+; CHECK-NOT: !DISubprogram(name: "f"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "linkonce-weak.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"}
+!10 = !DILocation(line: 2, column: 1, scope: !4)
diff --git a/test/tools/gold/X86/lit.local.cfg b/test/tools/gold/X86/lit.local.cfg
new file mode 100644
index 000000000000..ddcd48ca470d
--- /dev/null
+++ b/test/tools/gold/X86/lit.local.cfg
@@ -0,0 +1,3 @@
+if (not 'ld_plugin' in config.available_features or
+ not 'X86' in config.root.targets):
+ config.unsupported = True
diff --git a/test/tools/gold/no-map-whole-file.ll b/test/tools/gold/X86/no-map-whole-file.ll
index 4c261d70a249..4c261d70a249 100644
--- a/test/tools/gold/no-map-whole-file.ll
+++ b/test/tools/gold/X86/no-map-whole-file.ll
diff --git a/test/tools/gold/opt-level.ll b/test/tools/gold/X86/opt-level.ll
index 3deb0af37a74..3deb0af37a74 100644
--- a/test/tools/gold/opt-level.ll
+++ b/test/tools/gold/X86/opt-level.ll
diff --git a/test/tools/gold/X86/parallel.ll b/test/tools/gold/X86/parallel.ll
new file mode 100644
index 000000000000..00a0bafda251
--- /dev/null
+++ b/test/tools/gold/X86/parallel.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as -o %t.bc %s
+; RUN: env LD_PRELOAD=%llvmshlibdir/LLVMgold.so %gold -plugin %llvmshlibdir/LLVMgold.so -u foo -u bar -plugin-opt jobs=2 -plugin-opt save-temps -m elf_x86_64 -o %t %t.bc
+; RUN: llvm-nm %t.o0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-nm %t.o1 | FileCheck --check-prefix=CHECK1 %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK0-NOT: bar
+; CHECK0: T foo
+; CHECK0-NOT: bar
+define void @foo() {
+ call void @bar()
+ ret void
+}
+
+; CHECK1-NOT: foo
+; CHECK1: T bar
+; CHECK1-NOT: foo
+define void @bar() {
+ call void @foo()
+ ret void
+}
diff --git a/test/tools/gold/pr19901.ll b/test/tools/gold/X86/pr19901.ll
index 71bb134ead7b..71bb134ead7b 100644
--- a/test/tools/gold/pr19901.ll
+++ b/test/tools/gold/X86/pr19901.ll
diff --git a/test/tools/gold/X86/pr25907.ll b/test/tools/gold/X86/pr25907.ll
new file mode 100644
index 000000000000..502938cf8126
--- /dev/null
+++ b/test/tools/gold/X86/pr25907.ll
@@ -0,0 +1,28 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: -shared %t.o -o %t2
+; RUN: llvm-nm %t2 | FileCheck %s
+; CHECK: T main
+
+@main.L = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@main, %L1), i8* blockaddress(@main, %L2), i8* null], align 16
+
+define i32 @main() #0 {
+entry:
+ br label %L1
+
+L1: ; preds = %entry, %L1
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %L1 ]
+ %inc = add i32 %i.0, 1
+ %idxprom = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @main.L, i64 0, i64 %idxprom
+ %0 = load i8*, i8** %arrayidx, align 8, !tbaa !1
+ indirectbr i8* %0, [label %L1, label %L2]
+
+L2: ; preds = %L1
+ ret i32 0
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/tools/gold/X86/pr25915.ll b/test/tools/gold/X86/pr25915.ll
new file mode 100644
index 000000000000..2d8807e04bb4
--- /dev/null
+++ b/test/tools/gold/X86/pr25915.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: -plugin-opt=emit-llvm \
+; RUN: -shared %t.o -o %t2
+; RUN: llvm-dis %t2 -o - | FileCheck %s
+; CHECK-NOT: subprograms
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 256170) (llvm/trunk 256171)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2)
+!1 = !DIFile(filename: "pr25915.cc", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.8.0 (trunk 256170) (llvm/trunk 256171)"}
diff --git a/test/tools/gold/X86/remarks.ll b/test/tools/gold/X86/remarks.ll
new file mode 100644
index 000000000000..51bd121cebce
--- /dev/null
+++ b/test/tools/gold/X86/remarks.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as %s -o %t.o
+
+; RUN: not %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: -plugin-opt=-pass-remarks=inline %t.o -o %t2.o 2>&1 | FileCheck %s
+
+; RUN: not %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: %t.o -o %t2.o 2>&1 | FileCheck -allow-empty --check-prefix=NO-REMARK %s
+
+
+; CHECK: f inlined into _start
+; NO-REMARK-NOT: inlined
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @bar()
+
+define i32 @f() {
+ %a = call i32 @bar()
+ ret i32 %a
+}
+
+define i32 @_start() {
+ %call = call i32 @f()
+ ret i32 %call
+}
diff --git a/test/tools/gold/X86/resolve-to-alias.ll b/test/tools/gold/X86/resolve-to-alias.ll
new file mode 100644
index 000000000000..102da6f80f4d
--- /dev/null
+++ b/test/tools/gold/X86/resolve-to-alias.ll
@@ -0,0 +1,33 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/resolve-to-alias.ll -o %t2.o
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o %t2.o -o %t.bc
+; RUN: llvm-dis %t.bc -o %t.ll
+; RUN: FileCheck --check-prefix=PASS1 %s < %t.ll
+; RUN: FileCheck --check-prefix=PASS2 %s < %t.ll
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t2.o %t.o -o %t.bc
+; RUN: llvm-dis %t.bc -o %t.ll
+; RUN: FileCheck --check-prefix=PASS1 %s < %t.ll
+; RUN: FileCheck --check-prefix=PASS2 %s < %t.ll
+
+define void @foo() {
+ call void @bar()
+ ret void
+}
+declare void @bar()
+
+; PASS1: @bar = alias void (), void ()* @zed
+
+; PASS1: define void @foo() {
+; PASS1-NEXT: call void @bar()
+; PASS1-NEXT: ret void
+; PASS1-NEXT: }
+
+; PASS2: define void @zed() {
+; PASS2-NEXT: ret void
+; PASS2-NEXT: }
diff --git a/test/tools/gold/slp-vectorize.ll b/test/tools/gold/X86/slp-vectorize.ll
index 30950b2d2de8..30950b2d2de8 100644
--- a/test/tools/gold/slp-vectorize.ll
+++ b/test/tools/gold/X86/slp-vectorize.ll
diff --git a/test/tools/gold/stats.ll b/test/tools/gold/X86/stats.ll
index b3c829798df2..b3c829798df2 100644
--- a/test/tools/gold/stats.ll
+++ b/test/tools/gold/X86/stats.ll
diff --git a/test/tools/gold/X86/thinlto.ll b/test/tools/gold/X86/thinlto.ll
new file mode 100644
index 000000000000..97def3d7a14d
--- /dev/null
+++ b/test/tools/gold/X86/thinlto.ll
@@ -0,0 +1,34 @@
+; First ensure that the ThinLTO handling in the gold plugin handles
+; bitcode without function summary sections gracefully.
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/thinlto.ll -o %t2.o
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=thinlto \
+; RUN: -shared %t.o %t2.o -o %t3
+
+; RUN: llvm-as -function-summary %s -o %t.o
+; RUN: llvm-as -function-summary %p/Inputs/thinlto.ll -o %t2.o
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=thinlto \
+; RUN: -shared %t.o %t2.o -o %t3
+; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
+; RUN: not test -e %t3
+
+; COMBINED: <MODULE_STRTAB_BLOCK
+; COMBINED-NEXT: <ENTRY {{.*}} record string = '{{.*}}/test/tools/gold/X86/Output/thinlto.ll.tmp{{.*}}.o'
+; COMBINED-NEXT: <ENTRY {{.*}} record string = '{{.*}}/test/tools/gold/X86/Output/thinlto.ll.tmp{{.*}}.o'
+; COMBINED-NEXT: </MODULE_STRTAB_BLOCK
+; COMBINED-NEXT: <FUNCTION_SUMMARY_BLOCK
+; COMBINED-NEXT: <COMBINED_ENTRY
+; COMBINED-NEXT: <COMBINED_ENTRY
+; COMBINED-NEXT: </FUNCTION_SUMMARY_BLOCK
+; COMBINED-NEXT: <VALUE_SYMTAB
+; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
+; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
+; COMBINED-NEXT: </VALUE_SYMTAB
+
+define void @f() {
+entry:
+ ret void
+}
diff --git a/test/tools/gold/X86/type-merge.ll b/test/tools/gold/X86/type-merge.ll
new file mode 100644
index 000000000000..d903894345d4
--- /dev/null
+++ b/test/tools/gold/X86/type-merge.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/type-merge.ll -o %t2.o
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+define void @foo() {
+ call void @bar(i8* null)
+ ret void
+}
+declare void @bar(i8*)
+
+; CHECK: define void @foo() {
+; CHECK-NEXT: call void @bar(i8* null)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+; CHECK: declare void @bar(i8*)
+
+; CHECK: define void @zed() {
+; CHECK-NEXT: call void bitcast (void (i8*)* @bar to void ()*)()
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
diff --git a/test/tools/gold/X86/type-merge2.ll b/test/tools/gold/X86/type-merge2.ll
new file mode 100644
index 000000000000..42ad0dafb29b
--- /dev/null
+++ b/test/tools/gold/X86/type-merge2.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/type-merge2.ll -o %t2.o
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+%zed = type { i8 }
+define void @foo() {
+ call void @bar(%zed* null)
+ ret void
+}
+declare void @bar(%zed*)
+
+; CHECK: %zed = type { i8 }
+; CHECK-NEXT: %zed.0 = type { i16 }
+
+; CHECK: define void @foo() {
+; CHECK-NEXT: call void bitcast (void (%zed.0*)* @bar to void (%zed*)*)(%zed* null)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+
+; CHECK: define void @bar(%zed.0* %this) {
+; CHECK-NEXT: store %zed.0* %this, %zed.0** null
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
diff --git a/test/tools/gold/X86/unnamed-addr.ll b/test/tools/gold/X86/unnamed-addr.ll
new file mode 100644
index 000000000000..290f73d85484
--- /dev/null
+++ b/test/tools/gold/X86/unnamed-addr.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as %s -o %t.o
+
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
+; RUN: --plugin-opt=emit-llvm \
+; RUN: -shared %t.o -o %t2.o
+; RUN: llvm-dis %t2.o -o - | FileCheck %s
+
+@a = internal unnamed_addr constant i8 42
+
+define i8* @f() {
+ ret i8* @a
+}
+
+; CHECK: @a = internal unnamed_addr constant i8 42
diff --git a/test/tools/gold/vectorize.ll b/test/tools/gold/X86/vectorize.ll
index 5f003dd02e21..5f003dd02e21 100644
--- a/test/tools/gold/vectorize.ll
+++ b/test/tools/gold/X86/vectorize.ll
diff --git a/test/tools/gold/weak.ll b/test/tools/gold/X86/weak.ll
index 6d8d7a871f1b..6d8d7a871f1b 100644
--- a/test/tools/gold/weak.ll
+++ b/test/tools/gold/X86/weak.ll
diff --git a/test/tools/gold/alias.ll b/test/tools/gold/alias.ll
deleted file mode 100644
index b4edb05a4e46..000000000000
--- a/test/tools/gold/alias.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as %s -o %t.o
-; RUN: llvm-as %p/Inputs/alias-1.ll -o %t2.o
-; RUN: %gold -shared -o %t3.o -plugin %llvmshlibdir/LLVMgold.so %t2.o %t.o \
-; RUN: -plugin-opt=emit-llvm
-; RUN: llvm-dis < %t3.o -o - | FileCheck %s
-
-; CHECK-NOT: alias
-; CHECK: @a = global i32 42
-; CHECK-NEXT: @b = global i32 1
-; CHECK-NOT: alias
-
-@a = weak alias i32* @b
-@b = global i32 1
diff --git a/test/tools/gold/bad-alias.ll b/test/tools/gold/bad-alias.ll
deleted file mode 100644
index a98bf710b454..000000000000
--- a/test/tools/gold/bad-alias.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as %s -o %t.o
-
-; RUN: not %gold -plugin %llvmshlibdir/LLVMgold.so \
-; RUN: --plugin-opt=emit-llvm \
-; RUN: -shared %t.o -o %t2.o 2>&1 | FileCheck %s
-
-; CHECK: Unable to determine comdat of alias!
-
-@g1 = global i32 1
-@g2 = global i32 2
-
-@a = alias inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32),
- i32 ptrtoint (i32* @g2 to i32)) to i32*)
diff --git a/test/tools/gold/bcsection.ll b/test/tools/gold/bcsection.ll
deleted file mode 100644
index 37d2994cc780..000000000000
--- a/test/tools/gold/bcsection.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llvm-as -o %T/bcsection.bc %s
-
-; RUN: llvm-mc -I=%T -filetype=obj -o %T/bcsection.bco %p/Inputs/bcsection.s
-; RUN: llvm-nm -no-llvm-bc %T/bcsection.bco | count 0
-; RUN: %gold -r -o %T/bcsection.o -plugin %llvmshlibdir/LLVMgold.so %T/bcsection.bco
-; RUN: llvm-nm -no-llvm-bc %T/bcsection.o | FileCheck %s
-
-; CHECK: main
-define i32 @main() {
- ret i32 0
-}
diff --git a/test/tools/gold/comdat.ll b/test/tools/gold/comdat.ll
deleted file mode 100644
index 370bf5641f31..000000000000
--- a/test/tools/gold/comdat.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; RUN: llvm-as %s -o %t.o
-; RUN: llvm-as %p/Inputs/comdat.ll -o %t2.o
-; RUN: %gold -shared -o %t3.o -plugin %llvmshlibdir/LLVMgold.so %t.o %t2.o \
-; RUN: -plugin-opt=emit-llvm
-; RUN: llvm-dis %t3.o -o - | FileCheck %s
-
-$c1 = comdat any
-
-@v1 = weak_odr global i32 42, comdat($c1)
-define weak_odr i32 @f1(i8*) comdat($c1) {
-bb10:
- br label %bb11
-bb11:
- ret i32 42
-}
-
-@r11 = global i32* @v1
-@r12 = global i32 (i8*)* @f1
-
-@a11 = alias i32* @v1
-@a12 = alias bitcast (i32* @v1 to i16*)
-
-@a13 = alias i32 (i8*)* @f1
-@a14 = alias bitcast (i32 (i8*)* @f1 to i16*)
-@a15 = alias i16* @a14
-
-; CHECK: $c1 = comdat any
-; CHECK: $c2 = comdat any
-
-; CHECK: @v1 = weak_odr global i32 42, comdat($c1)
-
-; CHECK: @r11 = global i32* @v1{{$}}
-; CHECK: @r12 = global i32 (i8*)* @f1{{$}}
-
-; CHECK: @r21 = global i32* @v1{{$}}
-; CHECK: @r22 = global i32 (i8*)* @f1{{$}}
-
-; CHECK: @v11 = internal global i32 41, comdat($c2)
-
-; CHECK: @a11 = alias i32* @v1{{$}}
-; CHECK: @a12 = alias bitcast (i32* @v1 to i16*)
-
-; CHECK: @a13 = alias i32 (i8*)* @f1{{$}}
-; CHECK: @a14 = alias bitcast (i32 (i8*)* @f1 to i16*)
-
-; CHECK: @a21 = alias i32* @v11{{$}}
-; CHECK: @a22 = alias bitcast (i32* @v11 to i16*)
-
-; CHECK: @a23 = alias i32 (i8*)* @f12{{$}}
-; CHECK: @a24 = alias bitcast (i32 (i8*)* @f12 to i16*)
-
-; CHECK: define weak_odr protected i32 @f1(i8*) comdat($c1) {
-; CHECK-NEXT: bb10:
-; CHECK-NEXT: br label %bb11{{$}}
-; CHECK: bb11:
-; CHECK-NEXT: ret i32 42
-; CHECK-NEXT: }
-
-; CHECK: define internal i32 @f12(i8* %this) comdat($c2) {
-; CHECK-NEXT: bb20:
-; CHECK-NEXT: store i8* %this, i8** null
-; CHECK-NEXT: br label %bb21
-; CHECK: bb21:
-; CHECK-NEXT: ret i32 41
-; CHECK-NEXT: }
diff --git a/test/tools/gold/emit-llvm.ll b/test/tools/gold/emit-llvm.ll
deleted file mode 100644
index bfb90c4bc28a..000000000000
--- a/test/tools/gold/emit-llvm.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; RUN: llvm-as %s -o %t.o
-
-; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
-; RUN: --plugin-opt=emit-llvm \
-; RUN: --plugin-opt=generate-api-file \
-; RUN: -shared %t.o -o %t2.o
-; RUN: llvm-dis %t2.o -o - | FileCheck %s
-; RUN: FileCheck --check-prefix=API %s < %T/../apifile.txt
-
-; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
-; RUN: -m elf_x86_64 --plugin-opt=save-temps \
-; RUN: -shared %t.o -o %t3.o
-; RUN: llvm-dis %t3.o.bc -o - | FileCheck %s
-; RUN: llvm-dis %t3.o.opt.bc -o - | FileCheck --check-prefix=OPT %s
-; RUN: llvm-nm %t3.o.o | FileCheck --check-prefix=NM %s
-
-; RUN: rm -f %t4.o
-; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
-; RUN: -m elf_x86_64 --plugin-opt=disable-output \
-; RUN: -shared %t.o -o %t4.o
-; RUN: not test -a %t4.o
-
-; NM: T f3
-
-target triple = "x86_64-unknown-linux-gnu"
-
-@g7 = extern_weak global i32
-; CHECK-DAG: @g7 = extern_weak global i32
-
-@g8 = external global i32
-
-; CHECK: define internal void @f1()
-; OPT-NOT: @f1
-define hidden void @f1() {
- ret void
-}
-
-; CHECK: define hidden void @f2()
-; OPT: define hidden void @f2()
-define hidden void @f2() {
- ret void
-}
-
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (void ()* @f2 to i8*)]
-
-; CHECK: define void @f3()
-; OPT: define void @f3()
-define void @f3() {
- call void @f4()
- ret void
-}
-
-; CHECK: define internal void @f4()
-; OPT-NOT: @f4
-define linkonce_odr void @f4() {
- ret void
-}
-
-; CHECK: define linkonce_odr void @f5()
-; OPT: define linkonce_odr void @f5()
-define linkonce_odr void @f5() {
- ret void
-}
-@g5 = global void()* @f5
-
-; CHECK: define internal void @f6() unnamed_addr
-; OPT: define internal void @f6() unnamed_addr
-define linkonce_odr void @f6() unnamed_addr {
- ret void
-}
-@g6 = global void()* @f6
-
-define i32* @f7() {
- ret i32* @g7
-}
-
-define i32* @f8() {
- ret i32* @g8
-}
-
-; API: f1 PREVAILING_DEF_IRONLY
-; API: f2 PREVAILING_DEF_IRONLY
-; API: f3 PREVAILING_DEF_IRONLY_EXP
-; API: f4 PREVAILING_DEF_IRONLY_EXP
-; API: f5 PREVAILING_DEF_IRONLY_EXP
-; API: f6 PREVAILING_DEF_IRONLY_EXP
-; API: f7 PREVAILING_DEF_IRONLY_EXP
-; API: f8 PREVAILING_DEF_IRONLY_EXP
-; API: g7 UNDEF
-; API: g8 UNDEF
-; API: g5 PREVAILING_DEF_IRONLY_EXP
-; API: g6 PREVAILING_DEF_IRONLY_EXP
diff --git a/test/tools/gold/linkonce-weak.ll b/test/tools/gold/linkonce-weak.ll
deleted file mode 100644
index a0cccea56cfe..000000000000
--- a/test/tools/gold/linkonce-weak.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llvm-as %s -o %t.o
-; RUN: llvm-as %p/Inputs/linkonce-weak.ll -o %t2.o
-
-; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
-; RUN: --plugin-opt=emit-llvm \
-; RUN: -shared %t.o %t2.o -o %t3.o
-; RUN: llvm-dis %t3.o -o - | FileCheck %s
-
-; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \
-; RUN: --plugin-opt=emit-llvm \
-; RUN: -shared %t2.o %t.o -o %t3.o
-; RUN: llvm-dis %t3.o -o - | FileCheck %s
-
-define linkonce_odr void @f() {
- ret void
-}
-
-; Test that we get a weak_odr regardless of the order of the files
-; CHECK: define weak_odr void @f() {
diff --git a/test/tools/gold/lit.local.cfg b/test/tools/gold/lit.local.cfg
deleted file mode 100644
index a59549d47abe..000000000000
--- a/test/tools/gold/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-if (not 'ld_plugin' in config.available_features or
- not 'X86' in config.root.targets or
- not 'PowerPC' in config.root.targets):
- config.unsupported = True
diff --git a/test/tools/gold/remarks.ll b/test/tools/gold/remarks.ll
deleted file mode 100644
index c4fa7f787f26..000000000000
--- a/test/tools/gold/remarks.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llvm-as %s -o %t.o
-
-; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
-; RUN: -plugin-opt=-pass-remarks=inline %t.o -o %t2.o 2>&1 | FileCheck %s
-
-; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \
-; RUN: %t.o -o %t2.o 2>&1 | FileCheck -allow-empty --check-prefix=NO-REMARK %s
-
-
-; CHECK: f inlined into _start
-; NO-REMARK-NOT: inlined
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @f() {
- ret i32 0
-}
-
-define i32 @_start() {
- %call = call i32 @f()
- ret i32 %call
-}
diff --git a/test/tools/llvm-cxxdump/trivial.test b/test/tools/llvm-cxxdump/trivial.test
index 450ed3b3780d..2c36620bff2f 100644
--- a/test/tools/llvm-cxxdump/trivial.test
+++ b/test/tools/llvm-cxxdump/trivial.test
@@ -56,3 +56,6 @@ ELF-I386: _ZTS1A: 1A
ELF-I386-NEXT: _ZTV1A[0]: 0
ELF-I386-NEXT: _ZTV1A[4]: _ZTI1A
ELF-I386-NEXT: _ZTV1A[8]: _ZN1A1fEv
+
+RUN: not llvm-cxxdump %t.blah 2>&1 | FileCheck --check-prefix=ENOENT %s
+ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory
diff --git a/test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo b/test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo
new file mode 100644
index 000000000000..7bdb2a7b9f82
--- /dev/null
+++ b/test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo
Binary files differ
diff --git a/test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo b/test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo
new file mode 100644
index 000000000000..f41243dc722b
--- /dev/null
+++ b/test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo
Binary files differ
diff --git a/test/tools/llvm-dwp/Inputs/simple/types/a.dwo b/test/tools/llvm-dwp/Inputs/simple/types/a.dwo
new file mode 100644
index 000000000000..1fc71ca8d175
--- /dev/null
+++ b/test/tools/llvm-dwp/Inputs/simple/types/a.dwo
Binary files differ
diff --git a/test/tools/llvm-dwp/Inputs/simple/types/b.dwo b/test/tools/llvm-dwp/Inputs/simple/types/b.dwo
new file mode 100644
index 000000000000..a69cd69eb995
--- /dev/null
+++ b/test/tools/llvm-dwp/Inputs/simple/types/b.dwo
Binary files differ
diff --git a/test/tools/llvm-dwp/Inputs/type_dedup/a.dwo b/test/tools/llvm-dwp/Inputs/type_dedup/a.dwo
new file mode 100644
index 000000000000..cfd54c5fbc1a
--- /dev/null
+++ b/test/tools/llvm-dwp/Inputs/type_dedup/a.dwo
Binary files differ
diff --git a/test/tools/llvm-dwp/Inputs/type_dedup/b.dwo b/test/tools/llvm-dwp/Inputs/type_dedup/b.dwo
new file mode 100644
index 000000000000..278369f80d66
--- /dev/null
+++ b/test/tools/llvm-dwp/Inputs/type_dedup/b.dwo
Binary files differ
diff --git a/test/tools/llvm-dwp/X86/lit.local.cfg b/test/tools/llvm-dwp/X86/lit.local.cfg
new file mode 100644
index 000000000000..05f8b38b3346
--- /dev/null
+++ b/test/tools/llvm-dwp/X86/lit.local.cfg
@@ -0,0 +1,4 @@
+if not 'X86' in config.root.targets:
+ config.unsupported = True
+
+config.suffixes = ['.test', '.cpp', '.m', '.s']
diff --git a/test/tools/llvm-dwp/X86/simple.test b/test/tools/llvm-dwp/X86/simple.test
new file mode 100644
index 000000000000..d7365c814435
--- /dev/null
+++ b/test/tools/llvm-dwp/X86/simple.test
@@ -0,0 +1,98 @@
+RUN: llvm-dwp %p/../Inputs/simple/notypes/a.dwo %p/../Inputs/simple/notypes/b.dwo -o %t
+RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=NOTYP %s
+RUN: llvm-objdump -h %t | FileCheck --check-prefix=NOTYPOBJ %s
+RUN: llvm-dwp %p/../Inputs/simple/types/a.dwo %p/../Inputs/simple/types/b.dwo -o %t
+RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=TYPES %s
+
+FIXME: For some reason, piping straight from llvm-dwp to llvm-dwarfdump doesn't behave well - looks like dwarfdump is reading/closes before dwp has finished.
+
+DWP from non-type-unit debug info for these two translation units:
+a.cpp:
+ struct foo { };
+ foo a;
+
+b.cpp:
+ struct bar { };
+ void b(bar) {
+ }
+
+CHECK-LABEL: .debug_abbrev.dwo contents:
+CHECK-LABEL: Abbrev table for offset:
+CHECK: 0x0000[[AAOFF:.*]]
+CHECK: DW_TAG_compile_unit
+CHECK: DW_TAG_variable
+CHECK: DW_TAG_structure_type
+CHECK-LABEL: Abbrev table for offset:
+CHECK: 0x0000[[BAOFF:.*]]
+CHECK: DW_TAG_compile_unit
+CHECK: DW_TAG_structure_type
+CHECK: DW_TAG_subprogram
+CHECK: DW_TAG_formal_parameter
+
+CHECK: .debug_info.dwo contents:
+CHECK: [[AOFF:0x[0-9a-f]*]]:
+CHECK-LABEL: Compile Unit: length = {{.*}} version = 0x0004 abbr_offset =
+CHECK: 0x[[AAOFF]] addr_size = 0x08 (next unit at [[BOFF:.*]])
+CHECK: DW_TAG_compile_unit
+CHECK: DW_AT_name {{.*}} "a.cpp"
+CHECK: DW_AT_GNU_dwo_id {{.*}} ([[DWOA:.*]])
+CHECK: DW_TAG_variable
+CHECK: DW_AT_name {{.*}} "a"
+CHECK: DW_TAG_structure_type
+NOTYP: DW_AT_name {{.*}} "foo"
+TYPES: DW_AT_signature {{.*}} ([[FOOSIG:.*]])
+
+CHECK: [[BOFF]]:
+CHECK-LABEL: Compile Unit: length = {{.*}} version = 0x0004 abbr_offset =
+CHECK: 0x[[BAOFF]] addr_size = 0x08 (next unit at [[XOFF:.*]])
+CHECK: DW_AT_name {{.*}} "b.cpp"
+CHECK: DW_AT_GNU_dwo_id {{.*}} ([[DWOB:.*]])
+CHECK: DW_TAG_structure_type
+NOTYP: DW_AT_name {{.*}} "bar"
+TYPES: DW_AT_signature {{.*}} ([[BARSIG:.*]])
+CHECK: DW_TAG_subprogram
+CHECK: DW_AT_name {{.*}} "b"
+CHECK: DW_TAG_formal_parameter
+
+NOTYP-NOT: .debug_types.dwo contents:
+TYPES-LABEL: .debug_types.dwo contents:
+TYPES: [[FOOUOFF:0x[0-9a-f]*]]:
+TYPES-LABEL: Type Unit: length = 0x00000020 version = 0x0004 abbr_offset =
+TYPES: 0x[[AAOFF]] addr_size = 0x08 type_signature = [[FOOSIG]] type_offset = 0x[[FOOOFF:.*]] (next unit at [[BARUOFF:.*]])
+TYPES: DW_TAG_type_unit
+TYPES: [[FOOOFF]]: DW_TAG_structure_type
+TYPES: DW_AT_name {{.*}} "foo"
+TYPES: [[BARUOFF]]:
+TYPES-LABEL: Type Unit: length = 0x00000020 version = 0x0004 abbr_offset =
+TYPES: 0x[[BAOFF]] addr_size = 0x08 type_signature = [[BARSIG]] type_offset = 0x001e (next unit at [[XUOFF:.*]])
+TYPES: DW_TAG_type_unit
+TYPES: 0x00000042: DW_TAG_structure_type
+TYPES: DW_AT_name {{.*}} "bar"
+
+CHECK-LABEL: .debug_cu_index contents:
+CHECK: Index Signature INFO ABBREV LINE STR_OFFSETS
+TYPES: 1 [[DWOA]] {{\[}}[[AOFF]], [[BOFF]]) [0x0000[[AAOFF]], 0x0000[[BAOFF]]) [0x00000000, 0x0000001a) [0x00000000, 0x00000010)
+TYPES: 3 [[DWOB]] {{\[}}[[BOFF]], [[XOFF]]) [0x0000[[BAOFF]], 0x00000099) [0x0000001a, 0x00000034) [0x00000010, 0x00000024)
+NOTYP: 3 [[DWOA]] {{\[}}[[AOFF]], [[BOFF]]) [0x0000[[AAOFF]], 0x0000[[BAOFF]]) [0x00000000, 0x00000011) [0x00000000, 0x00000010)
+NOTYP: 4 [[DWOB]] {{\[}}[[BOFF]], [[XOFF]]) [0x0000[[BAOFF]], 0x00000075) [0x00000011, 0x00000022) [0x00000010, 0x00000024)
+
+CHECK-LABEL: .debug_tu_index contents:
+NOTYP-NOT: Index
+TYPES: Index Signature TYPES ABBREV LINE STR_OFFSETS
+TYPES: 1 [[FOOSIG]] {{\[}}[[FOOUOFF]], [[BARUOFF]]) [0x0000[[AAOFF]], 0x0000[[BAOFF]]) [0x00000000, 0x0000001a) [0x00000000, 0x00000010)
+TYPES: 4 [[BARSIG]] {{\[}}[[BARUOFF]], [[XUOFF]]) [0x0000[[BAOFF]], 0x00000099) [0x0000001a, 0x00000034) [0x00000010, 0x00000024)
+
+Ensure we do not create a debug_tu_index, even an empty or malformed one.
+NOTYPOBJ-NOT: .debug_tu_index
+
+CHECK-LABEL: .debug_str.dwo contents:
+CHECK: "clang version
+CHECK: 0x[[ACPP:.*]]: "a.cpp"
+CHECK-NOT: "clang version
+CHECK: 0x[[BCPP:.*]]: "b.cpp"
+
+CHECK-LABEL: .debug_str_offsets.dwo contents:
+CHECK: : 00000000
+CHECK: : [[ACPP]]
+CHECK: : 00000000
+CHECK: : [[BCPP]]
diff --git a/test/tools/llvm-dwp/X86/type_dedup.test b/test/tools/llvm-dwp/X86/type_dedup.test
new file mode 100644
index 000000000000..3005705fcaa0
--- /dev/null
+++ b/test/tools/llvm-dwp/X86/type_dedup.test
@@ -0,0 +1,35 @@
+RUN: llvm-dwp %p/../Inputs/type_dedup/a.dwo %p/../Inputs/type_dedup/b.dwo -o %t
+RUN: llvm-dwarfdump %t | FileCheck %s
+
+a.cpp:
+ struct common { };
+ common a1;
+ struct adistinct { };
+ adistinct a2;
+
+b.cpp:
+ struct common { };
+ common b1;
+ struct bdistinct { };
+ bdistinct b2;
+
+CHECK-LABEL: .debug_types.dwo contents:
+CHECK: [[COMMONUOFF:0x[0-9a-f]*]]:
+CHECK-LABEL: Type Unit: length = 0x00000020 version = 0x0004 abbr_offset =
+CHECK: 0x0000 addr_size = 0x08 type_signature = [[COMMONSIG:0x[0-9a-f]*]] type_offset = 0x[[COMMONOFF:.*]] (next unit at [[AUOFF:.*]])
+CHECK: DW_TAG_type_unit
+CHECK: [[COMMONOFF]]: DW_TAG_structure_type
+CHECK: DW_AT_name {{.*}} "common"
+CHECK: [[AUOFF]]:
+CHECK-LABEL: Type Unit: length = 0x00000020 version = 0x0004 abbr_offset =
+CHECK: 0x0000 addr_size = 0x08 type_signature = [[ASIG:0x[0-9a-f]*]] type_offset = 0x[[AOFF:.*]] (next unit at [[BUOFF:.*]])
+CHECK: DW_TAG_type_unit
+CHECK: 0x00000042: DW_TAG_structure_type
+CHECK: DW_AT_name {{.*}} "adistinct"
+CHECK: [[BUOFF]]:
+CHECK-LABEL: Type Unit: length = 0x00000020 version = 0x0004 abbr_offset =
+CHECK: 0x{{.*}} addr_size = 0x08 type_signature = [[BSIG:0x[0-9a-f]*]] type_offset = 0x[[BOFF:.*]] (next unit at [[XUOFF:.*]])
+CHECK: DW_TAG_type_unit
+CHECK: 0x00000066: DW_TAG_structure_type
+CHECK: DW_AT_name {{.*}} "bdistinct"
+CHECK-NOT: Type Unit
diff --git a/test/tools/llvm-lto/Inputs/thinlto.ll b/test/tools/llvm-lto/Inputs/thinlto.ll
new file mode 100644
index 000000000000..4e0840f3691e
--- /dev/null
+++ b/test/tools/llvm-lto/Inputs/thinlto.ll
@@ -0,0 +1,4 @@
+define void @g() {
+entry:
+ ret void
+}
diff --git a/test/tools/llvm-lto/thinlto.ll b/test/tools/llvm-lto/thinlto.ll
new file mode 100644
index 000000000000..5bd9dd975d2d
--- /dev/null
+++ b/test/tools/llvm-lto/thinlto.ll
@@ -0,0 +1,24 @@
+; Test combined function index generation for ThinLTO via llvm-lto.
+; RUN: llvm-as -function-summary %s -o %t.o
+; RUN: llvm-as -function-summary %p/Inputs/thinlto.ll -o %t2.o
+; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
+; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
+; RUN: not test -e %t3
+
+; COMBINED: <MODULE_STRTAB_BLOCK
+; COMBINED-NEXT: <ENTRY {{.*}} record string = '{{.*}}thinlto.ll.tmp{{.*}}.o'
+; COMBINED-NEXT: <ENTRY {{.*}} record string = '{{.*}}thinlto.ll.tmp{{.*}}.o'
+; COMBINED-NEXT: </MODULE_STRTAB_BLOCK
+; COMBINED-NEXT: <FUNCTION_SUMMARY_BLOCK
+; COMBINED-NEXT: <COMBINED_ENTRY
+; COMBINED-NEXT: <COMBINED_ENTRY
+; COMBINED-NEXT: </FUNCTION_SUMMARY_BLOCK
+; COMBINED-NEXT: <VALUE_SYMTAB
+; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
+; COMBINED-NEXT: <COMBINED_FNENTRY {{.*}} record string = '{{f|g}}'
+; COMBINED-NEXT: </VALUE_SYMTAB
+
+define void @f() {
+entry:
+ ret void
+}
diff --git a/test/tools/llvm-mc/basic.test b/test/tools/llvm-mc/basic.test
new file mode 100644
index 000000000000..edac65257e55
--- /dev/null
+++ b/test/tools/llvm-mc/basic.test
@@ -0,0 +1,3 @@
+# RUN: not llvm-mc %t.blah -o %t2 2>&1 | FileCheck --check-prefix=ENOENT %s
+
+# ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory
diff --git a/test/tools/llvm-mc/fatal_warnings.test b/test/tools/llvm-mc/fatal_warnings.test
new file mode 100644
index 000000000000..e9405ada7a04
--- /dev/null
+++ b/test/tools/llvm-mc/fatal_warnings.test
@@ -0,0 +1,4 @@
+# RUN: not llvm-mc --fatal-warnings %s 2>&1 | FileCheck %s
+
+# CHECK: error: .warning directive invoked in source file
+.warning
diff --git a/test/tools/llvm-mc/line_end_with_space.test b/test/tools/llvm-mc/line_end_with_space.test
index 2ce313990af0..673d05a68067 100644
--- a/test/tools/llvm-mc/line_end_with_space.test
+++ b/test/tools/llvm-mc/line_end_with_space.test
@@ -1,2 +1 @@
RUN: llvm-mc -disassemble %s
- \ No newline at end of file
diff --git a/test/tools/llvm-mc/lit.local.cfg b/test/tools/llvm-mc/lit.local.cfg
new file mode 100644
index 000000000000..a1bda0f1fdc8
--- /dev/null
+++ b/test/tools/llvm-mc/lit.local.cfg
@@ -0,0 +1,4 @@
+# Requires a non-empty default triple for these tests
+if 'default_triple' not in config.available_features:
+ config.unsupported = True
+
diff --git a/test/tools/llvm-mc/no_warnings.test b/test/tools/llvm-mc/no_warnings.test
new file mode 100644
index 000000000000..973dc271a581
--- /dev/null
+++ b/test/tools/llvm-mc/no_warnings.test
@@ -0,0 +1,4 @@
+# RUN: llvm-mc --no-warn %s 2>&1 | FileCheck %s
+
+# CHECK-NOT: warning:
+.warning
diff --git a/test/tools/llvm-nm/X86/IRobj.test b/test/tools/llvm-nm/X86/IRobj.test
new file mode 100644
index 000000000000..e6fa517d00e8
--- /dev/null
+++ b/test/tools/llvm-nm/X86/IRobj.test
@@ -0,0 +1,11 @@
+# RUN: llvm-nm -format darwin %p/Inputs/test.IRobj-x86_64 | FileCheck %s
+
+# CHECK: ---------------- (LTO,RODATA) external _global_const
+# CHECK: ---------------- (LTO,DATA) external _global_data
+# CHECK: ---------------- (LTO,CODE) external _global_func
+# CHECK: ---------------- (LTO,RODATA) private external _hidden_const
+# CHECK: ---------------- (LTO,DATA) private external _hidden_data
+# CHECK: ---------------- (LTO,CODE) private external _hidden_func
+# CHECK: ---------------- (LTO,RODATA) non-external _static_const
+# CHECK: ---------------- (LTO,DATA) non-external _static_data
+# CHECK: ---------------- (LTO,CODE) non-external _static_func
diff --git a/test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64 b/test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64
new file mode 100644
index 000000000000..2b59a1cfc63b
--- /dev/null
+++ b/test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64
Binary files differ
diff --git a/test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64 b/test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64
new file mode 100644
index 000000000000..8bfa857fa7d1
--- /dev/null
+++ b/test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64
Binary files differ
diff --git a/test/tools/llvm-nm/X86/externalonly.test b/test/tools/llvm-nm/X86/externalonly.test
new file mode 100644
index 000000000000..c37412987865
--- /dev/null
+++ b/test/tools/llvm-nm/X86/externalonly.test
@@ -0,0 +1,4 @@
+# RUN: llvm-nm -g %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s
+
+# CHECK-NOT: EH_frame0
+# CHECK: _main
diff --git a/test/tools/llvm-nm/X86/groupingflags.test b/test/tools/llvm-nm/X86/groupingflags.test
new file mode 100644
index 000000000000..a1a258506e97
--- /dev/null
+++ b/test/tools/llvm-nm/X86/groupingflags.test
@@ -0,0 +1,5 @@
+# RUN: llvm-nm -gjp %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s
+
+# CHECK: _main
+# CHECK: _main.eh
+# CHECK: _printf
diff --git a/test/tools/llvm-nm/X86/posixMachO.test b/test/tools/llvm-nm/X86/posixMachO.test
new file mode 100644
index 000000000000..a0d114237ce0
--- /dev/null
+++ b/test/tools/llvm-nm/X86/posixMachO.test
@@ -0,0 +1,7 @@
+# RUN: llvm-nm -P %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s
+
+# CHECK: EH_frame0 s 104 0
+# CHECK: L_.str s 59 0
+# CHECK: _main T 0 0
+# CHECK: _main.eh S 128 0
+# CHECK: _printf U 0 0
diff --git a/test/tools/llvm-nm/lit.local.cfg b/test/tools/llvm-nm/lit.local.cfg
new file mode 100644
index 000000000000..c8625f4d9d24
--- /dev/null
+++ b/test/tools/llvm-nm/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+ config.unsupported = True
diff --git a/test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test b/test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test
new file mode 100644
index 000000000000..cb9560d74dfb
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test
@@ -0,0 +1,30 @@
+# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %t
+# RUN: llvm-objdump -d %t | FileCheck %s
+
+.section .mysection,"ax",@progbits
+.globl _start
+_start:
+ adr x1,msg
+msg: .asciz "Hello, world\n"
+msgend:
+
+.section .myothersection,"ax",@progbits
+ adrp x1,mystr
+mystr:
+ .asciz "blah"
+ .size mystr, 4
+
+# CHECK: Disassembly of section .mysection:
+# CHECK: _start:
+# CHECK: 0: 21 00 00 10 adr x1, #4
+# CHECK: msg:
+# CHECK: 4: 48 65 6c 6c .word
+# CHECK: 8: 6f 2c 20 77 .word
+# CHECK: c: 6f 72 6c 64 .word
+# CHECK: 10: 0a 00 .short
+# CHECK: Disassembly of section .myothersection:
+# CHECK: $x.2:
+# CHECK: 0: 01 00 00 90 adrp x1, #0
+# CHECK: mystr:
+# CHECK: 4: 62 6c 61 68 .word
+# CHECK: 8: 00 .byte
diff --git a/test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64 b/test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64
new file mode 100644
index 000000000000..1c5413230d7a
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64
Binary files differ
diff --git a/test/tools/llvm-objdump/Inputs/libbogus1.a b/test/tools/llvm-objdump/Inputs/libbogus1.a
new file mode 100644
index 000000000000..510c1455527d
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/libbogus1.a
@@ -0,0 +1,13 @@
+!<arch>
+hello.c 1444941273 124 0 100644 10% `
+#include <stdio.h>
+#include <stdlib.h>
+int
+main()
+{
+ printf("Hello World\n");
+ return EXIT_SUCCESS;
+}
+foo.c 1444941645 124 0 100644 1% `
+void foo(void){}
+
diff --git a/test/tools/llvm-objdump/Inputs/libbogus2.a b/test/tools/llvm-objdump/Inputs/libbogus2.a
new file mode 100644
index 000000000000..2ccb7f31c09d
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/libbogus2.a
@@ -0,0 +1,13 @@
+!<arch>
+hello.c 1444941273 124 0 100644 102 `
+#include <stdio.h>
+#include <stdlib.h>
+int
+main()
+{
+ printf("Hello World\n");
+ return EXIT_SUCCESS;
+}
+foo.c 1444941645 124 0 100644 1% `
+void foo(void){}
+
diff --git a/test/tools/llvm-objdump/Inputs/libbogus3.a b/test/tools/llvm-objdump/Inputs/libbogus3.a
new file mode 100644
index 000000000000..f15a7329f9f4
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/libbogus3.a
@@ -0,0 +1,16 @@
+!<arch>
+hello.c 1444941273 124 0 100644 102 `
+#include <stdio.h>
+#include <stdlib.h>
+int
+main()
+{
+ printf("Hello World\n");
+ return EXIT_SUCCESS;
+}
+foo.c 1444941645 124 0 100644 171 `
+void foo(void){}
+
+bar.c 1445026190 124 0 100644 17 `
+void foo(void){}
+
diff --git a/test/tools/llvm-objdump/Inputs/section-filter.obj b/test/tools/llvm-objdump/Inputs/section-filter.obj
new file mode 100644
index 000000000000..7dc5dae26b79
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/section-filter.obj
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/disassemble-data.obj b/test/tools/llvm-objdump/X86/Inputs/disassemble-data.obj
new file mode 100644
index 000000000000..6381120a1c83
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/disassemble-data.obj
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 b/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386
new file mode 100755
index 000000000000..c0fbc8803630
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/internal.exe.coff-x86_64 b/test/tools/llvm-objdump/X86/Inputs/internal.exe.coff-x86_64
new file mode 100755
index 000000000000..0c38deb7cf0b
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/internal.exe.coff-x86_64
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.a b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.a
new file mode 100644
index 000000000000..b784d8163f54
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.a
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.macho
new file mode 100644
index 000000000000..d81f9f00f4a7
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.macho
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0006.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0006.macho
new file mode 100644
index 000000000000..13a4e9aea7fa
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0006.macho
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.macho
new file mode 100644
index 000000000000..4c7e78d93b62
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.macho
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0040.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0040.macho
new file mode 100644
index 000000000000..f0765a4ce51a
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0040.macho
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0080.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0080.macho
new file mode 100644
index 000000000000..53e3a97b4aa9
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0080.macho
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0261.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0261.macho
new file mode 100644
index 000000000000..a573da3b2fbc
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0261.macho
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.macho
new file mode 100644
index 000000000000..cc438414f112
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.macho
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/coff-dis-internal.test b/test/tools/llvm-objdump/X86/coff-dis-internal.test
new file mode 100644
index 000000000000..530dadc8142e
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/coff-dis-internal.test
@@ -0,0 +1,3 @@
+RUN: llvm-objdump -d %p/Inputs/internal.exe.coff-x86_64 | FileCheck %s
+
+CHECK: callq {{.*}} <foo>
diff --git a/test/tools/llvm-objdump/X86/coff-disassemble-export.test b/test/tools/llvm-objdump/X86/coff-disassemble-export.test
new file mode 100644
index 000000000000..2f0b211815cf
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/coff-disassemble-export.test
@@ -0,0 +1,8 @@
+// RUN: llvm-objdump -d %p/Inputs/disassemble.dll.coff-i386 | \
+// RUN: FileCheck %s
+
+// CHECK-LABEL: g:
+// CHECK: calll 8 <f>
+
+// CHECK-LABEL: f:
+// CHECK: calll -24 <g>
diff --git a/test/tools/llvm-objdump/X86/disassemble-data.test b/test/tools/llvm-objdump/X86/disassemble-data.test
new file mode 100644
index 000000000000..e9c4e7e1e5f2
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/disassemble-data.test
@@ -0,0 +1,4 @@
+// This test checks that -D disassembles from a data section
+// RUN: llvm-objdump -D %p/Inputs/disassemble-data.obj | FileCheck %s
+
+// CHECK: Disassembly of section .data: \ No newline at end of file
diff --git a/test/tools/llvm-objdump/X86/macho-symbol-table.test b/test/tools/llvm-objdump/X86/macho-symbol-table.test
index 826d78af68b1..19c619e73d07 100644
--- a/test/tools/llvm-objdump/X86/macho-symbol-table.test
+++ b/test/tools/llvm-objdump/X86/macho-symbol-table.test
@@ -1,8 +1,8 @@
RUN: llvm-objdump -macho -t %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s
CHECK: SYMBOL TABLE:
-CHECK: 000000000000003b l F __TEXT,__cstring L_.str
-CHECK: 0000000000000068 l F __TEXT,__eh_frame EH_frame0
+CHECK: 000000000000003b l __TEXT,__cstring L_.str
+CHECK: 0000000000000068 l __TEXT,__eh_frame EH_frame0
CHECK: 0000000000000000 g F __TEXT,__text _main
-CHECK: 0000000000000080 g F __TEXT,__eh_frame _main.eh
+CHECK: 0000000000000080 g __TEXT,__eh_frame _main.eh
CHECK: 0000000000000000 *UND* _printf
diff --git a/test/tools/llvm-objdump/X86/malformed-machos.test b/test/tools/llvm-objdump/X86/malformed-machos.test
new file mode 100644
index 000000000000..a47e43443c5d
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/malformed-machos.test
@@ -0,0 +1,41 @@
+// These test checks that llvm-objdump will not crash with malformed Mach-O
+// files. So the check line is not all that important but the bug fixes to
+// make sure llvm-objdump is robust is what matters.
+# RUN: llvm-objdump -macho -objc-meta-data \
+# RUN: %p/Inputs/malformed-machos/mem-crup-0001.macho \
+# RUN: | FileCheck -check-prefix=m0001 %s
+
+# m0001: (method_t extends past the end of the section)
+
+# RUN: llvm-objdump -macho -objc-meta-data \
+# RUN: %p/Inputs/malformed-machos/mem-crup-0006.macho \
+# RUN: | FileCheck -check-prefix=m0006 %s
+
+# m0006: ivarLayout 0x8
+
+# RUN: llvm-objdump -macho -objc-meta-data \
+# RUN: %p/Inputs/malformed-machos/mem-crup-0006.macho \
+# RUN: | FileCheck -check-prefix=m0010 %s
+
+# m0010: 00000000000010e0 0x10e8 _OBJC_CLASS_
+
+# RUN: llvm-objdump -macho -objc-meta-data \
+# RUN: %p/Inputs/malformed-machos/mem-crup-0040.macho \
+# RUN: | FileCheck -check-prefix=m0040 %s
+
+# m0040: 00000000000010a0 0xf39 -[tiny_dylib init]
+
+# RUN: llvm-objdump -macho -objc-meta-data \
+# RUN: %p/Inputs/malformed-machos/mem-crup-0080.macho \
+# RUN: | FileCheck -check-prefix=m0080 %s
+
+# m0080: data 0xf960000 (struct class_ro_t *)
+
+# RUN: llvm-objdump -macho -objc-meta-data \
+# RUN: %p/Inputs/malformed-machos/mem-crup-0261.macho
+
+# RUN: llvm-objdump -macho -disassemble \
+# RUN: %p/Inputs/malformed-machos/mem-crup-0337.macho \
+# RUN: | FileCheck -check-prefix=m0337 %s
+
+# m0337: subq $16, %rsp
diff --git a/test/tools/llvm-objdump/eh_frame-arm64.test b/test/tools/llvm-objdump/eh_frame-arm64.test
new file mode 100644
index 000000000000..f25e035a266e
--- /dev/null
+++ b/test/tools/llvm-objdump/eh_frame-arm64.test
@@ -0,0 +1,23 @@
+# RUN: llvm-objdump -unwind-info %p/Inputs/eh_frame.macho-arm64 2>/dev/null | FileCheck %s
+
+# CHECK: Contents of __eh_frame section:
+# CHECK: CIE:
+# CHECK: Length: 16
+# CHECK: CIE ID: 0
+# CHECK: Version: 1
+# CHECK: Augmentation String: zR
+# CHECK: Code Alignment Factor: 1
+# CHECK: Data Alignment Factor: -8
+# CHECK: Return Address Register: 30
+# CHECK: Augmentation Data Length: 1
+# CHECK: FDE Address Pointer Encoding: 16
+# CHECK: Instructions:
+# CHECK: 0c 1f 00
+# CHECK: FDE:
+# CHECK: Length: 32
+# CHECK: CIE Offset: 0
+# CHECK: PC Begin: ffffffffffffffe4
+# CHECK: PC Range: 0000000000000020
+# CHECK: Augmentation Data Length: 0
+# CHECK: Instructions:
+# CHECK: 48 0e 10 9e 01 9d 02 00 00 00 00
diff --git a/test/tools/llvm-objdump/malformed-archives.test b/test/tools/llvm-objdump/malformed-archives.test
new file mode 100644
index 000000000000..e0f165d37ed7
--- /dev/null
+++ b/test/tools/llvm-objdump/malformed-archives.test
@@ -0,0 +1,20 @@
+// These test checks that llvm-objdump will not crash with malformed Archive
+// files. So the check line is not all that important but the bug fixes to
+// make sure llvm-objdump is robust is what matters.
+# RUN: llvm-objdump -macho -archive-headers \
+# RUN: %p/Inputs/libbogus1.a \
+# RUN: 2>&1 | FileCheck -check-prefix=bogus1 %s
+
+# bogus1: Invalid data was encountered while parsing the file
+
+# RUN: not llvm-objdump -macho -archive-headers \
+# RUN: %p/Inputs/libbogus2.a \
+# RUN: 2>&1 | FileCheck -check-prefix=bogus2 %s
+
+# bogus2: LLVM ERROR: Invalid data was encountered while parsing the file
+
+# RUN: not llvm-objdump -macho -archive-headers \
+# RUN: %p/Inputs/libbogus3.a \
+# RUN: 2>&1 | FileCheck -check-prefix=bogus3 %s
+
+# bogus3: LLVM ERROR: Invalid data was encountered while parsing the file
diff --git a/test/tools/llvm-objdump/section-filter.test b/test/tools/llvm-objdump/section-filter.test
new file mode 100644
index 000000000000..9c7ab31b0d72
--- /dev/null
+++ b/test/tools/llvm-objdump/section-filter.test
@@ -0,0 +1,7 @@
+// This test checks that --section works correctly
+// RUN: llvm-objdump -h %p/Inputs/section-filter.obj -j=.text \
+// RUN: --section=.bss | FileCheck %s
+
+# CHECK: .text
+# CHECK-NOT: .data
+# CHECK: .bss \ No newline at end of file
diff --git a/test/tools/llvm-pdbdump/regex-filter.test b/test/tools/llvm-pdbdump/regex-filter.test
index 8b9eca63f585..cfc910e07171 100644
--- a/test/tools/llvm-pdbdump/regex-filter.test
+++ b/test/tools/llvm-pdbdump/regex-filter.test
@@ -10,6 +10,10 @@
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_WHOLE_CLASS %s
; RUN: llvm-pdbdump -symbols -globals -exclude-compilands="FilterTest.obj" \
; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_COMPILAND %s
+; RUN: llvm-pdbdump -types -include-types="FilterTestClass" \
+; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_TYPES %s
+; RUN: llvm-pdbdump -types -symbols -globals -include-symbols="[[:<:]](IntGlobalVar|DoubleGlobalVar)[[:>:]]" \
+; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_VARS %s
; NO_FILTER: ---TYPES---
; NO_FILTER: Enums:
@@ -73,3 +77,19 @@
; EXCLUDE_COMPILAND-NOT: __cdecl main
; EXCLUDE_COMPILAND: * Linker *
; EXCLUDE_COMPILAND: ---GLOBALS---
+
+; Everything but types are displayed normally. But FilterTestClass is
+; the only type that should be displayed.
+; INCLUDE_ONLY_TYPES: ---TYPES---
+; INCLUDE_ONLY_TYPES-NOT: GlobalTypedef
+; INCLUDE_ONLY_TYPES: class FilterTestClass
+
+; We should only see DoubleGlobalVar and IntGlobalVar. This means that even
+; variables printed in class definitions should be filtered out.
+; INCLUDE_ONLY_VARS: ---TYPES---
+; INCLUDE_ONLY_VARS: class FilterTestClass
+; INCLUDE_ONLY_VARS-NOT: IntMemberVar
+; INCLUDE_ONLY_VARS-NOT: IntDoubleVar
+; INCLUDE_ONLY_VARS: ---GLOBALS---
+; INCLUDE_ONLY_VARS: DoubleGlobalVar
+; INCLUDE_ONLY_VARS: IntGlobalVar
diff --git a/test/tools/llvm-profdata/Inputs/basic.proftext b/test/tools/llvm-profdata/Inputs/basic.proftext
new file mode 100644
index 000000000000..db934da7c07a
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/basic.proftext
@@ -0,0 +1,19 @@
+foo
+10
+2
+499500
+179900
+
+main
+16650
+4
+1
+1000
+1000000
+499500
+
+foo2
+10
+2
+500500
+180100
diff --git a/test/tools/llvm-profdata/Inputs/c-general.profraw b/test/tools/llvm-profdata/Inputs/c-general.profraw
index e8cef21de5f2..a4d94858a9e7 100644
--- a/test/tools/llvm-profdata/Inputs/c-general.profraw
+++ b/test/tools/llvm-profdata/Inputs/c-general.profraw
Binary files differ
diff --git a/test/tools/llvm-profdata/Inputs/compat.profdata.v2 b/test/tools/llvm-profdata/Inputs/compat.profdata.v2
new file mode 100644
index 000000000000..969867584a99
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/compat.profdata.v2
Binary files differ
diff --git a/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov b/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov
new file mode 100644
index 000000000000..0099280af2f9
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov
Binary files differ
diff --git a/test/tools/llvm-profdata/Inputs/inline-samples.afdo b/test/tools/llvm-profdata/Inputs/inline-samples.afdo
new file mode 100644
index 000000000000..f8680d86fcb4
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/inline-samples.afdo
@@ -0,0 +1,20 @@
+main:366846:0
+ 2.1: 60401
+ 4: 0
+ 3: 0
+ 0: 0
+ 2.3: 60401
+ 1: 0
+ 2.3: _Z3fool:246044
+ 1.2: 39280
+ 1.4: 46871
+ 1: 60401
+ 1.3: _Z3bari:0
+ 1.2: 0
+ 1.1: 0
+ 1.8: _Z3bari:0
+ 1.2: 0
+ 1.1: 0
+ 1.7: _Z3bari:99492
+ 1.2: 46732
+ 1.1: 52760
diff --git a/test/tools/llvm-profdata/Inputs/overflow-instr.proftext b/test/tools/llvm-profdata/Inputs/overflow-instr.proftext
new file mode 100644
index 000000000000..48d1db88bcdf
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/overflow-instr.proftext
@@ -0,0 +1,6 @@
+overflow
+1
+3
+18446744073709551615
+9223372036854775808
+18446744073709551615
diff --git a/test/tools/llvm-profdata/Inputs/overflow-sample.proftext b/test/tools/llvm-profdata/Inputs/overflow-sample.proftext
new file mode 100644
index 000000000000..a5486bbd819c
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/overflow-sample.proftext
@@ -0,0 +1,7 @@
+_Z3bari:18446744073709551615:1000
+ 1: 18446744073709551615
+_Z3fooi:18446744073709551615:1000
+ 1: 18446744073709551615
+main:1000:0
+ 1: 500 _Z3bari:18446744073709551615
+ 2: 500 _Z3fooi:18446744073709551615
diff --git a/test/tools/llvm-profdata/Inputs/sample-profile.proftext b/test/tools/llvm-profdata/Inputs/sample-profile.proftext
index 9dc6d4310da9..54c821243afa 100644
--- a/test/tools/llvm-profdata/Inputs/sample-profile.proftext
+++ b/test/tools/llvm-profdata/Inputs/sample-profile.proftext
@@ -1,12 +1,12 @@
_Z3bari:20301:1437
-1: 1437
+ 1: 1437
_Z3fooi:7711:610
-1: 610
+ 1: 610
main:184019:0
-4: 534
-4.2: 534
-5: 1075
-5.1: 1075
-6: 2080
-7: 534
-9: 2064 _Z3bari:1471 _Z3fooi:631
+ 4: 534
+ 4.2: 534
+ 5: 1075
+ 5.1: 1075
+ 6: 2080
+ 7: 534
+ 9: 2064 _Z3bari:1471 _Z3fooi:631
diff --git a/test/tools/llvm-profdata/Inputs/text-format-errors.text.bin b/test/tools/llvm-profdata/Inputs/text-format-errors.text.bin
new file mode 100644
index 000000000000..9e2e3f5c2b83
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/text-format-errors.text.bin
@@ -0,0 +1 @@
+ÿåбôÉ”¨ \ No newline at end of file
diff --git a/test/tools/llvm-profdata/Inputs/vp-malform.proftext b/test/tools/llvm-profdata/Inputs/vp-malform.proftext
new file mode 100644
index 000000000000..2db3096cecf1
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/vp-malform.proftext
@@ -0,0 +1,42 @@
+foo
+# Func Hash:
+10
+# Num Counters:
+2
+# Counter Values:
+999000
+359800
+
+foo2
+# Func Hash:
+10
+# Num Counters:
+2
+# Counter Values:
+1001000
+360200
+
+main
+# Func Hash:
+16650
+# Num Counters:
+4
+# Counter Values:
+2
+2000
+2000000
+999000
+# NumValueKinds
+1
+# Value Kind IPVK_IndirectCallTarget
+0
+# NumSites
+3
+# Values for each site
+0
+2
+# !!!! Malformed Value/Count pair
+foo+100
+foo2:1000
+1
+foo2:20000
diff --git a/test/tools/llvm-profdata/Inputs/vp-malform2.proftext b/test/tools/llvm-profdata/Inputs/vp-malform2.proftext
new file mode 100644
index 000000000000..02ed5a968d80
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/vp-malform2.proftext
@@ -0,0 +1,32 @@
+foo
+# Func Hash:
+10
+# Num Counters:
+2
+# Counter Values:
+999000
+359800
+
+main
+# Func Hash:
+16650
+# Num Counters:
+4
+# Counter Values:
+2
+2000
+2000000
+999000
+# NumValueKinds
+1
+# Value Kind IPVK_IndirectCallTarget
+0
+# NumSites
+3
+# Values for each site
+0
+# !! Malformed value site, missing one value
+2
+foo:100
+1
+foo2:20000
diff --git a/test/tools/llvm-profdata/Inputs/vp-truncate.proftext b/test/tools/llvm-profdata/Inputs/vp-truncate.proftext
new file mode 100644
index 000000000000..98b4b572b65c
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/vp-truncate.proftext
@@ -0,0 +1,36 @@
+foo
+# Func Hash:
+10
+# Num Counters:
+2
+# Counter Values:
+999000
+359800
+
+foo2
+# Func Hash:
+10
+# Num Counters:
+2
+# Counter Values:
+1001000
+360200
+
+main
+# Func Hash:
+16650
+# Num Counters:
+4
+# Counter Values:
+2
+2000
+2000000
+999000
+# NumValueKinds
+1
+# Value Kind IPVK_IndirectCallTarget
+0
+# NumSites
+3
+# Values for each site
+0
diff --git a/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata b/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata
new file mode 100644
index 000000000000..4ed07660f654
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata
Binary files differ
diff --git a/test/tools/llvm-profdata/Inputs/weight-instr-foo.profdata b/test/tools/llvm-profdata/Inputs/weight-instr-foo.profdata
new file mode 100644
index 000000000000..581ef39a55b2
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/weight-instr-foo.profdata
Binary files differ
diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext
new file mode 100644
index 000000000000..a910f745e6c7
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext
@@ -0,0 +1,8 @@
+bar:1772037:35370
+ 17: 35370
+ 18: 35370
+ 19: 7005
+ 20: 29407
+ 21: 12170
+ 23: 18150 bar:19829
+ 25: 36666
diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext
new file mode 100644
index 000000000000..155ec5d00315
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext
@@ -0,0 +1,8 @@
+foo:1763288:35327
+ 7: 35327
+ 8: 35327
+ 9: 6930
+ 10: 29341
+ 11: 11906
+ 13: 18185 foo:19531
+ 15: 36458
diff --git a/test/tools/llvm-profdata/c-general.test b/test/tools/llvm-profdata/c-general.test
index 01435303d445..efa9bfa18d73 100644
--- a/test/tools/llvm-profdata/c-general.test
+++ b/test/tools/llvm-profdata/c-general.test
@@ -6,7 +6,7 @@ REGENERATE: $ SRC=path/to/llvm
REGENERATE: $ CFE=$SRC/tools/clang
REGENERATE: $ TESTDIR=$SRC/test/tools/llvm-profdata
REGENERATE: $ CFE_TESTDIR=$CFE/test/Profile
-REGENERATE: $ clang -o a.out -fprofile-instr-generate $CFE_TESTDIR/test/Profile/c-general.c
+REGENERATE: $ clang -o a.out -fprofile-instr-generate $CFE_TESTDIR/c-general.c
REGENERATE: $ LLVM_PROFILE_FILE=$TESTDIR/Inputs/c-general.profraw ./a.out
RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - | FileCheck %s -check-prefix=CHECK
@@ -14,11 +14,11 @@ RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - --function=switches | F
SWITCHES-LABEL: Counters:
SWITCHES-NEXT: switches:
-SWITCHES-NEXT: Hash: 0x0000000000000013
+SWITCHES-NEXT: Hash: 0x2618e4f23f2e8daa
SWITCHES-NEXT: Counters: 19
SWITCHES-NEXT: Function count: 1
SWITCHES-LABEL: Functions shown: 1
-CHECK-LABEL: Total functions: 11
+CHECK-LABEL: Total functions: 12
CHECK-NEXT: Maximum function count: 1
CHECK-NEXT: Maximum internal block count: 100
diff --git a/test/tools/llvm-profdata/compat.proftext b/test/tools/llvm-profdata/compat.proftext
index 14da3374b5e9..139202d162e6 100644
--- a/test/tools/llvm-profdata/compat.proftext
+++ b/test/tools/llvm-profdata/compat.proftext
@@ -45,3 +45,23 @@ large_numbers
# SUMMARY: Total functions: 3
# SUMMARY: Maximum function count: 2305843009213693952
# SUMMARY: Maximum internal block count: 1152921504606846976
+
+# RUN: llvm-profdata show %S/Inputs/compat.profdata.v2 -all-functions --counts | FileCheck %s -check-prefix=FORMATV2
+
+# FORMATV2: Counters:
+# FORMATV2-NEXT: foo:
+# FORMATV2-NEXT: Hash: 0x000000000000000a
+# FORMATV2-NEXT: Counters: 2
+# FORMATV2-NEXT: Function count: 499500
+# FORMATV2-NEXT: Block counts: [179900]
+# FORMATV2-NEXT: main:
+# FORMATV2-NEXT: Hash: 0x000000000000410a
+# FORMATV2-NEXT: Counters: 4
+# FORMATV2-NEXT: Function count: 1
+# FORMATV2-NEXT: Block counts: [1000, 1000000, 499500]
+# FORMATV2-NEXT: Functions shown: 2
+# FORMATV2-NEXT: Total functions: 2
+# FORMATV2-NEXT: Maximum function count: 499500
+# FORMATV2-NEXT: Maximum internal block count: 1000000
+
+
diff --git a/test/tools/llvm-profdata/count-mismatch.proftext b/test/tools/llvm-profdata/count-mismatch.proftext
index 1a2e73fbffdb..b42b11dbcf9d 100644
--- a/test/tools/llvm-profdata/count-mismatch.proftext
+++ b/test/tools/llvm-profdata/count-mismatch.proftext
@@ -14,7 +14,8 @@ foo
# The hash matches, but we can't combine these because the number of
# counters differs.
-# MERGE_ERRS: count-mismatch.proftext: foo: Function count mismatch
+# MERGE_ERRS: count-mismatch.proftext: foo: Function basic block count change detected (counter mismatch)
+# MERGE_ERRS: Make sure that all profile data to be merged is generated from the same binary.
foo
1024
3
diff --git a/test/tools/llvm-profdata/gcc-gcov-sample-profile.test b/test/tools/llvm-profdata/gcc-gcov-sample-profile.test
new file mode 100644
index 000000000000..dbcc74e1284f
--- /dev/null
+++ b/test/tools/llvm-profdata/gcc-gcov-sample-profile.test
@@ -0,0 +1,29 @@
+The input gcov file has been generated on a little endian machine. Expect
+failures on big endian systems.
+
+XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
+
+Tests for sample profiles encoded in GCC's gcov format.
+
+1- Show all functions. This profile has a single main() function with several
+ inlined callees.
+RUN: llvm-profdata show --sample %p/Inputs/gcc-sample-profile.gcov | FileCheck %s --check-prefix=SHOW1
+SHOW1: Function: main: 364084, 0, 6 sampled lines
+SHOW1: 2.3: inlined callee: _Z3fool: 243786, 0, 3 sampled lines
+SHOW1: 1.3: inlined callee: _Z3bari: 0, 0, 2 sampled lines
+SHOW1: 1.7: inlined callee: _Z3bari: 98558, 0, 2 sampled lines
+SHOW1: 1.8: inlined callee: _Z3bari: 0, 0, 2 sampled lines
+
+2- Convert the profile to text encoding and check that they are both
+ identical.
+RUN: llvm-profdata merge --sample %p/Inputs/gcc-sample-profile.gcov --text -o - | llvm-profdata show --sample - -o %t-text
+RUN: llvm-profdata show --sample %p/Inputs/gcc-sample-profile.gcov -o %t-gcov
+RUN: diff %t-text %t-gcov
+
+4- Merge the gcov and text encodings of the profile and check that the
+ counters have doubled.
+RUN: llvm-profdata merge --sample --text %p/Inputs/gcc-sample-profile.gcov -o %t-gcov
+RUN: llvm-profdata merge --sample --text %p/Inputs/gcc-sample-profile.gcov %t-gcov -o - | FileCheck %s --check-prefix=MERGE1
+MERGE1: main:728168:0
+MERGE1: 2.3: 120298
+MERGE1: 2.3: _Z3fool:487572
diff --git a/test/tools/llvm-profdata/inline-samples.test b/test/tools/llvm-profdata/inline-samples.test
new file mode 100644
index 000000000000..421f002da9f9
--- /dev/null
+++ b/test/tools/llvm-profdata/inline-samples.test
@@ -0,0 +1,30 @@
+Tests for conversion between text and binary encoded sample profiles.
+
+1- Encode the original profile into binary form. All the tests below will use
+ the binary profile.
+RUN: llvm-profdata merge --sample %p/Inputs/inline-samples.afdo -o %t.profbin
+
+2- Show all functions. This profile has a single main() function with several
+ inlined callees.
+RUN: llvm-profdata show --sample %t.profbin | FileCheck %s --check-prefix=SHOW1
+SHOW1: Function: main: 366846, 0, 6 sampled lines
+SHOW1: 2.3: inlined callee: _Z3fool: 246044, 0, 3 sampled lines
+SHOW1: 1.3: inlined callee: _Z3bari: 0, 0, 2 sampled lines
+SHOW1: 1.7: inlined callee: _Z3bari: 99492, 0, 2 sampled lines
+SHOW1: 1.2: 46732
+SHOW1: 1.8: inlined callee: _Z3bari: 0, 0, 2 sampled lines
+
+3- Convert the binary profile to text encoding and check that they are both
+ identical.
+RUN: llvm-profdata merge --sample %t.profbin --text -o - | llvm-profdata show --sample - -o %t-bintext
+RUN: llvm-profdata show --sample %p/Inputs/inline-samples.afdo -o %t-text
+RUN: diff %t-bintext %t-text
+
+4- Merge the binary and text encodings of the profile and check that the
+ counters have doubled.
+RUN: llvm-profdata merge --sample --text %t.profbin %p/Inputs/inline-samples.afdo -o - | FileCheck %s --check-prefix=MERGE1
+MERGE1: main:733692:0
+MERGE1: 2.3: 120802
+MERGE1: 2.3: _Z3fool:492088
+MERGE1: 1.7: _Z3bari:198984
+MERGE1: 1.1: 105520
diff --git a/test/tools/llvm-profdata/overflow-instr.test b/test/tools/llvm-profdata/overflow-instr.test
new file mode 100644
index 000000000000..5b9a94af9b29
--- /dev/null
+++ b/test/tools/llvm-profdata/overflow-instr.test
@@ -0,0 +1,17 @@
+Tests for overflow when merging instrumented profiles.
+
+1- Merge profile having maximum counts with itself and verify overflow detected and saturation occurred
+RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW
+RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW
+MERGE_OVERFLOW: {{.*}}: overflow: Counter overflow
+SHOW_OVERFLOW: Total functions: 1
+SHOW_OVERFLOW-NEXT: Maximum function count: 18446744073709551615
+SHOW_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615
+
+2- Merge profile having maximum counts by itself and verify no overflow
+RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_NO_OVERFLOW -allow-empty
+RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW
+MERGE_NO_OVERFLOW-NOT: {{.*}}: overflow: Counter overflow
+SHOW_NO_OVERFLOW: Total functions: 1
+SHOW_NO_OVERFLOW-NEXT: Maximum function count: 18446744073709551615
+SHOW_NO_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615
diff --git a/test/tools/llvm-profdata/overflow-sample.test b/test/tools/llvm-profdata/overflow-sample.test
new file mode 100644
index 000000000000..cd6268db2ab9
--- /dev/null
+++ b/test/tools/llvm-profdata/overflow-sample.test
@@ -0,0 +1,43 @@
+Tests for overflow when merging sampled profiles.
+
+1- Merge profile having maximum counts with itself and verify overflow detected
+RUN: llvm-profdata merge -sample %p/Inputs/overflow-sample.proftext %p/Inputs/overflow-sample.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW
+RUN: llvm-profdata show -sample %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW
+MERGE_OVERFLOW: {{.*}}: main: Counter overflow
+SHOW_OVERFLOW: Function: main: 2000, 0, 2 sampled lines
+SHOW_OVERFLOW-NEXT: Samples collected in the function's body {
+SHOW_OVERFLOW-NEXT: 1: 1000, calls: _Z3bari:18446744073709551615
+SHOW_OVERFLOW-NEXT: 2: 1000, calls: _Z3fooi:18446744073709551615
+SHOW_OVERFLOW-NEXT: }
+SHOW_OVERFLOW-NEXT: No inlined callsites in this function
+SHOW_OVERFLOW-NEXT: Function: _Z3fooi: 18446744073709551615, 2000, 1 sampled lines
+SHOW_OVERFLOW-NEXT: Samples collected in the function's body {
+SHOW_OVERFLOW-NEXT: 1: 18446744073709551615
+SHOW_OVERFLOW-NEXT: }
+SHOW_OVERFLOW-NEXT: No inlined callsites in this function
+SHOW_OVERFLOW-NEXT: Function: _Z3bari: 18446744073709551615, 2000, 1 sampled lines
+SHOW_OVERFLOW-NEXT: Samples collected in the function's body {
+SHOW_OVERFLOW-NEXT: 1: 18446744073709551615
+SHOW_OVERFLOW-NEXT: }
+SHOW_OVERFLOW-NEXT: No inlined callsites in this function
+
+2- Merge profile having maximum counts by itself and verify no overflow
+RUN: llvm-profdata merge -sample %p/Inputs/overflow-sample.proftext -o %t.out 2>&1 | FileCheck %s -allow-empty -check-prefix=MERGE_NO_OVERFLOW
+RUN: llvm-profdata show -sample %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW
+MERGE_NO_OVERFLOW-NOT: {{.*}}: main: Counter overflow
+SHOW_NO_OVERFLOW: Function: main: 1000, 0, 2 sampled lines
+SHOW_NO_OVERFLOW-NEXT: Samples collected in the function's body {
+SHOW_NO_OVERFLOW-NEXT: 1: 500, calls: _Z3bari:18446744073709551615
+SHOW_NO_OVERFLOW-NEXT: 2: 500, calls: _Z3fooi:18446744073709551615
+SHOW_NO_OVERFLOW-NEXT: }
+SHOW_NO_OVERFLOW-NEXT: No inlined callsites in this function
+SHOW_NO_OVERFLOW-NEXT: Function: _Z3fooi: 18446744073709551615, 1000, 1 sampled lines
+SHOW_NO_OVERFLOW-NEXT: Samples collected in the function's body {
+SHOW_NO_OVERFLOW-NEXT: 1: 18446744073709551615
+SHOW_NO_OVERFLOW-NEXT: }
+SHOW_NO_OVERFLOW-NEXT: No inlined callsites in this function
+SHOW_NO_OVERFLOW-NEXT: Function: _Z3bari: 18446744073709551615, 1000, 1 sampled lines
+SHOW_NO_OVERFLOW-NEXT: Samples collected in the function's body {
+SHOW_NO_OVERFLOW-NEXT: 1: 18446744073709551615
+SHOW_NO_OVERFLOW-NEXT: }
+SHOW_NO_OVERFLOW-NEXT: No inlined callsites in this function
diff --git a/test/tools/llvm-profdata/overflow.proftext b/test/tools/llvm-profdata/overflow.proftext
deleted file mode 100644
index cbf3bf161823..000000000000
--- a/test/tools/llvm-profdata/overflow.proftext
+++ /dev/null
@@ -1,12 +0,0 @@
-# RUN: llvm-profdata merge %s -o %t.out 2>&1 | FileCheck %s
-# CHECK: overflow.proftext: overflow: Counter overflow
-
-overflow
-1
-1
-9223372036854775808
-
-overflow
-1
-1
-9223372036854775808
diff --git a/test/tools/llvm-profdata/raw-32-bits-be.test b/test/tools/llvm-profdata/raw-32-bits-be.test
index 86ac56d39f26..d20c36022fa6 100644
--- a/test/tools/llvm-profdata/raw-32-bits-be.test
+++ b/test/tools/llvm-profdata/raw-32-bits-be.test
@@ -1,27 +1,36 @@
RUN: printf '\377lprofR\201' > %t
-RUN: printf '\0\0\0\0\0\0\0\1' >> %t
+RUN: printf '\0\0\0\0\0\0\0\2' >> %t
RUN: printf '\0\0\0\0\0\0\0\2' >> %t
RUN: printf '\0\0\0\0\0\0\0\3' >> %t
RUN: printf '\0\0\0\0\0\0\0\6' >> %t
RUN: printf '\0\0\0\0\1\0\0\0' >> %t
RUN: printf '\0\0\0\0\2\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\3' >> %t
RUN: printf '\0\0\0\1' >> %t
RUN: printf '\0\0\0\0\0\0\0\1' >> %t
RUN: printf '\2\0\0\0' >> %t
RUN: printf '\1\0\0\0' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\3' >> %t
RUN: printf '\0\0\0\2' >> %t
RUN: printf '\0\0\0\0\0\0\0\2' >> %t
RUN: printf '\2\0\0\03' >> %t
RUN: printf '\1\0\0\10' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\023' >> %t
RUN: printf '\0\0\0\0\0\0\0\067' >> %t
RUN: printf '\0\0\0\0\0\0\0\101' >> %t
-RUN: printf 'foobar' >> %t
+RUN: printf 'foobar\0\0' >> %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s
diff --git a/test/tools/llvm-profdata/raw-32-bits-le.test b/test/tools/llvm-profdata/raw-32-bits-le.test
index 9325e7eb0f52..1bd81a87fbac 100644
--- a/test/tools/llvm-profdata/raw-32-bits-le.test
+++ b/test/tools/llvm-profdata/raw-32-bits-le.test
@@ -1,27 +1,36 @@
RUN: printf '\201Rforpl\377' > %t
-RUN: printf '\1\0\0\0\0\0\0\0' >> %t
+RUN: printf '\2\0\0\0\0\0\0\0' >> %t
RUN: printf '\2\0\0\0\0\0\0\0' >> %t
RUN: printf '\3\0\0\0\0\0\0\0' >> %t
RUN: printf '\6\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\1\0\0\0\0' >> %t
RUN: printf '\0\0\0\2\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\3\0\0\0' >> %t
RUN: printf '\1\0\0\0' >> %t
RUN: printf '\1\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\2' >> %t
RUN: printf '\0\0\0\1' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\3\0\0\0' >> %t
RUN: printf '\2\0\0\0' >> %t
RUN: printf '\02\0\0\0\0\0\0\0' >> %t
RUN: printf '\03\0\0\2' >> %t
RUN: printf '\10\0\0\1' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\023\0\0\0\0\0\0\0' >> %t
RUN: printf '\067\0\0\0\0\0\0\0' >> %t
RUN: printf '\101\0\0\0\0\0\0\0' >> %t
-RUN: printf 'foobar' >> %t
+RUN: printf 'foobar\0\0' >> %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s
diff --git a/test/tools/llvm-profdata/raw-64-bits-be.test b/test/tools/llvm-profdata/raw-64-bits-be.test
index b97d8b5dac6d..902cedd476ab 100644
--- a/test/tools/llvm-profdata/raw-64-bits-be.test
+++ b/test/tools/llvm-profdata/raw-64-bits-be.test
@@ -1,27 +1,36 @@
RUN: printf '\377lprofr\201' > %t
-RUN: printf '\0\0\0\0\0\0\0\1' >> %t
+RUN: printf '\0\0\0\0\0\0\0\2' >> %t
RUN: printf '\0\0\0\0\0\0\0\2' >> %t
RUN: printf '\0\0\0\0\0\0\0\3' >> %t
RUN: printf '\0\0\0\0\0\0\0\6' >> %t
RUN: printf '\0\0\0\1\0\4\0\0' >> %t
RUN: printf '\0\0\0\2\0\4\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\3' >> %t
RUN: printf '\0\0\0\1' >> %t
RUN: printf '\0\0\0\0\0\0\0\1' >> %t
RUN: printf '\0\0\0\2\0\4\0\0' >> %t
RUN: printf '\0\0\0\1\0\4\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\3' >> %t
RUN: printf '\0\0\0\2' >> %t
RUN: printf '\0\0\0\0\0\0\0\02' >> %t
RUN: printf '\0\0\0\2\0\4\0\03' >> %t
RUN: printf '\0\0\0\1\0\4\0\10' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\0\0\0\0\0\023' >> %t
RUN: printf '\0\0\0\0\0\0\0\067' >> %t
RUN: printf '\0\0\0\0\0\0\0\101' >> %t
-RUN: printf 'foobar' >> %t
+RUN: printf 'foobar\0\0' >> %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s
diff --git a/test/tools/llvm-profdata/raw-64-bits-le.test b/test/tools/llvm-profdata/raw-64-bits-le.test
index 0e6853811ec4..d2f410a6bb95 100644
--- a/test/tools/llvm-profdata/raw-64-bits-le.test
+++ b/test/tools/llvm-profdata/raw-64-bits-le.test
@@ -1,27 +1,36 @@
RUN: printf '\201rforpl\377' > %t
-RUN: printf '\1\0\0\0\0\0\0\0' >> %t
+RUN: printf '\2\0\0\0\0\0\0\0' >> %t
RUN: printf '\2\0\0\0\0\0\0\0' >> %t
RUN: printf '\3\0\0\0\0\0\0\0' >> %t
RUN: printf '\6\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\4\0\1\0\0\0' >> %t
RUN: printf '\0\0\4\0\2\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\3\0\0\0' >> %t
RUN: printf '\1\0\0\0' >> %t
RUN: printf '\1\0\0\0\0\0\0\0' >> %t
RUN: printf '\0\0\4\0\2\0\0\0' >> %t
RUN: printf '\0\0\4\0\1\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\03\0\0\0' >> %t
RUN: printf '\02\0\0\0' >> %t
RUN: printf '\02\0\0\0\0\0\0\0' >> %t
RUN: printf '\03\0\4\0\2\0\0\0' >> %t
RUN: printf '\10\0\4\0\1\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t
RUN: printf '\023\0\0\0\0\0\0\0' >> %t
RUN: printf '\067\0\0\0\0\0\0\0' >> %t
RUN: printf '\101\0\0\0\0\0\0\0' >> %t
-RUN: printf 'foobar' >> %t
+RUN: printf 'foobar\0\0' >> %t
RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s
diff --git a/test/tools/llvm-profdata/raw-magic-but-no-header.test b/test/tools/llvm-profdata/raw-magic-but-no-header.test
index b2a697042b0a..76894faa183c 100644
--- a/test/tools/llvm-profdata/raw-magic-but-no-header.test
+++ b/test/tools/llvm-profdata/raw-magic-but-no-header.test
@@ -3,4 +3,4 @@ RUN: not llvm-profdata show %t 2>&1 | FileCheck %s
RUN: printf '\377lprofr\201' > %t
RUN: not llvm-profdata show %t 2>&1 | FileCheck %s
-CHECK: error: {{.+}}: Invalid profile data (file header is corrupt)
+CHECK: error: {{.+}}: Invalid instrumentation profile data (file header is corrupt)
diff --git a/test/tools/llvm-profdata/raw-two-profiles.test b/test/tools/llvm-profdata/raw-two-profiles.test
index be78793215ed..09eb121adf3f 100644
--- a/test/tools/llvm-profdata/raw-two-profiles.test
+++ b/test/tools/llvm-profdata/raw-two-profiles.test
@@ -1,48 +1,51 @@
RUN: printf '\201rforpl\377' > %t-foo.profraw
-RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\2\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\3\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw
RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\3\0\0\0' >> %t-foo.profraw
RUN: printf '\1\0\0\0' >> %t-foo.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw
RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw
-RUN: printf 'foo' >> %t-foo.profraw
+RUN: printf 'foo\0\0\0\0\0' >> %t-foo.profraw
RUN: printf '\201rforpl\377' > %t-bar.profraw
-RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\3\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw
RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\3\0\0\0' >> %t-bar.profraw
RUN: printf '\2\0\0\0' >> %t-bar.profraw
RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw
RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\067\0\0\0\0\0\0\0' >> %t-bar.profraw
RUN: printf '\101\0\0\0\0\0\0\0' >> %t-bar.profraw
-RUN: printf 'bar' >> %t-bar.profraw
-
-Versions of the profiles that are padded to eight byte alignment.
-RUN: cat %t-foo.profraw > %t-foo-padded.profraw
-RUN: printf '\0\0\0\0\0' >> %t-foo-padded.profraw
-RUN: cat %t-bar.profraw > %t-bar-padded.profraw
-RUN: printf '\0\0\0\0\0' >> %t-bar-padded.profraw
-
-RUN: cat %t-foo-padded.profraw %t-bar.profraw > %t-pad-between.profraw
-RUN: cat %t-foo-padded.profraw %t-bar-padded.profraw > %t-pad.profraw
+RUN: printf 'bar\0\0\0\0\0' >> %t-bar.profraw
-RUN: llvm-profdata show %t-pad-between.profraw -all-functions -counts | FileCheck %s
+RUN: cat %t-foo.profraw %t-bar.profraw > %t-pad.profraw
RUN: llvm-profdata show %t-pad.profraw -all-functions -counts | FileCheck %s
CHECK: Counters:
diff --git a/test/tools/llvm-profdata/sample-profile-basic.test b/test/tools/llvm-profdata/sample-profile-basic.test
index 0651c513e965..5116b98f3335 100644
--- a/test/tools/llvm-profdata/sample-profile-basic.test
+++ b/test/tools/llvm-profdata/sample-profile-basic.test
@@ -3,15 +3,15 @@ Basic tests for sample profiles.
1- Show all functions
RUN: llvm-profdata show --sample %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=SHOW1
SHOW1: Function: main: 184019, 0, 7 sampled lines
-SHOW1: line offset: 9, discriminator: 0, number of samples: 2064, calls: _Z3fooi:631 _Z3bari:1471
+SHOW1: 9: 2064, calls: _Z3fooi:631 _Z3bari:1471
SHOW1: Function: _Z3fooi: 7711, 610, 1 sampled lines
SHOW1: Function: _Z3bari: 20301, 1437, 1 sampled lines
-SHOW1: line offset: 1, discriminator: 0, number of samples: 1437
+SHOW1: 1: 1437
2- Show only bar
RUN: llvm-profdata show --sample --function=_Z3bari %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=SHOW2
SHOW2: Function: _Z3bari: 20301, 1437, 1 sampled lines
-SHOW2: line offset: 1, discriminator: 0, number of samples: 1437
+SHOW2: 1: 1437
SHOW2-NOT: Function: main: 184019, 0, 7 sampled lines
SHOW2-NOT: Function: _Z3fooi: 7711, 610, 1 sampled lines
@@ -28,3 +28,7 @@ RUN: llvm-profdata merge --sample --text %p/Inputs/sample-profile.proftext %t-bi
MERGE1: main:368038:0
MERGE1: 9: 4128 _Z3fooi:1262 _Z3bari:2942
MERGE1: _Z3fooi:15422:1220
+
+5- Detect invalid text encoding (e.g. instrumentation profile text format).
+RUN: not llvm-profdata show --sample %p/Inputs/foo3bar3-1.proftext 2>&1 | FileCheck %s --check-prefix=BADTEXT
+BADTEXT: error: {{.+}}: Unrecognized sample profile encoding format
diff --git a/test/tools/llvm-profdata/text-dump.test b/test/tools/llvm-profdata/text-dump.test
new file mode 100644
index 000000000000..94a78d9dbac3
--- /dev/null
+++ b/test/tools/llvm-profdata/text-dump.test
@@ -0,0 +1,21 @@
+Basic tests for testing text dump functions.
+
+RUN: llvm-profdata show --all-functions -counts --text %p/Inputs/basic.proftext > %t-basic.proftext1
+RUN: llvm-profdata merge -o %t-basic.proftext2 --text %p/Inputs/basic.proftext
+
+RUN: llvm-profdata merge -binary -o %t-basic.profdata1 %t-basic.proftext1
+RUN: llvm-profdata merge -o %t-basic.profdata2 %t-basic.proftext2
+
+RUN: llvm-profdata show --all-functions -counts %t-basic.profdata1 > %t-basic.dump3
+RUN: llvm-profdata show --all-functions -counts %t-basic.profdata2 > %t-basic.dump4
+
+RUN: llvm-profdata merge -text -o %t-basic.proftext5 %t-basic.profdata1
+RUN: llvm-profdata merge -text -o %t-basic.proftext6 %t-basic.profdata2
+
+RUN: diff %t-basic.dump3 %t-basic.dump4
+RUN: diff %t-basic.proftext5 %t-basic.proftext6
+
+RUN: not llvm-profdata merge -gcc -o %t-basic-profdata3 %t-basic.proftext2 2>&1 | FileCheck %s --check-prefix=UNKNOWN
+UNKNOWN: Unknown
+
+
diff --git a/test/tools/llvm-profdata/text-format-errors.test b/test/tools/llvm-profdata/text-format-errors.test
index 01513e4fcb9e..05de2e38af1f 100644
--- a/test/tools/llvm-profdata/text-format-errors.test
+++ b/test/tools/llvm-profdata/text-format-errors.test
@@ -1,10 +1,29 @@
+Tests for instrumentation profile bad encoding.
+
+1- Detect invalid count
RUN: not llvm-profdata show %p/Inputs/invalid-count-later.proftext 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
-RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.proftext %p/Inputs/invalid-count-later.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
-INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.proftext: Malformed profile data
+RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.proftext %p/Inputs/invalid-count-later.proftext -o %t.out 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
+INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.proftext: Malformed instrumentation profile data
+2- Detect bad hash
RUN: not llvm-profdata show %p/Inputs/bad-hash.proftext 2>&1 | FileCheck %s --check-prefix=BAD-HASH
RUN: not llvm-profdata merge %p/Inputs/bad-hash.proftext %p/Inputs/bad-hash.proftext -o %t.out 2>&1 | FileCheck %s --check-prefix=BAD-HASH
-BAD-HASH: error: {{.*}}bad-hash.proftext: Malformed profile data
+BAD-HASH: error: {{.*}}bad-hash.proftext: Malformed instrumentation profile data
+3- Detect no counts
RUN: not llvm-profdata show %p/Inputs/no-counts.proftext 2>&1 | FileCheck %s --check-prefix=NO-COUNTS
-NO-COUNTS: error: {{.*}}no-counts.proftext: Malformed profile data
+NO-COUNTS: error: {{.*}}no-counts.proftext: Malformed instrumentation profile data
+
+4- Detect binary input
+RUN: not llvm-profdata show %p/Inputs/text-format-errors.text.bin 2>&1 | FileCheck %s --check-prefix=BINARY
+BINARY: error: {{.+}}: Unrecognized instrumentation profile encoding format
+BINARY: Perhaps you forgot to use the -sample option?
+
+5- Detect malformed value profile data
+RUN: not llvm-profdata show %p/Inputs/vp-malform.proftext 2>&1 | FileCheck %s --check-prefix=VP
+RUN: not llvm-profdata show %p/Inputs/vp-malform2.proftext 2>&1 | FileCheck %s --check-prefix=VP
+VP: Malformed instrumentation profile data
+
+6- Detect truncated value profile data
+RUN: not llvm-profdata show %p/Inputs/vp-truncate.proftext 2>&1 | FileCheck %s --check-prefix=VPTRUNC
+VPTRUNC: Truncated profile data
diff --git a/test/tools/llvm-profdata/value-prof.proftext b/test/tools/llvm-profdata/value-prof.proftext
new file mode 100644
index 000000000000..ca2b1f822097
--- /dev/null
+++ b/test/tools/llvm-profdata/value-prof.proftext
@@ -0,0 +1,57 @@
+# RUN: llvm-profdata show -ic-targets -all-functions %s | FileCheck %s --check-prefix=IC
+# RUN: llvm-profdata show -ic-targets -counts -text -all-functions %s | FileCheck %s --check-prefix=ICTEXT
+# RUN: llvm-profdata merge -o %t.profdata %s
+# RUN: llvm-profdata show -ic-targets -all-functions %t.profdata | FileCheck %s --check-prefix=IC
+
+foo
+# Func Hash:
+10
+# Num Counters:
+2
+# Counter Values:
+999000
+359800
+
+foo2
+# Func Hash:
+10
+# Num Counters:
+2
+# Counter Values:
+1001000
+360200
+
+main
+# Func Hash:
+16650
+# Num Counters:
+4
+# Counter Values:
+2
+2000
+2000000
+999000
+# NumValueKinds
+1
+# Value Kind IPVK_IndirectCallTarget
+0
+# NumSites
+3
+# Values for each site
+0
+2
+foo:100
+foo2:1000
+1
+foo2:20000
+
+#IC: Indirect Call Site Count: 3
+#IC-NEXT: Indirect Target Results:
+#IC-NEXT: [ 1, foo, 100 ]
+#IC-NEXT: [ 1, foo2, 1000 ]
+#IC-NEXT: [ 2, foo2, 20000 ]
+
+#ICTEXT: foo:100
+#ICTEXT-NEXT: foo2:1000
+#ICTEXT-NEXT: 1
+#ICTEXT-NEXT: foo2:20000
diff --git a/test/tools/llvm-profdata/weight-instr.test b/test/tools/llvm-profdata/weight-instr.test
new file mode 100644
index 000000000000..7294cf3b01f0
--- /dev/null
+++ b/test/tools/llvm-profdata/weight-instr.test
@@ -0,0 +1,69 @@
+Tests for weighted merge of instrumented profiles.
+
+1- Merge the foo and bar profiles with unity weight and verify the combined output
+RUN: llvm-profdata merge -instr -weighted-input=1,%p/Inputs/weight-instr-bar.profdata -weighted-input=1,%p/Inputs/weight-instr-foo.profdata -o %t
+RUN: llvm-profdata show -instr -all-functions %t | FileCheck %s -check-prefix=1X_1X_WEIGHT
+RUN: llvm-profdata merge -instr -weighted-input=1,%p/Inputs/weight-instr-bar.profdata %p/Inputs/weight-instr-foo.profdata -o %t
+RUN: llvm-profdata show -instr -all-functions %t | FileCheck %s -check-prefix=1X_1X_WEIGHT
+1X_1X_WEIGHT: Counters:
+1X_1X_WEIGHT-NEXT: usage:
+1X_1X_WEIGHT-NEXT: Hash: 0x0000000000000000
+1X_1X_WEIGHT-NEXT: Counters: 1
+1X_1X_WEIGHT-NEXT: Function count: 0
+1X_1X_WEIGHT-NEXT: foo:
+1X_1X_WEIGHT-NEXT: Hash: 0x000000000000028a
+1X_1X_WEIGHT-NEXT: Counters: 3
+1X_1X_WEIGHT-NEXT: Function count: 866988873
+1X_1X_WEIGHT-NEXT: bar:
+1X_1X_WEIGHT-NEXT: Hash: 0x000000000000028a
+1X_1X_WEIGHT-NEXT: Counters: 3
+1X_1X_WEIGHT-NEXT: Function count: 866988873
+1X_1X_WEIGHT-NEXT: main:
+1X_1X_WEIGHT-NEXT: Hash: 0x7d31c47ea98f8248
+1X_1X_WEIGHT-NEXT: Counters: 60
+1X_1X_WEIGHT-NEXT: Function count: 2
+1X_1X_WEIGHT-NEXT: Functions shown: 4
+1X_1X_WEIGHT-NEXT: Total functions: 4
+1X_1X_WEIGHT-NEXT: Maximum function count: 866988873
+1X_1X_WEIGHT-NEXT: Maximum internal block count: 267914296
+
+2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output
+RUN: llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=5,%p/Inputs/weight-instr-foo.profdata -o %t
+RUN: llvm-profdata show -instr -all-functions %t | FileCheck %s -check-prefix=3X_5X_WEIGHT
+3X_5X_WEIGHT: Counters:
+3X_5X_WEIGHT-NEXT: usage:
+3X_5X_WEIGHT-NEXT: Hash: 0x0000000000000000
+3X_5X_WEIGHT-NEXT: Counters: 1
+3X_5X_WEIGHT-NEXT: Function count: 0
+3X_5X_WEIGHT-NEXT: foo:
+3X_5X_WEIGHT-NEXT: Hash: 0x000000000000028a
+3X_5X_WEIGHT-NEXT: Counters: 3
+3X_5X_WEIGHT-NEXT: Function count: 4334944365
+3X_5X_WEIGHT-NEXT: bar:
+3X_5X_WEIGHT-NEXT: Hash: 0x000000000000028a
+3X_5X_WEIGHT-NEXT: Counters: 3
+3X_5X_WEIGHT-NEXT: Function count: 2600966619
+3X_5X_WEIGHT-NEXT: main:
+3X_5X_WEIGHT-NEXT: Hash: 0x7d31c47ea98f8248
+3X_5X_WEIGHT-NEXT: Counters: 60
+3X_5X_WEIGHT-NEXT: Function count: 8
+3X_5X_WEIGHT-NEXT: Functions shown: 4
+3X_5X_WEIGHT-NEXT: Total functions: 4
+3X_5X_WEIGHT-NEXT: Maximum function count: 4334944365
+3X_5X_WEIGHT-NEXT: Maximum internal block count: 1339571480
+
+3- Bad merge: invalid weight
+RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=0,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=0.75,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=-5,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+INVALID_WEIGHT: error: Input weight must be a positive integer.
+
+4- Bad merge: input path does not exist
+RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/does-not-exist.profdata -weighted-input=2,%p/Inputs/does-not-exist-either.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_INPUT
+INVALID_INPUT: {{.*}}: {{.*}}does-not-exist.profdata: {{[Nn]}}o such file or directory
+
+5- No inputs
+RUN: not llvm-profdata merge -instr -o %t.out 2>&1 | FileCheck %s -check-prefix=NO_INPUT
+NO_INPUT: {{.*}}: No input files specified. See llvm-profdata{{(\.EXE|\.exe)?}} merge -help
diff --git a/test/tools/llvm-profdata/weight-sample.test b/test/tools/llvm-profdata/weight-sample.test
new file mode 100644
index 000000000000..7b22c5f88f1f
--- /dev/null
+++ b/test/tools/llvm-profdata/weight-sample.test
@@ -0,0 +1,56 @@
+Tests for weighted merge of sample profiles.
+
+1- Merge the foo and bar profiles with unity weight and verify the combined output
+RUN: llvm-profdata merge -sample -text -weighted-input=1,%p/Inputs/weight-sample-bar.proftext -weighted-input=1,%p/Inputs/weight-sample-foo.proftext -o - | FileCheck %s -check-prefix=1X_1X_WEIGHT
+RUN: llvm-profdata merge -sample -text -weighted-input=1,%p/Inputs/weight-sample-bar.proftext %p/Inputs/weight-sample-foo.proftext -o - | FileCheck %s -check-prefix=1X_1X_WEIGHT
+1X_1X_WEIGHT: foo:1763288:35327
+1X_1X_WEIGHT-NEXT: 7: 35327
+1X_1X_WEIGHT-NEXT: 8: 35327
+1X_1X_WEIGHT-NEXT: 9: 6930
+1X_1X_WEIGHT-NEXT: 10: 29341
+1X_1X_WEIGHT-NEXT: 11: 11906
+1X_1X_WEIGHT-NEXT: 13: 18185 foo:19531
+1X_1X_WEIGHT-NEXT: 15: 36458
+1X_1X_WEIGHT-NEXT: bar:1772037:35370
+1X_1X_WEIGHT-NEXT: 17: 35370
+1X_1X_WEIGHT-NEXT: 18: 35370
+1X_1X_WEIGHT-NEXT: 19: 7005
+1X_1X_WEIGHT-NEXT: 20: 29407
+1X_1X_WEIGHT-NEXT: 21: 12170
+1X_1X_WEIGHT-NEXT: 23: 18150 bar:19829
+1X_1X_WEIGHT-NEXT: 25: 36666
+
+2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output
+RUN: llvm-profdata merge -sample -text -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=5,%p/Inputs/weight-sample-foo.proftext -o - | FileCheck %s -check-prefix=3X_5X_WEIGHT
+3X_5X_WEIGHT: foo:8816440:176635
+3X_5X_WEIGHT-NEXT: 7: 176635
+3X_5X_WEIGHT-NEXT: 8: 176635
+3X_5X_WEIGHT-NEXT: 9: 34650
+3X_5X_WEIGHT-NEXT: 10: 146705
+3X_5X_WEIGHT-NEXT: 11: 59530
+3X_5X_WEIGHT-NEXT: 13: 90925 foo:97655
+3X_5X_WEIGHT-NEXT: 15: 182290
+3X_5X_WEIGHT-NEXT: bar:5316111:106110
+3X_5X_WEIGHT-NEXT: 17: 106110
+3X_5X_WEIGHT-NEXT: 18: 106110
+3X_5X_WEIGHT-NEXT: 19: 21015
+3X_5X_WEIGHT-NEXT: 20: 88221
+3X_5X_WEIGHT-NEXT: 21: 36510
+3X_5X_WEIGHT-NEXT: 23: 54450 bar:59487
+3X_5X_WEIGHT-NEXT: 25: 109998
+
+3- Bad merge: invalid weight
+RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=0,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=0.75,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=-5,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT
+INVALID_WEIGHT: error: Input weight must be a positive integer.
+
+4- Bad merge: input path does not exist
+RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/does-not-exist.proftext -weighted-input=2,%p/Inputs/does-not-exist-either.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_INPUT
+INVALID_INPUT: {{.*}}: {{.*}}does-not-exist.proftext: {{[Nn]}}o such file or directory
+
+5- No inputs
+RUN: not llvm-profdata merge -sample -o %t.out 2>&1 | FileCheck %s -check-prefix=NO_INPUT
+NO_INPUT: {{.*}}: No input files specified. See llvm-profdata{{(\.EXE|\.exe)?}} merge -help
diff --git a/test/tools/llvm-readobj/ARM/attribute-4.s b/test/tools/llvm-readobj/ARM/attribute-4.s
index dd0a4a6d6a73..2c27785410ed 100644
--- a/test/tools/llvm-readobj/ARM/attribute-4.s
+++ b/test/tools/llvm-readobj/ARM/attribute-4.s
@@ -15,6 +15,13 @@
@CHECK-OBJ-NEXT: TagName: FP_arch
@CHECK-OBJ-NEXT: Description: VFPv3-D16
+.eabi_attribute Tag_Advanced_SIMD_arch, 4
+@CHECK: .eabi_attribute 12, 4
+@CHECK-OBJ: Tag: 12
+@CHECK-OBJ-NEXT: Value: 4
+@CHECK-OBJ-NEXT: TagName: Advanced_SIMD_arch
+@CHECK-OBJ-NEXT: Description: ARMv8.1-a NEON
+
.eabi_attribute Tag_PCS_config, 4
@CHECK: .eabi_attribute 13, 4
@CHECK-OBJ: Tag: 13
diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386 b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386
new file mode 100644
index 000000000000..c222a899ba71
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc
new file mode 100644
index 000000000000..fd337f343e00
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64 b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64
new file mode 100644
index 000000000000..25ce5c71b38b
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64 b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64
new file mode 100644
index 000000000000..612e7b2112ff
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipsel b/test/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipsel
new file mode 100755
index 000000000000..a8e396c2ec36
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipsel
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri b/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri
new file mode 100755
index 000000000000..9566ed5c0f14
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri
Binary files differ
diff --git a/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64 b/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64
new file mode 100755
index 000000000000..4b907694e800
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64
Binary files differ
diff --git a/test/tools/llvm-readobj/amdgpu-elf-defs.test b/test/tools/llvm-readobj/amdgpu-elf-defs.test
new file mode 100644
index 000000000000..9a576e8158f9
--- /dev/null
+++ b/test/tools/llvm-readobj/amdgpu-elf-defs.test
@@ -0,0 +1,28 @@
+RUN: llvm-readobj -program-headers -sections -symbols -file-headers \
+RUN: %p/Inputs/trivial.elf-amdhsa-kaveri | FileCheck %s
+
+CHECK: ElfHeader {
+CHECK: Ident {
+CHECK: Class: 64-bit (0x2)
+CHECK: DataEncoding: LittleEndian (0x1)
+CHECK: Machine: EM_AMDGPU (0xE0)
+
+
+CHECK: Section {
+CHECK: Name: .text
+CHECK: Type: SHT_PROGBITS (0x1)
+CHECK: Flags [ (0xC00007
+CHECK: SHF_ALLOC (0x2)
+CHECK: SHF_AMDGPU_HSA_AGENT (0x800000)
+CHECK: SHF_AMDGPU_HSA_CODE (0x400000)
+CHECK: SHF_EXECINSTR (0x4)
+CHECK: SHF_WRITE (0x1)
+
+CHECK: Symbol {
+CHECK: Name: hello_world
+CHECK: Value: 0x0
+CHECK: Binding: Local (0x0)
+CHECK: Type: AMDGPU_HSA_KERNEL (0xA)
+
+CHECK: ProgramHeader {
+CHECK: Type: PT_AMDGPU_HSA_LOAD_CODE_AGENT (0x60000003)
diff --git a/test/tools/llvm-readobj/basic.test b/test/tools/llvm-readobj/basic.test
new file mode 100644
index 000000000000..fc6349bcfbf4
--- /dev/null
+++ b/test/tools/llvm-readobj/basic.test
@@ -0,0 +1,2 @@
+RUN: not llvm-readobj %t.blah 2>&1 | FileCheck --check-prefix=ENOENT %s
+ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory
diff --git a/test/tools/llvm-readobj/codeview-linetables.test b/test/tools/llvm-readobj/codeview-linetables.test
index d124e6e2d454..880b6d52f095 100644
--- a/test/tools/llvm-readobj/codeview-linetables.test
+++ b/test/tools/llvm-readobj/codeview-linetables.test
@@ -50,7 +50,7 @@ MFUN32: ]
MFUN32-NEXT: Subsection [
MFUN32-NEXT: Type: 0xF2
MFUN32-NEXT: PayloadSize: 0x30
-MFUN32: FunctionName: _x
+MFUN32: LinkageName: _x
MFUN32-NEXT: ]
MFUN32-NEXT: Subsection [
MFUN32-NEXT: Type: 0xF5
@@ -69,7 +69,7 @@ MFUN32: ]
MFUN32-NEXT: Subsection [
MFUN32-NEXT: Type: 0xF2
MFUN32-NEXT: PayloadSize: 0x30
-MFUN32: FunctionName: _y
+MFUN32: LinkageName: _y
MFUN32-NEXT: ]
MFUN32-NEXT: Subsection [
MFUN32-NEXT: Type: 0xF5
@@ -88,7 +88,7 @@ MFUN32: ]
MFUN32-NEXT: Subsection [
MFUN32-NEXT: Type: 0xF2
MFUN32-NEXT: PayloadSize: 0x40
-MFUN32: FunctionName: _f
+MFUN32: LinkageName: _f
MFUN32-NEXT: ]
MFUN32-NEXT: Subsection [
MFUN32-NEXT: Type: 0xF4
@@ -103,7 +103,7 @@ MFUN32-NEXT: Type: 0xF1
MFUN32-NEXT: PayloadSize: 0x8
MFUN32: ]
MFUN32-NEXT: FunctionLineTable [
-MFUN32-NEXT: FunctionName: _x
+MFUN32-NEXT: LinkageName: _x
MFUN32-NEXT: Flags: 0x0
MFUN32-NEXT: CodeSize: 0xA
MFUN32-NEXT: FilenameSegment [
@@ -114,7 +114,7 @@ MFUN32-NEXT: +0x8: 5
MFUN32-NEXT: ]
MFUN32-NEXT: ]
MFUN32-NEXT: FunctionLineTable [
-MFUN32-NEXT: FunctionName: _y
+MFUN32-NEXT: LinkageName: _y
MFUN32-NEXT: Flags: 0x0
MFUN32-NEXT: CodeSize: 0xA
MFUN32-NEXT: FilenameSegment [
@@ -125,7 +125,7 @@ MFUN32-NEXT: +0x8: 9
MFUN32-NEXT: ]
MFUN32-NEXT: ]
MFUN32-NEXT: FunctionLineTable [
-MFUN32-NEXT: FunctionName: _f
+MFUN32-NEXT: LinkageName: _f
MFUN32-NEXT: Flags: 0x0
MFUN32-NEXT: CodeSize: 0x14
MFUN32-NEXT: FilenameSegment [
@@ -158,7 +158,7 @@ MFUN64: ]
MFUN64-NEXT: Subsection [
MFUN64-NEXT: Type: 0xF2
MFUN64-NEXT: PayloadSize: 0x30
-MFUN64: FunctionName: x
+MFUN64: LinkageName: x
MFUN64-NEXT: ]
MFUN64-NEXT: Subsection [
MFUN64-NEXT: Type: 0xF1
@@ -173,7 +173,7 @@ MFUN64: ]
MFUN64-NEXT: Subsection [
MFUN64-NEXT: Type: 0xF2
MFUN64-NEXT: PayloadSize: 0x30
-MFUN64: FunctionName: y
+MFUN64: LinkageName: y
MFUN64-NEXT: ]
MFUN64-NEXT: Subsection [
MFUN64-NEXT: Type: 0xF1
@@ -188,7 +188,7 @@ MFUN64: ]
MFUN64-NEXT: Subsection [
MFUN64-NEXT: Type: 0xF2
MFUN64-NEXT: PayloadSize: 0x40
-MFUN64: FunctionName: f
+MFUN64: LinkageName: f
MFUN64-NEXT: ]
MFUN64-NEXT: Subsection [
MFUN64-NEXT: Type: 0xF4
@@ -203,7 +203,7 @@ MFUN64-NEXT: Type: 0xF1
MFUN64-NEXT: PayloadSize: 0x8
MFUN64: ]
MFUN64-NEXT: FunctionLineTable [
-MFUN64-NEXT: FunctionName: x
+MFUN64-NEXT: LinkageName: x
MFUN64-NEXT: Flags: 0x0
MFUN64-NEXT: CodeSize: 0xE
MFUN64-NEXT: FilenameSegment [
@@ -214,7 +214,7 @@ MFUN64-NEXT: +0x9: 5
MFUN64-NEXT: ]
MFUN64-NEXT: ]
MFUN64-NEXT: FunctionLineTable [
-MFUN64-NEXT: FunctionName: y
+MFUN64-NEXT: LinkageName: y
MFUN64-NEXT: Flags: 0x0
MFUN64-NEXT: CodeSize: 0xE
MFUN64-NEXT: FilenameSegment [
@@ -225,7 +225,7 @@ MFUN64-NEXT: +0x9: 9
MFUN64-NEXT: ]
MFUN64-NEXT: ]
MFUN64-NEXT: FunctionLineTable [
-MFUN64-NEXT: FunctionName: f
+MFUN64-NEXT: LinkageName: f
MFUN64-NEXT: Flags: 0x0
MFUN64-NEXT: CodeSize: 0x18
MFUN64-NEXT: FilenameSegment [
@@ -286,7 +286,7 @@ MFILE32: ]
MFILE32-NEXT: Subsection [
MFILE32-NEXT: Type: 0xF2
MFILE32-NEXT: PayloadSize: 0x64
-MFILE32: FunctionName: _f
+MFILE32: LinkageName: _f
MFILE32-NEXT: ]
MFILE32-NEXT: Subsection [
MFILE32-NEXT: Type: 0xF4
@@ -301,7 +301,7 @@ MFILE32-NEXT: Type: 0xF1
MFILE32-NEXT: PayloadSize: 0x8
MFILE32: ]
MFILE32-NEXT: FunctionLineTable [
-MFILE32-NEXT: FunctionName: _f
+MFILE32-NEXT: LinkageName: _f
MFILE32-NEXT: Flags: 0x0
MFILE32-NEXT: CodeSize: 0x14
MFILE32-NEXT: FilenameSegment [
@@ -343,7 +343,7 @@ MFILE64: ]
MFILE64-NEXT: Subsection [
MFILE64-NEXT: Type: 0xF2
MFILE64-NEXT: PayloadSize: 0x64
-MFILE64: FunctionName: f
+MFILE64: LinkageName: f
MFILE64-NEXT: ]
MFILE64-NEXT: Subsection [
MFILE64-NEXT: Type: 0xF4
@@ -358,7 +358,7 @@ MFILE64-NEXT: Type: 0xF1
MFILE64-NEXT: PayloadSize: 0x8
MFILE64: ]
MFILE64-NEXT: FunctionLineTable [
-MFILE64-NEXT: FunctionName: f
+MFILE64-NEXT: LinkageName: f
MFILE64-NEXT: Flags: 0x0
MFILE64-NEXT: CodeSize: 0x18
MFILE64-NEXT: FilenameSegment [
@@ -406,7 +406,7 @@ MCOMDAT-NEXT: Section: ?f@@YAHXZ
MCOMDAT-NEXT: CodeSize: 0x7
MCOMDAT-NEXT: }
MCOMDAT: FunctionLineTable [
-MCOMDAT-NEXT: FunctionName: ?f@@YAHXZ
+MCOMDAT-NEXT: LinkageName: ?f@@YAHXZ
MCOMDAT-NEXT: Flags: 0x0
MCOMDAT-NEXT: CodeSize: 0x7
MCOMDAT-NEXT: FilenameSegment [
@@ -422,7 +422,7 @@ MCOMDAT-NEXT: Section: ?g@@YAHXZ
MCOMDAT-NEXT: CodeSize: 0x7
MCOMDAT-NEXT: }
MCOMDAT: FunctionLineTable [
-MCOMDAT-NEXT: FunctionName: ?g@@YAHXZ
+MCOMDAT-NEXT: LinkageName: ?g@@YAHXZ
MCOMDAT-NEXT: Flags: 0x0
MCOMDAT-NEXT: CodeSize: 0x7
MCOMDAT-NEXT: FilenameSegment [
diff --git a/test/tools/llvm-readobj/elf-gnuhash.test b/test/tools/llvm-readobj/elf-gnuhash.test
new file mode 100644
index 000000000000..8642a4dc9d7b
--- /dev/null
+++ b/test/tools/llvm-readobj/elf-gnuhash.test
@@ -0,0 +1,63 @@
+// Check dumping of the GNU Hash section
+// The input was generated using the following:
+// $ llvm-mc -filetype=obj -triple=i386-pc-linux -o example-i386.o example.s
+// $ llvm-mc -filetype=obj -triple=x86_64-pc-linux -o example-x86_64.o example.s
+// $ llvm-mc -filetype=obj -triple=powerpc-pc-linux -o example-ppc.o example.s
+// $ llvm-mc -filetype=obj -triple=powerpc64-pc-linux -o example-ppc64.o example.s
+// $ ld -shared -m elf_i386 -hash-style=gnu -o gnuhash.so.elf-i386 example-i386.o
+// $ ld -shared -m elf_x86_64 -hash-style=gnu -o gnuhash.so.elf-x86_64 example-x86_64.o
+// $ ld -shared -m elf32ppc -hash-style=gnu -o gnuhash.so.elf-ppc example-ppc.o
+// $ ld -shared -m elf64ppc -hash-style=gnu -o gnuhash.so.elf-ppc64 example-ppc64.o
+// $ cat example.s
+// .globl foo
+// foo:
+
+RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-i386 | FileCheck %s -check-prefix I386
+RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-x86_64 | FileCheck %s -check-prefix X86_64
+RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-ppc | FileCheck %s -check-prefix PPC
+RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-ppc64 | FileCheck %s -check-prefix PPC64
+
+I386: Arch: i386
+I386: GnuHashTable {
+I386-NEXT: Num Buckets: 3
+I386-NEXT: First Hashed Symbol Index: 1
+I386-NEXT: Num Mask Words: 1
+I386-NEXT: Shift Count: 5
+I386-NEXT: Bloom Filter: [0x39004608]
+I386-NEXT: Buckets: [1, 4, 0]
+I386-NEXT: Values: [0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9]
+I386-NEXT: }
+
+X86_64: Arch: x86_64
+X86_64: GnuHashTable {
+X86_64-NEXT: Num Buckets: 3
+X86_64-NEXT: First Hashed Symbol Index: 1
+X86_64-NEXT: Num Mask Words: 1
+X86_64-NEXT: Shift Count: 6
+X86_64-NEXT: Bloom Filter: [0x800000001204288]
+X86_64-NEXT: Buckets: [1, 4, 0]
+X86_64-NEXT: Values: [0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9]
+X86_64-NEXT: }
+
+PPC: Arch: powerpc
+PPC: GnuHashTable {
+PPC-NEXT: Num Buckets: 3
+PPC-NEXT: First Hashed Symbol Index: 1
+PPC-NEXT: Num Mask Words: 1
+PPC-NEXT: Shift Count: 5
+PPC-NEXT: Bloom Filter: [0x3D00460A]
+PPC-NEXT: Buckets: [1, 5, 0]
+PPC-NEXT: Values: [0xEEBEC3A, 0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9]
+PPC-NEXT: }
+
+PPC64: Arch: powerpc64
+PPC64: GnuHashTable {
+PPC64-NEXT: Num Buckets: 3
+PPC64-NEXT: First Hashed Symbol Index: 1
+PPC64-NEXT: Num Mask Words: 1
+PPC64-NEXT: Shift Count: 6
+PPC64-NEXT: Bloom Filter: [0x800000001204288]
+PPC64-NEXT: Buckets: [1, 4, 0]
+PPC64-NEXT: Values: [0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9]
+PPC64-NEXT: }
+
diff --git a/test/tools/llvm-readobj/elf-versioninfo.test b/test/tools/llvm-readobj/elf-versioninfo.test
new file mode 100644
index 000000000000..e8113e4b2fed
--- /dev/null
+++ b/test/tools/llvm-readobj/elf-versioninfo.test
@@ -0,0 +1,81 @@
+// Test that llvm-readobj dumps version info tags correctly.
+
+RUN: llvm-readobj -dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s
+
+CHECK: 0x000000006FFFFFF0 VERSYM 0x24C
+CHECK: 0x000000006FFFFFFC VERDEF 0x25C
+CHECK: 0x000000006FFFFFFD VERDEFNUM 3
+
+CHECK: Version symbols {
+CHECK-NEXT: Section Name: .gnu.version (20)
+CHECK-NEXT: Address: 0x24C
+CHECK-NEXT: Offset: 0x24C
+CHECK-NEXT: Link: 1
+CHECK-NEXT: Symbols [
+CHECK-NEXT: Symbol {
+CHECK-NEXT: Version: 0
+CHECK-NEXT: Name: @
+CHECK-NEXT: }
+CHECK-NEXT: Symbol {
+CHECK-NEXT: Version: 1
+CHECK-NEXT: Name: _end@
+CHECK-NEXT: }
+CHECK-NEXT: Symbol {
+CHECK-NEXT: Version: 1
+CHECK-NEXT: Name: _edata@
+CHECK-NEXT: }
+CHECK-NEXT: Symbol {
+CHECK-NEXT: Version: 3
+CHECK-NEXT: Name: goo@@VERSION2
+CHECK-NEXT: }
+CHECK-NEXT: Symbol {
+CHECK-NEXT: Version: 1
+CHECK-NEXT: Name: __bss_start@
+CHECK-NEXT: }
+CHECK-NEXT: Symbol {
+CHECK-NEXT: Version: 2
+CHECK-NEXT: Name: foo@@VERSION1
+CHECK-NEXT: }
+CHECK-NEXT: Symbol {
+CHECK-NEXT: Version: 2
+CHECK-NEXT: Name: VERSION1@@VERSION1
+CHECK-NEXT: }
+CHECK-NEXT: Symbol {
+CHECK-NEXT: Version: 3
+CHECK-NEXT: Name: VERSION2@@VERSION2
+CHECK-NEXT: }
+CHECK-NEXT: ]
+CHECK-NEXT: }
+
+CHECK: Version definition {
+CHECK-NEXT: Section Name: .gnu.version_d (70)
+CHECK-NEXT: Address: 0x25C
+CHECK-NEXT: Offset: 0x25C
+CHECK-NEXT: Link: 2
+CHECK-NEXT: Entries [
+CHECK-NEXT: Entry {
+CHECK-NEXT: Offset: 0x0
+CHECK-NEXT: Rev: 1
+CHECK-NEXT: Flags: 1
+CHECK-NEXT: Index: 1
+CHECK-NEXT: Cnt: 1
+CHECK-NEXT: Name: blah
+CHECK-NEXT: }
+CHECK-NEXT: Entry {
+CHECK-NEXT: Offset: 0x1C
+CHECK-NEXT: Rev: 1
+CHECK-NEXT: Flags: 0
+CHECK-NEXT: Index: 2
+CHECK-NEXT: Cnt: 1
+CHECK-NEXT: Name: VERSION1
+CHECK-NEXT: }
+CHECK-NEXT: Entry {
+CHECK-NEXT: Offset: 0x38
+CHECK-NEXT: Rev: 1
+CHECK-NEXT: Flags: 0
+CHECK-NEXT: Index: 3
+CHECK-NEXT: Cnt: 2
+CHECK-NEXT: Name: VERSION2
+CHECK-NEXT: }
+CHECK-NEXT: ]
+CHECK-NEXT: }
diff --git a/test/tools/llvm-readobj/file-headers.test b/test/tools/llvm-readobj/file-headers.test
index fd030ef0b56e..2d67089d6118 100644
--- a/test/tools/llvm-readobj/file-headers.test
+++ b/test/tools/llvm-readobj/file-headers.test
@@ -330,16 +330,8 @@ COFF-UNKNOWN-NEXT: Characteristics [ (0x0)
COFF-UNKNOWN-NEXT: ]
COFF-UNKNOWN-NEXT: }
-COFF-IMPORTLIB: Format: COFF-<unknown arch>
-COFF-IMPORTLIB-NEXT: Arch: unknown
-COFF-IMPORTLIB-NEXT: AddressSize: 32bit
-COFF-IMPORTLIB-NEXT: ImageFileHeader {
-COFF-IMPORTLIB-NEXT: Machine: IMAGE_FILE_MACHINE_UNKNOWN (0x0)
-COFF-IMPORTLIB-NEXT: SectionCount: 0
-COFF-IMPORTLIB-NEXT: TimeDateStamp: 1970-09-09 19:52:32 (0x14C0000)
-COFF-IMPORTLIB-NEXT: PointerToSymbolTable: 0x0
-COFF-IMPORTLIB-NEXT: SymbolCount: 0
-COFF-IMPORTLIB-NEXT: OptionalHeaderSize: 0
-COFF-IMPORTLIB-NEXT: Characteristics [ (0x0)
-COFF-IMPORTLIB-NEXT: ]
-COFF-IMPORTLIB-NEXT: }
+COFF-IMPORTLIB: Format: COFF-import-file
+COFF-IMPORTLIB-NEXT: Type: code
+COFF-IMPORTLIB-NEXT: Name type: noprefix
+COFF-IMPORTLIB-NEXT: Symbol: __imp__func
+COFF-IMPORTLIB-NEXT: Symbol: _func
diff --git a/test/tools/llvm-readobj/mips-rld-map-rel.test b/test/tools/llvm-readobj/mips-rld-map-rel.test
new file mode 100644
index 000000000000..adde78784d63
--- /dev/null
+++ b/test/tools/llvm-readobj/mips-rld-map-rel.test
@@ -0,0 +1,24 @@
+# Check DT_MIPS_RLD_MAP_REL .dynamic section tag reading
+
+RUN: llvm-readobj -dynamic-table %p/Inputs/mips-rld-map-rel.elf-mipsel | \
+RUN: FileCheck %s
+
+CHECK: DynamicSection [ (16 entries)
+CHECK-NEXT: Tag Type Name/Value
+CHECK-NEXT: 0x00000004 HASH 0x220
+CHECK-NEXT: 0x00000005 STRTAB 0x2FC
+CHECK-NEXT: 0x00000006 SYMTAB 0x25C
+CHECK-NEXT: 0x0000000A STRSZ 72 (bytes)
+CHECK-NEXT: 0x0000000B SYMENT 16 (bytes)
+CHECK-NEXT: 0x70000035 MIPS_RLD_MAP_REL 0x101E0
+CHECK-NEXT: 0x00000015 DEBUG 0x0
+CHECK-NEXT: 0x00000003 PLTGOT 0x10390
+CHECK-NEXT: 0x70000001 MIPS_RLD_VERSION 1
+CHECK-NEXT: 0x70000005 MIPS_FLAGS NOTPOT
+CHECK-NEXT: 0x70000006 MIPS_BASE_ADDRESS 0x0
+CHECK-NEXT: 0x7000000A MIPS_LOCAL_GOTNO 2
+CHECK-NEXT: 0x70000011 MIPS_SYMTABNO 10
+CHECK-NEXT: 0x70000012 MIPS_UNREFEXTNO 15
+CHECK-NEXT: 0x70000013 MIPS_GOTSYM 0xA
+CHECK-NEXT: 0x00000000 NULL 0x0
+CHECK-NEXT: ]
diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test
index 19b7aa0516d1..70ae0f22a0e4 100644
--- a/test/tools/llvm-readobj/sections-ext.test
+++ b/test/tools/llvm-readobj/sections-ext.test
@@ -223,6 +223,7 @@ MACHO-X86-64-NEXT: SomeInstructions (0x4)
MACHO-X86-64-NEXT: ]
MACHO-X86-64-NEXT: Reserved1: 0x0
MACHO-X86-64-NEXT: Reserved2: 0x0
+MACHO-X86-64-NEXT: Reserved3: 0x0
MACHO-X86-64-NEXT: Relocations [
MACHO-X86-64-NEXT: 0xE 1 2 1 X86_64_RELOC_BRANCH 0 _SomeOtherFunction
MACHO-X86-64-NEXT: 0x9 1 2 1 X86_64_RELOC_BRANCH 0 _puts
@@ -260,6 +261,7 @@ MACHO-X86-64-NEXT: Attributes [ (0x0)
MACHO-X86-64-NEXT: ]
MACHO-X86-64-NEXT: Reserved1: 0x0
MACHO-X86-64-NEXT: Reserved2: 0x0
+MACHO-X86-64-NEXT: Reserved3: 0x0
MACHO-X86-64-NEXT: Relocations [
MACHO-X86-64-NEXT: ]
MACHO-X86-64-NEXT: Symbols [
@@ -514,6 +516,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x0
MACHO-PPC64-NEXT: Reserved2: 0x0
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: Relocations [
MACHO-PPC64-NEXT: Relocation {
MACHO-PPC64-NEXT: Offset: 0x24
@@ -587,6 +590,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x0
MACHO-PPC64-NEXT: Reserved2: 0x20
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: Relocations [
MACHO-PPC64-NEXT: Relocation {
MACHO-PPC64-NEXT: Offset: 0x14
@@ -639,6 +643,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x0
MACHO-PPC64-NEXT: Reserved2: 0x0
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: Relocations [
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Symbols [
@@ -672,6 +677,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x1
MACHO-PPC64-NEXT: Reserved2: 0x0
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: Relocations [
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Symbols [
@@ -695,6 +701,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x2
MACHO-PPC64-NEXT: Reserved2: 0x0
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: Relocations [
MACHO-PPC64-NEXT: Relocation {
MACHO-PPC64-NEXT: Offset: 0x0
diff --git a/test/tools/llvm-readobj/sections.test b/test/tools/llvm-readobj/sections.test
index fe734d77e34b..54654e7070ef 100644
--- a/test/tools/llvm-readobj/sections.test
+++ b/test/tools/llvm-readobj/sections.test
@@ -172,6 +172,7 @@ MACHO-X86-64-NEXT: SomeInstructions (0x4)
MACHO-X86-64-NEXT: ]
MACHO-X86-64-NEXT: Reserved1: 0x0
MACHO-X86-64-NEXT: Reserved2: 0x0
+MACHO-X86-64-NEXT: Reserved3: 0x0
MACHO-X86-64-NEXT: }
MACHO-X86-64-NEXT: Section {
MACHO-X86-64-NEXT: Index: 1
@@ -188,6 +189,7 @@ MACHO-X86-64-NEXT: Attributes [ (0x0)
MACHO-X86-64-NEXT: ]
MACHO-X86-64-NEXT: Reserved1: 0x0
MACHO-X86-64-NEXT: Reserved2: 0x0
+MACHO-X86-64-NEXT: Reserved3: 0x0
MACHO-X86-64-NEXT: }
MACHO-X86-64-NEXT:]
@@ -296,6 +298,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x0
MACHO-PPC64-NEXT: Reserved2: 0x0
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: }
MACHO-PPC64-NEXT: Section {
MACHO-PPC64-NEXT: Index: 1
@@ -314,6 +317,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x0
MACHO-PPC64-NEXT: Reserved2: 0x20
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: }
MACHO-PPC64-NEXT: Section {
MACHO-PPC64-NEXT: Index: 2
@@ -330,6 +334,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x0
MACHO-PPC64-NEXT: Reserved2: 0x0
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: }
MACHO-PPC64-NEXT: Section {
MACHO-PPC64-NEXT: Index: 3
@@ -346,6 +351,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x1
MACHO-PPC64-NEXT: Reserved2: 0x0
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: }
MACHO-PPC64-NEXT: Section {
MACHO-PPC64-NEXT: Index: 4
@@ -362,6 +368,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0)
MACHO-PPC64-NEXT: ]
MACHO-PPC64-NEXT: Reserved1: 0x2
MACHO-PPC64-NEXT: Reserved2: 0x0
+MACHO-PPC64-NEXT: Reserved3: 0x0
MACHO-PPC64-NEXT: }
MACHO-PPC64-NEXT: ]
diff --git a/test/tools/llvm-size/basic.test b/test/tools/llvm-size/basic.test
new file mode 100644
index 000000000000..8b2d66eef3ce
--- /dev/null
+++ b/test/tools/llvm-size/basic.test
@@ -0,0 +1,2 @@
+RUN: llvm-size %t.blah 2>&1 | FileCheck --check-prefix=ENOENT %s
+ENOENT: {{.*}}llvm-size{{(\.EXE|\.exe)?}}: {{.*}}.blah: {{[Nn]}}o such file or directory
diff --git a/test/tools/llvm-split/alias.ll b/test/tools/llvm-split/alias.ll
new file mode 100644
index 000000000000..18e0e7f12d69
--- /dev/null
+++ b/test/tools/llvm-split/alias.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-split -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0-DAG: @afoo = alias [2 x i8*], [2 x i8*]* @foo
+; CHECK1-DAG: @afoo = external global [2 x i8*]
+@afoo = alias [2 x i8*], [2 x i8*]* @foo
+
+; CHECK0-DAG: declare void @abar()
+; CHECK1-DAG: @abar = alias void (), void ()* @bar
+@abar = alias void (), void ()* @bar
+
+@foo = global [2 x i8*] [i8* bitcast (void ()* @bar to i8*), i8* bitcast (void ()* @abar to i8*)]
+
+define void @bar() {
+ store [2 x i8*] zeroinitializer, [2 x i8*]* @foo
+ store [2 x i8*] zeroinitializer, [2 x i8*]* @afoo
+ ret void
+}
diff --git a/test/tools/llvm-split/comdat.ll b/test/tools/llvm-split/comdat.ll
new file mode 100644
index 000000000000..45faf4bfe26e
--- /dev/null
+++ b/test/tools/llvm-split/comdat.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-split -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+$foo = comdat any
+
+; CHECK0: define void @foo()
+; CHECK1: declare void @foo()
+define void @foo() comdat {
+ call void @bar()
+ ret void
+}
+
+; CHECK0: define void @bar()
+; CHECK1: declare void @bar()
+define void @bar() comdat($foo) {
+ call void @foo()
+ ret void
+}
diff --git a/test/tools/llvm-split/function.ll b/test/tools/llvm-split/function.ll
new file mode 100644
index 000000000000..37272dbbcee2
--- /dev/null
+++ b/test/tools/llvm-split/function.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-split -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0: define void @foo()
+; CHECK1: declare void @foo()
+define void @foo() {
+ call void @bar()
+ ret void
+}
+
+; CHECK0: declare void @bar()
+; CHECK1: define void @bar()
+define void @bar() {
+ call void @foo()
+ ret void
+}
diff --git a/test/tools/llvm-split/global.ll b/test/tools/llvm-split/global.ll
new file mode 100644
index 000000000000..6d2425691e10
--- /dev/null
+++ b/test/tools/llvm-split/global.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-split -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0: @foo = global i8* bitcast
+; CHECK1: @foo = external global i8*
+@foo = global i8* bitcast (i8** @bar to i8*)
+
+; CHECK0: @bar = external global i8*
+; CHECK1: @bar = global i8* bitcast
+@bar = global i8* bitcast (i8** @foo to i8*)
diff --git a/test/tools/llvm-split/internal.ll b/test/tools/llvm-split/internal.ll
new file mode 100644
index 000000000000..ce4272c5f0dd
--- /dev/null
+++ b/test/tools/llvm-split/internal.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-split -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0: define hidden void @foo()
+; CHECK1: declare hidden void @foo()
+define internal void @foo() {
+ call void @bar()
+ ret void
+}
+
+; CHECK0: declare void @bar()
+; CHECK1: define void @bar()
+define void @bar() {
+ call void @foo()
+ ret void
+}
diff --git a/test/tools/llvm-split/unnamed.ll b/test/tools/llvm-split/unnamed.ll
new file mode 100644
index 000000000000..fd24b4ca92bb
--- /dev/null
+++ b/test/tools/llvm-split/unnamed.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-split -o %t %s
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0: declare hidden void @__llvmsplit_unnamed()
+; CHECK1: define hidden void @__llvmsplit_unnamed()
+define internal void @0() {
+ ; CHECK1: call void @foo()
+ call void @foo()
+ ret void
+}
+
+; CHECK0: declare hidden void @__llvmsplit_unnamed.1()
+; CHECK1: define hidden void @__llvmsplit_unnamed.1()
+define internal void @1() {
+ ; CHECK1: call void @foo()
+ ; CHECK1: call void @foo()
+ call void @foo()
+ call void @foo()
+ ret void
+}
+
+; CHECK0: define void @foo()
+; CHECK1: declare void @foo()
+define void @foo() {
+ ; CHECK0: call void @__llvmsplit_unnamed.1()
+ ; CHECK0: call void @__llvmsplit_unnamed()
+ call void @1()
+ call void @0()
+ ret void
+}
diff --git a/test/tools/llvm-symbolizer/Inputs/addr.exe b/test/tools/llvm-symbolizer/Inputs/addr.exe
new file mode 100755
index 000000000000..38d88b65741e
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/addr.exe
Binary files differ
diff --git a/test/tools/llvm-symbolizer/Inputs/addr.inp b/test/tools/llvm-symbolizer/Inputs/addr.inp
new file mode 100644
index 000000000000..4de096479dae
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/addr.inp
@@ -0,0 +1 @@
+0x40054d
diff --git a/test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp
new file mode 100644
index 000000000000..3a832a9fcff2
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp
@@ -0,0 +1,19 @@
+// To generate the corresponding EXE, run:
+// clang-cl -MD -O2 -gdwarf -c coff-dwarf.cpp && lld-link -debug coff-dwarf.obj
+
+extern "C" int puts(const char *str);
+
+void __declspec(noinline) foo() {
+ puts("foo1");
+ puts("foo2");
+}
+
+// LLVM should inline this into main.
+static void bar() {
+ foo();
+}
+
+int main() {
+ bar();
+ return 0;
+}
diff --git a/test/tools/llvm-symbolizer/Inputs/coff-dwarf.exe b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.exe
new file mode 100644
index 000000000000..018053b979b9
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.exe
Binary files differ
diff --git a/test/tools/llvm-symbolizer/Inputs/coff-exports.cpp b/test/tools/llvm-symbolizer/Inputs/coff-exports.cpp
new file mode 100644
index 000000000000..23b44b329022
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/coff-exports.cpp
@@ -0,0 +1,20 @@
+// To generate the corresponding EXE, run:
+// clang-cl -MD -c coff-exports.cpp && lld-link /MANIFEST:NO coff-exports.obj
+
+#define EXPORT __declspec(dllexport)
+
+extern "C" int puts(const char *str);
+
+EXPORT void __declspec(noinline) foo() {
+ puts("foo1");
+ puts("foo2");
+}
+
+void bar() {
+ foo();
+}
+
+EXPORT int main() {
+ bar();
+ return 0;
+}
diff --git a/test/tools/llvm-symbolizer/Inputs/coff-exports.exe b/test/tools/llvm-symbolizer/Inputs/coff-exports.exe
new file mode 100644
index 000000000000..939205e3f82e
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/coff-exports.exe
Binary files differ
diff --git a/test/tools/llvm-symbolizer/coff-dwarf.test b/test/tools/llvm-symbolizer/coff-dwarf.test
new file mode 100644
index 000000000000..043c175a8d3e
--- /dev/null
+++ b/test/tools/llvm-symbolizer/coff-dwarf.test
@@ -0,0 +1,16 @@
+RUN: grep '^ADDR:' %s | sed -s 's/ADDR: //' \
+RUN: | llvm-symbolizer --inlining --relative-address -obj="%p/Inputs/coff-dwarf.exe" \
+RUN: | FileCheck %s
+
+This test relies on UnDecorateSymbolName, which is Windows-only.
+REQUIRES: system-windows
+
+ADDR: 0x5009
+ADDR: 0x5038
+
+CHECK: foo(void)
+CHECK: coff-dwarf.cpp:7
+CHECK: bar(void)
+CHECK: coff-dwarf.cpp:13
+CHECK: main
+CHECK: coff-dwarf.cpp:17
diff --git a/test/tools/llvm-symbolizer/coff-exports.test b/test/tools/llvm-symbolizer/coff-exports.test
new file mode 100644
index 000000000000..cad1935a03f9
--- /dev/null
+++ b/test/tools/llvm-symbolizer/coff-exports.test
@@ -0,0 +1,20 @@
+RUN: grep '^ADDR:' %s | sed -s 's/ADDR: //' \
+RUN: | llvm-symbolizer --inlining --relative-address -obj="%p/Inputs/coff-exports.exe" \
+RUN: | FileCheck %s
+
+This test relies on UnDecorateSymbolName, which is Windows-only.
+REQUIRES: system-windows
+
+ADDR: 0x500A
+ADDR: 0x5038
+ADDR: 0x504B
+
+We get the expected stack trace, except 'foo' appears for the 'bar' frame
+because 'bar' isn't in the export table.
+
+CHECK: foo(void)
+CHECK: ??:0:0
+CHECK: foo(void)
+CHECK: ??:0:0
+CHECK: main
+CHECK: ??:0:0
diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp b/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp
index f1f98af41907..e317ed33589e 100644
--- a/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp
+++ b/test/tools/llvm-symbolizer/pdb/Inputs/test.cpp
@@ -16,3 +16,10 @@ int main() {
NS::Foo f;
f.bar();
}
+
+extern "C" {
+void __cdecl foo_cdecl() {}
+void __stdcall foo_stdcall() {}
+void __fastcall foo_fastcall() {}
+void __vectorcall foo_vectorcall() {}
+}
diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe b/test/tools/llvm-symbolizer/pdb/Inputs/test.exe
index 80fb34bb6dcb..a4f148e67c2f 100644
--- a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe
+++ b/test/tools/llvm-symbolizer/pdb/Inputs/test.exe
Binary files differ
diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input b/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
deleted file mode 100644
index affda60449b2..000000000000
--- a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
+++ /dev/null
@@ -1,4 +0,0 @@
-0x401030
-0x401040
-0x401060
-0x500000
diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb b/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb
index 974e565e87f4..d26d33a862d8 100644
--- a/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb
+++ b/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb
Binary files differ
diff --git a/test/tools/llvm-symbolizer/pdb/pdb.test b/test/tools/llvm-symbolizer/pdb/pdb.test
index b5d0f15fbcb8..fd058fbbee3b 100644
--- a/test/tools/llvm-symbolizer/pdb/pdb.test
+++ b/test/tools/llvm-symbolizer/pdb/pdb.test
@@ -1,18 +1,45 @@
-RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" < "%p/Inputs/test.exe.input" | \
-RUN: FileCheck %s --check-prefix=CHECK
-RUN: llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false < \
-RUN: "%p/Inputs/test.exe.input" | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
-
-CHECK: foo(void)
-CHECK-NEXT: test.cpp:10
-CHECK: _main
-CHECK-NEXT: test.cpp:13:0
-CHECK: NS::Foo::bar(void)
-CHECK-NEXT: test.cpp:6:0
-
-CHECK-NO-DEMANGLE: foo
-CHECK-NO-DEMANGLE-NEXT: test.cpp:10
-CHECK-NO-DEMANGLE: _main
-CHECK-LINKAGE-NAME-NEXT: test.cpp:13:0
-CHECK-NO-DEMANGLE: bar
-CHECK-LINKAGE-NAME-NEXT: test.cpp:6:0
+RUN: grep '^ADDR:' %s | sed -s 's/ADDR: //' \
+RUN: | llvm-symbolizer -obj="%p/Inputs/test.exe" \
+RUN: | FileCheck %s --check-prefix=CHECK
+RUN: grep '^ADDR:' %s | sed -s 's/ADDR: //' \
+RUN: | llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false \
+RUN: | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
+
+Subtract ImageBase from all the offsets and run the test again with
+--relative-address.
+
+RUN: grep '^ADDR:' %s | sed -s 's/ADDR: //' \
+RUN: | %python -c 'import sys;print "\n".join([hex(int(x, 16) - 0x400000) for x in sys.stdin])' \
+RUN: | llvm-symbolizer -obj="%p/Inputs/test.exe" -demangle=false --relative-address \
+RUN: | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE
+
+ADDR: 0x401000
+ADDR: 0x401010
+ADDR: 0x401070
+ADDR: 0x401030
+ADDR: 0x401040
+ADDR: 0x401050
+ADDR: 0x401060
+ADDR: 0x500000
+
+CHECK: foo(void)
+CHECK-NEXT: test.cpp:10
+CHECK: main
+CHECK-NEXT: test.cpp:13:0
+CHECK: NS::Foo::bar(void)
+CHECK-NEXT: test.cpp:6:0
+CHECK: {{^foo_cdecl$}}
+CHECK: {{^foo_stdcall$}}
+CHECK: {{^foo_fastcall$}}
+CHECK: {{^foo_vectorcall$}}
+
+CHECK-NO-DEMANGLE: ?foo@@YAXXZ
+CHECK-NO-DEMANGLE-NEXT: test.cpp:10
+CHECK-NO-DEMANGLE: _main
+CHECK-NO-DEMANGLE-NEXT: test.cpp:13
+CHECK-NO-DEMANGLE: ?bar@Foo@NS@@QAEXXZ
+CHECK-NO-DEMANGLE-NEXT: test.cpp:6
+CHECK-NO-DEMANGLE: _foo_cdecl
+CHECK-NO-DEMANGLE: _foo_stdcall@0
+CHECK-NO-DEMANGLE: @foo_fastcall@0
+CHECK-NO-DEMANGLE: foo_vectorcall@@0
diff --git a/test/tools/llvm-symbolizer/sym.test b/test/tools/llvm-symbolizer/sym.test
new file mode 100644
index 000000000000..01a6692222e7
--- /dev/null
+++ b/test/tools/llvm-symbolizer/sym.test
@@ -0,0 +1,30 @@
+#Source:
+##include <stdio.h>
+#static inline int inctwo (int *a) {
+# printf ("%d\n",(*a)++);
+# return (*a)++;
+#}
+#static inline int inc (int *a) {
+# printf ("%d\n",inctwo(a));
+# return (*a)++;
+#}
+#
+#
+#int main () {
+# int x = 1;
+# return inc(&x);
+#}
+#
+#Build as : clang -g -O2 addr.c
+
+RUN: llvm-symbolizer -print-address -obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s
+RUN: llvm-symbolizer -inlining -print-address -pretty-print -obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck --check-prefix="PRETTY" %s
+
+#CHECK: 0x40054d
+#CHECK: main
+#CHECK: {{[/\]+}}tmp{{[/\]+}}x.c:14:0
+#
+#PRETTY: {{[0x]+}}40054d: inctwo at {{[/\]+}}tmp{{[/\]+}}x.c:3:3
+#PRETTY: (inlined by) inc at {{[/\]+}}tmp{{[/\]+}}x.c:7:0
+#PRETTY (inlined by) main at {{[/\]+}}tmp{{[/\]+}}x.c:14:0
+
diff --git a/test/tools/lto/opt-level.ll b/test/tools/lto/opt-level.ll
index 013a1f408821..7bce0c606c4c 100644
--- a/test/tools/lto/opt-level.ll
+++ b/test/tools/lto/opt-level.ll
@@ -1,8 +1,9 @@
; RUN: llvm-as %s -o %t.o
-; RUN: env DYLD_LIBRARY_PATH=%llvmshlibdir %ld64 -arch x86_64 -dylib -mllvm -O0 -o %t.dylib %t.o
+; RUN: %ld64 -lto_library %llvmshlibdir/libLTO.dylib -arch x86_64 -dylib -mllvm -O0 -o %t.dylib %t.o
; RUN: llvm-nm -no-llvm-bc %t.dylib | FileCheck --check-prefix=CHECK-O0 %s
-; RUN: env DYLD_LIBRARY_PATH=%llvmshlibdir %ld64 -arch x86_64 -dylib -mllvm -O2 -o %t.dylib %t.o
+; RUN: %ld64 -lto_library %llvmshlibdir/libLTO.dylib -arch x86_64 -dylib -mllvm -O2 -o %t.dylib %t.o
; RUN: llvm-nm -no-llvm-bc %t.dylib | FileCheck --check-prefix=CHECK-O2 %s
+; REQUIRES: X86
target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/tools/sancov/Inputs/blacklist.txt b/test/tools/sancov/Inputs/blacklist.txt
new file mode 100644
index 000000000000..b782960652bb
--- /dev/null
+++ b/test/tools/sancov/Inputs/blacklist.txt
@@ -0,0 +1 @@
+fun:bar*
diff --git a/test/tools/sancov/Inputs/foo.cpp b/test/tools/sancov/Inputs/foo.cpp
new file mode 100644
index 000000000000..71b35b32392d
--- /dev/null
+++ b/test/tools/sancov/Inputs/foo.cpp
@@ -0,0 +1,5 @@
+#include <stdio.h>
+#include <string>
+
+__attribute__((noinline))
+void foo() { printf("foo\n"); }
diff --git a/test/tools/sancov/Inputs/test-linux_x86_64 b/test/tools/sancov/Inputs/test-linux_x86_64
new file mode 100755
index 000000000000..2d141b693b2e
--- /dev/null
+++ b/test/tools/sancov/Inputs/test-linux_x86_64
Binary files differ
diff --git a/test/tools/sancov/Inputs/test-linux_x86_64-1.sancov b/test/tools/sancov/Inputs/test-linux_x86_64-1.sancov
new file mode 100644
index 000000000000..a1c7f7b6cc38
--- /dev/null
+++ b/test/tools/sancov/Inputs/test-linux_x86_64-1.sancov
Binary files differ
diff --git a/test/tools/sancov/Inputs/test-linux_x86_64.sancov b/test/tools/sancov/Inputs/test-linux_x86_64.sancov
new file mode 100644
index 000000000000..e5ed81ed906e
--- /dev/null
+++ b/test/tools/sancov/Inputs/test-linux_x86_64.sancov
Binary files differ
diff --git a/test/tools/sancov/Inputs/test.cpp b/test/tools/sancov/Inputs/test.cpp
new file mode 100644
index 000000000000..5690409a2781
--- /dev/null
+++ b/test/tools/sancov/Inputs/test.cpp
@@ -0,0 +1,19 @@
+// compile & generate coverage data using:
+// clang++ -g -o test-linux_x86_64 -fsanitize=address -fsanitize-coverage=bb test.cpp foo.cpp
+// ASAN_OPTIONS="coverage=1" ./test-linux_x86_64 && mv test-linux_x86_64.*.sancov test-linux_x86_64.sancov
+// ASAN_OPTIONS="coverage=1" ./test-linux_x86_64 1 && mv test-linux_x86_64.*.sancov test-linux_x86_64-1.sancov
+
+#include <stdio.h>
+#include <string>
+
+void foo();
+
+__attribute__((noinline))
+std::string bar(std::string str) { printf("bar\n"); return str; }
+
+int main(int argc, char **argv) {
+ if (argc == 2)
+ foo();
+ bar("str");
+ printf("main\n");
+}
diff --git a/test/tools/sancov/blacklist.test b/test/tools/sancov/blacklist.test
new file mode 100644
index 000000000000..c07f5cdd109e
--- /dev/null
+++ b/test/tools/sancov/blacklist.test
@@ -0,0 +1,5 @@
+REQUIRES: x86_64-linux
+RUN: sancov -obj %p/Inputs/test-linux_x86_64 -covered-functions -blacklist %p/Inputs/blacklist.txt %p/Inputs/test-linux_x86_64.sancov | FileCheck %s
+
+CHECK-NOT: Inputs{{[/\\]}}test.cpp:12 bar(std::string)
+CHECK: Inputs{{[/\\]}}test.cpp:14 main
diff --git a/test/tools/sancov/covered_functions.test b/test/tools/sancov/covered_functions.test
new file mode 100644
index 000000000000..5e0696bf8615
--- /dev/null
+++ b/test/tools/sancov/covered_functions.test
@@ -0,0 +1,13 @@
+REQUIRES: x86_64-linux
+RUN: sancov -obj %p/Inputs/test-linux_x86_64 -covered-functions %p/Inputs/test-linux_x86_64.sancov | FileCheck %s
+RUN: sancov -obj %p/Inputs/test-linux_x86_64 -covered-functions -strip_path_prefix=Inputs/ %p/Inputs/test-linux_x86_64.sancov | FileCheck --check-prefix=STRIP_PATH %s
+RUN: sancov -obj %p/Inputs/test-linux_x86_64 -demangle=0 -covered-functions %p/Inputs/test-linux_x86_64.sancov | FileCheck --check-prefix=NO_DEMANGLE %s
+
+CHECK: Inputs{{[/\\]}}test.cpp:12 bar(std::string)
+CHECK: Inputs{{[/\\]}}test.cpp:14 main
+
+STRIP_PATH: {{^}}test.cpp:12 bar(std::string)
+STRIP_PATH: {{^}}test.cpp:14 main
+
+NO_DEMANGLE: test.cpp:12 _Z3barSs
+NO_DEMANGLE: test.cpp:14 main
diff --git a/test/tools/sancov/not_covered_functions.test b/test/tools/sancov/not_covered_functions.test
new file mode 100644
index 000000000000..8bcbac7f7b1b
--- /dev/null
+++ b/test/tools/sancov/not_covered_functions.test
@@ -0,0 +1,8 @@
+REQUIRES: x86_64-linux
+RUN: sancov -obj %p/Inputs/test-linux_x86_64 -not-covered-functions %p/Inputs/test-linux_x86_64.sancov | FileCheck %s
+RUN: sancov -obj %p/Inputs/test-linux_x86_64 -not-covered-functions %p/Inputs/test-linux_x86_64-1.sancov | FileCheck --check-prefix=CHECK1 --allow-empty %s
+
+CHECK: Inputs{{[/\\]}}foo.cpp:5 foo()
+CHECK-NOT: {{.*__sanitizer.*}}
+CHECK1-NOT: {{.+}}
+
diff --git a/test/tools/sancov/print.test b/test/tools/sancov/print.test
new file mode 100644
index 000000000000..c67bbaa842a8
--- /dev/null
+++ b/test/tools/sancov/print.test
@@ -0,0 +1,11 @@
+REQUIRES: x86_64-linux
+RUN: sancov -obj %p/Inputs/test-linux_x86_64 -print %p/Inputs/test-linux_x86_64.sancov | FileCheck %s
+
+CHECK: 0x4dbe2b
+CHECK: 0x4dbf72
+CHECK: 0x4dbfec
+CHECK: 0x4dc033
+CHECK: 0x4dc06a
+CHECK: 0x4dc09d
+CHECK: 0x4dc0d0
+